Exemplo n.º 1
0
    def testTokens(self):
        """Print a summary of Id/Kind and check LookupKind on sample tokens."""
        print(Id.Op_Newline)
        print(ast.token(Id.Op_Newline, '\n'))

        print(IdName(Id.Op_Newline))

        print(Kind.Eof)
        print(Kind.Left)
        print('--')

        # Only attributes whose name starts with an uppercase letter are
        # counted as Kinds.
        kind_names = [attr for attr in dir(Kind) if attr[0].isupper()]
        for attr in kind_names:
            print(attr, getattr(Kind, attr))

        print('Number of Kinds:', len(kind_names))
        # NOTE: the original author recorded "233 out of 256 tokens now" here.
        print('Number of IDs:', len(_ID_NAMES))

        # Make sure we're not exporting too much
        print(dir(id_kind))

        # Each row: (token id, token text, Kind that LookupKind must report).
        expectations = [
            (Id.Arith_Plus, '+', Kind.Arith),
            (Id.Arith_CaretEqual, '^=', Kind.Arith),
            (Id.Arith_RBrace, '}', Kind.Arith),
            (Id.BoolBinary_GlobDEqual, '==', Kind.BoolBinary),
            (Id.BoolBinary_Equal, '=', Kind.BoolBinary),
        ]
        for tok_id, text, expected_kind in expectations:
            tok = ast.token(tok_id, text)
            self.assertEqual(expected_kind, LookupKind(tok.id))
Exemplo n.º 2
0
    def _NextOne(self, lex_mode=lex_mode_e.DBRACKET):
        """Advance to the next word and refresh self.op_id / self.b_kind.

        self.words buffers at most two words here: the current one and an
        optional lookahead.

        Args:
          lex_mode: lexer mode passed to the word parser when a new word has
            to be read.

        Returns:
          False if the word parser returned no word (its errors are appended
          to self.error_stack); True otherwise.
        """
        buffered = len(self.words)
        if buffered == 2:
            # A lookahead word is already buffered; it becomes the current one.
            assert lex_mode == lex_mode_e.DBRACKET
            lookahead = self.words.pop()
            self.words[0] = lookahead
            self.cur_word = lookahead
        elif buffered in (0, 1):
            fresh = self.w_parser.ReadWord(lex_mode)
            if not fresh:
                self.error_stack.extend(self.w_parser.Error())
                return False
            if buffered:
                self.words[0] = fresh
            else:
                self.words.append(fresh)
            self.cur_word = fresh

        self.op_id = word.BoolId(self.cur_word)
        self.b_kind = LookupKind(self.op_id)
        return True
Exemplo n.º 3
0
    def _ApplyUnarySuffixOp(self, val, op):
        """Apply a Kind.VOp1 suffix operator to a defined value.

        Args:
          val: the value to transform; must not be Undef.
          op: the suffix operator node; its op_id and arg_word are read.

        Returns:
          runtime.Str when val is a Str, otherwise runtime.StrArray.

        Raises:
          AssertionError: if the operator's kind is not Kind.VOp1.
        """
        assert val.tag != value_e.Undef

        op_kind = LookupKind(op.op_id)
        if op_kind != Kind.VOp1:
            raise AssertionError(op_kind)

        arg_val = self.EvalWordToString(op.arg_word, do_fnmatch=True)
        assert arg_val.tag == value_e.Str

        if val.tag == value_e.Str:
            return runtime.Str(libstr.DoUnarySuffixOp(val.s, op, arg_val.s))

        # Anything else is treated as a StrArray.
        # ${a[@]#prefix} is VECTORIZED on arrays.  Oil should have this too.
        # Entries that are None (unset) are dropped from the result.
        transformed = [
            libstr.DoUnarySuffixOp(item, op, arg_val.s)
            for item in val.strs
            if item is not None
        ]
        return runtime.StrArray(transformed)
Exemplo n.º 4
0
def CommandKind(w):
    """Return the Kind of a word from the command parser's point of view.

    A TokenWord reports the Kind of its token; every other word is Kind.Word.
    """
    # NOTE: This is a bit inconsistent with CommandId, because we never return
    # Kind.KW (or Kind.Lit).  But the CommandParser is easier to write this way.
    if w.tag != word_e.TokenWord:
        return Kind.Word
    return LookupKind(w.token.id)
Exemplo n.º 5
0
def BoolId(node):
    """Return the Id that the boolean-expression parser should see for a word.

    StringWord and TokenWord report their own id.  Any other word is assumed
    to be a CompoundWord: it reports the id of its single literal part when
    that id is `!`, `]]`, or a BoolUnary/BoolBinary operator, and
    Id.Word_Compound otherwise.
    """
    tag = node.tag
    if tag == word_e.StringWord:  # for test/[
        return node.id
    if tag == word_e.TokenWord:
        return node.token.id

    # From here on, assume a CompoundWord (not asserted).
    parts = node.parts
    if len(parts) != 1:
        return Id.Word_Compound

    part_id = _LiteralPartId(parts[0])
    if part_id == Id.Undefined_Tok:
        return Id.Word_Compound  # It's a regular word

    # KW_Bang and Lit_DRightBracket are outside the BoolUnary/BoolBinary
    # namespace, but work the same way as those operators.
    is_special = part_id in (Id.KW_Bang, Id.Lit_DRightBracket)
    if is_special or LookupKind(part_id) in (Kind.BoolUnary, Kind.BoolBinary):
        return part_id

    return Id.Word_Compound
Exemplo n.º 6
0
    def _Peek(self):
        """Helper method."""
        if self.next_lex_mode is not None:
            self.prev_token = self.cur_token  # for completion
            self.cur_token = self.lexer.Read(self.next_lex_mode)
            self.token_kind = LookupKind(self.cur_token.id)
            self.token_type = self.cur_token.id

            self.next_lex_mode = None
        return self.cur_token
Exemplo n.º 7
0
    def _NextOne(self, lex_mode=lex_mode_e.DBRACKET):
        """Advance to the next word and refresh self.op_id / self.b_kind.

        If a lookahead word is already buffered in self.words it becomes the
        current word; otherwise a new word is read from the word parser.

        Args:
          lex_mode: lexer mode passed to the word parser when a new word has
            to be read.
        """
        buffered = len(self.words)
        if buffered == 2:
            assert lex_mode == lex_mode_e.DBRACKET
            lookahead = self.words.pop()
            self.words[0] = lookahead
            self.cur_word = lookahead
        elif buffered in (0, 1):
            fresh = self.w_parser.ReadWord(lex_mode)  # may raise
            if buffered:
                self.words[0] = fresh
            else:
                self.words.append(fresh)
            self.cur_word = fresh

        assert self.cur_word is not None
        self.op_id = word.BoolId(self.cur_word)
        self.b_kind = LookupKind(self.op_id)
Exemplo n.º 8
0
def KeywordToken(w):
    """Test whether a compound word is an assignment or control flow word.

    Args:
      w: a CompoundWord (asserted).

    Returns:
      (kind, token) when the word consists of exactly one literal part whose
      kind is Kind.Assign or Kind.ControlFlow; (Kind.Undefined, None)
      otherwise.
    """
    assert w.tag == word_e.CompoundWord

    no_match = (Kind.Undefined, None)
    if len(w.parts) == 1:
        only_part = w.parts[0]
        part_id = _LiteralPartId(only_part)
        if part_id != Id.Undefined_Tok:
            part_kind = LookupKind(part_id)
            if part_kind in (Kind.Assign, Kind.ControlFlow):
                return part_kind, only_part.token

    return no_match
Exemplo n.º 9
0
def CommandId(node):
    """Return the Id that the command parser should see for a word.

    A TokenWord reports its token's id.  A CompoundWord reports the id of its
    single literal part when that id is `{`, `}`, or a keyword (Kind.KW), and
    Id.Word_Compound in every other case.
    """
    if node.tag == word_e.TokenWord:
        return node.token.id

    # Anything else must be a CompoundWord.
    assert node.tag == word_e.CompoundWord

    # Only a word made of a single literal part can be something special.
    if len(node.parts) == 1:
        part_id = _LiteralPartId(node.parts[0])
        if part_id != Id.Undefined_Tok:
            if part_id in (Id.Lit_LBrace, Id.Lit_RBrace):
                return part_id
            if LookupKind(part_id) == Kind.KW:
                return part_id

    return Id.Word_Compound
Exemplo n.º 10
0
    def ParseFactor(self):
        """Parse one factor of a boolean expression.

        Factor  : WORD
                | UNARY_OP WORD
                | WORD BINARY_OP WORD
                | '(' Expr ')'

        Returns:
          The AST node for the factor, or None on error.  Failures detected
          here are reported through self.AddErrorContext; a failed
          self._Next() simply yields None.
        """
        if self.b_kind == Kind.BoolUnary:
            # Only the operator id is kept, not the token itself.
            unary_op = self.op_id
            if not self._Next():
                return None
            operand = self.cur_word
            if not self._Next():
                return None
            return ast.BoolUnary(unary_op, operand)

        if self.b_kind == Kind.Word:
            # Peek ahead another token.
            next_word = self._LookAhead()
            next_op_id = word.BoolId(next_word)
            next_kind = LookupKind(next_op_id)

            # Redir pun for < and >, -a and -o pun
            if next_kind not in (Kind.BoolBinary, Kind.Redir):
                # [[ foo ]]
                lone_word = self.cur_word
                if not self._Next():
                    return None
                return ast.WordTest(lone_word)

            left = self.cur_word

            if not self._Next():
                return None
            binary_op = self.op_id

            # TODO: Need to change to lex_mode_e.BASH_REGEX.
            # _Next(lex_mode) then?
            is_regex = next_op_id == Id.BoolBinary_EqualTilde
            if is_regex:
                advanced = self._Next(lex_mode=lex_mode_e.BASH_REGEX)
            else:
                advanced = self._Next()
            if not advanced:
                return None

            right = self.cur_word
            if is_regex:
                # TODO: Quoted parts need to be regex-escaped, e.g.
                # [[ $a =~ "{" ]].  I don't think libc has a function to do
                # this.  Escape these characters:
                # https://www.gnu.org/software/sed/manual/html_node/ERE-syntax.html

                ok, regex_str, unused_quoted = word.StaticEval(right)

                # ok means the word doesn't contain $foo, etc., so the regex
                # can be checked right now.
                if ok and not libc.regex_parse(regex_str):
                    self.AddErrorContext("Invalid regex: %r" % regex_str,
                                         word=right)
                    return None

            if not self._Next():
                return None
            return ast.BoolBinary(binary_op, left, right)

        if self.op_id == Id.Op_LParen:
            if not self._Next():
                return None
            inner = self.ParseExpr()
            if self.op_id != Id.Op_RParen:
                self.AddErrorContext('Expected ), got %s',
                                     self.cur_word,
                                     word=self.cur_word)
                return None
            if not self._Next():
                return None
            return inner

        # TODO: A proper error, e.g. for [[ && ]] or [[ ]]
        self.AddErrorContext('Unexpected token: %s' % self.cur_word,
                             word=self.cur_word)
        return None
Exemplo n.º 11
0
    def _EvalBracedVarSub(self, part, part_vals, quoted):
        """Evaluate one braced variable substitution and append its value.

        Looks up the value named by part.token, applies part.bracket_op if
        present, then at most one of part.prefix_op / part.suffix_op, and
        finally converts the value to a part value.

        Returns early, without appending here, when the test operator reports
        the SpliceParts or SpliceAndAssign effect (part_vals has then already
        been mutated by _ApplyTestOp).

        Args:
          part: the substitution node; this method reads its token,
            bracket_op, prefix_op and suffix_op fields.
          part_vals: output param to append to.
          quoted: whether the substitution is quoted.  An unquoted ${a[@]} is
            marked for array decay; the flag is also forwarded to
            _EvalSpecialVar, _ApplyTestOp and _ValueToPartValue.
        """
        # We have four types of operator that interact.
        #
        # 1. Bracket: value -> (value, bool maybe_decay_array)
        #
        # 2. Then these four cases are mutually exclusive:
        #
        #   a. Prefix length: value -> value
        #   b. Test: value -> part_value[]
        #   c. Other Suffix: value -> value
        #   d. no operator: you have a value
        #
        # That is, we don't have both prefix and suffix operators.
        #
        # 3. Process maybe_decay_array here before returning.

        maybe_decay_array = False  # for $*, ${a[*]}, etc.

        var_name = None  # For ${foo=default}

        # 1. Evaluate from (var_name, var_num, token Id) -> value
        if part.token.id == Id.VSub_Name:
            var_name = part.token.val
            val = self.mem.GetVar(var_name)
            #log('EVAL NAME %s -> %s', var_name, val)

        elif part.token.id == Id.VSub_Number:
            var_num = int(part.token.val)
            val = self._EvalVarNum(var_num)
        else:
            # $* decays
            val, maybe_decay_array = self._EvalSpecialVar(
                part.token.id, quoted)

        # 2. Bracket: value -> (value v, bool maybe_decay_array)
        # maybe_decay_array is for joining ${a[*]} and unquoted ${a[@]} AFTER
        # suffix ops are applied.  If we take the length with a prefix op, the
        # distinction is ignored.
        if part.bracket_op:
            if part.bracket_op.tag == bracket_op_e.WholeArray:
                op_id = part.bracket_op.op_id

                if op_id == Id.Lit_At:
                    if not quoted:
                        maybe_decay_array = True  # ${a[@]} decays but "${a[@]}" doesn't
                    if val.tag == value_e.Undef:
                        val = self._EmptyStrArrayOrError(part.token)
                    elif val.tag == value_e.Str:
                        e_die("Can't index string with @: %r", val, part=part)
                    elif val.tag == value_e.StrArray:
                        # TODO: Is this a no-op?  Just leave 'val' alone.
                        val = runtime.StrArray(val.strs)

                elif op_id == Id.Arith_Star:
                    maybe_decay_array = True  # both ${a[*]} and "${a[*]}" decay
                    if val.tag == value_e.Undef:
                        val = self._EmptyStrArrayOrError(part.token)
                    elif val.tag == value_e.Str:
                        e_die("Can't index string with *: %r", val, part=part)
                    elif val.tag == value_e.StrArray:
                        # TODO: Is this a no-op?  Just leave 'val' alone.
                        # ${a[*]} or "${a[*]}" :  maybe_decay_array is always true
                        val = runtime.StrArray(val.strs)

                else:
                    raise AssertionError(op_id)  # unknown

            elif part.bracket_op.tag == bracket_op_e.ArrayIndex:
                anode = part.bracket_op.expr

                if val.tag == value_e.Undef:
                    pass  # it will be checked later

                elif val.tag == value_e.Str:
                    # Bash treats any string as an array, so we can't add our own
                    # behavior here without making valid OSH invalid bash.
                    e_die("Can't index string %r with integer",
                          part.token.val,
                          token=part.token)

                elif val.tag == value_e.StrArray:
                    index = self.arith_ev.Eval(anode)
                    try:
                        # could be None because representation is sparse
                        s = val.strs[index]
                    except IndexError:
                        s = None

                    if s is None:
                        val = runtime.Undef()
                    else:
                        val = runtime.Str(s)

                elif val.tag == value_e.AssocArray:
                    # The key is evaluated without integer coercion.
                    key = self.arith_ev.Eval(anode, int_coerce=False)
                    try:
                        val = runtime.Str(val.d[key])
                    except KeyError:
                        val = runtime.Undef()

                else:
                    raise AssertionError(val.__class__.__name__)

            else:
                raise AssertionError(part.bracket_op.tag)

        if part.prefix_op:
            val = self._EmptyStrOrError(val)  # maybe error
            val = self._ApplyPrefixOp(val, part.prefix_op)
            # NOTE: When applying the length operator, we can't have a test or
            # suffix afterward.  And we don't want to decay the array

        elif part.suffix_op:
            op = part.suffix_op
            if op.tag == suffix_op_e.StringNullary:
                if op.op_id == Id.VOp0_P:
                    # TODO: Use dependency injection
                    #val = self.prompt._EvalPS1(val)
                    prompt = ui.PROMPT.EvalPrompt(val)
                    val = runtime.Str(prompt)
                else:
                    raise NotImplementedError(op.op_id)

            elif op.tag == suffix_op_e.StringUnary:
                if LookupKind(part.suffix_op.op_id) == Kind.VTest:
                    # TODO: Change style to:
                    # if self._ApplyTestOp(...)
                    #   return
                    # It should return whether anything was done.  If not, we continue to
                    # the end, where we might throw an error.

                    assign_part_vals, effect = self._ApplyTestOp(
                        val, part.suffix_op, quoted, part_vals)

                    # NOTE: Splicing part_values is necessary because of code like
                    # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
                    # do_glob/do_elide setting.
                    if effect == effect_e.SpliceParts:
                        return  # EARLY RETURN, part_vals mutated

                    elif effect == effect_e.SpliceAndAssign:
                        if var_name is None:
                            # TODO: error context
                            e_die("Can't assign to special variable")
                        else:
                            # NOTE: This decays arrays too!  'set -o strict_array' could
                            # avoid it.
                            rhs_str = _DecayPartValuesToString(
                                assign_part_vals, self.splitter.GetJoinChar())
                            state.SetLocalString(self.mem, var_name, rhs_str)
                        return  # EARLY RETURN, part_vals mutated

                    elif effect == effect_e.Error:
                        raise NotImplementedError

                    else:
                        # The old one
                        #val = self._EmptyStringPartOrError(part_val, quoted)
                        pass  # do nothing, may still be undefined

                else:
                    val = self._EmptyStrOrError(val)  # maybe error
                    # Other suffix: value -> value
                    val = self._ApplyUnarySuffixOp(val, part.suffix_op)

            elif op.tag == suffix_op_e.PatSub:  # PatSub, vectorized
                val = self._EmptyStrOrError(val)  # ${undef//x/y}

                pat_val = self.EvalWordToString(op.pat, do_fnmatch=True)
                assert pat_val.tag == value_e.Str, pat_val

                if op.replace:
                    replace_val = self.EvalWordToString(op.replace,
                                                        do_fnmatch=True)
                    assert replace_val.tag == value_e.Str, replace_val
                    replace_str = replace_val.s
                else:
                    replace_str = ''

                # Either GlobReplacer or ConstStringReplacer
                replacer = libstr.MakeReplacer(pat_val.s, replace_str,
                                               op.spids[0])

                if val.tag == value_e.Str:
                    s = replacer.Replace(val.s, op)
                    val = runtime.Str(s)

                elif val.tag == value_e.StrArray:
                    # Entries that are None (unset) are dropped from the result.
                    strs = []
                    for s in val.strs:
                        if s is not None:
                            strs.append(replacer.Replace(s, op))
                    val = runtime.StrArray(strs)

                else:
                    raise AssertionError(val.__class__.__name__)

            elif op.tag == suffix_op_e.Slice:
                val = self._EmptyStrOrError(val)  # ${undef:3:1}

                if op.begin:
                    begin = self.arith_ev.Eval(op.begin)
                else:
                    begin = 0

                if op.length:
                    length = self.arith_ev.Eval(op.length)
                else:
                    length = None

                if val.tag == value_e.Str:  # Slice UTF-8 characters in a string.
                    s = val.s

                    try:
                        if begin < 0:
                            # It could be negative if we compute unicode length, but that's
                            # confusing.

                            # TODO: Instead of attributing it to the word part, it would be
                            # better if we attributed it to arith_expr begin.
                            raise util.InvalidSlice(
                                "The start index of a string slice can't be negative: %d",
                                begin,
                                part=part)

                        byte_begin = libstr.AdvanceUtf8Chars(s, begin, 0)

                        if length is None:
                            byte_end = len(s)
                        else:
                            if length < 0:
                                # TODO: Instead of attributing it to the word part, it would be
                                # better if we attributed it to arith_expr begin.
                                raise util.InvalidSlice(
                                    "The length of a string slice can't be negative: %d",
                                    length,
                                    part=part)

                            byte_end = libstr.AdvanceUtf8Chars(
                                s, length, byte_begin)

                    except (util.InvalidSlice, util.InvalidUtf8) as e:
                        if self.exec_opts.strict_word_eval:
                            raise
                        else:
                            # TODO:
                            # - We don't see the error location here, but we see it when set
                            #   -o strict-word-eval.
                            # - Doesn't make the command exit with 1.  It just sets the word
                            #   to empty string.
                            util.warn(e.UserErrorString())
                            substr = ''  # error condition
                    else:
                        # Only reached when no slice/UTF-8 error was raised.
                        substr = s[byte_begin:byte_end]

                    val = runtime.Str(substr)

                elif val.tag == value_e.StrArray:  # Slice array entries.
                    # NOTE: unset elements don't count towards the length.
                    strs = []
                    for s in val.strs[begin:]:
                        if s is not None:
                            strs.append(s)
                            if len(
                                    strs
                            ) == length:  # never true for unspecified length
                                break
                    val = runtime.StrArray(strs)

                else:
                    raise AssertionError(
                        val.__class__.__name__)  # Not possible

        # After applying suffixes, process maybe_decay_array here.
        if maybe_decay_array and val.tag == value_e.StrArray:
            val = self._DecayArray(val)

        # For the case where there are no prefix or suffix ops.
        val = self._EmptyStrOrError(val)

        # For example, ${a} evaluates to value_t.Str(), but we want a
        # part_value.StringPartValue.
        part_val = _ValueToPartValue(val, quoted)
        part_vals.append(part_val)
Exemplo n.º 12
0
 def testDBracketState(self):
     """In DBRACKET mode, '-z' lexes as a BoolUnary_z token of Kind.BoolUnary."""
     lex = _InitLexer('-z foo')
     first_tok = lex.Read(lex_mode_e.DBRACKET)
     expected_tok = ast.token(Id.BoolUnary_z, '-z')
     self.assertTokensEqual(expected_tok, first_tok)
     self.assertEqual(Kind.BoolUnary, LookupKind(first_tok.id))
Exemplo n.º 13
0
  def _EvalBracedVarSub(self, part, part_vals, quoted):
    """Evaluate one braced variable substitution and append its value.

    Looks up the value named by part.token, applies part.bracket_op if
    present, then at most one of part.prefix_op / part.suffix_op, and finally
    converts the value to a part value.

    Returns early, without appending here, when the test operator reports the
    SpliceParts or SpliceAndAssign effect (part_vals has then already been
    mutated by _ApplyTestOp).

    Args:
      part: the substitution node; this method reads its token, bracket_op,
        prefix_op and suffix_op fields.
      part_vals: output param to append to.
      quoted: whether the substitution is quoted.  An unquoted ${a[@]} is
        marked for array decay; the flag is also forwarded to _EvalSpecialVar,
        _ApplyTestOp and _ValueToPartValue.
    """
    # We have four types of operator that interact.
    #
    # 1. Bracket: value -> (value, bool maybe_decay_array)
    #
    # 2. Then these four cases are mutually exclusive:
    #
    #   a. Prefix length: value -> value
    #   b. Test: value -> part_value[]
    #   c. Other Suffix: value -> value
    #   d. no operator: you have a value
    #
    # That is, we don't have both prefix and suffix operators.
    #
    # 3. Process maybe_decay_array here before returning.

    maybe_decay_array = False  # for $*, ${a[*]}, etc.

    var_name = None  # For ${foo=default}

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.token.id == Id.VSub_Name:
      var_name = part.token.val
      val = self.mem.GetVar(var_name)
      #log('EVAL NAME %s -> %s', var_name, val)

    elif part.token.id == Id.VSub_Number:
      var_num = int(part.token.val)
      val = self._EvalVarNum(var_num)
    else:
      # $* decays
      val, maybe_decay_array = self._EvalSpecialVar(part.token.id, quoted)

    # 2. Bracket: value -> (value v, bool maybe_decay_array)
    # maybe_decay_array is for joining ${a[*]} and unquoted ${a[@]} AFTER suffix ops
    # are applied.  If we take the length with a prefix op, the distinction is
    # ignored.
    if part.bracket_op:
      if part.bracket_op.tag == bracket_op_e.WholeArray:
        op_id = part.bracket_op.op_id

        if op_id == Id.Lit_At:
          if not quoted:
            maybe_decay_array = True  # ${a[@]} decays but "${a[@]}" doesn't
          if val.tag == value_e.Undef:
            val = self._EmptyStrArrayOrError(part.token)
          elif val.tag == value_e.Str:
            e_die("Can't index string with @: %r", val, part=part)
          elif val.tag == value_e.StrArray:
            val = runtime.StrArray(val.strs)

        elif op_id == Id.Arith_Star:
          maybe_decay_array = True  # both ${a[*]} and "${a[*]}" decay
          if val.tag == value_e.Undef:
            val = self._EmptyStrArrayOrError(part.token)
          elif val.tag == value_e.Str:
            e_die("Can't index string with *: %r", val, part=part)
          elif val.tag == value_e.StrArray:
            # Always maybe_decay_array with ${a[*]} or "${a[*]}"
            val = runtime.StrArray(val.strs)

        else:
          raise AssertionError(op_id)  # unknown

      elif part.bracket_op.tag == bracket_op_e.ArrayIndex:
        anode = part.bracket_op.expr
        # NOTE: the index expression is evaluated before the value's tag is
        # inspected, so it is evaluated even for Undef and Str values.
        index = self.arith_ev.Eval(anode)

        if val.tag == value_e.Undef:
          pass  # it will be checked later
        elif val.tag == value_e.Str:
          # TODO: Implement this as an extension. Requires unicode support.
          # Bash treats it as an array.
          e_die("Can't index string %r with integer", part.token.val)
        elif val.tag == value_e.StrArray:
          try:
            s = val.strs[index]
          except IndexError:
            s = None

          if s is None:
            val = runtime.Undef()
          else:
            val = runtime.Str(s)

      else:
        raise AssertionError(part.bracket_op.tag)

    if part.prefix_op:
      val = self._EmptyStrOrError(val)  # maybe error
      val = self._ApplyPrefixOp(val, part.prefix_op)
      # NOTE: When applying the length operator, we can't have a test or
      # suffix afterward.  And we don't want to decay the array

    elif part.suffix_op:
      op = part.suffix_op
      if op.tag == suffix_op_e.StringUnary:
        if LookupKind(part.suffix_op.op_id) == Kind.VTest:
          # TODO: Change style to:
          # if self._ApplyTestOp(...)
          #   return
          # It should return whether anything was done.  If not, we continue to
          # the end, where we might throw an error.

          assign_part_vals, effect = self._ApplyTestOp(val, part.suffix_op,
                                                       quoted, part_vals)

          # NOTE: Splicing part_values is necessary because of code like
          # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
          # do_glob/do_elide setting.
          if effect == effect_e.SpliceParts:
            return  # EARLY RETURN, part_vals mutated

          elif effect == effect_e.SpliceAndAssign:
            if var_name is None:
              # TODO: error context
              e_die("Can't assign to special variable")
            else:
              # NOTE: This decays arrays too!  'set -o strict_array' could
              # avoid it.
              rhs_str = _DecayPartValuesToString(assign_part_vals,
                                                 self.splitter.GetJoinChar())
              state.SetLocalString(self.mem, var_name, rhs_str)
            return  # EARLY RETURN, part_vals mutated

          elif effect == effect_e.Error:
            raise NotImplementedError

          else:
            # The old one
            #val = self._EmptyStringPartOrError(part_val, quoted)
            pass  # do nothing, may still be undefined

        else:
          val = self._EmptyStrOrError(val)  # maybe error
          # Other suffix: value -> value
          val = self._ApplyUnarySuffixOp(val, part.suffix_op)

      elif op.tag == suffix_op_e.PatSub:  # PatSub, vectorized
        val = self._EmptyStrOrError(val)

        pat_val = self.EvalWordToString(op.pat, do_fnmatch=True)
        assert pat_val.tag == value_e.Str, pat_val

        if op.replace:
          replace_val = self.EvalWordToString(op.replace, do_fnmatch=True)
          assert replace_val.tag == value_e.Str, replace_val
          replace_str = replace_val.s
        else:
          replace_str = ''

        pat = pat_val.s
        if val.tag == value_e.Str:
          s = libstr.PatSub(val.s, op, pat, replace_str)
          val = runtime.Str(s)

        elif val.tag == value_e.StrArray:
          # Entries that are None (unset) are dropped from the result.
          strs = []
          for s in val.strs:
            if s is not None:
              strs.append(libstr.PatSub(s, op, pat, replace_str))
          val = runtime.StrArray(strs)

        else:
          raise AssertionError(val.__class__.__name__)

      elif op.tag == suffix_op_e.Slice:
        # NOTE: The beginning can be negative, but Python handles this.  Might
        # want to make it explicit.
        # TODO: Check out of bounds errors?  begin > end?
        if op.begin:
          begin = self.arith_ev.Eval(op.begin)
        else:
          begin = 0

        if op.length:
          length = self.arith_ev.Eval(op.length)
          end = begin + length
        else:
          length = None
          end = None  # Python supports None as the end

        if val.tag == value_e.Str:  # Slice characters in a string.
          # TODO: Need to support unicode?  Write spec # tests.
          val = runtime.Str(val.s[begin : end])

        elif val.tag == value_e.StrArray:  # Slice array entries.
          # NOTE: unset elements don't count towards the length
          strs = []
          for s in val.strs[begin:]:
            if s is not None:
              strs.append(s)
              if len(strs) == length: # never true for unspecified length
                break
          val = runtime.StrArray(strs)

        else:
          raise AssertionError(val.__class__.__name__)

    # After applying suffixes, process maybe_decay_array here.
    if maybe_decay_array and val.tag == value_e.StrArray:
      val = self._DecayArray(val)

    # No prefix or suffix ops
    val = self._EmptyStrOrError(val)

    # For example, ${a} evaluates to value_t.Str(), but we want a
    # part_value.StringPartValue.
    part_val = _ValueToPartValue(val, quoted)
    part_vals.append(part_val)
Exemplo n.º 14
0
def main(argv):
    """Command-line entry point; argv[1] selects the action.

    Actions:
      c    Print one '#define id__<name> <id>' line per entry of
           ID_SPEC.token_names, sorted by ID.
      cpp  Group Id names by Kind and pass them to GenCppCode, which writes
           to sys.stdout.  An optional argv[2] names a file of label lines.

    Raises:
      RuntimeError: if no action is given, the action is unknown, or a label
        line lists fewer ids than its Kind contains.
    """
    if len(argv) < 2:
        raise RuntimeError('Action required')
    action = argv[1]

    if action == 'c':
        # Sort by ID
        pairs = sorted(ID_SPEC.token_names.iteritems(),
                       key=lambda pair: pair[0])
        for id_num, id_name in pairs:
            print('#define id__%s %s' % (id_name, id_num))

    elif action == 'cpp':
        # For blog post
        if len(argv) > 2:
            with open(argv[2]) as f:
                label_lines = f.readlines()
        else:
            label_lines = []

        from collections import defaultdict

        # Kind index -> [list of Id names]
        ids_by_kind = defaultdict(list)
        for attr in dir(Id):
            if attr[0].isupper():
                ids_by_kind[LookupKind(getattr(Id, attr))].append(attr)

        # One (kind name, kind index, number of ids) entry per Kind.
        kinds = []
        for kind_attr in dir(Kind):
            if not kind_attr[0].isupper():
                continue
            kind_index = getattr(Kind, kind_attr)
            kinds.append((kind_attr, kind_index, len(ids_by_kind[kind_index])))

        # Sort descending by length of ID list
        kinds.sort(key=lambda entry: entry[2], reverse=True)

        id_labels = {}  # Id name -> integer
        kind_labels = {}  # Kind name -> integer

        # The k-th label line is paired with the k-th largest Kind.
        for rank, line in enumerate(label_lines):
            fields = line.split()
            # Six leading fields are required; only the first and fifth are
            # converted to integers.
            id_list_len, _, actual_len, _, kind_label, _ = fields[:6]
            id_list_len = int(id_list_len)
            kind_label = int(kind_label)
            id_list = [int(field) for field in fields[6:]]

            if rank >= len(kinds):
                break  # more label lines than Kinds
            kind_name, kind_index, _ = kinds[rank]
            kind_labels[kind_name] = kind_label

            for pos, id_name in enumerate(ids_by_kind[kind_index]):
                if pos >= len(id_list):
                    raise RuntimeError('%s %s' % (id_name, pos))
                id_labels[id_name] = id_list[pos]

        if 0:  # disable labeling
            id_labels = None
            kind_labels = None

        kind_names = [entry[0] for entry in kinds]
        id_names = [ids_by_kind[entry[1]] for entry in kinds]

        GenCppCode(kind_names,
                   id_names,
                   sys.stdout,
                   id_labels=id_labels,
                   kind_labels=kind_labels)

    else:
        raise RuntimeError('Invalid action %r' % action)
Exemplo n.º 15
0
    def ParseFactor(self):
        """Parse one factor of a boolean expression.

        Factor  : WORD
                | UNARY_OP WORD
                | WORD BINARY_OP WORD
                | '(' Expr ')'

        Returns:
          The AST node for the factor.  Syntax errors are reported through
          p_die().
        """
        if self.b_kind == Kind.BoolUnary:
            # Only the operator id is kept, not the token itself.
            unary_op = self.op_id
            self._Next()
            operand = self.cur_word
            # e.g. [[ -f < ]].  But [[ -f '<' ]] is OK
            if operand.tag not in (word_e.CompoundWord, word_e.StringWord):
                p_die('Invalid argument to unary operator', word=operand)
            self._Next()
            return ast.BoolUnary(unary_op, operand)

        if self.b_kind == Kind.Word:
            # Peek ahead another token.
            next_word = self._LookAhead()
            next_op_id = word.BoolId(next_word)
            next_kind = LookupKind(next_op_id)

            # Redir pun for < and >, -a and -o pun
            if next_kind not in (Kind.BoolBinary, Kind.Redir):
                # [[ foo ]]
                lone_word = self.cur_word
                self._Next()
                return ast.WordTest(lone_word)

            left = self.cur_word

            self._Next()
            binary_op = self.op_id

            # TODO: Need to change to lex_mode_e.BASH_REGEX.
            # _Next(lex_mode) then?
            if next_op_id == Id.BoolBinary_EqualTilde:
                self._Next(lex_mode=lex_mode_e.BASH_REGEX)
            else:
                self._Next()

            right = self.cur_word
            # NOTE: For a regex operand, StaticEval for checking regex syntax
            # isn't enough.  We could need to pass do_ere so that the quoted
            # parts get escaped.  No check is performed here.

            self._Next()
            return ast.BoolBinary(binary_op, left, right)

        if self.op_id == Id.Op_LParen:
            self._Next()
            inner = self.ParseExpr()
            if self.op_id != Id.Op_RParen:
                p_die('Expected ), got %s', self.cur_word, word=self.cur_word)
            self._Next()
            return inner

        # It's not WORD, UNARY_OP, or '('
        p_die('Unexpected token in boolean expression', word=self.cur_word)