Example #1
  def testVarOps(self):
    ev = InitEvaluator()  # initializes x=xxx and y=yyy
    unset_sub = word_part.BracedVarSub(token(Id.VSub_Name, 'unset'))
    part_vals = []
    ev._EvalWordPart(unset_sub, part_vals)
    print(part_vals)

    set_sub = word_part.BracedVarSub(token(Id.VSub_Name, 'x'))
    part_vals = []
    ev._EvalWordPart(set_sub, part_vals)
    print(part_vals)

    # Now add some ops
    part = word_part.Literal(token(Id.Lit_Chars, 'default'))
    arg_word = word.Compound([part])
    test_op = suffix_op.Unary(Id.VTest_ColonHyphen, arg_word)
    unset_sub.suffix_op = test_op
    set_sub.suffix_op = test_op

    part_vals = []
    ev._EvalWordPart(unset_sub, part_vals)
    print(part_vals)

    part_vals = []
    ev._EvalWordPart(set_sub, part_vals)
    print(part_vals)
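
The test above exercises the ${x:-default} operator: with ':-', the default is substituted when the variable is unset or empty. A minimal standalone sketch of that rule (the env dict and the colon_hyphen helper are hypothetical, not part of the test):

def colon_hyphen(env, name, default):
    # ':-' substitutes the default for unset OR empty variables; plain '-'
    # would substitute only for unset ones.
    val = env.get(name)
    return default if val is None or val == '' else val

env = {'x': 'xxx', 'y': 'yyy'}
assert colon_hyphen(env, 'unset', 'default') == 'default'
assert colon_hyphen(env, 'x', 'default') == 'xxx'
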
Example #2
  def _ReadLikeDQ(self, left_dq_token, out_parts):
    # type: (Optional[token], List[word_part_t]) -> None
    """
    Args:
      left_dq_token: A token if we are reading a double quoted part, or None if
        we're reading a here doc.
      out_parts: list of word_part to append to
    """
    done = False
    while not done:
      self._Next(lex_mode_e.DQ)
      self._Peek()

      if self.token_kind == Kind.Lit:
        if self.token_type == Id.Lit_EscapedChar:
          part = word_part.EscapedLiteral(self.cur_token)  # type: word_part_t
        else:
          part = word_part.Literal(self.cur_token)
        out_parts.append(part)

      elif self.token_kind == Kind.Left:
        part = self._ReadDoubleQuotedLeftParts()
        out_parts.append(part)

      elif self.token_kind == Kind.VSub:
        part = simple_var_sub(self.cur_token)
        out_parts.append(part)
        # NOTE: parsing "$f(x)" would BREAK CODE.  Could add a parse option
        # for it later.

      elif self.token_kind == Kind.Right:
        assert self.token_type == Id.Right_DoubleQuote, self.token_type
        if left_dq_token:
          done = True
        else:
          # In a here doc, the right quote is literal!
          out_parts.append(word_part.Literal(self.cur_token))

      elif self.token_kind == Kind.Eof:
        if left_dq_token:
          p_die('Unexpected EOF reading double-quoted string that began here',
                token=left_dq_token)
        else:  # here docs will have an EOF in their token stream
          done = True

      else:
        raise AssertionError(self.cur_token)
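
A standalone sketch of the loop's terminating logic, assuming a simplified token stream of (kind, value) pairs (read_like_dq and the token kinds here are stand-ins, not the real lexer API). It shows why left_dq_token doubles as a mode flag: a closing quote ends a "..." string, but inside a here doc it is just another literal character, and EOF is an error only in the quoted case:

def read_like_dq(tokens, in_double_quotes):
    parts = []
    for kind, value in tokens:
        if kind == 'Right_DoubleQuote':
            if in_double_quotes:
                break              # end of the "..." string
            parts.append(value)    # here doc: the quote is literal
        elif kind == 'Eof':
            if in_double_quotes:
                raise SyntaxError('unexpected EOF in double-quoted string')
            break                  # here doc bodies end at EOF
        else:
            parts.append(value)    # literals, substitutions, etc.
    return parts

toks = [('Lit', 'hi '), ('Right_DoubleQuote', '"'), ('Eof', '')]
assert read_like_dq(toks, True) == ['hi ']
assert read_like_dq(toks, False) == ['hi ', '"']
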
Example #3
  def _ReadPatSubVarOp(self, lex_mode):
    # type: (lex_mode_t) -> suffix_op__PatSub
    """
    Match     = ('/' | '#' | '%') WORD
    VarSub    = ...
              | VarOf '/' Match '/' WORD
    """
    pat = self._ReadVarOpArg(lex_mode, eof_type=Id.Lit_Slash, empty_ok=False)
    assert isinstance(pat, word__Compound)  # Because empty_ok=False

    if len(pat.parts) == 1:
      ok, s, quoted = word_.StaticEval(pat)
      if ok and s == '/' and not quoted:  # Looks like ${a////c}, read again
        self._Next(lex_mode)
        self._Peek()
        p = word_part.Literal(self.cur_token)
        pat.parts.append(p)

    if len(pat.parts) == 0:
      p_die('Pattern in ${x/pat/replace} must not be empty',
            token=self.cur_token)

    replace_mode = Id.Undefined_Tok
    # Check for / # % modifier on pattern.
    first_part = pat.parts[0]
    if isinstance(first_part, word_part__Literal):
      lit_id = first_part.token.id
      if lit_id in (Id.Lit_Slash, Id.Lit_Pound, Id.Lit_Percent):
        pat.parts.pop(0)
        replace_mode = lit_id

    # NOTE: If there is a modifier, the pattern can be empty, e.g.
    # ${s/#/foo} and ${a/%/foo}.

    if self.token_type == Id.Right_DollarBrace:
      # e.g. ${v/a} is the same as ${v/a/}  -- empty replacement string
      return suffix_op.PatSub(pat, None, replace_mode)

    if self.token_type == Id.Lit_Slash:
      replace = self._ReadVarOpArg(lex_mode)  # do not stop at /

      self._Peek()
      if self.token_type != Id.Right_DollarBrace:
        # NOTE: I think this never happens.
        # We're either in the VS_ARG_UNQ or VS_ARG_DQ lex state, and everything
        # there is Lit_ or Left_, except for }.
        p_die("Expected } after replacement string, got %s", self.cur_token,
              token=self.cur_token)

      return suffix_op.PatSub(pat, replace, replace_mode)

    # Happens with ${x//} and ${x///foo}, see test/parse-errors.sh
    p_die("Expected } after pat sub, got %r", self.cur_token.val,
          token=self.cur_token)
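
For reference, a rough Python analogue of what the three replace_mode values mean at evaluation time, assuming plain literal (non-glob) patterns (pat_sub here is an illustrative helper, not Oil's evaluator):

def pat_sub(s, pat, rep, mode=''):
    if mode == '/':     # ${s//pat/rep}: replace all occurrences
        return s.replace(pat, rep)
    if mode == '#':     # ${s/#pat/rep}: replace only a match at the start
        return rep + s[len(pat):] if s.startswith(pat) else s
    if mode == '%':     # ${s/%pat/rep}: replace only a match at the end
        return s[:len(s) - len(pat)] + rep if s.endswith(pat) else s
    return s.replace(pat, rep, 1)   # default: first occurrence only

assert pat_sub('aXbX', 'X', '_') == 'a_bX'
assert pat_sub('aXbX', 'X', '_', '/') == 'a_b_'
assert pat_sub('Xab', 'X', '_', '#') == '_ab'
assert pat_sub('abX', 'X', '_', '%') == 'ab_'
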
Example #4
def _ExpandPart(
        parts,  # type: List[word_part_t]
        first_alt_index,  # type: int
        suffixes,  # type: List[List[word_part_t]]
):
    # type: (...) -> List[List[word_part_t]]
    """Mutually recursive with _BraceExpand.

    Args:
      parts: input parts
      first_alt_index: index of the first BracedTuple
      suffixes: list of suffixes to append
    """
    out = []

    prefix = parts[:first_alt_index]
    expand_part = parts[first_alt_index]

    if isinstance(expand_part, word_part__BracedTuple):
        # Call _BraceExpand on each of the inner words too!
        expanded_alts = []  # type: List[List[word_part_t]]
        for w in expand_part.words:
            assert isinstance(w, word__Compound)  # for MyPy
            expanded_alts.extend(_BraceExpand(w.parts))

        for alt_parts in expanded_alts:
            for suffix in suffixes:
                out_parts = []  # type: List[word_part_t]
                out_parts.extend(prefix)
                out_parts.extend(alt_parts)
                out_parts.extend(suffix)
                out.append(out_parts)

    elif isinstance(expand_part, word_part__BracedRange):
        # Not mutually recursive with _BraceExpand
        strs = _RangeStrings(expand_part)
        for s in strs:
            for suffix in suffixes:
                out_parts_ = []  # type: List[word_part_t]
                out_parts_.extend(prefix)
                # Preserve span_id from the original
                t = token(Id.Lit_Chars, s, expand_part.spids[0])
                out_parts_.append(word_part.Literal(t))
                out_parts_.extend(suffix)
                out.append(out_parts_)

    else:
        raise AssertionError

    return out
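
The same prefix x alternatives x suffixes product, as a self-contained sketch over plain strings instead of word_part lists (expand_part here is an illustrative stand-in):

def expand_part(parts, first_alt_index, suffixes):
    prefix = parts[:first_alt_index]
    alts = parts[first_alt_index]   # list of alternatives, e.g. from {a,b}
    out = []
    for alt in alts:
        for suffix in suffixes:
            out.append(prefix + [alt] + suffix)
    return out

# 'foo{a,b}' with suffixes '1' and '2' -> fooa1 fooa2 foob1 foob2
assert expand_part(['foo', ['a', 'b']], 1, [['1'], ['2']]) == [
    ['foo', 'a', '1'], ['foo', 'a', '2'],
    ['foo', 'b', '1'], ['foo', 'b', '2'],
]
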
Example #5
  def testMultiLine(self):
    w_parser = test_lib.InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")
    print('--MULTI')
    w = w_parser.ReadWord(lex_mode_e.ShCommand)
    parts = [word_part.Literal(token(Id.Lit_Chars, 'ls'))]
    test_lib.AssertAsdlEqual(self, word.Compound(parts), w)

    w = w_parser.ReadWord(lex_mode_e.ShCommand)
    parts = [word_part.Literal(token(Id.Lit_Chars, 'foo'))]
    test_lib.AssertAsdlEqual(self, word.Compound(parts), w)

    w = w_parser.ReadWord(lex_mode_e.ShCommand)
    t = token(Id.Op_Newline, '\n')
    test_lib.AssertAsdlEqual(self, word.Token(t), w)

    w = w_parser.ReadWord(lex_mode_e.ShCommand)
    parts = [word_part.Literal(token(Id.Lit_Chars, 'ls'))]
    test_lib.AssertAsdlEqual(self, word.Compound(parts), w)

    w = w_parser.ReadWord(lex_mode_e.ShCommand)
    parts = [word_part.Literal(token(Id.Lit_Chars, 'bar'))]
    test_lib.AssertAsdlEqual(self, word.Compound(parts), w)

    w = w_parser.ReadWord(lex_mode_e.ShCommand)
    t = token(Id.Op_Newline, '\n')
    test_lib.AssertAsdlEqual(self, word.Token(t), w)

    w = w_parser.ReadWord(lex_mode_e.ShCommand)
    t = token(Id.Eof_Real, '')
    test_lib.AssertAsdlEqual(self, word.Token(t), w)
Example #6
def ErrorWord(fmt, err):
    # type: (str, _ErrorWithLocation) -> word__Compound
    error_str = fmt % err.UserErrorString()
    t = token(Id.Lit_Chars, error_str, const.NO_INTEGER)
    return word.Compound([word_part.Literal(t)])
Example #7
    def Expr(self, pnode):
        # type: (PNode) -> expr_t
        """Transform expressions (as opposed to statements)."""
        typ = pnode.typ
        tok = pnode.tok
        children = pnode.children

        if ISNONTERMINAL(typ):
            c = '-' if not children else len(children)
            #log('non-terminal %s %s', nt_name, c)

            if typ == grammar_nt.oil_expr:  # for if/while
                # oil_expr: '(' testlist ')'
                return self.Expr(children[1])

            if typ == grammar_nt.return_expr:  # for 'return' statements
                # return_expr: testlist end_stmt
                return self.Expr(children[0])

            if typ == grammar_nt.lvalue_list:
                return self._AssocBinary(children)

            if typ == grammar_nt.atom:
                return self.atom(children)

            if typ == grammar_nt.eval_input:
                # eval_input: testlist NEWLINE* ENDMARKER
                return self.Expr(children[0])

            if typ == grammar_nt.testlist:
                # testlist: test (',' test)* [',']
                return self._AssocBinary(children)

            elif typ == grammar_nt.arith_expr:
                # arith_expr: term (('+'|'-') term)*
                return self._AssocBinary(children)

            elif typ == grammar_nt.term:
                # term: factor (('*'|'/'|'div'|'mod') factor)*
                return self._AssocBinary(children)

            elif typ == grammar_nt.expr:
                # expr: xor_expr ('|' xor_expr)*
                return self._AssocBinary(children)

            elif typ == grammar_nt.shift_expr:
                # shift_expr: arith_expr (('<<'|'>>') arith_expr)*
                return self._AssocBinary(children)

            elif typ == grammar_nt.comparison:
                # comparison: expr (comp_op expr)*
                return self._AssocBinary(children)

            elif typ == grammar_nt.factor:
                # factor: ('+'|'-'|'~') factor | power
                # the power would have already been reduced
                assert len(children) == 2, children
                op, e = children
                assert isinstance(op.tok, token)
                return expr.Unary(op.tok, self.Expr(e))

            elif typ == grammar_nt.atom_expr:
                # atom_expr: ['await'] atom trailer*

                # NOTE: This would be shorter in a recursive style.
                base = self.Expr(children[0])
                n = len(children)
                for i in xrange(1, n):
                    pnode = children[i]
                    tok = pnode.tok
                    base = self.trailer(base, pnode)

                return base

            elif typ == grammar_nt.power:
                # power: atom_expr ['^' factor]

                # This doesn't repeat, so it doesn't matter if it's left or right
                # associative.
                return self._AssocBinary(children)

            elif typ == grammar_nt.array_literal:
                left_tok = children[0].tok

                # Approximation for now.
                tokens = [
                    pnode.tok for pnode in children[1:-1]
                    if pnode.tok.id == Id.Lit_Chars
                ]
                items = [expr.Const(t) for t in tokens]  # type: List[expr_t]
                return expr.ArrayLiteral(left_tok, items)

            elif typ == grammar_nt.sh_array_literal:
                left_tok = children[0].tok

                # HACK: When typ is Id.Expr_WordsDummy, the 'tok' field ('opaque')
                # actually has a list of words!
                typ1 = children[1].typ
                assert typ1 == Id.Expr_WordsDummy.enum_id, typ1
                array_words = cast('List[word_t]', children[1].tok)

                return expr.ShellArrayLiteral(left_tok, array_words)

            elif typ == grammar_nt.regex_literal:
                left_tok = children[0].tok

                # Approximation for now.
                tokens = [
                    pnode.tok for pnode in children[1:-1]
                    if pnode.tok.id == Id.Expr_Name
                ]
                parts = [regex.Var(t) for t in tokens]  # type: List[regex_t]

                return expr.RegexLiteral(left_tok, regex.Concat(parts))

            elif typ == grammar_nt.command_sub:
                left_tok = children[0].tok

                # Approximation for now.
                tokens = [
                    pnode.tok for pnode in children[1:-1]
                    if pnode.tok.id == Id.Lit_Chars
                ]
                words = [
                    word.Compound([word_part.Literal(t)]) for t in tokens
                ]  # type: List[word_t]
                return expr.CommandSub(left_tok, command.Simple(words))

            elif typ == grammar_nt.sh_command_sub:
                left_tok = children[0].tok

                # HACK: When typ is Id.Expr_CommandDummy, the 'tok' field ('opaque')
                # actually has a word_part.CommandSub!
                typ1 = children[1].typ
                assert typ1 == Id.Expr_CommandDummy.enum_id, typ1
                cs_part = cast(word_part__CommandSub, children[1].tok)

                # Awkward: the schemas are different
                expr_part = expr.CommandSub(cs_part.left_token,
                                            cs_part.command_list)
                expr_part.spids.extend(cs_part.spids)
                return expr_part

            elif typ == grammar_nt.var_sub:
                left_tok = children[0].tok

                return expr.VarSub(left_tok, self.Expr(children[1]))

            elif typ == grammar_nt.dq_string:
                left_tok = children[0].tok

                tokens = [
                    pnode.tok for pnode in children[1:-1]
                    if pnode.tok.id == Id.Lit_Chars
                ]
                parts2 = [word_part.Literal(t)
                          for t in tokens]  # type: List[word_part_t]
                return expr.DoubleQuoted(left_tok, parts2)

            else:
                nt_name = self.number2symbol[typ]
                raise AssertionError("PNode type %d (%s) wasn't handled" %
                                     (typ, nt_name))

        else:  # Terminals should have a token
            #log('terminal %s', tok)

            if tok.id == Id.Expr_Name:
                return expr.Var(tok)
            elif tok.id == Id.Expr_Digits:
                return expr.Const(tok)

            else:
                raise AssertionError(tok.id)
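
Most branches above delegate to _AssocBinary, which folds a flat parse node of the shape [operand, op, operand, op, ...] into nested binary nodes. A minimal sketch of one way to do that fold, left-associatively, with tuples standing in for the real expr nodes (the actual helper's associativity and node type may differ):

def assoc_binary(children):
    node = children[0]
    for i in range(1, len(children), 2):
        op, right = children[i], children[i + 1]
        node = ('binary', op, node, right)   # hypothetical Binary node
    return node

# a + b - c  folds to  ((a + b) - c)
assert assoc_binary(['a', '+', 'b', '-', 'c']) == \
    ('binary', '-', ('binary', '+', 'a', 'b'), 'c')
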
Example #8
  def _ReadCompoundWord(self, eof_type=Id.Undefined_Tok,
                        lex_mode=lex_mode_e.ShCommand, empty_ok=True):
    # type: (Id_t, lex_mode_t, bool) -> word__Compound
    """
    Precondition: Looking at the first token of the first word part
    Postcondition: Looking at the token after, e.g. space or operator

    NOTE: eof_type is necessary because / is a literal, i.e. Lit_Slash, but it
    could be an operator delimiting a compound word.  Can we change lexer modes
    and remove this special case?
    """
    w = word.Compound()
    num_parts = 0
    brace_count = 0
    done = False
    while not done:
      self._Peek()

      allow_done = empty_ok or num_parts != 0
      if allow_done and self.token_type == eof_type:
        done = True  # e.g. for ${foo//pat/replace}

      # Keywords like "for" are treated like literals
      elif self.token_kind in (
          Kind.Lit, Kind.History, Kind.KW, Kind.ControlFlow,
          Kind.BoolUnary, Kind.BoolBinary):
        if self.token_type == Id.Lit_EscapedChar:
          part = word_part.EscapedLiteral(self.cur_token)  # type: word_part_t
        else:
          part = word_part.Literal(self.cur_token)

        if self.token_type == Id.Lit_VarLike and num_parts == 0:  # foo=
          w.parts.append(part)
          # Unfortunately it's awkward to pull the check for a=(1 2) up to
          # _ReadWord.
          t = self.lexer.LookAhead(lex_mode_e.ShCommand)
          if t.id == Id.Op_LParen:
            self.lexer.PushHint(Id.Op_RParen, Id.Right_ShArrayLiteral)
            part2 = self._ReadArrayLiteral()
            w.parts.append(part2)

            # Array literal must be the last part of the word.
            self._Next(lex_mode)
            self._Peek()
            # EOF, whitespace, newline, Right_Subshell
            if self.token_kind not in self.KINDS_THAT_END_WORDS:
              p_die('Unexpected token after array literal',
                    token=self.cur_token)
            done = True

        elif (self.parse_opts.at and self.token_type == Id.Lit_Splice and
              num_parts == 0):

          splice_token = self.cur_token

          t = self.lexer.LookAhead(lex_mode_e.ShCommand)
          if t.id == Id.Op_LParen:  # @arrayfunc(x)
            arglist = arg_list()
            self._ParseCallArguments(arglist)
            part = word_part.FuncCall(splice_token, arglist)
          else:
            part = word_part.Splice(splice_token)

          w.parts.append(part)

          # @words or @arrayfunc() must be the last part of the word
          self._Next(lex_mode)
          self._Peek()
          # EOF, whitespace, newline, Right_Subshell
          if self.token_kind not in self.KINDS_THAT_END_WORDS:
            p_die('Unexpected token after array splice',
                  token=self.cur_token)
          done = True

        else:
          # Count { and } so unbalanced braces can be reported below
          if self.token_type == Id.Lit_LBrace:
            brace_count += 1
          elif self.token_type == Id.Lit_RBrace:
            brace_count -= 1

          # not a literal with lookahead; append it
          w.parts.append(part)

      elif self.token_kind == Kind.VSub:
        vsub_token = self.cur_token

        part = simple_var_sub(vsub_token)
        if self.token_type == Id.VSub_DollarName:
          # Look ahead for $strfunc(x)
          #   $f(x) or --name=$f(x) is allowed
          #   but "--name=$f(x)" not allowed?  This would BREAK EXISTING CODE.
          #   It would need a parse option.

          t = self.lexer.LookAhead(lex_mode_e.ShCommand)
          if t.id == Id.Op_LParen:
            arglist = arg_list()
            self._ParseCallArguments(arglist)
            part = word_part.FuncCall(vsub_token, arglist)

            # Unlike @arrayfunc(x), it makes sense to allow $f(1)$f(2)
            # var a = f(1); var b = f(2); echo $a$b
            # It's consistent with other uses of $.

        w.parts.append(part)

      elif self.token_kind == Kind.ExtGlob:
        part = self._ReadExtGlob()
        w.parts.append(part)

      elif self.token_kind == Kind.Left:
        part = self._ReadLeftParts()
        w.parts.append(part)

      # NOT done yet, will advance below
      elif self.token_kind == Kind.Right:
        # Still part of the word; will be done on the next iter.
        if self.token_type == Id.Right_DoubleQuote:
          pass
        # Never happens, no PushHint for this case.
        #elif self.token_type == Id.Right_DollarParen:
        #  pass
        elif self.token_type == Id.Right_Subshell:
          # LEXER HACK for (case x in x) ;; esac )
          assert self.next_lex_mode is None  # Rewind before it's used
          if self.lexer.MaybeUnreadOne():
            self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
            self._Next(lex_mode)
          done = True
        else:
          done = True

      elif self.token_kind == Kind.Ignored:
        done = True

      else:
        # LEXER HACK for unbalanced case clause.  'case foo in esac' is valid,
        # so to test for ESAC, we can read ) before getting a chance to
        # PushHint(Id.Op_RParen, Id.Right_CasePat).  So here we unread one
        # token and do it again.

        # We get Id.Op_RParen at top level:      case x in x) ;; esac
        # We get Id.Eof_RParen inside ComSub:  $(case x in x) ;; esac )
        if self.token_type in (Id.Op_RParen, Id.Eof_RParen):
          assert self.next_lex_mode is None  # Rewind before it's used
          if self.lexer.MaybeUnreadOne():
            if self.token_type == Id.Eof_RParen:
              # Redo translation
              self.lexer.PushHint(Id.Op_RParen, Id.Eof_RParen)
            self._Next(lex_mode)

        done = True  # anything we don't recognize means we're done

      if not done:
        self._Next(lex_mode)
        num_parts += 1

    if self.parse_opts.brace and num_parts > 1 and brace_count != 0:
      # accept { and }, but not foo{
      p_die(
          'Word has unbalanced { }.  Maybe add a space or quote it like \{',
          word=w)

    return w
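
The brace_count bookkeeping feeds the final check: { and } are tallied while the word is read, and a multi-part word that ends unbalanced (like foo{) is rejected, while a lone { remains a valid word. A standalone sketch of that rule, with each part reduced to its literal text (check_braces is an illustrative helper, not the real parser):

def check_braces(parts):
    count = 0
    for p in parts:
        count += p.count('{') - p.count('}')
    if len(parts) > 1 and count != 0:
        raise SyntaxError('Word has unbalanced { }')

check_braces(['{'])                 # a lone { is a valid word
check_braces(['foo{', 'a,b', '}'])  # balanced across parts: fine
try:
    check_braces(['foo', '{'])      # foo{ with no closing }: rejected
except SyntaxError:
    pass
else:
    raise AssertionError('expected SyntaxError')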