Exemplo n.º 1
0
    def _Next(self, lex_mode):
        # type: (lex_mode_t) -> None
        """Set the next lex state, but don't actually read a token.

    We need this for proper interactive parsing.
    """
        self.cur_token = self.lexer.Read(lex_mode)
        self.token_type = self.cur_token.id
        self.token_kind = meta.LookupKind(self.token_type)
Exemplo n.º 2
0
 def _Peek(self):
     # type: () -> token
     """Helper method."""
     if self.next_lex_mode is not None:
         self.cur_token = self.lexer.Read(self.next_lex_mode)
         self.token_type = self.cur_token.id
         self.token_kind = meta.LookupKind(self.token_type)
         self.parse_ctx.trail.AppendToken(self.cur_token)  # For completion
         self.next_lex_mode = None
     return self.cur_token
Exemplo n.º 3
0
def _PushOilTokens(p, lex, gr):
  # type: (parse.Parser, Lexer, Grammar) -> token
  """Push tokens onto pgen2's parser.

  Returns the last token so it can be reused/seen by the CommandParser.
  """
  #log('keywords = %s', gr.keywords)
  #log('tokens = %s', gr.tokens)

  mode = lex_mode_e.Expr
  mode_stack = [mode]

  balance = 0

  while True:
    tok = lex.Read(mode)
    #log('tok = %s', tok)

    # Comments and whitespace.  Newlines aren't ignored.
    if meta.LookupKind(tok.id) == Kind.Ignored:
      continue

    # For var x = {
    #   a: 1, b: 2
    # }
    if balance > 0 and tok.id == Id.Op_Newline:
      #log('*** SKIPPING NEWLINE')
      continue

    action = _MODE_TRANSITIONS.get((mode, tok.id))
    if action == POP:
      mode_stack.pop()
      mode = mode_stack[-1]
      balance -= 1
      #log('POPPED to %s', mode)
    elif action:  # it's an Id
      new_mode = action
      mode_stack.append(new_mode)
      mode = new_mode
      balance += 1  # e.g. var x = $/ NEWLINE /
      #log('PUSHED to %s', mode)
    else:
      # If we didn't already so something with the balance, look at another table.
      balance += _OTHER_BALANCE.get(tok.id, 0)
      #log('BALANCE after seeing %s = %d', tok.id, balance)

    #if tok.id == Id.Expr_Name and tok.val in KEYWORDS:
    #  tok.id = KEYWORDS[tok.val]
    #  log('Replaced with %s', tok.id)

    if tok.id.enum_id >= 256:
      raise AssertionError(str(tok))

    ilabel = _Classify(gr, tok)
    #log('tok = %s, ilabel = %d', tok, ilabel)

    if p.addtoken(tok.id.enum_id, tok, ilabel):
      return tok

  else:
    # We never broke out -- EOF is too soon (how can this happen???)
    raise parse.ParseError("incomplete input", tok.id.enum_id, tok)
Exemplo n.º 4
0
def _PushOilTokens(parse_ctx, gr, p, lex):
  # type: (ParseContext, Grammar, parse.Parser, Lexer) -> token
  """Push tokens onto pgen2's parser.

  Returns the last token so it can be reused/seen by the CommandParser.
  """
  #log('keywords = %s', gr.keywords)
  #log('tokens = %s', gr.tokens)

  mode = lex_mode_e.Expr
  mode_stack = [mode]
  last_token = None

  balance = 0

  from core.util import log
  while True:
    if last_token:  # e.g. left over from WordParser
      tok = last_token
      #log('last_token = %s', last_token)
      last_token = None
    else:
      tok = lex.Read(mode)
      #log('tok = %s', tok)

    # Comments and whitespace.  Newlines aren't ignored.
    if meta.LookupKind(tok.id) == Kind.Ignored:
      continue

    # For var x = {
    #   a: 1, b: 2
    # }
    if balance > 0 and tok.id == Id.Op_Newline:
      #log('*** SKIPPING NEWLINE')
      continue

    action = _MODE_TRANSITIONS.get((mode, tok.id))
    if action == POP:
      mode_stack.pop()
      mode = mode_stack[-1]
      balance -= 1
      #log('POPPED to %s', mode)
    elif action:  # it's an Id
      new_mode = action
      mode_stack.append(new_mode)
      mode = new_mode
      balance += 1  # e.g. var x = $/ NEWLINE /
      #log('PUSHED to %s', mode)
    else:
      # If we didn't already so something with the balance, look at another table.
      balance += _OTHER_BALANCE.get(tok.id, 0)
      #log('BALANCE after seeing %s = %d', tok.id, balance)

    #if tok.id == Id.Expr_Name and tok.val in KEYWORDS:
    #  tok.id = KEYWORDS[tok.val]
    #  log('Replaced with %s', tok.id)

    if tok.id.enum_id >= 256:
      raise AssertionError(str(tok))

    ilabel = _Classify(gr, tok)
    #log('tok = %s, ilabel = %d', tok, ilabel)

    if p.addtoken(tok.id.enum_id, tok, ilabel):
      return tok

    #
    # Extra handling of the body of @() and $().  Lex in the ShCommand mode.
    #

    if tok.id == Id.Left_AtParen:
      lex.PushHint(Id.Op_RParen, Id.Right_ShArrayLiteral)

      # Blame the opening token
      line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
      w_parser = parse_ctx.MakeWordParser(lex, line_reader)
      words = []
      while True:
        w = w_parser.ReadWord(lex_mode_e.ShCommand)
        if 0:
          log('w = %s', w)

        if isinstance(w, word__Token):
          word_id = word_.CommandId(w)
          if word_id == Id.Right_ShArrayLiteral:
            break
          elif word_id == Id.Op_Newline:  # internal newlines allowed
            continue
          else:
            # Token
            p_die('Unexpected token in array literal: %r', w.token.val, word=w)

        assert isinstance(w, word__Compound)  # for MyPy
        words.append(w)

      words2 = braces.BraceDetectAll(words)
      words3 = word_.TildeDetectAll(words2)

      typ = Id.Expr_CastedDummy.enum_id
      opaque = cast(token, words3)  # HACK for expr_to_ast
      done = p.addtoken(typ, opaque, gr.tokens[typ])
      assert not done  # can't end the expression

      # Now push the closing )
      tok = w.token
      ilabel = _Classify(gr, tok)
      done = p.addtoken(tok.id.enum_id, tok, ilabel)
      assert not done  # can't end the expression

      continue

    if tok.id == Id.Left_DollarParen:
      left_token = tok

      lex.PushHint(Id.Op_RParen, Id.Eof_RParen)
      line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
      c_parser = parse_ctx.MakeParserForCommandSub(line_reader, lex,
                                                   Id.Eof_RParen)
      node = c_parser.ParseCommandSub()
      # A little gross: Copied from osh/word_parse.py
      right_token = c_parser.w_parser.cur_token

      cs_part = command_sub(left_token, node)
      cs_part.spids.append(left_token.span_id)
      cs_part.spids.append(right_token.span_id)

      typ = Id.Expr_CastedDummy.enum_id
      opaque = cast(token, cs_part)  # HACK for expr_to_ast
      done = p.addtoken(typ, opaque, gr.tokens[typ])
      assert not done  # can't end the expression

      # Now push the closing )
      ilabel = _Classify(gr, right_token)
      done = p.addtoken(right_token.id.enum_id, right_token, ilabel)
      assert not done  # can't end the expression

      continue

    if tok.id == Id.Left_DoubleQuote:
      left_token = tok
      line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
      w_parser = parse_ctx.MakeWordParser(lex, line_reader)

      parts = []  # type: List[word_part_t]
      last_token = w_parser.ReadDoubleQuoted(left_token, parts)
      expr_dq_part = double_quoted(left_token, parts)

      typ = Id.Expr_CastedDummy.enum_id
      opaque = cast(token, expr_dq_part)  # HACK for expr_to_ast
      done = p.addtoken(typ, opaque, gr.tokens[typ])
      assert not done  # can't end the expression

      continue

    if tok.id == Id.Left_DollarBrace:
      left_token = tok
      line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
      w_parser = parse_ctx.MakeWordParser(lex, line_reader)

      part, last_token = w_parser.ReadBracedBracedVarSub(left_token)

      # It's casted word_part__BracedVarSub -> dummy -> expr__BracedVarSub!
      typ = Id.Expr_CastedDummy.enum_id
      opaque = cast(token, part)  # HACK for expr_to_ast
      done = p.addtoken(typ, opaque, gr.tokens[typ])
      assert not done  # can't end the expression

      continue

    # '' and c''
    if tok.id in (Id.Left_SingleQuoteRaw, Id.Left_SingleQuoteC):
      left_token = tok
      line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
      w_parser = parse_ctx.MakeWordParser(lex, line_reader)

      # mode can be SQ or DollarSQ
      tokens = []  # type: List[token]
      no_backslashes = (left_token.val == "'")
      last_token = w_parser.ReadSingleQuoted(mode, left_token, tokens,
                                             no_backslashes)
      sq_part = single_quoted(left_token, tokens)

      typ = Id.Expr_CastedDummy.enum_id
      opaque = cast(token, sq_part)  # HACK for expr_to_ast
      done = p.addtoken(typ, opaque, gr.tokens[typ])
      assert not done  # can't end the expression

      continue

  else:
    # We never broke out -- EOF is too soon (how can this happen???)
    raise parse.ParseError("incomplete input", tok.id.enum_id, tok)
Exemplo n.º 5
0
    def _MaybeReplaceLeaf(self, node):
        # type: (re_t) -> Optional[re_t]
        """
    If a leaf node needs to be evaluated, do it and return the replacement.
    Otherwise return None.
    """
        new_leaf = None
        recurse = True

        if node.tag == re_e.Speck:
            id_ = node.id
            if id_ == Id.Expr_Dot:
                new_leaf = re.Primitive(Id.Re_Dot)
            elif id_ == Id.Arith_Caret:  # ^
                new_leaf = re.Primitive(Id.Re_Start)
            elif id_ == Id.Expr_Dollar:  # $
                new_leaf = re.Primitive(Id.Re_End)
            else:
                raise NotImplementedError(id_)

        elif node.tag == re_e.Token:
            id_ = node.id
            val = node.val

            if id_ == Id.Expr_Name:
                if val == 'dot':
                    new_leaf = re.Primitive(Id.Re_Dot)
                else:
                    raise NotImplementedError(val)

            elif id_ == Id.Expr_Symbol:
                if val == '%start':
                    new_leaf = re.Primitive(Id.Re_Start)
                elif val == '%end':
                    new_leaf = re.Primitive(Id.Re_End)
                else:
                    raise NotImplementedError(val)

            else:  # Must be Id.Char_{OneChar,Hex,Unicode4,Unicode8}
                kind = meta.LookupKind(id_)
                assert kind == Kind.Char, id_
                s = word_compile.EvalCStringToken(id_, val)
                new_leaf = re.LiteralChars(s, node.span_id)

        elif node.tag == re_e.SingleQuoted:
            s = word_eval.EvalSingleQuoted(node)
            new_leaf = re.LiteralChars(s, node.left.span_id)

        elif node.tag == re_e.DoubleQuoted:
            s = self.word_ev.EvalDoubleQuotedToString(node)
            new_leaf = re.LiteralChars(s, node.left.span_id)

        elif node.tag == re_e.BracedVarSub:
            s = self.word_ev.EvalBracedVarSubToString(node)
            new_leaf = re.LiteralChars(s, node.spids[0])

        elif node.tag == re_e.SimpleVarSub:
            s = self.word_ev.EvalSimpleVarSubToString(node.token)
            new_leaf = re.LiteralChars(s, node.token.span_id)

        elif node.tag == re_e.Splice:
            obj = self.LookupVar(node.name.val)
            if not isinstance(obj, objects.Regex):
                e_die("Can't splice object of type %r into regex",
                      obj.__class__,
                      token=node.name)
            # Note: we only splice the regex, and ignore flags.
            # Should we warn about this?
            new_leaf = obj.regex

        # These are leaves we don't need to do anything with.
        elif node.tag == re_e.PosixClass:
            recurse = False
        elif node.tag == re_e.PerlClass:
            recurse = False

        return new_leaf, recurse