def _Next(self, lex_mode):
    # type: (lex_mode_t) -> None
    """Set the next lex state, but don't actually read a token.

    The mode is only recorded in self.next_lex_mode.  The read itself is
    performed by _Peek(), which consumes a non-None next_lex_mode, updates
    cur_token / token_type / token_kind, and records the token on the
    completion trail.  Deferring the read is needed for proper interactive
    parsing.

    Args:
      lex_mode: lexer mode to use for the next token read.
    """
    # Reading here would bypass _Peek()'s bookkeeping (trail.AppendToken) and
    # contradict the documented contract, so only remember the mode.
    self.next_lex_mode = lex_mode
def _Peek(self):
    # type: () -> token
    """Return the current token, reading a new one first if a mode is pending.

    When self.next_lex_mode is None, nothing is read and the cached
    self.cur_token is returned as is.  Otherwise one token is read from
    self.lexer in that mode, token_type / token_kind are refreshed from it,
    the token is appended to parse_ctx.trail, and the pending mode is cleared.

    Returns:
      self.cur_token (possibly just refreshed).
    """
    pending_mode = self.next_lex_mode
    if pending_mode is None:
        # Nothing requested since the last read: reuse the cached token.
        return self.cur_token

    self.cur_token = self.lexer.Read(pending_mode)
    self.token_type = self.cur_token.id
    self.token_kind = meta.LookupKind(self.token_type)
    self.parse_ctx.trail.AppendToken(self.cur_token)  # For completion

    # Clear the request so repeated peeks return the same token.
    self.next_lex_mode = None
    return self.cur_token
def _PushOilTokens(p, lex, gr):
    # type: (parse.Parser, Lexer, Grammar) -> token
    """Push tokens onto pgen2's parser.

    Reads tokens from lex one at a time, starting in lex_mode_e.Expr, and
    feeds each one to p.addtoken() until addtoken() returns a true value.

    Args:
      p: pgen2 parser; receives (enum_id, token, ilabel) through addtoken().
      lex: lexer; Read(mode) returns the next token in the given lex mode.
      gr: grammar, passed to _Classify() to compute each token's label.

    Returns:
      The last token pushed, so it can be reused/seen by the CommandParser.

    Raises:
      AssertionError: if a token's id has enum_id >= 256.
    """
    mode = lex_mode_e.Expr
    mode_stack = [mode]

    # Incremented on every mode push, decremented on every pop, and adjusted
    # by _OTHER_BALANCE for other tokens.  While it is positive, newlines are
    # skipped so that an expression can span lines:
    #   var x = {
    #     a: 1, b: 2
    #   }
    balance = 0

    # NOTE: this loop has no `break` and its condition is constant, so it is
    # left only through the `return` below or an exception from a callee.
    while True:
        tok = lex.Read(mode)

        # Comments and whitespace.  Newlines aren't ignored.
        if meta.LookupKind(tok.id) == Kind.Ignored:
            continue

        if balance > 0 and tok.id == Id.Op_Newline:
            continue

        action = _MODE_TRANSITIONS.get((mode, tok.id))
        if action == POP:
            mode_stack.pop()
            mode = mode_stack[-1]
            balance -= 1
        elif action:  # any other truthy entry is the lex mode to enter
            mode_stack.append(action)
            mode = action
            balance += 1  # e.g. var x = $/ NEWLINE /
        else:
            # If we didn't already do something with the balance, look at
            # another table.
            balance += _OTHER_BALANCE.get(tok.id, 0)

        if tok.id.enum_id >= 256:
            raise AssertionError(str(tok))

        ilabel = _Classify(gr, tok)
        if p.addtoken(tok.id.enum_id, tok, ilabel):
            return tok
def _PushOilTokens(parse_ctx, gr, p, lex):
    # type: (ParseContext, Grammar, parse.Parser, Lexer) -> token
    """Push tokens onto pgen2's parser.

    Reads tokens from lex one at a time, starting in lex_mode_e.Expr, and
    feeds each one to p.addtoken() until addtoken() returns a true value.
    After certain opening tokens -- @(  $(  "  ${  '  c' -- the body is parsed
    by a word parser or command parser obtained from parse_ctx, and the
    resulting node is pushed as a single Expr_CastedDummy token.

    Args:
      parse_ctx: supplies the arena and the word / command-sub parsers.
      gr: grammar; used by _Classify() and for the Expr_CastedDummy label.
      p: pgen2 parser; receives (enum_id, token, ilabel) through addtoken().
      lex: lexer shared with the nested parsers.

    Returns:
      The last token pushed, so it can be reused/seen by the CommandParser.

    Raises:
      AssertionError: if a token's id has enum_id >= 256, or if pushing a
        nested part or its closing token unexpectedly completes the parse.
    """
    mode = lex_mode_e.Expr
    mode_stack = [mode]
    # Token handed back by a nested word parser; consumed on the next
    # iteration instead of reading from the lexer.
    last_token = None

    # Incremented on every mode push, decremented on every pop, and adjusted
    # by _OTHER_BALANCE for other tokens.  While it is positive, newlines are
    # skipped so that an expression can span lines:
    #   var x = {
    #     a: 1, b: 2
    #   }
    balance = 0

    # NOTE: the outer loop has no `break` and its condition is constant, so it
    # is left only through the `return` below or an exception from a callee.
    while True:
        if last_token:  # e.g. left over from WordParser
            tok = last_token
            last_token = None
        else:
            tok = lex.Read(mode)

        # Comments and whitespace.  Newlines aren't ignored.
        if meta.LookupKind(tok.id) == Kind.Ignored:
            continue

        if balance > 0 and tok.id == Id.Op_Newline:
            continue

        action = _MODE_TRANSITIONS.get((mode, tok.id))
        if action == POP:
            mode_stack.pop()
            mode = mode_stack[-1]
            balance -= 1
        elif action:  # any other truthy entry is the lex mode to enter
            mode_stack.append(action)
            mode = action
            balance += 1  # e.g. var x = $/ NEWLINE /
        else:
            # If we didn't already do something with the balance, look at
            # another table.
            balance += _OTHER_BALANCE.get(tok.id, 0)

        if tok.id.enum_id >= 256:
            raise AssertionError(str(tok))

        ilabel = _Classify(gr, tok)
        if p.addtoken(tok.id.enum_id, tok, ilabel):
            return tok

        #
        # Extra handling of the body of @() and $().  Lex in the ShCommand
        # mode.
        #

        if tok.id == Id.Left_AtParen:
            lex.PushHint(Id.Op_RParen, Id.Right_ShArrayLiteral)

            # Blame the opening token
            line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
            w_parser = parse_ctx.MakeWordParser(lex, line_reader)
            words = []
            while True:
                w = w_parser.ReadWord(lex_mode_e.ShCommand)

                if isinstance(w, word__Token):
                    word_id = word_.CommandId(w)
                    if word_id == Id.Right_ShArrayLiteral:
                        break
                    elif word_id == Id.Op_Newline:  # internal newlines allowed
                        continue
                    else:
                        # Token
                        p_die('Unexpected token in array literal: %r',
                              w.token.val, word=w)

                assert isinstance(w, word__Compound)  # for MyPy
                words.append(w)

            words2 = braces.BraceDetectAll(words)
            words3 = word_.TildeDetectAll(words2)
            _PushCastedDummy(p, gr, words3)

            # Now push the closing ).  `w` is the word that ended the loop
            # above (the Right_ShArrayLiteral token).
            tok = w.token
            ilabel = _Classify(gr, tok)
            done = p.addtoken(tok.id.enum_id, tok, ilabel)
            assert not done  # can't end the expression

            continue

        if tok.id == Id.Left_DollarParen:
            left_token = tok

            lex.PushHint(Id.Op_RParen, Id.Eof_RParen)
            line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
            c_parser = parse_ctx.MakeParserForCommandSub(line_reader, lex,
                                                         Id.Eof_RParen)
            node = c_parser.ParseCommandSub()
            # A little gross: Copied from osh/word_parse.py
            right_token = c_parser.w_parser.cur_token

            cs_part = command_sub(left_token, node)
            cs_part.spids.append(left_token.span_id)
            cs_part.spids.append(right_token.span_id)
            _PushCastedDummy(p, gr, cs_part)

            # Now push the closing )
            ilabel = _Classify(gr, right_token)
            done = p.addtoken(right_token.id.enum_id, right_token, ilabel)
            assert not done  # can't end the expression

            continue

        if tok.id == Id.Left_DoubleQuote:
            left_token = tok
            line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
            w_parser = parse_ctx.MakeWordParser(lex, line_reader)

            parts = []  # type: List[word_part_t]
            last_token = w_parser.ReadDoubleQuoted(left_token, parts)
            expr_dq_part = double_quoted(left_token, parts)
            _PushCastedDummy(p, gr, expr_dq_part)

            continue

        if tok.id == Id.Left_DollarBrace:
            left_token = tok
            line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
            w_parser = parse_ctx.MakeWordParser(lex, line_reader)

            part, last_token = w_parser.ReadBracedBracedVarSub(left_token)

            # It's casted word_part__BracedVarSub -> dummy ->
            # expr__BracedVarSub!
            _PushCastedDummy(p, gr, part)

            continue

        # '' and c''
        if tok.id in (Id.Left_SingleQuoteRaw, Id.Left_SingleQuoteC):
            left_token = tok
            line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
            w_parser = parse_ctx.MakeWordParser(lex, line_reader)

            # mode can be SQ or DollarSQ
            tokens = []  # type: List[token]
            no_backslashes = (left_token.val == "'")
            last_token = w_parser.ReadSingleQuoted(mode, left_token, tokens,
                                                   no_backslashes)
            sq_part = single_quoted(left_token, tokens)
            _PushCastedDummy(p, gr, sq_part)

            continue


def _PushCastedDummy(p, gr, node):
    # type: (parse.Parser, Grammar, object) -> None
    """Push an already-parsed node onto p as an Expr_CastedDummy token.

    The node is cast to `token` purely to satisfy the type checker; the
    receiving side (expr_to_ast) is expected to know what it really is.
    """
    typ = Id.Expr_CastedDummy.enum_id
    opaque = cast(token, node)  # HACK for expr_to_ast
    done = p.addtoken(typ, opaque, gr.tokens[typ])
    assert not done  # can't end the expression
def _MaybeReplaceLeaf(self, node):
    # type: (re_t) -> Tuple[Optional[re_t], bool]
    """Evaluate a regex leaf node if it needs it.

    Args:
      node: the regex node to inspect; dispatched on node.tag.

    Returns:
      A pair (new_leaf, recurse):
        new_leaf: the replacement node, or None if the node is not one of
          the leaf kinds handled here.
        recurse: False for PosixClass and PerlClass nodes, True otherwise.

    Raises:
      NotImplementedError: for a Speck id other than Expr_Dot / Arith_Caret /
        Expr_Dollar, an Expr_Name other than 'dot', or an Expr_Symbol other
        than '%start' / '%end'.
      AssertionError: if any other Token id is not of Kind.Char.
    """
    new_leaf = None
    recurse = True
    tag = node.tag

    if tag == re_e.Speck:
        id_ = node.id
        if id_ == Id.Expr_Dot:
            new_leaf = re.Primitive(Id.Re_Dot)
        elif id_ == Id.Arith_Caret:  # ^
            new_leaf = re.Primitive(Id.Re_Start)
        elif id_ == Id.Expr_Dollar:  # $
            new_leaf = re.Primitive(Id.Re_End)
        else:
            raise NotImplementedError(id_)

    elif tag == re_e.Token:
        id_ = node.id
        val = node.val

        if id_ == Id.Expr_Name:
            if val == 'dot':
                new_leaf = re.Primitive(Id.Re_Dot)
            else:
                raise NotImplementedError(val)

        elif id_ == Id.Expr_Symbol:
            if val == '%start':
                new_leaf = re.Primitive(Id.Re_Start)
            elif val == '%end':
                new_leaf = re.Primitive(Id.Re_End)
            else:
                raise NotImplementedError(val)

        else:  # Must be Id.Char_{OneChar,Hex,Unicode4,Unicode8}
            kind = meta.LookupKind(id_)
            assert kind == Kind.Char, id_
            s = word_compile.EvalCStringToken(id_, val)
            new_leaf = re.LiteralChars(s, node.span_id)

    elif tag == re_e.SingleQuoted:
        s = word_eval.EvalSingleQuoted(node)
        new_leaf = re.LiteralChars(s, node.left.span_id)

    elif tag == re_e.DoubleQuoted:
        s = self.word_ev.EvalDoubleQuotedToString(node)
        new_leaf = re.LiteralChars(s, node.left.span_id)

    elif tag == re_e.BracedVarSub:
        s = self.word_ev.EvalBracedVarSubToString(node)
        new_leaf = re.LiteralChars(s, node.spids[0])

    elif tag == re_e.SimpleVarSub:
        s = self.word_ev.EvalSimpleVarSubToString(node.token)
        new_leaf = re.LiteralChars(s, node.token.span_id)

    elif tag == re_e.Splice:
        obj = self.LookupVar(node.name.val)
        if not isinstance(obj, objects.Regex):
            e_die("Can't splice object of type %r into regex", obj.__class__,
                  token=node.name)
        # Note: we only splice the regex, and ignore flags.
        # Should we warn about this?
        new_leaf = obj.regex

    # These are leaves we don't need to do anything with.
    elif tag in (re_e.PosixClass, re_e.PerlClass):
        recurse = False

    return new_leaf, recurse