def _PushOilTokens(parse_ctx, gr, p, lex):
  # type: (ParseContext, Grammar, parse.Parser, Lexer) -> Token
  """Push tokens onto pgen2's parser.

  Reads tokens from the lexer in Expr mode and feeds them to p.addtoken()
  until the parser reports that a complete expression was accepted.

  Returns the last token so it can be reused/seen by the CommandParser.
  """
  #log('keywords = %s', gr.keywords)
  #log('tokens = %s', gr.tokens)

  # A sub-parser below (double quotes, ${}, single quotes) may leave a token
  # behind; it is consumed first on the next iteration instead of lex.Read().
  last_token = None  # type: Optional[Token]
  prev_was_newline = False

  balance = 0  # to ignore newlines while inside (), [], etc.

  while True:
    if last_token:  # e.g. left over from WordParser
      tok = last_token
      #log('last_token = %s', last_token)
      last_token = None
    else:
      tok = lex.Read(lex_mode_e.Expr)
      #log('tok = %s', tok)

    # Comments and whitespace.  Newlines aren't ignored.
    if consts.GetKind(tok.id) == Kind.Ignored:
      continue

    # For multiline lists, maps, etc.
    if tok.id == Id.Op_Newline:
      if balance > 0:
        #log('*** SKIPPING NEWLINE')
        continue
      # Eliminate duplicate newline tokens.  It makes the grammar simpler,
      # and it's consistent with CPython's lexer and our own WordParser.
      if prev_was_newline:
        continue
      prev_was_newline = True
    else:
      prev_was_newline = False

    # _OTHER_BALANCE maps opening tokens to +1 and closing tokens to -1.
    balance += _OTHER_BALANCE.get(tok.id, 0)
    #log('BALANCE after seeing %s = %d', tok.id, balance)

    #if tok.id == Id.Expr_Name and tok.val in KEYWORDS:
    #  tok.id = KEYWORDS[tok.val]
    #  log('Replaced with %s', tok.id)

    # pgen2 token labels are single bytes, so the Id must fit in one.
    assert tok.id < 256, Id_str(tok.id)

    ilabel = _Classify(gr, tok)
    #log('tok = %s, ilabel = %d', tok, ilabel)

    # addtoken() returns True when the start symbol has been fully parsed.
    if p.addtoken(tok.id, tok, ilabel):
      return tok

    #
    # Mututally recursive calls into the command/word parsers.
    #

    if mylib.PYTHON:
      if tok.id == Id.Left_PercentParen:  # %(
        left_tok = tok
        lex.PushHint(Id.Op_RParen, Id.Right_ShArrayLiteral)

        # Blame the opening token
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        w_parser = parse_ctx.MakeWordParser(lex, line_reader)
        words = []
        close_tok = None  # type: Optional[Token]
        while True:
          w = w_parser.ReadWord(lex_mode_e.ShCommand)
          if 0:
            log('w = %s', w)

          if w.tag_() == word_e.Token:
            tok = cast(Token, w)
            if tok.id == Id.Right_ShArrayLiteral:
              close_tok = tok
              break
            elif tok.id == Id.Op_Newline:  # internal newlines allowed
              continue
            else:
              # Token
              p_die('Unexpected token in array literal: %r', tok.val, word=w)

          assert isinstance(w, compound_word)  # for MyPy
          words.append(w)

        words2 = braces.BraceDetectAll(words)
        words3 = word_.TildeDetectAll(words2)

        typ = Id.Expr_CastedDummy

        # The sub-language AST node is smuggled through pgen2 as a dummy
        # token; expr_to_ast casts it back.
        lit_part = sh_array_literal(left_tok, words3)
        opaque = cast(Token, lit_part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression

        # Now push the closing )
        # NOTE: at this point tok is close_tok (assigned in the loop above),
        # so tok.id is the closing token's id.
        ilabel = _Classify(gr, close_tok)
        done = p.addtoken(tok.id, close_tok, ilabel)
        assert not done  # can't end the expression

        continue

      if tok.id in (Id.Left_DollarParen, Id.Left_AtParen):  # $( @(
        left_token = tok

        lex.PushHint(Id.Op_RParen, Id.Eof_RParen)
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        c_parser = parse_ctx.MakeParserForCommandSub(line_reader, lex,
                                                     Id.Eof_RParen)
        node = c_parser.ParseCommandSub()
        # A little gross: Copied from osh/word_parse.py
        right_token = c_parser.w_parser.cur_token

        cs_part = command_sub(left_token, node)
        cs_part.spids.append(left_token.span_id)
        cs_part.spids.append(right_token.span_id)

        typ = Id.Expr_CastedDummy
        opaque = cast(Token, cs_part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression

        # Now push the closing )
        ilabel = _Classify(gr, right_token)
        done = p.addtoken(right_token.id, right_token, ilabel)
        assert not done  # can't end the expression

        continue

      if tok.id == Id.Left_DoubleQuote:
        left_token = tok
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        w_parser = parse_ctx.MakeWordParser(lex, line_reader)

        parts = []  # type: List[word_part_t]
        # ReadDoubleQuoted fills 'parts' and returns the token after the
        # closing quote, which we feed back in on the next iteration.
        last_token = w_parser.ReadDoubleQuoted(left_token, parts)
        expr_dq_part = double_quoted(left_token, parts)

        typ = Id.Expr_CastedDummy
        opaque = cast(Token, expr_dq_part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression

        continue

      if tok.id == Id.Left_DollarBrace:
        left_token = tok
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        w_parser = parse_ctx.MakeWordParser(lex, line_reader)

        part, last_token = w_parser.ReadBracedVarSub(left_token)

        # It's casted word_part__BracedVarSub -> dummy -> expr__BracedVarSub!
        typ = Id.Expr_CastedDummy
        opaque = cast(Token, part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression

        continue

      # '' and c''
      if tok.id in (Id.Left_SingleQuoteRaw, Id.Left_SingleQuoteC):
        if tok.id == Id.Left_SingleQuoteRaw:
          sq_mode = lex_mode_e.SQ_Raw
        else:
          sq_mode = lex_mode_e.SQ_C

        left_token = tok
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        w_parser = parse_ctx.MakeWordParser(lex, line_reader)

        tokens = []  # type: List[Token]
        # A plain ' (raw) string has no backslash escapes; c'' does.
        no_backslashes = (left_token.val == "'")
        last_token = w_parser.ReadSingleQuoted(sq_mode, left_token, tokens,
                                               no_backslashes)
        sq_part = single_quoted(left_token, tokens)

        typ = Id.Expr_CastedDummy
        opaque = cast(Token, sq_part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression
        continue

  else:
    # while/else: only reached if the loop exits without 'return' or 'break'.
    # We never broke out -- EOF is too soon (how can this happen???)
    raise parse.ParseError("incomplete input", tok.id, tok)
def Matches(self, comp):
  # type: (Api) -> Iterator[Union[Iterator, Iterator[str]]]
  """Generate completion candidates for the current readline state.

  Args:
    comp: Callback args from readline.  Readline uses set_completer_delims
      to tokenize the string.

  Returns a list of matches relative to readline's completion_delims.
  We have to post-process the output of various completers.
  """
  arena = self.parse_ctx.arena  # Used by inner functions

  # Pass the original line "out of band" to the completion callback.
  line_until_tab = comp.line[:comp.end]
  self.comp_ui_state.line_until_tab = line_until_tab

  self.parse_ctx.trail.Clear()
  line_reader = reader.StringLineReader(line_until_tab,
                                        self.parse_ctx.arena)
  c_parser = self.parse_ctx.MakeOshParser(line_reader, emit_comp_dummy=True)

  # We want the output from parse_ctx, so we don't use the return value.
  try:
    c_parser.ParseLogicalLine()
  except error.Parse as e:
    # e.g. 'ls | ' will not parse.  Now inspect the parser state!
    pass

  debug_f = self.debug_f
  trail = self.parse_ctx.trail
  if 1:
    trail.PrintDebugString(debug_f)

  #
  # First try completing the shell language itself.
  #

  # NOTE: We get Eof_Real in the command state, but not in the middle of a
  # BracedVarSub.  This is due to the difference between the CommandParser
  # and WordParser.
  tokens = trail.tokens
  last = -1
  if tokens[-1].id == Id.Eof_Real:
    last -= 1  # ignore it

  # t1 is the last interesting token, t2 the one before it; either may be
  # None if the trail is too short.
  try:
    t1 = tokens[last]
  except IndexError:
    t1 = None
  try:
    t2 = tokens[last - 1]
  except IndexError:
    t2 = None

  debug_f.log('line: %r', comp.line)
  debug_f.log('rl_slice from byte %d to %d: %r', comp.begin, comp.end,
              comp.line[comp.begin:comp.end])

  debug_f.log('t1 %s', t1)
  debug_f.log('t2 %s', t2)

  # Each of the 'yield' statements below returns a fully-completed line, to
  # appease the readline library.  The root cause of this dance: If there's
  # one candidate, readline is responsible for redrawing the input line.
  # OSH only displays candidates and never redraws the input line.

  def _TokenStart(tok):
    # type: (Token) -> int
    # Column where the token begins on its line, via the arena's line spans.
    span = arena.GetLineSpan(tok.span_id)
    return span.col

  if t2:  # We always have t1?
    # echo $
    if IsDollar(t2) and IsDummy(t1):
      self.comp_ui_state.display_pos = _TokenStart(t2) + 1  # 1 for $
      for name in self.mem.VarNames():
        yield line_until_tab + name  # no need to quote var names
      return

    # echo ${
    if t2.id == Id.Left_DollarBrace and IsDummy(t1):
      self.comp_ui_state.display_pos = _TokenStart(t2) + 2  # 2 for ${
      for name in self.mem.VarNames():
        yield line_until_tab + name  # no need to quote var names
      return

    # echo $P
    if t2.id == Id.VSub_DollarName and IsDummy(t1):
      # Example: ${undef:-$P
      # readline splits at ':' so we have to prepend '-$' to every completed
      # variable name.
      self.comp_ui_state.display_pos = _TokenStart(t2) + 1  # 1 for $
      to_complete = t2.val[1:]
      n = len(to_complete)
      for name in self.mem.VarNames():
        if name.startswith(to_complete):
          yield line_until_tab + name[n:]  # no need to quote var names
      return

    # echo ${P
    if t2.id == Id.VSub_Name and IsDummy(t1):
      self.comp_ui_state.display_pos = _TokenStart(t2)  # no offset
      to_complete = t2.val
      n = len(to_complete)
      for name in self.mem.VarNames():
        if name.startswith(to_complete):
          yield line_until_tab + name[n:]  # no need to quote var names
      return

    # echo $(( VAR
    if t2.id == Id.Lit_ArithVarLike and IsDummy(t1):
      self.comp_ui_state.display_pos = _TokenStart(t2)  # no offset
      to_complete = t2.val
      n = len(to_complete)
      for name in self.mem.VarNames():
        if name.startswith(to_complete):
          yield line_until_tab + name[n:]  # no need to quote var names
      return

  if trail.words:
    # echo ~<TAB>
    # echo ~a<TAB> $(home dirs)
    # This must be done at a word level, and TildeDetectAll() does NOT help
    # here, because they don't have trailing slashes yet!  We can't do it on
    # tokens, because otherwise f~a will complete.  Looking at word_part is
    # EXACTLY what we want.
    parts = trail.words[-1].parts
    if (len(parts) == 2 and
        parts[0].tag_() == word_part_e.Literal and
        parts[1].tag_() == word_part_e.Literal and
        parts[0].id == Id.Lit_TildeLike and
        parts[1].id == Id.Lit_CompDummy):
      t2 = parts[0]

      # +1 for ~
      self.comp_ui_state.display_pos = _TokenStart(parts[0]) + 1

      to_complete = t2.val[1:]
      n = len(to_complete)
      for u in pwd.getpwall():  # catch errors?
        name = u.pw_name
        if name.startswith(to_complete):
          yield line_until_tab + ShellQuoteB(name[n:]) + '/'
      return

  # echo hi > f<TAB>   (complete redirect arg)
  if trail.redirects:
    r = trail.redirects[-1]
    # Only complete 'echo >', but not 'echo >&' or 'cat <<'
    # TODO: Don't complete <<< 'h'
    if (r.arg.tag_() == redir_param_e.Word and
        consts.RedirArgType(r.op.id) == redir_arg_type_e.Path):
      arg_word = r.arg
      if WordEndsWithCompDummy(arg_word):
        debug_f.log('Completing redirect arg')

        try:
          val = self.word_ev.EvalWordToString(r.arg)
        except error.FatalRuntime as e:
          debug_f.log('Error evaluating redirect word: %s', e)
          return
        if val.tag_() != value_e.Str:
          debug_f.log("Didn't get a string from redir arg")
          return

        span_id = word_.LeftMostSpanForWord(arg_word)
        span = arena.GetLineSpan(span_id)

        self.comp_ui_state.display_pos = span.col

        comp.Update(to_complete=val.s)  # FileSystemAction uses only this
        n = len(val.s)
        action = FileSystemAction(add_slash=True)
        for name in action.Matches(comp):
          yield line_until_tab + ShellQuoteB(name[n:])
        return

  #
  # We're not completing the shell language.  Delegate to user-defined
  # completion for external tools.
  #

  # Set below, and set on retries.
  base_opts = None
  user_spec = None

  # Used on retries.
  partial_argv = []
  num_partial = -1
  first = None

  if trail.words:
    # Now check if we're completing a word!
    if WordEndsWithCompDummy(trail.words[-1]):
      debug_f.log('Completing words')
      #
      # It didn't look like we need to complete var names, tilde,
      # redirects, etc.  Now try partial_argv, which may involve invoking
      # PLUGINS.

      # needed to complete paths with ~
      words2 = word_.TildeDetectAll(trail.words)
      if 0:
        debug_f.log('After tilde detection')
        for w in words2:
          print(w, file=debug_f)
      if 0:
        debug_f.log('words2:')
        for w2 in words2:
          debug_f.log(' %s', w2)

      for w in words2:
        try:
          # TODO:
          # - Should we call EvalWordSequence?  But turn globbing off?  It
          #   can do splitting and such.
          # - We could have a variant to eval TildeSub to ~ ?
          val = self.word_ev.EvalWordToString(w)
        except error.FatalRuntime:
          # Why would it fail?
          continue
        if val.tag_() == value_e.Str:
          partial_argv.append(val.s)
        else:
          pass

      debug_f.log('partial_argv: %s', partial_argv)
      num_partial = len(partial_argv)

      first = partial_argv[0]
      alias_first = None
      debug_f.log('alias_words: %s', trail.alias_words)

      if trail.alias_words:
        w = trail.alias_words[0]
        try:
          val = self.word_ev.EvalWordToString(w)
        except error.FatalRuntime:
          pass
        # NOTE(review): if EvalWordToString raised above, 'val' here is the
        # stale value from the partial_argv loop (or unbound if that loop
        # appended nothing) -- confirm whether the except should 'continue'
        # or skip this assignment.
        alias_first = val.s
        debug_f.log('alias_first: %s', alias_first)

      if num_partial == 0:  # should never happen because of Lit_CompDummy
        raise AssertionError()
      elif num_partial == 1:
        base_opts, user_spec = self.comp_lookup.GetFirstSpec()

        # Display/replace since the beginning of the first word.  Note:
        # this is non-zero in the case of
        # echo $(gr   and
        # echo `gr

        span_id = word_.LeftMostSpanForWord(trail.words[0])
        span = arena.GetLineSpan(span_id)

        self.comp_ui_state.display_pos = span.col
        self.debug_f.log('** DISPLAY_POS = %d',
                         self.comp_ui_state.display_pos)

      else:
        base_opts, user_spec = self.comp_lookup.GetSpecForName(first)
        if not user_spec and alias_first:
          base_opts, user_spec = self.comp_lookup.GetSpecForName(
              alias_first)
          if user_spec:
            # Pass the aliased command to the user-defined function, and
            # use it for retries.
            first = alias_first
        if not user_spec:
          base_opts, user_spec = self.comp_lookup.GetFallback()

        # Display since the beginning
        span_id = word_.LeftMostSpanForWord(trail.words[-1])
        span = arena.GetLineSpan(span_id)
        self.comp_ui_state.display_pos = span.col
        self.debug_f.log('words[-1]: %r', trail.words[-1])
        self.debug_f.log('display_pos %d', self.comp_ui_state.display_pos)

      # Update the API for user-defined functions.
      index = len(partial_argv) - 1  # COMP_CWORD is -1 when it's empty
      prev = '' if index == 0 else partial_argv[index - 1]
      comp.Update(first=first, to_complete=partial_argv[-1], prev=prev,
                  index=index, partial_argv=partial_argv)

  # This happens in the case of [[ and ((, or a syntax error like
  # 'echo < >'.
  if not user_spec:
    debug_f.log("Didn't find anything to complete")
    return

  # Reset it back to what was registered.  User-defined functions can
  # mutate it.
  dynamic_opts = {}
  self.compopt_state.dynamic_opts = dynamic_opts
  self.compopt_state.currently_completing = True
  try:
    done = False
    while not done:
      try:
        for candidate in self._PostProcess(base_opts, dynamic_opts,
                                           user_spec, comp):
          yield candidate
      except _RetryCompletion as e:
        debug_f.log('Got 124, trying again ...')

        # Get another user_spec.  The ShellFuncAction may have 'sourced'
        # code and run 'complete' to mutate comp_lookup, and we want to get
        # that new entry.
        if num_partial == 0:
          raise AssertionError()
        elif num_partial == 1:
          base_opts, user_spec = self.comp_lookup.GetFirstSpec()
        else:
          # (already processed alias_first)
          base_opts, user_spec = self.comp_lookup.GetSpecForName(first)
          if not user_spec:
            base_opts, user_spec = self.comp_lookup.GetFallback()
      else:
        done = True  # exhausted candidates without getting a retry
  finally:
    self.compopt_state.currently_completing = False
def _ReadArrayLiteral(self):
  # type: () -> word_part_t
  """Parse a shell array literal after '=', e.g. a=(1 2 3).

  Also detects associative-array form A=([k]=v ...) via DetectAssocPair on
  the first word: if the first word is a key/value pair, ALL words must be.

  TODO: See osh/cmd_parse.py:164 for Id.Lit_ArrayLhsOpen, for a[x++]=1

  We want:

  A=(['x']=1 ["x"]=2 [$x$y]=3)

  Maybe allow this as a literal string?  Because I think I've seen it
  before?  Or maybe force people to patch to learn the rule.

  A=([x]=4)

  Starts with Lit_Other '[', and then it has Lit_ArrayLhsClose
  Maybe enforce that ALL have keys or NONE of have keys.
  """
  self._Next(lex_mode_e.ShCommand)  # advance past (
  self._Peek()
  if self.cur_token.id != Id.Op_LParen:
    p_die('Expected ( after =, got %r', self.cur_token.val,
          token=self.cur_token)
  left_token = self.cur_token
  paren_spid = self.cur_token.span_id

  # MUST use a new word parser (with same lexer).
  w_parser = self.parse_ctx.MakeWordParser(self.lexer, self.line_reader)
  words = []
  while True:
    w = w_parser.ReadWord(lex_mode_e.ShCommand)

    if isinstance(w, word__Token):
      word_id = word_.CommandId(w)
      if word_id == Id.Right_ShArrayLiteral:
        # Closing ) terminates the literal.
        break
      # Unlike command parsing, array parsing allows embedded \n.
      elif word_id == Id.Op_Newline:
        continue
      else:
        # Token
        p_die('Unexpected token in array literal: %r', w.token.val, word=w)

    assert isinstance(w, word__Compound)  # for MyPy
    words.append(w)

  if not words:  # a=() is empty indexed array
    # ignore for invariant List?
    node = sh_array_literal(left_token, words)  # type: ignore
    node.spids.append(left_token.span_id)
    return node

  # If the first one is a key/value pair, then the rest are assumed to be.
  pair = word_.DetectAssocPair(words[0])
  if pair:
    pairs = [pair[0], pair[1]]  # flat representation

    n = len(words)
    for i in xrange(1, n):
      w = words[i]
      pair = word_.DetectAssocPair(w)
      if not pair:
        p_die("Expected associative array pair", word=w)

      pairs.append(pair[0])  # flat representation
      pairs.append(pair[1])

    # invariant List?
    node = word_part.AssocArrayLiteral(left_token, pairs)  # type: ignore
    node.spids.append(paren_spid)
    return node

  # Indexed array: detect braces {a,b} and tildes before building the node.
  words2 = braces.BraceDetectAll(words)
  words3 = word_.TildeDetectAll(words2)
  node = sh_array_literal(left_token, words3)
  node.spids.append(paren_spid)
  return node