def Eat(self, token_type):
    # type: (Id_t) -> None
    """Require the current token to be `token_type`, then advance past it.

    Args:
      token_type: The Id the parser must currently be positioned at.

    When self.AtToken(token_type) is false, p_die() is called with a message
    naming both the expected Id and self.op_id, blaming self.cur_word.
    """
    matched = self.AtToken(token_type)
    if not matched:
        expected = ui.PrettyId(token_type)
        actual = ui.PrettyId(self.op_id)
        p_die('Parser expected %s, got %s', expected, actual,
              word=self.cur_word)

    self.Next()
def _ReadPatSubVarOp(self):
    # type: () -> suffix_op__PatSub
    """Parse the pattern-substitution suffix of ${x/pat/replace}.

    Grammar:

      Match  = ('/' | '#' | '%') WORD
      VarSub = ...
             | VarOf '/' Match '/' WORD

    Returns:
      A suffix_op.PatSub built from the pattern word, the replacement word
      (None when the expression ends right after the pattern), and
      replace_mode.  replace_mode is the Id of a leading / # % modifier that
      was stripped from the pattern, or Id.Undefined_Tok if there was none.

    Calls p_die() when the pattern has no parts, when the replacement isn't
    followed by }, or when the pattern isn't followed by } or /.
    """
    # Exception: VSub_ArgUnquoted even if it's quoted
    # stop at eof_type=Lit_Slash, empty_ok=False
    UP_pat = self._ReadVarOpArg3(lex_mode_e.VSub_ArgUnquoted, Id.Lit_Slash, False)
    assert UP_pat.tag_() == word_e.Compound, UP_pat  # Because empty_ok=False
    pat = cast(compound_word, UP_pat)

    # A pattern that is exactly one unquoted '/' means the real pattern starts
    # after it, so read one more token and append it to the pattern's parts.
    if len(pat.parts) == 1:
        ok, s, quoted = word_.StaticEval(pat)
        if ok and s == '/' and not quoted:  # Looks like ${a////c}, read again
            self._Next(lex_mode_e.VSub_ArgUnquoted)
            self._Peek()
            pat.parts.append(self.cur_token)

    if len(pat.parts) == 0:
        p_die('Pattern in ${x/pat/replace} must not be empty',
              token=self.cur_token)

    replace_mode = Id.Undefined_Tok
    # Check for / # % modifier on pattern.  If the first part is one of those
    # literals, it's removed from the pattern and recorded as replace_mode.
    UP_first_part = pat.parts[0]
    if UP_first_part.tag_() == word_part_e.Literal:
        lit_id = cast(Token, UP_first_part).id
        if lit_id in (Id.Lit_Slash, Id.Lit_Pound, Id.Lit_Percent):
            pat.parts.pop(0)
            replace_mode = lit_id

    # NOTE: If there is a modifier, the pattern can be empty, e.g.
    # ${s/#/foo} and ${a/%/foo}.

    if self.token_type == Id.Right_DollarBrace:
        # e.g. ${v/a} is the same as ${v/a/}  -- empty replacement string
        return suffix_op.PatSub(pat, None, replace_mode)

    if self.token_type == Id.Lit_Slash:
        replace = self._ReadVarOpArg(lex_mode_e.VSub_ArgUnquoted)  # do not stop at /

        self._Peek()
        if self.token_type != Id.Right_DollarBrace:
            # NOTE: I think this never happens.
            # We're either in the VS_ARG_UNQ or VS_ARG_DQ lex state, and everything
            # there is Lit_ or Left_, except for }.
            p_die("Expected } after replacement string, got %s",
                  ui.PrettyId(self.token_type), token=self.cur_token)

        return suffix_op.PatSub(pat, replace, replace_mode)

    # Happens with ${x//} and ${x///foo}, see test/parse-errors.sh
    p_die('Expected } or / to close pattern', token=self.cur_token)
def _Classify(gr, tok):
    # type: (Grammar, Token) -> int
    """Look up the integer that the grammar associates with a token.

    This has to match up with what ParserGenerator.make_grammar() did when
    calling make_label() and make_first().  See classify() in
    opy/pgen2/driver.py.

    Args:
      gr: Grammar whose `keywords` and `tokens` tables are consulted.
      tok: The token to classify.

    Returns:
      The entry from gr.keywords (for an Expr_Name whose value is a keyword)
      or from gr.tokens (keyed by the token's Id).

    Calls p_die(), blaming `tok`, when neither table has an entry.
    """
    tok_id = tok.id

    # 'x' and 'for' are both tokenized as Expr_Name, so the keyword table is
    # checked first.  This handles the 'for' case.
    if tok_id == Id.Expr_Name and tok.val in gr.keywords:
        return gr.keywords[tok.val]

    # This handles 'x', and every other kind of token.
    if tok_id in gr.tokens:
        return gr.tokens[tok_id]

    # Id.Unknown_Tok gets no suffix in the error message.
    if tok_id == Id.Unknown_Tok:
        type_str = ''
    else:
        type_str = ' (%s)' % ui.PrettyId(tok_id)

    p_die('Unexpected token in expression mode%s', type_str, token=tok)
def Parse(self, lexer, start_symbol):
    # type: (Lexer, int) -> Tuple[PNode, Token]
    """Run the push parser over tokens from `lexer`, starting at start_symbol.

    Args:
      lexer: Source of tokens, handed to _PushOilTokens().
      start_symbol: Grammar symbol passed to the push parser's setup().

    Returns:
      (root node of the push parser, last token returned by _PushOilTokens)

    A parse.ParseError is converted into a p_die() call that blames the
    token carried by the exception.
    """
    # Reuse the parser
    pusher = self.push_parser
    pusher.setup(start_symbol)

    try:
        last_token = _PushOilTokens(self.parse_ctx, self.gr, pusher, lexer)
    except parse.ParseError as e:
        #log('ERROR %s', e)
        # TODO:
        # - Describe what lexer mode we're in (Invalid syntax in regex)
        #   - Maybe say where the mode started
        # - Id.Unknown_Tok could say "This character is invalid"

        # ParseError has a "too much input" case but I haven't been able to
        # tickle it.  Maybe it's because of the Eof tokens?
        bad_tok = e.tok
        p_die('Syntax error in expression (near %s)',
              ui.PrettyId(bad_tok.id), token=bad_tok)
        #raise error.Parse('Syntax error in expression', token=e.tok)

    return pusher.rootnode, last_token
def Eval(self, node):
    # type: (bool_expr_t) -> bool
    """Evaluate a boolean expression node and return its truth value.

    Dispatches on the node's tag:

      WordTest    true if the evaluated word is a non-empty string
      LogicalNot  negation of the child
      LogicalAnd  short-circuit 'and' of left and right
      LogicalOr   short-circuit 'or' of left and right
      Unary       operator applied to one evaluated word
      Binary      operator applied to two evaluated words

    Unary and Binary operators are further dispatched on
    consts.BoolArgType(op_id).

    Args:
      node: The expression to evaluate.

    Returns:
      The boolean result.

    Raises:
      AssertionError: for a tag, argument type, or operator that none of the
        branches below handle.

    e_die() is called for an invalid file descriptor, an unimplemented unary
    operator, and an invalid regex.
    """
    # Keep the un-narrowed reference; each case casts it to its concrete type.
    UP_node = node
    with tagswitch(node) as case:
        if case(bool_expr_e.WordTest):
            node = cast(bool_expr__WordTest, UP_node)
            s = self._EvalCompoundWord(node.w)
            return bool(s)

        elif case(bool_expr_e.LogicalNot):
            node = cast(bool_expr__LogicalNot, UP_node)
            b = self.Eval(node.child)
            return not b

        elif case(bool_expr_e.LogicalAnd):
            node = cast(bool_expr__LogicalAnd, UP_node)
            # Short-circuit evaluation
            if self.Eval(node.left):
                return self.Eval(node.right)
            else:
                return False

        elif case(bool_expr_e.LogicalOr):
            node = cast(bool_expr__LogicalOr, UP_node)
            # Short-circuit evaluation: the right side is only evaluated when
            # the left side is false.
            if self.Eval(node.left):
                return True
            else:
                return self.Eval(node.right)

        elif case(bool_expr_e.Unary):
            node = cast(bool_expr__Unary, UP_node)
            op_id = node.op_id
            s = self._EvalCompoundWord(node.child)

            # Now dispatch on arg type
            arg_type = consts.BoolArgType(op_id)  # could be static in the LST?

            if arg_type == bool_arg_type_e.Path:
                return bool_stat.DoUnaryOp(op_id, s)

            if arg_type == bool_arg_type_e.Str:
                if op_id == Id.BoolUnary_z:
                    return not bool(s)
                if op_id == Id.BoolUnary_n:
                    return bool(s)

                raise AssertionError(op_id)  # should never happen

            if arg_type == bool_arg_type_e.Other:
                if op_id == Id.BoolUnary_t:
                    try:
                        fd = int(s)
                    except ValueError:
                        # TODO: Need location information of [
                        # NOTE(review): e_die() presumably never returns;
                        # otherwise 'fd' would be unbound below -- confirm.
                        e_die('Invalid file descriptor %r', s, word=node.child)
                    return bool_stat.isatty(fd, s, node.child)

                # See whether 'set -o' options have been set
                if op_id == Id.BoolUnary_o:
                    index = match.MatchOption(s)
                    # An index of 0 is treated as false without consulting
                    # opt_array.
                    if index == 0:
                        return False
                    else:
                        return self.exec_opts.opt_array[index]

                e_die("%s isn't implemented", ui.PrettyId(op_id))  # implicit location

            raise AssertionError(arg_type)  # should never happen

        elif case(bool_expr_e.Binary):
            node = cast(bool_expr__Binary, UP_node)

            op_id = node.op_id
            # Whether to glob escape.  The quoting mode only affects how the
            # right-hand word is evaluated.
            with switch(op_id) as case2:
                if case2(Id.BoolBinary_GlobEqual, Id.BoolBinary_GlobDEqual,
                         Id.BoolBinary_GlobNEqual):
                    quote_kind = quote_e.FnMatch
                elif case2(Id.BoolBinary_EqualTilde):
                    quote_kind = quote_e.ERE
                else:
                    quote_kind = quote_e.Default

            s1 = self._EvalCompoundWord(node.left)
            s2 = self._EvalCompoundWord(node.right, quote_kind=quote_kind)

            # Now dispatch on arg type
            arg_type = consts.BoolArgType(op_id)

            if arg_type == bool_arg_type_e.Path:
                return bool_stat.DoBinaryOp(op_id, s1, s2)

            if arg_type == bool_arg_type_e.Int:
                # NOTE: We assume they are constants like [[ 3 -eq 3 ]].
                # Bash also allows [[ 1+2 -eq 3 ]].
                i1 = self._StringToIntegerOrError(s1, blame_word=node.left)
                i2 = self._StringToIntegerOrError(s2, blame_word=node.right)

                if op_id == Id.BoolBinary_eq:
                    return i1 == i2
                if op_id == Id.BoolBinary_ne:
                    return i1 != i2
                if op_id == Id.BoolBinary_gt:
                    return i1 > i2
                if op_id == Id.BoolBinary_ge:
                    return i1 >= i2
                if op_id == Id.BoolBinary_lt:
                    return i1 < i2
                if op_id == Id.BoolBinary_le:
                    return i1 <= i2

                raise AssertionError(op_id)  # should never happen

            if arg_type == bool_arg_type_e.Str:

                # For the matching operators the right side (s2) is the
                # pattern and the left side (s1) is the string being tested.
                if op_id in (Id.BoolBinary_GlobEqual, Id.BoolBinary_GlobDEqual):
                    #log('Matching %s against pattern %s', s1, s2)
                    return libc.fnmatch(s2, s1)

                if op_id == Id.BoolBinary_GlobNEqual:
                    return not libc.fnmatch(s2, s1)

                if op_id in (Id.BoolBinary_Equal, Id.BoolBinary_DEqual):
                    return s1 == s2

                if op_id == Id.BoolBinary_NEqual:
                    return s1 != s2

                if op_id == Id.BoolBinary_EqualTilde:
                    # TODO: This should go to --debug-file
                    #log('Matching %r against regex %r', s1, s2)
                    try:
                        matches = libc.regex_match(s2, s1)
                    except RuntimeError:
                        # Status 2 indicates a regex parse error.  This is fatal in OSH but
                        # not in bash, which treats [[ like a command with an exit code.
                        e_die("Invalid regex %r", s2, word=node.right, status=2)

                    if matches is None:
                        return False

                    # On a successful match, hand the result to
                    # _SetRegexMatches() before reporting true.
                    self._SetRegexMatches(matches)
                    return True

                if op_id == Id.Op_Less:
                    return s1 < s2

                if op_id == Id.Op_Great:
                    return s1 > s2

                raise AssertionError(op_id)  # should never happen

    # Reached when no case above returned, e.g. an unhandled tag.
    raise AssertionError(node.tag_())
def ParseFactor(self):
    # type: () -> bool_expr_t
    """Parse a single factor of a boolean expression.

    Factor  : WORD
            | UNARY_OP WORD
            | WORD BINARY_OP WORD
            | '(' Expr ')'

    Returns:
      bool_expr.Unary, bool_expr.Binary, bool_expr.WordTest, or whatever
      ParseExpr() returns for a parenthesized expression.  Operand words are
      replaced by the result of word_.TildeDetect() when it returns one.

    Calls p_die() for a bad unary operand, a missing ')', or a token that
    can't start a factor.
    """
    kind = self.bool_kind

    # --- UNARY_OP WORD ---
    if kind == Kind.BoolUnary:
        # Just save the type and not the token itself?
        unary_id = self.bool_id
        self._Next()
        operand = self.cur_word

        # e.g. [[ -f < ]].  But [[ -f '<' ]] is OK
        if operand.tag_() not in (word_e.Compound, word_e.String):
            p_die('Invalid argument to unary operator', word=operand)
        self._Next()

        expanded = word_.TildeDetect(operand)
        if expanded:
            operand = expanded

        return bool_expr.Unary(unary_id, operand)

    if kind == Kind.Word:
        # Peek ahead another token.
        next_word = self._LookAhead()
        next_id = word_.BoolId(next_word)
        next_kind = consts.GetKind(next_id)
        #log('next_word %s / next_id %s / next_kind %s', next_word, next_id, next_kind)

        # Op for < and >, -a and -o pun
        is_binary = (next_kind == Kind.BoolBinary or
                     next_id in (Id.Op_Less, Id.Op_Great))

        # --- WORD, as in [[ foo ]] ---
        if not is_binary:
            lone = self.cur_word
            expanded = word_.TildeDetect(lone)
            if expanded:
                lone = expanded
            self._Next()
            return bool_expr.WordTest(lone)

        # --- WORD BINARY_OP WORD ---
        left = self.cur_word

        self._Next()
        binary_id = self.bool_id

        # TODO: Need to change to lex_mode_e.BashRegex.
        # _Next(lex_mode) then?
        if next_id == Id.BoolBinary_EqualTilde:
            self._Next(lex_mode=lex_mode_e.BashRegex)
        else:
            self._Next()

        right = self.cur_word
        # NOTE: For the regex operator, StaticEval for checking regex syntax
        # isn't enough.  We could need to pass do_ere so that the quoted parts
        # get escaped.
        #ok, s, unused_quoted = word_.StaticEval(right)

        self._Next()

        expanded = word_.TildeDetect(left)
        if expanded:
            left = expanded
        expanded = word_.TildeDetect(right)
        if expanded:
            right = expanded

        return bool_expr.Binary(binary_id, left, right)

    # --- '(' Expr ')' ---
    if self.bool_id == Id.Op_LParen:
        self._Next()
        inner = self.ParseExpr()
        if self.bool_id != Id.Op_RParen:
            p_die('Expected ), got %s', word_.Pretty(self.cur_word),
                  word=self.cur_word)
        self._Next()
        return inner

    # It's not WORD, UNARY_OP, or '('
    p_die('Unexpected token in boolean expression (%s)',
          ui.PrettyId(self.bool_id), word=self.cur_word)
def DoUnaryOp(op_id, s):
    # type: (Id_t, str) -> bool
    """Evaluate a unary file test operator against the path `s`.

    Args:
      op_id: Id of the operator, one of the Id.BoolUnary_* values below.
      s: The path to examine.

    Returns:
      The result of the test.  False is also returned whenever lstat() or
      stat() raises OSError.

    Calls e_die() for an operator that isn't handled here.
    """
    # Only use lstat if we're testing for a symlink.
    if op_id in (Id.BoolUnary_h, Id.BoolUnary_L):
        try:
            link_st = posix.lstat(s)
        except OSError:
            # TODO: simple_test_builtin should report this as status=2.
            #e_die("lstat() error: %s", e, word=node.child)
            return False
        return stat.S_ISLNK(link_st.st_mode)

    try:
        st = posix.stat(s)
    except OSError:
        # TODO: simple_test_builtin should report this as status=2.
        # Problem: we really need errno, because test -f / is bad argument,
        # while test -f /nonexistent is a good argument but failed.  Gah.
        # ENOENT vs. ENAMETOOLONG.
        #e_die("stat() error: %s", e, word=node.child)
        return False

    # Existence: stat() succeeded, so there's nothing more to check.
    if op_id in (Id.BoolUnary_e, Id.BoolUnary_a):  # -a is alias for -e
        return True

    # File type and mode bits
    mode = st.st_mode
    if op_id == Id.BoolUnary_f:
        return stat.S_ISREG(mode)
    if op_id == Id.BoolUnary_d:
        return stat.S_ISDIR(mode)
    if op_id == Id.BoolUnary_b:
        return stat.S_ISBLK(mode)
    if op_id == Id.BoolUnary_c:
        return stat.S_ISCHR(mode)
    if op_id == Id.BoolUnary_p:
        return stat.S_ISFIFO(mode)
    if op_id == Id.BoolUnary_S:
        return stat.S_ISSOCK(mode)
    if op_id == Id.BoolUnary_k:
        # need 'bool' for MyPy
        return bool(stat.S_IMODE(mode) & stat.S_ISVTX)

    # Permissions, as reported by access()
    if op_id == Id.BoolUnary_r:
        return posix.access(s, posix.R_OK_)
    if op_id == Id.BoolUnary_w:
        return posix.access(s, posix.W_OK_)
    if op_id == Id.BoolUnary_x:
        return posix.access(s, posix.X_OK_)

    # Size and ownership
    if op_id == Id.BoolUnary_s:
        return st.st_size != 0
    if op_id == Id.BoolUnary_O:
        return st.st_uid == posix.geteuid()
    if op_id == Id.BoolUnary_G:
        return st.st_gid == posix.getegid()

    e_die("%s isn't implemented", ui.PrettyId(op_id))  # implicit location
def DoUnarySuffixOp(s, op, arg, extglob):
    # type: (str, suffix_op__Unary, str, bool) -> str
    """Helper for ${x#prefix} and family.

    Args:
      s: The string the operator is applied to.
      op: The operator node; op.op_id selects the operation.
      arg: The operator's argument, glob-escaped.
      extglob: Passed through to libc.fnmatch().

    Returns:
      `s` with the matching prefix or suffix removed, or with its case
      changed; `s` itself when nothing matches.

    Raises:
      AssertionError: unknown operator with a constant argument.
      NotImplementedError: operator other than # ## % %% with an argument
        that looks like a glob.

    Calls e_die() when a case-changing operator is given a non-empty argument.
    """
    op_id = op.op_id

    # Fast path for constant strings.
    if not glob_.LooksLikeGlob(arg):
        # It doesn't look like a glob, but we glob-escaped it (e.g. [ -> \[).
        # So reverse it.  NOTE: We also do this check in Globber.Expand().  It
        # would be nice to somehow store the original string rather than
        # escaping/unescaping.
        literal = glob_.GlobUnescape(arg)

        if op_id in (Id.VOp1_Pound, Id.VOp1_DPound):  # const prefix
            # explicit check for non-empty arg (len for mycpp)
            if len(literal) and s.startswith(literal):
                return s[len(literal):]
            return s

        if op_id in (Id.VOp1_Percent, Id.VOp1_DPercent):  # const suffix
            # need explicit check for non-empty arg (len for mycpp)
            if len(literal) and s.endswith(literal):
                return s[:-len(literal)]
            return s

        # , ,, ^ ^^
        # These operators take glob arguments, we don't implement that obscure
        # case.
        if op_id in (Id.VOp1_Comma, Id.VOp1_DComma,
                     Id.VOp1_Caret, Id.VOp1_DCaret):
            if literal != '':
                # TODO: location info for op
                e_die("%s can't have an argument", ui.PrettyId(op_id))

            if op_id == Id.VOp1_DComma:
                return s.lower()
            if op_id == Id.VOp1_DCaret:
                return s.upper()

            # The single-character forms only touch the first letter.
            if len(s) == 0:
                return s
            if op_id == Id.VOp1_Comma:
                return s[0].lower() + s[1:]
            return s[0].upper() + s[1:]

        raise AssertionError(op_id)

    # For patterns, do fnmatch() in a loop.
    #
    # TODO:
    # - Another potential fast path:
    #   v=aabbccdd
    #   echo ${v#*b}  # strip shortest prefix
    #
    # If the whole thing doesn't match '*b*', then no test can succeed.  So we
    # can fail early.  Conversely echo ${v%%c*} and '*c*'.
    #
    # (Although honestly this whole construct is nuts and should be deprecated.)

    length = len(s)

    if op_id == Id.VOp1_Pound:  # shortest prefix
        # 'abcd': try prefixes '', 'a', 'ab', 'abc', ...
        pos = 0
        while True:
            assert pos <= length
            #log('Matching pattern %r with %r', arg, s[:pos])
            if libc.fnmatch(arg, s[:pos], extglob):
                return s[pos:]
            if pos >= length:
                return s
            pos = _NextUtf8Char(s, pos)

    elif op_id == Id.VOp1_DPound:  # longest prefix
        # 'abcd': try prefixes 'abcd', 'abc', 'ab', 'a', ''
        pos = length
        while True:
            assert pos >= 0
            #log('Matching pattern %r with %r', arg, s[:pos])
            if libc.fnmatch(arg, s[:pos], extglob):
                return s[pos:]
            if pos == 0:
                return s
            pos = PreviousUtf8Char(s, pos)

    elif op_id == Id.VOp1_Percent:  # shortest suffix
        # 'abcd': try suffixes '', 'd', 'cd', 'bcd', 'abcd'
        pos = length
        while True:
            assert pos >= 0
            #log('Matching pattern %r with %r', arg, s[pos:])
            if libc.fnmatch(arg, s[pos:], extglob):
                return s[:pos]
            if pos == 0:
                return s
            pos = PreviousUtf8Char(s, pos)

    elif op_id == Id.VOp1_DPercent:  # longest suffix
        # 'abcd': try suffixes 'abcd', 'bcd', 'cd', 'd', ''
        pos = 0
        while True:
            assert pos <= length
            #log('Matching pattern %r with %r', arg, s[pos:])
            if libc.fnmatch(arg, s[pos:], extglob):
                return s[:pos]
            if pos >= length:
                return s
            pos = _NextUtf8Char(s, pos)

    else:
        raise NotImplementedError(ui.PrettyId(op_id))