def testFnmatch(self):
    """Check libc.fnmatch() against a table of (pattern, string, result).

    Each case is matched with libc.fnmatch(pattern, string) and the result
    is compared with the expected value (1 for a match, 0 for no match).
    """
    cases = [
        # (pattern, string, result)
        ('', '', 1),  # no pattern is valid
        ('a', 'a', 1),
        ('?', 'a', 1),
        # Backslashes are doubled so the literals contain no invalid escape
        # sequences; the string values are unchanged ('\\?' is backslash + ?).
        ('\\?', 'a', 0),
        ('\\?', '?', 1),
        ('\\\\', '\\', 1),
        # What is another error?  Invalid escape is OK?
        ('\\', '\\', 0),  # a lone backslash pattern is expected not to match
        ('[[:alpha:]]', 'a', 1),
        ('[^[:alpha:]]', 'a', 0),  # negate
        ('[[:alpha:]]', 'aa', 0),  # exact match fails
        # Combining char class and a literal character
        ('[[:alpha:]7]', '7', 1),
        ('[[:alpha:]][[:alpha:]]', 'az', 1),
    ]

    for pat, s, expected in cases:
        actual = libc.fnmatch(pat, s)
        # Include the inputs in the message so a failing case is identifiable.
        self.assertEqual(
            expected, actual,
            'fnmatch(%r, %r) -> got %r' % (pat, s, actual))
def testFnmatchExtglob(self): return # With GNU extension. cases = [ # One of these ('--@(help|verbose)', '--verbose', 1), ('--@(help|verbose)', '--foo', 0), ('--*(help|verbose)', '--verbose', 1), ('--*(help|verbose)', '--', 1), ('--*(help|verbose)', '--helpverbose', 1), # Not what we want ('--+(help|verbose)', '--verbose', 1), ('--+(help|verbose)', '--', 0), ('--+(help|verbose)', '--helpverbose', 1), # Not what we want ('--?(help|verbose)', '--verbose', 1), ('--?(help|verbose)', '--helpverbose', 0), # Neither of these ('--!(help|verbose)', '--verbose', 0), ] for pat, s, expected in cases: actual = libc.fnmatch(pat, s) self.assertEqual( expected, actual, "Matching %s against %s: got %s but expected %s" % (pat, s, actual, expected))
def __call__(self, candidate):
    """Should we INCLUDE the candidate or not?

    Matches `candidate` against self.glob_pat with libc.fnmatch().  When
    self.include is set the negated match is returned; otherwise the raw
    fnmatch() result is returned.
    """
    is_match = libc.fnmatch(self.glob_pat, candidate)
    # The inversion is deliberate: bash's double-negative syntax makes this
    # read backwards.
    return (not is_match) if self.include else is_match
def testFnmatch(self):
    """Table-driven check of libc.fnmatch(pattern, string) results."""
    table = [
        # (pattern, string, result)
        ('', '', 1),  # no pattern is valid
        ('a', 'a', 1),
        ('?', 'a', 1),

        # Test escaping of glob operator chars
        ('\\?', '-', 0),
        ('\\?', '?', 1),

        ('\\*', '-', 0),
        ('\\*', '*', 1),

        ('\\[', '-', 0),
        ('\\[', '[', 1),

        ('\\!', '-', 0),
        ('\\!', '!', 1),

        ('\\\\', '\\', 1),
        ('\\\\', 'x', 0),
        ('\\\\', '\\extra', 0),

        ('\\f', '\\', 0),  # no match

        # Hm this is weird, c is not a special character
        ('\\c', 'c', 1),
        ('\\c', '\\c', 0),
        ('\\\\c', '\\c', 1),  # the proper way to match

        ('c:\\foo', 'c:\\foo', 0),
        ('c:\\foo', 'c:foo', 1),

        ('strange]one', 'strange]one', 1),

        # What is another error?  Invalid escape is OK?
        ('\\', '\\', 0),  # no pattern is valid

        ('[[:alpha:]]', 'a', 1),
        ('[^[:alpha:]]', 'a', 0),  # negate
        ('[[:alpha:]]', 'aa', 0),  # exact match fails

        # Combining char class and a literal character
        ('[[:alpha:]7]', '7', 1),
        ('[[:alpha:]][[:alpha:]]', 'az', 1),

        ('[a]', 'a', 1),
        # Hm [] is treated as a constant string, not an empty char class.
        # Should we change LooksLikeGlob?
        ('[]', '', 0),
        ('[]', 'a', 0),
        ('[]', '[]', 1),
    ]

    for row in table:
        pattern, text, want = row
        got = libc.fnmatch(pattern, text)
        failure_note = '%r %r -> got %d' % (pattern, text, got)
        self.assertEqual(want, got, failure_note)
def testFnmatch(self):
    """Check libc.fnmatch() against a small table of (pattern, string, result).

    Each case is matched with libc.fnmatch(pattern, string) and the result
    is compared with the expected value (1 for a match, 0 for no match).
    """
    cases = [
        # (pattern, string, result)
        ('', '', 1),  # no pattern is valid
        ('a', 'a', 1),
        ('?', 'a', 1),
        # Backslashes are doubled so the literals contain no invalid escape
        # sequences; the string values are unchanged ('\\?' is backslash + ?).
        ('\\?', 'a', 0),
        ('\\?', '?', 1),
        ('\\\\', '\\', 1),
        # What is another error?  Invalid escape is OK?
        ('\\', '\\', 0),  # a lone backslash pattern is expected not to match
    ]

    for pat, s, expected in cases:
        actual = libc.fnmatch(pat, s)
        # Include the inputs in the message so a failing case is identifiable.
        self.assertEqual(
            expected, actual,
            'fnmatch(%r, %r) -> got %r' % (pat, s, actual))
def GetSpecForName(self, argv0):
    """Find the completion spec pair registered for a command name.

    Lookup order: exact argv0 in self.lookup, then the basename of argv0 in
    self.lookup, then the first entry of self.patterns whose glob matches
    the basename.

    Args:
      argv0: A finished argv0 to lookup

    Returns:
      The value stored in self.lookup on a direct hit, a
      (base_opts, user_spec) tuple on a glob hit, or (None, None) when
      nothing matches.
    """
    hit = self.lookup.get(argv0)  # NOTE: Could be ''
    if hit:
        return hit

    name = os_path.basename(argv0)
    hit = self.lookup.get(name)
    if hit:
        return hit

    # Lazily scan the glob patterns; the first match wins.
    glob_hits = (
        (opts, spec)
        for pat, opts, spec in self.patterns
        if libc.fnmatch(pat, name)
    )
    return next(glob_hits, (None, None))
def GetSpecForName(self, argv0):
    """Find the completion spec registered for a command name.

    Lookup order: exact argv0 in self.lookup, then the basename of argv0 in
    self.lookup, then the first entry of self.patterns whose glob matches
    the basename, then the '__fallback' entry.

    Args:
      argv0: A finished argv0 to lookup

    Returns:
      The value stored in self.lookup on a direct or basename hit, a
      (base_opts, user_spec) tuple on a glob hit, or
      self.lookup['__fallback'] when nothing matches.

    Raises:
      KeyError: if nothing matches and no '__fallback' entry is registered.
    """
    user_spec = self.lookup.get(argv0)  # NOTE: Could be ''
    if user_spec:
        return user_spec

    key = os_path.basename(argv0)
    # The result of the basename lookup must be the value that is tested.
    # Previously it was stored in an unused variable and the stale (falsy)
    # result of the first lookup was re-tested, so basename hits were lost.
    user_spec = self.lookup.get(key)
    if user_spec:
        return user_spec

    for glob_pat, base_opts, user_spec in self.patterns:
        #log('Matching %r %r', key, glob_pat)
        if libc.fnmatch(glob_pat, key):
            return base_opts, user_spec

    # Nothing matched
    return self.lookup['__fallback']
def GetCompleterForName(self, argv0):
    """Find the completer chain registered for a command name.

    Lookup order: exact argv0 in self.lookup, then the basename of argv0 in
    self.lookup, then the first entry of self.patterns whose glob matches
    the basename, then the '__default__' entry.

    Args:
      argv0: A finished argv0 to lookup

    Returns:
      self.empty_comp when argv0 is empty; otherwise the matching chain, or
      self.lookup['__default__'] when nothing matches.

    Raises:
      KeyError: if nothing matches and no '__default__' entry is registered.
    """
    if not argv0:
        return self.empty_comp

    chain = self.lookup.get(argv0)  # NOTE: Could be ''
    if chain:
        return chain

    key = os.path.basename(argv0)
    # The result of the basename lookup must be the value that is tested.
    # Previously it was stored in an unused variable and the stale (falsy)
    # result of the first lookup was re-tested, so basename hits were lost.
    chain = self.lookup.get(key)
    if chain:
        return chain

    for glob_pat, chain in self.patterns:
        #log('Matching %r %r', key, glob_pat)
        if libc.fnmatch(glob_pat, key):
            return chain

    return self.lookup['__default__']
def GetCompleterForName(self, argv0):
    """Find the completer chain registered for a command name.

    Lookup order: exact argv0 in self.lookup, then the basename of argv0 in
    self.lookup, then the first entry of self.patterns whose glob matches
    the basename, then the '__fallback' entry.

    Args:
      argv0: A finished argv0 to lookup

    Returns:
      self.GetFirstCompleter() when argv0 is empty; otherwise the matching
      chain, or self.lookup['__fallback'] when nothing matches.

    Raises:
      KeyError: if nothing matches and no '__fallback' entry is registered.
    """
    if not argv0:
        return self.GetFirstCompleter()

    chain = self.lookup.get(argv0)  # NOTE: Could be ''
    if chain:
        return chain

    key = os_path.basename(argv0)
    # The result of the basename lookup must be the value that is tested.
    # Previously it was stored in an unused variable and the stale (falsy)
    # result of the first lookup was re-tested, so basename hits were lost.
    chain = self.lookup.get(key)
    if chain:
        return chain

    for glob_pat, chain in self.patterns:
        #log('Matching %r %r', key, glob_pat)
        if libc.fnmatch(glob_pat, key):
            return chain

    # Nothing matched
    return self.lookup['__fallback']
def testFnmatch(self):
    """Table-driven check of libc.fnmatch(pattern, string, False) results.

    Entries written as `None if IS_DARWIN else (...)` become None when
    IS_DARWIN is true and are dropped by filter(None, ...) before the loop,
    so those cases are skipped on that platform.
    """
    cases = [
        # (pattern, string, result)
        ('', '', 1),  # no pattern is valid
        ('a', 'a', 1),
        ('?', 'a', 1),

        # Test escaping of glob operator chars
        ('\\?', '-', 0),
        ('\\?', '?', 1),

        ('\\*', '-', 0),
        ('\\*', '*', 1),

        ('\\[', '-', 0),
        ('\\[', '[', 1),

        ('\\!', '-', 0),
        ('\\!', '!', 1),

        ('\\\\', '\\', 1),
        ('\\\\', 'x', 0),
        ('\\\\', '\\extra', 0),

        ('\\f', '\\', 0),  # no match

        # Hm this is weird, c is not a special character
        ('\\c', 'c', 1),
        ('\\c', '\\c', 0),
        ('\\\\c', '\\c', 1),  # the proper way to match

        ('c:\\foo', 'c:\\foo', 0),
        ('c:\\foo', 'c:foo', 1),

        ('strange]one', 'strange]one', 1),

        # What is another error?  Invalid escape is OK?
        None if IS_DARWIN else ('\\', '\\', 0),  # no pattern is valid

        ('[[:alpha:]]', 'a', 1),
        ('[^[:alpha:]]', 'a', 0),  # negate
        ('[[:alpha:]]', 'aa', 0),  # exact match fails

        # Combining char class and a literal character
        ('[[:alpha:]7]', '7', 1),
        ('[[:alpha:]][[:alpha:]]', 'az', 1),

        ('[a]', 'a', 1),
        # Hm [] is treated as a constant string, not an empty char class.
        # Should we change LooksLikeGlob?
        ('[]', '', 0),

        ('[a-z]', 'a', 1),
        ('[a-z]', '-', 0),

        # THIS IS INCONSISTENT WITH REGEX!
        # Somehow in regexes (at least ERE) GNU libc treats [a\-z] as [a-z].
        # See below.
        # The backslash is doubled so the literal has no invalid escape
        # sequence; the pattern passed to fnmatch is still [a\-z].
        ('[a\\-z]', '-', 1),
        ('[a\\-z]', 'b', 0),

        # Need double backslash in character class
        ('[\\\\]', '\\', 1),

        # Can you escape ] with \?  Yes in fnmatch
        ('[\\]]', '\\', 0),
        ('[\\]]', ']', 1),

        None if IS_DARWIN else ('[]', 'a', 0),
        None if IS_DARWIN else ('[]', '[]', 1),
    ]

    for pat, s, expected in filter(None, cases):
        actual = libc.fnmatch(pat, s, False)
        self.assertEqual(expected, actual,
                         '%r %r -> got %d' % (pat, s, actual))
def DoUnarySuffixOp(s, op, arg):
    """Helper for ${x#prefix} and family.

    Args:
      s: the string to strip.
      op: operator node; only op.op_id is read (VOp1_Pound, VOp1_DPound,
          VOp1_Percent or VOp1_DPercent).
      arg: the prefix/suffix to remove; treated as a constant string unless
          glob_.LooksLikeGlob(arg) is true, in which case it is a pattern.

    Returns:
      `s` with the matching prefix or suffix removed, or `s` unchanged when
      nothing matches.

    Raises:
      AssertionError: on the constant-string path, for any other op_id.
    """
    # Fast path for constant strings.
    if not glob_.LooksLikeGlob(arg):
        if op.op_id in (Id.VOp1_Pound, Id.VOp1_DPound):  # const prefix
            if s.startswith(arg):
                return s[len(arg):]
            else:
                return s

        elif op.op_id in (Id.VOp1_Percent, Id.VOp1_DPercent):  # const suffix
            if s.endswith(arg):
                # Slice with an absolute end index.  s[:-len(arg)] would be
                # s[:-0] == s[:0] == '' for an empty arg, wiping the string
                # instead of leaving it unchanged.
                return s[:len(s) - len(arg)]
            else:
                return s

        else:  # e.g. ^ ^^ , ,,
            raise AssertionError(op.op_id)

    # For patterns, do fnmatch() in a loop.
    #
    # TODO: Check another fast path first?
    #
    # v=aabbccdd
    # echo ${v#*b}  # strip shortest prefix
    #
    # If the whole thing doesn't match '*b*', then no test can succeed.  So we
    # can fail early.  Conversely echo ${v%%c*} and '*c*'.

    n = len(s)
    if op.op_id == Id.VOp1_Pound:  # shortest prefix
        # 'abcd': match 'a', 'ab', 'abc', ...
        for i in xrange(1, n + 1):
            #log('Matching pattern %r with %r', arg, s[:i])
            if libc.fnmatch(arg, s[:i]):
                return s[i:]
        else:
            return s

    elif op.op_id == Id.VOp1_DPound:  # longest prefix
        # 'abcd': match 'abcd', 'abc', 'ab', 'a'
        for i in xrange(n, 0, -1):
            #log('Matching pattern %r with %r', arg, s[:i])
            if libc.fnmatch(arg, s[:i]):
                return s[i:]
        else:
            return s

    elif op.op_id == Id.VOp1_Percent:  # shortest suffix
        # 'abcd': match 'd', 'cd', 'bcd', 'abcd'
        for i in xrange(n - 1, -1, -1):
            #log('Matching pattern %r with %r', arg, s[i:])
            if libc.fnmatch(arg, s[i:]):
                return s[:i]
        else:
            return s

    elif op.op_id == Id.VOp1_DPercent:  # longest suffix
        # 'abcd': match 'abcd', 'bcd', 'cd', 'd'
        for i in xrange(0, n):
            #log('Matching pattern %r with %r', arg, s[i:])
            if libc.fnmatch(arg, s[i:]):
                return s[:i]
        else:
            return s

    # NOTE(review): any other op_id falls through here and returns None on
    # the pattern path, unlike the AssertionError on the constant path.
def _Dispatch(self, node, fork_external):
    """Execute one command node by dispatching on node.tag.

    Args:
      node: a command node; node.tag selects the branch below.
      fork_external: passed through to self._RunSimpleCommand().

    Returns:
      A (status, check_errexit) tuple.  check_errexit is set to False for
      ';'-terminated Sentence nodes and negated pipelines.

    Raises:
      _ControlFlow: always, for ControlFlow nodes; also re-raised from loop
          bodies when it is neither a break nor a continue.
      NotImplementedError: for '|&' pipelines, ForExpr nodes and unsupported
          assignment keywords.
      AssertionError: for an unknown node.tag or AndOr operator.
    """
    argv0 = None  # for error message
    check_errexit = True  # for errexit

    if node.tag == command_e.SimpleCommand:
        # PROBLEM: We want to log argv in 'xtrace' mode, but we may have
        # already redirected here, which screws up logging.  For example,
        # 'echo hi >/dev/null 2>&1'.  We want to evaluate argv and log it
        # BEFORE applying redirects.

        # Another problem:
        # - tracing can be called concurrently from multiple processes,
        #   leading to overlap.  Maybe have a mode that creates a file per
        #   process.  xtrace-proc
        # - line numbers for every command would be very nice.  But then you
        #   have to print the filename too.

        words = braces.BraceExpandWords(node.words)
        argv = self.ev.EvalWordSequence(words)
        if argv:
            argv0 = argv[0]

        environ = self.mem.GetExported()
        self._EvalEnv(node.more_env, environ)

        if self.exec_opts.xtrace:
            log('+ %s', argv)
            #print('+ %s' % argv, file=sys.stderr)
            #print('+ %s' % argv, file=self.XFILE)
            #os.write(2, '+ %s\n' % argv)

        status = self._RunSimpleCommand(argv, environ, fork_external)

        if self.exec_opts.xtrace:
            #log('+ %s -> %d', argv, status)
            pass

    elif node.tag == command_e.Sentence:
        if node.terminator.id == Id.Op_Semi:
            # Don't check_errexit since this isn't a real node!
            check_errexit = False
            status = self._Execute(node.child)
        else:
            status = self._RunJobInBackground(node.child)

    elif node.tag == command_e.Pipeline:
        if node.stderr_indices:
            raise NotImplementedError('|&')

        if node.negated:
            self._PushErrExit()
            try:
                status2 = self._RunPipeline(node)
            finally:
                self._PopErrExit()

            # errexit is disabled for !.
            check_errexit = False
            status = 1 if status2 == 0 else 0
        else:
            status = self._RunPipeline(node)

    elif node.tag == command_e.Subshell:
        # This makes sure we don't waste a process if we'd launch one anyway.
        p = self._MakeProcess(node.child)
        status = p.Run(self.waiter)

    elif node.tag == command_e.DBracket:
        result = self.bool_ev.Eval(node.expr)
        status = 0 if result else 1

    elif node.tag == command_e.DParen:
        i = self.arith_ev.Eval(node.child)
        # Non-zero in arithmetic is true, which is status 0 in shell land.
        status = 0 if i != 0 else 1

    elif node.tag == command_e.Assignment:
        if node.keyword == Id.Assign_Local:
            lookup_mode = scope.LocalOnly
            flags = ()
        elif node.keyword == Id.Assign_Declare:
            # declare is like local, except it can also be used outside
            # functions?
            lookup_mode = scope.LocalOnly
            # TODO: Respect flags.  -r and -x matter, but -a and -A might be
            # implicit in the RHS?
            flags = ()
        elif node.keyword == Id.Assign_Readonly:
            lookup_mode = scope.Dynamic
            flags = (var_flags.ReadOnly,)
        elif node.keyword == Id.Assign_None:  # mutate existing local or global
            lookup_mode = scope.Dynamic
            flags = ()
        else:
            # TODO: typeset, declare, etc.  Those are dynamic though.
            raise NotImplementedError(node.keyword)

        for pair in node.pairs:
            if pair.rhs:
                # RHS can be a string or array.
                val = self.ev.EvalWordToAny(pair.rhs)
                assert isinstance(val, runtime.value), val
            else:
                # 'local x' is equivalent to local x=""
                val = runtime.Str('')

            if pair.op == assign_op.PlusEqual:
                old_val, lval = expr_eval.EvalLhs(pair.lhs, self.arith_ev,
                                                  self.mem, self.exec_opts)
                sig = (old_val.tag, val.tag)
                if sig == (value_e.Str, value_e.Str):
                    val = runtime.Str(old_val.s + val.s)
                elif sig == (value_e.Str, value_e.StrArray):
                    e_die("Can't append array to string")
                elif sig == (value_e.StrArray, value_e.Str):
                    e_die("Can't append string to array")
                elif sig == (value_e.StrArray, value_e.StrArray):
                    val = runtime.StrArray(old_val.strs + val.strs)
            else:
                lval = self._EvalLhs(pair.lhs)

            #log('ASSIGNING %s -> %s', lval, val)
            self.mem.SetVar(lval, val, flags, lookup_mode)

        # TODO: This should be eval of RHS, unlike bash!
        status = 0

    elif node.tag == command_e.ControlFlow:
        if node.arg_word:  # Evaluate the argument
            val = self.ev.EvalWordToString(node.arg_word)
            assert val.tag == value_e.Str
            arg = int(val.s)  # They all take integers
        else:
            arg = 0  # return 0, break 0 levels, etc.

        # NOTE: always raises so we don't set status.
        raise _ControlFlow(node.token, arg)

    # The only difference between these two is that CommandList has no
    # redirects.  We already took care of that above.
    elif node.tag in (command_e.CommandList, command_e.BraceGroup):
        status = self._ExecuteList(node.children)

    elif node.tag == command_e.AndOr:
        #print(node.children)
        left, right = node.children

        # This is everything except the last one.
        self._PushErrExit()
        try:
            status = self._Execute(left)
        finally:
            self._PopErrExit()

        if node.op_id == Id.Op_DPipe:
            if status != 0:
                status = self._Execute(right)
        elif node.op_id == Id.Op_DAmp:
            if status == 0:
                status = self._Execute(right)
        else:
            raise AssertionError

    elif node.tag in (command_e.While, command_e.Until):
        # TODO: Compile this out?
        if node.tag == command_e.While:
            _DonePredicate = lambda status: status != 0
        else:
            _DonePredicate = lambda status: status == 0

        status = 0
        while True:
            self._PushErrExit()
            try:
                cond_status = self._ExecuteList(node.cond)
            finally:
                self._PopErrExit()

            if _DonePredicate(cond_status):
                break
            try:
                status = self._Execute(node.body)  # last one wins
            except _ControlFlow as e:
                if e.IsBreak():
                    status = 0
                    break
                elif e.IsContinue():
                    status = 0
                    continue
                else:  # return needs to pop up more
                    raise

    elif node.tag == command_e.ForEach:
        iter_name = node.iter_name
        if node.do_arg_iter:
            iter_list = self.mem.GetArgv()
        else:
            words = braces.BraceExpandWords(node.iter_words)
            iter_list = self.ev.EvalWordSequence(words)
            # We need word splitting and so forth
            # NOTE: This expands globs too.  TODO: We should pass in a
            # Globber() object.

        status = 0  # in case we don't loop
        for x in iter_list:
            #log('> ForEach setting %r', x)
            state.SetLocalString(self.mem, iter_name, x)
            #log('<')

            try:
                status = self._Execute(node.body)  # last one wins
            except _ControlFlow as e:
                if e.IsBreak():
                    status = 0
                    break
                elif e.IsContinue():
                    status = 0
                    continue
                else:  # return needs to pop up more
                    raise

    elif node.tag == command_e.ForExpr:
        raise NotImplementedError(node.tag)

    elif node.tag == command_e.DoGroup:
        status = self._ExecuteList(node.children)

    elif node.tag == command_e.FuncDef:
        # NOTE: Would it make sense to evaluate the redirects BEFORE entering?
        # It will save time on function calls.
        self.funcs[node.name] = node
        status = 0

    elif node.tag == command_e.If:
        done = False
        for arm in node.arms:
            self._PushErrExit()
            try:
                status = self._ExecuteList(arm.cond)
            finally:
                self._PopErrExit()

            if status == 0:
                status = self._ExecuteList(arm.action)
                done = True
                break
        # TODO: The compiler should flatten this
        if not done and node.else_action is not None:
            status = self._ExecuteList(node.else_action)

    elif node.tag == command_e.NoOp:
        status = 0  # make it true

    elif node.tag == command_e.Case:
        val = self.ev.EvalWordToString(node.to_match)
        to_match = val.s

        status = 0  # If there are no arms, it should be zero?
        done = False

        for arm in node.arms:
            for pat_word in arm.pat_list:
                # NOTE: Is it OK that we're evaluating these as we go?
                pat_val = self.ev.EvalWordToString(pat_word, do_fnmatch=True)
                #log('Matching word %r against pattern %r', to_match, pat_val.s)
                if libc.fnmatch(pat_val.s, to_match):
                    status = self._ExecuteList(arm.action)
                    done = True  # TODO: Parse ;;& and for fallthrough and such?
                    # Stop at the first matching pattern.  Otherwise an arm
                    # like 'a|*)' would run its action once per matching
                    # pattern.
                    break
            if done:
                break

    elif node.tag == command_e.TimeBlock:
        # TODO:
        # - When do we need RUSAGE_CHILDREN?
        # - Respect TIMEFORMAT environment variable.
        # "If this variable is not set, Bash acts as if it had the value"
        # $'\nreal\t%3lR\nuser\t%3lU\nsys\t%3lS'
        # "A trailing newline is added when the format string is displayed."

        start_t = time.time()  # calls gettimeofday() under the hood
        start_u = resource.getrusage(resource.RUSAGE_SELF)
        status = self._Execute(node.pipeline)
        end_t = time.time()
        end_u = resource.getrusage(resource.RUSAGE_SELF)

        real = end_t - start_t
        user = end_u.ru_utime - start_u.ru_utime
        sys_ = end_u.ru_stime - start_u.ru_stime
        print('real\t%.3f' % real, file=sys.stderr)
        print('user\t%.3f' % user, file=sys.stderr)
        print('sys\t%.3f' % sys_, file=sys.stderr)

    else:
        raise AssertionError(node.tag)

    return status, check_errexit
def Eval(self, node):
    """Evaluate a boolean expression node and return its truth value.

    Dispatches on node.tag (WordTest, LogicalNot, LogicalAnd, LogicalOr,
    BoolUnary, BoolBinary).  Unary and binary operators are further
    dispatched on the operand type looked up in BOOL_OPS.

    Raises:
      NotImplementedError: for operators or operand types not handled here.
      AssertionError: when no branch returns, or for an unexpected result
          from libc.regex_match().
    """
    #print('!!', node.tag)

    if node.tag == bool_expr_e.WordTest:
        s = self._EvalCompoundWord(node.w)
        return bool(s)

    if node.tag == bool_expr_e.LogicalNot:
        b = self.Eval(node.child)
        return not b

    if node.tag == bool_expr_e.LogicalAnd:
        # Short-circuit evaluation
        if self.Eval(node.left):
            return self.Eval(node.right)
        else:
            return False

    if node.tag == bool_expr_e.LogicalOr:
        if self.Eval(node.left):
            return True
        else:
            return self.Eval(node.right)

    if node.tag == bool_expr_e.BoolUnary:
        op_id = node.op_id
        s = self._EvalCompoundWord(node.child)

        # Now dispatch on arg type
        arg_type = BOOL_OPS[op_id]
        if arg_type == OperandType.Path:
            try:
                mode = os.stat(s).st_mode
            except OSError as e:  # Python 3: FileNotFoundError
                # A path that can't be stat()ed makes the test false.
                # TODO: Signal extra debug information?
                #self._AddErrorContext("Error from stat(%r): %s" % (s, e))
                return False

            if op_id == Id.BoolUnary_f:
                return stat.S_ISREG(mode)
            # NOTE: any other path operator falls through to the
            # NotImplementedError(arg_type) at the end of this branch.

        if arg_type == OperandType.Str:
            if op_id == Id.BoolUnary_z:
                return not bool(s)
            if op_id == Id.BoolUnary_n:
                return bool(s)

            raise NotImplementedError(op_id)

        raise NotImplementedError(arg_type)

    #if node.id == Id.Node_BinaryExpr:
    if node.tag == bool_expr_e.BoolBinary:
        op_id = node.op_id

        s1 = self._EvalCompoundWord(node.left)
        # Whether to glob escape
        do_fnmatch = op_id in (
            Id.BoolBinary_Equal, Id.BoolBinary_DEqual, Id.BoolBinary_NEqual)
        s2 = self._EvalCompoundWord(node.right, do_fnmatch=do_fnmatch)

        # Now dispatch on arg type
        arg_type = BOOL_OPS[op_id]

        if arg_type == OperandType.Path:
            # os.stat() raises OSError here if either path can't be stat()ed.
            st1 = os.stat(s1)
            st2 = os.stat(s2)

            if op_id == Id.BoolBinary_nt:
                return True  # TODO: test newer than (mtime)
            # NOTE: any other path operator falls through to the final
            # AssertionError.

        if arg_type == OperandType.Int:
            # NOTE: We assume they are constants like [[ 3 -eq 3 ]].
            # Bash also allows [[ 1+2 -eq 3 ]].
            i1 = self._StringToIntegerOrError(s1)
            i2 = self._StringToIntegerOrError(s2)

            if op_id == Id.BoolBinary_eq:
                return i1 == i2
            if op_id == Id.BoolBinary_ne:
                return i1 != i2

            raise NotImplementedError(op_id)

        if arg_type == OperandType.Str:
            # TODO:
            # - Compare arrays.  (Although bash coerces them to string first)

            # The right operand is the pattern, the left one the string.
            if op_id in (Id.BoolBinary_Equal, Id.BoolBinary_DEqual):
                #log('Comparing %s and %s', s2, s1)
                return libc.fnmatch(s2, s1)

            if op_id == Id.BoolBinary_NEqual:
                return not libc.fnmatch(s2, s1)

            if op_id == Id.BoolBinary_EqualTilde:
                # NOTE: regex matching can't fail if compilation succeeds.
                match = libc.regex_match(s2, s1)
                # TODO: BASH_REMATCH or REGEX_MATCH
                if match == 1:
                    self._SetRegexMatches('TODO')
                    is_match = True
                elif match == 0:
                    is_match = False
                elif match == -1:
                    raise AssertionError(
                        "Invalid regex %r: should have been caught at compile time" %
                        s2)
                else:
                    raise AssertionError

                return is_match

            if op_id == Id.Redir_Less:  # pun
                return s1 < s2

            if op_id == Id.Redir_Great:  # pun
                return s1 > s2

            raise NotImplementedError(op_id)

    # We could have covered all node IDs
    # NOTE(review): this reads node.id although dispatch above is on
    # node.tag - confirm the attribute exists on these nodes.
    raise AssertionError(IdName(node.id))
def Eval(self, node):
    """Evaluate a boolean expression node and return its truth value.

    Dispatches on node.tag (WordTest, LogicalNot, LogicalAnd, LogicalOr,
    BoolUnary, BoolBinary).  Unary and binary operators are further
    dispatched on the argument type looked up in BOOL_ARG_TYPES.

    Raises:
      NotImplementedError: for operators or argument types not handled here.
      AssertionError: for an unknown node.tag, or for an unexpected result
          from libc.regex_match().
    """
    #print('!!', node.tag)

    if node.tag == bool_expr_e.WordTest:
        s = self._EvalCompoundWord(node.w)
        return bool(s)

    if node.tag == bool_expr_e.LogicalNot:
        b = self.Eval(node.child)
        return not b

    if node.tag == bool_expr_e.LogicalAnd:
        # Short-circuit evaluation
        if self.Eval(node.left):
            return self.Eval(node.right)
        else:
            return False

    if node.tag == bool_expr_e.LogicalOr:
        if self.Eval(node.left):
            return True
        else:
            return self.Eval(node.right)

    if node.tag == bool_expr_e.BoolUnary:
        op_id = node.op_id
        s = self._EvalCompoundWord(node.child)

        # Now dispatch on arg type
        arg_type = BOOL_ARG_TYPES[op_id]  # could be static in the LST?

        if arg_type == bool_arg_type_e.Path:
            # Only use lstat if we're testing for a symlink.
            if op_id in (Id.BoolUnary_h, Id.BoolUnary_L):
                try:
                    mode = os.lstat(s).st_mode
                except OSError:
                    return False

                return stat.S_ISLNK(mode)

            try:
                mode = os.stat(s).st_mode
            except OSError:
                # A path that can't be stat()ed makes every test below false.
                # TODO: Signal extra debug information?
                #self._AddErrorContext("Error from stat(%r): %s" % (s, e))
                return False

            if op_id in (Id.BoolUnary_e, Id.BoolUnary_a):  # -a is alias for -e
                return True

            if op_id == Id.BoolUnary_f:
                return stat.S_ISREG(mode)

            if op_id == Id.BoolUnary_d:
                return stat.S_ISDIR(mode)

            if op_id == Id.BoolUnary_x:
                return os.access(s, os.X_OK)

            if op_id == Id.BoolUnary_r:
                return os.access(s, os.R_OK)

            if op_id == Id.BoolUnary_w:
                return os.access(s, os.W_OK)

            raise NotImplementedError(op_id)

        if arg_type == bool_arg_type_e.Str:
            if op_id == Id.BoolUnary_z:
                return not bool(s)
            if op_id == Id.BoolUnary_n:
                return bool(s)

            raise NotImplementedError(op_id)

        if arg_type == bool_arg_type_e.Other:
            if op_id == Id.BoolUnary_t:
                try:
                    fd = int(s)
                except ValueError:
                    # TODO: Need location information of [
                    # NOTE(review): presumably e_die() raises; otherwise fd
                    # is unbound on the next line - confirm.
                    e_die('Invalid file descriptor %r', s)
                return os.isatty(fd)

            raise NotImplementedError(op_id)

        raise NotImplementedError(arg_type)

    if node.tag == bool_expr_e.BoolBinary:
        op_id = node.op_id

        s1 = self._EvalCompoundWord(node.left)
        # Whether to glob escape
        do_fnmatch = op_id in (
            Id.BoolBinary_GlobEqual, Id.BoolBinary_GlobDEqual,
            Id.BoolBinary_GlobNEqual)
        s2 = self._EvalCompoundWord(node.right, do_fnmatch=do_fnmatch)

        # Now dispatch on arg type
        arg_type = BOOL_ARG_TYPES[op_id]

        if arg_type == bool_arg_type_e.Path:
            # os.stat() raises OSError here if either path can't be stat()ed.
            st1 = os.stat(s1)
            st2 = os.stat(s2)

            # Newer-than / older-than compare modification times.
            if op_id == Id.BoolBinary_nt:
                return st1[stat.ST_MTIME] > st2[stat.ST_MTIME]
            if op_id == Id.BoolBinary_ot:
                return st1[stat.ST_MTIME] < st2[stat.ST_MTIME]

            raise NotImplementedError(op_id)

        if arg_type == bool_arg_type_e.Int:
            # NOTE: We assume they are constants like [[ 3 -eq 3 ]].
            # Bash also allows [[ 1+2 -eq 3 ]].
            i1 = self._StringToIntegerOrError(s1)
            i2 = self._StringToIntegerOrError(s2)

            if op_id == Id.BoolBinary_eq:
                return i1 == i2
            if op_id == Id.BoolBinary_ne:
                return i1 != i2
            if op_id == Id.BoolBinary_gt:
                return i1 > i2
            if op_id == Id.BoolBinary_ge:
                return i1 >= i2
            if op_id == Id.BoolBinary_lt:
                return i1 < i2
            if op_id == Id.BoolBinary_le:
                return i1 <= i2

            raise NotImplementedError(op_id)

        if arg_type == bool_arg_type_e.Str:
            # TODO:
            # - Compare arrays.  (Although bash coerces them to string first)

            # Glob operators: the right operand is the pattern, the left one
            # the string.
            if op_id in (Id.BoolBinary_GlobEqual, Id.BoolBinary_GlobDEqual):
                #log('Comparing %s and %s', s2, s1)
                return libc.fnmatch(s2, s1)

            if op_id == Id.BoolBinary_GlobNEqual:
                return not libc.fnmatch(s2, s1)

            # Plain operators compare the strings literally.
            if op_id in (Id.BoolBinary_Equal, Id.BoolBinary_DEqual):
                return s1 == s2

            if op_id == Id.BoolBinary_NEqual:
                return s1 != s2

            if op_id == Id.BoolBinary_EqualTilde:
                # NOTE: regex matching can't fail if compilation succeeds.
                match = libc.regex_match(s2, s1)
                # TODO: BASH_REMATCH or REGEX_MATCH
                if match == 1:
                    self._SetRegexMatches('TODO')
                    is_match = True
                elif match == 0:
                    is_match = False
                elif match == -1:
                    raise AssertionError(
                        "Invalid regex %r: should have been caught at compile time" %
                        s2)
                else:
                    raise AssertionError

                return is_match

            if op_id == Id.Redir_Less:  # pun
                return s1 < s2

            if op_id == Id.Redir_Great:  # pun
                return s1 > s2

            raise NotImplementedError(op_id)

    raise AssertionError(node.tag)
def __call__(self, match):
    """Return the libc.fnmatch() result of self.glob_pat against `match`."""
    pattern = self.glob_pat
    outcome = libc.fnmatch(pattern, match)
    return outcome
def Eval(self, node):
    """Evaluate a boolean expression node and return its truth value.

    Dispatches on node.tag (WordTest, LogicalNot, LogicalAnd, LogicalOr,
    BoolUnary, BoolBinary).  Unary and binary operators are further
    dispatched on the argument type looked up in BOOL_ARG_TYPES.

    Raises:
      NotImplementedError: for operators or argument types not handled here.
      AssertionError: for an unknown node.tag.
    """
    #print('!!', node.tag)

    if node.tag == bool_expr_e.WordTest:
        s = self._EvalCompoundWord(node.w)
        return bool(s)

    if node.tag == bool_expr_e.LogicalNot:
        b = self.Eval(node.child)
        return not b

    if node.tag == bool_expr_e.LogicalAnd:
        # Short-circuit evaluation
        if self.Eval(node.left):
            return self.Eval(node.right)
        else:
            return False

    if node.tag == bool_expr_e.LogicalOr:
        if self.Eval(node.left):
            return True
        else:
            return self.Eval(node.right)

    if node.tag == bool_expr_e.BoolUnary:
        op_id = node.op_id
        s = self._EvalCompoundWord(node.child)

        # Now dispatch on arg type
        arg_type = BOOL_ARG_TYPES[op_id]  # could be static in the LST?

        if arg_type == bool_arg_type_e.Path:
            # Only use lstat if we're testing for a symlink.
            if op_id in (Id.BoolUnary_h, Id.BoolUnary_L):
                try:
                    mode = posix.lstat(s).st_mode
                except OSError:
                    return False

                return stat.S_ISLNK(mode)

            try:
                mode = posix.stat(s).st_mode
            except OSError:
                # A path that can't be stat()ed makes every test below false.
                # TODO: Signal extra debug information?
                #log("Error from stat(%r): %s" % (s, e))
                return False

            if op_id in (Id.BoolUnary_e, Id.BoolUnary_a):  # -a is alias for -e
                return True

            if op_id == Id.BoolUnary_f:
                return stat.S_ISREG(mode)

            if op_id == Id.BoolUnary_d:
                return stat.S_ISDIR(mode)

            if op_id == Id.BoolUnary_x:
                return posix.access(s, posix.X_OK)

            if op_id == Id.BoolUnary_r:
                return posix.access(s, posix.R_OK)

            if op_id == Id.BoolUnary_w:
                return posix.access(s, posix.W_OK)

            raise NotImplementedError(op_id)

        if arg_type == bool_arg_type_e.Str:
            if op_id == Id.BoolUnary_z:
                return not bool(s)
            if op_id == Id.BoolUnary_n:
                return bool(s)

            raise NotImplementedError(op_id)

        if arg_type == bool_arg_type_e.Other:
            if op_id == Id.BoolUnary_t:
                try:
                    fd = int(s)
                except ValueError:
                    # TODO: Need location information of [
                    # NOTE(review): presumably e_die() raises; otherwise fd
                    # is unbound on the next line - confirm.
                    e_die('Invalid file descriptor %r', s)
                return posix.isatty(fd)

            raise NotImplementedError(op_id)

        raise NotImplementedError(arg_type)

    if node.tag == bool_expr_e.BoolBinary:
        op_id = node.op_id

        s1 = self._EvalCompoundWord(node.left)
        # Whether to glob escape
        do_fnmatch = op_id in (
            Id.BoolBinary_GlobEqual, Id.BoolBinary_GlobDEqual,
            Id.BoolBinary_GlobNEqual)
        do_ere = (op_id == Id.BoolBinary_EqualTilde)
        s2 = self._EvalCompoundWord(node.right, do_fnmatch=do_fnmatch,
                                    do_ere=do_ere)

        # Now dispatch on arg type
        arg_type = BOOL_ARG_TYPES[op_id]

        if arg_type == bool_arg_type_e.Path:
            # posix.stat() raises OSError here if either path can't be
            # stat()ed.
            st1 = posix.stat(s1)
            st2 = posix.stat(s2)

            # Newer-than / older-than compare modification times.
            if op_id == Id.BoolBinary_nt:
                return st1[stat.ST_MTIME] > st2[stat.ST_MTIME]
            if op_id == Id.BoolBinary_ot:
                return st1[stat.ST_MTIME] < st2[stat.ST_MTIME]

            raise NotImplementedError(op_id)

        if arg_type == bool_arg_type_e.Int:
            # NOTE: We assume they are constants like [[ 3 -eq 3 ]].
            # Bash also allows [[ 1+2 -eq 3 ]].
            i1 = self._StringToIntegerOrError(s1, blame_word=node.left)
            i2 = self._StringToIntegerOrError(s2, blame_word=node.right)

            if op_id == Id.BoolBinary_eq:
                return i1 == i2
            if op_id == Id.BoolBinary_ne:
                return i1 != i2
            if op_id == Id.BoolBinary_gt:
                return i1 > i2
            if op_id == Id.BoolBinary_ge:
                return i1 >= i2
            if op_id == Id.BoolBinary_lt:
                return i1 < i2
            if op_id == Id.BoolBinary_le:
                return i1 <= i2

            raise NotImplementedError(op_id)

        if arg_type == bool_arg_type_e.Str:

            # Glob operators: the right operand is the pattern, the left one
            # the string.
            if op_id in (Id.BoolBinary_GlobEqual, Id.BoolBinary_GlobDEqual):
                #log('Matching %s against pattern %s', s1, s2)

                # TODO: Respect extended glob?  * and ! and ? are quoted
                # improperly.  But @ and + are OK.
                return libc.fnmatch(s2, s1)

            if op_id == Id.BoolBinary_GlobNEqual:
                return not libc.fnmatch(s2, s1)

            # Plain operators compare the strings literally.
            if op_id in (Id.BoolBinary_Equal, Id.BoolBinary_DEqual):
                return s1 == s2

            if op_id == Id.BoolBinary_NEqual:
                return s1 != s2

            if op_id == Id.BoolBinary_EqualTilde:
                #log('Matching %r against regex %r', s1, s2)
                try:
                    matches = libc.regex_match(s2, s1)
                except RuntimeError:
                    # 2 means a parse error.  Note this is a fatal error in
                    # OSH but not in bash.
                    e_die("Invalid regex %r", s2, word=node.right, status=2)

                # None means no match; anything else is handed to
                # _SetRegexMatches().
                if matches is None:
                    return False

                self._SetRegexMatches(matches)
                return True

            if op_id == Id.Redir_Less:  # pun
                return s1 < s2

            if op_id == Id.Redir_Great:  # pun
                return s1 > s2

            raise NotImplementedError(op_id)

    raise AssertionError(node.tag)
def _Execute(self, node):
    """Execute one command node, dispatching on node.tag.

    Redirects are evaluated first.  After the node runs, errexit is
    enforced when self.exec_opts.errexit is set, and the status is stored
    in self.mem.last_status.

    Args:
      node: of type AstNode

    Returns:
      The integer exit status of the command.

    Raises:
      _ControlFlow: always, for ControlFlow nodes; also re-raised from loop
          bodies when it is neither a break nor a continue.
      NotImplementedError: for ForExpr nodes.
      AssertionError: for an unknown node.tag or AndOr operator.
    """
    redirects = self._EvalRedirects(node)

    # TODO: Only eval argv[0] once.  It can have side effects!
    if node.tag == command_e.SimpleCommand:
        words = braces.BraceExpandWords(node.words)
        argv = self.ev.EvalWordSequence(words)

        more_env = self.mem.GetExported()
        self._EvalEnv(node.more_env, more_env)
        thunk = self._GetThunkForSimpleCommand(argv, more_env)

        # Don't waste a process if we'd launch one anyway.
        if thunk.IsExternal():
            p = process.Process(thunk, fd_state=self.fd_state,
                                redirects=redirects)
            status = p.Run()

        else:  # Internal
            #log('ARGV %s', argv)

            # NOTE: _EvalRedirects turns LST nodes into core/process.py
            # nodes.  And then we use polymorphism here.  Does it make sense
            # to use functional style based on the RedirType?  Might be
            # easier to read.

            self.fd_state.PushFrame()
            for r in redirects:
                r.ApplyInParent(self.fd_state)

            status = thunk.RunInParent()
            restore_fd_state = thunk.ShouldRestoreFdState()

            # Special case for exec 1>&2 (with no args): we permanently
            # change the fd state.  BUT we don't want to restore later.
            # TODO: Instead of this, maybe r.ApplyPermanent(self.fd_state)?
            if restore_fd_state:
                self.fd_state.PopAndRestore()
            else:
                self.fd_state.PopAndForget()

    elif node.tag == command_e.Sentence:
        # TODO: Compile this away.
        status = self._Execute(node.command)

    elif node.tag == command_e.Pipeline:
        status = self._RunPipeline(node)

    elif node.tag == command_e.Subshell:
        # This makes sure we don't waste a process if we'd launch one anyway.
        p = self._GetProcessForNode(node.children[0])
        status = p.Run()

    elif node.tag == command_e.DBracket:
        bool_ev = expr_eval.BoolEvaluator(self.mem, self.ev)
        ok = bool_ev.Eval(node.expr)
        if ok:
            status = 0 if bool_ev.Result() else 1
        else:
            e_die('Error evaluating boolean: %s' % bool_ev.Error())

    elif node.tag == command_e.DParen:
        arith_ev = expr_eval.ArithEvaluator(self.mem, self.ev)
        ok = arith_ev.Eval(node.child)
        if ok:
            i = arith_ev.Result()
            # Negate the value: non-zero in arithmetic is true, which is
            # zero in shell land
            status = 0 if i != 0 else 1
        else:
            e_die('Error evaluating (( )): %s' % arith_ev.Error())

    elif node.tag == command_e.Assignment:
        pairs = []
        for pair in node.pairs:
            if pair.rhs:
                # RHS can be a string or array.
                val = self.ev.EvalWordToAny(pair.rhs)
                assert isinstance(val, runtime.value), val
            else:
                # 'local x' is equivalent to local x=""
                val = runtime.Str('')
            pairs.append((pair.lhs, val))

        if node.keyword == Id.Assign_Local:
            self.mem.SetLocals(pairs)
        else:
            # NOTE: could be readonly/export/etc.
            self.mem.SetLocalsOrGlobals(pairs)

        # TODO: This should be eval of RHS, unlike bash!
        status = 0

    elif node.tag == command_e.ControlFlow:
        if node.arg_word:  # Evaluate the argument
            _, val = self.ev.EvalWordToString(node.arg_word)
            assert val.tag == value_e.Str
            arg = int(val.s)  # They all take integers
        else:
            arg = 0  # return 0, break 0 levels, etc.

        # Always raises, so status is never set in this branch.
        raise _ControlFlow(node.token, arg)

    # The only difference between these two is that CommandList has no
    # redirects.  We already took care of that above.
    elif node.tag in (command_e.CommandList, command_e.BraceGroup):
        self.fd_state.PushFrame()
        for r in redirects:
            r.ApplyInParent(self.fd_state)

        status = 0  # for empty list
        for child in node.children:
            status = self._Execute(child)  # last status wins

        self.fd_state.PopAndRestore()

    elif node.tag == command_e.AndOr:
        #print(node.children)
        left, right = node.children
        status = self._Execute(left)

        if node.op_id == Id.Op_DPipe:
            if status != 0:
                status = self._Execute(right)
        elif node.op_id == Id.Op_DAmp:
            if status == 0:
                status = self._Execute(right)
        else:
            raise AssertionError

    elif node.tag in (command_e.While, command_e.Until):
        # TODO: Compile this out?
        if node.tag == command_e.While:
            _DonePredicate = lambda status: status != 0
        else:
            _DonePredicate = lambda status: status == 0

        while True:
            status = self._Execute(node.cond)
            if _DonePredicate(status):
                break
            try:
                status = self._Execute(node.body)  # last one wins
            except _ControlFlow as e:
                if e.IsBreak():
                    status = 0
                    break
                elif e.IsContinue():
                    status = 0
                    continue
                else:  # return needs to pop up more
                    raise

    elif node.tag == command_e.ForEach:
        iter_name = node.iter_name
        if node.do_arg_iter:
            iter_list = self.mem.GetArgv()
        else:
            words = braces.BraceExpandWords(node.iter_words)
            iter_list = self.ev.EvalWordSequence(words)
            # We need word splitting and so forth
            # NOTE: This expands globs too.  TODO: We should pass in a
            # Globber() object.

        status = 0  # in case we don't loop
        for x in iter_list:
            #log('> ForEach setting %r', x)
            self.mem.SetLocal(iter_name, runtime.Str(x))
            #log('<')

            try:
                status = self._Execute(node.body)  # last one wins
            except _ControlFlow as e:
                if e.IsBreak():
                    status = 0
                    break
                elif e.IsContinue():
                    status = 0
                    continue
                else:  # return needs to pop up more
                    raise

    elif node.tag == command_e.ForExpr:
        raise NotImplementedError(node.tag)

    elif node.tag == command_e.DoGroup:
        # Delegate to command list
        # TODO: This should be compiled out!
        status = self._Execute(node.child)

    elif node.tag == command_e.FuncDef:
        self.funcs[node.name] = node
        status = 0

    elif node.tag == command_e.If:
        done = False
        for arm in node.arms:
            status = self._Execute(arm.cond)
            if status == 0:
                status = self._Execute(arm.action)
                done = True
                break
        # TODO: The compiler should flatten this
        if not done and node.else_action is not None:
            status = self._Execute(node.else_action)

    elif node.tag == command_e.NoOp:
        status = 0  # make it true

    elif node.tag == command_e.Case:
        ok, val = self.ev.EvalWordToString(node.to_match)
        assert ok
        to_match = val.s

        status = 0  # If there are no arms, it should be zero?
        done = False

        for arm in node.arms:
            for pat_word in arm.pat_list:
                # NOTE: Is it OK that we're evaluating these as we go?
                ok, pat_val = self.ev.EvalWordToString(pat_word,
                                                       do_fnmatch=True)
                assert ok
                #log('Matching word %r against pattern %r', to_match, pat_val.s)
                if libc.fnmatch(pat_val.s, to_match):
                    status = self._Execute(arm.action)
                    done = True  # TODO: Parse ;;& and for fallthrough and such?
                    # Stop at the first matching pattern.  Otherwise an arm
                    # like 'a|*)' would run its action once per matching
                    # pattern.
                    break
            if done:
                break

    elif node.tag == command_e.TimeBlock:
        # TODO:
        # - When do we need RUSAGE_CHILDREN?
        # - Respect TIMEFORMAT environment variable.
        # "If this variable is not set, Bash acts as if it had the value"
        # $'\nreal\t%3lR\nuser\t%3lU\nsys\t%3lS'
        # "A trailing newline is added when the format string is displayed."

        start_t = time.time()  # calls gettimeofday() under the hood
        start_u = resource.getrusage(resource.RUSAGE_SELF)
        status = self._Execute(node.pipeline)
        end_t = time.time()
        end_u = resource.getrusage(resource.RUSAGE_SELF)

        real = end_t - start_t
        user = end_u.ru_utime - start_u.ru_utime
        sys_ = end_u.ru_stime - start_u.ru_stime
        print('real\t%.3f' % real, file=sys.stderr)
        print('user\t%.3f' % user, file=sys.stderr)
        print('sys\t%.3f' % sys_, file=sys.stderr)

    else:
        raise AssertionError(node.tag)

    if self.exec_opts.errexit and status != 0:
        if node.tag == command_e.SimpleCommand:
            # TODO: Add context
            e_die('%r command exited with status %d (%s)', argv[0], status,
                  node.words[0])
        else:
            e_die('%r command exited with status %d',
                  node.__class__.__name__, status)

    # TODO: Is this the right place to put it?  Does it need a stack for
    # function calls?
    self.mem.last_status = status
    return status
def Eval(self, node):
  """Evaluate a boolean expression node and return its truth value.

  Dispatches on node.tag: bare word test, logical not/and/or, unary operator
  and binary operator.  Operators are further dispatched on the argument type
  looked up in BOOL_ARG_TYPES.

  Unknown tags, argument types and operator ids raise AssertionError.
  Unimplemented operators and invalid user input are reported with e_die().
  """
  tag = node.tag

  if tag == bool_expr_e.WordTest:
    # A bare word is true iff it evaluates to a non-empty string.
    return bool(self._EvalCompoundWord(node.w))

  if tag == bool_expr_e.LogicalNot:
    return not self.Eval(node.child)

  if tag == bool_expr_e.LogicalAnd:
    # Short-circuit: the right side only runs when the left side is true.
    if not self.Eval(node.left):
      return False
    return self.Eval(node.right)

  if tag == bool_expr_e.LogicalOr:
    # Short-circuit: the right side only runs when the left side is false.
    if self.Eval(node.left):
      return True
    return self.Eval(node.right)

  if tag == bool_expr_e.Unary:
    op_id = node.op_id
    operand = self._EvalCompoundWord(node.child)

    # could be static in the LST?
    kind = BOOL_ARG_TYPES[op_id.enum_id]

    if kind == bool_arg_type_e.Path:
      # Symlink tests are the only ones that must not follow the link.
      if op_id in (Id.BoolUnary_h, Id.BoolUnary_L):
        try:
          link_mode = posix.lstat(operand).st_mode
        except OSError:
          # TODO: simple_test_builtin should this as status=2.
          return False
        return stat.S_ISLNK(link_mode)

      try:
        st = posix.stat(operand)
      except OSError:
        # TODO: simple_test_builtin should this as status=2.
        # Problem: we really need errno, because test -f / is bad argument,
        # while test -f /nonexistent is a good argument but failed.  Gah.
        # ENOENT vs. ENAMETOOLONG.
        return False
      mode = st.st_mode

      if op_id in (Id.BoolUnary_e, Id.BoolUnary_a):  # -a is alias for -e
        return True

      # File type tests, checked in the same order as the operators appear.
      file_type_tests = (
          (Id.BoolUnary_f, stat.S_ISREG),
          (Id.BoolUnary_d, stat.S_ISDIR),
          (Id.BoolUnary_b, stat.S_ISBLK),
          (Id.BoolUnary_c, stat.S_ISCHR),
          (Id.BoolUnary_p, stat.S_ISFIFO),
          (Id.BoolUnary_S, stat.S_ISSOCK),
      )
      for type_op, is_type in file_type_tests:
        if op_id == type_op:
          return is_type(mode)

      # Permission tests are delegated to access().
      access_tests = (
          (Id.BoolUnary_x, posix.X_OK),
          (Id.BoolUnary_r, posix.R_OK),
          (Id.BoolUnary_w, posix.W_OK),
      )
      for access_op, access_mode in access_tests:
        if op_id == access_op:
          return posix.access(operand, access_mode)

      if op_id == Id.BoolUnary_s:
        return st.st_size != 0
      if op_id == Id.BoolUnary_O:
        return st.st_uid == posix.geteuid()
      if op_id == Id.BoolUnary_G:
        return st.st_gid == posix.getegid()

      e_die("%s isn't implemented", op_id)  # implicit location

    if kind == bool_arg_type_e.Str:
      if op_id == Id.BoolUnary_z:
        return not bool(operand)
      if op_id == Id.BoolUnary_n:
        return bool(operand)

      raise AssertionError(op_id)  # should never happen

    if kind == bool_arg_type_e.Other:
      if op_id == Id.BoolUnary_t:
        try:
          fd = int(operand)
        except ValueError:
          # TODO: Need location information of [
          e_die('Invalid file descriptor %r', operand, word=node.child)

        try:
          return posix.isatty(fd)
        except OverflowError:
          # fd is user input, and a huge value overflows in the binding.
          e_die('File descriptor %r is too big', operand, word=node.child)

      # See whether 'set -o' options have been set
      if op_id == Id.BoolUnary_o:
        opt_value = getattr(self.exec_opts, operand, None)
        if opt_value is None:
          return False
        return opt_value

      e_die("%s isn't implemented", op_id)  # implicit location

    raise AssertionError(kind)  # should never happen

  if tag == bool_expr_e.Binary:
    op_id = node.op_id
    lhs = self._EvalCompoundWord(node.left)

    # The right side is a pattern for the glob operators and a regex for =~,
    # so the word evaluator is told how to escape quoted parts.
    glob_ops = (Id.BoolBinary_GlobEqual, Id.BoolBinary_GlobDEqual,
                Id.BoolBinary_GlobNEqual)
    rhs = self._EvalCompoundWord(
        node.right,
        do_fnmatch=op_id in glob_ops,
        do_ere=(op_id == Id.BoolBinary_EqualTilde))

    kind = BOOL_ARG_TYPES[op_id.enum_id]

    if kind == bool_arg_type_e.Path:
      # A file that can't be stat()ed is represented by None.
      try:
        left_st = posix.stat(lhs)
      except OSError:
        left_st = None
      try:
        right_st = posix.stat(rhs)
      except OSError:
        right_st = None

      if op_id in (Id.BoolBinary_nt, Id.BoolBinary_ot):
        # pretend a missing file is a very old file
        left_mtime = left_st.st_mtime if left_st is not None else 0
        right_mtime = right_st.st_mtime if right_st is not None else 0
        if op_id == Id.BoolBinary_nt:
          return left_mtime > right_mtime
        return left_mtime < right_mtime

      if op_id == Id.BoolBinary_ef:
        if left_st is None:
          return False
        if right_st is None:
          return False
        same_device = left_st.st_dev == right_st.st_dev
        return same_device and left_st.st_ino == right_st.st_ino

      raise AssertionError(op_id)

    if kind == bool_arg_type_e.Int:
      # NOTE: We assume they are constants like [[ 3 -eq 3 ]].
      # Bash also allows [[ 1+2 -eq 3 ]].
      left_int = self._StringToIntegerOrError(lhs, blame_word=node.left)
      right_int = self._StringToIntegerOrError(rhs, blame_word=node.right)

      if op_id == Id.BoolBinary_eq:
        return left_int == right_int
      if op_id == Id.BoolBinary_ne:
        return left_int != right_int
      if op_id == Id.BoolBinary_gt:
        return left_int > right_int
      if op_id == Id.BoolBinary_ge:
        return left_int >= right_int
      if op_id == Id.BoolBinary_lt:
        return left_int < right_int
      if op_id == Id.BoolBinary_le:
        return left_int <= right_int

      raise AssertionError(op_id)  # should never happen

    if kind == bool_arg_type_e.Str:
      # For the glob operators the RIGHT side is the pattern.
      if op_id in (Id.BoolBinary_GlobEqual, Id.BoolBinary_GlobDEqual):
        return libc.fnmatch(rhs, lhs)
      if op_id == Id.BoolBinary_GlobNEqual:
        return not libc.fnmatch(rhs, lhs)

      if op_id in (Id.BoolBinary_Equal, Id.BoolBinary_DEqual):
        return lhs == rhs
      if op_id == Id.BoolBinary_NEqual:
        return lhs != rhs

      if op_id == Id.BoolBinary_EqualTilde:
        # TODO: Matching details should go to --debug-file
        try:
          groups = libc.regex_match(rhs, lhs)
        except RuntimeError:
          # Status 2 indicates a regex parse error.  This is fatal in OSH but
          # not in bash, which treats [[ like a command with an exit code.
          e_die("Invalid regex %r", rhs, word=node.right, status=2)

        if groups is None:
          return False

        self._SetRegexMatches(groups)
        return True

      if op_id == Id.Op_Less:
        return lhs < rhs
      if op_id == Id.Op_Great:
        return lhs > rhs

      raise AssertionError(op_id)  # should never happen

  raise AssertionError(node.tag)
def DoUnarySuffixOp(s, op, arg, extglob):
  # type: (str, suffix_op__Unary, str, bool) -> str
  """Helper for ${x#prefix} and family.

  Args:
    s: The string value to transform.
    op: The unary suffix operator; only op.op_id is read here.
    arg: The operator argument.  It is either a constant string (still
      glob-escaped) or a glob pattern, as decided by glob_.LooksLikeGlob().
    extglob: Forwarded unchanged as the third argument of libc.fnmatch().
      NOTE(review): presumably enables extended glob syntax -- confirm.

  Returns:
    The transformed string, or s itself when nothing matched.

  Raises:
    AssertionError: Unknown operator on the constant-string path.
    NotImplementedError: An operator other than # ## % %% was used with a
      glob pattern.
  """

  # Fast path for constant strings.
  if not glob_.LooksLikeGlob(arg):
    # It doesn't look like a glob, but we glob-escaped it (e.g. [ -> \[).  So
    # reverse it.  NOTE: We also do this check in Globber.Expand().  It would
    # be nice to somehow store the original string rather than
    # escaping/unescaping.
    arg = glob_.GlobUnescape(arg)

    if op.op_id in (Id.VOp1_Pound, Id.VOp1_DPound):  # const prefix
      # explicit check for non-empty arg (len for mycpp)
      if len(arg) and s.startswith(arg):
        return s[len(arg):]
      else:
        return s

    elif op.op_id in (Id.VOp1_Percent, Id.VOp1_DPercent):  # const suffix
      # need explicit check for non-empty arg (len for mycpp).  Without it,
      # s[:-len(arg)] would be s[:0], i.e. the empty string.
      if len(arg) and s.endswith(arg):
        return s[:-len(arg)]
      else:
        return s

    # The case operators below take glob arguments in other shells; we don't
    # implement that obscure case, so any argument is rejected.
    elif op.op_id == Id.VOp1_Comma:  # Only lowercase the first letter
      if arg != '':
        # TODO: location info for op
        e_die("%s can't have an argument", ui.PrettyId(op.op_id))
      if len(s):
        return s[0].lower() + s[1:]
      else:
        return s

    elif op.op_id == Id.VOp1_DComma:  # Lowercase the whole string
      if arg != '':
        e_die("%s can't have an argument", ui.PrettyId(op.op_id))
      return s.lower()

    elif op.op_id == Id.VOp1_Caret:  # Only uppercase the first letter
      if arg != '':
        e_die("%s can't have an argument", ui.PrettyId(op.op_id))
      if len(s):
        return s[0].upper() + s[1:]
      else:
        return s

    elif op.op_id == Id.VOp1_DCaret:  # Uppercase the whole string
      if arg != '':
        e_die("%s can't have an argument", ui.PrettyId(op.op_id))
      return s.upper()

    else:  # every known operator (# ## % %% , ,, ^ ^^) is handled above
      raise AssertionError(op.op_id)

  # For patterns, do fnmatch() in a loop.
  #
  # TODO:
  # - Another potential fast path:
  #   v=aabbccdd
  #   echo ${v#*b}  # strip shortest prefix
  #
  # If the whole thing doesn't match '*b*', then no test can succeed.  So we
  # can fail early.  Conversely echo ${v%%c*} and '*c*'.
  #
  # (Although honestly this whole construct is nuts and should be deprecated.)

  n = len(s)

  if op.op_id == Id.VOp1_Pound:  # shortest prefix
    # 'abcd': match '', 'a', 'ab', 'abc', ...
    i = 0
    while True:
      assert i <= n
      #log('Matching pattern %r with %r', arg, s[:i])
      if libc.fnmatch(arg, s[:i], extglob):
        return s[i:]
      if i >= n:
        break
      # Step with the UTF-8 helper rather than by one byte.
      i = _NextUtf8Char(s, i)
    return s

  elif op.op_id == Id.VOp1_DPound:  # longest prefix
    # 'abcd': match 'abcd', 'abc', 'ab', 'a', ''
    i = n
    while True:
      assert i >= 0
      #log('Matching pattern %r with %r', arg, s[:i])
      if libc.fnmatch(arg, s[:i], extglob):
        return s[i:]
      if i == 0:
        break
      i = PreviousUtf8Char(s, i)
    return s

  elif op.op_id == Id.VOp1_Percent:  # shortest suffix
    # 'abcd': match '', 'd', 'cd', 'bcd', 'abcd'
    i = n
    while True:
      assert i >= 0
      #log('Matching pattern %r with %r', arg, s[i:])
      if libc.fnmatch(arg, s[i:], extglob):
        return s[:i]
      if i == 0:
        break
      i = PreviousUtf8Char(s, i)
    return s

  elif op.op_id == Id.VOp1_DPercent:  # longest suffix
    # 'abcd': match 'abcd', 'bcd', 'cd', 'd', ''
    i = 0
    while True:
      assert i <= n
      #log('Matching pattern %r with %r', arg, s[i:])
      if libc.fnmatch(arg, s[i:], extglob):
        return s[:i]
      if i >= n:
        break
      i = _NextUtf8Char(s, i)
    return s

  else:
    raise NotImplementedError(ui.PrettyId(op.op_id))
def Eval(self, node):
  # type: (bool_expr_t) -> bool
  """Evaluate a boolean expression node and return its truth value.

  Dispatches on the node's tag: bare word test, logical not/and/or, unary
  operator and binary operator.  Operators are further dispatched on the
  argument type returned by consts.BoolArgType().

  Unknown tags, argument types and operator ids raise AssertionError.
  Unimplemented operators and invalid user input are reported with e_die().
  """

  # Keep the un-narrowed node; each case below casts it to its concrete type.
  UP_node = node
  with tagswitch(node) as case:
    if case(bool_expr_e.WordTest):
      node = cast(bool_expr__WordTest, UP_node)
      # A bare word is true iff it evaluates to a non-empty string.
      s = self._EvalCompoundWord(node.w)
      return bool(s)

    elif case(bool_expr_e.LogicalNot):
      node = cast(bool_expr__LogicalNot, UP_node)
      b = self.Eval(node.child)
      return not b

    elif case(bool_expr_e.LogicalAnd):
      node = cast(bool_expr__LogicalAnd, UP_node)
      # Short-circuit evaluation
      if self.Eval(node.left):
        return self.Eval(node.right)
      else:
        return False

    elif case(bool_expr_e.LogicalOr):
      node = cast(bool_expr__LogicalOr, UP_node)
      # Short-circuit evaluation
      if self.Eval(node.left):
        return True
      else:
        return self.Eval(node.right)

    elif case(bool_expr_e.Unary):
      node = cast(bool_expr__Unary, UP_node)
      op_id = node.op_id
      s = self._EvalCompoundWord(node.child)

      # Now dispatch on arg type
      arg_type = consts.BoolArgType(
          op_id)  # could be static in the LST?

      if arg_type == bool_arg_type_e.Path:
        # File tests are delegated to the bool_stat module.
        return bool_stat.DoUnaryOp(op_id, s)

      if arg_type == bool_arg_type_e.Str:
        if op_id == Id.BoolUnary_z:
          return not bool(s)
        if op_id == Id.BoolUnary_n:
          return bool(s)

        raise AssertionError(op_id)  # should never happen

      if arg_type == bool_arg_type_e.Other:
        if op_id == Id.BoolUnary_t:
          try:
            fd = int(s)
          except ValueError:
            # TODO: Need location information of [
            e_die('Invalid file descriptor %r', s, word=node.child)
          return bool_stat.isatty(fd, s, node.child)

        # See whether 'set -o' options have been set
        if op_id == Id.BoolUnary_o:
          index = match.MatchOption(s)
          # Index 0 is treated as false without consulting opt_array.
          # NOTE(review): presumably 0 means "no such option" -- confirm.
          if index == 0:
            return False
          else:
            return self.exec_opts.opt_array[index]

        e_die("%s isn't implemented", ui.PrettyId(op_id))  # implicit location

      raise AssertionError(arg_type)  # should never happen

    elif case(bool_expr_e.Binary):
      node = cast(bool_expr__Binary, UP_node)

      op_id = node.op_id
      # Whether to glob escape: the right operand is quoted as an fnmatch
      # pattern for the glob operators, as an ERE for =~, and by the default
      # rule otherwise.
      with switch(op_id) as case2:
        if case2(Id.BoolBinary_GlobEqual, Id.BoolBinary_GlobDEqual,
                 Id.BoolBinary_GlobNEqual):
          quote_kind = quote_e.FnMatch
        elif case2(Id.BoolBinary_EqualTilde):
          quote_kind = quote_e.ERE
        else:
          quote_kind = quote_e.Default

      s1 = self._EvalCompoundWord(node.left)
      s2 = self._EvalCompoundWord(node.right, quote_kind=quote_kind)

      # Now dispatch on arg type
      arg_type = consts.BoolArgType(op_id)

      if arg_type == bool_arg_type_e.Path:
        return bool_stat.DoBinaryOp(op_id, s1, s2)

      if arg_type == bool_arg_type_e.Int:
        # NOTE: We assume they are constants like [[ 3 -eq 3 ]].
        # Bash also allows [[ 1+2 -eq 3 ]].
        i1 = self._StringToIntegerOrError(s1, blame_word=node.left)
        i2 = self._StringToIntegerOrError(s2, blame_word=node.right)

        if op_id == Id.BoolBinary_eq:
          return i1 == i2
        if op_id == Id.BoolBinary_ne:
          return i1 != i2
        if op_id == Id.BoolBinary_gt:
          return i1 > i2
        if op_id == Id.BoolBinary_ge:
          return i1 >= i2
        if op_id == Id.BoolBinary_lt:
          return i1 < i2
        if op_id == Id.BoolBinary_le:
          return i1 <= i2

        raise AssertionError(op_id)  # should never happen

      if arg_type == bool_arg_type_e.Str:

        # For the glob operators the RIGHT operand (s2) is the pattern.
        if op_id in (Id.BoolBinary_GlobEqual, Id.BoolBinary_GlobDEqual):
          #log('Matching %s against pattern %s', s1, s2)
          return libc.fnmatch(s2, s1)

        if op_id == Id.BoolBinary_GlobNEqual:
          return not libc.fnmatch(s2, s1)

        if op_id in (Id.BoolBinary_Equal, Id.BoolBinary_DEqual):
          return s1 == s2

        if op_id == Id.BoolBinary_NEqual:
          return s1 != s2

        if op_id == Id.BoolBinary_EqualTilde:
          # TODO: This should go to --debug-file
          #log('Matching %r against regex %r', s1, s2)
          try:
            matches = libc.regex_match(s2, s1)
          except RuntimeError:
            # Status 2 indicates a regex parse error.  This is fatal in OSH but
            # not in bash, which treats [[ like a command with an exit code.
            e_die("Invalid regex %r", s2, word=node.right, status=2)

          if matches is None:
            return False

          # Record the match results before reporting success.
          self._SetRegexMatches(matches)
          return True

        if op_id == Id.Op_Less:
          return s1 < s2

        if op_id == Id.Op_Great:
          return s1 > s2

        raise AssertionError(op_id)  # should never happen

  raise AssertionError(node.tag_())
def DoUnarySuffixOp(s, op, arg):
  """Helper for ${x#prefix} and family.

  Args:
    s: The string value to transform.
    op: The unary suffix operator; only op.op_id is read here.
    arg: The operator argument, either a constant string or a glob pattern
      (as decided by glob_.LooksLikeGlob()).

  Returns:
    The transformed string, or s itself when nothing matched.

  Raises:
    NotImplementedError: A case operator (, ,, ^ ^^) was given an argument, or
      an operator other than # ## % %% was used with a glob pattern.
    AssertionError: Unknown operator on the constant-string path.
  """
  # Fast path for constant strings.
  if not glob_.LooksLikeGlob(arg):
    if op.op_id in (Id.VOp1_Pound, Id.VOp1_DPound):  # const prefix
      # An empty argument strips nothing.
      if len(arg) and s.startswith(arg):
        return s[len(arg):]
      return s

    elif op.op_id in (Id.VOp1_Percent, Id.VOp1_DPercent):  # const suffix
      # The non-empty check is required: every string ends with '', and
      # s[:-0] is s[:0], which would wrongly erase the whole value.
      if len(arg) and s.endswith(arg):
        return s[:-len(arg)]
      return s

    elif op.op_id == Id.VOp1_Comma:  # Only lowercase the first letter
      if arg != '':
        raise NotImplementedError("%s can't have an argument" % op.op_id)
      # Slicing instead of s[0] so that an empty value doesn't raise.
      return s[:1].lower() + s[1:]

    elif op.op_id == Id.VOp1_DComma:  # Lowercase the whole string
      if arg != '':
        raise NotImplementedError("%s can't have an argument" % op.op_id)
      return s.lower()

    elif op.op_id == Id.VOp1_Caret:  # Only uppercase the first letter
      if arg != '':
        raise NotImplementedError("%s can't have an argument" % op.op_id)
      # Slicing instead of s[0] so that an empty value doesn't raise.
      return s[:1].upper() + s[1:]

    elif op.op_id == Id.VOp1_DCaret:  # Uppercase the whole string
      if arg != '':
        raise NotImplementedError("%s can't have an argument" % op.op_id)
      return s.upper()

    else:  # every known operator (# ## % %% , ,, ^ ^^) is handled above
      raise AssertionError(op.op_id)

  # For patterns, do fnmatch() in a loop.
  #
  # TODO: The loop needs to iterate over code points, not bytes!
  # - The forward case can probably be handled in a similar manner.
  # - The backward case might be handled by pre-calculating an array of start
  #   positions with _NextUtf8Char.
  #
  # TODO: Another potential fast path:
  #
  # v=aabbccdd
  # echo ${v#*b}  # strip shortest prefix
  #
  # If the whole thing doesn't match '*b*', then no test can succeed.  So we
  # can fail early.  Conversely echo ${v%%c*} and '*c*'.
  #
  # (Although honestly this whole construct is nuts and should be deprecated.)

  n = len(s)

  if op.op_id == Id.VOp1_Pound:  # shortest prefix
    # 'abcd': match '', 'a', 'ab', 'abc', 'abcd'.  The empty prefix must be
    # tried first: the shortest match of a pattern like '*' is ''.
    for i in xrange(0, n + 1):
      if libc.fnmatch(arg, s[:i]):
        return s[i:]
    return s

  elif op.op_id == Id.VOp1_DPound:  # longest prefix
    # 'abcd': match 'abcd', 'abc', 'ab', 'a'.  The empty prefix is not tried
    # because a match there would return s unchanged anyway.
    for i in xrange(n, 0, -1):
      if libc.fnmatch(arg, s[:i]):
        return s[i:]
    return s

  elif op.op_id == Id.VOp1_Percent:  # shortest suffix
    # 'abcd': match '', 'd', 'cd', 'bcd', 'abcd'.  The empty suffix must be
    # tried first, for the same reason as the shortest prefix.
    for i in xrange(n, -1, -1):
      if libc.fnmatch(arg, s[i:]):
        return s[:i]
    return s

  elif op.op_id == Id.VOp1_DPercent:  # longest suffix
    # 'abcd': match 'abcd', 'bcd', 'cd', 'd'.  The empty suffix is not tried
    # because a match there would return s unchanged anyway.
    for i in xrange(0, n):
      if libc.fnmatch(arg, s[i:]):
        return s[:i]
    return s

  else:
    raise NotImplementedError("Can't use %s with pattern" % op.op_id)
def _Dispatch(self, node, fork_external):
  """Execute one command node, dispatching on node.tag.

  Args:
    node: The command node to run.
    fork_external: Passed through to _RunSimpleCommand() for simple commands.

  Returns:
    A (status, check_errexit) tuple: the exit status of the node, and whether
    the caller should apply the errexit check to that status.

  Raises:
    _ControlFlow: For break / continue / return / exit style nodes that are
      valid at this point.  Loop nodes catch break and continue themselves and
      re-raise everything else.
    NotImplementedError: For |& pipelines and unknown node types.
    AssertionError: For an unknown assignment keyword.
  """
  # If we call RunCommandSub in a recursive call to the executor, this will
  # be set true (if strict-errexit is false).  But it only lasts for one
  # command.
  self.check_command_sub_status = False

  #argv0 = None  # for error message
  check_errexit = False  # for errexit

  if node.tag == command_e.SimpleCommand:
    check_errexit = True

    # Find span_id for a basic implementation of $LINENO, e.g.
    # PS4='+$SOURCE_NAME:$LINENO:'
    # NOTE: osh2oil uses node.more_env, but we don't need that.
    span_id = const.NO_INTEGER
    if node.words:
      first_word = node.words[0]
      span_id = word.LeftMostSpanForWord(first_word)

    self.mem.SetCurrentSpanId(span_id)

    # PROBLEM: We want to log argv in 'xtrace' mode, but we may have already
    # redirected here, which screws up logging.  For example, 'echo hi
    # >/dev/null 2>&1'.  We want to evaluate argv and log it BEFORE applying
    # redirects.

    # Another problem:
    # - tracing can be called concurrently from multiple processes, leading
    #   to overlap.  Maybe have a mode that creates a file per process.
    #   xtrace-proc
    # - line numbers for every command would be very nice.  But then you have
    #   to print the filename too.

    words = braces.BraceExpandWords(node.words)
    argv = self.word_ev.EvalWordSequence(words)

    # This comes before evaluating env, in case there are problems evaluating
    # it.  We could trace the env separately?  Also trace unevaluated code
    # with set-o verbose?
    self.tracer.OnSimpleCommand(argv)

    if node.more_env:
      self.mem.PushTemp()
    try:
      for env_pair in node.more_env:
        val = self.word_ev.EvalWordToString(env_pair.val)
        # Set each var so the next one can reference it.  Example:
        # FOO=1 BAR=$FOO ls /
        self.mem.SetVar(ast.LhsName(env_pair.name), val,
                        (var_flags_e.Exported,), scope_e.TempEnv)

      # NOTE: This might never return!  In the case of fork_external=False.
      status = self._RunSimpleCommand(argv, fork_external, span_id)
    finally:
      # Pop the temp env even when evaluation or the command itself raised.
      if node.more_env:
        self.mem.PopTemp()

  elif node.tag == command_e.Sentence:
    # Don't check_errexit since this isn't a real node!
    if node.terminator.id == Id.Op_Semi:
      status = self._Execute(node.child)
    else:
      status = self._RunJobInBackground(node.child)

  elif node.tag == command_e.Pipeline:
    check_errexit = True
    if node.stderr_indices:
      raise NotImplementedError('|&')

    if node.negated:
      self._PushErrExit()
      try:
        status2 = self._RunPipeline(node)
      finally:
        self._PopErrExit()

      # errexit is disabled for !.
      check_errexit = False
      status = 1 if status2 == 0 else 0
    else:
      status = self._RunPipeline(node)

  elif node.tag == command_e.Subshell:
    check_errexit = True
    # This makes sure we don't waste a process if we'd launch one anyway.
    p = self._MakeProcess(node.child)
    status = p.Run(self.waiter)

  elif node.tag == command_e.DBracket:
    check_errexit = True
    result = self.bool_ev.Eval(node.expr)
    status = 0 if result else 1

  elif node.tag == command_e.DParen:
    check_errexit = True
    i = self.arith_ev.Eval(node.child)
    status = 0 if i != 0 else 1

  elif node.tag == command_e.Assignment:
    flags = word_compile.ParseAssignFlags(node.flags)

    if node.keyword == Id.Assign_Local:
      lookup_mode = scope_e.LocalOnly
    # typeset and declare are synonyms?  I see typeset -a a=() the most.
    elif node.keyword in (Id.Assign_Declare, Id.Assign_Typeset):
      # declare is like local, except it can also be used outside functions?
      if var_flags_e.Global in flags:
        lookup_mode = scope_e.GlobalOnly
      else:
        lookup_mode = scope_e.LocalOnly
    elif node.keyword == Id.Assign_Readonly:
      lookup_mode = scope_e.Dynamic
      flags.append(var_flags_e.ReadOnly)
    elif node.keyword == Id.Assign_None:  # mutate existing local or global
      lookup_mode = scope_e.Dynamic
    else:
      raise AssertionError(node.keyword)

    for pair in node.pairs:
      if pair.op == assign_op_e.PlusEqual:
        assert pair.rhs, pair.rhs  # I don't think a+= is valid?
        val = self.word_ev.EvalRhsWord(pair.rhs)
        old_val, lval = expr_eval.EvalLhsAndLookup(pair.lhs, self.arith_ev,
                                                   self.mem, self.exec_opts)
        # Combine old and new values according to their types.
        sig = (old_val.tag, val.tag)
        if sig == (value_e.Undef, value_e.Str):
          pass  # val is RHS
        elif sig == (value_e.Undef, value_e.StrArray):
          pass  # val is RHS
        elif sig == (value_e.Str, value_e.Str):
          val = runtime.Str(old_val.s + val.s)
        elif sig == (value_e.Str, value_e.StrArray):
          e_die("Can't append array to string")
        elif sig == (value_e.StrArray, value_e.Str):
          e_die("Can't append string to array")
        elif sig == (value_e.StrArray, value_e.StrArray):
          val = runtime.StrArray(old_val.strs + val.strs)

      else:  # plain assignment
        spid = pair.spids[0]  # Source location for tracing
        lval = self._EvalLhs(pair.lhs, spid, lookup_mode)

        # RHS can be a string or array.
        if pair.rhs:
          val = self.word_ev.EvalRhsWord(pair.rhs)
          assert isinstance(val, runtime.value), val
        else:
          # e.g. 'readonly x' or 'local x'
          val = None
          # NOTE: In bash and mksh, declare -a myarray makes an empty cell with
          # Undef value, but the 'array' attribute.

      #log('setting %s to %s with flags %s', lval, val, flags)
      self.mem.SetVar(lval, val, flags, lookup_mode,
                      strict_array=self.exec_opts.strict_array)

      # Assignment always appears to have a spid.
      if node.spids:
        current_spid = node.spids[0]
      else:
        current_spid = const.NO_INTEGER
      self.mem.SetCurrentSpanId(current_spid)
      self.tracer.OnAssignment(lval, pair.op, val, flags, lookup_mode)

    # PATCH to be compatible with existing shells: If the assignment had a
    # command sub like:
    #
    # s=$(echo one; false)
    #
    # then its status will be in mem.last_status, and we can check it here.
    # If there was NOT a command sub in the assignment, then we don't want to
    # check it.
    if node.keyword == Id.Assign_None:  # mutate existing local or global
      # Only do this if there was a command sub?  How?  Look at node?
      # Set a flag in mem?   self.mem.last_status or
      if self.check_command_sub_status:
        self._CheckStatus(self.mem.last_status, node)
        # A global assignment shouldn't clear $?.
        status = self.mem.last_status
      else:
        status = 0
    else:
      # To be compatible with existing shells, local assignments DO clear
      # $?.  Even in strict mode, we don't need to bother setting
      # check_errexit = True, because we would have already checked the
      # command sub in RunCommandSub.
      status = 0
      # TODO: maybe we should have a "sane-status" that respects this:
      # false; echo $?; local f=x; echo $?

  elif node.tag == command_e.ControlFlow:
    if node.arg_word:  # Evaluate the argument
      val = self.word_ev.EvalWordToString(node.arg_word)
      assert val.tag == value_e.Str
      try:
        arg = int(val.s)  # They all take integers
      except ValueError:
        # e.g. 'return foo': report a shell error at the keyword instead of
        # crashing with an uncaught Python exception.
        e_die('Control flow argument should be an integer, got %r', val.s,
              token=node.token)
    else:
      arg = 0  # return 0, exit 0, break 0 levels, etc.

    # NOTE: We don't do anything about a top-level 'return' here.  Unlike in
    # bash, that is OK.  If you can return from a sourced script, it makes
    # sense to return from a main script.
    ok = True
    tok = node.token
    if (tok.id in (Id.ControlFlow_Break, Id.ControlFlow_Continue) and
        self.loop_level == 0):
      ok = False
      msg = 'Invalid control flow at top level'

    if ok:
      raise _ControlFlow(tok, arg)

    if self.exec_opts.strict_control_flow:
      e_die(msg, token=tok)
    else:
      # Only print warnings, never fatal.
      # Bash oddly only exits 1 for 'return', but no other shell does.
      ui.PrintFilenameAndLine(tok.span_id, self.arena)
      util.warn(msg)
      status = 0

  # The only difference between these two is that CommandList has no
  # redirects.  We already took care of that above.
  elif node.tag in (command_e.CommandList, command_e.BraceGroup):
    status = self._ExecuteList(node.children)
    check_errexit = False

  elif node.tag == command_e.AndOr:
    # NOTE: && and || have EQUAL precedence in command mode.  See case #13
    # in dbracket.test.sh.

    left = node.children[0]

    # Suppress failure for every child except the last one.
    self._PushErrExit()
    try:
      status = self._Execute(left)
    finally:
      self._PopErrExit()

    i = 1
    n = len(node.children)
    while i < n:
      #log('i %d status %d', i, status)
      child = node.children[i]
      op_id = node.ops[i-1]

      #log('child %s op_id %s', child, op_id)

      if op_id == Id.Op_DPipe and status == 0:
        i += 1
        continue  # short circuit

      elif op_id == Id.Op_DAmp and status != 0:
        i += 1
        continue  # short circuit

      if i == n - 1:  # errexit handled differently for last child
        status = self._Execute(child)
        check_errexit = True
      else:
        self._PushErrExit()
        try:
          status = self._Execute(child)
        finally:
          self._PopErrExit()

      i += 1

  elif node.tag == command_e.WhileUntil:
    # 'while' stops on a failing condition, 'until' on a succeeding one.
    if node.keyword.id == Id.KW_While:
      _DonePredicate = lambda status: status != 0
    else:
      _DonePredicate = lambda status: status == 0

    status = 0

    self.loop_level += 1
    try:
      while True:
        # A failing loop condition must not trigger errexit.
        self._PushErrExit()
        try:
          cond_status = self._ExecuteList(node.cond)
        finally:
          self._PopErrExit()

        if _DonePredicate(cond_status):
          break
        try:
          status = self._Execute(node.body)  # last one wins
        except _ControlFlow as e:
          if e.IsBreak():
            status = 0
            break
          elif e.IsContinue():
            status = 0
            continue
          else:  # return needs to pop up more
            raise
    finally:
      self.loop_level -= 1

  elif node.tag == command_e.ForEach:
    iter_name = node.iter_name
    if node.do_arg_iter:
      iter_list = self.mem.GetArgv()
    else:
      words = braces.BraceExpandWords(node.iter_words)
      iter_list = self.word_ev.EvalWordSequence(words)
      # We need word splitting and so forth
      # NOTE: This expands globs too.  TODO: We should pass in a Globber()
      # object.

    status = 0  # in case we don't loop
    self.loop_level += 1
    try:
      for x in iter_list:
        #log('> ForEach setting %r', x)
        state.SetLocalString(self.mem, iter_name, x)
        #log('<')

        try:
          status = self._Execute(node.body)  # last one wins
        except _ControlFlow as e:
          if e.IsBreak():
            status = 0
            break
          elif e.IsContinue():
            status = 0
          else:  # return needs to pop up more
            raise
    finally:
      self.loop_level -= 1

  elif node.tag == command_e.ForExpr:
    status = 0
    init, cond, body, update = node.init, node.cond, node.body, node.update
    if init:
      self.arith_ev.Eval(init)

    self.loop_level += 1
    try:
      while True:
        if cond:
          b = self.arith_ev.Eval(cond)
          if not b:
            break

        try:
          status = self._Execute(body)
        except _ControlFlow as e:
          if e.IsBreak():
            status = 0
            break
          elif e.IsContinue():
            # Fall through so that the update expression still runs.
            status = 0
          else:  # return needs to pop up more
            raise

        if update:
          self.arith_ev.Eval(update)

    finally:
      self.loop_level -= 1

  elif node.tag == command_e.DoGroup:
    status = self._ExecuteList(node.children)
    check_errexit = False  # not real statements

  elif node.tag == command_e.FuncDef:
    # NOTE: Would it make sense to evaluate the redirects BEFORE entering?
    # It will save time on function calls.
    self.funcs[node.name] = node
    status = 0

  elif node.tag == command_e.If:
    done = False
    for arm in node.arms:
      # A failing 'if' condition must not trigger errexit.
      self._PushErrExit()
      try:
        status = self._ExecuteList(arm.cond)
      finally:
        self._PopErrExit()

      if status == 0:
        status = self._ExecuteList(arm.action)
        done = True
        break
    # TODO: The compiler should flatten this
    if not done and node.else_action is not None:
      status = self._ExecuteList(node.else_action)

  elif node.tag == command_e.NoOp:
    status = 0  # make it true

  elif node.tag == command_e.Case:
    val = self.word_ev.EvalWordToString(node.to_match)
    to_match = val.s

    status = 0  # If there are no arms, it should be zero?
    done = False

    for arm in node.arms:
      for pat_word in arm.pat_list:
        # NOTE: Is it OK that we're evaluating these as we go?

        # TODO: case "$@") shouldn't succeed?  That's a type error?
        # That requires strict-array?

        pat_val = self.word_ev.EvalWordToString(pat_word, do_fnmatch=True)
        #log('Matching word %r against pattern %r', to_match, pat_val.s)
        if libc.fnmatch(pat_val.s, to_match):
          status = self._ExecuteList(arm.action)
          done = True  # TODO: Parse ;;& and for fallthrough and such?
          break  # Only execute action ONCE
      if done:
        break

  elif node.tag == command_e.TimeBlock:
    # TODO:
    # - When do we need RUSAGE_CHILDREN?
    # - Respect TIMEFORMAT environment variable.
    # "If this variable is not set, Bash acts as if it had the value"
    # $'\nreal\t%3lR\nuser\t%3lU\nsys\t%3lS'
    # "A trailing newline is added when the format string is displayed."

    start_t = time.time()  # calls gettimeofday() under the hood
    start_u = resource.getrusage(resource.RUSAGE_SELF)
    status = self._Execute(node.pipeline)

    end_t = time.time()
    end_u = resource.getrusage(resource.RUSAGE_SELF)

    real = end_t - start_t
    user = end_u.ru_utime - start_u.ru_utime
    sys_ = end_u.ru_stime - start_u.ru_stime
    libc.print_time(real, user, sys_)

  else:
    raise NotImplementedError(node.__class__.__name__)

  return status, check_errexit
def DoUnarySuffixOp(s, op, arg):
  """Helper for ${x#prefix} and family.

  Args:
    s: The string value to transform.
    op: The unary suffix operator; only op.op_id is read here.
    arg: The operator argument.  It is either a constant string (still
      glob-escaped) or a glob pattern, as decided by glob_.LooksLikeGlob().

  Returns:
    The transformed string, or s itself when nothing matched.

  Raises:
    NotImplementedError: A case operator (, ,, ^ ^^) was given an argument, or
      an operator other than # ## % %% was used with a glob pattern.
    AssertionError: Unknown operator on the constant-string path.
  """
  # Fast path for constant strings.
  if not glob_.LooksLikeGlob(arg):
    # It doesn't look like a glob, but we glob-escaped it (e.g. [ -> \[).  So
    # reverse it.  NOTE: We also do this check in Globber.Expand().  It would
    # be nice to somehow store the original string rather than
    # escaping/unescaping.
    arg = glob_.GlobUnescape(arg)

    if op.op_id in (Id.VOp1_Pound, Id.VOp1_DPound):  # const prefix
      # explicit check for non-empty arg (len for mycpp)
      if len(arg) and s.startswith(arg):
        return s[len(arg):]
      else:
        return s

    elif op.op_id in (Id.VOp1_Percent, Id.VOp1_DPercent):  # const suffix
      # need explicit check for non-empty arg (len for mycpp).  Without it,
      # s[:-len(arg)] would be s[:0], i.e. the empty string.
      if len(arg) and s.endswith(arg):
        return s[:-len(arg)]
      else:
        return s

    # The case operators below take glob arguments in other shells; we don't
    # implement that obscure case, so any argument is rejected.
    elif op.op_id == Id.VOp1_Comma:  # Only lowercase the first letter
      if arg != '':
        raise NotImplementedError("%s can't have an argument" % op.op_id)
      # An empty value has no first letter; s[0] would raise IndexError.
      if len(s):
        return s[0].lower() + s[1:]
      else:
        return s

    elif op.op_id == Id.VOp1_DComma:  # Lowercase the whole string
      if arg != '':
        raise NotImplementedError("%s can't have an argument" % op.op_id)
      return s.lower()

    elif op.op_id == Id.VOp1_Caret:  # Only uppercase the first letter
      if arg != '':
        raise NotImplementedError("%s can't have an argument" % op.op_id)
      # An empty value has no first letter; s[0] would raise IndexError.
      if len(s):
        return s[0].upper() + s[1:]
      else:
        return s

    elif op.op_id == Id.VOp1_DCaret:  # Uppercase the whole string
      if arg != '':
        raise NotImplementedError("%s can't have an argument" % op.op_id)
      return s.upper()

    else:  # every known operator (# ## % %% , ,, ^ ^^) is handled above
      raise AssertionError(op.op_id)

  # For patterns, do fnmatch() in a loop.
  #
  # TODO: The loop needs to iterate over code points, not bytes!
  # - The forward case can probably be handled in a similar manner.
  # - The backward case might be handled by pre-calculating an array of start
  #   positions with _NextUtf8Char.
  #
  # TODO: Another potential fast path:
  #
  # v=aabbccdd
  # echo ${v#*b}  # strip shortest prefix
  #
  # If the whole thing doesn't match '*b*', then no test can succeed.  So we
  # can fail early.  Conversely echo ${v%%c*} and '*c*'.
  #
  # (Although honestly this whole construct is nuts and should be deprecated.)

  n = len(s)

  if op.op_id == Id.VOp1_Pound:  # shortest prefix
    # 'abcd': match '', 'a', 'ab', 'abc', 'abcd'.  The empty prefix must be
    # tried first: the shortest match of a pattern like '*' is ''.
    for i in xrange(0, n + 1):
      if libc.fnmatch(arg, s[:i]):
        return s[i:]
    return s

  elif op.op_id == Id.VOp1_DPound:  # longest prefix
    # 'abcd': match 'abcd', 'abc', 'ab', 'a'.  The empty prefix is not tried
    # because a match there would return s unchanged anyway.
    for i in xrange(n, 0, -1):
      if libc.fnmatch(arg, s[:i]):
        return s[i:]
    return s

  elif op.op_id == Id.VOp1_Percent:  # shortest suffix
    # 'abcd': match '', 'd', 'cd', 'bcd', 'abcd'.  The empty suffix must be
    # tried first, for the same reason as the shortest prefix.
    for i in xrange(n, -1, -1):
      if libc.fnmatch(arg, s[i:]):
        return s[:i]
    return s

  elif op.op_id == Id.VOp1_DPercent:  # longest suffix
    # 'abcd': match 'abcd', 'bcd', 'cd', 'd'.  The empty suffix is not tried
    # because a match there would return s unchanged anyway.
    for i in xrange(0, n):
      if libc.fnmatch(arg, s[i:]):
        return s[:i]
    return s

  else:
    raise NotImplementedError("Can't use %s with pattern" % op.op_id)