def ParseAndEval(code_str):
    """Parse code_str as an arithmetic expression, evaluate it, and return it.

    The parsed node and the computed value are both printed to stdout.

    Raises:
      ExprSyntaxError: the word parser did not produce an arithmetic node.
      AssertionError: the arithmetic evaluator reported failure.
    """
    w_parser, _ = parse_lib.MakeParserForCompletion(code_str)

    # The arithmetic parser is reached through the word parser's private
    # entry point.
    anode = w_parser._ReadArithExpr()  # need the right lex state?
    if not anode:
        raise ExprSyntaxError("failed %s" % w_parser.Error())
    print('node:', anode)

    # Minimal runtime: empty memory, default options, completion evaluator.
    mem = cmd_exec.Mem('', [])
    exec_opts = cmd_exec.ExecOpts()
    ev = word_eval.CompletionWordEvaluator(mem, exec_opts)
    arith_ev = expr_eval.ArithEvaluator(mem, ev)

    if not arith_ev.Eval(anode):
        raise AssertionError(code_str)

    value = arith_ev.Result()
    print('value:', value)
    return value
def __init__(self, mem, exec_opts, splitter):
    """Keep the collaborators and build the globber and arithmetic evaluator.

    Args:
      mem: variable storage, consulted for $HOME, $1, etc.
      exec_opts: shell options, consulted for nounset
      splitter: stored unchanged as self.splitter
    """
    self.mem = mem
    self.exec_opts = exec_opts
    self.splitter = splitter

    self.globber = glob_.Globber(exec_opts)

    # NOTE: Executor also instantiates one.
    self.arith_ev = expr_eval.ArithEvaluator(mem, exec_opts, self)
def EvalArithSub(self, anode):
    """Evaluate an arithmetic substitution node.

    Returns:
      (True, value) where value is the numeric result rendered as a string
      Value, or (False, None) after the arithmetic evaluator's errors have
      been appended to self.error_stack.
    """
    evaluator = expr_eval.ArithEvaluator(self.mem, self)

    if not evaluator.Eval(anode):
        self.error_stack.extend(evaluator.Error())
        return False, None

    return True, Value.FromString(str(evaluator.Result()))
def __init__(self, mem, fd_state, status_lines, funcs, readline, completion,
             comp_lookup, exec_opts, arena):
    """Wire up the executor's state and its evaluators.

    Args:
      mem: Mem instance for storing variables
      fd_state: FdState() for managing descriptors
      status_lines: shared with completion.  TODO: Move this to the end.
      funcs: registry of functions (these names are completed)
      readline: stored on exec_opts as exec_opts.readline
      completion: completion module, if available
      comp_lookup: completion pattern/action
      exec_opts: ExecOpts
      arena: for printing error locations
    """
    # --- Collaborators handed in by the caller ---
    self.mem = mem
    self.fd_state = fd_state
    self.status_lines = status_lines
    self.funcs = funcs  # separate from the variable namespace; flat
    self.completion = completion
    self.comp_lookup = comp_lookup  # hooks registered by 'complete'

    # Options behind shopt and set -o; initialized from flags.
    self.exec_opts = exec_opts
    self.exec_opts.readline = readline

    self.arena = arena

    # --- Evaluators, all sharing the same mem and options ---
    self.splitter = legacy.SplitContext(self.mem)
    self.word_ev = word_eval.NormalWordEvaluator(
        mem, exec_opts, self.splitter, self)
    self.arith_ev = expr_eval.ArithEvaluator(mem, exec_opts, self.word_ev)
    self.bool_ev = expr_eval.BoolEvaluator(mem, exec_opts, self.word_ev)

    # --- Mutable runtime state ---
    self.traps = {}  # signal/hook name -> callable
    self.nodes_to_run = []  # nodes appended by signal handlers
    self.dir_stack = state.DirStack()

    # TODO: Pass these in from main()
    self.aliases = {}  # alias name -> string
    # Make-style syntax records Target() entries here (metaprogramming or
    # regular target syntax).  Whether argv[0] is make determines if it is
    # executed.
    self.targets = []

    self.waiter = process.Waiter()
    # 'sleep 5 &' registers a (PID, job#) entry here; 'jobs' displays it.
    self.job_state = process.JobState()

    self.loop_level = 0  # detects break/continue outside any loop
    self.tracer = Tracer(exec_opts, mem, self.word_ev)
    self.check_command_sub_status = False  # a hack
def __init__(self, mem, fd_state, funcs, comp_lookup, exec_opts, parse_ctx,
             devtools):
    """Wire up the executor's state and its evaluators.

    Args:
      mem: Mem instance for storing variables
      fd_state: FdState() for managing descriptors
      funcs: dict of functions
      comp_lookup: registry of completion hooks
      exec_opts: ExecOpts
      parse_ctx: for instantiating parsers; also supplies arena and aliases
      devtools: supplies dumper, debug_f and trace_f
    """
    # --- Collaborators handed in by the caller ---
    self.mem = mem
    self.fd_state = fd_state
    self.funcs = funcs
    self.comp_lookup = comp_lookup  # hooks registered by 'complete'
    self.exec_opts = exec_opts  # shopt / set -o; initialized from flags

    # --- Pieces borrowed from the parse context and devtools ---
    self.parse_ctx = parse_ctx
    self.arena = parse_ctx.arena
    self.aliases = parse_ctx.aliases  # alias name -> string

    self.dumper = devtools.dumper
    self.debug_f = devtools.debug_f  # Used by ShellFuncAction too

    # --- Evaluators, all sharing mem, options and the arena ---
    self.splitter = legacy.SplitContext(self.mem)
    self.word_ev = word_eval.NormalWordEvaluator(
        mem, exec_opts, self.splitter, self.arena, self)
    self.arith_ev = expr_eval.ArithEvaluator(
        mem, exec_opts, self.word_ev, self.arena)
    self.bool_ev = expr_eval.BoolEvaluator(
        mem, exec_opts, self.word_ev, self.arena)

    # --- Mutable runtime state ---
    self.traps = {}  # signal/hook name -> callable
    self.nodes_to_run = []  # nodes appended by signal handlers
    self.dir_stack = state.DirStack()

    # Make-style syntax records Target() entries here (metaprogramming or
    # regular target syntax).  Whether argv[0] is make determines if it is
    # executed.
    self.targets = []

    self.waiter = process.Waiter()
    # 'sleep 5 &' registers a (PID, job#) entry here; 'jobs' displays it.
    self.job_state = process.JobState()

    self.tracer = Tracer(
        parse_ctx, exec_opts, mem, self.word_ev, devtools.trace_f)

    self.loop_level = 0  # detects break/continue outside any loop
    self.check_command_sub_status = False  # a hack
def ParseAndEval(code_str):
    """Parse code_str as an arithmetic expression and return its value.

    The parsed node is printed to stdout before evaluation.
    """
    arena = test_lib.MakeArena('<arith_parse_test.py>')
    parse_ctx = parse_lib.ParseContext(arena, {})
    w_parser, _ = parse_ctx.MakeParserForCompletion(code_str, arena)

    # Both calls below reach into the word parser's private API: first
    # switch the lexer to arithmetic mode, then read the expression.
    w_parser._Next(lex_mode_e.ARITH)
    anode = w_parser._ReadArithExpr()  # need the right lex state?
    print('node:', anode)

    # Minimal runtime needed by the arithmetic evaluator.
    mem = state.Mem('', [], {}, arena)
    exec_opts = state.ExecOpts(mem, None)
    splitter = legacy.SplitContext(mem)
    ev = word_eval.CompletionWordEvaluator(mem, exec_opts, splitter, arena)
    arith_ev = expr_eval.ArithEvaluator(mem, exec_opts, ev, arena)

    return arith_ev.Eval(anode)
def ParseAndEval(code_str):
    """Parse code_str as an arithmetic expression and return its value.

    The parsed node is printed to stdout before evaluation.

    Raises:
      ExprSyntaxError: the word parser did not produce an arithmetic node.
    """
    arena = test_lib.MakeArena('<arith_parse_test.py>')
    w_parser, _ = parse_lib.MakeParserForCompletion(code_str, arena)

    anode = w_parser._ReadArithExpr()  # need the right lex state?
    if not anode:
        raise ExprSyntaxError("failed %s" % w_parser.Error())
    print('node:', anode)

    # Minimal runtime needed by the arithmetic evaluator.
    mem = state.Mem('', [], {}, None)
    exec_opts = state.ExecOpts(mem)
    splitter = legacy.SplitContext(mem)
    ev = word_eval.CompletionWordEvaluator(mem, exec_opts, splitter)
    arith_ev = expr_eval.ArithEvaluator(mem, exec_opts, ev)

    return arith_ev.Eval(anode)
def __init__(self, mem, status_lines, funcs, completion, comp_lookup,
             exec_opts, arena):
    """Wire up the executor's state and its evaluators.

    Args:
      mem: Mem instance for storing variables
      status_lines: shared with completion.  TODO: Move this to the end.
      funcs: registry of functions (these names are completed)
      completion: completion module, if available
      comp_lookup: completion pattern/action
      exec_opts: ExecOpts
      arena: for printing error locations
    """
    # --- Collaborators handed in by the caller ---
    self.mem = mem
    self.status_lines = status_lines
    self.funcs = funcs  # separate from the variable namespace; flat
    self.completion = completion
    self.comp_lookup = comp_lookup  # hooks registered by 'complete'
    self.exec_opts = exec_opts  # shopt / set -o; initialized from flags
    self.arena = arena

    # --- Evaluators, all sharing the same mem and options ---
    self.ev = word_eval.NormalWordEvaluator(mem, exec_opts, self)
    self.arith_ev = expr_eval.ArithEvaluator(mem, exec_opts, self.ev)
    self.bool_ev = expr_eval.BoolEvaluator(mem, exec_opts, self.ev)

    # --- Mutable runtime state ---
    self.traps = {}
    self.fd_state = process.FdState()
    self.dir_stack = []

    # TODO: Pass these in from main()
    self.aliases = {}  # alias name -> string
    # Make-style syntax records Target() entries here (metaprogramming or
    # regular target syntax).  Whether argv[0] is make determines if it is
    # executed.
    self.targets = []

    self.waiter = process.Waiter()
    # 'sleep 5 &' registers a (PID, job#) entry here; 'jobs' displays it.
    self.job_state = process.JobState()
def _Execute(self, node):
    """Execute one command node and return its exit status.

    Dispatches on node.tag.  Redirects are evaluated once up front; the
    branches that run in this process push an fd frame, apply them, and pop
    the frame afterwards.

    Args:
      node: of type AstNode

    Returns:
      The integer exit status, which is also stored in self.mem.last_status.

    Raises:
      _ControlFlow: for a ControlFlow node, and re-raised by loops when the
        exception is neither a break nor a continue (e.g. return).
      NotImplementedError: for ForExpr nodes.
      AssertionError: for an unknown node tag or AndOr operator.
    """
    redirects = self._EvalRedirects(node)

    # TODO: Only eval argv[0] once.  It can have side effects!
    if node.tag == command_e.SimpleCommand:
        words = braces.BraceExpandWords(node.words)
        argv = self.ev.EvalWordSequence(words)

        more_env = self.mem.GetExported()
        self._EvalEnv(node.more_env, more_env)
        thunk = self._GetThunkForSimpleCommand(argv, more_env)

        # Don't waste a process if we'd launch one anyway.
        if thunk.IsExternal():
            p = process.Process(thunk, fd_state=self.fd_state,
                                redirects=redirects)
            status = p.Run()

        else:  # Internal
            # NOTE: _EvalRedirects turns LST nodes into core/process.py
            # nodes.  And then we use polymorphism here.  Does it make sense
            # to use functional style based on the RedirType?  Might be
            # easier to read.
            self.fd_state.PushFrame()
            for r in redirects:
                r.ApplyInParent(self.fd_state)

            status = thunk.RunInParent()
            restore_fd_state = thunk.ShouldRestoreFdState()

            # Special case for exec 1>&2 (with no args): we permanently
            # change the fd state.  BUT we don't want to restore later.
            # TODO: Instead of this, maybe r.ApplyPermaent(self.fd_state)?
            if restore_fd_state:
                self.fd_state.PopAndRestore()
            else:
                self.fd_state.PopAndForget()

    elif node.tag == command_e.Sentence:
        # TODO: Compile this away.
        status = self._Execute(node.command)

    elif node.tag == command_e.Pipeline:
        status = self._RunPipeline(node)

    elif node.tag == command_e.Subshell:
        # This makes sure we don't waste a process if we'd launch one anyway.
        p = self._GetProcessForNode(node.children[0])
        status = p.Run()

    elif node.tag == command_e.DBracket:
        bool_ev = expr_eval.BoolEvaluator(self.mem, self.ev)
        ok = bool_ev.Eval(node.expr)
        if ok:
            status = 0 if bool_ev.Result() else 1
        else:
            e_die('Error evaluating boolean: %s' % bool_ev.Error())

    elif node.tag == command_e.DParen:
        arith_ev = expr_eval.ArithEvaluator(self.mem, self.ev)
        ok = arith_ev.Eval(node.child)
        if ok:
            i = arith_ev.Result()
            # Negate the value: non-zero in arithmetic is true, which is
            # zero in shell land
            status = 0 if i != 0 else 1
        else:
            e_die('Error evaluating (( )): %s' % arith_ev.Error())

    elif node.tag == command_e.Assignment:
        pairs = []
        for pair in node.pairs:
            if pair.rhs:
                # RHS can be a string or array.
                val = self.ev.EvalWordToAny(pair.rhs)
                assert isinstance(val, runtime.value), val
            else:
                # 'local x' is equivalent to local x=""
                val = runtime.Str('')
            pairs.append((pair.lhs, val))

        if node.keyword == Id.Assign_Local:
            self.mem.SetLocals(pairs)
        else:
            # NOTE: could be readonly/export/etc.
            self.mem.SetLocalsOrGlobals(pairs)

        # TODO: This should be eval of RHS, unlike bash!
        status = 0

    elif node.tag == command_e.ControlFlow:
        if node.arg_word:  # Evaluate the argument
            _, val = self.ev.EvalWordToString(node.arg_word)
            assert val.tag == value_e.Str
            arg = int(val.s)  # They all take integers
        else:
            arg = 0  # return 0, break 0 levels, etc.

        raise _ControlFlow(node.token, arg)

    # The only difference between these two is that CommandList has no
    # redirects.  We already took care of that above.
    elif node.tag in (command_e.CommandList, command_e.BraceGroup):
        self.fd_state.PushFrame()
        try:
            for r in redirects:
                r.ApplyInParent(self.fd_state)

            status = 0  # for empty list
            for child in node.children:
                status = self._Execute(child)  # last status wins
        finally:
            # A child may leave via _ControlFlow (break/continue/return) or
            # a fatal error; the group's redirects must not outlive the
            # group in that case either.
            self.fd_state.PopAndRestore()

    elif node.tag == command_e.AndOr:
        left, right = node.children
        status = self._Execute(left)

        if node.op_id == Id.Op_DPipe:
            if status != 0:
                status = self._Execute(right)
        elif node.op_id == Id.Op_DAmp:
            if status == 0:
                status = self._Execute(right)
        else:
            raise AssertionError

    elif node.tag in (command_e.While, command_e.Until):
        # TODO: Compile this out?
        # 'while' stops when the condition fails; 'until' stops when it
        # succeeds.
        stop_on_success = node.tag == command_e.Until

        while True:
            status = self._Execute(node.cond)
            if (status == 0) == stop_on_success:
                break
            try:
                status = self._Execute(node.body)  # last one wins
            except _ControlFlow as e:
                if e.IsBreak():
                    status = 0
                    break
                elif e.IsContinue():
                    status = 0
                    continue
                else:  # return needs to pop up more
                    raise

    elif node.tag == command_e.ForEach:
        iter_name = node.iter_name
        if node.do_arg_iter:
            iter_list = self.mem.GetArgv()
        else:
            words = braces.BraceExpandWords(node.iter_words)
            iter_list = self.ev.EvalWordSequence(words)
            # We need word splitting and so forth
            # NOTE: This expands globs too.  TODO: We should pass in a
            # Globber() object.

        status = 0  # in case we don't loop
        for x in iter_list:
            self.mem.SetLocal(iter_name, runtime.Str(x))

            try:
                status = self._Execute(node.body)  # last one wins
            except _ControlFlow as e:
                if e.IsBreak():
                    status = 0
                    break
                elif e.IsContinue():
                    status = 0
                    continue
                else:  # return needs to pop up more
                    raise

    elif node.tag == command_e.ForExpr:
        raise NotImplementedError(node.tag)

    elif node.tag == command_e.DoGroup:
        # Delegate to command list
        # TODO: This should be compiled out!
        status = self._Execute(node.child)

    elif node.tag == command_e.FuncDef:
        self.funcs[node.name] = node
        status = 0

    elif node.tag == command_e.If:
        done = False
        for arm in node.arms:
            status = self._Execute(arm.cond)
            if status == 0:
                status = self._Execute(arm.action)
                done = True
                break
        # TODO: The compiler should flatten this
        if not done and node.else_action is not None:
            status = self._Execute(node.else_action)

    elif node.tag == command_e.NoOp:
        status = 0  # make it true

    elif node.tag == command_e.Case:
        ok, val = self.ev.EvalWordToString(node.to_match)
        assert ok
        to_match = val.s

        status = 0  # If there are no arms, it should be zero?
        done = False

        for arm in node.arms:
            for pat_word in arm.pat_list:
                # NOTE: Is it OK that we're evaluating these as we go?
                ok, pat_val = self.ev.EvalWordToString(
                    pat_word, do_fnmatch=True)
                assert ok
                if libc.fnmatch(pat_val.s, to_match):
                    status = self._Execute(arm.action)
                    # Only the first matching pattern selects the arm.
                    # Without this break, 'case a in a|*)' would run the
                    # action once per matching pattern.
                    # TODO: Parse ;;& and for fallthrough and such?
                    done = True
                    break
            if done:
                break

    elif node.tag == command_e.TimeBlock:
        # TODO:
        # - When do we need RUSAGE_CHILDREN?
        # - Respect TIMEFORMAT environment variable.
        # "If this variable is not set, Bash acts as if it had the value"
        # $'\nreal\t%3lR\nuser\t%3lU\nsys\t%3lS'
        # "A trailing newline is added when the format string is displayed."
        start_t = time.time()  # calls gettimeofday() under the hood
        start_u = resource.getrusage(resource.RUSAGE_SELF)
        status = self._Execute(node.pipeline)
        end_t = time.time()
        end_u = resource.getrusage(resource.RUSAGE_SELF)

        real = end_t - start_t
        user = end_u.ru_utime - start_u.ru_utime
        sys_ = end_u.ru_stime - start_u.ru_stime
        print('real\t%.3f' % real, file=sys.stderr)
        print('user\t%.3f' % user, file=sys.stderr)
        print('sys\t%.3f' % sys_, file=sys.stderr)

    else:
        raise AssertionError(node.tag)

    if self.exec_opts.errexit and status != 0:
        if node.tag == command_e.SimpleCommand:
            # TODO: Add context
            e_die('%r command exited with status %d (%s)', argv[0], status,
                  node.words[0])
        else:
            e_die('%r command exited with status %d',
                  node.__class__.__name__, status)

    # TODO: Is this the right place to put it?  Does it need a stack for
    # function calls?
    self.mem.last_status = status
    return status
def Execute(self, node):
    """Execute one command node.

    Args:
      node: of type AstNode

    Returns:
      (status, cflow): the integer exit status and an EBuiltin control-flow
      marker.  Loops reset BREAK/CONTINUE to EBuiltin.NONE once they have
      honored them.  The status is also stored in self.mem.last_status.

    Raises:
      AssertionError: when words, the environment, an assignment RHS, a
        [[ ]] expression or a (( )) expression fail to evaluate, or for an
        unknown node tag or AndOr operator.
      NotImplementedError: for Case nodes.
    """
    redirects = self._EvalRedirects(node)

    # TODO: Change this to its own enum?
    # or add EBuiltin.THROW _throw?  For testing.
    # Is this different han exit?  exit should really be throw.  Because we
    # want to be able to unwind the stack, show stats, etc.  Exiting in the
    # middle is bad.
    # exit and _throw could be the same, except _throw takes an error
    # message, and exits 1, and shows traceback.
    cflow = EBuiltin.NONE

    # TODO: Only eval argv[0] once.  It can have side effects!
    if node.tag == command_e.SimpleCommand:
        argv = self.ev.EvalWords(node.words)
        if argv is None:
            err = self.ev.Error()
            # TODO: Throw shell exception
            raise AssertionError('Error evaluating words: %s' % err)
        more_env = self.ev.EvalEnv(node.more_env)
        if more_env is None:
            print(self.error_stack)
            # TODO: throw exception
            raise AssertionError()
        thunk = self._GetThunkForSimpleCommand(argv, more_env)

        # Don't waste a process if we'd launch one anyway.
        if thunk.IsExternal():
            p = Process(thunk, fd_state=self.fd_state, redirects=redirects)
            status = p.Run()

            # p.Run() is decoded here as a raw wait status.
            if os.WIFEXITED(status):
                status = os.WEXITSTATUS(status)
            elif os.WIFSIGNALED(status):
                # A command killed by a signal must not look successful;
                # shells report it as 128 + signal number.
                status = 128 + os.WTERMSIG(status)
            else:
                # Neither exited nor signaled: keep the previous fallback.
                status = 0

        else:  # Internal
            for r in redirects:
                r.ApplyInParent(self.fd_state)

            status, cflow = thunk.RunInParent()
            restore_fd_state = thunk.ShouldRestoreFdState()

            # Special case for exec 1>&2 (with no args): we permanently
            # change the fd state.  BUT we don't want to restore later.
            #
            # TODO: Instead of this, maybe r.ApplyPermaent(self.fd_state)?
            if restore_fd_state:
                self.fd_state.RestoreAll()
            else:
                self.fd_state.ForgetAll()

    elif node.tag == command_e.Sentence:
        # TODO: Compile this away
        status, cflow = self.Execute(node.command)

    elif node.tag == command_e.Pipeline:
        status, cflow = self._RunPipeline(node)

    elif node.tag == command_e.Subshell:
        # This makes sure we don't waste a process if we'd launch one anyway.
        p = self._GetProcessForNode(node.children[0])
        status = p.Run()

    elif node.tag == command_e.DBracket:
        bool_ev = expr_eval.BoolEvaluator(self.mem, self.ev)
        ok = bool_ev.Eval(node.expr)
        if ok:
            status = 0 if bool_ev.Result() else 1
        else:
            raise AssertionError(
                'Error evaluating boolean: %s' % bool_ev.Error())

    elif node.tag == command_e.DParen:
        arith_ev = expr_eval.ArithEvaluator(self.mem, self.ev)
        ok = arith_ev.Eval(node.child)
        if ok:
            i = arith_ev.Result()
            # Negate the value: non-zero in arithmetic is true, which is
            # zero in shell land
            status = 0 if i != 0 else 1
        else:
            raise AssertionError(
                'Error evaluating (( )): %s' % arith_ev.Error())

    elif node.tag == command_e.Assignment:
        pairs = []
        for pair in node.pairs:
            # NOTE: do_glob=False, because foo=*.a makes foo equal to '*.a',
            # literally.
            # TODO: Also have to evaluate the right hand side.
            ok, val = self.ev.EvalCompoundWord(pair.rhs)
            if not ok:
                # Every caller unpacks (status, cflow), so a bare None here
                # would surface as an unrelated TypeError.  Fail the same
                # way the other evaluation errors in this method do.
                raise AssertionError(
                    'Error evaluating assignment: %s' % self.ev.Error())
            pairs.append((pair.lhs, val))

        flags = 0  # TODO: Calculate from keyword/flags
        if node.keyword == Id.Assign_Local:
            self.mem.SetLocal(pairs, flags)
        else:  # could be readonly/export/etc.
            self.mem.SetGlobal(pairs, flags)

        # TODO: This should be eval of RHS, unlike bash!
        status = 0

    # The only difference between these two is that CommandList has no
    # redirects.  We already took care of that above.
    elif node.tag in (command_e.CommandList, command_e.BraceGroup):
        status = 0  # for empty list
        for child in node.children:
            status, cflow = self.Execute(child)  # last status wins
            # Stop the list so the enclosing loop can see the break/continue.
            if cflow in (EBuiltin.BREAK, EBuiltin.CONTINUE):
                break

    elif node.tag == command_e.AndOr:
        left, right = node.children
        status, cflow = self.Execute(left)

        if node.op_id == Id.Op_DPipe:
            if status != 0:
                status, cflow = self.Execute(right)
        elif node.op_id == Id.Op_DAmp:
            if status == 0:
                status, cflow = self.Execute(right)
        else:
            raise AssertionError

    elif node.tag == command_e.While:
        while True:
            status, _ = self.Execute(node.cond)
            if status != 0:
                break
            status, cflow = self.Execute(node.body)  # last one wins
            if cflow == EBuiltin.BREAK:
                cflow = EBuiltin.NONE  # reset since we respected it
                break
            if cflow == EBuiltin.CONTINUE:
                cflow = EBuiltin.NONE  # reset since we respected it

    elif node.tag == command_e.ForEach:
        iter_name = node.iter_name
        if node.do_arg_iter:
            iter_list = self.mem.GetArgv()
        else:
            iter_list = self.ev.EvalWords(node.iter_words)
            # We need word splitting and so forth
            # NOTE: This expands globs too.  TODO: We should pass in a
            # Globber() object.

        status = 0  # in case we don't loop
        cflow = EBuiltin.NONE
        for x in iter_list:
            self.mem.SetSimpleVar(iter_name, Value.FromString(x))

            status, cflow = self.Execute(node.body)
            if cflow == EBuiltin.BREAK:
                cflow = EBuiltin.NONE  # reset since we respected it
                break
            if cflow == EBuiltin.CONTINUE:
                cflow = EBuiltin.NONE  # reset since we respected it

    elif node.tag == command_e.DoGroup:
        # Delegate to command list
        # TODO: This should be compiled out!
        status, cflow = self.Execute(node.child)

    elif node.tag == command_e.FuncDef:
        self.funcs[node.name] = node
        status = 0

    elif node.tag == command_e.If:
        done = False
        for arm in node.arms:
            status, _ = self.Execute(arm.cond)
            if status == 0:
                status, _ = self.Execute(arm.action)
                done = True
                break
        # TODO: The compiler should flatten this
        if not done and node.else_action is not None:
            status, _ = self.Execute(node.else_action)

    elif node.tag == command_e.NoOp:
        status = 0  # make it true

    elif node.tag == command_e.Case:
        raise NotImplementedError

    else:
        raise AssertionError(node.tag)

    if self.exec_opts.errexit:
        if status != 0:
            # TODO: token should be set to what?  Is it node.begin_word and
            # node.end_word?
            token = None
            tb = self.mem.GetTraceback(token)
            self._SetException(
                tb, "Command %s exited with code %d" % ('TODO', status))
            # cflow should be EXCEPT

    # TODO: Is this the right place to put it?  Does it need a stack for
    # function calls?
    self.mem.last_status = status
    return status, cflow
def __init__(self, mem, exec_opts, word_ev):
    """Keep the collaborators and build an arithmetic evaluator over them.

    Args:
      mem: variable storage, consulted for $HOME, $1, etc.
      exec_opts: shell options, consulted for nounset
      word_ev: word evaluator, used for arith words and var op words
    """
    self.mem = mem
    self.exec_opts = exec_opts
    self.word_ev = word_ev

    # NOTE: Executor also instantiates one.
    self.arith_ev = expr_eval.ArithEvaluator(mem, exec_opts, word_ev)
def EvalVarSub(self, part, quoted=False):
    """Evaluate a variable substitution.

    A SimpleVarSub ($name) is looked up directly.  Otherwise the optional
    bracket op, test op (only :- and - are implemented), prefix op (length)
    and suffix op (only a literal % strip is implemented) are applied in
    that order.

    Args:
      part: SimpleVarSub or BracedVar
      quoted: whether the var sub was double quoted

    Returns:
      (True, value) on success, or (False, None) after error context has
      been added with self._AddErrorContext (or by a nested evaluation).

    Raises:
      NotImplementedError: for string indexing, unimplemented test ops,
        % on arrays, and unknown suffix ops.
      AssertionError: for an unknown bracket op.
    """
    if part.tag == word_part_e.SimpleVarSub:
        name = part.token.val[1:]  # strip off leading $
        defined, val = self._EvalVar(name, quoted=quoted)
        if not defined:
            self._AddErrorContext("Undefined variable %s" % name)
            return False, None
        # TODO: Fix this and merge it with logic below.  Respect 'nounset'
        # option, etc.
        return True, val

    name = part.token.val

    # Possibilities: Array OR Index; then Test OR Transform
    # So instead of a list of ops, it should be optional IndexOp, optional
    # TestOp, optional TransformOp.

    # empty=''
    # $unset -> '' EMPTY_UNQUOTED
    # ${unset:-foo} -> foo
    # ${unset-foo} -> foo
    # $empty -> ''
    # ${empty:-foo} -> foo
    # ${empty-foo} -> ''

    defined, val = self._EvalVar(name, quoted=quoted)

    array_ok = (name == '@')  # Don't need any op for array $@

    if defined and part.bracket_op:
        if part.bracket_op.tag == bracket_op_e.WholeArray:
            op_id = part.bracket_op.op_id

            # TODO: Change this to array_op instead of bracket_op?
            if op_id == Id.Lit_At:
                if val.IsArray():
                    array_ok = True
                else:
                    self._AddErrorContext("Can't index non-array with @")
                    return False, None

            elif op_id == Id.Arith_Star:
                if val.IsArray():
                    array_ok = True
                else:
                    self._AddErrorContext("Can't index non-array with *")
                    return False, None

            else:
                raise AssertionError(op_id)

        elif part.bracket_op.tag == bracket_op_e.ArrayIndex:
            array_ok = True
            is_array, a = val.AsArray()
            if is_array:
                anode = part.bracket_op.expr
                # TODO: This should propagate errors
                arith_ev = expr_eval.ArithEvaluator(self.mem, self)
                ok = arith_ev.Eval(anode)
                if not ok:
                    self._AddErrorContext(
                        'Error evaluating arith sub in index expression')
                    return False, None
                index = arith_ev.Result()
                try:
                    s = a[index]
                except IndexError:
                    # Out of range is treated as unset, so a test op below
                    # can still supply a default.
                    defined = False
                    val = None
                else:
                    val = Value.FromString(s)

            else:  # it's a string
                raise NotImplementedError(
                    "String indexing not implemented")

        else:
            raise AssertionError(part.bracket_op.tag)

    if defined and val.IsArray():
        if not array_ok:
            self._AddErrorContext(
                "Array was referenced without explicit index, e.g. ${a[@]} "
                "or ${a[0]}")
            return False, None

    if part.suffix_op and LookupKind(part.suffix_op.op_id) == Kind.VTest:
        op = part.suffix_op

        # The colon variants also treat an empty string as "missing".
        # TODO: Change this to a bit test.
        if op.op_id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                        Id.VTest_ColonQMark, Id.VTest_ColonPlus):
            is_falsey = not defined or val.IsEmptyString()
        else:
            is_falsey = not defined

        if op.op_id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
            if is_falsey:
                argv = []
                ok, val2 = self.EvalCompoundWord(op.arg_word)
                if not ok:
                    return False, None
                val2.AppendTo(argv)
                # TODO: This is roundabout
                val = Value.FromArray(argv)

                defined = True  # now we have a variable

        # TODO:
        # +  -- inverted test -- assign to default
        # ?  -- error
        # =  -- side effect assignment
        else:
            # Report the operator that is missing; the builtin id() function
            # was passed here by mistake.
            raise NotImplementedError(op.op_id)

    if not defined:
        # TODO: MOVE down to EvalVarSub().  Test ops will change this.
        if self.exec_opts.nounset:
            # stack will unwind
            # TODO: Need to have the ast for varsub.  BracedVarSub should
            # have a token?
            self._AddErrorContext("Unset variable %s" % name)
            return False, None
        else:
            # NOTE(review): looks like leftover debug output on stdout --
            # confirm before removing.
            print("UNDEFINED")
            # TODO: Isn't this where we do EMPTY_UNQUOTED?
            return True, Value.FromString('')

    if part.prefix_op:
        op_id = part.prefix_op

        if op_id == Id.VSub_Pound:  # LENGTH
            if val.IsArray():
                length = len(val.a)
            else:
                length = len(val.s)
            val = Value.FromString(str(length))

    # NOTE: You could have both prefix and suffix
    if part.suffix_op and LookupKind(part.suffix_op.op_id) in (
            Kind.VOp1, Kind.VOp2):
        op = part.suffix_op

        # NOTES:
        # - These are VECTORIZED on arrays
        #   - I want to allow this with my shell dialect: @{files|slice 1
        #   2|upper} does the same thing to all of them.
        # - How to do longest and shortest possible match?  bash and mksh
        #   both call fnmatch() in a loop, with possible short-circuit
        #   optimizations.
        #   - TODO: Write a test program to show quadratic behavior?
        #   - original AT&T ksh has special glob routines that returned the
        #   match positions.
        #   Implementation:
        #   - Test if it is a LITERAL or a Glob.  Then do a plain string
        #   operation.
        #   - If it's a glob, do the quadratic algorithm.
        #   - NOTE: bash also has an optimization where it extracts the
        #   LITERAL parts of the string, and does a prematch.  If none of
        #   them match, then it SKIPs the quadratic algorithm.
        #   - The real solution is to compile a glob to RE2, but I want to
        #   avoid that dependency right now... libc regex is good for a
        #   bunch of things.
        # - Bash has WIDE CHAR support for this.  With wchar_t.
        #   - All sorts of functions like xdupmbstowcs
        #
        # And then pat_subst() does some special cases.  Geez.

        # prefix strip
        if op.op_id == Id.VOp1_DPound:
            pass
        elif op.op_id == Id.VOp1_Pound:
            pass

        # suffix strip
        elif op.op_id == Id.VOp1_Percent:
            # NOTE(review): looks like leftover debug output on stdout --
            # confirm before removing.
            print(op.words)
            argv = []
            for w in op.words:
                ok, val2 = self.EvalCompoundWord(w)
                if not ok:
                    return False, None
                val2.AppendTo(argv)

            # TODO: Evaluate words, and add the SPACE VALUES, getting a
            # single string.  And then test if it's a literal or glob.
            suffix = argv[0]

            if val.IsArray():
                # TODO: Vectorize it
                raise NotImplementedError
            else:
                s = val.s
                # An empty suffix must strip nothing: ''.endswith('') is
                # always true and s[:-0] is the empty string, so without
                # the guard the whole value would be erased.
                if suffix and s.endswith(suffix):
                    s = s[:-len(suffix)]
                    val = Value.FromString(s)

        elif op.op_id == Id.VOp1_DPercent:
            pass

        # Patsub, vectorized
        elif op.op_id == Id.VOp2_Slash:
            pass

        # Either string slicing or array slicing.  However string slicing
        # has a unicode problem?  TODO: Test bash out.  We need utf-8
        # parsing in C++?
        #
        # Or maybe have a different operator for byte slice and char slice.
        elif op.op_id == Id.VOp2_Colon:
            pass

        else:
            raise NotImplementedError(op)

    return True, val
def _EvalWordPart(self, part, quoted=False):
    """Turn one word part into part values.

    Returns:
      A list of part_value rather than a single one: ${a:-'x'y} is one
      WordPart but produces two part_values.

    Raises:
      AssertionError: for an ArrayLiteralPart (expected to be handled at the
        word level) or an unknown part tag.
    """
    tag = part.tag

    if tag == word_part_e.ArrayLiteralPart:
        raise AssertionError(
            'Array literal should have been handled at word level')

    if tag == word_part_e.LiteralPart:
        # Unquoted literals are subject to split/elide; globbing is always
        # enabled here.
        return [runtime.StringPartValue(part.token.val, not quoted, True)]

    if tag == word_part_e.EscapedLiteralPart:
        val = part.token.val
        assert len(val) == 2, val  # e.g. \*
        assert val[0] == '\\'
        return [runtime.StringPartValue(val[1], False, False)]

    if tag == word_part_e.SingleQuotedPart:
        text = ''.join(t.val for t in part.tokens)
        return [runtime.StringPartValue(text, False, False)]

    if tag == word_part_e.DoubleQuotedPart:
        return [self._EvalDoubleQuotedPart(part)]

    if tag == word_part_e.CommandSubPart:
        # TODO: If token is Id.Left_ProcSubIn or Id.Left_ProcSubOut, we have
        # to supply something like /dev/fd/63.
        return [self._EvalCommandSub(part.command_list, quoted)]

    if tag == word_part_e.SimpleVarSub:
        decay_array = False
        token = part.token

        # 1. Look the value up by name, by position, or as a special var.
        #    The token text carries a leading '$' that is stripped first.
        if token.id == Id.VSub_Name:
            val = self.mem.Get(token.val[1:])
        elif token.id == Id.VSub_Number:
            val = self._EvalVarNum(int(token.val[1:]))
        else:
            val, decay_array = self._EvalSpecialVar(token.id, quoted)

        val = self._EmptyStrOrError(val, token=token)
        if decay_array:
            val = self._DecayArray(val)
        return [_ValueToPartValue(val, quoted)]

    if tag == word_part_e.BracedVarSub:
        return self._EvalBracedVarSub(part, quoted)

    if tag == word_part_e.TildeSubPart:
        # We never parse a quoted string into a TildeSubPart.
        assert not quoted
        expanded = self._EvalTildeSub(part.prefix)
        return [runtime.StringPartValue(expanded, False, False)]

    if tag == word_part_e.ArithSubPart:
        arith_ev = expr_eval.ArithEvaluator(self.mem, self.word_ev)
        num = arith_ev.Eval(part.anode)
        return [runtime.StringPartValue(str(num), True, True)]

    raise AssertionError(tag)
def _EvalBracedVarSub(self, part, quoted):
    """Evaluate a ${...} substitution.

    Returns:
      part_value[]

    Raises:
      RuntimeError: when a string is indexed with @ or *.
      NotImplementedError: for the SpliceAndAssign and Error test effects.
      AssertionError: for an unknown bracket op.
    """
    # Four kinds of operator interact:
    #
    # 1. Bracket: value -> (value, bool decay_array)
    #
    # 2. Exactly one of:
    #    a. Prefix length: value -> value
    #    b. Test:          value -> part_value[]
    #    c. Other suffix:  value -> value
    #    d. no operator:   the value as-is
    #    Prefix and suffix operators are never both applied.
    #
    # 3. decay_array is processed just before returning.
    decay_array = False  # for $*, ${a[*]}, etc.

    # 1. (var_name, var_num, token Id) -> value
    token = part.token
    if token.id == Id.VSub_Name:
        val = self.mem.Get(token.val)
    elif token.id == Id.VSub_Number:
        val = self._EvalVarNum(int(token.val))
    else:
        val, decay_array = self._EvalSpecialVar(token.id, quoted)  # $* decays

    # 2. Bracket: value -> (value, bool decay_array)
    # decay_array joins ${a[*]} and unquoted ${a[@]} AFTER suffix ops have
    # been applied.  When a prefix op takes the length, the distinction is
    # ignored.
    if part.bracket_op:
        bracket_op = part.bracket_op

        if bracket_op.tag == bracket_op_e.WholeArray:
            op_id = bracket_op.op_id

            if op_id == Id.Lit_At:
                if not quoted:
                    decay_array = True  # ${a[@]} decays, "${a[@]}" doesn't
                if val.tag == value_e.Undef:
                    val = self._EmptyStrArrayOrError(part.token)
                elif val.tag == value_e.Str:
                    raise RuntimeError("Can't index string with @")
                elif val.tag == value_e.StrArray:
                    val = runtime.StrArray(val.strs)

            elif op_id == Id.Arith_Star:
                decay_array = True  # both ${a[*]} and "${a[*]}" decay
                if val.tag == value_e.Undef:
                    val = self._EmptyStrArrayOrError(part.token)
                elif val.tag == value_e.Str:
                    raise RuntimeError("Can't index string with *")
                elif val.tag == value_e.StrArray:
                    val = runtime.StrArray(val.strs)

            else:
                raise AssertionError(op_id)  # unknown

        elif bracket_op.tag == bracket_op_e.ArrayIndex:
            # The index is evaluated before the value's type is inspected.
            # TODO: This should propagate errors
            arith_ev = expr_eval.ArithEvaluator(self.mem, self.word_ev)
            index = arith_ev.Eval(bracket_op.expr)

            if val.tag == value_e.Undef:
                pass  # it will be checked later
            elif val.tag == value_e.Str:
                # TODO: Implement this as an extension.  Requires unicode
                # support.  Bash treats it as an array.
                e_die("Can't index string %r with integer", part.token.val)
            elif val.tag == value_e.StrArray:
                try:
                    element = val.strs[index]
                except IndexError:
                    val = runtime.Undef()
                else:
                    val = runtime.Str(element)

        else:
            raise AssertionError(bracket_op.tag)

    if part.prefix_op:
        val = self._EmptyStrOrError(val)  # maybe error
        # At least for length, we can't have a test or suffix afterward.
        val = self._ApplyPrefixOp(val, part.prefix_op)

    elif part.suffix_op:
        suffix_op = part.suffix_op

        if LookupKind(suffix_op.op_id) == Kind.VTest:
            # VTest: value -> part_value[]
            new_part_vals, effect = self._ApplyTestOp(val, suffix_op, quoted)

            # NOTE: Splicing part_values is necessary because of code like
            # ${undef:-'a b' c 'd # e'}.  Each part_value can have a
            # different do_glob/do_elide setting.
            if effect == Effect.SpliceParts:
                return new_part_vals  # EARLY RETURN
            if effect in (Effect.SpliceAndAssign, Effect.Error):
                raise NotImplementedError
            # Any other effect: nothing to do; val may still be undefined.

        else:
            val = self._EmptyStrOrError(val)  # maybe error
            # Other suffix: value -> value
            val = self._ApplyOtherSuffixOp(val, suffix_op)

    # 3. With suffixes applied, decay the array if requested.
    if decay_array:
        val = self._DecayArray(val)

    # Covers the case with no prefix or suffix ops.
    val = self._EmptyStrOrError(val)

    return [_ValueToPartValue(val, quoted)]
def _Execute(self, node):
  """Execute one command node and return its exit status.

  Dispatches on node.tag.  Redirects are evaluated up front for every node
  kind.  The resulting status is also stored in self.mem.last_status.

  Args:
    node: of type AstNode

  Returns:
    Integer exit status of the command (0 means success).

  Raises:
    _FatalError: when word evaluation fails; the evaluator's errors are
      appended to self.error_stack first.
    _ControlFlow: for ControlFlow nodes, and re-raised from loop bodies when
      it is neither a break nor a continue.
    AssertionError: for an unknown node tag or AndOr operator, or when
      [[ ]] / (( )) evaluation reports an error.
    NotImplementedError: for ForExpr nodes.
  """
  redirects = self._EvalRedirects(node)

  # TODO: Only eval argv[0] once.  It can have side effects!
  if node.tag == command_e.SimpleCommand:
    words = braces.BraceExpandWords(node.words)
    argv = self.ev.EvalWordSequence(words)
    if argv is None:
      self.error_stack.extend(self.ev.Error())
      raise _FatalError()
    more_env = self.mem.GetExported()
    self._EvalEnv(node.more_env, more_env)
    thunk = self._GetThunkForSimpleCommand(argv, more_env)

    # Don't waste a process if we'd launch one anyway.
    if thunk.IsExternal():
      p = Process(thunk, fd_state=self.fd_state, redirects=redirects)
      status = p.Run()

    else:  # Internal
      #log('ARGV %s', argv)

      # NOTE: _EvalRedirects turns LST nodes into core/process.py nodes.  And
      # then we use polymorphism here.  Does it make sense to use functional
      # style based on the RedirType?  Might be easier to read.

      self.fd_state.PushFrame()
      for r in redirects:
        r.ApplyInParent(self.fd_state)

      status = thunk.RunInParent()
      restore_fd_state = thunk.ShouldRestoreFdState()

      # Special case for exec 1>&2 (with no args): we permanently change the
      # fd state.  BUT we don't want to restore later.
      # TODO: Instead of this, maybe r.ApplyPermaent(self.fd_state)?
      if restore_fd_state:
        self.fd_state.PopAndRestore()
      else:
        self.fd_state.PopAndForget()

  elif node.tag == command_e.Sentence:
    # TODO: Compile this away.
    status = self._Execute(node.command)

  elif node.tag == command_e.Pipeline:
    status = self._RunPipeline(node)

  elif node.tag == command_e.Subshell:
    # This makes sure we don't waste a process if we'd launch one anyway.
    p = self._GetProcessForNode(node.children[0])
    status = p.Run()

  elif node.tag == command_e.DBracket:
    bool_ev = expr_eval.BoolEvaluator(self.mem, self.ev)
    ok = bool_ev.Eval(node.expr)
    if ok:
      status = 0 if bool_ev.Result() else 1
    else:
      raise AssertionError('Error evaluating boolean: %s' % bool_ev.Error())

  elif node.tag == command_e.DParen:
    arith_ev = expr_eval.ArithEvaluator(self.mem, self.ev)
    ok = arith_ev.Eval(node.child)
    if ok:
      i = arith_ev.Result()
      # Negate the value: non-zero in arithmetic is true, which is zero in
      # shell land
      status = 0 if i != 0 else 1
    else:
      raise AssertionError('Error evaluating (( )): %s' % arith_ev.Error())

  elif node.tag == command_e.Assignment:
    pairs = []
    for pair in node.pairs:
      # RHS can be a string or array.
      ok, val = self.ev.EvalWordToAny(pair.rhs)
      # Check for failure BEFORE the type assertion, so that an evaluation
      # error is reported through error_stack rather than tripping the
      # assert on a placeholder value.
      if not ok:
        self.error_stack.extend(self.ev.Error())
        raise _FatalError()
      assert isinstance(val, runtime.value), val
      #log('RHS %s -> %s', pair.rhs, val)
      pairs.append((pair.lhs, val))

    if node.keyword == Id.Assign_Local:
      self.mem.SetLocals(pairs)
    else:  # could be readonly/export/etc.
      self.mem.SetGlobals(pairs)

    # TODO: This should be eval of RHS, unlike bash!
    status = 0

  elif node.tag == command_e.ControlFlow:
    if node.arg_word:  # Evaluate the argument
      ok, val = self.ev.EvalWordToString(node.arg_word)
      if not ok:
        self.error_stack.extend(self.ev.Error())
        raise _FatalError()
      assert val.tag == value_e.Str
      # NOTE: int() raises ValueError for a non-numeric argument; that is
      # not handled here.
      arg = int(val.s)  # They all take integers
    else:
      arg = 0  # return 0, break 0 levels, etc.

    raise _ControlFlow(node.token, arg)

  # The only difference between these two is that CommandList has no
  # redirects.  We already took care of that above.
  elif node.tag in (command_e.CommandList, command_e.BraceGroup):
    status = 0  # for empty list
    for child in node.children:
      status = self._Execute(child)  # last status wins

  elif node.tag == command_e.AndOr:
    #print(node.children)
    left, right = node.children
    status = self._Execute(left)

    if node.op_id == Id.Op_DPipe:
      if status != 0:
        status = self._Execute(right)
    elif node.op_id == Id.Op_DAmp:
      if status == 0:
        status = self._Execute(right)
    else:
      raise AssertionError

  elif node.tag in (command_e.While, command_e.Until):
    # TODO: Compile this out?
    if node.tag == command_e.While:
      _DonePredicate = lambda status: status != 0
    else:
      _DonePredicate = lambda status: status == 0

    while True:
      status = self._Execute(node.cond)
      if _DonePredicate(status):
        break
      try:
        status = self._Execute(node.body)  # last one wins
      except _ControlFlow as e:
        if e.IsBreak():
          status = 0
          break
        elif e.IsContinue():
          status = 0
          continue
        else:  # return needs to pop up more
          raise

  elif node.tag == command_e.ForEach:
    iter_name = node.iter_name
    if node.do_arg_iter:
      iter_list = self.mem.GetArgv()
    else:
      words = braces.BraceExpandWords(node.iter_words)
      iter_list = self.ev.EvalWordSequence(words)
      # EvalWordSequence returns None on error (see SimpleCommand above);
      # propagate it the same way instead of failing when iterating None.
      if iter_list is None:
        self.error_stack.extend(self.ev.Error())
        raise _FatalError()
      # We need word splitting and so forth
      # NOTE: This expands globs too.  TODO: We should pass in a Globber()
      # object.

    status = 0  # in case we don't loop
    for x in iter_list:
      #log('> ForEach setting %r', x)
      self.mem.SetLocal(iter_name, runtime.Str(x))
      #log('<')

      try:
        status = self._Execute(node.body)  # last one wins
      except _ControlFlow as e:
        if e.IsBreak():
          status = 0
          break
        elif e.IsContinue():
          status = 0
          continue
        else:  # return needs to pop up more
          raise

  elif node.tag == command_e.ForExpr:
    raise NotImplementedError(node.tag)

  elif node.tag == command_e.DoGroup:
    # Delegate to command list
    # TODO: This should be compiled out!
    status = self._Execute(node.child)

  elif node.tag == command_e.FuncDef:
    self.funcs[node.name] = node
    status = 0

  elif node.tag == command_e.If:
    done = False
    for arm in node.arms:
      status = self._Execute(arm.cond)
      if status == 0:
        status = self._Execute(arm.action)
        done = True
        break
    # TODO: The compiler should flatten this
    if not done and node.else_action is not None:
      status = self._Execute(node.else_action)

  elif node.tag == command_e.NoOp:
    status = 0  # make it true

  elif node.tag == command_e.Case:
    ok, val = self.ev.EvalWordToString(node.to_match)
    if not ok:
      self.error_stack.extend(self.ev.Error())
      raise _FatalError()
    to_match = val.s

    status = 0  # If there are no arms, it should be zero?
    done = False

    for arm in node.arms:
      for pat_word in arm.pat_list:
        # NOTE: Is it OK that we're evaluating these as we go?
        ok, pat_val = self.ev.EvalWordToString(pat_word, do_fnmatch=True)
        if not ok:
          self.error_stack.extend(self.ev.Error())
          raise _FatalError()
        #log('Matching word %r against pattern %r', to_match, pat_val.s)
        if libc.fnmatch(pat_val.s, to_match):
          status = self._Execute(arm.action)
          done = True
          # Stop at the first matching pattern; otherwise an arm like
          # 'a|*)' would run its action once per matching pattern.
          break
          # TODO: Parse ;;& and for fallthrough and such?
      if done:
        break

  else:
    raise AssertionError(node.tag)

  if self.exec_opts.errexit:
    if status != 0:
      # TODO: token should be set to what?  Is it node.begin_word and
      # node.end_word?
      token = None
      tb = self.mem.GetTraceback(token)
      # Only records the exception; nothing is raised from here.
      self._SetException(tb,
          "Command %s exited with code %d" % ('TODO', status))
      # TODO: raise _ControlFlow?  Except?
      # Dummy?

  # TODO: Is this the right place to put it?  Does it need a stack for
  # function calls?
  self.mem.last_status = status
  return status