def make_function_at(static, addr):
    """Register the function starting at ``addr`` using radare2's analysis.

    Returns immediately if ``static[addr]["function"]`` is already set.
    Otherwise the function is analysed through ``static.r2core`` and:

    * a new ``Function(addr)`` is added to ``static["functions"]``;
    * for every entry of the ``callrefs`` list reported by ``afj``, ``addr``
      is added to the target's ``"crefs"`` (type ``"J"``) or ``"xrefs"``
      (type ``"C"``) set;
    * every opcode offset reported by ``pdfj`` is tagged with the function;
    * one ``Block`` per entry reported by ``agj`` is created, filled with the
      offsets of its ops and added to ``static["blocks"]``.

    Args:
        static: analysis store, indexable by address and by the
            ``"functions"`` / ``"blocks"`` keys, exposing ``r2core``.
        addr: integer start address of the function.

    Returns:
        None.
    """
    if static[addr]["function"] is not None:
        # already function
        return

    rc = static.r2core
    rc.cmd("af @ %d" % (addr,))
    this_function = Function(addr)
    static["functions"].add(this_function)

    info = rc.cmd_json("afj %d" % (addr,))[0]
    for ref in info["callrefs"]:
        if ref["type"] == "J":
            static[ref["addr"]]["crefs"].add(addr)
        if ref["type"] == "C":
            static[ref["addr"]]["xrefs"].add(addr)

    function_details = rc.cmd_json("pdfj @ %d" % addr)
    # Only tag the opcodes when radare2 reports the function at this address.
    if function_details["addr"] == addr:
        for opcode in function_details["ops"]:
            static[opcode["offset"]]["function"] = this_function
            # NOTE(review): the value is not used here; the lookup is kept
            # because reading the "instruction" tag may have side effects in
            # `static` -- confirm before removing.
            _ = static[opcode["offset"]]["instruction"]

    blocks = rc.cmd_json("agj %d" % addr)[0]["blocks"]
    for block in blocks:
        this_block = Block(block["offset"])
        this_function.add_block(this_block)
        for op in block["ops"]:
            address = op["offset"]
            this_block.add(address)
            static[address]["block"] = this_block
        static["blocks"].add(this_block)
def make_function_at(static, addr):
    """Register the function starting at ``addr`` using radare2's analysis.

    Returns immediately if ``static[addr]['function']`` is already set.
    Otherwise the function is analysed through ``static.r2core`` and:

    * a new ``Function(addr)`` is added to ``static['functions']``;
    * for every entry of the ``callrefs`` list reported by ``afj``, ``addr``
      is added to the target's ``'crefs'`` (type ``"J"``) or ``'xrefs'``
      (type ``"C"``) set;
    * every opcode offset reported by ``pdfj`` is tagged with the function;
    * one ``Block`` per entry reported by ``agj`` is created, filled with the
      offsets of its ops and added to ``static['blocks']``.

    Args:
        static: analysis store, indexable by address and by the
            ``'functions'`` / ``'blocks'`` keys, exposing ``r2core``.
        addr: integer start address of the function.

    Returns:
        None.
    """
    if static[addr]['function'] is not None:
        # already function
        return

    rc = static.r2core
    rc.cmd("af @ %d" % (addr,))
    this_function = Function(addr)
    static['functions'].add(this_function)

    info = rc.cmd_json("afj %d" % (addr,))[0]
    for ref in info['callrefs']:
        if ref["type"] == "J":
            static[ref['addr']]['crefs'].add(addr)
        if ref["type"] == "C":
            static[ref['addr']]['xrefs'].add(addr)

    function_details = rc.cmd_json("pdfj @ %d" % addr)
    # Only tag the opcodes when radare2 reports the function at this address.
    if function_details['addr'] == addr:
        for opcode in function_details['ops']:
            static[opcode['offset']]['function'] = this_function
            # NOTE(review): the value is not used here; the lookup is kept
            # because reading the 'instruction' tag may have side effects in
            # `static` -- confirm before removing.
            _ = static[opcode['offset']]['instruction']

    blocks = rc.cmd_json("agj %d" % addr)[0]['blocks']
    for block in blocks:
        this_block = Block(block['offset'])
        this_function.add_block(this_block)
        for op in block['ops']:
            address = op['offset']
            this_block.add(address)
            static[address]['block'] = this_block
        static['blocks'].add(this_block)
def make_function_at(static, addr):
    """Create the function at ``addr`` from radare2's analysis results.

    A no-op when ``static[addr]['function']`` is already populated.
    Otherwise: analyses the function via ``static.r2core``, registers a new
    ``Function``, records the ``callrefs`` reported by ``afj`` on their
    targets, tags the opcodes reported by ``pdfj``, and builds one ``Block``
    per ``agj`` entry from the addresses found in the block's ``'code'`` text.
    """
    if static[addr]['function'] != None:
        return  # already function

    r2 = static.r2core
    r2.cmd("af @ %d" % (addr,))
    func = Function(addr)
    static['functions'].add(func)

    # "J" references land in the target's crefs, "C" references in its xrefs.
    summary = r2.cmd_json("afj %d" % (addr,))[0]
    for ref in summary['callrefs']:
        kind = ref["type"]
        if kind == "J":
            static[ref['addr']]['crefs'].add(addr)
        elif kind == "C":
            static[ref['addr']]['xrefs'].add(addr)

    details = r2.cmd_json("pdfj @ %d" % addr)
    if details['addr'] == addr:
        for op in details['ops']:
            offset = op['offset']
            static[offset]['function'] = func
            # Value unused; the lookup itself is preserved from the original.
            i = static[offset]['instruction']

    # Addresses are scraped from the textual listing: "| 0x<hex> ".
    addr_pattern = re.compile(r'\| (0x[a-f0-9]+) ')
    graph = r2.cmd_json("agj %d" % addr)[0]['blocks']
    for entry in graph:
        this_block = Block(entry['offset'])
        func.add_block(this_block)
        for hex_text in addr_pattern.findall(entry['code']):
            insn_addr = int(hex_text[2:], 16)
            this_block.add(insn_addr)
            static[insn_addr]['block'] = this_block
        static['blocks'].add(this_block)
def create_reduce_function(function_id, registry, run_registry=None):
    """Build and configure the reduce function numbered ``function_id``.

    Creates a ``Function`` named ``red-<function_id>`` from the reduce
    template, fills in its environment variables and its RabbitMQ / HTTP
    triggers from ``settings``, applies the configuration and returns the
    function object.
    """
    function_name = "red-{}".format(function_id)
    deploy_info = create_deploy_info(
        settings.REDUCE_PATH,
        settings.REDUCE_CONFIG_TEMPLATE_PATH,
        registry,
        run_registry
    )

    # create function
    function = Function(function_name, deploy_info, debug=settings.DEBUG)

    # load config data
    config_data = function.get_config_data()
    spec = config_data['spec']
    if spec['env'] == None:
        spec['env'] = []
    env = spec['env']

    def set_env(name, value):
        # Single place that writes an env var into this function's spec.
        update_env_var(env, name=name, value=value)

    # update env vars
    set_env("ID", str(function_id))
    set_env(settings.HDFS_HOST_KEY, settings.HDFS_HOST_VALUE)

    # An output dir of "/" is collapsed to "" so the path has one slash.
    if settings.HDFS_OUTPUT_DIR_VALUE == "/":
        output_dir = ""
    else:
        output_dir = settings.HDFS_OUTPUT_DIR_VALUE
    set_env('REDUCER_OUTPUT_FILENAME',
            "{}/out_{}.txt".format(output_dir, function_id))

    set_env(settings.HDFS_CHUNK_COUNT_KEY, settings.HDFS_CHUNK_COUNT_VALUE)
    set_env(settings.NUM_MAPPERS_KEY, settings.NUM_MAPPERS_VALUE)
    set_env(settings.HDFS_USER_KEY, settings.HDFS_USER_VALUE)
    set_env(settings.DONE_TOPIC_KEY, settings.DONE_TOPIC_VALUE)
    set_env(settings.RMQ_HOST_KEY, settings.RMQ_HOST_VALUE)
    set_env(settings.RMQ_PORT_KEY, settings.RMQ_PORT_VALUE)
    set_env(settings.RMQ_USER_KEY, settings.RMQ_USER_VALUE)
    set_env(settings.RMQ_PASS_KEY, settings.RMQ_PASS_VALUE)
    set_env(settings.EXCHANGE_NAME_KEY, settings.EXCHANGE_NAME_VALUE)

    # update rmq trigger info
    triggers = spec['triggers']
    topic = "{}{}".format(settings.REDUCE_TOPIC_PREFIX_VALUE, function_id)
    update_rmq_trigger(
        triggers,
        settings.RMQ_URL_VALUE,
        settings.EXCHANGE_NAME_VALUE,
        [topic],
        max_workers=3,
    )

    # update http trigger
    update_http_trigger(
        triggers,
        settings.REDUCE_PORT_START + function_id,
        max_workers=3,
    )

    # apply config changes
    function.apply_config(config_data)
    return function
def create_map_function(function_id, registry, run_registry=None):
    """Build and configure the map function numbered ``function_id``.

    Creates a ``Function`` named ``map-<function_id>`` from the map template,
    fills in its environment variables and its RabbitMQ / HTTP triggers from
    ``settings``, applies the configuration and returns the function object.
    """
    function_name = "map-{}".format(function_id)
    deploy_info = create_deploy_info(
        settings.MAP_PATH,
        settings.MAP_CONFIG_TEMPLATE_PATH,
        registry,
        run_registry
    )

    # create function
    function = Function(function_name, deploy_info, debug=settings.DEBUG)

    # load config data
    config_data = function.get_config_data()
    spec = config_data['spec']
    if spec['env'] == None:
        spec['env'] = []
    env = spec['env']

    def set_env(name, value):
        # Single place that writes an env var into this function's spec.
        update_env_var(env, name=name, value=value)

    # update env vars
    set_env("ID", str(function_id))
    set_env(settings.RMQ_HOST_KEY, settings.RMQ_HOST_VALUE)
    set_env(settings.RMQ_PORT_KEY, settings.RMQ_PORT_VALUE)
    set_env(settings.RMQ_USER_KEY, settings.RMQ_USER_VALUE)
    set_env(settings.RMQ_PASS_KEY, settings.RMQ_PASS_VALUE)
    set_env(settings.NUM_REDUCERS_KEY, settings.NUM_REDUCERS_VALUE)
    set_env(settings.REDUCE_TOPIC_PREFIX_KEY,
            settings.REDUCE_TOPIC_PREFIX_VALUE)
    set_env(settings.EXCHANGE_NAME_KEY, settings.EXCHANGE_NAME_VALUE)
    set_env(settings.HDFS_HOST_KEY, settings.HDFS_HOST_VALUE)
    set_env(settings.DONE_TOPIC_KEY, settings.DONE_TOPIC_VALUE)
    set_env(settings.HDFS_USER_KEY, settings.HDFS_USER_VALUE)

    # update rmq trigger info
    triggers = spec['triggers']
    topic = "{}{}".format(settings.MAP_TOPIC_PREFIX_VALUE, function_id)
    update_rmq_trigger(
        triggers,
        settings.RMQ_URL_VALUE,
        settings.EXCHANGE_NAME_VALUE,
        [topic],
        max_workers=3
    )

    # update http trigger
    update_http_trigger(
        triggers,
        settings.MAP_PORT_START + function_id,
        max_workers=3,
    )

    # apply config changes
    function.apply_config(config_data)
    return function
def test_function_definition():
    """Evaluating a FunctionDefinition yields the function and binds its name."""
    scope = Scope()
    add_expr = BinaryOperation(Reference('a'), '+', Reference('b'))
    summer = Function(['a', 'b'], [add_expr])
    definition = FunctionDefinition('sum', summer)

    result = definition.evaluate(scope)

    assert result == summer
    assert scope['sum'] == summer
def load_functions():
    """Load functions data to database."""

    print("Functions")

    # delete modules before data gets added to avoid duplicate info
    # Function.query.delete()

    # Each line of the seed file holds seven "|"-separated fields.
    with open("seed_data/seed_function") as seed_file:
        for line in seed_file:
            fields = line.rstrip().split("|")
            (name, description, additional_info, sample_code,
             output, user_id, module_id) = fields

            record = Function(name=name,
                              description=description,
                              additional_info=additional_info,
                              sample_code=sample_code,
                              output=output,
                              user_id=user_id,
                              module_id=module_id)

            # add function to session
            db.session.add(record)

    # commit changes
    db.session.commit()

    print("Function loaded.")
def apply_unifier(a, unifier):
    """Apply unifier to atom/function.

    Returns a new ``Function`` or ``Atom`` (matching the type of ``a``) whose
    terms have been substituted, or ``False`` when the unifier is empty, a
    variable has no binding, a term has an unsupported type, any substituted
    term is falsy, or ``a`` is neither a ``Function`` nor an ``Atom``.
    """
    if not unifier:
        return False

    substituted = []
    for term in a.terms:
        if isinstance(term, Constant):
            # Constants are kept as they are.
            substituted.append(term)
        elif isinstance(term, Variable):
            if term not in unifier:
                return False
            substituted.append(unifier.get(term))
        elif isinstance(term, Function):
            # Nested functions are substituted recursively; a failure there
            # shows up as a falsy entry and is caught by the all() check.
            substituted.append(apply_unifier(term, unifier))
        else:
            return False

    if not all(substituted):
        return False

    if isinstance(a, Function):
        return Function(name=a.name, terms=substituted)
    if isinstance(a, Atom):
        return Atom(name=a.name, terms=substituted, neg=a.neg, naf=a.naf)
    return False
def add_modules(username):
    """Add function/module information

    Reads the module and function fields from the submitted form, picks (or
    creates) the module the function belongs to, stores the new function and
    redirects to the user's study notes.  Unverified users are redirected to
    the login page; an empty function name flashes a message and redirects
    back to the add-modules page.
    """
    if not verify_user(username):
        return redirect("/login")

    form = request.form
    mname = form.get("mname")
    mdesc = form.get("mdesc")
    maddinfo = form.get("maddinfo")
    fname = form.get("fname")
    fdesc = form.get("fdesc")
    faddinfo = form.get("faddinfo")
    samplecode = form.get("samplecode")
    output = form.get("output")

    if fname == "":
        flash("Please input a function name.")
        return redirect("/{}/addmodules".format(username))

    # fetch user to get user_id
    user = User.query.filter_by(username=username).first()

    # A module of that name owned by this user or by user_id 1.
    owner_filter = (Module.user_id == user.user_id) | (Module.user_id == 1)
    existing_mod = Module.query.filter(owner_filter,
                                       Module.name == mname).first()

    if mname == "":
        # No module name given: fall back to the module with module_id 1.
        module = Module.query.filter_by(module_id=1).first()
    elif existing_mod:
        module = existing_mod
    else:
        module = Module(name=mname,
                        description=mdesc,
                        additional_info=maddinfo,
                        user_id=user.user_id)
        db.session.add(module)
        db.session.commit()

    new_function = Function(name=fname,
                            description=fdesc,
                            additional_info=faddinfo,
                            sample_code=samplecode,
                            output=output,
                            user_id=user.user_id,
                            module_id=module.module_id)
    db.session.add(new_function)
    db.session.commit()

    flash("Your notes have been added.")
    return redirect("/{}/studynotes".format(username))
def test_end_to_end(capsys):
    """Define a recursive factorial, call it with 5 and check value and output."""
    # fac(n) = 1 if n == 0 else n * fac(n - 1)
    is_zero = BinaryOperation(Reference('n'), '==', Number(0))
    recursive_call = FunctionCall(
        Reference('fac'),
        [BinaryOperation(Reference('n'), '-', Number(1))])
    product = BinaryOperation(Reference('n'), '*', recursive_call)
    body = Conditional(is_zero, [Number(1)], [product])
    yat_fac = FunctionDefinition('fac', Function(['n'], [body]))

    s = Scope()
    yat_fac.evaluate(s)
    call = FunctionCall(Reference('fac'), [Number(5)])
    a = Print(call).evaluate(s)

    out, err = capsys.readouterr()
    assert a == Number(120)
    assert out == '120\n'
    assert err == ''
def addfnc(self, name, params, rettype):
    '''
    Starts a new function and makes it the active one.

    If a function is already active, it is pushed (together with the
    current location) onto self.fncsl first.
    '''
    enclosing = self.fnc
    if enclosing:
        self.fncsl.append((enclosing, self.loc))

    self.fnc = Function(name, params, rettype)
    created = self.fnc
    self.fncs[name] = created
    self.prog.addfnc(created)
class Parser(object):
    '''
    Common stuff for parser for any language

    NOTE(review): this class reads self.OROP, self.ANDOP and self.NOTOP and
    calls self.getline(...) and self.parse(...), none of which are defined
    here -- presumably they are supplied by language-specific subclasses.
    '''

    def __init__(self, optifs=True, postprocess=True, nobcs=False,
                 slice=False):
        '''
        Creates an empty program and resets all parsing state.

        optifs      -- when true, visit_if folds "loop-less" if-statements
                       into 'ite' expressions (see optimizeif)
        postprocess -- when false, postprocess() does nothing
        nobcs       -- stored as self.nobcs (not read inside this class)
        slice       -- when true, parse_code calls prog.slice() at the end
        '''
        self.prog = Program()

        self.fncs = {}      # function name -> function
        self.fncsl = []     # stack of suspended (function, location) pairs
        self.fnc = None     # function currently being built
        self.loc = None     # location currently being built

        self.optifs = optifs
        self.postproc = postprocess
        self.slice = slice

        self.loops = []     # stack of (condloc, exitloc) of enclosing loops

        self.cnt = 0        # counter behind newcnt() / ssavar()

        self.warns = []

        self.hasbcs = False
        self.nobcs = nobcs

    def newcnt(self):
        ''' Increments the counter and returns its new value '''
        self.cnt += 1
        return self.cnt

    def ssavar(self, var):
        ''' Returns a fresh temporary name of the form "<var>_&<n>&" '''
        return '%s_&%d&' % (var, self.newcnt())

    def addwarn(self, msg, *args):
        ''' Adds a warning (%-formatted with args, if any) to the program '''
        if args:
            msg %= args
        self.prog.addwarn(str(msg))

    def rmemptyfncs(self):
        '''
        Removes empty functions, i.e., declarations only
        '''
        # Iterate over a snapshot: functions are removed while looping.
        for fnc in list(self.prog.getfncs()):
            if fnc.initloc is None:
                self.prog.rmfnc(fnc.name)

    def rmunreachlocs(self, fnc):
        '''
        Removes unreachable locations from the graph
        '''
        visited = set()
        tovisit = [fnc.initloc]

        # Walk the True/False transitions starting from the initial location.
        while tovisit:
            loc = tovisit.pop()
            if loc in visited:
                continue
            visited.add(loc)

            l1 = fnc.trans(loc, True)
            if l1:
                tovisit.append(l1)
            l2 = fnc.trans(loc, False)
            if l2:
                tovisit.append(l2)

        # Iterate over a snapshot: locations are removed while looping.
        for loc in list(fnc.locs()):
            if loc not in visited:
                fnc.rmloc(loc)

    def ssa(self, fnc):
        '''
        Converts exprs of each loc to SSA form
        '''
        for loc in fnc.locs():

            # Find last appearance of each var
            last = {}
            for i, (var, _) in enumerate(fnc.exprs(loc)):
                last[var] = i

            # Replace non-last appearance by a fresh var
            m = {}
            exprs = []
            for i, (var, expr) in enumerate(fnc.exprs(loc)):

                for v1, v2 in m.items():
                    expr = expr.replace(v1, Var(v2))

                if var == VAR_RET:
                    # The return variable is never renamed.
                    newvar = var
                else:
                    if last[var] > i:
                        newvar = m[var] = self.ssavar(var)
                    else:
                        # Last assignment keeps the real name; later reads
                        # must no longer be redirected to a temporary.
                        m.pop(var, None)
                        newvar = var

                exprs.append((newvar, expr))

            fnc.replaceexprs(loc, exprs)

    def rmtmp(self, fnc):
        '''
        Removes (merges) "tmp" or SSA-generated assignments
        '''
        for loc in fnc.locs():

            m = {}
            exprs = []
            primed = set()

            lastret = None

            # Remember "real" vars and replace temps
            for var, expr in fnc.exprs(loc):

                expr.prime(primed)

                for v, e in m.items():
                    expr = expr.replace(v, e)

                if var.endswith('&'):
                    # Temporary: inline its expression into later uses.
                    m[var] = expr
                else:
                    if var == VAR_RET:
                        lastret = len(exprs)
                    exprs.append((var, expr))
                    primed.add(var)

            # "Merge" return stmts
            nexprs = []
            retexpr = None
            retcond = None

            for i, (var, expr) in enumerate(exprs):

                if var == VAR_RET:
                    tmpretcond = self.getretcond(expr)
                    if tmpretcond is True or retcond is None:
                        retcond = tmpretcond
                    elif tmpretcond is not None and retcond is not True:
                        retcond = Op(self.OROP, retcond, tmpretcond)

                    if retexpr:
                        retexpr = retexpr.replace(VAR_RET, expr)
                    else:
                        retexpr = expr

                    # Only the last return assignment is emitted.
                    if i == lastret:
                        nexprs.append((var, retexpr))

                else:
                    if retcond is True:
                        # Already returned unconditionally: drop assignment.
                        continue
                    elif retcond:
                        # Assign only when no earlier return has fired.
                        expr = Op('ite', Op(self.NOTOP, retcond),
                                  expr, Var(var))
                    nexprs.append((var, expr))

            fnc.replaceexprs(loc, nexprs)

    def getretcond(self, expr):
        '''
        Returns the condition under which a return expression "returns":
        True (always), None (never, i.e., just the return variable itself)
        or an expression built from the conditions of nested 'ite's.
        '''
        if isinstance(expr, Op) and expr.name == 'ite':
            icond = expr.args[0]
            ct = self.getretcond(expr.args[1])
            cf = self.getretcond(expr.args[2])

            cond = []

            if ct is None and cf is None:
                return None

            if ct is True and cf is True:
                return True

            if ct:
                if ct is True:
                    cond.append(icond.copy())
                else:
                    cond.append(Op(self.ANDOP, icond.copy(), ct.copy()))

            if cf:
                nicond = Op(self.NOTOP, icond)
                if cf is True:
                    cond.append(nicond.copy())
                else:
                    cond.append(Op(self.ANDOP, nicond.copy(), cf.copy()))

            if len(cond) == 1:
                return cond[0]
            else:
                return Op(self.OROP, cond[0], cond[1])

        elif isinstance(expr, Var) and expr.name == VAR_RET:
            return None

        else:
            return True

    def postprocess(self):
        '''
        Runs the clean-up passes (unless disabled in the constructor)
        '''
        if not self.postproc:
            return

        self.rmemptyfncs()
        for fnc in self.prog.fncs.values():
            self.rmunreachlocs(fnc)
            self.ssa(fnc)
            self.rmtmp(fnc)

    def visit(self, node):
        '''
        Dispatches to "visit_<class name of node>"; raises NotSupported
        when there is no such method.  A None node is skipped.
        '''
        # Skip None-node
        if node is None:
            return

        # Name of the node class
        name = node.__class__.__name__

        # Get method
        meth = getattr(self, 'visit_%s' % (name,), None)
        if meth is None:
            raise NotSupported("Unimplemented visitor: '%s'" % (name,))

        # Call visitor method
        return meth(node)

    def visit_expr(self, node, allowlist=False, allownone=False):
        '''
        Visits a node that has to produce an expression.

        allowlist -- also accept a list consisting only of Expr
        allownone -- accept an empty result; otherwise a warning is added
                     and Const('?') is returned instead

        Raises ParseError on a non-empty result that is not an Expr.
        '''
        res = self.visit(node)

        if isinstance(res, list) and allowlist:
            if all(isinstance(r, Expr) for r in res):
                return res

        if res and not isinstance(res, Expr):
            raise ParseError("Expected expression, got '%s'" % (res,),
                             line=node.coord.line)

        if (not res) and (not allownone):
            if node:
                self.addwarn("Expression expected at line %s" % (
                    node.coord.line,))
            else:
                self.addwarn("Expression expected")
            res = Const('?')

        return res

    def visit_if(self, node, cond, true, false):
        '''
        Adds locations and transitions for an if-statement with condition
        `cond`, if-branch `true` and (optional) else-branch `false`.
        '''
        # Add condition (with new location)
        preloc = self.loc
        condloc = self.addloc(
            'the condition of the if-statement at line %d' % (
                self.getline(cond)))
        condexpr = self.visit_expr(cond, allowlist=True)
        if isinstance(condexpr, list):
            condexpr = self.expr_list_and(condexpr)
        self.addexpr(VAR_COND, condexpr)

        # Add true loc
        trueline = self.getline(true) or self.getline(node)
        trueloc = self.addloc('inside the if-branch starting at line %d' % (
            trueline))
        self.visit(true)
        afterloc1 = self.loc

        afterloc = self.addloc(
            'after the if-statement beginning at line %s' % (
                self.getline(node)))

        # Add (general) transitions
        self.addtrans(preloc, True, condloc)
        self.addtrans(condloc, True, trueloc)
        self.addtrans(afterloc1, True, afterloc)

        # Add false loc
        if false:
            falseloc = self.addloc(
                'inside the else-branch starting at line %d' % (
                    self.getline(false)))
            self.visit(false)
            afterloc2 = self.loc

            self.addtrans(condloc, False, falseloc)
            self.addtrans(afterloc2, True, afterloc)

        else:
            self.addtrans(condloc, False, afterloc)
            falseloc = None

        # "Loop-less" if-statement
        # (afterloc2 is only read when `false` is set, i.e., when defined)
        if trueloc == afterloc1 and ((not false) or falseloc == afterloc2):
            if self.optifs:
                self.optimizeif(preloc, condexpr, trueloc, falseloc)
                return

        self.loc = afterloc

    def optimizeif(self, preloc, condexpr, trueloc, falseloc):
        '''
        Optimized "simple" or "loop-less" if statement
        '''
        # Remove unneded part of the graph
        self.fnc.rmtrans(preloc, True)
        self.loc = preloc

        # Keep track of assigned vars
        varss = set()
        varsl = []
        mt = {}
        mf = {}

        # Add exprs from branches
        def addvars(loc, m):
            for (var, expr) in self.fnc.exprs(loc):
                newvar = self.ssavar(var)

                if var not in varss:
                    varss.add(var)
                    varsl.append(var)

                # Replace vars mapped so far
                for (v1, v2) in m.items():
                    expr = expr.replace(v1, Var(v2))

                self.addexpr(newvar, expr)

                # Remember replacement
                m[var] = newvar

        addvars(trueloc, mt)
        if falseloc is not None:
            addvars(falseloc, mf)

        # Add condition
        condvar = self.ssavar('$cond')
        self.addexpr(condvar, condexpr.copy())

        # Merge branches
        for var in varsl:
            self.addexpr(var, Op('ite', Var(condvar),
                                 Var(mt.get(var, var)),
                                 Var(mf.get(var, var))))

    def expr_list_and(self, exprs):
        '''
        Joins a list of expressions with '&&'; returns None for an empty
        list and the sole element for a one-element list.
        '''
        if len(exprs) == 0:
            return None

        newexpr = exprs[0]
        for expr in exprs[1:]:
            newexpr = Op('&&', newexpr, expr, line=expr.line)
        return newexpr

    def visit_loop(self, node, init, cond, next, body, do, name,
                   prebody=None):
        '''
        Adds locations and transitions for a loop.

        init    -- statements visited before the loop (may be empty)
        cond    -- loop condition: an Expr or a node to visit; an empty
                   condition becomes Const('1')
        next    -- statements visited after the body (may be empty)
        body    -- loop body
        do      -- when true, the loop is entered at the body instead of
                   at the condition
        name    -- loop kind, used only in location descriptions
        prebody -- optional iterable of addexpr() argument tuples added at
                   the start of the body location
        '''
        # Visit init stmts
        if init:
            self.visit(init)

        # Add condition (with new location)
        preloc = self.loc
        if isinstance(cond, Expr):
            condexpr = cond
        else:
            condexpr = self.visit_expr(cond, allowlist=True)
            if isinstance(condexpr, list):
                condexpr = self.expr_list_and(condexpr)

        if not condexpr:
            condexpr = Const('1')
        condloc = self.addloc("the condition of the '%s' loop at line %s" % (
            name, condexpr.line or self.getline(node)))
        self.addexpr(VAR_COND, condexpr)

        # Add exit loc
        exitloc = self.addloc("*after* the '%s' loop starting at line %d" % (
            name, self.getline(node)))

        # Add body with (new location)
        bodyloc = self.addloc(
            "inside the body of the '%s' loop beginning at line %d" % (
                name, self.getline(body) or self.getline(node)))
        self.addloop((condloc, exitloc))
        if prebody:
            # Explicit loop: map() is lazy on Python 3, so using it only for
            # its side effects would silently add nothing.
            for exprargs in prebody:
                self.addexpr(*exprargs)
        self.visit(body)
        if next:
            self.visit(next)
        self.poploop()
        afterloc = self.loc

        # Connect transitions
        self.addtrans(preloc, True, bodyloc if do else condloc)
        self.addtrans(condloc, True, bodyloc)
        self.addtrans(condloc, False, exitloc)
        self.addtrans(afterloc, True, condloc)

        self.loc = exitloc

    def addfnc(self, name, params, rettype):
        '''
        Starts a new function and makes it the active one; an already
        active function is suspended on self.fncsl together with its
        current location.
        '''
        if self.fnc:
            self.fncsl.append((self.fnc, self.loc))
        self.fnc = Function(name, params, rettype)
        self.fncs[name] = self.fnc
        self.prog.addfnc(self.fnc)

    def endfnc(self):
        '''
        Ends the active function, resuming the most recently suspended
        one (if any).
        '''
        if self.fncsl:
            self.fnc, self.loc = self.fncsl.pop()
        else:
            self.fnc = None
            self.loc = None

    def addloc(self, desc):
        ''' Adds a location to the active function and makes it current '''
        assert (self.fnc), 'No active fnc!'
        self.loc = self.fnc.addloc(desc=desc)
        return self.loc

    def addexpr(self, name, expr, loc=None, idx=None):
        ''' Adds an assignment at `loc` (default: current location) '''
        assert (self.fnc), 'No active fnc!'
        if not loc:
            loc = self.loc
        self.fnc.addexpr(loc, name, expr, idx=idx)

    def numexprs(self, loc=None):
        ''' Number of exprs at `loc` (default: current location) '''
        assert (self.fnc), 'No active fnc!'
        if not loc:
            loc = self.loc
        return self.fnc.numexprs(loc)

    def rmlastexprs(self, loc=None, num=1):
        ''' Removes the last `num` exprs at `loc` (default: current) '''
        assert (self.fnc), 'No active fnc!'
        if not loc:
            loc = self.loc
        self.fnc.rmlastexprs(loc, num)

    def addtrans(self, loc1, cond, loc2):
        ''' Adds the transition loc1 --cond--> loc2 to the active function '''
        assert (self.fnc), 'No active fnc!'
        self.fnc.addtrans(loc1, cond, loc2)

    def addtype(self, var, type, skiponexist=True):
        ''' Records the type of a variable in the active function '''
        assert (self.fnc), 'No active fnc!'
        self.fnc.addtype(var, type, skiponexist)

    def addloop(self, l):
        ''' Pushes a loop descriptor onto the stack of enclosing loops '''
        self.loops.append(l)

    def poploop(self):
        ''' Pops and returns the innermost loop descriptor '''
        return self.loops.pop()

    def lastloop(self):
        ''' Returns the innermost loop descriptor, or None outside loops '''
        return self.loops[-1] if len(self.loops) else None

    @classmethod
    def parse_code(cls, code, *args, **kwargs):
        '''
        Parses `code` with a fresh parser (built from *args/**kwargs),
        post-processes, optionally slices, and returns the program.
        '''
        parser = cls(*args, **kwargs)
        parser.parse(code)
        parser.postprocess()
        if parser.slice:
            parser.prog.slice()
        return parser.prog
def make_function_at(static, address, recurse=True):
    """Discover the function starting at ``address`` by recursive descent.

    Does nothing (besides printing a notice) unless ``static['arch']`` is
    ``"i386"`` or ``"x86-64"``, and does nothing if
    ``static[address]['function']`` is already set.  Otherwise:

    1. follows instruction destinations from ``address``, tagging every
       visited instruction with the new ``Function`` and recording
       call targets (xrefs), other non-fallthrough targets (crefs) and
       block starts;
    2. builds one ``Block`` per block start by walking instructions until
       an ending instruction, a zero-sized instruction or the next block
       start;
    3. when ``recurse`` is true, repeats the whole process for every call
       target that is not part of a function yet.

    Args:
        static: analysis store, indexable by address and by the
            ``'arch'`` / ``'functions'`` / ``'blocks'`` keys.
        address: integer start address of the function.
        recurse: whether to also create the functions called from this one.

    Returns:
        None.
    """
    if static['arch'] not in ("i386", "x86-64"):
        print("*** static only works with x86(_64), someone should fix it")
        return
    if static[address]['function'] is not None:
        # already function
        return

    block_starts = set([address])
    function_starts = set()
    this_function = Function(address)
    static['functions'].add(this_function)

    def disassemble(insn_addr):
        # NOTE(review): the bytes read here are not used; the call is kept in
        # case it matters to `static` -- confirm before removing.
        static.memory(insn_addr, 0x10)
        d = static[insn_addr]['instruction']
        static[insn_addr]['function'] = this_function
        for (c, flag) in d.dests():
            if flag == DESTTYPE.call:
                static._auto_update_name(c, "sub_%x" % (c))
                function_starts.add(c)
                static[c]['xrefs'].add(insn_addr)
                # add this to the potential function boundary starts
                continue
            if c != insn_addr + d.size():
                # destination is not the fallthrough: a real branch target
                static[c]['crefs'].add(insn_addr)
                static._auto_update_name(c, "loc_%x" % (c))
                block_starts.add(c)
            # if we come after a jump and are an implicit xref, we are the
            # start of a new block
            elif d.is_jump():
                static._auto_update_name(c, "loc_%x" % (c))
                block_starts.add(c)
        return d.dests()

    # recursive descent pass
    pending = Queue.Queue()
    done = set()
    pending.put(address)
    while not pending.empty():
        dests = disassemble(pending.get())
        for (d, flag) in dests:
            if flag == DESTTYPE.call:
                # this will get handled in the function pass
                continue
            if d not in done:
                pending.put(d)
                done.add(d)

    # block finding pass
    for b in block_starts:
        this_block = Block(b)
        this_function.add_block(this_block)
        cursor = b
        i = static[cursor]['instruction']
        while not i.is_ending() and i.size() != 0:
            if cursor + i.size() in block_starts:
                break
            cursor += i.size()
            i = static[cursor]['instruction']
            this_block.add(cursor)
            static[cursor]['block'] = this_block
        static['blocks'].add(this_block)

    # find more functions (only when asked to: `recurse` used to be ignored)
    if recurse:
        for f in function_starts:
            if static[f]['function'] is None:
                make_function_at(static, f)
elif isinstance(term, Function): new_terms.append(apply_unifier(term, unifier)) else: return False if all(new_terms): return Function(name=a.name, terms=new_terms) if isinstance( a, Function) else Atom( name=a.name, terms=new_terms, neg=a.neg, naf=a.naf) if isinstance(a, Atom) else False else: return False else: return False # Test if __name__ == '__main__': from model import * a = Atom(name='a', terms=[ Function('f1', [Function('f2', [Variable('X')]), Variable('Y')]) ]) b = Atom(name='a', terms=[ Function('f1', [Function('f2', [Constant('a')]), Constant('b')]) ]) print(apply_unifier(a, unify_atom(a, b)))
def test_function_call():
    """Calling a one-parameter identity function returns its argument."""
    scope = Scope()
    identity = Function(['a'], [Reference('a')])
    scope['function'] = identity

    call = FunctionCall(Reference('function'), [Number(8)])
    result = call.evaluate(scope)

    assert result == Number(8)
def make_function_at(static, address, recurse=True):
    """Discover the function starting at ``address`` by recursive descent.

    A no-op when ``static[address]['function']`` is already set.  Otherwise
    follows instruction destinations to tag the function's instructions and
    collect block / call targets, builds the ``Block`` objects, and, when
    ``recurse`` is true, does the same for every call target that is not yet
    part of a function.  Whenever more than 0.01 s has passed since the last
    pause, the work sleeps for 0.01 s.
    """
    if static[address]['function'] != None:
        return  # already function

    start = time.time()
    block_starts = set([address])
    function_starts = set()
    this_function = Function(address)
    static['functions'].add(this_function)

    def disassemble(insn_addr):
        raw = static.memory(insn_addr, 0x10)
        insn = static[insn_addr]['instruction']
        static[insn_addr]['function'] = this_function
        for (dest, kind) in insn.dests():
            if kind == DESTTYPE.call:
                # call target: potential start of another function
                static._auto_update_name(dest, "sub_%x" % (dest))
                function_starts.add(dest)
                static[dest]['xrefs'].add(insn_addr)
            elif dest != insn_addr + insn.size():
                # not the fallthrough: an explicit branch target
                static[dest]['crefs'].add(insn_addr)
                static._auto_update_name(dest, "loc_%x" % (dest))
                block_starts.add(dest)
            elif insn.is_jump() and not insn.is_call():
                # fallthrough after a jump starts a new block
                static._auto_update_name(dest, "loc_%x" % (dest))
                block_starts.add(dest)
        return insn.dests()

    # recursive descent pass
    worklist = Queue.Queue()
    seen = set()
    worklist.put(address)
    while not worklist.empty():
        for (target, kind) in disassemble(worklist.get()):
            if kind == DESTTYPE.call:
                # handled in the function pass below
                continue
            if target not in seen:
                worklist.put(target)
                seen.add(target)
        if (time.time() - start) > 0.01:
            time.sleep(0.01)
            start = time.time()

    # block finding pass
    for block_start in block_starts:
        this_block = Block(block_start)
        this_function.add_block(this_block)
        cursor = block_start
        insn = static[cursor]['instruction']
        while not insn.is_ending() and insn.size() != 0:
            if cursor + insn.size() in block_starts:
                break
            cursor += insn.size()
            insn = static[cursor]['instruction']
            this_block.add(cursor)
            static[cursor]['block'] = this_block
            if (time.time() - start) > 0.01:
                time.sleep(0.01)
                start = time.time()
        static['blocks'].add(this_block)

    # find more functions
    if not recurse:
        return
    for callee in function_starts:
        if static[callee]['function'] == None:
            make_function_at(static, callee)
class Parser(object):
    '''
    Common stuff for parser for any language

    NOTE(review): this class reads self.OROP, self.ANDOP and self.NOTOP and
    calls self.getline(...) and self.parse(...), none of which are defined
    here -- presumably they are supplied by language-specific subclasses.
    '''

    def __init__(self, optifs=True, postprocess=True, nobcs=False,
                 slice=False):
        '''
        Creates an empty program and resets all parsing state.

        optifs      -- when true, visit_if folds "loop-less" if-statements
                       into 'ite' expressions (see optimizeif)
        postprocess -- when false, postprocess() does nothing
        nobcs       -- stored as self.nobcs (not read inside this class)
        slice       -- when true, parse_code calls prog.slice() at the end
        '''
        self.prog = Program()

        self.fncs = {}      # function name -> function
        self.fncsl = []     # stack of suspended (function, location) pairs
        self.fnc = None     # function currently being built
        self.loc = None     # location currently being built

        self.optifs = optifs
        self.postproc = postprocess
        self.slice = slice

        self.loops = []     # stack of (condloc, exitloc) of enclosing loops

        self.cnt = 0        # counter behind newcnt() / ssavar()

        self.warns = []

        self.hasbcs = False
        self.nobcs = nobcs

    def newcnt(self):
        ''' Increments the counter and returns its new value '''
        self.cnt += 1
        return self.cnt

    def ssavar(self, var):
        ''' Returns a fresh temporary name of the form "<var>_&<n>&" '''
        return '%s_&%d&' % (var, self.newcnt())

    def addwarn(self, msg, *args):
        ''' Adds a warning (%-formatted with args, if any) to the program '''
        if args:
            msg %= args
        self.prog.addwarn(str(msg))

    def rmemptyfncs(self):
        '''
        Removes empty functions, i.e., declarations only
        '''
        # Iterate over a snapshot: functions are removed while looping.
        for fnc in list(self.prog.getfncs()):
            if fnc.initloc is None:
                self.prog.rmfnc(fnc.name)

    def rmunreachlocs(self, fnc):
        '''
        Removes unreachable locations from the graph
        '''
        visited = set()
        tovisit = [fnc.initloc]

        # Walk the True/False transitions starting from the initial location.
        while tovisit:
            loc = tovisit.pop()
            if loc in visited:
                continue
            visited.add(loc)

            l1 = fnc.trans(loc, True)
            if l1:
                tovisit.append(l1)
            l2 = fnc.trans(loc, False)
            if l2:
                tovisit.append(l2)

        # Iterate over a snapshot: locations are removed while looping.
        for loc in list(fnc.locs()):
            if loc not in visited:
                fnc.rmloc(loc)

    def ssa(self, fnc):
        '''
        Converts exprs of each loc to SSA form
        '''
        for loc in fnc.locs():

            # Find last appearance of each var
            last = {}
            for i, (var, _) in enumerate(fnc.exprs(loc)):
                last[var] = i

            # Replace non-last appearance by a fresh var
            m = {}
            exprs = []
            for i, (var, expr) in enumerate(fnc.exprs(loc)):

                for v1, v2 in m.items():
                    expr = expr.replace(v1, Var(v2))

                if var == VAR_RET:
                    # The return variable is never renamed.
                    newvar = var
                else:
                    if last[var] > i:
                        newvar = m[var] = self.ssavar(var)
                    else:
                        # Last assignment keeps the real name; later reads
                        # must no longer be redirected to a temporary.
                        m.pop(var, None)
                        newvar = var

                # Remember which real variable a renamed assignment was for.
                if var != newvar:
                    expr.original = (var, self.cnt)

                exprs.append((newvar, expr))

            fnc.replaceexprs(loc, exprs)

    def rmtmp(self, fnc):
        '''
        Removes (merges) "tmp" or SSA-generated assignments
        '''
        for loc in fnc.locs():

            m = {}
            exprs = []
            primed = set()

            lastret = None

            # Remember "real" vars and replace temps
            for var, expr in fnc.exprs(loc):

                expr.prime(primed)

                for v, e in m.items():
                    expr = expr.replace(v, e)

                # Clear the `original` marker on the three 'ite' operands.
                if isinstance(expr, Op) and expr.name == 'ite':
                    expr.args[0].original = None
                    expr.args[1].original = None
                    expr.args[2].original = None

                if var.endswith('&'):
                    # Temporary: inline its expression into later uses.
                    m[var] = expr
                else:
                    if var == VAR_RET:
                        lastret = len(exprs)
                    exprs.append((var, expr))
                    if var != VAR_RET:
                        primed.add(var)

            # "Merge" return stmts
            nexprs = []
            retexpr = None
            retcond = None

            for i, (var, expr) in enumerate(exprs):

                if var == VAR_RET:
                    tmpretcond = self.getretcond(expr)
                    if tmpretcond is True or retcond is None:
                        retcond = tmpretcond
                    elif tmpretcond is not None and retcond is not True:
                        retcond = Op(self.OROP, retcond, tmpretcond)

                    if retexpr:
                        retexpr = retexpr.replace(VAR_RET, expr)
                    else:
                        retexpr = expr

                    # Only the last return assignment is emitted.
                    if i == lastret:
                        nexprs.append((var, retexpr))

                else:
                    if retcond is True:
                        # Already returned unconditionally: drop assignment.
                        continue
                    elif retcond:
                        # Assign only when no earlier return has fired.
                        expr = Op('ite', Op(self.NOTOP, retcond),
                                  expr, Var(var))
                    nexprs.append((var, expr))

            fnc.replaceexprs(loc, nexprs)

    def getretcond(self, expr):
        '''
        Returns the condition under which a return expression "returns":
        True (always), None (never, i.e., just the return variable itself)
        or an expression built from the conditions of nested 'ite's.
        '''
        if isinstance(expr, Op) and expr.name == 'ite':
            icond = expr.args[0]
            ct = self.getretcond(expr.args[1])
            cf = self.getretcond(expr.args[2])

            cond = []

            if ct is None and cf is None:
                return None

            if ct is True and cf is True:
                return True

            if ct:
                if ct is True:
                    cond.append(icond.copy())
                else:
                    cond.append(Op(self.ANDOP, icond.copy(), ct.copy()))

            if cf:
                nicond = Op(self.NOTOP, icond)
                if cf is True:
                    cond.append(nicond.copy())
                else:
                    cond.append(Op(self.ANDOP, nicond.copy(), cf.copy()))

            if len(cond) == 1:
                return cond[0]
            else:
                return Op(self.OROP, cond[0], cond[1])

        elif isinstance(expr, Var) and expr.name == VAR_RET:
            return None

        else:
            return True

    def postprocess(self):
        '''
        Runs the clean-up passes (unless disabled in the constructor)
        '''
        if not self.postproc:
            return

        self.rmemptyfncs()
        for fnc in self.prog.fncs.values():
            self.rmunreachlocs(fnc)
            self.ssa(fnc)
            self.rmtmp(fnc)

    def visit(self, node):
        '''
        Dispatches to "visit_<class name of node>"; raises NotSupported
        when there is no such method.  A None node is skipped.
        '''
        # Skip None-node
        if node is None:
            return

        # Name of the node class
        name = node.__class__.__name__

        # Get method
        meth = getattr(self, 'visit_%s' % (name,), None)
        if meth is None:
            raise NotSupported("Unimplemented visitor: '%s'" % (name,))

        # Call visitor method
        return meth(node)

    def visit_expr(self, node, allowlist=False, allownone=False):
        '''
        Visits a node that has to produce an expression.

        allowlist -- also accept a list consisting only of Expr
        allownone -- accept an empty result; otherwise a warning is added
                     and Const('?') is returned instead

        Raises ParseError on a non-empty result that is not an Expr.
        '''
        res = self.visit(node)

        if isinstance(res, list) and allowlist:
            if all(isinstance(r, Expr) for r in res):
                return res

        if res and not isinstance(res, Expr):
            raise ParseError("Expected expression, got '%s'" % (res,),
                             line=node.coord.line)

        if (not res) and (not allownone):
            if node:
                self.addwarn("Expression expected at line %s" % (
                    node.coord.line,))
            else:
                self.addwarn("Expression expected")
            res = Const('?')

        return res

    def visit_if(self, node, cond, true, false):
        '''
        Adds locations and transitions for an if-statement with condition
        `cond`, if-branch `true` and (optional) else-branch `false`.
        '''
        # Add condition (with new location)
        preloc = self.loc
        condloc = self.addloc(
            'the condition of the if-statement at line %d' % (
                self.getline(cond)))
        condexpr = self.visit_expr(cond, allowlist=True)
        if isinstance(condexpr, list):
            condexpr = self.expr_list_and(condexpr)
        self.addexpr(VAR_COND, condexpr)

        # Add true loc
        trueline = self.getline(true) or self.getline(node)
        trueloc = self.addloc('inside the if-branch starting at line %d' % (
            trueline))
        self.visit(true)
        afterloc1 = self.loc

        afterloc = self.addloc(
            'after the if-statement beginning at line %s' % (
                self.getline(node)))

        # Add (general) transitions
        self.addtrans(preloc, True, condloc)
        self.addtrans(condloc, True, trueloc)
        self.addtrans(afterloc1, True, afterloc)

        # Add false loc
        if false:
            falseloc = self.addloc(
                'inside the else-branch starting at line %d' % (
                    self.getline(false)))
            self.visit(false)
            afterloc2 = self.loc

            self.addtrans(condloc, False, falseloc)
            self.addtrans(afterloc2, True, afterloc)

        else:
            self.addtrans(condloc, False, afterloc)
            falseloc = None

        # "Loop-less" if-statement
        # (afterloc2 is only read when `false` is set, i.e., when defined)
        if trueloc == afterloc1 and ((not false) or falseloc == afterloc2):
            if self.optifs:
                self.optimizeif(preloc, condexpr, trueloc, falseloc)
                return

        self.loc = afterloc

    def optimizeif(self, preloc, condexpr, trueloc, falseloc):
        '''
        Optimized "simple" or "loop-less" if statement
        '''
        # Remove unneded part of the graph
        self.fnc.rmtrans(preloc, True)
        self.loc = preloc

        # Keep track of assigned vars
        varss = set()
        varsl = []
        mt = {}
        mf = {}

        # Add exprs from branches
        def addvars(loc, m):
            for (var, expr) in self.fnc.exprs(loc):
                newvar = self.ssavar(var)

                if var not in varss:
                    varss.add(var)
                    varsl.append(var)

                # Replace vars mapped so far
                for (v1, v2) in m.items():
                    expr = expr.replace(v1, Var(v2))

                # Remember which real variable this assignment was for.
                expr.original = (var, self.cnt)
                self.addexpr(newvar, expr)

                # Remember replacement
                m[var] = newvar

        addvars(trueloc, mt)
        if falseloc is not None:
            addvars(falseloc, mf)

        # Add condition
        condvar = self.ssavar('$cond')
        self.addexpr(condvar, condexpr.copy())

        # Merge branches
        for var in varsl:
            self.addexpr(var, Op('ite', Var(condvar),
                                 Var(mt.get(var, var)),
                                 Var(mf.get(var, var))))

    def expr_list_and(self, exprs):
        '''
        Joins a list of expressions with '&&'; returns None for an empty
        list and the sole element for a one-element list.
        '''
        if len(exprs) == 0:
            return None

        newexpr = exprs[0]
        for expr in exprs[1:]:
            newexpr = Op('&&', newexpr, expr, line=expr.line)
        return newexpr

    def visit_loop(self, node, init, cond, next, body, do, name,
                   prebody=None):
        '''
        Adds locations and transitions for a loop.

        init    -- statements visited before the loop (may be empty)
        cond    -- loop condition: an Expr or a node to visit; an empty
                   condition becomes Const('1')
        next    -- statements visited after the body (may be empty)
        body    -- loop body
        do      -- when true, the loop is entered at the body instead of
                   at the condition
        name    -- loop kind, used only in location descriptions
        prebody -- optional iterable of addexpr() argument tuples added at
                   the start of the body location
        '''
        # Visit init stmts
        if init:
            self.visit(init)

        # Add condition (with new location)
        preloc = self.loc
        if isinstance(cond, Expr):
            condexpr = cond
        else:
            condexpr = self.visit_expr(cond, allowlist=True)
            if isinstance(condexpr, list):
                condexpr = self.expr_list_and(condexpr)

        if not condexpr:
            condexpr = Const('1')
        condloc = self.addloc("the condition of the '%s' loop at line %s" % (
            name, condexpr.line or self.getline(node)))
        self.addexpr(VAR_COND, condexpr)

        # Add exit loc
        exitloc = self.addloc("*after* the '%s' loop starting at line %d" % (
            name, self.getline(node)))

        # Add body with (new location)
        bodyloc = self.addloc(
            "inside the body of the '%s' loop beginning at line %d" % (
                name, self.getline(body) or self.getline(node)))
        self.addloop((condloc, exitloc))
        if prebody:
            # Explicit loop: map() is lazy on Python 3, so using it only for
            # its side effects would silently add nothing.
            for exprargs in prebody:
                self.addexpr(*exprargs)
        self.visit(body)
        if next:
            self.visit(next)
        self.poploop()
        afterloc = self.loc

        # Connect transitions
        self.addtrans(preloc, True, bodyloc if do else condloc)
        self.addtrans(condloc, True, bodyloc)
        self.addtrans(condloc, False, exitloc)
        self.addtrans(afterloc, True, condloc)

        self.loc = exitloc

    def addfnc(self, name, params, rettype):
        '''
        Starts a new function and makes it the active one; an already
        active function is suspended on self.fncsl together with its
        current location.
        '''
        if self.fnc:
            self.fncsl.append((self.fnc, self.loc))
        self.fnc = Function(name, params, rettype)
        self.fncs[name] = self.fnc
        self.prog.addfnc(self.fnc)

    def endfnc(self):
        '''
        Ends the active function, resuming the most recently suspended
        one (if any).
        '''
        if self.fncsl:
            self.fnc, self.loc = self.fncsl.pop()
        else:
            self.fnc = None
            self.loc = None

    def addloc(self, desc):
        ''' Adds a location to the active function and makes it current '''
        assert (self.fnc), 'No active fnc!'
        self.loc = self.fnc.addloc(desc=desc)
        return self.loc

    def addexpr(self, name, expr, loc=None, idx=None):
        ''' Adds an assignment at `loc` (default: current location) '''
        assert (self.fnc), 'No active fnc!'
        if not loc:
            loc = self.loc
        self.fnc.addexpr(loc, name, expr, idx=idx)

    def numexprs(self, loc=None):
        ''' Number of exprs at `loc` (default: current location) '''
        assert (self.fnc), 'No active fnc!'
        if not loc:
            loc = self.loc
        return self.fnc.numexprs(loc)

    def rmlastexprs(self, loc=None, num=1):
        ''' Removes the last `num` exprs at `loc` (default: current) '''
        assert (self.fnc), 'No active fnc!'
        if not loc:
            loc = self.loc
        self.fnc.rmlastexprs(loc, num)

    def addtrans(self, loc1, cond, loc2):
        ''' Adds the transition loc1 --cond--> loc2 to the active function '''
        assert (self.fnc), 'No active fnc!'
        self.fnc.addtrans(loc1, cond, loc2)

    def addtype(self, var, type, skiponexist=True):
        ''' Records the type of a variable in the active function '''
        assert (self.fnc), 'No active fnc!'
        self.fnc.addtype(var, type, skiponexist)

    def hasvar(self, var):
        ''' True if the active function has a type recorded for `var` '''
        assert (self.fnc), 'No active fnc'
        return self.fnc.gettype(var) is not None

    def addloop(self, l):
        ''' Pushes a loop descriptor onto the stack of enclosing loops '''
        self.loops.append(l)

    def poploop(self):
        ''' Pops and returns the innermost loop descriptor '''
        return self.loops.pop()

    def lastloop(self):
        ''' Returns the innermost loop descriptor, or None outside loops '''
        return self.loops[-1] if len(self.loops) else None

    def isfncname(self, name):
        ''' True if a function called `name` has been added via addfnc '''
        return name in self.fncs

    @classmethod
    def parse_code(cls, code, *args, **kwargs):
        '''
        Parses `code` with a fresh parser (built from *args/**kwargs),
        post-processes, optionally slices, and returns the program.
        '''
        parser = cls(*args, **kwargs)
        parser.parse(code)
        parser.postprocess()
        if parser.slice:
            parser.prog.slice()
        return parser.prog