def to_while(trace, jd, path=None): path = path or [] while True: if trace == []: raise line = trace[0] trace = trace[1:] if m := match(line, ("if", ":cond", ":if_true", ":if_false")): cond, if_true, if_false = m.cond, m.if_true, m.if_false if is_revert(if_true): path.append(("require", is_zero(cond))) trace = if_false continue if is_revert(if_false): path.append(("require", cond)) trace = if_true continue jds_true = find_f_list(if_true, get_jds) jds_false = find_f_list(if_false, get_jds) assert (jd in jds_true) != (jd in jds_false), (jds_true, jds_false) def add_path(line): if m := match(line, ("goto", Any, ":svs")): path2 = path for _, v_idx, v_val in m.svs: path2 = replace(path2, ("var", v_idx), v_val) return path2 + [line] else: return [line]
def make_asts(self): """ we need to do ast creation from the contract, not function level, because some simplifications (type/field removal) require insight to all the functions, not just a single one """ for func in self.functions: func.ast = self.make_ast(func.trace) def find_stor_masks(exp): if opcode(exp) == "type": return [exp] else: return [] stor_masks = frozenset( find_f_list([f.ast for f in self.functions], find_stor_masks)) stor_loc_to_masks = collections.defaultdict(set) stor_name_to_masks = collections.defaultdict(set) for mask in stor_masks: stor_loc_to_masks[get_loc(mask)].add(mask) stor_name_to_masks[get_name(mask)].add(mask) def cleanup(exp): if m := match(exp, ("field", 0, ("stor", ("length", ":idx")))): return ("stor", ("length", m.idx)) if m := match(exp, ("type", 256, ("field", 0, ("stor", ("length", ":idx"))))): return ("stor", ("length", m.idx))
def find_default(exp): if (m := match( exp, ("if", ":cond", ":if_true", ":if_false"))) and str( ("cd", 0)) in str(m.cond): if find_f_list(m.if_false, func_calls) == []: fi = m.if_false[0] if m2 := match(fi, ("jd", ":jd")): return int(m2.jd)
def make_params(self): """ figures out parameter types from the decompiled function code. does so by looking at all 'cd'/calldata occurences and figuring out how they are accessed - are they masked? are they used as pointers? """ params = get_func_params(self.hash) if len(params) > 0: res = {} idx = 4 for p in params: res[idx] = (p["type"], p["name"]) idx += 32 else: # good testing: solidstamp, auditContract # try to find all the references to parameters and guess their types def f(exp): if match(exp, ("mask_shl", Any, Any, Any, ("cd", Any))) or match(exp, ("cd", Any)): return [exp] return [] occurences = find_f_list(self.trace, f) sizes = {} for o in occurences: if m := match(o, ("mask_shl", ":size", Any, Any, ("cd", ":idx"))): size, idx = m.size, m.idx if m := match(o, ("cd", ":idx")): idx = m.idx size = 256 if idx == 0: continue if m := match(idx, ("add", 4, ("cd", ":in_idx"))): # this is a mark of 'cd' being used as a pointer sizes[m.in_idx] = -1 continue
try: # decompiles the code, starting from location 0 # and running VM in a special mode that returns 'funccall' # in places where it looks like there is a func call trace = vm.run(0, timeout=LOADER_TIMEOUT) def func_calls(exp): if m := match(exp, ("funccall", ":fx_hash", ":target", ":stack")): return [(m.fx_hash, m.target, m.stack)] else: return [] func_list = find_f_list(trace, func_calls) for fx_hash, target, stack in func_list: self.add_func(target=target, hash=fx_hash, stack=stack) # find default def find_default(exp): if (m := match( exp, ("if", ":cond", ":if_true", ":if_false"))) and str( ("cd", 0)) in str(m.cond): if find_f_list(m.if_false, func_calls) == []: fi = m.if_false[0] if m2 := match(fi, ("jd", ":jd")):
def analyse(self): assert len(self.trace) > 0 def find_returns(exp): if opcode(exp) == "return": return [exp] else: return [] exp_text = [] self.returns = find_f_list(self.trace, find_returns) exp_text.append(("possible return values", prettify(self.returns))) first = self.trace[0] if (opcode(first) == "if" and simplify_bool(first[1]) == "callvalue" and (first[2][0] == ("revert", 0) or opcode(first[2][0]) == "invalid")): self.trace = self.trace[0][3] self.payable = False elif (opcode(first) == "if" and simplify_bool(first[1]) == ("iszero", "callvalue") and (first[3][0] == ("revert", 0) or opcode(first[3][0]) == "invalid")): self.trace = self.trace[0][2] self.payable = False else: self.payable = True exp_text.append(("payable", self.payable)) self.read_only = True for op in [ "store", "selfdestruct", "call", "delegatecall", "codecall", "create", ]: if f"'{op}'" in str(self.trace): self.read_only = False exp_text.append(("read_only", self.read_only)) """ const func detection """ self.const = self.read_only for exp in ["storage", "calldata", "calldataload", "store", "cd"]: if exp in str(self.trace) or len(self.returns) != 1: self.const = False if self.const: self.const = self.returns[0] if len(self.const) == 3 and opcode(self.const[2]) == "data": self.const = self.const[2] if len(self.const) == 3 and opcode(self.const[2]) == "mask_shl": self.const = self.const[2] if len(self.const) == 3 and type(self.const[2]) == int: self.const = self.const[2] else: self.const = None if self.const: exp_text.append(("const", self.const)) """ getter detection """ self.getter = None self.simplify_string_getter_from_storage() if self.const is None and self.read_only and len(self.returns) == 1: ret = self.returns[0][1] if match(ret, ("bool", ("storage", Any, Any, ":loc"))): self.getter = ( ret # we have to be careful when using this for naming purposes, ) # because sometimes the storage can refer to array length elif opcode(ret) == "mask_shl" and opcode(ret[4]) == "storage": self.getter = ret[4] elif opcode(ret) == "storage": self.getter = ret elif opcode(ret) == "data": terms = ret[1:] # for structs, we check if all the parts of the struct are storage from the same # location. if so, we return the location number t0 = terms[ 0] # 0xFAFfea71A6da719D6CAfCF7F52eA04Eb643F6De2 - documents if m := match(t0, ("storage", 256, 0, ":loc")): loc = m.loc for e in terms[1:]: if not match(e, ("storage", 256, 0, ("add", Any, loc))): break else: self.getter = t0 # kitties getKitten - with more cases this and the above could be uniformed if self.getter is None: prev_loc = -1 for e in terms: def l2(x): if m := match(x, ("sha3", ("data", Any, ":l"))): if type(m.l) == int and m.l < 1000: return m.l if (opcode(x) == "sha3" and type(x[1]) == int and x[1] < 1000): return x[1] return None loc = find_f(e, l2) if not loc or (prev_loc != -1 and prev_loc != loc): break prev_loc = loc else: self.getter = ("struct", ("loc", loc))