def ends_exec(path):
    """Tell whether a path is known to end execution.

    Only the last line of `path` is inspected. Earlier lines may end
    execution as well, but at least one route leading to the last line
    did not - otherwise that last line would not be present.

    Returns:
        True when the last line is a terminating opcode, or an `or` line
        whose two branches both end execution. False otherwise, including
        for an empty path (which used to raise IndexError).
    """
    if not path:
        # an empty path contains nothing that could terminate execution
        return False

    last = path[-1]
    op = opcode(last)

    if op in (
        "return",
        "stop",
        "selfdestruct",
        "invalid",
        "assert_fail",
        "revert",
        "continue",
        "undefined",
    ):
        return True

    if op == "or":
        assert len(last) == 3
        return ends_exec(last[1]) and ends_exec(last[2])

    # 'while True' does end execution, but most other loops don't, so
    # loops are treated like any other line. Missing such a case is
    # harmless - the output is just slightly less readable.
    return False
def format_exp(exp):
    """Render an expression as a display string.

    - str: wrapped in double quotes
    - int: decimal, except values above 10**6 that are not a multiple of
      10**6, which are printed in hex
    - list: rendered recursively between gray-colored brackets, items
      separated by a gray comma
    - anything else: str(exp)
    """
    kind = type(exp)

    if kind is str:
        return f'"{exp}"'

    if kind is int:
        use_hex = exp > 10**6 and exp % 10**6 != 0
        return hex(exp) if use_hex else str(exp)

    if kind is not list:
        return str(exp)

    if len(exp) == 0:
        return COLOR_GRAY + "[]" + ENDC

    if type(opcode(exp)) is list:
        # the head is itself a list: format every element
        parts = [format_exp(item) for item in exp]
    else:
        # the head is printed as-is, only the operands are formatted
        parts = [opcode(exp)] + [format_exp(item) for item in exp[1:]]

    separator = f"{COLOR_GRAY}, {ENDC}"
    return COLOR_GRAY + "[" + ENDC + separator.join(parts) + COLOR_GRAY + "]" + ENDC
def as_paths(trace, path=None):
    """Expand a branching trace into a tuple of linear paths.

    `trace` must be a list; `path` is the prefix accumulated so far (a
    tuple, or None/empty for a fresh start). The trace is first passed
    through replace_f(trace, make_fands).

    An 'if' line is assumed to end the trace: both branches are expanded
    recursively, with the condition appended to the prefix for the true
    branch and is_zero(condition) for the false branch. A 'LOOP' line
    appends ("LOOP", line[2]) to the prefix and continues into line[1].

    Returns a tuple of lists, one list per path.
    """
    assert type(trace) == list

    prefix = path or tuple()

    for line in replace_f(trace, make_fands):
        op = opcode(line)

        if op == "if":
            # assumes 'ifs' end the trace
            cond, if_true, if_false = line[1], line[2], line[3]
            true_paths = as_paths(if_true, prefix + (cond,))
            false_paths = as_paths(if_false, prefix + (is_zero(cond),))
            return true_paths + false_paths

        if op == "LOOP":
            return as_paths(line[1], prefix + (("LOOP", line[2]),))

        prefix += (line,)

    return (list(prefix),)
def and_op(*args):
    """Build the conjunction of two or more operands.

    Operands are combined right-to-left. Two plain ints are and-ed
    bitwise; otherwise the result is an ("and", ...) tuple in which
    operands that are themselves "and" expressions are spliced in flat.
    """
    assert len(args) > 1

    head, *tail = args
    right = and_op(*tail) if len(tail) > 1 else tail[0]

    if type(head) is int and type(right) is int:
        return head & right

    operands = tuple()
    for side in (head, right):
        # splice nested 'and' operands instead of nesting them
        operands += side[1:] if opcode(side) == "and" else (side,)

    return ("and",) + operands
def unmake_fands(exp):
    """Rename a 'for'/'fand' head back to 'or'/'and'.

    Any expression with a different opcode is returned unchanged.
    """
    op = opcode(exp)
    if op == "for":
        restored = "or"
    elif op == "fand":
        restored = "and"
    else:
        return exp
    return (restored,) + exp[1:]
def cleanup(self):
    """Fold trivially decidable comparisons on `self.stack`, in place.

    Only tuple entries are touched:
    - ("lt", int, int)                  -> ("bool", 1 or 0)
    - ("iszero", int)                   -> ("bool", 1 or 0)
    - ("iszero", ("bool", int))         -> ("bool", 1 - value)
    - ("iszero", ("iszero", x))         -> x when x is itself a
      comparison/iszero expression, otherwise ("bool", x)
    """
    stack = self.stack
    boolean_ops = ("iszero", "eq", "lt", "gt", "slt", "sgt")

    for pos, item in enumerate(stack):
        if type(item) != tuple:
            continue

        head = item[0]

        if head == "lt" and type(item[1]) == int and type(item[2]) == int:
            stack[pos] = ("bool", 1 if item[1] < item[2] else 0)
            continue

        if head != "iszero":
            continue

        arg = item[1]
        if type(arg) == int:
            stack[pos] = ("bool", 1 if arg == 0 else 0)
        elif opcode(arg) == "bool" and type(arg[1]) == int:
            stack[pos] = ("bool", 1 - arg[1])
        elif opcode(arg) == "iszero":
            inner = arg[1]
            # a double negation of something already boolean is a no-op,
            # otherwise it is a cast to bool
            stack[pos] = inner if opcode(inner) in boolean_ops else ("bool", inner)
def replace_names_in_assoc(names, storages_assoc):
    # Walks `names` (pattern -> name) and rewrites entries of
    # `storages_assoc` in place so that storage definitions carry the
    # discovered name.
    # NOTE(review): `used_locs` is not defined in the visible part of this
    # function - presumably a set from an enclosing/module scope; confirm.
    for pattern, name in names.items():

        # patterns with a "bool" opcode are skipped
        if opcode(pattern) == "bool":
            continue

        # a "struct" pattern is its own storage id; anything else is
        # looked up in storages_assoc
        if opcode(pattern) == "struct":
            stor_id = pattern
        else:
            stor_id = storages_assoc[pattern]

        if m := match(stor_id, ("stor", ":size", ":off", ("loc", ":num"))):
            size, off, num = m.size, m.off, m.num
            # if we found a simple getter for a storage number,
            # we need to check first if a given location is only accessed
            # this way. otherwise it may be a function like getLength, that
            # returns the array length, and we don't want to use it as a storage name
            if all(
                    match(pattern, ("stor", Any, Any, ("loc", Any)))
                    for pattern in storages_assoc if get_loc(pattern) == num):

                used_locs.add(stor_id)

                # note: this loop rebinds `pattern` from the outer loop
                for src, pattern in storages_assoc.items():
                    if pattern == stor_id:
                        storages_assoc[src] = ("stor", size, off, ("name", name, num))

        elif ((m := match(stor_id, ("stor", Any, Any, ("map", Any, ":loc")))) or
              (m := match(stor_id, ("stor", Any, Any, ("array", Any, ":loc")))) or
              (m := match(stor_id, ("struct", ":loc")))):
            # NOTE(review): the visible source is cut off here - the body of
            # this branch (map / array / struct storage ids) is not shown.
def deserialize(trace):
    """Rebuild a trace from its serialized (nested-sequence) form.

    Every line becomes a tuple. 'while' and 'if' lines are rebuilt with
    their nested paths deserialized recursively and their other fields
    passed through tuplify; any other line is passed through tuplify
    whole. Returns a new list.
    """
    rebuilt = []

    for raw in trace:
        entry = tuple(raw)
        op = opcode(entry)

        if op == "while":
            _, cond, body, lid, setvars = entry
            cond = tuplify(cond)
            setvars = tuplify(setvars)
            assert type(lid) == str
            body = deserialize(body)
            rebuilt.append(("while", cond, body, lid, setvars))

        elif op == "if":
            _, cond, if_true, if_false = entry
            cond = tuplify(cond)
            if_true = deserialize(if_true)
            if_false = deserialize(if_false)
            rebuilt.append(("if", cond, if_true, if_false))

        else:
            rebuilt.append(tuplify(raw))

    return rebuilt
def make_fands(exp):
    """Rename an 'or'/'and' head to 'for'/'fand'.

    See `ferlan.getOrderDataClaim` for why it's necessary. Expressions
    with any other opcode are returned unchanged.
    """
    op = opcode(exp)
    if op == "or":
        renamed = "for"
    elif op == "and":
        renamed = "fand"
    else:
        return exp
    return (renamed,) + exp[1:]
def add_to_arr(exp):
    """Turn a two-operand add involving a 'loc' into an 'array' expression.

    When `exp` matches ("add", left, right) and one operand has opcode
    "loc", returns ("array", other_operand, loc_operand).

    NOTE(review): in every other case the visible code falls off the end
    and returns None. The source looks truncated here - verify whether a
    trailing fallback return is missing before relying on this.
    """
    if m := match(exp, ("add", ":left", ":right")):
        left, right = m.left, m.right
        # normalise so that the 'loc' operand, if any, ends up on the right
        if opcode(left) == "loc":
            right, left = left, right
        if opcode(right) == "loc":
            return ("array", left, right)
def lt_op(left, right):  # left < right
    # Two plain ints are compared directly.
    if type(left) == int and type(right) == int:
        return left < right

    # (add num (max a b ...)) is rewritten to (max (a+num) (b+num) ...),
    # i.e. the constant is pushed into every term of the max.
    if (m := match(left, ("add", ":int:num", ":max"))) and opcode(m.max) == "max":
        terms = m.max[1:]
        left = ("max", ) + tuple(add_op(t, m.num) for t in terms)
    # NOTE(review): the visible source ends here - the rewritten `left` is
    # never used and the function falls through returning None. The rest of
    # the comparison logic is presumably cut off; confirm against the file.
def simplify_max(exp):
    """Flatten one level of nested 'max' expressions.

    Operands of `exp` that are themselves 'max' expressions are replaced
    by their own operands. Non-'max' input is returned unchanged.
    """
    if opcode(exp) != "max":
        return exp

    flat = ("max",)
    for term in exp[1:]:
        flat += term[1:] if opcode(term) == "max" else (term,)
    return flat
def fold_ands(exp):
    """Flatten arbitrarily nested 'and' expressions into a single one.

    `exp` must have opcode "and". Returns ("and", ...) with the operands
    of every nested 'and' spliced in place, recursively.
    """
    assert opcode(exp) == "and"

    operands = tuple()
    for term in exp[1:]:
        if opcode(term) == "and":
            operands += fold_ands(term)[1:]
        else:
            operands += (term,)

    return ("and",) + operands
def max_to_add(exp):
    """Pull a common additive part out of a 'max' expression.

    Non-'max' input is returned unchanged. If any operand is neither an
    int nor an 'add' expression, the max is only flattened with
    simplify_max. Otherwise the result has the form
    ("add", <common part...>, ("max", <remainders...>)).
    """
    if opcode(exp) != "max":
        return exp

    terms = exp[1:]

    if any(opcode(term) != "add" and type(term) != int for term in terms):
        return simplify_max(("max",) + terms)

    def leading_int(term):
        # int operand -> itself; tuple whose first operand is an int -> that int;
        # anything else contributes 0.
        # used to be term[1] but 0x0000136DAE58AFCF1EDd2071973d4a7a6fbe98A5 didn't work
        if type(term) == int:
            return term
        if type(term) == tuple and len(term) > 1 and type(term[1]) == int:
            return term[1]
        return 0

    for term in terms:
        if type(term) != int:
            continue
        # there is a plain int among the operands: factor out the smallest
        # leading int and subtract it from everything else
        lowest = min(leading_int(other) for other in terms)
        remainder = ("max", term - lowest)
        remainder += tuple(sub_op(other, lowest) for other in terms if other != term)
        return ("add", lowest, remainder)

    # every operand is an 'add' from here on
    if all(type(term[1]) == int for term in terms):
        lowest = min([10**20] + [term[1] for term in terms])
    else:
        lowest = 0

    shared = [
        part
        for part in terms[0]
        if all(part in other[1:] for other in terms[1:])
    ]

    common = add_op(lowest, *shared) if len(shared) > 0 else lowest

    remainders = tuple(sub_op(term, common) for term in terms)

    prefix = (common,) if type(common) == int else common[1:]

    return ("add",) + prefix + (simplify_max(("max",) + remainders),)
def simplify_bool(exp):
    """Strip redundant boolean wrappers from an expression.

    ("bool", x) becomes x. ("iszero", x) is simplified recursively: a
    double negation collapses to the inner operand, otherwise the
    simplified operand is negated with is_zero. Everything else is
    returned unchanged.
    """
    op = opcode(exp)

    if op == "bool":
        return exp[1]

    if op != "iszero":
        return exp

    inner = simplify_bool(exp[1])
    if opcode(inner) == "iszero":
        return inner[1]

    # this had a bug and it went on unnoticed. does this check ever get executed?
    return is_zero(inner)
def replace_max_with_MAX(exp):
    """Swap the 'max' part of an expression for the placeholder "MAX".

    Non-'max' input yields (exp, None). Otherwise the expression is run
    through max_to_add, the last top-level 'max' operand of the result
    (or the whole result when there is none) is replaced by the string
    "MAX", and the outcome is simplified.

    Returns (rewritten expression, the sub-expression that was replaced).
    """
    if opcode(exp) != "max":
        return exp, None

    summed = max_to_add(exp)

    inner_maxes = [term for term in summed if opcode(term) == "max"]
    target = inner_maxes[-1] if inner_maxes else summed

    return simplify(replace(summed, target, "MAX")), target
def flatten_adds(exp):
    """Splice the operands of nested 'add' terms into one flat sequence.

    Repeats until no element of the sequence has opcode "add". When
    nothing needs flattening the input object itself is returned;
    otherwise a new list is.
    """
    current = exp

    while any(opcode(term) == "add" for term in current):
        expanded = []
        for term in current:
            if opcode(term) == "add":
                operands = term[1:]
                assert len(operands) > 1
                expanded += operands
            else:
                expanded.append(term)
        current = expanded

    return current
def make_trace(self):
    # Builds the full trace starting at this node by recursively appending
    # the traces of the nodes it jumps or branches to.
    if self.trace is None:
        # this node was never run
        return [("undefined", "decompilation didn't finish")]

    # for label nodes, turn the label's begin_vars into 'setvar' lines
    begin_vars = []
    if self.is_label():
        for _, var_idx, var_val, _ in self.label.begin_vars:
            begin_vars.append(("setvar", var_idx, var_val))

    # optional "jd" marker in front of the trace when the vm runs with
    # just_fdests set (and the trace is not a bare revert)
    if self.vm.just_fdests and self.trace != [("revert", 0)]:
        t = self.trace[0]
        if match(t, ("jump", ":target_node", ...)):
            begin = [("jd", str(self.jd[0]))]  # , str(self.trace))]
        else:
            begin = ["?"]
    else:
        begin = []

    begin += [("label", self, tuple(begin_vars))] if self.is_label() else []

    last = self.trace[-1]

    # a trailing jump is replaced by the trace of its target node
    if opcode(last) == "jump":
        return begin + self.trace[:-1] + last[1].make_trace()

    # a trailing if gets both of its branch nodes expanded into traces
    if m := match(last, ("if", ":cond", ":if_true", ":if_false")):
        if_true = m.if_true.make_trace()
        if_false = m.if_false.make_trace()
        return begin + self.trace[:-1] + [("if", m.cond, if_true, if_false)]
    # NOTE(review): when the last line is neither a jump nor an if, the
    # visible code falls through and returns None. The source looks
    # truncated here - verify whether a final fallback return is missing.
def calc_max(exp):
    """Evaluate 'max' sub-expressions whose operands are all plain ints.

    Non-tuple input is returned unchanged. Tuples are rebuilt with their
    operands processed recursively; a 'max' whose operands are all ints
    collapses to the largest of them (never less than -(2**256)).
    """
    if type(exp) is not tuple:
        return exp

    folded = (opcode(exp),) + tuple(calc_max(term) for term in exp[1:])

    if opcode(folded) != "max":
        return folded

    operands = folded[1:]
    if any(type(term) is not int for term in operands):
        # at least one symbolic operand - cannot be evaluated
        return folded

    return max((-(2**256),) + operands)
def or_op(*args):
    """Build the disjunction of the given operands.

    A single argument is returned as-is. Otherwise operands equal to 0
    are dropped, operands that are 'or' expressions are spliced in flat,
    and other operands already collected are skipped. Returns 0 when
    nothing is left, the operand itself when exactly one is left, and
    ("or", ...) otherwise.
    """
    if len(args) == 1:
        return args[0]

    terms = tuple()
    for arg in args:
        if arg == 0:
            continue
        if opcode(arg) == "or":
            nested = arg[1:]
            assert len(nested) > 1
            terms += nested
        elif arg not in terms:
            terms += (arg,)

    if not terms:
        return 0
    if len(terms) == 1:
        return terms[0]
    return ("or",) + terms
def _max_op(base, what):
    """Combine `base` with `what` into a maximum.

    Different from algebra's max because it can return a symbolic
    ("max", x, y, z) when the operands cannot be ordered.

    Each comparison goes through safe_lt_op(what, other), whose result is
    handled as a tri-state: True means `other` wins, False means `what`
    wins, None means the order is unknown (presumably "what < other" -
    confirm against safe_lt_op).

    The operands of a returned 'max' keep first-seen order. They used to
    go through set(), which made the order depend on string hash
    randomization and therefore vary between runs.
    """
    if opcode(base) != "max":
        verdict = safe_lt_op(what, base)
        if verdict is True:
            return base
        if verdict is False:
            return what
        return ("max", base, what)

    kept = []
    for term in base[1:]:
        verdict = safe_lt_op(what, term)

        if verdict is True:
            # one existing term already dominates `what`
            return base

        if verdict is False:
            # `what` dominates this term, so the term is dropped
            kept.append(what)

        if verdict is None:
            kept.append(term)
            kept.append(what)

    # de-duplicate while preserving insertion order
    unique = tuple(dict.fromkeys(kept))

    if len(unique) > 1:
        return ("max",) + unique
    return unique[0]
def find_storage_names(functions):
    """Derive storage names from getter functions.

    For every function with a truthy `getter`, the function name is
    turned into a candidate storage name: a leading "get" is dropped
    (when something remains before the parenthesis), the first letter is
    lowercased unless the name is all upper-case, and the parameter list
    is cut off. Getters matching ("storage", 160, ...) get an "Address"
    suffix unless the name already hints at an address.

    Returns a dict mapping getter -> name.
    """
    names = {}

    for func in functions:
        getter = func.getter
        if not getter:
            continue

        assert opcode(getter) in ("storage", "struct", "bool")

        # func name into potential storage name
        candidate = func.name

        if candidate[:3] == "get" and len(candidate.split("(")[0]) > 3:
            candidate = candidate[3:]

        if candidate != candidate.upper():
            # otherwise we get stuff like bILLIONS in 0xF0160428a8552AC9bB7E050D90eEADE4DDD52843
            candidate = candidate[0].lower() + candidate[1:]

        candidate = candidate.split("(")[0]

        if match(getter, ("storage", 160, ...)):
            lowered = candidate.lower()
            hints = ("address", "addr", "account", "owner")
            if not any(hint in lowered for hint in hints):
                candidate += "Address"

        names[getter] = candidate

    return names
def simplify(exp):
    # 'max': simplify every operand, then fold them together with max_op,
    # starting from -(2**256).
    if opcode(exp) == "max":
        terms = exp[1:]
        els = [simplify(e) for e in terms]
        res = -(2**256)
        for e in els:
            try:
                res = max_op(res, e)
            except Exception:
                # folding failed - keep the max symbolic, with simplified operands
                # NOTE(review): broad catch; which exceptions max_op raises
                # is not visible from here.
                return ("max", ) + tuple(els)
        return res

    # 'mask_shl': simplify all four fields, then evaluate the mask when
    # everything is concrete, or drop it when it is a full-width no-op.
    if (m := match(exp, ("mask_shl", ":size", ":offset", ":shl", ":val"))):
        size, offset, shl, val = (
            simplify(m.size),
            simplify(m.offset),
            simplify(m.shl),
            simplify(m.val),
        )
        if all_concrete(size, offset, shl, val):
            return apply_mask(val, size, offset, shl)
        if (size, offset, shl) == (256, 0, 0):
            return val
    # NOTE(review): every other case falls through and returns None in the
    # visible code. The source looks truncated here - verify against the
    # full file.
def add_ge_zero(exp):
    """
    Decide whether an 'add' expression is >= 0.

    Returns True / False when the sign is the same for every variant of
    the expression, None when it is unknown.

    technically, it can return wrong results, e.g.:

        (sub (mask 4, 4, -4, 'sth') (mask 4, 0, 'sth'))
        for sth 11...111 == 0
        for sth 0 == 0
        for sth 00010011 < 0

    in practice it (hopefully) doesn't happen -- need to fix "variants"
    to deliver more variants based on masks and other expressions?
    """
    assert opcode(exp) == "add", exp
    assert len(exp) > 2, exp

    simplified = simplify(exp)
    if type(simplified) is int:
        return simplified >= 0

    bounds = tuple(simplify(calc_max(variant)) for variant in variants(simplified))

    if not all_concrete(*bounds):
        return None

    if all(bound >= 0 for bound in bounds):
        return True
    if all(bound < 0 for bound in bounds):
        return False

    # mixed signs across variants - cannot tell
    return None
def merge_ifs(path):
    """Merge if/else sections that share the same beginning.

    Detects if-else sections whose branches start identically and moves
    the 'if' upstream, merging some of the code. A three-element 'if'
    line is one-sided: the remainder of the path acts as its else branch
    and processing stops there. A four-element 'if' carries both
    branches and processing continues after it.

    `path` must be a list; returns a new list.
    """
    assert type(path) == list

    out = []

    for pos, line in enumerate(path):
        assert type(line) != list

        if opcode(line) != "if":
            out.append(line)
            continue

        one_sided = len(line) == 3
        if not one_sided:
            assert len(line) == 4

        cond = line[1]
        if_true = merge_ifs(line[2])
        if_false = merge_ifs(path[pos + 1 :] if one_sided else line[3])

        hoisted, merged = try_merge_ifs(cond, if_true, if_false)
        out.extend(hoisted)

        if one_sided:
            # the else branch was the rest of the path: put it back inline
            out.append(merged[:3])
            out.extend(merged[3])
            break

        out.append(merged)  # two-sided: keep going, don't break

    return out
def run(self):
    """Execute this node and link its successors back to it.

    The previous trace is kept in `prev_trace`, the new one comes from
    `self.vm._run(...)`. If the new trace ends with a jump or an if, the
    target node(s) get `set_prev(self)` called on them.
    """
    logger.debug("Node.run(%s)", self)

    self.prev_trace = self.trace
    self.trace = self.vm._run(self.start, self.safe, self.stack, self.condition)

    last = self.trace[-1]
    op = opcode(last)

    if op == "jump":
        last[1].set_prev(self)
    elif op == "if":
        for successor in (last[2], last[3]):
            successor.set_prev(self)
def le_op(left, right):
    """Evaluate `left <= right`.

    'max' operands are first rewritten with max_to_add. Two plain numbers
    are compared directly; otherwise the result is whatever ge_zero says
    about (right - left).
    """
    left, right = (
        max_to_add(side) if opcode(side) == "max" else side
        for side in (left, right)
    )

    numeric = (int, float)
    if type(left) in numeric and type(right) in numeric:
        return left <= right

    return ge_zero(sub_op(right, left))
def internal_f(exp, f):
    """Apply `f` bottom-up over an expression, leaving storage untouched.

    An expression with opcode "storage" is returned as-is, without
    calling `f`. Tuples have every element processed recursively before
    `f` is applied to the rebuilt tuple; anything else goes straight to
    `f`.
    """
    if opcode(exp) == "storage":
        return exp

    rebuilt = tuple(internal_f(child, f) for child in exp) if type(exp) == tuple else exp
    return f(rebuilt)
def flatten(path):
    """Flatten if-else ('or') lines whose first branch ends execution.

    Every ('or', branch1, branch2) line is handled as follows:
    - both branches exactly one line long: the line is dropped
    - branch1 ends execution: the two flattened branches are combined
      with try_merge and spliced into the result
    - otherwise: the 'or' is kept, with both branches flattened
    All other lines are copied through. Returns a new list.
    """

    def ends_exec(path):
        # check if all the subpaths end execution
        # only checking the last line, previous ones may end execution as well
        # but at least one leading up to the last line didn't - otherwise
        # we wouldn't see it
        if not path:
            # an empty branch cannot end execution (used to raise IndexError)
            return False

        line = path[-1]
        op = opcode(line)

        if op in (
            "return",
            "stop",
            "selfdestruct",
            "invalid",
            "assert_fail",
            "revert",
            "continue",
            "undefined",
        ):
            return True

        if op == "or":
            assert len(line) == 3
            return ends_exec(line[1]) and ends_exec(line[2])

        # well, 'while True' ends execution, but all the other
        # ones most likely don't. if we miss some cases nothing
        # bad will happen - just slightly less readable code
        return False

    res = []

    for line in path:
        if opcode(line) != "or":
            res.append(line)
            continue

        # ('or', [exec1], [exec2]) - we're dealing only with if-else at this stage
        assert len(line) == 3, line

        if len(line[1]) == 1 and len(line[2]) == 1:
            # sometimes, after folding, both paths are identical,
            # so we can skip 'ifs', e.g. fallback func in 0xBe46324018124B2d604c2f3eaE91D3De9b388b09
            # NOTE(review): the check only compares lengths, not contents - confirm
            # that single-line branches are always identical at this stage.
            continue
        elif ends_exec(line[1]):
            res.extend(try_merge(flatten(line[1]), flatten(line[2])))
        else:
            res.append(("or", flatten(line[1]), flatten(line[2])))

    return res
def to_stordef(exp):
    """Return `exp` unchanged.

    This function is effectively an identity: the original body started
    with an unconditional `return exp`, which left the rest of it
    unreachable. The dead code has been removed; runtime behaviour is the
    same as before.

    The disabled logic replaced every sub-expression with opcode
    "mask_shl", "cd", "storage" or "var" by the string "idx", recursing
    through tuples. Restore it from version control if that
    generalisation is ever wanted again.
    """
    return exp