def add_ge_zero(exp):
    """Try to decide whether an ``add`` expression is non-negative.

    The expression is simplified first; if that yields a plain int the
    answer is exact.  Otherwise every variant produced by ``variants`` is
    reduced with ``calc_max`` and ``simplify``, and the signs of the results
    are compared.

    Returns:
        True  -- every variant evaluates to a value >= 0
        False -- every variant evaluates to a value < 0
        None  -- some variant is not concrete, or the signs are mixed

    Known limitation (from the original author): the result can be wrong
    for some masked operands, e.g.

        (sub (mask 4, 4, -4, 'sth') (mask 4, 0, 'sth'))

    which is == 0 for sth = 11...111, == 0 for sth = 0, but < 0 for
    sth = 00010011.  In practice this (hopefully) does not happen; the
    proper fix would be for ``variants`` to deliver more variants based on
    masks and other expressions.
    """
    assert opcode(exp) == "add", exp
    assert len(exp) > 2, exp

    exp = simplify(exp)
    if type(exp) is int:
        # Simplification collapsed the whole sum to a constant.
        return exp >= 0

    bounds = tuple(simplify(calc_max(variant)) for variant in variants(exp))
    if not all_concrete(*bounds):
        return None

    if all(bound >= 0 for bound in bounds):
        return True
    if all(bound < 0 for bound in bounds):
        return False

    # Mixed signs: cannot tell.
    return None
def simplify(exp):
    """Fold the ``max`` and ``mask_shl`` expression shapes handled below.

    * ``("max", *terms)`` -- every term is simplified, then the terms are
      folded pairwise with ``max_op``.  If ``max_op`` raises for any pair,
      a ``("max", ...)`` tuple of the simplified terms is returned instead.
    * ``("mask_shl", size, offset, shl, val)`` -- all four fields are
      simplified; when they are all concrete the mask is evaluated with
      ``apply_mask``, and a full-width mask ``(256, 0, 0)`` is dropped in
      favour of the simplified ``val``.

    NOTE(review): in the visible code nothing follows the ``mask_shl``
    branch, so every other input (and a ``mask_shl`` that is neither
    concrete nor full-width) falls off the end and returns ``None``
    implicitly.  Confirm against the complete file whether further
    branches follow.
    """
    if opcode(exp) == "max":
        terms = exp[1:]
        els = [simplify(e) for e in terms]
        # Very negative seed for the pairwise fold.
        res = -(2 ** 256)
        for e in els:
            try:
                res = max_op(res, e)
            except Exception:
                # Was a bare ``except:``; narrowed so that KeyboardInterrupt
                # and SystemExit are no longer swallowed.  Any ordinary
                # failure of max_op still yields the symbolic form.
                return ("max",) + tuple(els)
        return res

    if (m := match(exp, ("mask_shl", ":size", ":offset", ":shl", ":val"))):
        size, offset, shl, val = (
            simplify(m.size),
            simplify(m.offset),
            simplify(m.shl),
            simplify(m.val),
        )

        if all_concrete(size, offset, shl, val):
            return apply_mask(val, size, offset, shl)

        if (size, offset, shl) == (256, 0, 0):
            # Full-width mask with no shift: just the value itself.
            return val
def mask_mask_op(size, offset, shl, exp_size, exp_offset, exp_shl, exp):
    """Combine an outer mask (size, offset, shl) with an inner masked ``exp``.

    When all six mask parameters are concrete the work is delegated to
    ``strategy_concrete``.  Otherwise the symbolic strategies are tried in
    order and the first result that is not ``None`` is returned.

    Raises:
        AssertionError: if every strategy returned ``None``.
    """
    if all_concrete(offset, shl, exp_offset, exp_shl, exp_size, size):
        return strategy_concrete(size, offset, shl, exp_size, exp_offset, exp_shl, exp)

    args = (size, offset, shl, exp_size, exp_offset, exp_shl, exp)
    for strategy in (strategy_0, strategy_1, strategy_2, strategy_3, strategy_final):
        outcome = strategy(*args)
        if outcome is not None:
            return outcome

    # Reaching this point means no strategy produced a result.
    assert False
def apply_mask(val, size, offset=0, shl=0):
    """Apply a concrete mask to a concrete value and shift the result.

    ``val`` is AND-ed with ``mask_to_int(size, offset)``; the masked value
    is then shifted left by ``shl`` when ``shl`` is positive, or right by
    ``-shl`` when it is negative.

    All four arguments must satisfy ``all_concrete``.
    """
    assert all_concrete(val, size, offset, shl)

    masked = val & mask_to_int(size, offset)

    if shl > 0:
        return masked << shl
    if shl < 0:
        return masked >> -shl
    return masked
def apply_stack(self, ret, line):
    """Symbolically apply one non-control-flow instruction to ``self.stack``.

    Args:
        ret: object with ``append``; receives every trace entry produced by
            the instruction (``setvar``, ``setmem``, ``store``, ``log``,
            call traces, and the asm listing in verbose/explain mode).
        line: indexable instruction record.  ``line[1]`` is the opcode name
            and ``line[2]`` (when present) its parameter.  ``line[0]`` is
            pushed for the ``pc`` opcode, so it is presumably the
            instruction offset -- confirm against the loader.

    Raises:
        AssertionError: if a control-flow or ``UNKNOWN`` opcode reaches the
            fallback branch, or if the change in stack height does not
            match ``opcode_dict.stack_diffs[op]``.

    Fixes relative to the previous revision of this method:
        * ``addmod`` is now recorded under its own tag instead of being
          mislabelled as ``mulmod``.
        * Concrete ``sar`` masks the sign fill to 256 bits; previously the
          result for negative inputs had bits set above bit 255.
    """

    def trace(exp, *format_args):
        # Debug logging is best-effort: a failure while formatting the
        # message must not abort the trace itself.
        try:
            logger.debug("Trace: %s", str(exp).format(*format_args))
        except Exception:
            pass

        if type(exp) == str:
            ret.append(exp.format(*format_args))
        else:
            ret.append(exp)

    stack = self.stack
    op = line[1]
    previous_len = stack.len()
    word_max = 2**256 - 1  # all-ones 256-bit word

    if "--verbose" in sys.argv or "--explain" in sys.argv:
        trace(C.asm(" " + str(stack)))
        trace("")

        if "push" not in op and "dup" not in op and "swap" not in op:
            trace("[{}] {}", line[0], C.asm(op))
        else:
            if type(line[2]) == str:
                trace("[{}] {} {}", line[0], C.asm(op), C.asm(" ”" + line[2] + "”"))
            elif line[2] > 0x1000000000:
                # Large parameters are easier to read in hex.
                trace("[{}] {} {}", line[0], C.asm(op), C.asm(hex(line[2])))
            else:
                trace("[{}] {} {}", line[0], C.asm(op), C.asm(str(line[2])))

    param = 0
    if len(line) > 2:
        param = line[2]

    if op in [
        "exp",
        "and",
        "eq",
        "div",
        "lt",
        "gt",
        "slt",
        "sgt",
        "mod",
        "xor",
        "signextend",
        "smod",
        "sdiv",
    ]:
        stack.append(arithmetic.eval((op, stack.pop(), stack.pop())))

    elif op[:4] == "push":
        stack.append(param)

    elif op == "pop":
        stack.pop()

    elif op == "dup":
        stack.dup(param)

    elif op == "mul":
        stack.append(mul_op(stack.pop(), stack.pop()))

    elif op == "or":
        stack.append(or_op(stack.pop(), stack.pop()))

    elif op == "add":
        stack.append(add_op(stack.pop(), stack.pop()))

    elif op == "sub":
        left = stack.pop()
        right = stack.pop()

        if type(left) == int and type(right) == int:
            stack.append(arithmetic.sub(left, right))
        else:
            stack.append(sub_op(left, right))

    elif op in ["mulmod", "addmod"]:
        # Keep the real opcode as the tag; this used to emit "mulmod" for
        # both instructions.
        stack.append((op, stack.pop(), stack.pop(), stack.pop()))

    elif op == "shl":
        off = stack.pop()
        exp = stack.pop()
        if all_concrete(off, exp):
            stack.append(exp << off)
        else:
            stack.append(mask_op(exp, shl=off))

    elif op == "shr":
        off = stack.pop()
        exp = stack.pop()
        if all_concrete(off, exp):
            stack.append(exp >> off)
        else:
            stack.append(mask_op(exp, offset=minus_op(off), shr=off))

    elif op == "sar":
        off = stack.pop()
        exp = stack.pop()
        if all_concrete(off, exp):
            sign = exp & (1 << 255)
            if off >= 256:
                # Everything is shifted out; only the sign fill remains.
                stack.append(word_max if sign else 0)
            else:
                shifted = exp >> off
                if sign:
                    # Fill the vacated top `off` bits with ones.  The mask
                    # keeps the fill inside the 256-bit word; the unmasked
                    # shift set bits above bit 255.
                    shifted |= (word_max << (256 - off)) & word_max
                stack.append(shifted)
        else:
            # FIXME: This won't give the right result...
            stack.append(mask_op(exp, offset=minus_op(off), shr=off))

    elif op in ["not", "iszero"]:
        stack.append((op, stack.pop()))

    elif op == "sha3":
        p = stack.pop()
        n = stack.pop()
        res = mem_load(p, n)

        # The hash is bound to a fresh numbered variable and the variable
        # is pushed in its place.
        self.counter += 1
        vname = f"_{self.counter}"
        vval = ("sha3", res)

        trace(("setvar", vname, vval))
        stack.append(("var", vname))

    elif op == "calldataload":
        stack.append(("cd", stack.pop()))

    elif op == "byte":
        val = stack.pop()
        num = stack.pop()
        off = sub_op(256, to_bytes(num))
        stack.append(mask_op(val, 8, off, shr=off))

    elif op == "selfbalance":
        stack.append(("balance", "address"))

    elif op == "balance":
        addr = stack.pop()
        if opcode(addr) == "mask_shl" and addr[:4] == ("mask_shl", 160, 0, 0):
            # Strip the 160-bit mask wrapped around the address.
            stack.append(("balance", addr[4]))
        else:
            stack.append(("balance", addr))

    elif op == "swap":
        stack.swap(param)

    elif op[:3] == "log":
        p = stack.pop()
        s = stack.pop()
        # The digit after "log" is the number of topics to pop.
        param = int(op[3])
        topics = [stack.pop() for _ in range(param)]

        trace(("log", mem_load(p, s)) + tuple(topics))

    elif op == "sload":
        sloc = stack.pop()
        stack.append(("storage", 256, 0, sloc))

    elif op == "sstore":
        sloc = stack.pop()
        val = stack.pop()
        trace(("store", 256, 0, sloc, val))

    elif op == "mload":
        memloc = stack.pop()

        self.counter += 1
        vname = f"_{self.counter}"
        trace(("setvar", vname, ("mem", ("range", memloc, 32))))
        stack.append(("var", vname))

    elif op == "mstore":
        memloc = stack.pop()
        val = stack.pop()
        trace(("setmem", ("range", memloc, 32), val))

    elif op == "mstore8":
        memloc = stack.pop()
        val = stack.pop()
        # NOTE(review): mstore uses a range length of 32 while this uses 8;
        # confirm the intended unit of the range length.
        trace(("setmem", ("range", memloc, 8), val))

    elif op == "extcodecopy":
        addr = stack.pop()
        mem_pos = stack.pop()
        code_pos = stack.pop()
        data_len = stack.pop()

        trace((
            "setmem",
            ("range", mem_pos, data_len),
            ("extcodecopy", addr, ("range", code_pos, data_len)),
        ))

    elif op == "codecopy":
        mem_pos = stack.pop()
        call_pos = stack.pop()
        data_len = stack.pop()

        if (type(call_pos), type(data_len)) == (int, int) and call_pos + data_len < len(self.loader.binary):
            # Concrete, in-range copy: fold the bytes into one integer.
            # NOTE(review): the range starts at call_pos - 1; confirm the
            # indexing convention of self.loader.binary.
            res = 0
            for i in range(call_pos - 1, call_pos + data_len - 1):
                res = res << 8
                # this breaks with out of range for some contracts
                # may be because we're usually getting compiled code binary
                # and not runtime binary
                res += self.loader.binary[i]
            trace(("setmem", ("range", mem_pos, data_len), res))  # ('bytes', data_len, res)))
        else:
            trace((
                "setmem",
                ("range", mem_pos, data_len),
                ("code.data", call_pos, data_len),
            ))

    elif op == "codesize":
        stack.append(len(self.loader.binary))

    elif op == "calldatacopy":
        mem_pos = stack.pop()
        call_pos = stack.pop()
        data_len = stack.pop()

        if data_len != 0:
            call_data = ("call.data", call_pos, data_len)
            # call_data = mask_op(('call.data', bits(add_op(data_len, call_pos))), size=bits(data_len), shl=bits(call_pos))
            trace(("setmem", ("range", mem_pos, data_len), call_data))

    elif op == "returndatacopy":
        mem_pos = stack.pop()
        ret_pos = stack.pop()
        data_len = stack.pop()

        if data_len != 0:
            return_data = ("ext_call.return_data", ret_pos, data_len)
            # return_data = mask_op(('ext_call.return_data', bits(add_op(data_len, ret_pos))), size=bits(data_len), shl=bits(ret_pos))
            trace(("setmem", ("range", mem_pos, data_len), return_data))

    elif op == "call":
        self.handle_call(op, trace)

    elif op == "staticcall":
        self.handle_call(op, trace)

    elif op == "delegatecall":
        gas = stack.pop()
        addr = stack.pop()

        arg_start = stack.pop()
        arg_len = stack.pop()
        ret_start = stack.pop()
        ret_len = stack.pop()

        call_trace = ("delegatecall", gas, addr)  # arg_start, arg_len, ret_start, ret_len)

        if arg_len == 0:
            fname = None
            fparams = None
        elif arg_len == 4:
            # Exactly four bytes of arguments: only the first word is loaded.
            fname = mem_load(arg_start, 4)
            fparams = 0
        else:
            fname = mem_load(arg_start, 4)
            fparams = mem_load(add_op(arg_start, 4), sub_op(arg_len, 4))

        call_trace += (fname, fparams)

        trace(call_trace)

        self.call_len = ret_len
        stack.append("delegate.return_code")

        if 0 != ret_len:
            return_data = ("delegate.return_data", 0, ret_len)
            trace(("setmem", ("range", ret_start, ret_len), return_data))

    elif op == "callcode":
        gas = stack.pop()
        addr = stack.pop()
        value = stack.pop()

        arg_start = stack.pop()
        arg_len = stack.pop()
        ret_start = stack.pop()
        ret_len = stack.pop()

        call_trace = ("callcode", gas, addr, value)

        if arg_len == 0:
            fname = None
            fparams = None
        elif arg_len == 4:
            fname = mem_load(arg_start, 4)
            fparams = 0
        else:
            fname = mem_load(arg_start, 4)
            fparams = mem_load(add_op(arg_start, 4), sub_op(arg_len, 4))

        call_trace += (fname, fparams)

        trace(call_trace)

        self.call_len = ret_len
        stack.append("callcode.return_code")

        if 0 != ret_len:
            return_data = ("callcode.return_data", 0, ret_len)
            trace(("setmem", ("range", ret_start, ret_len), return_data))

    elif op == "create":
        wei, mem_start, mem_len = stack.pop(), stack.pop(), stack.pop()

        call_trace = ("create", wei)
        code = mem_load(mem_start, mem_len)
        call_trace += (code,)

        trace(call_trace)

        stack.append("create.new_address")

    elif op == "create2":
        wei, mem_start, mem_len, salt = (
            stack.pop(),
            stack.pop(),
            stack.pop(),
            stack.pop(),
        )

        call_trace = ("create2", wei, ("mem", ("range", mem_start, mem_len)), salt)

        trace(call_trace)

        stack.append("create2.new_address")

    elif op == "pc":
        stack.append(line[0])

    elif op == "msize":
        self.counter += 1
        vname = f"_{self.counter}"
        trace(("setvar", vname, "msize"))
        stack.append(("var", vname))

    elif op in ("extcodesize", "extcodehash", "blockhash"):
        stack.append((op, stack.pop()))

    elif op in [
        "callvalue",
        "caller",
        "address",
        "number",
        "gas",
        "origin",
        "timestamp",
        "chainid",
        "difficulty",
        "gasprice",
        "coinbase",
        "gaslimit",
        "calldatasize",
        "returndatasize",
    ]:
        # Environment values are pushed as their opcode name.
        stack.append(op)

    else:
        # TODO: Maybe raise an error directly?
        assert op not in [
            "jump",
            "jumpi",
            "revert",
            "return",
            "stop",
            "jumpdest",
            "UNKNOWN",
        ]

    # Sanity check: the net change in stack height must match the opcode table.
    if stack.len() - previous_len != opcode_dict.stack_diffs[op]:
        logger.error("line: %s", line)
        logger.error("stack: %s", stack)
        logger.error(
            "expected %s, got %s stack diff",
            opcode_dict.stack_diffs[op],
            stack.len() - previous_len,
        )
        assert False, f"opcode {op} not processed correctly"

    stack.cleanup()
def simplify(exp):
    """Fold the 'max', 'mask_shl' and 'add' expression shapes handled below.

    NOTE(review): ``exp ~ (...)`` with ``:name`` / ``*name`` captures is not
    standard Python.  Presumably a project-specific pattern-matching
    preprocessor tests the tuple shape and binds the captures as locals --
    confirm before editing.

    As written, an input that matches none of the branches (or a 'mask_shl'
    that is neither concrete nor full-width) reaches the end of the visible
    code without an explicit return.
    """

    if exp ~ ('max', *terms):
        els = [simplify(e) for e in terms]
        # Very negative seed for the pairwise fold.
        res = -2**256
        for e in els:
            try:
                res = max_op(res, e)
            except:
                # max_op failed for this pair: keep the symbolic 'max' over
                # the simplified terms.
                # NOTE(review): bare except also swallows KeyboardInterrupt
                # and SystemExit.
                return ('max', ) + tuple(els)
        return res

    if exp ~ ('mask_shl', :size, :offset, :shl, :val):
        size, offset, shl, val = simplify(size), simplify(offset), simplify(shl), simplify(val)
        if all_concrete(size, offset, shl, val):
            # Fully concrete mask: evaluate it.
            return apply_mask(val, size, offset, shl)
        if (size, offset, shl) == (256, 0, 0):
            # Full-width mask with no shift: just the value itself.
            return val

    if exp ~ ('add', *terms):
        # Re-add the simplified terms one by one through add_op.
        res = 0
        for e in terms:
            res = add_op(res, simplify(e))
        assert type(res) != list
        return res
return col('mem[', COLOR_HEADER) + \ pret(idx) + \ col(']', COLOR_HEADER) if exp ~ ('setvar', :idx, :val): # shouldn't be pretty line? return pret(('var', idx)) + ' = ' + pret(val, parentheses=False) if exp ~ ('setmem', :idx, :val): # --,,-- return pret(('mem', idx)) + ' = ' + \ pret(val) if exp ~ ('mask_shl', :size, :offset, :shl, :val): if all_concrete(size, offset, shl) and \ exp[1] + exp[2] == 256 and exp[2] == -exp[3] and exp[2] < 8: # e.g. (Mask(255, 1, eth.balance(this.address)) >> 1 # --> eth.balance(this.address) / 2 # for offsets smaller than 8 if exp[3] <= 8: return pret(('div', exp[4], 2**-exp[3]), parentheses=parentheses) else: return pret(('shr', exp[3], exp[4]), parentheses=parentheses) if (type(exp[1]), type(exp[2]), type(exp[3])) == (int, int, int) and\ exp[2] == exp[3] and exp[2] < 8: # e.g. (Mask(255, 1, eth.balance(this.address)) << x # --> eth.balance(this.address) * 2**x # for offsets smaller than 8
if m := match(exp, ("mem", ":idx")): assert opcode(m.idx) != "range" return col("mem[", COLOR_HEADER) + pret(m.idx) + col("]", COLOR_HEADER) if m := match(exp, ("setvar", ":idx", ":val")): # shouldn't be pretty line? return pret(("var", m.idx)) + " = " + pret(m.val, parentheses=False) if m := match(exp, ("setmem", ":idx", ":val")): # --,,-- return pret(("mem", m.idx)) + " = " + pret(m.val) if m := match(exp, ("mask_shl", ":size", ":offset", ":shl", ":val")): size, offset, shl, val = m.size, m.offset, m.shl, m.val if (all_concrete(size, offset, shl) and exp[1] + exp[2] == 256 and exp[2] == -exp[3] and exp[2] < 8): # e.g. (Mask(255, 1, eth.balance(this.address)) >> 1 # --> eth.balance(this.address) / 2 # for offsets smaller than 8 if exp[3] <= 8: return pret(("div", exp[4], 2**-exp[3]), parentheses=parentheses) else: return pret(("shr", exp[3], exp[4]), parentheses=parentheses) if ((type(exp[1]), type(exp[2]), type(exp[3])) == (int, int, int) and exp[2] == exp[3] and exp[2] < 8): # e.g. (Mask(255, 1, eth.balance(this.address)) << x # --> eth.balance(this.address) * 2**x
def apply_stack(self, ret, line):
    """Symbolically apply one non-control-flow instruction to ``self.stack``.

    Args:
        ret: object with ``append``; receives every trace entry produced by
            the instruction (``setvar``, ``setmem``, ``store``, ``log``,
            call traces, and the asm listing in verbose/explain mode).
        line: indexable instruction record.  ``line[1]`` is the opcode name
            and ``line[2]`` (when present) its parameter.  ``line[0]`` is
            pushed for the ``pc`` opcode, so it is presumably the
            instruction offset -- confirm against the loader.

    Raises:
        AssertionError: if a control-flow opcode is passed in, or if the
            change in stack height does not match
            ``opcode_dict.stack_diffs[op]``.

    Fixes relative to the previous revision of this method:
        * The stack-diff error message referenced the undefined name
          ``org_len`` (NameError instead of the intended assertion); it now
          uses ``previous_len``.
        * ``addmod`` is now recorded under its own tag instead of being
          mislabelled as ``mulmod``.
        * The unused ``trace_extend`` helper and the unused ``loaded``
          local in the ``mload`` branch were removed.
    """

    def trace(exp, *format_args):
        if '--verbose' in sys.argv:
            # Only log in verbose mode;
            # otherwise breaks sometimes, e.g. 0x00a159d41a5bc12dce2f8AcA8e5BB5Beb8F6ABc8.update
            logger.debug("Trace: %s", str(exp).format(*format_args))

        if type(exp) == str:
            ret.append(exp.format(*format_args))
        else:
            ret.append(exp)

    stack = self.stack
    op = line[1]
    previous_len = stack.len()

    if '--verbose' in sys.argv or '--explain' in sys.argv:
        trace(C.asm(' ' + str(stack)))
        trace('')

        if "push" not in op and "dup" not in op and "swap" not in op:
            trace('[{}] {}', line[0], C.asm(op))
        else:
            if type(line[2]) == str:
                trace('[{}] {} {}', line[0], C.asm(op), C.asm(" ”" + line[2] + "”"))
            elif line[2] > 0x1000000000:
                # Large parameters are easier to read in hex.
                trace('[{}] {} {}', line[0], C.asm(op), C.asm(hex(line[2])))
            else:
                trace('[{}] {} {}', line[0], C.asm(op), C.asm(str(line[2])))

    # Control flow is handled by the caller, never here.
    assert op not in ['jump', 'jumpi', 'revert', 'return', 'stop', 'jumpdest']

    param = 0
    if len(line) > 2:
        param = line[2]

    # The branches below are mutually exclusive, so a single elif chain is
    # equivalent to the former mix of independent ifs and elifs.
    if op in ['exp', 'and', 'eq', 'div', 'lt', 'gt', 'slt', 'sgt', 'mod', 'xor', 'signextend', 'smod', 'sdiv']:
        stack.append(arithmetic.eval((op, stack.pop(), stack.pop())))

    elif op in ['mulmod', 'addmod']:
        # Keep the real opcode as the tag; this used to emit 'mulmod' for
        # both instructions.
        stack.append((op, stack.pop(), stack.pop(), stack.pop()))

    elif op == 'mul':
        stack.append(mul_op(stack.pop(), stack.pop()))

    elif op == 'or':
        stack.append(or_op(stack.pop(), stack.pop()))

    elif op == 'shl':
        off = stack.pop()
        exp = stack.pop()
        if all_concrete(off, exp):
            stack.append(exp << off)
        else:
            stack.append(mask_op(exp, shl=off))

    elif op == 'shr':
        off = stack.pop()
        exp = stack.pop()
        if all_concrete(off, exp):
            stack.append(exp >> off)
        else:
            stack.append(mask_op(exp, offset=minus_op(off), shr=off))

    elif op == 'add':
        stack.append(add_op(stack.pop(), stack.pop()))

    elif op == 'sub':
        left = stack.pop()
        right = stack.pop()

        if type(left) == int and type(right) == int:
            stack.append(arithmetic.sub(left, right))
        else:
            stack.append(sub_op(left, right))

    elif op in ['not', 'iszero']:
        stack.append((op, stack.pop()))

    elif op == 'sha3':
        p = stack.pop()
        n = stack.pop()
        res = mem_load(p, n)

        # The hash is bound to a fresh numbered variable and the variable
        # is pushed in its place.
        self.counter += 1
        vname = f'_{self.counter}'
        vval = ('sha3', res)

        trace(('setvar', vname, vval))
        stack.append(('var', vname))

    elif op == 'calldataload':
        stack.append(('cd', stack.pop()))

    elif op == 'byte':
        val = stack.pop()
        num = stack.pop()
        off = sub_op(256, to_bytes(num))
        stack.append(mask_op(val, 8, off, shr=off))

    elif op == 'balance':
        addr = stack.pop()
        if opcode(addr) == 'mask_shl' and addr[:4] == ('mask_shl', 160, 0, 0):
            # Strip the 160-bit mask wrapped around the address.
            stack.append(('balance', addr[4]))
        else:
            stack.append(('balance', addr))

    elif op == 'swap':
        stack.swap(param)

    elif op[:3] == 'log':
        p = stack.pop()
        s = stack.pop()
        # The digit after 'log' is the number of topics to pop.
        param = int(op[3])
        topics = [stack.pop() for _ in range(param)]

        trace(('log', mem_load(p, s)) + tuple(topics))

    elif op == 'sload':
        sloc = stack.pop()
        stack.append(('storage', 256, 0, sloc))

    elif op == 'sstore':
        sloc = stack.pop()
        val = stack.pop()
        trace(('store', 256, 0, sloc, val))

    elif op == 'mload':
        memloc = stack.pop()

        self.counter += 1
        vname = f'_{self.counter}'
        trace(('setvar', vname, ('mem', ('range', memloc, 32))))
        stack.append(('var', vname))

    elif op == 'mstore':
        memloc = stack.pop()
        val = stack.pop()
        trace(('setmem', ('range', memloc, 32), val))

    elif op == 'mstore8':
        memloc = stack.pop()
        val = stack.pop()
        # NOTE(review): mstore uses a range length of 32 while this uses 8;
        # confirm the intended unit of the range length.
        trace(('setmem', ('range', memloc, 8), val))

    elif op == 'extcodecopy':
        addr = stack.pop()
        mem_pos = stack.pop()
        code_pos = stack.pop()
        data_len = stack.pop()

        trace(('setmem', ('range', mem_pos, data_len), ('extcodecopy', addr, ('range', code_pos, data_len))))

    elif op == 'codecopy':
        mem_pos = stack.pop()
        call_pos = stack.pop()
        data_len = stack.pop()

        if (type(call_pos), type(data_len)) == (int, int) and call_pos + data_len < len(self.loader.binary):
            # Concrete, in-range copy: fold the bytes into one integer.
            # NOTE(review): the range starts at call_pos - 1; confirm the
            # indexing convention of self.loader.binary.
            res = 0
            for i in range(call_pos - 1, call_pos + data_len - 1):
                res = res << 8
                # this breaks with out of range for some contracts
                # may be because we're usually getting compiled code binary
                # and not runtime binary
                res += self.loader.binary[i]
            trace(('setmem', ('range', mem_pos, data_len), res))  # ('bytes', data_len, res)))
        else:
            trace(('setmem', ('range', mem_pos, data_len), ('code.data', call_pos, data_len)))

    elif op == 'codesize':
        stack.append(len(self.loader.binary))

    elif op == 'calldatacopy':
        mem_pos = stack.pop()
        call_pos = stack.pop()
        data_len = stack.pop()

        if data_len != 0:
            call_data = ('call.data', call_pos, data_len)
            # call_data = mask_op(('call.data', bits(add_op(data_len, call_pos))), size=bits(data_len), shl=bits(call_pos))
            trace(('setmem', ('range', mem_pos, data_len), call_data))

    elif op == 'returndatacopy':
        mem_pos = stack.pop()
        ret_pos = stack.pop()
        data_len = stack.pop()

        if data_len != 0:
            return_data = ('ext_call.return_data', ret_pos, data_len)
            # return_data = mask_op(('ext_call.return_data', bits(add_op(data_len, ret_pos))), size=bits(data_len), shl=bits(ret_pos))
            trace(('setmem', ('range', mem_pos, data_len), return_data))

    elif op == 'call':
        self.handle_call(op, trace)

    elif op == 'staticcall':
        self.handle_call(op, trace)

    elif op == 'delegatecall':
        gas = stack.pop()
        addr = stack.pop()

        arg_start = stack.pop()
        arg_len = stack.pop()
        ret_start = stack.pop()
        ret_len = stack.pop()

        call_trace = ('delegatecall', gas, addr)  # arg_start, arg_len, ret_start, ret_len)

        if arg_len == 0:
            fname = None
            fparams = None
        elif arg_len == 4:
            # Exactly four bytes of arguments: only the first word is loaded.
            fname = mem_load(arg_start, 4)
            fparams = 0
        else:
            fname = mem_load(arg_start, 4)
            fparams = mem_load(add_op(arg_start, 4), sub_op(arg_len, 4))

        call_trace += (fname, fparams)

        trace(call_trace)

        self.call_len = ret_len
        stack.append('delegate.return_code')

        if 0 != ret_len:
            return_data = ('delegate.return_data', 0, ret_len)
            trace(('setmem', ('range', ret_start, ret_len), return_data))

    elif op == 'callcode':
        gas = stack.pop()
        addr = stack.pop()
        value = stack.pop()

        arg_start = stack.pop()
        arg_len = stack.pop()
        ret_start = stack.pop()
        ret_len = stack.pop()

        call_trace = ('callcode', gas, addr, value)

        if arg_len == 0:
            fname = None
            fparams = None
        elif arg_len == 4:
            fname = mem_load(arg_start, 4)
            fparams = 0
        else:
            fname = mem_load(arg_start, 4)
            fparams = mem_load(add_op(arg_start, 4), sub_op(arg_len, 4))

        call_trace += (fname, fparams)

        trace(call_trace)

        self.call_len = ret_len
        stack.append('callcode.return_code')

        if 0 != ret_len:
            return_data = ('callcode.return_data', 0, ret_len)
            trace(('setmem', ('range', ret_start, ret_len), return_data))

    elif op == 'create':
        wei, mem_start, mem_len = stack.pop(), stack.pop(), stack.pop()

        call_trace = ('create', wei)
        code = mem_load(mem_start, mem_len)
        call_trace += (code,)

        trace(call_trace)

        stack.append('create.new_address')

    elif op == 'create2':
        wei, mem_start, mem_len, salt = stack.pop(), stack.pop(), stack.pop(), stack.pop()

        call_trace = ('create2', wei, ('mem', ('range', mem_start, mem_len)), salt)

        trace(call_trace)

        stack.append('create2.new_address')

    elif op[:4] == 'push':
        stack.append(param)

    elif op == 'pc':
        stack.append(line[0])

    elif op == 'pop':
        stack.pop()

    elif op == 'dup':
        stack.dup(param)

    elif op == 'msize':
        self.counter += 1
        vname = f'_{self.counter}'
        trace(('setvar', vname, 'msize'))
        stack.append(('var', vname))

    elif op in ('extcodesize', 'extcodehash', 'blockhash'):
        stack.append((op, stack.pop()))

    elif op in ['callvalue', 'caller', 'address', 'number', 'gas', 'origin', 'timestamp',
                'difficulty', 'gasprice', 'coinbase', 'gaslimit', 'calldatasize', 'returndatasize']:
        # Environment values are pushed as their opcode name.
        stack.append(op)

    # Sanity check: the net change in stack height must match the opcode table.
    if stack.len() - previous_len != opcode_dict.stack_diffs[op]:
        logger.error('line: %s', line)
        logger.error('stack: %s', stack)
        logger.error('expected %s, got %s stack diff', opcode_dict.stack_diffs[op], stack.len() - previous_len)
        assert False, f'opcode {op} not processed correctly'

    stack.cleanup()