def test_get_opcodes(evm_version):
    """
    Check that the opcode table matches the EVM version under test.

    ``evm_version`` is supplied by the test harness. For each ruleset the
    test checks whether ``CHAINID`` and ``CREATE2`` are present, and the
    last field of the ``SLOAD`` / ``CREATE2`` entries (presumably the gas
    cost -- confirm against the opcode table definition).
    """
    table = opcodes.get_opcodes()

    # rulesets that provide CHAINID, mapped to the expected last SLOAD field
    sload_tail_by_version = {"berlin": 2100, "istanbul": 800}

    if evm_version in sload_tail_by_version:
        assert "CHAINID" in table
        assert table["SLOAD"][-1] == sload_tail_by_version[evm_version]
    else:
        assert "CHAINID" not in table
        assert table["SLOAD"][-1] == 200

    # CREATE2 is absent only on the two oldest rulesets
    if evm_version not in ("byzantium", "atlantis"):
        assert table["CREATE2"][-1] == 32000
    else:
        assert "CREATE2" not in table
def _build_opcodes(bytecode: bytes) -> str:
    """
    Disassemble raw bytecode into a space-separated string of mnemonics.

    Every byte is translated through the table returned by
    ``opcodes.get_opcodes()``. A ``PUSHn`` mnemonic is followed by its ``n``
    immediate bytes rendered as one ``0x``-prefixed, uppercase hex literal
    with two digits per byte.

    Args:
        bytecode: the raw bytecode to disassemble.

    Returns:
        The mnemonics (and push literals) joined by single spaces.

    Raises:
        KeyError: if a byte in instruction position is not in the opcode table.
    """
    bytecode_sequence = deque(bytecode)

    # invert mnemonic -> (opcode byte, ...) into opcode byte -> mnemonic
    opcode_map = {v[0]: k for k, v in opcodes.get_opcodes().items()}
    opcode_output = []

    while bytecode_sequence:
        op = bytecode_sequence.popleft()
        mnemonic = opcode_map[op]
        opcode_output.append(mnemonic)
        if "PUSH" in mnemonic:
            push_len = int(mnemonic[4:])
            # a PUSH at the very end may claim more immediate bytes than
            # remain (e.g. trailing non-code data); take what is there
            # instead of failing on an empty deque
            push_len = min(push_len, len(bytecode_sequence))
            # two hex digits per byte: without zero padding 0x05 would be
            # rendered as "5" and the literal would be wrong and ambiguous
            push_values = [f"{bytecode_sequence.popleft():02X}" for _ in range(push_len)]
            opcode_output.append(f"0x{''.join(push_values)}")

    return " ".join(opcode_output)
def _compile_to_assembly(code, withargs=None, existing_labels=None, break_dest=None, height=0):
    """
    Recursively lower one IR node into a flat list of assembly items.

    Args:
        code: the IR node to compile. Its ``value`` selects the branch taken
            below and its ``args`` are the child nodes.
        withargs: dict mapping stack-variable names (bound by ``with``,
            ``repeat`` or ``label``) to the stack height at which each was
            bound. Mutated while compiling ``with`` and ``repeat`` bodies and
            restored afterwards. Defaults to a fresh dict.
        existing_labels: set of label / unique-symbol names emitted so far,
            used to reject duplicates. Mutated in place. Defaults to a fresh set.
        break_dest: ``(exit_dest, continue_dest, break_height)`` of the
            innermost enclosing ``repeat``, or ``None`` outside a loop.
        height: number of items currently on the stack in this scope; used to
            compute DUP/SWAP indices for stack variables.

    Returns:
        A list of assembly items: opcode mnemonics, integers (push data),
        symbol strings, and -- for ``deploy`` -- one nested list holding the
        runtime sub-program.

    Raises:
        CompilerPanic: on wrongly typed ``withargs`` / ``existing_labels``,
            malformed ``repeat``, shadowed loop variable, or loop control
            statements outside a loop.
        CodegenPanic: on a malformed ``label`` or on ``exit_to``.
        Exception: on out-of-range integers, stack variables deeper than 16,
            malformed ``set``, duplicate labels, ``istorebytes``, or an
            unrecognised node.
    """
    if withargs is None:
        withargs = {}
    if not isinstance(withargs, dict):
        raise CompilerPanic(f"Incorrect type for withargs: {type(withargs)}")

    def _data_ofst_of(sym, ofst, height_):
        # e.g. _OFST _sym_foo 32
        assert is_symbol(sym) or is_mem_sym(sym)
        if isinstance(ofst.value, int):
            # resolve at compile time using magic _OFST op
            return ["_OFST", sym, ofst.value]
        else:
            # if we can't resolve at compile time, resolve at runtime
            ofst = _compile_to_assembly(ofst, withargs, existing_labels, break_dest, height_)
            return ofst + [sym, "ADD"]

    def _height_of(witharg):
        # distance from the top of the stack to the variable; DUP can reach
        # at most 16 deep
        ret = height - withargs[witharg]
        if ret > 16:
            raise Exception("With statement too deep")
        return ret

    if existing_labels is None:
        existing_labels = set()
    if not isinstance(existing_labels, set):
        raise CompilerPanic(f"must be set(), but got {type(existing_labels)}")

    # Opcodes
    if isinstance(code.value, str) and code.value.upper() in get_opcodes():
        o = []
        # arguments are pushed last-to-first; each one already compiled
        # raises the stack height by one for the next
        for i, c in enumerate(code.args[::-1]):
            o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i))
        o.append(code.value.upper())
        return o

    # Numbers
    elif isinstance(code.value, int):
        if code.value < -(2**255):
            raise Exception(f"Value too low: {code.value}")
        elif code.value >= 2**256:
            raise Exception(f"Value too high: {code.value}")
        # negative values wrap to their 256-bit two's complement form
        return PUSH(code.value % 2**256)

    # Variables connected to with statements
    elif isinstance(code.value, str) and code.value in withargs:
        return ["DUP" + str(_height_of(code.value))]

    # Setting variables connected to with statements
    elif code.value == "set":
        if len(code.args) != 2 or code.args[0].value not in withargs:
            raise Exception("Set expects two arguments, the first being a stack variable")
        if height - withargs[code.args[0].value] > 16:
            raise Exception("With statement too deep")
        return _compile_to_assembly(
            code.args[1], withargs, existing_labels, break_dest, height
        ) + ["SWAP" + str(height - withargs[code.args[0].value]), "POP"]

    # Pass statements
    # TODO remove "dummy"; no longer needed
    elif code.value in ("pass", "dummy"):
        return []

    # "mload" from data section of the currently executing code
    elif code.value == "dload":
        loc = code.args[0]

        o = []
        # codecopy 32 bytes to FREE_VAR_SPACE, then mload from FREE_VAR_SPACE
        o.extend(PUSH(32))
        o.extend(_data_ofst_of("_sym_code_end", loc, height + 1))
        o.extend(PUSH(MemoryPositions.FREE_VAR_SPACE) + ["CODECOPY"])
        o.extend(PUSH(MemoryPositions.FREE_VAR_SPACE) + ["MLOAD"])
        return o

    # batch copy from data section of the currently executing code to memory
    elif code.value == "dloadbytes":
        dst = code.args[0]
        src = code.args[1]
        len_ = code.args[2]

        o = []
        o.extend(_compile_to_assembly(len_, withargs, existing_labels, break_dest, height))
        o.extend(_data_ofst_of("_sym_code_end", src, height + 1))
        o.extend(_compile_to_assembly(dst, withargs, existing_labels, break_dest, height + 2))
        o.extend(["CODECOPY"])
        return o

    # "mload" from the data section of (to-be-deployed) runtime code
    elif code.value == "iload":
        loc = code.args[0]

        o = []
        o.extend(_data_ofst_of("_mem_deploy_end", loc, height))
        o.append("MLOAD")

        return o

    # "mstore" to the data section of (to-be-deployed) runtime code
    elif code.value == "istore":
        loc = code.args[0]
        val = code.args[1]

        o = []
        o.extend(_compile_to_assembly(val, withargs, existing_labels, break_dest, height))
        o.extend(_data_ofst_of("_mem_deploy_end", loc, height + 1))
        o.append("MSTORE")

        return o

    # batch copy from memory to the data section of runtime code
    elif code.value == "istorebytes":
        raise Exception("unimplemented")

    # If statements (2 arguments, ie. if x: y)
    elif code.value == "if" and len(code.args) == 2:
        o = []
        o.extend(_compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height))
        end_symbol = mksymbol("join")
        o.extend(["ISZERO", end_symbol, "JUMPI"])
        o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height))
        o.extend([end_symbol, "JUMPDEST"])
        return o

    # If statements (3 arguments, ie. if x: y, else: z)
    elif code.value == "if" and len(code.args) == 3:
        o = []
        o.extend(_compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height))
        mid_symbol = mksymbol("else")
        end_symbol = mksymbol("join")
        o.extend(["ISZERO", mid_symbol, "JUMPI"])
        o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height))
        o.extend([end_symbol, "JUMP", mid_symbol, "JUMPDEST"])
        o.extend(_compile_to_assembly(code.args[2], withargs, existing_labels, break_dest, height))
        o.extend([end_symbol, "JUMPDEST"])
        return o

    # repeat(counter_location, start, rounds, rounds_bound, body)
    # basically a do-while loop:
    #
    # assert(rounds <= rounds_bound)
    # if (rounds > 0) {
    #   do {
    #     body;
    #   } while (++i != start + rounds)
    # }
    elif code.value == "repeat":
        o = []
        if len(code.args) != 5:
            raise CompilerPanic("bad number of repeat args")  # pragma: notest

        i_name = code.args[0]
        start = code.args[1]
        rounds = code.args[2]
        rounds_bound = code.args[3]
        body = code.args[4]

        entry_dest, continue_dest, exit_dest = (
            mksymbol("loop_start"),
            mksymbol("loop_continue"),
            mksymbol("loop_exit"),
        )

        # stack: []
        o.extend(_compile_to_assembly(start, withargs, existing_labels, break_dest, height))

        o.extend(_compile_to_assembly(rounds, withargs, existing_labels, break_dest, height + 1))

        # stack: i

        # assert rounds <= round_bound
        if rounds != rounds_bound:
            # stack: i, rounds
            o.extend(
                _compile_to_assembly(
                    rounds_bound, withargs, existing_labels, break_dest, height + 2
                )
            )
            # stack: i, rounds, rounds_bound
            # assert rounds <= rounds_bound
            # TODO this runtime assertion should never fail for
            # internally generated repeats.
            # maybe drop it or jump to 0xFE
            o.extend(["DUP2", "GT"] + _assert_false())

            # stack: i, rounds
            # if (0 == rounds) { goto end_dest; }
            o.extend(["DUP1", "ISZERO", exit_dest, "JUMPI"])

        # stack: start, rounds
        if start.value != 0:
            o.extend(["DUP2", "ADD"])

        # stack: i, exit_i
        o.extend(["SWAP1"])

        if i_name.value in withargs:
            raise CompilerPanic(f"shadowed loop variable {i_name}")
        withargs[i_name.value] = height + 1

        # stack: exit_i, i
        o.extend([entry_dest, "JUMPDEST"])
        o.extend(
            _compile_to_assembly(
                body,
                withargs,
                existing_labels,
                (exit_dest, continue_dest, height + 2),
                height + 2,
            )
        )

        del withargs[i_name.value]

        # clean up any stack items left by body
        o.extend(["POP"] * body.valency)

        # stack: exit_i, i
        # increment i:
        o.extend([continue_dest, "JUMPDEST", "PUSH1", 1, "ADD"])

        # stack: exit_i, i+1 (new_i)
        # if (exit_i != new_i) { goto entry_dest }
        o.extend(["DUP2", "DUP2", "XOR", entry_dest, "JUMPI"])
        o.extend([exit_dest, "JUMPDEST", "POP", "POP"])

        return o

    # Continue to the next iteration of the for loop
    elif code.value == "continue":
        if not break_dest:
            raise CompilerPanic("Invalid continue")
        _, continue_dest, _ = break_dest
        return [continue_dest, "JUMP"]

    # Break from inside a for loop
    elif code.value == "break":
        if not break_dest:
            raise CompilerPanic("Invalid break")
        dest, _, break_height = break_dest

        n_local_vars = height - break_height
        # clean up any stack items declared in the loop body
        cleanup_local_vars = ["POP"] * n_local_vars
        return cleanup_local_vars + [dest, "JUMP"]

    # Break from inside one or more for loops prior to a return statement inside the loop
    elif code.value == "cleanup_repeat":
        if not break_dest:
            raise CompilerPanic("Invalid break")
        # clean up local vars and internal loop vars
        _, _, break_height = break_dest
        # except don't pop label params
        if "return_buffer" in withargs:
            break_height -= 1
        if "return_pc" in withargs:
            break_height -= 1
        return ["POP"] * break_height

    # With statements
    elif code.value == "with":
        o = []
        o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height))
        # remember any outer binding of the same name so it can be restored
        old = withargs.get(code.args[0].value, None)
        withargs[code.args[0].value] = height
        o.extend(
            _compile_to_assembly(code.args[2], withargs, existing_labels, break_dest, height + 1)
        )
        if code.args[2].valency:
            # keep the body's result, drop the variable beneath it
            o.extend(["SWAP1", "POP"])
        else:
            o.extend(["POP"])
        if old is not None:
            withargs[code.args[0].value] = old
        else:
            del withargs[code.args[0].value]
        return o

    # runtime statement (used to deploy runtime code)
    elif code.value == "deploy":
        memsize = code.args[0].value  # used later to calculate _mem_deploy_start
        ir = code.args[1]
        padding = code.args[2].value
        assert isinstance(memsize, int), "non-int memsize"
        assert isinstance(padding, int), "non-int padding"

        begincode = mksymbol("runtime_begin")

        # the runtime code is compiled in a fresh scope
        subcode = _compile_to_assembly(ir)

        o = []

        # COPY the code to memory for deploy
        o.extend(["_sym_subcode_size", begincode, "_mem_deploy_start", "CODECOPY"])

        # calculate the len of runtime code
        o.extend(["_OFST", "_sym_subcode_size", padding])  # stack: len
        o.extend(["_mem_deploy_start"])  # stack: len mem_ofst
        o.extend(["RETURN"])

        # since the asm data structures are very primitive, to make sure
        # assembly_to_evm is able to calculate data offsets correctly,
        # we pass the memsize via magic opcodes to the subcode
        subcode = [f"_DEPLOY_MEM_OFST_{memsize}"] + subcode

        # append the runtime code after the ctor code
        o.extend([begincode, "BLANK"])
        # `append(...)` call here is intentional.
        # each sublist is essentially its own program with its
        # own symbols.
        # in the later step when the "ir" block compiled to EVM,
        # symbols in subcode are resolved to position from start of
        # runtime-code (instead of position from start of bytecode).
        o.append(subcode)

        return o

    # Seq (used to piece together multiple statements)
    elif code.value == "seq":
        o = []
        for arg in code.args:
            o.extend(_compile_to_assembly(arg, withargs, existing_labels, break_dest, height))
            # only the last statement's value survives the sequence
            if arg.valency == 1 and arg != code.args[-1]:
                o.append("POP")
        return o

    # Assure (if false, invalid opcode)
    elif code.value == "assert_unreachable":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        end_symbol = mksymbol("reachable")
        o.extend([end_symbol, "JUMPI", "INVALID", end_symbol, "JUMPDEST"])
        return o

    # Assert (if false, exit)
    elif code.value == "assert":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        o.extend(["ISZERO"])
        o.extend(_assert_false())
        return o

    # SHA3 a single value
    elif code.value == "sha3_32":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        o.extend(
            [
                "PUSH1",
                MemoryPositions.FREE_VAR_SPACE,
                "MSTORE",
                "PUSH1",
                32,
                "PUSH1",
                MemoryPositions.FREE_VAR_SPACE,
                "SHA3",
            ]
        )
        return o

    # SHA3 a 64 byte value
    elif code.value == "sha3_64":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        # the first argument is already on the stack, so the second one is
        # compiled one slot higher; using `height` here would make DUP/SWAP
        # indices for stack variables referenced in it off by one
        o.extend(
            _compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height + 1)
        )
        o.extend(
            [
                "PUSH1",
                MemoryPositions.FREE_VAR_SPACE2,
                "MSTORE",
                "PUSH1",
                MemoryPositions.FREE_VAR_SPACE,
                "MSTORE",
                "PUSH1",
                64,
                "PUSH1",
                MemoryPositions.FREE_VAR_SPACE,
                "SHA3",
            ]
        )
        return o

    elif code.value == "select":
        # b ^ ((a ^ b) * cond) where cond is 1 or 0
        # let t = a ^ b
        cond = code.args[0]
        a = code.args[1]
        b = code.args[2]

        o = []
        o.extend(_compile_to_assembly(b, withargs, existing_labels, break_dest, height))
        o.extend(_compile_to_assembly(a, withargs, existing_labels, break_dest, height + 1))
        # stack: b a
        o.extend(["DUP2", "XOR"])
        # stack: b t
        o.extend(_compile_to_assembly(cond, withargs, existing_labels, break_dest, height + 2))
        # stack: b t cond
        o.extend(["MUL", "XOR"])

        # stack: b ^ (t * cond)
        return o

    # <= operator
    elif code.value == "le":
        return _compile_to_assembly(
            IRnode.from_list(["iszero", ["gt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # >= operator
    elif code.value == "ge":
        return _compile_to_assembly(
            IRnode.from_list(["iszero", ["lt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # signed <= operator
    elif code.value == "sle":
        return _compile_to_assembly(
            IRnode.from_list(["iszero", ["sgt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # signed >= operator
    elif code.value == "sge":
        return _compile_to_assembly(
            IRnode.from_list(["iszero", ["slt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # != operator
    elif code.value == "ne":
        return _compile_to_assembly(
            IRnode.from_list(["iszero", ["eq", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # e.g. 95 -> 96, 96 -> 96, 97 -> 128
    elif code.value == "ceil32":
        # floor32(x) = x - x % 32 == x & 0b11..100000 == x & (~31)
        # ceil32(x) = floor32(x + 31) == (x + 31) & (~31)
        x = code.args[0]
        return _compile_to_assembly(
            IRnode.from_list(["and", ["add", x, 31], ["not", 31]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # jump to a symbol, and push variable # of arguments onto stack
    elif code.value == "goto":
        o = []
        for i, c in enumerate(reversed(code.args[1:])):
            o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i))
        o.extend(["_sym_" + str(code.args[0]), "JUMP"])
        return o

    # push a literal symbol
    elif isinstance(code.value, str) and is_symbol(code.value):
        return [code.value]

    # set a symbol as a location.
    elif code.value == "label":
        label_name = code.args[0].value
        assert isinstance(label_name, str)

        if label_name in existing_labels:
            raise Exception(f"Label with name {label_name} already exists!")
        else:
            existing_labels.add(label_name)

        if code.args[1].value != "var_list":
            raise CodegenPanic("2nd arg to label must be var_list")
        var_args = code.args[1].args

        body = code.args[2]

        # new scope
        height = 0
        withargs = {}

        for arg in reversed(var_args):
            assert isinstance(
                arg.value, str
            )  # already checked for higher up but only the paranoid survive
            withargs[arg.value] = height
            height += 1

        body_asm = _compile_to_assembly(
            body, withargs=withargs, existing_labels=existing_labels, height=height
        )
        # pop_scoped_vars = ["POP"] * height
        # for now, _rewrite_return_sequences forces
        # label params to be consumed implicitly
        pop_scoped_vars = []

        return ["_sym_" + label_name, "JUMPDEST"] + body_asm + pop_scoped_vars

    elif code.value == "unique_symbol":
        symbol = code.args[0].value
        assert isinstance(symbol, str)

        if symbol in existing_labels:
            raise Exception(f"symbol {symbol} already exists!")
        else:
            existing_labels.add(symbol)

        return []

    elif code.value == "exit_to":
        raise CodegenPanic("exit_to not implemented yet!")

    # inject debug opcode.
    elif code.value == "debugger":
        return mkdebug(pc_debugger=False, source_pos=code.source_pos)

    # inject debug opcode.
    elif code.value == "pc_debugger":
        return mkdebug(pc_debugger=True, source_pos=code.source_pos)

    else:
        raise Exception("Weird code element: " + repr(code))
def assembly_to_evm(assembly, pc_ofst=0, insert_vyper_signature=False):
    """
    Assemble a list of assembly items into EVM bytecode.

    The work is done in three passes over ``assembly``: one to compile the
    nested runtime sub-program (if any) and size memory-symbol pushes, one to
    assign a program counter to every symbol, and one to emit bytes.

    Args:
        assembly: list of asm instructions (mnemonics, ints, symbols, and at
            most one nested list holding the runtime sub-program).
        pc_ofst: when constructing the source map, the amount to offset all
            pcs by (no effect until we add deploy code source map; it is not
            read anywhere in this function).
        insert_vyper_signature: whether to append vyper metadata to output
            (should be true for runtime code).

    Returns:
        A ``(bytecode, line_number_map)`` tuple. ``bytecode`` is ``bytes``;
        ``line_number_map`` is the source-map dict with its ``breakpoints``
        and ``pc_breakpoints`` entries converted from sets to lists.

    Raises:
        CompilerPanic: if the same symbol marks two jump destinations.
        Exception: if an item cannot be assembled.
    """
    line_number_map = {
        "breakpoints": set(),
        "pc_breakpoints": set(),
        "pc_jump_map": {0: "-"},
        "pc_pos_map": {},
        "error_map": {},
    }

    pc = 0
    symbol_map = {}

    runtime_code, runtime_code_start, runtime_code_end = None, None, None

    bytecode_suffix = b""
    if insert_vyper_signature:
        # CBOR encoded: {"vyper": [major,minor,patch]}
        bytecode_suffix += b"\xa1\x65vyper\x83" + bytes(list(version_tuple))
        # followed by the length of the metadata above, as 2 big-endian bytes
        bytecode_suffix += len(bytecode_suffix).to_bytes(2, "big")

    CODE_OFST_SIZE = 2  # size of a PUSH instruction for a code symbol

    # to optimize the size of deploy code - we want to use the smallest
    # PUSH instruction possible which can support all memory symbols
    # (and also works with linear pass symbol resolution)
    # to do this, we first do a single pass to compile any runtime code
    # and use that to calculate mem_ofst_size.
    mem_ofst_size, ctor_mem_size = None, None
    max_mem_ofst = 0
    for i, item in enumerate(assembly):
        if isinstance(item, list):
            assert runtime_code is None, "Multiple subcodes"
            runtime_code, runtime_map = assembly_to_evm(item, insert_vyper_signature=True)

            # the sub-program starts with a magic item carrying the
            # constructor memory size
            assert item[0].startswith("_DEPLOY_MEM_OFST_")
            assert ctor_mem_size is None
            ctor_mem_size = int(item[0][len("_DEPLOY_MEM_OFST_") :])

            runtime_code_start, runtime_code_end = _runtime_code_offsets(
                ctor_mem_size, len(runtime_code)
            )
            assert runtime_code_end - runtime_code_start == len(runtime_code)

        # track the largest constant offset applied to a memory symbol
        if is_ofst(item) and is_mem_sym(assembly[i + 1]):
            max_mem_ofst = max(assembly[i + 2], max_mem_ofst)

    # NOTE(review): mem_ofst_size stays None when there is no runtime
    # sub-program; a memory symbol in such an assembly would fail below.
    if runtime_code_end is not None:
        mem_ofst_size = calc_mem_ofst_size(runtime_code_end + max_mem_ofst)

    # go through the code, resolving symbolic locations
    # (i.e. JUMPDEST locations) to actual code locations
    for i, item in enumerate(assembly):
        note_line_num(line_number_map, item, pc)
        if item == "DEBUG":
            continue  # skip debug

        # update pc_jump_map
        if item == "JUMP":
            last = assembly[i - 1]
            if is_symbol(last) and last.startswith("_sym_internal"):
                if last.endswith("cleanup"):
                    # exit an internal function
                    line_number_map["pc_jump_map"][pc] = "o"
                else:
                    # enter an internal function
                    line_number_map["pc_jump_map"][pc] = "i"
            else:
                # everything else
                line_number_map["pc_jump_map"][pc] = "-"
        elif item in ("JUMPI", "JUMPDEST"):
            line_number_map["pc_jump_map"][pc] = "-"

        # update pc
        if is_symbol(item):
            if assembly[i + 1] == "JUMPDEST" or assembly[i + 1] == "BLANK":
                # Don't increment pc as the symbol itself doesn't go into code
                if item in symbol_map:
                    raise CompilerPanic(f"duplicate jumpdest {item}")

                symbol_map[item] = pc
            else:
                pc += CODE_OFST_SIZE + 1  # PUSH2 highbits lowbits
        elif is_mem_sym(item):
            # PUSH<n> item
            pc += mem_ofst_size + 1
        elif is_ofst(item):
            assert is_symbol(assembly[i + 1]) or is_mem_sym(assembly[i + 1])
            assert isinstance(assembly[i + 2], int)
            # [_OFST, _sym_foo, bar] -> PUSH2 (foo+bar)
            # [_OFST, _mem_foo, bar] -> PUSHN (foo+bar)
            # the symbol and the integer that follow are still visited by
            # this loop and add a full PUSH plus 1; subtracting 1 here makes
            # the whole triple count as a single PUSH
            pc -= 1
        elif item == "BLANK":
            pc += 0
        elif isinstance(item, str) and item.startswith("_DEPLOY_MEM_OFST_"):
            # _DEPLOY_MEM_OFST is assembly magic which will
            # get removed during final assembly-to-bytecode
            pc += 0
        elif isinstance(item, list):
            # add source map for all items in the runtime map
            t = adjust_pc_maps(runtime_map, pc)
            for key in line_number_map:
                line_number_map[key].update(t[key])
            pc += len(runtime_code)

        else:
            pc += 1

    # the metadata suffix is emitted after all code, before `_sym_code_end`
    pc += len(bytecode_suffix)

    symbol_map["_sym_code_end"] = pc
    symbol_map["_mem_deploy_start"] = runtime_code_start
    symbol_map["_mem_deploy_end"] = runtime_code_end
    if runtime_code is not None:
        symbol_map["_sym_subcode_size"] = len(runtime_code)

    # (NOTE CMC 2022-06-17 this way of generating bytecode did not
    # seem to be a perf hotspot. if it is, may want to use bytearray()
    # instead).

    # TODO refactor into two functions, create posmap and assemble

    o = b""

    # now that all symbols have been resolved, generate bytecode
    # using the symbol map
    to_skip = 0  # number of upcoming items already consumed by an _OFST
    for i, item in enumerate(assembly):
        if to_skip > 0:
            to_skip -= 1
            continue

        if item in ("DEBUG", "BLANK"):
            continue  # skippable opcodes

        elif isinstance(item, str) and item.startswith("_DEPLOY_MEM_OFST_"):
            continue

        elif is_symbol(item):
            # a symbol that labels a JUMPDEST/BLANK emits nothing; any other
            # use pushes the resolved location
            if assembly[i + 1] != "JUMPDEST" and assembly[i + 1] != "BLANK":
                bytecode, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=CODE_OFST_SIZE))
                o += bytecode

        elif is_mem_sym(item):
            bytecode, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=mem_ofst_size))
            o += bytecode

        elif is_ofst(item):
            # _OFST _sym_foo 32
            ofst = symbol_map[assembly[i + 1]] + assembly[i + 2]
            n = mem_ofst_size if is_mem_sym(assembly[i + 1]) else CODE_OFST_SIZE
            bytecode, _ = assembly_to_evm(PUSH_N(ofst, n))
            o += bytecode
            to_skip = 2

        elif isinstance(item, int):
            # raw data byte (e.g. a PUSH immediate)
            o += bytes([item])
        elif isinstance(item, str) and item.upper() in get_opcodes():
            o += bytes([get_opcodes()[item.upper()][0]])
        elif item[:4] == "PUSH":
            o += bytes([PUSH_OFFSET + int(item[4:])])
        elif item[:3] == "DUP":
            o += bytes([DUP_OFFSET + int(item[3:])])
        elif item[:4] == "SWAP":
            o += bytes([SWAP_OFFSET + int(item[4:])])
        elif isinstance(item, list):
            # the runtime sub-program was already assembled in the first pass
            o += runtime_code
        else:
            # Should never be reached, because assembly is created in _compile_to_assembly.
            raise Exception("Weird symbol in assembly: " + str(item))  # pragma: no cover

    o += bytecode_suffix

    # convert the breakpoint sets to lists before returning
    line_number_map["breakpoints"] = list(line_number_map["breakpoints"])
    line_number_map["pc_breakpoints"] = list(line_number_map["pc_breakpoints"])
    return o, line_number_map
def assembly_to_evm(assembly, start_pos=0):
    """
    Assemble a list of assembly items into EVM bytecode.

    A first pass assigns a position to every symbol (and assembles the
    nested runtime sub-program, if present); a second pass emits the bytes.

    Args:
        assembly: list of assembly items (mnemonics, ints, symbols, and at
            most one nested list holding the runtime sub-program).
        start_pos: position of this assembly's first byte inside the
            enclosing program. Source-map keys use the shifted position,
            while symbol locations are stored relative to ``start_pos``.

    Returns:
        A ``(bytecode, line_number_map)`` tuple, with the ``breakpoints``
        and ``pc_breakpoints`` entries of the map converted to lists.

    Raises:
        CompilerPanic: if the same symbol marks two jump destinations.
        Exception: if an item cannot be assembled.
    """
    source_map = {
        "breakpoints": set(),
        "pc_breakpoints": set(),
        "pc_jump_map": {0: "-"},
        "pc_pos_map": {},
    }

    symbol_positions = {}
    runtime_code = None
    runtime_code_start = None
    runtime_code_end = None

    cursor = start_pos

    # pass 1: walk the items, tracking the byte position and recording
    # where every symbolic location (i.e. JUMPDEST location) ends up
    for idx, item in enumerate(assembly):
        note_line_num(source_map, item, cursor)
        if item == "DEBUG":
            continue  # debug markers occupy no space

        if item == "JUMP":
            previous = assembly[idx - 1]
            if is_symbol(previous) and previous.startswith("_sym_internal"):
                # "o": leaving an internal function, "i": entering one
                jump_kind = "o" if previous.endswith("cleanup") else "i"
            else:
                jump_kind = "-"
            source_map["pc_jump_map"][cursor] = jump_kind
        elif item in ("JUMPI", "JUMPDEST"):
            source_map["pc_jump_map"][cursor] = "-"

        if is_symbol(item):
            if assembly[idx + 1] in ("JUMPDEST", "BLANK"):
                # a label: takes no room in the output, just remember it
                if item in symbol_positions:
                    raise CompilerPanic(f"duplicate jumpdest {item}")
                symbol_positions[item] = cursor - start_pos
            else:
                cursor += 3  # PUSH2 highbits lowbits
        elif is_mem_sym(item):
            cursor += 5  # PUSH4 item
        elif is_ofst(item):
            assert is_symbol(assembly[idx + 1]) or is_mem_sym(assembly[idx + 1])
            assert isinstance(assembly[idx + 2], int)
            # [_OFST, _sym_foo, bar] -> PUSH2 (foo+bar)
            # [_OFST, _mem_foo, bar] -> PUSH4 (foo+bar)
            # the two following items are still counted by this loop, so
            # compensate for the integer operand here
            cursor -= 1
        elif item == "BLANK" or (
            isinstance(item, str) and item.startswith("_DEPLOY_MEM_OFST_")
        ):
            # BLANK and the _DEPLOY_MEM_OFST magic item emit no bytes
            pass
        elif isinstance(item, list):
            assert runtime_code is None, "Multiple subcodes"
            runtime_code, nested_map = assembly_to_evm(item, start_pos=cursor)

            assert item[0].startswith("_DEPLOY_MEM_OFST_")
            ctor_mem_size = int(item[0][len("_DEPLOY_MEM_OFST_"):])

            runtime_code_start, runtime_code_end = _runtime_code_offsets(
                ctor_mem_size, len(runtime_code)
            )
            assert runtime_code_end - runtime_code_start == len(runtime_code)
            cursor += len(runtime_code)

            for key in source_map:
                source_map[key].update(nested_map[key])
        else:
            cursor += 1

    symbol_positions["_sym_code_end"] = cursor - start_pos
    symbol_positions["_mem_deploy_start"] = runtime_code_start
    symbol_positions["_mem_deploy_end"] = runtime_code_end
    if runtime_code is not None:
        symbol_positions["_sym_subcode_size"] = len(runtime_code)

    # pass 2: every symbol is resolved, so the bytes can be emitted
    chunks = []
    pending_skips = 0
    for idx, item in enumerate(assembly):
        if pending_skips > 0:
            # operand of a preceding _OFST, already folded into its PUSH
            pending_skips -= 1
            continue

        if item in ("DEBUG", "BLANK"):
            continue  # skippable opcodes

        elif isinstance(item, str) and item.startswith("_DEPLOY_MEM_OFST_"):
            continue

        elif is_symbol(item):
            if assembly[idx + 1] not in ("JUMPDEST", "BLANK"):
                pushed, _ = assembly_to_evm(PUSH_N(symbol_positions[item], n=2))
                chunks.append(pushed)

        elif is_mem_sym(item):
            pushed, _ = assembly_to_evm(PUSH_N(symbol_positions[item], n=4))
            chunks.append(pushed)

        elif is_ofst(item):
            # _OFST _sym_foo 32
            target = symbol_positions[assembly[idx + 1]] + assembly[idx + 2]
            width = 4 if is_mem_sym(assembly[idx + 1]) else 2
            pushed, _ = assembly_to_evm(PUSH_N(target, width))
            chunks.append(pushed)
            pending_skips = 2

        elif isinstance(item, int):
            chunks.append(bytes([item]))
        elif isinstance(item, str) and item.upper() in get_opcodes():
            chunks.append(bytes([get_opcodes()[item.upper()][0]]))
        elif item[:4] == "PUSH":
            chunks.append(bytes([PUSH_OFFSET + int(item[4:])]))
        elif item[:3] == "DUP":
            chunks.append(bytes([DUP_OFFSET + int(item[3:])]))
        elif item[:4] == "SWAP":
            chunks.append(bytes([SWAP_OFFSET + int(item[4:])]))
        elif isinstance(item, list):
            chunks.append(runtime_code)
        else:
            # Not expected to happen: assembly comes from _compile_to_assembly.
            raise Exception("Weird symbol in assembly: " + str(item))  # pragma: no cover

    o = b"".join(chunks)

    # the emitted length must agree with the position computed in pass 1
    assert len(o) == cursor - start_pos, (len(o), cursor, start_pos)
    source_map["breakpoints"] = list(source_map["breakpoints"])
    source_map["pc_breakpoints"] = list(source_map["pc_breakpoints"])
    return o, source_map
def assembly_to_evm(assembly, start_pos=0):
    """
    Assemble a list of assembly items into EVM bytecode.

    A first pass assigns a position to every symbol and assembles nested
    sub-programs; a second pass emits the bytes.

    Args:
        assembly: list of assembly items (mnemonics, ints, symbols, and
            nested lists holding sub-programs).
        start_pos: position of this assembly's first byte inside the
            enclosing program. Source-map keys use the shifted position,
            while symbol locations are stored relative to ``start_pos``.

    Returns:
        A ``(bytecode, line_number_map)`` tuple, with the ``breakpoints``
        and ``pc_breakpoints`` entries of the map converted to lists.

    Raises:
        CompilerPanic: if the same symbol marks two jump destinations.
        Exception: if an item cannot be assembled.
    """
    line_number_map = {
        "breakpoints": set(),
        "pc_breakpoints": set(),
        "pc_jump_map": {0: "-"},
        "pc_pos_map": {},
    }

    posmap = {}
    sub_assemblies = []
    codes = []
    pos = start_pos

    # go through the code, resolving symbolic locations
    # (i.e. JUMPDEST locations) to actual code locations
    for i, item in enumerate(assembly):
        note_line_num(line_number_map, item, pos)
        if item == "DEBUG":
            continue  # skip debug

        if item == "JUMP":
            last = assembly[i - 1]
            if is_symbol(last) and last.startswith("_sym_internal"):
                if last.endswith("cleanup"):
                    # exit an internal function
                    line_number_map["pc_jump_map"][pos] = "o"
                else:
                    # enter an internal function
                    line_number_map["pc_jump_map"][pos] = "i"
            else:
                # everything else
                line_number_map["pc_jump_map"][pos] = "-"
        elif item in ("JUMPI", "JUMPDEST"):
            line_number_map["pc_jump_map"][pos] = "-"

        if is_symbol(item):
            if assembly[i + 1] == "JUMPDEST" or assembly[i + 1] == "BLANK":
                # Don't increment position as the symbol itself doesn't go into code
                if item in posmap:
                    raise CompilerPanic(f"duplicate jumpdest {item}")
                posmap[item] = pos - start_pos
            else:
                pos += 3  # PUSH2 highbits lowbits
        elif item == "BLANK":
            pos += 0
        elif isinstance(item, list):
            c, sub_map = assembly_to_evm(item, start_pos=pos)
            sub_assemblies.append(item)
            codes.append(c)
            pos += len(c)
            for key in line_number_map:
                line_number_map[key].update(sub_map[key])
        else:
            pos += 1

    # like every other symbol, the end-of-code marker is relative to the
    # start of this (sub-)program; storing the absolute position would be
    # wrong for any assembly compiled with a non-zero start_pos
    posmap["_sym_codeend"] = pos - start_pos

    o = b""
    for i, item in enumerate(assembly):
        if item == "DEBUG":
            continue  # skip debug
        elif is_symbol(item):
            # a symbol that labels a JUMPDEST/BLANK emits nothing; any other
            # use pushes the resolved location as PUSH2 <hi> <lo>
            if assembly[i + 1] != "JUMPDEST" and assembly[i + 1] != "BLANK":
                o += bytes([PUSH_OFFSET + 2, posmap[item] // 256, posmap[item] % 256])
        elif isinstance(item, int):
            o += bytes([item])
        elif isinstance(item, str) and item.upper() in get_opcodes():
            o += bytes([get_opcodes()[item.upper()][0]])
        elif item[:4] == "PUSH":
            o += bytes([PUSH_OFFSET + int(item[4:])])
        elif item[:3] == "DUP":
            o += bytes([DUP_OFFSET + int(item[3:])])
        elif item[:4] == "SWAP":
            o += bytes([SWAP_OFFSET + int(item[4:])])
        elif item == "BLANK":
            pass
        elif isinstance(item, list):
            # reuse the bytes assembled for this sub-program in the first pass
            for sub_assembly, sub_code in zip(sub_assemblies, codes):
                if sub_assembly == item:
                    o += sub_code
                    break
        else:
            # Should never be reached, because assembly is created in _compile_to_assembly.
            raise Exception("Weird symbol in assembly: " + str(item))  # pragma: no cover

    # the emitted length must agree with the position computed above
    assert len(o) == pos - start_pos
    line_number_map["breakpoints"] = list(line_number_map["breakpoints"])
    line_number_map["pc_breakpoints"] = list(line_number_map["pc_breakpoints"])
    return o, line_number_map
def _compile_to_assembly(code, withargs=None, existing_labels=None, break_dest=None, height=0):
    """
    Recursively compile an LLL node into a flat list of assembly items.

    Parameters
    ----------
    code :
        Node to compile. Its `value`, `args`, `valency` and `pos` attributes
        are read.
    withargs : dict, optional
        Maps the name of each stack variable that is currently in scope
        (introduced by `with` or `repeat`) to the stack height at which its
        value lives. Mutated while a `with`/`repeat` body is compiled and
        restored afterwards.
    existing_labels : set, optional
        Names of the labels emitted so far; `label` nodes add to it.
    break_dest : tuple, optional
        `(exit_symbol, continue_symbol, height)` of the innermost enclosing
        loop, or None outside of a loop.
    height : int
        Number of items on the stack at the point where `code` starts
        executing. Used to compute DUPn/SWAPn depths of stack variables.

    Returns
    -------
    list
        Assembly items: opcode names, ints, symbols and - for `lll` nodes -
        nested lists.

    Raises
    ------
    CompilerPanic
        For wrongly typed `withargs`/`existing_labels`, a malformed `repeat`,
        a shadowed loop variable, or `break`/`continue` outside of a loop.
    Exception
        For out-of-range numbers, stack variables deeper than 16, a bad
        `set`, a clamp that is known to fail at compile time, a duplicate
        label, or an unrecognised node.
    """
    if withargs is None:
        withargs = {}
    if not isinstance(withargs, dict):
        raise CompilerPanic(f"Incorrect type for withargs: {type(withargs)}")

    if existing_labels is None:
        existing_labels = set()
    if not isinstance(existing_labels, set):
        raise CompilerPanic(f"Incorrect type for existing_labels: {type(existing_labels)}")

    # Opcodes
    if isinstance(code.value, str) and code.value.upper() in get_opcodes():
        o = []
        # arguments are pushed last-to-first; each one raises the stack by one
        for i, c in enumerate(code.args[::-1]):
            o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i))
        o.append(code.value.upper())
        return o

    # Numbers
    elif isinstance(code.value, int):
        if code.value < -(2**255):
            raise Exception(f"Value too low: {code.value}")
        elif code.value >= 2**256:
            raise Exception(f"Value too high: {code.value}")
        # fall back to a single zero byte when the conversion yields nothing
        bytez = num_to_bytearray(code.value % 2**256) or [0]
        return ["PUSH" + str(len(bytez))] + bytez

    # Variables connected to with statements
    elif isinstance(code.value, str) and code.value in withargs:
        if height - withargs[code.value] > 16:
            raise Exception("With statement too deep")
        return ["DUP" + str(height - withargs[code.value])]

    # Setting variables connected to with statements
    elif code.value == "set":
        if len(code.args) != 2 or code.args[0].value not in withargs:
            raise Exception("Set expects two arguments, the first being a stack variable")
        if height - withargs[code.args[0].value] > 16:
            raise Exception("With statement too deep")
        return _compile_to_assembly(
            code.args[1], withargs, existing_labels, break_dest, height
        ) + [
            "SWAP" + str(height - withargs[code.args[0].value]),
            "POP",
        ]

    # Pass statements
    elif code.value in ("pass", "dummy"):
        return []

    # Code length
    elif code.value == "~codelen":
        return ["_sym_codeend"]

    # Calldataload equivalent for code
    elif code.value == "codeload":
        return _compile_to_assembly(
            LLLnode.from_list([
                "seq",
                ["codecopy", MemoryPositions.FREE_VAR_SPACE, code.args[0], 32],
                ["mload", MemoryPositions.FREE_VAR_SPACE],
            ]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # If statements (2 arguments, ie. if x: y)
    elif code.value == "if" and len(code.args) == 2:
        o = []
        o.extend(_compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height))
        end_symbol = mksymbol("join")
        o.extend(["ISZERO", end_symbol, "JUMPI"])
        o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height))
        o.extend([end_symbol, "JUMPDEST"])
        return o

    # If statements (3 arguments, ie. if x: y, else: z)
    elif code.value == "if" and len(code.args) == 3:
        o = []
        o.extend(_compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height))
        mid_symbol = mksymbol("else")
        end_symbol = mksymbol("join")
        o.extend(["ISZERO", mid_symbol, "JUMPI"])
        o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height))
        o.extend([end_symbol, "JUMP", mid_symbol, "JUMPDEST"])
        o.extend(_compile_to_assembly(code.args[2], withargs, existing_labels, break_dest, height))
        o.extend([end_symbol, "JUMPDEST"])
        return o

    # repeat(counter_location, start, rounds, rounds_bound, body)
    # basically a do-while loop:
    #
    # assert(rounds <= rounds_bound)
    # if (rounds > 0) {
    #   do {
    #     body;
    #   } while (++i != start + rounds)
    # }
    elif code.value == "repeat":
        o = []
        if len(code.args) != 5:
            raise CompilerPanic("bad number of repeat args")  # pragma: notest

        i_name = code.args[0]
        start = code.args[1]
        rounds = code.args[2]
        rounds_bound = code.args[3]
        body = code.args[4]

        entry_dest, continue_dest, exit_dest = (
            mksymbol("loop_start"),
            mksymbol("loop_continue"),
            mksymbol("loop_exit"),
        )

        # stack: []
        o.extend(_compile_to_assembly(start, withargs, existing_labels, break_dest, height))

        # stack: i
        o.extend(
            _compile_to_assembly(rounds, withargs, existing_labels, break_dest, height + 1)
        )

        # assert rounds <= round_bound
        if rounds != rounds_bound:
            # stack: i, rounds
            o.extend(
                _compile_to_assembly(
                    rounds_bound, withargs, existing_labels, break_dest, height + 2
                )
            )
            # stack: i, rounds, rounds_bound
            # assert rounds <= rounds_bound
            # TODO this runtime assertion should never fail for
            # internally generated repeats.
            # maybe drop it or jump to 0xFE
            o.extend(["DUP2", "GT", "_sym_revert0", "JUMPI"])

            # stack: i, rounds
            # if (0 == rounds) { goto end_dest; }
            o.extend(["DUP1", "ISZERO", exit_dest, "JUMPI"])

        # stack: start, rounds
        if start.value != 0:
            o.extend(["DUP2", "ADD"])

        # stack: i, exit_i
        o.extend(["SWAP1"])

        if i_name.value in withargs:
            raise CompilerPanic(f"shadowed loop variable {i_name}")
        # the loop counter is the top-of-stack item while the body runs
        withargs[i_name.value] = height + 1

        # stack: exit_i, i
        o.extend([entry_dest, "JUMPDEST"])
        o.extend(
            _compile_to_assembly(
                body,
                withargs,
                existing_labels,
                (exit_dest, continue_dest, height + 2),
                height + 2,
            )
        )

        del withargs[i_name.value]

        # clean up any stack items left by body
        o.extend(["POP"] * body.valency)

        # stack: exit_i, i
        # increment i:
        o.extend([continue_dest, "JUMPDEST", "PUSH1", 1, "ADD"])

        # stack: exit_i, i+1 (new_i)
        # if (exit_i != new_i) { goto entry_dest }
        o.extend(["DUP2", "DUP2", "XOR", entry_dest, "JUMPI"])
        o.extend([exit_dest, "JUMPDEST", "POP", "POP"])

        return o

    # Continue to the next iteration of the for loop
    elif code.value == "continue":
        if not break_dest:
            raise CompilerPanic("Invalid continue")
        dest, continue_dest, break_height = break_dest
        return [continue_dest, "JUMP"]

    # Break from inside a for loop
    elif code.value == "break":
        if not break_dest:
            raise CompilerPanic("Invalid break")
        dest, continue_dest, break_height = break_dest

        n_local_vars = height - break_height
        # clean up any stack items declared in the loop body
        cleanup_local_vars = ["POP"] * n_local_vars
        return cleanup_local_vars + [dest, "JUMP"]

    # Break from inside one or more for loops prior to a return statement inside the loop
    elif code.value == "cleanup_repeat":
        if not break_dest:
            raise CompilerPanic("Invalid break")

        _, _, break_height = break_dest
        # clean up local vars and internal loop vars
        return ["POP"] * break_height

    # With statements
    elif code.value == "with":
        o = []
        o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height))
        # remember a shadowed binding so that it can be restored afterwards
        old = withargs.get(code.args[0].value, None)
        withargs[code.args[0].value] = height
        o.extend(
            _compile_to_assembly(
                code.args[2], withargs, existing_labels, break_dest, height + 1
            )
        )
        # drop the variable, keeping the body's result (if any) on top
        if code.args[2].valency:
            o.extend(["SWAP1", "POP"])
        else:
            o.extend(["POP"])
        if old is not None:
            withargs[code.args[0].value] = old
        else:
            del withargs[code.args[0].value]
        return o

    # LLL statement (used to contain code inside code)
    elif code.value == "lll":
        o = []
        begincode = mksymbol("lll_begin")
        endcode = mksymbol("lll_end")
        o.extend([endcode, "JUMP", begincode, "BLANK"])

        lll = _compile_to_assembly(code.args[1], {}, existing_labels, None, 0)

        # `append(...)` call here is intentional.
        # each sublist is essentially its own program with its
        # own symbols.
        # in the later step when the "lll" block compiled to EVM,
        # compile_to_evm has logic to resolve symbols in "lll" to
        # position from start of runtime-code (instead of position
        # from start of bytecode).
        o.append(lll)

        o.extend([endcode, "JUMPDEST", begincode, endcode, "SUB", begincode])
        # NOTE(review): two items (code size, begincode) are on the stack at
        # this point but the memory location is compiled at `height`; this
        # looks off by two if it ever reads a stack variable - confirm.
        o.extend(_compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height))

        # COPY the code to memory for deploy
        o.extend(["CODECOPY", begincode, endcode, "SUB"])
        return o

    # Seq (used to piece together multiple statements)
    elif code.value == "seq":
        o = []
        for arg in code.args:
            o.extend(_compile_to_assembly(arg, withargs, existing_labels, break_dest, height))
            # discard the value of every statement except the last one
            if arg.valency == 1 and arg != code.args[-1]:
                o.append("POP")
        return o

    # Assure (if false, invalid opcode)
    elif code.value == "assert_unreachable":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        end_symbol = mksymbol("reachable")
        o.extend([end_symbol, "JUMPI", "INVALID", end_symbol, "JUMPDEST"])
        return o

    # Assert (if false, exit)
    elif code.value == "assert":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        o.extend(["ISZERO"])
        o.extend(_assert_false())
        return o

    # Unsigned/signed clamp, check less-than
    elif code.value in CLAMP_OP_NAMES:
        if isinstance(code.args[0].value, int) and isinstance(code.args[1].value, int):
            # Checks for clamp errors at compile time as opposed to run time
            # TODO move these to optimizer.py
            args_0_val = code.args[0].value
            args_1_val = code.args[1].value
            is_free_of_clamp_errors = any((
                code.value in ("uclamplt", "clamplt") and 0 <= args_0_val < args_1_val,
                code.value in ("uclample", "clample") and 0 <= args_0_val <= args_1_val,
                code.value in ("uclampgt", "clampgt") and 0 <= args_0_val > args_1_val,
                code.value in ("uclampge", "clampge") and 0 <= args_0_val >= args_1_val,
            ))
            if is_free_of_clamp_errors:
                return _compile_to_assembly(
                    code.args[0], withargs, existing_labels, break_dest, height
                )
            else:
                raise Exception(
                    f"Invalid {code.value} with values {code.args[0]} and {code.args[1]}"
                )
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        o.extend(
            _compile_to_assembly(
                code.args[1], withargs, existing_labels, break_dest, height + 1
            )
        )
        o.extend(["DUP2"])
        # stack: num, bound, num
        # each comparison below leaves a nonzero value when the clamp FAILS
        if code.value == "uclamplt":
            o.extend(["LT", "ISZERO"])
        elif code.value == "clamplt":
            o.extend(["SLT", "ISZERO"])
        elif code.value == "uclample":
            o.extend(["GT"])
        elif code.value == "clample":
            o.extend(["SGT"])
        elif code.value == "uclampgt":
            o.extend(["GT", "ISZERO"])
        elif code.value == "clampgt":
            o.extend(["SGT", "ISZERO"])
        elif code.value == "uclampge":
            o.extend(["LT"])
        elif code.value == "clampge":
            o.extend(["SLT"])
        o.extend(_assert_false())
        return o

    # Signed clamp, check against upper and lower bounds
    elif code.value in ("clamp", "uclamp"):
        comp1 = "SGT" if code.value == "clamp" else "GT"
        comp2 = "SLT" if code.value == "clamp" else "LT"
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        o.extend(
            _compile_to_assembly(
                code.args[1], withargs, existing_labels, break_dest, height + 1
            )
        )
        o.extend(["DUP1"])
        # three items are on the stack when the third argument is evaluated
        o.extend(
            _compile_to_assembly(
                code.args[2], withargs, existing_labels, break_dest, height + 3
            )
        )
        o.extend(["SWAP1", comp1])
        o.extend(_assert_false())
        o.extend(["DUP1", "SWAP2", "SWAP1", comp2])
        o.extend(_assert_false())
        return o

    # Checks that a value is nonzero
    elif code.value == "clamp_nonzero":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        o.extend(["DUP1", "ISZERO"])
        o.extend(_assert_false())
        return o

    # SHA3 a single value
    elif code.value == "sha3_32":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        o.extend([
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "MSTORE",
            "PUSH1",
            32,
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "SHA3",
        ])
        return o

    # SHA3 a 64 byte value
    elif code.value == "sha3_64":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        # The first argument's value is already on the stack while the second
        # one is evaluated, so it must be compiled one slot higher; otherwise
        # any stack variable it reads is fetched from the wrong slot.
        o.extend(
            _compile_to_assembly(
                code.args[1], withargs, existing_labels, break_dest, height + 1
            )
        )
        o.extend([
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE2,
            "MSTORE",
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "MSTORE",
            "PUSH1",
            64,
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "SHA3",
        ])
        return o

    # <= operator
    elif code.value == "le":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["gt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # >= operator
    elif code.value == "ge":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["lt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # signed <= operator
    elif code.value == "sle":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["sgt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # signed >= operator
    elif code.value == "sge":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["slt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # != operator
    elif code.value == "ne":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["eq", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # e.g. 95 -> 96, 96 -> 96, 97 -> 128
    elif code.value == "ceil32":
        return _compile_to_assembly(
            LLLnode.from_list([
                "with",
                "_val",
                code.args[0],
                # in mod32 arithmetic, the solution to x + y == 32 is
                # y = bitwise_not(x) & 31
                ["add", "_val", ["and", ["not", ["sub", "_val", 1]], 31]],
            ]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # jump to a symbol, and push variable arguments onto stack
    elif code.value == "goto":
        o = []
        for i, c in enumerate(reversed(code.args[1:])):
            o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i))
        o.extend(["_sym_" + str(code.args[0]), "JUMP"])
        return o

    # a bare symbol is passed through as a single assembly item
    elif isinstance(code.value, str) and is_symbol(code.value):
        return [code.value]

    # set a symbol as a location.
    elif code.value == "label":
        label_name = str(code.args[0])

        if label_name in existing_labels:
            raise Exception(f"Label with name {label_name} already exists!")
        else:
            existing_labels.add(label_name)

        return ["_sym_" + label_name, "JUMPDEST"]

    # inject debug opcode.
    elif code.value == "debugger":
        return mkdebug(pc_debugger=False, pos=code.pos)

    # inject debug opcode.
    elif code.value == "pc_debugger":
        return mkdebug(pc_debugger=True, pos=code.pos)

    else:
        raise Exception("Weird code element: " + repr(code))
def _compile_to_assembly(code, withargs=None, existing_labels=None, break_dest=None, height=0):
    """
    Recursively compile an LLL node into a flat list of assembly items.

    Parameters
    ----------
    code :
        Node to compile. Its `value`, `args`, `valency` and `pos` attributes
        are read.
    withargs : dict, optional
        Maps the name of each `with` variable that is currently in scope to
        the stack height at which its value lives. Mutated while a `with`
        body is compiled and restored afterwards.
    existing_labels : set, optional
        Names of the labels emitted so far; `label` nodes add to it.
    break_dest : tuple, optional
        `(exit_symbol, continue_symbol, height)` of the innermost enclosing
        loop, or None outside of a loop.
    height : int
        Number of items on the stack at the point where `code` starts
        executing. Used to compute DUPn/SWAPn depths of stack variables.

    Returns
    -------
    list
        Assembly items: opcode names, ints, symbols and - for `lll` nodes -
        nested lists. Every branch returns a list so that callers can
        `extend()` with the result.

    Raises
    ------
    CompilerPanic
        For wrongly typed `withargs`/`existing_labels` or `break`/`continue`
        outside of a loop.
    Exception
        For out-of-range numbers, stack variables deeper than 16, a bad
        `set`, a clamp that is known to fail at compile time, a duplicate
        label, or an unrecognised node.
    """
    if withargs is None:
        withargs = {}
    if not isinstance(withargs, dict):
        raise CompilerPanic(f"Incorrect type for withargs: {type(withargs)}")

    if existing_labels is None:
        existing_labels = set()
    if not isinstance(existing_labels, set):
        raise CompilerPanic(f"Incorrect type for existing_labels: {type(existing_labels)}")

    # Opcodes
    if isinstance(code.value, str) and code.value.upper() in get_opcodes():
        o = []
        # arguments are pushed last-to-first; each one raises the stack by one
        for i, c in enumerate(code.args[::-1]):
            o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i))
        o.append(code.value.upper())
        return o

    # Numbers
    elif isinstance(code.value, int):
        if code.value < -(2**255):
            raise Exception(f"Value too low: {code.value}")
        elif code.value >= 2**256:
            raise Exception(f"Value too high: {code.value}")
        # fall back to a single zero byte when the conversion yields nothing
        bytez = num_to_bytearray(code.value % 2**256) or [0]
        return ["PUSH" + str(len(bytez))] + bytez

    # Variables connected to with statements
    elif isinstance(code.value, str) and code.value in withargs:
        if height - withargs[code.value] > 16:
            raise Exception("With statement too deep")
        return ["DUP" + str(height - withargs[code.value])]

    # Setting variables connected to with statements
    elif code.value == "set":
        if len(code.args) != 2 or code.args[0].value not in withargs:
            raise Exception("Set expects two arguments, the first being a stack variable")
        if height - withargs[code.args[0].value] > 16:
            raise Exception("With statement too deep")
        return _compile_to_assembly(
            code.args[1], withargs, existing_labels, break_dest, height
        ) + [
            "SWAP" + str(height - withargs[code.args[0].value]),
            "POP",
        ]

    # Pass statements
    elif code.value == "pass":
        return []

    # Code length
    elif code.value == "~codelen":
        return ["_sym_codeend"]

    # Calldataload equivalent for code
    elif code.value == "codeload":
        return _compile_to_assembly(
            LLLnode.from_list([
                "seq",
                ["codecopy", MemoryPositions.FREE_VAR_SPACE, code.args[0], 32],
                ["mload", MemoryPositions.FREE_VAR_SPACE],
            ]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # If statements (2 arguments, ie. if x: y)
    elif code.value in ("if", "if_unchecked") and len(code.args) == 2:
        o = []
        o.extend(_compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height))
        end_symbol = mksymbol()
        o.extend(["ISZERO", end_symbol, "JUMPI"])
        o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height))
        o.extend([end_symbol, "JUMPDEST"])
        return o

    # If statements (3 arguments, ie. if x: y, else: z)
    elif code.value == "if" and len(code.args) == 3:
        o = []
        o.extend(_compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height))
        mid_symbol = mksymbol()
        end_symbol = mksymbol()
        o.extend(["ISZERO", mid_symbol, "JUMPI"])
        o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height))
        o.extend([end_symbol, "JUMP", mid_symbol, "JUMPDEST"])
        o.extend(_compile_to_assembly(code.args[2], withargs, existing_labels, break_dest, height))
        o.extend([end_symbol, "JUMPDEST"])
        return o

    # Repeat statements (compiled from for loops)
    # Repeat(memloc, start, rounds, body)
    elif code.value == "repeat":
        o = []
        loops = num_to_bytearray(code.args[2].value)
        start, continue_dest, end = mksymbol(), mksymbol(), mksymbol()
        o.extend(_compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height))
        o.extend(
            _compile_to_assembly(
                code.args[1], withargs, existing_labels, break_dest, height + 1
            )
        )
        o.extend(["PUSH" + str(len(loops))] + loops)
        # stack: memloc, startvalue, rounds
        # store the start value at memloc and turn (startvalue, rounds) into
        # the index at which the loop exits
        o.extend(["DUP2", "DUP4", "MSTORE", "ADD", start, "JUMPDEST"])
        # stack: memloc, exit_index
        o.extend(
            _compile_to_assembly(
                code.args[3],
                withargs,
                existing_labels,
                (end, continue_dest, height + 2),
                height + 2,
            )
        )
        # stack: memloc, exit_index
        # load the counter from memory, increment it and store it back
        o.extend([
            continue_dest,
            "JUMPDEST",
            "DUP2",
            "MLOAD",
            "PUSH1",
            1,
            "ADD",
            "DUP1",
            "DUP4",
            "MSTORE",
        ])
        # stack: memloc, exit_index, new_index
        # loop again unless new_index == exit_index, then drop both loop items
        o.extend(["DUP2", "EQ", "ISZERO", start, "JUMPI", end, "JUMPDEST", "POP", "POP"])
        return o

    # Continue to the next iteration of the for loop
    elif code.value == "continue":
        if not break_dest:
            raise CompilerPanic("Invalid continue")
        dest, continue_dest, break_height = break_dest
        return [continue_dest, "JUMP"]

    # Break from inside a for loop
    elif code.value == "break":
        if not break_dest:
            raise CompilerPanic("Invalid break")
        dest, continue_dest, break_height = break_dest
        # pop everything pushed since the loop body started, then leave
        return ["POP"] * (height - break_height) + [dest, "JUMP"]

    # Break from inside one or more for loops prior to a return statement inside the loop
    elif code.value == "exit_repeater":
        if not break_dest:
            raise CompilerPanic("Invalid break")
        _, _, break_height = break_dest
        return ["POP"] * break_height

    # With statements
    elif code.value == "with":
        o = []
        o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height))
        # remember a shadowed binding so that it can be restored afterwards
        old = withargs.get(code.args[0].value, None)
        withargs[code.args[0].value] = height
        o.extend(
            _compile_to_assembly(
                code.args[2], withargs, existing_labels, break_dest, height + 1
            )
        )
        # drop the variable, keeping the body's result (if any) on top
        if code.args[2].valency:
            o.extend(["SWAP1", "POP"])
        else:
            o.extend(["POP"])
        if old is not None:
            withargs[code.args[0].value] = old
        else:
            del withargs[code.args[0].value]
        return o

    # LLL statement (used to contain code inside code)
    elif code.value == "lll":
        o = []
        begincode = mksymbol()
        endcode = mksymbol()
        o.extend([endcode, "JUMP", begincode, "BLANK"])

        lll = _compile_to_assembly(code.args[0], {}, existing_labels, None, 0)

        # `append(...)` call here is intentional.
        # each sublist is essentially its own program with its
        # own symbols.
        # in the later step when the "lll" block compiled to EVM,
        # compile_to_evm has logic to resolve symbols in "lll" to
        # position from start of runtime-code (instead of position
        # from start of bytecode).
        o.append(lll)

        o.extend([endcode, "JUMPDEST", begincode, endcode, "SUB", begincode])
        # NOTE(review): two items (code size, begincode) are on the stack at
        # this point but the memory location is compiled at `height`; this
        # looks off by two if it ever reads a stack variable - confirm.
        o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height))

        # COPY the code to memory for deploy
        o.extend(["CODECOPY", begincode, endcode, "SUB"])
        return o

    # Seq (used to piece together multiple statements)
    elif code.value == "seq":
        o = []
        for arg in code.args:
            o.extend(_compile_to_assembly(arg, withargs, existing_labels, break_dest, height))
            # discard the value of every statement except the last one
            if arg.valency == 1 and arg != code.args[-1]:
                o.append("POP")
        return o

    # Seq without popping.
    elif code.value == "seq_unchecked":
        o = []
        for arg in code.args:
            # unlike "seq", intermediate values are deliberately left on the stack
            o.extend(_compile_to_assembly(arg, withargs, existing_labels, break_dest, height))
        return o

    # Assure (if false, invalid opcode)
    elif code.value == "assert_unreachable":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        end_symbol = mksymbol()
        o.extend([end_symbol, "JUMPI", "INVALID", end_symbol, "JUMPDEST"])
        return o

    # Assert (if false, exit)
    elif code.value == "assert":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        o.extend(["ISZERO"])
        o.extend(_assert_false())
        return o

    # Unsigned/signed clamp, check less-than
    elif code.value in CLAMP_OP_NAMES:
        if isinstance(code.args[0].value, int) and isinstance(code.args[1].value, int):
            # Checks for clamp errors at compile time as opposed to run time
            args_0_val = code.args[0].value
            args_1_val = code.args[1].value
            is_free_of_clamp_errors = any((
                code.value in ("uclamplt", "clamplt") and 0 <= args_0_val < args_1_val,
                code.value in ("uclample", "clample") and 0 <= args_0_val <= args_1_val,
                code.value in ("uclampgt", "clampgt") and 0 <= args_0_val > args_1_val,
                code.value in ("uclampge", "clampge") and 0 <= args_0_val >= args_1_val,
            ))
            if is_free_of_clamp_errors:
                return _compile_to_assembly(
                    code.args[0], withargs, existing_labels, break_dest, height
                )
            else:
                raise Exception(
                    f"Invalid {code.value} with values {code.args[0]} and {code.args[1]}"
                )
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        o.extend(
            _compile_to_assembly(
                code.args[1], withargs, existing_labels, break_dest, height + 1
            )
        )
        o.extend(["DUP2"])
        # stack: num, bound, num
        # each comparison below leaves a nonzero value when the clamp FAILS
        if code.value == "uclamplt":
            o.extend(["LT", "ISZERO"])
        elif code.value == "clamplt":
            o.extend(["SLT", "ISZERO"])
        elif code.value == "uclample":
            o.extend(["GT"])
        elif code.value == "clample":
            o.extend(["SGT"])
        elif code.value == "uclampgt":
            o.extend(["GT", "ISZERO"])
        elif code.value == "clampgt":
            o.extend(["SGT", "ISZERO"])
        elif code.value == "uclampge":
            o.extend(["LT"])
        elif code.value == "clampge":
            o.extend(["SLT"])
        o.extend(_assert_false())
        return o

    # Signed clamp, check against upper and lower bounds
    elif code.value in ("clamp", "uclamp"):
        comp1 = "SGT" if code.value == "clamp" else "GT"
        comp2 = "SLT" if code.value == "clamp" else "LT"
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        o.extend(
            _compile_to_assembly(
                code.args[1], withargs, existing_labels, break_dest, height + 1
            )
        )
        o.extend(["DUP1"])
        # three items are on the stack when the third argument is evaluated
        o.extend(
            _compile_to_assembly(
                code.args[2], withargs, existing_labels, break_dest, height + 3
            )
        )
        o.extend(["SWAP1", comp1])
        o.extend(_assert_false())
        o.extend(["DUP1", "SWAP2", "SWAP1", comp2])
        o.extend(_assert_false())
        return o

    # Checks that a value is nonzero
    elif code.value == "clamp_nonzero":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        o.extend(["DUP1", "ISZERO"])
        o.extend(_assert_false())
        return o

    # SHA3 a single value
    elif code.value == "sha3_32":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        o.extend([
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "MSTORE",
            "PUSH1",
            32,
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "SHA3",
        ])
        return o

    # SHA3 a 64 byte value
    elif code.value == "sha3_64":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)
        # The first argument's value is already on the stack while the second
        # one is evaluated, so it must be compiled one slot higher; otherwise
        # any stack variable it reads is fetched from the wrong slot.
        o.extend(
            _compile_to_assembly(
                code.args[1], withargs, existing_labels, break_dest, height + 1
            )
        )
        o.extend([
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE2,
            "MSTORE",
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "MSTORE",
            "PUSH1",
            64,
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "SHA3",
        ])
        return o

    # <= operator
    elif code.value == "le":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["gt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # >= operator
    elif code.value == "ge":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["lt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # signed <= operator
    elif code.value == "sle":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["sgt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # signed >= operator
    elif code.value == "sge":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["slt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # != operator
    elif code.value == "ne":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["eq", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # e.g. 95 -> 96, 96 -> 96, 97 -> 128
    elif code.value == "ceil32":
        return _compile_to_assembly(
            LLLnode.from_list([
                "with",
                "_val",
                code.args[0],
                ["sub", ["add", "_val", 31], ["mod", ["sub", "_val", 1], 32]],
            ]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # jump to a symbol
    elif code.value == "goto":
        return ["_sym_" + str(code.args[0]), "JUMP"]

    # A bare symbol is passed through as a single assembly item. It must be
    # wrapped in a list: callers extend()/concatenate the result, and a bare
    # string would be split into characters (or fail to concatenate).
    elif isinstance(code.value, str) and code.value.startswith("_sym_"):
        return [code.value]

    # set a symbol as a location.
    elif code.value == "label":
        label_name = str(code.args[0])

        if label_name in existing_labels:
            raise Exception(f"Label with name {label_name} already exists!")
        else:
            existing_labels.add(label_name)

        return ["_sym_" + label_name, "JUMPDEST"]

    # inject debug opcode.
    elif code.value == "debugger":
        return mkdebug(pc_debugger=False, pos=code.pos)

    # inject debug opcode.
    elif code.value == "pc_debugger":
        return mkdebug(pc_debugger=True, pos=code.pos)

    else:
        raise Exception("Weird code element: " + repr(code))