예제 #1
0
def test_get_opcodes(evm_version):
    """Check the opcode table returned for the given EVM version.

    The expectations are keyed on ``evm_version``: whether ``CHAINID`` is
    present, the last element of the ``SLOAD`` entry (presumably its gas
    cost -- confirm against the opcodes module), and whether ``CREATE2``
    is present.
    """
    table = opcodes.get_opcodes()

    # Work out what we expect for this ruleset first, then assert.
    if evm_version == "berlin":
        expect_chainid, expect_sload = True, 2100
    elif evm_version == "istanbul":
        expect_chainid, expect_sload = True, 800
    else:
        expect_chainid, expect_sload = False, 200

    if expect_chainid:
        assert "CHAINID" in table
    else:
        assert "CHAINID" not in table
    assert table["SLOAD"][-1] == expect_sload

    # CREATE2 must be missing on byzantium and atlantis, present otherwise.
    if evm_version not in ("byzantium", "atlantis"):
        assert table["CREATE2"][-1] == 32000
    else:
        assert "CREATE2" not in table
예제 #2
0
def _build_opcodes(bytecode: bytes) -> str:
    """Render raw bytecode as a space-separated string of opcode mnemonics.

    Every byte is looked up by value in the table returned by
    ``opcodes.get_opcodes()``. A ``PUSH<n>`` mnemonic is followed by its
    ``n`` immediate bytes, rendered as a single ``0x``-prefixed literal with
    upper-case hex digits (two digits per byte).

    Raises:
        KeyError: a byte does not map to any mnemonic in the opcode table.
        IndexError: the bytecode ends in the middle of a PUSH immediate.
    """
    bytecode_sequence = deque(bytecode)

    # invert the table: the first element of each entry is the opcode byte
    opcode_map = {v[0]: k for k, v in opcodes.get_opcodes().items()}
    opcode_output = []

    while bytecode_sequence:
        op = bytecode_sequence.popleft()
        mnemonic = opcode_map[op]
        opcode_output.append(mnemonic)
        if "PUSH" in mnemonic:
            push_len = int(mnemonic[4:])
            # Each immediate byte must contribute exactly two hex digits.
            # `hex(b)[2:]` drops the leading zero of bytes below 0x10, which
            # made e.g. `PUSH2 00 05` print as 0x05 and `01 02` as 0x12.
            push_values = [
                f"{bytecode_sequence.popleft():02X}" for _ in range(push_len)
            ]
            opcode_output.append(f"0x{''.join(push_values)}")

    return " ".join(opcode_output)
예제 #3
0
def _compile_to_assembly(code,
                         withargs=None,
                         existing_labels=None,
                         break_dest=None,
                         height=0):
    """Recursively lower one IR node into a flat list of assembly items.

    Args:
        code: the IR node to compile. This function reads its ``value``,
            ``args``, ``valency`` and ``source_pos`` attributes.
        withargs: dict mapping the name of each stack variable currently in
            scope (``with`` variables, loop counters, label parameters) to
            the stack height at which it was stored. Entries are added and
            removed while the body of a ``with``/``repeat`` is compiled.
            Defaults to a fresh empty dict.
        existing_labels: set of label / unique-symbol names already emitted;
            new names are added to it. Defaults to a fresh empty set.
        break_dest: ``None`` outside of a loop, otherwise the tuple
            ``(exit_dest, continue_dest, break_height)`` of the innermost
            enclosing ``repeat``.
        height: number of items on the stack (within the current scope) at
            the point where ``code`` starts executing. Used to compute the
            DUP/SWAP index of stack variables.

    Returns:
        A list of assembly items: opcode mnemonics, symbols, ints and, for
        ``deploy``, one nested list holding the runtime assembly.

    Raises:
        CompilerPanic: wrong argument types, a malformed ``repeat``, a
            shadowed loop variable, or ``break``/``continue``/
            ``cleanup_repeat`` outside of a loop.
        CodegenPanic: malformed ``label``, or the unimplemented ``exit_to``.
        Exception: out-of-range integer literal, stack variable deeper than
            16 slots, malformed ``set``, duplicate label/symbol,
            unimplemented ``istorebytes``, or an unrecognised node.
    """
    if withargs is None:
        withargs = {}
    if not isinstance(withargs, dict):
        raise CompilerPanic(f"Incorrect type for withargs: {type(withargs)}")

    def _data_ofst_of(sym, ofst, height_):
        # Push `sym + ofst`, e.g. _OFST _sym_foo 32
        assert is_symbol(sym) or is_mem_sym(sym)
        if isinstance(ofst.value, int):
            # resolve at compile time using magic _OFST op
            return ["_OFST", sym, ofst.value]
        else:
            # if we can't resolve at compile time, resolve at runtime
            ofst = _compile_to_assembly(ofst, withargs, existing_labels,
                                        break_dest, height_)
            return ofst + [sym, "ADD"]

    def _height_of(witharg):
        # Distance from the top of the stack to the variable, i.e. the DUP
        # index. The limit of 16 is enforced here.
        ret = height - withargs[witharg]
        if ret > 16:
            raise Exception("With statement too deep")
        return ret

    if existing_labels is None:
        existing_labels = set()
    if not isinstance(existing_labels, set):
        raise CompilerPanic(f"must be set(), but got {type(existing_labels)}")

    # Opcodes
    if isinstance(code.value, str) and code.value.upper() in get_opcodes():
        o = []
        # arguments are pushed last-to-first; each one already pushed raises
        # the height seen by the next one
        for i, c in enumerate(code.args[::-1]):
            o.extend(
                _compile_to_assembly(c, withargs, existing_labels, break_dest,
                                     height + i))
        o.append(code.value.upper())
        return o

    # Numbers
    elif isinstance(code.value, int):
        if code.value < -(2**255):
            raise Exception(f"Value too low: {code.value}")
        elif code.value >= 2**256:
            raise Exception(f"Value too high: {code.value}")
        # negative values wrap around modulo 2**256
        return PUSH(code.value % 2**256)

    # Variables connected to with statements
    elif isinstance(code.value, str) and code.value in withargs:
        return ["DUP" + str(_height_of(code.value))]

    # Setting variables connected to with statements
    elif code.value == "set":
        if len(code.args) != 2 or code.args[0].value not in withargs:
            raise Exception(
                "Set expects two arguments, the first being a stack variable")
        if height - withargs[code.args[0].value] > 16:
            raise Exception("With statement too deep")
        # compute the new value, swap it into the variable's slot and drop
        # the old value
        return _compile_to_assembly(
            code.args[1], withargs, existing_labels, break_dest, height) + [
                "SWAP" + str(height - withargs[code.args[0].value]),
                "POP",
            ]

    # Pass statements
    # TODO remove "dummy"; no longer needed
    elif code.value in ("pass", "dummy"):
        return []

    # "mload" from data section of the currently executing code
    elif code.value == "dload":
        loc = code.args[0]

        o = []
        # codecopy 32 bytes to FREE_VAR_SPACE, then mload from FREE_VAR_SPACE
        o.extend(PUSH(32))
        o.extend(_data_ofst_of("_sym_code_end", loc, height + 1))
        o.extend(PUSH(MemoryPositions.FREE_VAR_SPACE) + ["CODECOPY"])
        o.extend(PUSH(MemoryPositions.FREE_VAR_SPACE) + ["MLOAD"])
        return o

    # batch copy from data section of the currently executing code to memory
    elif code.value == "dloadbytes":
        dst = code.args[0]
        src = code.args[1]
        len_ = code.args[2]

        o = []
        o.extend(
            _compile_to_assembly(len_, withargs, existing_labels, break_dest,
                                 height))
        o.extend(_data_ofst_of("_sym_code_end", src, height + 1))
        o.extend(
            _compile_to_assembly(dst, withargs, existing_labels, break_dest,
                                 height + 2))
        o.extend(["CODECOPY"])
        return o

    # "mload" from the data section of (to-be-deployed) runtime code
    elif code.value == "iload":
        loc = code.args[0]

        o = []
        o.extend(_data_ofst_of("_mem_deploy_end", loc, height))
        o.append("MLOAD")

        return o

    # "mstore" to the data section of (to-be-deployed) runtime code
    elif code.value == "istore":
        loc = code.args[0]
        val = code.args[1]

        o = []
        o.extend(
            _compile_to_assembly(val, withargs, existing_labels, break_dest,
                                 height))
        o.extend(_data_ofst_of("_mem_deploy_end", loc, height + 1))
        o.append("MSTORE")

        return o

    # batch copy from memory to the data section of runtime code
    elif code.value == "istorebytes":
        raise Exception("unimplemented")

    # If statements (2 arguments, ie. if x: y)
    elif code.value == "if" and len(code.args) == 2:
        o = []
        o.extend(
            _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height))
        end_symbol = mksymbol("join")
        o.extend(["ISZERO", end_symbol, "JUMPI"])
        o.extend(
            _compile_to_assembly(code.args[1], withargs, existing_labels,
                                 break_dest, height))
        o.extend([end_symbol, "JUMPDEST"])
        return o
    # If statements (3 arguments, ie. if x: y, else: z)
    elif code.value == "if" and len(code.args) == 3:
        o = []
        o.extend(
            _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height))
        mid_symbol = mksymbol("else")
        end_symbol = mksymbol("join")
        o.extend(["ISZERO", mid_symbol, "JUMPI"])
        o.extend(
            _compile_to_assembly(code.args[1], withargs, existing_labels,
                                 break_dest, height))
        o.extend([end_symbol, "JUMP", mid_symbol, "JUMPDEST"])
        o.extend(
            _compile_to_assembly(code.args[2], withargs, existing_labels,
                                 break_dest, height))
        o.extend([end_symbol, "JUMPDEST"])
        return o

    # repeat(counter_location, start, rounds, rounds_bound, body)
    # basically a do-while loop:
    #
    # assert(rounds <= rounds_bound)
    # if (rounds > 0) {
    #   do {
    #     body;
    #   } while (++i != start + rounds)
    # }
    elif code.value == "repeat":
        o = []
        if len(code.args) != 5:
            raise CompilerPanic("bad number of repeat args")  # pragma: notest

        i_name = code.args[0]
        start = code.args[1]
        rounds = code.args[2]
        rounds_bound = code.args[3]
        body = code.args[4]

        entry_dest, continue_dest, exit_dest = (
            mksymbol("loop_start"),
            mksymbol("loop_continue"),
            mksymbol("loop_exit"),
        )

        # stack: []
        o.extend(
            _compile_to_assembly(start, withargs, existing_labels, break_dest,
                                 height))

        # stack: start
        o.extend(
            _compile_to_assembly(rounds, withargs, existing_labels, break_dest,
                                 height + 1))

        # stack: start, rounds

        # assert rounds <= round_bound
        if rounds != rounds_bound:
            # stack: start, rounds
            o.extend(
                _compile_to_assembly(rounds_bound, withargs, existing_labels,
                                     break_dest, height + 2))
            # stack: start, rounds, rounds_bound
            # assert rounds <= rounds_bound
            # TODO this runtime assertion should never fail for
            # internally generated repeats.
            # maybe drop it or jump to 0xFE
            o.extend(["DUP2", "GT"] + _assert_false())

            # stack: start, rounds
            # if (0 == rounds) { goto exit_dest; }
            o.extend(["DUP1", "ISZERO", exit_dest, "JUMPI"])

        # stack: start, rounds
        if start.value != 0:
            o.extend(["DUP2", "ADD"])

        # stack: i, exit_i
        o.extend(["SWAP1"])

        if i_name.value in withargs:
            raise CompilerPanic(f"shadowed loop variable {i_name}")
        # after the SWAP1 the counter sits one slot above `height`
        withargs[i_name.value] = height + 1

        # stack: exit_i, i
        o.extend([entry_dest, "JUMPDEST"])
        o.extend(
            _compile_to_assembly(body, withargs, existing_labels,
                                 (exit_dest, continue_dest, height + 2),
                                 height + 2))

        del withargs[i_name.value]

        # clean up any stack items left by body
        o.extend(["POP"] * body.valency)

        # stack: exit_i, i
        # increment i:
        o.extend([continue_dest, "JUMPDEST", "PUSH1", 1, "ADD"])

        # stack: exit_i, i+1 (new_i)
        # if (exit_i != new_i) { goto entry_dest }
        o.extend(["DUP2", "DUP2", "XOR", entry_dest, "JUMPI"])
        o.extend([exit_dest, "JUMPDEST", "POP", "POP"])

        return o

    # Continue to the next iteration of the for loop
    elif code.value == "continue":
        if not break_dest:
            raise CompilerPanic("Invalid break")
        dest, continue_dest, break_height = break_dest
        return [continue_dest, "JUMP"]
    # Break from inside a for loop
    elif code.value == "break":
        if not break_dest:
            raise CompilerPanic("Invalid break")
        dest, continue_dest, break_height = break_dest

        n_local_vars = height - break_height
        # clean up any stack items declared in the loop body
        cleanup_local_vars = ["POP"] * n_local_vars
        return cleanup_local_vars + [dest, "JUMP"]
    # Break from inside one or more for loops prior to a return statement inside the loop
    elif code.value == "cleanup_repeat":
        if not break_dest:
            raise CompilerPanic("Invalid break")
        # clean up local vars and internal loop vars
        _, _, break_height = break_dest
        # except don't pop label params
        if "return_buffer" in withargs:
            break_height -= 1
        if "return_pc" in withargs:
            break_height -= 1
        return ["POP"] * break_height
    # With statements
    elif code.value == "with":
        o = []
        o.extend(
            _compile_to_assembly(code.args[1], withargs, existing_labels,
                                 break_dest, height))
        # remember any outer binding of the same name so it can be restored
        old = withargs.get(code.args[0].value, None)
        withargs[code.args[0].value] = height
        o.extend(
            _compile_to_assembly(code.args[2], withargs, existing_labels,
                                 break_dest, height + 1))
        # drop the variable; if the body left a value, keep it on top
        if code.args[2].valency:
            o.extend(["SWAP1", "POP"])
        else:
            o.extend(["POP"])
        if old is not None:
            withargs[code.args[0].value] = old
        else:
            del withargs[code.args[0].value]
        return o

    # runtime statement (used to deploy runtime code)
    elif code.value == "deploy":
        memsize = code.args[
            0].value  # used later to calculate _mem_deploy_start
        ir = code.args[1]
        padding = code.args[2].value
        assert isinstance(memsize, int), "non-int memsize"
        assert isinstance(padding, int), "non-int padding"

        begincode = mksymbol("runtime_begin")

        # the runtime code is compiled in a fresh scope (no withargs, no
        # labels, height 0)
        subcode = _compile_to_assembly(ir)

        o = []

        # COPY the code to memory for deploy
        o.extend(
            ["_sym_subcode_size", begincode, "_mem_deploy_start", "CODECOPY"])

        # calculate the len of runtime code
        o.extend(["_OFST", "_sym_subcode_size", padding])  # stack: len
        o.extend(["_mem_deploy_start"])  # stack: len mem_ofst
        o.extend(["RETURN"])

        # since the asm data structures are very primitive, to make sure
        # assembly_to_evm is able to calculate data offsets correctly,
        # we pass the memsize via magic opcodes to the subcode
        subcode = [f"_DEPLOY_MEM_OFST_{memsize}"] + subcode

        # append the runtime code after the ctor code
        o.extend([begincode, "BLANK"])
        # `append(...)` call here is intentional.
        # each sublist is essentially its own program with its
        # own symbols.
        # in the later step when the "ir" block compiled to EVM,
        # symbols in subcode are resolved to position from start of
        # runtime-code (instead of position from start of bytecode).
        o.append(subcode)

        return o

    # Seq (used to piece together multiple statements)
    elif code.value == "seq":
        o = []
        for arg in code.args:
            o.extend(
                _compile_to_assembly(arg, withargs, existing_labels,
                                     break_dest, height))
            # discard intermediate results; only the last value is kept
            if arg.valency == 1 and arg != code.args[-1]:
                o.append("POP")
        return o
    # Assure (if false, invalid opcode)
    elif code.value == "assert_unreachable":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        end_symbol = mksymbol("reachable")
        o.extend([end_symbol, "JUMPI", "INVALID", end_symbol, "JUMPDEST"])
        return o
    # Assert (if false, exit)
    elif code.value == "assert":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        o.extend(["ISZERO"])
        o.extend(_assert_false())
        return o

    # SHA3 a single value
    elif code.value == "sha3_32":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        o.extend([
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "MSTORE",
            "PUSH1",
            32,
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "SHA3",
        ])
        return o
    # SHA3 a 64 byte value
    elif code.value == "sha3_64":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        # The first operand is already on the stack when the second one is
        # evaluated, so the second must be compiled one slot higher;
        # otherwise any stack variable it references gets a DUP index that
        # is off by one.
        o.extend(
            _compile_to_assembly(code.args[1], withargs, existing_labels,
                                 break_dest, height + 1))
        # stack: arg0 arg1 -> arg1 goes to FREE_VAR_SPACE2, arg0 to
        # FREE_VAR_SPACE, then 64 bytes starting at FREE_VAR_SPACE are hashed
        o.extend([
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE2,
            "MSTORE",
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "MSTORE",
            "PUSH1",
            64,
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "SHA3",
        ])
        return o
    elif code.value == "select":
        # b ^ ((a ^ b) * cond) where cond is 1 or 0
        # let t = a ^ b
        cond = code.args[0]
        a = code.args[1]
        b = code.args[2]

        o = []
        o.extend(
            _compile_to_assembly(b, withargs, existing_labels, break_dest,
                                 height))
        o.extend(
            _compile_to_assembly(a, withargs, existing_labels, break_dest,
                                 height + 1))
        # stack: b a
        o.extend(["DUP2", "XOR"])
        # stack: b t
        o.extend(
            _compile_to_assembly(cond, withargs, existing_labels, break_dest,
                                 height + 2))
        # stack: b t cond
        o.extend(["MUL", "XOR"])

        # stack: b ^ (t * cond)
        return o

    # <= operator, rewritten as iszero(gt(a, b))
    elif code.value == "le":
        return _compile_to_assembly(
            IRnode.from_list(["iszero", ["gt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # >= operator, rewritten as iszero(lt(a, b))
    elif code.value == "ge":
        return _compile_to_assembly(
            IRnode.from_list(["iszero", ["lt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # signed <= operator, rewritten as iszero(sgt(a, b))
    elif code.value == "sle":
        return _compile_to_assembly(
            IRnode.from_list(["iszero", ["sgt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # signed >= operator, rewritten as iszero(slt(a, b))
    elif code.value == "sge":
        return _compile_to_assembly(
            IRnode.from_list(["iszero", ["slt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # != operator, rewritten as iszero(eq(a, b))
    elif code.value == "ne":
        return _compile_to_assembly(
            IRnode.from_list(["iszero", ["eq", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # e.g. 95 -> 96, 96 -> 96, 97 -> 128
    elif code.value == "ceil32":
        # floor32(x) = x - x % 32 == x & 0b11..100000 == x & (~31)
        # ceil32(x) = floor32(x + 31) == (x + 31) & (~31)
        x = code.args[0]
        return _compile_to_assembly(
            IRnode.from_list(["and", ["add", x, 31], ["not", 31]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )

    # jump to a symbol, and push variable # of arguments onto stack
    elif code.value == "goto":
        o = []
        for i, c in enumerate(reversed(code.args[1:])):
            o.extend(
                _compile_to_assembly(c, withargs, existing_labels, break_dest,
                                     height + i))
        o.extend(["_sym_" + str(code.args[0]), "JUMP"])
        return o
    # push a literal symbol
    elif isinstance(code.value, str) and is_symbol(code.value):
        return [code.value]
    # set a symbol as a location.
    elif code.value == "label":
        label_name = code.args[0].value
        assert isinstance(label_name, str)

        if label_name in existing_labels:
            raise Exception(f"Label with name {label_name} already exists!")
        else:
            existing_labels.add(label_name)

        if code.args[1].value != "var_list":
            raise CodegenPanic("2nd arg to label must be var_list")
        var_args = code.args[1].args

        body = code.args[2]

        # new scope: the label's parameters are the only stack variables
        height = 0
        withargs = {}

        for arg in reversed(var_args):
            assert isinstance(
                arg.value, str
            )  # already checked for higher up but only the paranoid survive
            withargs[arg.value] = height
            height += 1

        # break_dest is deliberately not forwarded into the label body
        body_asm = _compile_to_assembly(body,
                                        withargs=withargs,
                                        existing_labels=existing_labels,
                                        height=height)
        # pop_scoped_vars = ["POP"] * height
        # for now, _rewrite_return_sequences forces
        # label params to be consumed implicitly
        pop_scoped_vars = []

        return ["_sym_" + label_name, "JUMPDEST"] + body_asm + pop_scoped_vars

    # reserve a symbol name without emitting any code
    elif code.value == "unique_symbol":
        symbol = code.args[0].value
        assert isinstance(symbol, str)

        if symbol in existing_labels:
            raise Exception(f"symbol {symbol} already exists!")
        else:
            existing_labels.add(symbol)

        return []

    elif code.value == "exit_to":
        raise CodegenPanic("exit_to not implemented yet!")

    # inject debug opcode.
    elif code.value == "debugger":
        return mkdebug(pc_debugger=False, source_pos=code.source_pos)
    # inject debug opcode.
    elif code.value == "pc_debugger":
        return mkdebug(pc_debugger=True, source_pos=code.source_pos)
    else:
        raise Exception("Weird code element: " + repr(code))
예제 #4
0
def assembly_to_evm(assembly, pc_ofst=0, insert_vyper_signature=False):
    """
    Turn a list of assembly items into EVM bytecode plus a source map.

    assembly: list of asm instructions; may contain one nested list, which
              is assembled on its own as the runtime code
    pc_ofst: when constructing the source map, the amount to offset all
             pcs by (no effect until we add deploy code source map)
    insert_vyper_signature: whether to append vyper metadata to output
                            (should be true for runtime code)

    Returns a ``(bytecode, line_number_map)`` tuple.
    """
    source_map = {
        "breakpoints": set(),
        "pc_breakpoints": set(),
        "pc_jump_map": {0: "-"},
        "pc_pos_map": {},
        "error_map": {},
    }

    cursor = 0  # program counter of the item being visited
    labels = {}  # symbol -> resolved location

    runtime_code = runtime_code_start = runtime_code_end = None

    if insert_vyper_signature:
        # CBOR encoded: {"vyper": [major,minor,patch]}, followed by the
        # length of that blob as a 2-byte big-endian integer
        cbor = b"\xa1\x65vyper\x83" + bytes(list(version_tuple))
        bytecode_suffix = cbor + len(cbor).to_bytes(2, "big")
    else:
        bytecode_suffix = b""

    CODE_OFST_SIZE = 2  # size of a PUSH instruction for a code symbol

    # Pass 1: assemble the runtime code (if any) so that its length is
    # known, and find the largest offset applied to a memory symbol. Both
    # are needed to pick the smallest PUSH that can hold every memory
    # symbol while still allowing single-pass symbol resolution.
    mem_ofst_size = ctor_mem_size = None
    max_mem_ofst = 0
    for idx, entry in enumerate(assembly):
        if isinstance(entry, list):
            assert runtime_code is None, "Multiple subcodes"
            runtime_code, runtime_map = assembly_to_evm(
                entry, insert_vyper_signature=True)

            magic = entry[0]
            assert magic.startswith("_DEPLOY_MEM_OFST_")
            assert ctor_mem_size is None
            ctor_mem_size = int(magic[len("_DEPLOY_MEM_OFST_"):])

            runtime_code_start, runtime_code_end = _runtime_code_offsets(
                ctor_mem_size, len(runtime_code))
            assert runtime_code_end - runtime_code_start == len(runtime_code)

        if is_ofst(entry) and is_mem_sym(assembly[idx + 1]):
            max_mem_ofst = max(assembly[idx + 2], max_mem_ofst)

    if runtime_code_end is not None:
        mem_ofst_size = calc_mem_ofst_size(runtime_code_end + max_mem_ofst)

    # Pass 2: walk the items, tracking the pc, to resolve symbolic
    # locations (i.e. JUMPDEST locations) to actual code locations
    for idx, entry in enumerate(assembly):
        note_line_num(source_map, entry, cursor)
        if entry == "DEBUG":
            continue  # DEBUG emits nothing

        # classify jumps for pc_jump_map
        if entry == "JUMP":
            target = assembly[idx - 1]
            if not (is_symbol(target) and target.startswith("_sym_internal")):
                jump_kind = "-"  # everything else
            elif target.endswith("cleanup"):
                jump_kind = "o"  # exit an internal function
            else:
                jump_kind = "i"  # enter an internal function
            source_map["pc_jump_map"][cursor] = jump_kind
        elif entry in ("JUMPI", "JUMPDEST"):
            source_map["pc_jump_map"][cursor] = "-"

        # advance the pc by the encoded size of this item
        if is_symbol(entry):
            if assembly[idx + 1] in ("JUMPDEST", "BLANK"):
                # a definition: the symbol itself doesn't go into code
                if entry in labels:
                    raise CompilerPanic(f"duplicate jumpdest {entry}")
                labels[entry] = cursor
            else:
                # a reference: PUSH2 highbits lowbits
                cursor += CODE_OFST_SIZE + 1
        elif is_mem_sym(entry):
            # PUSH<n> item
            cursor += mem_ofst_size + 1
        elif is_ofst(entry):
            assert is_symbol(assembly[idx + 1]) or is_mem_sym(assembly[idx + 1])
            assert isinstance(assembly[idx + 2], int)
            # [_OFST, _sym_foo, bar] -> PUSH2 (foo+bar)
            # [_OFST, _mem_foo, bar] -> PUSHN (foo+bar)
            # the symbol and the int that follow are counted by their own
            # branches; compensate for the extra byte the int adds
            cursor -= 1
        elif entry == "BLANK" or (isinstance(entry, str)
                                  and entry.startswith("_DEPLOY_MEM_OFST_")):
            # BLANK and the _DEPLOY_MEM_OFST magic take no space; the
            # latter gets removed during final assembly-to-bytecode
            pass
        elif isinstance(entry, list):
            # add source map for all items in the runtime map
            shifted = adjust_pc_maps(runtime_map, cursor)
            for key in source_map:
                source_map[key].update(shifted[key])
            cursor += len(runtime_code)
        else:
            cursor += 1

    cursor += len(bytecode_suffix)

    labels["_sym_code_end"] = cursor
    labels["_mem_deploy_start"] = runtime_code_start
    labels["_mem_deploy_end"] = runtime_code_end
    if runtime_code is not None:
        labels["_sym_subcode_size"] = len(runtime_code)

    # TODO refactor into two functions, create posmap and assemble

    # Pass 3: now that all symbols have been resolved, generate bytecode
    # using the symbol map
    out = bytearray()
    pending_skip = 0  # operands already consumed by a preceding _OFST
    for idx, entry in enumerate(assembly):
        if pending_skip > 0:
            pending_skip -= 1
            continue

        if entry in ("DEBUG", "BLANK"):
            continue  # skippable opcodes
        if isinstance(entry, str) and entry.startswith("_DEPLOY_MEM_OFST_"):
            continue

        if is_symbol(entry):
            # only references emit code; definitions are silent
            if assembly[idx + 1] not in ("JUMPDEST", "BLANK"):
                pushed, _ = assembly_to_evm(
                    PUSH_N(labels[entry], n=CODE_OFST_SIZE))
                out += pushed

        elif is_mem_sym(entry):
            pushed, _ = assembly_to_evm(PUSH_N(labels[entry], n=mem_ofst_size))
            out += pushed

        elif is_ofst(entry):
            # _OFST _sym_foo 32
            base_sym = assembly[idx + 1]
            ofst = labels[base_sym] + assembly[idx + 2]
            n = mem_ofst_size if is_mem_sym(base_sym) else CODE_OFST_SIZE
            pushed, _ = assembly_to_evm(PUSH_N(ofst, n))
            out += pushed
            pending_skip = 2

        elif isinstance(entry, int):
            out.append(entry)
        elif isinstance(entry, str) and entry.upper() in get_opcodes():
            out.append(get_opcodes()[entry.upper()][0])
        elif entry[:4] == "PUSH":
            out.append(PUSH_OFFSET + int(entry[4:]))
        elif entry[:3] == "DUP":
            out.append(DUP_OFFSET + int(entry[3:]))
        elif entry[:4] == "SWAP":
            out.append(SWAP_OFFSET + int(entry[4:]))
        elif isinstance(entry, list):
            out += runtime_code
        else:
            # Should never reach because, assembly is create in _compile_to_assembly.
            raise Exception("Weird symbol in assembly: " +
                            str(entry))  # pragma: no cover

    out += bytecode_suffix

    # callers get lists rather than sets for the breakpoint collections
    for key in ("breakpoints", "pc_breakpoints"):
        source_map[key] = list(source_map[key])
    return bytes(out), source_map
예제 #5
0
def assembly_to_evm(assembly, start_pos=0):
    """Assemble a list of assembly items into bytecode and a source map.

    ``assembly`` may contain one nested list, which is assembled recursively
    as the runtime code. ``start_pos`` is the position at which this
    assembly begins; source-map keys use absolute positions, while symbol
    locations are stored relative to ``start_pos``.

    Returns a ``(bytecode, line_number_map)`` tuple.
    """
    source_map = {
        "breakpoints": set(),
        "pc_breakpoints": set(),
        "pc_jump_map": {0: "-"},
        "pc_pos_map": {},
    }

    locations = {}  # symbol -> resolved location
    runtime_code = runtime_code_start = runtime_code_end = None
    cursor = start_pos

    # Pass 1: resolve symbolic locations (i.e. JUMPDEST locations) to
    # actual code locations, assembling the runtime code along the way
    for idx, entry in enumerate(assembly):
        note_line_num(source_map, entry, cursor)
        if entry == "DEBUG":
            continue  # DEBUG emits nothing

        # classify jumps for pc_jump_map
        if entry == "JUMP":
            target = assembly[idx - 1]
            if not (is_symbol(target) and target.startswith("_sym_internal")):
                jump_kind = "-"  # everything else
            elif target.endswith("cleanup"):
                jump_kind = "o"  # exit an internal function
            else:
                jump_kind = "i"  # enter an internal function
            source_map["pc_jump_map"][cursor] = jump_kind
        elif entry in ("JUMPI", "JUMPDEST"):
            source_map["pc_jump_map"][cursor] = "-"

        # advance the position by the encoded size of this item
        if is_symbol(entry):
            if assembly[idx + 1] in ("JUMPDEST", "BLANK"):
                # a definition: the symbol itself doesn't go into code
                if entry in locations:
                    raise CompilerPanic(f"duplicate jumpdest {entry}")
                locations[entry] = cursor - start_pos
            else:
                cursor += 3  # PUSH2 highbits lowbits
        elif is_mem_sym(entry):
            cursor += 5  # PUSH4 item
        elif is_ofst(entry):
            assert is_symbol(assembly[idx + 1]) or is_mem_sym(assembly[idx + 1])
            assert isinstance(assembly[idx + 2], int)
            # [_OFST, _sym_foo, bar] -> PUSH2 (foo+bar)
            # [_OFST, _mem_foo, bar] -> PUSH4 (foo+bar)
            # the symbol and the int that follow are counted by their own
            # branches; compensate for the extra byte the int adds
            cursor -= 1
        elif entry == "BLANK" or (isinstance(entry, str)
                                  and entry.startswith("_DEPLOY_MEM_OFST_")):
            # BLANK and the _DEPLOY_MEM_OFST magic take no space; the
            # latter gets removed during final assembly-to-bytecode
            pass
        elif isinstance(entry, list):
            assert runtime_code is None, "Multiple subcodes"
            runtime_code, sub_map = assembly_to_evm(entry, start_pos=cursor)

            magic = entry[0]
            assert magic.startswith("_DEPLOY_MEM_OFST_")
            ctor_mem_size = int(magic[len("_DEPLOY_MEM_OFST_"):])

            runtime_code_start, runtime_code_end = _runtime_code_offsets(
                ctor_mem_size, len(runtime_code))
            assert runtime_code_end - runtime_code_start == len(runtime_code)

            cursor += len(runtime_code)
            # fold the runtime code's source map into ours
            for key in source_map:
                source_map[key].update(sub_map[key])
        else:
            cursor += 1

    code_end = cursor - start_pos
    locations["_sym_code_end"] = code_end
    locations["_mem_deploy_start"] = runtime_code_start
    locations["_mem_deploy_end"] = runtime_code_end
    if runtime_code is not None:
        locations["_sym_subcode_size"] = len(runtime_code)

    # Pass 2: emit bytecode using the resolved locations
    out = bytearray()
    pending_skip = 0  # operands already consumed by a preceding _OFST
    for idx, entry in enumerate(assembly):
        if pending_skip > 0:
            pending_skip -= 1
            continue

        if entry in ("DEBUG", "BLANK"):
            continue  # skippable opcodes
        if isinstance(entry, str) and entry.startswith("_DEPLOY_MEM_OFST_"):
            continue

        if is_symbol(entry):
            # only references emit code; definitions are silent
            if assembly[idx + 1] not in ("JUMPDEST", "BLANK"):
                pushed, _ = assembly_to_evm(PUSH_N(locations[entry], n=2))
                out += pushed

        elif is_mem_sym(entry):
            pushed, _ = assembly_to_evm(PUSH_N(locations[entry], n=4))
            out += pushed

        elif is_ofst(entry):
            # _OFST _sym_foo 32
            base_sym = assembly[idx + 1]
            ofst = locations[base_sym] + assembly[idx + 2]
            n = 4 if is_mem_sym(base_sym) else 2
            pushed, _ = assembly_to_evm(PUSH_N(ofst, n))
            out += pushed
            pending_skip = 2

        elif isinstance(entry, int):
            out.append(entry)
        elif isinstance(entry, str) and entry.upper() in get_opcodes():
            out.append(get_opcodes()[entry.upper()][0])
        elif entry[:4] == "PUSH":
            out.append(PUSH_OFFSET + int(entry[4:]))
        elif entry[:3] == "DUP":
            out.append(DUP_OFFSET + int(entry[3:]))
        elif entry[:4] == "SWAP":
            out.append(SWAP_OFFSET + int(entry[4:]))
        elif isinstance(entry, list):
            out += runtime_code
        else:
            # Should never reach because, assembly is create in _compile_to_assembly.
            raise Exception("Weird symbol in assembly: " + str(entry))  # pragma: no cover

    # the size predicted by pass 1 must match what pass 2 emitted
    assert len(out) == cursor - start_pos, (len(out), cursor, start_pos)

    # callers get lists rather than sets for the breakpoint collections
    for key in ("breakpoints", "pc_breakpoints"):
        source_map[key] = list(source_map[key])
    return bytes(out), source_map
예제 #6
0
def assembly_to_evm(assembly, start_pos=0):
    """Assemble a symbolic assembly listing into EVM bytecode.

    The listing is walked twice.  The first pass assigns a byte position to
    every item, records where each ``_sym_*`` label is defined and fills in
    the jump map.  The second pass emits the bytes, turning every symbol
    *reference* into ``PUSH2 <high byte> <low byte>``.  Nested lists are
    assembled recursively and spliced into the output verbatim.

    Args:
        assembly: list of items.  Each item is an int (emitted as one raw
            byte), an opcode mnemonic, a ``PUSHn``/``DUPn``/``SWAPn``
            string, a symbol string, the markers ``"DEBUG"``/``"BLANK"``
            (which emit nothing), or a nested list (a sub-listing).
        start_pos: byte offset of this listing inside the enclosing code.
            Label positions are stored relative to it; ``_sym_codeend`` and
            the keys of the program-counter maps are not.

    Returns:
        A ``(bytecode, line_number_map)`` tuple.  ``bytecode`` is ``bytes``;
        in ``line_number_map`` the ``breakpoints`` and ``pc_breakpoints``
        entries are converted from sets to lists before returning.

    Raises:
        CompilerPanic: if the same label is defined twice.
        KeyError: if a referenced symbol is never defined.
        Exception: if an item of an unrecognised kind is encountered.
    """
    line_number_map = {
        "breakpoints": set(),
        "pc_breakpoints": set(),
        "pc_jump_map": {
            0: "-"
        },
        "pc_pos_map": {},
    }

    def defines_label(index):
        # A symbol immediately followed by JUMPDEST or BLANK marks a location
        # and emits no bytes itself.  A symbol that is the very last item has
        # no follower, so it can only be a reference (previously this case
        # raised IndexError).
        follower = index + 1
        return follower < len(assembly) and assembly[follower] in (
            "JUMPDEST", "BLANK")

    posmap = {}
    codes = []  # bytecode of each nested listing, in order of appearance
    pos = start_pos

    # First pass: resolve symbolic locations (i.e. JUMPDEST locations) to
    # actual code locations.
    for i, item in enumerate(assembly):
        note_line_num(line_number_map, item, pos)
        if item == "DEBUG":
            continue  # debug markers take no space

        if item == "JUMP":
            jump_kind = "-"  # everything that is not an internal call/return
            # Only look at the preceding item when there is one; index -1
            # would silently wrap around to the end of the listing.
            if i > 0:
                last = assembly[i - 1]
                if is_symbol(last) and last.startswith("_sym_internal"):
                    # "o": exit an internal function, "i": enter one
                    jump_kind = "o" if last.endswith("cleanup") else "i"
            line_number_map["pc_jump_map"][pos] = jump_kind
        elif item in ("JUMPI", "JUMPDEST"):
            line_number_map["pc_jump_map"][pos] = "-"

        if is_symbol(item):
            if defines_label(i):
                # Don't advance: the symbol itself doesn't go into the code.
                if item in posmap:
                    raise CompilerPanic(f"duplicate jumpdest {item}")

                posmap[item] = pos - start_pos
            else:
                pos += 3  # PUSH2 highbits lowbits
        elif item == "BLANK":
            pass  # placeholder, occupies no bytes
        elif isinstance(item, list):
            sub_code, sub_map = assembly_to_evm(item, start_pos=pos)
            codes.append(sub_code)
            pos += len(sub_code)
            # Merge the nested listing's breakpoints and pc maps into ours.
            for key in line_number_map:
                line_number_map[key].update(sub_map[key])
        else:
            pos += 1

    posmap["_sym_codeend"] = pos

    # Second pass: emit the bytes.  Nested listings are visited in the same
    # order as in the first pass, so their bytecode is consumed positionally
    # instead of being looked up by list equality (which is slow and picks
    # the wrong entry when two sub-listings compare equal).
    opcode_table = get_opcodes()
    remaining_codes = iter(codes)
    chunks = []
    for i, item in enumerate(assembly):
        if item == "DEBUG":
            continue  # skip debug
        elif is_symbol(item):
            if not defines_label(i):
                chunks.append(
                    bytes([
                        PUSH_OFFSET + 2, posmap[item] // 256,
                        posmap[item] % 256
                    ]))
        elif isinstance(item, int):
            chunks.append(bytes([item]))
        elif isinstance(item, str) and item.upper() in opcode_table:
            chunks.append(bytes([opcode_table[item.upper()][0]]))
        elif item[:4] == "PUSH":
            chunks.append(bytes([PUSH_OFFSET + int(item[4:])]))
        elif item[:3] == "DUP":
            chunks.append(bytes([DUP_OFFSET + int(item[3:])]))
        elif item[:4] == "SWAP":
            chunks.append(bytes([SWAP_OFFSET + int(item[4:])]))
        elif item == "BLANK":
            pass
        elif isinstance(item, list):
            chunks.append(next(remaining_codes))
        else:
            # Should never be reached, because the assembly is created by
            # _compile_to_assembly.
            raise Exception("Weird symbol in assembly: " +
                            str(item))  # pragma: no cover

    o = b"".join(chunks)

    # Sanity check: both passes must agree on the size of the code.
    assert len(o) == pos - start_pos
    line_number_map["breakpoints"] = list(line_number_map["breakpoints"])
    line_number_map["pc_breakpoints"] = list(line_number_map["pc_breakpoints"])
    return o, line_number_map
예제 #7
0
def _compile_to_assembly(code,
                         withargs=None,
                         existing_labels=None,
                         break_dest=None,
                         height=0):
    """Recursively lower one LLL node into a flat list of assembly items.

    The node's ``value`` selects the lowering rule.  Rules are tried in the
    order written below, so a value that is a real EVM opcode is always
    handled by the first rule.

    Args:
        code: the node to compile.  This function reads its ``value``,
            ``args``, ``valency`` and ``pos`` attributes.
        withargs: dict mapping the name of every stack variable currently in
            scope (``with`` variables and ``repeat`` counters) to the stack
            height at which it lives.  It is mutated while a scope is being
            compiled and restored afterwards.
        existing_labels: set of label names emitted so far, shared across
            the whole compilation to detect duplicates.
        break_dest: ``None`` outside a loop, otherwise the tuple
            ``(exit_symbol, continue_symbol, height)`` of the enclosing
            ``repeat``.
        height: number of items on the stack at the point where this node
            starts executing; used to compute ``DUPn``/``SWAPn`` depths.

    Returns:
        A list of assembly items: opcode mnemonics, raw ints, symbol strings
        and, for ``lll`` nodes, a nested list holding the inner program.

    Raises:
        CompilerPanic: on arguments of the wrong type, a malformed
            ``repeat``, a shadowed loop variable, or ``break``/``continue``
            outside a loop.
        Exception: on out-of-range integers, stack variables deeper than 16
            slots, malformed ``set``, constant clamps that can never pass,
            duplicate labels and unknown node values.
    """
    if withargs is None:
        withargs = {}
    if not isinstance(withargs, dict):
        raise CompilerPanic(f"Incorrect type for withargs: {type(withargs)}")

    if existing_labels is None:
        existing_labels = set()
    if not isinstance(existing_labels, set):
        raise CompilerPanic(
            f"Incorrect type for existing_labels: {type(existing_labels)}")

    # Opcodes
    if isinstance(code.value, str) and code.value.upper() in get_opcodes():
        o = []
        # Arguments are pushed last-to-first so that args[0] ends up on top
        # of the stack; each one sees the values pushed before it.
        for i, c in enumerate(code.args[::-1]):
            o.extend(
                _compile_to_assembly(c, withargs, existing_labels, break_dest,
                                     height + i))
        o.append(code.value.upper())
        return o
    # Numbers
    elif isinstance(code.value, int):
        if code.value < -(2**255):
            raise Exception(f"Value too low: {code.value}")
        elif code.value >= 2**256:
            raise Exception(f"Value too high: {code.value}")
        # Negative numbers are reduced modulo 2**256; zero still needs one
        # byte of payload.
        bytez = num_to_bytearray(code.value % 2**256) or [0]
        return ["PUSH" + str(len(bytez))] + bytez
    # Variables connected to with statements
    elif isinstance(code.value, str) and code.value in withargs:
        # DUPn can reach at most 16 slots down the stack.
        if height - withargs[code.value] > 16:
            raise Exception("With statement too deep")
        return ["DUP" + str(height - withargs[code.value])]
    # Setting variables connected to with statements
    elif code.value == "set":
        if len(code.args) != 2 or code.args[0].value not in withargs:
            raise Exception(
                "Set expects two arguments, the first being a stack variable")
        if height - withargs[code.args[0].value] > 16:
            raise Exception("With statement too deep")
        # Compute the new value, swap it into the variable's slot and drop
        # the old value.
        return _compile_to_assembly(
            code.args[1], withargs, existing_labels, break_dest, height) + [
                "SWAP" + str(height - withargs[code.args[0].value]),
                "POP",
            ]
    # Pass statements
    elif code.value in ("pass", "dummy"):
        return []
    # Code length
    elif code.value == "~codelen":
        return ["_sym_codeend"]
    # Calldataload equivalent for code
    elif code.value == "codeload":
        return _compile_to_assembly(
            LLLnode.from_list([
                "seq",
                ["codecopy", MemoryPositions.FREE_VAR_SPACE, code.args[0], 32],
                ["mload", MemoryPositions.FREE_VAR_SPACE],
            ]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # If statements (2 arguments, ie. if x: y)
    elif code.value == "if" and len(code.args) == 2:
        o = []
        o.extend(
            _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height))
        end_symbol = mksymbol("join")
        # Skip the body when the condition is zero.
        o.extend(["ISZERO", end_symbol, "JUMPI"])
        o.extend(
            _compile_to_assembly(code.args[1], withargs, existing_labels,
                                 break_dest, height))
        o.extend([end_symbol, "JUMPDEST"])
        return o
    # If statements (3 arguments, ie. if x: y, else: z)
    elif code.value == "if" and len(code.args) == 3:
        o = []
        o.extend(
            _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height))
        mid_symbol = mksymbol("else")
        end_symbol = mksymbol("join")
        o.extend(["ISZERO", mid_symbol, "JUMPI"])
        o.extend(
            _compile_to_assembly(code.args[1], withargs, existing_labels,
                                 break_dest, height))
        o.extend([end_symbol, "JUMP", mid_symbol, "JUMPDEST"])
        o.extend(
            _compile_to_assembly(code.args[2], withargs, existing_labels,
                                 break_dest, height))
        o.extend([end_symbol, "JUMPDEST"])
        return o

    # repeat(counter_location, start, rounds, rounds_bound, body)
    # basically a do-while loop:
    #
    # assert(rounds <= rounds_bound)
    # if (rounds > 0) {
    #   do {
    #     body;
    #   } while (++i != start + rounds)
    # }
    elif code.value == "repeat":
        o = []
        if len(code.args) != 5:
            raise CompilerPanic("bad number of repeat args")  # pragma: notest

        i_name = code.args[0]
        start = code.args[1]
        rounds = code.args[2]
        rounds_bound = code.args[3]
        body = code.args[4]

        entry_dest, continue_dest, exit_dest = (
            mksymbol("loop_start"),
            mksymbol("loop_continue"),
            mksymbol("loop_exit"),
        )

        # stack: []
        o.extend(
            _compile_to_assembly(
                start,
                withargs,
                existing_labels,
                break_dest,
                height,
            ))

        # stack: start
        o.extend(
            _compile_to_assembly(rounds, withargs, existing_labels, break_dest,
                                 height + 1))

        # stack: start, rounds

        # assert rounds <= round_bound
        if rounds != rounds_bound:
            # stack: start, rounds
            o.extend(
                _compile_to_assembly(rounds_bound, withargs, existing_labels,
                                     break_dest, height + 2))
            # stack: start, rounds, rounds_bound
            # assert rounds <= rounds_bound
            # TODO this runtime assertion should never fail for
            # internally generated repeats.
            # maybe drop it or jump to 0xFE
            o.extend(["DUP2", "GT", "_sym_revert0", "JUMPI"])

            # stack: start, rounds
            # if (0 == rounds) { goto exit_dest; }
            o.extend(["DUP1", "ISZERO", exit_dest, "JUMPI"])

        # stack: start, rounds
        # exit_i = start + rounds (when start is the constant 0, rounds
        # already is exit_i)
        if start.value != 0:
            o.extend(["DUP2", "ADD"])

        # stack: i, exit_i
        o.extend(["SWAP1"])

        if i_name.value in withargs:
            raise CompilerPanic(f"shadowed loop variable {i_name}")
        # The counter lives one slot above exit_i for the whole loop body.
        withargs[i_name.value] = height + 1

        # stack: exit_i, i
        o.extend([entry_dest, "JUMPDEST"])
        o.extend(
            _compile_to_assembly(
                body,
                withargs,
                existing_labels,
                (exit_dest, continue_dest, height + 2),
                height + 2,
            ))

        del withargs[i_name.value]

        # clean up any stack items left by body
        o.extend(["POP"] * body.valency)

        # stack: exit_i, i
        # increment i:
        o.extend([continue_dest, "JUMPDEST", "PUSH1", 1, "ADD"])

        # stack: exit_i, i+1 (new_i)
        # if (exit_i != new_i) { goto entry_dest }
        o.extend(["DUP2", "DUP2", "XOR", entry_dest, "JUMPI"])
        # Drop exit_i and i once the loop is done.
        o.extend([exit_dest, "JUMPDEST", "POP", "POP"])

        return o

    # Continue to the next iteration of the for loop
    elif code.value == "continue":
        if not break_dest:
            raise CompilerPanic("Invalid break")
        dest, continue_dest, break_height = break_dest
        return [continue_dest, "JUMP"]
    # Break from inside a for loop
    elif code.value == "break":
        if not break_dest:
            raise CompilerPanic("Invalid break")
        dest, continue_dest, break_height = break_dest

        n_local_vars = height - break_height
        # clean up any stack items declared in the loop body
        cleanup_local_vars = ["POP"] * n_local_vars
        return cleanup_local_vars + [dest, "JUMP"]
    # Break from inside one or more for loops prior to a return statement inside the loop
    elif code.value == "cleanup_repeat":
        if not break_dest:
            raise CompilerPanic("Invalid break")
        _, _, break_height = break_dest
        # clean up local vars and internal loop vars
        return ["POP"] * break_height
    # With statements
    elif code.value == "with":
        o = []
        # Evaluate the initial value; it becomes the variable's stack slot.
        o.extend(
            _compile_to_assembly(code.args[1], withargs, existing_labels,
                                 break_dest, height))
        # Remember an outer variable of the same name so it can be restored.
        old = withargs.get(code.args[0].value, None)
        withargs[code.args[0].value] = height
        o.extend(
            _compile_to_assembly(
                code.args[2],
                withargs,
                existing_labels,
                break_dest,
                height + 1,
            ))
        # Remove the variable from the stack, keeping the body's result (if
        # it produced one) on top.
        if code.args[2].valency:
            o.extend(["SWAP1", "POP"])
        else:
            o.extend(["POP"])
        if old is not None:
            withargs[code.args[0].value] = old
        else:
            del withargs[code.args[0].value]
        return o
    # LLL statement (used to contain code inside code)
    elif code.value == "lll":
        o = []
        begincode = mksymbol("lll_begin")
        endcode = mksymbol("lll_end")
        # Jump over the embedded code so it is never executed in place.
        o.extend([endcode, "JUMP", begincode, "BLANK"])

        # The inner program starts with an empty scope and an empty stack.
        lll = _compile_to_assembly(code.args[1], {}, existing_labels, None, 0)

        # `append(...)` call here is intentional.
        # each sublist is essentially its own program with its
        # own symbols.
        # in the later step when the "lll" block compiled to EVM,
        # compile_to_evm has logic to resolve symbols in "lll" to
        # position from start of runtime-code (instead of position
        # from start of bytecode).
        o.append(lll)

        o.extend([endcode, "JUMPDEST", begincode, endcode, "SUB", begincode])
        o.extend(
            _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height))

        # COPY the code to memory for deploy
        o.extend(["CODECOPY", begincode, endcode, "SUB"])
        return o
    # Seq (used to piece together multiple statements)
    elif code.value == "seq":
        o = []
        last_index = len(code.args) - 1
        for index, arg in enumerate(code.args):
            o.extend(
                _compile_to_assembly(arg, withargs, existing_labels,
                                     break_dest, height))
            # Discard the value of every statement except the final one.
            # The check is positional: comparing nodes by value would skip
            # the POP for an earlier statement that merely compares equal
            # to the last one, leaving a stray item on the stack.
            if arg.valency == 1 and index != last_index:
                o.append("POP")
        return o
    # Assure (if false, invalid opcode)
    elif code.value == "assert_unreachable":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        end_symbol = mksymbol("reachable")
        # Jump past INVALID when the condition holds.
        o.extend([end_symbol, "JUMPI", "INVALID", end_symbol, "JUMPDEST"])
        return o
    # Assert (if false, exit)
    elif code.value == "assert":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        o.extend(["ISZERO"])
        o.extend(_assert_false())
        return o
    # Unsigned/signed clamp, check less-than
    elif code.value in CLAMP_OP_NAMES:
        if isinstance(code.args[0].value, int) and isinstance(
                code.args[1].value, int):
            # Checks for clamp errors at compile time as opposed to run time
            # TODO move these to optimizer.py
            args_0_val = code.args[0].value
            args_1_val = code.args[1].value
            is_free_of_clamp_errors = any((
                code.value in ("uclamplt", "clamplt")
                and 0 <= args_0_val < args_1_val,
                code.value in ("uclample", "clample")
                and 0 <= args_0_val <= args_1_val,
                code.value in ("uclampgt", "clampgt")
                and 0 <= args_0_val > args_1_val,
                code.value in ("uclampge", "clampge")
                and 0 <= args_0_val >= args_1_val,
            ))
            if is_free_of_clamp_errors:
                # The clamp always passes, so only the value is emitted.
                return _compile_to_assembly(
                    code.args[0],
                    withargs,
                    existing_labels,
                    break_dest,
                    height,
                )
            else:
                raise Exception(
                    f"Invalid {code.value} with values {code.args[0]} and {code.args[1]}"
                )
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        o.extend(
            _compile_to_assembly(
                code.args[1],
                withargs,
                existing_labels,
                break_dest,
                height + 1,
            ))
        o.extend(["DUP2"])
        # Stack: num bound num
        # Each variant leaves a nonzero flag exactly when the clamp fails.
        if code.value == "uclamplt":
            o.extend(["LT", "ISZERO"])
        elif code.value == "clamplt":
            o.extend(["SLT", "ISZERO"])
        elif code.value == "uclample":
            o.extend(["GT"])
        elif code.value == "clample":
            o.extend(["SGT"])
        elif code.value == "uclampgt":
            o.extend(["GT", "ISZERO"])
        elif code.value == "clampgt":
            o.extend(["SGT", "ISZERO"])
        elif code.value == "uclampge":
            o.extend(["LT"])
        elif code.value == "clampge":
            o.extend(["SLT"])
        o.extend(_assert_false())
        return o
    # Signed/unsigned clamp, check against upper and lower bounds
    elif code.value in ("clamp", "uclamp"):
        comp1 = "SGT" if code.value == "clamp" else "GT"
        comp2 = "SLT" if code.value == "clamp" else "LT"
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        o.extend(
            _compile_to_assembly(
                code.args[1],
                withargs,
                existing_labels,
                break_dest,
                height + 1,
            ))
        o.extend(["DUP1"])
        # Three items (args[0], args[1] and its copy) are on the stack while
        # args[2] is evaluated.
        o.extend(
            _compile_to_assembly(
                code.args[2],
                withargs,
                existing_labels,
                break_dest,
                height + 3,
            ))
        o.extend(["SWAP1", comp1])
        o.extend(_assert_false())
        o.extend(["DUP1", "SWAP2", "SWAP1", comp2])
        o.extend(_assert_false())
        return o
    # Checks that a value is nonzero
    elif code.value == "clamp_nonzero":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        # Test a copy so the value itself stays on the stack.
        o.extend(["DUP1", "ISZERO"])
        o.extend(_assert_false())
        return o
    # SHA3 a single value
    elif code.value == "sha3_32":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        # Store the word in scratch memory and hash those 32 bytes.
        # NOTE(review): PUSH1 assumes the memory position fits in one byte.
        o.extend([
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "MSTORE",
            "PUSH1",
            32,
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "SHA3",
        ])
        return o
    # SHA3 a 64 byte value
    elif code.value == "sha3_64":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        # args[0] is already on the stack while args[1] is evaluated, so
        # args[1] must be compiled one slot higher; with the same height a
        # stack variable referenced by args[1] would resolve to the wrong
        # DUP depth.
        o.extend(
            _compile_to_assembly(code.args[1], withargs, existing_labels,
                                 break_dest, height + 1))
        # Store args[1] and args[0] in the two scratch slots, then hash 64
        # bytes starting at the first slot.
        o.extend([
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE2,
            "MSTORE",
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "MSTORE",
            "PUSH1",
            64,
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "SHA3",
        ])
        return o
    # unsigned <= operator
    elif code.value == "le":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["gt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # unsigned >= operator
    elif code.value == "ge":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["lt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # signed <= operator
    elif code.value == "sle":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["sgt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # signed >= operator
    elif code.value == "sge":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["slt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # != operator
    elif code.value == "ne":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["eq", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # e.g. 95 -> 96, 96 -> 96, 97 -> 128
    elif code.value == "ceil32":
        return _compile_to_assembly(
            LLLnode.from_list([
                "with",
                "_val",
                code.args[0],
                # in mod32 arithmetic, the solution to x + y == 32 is
                # y = bitwise_not(x) & 31
                ["add", "_val", ["and", ["not", ["sub", "_val", 1]], 31]],
            ]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # jump to a symbol, and push variable arguments onto stack
    elif code.value == "goto":
        o = []
        # Arguments after the target are pushed last-to-first, like opcode
        # arguments.
        for i, c in enumerate(reversed(code.args[1:])):
            o.extend(
                _compile_to_assembly(c, withargs, existing_labels, break_dest,
                                     height + i))
        o.extend(["_sym_" + str(code.args[0]), "JUMP"])
        return o
    # a bare symbol is passed through unchanged
    elif isinstance(code.value, str) and is_symbol(code.value):
        return [code.value]
    # set a symbol as a location.
    elif code.value == "label":
        label_name = str(code.args[0])

        if label_name in existing_labels:
            raise Exception(f"Label with name {label_name} already exists!")
        else:
            existing_labels.add(label_name)

        return ["_sym_" + label_name, "JUMPDEST"]
    # inject debug opcode.
    elif code.value == "debugger":
        return mkdebug(pc_debugger=False, pos=code.pos)
    # inject debug opcode.
    elif code.value == "pc_debugger":
        return mkdebug(pc_debugger=True, pos=code.pos)
    else:
        raise Exception("Weird code element: " + repr(code))
예제 #8
0
def _compile_to_assembly(code,
                         withargs=None,
                         existing_labels=None,
                         break_dest=None,
                         height=0):
    if withargs is None:
        withargs = {}
    if not isinstance(withargs, dict):
        raise CompilerPanic(f"Incorrect type for withargs: {type(withargs)}")

    if existing_labels is None:
        existing_labels = set()
    if not isinstance(existing_labels, set):
        raise CompilerPanic(
            f"Incorrect type for existing_labels: {type(existing_labels)}")

    # Opcodes
    if isinstance(code.value, str) and code.value.upper() in get_opcodes():
        o = []
        for i, c in enumerate(code.args[::-1]):
            o.extend(
                _compile_to_assembly(c, withargs, existing_labels, break_dest,
                                     height + i))
        o.append(code.value.upper())
        return o
    # Numbers
    elif isinstance(code.value, int):
        if code.value < -(2**255):
            raise Exception(f"Value too low: {code.value}")
        elif code.value >= 2**256:
            raise Exception(f"Value too high: {code.value}")
        bytez = num_to_bytearray(code.value % 2**256) or [0]
        return ["PUSH" + str(len(bytez))] + bytez
    # Variables connected to with statements
    elif isinstance(code.value, str) and code.value in withargs:
        if height - withargs[code.value] > 16:
            raise Exception("With statement too deep")
        return ["DUP" + str(height - withargs[code.value])]
    # Setting variables connected to with statements
    elif code.value == "set":
        if len(code.args) != 2 or code.args[0].value not in withargs:
            raise Exception(
                "Set expects two arguments, the first being a stack variable")
        if height - withargs[code.args[0].value] > 16:
            raise Exception("With statement too deep")
        return _compile_to_assembly(
            code.args[1], withargs, existing_labels, break_dest, height) + [
                "SWAP" + str(height - withargs[code.args[0].value]),
                "POP",
            ]
    # Pass statements
    elif code.value == "pass":
        return []
    # Code length
    elif code.value == "~codelen":
        return ["_sym_codeend"]
    # Calldataload equivalent for code
    elif code.value == "codeload":
        return _compile_to_assembly(
            LLLnode.from_list([
                "seq",
                ["codecopy", MemoryPositions.FREE_VAR_SPACE, code.args[0], 32],
                ["mload", MemoryPositions.FREE_VAR_SPACE],
            ]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # If statements (2 arguments, ie. if x: y)
    elif code.value in ("if", "if_unchecked") and len(code.args) == 2:
        o = []
        o.extend(
            _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height))
        end_symbol = mksymbol()
        o.extend(["ISZERO", end_symbol, "JUMPI"])
        o.extend(
            _compile_to_assembly(code.args[1], withargs, existing_labels,
                                 break_dest, height))
        o.extend([end_symbol, "JUMPDEST"])
        return o
    # If statements (3 arguments, ie. if x: y, else: z)
    elif code.value == "if" and len(code.args) == 3:
        o = []
        o.extend(
            _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height))
        mid_symbol = mksymbol()
        end_symbol = mksymbol()
        o.extend(["ISZERO", mid_symbol, "JUMPI"])
        o.extend(
            _compile_to_assembly(code.args[1], withargs, existing_labels,
                                 break_dest, height))
        o.extend([end_symbol, "JUMP", mid_symbol, "JUMPDEST"])
        o.extend(
            _compile_to_assembly(code.args[2], withargs, existing_labels,
                                 break_dest, height))
        o.extend([end_symbol, "JUMPDEST"])
        return o
    # Repeat statements (compiled from for loops)
    # Repeat(memloc, start, rounds, body)
    elif code.value == "repeat":
        o = []
        loops = num_to_bytearray(code.args[2].value)
        start, continue_dest, end = mksymbol(), mksymbol(), mksymbol()
        o.extend(
            _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height))
        o.extend(
            _compile_to_assembly(
                code.args[1],
                withargs,
                existing_labels,
                break_dest,
                height + 1,
            ))
        o.extend(["PUSH" + str(len(loops))] + loops)
        # stack: memloc, startvalue, rounds
        o.extend(["DUP2", "DUP4", "MSTORE", "ADD", start, "JUMPDEST"])
        # stack: memloc, exit_index
        o.extend(
            _compile_to_assembly(
                code.args[3],
                withargs,
                existing_labels,
                (end, continue_dest, height + 2),
                height + 2,
            ))
        # stack: memloc, exit_index
        o.extend([
            continue_dest,
            "JUMPDEST",
            "DUP2",
            "MLOAD",
            "PUSH1",
            1,
            "ADD",
            "DUP1",
            "DUP4",
            "MSTORE",
        ])
        # stack: len(loops), index memory address, new index
        o.extend([
            "DUP2", "EQ", "ISZERO", start, "JUMPI", end, "JUMPDEST", "POP",
            "POP"
        ])
        return o
    # Continue to the next iteration of the for loop
    elif code.value == "continue":
        if not break_dest:
            raise CompilerPanic("Invalid break")
        dest, continue_dest, break_height = break_dest
        return [continue_dest, "JUMP"]
    # Break from inside a for loop
    elif code.value == "break":
        if not break_dest:
            raise CompilerPanic("Invalid break")
        dest, continue_dest, break_height = break_dest
        return ["POP"] * (height - break_height) + [dest, "JUMP"]
    # Break from inside one or more for loops prior to a return statement inside the loop
    elif code.value == "exit_repeater":
        if not break_dest:
            raise CompilerPanic("Invalid break")
        _, _, break_height = break_dest
        return ["POP"] * break_height
    # With statements
    elif code.value == "with":
        o = []
        o.extend(
            _compile_to_assembly(code.args[1], withargs, existing_labels,
                                 break_dest, height))
        old = withargs.get(code.args[0].value, None)
        withargs[code.args[0].value] = height
        o.extend(
            _compile_to_assembly(
                code.args[2],
                withargs,
                existing_labels,
                break_dest,
                height + 1,
            ))
        if code.args[2].valency:
            o.extend(["SWAP1", "POP"])
        else:
            o.extend(["POP"])
        if old is not None:
            withargs[code.args[0].value] = old
        else:
            del withargs[code.args[0].value]
        return o
    # LLL statement (used to contain code inside code)
    elif code.value == "lll":
        o = []
        begincode = mksymbol()
        endcode = mksymbol()
        o.extend([endcode, "JUMP", begincode, "BLANK"])

        lll = _compile_to_assembly(code.args[0], {}, existing_labels, None, 0)

        # `append(...)` call here is intentional.
        # each sublist is essentially its own program with its
        # own symbols.
        # in the later step when the "lll" block compiled to EVM,
        # compile_to_evm has logic to resolve symbols in "lll" to
        # position from start of runtime-code (instead of position
        # from start of bytecode).
        o.append(lll)

        o.extend([endcode, "JUMPDEST", begincode, endcode, "SUB", begincode])
        o.extend(
            _compile_to_assembly(code.args[1], withargs, existing_labels,
                                 break_dest, height))

        # COPY the code to memory for deploy
        o.extend(["CODECOPY", begincode, endcode, "SUB"])
        return o
    # Seq (used to piece together multiple statements)
    elif code.value == "seq":
        o = []
        for arg in code.args:
            o.extend(
                _compile_to_assembly(arg, withargs, existing_labels,
                                     break_dest, height))
            if arg.valency == 1 and arg != code.args[-1]:
                o.append("POP")
        return o
    # Seq without popping.
    elif code.value == "seq_unchecked":
        o = []
        for arg in code.args:
            o.extend(
                _compile_to_assembly(arg, withargs, existing_labels,
                                     break_dest, height))
            # if arg.valency == 1 and arg != code.args[-1]:
            #     o.append('POP')
        return o
    # Assure (if false, invalid opcode)
    elif code.value == "assert_unreachable":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        end_symbol = mksymbol()
        o.extend([end_symbol, "JUMPI", "INVALID", end_symbol, "JUMPDEST"])
        return o
    # Assert (if false, exit)
    elif code.value == "assert":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        o.extend(["ISZERO"])
        o.extend(_assert_false())
        return o
    # Unsigned/signed clamp, check less-than
    elif code.value in CLAMP_OP_NAMES:
        if isinstance(code.args[0].value, int) and isinstance(
                code.args[1].value, int):
            # Checks for clamp errors at compile time as opposed to run time
            args_0_val = code.args[0].value
            args_1_val = code.args[1].value
            is_free_of_clamp_errors = any((
                code.value in ("uclamplt", "clamplt")
                and 0 <= args_0_val < args_1_val,
                code.value in ("uclample", "clample")
                and 0 <= args_0_val <= args_1_val,
                code.value in ("uclampgt", "clampgt")
                and 0 <= args_0_val > args_1_val,
                code.value in ("uclampge", "clampge")
                and 0 <= args_0_val >= args_1_val,
            ))
            if is_free_of_clamp_errors:
                return _compile_to_assembly(
                    code.args[0],
                    withargs,
                    existing_labels,
                    break_dest,
                    height,
                )
            else:
                raise Exception(
                    f"Invalid {code.value} with values {code.args[0]} and {code.args[1]}"
                )
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        o.extend(
            _compile_to_assembly(
                code.args[1],
                withargs,
                existing_labels,
                break_dest,
                height + 1,
            ))
        o.extend(["DUP2"])
        # Stack: num num bound
        if code.value == "uclamplt":
            o.extend(["LT", "ISZERO"])
        elif code.value == "clamplt":
            o.extend(["SLT", "ISZERO"])
        elif code.value == "uclample":
            o.extend(["GT"])
        elif code.value == "clample":
            o.extend(["SGT"])
        elif code.value == "uclampgt":
            o.extend(["GT", "ISZERO"])
        elif code.value == "clampgt":
            o.extend(["SGT", "ISZERO"])
        elif code.value == "uclampge":
            o.extend(["LT"])
        elif code.value == "clampge":
            o.extend(["SLT"])
        o.extend(_assert_false())
        return o
    # Signed/unsigned clamp, check against upper and lower bounds
    elif code.value in ("clamp", "uclamp"):
        comp1 = "SGT" if code.value == "clamp" else "GT"
        comp2 = "SLT" if code.value == "clamp" else "LT"
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        o.extend(
            _compile_to_assembly(
                code.args[1],
                withargs,
                existing_labels,
                break_dest,
                height + 1,
            ))
        o.extend(["DUP1"])
        o.extend(
            _compile_to_assembly(
                code.args[2],
                withargs,
                existing_labels,
                break_dest,
                height + 3,
            ))
        o.extend(["SWAP1", comp1])
        o.extend(_assert_false())
        o.extend(["DUP1", "SWAP2", "SWAP1", comp2])
        o.extend(_assert_false())
        return o
    # Checks that a value is nonzero
    elif code.value == "clamp_nonzero":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        o.extend(["DUP1", "ISZERO"])
        o.extend(_assert_false())
        return o
    # SHA3 a single value
    elif code.value == "sha3_32":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        o.extend([
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "MSTORE",
            "PUSH1",
            32,
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "SHA3",
        ])
        return o
    # SHA3 a 64 byte value
    elif code.value == "sha3_64":
        o = _compile_to_assembly(code.args[0], withargs, existing_labels,
                                 break_dest, height)
        o.extend(
            _compile_to_assembly(code.args[1], withargs, existing_labels,
                                 break_dest, height))
        o.extend([
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE2,
            "MSTORE",
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "MSTORE",
            "PUSH1",
            64,
            "PUSH1",
            MemoryPositions.FREE_VAR_SPACE,
            "SHA3",
        ])
        return o
    # <= operator
    elif code.value == "le":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["gt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # >= operator
    elif code.value == "ge":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["lt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # signed <= operator
    elif code.value == "sle":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["sgt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # signed >= operator
    elif code.value == "sge":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["slt", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # != operator
    elif code.value == "ne":
        return _compile_to_assembly(
            LLLnode.from_list(["iszero", ["eq", code.args[0], code.args[1]]]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # e.g. 95 -> 96, 96 -> 96, 97 -> 128
    elif code.value == "ceil32":
        return _compile_to_assembly(
            LLLnode.from_list([
                "with",
                "_val",
                code.args[0],
                ["sub", ["add", "_val", 31], ["mod", ["sub", "_val", 1], 32]],
            ]),
            withargs,
            existing_labels,
            break_dest,
            height,
        )
    # jump to a symbol
    elif code.value == "goto":
        return ["_sym_" + str(code.args[0]), "JUMP"]
    elif isinstance(code.value, str) and code.value.startswith("_sym_"):
        return code.value
    # set a symbol as a location.
    elif code.value == "label":
        label_name = str(code.args[0])

        if label_name in existing_labels:
            raise Exception(f"Label with name {label_name} already exists!")
        else:
            existing_labels.add(label_name)

        return ["_sym_" + label_name, "JUMPDEST"]
    # inject debug opcode.
    elif code.value == "debugger":
        return mkdebug(pc_debugger=False, pos=code.pos)
    # inject debug opcode (with pc_debugger enabled).
    elif code.value == "pc_debugger":
        return mkdebug(pc_debugger=True, pos=code.pos)
    else:
        raise Exception("Weird code element: " + repr(code))