def test_general_constants(self):
    """Arbitrary (even unhashable) objects must be linkable as constants."""
    class CustomObject:
        pass

    class UnHashableCustomObject:
        __hash__ = None

    consts = ([1, 2, 3], {1, 2, 3}, CustomObject(), UnHashableCustomObject())
    instrs = [Instr("LOAD_CONST", const, lineno=1) for const in consts]
    instrs.append(Instr("BUILD_TUPLE", 4, lineno=1))
    instrs.append(Instr("RETURN_VALUE", lineno=1))
    code = Bytecode(instrs)
    self.assertEqual(code.to_code().co_consts, consts)

    def f():
        return  # pragma: no cover

    # Executing the assembled code must hand back the very same objects.
    f.__code__ = code.to_code()
    self.assertEqual(f(), consts)
def test_general_constants(self):
    """Test that plain Python objects can be bound as code constants."""
    class CustomObject:
        pass

    class UnHashableCustomObject:
        __hash__ = None

    obj1, obj2 = [1, 2, 3], {1, 2, 3}
    obj3, obj4 = CustomObject(), UnHashableCustomObject()
    instructions = [Instr('LOAD_CONST', o, lineno=1) for o in (obj1, obj2, obj3, obj4)]
    instructions += [Instr('BUILD_TUPLE', 4, lineno=1),
                     Instr('RETURN_VALUE', lineno=1)]
    code = Bytecode(instructions)
    self.assertEqual(code.to_code().co_consts, (obj1, obj2, obj3, obj4))

    def f():
        return  # pragma: no cover

    f.__code__ = code.to_code()
    self.assertEqual(f(), (obj1, obj2, obj3, obj4))
def test_extreme_compute_jumps_convergence(self):
    """Test of compute_jumps() requiring absurd number of passes.

    NOTE: This test also serves to demonstrate that there is no worst
    case: the number of passes can be unlimited (or, actually, limited
    by the size of the provided code).

    This is an extension of test_compute_jumps_convergence.  Instead of
    two jumps, where the earlier gets extended after the latter, we
    instead generate a series of many jumps.  Each pass of
    compute_jumps() extends one more instruction, which in turn causes
    the one behind it to be extended on the next pass.
    """
    if not WORDCODE:
        return

    # N: the number of unextended instructions that can be squeezed into a
    # set of bytes adressable by the arg of an unextended instruction.
    # The answer is "128", but here's how we arrive at it (and it also
    # hints at how to make this work for pre-WORDCODE).
    max_unextended_offset = 1 << 8
    unextended_branch_instr_size = 2
    N = max_unextended_offset // unextended_branch_instr_size

    nop = 'UNARY_POSITIVE'  # don't use NOP, dis.stack_effect will raise

    # The number of jumps will be equal to the number of labels.  The
    # number of passes of compute_jumps() required will be one greater
    # than this.
    labels = [Label() for x in range(0, 3 * N)]

    # Emit the jumps in reverse label order: the first jump targets the
    # last label, so every jump crosses all the later ones.
    code = Bytecode()
    code.extend(
        Instr('JUMP_FORWARD', labels[len(labels) - x - 1])
        for x in range(0, len(labels)))
    end_of_jumps = len(code)
    code.extend(Instr(nop) for x in range(0, N))

    # Now insert the labels.  The first is N instructions (i.e. 256
    # bytes) after the last jump.  Then they proceed to earlier positions
    # 4 bytes at a time.  While the targets are in the range of the nop
    # instructions, 4 bytes is two instructions.  When the targets are in
    # the range of JUMP_FORWARD instructions we have to allow for the fact
    # that the instructions will have been extended to four bytes each, so
    # working backwards 4 bytes per label means just one instruction per
    # label.
    offset = end_of_jumps + N
    for l in range(0, len(labels)):
        code.insert(offset, labels[l])
        if offset <= end_of_jumps:
            offset -= 1
        else:
            offset -= 2

    code.insert(0, Instr("LOAD_CONST", 0))
    del end_of_jumps
    code.append(Instr('RETURN_VALUE'))

    # One pass per label, plus one final pass that makes no change.
    code.to_code(compute_jumps_passes=(len(labels) + 1))
def test_extreme_compute_jumps_convergence(self):
    """Test of compute_jumps() requiring absurd number of passes.

    NOTE: This test also serves to demonstrate that there is no worst
    case: the number of passes can be unlimited (or, actually, limited
    by the size of the provided code).

    This is an extension of test_compute_jumps_convergence.  Instead of
    two jumps, where the earlier gets extended after the latter, we
    instead generate a series of many jumps.  Each pass of
    compute_jumps() extends one more instruction, which in turn causes
    the one behind it to be extended on the next pass.
    """
    if not WORDCODE:
        return

    # N: the number of unextended instructions that can be squeezed into a
    # set of bytes adressable by the arg of an unextended instruction.
    # The answer is "128", but here's how we arrive at it (and it also
    # hints at how to make this work for pre-WORDCODE).
    max_unextended_offset = 1 << 8
    unextended_branch_instr_size = 2
    N = max_unextended_offset // unextended_branch_instr_size

    nop = 'UNARY_POSITIVE'  # don't use NOP, dis.stack_effect will raise

    # The number of jumps will be equal to the number of labels.  The
    # number of passes of compute_jumps() required will be one greater
    # than this.
    labels = [Label() for x in range(0, 3 * N)]

    # Emit the jumps in reverse label order so every jump crosses all
    # the later jumps, maximising the ripple effect.
    code = Bytecode()
    code.extend(
        Instr('JUMP_FORWARD', labels[len(labels) - x - 1])
        for x in range(0, len(labels)))
    end_of_jumps = len(code)
    code.extend(Instr(nop) for x in range(0, N))

    # Now insert the labels.  The first is N instructions (i.e. 256
    # bytes) after the last jump.  Then they proceed to earlier positions
    # 4 bytes at a time.  While the targets are in the range of the nop
    # instructions, 4 bytes is two instructions.  When the targets are in
    # the range of JUMP_FORWARD instructions we have to allow for the fact
    # that the instructions will have been extended to four bytes each, so
    # working backwards 4 bytes per label means just one instruction per
    # label.
    offset = end_of_jumps + N
    for l in range(0, len(labels)):
        code.insert(offset, labels[l])
        if offset <= end_of_jumps:
            offset -= 1
        else:
            offset -= 2

    code.insert(0, Instr("LOAD_CONST", 0))
    del end_of_jumps
    code.append(Instr('RETURN_VALUE'))

    # One pass per label, plus one final pass that makes no change.
    code.to_code(compute_jumps_passes=(len(labels) + 1))
def test_compute_jumps_convergence(self):
    # Instruction layout under test:
    #
    #   JUMP_ABSOLUTE Label1
    #   JUMP_ABSOLUTE Label2
    #   ...126 instructions...
    #   Label1:  offset 254 on the first pass, 256 on the second
    #   NOP
    #   ...many more instructions...
    #   Label2:  offset > 256 on the first pass
    #
    # Pass one of compute_jumps() sees Label2 at address 254, which still
    # fits in a single-byte arg.  Pass two pushes the instruction at
    # Label1 to offset 256, which now needs an EXTENDED_ARG, so a third
    # pass is required — two passes alone must fail.
    #
    # On Python > 3.10 offsets count instructions rather than bytes,
    # hence the doubled range constants.
    bytecode = Bytecode()
    label1, label2 = Label(), Label()
    filler = "NOP"
    bytecode.append(Instr("JUMP_ABSOLUTE", label1))
    bytecode.append(Instr("JUMP_ABSOLUTE", label2))
    # Need 254 * 2 + 2 when offsets are instruction-based, since the arg
    # then changes by one instruction rather than two bytes.
    first_stop = 510 if OFFSET_AS_INSTRUCTION else 254
    bytecode.extend(Instr(filler) for _ in range(4, first_stop, 2))
    bytecode.append(label1)
    bytecode.append(Instr(filler))
    second_start = 514 if OFFSET_AS_INSTRUCTION else 256
    second_stop = 600 if OFFSET_AS_INSTRUCTION else 300
    bytecode.extend(Instr(filler) for _ in range(second_start, second_stop, 2))
    bytecode.append(label2)
    bytecode.append(Instr(filler))
    # The default pass budget is large enough.
    bytecode.to_code()
    # Capping compute_jumps() at two passes must raise.
    with self.assertRaises(RuntimeError):
        bytecode.to_code(compute_jumps_passes=2)
def test_to_code(self):
    """co_stacksize is computed by to_code() unless explicitly given."""
    bytecode = Bytecode()
    bytecode.first_lineno = 50
    for name, arg in (
        ("LOAD_NAME", "print"),
        ("LOAD_CONST", "%s"),
        ("LOAD_GLOBAL", "a"),
        ("BINARY_MODULO", None),
        ("CALL_FUNCTION", 1),
        ("RETURN_VALUE", None),
    ):
        bytecode.append(Instr(name) if arg is None else Instr(name, arg))
    computed = bytecode.to_code()
    # hopefully this is obvious from inspection? :-)
    self.assertEqual(computed.co_stacksize, 3)
    forced = bytecode.to_code(stacksize=42)
    self.assertEqual(forced.co_stacksize, 42)
def test_extended_jump(self):
    NOP = bytes((opcode.opmap['NOP'],))

    class BigInstr(ConcreteInstr):
        """Pseudo-instruction that assembles to an arbitrary run of NOPs."""

        def __init__(self, size):
            super().__init__('NOP')
            self._size = size

        def copy(self):
            return self

        def assemble(self):
            return NOP * self._size

    # (invalid) code whose jump crosses > 0xffff bytes, forcing EXTENDED_ARG
    target = Label()
    nop_count = 2 ** 16
    code = Bytecode([
        Instr("JUMP_ABSOLUTE", target),
        BigInstr(nop_count),
        target,
        Instr('LOAD_CONST', None),
        Instr('RETURN_VALUE'),
    ])
    generated = code.to_code()
    filler = NOP * nop_count
    if WORDCODE:
        expected = b'\x90\x01\x90\x00q\x06' + filler + b'd\x00S\x00'
    else:
        expected = b'\x90\x01\x00q\x06\x00' + filler + b'd\x00\x00S'
    self.assertEqual(generated.co_code, expected)
def test_extended_jump(self):
    NOP = bytes((opcode.opmap['NOP'],))

    class BigInstr(ConcreteInstr):
        """Fake NOP whose assembled size is chosen at construction time."""

        def __init__(self, size):
            super().__init__('NOP')
            self._size = size

        def copy(self):
            return self

        def assemble(self):
            return NOP * self._size

    # (invalid) code using jumps > 0xffff to test extended arg
    landing = Label()
    padding = 2 ** 16
    sequence = [Instr("JUMP_ABSOLUTE", landing), BigInstr(padding), landing,
                Instr('LOAD_CONST', None), Instr('RETURN_VALUE')]
    code_obj = Bytecode(sequence).to_code()
    # Prefix/suffix differ between wordcode and pre-wordcode encodings.
    prefix, suffix = ((b'\x90\x01\x90\x00q\x06', b'd\x00S\x00')
                      if WORDCODE else
                      (b'\x90\x01\x00q\x06\x00', b'd\x00\x00S'))
    self.assertEqual(code_obj.co_code, prefix + NOP * padding + suffix)
def test_optimize_code_obj(self):
    # optimize() applied to a code object folds `x = 3 + 5` into `x = 8`.
    unoptimized = Bytecode([
        Instr("LOAD_CONST", 3),
        Instr("LOAD_CONST", 5),
        Instr("BINARY_ADD"),
        Instr("STORE_NAME", "x"),
        Instr("LOAD_CONST", None),
        Instr("RETURN_VALUE"),
    ]).to_code()
    optimized = peephole_opt.PeepholeOptimizer().optimize(unoptimized)
    expected = [
        Instr("LOAD_CONST", 8, lineno=1),
        Instr("STORE_NAME", "x", lineno=1),
        Instr("LOAD_CONST", None, lineno=1),
        Instr("RETURN_VALUE", lineno=1),
    ]
    self.assertEqual(Bytecode.from_code(optimized), expected)
def test_extended_jump(self):
    NOP = bytes((opcode.opmap["NOP"],))

    class BigInstr(ConcreteInstr):
        """Fake instruction assembling to `size` NOP bytes."""

        def __init__(self, size):
            super().__init__("NOP")
            self._size = size

        def copy(self):
            return self

        def assemble(self):
            return NOP * self._size

    # (invalid) code using a jump larger than 0xffff to force EXTENDED_ARG
    landing = Label()
    padding = 1 << 16
    bytecode = Bytecode([
        Instr("JUMP_ABSOLUTE", landing),
        BigInstr(padding),
        landing,
        Instr("LOAD_CONST", None),
        Instr("RETURN_VALUE"),
    ])
    expected = b"\x90\x01\x90\x00q\x06" + NOP * padding + b"d\x00S\x00"
    self.assertEqual(bytecode.to_code().co_code, expected)
def test_compute_jumps_convergence(self):
    # Instruction layout under test:
    #
    #   JUMP_ABSOLUTE Label1
    #   JUMP_ABSOLUTE Label2
    #   ...126 instructions...
    #   Label1:  offset 254 on the first pass, 256 on the second
    #   NOP
    #   ...many more instructions...
    #   Label2:  offset > 256 on the first pass
    #
    # Pass one of compute_jumps() sees Label2 at 254, which still fits a
    # single-byte arg.  Pass two pushes the Label1 instruction to offset
    # 256, which then needs an EXTENDED_ARG, so a third pass is required.
    # This verifies the case where two passes is insufficient but three
    # is enough.
    if not WORDCODE:
        # Could be done pre-WORDCODE, but that requires 2**16 bytes of
        # code.
        return
    bytecode = Bytecode()
    label1, label2 = Label(), Label()
    filler = 'UNARY_POSITIVE'  # don't use NOP, dis.stack_effect will raise
    bytecode.append(Instr('JUMP_ABSOLUTE', label1))
    bytecode.append(Instr('JUMP_ABSOLUTE', label2))
    bytecode.extend(Instr(filler) for _ in range(4, 254, 2))
    bytecode.append(label1)
    bytecode.append(Instr(filler))
    bytecode.extend(Instr(filler) for _ in range(256, 300, 2))
    bytecode.append(label2)
    bytecode.append(Instr(filler))
    # The default pass budget suffices.
    bytecode.to_code()
    # Two passes must raise.
    with self.assertRaises(RuntimeError):
        bytecode.to_code(compute_jumps_passes=2)
def test_to_code(self):
    """co_stacksize is computed automatically but can be overridden."""
    bytecode = Bytecode()
    bytecode.first_lineno = 50
    bytecode.extend([
        Instr("LOAD_NAME", "print"),
        Instr("LOAD_CONST", "%s"),
        Instr("LOAD_GLOBAL", "a"),
        Instr("BINARY_MODULO"),
        Instr("CALL_FUNCTION", 1),
        Instr("RETURN_VALUE"),
    ])
    # hopefully this is obvious from inspection? :-)
    self.assertEqual(bytecode.to_code().co_stacksize, 3)
    self.assertEqual(bytecode.to_code(stacksize=42).co_stacksize, 42)
def test_not_enough_rot_with_disable_check_of_pre_and_post(self):
    # ROT_* on a one-item stack would underflow, but with the pre/post
    # check disabled the stack size is still computed.
    for opname in ["ROT_TWO", "ROT_THREE"]:
        with self.subTest():
            bytecode = Bytecode()
            bytecode.first_lineno = 1
            bytecode.append(Instr("LOAD_CONST", 1))
            bytecode.append(Instr(opname))
            co = bytecode.to_code(check_pre_and_post=False)
            self.assertEqual(co.co_stacksize, 1)
def test_negative_size_build_const_map_with_disable_check_of_pre_and_post(
        self):
    # BUILD_CONST_KEY_MAP with too few operands would normally fail the
    # pre/post check; with it disabled the stack size is still computed.
    bytecode = Bytecode()
    bytecode.first_lineno = 1
    bytecode.append(Instr("LOAD_CONST", ("a",)))
    bytecode.append(Instr("BUILD_CONST_KEY_MAP", 1))
    co = bytecode.to_code(check_pre_and_post=False)
    self.assertEqual(co.co_stacksize, 1)
def get_func_from_code(code_object, fn_name):
    """Wrap *code_object* in a function named *fn_name* via MAKE_FUNCTION."""
    builder = Bytecode()
    builder.extend([
        Instr('LOAD_CONST', code_object),
        Instr('LOAD_CONST', fn_name),
        Instr('MAKE_FUNCTION', 0),
        Instr('RETURN_VALUE'),
    ])
    builder.flags = (CompilerFlags.OPTIMIZED
                     | CompilerFlags.NEWLOCALS
                     | CompilerFlags.NOFREE)
    # eval() only runs the freshly assembled trampoline, not user input.
    return eval(builder.to_code())
def test_negative_size_unary_with_disable_check_of_pre_and_post(self):
    # A unary op on an empty stack would underflow; with the pre/post
    # check disabled, the computed stack size is simply 0.
    for opname in ("UNARY_POSITIVE", "UNARY_NEGATIVE",
                   "UNARY_NOT", "UNARY_INVERT"):
        with self.subTest():
            bytecode = Bytecode()
            bytecode.first_lineno = 1
            bytecode.append(Instr(opname))
            co = bytecode.to_code(check_pre_and_post=False)
            self.assertEqual(co.co_stacksize, 0)
def r_compile():
    # JIT-compile the wrapped function, then rewrite start_func.__code__
    # so subsequent calls delegate straight to the compiled function.
    # NOTE(review): this is a closure — it relies on enclosing-scope
    # names (self, start_func, code, argnames, cellvars, lineno,
    # Aware, load_arg); it is only meaningful nested inside its factory.
    jit_func = Aware.f(self, id(start_func))
    bc = Bytecode()
    # Stub body: push the jit function, push the original arguments,
    # call it and return its result.
    bc.append(PyInstr(InstrNames.LOAD_CONST, jit_func))
    bc.extend([load_arg(each, cellvars, lineno) for each in argnames])
    bc.extend([
        PyInstr(InstrNames.CALL_FUNCTION, len(argnames)),
        PyInstr(InstrNames.RETURN_VALUE)
    ])
    # Copy metadata (names, filename, …) from the original code object.
    bc._copy_attr_from(code)
    start_func.__code__ = bc.to_code()
    start_func.__jit__ = jit_func
    return jit_func
def _make_bytecode(self, source, template_locator):
    """Compile template *source* into a code object.

    The generated code writes its output through the ``write_func``
    symbol (bound to ``io.StringIO.write``) and ends by returning None.

    NOTE(review): the ``template_locator`` parameter is unused — the
    parser is constructed from ``self._template_locator`` instead;
    confirm which one is intended.
    """
    instructions = []
    symbol_table = {"write_func": io.StringIO.write}
    parser_obj = parser.Parser(self._template_locator)
    sequence = parser_obj.parse(self._get_chunks(source))
    # Each parsed element contributes its own instruction sequence.
    for item in sequence.elements:
        instructions += item.make_bytecode(symbol_table)
    bytecode = Bytecode(instructions +
                        [Instr("LOAD_CONST", None),
                         Instr("RETURN_VALUE")])
    return bytecode.to_code()
def func_info(cls, func: types.FunctionType) -> types.FunctionType:
    # Wrap *func* in a lazily-compiling stub: the first call invokes
    # r_compile(), which JIT-compiles the function and patches
    # start_func.__code__ so every later call goes straight to the
    # compiled version.
    names = func.__code__.co_names
    code = Bytecode.from_code(func.__code__)
    codeinfo = cls.code_info(code)

    def r_compile():
        # Compile once, then rebind start_func.__code__ to delegate to
        # the jit function from now on.
        jit_func = Aware.f(self)
        print("jit_func", type(jit_func))
        bc = Bytecode()
        bc.append(PyInstr(InstrNames.LOAD_CONST, jit_func))
        bc.extend([load_arg(each, cellvars, lineno) for each in argnames])
        bc.extend([
            PyInstr(InstrNames.CALL_FUNCTION, len(argnames)),
            PyInstr(InstrNames.RETURN_VALUE)
        ])
        bc._copy_attr_from(code)
        start_func.__code__ = bc.to_code()
        start_func.__jit__ = jit_func
        return jit_func

    start_func = copy_func(func)
    start_func_code = Bytecode()
    lineno = code.first_lineno
    argnames = code.argnames
    start_func_code.argnames = argnames
    cellvars = code.cellvars
    # Stub body: call r_compile() (which returns the jit function), then
    # immediately call that function with the original arguments.
    start_func_code.extend([
        PyInstr(InstrNames.LOAD_CONST, r_compile, lineno=lineno),
        PyInstr(InstrNames.CALL_FUNCTION, 0, lineno=lineno),
        *(load_arg(each, cellvars, lineno) for each in argnames),
        PyInstr(InstrNames.CALL_FUNCTION, len(argnames), lineno=lineno),
        PyInstr(InstrNames.RETURN_VALUE, lineno=lineno)
    ])
    start_func_code._copy_attr_from(code)
    # NOTE: `self` here is the PyFuncInfo record, not an instance —
    # r_compile closes over it.
    self = PyFuncInfo(func.__name__, func.__module__, func.__defaults__,
                      func.__kwdefaults__, func.__closure__,
                      func.__globals__, codeinfo, func, {}, names)
    start_func.__code__ = start_func_code.to_code()
    start_func.__func_info__ = self
    start_func.__compile__ = r_compile
    start_func.__jit__ = None
    return start_func
def _make_trampoline(target_func):
    """Build a function that forwards its *args/**kwargs to *target_func*."""
    trampoline_code = Bytecode([
        Instr('LOAD_CONST', target_func),
        Instr('LOAD_FAST', 'args'),
        Instr('LOAD_FAST', 'kwargs'),
        Instr('CALL_FUNCTION_EX', 1),
        Instr('RETURN_VALUE'),
    ])

    def new_varargs_func():
        # Donor function: supplies the VARARGS/VARKEYWORDS code flags.
        def func(*args, **kwargs):
            pass
        return func

    tramp = new_varargs_func()
    trampoline_code.flags = tramp.__code__.co_flags
    tramp.__code__ = trampoline_code.to_code()
    return tramp
def test_optimize_code_obj(self):
    # Test optimize() on a code object: `x = 3 + 5` folds to `x = 8`.
    source_instrs = [
        Instr('LOAD_CONST', 3),
        Instr('LOAD_CONST', 5),
        Instr('BINARY_ADD'),
        Instr('STORE_NAME', 'x'),
        Instr('LOAD_CONST', None),
        Instr('RETURN_VALUE'),
    ]
    raw = Bytecode(source_instrs).to_code()
    optim = peephole_opt.PeepholeOptimizer().optimize(raw)
    self.assertEqual(
        Bytecode.from_code(optim),
        [Instr('LOAD_CONST', 8, lineno=1),
         Instr('STORE_NAME', 'x', lineno=1),
         Instr('LOAD_CONST', None, lineno=1),
         Instr('RETURN_VALUE', lineno=1)])
def test_negative_size_binary_with_disable_check_of_pre_and_post(self):
    # Binary ops with one operand would underflow; with the pre/post
    # check disabled, the computed stack size is still 1.
    binary_ops = (
        "BINARY_POWER", "BINARY_MULTIPLY", "BINARY_FLOOR_DIVIDE",
        "BINARY_TRUE_DIVIDE", "BINARY_MODULO", "BINARY_ADD",
        "BINARY_SUBTRACT", "BINARY_SUBSCR", "BINARY_LSHIFT",
        "BINARY_RSHIFT", "BINARY_AND", "BINARY_XOR", "BINARY_OR",
    )
    for opname in binary_ops:
        with self.subTest():
            bytecode = Bytecode()
            bytecode.first_lineno = 1
            bytecode.append(Instr("LOAD_CONST", 1))
            bytecode.append(Instr(opname))
            co = bytecode.to_code(check_pre_and_post=False)
            self.assertEqual(co.co_stacksize, 1)
def test_extended_jump(self):
    NOP = bytes((opcode.opmap['NOP'],))

    class BigInstr(ConcreteInstr):
        """Assembles to an arbitrary number of NOP bytes."""

        def __init__(self, size):
            super().__init__('NOP')
            self._size = size

        def copy(self):
            return self

        def assemble(self):
            return NOP * self._size

    # (invalid) code using jumps > 0xffff to exercise EXTENDED_ARG
    landing = Label()
    filler = 2 ** 16
    code = Bytecode(
        [Instr("JUMP_ABSOLUTE", landing), BigInstr(filler), landing])
    self.assertEqual(code.to_code().co_code,
                     b'\x90\x01\x00q\x06\x00' + NOP * filler)
def test_extended_jump(self):
    NOP = bytes((opcode.opmap['NOP'],))

    class BigInstr(ConcreteInstr):
        """NOP variant whose assembled length is configurable."""

        def __init__(self, size):
            super().__init__('NOP')
            self._size = size

        def copy(self):
            return self

        def assemble(self):
            return NOP * self._size

    # (invalid) code using jumps > 0xffff to test extended arg
    target = Label()
    nop_count = 2 ** 16
    instrs = [Instr("JUMP_ABSOLUTE", target), BigInstr(nop_count), target]
    code_obj = Bytecode(instrs).to_code()
    expected = b'\x90\x01\x00q\x06\x00' + NOP * nop_count
    self.assertEqual(code_obj.co_code, expected)
def test_optimize_code_obj(self):
    # optimize() over a code object should constant-fold `x = 3 + 5`.
    optimizer = peephole_opt.PeepholeOptimizer()
    before = Bytecode([
        Instr('LOAD_CONST', 3),
        Instr('LOAD_CONST', 5),
        Instr('BINARY_ADD'),
        Instr('STORE_NAME', 'x'),
        Instr('LOAD_CONST', None),
        Instr('RETURN_VALUE'),
    ]).to_code()
    after = Bytecode.from_code(optimizer.optimize(before))
    self.assertEqual(after, [
        Instr('LOAD_CONST', 8, lineno=1),
        Instr('STORE_NAME', 'x', lineno=1),
        Instr('LOAD_CONST', None, lineno=1),
        Instr('RETURN_VALUE', lineno=1),
    ])
def test_general_constants(self):
    """Arbitrary objects, hashable or not, can be used as constants."""
    class CustomObject:
        pass

    class UnHashableCustomObject:
        # Defining __eq__ without __hash__ makes instances unhashable.
        def __eq__(self, other):
            return self is other

    constants = ([1, 2, 3], {1, 2, 3},
                 CustomObject(), UnHashableCustomObject())
    code = Bytecode([Instr('LOAD_CONST', obj) for obj in constants])
    self.assertEqual(code.to_code().co_consts, constants)
class Code(object):
    """Incremental code-object builder (BytecodeAssembler-style).

    Methods named after opcodes emit that opcode while tracking the
    simulated stack depth (`_ss` / `stack_history`) so `co_stacksize`
    can be computed.  NOTE(review): relies on many module-level names
    not visible here (opcode constants, `labelMap`, `opname`,
    `stack_effects`, `generate_types`, `Const`, `basestring`, `long`,
    `CODE`, `fast_to_deref`, `deref_to_deref`) — confirm against the
    surrounding module.
    """
    co_argcount = 0
    co_stacksize = 0
    co_flags = CO_OPTIMIZED | CO_NEWLOCALS  # typical usage
    co_filename = '<generated code>'
    co_name = '<lambda>'
    co_firstlineno = 0
    co_freevars = ()
    co_cellvars = ()
    _last_lineofs = 0
    _last_line = 0
    _ss = 0  # current simulated stack size; None when unknown
    _tmp_level = 0

    def __init__(self):
        self.co_code = Bytecode()
        self.co_consts = [None]
        self.co_names = []
        self.co_varnames = []
        self.co_lnotab = array('B')
        self.blocks = []
        self.stack_history = []

    def emit_arg(self, op, arg):
        # Append a single instruction with an argument.
        self.co_code.append(Instr(op, arg))

    def locals_written(self):
        # Names of locals that are ever stored or deleted.
        vn = self.co_varnames
        hl = dict.fromkeys([STORE_FAST, DELETE_FAST])
        return dict.fromkeys([vn[arg] for ofs, op, arg in self if op in hl])

    def set_lineno(self, lno):
        # Record a line-number change, maintaining the co_lnotab encoding
        # (pairs of byte-sized address/line increments).
        if (lno > 0):
            self.co_code.append(SetLineno(lno))
        if not self.co_firstlineno:
            self.co_firstlineno = self._last_line = lno
            return
        append = self.co_lnotab.append
        incr_line = lno - self._last_line
        incr_addr = len(self.co_code) - self._last_lineofs
        if not incr_line:
            return
        if incr_addr <= 0 or incr_line <= 0:
            return
        # Split increments larger than one byte across multiple entries.
        while incr_addr > 255:
            append(255)
            append(0)
            incr_addr -= 255
        while incr_line > 255:
            append(incr_addr)
            append(255)
            incr_line -= 255
            incr_addr = 0
        if incr_addr or incr_line:
            append(incr_addr)
            append(incr_line)
        self._last_line = lno
        self._last_lineofs = len(self.co_code)

    def YIELD_VALUE(self):
        # Emitting a yield marks the code object as a generator.
        self.stackchange(stack_effects[YIELD_VALUE])
        self.co_flags |= CO_GENERATOR
        return self.emit(YIELD_VALUE)

    def LOAD_CONST(self, const):
        # Reuse an existing constant slot when an equal constant of the
        # same type already exists (identity for unhashable constants).
        self.stackchange((0, 1))
        pos = 0
        hashable = True
        try:
            hash(const)
        except TypeError:
            hashable = False
        while 1:
            try:
                arg = self.co_consts.index(const, pos)
                it = self.co_consts[arg]
            except ValueError:
                arg = len(self.co_consts)
                self.co_consts.append(const)
                break
            else:
                if type(it) is type(const) and (hashable or it is const):
                    break
                pos = arg + 1
                continue
        return self.emit_arg('LOAD_CONST', const)

    def CALL_FUNCTION(self, argc=0, foo=0, op='CALL_FUNCTION', extra=0):
        self.stackchange((1 + argc + extra, 1))
        self.emit_arg(op, argc)

    def CALL_FUNCTION_VAR(self, argc=0, kwargc=0):
        # Dead on Python 3 — opcode no longer exists.
        assert (False)
        self.CALL_FUNCTION(argc, kwargc, CALL_FUNCTION_VAR, 1)  # 1 for *args

    def CALL_FUNCTION_KW(self, argc=0, kwargc=0):
        self.stackchange((2 + argc + kwargc, 1))
        self.emit_arg('CALL_FUNCTION_KW', argc + kwargc)

    def CALL_FUNCTION_VAR_KW(self, argc=0, kwargc=0):
        # Dead on Python 3 — opcode no longer exists.
        assert (False)
        self.CALL_FUNCTION(argc, kwargc, CALL_FUNCTION_VAR_KW, 2)  # 2 *args,**kw

    def CALL_METHOD(self, argc=0, foo=0):
        self.stackchange((2 + argc, 1))
        self.emit_arg('CALL_METHOD', argc)

    def BUILD_TUPLE(self, count):
        self.stackchange((count, 1))
        self.emit_arg('BUILD_TUPLE', count)

    def BUILD_LIST(self, count):
        self.stackchange((count, 1))
        self.emit_arg('BUILD_LIST', count)

    def BUILD_MAP(self, count):
        self.stackchange((count * 2, 1))
        self.emit_arg('BUILD_MAP', count)

    def UNPACK_SEQUENCE(self, count):
        self.stackchange((1, count))
        self.emit_arg('UNPACK_SEQUENCE', count)

    def RETURN_VALUE(self):
        self.stackchange((1, 0))
        self.co_code.append(Instr('RETURN_VALUE'))
        # Control never falls through a return.
        self.stack_unknown()

    def BUILD_SLICE(self, count):
        assert count in (2, 3), "Invalid number of arguments for BUILD_SLICE"
        self.stackchange((count, 1))
        self.emit_arg(BUILD_SLICE, count)

    def DUP_TOPX(self, count):
        self.stackchange((count, count * 2))
        self.emit_arg(DUP_TOPX, count)

    def RAISE_VARARGS(self, argc):
        assert 0 <= argc <= 3, "Invalid number of arguments for RAISE_VARARGS"
        self.stackchange((argc, 0))
        self.emit_arg(RAISE_VARARGS, argc)

    def MAKE_FUNCTION(self, ndefaults):
        self.stackchange((1 + ndefaults, 1))
        self.emit_arg(MAKE_FUNCTION, ndefaults)

    def MAKE_CLOSURE(self, ndefaults, freevars):
        # NOTE(review): string comparison against sys.version is a
        # Python 2 relic; on Python 3 this branch always runs.
        if sys.version >= '2.5':
            freevars = 1
        self.stackchange((1 + freevars + ndefaults, 1))
        self.emit_arg(MAKE_CLOSURE, ndefaults)

    def here(self):
        # Current instruction index.
        return len(self.co_code)

    def curPos(self):
        # Drop a label at the current position and register it.
        label = Label()
        self.co_code.append(label)
        labelMap[label.id] = len(self.co_code)
        return label

    # Compatibility shims for opcodes that vanished in Python 3.
    if 'UNARY_CONVERT' not in opcode:
        def UNARY_CONVERT(self):
            self(Const(repr))
            self.ROT_TWO()
            self.CALL_FUNCTION(1, 0)

    if 'BINARY_DIVIDE' not in opcode:
        def BINARY_DIVIDE(self):
            self.BINARY_TRUE_DIVIDE()

    if 'DUP_TOPX' not in opcode:
        def DUP_TOPX(self, count):
            self.stackchange((count, count * 2))
            if count == 2:
                self.emit(DUP_TOP_TWO)
            else:
                raise RuntimeError("Python 3 only supports DUP_TOP_TWO")

    if 'SLICE_0' not in opcode:
        def SLICE_0(self):
            self(None, None, Code.SLICE_3)

        def SLICE_1(self):
            self(None, Code.SLICE_3)

        def SLICE_2(self):
            self(None, Code.ROT_TWO, Code.SLICE_3)

        def SLICE_3(self):
            self.BUILD_SLICE(2)
            self.BINARY_SUBSCR()

    def set_stack_size(self, size):
        # Update the simulated stack depth and backfill stack_history.
        if size < 0:
            raise AssertionError("Stack underflow")
        if size > self.co_stacksize:
            self.co_stacksize = size
        # NOTE: `bytes` here shadows the builtin (kept as-is).
        bytes = len(self.co_code) - len(self.stack_history) + 1
        if bytes > 0:
            self.stack_history.extend([self._ss] * bytes)
        self._ss = size

    def get_stack_size(self):
        return self._ss

    stack_size = property(get_stack_size, set_stack_size)

    def stackchange(self, inout):
        (inputs, outputs) = inout
        if self._ss is None:
            raise AssertionError("Unknown stack size at this location")
        self.stack_size -= inputs  # check underflow
        self.stack_size += outputs  # update maximum height

    def stack_unknown(self):
        self._ss = None

    def branch_stack(self, location, expected):
        # Verify (or record) the stack depth at a branch target.
        location = labelMap[location.id]
        if location >= len(self.stack_history):
            if location > len(self.co_code):
                raise AssertionError("Forward-looking stack prediction!",
                                     location, len(self.co_code))
            actual = self.stack_size
            if actual is None:
                self.stack_size = actual = expected
                self.stack_history[location] = actual
        else:
            actual = self.stack_history[location]
            if actual is None:
                self.stack_history[location] = actual = expected
        if actual != expected:
            raise AssertionError(
                "Stack level mismatch: actual=%s expected=%s" %
                (actual, expected))

    def jump(self, op, arg=None):
        # Emit a jump; with no target, return a callable that later
        # drops the target label at the current position (backpatching).
        def backpatch(offset):
            assert (op != 120)
            #self.patch_arg(posn, 0x1FFFF, target,op)
            self.branch_stack(offset, old_level)
        if op == FOR_ITER:
            old_level = self.stack_size = self.stack_size - 1
            self.stack_size += 2
        else:
            old_level = self.stack_size
            self.stack_size -= (op in (JUMP_IF_TRUE_OR_POP,
                                       JUMP_IF_FALSE_OR_POP))
        posn = self.here()
        if arg is not None:
            #print("jt: " + hex(jump_target(arg)))
            self.emit_arg(opname[op], arg)
            self.branch_stack(arg, old_level)
            lbl = None
        else:
            label = Label()
            self.emit_arg(opname[op], label)

            def lbl(code=None, label=label):
                self.co_code.append(label)
                labelMap[label.id] = len(self.co_code)
                backpatch(label)
        if op in (JUMP_FORWARD, JUMP_ABSOLUTE, CONTINUE_LOOP):
            # Unconditional transfer: depth after the jump is unknown.
            self.stack_unknown()
        return lbl

    def COMPARE_OP(self, op):
        self.stackchange((2, 1))
        self.emit_arg('COMPARE_OP', compares[op])

    def setup_block(self, op):
        # Open a block (loop/except/finally) and remember its state.
        jmp = self.jump(op)
        self.blocks.append((op, self.stack_size, jmp))
        return jmp

    def SETUP_EXCEPT(self):
        ss = self.stack_size
        self.stack_size = ss + 3  # simulate the level at "except:" time
        self.setup_block(SETUP_EXCEPT)
        self.stack_size = ss  # restore the current level

    def SETUP_FINALLY(self):
        ss = self.stack_size
        self.stack_size = ss + 3  # allow for exceptions
        self.stack_size = ss + 1  # simulate the level after the None is pushed
        self.setup_block(SETUP_FINALLY)
        self.stack_size = ss  # restore original level

    def SETUP_LOOP(self):
        self.setup_block(SETUP_LOOP)

    def POP_BLOCK(self):
        # Close the innermost block; non-loop blocks return a forward
        # jump to the code after their handler.
        if not self.blocks:
            raise AssertionError("Not currently in a block")
        why, level, fwd = self.blocks.pop()
        self.emit(POP_BLOCK)
        if why != SETUP_LOOP:
            if why == SETUP_FINALLY:
                self.LOAD_CONST(None)
                fwd()
            else:
                self.stack_size = level - 3  # stack level resets here
                else_ = self.JUMP_FORWARD()
                fwd()
                return else_
        else:
            return fwd

    # More version shims: synthesize missing conditional-jump opcodes.
    if 'JUMP_IF_TRUE_OR_POP' not in opcode:
        def JUMP_IF_TRUE_OR_POP(self, address=None):
            lbl = self.JUMP_IF_TRUE(address)
            self.POP_TOP()
            return lbl
        globals()['JUMP_IF_TRUE_OR_POP'] = -1

    if 'JUMP_IF_FALSE_OR_POP' not in opcode:
        def JUMP_IF_FALSE_OR_POP(self, address=None):
            lbl = self.JUMP_IF_FALSE(address)
            self.POP_TOP()
            return lbl
        globals()['JUMP_IF_FALSE_OR_POP'] = -1

    if 'JUMP_IF_TRUE' not in opcode:
        def JUMP_IF_TRUE(self, address=None):
            self.DUP_TOP()
            return self.POP_JUMP_IF_TRUE(address)
    else:
        globals()['POP_JUMP_IF_TRUE'] = -1

    if 'JUMP_IF_FALSE' not in opcode:
        def JUMP_IF_FALSE(self, address=None):
            self.DUP_TOP()
            return self.POP_JUMP_IF_FALSE(address)
    else:
        globals()['POP_JUMP_IF_FALSE'] = -1

    if 'LIST_APPEND' in opcode and LIST_APPEND >= HAVE_ARGUMENT:
        def LIST_APPEND(self, depth):
            self.stackchange((depth + 1, depth))
            self.emit_arg(LIST_APPEND, depth)

    def assert_loop(self):
        for why, level, fwd in self.blocks:
            if why == SETUP_LOOP:
                return
        raise AssertionError("Not inside a loop")

    def BREAK_LOOP(self):
        self.assert_loop()
        self.emit(BREAK_LOOP)
        self.stack_unknown()

    def CONTINUE_LOOP(self, label):
        self.assert_loop()
        if self.blocks[-1][0] == SETUP_LOOP:
            op = JUMP_ABSOLUTE  # more efficient if not in a nested block
        else:
            op = CONTINUE_LOOP
        return self.jump(op, label)

    def __call__(self, *args):
        # Generate code for each argument: callables are invoked with
        # self; other values dispatch through the generate_types table.
        last = None
        for ob in args:
            if hasattr(ob, '__call__'):
                last = ob(self)
            else:
                try:
                    f = generate_types[type(ob)]
                except KeyError:
                    raise TypeError("Can't generate", ob)
                else:
                    last = f(self, ob)
        return last

    def return_(self, ob=None):
        return self(ob, Code.RETURN_VALUE)

    @classmethod
    def from_function(cls, function, copy_lineno=False):
        code = cls.from_code(getattr(function, CODE), copy_lineno)
        return code

    @classmethod
    def from_code(cls, code, copy_lineno=False):
        import inspect
        self = cls.from_spec(code.co_name, *inspect.getargs(code))
        if copy_lineno:
            self.set_lineno(code.co_firstlineno)
            self.co_filename = code.co_filename
        self.co_freevars = code.co_freevars  # XXX untested!
        return self

    @classmethod
    def from_spec(cls, name='<lambda>', args=(), var=None, kw=None):
        # Build a fresh Code with the given signature; tuple arguments
        # (Python 2 style) get dummy names and unpacking prologue code.
        self = cls()
        self.co_name = name
        self.co_argcount = len(args)
        self.co_varnames.extend(args)
        if var:
            self.co_varnames.append(var)
            self.co_flags |= CO_VARARGS
        if kw:
            self.co_varnames.append(kw)
            self.co_flags |= CO_VARKEYWORDS

        def tuple_arg(args):
            self.UNPACK_SEQUENCE(len(args))
            for arg in args:
                if not isinstance(arg, basestring):
                    tuple_arg(arg)
                else:
                    self.STORE_FAST(arg)

        for narg, arg in enumerate(args):
            if not isinstance(arg, basestring):
                dummy_name = '.' + str(narg)
                self.co_varnames[narg] = dummy_name
                self.LOAD_FAST(dummy_name)
                tuple_arg(arg)
        return self

    def patch_arg(self, offset, oldarg, newarg, op):
        # Rewrite an instruction argument in place (pre-wordcode layout).
        assert (op != 120)
        code = self.co_code
        if (newarg > 0xFFFF) and (oldarg <= 0xFFFF):
            raise AssertionError("Can't change argument size",
                                 oldarg, newarg)
        code[offset + 1] = newarg & 255
        code[offset + 2] = (newarg >> 8) & 255
        if newarg > 0xFFFF or oldarg > 0xFFFF or op == 120:
            code[offset + 1] = (newarg >> 16) & 255
            code[offset + 2] = (newarg >> 24) & 255
            code[offset + 4] = (newarg >> 0) & 255
            code[offset + 5] = (newarg >> 8) & 255

    def nested(self, name='<lambda>', args=(), var=None, kw=None, cls=None):
        if cls is None:
            cls = self.__class__
        code = cls.from_spec(name, args, var, kw)
        code.co_filename = self.co_filename
        return code

    def __iter__(self):
        # Yield (offset, op, arg) triples, folding EXTENDED_ARG into the
        # following instruction's argument.
        # NOTE(review): assumes the pre-wordcode 1/3-byte instruction
        # layout, and `long` is Python 2 only.
        i = 0
        extended_arg = 0
        code = self.co_code
        n = len(code)
        while i < n:
            op = code[i]
            if op >= HAVE_ARGUMENT:
                oparg = code[i + 1] + code[i + 2] * 256 + extended_arg
                extended_arg = 0
                if op == EXTENDED_ARG:
                    extended_arg = oparg * long(65536)
                    i += 3
                    continue
                yield i, op, oparg
                i += 3
            else:
                yield i, op, None
                i += 1

    def makefree(self, names):
        # Promote the given names to free variables.
        nowfree = dict.fromkeys(self.co_freevars)
        newfree = [n for n in names if n not in nowfree]
        if newfree:
            self.co_freevars += tuple(newfree)
            self._locals_to_cells()

    def makecells(self, names):
        # Promote the given names to cell variables, re-numbering any
        # existing free-variable references that shift as a result.
        nowcells = dict.fromkeys(self.co_cellvars + self.co_freevars)
        newcells = [n for n in names if n not in nowcells]
        if newcells:
            if not (self.co_flags & CO_OPTIMIZED):
                raise AssertionError(
                    "Can't use cellvars in unoptimized scope")
            cc = len(self.co_cellvars)
            nc = len(newcells)
            self.co_cellvars += tuple(newcells)
            if self.co_freevars:
                self._patch(
                    deref_to_deref,
                    dict([(n + cc, n + cc + nc)
                          for n in range(len(self.co_freevars))]))
            self._locals_to_cells()

    def _locals_to_cells(self):
        # Convert LOAD/STORE_FAST of cell-bound names into *_DEREF.
        freemap = dict([
            (n, p) for p, n in enumerate(self.co_cellvars + self.co_freevars)
        ])
        argmap = dict([(p, freemap[n])
                       for p, n in enumerate(self.co_varnames)
                       if n in freemap])
        if argmap:
            for ofs, op, arg in self:
                if op == DELETE_FAST and arg in argmap:
                    raise AssertionError(
                        "Can't delete local %r used in nested scope" %
                        self.co_varnames[arg])
            self._patch(fast_to_deref, argmap)

    def _patch(self, opmap, argmap={}):
        # Rewrite opcodes (and optionally their args) per the given maps.
        # NOTE: the default {} is never mutated here, so the shared
        # mutable-default pitfall does not bite.
        code = self.co_code
        for ofs, op, arg in self:
            if op in opmap:
                print(op)
                if arg in argmap:
                    self.patch_arg(ofs, arg, argmap[arg], op)
                elif arg is not None:
                    continue
                code[ofs] = opmap[op]

    def code(self, parent=None):
        # Finalize and return the assembled code object; when nested,
        # unwritten locals become free variables of the parent.
        if self.blocks:
            raise AssertionError("%d unclosed block(s)" % len(self.blocks))
        flags = self.co_flags & ~CO_NOFREE
        if parent is not None:
            locals_written = self.locals_written()
            self.makefree([
                n for n in self.co_varnames[self.co_argcount + (
                    (self.co_flags & CO_VARARGS) == CO_VARARGS) + (
                    (self.co_flags & CO_VARKEYWORDS) == CO_VARKEYWORDS):]
                if n not in locals_written
            ])
        if not self.co_freevars and not self.co_cellvars:
            flags |= CO_NOFREE
        elif parent is not None and self.co_freevars:
            parent.makecells(self.co_freevars)
        self.co_code.argcount = self.co_argcount
        self.co_code.argnames = self.co_varnames[:self.co_argcount]
        self.co_code.name = self.co_name
        self.co_code.filename = self.co_filename
        for e in self.co_code:
            print(e)
        return self.co_code.to_code()
def as_namedlist(name, bases, namespace: dict):
    """Build the ``(bases, namespace)`` pair for a list-backed "named list".

    For every name in the class body's ``__annotations__`` this synthesizes,
    directly as bytecode:

    * an index-based property (getter/setter accessing ``self[i]``),
    * an ``__init__`` that appends each argument to ``self`` (a list),
    * a ``__str__`` driven by a pre-built ``name(field=...)`` template.

    Parameters:
        name: class name, used in the ``__str__`` format template.
        bases: proposed base classes; ``list`` is appended if absent.
        namespace: class body namespace; copied, never mutated.

    Returns:
        ``(bases, namespace)`` where ``bases`` is guaranteed to include
        ``list``.

    Raises:
        AttributeError: if an annotated name collides with an explicit
            namespace entry.
    """
    # Resolve the caller's module so the generated code objects carry a
    # sensible filename; fall back to '__main__' if introspection fails.
    try:
        module = sys._getframe(1).f_globals.get('__name__', '__main__')
    except (AttributeError, ValueError):
        module = '__main__'
    namespace = {**namespace}  # defensive copy: never mutate the caller's dict
    annotations: dict = namespace.get('__annotations__')
    # BUGFIX: sys.modules[module] raises KeyError (not IndexError) when the
    # module is absent; catch it so we really fall back to "<unknown>".
    try:
        filepath = sys.modules[module].__file__
    except (AttributeError, KeyError, IndexError):
        filepath = "<unknown>"
    if annotations is not None:
        for i, k in enumerate(annotations):
            if k in namespace:
                # BUGFIX: the original bare ``raise AttributeError`` gave no
                # hint about which name collided.
                raise AttributeError(
                    f"annotated field {k!r} collides with an explicit "
                    f"attribute of class {name!r}")
            # Getter: ``return self[i]``.
            getter_code = Bytecode()
            getter_code.filename = filepath
            getter_code.argcount = 1
            getter_code.argnames.append('self')
            getter_code.append(Instr('LOAD_FAST', 'self'))
            getter_code.append(Instr('LOAD_CONST', i))
            getter_code.append(Instr('BINARY_SUBSCR'))
            getter_code.append(Instr('RETURN_VALUE'))
            getter_code.flags = CompilerFlags.OPTIMIZED | CompilerFlags.NEWLOCALS | CompilerFlags.NOFREE
            getter_fn = property(get_func_from_code(getter_code.to_code(), k))
            # Setter: ``self[i] = value``.
            setter_code = Bytecode()
            setter_code.filename = filepath
            setter_code.argcount = 2
            setter_code.argnames.extend(['self', 'value'])
            setter_code.append(Instr('LOAD_FAST', 'value'))
            setter_code.append(Instr('LOAD_FAST', 'self'))
            setter_code.append(Instr('LOAD_CONST', i))
            setter_code.append(Instr('STORE_SUBSCR'))
            setter_code.append(Instr('LOAD_CONST', None))
            setter_code.append(Instr('RETURN_VALUE'))
            setter_code.flags = CompilerFlags.OPTIMIZED | CompilerFlags.NEWLOCALS | CompilerFlags.NOFREE
            setter_fn = getter_fn.setter(
                get_func_from_code(setter_code.to_code(), k))
            namespace[k] = setter_fn
        # __init__: duplicate self once per field, then LIST_APPEND each
        # argument at the right stack depth.
        init_code = Bytecode()
        init_code.name = '__init__'
        init_code.filename = filepath
        ary_num = len(annotations)
        args = list(annotations)
        init_code.argcount = ary_num + 1
        init_code.argnames.extend(['self', *args])
        if ary_num:
            init_code.append(Instr('LOAD_FAST', 'self'))
            if ary_num >= 4:
                # DUP_TOP_TWO doubles the copies per instruction, with one
                # extra DUP_TOP when ary_num is odd.
                init_code.append(Instr('DUP_TOP'))
                for i in range((ary_num - 2) // 2):
                    init_code.append(Instr('DUP_TOP_TWO'))
                if ary_num % 2:
                    init_code.append(Instr('DUP_TOP'))
            else:
                for i in range(ary_num - 1):
                    init_code.append(Instr('DUP_TOP'))
            for i in range(ary_num):
                init_code.append(Instr("LOAD_FAST", args[i]))
                init_code.append(Instr("LIST_APPEND", ary_num - i))
        init_code.append(Instr('LOAD_CONST', None))
        init_code.append(Instr('RETURN_VALUE'))
        init_code.flags = CompilerFlags.OPTIMIZED | CompilerFlags.NEWLOCALS | CompilerFlags.NOFREE
        namespace['__init__'] = get_func_from_code(init_code.to_code(),
                                                   '__init__')
        # __str__: ``fmt.format(*self)`` via CALL_FUNCTION_EX, with the
        # bound ``fmt.format`` baked in as a constant.
        fmt = '{}({})'.format(name, ', '.join(f'{arg}={{!r}}' for arg in args))
        str_code = Bytecode()
        str_code.argcount = 1
        str_code.argnames.append('self')
        str_code.append(Instr('LOAD_CONST', fmt.format))
        str_code.append(Instr('LOAD_FAST', 'self'))
        str_code.append(Instr('CALL_FUNCTION_EX', 0))
        str_code.append(Instr('RETURN_VALUE'))
        str_code.flags = CompilerFlags.OPTIMIZED | CompilerFlags.NEWLOCALS | CompilerFlags.NOFREE
        namespace['__str__'] = get_func_from_code(str_code.to_code(),
                                                  '__str__')
    # Guarantee that the resulting class is list-backed.
    return bases if any(issubclass(t, list) for t in bases) else (*bases,
                                                                  list), namespace
# Dump the raw disassembly of both sample functions for comparison.
print("foo1 dis.dis(foo1)")
dis.dis(foo1)
print("foo2 dis.dis(foo2)")
dis.dis(foo2)

# Hand-assemble the equivalent of ``print('Hello World!')`` and execute it.
bytecode = Bytecode([
    Instr("LOAD_NAME", 'print'),
    Instr("LOAD_CONST", 'Hello World!'),
    Instr("CALL_FUNCTION", 1),
    Instr("POP_TOP"),
    Instr("LOAD_CONST", None),
    Instr("RETURN_VALUE")
])
code = bytecode.to_code()
exec(code)

# Show the code-object metadata of both functions.
print_codetype(foo1.__code__)
print()
print()
print_codetype(foo2.__code__)
print()
print()

# Call both functions and show their return values.
print(f"ret = {foo1(1,2,3)}")
print(f"ret = {foo2(1,2,3)}")

# Rewrite foo1 in place, then announce the follow-up disassembly.
wrap_stores_in_function(foo1)
print("foo1 dis.dis(foo1)")
def run_bytecode(ops):
    """Compile run-length-encoded Brainfuck to CPython bytecode and run it.

    ``ops`` is an iterable of ``(op, rep)`` pairs where ``op`` is a
    Brainfuck command (``> < + - . , [ ]``, plus ``0``, a fused
    "set cell to zero" op) and ``rep`` is its repeat count.  The compiled
    program operates on a 1 MB byte array ``mem`` with data pointer ``ptr``
    and performs I/O through the ``sys_read`` / ``sys_write`` helpers.
    """
    cs = []        # emitted instruction stream
    labels = []    # stack of open-loop start labels
    patches = []   # indices in cs of '[' placeholder jumps to back-patch
    for c, rep in ops:
        if c == '>' or c == '<':
            # ptr += rep  (negated for '<')
            if c == '<':
                rep = -rep
            codes = [
                I("LOAD_FAST", "ptr"),
                I("LOAD_CONST", rep),
                I("BINARY_ADD"),
                I("STORE_FAST", "ptr"),
            ]
            cs.extend(codes)
        elif c == '+' or c == '-':
            # mem[ptr] = (mem[ptr] + rep) & 0xff  (wrapping byte cell)
            if c == '-':
                rep = -rep
            codes = [
                I("LOAD_FAST", "mem"),
                I("LOAD_FAST", "ptr"),
                I("DUP_TOP_TWO"),
                I("BINARY_SUBSCR"),
                I("LOAD_CONST", rep),
                I("BINARY_ADD"),
                I("LOAD_CONST", 0xff),
                I("BINARY_AND"),
                I("ROT_THREE"),
                I("STORE_SUBSCR"),
            ]
            cs.extend(codes)
        elif c == '0':
            # mem[ptr] = 0  (fused '[-]' idiom)
            codes = [
                I("LOAD_CONST", 0),
                I("LOAD_FAST", "mem"),
                I("LOAD_FAST", "ptr"),
                I("STORE_SUBSCR")
            ]
            cs.extend(codes)
        elif c == '.':
            # sys_write(mem[ptr], rep)
            codes = [
                I("LOAD_FAST", "sys_write"),
                I("LOAD_FAST", "mem"),
                I("LOAD_FAST", "ptr"),
                I("BINARY_SUBSCR"),
                I("LOAD_CONST", rep),
                I("CALL_FUNCTION", 2),
                I("POP_TOP")
            ]
            cs.extend(codes)
        elif c == ',':
            # mem[ptr] = sys_read(rep)
            codes = [
                I("LOAD_FAST", "sys_read"),
                I("LOAD_CONST", rep),
                I("CALL_FUNCTION", 1),
                I("LOAD_FAST", "mem"),
                I("LOAD_FAST", "ptr"),
                I("STORE_SUBSCR")
            ]
            cs.extend(codes)
        elif c == '[':
            # Loop head: test mem[ptr] == 0; the exit target is unknown
            # until the matching ']', so emit a placeholder (None) jump.
            start_label = Label()
            codes = [
                start_label,
                I("LOAD_FAST", "mem"),
                I("LOAD_FAST", "ptr"),
                I("BINARY_SUBSCR"),
                I("LOAD_CONST", 0),
                I("COMPARE_OP", Compare.EQ),
                None,
            ]
            cs.extend(codes)
            labels.append(start_label)
            patches.append(len(cs) - 1)
        elif c == ']':
            # Loop tail: back-patch the head's jump to land here, then
            # re-test and jump back to the head while mem[ptr] != 0.
            start_label = labels.pop()
            end_label = Label()
            pp = patches.pop()
            cs[pp] = I("POP_JUMP_IF_TRUE", end_label)
            codes = [
                end_label,
                I("LOAD_FAST", "mem"),
                I("LOAD_FAST", "ptr"),
                I("BINARY_SUBSCR"),
                I("LOAD_CONST", 0),
                I("COMPARE_OP", Compare.EQ),
                I("POP_JUMP_IF_FALSE", start_label)
            ]
            cs.extend(codes)
    cs.extend([I("LOAD_CONST", None), I("RETURN_VALUE")])
    # Unbalanced '[' / ']' would leave dangling labels or patches behind.
    assert not labels and not patches
    # BUGFIX: removed a leftover debug loop that printed every compiled
    # instruction, polluting the interpreted program's own stdout.

    def sys_write(c, rep):
        # Emit the cell's character ``rep`` times, unbuffered.
        sys.stdout.write(chr(c) * rep)
        sys.stdout.flush()

    def sys_read(rep):
        # Consume ``rep`` characters; the cell keeps only the last one.
        data = sys.stdin.read(rep)
        return ord(data[-1]) & 0xff

    mem_size = 10**6
    env = dict(mem=array.array('B', [0] * mem_size),
               ptr=0,
               sys_read=sys_read,
               sys_write=sys_write)
    # Seed the fast locals from the provided globals once, so the program
    # body can use LOAD/STORE_FAST throughout.
    load_cs = []
    for k in env:
        load_cs.extend([I("LOAD_NAME", k), I("STORE_FAST", k)])
    bc = Bytecode(load_cs + cs)
    bc.flags = CompilerFlags(CompilerFlags.OPTIMIZED)
    code = PeepholeOptimizer().optimize(bc.to_code())
    exec(code, {}, env)
    def modify(self, code, *, inner=False):
        """Instrument ``code`` with debugger trace calls at each new line.

        Rebuilds the bytecode, inserting a trace-function call before the
        first instruction of every source line and recursing into nested
        code objects (``inner=True``) so functions defined inside the
        module are instrumented as well.

        Parameters:
            code: the code object to instrument.
            inner: True when instrumenting a nested code object; enables
                the ``is_over`` / STEP_OUT fast paths that skip tracing.

        Returns the instrumented code object.
        """
        initial_bytecode = Bytecode.from_code(code)
        # Fresh Bytecode mirroring the original's metadata.
        modified_bytecode = Bytecode()
        modified_bytecode.first_lineno = initial_bytecode.first_lineno
        modified_bytecode.argcount = code.co_argcount
        modified_bytecode.argnames = initial_bytecode.argnames
        modified_bytecode.name = initial_bytecode.name
        modified_bytecode.freevars = code.co_freevars
        modified_bytecode.cellvars = code.co_cellvars
        first_line_no = initial_bytecode.first_lineno
        if inner:
            # Nested code caches ``is_over = (command == STEP_OVER)`` once
            # at entry so the per-line checks below stay cheap.
            modified_bytecode.extend([
                Instr('LOAD_NAME', arg=self._command, lineno=first_line_no),
                Instr('LOAD_CONST', arg=DebugCommand.STEP_OVER,
                      lineno=first_line_no),
                Instr('COMPARE_OP', arg=Compare.EQ, lineno=first_line_no),
                Instr('STORE_NAME', arg='is_over', lineno=first_line_no),
            ])
        # Insert the debug-trace instructions before the module's first line.
        if not inner:
            modified_bytecode.extend(
                self._get_trace_func_call_instructions(first_line_no))
        previous_line_no = first_line_no
        for instr in initial_bytecode:
            # Pass pseudo-instructions (labels, etc.) through untouched.
            if not isinstance(instr, Instr):
                modified_bytecode.append(instr)
                continue
            if isinstance(instr.arg, types.CodeType):
                # Recursively instrument nested code objects in place.
                old_instr_name = instr.name
                new_co = self.modify(instr.arg, inner=True)
                instr.set(old_instr_name, new_co)
            skip = Label()
            if instr.lineno != previous_line_no:
                if inner:
                    # STEP_OVER / STEP_OUT: jump past the trace call.
                    modified_bytecode.extend([
                        Instr('LOAD_NAME', arg='is_over',
                              lineno=instr.lineno),
                        Instr('POP_JUMP_IF_TRUE', arg=skip,
                              lineno=instr.lineno)
                    ])
                    modified_bytecode.extend([
                        Instr('LOAD_NAME', arg=self._command,
                              lineno=instr.lineno),
                        Instr('LOAD_CONST', arg=DebugCommand.STEP_OUT,
                              lineno=instr.lineno),
                        Instr('COMPARE_OP', arg=Compare.EQ,
                              lineno=instr.lineno),
                        Instr('POP_JUMP_IF_TRUE', arg=skip,
                              lineno=instr.lineno)
                    ])
                modified_bytecode.extend(
                    self._get_trace_func_call_instructions(instr.lineno))
                if inner:
                    # The skip label is only emitted for inner code, where
                    # the jumps above can target it.
                    modified_bytecode.append(skip)
                previous_line_no = instr.lineno
            modified_bytecode.append(instr)
        code = modified_bytecode.to_code()
        return code