def test_to_bytecode(self):
    """Flatten a hand-built three-block graph and compare the bytecode."""
    # Program modelled by the instructions below:
    #
    #   if test:
    #       x = 5
    #   x = 7
    cfg = ControlFlowGraph()
    cfg.add_block()
    cfg.add_block()
    entry, then_block, join_block = cfg[0], cfg[1], cfg[2]

    entry.extend([
        Instr('LOAD_NAME', 'test', lineno=1),
        Instr('POP_JUMP_IF_FALSE', join_block, lineno=1),
    ])
    then_block.extend([
        Instr('LOAD_CONST', 5, lineno=2),
        Instr('STORE_NAME', 'x', lineno=2),
        Instr('JUMP_FORWARD', join_block, lineno=2),
    ])
    join_block.extend([
        Instr('LOAD_CONST', 7, lineno=3),
        Instr('STORE_NAME', 'x', lineno=3),
        Instr('LOAD_CONST', None, lineno=3),
        Instr('RETURN_VALUE', lineno=3),
    ])

    flattened = cfg.to_bytecode()

    # Both jumps target the same block, so the expectation uses one label
    # for both of them and places it right before that block's code.
    target = Label()
    expected = [
        Instr('LOAD_NAME', 'test', lineno=1),
        Instr('POP_JUMP_IF_FALSE', target, lineno=1),
        Instr('LOAD_CONST', 5, lineno=2),
        Instr('STORE_NAME', 'x', lineno=2),
        Instr('JUMP_FORWARD', target, lineno=2),
        target,
        Instr('LOAD_CONST', 7, lineno=3),
        Instr('STORE_NAME', 'x', lineno=3),
        Instr('LOAD_CONST', None, lineno=3),
        Instr('RETURN_VALUE', lineno=3),
    ]
    self.assertEqual(flattened, expected)
def test_return_value(self):
    """Check what self.check() expects to remain after a RETURN_VALUE."""
    # Two returns in a row: only the first one is expected to remain.
    #
    #   def func():
    #       return 4
    #       return 5
    two_returns = Bytecode([
        Instr('LOAD_CONST', 4, lineno=2),
        Instr('RETURN_VALUE', lineno=2),
        Instr('LOAD_CONST', 5, lineno=3),
        Instr('RETURN_VALUE', lineno=3),
    ])
    self.check(
        ControlFlowGraph.from_bytecode(two_returns),
        Instr('LOAD_CONST', 4, lineno=2),
        Instr('RETURN_VALUE', lineno=2),
    )

    # Four returns in a row: again only the first one is expected to remain.
    #
    #   def func():
    #       return 4
    #       return 5
    #       return 6
    #       return 7
    four_returns = Bytecode([
        Instr('LOAD_CONST', 4, lineno=2),
        Instr('RETURN_VALUE', lineno=2),
        Instr('LOAD_CONST', 5, lineno=3),
        Instr('RETURN_VALUE', lineno=3),
        Instr('LOAD_CONST', 6, lineno=4),
        Instr('RETURN_VALUE', lineno=4),
        Instr('LOAD_CONST', 7, lineno=5),
        Instr('RETURN_VALUE', lineno=5),
    ])
    self.check(
        ControlFlowGraph.from_bytecode(four_returns),
        Instr('LOAD_CONST', 4, lineno=2),
        Instr('RETURN_VALUE', lineno=2),
    )

    # Return followed by JUMP_ABSOLUTE: the jump (and the POP_BLOCK after
    # it) are absent from the expectation.
    #
    #   while 1:
    #       return 7
    loop_head = Label()
    after_loop = Label()
    looping = Bytecode([
        loop_head,
        Instr('SETUP_LOOP', after_loop, lineno=2),
        Instr('LOAD_CONST', 7, lineno=3),
        Instr('RETURN_VALUE', lineno=3),
        Instr('JUMP_ABSOLUTE', loop_head, lineno=3),
        Instr('POP_BLOCK', lineno=3),
        after_loop,
        Instr('LOAD_CONST', None, lineno=3),
        Instr('RETURN_VALUE', lineno=3),
    ])
    looping_cfg = ControlFlowGraph.from_bytecode(looping)
    end_loop = Label()
    self.check(
        looping_cfg,
        Instr('SETUP_LOOP', end_loop, lineno=2),
        Instr('LOAD_CONST', 7, lineno=3),
        Instr('RETURN_VALUE', lineno=3),
        end_loop,
        Instr('LOAD_CONST', None, lineno=3),
        Instr('RETURN_VALUE', lineno=3),
    )
def test_return_value(self):
    """Run three RETURN_VALUE scenarios through self.check()."""
    # Scenario 1 -- return+return, the second return should disappear:
    #
    #   def func():
    #       return 4
    #       return 5
    cfg = ControlFlowGraph.from_bytecode(Bytecode([
        Instr('LOAD_CONST', 4, lineno=2),
        Instr('RETURN_VALUE', lineno=2),
        Instr('LOAD_CONST', 5, lineno=3),
        Instr('RETURN_VALUE', lineno=3),
    ]))
    remaining = (
        Instr('LOAD_CONST', 4, lineno=2),
        Instr('RETURN_VALUE', lineno=2),
    )
    self.check(cfg, *remaining)

    # Scenario 2 -- four consecutive returns, everything after the first
    # return should disappear:
    #
    #   def func():
    #       return 4
    #       return 5
    #       return 6
    #       return 7
    cfg = ControlFlowGraph.from_bytecode(Bytecode([
        Instr('LOAD_CONST', 4, lineno=2),
        Instr('RETURN_VALUE', lineno=2),
        Instr('LOAD_CONST', 5, lineno=3),
        Instr('RETURN_VALUE', lineno=3),
        Instr('LOAD_CONST', 6, lineno=4),
        Instr('RETURN_VALUE', lineno=4),
        Instr('LOAD_CONST', 7, lineno=5),
        Instr('RETURN_VALUE', lineno=5),
    ]))
    remaining = (
        Instr('LOAD_CONST', 4, lineno=2),
        Instr('RETURN_VALUE', lineno=2),
    )
    self.check(cfg, *remaining)

    # Scenario 3 -- return + JUMP_ABSOLUTE, the jump should disappear:
    #
    #   while 1:
    #       return 7
    setup_loop = Label()
    return_label = Label()
    instructions = [
        setup_loop,
        Instr('SETUP_LOOP', return_label, lineno=2),
        Instr('LOAD_CONST', 7, lineno=3),
        Instr('RETURN_VALUE', lineno=3),
        Instr('JUMP_ABSOLUTE', setup_loop, lineno=3),
        Instr('POP_BLOCK', lineno=3),
        return_label,
        Instr('LOAD_CONST', None, lineno=3),
        Instr('RETURN_VALUE', lineno=3),
    ]
    cfg = ControlFlowGraph.from_bytecode(Bytecode(instructions))
    end_loop = Label()
    remaining = (
        Instr('SETUP_LOOP', end_loop, lineno=2),
        Instr('LOAD_CONST', 7, lineno=3),
        Instr('RETURN_VALUE', lineno=3),
        end_loop,
        Instr('LOAD_CONST', None, lineno=3),
        Instr('RETURN_VALUE', lineno=3),
    )
    self.check(cfg, *remaining)
def test_get_block_index(self):
    """Blocks of the graph report their position; a foreign block fails."""
    cfg = ControlFlowGraph()
    first = cfg[0]
    second = cfg.add_block()
    third = cfg.add_block()

    for position, block in enumerate((first, second, third)):
        self.assertEqual(cfg.get_block_index(block), position)

    # A block that was never added to this graph must raise ValueError.
    stranger = BasicBlock()
    with self.assertRaises(ValueError):
        cfg.get_block_index(stranger)
def test_to_bytecode(self):
    """Convert a manually assembled graph back to a flat instruction list."""
    # The three blocks correspond to:
    #
    #   if test:
    #       x = 5
    #   x = 7
    graph = ControlFlowGraph()
    graph.add_block()
    graph.add_block()
    after_if = graph[2]

    condition = [
        Instr("LOAD_NAME", "test", lineno=1),
        Instr("POP_JUMP_IF_FALSE", after_if, lineno=1),
    ]
    if_body = [
        Instr("LOAD_CONST", 5, lineno=2),
        Instr("STORE_NAME", "x", lineno=2),
        Instr("JUMP_FORWARD", after_if, lineno=2),
    ]
    remainder = [
        Instr("LOAD_CONST", 7, lineno=3),
        Instr("STORE_NAME", "x", lineno=3),
        Instr("LOAD_CONST", None, lineno=3),
        Instr("RETURN_VALUE", lineno=3),
    ]
    graph[0].extend(condition)
    graph[1].extend(if_body)
    graph[2].extend(remainder)

    result = graph.to_bytecode()

    # In the flat form the jump target block is represented by a label.
    label = Label()
    flat = [
        Instr("LOAD_NAME", "test", lineno=1),
        Instr("POP_JUMP_IF_FALSE", label, lineno=1),
        Instr("LOAD_CONST", 5, lineno=2),
        Instr("STORE_NAME", "x", lineno=2),
        Instr("JUMP_FORWARD", label, lineno=2),
        label,
        Instr("LOAD_CONST", 7, lineno=3),
        Instr("STORE_NAME", "x", lineno=3),
        Instr("LOAD_CONST", None, lineno=3),
        Instr("RETURN_VALUE", lineno=3),
    ]
    self.assertEqual(result, flat)
def test_setlineno(self):
    """SetLineno entries are expected to show up unchanged in the graph."""
    # x = 7
    # y = 8
    # z = 9

    def program():
        # Built anew on every call so the input and the expected list are
        # made of separate objects.
        return [
            Instr("LOAD_CONST", 7),
            Instr("STORE_NAME", 'x'),
            SetLineno(4),
            Instr("LOAD_CONST", 8),
            Instr("STORE_NAME", 'y'),
            SetLineno(5),
            Instr("LOAD_CONST", 9),
            Instr("STORE_NAME", 'z'),
        ]

    bytecode = Bytecode()
    bytecode.first_lineno = 3
    bytecode.extend(program())

    cfg = ControlFlowGraph.from_bytecode(bytecode)

    # Everything is expected to land in a single block.
    self.assertBlocksEqual(cfg, program())
def test_from_bytecode(self):
    """Split labelled bytecode into blocks, including unreachable code."""
    source = Bytecode()
    target = Label()
    source.extend([
        Instr('LOAD_NAME', 'test', lineno=1),
        Instr('POP_JUMP_IF_FALSE', target, lineno=1),
        Instr('LOAD_CONST', 5, lineno=2),
        Instr('STORE_NAME', 'x', lineno=2),
        Instr('JUMP_FORWARD', target, lineno=2),
        # The next two instructions follow an unconditional jump: dead code.
        Instr('LOAD_CONST', 7, lineno=4),
        Instr('STORE_NAME', 'x', lineno=4),
        Label(),  # never referenced by a jump
        target,
        Label(),  # never referenced by a jump
        Instr('LOAD_CONST', None, lineno=4),
        Instr('RETURN_VALUE', lineno=4),
    ])

    cfg = ControlFlowGraph.from_bytecode(source)

    # In the graph both jumps are expected to reference the fourth block.
    join = cfg[3]
    condition_block = [
        Instr('LOAD_NAME', 'test', lineno=1),
        Instr('POP_JUMP_IF_FALSE', join, lineno=1),
    ]
    body_block = [
        Instr('LOAD_CONST', 5, lineno=2),
        Instr('STORE_NAME', 'x', lineno=2),
        Instr('JUMP_FORWARD', join, lineno=2),
    ]
    dead_block = [
        Instr('LOAD_CONST', 7, lineno=4),
        Instr('STORE_NAME', 'x', lineno=4),
    ]
    return_block = [
        Instr('LOAD_CONST', None, lineno=4),
        Instr('RETURN_VALUE', lineno=4),
    ]
    self.assertBlocksEqual(cfg, condition_block, body_block, dead_block,
                           return_block)
def check_dont_optimize(self, code):
    """Assert that optimizing ``code`` leaves its bytecode unchanged.

    ``code`` is converted to a control-flow graph, flattened once before
    and once after ``self.optimize_blocks`` runs, and the two results are
    compared.
    """
    cfg = ControlFlowGraph.from_bytecode(code)
    # Snapshot taken before the optimizer gets to see the graph.
    baseline = cfg.to_bytecode()
    optimized = self.optimize_blocks(cfg).to_bytecode()
    self.assertEqual(optimized, baseline)
def _create_consecutive_blocks(bytecode_cfg: ControlFlowGraph, first: BasicBlock, amount: int) -> Tuple[BasicBlock, ...]:
    """Split off ``amount`` new basic blocks after ``first``.

    Each round inserts a placeholder instruction at the front of the most
    recently obtained block and splits that block at index 1, which yields
    the next block. Afterwards ``first`` is cleared and refilled with the
    contents of the last block created, and every created block is emptied.

    Args:
        bytecode_cfg: The control-flow graph that performs the splits.
        first: The basic block the splitting starts from.
        amount: How many blocks to create; must be positive.

    Returns:
        The created blocks, in creation order, as a tuple.
    """
    assert amount > 0, "Amount of created basic blocks must be positive."

    # Which instruction is used does not matter; it only gives split_block
    # something to split after and is thrown away again below.
    placeholder = Instr("POP_TOP")
    created: List[BasicBlock] = []
    tail: BasicBlock = first
    for _ in range(amount):
        tail.insert(0, placeholder)
        tail = bytecode_cfg.split_block(tail, 1)
        created.append(tail)

    # The instructions now sit in the last block; move them back to `first`.
    first.clear()
    first.extend(tail)

    # Leave every newly created block empty.
    for block in created:
        block.clear()
    return tuple(created)
def test_add_del_block(self):
    """Blocks can be appended to a graph and deleted by index."""
    cfg = ControlFlowGraph()
    cfg[0].append(Instr("LOAD_CONST", 0))

    # add_block() returns the block it appended.
    appended = cfg.add_block()
    self.assertEqual(len(cfg), 2)
    self.assertIs(appended, cfg[1])

    cfg[1].append(Instr("LOAD_CONST", 2))
    self.assertBlocksEqual(
        cfg,
        [Instr("LOAD_CONST", 0)],
        [Instr("LOAD_CONST", 2)],
    )

    # Deleting index 0 twice empties the graph, one block at a time.
    del cfg[0]
    self.assertBlocksEqual(cfg, [Instr("LOAD_CONST", 2)])

    del cfg[0]
    self.assertEqual(len(cfg), 0)
def code_info(cls, code: Bytecode, *, debug_passes=()) -> PyCodeInfo[Repr]:
    """Build a ``PyCodeInfo`` for ``code``.

    The bytecode is converted to a control-flow graph and handed to
    ``run_machine`` through ``Interpreter(...).abs_i_cfg``. The resulting
    instructions then go through push/pop inlining, ``relabel`` and the phi
    pass chosen by ``Options['phi-pass']`` (``'phi-elim-by-move'`` or
    ``'keep-phi'``).

    ``debug_passes`` is accepted but not read by this function.

    Raises:
        Exception: if ``Options['phi-pass']`` is not one of the known names.
    """
    graph = ControlFlowGraph.from_bytecode(code)
    state = cls.empty()
    run_machine(Interpreter(code.first_lineno).abs_i_cfg(graph), state)
    glob_deps = tuple(state.globals)

    instrs = list(relabel(cls.pass_push_pop_inline(state.instrs)))
    if Options.get('log-stack-vm'):
        print('DEBUG: stack-vm'.center(20, '='))
        show_instrs(instrs)

    phi_pass_name = Options['phi-pass']
    known_passes = {'phi-elim-by-move': phi_elim, 'keep-phi': phi_keep}
    phi_pass = known_passes.get(phi_pass_name)
    if phi_pass is None:
        # Raised outside any `except` block, so no KeyError is attached
        # to the exception as context.
        raise Exception("Unknown phi pass {!r}".format(phi_pass_name))

    instrs = list(phi_pass(instrs))
    if Options.get('log-phi'):
        print('DEBUG: phi'.center(20, '='))
        show_instrs(instrs)

    flags = code.flags
    return PyCodeInfo(
        code.name,
        tuple(glob_deps),
        code.argnames,
        code.freevars,
        code.cellvars,
        code.filename,
        code.first_lineno,
        code.argcount,
        code.kwonlyargcount,
        bool(flags & CompilerFlags.GENERATOR),
        bool(flags & CompilerFlags.VARKEYWORDS),
        bool(flags & CompilerFlags.VARARGS),
        instrs,
    )
def from_bytecode(bytecode: Bytecode) -> CFG:
    """Build a control-flow graph for a bytecode segment.

    One node is generated per block of the ``ControlFlowGraph`` computed
    for ``bytecode``. Two artificial nodes are added on top of that:

    - an entry node with index -1, which has no incoming edge, and
    - an exit node with index ``sys.maxsize``, which has no outgoing edge
      and is the only such node in the graph (this matters, e.g., for
      graph reversal).

    Those index values do not occur for regular nodes, so the artificial
    nodes can be recognised by looking at the index alone.

    :param bytecode: The bytecode segment
    :return: The control-flow graph for the segment
    """
    blocks = ControlFlowGraph.from_bytecode(bytecode)
    cfg = CFG(blocks)

    # First the nodes plus an index -> successors mapping, then the edges.
    edges, nodes = CFG._create_nodes(blocks)
    CFG._create_graph(cfg, edges, nodes)

    # Post-processing, in this order: drop dead-code nodes, add the dummy
    # exit node, add the dummy entry node.
    post_processing = (
        CFG._filter_dead_code_nodes,
        CFG._insert_dummy_exit_node,
        CFG._insert_dummy_entry_node,
    )
    for stage in post_processing:
        cfg = stage(cfg)
    return cfg
def test_legalize(self):
    """legalize() is expected to give every instruction a line number."""
    bytecode = Bytecode()
    bytecode.first_lineno = 3
    unresolved = [
        Instr("LOAD_CONST", 7),
        Instr("STORE_NAME", "x"),
        Instr("LOAD_CONST", 8, lineno=4),
        Instr("STORE_NAME", "y"),
        SetLineno(5),
        Instr("LOAD_CONST", 9, lineno=6),
        Instr("STORE_NAME", "z"),
    ]
    bytecode.extend(unresolved)

    cfg = ControlFlowGraph.from_bytecode(bytecode)
    cfg.legalize()

    # Expected outcome: no SetLineno left, and the explicit lineno=6 that
    # followed SetLineno(5) reads 5.
    resolved = [
        Instr("LOAD_CONST", 7, lineno=3),
        Instr("STORE_NAME", "x", lineno=3),
        Instr("LOAD_CONST", 8, lineno=4),
        Instr("STORE_NAME", "y", lineno=4),
        Instr("LOAD_CONST", 9, lineno=5),
        Instr("STORE_NAME", "z", lineno=5),
    ]
    self.assertBlocksEqual(cfg, resolved)
def test_from_bytecode_loop(self):
    """Split the bytecode of a for-loop with break/continue into blocks."""
    # for x in (1, 2, 3):
    #     if x == 2:
    #         break
    #     continue
    label_loop_start = Label()
    label_loop_exit = Label()
    label_loop_end = Label()

    code = Bytecode()
    code.extend((
        Instr('SETUP_LOOP', label_loop_end, lineno=1),
        Instr('LOAD_CONST', (1, 2, 3), lineno=1),
        Instr('GET_ITER', lineno=1),
        label_loop_start,
        Instr('FOR_ITER', label_loop_exit, lineno=1),
        Instr('STORE_NAME', 'x', lineno=1),
        Instr('LOAD_NAME', 'x', lineno=2),
        Instr('LOAD_CONST', 2, lineno=2),
        Instr('COMPARE_OP', Compare.EQ, lineno=2),
        Instr('POP_JUMP_IF_FALSE', label_loop_start, lineno=2),
        Instr('BREAK_LOOP', lineno=3),
        Instr('JUMP_ABSOLUTE', label_loop_start, lineno=4),
        Instr('JUMP_ABSOLUTE', label_loop_start, lineno=4),
        label_loop_exit,
        Instr('POP_BLOCK', lineno=4),
        label_loop_end,
        Instr('LOAD_CONST', None, lineno=4),
        Instr('RETURN_VALUE', lineno=4),
    ))
    cfg = ControlFlowGraph.from_bytecode(code)

    # Blocks that stand in for the three labels in the expected graph.
    loop_start = cfg[2]
    loop_exit = cfg[7]
    loop_end = cfg[8]

    expected = [
        [Instr('SETUP_LOOP', loop_end, lineno=1)],
        [Instr('LOAD_CONST', (1, 2, 3), lineno=1),
         Instr('GET_ITER', lineno=1)],
        [Instr('FOR_ITER', loop_exit, lineno=1)],
        [Instr('STORE_NAME', 'x', lineno=1),
         Instr('LOAD_NAME', 'x', lineno=2),
         Instr('LOAD_CONST', 2, lineno=2),
         Instr('COMPARE_OP', Compare.EQ, lineno=2),
         Instr('POP_JUMP_IF_FALSE', loop_start, lineno=2)],
        [Instr('BREAK_LOOP', lineno=3)],
        [Instr('JUMP_ABSOLUTE', loop_start, lineno=4)],
        [Instr('JUMP_ABSOLUTE', loop_start, lineno=4)],
        [Instr('POP_BLOCK', lineno=4)],
        [Instr('LOAD_CONST', None, lineno=4),
         Instr('RETURN_VALUE', lineno=4)],
    ]
    self.assertBlocksEqual(cfg, *expected)
def optimize(self, code_obj):
    """Return a new code object built from the optimized ``code_obj``.

    The code object is converted to bytecode and then to a control-flow
    graph, ``self._optimize`` is run on that graph, and the graph is
    converted back to bytecode and finally to a code object.
    """
    cfg = ControlFlowGraph.from_bytecode(Bytecode.from_code(code_obj))
    self._optimize(cfg)
    return cfg.to_bytecode().to_code()
def optimize(self, code_obj):
    """Optimize ``code_obj`` and return the resulting code object.

    Pipeline: code object -> Bytecode -> ControlFlowGraph, then
    ``self.optimize_cfg`` on the graph, then graph -> Bytecode -> code.
    """
    graph = ControlFlowGraph.from_bytecode(Bytecode.from_code(code_obj))
    self.optimize_cfg(graph)
    optimized = graph.to_bytecode()
    return optimized.to_code()
def test_from_bytecode_loop(self):
    """Check the block layout produced for a loop with break and continue."""
    # Source the instructions correspond to:
    #
    #   for x in (1, 2, 3):
    #       if x == 2:
    #           break
    #       continue
    loop_start, loop_exit, loop_end = Label(), Label(), Label()

    instructions = (
        Instr("SETUP_LOOP", loop_end, lineno=1),
        Instr("LOAD_CONST", (1, 2, 3), lineno=1),
        Instr("GET_ITER", lineno=1),
        loop_start,
        Instr("FOR_ITER", loop_exit, lineno=1),
        Instr("STORE_NAME", "x", lineno=1),
        Instr("LOAD_NAME", "x", lineno=2),
        Instr("LOAD_CONST", 2, lineno=2),
        Instr("COMPARE_OP", Compare.EQ, lineno=2),
        Instr("POP_JUMP_IF_FALSE", loop_start, lineno=2),
        Instr("BREAK_LOOP", lineno=3),
        Instr("JUMP_ABSOLUTE", loop_start, lineno=4),
        Instr("JUMP_ABSOLUTE", loop_start, lineno=4),
        loop_exit,
        Instr("POP_BLOCK", lineno=4),
        loop_end,
        Instr("LOAD_CONST", None, lineno=4),
        Instr("RETURN_VALUE", lineno=4),
    )
    code = Bytecode()
    code.extend(instructions)

    blocks = ControlFlowGraph.from_bytecode(code)

    # Nine blocks are expected; jump arguments refer to blocks of the graph.
    setup = [Instr("SETUP_LOOP", blocks[8], lineno=1)]
    make_iterator = [
        Instr("LOAD_CONST", (1, 2, 3), lineno=1),
        Instr("GET_ITER", lineno=1),
    ]
    iterate = [Instr("FOR_ITER", blocks[7], lineno=1)]
    compare = [
        Instr("STORE_NAME", "x", lineno=1),
        Instr("LOAD_NAME", "x", lineno=2),
        Instr("LOAD_CONST", 2, lineno=2),
        Instr("COMPARE_OP", Compare.EQ, lineno=2),
        Instr("POP_JUMP_IF_FALSE", blocks[2], lineno=2),
    ]
    break_out = [Instr("BREAK_LOOP", lineno=3)]
    first_jump = [Instr("JUMP_ABSOLUTE", blocks[2], lineno=4)]
    second_jump = [Instr("JUMP_ABSOLUTE", blocks[2], lineno=4)]
    pop_block = [Instr("POP_BLOCK", lineno=4)]
    return_none = [
        Instr("LOAD_CONST", None, lineno=4),
        Instr("RETURN_VALUE", lineno=4),
    ]
    self.assertBlocksEqual(
        blocks,
        setup,
        make_iterator,
        iterate,
        compare,
        break_out,
        first_jump,
        second_jump,
        pop_block,
        return_none,
    )
def _create_nodes(
    blocks: ControlFlowGraph,
) -> Tuple[Dict[int, List[int]], Dict[int, pg.ProgramGraphNode]]:
    """Create one node per basic block and collect each block's successors.

    Args:
        blocks: The bytecode control-flow graph whose blocks are enumerated.

    Returns:
        A pair ``(edges, nodes)``. ``edges`` maps a block's index to the
        indices of its successors (the ``next_block`` first, then the jump
        target, when present). ``nodes`` maps the same index to the
        ``ProgramGraphNode`` created for that block.
    """
    nodes: Dict[int, pg.ProgramGraphNode] = {}
    edges: Dict[int, List[int]] = {}
    for node_index, block in enumerate(blocks):
        nodes[node_index] = pg.ProgramGraphNode(
            index=node_index, basic_block=block
        )
        successors = edges.setdefault(node_index, [])

        # NOTE(review): both checks below test truthiness, not
        # `is not None`. If an empty BasicBlock is falsy, an edge into an
        # empty block would be skipped -- confirm this is intended.
        next_block = block.next_block
        if next_block:
            successors.append(blocks.get_block_index(next_block))
        if target_block := block.get_jump():
            successors.append(blocks.get_block_index(target_block))

    # The annotation and the `edges, nodes = ...` unpacking at the call
    # site both expect this pair; without it the function returned None.
    return edges, nodes
def copy_graph(cfg: CFG) -> CFG:
    """Create a copy of a control-flow graph.

    The returned ``CFG`` wraps a new, empty ``ControlFlowGraph``; only the
    ``_graph`` attribute is taken over from ``cfg``, via its ``copy()``.

    :param cfg: The original graph
    :return: The copied graph
    """
    duplicate = CFG(ControlFlowGraph())
    # pylint: disable=attribute-defined-outside-init
    duplicate._graph = cfg._graph.copy()
    return duplicate
def test_add_del_block(self):
    """Grow a graph by one block, then delete its blocks one by one."""
    graph = ControlFlowGraph()
    graph[0].append(Instr('LOAD_CONST', 0))

    new_block = graph.add_block()
    self.assertEqual(len(graph), 2)
    # The block handed back is the one stored at index 1.
    self.assertIs(new_block, graph[1])

    graph[1].append(Instr('LOAD_CONST', 2))
    two_blocks = ([Instr('LOAD_CONST', 0)], [Instr('LOAD_CONST', 2)])
    self.assertBlocksEqual(graph, *two_blocks)

    # After removing the first block only the second one is left ...
    del graph[0]
    self.assertBlocksEqual(graph, [Instr('LOAD_CONST', 2)])

    # ... and removing that one as well leaves an empty graph.
    del graph[0]
    self.assertEqual(len(graph), 0)
def test_blocks_broken_jump(self):
    """Dump a graph whose only jump targets a block outside of the graph."""
    orphan = BasicBlock()
    cfg = ControlFlowGraph()
    cfg[0].append(Instr('JUMP_ABSOLUTE', orphan))

    # NOTE(review): the literal is reproduced exactly as it appears in this
    # source. dedent() and lstrip("\n") only have an effect on text that
    # spans several indented lines, so its line breaks were presumably
    # lost -- confirm the layout against what check_dump_bytecode expects.
    template = """ block1: JUMP_ABSOLUTE <error: unknown block> """
    expected = textwrap.dedent(template).lstrip("\n")
    self.check_dump_bytecode(cfg, expected)
def test_from_bytecode_loop(self):
    """Convert loop bytecode to a graph and compare it block by block."""
    # for x in (1, 2, 3):
    #     if x == 2:
    #         break
    #     continue
    start = Label()
    leave = Label()
    end = Label()

    bytecode = Bytecode()
    bytecode.extend((
        Instr('SETUP_LOOP', end, lineno=1),
        Instr('LOAD_CONST', (1, 2, 3), lineno=1),
        Instr('GET_ITER', lineno=1),
        start,
        Instr('FOR_ITER', leave, lineno=1),
        Instr('STORE_NAME', 'x', lineno=1),
        Instr('LOAD_NAME', 'x', lineno=2),
        Instr('LOAD_CONST', 2, lineno=2),
        Instr('COMPARE_OP', Compare.EQ, lineno=2),
        Instr('POP_JUMP_IF_FALSE', start, lineno=2),
        Instr('BREAK_LOOP', lineno=3),
        Instr('JUMP_ABSOLUTE', start, lineno=4),
        Instr('JUMP_ABSOLUTE', start, lineno=4),
        leave,
        Instr('POP_BLOCK', lineno=4),
        end,
        Instr('LOAD_CONST', None, lineno=4),
        Instr('RETURN_VALUE', lineno=4),
    ))

    graph = ControlFlowGraph.from_bytecode(bytecode)

    expected = [
        # block 0
        [Instr('SETUP_LOOP', graph[8], lineno=1)],
        # block 1
        [
            Instr('LOAD_CONST', (1, 2, 3), lineno=1),
            Instr('GET_ITER', lineno=1),
        ],
        # block 2: target of the jumps that referenced the start label
        [Instr('FOR_ITER', graph[7], lineno=1)],
        # block 3
        [
            Instr('STORE_NAME', 'x', lineno=1),
            Instr('LOAD_NAME', 'x', lineno=2),
            Instr('LOAD_CONST', 2, lineno=2),
            Instr('COMPARE_OP', Compare.EQ, lineno=2),
            Instr('POP_JUMP_IF_FALSE', graph[2], lineno=2),
        ],
        # block 4
        [Instr('BREAK_LOOP', lineno=3)],
        # blocks 5 and 6: each JUMP_ABSOLUTE sits in a block of its own
        [Instr('JUMP_ABSOLUTE', graph[2], lineno=4)],
        [Instr('JUMP_ABSOLUTE', graph[2], lineno=4)],
        # block 7
        [Instr('POP_BLOCK', lineno=4)],
        # block 8
        [
            Instr('LOAD_CONST', None, lineno=4),
            Instr('RETURN_VALUE', lineno=4),
        ],
    ]
    self.assertBlocksEqual(graph, *expected)
def check(self, source, function=False):
    """Round-trip ``source`` through each representation and compare.

    The reference code object obtained from ``get_code`` must be equal to
    the code object rebuilt via ``ConcreteBytecode``, via ``Bytecode``, and
    via ``Bytecode`` -> ``ControlFlowGraph`` -> ``Bytecode``.
    """
    ref_code = get_code(source, function=function)

    # code -> ConcreteBytecode -> code
    via_concrete = ConcreteBytecode.from_code(ref_code).to_code()
    self.assertEqual(via_concrete, ref_code)

    # code -> Bytecode -> code
    via_abstract = Bytecode.from_code(ref_code).to_code()
    self.assertEqual(via_abstract, ref_code)

    # code -> Bytecode -> ControlFlowGraph -> Bytecode -> code
    cfg = ControlFlowGraph.from_bytecode(Bytecode.from_code(ref_code))
    via_cfg = cfg.to_bytecode().to_code()
    self.assertEqual(via_cfg, ref_code)
def copy_graph(cfg: CFG) -> CFG:
    """Create a copy of a control-flow graph.

    Only the ``_graph`` attribute is copied (through its ``copy()``
    method); the bytecode CFG handed to the new instance is a fresh, empty
    ``ControlFlowGraph``.

    Args:
        cfg: The original graph

    Returns:
        The copied graph
    """
    # TODO(fk) Cloning the bytecode cfg is complicated.
    placeholder_blocks = ControlFlowGraph()
    clone = CFG(placeholder_blocks)
    # pylint: disable=attribute-defined-outside-init
    clone._graph = cfg._graph.copy()
    return clone
def code_info(cls, code: Bytecode) -> PyCodeInfo[Repr]:
    """Build a ``PyCodeInfo`` for ``code``.

    The bytecode is converted to a control-flow graph and handed to
    ``run_machine`` through ``Interpreter(...).abs_i_cfg`` together with a
    fresh ``cls.empty()`` state. The instructions collected in that state
    are passed through ``pass_push_pop_inline`` before being packaged.
    """
    graph = ControlFlowGraph.from_bytecode(code)
    state = cls.empty()
    run_machine(Interpreter(code.first_lineno).abs_i_cfg(graph), state)

    glob_deps = tuple(state.globals)
    instrs = state.pass_push_pop_inline(state.instrs)

    flags = code.flags
    return PyCodeInfo(
        code.name,
        tuple(glob_deps),
        code.argnames,
        code.freevars,
        code.cellvars,
        code.filename,
        code.first_lineno,
        code.argcount,
        code.kwonlyargcount,
        bool(flags & CompilerFlags.GENERATOR),
        bool(flags & CompilerFlags.VARKEYWORDS),
        bool(flags & CompilerFlags.VARARGS),
        instrs,
    )
def __init__(self, path: str, module_name='', to_import=None):
    """Compile the file at ``path`` and derive its bytecode and CFG.

    Args:
        path: Path of the source file. It is read in binary mode and
            compiled in ``'exec'`` mode with ``path`` as its filename.
        module_name: Stored on the instance as ``module_name``.
        to_import: Stored on the instance as ``to_import``. ``None`` (the
            default) stands for ``['*']``; a new list is created for each
            instance so that instances never share one default list.
    """
    self.path = path
    # A list literal as default value would be created once and shared by
    # every instance that relies on the default.
    self.to_import = ['*'] if to_import is None else to_import
    self.module_name = module_name
    self._local_methods = []

    # The context manager closes the file even when reading or compiling
    # raises.
    with open(path, 'rb') as source:
        compiled_source = compile(source.read(), path, 'exec')

    self.bc = Bytecode.from_code(compiled_source)
    self.cfg = ControlFlowGraph.from_bytecode(self.bc)

    self.build()
def disassemble(source, *, filename="<string>", function=False, remove_last_return_none=False):
    """Disassemble ``source`` and return it as a ``ControlFlowGraph``.

    ``source``, ``filename`` and ``function`` are forwarded to
    ``_disassemble``. With ``remove_last_return_none`` set, the trailing
    ``LOAD_CONST None`` / ``RETURN_VALUE`` pair is deleted from the last
    block, which keeps unit tests shorter.

    Raises:
        ValueError: if ``remove_last_return_none`` is set and the last
            block does not end with that instruction pair.
    """
    code = _disassemble(source, filename=filename, function=function)
    blocks = ControlFlowGraph.from_bytecode(code)
    if not remove_last_return_none:
        return blocks

    last_block = blocks[-1]
    load, ret = last_block[-2], last_block[-1]
    ends_with_return_none = (
        load.name == "LOAD_CONST"
        and load.arg is None
        and ret.name == "RETURN_VALUE"
    )
    if ends_with_return_none:
        del last_block[-2:]
        return blocks
    raise ValueError("unable to find implicit RETURN_VALUE <None>: %s" % last_block[-2:])
def test_label_at_the_end(self):
    """A label that is the very last entry is expected as an empty block."""
    end = Label()
    instructions = [
        Instr('LOAD_NAME', 'x'),
        Instr('UNARY_NOT'),
        Instr('POP_JUMP_IF_FALSE', end),
        Instr('LOAD_CONST', 9),
        Instr('STORE_NAME', 'y'),
        end,
    ]
    graph = ControlFlowGraph.from_bytecode(Bytecode(instructions))

    # The jump is expected to point at the third block, which is empty.
    condition = [
        Instr('LOAD_NAME', 'x'),
        Instr('UNARY_NOT'),
        Instr('POP_JUMP_IF_FALSE', graph[2]),
    ]
    assignment = [Instr('LOAD_CONST', 9), Instr('STORE_NAME', 'y')]
    self.assertBlocksEqual(graph, condition, assignment, [])
def check(self, code, *expected):
    """Optimize ``code`` and compare the result with ``expected``.

    ``code`` may be a ``Bytecode`` (converted to a graph first) or anything
    else, which is handed to the optimizer as is. When the comparison
    fails, both sides are printed before the assertion error is re-raised.
    """
    cfg = ControlFlowGraph.from_bytecode(code) if isinstance(code, Bytecode) else code
    peephole_opt.PeepholeOptimizer().optimize_cfg(cfg)
    optimized = cfg.to_bytecode()

    try:
        self.assertEqual(optimized, expected)
    except AssertionError:
        # Show both instruction sequences to make the failure readable.
        print("Optimized code:")
        dump_bytecode(optimized)

        print("Expected code:")
        for instr in expected:
            print(instr)
        raise
def check(self, code, *expected):
    """Run the peephole optimizer on ``code`` and assert the outcome.

    A ``Bytecode`` argument is first turned into a ``ControlFlowGraph``;
    the optimizer's ``_optimize`` then works on the graph, which is
    flattened again and compared with the ``expected`` instructions. On a
    mismatch the optimized and the expected code are printed and the
    assertion error propagates.
    """
    graph = code
    if isinstance(graph, Bytecode):
        graph = ControlFlowGraph.from_bytecode(graph)

    optimizer = peephole_opt.PeepholeOptimizer()
    optimizer._optimize(graph)
    actual = graph.to_bytecode()

    try:
        self.assertEqual(actual, expected)
    except AssertionError:
        print("Optimized code:")
        dump_bytecode(actual)

        print("Expected code:")
        for expected_instr in expected:
            print(expected_instr)
        raise
def test_from_bytecode(self):
    """Check how labelled bytecode, dead code included, maps to blocks."""
    bytecode = Bytecode()
    jump_target = Label()
    instructions = [
        Instr("LOAD_NAME", "test", lineno=1),
        Instr("POP_JUMP_IF_FALSE", jump_target, lineno=1),
        Instr("LOAD_CONST", 5, lineno=2),
        Instr("STORE_NAME", "x", lineno=2),
        Instr("JUMP_FORWARD", jump_target, lineno=2),
        # dead code: it directly follows the unconditional jump
        Instr("LOAD_CONST", 7, lineno=4),
        Instr("STORE_NAME", "x", lineno=4),
        Label(),  # unused label
        jump_target,
        Label(),  # unused label
        Instr("LOAD_CONST", None, lineno=4),
        Instr("RETURN_VALUE", lineno=4),
    ]
    bytecode.extend(instructions)

    blocks = ControlFlowGraph.from_bytecode(bytecode)

    # Four blocks are expected, with both jumps referring to the last one.
    last_block = blocks[3]
    expected = (
        [
            Instr("LOAD_NAME", "test", lineno=1),
            Instr("POP_JUMP_IF_FALSE", last_block, lineno=1),
        ],
        [
            Instr("LOAD_CONST", 5, lineno=2),
            Instr("STORE_NAME", "x", lineno=2),
            Instr("JUMP_FORWARD", last_block, lineno=2),
        ],
        [
            Instr("LOAD_CONST", 7, lineno=4),
            Instr("STORE_NAME", "x", lineno=4),
        ],
        [
            Instr("LOAD_CONST", None, lineno=4),
            Instr("RETURN_VALUE", lineno=4),
        ],
    )
    self.assertBlocksEqual(blocks, *expected)
def test_label_at_the_end(self):
    """Bytecode ending in a label is expected to end in an empty block."""
    trailing = Label()
    code = Bytecode([
        Instr("LOAD_NAME", "x"),
        Instr("UNARY_NOT"),
        Instr("POP_JUMP_IF_FALSE", trailing),
        Instr("LOAD_CONST", 9),
        Instr("STORE_NAME", "y"),
        trailing,
    ])

    cfg = ControlFlowGraph.from_bytecode(code)

    expected_blocks = (
        # the conditional jump refers to the third block of the graph
        [
            Instr("LOAD_NAME", "x"),
            Instr("UNARY_NOT"),
            Instr("POP_JUMP_IF_FALSE", cfg[2]),
        ],
        [Instr("LOAD_CONST", 9), Instr("STORE_NAME", "y")],
        # nothing follows the label, hence no instructions here
        [],
    )
    self.assertBlocksEqual(cfg, *expected_blocks)
def __init__(self, path: str, module_name='', to_import=None):
    """Compile the file at ``path`` and derive its AST, bytecode and CFG.

    Args:
        path: Path of the source file. It is read in binary mode, compiled
            in ``'exec'`` mode with ``path`` as its filename, and parsed
            with ``ast.parse``.
        module_name: Stored on the instance as ``module_name``. Only when
            it is the empty string, an ``ABI`` visitor is created, stored
            as ``abi`` and run over the parsed tree.
        to_import: Stored on the instance as ``to_import``. ``None`` (the
            default) stands for ``['*']``; a new list is created for each
            instance so that instances never share one default list.
    """
    self.path = path
    # A list literal as default value would be created once and shared by
    # every instance that relies on the default.
    self.to_import = ['*'] if to_import is None else to_import
    self.module_name = module_name
    self._local_methods = []
    self.abi = None

    # Only the read needs the file; the context manager closes it even
    # when an exception is raised.
    with open(path, 'rb') as source:
        source_src = source.read()

    compiled_source = compile(source_src, path, 'exec')
    ast_tree = ast.parse(source_src)

    if module_name == '':
        self.abi = ABI()
        self.abi.visit(ast_tree)

    self.bc = Bytecode.from_code(compiled_source)
    self.cfg = ControlFlowGraph.from_bytecode(self.bc)

    self.build()
def test_eq(self):
    """Equality holds between equal graphs and fails on any mismatch."""
    # Two separate disassemblies of the same source (several blocks and
    # labels each) must compare equal.
    source = "x = 1 if test else 2"
    first = disassemble(source)
    second = disassemble(source)
    self.assertEqual(first, second)

    # Comparing with something of another type: `==` is spelled out on
    # purpose so that the equality operator itself is exercised.
    self.assertFalse(first == 1)

    # Different argnames.
    other_argnames = ControlFlowGraph()
    other_argnames.argnames = 10
    self.assertFalse(first == other_argnames)

    # Same argnames, but a newly created graph instead of the
    # disassembled instructions.
    other_instrs = ControlFlowGraph()
    other_instrs.argnames = first.argnames
    self.assertFalse(first == other_instrs)
def test_flag_inference(self):
    """Exercise update_flags() and infer_flags() on several code kinds."""
    # Flags set by hand must still be there after update_flags(). NOFREE
    # and OPTIMIZED are not set by hand, yet are expected afterwards too.
    graph = ControlFlowGraph()
    graph.flags |= (
        CompilerFlags.NEWLOCALS
        | CompilerFlags.VARARGS
        | CompilerFlags.VARKEYWORDS
        | CompilerFlags.NESTED
        | CompilerFlags.FUTURE_GENERATOR_STOP
    )
    graph.update_flags()
    expected_flags = (
        CompilerFlags.NEWLOCALS,
        CompilerFlags.VARARGS,
        CompilerFlags.VARKEYWORDS,
        CompilerFlags.NESTED,
        CompilerFlags.NOFREE,
        CompilerFlags.OPTIMIZED,
        CompilerFlags.FUTURE_GENERATOR_STOP,
    )
    for flag in expected_flags:
        self.assertTrue(bool(graph.flags & flag))

    # OPTIMIZED and NOFREE: both expected for empty bytecode.
    abstract = Bytecode()
    inferred = infer_flags(abstract)
    self.assertTrue(bool(inferred & CompilerFlags.OPTIMIZED))
    self.assertTrue(bool(inferred & CompilerFlags.NOFREE))

    # After adding STORE_NAME, OPTIMIZED is no longer expected.
    abstract.append(ConcreteInstr('STORE_NAME', 1))
    inferred = infer_flags(abstract)
    self.assertFalse(bool(inferred & CompilerFlags.OPTIMIZED))
    self.assertTrue(bool(inferred & CompilerFlags.NOFREE))

    # After adding STORE_DEREF, NOFREE is no longer expected either.
    abstract.append(ConcreteInstr('STORE_DEREF', 2))
    abstract.update_flags()
    self.assertFalse(bool(abstract.flags & CompilerFlags.OPTIMIZED))
    self.assertFalse(bool(abstract.flags & CompilerFlags.NOFREE))

    # Generator flags: YIELD_VALUE gives GENERATOR, or ASYNC_GENERATOR
    # when is_async is true.
    generator = ConcreteBytecode()
    generator.append(ConcreteInstr('YIELD_VALUE'))
    generator_cases = (
        (False, CompilerFlags.GENERATOR),
        (True, CompilerFlags.ASYNC_GENERATOR),
    )
    for is_async, expected in generator_cases:
        self.assertTrue(bool(infer_flags(generator, is_async) & expected))

    # Coroutine flag: expected for GET_AWAITABLE unless ITERABLE_COROUTINE
    # is already set.
    coroutine = ConcreteBytecode()
    coroutine.append(ConcreteInstr('GET_AWAITABLE'))
    iter_flags = CompilerFlags(CompilerFlags.ITERABLE_COROUTINE)
    coroutine_cases = ((CompilerFlags(0), True), (iter_flags, False))
    for preset, expected in coroutine_cases:
        coroutine.flags = preset
        self.assertEqual(
            bool(infer_flags(coroutine) & CompilerFlags.COROUTINE),
            expected)

    # Sanity checks on the flags: GENERATOR together with COROUTINE is
    # accepted by infer_flags() with is_async=True ...
    coroutine.append(ConcreteInstr('YIELD_VALUE'))
    coroutine.flags = CompilerFlags(CompilerFlags.GENERATOR
                                    | CompilerFlags.COROUTINE)
    infer_flags(coroutine, is_async=True)  # only has to pass without error
    # ... but rejected by update_flags().
    with self.assertRaises(ValueError):
        coroutine.update_flags()

    # Something that is no bytecode object at all is rejected as well.
    with self.assertRaises(ValueError):
        infer_flags(None)
def test_bytecode_blocks(self):
    """Dump the blocks of a small function, without and with line numbers.

    The function source is disassembled, converted to a control-flow graph
    and handed to ``check_dump_bytecode`` twice: once with the default
    arguments and once with ``lineno=True``.
    """
    # NOTE(review): the three triple-quoted literals below are kept exactly
    # as they appear in this source, i.e. on a single line. The function
    # source only makes sense as indented, multi-line Python, and dedent()
    # only has an effect on multi-line text, so the line breaks and
    # indentation were presumably lost -- restore them from the intended
    # dump format before relying on this test.
    source = """ def func(test): if test == 1: return 1 elif test == 2: return 2 return 3 """
    code = disassemble(source, function=True)
    code = ControlFlowGraph.from_bytecode(code)

    # without line numbers
    expected = textwrap.dedent(""" block1: LOAD_FAST 'test' LOAD_CONST 1 COMPARE_OP <Compare.EQ: 2> POP_JUMP_IF_FALSE <block3> -> block2 block2: LOAD_CONST 1 RETURN_VALUE block3: LOAD_FAST 'test' LOAD_CONST 2 COMPARE_OP <Compare.EQ: 2> POP_JUMP_IF_FALSE <block5> -> block4 block4: LOAD_CONST 2 RETURN_VALUE block5: LOAD_CONST 3 RETURN_VALUE """).lstrip()
    self.check_dump_bytecode(code, expected)

    # with line numbers
    expected = textwrap.dedent(""" block1: L. 2 0: LOAD_FAST 'test' 1: LOAD_CONST 1 2: COMPARE_OP <Compare.EQ: 2> 3: POP_JUMP_IF_FALSE <block3> -> block2 block2: L. 3 0: LOAD_CONST 1 1: RETURN_VALUE block3: L. 4 0: LOAD_FAST 'test' 1: LOAD_CONST 2 2: COMPARE_OP <Compare.EQ: 2> 3: POP_JUMP_IF_FALSE <block5> -> block4 block4: L. 5 0: LOAD_CONST 2 1: RETURN_VALUE block5: L. 6 0: LOAD_CONST 3 1: RETURN_VALUE """).lstrip()
    self.check_dump_bytecode(code, expected, lineno=True)
def optimize_blocks(self, code):
    """Run the peephole optimizer on ``code`` and return the graph.

    A ``Bytecode`` argument is converted to a ``ControlFlowGraph`` first;
    any other argument is handed to the optimizer's ``_optimize`` as is.
    The object that was passed to the optimizer is returned.
    """
    cfg = code
    if isinstance(cfg, Bytecode):
        cfg = ControlFlowGraph.from_bytecode(cfg)
    peephole_opt.PeepholeOptimizer()._optimize(cfg)
    return cfg
def test_to_code(self):
    """Build a code object from a graph and check all of its fields."""
    # The jump in the first block takes a block as its argument; the
    # expected co_code below contains the resolved jump.
    cfg = ControlFlowGraph()
    cfg.first_lineno = 3
    cfg.argcount = 3
    cfg.kwonlyargcount = 2
    cfg._stacksize = 1
    cfg.name = 'func'
    cfg.filename = 'hello.py'
    cfg.flags = 0x43
    cfg.argnames = ('arg', 'arg2', 'arg3', 'kwonly', 'kwonly2')
    cfg.docstring = None

    entry = cfg[0]
    middle = cfg.add_block()
    final = cfg.add_block()
    entry.extend([
        Instr('LOAD_FAST', 'x', lineno=4),
        Instr('POP_JUMP_IF_FALSE', final, lineno=4),
    ])
    middle.extend([
        Instr('LOAD_FAST', 'arg', lineno=5),
        Instr('STORE_FAST', 'x', lineno=5),
    ])
    final.extend([
        Instr('LOAD_CONST', 3, lineno=6),
        Instr('STORE_FAST', 'x', lineno=6),
        Instr('LOAD_FAST', 'x', lineno=7),
        Instr('RETURN_VALUE', lineno=7),
    ])

    # One bytes literal per instruction above, in the same order.
    expected = (b'|\x05\x00'
                b'r\x0c\x00'
                b'|\x00\x00'
                b'}\x05\x00'
                b'd\x01\x00'
                b'}\x05\x00'
                b'|\x05\x00'
                b'S')

    code = cfg.to_bytecode().to_code()

    self.assertEqual(code.co_consts, (None, 3))
    self.assertEqual(code.co_argcount, 3)
    self.assertEqual(code.co_kwonlyargcount, 2)
    self.assertEqual(code.co_nlocals, 6)
    self.assertEqual(code.co_stacksize, 1)
    # FIXME: don't use hardcoded constants
    self.assertEqual(code.co_flags, 0x43)
    self.assertEqual(code.co_code, expected)
    self.assertEqual(code.co_names, ())
    self.assertEqual(
        code.co_varnames,
        ('arg', 'arg2', 'arg3', 'kwonly', 'kwonly2', 'x'))
    self.assertEqual(code.co_filename, 'hello.py')
    self.assertEqual(code.co_name, 'func')
    self.assertEqual(code.co_firstlineno, 3)