def test_encode_example(self):
    """Smoke test: encoding a (cfg, source) pair must not raise.

    A short random program is generated, converted to a control flow graph,
    and passed through a `ControlFlowGraphFeature` built without back edges.
    There is no assertion on the encoded value.
    """
    generator_config = ArithmeticRepeatsConfig(base=10, length=5)
    source = program_generators.generate_python_source(
        generator_config.length, generator_config)
    graph = python_programs.to_cfg(source)
    cfg_feature = control_flow_graph_feature.ControlFlowGraphFeature(
        include_back_edges=False,
        encoder=encoders.get_program_encoder(generator_config),
    )
    cfg_feature.encode_example((graph, source))
def init(python_object, info, config):
    """Builds the initial state dict and reward for a program.

    Args:
      python_object: Either the program source itself, or a 2-tuple whose
        second element is the (partial) source the control flow graph is
        built from. The first tuple element is not used here.
      info: Object exposing `program_generator_config` (read for `base`,
        `num_digits`, `mod`, `output_mod`) and `program_encoder` (read for
        `tokens_per_statement`).
      config: Stored unchanged in the returned state.

    Returns:
      A `(state, reward)` pair; the reward is always 0.0.
    """
    generator_config = info.program_generator_config
    radix = generator_config.base
    statement_tokens = info.program_encoder.tokens_per_statement
    num_output_digits = generator_config.num_digits
    modulus = generator_config.mod
    output_modulus = generator_config.output_mod
    interpreter = python_interpreter.ExecExecutor()

    # Pick the source text the graph is built from.
    if isinstance(python_object, tuple):
        _, cfg_source = python_object
    else:
        cfg_source = python_object
    graph = python_programs.to_cfg(cfg_source)

    # Run until branch decision, collecting simple statements.
    # TODO(dbieber): This should occur in exactly one location.
    # (also in control_flow_programs_features.py)
    start_values = {'v0': 1}
    reached_values, simple_instructions, pending_branches = (
        python_interpreter.evaluate_until_branch_decision(
            interpreter,
            graph.start_block,
            mod=radix**num_output_digits,
            values=start_values,
        ))

    state = {
        'initial_values': start_values,
        'values': reached_values,
        'instructions': simple_instructions,
        'branches': pending_branches,
        'base': radix,
        'tokens_per_statement': statement_tokens,
        'target_output_length': num_output_digits,
        'mod': modulus,
        'output_mod': output_modulus,
        'executor': interpreter,
        'config': config,
        'step': 0,
    }
    return state, 0.0
def test_evaluate_cfg_mod_n(self):
    """Evaluating a straight-line program with mod=5 leaves v0 == 3."""
    source = """
v0 += 5
v0 *= 6
v0 -= 3
""".strip()
    graph = python_programs.to_cfg(source)
    # Execution starts from v0 = 1.
    result = python_interpreter.evaluate_cfg(
        self.executor, graph, mod=5, initial_values={'v0': 1})
    self.assertEqual(3, result['v0'])
def test_evaluate_cfg_break(self):
    """A `break` ends the loop after one pass, so v0 goes from 1 to 3."""
    source = """
v1 = 2
while v1 > 0:
  v1 -= 1
  v0 += 2
  break
""".strip()
    graph = python_programs.to_cfg(source)
    # Execution starts from v0 = 1.
    result = python_interpreter.evaluate_cfg(
        self.executor, graph, initial_values={'v0': 1})
    self.assertEqual(result['v0'], 3)
def test_evaluate_cfg_while(self):
    """Five loop iterations subtract 3 each from 36, giving v0 == 21."""
    source = """
v0 += 5
v0 *= 6
v1 = 5
while v1 > 0:
  v1 -= 1
  v0 -= 3
""".strip()
    graph = python_programs.to_cfg(source)
    # Execution starts from v0 = 1.
    result = python_interpreter.evaluate_cfg(
        self.executor, graph, initial_values={'v0': 1})
    self.assertEqual(21, result['v0'])
def main(argv):
    """Generates one random program and renders its control flow graph.

    The image is written to `/tmp/control_flow_graphs/cfgNNN.png`, where NNN
    is the number of entries already present in that directory, so repeated
    runs produce consecutively numbered files.

    Args:
      argv: Command-line arguments; ignored.
    """
    del argv  # Unused.

    config = arithmetic_repeats_config.ArithmeticRepeatsConfig(
        base=10,
        length=30,
        max_repeat_statements=10,
        max_repetitions=9,
        max_repeat_block_size=20,
        repeat_probability=0.2,
        permit_nested_repeats=True,
    )
    python_source = program_generators.generate_python_source(
        config.length, config)
    cfg = python_programs.to_cfg(python_source)

    output_dir = '/tmp/control_flow_graphs/'
    # Create the directory on first use; os.listdir raises FileNotFoundError
    # when it does not exist yet.
    os.makedirs(output_dir, exist_ok=True)
    num_graphs = len(os.listdir(output_dir))
    path = os.path.join(output_dir, 'cfg{:03d}.png'.format(num_graphs))
    control_flow_graphviz.render(cfg, include_src=python_source, path=path)
def test_evaluate_cfg_on_random_program(self):
    """Smoke test: a randomly generated program evaluates and yields a v0."""
    generator_config = arithmetic_repeats_config.ArithmeticRepeatsConfig(
        base=10,
        length=10,
        max_repeat_statements=10,
        max_repetitions=9,
        max_repeat_block_size=5,
        repeat_probability=0.2,
        permit_nested_repeats=True,
    )
    source = program_generators.generate_python_source(
        generator_config.length, generator_config)
    graph = python_programs.to_cfg(source)
    # Execution starts from v0 = 1.
    result = python_interpreter.evaluate_cfg(
        self.executor, graph, initial_values={'v0': 1})
    self.assertIn('v0', result)
def test_get_adjacency_matrix(self):
    """Checks the adjacency matrix of a single-loop program.

    Row i of the transposed matrix marks the successors of node i; the exit
    node (index 4) has a self edge.
    """
    graph = python_programs.to_cfg("""
v1 = 0
while v1 > 1:
  v1 -= 2
  v0 += 3
""")
    adjacency = control_flow_graph_feature.get_adjacency_matrix(graph.nodes, 4)
    logging.info(adjacency)
    expected_rows = [
        [0., 1., 0., 0., 0.],  # 0 -> 1
        [0., 0., 1., 0., 1.],  # 1 -> {2, 4}
        [0., 0., 0., 1., 0.],  # 2 -> 3
        [0., 1., 0., 0., 0.],  # 3 -> 1
        [0., 0., 0., 0., 1.],  # 4 -> 4 (exit node)
    ]
    self.assertListEqual(adjacency.T.tolist(), expected_rows)
def test_get_branch_list_from_nodes(self):
    """Checks the per-node branch targets of a single-loop program.

    Each entry holds two target indexes for a node; they differ only for the
    `while` condition, and the exit node (index 6) points at itself.
    """
    graph = python_programs.to_cfg("""
v1 = 2
while v1 > 0:
  v1 -= 1
  v0 += 2
  v0 -= 1
  v0 *= 4
""")
    actual = control_flow_graph_feature.get_branch_list(graph.nodes, 6)
    expected = [
        [1, 1],  # v1 = 2
        [2, 6],  # while v1 > 0:
        [3, 3],  # v1 -= 1
        [4, 4],  # v0 += 2
        [5, 5],  # v0 -= 1
        [1, 1],  # v0 *= 4
        [6, 6],  # exit-node
    ]
    self.assertEqual(actual, expected)
def test_get_adjacency_list(self):
    """Checks the edge list of a single-loop program.

    Judging by the expected data, each pair reads [destination, source]; the
    exit node (index 6) carries a self edge.
    """
    graph = python_programs.to_cfg("""
v1 = 2
while v1 > 0:
  v1 -= 1
  v0 += 2
  v0 -= 1
  v0 *= 4
""")
    actual = control_flow_graph_feature.get_adjacency_list(graph.nodes, 6)
    expected = [
        [1, 0],
        [2, 1],
        [6, 1],  # while to exit
        [3, 2],
        [4, 3],
        [5, 4],
        [1, 5],
        [6, 6],  # exit to exit
    ]
    self.assertEqual(actual, expected)
def test_evaluate_cfg_nested_while_trace(self): initial_value = 1 # v0 = 1 initial_values = {'v0': initial_value} python_source = """ v0 += 49 v1 = 2 while v1 > 0: v1 -= 1 v2 = 2 while v2 > 0: v2 -= 1 v0 -= 3 v0 += 1 """.strip() cfg = python_programs.to_cfg(python_source) trace_fn = python_interpreter_trace.make_trace_fn(python_source, cfg) final_values = python_interpreter.evaluate_cfg( self.executor, cfg, initial_values=initial_values, trace_fn=trace_fn) final_value = final_values['v0'] expected_trace_cfg_node_indexes = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 5, 6, 7, 8, 5, 2, 3, 4, 5, 6, 7, 8, 5, 6, 7, 8, 5, 2 ] expected_trace_values = [ [{ 'v0': 50 }], [{ 'v0': 50, 'v1': 2 }], [{ 'v0': 50, 'v1': 2, 'vBranch': True }], [{ 'v0': 50, 'v1': 1, 'vBranch': True }], [{ 'v0': 50, 'v1': 1, 'v2': 2, 'vBranch': True }], [{ 'v0': 50, 'v1': 1, 'v2': 2, 'vBranch': True }], [{ 'v0': 50, 'v1': 1, 'v2': 1, 'vBranch': True }], [{ 'v0': 47, 'v1': 1, 'v2': 1, 'vBranch': True }], [{ 'v0': 48, 'v1': 1, 'v2': 1, 'vBranch': True }], [{ 'v0': 48, 'v1': 1, 'v2': 1, 'vBranch': True }], [{ 'v0': 48, 'v1': 1, 'v2': 0, 'vBranch': True }], [{ 'v0': 45, 'v1': 1, 'v2': 0, 'vBranch': True }], [{ 'v0': 46, 'v1': 1, 'v2': 0, 'vBranch': True }], [{ 'v0': 46, 'v1': 1, 'v2': 0, 'vBranch': False }], [{ 'v0': 46, 'v1': 1, 'v2': 0, 'vBranch': True }], [{ 'v0': 46, 'v1': 0, 'v2': 0, 'vBranch': True }], [{ 'v0': 46, 'v1': 0, 'v2': 2, 'vBranch': True }], [{ 'v0': 46, 'v1': 0, 'v2': 2, 'vBranch': True }], [{ 'v0': 46, 'v1': 0, 'v2': 1, 'vBranch': True }], [{ 'v0': 43, 'v1': 0, 'v2': 1, 'vBranch': True }], [{ 'v0': 44, 'v1': 0, 'v2': 1, 'vBranch': True }], [{ 'v0': 44, 'v1': 0, 'v2': 1, 'vBranch': True }], [{ 'v0': 44, 'v1': 0, 'v2': 0, 'vBranch': True }], [{ 'v0': 41, 'v1': 0, 'v2': 0, 'vBranch': True }], [{ 'v0': 42, 'v1': 0, 'v2': 0, 'vBranch': True }], [{ 'v0': 42, 'v1': 0, 'v2': 0, 'vBranch': False }], [{ 'v0': 42, 'v1': 0, 'v2': 0, 'vBranch': False }], ] expected_cfg_node_index_values = [ [{ 'v0': 50 }], # v0 += 
49 [{ 'v0': 50, 'v1': 2 }], # v1 = 2 [ { 'v0': 50, 'v1': 2, 'vBranch': True }, # vBranch = (v1 > 0) { 'v0': 46, 'v1': 1, 'v2': 0, 'vBranch': True }, { 'v0': 42, 'v1': 0, 'v2': 0, 'vBranch': False } ], [{ 'v0': 50, 'v1': 1, 'vBranch': True }, { 'v0': 46, 'v1': 0, 'v2': 0, 'vBranch': True }], # v1 -= 1 [{ 'v0': 50, 'v1': 1, 'v2': 2, 'vBranch': True }, { 'v0': 46, 'v1': 0, 'v2': 2, 'vBranch': True }], # v2 = 2 [{ 'v0': 50, 'v1': 1, 'v2': 2, 'vBranch': True }, { 'v0': 48, 'v1': 1, 'v2': 1, 'vBranch': True }, { 'v0': 46, 'v1': 1, 'v2': 0, 'vBranch': False }, { 'v0': 46, 'v1': 0, 'v2': 2, 'vBranch': True }, { 'v0': 44, 'v1': 0, 'v2': 1, 'vBranch': True }, { 'v0': 42, 'v1': 0, 'v2': 0, 'vBranch': False }], # vBranch = (v2 > 0) [{ 'v0': 50, 'v1': 1, 'v2': 1, 'vBranch': True }, { 'v0': 48, 'v1': 1, 'v2': 0, 'vBranch': True }, { 'v0': 46, 'v1': 0, 'v2': 1, 'vBranch': True }, { 'v0': 44, 'v1': 0, 'v2': 0, 'vBranch': True }], # v2 -= 1 [{ 'v0': 47, 'v1': 1, 'v2': 1, 'vBranch': True }, { 'v0': 45, 'v1': 1, 'v2': 0, 'vBranch': True }, { 'v0': 43, 'v1': 0, 'v2': 1, 'vBranch': True }, { 'v0': 41, 'v1': 0, 'v2': 0, 'vBranch': True }], # v0 -= 3 [{ 'v0': 48, 'v1': 1, 'v2': 1, 'vBranch': True }, { 'v0': 46, 'v1': 1, 'v2': 0, 'vBranch': True }, { 'v0': 44, 'v1': 0, 'v2': 1, 'vBranch': True }, { 'v0': 42, 'v1': 0, 'v2': 0, 'vBranch': True }], # v0 += 1 ] self.assertEqual(trace_fn.trace.trace_cfg_node_indexes, expected_trace_cfg_node_indexes) self.assertEqual(trace_fn.trace.trace_values, expected_trace_values) self.assertEqual(trace_fn.trace.cfg_node_index_values, expected_cfg_node_index_values) self.assertEqual(final_value, 42)
def test_evaluate_cfg_trace(self):
    """Checks the recorded trace of a program with a single while loop.

    The program runs from v0 = 1 with a trace function attached. The test
    compares the visited cfg node indexes, the visited line indexes, the
    values recorded per trace step, and the values grouped per node and per
    line against hand-written expectations. The final v0 must be 32.
    """
    initial_value = 1  # v0 = 1
    initial_values = {'v0': initial_value}
    python_source = """
v0 += 5
v0 *= 6
v1 = 2
while v1 > 0:
  v1 -= 1
  v0 -= 3
  v0 += 1
""".strip()
    cfg = python_programs.to_cfg(python_source)
    trace_fn = python_interpreter_trace.make_trace_fn(python_source, cfg)
    final_values = python_interpreter.evaluate_cfg(
        self.executor, cfg, initial_values=initial_values, trace_fn=trace_fn)
    final_value = final_values['v0']

    # Node 3 is the loop condition; nodes 4-6 form the loop body.
    expected_trace_cfg_node_indexes = [0, 1, 2, 3, 4, 5, 6, 3, 4, 5, 6, 3]
    # This program has one statement per line, so the line indexes are
    # expected to equal the node indexes.
    expected_trace_line_indexes = expected_trace_cfg_node_indexes.copy()
    # One entry per trace step, aligned with the node index list above.
    expected_trace_values = [
        [{'v0': 6}],
        [{'v0': 36}],
        [{'v0': 36, 'v1': 2}],
        [{'v0': 36, 'v1': 2, 'vBranch': True}],
        [{'v0': 36, 'v1': 1, 'vBranch': True}],
        [{'v0': 33, 'v1': 1, 'vBranch': True}],
        [{'v0': 34, 'v1': 1, 'vBranch': True}],
        [{'v0': 34, 'v1': 1, 'vBranch': True}],
        [{'v0': 34, 'v1': 0, 'vBranch': True}],
        [{'v0': 31, 'v1': 0, 'vBranch': True}],
        [{'v0': 32, 'v1': 0, 'vBranch': True}],
        [{'v0': 32, 'v1': 0, 'vBranch': False}],
    ]
    # The same recorded values, grouped by cfg node index.
    expected_cfg_node_index_values = [
        [{'v0': 6}],  # v0 += 5
        [{'v0': 36}],  # v0 *= 6
        [{'v0': 36, 'v1': 2}],  # v1 = 2
        [
            {'v0': 36, 'v1': 2, 'vBranch': True},  # vBranch = (v1 > 0)
            {'v0': 34, 'v1': 1, 'vBranch': True},
            {'v0': 32, 'v1': 0, 'vBranch': False}
        ],
        [
            {'v0': 36, 'v1': 1, 'vBranch': True},  # v1 -= 1
            {'v0': 34, 'v1': 0, 'vBranch': True}
        ],
        [
            {'v0': 33, 'v1': 1, 'vBranch': True},  # v0 -= 3
            {'v0': 31, 'v1': 0, 'vBranch': True}
        ],
        [
            {'v0': 34, 'v1': 1, 'vBranch': True},  # v0 += 1
            {'v0': 32, 'v1': 0, 'vBranch': True}
        ],
    ]
    # Per-line grouping is expected to match the per-node grouping here.
    expected_line_index_values = expected_cfg_node_index_values

    # 36 after the straight-line prefix, then two iterations of (-3, +1).
    self.assertEqual(final_value, 32)
    self.assertEqual(trace_fn.trace.trace_values, expected_trace_values)
    self.assertEqual(trace_fn.trace.trace_cfg_node_indexes,
                     expected_trace_cfg_node_indexes)
    self.assertEqual(trace_fn.trace.trace_line_indexes,
                     expected_trace_line_indexes)
    self.assertEqual(trace_fn.trace.cfg_node_index_values,
                     expected_cfg_node_index_values)
    self.assertEqual(trace_fn.trace.line_index_values,
                     expected_line_index_values)
def _generate_example_from_python_source(executor, base, python_source,
                                         tokens_per_statement,
                                         target_output_length, mod,
                                         output_mod):
    """Generates an example dict from the given statements.

    The program is converted to a control flow graph and evaluated from
    `v0 = 1` with a trace function attached. Features are then built twice
    with `build_representation`: once for the program as written ("code_"
    keys) and once for the sequence of statements actually executed
    ("trace_" keys).

    Args:
      executor: Passed through to `python_interpreter.evaluate_cfg`.
      base: Forwarded to `build_representation` and `encoders.as_nary_list`.
      python_source: Source text of the program.
      tokens_per_statement: Forwarded to `build_representation`.
      target_output_length: Forwarded to `build_representation` and
        `encoders.as_nary_list`.
      mod: Passed to `evaluate_cfg` as its `mod` argument.
      output_mod: If not None, the target output is reduced modulo this value.

    Returns:
      A dict with the human-readable code and target, the encoded target
      output, `lm_text`, `error_type` ("NoError" or the raised exception's
      class name), the `(cfg, python_source)` pair under "cfg" and
      "cfg_forward", plus every code/trace feature under a "code_"/"trace_"
      prefixed key.
    """
    human_readable_code = python_source
    cfg = python_programs.to_cfg(python_source)
    # Only read by the (currently disabled) full-line trace branch below.
    python_source_lines = python_source.strip().split("\n")

    # TODO(dbieber): This should occur in exactly one location.
    # (also in environment.py)
    values = {"v0": 1}
    trace_fn = python_interpreter_trace.make_trace_fn(python_source, cfg)
    # TODO(dbieber): Evaluating may have already occurred in environment.
    try:
        values = python_interpreter.evaluate_cfg(executor,
                                                 cfg,
                                                 mod=mod,
                                                 initial_values=values,
                                                 trace_fn=trace_fn,
                                                 timeout=200)
        error_type = "NoError"
    except Exception as e:  # pylint: disable=broad-except
        # Any failure is recorded by class name rather than propagated.
        # `values` then stays bound to the initial dict created above.
        error_type = type(e).__name__
    target_output = values["v0"]

    if output_mod is not None:
        try:
            target_output %= output_mod
        except TypeError:
            # v0 does not support `%` with output_mod; fall back to 1.
            target_output = 1

    # Features for the program as written, grouped per cfg node.
    code_features = build_representation(
        python_source, trace_fn.trace.cfg_node_index_values,
        trace_fn.trace.cfg_node_index_branch_decisions, tokens_per_statement,
        base, target_output_length, output_mod)

    # Hard-coded toggle: with False, the trace source is rebuilt by unparsing
    # each executed cfg node instead of copying whole source lines.
    use_full_lines_in_trace = False
    if use_full_lines_in_trace:
        trace_lines = [
            python_source_lines[line_index]
            for line_index in trace_fn.trace.trace_line_indexes
        ]
        trace_python_source = "\n".join(trace_lines)
    else:
        trace_control_flow_nodes = [
            cfg.nodes[cfg_node_index]
            for cfg_node_index in trace_fn.trace.trace_cfg_node_indexes
        ]
        # TODO(dbieber): This also occurs in environment `state_as_example`.
        # Refactor.
        # Rebinds python_source_lines: from here on it holds one unparsed
        # statement per executed node, not the original source lines.
        python_source_lines = []
        for control_flow_node in trace_control_flow_nodes:
            ast_node = control_flow_node.instruction.node
            python_source_line = astunparse.unparse(ast_node,
                                                    version_info=(3, 5))
            python_source_line = python_source_line.strip()
            python_source_lines.append(python_source_line)
        trace_python_source = "\n".join(python_source_lines)
    # Features for the executed statement sequence, one entry per trace step.
    trace_features = build_representation(
        trace_python_source, trace_fn.trace.trace_values,
        trace_fn.trace.trace_branch_decisions, tokens_per_statement, base,
        target_output_length, output_mod)

    target_output_list = encoders.as_nary_list(target_output, base,
                                               target_output_length)

    lm_text = f"{human_readable_code} SEP {target_output}"

    example_dict = {
        # human_readable_features
        "human_readable_code": human_readable_code,
        # "original_human_readable_code": human_readable_code,
        "human_readable_target_output": str(target_output),
        # target_output
        "target_output": target_output_list,
        "target_output_length": target_output_length,
        "lm_text": lm_text,
        "error_type": error_type,
        # control flow graph
        "cfg": (cfg, python_source),
        "cfg_forward": (cfg, python_source),
    }
    example_dict.update(
        {"code_" + key: value for key, value in code_features.items()})
    example_dict.update(
        {"trace_" + key: value for key, value in trace_features.items()})
    return example_dict