def load_cache(path):
    """
    Given a dataset path, loads the programs from it to a form returned by gen_programs():
    A dict with programs as keys and examples as values.

    Each non-blank line of the file is parsed as one JSON object. The object's
    'program' entry is parsed with Program.parse, and the object itself is handed
    to Example.from_line to obtain its examples.

    :param path: path of the dataset file (one JSON object per line).
    :return: dict mapping each parsed program to a list of
        (inputs, output) tuples, one per example.
    """
    # Use a context manager so the file handle is closed deterministically,
    # and skip whitespace-only lines (e.g. a trailing newline) which are not
    # valid JSON and would otherwise abort the whole load.
    with open(path, 'r') as cache_file:
        lines = [json.loads(raw) for raw in cache_file if raw.strip()]

    total = len(lines)
    examples = {}
    for i, line in enumerate(lines):
        print("\rLoading program cache... %d\\%d" % (i, total), end="")
        program = Program.parse(line['program'])
        examples[program] = [(ex.inputs, ex.output) for ex in Example.from_line(line)]

    # Terminate the carriage-return progress line.
    print('')
    return examples
def generate_prog_data(line):
    """
    Convert one JSON dataset line into per-statement data by replaying its program.

    The program stored under the line's 'program' key is stepped through a
    ProgramEnv built from the line's examples. For every statement, four items
    are recorded (all returned lists have one entry per statement):

    :param line: a JSON-encoded string (trailing whitespace is stripped).
    :return: tuple (inputs, statements, drop, operators) where
        inputs     - env.get_encoding() taken before the statement is executed,
        statements - statement_to_index of the statement after its integer
                     arguments were remapped to positions in env.real_var_idxs,
        drop       - a 0/1 list of length params.max_program_vars; 1 marks a slot
                     that is beyond env.num_vars or whose variable is not
                     referenced by the current or any later statement,
        operators  - operator_to_index of the statement's operator.
    """
    record = json.loads(line.rstrip())
    examples = Example.from_line(record)
    env = ProgramEnv(examples)
    program = Program.parse(record['program'])

    inputs, statements, drop, operators = [], [], [], []

    for step_idx, original in enumerate(program.statements):
        inputs.append(env.get_encoding())

        # Translate absolute indices to post-drop indices
        function = original.function
        remapped_args = [
            env.real_var_idxs.index(arg) if isinstance(arg, int) else arg
            for arg in original.args
        ]
        statement = Statement(function, remapped_args)
        statements.append(statement_to_index[statement])

        # Absolute variable indices referenced from this statement onwards.
        still_used = {
            arg
            for upcoming in program.statements[step_idx:]
            for arg in upcoming.args
            if isinstance(arg, int)
        }

        to_drop = [
            1 if (slot >= env.num_vars or env.real_var_idxs[slot] not in still_used) else 0
            for slot in range(params.max_program_vars)
        ]
        drop.append(to_drop)

        operators.append(operator_to_index[Operator.from_statement(statement)])

        if env.num_vars >= params.max_program_vars:
            # Choose a random var (that is not used anymore) to drop.
            candidates = [slot for slot, flag in enumerate(to_drop) if flag > 0]
            env.step(statement, random.choice(candidates))
        else:
            env.step(statement)

    return inputs, statements, drop, operators
def solve_problem_worker(data):
    """
    Try to solve a single problem with the configured search method.

    Besides its argument, this function reads the following names from the
    enclosing scope: method, model, max_program_len, timeout, max_beam_size,
    params, counter and fail_counter.

    :param data: problem description; passed to Example.from_line, and its
        data['examples'][0]['inputs'] is used to derive the program's input types.
    :return: the dict produced by the search routine, with its 'result' entry
        replaced by either the string "Failed" or the encoded solution program.
    :raises ValueError: if method is neither 'beam' nor 'dfs'.
    """
    examples = Example.from_line(data)
    env = ProgramEnv(examples)

    if method == 'beam':
        solution = cab(env, max_program_len, model, params.cab_beam_size, params.cab_width,
                       params.cab_width_growth, timeout, max_beam_size=max_beam_size)
    elif method == 'dfs':
        solution = dfs(env, max_program_len, model, params.dfs_max_width, timeout)
    else:
        # Without this branch `solution` would stay unbound and the function
        # would fail further down with an unrelated-looking UnboundLocalError.
        raise ValueError("Unknown search method: %r (expected 'beam' or 'dfs')" % (method,))

    counter.value += 1
    print("\rSolving problems... %d (failed: %d)" % (counter.value, fail_counter.value), end="")

    if solution['result'] is False:
        solution['result'] = "Failed"
        fail_counter.value += 1
    else:
        # Input types are inferred from the first example's inputs.
        values = [Value.construct(x) for x in data['examples'][0]['inputs']]
        value_types = [x.type for x in values]
        solution['result'] = Program(value_types, solution['result']).encode()
    return solution
def solve_problem_worker(args):
    """
    Run robustfill_cab on a single problem and report the outcome.

    Besides its argument, this function reads program_len, model, timeout,
    counter and fail_counter from the enclosing scope.

    :param args: a 7-item sequence
        (line, input, input_lens, output, output_lens, input_masks, output_masks).
        `line` is passed to Example.from_line and its 'program' entry is parsed to
        obtain the input types; the remaining six items are forwarded unchanged
        to robustfill_cab.
    :return: the dict returned by robustfill_cab, with 'result' replaced by
        "Failed" when the search returned None, otherwise by the string form of
        the found program.
    """
    (line, enc_input, enc_input_lens, enc_output, enc_output_lens,
     enc_input_masks, enc_output_masks) = args

    examples = Example.from_line(line)
    # Only the reference program's input types are needed later on.
    reference = Program.parse(line['program'])
    env = ProgramEnv(examples)

    outcome = robustfill_cab(
        env, program_len, model, 100, 48, timeout,
        enc_input, enc_input_lens, enc_output, enc_output_lens,
        enc_input_masks, enc_output_masks,
    )

    counter.value += 1
    print("\rSolving problems... %d (failed: %d)" % (counter.value, fail_counter.value), end="")

    if outcome['result'] is not None:
        outcome['result'] = str(Program(reference.input_types, outcome['result']))
        return outcome

    outcome['result'] = "Failed"
    fail_counter.value += 1
    return outcome
def generate_prog_data(line):
    """
    Convert one JSON dataset line into token arrays and padding masks.

    For every example of the line, the inputs are tokenized with var_to_tokens
    and concatenated, the output is tokenized, and both are passed to pad_seq
    with io_max_seq_len. The program under the line's 'program' key is tokenized
    with program_to_tokens and passed to pad_seq with program_max_seq_len.
    The value returned by pad_seq is used as the number of leading mask
    positions set to 1 (presumably pad_seq pads in place and returns the
    unpadded length - confirm against its definition).

    :param line: a JSON-encoded string (trailing whitespace is stripped).
    :return: dict of numpy arrays with keys 'input', 'input_lens', 'output',
        'output_lens', 'target', 'target_len', 'input_padding_mask',
        'output_padding_mask' and 'dec_padding_mask'.
    """
    record = json.loads(line.rstrip())
    examples = Example.from_line(record)
    program = Program.parse(record['program'])

    example_count = len(examples)
    input_padding_mask = np.zeros((example_count, io_max_seq_len))
    output_padding_mask = np.zeros((example_count, io_max_seq_len))

    enc_inputs, enc_input_lens = [], []
    enc_outputs, enc_output_lens = [], []

    for row, example in enumerate(examples):
        # All inputs of one example are flattened into a single token sequence.
        input_tokens = []
        for inp in example.inputs:
            input_tokens.extend(var_to_tokens(inp))
        input_len = pad_seq(input_tokens, io_max_seq_len)
        enc_input_lens.append(input_len)
        input_padding_mask[row, :input_len] = 1
        enc_inputs.append(np.array(input_tokens))

        output_tokens = var_to_tokens(example.output)
        output_len = pad_seq(output_tokens, io_max_seq_len)
        enc_output_lens.append(output_len)
        enc_outputs.append(output_tokens)
        output_padding_mask[row, :output_len] = 1

    target_tokens = program_to_tokens(program)
    target_len = pad_seq(target_tokens, program_max_seq_len)

    dec_padding_mask = np.zeros(program_max_seq_len)
    for position in range(target_len):
        dec_padding_mask[position] = 1

    collected = {
        'input': enc_inputs,
        'input_lens': enc_input_lens,
        'output': enc_outputs,
        'output_lens': enc_output_lens,
        'target': target_tokens,
        'target_len': target_len,
    }
    prog_data = {key: np.array(value) for key, value in collected.items()}

    prog_data['input_padding_mask'] = input_padding_mask
    prog_data['output_padding_mask'] = output_padding_mask
    prog_data['dec_padding_mask'] = dec_padding_mask
    return prog_data