def gen_program_worker(input_types):
    """
    Generate programs with the given input types.

    Statements are generated by choosing a function randomly, and then
    sampling parameters so that unused variables take precedence.
    Programs that have unused variables are discarded.

    Generation restarts from an empty program until the module-level
    ``progress_counter`` reaches ``num_programs``.

    :param input_types: input types handed to ``Program`` for every
        generated program.
    :return: the set of programs collected by this call.
    """

    def helper(functions, program, programs):
        """
        Extend ``program`` depth-first, one statement at a time.

        Returns True as soon as a program has been added to ``programs`` or
        the global quota is already met; returns False when this branch
        produced nothing, so the caller tries its next candidate statement.
        """
        # Shuffle a private copy. Shuffling the caller's list in place would
        # reorder it underneath the `for function in functions` loops of the
        # outer recursion frames (skipping or repeating candidates after
        # backtracking) and would permanently reorder ALL_FUNCTIONS.
        functions = list(functions)
        random.shuffle(functions)

        if progress_counter.value >= num_programs:
            return True

        if len(program) >= program_len:
            # A full-length program is kept only if it is new and every
            # variable is used.
            if get_unused_indices(program) or program in programs:
                return False
            programs.add(program)
            progress_counter.value += 1
            print("\rGenerating programs... %d\\%d" % (progress_counter.value, num_programs), end="")
            return True

        # Variable indices grouped by type, most recent variable first.
        type_to_vars = collections.defaultdict(list)
        for i, typ in enumerate(program.var_types):
            type_to_vars[typ].insert(0, i)

        # Move free indices to the front.
        # Each pass over a type's list moves its free variables to the front;
        # the pass runs once per occurrence of the type in var_types.
        free_indxs = get_free_indices(program, program_len)
        for typ in program.var_types:
            for var in type_to_vars[typ]:
                if var in free_indxs:
                    type_to_vars[typ].remove(var)
                    type_to_vars[typ].insert(0, var)

        # Lambdas are candidate arguments too, after the variables.
        for func in LAMBDAS:
            type_to_vars[func.type].append(func)

        used = set(program.statements)
        for function in functions:
            for args in iterate_inputs(function, type_to_vars):
                # Only keep statements that consume at least one free variable.
                if not any(arg in free_indxs for arg in args):
                    continue
                statement = Statement(function, args)
                if statement in used:
                    continue

                next_program = Program(program.input_types, program.statements + [statement])
                if helper(functions, next_program, programs):
                    return True

        return False

    program_base = Program(input_types, [])
    res = set()
    while progress_counter.value < num_programs:
        helper(ALL_FUNCTIONS, program_base, res)
    return res
def solve_problem_worker(args):
    """
    Run ``robustfill_cab`` on a single problem and report the outcome.

    :param args: 7-tuple whose first entry is the parsed dataset line; the
        remaining six entries (input, input lengths, output, output lengths,
        input masks, output masks) are forwarded unchanged to
        ``robustfill_cab``.
    :return: the dict returned by ``robustfill_cab`` with its ``'result'``
        entry replaced by ``"Failed"`` or by the string form of the program
        that was found.
    """
    (line, inp, inp_lens, outp, outp_lens, inp_masks, outp_masks) = args

    examples = Example.from_line(line)
    reference = Program.parse(line['program'])
    env = ProgramEnv(examples)

    # NOTE(review): 100 and 48 are positional arguments whose meaning is
    # defined by robustfill_cab's signature, which is not visible here.
    res = robustfill_cab(
        env, program_len, model, 100, 48, timeout,
        inp, inp_lens, outp, outp_lens, inp_masks, outp_masks,
    )

    # The progress line is printed before a failure of this problem is counted.
    counter.value += 1
    print("\rSolving problems... %d (failed: %d)" % (counter.value, fail_counter.value), end="")

    found = res['result']
    if found is None:
        res['result'] = "Failed"
        fail_counter.value += 1
    else:
        # Only the input types are taken from the reference program.
        res['result'] = str(Program(reference.input_types, found))
    return res
def load_cache(path):
    """
    Given a dataset path, loads the programs from it to a form returned by
    gen_programs(): a dict with programs as keys and examples as values.

    :param path: path of a file holding one JSON object per line; each object
        must have a ``'program'`` entry and whatever ``Example.from_line``
        reads.
    :return: dict mapping each parsed ``Program`` to a list of
        ``(inputs, output)`` tuples. An empty file yields an empty dict.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(path, 'r') as cache_file:
        lines = [json.loads(raw) for raw in cache_file]

    examples = {}
    for i, line in enumerate(lines):
        print("\rLoading program cache... %d\\%d" % (i, len(lines)), end="")
        program = Program.parse(line['program'])
        p_examples = Example.from_line(line)
        examples[program] = [(ex.inputs, ex.output) for ex in p_examples]
    # Terminate the carriage-return progress line.
    print('')
    return examples
def generate_prog_data(line):
    """
    Turn one JSON dataset line into per-statement data.

    The program stored in the line is replayed statement by statement in a
    ``ProgramEnv`` built from the line's examples. Before each statement is
    executed the function records:

      * the environment encoding (``env.get_encoding()``),
      * the index of the statement in ``statement_to_index``, after its
        integer arguments were rewritten to their positions in
        ``env.real_var_idxs``,
      * a 0/1 vector over ``params.max_program_vars`` slots, where 1 marks a
        slot that is empty or holds a variable that neither the current nor
        any later statement references,
      * the index of the statement's operator in ``operator_to_index``.

    :param line: one raw line of the dataset (JSON text).
    :return: tuple ``(inputs, statements, drop, operators)`` of parallel lists.
    """
    record = json.loads(line.rstrip())
    examples = Example.from_line(record)
    env = ProgramEnv(examples)
    program = Program.parse(record['program'])

    inputs, statements, drop, operators = [], [], [], []

    for step_idx, raw_statement in enumerate(program.statements):
        inputs.append(env.get_encoding())

        # Translate absolute indices to post-drop indices
        func = raw_statement.function
        translated_args = [
            env.real_var_idxs.index(arg) if isinstance(arg, int) else arg
            for arg in raw_statement.args
        ]
        statement = Statement(func, translated_args)
        statements.append(statement_to_index[statement])

        # Absolute variable indices referenced from this statement onwards.
        still_needed = []
        for upcoming in program.statements[step_idx:]:
            still_needed.extend(arg for arg in upcoming.args if isinstance(arg, int))

        to_drop = [
            1 if slot >= env.num_vars or env.real_var_idxs[slot] not in still_needed else 0
            for slot in range(params.max_program_vars)
        ]
        drop.append(to_drop)

        operator = Operator.from_statement(statement)
        operators.append(operator_to_index[operator])

        if env.num_vars < params.max_program_vars:
            env.step(statement)
        else:
            # Choose a random var (that is not used anymore) to drop.
            droppable = [slot for slot, flag in enumerate(to_drop) if flag > 0]
            env.step(statement, random.choice(droppable))

    return inputs, statements, drop, operators
def helper(functions, program, programs):
    """
    Extend ``program`` depth-first, one statement at a time, until it reaches
    ``program_len`` statements.

    :param functions: candidate functions; the caller's list is not modified.
    :param program: the partial program to extend.
    :param programs: set that receives a completed program.
    :return: True as soon as a program has been added to ``programs`` or the
        global quota (``num_programs``) is already met; False when this
        branch produced nothing, so the caller tries its next candidate.
    """
    # Shuffle a private copy. Shuffling the caller's list in place would
    # reorder it underneath the `for function in functions` loops of the
    # outer recursion frames (skipping or repeating candidates after
    # backtracking) and would permanently reorder the caller's list.
    functions = list(functions)
    random.shuffle(functions)

    if progress_counter.value >= num_programs:
        return True

    if len(program) >= program_len:
        # A full-length program is kept only if it is new and every variable
        # is used.
        if get_unused_indices(program) or program in programs:
            return False
        programs.add(program)
        progress_counter.value += 1
        print("\rGenerating programs... %d\\%d" % (progress_counter.value, num_programs), end="")
        return True

    # Variable indices grouped by type, most recent variable first.
    type_to_vars = collections.defaultdict(list)
    for i, typ in enumerate(program.var_types):
        type_to_vars[typ].insert(0, i)

    # Move free indices to the front.
    # Each pass over a type's list moves its free variables to the front;
    # the pass runs once per occurrence of the type in var_types.
    free_indxs = get_free_indices(program, program_len)
    for typ in program.var_types:
        for var in type_to_vars[typ]:
            if var in free_indxs:
                type_to_vars[typ].remove(var)
                type_to_vars[typ].insert(0, var)

    # Lambdas are candidate arguments too, after the variables.
    for func in LAMBDAS:
        type_to_vars[func.type].append(func)

    used = set(program.statements)
    for function in functions:
        for args in iterate_inputs(function, type_to_vars):
            # Only keep statements that consume at least one free variable.
            if not any(arg in free_indxs for arg in args):
                continue
            statement = Statement(function, args)
            if statement in used:
                continue

            next_program = Program(program.input_types, program.statements + [statement])
            if helper(functions, next_program, programs):
                return True

    return False
def solve_problem_worker(data):
    """
    Search for a program that solves one problem and report the outcome.

    The search strategy is selected by the module-level ``method``:
    ``'beam'`` runs ``cab`` and ``'dfs'`` runs ``dfs``.

    :param data: parsed dataset entry; ``data['examples'][0]['inputs']`` is
        used to derive the input types of the resulting program.
    :return: the dict returned by the search with its ``'result'`` entry
        replaced by ``"Failed"`` or by the encoded program.
    :raises ValueError: if ``method`` is neither ``'beam'`` nor ``'dfs'``.
    """
    examples = Example.from_line(data)
    env = ProgramEnv(examples)

    if method == 'beam':
        solution = cab(env, max_program_len, model, params.cab_beam_size, params.cab_width,
                       params.cab_width_growth, timeout, max_beam_size=max_beam_size)
    elif method == 'dfs':
        solution = dfs(env, max_program_len, model, params.dfs_max_width, timeout)
    else:
        # Without this branch `solution` would be unbound and the function
        # would fail further down with an unrelated UnboundLocalError.
        raise ValueError("Unknown search method: %r (expected 'beam' or 'dfs')" % (method,))

    # The progress line is printed before a failure of this problem is counted.
    counter.value += 1
    print("\rSolving problems... %d (failed: %d)" % (counter.value, fail_counter.value), end="")

    if solution['result'] is False:
        solution['result'] = "Failed"
        fail_counter.value += 1
    else:
        # Input types are taken from the first example's inputs.
        values = [Value.construct(x) for x in data['examples'][0]['inputs']]
        value_types = [x.type for x in values]
        solution['result'] = Program(value_types, solution['result']).encode()
    return solution
def tokens_to_program(seq, input_types):
    """
    Decode a sequence of vocabulary indices into a ``Program``.

    Each index is mapped through ``reverse_program_vocab``. An optional
    leading ``START_PROGRAM_TOKEN`` is skipped and decoding stops at the
    first ``END_PROGRAM_TOKEN`` found in statement position, or at the end
    of the sequence. Every statement is a function token followed by as many
    argument tokens as the function's ``input_type`` declares (the length of
    the tuple, or one argument if it is not a tuple).

    :param seq: iterable of vocabulary indices; may be empty.
    :param input_types: input types handed to ``Program``.
    :return: the decoded ``Program`` (with no statements for an empty
        sequence), or None if a statement does not start with a known
        function or the sequence ends before all its arguments were read.
    """
    tokens = [reverse_program_vocab[token] for token in seq]

    # Guard against an empty sequence before peeking at the first token.
    if tokens and tokens[0] == START_PROGRAM_TOKEN:
        tokens = tokens[1:]

    indx = 0
    statements = []
    while indx < len(tokens) and tokens[indx] != END_PROGRAM_TOKEN:
        token = tokens[indx]
        if token not in ALL_FUNCTIONS:
            return None

        if isinstance(token.input_type, tuple):
            num_args = len(token.input_type)
        else:
            num_args = 1

        args = tokens[indx + 1:indx + 1 + num_args]
        if len(args) < num_args:
            # The sequence was cut off mid-statement: treat it as invalid
            # rather than building a statement with missing arguments.
            return None

        statements.append(Statement(token, args))
        indx = indx + 1 + num_args

    return Program(input_types, statements)
def generate_prog_data(line):
    """
    Turn one JSON dataset line into padded token arrays.

    For every example of the line, the tokens of all inputs are concatenated
    and padded to ``io_max_seq_len``; the output tokens are padded the same
    way. The program is tokenized and padded to ``program_max_seq_len``.
    Each padding mask holds 1 in the first ``length`` positions, where
    ``length`` is the value returned by ``pad_seq``, and 0 elsewhere.

    NOTE(review): this relies on ``pad_seq`` padding the list in place and
    returning the unpadded length -- confirm against its definition.

    :param line: one raw line of the dataset (JSON text).
    :return: dict of numpy arrays with keys ``input``, ``input_lens``,
        ``output``, ``output_lens``, ``target``, ``target_len``,
        ``input_padding_mask``, ``output_padding_mask`` and
        ``dec_padding_mask``.
    """
    record = json.loads(line.rstrip())
    examples = Example.from_line(record)
    program = Program.parse(record['program'])

    num_examples = len(examples)
    input_padding_mask = np.zeros((num_examples, io_max_seq_len))
    output_padding_mask = np.zeros((num_examples, io_max_seq_len))

    inputs, input_lens, outputs, output_lens = [], [], [], []
    for row, example in enumerate(examples):
        # All inputs of an example share a single token sequence.
        input_tokens = []
        for value in example.inputs:
            input_tokens += var_to_tokens(value)
        input_len = pad_seq(input_tokens, io_max_seq_len)
        input_lens.append(input_len)
        input_padding_mask[row, :input_len] = 1
        inputs.append(np.array(input_tokens))

        output_tokens = var_to_tokens(example.output)
        output_len = pad_seq(output_tokens, io_max_seq_len)
        output_lens.append(output_len)
        outputs.append(output_tokens)
        output_padding_mask[row, :output_len] = 1

    target = program_to_tokens(program)
    target_len = pad_seq(target, program_max_seq_len)

    dec_padding_mask = np.zeros(program_max_seq_len)
    for pos in range(target_len):
        dec_padding_mask[pos] = 1

    prog_data = {
        'input': np.array(inputs),
        'input_lens': np.array(input_lens),
        'output': np.array(outputs),
        'output_lens': np.array(output_lens),
        'target': np.array(target),
        'target_len': np.array(target_len),
    }
    prog_data['input_padding_mask'] = input_padding_mask
    prog_data['output_padding_mask'] = output_padding_mask
    prog_data['dec_padding_mask'] = dec_padding_mask
    return prog_data
def get_prg(prg_str):
    """Return the result of ``Program.parse`` applied to ``prg_str``."""
    parsed = Program.parse(prg_str)
    return parsed