def convert_for_step_return(step_values: tuple) -> StepOutput:
    """Wrap a raw step tuple in a ``StepOutput``.

    A two-item tuple is read as ``(program, argument values)`` and is tagged
    with ``PG_CONTINUE``.  Any other length is read as
    ``(flag, program, argument values)``, taken from the first three items.
    The argument values are wrapped in ``IntegerArguments`` in both cases.
    """
    is_pair = len(step_values) == 2
    # A pair carries no explicit flag, so its fields sit one slot earlier.
    offset = 0 if is_pair else 1
    flag = PG_CONTINUE if is_pair else step_values[0]
    return StepOutput(
        flag,
        step_values[offset],
        IntegerArguments(step_values[offset + 1]),
    )
def convert_for_step_return(step_values: tuple) -> StepOutput:
    """Wrap a raw step tuple in a ``StepOutput``.

    Exactly three items are read as ``(flag, program, argument values)``.
    Every other length is read as ``(program, argument values)`` and is
    tagged with ``PG_CONTINUE``.  The argument values are wrapped in
    ``IntegerArguments`` in both cases.
    """
    if len(step_values) != 3:
        return StepOutput(
            PG_CONTINUE,
            step_values[0],
            IntegerArguments(step_values[1]),
        )

    # Three items: the StepOutput at the end of a primitive program, whose
    # first item specifies PG_RETURN.
    return StepOutput(
        step_values[0],
        step_values[1],
        IntegerArguments(step_values[2]),
    )
def step(self, env_observation: np.ndarray, pg: Program, arguments: IntegerArguments) -> StepOutput:
    """Run the model for one step and decode its prediction.

    The observation, program and arguments are bundled into a ``StepInput``,
    converted with ``self.convert_input`` and passed to
    ``self.model.predict``.  From the prediction, the first output becomes
    the ``r`` field of the result, the argmax of the second output is looked
    up in ``self.program_set``, and all remaining outputs are stacked into
    the values of an ``IntegerArguments``.
    """
    step_input = StepInput(env_observation, pg, arguments)
    # With batch_size == 1 the prediction comes back as a single row.
    outputs = self.model.predict(self.convert_input(step_input), batch_size=1)

    r_value, program_vec = outputs[0], outputs[1]
    next_program = self.program_set.get(program_vec.argmax())
    next_arguments = IntegerArguments(values=np.stack(outputs[2:]))
    return StepOutput(r_value, next_program, next_arguments)
def run_npi(addition_env, npi_runner, program, data):
    """Run ``program`` on one addition problem and record the outcome.

    Reads ``data['in1']`` and ``data['in2']``.  Writes ``data['expect']``
    (their sum), ``data['result']`` (the environment's output after the run)
    and ``data['correct']`` (whether the two are equal).
    """
    lhs, rhs = data['in1'], data['in2']
    data['expect'] = lhs + rhs

    addition_env.setup_problem(lhs, rhs)
    npi_runner.reset()
    npi_runner.display_env(addition_env, force=True)
    npi_runner.npi_program_interface(addition_env, program, IntegerArguments())

    output = addition_env.get_output()
    data['result'] = output
    data['correct'] = output == data['expect']
def run_npi(addition_env, npi_runner, program, data):
    """Run ``program`` on one addition problem and record the outcome.

    Reads ``data['in1']`` and ``data['in2']``.  Writes ``data['expect']``
    (their sum), ``data['result']`` (the environment's output after the run)
    and ``data['correct']`` (whether the two are equal).  The program is
    started with ``IntegerArguments(ARG_NUM, ARG_DEPTH)``.
    """
    first, second = data['in1'], data['in2']
    data['expect'] = first + second

    addition_env.setup_problem(first, second)
    npi_runner.reset()
    initial_args = IntegerArguments(ARG_NUM, ARG_DEPTH)
    npi_runner.npi_program_interface(addition_env, program, initial_args)

    outcome = addition_env.get_output()
    data['result'] = outcome
    data['correct'] = outcome == data['expect']
def run_npi(bubblesort_env, npi_runner, program, data):
    """Run ``program`` on one sorting problem and record the outcome.

    Reads ``data['raw']``.  Writes ``data['expect']`` (``sorted`` applied to
    the raw input), ``data['result']`` (the environment's output after the
    run) and ``data['correct']`` (whether the two are equal).
    """
    unsorted_values = data['raw']
    data['expect'] = sorted(unsorted_values)

    bubblesort_env.setup_problem(unsorted_values)
    npi_runner.reset()
    npi_runner.display_env(bubblesort_env, force=True)
    npi_runner.npi_program_interface(bubblesort_env, program, IntegerArguments())

    produced = bubblesort_env.get_output()
    data['result'] = produced
    data['correct'] = produced == data['expect']
def run_npi(multiplication_env, npi_runner, program, data):
    """Run ``program`` on one multiplication problem and record the outcome.

    Reads ``data['mul1']`` and ``data['mul2']``.  Writes ``data['expect']``
    (their product), ``data['result']`` (the environment's output after the
    run) and ``data['correct']`` (whether the two are equal).
    """
    factor_a, factor_b = data['mul1'], data['mul2']
    data['expect'] = factor_a * factor_b

    multiplication_env.setup_problem(factor_a, factor_b)
    npi_runner.reset()
    # TODO bug here: display_env
    npi_runner.display_env(multiplication_env, force=True)
    npi_runner.npi_program_interface(multiplication_env, program, IntegerArguments())

    product = multiplication_env.get_output()
    data['result'] = product
    data['correct'] = product == data['expect']
def convert_output(self, p_out: StepOutput):
    """Turn a ``StepOutput`` into target arrays and matching weights.

    Returns ``(targets, weights)``.  ``targets`` holds, in order, the ``r``
    value, the program vector and one entry per row of the argument values,
    each reshaped to ``(self.batch_size, -1)``.  ``weights`` holds one
    single-element array per entry: ``1.`` for ``r``, for a present program
    and for the arguments that program declares, and ``1e-10`` for a missing
    program and for the remaining argument rows (presumably so training
    ignores them -- confirm against the training code).
    """
    targets = [np.array((p_out.r, ))]

    if p_out.program:
        arg_values = p_out.arguments.values
        used_args = len(p_out.program.args or [])
        targets.append(p_out.program.to_one_hot(PROGRAM_VEC_SIZE))
        program_weight = 1.
    else:
        # No program: fall back to default arguments and an all-zero vector.
        arg_values = IntegerArguments().values
        used_args = 0
        targets.append(np.zeros((PROGRAM_VEC_SIZE, )))
        program_weight = 1e-10

    # Split by each argument: every row becomes its own target.
    targets.extend(arg_values)

    unused_args = len(arg_values) - used_args
    weight_values = [1., program_weight] + [1.] * used_args + [1e-10] * unused_args
    weights = [np.array([value]) for value in weight_values]

    reshaped = [target.reshape((self.batch_size, -1)) for target in targets]
    return reshaped, weights
def convert_for_step_return(step_values):
    """Wrap a raw step sequence in a ``StepOutput``.

    Two items are read as ``(program, argument values)`` and tagged with
    ``PG_CONTINUE``.  Any other length is read as
    ``(flag, program, argument values)``, taken from the first three items.
    The argument values are wrapped in
    ``IntegerArguments(ARG_NUM, ARG_DEPTH, ...)`` in both cases.
    """
    if len(step_values) != 2:
        flag, program, raw_args = step_values[0], step_values[1], step_values[2]
    else:
        # No explicit flag supplied, so the step continues.
        flag, program, raw_args = PG_CONTINUE, step_values[0], step_values[1]

    return StepOutput(flag, program, IntegerArguments(ARG_NUM, ARG_DEPTH, raw_args))