def test_meta_evaluator(): set_seed(100) tasks = SetTaskSampler(PointEnv) max_path_length = 200 with tempfile.TemporaryDirectory() as log_dir_name: runner = LocalRunner( SnapshotConfig(snapshot_dir=log_dir_name, snapshot_mode='last', snapshot_gap=1)) env = GarageEnv(PointEnv()) algo = OptimalActionInference(env=env, max_path_length=max_path_length) runner.setup(algo, env) meta_eval = MetaEvaluator(runner, test_task_sampler=tasks, max_path_length=max_path_length, n_test_tasks=10) log_file = tempfile.NamedTemporaryFile() csv_output = CsvOutput(log_file.name) logger.add_output(csv_output) meta_eval.evaluate(algo) logger.log(tabular) meta_eval.evaluate(algo) logger.log(tabular) logger.dump_output_type(CsvOutput) logger.remove_output_type(CsvOutput) with open(log_file.name, 'r') as file: rows = list(csv.DictReader(file)) assert len(rows) == 2 assert float(rows[0]['MetaTest/CompletionRate']) < 1.0 assert float(rows[0]['MetaTest/Iteration']) == 0 assert (float(rows[0]['MetaTest/MaxReturn']) >= float( rows[0]['MetaTest/AverageReturn'])) assert (float(rows[0]['MetaTest/AverageReturn']) >= float( rows[0]['MetaTest/MinReturn'])) assert float(rows[1]['MetaTest/Iteration']) == 1
def test_meta_evaluator_with_tf(): set_seed(100) tasks = SetTaskSampler(lambda: GarageEnv(PointEnv())) max_path_length = 200 env = GarageEnv(PointEnv()) n_traj = 3 with tempfile.TemporaryDirectory() as log_dir_name: ctxt = SnapshotConfig(snapshot_dir=log_dir_name, snapshot_mode='none', snapshot_gap=1) with LocalTFRunner(ctxt) as runner: meta_eval = MetaEvaluator(test_task_sampler=tasks, max_path_length=max_path_length, n_test_tasks=10, n_exploration_traj=n_traj) policy = GaussianMLPPolicy(env.spec) algo = MockAlgo(env, policy, max_path_length, n_traj, meta_eval) runner.setup(algo, env) log_file = tempfile.NamedTemporaryFile() csv_output = CsvOutput(log_file.name) logger.add_output(csv_output) meta_eval.evaluate(algo) algo_pickle = cloudpickle.dumps(algo) with tf.Graph().as_default(): with LocalTFRunner(ctxt) as runner: algo2 = cloudpickle.loads(algo_pickle) runner.setup(algo2, env) runner.train(10, 0)
def test_pickle_meta_evaluator(): set_seed(100) tasks = SetTaskSampler(lambda: GarageEnv(PointEnv())) max_path_length = 200 env = GarageEnv(PointEnv()) n_traj = 3 with tempfile.TemporaryDirectory() as log_dir_name: runner = LocalRunner( SnapshotConfig(snapshot_dir=log_dir_name, snapshot_mode='last', snapshot_gap=1)) meta_eval = MetaEvaluator(test_task_sampler=tasks, max_path_length=max_path_length, n_test_tasks=10, n_exploration_traj=n_traj) policy = RandomPolicy(env.spec.action_space) algo = MockAlgo(env, policy, max_path_length, n_traj, meta_eval) runner.setup(algo, env) log_file = tempfile.NamedTemporaryFile() csv_output = CsvOutput(log_file.name) logger.add_output(csv_output) meta_eval.evaluate(algo) meta_eval_pickle = cloudpickle.dumps(meta_eval) meta_eval2 = cloudpickle.loads(meta_eval_pickle) meta_eval2.evaluate(algo)
def test_meta_evaluator_n_traj(): set_seed(100) tasks = SetTaskSampler(PointEnv) max_path_length = 200 env = GarageEnv(PointEnv()) n_traj = 3 with tempfile.TemporaryDirectory() as log_dir_name: runner = LocalRunner( SnapshotConfig(snapshot_dir=log_dir_name, snapshot_mode='last', snapshot_gap=1)) algo = MockAlgo(env, max_path_length, n_traj) runner.setup(algo, env) meta_eval = MetaEvaluator(runner, test_task_sampler=tasks, max_path_length=max_path_length, n_test_tasks=10, n_exploration_traj=n_traj) log_file = tempfile.NamedTemporaryFile() csv_output = CsvOutput(log_file.name) logger.add_output(csv_output) meta_eval.evaluate(algo)