def test_meta_evaluator():
    """Check the tabular statistics MetaEvaluator logs over two evaluations.

    Evaluates ``OptimalActionInference`` twice on ``PointEnv`` tasks, writes
    the tabular log to a temporary CSV file and verifies that one row per
    evaluation was produced with consistent return statistics.
    """
    set_seed(100)
    tasks = SetTaskSampler(lambda: MetaRLEnv(PointEnv()))
    max_path_length = 200
    with tempfile.TemporaryDirectory() as log_dir_name:
        runner = LocalRunner(
            SnapshotConfig(snapshot_dir=log_dir_name,
                           snapshot_mode='last',
                           snapshot_gap=1))
        env = MetaRLEnv(PointEnv())
        algo = OptimalActionInference(env=env,
                                      max_path_length=max_path_length)
        runner.setup(algo, env)
        meta_eval = MetaEvaluator(test_task_sampler=tasks,
                                  max_path_length=max_path_length,
                                  n_test_tasks=10)
        # The context manager guarantees the temporary file handle is closed
        # (and the file deleted) even when an assertion or evaluation fails.
        with tempfile.NamedTemporaryFile() as log_file:
            logger.add_output(CsvOutput(log_file.name))
            try:
                meta_eval.evaluate(algo)
                logger.log(tabular)
                meta_eval.evaluate(algo)
                logger.log(tabular)
                logger.dump_output_type(CsvOutput)
            finally:
                # The logger is shared module state: always unregister the
                # CSV output so a failure here cannot leak into other tests.
                logger.remove_output_type(CsvOutput)
            with open(log_file.name, 'r') as file:
                rows = list(csv.DictReader(file))
        assert len(rows) == 2
        assert float(rows[0]['MetaTest/__unnamed_task__/CompletionRate']) < 1.0
        assert float(rows[0]['MetaTest/__unnamed_task__/Iteration']) == 0
        assert (float(rows[0]['MetaTest/__unnamed_task__/MaxReturn']) >=
                float(rows[0]['MetaTest/__unnamed_task__/AverageReturn']))
        assert (float(rows[0]['MetaTest/__unnamed_task__/AverageReturn']) >=
                float(rows[0]['MetaTest/__unnamed_task__/MinReturn']))
        assert float(rows[1]['MetaTest/__unnamed_task__/Iteration']) == 1
def test_meta_evaluator_with_tf():
    """Check that an algorithm holding a MetaEvaluator survives pickling.

    A ``MockTFAlgo`` is evaluated once inside a ``LocalTFRunner``, pickled
    with ``cloudpickle``, restored after the default TF graph is reset, and
    then trained in a fresh runner.
    """
    set_seed(100)
    tasks = SetTaskSampler(lambda: MetaRLEnv(PointEnv()))
    max_path_length = 200
    env = MetaRLEnv(PointEnv())
    n_traj = 3
    # Both temporary resources are context-managed so the directory and the
    # log file handle are released regardless of how the test exits.
    with tempfile.TemporaryDirectory() as log_dir_name, \
            tempfile.NamedTemporaryFile() as log_file:
        ctxt = SnapshotConfig(snapshot_dir=log_dir_name,
                              snapshot_mode='none',
                              snapshot_gap=1)
        try:
            with LocalTFRunner(ctxt) as runner:
                meta_eval = MetaEvaluator(test_task_sampler=tasks,
                                          max_path_length=max_path_length,
                                          n_test_tasks=10,
                                          n_exploration_traj=n_traj)
                policy = GaussianMLPPolicy(env.spec)
                algo = MockTFAlgo(env, policy, max_path_length, n_traj,
                                  meta_eval)
                runner.setup(algo, env)
                logger.add_output(CsvOutput(log_file.name))
                meta_eval.evaluate(algo)
                algo_pickle = cloudpickle.dumps(algo)
            tf.compat.v1.reset_default_graph()
            with LocalTFRunner(ctxt) as runner:
                algo2 = cloudpickle.loads(algo_pickle)
                runner.setup(algo2, env)
                runner.train(10, 0)
        finally:
            # The CSV output stays attached through both runner phases (as
            # before) but is now unregistered from the shared logger so it
            # does not leak into tests that run afterwards.
            logger.remove_output_type(CsvOutput)
def test_pickle_meta_evaluator():
    """Check that a MetaEvaluator can still evaluate after a pickle round trip.

    The evaluator is used once, serialized and restored with ``cloudpickle``,
    and the restored copy is used to evaluate the same algorithm again.
    """
    set_seed(100)
    tasks = SetTaskSampler(lambda: MetaRLEnv(PointEnv()))
    max_path_length = 200
    env = MetaRLEnv(PointEnv())
    n_traj = 3
    with tempfile.TemporaryDirectory() as log_dir_name:
        runner = LocalRunner(
            SnapshotConfig(snapshot_dir=log_dir_name,
                           snapshot_mode='last',
                           snapshot_gap=1))
        meta_eval = MetaEvaluator(test_task_sampler=tasks,
                                  max_path_length=max_path_length,
                                  n_test_tasks=10,
                                  n_exploration_traj=n_traj)
        policy = RandomPolicy(env.spec.action_space)
        algo = MockAlgo(env, policy, max_path_length, n_traj, meta_eval)
        runner.setup(algo, env)
        # Close the temporary log file deterministically instead of relying
        # on garbage collection.
        with tempfile.NamedTemporaryFile() as log_file:
            logger.add_output(CsvOutput(log_file.name))
            try:
                meta_eval.evaluate(algo)
                meta_eval_pickle = cloudpickle.dumps(meta_eval)
                meta_eval2 = cloudpickle.loads(meta_eval_pickle)
                meta_eval2.evaluate(algo)
            finally:
                # The logger is shared module state; unregister the CSV
                # output so it does not affect subsequently run tests.
                logger.remove_output_type(CsvOutput)
def test_one_folder(self, meta_train_dir, itrs):
    """Run meta-test evaluation for selected snapshots of one experiment.

    For every entry of ``itrs`` the snapshot is restored from
    ``meta_train_dir``, evaluated with a ``MetaEvaluator`` and the resulting
    tabular data is written to ``meta-test-itr_<itr>.csv`` inside
    ``meta_train_dir``.

    Args:
        meta_train_dir (str): Directory holding the training snapshots; also
            used as the destination of the generated CSV files.
        itrs (Iterable): Epoch identifiers, each passed as ``from_epoch`` to
            ``runner.restore`` and formatted into the CSV file name.
    """
    snapshot_config = SnapshotConfig(snapshot_dir=meta_train_dir,
                                     snapshot_mode='all',
                                     snapshot_gap=1)
    runner = LocalRunner(snapshot_config=snapshot_config)
    meta_sampler = AllSetTaskSampler(self.meta_task_cls)
    runner.restore(meta_train_dir)

    meta_evaluator = MetaEvaluator(
        runner,
        test_task_sampler=meta_sampler,
        max_path_length=self.max_path_length,
        n_test_tasks=meta_sampler.n_tasks,
        n_exploration_traj=self.adapt_rollout_per_task,
        prefix='')

    for itr in itrs:
        log_filename = os.path.join(meta_train_dir,
                                    'meta-test-itr_{}.csv'.format(itr))
        logger.add_output(CsvOutput(log_filename))
        try:
            logger.log("Writing into {}".format(log_filename))

            runner.restore(meta_train_dir, from_epoch=itr)
            meta_evaluator.evaluate(runner._algo, self.test_rollout_per_task)

            tabular.record('Iteration', runner._stats.total_epoch)
            tabular.record('TotalEnvSteps', runner._stats.total_env_steps)
            logger.log(tabular)
            logger.dump_output_type(CsvOutput)
        finally:
            # Always detach this iteration's CSV output; otherwise a failed
            # restore/evaluate would leave it registered on the shared
            # logger and later tabular logs would be routed to it.
            logger.remove_output_type(CsvOutput)
def setup_method(self):
    """Prepare a snapshot directory populated by ``fixture_exp``.

    After the parent class setup, a temporary directory is created and kept
    on ``self.temp_dir``, ``fixture_exp`` is run against it with
    ``self.sess``, and finally every collection of ``self.graph`` is cleared.
    """
    super().setup_method()

    self.temp_dir = tempfile.TemporaryDirectory()
    snapshot_dir = self.temp_dir.name
    config = SnapshotConfig(
        snapshot_dir=snapshot_dir,
        snapshot_mode='all',
        snapshot_gap=1,
    )
    fixture_exp(config, self.sess)

    # NOTE(review): self.sess / self.graph are presumably provided by the
    # parent class's setup_method -- confirm against the base fixture.
    for collection_name in self.graph.collections:
        self.graph.clear_collection(collection_name)
def run_metarl(env, seed, log_dir):
    """Create metarl PyTorch MAML model and training.

    Logger outputs (stdout, CSV and TensorBoard) are attached for the
    duration of the run and are always detached again, even if setup or
    training raises.

    Args:
        env (MetaRLEnv): Environment of the task.
        seed (int): Random positive integer for the trial.
        log_dir (str): Log dir path.

    Returns:
        str: Path to output csv file

    """
    deterministic.set_seed(seed)

    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        hidden_sizes=hyper_parameters['hidden_sizes'],
        hidden_nonlinearity=torch.tanh,
        output_nonlinearity=None,
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = MAMLTRPO(env=env,
                    policy=policy,
                    baseline=baseline,
                    max_path_length=hyper_parameters['max_path_length'],
                    discount=hyper_parameters['discount'],
                    gae_lambda=hyper_parameters['gae_lambda'],
                    meta_batch_size=hyper_parameters['meta_batch_size'],
                    inner_lr=hyper_parameters['inner_lr'],
                    max_kl_step=hyper_parameters['max_kl'],
                    num_grad_updates=hyper_parameters['num_grad_update'])

    # Set up logger since we are not using run_experiment
    tabular_log_file = osp.join(log_dir, 'progress.csv')
    try:
        dowel_logger.add_output(dowel.StdOutput())
        dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
        dowel_logger.add_output(dowel.TensorBoardOutput(log_dir))

        snapshot_config = SnapshotConfig(snapshot_dir=log_dir,
                                         snapshot_mode='all',
                                         snapshot_gap=1)

        runner = LocalRunner(snapshot_config=snapshot_config)
        runner.setup(algo, env, sampler_args=dict(n_envs=5))
        runner.train(n_epochs=hyper_parameters['n_epochs'],
                     batch_size=(hyper_parameters['fast_batch_size'] *
                                 hyper_parameters['max_path_length']))
    finally:
        # The dowel logger is shared across trials: detach every output even
        # on failure so the next trial does not write into this trial's logs.
        dowel_logger.remove_all()

    return tabular_log_file
def test_meta_evaluator_n_traj():
    """Check that MetaEvaluator runs with several exploration trajectories.

    A ``MockAlgo`` is set up on ``PointEnv`` and evaluated once by a
    ``MetaEvaluator`` configured with ``n_exploration_traj=3`` while a CSV
    output is attached to the logger.
    """
    set_seed(100)
    tasks = SetTaskSampler(PointEnv)
    max_path_length = 200
    env = MetaRLEnv(PointEnv())
    n_traj = 3
    with tempfile.TemporaryDirectory() as log_dir_name:
        runner = LocalRunner(
            SnapshotConfig(snapshot_dir=log_dir_name,
                           snapshot_mode='last',
                           snapshot_gap=1))
        algo = MockAlgo(env, max_path_length, n_traj)
        runner.setup(algo, env)
        meta_eval = MetaEvaluator(runner,
                                  test_task_sampler=tasks,
                                  max_path_length=max_path_length,
                                  n_test_tasks=10,
                                  n_exploration_traj=n_traj)
        # Close the temporary log file deterministically instead of relying
        # on garbage collection.
        with tempfile.NamedTemporaryFile() as log_file:
            logger.add_output(CsvOutput(log_file.name))
            try:
                meta_eval.evaluate(algo)
            finally:
                # The logger is shared module state; unregister the CSV
                # output so it does not affect subsequently run tests.
                logger.remove_output_type(CsvOutput)