def test_meta_evaluator_with_tf():
    """Check that a TF algorithm holding a MetaEvaluator survives pickling.

    The algorithm is set up under one ``TFTrainer``, evaluated once through
    the ``MetaEvaluator`` and pickled with ``cloudpickle``. After the default
    graph is reset, the pickle is loaded under a second ``TFTrainer`` and
    trained.
    """
    set_seed(100)
    tasks = SetTaskSampler(PointEnv, wrapper=set_length)
    max_episode_length = 200
    env = PointEnv()
    n_eps = 3
    # Both temporary resources are context-managed so they are released even
    # when the test fails part-way through.
    with tempfile.TemporaryDirectory() as log_dir_name, \
            tempfile.NamedTemporaryFile() as log_file:
        ctxt = SnapshotConfig(snapshot_dir=log_dir_name,
                              snapshot_mode='none',
                              snapshot_gap=1)
        try:
            with TFTrainer(ctxt) as trainer:
                meta_eval = MetaEvaluator(test_task_sampler=tasks,
                                          n_test_tasks=10,
                                          n_exploration_eps=n_eps)
                policy = GaussianMLPPolicy(env.spec)
                algo = MockAlgo(env, policy, max_episode_length, n_eps,
                                meta_eval)
                trainer.setup(algo, env)
                logger.add_output(CsvOutput(log_file.name))
                meta_eval.evaluate(algo)
                algo_pickle = cloudpickle.dumps(algo)
            tf.compat.v1.reset_default_graph()
            with TFTrainer(ctxt) as trainer:
                algo2 = cloudpickle.loads(algo_pickle)
                trainer.setup(algo2, env)
                trainer.train(10, 0)
        finally:
            # The logger is a process-wide object; do not leave this test's
            # CSV output attached for whichever test runs next.
            logger.remove_output_type(CsvOutput)
def test_meta_evaluator():
    """Evaluate twice with a MetaEvaluator and inspect the CSV it logs.

    Two evaluations are logged to a temporary CSV file. The test then checks
    that two rows were written, that the iteration counter advances from 0 to
    1, and that the first row's return statistics are ordered
    ``MinReturn <= AverageReturn <= MaxReturn``.
    """
    set_seed(100)
    tasks = SetTaskSampler(PointEnv, wrapper=set_length)
    max_episode_length = 200
    key_prefix = 'MetaTest/__unnamed_task__/'
    # The temporary file must stay open until it has been read back, because
    # closing a NamedTemporaryFile deletes it.
    with tempfile.TemporaryDirectory() as log_dir_name, \
            tempfile.NamedTemporaryFile() as log_file:
        trainer = Trainer(
            SnapshotConfig(snapshot_dir=log_dir_name,
                           snapshot_mode='last',
                           snapshot_gap=1))
        env = PointEnv(max_episode_length=max_episode_length)
        algo = OptimalActionInference(env=env,
                                      max_episode_length=max_episode_length)
        trainer.setup(algo, env)
        meta_eval = MetaEvaluator(test_task_sampler=tasks, n_test_tasks=10)
        logger.add_output(CsvOutput(log_file.name))
        try:
            meta_eval.evaluate(algo)
            logger.log(tabular)
            meta_eval.evaluate(algo)
            logger.log(tabular)
            logger.dump_output_type(CsvOutput)
        finally:
            # Detach the CSV output even if an evaluation raised, so the
            # shared logger is left clean for other tests.
            logger.remove_output_type(CsvOutput)
        with open(log_file.name, 'r') as file:
            rows = list(csv.DictReader(file))

    assert len(rows) == 2
    first, second = rows
    assert float(first[key_prefix + 'TerminationRate']) < 1.0
    assert float(first[key_prefix + 'Iteration']) == 0
    assert (float(first[key_prefix + 'MaxReturn']) >=
            float(first[key_prefix + 'AverageReturn']))
    assert (float(first[key_prefix + 'AverageReturn']) >=
            float(first[key_prefix + 'MinReturn']))
    assert float(second[key_prefix + 'Iteration']) == 1
def test_meta_evaluator_with_tf():
    """Check that a TF algorithm holding a MetaEvaluator survives pickling.

    The algorithm is set up under one ``LocalTFRunner``, evaluated once
    through the ``MetaEvaluator`` and pickled with ``cloudpickle``. The pickle
    is then loaded inside a fresh ``tf.Graph`` under a second
    ``LocalTFRunner`` and trained.
    """
    set_seed(100)
    tasks = SetTaskSampler(lambda: GarageEnv(PointEnv()))
    max_path_length = 200
    env = GarageEnv(PointEnv())
    n_traj = 3
    # Both temporary resources are context-managed so they are released even
    # when the test fails part-way through.
    with tempfile.TemporaryDirectory() as log_dir_name, \
            tempfile.NamedTemporaryFile() as log_file:
        ctxt = SnapshotConfig(snapshot_dir=log_dir_name,
                              snapshot_mode='none',
                              snapshot_gap=1)
        try:
            with LocalTFRunner(ctxt) as runner:
                meta_eval = MetaEvaluator(test_task_sampler=tasks,
                                          max_path_length=max_path_length,
                                          n_test_tasks=10,
                                          n_exploration_traj=n_traj)
                policy = GaussianMLPPolicy(env.spec)
                algo = MockAlgo(env, policy, max_path_length, n_traj,
                                meta_eval)
                runner.setup(algo, env)
                logger.add_output(CsvOutput(log_file.name))
                meta_eval.evaluate(algo)
                algo_pickle = cloudpickle.dumps(algo)
            with tf.Graph().as_default():
                with LocalTFRunner(ctxt) as runner:
                    algo2 = cloudpickle.loads(algo_pickle)
                    runner.setup(algo2, env)
                    runner.train(10, 0)
        finally:
            # The logger is a process-wide object; do not leave this test's
            # CSV output attached for whichever test runs next.
            logger.remove_output_type(CsvOutput)
def test_pickle_meta_evaluator():
    """Check that a MetaEvaluator can still evaluate after a pickle round trip.

    The evaluator is used once, serialized and deserialized with
    ``cloudpickle``, and the restored copy is asked to evaluate the same
    algorithm again.
    """
    set_seed(100)
    tasks = SetTaskSampler(lambda: GarageEnv(PointEnv()))
    max_path_length = 200
    env = GarageEnv(PointEnv())
    n_traj = 3
    # Both temporary resources are context-managed so they are released even
    # when the test fails part-way through.
    with tempfile.TemporaryDirectory() as log_dir_name, \
            tempfile.NamedTemporaryFile() as log_file:
        runner = LocalRunner(
            SnapshotConfig(snapshot_dir=log_dir_name,
                           snapshot_mode='last',
                           snapshot_gap=1))
        meta_eval = MetaEvaluator(test_task_sampler=tasks,
                                  max_path_length=max_path_length,
                                  n_test_tasks=10,
                                  n_exploration_traj=n_traj)
        policy = RandomPolicy(env.spec.action_space)
        algo = MockAlgo(env, policy, max_path_length, n_traj, meta_eval)
        runner.setup(algo, env)
        logger.add_output(CsvOutput(log_file.name))
        try:
            meta_eval.evaluate(algo)
            meta_eval_pickle = cloudpickle.dumps(meta_eval)
            meta_eval2 = cloudpickle.loads(meta_eval_pickle)
            meta_eval2.evaluate(algo)
        finally:
            # Detach the CSV output so the shared logger does not keep
            # writing to this test's temporary file.
            logger.remove_output_type(CsvOutput)
def setup_method(self):
    """Run the fixture experiment into a fresh snapshot directory.

    After the parent's setup, a temporary directory is created and kept on
    ``self.temp_dir``. ``fixture_exp`` is run with a snapshot configuration
    pointing at that directory and ``self.sess``. Finally every collection of
    ``self.graph`` is cleared.
    """
    super().setup_method()
    self.temp_dir = tempfile.TemporaryDirectory()
    fixture_exp(
        SnapshotConfig(snapshot_dir=self.temp_dir.name,
                       snapshot_mode='all',
                       snapshot_gap=1),
        self.sess,
    )
    for collection_name in self.graph.collections:
        self.graph.clear_collection(collection_name)
def test_dqn_cartpole(setup):
    """Train the DQN fixture, then restore and resume it from its snapshot.

    Args:
        setup (tuple): Fixture value unpacked as
            ``(algo, env, _, n_epochs, batch_size)``.
    """
    algo, env, _, n_epochs, batch_size = setup
    # Use the directory as a context manager so it is removed
    # deterministically instead of relying on the object's finalizer.
    with tempfile.TemporaryDirectory() as snapshot_dir:
        config = SnapshotConfig(snapshot_dir=snapshot_dir,
                                snapshot_mode='last',
                                snapshot_gap=1)
        trainer = Trainer(config)
        trainer.setup(algo, env, sampler_cls=LocalSampler)
        last_avg_return = trainer.train(n_epochs=n_epochs,
                                        batch_size=batch_size)
        assert last_avg_return > 10
        env.close()

        # test resume from snapshot
        trainer.restore(snapshot_dir)
        trainer.resume(n_epochs=1, batch_size=batch_size)
def test_meta_evaluator_n_traj():
    """Run a MetaEvaluator configured with several exploration trajectories.

    The test passes when a single ``evaluate`` call with
    ``n_exploration_traj=3`` completes without raising.
    """
    set_seed(100)
    tasks = SetTaskSampler(PointEnv)
    max_path_length = 200
    env = GarageEnv(PointEnv())
    n_traj = 3
    # Both temporary resources are context-managed so they are released even
    # when the test fails part-way through.
    with tempfile.TemporaryDirectory() as log_dir_name, \
            tempfile.NamedTemporaryFile() as log_file:
        runner = LocalRunner(
            SnapshotConfig(snapshot_dir=log_dir_name,
                           snapshot_mode='last',
                           snapshot_gap=1))
        algo = MockAlgo(env, max_path_length, n_traj)
        runner.setup(algo, env)
        meta_eval = MetaEvaluator(runner,
                                  test_task_sampler=tasks,
                                  max_path_length=max_path_length,
                                  n_test_tasks=10,
                                  n_exploration_traj=n_traj)
        logger.add_output(CsvOutput(log_file.name))
        try:
            meta_eval.evaluate(algo)
        finally:
            # Detach the CSV output so the shared logger does not keep
            # writing to this test's temporary file.
            logger.remove_output_type(CsvOutput)
def run_experiment(argv):
    """Run experiment.

    Parses the command line, optionally seeds and starts the parallel
    sampler, prepares the log directory and logger outputs, then decodes
    ``--args_data`` with cloudpickle and calls the resulting object with
    ``(snapshot_config, variant_data, resume_from_dir, resume_from_epoch)``.

    Args:
        argv (list[str]): Command line arguments. ``argv[0]`` is skipped.

    Raises:
        BaseException: Propagate any exception in the experiment, after the
            plotters (and the parallel sampler, if started) are shut down.

    """
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')

    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=('Number of parallel workers to perform rollouts. '
              "0 => don't start any workers"))
    parser.add_argument('--exp_name',
                        type=str,
                        default=default_exp_name,
                        help='Name of the experiment.')
    parser.add_argument('--log_dir',
                        type=str,
                        default=None,
                        help='Path to save the log and iteration snapshot.')
    parser.add_argument('--snapshot_mode',
                        type=str,
                        default='last',
                        help='Mode to save the snapshot. Can be either "all" '
                        '(all iterations will be saved), "last" (only '
                        'the last iteration will be saved), "gap" (every '
                        '`snapshot_gap` iterations are saved), or "none" '
                        '(do not save snapshots)')
    parser.add_argument('--snapshot_gap',
                        type=int,
                        default=1,
                        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--resume_from_dir',
        type=str,
        default=None,
        help='Directory of the pickle file to resume experiment from.')
    parser.add_argument('--resume_from_epoch',
                        type=str,
                        default=None,
                        help='Index of iteration to restore from. '
                        'Can be "first", "last" or a number. '
                        'Not applicable when snapshot_mode="last"')
    parser.add_argument('--tabular_log_file',
                        type=str,
                        default='progress.csv',
                        help='Name of the tabular log file (in csv).')
    parser.add_argument('--text_log_file',
                        type=str,
                        default='debug.log',
                        help='Name of the text log file (in pure text).')
    parser.add_argument('--tensorboard_step_key',
                        type=str,
                        default=None,
                        help='Name of the step key in tensorboard_summary.')
    parser.add_argument('--params_log_file',
                        type=str,
                        default='params.json',
                        help='Name of the parameter log file (in json).')
    parser.add_argument('--variant_log_file',
                        type=str,
                        default='variant.json',
                        help='Name of the variant log file (in json).')
    parser.add_argument('--plot',
                        type=ast.literal_eval,
                        default=False,
                        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help='Random seed for numpy')
    parser.add_argument('--args_data',
                        type=str,
                        help='Pickled data for objects')
    parser.add_argument('--variant_data',
                        type=str,
                        help='Pickled data for variant configuration')

    args = parser.parse_args(argv[1:])

    if args.seed is not None:
        deterministic.set_seed(args.seed)

    if args.n_parallel > 0:
        parallel_sampler.initialize(n_parallel=args.n_parallel)
        if args.seed is not None:
            parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = os.path.join(os.path.join(os.getcwd(), 'data'),
                               args.exp_name)
    else:
        log_dir = args.log_dir

    tabular_log_file = os.path.join(log_dir, args.tabular_log_file)
    text_log_file = os.path.join(log_dir, args.text_log_file)
    params_log_file = os.path.join(log_dir, args.params_log_file)

    # SECURITY NOTE: --variant_data and --args_data are unpickled as given.
    # Unpickling executes arbitrary code, so these arguments must only ever
    # come from a trusted launcher.
    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = os.path.join(log_dir, args.variant_log_file)
        dump_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    log_parameters(params_log_file, args)

    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(dowel.TensorBoardOutput(log_dir, x_axis='TotalEnvSteps'))
    logger.add_output(dowel.StdOutput())

    logger.push_prefix('[%s] ' % args.exp_name)

    snapshot_config = SnapshotConfig(snapshot_dir=log_dir,
                                     snapshot_mode=args.snapshot_mode,
                                     snapshot_gap=args.snapshot_gap)

    method_call = cloudpickle.loads(base64.b64decode(args.args_data))
    try:
        method_call(snapshot_config, variant_data, args.resume_from_dir,
                    args.resume_from_epoch)
    except BaseException:
        # Build a fresh list: extending the object returned by
        # get_plotters() in place could modify the plotter registry itself
        # if that call hands back its internal list.
        children = list(garage.plotter.Plotter.get_plotters())
        children.extend(garage.tf.plotter.Plotter.get_plotters())
        if args.n_parallel > 0:
            children.append(parallel_sampler)
        child_proc_shutdown(children)
        raise

    logger.remove_all()
    logger.pop_prefix()
def run_task(snapshot_config, exp_config):
    """Build a DQN experiment from ``exp_config`` and train it.

    Args:
        snapshot_config: Mapping indexed with ``"snapshot_mode"`` and
            ``"snapshot_gap"``. The snapshot directory itself is derived
            from the working directory and ``get_info()``.
        exp_config: Experiment configuration mapping. ``"env"`` (with at
            least ``"name"``) is required; the remaining entries are read
            with ``get``.

    Raises:
        ValueError: If the configured environment name is not supported.
    """
    logger.log(f"Config of this experiment is {exp_config}")
    env_config = exp_config["env"]
    env_name = env_config["name"]
    replay_buffer_size = exp_config.get("replay_buffer_size")
    n_epochs = exp_config.get("n_epochs")
    steps_per_epoch = exp_config.get("steps_per_epoch")
    sampler_batch_size = exp_config.get("sampler_batch_size")
    n_train_steps = exp_config.get("n_train_steps")
    learning_rate = exp_config.get("learning_rate")
    buffer_batch_size = exp_config.get("buffer_batch_size")
    target_network_update_freq = exp_config.get("target_network_update_freq")
    min_buffer_size = exp_config.get("min_buffer_size")
    net_config = exp_config.get("q-net")
    loss_weights = exp_config.get("loss_weights")
    deepmdp_config = exp_config.get("deepmdp")
    epsilon_greedy_config = exp_config.get("epsilon_greedy")
    plots = exp_config.get("plots")
    steps = n_epochs * steps_per_epoch * sampler_batch_size
    num_frames = env_config.get("num_frames")
    if num_frames is None:
        num_frames = 4

    # Kept under its own name so the incoming mapping is not shadowed.
    snapshots = SnapshotConfig(
        os.path.join(os.getcwd(), f'runs/{get_info()}/snapshots'),
        snapshot_config["snapshot_mode"], snapshot_config["snapshot_gap"])

    if "LunarLander-v2" in env_name:
        # A four-character suffix on the name selects the observation
        # variant ("-img", "-obf" or "-stk"); any other LunarLander-v2 name
        # is passed to gym unchanged.
        base_name = env_name[:-4]
        if env_name.endswith("-img"):
            env = setup_lunar_lander_with_image_obs(
                base_name, num_frames, do_noops=env_config["do_noops"])
        elif env_name.endswith("-obf"):
            env = setup_lunar_lander_with_obfuscated_states(
                base_name, num_frames, do_noops=env_config["do_noops"])
        elif env_name.endswith("-stk"):
            env = setup_stacked_lunar_lander_env(
                base_name, num_frames, normalize=env_config["normalize"])
        else:
            env = GarageEnv(gym.make(env_name))
    elif env_name == "SpaceInvaders-v0":
        env = setup_atari_env(env_name, num_frames)
    else:
        raise ValueError("Env name not known")

    try:
        # Set env seed
        env.seed(get_seed())
        # Set seed for action space (needed for epsilon-greedy
        # reproducibility)
        env.action_space.seed(get_seed())

        # Init visualizer
        visualizer = Visualizer(get_info() + "_main", plots)
        visualizer.publish_config(exp_config)

        runner = LocalRunner(snapshots)
        replay_buffer = SimpleReplayBuffer(
            env.spec, size_in_transitions=replay_buffer_size, time_horizon=1)

        strategy = EpsilonGreedyStrategy(env.spec, steps,
                                         **epsilon_greedy_config)
        qf = DiscreteCNNQFunction(env_spec=env.spec, **net_config)

        aux_objectives = []
        if deepmdp_config["use"]:
            aux_objectives.append(
                RewardAuxiliaryObjective(env.spec, qf.embedding_size,
                                         deepmdp_config["reward_head"]))
            aux_objectives.append(
                TransitionAuxiliaryObjective(
                    env.spec, qf.embedding_size,
                    deepmdp_config["transition_head"]))

        policy = DiscreteQfDerivedPolicy(env.spec, qf)
        algo = DQN(policy=policy,
                   qf=qf,
                   env_spec=env.spec,
                   experiment_id=get_info(),
                   plot_list=plots,
                   visualizer=visualizer,
                   replay_buffer=replay_buffer,
                   qf_optimizer=torch.optim.Adam,
                   exploration_strategy=strategy,
                   n_train_steps=n_train_steps,
                   buffer_batch_size=buffer_batch_size,
                   min_buffer_size=min_buffer_size,
                   n_epoch_cycles=steps_per_epoch,
                   target_network_update_freq=target_network_update_freq,
                   qf_lr=learning_rate,
                   max_path_length=1000,
                   auxiliary_objectives=aux_objectives,
                   **loss_weights)

        # Use the modified off-policy sampler so that summary statistics
        # about each episode's q-values are passed to the algo object.
        runner.setup(algo=algo,
                     env=env,
                     sampler_cls=OffPolicyVectorizedSampler)
        runner.train(n_epochs=n_epochs, batch_size=sampler_batch_size)
    finally:
        # Close the environment even when setup or training fails.
        # Bypass GarageEnv>>close as this requires a display
        env.env.close()
def run_experiment(argv):
    """Run experiment.

    Parses the command line, optionally seeds and starts the parallel
    sampler, prepares the log directory and logger outputs, then decodes
    ``--args_data``. With ``--use_cloudpickle`` the payload is loaded with
    cloudpickle and called with
    ``(snapshot_config, variant_data, resume_from_dir, resume_from_epoch)``.
    Otherwise it is loaded with pickle, passed to ``concretize`` and, if the
    result is iterable, iterated to exhaustion.

    Args:
        argv (list[str]): Command line arguments. ``argv[0]`` is skipped.

    Raises:
        BaseException: In the cloudpickle path, any exception from the
            experiment is re-raised after the plotters (and the parallel
            sampler, if started) are shut down.

    """
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')

    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=('Number of parallel workers to perform rollouts. '
              "0 => don't start any workers"))
    parser.add_argument('--exp_name',
                        type=str,
                        default=default_exp_name,
                        help='Name of the experiment.')
    parser.add_argument('--log_dir',
                        type=str,
                        default=None,
                        help='Path to save the log and iteration snapshot.')
    parser.add_argument('--snapshot_mode',
                        type=str,
                        default='last',
                        help='Mode to save the snapshot. Can be either "all" '
                        '(all iterations will be saved), "last" (only '
                        'the last iteration will be saved), "gap" (every '
                        '`snapshot_gap` iterations are saved), or "none" '
                        '(do not save snapshots)')
    parser.add_argument('--snapshot_gap',
                        type=int,
                        default=1,
                        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--resume_from_dir',
        type=str,
        default=None,
        help='Directory of the pickle file to resume experiment from.')
    parser.add_argument('--resume_from_epoch',
                        type=str,
                        default=None,
                        help='Index of iteration to restore from. '
                        'Can be "first", "last" or a number. '
                        'Not applicable when snapshot_mode="last"')
    parser.add_argument('--tabular_log_file',
                        type=str,
                        default='progress.csv',
                        help='Name of the tabular log file (in csv).')
    parser.add_argument('--text_log_file',
                        type=str,
                        default='debug.log',
                        help='Name of the text log file (in pure text).')
    parser.add_argument('--tensorboard_step_key',
                        type=str,
                        default=None,
                        help='Name of the step key in tensorboard_summary.')
    parser.add_argument('--params_log_file',
                        type=str,
                        default='params.json',
                        help='Name of the parameter log file (in json).')
    parser.add_argument('--variant_log_file',
                        type=str,
                        default='variant.json',
                        help='Name of the variant log file (in json).')
    parser.add_argument('--plot',
                        type=ast.literal_eval,
                        default=False,
                        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help='Random seed for numpy')
    parser.add_argument('--args_data',
                        type=str,
                        help='Pickled data for objects')
    parser.add_argument('--variant_data',
                        type=str,
                        help='Pickled data for variant configuration')
    parser.add_argument('--use_cloudpickle',
                        type=ast.literal_eval,
                        default=False,
                        help='Whether to decode args_data with cloudpickle '
                        'instead of pickle')

    args = parser.parse_args(argv[1:])

    if args.seed is not None:
        deterministic.set_seed(args.seed)

    # SIGINT is blocked for all processes created in parallel_sampler to avoid
    # the creation of sleeping and zombie processes.
    #
    # If the user interrupts run_experiment, there's a chance some processes
    # won't die due to a deadlock condition where one of the children in the
    # parallel sampler exits without releasing a lock after it catches
    # SIGINT.
    #
    # Later the parent tries to acquire the same lock to proceed with its
    # cleanup, but it remains sleeping, waiting for the lock to be released.
    # In the meantime, all the processes in the parallel sampler remain in
    # the zombie state since the parent cannot proceed with their cleanup.
    with mask_signals([signal.SIGINT]):
        if args.n_parallel > 0:
            parallel_sampler.initialize(n_parallel=args.n_parallel)
            if args.seed is not None:
                parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = os.path.join(os.path.join(os.getcwd(), 'data'),
                               args.exp_name)
    else:
        log_dir = args.log_dir

    tabular_log_file = os.path.join(log_dir, args.tabular_log_file)
    text_log_file = os.path.join(log_dir, args.text_log_file)
    params_log_file = os.path.join(log_dir, args.params_log_file)

    # SECURITY NOTE: --variant_data and --args_data are unpickled as given.
    # Unpickling executes arbitrary code, so these arguments must only ever
    # come from a trusted launcher.
    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = os.path.join(log_dir, args.variant_log_file)
        dump_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    if not args.use_cloudpickle:
        log_parameters(params_log_file, args)

    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(dowel.TensorBoardOutput(log_dir))
    logger.add_output(dowel.StdOutput())

    logger.push_prefix('[%s] ' % args.exp_name)

    snapshot_config = SnapshotConfig(snapshot_dir=log_dir,
                                     snapshot_mode=args.snapshot_mode,
                                     snapshot_gap=args.snapshot_gap)

    # The experiment payload arrives base64-encoded in --args_data.
    if args.use_cloudpickle:
        import cloudpickle
        method_call = cloudpickle.loads(base64.b64decode(args.args_data))
        try:
            method_call(snapshot_config, variant_data, args.resume_from_dir,
                        args.resume_from_epoch)
        except BaseException:
            # Build a fresh list: extending the object returned by
            # get_plotters() in place could modify the plotter registry
            # itself if that call hands back its internal list.
            children = list(garage.plotter.Plotter.get_plotters())
            children.extend(garage.tf.plotter.Plotter.get_plotters())
            if args.n_parallel > 0:
                children.append(parallel_sampler)
            child_proc_shutdown(children)
            raise
    else:
        data = pickle.loads(base64.b64decode(args.args_data))
        maybe_iter = concretize(data)
        if is_iterable(maybe_iter):
            # Exhaust the iterator purely for its side effects.
            for _ in maybe_iter:
                pass

    logger.remove_all()
    logger.pop_prefix()
self._evolution_strategy.tell( self._shared_params, -np.array(self._all_returns)) # Report back results self.policy.set_param_values( self._evolution_strategy.best.get() [0]) # TODO: DOES NOTHING, as is overwritten everywhere self._all_returns.clear() # Clear for next epoch self._resample_shared_parameters() if __name__ == "__main__": path = PROJECT_APP_PATH.user_data / "data" / "local" / "experiment" snapshot_config = SnapshotConfig(snapshot_dir=path, snapshot_mode="last", snapshot_gap=1) def stest_cma_es_cartpole(): """Test CMAES with Cartpole-v1 environment.""" with LocalTFRunner(snapshot_config) as runner: with TensorBoardPytorchWriter(PROJECT_APP_PATH.user_log / "CMA") as writer: env = GarageEnv(env_name="CartPole-v1") algo = CovarianceMatrixAdaptationEvolutionStrategyAgent( env_spec=env.spec, max_rollout_length=100) algo.build() runner.setup(algo, env, sampler_cls=LocalSampler) runner.train(n_epochs=1, batch_size=1000)