def tearDown(self):
    logger.remove_all()
    if tf.get_default_session() is self.sess:
        self.sess.__exit__(None, None, None)
    self.sess.close()
    # These del are crucial to prevent ENOMEM in the CI
    # b/c TensorFlow does not release memory explicitly
    del self.graph
    del self.sess
    gc.collect()
def run_garage(env, seed, log_dir): """ Create garage model and training. Replace the trpo with the algorithm you want to run. :param env: Environment of the task. :param seed: Random seed for the trial. :param log_dir: Log dir path. :return:import baselines.common.tf_util as U """ deterministic.set_seed(seed) with LocalRunner() as runner: env = TfEnv(normalize(env)) policy = GaussianMLPPolicy( env_spec=env.spec, hidden_sizes=(32, 32), hidden_nonlinearity=tf.nn.tanh, output_nonlinearity=None, ) baseline = GaussianMLPBaseline( env_spec=env.spec, regressor_args=dict( hidden_sizes=(32, 32), use_trust_region=True, ), ) algo = TRPO( env_spec=env.spec, policy=policy, baseline=baseline, max_path_length=100, discount=0.99, gae_lambda=0.98, max_kl_step=0.01, policy_ent_coeff=0.0, plot=False, ) # Set up logger since we are not using run_experiment tabular_log_file = osp.join(log_dir, "progress.csv") garage_logger.add_output(CsvOutput(tabular_log_file)) garage_logger.add_output(StdOutput()) garage_logger.add_output(TensorBoardOutput(log_dir)) runner.setup(algo, env) runner.train(n_epochs=976, batch_size=1024) garage_logger.remove_all() return tabular_log_file
def run_garage(env, seed, log_dir):
    '''
    Create garage model and training.

    Replace the ddpg with the algorithm you want to run.

    :param env: Environment of the task.
    :param seed: Random seed for the trial.
    :param log_dir: Log dir path.
    :return:
    '''
    deterministic.set_seed(seed)

    with LocalRunner() as runner:
        env = TfEnv(env)

        # Set up params for ddpg
        action_noise = OUStrategy(env.spec, sigma=params['sigma'])

        policy = ContinuousMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=params['policy_hidden_sizes'],
            hidden_nonlinearity=tf.nn.relu,
            output_nonlinearity=tf.nn.tanh)

        qf = ContinuousMLPQFunction(
            env_spec=env.spec,
            hidden_sizes=params['qf_hidden_sizes'],
            hidden_nonlinearity=tf.nn.relu)

        replay_buffer = SimpleReplayBuffer(
            env_spec=env.spec,
            size_in_transitions=params['replay_buffer_size'],
            time_horizon=params['n_rollout_steps'])

        ddpg = DDPG(env_spec=env.spec,
                    policy=policy,
                    qf=qf,
                    replay_buffer=replay_buffer,
                    policy_lr=params['policy_lr'],
                    qf_lr=params['qf_lr'],
                    target_update_tau=params['tau'],
                    n_train_steps=params['n_train_steps'],
                    discount=params['discount'],
                    min_buffer_size=int(1e4),
                    exploration_strategy=action_noise,
                    policy_optimizer=tf.train.AdamOptimizer,
                    qf_optimizer=tf.train.AdamOptimizer)

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        tensorboard_log_dir = osp.join(log_dir)
        garage_logger.add_output(StdOutput())
        garage_logger.add_output(CsvOutput(tabular_log_file))
        garage_logger.add_output(TensorBoardOutput(tensorboard_log_dir))

        runner.setup(ddpg, env)
        runner.train(n_epochs=params['n_epochs'],
                     n_epoch_cycles=params['n_epoch_cycles'],
                     batch_size=params['n_rollout_steps'])

        garage_logger.remove_all()

        return tabular_log_file
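# The DDPG helpers in this section read hyperparameters from a module-level
# `params` dict that is not shown here. This is only a sketch of its expected
# shape: the keys mirror the lookups above, but the values are assumed, not
# taken from the original benchmark configuration.
params = {
    'policy_lr': 1e-4,
    'qf_lr': 1e-3,
    'policy_hidden_sizes': (64, 64),
    'qf_hidden_sizes': (64, 64),
    'n_epochs': 500,
    'n_epoch_cycles': 20,
    'n_rollout_steps': 100,
    'n_train_steps': 50,
    'discount': 0.9,
    'tau': 1e-2,
    'replay_buffer_size': int(1e6),
    'sigma': 0.2,
}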
def run_garage(env, seed, log_dir):
    '''
    Create garage model and training.

    Replace the ppo with the algorithm you want to run.

    :param env: Environment of the task.
    :param seed: Random seed for the trial.
    :param log_dir: Log dir path.
    :return:
    '''
    deterministic.set_seed(seed)

    with LocalRunner() as runner:
        env = TfEnv(normalize(env))

        policy = GaussianMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=(64, 64),
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=None,
        )

        baseline = GaussianMLPBaseline(
            env_spec=env.spec,
            regressor_args=dict(
                hidden_sizes=(64, 64),
                use_trust_region=False,
                optimizer=FirstOrderOptimizer,
                optimizer_args=dict(
                    batch_size=32,
                    max_epochs=10,
                    tf_optimizer_args=dict(learning_rate=1e-3),
                ),
            ),
        )

        algo = PPO(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            max_path_length=100,
            discount=0.99,
            gae_lambda=0.95,
            lr_clip_range=0.2,
            policy_ent_coeff=0.0,
            optimizer_args=dict(
                batch_size=32,
                max_epochs=10,
                tf_optimizer_args=dict(learning_rate=1e-3),
            ),
            plot=False,
        )

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        garage_logger.add_output(StdOutput())
        garage_logger.add_output(CsvOutput(tabular_log_file))
        garage_logger.add_output(TensorBoardOutput(log_dir))

        runner.setup(algo, env)
        runner.train(n_epochs=488, batch_size=2048)

        garage_logger.remove_all()

        return tabular_log_file
def run_garage(env, seed, log_dir): """ Create garage model and training. Replace the ppo with the algorithm you want to run. :param env: Environment of the task. :param seed: Random seed for the trial. :param log_dir: Log dir path. :return: """ deterministic.set_seed(seed) env.reset() with LocalRunner() as runner: env = TfEnv(env) action_noise = OUStrategy(env.spec, sigma=params['sigma']) policy = ContinuousMLPPolicy( env_spec=env.spec, hidden_sizes=params['policy_hidden_sizes'], hidden_nonlinearity=tf.nn.relu, output_nonlinearity=tf.nn.tanh, input_include_goal=True, ) qf = ContinuousMLPQFunction( env_spec=env.spec, hidden_sizes=params['qf_hidden_sizes'], hidden_nonlinearity=tf.nn.relu, input_include_goal=True, ) replay_buffer = HerReplayBuffer( env_spec=env.spec, size_in_transitions=params['replay_buffer_size'], time_horizon=params['n_rollout_steps'], replay_k=0.4, reward_fun=env.compute_reward, ) algo = DDPG( env_spec=env.spec, policy=policy, qf=qf, replay_buffer=replay_buffer, policy_lr=params['policy_lr'], qf_lr=params['qf_lr'], plot=False, target_update_tau=params['tau'], n_epochs=params['n_epochs'], n_epoch_cycles=params['n_epoch_cycles'], n_train_steps=params['n_train_steps'], discount=params['discount'], exploration_strategy=action_noise, policy_optimizer=tf.train.AdamOptimizer, qf_optimizer=tf.train.AdamOptimizer, buffer_batch_size=256, input_include_goal=True, ) # Set up logger since we are not using run_experiment tabular_log_file = osp.join(log_dir, 'progress.csv') logger.add_output(StdOutput()) logger.add_output(CsvOutput(tabular_log_file)) logger.add_output(TensorBoardOutput(log_dir)) runner.setup(algo, env) runner.train(n_epochs=params['n_epochs'], n_epoch_cycles=params['n_epoch_cycles'], batch_size=params['n_rollout_steps']) logger.remove_all() return tabular_log_file
def run_experiment(argv):
    default_log_dir = config.GARAGE_LOG_DIR
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # Avoid name clashes when running distributed jobs.
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')
    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=('Number of parallel workers to perform rollouts. '
              "0 => don't start any workers"))
    parser.add_argument(
        '--exp_name',
        type=str,
        default=default_exp_name,
        help='Name of the experiment.')
    parser.add_argument(
        '--log_dir',
        type=str,
        default=None,
        help='Path to save the log and iteration snapshot.')
    parser.add_argument(
        '--snapshot_mode',
        type=str,
        default='all',
        help='Mode to save the snapshot. Can be either "all" '
             '(all iterations will be saved), "last" (only '
             'the last iteration will be saved), "gap" (every '
             '`snapshot_gap` iterations are saved), or "none" '
             '(do not save snapshots).')
    parser.add_argument(
        '--snapshot_gap',
        type=int,
        default=1,
        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--tabular_log_file',
        type=str,
        default='progress.csv',
        help='Name of the tabular log file (in csv).')
    parser.add_argument(
        '--text_log_file',
        type=str,
        default='debug.log',
        help='Name of the text log file (in pure text).')
    parser.add_argument(
        '--tensorboard_step_key',
        type=str,
        default=None,
        help='Name of the step key in tensorboard_summary.')
    parser.add_argument(
        '--params_log_file',
        type=str,
        default='params.json',
        help='Name of the parameter log file (in json).')
    parser.add_argument(
        '--variant_log_file',
        type=str,
        default='variant.json',
        help='Name of the variant log file (in json).')
    parser.add_argument(
        '--resume_from_dir',
        type=str,
        default=None,
        help='Directory of the pickle file to resume experiment from.')
    parser.add_argument(
        '--resume_epoch',
        type=str,
        default=None,
        help='Index of iteration to restore from. '
             'Can be "first", "last" or a number. '
             'Not applicable when snapshot_mode="last".')
    parser.add_argument(
        '--plot',
        type=ast.literal_eval,
        default=False,
        help='Whether to plot the iteration results.')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format).')
    parser.add_argument('--seed', type=int, help='Random seed for numpy')
    parser.add_argument(
        '--args_data', type=str, help='Pickled data for objects')
    parser.add_argument(
        '--variant_data',
        type=str,
        help='Pickled data for variant configuration')
    parser.add_argument(
        '--use_cloudpickle', type=ast.literal_eval, default=False)

    args = parser.parse_args(argv[1:])

    if args.seed is not None:
        deterministic.set_seed(args.seed)

    # SIGINT is blocked for all processes created in parallel_sampler to avoid
    # the creation of sleeping and zombie processes.
    #
    # If the user interrupts run_experiment, there's a chance some processes
    # won't die due to a deadlock where one of the children in the parallel
    # sampler exits without releasing a lock after it catches SIGINT.
    #
    # Later the parent tries to acquire the same lock to proceed with its
    # cleanup, but it remains sleeping, waiting for the lock to be released.
    # In the meantime, all the processes in the parallel sampler remain in the
    # zombie state since the parent cannot proceed with their cleanup.
    with mask_signals([signal.SIGINT]):
        if args.n_parallel > 0:
            parallel_sampler.initialize(n_parallel=args.n_parallel)
            if args.seed is not None:
                parallel_sampler.set_seed(args.seed)

        if not args.plot:
            garage.plotter.Plotter.disable()
            garage.tf.plotter.Plotter.disable()

        if args.log_dir is None:
            if args.resume_from_dir is None:
                log_dir = osp.join(default_log_dir, args.exp_name)
            else:
                log_dir = args.resume_from_dir
        else:
            log_dir = args.log_dir

        tabular_log_file = osp.join(log_dir, args.tabular_log_file)
        text_log_file = osp.join(log_dir, args.text_log_file)
        params_log_file = osp.join(log_dir, args.params_log_file)

        if args.variant_data is not None:
            variant_data = pickle.loads(base64.b64decode(args.variant_data))
            variant_log_file = osp.join(log_dir, args.variant_log_file)
            dump_variant(variant_log_file, variant_data)
        else:
            variant_data = None

        if not args.use_cloudpickle:
            log_parameters(params_log_file, args)

        logger.add_output(TextOutput(text_log_file))
        logger.add_output(CsvOutput(tabular_log_file))
        logger.add_output(TensorBoardOutput(log_dir))
        logger.add_output(StdOutput())

        prev_snapshot_dir = snapshotter.snapshot_dir
        prev_mode = snapshotter.snapshot_mode
        snapshotter.snapshot_dir = log_dir
        snapshotter.snapshot_mode = args.snapshot_mode
        snapshotter.snapshot_gap = args.snapshot_gap

        logger.push_prefix('[%s] ' % args.exp_name)

        if args.resume_from_dir is not None:
            with LocalRunner() as runner:
                runner.restore(args.resume_from_dir,
                               from_epoch=args.resume_epoch)
                runner.resume()
        else:
            # Read from stdin
            if args.use_cloudpickle:
                import cloudpickle
                method_call = cloudpickle.loads(
                    base64.b64decode(args.args_data))
                try:
                    method_call(variant_data)
                except BaseException:
                    children = garage.plotter.Plotter.get_plotters()
                    children += garage.tf.plotter.Plotter.get_plotters()
                    if args.n_parallel > 0:
                        children += [parallel_sampler]
                    child_proc_shutdown(children)
                    raise
            else:
                data = pickle.loads(base64.b64decode(args.args_data))
                maybe_iter = concretize(data)
                if is_iterable(maybe_iter):
                    for _ in maybe_iter:
                        pass

        snapshotter.snapshot_mode = prev_mode
        snapshotter.snapshot_dir = prev_snapshot_dir

        logger.remove_all()
        logger.pop_prefix()
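# A hypothetical sketch of driving run_experiment() directly, based on the
# argument handling above: the experiment callable is cloudpickled,
# base64-encoded, and passed via --args_data together with
# --use_cloudpickle True. The callable, experiment name, and argv values are
# illustrative, not part of the original script.
import base64

import cloudpickle


def demo_task(variant_data):
    # A real experiment would build a LocalRunner and algorithm and call
    # runner.train() here; variant_data is None unless --variant_data is set.
    print('variant:', variant_data)


argv = [
    'run_experiment',
    '--exp_name', 'demo',
    '--n_parallel', '0',
    '--use_cloudpickle', 'True',
    '--args_data',
    base64.b64encode(cloudpickle.dumps(demo_task)).decode('utf-8'),
]
run_experiment(argv)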
def tearDown(self):
    logger.remove_all()
def tearDownClass(cls):
    snapshotter.snapshot_dir = cls.prev_log_dir
    snapshotter.snapshot_mode = cls.prev_mode
    logger.remove_all()
    cls.log_dir.cleanup()