def run_garage(env, seed, log_dir):
    """
    Create garage model and training.

    Replace the ddpg with the algorithm you want to run.

    :param env: Environment of the task.
    :param seed: Random seed for the trial.
    :param log_dir: Log dir path.
    :return: Path of the tabular log file (``progress.csv`` inside
        ``log_dir``).
    """
    ext.set_seed(seed)

    with tf.Graph().as_default():
        # Set up params for ddpg
        action_noise = OUStrategy(env, sigma=params["sigma"])

        actor_net = ContinuousMLPPolicy(
            env_spec=env,
            name="Actor",
            hidden_sizes=params["actor_hidden_sizes"],
            hidden_nonlinearity=tf.nn.relu,
            output_nonlinearity=tf.nn.tanh)

        critic_net = ContinuousMLPQFunction(
            env_spec=env,
            name="Critic",
            hidden_sizes=params["critic_hidden_sizes"],
            hidden_nonlinearity=tf.nn.relu)

        ddpg = DDPG(
            env,
            actor=actor_net,
            critic=critic_net,
            actor_lr=params["actor_lr"],
            critic_lr=params["critic_lr"],
            plot=False,
            target_update_tau=params["tau"],
            n_epochs=params["n_epochs"],
            n_epoch_cycles=params["n_epoch_cycles"],
            n_rollout_steps=params["n_rollout_steps"],
            n_train_steps=params["n_train_steps"],
            discount=params["discount"],
            replay_buffer_size=params["replay_buffer_size"],
            min_buffer_size=int(1e4),
            exploration_strategy=action_noise,
            actor_optimizer=tf.train.AdamOptimizer,
            critic_optimizer=tf.train.AdamOptimizer)

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, "progress.csv")
        tensorboard_log_dir = osp.join(log_dir, "progress")
        garage_logger.add_tabular_output(tabular_log_file)
        try:
            garage_logger.set_tensorboard_dir(tensorboard_log_dir)
            ddpg.train()
        finally:
            # Detach the tabular output even when training fails, so a
            # later trial in the same process does not keep writing to it.
            garage_logger.remove_tabular_output(tabular_log_file)

        return tabular_log_file
def run_garage(env, seed, log_dir):
    """
    Create garage model and training.

    Replace the trpo with the algorithm you want to run.

    :param env: Environment of the task.
    :param seed: Random seed for the trial.
    :param log_dir: Log dir path.
    :return: Path of the tabular log file (``progress.csv`` inside
        ``log_dir``).
    """
    ext.set_seed(seed)

    with tf.Graph().as_default():
        env = TfEnv(normalize(env))

        policy = GaussianMLPPolicy(
            name="policy",
            env_spec=env.spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=None,
        )

        baseline = GaussianMLPBaseline(
            env_spec=env.spec,
            regressor_args=dict(
                hidden_sizes=(32, 32),
                use_trust_region=True,
            ),
        )

        algo = TRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=1024,
            max_path_length=100,
            n_itr=976,
            discount=0.99,
            gae_lambda=0.98,
            clip_range=0.1,
            policy_ent_coeff=0.0,
            plot=False,
        )

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, "progress.csv")
        garage_logger.add_tabular_output(tabular_log_file)
        try:
            garage_logger.set_tensorboard_dir(log_dir)
            algo.train()
        finally:
            # Detach the tabular output even when training fails, so a
            # later trial in the same process does not keep writing to it.
            garage_logger.remove_tabular_output(tabular_log_file)

        return tabular_log_file
def run_garage(env, seed, log_dir):
    """
    Create garage model and training.

    Replace the ddpg with the algorithm you want to run.

    :param env: Environment of the task.
    :param seed: Random seed for the trial.
    :param log_dir: Log dir path.
    :return: Path of the tabular log file (``progress.csv`` inside
        ``log_dir``).
    """
    deterministic.set_seed(seed)

    with LocalRunner() as runner:
        env = TfEnv(env)

        # Set up params for ddpg
        action_noise = OUStrategy(env.spec, sigma=params["sigma"])

        policy = ContinuousMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=params["policy_hidden_sizes"],
            hidden_nonlinearity=tf.nn.relu,
            output_nonlinearity=tf.nn.tanh)

        qf = ContinuousMLPQFunction(
            env_spec=env.spec,
            hidden_sizes=params["qf_hidden_sizes"],
            hidden_nonlinearity=tf.nn.relu)

        replay_buffer = SimpleReplayBuffer(
            env_spec=env.spec,
            size_in_transitions=params["replay_buffer_size"],
            time_horizon=params["n_rollout_steps"])

        ddpg = DDPG(
            env,
            policy=policy,
            qf=qf,
            replay_buffer=replay_buffer,
            policy_lr=params["policy_lr"],
            qf_lr=params["qf_lr"],
            target_update_tau=params["tau"],
            n_train_steps=params["n_train_steps"],
            discount=params["discount"],
            min_buffer_size=int(1e4),
            exploration_strategy=action_noise,
            policy_optimizer=tf.train.AdamOptimizer,
            qf_optimizer=tf.train.AdamOptimizer)

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, "progress.csv")
        # Single-argument join: TensorBoard events go directly in log_dir.
        tensorboard_log_dir = osp.join(log_dir)
        garage_logger.add_tabular_output(tabular_log_file)
        try:
            garage_logger.set_tensorboard_dir(tensorboard_log_dir)

            runner.setup(ddpg, env)
            runner.train(
                n_epochs=params['n_epochs'],
                n_epoch_cycles=params['n_epoch_cycles'],
                batch_size=params["n_rollout_steps"])
        finally:
            # Detach the tabular output even when training fails, so a
            # later trial in the same process does not keep writing to it.
            garage_logger.remove_tabular_output(tabular_log_file)

        return tabular_log_file
#!/usr/bin/env python3
"""Example: record histograms and a scalar to TensorBoard via garage's logger.

Writes N steps of summaries to ``data/local/histogram_example``. At step
``i`` every distribution parameter is derived from ``k_val = i / N``.
"""

import tensorflow as tf

from garage.misc import logger

logger.set_tensorboard_dir("data/local/histogram_example")
N = 400
for i in range(N):
    # A fresh session per step, as before, but entered through ``with`` so
    # it is exited and closed at the end of the iteration instead of
    # leaving N open sessions stacked as defaults.
    with tf.Session():
        k_val = i / N
        logger.record_histogram_by_type('gamma', key='gamma', alpha=k_val)
        logger.record_histogram_by_type(
            'normal', key='normal', mean=5 * k_val, stddev=1.0)
        logger.record_histogram_by_type('poisson', key='poisson', lam=k_val)
        logger.record_histogram_by_type(
            'uniform', key='uniform', maxval=k_val * 10)
        logger.record_tabular("app", k_val)
        logger.record_histogram("gass", k_val)
        logger.dump_tensorboard(step=i)
def run_experiment(argv):
    """
    Run one experiment described by command-line style arguments.

    Parses ``argv[1:]``, optionally seeds and starts the parallel sampler,
    points the logger at the experiment's log directory, then either resumes
    a saved algorithm (``--resume_from``) or deserializes and runs the call
    passed in ``--args_data``. The logger's previous snapshot dir/mode,
    outputs and prefix are restored afterwards, also when the run raises.

    :param argv: Argument vector; ``argv[0]`` is skipped.
    :return: None
    """
    default_log_dir = config.LOG_DIR
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')

    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=("Number of parallel workers to perform rollouts. "
              "0 => don't start any workers"))
    parser.add_argument(
        '--exp_name',
        type=str,
        default=default_exp_name,
        help='Name of the experiment.')
    parser.add_argument(
        '--log_dir',
        type=str,
        default=None,
        help='Path to save the log and iteration snapshot.')
    parser.add_argument(
        '--snapshot_mode',
        type=str,
        default='all',
        help='Mode to save the snapshot. Can be either "all" '
        '(all iterations will be saved), "last" (only '
        'the last iteration will be saved), "gap" (every '
        '`snapshot_gap` iterations are saved), or "none" '
        '(do not save snapshots)')
    parser.add_argument(
        '--snapshot_gap',
        type=int,
        default=1,
        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--tabular_log_file',
        type=str,
        default='progress.csv',
        help='Name of the tabular log file (in csv).')
    parser.add_argument(
        '--text_log_file',
        type=str,
        default='debug.log',
        help='Name of the text log file (in pure text).')
    parser.add_argument(
        '--tensorboard_step_key',
        type=str,
        default=None,
        help=("Name of the step key in tensorboard_summary."))
    parser.add_argument(
        '--params_log_file',
        type=str,
        default='params.json',
        help='Name of the parameter log file (in json).')
    parser.add_argument(
        '--variant_log_file',
        type=str,
        default='variant.json',
        help='Name of the variant log file (in json).')
    parser.add_argument(
        '--resume_from',
        type=str,
        default=None,
        help='Name of the pickle file to resume experiment from.')
    parser.add_argument(
        '--plot',
        type=ast.literal_eval,
        default=False,
        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed', type=int, help='Random seed for numpy')
    parser.add_argument(
        '--args_data', type=str, help='Pickled data for objects')
    parser.add_argument(
        '--variant_data',
        type=str,
        help='Pickled data for variant configuration')
    parser.add_argument(
        '--use_cloudpickle', type=ast.literal_eval, default=False)

    args = parser.parse_args(argv[1:])

    if args.seed is not None:
        set_seed(args.seed)

    # SIGINT is blocked for all processes created in parallel_sampler to avoid
    # the creation of sleeping and zombie processes.
    #
    # If the user interrupts run_experiment, there's a chance some processes
    # won't die due to a dead lock condition where one of the children in the
    # parallel sampler exits without releasing a lock once after it catches
    # SIGINT.
    #
    # Later the parent tries to acquire the same lock to proceed with his
    # cleanup, but it remains sleeping waiting for the lock to be released.
    # In the meantime, all the process in parallel sampler remain in the zombie
    # state since the parent cannot proceed with their clean up.
    with mask_signals([signal.SIGINT]):
        if args.n_parallel > 0:
            parallel_sampler.initialize(n_parallel=args.n_parallel)
            if args.seed is not None:
                parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = osp.join(default_log_dir, args.exp_name)
    else:
        log_dir = args.log_dir
    tabular_log_file = osp.join(log_dir, args.tabular_log_file)
    text_log_file = osp.join(log_dir, args.text_log_file)
    params_log_file = osp.join(log_dir, args.params_log_file)

    # NOTE: unpickling executes arbitrary code. --variant_data and
    # --args_data must only ever come from a trusted launcher.
    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = osp.join(log_dir, args.variant_log_file)
        logger.log_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    if not args.use_cloudpickle:
        logger.log_parameters_lite(params_log_file, args)

    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.set_tensorboard_dir(log_dir)
    prev_snapshot_dir = logger.get_snapshot_dir()
    prev_mode = logger.get_snapshot_mode()
    logger.set_snapshot_dir(log_dir)
    logger.set_snapshot_mode(args.snapshot_mode)
    logger.set_snapshot_gap(args.snapshot_gap)
    logger.set_log_tabular_only(args.log_tabular_only)
    logger.set_tensorboard_step_key(args.tensorboard_step_key)
    logger.push_prefix("[%s] " % args.exp_name)

    try:
        if args.resume_from is not None:
            data = joblib.load(args.resume_from)
            assert 'algo' in data
            algo = data['algo']
            algo.train()
        else:
            # read from stdin
            if args.use_cloudpickle:
                import cloudpickle
                method_call = cloudpickle.loads(
                    base64.b64decode(args.args_data))
                try:
                    method_call(variant_data)
                except BaseException:
                    # Shut down plotter and sampler children before the
                    # error propagates, so they are not left running.
                    children = garage.plotter.Plotter.get_plotters()
                    children += garage.tf.plotter.Plotter.get_plotters()
                    if args.n_parallel > 0:
                        children += [parallel_sampler]
                    child_proc_shutdown(children)
                    raise
            else:
                data = pickle.loads(base64.b64decode(args.args_data))
                maybe_iter = concretize(data)
                if is_iterable(maybe_iter):
                    # Exhaust the iterator purely to drive the experiment.
                    for _ in maybe_iter:
                        pass
    finally:
        # Restore the logger to its state before this experiment, whether
        # the run finished normally or raised.
        logger.set_snapshot_mode(prev_mode)
        logger.set_snapshot_dir(prev_snapshot_dir)
        logger.remove_tabular_output(tabular_log_file)
        logger.remove_text_output(text_log_file)
        logger.pop_prefix()
def run_garage(env, seed, log_dir):
    """
    Create garage model and training.

    Replace the ddpg with the algorithm you want to run.

    :param env: Environment of the task.
    :param seed: Random seed for the trial.
    :param log_dir: Log dir path.
    :return: Path of the tabular log file (``progress.csv`` inside
        ``log_dir``).
    """
    ext.set_seed(seed)

    with tf.Graph().as_default():
        env = TfEnv(env)

        action_noise = OUStrategy(env.spec, sigma=params["sigma"])

        policy = ContinuousMLPPolicy(
            env_spec=env.spec,
            name="Policy",
            hidden_sizes=params["policy_hidden_sizes"],
            hidden_nonlinearity=tf.nn.relu,
            output_nonlinearity=tf.nn.tanh,
            input_include_goal=True,
        )

        qf = ContinuousMLPQFunction(
            env_spec=env.spec,
            name="QFunction",
            hidden_sizes=params["qf_hidden_sizes"],
            hidden_nonlinearity=tf.nn.relu,
            input_include_goal=True,
        )

        # The buffer is given the environment's own reward function.
        replay_buffer = HerReplayBuffer(
            env_spec=env.spec,
            size_in_transitions=params["replay_buffer_size"],
            time_horizon=params["n_rollout_steps"],
            replay_k=0.4,
            reward_fun=env.compute_reward,
        )

        algo = DDPG(
            env,
            policy=policy,
            qf=qf,
            replay_buffer=replay_buffer,
            policy_lr=params["policy_lr"],
            qf_lr=params["qf_lr"],
            plot=False,
            target_update_tau=params["tau"],
            n_epochs=params["n_epochs"],
            n_epoch_cycles=params["n_epoch_cycles"],
            max_path_length=params["n_rollout_steps"],
            n_train_steps=params["n_train_steps"],
            discount=params["discount"],
            exploration_strategy=action_noise,
            policy_optimizer=tf.train.AdamOptimizer,
            qf_optimizer=tf.train.AdamOptimizer,
            buffer_batch_size=256,
            input_include_goal=True,
        )

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, "progress.csv")
        garage_logger.add_tabular_output(tabular_log_file)
        try:
            garage_logger.set_tensorboard_dir(log_dir)
            algo.train()
        finally:
            # Detach the tabular output even when training fails, so a
            # later trial in the same process does not keep writing to it.
            garage_logger.remove_tabular_output(tabular_log_file)

        return tabular_log_file
def run_experiment(argv):
    """
    Run one experiment described by command-line style arguments.

    Parses ``argv[1:]``, requires the ``JOBLIB_START_METHOD`` environment
    variable to be ``forkserver``, optionally seeds and starts the parallel
    sampler, points the logger at the experiment's log directory, then
    either resumes a saved algorithm (``--resume_from``) or deserializes and
    runs the call passed in ``--args_data``. The logger's previous snapshot
    dir/mode, outputs and prefix are restored afterwards, also when the run
    raises.

    :param argv: Argument vector; ``argv[0]`` is skipped.
    :return: None
    """
    default_log_dir = config.LOG_DIR
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')

    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=("Number of parallel workers to perform rollouts. "
              "0 => don't start any workers"))
    parser.add_argument(
        '--exp_name',
        type=str,
        default=default_exp_name,
        help='Name of the experiment.')
    parser.add_argument(
        '--log_dir',
        type=str,
        default=None,
        help='Path to save the log and iteration snapshot.')
    parser.add_argument(
        '--snapshot_mode',
        type=str,
        default='all',
        help='Mode to save the snapshot. Can be either "all" '
        '(all iterations will be saved), "last" (only '
        'the last iteration will be saved), "gap" (every '
        '`snapshot_gap` iterations are saved), or "none" '
        '(do not save snapshots)')
    parser.add_argument(
        '--snapshot_gap',
        type=int,
        default=1,
        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--tabular_log_file',
        type=str,
        default='progress.csv',
        help='Name of the tabular log file (in csv).')
    parser.add_argument(
        '--text_log_file',
        type=str,
        default='debug.log',
        help='Name of the text log file (in pure text).')
    parser.add_argument(
        '--tensorboard_step_key',
        type=str,
        default=None,
        help=("Name of the step key in tensorboard_summary."))
    parser.add_argument(
        '--params_log_file',
        type=str,
        default='params.json',
        help='Name of the parameter log file (in json).')
    parser.add_argument(
        '--variant_log_file',
        type=str,
        default='variant.json',
        help='Name of the variant log file (in json).')
    parser.add_argument(
        '--resume_from',
        type=str,
        default=None,
        help='Name of the pickle file to resume experiment from.')
    parser.add_argument(
        '--plot',
        type=ast.literal_eval,
        default=False,
        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed', type=int, help='Random seed for numpy')
    parser.add_argument(
        '--args_data', type=str, help='Pickled data for stub objects')
    parser.add_argument(
        '--variant_data',
        type=str,
        help='Pickled data for variant configuration')
    parser.add_argument(
        '--use_cloudpickle', type=ast.literal_eval, default=False)

    args = parser.parse_args(argv[1:])

    # Fails with AssertionError unless JOBLIB_START_METHOD is "forkserver".
    assert (os.environ.get("JOBLIB_START_METHOD", None) == "forkserver")
    if args.seed is not None:
        set_seed(args.seed)

    if args.n_parallel > 0:
        # Imported only when workers are requested.
        from garage.sampler import parallel_sampler
        parallel_sampler.initialize(n_parallel=args.n_parallel)
        if args.seed is not None:
            parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = osp.join(default_log_dir, args.exp_name)
    else:
        log_dir = args.log_dir
    tabular_log_file = osp.join(log_dir, args.tabular_log_file)
    text_log_file = osp.join(log_dir, args.text_log_file)
    params_log_file = osp.join(log_dir, args.params_log_file)

    # NOTE: unpickling executes arbitrary code. --variant_data and
    # --args_data must only ever come from a trusted launcher.
    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = osp.join(log_dir, args.variant_log_file)
        logger.log_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    if not args.use_cloudpickle:
        logger.log_parameters_lite(params_log_file, args)

    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.set_tensorboard_dir(log_dir)
    prev_snapshot_dir = logger.get_snapshot_dir()
    prev_mode = logger.get_snapshot_mode()
    logger.set_snapshot_dir(log_dir)
    logger.set_snapshot_mode(args.snapshot_mode)
    logger.set_snapshot_gap(args.snapshot_gap)
    logger.set_log_tabular_only(args.log_tabular_only)
    logger.set_tensorboard_step_key(args.tensorboard_step_key)
    logger.push_prefix("[%s] " % args.exp_name)

    try:
        if args.resume_from is not None:
            data = joblib.load(args.resume_from)
            assert 'algo' in data
            algo = data['algo']
            algo.train()
        else:
            # read from stdin
            if args.use_cloudpickle:
                import cloudpickle
                method_call = cloudpickle.loads(
                    base64.b64decode(args.args_data))
                try:
                    method_call(variant_data)
                except BaseException:
                    # Stop the sampler workers before the error propagates.
                    if args.n_parallel > 0:
                        parallel_sampler.terminate()
                    raise
            else:
                data = pickle.loads(base64.b64decode(args.args_data))
                maybe_iter = concretize(data)
                if is_iterable(maybe_iter):
                    # Exhaust the iterator purely to drive the experiment.
                    for _ in maybe_iter:
                        pass
    finally:
        # Restore the logger to its state before this experiment, whether
        # the run finished normally or raised.
        logger.set_snapshot_mode(prev_mode)
        logger.set_snapshot_dir(prev_snapshot_dir)
        logger.remove_tabular_output(tabular_log_file)
        logger.remove_text_output(text_log_file)
        logger.pop_prefix()
from garage.envs import EnvSpec
from garage.misc import logger
import ipdb
import numpy as np
import tensorflow as tf

from embed2learn.embeddings import GaussianMLPEmbedding
from embed2learn.embeddings import EmbeddingSpec
from embed2learn.policies import GaussianMLPMultitaskPolicy

# Script fragment: builds a Gaussian MLP embedding and a multitask policy
# inside a TensorFlow session, with TensorBoard output in a per-run directory.
# NOTE(review): `argparse` and `Box` are used below but are not imported in
# the visible lines; presumably they are imported earlier in the file. Confirm.

# Integer option --i; it is only used to suffix the TensorBoard directory.
parser = argparse.ArgumentParser()
parser.add_argument("--i", dest="i", type=int)
args = parser.parse_args()

with tf.Session() as sess:
    # The path is relative, so it resolves against the working directory.
    logger.set_tensorboard_dir("../../data/local/test_fixture/temp{}".format(
        args.i))

    # Embedding spec: 2-element task space to 3-element latent space.
    task_space = Box(low=np.array([0, 0]), high=np.array([1, 1]))
    latent_space = Box(low=np.array([-1, -1, -1]), high=np.array([1, 1, 1]))
    embed_spec = EmbeddingSpec(task_space, latent_space)

    # Environment spec: 4-element observations, 5-element actions.
    obs_space = Box(low=np.array([0, 0, 0, 0]), high=np.array([1, 1, 1, 1]))
    act_space = Box(low=np.array([-2, -2, -2, -2, -2]),
                    high=np.array([2, 2, 2, 2, 2]))
    env_spec = EnvSpec(obs_space, act_space)

    # The policy is constructed on top of the embedding `e`.
    e = GaussianMLPEmbedding(embed_spec, std_share_network=True)
    p = GaussianMLPMultitaskPolicy(env_spec=env_spec,
                                   task_space=task_space,
                                   embedding=e,
                                   std_share_network=True)