def ppo_garage_pytorch(ctxt, env_id, seed):
    """Create garage PyTorch PPO model and training.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by Trainer to create the snapshotter.
        env_id (str): Environment id of the task.
        seed (int): Random positive integer for the trial.

    """
    # Seed everything first so policy/value-function init is reproducible.
    deterministic.set_seed(seed)

    trainer = Trainer(ctxt)
    env = normalize(GymEnv(env_id))

    # Actor: Gaussian MLP policy with two 32-unit tanh hidden layers.
    policy = PyTorch_GMP(env.spec,
                         hidden_sizes=(32, 32),
                         hidden_nonlinearity=torch.tanh,
                         output_nonlinearity=None)

    # Critic: Gaussian MLP value function with the same architecture.
    value_function = GaussianMLPValueFunction(env_spec=env.spec,
                                              hidden_sizes=(32, 32),
                                              hidden_nonlinearity=torch.tanh,
                                              output_nonlinearity=None)

    # Both networks are optimized with Adam over 10 epochs of
    # 64-sample minibatches per training iteration.
    adam_spec = (torch.optim.Adam, {'lr': 2.5e-4})
    policy_optimizer = OptimizerWrapper(adam_spec,
                                        policy,
                                        max_optimization_epochs=10,
                                        minibatch_size=64)
    vf_optimizer = OptimizerWrapper(adam_spec,
                                    value_function,
                                    max_optimization_epochs=10,
                                    minibatch_size=64)

    sampler = RaySampler(agents=policy,
                         envs=env,
                         max_episode_length=env.spec.max_episode_length)

    algo = PyTorch_PPO(env_spec=env.spec,
                       policy=policy,
                       value_function=value_function,
                       sampler=sampler,
                       policy_optimizer=policy_optimizer,
                       vf_optimizer=vf_optimizer,
                       discount=0.99,
                       gae_lambda=0.95,
                       center_adv=True,
                       lr_clip_range=0.2)

    trainer.setup(algo, env)
    trainer.train(n_epochs=hyper_parameters['n_epochs'],
                  batch_size=hyper_parameters['batch_size'])
def run_garage_pytorch(env, seed, log_dir):
    """Create garage PyTorch PPO model and training.

    Args:
        env (dict): Environment of the task.
        seed (int): Random positive integer for the trial.
        log_dir (str): Log dir path.

    Returns:
        str: Path to output csv file

    """
    env = TfEnv(normalize(env))

    deterministic.set_seed(seed)
    runner = LocalRunner(snapshot_config)

    # Gaussian MLP policy: two 32-unit tanh hidden layers.
    policy = PyTorch_GMP(env.spec,
                         hidden_sizes=(32, 32),
                         hidden_nonlinearity=torch.tanh,
                         output_nonlinearity=None)

    # Linear baseline instead of a learned value network.
    value_functions = LinearFeatureBaseline(env_spec=env.spec)

    algo = PyTorch_PPO(env_spec=env.spec,
                       policy=policy,
                       value_function=value_functions,
                       optimizer=torch.optim.Adam,
                       policy_lr=3e-4,
                       max_path_length=hyper_parameters['max_path_length'],
                       discount=0.99,
                       gae_lambda=0.95,
                       center_adv=True,
                       lr_clip_range=0.2,
                       minibatch_size=128,
                       max_optimization_epochs=10)

    # Set up logger since we are not using run_experiment: stream progress
    # to stdout, a CSV file, and TensorBoard.
    tabular_log_file = osp.join(log_dir, 'progress.csv')
    for output in (dowel.StdOutput(),
                   dowel.CsvOutput(tabular_log_file),
                   dowel.TensorBoardOutput(log_dir)):
        dowel_logger.add_output(output)

    runner.setup(algo, env)
    runner.train(n_epochs=hyper_parameters['n_epochs'],
                 batch_size=hyper_parameters['batch_size'])

    dowel_logger.remove_all()

    return tabular_log_file
def ppo_garage_pytorch(ctxt, env_id, seed):
    """Create garage PyTorch PPO model and training.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        env_id (str): Environment id of the task.
        seed (int): Random positive integer for the trial.

    """
    # Seed first so network initialization is reproducible per trial.
    deterministic.set_seed(seed)

    runner = LocalRunner(ctxt)
    env = TfEnv(normalize(gym.make(env_id)))

    # Gaussian MLP policy: two 32-unit tanh hidden layers.
    policy = PyTorch_GMP(env.spec,
                         hidden_sizes=(32, 32),
                         hidden_nonlinearity=torch.tanh,
                         output_nonlinearity=None)

    # Linear baseline instead of a learned value network.
    value_functions = LinearFeatureBaseline(env_spec=env.spec)

    algo = PyTorch_PPO(env_spec=env.spec,
                       policy=policy,
                       value_function=value_functions,
                       optimizer=torch.optim.Adam,
                       policy_lr=3e-4,
                       max_path_length=hyper_parameters['max_path_length'],
                       discount=0.99,
                       gae_lambda=0.95,
                       center_adv=True,
                       lr_clip_range=0.2,
                       minibatch_size=128,
                       max_optimization_epochs=10)

    runner.setup(algo, env)
    runner.train(n_epochs=hyper_parameters['n_epochs'],
                 batch_size=hyper_parameters['batch_size'])