def run_task(*_):
    """Train VPG on the CartPole-v1 gym environment.

    Builds a categorical MLP policy and a linear feature baseline,
    then runs 100 iterations of vanilla policy gradient. Positional
    arguments from the experiment runner are ignored.
    """
    cartpole = TfEnv(env_name='CartPole-v1')
    mlp_policy = CategoricalMLPPolicy(
        name="policy",
        env_spec=cartpole.spec,
        hidden_sizes=(32, 32))
    feature_baseline = LinearFeatureBaseline(env_spec=cartpole.spec)
    vpg = VPG(
        env=cartpole,
        policy=mlp_policy,
        baseline=feature_baseline,
        batch_size=10000,
        max_path_length=100,
        n_itr=100,
        discount=0.99,
        optimizer_args=dict(tf_optimizer_args=dict(learning_rate=0.01, )))
    vpg.train()
def test_vpg_cartpole(self):
    """Test VPG with Cartpole environment.

    Trains a Gaussian MLP policy on the normalized Box2D cartpole for
    10 iterations and asserts the final average return clears 160.
    """
    logger.reset()
    env = TfEnv(normalize(CartpoleEnv()))
    policy = GaussianMLPPolicy(
        name="policy", env_spec=env.spec, hidden_sizes=(32, 32))
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = VPG(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=10000,
        max_path_length=100,
        n_itr=10,
        discount=0.99,
        optimizer_args=dict(tf_optimizer_args=dict(learning_rate=0.01, )))
    try:
        last_avg_ret = algo.train(sess=self.sess)
        assert last_avg_ret > 160
    finally:
        # Fix: the sibling CartPole-v1 test closes its env; this one
        # previously leaked it. Close even when the assertion fails.
        env.close()
def test_vpg_cartpole(self):
    """Test VPG with CartPole-v1 environment.

    Runs 10 iterations of VPG with a categorical MLP policy and checks
    that the final average return exceeds 90.
    """
    env = TfEnv(env_name="CartPole-v1")
    policy = CategoricalMLPPolicy(
        name="policy",
        env_spec=env.spec,
        hidden_sizes=(32, 32))
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = VPG(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=10000,
        max_path_length=100,
        n_itr=10,
        discount=0.99,
        optimizer_args=dict(tf_optimizer_args=dict(learning_rate=0.01, )))
    last_avg_ret = algo.train(sess=self.sess)
    assert last_avg_ret > 90
    env.close()
Example using VPG with ISSampler, iterations alternate between live and importance sampled iterations. """ import gym from garage.contrib.alexbeloi.is_sampler import ISSampler from garage.envs import normalize from garage.np.baselines import LinearFeatureBaseline from garage.tf.algos import VPG from garage.tf.policies import GaussianMLPPolicy env = normalize(gym.make('InvertedPendulum-v2')) policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32)) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = VPG( env=env, policy=policy, baseline=baseline, batch_size=4000, max_path_length=100, n_itr=40, discount=0.99, step_size=0.01, sampler_cls=ISSampler, sampler_args=dict(n_backtrack=1), ) algo.train()
from garage.envs.box2d import CartpoleEnv
from garage.envs.mujoco import SwimmerEnv
from garage.tf.algos import VPG
from garage.tf.envs import TfEnv
from garage.tf.policies import GaussianMLPPolicy
from garage.misc.instrument import run_experiment

# NOTE(review): `normalize` and `LinearFeatureBaseline` are used below
# but not imported in this span — confirm they are imported elsewhere
# in the file.
env = TfEnv(normalize(SwimmerEnv()))

policy = GaussianMLPPolicy(
    name="policy", env_spec=env.spec, hidden_sizes=(32, 32))

baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = VPG(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=5000,
    max_path_length=500,
    n_itr=40,
    discount=0.995,
    optimizer_args=dict(tf_optimizer_args=dict(learning_rate=1e-4, )))

# NOTE(review): `algo.train()` is evaluated eagerly here, so training
# runs before `run_experiment` is ever called and its return value (not
# a callable) is passed in. Verify against the instrument API whether a
# stubbed call or a `run_task` callable was intended.
run_experiment(
    algo.train(),
    n_parallel=1,
    snapshot_mode="last",
    seed=1,
    use_gpu=True,
    use_tf=True)