def run_task(snapshot_config, *_):
    """Run the job.

    Args:
        snapshot_config (garage.experiment.SnapshotConfig): Configuration
            values for snapshotting.
        *_ (object): Hyperparameters (unused).

    """
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        env = TfEnv(normalize(gym.make('InvertedPendulum-v2')))

        policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = VPG(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            max_path_length=100,
            discount=0.99,
            max_kl_step=0.01,
        )

        runner.setup(algo,
                     env,
                     sampler_cls=ISSampler,
                     sampler_args=dict(n_backtrack=1))
        runner.train(n_epochs=40, batch_size=4000)
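# Launchers with this signature are typically handed to garage's
# run_experiment helper, which builds the SnapshotConfig and invokes
# run_task(snapshot_config, variant_data). A minimal sketch; the
# snapshot_mode and seed values below are illustrative choices, not
# taken from the snippet above.
from garage.experiment import run_experiment

run_experiment(
    run_task,
    snapshot_mode='last',
    seed=1,
)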
def fixture_exp(snapshot_config, sess):
    """Dummy fixture experiment function.

    Args:
        snapshot_config (garage.experiment.SnapshotConfig): The snapshot
            configuration used by LocalRunner to create the snapshotter.
            If None, it will create one with default settings.
        sess (tf.Session): An optional TensorFlow session.
            A new session will be created immediately if not provided.

    Returns:
        np.ndarray: Values of the parameters evaluated in
            the current session

    """
    with LocalTFRunner(snapshot_config=snapshot_config, sess=sess) as runner:
        env = GarageEnv(env_name='CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(8, 8))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = VPG(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   max_episode_length=100,
                   discount=0.99,
                   optimizer_args=dict(learning_rate=0.01, ))

        runner.setup(algo, env, sampler_cls=LocalSampler)
        runner.train(n_epochs=5, batch_size=100)

        return policy.get_param_values()
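# A sketch of how this fixture might be driven from a test, assuming
# TF 1.x-style sessions and that SnapshotConfig is the namedtuple from
# garage.experiment; the directory and mode values are illustrative.
import tensorflow as tf

from garage.experiment import SnapshotConfig

snapshot_config = SnapshotConfig(snapshot_dir='/tmp/fixture_exp',
                                 snapshot_mode='last',
                                 snapshot_gap=1)
with tf.compat.v1.Session() as sess:
    # Returns the trained policy's parameter values.
    params = fixture_exp(snapshot_config, sess)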
def test_batch_sampler(self):
    max_cpus = 8
    with LocalRunner(max_cpus=max_cpus) as runner:
        env = TfEnv(env_name='CartPole-v1')

        policy = CategoricalMLPPolicy(name="policy",
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = VPG(env=env,
                   policy=policy,
                   baseline=baseline,
                   max_path_length=1,
                   whole_paths=True,
                   discount=0.99)

        runner.setup(algo,
                     env,
                     sampler_cls=BatchSampler,
                     sampler_args={'n_envs': max_cpus})

        try:
            runner.initialize_tf_vars()
        except BaseException:
            raise self.failureException(
                "LocalRunner should be able to initialize tf variables.")

        runner.start_worker()

        paths = runner.sampler.obtain_samples(0, 8)
        self.assertGreaterEqual(
            len(paths), max_cpus, "BatchSampler should sample more than "
            "max_cpus=%d trajectories" % max_cpus)
def vpgis_inverted_pendulum(ctxt=None, seed=1):
    """Train VPG with importance sampling on the InvertedPendulum-v2 environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.

    """
    set_seed(seed)
    with LocalTFRunner(ctxt) as runner:
        env = GarageEnv(normalize(gym.make('InvertedPendulum-v2')))

        policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = VPG(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            max_path_length=100,
            discount=0.99,
            max_kl_step=0.01,
        )

        runner.setup(algo,
                     env,
                     sampler_cls=ISSampler,
                     sampler_args=dict(n_backtrack=1))
        runner.train(n_epochs=40, batch_size=4000)
def test_vpg_cartpole(self):
    """Test VPG with CartPole-v1 environment."""
    with LocalRunner(sess=self.sess) as runner:
        env = TfEnv(env_name='CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = VPG(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   max_path_length=100,
                   discount=0.99,
                   optimizer_args=dict(
                       tf_optimizer_args=dict(learning_rate=0.01, )))

        runner.setup(algo, env)

        last_avg_ret = runner.train(n_epochs=10, batch_size=10000)
        assert last_avg_ret > 90

        env.close()
def test_set_plot(self):
    deterministic.set_seed(1)
    with TFTrainer(snapshot_config) as trainer:
        env = GymEnv('CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(8, 8))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        sampler = LocalSampler(
            agents=policy,
            envs=env,
            max_episode_length=env.spec.max_episode_length,
            is_tf_worker=True)

        algo = VPG(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   sampler=sampler,
                   discount=0.99,
                   optimizer_args=dict(learning_rate=0.01, ))

        trainer.setup(algo, env)
        trainer.train(n_epochs=1, batch_size=100, plot=True)

        assert isinstance(trainer._plotter, Plotter), (
            'self.plotter in TFTrainer should be set to Plotter.')
def vpg_cartpole(ctxt=None, seed=1):
    """Train VPG with CartPole-v1 environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by Trainer to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.

    """
    set_seed(seed)
    with TFTrainer(snapshot_config=ctxt) as trainer:
        env = GymEnv('CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = VPG(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   discount=0.99,
                   optimizer_args=dict(learning_rate=0.01, ))

        trainer.setup(algo, env)
        trainer.train(n_epochs=100, batch_size=10000)
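# In this newer API style, launchers taking a `ctxt` argument are usually
# decorated with garage's @wrap_experiment, which creates the
# ExperimentContext (log and snapshot directories) and passes it in.
# A minimal sketch; wrapping after the fact here is only for illustration,
# as the decorator form is the common usage.
from garage import wrap_experiment

vpg_cartpole = wrap_experiment(vpg_cartpole)

vpg_cartpole(seed=1)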
def test_train(self):
    with TFTrainer(snapshot_config) as trainer:
        env = GymEnv('CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(8, 8))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        sampler = LocalSampler(
            agents=policy,
            envs=env,
            max_episode_length=env.spec.max_episode_length,
            is_tf_worker=True)

        algo = VPG(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   sampler=sampler,
                   discount=0.99,
                   optimizer_args=dict(learning_rate=0.01, ))

        trainer.setup(algo, env)
        trainer.train(n_epochs=1, batch_size=100)
def test_make_sampler_ray_sampler(self, ray_session_fixture):
    del ray_session_fixture
    assert ray.is_initialized()
    with TFTrainer(snapshot_config) as trainer:
        env = GymEnv('CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(8, 8))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        sampler = RaySampler(
            agents=policy,
            envs=env,
            max_episode_length=env.spec.max_episode_length,
            is_tf_worker=True)

        algo = VPG(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   sampler=sampler,
                   discount=0.99,
                   optimizer_args=dict(learning_rate=0.01, ))

        trainer.setup(algo, env)
        assert isinstance(trainer._sampler, RaySampler)
        trainer.train(n_epochs=1, batch_size=10)
def run_task(*_):
    """Wrap VPG training task in the run_task function."""
    env = TfEnv(env_name='CartPole-v1')

    policy = CategoricalMLPPolicy(name="policy",
                                  env_spec=env.spec,
                                  hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = VPG(env=env,
               policy=policy,
               baseline=baseline,
               batch_size=10000,
               max_path_length=100,
               n_itr=100,
               discount=0.99,
               optimizer_args=dict(
                   tf_optimizer_args=dict(learning_rate=0.01, )))

    algo.train()
def test_vpg_cartpole(self):
    """Test VPG with the Cartpole environment."""
    logger.reset()
    env = TfEnv(normalize(CartpoleEnv()))

    policy = GaussianMLPPolicy(name="policy",
                               env_spec=env.spec,
                               hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = VPG(env=env,
               policy=policy,
               baseline=baseline,
               batch_size=10000,
               max_path_length=100,
               n_itr=10,
               discount=0.99,
               optimizer_args=dict(
                   tf_optimizer_args=dict(learning_rate=0.01, )))

    last_avg_ret = algo.train(sess=self.sess)
    assert last_avg_ret > 160
def test_vpg_cartpole(self):
    """Test VPG with CartPole-v1 environment."""
    env = TfEnv(env_name="CartPole-v1")

    policy = CategoricalMLPPolicy(name="policy",
                                  env_spec=env.spec,
                                  hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = VPG(env=env,
               policy=policy,
               baseline=baseline,
               batch_size=10000,
               max_path_length=100,
               n_itr=10,
               discount=0.99,
               optimizer_args=dict(
                   tf_optimizer_args=dict(learning_rate=0.01, )))

    last_avg_ret = algo.train(sess=self.sess)
    assert last_avg_ret > 90

    env.close()
def test_train(self):
    with LocalTFRunner(snapshot_config) as runner:
        env = GymEnv('CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(8, 8))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = VPG(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   discount=0.99,
                   optimizer_args=dict(learning_rate=0.01, ))

        runner.setup(algo, env)
        runner.train(n_epochs=1, batch_size=100)
def run_task(snapshot_config, *_):
    """Run VPG on CartPole-v1 under a LocalRunner.

    Args:
        snapshot_config (garage.experiment.SnapshotConfig): Configuration
            values for snapshotting.
        *_ (object): Hyperparameters (unused).

    """
    with LocalRunner(snapshot_config=snapshot_config) as runner:
        env = TfEnv(env_name='CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = VPG(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   max_path_length=100,
                   discount=0.99,
                   optimizer_args=dict(
                       tf_optimizer_args=dict(learning_rate=0.01, )))

        runner.setup(algo, env)
        runner.train(n_epochs=100, batch_size=10000)
def test_make_sampler_local_sampler(self):
    with TFTrainer(snapshot_config) as trainer:
        env = GymEnv('CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(8, 8))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = VPG(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   discount=0.99,
                   optimizer_args=dict(learning_rate=0.01, ))

        trainer.setup(algo, env, sampler_cls=LocalSampler)
        assert isinstance(trainer._sampler, LocalSampler)
        trainer.train(n_epochs=1, batch_size=10)
def test_train(self):
    with LocalRunner() as runner:
        env = TfEnv(env_name='CartPole-v1')

        policy = CategoricalMLPPolicy(name="policy",
                                      env_spec=env.spec,
                                      hidden_sizes=(8, 8))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = VPG(env=env,
                   policy=policy,
                   baseline=baseline,
                   max_path_length=100,
                   discount=0.99,
                   optimizer_args=dict(
                       tf_optimizer_args=dict(learning_rate=0.01, )))

        runner.setup(algo, env)
        runner.train(n_epochs=1, batch_size=100)
def test_make_sampler_ray_sampler(self):
    with LocalTFRunner(snapshot_config) as runner:
        env = TfEnv(env_name='CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(8, 8))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = VPG(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   max_path_length=100,
                   discount=0.99,
                   optimizer_args=dict(
                       tf_optimizer_args=dict(learning_rate=0.01, )))

        runner.setup(algo, env, sampler_cls=RaySampler)
        assert isinstance(runner._sampler, RaySampler)
        runner.train(n_epochs=1, batch_size=10)
def test_set_plot(self):
    with LocalTFRunner(snapshot_config) as runner:
        env = GymEnv('CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(8, 8))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = VPG(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   discount=0.99,
                   optimizer_args=dict(learning_rate=0.01, ))

        runner.setup(algo, env)
        runner.train(n_epochs=1, batch_size=100, plot=True)

        assert isinstance(runner._plotter, Plotter), (
            'self.plotter in LocalTFRunner should be set to Plotter.')
def test_make_sampler_ray_sampler(self, ray_session_fixture):
    del ray_session_fixture
    assert ray.is_initialized()
    with LocalTFRunner(snapshot_config) as runner:
        env = GymEnv('CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(8, 8))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = VPG(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   discount=0.99,
                   optimizer_args=dict(learning_rate=0.01, ))

        runner.setup(algo, env, sampler_cls=RaySampler)
        assert isinstance(runner._sampler, RaySampler)
        runner.train(n_epochs=1, batch_size=10)
def run_task(*_):
    """Run the job."""
    with LocalRunner() as runner:
        env = TfEnv(normalize(gym.make('InvertedPendulum-v2')))

        policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = VPG(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            max_path_length=100,
            discount=0.99,
            max_kl_step=0.01,
        )

        runner.setup(
            algo,
            env,
            sampler_cls=ISSampler,
            sampler_args=dict(n_backtrack=1))
        runner.train(n_epochs=40, batch_size=4000)
def fixture_exp():
    """Dummy fixture experiment function.

    Returns:
        np.ndarray: Values of the policy parameters after training.

    """
    with LocalRunner() as runner:
        env = TfEnv(env_name='CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(8, 8))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = VPG(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   max_path_length=100,
                   discount=0.99,
                   optimizer_args=dict(
                       tf_optimizer_args=dict(learning_rate=0.01, )))

        runner.setup(algo, env)
        runner.train(n_epochs=5, batch_size=100)
        return policy.get_param_values()
def test_vpg_cartpole(self):
    """Test VPG with CartPole-v1 environment."""
    with LocalTFRunner(snapshot_config, sess=self.sess) as runner:
        env = GymEnv('CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = VPG(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   discount=0.99,
                   optimizer_args=dict(learning_rate=0.01, ))

        runner.setup(algo, env, sampler_cls=LocalSampler)

        last_avg_ret = runner.train(n_epochs=10, batch_size=10000)
        assert last_avg_ret > 90

        env.close()
def test_tf_batch_sampler(self):
    max_cpus = 8
    with LocalTFRunner(snapshot_config, max_cpus=max_cpus) as runner:
        env = TfEnv(env_name='CartPole-v1')

        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = VPG(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   max_path_length=1,
                   discount=0.99)

        runner.setup(algo,
                     env,
                     sampler_cls=BatchSampler,
                     sampler_args={'n_envs': max_cpus})

        try:
            runner.initialize_tf_vars()
        except BaseException:
            raise AssertionError(
                'LocalTFRunner should be able to initialize tf variables.')

        runner._start_worker()

        paths = runner._sampler.obtain_samples(0,
                                               batch_size=8,
                                               whole_paths=True)
        assert len(paths) >= max_cpus, (
            'BatchSampler should sample more than max_cpus={} '
            'trajectories'.format(max_cpus))
from garage.baselines import LinearFeatureBaseline
from garage.envs import normalize
from garage.envs.box2d import CartpoleEnv
from garage.tf.algos import VPG
from garage.tf.envs import TfEnv
from garage.tf.policies import GaussianMLPPolicy

env = TfEnv(normalize(CartpoleEnv()))

policy = GaussianMLPPolicy(name="policy",
                           env_spec=env.spec,
                           hidden_sizes=(32, 32))

baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = VPG(env=env,
           policy=policy,
           baseline=baseline,
           batch_size=10000,
           max_path_length=100,
           n_itr=40,
           discount=0.99,
           optimizer_args=dict(
               tf_optimizer_args=dict(learning_rate=0.01, )))

algo.train()
from garage.baselines import LinearFeatureBaseline
from garage.envs import normalize
from garage.envs.mujoco import SwimmerEnv
from garage.misc.instrument import run_experiment
from garage.tf.algos import VPG
from garage.tf.envs import TfEnv
from garage.tf.policies import GaussianMLPPolicy

env = TfEnv(normalize(SwimmerEnv()))

policy = GaussianMLPPolicy(name="policy",
                           env_spec=env.spec,
                           hidden_sizes=(32, 32))

baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = VPG(env=env,
           policy=policy,
           baseline=baseline,
           batch_size=5000,
           max_path_length=500,
           n_itr=40,
           discount=0.995,
           optimizer_args=dict(
               tf_optimizer_args=dict(learning_rate=1e-4, )))

run_experiment(algo.train(),
               n_parallel=1,
               snapshot_mode="last",
               seed=1,
               use_gpu=True,
               use_tf=True)
"""Example using VPG with ISSampler.

Iterations alternate between live and importance-sampled iterations.
"""
import gym

from garage.contrib.alexbeloi.is_sampler import ISSampler
from garage.envs import normalize
from garage.np.baselines import LinearFeatureBaseline
from garage.tf.algos import VPG
from garage.tf.policies import GaussianMLPPolicy

env = normalize(gym.make('InvertedPendulum-v2'))

policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))

baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = VPG(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000,
    max_path_length=100,
    n_itr=40,
    discount=0.99,
    step_size=0.01,
    sampler_cls=ISSampler,
    sampler_args=dict(n_backtrack=1),
)

algo.train()