def test_benchmark_gaussian_gru_policy(self):
    bench_envs = [
        'HalfCheetah-v2', 'Reacher-v2', 'Walker2d-v2', 'Hopper-v2',
        'Swimmer-v2', 'InvertedPendulum-v2', 'InvertedDoublePendulum-v2'
    ]
    seeds = np.random.choice(100, size=(len(bench_envs), 3))
    for env_num in range(len(bench_envs)):
        self._env = bench_envs[env_num]
        for seed in seeds[env_num]:
            self._seed = seed
            deterministic.set_seed(self._seed)
            name = '{}_seed_{}_metarl'.format(self._env, self._seed)
            run_experiment(self.run_task,
                           snapshot_mode='last',
                           seed=self._seed,
                           n_parallel=12,
                           exp_name=name)
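# For context, a minimal sketch of what a seeding helper like
# deterministic.set_seed above typically does: seed every RNG the
# benchmark touches so each (env, seed) run is reproducible. This is an
# illustrative assumption, not metarl's actual implementation.
import random

import numpy as np


def set_seed_sketch(seed):
    """Seed the stdlib and NumPy RNGs (a TF seed would be set here too)."""
    random.seed(seed)
    np.random.seed(seed)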
# Imports and the two module-level constants (n_envs, max_path_length)
# are not shown in the original snippet; the import paths below are
# assumed from the metarl package layout seen elsewhere in these
# examples, and the constant values are placeholders for this example.
from metarl.experiment import run_experiment
from metarl.np.baselines import LinearFeatureBaseline
from metarl.tf.algos import TRPO
from metarl.tf.envs import TfEnv
from metarl.tf.experiment import LocalTFRunner
from metarl.tf.policies import CategoricalMLPPolicy
from metarl.tf.samplers import BatchSampler

max_path_length = 100  # assumed value
n_envs = 8  # assumed value; number of parallel sampling environments


def run_task(snapshot_config, *_):
    """Run task."""
    with LocalTFRunner(snapshot_config=snapshot_config,
                       max_cpus=n_envs) as runner:
        env = TfEnv(env_name='CartPole-v1')
        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = TRPO(env_spec=env.spec,
                    policy=policy,
                    baseline=baseline,
                    max_path_length=max_path_length,
                    discount=0.99,
                    max_kl_step=0.01)
        runner.setup(algo=algo,
                     env=env,
                     sampler_cls=BatchSampler,
                     sampler_args={'n_envs': n_envs})
        runner.train(n_epochs=100, batch_size=4000, plot=False)


run_experiment(run_task, snapshot_mode='last', seed=1)
    runner.setup(algo,
                 env,
                 sampler_cls=LocalSampler,
                 sampler_args=None,
                 worker_class=TaskEmbeddingWorker)
    runner.train(n_epochs=600, batch_size=v.batch_size, plot=False)


config = dict(
    tasks=TASKS,
    latent_length=1,
    inference_window=2,
    batch_size=1024 * len(TASKS),
    policy_ent_coeff=2e-2,  # 2e-2
    embedding_ent_coeff=2.2e-3,  # 1e-2
    inference_ce_coeff=5e-2,  # 1e-2
    max_path_length=100,
    embedding_init_std=1.0,
    embedding_max_std=2.0,
    embedding_min_std=0.38,
    policy_init_std=1.0,
    policy_max_std=None,
    policy_min_std=None,
)

run_experiment(run_task,
               snapshot_mode='last',
               seed=1,
               variant=config,
               plot=False)
#!/usr/bin/env python3
"""This is an example to resume training programmatically."""
from metarl.experiment import run_experiment
from metarl.tf.experiment import LocalTFRunner


def run_task(snapshot_config, *_):
    """Run task."""
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        runner.restore(from_dir='dir/', from_epoch=2)
        runner.resume()


run_experiment(
    run_task,
    log_dir='new_dir/',
    snapshot_mode='last',
    seed=1,
)
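# A hedged variant of the example above: in garage, from which metarl
# is forked, resume() accepts overrides such as n_epochs, so a restored
# run can be trained for longer than originally scheduled. Treat the
# keyword names here as an assumption.
def run_task_longer(snapshot_config, *_):
    """Restore the latest snapshot from 'dir/' and train to 20 epochs."""
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        runner.restore(from_dir='dir/')
        runner.resume(n_epochs=20)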
def run_task(*_):
    env = normalize(gym.make('Pendulum-v0'))
    policy = DummyPolicy(env_spec=env)
    baseline = LinearFeatureBaseline(env_spec=env)
    algo = InstrumentedNOP(env=env,
                           policy=policy,
                           baseline=baseline,
                           batch_size=4000,
                           max_path_length=100,
                           n_itr=4,
                           discount=0.99,
                           step_size=0.01,
                           plot=True)
    algo.train()
    env.close()


run_experiment(
    run_task,
    # Number of parallel workers for sampling
    n_parallel=6,
    # Only keep the snapshot parameters for the last iteration
    snapshot_mode='last',
    # Specifies the seed for the experiment. If this is not provided,
    # a random seed will be used
    seed=1,
    plot=True,
)
               target_network_update_freq=2,
               buffer_batch_size=32)
    runner.setup(algo, env)
    runner.train(n_epochs=n_epochs, batch_size=sampler_batch_size)


@click.command()
@click.option('--buffer_size', type=int, default=int(5e4))
def _args(buffer_size):
    """A click command to parse arguments for automated testing purposes.

    Args:
        buffer_size (int): Size of replay buffer.

    Returns:
        int: The input argument as-is.

    """
    return buffer_size


replay_buffer_size = _args.main(standalone_mode=False)
run_experiment(
    run_task,
    snapshot_mode='last',
    seed=1,
    plot=False,
    variant={'buffer_size': replay_buffer_size},
)
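# For context: calling a click command's .main() with
# standalone_mode=False (a documented click feature) makes it return
# the callback's return value instead of exiting the process, which is
# why the scripts above and below can keep running after parsing.
# A self-contained demo, with explicit args for determinism:
import click


@click.command()
@click.option('--size', type=int, default=7)
def _demo(size):
    return size


assert _demo.main(args=[], standalone_mode=False) == 7
assert _demo.main(args=['--size', '3'], standalone_mode=False) == 3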
               discount=0.99,
               max_kl_step=0.01,
               flatten_input=False)
    runner.setup(algo, env)
    runner.train(n_epochs=100, batch_size=variant_data['batch_size'])


@click.command()
@click.option('--batch_size', '_batch_size', type=int, default=4000)
def _args(_batch_size):
    """A click command to parse arguments for automated testing purposes.

    Args:
        _batch_size (int): Number of environment steps in one batch.

    Returns:
        int: The input argument as-is.

    """
    return _batch_size


batch_size = _args.main(standalone_mode=False)
run_experiment(
    run_task,
    snapshot_mode='last',
    seed=1,
    variant={'batch_size': batch_size},
)
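# For context: the variant= dict passed to run_experiment above is what
# run_task receives as its second positional argument (named
# variant_data in the snippet). A minimal sketch of that contract;
# run_experiment_sketch is a hypothetical stand-in, not metarl's code:
def run_experiment_sketch(task, variant=None, **_):
    snapshot_config = None  # stands in for the real snapshot settings
    task(snapshot_config, variant)


def demo_task(snapshot_config, variant_data):
    assert variant_data['batch_size'] == 4000


run_experiment_sketch(demo_task, variant={'batch_size': 4000})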