Exemplo n.º 1
0
 def setup_method(self):
     super().setup_method()
     self.max_path_length = 100
     self.meta_batch_size = 10
     self.episode_per_task = 4
     self.tasks = task_sampler.SetTaskSampler(
         lambda: RL2Env(env=normalize(HalfCheetahDirEnv())))
     self.env_spec = RL2Env(env=normalize(HalfCheetahDirEnv())).spec
     self.policy = GaussianGRUPolicy(env_spec=self.env_spec,
                                     hidden_dim=64,
                                     state_include_action=False)
     self.baseline = LinearFeatureBaseline(env_spec=self.env_spec)
Exemplo n.º 2
0
def test_maml_trpo_pendulum():
    """Test PPO with Pendulum environment."""
    env = GarageEnv(normalize(HalfCheetahDirEnv(), expected_action_scale=10.))
    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        hidden_sizes=(64, 64),
        hidden_nonlinearity=torch.tanh,
        output_nonlinearity=None,
    )
    baseline = LinearFeatureBaseline(env_spec=env.spec)

    rollouts_per_task = 5
    max_path_length = 100

    runner = LocalRunner(snapshot_config)
    algo = MAMLTRPO(env=env,
                    policy=policy,
                    baseline=baseline,
                    max_path_length=max_path_length,
                    meta_batch_size=5,
                    discount=0.99,
                    gae_lambda=1.,
                    inner_lr=0.1,
                    num_grad_updates=1)

    runner.setup(algo, env)
    last_avg_ret = runner.train(n_epochs=5,
                                batch_size=rollouts_per_task * max_path_length)

    assert last_avg_ret > -5

    env.close()
Exemplo n.º 3
0
 def setup_method(self):
     """Setup method which is called before every test."""
     self.env = GarageEnv(
         normalize(HalfCheetahDirEnv(), expected_action_scale=10.))
     self.policy = GaussianMLPPolicy(
         env_spec=self.env.spec,
         hidden_sizes=(64, 64),
         hidden_nonlinearity=torch.tanh,
         output_nonlinearity=None,
     )
     self.baseline = LinearFeatureBaseline(env_spec=self.env.spec)
Exemplo n.º 4
0
def maml_ppo(ctxt, seed, epochs, rollouts_per_task, meta_batch_size):
    """Set up environment and algorithm and run the task.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.
        epochs (int): Number of training epochs.
        rollouts_per_task (int): Number of rollouts per epoch per task
            for training.
        meta_batch_size (int): Number of tasks sampled per batch.

    """
    set_seed(seed)
    env = GarageEnv(normalize(HalfCheetahDirEnv(), expected_action_scale=10.))

    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        hidden_sizes=(64, 64),
        hidden_nonlinearity=torch.tanh,
        output_nonlinearity=None,
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    max_path_length = 100

    runner = LocalRunner(ctxt)
    algo = MAMLPPO(env=env,
                   policy=policy,
                   baseline=baseline,
                   max_path_length=max_path_length,
                   meta_batch_size=meta_batch_size,
                   discount=0.99,
                   gae_lambda=1.,
                   inner_lr=0.1,
                   num_grad_updates=1)

    runner.setup(algo, env)
    runner.train(n_epochs=epochs,
                 batch_size=rollouts_per_task * max_path_length)
Exemplo n.º 5
0
def run_task(snapshot_config, *_):
    """Set up environment and algorithm and run the task.

    Args:
        snapshot_config (garage.experiment.SnapshotConfig): The snapshot
            configuration used by LocalRunner to create the snapshotter.
            If None, it will create one with default settings.
        _ : Unused parameters

    """
    env = GarageEnv(normalize(HalfCheetahDirEnv(), expected_action_scale=10.))

    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        hidden_sizes=(64, 64),
        hidden_nonlinearity=torch.tanh,
        output_nonlinearity=None,
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    rollouts_per_task = 40
    max_path_length = 100

    runner = LocalRunner(snapshot_config)
    algo = MAMLTRPO(env=env,
                    policy=policy,
                    baseline=baseline,
                    max_path_length=max_path_length,
                    meta_batch_size=20,
                    discount=0.99,
                    gae_lambda=1.,
                    inner_lr=0.1,
                    num_grad_updates=1)

    runner.setup(algo, env)
    runner.train(n_epochs=300, batch_size=rollouts_per_task * max_path_length)