Example #1
 def test_pickleable(self):
     inner_env = CartpoleEnv(obs_noise=5.)
     env = OcclusionEnv(inner_env, [1])
     round_trip = pickle.loads(pickle.dumps(env))
     assert round_trip
     obs = inner_env.reset()
     assert round_trip.occlude(obs) == env.occlude(obs)
     assert round_trip.env.obs_noise == env.env.obs_noise
     step_env(round_trip)
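The pickling tests in this listing end with a call to a step_env helper that is not shown here. A minimal sketch of what such a helper could look like, assuming the wrapped environment exposes the usual reset/step/action_space API (the body below is an assumption, not the library's actual helper):

def step_env(env, n=10):
    # Assumed helper: exercise the freshly unpickled environment for a few
    # steps to confirm it is still usable after the round trip.
    env.reset()
    for _ in range(n):
        env.step(env.action_space.sample())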
Example #2
def run_task(*_):
    env = TheanoEnv(normalize(CartpoleEnv()))

    policy = DeterministicMLPPolicy(
        env_spec=env.spec,
        # The neural network policy should have two hidden layers,
        # each with 32 hidden units.
        hidden_sizes=(32, 32))

    es = OUStrategy(env_spec=env.spec)

    qf = ContinuousMLPQFunction(env_spec=env.spec)

    algo = DDPG(
        env=env,
        policy=policy,
        es=es,
        qf=qf,
        batch_size=32,
        max_path_length=100,
        epoch_length=1000,
        min_pool_size=10000,
        n_epochs=1000,
        discount=0.99,
        scale_reward=0.01,
        qf_learning_rate=1e-3,
        policy_learning_rate=1e-4,
        # plot=True enables live plotting of the policy during training
        # (the experiment launcher must also have plotting enabled).
        plot=True,
    )
    algo.train()
Example #3
def run_task(*_):
    env = TfEnv(normalize(CartpoleEnv()))
    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.tanh,
        output_nonlinearity=None,
    )
    baseline = GaussianMLPBaseline(
        env_spec=env.spec,
        regressor_args=dict(hidden_sizes=(32, 32)),
    )
    algo = InstrumentedTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=1024,
        max_path_length=100,
        n_itr=4,
        discount=0.99,
        gae_lambda=0.98,
        policy_ent_coeff=0.0,
        plot=True,
    )
    algo.train()
Example #4
 def test_does_not_modify_action(self):
     inner_env = CartpoleEnv(frame_skip=10)
     env = NoisyObservationEnv(inner_env, obs_noise=5.)
     a = env.action_space.sample()
     a_copy = a.copy()
     env.step(a)
     self.assertTrue((a == a_copy).all())
Example #5
def run_task(v):
    env = TheanoEnv(normalize(CartpoleEnv()))

    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        # The neural network policy should have two hidden layers,
        # each with 32 hidden units.
        hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=40,
        discount=0.99,
        step_size=v["step_size"],
        # plot=True enables live plotting of the policy during training
        # (the experiment launcher must also have plotting enabled).
        plot=True,
    )
    algo.train()
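The run_task functions in this listing are written to be handed to garage's experiment launcher rather than called directly; run_task(v) receives its hyperparameters, such as v["step_size"], from a variant dict supplied at launch time. A rough usage sketch follows; the import path and keyword names are assumptions and vary between garage versions:

# Assumed launcher usage; exact module path and arguments may differ.
from garage.misc.instrument import run_experiment

for step_size in (0.01, 0.05, 0.1):
    run_experiment(
        run_task,
        n_parallel=1,
        snapshot_mode="last",
        seed=1,
        variant=dict(step_size=step_size),
        # plot=True,  # enable together with plot=True inside run_task
    )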
Example #6
 def test_does_not_modify_action(self):
     inner_env = CartpoleEnv(obs_noise=5.)
     env = OcclusionEnv(inner_env, [1])
     a = env.action_space.sample()
     a_copy = a.copy()
     env.reset()
     env.step(a)
     self.assertTrue((a == a_copy).all())
Example #7
 def test_pickleable(self):
     inner_env = CartpoleEnv(obs_noise=5.)
     env = SlidingMemEnv(inner_env, n_steps=10)
     round_trip = pickle.loads(pickle.dumps(env))
     assert round_trip
     assert round_trip.n_steps == env.n_steps
     assert round_trip.env.obs_noise == env.env.obs_noise
     step_env(round_trip)
Example #8
 def test_does_not_modify_action(self):
     inner_env = CartpoleEnv(obs_noise=5.)
     env = SlidingMemEnv(inner_env, n_steps=10)
     a = env.action_space.sample()
     a_copy = a.copy()
     env.reset()
     env.step(a)
     self.assertTrue((a == a_copy).all())
Example #9
 def test_pickleable(self):
     inner_env = CartpoleEnv(obs_noise=5.)
     env = NormalizedEnv(inner_env, scale_reward=10.)
     round_trip = pickle.loads(pickle.dumps(env))
     assert round_trip
     assert round_trip._scale_reward == env._scale_reward
     assert round_trip.env.obs_noise == env.env.obs_noise
     step_env(round_trip)
Example #10
 def test_does_not_modify_action(self):
     inner_env = CartpoleEnv(obs_noise=5.)
     env = NormalizedEnv(inner_env, scale_reward=10.)
     a = env.action_space.sample()
     a_copy = a.copy()
     env.reset()
     env.step(a)
     self.assertTrue((a == a_copy).all())
Example #11
 def test_pickleable(self):
     inner_env = CartpoleEnv(frame_skip=10)
     env = DelayedActionEnv(inner_env, action_delay=10)
     round_trip = pickle.loads(pickle.dumps(env))
     assert round_trip
     assert round_trip.action_delay == env.action_delay
     assert round_trip.env.frame_skip == env.env.frame_skip
     step_env(round_trip)
Example #12
 def test_pickleable(self):
     inner_env = CartpoleEnv(frame_skip=10)
     env = NoisyObservationEnv(inner_env, obs_noise=5.)
     round_trip = pickle.loads(pickle.dumps(env))
     assert round_trip
     assert round_trip.obs_noise == env.obs_noise
     assert round_trip.env.frame_skip == env.env.frame_skip
     step_env(round_trip)
Example #13
 def test_does_not_modify_action(self):
     inner_env = CartpoleEnv(frame_skip=10)
     env = DelayedActionEnv(inner_env, action_delay=10)
     env.reset()
     a = env.action_space.sample()
     a_copy = a.copy()
     env.reset()
     env.step(a)
     self.assertTrue((a == a_copy).all())
Example #14
 def test_baseline(self, baseline_cls):
     env = TheanoEnv(CartpoleEnv())
     policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(6, ))
     baseline = baseline_cls(env_spec=env.spec)
     algo = VPG(env=env,
                policy=policy,
                baseline=baseline,
                n_itr=1,
                batch_size=1000,
                max_path_length=100)
     algo.train()
Example #15
 def test_adaptive_std():
     """
     Checks if the adaptive_std parameter works.
     """
     env = TheanoEnv(CartpoleEnv())
     policy = GaussianMLPPolicy(env_spec=env.spec, adaptive_std=True)
     baseline = ZeroBaseline(env_spec=env.spec)
     algo = TRPO(env=env,
                 policy=policy,
                 baseline=baseline,
                 batch_size=100,
                 n_itr=1)
     algo.train()
Example #16
 def test_issue_3():
     """
     As reported in https://github.com/garage/garage/issues/3, the adaptive_std
     parameter was not functioning properly
     """
     env = CartpoleEnv()
     policy = GaussianMLPPolicy(env_spec=env.spec, adaptive_std=True)
     baseline = ZeroBaseline(env_spec=env.spec)
     algo = TRPO(env=env,
                 policy=policy,
                 baseline=baseline,
                 batch_size=100,
                 n_itr=1)
     algo.train()
Example #17
def run_task(*_):
    env = normalize(CartpoleEnv())

    policy = DummyPolicy(env_spec=env)

    baseline = LinearFeatureBaseline(env_spec=env)
    algo = InstrumentedNOP(env=env,
                           policy=policy,
                           baseline=baseline,
                           batch_size=4000,
                           max_path_length=100,
                           n_itr=4,
                           discount=0.99,
                           step_size=0.01,
                           plot=True)
    algo.train()
Example #18
def run_task(*_):
    env = TheanoEnv(normalize(CartpoleEnv()))

    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = InstrumentedTRPO(env=env,
                            policy=policy,
                            baseline=baseline,
                            batch_size=4000,
                            max_path_length=100,
                            n_itr=4,
                            discount=0.99,
                            step_size=0.01,
                            plot=True)
    algo.train()
Example #19
def run_task(*_):
    """Wrap ERWR training task in the run_task function."""
    env = TfEnv(normalize(CartpoleEnv()))

    policy = GaussianMLPPolicy(name="policy",
                               env_spec=env.spec,
                               hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = ERWR(env=env,
                policy=policy,
                baseline=baseline,
                batch_size=10000,
                max_path_length=100,
                n_itr=40,
                discount=0.99)
    algo.train()
Example #20
def run_task(*_):
    env = TheanoEnv(normalize(CartpoleEnv()))

    policy = GaussianGRUPolicy(env_spec=env.spec)

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(env=env,
                policy=policy,
                baseline=baseline,
                batch_size=4000,
                max_path_length=100,
                n_itr=10,
                discount=0.99,
                step_size=0.01,
                optimizer=ConjugateGradientOptimizer(
                    hvp_approach=FiniteDifferenceHvp(base_eps=1e-5)))
    algo.train()
Example #21
 def test_ddpg(self):
     env = TheanoEnv(CartpoleEnv())
     policy = DeterministicMLPPolicy(env.spec)
     qf = ContinuousMLPQFunction(env.spec)
     es = OUStrategy(env.spec)
     algo = DDPG(
         env=env,
         policy=policy,
         qf=qf,
         es=es,
         n_epochs=1,
         epoch_length=100,
         batch_size=32,
         min_pool_size=50,
         replay_pool_size=1000,
         eval_samples=100,
     )
     algo.train()
Example #22
def run_task(v):
    env = normalize(CartpoleEnv())

    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=40,
        discount=0.99,
        step_size=v["step_size"],
        # plot=True,
    )
    algo.train()
Example #23
def run_task(*_):
    """Wrap VPG training task in the run_task function."""
    env = TfEnv(normalize(CartpoleEnv()))

    policy = GaussianMLPPolicy(
        name="policy", env_spec=env.spec, hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = VPG(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=10000,
        max_path_length=100,
        n_itr=100,
        discount=0.99,
        optimizer_args=dict(tf_optimizer_args=dict(learning_rate=0.01, )))
    algo.train()
Example #24
 def test_ddpg(self):
     env = TheanoEnv(CartpoleEnv())
     policy = DeterministicMLPPolicy(env.spec)
     qf = ContinuousMLPQFunction(env.spec)
     es = OUStrategy(env.spec)
     replay_buffer = SimpleReplayBuffer(env_spec=env.spec,
                                        size_in_transitions=int(1000),
                                        time_horizon=100)
     algo = DDPG(
         env=env,
         policy=policy,
         qf=qf,
         es=es,
         pool=replay_buffer,
         n_epochs=1,
         epoch_length=100,
         batch_size=32,
         min_pool_size=50,
         eval_samples=100,
     )
     algo.train()
Example #25
def run_task(*_):
    env = TheanoEnv(normalize(CartpoleEnv()))

    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=1000,
        discount=0.99,
        step_size=0.01,
        # Uncomment both lines (this and the plot parameter below) to enable
        # plotting
        #plot=True
    )
    algo.train()
Example #26
    def test_erwr_cartpole(self):
        """Test ERWR with Cartpole environment."""
        logger.reset()
        env = TfEnv(normalize(CartpoleEnv()))

        policy = GaussianMLPPolicy(
            name="policy", env_spec=env.spec, hidden_sizes=(32, 32))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = ERWR(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=10000,
            max_path_length=100,
            n_itr=10,
            discount=0.99)

        last_avg_ret = algo.train(sess=self.sess)
        assert last_avg_ret > 100
Example #27
    def test_tnpg_cartpole(self):
        """Test TNPG with Cartpole environment."""
        logger.reset()
        env = TfEnv(normalize(CartpoleEnv()))

        policy = GaussianMLPPolicy(name="policy",
                                   env_spec=env.spec,
                                   hidden_sizes=(32, 32))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = TNPG(env=env,
                    policy=policy,
                    baseline=baseline,
                    batch_size=10000,
                    max_path_length=100,
                    n_itr=10,
                    discount=0.99,
                    optimizer_args=dict(reg_coeff=5e-2))

        last_avg_ret = algo.train(sess=self.sess)
        assert last_avg_ret > 40
Example #28
    def test_gaussian_policies(self, policy_cls):
        logger._tensorboard = TensorBoardOutput()
        env = TfEnv(normalize(CartpoleEnv()))

        policy = policy_cls(name="policy", env_spec=env.spec)

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = TRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=4000,
            max_path_length=100,
            n_itr=1,
            discount=0.99,
            step_size=0.01,
            plot=True,
            optimizer=ConjugateGradientOptimizer,
            optimizer_args=dict(hvp_approach=FiniteDifferenceHvp(
                base_eps=1e-5)),
        )
        algo.train(sess=self.sess)
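The TensorFlow test cases above pass sess=self.sess into algo.train(). A minimal sketch of the kind of fixture that could provide that session (the class name and details are assumptions, not the project's actual test harness):

import unittest

import tensorflow as tf


class TfTestCase(unittest.TestCase):
    # Assumed fixture: open one TF1-style session per test and install it as
    # the default so algo.train(sess=self.sess) reuses it.
    def setUp(self):
        self.sess = tf.Session()
        self.sess.__enter__()

    def tearDown(self):
        self.sess.__exit__(None, None, None)
        tf.reset_default_graph()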
Example #29
from garage.baselines import LinearFeatureBaseline
from garage.envs import normalize
from garage.envs.box2d import CartpoleEnv
from garage.theano.algos import TRPO
from garage.theano.envs import TheanoEnv
from garage.theano.policies import GaussianMLPPolicy

env = TheanoEnv(normalize(CartpoleEnv()))

policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))

baseline = LinearFeatureBaseline(env_spec=env.spec)
algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000,
    max_path_length=100,
    n_itr=40,
    discount=0.99,
    step_size=0.01,
    # plot=True
)
algo.train()
Example #30
from garage.baselines import LinearFeatureBaseline
from garage.envs import normalize
from garage.envs.box2d import CartpoleEnv
from garage.tf.algos import TRPO
import garage.tf.core.layers as L
from garage.tf.envs import TfEnv
from garage.tf.optimizers import ConjugateGradientOptimizer, FiniteDifferenceHvp
from garage.tf.policies import GaussianLSTMPolicy

env = TfEnv(normalize(CartpoleEnv()))

policy = GaussianLSTMPolicy(
    name="policy",
    env_spec=env.spec,
    lstm_layer_cls=L.TfBasicLSTMLayer,
    # gru_layer_cls=L.GRULayer,
)

baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000,
    max_path_length=100,
    n_itr=10,
    discount=0.99,
    step_size=0.01,
    optimizer=ConjugateGradientOptimizer(
        hvp_approach=FiniteDifferenceHvp(base_eps=1e-5)))
algo.train()