Example #1
# Imports as used elsewhere in these examples; the CAPG class itself
# comes from the project that defines the algorithm (not shown here).
from garage.baselines import ZeroBaseline
from garage.envs import normalize
from garage.envs.mujoco import SwimmerEnv
from garage.theano.envs import TheanoEnv
from garage.theano.policies import GaussianMLPPolicy


def run_task(*_):
    env_name = "Swimmer"
    hidden_sizes = (32, 32)
    env = TheanoEnv(normalize(SwimmerEnv()))
    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=hidden_sizes)
    # CAPG maintains several auxiliary policies alongside the main one.
    backup_policy = GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)
    mix_policy = GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)
    pos_eps_policy = GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)
    neg_eps_policy = GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)

    baseline = ZeroBaseline(env_spec=env.spec)

    algo = CAPG(
        env=env,
        policy=policy,
        backup_policy=backup_policy,
        mix_policy=mix_policy,
        pos_eps_policy=pos_eps_policy,
        neg_eps_policy=neg_eps_policy,
        n_timestep=5e6,
        learning_rate=0.01,
        batch_size=5000,
        minibatch_size=500,
        n_sub_itr=10,
        baseline=baseline,
        max_path_length=500,
        discount=0.99,
        decay_learing_rate=True,  # (sic) keyword spelled this way upstream
        log_dir='./logs/' + env_name,
    )
    algo.train()
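
The run_task signature matches what a garage launcher expects to call. A
minimal sketch of how it might be launched, assuming the Theano-era
run_experiment helper (its import path and accepted arguments varied across
garage versions):

from garage.experiment import run_experiment  # assumed import path

run_experiment(
    run_task,
    n_parallel=1,          # one sampler worker
    seed=1,                # fix the RNG seed for reproducibility
    snapshot_mode="last",  # keep only the most recent snapshot
)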
Example #2
def test_polopt_algo(self, algo_cls, env_cls, policy_cls):
    print("Testing %s, %s, %s" %
          (algo_cls.__name__, env_cls.__name__, policy_cls.__name__))
    env = TheanoEnv(env_cls())
    policy = policy_cls(env_spec=env.spec)
    baseline = ZeroBaseline(env_spec=env.spec)
    # algo_args maps each algorithm class to its extra constructor
    # arguments (sketched below); it is defined in the test module.
    algo = algo_cls(env=env,
                    policy=policy,
                    baseline=baseline,
                    **algo_args.get(algo_cls, dict()))
    algo.train()
    # Training must not have produced NaN parameters.
    assert not np.any(np.isnan(policy.get_param_values()))
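
For context, algo_args is a table the test module defines elsewhere, mapping
each algorithm class to the extra constructor arguments it needs. A
hypothetical sketch of its shape (names and values are illustrative, not the
suite's actual settings):

algo_args = {
    TRPO: dict(n_itr=1, batch_size=100, max_path_length=100),
    VPG: dict(n_itr=1, batch_size=100, max_path_length=100),
}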
Example #3
def test_adaptive_std():
    """
    Checks that the adaptive_std parameter works.
    """
    env = TheanoEnv(CartpoleEnv())
    policy = GaussianMLPPolicy(env_spec=env.spec, adaptive_std=True)
    baseline = ZeroBaseline(env_spec=env.spec)
    algo = TRPO(env=env,
                policy=policy,
                baseline=baseline,
                batch_size=100,
                n_itr=1)
    algo.train()
Example #4
def test_trpo_relu_nan(self):
    # Regression test: TRPO on a dummy env with a tiny network and a
    # small step size must not produce NaN parameters.
    env = TheanoEnv(DummyEnv())
    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(1,))
    baseline = ZeroBaseline(env_spec=env.spec)
    algo = TRPO(env=env,
                policy=policy,
                baseline=baseline,
                n_itr=1,
                batch_size=1000,
                max_path_length=100,
                step_size=0.001)
    algo.train()
    assert not np.isnan(np.sum(policy.get_param_values()))
Example #5
def test_trpo_deterministic_nan():
    env = TheanoEnv(DummyEnv())
    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(1,))
    # Force the log-std parameter to log(1e-8): the policy becomes
    # effectively deterministic, and TRPO must still train without NaNs.
    policy._l_log_std.param.set_value([np.float32(np.log(1e-8))])
    baseline = ZeroBaseline(env_spec=env.spec)
    algo = TRPO(env=env,
                policy=policy,
                baseline=baseline,
                n_itr=10,
                batch_size=1000,
                max_path_length=100,
                step_size=0.01)
    algo.train()
    assert not np.isnan(np.sum(policy.get_param_values()))
Example #6
def test_issue_3():
    """
    As reported in https://github.com/garage/garage/issues/3, the
    adaptive_std parameter was not functioning properly.
    """
    env = TheanoEnv(CartpoleEnv())
    policy = GaussianMLPPolicy(env_spec=env.spec, adaptive_std=True)
    baseline = ZeroBaseline(env_spec=env.spec)
    algo = TRPO(env=env,
                policy=policy,
                baseline=baseline,
                batch_size=100,
                n_itr=1)
    algo.train()
Example #7
for _ in range(10):
    seed = np.random.randint(1, 10000)
    env_name = "SGD_nn_CartPole"
    hidden_sizes = (8,)
    env = TheanoEnv(normalize(CartpoleEnv()))
    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=hidden_sizes)
    backup_policy = GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)
    mix_policy = GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)
    pos_eps_policy = GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)
    neg_eps_policy = GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)

    baseline = ZeroBaseline(env_spec=env.spec)

    # learning_rate and batch_size come from the enclosing scope; the
    # constructor call is truncated here in the source.
    algo = CAPG(
        env=env,
        policy=policy,
        backup_policy=backup_policy,
        mix_policy=mix_policy,
        pos_eps_policy=pos_eps_policy,
        neg_eps_policy=neg_eps_policy,
        n_timestep=5e5,
        learning_rate=learning_rate,
        batch_size=batch_size,
        minibatch_size=500,
        n_sub_itr=0,
        center_adv=True,
        baseline=baseline,
Example #8
from garage.baselines import ZeroBaseline
from garage.envs import normalize
from garage.envs.mujoco import SwimmerEnv
from garage.theano.envs import TheanoEnv
from garage.theano.policies import GaussianMLPPolicy

# normalize() makes sure that the actions for the environment lie within the
# range [-1, 1] (only works for environments with continuous actions)
env = TheanoEnv(normalize(SwimmerEnv()))
# Initialize a neural network policy with two hidden layers of 32 units each
policy = GaussianMLPPolicy(env.spec, hidden_sizes=(32, 32))
# Initialize a zero baseline (predicts zero, i.e. no variance reduction)
baseline = ZeroBaseline(env.spec)

# We will collect 10 trajectories per iteration
N = 10
# Each trajectory will have at most 500 time steps
T = 500
# Number of iterations
n_itr = 1000
# Set the discount factor for the problem
discount = 0.995
# Learning rate for the gradient update
learning_rate = 0.005

# Construct the computation graph

# Create a Theano variable for storing the observations. We could have
# written TT.matrix('observations') directly, but asking the environment's
# observation space for the variable lets it choose the right data type.
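# The snippet breaks off here; the graph would be completed roughly as
# follows, assuming the Theano-era rllab/garage API (new_tensor_variable,
# dist_info_sym) and lasagne's adam optimizer -- a sketch, not the
# original file's exact code.
import theano
import theano.tensor as TT
from lasagne.updates import adam

# extra_dims=1 adds a leading batch dimension for a list of observations.
observations_var = env.observation_space.new_tensor_variable(
    'observations', extra_dims=1)
actions_var = env.action_space.new_tensor_variable('actions', extra_dims=1)
returns_var = TT.vector('returns')

# Surrogate loss: minimizing -E[log pi(a|s) * R] ascends the expected
# return, which is exactly the REINFORCE gradient estimator.
dist_info_vars = policy.dist_info_sym(observations_var)
dist = policy.distribution
surr = -TT.mean(
    dist.log_likelihood_sym(actions_var, dist_info_vars) * returns_var)

# Compile a function that performs one Adam update on the policy.
params = policy.get_params(trainable=True)
f_train = theano.function(
    inputs=[observations_var, actions_var, returns_var],
    outputs=None,
    updates=adam(theano.grad(surr, params), params,
                 learning_rate=learning_rate),
    allow_input_downcast=True)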