def run_task(*_):
    env_name = "Ant"
    hidden_sizes = (32, 32)
    env = TheanoEnv(normalize(SwimmerEnv()))

    # Main policy plus the auxiliary policy networks CAPG expects.
    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=hidden_sizes)
    backup_policy = GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)
    mix_policy = GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)
    pos_eps_policy = GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)
    neg_eps_policy = GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)
    baseline = ZeroBaseline(env_spec=env.spec)

    algo = CAPG(
        env=env,
        policy=policy,
        backup_policy=backup_policy,
        mix_policy=mix_policy,
        pos_eps_policy=pos_eps_policy,
        neg_eps_policy=neg_eps_policy,
        n_timestep=5e6,
        learning_rate=0.01,
        batch_size=5000,
        minibatch_size=500,
        n_sub_itr=10,
        baseline=baseline,
        max_path_length=500,
        discount=0.99,
        decay_learing_rate=True,
        log_dir='./logs/' + env_name,
    )
    algo.train()
def test_dynamics_rand(self):
    variations = Variations()
    variations.randomize() \
        .at_xpath(".//geom[@name='torso']") \
        .attribute("density") \
        .with_method(Method.COEFFICIENT) \
        .sampled_from(Distribution.UNIFORM) \
        .with_range(0.5, 1.5) \
        .add()

    env = randomize(SwimmerEnv(), variations)
    for i in range(5):
        env.reset()
        for j in range(5):
            env.step(env.action_space.sample())
from garage.baselines import LinearFeatureBaseline
from garage.envs import normalize
from garage.envs.box2d import CartpoleEnv
from garage.envs.mujoco import SwimmerEnv
from garage.theano.algos.capg import CAPG
from garage.theano.envs import TheanoEnv
from garage.theano.baselines import GaussianMLPBaseline
from garage.theano.policies import GaussianMLPPolicy
from garage.misc.instrument import run_experiment
from garage.misc.ext import set_seed
import numpy as np

# Sweep over batch size and learning rate, with three random seeds each.
for batchsize in [5000]:
    for learning_rate in [0.05, 0.01]:
        for i in range(3):
            seed = np.random.randint(1, 10000)
            env_name = "SGD_Swimmer_-t"
            hidden_sizes = (32, 32)
            env = TheanoEnv(normalize(SwimmerEnv()))

            policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=hidden_sizes)
            backup_policy = GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)
            mix_policy = GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)
            pos_eps_policy = GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)
            neg_eps_policy = GaussianMLPPolicy(env.spec, hidden_sizes=hidden_sizes)
            baseline = LinearFeatureBaseline(env_spec=env.spec)

            algo = CAPG(
                env=env,
                policy=policy,
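                # The remaining CAPG arguments are an assumed sketch: they
                # mirror the first CAPG example above and reuse the sweep
                # variables (batchsize, learning_rate).
                backup_policy=backup_policy,
                mix_policy=mix_policy,
                pos_eps_policy=pos_eps_policy,
                neg_eps_policy=neg_eps_policy,
                n_timestep=5e6,
                learning_rate=learning_rate,
                batch_size=batchsize,
                minibatch_size=500,
                n_sub_itr=10,
                baseline=baseline,
                max_path_length=500,
                discount=0.99,
                decay_learing_rate=True,
                log_dir='./logs/' + env_name,
            )
            algo.train()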
from garage.baselines import LinearFeatureBaseline
from garage.envs import normalize
from garage.envs.mujoco import SwimmerEnv
from garage.tf.algos import TRPO
from garage.tf.envs import TfEnv
from garage.tf.policies import GaussianMLPPolicy

env = TfEnv(
    normalize(
        SwimmerEnv(),
        normalize_obs=True,
        normalize_reward=True,
    ))
policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))
baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000,
    max_path_length=500,
    n_itr=40,
    discount=0.99,
    step_size=0.01,
    plot=False)
algo.train()
from garage.envs.mujoco import SwimmerEnv
from garage.envs.mujoco.randomization import Distribution
from garage.envs.mujoco.randomization import Method
from garage.envs.mujoco.randomization import randomize
from garage.envs.mujoco.randomization import Variations

# Scale the torso geom's density by a coefficient sampled uniformly
# from [0.5, 1.5].
variations = Variations()
variations.randomize() \
    .at_xpath(".//geom[@name='torso']") \
    .attribute("density") \
    .with_method(Method.COEFFICIENT) \
    .sampled_from(Distribution.UNIFORM) \
    .with_range(0.5, 1.5) \
    .add()

env = randomize(SwimmerEnv(), variations)

for i in range(1000):
    env.reset()
    for j in range(1000):
        env.step(env.action_space.sample())
from garage.baselines import LinearFeatureBaseline
from garage.envs import normalize
from garage.envs.box2d import CartpoleEnv
from garage.envs.mujoco import SwimmerEnv
from garage.tf.algos import VPG
from garage.tf.envs import TfEnv
from garage.tf.policies import GaussianMLPPolicy
from garage.misc.instrument import run_experiment

env = TfEnv(normalize(SwimmerEnv()))
policy = GaussianMLPPolicy(
    name="policy", env_spec=env.spec, hidden_sizes=(32, 32))
baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = VPG(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=5000,
    max_path_length=500,
    n_itr=40,
    discount=0.995,
    optimizer_args=dict(tf_optimizer_args=dict(learning_rate=1e-4)))

run_experiment(
    algo.train(),
    n_parallel=1,
    snapshot_mode="last",
    seed=1,
    use_gpu=True,
)
def test_can_create_env(self):
    # Fixes https://github.com/rlworkgroup/garage/pull/420
    env = normalize(SwimmerEnv())
    assert env