Exemplo n.º 1
0
def train_lunarlander_expert():
    """Train a PPO expert on LunarLander-v2, save it, and generate demos.

    The policy is trained across 16 vectorized copies of the environment,
    written to ``experts/LunarLander-v2/lunarlander_expert`` and then handed
    to ``gen_expert_demos`` together with a freshly created single env.
    """
    env_id = 'LunarLander-v2'
    vec_env = make_vec_env(env_id, n_envs=16)

    # Only the network size is overridden; the remaining PPO hyperparameters
    # are left at their defaults because the tuned ones seemed to not work
    # that well.
    policy_kwargs = dict(net_arch=[64, 64])
    expert = PPO('MlpPolicy', vec_env, verbose=1, policy_kwargs=policy_kwargs)

    expert.learn(total_timesteps=2e6)
    expert.save("experts/LunarLander-v2/lunarlander_expert")

    # Demos are collected on a separate env built with gym.make, not on the
    # vectorized training env.
    # NOTE(review): 25 is presumably the number of demonstrations to
    # generate -- confirm against gen_expert_demos.
    demo_env = gym.make(env_id)
    gen_expert_demos(env_id, demo_env, expert, 25)
Exemplo n.º 2
0
def train_pendulum_expert():
    """Train a PPO expert on Pendulum-v0, save it, and generate demos.

    The policy is trained across 8 vectorized copies of the environment,
    written to ``experts/Pendulum-v0/pendulum_expert`` and then handed to
    ``gen_expert_demos`` together with a freshly created single env.
    """
    env_id = 'Pendulum-v0'
    vec_env = make_vec_env(env_id, n_envs=8)

    # PPO hyperparameters, spelled out explicitly for this task.
    ppo_params = {
        'n_steps': 2048,
        'batch_size': 64,
        'gae_lambda': 0.95,
        'gamma': 0.99,
        'n_epochs': 10,
        'ent_coef': 0.0,
        'learning_rate': 3e-4,
        'clip_range': 0.2,
        'policy_kwargs': dict(net_arch=[256, 256]),
    }
    expert = PPO('MlpPolicy', vec_env, verbose=1, **ppo_params)

    expert.learn(total_timesteps=2e6)
    expert.save("experts/Pendulum-v0/pendulum_expert")

    # Demos are collected on a separate env built with gym.make, not on the
    # vectorized training env.
    # NOTE(review): 25 is presumably the number of demonstrations to
    # generate -- confirm against gen_expert_demos.
    demo_env = gym.make(env_id)
    gen_expert_demos(env_id, demo_env, expert, 25)
Exemplo n.º 3
0
def train_cartpole_expert():
    """Train a PPO expert on CartPole-v1, save it, and generate demos.

    The policy is trained across 8 vectorized copies of the environment,
    written to ``experts/CartPole-v1/cartpole_expert`` and then handed to
    ``gen_expert_demos`` together with a freshly created single env.
    """
    env_id = 'CartPole-v1'
    vec_env = make_vec_env(env_id, n_envs=8)

    # Learning rate and clip range are both produced by linear_schedule
    # (defined outside this function) from their initial values.
    ppo_params = {
        'n_steps': 32,
        'batch_size': 256,
        'gae_lambda': 0.8,
        'gamma': 0.98,
        'n_epochs': 20,
        'ent_coef': 0.0,
        'learning_rate': linear_schedule(0.001),
        'clip_range': linear_schedule(0.2),
        'policy_kwargs': dict(net_arch=[64, 64]),
    }
    expert = PPO('MlpPolicy', vec_env, verbose=1, **ppo_params)

    expert.learn(total_timesteps=1e5)
    expert.save("experts/CartPole-v1/cartpole_expert")

    # Demos are collected on a separate env built with gym.make, not on the
    # vectorized training env.
    # NOTE(review): 25 is presumably the number of demonstrations to
    # generate -- confirm against gen_expert_demos.
    demo_env = gym.make(env_id)
    gen_expert_demos(env_id, demo_env, expert, 25)
Exemplo n.º 4
0
def train_hopper_expert():
    """Train a SAC expert on HopperBulletEnv-v0, save it, and generate demos.

    The policy is trained on a single-env vectorized wrapper, written to
    ``experts/HopperBulletEnv-v0/hopper_expert`` and then handed to
    ``gen_expert_demos`` together with a freshly created single env.
    """
    env_id = 'HopperBulletEnv-v0'
    # No env normalization.
    vec_env = make_vec_env(env_id, n_envs=1)

    # SAC hyperparameters; the learning rate is produced by linear_schedule
    # (defined outside this function) from its initial value.
    sac_params = {
        'buffer_size': 300000,
        'batch_size': 256,
        'gamma': 0.98,
        'tau': 0.02,
        'train_freq': 64,
        'gradient_steps': 64,
        'ent_coef': 'auto',
        'learning_rate': linear_schedule(7.3e-4),
        'learning_starts': 10000,
        'policy_kwargs': dict(net_arch=[256, 256], log_std_init=-3),
        'use_sde': True,
    }
    expert = SAC('MlpPolicy', vec_env, verbose=1, **sac_params)

    expert.learn(total_timesteps=1e6)
    expert.save("experts/HopperBulletEnv-v0/hopper_expert")

    # Demos are collected on a separate env built with gym.make, not on the
    # vectorized training env.
    # NOTE(review): 25 is presumably the number of demonstrations to
    # generate -- confirm against gen_expert_demos.
    demo_env = gym.make(env_id)
    gen_expert_demos(env_id, demo_env, expert, 25)