# Example 1
# Params for the action wrapper.
action_script = 'conditional_displacement_cal'
# Per-component rescaling of the raw policy outputs. An 'alpha' scale of 27
# corresponds to an nbar of roughly 800 (per the original author's note);
# the two phase components span a full [-pi, pi] range.
action_scale = dict(alpha=27, phi_g=pi, phi_e=pi)
# All three action components are trainable.
to_learn = dict(alpha=True, phi_g=True, phi_e=True)

train_batch_size = 200
eval_batch_size = 2

# Episode length is constant: one step per episode in both training and
# evaluation, regardless of the epoch index passed in.
def train_episode_length(epoch):
    return 1

def eval_episode_length(epoch):
    return 1

# Create drivers for data collection.
from gkp.agents import dynamic_episode_driver_sim_env

# Both drivers use the same simulated-environment driver class; they differ
# only in reward settings, batch size, and episode length.
_Driver = dynamic_episode_driver_sim_env.DynamicEpisodeDriverSimEnv

# Training driver: large batch, training reward settings.
collect_driver = _Driver(
    env_kwargs, reward_kwargs, train_batch_size, action_script, action_scale,
    to_learn, train_episode_length)

# Evaluation driver: small batch, evaluation reward settings.
eval_driver = _Driver(
    env_kwargs, reward_kwargs_eval, eval_batch_size, action_script,
    action_scale, to_learn, eval_episode_length)

PPO.train_eval(
    root_dir=root_dir,
    random_seed=0,
    num_epochs=3000,
    # Params for train
    normalize_observations=True,
    normalize_rewards=False,
    discount_factor=1.0,
    lr=1e-4,
# Example 2
from gkp.agents import multitask_episode_driver_sim_env

# Training collects experience from a schedule of environments/rewards
# (multitask), cycling according to env_schedule.
_MultitaskDriver = multitask_episode_driver_sim_env.MultitaskEpisodeDriverSimEnv
collect_driver = _MultitaskDriver(
    env_kwargs_list, rew_kwargs_list, train_batch_size, action_script,
    action_scale, to_learn, episode_length_list, env_schedule=env_schedule)

from gkp.agents import dynamic_episode_driver_sim_env

# Evaluation runs a single fixed environment (the "0" task settings).
_EvalDriver = dynamic_episode_driver_sim_env.DynamicEpisodeDriverSimEnv
eval_driver = _EvalDriver(
    env_kwargs_0, rew_kwargs_0, eval_batch_size, action_script, action_scale,
    to_learn, episode_length_0)

PPO.train_eval(
    root_dir=root_dir,
    random_seed=0,
    num_epochs=50000,
    # Params for train
    normalize_observations=True,
    normalize_rewards=False,
    discount_factor=1.0,
    lr=1e-3,
    lr_schedule=lambda x: 1e-3 if x < 500 else 1e-4,
    num_policy_updates=20,
    initial_adaptive_kl_beta=0.0,
    kl_cutoff_factor=0,