def view_policy_ddpg():
    # Roll out a trained DDPG policy from a checkpoint, with rendering enabled.
    env = DummyVecEnv([lambda: EnvHandler(make_env())])
    view(env, seed=None, total_timesteps=10000, reward_scale=1.0, render=True,
         render_eval=False, noise_type=None, normalize_returns=False,
         normalize_observations=False, critic_l2_reg=1e-2, actor_lr=1e-4,
         critic_lr=1e-3, popart=False, gamma=0.99, clip_norm=None,
         nb_train_steps=50, nb_eval_steps=100, nb_save_epochs=None,
         batch_size=64, tau=0.01, action_range=(-250.0, 250.0),
         observation_range=(-5.0, 5.0), eval_env=None,
         load_path="./checkpoints/00007", save_dir=None,
         param_noise_adaption_interval=50)
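All of these scripts rely on make_env and EnvHandler helpers defined elsewhere in the project. A minimal sketch of the shape they are used with, assuming a standard Gym continuous-control task; the environment id and the wrapper body are illustrative assumptions, not the project's actual implementation:

import gym

def make_env(env_no=0):
    # Hypothetical helper: pick an environment variant by index; the real
    # function presumably constructs the project's own environment.
    return gym.make("Pendulum-v0")

class EnvHandler(gym.Wrapper):
    # Hypothetical thin wrapper; the real class presumably adds
    # project-specific observation/reward handling on top of the base env.
    def __init__(self, env):
        super().__init__(env)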
def train_ddpg():
    # Train DDPG on a single subprocess-vectorized environment.
    env = SubprocVecEnv([lambda: EnvHandler(make_env()) for _ in range(1)])
    # env = SubprocVecEnv([lambda: EnvHandler(make_env(env_no=0)),
    #                      lambda: EnvHandler(make_env(env_no=1))])
    learn(env=env, seed=None, total_timesteps=1e5, nb_epochs=None,
          nb_epoch_cycles=10, nb_rollout_steps=100, reward_scale=1.0,
          render=False, render_eval=False, noise_type='ou-param_0.2',
          normalize_returns=False, normalize_observations=False,
          critic_l2_reg=1e-2, actor_lr=1e-4, critic_lr=1e-3, popart=False,
          gamma=0.99, clip_norm=None, nb_train_steps=50, nb_eval_steps=100,
          batch_size=64, tau=0.01, eval_env=None,
          param_noise_adaption_interval=50, nb_save_epochs=1, save_dir=".",
          load_path=None)
def train_td3():
    # Train TD3; observations are normalized and the first `start_steps`
    # actions are exploratory before the learned policy takes over.
    env = SubprocVecEnv([lambda: EnvHandler(make_env()) for _ in range(1)])
    learn(env, total_timesteps=1e6, nb_epochs=None, nb_rollout_steps=100,
          max_ep_len=250, reward_scale=1.0, render=False, render_eval=False,
          noise_type='adaptive-param_0.2', normalize_returns=False,
          normalize_observations=True, actor_lr=1e-4, critic_lr=1e-3,
          popart=False, gamma=0.99, clip_norm=None, start_steps=10000,
          nb_train_steps=50, nb_eval_steps=100, nb_log_steps=100,
          nb_save_steps=None, batch_size=64, polyak=0.01,
          action_range=(-250.0, 250.0), observation_range=(-5.0, 5.0),
          target_noise=0.2, noise_clip=0.5, policy_delay=2,
          load_path=None, save_dir=None)
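The tau (DDPG) and polyak (TD3) coefficients above control the soft target-network update. A minimal sketch of that update, assuming the convention in which the coefficient weights the online parameters (the soft_update function name is illustrative, not part of this codebase):

import numpy as np

def soft_update(target_params, online_params, coeff=0.01):
    # Exponentially track the online network: with coeff=0.01 the target
    # weights move 1% of the way toward the online weights per update.
    return [(1.0 - coeff) * t + coeff * o
            for t, o in zip(target_params, online_params)]

# Usage example:
# target = soft_update([np.zeros(3)], [np.ones(3)])  # -> [array([0.01, 0.01, 0.01])]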
def view_policy_ppo():
    # Roll out a trained PPO policy from a checkpoint.
    env = DummyVecEnv([lambda: EnvHandler(make_env())])
    view(env=env, episodes=100, total_timesteps=1000000, nsteps=200,
         nminibatches=1, cliprange=0.2, ent_coef=0.0, lam=0.95, gamma=0.99,
         noptepochs=4, save_interval=100, save_dir=".",
         load_path="./checkpoints/00500", normalize_observations=False,
         normalize_returns=False)
def train_ppo():
    # Train PPO; checkpoints are written to save_dir every save_interval updates.
    env = SubprocVecEnv([lambda: EnvHandler(make_env())])
    learn(env=env, eval_env=None, total_timesteps=3e7, nsteps=128,
          nminibatches=1, cliprange=0.2, ent_coef=0.01, vf_coef=0.5,
          lam=0.95, gamma=0.99, noptepochs=4, lr=2.5e-4, save_interval=100,
          save_dir=".", load_path=None, normalize_observations=False,
          normalize_returns=False)
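Since these are plain functions, a small command-line entry point can select between them. This dispatcher is a sketch: the --job flag and the single entry point are assumptions, and in practice each algorithm's learn/view presumably comes from its own module:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--job", default="train_ppo",
                        choices=["train_ddpg", "view_ddpg", "train_td3",
                                 "train_ppo", "view_ppo"])
    args = parser.parse_args()
    # Map the job name to the corresponding run function and call it.
    {"train_ddpg": train_ddpg,
     "view_ddpg": view_policy_ddpg,
     "train_td3": train_td3,
     "train_ppo": train_ppo,
     "view_ppo": view_policy_ppo}[args.job]()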