Example #1
# Imports for the Stable Baselines3 pieces used below
from stable_baselines3 import A2C
from stable_baselines3.common.env_util import make_vec_env

# Configured in hyperparams/a2c.yml
use_sde = True
n_envs = 4

# env_id, seed, verbose, tensorboard_log, total_timesteps, hyper, algo and
# outdir are defined earlier in the original script
env = make_vec_env(env_id, n_envs=n_envs, seed=seed)
# a2c() is a user-defined wrapper that builds an A2C model from the tuned
# hyperparameters in `hyper` (a hypothetical sketch follows this example)
model = a2c(env,
            hyper,
            'MlpPolicy',
            verbose=verbose,
            tensorboard_log=tensorboard_log,
            seed=seed,
            use_sde=use_sde)
model.learn(total_timesteps=total_timesteps)
# custom_eval() is a user-defined evaluation helper (sketched after Example #2)
custom_eval(model,
            env_id,
            algo,
            seed=seed,
            outdir=outdir,
            value=hyper["value"])

## Compare to a vanilla A2C run with default hyperparameters
model = A2C('MlpPolicy',
            env,
            verbose=verbose,
            tensorboard_log=tensorboard_log,
            seed=seed,
            use_sde=use_sde)
model.learn(total_timesteps=total_timesteps)
custom_eval(model, env_id, algo, seed, "vanilla")
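The a2c() wrapper above is not part of Stable Baselines3; it is defined elsewhere in the original script. Below is a minimal sketch of what it might look like, assuming hyper stores tuned constructor arguments under 'params_*' keys (as in Example #2) and that net_arch labels such as 'medium' follow rl-baselines3-zoo conventions (both are assumptions):

from stable_baselines3 import A2C

# Assumed label-to-architecture mapping (rl-baselines3-zoo style)
NET_ARCH = {"small": [64, 64], "medium": [256, 256]}

def a2c(env, hyper, policy, **kwargs):
    # Strip the 'params_' prefix and pass the tuned values to the constructor
    params = {k[len("params_"):]: v for k, v in hyper.items()
              if k.startswith("params_")}
    # net_arch is not a constructor argument; it goes through policy_kwargs
    net_arch = params.pop("net_arch", None)
    if net_arch is not None:
        kwargs["policy_kwargs"] = {"net_arch": NET_ARCH[net_arch]}
    return A2C(policy, env, **params, **kwargs)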
Example #2
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

# Tuned PPO hyperparameters from the optimization study; the earlier entries
# of this dict are truncated in the original snippet.  'value' is the score
# reported by the study.
hyper = {
    # ...
    'params_n_epochs': 20,
    'params_n_steps': 32,
    'params_net_arch': 'medium',
    'params_sde_sample_freq': 64,
    'params_vf_coef': 0.261185,
    'value': 51.3641
}

# env_id, seed, tensorboard_log and algo are defined earlier in the original script
env = make_vec_env(env_id, n_envs=4, seed=seed)
# ppo() is a user-defined wrapper analogous to a2c() in Example #1
model = ppo(env,
            hyper,
            'MlpPolicy',
            verbose=0,
            tensorboard_log=tensorboard_log,
            seed=seed,
            use_sde=True,
            device="cpu")
model.learn(total_timesteps=60000)
custom_eval(model,
            env_id,
            algo,
            seed=seed,
            outdir="results",
            value=hyper["value"])

## Compare to vanilla default execution. Vanilla uses no action noise, but the
## tuned run always uses gSDE action noise (use_sde=True)(?)
#model = PPO('MlpPolicy', env, verbose=0, tensorboard_log=tensorboard_log,
#            seed=seed, use_sde=True)
#model.learn(total_timesteps=60000)
#custom_eval(model, env_id, algo, seed, "vanilla")
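custom_eval() is likewise user-defined. A minimal sketch, assuming it simply runs Stable Baselines3's evaluate_policy on a fresh environment and writes the mean reward next to the study score passed in as value; the file layout, episode count, and the gymnasium import (SB3 >= 2.0) are assumptions:

import os
import gymnasium as gym
from stable_baselines3.common.evaluation import evaluate_policy

def custom_eval(model, env_id, algo, seed=0, outdir="results", value=None):
    # Evaluate the trained model over a fixed number of episodes
    eval_env = gym.make(env_id)
    mean_reward, std_reward = evaluate_policy(model, eval_env,
                                              n_eval_episodes=10)
    # Record the result alongside the value reported by the tuning study
    os.makedirs(outdir, exist_ok=True)
    out_path = os.path.join(outdir, f"{algo}_{env_id}_seed{seed}.txt")
    with open(out_path, "w") as f:
        f.write(f"mean={mean_reward:.2f} std={std_reward:.2f} "
                f"study_value={value}\n")
    return mean_reward, std_reward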