# Configured in hyperparams/a2c.yml
use_sde = True
n_envs = 4

env = make_vec_env(env_id, n_envs=n_envs, seed=seed)
model = a2c(env, hyper, 'MlpPolicy', verbose=verbose,
            tensorboard_log=tensorboard_log, seed=seed, use_sde=use_sde)
model.learn(total_timesteps=total_timesteps)
custom_eval(model, env_id, algo, seed=seed, outdir=outdir, value=hyper["value"])

## Compare to vanilla default execution
model = A2C('MlpPolicy', env, verbose=verbose,
            tensorboard_log=tensorboard_log, seed=seed, use_sde=use_sde)
model.learn(total_timesteps=total_timesteps)
custom_eval(model, env_id, algo, seed, "vanilla")
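# NOTE: the lowercase a2c() helper above is project-specific, not part of
# stable-baselines3. A minimal sketch of what such a wrapper might do --
# translating Optuna-style 'params_*' entries from `hyper` into A2C keyword
# arguments -- is given below. The key names, defaults, and net_arch lookup
# are illustrative assumptions, not the project's actual implementation.
from stable_baselines3 import A2C

def a2c_sketch(env, hyper, policy='MlpPolicy', **kwargs):
    # Hypothetical mapping from a net_arch label to concrete layer sizes.
    net_arch = {'small': [64, 64], 'medium': [256, 256]}
    return A2C(
        policy, env,
        gamma=hyper.get('params_gamma', 0.99),
        ent_coef=hyper.get('params_ent_coef', 0.0),
        vf_coef=hyper.get('params_vf_coef', 0.5),
        policy_kwargs=dict(net_arch=net_arch[hyper.get('params_net_arch', 'small')]),
        **kwargs,  # verbose, tensorboard_log, seed, use_sde, ...
    )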
hyper = {
    # ... (earlier tuned entries not shown)
    'params_n_epochs': 20,
    'params_n_steps': 32,
    'params_net_arch': 'medium',
    'params_sde_sample_freq': 64,
    'params_vf_coef': 0.261185,
    'value': 51.3641
}

env = make_vec_env(env_id, n_envs=4, seed=seed)
model = ppo(env, hyper, 'MlpPolicy', verbose=0,
            tensorboard_log=tensorboard_log, seed=seed,
            use_sde=True, device="cpu")
model.learn(total_timesteps=60000)
custom_eval(model, env_id, algo, seed=seed, outdir="results", value=hyper["value"])

## Compare to vanilla default execution. Vanilla uses no action noise,
## but tuning always uses action noise(?)
# model = PPO('MlpPolicy', env, verbose=0, tensorboard_log=tensorboard_log,
#             seed=seed, use_sde=use_sde)
# model.learn(total_timesteps=total_timesteps)
# custom_eval(model, env_id, algo, seed, "vanilla")
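# NOTE: custom_eval() is likewise project-specific. A rough sketch, assuming
# it re-evaluates the trained model and reports the mean episodic reward next
# to the tuned trial's 'value'; the signature mirrors the calls above, but the
# body (and the n_eval_episodes choice) is an assumption for illustration.
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy

def custom_eval_sketch(model, env_id, algo, seed=0, outdir="results", value=None):
    # A fresh single-env copy of the task, seeded for reproducibility.
    eval_env = make_vec_env(env_id, n_envs=1, seed=seed)
    mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10)
    # outdir would hold any saved plots/CSVs in the real helper (not done here).
    print(f"{algo} on {env_id} (seed {seed}): {mean_reward:.2f} +/- {std_reward:.2f}"
          + (f"; tuned trial value: {value}" if value is not None else ""))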