""" Helper to create a model with different hyperparameters """ return ALGOS[args.algo](env=create_env(n_envs, no_log=True), tensorboard_log=tensorboard_log, verbose=0, **kwargs) study = hyperparam_optimization(args.algo, create_model, create_env, n_trials=args.n_trials, n_timesteps=n_timesteps, hyperparams=hyperparams, n_jobs=args.n_jobs, seed=args.seed, sampler_method=args.sampler, pruner_method=args.pruner, n_startup_trials=args.n_startup_trials, n_evaluations=args.n_evaluations, storage=args.storage, study_name=args.study_name, verbose=args.verbose, deterministic_eval=not is_atari) data_frame = study.trials_dataframe() report_name = (f"report_{env_id}_{args.n_trials}-trials-{n_timesteps}" f"-{args.sampler}-{args.pruner}_{int(time.time())}.csv") study_name = (f"report_{env_id}_{args.n_trials}-trials-{n_timesteps}" f"-{args.sampler}-{args.pruner}_{int(time.time())}.pkl") log_path = os.path.join(args.log_folder, args.algo, report_name) study_path = os.path.join(args.log_folder, args.algo, study_name)
def create_model(*_args, **kwargs):
    """
    Build a fresh model of the selected algorithm for one search trial.

    Positional arguments are accepted and ignored. Keyword arguments are
    forwarded unchanged to the algorithm constructor.

    :param kwargs: hyperparameters passed to ``ALGOS[args.algo]``
    :return: a new model bound to a newly created environment
    """
    algo_class = ALGOS[args.algo]
    trial_env = create_env(n_envs)
    return algo_class(
        env=trial_env,
        tensorboard_log=tensorboard_log,
        verbose=0,
        **kwargs
    )


# Run the search with the command-line settings; the result is kept as
# `data_frame` for the report.
data_frame = hyperparam_optimization(
    args.algo,
    create_model,
    create_env,
    n_trials=args.n_trials,
    n_timesteps=n_timesteps,
    hyperparams=hyperparams,
    n_jobs=args.n_jobs,
    seed=args.seed,
    sampler_method=args.sampler,
    pruner_method=args.pruner,
    verbose=args.verbose,
)

# The report file name encodes the environment and the search settings.
report_name = "report_{}_{}-trials-{}-{}-{}.csv".format(
    env_id,
    args.n_trials,
    n_timesteps,
    args.sampler,
    args.pruner,
)
log_path = os.path.join(args.log_folder, args.algo, report_name)

if args.verbose:
    print("Writing report to {}".format(log_path))

# The report directory must exist whether or not we are verbose.
os.makedirs(os.path.dirname(log_path), exist_ok=True)
# NOTE(review): this chunk opens at the tail of an environment-building helper
# whose `def` line is above the visible source: it wraps `env` in VecNormalize
# using local_normalize_kwargs and returns it.
#
# create_model(*_arg, **kwargs): ignores positional arguments and returns a
# PPO2 model on a new env from create_env(n_envs, no_log=True). The network is
# fixed here rather than tuned: net_arch has `pi` and `vf` entries of
# [512, 256] each (presumably policy / value-function layer sizes -- confirm
# against the PPO2 policy docs) with tf.nn.relu as act_fun; **kwargs are
# forwarded to the constructor.
#
# The statements after it:
#   1. run hyperparam_optimization('ppo2', ...) with verbose=True hard-coded
#      and keep the result as `data_frame`.
#   2. build a CSV report name from env_id, the search settings and
#      int(time.time()).
#      NOTE(review): n_timesteps appears in the file name but is not passed to
#      hyperparam_optimization in this call -- confirm that is intended.
#   3. rebind `log_path`: its previous value is used as the base directory and
#      the name then holds <base>/report/<report_name>.
#   4. create the parent directory if needed (exist_ok=True) and write the
#      report with data_frame.to_csv(log_path).
env = VecNormalize(env, **local_normalize_kwargs) return env def create_model(*_arg, **kwargs): net_arch = [dict(pi=[512, 256], vf=[512, 256])] policy_kwargs = dict(act_fun=tf.nn.relu, net_arch=net_arch) return PPO2(env=create_env(n_envs, no_log=True), policy_kwargs=policy_kwargs, tensorboard_log=tensorboard_log, verbose=0, **kwargs) data_frame = hyperparam_optimization('ppo2', create_model, create_env, n_trials=n_trials, hyperparams=hyperparams, n_jobs=n_jobs, seed=seed, sampler_method=sampler, pruner_method=pruner, verbose=True) report_name = "report_{}_{}-trials-{}-{}-{}_{}.csv".format( env_id, n_trials, n_timesteps, sampler, pruner, int(time.time())) log_path = os.path.join(log_path, 'report', report_name) os.makedirs(os.path.dirname(log_path), exist_ok=True) data_frame.to_csv(log_path)