def objective(self, trial: optuna.Trial) -> float:
    kwargs = self._hyperparams.copy()

    # Hack to use DDPG/TD3 noise sampler
    trial.n_actions = self.n_actions
    # Hack when using HerReplayBuffer
    trial.using_her_replay_buffer = kwargs.get("replay_buffer_class") == HerReplayBuffer
    if trial.using_her_replay_buffer:
        trial.her_kwargs = kwargs.get("replay_buffer_kwargs", {})
    # Sample candidate hyperparameters
    sampled_hyperparams = HYPERPARAMS_SAMPLER[self.algo](trial)
    kwargs.update(sampled_hyperparams)

    model = ALGOS[self.algo](
        env=self.create_envs(self.n_envs, no_log=True),
        tensorboard_log=None,
        # We do not seed the trial
        seed=None,
        verbose=0,
        **kwargs,
    )

    model.trial = trial

    eval_env = self.create_envs(n_envs=self.n_eval_envs, eval_env=True)

    optuna_eval_freq = int(self.n_timesteps / self.n_evaluations)
    # Account for parallel envs
    optuna_eval_freq = max(optuna_eval_freq // model.get_env().num_envs, 1)
    # Use non-deterministic eval for Atari
    path = None
    if self.optimization_log_path is not None:
        path = os.path.join(self.optimization_log_path, f"trial_{str(trial.number)}")
    callbacks = get_callback_list({"callback": self.specified_callbacks})
    eval_callback = TrialEvalCallback(
        eval_env,
        trial,
        best_model_save_path=path,
        log_path=path,
        n_eval_episodes=self.n_eval_episodes,
        eval_freq=optuna_eval_freq,
        deterministic=self.deterministic_eval,
    )
    callbacks.append(eval_callback)

    try:
        model.learn(self.n_timesteps, callback=callbacks)
        # Free memory
        model.env.close()
        eval_env.close()
    except (AssertionError, ValueError) as e:
        # Sometimes, random hyperparams can generate NaN
        # Free memory
        model.env.close()
        eval_env.close()
        # Prune hyperparams that generate NaNs
        print(e)
        print("============")
        print("Sampled hyperparams:")
        pprint(sampled_hyperparams)
        raise optuna.exceptions.TrialPruned()

    is_pruned = eval_callback.is_pruned
    reward = eval_callback.last_mean_reward

    del model.env, eval_env
    del model

    if is_pruned:
        raise optuna.exceptions.TrialPruned()

    return reward
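
# For context on the HYPERPARAMS_SAMPLER[self.algo](trial) call above: each entry
# is a function mapping an optuna.Trial to a dict of constructor kwargs for the
# chosen algorithm. A minimal sketch of such a sampler follows; the function name,
# value ranges, and keys are illustrative assumptions, not the project's real
# search space.
from typing import Any, Dict

import optuna


def sample_ppo_params_sketch(trial: optuna.Trial) -> Dict[str, Any]:
    # Each suggest_* call registers a hyperparameter with Optuna so the sampler
    # (e.g. TPE) can propose values and the study can record them
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256])
    n_steps = trial.suggest_categorical("n_steps", [256, 512, 1024, 2048])
    gamma = trial.suggest_categorical("gamma", [0.95, 0.99, 0.999])
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
    ent_coef = trial.suggest_float("ent_coef", 1e-8, 0.1, log=True)
    # Returned keys must match the algorithm constructor arguments, since
    # objective() forwards them as **kwargs
    return {
        "batch_size": batch_size,
        "n_steps": n_steps,
        "gamma": gamma,
        "learning_rate": learning_rate,
        "ent_coef": ent_coef,
    }


# HYPERPARAMS_SAMPLER would then map algorithm names to such functions,
# e.g. HYPERPARAMS_SAMPLER = {"ppo": sample_ppo_params_sketch, ...}
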
def objective(self, trial: optuna.Trial) -> float:
    kwargs = self._hyperparams.copy()

    trial.model_class = None
    if self.algo == "her":
        trial.model_class = self._hyperparams.get("model_class", None)

    # Hack to use DDPG/TD3 noise sampler
    trial.n_actions = self.n_actions
    # Sample candidate hyperparameters
    kwargs.update(HYPERPARAMS_SAMPLER[self.algo](trial))

    model = ALGOS[self.algo](
        env=self.create_envs(self.n_envs, no_log=True),
        tensorboard_log=None,
        # We do not seed the trial
        seed=None,
        verbose=0,
        **kwargs,
    )

    model.trial = trial

    eval_env = self.create_envs(n_envs=1, eval_env=True)

    eval_freq = int(self.n_timesteps / self.n_evaluations)
    # Account for parallel envs
    eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
    # Use non-deterministic eval for Atari
    eval_callback = TrialEvalCallback(
        eval_env,
        trial,
        n_eval_episodes=self.n_eval_episodes,
        eval_freq=eval_freq_,
        deterministic=self.deterministic_eval,
    )

    try:
        model.learn(self.n_timesteps, callback=eval_callback)
        # Free memory
        model.env.close()
        eval_env.close()
    except AssertionError as e:
        # Sometimes, random hyperparams can generate NaN
        # Free memory
        model.env.close()
        eval_env.close()
        # Prune hyperparams that generate NaNs
        print(e)
        raise optuna.exceptions.TrialPruned()

    is_pruned = eval_callback.is_pruned
    reward = eval_callback.last_mean_reward

    del model.env, eval_env
    del model

    if is_pruned:
        raise optuna.exceptions.TrialPruned()

    return reward
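
# The TrialEvalCallback used by the objectives above is defined elsewhere in the
# project; a plausible sketch is given below, assuming it subclasses
# stable_baselines3.common.callbacks.EvalCallback. It reports each evaluation's
# mean reward to the Optuna trial and stops training when the pruner asks to
# prune. The exact constructor signature and body in the project may differ.
from typing import Optional

import optuna
from stable_baselines3.common.callbacks import EvalCallback


class TrialEvalCallback(EvalCallback):
    def __init__(
        self,
        eval_env,
        trial: optuna.Trial,
        n_eval_episodes: int = 5,
        eval_freq: int = 10000,
        deterministic: bool = True,
        verbose: int = 0,
        best_model_save_path: Optional[str] = None,
        log_path: Optional[str] = None,
    ):
        super().__init__(
            eval_env=eval_env,
            n_eval_episodes=n_eval_episodes,
            eval_freq=eval_freq,
            deterministic=deterministic,
            verbose=verbose,
            best_model_save_path=best_model_save_path,
            log_path=log_path,
        )
        self.trial = trial
        self.eval_idx = 0
        self.is_pruned = False

    def _on_step(self) -> bool:
        if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
            # The parent evaluation updates self.last_mean_reward
            super()._on_step()
            self.eval_idx += 1
            # Report the intermediate value so the pruner can act on it
            self.trial.report(self.last_mean_reward, self.eval_idx)
            if self.trial.should_prune():
                self.is_pruned = True
                # Returning False stops training; objective() then raises TrialPruned
                return False
        return True
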
def objective(self, trial: optuna.Trial) -> float:
    kwargs = self._hyperparams.copy()

    # Hack to use DDPG/TD3 noise sampler
    trial.n_actions = self.n_actions
    # Hack when using HerReplayBuffer
    trial.using_her_replay_buffer = kwargs.get("replay_buffer_class") == HerReplayBuffer
    if trial.using_her_replay_buffer:
        trial.her_kwargs = kwargs.get("replay_buffer_kwargs", {})
    # Sample candidate hyperparameters
    sampled_hyperparams = HYPERPARAMS_SAMPLER[self.algo](trial, self.n_envs)
    kwargs.update(sampled_hyperparams)

    n_envs = 1 if self.algo == "ars" else self.n_envs
    env = self.create_envs(n_envs, no_log=True)

    # By default, do not activate verbose output to keep
    # stdout clean with only the trial results
    trial_verbosity = 0
    # Activate verbose mode for the trial in debug mode
    # See PR #214
    if self.verbose >= 2:
        trial_verbosity = self.verbose

    model = ALGOS[self.algo](
        env=env,
        tensorboard_log=None,
        # We do not seed the trial
        seed=None,
        verbose=trial_verbosity,
        device=self.device,
        **kwargs,
    )

    eval_env = self.create_envs(n_envs=self.n_eval_envs, eval_env=True)

    optuna_eval_freq = int(self.n_timesteps / self.n_evaluations)
    # Account for parallel envs
    optuna_eval_freq = max(optuna_eval_freq // self.n_envs, 1)
    # Use non-deterministic eval for Atari
    path = None
    if self.optimization_log_path is not None:
        path = os.path.join(self.optimization_log_path, f"trial_{str(trial.number)}")
    callbacks = get_callback_list({"callback": self.specified_callbacks})
    eval_callback = TrialEvalCallback(
        eval_env,
        trial,
        best_model_save_path=path,
        log_path=path,
        n_eval_episodes=self.n_eval_episodes,
        eval_freq=optuna_eval_freq,
        deterministic=self.deterministic_eval,
    )
    callbacks.append(eval_callback)

    learn_kwargs = {}
    # Special case for ARS
    if self.algo == "ars" and self.n_envs > 1:
        learn_kwargs["async_eval"] = AsyncEval(
            [lambda: self.create_envs(n_envs=1, no_log=True) for _ in range(self.n_envs)],
            model.policy,
        )

    try:
        model.learn(self.n_timesteps, callback=callbacks, **learn_kwargs)
        # Free memory
        model.env.close()
        eval_env.close()
    except (AssertionError, ValueError, EOFError, BrokenPipeError) as e:
        # Sometimes, random hyperparams can generate NaN
        # Free memory
        # When using SubprocVecEnv, the following close() calls can raise errors themselves
        traceback.print_exc()
        try:
            model.env.close()
        except Exception:
            pass
        try:
            eval_env.close()
        except Exception:
            pass
        # Prune hyperparams that generate NaNs
        print(e)
        print("============")
        print("Sampled hyperparams:")
        pprint(sampled_hyperparams)
        raise optuna.exceptions.TrialPruned()

    is_pruned = eval_callback.is_pruned
    reward = eval_callback.last_mean_reward

    del model.env, eval_env
    del model

    if is_pruned:
        raise optuna.exceptions.TrialPruned()

    return reward
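
# A minimal sketch of how such an objective is typically driven with Optuna,
# assuming it lives on the same experiment-manager class as the methods above.
# The wrapper name, sampler/pruner settings, n_trials, and n_jobs values are
# illustrative assumptions, not the project's exact configuration.
import optuna


def hyperparameters_optimization_sketch(self) -> None:
    sampler = optuna.samplers.TPESampler(n_startup_trials=5)
    # MedianPruner stops a trial whose intermediate reward falls below the median
    # of previous trials at the same evaluation step (as reported by TrialEvalCallback)
    pruner = optuna.pruners.MedianPruner(
        n_startup_trials=5, n_warmup_steps=self.n_evaluations // 3
    )
    study = optuna.create_study(sampler=sampler, pruner=pruner, direction="maximize")
    try:
        # Each call to self.objective trains and evaluates one candidate configuration
        study.optimize(self.objective, n_trials=100, n_jobs=1)
    except KeyboardInterrupt:
        pass

    print("Best trial:")
    print(f"  Value: {study.best_trial.value}")
    print("  Params:")
    for key, value in study.best_trial.params.items():
        print(f"    {key}: {value}")
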