def objective(self, trial: optuna.Trial) -> float:
    """Optuna objective: train a model with trial-sampled hyperparameters
    and return its last mean evaluation reward.

    :param trial: current Optuna trial (also used to pass sampler context
        via attributes — see the hacks below)
    :return: last mean reward reported by the evaluation callback
    :raises optuna.exceptions.TrialPruned: when the callback prunes the
        trial, or when training raises an AssertionError (e.g. NaNs from
        unlucky hyperparameters)
    """
    hyperparams = self._hyperparams.copy()

    trial.model_class = None
    if self.algo == "her":
        trial.model_class = self._hyperparams.get("model_class", None)

    # Hack to use DDPG/TD3 noise sampler
    trial.n_actions = self.n_actions
    # Sample candidate hyperparameters
    hyperparams.update(HYPERPARAMS_SAMPLER[self.algo](trial))

    model = ALGOS[self.algo](
        env=self.create_envs(self.n_envs, no_log=True),
        tensorboard_log=None,
        # We do not seed the trial
        seed=None,
        verbose=0,
        **hyperparams,
    )
    model.trial = trial

    eval_env = self.create_envs(n_envs=1, eval_env=True)

    # Account for parallel envs when computing the evaluation frequency
    base_eval_freq = int(self.n_timesteps / self.n_evaluations)
    per_env_eval_freq = max(base_eval_freq // model.get_env().num_envs, 1)

    # Use non-deterministic eval for Atari
    eval_callback = TrialEvalCallback(
        eval_env,
        trial,
        n_eval_episodes=self.n_eval_episodes,
        eval_freq=per_env_eval_freq,
        deterministic=self.deterministic_eval,
    )

    try:
        model.learn(self.n_timesteps, callback=eval_callback)
    except AssertionError as error:
        # Sometimes, random hyperparams can generate NaN
        # Free memory
        model.env.close()
        eval_env.close()
        # Prune hyperparams that generate NaNs
        print(error)
        raise optuna.exceptions.TrialPruned()
    else:
        # Free memory
        model.env.close()
        eval_env.close()

    is_pruned = eval_callback.is_pruned
    reward = eval_callback.last_mean_reward

    del model.env, eval_env
    del model

    if is_pruned:
        raise optuna.exceptions.TrialPruned()
    return reward
def objective(self, trial: optuna.Trial) -> float:
    """Optuna objective: sample hyperparameters for this trial, persist
    them to a YAML file, train on the shared ``self._env`` and return the
    last mean evaluation reward.

    :param trial: current Optuna trial
    :return: last mean reward reported by the evaluation callback
    :raises optuna.exceptions.TrialPruned: when the callback prunes the
        trial or when training raises any exception
    """
    params = self._hyperparams.copy()

    trial.model_class = None
    if self.algo == "her":
        trial.model_class = self._hyperparams.get("model_class", None)

    # Hack to use DDPG/TD3 noise sampler
    trial.n_actions = self._env.action_space.shape[0]

    # Sample candidate hyperparameters
    params.update(HYPERPARAMS_SAMPLER[self.algo](trial))
    print(f"\nRunning a new trial with hyperparameters: {params}")

    # Write hyperparameters into a file
    trial_params_dir = os.path.join(self.params_path, "optimization")
    os.makedirs(trial_params_dir, exist_ok=True)
    trial_file = os.path.join(trial_params_dir, f"hyperparameters_trial_{trial.number}.yml")
    with open(trial_file, "w") as stream:
        yaml.dump(params, stream)

    model = ALGOS[self.algo](
        env=self._env,
        # Note: Here I enabled tensorboard logs
        tensorboard_log=self.tensorboard_log,
        # Note: Here I differ and I seed the trial. I want all trials to
        # have the same starting conditions
        seed=self.seed,
        verbose=self.verbose,
        **params,
    )

    # Pre-load replay buffer if enabled
    if self.preload_replay_buffer:
        if self.preload_replay_buffer.endswith('.pkl'):
            replay_buffer_path = self.preload_replay_buffer
        else:
            replay_buffer_path = os.path.join(self.preload_replay_buffer, "replay_buffer.pkl")
        if not os.path.exists(replay_buffer_path):
            raise Exception(f"Replay buffer {replay_buffer_path} does not exist")
        print("Pre-loading replay buffer")
        if self.algo == "her":
            model.load_replay_buffer(replay_buffer_path, self.truncate_last_trajectory)
        else:
            model.load_replay_buffer(replay_buffer_path)

    model.trial = trial

    # Account for parallel envs when computing the evaluation frequency
    eval_freq = max(int(self.n_timesteps / self.n_evaluations) // model.get_env().num_envs, 1)

    # Use non-deterministic eval for Atari
    eval_callback = TrialEvalCallback(
        model.env,
        model.trial,
        n_eval_episodes=self.n_eval_episodes,
        eval_freq=eval_freq,
        deterministic=self.deterministic_eval,
        verbose=self.verbose,
    )

    try:
        model.learn(self.n_timesteps, callback=eval_callback)
        # Reset env
        self._env.reset()
    except AssertionError as assertion_error:
        # Reset env
        self._env.reset()
        print('Trial stopped:', assertion_error)
        # Prune hyperparams that generate NaNs
        raise optuna.exceptions.TrialPruned()
    except Exception as err:
        exception_type = type(err).__name__
        print('Trial stopped due to raised exception:', exception_type, err)
        # Prune also all other exceptions
        raise optuna.exceptions.TrialPruned()

    is_pruned = eval_callback.is_pruned
    reward = eval_callback.last_mean_reward
    print(
        f"\nFinished a trial with reward={reward}, is_pruned={is_pruned} "
        f"for hyperparameters: {params}")

    del model

    if is_pruned:
        raise optuna.exceptions.TrialPruned()
    return reward
def objective(self, trial: optuna.Trial) -> float:
    """Optuna objective: sample hyperparameters for this trial, train the
    agent and return the last mean evaluation reward.

    :param trial: current Optuna trial (also carries sampler context via
        attributes — see the hacks below)
    :return: last mean reward reported by the evaluation callback
    :raises optuna.exceptions.TrialPruned: when the callback prunes the
        trial or when training raises AssertionError/ValueError
    """
    params = self._hyperparams.copy()

    # Hack to use DDPG/TD3 noise sampler
    trial.n_actions = self.n_actions
    # Hack when using HerReplayBuffer
    trial.using_her_replay_buffer = params.get("replay_buffer_class") == HerReplayBuffer
    if trial.using_her_replay_buffer:
        trial.her_kwargs = params.get("replay_buffer_kwargs", {})

    # Sample candidate hyperparameters
    sampled_hyperparams = HYPERPARAMS_SAMPLER[self.algo](trial)
    params.update(sampled_hyperparams)

    model = ALGOS[self.algo](
        env=self.create_envs(self.n_envs, no_log=True),
        tensorboard_log=None,
        # We do not seed the trial
        seed=None,
        verbose=0,
        **params,
    )
    model.trial = trial

    eval_env = self.create_envs(n_envs=self.n_eval_envs, eval_env=True)

    # Account for parallel envs
    optuna_eval_freq = max(int(self.n_timesteps / self.n_evaluations) // model.get_env().num_envs, 1)

    # Use non-deterministic eval for Atari
    trial_log_path = None
    if self.optimization_log_path is not None:
        trial_log_path = os.path.join(self.optimization_log_path, f"trial_{str(trial.number)}")

    callbacks = get_callback_list({"callback": self.specified_callbacks})
    eval_callback = TrialEvalCallback(
        eval_env,
        trial,
        best_model_save_path=trial_log_path,
        log_path=trial_log_path,
        n_eval_episodes=self.n_eval_episodes,
        eval_freq=optuna_eval_freq,
        deterministic=self.deterministic_eval,
    )
    callbacks.append(eval_callback)

    try:
        model.learn(self.n_timesteps, callback=callbacks)
    except (AssertionError, ValueError) as error:
        # Sometimes, random hyperparams can generate NaN
        # Free memory
        model.env.close()
        eval_env.close()
        # Prune hyperparams that generate NaNs
        print(error)
        print("============")
        print("Sampled hyperparams:")
        pprint(sampled_hyperparams)
        raise optuna.exceptions.TrialPruned()
    else:
        # Free memory
        model.env.close()
        eval_env.close()

    is_pruned = eval_callback.is_pruned
    reward = eval_callback.last_mean_reward

    del model.env, eval_env
    del model

    if is_pruned:
        raise optuna.exceptions.TrialPruned()
    return reward
def objective(self, trial: optuna.Trial) -> float:
    """Optuna objective: sample hyperparameters for this trial, train the
    agent and return the last mean evaluation reward.

    :param trial: current Optuna trial (also carries sampler context via
        attributes — see the hacks below)
    :return: last mean reward reported by the evaluation callback, or 0
        when training crashed (see the review note in the except block)
    :raises optuna.exceptions.TrialPruned: when the callback prunes the trial
    """
    kwargs = self._hyperparams.copy()

    # Hack to use DDPG/TD3 noise sampler
    trial.n_actions = self.n_actions
    # Hack when using HerReplayBuffer
    trial.using_her_replay_buffer = kwargs.get("replay_buffer_class") == HerReplayBuffer
    if trial.using_her_replay_buffer:
        trial.her_kwargs = kwargs.get("replay_buffer_kwargs", {})

    # Sample candidate hyperparameters
    sampled_hyperparams = HYPERPARAMS_SAMPLER[self.algo](trial, self.n_envs)
    kwargs.update(sampled_hyperparams)

    # ARS trains on a single env (extra workers are spawned via AsyncEval below)
    n_envs = 1 if self.algo == "ars" else self.n_envs
    env = self.create_envs(n_envs, no_log=True)

    # By default, do not activate verbose output to keep
    # stdout clean with only the trials results
    trial_verbosity = 0
    # Activate verbose mode for the trial in debug mode
    # See PR #214
    if self.verbose >= 2:
        trial_verbosity = self.verbose

    model = ALGOS[self.algo](
        env=env,
        tensorboard_log=None,
        # We do not seed the trial
        seed=None,
        verbose=trial_verbosity,
        device=self.device,
        **kwargs,
    )

    eval_env = self.create_envs(n_envs=self.n_eval_envs, eval_env=True)

    optuna_eval_freq = int(self.n_timesteps / self.n_evaluations)
    # Account for parallel envs
    optuna_eval_freq = max(optuna_eval_freq // self.n_envs, 1)
    # Use non-deterministic eval for Atari
    path = None
    if self.optimization_log_path is not None:
        path = os.path.join(self.optimization_log_path, f"trial_{str(trial.number)}")
    callbacks = get_callback_list({"callback": self.specified_callbacks})
    eval_callback = TrialEvalCallback(
        eval_env,
        trial,
        best_model_save_path=path,
        log_path=path,
        n_eval_episodes=self.n_eval_episodes,
        eval_freq=optuna_eval_freq,
        deterministic=self.deterministic_eval,
    )
    callbacks.append(eval_callback)

    learn_kwargs = {}
    # Special case for ARS: candidate policies are evaluated asynchronously
    # on freshly created single envs
    if self.algo == "ars" and self.n_envs > 1:
        learn_kwargs["async_eval"] = AsyncEval(
            [lambda: self.create_envs(n_envs=1, no_log=True) for _ in range(self.n_envs)],
            model.policy,
        )

    try:
        model.learn(self.n_timesteps, callback=callbacks, **learn_kwargs)
        # Free memory
        model.env.close()
        eval_env.close()
    except (AssertionError, ValueError, EOFError, BrokenPipeError) as e:
        # Sometimes, random hyperparams can generate NaN
        # Free memory
        # If using SubprocVecEnv, the close() calls can themselves raise,
        # so guard each one individually.  Catch Exception (not a bare
        # except) so KeyboardInterrupt/SystemExit still propagate.
        traceback.print_exc()
        try:
            model.env.close()
        except Exception:
            pass
        try:
            eval_env.close()
        except Exception:
            pass
        print(e)
        print("============")
        print("Sampled hyperparams:")
        pprint(sampled_hyperparams)
        # NOTE(review): crashed trials report a neutral 0 reward instead of
        # being pruned; the unreachable `raise optuna.exceptions.TrialPruned()`
        # that used to follow this return was dead code and has been removed.
        # Consider pruning instead if 0 is a reachable reward for this task.
        return 0

    is_pruned = eval_callback.is_pruned
    reward = eval_callback.last_mean_reward

    del model.env, eval_env
    del model

    if is_pruned:
        raise optuna.exceptions.TrialPruned()

    return reward