def _set_study_dataset(
    study: Study,
    *,
    dataset: Union[None, str, Dataset, Type[Dataset]] = None,
    dataset_kwargs: Optional[Mapping[str, Any]] = None,
    training: Union[None, str, TriplesFactory] = None,
    testing: Union[None, str, TriplesFactory] = None,
    validation: Union[None, str, TriplesFactory] = None,
):
    if (
        (isinstance(dataset, str) and has_dataset(dataset))
        or isinstance(dataset, Dataset)
        or (isinstance(dataset, type) and issubclass(dataset, Dataset))
    ):
        dataset_name = get_dataset(dataset=dataset).get_normalized_name()
        study.set_user_attr('dataset', dataset_name)
    else:
        study.set_user_attr('dataset', USER_DEFINED_CODE)
        study.set_user_attr(
            'training',
            training if isinstance(training, str) else USER_DEFINED_CODE)
        study.set_user_attr(
            'testing',
            testing if isinstance(testing, str) else USER_DEFINED_CODE)
        study.set_user_attr(
            'validation',
            validation if isinstance(validation, str) else USER_DEFINED_CODE)
def __call__(self, study: optuna.Study, trial: optuna.trial.FrozenTrial) -> None:
    # Save the study and record the elapsed time.
    study.time_overall += time() - study.time_start
    with open(self.pickle_path, 'wb') as f:
        pickle.dump(study, f)
    study.time_start = time()

    # Save the best trial's hyperparameters.
    study.trials_dataframe().iloc[study.best_trial.number].to_json(
        self.hyperparam_path)
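# Hedged usage sketch (not part of the source): the ``__call__`` above follows Optuna's
# callback signature, so an instance of its (unnamed) enclosing class -- referred to as
# ``saver`` below -- can be registered via ``callbacks=[...]``. The timing attributes it
# reads (``time_overall``/``time_start``) are plain attributes set on the study beforehand.
import optuna
from time import time

study = optuna.create_study()
study.time_overall, study.time_start = 0.0, time()  # attributes the callback expects
# saver = ...  # instance of the class defining __call__ above, with pickle_path/hyperparam_path set
# study.optimize(objective, n_trials=10, callbacks=[saver])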
def _log_plots(
    run,
    study: optuna.Study,
    visualization_backend='plotly',
    log_plot_contour=True,
    log_plot_edf=True,
    log_plot_parallel_coordinate=True,
    log_plot_param_importances=True,
    log_plot_pareto_front=True,
    log_plot_slice=True,
    log_plot_intermediate_values=True,
    log_plot_optimization_history=True,
):
    if visualization_backend == 'matplotlib':
        import optuna.visualization.matplotlib as vis
    elif visualization_backend == 'plotly':
        import optuna.visualization as vis
    else:
        raise NotImplementedError(f'{visualization_backend} visualisation backend is not implemented')

    if vis.is_available():
        params = list(p_name for t in study.trials for p_name in t.params.keys())

        if log_plot_contour and any(params):
            run['visualizations/plot_contour'] = neptune.types.File.as_html(vis.plot_contour(study))
        if log_plot_edf:
            run['visualizations/plot_edf'] = neptune.types.File.as_html(vis.plot_edf(study))
        if log_plot_parallel_coordinate:
            run['visualizations/plot_parallel_coordinate'] = \
                neptune.types.File.as_html(vis.plot_parallel_coordinate(study))
        if log_plot_param_importances and len(
                study.get_trials(states=(optuna.trial.TrialState.COMPLETE,
                                         optuna.trial.TrialState.PRUNED))) > 1:
            try:
                run['visualizations/plot_param_importances'] = \
                    neptune.types.File.as_html(vis.plot_param_importances(study))
            except (RuntimeError, ValueError, ZeroDivisionError):
                # Unable to compute importances
                pass
        if log_plot_pareto_front and study._is_multi_objective() and visualization_backend == 'plotly':
            run['visualizations/plot_pareto_front'] = neptune.types.File.as_html(vis.plot_pareto_front(study))
        if log_plot_slice and any(params):
            run['visualizations/plot_slice'] = neptune.types.File.as_html(vis.plot_slice(study))
        if log_plot_intermediate_values and any(trial.intermediate_values for trial in study.trials):
            # The intermediate values plot is available only if the above condition is met.
            run['visualizations/plot_intermediate_values'] = \
                neptune.types.File.as_html(vis.plot_intermediate_values(study))
        if log_plot_optimization_history:
            run['visualizations/plot_optimization_history'] = \
                neptune.types.File.as_html(vis.plot_optimization_history(study))
def _sync_study(from_study: optuna.Study, to_study: optuna.Study) -> None:
    if from_study.system_attrs != to_study.system_attrs:
        for k in from_study.system_attrs:
            if (k in to_study.system_attrs
                    and from_study.system_attrs[k] == to_study.system_attrs[k]):
                continue
            # Copy the missing or stale attribute onto the target study.
            to_study.set_system_attr(k, from_study.system_attrs[k])
    if from_study.user_attrs != to_study.user_attrs:
        for k in from_study.user_attrs:
            if (k in to_study.user_attrs
                    and from_study.user_attrs[k] == to_study.user_attrs[k]):
                continue
            to_study.set_user_attr(k, from_study.user_attrs[k])
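# Hedged usage sketch (not part of the source): syncing user attributes from one study
# into another. ``Study.set_user_attr`` and ``Study.user_attrs`` are public Optuna APIs;
# the two in-memory studies below exist purely for illustration.
import optuna

src = optuna.create_study(study_name="src")
dst = optuna.create_study(study_name="dst")
src.set_user_attr("dataset", "ml-100k")

_sync_study(src, dst)
assert dst.user_attrs["dataset"] == "ml-100k"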
def optimize_with_study(
    self,
    study: optuna.Study,
    n_trials: int = 20,
    timeout: Optional[int] = None,
) -> Tuple[Dict[str, Any], pd.DataFrame]:
    """Perform the optimization step using the user-created ``optuna.Study`` object.

    Creating and managing the study object yourself is convenient when you, e.g.,

    1. want to `store/resume the study using an RDB backend
       <https://optuna.readthedocs.io/en/stable/tutorial/003_rdb.html>`_.
    2. want to perform a `distributed optimization
       <https://optuna.readthedocs.io/en/stable/tutorial/004_distributed.html>`_.

    Args:
        study:
            The study object.
        n_trials:
            The number of trials (including pruned ones). Defaults to 20.
        timeout:
            If set to some value (in seconds), the study will exit after that time period.
            Note that running trials are not interrupted, though. Defaults to None.

    Returns:
        A tuple that consists of

        1. A dict containing the best parameters. This dict can be passed to the
           recommender as ``**kwargs``.
        2. A ``pandas.DataFrame`` that contains the history of optimization.
    """
    objective_func = self.objective_function()
    self.logger.info(
        "Start parameter search for %s over the range: %s",
        type(self).recommender_class.__name__,
        self.suggestions,
    )
    study.optimize(objective_func, n_trials=n_trials, timeout=timeout)
    best_params = dict(
        **study.best_trial.params,
        **{
            key: val
            for key, val in study.best_trial.user_attrs.items()
            if is_valid_param_name(key)
        },
    )
    best_params.update(self.fixed_params)
    return best_params, study_to_dataframe(study)
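# Hedged usage sketch (not part of the source): the point of ``optimize_with_study`` is
# that the caller owns the study, e.g. to persist it in an RDB backend and resume later.
# Only ``optuna.create_study`` below is real API; the final call is illustrative and
# assumes ``tuner`` is an instance of the (unnamed) optimizer class defining the method above.
import optuna

study = optuna.create_study(
    study_name="recommender-tuning",
    storage="sqlite:///recommender-tuning.db",  # resumable RDB backend
    load_if_exists=True,
    direction="minimize",
)
# best_params, history = tuner.optimize_with_study(study, n_trials=40, timeout=3600)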
def _create_shared_study(storage, comm, pruner=None):
    # type: (BaseStorage, CommunicatorBase, BasePruner) -> Study

    name_local = create_study(storage).study_name if comm.rank == 0 else None
    name_bcast = comm.mpi_comm.bcast(name_local)
    return Study(name_bcast, storage, pruner=pruner)
def _fast_intersection_search_space(
    study: optuna.Study,
    ordered_dict: bool = False,
    trial_id: Optional[int] = None,
) -> Dict[str, BaseDistribution]:
    search_space = None  # type: Optional[Dict[str, BaseDistribution]]
    for trial in reversed(study.get_trials(deepcopy=False)):
        if trial.state != optuna.structs.TrialState.COMPLETE:
            continue

        if search_space is None:
            search_space = copy.deepcopy(trial.distributions)
            continue

        delete_list = []
        for param_name, param_distribution in search_space.items():
            if param_name not in trial.distributions:
                delete_list.append(param_name)
            elif trial.distributions[param_name] != param_distribution:
                delete_list.append(param_name)
        for param_name in delete_list:
            del search_space[param_name]

        # Retrieve cache from trial_system_attrs.
        if trial_id is None:
            continue
        json_str = trial.system_attrs.get(
            "intersection_search_space", None
        )  # type: Optional[str]
        if json_str is None:
            continue
        json_dict = json.loads(json_str)

        delete_list = []
        cached_search_space = {
            name: _dict_to_distribution(dic) for name, dic in json_dict.items()
        }
        for param_name in search_space:
            if param_name not in cached_search_space:
                delete_list.append(param_name)
            elif cached_search_space[param_name] != search_space[param_name]:
                delete_list.append(param_name)
        for param_name in delete_list:
            del search_space[param_name]
        break

    if trial_id is not None and search_space is not None:
        json_str = json.dumps(
            {name: _distribution_to_dict(search_space[name]) for name in search_space}
        )
        study._storage.set_trial_system_attr(
            trial_id,
            "intersection_search_space",
            json_str,
        )

    search_space = search_space or {}
    if ordered_dict:
        search_space = OrderedDict(sorted(search_space.items(), key=lambda x: x[0]))
    return search_space
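# Hedged sketch (not part of the source), assuming an older Optuna release where
# ``optuna.samplers.intersection_search_space`` and ``optuna.structs`` are still available:
# with ``trial_id=None`` (no cache writes), the helper above should agree with the public,
# non-cached implementation.
import optuna
from optuna.samplers import intersection_search_space

study = optuna.create_study()
study.optimize(lambda t: t.suggest_uniform("x", -1.0, 1.0) ** 2, n_trials=5)

assert _fast_intersection_search_space(study) == intersection_search_space(study)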
def _initialization(self, study: optuna.Study) -> None:
    completed_trials = [
        trial
        for trial in study.get_trials(deepcopy=False)
        if trial.state == optuna.trial.TrialState.COMPLETE
    ]
    for trial in completed_trials:
        self._add_distributions(trial.distributions)
def make_plot(study: Study, plot_title: str, output_file: Path):
    WAS_NEW_OPTIMUM_ONCE = "was_new_optimum_once"
    DATETIME_COMPLETE = "datetime_complete"
    VALUE = "value"
    SECONDS_ELAPSED = "seconds-elapsed"

    df = study.trials_dataframe()[[DATETIME_COMPLETE, VALUE]].sort_values(by=DATETIME_COMPLETE)

    # we want to show experiments which were the new best optimum results in a different color
    if study.direction == StudyDirection.MAXIMIZE:
        cumu = df[VALUE].cummax()
    else:
        cumu = df[VALUE].cummin()
    cumu_dedup = cumu.drop_duplicates()
    df.loc[cumu_dedup.index, WAS_NEW_OPTIMUM_ONCE] = "yes"
    df[WAS_NEW_OPTIMUM_ONCE] = df[WAS_NEW_OPTIMUM_ONCE].fillna("no").map({"yes": "red", "no": "gray"})

    # convert to seconds elapsed to have a numeric x axis for the scatter plot
    df[SECONDS_ELAPSED] = (df[DATETIME_COMPLETE] - df[DATETIME_COMPLETE].min()).dt.total_seconds()

    ax = df.plot.scatter(x=SECONDS_ELAPSED, y=VALUE, c=df[WAS_NEW_OPTIMUM_ONCE])
    ax.set_xlim(left=min(df[SECONDS_ELAPSED]), right=max(df[SECONDS_ELAPSED]))
    fig = ax.get_figure()
    fig.suptitle(plot_title)
    fig.savefig(str(output_file))
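# Hedged usage sketch (not part of the source): run a toy study and render the scatter
# plot defined above. Only standard Optuna and pathlib APIs are used; the output path is
# arbitrary.
from pathlib import Path

import optuna

study = optuna.create_study(direction="minimize")
study.optimize(lambda t: (t.suggest_float("x", -10, 10) - 2) ** 2, n_trials=30)
make_plot(study, plot_title="toy objective", output_file=Path("history.png"))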
def __init__(self, study: optuna.Study) -> None:
    self.study_name = study.study_name
    self.user_attrs = study.user_attrs
    self.system_attrs = study.system_attrs
    self.direction = study.direction
    self.best_trial = study.best_trial
    self.best_params = study.best_params
    self.best_value = study.best_value
    self.trials = study.get_trials(deepcopy=False)
def _create_shared_study(
    storage: BaseStorage,
    comm: CommunicatorBase,
    pruner: Optional[BasePruner] = None,
) -> Study:
    name_local = create_study(storage=storage).study_name if comm.rank == 0 else None
    name_bcast = comm.mpi_comm.bcast(name_local)
    return Study(name_bcast, storage, pruner=pruner)
def test_init_with_multiple_study_names(storage_mode: str, comm: CommunicatorBase) -> None:
    TestChainerMNStudy._check_multi_node(comm)

    with MultiNodeStorageSupplier(storage_mode, comm) as storage:
        # Create study_name for each rank.
        name = create_study(storage).study_name
        study = Study(name, storage)

        with pytest.raises(ValueError):
            ChainerMNStudy(study, comm)
def _set_study_dataset(
    study: Study,
    *,
    dataset: Union[None, str, pathlib.Path, Dataset, Type[Dataset]] = None,
    training: Union[None, str, pathlib.Path, CoreTriplesFactory] = None,
    testing: Union[None, str, pathlib.Path, CoreTriplesFactory] = None,
    validation: Union[None, str, pathlib.Path, CoreTriplesFactory] = None,
):
    if dataset is not None:
        if training is not None or testing is not None or validation is not None:
            raise ValueError("Cannot specify dataset and training, testing and validation")
        elif isinstance(dataset, (str, pathlib.Path)):
            if isinstance(dataset, str) and has_dataset(dataset):
                study.set_user_attr("dataset", get_dataset(dataset=dataset).get_normalized_name())
            else:
                # otherwise, dataset refers to a file that should be automatically split
                study.set_user_attr("dataset", str(dataset))
        elif isinstance(dataset, Dataset) or (isinstance(dataset, type) and issubclass(dataset, Dataset)):
            # this could be custom data, so don't store anything. However, it's possible to check if this
            # was a pre-registered dataset. If that's the desired functionality, we can uncomment the following:
            # dataset_name = dataset.get_normalized_name()  # this works both on instances and classes
            # if has_dataset(dataset_name):
            #     study.set_user_attr('dataset', dataset_name)
            pass
        else:
            raise TypeError(f"Dataset is invalid type: ({type(dataset)}) {dataset}")
    else:
        if isinstance(training, (str, pathlib.Path)):
            study.set_user_attr("training", str(training))
        if isinstance(testing, (str, pathlib.Path)):
            study.set_user_attr("testing", str(testing))
        if isinstance(validation, (str, pathlib.Path)):
            study.set_user_attr("validation", str(validation))
def _initialization(self, study: optuna.Study) -> None:
    completed_trials = [
        trial
        for trial in study.get_trials(deepcopy=False)
        if trial.state == optuna.trial.TrialState.COMPLETE
    ]
    for trial in completed_trials:
        self._add_distributions(trial.distributions)
    with self._writer.as_default():
        hp.hparams_config(
            hparams=list(self._hp_params.values()),
            metrics=[hp.Metric(self._metric_name, display_name=self._metric_name)],
        )
def study_to_dataframe(study: optuna.Study) -> pd.DataFrame:
    result_df: pd.DataFrame = study.trials_dataframe().set_index("number")

    # remove prefix
    result_df.columns = [
        re.sub(r"^(user_attrs|params)_", "", colname) for colname in result_df.columns
    ]
    trial_and_scores: List[Tuple[float, Dict[str, float]]] = study.user_attrs.get(
        "scores", []
    )
    score_df = pd.DataFrame(
        [x[1] for x in trial_and_scores],
        index=[x[0] for x in trial_and_scores],
    )
    score_df.index.name = "number"
    result_df = result_df.join(score_df, how="left")
    return result_df
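# Hedged usage sketch (not part of the source): for a study without the "scores" user
# attribute the join is a no-op, so the result is just the trials dataframe with the
# "params_"/"user_attrs_" column prefixes stripped.
import optuna

study = optuna.create_study()
study.optimize(lambda t: t.suggest_float("x", 0, 1), n_trials=3)
df = study_to_dataframe(study)
print(df[["value", "x"]])  # "params_x" has been renamed to "x"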
def check_study(study: Study) -> None:
    for trial in study.trials:
        check_frozen_trial(trial)

    assert not study._is_multi_objective()

    complete_trials = [t for t in study.trials if t.state == TrialState.COMPLETE]
    if len(complete_trials) == 0:
        with pytest.raises(ValueError):
            study.best_params
        with pytest.raises(ValueError):
            study.best_value
        with pytest.raises(ValueError):
            study.best_trial
    else:
        check_params(study.best_params)
        check_value(study.best_value)
        check_frozen_trial(study.best_trial)
def optimize_with_study(
    self,
    study: optuna.Study,
    n_trials: int = 20,
    timeout: Optional[int] = None,
) -> Tuple[Dict[str, Any], pd.DataFrame]:
    """Perform the optimization step using the user-created ``optuna.Study`` object.

    Creating and managing the study object yourself is convenient when you, e.g.,

    1. want to `store/resume the study using an RDB backend
       <https://optuna.readthedocs.io/en/stable/tutorial/003_rdb.html>`_.
    2. want to perform a `distributed optimization
       <https://optuna.readthedocs.io/en/stable/tutorial/004_distributed.html>`_.

    Args:
        study:
            The study object.
        n_trials:
            The number of trials (including pruned ones). Defaults to 20.
        timeout:
            If set to some value (in seconds), the study will exit after that time period.
            Note that running trials are not interrupted, though. Defaults to None.

    Returns:
        A tuple that consists of

        1. A dict containing the best parameters. This dict can be passed to the
           recommender as ``**kwargs``.
        2. A ``pandas.DataFrame`` that contains the history of optimization.
    """
    objective_func = self._objective_function()
    self.logger.info(
        "Start parameter search for %s over the range: %s",
        type(self).recommender_class.__name__,
        self.suggestions,
    )
    study.optimize(objective_func, n_trials=n_trials, timeout=timeout)
    best_params = dict(
        **study.best_trial.params,
        **{
            key: val
            for key, val in study.best_trial.user_attrs.items()
            if is_valid_param_name(key)
        },
    )
    result_df = study.trials_dataframe().set_index("number")

    # remove prefix
    result_df.columns = [
        re.sub(r"^(user_attrs|params)_", "", colname) for colname in result_df.columns
    ]
    trial_and_scores: List[Tuple[float, Dict[str, float]]] = study.user_attrs.get(
        "scores", []
    )
    score_df = pd.DataFrame(
        [x[1] for x in trial_and_scores],
        index=[x[0] for x in trial_and_scores],
    )
    score_df.index.name = "number"
    result_df = result_df.join(score_df, how="left")
    return best_params, result_df
def optimize_hyperparameters(
    train_dataloader: DataLoader,
    val_dataloader: DataLoader,
    model_path: str,
    max_epochs: int = 20,
    n_trials: int = 100,
    timeout: float = 3600 * 8.0,  # 8 hours
    gradient_clip_val_range: Tuple[float, float] = (0.01, 100.0),
    hidden_size_range: Tuple[int, int] = (16, 265),
    hidden_continuous_size_range: Tuple[int, int] = (8, 64),
    attention_head_size_range: Tuple[int, int] = (1, 4),
    dropout_range: Tuple[float, float] = (0.1, 0.3),
    learning_rate_range: Tuple[float, float] = (1e-5, 1.0),
    use_learning_rate_finder: bool = True,
    trainer_kwargs: Dict[str, Any] = {},
    log_dir: str = "lightning_logs",
    study: optuna.Study = None,
    **kwargs,
) -> optuna.Study:
    """
    Optimize Temporal Fusion Transformer hyperparameters.

    Run hyperparameter optimization. The learning rate is determined with the
    PyTorch Lightning learning rate finder.

    Args:
        train_dataloader (DataLoader): dataloader for training model
        val_dataloader (DataLoader): dataloader for validating model
        model_path (str): folder to which model checkpoints are saved
        max_epochs (int, optional): Maximum number of epochs to run training. Defaults to 20.
        n_trials (int, optional): Number of hyperparameter trials to run. Defaults to 100.
        timeout (float, optional): Time in seconds after which training is stopped regardless of number of
            epochs or validation metric. Defaults to 3600*8.0.
        hidden_size_range (Tuple[int, int], optional): Minimum and maximum of ``hidden_size`` hyperparameter.
            Defaults to (16, 265).
        hidden_continuous_size_range (Tuple[int, int], optional): Minimum and maximum of
            ``hidden_continuous_size`` hyperparameter. Defaults to (8, 64).
        attention_head_size_range (Tuple[int, int], optional): Minimum and maximum of ``attention_head_size``
            hyperparameter. Defaults to (1, 4).
        dropout_range (Tuple[float, float], optional): Minimum and maximum of ``dropout`` hyperparameter.
            Defaults to (0.1, 0.3).
        learning_rate_range (Tuple[float, float], optional): Learning rate range. Defaults to (1e-5, 1.0).
        use_learning_rate_finder (bool): If to use learning rate finder or optimize as part of
            hyperparameters. Defaults to True.
        trainer_kwargs (Dict[str, Any], optional): Additional arguments to the
            `PyTorch Lightning trainer <https://pytorch-lightning.readthedocs.io/en/latest/trainer.html>`_
            such as ``limit_train_batches``. Defaults to {}.
        log_dir (str, optional): Folder into which to log results for tensorboard. Defaults to
            "lightning_logs".
        study (optuna.Study, optional): study to resume. Will create new study by default.
        **kwargs: Additional arguments for the :py:class:`~TemporalFusionTransformer`.

    Returns:
        optuna.Study: optuna study results
    """
    assert isinstance(train_dataloader.dataset, TimeSeriesDataSet) and isinstance(
        val_dataloader.dataset, TimeSeriesDataSet
    ), "dataloaders must be built from timeseriesdataset"

    # create objective function
    def objective(trial: optuna.Trial) -> float:
        # Filenames for each trial must be made unique in order to access each checkpoint.
        checkpoint_callback = pl.callbacks.ModelCheckpoint(
            os.path.join(model_path, "trial_{}".format(trial.number), "{epoch}"), monitor="val_loss"
        )

        # The default logger in PyTorch Lightning writes to event files to be consumed by
        # TensorBoard. We don't use any logger here as it requires us to implement several abstract
        # methods. Instead we set up a simple callback that saves metrics from each validation step.
        metrics_callback = MetricsCallback()
        learning_rate_callback = LearningRateLogger()
        logger = TensorBoardLogger(log_dir, name="optuna", version=trial.number)
        gradient_clip_val = trial.suggest_loguniform("gradient_clip_val", *gradient_clip_val_range)
        trainer = pl.Trainer(
            checkpoint_callback=checkpoint_callback,
            max_epochs=max_epochs,
            gradient_clip_val=gradient_clip_val,
            gpus=[0] if torch.cuda.is_available() else None,
            callbacks=[metrics_callback, learning_rate_callback],
            early_stop_callback=PyTorchLightningPruningCallback(trial, monitor="val_loss"),
            logger=logger,
            **trainer_kwargs,
        )

        # create model
        hidden_size = trial.suggest_int("hidden_size", *hidden_size_range, log=True)
        model = TemporalFusionTransformer.from_dataset(
            train_dataloader.dataset,
            dropout=trial.suggest_uniform("dropout", *dropout_range),
            hidden_size=hidden_size,
            hidden_continuous_size=trial.suggest_int(
                "hidden_continuous_size",
                hidden_continuous_size_range[0],
                min(hidden_continuous_size_range[1], hidden_size),
                log=True,
            ),
            attention_head_size=trial.suggest_int("attention_head_size", *attention_head_size_range),
            log_interval=-1,
            **kwargs,
        )

        # find good learning rate
        if use_learning_rate_finder:
            lr_trainer = pl.Trainer(
                gradient_clip_val=gradient_clip_val,
                gpus=[0] if torch.cuda.is_available() else None,
                logger=False,
            )
            res = lr_trainer.lr_find(
                model,
                train_dataloader=train_dataloader,
                val_dataloaders=val_dataloader,
                early_stop_threshold=10000.0,
                min_lr=learning_rate_range[0],
                num_training=100,
                max_lr=learning_rate_range[1],
            )

            loss_finite = np.isfinite(res.results["loss"])
            lr_smoothed, loss_smoothed = sm.nonparametric.lowess(
                np.asarray(res.results["loss"])[loss_finite],
                np.asarray(res.results["lr"])[loss_finite],
                frac=1.0 / 10.0,
            )[10:-1].T
            optimal_idx = np.gradient(loss_smoothed).argmin()
            optimal_lr = lr_smoothed[optimal_idx]
            print(f"Using learning rate of {optimal_lr:.3g}")
            model.hparams.learning_rate = optimal_lr
        else:
            model.hparams.learning_rate = trial.suggest_loguniform("learning_rate", *learning_rate_range)

        # fit
        trainer.fit(model, train_dataloader=train_dataloader, val_dataloaders=val_dataloader)

        # report result
        return metrics_callback.metrics[-1]["val_loss"].item()

    # setup optuna and run
    pruner = optuna.pruners.SuccessiveHalvingPruner()
    if study is None:
        study = optuna.create_study(direction="minimize", pruner=pruner)
    study.optimize(objective, n_trials=n_trials, timeout=timeout)
    return study
def optimize_hyperparameters(
    train_dataloader: DataLoader,
    val_dataloader: DataLoader,
    model_path: str,
    max_epochs: int = 20,
    n_trials: int = 100,
    timeout: float = 3600 * 8.0,  # 8 hours
    gradient_clip_val_range: Tuple[float, float] = (0.01, 100.0),
    hidden_size_range: Tuple[int, int] = (16, 265),
    hidden_continuous_size_range: Tuple[int, int] = (8, 64),
    attention_head_size_range: Tuple[int, int] = (1, 4),
    dropout_range: Tuple[float, float] = (0.1, 0.3),
    learning_rate_range: Tuple[float, float] = (1e-5, 1.0),
    use_learning_rate_finder: bool = True,
    trainer_kwargs: Dict[str, Any] = {},
    log_dir: str = "lightning_logs",
    study: optuna.Study = None,
    verbose: Union[int, bool] = None,
    **kwargs,
) -> optuna.Study:
    """
    Optimize Temporal Fusion Transformer hyperparameters.

    Run hyperparameter optimization. The learning rate is determined with the
    PyTorch Lightning learning rate finder.

    Args:
        train_dataloader (DataLoader): dataloader for training model
        val_dataloader (DataLoader): dataloader for validating model
        model_path (str): folder to which model checkpoints are saved
        max_epochs (int, optional): Maximum number of epochs to run training. Defaults to 20.
        n_trials (int, optional): Number of hyperparameter trials to run. Defaults to 100.
        timeout (float, optional): Time in seconds after which training is stopped regardless of number of
            epochs or validation metric. Defaults to 3600*8.0.
        hidden_size_range (Tuple[int, int], optional): Minimum and maximum of ``hidden_size`` hyperparameter.
            Defaults to (16, 265).
        hidden_continuous_size_range (Tuple[int, int], optional): Minimum and maximum of
            ``hidden_continuous_size`` hyperparameter. Defaults to (8, 64).
        attention_head_size_range (Tuple[int, int], optional): Minimum and maximum of ``attention_head_size``
            hyperparameter. Defaults to (1, 4).
        dropout_range (Tuple[float, float], optional): Minimum and maximum of ``dropout`` hyperparameter.
            Defaults to (0.1, 0.3).
        learning_rate_range (Tuple[float, float], optional): Learning rate range. Defaults to (1e-5, 1.0).
        use_learning_rate_finder (bool): If to use learning rate finder or optimize as part of
            hyperparameters. Defaults to True.
        trainer_kwargs (Dict[str, Any], optional): Additional arguments to the
            `PyTorch Lightning trainer <https://pytorch-lightning.readthedocs.io/en/latest/trainer.html>`_
            such as ``limit_train_batches``. Defaults to {}.
        log_dir (str, optional): Folder into which to log results for tensorboard. Defaults to
            "lightning_logs".
        study (optuna.Study, optional): study to resume. Will create new study by default.
        verbose (Union[int, bool]): level of verbosity.

            * None: no change in verbosity level (equivalent to verbose=1 by optuna-set default).
            * 0 or False: log only warnings.
            * 1 or True: log pruning events.
            * 2: optuna logging level at debug level.

            Defaults to None.
        **kwargs: Additional arguments for the :py:class:`~TemporalFusionTransformer`.

    Returns:
        optuna.Study: optuna study results
    """
    assert isinstance(train_dataloader.dataset, TimeSeriesDataSet) and isinstance(
        val_dataloader.dataset, TimeSeriesDataSet
    ), "dataloaders must be built from timeseriesdataset"

    logging_level = {
        None: optuna.logging.get_verbosity(),
        0: optuna.logging.WARNING,
        1: optuna.logging.INFO,
        2: optuna.logging.DEBUG,
    }
    optuna_verbose = logging_level[verbose]
    optuna.logging.set_verbosity(optuna_verbose)

    # need a deepcopy of loss as it will otherwise propagate from one trial to the next
    loss = kwargs.get("loss", QuantileLoss())

    # create objective function
    def objective(trial: optuna.Trial) -> float:
        # Filenames for each trial must be made unique in order to access each checkpoint.
        checkpoint_callback = pl.callbacks.ModelCheckpoint(
            dirpath=os.path.join(model_path, "trial_{}".format(trial.number)),
            filename="{epoch}",
            monitor="val_loss",
        )

        # The default logger in PyTorch Lightning writes to event files to be consumed by
        # TensorBoard. We don't use any logger here as it requires us to implement several abstract
        # methods. Instead we set up a simple callback that saves metrics from each validation step.
        metrics_callback = MetricsCallback()
        learning_rate_callback = LearningRateMonitor()
        logger = TensorBoardLogger(log_dir, name="optuna", version=trial.number)
        gradient_clip_val = trial.suggest_loguniform("gradient_clip_val", *gradient_clip_val_range)
        default_trainer_kwargs = dict(
            gpus=[0] if torch.cuda.is_available() else None,
            max_epochs=max_epochs,
            gradient_clip_val=gradient_clip_val,
            callbacks=[
                metrics_callback,
                learning_rate_callback,
                checkpoint_callback,
                PyTorchLightningPruningCallback(trial, monitor="val_loss"),
            ],
            logger=logger,
            progress_bar_refresh_rate=[0, 1][optuna_verbose < optuna.logging.INFO],
            weights_summary=[None, "top"][optuna_verbose < optuna.logging.INFO],
        )
        default_trainer_kwargs.update(trainer_kwargs)
        trainer = pl.Trainer(**default_trainer_kwargs)

        # create model
        hidden_size = trial.suggest_int("hidden_size", *hidden_size_range, log=True)
        kwargs["loss"] = copy.deepcopy(loss)
        model = TemporalFusionTransformer.from_dataset(
            train_dataloader.dataset,
            dropout=trial.suggest_uniform("dropout", *dropout_range),
            hidden_size=hidden_size,
            hidden_continuous_size=trial.suggest_int(
                "hidden_continuous_size",
                hidden_continuous_size_range[0],
                min(hidden_continuous_size_range[1], hidden_size),
                log=True,
            ),
            attention_head_size=trial.suggest_int("attention_head_size", *attention_head_size_range),
            log_interval=-1,
            **kwargs,
        )

        # find good learning rate
        if use_learning_rate_finder:
            lr_trainer = pl.Trainer(
                gradient_clip_val=gradient_clip_val,
                gpus=[0] if torch.cuda.is_available() else None,
                logger=False,
                progress_bar_refresh_rate=0,
                weights_summary=None,
            )
            res = lr_trainer.tuner.lr_find(
                model,
                train_dataloader=train_dataloader,
                val_dataloaders=val_dataloader,
                early_stop_threshold=10000,
                min_lr=learning_rate_range[0],
                num_training=100,
                max_lr=learning_rate_range[1],
            )

            loss_finite = np.isfinite(res.results["loss"])
            if loss_finite.sum() > 3:  # at least 3 valid values required for learning rate finder
                lr_smoothed, loss_smoothed = sm.nonparametric.lowess(
                    np.asarray(res.results["loss"])[loss_finite],
                    np.asarray(res.results["lr"])[loss_finite],
                    frac=1.0 / 10.0,
                )[min(loss_finite.sum() - 3, 10):-1].T
                optimal_idx = np.gradient(loss_smoothed).argmin()
                optimal_lr = lr_smoothed[optimal_idx]
            else:
                optimal_idx = np.asarray(res.results["loss"]).argmin()
                optimal_lr = res.results["lr"][optimal_idx]
            optuna_logger.info(f"Using learning rate of {optimal_lr:.3g}")
            # add learning rate artificially
            model.hparams.learning_rate = trial.suggest_uniform("learning_rate", optimal_lr, optimal_lr)
        else:
            model.hparams.learning_rate = trial.suggest_loguniform("learning_rate", *learning_rate_range)

        # fit
        trainer.fit(model, train_dataloader=train_dataloader, val_dataloaders=val_dataloader)

        # report result
        return metrics_callback.metrics[-1]["val_loss"].item()

    # setup optuna and run
    pruner = optuna.pruners.SuccessiveHalvingPruner()
    if study is None:
        study = optuna.create_study(direction="minimize", pruner=pruner)
    study.optimize(objective, n_trials=n_trials, timeout=timeout)
    return study
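# Hedged usage sketch (not part of the source): how the returned study is typically
# consumed. Building ``train_dataloader``/``val_dataloader`` from a ``TimeSeriesDataSet``
# is assumed to have happened already and is out of scope here, so the call is shown as
# comments only.
#
# study = optimize_hyperparameters(
#     train_dataloader,
#     val_dataloader,
#     model_path="optuna_checkpoints",
#     n_trials=50,
#     max_epochs=30,
#     use_learning_rate_finder=False,  # tune the learning rate as a regular hyperparameter
# )
# print(study.best_trial.params)  # e.g. pass these as **kwargs when refitting the model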
def optimize_with_study(
    self,
    study: optuna.Study,
    n_trials: int = 20,
    timeout: Optional[int] = None,
) -> Tuple[Dict[str, Any], pd.DataFrame]:
    """Perform the optimization step using the user-created ``optuna.Study`` object.

    Creating and managing the study object yourself is convenient when you, e.g.,

    1. want to `store/resume the study using an RDB backend
       <https://optuna.readthedocs.io/en/stable/tutorial/003_rdb.html>`_.
    2. want to perform a `distributed optimization
       <https://optuna.readthedocs.io/en/stable/tutorial/004_distributed.html>`_.

    Args:
        study:
            The study object.
        n_trials:
            The number of trials (including pruned ones). Defaults to 20.
        timeout:
            If set to some value (in seconds), the study will exit after that time period.
            Note that running trials are not interrupted, though. Defaults to None.

    Returns:
        A tuple that consists of

        1. A dict containing the best parameters. This dict can be passed to the
           recommender as ``**kwargs``.
        2. A ``pandas.DataFrame`` that contains the history of optimization.
    """
    self.current_trial = -1
    self.best_val = float("inf")
    self.best_time = None
    self.valid_results = []
    self.tried_configs = []

    def objective_func(trial: optuna.Trial) -> float:
        self.current_trial += 1  # for pruning
        start = time.time()
        params = dict(**self._suggest(trial), **self.fixed_params)
        self.logger.info("Trial %s:", self.current_trial)
        self.logger.info("parameter = %s", params)

        arg, parameters = self.get_model_arguments(**params)
        self.tried_configs.append(parameters)
        recommender = self.recommender_class(self._data, *arg, **parameters)
        recommender.learn_with_optimizer(self.val_evaluator, trial)

        score = self.val_evaluator.get_score(recommender)
        end = time.time()
        time_spent = end - start
        score["time"] = time_spent

        self.valid_results.append(score)
        self.logger.info(
            "Config %d obtained the following scores: %s within %f seconds.",
            self.current_trial,
            score,
            time_spent,
        )
        val_score = score[self.val_evaluator.target_metric.value]
        if (-val_score) < self.best_val:
            self.best_val = -val_score
            self.best_time = time_spent
            self.best_params = parameters
            self.learnt_config_best = dict(**recommender.learnt_config)
            self.logger.info(
                "Found best %s using this config.",
                self.val_evaluator.target_metric.value,
            )
            self.best_trial_index = self.current_trial
        return -val_score

    self.logger.info(
        "Start parameter search for %s over the range: %s",
        type(self).recommender_class.__name__,
        self.suggestions,
    )
    study.optimize(objective_func, n_trials=n_trials, timeout=timeout)

    if self.best_params is None:
        raise RuntimeError("best parameter not found.")
    best_params = dict(**self.best_params)
    best_params.update(**self.learnt_config_best)
    self.best_params = best_params
    result_df = pd.concat(
        [
            pd.DataFrame(self.tried_configs),
            pd.DataFrame(self.valid_results),
        ],
        axis=1,
    ).copy()
    is_best = np.zeros(result_df.shape[0], dtype=bool)
    if self.best_trial_index is not None:
        is_best[self.best_trial_index] = True
    result_df["is_best"] = is_best
    return best_params, result_df
def callback(study: Study, trial: FrozenTrial) -> None:
    if trial.number >= 4:
        study.stop()
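# Hedged usage sketch (not part of the source): Optuna invokes callbacks with
# ``(study, trial)`` after each trial, so passing the function above via
# ``callbacks=[...]`` requests a stop once trial number 4 has finished.
import optuna

study = optuna.create_study()
study.optimize(lambda t: t.suggest_float("x", 0, 1), n_trials=100, callbacks=[callback])
assert len(study.trials) == 5  # trials 0..4 ran before the stop request took effect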
def sample_relative(
    self,
    study: optuna.Study,
    trial: optuna.structs.FrozenTrial,
    search_space: Dict[str, BaseDistribution],
) -> Dict[str, Any]:
    if len(search_space) == 0:
        return {}

    completed_trials = [
        t for t in study.get_trials(deepcopy=False) if t.state == TrialState.COMPLETE
    ]
    if len(completed_trials) < self._n_startup_trials:
        return {}

    if len(search_space) == 1:
        self._logger.info(
            "`CMASampler` only supports two or more dimensional continuous "
            "search space. `{}` is used instead of `CMASampler`.".format(
                self._independent_sampler.__class__.__name__
            )
        )
        self._warn_independent_sampling = False
        return {}

    ordered_keys = [key for key in search_space]
    ordered_keys.sort()

    optimizer = self._restore_or_init_optimizer(completed_trials, search_space, ordered_keys)

    if optimizer.dim != len(ordered_keys):
        self._logger.info(
            "`CMASampler` does not support dynamic search space. "
            "`{}` is used instead of `CMASampler`.".format(
                self._independent_sampler.__class__.__name__
            )
        )
        self._warn_independent_sampling = False
        return {}

    solution_trials = [
        t
        for t in completed_trials
        if optimizer.generation == t.system_attrs.get("cma:generation", -1)
    ]
    if len(solution_trials) >= optimizer.population_size:
        solutions = []
        for t in solution_trials[: optimizer.population_size]:
            x = np.array(
                [_to_cma_param(search_space[k], t.params[k]) for k in ordered_keys]
            )
            solutions.append((x, t.value))
        optimizer.tell(solutions)

        pickled_optimizer = pickle.dumps(optimizer)
        if isinstance(study._storage, optuna.storages.InMemoryStorage):
            study._storage.set_trial_system_attr(
                trial._trial_id, "cma:optimizer", pickled_optimizer
            )
        else:
            # RDB storage does not accept bytes object.
            study._storage.set_trial_system_attr(
                trial._trial_id, "cma:optimizer", pickled_optimizer.hex()
            )

    # Caution: optimizer should update its seed value
    seed = self._cma_rng.randint(1, 2 ** 16) + trial.number
    optimizer._rng = np.random.RandomState(seed)
    params = optimizer.ask()

    study._storage.set_trial_system_attr(
        trial._trial_id, "cma:generation", optimizer.generation
    )
    external_values = {
        k: _to_optuna_param(search_space[k], p) for k, p in zip(ordered_keys, params)
    }
    return external_values