Example #1
def _set_study_dataset(
    study: Study,
    *,
    dataset: Union[None, str, Dataset, Type[Dataset]] = None,
    dataset_kwargs: Optional[Mapping[str, Any]] = None,
    training: Union[None, str, TriplesFactory] = None,
    testing: Union[None, str, TriplesFactory] = None,
    validation: Union[None, str, TriplesFactory] = None,
):
    if ((isinstance(dataset, str) and has_dataset(dataset))
            or isinstance(dataset, Dataset)
            or (isinstance(dataset, type) and issubclass(dataset, Dataset))):
        dataset_name = get_dataset(dataset=dataset).get_normalized_name()
        study.set_user_attr('dataset', dataset_name)
    else:
        study.set_user_attr('dataset', USER_DEFINED_CODE)
        study.set_user_attr(
            'training',
            training if isinstance(training, str) else USER_DEFINED_CODE)
        study.set_user_attr(
            'testing',
            testing if isinstance(testing, str) else USER_DEFINED_CODE)
        study.set_user_attr(
            'validation',
            validation if isinstance(validation, str) else USER_DEFINED_CODE)
Example #2
    def __call__(self, study: optuna.Study, trial: optuna.trial.FrozenTrial) -> None:
        # Saving study and recording time
        study.time_overall += time() - study.time_start
        with open(self.pickle_path, 'wb') as f:
            pickle.dump(study, f)
        study.time_start = time()

        # Saving the best trial's hyperparameters (and metadata) as JSON
        study.trials_dataframe().iloc[study.best_trial.number].to_json(
            self.hyperparam_path)
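
A minimal usage sketch for a callback like the one above: Optuna calls each callback with ``(study, trial)`` after every finished trial, so the saver can be attached via the ``callbacks`` argument of ``study.optimize``. The ``SaveCallback`` name, the file paths, and the ``time_start``/``time_overall`` attributes attached to the study are assumptions for illustration.

from time import time

import optuna

def objective(trial: optuna.Trial) -> float:
    x = trial.suggest_float("x", -10, 10)
    return (x - 2) ** 2

study = optuna.create_study(direction="minimize")
# Hypothetical bookkeeping attributes that the callback above relies on.
study.time_start = time()
study.time_overall = 0.0

# Hypothetical callback class exposing pickle_path and hyperparam_path.
saver = SaveCallback(pickle_path="study.pkl", hyperparam_path="best_params.json")
study.optimize(objective, n_trials=20, callbacks=[saver])
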
Example #3
def _log_plots(run,
               study: optuna.Study,
               visualization_backend='plotly',
               log_plot_contour=True,
               log_plot_edf=True,
               log_plot_parallel_coordinate=True,
               log_plot_param_importances=True,
               log_plot_pareto_front=True,
               log_plot_slice=True,
               log_plot_intermediate_values=True,
               log_plot_optimization_history=True,
               ):
    if visualization_backend == 'matplotlib':
        import optuna.visualization.matplotlib as vis
    elif visualization_backend == 'plotly':
        import optuna.visualization as vis
    else:
        raise NotImplementedError(f'{visualization_backend} visualisation backend is not implemented')

    if vis.is_available():
        params = list(p_name for t in study.trials for p_name in t.params.keys())

        if log_plot_contour and any(params):
            run['visualizations/plot_contour'] = neptune.types.File.as_html(vis.plot_contour(study))

        if log_plot_edf:
            run['visualizations/plot_edf'] = neptune.types.File.as_html(vis.plot_edf(study))

        if log_plot_parallel_coordinate:
            run['visualizations/plot_parallel_coordinate'] = \
                neptune.types.File.as_html(vis.plot_parallel_coordinate(study))

        if log_plot_param_importances and len(study.get_trials(states=(optuna.trial.TrialState.COMPLETE, optuna.trial.TrialState.PRUNED,))) > 1:
            try:
                run['visualizations/plot_param_importances'] = neptune.types.File.as_html(vis.plot_param_importances(study))
            except (RuntimeError, ValueError, ZeroDivisionError):
                # Unable to compute importances
                pass

        if log_plot_pareto_front and study._is_multi_objective() and visualization_backend == 'plotly':
            run['visualizations/plot_pareto_front'] = neptune.types.File.as_html(vis.plot_pareto_front(study))

        if log_plot_slice and any(params):
            run['visualizations/plot_slice'] = neptune.types.File.as_html(vis.plot_slice(study))

        if log_plot_intermediate_values and any(trial.intermediate_values for trial in study.trials):
            # The intermediate values plot is only available when at least one trial reported intermediate values
            run['visualizations/plot_intermediate_values'] = \
                neptune.types.File.as_html(vis.plot_intermediate_values(study))

        if log_plot_optimization_history:
            run['visualizations/plot_optimization_history'] = \
                neptune.types.File.as_html(vis.plot_optimization_history(study))
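
A hedged usage sketch for the helper above, assuming a recent Neptune client where ``neptune.init_run`` is available; the project name is a placeholder and an API token is expected in the environment.

import neptune
import optuna

def objective(trial: optuna.Trial) -> float:
    x = trial.suggest_float("x", -10, 10)
    return x ** 2

study = optuna.create_study()
study.optimize(objective, n_trials=30)

run = neptune.init_run(project="my-workspace/my-project")  # placeholder project
_log_plots(run, study, visualization_backend="plotly")
run.stop()
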
Example #4
def _sync_study(from_study: optuna.Study, to_study: optuna.Study) -> None:
    if from_study.system_attrs != to_study.system_attrs:
        for k in from_study.system_attrs:
            if (k in to_study.system_attrs and from_study.system_attrs[k]
                    == to_study.system_attrs[k]):
                continue
            to_study.set_system_attr(k, from_study.system_attrs[k])
    if from_study.user_attrs != to_study.user_attrs:
        for k in from_study.user_attrs:
            if (k in to_study.user_attrs
                    and from_study.user_attrs[k] == to_study.user_attrs[k]):
                continue
            to_study.set_user_attr(k, from_study.user_attrs[k])
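
For illustration, a small sketch of how ``_sync_study`` might be exercised with two in-memory studies; the attribute key and value are placeholders.

import optuna

source = optuna.create_study(study_name="source")
target = optuna.create_study(study_name="target")

source.set_user_attr("dataset", "example-dataset")  # placeholder attribute
_sync_study(source, target)
print(target.user_attrs)  # the user attribute is copied over from `source`
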
Example #5
    def optimize_with_study(
        self,
        study: optuna.Study,
        n_trials: int = 20,
        timeout: Optional[int] = None,
    ) -> Tuple[Dict[str, Any], pd.DataFrame]:
        """Perform the optimization step using the user-created ``optuna.Study`` object.
        Creating and managing the study object yourself can be convenient, e.g., when you

            1. want to `store/resume the study using an RDB backend <https://optuna.readthedocs.io/en/stable/tutorial/003_rdb.html>`_.
            2. want to perform a `distributed optimization <https://optuna.readthedocs.io/en/stable/tutorial/004_distributed.html>`_.

        Args:
            study:
                The study object.
            n_trials:
                The expected number of trials (including pruned trials). Defaults to 20.
            timeout:
                If set to some value (in seconds), the study will exit after that time period.
                Note that running trials are not interrupted, though. Defaults to None.

        Returns:
            A tuple that consists of

                1. A dict containing the best parameters.
                   This dict can be passed to the recommender as ``**kwargs``.
                2. A ``pandas.DataFrame`` that contains the history of optimization.

        """

        objective_func = self.objective_function()

        self.logger.info(
            """Start parameter search for %s over the range: %s""",
            type(self).recommender_class.__name__,
            self.suggestions,
        )

        study.optimize(objective_func, n_trials=n_trials, timeout=timeout)
        best_params = dict(
            **study.best_trial.params,
            **{
                key: val
                for key, val in study.best_trial.user_attrs.items()
                if is_valid_param_name(key)
            },
        )
        best_params.update(self.fixed_params)

        return best_params, study_to_dataframe(study)
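
A hedged sketch of the RDB-backed workflow the docstring refers to; the SQLite URL, the study name, and the ``optimizer`` object exposing ``optimize_with_study`` are assumptions.

import optuna

study = optuna.create_study(
    storage="sqlite:///recommender_tuning.db",  # placeholder database file
    study_name="recommender-tuning",
    load_if_exists=True,
    direction="minimize",
)
# `optimizer` is a hypothetical instance of the class that defines optimize_with_study.
best_params, history_df = optimizer.optimize_with_study(study, n_trials=40, timeout=3600)
print(best_params)
print(history_df.head())
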
Example #6
    def _create_shared_study(storage, comm, pruner=None):
        # type: (BaseStorage, CommunicatorBase, BasePruner) -> Study

        name_local = create_study(storage).study_name if comm.rank == 0 else None
        name_bcast = comm.mpi_comm.bcast(name_local)

        return Study(name_bcast, storage, pruner=pruner)
Example #7
def _fast_intersection_search_space(
    study: optuna.Study, ordered_dict: bool = False, trial_id: Optional[int] = None,
) -> Dict[str, BaseDistribution]:
    search_space = None  # type: Optional[Dict[str, BaseDistribution]]

    for trial in reversed(study.get_trials(deepcopy=False)):
        if trial.state != optuna.structs.TrialState.COMPLETE:
            continue

        if search_space is None:
            search_space = copy.deepcopy(trial.distributions)
            continue

        delete_list = []
        for param_name, param_distribution in search_space.items():
            if param_name not in trial.distributions:
                delete_list.append(param_name)
            elif trial.distributions[param_name] != param_distribution:
                delete_list.append(param_name)

        for param_name in delete_list:
            del search_space[param_name]

        # Retrieve cache from trial_system_attrs.
        if trial_id is None:
            continue

        json_str = trial.system_attrs.get(
            "intersection_search_space", None
        )  # type: Optional[str]
        if json_str is None:
            continue
        json_dict = json.loads(json_str)

        delete_list = []
        cached_search_space = {
            name: _dict_to_distribution(dic) for name, dic in json_dict.items()
        }
        for param_name in search_space:
            if param_name not in cached_search_space:
                delete_list.append(param_name)
            elif cached_search_space[param_name] != search_space[param_name]:
                delete_list.append(param_name)

        for param_name in delete_list:
            del search_space[param_name]
        break

    if trial_id is not None and search_space is not None:
        json_str = json.dumps(
            {name: _distribution_to_dict(search_space[name]) for name in search_space}
        )
        study._storage.set_trial_system_attr(
            trial_id, "intersection_search_space", json_str,
        )

    search_space = search_space or {}
    if ordered_dict:
        search_space = OrderedDict(sorted(search_space.items(), key=lambda x: x[0]))
    return search_space
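
A minimal sketch of the intersection that the helper above computes: with the illustrative objective below, only ``x`` is suggested in every completed trial, so it is the only entry that survives.

import optuna

def objective(trial: optuna.Trial) -> float:
    x = trial.suggest_float("x", -5, 5)
    if trial.number % 2 == 0:
        # `y` only appears in some trials, so it drops out of the intersection.
        return x ** 2 + trial.suggest_int("y", 0, 10)
    return x ** 2

study = optuna.create_study()
study.optimize(objective, n_trials=10)

search_space = _fast_intersection_search_space(study)
print(search_space)  # expected to contain only the distribution for "x"
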
Example #8
 def _initialization(self, study: optuna.Study) -> None:
     completed_trials = [
         trial for trial in study.get_trials(deepcopy=False)
         if trial.state == optuna.trial.TrialState.COMPLETE
     ]
     for trial in completed_trials:
         self._add_distributions(trial.distributions)
Example #9
    def make_plot(study: Study, plot_title: str, output_file: Path):
        WAS_NEW_OPTIMUM_ONCE = "was_new_optimum_once"
        DATETIME_COMPLETE = "datetime_complete"
        VALUE = "value"
        SECONDS_ELAPSED = "seconds-elapsed"

        df = study.trials_dataframe()[[DATETIME_COMPLETE, VALUE]].sort_values(by=DATETIME_COMPLETE)

        # we want to show experiments which were the new best optimum results in a different color
        if study.direction == StudyDirection.MAXIMIZE:
            cumu = df[VALUE].cummax()
        else:
            cumu = df[VALUE].cummin()
        cumu_dedup = cumu.drop_duplicates()
        df.loc[cumu_dedup.index, WAS_NEW_OPTIMUM_ONCE] = "yes"
        df[WAS_NEW_OPTIMUM_ONCE] = df[WAS_NEW_OPTIMUM_ONCE].fillna("no").map({"yes": "red", "no": "gray"})

        # convert to seconds elapsed to have a numeric x axis for the scatter plot
        df[SECONDS_ELAPSED] = (df[DATETIME_COMPLETE] - df[DATETIME_COMPLETE].min()).dt.total_seconds()

        ax = df.plot.scatter(x=SECONDS_ELAPSED, y=VALUE, c=df[WAS_NEW_OPTIMUM_ONCE])
        ax.set_xlim(left=min(df[SECONDS_ELAPSED]), right=max(df[SECONDS_ELAPSED]))
        fig = ax.get_figure()
        fig.suptitle(plot_title)
        fig.savefig(str(output_file))
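
A small usage sketch for ``make_plot``; the study, title, and output path are placeholders, and matplotlib is assumed to be installed for the pandas scatter plot.

from pathlib import Path

import optuna

def objective(trial: optuna.Trial) -> float:
    x = trial.suggest_float("x", -10, 10)
    return (x - 2) ** 2

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

make_plot(study, plot_title="Optimization progress", output_file=Path("progress.png"))
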
Example #10
 def __init__(self, study: optuna.Study) -> None:
     self.study_name = study.study_name
     self.user_attrs = study.user_attrs
     self.system_attrs = study.system_attrs
     self.direction = study.direction
     self.best_trial = study.best_trial
     self.best_params = study.best_params
     self.best_value = study.best_value
     self.trials = study.get_trials(deepcopy=False)
Example #11
    def _create_shared_study(
        storage: BaseStorage,
        comm: CommunicatorBase,
        pruner: Optional[BasePruner] = None,
    ) -> Study:

        name_local = create_study(
            storage=storage).study_name if comm.rank == 0 else None
        name_bcast = comm.mpi_comm.bcast(name_local)

        return Study(name_bcast, storage, pruner=pruner)
Example #12
    def test_init_with_multiple_study_names(storage_mode: str, comm: CommunicatorBase) -> None:

        TestChainerMNStudy._check_multi_node(comm)

        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
            # Create study_name for each rank.
            name = create_study(storage).study_name
            study = Study(name, storage)

            with pytest.raises(ValueError):
                ChainerMNStudy(study, comm)
Example #13
def _set_study_dataset(
    study: Study,
    *,
    dataset: Union[None, str, Dataset, Type[Dataset]] = None,
    training: Union[None, str, CoreTriplesFactory] = None,
    testing: Union[None, str, CoreTriplesFactory] = None,
    validation: Union[None, str, CoreTriplesFactory] = None,
):
    if dataset is not None:
        if training is not None or testing is not None or validation is not None:
            raise ValueError(
                "Cannot specify dataset and training, testing and validation")
        elif isinstance(dataset, (str, pathlib.Path)):
            if isinstance(dataset, str) and has_dataset(dataset):
                study.set_user_attr(
                    "dataset",
                    get_dataset(dataset=dataset).get_normalized_name())
            else:
                # otherwise, dataset refers to a file that should be automatically split
                study.set_user_attr("dataset", str(dataset))
        elif isinstance(dataset,
                        Dataset) or (isinstance(dataset, type)
                                     and issubclass(dataset, Dataset)):
            # this could be custom data, so don't store anything. However, it's possible to check if this
            # was a pre-registered dataset. If that's the desired functionality, we can uncomment the following:
            # dataset_name = dataset.get_normalized_name()  # this works both on instances and classes
            # if has_dataset(dataset_name):
            #     study.set_user_attr('dataset', dataset_name)
            pass
        else:
            raise TypeError(
                f"Dataset is invalid type: ({type(dataset)}) {dataset}")
    else:
        if isinstance(training, (str, pathlib.Path)):
            study.set_user_attr("training", str(training))
        if isinstance(testing, (str, pathlib.Path)):
            study.set_user_attr("testing", str(testing))
        if isinstance(validation, (str, pathlib.Path)):
            study.set_user_attr("validation", str(validation))
Example #14
    def _initialization(self, study: optuna.Study) -> None:
        completed_trials = [
            trial for trial in study.get_trials(deepcopy=False)
            if trial.state == optuna.trial.TrialState.COMPLETE
        ]
        for trial in completed_trials:
            self._add_distributions(trial.distributions)

        with self._writer.as_default():
            hp.hparams_config(hparams=list(self._hp_params.values()),
                              metrics=[
                                  hp.Metric(self._metric_name,
                                            display_name=self._metric_name)
                              ])
Example #15
def study_to_dataframe(study: optuna.Study) -> pd.DataFrame:
    result_df: pd.DataFrame = study.trials_dataframe().set_index("number")

    # remove prefix
    result_df.columns = [
        re.sub(r"^(user_attrs|params)_", "", colname) for colname in result_df.columns
    ]

    trial_and_scores: List[Tuple[float, Dict[str, float]]] = study.user_attrs.get(
        "scores", []
    )
    score_df = pd.DataFrame(
        [x[1] for x in trial_and_scores],
        index=[x[0] for x in trial_and_scores],
    )
    score_df.index.name = "number"
    result_df = result_df.join(score_df, how="left")
    return result_df
Example #16
def check_study(study: Study) -> None:

    for trial in study.trials:
        check_frozen_trial(trial)

    assert not study._is_multi_objective()

    complete_trials = [t for t in study.trials if t.state == TrialState.COMPLETE]
    if len(complete_trials) == 0:
        with pytest.raises(ValueError):
            study.best_params
        with pytest.raises(ValueError):
            study.best_value
        with pytest.raises(ValueError):
            study.best_trial
    else:
        check_params(study.best_params)
        check_value(study.best_value)
        check_frozen_trial(study.best_trial)
Example #17
    def optimize_with_study(
        self,
        study: optuna.Study,
        n_trials: int = 20,
        timeout: Optional[int] = None,
    ) -> Tuple[Dict[str, Any], pd.DataFrame]:
        """Perform the optimization step using the user-created ``optuna.Study`` object.
        Creating and managing the study object yourself can be convenient, e.g., when you

            1. want to `store/resume the study using an RDB backend <https://optuna.readthedocs.io/en/stable/tutorial/003_rdb.html>`_.
            2. want to perform a `distributed optimization <https://optuna.readthedocs.io/en/stable/tutorial/004_distributed.html>`_.

        Args:
            study:
                The study object.
            n_trials:
                The expected number of trials (including pruned trials). Defaults to 20.
            timeout:
                If set to some value (in seconds), the study will exit after that time period.
                Note that running trials are not interrupted, though. Defaults to None.

        Returns:
            A tuple that consists of

                1. A dict containing the best parameters.
                   This dict can be passed to the recommender as ``**kwargs``.
                2. A ``pandas.DataFrame`` that contains the history of optimization.

        """

        objective_func = self._objective_function()

        self.logger.info(
            """Start parameter search for %s over the range: %s""",
            type(self).recommender_class.__name__,
            self.suggestions,
        )

        study.optimize(objective_func, n_trials=n_trials, timeout=timeout)
        best_params = dict(
            **study.best_trial.params,
            **{
                key: val
                for key, val in study.best_trial.user_attrs.items()
                if is_valid_param_name(key)
            },
        )
        result_df = study.trials_dataframe().set_index("number")

        # remove prefix
        result_df.columns = [
            re.sub(r"^(user_attrs|params)_", "", colname)
            for colname in result_df.columns
        ]

        trial_and_scores: List[Tuple[float,
                                     Dict[str, float]]] = study.user_attrs.get(
                                         "scores", [])
        score_df = pd.DataFrame(
            [x[1] for x in trial_and_scores],
            index=[x[0] for x in trial_and_scores],
        )
        score_df.index.name = "number"
        result_df = result_df.join(score_df, how="left")
        return best_params, result_df
Example #18
def optimize_hyperparameters(
    train_dataloader: DataLoader,
    val_dataloader: DataLoader,
    model_path: str,
    max_epochs: int = 20,
    n_trials: int = 100,
    timeout: float = 3600 * 8.0,  # 8 hours
    gradient_clip_val_range: Tuple[float, float] = (0.01, 100.0),
    hidden_size_range: Tuple[int, int] = (16, 265),
    hidden_continuous_size_range: Tuple[int, int] = (8, 64),
    attention_head_size_range: Tuple[int, int] = (1, 4),
    dropout_range: Tuple[float, float] = (0.1, 0.3),
    learning_rate_range: Tuple[float, float] = (1e-5, 1.0),
    use_learning_rate_finder: bool = True,
    trainer_kwargs: Dict[str, Any] = {},
    log_dir: str = "lightning_logs",
    study: optuna.Study = None,
    **kwargs,
) -> optuna.Study:
    """
    Optimize Temporal Fusion Transformer hyperparameters.

    Run hyperparameter optimization. The learning rate is determined with
    the PyTorch Lightning learning rate finder.

    Args:
        train_dataloader (DataLoader): dataloader for training model
        val_dataloader (DataLoader): dataloader for validating model
        model_path (str): folder to which model checkpoints are saved
        max_epochs (int, optional): Maximum number of epochs to run training. Defaults to 20.
        n_trials (int, optional): Number of hyperparameter trials to run. Defaults to 100.
        timeout (float, optional): Time in seconds after which training is stopped regardless of number of epochs
            or validation metric. Defaults to 3600*8.0.
        hidden_size_range (Tuple[int, int], optional): Minimum and maximum of ``hidden_size`` hyperparameter. Defaults
            to (16, 265).
        hidden_continuous_size_range (Tuple[int, int], optional):  Minimum and maximum of ``hidden_continuous_size``
            hyperparameter. Defaults to (8, 64).
        attention_head_size_range (Tuple[int, int], optional):  Minimum and maximum of ``attention_head_size``
            hyperparameter. Defaults to (1, 4).
        dropout_range (Tuple[float, float], optional):  Minimum and maximum of ``dropout`` hyperparameter. Defaults to
            (0.1, 0.3).
        learning_rate_range (Tuple[float, float], optional): Learning rate range. Defaults to (1e-5, 1.0).
        use_learning_rate_finder (bool): Whether to use the learning rate finder or to optimize the learning
            rate as part of the hyperparameters. Defaults to True.
        trainer_kwargs (Dict[str, Any], optional): Additional arguments to the
            `PyTorch Lightning trainer <https://pytorch-lightning.readthedocs.io/en/latest/trainer.html>`_ such
            as ``limit_train_batches``. Defaults to {}.
        log_dir (str, optional): Folder into which to log results for tensorboard. Defaults to "lightning_logs".
        study (optuna.Study, optional): study to resume. Will create new study by default.
        **kwargs: Additional arguments for the :py:class:`~TemporalFusionTransformer`.

    Returns:
        optuna.Study: optuna study results
    """
    assert isinstance(train_dataloader.dataset, TimeSeriesDataSet) and isinstance(
        val_dataloader.dataset, TimeSeriesDataSet
    ), "dataloaders must be built from timeseriesdataset"

    # create objective function
    def objective(trial: optuna.Trial) -> float:
        # Filenames for each trial must be made unique in order to access each checkpoint.
        checkpoint_callback = pl.callbacks.ModelCheckpoint(
            os.path.join(model_path, "trial_{}".format(trial.number), "{epoch}"), monitor="val_loss"
        )

        # The default logger in PyTorch Lightning writes to event files to be consumed by
        # TensorBoard. We don't use any logger here as it requires us to implement several abstract
        # methods. Instead we setup a simple callback, that saves metrics from each validation step.
        metrics_callback = MetricsCallback()
        learning_rate_callback = LearningRateLogger()
        logger = TensorBoardLogger(log_dir, name="optuna", version=trial.number)
        gradient_clip_val = trial.suggest_loguniform("gradient_clip_val", *gradient_clip_val_range)
        trainer = pl.Trainer(
            checkpoint_callback=checkpoint_callback,
            max_epochs=max_epochs,
            gradient_clip_val=gradient_clip_val,
            gpus=[0] if torch.cuda.is_available() else None,
            callbacks=[metrics_callback, learning_rate_callback],
            early_stop_callback=PyTorchLightningPruningCallback(trial, monitor="val_loss"),
            logger=logger,
            **trainer_kwargs,
        )

        # create model
        hidden_size = trial.suggest_int("hidden_size", *hidden_size_range, log=True)
        model = TemporalFusionTransformer.from_dataset(
            train_dataloader.dataset,
            dropout=trial.suggest_uniform("dropout", *dropout_range),
            hidden_size=hidden_size,
            hidden_continuous_size=trial.suggest_int(
                "hidden_continuous_size",
                hidden_continuous_size_range[0],
                min(hidden_continuous_size_range[1], hidden_size),
                log=True,
            ),
            attention_head_size=trial.suggest_int("attention_head_size", *attention_head_size_range),
            log_interval=-1,
            **kwargs,
        )
        # find good learning rate
        if use_learning_rate_finder:
            lr_trainer = pl.Trainer(
                gradient_clip_val=gradient_clip_val,
                gpus=[0] if torch.cuda.is_available() else None,
                logger=False,
            )
            res = lr_trainer.lr_find(
                model,
                train_dataloader=train_dataloader,
                val_dataloaders=val_dataloader,
                early_stop_threshold=10000.0,
                min_lr=learning_rate_range[0],
                num_training=100,
                max_lr=learning_rate_range[1],
            )

            loss_finite = np.isfinite(res.results["loss"])
            lr_smoothed, loss_smoothed = sm.nonparametric.lowess(
                np.asarray(res.results["loss"])[loss_finite],
                np.asarray(res.results["lr"])[loss_finite],
                frac=1.0 / 10.0,
            )[10:-1].T
            optimal_idx = np.gradient(loss_smoothed).argmin()
            optimal_lr = lr_smoothed[optimal_idx]
            print(f"Using learning rate of {optimal_lr:.3g}")
            model.hparams.learning_rate = optimal_lr
        else:
            model.hparams.learning_rate = trial.suggest_loguniform("learning_rate", *learning_rate_range)

        # fit
        trainer.fit(model, train_dataloader=train_dataloader, val_dataloaders=val_dataloader)

        # report result
        return metrics_callback.metrics[-1]["val_loss"].item()

    # setup optuna and run
    pruner = optuna.pruners.SuccessiveHalvingPruner()
    if study is None:
        study = optuna.create_study(direction="minimize", pruner=pruner)
    study.optimize(objective, n_trials=n_trials, timeout=timeout)
    return study
Example #19
def optimize_hyperparameters(
    train_dataloader: DataLoader,
    val_dataloader: DataLoader,
    model_path: str,
    max_epochs: int = 20,
    n_trials: int = 100,
    timeout: float = 3600 * 8.0,  # 8 hours
    gradient_clip_val_range: Tuple[float, float] = (0.01, 100.0),
    hidden_size_range: Tuple[int, int] = (16, 265),
    hidden_continuous_size_range: Tuple[int, int] = (8, 64),
    attention_head_size_range: Tuple[int, int] = (1, 4),
    dropout_range: Tuple[float, float] = (0.1, 0.3),
    learning_rate_range: Tuple[float, float] = (1e-5, 1.0),
    use_learning_rate_finder: bool = True,
    trainer_kwargs: Dict[str, Any] = {},
    log_dir: str = "lightning_logs",
    study: optuna.Study = None,
    verbose: Union[int, bool] = None,
    **kwargs,
) -> optuna.Study:
    """
    Optimize Temporal Fusion Transformer hyperparameters.

    Run hyperparameter optimization. The learning rate is determined with
    the PyTorch Lightning learning rate finder.

    Args:
        train_dataloader (DataLoader): dataloader for training model
        val_dataloader (DataLoader): dataloader for validating model
        model_path (str): folder to which model checkpoints are saved
        max_epochs (int, optional): Maximum number of epochs to run training. Defaults to 20.
        n_trials (int, optional): Number of hyperparameter trials to run. Defaults to 100.
        timeout (float, optional): Time in seconds after which training is stopped regardless of number of epochs
            or validation metric. Defaults to 3600*8.0.
        hidden_size_range (Tuple[int, int], optional): Minimum and maximum of ``hidden_size`` hyperparameter. Defaults
            to (16, 265).
        hidden_continuous_size_range (Tuple[int, int], optional):  Minimum and maximum of ``hidden_continuous_size``
            hyperparameter. Defaults to (8, 64).
        attention_head_size_range (Tuple[int, int], optional):  Minimum and maximum of ``attention_head_size``
            hyperparameter. Defaults to (1, 4).
        dropout_range (Tuple[float, float], optional):  Minimum and maximum of ``dropout`` hyperparameter. Defaults to
            (0.1, 0.3).
        learning_rate_range (Tuple[float, float], optional): Learning rate range. Defaults to (1e-5, 1.0).
        use_learning_rate_finder (bool): Whether to use the learning rate finder or to optimize the learning
            rate as part of the hyperparameters. Defaults to True.
        trainer_kwargs (Dict[str, Any], optional): Additional arguments to the
            `PyTorch Lightning trainer <https://pytorch-lightning.readthedocs.io/en/latest/trainer.html>`_ such
            as ``limit_train_batches``. Defaults to {}.
        log_dir (str, optional): Folder into which to log results for tensorboard. Defaults to "lightning_logs".
        study (optuna.Study, optional): study to resume. Will create new study by default.
        verbose (Union[int, bool]): level of verbosity.
            * None: no change in verbosity level (equivalent to verbose=1 by optuna-set default).
            * 0 or False: log only warnings.
            * 1 or True: log pruning events.
            * 2: optuna logging level at debug level.
            Defaults to None.

        **kwargs: Additional arguments for the :py:class:`~TemporalFusionTransformer`.

    Returns:
        optuna.Study: optuna study results
    """
    assert isinstance(train_dataloader.dataset,
                      TimeSeriesDataSet) and isinstance(
                          val_dataloader.dataset, TimeSeriesDataSet
                      ), "dataloaders must be built from timeseriesdataset"

    logging_level = {
        None: optuna.logging.get_verbosity(),
        0: optuna.logging.WARNING,
        1: optuna.logging.INFO,
        2: optuna.logging.DEBUG,
    }
    optuna_verbose = logging_level[verbose]
    optuna.logging.set_verbosity(optuna_verbose)

    loss = kwargs.get(
        "loss", QuantileLoss()
    )  # need a deepcopy of loss as it will otherwise propagate from one trial to the next

    # create objective function
    def objective(trial: optuna.Trial) -> float:
        # Filenames for each trial must be made unique in order to access each checkpoint.
        checkpoint_callback = pl.callbacks.ModelCheckpoint(
            dirpath=os.path.join(model_path, "trial_{}".format(trial.number)),
            filename="{epoch}",
            monitor="val_loss")

        # The default logger in PyTorch Lightning writes to event files to be consumed by
        # TensorBoard. We don't use any logger here as it requires us to implement several abstract
        # methods. Instead we setup a simple callback, that saves metrics from each validation step.
        metrics_callback = MetricsCallback()
        learning_rate_callback = LearningRateMonitor()
        logger = TensorBoardLogger(log_dir,
                                   name="optuna",
                                   version=trial.number)
        gradient_clip_val = trial.suggest_loguniform("gradient_clip_val",
                                                     *gradient_clip_val_range)
        default_trainer_kwargs = dict(
            gpus=[0] if torch.cuda.is_available() else None,
            max_epochs=max_epochs,
            gradient_clip_val=gradient_clip_val,
            callbacks=[
                metrics_callback,
                learning_rate_callback,
                checkpoint_callback,
                PyTorchLightningPruningCallback(trial, monitor="val_loss"),
            ],
            logger=logger,
            progress_bar_refresh_rate=[0, 1
                                       ][optuna_verbose < optuna.logging.INFO],
            weights_summary=[None,
                             "top"][optuna_verbose < optuna.logging.INFO],
        )
        default_trainer_kwargs.update(trainer_kwargs)
        trainer = pl.Trainer(**default_trainer_kwargs, )

        # create model
        hidden_size = trial.suggest_int("hidden_size",
                                        *hidden_size_range,
                                        log=True)
        kwargs["loss"] = copy.deepcopy(loss)
        model = TemporalFusionTransformer.from_dataset(
            train_dataloader.dataset,
            dropout=trial.suggest_uniform("dropout", *dropout_range),
            hidden_size=hidden_size,
            hidden_continuous_size=trial.suggest_int(
                "hidden_continuous_size",
                hidden_continuous_size_range[0],
                min(hidden_continuous_size_range[1], hidden_size),
                log=True,
            ),
            attention_head_size=trial.suggest_int("attention_head_size",
                                                  *attention_head_size_range),
            log_interval=-1,
            **kwargs,
        )
        # find good learning rate
        if use_learning_rate_finder:
            lr_trainer = pl.Trainer(
                gradient_clip_val=gradient_clip_val,
                gpus=[0] if torch.cuda.is_available() else None,
                logger=False,
                progress_bar_refresh_rate=0,
                weights_summary=None,
            )
            res = lr_trainer.tuner.lr_find(
                model,
                train_dataloader=train_dataloader,
                val_dataloaders=val_dataloader,
                early_stop_threshold=10000,
                min_lr=learning_rate_range[0],
                num_training=100,
                max_lr=learning_rate_range[1],
            )

            loss_finite = np.isfinite(res.results["loss"])
            if loss_finite.sum(
            ) > 3:  # at least 3 valid values required for learning rate finder
                lr_smoothed, loss_smoothed = sm.nonparametric.lowess(
                    np.asarray(res.results["loss"])[loss_finite],
                    np.asarray(res.results["lr"])[loss_finite],
                    frac=1.0 / 10.0,
                )[min(loss_finite.sum() - 3, 10):-1].T
                optimal_idx = np.gradient(loss_smoothed).argmin()
                optimal_lr = lr_smoothed[optimal_idx]
            else:
                optimal_idx = np.asarray(res.results["loss"]).argmin()
                optimal_lr = res.results["lr"][optimal_idx]
            optuna_logger.info(f"Using learning rate of {optimal_lr:.3g}")
            # add learning rate artificially
            model.hparams.learning_rate = trial.suggest_uniform(
                "learning_rate", optimal_lr, optimal_lr)
        else:
            model.hparams.learning_rate = trial.suggest_loguniform(
                "learning_rate", *learning_rate_range)

        # fit
        trainer.fit(model,
                    train_dataloader=train_dataloader,
                    val_dataloaders=val_dataloader)

        # report result
        return metrics_callback.metrics[-1]["val_loss"].item()

    # setup optuna and run
    pruner = optuna.pruners.SuccessiveHalvingPruner()
    if study is None:
        study = optuna.create_study(direction="minimize", pruner=pruner)
    study.optimize(objective, n_trials=n_trials, timeout=timeout)
    return study
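
A hedged sketch of how ``optimize_hyperparameters`` is typically called; it assumes ``training`` and ``validation`` are existing ``TimeSeriesDataSet`` instances, and the checkpoint folder name is a placeholder.

# `training` and `validation` are assumed TimeSeriesDataSet instances defined elsewhere.
train_dataloader = training.to_dataloader(train=True, batch_size=64, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=64, num_workers=0)

study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="optuna_checkpoints",  # placeholder checkpoint folder
    n_trials=10,
    max_epochs=5,
    use_learning_rate_finder=False,  # skip the LR finder for a quicker sketch
)
print(study.best_trial.params)
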
Example #20
    def optimize_with_study(
        self,
        study: optuna.Study,
        n_trials: int = 20,
        timeout: Optional[int] = None,
    ) -> Tuple[Dict[str, Any], pd.DataFrame]:
        """Perform the optimization step using the user-created ``optuna.Study`` object.
        Creating and managing the study object yourself can be convenient, e.g., when you

            1. want to `store/resume the study using an RDB backend <https://optuna.readthedocs.io/en/stable/tutorial/003_rdb.html>`_.
            2. want to perform a `distributed optimization <https://optuna.readthedocs.io/en/stable/tutorial/004_distributed.html>`_.

        Args:
            study:
                The study object.
            n_trials:
                The expected number of trials (including pruned trials). Defaults to 20.
            timeout:
                If set to some value (in seconds), the study will exit after that time period.
                Note that running trials are not interrupted, though. Defaults to None.

        Returns:
            A tuple that consists of

                1. A dict containing the best parameters.
                   This dict can be passed to the recommender as ``**kwargs``.
                2. A ``pandas.DataFrame`` that contains the history of optimization.

        """
        self.current_trial = -1
        self.best_val = float("inf")
        self.best_time = None
        self.valid_results = []
        self.tried_configs = []

        def objective_func(trial: optuna.Trial) -> float:
            self.current_trial += 1  # for pruning
            start = time.time()
            params = dict(**self._suggest(trial), **self.fixed_params)
            self.logger.info("Trial %s:", self.current_trial)
            self.logger.info("parameter = %s", params)

            arg, parameters = self.get_model_arguments(**params)

            self.tried_configs.append(parameters)
            recommender = self.recommender_class(self._data, *arg,
                                                 **parameters)
            recommender.learn_with_optimizer(self.val_evaluator, trial)

            score = self.val_evaluator.get_score(recommender)
            end = time.time()

            time_spent = end - start
            score["time"] = time_spent
            self.valid_results.append(score)
            self.logger.info(
                "Config %d obtained the following scores: %s within %f seconds.",
                self.current_trial,
                score,
                time_spent,
            )
            val_score = score[self.val_evaluator.target_metric.value]
            if (-val_score) < self.best_val:
                self.best_val = -val_score
                self.best_time = time_spent
                self.best_params = parameters
                self.learnt_config_best = dict(**recommender.learnt_config)
                self.logger.info(
                    "Found best %s using this config.",
                    self.val_evaluator.target_metric.value,
                )
                self.best_trial_index = self.current_trial

            return -val_score

        self.logger.info(
            """Start parameter search for %s over the range: %s""",
            type(self).recommender_class.__name__,
            self.suggestions,
        )

        study.optimize(objective_func, n_trials=n_trials, timeout=timeout)
        if self.best_params is None:
            raise RuntimeError("best parameter not found.")
        best_params = dict(**self.best_params)
        best_params.update(**self.learnt_config_best)
        self.best_params = best_params
        result_df = pd.concat(
            [
                pd.DataFrame(self.tried_configs),
                pd.DataFrame(self.valid_results),
            ],
            axis=1,
        ).copy()
        is_best = np.zeros(result_df.shape[0], dtype=bool)
        if self.best_trial_index is not None:
            is_best[self.best_trial_index] = True
        result_df["is_best"] = is_best
        return best_params, result_df
Example #21
 def callback(study: Study, trial: FrozenTrial) -> None:
     if trial.number >= 4:
         study.stop()
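
Such a callback is passed through the ``callbacks`` argument of ``study.optimize``; a minimal sketch with an illustrative objective:

import optuna

def objective(trial: optuna.Trial) -> float:
    return trial.suggest_float("x", 0, 10)

study = optuna.create_study()
# Even though n_trials is 100, the callback stops the study once trial number 4 finishes.
study.optimize(objective, n_trials=100, callbacks=[callback])
print(len(study.trials))  # expected: 5
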
Example #22
    def sample_relative(
        self,
        study: optuna.Study,
        trial: optuna.structs.FrozenTrial,
        search_space: Dict[str, BaseDistribution],
    ) -> Dict[str, Any]:
        if len(search_space) == 0:
            return {}

        completed_trials = [
            t
            for t in study.get_trials(deepcopy=False)
            if t.state == TrialState.COMPLETE
        ]
        if len(completed_trials) < self._n_startup_trials:
            return {}

        if len(search_space) == 1:
            self._logger.info(
                "`CMASampler` only supports two or more dimensional continuous "
                "search space. `{}` is used instead of `CMASampler`.".format(
                    self._independent_sampler.__class__.__name__
                )
            )
            self._warn_independent_sampling = False
            return {}

        ordered_keys = [key for key in search_space]
        ordered_keys.sort()

        optimizer = self._restore_or_init_optimizer(
            completed_trials, search_space, ordered_keys
        )

        if optimizer.dim != len(ordered_keys):
            self._logger.info(
                "`CMASampler` does not support dynamic search space. "
                "`{}` is used instead of `CMASampler`.".format(
                    self._independent_sampler.__class__.__name__
                )
            )
            self._warn_independent_sampling = False
            return {}

        solution_trials = [
            t
            for t in completed_trials
            if optimizer.generation == t.system_attrs.get("cma:generation", -1)
        ]
        if len(solution_trials) >= optimizer.population_size:
            solutions = []
            for t in solution_trials[: optimizer.population_size]:
                x = np.array(
                    [_to_cma_param(search_space[k], t.params[k]) for k in ordered_keys]
                )
                solutions.append((x, t.value))
            optimizer.tell(solutions)
            pickled_optimizer = pickle.dumps(optimizer)
            if isinstance(study._storage, optuna.storages.InMemoryStorage):
                study._storage.set_trial_system_attr(
                    trial._trial_id, "cma:optimizer", pickled_optimizer
                )
            else:
                # RDB storage does not accept bytes object.
                study._storage.set_trial_system_attr(
                    trial._trial_id, "cma:optimizer", pickled_optimizer.hex()
                )

        # Caution: optimizer should update its seed value
        seed = self._cma_rng.randint(1, 2 ** 16) + trial.number
        optimizer._rng = np.random.RandomState(seed)
        params = optimizer.ask()

        study._storage.set_trial_system_attr(
            trial._trial_id, "cma:generation", optimizer.generation
        )
        external_values = {
            k: _to_optuna_param(search_space[k], p)
            for k, p in zip(ordered_keys, params)
        }
        return external_values
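
A custom sampler like the one above is plugged in through the ``sampler`` argument of ``optuna.create_study``; a minimal, hedged sketch in which the ``CMASampler`` constructor arguments are assumptions.

import optuna

def objective(trial: optuna.Trial) -> float:
    x = trial.suggest_float("x", -5, 5)
    y = trial.suggest_float("y", -5, 5)
    return x ** 2 + y ** 2

sampler = CMASampler(n_startup_trials=5)  # hypothetical constructor signature
study = optuna.create_study(sampler=sampler, direction="minimize")
study.optimize(objective, n_trials=50)
print(study.best_params)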