Пример #1
0
    def fit(self, X, y, reload_weights: bool = False):
        """Train the underlying Keras model on the given data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data; only ``X.shape[0]`` is inspected here.
        y : array-like
            Target values, passed through to ``model.fit``.
        reload_weights : bool, default=False
            If True, re-initialize the model weights before fitting.

        Returns
        -------
        self
        """
        # Todo: early stopping

        if reload_weights:
            self.reset_weights(self.model)

        # y = self.encode_targets(y)

        # use callbacks only when size of training set is above 100
        # NOTE(review): no callbacks are actually passed to model.fit in
        # either branch; the branches differ only in validation_split —
        # confirm whether callbacks were meant to be wired in here.
        if X.shape[0] > 100:
            # get pseudo validation set for keras callbacks
            # fit the model
            self.model.fit(
                X,
                y,
                batch_size=self.nn_batch_size,
                validation_split=0.1,
                epochs=self.epochs,
                verbose=self.verbosity,
            )
        else:
            # fit the model
            # logger.warn is a deprecated alias -> use logger.warning
            logger.warning(
                "Cannot use Keras Callbacks because of small sample size.")
            self.model.fit(
                X,
                y,
                batch_size=self.nn_batch_size,
                epochs=self.epochs,
                verbose=self.verbosity,
            )

        return self
Пример #2
0
    def prepare(self, pipeline_elements: list, maximize_metric: bool):
        """Build the skopt search space from the pipeline's hyperparameters.

        Hyperparameters with fewer than two possible values cannot be
        optimized and are stored as constants instead.

        Parameters
        ----------
        pipeline_elements : list
            Pipeline elements, each optionally exposing a
            ``hyperparameters`` dict.
        maximize_metric : bool
            Whether the target metric should be maximized.
        """
        self.hyperparameter_list = []
        self.maximize_metric = maximize_metric
        # build space
        space = []
        for pipe_element in pipeline_elements:
            if hasattr(pipe_element, "hyperparameters"):
                for name, value in pipe_element.hyperparameters.items():
                    # if we only have one value we do not need to optimize
                    if isinstance(value, list) and len(value) < 2:
                        self.constant_dictionary[name] = value[0]
                        continue
                    if isinstance(value,
                                  PhotonCategorical) and len(value.values) < 2:
                        self.constant_dictionary[name] = value.values[0]
                        continue
                    skopt_param = self._convert_PHOTON_to_skopt_space(
                        value, name)
                    if skopt_param is not None:
                        space.append(skopt_param)
        if len(space) == 0:
            # logger.warn is a deprecated alias -> use logger.warning
            logger.warning(
                "Did not find any hyperparameters to convert into skopt space")
            self.optimizer = None
        else:
            self.optimizer = Optimizer(
                space,
                "ET",
                acq_func=self.acq_func,
                acq_func_kwargs=self.acq_func_kwargs,
            )
        self.ask = self.ask_generator()
Пример #3
0
 def activations(self, value):
     """
     Setter for act_func. Checks if the requested activation(s) are supported.

     A single string is broadcast to one activation per hidden layer; a list
     must match the number of hidden layers.
     :param value: activation name (str) or list of names
     :return:
     """
     if not isinstance(value, (list, str)):
         # bug fix: error message previously claimed "[list, float]",
         # but the accepted types (checked above) are list and str
         raise ValueError(
             "act_func type is not supported. Please use one of [list, str]"
         )
     else:
         if not self._hidden_layer_sizes:
             # no layers configured yet -> store as-is
             self._activations = value
         else:
             if isinstance(value, str):
                 if value in __supported_activations__.keys():
                     self._activations = [value] * len(self.hidden_layer_sizes)
                     # logger.warn is a deprecated alias -> logger.warning
                     logger.warning("activations with type str converted to type list.")
                 else:
                     raise ValueError(
                         "activations not supported. Please use one of: "
                         + str(__supported_activations__.keys())
                     )
             elif len(value) != len(self.hidden_layer_sizes):
                 raise ValueError("activations length missmatched layer length.")
             elif any(act not in __supported_activations__.keys() for act in value):
                 raise ValueError(
                     "activations not supported. Please use one of: "
                     + str(__supported_activations__.keys())
                 )
             else:
                 self._activations = value
Пример #4
0
    def shall_continue(self, config_item):
        """
        Function to evaluate if the constraint is reached.
        If it returns True, the testing of the configuration is continued.
        If it returns False, further testing of the configuration is skipped
        to increase speed of the hyperparameter search.

        Parameters
        ----------
        * 'config_item' [MDBConfig]:
            All performance metrics and other scoring information for all configuration's performance.
            Can be used to evaluate if the configuration has any potential to serve the model's learning task.
        """
        if self.metric == "unknown":
            # logger.warn is a deprecated alias -> use logger.warning
            logger.warning("The metric is not known. Please check the metric: " +
                           self.metric + ". " +
                           "Performance constraints are constantly True.")
            return True
        if self.metric not in config_item.inner_folds[0].validation.metrics:
            logger.warning("The metric is not calculated. Please insert " +
                           self.metric + " to Hyperpipe.metrics. " +
                           "Performance constraints are constantly False.")
            return False
        # collect the per-fold metric values once; the strategy decides
        # how they are compared against the threshold
        fold_values = [
            x.validation.metrics[self.metric]
            for x in config_item.inner_folds
        ]
        if self._greater_is_better:
            # higher is better -> skip config when it falls below threshold
            if self.strategy.name == "first":
                if fold_values[0] < self.threshold:
                    return False
            elif self.strategy.name == "all":
                if any(item < self.threshold for item in fold_values):
                    return False
            elif self.strategy.name == "mean":
                if np.mean(fold_values) < self.threshold:
                    return False
            return True
        else:
            # lower is better -> skip config when it rises above threshold
            if self.strategy.name == "first":
                if fold_values[0] > self.threshold:
                    return False
            elif self.strategy.name == "all":
                if any(item > self.threshold for item in fold_values):
                    return False
            elif self.strategy.name == "mean":
                if np.mean(fold_values) > self.threshold:
                    return False
            return True
Пример #5
0
 def metric(self, value):
     """
     Setter for attribute metric.

     Falls back to "unknown" when the Scorer does not recognize the metric.
     :param value: metric value
     :return:
     """
     try:
         self._metric = value
         self._greater_is_better = Scorer.greater_is_better_distinction(
             self._metric)
     except NameError:
         # metric not recognized by the Scorer -> mark as unknown
         self._metric = "unknown"
         # logger.warn is a deprecated alias -> use logger.warning
         # NOTE(review): an "unknown" metric makes shall_continue return
         # constantly True, not False — confirm which message is correct.
         logger.warning(
             "Your metric is not supported. Performance constraints are constantly False."
         )
Пример #6
0
 def load_from_mongodb(self, mongodb_connect_url: str, pipe_name: str):
     """Load a hyperpipe result tree from MongoDB by its name.

     :param mongodb_connect_url: connection string for the MongoDB instance
     :param pipe_name: name of the hyperpipe to load
     :raises FileNotFoundError: if no hyperpipe with that name exists
     """
     connect(mongodb_connect_url)
     results = list(MDBHyperpipe.objects.raw({"name": pipe_name}))
     if len(results) == 1:
         self.results = results[0]
     elif len(results) > 1:
         # several pipes share the name -> take the most recently started one
         self.results = (MDBHyperpipe.objects.order_by([
             ("computation_start_time", DESCENDING)
         ]).raw({
             "name": pipe_name
         }).first())
         # logger.warn is a deprecated alias -> use logger.warning
         logger.warning(
             "Found multiple hyperpipes with that name. Returning most recent one."
         )
     else:
         raise FileNotFoundError("Could not load hyperpipe from MongoDB.")
Пример #7
0
 def dropout_rate(self, value):
     """
     Setter for dropout_rate. Checks if strategy is supported.

     A single float is broadcast to one rate per hidden layer; a list must
     match the number of hidden layers.
     :param value: dropout rate (float) or list of rates
     :return:
     """
     # isinstance instead of type(...) == ... per Python idiom
     if not isinstance(value, (list, float)):
         raise ValueError("Dropout type is not supported. Please use one of [list, float]")
     else:
         if not self._hidden_layer_sizes:
             # no layers configured yet -> store as-is
             self._dropout_rate = value
         else:
             if isinstance(value, float):
                 self._dropout_rate = [value]*len(self.hidden_layer_sizes)
                 # logger.warn is a deprecated alias -> logger.warning
                 logger.warning("Dropout with type float converted to type list.")
             elif len(value) != len(self.hidden_layer_sizes):
                 raise ValueError("Dropout length missmatched layer length.")
             else:
                 self._dropout_rate = value
Пример #8
0
    def __init__(self, n_configurations=None, limit_in_minutes=60):
        """Set up random-search stopping criteria.

        Parameters
        ----------
        n_configurations : int, optional
            Maximum number of configurations to evaluate; None or a
            non-positive value disables this criterion.
        limit_in_minutes : float, default=60
            Time budget in minutes; None or a non-positive value disables
            this criterion.
        """
        self.pipeline_elements = None
        self.parameter_iterable = None
        self.ask = self.next_config_generator()

        # normalize disabled criteria to None
        if not limit_in_minutes or limit_in_minutes <= 0:
            self.limit_in_minutes = None
        else:
            self.limit_in_minutes = limit_in_minutes
        self.start_time = None
        self.end_time = None

        if not n_configurations or n_configurations <= 0:
            self.n_configurations = None
        else:
            self.n_configurations = n_configurations
        self.k_configutration = 0  # use k++ until k==n: break

        # bug fix: the original check `limit_in_minutes <= 0` raised a
        # TypeError when limit_in_minutes was None; compare the normalized
        # attributes instead (logger.warn also replaced by logger.warning)
        if self.n_configurations is None and self.limit_in_minutes is None:
            msg = "No stopping criteria for RandomSearchOptimizer."
            logger.warning(msg)
Пример #9
0
    def plot_optimizer_history(
        self,
        metric,
        title: str = "Optimizer History",
        type: str = "plot",  # NOTE(review): shadows builtin `type`; kept for API compatibility
        reduce_scatter_by: Union[int, str] = "auto",
        file: str = None,
    ):
        """Plot the per-evaluation performance history of the optimizer.

        :param metric: specify metric that has been stored within the PHOTON results tree
        :param title: plot title
        :param type: 'plot' or 'scatter'
        :param reduce_scatter_by: integer or string ('auto'), reduce the number of points plotted by scatter
        :param file: specify a filename if you want to save the plot
        :raises ValueError: if the metric is unknown or `type` is invalid
        :return:
        """

        if metric not in self.results.hyperpipe_info.metrics:
            raise ValueError(
                'Metric "{}" not stored in results tree'.format(metric))

        config_evaluations = self.get_config_evaluations()
        minimum_config_evaluations = self.get_minimum_config_evaluations()

        # handle different lengths: truncate every fold to the shortest one
        min_corresponding = len(min(config_evaluations[metric], key=len))
        config_evaluations_corres = [
            configs[:min_corresponding]
            for configs in config_evaluations[metric]
        ]
        minimum_config_evaluations_corres = [
            configs[:min_corresponding]
            for configs in minimum_config_evaluations[metric]
        ]

        mean = np.nanmean(np.asarray(config_evaluations_corres), axis=0)
        mean_min = np.nanmean(np.asarray(minimum_config_evaluations_corres),
                              axis=0)

        greater_is_better = Scorer.greater_is_better_distinction(metric)
        if greater_is_better:
            caption = "Maximum"
        else:
            caption = "Minimum"

        plt.figure()
        if type == "plot":
            plt.plot(
                np.arange(0, len(mean)),
                mean,
                "-",
                color="gray",
                label="Mean Performance",
            )

        elif type == "scatter":
            # now do smoothing
            if isinstance(reduce_scatter_by, str):
                if reduce_scatter_by != "auto":
                    # logger.warn is a deprecated alias -> logger.warning
                    logger.warning(
                        '{} is not a valid smoothing_kernel specifier. Falling back to "auto".'
                        .format(reduce_scatter_by))

                # if auto, then calculate size of reduce_scatter_by so that 75 points on x remain
                # smallest reduce_scatter_by should be 1
                reduce_scatter_by = max(
                    [np.floor(min_corresponding / 75).astype(int), 1])

            if reduce_scatter_by > 1:
                # empty plot call only adds a legend entry for the factor
                plt.plot(
                    [],
                    [],
                    " ",
                    label="scatter reduced by factor {}".format(
                        reduce_scatter_by),
                )

            for i, fold in enumerate(config_evaluations[metric]):
                # add a few None so that list can be divided by smoothing_kernel
                remaining = len(fold) % reduce_scatter_by
                if remaining:
                    fold.extend([np.nan] * (reduce_scatter_by - remaining))
                # calculate mean over every n named_steps so that plot is less cluttered
                reduced_fold = np.nanmean(np.asarray(fold).reshape(
                    -1, reduce_scatter_by),
                                          axis=1)
                reduced_xfit = np.arange(reduce_scatter_by / 2,
                                         len(fold),
                                         step=reduce_scatter_by)
                if i == len(config_evaluations[metric]) - 1:
                    # label only the last fold so the legend has one entry
                    plt.scatter(
                        reduced_xfit,
                        np.asarray(reduced_fold),
                        color="gray",
                        alpha=0.5,
                        label="Performance",
                        marker=".",
                    )
                else:
                    plt.scatter(
                        reduced_xfit,
                        np.asarray(reduced_fold),
                        color="gray",
                        alpha=0.5,
                        marker=".",
                    )
        else:
            raise ValueError('Please specify either "plot" or "scatter".')

        plt.plot(
            np.arange(0, len(mean_min)),
            mean_min,
            "-",
            color="black",
            label="Mean {} Performance".format(caption),
        )

        for i, fold in enumerate(minimum_config_evaluations[metric]):
            xfit = np.arange(0, len(fold))
            plt.plot(xfit, fold, "-", color="black", alpha=0.5)

        plt.ylabel(metric.replace("_", " "))
        plt.xlabel("No of Evaluations")

        plt.legend()
        plt.title(title)
        if file:
            plt.savefig(file)
        else:
            # fall back to the configured results folder, if any
            if self.output_settings:
                file = os.path.join(self.output_settings.results_folder,
                                    "optimizer_history.png")
                plt.savefig(file)
        plt.close()
Пример #10
0
    def transform(self):
        """Materialize the configured number range into `self.values`.

        Dispatches on `self.range_type` to the matching numpy routine
        (arange / linspace / logspace / geomspace) and converts the result
        to plain Python numbers for MongoDB serialization.

        :raises ValueError: for a geomspace starting at zero, or logspace
            with an integer dtype.
        """
        if self.range_type == "geomspace" and self.start == 0:
            error_message = "Geometric sequence cannot include zero"
            logger.error(error_message)
            raise ValueError(error_message)
        if self.range_type == "range" and self.start > self.stop:
            warn_message = "NumberRange or one of its subclasses is empty cause np.arange " + \
                           "does not deal with start greater than stop."
            # logger.warn is a deprecated alias -> use logger.warning
            logger.warning(warn_message)

        values = []

        if self.range_type == "range":
            if not self.step:
                values = np.arange(self.start,
                                   self.stop,
                                   dtype=self.num_type,
                                   **self.range_params)
            else:
                values = np.arange(self.start,
                                   self.stop,
                                   self.step,
                                   dtype=self.num_type,
                                   **self.range_params)
        elif self.range_type == "linspace":
            if self.num:
                values = np.linspace(self.start,
                                     self.stop,
                                     num=self.num,
                                     dtype=self.num_type,
                                     **self.range_params)
            else:
                values = np.linspace(self.start,
                                     self.stop,
                                     dtype=self.num_type,
                                     **self.range_params)
        elif self.range_type == "logspace":
            if self.num_type == np.int32:
                raise ValueError(
                    "Cannot use logspace for integer,  use geomspace instead.")
            if self.num:
                values = np.logspace(self.start,
                                     self.stop,
                                     num=self.num,
                                     dtype=self.num_type,
                                     **self.range_params)
            else:
                values = np.logspace(self.start,
                                     self.stop,
                                     dtype=self.num_type,
                                     **self.range_params)
        elif self.range_type == "geomspace":
            if self.num:
                values = np.geomspace(self.start,
                                      self.stop,
                                      num=self.num,
                                      dtype=self.num_type,
                                      **self.range_params)
            else:
                values = np.geomspace(self.start,
                                      self.stop,
                                      dtype=self.num_type,
                                      **self.range_params)
        # convert to python datatype because mongodb needs it
        # NOTE(review): only np.int32 and np.float32 are handled here; for
        # any other dtype self.values stays unset — confirm whether other
        # dtypes can reach this point (a sibling variant of this method
        # converts generically via .item()).
        if self.num_type == np.int32:
            self.values = [int(i) for i in values]
        elif self.num_type == np.float32:
            self.values = [float(i) for i in values]
Пример #11
0
    def transform(self):
        """Materialize the configured number range into `self.values`.

        Dispatches on `self.range_type` to the matching numpy routine
        (arange / linspace / logspace / geomspace) and converts the result
        to plain Python scalars for MongoDB serialization.

        :raises ValueError: for a geomspace starting at zero, or logspace
            with an integer dtype.
        """
        if self.range_type == "geomspace" and self.start == 0:
            error_message = "Geometric sequence cannot include zero"
            logger.error(error_message)
            raise ValueError(error_message)
        if self.range_type == "range" and self.start > self.stop:
            warn_message = "NumberRange or one of its subclasses is empty cause np.arange " + \
                           "does not deal with start greater than stop."
            logger.warning(warn_message)

        values = []

        if self.range_type == "range":
            if not self.step:
                values = np.arange(self.start,
                                   self.stop,
                                   dtype=self.num_type,
                                   **self.range_params)
            else:
                values = np.arange(self.start,
                                   self.stop,
                                   self.step,
                                   dtype=self.num_type,
                                   **self.range_params)
        elif self.range_type == "linspace":
            if self.num:
                values = np.linspace(self.start,
                                     self.stop,
                                     num=self.num,
                                     dtype=self.num_type,
                                     **self.range_params)
            else:
                values = np.linspace(self.start,
                                     self.stop,
                                     dtype=self.num_type,
                                     **self.range_params)
        elif self.range_type == "logspace":
            if self.num_type == np.int32:
                raise ValueError(
                    "Cannot use logspace for integer,  use geomspace instead.")
            if self.num:
                values = np.logspace(self.start,
                                     self.stop,
                                     num=self.num,
                                     dtype=self.num_type,
                                     **self.range_params)
            else:
                values = np.logspace(self.start,
                                     self.stop,
                                     dtype=self.num_type,
                                     **self.range_params)
        elif self.range_type == "geomspace":
            if self.num:
                values = np.geomspace(self.start,
                                      self.stop,
                                      num=self.num,
                                      dtype=self.num_type,
                                      **self.range_params)
            else:
                values = np.geomspace(self.start,
                                      self.stop,
                                      dtype=self.num_type,
                                      **self.range_params)

        # convert to python datatype because mongodb needs it
        try:
            self.values = [v.item() for v in values]
        except Exception:
            # bug fix: the original raised Warning(msg) here, which made the
            # fallback assignment below unreachable dead code and aborted
            # transform() entirely — contradicting the best-effort message.
            # Now we warn and keep the raw values. The bare `except:` was
            # also narrowed to `except Exception:`.
            msg = "PHOTON can not guarantee full mongodb support since you chose a non [np.integer, np.floating] " \
                  "subtype in NumberType.dtype."
            logger.warning(msg)
            self.values = values