Пример #1
0
 def activations(self, value):
     """
     Setter for the activation functions of the hidden layers.

     While no hidden layer sizes are set, the value is stored as given
     (only its type is checked). Otherwise a single activation name is
     expanded to one entry per hidden layer, and a list must contain exactly
     one supported activation name per hidden layer.

     :param value: activation name (str) or list of activation names
     :return: None
     :raises ValueError: if value is neither a list nor a str, if a list
         does not have one entry per hidden layer, or if an activation is not
         a key of __supported_activations__
     """
     if not isinstance(value, (list, str)):
         # The accepted types are list and str; the message used to advertise float.
         raise ValueError("act_func type is not supported. Please use one of [list, str]")

     if not self._hidden_layer_sizes:
         # Without a layer layout there is nothing to validate the value against.
         self._activations = value
         return

     n_layers = len(self.hidden_layer_sizes)
     if isinstance(value, str):
         if value not in __supported_activations__:
             raise ValueError(
                 "activations not supported. Please use one of: " + str(__supported_activations__.keys()))
         self._activations = [value] * n_layers
         logger.warning("activations with type str converted to type list.")
     elif len(value) != n_layers:
         raise ValueError("activations length missmatched layer length.")
     elif any(act not in __supported_activations__ for act in value):
         raise ValueError("activations not supported. Please use one of: " + str(__supported_activations__.keys()))
     else:
         self._activations = value
Пример #2
0
    def fit(self, X, y, reload_weights: bool = False):
        """
        Reset the model to its initial weights and train it on X and y.

        Targets are passed through self.encode_targets before training.
        When X has more than 100 rows, 10% of the data is handed to Keras as
        validation split; otherwise a warning is logged and no validation
        split is used.

        :param X: training data; must expose ``shape``
        :param y: training targets
        :param reload_weights: not used by this method
        :return: self
        """
        # restoring the initial weights gives every fit a fresh start
        self.model.set_weights(self.init_weights)

        # by default self.encode_targets returns identity of y
        encoded_targets = self.encode_targets(y)

        if X.shape[0] > 100:
            # enough samples to spare a pseudo validation set for the callbacks
            split_options = {'validation_split': 0.1}
        else:
            logger.warning(
                'Cannot use Keras Callbacks because of small sample size.')
            # NOTE(review): the callbacks are still passed to Keras below even
            # though the warning says they cannot be used - confirm intent.
            split_options = {}

        self.model.fit(X,
                       encoded_targets,
                       batch_size=self.nn_batch_size,
                       epochs=self.epochs,
                       callbacks=self.callbacks,
                       verbose=self.verbosity,
                       **split_options)
        return self
Пример #3
0
    def shall_continue(self, config_item):
        """
        Decide whether testing of a configuration should go on.

        Returns True to continue testing the configuration and False to skip
        further testing, which speeds up the hyperparameter search.

        Parameters
        ----------
        * 'config_item' [MDBConfig]:
            Holds the inner folds whose validation metrics are compared
            against self.threshold according to self.strategy:
            'first' looks at the first fold only, 'all' requires every fold
            to pass and 'mean' compares the mean over all folds.
            Any other strategy name never rejects a configuration.
        """
        metric_name = self.metric
        if metric_name == "unknown":
            logger.warning(
                "The metric is not known. Please check the metric: " +
                self.metric + ". " +
                "Performance constraints are constantly True.")
            return True

        if metric_name not in config_item.inner_folds[0].validation.metrics:
            logger.warning("The metric is not calculated. Please insert " +
                           self.metric + " to Hyperpipe.metrics. " +
                           "Performance constraints are constantly False.")
            return False

        # A score violates the constraint when it lies on the wrong side of
        # the threshold; which side that is depends on the metric direction.
        if self._greater_is_better:
            def violates(score):
                return score < self.threshold
        else:
            def violates(score):
                return score > self.threshold

        strategy_name = self.strategy.name
        if strategy_name == 'first':
            first_score = config_item.inner_folds[0].validation.metrics[
                metric_name]
            return not violates(first_score)

        if strategy_name == 'all':
            fold_scores = [
                fold.validation.metrics[metric_name]
                for fold in config_item.inner_folds
            ]
            return not any(violates(score) for score in fold_scores)

        if strategy_name == 'mean':
            fold_scores = [
                fold.validation.metrics[metric_name]
                for fold in config_item.inner_folds
            ]
            return not violates(np.mean(fold_scores))

        return True
Пример #4
0
    def register_custom_metric(
            cls, metric: Union[Metric_Type,
                               Tuple[str, Metric_Type]]) -> Optional[str]:
        """
        Register a custom metric in Scorer.CUSTOM_ELEMENT_DICTIONARY.

        :param metric: one of
            * a str, which is returned unchanged and not registered,
            * a tuple ``(name, metric)`` giving the name explicitly,
            * a keras Metric class or instance (only recognised when
              cls.dynamic_keras_import is available),
            * any other object with ``__module__`` and ``__name__``; it is
              registered only if it is callable.
        :return: the lower-cased name of the metric, the unchanged str if a
            str was passed, or None if the name is already registered.
        :raises ValueError: if metric is None.

        A name clash is reported through the logger and ``warnings.warn``.
        Previously a ``Warning`` was raised as an exception, which made the
        documented ``None`` return unreachable and contradicted the message
        that the first occurrence of the metric stays in use.
        """
        import warnings

        if cls.dynamic_keras_import is None:
            cls.try_import_keras()
        # if metric is already a string, don't do anything
        if isinstance(metric, str):
            return metric

        if metric is None:
            raise ValueError("Metric is None")

        keras = cls.dynamic_keras_import

        # derive name from metric class unless it is explicitly given with a tuple
        if isinstance(metric, tuple):
            metric_name, metric = metric[0], metric[1]
        elif keras is not None and isinstance(metric, keras.metrics.Metric):
            # instances carry no __name__, so the class name is used instead
            metric_name = "custom_" + str(metric.__module__) + '.' + str(
                type(metric).__name__)
        else:
            metric_name = "custom_" + str(metric.__module__) + '.' + str(
                metric.__name__)
        metric_name = metric_name.lower()

        # Check if metric_name is already registered
        if metric_name in Scorer.CUSTOM_ELEMENT_DICTIONARY:
            warn_text = 'Custom metric name ' + metric_name + ' is ambiguous. Please specify metric as tuple with ' + \
                        'cooresponding name (e.g. instead of metrics=[keras.metrics.Accuracy] use ' \
                        'metrics=[(\'MetricName1\', keras.metrics.Accuracy)]. Only the first occurance of this ' \
                        'metric will be used!'
            logger.warning(warn_text)
            warnings.warn(warn_text)
            return None

        # derive a metric function from the given object
        is_keras_class = (keras is not None and isinstance(metric, type)
                          and issubclass(metric, keras.metrics.Metric))
        is_keras_instance = (keras is not None
                             and isinstance(metric, keras.metrics.Metric))
        if is_keras_class or is_keras_instance:
            metric_obj = metric() if is_keras_class else metric

            def metric_func(y_true, y_pred):
                # the metric object is stateful, so reset it for every call
                metric_obj.reset_states()
                metric_obj.update_state(y_true=y_true, y_pred=y_pred)
                return float(
                    cls.dynamic_keras_import.backend.eval(metric_obj.result()))

            Scorer.CUSTOM_ELEMENT_DICTIONARY[metric_name] = metric_func
        elif callable(metric):
            Scorer.CUSTOM_ELEMENT_DICTIONARY[metric_name] = metric
        return metric_name
Пример #5
0
    def __init__(self, patch_size=25, nr_of_processes=1):
        """
        Set up the patch element and warn that it should be used with care.

        :param patch_size: stored as self.patch_size
        :param nr_of_processes: stored as self.nr_of_processes
        """
        super(PatchImages, self).__init__(output_img=True)
        # Todo: give cache folder to mother class

        self.nr_of_processes, self.patch_size = nr_of_processes, patch_size

        # the notice goes to both the logger and the warnings machinery
        caution = "Use PatchImages wisely: not tested in content."
        logger.warning(caution)
        warnings.warn(caution)
Пример #6
0
 def _check_custom_atlas(atlas_file):
     """
     Build an AtlasObject for a user supplied atlas file.

     :param atlas_file: path to the atlas file; used as both the name and
         the path of the returned AtlasObject
     :return: AtlasObject pointing to the atlas and its labels file
     :raises FileNotFoundError: if atlas_file is not an existing file
     """
     if path.isfile(atlas_file):
         # NOTE(review): this is the *directory* of the atlas with
         # '_labels.txt' appended (e.g. '/data/atlases_labels.txt' for
         # '/data/atlases/my.nii'), not a name derived from the atlas file
         # itself - confirm this is the intended naming convention.
         labels_file = path.dirname(atlas_file) + '_labels.txt'
         if not path.isfile(labels_file):
             notice = "Didn't find .txt file with ROI labels. Using indices as labels."
             logger.warning(notice)
             warnings.warn(notice)
         return AtlasObject(name=atlas_file, path=atlas_file, labels_file=labels_file)

     failure = "Cannot find custom atlas {}".format(atlas_file)
     logger.error(failure)
     raise FileNotFoundError(failure)
Пример #7
0
 def list_rois(self, atlas: str):
     """
     List the ROI labels of one atlas.

     :param atlas: str, atlas name; must be a key of self.ATLAS_DICTIONARY
     :return: list with the label of every ROI of the atlas, or an empty
         list (after logging and emitting a warning) for an unsupported atlas
     """
     if atlas in self.ATLAS_DICTIONARY.keys():
         atlas_object = self.get_atlas(atlas)
         return [region.label for region in atlas_object.roi_list]

     notice = 'Atlas {} is not supported.'.format(atlas)
     logger.warning(notice)
     warnings.warn(notice)
     return []
Пример #8
0
 def metric(self, value):
     """
     Setter for attribute metric.

     Stores the metric and asks Scorer.greater_is_better_distinction whether
     larger values are better. If that lookup raises NameError, the metric
     is stored as "unknown" and a warning is logged.

     :param value: metric value
     :return: None
     """
     try:
         self._metric = value
         self._greater_is_better = Scorer.greater_is_better_distinction(
             self._metric)
     except NameError:
         self._metric = "unknown"
         # shall_continue returns True for the metric "unknown", so the
         # constraint always passes; the message used to claim "False".
         logger.warning(
             "Your metric is not supported. Performance constraints are constantly True."
         )
Пример #9
0
 def load_from_mongodb(self, mongodb_connect_url: str, pipe_name: str):
     """
     Load the hyperpipe results with the given name from MongoDB into
     self.results.

     If several hyperpipes share the name, the one with the latest
     computation_start_time is used and a warning is emitted through the
     logger and ``warnings.warn``. Previously a ``Warning`` was raised as an
     exception at that point, although the result had already been selected
     and the message announces that it is returned.

     :param mongodb_connect_url: connection URL passed to ``connect``
     :param pipe_name: value of the ``name`` field to look up
     :return: None
     :raises FileNotFoundError: if no hyperpipe with that name exists
     """
     import warnings

     connect(mongodb_connect_url, alias="photon_core")
     results = list(MDBHyperpipe.objects.raw({'name': pipe_name}))
     if not results:
         raise FileNotFoundError('Could not load hyperpipe from MongoDB.')

     if len(results) == 1:
         self.results = results[0]
         return

     # ambiguous name: let the database pick the most recent computation
     self.results = MDBHyperpipe.objects.order_by([
         ("computation_start_time", DESCENDING)
     ]).raw({
         'name': pipe_name
     }).first()
     warn_text = 'Found multiple hyperpipes with that name. Returning most recent one.'
     logger.warning(warn_text)
     warnings.warn(warn_text)
Пример #10
0
 def fwhm(self, fwhm):
     """
     Setter for fwhm.

     :param fwhm: an int (expanded to a list of three equal values), a list
         of three ints, the string 'fast', or None (stored as None after a
         warning that no filtering is performed)
     :return: None
     :raises ValueError: for any other value
     """
     if isinstance(fwhm, int):  # allowing int to improve optimization
         self._fwhm = [fwhm] * 3
         return

     is_int_triplet = (isinstance(fwhm, list) and len(fwhm) == 3
                       and all(isinstance(entry, int) for entry in fwhm))
     if is_int_triplet or fwhm == 'fast':
         self._fwhm = fwhm
         return

     if fwhm is None:
         self._fwhm = None
         notice = "the fwhm in SmoothImages is None, no filtering is performed (useful when just " \
                  "removal of non-finite values is needed). "
         logger.warning(notice)
         warnings.warn(notice)
         return

     failure = "SmoothImages expected fwhm as int, as str=='fast' or a list of three ints like [3, 3, 3]."
     logger.error(failure)
     raise ValueError(failure)
Пример #11
0
 def dropout_rate(self, value):
     """
     Setter for dropout_rate.

     While no hidden layer sizes are set, the value is stored as given.
     Otherwise a float is expanded to one entry per hidden layer and a list
     must contain exactly one entry per hidden layer.

     :param value: float or list
     :return: None
     :raises ValueError: if value is neither exactly a list nor exactly a
         float, or if a list does not have one entry per hidden layer
     """
     if type(value) not in [list, float]:
         raise ValueError("Dropout type is not supported. Please use one of [list, float]")

     if not self._hidden_layer_sizes:
         self._dropout_rate = value
         return

     if type(value) is float:
         # one shared rate is broadcast to every hidden layer
         self._dropout_rate = [value] * len(self.hidden_layer_sizes)
         logger.warning("Dropout with type float converted to type list.")
         return

     if len(value) != len(self.hidden_layer_sizes):
         raise ValueError("Dropout length missmatched layer length.")
     self._dropout_rate = value
Пример #12
0
    def prepare(self, pipeline_elements: list, maximize_metric: bool):
        """
        Build the skopt search space from the pipeline elements and create
        the optimizer.

        Hyperparameters with fewer than two candidate values are stored in
        self.constant_dictionary instead of being optimized. If no
        hyperparameter ends up in the search space, self.optimizer is None.

        :param pipeline_elements: elements whose ``hyperparameters`` dict
            (if present) is converted into the search space
        :param maximize_metric: stored as self.maximize_metric
        :raises ValueError: if one of the elements is a Switch
        """
        self.hyperparameter_list = []
        self.maximize_metric = maximize_metric

        search_space = []
        for element in pipeline_elements:
            if element.__class__.__name__ == 'Switch':
                error_msg = 'Scikit-Optimize cannot operate in the specified hyperparameter space with a Switch ' \
                            'element. We recommend the use of SMAC.'
                logger.error(error_msg)
                raise ValueError(error_msg)

            if not hasattr(element, 'hyperparameters'):
                continue

            for name, value in element.hyperparameters.items():
                # if we only have one value we do not need to optimize
                if isinstance(value, list) and len(value) < 2:
                    self.constant_dictionary[name] = value[0]
                elif isinstance(value,
                                PhotonCategorical) and len(value.values) < 2:
                    self.constant_dictionary[name] = value.values[0]
                else:
                    dimension = self._convert_PHOTON_to_skopt_space(
                        value, name)
                    if dimension is not None:
                        search_space.append(dimension)

        if search_space:
            self.optimizer = Optimizer(search_space,
                                       "ET",
                                       acq_func=self.acq_func,
                                       acq_func_kwargs=self.acq_func_kwargs)
        else:
            logger.warning(
                "Did not find any hyperparameters to convert into skopt space")
            self.optimizer = None

        self.ask = self.ask_generator()
Пример #13
0
    def plot_optimizer_history(self,
                               metric,
                               title: str = 'Optimizer History',
                               type: str = 'plot',
                               reduce_scatter_by: Union[int, str] = 'auto',
                               file: str = None):
        """
        Plot the performance of the evaluated configurations over the course
        of the optimization, together with the running best performance.

        The figure is saved to ``file`` if given, otherwise to
        "optimizer_history.png" in self.output_settings.results_folder when
        self.output_settings is set. The figure is closed in any case.

        :param metric: specify metric that has been stored within the PHOTON results tree
        :param title: title of the plot
        :param type: 'plot' or 'scatter'
        :param reduce_scatter_by: integer or string ('auto'), reduce the number of points plotted by scatter
        :param file: specify a filename if you want to save the plot
        :return:
        :raises ValueError: if the metric is not stored in the results tree
            or if type is neither 'plot' nor 'scatter'
        """

        if metric not in self.results.hyperpipe_info.metrics:
            raise ValueError(
                'Metric "{}" not stored in results tree'.format(metric))

        # Reject an unknown plot type up front, before any data is gathered
        # and before a figure is created that would otherwise stay open.
        if type not in ('plot', 'scatter'):
            raise ValueError('Please specify either "plot" or "scatter".')

        config_evaluations = self.get_config_evaluations()
        minimum_config_evaluations = self.get_minimum_config_evaluations()

        # handle different lengths
        min_corresponding = len(min(config_evaluations[metric], key=len))
        config_evaluations_corres = [
            configs[:min_corresponding]
            for configs in config_evaluations[metric]
        ]
        minimum_config_evaluations_corres = [
            configs[:min_corresponding]
            for configs in minimum_config_evaluations[metric]
        ]

        mean = np.nanmean(np.asarray(config_evaluations_corres), axis=0)
        mean_min = np.nanmean(np.asarray(minimum_config_evaluations_corres),
                              axis=0)

        greater_is_better = Scorer.greater_is_better_distinction(metric)
        if greater_is_better:
            caption = 'Maximum'
        else:
            caption = 'Minimum'

        plt.figure()
        if type == 'plot':
            plt.plot(np.arange(0, len(mean)),
                     mean,
                     '-',
                     color='gray',
                     label='Mean Performance')

        else:
            # type == 'scatter'
            # now do smoothing
            if isinstance(reduce_scatter_by, str):
                if reduce_scatter_by != 'auto':
                    logger.warning(
                        '{} is not a valid smoothing_kernel specifier. Falling back to "auto".'
                        .format(reduce_scatter_by))

                # if auto, then calculate size of reduce_scatter_by so that 75 points on x remain
                # smallest reduce_scatter_by should be 1
                reduce_scatter_by = max(
                    [np.floor(min_corresponding / 75).astype(int), 1])

            if reduce_scatter_by > 1:
                plt.plot([], [],
                         ' ',
                         label="scatter reduced by factor {}".format(
                             reduce_scatter_by))

            last_index = len(config_evaluations[metric]) - 1
            for i, fold in enumerate(config_evaluations[metric]):
                # pad a copy with NaN so that it can be divided by the
                # reduction factor; the caller's list is left untouched
                padded = list(fold)
                remaining = len(padded) % reduce_scatter_by
                if remaining:
                    padded.extend([np.nan] * (reduce_scatter_by - remaining))
                # calculate mean over every n named_steps so that plot is less cluttered
                reduced_fold = np.nanmean(np.asarray(padded).reshape(
                    -1, reduce_scatter_by),
                                          axis=1)
                reduced_xfit = np.arange(reduce_scatter_by / 2,
                                         len(padded),
                                         step=reduce_scatter_by)
                # only the last fold carries a label so that the legend
                # shows a single 'Performance' entry
                if i == last_index:
                    plt.scatter(reduced_xfit,
                                np.asarray(reduced_fold),
                                color='gray',
                                alpha=0.5,
                                label='Performance',
                                marker='.')
                else:
                    plt.scatter(reduced_xfit,
                                np.asarray(reduced_fold),
                                color='gray',
                                alpha=0.5,
                                marker='.')

        plt.plot(np.arange(0, len(mean_min)),
                 mean_min,
                 '-',
                 color='black',
                 label='Mean {} Performance'.format(caption))

        for fold in minimum_config_evaluations[metric]:
            xfit = np.arange(0, len(fold))
            plt.plot(xfit, fold, '-', color='black', alpha=0.5)

        plt.ylabel(metric.replace('_', ' '))
        plt.xlabel('No of Evaluations')

        plt.legend()
        plt.title(title)
        if file:
            plt.savefig(file)
        else:
            if self.output_settings:
                file = os.path.join(self.output_settings.results_folder,
                                    "optimizer_history.png")
                plt.savefig(file)
        plt.close()
Пример #14
0
    def transform(self):
        """
        Generate the values of this number range and store them in
        self.values.

        Depending on self.range_type the values come from np.arange
        ("range"), np.linspace, np.logspace or np.geomspace; any other range
        type yields an empty list. self.step (for "range") and self.num (for
        the other types) are only passed on when they are truthy.

        The values are converted to python datatypes because mongodb needs
        it. If that conversion fails, a warning is emitted through the logger
        and ``warnings.warn`` and the unconverted values are stored.
        Previously a ``Warning`` was raised as an exception at that point,
        which left self.values unset.

        :raises ValueError: for a geomspace starting at zero or a logspace
            with num_type np.int32
        """
        import warnings

        if self.range_type == "geomspace" and self.start == 0:
            error_message = "Geometric sequence cannot include zero"
            logger.error(error_message)
            raise ValueError(error_message)
        if self.range_type == "range" and self.start > self.stop:
            warn_message = "NumberRange or one of its subclasses is empty cause np.arange " + \
                           "does not deal with start greater than stop."
            logger.warning(warn_message)

        values = []

        if self.range_type == "range":
            # a falsy step is left out so that np.arange uses its default
            bounds = (self.start, self.stop, self.step) if self.step \
                else (self.start, self.stop)
            values = np.arange(*bounds,
                               dtype=self.num_type,
                               **self.range_params)
        elif self.range_type in ("linspace", "logspace", "geomspace"):
            if self.range_type == "logspace" and self.num_type == np.int32:
                raise ValueError(
                    "Cannot use logspace for integer,  use geomspace instead.")
            space_functions = {
                "linspace": np.linspace,
                "logspace": np.logspace,
                "geomspace": np.geomspace,
            }
            # a falsy num is left out so that numpy uses its default
            num_kwarg = {"num": self.num} if self.num else {}
            values = space_functions[self.range_type](self.start,
                                                      self.stop,
                                                      dtype=self.num_type,
                                                      **num_kwarg,
                                                      **self.range_params)

        # convert to python datatype because mongodb needs it
        try:
            self.values = [value.item() for value in values]
        except (AttributeError, TypeError, ValueError):
            msg = "PHOTON can not guarantee full mongodb support since you chose a non [np.integer, np.floating] " \
                  "subtype in NumberType.dtype."
            logger.warning(msg)
            warnings.warn(msg)
            self.values = values