def fit(self, X, y, reload_weights: bool = False):
    """Fit the underlying Keras model to X and y.

    Uses a 10% validation split only when the training set has more than
    100 samples; otherwise trains on all data without a split.

    :param X: training data; the first axis is the sample axis
    :param y: target values
    :param reload_weights: if True, re-initialize the model weights
        before fitting so repeated fits start from scratch
    :return: self (fluent interface)
    """
    # TODO: early stopping
    if reload_weights:
        self.reset_weights(self.model)

    if X.shape[0] > 100:
        # Enough samples: hold out 10% as a pseudo validation set.
        # NOTE(review): the else-branch message mentions Keras callbacks,
        # but no callbacks are passed in either branch — confirm intent.
        self.model.fit(
            X,
            y,
            batch_size=self.nn_batch_size,
            validation_split=0.1,
            epochs=self.epochs,
            verbose=self.verbosity,
        )
    else:
        # logger.warn is deprecated; use logger.warning
        logger.warning(
            "Cannot use Keras Callbacks because of small sample size.")
        self.model.fit(
            X,
            y,
            batch_size=self.nn_batch_size,
            epochs=self.epochs,
            verbose=self.verbosity,
        )
    return self
def prepare(self, pipeline_elements: list, maximize_metric: bool):
    """Build the skopt search space from the pipeline's hyperparameters.

    Hyperparameters with fewer than two candidate values need no
    optimization and are stored in ``self.constant_dictionary`` instead
    of entering the search space.

    :param pipeline_elements: pipeline elements to scan for hyperparameters
    :param maximize_metric: whether the optimized metric should be maximized
    """
    self.hyperparameter_list = []
    self.maximize_metric = maximize_metric

    # build space
    space = []
    for pipe_element in pipeline_elements:
        if not hasattr(pipe_element, "hyperparameters"):
            continue
        for name, value in pipe_element.hyperparameters.items():
            # if we only have one value we do not need to optimize
            if isinstance(value, list) and len(value) < 2:
                self.constant_dictionary[name] = value[0]
                continue
            if isinstance(value, PhotonCategorical) and len(value.values) < 2:
                self.constant_dictionary[name] = value.values[0]
                continue
            skopt_param = self._convert_PHOTON_to_skopt_space(value, name)
            if skopt_param is not None:
                space.append(skopt_param)

    if len(space) == 0:
        # logger.warn is deprecated; use logger.warning
        logger.warning(
            "Did not find any hyperparameters to convert into skopt space")
        self.optimizer = None
    else:
        self.optimizer = Optimizer(
            space,
            "ET",
            acq_func=self.acq_func,
            acq_func_kwargs=self.acq_func_kwargs,
        )
    self.ask = self.ask_generator()
def activations(self, value):
    """Setter for activations. Validates type, length and supported names.

    :param value: activation name (str) or one name per hidden layer (list)
    :raises ValueError: on unsupported type, unsupported activation name,
        or a list length that does not match the number of hidden layers
    """
    if not isinstance(value, (list, str)):
        # fixed error message: this setter accepts [list, str]; the old
        # text "[list, float]" was copy-pasted from the dropout setter
        raise ValueError(
            "act_func type is not supported. Please use one of [list, str]"
        )
    if not self._hidden_layer_sizes:
        # layer sizes not known yet: store as-is for later validation
        self._activations = value
        return
    if isinstance(value, str):
        if value in __supported_activations__.keys():
            # broadcast the single activation to every hidden layer
            self._activations = [value] * len(self.hidden_layer_sizes)
            logger.warning("activations with type str converted to type list.")
        else:
            raise ValueError(
                "activations not supported. Please use one of: "
                + str(__supported_activations__.keys())
            )
    elif len(value) != len(self.hidden_layer_sizes):
        raise ValueError("activations length missmatched layer length.")
    elif any(act not in __supported_activations__.keys() for act in value):
        raise ValueError(
            "activations not supported. Please use one of: "
            + str(__supported_activations__.keys())
        )
    else:
        self._activations = value
def shall_continue(self, config_item):
    """
    Function to evaluate if the constraint is reached.

    If it returns True, the testing of the configuration is continued.
    If it returns False, further testing of the configuration is skipped
    to increase speed of the hyperparameter search.

    Parameters
    ----------
    * 'config_item' [MDBConfig]:
        All performance metrics and other scoring information for all
        configuration's performance.
        Can be used to evaluate if the configuration has any potential
        to serve the model's learning task.
    """
    if self.metric == "unknown":
        # logger.warn is deprecated; use logger.warning
        logger.warning("The metric is not known. Please check the metric: "
                       + self.metric + ". "
                       + "Performance constraints are constantly True.")
        return True
    if self.metric not in config_item.inner_folds[0].validation.metrics:
        logger.warning("The metric is not calculated. Please insert "
                       + self.metric + " to Hyperpipe.metrics. "
                       + "Performance constraints are constantly False.")
        return False

    fold_values = [
        x.validation.metrics[self.metric] for x in config_item.inner_folds
    ]
    # A fold value is "bad" when it falls on the wrong side of the
    # threshold, depending on whether the metric is maximized or minimized.
    if self._greater_is_better:
        is_bad = lambda v: v < self.threshold
    else:
        is_bad = lambda v: v > self.threshold

    if self.strategy.name == "first":
        return not is_bad(fold_values[0])
    if self.strategy.name == "all":
        return not any(is_bad(v) for v in fold_values)
    if self.strategy.name == "mean":
        return not is_bad(np.mean(fold_values))
    # unknown strategy: keep evaluating (matches previous behavior)
    return True
def metric(self, value):
    """
    Setter for attribute metric.

    Looks up whether greater values of the metric are better; metrics
    unknown to Scorer are mapped to "unknown".

    :param value: metric name
    :return:
    """
    try:
        self._metric = value
        self._greater_is_better = Scorer.greater_is_better_distinction(
            self._metric)
    except NameError:
        # Scorer signals an unsupported metric via NameError
        self._metric = "unknown"
        # logger.warn is deprecated; use logger.warning
        # NOTE(review): shall_continue() treats an unknown metric as
        # "constantly True", but this message says False — confirm which
        # is intended before changing the text.
        logger.warning(
            "Your metric is not supported. Performance constraints are constantly False."
        )
def load_from_mongodb(self, mongodb_connect_url: str, pipe_name: str):
    """Load hyperpipe results from MongoDB by pipeline name.

    If several hyperpipes share the name, the one with the most recent
    computation start time is returned and a warning is logged.

    :param mongodb_connect_url: connection URL for MongoDB
    :param pipe_name: name of the hyperpipe to load
    :raises FileNotFoundError: if no hyperpipe with that name exists
    """
    connect(mongodb_connect_url)
    results = list(MDBHyperpipe.objects.raw({"name": pipe_name}))
    if len(results) == 1:
        self.results = results[0]
    elif len(results) > 1:
        # several matches: pick the most recently started computation
        self.results = (MDBHyperpipe.objects.order_by([
            ("computation_start_time", DESCENDING)
        ]).raw({
            "name": pipe_name
        }).first())
        # logger.warn is deprecated; use logger.warning
        logger.warning(
            "Found multiple hyperpipes with that name. Returning most recent one."
        )
    else:
        raise FileNotFoundError("Could not load hyperpipe from MongoDB.")
def dropout_rate(self, value):
    """
    Setter for dropout_rate. Validates type and length.

    :param value: single rate (float) or one rate per hidden layer (list)
    :raises ValueError: on unsupported type or a list length that does
        not match the number of hidden layers
    """
    if not isinstance(value, (list, float)):
        raise ValueError("Dropout type is not supported. Please use one of [list, float]")
    if not self._hidden_layer_sizes:
        # layer sizes not known yet: store as-is for later validation
        self._dropout_rate = value
        return
    if isinstance(value, float):
        # broadcast the single rate to every hidden layer
        self._dropout_rate = [value] * len(self.hidden_layer_sizes)
        # logger.warn is deprecated; use logger.warning
        logger.warning("Dropout with type float converted to type list.")
    elif len(value) != len(self.hidden_layer_sizes):
        raise ValueError("Dropout length missmatched layer length.")
    else:
        self._dropout_rate = value
def __init__(self, n_configurations=None, limit_in_minutes=60):
    """Random search with optional count and time stopping criteria.

    :param n_configurations: maximum number of configurations to test;
        None or <= 0 disables this limit
    :param limit_in_minutes: time budget in minutes; None or <= 0
        disables this limit
    """
    self.pipeline_elements = None
    self.parameter_iterable = None
    self.ask = self.next_config_generator()

    # normalize a disabled time limit to None
    if not limit_in_minutes or limit_in_minutes <= 0:
        self.limit_in_minutes = None
    else:
        self.limit_in_minutes = limit_in_minutes
    self.start_time = None
    self.end_time = None

    # normalize a disabled configuration limit to None
    if not n_configurations or n_configurations <= 0:
        self.n_configurations = None
    else:
        self.n_configurations = n_configurations
    # counter of tested configurations (attribute name, incl. its typo,
    # kept for backward compatibility)
    self.k_configutration = 0  # use k++ until k==n: break

    # bug fix: compare the normalized attributes — the old check
    # (limit_in_minutes <= 0) raised TypeError when the caller passed
    # limit_in_minutes=None explicitly
    if self.n_configurations is None and self.limit_in_minutes is None:
        msg = "No stopping criteria for RandomSearchOptimizer."
        # logger.warn is deprecated; use logger.warning
        logger.warning(msg)
def plot_optimizer_history(
    self,
    metric,
    title: str = "Optimizer History",
    type: str = "plot",
    reduce_scatter_by: Union[int, str] = "auto",
    file: str = None,
):
    """
    Plot the optimizer history of a metric across tested configurations.

    :param metric: specify metric that has been stored within the PHOTON results tree
    :param title: plot title
    :param type: 'plot' or 'scatter'
    :param reduce_scatter_by: integer or string ('auto'), reduce the number of points plotted by scatter
    :param file: specify a filename if you want to save the plot
    :raises ValueError: for an unknown metric or an invalid `type`
    :return:
    """
    if metric not in self.results.hyperpipe_info.metrics:
        raise ValueError(
            'Metric "{}" not stored in results tree'.format(metric))

    config_evaluations = self.get_config_evaluations()
    minimum_config_evaluations = self.get_minimum_config_evaluations()

    # handle different lengths: truncate every fold to the shortest one
    min_corresponding = len(min(config_evaluations[metric], key=len))
    config_evaluations_corres = [
        configs[:min_corresponding] for configs in config_evaluations[metric]
    ]
    minimum_config_evaluations_corres = [
        configs[:min_corresponding]
        for configs in minimum_config_evaluations[metric]
    ]

    mean = np.nanmean(np.asarray(config_evaluations_corres), axis=0)
    mean_min = np.nanmean(np.asarray(minimum_config_evaluations_corres),
                          axis=0)

    greater_is_better = Scorer.greater_is_better_distinction(metric)
    if greater_is_better:
        caption = "Maximum"
    else:
        caption = "Minimum"

    plt.figure()
    if type == "plot":
        plt.plot(
            np.arange(0, len(mean)),
            mean,
            "-",
            color="gray",
            label="Mean Performance",
        )
    elif type == "scatter":
        # now do smoothing
        if isinstance(reduce_scatter_by, str):
            if reduce_scatter_by != "auto":
                # logger.warn is deprecated; use logger.warning
                logger.warning(
                    '{} is not a valid smoothing_kernel specifier. Falling back to "auto".'
                    .format(reduce_scatter_by))
            # if auto, then calculate size of reduce_scatter_by so that 75 points on x remain
            # smallest reduce_scatter_by should be 1
            reduce_scatter_by = max(
                [np.floor(min_corresponding / 75).astype(int), 1])
        if reduce_scatter_by > 1:
            # add a dummy legend entry that reports the reduction factor
            plt.plot(
                [],
                [],
                " ",
                label="scatter reduced by factor {}".format(reduce_scatter_by),
            )
        for i, fold in enumerate(config_evaluations[metric]):
            # add a few None so that list can be divided by smoothing_kernel
            remaining = len(fold) % reduce_scatter_by
            if remaining:
                fold.extend([np.nan] * (reduce_scatter_by - remaining))
            # calculate mean over every n named_steps so that plot is less cluttered
            reduced_fold = np.nanmean(np.asarray(fold).reshape(
                -1, reduce_scatter_by), axis=1)
            reduced_xfit = np.arange(reduce_scatter_by / 2,
                                     len(fold),
                                     step=reduce_scatter_by)
            if i == len(config_evaluations[metric]) - 1:
                # only label the last fold so the legend has one entry
                plt.scatter(
                    reduced_xfit,
                    np.asarray(reduced_fold),
                    color="gray",
                    alpha=0.5,
                    label="Performance",
                    marker=".",
                )
            else:
                plt.scatter(
                    reduced_xfit,
                    np.asarray(reduced_fold),
                    color="gray",
                    alpha=0.5,
                    marker=".",
                )
    else:
        raise ValueError('Please specify either "plot" or "scatter".')

    plt.plot(
        np.arange(0, len(mean_min)),
        mean_min,
        "-",
        color="black",
        label="Mean {} Performance".format(caption),
    )
    for i, fold in enumerate(minimum_config_evaluations[metric]):
        xfit = np.arange(0, len(fold))
        plt.plot(xfit, fold, "-", color="black", alpha=0.5)

    plt.ylabel(metric.replace("_", " "))
    plt.xlabel("No of Evaluations")
    plt.legend()
    plt.title(title)
    if file:
        plt.savefig(file)
    else:
        if self.output_settings:
            file = os.path.join(self.output_settings.results_folder,
                                "optimizer_history.png")
            plt.savefig(file)
    plt.close()
def transform(self):
    """Materialize the configured number range into ``self.values``.

    Supports range (np.arange), linspace, logspace and geomspace, and
    converts the resulting numpy scalars to plain Python numbers because
    MongoDB cannot store numpy types.

    :raises ValueError: for a geomspace starting at zero, or a logspace
        with an integer dtype
    """
    if self.range_type == "geomspace" and self.start == 0:
        error_message = "Geometric sequence cannot include zero"
        logger.error(error_message)
        raise ValueError(error_message)
    if self.range_type == "range" and self.start > self.stop:
        warn_message = "NumberRange or one of its subclasses is empty cause np.arange " + \
                       "does not deal with start greater than stop."
        # logger.warn is deprecated; use logger.warning
        logger.warning(warn_message)

    values = []
    if self.range_type == "range":
        if not self.step:
            values = np.arange(self.start, self.stop,
                               dtype=self.num_type, **self.range_params)
        else:
            values = np.arange(self.start, self.stop, self.step,
                               dtype=self.num_type, **self.range_params)
    elif self.range_type == "linspace":
        if self.num:
            values = np.linspace(self.start, self.stop, num=self.num,
                                 dtype=self.num_type, **self.range_params)
        else:
            values = np.linspace(self.start, self.stop,
                                 dtype=self.num_type, **self.range_params)
    elif self.range_type == "logspace":
        if self.num_type == np.int32:
            raise ValueError(
                "Cannot use logspace for integer, use geomspace instead.")
        if self.num:
            values = np.logspace(self.start, self.stop, num=self.num,
                                 dtype=self.num_type, **self.range_params)
        else:
            values = np.logspace(self.start, self.stop,
                                 dtype=self.num_type, **self.range_params)
    elif self.range_type == "geomspace":
        if self.num:
            values = np.geomspace(self.start, self.stop, num=self.num,
                                  dtype=self.num_type, **self.range_params)
        else:
            values = np.geomspace(self.start, self.stop,
                                  dtype=self.num_type, **self.range_params)

    # convert to python datatype because mongodb needs it
    if self.num_type == np.int32:
        self.values = [int(i) for i in values]
    elif self.num_type == np.float32:
        self.values = [float(i) for i in values]
    else:
        # bug fix: previously self.values was never assigned for any other
        # dtype (e.g. np.int64); numpy's .item() yields the corresponding
        # plain Python scalar
        self.values = [v.item() for v in values]
def transform(self):
    """Materialize the configured number range into ``self.values``.

    Supports range (np.arange), linspace, logspace and geomspace, and
    converts the resulting numpy scalars to plain Python numbers via
    ``.item()`` because MongoDB cannot store numpy types.

    :raises ValueError: for a geomspace starting at zero, or a logspace
        with an integer dtype
    :raises Warning: if the generated values do not support ``.item()``
        (non numpy-scalar dtype), so full MongoDB support is not guaranteed
    """
    if self.range_type == "geomspace" and self.start == 0:
        error_message = "Geometric sequence cannot include zero"
        logger.error(error_message)
        raise ValueError(error_message)
    if self.range_type == "range" and self.start > self.stop:
        warn_message = "NumberRange or one of its subclasses is empty cause np.arange " + \
                       "does not deal with start greater than stop."
        logger.warning(warn_message)

    values = []
    if self.range_type == "range":
        if not self.step:
            values = np.arange(self.start, self.stop,
                               dtype=self.num_type, **self.range_params)
        else:
            values = np.arange(self.start, self.stop, self.step,
                               dtype=self.num_type, **self.range_params)
    elif self.range_type == "linspace":
        if self.num:
            values = np.linspace(self.start, self.stop, num=self.num,
                                 dtype=self.num_type, **self.range_params)
        else:
            values = np.linspace(self.start, self.stop,
                                 dtype=self.num_type, **self.range_params)
    elif self.range_type == "logspace":
        if self.num_type == np.int32:
            raise ValueError(
                "Cannot use logspace for integer, use geomspace instead.")
        if self.num:
            values = np.logspace(self.start, self.stop, num=self.num,
                                 dtype=self.num_type, **self.range_params)
        else:
            values = np.logspace(self.start, self.stop,
                                 dtype=self.num_type, **self.range_params)
    elif self.range_type == "geomspace":
        if self.num:
            values = np.geomspace(self.start, self.stop, num=self.num,
                                  dtype=self.num_type, **self.range_params)
        else:
            values = np.geomspace(self.start, self.stop,
                                  dtype=self.num_type, **self.range_params)

    # convert to python datatype because mongodb needs it
    try:
        self.values = [v.item() for v in values]
    except AttributeError:
        # bug fixes: the old bare `except:` also swallowed SystemExit /
        # KeyboardInterrupt, used the deprecated logger.warn, and had an
        # unreachable `self.values = values` after the raise (removed)
        msg = "PHOTON can not guarantee full mongodb support since you chose a non [np.integer, np.floating] " \
              "subtype in NumberType.dtype."
        logger.warning(msg)
        raise Warning(msg)