Example #1
import hashlib

import numpy as np
from ConfigSpace import Configuration


def get_id_of_config(config: Configuration) -> str:
    # Hash the configuration's vector representation to obtain a stable id.
    # TODO: optionally also hash instance="" and seed=0 (see the commented-out lines below)
    X: np.ndarray = config.get_array()
    m = hashlib.md5()
    if X.flags['C_CONTIGUOUS']:
        m.update(X.data)
        m.update(str(X.shape).encode('utf8'))
    else:
        # make a C-contiguous copy so the raw buffer can be hashed
        X_tmp = np.ascontiguousarray(X.T)
        m.update(X_tmp.data)
        m.update(str(X_tmp.shape).encode('utf8'))
    # m.update(instance.encode())
    # m.update(str(seed).encode())
    hash_value = m.hexdigest()
    return hash_value
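
A minimal usage sketch, assuming the pre-1.0 ConfigSpace API; the hyperparameter name 'lr' is only illustrative:

from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

cs = ConfigurationSpace()
cs.add_hyperparameter(UniformFloatHyperparameter('lr', 1e-4, 1e-1, log=True))

config = cs.sample_configuration()
# the id is deterministic for a given configuration vector
assert get_id_of_config(config) == get_id_of_config(config)
print(get_id_of_config(config))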
Example #2
    def register_result(self,
                        config: Configuration,
                        loss: float,
                        status: StatusType,
                        update_model: bool = True,
                        **kwargs) -> None:
        super().register_result(config, loss, status)
        # noinspection PyUnresolvedReferences
        actual_size = config.get_array().size
        # skip results whose configuration vector does not match the current space
        if actual_size != self.expected_size:
            return

        if loss is None or not np.isfinite(loss):
            # fall back to the configured worst score for missing or non-finite losses
            loss = self.worst_score

        self.kde.losses.append(loss)
        # noinspection PyTypeChecker
        self.kde.configs.append(config.get_array())

        min_points_in_model = max(
            int(1.5 * len(self.configspace.get_hyperparameters())) + 1,
            self.min_points_in_model)
        # skip model building if not enough points are available
        if len(self.kde.losses) < min_points_in_model:
            return

        train_losses = np.array(self.kde.losses)

        n_good = max(min_points_in_model,
                     (self.top_n_percent * train_losses.shape[0]) // 100)
        n_bad = max(min_points_in_model,
                    ((100 - self.top_n_percent) * train_losses.shape[0]) //
                    100)

        # sort by loss: the best n_good points train the "good" KDE,
        # the worst n_bad points train the "bad" KDE
        idx = np.argsort(train_losses)
        train_configs = np.array(self.kde.configs)
        train_data_good = self._fix_identical_cat_input(
            train_configs[idx[:n_good]])
        train_data_bad = self._fix_identical_cat_input(
            train_configs[idx[-n_bad:]])

        train_data_good = self._impute_conditional_data(
            train_data_good, self.kde.vartypes)
        train_data_bad = self._impute_conditional_data(train_data_bad,
                                                       self.kde.vartypes)

        # KDE fitting needs more observations than dimensions
        if train_data_good.shape[0] <= train_data_good.shape[1]:
            return
        if train_data_bad.shape[0] <= train_data_bad.shape[1]:
            return

        # more expensive cross-validation method
        # bw_estimation = 'cv_ls'

        # quick rule of thumb
        bw_estimation = 'normal_reference'

        bad_kde = sm.nonparametric.KDEMultivariate(
            data=train_data_bad,
            var_type=self.kde.kde_vartypes,
            bw=bw_estimation)
        good_kde = sm.nonparametric.KDEMultivariate(
            data=train_data_good,
            var_type=self.kde.kde_vartypes,
            bw=bw_estimation)

        # enforce a minimum bandwidth so no dimension collapses to a point mass
        bad_kde.bw = np.clip(bad_kde.bw, self.min_bandwidth, None)
        good_kde.bw = np.clip(good_kde.bw, self.min_bandwidth, None)

        self.kde.kde_models = {
            'good': good_kde,
            'bad': bad_kde,
        }
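
For context, the 'good'/'bad' split built above is a TPE-style density model: candidates are usually ranked by the ratio of the good density to the bad density. The helper below is a hypothetical sketch of that scoring step (score_candidates is not part of the original class; it only assumes kde_models holds the two statsmodels KDEMultivariate objects fitted above):

import numpy as np

def score_candidates(kde_models, candidates, eps=1e-32):
    # candidates: array of shape (n_candidates, n_dims) in the same
    # vector representation that was used to train the KDEs
    good = np.maximum(kde_models['good'].pdf(candidates), eps)
    bad = np.maximum(kde_models['bad'].pdf(candidates), eps)
    # a larger ratio means the candidate looks more like past good configurations
    return good / bad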