Пример #1
0
def cv_confidence_intervals(base_classifier, x_train, y_train, x_test,
                            y_test, cv=2, score_type=None):
    """Fit on the first stratified fold, beta-calibrate on its hold-out,
    and return beta-test intervals plus log-loss / Brier score deltas.

    Parameters
    ----------
    base_classifier : estimator
        Unfitted classifier; it is cloned before fitting.
    x_train, y_train : array-like
        Training data, split internally into a fit fold and a
        calibration fold.
    x_test, y_test : array-like
        Held-out data used to measure the calibration gain.
    cv : int
        Number of folds for StratifiedKFold. Only the first fold is
        actually used (the original loop ignored folds i > 0).
    score_type : str or None
        Forwarded to ``calibrate``.

    Returns
    -------
    dict
        Output of ``beta_test`` augmented with ``ll_diff`` (log-loss
        after minus before) and ``bs_diff`` (Brier score after minus
        before).
    """
    # NOTE(review): ``seed`` is read from module scope — confirm it is
    # defined wherever this function is used.
    skf = StratifiedKFold(n_splits=cv, shuffle=True, random_state=seed)
    # The original loop only ever did work for fold 0, so take just the
    # first split instead of iterating all cv folds.
    train, cali = next(skf.split(X=x_train, y=y_train))

    x_t, y_t = x_train[train], y_train[train]
    x_c, y_c = x_train[cali], y_train[cali]

    classifier = clone(base_classifier)
    classifier.fit(x_t, y_t)
    ccv = calibrate(classifier, x_c, y_c, method=None,
                    score_type=score_type)

    scores = ccv.predict_proba(x_c)[:, 1]
    scores_test = ccv.predict_proba(x_test)[:, 1]
    ll_before = cross_entropy(scores_test, y_test)
    brier_before = brier_score(scores_test, y_test)

    calibrator = BetaCalibration(parameters="abm").fit(scores, y_c)

    ll_after = cross_entropy(calibrator.predict(scores_test), y_test)
    brier_after = brier_score(calibrator.predict(scores_test), y_test)

    original_map = calibrator.calibrator_.map_
    intervals = beta_test(original_map,
                          test_type="adev", scores=scores)
    intervals["ll_diff"] = ll_after - ll_before
    intervals["bs_diff"] = brier_after - brier_before
    return intervals
Пример #2
0
    def fit(self, X, y, sample_weight=None):
        """Apply beta calibration unless the scores already look calibrated.

        A three-parameter beta calibrator is fitted first; if the
        statistical test cannot reject the hypothesis that the scores
        are calibrated (p-value >= 0.05), an identity calibrator is
        used instead.

        Parameters
        ----------
        X : array-like, shape (n_samples,)
            Training data.

        y : array-like, shape (n_samples,)
            Training target.

        sample_weight : array-like, shape = [n_samples] or None
            Sample weights. If None, then samples are equally weighted.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        beta_fit = BetaCalibration(parameters="abm").fit(X, y)
        self._calibrator = beta_fit
        outcome = beta_test(beta_fit.calibrator_.map_,
                            test_type="adev",
                            scores=X)
        # Fall back to the identity mapping when the test cannot reject
        # that the scores are already calibrated.
        if outcome["p-value"] >= 0.05:
            self._calibrator = _DummyCalibration().fit(X, y)
        return self
    def test_betacal_ab(self):
        # Two-parameter ("ab") beta calibration must reproduce the
        # reference predictions.
        calibrator = BetaCalibration(parameters="ab")
        calibrator.fit(s, y)
        predictions = calibrator.predict(s)
        np.testing.assert_allclose(predictions, pred_ab)
    def test_betacal_am(self):
        # Two-parameter ("am") beta calibration must match the reference
        # predictions up to a loose relative tolerance.
        calibrator = BetaCalibration(parameters="am")
        calibrator.fit(s, y)
        predictions = calibrator.predict(s)
        # With smaller tolerance does not pass the tests
        np.testing.assert_allclose(predictions, pred_am, rtol=1e-5)
Пример #5
0
    def fit(self, X, y, sample_weight=None):
        """Calibrate the fitted model.

        Binarizes the labels and fits one calibrator per positive-class
        column of the preprocessed scores.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data.

        y : array-like, shape (n_samples,)
            Target values.

        sample_weight : array-like, shape = [n_samples] or None
            Sample weights. If None, then samples are equally weighted.

        Returns
        -------
        self : object
            Returns an instance of self.

        Raises
        ------
        ValueError
            If ``self.method`` is not one of the supported names.
        """
        lb = LabelBinarizer()
        Y = lb.fit_transform(y)
        self.classes_ = lb.classes_

        df, idx_pos_class = self._preproc(X)
        self.calibrators_ = []

        for k, this_df in zip(idx_pos_class, df.T):
            if self.method is None:
                calibrator = _DummyCalibration()
            elif self.method == 'isotonic':
                calibrator = IsotonicRegression(out_of_bounds='clip')
            elif self.method == 'sigmoid':
                calibrator = _SigmoidCalibration()
            elif self.method == 'beta':
                calibrator = BetaCalibration(parameters='abm')
            elif self.method == 'beta_am':
                calibrator = BetaCalibration(parameters='am')
            elif self.method == 'beta_ab':
                calibrator = BetaCalibration(parameters='ab')
            else:
                # Bug fix: the message previously advertised "beta2" and
                # "beta05", which this dispatch never accepted.
                raise ValueError('method should be None, "sigmoid", '
                                 '"isotonic", "beta", "beta_am" or '
                                 '"beta_ab". Got %s.' % self.method)
            calibrator.fit(this_df, Y[:, k], sample_weight)
            self.calibrators_.append(calibrator)

        return self
Пример #6
0
    def fit(self, X, y, sample_weight=None):
        """Calibrate the fitted model.

        Encodes the labels, binarizes them against the known classes and
        fits one calibrator per positive-class column of the
        preprocessed scores.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data.

        y : array-like, shape (n_samples,)
            Target values.

        sample_weight : array-like, shape = [n_samples] or None
            Sample weights. If None, then samples are equally weighted.

        Returns
        -------
        self : object
            Returns an instance of self.

        Raises
        ------
        ValueError
            If ``self.method`` is not a supported name or estimator.
        """

        self.label_encoder_ = LabelEncoder()
        if self.classes is None:
            self.label_encoder_.fit(y)
        else:
            self.label_encoder_.fit(self.classes)

        self.classes_ = self.label_encoder_.classes_
        # NOTE(review): passing classes positionally was deprecated in
        # newer scikit-learn (keyword ``classes=`` required) — confirm
        # the pinned sklearn version before changing.
        Y = label_binarize(y, self.classes_)

        df, idx_pos_class = self._preproc(X)
        self.calibrators_ = []
        for k, this_df in zip(idx_pos_class, df.T):
            if self.method == 'isotonic':
                calibrator = IsotonicRegression(out_of_bounds='clip')
            elif self.method == 'sigmoid':
                calibrator = _SigmoidCalibration()
            elif self.method == 'euler':
                calibrator = _EulerSigmoidCalibration()
            elif self.method == 'beta':
                calibrator = BetaCalibration()
            elif self.method in ['rocch', 'convex']:
                calibrator = _ROCCHCalibration()
            elif isinstance(self.method, BaseEstimator):
                calibrator = self.method
            else:
                # Bug fix: the message only mentioned two of the seven
                # accepted options; list them all.
                raise ValueError('method should be "sigmoid", "isotonic", '
                                 '"euler", "beta", "rocch"/"convex" or a '
                                 'BaseEstimator instance. Got %s.'
                                 % self.method)
            calibrator.fit(this_df, Y[:, k], sample_weight)
            self.calibrators_.append(calibrator)

        return self
Пример #7
0
class _BetaTestedCalibration(BaseEstimator, RegressorMixin):
    """Conditional beta calibrator.

    Fits a three-parameter beta calibration, but keeps it only when a
    statistical test rejects the hypothesis that the input scores are
    already calibrated; otherwise it falls back to an identity
    (dummy) calibrator that returns the scores unchanged.
    """
    def fit(self, X, y, sample_weight=None):
        """Fit beta calibration only if the classifier is considered
        uncalibrated.

        Parameters
        ----------
        X : array-like, shape (n_samples,)
            Training data.

        y : array-like, shape (n_samples,)
            Training target.

        sample_weight : array-like, shape = [n_samples] or None
            Sample weights. If None, then samples are equally weighted.
            Note: currently ignored by both underlying calibrators' fit
            calls in this method.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        self._calibrator = BetaCalibration(parameters="abm").fit(X, y)
        test = beta_test(self._calibrator.calibrator_.map_,
                         test_type="adev",
                         scores=X)
        # p >= 0.05: cannot reject "already calibrated" -> identity map.
        if test["p-value"] >= 0.05:
            self._calibrator = _DummyCalibration().fit(X, y)
        return self

    def predict(self, T):
        """Return calibrated probabilities (or the unchanged scores when
        the identity calibrator was selected during fit).

        Parameters
        ----------
        T : array-like, shape (n_samples,)
            Data to predict from.

        Returns
        -------
        T_ : array, shape (n_samples,)
            The predicted data.
        """
        return self._calibrator.predict(T)
Пример #8
0
                                           multi_class='ovr'),
 'logistic_log': LogisticCalibration(C=C_list,
                                     log_transform=True,
                                     multi_class='multinomial'),
 'logistic_logit': LogisticCalibration(C=C_list,
                                       log_transform=False,
                                       multi_class='multinomial'),
 'binning_width' :OneVsRestCalibrator(BinningCalibration(strategy='uniform',
                                                        n_bins=n_bins)),
 'binning_freq' :OneVsRestCalibrator(BinningCalibration(strategy='quantile',
                                                        n_bins=n_bins)),
 'binning_kmeans' :OneVsRestCalibrator(BinningCalibration(strategy='kmeans')), # Not working yet
 'uncalibrated': _DummyCalibration(),
 'isotonic': OneVsRestCalibrator(IsotonicCalibration(out_of_bounds='clip')),
 'sigmoid': OneVsRestCalibrator(SigmoidCalibration()),
 'beta': OneVsRestCalibrator(BetaCalibration(parameters="abm")),
 'beta_am': OneVsRestCalibrator(BetaCalibration(parameters="am")),
 'beta_ab': OneVsRestCalibrator(BetaCalibration(parameters="ab")),
 'ovr_dir_full': OneVsRestCalibrator(DirichletCalibrator(matrix_type='full')),
 'ovr_dir_full_l2': OneVsRestCalibrator(DirichletCalibrator(matrix_type='full',
                                          l2=l2_list)),
 'ovr_dir_diag': OneVsRestCalibrator(DirichletCalibrator(matrix_type='diagonal')),
 'ovr_dir_fixd': OneVsRestCalibrator(DirichletCalibrator(matrix_type='fixed_diagonal')),
 'dirichlet_keras': Dirichlet_NN(l2=10, mu=0.0001),
 'dirichlet_full': DirichletCalibrator(matrix_type='full'),
 'dirichlet_full_gen': GenerativeDirichletCalibrator(),
 'dirichlet_full_prefixdiag': DirichletCalibrator(matrix_type='full',
                                                  initializer='preFixDiag'),
 'dirichlet_full_comp_l2': DirichletCalibrator(matrix_type='full',
                                               comp_l2=True,
                                          l2=l2_list),
Пример #9
0
    def fit(self, X, y, sample_weight=None):
        """Fit the calibrated model.

        Extracts scores from the base estimator, optionally applies
        Platt's Bayesian target trick, fits the configured calibrator,
        and for ``method == 'beta'`` additionally records moment-based
        beta-distribution parameter estimates alongside the fitted map.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data.

        y : array-like, shape (n_samples,)
            Target values.

        sample_weight : array-like, shape = [n_samples] or None
            Sample weights. If None, then samples are equally weighted.
            Note: only the Platt-trick weights are forwarded; the
            ``sample_weight`` argument itself is currently unused.

        Returns
        -------
        self : object
            Returns an instance of self.

        Raises
        ------
        ValueError
            If ``self.method`` is not one of the supported names.
        """

        X, y = check_X_y(X,
                         y,
                         accept_sparse=['csc', 'csr', 'coo'],
                         force_all_finite=False)
        X, y = indexable(X, y)

        df = self._preproc(X)

        weights = None
        if self.platts_trick:
            # Bayesian priors (see Platt end of section 2.2)
            prior0 = float(np.sum(y <= 0))
            prior1 = y.shape[0] - prior0

            weights = np.zeros_like(y).astype(float)
            weights[y > 0] = (prior1 + 1.) / (prior1 + 2.)
            weights[y <= 0] = 1. / (prior0 + 2.)
            # Duplicate every score with both labels, weighted by the
            # smoothed priors.
            y = np.append(np.ones_like(y), np.zeros_like(y))
            weights = np.append(weights, 1.0 - weights)
            df = np.append(df, df)

        if self.method is None:
            self.calibrator = _DummyCalibration()
        elif self.method == 'isotonic':
            self.calibrator = IsotonicRegression(out_of_bounds='clip')
        elif self.method == 'sksigmoid':
            self.calibrator = sk_sigmoid()
        elif self.method == 'sksigmoid_notrick':
            self.calibrator = sk_sigmoid_notrick()
        elif self.method == 'sigmoid':
            self.calibrator = _SigmoidCalibration()
        elif self.method == 'beta':
            self.calibrator = BetaCalibration(parameters="abm")
        elif self.method == 'beta_am':
            self.calibrator = BetaCalibration(parameters="am")
        elif self.method == 'beta_ab':
            self.calibrator = BetaCalibration(parameters="ab")
        elif self.method == 'beta_test_strict':
            self.calibrator = BetaCalibration(parameters="abm")
        elif self.method == 'beta_test_relaxed':
            self.calibrator = BetaCalibration(parameters="abm")
        elif self.method == 'beta_test':
            self.calibrator = _BetaTestedCalibration()
        else:
            # Bug fix: list every accepted method, not just five of them.
            raise ValueError('method should be None, "sigmoid", "isotonic", '
                             '"sksigmoid", "sksigmoid_notrick", "beta", '
                             '"beta_am", "beta_ab", "beta_test_strict", '
                             '"beta_test_relaxed" or "beta_test". '
                             'Got %s.' % self.method)
        self.calibrator.fit(df, y, weights)
        if self.method == 'beta':
            df_pos = df[y == 1]
            df_neg = df[y == 0]

            # Method-of-moments estimates of per-class beta parameters.
            alpha_pos_mmt, beta_pos_mmt = fit_beta_moments(df_pos)
            alpha_neg_mmt, beta_neg_mmt = fit_beta_moments(df_neg)

            # Clamp negative / undefined differences to 0.
            a_mmt = alpha_pos_mmt - alpha_neg_mmt
            if a_mmt < 0 or np.isnan(a_mmt):
                a_mmt = 0
            b_mmt = beta_neg_mmt - beta_pos_mmt
            if b_mmt < 0 or np.isnan(b_mmt):
                b_mmt = 0
            prior_pos = len(df_pos) / len(df)
            prior_neg = len(df_neg) / len(df)
            m_mmt = fit_beta_midpoint(prior_pos, alpha_pos_mmt, beta_pos_mmt,
                                      prior_neg, alpha_neg_mmt, beta_neg_mmt)
            # Renamed from ``map`` to avoid shadowing the builtin.
            fitted_map = self.calibrator.calibrator_.map_
            # Store moment-based and fitted values side by side for
            # later inspection.
            self.a = [a_mmt, fitted_map[0]]
            self.b = [b_mmt, fitted_map[1]]
            self.m = [m_mmt, fitted_map[2]]
            self.df_pos = df_pos
            self.df_neg = df_neg
        return self
Пример #10
0
class CalibratedModel(BaseEstimator, ClassifierMixin):
    """Binary classifier wrapper that post-hoc calibrates the scores of
    an already-fitted base estimator.

    Parameters
    ----------
    base_estimator : estimator or None
        Fitted classifier whose scores will be calibrated.
    method : str or None
        Calibration method name. A suffix after ``'-'`` (e.g.
        ``'sigmoid-pt'``) enables Platt's Bayesian target trick.
    score_type : str or None
        Which scoring attribute of the base estimator to use
        ('decision_function', 'predict_proba', 'sigmoid'), or None to
        auto-detect.
    """
    def __init__(self, base_estimator=None, method=None, score_type=None):
        if method is None:
            self.method = method
            self.platts_trick = False
        else:
            # "<name>-<anything>" enables Platt's trick for method <name>.
            temp = method.split('-')
            self.platts_trick = (len(temp) == 2)
            self.method = temp[0]

        self.base_estimator = base_estimator
        self.score_type = score_type

    def set_base_estimator(self, base_estimator, score_type=None):
        """Replace the wrapped estimator (and optionally the score type)."""
        self.base_estimator = base_estimator
        self.score_type = score_type

    def _preproc(self, X):
        """Extract a flat 1-D positive-class score vector from the base
        estimator according to ``self.score_type``.

        Raises
        ------
        RuntimeError
            If the base estimator exposes no usable scoring method.
        """
        if self.score_type is None:
            # Auto-detect: prefer decision_function, then predict_proba.
            if hasattr(self.base_estimator, "decision_function"):
                df = self.base_estimator.decision_function(X)
                if df.ndim == 1:
                    df = df[:, np.newaxis]
            elif hasattr(self.base_estimator, "predict_proba"):
                df = self.base_estimator.predict_proba(X)
                df = df[:, 1]
            else:
                raise RuntimeError('classifier has no decision_function or '
                                   'predict_proba method.')
        else:
            if self.score_type == "sigmoid":
                # Squash raw decision values into (0, 1).
                df = self.base_estimator.decision_function(X)
                df = expit(df)
                if df.ndim == 1:
                    df = df[:, np.newaxis]
            else:
                if hasattr(self.base_estimator, self.score_type):
                    df = getattr(self.base_estimator, self.score_type)(X)
                    if self.score_type == "decision_function":
                        if df.ndim == 1:
                            df = df[:, np.newaxis]
                    elif self.score_type == "predict_proba":
                        df = df[:, 1:]
                else:
                    # Bug fix: the original message lacked the space
                    # before "method.".
                    raise RuntimeError('classifier has no ' +
                                       self.score_type + ' method.')
        return df.reshape(-1)

    def fit(self, X, y, sample_weight=None):
        """Fit the calibrated model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data.

        y : array-like, shape (n_samples,)
            Target values.

        sample_weight : array-like, shape = [n_samples] or None
            Sample weights. If None, then samples are equally weighted.
            Note: only the Platt-trick weights are forwarded; the
            ``sample_weight`` argument itself is currently unused.

        Returns
        -------
        self : object
            Returns an instance of self.

        Raises
        ------
        ValueError
            If ``self.method`` is not one of the supported names.
        """

        X, y = check_X_y(X,
                         y,
                         accept_sparse=['csc', 'csr', 'coo'],
                         force_all_finite=False)
        X, y = indexable(X, y)

        df = self._preproc(X)

        weights = None
        if self.platts_trick:
            # Bayesian priors (see Platt end of section 2.2)
            prior0 = float(np.sum(y <= 0))
            prior1 = y.shape[0] - prior0

            weights = np.zeros_like(y).astype(float)
            weights[y > 0] = (prior1 + 1.) / (prior1 + 2.)
            weights[y <= 0] = 1. / (prior0 + 2.)
            # Duplicate every score with both labels, weighted by the
            # smoothed priors.
            y = np.append(np.ones_like(y), np.zeros_like(y))
            weights = np.append(weights, 1.0 - weights)
            df = np.append(df, df)

        if self.method is None:
            self.calibrator = _DummyCalibration()
        elif self.method == 'isotonic':
            self.calibrator = IsotonicRegression(out_of_bounds='clip')
        elif self.method == 'sksigmoid':
            self.calibrator = sk_sigmoid()
        elif self.method == 'sksigmoid_notrick':
            self.calibrator = sk_sigmoid_notrick()
        elif self.method == 'sigmoid':
            self.calibrator = _SigmoidCalibration()
        elif self.method == 'beta':
            self.calibrator = BetaCalibration(parameters="abm")
        elif self.method == 'beta_am':
            self.calibrator = BetaCalibration(parameters="am")
        elif self.method == 'beta_ab':
            self.calibrator = BetaCalibration(parameters="ab")
        elif self.method == 'beta_test_strict':
            self.calibrator = BetaCalibration(parameters="abm")
        elif self.method == 'beta_test_relaxed':
            self.calibrator = BetaCalibration(parameters="abm")
        elif self.method == 'beta_test':
            self.calibrator = _BetaTestedCalibration()
        else:
            # Bug fix: list every accepted method, not just five of them.
            raise ValueError('method should be None, "sigmoid", "isotonic", '
                             '"sksigmoid", "sksigmoid_notrick", "beta", '
                             '"beta_am", "beta_ab", "beta_test_strict", '
                             '"beta_test_relaxed" or "beta_test". '
                             'Got %s.' % self.method)
        self.calibrator.fit(df, y, weights)
        if self.method == 'beta':
            df_pos = df[y == 1]
            df_neg = df[y == 0]

            # Method-of-moments estimates of per-class beta parameters.
            alpha_pos_mmt, beta_pos_mmt = fit_beta_moments(df_pos)
            alpha_neg_mmt, beta_neg_mmt = fit_beta_moments(df_neg)

            # Clamp negative / undefined differences to 0.
            a_mmt = alpha_pos_mmt - alpha_neg_mmt
            if a_mmt < 0 or np.isnan(a_mmt):
                a_mmt = 0
            b_mmt = beta_neg_mmt - beta_pos_mmt
            if b_mmt < 0 or np.isnan(b_mmt):
                b_mmt = 0
            prior_pos = len(df_pos) / len(df)
            prior_neg = len(df_neg) / len(df)
            m_mmt = fit_beta_midpoint(prior_pos, alpha_pos_mmt, beta_pos_mmt,
                                      prior_neg, alpha_neg_mmt, beta_neg_mmt)
            # Renamed from ``map`` to avoid shadowing the builtin.
            fitted_map = self.calibrator.calibrator_.map_
            # Store moment-based and fitted values side by side for
            # later inspection.
            self.a = [a_mmt, fitted_map[0]]
            self.b = [b_mmt, fitted_map[1]]
            self.m = [m_mmt, fitted_map[2]]
            self.df_pos = df_pos
            self.df_neg = df_neg
        return self

    def predict_proba(self, X):
        """Posterior probabilities of classification

        This function returns posterior probabilities of classification
        according to each class on an array of test vectors X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The samples.

        Returns
        -------
        C : array, shape (n_samples, n_classes)
            The predicted probas. Can be exact zeros.
        """

        proba = np.zeros((X.shape[0], 2))

        df = self._preproc(X)

        proba[:, 1] = self.calibrator.predict(df)
        proba[:, 0] = 1. - proba[:, 1]

        # Deal with cases where the predicted probability minimally exceeds 1.0
        proba[(1.0 < proba) & (proba <= 1.0 + 1e-5)] = 1.0

        return proba

    def predict(self, X):
        """Predict the target of new samples. Can be different from the
        prediction of the uncalibrated classifier.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The samples.

        Returns
        -------
        C : array, shape (n_samples,)
            The predicted class.
        """
        check_is_fitted(self, ["calibrator"])
        return np.argmax(self.predict_proba(X), axis=1)
Пример #11
0
    def train_calibration_new(self, data, pre_cal_model, size):
        """Train a family of calibration models on a calibration split of
        size ``size`` and return them keyed as ``"cal<N>_<size>"``.

        Which calibrators are trained depends on the instance flags
        ``only_beta``, ``only_log``, ``only_new`` and ``debug``.
        Returns None when all calibration scores are identical.
        """

        # check if the scores on calibration data are different, otherwise it is not reasonable to calibrate
        # NOTE(review): if the model has neither predict_proba nor
        # decision_function, ``scores`` is never assigned and the check
        # below raises NameError — confirm intended inputs.
        if hasattr(pre_cal_model, "predict_proba"):
            preds = pre_cal_model.predict_proba(data.get_cal_train_x(size))
            scores = pd.Series(preds[:, 1]).tolist()
        elif hasattr(pre_cal_model, "decision_function"):
            preds = pre_cal_model.decision_function(data.get_cal_train_x(size))
            if self.only_log:
                scores = pd.Series(preds).tolist()
            else:
                # Map raw decision values through a logistic sigmoid.
                scores = pd.Series(preds).apply(lambda x: 1 / (1 + np.e ** (-1 * x))).tolist()

        if min(scores) == max(scores):
            return None

        # generate different corrections
        y = data.get_cal_train_y(size)
        z = scores
        # Three alternative (score, label) groupings of the same data.
        z_repl, y_repl, no_need1, no_need2 = data.group_scores(z, y, method="repl")
        z_join, y_join, nr0, nr1 = data.group_scores(z, y, method="join")
        z_platt, y_platt, no_need1, no_need2 = data.platts_correction(z, y)



        # Beta-calibration-only shortcut.
        if self.only_beta:
            cal10 = BetaCalibration(sklearn_lr=False)
            cal10_model = cal10.fit(z_repl, y_repl)
            cal11 = BetaCalibration(sklearn_lr=False)
            cal11_model = cal11.fit(z_platt, y_platt)
            return {"cal10_" + str(size): cal10_model, "cal11_" + str(size): cal11_model}


        # Logistic-calibration-only shortcut.
        if self.only_log:
            cal1 = _MySigmoidCalibration()
            cal1_model = cal1.fit(z_repl, y_repl)
            cal2 = _MySigmoidCalibration()
            cal2_model = cal2.fit(z_platt, y_platt)
            return {"cal1_" + str(size): cal1_model, "cal2_" + str(size): cal2_model}

        # Logistic calibration
        if not self.only_new:
            cal1 = _MySigmoidCalibration()
            cal1_model = cal1.fit(z_repl, y_repl)
            cal2 = _MySigmoidCalibration()
            cal2_model = cal2.fit(z_platt, y_platt)
        # cal35_model = ClippingCorrection(cal1_model, 0.95)
        # cal36_model = ClippingCorrection(cal1_model, 0.99)
        # cal37_model = ClippingCorrection(cal1_model, 0.999)

        # Isotonic calibration
        cal4 = _MyIsotonicCalibration()
        cal4_model = cal4.fit(z_repl, y_repl)
        if not self.only_new:
            cal5 = _MyIsotonicCalibration()
            cal5_model = cal5.fit(z_platt, y_platt)
            cal7 = _MyIsotonicCalibration_NEW(distr=8, kind=2)
            cal7_model = cal7.fit(z_join, y_join, z_repl, y_repl, nr0=nr0, nr1=nr1)
            if self.debug:
                cal8 = _MyIsotonicCalibration_NEW(distr=9, kind=2)
                cal8_model = cal8.fit(z_join, y_join, z_repl, y_repl, nr0=nr0, nr1=nr1)
        else:
            cal8 = _MyIsotonicCalibration_NEW(distr=9, kind=2)
            cal8_model = cal8.fit(z_join, y_join, z_repl, y_repl, nr0=nr0, nr1=nr1)
        #cal65_model = ClippingCorrection(cal4_model, 0.95)
        #cal66_model = ClippingCorrection(cal4_model, 0.99)
        #cal67_model = ClippingCorrection(cal4_model, 0.999)

        # ENIR
        # NOTE(review): ``cal8``/``cal8_model`` are reused here, replacing
        # the isotonic-NEW models fitted above when not debug/only_new —
        # confirm this overwrite is intentional.
        if not self.debug:
            if not self.only_new:
                cal8 = _MyENIRCalibration(self.id + "_" + str(size) + "_1", seed = random.randint(1, 10000))
                cal8_model = cal8.fit(z_repl, y_repl)
                cal9 = _MyENIRCalibration(self.id + "_" + str(size) + "_2", seed = random.randint(1, 10000))
                cal9_model = cal9.fit(z_platt, y_platt)
        # cal95_model = ClippingCorrection(cal8_model, 0.95)
        # cal96_model = ClippingCorrection(cal8_model, 0.99)
        # cal97_model = ClippingCorrection(cal8_model, 0.999)

        # Betacal
        if not self.debug:
            if not self.only_new:
                cal10 = BetaCalibration(sklearn_lr=False)
                cal10_model = cal10.fit(z_repl, y_repl)
                cal11 = BetaCalibration(sklearn_lr=False)
                cal11_model = cal11.fit(z_platt, y_platt)
        # cal125_model = ClippingCorrection(cal10_model, 0.95)
        # cal126_model = ClippingCorrection(cal10_model, 0.99)
        # cal127_model = ClippingCorrection(cal10_model, 0.999)


        if self.only_new:
            return {"cal4_" + str(size): cal4_model, "cal8_" + str(size): cal8_model}
        # NOTE(review): ``cal6_model`` below is never assigned anywhere in
        # this method, so the debug path raises NameError — likely a
        # leftover from a removed calibrator; confirm and remove or restore.
        if self.debug:
            return {"cal1_" + str(size): cal1_model, "cal2_" + str(size): cal2_model, "cal4_" + str(size): cal4_model,
                    "cal5_" + str(size): cal5_model, "cal6_" + str(size): cal6_model, "cal7_" + str(size): cal7_model,
                    "cal8_" + str(size): cal8_model}

        return {"cal1_" + str(size): cal1_model, "cal2_" + str(size): cal2_model, "cal4_" + str(size): cal4_model,
                "cal5_" + str(size): cal5_model,"cal7_" + str(size): cal7_model,
                "cal8_" + str(size): cal8_model, "cal9_" + str(size): cal9_model,
                "cal10_" + str(size): cal10_model, "cal11_" + str(size): cal11_model}
Пример #12
0
def confidence_estimation(X_train, y_train, X_test, y_test, contamination, model):

    """
    Estimate the example-wise confidence of different methods, included the model ExCeeD provided in the paper.
    First, we compute the outlier probabilities for all the methods but Calibrations. Then, we estimate the
    example-wise confidence for ExCeeD, ExCeed_sp (with second prior), ExCeeD with outlier probability computed
    through the linear, squashing and unify methods, and calibrated probabilities through Logistic (logcal),
    Isotonic (isocal) and Beta (betacal) Calibrations.

    Parameters
    ----------
    X_train       : list of shape (n_train, n_features) containing the training set with only features.
    y_train       : list of shape (n_train,) containing the actual labels for the training set. It is needed for Calibration.
    X_test        : list of shape (n_test, n_features) containing the test set with only features.
    y_test        : list of shape (n_test,) containing the actual labels for the test set. It is needed for Calibration.
    contamination : float representing the expected proportion of anomalies in the training set.
    model         : string, claiming the model to use for evaluating the confidence. It can be one of: KNN, IForest, OCSVM.

    Returns
    ----------
    exceed_conf    : example-wise confidence using ExCeeD (outlier probability computed by Bayesian Learning with uniform prior)
    exceed_conf_sp : example-wise confidence using ExCeeD (outlier probability computed by Bayesian Learning with other prior)
    squash_conf    : example-wise confidence using ExCeeD (outlier probability computed by squashing function)
    linear_conf    : example-wise confidence using ExCeeD (outlier probability computed by linear function)
    unify_conf     : example-wise confidence using ExCeeD (outlier probability computed by unify method)
    logcal_conf    : example-wise calibrated probability using Logistic Calibration
    isocal_conf    : example-wise calibrated probability using Isotonic Calibration
    betacal_conf   : example-wise calibrated probability using Beta Calibration
    prediction     : list of class predictions with shape (n_test,)

    """
    np.random.seed(331)
    n = np.shape(X_train)[0]
    clf = train_model(X_train, contamination, model)
    col = clf.decision_function(X_train)
    prediction = clf.predict(X_test)
    test_scores = clf.decision_function(X_test)
    train_scores = clf.decision_function(X_train)

    # Bug fix: np.int was removed in NumPy 1.24; it was a plain alias of
    # the builtin int, so using int() is behavior-preserving.
    n_anom = int(n * contamination)
    m = 10 / contamination

    # For each test score, count how many training scores are <= it.
    count_instances = np.vectorize(lambda x: np.count_nonzero(train_scores <= x))
    n_instances = count_instances(test_scores)

    # Bayesian posterior with uniform prior (Laplace smoothing).
    prob_func = np.vectorize(lambda x: (1 + x) / (2 + n))
    exceed_posterior = prob_func(n_instances)

    # Posterior with the alternative ("second") prior.
    adj_prob_func = np.vectorize(lambda x: (10 + x) / (m + n))
    exceed_posterior_sp = adj_prob_func(n_instances)

    unify_proba_anom = [x[1] for x in clf.predict_proba(X_test, method='unify')]

    linear_proba_anom = [x[1] for x in clf.predict_proba(X_test, method='linear')]

    tmp_score = sorted(col, reverse=True)
    # NOTE(review): min(k - 1, 0) always yields an index <= 0; max(...)
    # was probably intended to select the n_anom-th highest score as the
    # squashing threshold — confirm before changing behavior.
    gamma = tmp_score[min(int(n * contamination) - 1, 0)]
    squashing_proba_anom = [1 - squash_proba(x, gamma) for x in clf.decision_function(X_test)]

    # Squash raw scores into [0, 1] so the calibrators get probabilities.
    mapinto01_train = [1 - squash_proba(x, gamma) for x in clf.decision_function(X_train)]
    mapinto01_test = [1 - squash_proba(x, gamma) for x in clf.decision_function(X_test)]

    # Effectively unregularized logistic (Platt-style) calibration.
    lr = LogisticRegression(C=99999999999)
    lr.fit(np.asarray(mapinto01_train).reshape(-1, 1), y_train)
    logistic_calibration = lr.predict_proba(np.asarray(mapinto01_test).reshape(-1, 1))[:, 1]

    iso = IsotonicRegression()
    iso.fit(mapinto01_train, y_train)
    # Out-of-range predictions come back as NaN; treat them as certain.
    isotonic_calibration = np.nan_to_num(iso.predict(mapinto01_test), nan=1.0)

    bc = BetaCalibration(parameters="abm")
    bc.fit(np.asarray(mapinto01_train).reshape(-1, 1), y_train)
    beta_calibration = bc.predict(np.asarray(mapinto01_test).reshape(-1, 1))

    # P(at least n_anom of n examples rank above p) via the binomial CDF.
    conf_func = np.vectorize(lambda p: 1 - binom.cdf(n - n_anom, n, p))

    # For predicted normals (prediction == 0) report the complement.
    exceed_conf = conf_func(exceed_posterior)
    np.place(exceed_conf, prediction == 0, 1 - exceed_conf[prediction == 0])

    exceed_conf_sp = conf_func(exceed_posterior_sp)
    np.place(exceed_conf_sp, prediction == 0, 1 - exceed_conf_sp[prediction == 0])

    squash_conf = conf_func(squashing_proba_anom)
    np.place(squash_conf, prediction == 0, 1 - squash_conf[prediction == 0])

    linear_conf = conf_func(linear_proba_anom)
    np.place(linear_conf, prediction == 0, 1 - linear_conf[prediction == 0])

    unify_conf = conf_func(unify_proba_anom)
    np.place(unify_conf, prediction == 0, 1 - unify_conf[prediction == 0])

    logcal_conf = np.asarray([round(x, 4) if prediction[i] == 1 else round(1 - x, 4) for i, x in enumerate(logistic_calibration)])
    isocal_conf = np.asarray([round(x, 4) if prediction[i] == 1 else round(1 - x, 4) for i, x in enumerate(isotonic_calibration)])
    betacal_conf = np.asarray([round(x, 4) if prediction[i] == 1 else round(1 - x, 4) for i, x in enumerate(beta_calibration)])

    return exceed_conf, exceed_conf_sp, squash_conf, linear_conf, unify_conf, logcal_conf, isocal_conf, betacal_conf, prediction
Пример #13
0
# Evaluate the (uncalibrated) validation predictions with a confusion
# matrix. The "# +" / "# -" lines are jupytext notebook cell markers.
np.set_printoptions(precision=2)
predict_val = model1.predict(X_val)

plot_confusion_matrix(y_val,
                      predict_val,
                      classes=np.array([True, False]),
                      title='Confusion matrix',
                      normalize=False)
# -

# # Beta calibration
#

# +
# Fit three-parameter beta calibration
betaCal = BetaCalibration(parameters="abm")

# Calibrate on the positive-class probabilities of the calibration split.
y_cal_pred = model1.predict_proba(X_cal)[:, 1]
betaCal.fit(y_cal_pred.reshape(-1, 1), y_cal)

# Apply the fitted calibrator to the test-set probabilities and
# threshold at 0.5 for hard predictions.
y_test_pred_prob = model1.predict_proba(X_test)[:, 1]
y_test_pred_prob = betaCal.predict(y_test_pred_prob.reshape(-1, 1))
y_test_pred = y_test_pred_prob > 0.5

# Compare raw vs calibrated probability distributions for the
# positive ('is_converted') test examples.
fig, ax = plt.subplots()
sns.distplot(model1.predict_proba(X_test)[y_test['is_converted'] == 1, 1],
             kde=False)
sns.distplot(y_test_pred_prob[y_test['is_converted'] == 1], kde=False)
plt.show()
# -