Example #1
def check_classifier_ratio(clf, method, cv):
    # Passing distributions directly
    p0 = Normal(mu=0.0)
    p1 = Normal(mu=0.1)

    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=clf, method=method, cv=cv))
    ratio.fit(numerator=p0, denominator=p1, n_samples=10000)

    reals = np.linspace(-1, 1, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    assert np.mean(
        np.abs(np.log(ratio.predict(reals)) -
               ratio.predict(reals, log=True))) < 0.01

    # Passing X, y only
    X = np.vstack((p0.rvs(5000), p1.rvs(5000)))
    y = np.zeros(10000, dtype=int)
    y[5000:] = 1

    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=clf, method=method, cv=cv))
    ratio.fit(X=X, y=y)

    reals = np.linspace(-1, 1, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    assert np.mean(
        np.abs(np.log(ratio.predict(reals)) -
               ratio.predict(reals, log=True))) < 0.01
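For illustration, a minimal driver for the helper above might look like the sketch below; the classifier, calibration method, and CV setting are assumptions rather than part of the original listing, and the carl names (Normal, ClassifierRatio, CalibratedClassifierCV) are expected to be imported in the surrounding module.

# Hypothetical driver for check_classifier_ratio (classifier and settings are
# illustrative; assumes the helper above and the carl imports are in scope).
from sklearn.linear_model import LogisticRegression

for method in ("isotonic", "sigmoid"):
    check_classifier_ratio(LogisticRegression(), method=method, cv=3)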
Example #2
def calibrated_predict(
    self,
    X: np.ndarray,
    theta: np.ndarray,
    n_samples_per_theta: int,
    simulator_func: Callable,
    calibration_params: Dict,
    log=True,
    return_calibrated_model=False,
):
    # Wrap the already-fitted classifier in a calibration layer.
    cal_clf = CalibratedClassifierCV(base_estimator=self.clf,
                                     cv='prefit',
                                     **calibration_params)
    cal_model = self.__class__(theta_0=self.theta_0, clf=cal_clf)
    # Simulate a fresh calibration set at the requested parameter point.
    calibration_ds = SinglyParameterizedRatioDataset.from_simulator(
        simulator_func=simulator_func,
        theta_0=self.theta_0,
        theta_1_iterator=SingleParamIterator(theta),
        n_samples_per_theta=n_samples_per_theta)
    cal_model.fit(X=calibration_ds.x,
                  theta_1s=calibration_ds.theta_1s,
                  y=calibration_ds.y)
    # Predict the (log) likelihood ratio at X for the given theta.
    theta_1s = stack_repeat(theta, len(X))
    pred = cal_model.predict(X=X, theta_1s=theta_1s, log=log)
    if return_calibrated_model:
        return pred, cal_model
    else:
        return pred
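A hypothetical call to this method, assuming model is an already-fitted estimator of the class it belongs to, X_obs is the observed data, and simulate is the user's simulator callable; those names are placeholders for illustration only.

# Hypothetical usage of calibrated_predict (model, X_obs and simulate are
# placeholders for a fitted estimator, observed data, and a simulator function).
log_ratio, cal_model = model.calibrated_predict(
    X=X_obs,
    theta=np.array([0.5]),
    n_samples_per_theta=10000,
    simulator_func=simulate,
    calibration_params={'method': 'sigmoid'},
    log=True,
    return_calibrated_model=True,
)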
Example #3
def test_classifier_ratio_identity():
    p = Normal(mu=0.0)
    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=ElasticNetCV()))
    ratio.fit(numerator=p, denominator=p, n_samples=10000)

    reals = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    assert ratio.score(reals, p.pdf(reals) / p.pdf(reals)) == 0.0
    assert_array_almost_equal(ratio.predict(reals), np.ones(len(reals)))
    assert_array_almost_equal(ratio.predict(reals, log=True),
                              np.zeros(len(reals)))
Example #4
def test_decomposed_ratio_identity():
    components = [Normal(mu=0.0), Normal(mu=0.25), Normal(mu=0.5)]
    p = Mixture(components=components, weights=[0.45, 0.1, 0.45])

    ratio = DecomposedRatio(
        ClassifierRatio(CalibratedClassifierCV(base_estimator=ElasticNetCV())))
    ratio.fit(numerator=p, denominator=p, n_samples=10000)

    reals = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    assert ratio.score(reals, p.pdf(reals) / p.pdf(reals)) == 0.0
    assert_array_almost_equal(ratio.predict(reals), np.ones(len(reals)))
    assert_array_almost_equal(ratio.predict(reals, log=True),
                              np.zeros(len(reals)))
Example #5
def test_decomposed_ratio():
    components = [Normal(mu=0.0), Normal(mu=0.25), Normal(mu=0.5)]
    p0 = Mixture(components=components, weights=[0.45, 0.1, 0.45])
    p1 = Mixture(components=[components[0]] + [components[2]])

    ratio = DecomposedRatio(
        ClassifierRatio(CalibratedClassifierCV(base_estimator=ElasticNetCV())))
    ratio.fit(numerator=p0, denominator=p1, n_samples=10000)

    reals = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    assert np.mean(np.abs(np.log(ratio.predict(reals)) -
                          ratio.predict(reals, log=True))) < 0.01
Example #6
def make_ratio(num):
    X_num = Xs_s[num]
    X_den = X1_s
    X = np.vstack((X_num, X_den))
    y = np.zeros(len(X_num) + len(X_den), dtype=int)
    y[len(X_num):] = 1

    clf = ExtraTreesClassifier(n_estimators=100, min_samples_split=20, random_state=0, n_jobs=-1)
    #clf = KerasClassifier(make_model_join, nb_epoch=50, verbose=0)

    cv = StratifiedShuffleSplit(n_splits=3, test_size=0.5, random_state=1)

    ratio = ClassifierRatio(
        base_estimator=CalibratedClassifierCV(clf, cv=cv, bins=20),
        random_state=0)
    ratio.fit(X, y)
    
    train_frac = int(len(X) * 0.3)
    # log_loss expects (y_true, y_pred); report the loss on the first 30% of the data.
    print('Loss {0} : {1}'.format(
        num, log_loss(y[:train_frac],
                      ratio.classifier_.classifiers_[0].predict(X[:train_frac]))))
    
    return ratio
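A sketch of how the helper might be used downstream; X_obs is a hypothetical array of points at which the calibrated density ratio is evaluated, while Xs_s and X1_s are assumed to be defined in the surrounding notebook as in the snippet above.

# Hypothetical usage: build the calibrated ratio for component 2 and evaluate it.
ratio_2 = make_ratio(2)
log_r = ratio_2.predict(X_obs, log=True)  # per-sample log density ratio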
Example #7
def check_calibration(method):
    # Adapted from sklearn/tests/test_calibration.py
    # Authors: Alexandre Gramfort
    # License: BSD 3 clause

    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples,
                               n_features=6,
                               random_state=42)

    X -= X.min()  # MultinomialNB only allows positive X

    # split train and test
    X_train, y_train = X[:n_samples], y[:n_samples]
    X_test, y_test = X[n_samples:], y[n_samples:]

    # Naive-Bayes
    clf = MultinomialNB().fit(X_train, y_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    pc_clf = CalibratedClassifierCV(clf, cv=y.size + 1)
    assert_raises(ValueError, pc_clf.fit, X, y)

    pc_clf = CalibratedClassifierCV(clf, method=method, cv=2)
    # Note that this fit overwrites the fit on the entire training set
    pc_clf.fit(X_train, y_train)
    prob_pos_pc_clf = pc_clf.predict_proba(X_test)[:, 1]

    # Check that brier score has improved after calibration
    assert_greater(brier_score_loss(y_test, prob_pos_clf),
                   brier_score_loss(y_test, prob_pos_pc_clf))

    # Check invariance against relabeling [0, 1] -> [1, 2]
    pc_clf.fit(X_train, y_train + 1)
    prob_pos_pc_clf_relabeled = pc_clf.predict_proba(X_test)[:, 1]
    assert_array_almost_equal(prob_pos_pc_clf, prob_pos_pc_clf_relabeled)

    # Check invariance against relabeling [0, 1] -> [-1, 1]
    pc_clf.fit(X_train, 2 * y_train - 1)
    prob_pos_pc_clf_relabeled = pc_clf.predict_proba(X_test)[:, 1]
    assert_array_almost_equal(prob_pos_pc_clf, prob_pos_pc_clf_relabeled)

    # Check invariance against relabeling [0, 1] -> [1, 0]
    pc_clf.fit(X_train, (y_train + 1) % 2)
    prob_pos_pc_clf_relabeled = pc_clf.predict_proba(X_test)[:, 1]
    if method == "sigmoid":
        assert_array_almost_equal(prob_pos_pc_clf,
                                  1 - prob_pos_pc_clf_relabeled)
    else:
        # Isotonic calibration is not invariant against relabeling
        # but should improve in both cases
        assert_greater(
            brier_score_loss(y_test, prob_pos_clf),
            brier_score_loss((y_test + 1) % 2, prob_pos_pc_clf_relabeled))
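A minimal driver for the check above, assuming it is exercised for both calibration methods supported by the sklearn-style API used here; the loop itself is an illustrative assumption.

# Hypothetical driver: run the calibration check for both methods.
for method in ("sigmoid", "isotonic"):
    check_calibration(method)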