Пример #1
0
def test_klpe_contamination():
    """Check that the share of samples predicted as 1 is 1 - contamination."""

    # The decision threshold comes from a quantile, so a reasonable number of
    # samples is needed for the empirical fraction to match the target.
    contamination = 0.1
    X = np.random.randn(50, 2)
    expected_fraction = 1 - contamination

    # Same check for both KLPE variants, each fitted on the same data.
    for klpe_class in (AverageKLPE, MaxKLPE):
        estimator = klpe_class(k=5, contamination=contamination)
        estimator.fit(X)
        fraction_labelled_one = np.mean(estimator.predict(X) == 1)
        assert_almost_equal(fraction_labelled_one, expected_fraction)
Пример #2
0
def test_score_train_novelty_or_not():
    """Check scores_fit_ is the same whether or not novelty=True is passed."""

    X = np.random.randn(50, 2)

    # Run the comparison for AverageKLPE first, then for MaxKLPE.
    for klpe_class in (AverageKLPE, MaxKLPE):
        default_clf = klpe_class(k=10)
        novelty_clf = klpe_class(k=10, novelty=True)

        default_clf.fit(X)
        novelty_clf.fit(X)

        # Scores stored at fit time must match exactly for the two settings.
        assert_array_equal(default_clf.scores_fit_, novelty_clf.scores_fit_)
Пример #3
0
def test_compute_volumes():
    """Check _compute_volumes for several masses.

    For each estimator, the returned volumes and offsets are checked for
    ordering, for range, and for agreement between the requested masses and
    the fraction of test scores lying at or above each offset.
    """
    estimators = [
        AverageKLPE(k=3, novelty=True),
        MaxKLPE(k=3, novelty=True),
        OCSVM(sigma=1.),
        IsolationForest(n_estimators=5, random_state=2),
        KernelSmoothing()
    ]
    # Five masses drawn from {0.01, ..., 0.99}, in increasing order.
    alphas = np.sort(rng.randint(1, 100, size=5) / 100)

    for estimator in estimators:
        estimator = estimator.fit(X_train)
        test_scores = estimator.score_samples(X_test)
        lowest_score = np.min(test_scores)
        highest_score = np.max(test_scores)

        volumes, offsets = _compute_volumes(
            estimator.score_samples, alphas, X_test, U, vol_tot_cube)

        # Volumes must be increasing and offsets decreasing.
        assert_array_equal(volumes, np.sort(volumes))
        assert_array_equal(offsets, -np.sort(-offsets))

        # Volumes must lie in [0, vol_tot_cube].
        volumes_in_range = (np.all(volumes >= 0)
                            and np.all(volumes <= vol_tot_cube))
        assert_true(volumes_in_range)

        # Offsets must lie within the range of the scores on X_test.
        offsets_in_range = (np.all(offsets >= lowest_score)
                            and np.all(offsets <= highest_score))
        assert_true(offsets_in_range)

        # Row i flags the test scores that are at least offsets[i].
        at_or_above_offset = test_scores >= offsets[:, np.newaxis]
        # A large number of samples is needed here: np.percentile is an
        # empirical quantile relying on interpolation, which is also why
        # equality is only required up to the second decimal.
        empirical_masses = np.mean(at_or_above_offset, axis=1)
        assert_array_almost_equal(empirical_masses, alphas, decimal=2)
Пример #4
0
def test_maxklpe():
    """Check MaxKLPE scores and predictions against fixed expected values.

    Covers the default mode (scoring and predicting on X_train) and
    novelty=True (scoring and predicting on X_test). X_train and X_test are
    defined outside this function.
    """
    # Expected scores and labels on the training data.
    expected_train_scores = -np.array([np.sqrt(10), 2, np.sqrt(10)])
    expected_train_labels = np.array([0, 1, 0])

    # Expected scores and labels on the test data.
    expected_test_scores = -np.array([np.sqrt(5), 2])
    expected_test_labels = np.array([1, 1])

    # Default mode: score and predict on X_train itself.
    train_clf = MaxKLPE(k=2, contamination=0.7)
    train_clf.fit(X_train)
    assert_equal(train_clf.algo, 'max')

    assert_array_almost_equal(train_clf.scores_fit_, expected_train_scores)
    train_scores = train_clf.score_samples(X_train)
    assert_array_almost_equal(train_scores, expected_train_scores)

    # predict must agree with thresholding the scores at threshold_.
    labels_from_threshold = (train_scores >= train_clf.threshold_).astype(int)
    assert_array_equal(labels_from_threshold, train_clf.predict(X_train))

    assert_array_equal(expected_train_labels, train_clf.predict(X_train))

    # novelty=True: fit on X_train, then score and predict on X_test.
    novelty_clf = MaxKLPE(k=2, contamination=0.7, novelty=True)
    novelty_clf.fit(X_train)

    assert_array_almost_equal(novelty_clf.scores_fit_, expected_train_scores)

    test_scores = novelty_clf.score_samples(X_test)
    assert_array_almost_equal(test_scores, expected_test_scores)

    assert_array_equal(expected_test_labels, novelty_clf.predict(X_test))