Пример #1
0
def check_classifier_ratio(clf, method, cv):
    """Check ``ClassifierRatio`` on two ``Normal`` distributions.

    A calibrated classifier ratio is fitted twice -- once from the
    distributions themselves and once from an explicit ``(X, y)`` sample --
    and each fit must pass the same two assertions on a 100-point grid
    over ``[-1, 1]``.

    Parameters
    ----------
    clf : estimator
        Forwarded as ``base_estimator`` to ``CalibratedClassifierCV``.
    method : object
        Forwarded as ``method`` to ``CalibratedClassifierCV``.
    cv : object
        Forwarded as ``cv`` to ``CalibratedClassifierCV``.

    Raises
    ------
    AssertionError
        If ``ratio.score`` against the exact density ratio is not above
        ``-0.1``, or if ``log(predict(x))`` and ``predict(x, log=True)``
        differ by ``0.01`` or more on average.
    """
    p0 = Normal(mu=0.0)
    p1 = Normal(mu=0.1)

    def make_ratio():
        # A fresh, unfitted ratio estimator for each fitting mode.
        return ClassifierRatio(CalibratedClassifierCV(base_estimator=clf,
                                                      method=method,
                                                      cv=cv))

    def check_fitted(ratio):
        # Compare against the exact ratio p0/p1 and check that the linear
        # and log-scale predictions agree with each other.
        reals = np.linspace(-1, 1, num=100).reshape(-1, 1)
        assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
        assert np.mean(np.abs(np.log(ratio.predict(reals)) -
                              ratio.predict(reals, log=True))) < 0.01

    # Passing distributions directly
    ratio = make_ratio()
    ratio.fit(numerator=p0, denominator=p1, n_samples=10000)
    check_fitted(ratio)

    # Passing X, y only: 5000 draws from p0 labelled 0, then 5000 draws
    # from p1 labelled 1.
    X = np.vstack((p0.rvs(5000), p1.rvs(5000)))
    # Builtin ``int`` instead of the ``np.int`` alias, which was deprecated
    # in NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
    y = np.zeros(10000, dtype=int)
    y[5000:] = 1

    ratio = make_ratio()
    ratio.fit(X=X, y=y)
    check_fitted(ratio)
Пример #2
0
def check_classifier_ratio(clf, method, cv):
    """Check ``ClassifierRatio`` for both supported ways of fitting it.

    The ratio of ``Normal(mu=0.0)`` over ``Normal(mu=0.1)`` is estimated
    with a ``CalibratedClassifierCV`` built from the given arguments, first
    by passing the distributions and then by passing a labelled sample.
    After each fit the estimate is compared with the exact density ratio on
    a 100-point grid over ``[-1, 1]``.

    Parameters
    ----------
    clf : estimator
        Passed as ``base_estimator`` to ``CalibratedClassifierCV``.
    method : object
        Passed as ``method`` to ``CalibratedClassifierCV``.
    cv : object
        Passed as ``cv`` to ``CalibratedClassifierCV``.

    Raises
    ------
    AssertionError
        If either fit scores ``-0.1`` or lower, or if the mean absolute
        difference between ``log(predict(x))`` and
        ``predict(x, log=True)`` is ``0.01`` or more.
    """
    # Passing distributions directly
    p0 = Normal(mu=0.0)
    p1 = Normal(mu=0.1)

    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=clf, method=method, cv=cv))
    ratio.fit(numerator=p0, denominator=p1, n_samples=10000)

    reals = np.linspace(-1, 1, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    # The log-scale prediction must agree with the log of the linear one.
    assert np.mean(
        np.abs(np.log(ratio.predict(reals)) -
               ratio.predict(reals, log=True))) < 0.01

    # Passing X, y only: rows drawn from p0 get label 0, rows from p1 get 1.
    X = np.vstack((p0.rvs(5000), p1.rvs(5000)))
    # ``np.int`` was only an alias of the builtin ``int``; it was deprecated
    # in NumPy 1.20 and removed in 1.24, so use the builtin directly.
    y = np.zeros(10000, dtype=int)
    y[5000:] = 1

    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=clf, method=method, cv=cv))
    ratio.fit(X=X, y=y)

    reals = np.linspace(-1, 1, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    assert np.mean(
        np.abs(np.log(ratio.predict(reals)) -
               ratio.predict(reals, log=True))) < 0.01
Пример #3
0
def test_classifier_ratio_identity():
    """Check the ratio of a distribution against itself.

    With the same ``Normal(mu=0.0)`` used as numerator and denominator, the
    fitted ratio must score exactly ``0.0`` against the exact ratio, predict
    (almost) one everywhere and predict (almost) zero on the log scale.
    """
    gaussian = Normal(mu=0.0)
    calibrated = CalibratedClassifierCV(base_estimator=ElasticNetCV())
    identity_ratio = ClassifierRatio(calibrated)
    identity_ratio.fit(numerator=gaussian,
                       denominator=gaussian,
                       n_samples=10000)

    # 100 evaluation points in [-0.5, 1.0], shaped as a single column.
    grid = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    n_points = len(grid)
    exact = gaussian.pdf(grid) / gaussian.pdf(grid)

    assert identity_ratio.score(grid, exact) == 0.0

    linear_prediction = identity_ratio.predict(grid)
    assert_array_almost_equal(linear_prediction, np.ones(n_points))

    log_prediction = identity_ratio.predict(grid, log=True)
    assert_array_almost_equal(log_prediction, np.zeros(n_points))
Пример #4
0
def test_classifier_ratio_identity():
    """Fit a ratio whose numerator and denominator are the same density.

    The test asserts that the score against the exact ratio is exactly
    ``0.0``, that ``predict`` is almost equal to an array of ones and that
    ``predict(..., log=True)`` is almost equal to an array of zeros.
    """
    density = Normal(mu=0.0)
    estimator = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=ElasticNetCV()))
    estimator.fit(numerator=density, denominator=density, n_samples=10000)

    # Column vector of 100 points spanning [-0.5, 1.0].
    points = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    expected_ratio = density.pdf(points) / density.pdf(points)
    assert estimator.score(points, expected_ratio) == 0.0

    size = len(points)
    assert_array_almost_equal(estimator.predict(points), np.ones(size))
    assert_array_almost_equal(estimator.predict(points, log=True),
                              np.zeros(size))
Пример #5
0
def reconstruct_ratio_using_estimated_pdfs(classifier,
                                           classifier_parameters,
                                           cv_val=3,
                                           with_linear_transformation=False,
                                           add_variation=False,
                                           n_samples=50000,
                                           verbose=True,
                                           inverse_weights=False,
                                           test_by_ML_GB=False):
    """
    Reconstruct weights with discriminative classifiers (calibrated and
    non-calibrated) from `carl` on the samples produced by `generate_samples`.

    For every parameter set a clone of `classifier` is wrapped in a
    `ClassifierRatio`, fitted on the stacked original/target samples and
    used to predict weights for the original test sample. The predicted
    weights are then drawn against the exact test weights, one subplot per
    parameter set.

    :param classifier: estimator that is cloned with `clone` for each
        parameter set and passed to `ClassifierRatio` as `base_estimator`.
    :param classifier_parameters: sequence of dicts; each one is applied
        to the clone with `set_params(**params)`.
    :param cv_val: forwarded to `run_verbose_info` as `cv_val`.
    :param with_linear_transformation: forwarded to `generate_samples`.
    :param add_variation: forwarded to `generate_samples`.
    :param n_samples: forwarded to `generate_samples`.
    :param verbose: forwarded to `generate_samples`; when true, the
        parameters are also printed and `run_verbose_info` is called for
        each parameter set.
    :param inverse_weights: when true, a figure with inverse weights is
        drawn in addition to (and before) the figure with plain weights.
    :param test_by_ML_GB: NOTE(review): not referenced in the part of the
        function reviewed here -- confirm whether it is still needed.
    """
    original, target, exact_weights, original_test, target_test, exact_weights_test = \
        generate_samples(with_linear_transformation=with_linear_transformation,
                         add_variation=add_variation, n_samples=n_samples, verbose=verbose)

    predicted_weights = []
    for params in classifier_parameters:
        if verbose:
            # Single-argument call so the line is valid on Python 3 and
            # prints the same text as the Python 2 statement
            # `print "Used parameters ", params` (note the two spaces).
            print("Used parameters  " + str(params))
        classifier_clone = clone(classifier)
        classifier_clone.set_params(**params)
        ratio = ClassifierRatio(base_estimator=classifier_clone,
                                random_state=42)

        # stack both samples into one training set: rows from `original`
        # are labelled 1, rows from `target` are labelled 0
        X = numpy.vstack((original, target))
        y = numpy.array([1] * original.shape[0] + [0] * target.shape[0])

        # fit the ratio
        ratio.fit(X, y)

        carl_weights_test = ratio.predict(original_test, log=False)
        # infinite predictions are replaced by zero weights
        carl_weights_test[numpy.isinf(carl_weights_test)] = 0.
        predicted_weights.append(carl_weights_test)

        # plot 1d distribution for test sample
        if verbose:
            run_verbose_info(original_test,
                             target_test,
                             carl_weights_test,
                             exact_weights_test,
                             cv_val=cv_val)
    # one figure per regime: plain weights always, inverse weights on request
    regime = [False]
    if inverse_weights:
        regime = [True, False]

    for inverse in regime:
        plt.figure(figsize=(len(classifier_parameters) * 5, 4))
        m = len(predicted_weights)
        for n, (weights, params) in enumerate(
                zip(predicted_weights, classifier_parameters)):
            plt.subplot(1, m, n + 1)
            if inverse:
                plot_scatter_weights(1. / exact_weights_test,
                                     1. / weights,
                                     title="Inverse weights for\n" +
                                     str(params))
            else:
                plot_scatter_weights(exact_weights_test,
                                     weights,
                                     title="Weights for\n" + str(params))