Example #1
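These snippets exercise a kernel matching pursuit (KMP) classifier and reference fixtures (bin_dense, bin_target, mult_dense, mult_target) defined elsewhere in the test suite. A minimal set of imports they appear to assume is sketched below; the module path for KMPClassifier and select_components is a guess, and ShuffleSplit is the old scikit-learn API matching the n_iterations/test_fraction keywords used here.

# Assumed imports (module paths for the KMP code are guesses, not
# confirmed by the source; the assert_* helpers may instead come from
# nose.tools or numpy.testing depending on the vintage):
import sys

import numpy as np

from sklearn.cross_validation import ShuffleSplit   # old-API scikit-learn
from sklearn.linear_model import Ridge
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.utils import check_random_state
from sklearn.utils.testing import (assert_true, assert_equal,
                                   assert_almost_equal,
                                   assert_array_almost_equal)

from kmp import KMPClassifier, select_components    # assumed module path
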
def test_kmp_precomputed_dictionary():
    n_samples = mult_dense.shape[0]
    # Note: this uses the old scikit-learn ShuffleSplit signature
    # (n_iterations/test_fraction were later renamed to n_iter/test_size).
    cv = ShuffleSplit(n_samples,
                      n_iterations=1,
                      test_fraction=0.2,
                      random_state=0)
    train, test = list(cv)[0]
    X_train, y_train = mult_dense[train], mult_target[train]
    X_test, y_test = mult_dense[test], mult_target[test]

    components = select_components(X_train, y_train,
                                   n_components=0.3,
                                   random_state=0)
    K_train = pairwise_kernels(X_train, components)

    kmp = KMPClassifier(metric="precomputed")
    kmp.fit(K_train, y_train)
    y_pred = kmp.predict(K_train)
    acc = np.mean(y_pred == y_train)
    assert_true(acc >= 0.75)

    K_test = pairwise_kernels(X_test, components)
    y_pred = kmp.predict(K_test)

    acc = np.mean(y_pred == y_test)
    assert_true(acc >= 0.63)
Example #2
def test_kmp_validation():
    random_state = check_random_state(0)
    perm = random_state.permutation(200)
    X = bin_dense[perm]
    y = bin_target[perm]
    X_train = X[:100]
    y_train = y[:100]
    X_test = X[100:150]
    y_test = y[100:150]
    X_val = X[150:]
    y_val = y[150:]

    kmp = KMPClassifier(n_nonzero_coefs=1.0,
                        n_components=0.5,
                        n_refit=5,
                        estimator=Ridge(alpha=1.0),
                        metric="linear",
                        X_val=X_val, y_val=y_val,
                        random_state=0)
    kmp.fit(X_train, y_train)

    assert_almost_equal(kmp.validation_scores_[-1], 0.56, decimal=2)
    n_scores = len(kmp.validation_scores_)

    # Early stopping: raising epsilon should halt the fit sooner,
    # yielding fewer validation scores than the run above.
    kmp.epsilon = 0.001
    kmp.fit(X_train, y_train)
    assert_true(kmp.validation_scores_.shape[0] < n_scores)
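
For context, the early-stopping check above appears to stop fitting once the validation score no longer improves by at least epsilon. A minimal illustrative sketch of that rule, not the library's actual code:

def should_stop(validation_scores, epsilon):
    # Stop once the newest validation score improves on the previous
    # one by less than epsilon (assumed semantics, for illustration).
    if len(validation_scores) < 2:
        return False
    return validation_scores[-1] - validation_scores[-2] < epsilon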
Example #3
def test_kmp_fit_multiclass():
    for metric, acc in (("rbf", 0.796),
                        ("linear", 0.803),
                        ("poly", 0.836)):
        kmp = KMPClassifier(n_nonzero_coefs=4.0/5,
                            n_components=0.5,
                            n_refit=10,
                            metric=metric,
                            random_state=0)
        kmp.fit(mult_dense, mult_target)
        y_pred = kmp.predict(mult_dense)
        assert_almost_equal(np.mean(mult_target == y_pred), acc, decimal=2)
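
The fractional n_nonzero_coefs and n_components values above appear to be resolved against the number of training samples (Example #5 below asserts that n_components=0.5 yields bin_dense.shape[0] // 2 components). A hypothetical helper showing that interpretation:

def resolve_fraction(param, n_samples):
    # Fractions in (0, 1] appear to mean "this share of n_samples";
    # assumed semantics, inferred from the asserts in these tests.
    if isinstance(param, float) and 0.0 < param <= 1.0:
        return int(param * n_samples)
    return int(param)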
Example #4
def test_kmp_init_components():
    random_state = check_random_state(0)
    perm = random_state.permutation(200)
    components = bin_dense[perm[:20]]

    kmp = KMPClassifier(init_components=components,
                        n_components=0.5,
                        n_refit=0,
                        metric="linear",
                        random_state=0)
    kmp.fit(bin_dense, bin_target)
    assert_true(kmp.components_.shape[0] < components.shape[0])
Example #5
def test_kmp_fit_binary_backfitting():
    for metric, acc in (("rbf", 0.723),
                        ("linear", 0.954),
                        ("poly", 0.724)):
        kmp = KMPClassifier(n_nonzero_coefs=1.0,
                            n_components=0.5,
                            n_refit=1,
                            metric=metric,
                            random_state=0)
        kmp.fit(bin_dense, bin_target)
        assert_equal(kmp.components_.shape[1], bin_dense.shape[0] // 2)
        y_pred = kmp.predict(bin_dense)
        assert_almost_equal(np.mean(bin_target == y_pred), acc, decimal=2)
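
As background for n_refit: in kernel matching pursuit (Vincent & Bengio, 2002), basis functions are picked greedily, and back-fitting re-estimates the weights of all selected functions jointly every n_refit steps. A toy least-squares sketch of that loop, not the library's implementation:

def toy_kmp(K, y, n_nonzero_coefs, n_refit):
    # K: (n_samples, n_basis) kernel matrix, y: real-valued targets.
    y = np.asarray(y, dtype=float)
    residual = y.copy()
    selected, w = [], np.empty(0)
    for it in range(n_nonzero_coefs):
        # Pick the basis column most correlated with the residual.
        scores = np.abs(np.dot(K.T, residual))
        scores[selected] = -np.inf          # never reselect a column
        j = int(np.argmax(scores))
        selected.append(j)
        if n_refit and (it + 1) % n_refit == 0:
            # Back-fit: joint least squares over all selected columns.
            w = np.linalg.lstsq(K[:, selected], y)[0]
        else:
            # Plain matching pursuit step: fit the new column alone.
            col = K[:, j]
            w = np.append(w, np.dot(col, residual) / np.dot(col, col))
        residual = y - np.dot(K[:, selected], w)
    return selected, w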
Example #6
def test_kmp_squared_loss():
    kmp = KMPClassifier(n_nonzero_coefs=4.0/5,
                        n_components=0.5,
                        n_refit=5,
                        estimator=Ridge(alpha=1.0),
                        metric="linear",
                        random_state=0)
    kmp.fit(bin_dense, bin_target)
    y_pred = kmp.decision_function(bin_dense)

    # Refitting with an explicit squared loss should produce the same
    # decision function as the default loss.
    kmp.loss = "squared"
    kmp.fit(bin_dense, bin_target)
    y_pred2 = kmp.decision_function(bin_dense)

    assert_array_almost_equal(y_pred, y_pred2)
Example #7
def fit_kmp(K_train, y_train, K_test, y_test, opts, random_state):
    # Fit a KMP classifier on a precomputed kernel, monitoring the test
    # fold as a validation set for epsilon-based early stopping.
    clf = KMPClassifier(n_nonzero_coefs=opts.n_nonzero_coefs,
                        n_refit=opts.n_refit,
                        estimator=Ridge(alpha=opts.alpha),
                        X_val=K_test, y_val=y_test,
                        metric="precomputed",
                        scale=True,
                        scale_y=opts.scale_y,
                        check_duplicates=opts.check_duplicates,
                        n_validate=opts.n_validate,
                        epsilon=opts.epsilon,
                        verbose=1,
                        random_state=random_state,
                        n_jobs=-1)
    clf.fit(K_train, y_train)
    return clf
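
A hypothetical driver for this helper: with metric="precomputed", both Gram matrices must be computed against the same dictionary components. The data names and opts fields below are assumptions:

# Hypothetical usage; X_train/X_test and the opts fields are assumptions.
components = select_components(X_train, y_train,
                               n_components=opts.n_components,
                               random_state=0)
K_train = pairwise_kernels(X_train, components, metric="rbf", gamma=0.1)
K_test = pairwise_kernels(X_test, components, metric="rbf", gamma=0.1)
clf = fit_kmp(K_train, y_train, K_test, y_test, opts, random_state=0)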
Example #8
def fit_kmp(X_train, y_train, X_test, y_test, class_distrib, opts, random_state):
    # Select dictionary components from the training set, then fit a KMP
    # classifier with the requested kernel, validating on the test fold.
    components = select_components(X_train, y_train,
                                   n_components=opts.n_components,
                                   class_distrib=class_distrib,
                                   random_state=random_state)

    clf = KMPClassifier(n_nonzero_coefs=opts.n_nonzero_coefs,
                        init_components=components,
                        n_refit=opts.n_refit,
                        estimator=Ridge(alpha=opts.alpha),
                        X_val=X_test, y_val=y_test,
                        metric=opts.metric,
                        gamma=opts.gamma,
                        degree=opts.degree,
                        coef0=opts.coef0,
                        scale=opts.scale,
                        n_validate=opts.n_validate,
                        epsilon=opts.epsilon,
                        #score_func=f1_score,
                        verbose=1,
                        random_state=random_state,
                        n_jobs=-1)
    clf.fit(X_train, y_train)
    return clf
Example #9
try:
    color = int(sys.argv[1])
except (IndexError, ValueError):
    color = True

try:
    surface = int(sys.argv[2])
except (IndexError, ValueError):
    surface = False


X1, y1, X2, y2 = gen_non_lin_separable_data()
X_train, y_train = split_train(X1, y1, X2, y2)
X_test, y_test = split_test(X1, y1, X2, y2)

random_state = check_random_state(0)  # undefined in the original snippet

clf = KMPClassifier(n_nonzero_coefs=0.3,
                    n_components=1.0,
                    metric="rbf",
                    gamma=0.1,
                    n_refit=1,
                    estimator=Ridge(alpha=0.01),
                    random_state=random_state)
clf.fit(X_train, y_train)

y_predict = clf.predict(X_test)
correct = np.sum(y_predict == y_test)
print "%d out of %d predictions correct" % (correct, len(y_predict))

plot_contour(X_train, clf, color, surface)