示例#1
0
def test_sag_proba():
    """Smoke-test that a log-loss SAG model produces valid probabilities."""
    n = 10
    features, labels = make_classification(n, random_state=0)
    clf = SAGClassifier(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10,
                        loss='log', random_state=0)
    clf.fit(features, labels)
    check_predict_proba(clf, features)
示例#2
0
def test_sag_proba():
    """Probabilities from a log-loss SAG model should sum to 1 per sample.

    Fixed: the original asserted exact float equality between
    ``probas.sum()`` and ``n_samples``; predict_proba rows only sum to 1
    up to floating-point rounding, so the exact check can fail spuriously.
    A small absolute tolerance makes the test robust.
    """
    n_samples = 10
    X, y = make_classification(n_samples, random_state=0)
    sag = SAGClassifier(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10,
                        loss='log', random_state=0)
    sag.fit(X, y)
    probas = sag.predict_proba(X)
    # One unit of probability mass per row -> total ~ n_samples.
    assert abs(probas.sum() - n_samples) < 1e-6
示例#3
0
def test_sag_proba():
    """Check predict_proba output: each row's probabilities sum to one.

    Fixed: replaced the exact ``assert_equal(probas.sum(), n_samples)``
    comparison with a tolerance-based one — probability rows sum to 1
    only up to floating-point rounding, so exact equality is fragile.
    """
    n_samples = 10
    X, y = make_classification(n_samples, random_state=0)
    sag = SAGClassifier(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10,
                        loss='log', random_state=0)
    sag.fit(X, y)
    probas = sag.predict_proba(X)
    # Total mass over all rows should be ~n_samples.
    assert abs(probas.sum() - n_samples) < 1e-6
def test_no_reg_sag(bin_train_data):
    """Unregularized SAG: the Cython and pure-Python solvers must agree."""
    X_bin, y_bin = bin_train_data
    shared = dict(eta=1e-3, alpha=0.0, max_iter=10, random_state=0)
    reference = PySAGClassifier(**shared)
    candidate = SAGClassifier(**shared)

    reference.fit(X_bin, y_bin)
    candidate.fit(X_bin, y_bin)
    np.testing.assert_array_almost_equal(reference.coef_, candidate.coef_)
示例#5
0
def test_sag_multiclass_classes():
    """After fitting a 3-class problem, classes_ holds the sorted labels."""
    X, y = make_classification(
        n_samples=10, random_state=0, n_classes=3, n_informative=4)
    clf = SAGClassifier()
    clf.fit(X, y)
    assert list(clf.classes_) == [0, 1, 2]
示例#6
0
def test_l2_regularized_sag():
    """L2-regularized SAG: the Cython and pure-Python solvers must agree."""
    shared = dict(eta=1e-3, alpha=1.0, max_iter=10, random_state=0)
    py_clf = PySAGClassifier(**shared)
    cy_clf = SAGClassifier(**shared)

    py_clf.fit(X_bin, y_bin)
    cy_clf.fit(X_bin, y_bin)
    np.testing.assert_array_almost_equal(py_clf.coef_, cy_clf.coef_)
示例#7
0
def test_l2_regularized_sag():
    """With alpha=1.0, the pure-Python reference and the Cython SAG
    implementation must learn (almost) identical coefficients."""
    reference = PySAGClassifier(eta=1e-3, alpha=1.0, max_iter=10,
                                random_state=0)
    reference.fit(X_bin, y_bin)

    candidate = SAGClassifier(eta=1e-3, alpha=1.0, max_iter=10,
                              random_state=0)
    candidate.fit(X_bin, y_bin)

    np.testing.assert_array_almost_equal(reference.coef_, candidate.coef_)
示例#8
0
def test_sag_score():
    """Cython SAG and the pure-Python reference must score identically."""
    X, y = make_classification(1000, random_state=0)

    shared = dict(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10, random_state=0)
    reference = PySAGClassifier(**shared)
    candidate = SAGClassifier(**shared)

    reference.fit(X, y)
    candidate.fit(X, y)
    assert_equal(reference.score(X, y), candidate.score(X, y))
示例#9
0
def test_sag_score():
    """The Cython and pure-Python SAG solvers agree on training accuracy."""
    X, y = make_classification(1000, random_state=0)

    py_model = PySAGClassifier(eta=1e-3, alpha=0.0, beta=0.0,
                               max_iter=10, random_state=0)
    py_model.fit(X, y)

    cy_model = SAGClassifier(eta=1e-3, alpha=0.0, beta=0.0,
                             max_iter=10, random_state=0)
    cy_model.fit(X, y)

    assert_equal(py_model.score(X, y), cy_model.score(X, y))
示例#10
0
def test_sag_sparse():
    """SAG must give the same results on dense and sparse input.

    FIX for https://github.com/mblondel/lightning/issues/33

    Fixed: the labels were drawn from the *global* NumPy RNG, making the
    test depend on execution order and non-reproducible in isolation.
    A seeded ``RandomState`` keeps the fixture deterministic.
    """
    rng = np.random.RandomState(0)
    X = sparse.rand(100, 50, density=.5, random_state=0)
    y = rng.randint(0, high=2, size=100)
    for alpha in np.logspace(-3, 3, 10):
        clf_sparse = SAGClassifier(max_iter=1, random_state=0, alpha=alpha)
        clf_sparse.fit(X, y)
        clf_dense = SAGClassifier(max_iter=1, random_state=0, alpha=alpha)
        clf_dense.fit(X.toarray(), y)
        assert_equal(clf_sparse.score(X, y), clf_dense.score(X, y))
示例#11
0
def test_sag_sparse():
    """Dense and sparse inputs must yield identical SAG scores.

    FIX for https://github.com/mblondel/lightning/issues/33

    Fixed: labels previously came from the unseeded global NumPy RNG,
    so the test was not reproducible; use a seeded ``RandomState``.
    """
    rng = np.random.RandomState(0)
    X = sparse.rand(100, 50, density=.5, random_state=0)
    y = rng.randint(0, high=2, size=100)
    for alpha in np.logspace(-3, 3, 10):
        clf_sparse = SAGClassifier(max_iter=1, random_state=0, alpha=alpha)
        clf_sparse.fit(X, y)
        clf_dense = SAGClassifier(max_iter=1, random_state=0, alpha=alpha)
        clf_dense.fit(X.toarray(), y)
        assert_equal(clf_sparse.score(X, y), clf_dense.score(X, y))
示例#12
0
def test_sag_adaptive():
    """Check that the adaptive step size strategy yields the same
    solution as the non-adaptive.

    Fixed: replaced ``assert np.isnan(...) == False`` with the idiomatic
    ``assert not np.isnan(...)`` — explicit comparison to ``False`` is an
    anti-pattern (PEP 8 / flake8 E712).
    """
    np.random.seed(0)
    X = sparse.rand(100, 10, density=.5, random_state=0).tocsr()
    y = np.random.randint(0, high=2, size=100)
    for alpha in np.logspace(-3, 1, 5):
        clf_adaptive = SAGClassifier(eta='line-search',
                                     random_state=0,
                                     alpha=alpha)
        clf_adaptive.fit(X, y)
        clf = SAGClassifier(eta='auto', random_state=0, alpha=alpha)
        clf.fit(X, y)
        # Scores only need to agree to 1 decimal place.
        np.testing.assert_almost_equal(clf_adaptive.score(X, y),
                                       clf.score(X, y), 1)

        clf_adaptive = SAGAClassifier(eta='line-search',
                                      loss='log',
                                      random_state=0,
                                      alpha=alpha,
                                      max_iter=20)
        clf_adaptive.fit(X, y)
        # The line search must not have diverged into NaNs.
        assert not np.isnan(clf_adaptive.coef_.sum())
        clf = SAGAClassifier(eta='auto',
                             loss='log',
                             random_state=0,
                             alpha=alpha,
                             max_iter=20)
        clf.fit(X, y)
        np.testing.assert_almost_equal(clf_adaptive.score(X, y),
                                       clf.score(X, y), 1)
示例#13
0
def test_sag_callback():
    """The squared-hinge objective recorded by a callback must be
    non-increasing across SAG iterations."""

    class ObjectiveRecorder(object):
        """Records the regularized squared-hinge objective after each call."""

        def __init__(self, X, y):
            self.X = X
            self.y = y
            self.obj = []

        def __call__(self, clf):
            clf._finalize_coef()
            margins = clf.decision_function(self.X).ravel()
            hinge = np.maximum(1 - self.y * margins, 0)
            data_loss = (hinge ** 2).mean()
            w = clf.coef_.ravel()
            penalty = 0.5 * clf.alpha * np.dot(w, w)
            self.obj.append(data_loss + penalty)

    recorder = ObjectiveRecorder(X_bin, y_bin)
    clf = SAGClassifier(loss="squared_hinge", eta=1e-3, max_iter=20,
                        random_state=0, callback=recorder)
    clf.fit(X_bin, y_bin)
    assert_true(np.all(np.diff(recorder.obj) <= 0))
示例#14
0
def test_sag_callback():
    """Fitting with a monitoring callback: the regularized squared-hinge
    objective must decrease monotonically over the 20 iterations."""

    class Monitor(object):
        def __init__(self, X, y):
            self.X = X
            self.y = y
            self.obj = []

        def __call__(self, clf):
            clf._finalize_coef()
            scores = clf.decision_function(self.X).ravel()
            # Regularized squared-hinge objective on the training data.
            loss = (np.maximum(1 - self.y * scores, 0) ** 2).mean()
            weights = clf.coef_.ravel()
            self.obj.append(
                loss + 0.5 * clf.alpha * np.dot(weights, weights))

    monitor = Monitor(X_bin, y_bin)
    clf = SAGClassifier(loss="squared_hinge", eta=1e-3, max_iter=20,
                        random_state=0, callback=monitor)
    clf.fit(X_bin, y_bin)
    assert_true(np.all(np.diff(monitor.obj) <= 0))
示例#15
0
def test_sag_adaptive():
    """Check that the adaptive step size strategy yields the same
    solution as the non-adaptive.

    Fixed: ``assert np.isnan(...) == False`` replaced by the idiomatic
    ``assert not np.isnan(...)`` (explicit ``== False`` comparison is an
    anti-pattern, PEP 8 / flake8 E712).
    """
    np.random.seed(0)
    X = sparse.rand(100, 10, density=.5, random_state=0).tocsr()
    y = np.random.randint(0, high=2, size=100)
    for alpha in np.logspace(-3, 1, 5):
        clf_adaptive = SAGClassifier(
            eta='line-search', random_state=0, alpha=alpha)
        clf_adaptive.fit(X, y)
        clf = SAGClassifier(
            eta='auto', random_state=0, alpha=alpha)
        clf.fit(X, y)
        # Scores only need to agree to 1 decimal place.
        assert_almost_equal(clf_adaptive.score(X, y), clf.score(X, y), 1)

        clf_adaptive = SAGAClassifier(
            eta='line-search', loss='log', random_state=0, alpha=alpha, max_iter=20)
        clf_adaptive.fit(X, y)
        # The line search must not have produced NaN coefficients.
        assert not np.isnan(clf_adaptive.coef_.sum())
        clf = SAGAClassifier(
            eta='auto', loss='log', random_state=0, alpha=alpha, max_iter=20)
        clf.fit(X, y)
        assert_almost_equal(clf_adaptive.score(X, y), clf.score(X, y), 1)
示例#16
0
"""Benchmark SAGClassifier on the binarized 20 newsgroups dataset."""
import time

import numpy as np

from sklearn.datasets import fetch_20newsgroups_vectorized
from lightning.classification import SAGClassifier

bunch = fetch_20newsgroups_vectorized(subset="all")
X = bunch.data
y = bunch.target
# Collapse the 20 classes into a binary task: class 0 vs. everything else.
y[y >= 1] = 1

clf = SAGClassifier(eta=1e-4, alpha=1e-5, tol=1e-3, max_iter=20, verbose=1,
                    random_state=0)
start = time.time()
clf.fit(X, y)

# Fixed: the original used Python 2 `print` statements, a SyntaxError under
# Python 3.  Single-argument parenthesized prints produce identical output
# on both Python 2 and Python 3.
print("Training time %s" % (time.time() - start))
print("Accuracy %s" % np.mean(clf.predict(X) == y))
print("%% non-zero %s" % clf.n_nonzero(percentage=True))
示例#17
0
def test_sag():
    """SAG with a small constant step perfectly fits the binary data."""
    model = SAGClassifier(eta=1e-3, max_iter=20, verbose=0, random_state=0)
    model.fit(X_bin, y_bin)
    assert_equal(model.score(X_bin, y_bin), 1.0)
示例#18
0
def test_sag_multiclass_classes():
    """classes_ must list all three labels in sorted order after fit."""
    X, y = make_classification(
        n_samples=10, random_state=0, n_classes=3, n_informative=4)
    model = SAGClassifier()
    model.fit(X, y)
    assert_equal(list(model.classes_), [0, 1, 2])
示例#19
0
def test_sag():
    """Training accuracy on the binary fixture must reach 100%."""
    classifier = SAGClassifier(eta=1e-3, max_iter=20, verbose=0,
                               random_state=0)
    classifier.fit(X_bin, y_bin)
    assert_equal(classifier.score(X_bin, y_bin), 1.0)