Example #1
def test_sag_proba():
    n_samples = 10
    X, y = make_classification(n_samples, random_state=0)
    sag = SAGClassifier(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10,
                        loss='log', random_state=0)
    sag.fit(X, y)
    check_predict_proba(sag, X)
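These snippets are shown without their imports. A minimal header that should make most of them runnable (a sketch: SAGClassifier and SAGAClassifier are importable from lightning.classification, as the benchmark scripts in the later examples confirm; check_predict_proba and the assert_* helpers are assumed to be lightning/scikit-learn test utilities, and PySAGClassifier/PySAGAClassifier are assumed to be pure-Python reference implementations defined in the test module itself, with no public import path):

import numpy as np
from scipy import sparse
from sklearn.datasets import make_classification
from lightning.classification import SAGClassifier, SAGAClassifier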
Example #2
def test_sag_proba():
    n_samples = 10
    X, y = make_classification(n_samples, random_state=0)
    sag = SAGClassifier(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10,
                        loss='log', random_state=0)
    sag.fit(X, y)
    probas = sag.predict_proba(X)
    assert_equal(probas.sum(), n_samples)
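The assertion holds because predict_proba returns one row per sample whose class probabilities sum to 1, so the grand total equals n_samples. An equivalent per-row check (a sketch; float-tolerant rather than exact):

np.testing.assert_allclose(probas.sum(axis=1), np.ones(n_samples))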
Example #3
def test_no_reg_sag(bin_train_data):
    X_bin, y_bin = bin_train_data
    pysag = PySAGClassifier(eta=1e-3, alpha=0.0, max_iter=10, random_state=0)
    sag = SAGClassifier(eta=1e-3, alpha=0.0, max_iter=10, random_state=0)

    pysag.fit(X_bin, y_bin)
    sag.fit(X_bin, y_bin)
    np.testing.assert_array_almost_equal(pysag.coef_, sag.coef_)
Example #4
def test_l2_regularized_sag():

    pysag = PySAGClassifier(eta=1e-3, alpha=1.0, max_iter=10, random_state=0)
    sag = SAGClassifier(eta=1e-3, alpha=1.0, max_iter=10, random_state=0)

    pysag.fit(X_bin, y_bin)
    sag.fit(X_bin, y_bin)
    np.testing.assert_array_almost_equal(pysag.coef_, sag.coef_)
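This and several later examples reference module-level X_bin/y_bin fixtures that the excerpts do not define. Example #11 below asserts classes_ == [-1, 1], so any compatible setup needs binary labels in {-1, 1}; a minimal sketch (an assumption, not the suite's actual fixture):

X_bin, y_bin = make_classification(n_samples=200, random_state=0)
y_bin = 2 * y_bin - 1  # map the {0, 1} labels to {-1, 1}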
Example #5
def test_sag_multiclass_classes():
    X, y = make_classification(n_samples=10,
                               random_state=0,
                               n_classes=3,
                               n_informative=4)
    sag = SAGClassifier()
    sag.fit(X, y)
    assert list(sag.classes_) == [0, 1, 2]
Example #6
def test_sag_score():
    X, y = make_classification(1000, random_state=0)

    pysag = PySAGClassifier(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10, random_state=0)
    sag = SAGClassifier(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10, random_state=0)

    pysag.fit(X, y)
    sag.fit(X, y)
    assert_equal(pysag.score(X, y), sag.score(X, y))
Example #7
def test_sag_sparse():
    # FIX for https://github.com/mblondel/lightning/issues/33
    # check that SAG gives the same results on dense
    # and sparse data
    X = sparse.rand(100, 50, density=.5, random_state=0)
    y = np.random.randint(0, high=2, size=100)
    for alpha in np.logspace(-3, 3, 10):
        clf_sparse = SAGClassifier(max_iter=1, random_state=0, alpha=alpha)
        clf_sparse.fit(X, y)
        clf_dense = SAGClassifier(max_iter=1, random_state=0, alpha=alpha)
        clf_dense.fit(X.toarray(), y)
        assert_equal(clf_sparse.score(X, y), clf_dense.score(X, y))
Example #8
def test_sag_callback():
    class Callback(object):

        def __init__(self, X, y):
            self.X = X
            self.y = y
            self.obj = []

        def __call__(self, clf):
            clf._finalize_coef()
            y_pred = clf.decision_function(self.X).ravel()
            loss = (np.maximum(1 - self.y * y_pred, 0) ** 2).mean()
            coef = clf.coef_.ravel()
            regul = 0.5 * clf.alpha * np.dot(coef, coef)
            self.obj.append(loss + regul)

    cb = Callback(X_bin, y_bin)
    for clf in (
        SAGClassifier(loss="squared_hinge", eta=1e-3, max_iter=20,
                      random_state=0, callback=cb),
        PySAGClassifier(loss="squared_hinge", eta=1e-3, max_iter=20,
                        random_state=0, callback=cb),
        SAGAClassifier(loss="squared_hinge", eta=1e-3, max_iter=20,
                       random_state=0, callback=cb),
        PySAGAClassifier(loss="squared_hinge", eta=1e-3, max_iter=20,
                         random_state=0, callback=cb)
            ):
        clf.fit(X_bin, y_bin)
        # it's not a descent method, so just check that most of the
        # updates decrease the objective function
        assert_true(np.mean(np.diff(cb.obj) <= 0) > 0.9)
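The callback evaluates the full objective after each epoch: the mean squared hinge loss plus the L2 term 0.5 * alpha * ||coef||^2. Because SAG/SAGA updates are stochastic, descent is not guaranteed at every step, hence the 90% threshold here; compare Example #12, which asserts a strictly monotone decrease for a single SAGClassifier run.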
Example #9
def test_sag():
    for clf in (
        SAGClassifier(eta=1e-3, max_iter=20, verbose=0, random_state=0),
        SAGAClassifier(eta=1e-3, max_iter=20, verbose=0, random_state=0),
        PySAGClassifier(eta=1e-3, max_iter=20, random_state=0)
            ):
        clf.fit(X_bin, y_bin)
        assert_equal(clf.score(X_bin, y_bin), 1.0)
Example #10
def test_auto_stepsize():

    for clf in (
        SAGClassifier(loss='log', max_iter=20, verbose=0, random_state=0),
        SAGAClassifier(loss='log', max_iter=20, verbose=0, random_state=0),
        PySAGClassifier(loss='log', max_iter=20, random_state=0)
            ):
        clf.fit(X_bin, y_bin)
        assert_equal(clf.score(X_bin, y_bin), 1.0)
Example #11
def test_sag():
    for clf in (
        SAGClassifier(eta=1e-3, max_iter=20, verbose=0, random_state=0),
        SAGAClassifier(eta=1e-3, max_iter=20, verbose=0, random_state=0),
        PySAGClassifier(eta=1e-3, max_iter=20, random_state=0)
            ):
        clf.fit(X_bin, y_bin)
        assert not hasattr(clf, 'predict_proba')
        assert_equal(clf.score(X_bin, y_bin), 1.0)
        assert_equal(list(clf.classes_), [-1, 1])
Example #12
def test_sag_callback():
    class Callback(object):

        def __init__(self, X, y):
            self.X = X
            self.y = y
            self.obj = []

        def __call__(self, clf):
            clf._finalize_coef()
            y_pred = clf.decision_function(self.X).ravel()
            loss = (np.maximum(1 - self.y * y_pred, 0) ** 2).mean()
            coef = clf.coef_.ravel()
            regul = 0.5 * clf.alpha * np.dot(coef, coef)
            self.obj.append(loss + regul)

    cb = Callback(X_bin, y_bin)
    clf = SAGClassifier(loss="squared_hinge", eta=1e-3, max_iter=20,
                        random_state=0, callback=cb)
    clf.fit(X_bin, y_bin)
    assert_true(np.all(np.diff(cb.obj) <= 0))
Example #13
def test_sag_adaptive():
    """Check that the adaptive step size strategy yields the same
    solution as the non-adaptive"""
    np.random.seed(0)
    X = sparse.rand(100, 10, density=.5, random_state=0).tocsr()
    y = np.random.randint(0, high=2, size=100)
    for alpha in np.logspace(-3, 1, 5):
        clf_adaptive = SAGClassifier(eta='line-search',
                                     random_state=0,
                                     alpha=alpha)
        clf_adaptive.fit(X, y)
        clf = SAGClassifier(eta='auto', random_state=0, alpha=alpha)
        clf.fit(X, y)
        np.testing.assert_almost_equal(clf_adaptive.score(X, y),
                                       clf.score(X, y), 1)

        clf_adaptive = SAGAClassifier(eta='line-search',
                                      loss='log',
                                      random_state=0,
                                      alpha=alpha,
                                      max_iter=20)
        clf_adaptive.fit(X, y)
        assert not np.isnan(clf_adaptive.coef_.sum())
        clf = SAGAClassifier(eta='auto',
                             loss='log',
                             random_state=0,
                             alpha=alpha,
                             max_iter=20)
        clf.fit(X, y)
        np.testing.assert_almost_equal(clf_adaptive.score(X, y),
                                       clf.score(X, y), 1)
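Taken together, the examples set eta three ways: an explicit float step size (eta=1e-3), eta='auto', and eta='line-search'. A minimal side-by-side sketch reusing the X and y from the example above (the alpha value is illustrative, not from the original tests):

for eta_choice in (1e-3, 'auto', 'line-search'):
    clf = SAGClassifier(eta=eta_choice, alpha=1e-2, max_iter=20,
                        random_state=0)
    clf.fit(X, y)
    print(eta_choice, clf.score(X, y))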
Example #14
def test_sag():
    clf = SAGClassifier(eta=1e-3, max_iter=20, verbose=0, random_state=0)
    clf.fit(X_bin, y_bin)
    assert_equal(clf.score(X_bin, y_bin), 1.0)
Example #15
@pytest.mark.parametrize("l1", [0.1, 0.5, .99, 1., 2.])
def test_l1_prox(l1):
    x = np.ones(5)
    penalty = L1Penalty(l1=l1)
    if l1 <= 1.:
        np.testing.assert_array_equal(penalty.projection(x, stepsize=1.),
                                      x - l1)
        np.testing.assert_array_equal(penalty.projection(-x, stepsize=1.),
                                      -x + l1)
    else:
        np.testing.assert_array_equal(penalty.projection(x, stepsize=1.), 0)
        np.testing.assert_array_equal(penalty.projection(-x, stepsize=1.), 0)


@pytest.mark.parametrize("clf", [
    SAGClassifier(eta=1e-3, max_iter=20, verbose=0, random_state=0),
    SAGAClassifier(eta=1e-3, max_iter=20, verbose=0, random_state=0),
    PySAGClassifier(eta=1e-3, max_iter=20, random_state=0)
])
def test_sag(clf, bin_train_data):
    X_bin, y_bin = bin_train_data
    clf.fit(X_bin, y_bin)
    assert not hasattr(clf, 'predict_proba')
    assert clf.score(X_bin, y_bin) == 1.0
    assert list(clf.classes_) == [-1, 1]


@pytest.mark.parametrize(
    "SAG_", [SAGAClassifier, SAGClassifier, SAGRegressor, SAGARegressor])
def test_sag_dataset(SAG_, bin_train_data):
    # make sure SAG/SAGA accept a Dataset object as argument
Example #16
                    random_state=0,
                    tol=tol)
clf4 = AdaGradClassifier(loss="squared_hinge",
                         alpha=alpha,
                         eta=eta_adagrad,
                         n_iter=100,
                         n_calls=X.shape[0] // 2,
                         random_state=0)
clf5 = SAGAClassifier(loss="squared_hinge",
                      alpha=alpha,
                      max_iter=100,
                      random_state=0,
                      tol=tol)
clf6 = SAGClassifier(loss="squared_hinge",
                     alpha=alpha,
                     max_iter=100,
                     random_state=0,
                     tol=tol)

plt.figure()

data = {}
for clf, name in ((clf1, "SVRG"), (clf2, "SDCA"), (clf3, "PCD"),
                  (clf4, "AdaGrad"), (clf5, "SAGA"), (clf6, "SAG")):
    print(name)
    cb = Callback(X, y)
    clf.callback = cb

    if name == "PCD" and hasattr(X, "tocsc"):
        clf.fit(X.tocsc(), y)
    else:
Example #17
                      random_state=0)
clf3 = CDClassifier(loss="squared_hinge",
                    alpha=alpha,
                    C=1.0 / X.shape[0],
                    max_iter=50,
                    n_calls=X.shape[1] // 3,
                    random_state=0)
clf4 = AdaGradClassifier(loss="squared_hinge",
                         alpha=alpha,
                         eta=eta_adagrad,
                         n_iter=50,
                         n_calls=X.shape[0] // 2,
                         random_state=0)
clf5 = SAGClassifier(loss="squared_hinge",
                     alpha=alpha,
                     eta=eta_sag,
                     max_iter=50,
                     random_state=0)

plt.figure()

for clf, name in ((clf1, "SVRG"), (clf2, "SDCA"), (clf3, "PCD"),
                  (clf4, "AdaGrad"), (clf5, "SAG")):
    print(name)
    cb = Callback(X, y)
    clf.callback = cb

    if name == "PCD" and hasattr(X, "tocsc"):
        clf.fit(X.tocsc(), y)
    else:
        clf.fit(X, y)
Example #18
import time

import numpy as np

from sklearn.datasets import fetch_20newsgroups_vectorized
from lightning.classification import SAGClassifier

bunch = fetch_20newsgroups_vectorized(subset="all")
X = bunch.data
y = bunch.target
y[y >= 1] = 1

clf = SAGClassifier(eta=1e-4, alpha=1e-5, tol=1e-3, max_iter=20, verbose=1,
                    random_state=0)
start = time.time()
clf.fit(X, y)

print "Training time", time.time() - start
print "Accuracy", np.mean(clf.predict(X) == y)
print "% non-zero", clf.n_nonzero(percentage=True)
from sklearn.datasets import fetch_20newsgroups_vectorized
from lightning.classification import SAGClassifier, SAGAClassifier, CDClassifier
from sklearn.linear_model import LogisticRegression

bunch = fetch_20newsgroups_vectorized(subset="all")
X = bunch.data
y = bunch.target
y[y >= 1] = 1

alpha = 1e-3
n_samples = X.shape[0]

sag = SAGClassifier(eta='auto',
                    loss='log',
                    alpha=alpha,
                    tol=1e-10,
                    max_iter=1000,
                    verbose=1,
                    random_state=0)
saga = SAGAClassifier(eta='auto',
                      loss='log',
                      alpha=alpha,
                      tol=1e-10,
                      max_iter=1000,
                      verbose=1,
                      random_state=0)
cd_classifier = CDClassifier(loss='log',
                             alpha=alpha / 2,
                             C=1 / n_samples,
                             tol=1e-10,
                             max_iter=100,