def test_sag_proba():
    n_samples = 10
    X, y = make_classification(n_samples, random_state=0)
    sag = SAGClassifier(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10,
                        loss='log', random_state=0)
    sag.fit(X, y)
    check_predict_proba(sag, X)
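# `check_predict_proba` is not defined in these snippets. The sketch below is
# an assumption of what such a helper might verify; its name and checks are
# hypothetical, not lightning's actual test utility.
import numpy as np

def check_predict_proba(clf, X):
    probas = clf.predict_proba(X)
    # Probabilities must lie in [0, 1] and each row must sum to one.
    assert np.all(probas >= 0) and np.all(probas <= 1)
    np.testing.assert_array_almost_equal(probas.sum(axis=1),
                                         np.ones(probas.shape[0]))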
def test_sag_proba():
    n_samples = 10
    X, y = make_classification(n_samples, random_state=0)
    sag = SAGClassifier(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10,
                        loss='log', random_state=0)
    sag.fit(X, y)
    probas = sag.predict_proba(X)
    # Each row of predict_proba sums to one, so the total is n_samples
    # (up to floating-point error, hence the approximate comparison).
    np.testing.assert_almost_equal(probas.sum(), n_samples)
def test_no_reg_sag(bin_train_data):
    X_bin, y_bin = bin_train_data
    pysag = PySAGClassifier(eta=1e-3, alpha=0.0, max_iter=10, random_state=0)
    sag = SAGClassifier(eta=1e-3, alpha=0.0, max_iter=10, random_state=0)
    pysag.fit(X_bin, y_bin)
    sag.fit(X_bin, y_bin)
    np.testing.assert_array_almost_equal(pysag.coef_, sag.coef_)
def test_l2_regularized_sag():
    pysag = PySAGClassifier(eta=1e-3, alpha=1.0, max_iter=10, random_state=0)
    sag = SAGClassifier(eta=1e-3, alpha=1.0, max_iter=10, random_state=0)
    pysag.fit(X_bin, y_bin)
    sag.fit(X_bin, y_bin)
    np.testing.assert_array_almost_equal(pysag.coef_, sag.coef_)
def test_sag_multiclass_classes():
    X, y = make_classification(n_samples=10, random_state=0, n_classes=3,
                               n_informative=4)
    sag = SAGClassifier()
    sag.fit(X, y)
    assert list(sag.classes_) == [0, 1, 2]
def test_sag_score():
    X, y = make_classification(1000, random_state=0)
    pysag = PySAGClassifier(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10,
                            random_state=0)
    sag = SAGClassifier(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10,
                        random_state=0)
    pysag.fit(X, y)
    sag.fit(X, y)
    assert_equal(pysag.score(X, y), sag.score(X, y))
def test_sag_sparse():
    # Fix for https://github.com/mblondel/lightning/issues/33:
    # check that SAG gives the same results with dense and sparse data.
    X = sparse.rand(100, 50, density=.5, random_state=0)
    y = np.random.randint(0, high=2, size=100)
    for alpha in np.logspace(-3, 3, 10):
        clf_sparse = SAGClassifier(max_iter=1, random_state=0, alpha=alpha)
        clf_sparse.fit(X, y)
        clf_dense = SAGClassifier(max_iter=1, random_state=0, alpha=alpha)
        clf_dense.fit(X.toarray(), y)
        assert_equal(clf_sparse.score(X, y), clf_dense.score(X, y))
def test_sag_callback():
    class Callback(object):
        def __init__(self, X, y):
            self.X = X
            self.y = y
            self.obj = []

        def __call__(self, clf):
            clf._finalize_coef()
            y_pred = clf.decision_function(self.X).ravel()
            loss = (np.maximum(1 - self.y * y_pred, 0) ** 2).mean()
            coef = clf.coef_.ravel()
            regul = 0.5 * clf.alpha * np.dot(coef, coef)
            self.obj.append(loss + regul)

    cb = Callback(X_bin, y_bin)
    for clf in (
            SAGClassifier(loss="squared_hinge", eta=1e-3, max_iter=20,
                          random_state=0, callback=cb),
            PySAGClassifier(loss="squared_hinge", eta=1e-3, max_iter=20,
                            random_state=0, callback=cb),
            SAGAClassifier(loss="squared_hinge", eta=1e-3, max_iter=20,
                           random_state=0, callback=cb),
            PySAGAClassifier(loss="squared_hinge", eta=1e-3, max_iter=20,
                             random_state=0, callback=cb)):
        clf.fit(X_bin, y_bin)
        # SAG is not a descent method, so just check that most of the
        # updates decrease the objective function.
        assert_true(np.mean(np.diff(cb.obj) <= 0) > 0.9)
def test_sag():
    for clf in (
            SAGClassifier(eta=1e-3, max_iter=20, verbose=0, random_state=0),
            SAGAClassifier(eta=1e-3, max_iter=20, verbose=0, random_state=0),
            PySAGClassifier(eta=1e-3, max_iter=20, random_state=0)):
        clf.fit(X_bin, y_bin)
        assert_equal(clf.score(X_bin, y_bin), 1.0)
def test_auto_stepsize():
    for clf in (
            SAGClassifier(loss='log', max_iter=20, verbose=0, random_state=0),
            SAGAClassifier(loss='log', max_iter=20, verbose=0, random_state=0),
            PySAGClassifier(loss='log', max_iter=20, random_state=0)):
        clf.fit(X_bin, y_bin)
        assert_equal(clf.score(X_bin, y_bin), 1.0)
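# test_auto_stepsize above relies on the classifiers' automatic step size
# (no eta is passed). What exactly the automatic step size computes is
# implementation-specific; a minimal sketch of one common choice for log
# loss, eta = 1 / L with L the largest per-sample Lipschitz constant
# (lightning's actual formula may differ), assuming dense X:
import numpy as np

def auto_step_size(X, alpha):
    # For log loss, the gradient of each per-sample term is Lipschitz with
    # constant 0.25 * ||x_i||^2 + alpha; take the worst case over samples.
    max_squared_sum = np.max(np.sum(X ** 2, axis=1))
    return 1.0 / (0.25 * max_squared_sum + alpha)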
def test_sag():
    for clf in (
            SAGClassifier(eta=1e-3, max_iter=20, verbose=0, random_state=0),
            SAGAClassifier(eta=1e-3, max_iter=20, verbose=0, random_state=0),
            PySAGClassifier(eta=1e-3, max_iter=20, random_state=0)):
        clf.fit(X_bin, y_bin)
        assert not hasattr(clf, 'predict_proba')
        assert_equal(clf.score(X_bin, y_bin), 1.0)
        assert_equal(list(clf.classes_), [-1, 1])
def test_sag_callback():
    class Callback(object):
        def __init__(self, X, y):
            self.X = X
            self.y = y
            self.obj = []

        def __call__(self, clf):
            clf._finalize_coef()
            y_pred = clf.decision_function(self.X).ravel()
            loss = (np.maximum(1 - self.y * y_pred, 0) ** 2).mean()
            coef = clf.coef_.ravel()
            regul = 0.5 * clf.alpha * np.dot(coef, coef)
            self.obj.append(loss + regul)

    cb = Callback(X_bin, y_bin)
    clf = SAGClassifier(loss="squared_hinge", eta=1e-3, max_iter=20,
                        random_state=0, callback=cb)
    clf.fit(X_bin, y_bin)
    assert_true(np.all(np.diff(cb.obj) <= 0))
def test_sag_adaptive():
    """Check that the adaptive step size strategy yields the same
    solution as the non-adaptive one."""
    np.random.seed(0)
    X = sparse.rand(100, 10, density=.5, random_state=0).tocsr()
    y = np.random.randint(0, high=2, size=100)
    for alpha in np.logspace(-3, 1, 5):
        clf_adaptive = SAGClassifier(eta='line-search', random_state=0,
                                     alpha=alpha)
        clf_adaptive.fit(X, y)
        clf = SAGClassifier(eta='auto', random_state=0, alpha=alpha)
        clf.fit(X, y)
        np.testing.assert_almost_equal(clf_adaptive.score(X, y),
                                       clf.score(X, y), 1)

        clf_adaptive = SAGAClassifier(eta='line-search', loss='log',
                                      random_state=0, alpha=alpha,
                                      max_iter=20)
        clf_adaptive.fit(X, y)
        assert not np.isnan(clf_adaptive.coef_.sum())
        clf = SAGAClassifier(eta='auto', loss='log', random_state=0,
                             alpha=alpha, max_iter=20)
        clf.fit(X, y)
        np.testing.assert_almost_equal(clf_adaptive.score(X, y),
                                       clf.score(X, y), 1)
def test_sag():
    clf = SAGClassifier(eta=1e-3, max_iter=20, verbose=0, random_state=0)
    clf.fit(X_bin, y_bin)
    assert_equal(clf.score(X_bin, y_bin), 1.0)
@pytest.mark.parametrize("l1", [0.1, 0.5, .99, 1., 2.]) def test_l1_prox(l1): x = np.ones(5) penalty = L1Penalty(l1=l1) if l1 <= 1.: np.testing.assert_array_equal(penalty.projection(x, stepsize=1.), x - l1) np.testing.assert_array_equal(penalty.projection(-x, stepsize=1.), -x + l1) else: np.testing.assert_array_equal(penalty.projection(x, stepsize=1.), 0) np.testing.assert_array_equal(penalty.projection(-x, stepsize=1.), 0) @pytest.mark.parametrize("clf", [ SAGClassifier(eta=1e-3, max_iter=20, verbose=0, random_state=0), SAGAClassifier(eta=1e-3, max_iter=20, verbose=0, random_state=0), PySAGClassifier(eta=1e-3, max_iter=20, random_state=0) ]) def test_sag(clf, bin_train_data): X_bin, y_bin = bin_train_data clf.fit(X_bin, y_bin) assert not hasattr(clf, 'predict_proba') assert clf.score(X_bin, y_bin) == 1.0 assert list(clf.classes_) == [-1, 1] @pytest.mark.parametrize( "SAG_", [SAGAClassifier, SAGClassifier, SAGRegressor, SAGARegressor]) def test_sag_dataset(SAG_, bin_train_data): # make sure SAG/SAGA accept a Dataset object as argument
                      random_state=0, tol=tol)
clf4 = AdaGradClassifier(loss="squared_hinge", alpha=alpha, eta=eta_adagrad,
                         n_iter=100, n_calls=X.shape[0] // 2, random_state=0)
clf5 = SAGAClassifier(loss="squared_hinge", alpha=alpha, max_iter=100,
                      random_state=0, tol=tol)
clf6 = SAGClassifier(loss="squared_hinge", alpha=alpha, max_iter=100,
                     random_state=0, tol=tol)

plt.figure()
data = {}

for clf, name in ((clf1, "SVRG"), (clf2, "SDCA"), (clf3, "PCD"),
                  (clf4, "AdaGrad"), (clf5, "SAGA"), (clf6, "SAG")):
    print(name)
    cb = Callback(X, y)
    clf.callback = cb

    if name == "PCD" and hasattr(X, "tocsc"):
        clf.fit(X.tocsc(), y)
    else:
                      random_state=0)
clf3 = CDClassifier(loss="squared_hinge", alpha=alpha, C=1.0 / X.shape[0],
                    max_iter=50, n_calls=X.shape[1] // 3, random_state=0)
clf4 = AdaGradClassifier(loss="squared_hinge", alpha=alpha, eta=eta_adagrad,
                         n_iter=50, n_calls=X.shape[0] // 2, random_state=0)
clf5 = SAGClassifier(loss="squared_hinge", alpha=alpha, eta=eta_sag,
                     max_iter=50, random_state=0)

plt.figure()

for clf, name in ((clf1, "SVRG"), (clf2, "SDCA"), (clf3, "PCD"),
                  (clf4, "AdaGrad"), (clf5, "SAG")):
    print(name)
    cb = Callback(X, y)
    clf.callback = cb

    if name == "PCD" and hasattr(X, "tocsc"):
        clf.fit(X.tocsc(), y)
    else:
        clf.fit(X, y)
import time

import numpy as np
from sklearn.datasets import fetch_20newsgroups_vectorized

from lightning.classification import SAGClassifier

bunch = fetch_20newsgroups_vectorized(subset="all")
X = bunch.data
y = bunch.target

# Collapse all classes >= 1 into a single positive class so that the
# problem is binary.
y[y >= 1] = 1

clf = SAGClassifier(eta=1e-4, alpha=1e-5, tol=1e-3, max_iter=20, verbose=1,
                    random_state=0)

start = time.time()
clf.fit(X, y)

print("Training time", time.time() - start)
print("Accuracy", np.mean(clf.predict(X) == y))
print("% non-zero", clf.n_nonzero(percentage=True))
from sklearn.datasets import fetch_20newsgroups_vectorized
from sklearn.linear_model import LogisticRegression

from lightning.classification import (SAGClassifier, SAGAClassifier,
                                      CDClassifier)

bunch = fetch_20newsgroups_vectorized(subset="all")
X = bunch.data
y = bunch.target

# Collapse all classes >= 1 into a single positive class so that the
# problem is binary.
y[y >= 1] = 1

alpha = 1e-3
n_samples = X.shape[0]

sag = SAGClassifier(eta='auto', loss='log', alpha=alpha, tol=1e-10,
                    max_iter=1000, verbose=1, random_state=0)
saga = SAGAClassifier(eta='auto', loss='log', alpha=alpha, tol=1e-10,
                      max_iter=1000, verbose=1, random_state=0)
# alpha / 2 and C = 1 / n_samples are chosen so that CDClassifier's
# objective matches the SAG/SAGA objective.
cd_classifier = CDClassifier(loss='log', alpha=alpha / 2, C=1 / n_samples,
                             tol=1e-10, max_iter=100,