def test_ModelHinge(self):
        """...Numerical consistency check of loss and gradient for Hinge model

        A dataset is simulated with ``SimuLogReg`` and fitted both as a dense
        array and as a CSR matrix; the two fitted models are handed to
        ``run_test_for_glm``. This is done first with an intercept, then
        without.
        """
        np.random.seed(12)
        n_samples, n_features = 5000, 10
        w0 = np.random.randn(n_features)
        c0 = np.random.randn()

        # First check with intercept
        X, y = SimuLogReg(w0,
                          c0,
                          n_samples=n_samples,
                          verbose=False,
                          dtype=self.dtype).simulate()
        X_spars = csr_matrix(X, dtype=self.dtype)
        model = ModelHinge(fit_intercept=True).fit(X, y)
        model_spars = ModelHinge(fit_intercept=True).fit(X_spars, y)
        self.run_test_for_glm(model, model_spars)
        self._test_glm_intercept_vs_hardcoded_intercept(model)

        # Then check without intercept
        X, y = SimuLogReg(w0,
                          None,
                          n_samples=n_samples,
                          verbose=False,
                          seed=2038,
                          dtype=self.dtype).simulate()
        # The sparse copy is built with the tested dtype, like in the
        # intercept case, so both models always share the same precision
        X_spars = csr_matrix(X, dtype=self.dtype)
        model = ModelHinge(fit_intercept=False).fit(X, y)

        model_spars = ModelHinge(fit_intercept=False).fit(X_spars, y)
        self.run_test_for_glm(model, model_spars)
示例#2
0
    def test_SimuLogReg(self):
        """...Test simulation of a Logistic Regression

        Features and labels simulated with a fixed seed are compared with
        hard-coded reference values.
        """
        n_samples, n_features = 10, 3
        seed = 123

        # Weights decay exponentially in magnitude and every other one,
        # starting with the first, is negated
        weights = np.exp(-np.arange(n_features) / 10.)
        weights[::2] *= -1

        X, y = SimuLogReg(weights, None, n_samples=n_samples, seed=seed,
                          verbose=False).simulate()

        expected_X = np.array([
            [1.4912667, 0.80881799, 0.26977298],
            [1.23227551, 0.50697013, 1.9409132],
            [1.8891494, 1.49834791, 2.41445794],
            [0.19431319, 0.80245126, 1.02577552],
            [-1.61687582, -1.08411865, -0.83438387],
            [2.30419894, -0.68987056, -0.39750262],
            [-0.28826405, -1.23635074, -0.76124386],
            [-1.32869473, -1.8752391, -0.182537],
            [0.79464218, 0.65055633, 1.57572506],
            [0.71524202, 1.66759831, 0.88679047],
        ])
        expected_y = np.array(
            [-1., -1., -1., -1., 1., -1., 1., -1., -1., 1.])

        np.testing.assert_array_almost_equal(expected_X, X)
        np.testing.assert_array_almost_equal(expected_y, y)
示例#3
0
    def test_ModelLogReg(self):
        """...Numerical consistency check of loss and gradient for Logistic
        Regression

        Dense and sparse fits of the same simulated data are handed to
        ``run_test_for_glm``; the Lipschitz constants of the model are then
        compared with hard-coded reference values.
        """
        np.random.seed(12)
        n_samples, n_features = 5000, 10
        w0 = np.random.randn(n_features)
        c0 = np.random.randn()

        # First check with intercept
        simu = SimuLogReg(w0, c0, n_samples=n_samples, verbose=False,
                          dtype=self.dtype)
        X, y = simu.simulate()
        X_spars = csr_matrix(X, dtype=self.dtype)
        model_dense = ModelLogReg(fit_intercept=True).fit(X, y)
        model_sparse = ModelLogReg(fit_intercept=True).fit(X_spars, y)
        self.run_test_for_glm(model_dense, model_sparse)
        self._test_glm_intercept_vs_hardcoded_intercept(model_dense)

        # Then check without intercept
        simu = SimuLogReg(w0, None, n_samples=n_samples, verbose=False,
                          seed=2038, dtype=self.dtype)
        X, y = simu.simulate()
        X_spars = csr_matrix(X, dtype=self.dtype)
        model_dense = ModelLogReg(fit_intercept=False).fit(X, y)
        model_sparse = ModelLogReg(fit_intercept=False).fit(X_spars, y)
        self.run_test_for_glm(model_dense, model_sparse)
        self._test_glm_intercept_vs_hardcoded_intercept(model_dense)

        # Test for the Lipschitz constants without intercept
        reference = (
            ('get_lip_best', 0.67184209642814952),
            ('get_lip_mean', 2.48961431697108),
            ('get_lip_max', 13.706542412138093),
        )
        for getter, expected in reference:
            self.assertAlmostEqual(getattr(model_dense, getter)(), expected,
                                   places=self.decimal_places)
        for getter in ('get_lip_mean', 'get_lip_max'):
            self.assertAlmostEqual(getattr(model_sparse, getter)(),
                                   getattr(model_dense, getter)(),
                                   places=self.decimal_places)

        # Test for the Lipschitz constants with intercept
        model_dense = ModelLogReg(fit_intercept=True).fit(X, y)
        model_sparse = ModelLogReg(fit_intercept=True).fit(X_spars, y)
        reference = (
            ('get_lip_best', 0.671892096428),
            ('get_lip_mean', 2.739614316971082),
            ('get_lip_max', 13.956542412138093),
        )
        for getter, expected in reference:
            self.assertAlmostEqual(getattr(model_dense, getter)(), expected,
                                   places=self.decimal_places)
        for getter in ('get_lip_mean', 'get_lip_max'):
            self.assertAlmostEqual(getattr(model_sparse, getter)(),
                                   getattr(model_dense, getter)(),
                                   places=self.decimal_places)
示例#4
0
    def test_ModelSmoothedHinge(self):
        """...Numerical consistency check of loss and gradient for SmoothedHinge
         model

        Dense and sparse fits of the same simulated data are handed to
        ``run_test_for_glm``; the Lipschitz constants obtained with
        ``smoothness=0.2`` are then compared with hard-coded references.
        """
        np.random.seed(12)
        n_samples, n_features = 5000, 10
        w0 = np.random.randn(n_features)
        c0 = np.random.randn()

        # First check with intercept
        simu = SimuLogReg(w0, c0, n_samples=n_samples, verbose=False)
        X, y = simu.simulate()
        X_spars = csr_matrix(X)
        model_dense = ModelSmoothedHinge(fit_intercept=True,
                                         smoothness=0.2).fit(X, y)
        model_sparse = ModelSmoothedHinge(fit_intercept=True,
                                          smoothness=0.2).fit(X_spars, y)
        self.run_test_for_glm(model_dense, model_sparse, 1e-5, 1e-4)
        self._test_glm_intercept_vs_hardcoded_intercept(model_dense)

        # Then check without intercept
        simu = SimuLogReg(w0, None, n_samples=n_samples, verbose=False,
                          seed=2038)
        X, y = simu.simulate()
        X_spars = csr_matrix(X)
        model_dense = ModelSmoothedHinge(fit_intercept=False).fit(X, y)
        model_sparse = ModelSmoothedHinge(fit_intercept=False).fit(X_spars, y)
        self.run_test_for_glm(model_dense, model_sparse, 1e-5, 1e-4)

        # Test for the Lipschitz constants without intercept
        model_dense = ModelSmoothedHinge(fit_intercept=False,
                                         smoothness=0.2).fit(X, y)
        model_sparse = ModelSmoothedHinge(fit_intercept=False,
                                          smoothness=0.2).fit(X_spars, y)
        reference = (
            ('get_lip_best', 5 * 2.6873683857125981),
            ('get_lip_mean', 5 * 9.95845726788432),
            ('get_lip_max', 5 * 54.82616964855237),
        )
        for getter, expected in reference:
            self.assertAlmostEqual(getattr(model_dense, getter)(), expected)
        for getter in ('get_lip_mean', 'get_lip_max'):
            self.assertAlmostEqual(getattr(model_sparse, getter)(),
                                   getattr(model_dense, getter)())

        # Test for the Lipschitz constants with intercept
        model_dense = ModelSmoothedHinge(fit_intercept=True,
                                         smoothness=0.2).fit(X, y)
        model_sparse = ModelSmoothedHinge(fit_intercept=True,
                                          smoothness=0.2).fit(X_spars, y)
        reference = (
            ('get_lip_best', 5 * 2.687568385712598),
            ('get_lip_mean', 5 * 10.958457267884327),
            ('get_lip_max', 5 * 55.82616964855237),
        )
        for getter, expected in reference:
            self.assertAlmostEqual(getattr(model_dense, getter)(), expected)
        for getter in ('get_lip_mean', 'get_lip_max'):
            self.assertAlmostEqual(getattr(model_sparse, getter)(),
                                   getattr(model_dense, getter)())
示例#5
0
 def generate_logistic_data(n_features, n_samples, use_intercept=False):
     """Simulate a logistic regression dataset with sparse weights

     The numpy seed is fixed to 12, weights with 5 non zero entries are
     drawn with ``weights_sparse_gauss`` and the data is simulated with
     ``SimuLogReg``.

     Parameters
     ----------
     n_features : `int`
         Number of features, must be strictly larger than 5

     n_samples : `int`
         Number of simulated samples

     use_intercept : `bool`, default=False
         If `True` an intercept equal to 2. is used, otherwise no intercept

     Returns
     -------
     y, X, coeffs0, interc0
         Simulated labels, simulated features, weights and intercept
         (`None` without intercept) given to the simulation

     Raises
     ------
     ValueError
         If ``n_features`` is not larger than 5
     """
     if n_features <= 5:
         raise ValueError("``n_features`` must be larger than 5")

     np.random.seed(12)
     coeffs0 = weights_sparse_gauss(n_features, nnz=5)
     interc0 = 2. if use_intercept else None
     X, y = SimuLogReg(coeffs0, interc0, n_samples=n_samples,
                       verbose=False).simulate()
     return y, X, coeffs0, interc0
示例#6
0
    def test_step_type_setting(self):
        """...Test that SVRG step_type parameter behaves correctly

        The value is checked both on the Python attribute and on the
        underlying ``_solver`` object.
        """
        solver = SVRG()

        coeffs0 = weights_sparse_gauss(20, nnz=5, dtype=self.dtype)
        X, y = SimuLogReg(coeffs0, None, n_samples=3000, verbose=False,
                          seed=123, dtype=self.dtype).simulate()
        model = ModelLogReg().fit(X, y)

        # Value obtained when nothing is given to the constructor
        solver.set_model(model)
        self.assertEqual(solver.step_type, 'fixed')
        self.assertEqual(solver._solver.get_step_type(), SVRG_StepType_Fixed)

        # Value given to the constructor
        solver = SVRG(step_type='bb')
        solver.set_model(model)
        self.assertEqual(solver.step_type, 'bb')
        self.assertEqual(solver._solver.get_step_type(),
                         SVRG_StepType_BarzilaiBorwein)

        # Values assigned through the attribute
        assigned = (
            ('fixed', SVRG_StepType_Fixed),
            ('bb', SVRG_StepType_BarzilaiBorwein),
        )
        for step_type, expected in assigned:
            solver.step_type = step_type
            self.assertEqual(solver.step_type, step_type)
            self.assertEqual(solver._solver.get_step_type(), expected)
示例#7
0
    def test_ModelSmoothedHinge_smoothness(self):
        """...Test that ModelSmoothedHinge smoothness is forwarded to the
        underlying model and that out of range values are rejected
        """
        np.random.seed(12)
        n_samples, n_features = 50, 2
        w0 = np.random.randn(n_features)
        c0 = np.random.randn()
        simu = SimuLogReg(w0, c0, n_samples=n_samples, verbose=False)
        X, y = simu.simulate()

        # Value given to the constructor, then assigned through the attribute
        model = ModelSmoothedHinge(smoothness=0.123).fit(X, y)
        self.assertEqual(model._model.get_smoothness(), 0.123)
        model.smoothness = 0.765
        self.assertEqual(model._model.get_smoothness(), 0.765)

        msg = '^smoothness should be between 0.01 and 1$'

        # Invalid values given at construction time
        for invalid in (-1, 1.2, 0.):
            with self.assertRaisesRegex(RuntimeError, msg):
                ModelSmoothedHinge(smoothness=invalid).fit(X, y)

        # Invalid values assigned to the already fitted model
        for invalid in (0., -1., 2.):
            with self.assertRaisesRegex(RuntimeError, msg):
                model.smoothness = invalid
示例#8
0
 def get_train_data(n_features=20, n_samples=3000, nnz=5):
     """Simulate a logistic regression training set

     The numpy seed is fixed to 12, sparse weights are drawn with
     ``weights_sparse_gauss`` and an intercept of 0.1 is used.

     Parameters
     ----------
     n_features : `int`, default=20
         Number of features

     n_samples : `int`, default=3000
         Number of simulated samples

     nnz : `int`, default=5
         Value forwarded as ``nnz`` to ``weights_sparse_gauss``

     Returns
     -------
     features, y
         The pair returned by ``SimuLogReg.simulate``
     """
     np.random.seed(12)
     interc0 = 0.1
     weights0 = weights_sparse_gauss(n_features, nnz=nnz)
     simu = SimuLogReg(weights0, interc0, n_samples=n_samples, verbose=False)
     features, y = simu.simulate()
     return features, y
示例#9
0
    def test_asaga_solver(self):
        """...Check ASAGA solver for a Logistic Regression with Elastic net
        penalization

        SAGA is run on the same problem with one thread and with two
        threads, and both solutions are compared.
        """
        seed = 1398
        np.random.seed(seed)
        n_samples, n_features = 4000, 30
        intercept = 0.2
        penalty_strength = 1e-3
        sparsity = 1e-4

        weights = weights_sparse_gauss(n_features, nnz=3).astype(self.dtype)
        random_features = sparse.rand(n_samples, n_features,
                                      density=sparsity, format='csr',
                                      random_state=8).astype(self.dtype)

        features, labels = SimuLogReg(weights, n_samples=n_samples,
                                      features=random_features,
                                      verbose=False, intercept=intercept,
                                      dtype=self.dtype).simulate()

        model = ModelLogReg(fit_intercept=True)
        model.fit(features, labels)
        prox = ProxElasticNet(penalty_strength, ratio=0.1,
                              range=(0, n_features))

        # Everything but the number of threads is shared by both solvers
        solver_kwargs = dict(step=1. / model.get_lip_max(), max_iter=100,
                             tol=1e-10, verbose=False, record_every=10,
                             seed=seed)
        solvers = []
        for n_threads in (1, 2):
            solver = SAGA(n_threads=n_threads, **solver_kwargs)
            solver.set_model(model).set_prox(prox)
            solver.solve()
            solvers.append(solver)
        saga, asaga = solvers

        np.testing.assert_array_almost_equal(saga.solution, asaga.solution,
                                             decimal=4)
        self.assertGreater(np.linalg.norm(saga.solution[:-1]), 0)
示例#10
0
    def test_variance_reduction_setting(self):
        """...SolverTest SAGA variance_reduction parameter is correctly set

        The value is checked both on the Python attribute and on the
        underlying ``_solver`` object.
        """
        saga = SAGA()

        coeffs0 = weights_sparse_gauss(20, nnz=5, dtype=self.dtype)
        X, y = SimuLogReg(coeffs0, None, n_samples=3000, verbose=False,
                          seed=123, dtype=self.dtype).simulate()
        model = ModelLogReg().fit(X, y)

        # Value obtained when nothing is given to the constructor
        saga.set_model(model)
        saga.astype(self.dtype)
        self.assertEqual(saga.variance_reduction, 'last')
        self.assertEqual(saga._solver.get_variance_reduction(),
                         SAGA_VarianceReductionMethod_Last)

        # Value given to the constructor
        saga = SAGA(variance_reduction='rand')
        saga.set_model(model)
        saga.astype(self.dtype)
        self.assertEqual(saga.variance_reduction, 'rand')
        self.assertEqual(saga._solver.get_variance_reduction(),
                         SAGA_VarianceReductionMethod_Random)

        # Values assigned through the attribute
        assigned = (
            ('avg', SAGA_VarianceReductionMethod_Average),
            ('rand', SAGA_VarianceReductionMethod_Random),
            ('last', SAGA_VarianceReductionMethod_Last),
        )
        for name, expected in assigned:
            saga.variance_reduction = name
            self.assertEqual(saga.variance_reduction, name)
            self.assertEqual(saga._solver.get_variance_reduction(), expected)

        # Unknown names are rejected
        with self.assertRaises(ValueError):
            saga.variance_reduction = 'wrong_name'
示例#11
0
 def test_solver_bfgs(self):
     """...Check BFGS solver for Logistic Regression with Ridge
     penalization

     BFGS is the reference solver used in other unittests, hence its
     solution is compared with the parameters used to simulate the data.
     """
     np.random.seed(12)
     n_samples, n_features = 3000, 10
     interc0 = 2.
     coeffs0 = weights_sparse_gauss(n_features, nnz=5)

     simu = SimuLogReg(coeffs0, interc0, n_samples=n_samples, verbose=False)
     X, y = simu.simulate()

     model = ModelLogReg(fit_intercept=True).fit(X, y)
     prox = ProxL2Sq(strength=1e-6)
     bfgs = BFGS(max_iter=100, print_every=1, verbose=False, tol=1e-6)
     solver = bfgs.set_model(model).set_prox(prox)
     coeffs = solver.solve()

     # Only a loose agreement with the simulation parameters is required
     err = Test.evaluate_model(coeffs, coeffs0, interc0)
     self.assertAlmostEqual(err, 0., delta=5e-1)
示例#12
0
def create_model(model_type, n_samples, n_features, with_intercept=True):
    """Simulate a dataset and return the matching model fitted on it

    Parameters
    ----------
    model_type : 'Linear' | 'Logistic' | 'Poisson'
        Kind of generalized linear model to simulate and fit

    n_samples : `int`
        Number of simulated samples

    n_features : `int`
        Number of features, i.e. number of randomly drawn weights

    with_intercept : `bool`, default=True
        If `True` a random intercept is used for the simulation and the
        model is created with ``fit_intercept=True``

    Returns
    -------
    model
        A `ModelLinReg`, `ModelLogReg` or `ModelPoisReg` on which ``fit``
        has been called with the simulated data

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names
    """
    supported_types = ('Linear', 'Logistic', 'Poisson')
    if model_type not in supported_types:
        # Without this check an unknown name falls through every branch
        # below and fails later on an unbound local variable
        raise ValueError("``model_type`` must be one of %s, got %r" %
                         (", ".join(repr(name) for name in supported_types),
                          model_type))

    weights = np.random.randn(n_features)
    intercept = None
    if with_intercept:
        intercept = np.random.normal()

    if model_type == 'Poisson':
        # weights and intercept are scaled down to avoid overflows
        weights /= n_features
        if intercept is not None:
            intercept /= n_features

    if model_type == 'Linear':
        simulator_class, model_class = SimuLinReg, ModelLinReg
    elif model_type == 'Logistic':
        simulator_class, model_class = SimuLogReg, ModelLogReg
    else:
        simulator_class, model_class = SimuPoisReg, ModelPoisReg

    simulator = simulator_class(weights,
                                intercept=intercept,
                                n_samples=n_samples,
                                verbose=False)
    # ``simulate`` returns the features first and the labels second
    features, labels = simulator.simulate()

    model = model_class(fit_intercept=with_intercept)
    model.fit(features, labels)
    return model
示例#13
0
from tick.solver import SVRG, SAGA
from tick.prox import ProxElasticNet

# Fix the numpy seed so that the drawn weights are reproducible
seed = 1398
np.random.seed(seed)

# Problem dimensions; ``sparsity`` is the density given to ``sparse.rand``
n_samples = 40000
n_features = 20000
sparsity = 1e-4
penalty_strength = 1e-5

weights = weights_sparse_gauss(n_features, nnz=1000)
intercept = 0.2
features = sparse.rand(n_samples, n_features, density=sparsity, format='csr')

# Simulate labels from the given sparse features; ``features`` is rebound to
# the features returned by the simulation
simulator = SimuLogReg(weights, n_samples=n_samples, features=features,
                       verbose=False, intercept=intercept)
features, labels = simulator.simulate()

model = ModelLogReg(fit_intercept=True)
model.fit(features, labels)
# The penalization range covers the first ``n_features`` coefficients
prox = ProxElasticNet(penalty_strength, ratio=0.5, range=(0, n_features))
# Step size set to the inverse of the value returned by ``get_lip_max``
svrg_step = 1. / model.get_lip_max()

# NOTE(review): presumably the thread counts compared for each solver;
# the code using this list is outside this excerpt
test_n_threads = [1, 2, 4]

# One subplot per solver class
fig, axes = plt.subplots(1, 2, figsize=(8, 4))

for ax, SolverClass in zip(axes, [SVRG, SAGA]):
    # Reset the accumulators for each solver class
    solver_list = []
    solver_labels = []
示例#14
0
"""

import numpy as np
import matplotlib.pyplot as plt
from cycler import cycler

from tick.simulation import weights_sparse_gauss
from tick.solver import SVRG
from tick.linear_model import SimuLogReg, ModelLogReg
from tick.prox import ProxElasticNet
from tick.plot import plot_history

# Simulate a logistic regression dataset with sparse weights
n_samples, n_features, = 5000, 50
weights0 = weights_sparse_gauss(n_features, nnz=10)
intercept0 = 0.2
X, y = SimuLogReg(weights=weights0, intercept=intercept0,
                  n_samples=n_samples, seed=123, verbose=False).simulate()

model = ModelLogReg(fit_intercept=True).fit(X, y)
# The penalization range covers the first ``n_features`` coefficients
prox = ProxElasticNet(strength=1e-3, ratio=0.5, range=(0, n_features))
# NOTE(review): ``x0`` is not used in the visible part of this script
x0 = np.zeros(model.n_coeffs)

# Reference step is the inverse of the value returned by ``get_lip_max``;
# a hundred times smaller and a ten times larger step are tested as well
optimal_step = 1 / model.get_lip_max()
tested_steps = [optimal_step, 1e-2 * optimal_step, 10 * optimal_step]

solvers = []
solver_labels = []

# Run one SVRG solver per tested step
for step in tested_steps:
    svrg = SVRG(max_iter=30, tol=1e-10, verbose=False)
    svrg.set_model(model).set_prox(prox)
    svrg.solve(step=step)
    def test_ModelQuadraticHinge(self):
        """...Numerical consistency check of loss and gradient for Quadratic
        Hinge model

        Dense and sparse fits of the same simulated data are handed to
        ``run_test_for_glm``; the Lipschitz constants of the model are then
        compared with hard-coded reference values, using the precision
        ``self.decimal_places`` for every comparison.
        """
        np.random.seed(12)
        n_samples, n_features = 5000, 10
        w0 = np.random.randn(n_features)
        c0 = np.random.randn()

        # First check with intercept
        X, y = SimuLogReg(w0,
                          c0,
                          n_samples=n_samples,
                          verbose=False,
                          dtype=self.dtype).simulate()
        X_spars = csr_matrix(X, dtype=self.dtype)
        model = ModelQuadraticHinge(fit_intercept=True).fit(X, y)
        model_spars = ModelQuadraticHinge(fit_intercept=True).fit(X_spars, y)
        self.run_test_for_glm(model, model_spars)
        self._test_glm_intercept_vs_hardcoded_intercept(model)

        # Then check without intercept
        X, y = SimuLogReg(w0,
                          None,
                          n_samples=n_samples,
                          verbose=False,
                          seed=2038,
                          dtype=self.dtype).simulate()
        X_spars = csr_matrix(X, dtype=self.dtype)
        model = ModelQuadraticHinge(fit_intercept=False).fit(X, y)

        model_spars = ModelQuadraticHinge(fit_intercept=False).fit(X_spars, y)
        self.run_test_for_glm(model, model_spars)

        # Test for the Lipschitz constants without intercept
        self.assertAlmostEqual(model.get_lip_best(),
                               2.6873683857125981,
                               places=self.decimal_places)
        self.assertAlmostEqual(model.get_lip_mean(),
                               9.95845726788432,
                               places=self.decimal_places)
        self.assertAlmostEqual(model.get_lip_max(),
                               54.82616964855237,
                               places=self.decimal_places)
        # Sparse and dense constants are compared with the same precision as
        # the reference values, like in the intercept case below
        self.assertAlmostEqual(model_spars.get_lip_mean(),
                               model.get_lip_mean(),
                               places=self.decimal_places)
        self.assertAlmostEqual(model_spars.get_lip_max(),
                               model.get_lip_max(),
                               places=self.decimal_places)

        # Test for the Lipschitz constants with intercept
        model = ModelQuadraticHinge(fit_intercept=True).fit(X, y)
        model_spars = ModelQuadraticHinge(fit_intercept=True).fit(X_spars, y)
        self.assertAlmostEqual(model.get_lip_best(),
                               2.687568385712598,
                               places=self.decimal_places)
        self.assertAlmostEqual(model.get_lip_mean(),
                               10.958457267884327,
                               places=self.decimal_places)
        self.assertAlmostEqual(model.get_lip_max(),
                               55.82616964855237,
                               places=self.decimal_places)
        self.assertAlmostEqual(model_spars.get_lip_mean(),
                               model.get_lip_mean(),
                               places=self.decimal_places)
        self.assertAlmostEqual(model_spars.get_lip_max(),
                               model.get_lip_max(),
                               places=self.decimal_places)
# NOTE(review): ``n_samples`` is used below but is not defined in the visible
# part of this script
n_features = 30
n_classes = 2

# Only the first ``nnz`` weights are non zero, all equal to 1
nnz = 5
w0 = np.zeros(n_features)
w0[:nnz] = 1

# TODO: Seed

n_trees = 50

# w0 = weights_sparse_exp(n_features, nnz=nnz)

# Simulate a logistic regression dataset without intercept
X, y = SimuLogReg(weights=w0,
                  intercept=None,
                  n_samples=n_samples,
                  cov_corr=0.1,
                  features_scaling='standard',
                  seed=123).simulate()
# Affine rescaling of the labels: -1 becomes 0 and 1 stays 1
y = (y + 1) / 2

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

# Fit a random forest on the training part and keep its feature importances
rf = RandomForestClassifier(n_estimators=n_trees,
                            criterion="entropy",
                            random_state=123)
rf.fit(X_train, y_train)
feature_importances = rf.feature_importances_

of1 = OnlineForestClassifier(n_classes=n_classes,
                             n_trees=n_trees,
                             seed=123,
示例#17
0
    def test_variance_reduction_setting(self):
        """...Test that SVRG variance_reduction parameter behaves correctly

        The value is checked both on the Python attribute and on the
        underlying ``_solver`` object. Invalid names must raise and 'avg'
        must emit a warning once a sparse model has been set.
        """
        solver = SVRG()

        coeffs0 = weights_sparse_gauss(20, nnz=5, dtype=self.dtype)
        X, y = SimuLogReg(coeffs0, None, n_samples=3000, verbose=False,
                          seed=123, dtype=self.dtype).simulate()
        model = ModelLogReg().fit(X, y)

        # Value obtained when nothing is given to the constructor
        solver.set_model(model)
        self.assertEqual(solver.variance_reduction, 'last')
        self.assertEqual(solver._solver.get_variance_reduction(),
                         SVRG_VarianceReductionMethod_Last)

        # Value given to the constructor
        solver = SVRG(variance_reduction='rand')
        solver.set_model(model)
        self.assertEqual(solver.variance_reduction, 'rand')
        self.assertEqual(solver._solver.get_variance_reduction(),
                         SVRG_VarianceReductionMethod_Random)

        # Values assigned through the attribute
        assigned = (
            ('avg', SVRG_VarianceReductionMethod_Average),
            ('rand', SVRG_VarianceReductionMethod_Random),
            ('last', SVRG_VarianceReductionMethod_Last),
        )
        for name, expected in assigned:
            solver.variance_reduction = name
            self.assertEqual(solver.variance_reduction, name)
            self.assertEqual(solver._solver.get_variance_reduction(),
                             expected)

        # Unknown names are rejected by the constructor and by the setter
        msg = ('^variance_reduction should be one of "avg, last, rand", '
               'got "stuff"$')
        with self.assertRaisesRegex(ValueError, msg):
            solver = SVRG(variance_reduction='stuff')
            solver.set_model(model)
        with self.assertRaisesRegex(ValueError, msg):
            solver.variance_reduction = 'stuff'

        # None of these assignments is expected to raise
        X, y = self.simu_linreg_data(dtype=self.dtype)
        model_dense, model_spars = self.get_dense_and_sparse_linreg_model(
            X, y, dtype=self.dtype)
        try:
            solver.set_model(model_dense)
            for name in ('avg', 'last', 'rand'):
                solver.variance_reduction = name
            solver.set_model(model_spars)
            for name in ('last', 'rand'):
                solver.variance_reduction = name
        except Exception:
            self.fail('Setting variance_reduction in these cases should have '
                      'been ok')

        # 'avg' with the sparse model must emit exactly one UserWarning
        msg = "'avg' variance reduction cannot be used with sparse datasets"
        with catch_warnings(record=True) as caught:
            simplefilter('always')
            solver.set_model(model_spars)
            solver.variance_reduction = 'avg'
            self.assertEqual(len(caught), 1)
            self.assertTrue(issubclass(caught[0].category, UserWarning))
            self.assertEqual(str(caught[0].message), msg)
示例#18
0
    def check_solver(self,
                     solver,
                     fit_intercept=True,
                     model='logreg',
                     decimal=1):
        """Check solver instance finds same parameters as scipy BFGS

        Parameters
        ----------
        solver : `Solver`
            Instance of a solver to be tested

        fit_intercept : `bool`, default=True
            Model uses an intercept if `True`

        model : 'linreg' | 'logreg' | 'poisreg', default='logreg'
            Name of the model used to test the solver

        decimal : `int`, default=1
            Number of decimals required for the test

        Raises
        ------
        ValueError
            If ``model`` is not one of the supported names
        """
        # Set seed for data simulation
        np.random.seed(12)
        n_samples = TestSolver.n_samples
        n_features = TestSolver.n_features

        coeffs0 = weights_sparse_gauss(n_features, nnz=5)
        interc0 = 2. if fit_intercept else None

        # Pick the simulation and model classes matching the requested name
        rescale_features = False
        if model == 'linreg':
            simu_class, model_class = SimuLinReg, ModelLinReg
        elif model == 'logreg':
            simu_class, model_class = SimuLogReg, ModelLogReg
        elif model == 'poisreg':
            simu_class, model_class = SimuPoisReg, ModelPoisReg
            rescale_features = True
        else:
            raise ValueError("``model`` must be either 'linreg', 'logreg' or"
                             " 'poisreg'")

        X, y = simu_class(coeffs0, interc0, n_samples=n_samples,
                          verbose=False, seed=123).simulate()
        if rescale_features:
            # Rescale features to avoid overflows in Poisson simulations
            X /= np.linalg.norm(X, axis=1).reshape(n_samples, 1)
        glm = model_class(fit_intercept=fit_intercept).fit(X, y)

        solver.set_model(glm)

        strength = 1e-2
        prox = ProxL2Sq(strength, (0, glm.n_features))

        # SDCA receives the ridge strength through ``l_l2sq`` together with
        # a zero prox, any other solver receives the ridge prox itself
        if type(solver) is SDCA:
            solver.set_prox(ProxZero())
            solver.l_l2sq = strength
        else:
            solver.set_prox(prox)

        coeffs_solver = solver.solve()

        # Compare with BFGS
        reference = BFGS(max_iter=100,
                         verbose=False).set_model(glm).set_prox(prox)
        coeffs_bfgs = reference.solve()
        np.testing.assert_almost_equal(coeffs_solver, coeffs_bfgs,
                                       decimal=decimal)

        # We ensure that reached coeffs are not equal to zero
        self.assertGreater(norm(coeffs_solver), 0)

        # Both solutions must also reach close objective values
        self.assertAlmostEqual(solver.objective(coeffs_bfgs),
                               solver.objective(coeffs_solver),
                               delta=1e-2)
示例#19
0
# The same weights, intercept, sample size and seed are given to the three
# simulations below
n_samples, n_features = 150, 2

weights0 = np.array([0.3, 1.2])
intercept0 = 0.5

# Linear regression data
simu_linreg = SimuLinReg(weights0,
                         intercept0,
                         n_samples=n_samples,
                         seed=123,
                         verbose=False)
X_linreg, y_linreg = simu_linreg.simulate()

# Logistic regression data
simu_logreg = SimuLogReg(weights0,
                         intercept0,
                         n_samples=n_samples,
                         seed=123,
                         verbose=False)
X_logreg, y_logreg = simu_logreg.simulate()

# Poisson regression data, simulated with the 'exponential' link
simu_poisreg = SimuPoisReg(weights0,
                           intercept0,
                           n_samples=n_samples,
                           link='exponential',
                           seed=123,
                           verbose=False)
X_poisreg, y_poisreg = simu_poisreg.simulate()

# One wide figure split into a 1 x 3 grid; the first cell is selected here
plt.figure(figsize=(12, 3))

plt.subplot(1, 3, 1)
示例#20
0
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.datasets import make_moons, make_classification, make_circles
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt

from time import time


# Size of the simulated dataset and seed given to the simulation
n_samples = 1000
n_features = 2
seed = 123

# Print numpy arrays with two digits of precision
np.set_printoptions(precision=2)

# Simulate a logistic regression dataset with an intercept of -1
w0 = weights_sparse_gauss(n_features, nnz=2)
X, y = SimuLogReg(w0, -1., n_samples=n_samples, seed=seed).simulate()
# Affine rescaling of the labels: -1 becomes 0 and 1 stays 1
y = (y + 1) / 2


def plot_decisions_regression(clfs, datasets, names):
    i = 1
    h = .02
    fig = plt.figure(figsize=(4 * (len(clfs) + 1), 4 * len(datasets)))
    # iterate over datasets
    for ds_cnt, ds in enumerate(datasets):
        X, y = ds
        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=.4, random_state=42)

        x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5