示例#1
0
    def get_train_data(fit_intercept=True):
        """Simulate linear regression data whose labels contain outliers

        Sizes, noise level and outliers settings are read from the ``Test``
        class attributes and the global numpy seed is reset to 12.

        Parameters
        ----------
        fit_intercept : `bool`, default=True
            If `True`, ``Test.interc0`` is used as global intercept,
            otherwise the intercept is 0

        Returns
        -------
        X : features matrix
        y : labels (linear part + noise + global intercept + outliers)
        weights0 : weights used to build the labels
        interc0 : global intercept used to build the labels
        sample_intercepts0 : individual offsets added to the labels
        """
        np.random.seed(12)
        n_samples, n_features = Test.n_samples, Test.n_features
        nnz_outliers = Test.nnz_outliers
        interc0 = Test.interc0 if fit_intercept else 0.

        features_grid = np.linspace(1, 10, n_features) * n_features
        weights0 = np.sqrt(2 * np.log(features_grid))

        # Sparse vector of sample intercepts: its non-zero entries are the
        # outliers, only their sign is kept and their magnitude is replaced
        sample_intercepts0 = weights_sparse_gauss(n_weights=n_samples,
                                                  nnz=nnz_outliers)
        is_outlier = sample_intercepts0 != 0
        outliers_grid = np.linspace(1, 10, nnz_outliers) * n_samples
        outliers_size = Test.outliers_intensity \
            * np.sqrt(2 * np.log(outliers_grid))
        sample_intercepts0[is_outlier] = \
            outliers_size * np.sign(sample_intercepts0[is_outlier])

        X = features_normal_cov_toeplitz(n_samples, n_features, 0.5)
        noise = Test.noise_level * np.random.randn(n_samples)
        y = X.dot(weights0) + noise + interc0
        y += sample_intercepts0
        return X, y, weights0, interc0, sample_intercepts0
示例#2
0
    def test_ModelLinRegWithInterceptsWithoutGlobalIntercept(self):
        """...Numerical consistency check of loss and gradient for linear
        regression with sample intercepts and no global intercept
        """
        # NOTE(review): name and docstring say "no global intercept" but both
        # models below are created with fit_intercept=True -- confirm which
        # one is intended (the expected constants depend on it)
        np.random.seed(12)
        n_samples = 200
        n_features = 5
        w0 = np.random.randn(n_features)
        outliers = 50 * weights_sparse_gauss(n_weights=n_samples, nnz=30)
        simu = SimuLinReg(w0, None, n_samples=n_samples, verbose=False,
                          seed=2038)
        X, y = simu.simulate()
        # Add gross outliers to the labels
        y += outliers
        X_spars = csr_matrix(X)

        # Same model fitted on a dense and on a sparse version of the features
        model = ModelLinRegWithIntercepts(fit_intercept=True)
        model = model.fit(X, y)
        model_spars = ModelLinRegWithIntercepts(fit_intercept=True)
        model_spars = model_spars.fit(X_spars, y)
        self.run_test_for_glm(model, model_spars, 1e-4, 1e-4)

        # Hard-coded reference values for the dense model
        self.assertAlmostEqual(model.get_lip_mean(), 7.324960325598536)
        self.assertAlmostEqual(model.get_lip_max(), 31.277118951892113)
        # Dense and sparse models must give the same constants
        for getter in ('get_lip_mean', 'get_lip_max'):
            self.assertAlmostEqual(getattr(model, getter)(),
                                   getattr(model_spars, getter)())
        self.assertAlmostEqual(model.get_lip_best(), 2.7267793249045438)
示例#3
0
 def get_train_data(n_features=20, n_samples=3000, nnz=5):
     """Simulate a logistic regression dataset from sparse weights

     The global numpy seed is reset to 12 and the intercept is 0.1.

     Parameters
     ----------
     n_features : `int`, default=20
         Number of weights passed to ``weights_sparse_gauss``

     n_samples : `int`, default=3000
         Number of samples passed to ``SimuLogReg``

     nnz : `int`, default=5
         ``nnz`` argument passed to ``weights_sparse_gauss``

     Returns
     -------
     features, y : output of ``SimuLogReg.simulate``
     """
     np.random.seed(12)
     simu = SimuLogReg(weights_sparse_gauss(n_features, nnz=nnz), 0.1,
                       n_samples=n_samples, verbose=False)
     features, y = simu.simulate()
     return features, y
示例#4
0
    def test_step_type_setting(self):
        """...Test that SVRG step_type parameter behaves correctly
        """

        def assert_step_type(solver, name, internal_value):
            # Both the Python attribute and the value reported by the
            # wrapped ``_solver`` must match
            self.assertEqual(solver.step_type, name)
            self.assertEqual(solver._solver.get_step_type(), internal_value)

        default_solver = SVRG()

        coeffs0 = weights_sparse_gauss(20, nnz=5, dtype=self.dtype)
        simu = SimuLogReg(coeffs0, None, n_samples=3000, verbose=False,
                          seed=123, dtype=self.dtype)
        X, y = simu.simulate()
        model = ModelLogReg().fit(X, y)

        # Default value
        default_solver.set_model(model)
        assert_step_type(default_solver, 'fixed', SVRG_StepType_Fixed)

        # Value given at construction
        bb_solver = SVRG(step_type='bb')
        bb_solver.set_model(model)
        assert_step_type(bb_solver, 'bb', SVRG_StepType_BarzilaiBorwein)

        # Value changed through the setter, back and forth
        for name, internal_value in [('fixed', SVRG_StepType_Fixed),
                                     ('bb', SVRG_StepType_BarzilaiBorwein)]:
            bb_solver.step_type = name
            assert_step_type(bb_solver, name, internal_value)
示例#5
0
    def test_linear_model_serialization(self):
        """...Test serialization of linear models
        """
        # Each model class is paired with the simulation class used to
        # generate the data it is fitted on
        model_map = {
            ModelLinReg: SimuLinReg,
            ModelLogReg: SimuLogReg,
            ModelPoisReg: SimuPoisReg,
            ModelHinge: SimuLogReg,
            ModelQuadraticHinge: SimuLogReg,
            ModelSmoothedHinge: SimuLogReg,
        }

        for model_class, simu_class in model_map.items():
            model = model_class(fit_intercept=False)

            coeffs0 = weights_sparse_gauss(20, nnz=5)
            simu = simu_class(coeffs0, None, n_samples=100, verbose=False,
                              seed=123)
            features, labels = simu.simulate()
            model.fit(features, labels)

            # Round trip through pickle
            pickled = pickle.loads(pickle.dumps(model))

            self.assertTrue(model._model.compare(pickled._model))
            # The first row of features is used as a coefficients vector
            self.assertEqual(model.loss(features[0]),
                             pickled.loss(features[0]))
 def get_train_data(n_samples=2000, n_features=20, fit_intercept=True):
     """Simulate a linear regression dataset from sparse weights

     The global numpy seed is reset to 12 before drawing the weights.

     Parameters
     ----------
     n_samples : `int`, default=2000
         Number of samples passed to ``SimuLinReg``

     n_features : `int`, default=20
         Number of weights passed to ``weights_sparse_gauss``

     fit_intercept : `bool`, default=True
         If `True` the intercept is -1, otherwise it is `None`

     Returns
     -------
     X, y : output of ``SimuLinReg.simulate``
     weights0 : weights given to the simulation
     intercept0 : intercept given to the simulation
     """
     np.random.seed(12)
     weights0 = weights_sparse_gauss(n_features)
     intercept0 = -1. if fit_intercept else None
     simu = SimuLinReg(weights0, intercept0, n_samples=n_samples,
                       verbose=False)
     X, y = simu.simulate()
     return X, y, weights0, intercept0
示例#7
0
    def test_set_model(self):
        """...Test set_model of saga, should only accept childs of
        ModelGeneralizedLinear"""
        msg = '^SAGA accepts only childs of `ModelGeneralizedLinear`$'

        # We build a ModelCoxRegPartialLik, which is not a generalized linear
        # model. It is created outside of the assertRaises contexts so that
        # only the set_model calls are expected to raise: an error raised
        # while simulating or fitting must not be mistaken for the tested one.
        w = weights_sparse_gauss(n_weights=2, nnz=0)
        X, T, C = SimuCoxReg(w).simulate()
        model = ModelCoxRegPartialLik().fit(X, T, C)

        # Check done by the SAGA object itself
        with self.assertRaisesRegex(ValueError, msg):
            SAGA().set_model(model)

        # Check done by the wrapped ``_solver`` object
        saga = SAGA()
        with self.assertRaisesRegex(RuntimeError, msg):
            saga._solver.set_model(model._model)
示例#8
0
 def generate_logistic_data(n_features, n_samples, use_intercept=False):
     """Simulate logistic regression labels and features from sparse
     coefficients and an optional intercept

     The global numpy seed is reset to 12 before drawing the coefficients.

     Parameters
     ----------
     n_features : `int`
         Number of coefficients, must be larger than 5

     n_samples : `int`
         Number of samples passed to ``SimuLogReg``

     use_intercept : `bool`, default=False
         If `True` the intercept is 2, otherwise it is `None`

     Returns
     -------
     y, X : labels and features given by ``SimuLogReg.simulate``
     coeffs0 : coefficients given to the simulation
     interc0 : intercept given to the simulation

     Raises
     ------
     ValueError
         If ``n_features`` is not larger than 5
     """
     if n_features <= 5:
         raise ValueError("``n_features`` must be larger than 5")

     np.random.seed(12)
     coeffs0 = weights_sparse_gauss(n_features, nnz=5)
     interc0 = 2. if use_intercept else None
     X, y = SimuLogReg(coeffs0, interc0, n_samples=n_samples,
                       verbose=False).simulate()
     # Labels come first in the returned tuple
     return y, X, coeffs0, interc0
示例#9
0
    def test_variance_reduction_setting(self):
        """...SolverTest SAGA variance_reduction parameter is correctly set"""

        def assert_variance_reduction(solver, name, internal_value):
            # Both the Python attribute and the value reported by the
            # wrapped ``_solver`` must match
            self.assertEqual(solver.variance_reduction, name)
            self.assertEqual(solver._solver.get_variance_reduction(),
                             internal_value)

        default_saga = SAGA()

        coeffs0 = weights_sparse_gauss(20, nnz=5, dtype=self.dtype)
        simu = SimuLogReg(coeffs0, None, n_samples=3000, verbose=False,
                          seed=123, dtype=self.dtype)
        X, y = simu.simulate()
        model = ModelLogReg().fit(X, y)

        # Default value
        default_saga.set_model(model)
        default_saga.astype(self.dtype)
        assert_variance_reduction(default_saga, 'last',
                                  SAGA_VarianceReductionMethod_Last)

        # Value given at construction
        saga = SAGA(variance_reduction='rand')
        saga.set_model(model)
        saga.astype(self.dtype)
        assert_variance_reduction(saga, 'rand',
                                  SAGA_VarianceReductionMethod_Random)

        # Values changed through the setter
        setter_cases = [
            ('avg', SAGA_VarianceReductionMethod_Average),
            ('rand', SAGA_VarianceReductionMethod_Random),
            ('last', SAGA_VarianceReductionMethod_Last),
        ]
        for name, internal_value in setter_cases:
            saga.variance_reduction = name
            assert_variance_reduction(saga, name, internal_value)

        # Unknown names are rejected
        with self.assertRaises(ValueError):
            saga.variance_reduction = 'wrong_name'
示例#10
0
 def test_ModelLinRegWithInterceptsWithGlobalInterceptExtras(self):
     """...Extra tests for linear regression with sample intercepts and
     global intercept, check gradient wrt homemade gradient
     """
     np.random.seed(12)
     n_samples = 200
     n_features = 5
     w0 = np.random.randn(n_features)
     outliers = 50 * weights_sparse_gauss(n_weights=n_samples, nnz=30)
     simu = SimuLinReg(w0, -1., n_samples=n_samples, verbose=False,
                       seed=2038)
     X, y = simu.simulate()
     # Add gross outliers to the labels
     y += outliers

     model = ModelLinRegWithIntercepts(fit_intercept=True).fit(X, y)
     coeffs = np.random.randn(model.n_coeffs)
     model_grad = model.grad(coeffs)

     # Homemade gradient: least squares gradient on the features extended
     # with a column of ones and an identity block
     extended_features = np.hstack(
         (X, np.ones((n_samples, 1)), np.identity(n_samples)))
     residuals = extended_features.dot(coeffs) - y
     homemade_grad = extended_features.T.dot(residuals) / n_samples

     np.testing.assert_almost_equal(model_grad, homemade_grad, decimal=10)
示例#11
0
 def test_solver_bfgs(self):
     """...Check BFGS solver for Logistic Regression with Ridge
     penalization
     """
     # It is the reference solver used in other unittests so we check that
     # it's actually close to the true parameter of the simulated dataset
     np.random.seed(12)
     n_samples, n_features = 3000, 10
     coeffs0 = weights_sparse_gauss(n_features, nnz=5)
     interc0 = 2.
     simu = SimuLogReg(coeffs0, interc0, n_samples=n_samples, verbose=False)
     X, y = simu.simulate()

     model = ModelLogReg(fit_intercept=True).fit(X, y)
     prox = ProxL2Sq(strength=1e-6)
     solver = BFGS(max_iter=100, print_every=1, verbose=False, tol=1e-6)
     solver = solver.set_model(model).set_prox(prox)

     # Distance between the solution and the simulation parameters, as
     # computed by ``Test.evaluate_model``
     err = Test.evaluate_model(solver.solve(), coeffs0, interc0)
     self.assertAlmostEqual(err, 0., delta=5e-1)
示例#12
0
    def test_set_model_and_set_prox(self):
        """...Test that set_model and set_prox reject invalid objects, for
        every solver class of ``self.solvers``
        """
        np.random.seed(12)
        n_samples = TestSolver.n_samples
        n_features = TestSolver.n_features
        weights0 = weights_sparse_gauss(n_features, nnz=5)
        interc0 = 2.

        def new_solver(solver_class):
            # SDCA is created with an explicit l_l2sq, other solvers with
            # their default arguments
            if solver_class is SDCA:
                return solver_class(l_l2sq=1e-1)
            return solver_class()

        # The assertRaises context is opened inside the loop: if the loop
        # was inside the context, the first raised error would leave it and
        # only the first solver class would be checked.

        # An unfitted model must be rejected
        model = ModelLinReg()
        msg = '^Passed object ModelLinReg has not been fitted. You must call' \
              ' ``fit`` on it before passing it to ``set_model``$'
        for solver_class in self.solvers:
            solver = new_solver(solver_class)
            with self.assertRaisesRegex(ValueError, msg):
                solver.set_model(model)

        X, y = SimuLinReg(weights0,
                          interc0,
                          n_samples=n_samples,
                          verbose=False,
                          seed=123,
                          dtype=self.dtype).simulate()

        # A prox must be rejected by set_model
        prox = ProxL2Sq(strength=1e-1)
        msg = '^Passed object of class ProxL2Sq is not a Model class$'
        for solver_class in self.solvers:
            solver = new_solver(solver_class)
            with self.assertRaisesRegex(ValueError, msg):
                solver.set_model(prox)

        # A fitted model is accepted by set_model but rejected by set_prox
        model.fit(X, y)
        msg = '^Passed object of class ModelLinReg is not a Prox class$'
        for solver_class in self.solvers:
            solver_with_model = new_solver(solver_class).set_model(model)
            with self.assertRaisesRegex(ValueError, msg):
                solver_with_model.set_prox(model)
示例#13
0
    def test_robust_model_serialization(self):
        """...Test serialization of robust models
        """
        # NOTE(review): only the keys of this mapping are used below, data
        # is always simulated with SimuLinReg -- confirm this is intended
        model_map = {
            ModelAbsoluteRegression: SimuLinReg,
            ModelEpsilonInsensitive: SimuLinReg,
            ModelHuber: SimuLinReg,
            ModelLinRegWithIntercepts: SimuLinReg,
            ModelModifiedHuber: SimuLogReg
        }

        for model_class in model_map:
            # Data is simulated again for each model since the labels can
            # be modified in place below
            np.random.seed(12)
            n_samples = 100
            n_features = 5
            w0 = np.random.randn(n_features)
            outliers = 50 * weights_sparse_gauss(n_weights=n_samples, nnz=30)
            simu = SimuLinReg(w0, None, n_samples=n_samples, verbose=False,
                              seed=2038)
            X, y = simu.simulate()

            has_sample_intercepts = model_class == ModelLinRegWithIntercepts
            if has_sample_intercepts:
                y += outliers

            model = model_class(fit_intercept=False).fit(X, y)

            # Round trip through pickle
            pickled = pickle.loads(pickle.dumps(model))

            self.assertTrue(model._model.compare(pickled._model))

            if has_sample_intercepts:
                # n_samples extra entries are appended to the vector for
                # this model
                test_vector = np.hstack((X[0], np.ones(n_samples)))
            else:
                test_vector = X[0]
            self.assertEqual(model.loss(test_vector),
                             pickled.loss(test_vector))
示例#14
0
    def test_asaga_solver(self):
        """...Check ASAGA solver for a Logistic Regression with Elastic net
        penalization
        """
        seed = 1398
        np.random.seed(seed)
        n_samples, n_features = 4000, 30
        weights = weights_sparse_gauss(n_features, nnz=3).astype(self.dtype)
        intercept = 0.2
        penalty_strength = 1e-3
        sparsity = 1e-4
        sparse_features = sparse.rand(n_samples, n_features,
                                      density=sparsity, format='csr',
                                      random_state=8)
        sparse_features = sparse_features.astype(self.dtype)

        simulator = SimuLogReg(weights, n_samples=n_samples,
                               features=sparse_features, verbose=False,
                               intercept=intercept, dtype=self.dtype)
        features, labels = simulator.simulate()

        model = ModelLogReg(fit_intercept=True)
        model.fit(features, labels)
        prox = ProxElasticNet(penalty_strength, ratio=0.1,
                              range=(0, n_features))

        # Both solvers share every setting but the number of threads
        shared_settings = dict(step=1. / model.get_lip_max(), max_iter=100,
                               tol=1e-10, verbose=False, record_every=10,
                               seed=seed)
        solvers = []
        for n_threads in (1, 2):
            solver = SAGA(n_threads=n_threads, **shared_settings)
            solver.set_model(model).set_prox(prox)
            solver.solve()
            solvers.append(solver)
        saga, asaga = solvers

        np.testing.assert_array_almost_equal(saga.solution, asaga.solution,
                                             decimal=4)
        # All entries but the last one must not be all zero
        self.assertGreater(np.linalg.norm(saga.solution[:-1]), 0)
示例#15
0
.. _In Advances in Neural Information Processing Systems: http://papers.nips.cc/paper/6286-barzilai-borwein-step-size-for-stochastic-gradient-descent
"""

import numpy as np
import matplotlib.pyplot as plt
from cycler import cycler

from tick.simulation import SimuLogReg, weights_sparse_gauss
from tick.optim.solver import SVRG
from tick.optim.model import ModelLogReg
from tick.optim.prox import ProxElasticNet
from tick.plot import plot_history

# Simulate a logistic regression dataset from sparse weights
n_samples, n_features, = 5000, 50
weights0 = weights_sparse_gauss(n_features, nnz=10)
intercept0 = 0.2
X, y = SimuLogReg(weights=weights0,
                  intercept=intercept0,
                  n_samples=n_samples,
                  seed=123,
                  verbose=False).simulate()

# Model with intercept, elastic net penalization restricted to the range
# (0, n_features), and a zero starting point of size model.n_coeffs
model = ModelLogReg(fit_intercept=True).fit(X, y)
prox = ProxElasticNet(strength=1e-3, ratio=0.5, range=(0, n_features))
x0 = np.zeros(model.n_coeffs)

# Reference step is the inverse of model.get_lip_max(); a 100 times smaller
# and a 10 times larger step are tested as well
optimal_step = 1 / model.get_lip_max()
tested_steps = [optimal_step, 1e-2 * optimal_step, 10 * optimal_step]

solvers = []
示例#16
0
 def get_train_data(n_features=10, n_samples=10000, nnz=3, seed=12):
     """Simulate a Cox regression dataset from sparse coefficients

     Parameters
     ----------
     n_features : `int`, default=10
         Number of coefficients passed to ``weights_sparse_gauss``

     n_samples : `int`, default=10000
         NOTE(review): currently unused, it is not forwarded to
         ``SimuCoxReg`` -- confirm whether it should be

     nnz : `int`, default=3
         ``nnz`` argument passed to ``weights_sparse_gauss``

     seed : `int`, default=12
         Seed given to ``np.random.seed`` before drawing the coefficients

     Returns
     -------
     features, times, censoring : output of ``SimuCoxReg.simulate``
     """
     np.random.seed(seed)
     simu = SimuCoxReg(weights_sparse_gauss(n_features, nnz=nnz),
                       verbose=False)
     features, times, censoring = simu.simulate()
     return features, times, censoring
示例#17
0
    def test_serializing_solvers(self):
        """...Test serialization of solvers

        Each solver is combined with each model, pickled and unpickled, and
        the unpickled solver, model and prox are compared with the original
        ones.
        """
        ratio = 0.5
        l_enet = 1e-2
        sd = ratio * l_enet

        solvers = [
            AdaGrad(step=1e-3, max_iter=100, verbose=False, tol=0),
            SGD(step=1e-3, max_iter=100, verbose=False, tol=0),
            SDCA(l_l2sq=sd, max_iter=100, verbose=False, tol=0),
            SAGA(step=1e-3, max_iter=100, verbose=False, tol=0),
            SVRG(step=1e-3, max_iter=100, verbose=False, tol=0)
        ]
        # NOTE(review): only the keys of this mapping are used below, data
        # is always simulated with SimuLinReg -- confirm this is intended
        model_map = {
            ModelLinReg: SimuLinReg,
            ModelLogReg: SimuLogReg,
            ModelPoisReg: SimuPoisReg,
            ModelHinge: SimuLogReg,
            ModelQuadraticHinge: SimuLogReg,
            ModelSmoothedHinge: SimuLogReg,
            ModelAbsoluteRegression: SimuLinReg,
            ModelEpsilonInsensitive: SimuLinReg,
            ModelHuber: SimuLinReg,
            ModelLinRegWithIntercepts: SimuLinReg,
            ModelModifiedHuber: SimuLogReg
        }

        for solver in solvers:
            for mod in model_map:

                # Data is simulated again for each case since the labels
                # can be modified in place below
                np.random.seed(12)
                n_samples, n_features = 100, 5
                w0 = np.random.randn(n_features)
                intercept0 = 50 * weights_sparse_gauss(n_weights=n_samples,
                                                       nnz=30)
                c0 = None
                X, y = SimuLinReg(w0,
                                  c0,
                                  n_samples=n_samples,
                                  verbose=False,
                                  seed=2038).simulate()

                has_sample_intercepts = mod == ModelLinRegWithIntercepts
                if has_sample_intercepts:
                    y += intercept0

                model = mod(fit_intercept=False).fit(X, y)

                prox = ProxL1(2.)
                solver.set_model(model)
                solver.set_prox(prox)

                # Round trip through pickle
                pickled = pickle.loads(pickle.dumps(solver))

                self.assertTrue(solver._solver.compare(pickled._solver))

                self.assertTrue(
                    solver.model._model.compare(pickled.model._model))

                self.assertTrue(solver.prox._prox.compare(pickled.prox._prox))

                if has_sample_intercepts:
                    # n_samples extra entries are appended to the vector
                    # for this model
                    test_vector = np.hstack((X[0], np.ones(n_samples)))
                else:
                    test_vector = X[0]

                # The loss is compared with the one of the unpickled
                # solver's model: ``solver.model`` is the model that was
                # just set, so comparing with it would check nothing about
                # serialization
                self.assertEqual(model.loss(test_vector),
                                 pickled.model.loss(test_vector))
示例#18
0
    def check_solver(self,
                     solver,
                     fit_intercept=True,
                     model='logreg',
                     decimal=1):
        """Check solver instance finds same parameters as scipy BFGS

        Parameters
        ----------
        solver : `Solver`
            Instance of a solver to be tested

        fit_intercept : `bool`, default=True
            Model uses intercept is `True`

        model : 'linreg' | 'logreg' | 'poisreg', default='logreg'
            Name of the model used to test the solver

        decimal : `int`, default=1
            Number of decimals required for the test

        Raises
        ------
        ValueError
            If ``model`` is not one of the supported names
        """
        # Set seed for data simulation
        np.random.seed(12)
        n_samples = TestSolver.n_samples
        n_features = TestSolver.n_features

        coeffs0 = weights_sparse_gauss(n_features, nnz=5)
        interc0 = 2. if fit_intercept else None

        # Pick the simulation and model classes matching the model name
        rescale_features = False
        if model == 'linreg':
            simu_class, model_class = SimuLinReg, ModelLinReg
        elif model == 'logreg':
            simu_class, model_class = SimuLogReg, ModelLogReg
        elif model == 'poisreg':
            simu_class, model_class = SimuPoisReg, ModelPoisReg
            rescale_features = True
        else:
            raise ValueError("``model`` must be either 'linreg', 'logreg' or"
                             " 'poisreg'")

        simu = simu_class(coeffs0, interc0, n_samples=n_samples,
                          verbose=False, seed=123)
        X, y = simu.simulate()
        if rescale_features:
            # Rescale features to avoid overflows in Poisson simulations
            X /= np.linalg.norm(X, axis=1).reshape(n_samples, 1)
        model = model_class(fit_intercept=fit_intercept).fit(X, y)

        solver.set_model(model)

        strength = 1e-2
        prox = ProxL2Sq(strength, (0, model.n_features))

        if type(solver) is SDCA:
            # For SDCA the ridge strength is given through l_l2sq and no
            # penalization is given as prox
            solver.set_prox(ProxZero())
            solver.l_l2sq = strength
        else:
            solver.set_prox(prox)

        coeffs_solver = solver.solve()

        # Compare with BFGS
        bfgs = BFGS(max_iter=100, verbose=False)
        bfgs = bfgs.set_model(model).set_prox(prox)
        coeffs_bfgs = bfgs.solve()
        np.testing.assert_almost_equal(coeffs_solver, coeffs_bfgs,
                                       decimal=decimal)

        # We ensure that reached coeffs are not equal to zero
        self.assertGreater(norm(coeffs_solver), 0)

        # Objectives reached by both solvers must be close
        objective_bfgs = solver.objective(coeffs_bfgs)
        objective_solver = solver.objective(coeffs_solver)
        self.assertAlmostEqual(objective_bfgs, objective_solver, delta=1e-2)
示例#19
0
    def test_variance_reduction_setting(self):
        """...Test that SVRG variance_reduction parameter behaves correctly
        """

        def assert_variance_reduction(solver, name, internal_value):
            # Both the Python attribute and the value reported by the
            # wrapped ``_solver`` must match
            self.assertEqual(solver.variance_reduction, name)
            self.assertEqual(solver._solver.get_variance_reduction(),
                             internal_value)

        default_svrg = SVRG()

        coeffs0 = weights_sparse_gauss(20, nnz=5, dtype=self.dtype)
        simu = SimuLogReg(coeffs0, None, n_samples=3000, verbose=False,
                          seed=123, dtype=self.dtype)
        X, y = simu.simulate()
        model = ModelLogReg().fit(X, y)

        # Default value
        default_svrg.set_model(model)
        assert_variance_reduction(default_svrg, 'last',
                                  SVRG_VarianceReductionMethod_Last)

        # Value given at construction
        svrg = SVRG(variance_reduction='rand')
        svrg.set_model(model)
        assert_variance_reduction(svrg, 'rand',
                                  SVRG_VarianceReductionMethod_Random)

        # Values changed through the setter
        setter_cases = [
            ('avg', SVRG_VarianceReductionMethod_Average),
            ('rand', SVRG_VarianceReductionMethod_Random),
            ('last', SVRG_VarianceReductionMethod_Last),
        ]
        for name, internal_value in setter_cases:
            svrg.variance_reduction = name
            assert_variance_reduction(svrg, name, internal_value)

        # Unknown names are rejected, at creation and through the setter
        msg = '^variance_reduction should be one of "avg, last, rand", ' \
              'got "stuff"$'
        with self.assertRaisesRegex(ValueError, msg):
            svrg = SVRG(variance_reduction='stuff')
            svrg.set_model(model)
        with self.assertRaisesRegex(ValueError, msg):
            svrg.variance_reduction = 'stuff'

        X, y = self.simu_linreg_data(dtype=self.dtype)
        model_dense, model_spars = self.get_dense_and_sparse_linreg_model(
            X, y, dtype=self.dtype)

        # Settings that must be accepted for dense and for sparse models
        accepted_settings = (
            (model_dense, ('avg', 'last', 'rand')),
            (model_spars, ('last', 'rand')),
        )
        try:
            for fitted_model, names in accepted_settings:
                svrg.set_model(fitted_model)
                for name in names:
                    svrg.variance_reduction = name
        except Exception:
            self.fail('Setting variance_reduction in these cases should have '
                      'been ok')

        # 'avg' with a sparse model must emit exactly one UserWarning
        msg = "'avg' variance reduction cannot be used with sparse datasets"
        with catch_warnings(record=True) as caught:
            simplefilter('always')
            svrg.set_model(model_spars)
            svrg.variance_reduction = 'avg'
            self.assertEqual(len(caught), 1)
            first_warning = caught[0]
            self.assertTrue(issubclass(first_warning.category, UserWarning))
            self.assertEqual(str(first_warning.message), msg)
示例#20
0
    def _test_solver_sparse_and_dense_consistency(
        self,
        create_solver,
        model_classes=list([ModelLinReg, ModelLogReg, ModelPoisReg]),
        proxs_classes=list([ProxL2Sq, ProxL1]),
        fit_intercepts=list([False, True])):
        """...Test that solvers can run all glm models and are consistent
        with sparsity

        Parameters
        ----------
        create_solver : callable
            Called without argument to obtain a new solver for each run

        model_classes : `list`
            Model classes to test

        proxs_classes : `list`
            Prox classes to test

        fit_intercepts : `list` of `bool`
            Values of ``fit_intercept`` to test
        """
        n_samples = 50
        n_features = 10
        coeffs0 = weights_sparse_gauss(n_features, nnz=5)
        interc0 = 2.
        seed = 123
        prox_strength = 1e-3

        # Simulation class used to generate data for each model class
        model_simu_map = {
            ModelLinReg: SimuLinReg,
            ModelPoisReg: SimuPoisReg,
            ModelLogReg: SimuLogReg,
        }

        cases = itertools.product(model_classes, proxs_classes, fit_intercepts)

        for Model, Prox, fit_intercept in cases:
            interc = interc0 if fit_intercept else None

            simu = model_simu_map[Model](coeffs0, interc,
                                         n_samples=n_samples, seed=seed,
                                         verbose=False)
            X, y = simu.simulate()
            X_sparse = csr_matrix(X)

            # Solve the same problem on sparse, then on dense features
            iterates = {}
            for is_sparse, features in ((True, X_sparse), (False, X)):
                model = Model(fit_intercept=fit_intercept)
                model.fit(features, y)

                prox = Prox(prox_strength, (0, n_features))
                solver = create_solver()
                solver.set_model(model).set_prox(prox)
                iterates[is_sparse] = solver.solve()

            iterate_sparse, iterate_dense = iterates[True], iterates[False]

            error_msg = 'Failed for %s and %s solved with %s' % (
                model.name, prox.name, solver.name)
            error_msg += (' with intercept' if fit_intercept
                          else ' without intercept')

            self.assertEqual(np.isfinite(iterate_dense).all(), True, error_msg)
            np.testing.assert_almost_equal(iterate_dense, iterate_sparse,
                                           err_msg=error_msg)
示例#21
0
from tick.inference import RobustLinearRegression, std_iqr
from tick.metrics import support_fdp, support_recall

# Seed the global numpy generator; the statements below are kept in this
# order since several of them draw random numbers
np.random.seed(1)

n_samples = 1000
n_features = 5
noise_level = 1.
n_outliers = 50
outliers_intensity = 5.

# Global intercept and weights used to build the labels
intercept0 = -3.
log_linspace = np.log(n_features * np.linspace(1, 10, n_features))
weights0 = np.sqrt(2 * log_linspace)

# Sparse vector of sample intercepts: the sign of its non-zero entries is
# kept and their magnitude is replaced (log_linspace is reused here with
# the outliers' grid)
sample_intercepts0 = weights_sparse_gauss(n_weights=n_samples, nnz=n_outliers)
idx_nnz = sample_intercepts0 != 0
log_linspace = np.log(n_samples * np.linspace(1, 10, n_outliers))
sample_intercepts0[idx_nnz] = outliers_intensity * np.sqrt(2 * log_linspace) \
    * np.sign(sample_intercepts0[idx_nnz])

X = features_normal_cov_toeplitz(n_samples, n_features, 0.5)

# Labels: linear part + gaussian noise + global intercept + outliers
y = X.dot(weights0) + noise_level * np.random.randn(n_samples) \
    + intercept0 + sample_intercepts0

target_fdr = 0.1
# noise_level is overwritten by the value std_iqr computes from the labels
noise_level = std_iqr(y)
learner = RobustLinearRegression(C_sample_intercepts=2 * n_samples /
                                 noise_level,
                                 penalty='none',
示例#22
0
The plot given below compares the distance to the minimum of each solver along
iterations, on a logarithmic scale.
"""

import numpy as np
import matplotlib.pyplot as plt

from tick.simulation import SimuPoisReg, weights_sparse_gauss
from tick.inference import PoissonRegression
from tick.plot import plot_history


# Simulate a Poisson regression dataset
n_samples = 50000
n_features = 100
np.random.seed(123)
# nnz is n_features - 1, and the weights are divided by 20
weight0 = weights_sparse_gauss(n_features, nnz=int(n_features-1)) / 20.
intercept0 = -0.1
X, y = SimuPoisReg(weight0, intercept0, n_samples=n_samples,
                   verbose=False, seed=123).simulate()

# Options shared by all the learners below
opts = {'verbose': False, 'record_every': 1, 'tol': 1e-8, 'max_iter': 40}

# One learner per solver to compare
poisson_regressions = [
    PoissonRegression(solver='gd', **opts),
    PoissonRegression(solver='agd', **opts),
    PoissonRegression(solver='svrg', random_state=1234, **opts),
    PoissonRegression(solver='bfgs', **opts)
]

# Fit every learner on the same data
for poisson_regression in poisson_regressions:
    poisson_regression.fit(X, y)