示例#1
0
    def test_sdca_identity_poisreg(self):
        """...Test SDCA on the specific case of Poisson regression with
        identity link

        For a model without and with intercept, the test checks that the
        duality gap vanishes, that the simulated coefficients are roughly
        recovered and that SVRG with an L2 squared penalty reaches the same
        solution.
        """
        l_l2sq = 1e-3
        n_samples = 10000
        n_features = 3

        np.random.seed(123)
        weight0 = np.random.rand(n_features)
        features = np.random.rand(n_samples, n_features)

        for intercept in (None, 0.45):
            fit_intercept = intercept is not None

            # ``features`` is rebound on purpose: the next iteration feeds
            # the simulator with whatever the previous simulation returned
            features, labels = SimuPoisReg(
                weight0, intercept=intercept, features=features,
                n_samples=n_samples, link='identity',
                verbose=False).simulate()

            model = ModelPoisReg(fit_intercept=fit_intercept, link='identity')
            model.fit(features, labels)

            sdca = SDCA(l_l2sq=l_l2sq, max_iter=100, verbose=False, tol=1e-14,
                        seed=Test.sto_seed)
            sdca.set_model(model).set_prox(ProxZero())

            # Constant starting dual vector of value sqrt(_rand_max * l_l2sq)
            dual_value = np.sqrt(sdca._rand_max * l_l2sq)
            sdca.solve(dual_value * np.ones(sdca._rand_max))

            # Check that duality gap is 0
            primal = sdca.objective(sdca.solution)
            dual = sdca.dual_objective(sdca.dual_solution)
            self.assertAlmostEqual(primal, dual)

            # Check that original vector is approximately retrieved
            original_coeffs = np.hstack((weight0, intercept)) \
                if fit_intercept else weight0
            np.testing.assert_array_almost_equal(original_coeffs,
                                                 sdca.solution, decimal=1)

            # Ensure that we solve the same problem as other solvers
            svrg = SVRG(max_iter=100, verbose=False, tol=1e-14,
                        seed=Test.sto_seed)
            svrg.set_model(model).set_prox(ProxL2Sq(l_l2sq))
            svrg.solve(0.5 * np.ones(model.n_coeffs), step=1e-2)
            np.testing.assert_array_almost_equal(svrg.solution, sdca.solution,
                                                 decimal=4)
示例#2
0
    def _construct_prox_obj(self, coeffs=None, project=False):
        """Build the proximal operator matching the current penalization

        Parameters
        ----------
        coeffs : array-like, default=None
            Coefficients whose support is used when ``project`` is `True`.
            Only read in that case.

        project : `bool`, default=False
            If `True`, build equality constraints on the ranges returned by
            ``self._detect_support(coeffs)`` (or a `ProxZero` when all lags
            are zero) instead of the TV / GroupL1 penalties.

        Returns
        -------
        output : `ProxMulti`
            A `ProxMulti` wrapping the list of proxs that was built.

        Raises
        ------
        ValueError
            If ``project`` is `True`, some lags are non zero and ``coeffs``
            is `None`.
        """
        penalized = self.penalized_features
        n_penalized_features = len(penalized) if penalized is not None else 0

        if project:
            # project future _coeffs on the support of given _coeffs
            if all(self.n_lags == 0):
                proxs = [ProxZero()]
            elif coeffs is not None:
                prox_ranges = self._detect_support(coeffs)
                proxs = [ProxEquality(0, range=r) for r in prox_ranges]
            else:
                raise ValueError("Coeffs are None. " +
                                 "Equality penalty cannot infer the "
                                 "coefficients support.")
        elif n_penalized_features > 0 and (self._C_tv is not None or
                                           self._C_group_l1 is not None):
            # TV and GroupLasso penalties.
            # The parentheses matter: ``and`` binds tighter than ``or``, so
            # without them a GroupL1 strength alone would enter this branch
            # even when there is no penalized feature to iterate over.
            blocks_start = np.zeros(n_penalized_features)
            blocks_end = np.zeros(n_penalized_features)
            proxs = []

            # The block arrays hold one slot per *penalized* feature, so
            # they are filled by position ``k`` and not by feature index
            # ``i``, which can exceed their length when only a subset of
            # the features is penalized.
            for k, i in enumerate(penalized):
                start = int(self._features_offset[i])
                end = int(start + self._n_lags[i] + 1)
                blocks_start[k] = start
                blocks_end[k] = end
                if self._C_tv is not None:
                    proxs.append(ProxTV(1 / self._C_tv, range=(start, end)))

            if self._C_group_l1 is not None:
                blocks_size = blocks_end - blocks_start
                proxs.append(
                    ProxGroupL1(1 / self._C_group_l1, blocks_start.tolist(),
                                blocks_size.tolist()))
        else:
            # Default prox: does nothing
            proxs = [ProxZero()]

        return ProxMulti(tuple(proxs))
示例#3
0
    def test_ProxZero(self):
        """...Test of ProxZero

        Both without and with a range, the value of the prox must be zero
        and calling it must return the coefficients unchanged.
        """
        coeffs = self.coeffs.copy().astype(self.dtype)
        expected = coeffs.copy()

        # First without any argument, then restricted to the range (3, 8)
        for prox_args in ((), ((3, 8),)):
            prox = ProxZero(*prox_args).astype(self.dtype)
            self.assertAlmostEqual(prox.value(coeffs), 0., delta=1e-14)
            assert_almost_equal(prox.call(coeffs), expected, decimal=10)
示例#4
0
 def test_convergence_with_lags(self):
     """Test longitudinal multinomial model convergence.

     Simulated SCCS data is lagged, fitted with an unpenalized SVRG and the
     result is compared to the simulated coefficients up to one decimal.
     """
     n_intervals, n_lags = 10, 3
     n_samples, n_features = 1500, 3

     simulator = SimuSCCS(n_samples, n_intervals, n_features, n_lags, None,
                          True, "short", seed=42, verbose=False)
     features, labels, censoring, true_coeffs = simulator.simulate()

     lagger = LongitudinalFeaturesLagger(n_lags=n_lags)
     features = lagger.fit_transform(features, censoring)

     model = ModelSCCS(n_intervals=n_intervals, n_lags=n_lags)
     model = model.fit(features, labels, censoring)

     solver = SVRG(max_iter=15, verbose=False)
     solver.set_model(model).set_prox(ProxZero())
     # Step is the inverse of the value returned by get_lip_max()
     step = 1 / model.get_lip_max()
     fitted_coeffs = solver.solve(step=step)

     np.testing.assert_almost_equal(true_coeffs, fitted_coeffs, decimal=1)
示例#5
0
def run_solvers(model, l_l2sq):
    """Run BFGS, SVRG, SDCA, GD and AGD on the same penalized problem

    BFGS is solved first and its solution is registered as reference
    minimizer / minimum in the history of every solver (including its own,
    before a second BFGS run).

    Parameters
    ----------
    model : model object
        Model given to each solver through ``set_model``. Its
        ``get_lip_max`` and ``get_lip_best`` methods are used to set the
        steps when they are available.

    l_l2sq : `float`
        Strength passed to `ProxL2Sq` and to `SDCA`.

    Returns
    -------
    output : `tuple`
        The solvers ``(bfgs, svrg, sdca, gd, agd)`` after they have run.
    """

    def inverse_lipschitz(getter_name, fallback):
        # Inverse of the requested Lipschitz constant, or ``fallback`` when
        # an AttributeError shows up while obtaining it
        try:
            return 1. / getattr(model, getter_name)()
        except AttributeError:
            return fallback

    def solve_with_reference(solver):
        # Register the current BFGS solution as reference, then solve
        history = solver.history
        history.set_minimizer(bfgs.solution)
        history.set_minimum(bfgs.objective(bfgs.solution))
        solver.solve()

    svrg_step = inverse_lipschitz('get_lip_max', 1e-3)
    gd_step = inverse_lipschitz('get_lip_best', 1e-1)

    # BFGS runs twice: once to obtain the reference, once more with the
    # reference registered in its own history
    bfgs = BFGS(verbose=False, tol=1e-13)
    bfgs.set_model(model).set_prox(ProxL2Sq(l_l2sq))
    bfgs.solve()
    solve_with_reference(bfgs)

    svrg = SVRG(step=svrg_step, verbose=False, tol=1e-10, seed=seed)
    svrg.set_model(model).set_prox(ProxL2Sq(l_l2sq))
    solve_with_reference(svrg)

    # SDCA receives the L2 strength directly, hence the ProxZero
    sdca = SDCA(l_l2sq, verbose=False, seed=seed, tol=1e-10)
    sdca.set_model(model).set_prox(ProxZero())
    solve_with_reference(sdca)

    gd = GD(verbose=False, tol=1e-10, step=gd_step, linesearch=False)
    gd.set_model(model).set_prox(ProxL2Sq(l_l2sq))
    solve_with_reference(gd)

    agd = AGD(verbose=False, tol=1e-10, step=gd_step, linesearch=False)
    agd.set_model(model).set_prox(ProxL2Sq(l_l2sq))
    solve_with_reference(agd)

    return bfgs, svrg, sdca, gd, agd
示例#6
0
    def __init__(self, proxs: tuple):
        """Build a prox out of a sequence of proxs

        Parameters
        ----------
        proxs : `tuple` of `Prox`
            Proxs to combine. Each one must be a `Prox` instance with a
            non-`None` ``_prox`` attribute, and all must share the same
            ``dtype``. When empty, a single `ProxZero` is used instead.

        Raises
        ------
        ValueError
            If an element is not a `Prox`, has no usable ``_prox``
            attribute, or if the proxs do not all have the same dtype.
        """
        # strength of ProxMulti is 0., since it's not used
        Prox.__init__(self, None)
        if not proxs:
            proxs = [ProxZero()]

        # Validate every element before touching ``dtype``, so that a non
        # Prox object in first position triggers the intended ValueError
        # rather than an AttributeError on ``proxs[0].dtype``
        for prox in proxs:
            if not isinstance(prox, Prox):
                raise ValueError('%s is not a Prox' % prox.__class__.__name__)
            if getattr(prox, '_prox', None) is None:
                raise ValueError('%s cannot be used in ProxMulti' % prox.name)

        dtype = proxs[0].dtype
        self.dtype = dtype
        if any(dtype != prox.dtype for prox in proxs):
            raise ValueError(
                'ProxMulti can only handle proxes with same dtype')

        self.proxs = [prox._prox for prox in proxs]
        self._prox = self._build_cpp_prox(dtype)
示例#7
0
 def test_convergence_with_lags(self):
     """Test longitudinal multinomial model convergence.

     Same check as the single-lag variant but with one lag value per
     feature: the SVRG solution must match the simulated coefficients up to
     one decimal.
     """
     n_intervals = 10
     n_samples, n_features = 800, 2
     # One lag count per feature
     n_lags = np.repeat(2, n_features).astype(dtype="uint64")

     simulator = SimuSCCS(n_samples, n_intervals, n_features, n_lags, None,
                          "multiple_exposures", seed=42)
     _, features, labels, censoring, true_coeffs = simulator.simulate()
     true_coeffs = np.hstack(true_coeffs)

     lagger = LongitudinalFeaturesLagger(n_lags=n_lags)
     features, _, _ = lagger.fit_transform(features, censoring)

     model = ModelSCCS(n_intervals=n_intervals, n_lags=n_lags)
     model = model.fit(features, labels, censoring)

     solver = SVRG(max_iter=15, verbose=False)
     solver.set_model(model).set_prox(ProxZero())
     # Step is the inverse of the value returned by get_lip_max()
     step = 1 / model.get_lip_max()
     fitted_coeffs = solver.solve(step=step)

     np.testing.assert_almost_equal(true_coeffs, fitted_coeffs, decimal=1)
示例#8
0
 def __init__(self, **kwargs):
     """Initialise the object and tag it with the "zero" mangling name.

     All keyword arguments are forwarded unchanged to both ``PROX.__init__``
     and ``TPZERO.__init__``.
     """
     # NOTE(review): PROX and TPZERO are presumably the two base classes of
     # this class; they are initialised explicitly, in this order -- confirm
     PROX.__init__(self, **kwargs)
     TPZERO.__init__(self, **kwargs)
     # Set through object.__setattr__ so that any __setattr__ defined on
     # this class hierarchy is bypassed for this attribute
     object.__setattr__(self, "_MANGLING", "zero")
示例#9
0
    def check_solver(self,
                     solver,
                     fit_intercept=True,
                     model='logreg',
                     decimal=1):
        """Check solver instance finds same parameters as scipy BFGS

        Parameters
        ----------
        solver : `Solver`
            Instance of a solver to be tested

        fit_intercept : `bool`, default=True
            Model uses an intercept if `True`

        model : 'linreg' | 'logreg' | 'poisreg', default='logreg'
            Name of the model used to test the solver

        decimal : `int`, default=1
            Number of decimals required for the test

        Raises
        ------
        ValueError
            If ``model`` is not one of the supported names
        """
        # Set seed for data simulation
        np.random.seed(12)
        n_samples = TestSolver.n_samples
        n_features = TestSolver.n_features

        coeffs0 = weights_sparse_gauss(n_features, nnz=5)
        interc0 = 2. if fit_intercept else None

        # Pick the simulation and model classes matching the model name
        if model == 'linreg':
            simu_class, model_class = SimuLinReg, ModelLinReg
        elif model == 'logreg':
            simu_class, model_class = SimuLogReg, ModelLogReg
        elif model == 'poisreg':
            simu_class, model_class = SimuPoisReg, ModelPoisReg
        else:
            raise ValueError("``model`` must be either 'linreg', 'logreg' or"
                             " 'poisreg'")

        X, y = simu_class(coeffs0,
                          interc0,
                          n_samples=n_samples,
                          verbose=False,
                          seed=123).simulate()
        if model == 'poisreg':
            # Rescale features to avoid overflows in Poisson simulations
            X /= np.linalg.norm(X, axis=1).reshape(n_samples, 1)
        fitted_model = model_class(fit_intercept=fit_intercept).fit(X, y)

        solver.set_model(fitted_model)

        strength = 1e-2
        prox = ProxL2Sq(strength, (0, fitted_model.n_features))

        if type(solver) is SDCA:
            # SDCA gets the L2 strength through its own attribute and
            # therefore uses a prox that does nothing
            solver.set_prox(ProxZero())
            solver.l_l2sq = strength
        else:
            solver.set_prox(prox)

        coeffs_solver = solver.solve()

        # Compare with BFGS
        bfgs = BFGS(max_iter=100, verbose=False)
        bfgs = bfgs.set_model(fitted_model).set_prox(prox)
        coeffs_bfgs = bfgs.solve()
        np.testing.assert_almost_equal(coeffs_solver,
                                       coeffs_bfgs,
                                       decimal=decimal)

        # We ensure that reached coeffs are not equal to zero
        self.assertGreater(norm(coeffs_solver), 0)

        objective_bfgs = solver.objective(coeffs_bfgs)
        objective_solver = solver.objective(coeffs_solver)
        self.assertAlmostEqual(objective_bfgs, objective_solver, delta=1e-2)
示例#10
0
Plot examples of proximal operators available in tick
"""
import numpy as np
import matplotlib.pyplot as plt
from tick.prox import ProxL1, ProxElasticNet, ProxL2Sq, \
    ProxPositive, ProxSlope, ProxTV, ProxZero, ProxBinarsity, ProxGroupL1, \
    ProxEquality, ProxL1w

# Fixed seed so that the sampled vector is reproducible
np.random.seed(12)
x = np.random.randn(50)

# Bounds slightly wider than the range of values in x
a = x.min() - 1e-1
b = x.max() + 1e-1

# Common strength shared by most penalties below
s = 0.4

# One instance of each proximal operator, all sized for the 50 entries of x
proxs = [
    ProxZero(),
    ProxPositive(),
    ProxL2Sq(strength=s),
    ProxL1(strength=s),
    ProxElasticNet(strength=s, ratio=0.5),
    ProxSlope(strength=s),
    ProxTV(strength=s),
    ProxEquality(range=(25, 40)),
    ProxL1w(
        strength=s,
        weights=0.1 * np.arange(50, dtype=np.double),
    ),
    # Five consecutive blocks of ten coefficients each
    ProxGroupL1(
        strength=2 * s,
        blocks_start=np.arange(0, 50, 10),
        blocks_length=10 * np.ones((5, )),
    ),
    ProxBinarsity(
        strength=s,
        blocks_start=np.arange(0, 50, 10),
        blocks_length=10 * np.ones((5, )),
    ),
]
示例#11
0
    def test_serializing_solvers(self):
        """...Test serialization of solvers

        Every combination of solver, model class and prox is pickled and
        unpickled, then the solver, model and prox are compared with their
        originals.
        """
        ratio = 0.5
        l_enet = 1e-2
        sd = ratio * l_enet

        proxes = [
            ProxZero(),
            ProxTV(2),
            ProxL1(2),
            ProxGroupL1(strength=1, blocks_start=[0, 3, 8],
                        blocks_length=[3, 5, 2]),
        ]
        solvers = [
            AdaGrad(step=1e-3, max_iter=100, verbose=False, tol=0),
            SGD(step=1e-3, max_iter=100, verbose=False, tol=0),
            SDCA(l_l2sq=sd, max_iter=100, verbose=False, tol=0),
            SAGA(step=1e-3, max_iter=100, verbose=False, tol=0),
            SVRG(step=1e-3, max_iter=100, verbose=False, tol=0),
        ]
        # Only the keys (model classes) are iterated over below; the data
        # itself is always simulated with SimuLinReg
        model_map = {
            ModelLinReg: SimuLinReg,
            ModelLogReg: SimuLogReg,
            ModelPoisReg: SimuPoisReg,
            ModelHinge: SimuLogReg,
            ModelQuadraticHinge: SimuLogReg,
            ModelSmoothedHinge: SimuLogReg,
            ModelAbsoluteRegression: SimuLinReg,
            ModelEpsilonInsensitive: SimuLinReg,
            ModelHuber: SimuLinReg,
            ModelLinRegWithIntercepts: SimuLinReg,
            ModelModifiedHuber: SimuLogReg,
        }

        for solver in solvers:
            for mod in model_map:
                with_intercepts = mod == ModelLinRegWithIntercepts

                for prox in proxes:
                    np.random.seed(12)
                    n_samples, n_features = 100, 5
                    w0 = np.random.randn(n_features)
                    intercept0 = 50 * weights_sparse_gauss(n_weights=n_samples,
                                                           nnz=30)
                    c0 = None
                    simu = SimuLinReg(w0, c0, n_samples=n_samples,
                                      verbose=False, seed=2038)
                    X, y = simu.simulate()
                    if with_intercepts:
                        y += intercept0

                    model = mod(fit_intercept=False).fit(X, y)

                    solver.set_model(model)
                    solver.set_prox(prox)

                    pickled = pickle.loads(pickle.dumps(solver))

                    self.assertTrue(solver._solver.compare(pickled._solver))
                    self.assertTrue(
                        solver.model._model.compare(pickled.model._model))
                    self.assertTrue(
                        solver.prox._prox.compare(pickled.prox._prox))

                    # This model takes one extra coefficient per sample
                    if with_intercepts:
                        test_vector = np.hstack((X[0], np.ones(n_samples)))
                    else:
                        test_vector = X[0]

                    # NOTE(review): ``solver.model`` is presumably the very
                    # object given to ``set_model`` above, in which case
                    # this compares the model with itself; comparing with
                    # ``pickled.model`` may be what was meant -- confirm
                    self.assertEqual(model.loss(test_vector),
                                     solver.model.loss(test_vector))