Пример #1
0
    def predict_proba(self, X):
        """
        Probability estimates.

        The returned estimates for all classes are ordered by the
        label of classes.

        Parameters
        ----------
        X : `np.ndarray` or `scipy.sparse.csr_matrix`, shape=(n_samples, n_features)
            Input features matrix

        Returns
        -------
        output : `np.ndarray`, shape=(n_samples, 2)
            Returns the probability of the sample for each class
            in the model in the same order as in `self.classes`

        Raises
        ------
        ValueError
            If ``fit`` has not been called yet
        """
        # Guard clause: scoring requires a fitted model
        if not self._fitted:
            raise ValueError("You must call ``fit`` before")
        scores = self.decision_function(X)
        # ``sigmoid`` fills its second argument in place with P(y = 1)
        proba_pos = np.empty(scores.shape[0])
        ModelLogReg.sigmoid(scores, proba_pos)
        # Column 0 holds P(y = 0), column 1 holds P(y = 1)
        return np.stack((1. - proba_pos, proba_pos), axis=1)
Пример #2
0
    def compare_solver_sdca(self):
        """...Compare SDCA solution with SVRG solution
        """
        np.random.seed(12)
        n_samples = Test.n_samples
        n_features = Test.n_features

        for intercept_flag in (True, False):
            y, X, coeffs0, interc0 = TestSolver.generate_logistic_data(
                n_features, n_samples)
            model = ModelLogReg(fit_intercept=intercept_flag).fit(X, y)

            enet_ratio = 0.5
            enet_strength = 1e-2

            # SDCA "elastic-net" formulation is different from elastic-net
            # implementation: the ridge part goes through ``l_l2sq`` while
            # the L1 part is handled by a separate ProxL1
            sdca_solver = SDCA(l_l2sq=enet_ratio * enet_strength,
                               max_iter=100, verbose=False, tol=0,
                               seed=Test.sto_seed)
            sdca_solver.set_model(model)
            sdca_solver.set_prox(ProxL1((1 - enet_ratio) * enet_strength))
            coeffs_sdca = sdca_solver.solve()

            # Reference: SVRG with a genuine elastic-net prox
            svrg_solver = SVRG(max_iter=100, verbose=False, tol=0,
                               seed=Test.sto_seed)
            svrg_solver.set_model(model)
            svrg_solver.set_prox(ProxElasticNet(enet_strength, enet_ratio))
            coeffs_svrg = svrg_solver.solve(step=0.1)

            np.testing.assert_allclose(coeffs_sdca, coeffs_svrg)
Пример #3
0
 def prepare_solver(solver,
                    X,
                    y,
                    fit_intercept=True,
                    model="logistic",
                    prox="l2"):
     """Attach a fitted model and (optionally) a prox to ``solver``.

     ``model`` may be the string 'logistic' or 'poisson' (a model is then
     fitted on ``X``, ``y``); ``prox`` may be the string 'l2' (squared-L2
     over every coefficient), a prox instance, or None for no prox.
     """
     model_classes = {"logistic": ModelLogReg, "poisson": ModelPoisReg}
     if model in model_classes:
         model = model_classes[model](fit_intercept=fit_intercept).fit(X, y)
     solver.set_model(model)
     if prox == "l2":
         # Default penalization: squared L2 applied to all coefficients
         prox = ProxL2Sq(TestSolver.l_l2sq, (0, model.n_coeffs))
     if prox is not None:
         solver.set_prox(prox)
Пример #4
0
    def test_solver_sdca(self):
        """...Check SDCA solver for a Logistic regression with Ridge
        penalization and L1 penalization
        """
        # Generic check against the BFGS reference solver
        sdca_ridge = SDCA(l_l2sq=1e-5, max_iter=100, verbose=False, tol=0)
        self.check_solver(sdca_ridge,
                          fit_intercept=False,
                          model="logreg",
                          decimal=1)

        # Now a specific test with a real prox for SDCA
        np.random.seed(12)
        n_samples = Test.n_samples
        n_features = Test.n_features

        for intercept_flag in (True, False):
            y, X, coeffs0, interc0 = TestSolver.generate_logistic_data(
                n_features, n_samples)
            model = ModelLogReg(fit_intercept=intercept_flag).fit(X, y)

            enet_ratio = 0.5
            enet_strength = 1e-2

            # SDCA "elastic-net" formulation is different from elastic-net
            # implementation: ridge part through ``l_l2sq``, L1 part
            # through a ProxL1
            sdca = SDCA(l_l2sq=enet_ratio * enet_strength,
                        max_iter=100,
                        verbose=False,
                        tol=0,
                        seed=Test.sto_seed)
            sdca.set_model(model)
            sdca.set_prox(ProxL1((1 - enet_ratio) * enet_strength))
            coeffs_sdca = sdca.solve()

            # Compare with SVRG using a genuine elastic-net prox
            svrg = SVRG(max_iter=100, verbose=False, tol=0,
                        seed=Test.sto_seed)
            svrg.set_model(model)
            svrg.set_prox(ProxElasticNet(enet_strength, enet_ratio))
            coeffs_svrg = svrg.solve(step=0.1)

            np.testing.assert_allclose(coeffs_sdca, coeffs_svrg)
Пример #5
0
 def test_solver_bfgs(self):
     """...Check BFGS solver for Logistic Regression with Ridge
     penalization
     """
     # It is the reference solver used in other unittests so we check that
     # it's actually close to the true parameter of the simulated dataset
     np.random.seed(12)
     n_samples, n_features = 3000, 10
     true_coeffs = weights_sparse_gauss(n_features, nnz=5)
     true_intercept = 2.
     X, y = SimuLogReg(true_coeffs, true_intercept, n_samples=n_samples,
                       verbose=False).simulate()
     model = ModelLogReg(fit_intercept=True).fit(X, y)
     # Tiny ridge penalization just to keep the problem well-conditioned
     solver = BFGS(max_iter=100, print_every=1, verbose=False, tol=1e-6)
     solver.set_model(model).set_prox(ProxL2Sq(strength=1e-6))
     estimated = solver.solve()
     err = Test.evaluate_model(estimated, true_coeffs, true_intercept)
     self.assertAlmostEqual(err, 0., delta=5e-1)
Пример #6
0
def create_model(model_type, n_samples, n_features, with_intercept=True):
    """Simulate a GLM dataset and return the model fitted on it.

    Parameters
    ----------
    model_type : 'Linear' | 'Logistic' | 'Poisson'
        Family of the generalized linear model to simulate and fit

    n_samples : `int`
        Number of simulated samples

    n_features : `int`
        Number of features; weights are drawn i.i.d. standard Gaussian

    with_intercept : `bool`, default=True
        If `True`, an intercept is simulated and fitted as well

    Returns
    -------
    model : `ModelLinReg`, `ModelLogReg` or `ModelPoisReg`
        Model fitted on the simulated data

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names
    """
    simulators = {'Linear': SimuLinReg, 'Logistic': SimuLogReg,
                  'Poisson': SimuPoisReg}
    model_classes = {'Linear': ModelLinReg, 'Logistic': ModelLogReg,
                     'Poisson': ModelPoisReg}
    # Fail fast with a clear error instead of the late NameError the
    # original if/elif chains produced for an unknown model type
    if model_type not in simulators:
        raise ValueError("Unknown model_type: %s" % model_type)

    weights = np.random.randn(n_features)
    intercept = None
    if with_intercept:
        intercept = np.random.normal()

    if model_type == 'Poisson':
        # we need to rescale features to avoid overflows
        weights /= n_features
        if intercept is not None:
            intercept /= n_features

    simulator = simulators[model_type](weights,
                                       intercept=intercept,
                                       n_samples=n_samples,
                                       verbose=False)
    # ``simulate`` returns (features, labels) — the original code unpacked
    # them with swapped names and then re-swapped them in the fit() call;
    # the call order is unchanged, only the names are now correct
    features, labels = simulator.simulate()

    model = model_classes[model_type](fit_intercept=with_intercept)
    model.fit(features, labels)
    return model
Пример #7
0
    def check_solver(self,
                     solver,
                     fit_intercept=True,
                     model='logreg',
                     decimal=1):
        """Check solver instance finds same parameters as scipy BFGS

        Parameters
        ----------
        solver : `Solver`
            Instance of a solver to be tested

        fit_intercept : `bool`, default=True
            Model uses intercept is `True`

        model : 'linreg' | 'logreg' | 'poisreg', default='logreg'
            Name of the model used to test the solver

        decimal : `int`, default=1
            Number of decimals required for the test

        Raises
        ------
        ValueError
            If ``model`` is not one of the three supported names
        """
        # Set seed for data simulation
        np.random.seed(12)
        n_samples = TestSolver.n_samples
        n_features = TestSolver.n_features

        coeffs0 = weights_sparse_gauss(n_features, nnz=5)
        interc0 = 2. if fit_intercept else None

        simulators = {'linreg': SimuLinReg, 'logreg': SimuLogReg,
                      'poisreg': SimuPoisReg}
        model_classes = {'linreg': ModelLinReg, 'logreg': ModelLogReg,
                         'poisreg': ModelPoisReg}
        if model not in simulators:
            raise ValueError("``model`` must be either 'linreg', 'logreg' or"
                             " 'poisreg'")

        X, y = simulators[model](coeffs0,
                                 interc0,
                                 n_samples=n_samples,
                                 verbose=False,
                                 seed=123).simulate()
        if model == 'poisreg':
            # Rescale features to avoid overflows in Poisson simulations
            X /= np.linalg.norm(X, axis=1).reshape(n_samples, 1)
        model = model_classes[model](fit_intercept=fit_intercept).fit(X, y)

        solver.set_model(model)

        strength = 1e-2
        prox = ProxL2Sq(strength, (0, model.n_features))

        if type(solver) is SDCA:
            # SDCA takes the ridge penalization directly through l_l2sq
            solver.set_prox(ProxZero())
            solver.l_l2sq = strength
        else:
            solver.set_prox(prox)

        coeffs_solver = solver.solve()
        # Compare with BFGS
        bfgs = BFGS(max_iter=100,
                    verbose=False).set_model(model).set_prox(prox)
        coeffs_bfgs = bfgs.solve()
        np.testing.assert_almost_equal(coeffs_solver,
                                       coeffs_bfgs,
                                       decimal=decimal)

        # We ensure that reached coeffs are not equal to zero
        self.assertGreater(norm(coeffs_solver), 0)

        self.assertAlmostEqual(solver.objective(coeffs_bfgs),
                               solver.objective(coeffs_solver),
                               delta=1e-2)
Пример #8
0
 def _construct_model_obj(self, fit_intercept=True):
     """Build the model instance under test: a logistic regression."""
     model = ModelLogReg(fit_intercept)
     return model
Пример #9
0
import numpy as np

from tick.optim.model import ModelLogReg
from tick.optim.prox import ProxElasticNet
from tick.optim.solver import SVRG
from tick.plot import plot_history
from tick.simulation import SimuLogReg, weights_sparse_gauss

# Simulate a sparse logistic regression dataset
n_samples, n_features, = 5000, 50
weights0 = weights_sparse_gauss(n_features, nnz=10)
intercept0 = 0.2
X, y = SimuLogReg(weights=weights0,
                  intercept=intercept0,
                  n_samples=n_samples,
                  seed=123,
                  verbose=False).simulate()

model = ModelLogReg(fit_intercept=True).fit(X, y)
prox = ProxElasticNet(strength=1e-3, ratio=0.5, range=(0, n_features))
# ``np`` was used below without being imported — fixed by adding the
# ``import numpy as np`` line above
x0 = np.zeros(model.n_coeffs)

# Step given by the inverse of the max Lipschitz constant, bracketed by a
# much smaller and a larger step for comparison
optimal_step = 1 / model.get_lip_max()
tested_steps = [optimal_step, 1e-2 * optimal_step, 10 * optimal_step]

solvers = []
solver_labels = []

for step in tested_steps:
    svrg = SVRG(max_iter=30, tol=1e-10, verbose=False)
    svrg.set_model(model).set_prox(prox)
    svrg.solve(step=step)

    svrg_bb = SVRG(max_iter=30, tol=1e-10, verbose=False, step_type='bb')
Пример #10
0
import numpy as np
from tick.simulation import SimuLogReg, weights_sparse_gauss
from tick.optim.solver import GD, AGD, SGD, SVRG, SDCA
from tick.optim.model import ModelLogReg
from tick.optim.prox import ProxElasticNet, ProxL1
from tick.plot import plot_history

n_samples, n_features, = 5000, 50
weights0 = weights_sparse_gauss(n_features, nnz=10)
intercept0 = 0.2
# Simulated logistic regression data: features X and labels y
X, y = SimuLogReg(weights=weights0, intercept=intercept0,
                  n_samples=n_samples, seed=123, verbose=False).simulate()

model = ModelLogReg(fit_intercept=True).fit(X, y)
# Elastic-net penalization over the weights only (range excludes the
# intercept coefficient)
prox = ProxElasticNet(strength=1e-3, ratio=0.5, range=(0, n_features))

# Settings shared by every solver compared below
solver_params = {'max_iter': 100, 'tol': 0., 'verbose': False}
x0 = np.zeros(model.n_coeffs)

# Gradient descent with a fixed step 1 / get_lip_best (no line search)
gd = GD(linesearch=False, **solver_params).set_model(model).set_prox(prox)
gd.solve(x0, step=1 / model.get_lip_best())

# Accelerated gradient descent, same fixed step
agd = AGD(linesearch=False, **solver_params).set_model(model).set_prox(prox)
agd.solve(x0, step=1 / model.get_lip_best())

# Stochastic gradient descent with a large constant step
sgd = SGD(**solver_params).set_model(model).set_prox(prox)
sgd.solve(x0, step=500.)

# Variance-reduced SGD with step 1 / get_lip_max
svrg = SVRG(**solver_params).set_model(model).set_prox(prox)
svrg.solve(x0, step=1 / model.get_lip_max())
Пример #11
0
    def test_ModelLogReg(self):
        """...Numerical consistency check of loss and gradient for Logistic
        Regression
        """

        # NOTE: the seeded draw order below matters — w0 and c0 must be
        # drawn in this exact sequence for the pinned reference values
        np.random.seed(12)
        n_samples, n_features = 5000, 10
        w0 = np.random.randn(n_features)
        c0 = np.random.randn()

        # First check with intercept
        X, y = SimuLogReg(w0, c0, n_samples=n_samples,
                          verbose=False).simulate()
        X_spars = csr_matrix(X)
        model = ModelLogReg(fit_intercept=True).fit(X, y)
        model_spars = ModelLogReg(fit_intercept=True).fit(X_spars, y)
        # Dense and sparse inputs must yield consistent loss/gradient
        self.run_test_for_glm(model, model_spars, 1e-5, 1e-4)
        self._test_glm_intercept_vs_hardcoded_intercept(model)

        # Then check without intercept
        X, y = SimuLogReg(w0,
                          None,
                          n_samples=n_samples,
                          verbose=False,
                          seed=2038).simulate()
        X_spars = csr_matrix(X)
        model = ModelLogReg(fit_intercept=False).fit(X, y)

        model_spars = ModelLogReg(fit_intercept=False).fit(X_spars, y)
        self.run_test_for_glm(model, model_spars, 1e-5, 1e-4)
        self._test_glm_intercept_vs_hardcoded_intercept(model)

        # Test for the Lipschitz constants without intercept
        # (reference values pinned from a previous run of this simulation)
        self.assertAlmostEqual(model.get_lip_best(), 0.67184209642814952)
        self.assertAlmostEqual(model.get_lip_mean(), 2.48961431697108)
        self.assertAlmostEqual(model.get_lip_max(), 13.706542412138093)
        # Sparse and dense models must agree on the constants
        self.assertAlmostEqual(model_spars.get_lip_mean(),
                               model.get_lip_mean())
        self.assertAlmostEqual(model_spars.get_lip_max(), model.get_lip_max())

        # Test for the Lipschitz constants with intercept
        model = ModelLogReg(fit_intercept=True).fit(X, y)
        model_spars = ModelLogReg(fit_intercept=True).fit(X_spars, y)
        self.assertAlmostEqual(model.get_lip_best(), 0.671892096428)
        self.assertAlmostEqual(model.get_lip_mean(), 2.739614316971082)
        self.assertAlmostEqual(model.get_lip_max(), 13.956542412138093)
        self.assertAlmostEqual(model_spars.get_lip_mean(),
                               model.get_lip_mean())
        self.assertAlmostEqual(model_spars.get_lip_max(), model.get_lip_max())
Пример #12
0
import numpy as np
import matplotlib.pyplot as plt

from tick.simulation import SimuLogReg, weights_sparse_gauss
from tick.optim.model import ModelLogReg

n_samples, n_features = 2000, 50
weights0 = weights_sparse_gauss(n_weights=n_features, nnz=10)
intercept0 = 1.
# Simulated logistic regression data (features X, labels y)
X, y = SimuLogReg(weights0,
                  intercept=intercept0,
                  seed=123,
                  n_samples=n_samples,
                  verbose=False).simulate()

model = ModelLogReg(fit_intercept=True).fit(X, y)

# Full coefficient vector: the weights followed by the intercept
coeffs0 = np.concatenate([weights0, [intercept0]])

# Stem plots of the model gradient at the true simulation coefficients
# (left) and at an arbitrary all-ones vector (right)
_, ax = plt.subplots(1, 2, sharey=True, figsize=(9, 3))
ax[0].stem(model.grad(coeffs0))
ax[0].set_title(r"$\nabla f(\mathrm{coeffs0})$", fontsize=16)
ax[1].stem(model.grad(np.ones(model.n_coeffs)))
ax[1].set_title(r"$\nabla f(\mathrm{coeffs1})$", fontsize=16)

plt.show()
Пример #13
0
# NOTE(review): ``n_samples`` and ``seed`` are defined earlier in the full
# script — this excerpt does not show them; confirm before reuse
n_features = 20000
sparsity = 1e-4
penalty_strength = 1e-6

weights = weights_sparse_gauss(n_features, nnz=10)
intercept = 0.2
# Very sparse random design matrix in CSR format
features = sparse.rand(n_samples, n_features, density=sparsity, format='csr')

# Simulate logistic labels on top of the provided sparse features
simulator = SimuLogReg(weights,
                       n_samples=n_samples,
                       features=features,
                       verbose=False,
                       intercept=intercept)
features, labels = simulator.simulate()

model = ModelLogReg(fit_intercept=True)
model.fit(features, labels)
prox = ProxElasticNet(penalty_strength, ratio=0.5, range=(0, n_features))
# SVRG step: inverse of the max Lipschitz constant
svrg_step = 1. / model.get_lip_max()

# Compare SVRG runs with an increasing number of threads
test_n_threads = [1, 2, 4]

svrg_list = []
svrg_labels = []

for n_threads in test_n_threads:
    svrg = SVRG(step=svrg_step,
                seed=seed,
                max_iter=30,
                verbose=False,
                n_threads=n_threads)