Пример #1
0
    def test_predict(self):
        """...Test LogReg prediction

        The learner is trained with several encodings of the two classes and
        its predictions must be expressed with the same encoding.
        """
        # Each pair holds the labels substituted for the raw -1 and 1 classes
        label_pairs = [(-1., 1.), (1., -1.), (1, 0), (0, 1), ('cat', 'dog')]
        raw_expected = [1., 1., -1., 1., 1.]

        for negative_label, positive_label in label_pairs:
            relabel = np.vectorize({-1: negative_label,
                                    1: positive_label}.get)

            X, y = Test.get_train_data(n_features=12, n_samples=300, nnz=0)
            y = relabel(y)

            learner = LogisticRegression(random_state=32789, tol=1e-9)
            learner.fit(X, y)

            X_test, _ = Test.get_train_data(n_features=12, n_samples=5,
                                            nnz=0)
            expected = relabel(raw_expected)
            np.testing.assert_array_equal(learner.predict(X_test), expected)
Пример #2
0
    def test_LogisticRegression_settings(self):
        """...Test LogisticRegression basic settings

        Each ``solver`` and ``penalty`` name must yield an instance of the
        mapped solver / prox class, and an unknown name must raise a
        ``ValueError`` with the expected message.
        """
        # solver
        solver_class_map = {'gd': GD, 'agd': AGD, 'sgd': SGD,
                            'svrg': SVRG, 'bfgs': BFGS, 'sdca': SDCA}
        for solver in solvers:
            learner = LogisticRegression(solver=solver,
                                         **Test.specific_solver_kwargs(solver))
            solver_class = solver_class_map[solver]
            # assertIsInstance reports the offending type when it fails
            self.assertIsInstance(learner._solver_obj, solver_class)

        msg = '^``solver`` must be one of agd, bfgs, gd, sdca, sgd, ' \
              'svrg, got wrong_name$'
        with self.assertRaisesRegex(ValueError, msg):
            LogisticRegression(solver='wrong_name')

        # prox
        prox_class_map = {'none': ProxZero, 'l1': ProxL1, 'l2': ProxL2Sq,
                          'elasticnet': ProxElasticNet, 'tv': ProxTV,
                          'binarsity': ProxBinarsity}
        for penalty in penalties:
            if penalty == 'binarsity':
                # binarsity is built with explicit block boundaries here
                learner = LogisticRegression(penalty=penalty, blocks_start=[0],
                                             blocks_length=[1])
            else:
                learner = LogisticRegression(penalty=penalty)
            prox_class = prox_class_map[penalty]
            self.assertIsInstance(learner._prox_obj, prox_class)

        msg = '^``penalty`` must be one of binarsity, elasticnet, l1, l2, none, ' \
              'tv, got wrong_name$'
        with self.assertRaisesRegex(ValueError, msg):
            LogisticRegression(penalty='wrong_name')
Пример #3
0
    def test_LogisticRegression_fit(self):
        """...Test LogisticRegression fit with different solvers and penalties

        Every tested combination of intercept, penalty and solver is fitted
        with ``max_iter=10`` and must reach an AUC above 0.7 on the data it
        was trained on.
        """
        sto_seed = 179312
        raw_features, y = Test.get_train_data()

        for fit_intercept in [True, False]:
            for penalty in penalties:
                binarizer = None
                features = raw_features
                if penalty == 'binarsity':
                    # binarize features
                    binarizer = FeaturesBinarizer(n_cuts=3)
                    features = binarizer.fit_transform(raw_features)

                for solver in solvers:
                    # BFGS only accepts ProxZero and ProxL2sq for now
                    if solver == 'bfgs' and penalty not in ['none', 'l2']:
                        continue

                    learner_kwargs = dict(penalty=penalty, tol=1e-5,
                                          solver=solver, verbose=False,
                                          max_iter=10,
                                          fit_intercept=fit_intercept)

                    if penalty != 'none':
                        learner_kwargs['C'] = 100
                    if binarizer is not None:
                        learner_kwargs['blocks_start'] = \
                            binarizer.feature_indices[:-1, ]
                        learner_kwargs['blocks_length'] = binarizer.n_values
                    if solver == 'sdca':
                        learner_kwargs['sdca_ridge_strength'] = 2e-2
                    if solver in ['sgd', 'svrg', 'sdca']:
                        learner_kwargs['random_state'] = sto_seed
                    if solver == 'sgd':
                        learner_kwargs['step'] = 1.

                    learner = LogisticRegression(**learner_kwargs)
                    learner.fit(features, y)

                    positive_probas = learner.predict_proba(features)[:, 1]
                    failure_msg = ("solver %s with penalty %s and "
                                   "intercept %s reached too low AUC" %
                                   (solver, penalty, fit_intercept))
                    self.assertGreater(roc_auc_score(y, positive_probas),
                                       0.7, failure_msg)
Пример #4
0
    def test_LogisticRegression_penalty_C(self):
        """...Test LogisticRegression setting of parameter of C

        With a penalty, ``C`` must be mirrored as ``1 / C`` in the prox
        strength; without one, providing ``C`` must emit a warning. A
        negative ``C`` must always be rejected.
        """
        for penalty in penalties:
            if penalty == 'none':
                warn_msg = '^You cannot set C for penalty "%s"$' % penalty
                with self.assertWarnsRegex(RuntimeWarning, warn_msg):
                    LogisticRegression(penalty=penalty, C=self.float_1)

                learner = LogisticRegression(penalty=penalty)
                with self.assertWarnsRegex(RuntimeWarning, warn_msg):
                    learner.C = self.float_1
            else:
                learner = LogisticRegression(penalty=penalty, C=self.float_1)

                def check_C(expected):
                    # C itself and the derived prox strength must both match
                    self.assertEqual(learner.C, expected)
                    self.assertEqual(learner._prox_obj.strength,
                                     1. / expected)

                check_C(self.float_1)
                learner.C = self.float_2
                check_C(self.float_2)

                error_msg = '^``C`` must be positive, got -1$'
                with self.assertRaisesRegex(ValueError, error_msg):
                    LogisticRegression(penalty=penalty, C=-1)

            error_msg = '^``C`` must be positive, got -2$'
            with self.assertRaisesRegex(ValueError, error_msg):
                learner.C = -2
Пример #5
0
    def test_LogisticRegression_solver_step(self):
        """...Test LogisticRegression setting of step parameter of solver

        Solvers without a settable step must warn and expose ``None``; the
        others must forward the step to their solver object.
        """
        for solver in solvers:
            if solver not in ['sdca', 'bfgs']:
                learner = LogisticRegression(
                    solver=solver, step=self.float_1,
                    **Test.specific_solver_kwargs(solver))

                def check_step(expected):
                    # learner and underlying solver must agree on the step
                    self.assertEqual(learner.step, expected)
                    self.assertEqual(learner._solver_obj.step, expected)

                check_step(self.float_1)
                learner.step = self.float_2
                check_step(self.float_2)
            else:
                no_step_msg = '^Solver "%s" has no settable step$' % solver
                with self.assertWarnsRegex(RuntimeWarning, no_step_msg):
                    learner = LogisticRegression(
                        solver=solver, step=1,
                        **Test.specific_solver_kwargs(solver))
                    self.assertIsNone(learner.step)

            if solver == 'sgd':
                tuning_msg = '^SGD step needs to be tuned manually$'
                with self.assertWarnsRegex(RuntimeWarning, tuning_msg):
                    learner = LogisticRegression(solver='sgd')
                    learner.fit(self.X, self.y)
Пример #6
0
    def test_decision_function(self):
        """...Test LogReg decision_function

        Fits a learner on 300 generated samples and compares its decision
        function on 5 generated test samples with reference values, up to 3
        decimals.
        """
        X, y = Test.get_train_data(n_features=12, n_samples=300, nnz=0)
        learner = LogisticRegression(random_state=32789, tol=1e-13)
        learner.fit(X, y)

        # Only the features of the test sample are needed here
        X_test, _ = Test.get_train_data(n_features=12, n_samples=5, nnz=0)
        decision_function_values = np.array(
            [0.58182, 0.30026, -0.73075, 0.41864, 0.29278])
        np.testing.assert_array_almost_equal(learner.decision_function(X_test),
                                             decision_function_values,
                                             decimal=3)
Пример #7
0
    def test_LogisticRegression_solver_random_state(self):
        """...Test LogisticRegression setting of random_state parameter of solver

        Solvers without a settable ``random_state`` must warn and expose
        ``None``; the others must forward the seed to their solver object and
        reject ``-1``. For every solver, assigning ``random_state`` after
        construction must raise an ``AttributeError``.
        """
        for solver in solvers:
            if solver in ['bfgs', 'agd', 'gd']:
                msg = '^Solver "%s" has no settable random_state$' % solver
                with self.assertWarnsRegex(RuntimeWarning, msg):
                    learner = LogisticRegression(solver=solver, random_state=1,
                                                 **Test.specific_solver_kwargs(
                                                     solver))
                    self.assertIsNone(learner.random_state)
            else:
                learner = LogisticRegression(solver=solver,
                                             random_state=self.int_1,
                                             **Test.specific_solver_kwargs(
                                                 solver))
                self.assertEqual(learner.random_state, self.int_1)
                self.assertEqual(learner._solver_obj.seed, self.int_1)

                msg = '^random_state must be positive, got -1$'
                with self.assertRaisesRegex(ValueError, msg):
                    LogisticRegression(solver=solver, random_state=-1,
                                       **Test.specific_solver_kwargs(solver))

            # Build the learner outside of the context manager so that only
            # the assignment itself can satisfy the expected AttributeError
            learner = LogisticRegression(solver=solver,
                                         **Test.specific_solver_kwargs(
                                             solver))
            msg = '^random_state is readonly in LogisticRegression$'
            with self.assertRaisesRegex(AttributeError, msg):
                learner.random_state = self.int_2
Пример #8
0
    def test_safe_array_cast(self):
        """...Test warnings raised by LogisticRegression._safe_array

        An integer-typed version of ``self.X`` must trigger a conversion
        warning, the strided view ``self.X[::2]`` a C-contiguity warning, and
        ``self.X`` itself must come back equal to the input.
        """
        # Raw strings: ``\(`` is a regex escape, not a Python string escape
        msg = r'^Copying array of size \(5, 5\) to convert it in the ' \
              r'right format$'
        with self.assertWarnsRegex(RuntimeWarning, msg):
            LogisticRegression._safe_array(self.X.astype(int))

        msg = r'^Copying array of size \(3, 5\) to create a ' \
              r'C-contiguous version of it$'
        with self.assertWarnsRegex(RuntimeWarning, msg):
            LogisticRegression._safe_array(self.X[::2])

        np.testing.assert_array_equal(self.X,
                                      LogisticRegression._safe_array(self.X))
Пример #9
0
    def test_predict_proba(self):
        """...Test LogReg predict_proba

        Fits a learner on 300 generated samples and compares the predicted
        probabilities of 5 generated test samples with reference values, up
        to 3 decimals.
        """
        data_kwargs = dict(n_features=12, nnz=0)

        X, y = Test.get_train_data(n_samples=300, **data_kwargs)
        learner = LogisticRegression(random_state=32289, tol=1e-13)
        learner.fit(X, y)

        X_test, _ = Test.get_train_data(n_samples=5, **data_kwargs)
        expected_probas = np.array([
            [0.35851418, 0.64148582],
            [0.42549328, 0.57450672],
            [0.6749705, 0.3250295],
            [0.39684181, 0.60315819],
            [0.42732443, 0.57267557],
        ])
        actual_probas = learner.predict_proba(X_test)
        np.testing.assert_array_almost_equal(actual_probas, expected_probas,
                                             decimal=3)
Пример #10
0
    def test_LogisticRegression_solver_sdca_ridge_strength(self):
        """...Test LogisticRegression setting of sdca_ridge_strength parameter
        of solver

        Only the ``sdca`` solver accepts the parameter; any other solver must
        emit a ``RuntimeWarning`` when it is passed or assigned.
        """
        for solver in solvers:
            if solver != 'sdca':
                warn_msg = \
                    '^Solver "%s" has no sdca_ridge_strength attribute$' % \
                    solver
                with self.assertWarnsRegex(RuntimeWarning, warn_msg):
                    LogisticRegression(solver=solver, sdca_ridge_strength=1e-2,
                                       **Test.specific_solver_kwargs(solver))

                learner = LogisticRegression(
                    solver=solver, **Test.specific_solver_kwargs(solver))
                with self.assertWarnsRegex(RuntimeWarning, warn_msg):
                    learner.sdca_ridge_strength = self.float_1
                continue

            learner = LogisticRegression(
                solver=solver, sdca_ridge_strength=self.float_1,
                **Test.specific_solver_kwargs(solver))

            def check_ridge(expected):
                # value exposed by the learner and by the wrapped solver
                self.assertEqual(learner.sdca_ridge_strength, expected)
                self.assertEqual(learner._solver_obj._solver.get_l_l2sq(),
                                 expected)

            check_ridge(self.float_1)
            learner.sdca_ridge_strength = self.float_2
            check_ridge(self.float_2)
Пример #11
0
    def test_LogisticRegression_penalty_elastic_net_ratio(self):
        """...Test LogisticRegression setting of parameter of elastic_net_ratio

        For the ``elasticnet`` penalty the ratio must be forwarded to the prox
        object without altering ``C``; for any other penalty, passing or
        assigning the ratio must emit a ``RuntimeWarning``.
        """
        ratio_1 = 0.6
        ratio_2 = 0.3

        for penalty in penalties:
            if penalty == 'elasticnet':

                learner = LogisticRegression(penalty=penalty,
                                             C=self.float_1,
                                             elastic_net_ratio=ratio_1)
                self.assertEqual(learner.C, self.float_1)
                self.assertEqual(learner.elastic_net_ratio, ratio_1)
                self.assertEqual(learner._prox_obj.strength, 1. / self.float_1)
                self.assertEqual(learner._prox_obj.ratio, ratio_1)

                # Changing the ratio must leave C untouched
                learner.elastic_net_ratio = ratio_2
                self.assertEqual(learner.C, self.float_1)
                self.assertEqual(learner.elastic_net_ratio, ratio_2)
                self.assertEqual(learner._prox_obj.ratio, ratio_2)

            else:
                # Single end anchor (the pattern previously ended with "$$")
                msg = '^Penalty "%s" has no elastic_net_ratio attribute$' % \
                      penalty
                with self.assertWarnsRegex(RuntimeWarning, msg):
                    LogisticRegression(penalty=penalty, elastic_net_ratio=0.8)

                learner = LogisticRegression(penalty=penalty)
                with self.assertWarnsRegex(RuntimeWarning, msg):
                    learner.elastic_net_ratio = ratio_1
Пример #12
0
    def test_LogisticRegression_fit(self):
        """...Test LogisticRegression fit with different solvers and penalties

        Every tested combination of intercept, penalty and solver is fitted
        with ``max_iter=10`` and must reach an AUC above 0.7 on the data it
        was trained on.
        """
        sto_seed = 179312
        X, y = Test.get_train_data()

        for fit_intercept in [True, False]:
            for penalty in penalties:
                for solver in solvers:
                    # BFGS only accepts ProxZero and ProxL2sq for now
                    bfgs_unsupported = penalty not in ['none', 'l2']
                    if solver == 'bfgs' and bfgs_unsupported:
                        continue

                    learner_kwargs = {
                        'penalty': penalty,
                        'tol': 1e-5,
                        'solver': solver,
                        'verbose': False,
                        'max_iter': 10,
                        'fit_intercept': fit_intercept,
                    }
                    if penalty != 'none':
                        learner_kwargs.update(C=50)
                    if solver == 'sdca':
                        learner_kwargs.update(sdca_ridge_strength=2e-2)
                    if solver in ['sgd', 'svrg', 'sdca']:
                        learner_kwargs.update(random_state=sto_seed)
                    if solver == 'sgd':
                        learner_kwargs.update(step=1.)

                    learner = LogisticRegression(**learner_kwargs)
                    learner.fit(X, y)

                    # Score with the second predicted probability column
                    auc = roc_auc_score(y, learner.predict_proba(X)[:, 1])
                    failure_msg = ("solver %s with penalty %s and "
                                   "intercept %s reached too low AUC" %
                                   (solver, penalty, fit_intercept))
                    self.assertGreater(auc, 0.7, failure_msg)
Пример #13
0
    def test_plot_history_learner(self):
        """...Test plot_history rendering given a list of learners

        The histories of two reference solvers are injected into two
        learners; each plotted line must carry the matching data and label.
        """
        svrg_learner = LogisticRegression(solver='svrg')
        svrg_learner._solver_obj._set('history', self.solver1.history)
        agd_learner = LogisticRegression(solver='agd')
        agd_learner._solver_obj._set('history', self.solver2.history)

        fig = plot_history([svrg_learner, agd_learner], show=False)
        plotted_lines = fig.axes[0].lines

        expectations = [
            (self.n_iter1, self.obj1, 'SVRG'),
            (self.n_iter2, self.obj2, 'AGD'),
        ]
        for position, (n_iter, obj, label) in enumerate(expectations):
            line = plotted_lines[position]
            line_n_iter, line_obj = line.get_xydata().T
            np.testing.assert_array_equal(line_n_iter, n_iter)
            np.testing.assert_array_equal(line_obj, obj)
            self.assertEqual(line.get_label(), label)
Пример #14
0
    def test_LogisticRegression_warm_start(self):
        """...Test LogisticRegression warm start

        A warm-started learner is fitted twice with ``max_iter=2``; the
        objective after the second fit must be lower than after the first.
        SDCA must refuse to be warm started.
        """
        sto_seed = 179312
        X, y = Test.get_train_data()

        def current_coeffs(fitted, with_intercept):
            # Weights, followed by the intercept when one is fitted
            if with_intercept:
                return np.hstack((fitted.weights, fitted.intercept))
            return fitted.weights

        for solver in solvers:
            for fit_intercept in [True, False]:
                learner_kwargs = dict(solver=solver, max_iter=2,
                                      fit_intercept=fit_intercept,
                                      warm_start=True, tol=0)

                if solver == 'sdca':
                    msg = '^SDCA cannot be warm started$'
                    with self.assertRaisesRegex(ValueError, msg):
                        LogisticRegression(**learner_kwargs)
                    continue

                if solver in ['sgd', 'svrg']:
                    learner_kwargs['random_state'] = sto_seed
                if solver == 'sgd':
                    learner_kwargs['step'] = 1.

                learner = LogisticRegression(**learner_kwargs)

                learner.fit(X, y)
                coeffs_first = current_coeffs(learner, fit_intercept)

                learner.fit(X, y)
                coeffs_second = current_coeffs(learner, fit_intercept)

                # Thanks to warm start objective should have decreased
                objective = learner._solver_obj.objective
                self.assertLess(objective(coeffs_second),
                                objective(coeffs_first))
Пример #15
0
    def test_LogisticRegression_model_settings(self):
        """...Test LogisticRegression setting of parameters of model

        ``fit_intercept`` must be mirrored in the model object, both at
        construction and after being reassigned.
        """
        for solver in solvers:
            for initial in (True, False):
                flipped = not initial

                learner = LogisticRegression(fit_intercept=initial,
                                             solver=solver)
                self.assertEqual(learner.fit_intercept, initial)
                self.assertEqual(learner._model_obj.fit_intercept, initial)

                learner.fit_intercept = flipped
                self.assertEqual(learner.fit_intercept, flipped)
                self.assertEqual(learner._model_obj.fit_intercept, flipped)
Пример #16
0
* In this example, the computational time of ``tick`` is better than ``scikit``'s
"""
import numpy as np
from time import time
import matplotlib.pyplot as plt

from sklearn.metrics import roc_curve, auc
from sklearn.linear_model import LogisticRegression as LogRegScikit

from tick.dataset import fetch_tick_dataset
from tick.inference import LogisticRegression as LogRegTick

# Train and test splits of the adult dataset; index 0 is passed as features
# and index 1 as labels below
train_set = fetch_tick_dataset('binary/adult/adult.trn.bz2')
test_set = fetch_tick_dataset('binary/adult/adult.tst.bz2')

clf_tick = LogRegTick(C=1e5, penalty='l1', tol=1e-8)
# The L1 penalty is not supported by every scikit-learn solver (recent
# releases default to 'lbfgs', which rejects it), so 'liblinear' is
# requested explicitly
clf_scikit = LogRegScikit(penalty='l1', solver='liblinear', tol=1e-8)

# Wall-clock time spent fitting each learner on the training set
t1 = time()
clf_tick.fit(train_set[0], train_set[1])
t_tick = time() - t1

t1 = time()
clf_scikit.fit(train_set[0], train_set[1])
t_scikit = time() - t1

# Probability estimates on the test set
pred_tick = clf_tick.predict_proba(test_set[0])
pred_scikit = clf_scikit.predict_proba(test_set[0])

# ROC curve points computed from the second probability column
fpr_tick, tpr_tick, _ = roc_curve(test_set[1], pred_tick[:, 1])
fpr_scikit, tpr_scikit, _ = roc_curve(test_set[1], pred_scikit[:, 1])
Пример #17
0
    def test_labels_encoding(self):
        """...Test that class encoding is well done for LogReg

        Labels given as -1/1, 0/1 or text must all be encoded to the same
        -1/1 vector (up to a global sign for text labels).
        """
        learner = LogisticRegression(max_iter=1)

        np.random.seed(38027)
        n_samples, n_features = 5, 3
        X = np.random.rand(n_samples, n_features)

        encoded_y = np.array([1., -1., 1., -1., -1.])
        zero_one_y = np.array([1., 0., 1., 0., 0.])

        numeric_cases = [
            (encoded_y, [-1., 1.]),
            (zero_one_y, [0., 1.]),
        ]
        for labels, expected_classes in numeric_cases:
            learner.fit(X, labels)
            np.testing.assert_array_equal(learner.classes, expected_classes)
            np.testing.assert_array_equal(
                learner._encode_labels_vector(labels), encoded_y)

        text_y = np.array(['cat', 'dog', 'cat', 'dog', 'dog'])
        learner.fit(X, text_y)
        np.testing.assert_array_equal(set(learner.classes), {'cat', 'dog'})

        encoded_text_y = learner._encode_labels_vector(text_y)
        # Align both vectors on the sign of their first entry before comparing
        sign_correction = np.sign(encoded_text_y[0]) * np.sign(encoded_y[0])
        np.testing.assert_array_equal(encoded_text_y,
                                      encoded_y * sign_correction)
Пример #18
0
==============================================
Binary classification with logistic regression
==============================================

This code performs binary classification on the adult dataset with the
logistic regression learner (`tick.inference.LogisticRegression`).
"""

import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

from tick.inference import LogisticRegression
from tick.dataset import fetch_tick_dataset

# Train and test splits of the adult dataset
train_set = fetch_tick_dataset('binary/adult/adult.trn.bz2')
test_set = fetch_tick_dataset('binary/adult/adult.tst.bz2')

# Fit a learner built with its default settings
train_features, train_labels = train_set[0], train_set[1]
learner = LogisticRegression()
learner.fit(train_features, train_labels)

# ROC curve computed from the second predicted probability column
test_features, test_labels = test_set[0], test_set[1]
predictions = learner.predict_proba(test_features)
fpr, tpr, _ = roc_curve(test_labels, predictions[:, 1])
roc_area = auc(fpr, tpr)

plt.figure(figsize=(6, 5))
plt.plot(fpr, tpr, lw=2)
plt.title("ROC curve on adult dataset (area = {:.2f})".format(roc_area))
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")

plt.show()
Пример #19
0
    def test_LogisticRegression_solver_basic_settings(self):
        """...Test LogisticRegression setting of basic parameters of solver

        Each parameter is given at construction and then reassigned; both
        the learner and its solver object must expose the current value.
        """
        # (parameter name, value given at construction, value assigned after)
        settings = [
            ('tol', self.float_1, self.float_2),
            ('max_iter', self.int_1, self.int_2),
            ('verbose', True, False),
            ('verbose', False, True),
            ('print_every', self.int_1, self.int_2),
            ('record_every', self.int_1, self.int_2),
        ]

        for solver in solvers:
            for name, initial, updated in settings:
                learner = LogisticRegression(
                    solver=solver, **{name: initial},
                    **Test.specific_solver_kwargs(solver))
                self.assertEqual(getattr(learner, name), initial)
                self.assertEqual(getattr(learner._solver_obj, name), initial)

                setattr(learner, name, updated)
                self.assertEqual(getattr(learner, name), updated)
                self.assertEqual(getattr(learner._solver_obj, name), updated)