def test_ModelHinge(self):
    """...Numerical consistency check of loss and gradient for Hinge model
    """
    np.random.seed(12)
    n_samples, n_features = 5000, 10
    w0 = np.random.randn(n_features)
    c0 = np.random.randn()

    # First check with intercept
    X, y = SimuLogReg(w0, c0, n_samples=n_samples,
                      verbose=False, dtype=self.dtype).simulate()
    X_spars = csr_matrix(X, dtype=self.dtype)
    model = ModelHinge(fit_intercept=True).fit(X, y)
    model_spars = ModelHinge(fit_intercept=True).fit(X_spars, y)
    self.run_test_for_glm(model, model_spars)
    self._test_glm_intercept_vs_hardcoded_intercept(model)

    # Then check without intercept
    X, y = SimuLogReg(w0, None, n_samples=n_samples,
                      verbose=False, seed=2038, dtype=self.dtype).simulate()
    X_spars = csr_matrix(X, dtype=self.dtype)
    model = ModelHinge(fit_intercept=False).fit(X, y)
    model_spars = ModelHinge(fit_intercept=False).fit(X_spars, y)
    self.run_test_for_glm(model, model_spars)
def test_SimuLogReg(self):
    """...Test simulation of a Logistic Regression
    """
    n_samples = 10
    n_features = 3
    idx = np.arange(n_features)
    weights = np.exp(-idx / 10.)
    weights[::2] *= -1
    seed = 123
    simu = SimuLogReg(weights, None, n_samples=n_samples, seed=seed,
                      verbose=False)
    X, y = simu.simulate()

    X_truth = np.array([[1.4912667, 0.80881799, 0.26977298],
                        [1.23227551, 0.50697013, 1.9409132],
                        [1.8891494, 1.49834791, 2.41445794],
                        [0.19431319, 0.80245126, 1.02577552],
                        [-1.61687582, -1.08411865, -0.83438387],
                        [2.30419894, -0.68987056, -0.39750262],
                        [-0.28826405, -1.23635074, -0.76124386],
                        [-1.32869473, -1.8752391, -0.182537],
                        [0.79464218, 0.65055633, 1.57572506],
                        [0.71524202, 1.66759831, 0.88679047]])
    y_truth = np.array([-1., -1., -1., -1., 1., -1., 1., -1., -1., 1.])

    np.testing.assert_array_almost_equal(X_truth, X)
    np.testing.assert_array_almost_equal(y_truth, y)
def test_ModelLogReg(self):
    """...Numerical consistency check of loss and gradient for Logistic
    Regression
    """
    np.random.seed(12)
    n_samples, n_features = 5000, 10
    w0 = np.random.randn(n_features)
    c0 = np.random.randn()

    # First check with intercept
    X, y = SimuLogReg(w0, c0, n_samples=n_samples,
                      verbose=False, dtype=self.dtype).simulate()
    X_spars = csr_matrix(X, dtype=self.dtype)
    model = ModelLogReg(fit_intercept=True).fit(X, y)
    model_spars = ModelLogReg(fit_intercept=True).fit(X_spars, y)
    self.run_test_for_glm(model, model_spars)
    self._test_glm_intercept_vs_hardcoded_intercept(model)

    # Then check without intercept
    X, y = SimuLogReg(w0, None, n_samples=n_samples,
                      verbose=False, seed=2038, dtype=self.dtype).simulate()
    X_spars = csr_matrix(X, dtype=self.dtype)
    model = ModelLogReg(fit_intercept=False).fit(X, y)
    model_spars = ModelLogReg(fit_intercept=False).fit(X_spars, y)
    self.run_test_for_glm(model, model_spars)
    self._test_glm_intercept_vs_hardcoded_intercept(model)

    # Test for the Lipschitz constants without intercept
    self.assertAlmostEqual(model.get_lip_best(), 0.67184209642814952,
                           places=self.decimal_places)
    self.assertAlmostEqual(model.get_lip_mean(), 2.48961431697108,
                           places=self.decimal_places)
    self.assertAlmostEqual(model.get_lip_max(), 13.706542412138093,
                           places=self.decimal_places)
    self.assertAlmostEqual(model_spars.get_lip_mean(), model.get_lip_mean(),
                           places=self.decimal_places)
    self.assertAlmostEqual(model_spars.get_lip_max(), model.get_lip_max(),
                           places=self.decimal_places)

    # Test for the Lipschitz constants with intercept
    model = ModelLogReg(fit_intercept=True).fit(X, y)
    model_spars = ModelLogReg(fit_intercept=True).fit(X_spars, y)
    self.assertAlmostEqual(model.get_lip_best(), 0.671892096428,
                           places=self.decimal_places)
    self.assertAlmostEqual(model.get_lip_mean(), 2.739614316971082,
                           places=self.decimal_places)
    self.assertAlmostEqual(model.get_lip_max(), 13.956542412138093,
                           places=self.decimal_places)
    self.assertAlmostEqual(model_spars.get_lip_mean(), model.get_lip_mean(),
                           places=self.decimal_places)
    self.assertAlmostEqual(model_spars.get_lip_max(), model.get_lip_max(),
                           places=self.decimal_places)
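# Why the hardcoded Lipschitz values above behave the way they do: for the
# logistic loss, sigma(z) * (1 - sigma(z)) <= 1/4, so the per-sample gradient
# Lipschitz constant is ||x_i||^2 / 4. Fitting an intercept appends a
# constant feature equal to 1, which shifts every constant by exactly 1/4
# (e.g. 13.7065... -> 13.9565..., 2.4896... -> 2.7396...). A minimal
# standalone sketch of that identity, on illustrative data (not the test's
# simulation):
import numpy as np

np.random.seed(0)
X = np.random.randn(100, 10)
lip_no_intercept = np.sum(X ** 2, axis=1) / 4
lip_with_intercept = (np.sum(X ** 2, axis=1) + 1) / 4
# Each per-sample constant shifts by exactly 0.25 once an intercept is added
assert np.allclose(lip_with_intercept, lip_no_intercept + 0.25)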
def test_ModelSmoothedHinge(self):
    """...Numerical consistency check of loss and gradient for
    SmoothedHinge model
    """
    np.random.seed(12)
    n_samples, n_features = 5000, 10
    w0 = np.random.randn(n_features)
    c0 = np.random.randn()

    # First check with intercept
    X, y = SimuLogReg(w0, c0, n_samples=n_samples,
                      verbose=False).simulate()
    X_spars = csr_matrix(X)
    model = ModelSmoothedHinge(fit_intercept=True,
                               smoothness=0.2).fit(X, y)
    model_spars = ModelSmoothedHinge(fit_intercept=True,
                                     smoothness=0.2).fit(X_spars, y)
    self.run_test_for_glm(model, model_spars, 1e-5, 1e-4)
    self._test_glm_intercept_vs_hardcoded_intercept(model)

    # Then check without intercept
    X, y = SimuLogReg(w0, None, n_samples=n_samples,
                      verbose=False, seed=2038).simulate()
    X_spars = csr_matrix(X)
    model = ModelSmoothedHinge(fit_intercept=False).fit(X, y)
    model_spars = ModelSmoothedHinge(fit_intercept=False).fit(X_spars, y)
    self.run_test_for_glm(model, model_spars, 1e-5, 1e-4)

    model = ModelSmoothedHinge(fit_intercept=False,
                               smoothness=0.2).fit(X, y)
    model_spars = ModelSmoothedHinge(fit_intercept=False,
                                     smoothness=0.2).fit(X_spars, y)

    # Test for the Lipschitz constants without intercept
    self.assertAlmostEqual(model.get_lip_best(), 5 * 2.6873683857125981)
    self.assertAlmostEqual(model.get_lip_mean(), 5 * 9.95845726788432)
    self.assertAlmostEqual(model.get_lip_max(), 5 * 54.82616964855237)
    self.assertAlmostEqual(model_spars.get_lip_mean(), model.get_lip_mean())
    self.assertAlmostEqual(model_spars.get_lip_max(), model.get_lip_max())

    # Test for the Lipschitz constants with intercept
    model = ModelSmoothedHinge(fit_intercept=True,
                               smoothness=0.2).fit(X, y)
    model_spars = ModelSmoothedHinge(fit_intercept=True,
                                     smoothness=0.2).fit(X_spars, y)
    self.assertAlmostEqual(model.get_lip_best(), 5 * 2.687568385712598)
    self.assertAlmostEqual(model.get_lip_mean(), 5 * 10.958457267884327)
    self.assertAlmostEqual(model.get_lip_max(), 5 * 55.82616964855237)
    self.assertAlmostEqual(model_spars.get_lip_mean(), model.get_lip_mean())
    self.assertAlmostEqual(model_spars.get_lip_max(), model.get_lip_max())
def generate_logistic_data(n_features, n_samples, use_intercept=False):
    """Generate labels y and features X from a logistic model with
    weights ``coeffs0`` and intercept ``interc0``
    """
    if n_features <= 5:
        raise ValueError("``n_features`` must be larger than 5")
    np.random.seed(12)
    coeffs0 = weights_sparse_gauss(n_features, nnz=5)
    if use_intercept:
        interc0 = 2.
    else:
        interc0 = None
    simu = SimuLogReg(coeffs0, interc0, n_samples=n_samples, verbose=False)
    X, y = simu.simulate()
    return y, X, coeffs0, interc0
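# A minimal, hypothetical usage sketch of generate_logistic_data: simulate a
# dataset and evaluate the model loss at the true parameters (``ModelLogReg``
# and ``np`` imported as elsewhere in these tests; parameter values are
# illustrative only).
y, X, coeffs0, interc0 = generate_logistic_data(n_features=10,
                                                n_samples=2000,
                                                use_intercept=True)
model = ModelLogReg(fit_intercept=True).fit(X, y)
# Loss at the true parameters; with an intercept, coeffs are stacked as
# (weights, intercept)
print(model.loss(np.hstack((coeffs0, interc0))))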
def test_step_type_setting(self):
    """...Test that SVRG step_type parameter behaves correctly
    """
    svrg = SVRG()

    coeffs0 = weights_sparse_gauss(20, nnz=5, dtype=self.dtype)
    interc0 = None

    X, y = SimuLogReg(coeffs0, interc0, n_samples=3000, verbose=False,
                      seed=123, dtype=self.dtype).simulate()

    model = ModelLogReg().fit(X, y)
    svrg.set_model(model)

    self.assertEqual(svrg.step_type, 'fixed')
    self.assertEqual(svrg._solver.get_step_type(), SVRG_StepType_Fixed)

    svrg = SVRG(step_type='bb')
    svrg.set_model(model)
    self.assertEqual(svrg.step_type, 'bb')
    self.assertEqual(svrg._solver.get_step_type(),
                     SVRG_StepType_BarzilaiBorwein)

    svrg.step_type = 'fixed'
    self.assertEqual(svrg.step_type, 'fixed')
    self.assertEqual(svrg._solver.get_step_type(), SVRG_StepType_Fixed)

    svrg.step_type = 'bb'
    self.assertEqual(svrg.step_type, 'bb')
    self.assertEqual(svrg._solver.get_step_type(),
                     SVRG_StepType_BarzilaiBorwein)
def test_ModelSmoothedHinge_smoothness(self):
    """...Test that the smoothness parameter of ModelSmoothedHinge is
    correctly set and validated
    """
    np.random.seed(12)
    n_samples, n_features = 50, 2
    w0 = np.random.randn(n_features)
    c0 = np.random.randn()

    # First check with intercept
    X, y = SimuLogReg(w0, c0, n_samples=n_samples,
                      verbose=False).simulate()

    model = ModelSmoothedHinge(smoothness=0.123).fit(X, y)
    self.assertEqual(model._model.get_smoothness(), 0.123)
    model.smoothness = 0.765
    self.assertEqual(model._model.get_smoothness(), 0.765)

    msg = '^smoothness should be between 0.01 and 1$'
    with self.assertRaisesRegex(RuntimeError, msg):
        model = ModelSmoothedHinge(smoothness=-1).fit(X, y)
    with self.assertRaisesRegex(RuntimeError, msg):
        model = ModelSmoothedHinge(smoothness=1.2).fit(X, y)
    with self.assertRaisesRegex(RuntimeError, msg):
        model = ModelSmoothedHinge(smoothness=0.).fit(X, y)
    with self.assertRaisesRegex(RuntimeError, msg):
        model.smoothness = 0.
    with self.assertRaisesRegex(RuntimeError, msg):
        model.smoothness = -1.
    with self.assertRaisesRegex(RuntimeError, msg):
        model.smoothness = 2.
def get_train_data(n_features=20, n_samples=3000, nnz=5):
    np.random.seed(12)
    weights0 = weights_sparse_gauss(n_features, nnz=nnz)
    interc0 = 0.1
    features, y = SimuLogReg(weights0, interc0, n_samples=n_samples,
                             verbose=False).simulate()
    return features, y
def test_asaga_solver(self):
    """...Check ASAGA solver for a Logistic Regression with Elastic net
    penalization
    """
    seed = 1398
    np.random.seed(seed)
    n_samples = 4000
    n_features = 30
    weights = weights_sparse_gauss(n_features, nnz=3).astype(self.dtype)
    intercept = 0.2
    penalty_strength = 1e-3
    sparsity = 1e-4
    features = sparse.rand(n_samples, n_features, density=sparsity,
                           format='csr', random_state=8).astype(self.dtype)

    simulator = SimuLogReg(weights, n_samples=n_samples, features=features,
                           verbose=False, intercept=intercept,
                           dtype=self.dtype)
    features, labels = simulator.simulate()

    model = ModelLogReg(fit_intercept=True)
    model.fit(features, labels)
    prox = ProxElasticNet(penalty_strength, ratio=0.1,
                          range=(0, n_features))
    solver_step = 1. / model.get_lip_max()

    saga = SAGA(step=solver_step, max_iter=100, tol=1e-10, verbose=False,
                n_threads=1, record_every=10, seed=seed)
    saga.set_model(model).set_prox(prox)
    saga.solve()

    asaga = SAGA(step=solver_step, max_iter=100, tol=1e-10, verbose=False,
                 n_threads=2, record_every=10, seed=seed)
    asaga.set_model(model).set_prox(prox)
    asaga.solve()

    np.testing.assert_array_almost_equal(saga.solution, asaga.solution,
                                         decimal=4)
    self.assertGreater(np.linalg.norm(saga.solution[:-1]), 0)
def test_variance_reduction_setting(self):
    """...Test that SAGA variance_reduction parameter is correctly set
    """
    saga = SAGA()

    coeffs0 = weights_sparse_gauss(20, nnz=5, dtype=self.dtype)
    interc0 = None

    X, y = SimuLogReg(coeffs0, interc0, n_samples=3000, verbose=False,
                      seed=123, dtype=self.dtype).simulate()

    model = ModelLogReg().fit(X, y)
    saga.set_model(model)
    saga.astype(self.dtype)

    self.assertEqual(saga.variance_reduction, 'last')
    self.assertEqual(saga._solver.get_variance_reduction(),
                     SAGA_VarianceReductionMethod_Last)

    saga = SAGA(variance_reduction='rand')
    saga.set_model(model)
    saga.astype(self.dtype)
    self.assertEqual(saga.variance_reduction, 'rand')
    self.assertEqual(saga._solver.get_variance_reduction(),
                     SAGA_VarianceReductionMethod_Random)

    saga.variance_reduction = 'avg'
    self.assertEqual(saga.variance_reduction, 'avg')
    self.assertEqual(saga._solver.get_variance_reduction(),
                     SAGA_VarianceReductionMethod_Average)

    saga.variance_reduction = 'rand'
    self.assertEqual(saga.variance_reduction, 'rand')
    self.assertEqual(saga._solver.get_variance_reduction(),
                     SAGA_VarianceReductionMethod_Random)

    saga.variance_reduction = 'last'
    self.assertEqual(saga.variance_reduction, 'last')
    self.assertEqual(saga._solver.get_variance_reduction(),
                     SAGA_VarianceReductionMethod_Last)

    with self.assertRaises(ValueError):
        saga.variance_reduction = 'wrong_name'
def test_solver_bfgs(self):
    """...Check BFGS solver for Logistic Regression with Ridge
    penalization
    """
    # It is the reference solver used in other unittests so we check that
    # it's actually close to the true parameter of the simulated dataset
    np.random.seed(12)
    n_samples = 3000
    n_features = 10
    coeffs0 = weights_sparse_gauss(n_features, nnz=5)
    interc0 = 2.
    X, y = SimuLogReg(coeffs0, interc0, n_samples=n_samples,
                      verbose=False).simulate()
    model = ModelLogReg(fit_intercept=True).fit(X, y)
    prox = ProxL2Sq(strength=1e-6)
    solver = BFGS(max_iter=100, print_every=1, verbose=False,
                  tol=1e-6).set_model(model).set_prox(prox)
    coeffs = solver.solve()
    err = Test.evaluate_model(coeffs, coeffs0, interc0)
    self.assertAlmostEqual(err, 0., delta=5e-1)
def create_model(model_type, n_samples, n_features, with_intercept=True):
    weights = np.random.randn(n_features)
    intercept = None
    if with_intercept:
        intercept = np.random.normal()

    if model_type == 'Poisson':
        # we need to rescale features to avoid overflows
        weights /= n_features
        if intercept is not None:
            intercept /= n_features

    if model_type == 'Linear':
        simulator = SimuLinReg(weights, intercept=intercept,
                               n_samples=n_samples, verbose=False)
    elif model_type == 'Logistic':
        simulator = SimuLogReg(weights, intercept=intercept,
                               n_samples=n_samples, verbose=False)
    elif model_type == 'Poisson':
        simulator = SimuPoisReg(weights, intercept=intercept,
                                n_samples=n_samples, verbose=False)

    # simulate() returns (features, labels), in that order
    features, labels = simulator.simulate()

    if model_type == 'Linear':
        model = ModelLinReg(fit_intercept=with_intercept)
    elif model_type == 'Logistic':
        model = ModelLogReg(fit_intercept=with_intercept)
    elif model_type == 'Poisson':
        model = ModelPoisReg(fit_intercept=with_intercept)

    model.fit(features, labels)
    return model
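# Hypothetical usage sketch of create_model: build a logistic model and
# solve it with SVRG, reusing the prox and step pattern seen elsewhere in
# this section (parameter values here are illustrative only).
np.random.seed(42)
model = create_model('Logistic', n_samples=2000, n_features=10)
prox = ProxL2Sq(strength=1e-3)
svrg = SVRG(max_iter=50, tol=1e-8, verbose=False)
svrg.set_model(model).set_prox(prox)
coeffs = svrg.solve(step=1. / model.get_lip_max())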
import numpy as np
import matplotlib.pyplot as plt
from scipy import sparse

from tick.linear_model import SimuLogReg, ModelLogReg
from tick.simulation import weights_sparse_gauss
from tick.solver import SVRG, SAGA
from tick.prox import ProxElasticNet

seed = 1398
np.random.seed(seed)

n_samples = 40000
n_features = 20000
sparsity = 1e-4
penalty_strength = 1e-5

weights = weights_sparse_gauss(n_features, nnz=1000)
intercept = 0.2
features = sparse.rand(n_samples, n_features, density=sparsity,
                       format='csr')

simulator = SimuLogReg(weights, n_samples=n_samples, features=features,
                       verbose=False, intercept=intercept)
features, labels = simulator.simulate()

model = ModelLogReg(fit_intercept=True)
model.fit(features, labels)
prox = ProxElasticNet(penalty_strength, ratio=0.5, range=(0, n_features))
svrg_step = 1. / model.get_lip_max()

test_n_threads = [1, 2, 4]

fig, axes = plt.subplots(1, 2, figsize=(8, 4))

for ax, SolverClass in zip(axes, [SVRG, SAGA]):
    solver_list = []
    solver_labels = []
""" import numpy as np import matplotlib.pyplot as plt from cycler import cycler from tick.simulation import weights_sparse_gauss from tick.solver import SVRG from tick.linear_model import SimuLogReg, ModelLogReg from tick.prox import ProxElasticNet from tick.plot import plot_history n_samples, n_features, = 5000, 50 weights0 = weights_sparse_gauss(n_features, nnz=10) intercept0 = 0.2 X, y = SimuLogReg(weights=weights0, intercept=intercept0, n_samples=n_samples, seed=123, verbose=False).simulate() model = ModelLogReg(fit_intercept=True).fit(X, y) prox = ProxElasticNet(strength=1e-3, ratio=0.5, range=(0, n_features)) x0 = np.zeros(model.n_coeffs) optimal_step = 1 / model.get_lip_max() tested_steps = [optimal_step, 1e-2 * optimal_step, 10 * optimal_step] solvers = [] solver_labels = [] for step in tested_steps: svrg = SVRG(max_iter=30, tol=1e-10, verbose=False) svrg.set_model(model).set_prox(prox) svrg.solve(step=step)
def test_ModelQuadraticHinge(self):
    """...Numerical consistency check of loss and gradient for Quadratic
    Hinge model
    """
    np.random.seed(12)
    n_samples, n_features = 5000, 10
    w0 = np.random.randn(n_features)
    c0 = np.random.randn()

    # First check with intercept
    X, y = SimuLogReg(w0, c0, n_samples=n_samples,
                      verbose=False, dtype=self.dtype).simulate()
    X_spars = csr_matrix(X, dtype=self.dtype)
    model = ModelQuadraticHinge(fit_intercept=True).fit(X, y)
    model_spars = ModelQuadraticHinge(fit_intercept=True).fit(X_spars, y)
    self.run_test_for_glm(model, model_spars)
    self._test_glm_intercept_vs_hardcoded_intercept(model)

    # Then check without intercept
    X, y = SimuLogReg(w0, None, n_samples=n_samples,
                      verbose=False, seed=2038, dtype=self.dtype).simulate()
    X_spars = csr_matrix(X, dtype=self.dtype)
    model = ModelQuadraticHinge(fit_intercept=False).fit(X, y)
    model_spars = ModelQuadraticHinge(fit_intercept=False).fit(X_spars, y)
    self.run_test_for_glm(model, model_spars)

    # Test for the Lipschitz constants without intercept
    self.assertAlmostEqual(model.get_lip_best(), 2.6873683857125981,
                           places=self.decimal_places)
    self.assertAlmostEqual(model.get_lip_mean(), 9.95845726788432,
                           places=self.decimal_places)
    self.assertAlmostEqual(model.get_lip_max(), 54.82616964855237,
                           places=self.decimal_places)
    self.assertAlmostEqual(model_spars.get_lip_mean(), model.get_lip_mean(),
                           places=self.decimal_places)
    self.assertAlmostEqual(model_spars.get_lip_max(), model.get_lip_max(),
                           places=self.decimal_places)

    # Test for the Lipschitz constants with intercept
    model = ModelQuadraticHinge(fit_intercept=True).fit(X, y)
    model_spars = ModelQuadraticHinge(fit_intercept=True).fit(X_spars, y)
    self.assertAlmostEqual(model.get_lip_best(), 2.687568385712598,
                           places=self.decimal_places)
    self.assertAlmostEqual(model.get_lip_mean(), 10.958457267884327,
                           places=self.decimal_places)
    self.assertAlmostEqual(model.get_lip_max(), 55.82616964855237,
                           places=self.decimal_places)
    self.assertAlmostEqual(model_spars.get_lip_mean(), model.get_lip_mean(),
                           places=self.decimal_places)
    self.assertAlmostEqual(model_spars.get_lip_max(), model.get_lip_max(),
                           places=self.decimal_places)
n_features = 30
n_classes = 2
nnz = 5
w0 = np.zeros(n_features)
w0[:nnz] = 1
# TODO: Seed
n_trees = 50

# w0 = weights_sparse_exp(n_features, nnz=nnz)
# NB: n_samples is assumed to be defined earlier in this script
X, y = SimuLogReg(weights=w0, intercept=None, n_samples=n_samples,
                  cov_corr=0.1, features_scaling='standard',
                  seed=123).simulate()
y = (y + 1) / 2

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

rf = RandomForestClassifier(n_estimators=n_trees, criterion="entropy",
                            random_state=123)
rf.fit(X_train, y_train)
feature_importances = rf.feature_importances_

of1 = OnlineForestClassifier(n_classes=n_classes, n_trees=n_trees, seed=123,
def test_variance_reduction_setting(self):
    """...Test that SVRG variance_reduction parameter behaves correctly
    """
    svrg = SVRG()

    coeffs0 = weights_sparse_gauss(20, nnz=5, dtype=self.dtype)
    interc0 = None

    X, y = SimuLogReg(coeffs0, interc0, n_samples=3000, verbose=False,
                      seed=123, dtype=self.dtype).simulate()

    model = ModelLogReg().fit(X, y)
    svrg.set_model(model)
    self.assertEqual(svrg.variance_reduction, 'last')
    self.assertEqual(svrg._solver.get_variance_reduction(),
                     SVRG_VarianceReductionMethod_Last)

    svrg = SVRG(variance_reduction='rand')
    svrg.set_model(model)
    self.assertEqual(svrg.variance_reduction, 'rand')
    self.assertEqual(svrg._solver.get_variance_reduction(),
                     SVRG_VarianceReductionMethod_Random)

    svrg.variance_reduction = 'avg'
    self.assertEqual(svrg.variance_reduction, 'avg')
    self.assertEqual(svrg._solver.get_variance_reduction(),
                     SVRG_VarianceReductionMethod_Average)

    svrg.variance_reduction = 'rand'
    self.assertEqual(svrg.variance_reduction, 'rand')
    self.assertEqual(svrg._solver.get_variance_reduction(),
                     SVRG_VarianceReductionMethod_Random)

    svrg.variance_reduction = 'last'
    self.assertEqual(svrg.variance_reduction, 'last')
    self.assertEqual(svrg._solver.get_variance_reduction(),
                     SVRG_VarianceReductionMethod_Last)

    msg = '^variance_reduction should be one of "avg, last, rand", ' \
          'got "stuff"$'
    with self.assertRaisesRegex(ValueError, msg):
        svrg = SVRG(variance_reduction='stuff')
        svrg.set_model(model)
    with self.assertRaisesRegex(ValueError, msg):
        svrg.variance_reduction = 'stuff'

    X, y = self.simu_linreg_data(dtype=self.dtype)
    model_dense, model_spars = self.get_dense_and_sparse_linreg_model(
        X, y, dtype=self.dtype)
    try:
        svrg.set_model(model_dense)
        svrg.variance_reduction = 'avg'
        svrg.variance_reduction = 'last'
        svrg.variance_reduction = 'rand'

        svrg.set_model(model_spars)
        svrg.variance_reduction = 'last'
        svrg.variance_reduction = 'rand'
    except Exception:
        self.fail('Setting variance_reduction in these cases should have '
                  'been ok')

    msg = "'avg' variance reduction cannot be used with sparse datasets"
    with catch_warnings(record=True) as w:
        simplefilter('always')
        svrg.set_model(model_spars)
        svrg.variance_reduction = 'avg'
        self.assertEqual(len(w), 1)
        self.assertTrue(issubclass(w[0].category, UserWarning))
        self.assertEqual(str(w[0].message), msg)
def check_solver(self, solver, fit_intercept=True, model='logreg',
                 decimal=1):
    """Check that a solver instance finds the same parameters as scipy BFGS

    Parameters
    ----------
    solver : `Solver`
        Instance of the solver to be tested

    fit_intercept : `bool`, default=True
        If `True`, the model fits an intercept

    model : 'linreg' | 'logreg' | 'poisreg', default='logreg'
        Name of the model used to test the solver

    decimal : `int`, default=1
        Number of decimals required for the test
    """
    # Set seed for data simulation
    np.random.seed(12)
    n_samples = TestSolver.n_samples
    n_features = TestSolver.n_features

    coeffs0 = weights_sparse_gauss(n_features, nnz=5)
    if fit_intercept:
        interc0 = 2.
    else:
        interc0 = None

    if model == 'linreg':
        X, y = SimuLinReg(coeffs0, interc0, n_samples=n_samples,
                          verbose=False, seed=123).simulate()
        model = ModelLinReg(fit_intercept=fit_intercept).fit(X, y)
    elif model == 'logreg':
        X, y = SimuLogReg(coeffs0, interc0, n_samples=n_samples,
                          verbose=False, seed=123).simulate()
        model = ModelLogReg(fit_intercept=fit_intercept).fit(X, y)
    elif model == 'poisreg':
        X, y = SimuPoisReg(coeffs0, interc0, n_samples=n_samples,
                           verbose=False, seed=123).simulate()
        # Rescale features to avoid overflows in Poisson simulations
        X /= np.linalg.norm(X, axis=1).reshape(n_samples, 1)
        model = ModelPoisReg(fit_intercept=fit_intercept).fit(X, y)
    else:
        raise ValueError("``model`` must be either 'linreg', 'logreg' or"
                         " 'poisreg'")

    solver.set_model(model)

    strength = 1e-2
    prox = ProxL2Sq(strength, (0, model.n_features))

    if type(solver) is not SDCA:
        solver.set_prox(prox)
    else:
        solver.set_prox(ProxZero())
        solver.l_l2sq = strength

    coeffs_solver = solver.solve()

    # Compare with BFGS
    bfgs = BFGS(max_iter=100,
                verbose=False).set_model(model).set_prox(prox)
    coeffs_bfgs = bfgs.solve()
    np.testing.assert_almost_equal(coeffs_solver, coeffs_bfgs,
                                   decimal=decimal)

    # We ensure that reached coeffs are not equal to zero
    self.assertGreater(norm(coeffs_solver), 0)

    self.assertAlmostEqual(
        solver.objective(coeffs_bfgs), solver.objective(coeffs_solver),
        delta=1e-2)
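# Hypothetical call sketch of check_solver: compare an SVRG instance
# against the BFGS reference on logistic data. The step value below is
# arbitrary, chosen only for illustration; other tests in this section
# pick 1 / model.get_lip_max() instead.
solver = SVRG(max_iter=100, verbose=False, tol=1e-10, seed=123, step=0.01)
self.check_solver(solver, fit_intercept=True, model='logreg', decimal=1)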
import numpy as np
import matplotlib.pyplot as plt

from tick.linear_model import SimuLinReg, SimuLogReg, SimuPoisReg

n_samples, n_features = 150, 2

weights0 = np.array([0.3, 1.2])
intercept0 = 0.5

simu_linreg = SimuLinReg(weights0, intercept0, n_samples=n_samples,
                         seed=123, verbose=False)
X_linreg, y_linreg = simu_linreg.simulate()

simu_logreg = SimuLogReg(weights0, intercept0, n_samples=n_samples,
                         seed=123, verbose=False)
X_logreg, y_logreg = simu_logreg.simulate()

simu_poisreg = SimuPoisReg(weights0, intercept0, n_samples=n_samples,
                           link='exponential', seed=123, verbose=False)
X_poisreg, y_poisreg = simu_poisreg.simulate()

plt.figure(figsize=(12, 3))

plt.subplot(1, 3, 1)
import numpy as np
import matplotlib.pyplot as plt
from time import time

from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.datasets import make_moons, make_classification, make_circles
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

from tick.simulation import weights_sparse_gauss
from tick.linear_model import SimuLogReg

n_samples = 1000
n_features = 2
seed = 123

np.set_printoptions(precision=2)

w0 = weights_sparse_gauss(n_features, nnz=2)
X, y = SimuLogReg(w0, -1., n_samples=n_samples, seed=seed).simulate()
y = (y + 1) / 2


def plot_decisions_regression(clfs, datasets, names):
    i = 1
    h = .02
    fig = plt.figure(figsize=(4 * (len(clfs) + 1), 4 * len(datasets)))
    # iterate over datasets
    for ds_cnt, ds in enumerate(datasets):
        X, y = ds
        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=.4, random_state=42)
        x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5