def test_SimuLogReg(self):
    """...Test simulation of a Logistic Regression

    Runs ``SimuLogReg`` with a fixed seed and compares the simulated
    features and labels with hard-coded reference values.
    """
    n_samples, n_features, seed = 10, 3, 123

    # Exponentially decaying magnitudes; every even-indexed weight is negated.
    weights = np.exp(-np.arange(n_features) / 10.)
    weights[::2] *= -1

    # ``None`` is passed as the second positional argument (presumably the
    # intercept, as in the other call sites of this file -- TODO confirm).
    simulator = SimuLogReg(weights, None, n_samples=n_samples, seed=seed,
                           verbose=False)
    features, labels = simulator.simulate()

    # Reference output recorded for seed=123 (one row per sample).
    expected_features = np.array([
        [1.4912667, 0.80881799, 0.26977298],
        [1.23227551, 0.50697013, 1.9409132],
        [1.8891494, 1.49834791, 2.41445794],
        [0.19431319, 0.80245126, 1.02577552],
        [-1.61687582, -1.08411865, -0.83438387],
        [2.30419894, -0.68987056, -0.39750262],
        [-0.28826405, -1.23635074, -0.76124386],
        [-1.32869473, -1.8752391, -0.182537],
        [0.79464218, 0.65055633, 1.57572506],
        [0.71524202, 1.66759831, 0.88679047],
    ])
    expected_labels = np.array(
        [-1., -1., -1., -1., 1., -1., 1., -1., -1., 1.])

    # Features are checked first, then labels.
    np.testing.assert_array_almost_equal(expected_features, features)
    np.testing.assert_array_almost_equal(expected_labels, labels)
def generate_logistic_data(n_features, n_samples, use_intercept=False):
    """Simulate a logistic-regression dataset with sparse true coefficients.

    The global NumPy random state is re-seeded with ``12`` before the true
    coefficients are drawn with ``weights_sparse_gauss(n_features, nnz=5)``.

    Parameters
    ----------
    n_features : int
        Number of features. Must be strictly greater than 5.

    n_samples : int
        Number of samples forwarded to ``SimuLogReg``.

    use_intercept : bool, default=False
        If True, the simulation uses an intercept equal to ``2.``;
        otherwise ``None`` is passed as the intercept.

    Returns
    -------
    y
        Labels returned by ``SimuLogReg.simulate``.

    X
        Features returned by ``SimuLogReg.simulate``.

    coeffs0
        True coefficients used for the simulation.

    interc0 : float or None
        True intercept used for the simulation.

    Raises
    ------
    ValueError
        If ``n_features`` is smaller than or equal to 5.
    """
    if n_features <= 5:
        raise ValueError("``n_features`` must be larger than 5")

    # Fix the global seed so the drawn coefficients are reproducible.
    np.random.seed(12)
    true_coeffs = weights_sparse_gauss(n_features, nnz=5)
    true_intercept = 2. if use_intercept else None

    simulator = SimuLogReg(true_coeffs, true_intercept, n_samples=n_samples,
                           verbose=False)
    features, labels = simulator.simulate()

    # Labels come first in the returned tuple, then features.
    return labels, features, true_coeffs, true_intercept
def test_asaga_solver(self):
    """...Check ASAGA solver for a Logistic Regression with Elastic net
    penalization

    The same ``SAGA`` configuration is solved once with ``n_threads=1`` and
    once with ``n_threads=2``; both solutions must agree to 4 decimals.
    """
    seed = 1398
    np.random.seed(seed)

    n_samples, n_features = 4000, 30
    true_weights = weights_sparse_gauss(n_features, nnz=3).astype(self.dtype)
    intercept = 0.2
    penalty_strength = 1e-3
    sparsity = 1e-4

    # Sparse CSR design matrix, generated with its own fixed random state.
    sparse_features = sparse.rand(n_samples, n_features, density=sparsity,
                                  format='csr', random_state=8)
    sparse_features = sparse_features.astype(self.dtype)

    simulator = SimuLogReg(true_weights, n_samples=n_samples,
                           features=sparse_features, verbose=False,
                           intercept=intercept, dtype=self.dtype)
    features, labels = simulator.simulate()

    model = ModelLogReg(fit_intercept=True)
    model.fit(features, labels)
    prox = ProxElasticNet(penalty_strength, ratio=0.1,
                          range=(0, n_features))

    # Settings shared by both solvers; only the thread count differs.
    solver_kwargs = dict(step=1. / model.get_lip_max(), max_iter=100,
                         tol=1e-10, verbose=False, record_every=10,
                         seed=seed)

    saga = SAGA(n_threads=1, **solver_kwargs)
    saga.set_model(model).set_prox(prox)
    saga.solve()

    asaga = SAGA(n_threads=2, **solver_kwargs)
    asaga.set_model(model).set_prox(prox)
    asaga.solve()

    np.testing.assert_array_almost_equal(saga.solution, asaga.solution,
                                         decimal=4)
    # Guard against a trivially-zero solution; the last entry is left out
    # (presumably the fitted intercept -- TODO confirm).
    self.assertGreater(np.linalg.norm(saga.solution[:-1]), 0)
# Simulate three linear models (linear, logistic and Poisson regression) with
# the same weights, intercept and seed, then start a 1x3 figure with the
# linear-regression scatter plot.
# NOTE(review): ``n_samples`` is not defined in this fragment; presumably it
# is set earlier in the script -- confirm.

# Ground-truth parameters shared by the three simulations.
weights0 = np.array([0.3, 1.2])
intercept0 = 0.5

# Linear regression simulation.
simu_linreg = SimuLinReg(weights0, intercept0, n_samples=n_samples,
                         seed=123, verbose=False)
X_linreg, y_linreg = simu_linreg.simulate()

# Logistic regression simulation.
simu_logreg = SimuLogReg(weights0, intercept0, n_samples=n_samples,
                         seed=123, verbose=False)
X_logreg, y_logreg = simu_logreg.simulate()

# Poisson regression simulation, using the 'exponential' link.
simu_poisreg = SimuPoisReg(weights0, intercept0, n_samples=n_samples,
                           link='exponential', seed=123, verbose=False)
X_poisreg, y_poisreg = simu_poisreg.simulate()

plt.figure(figsize=(12, 3))

# First of three side-by-side panels: the rows of ``X_linreg.T`` are unpacked
# as the scatter coordinates and the points are colored by ``y_linreg``.
plt.subplot(1, 3, 1)
plt.scatter(*X_linreg.T, c=y_linreg, cmap='RdBu')
plt.colorbar()
plt.title('Linear', fontsize=16)
seed = 1398 np.random.seed(seed) n_samples = 40000 n_features = 20000 sparsity = 1e-4 penalty_strength = 1e-5 weights = weights_sparse_gauss(n_features, nnz=1000) intercept = 0.2 features = sparse.rand(n_samples, n_features, density=sparsity, format='csr') simulator = SimuLogReg(weights, n_samples=n_samples, features=features, verbose=False, intercept=intercept) features, labels = simulator.simulate() model = ModelLogReg(fit_intercept=True) model.fit(features, labels) prox = ProxElasticNet(penalty_strength, ratio=0.5, range=(0, n_features)) svrg_step = 1. / model.get_lip_max() test_n_threads = [1, 2, 4] fig, axes = plt.subplots(1, 2, figsize=(8, 4)) for ax, SolverClass in zip(axes, [SVRG, SAGA]): solver_list = [] solver_labels = [] for n_threads in test_n_threads: