def test_group_lasso_lasso(sparse_X, fit_intercept, normalize):
    # check that GroupLasso with groups of size 1 gives the Lasso solution
    n_features = 1000
    X, y = build_dataset(
        n_samples=100, n_features=n_features, sparse_X=sparse_X)[:2]
    alpha_max = norm(X.T @ y, ord=np.inf) / len(y)
    alpha = alpha_max / 10
    clf = Lasso(alpha, tol=1e-12, fit_intercept=fit_intercept,
                normalize=normalize, verbose=0)
    clf.fit(X, y)
    # take groups of size 1:
    clf1 = GroupLasso(alpha=alpha, groups=1, tol=1e-12,
                      fit_intercept=fit_intercept, normalize=normalize,
                      verbose=0)
    clf1.fit(X, y)
    np.testing.assert_allclose(clf1.coef_, clf.coef_, atol=1e-6)
    np.testing.assert_allclose(clf1.intercept_, clf.intercept_, rtol=1e-4)
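# Note: alpha_max = norm(X.T @ y, inf) / n_samples is the smallest
# regularization strength for which the Lasso solution is identically zero
# (for the 1 / (2 n) * ||y - X w||^2 + alpha * ||w||_1 objective used here),
# so alpha = alpha_max / 10 guarantees a nontrivial sparse solution to compare.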
class Solver(BaseSolver):
    name = 'Celer'
    stop_strategy = 'iteration'

    install_cmd = 'conda'
    requirements = ['pip:git+https://github.com/mathurinm/celer.git']

    def set_objective(self, X, y, lmbd):
        self.X, self.y, self.lmbd = X, y, lmbd

        warnings.filterwarnings('ignore', category=ConvergenceWarning)
        n_samples = self.X.shape[0]
        self.lasso = Lasso(
            alpha=self.lmbd / n_samples, max_iter=1, max_epochs=100000,
            tol=1e-12, prune=True, fit_intercept=False, normalize=False,
            warm_start=False, positive=False, verbose=False,
        )

    def run(self, n_iter):
        self.lasso.max_iter = n_iter
        self.lasso.fit(self.X, self.y)

    def get_result(self):
        return self.lasso.coef_.flatten()
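# With stop_strategy = 'iteration', the benchmark framework calls run() with a
# growing n_iter and records the objective reached after each call. Below is a
# minimal sketch of such a driver loop; the loop itself is an illustration of
# the protocol, not benchopt's actual runner code.
import numpy as np

X = np.random.randn(50, 100)
y = np.random.randn(50)
lmbd = 0.1 * np.max(np.abs(X.T @ y))

solver = Solver()
solver.set_objective(X, y, lmbd)
for n_iter in (1, 2, 5, 10, 50):
    solver.run(n_iter)
    w = solver.get_result()
    # Lasso objective; consistent with alpha = lmbd / n_samples above
    obj = 0.5 * np.linalg.norm(y - X @ w) ** 2 + lmbd * np.abs(w).sum()
    print(n_iter, obj)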
def fit_weights(self, y, phi, reg_param, tol_factor=1e-4):
    obs = self.compute_obs(phi, version=1)
    mat = np.array([np.sum(obs[i], axis=0) for i in range(self.num_atoms)])
    mat = mat.reshape((self.num_atoms, -1)).T

    tol = tol_factor * np.linalg.norm(y) ** 2 / y.size
    perimeters = np.array([self.atoms[i].support.compute_perimeter()
                           for i in range(self.num_atoms)])

    lasso = Lasso(alpha=reg_param / y.size, fit_intercept=False, tol=tol,
                  weights=perimeters)
    lasso.fit(mat, y.reshape(-1))

    new_weights = lasso.coef_
    self.atoms = [WeightedIndicatorFunction(new_weights[i], self.atoms[i].support)
                  for i in range(self.num_atoms)
                  if np.abs(new_weights[i]) > 1e-2]
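# Assuming the weighted Lasso variant whose signature is used above (celer's
# Lasso accepts a per-feature `weights` argument), the fit solves
#     min_a  1 / (2 m) * ||y - mat @ a||^2 + (reg_param / m) * sum_i p_i |a_i|
# with m = y.size and p_i the perimeter of atom i's support, so atoms with
# large perimeters are penalized more heavily; atoms whose fitted weight is
# numerically negligible are then dropped.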
class Solver(BaseSolver):
    name = 'Celer'
    stop_strategy = 'iteration'

    install_cmd = 'conda'
    requirements = ['pip:celer']
    references = [
        'M. Massias, A. Gramfort and J. Salmon, ICML, '
        '"Celer: a Fast Solver for the Lasso with Dual Extrapolation", '
        'vol. 80, pp. 3321-3330 (2018)'
    ]

    def set_objective(self, X, y, lmbd, fit_intercept):
        self.X, self.y, self.lmbd = X, y, lmbd
        self.fit_intercept = fit_intercept

        warnings.filterwarnings('ignore', category=ConvergenceWarning)
        n_samples = self.X.shape[0]
        self.lasso = Lasso(
            alpha=self.lmbd / n_samples, max_iter=1, max_epochs=100000,
            tol=1e-12, prune=True, fit_intercept=fit_intercept,
            warm_start=False, positive=False, verbose=False,
        )

    def run(self, n_iter):
        self.lasso.max_iter = n_iter
        self.lasso.fit(self.X, self.y)

    def get_result(self):
        beta = self.lasso.coef_.flatten()
        if self.fit_intercept:
            beta = np.r_[beta, self.lasso.intercept_]
        return beta
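# Design note: when fit_intercept is True, get_result appends the fitted
# intercept as the last entry of the returned vector, so downstream objective
# code can recover both coefficients and intercept from one flat array.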
def test_rw_cvg():
    X, y, _ = make_correlated_data(20, 40, random_state=0)
    alpha = np.max(np.abs(X.T @ y)) / 5

    w, E = reweighted(X, y, alpha, max_iter=1000, n_adapt=5)
    clf = Lasso(fit_intercept=False, alpha=alpha / len(y)).fit(X, y)
    np.testing.assert_allclose(w, clf.coef_, atol=5e-4)
    np.testing.assert_allclose(E[-1] / E[0], E[-2] / E[0], atol=5e-4)

    w, E = reweighted(X, y, alpha, deriv_MCP)
    np.testing.assert_allclose(E[-1] / E[0], E[-2] / E[0], atol=5e-4)
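# The first block asserts both convergence of the reweighted iterates to the
# Lasso solution and stabilization of the normalized objective E; with
# deriv_MCP (a nonconvex penalty) only objective stabilization is asserted,
# since the minimizer need not coincide with the Lasso solution.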
def test_cd_ista_fista():
    np.random.seed(0)
    X, y, _ = make_correlated_data(20, 40, random_state=0)
    alpha = np.max(np.abs(X.T @ y)) / 5

    w, _, _ = cd(X, y, alpha, max_iter=100)
    clf = Lasso(fit_intercept=False, alpha=alpha / len(y)).fit(X, y)
    np.testing.assert_allclose(w, clf.coef_, atol=5e-4)

    w, _, _ = ista(X, y, alpha, max_iter=1_000)
    np.testing.assert_allclose(w, clf.coef_, atol=5e-4)

    w, _, _ = fista(X, y, alpha, max_iter=1_000)
    np.testing.assert_allclose(w, clf.coef_, atol=5e-4)
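# For reference, a minimal ISTA sketch for the objective these tests assume,
# 0.5 * ||y - X w||^2 + alpha * ||w||_1 (sum-of-squares scaling, hence the
# alpha / len(y) rescaling when comparing against a mean-squared-error Lasso).
# This is an illustration of the algorithm, not the tested implementation.
import numpy as np

def ista_sketch(X, y, alpha, max_iter=1000):
    L = np.linalg.norm(X, ord=2) ** 2  # Lipschitz constant of the gradient
    w = np.zeros(X.shape[1])
    for _ in range(max_iter):
        z = w - X.T @ (X @ w - y) / L  # gradient step on the smooth term
        w = np.sign(z) * np.maximum(np.abs(z) - alpha / L, 0.)  # soft-threshold
    return w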
def linear_cv(dataset_name, tol=1e-3, compute_jac=True, model_name="lasso"):
    X, y = load_libsvm(dataset_name)
    X = csc_matrix(X)
    n_samples, n_features = X.shape
    p_alpha = p_alphas[dataset_name, model_name]

    max_iter = max_iters[dataset_name]
    if model_name == "lasso":
        model = Lasso(X, y, 0, max_iter=max_iter, tol=tol)
    elif model_name == "logreg":
        model = SparseLogreg(X, y, 0, max_iter=max_iter, tol=tol)

    alpha_max = np.exp(model.compute_alpha_max())
    alpha = p_alpha * alpha_max

    # compute a high-precision reference solution beta_star
    if model_name == "lasso":
        clf = Lasso_cel(
            alpha=alpha, fit_intercept=False, warm_start=True,
            tol=tol * norm(y) ** 2 / 2, max_iter=10000)
        clf.fit(X, y)
        beta_star = clf.coef_
    elif model_name == "logreg":
        blitzl1.set_use_intercept(False)
        blitzl1.set_tolerance(1e-32)
        blitzl1.set_verbose(True)
        prob = blitzl1.LogRegProblem(X, y)
        clf = prob.solve(alpha * n_samples)
        beta_star = np.array(clf.x)
    mask = beta_star != 0
    dense = beta_star[mask]

    # compute the reference Jacobian jac_star on the support of beta_star
    v = -n_samples * alpha * np.sign(beta_star[mask])
    mat_to_inv = model.get_hessian(mask, dense, np.log(alpha))
    jac_temp = cg(mat_to_inv, v, tol=1e-10)
    jac_star = np.zeros(n_features)
    jac_star[mask] = jac_temp[0]

    log_alpha = np.log(alpha)
    list_beta, list_jac = get_beta_jac_iterdiff(
        X, y, log_alpha, model, save_iterates=True, tol=tol,
        max_iter=max_iter, compute_jac=compute_jac)

    diff_beta = norm(list_beta - beta_star, axis=1)
    diff_jac = norm(list_jac - jac_star, axis=1)

    # index of the first iterate whose support matches that of beta_star
    supp_star = beta_star != 0
    n_iter = list_beta.shape[0]
    supp_id = 0
    for i in np.arange(n_iter)[::-1]:
        supp = list_beta[i, :] != 0
        if not np.all(supp == supp_star):
            supp_id = i + 1
            break

    return dataset_name, p_alpha, diff_beta, diff_jac, n_iter, supp_id
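# For the Lasso, where model.get_hessian reduces to X[:, mask].T @ X[:, mask],
# the reference Jacobian follows from implicit differentiation of the
# optimality conditions on the support S of beta_star: differentiating
#     X_S.T @ (X_S @ beta_S - y) + n_samples * alpha * sign(beta_S) = 0
# with respect to log(alpha) yields the linear system
#     (X_S.T @ X_S) @ jac_S = -n_samples * alpha * sign(beta_S),
# which is exactly what the conjugate gradient call above solves.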
model_cv = LassoCV(
    tol=1e-7, max_iter=100, cv=2, n_jobs=2).fit(X_train_val, y_train_val)

# Measure MSE on test data
mse_cv = mean_squared_error(y_test, model_cv.predict(X_test))
print("Vanilla LassoCV: Mean-squared error on test data %f" % mse_cv)

##############################################################################
# Weighted Lasso with sparse-ho.
# We use the vanilla LassoCV regularization strength as a starting point
alpha0 = np.log(model_cv.alpha_) * np.ones(X_train.shape[1])

# Weighted Lasso with sparse-ho: one hyperparameter per feature
estimator = Lasso(fit_intercept=False, max_iter=10, warm_start=True)
model = WeightedLasso(X_train, y_train, estimator=estimator)
criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
algo = ImplicitForward()
monitor = Monitor()
grad_search(algo, criterion, alpha0, monitor, n_outer=20, tol=1e-6)

##############################################################################
# MSE on the validation set
mse_sho_val = mean_squared_error(y_val, estimator.predict(X_val))

# MSE on the test set, i.e. unseen data
mse_sho_test = mean_squared_error(y_test, estimator.predict(X_test))

print("Sparse-ho: Mean-squared error on validation data %f" % mse_sho_val)
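# Here a single held-out split (X_val, y_val) drives the weight search; the
# variant below wraps HeldOutMSE in CrossVal to average the criterion over
# folds instead of relying on one split.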
model_cv = LassoCV(
    verbose=False, fit_intercept=False, alphas=alphas,
    tol=1e-7, max_iter=100, cv=cv, n_jobs=2).fit(X, y)

# Measure MSE on test data
mse_cv = mean_squared_error(y_test, model_cv.predict(X_test))
print("Vanilla LassoCV: Mean-squared error on test data %f" % mse_cv)

##############################################################################
# Weighted Lasso with sparse-ho.
# We use the vanilla LassoCV regularization strength as a starting point
alpha0 = model_cv.alpha_ * np.ones(n_features)

# Weighted Lasso with sparse-ho: one hyperparameter per feature
estimator = Lasso(fit_intercept=False, max_iter=100, warm_start=True)
model = WeightedLasso(estimator=estimator)
sub_criterion = HeldOutMSE(idx_train, idx_val)
criterion = CrossVal(sub_criterion, cv=cv)
algo = ImplicitForward()
monitor = Monitor()
optimizer = GradientDescent(
    n_outer=100, tol=1e-7, verbose=True, p_grad_norm=1.9)
results = grad_search(
    algo, criterion, model, optimizer, X, y, alpha0, monitor)

##############################################################################
# Refit with the best weights found during the search
estimator.weights = monitor.alphas[-1]
estimator.fit(X, y)

##############################################################################
# MSE on the validation set
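# grad_search runs n_outer gradient steps on the cross-validated criterion,
# with ImplicitForward computing the hypergradient with respect to the
# per-feature regularization weights; monitor.alphas[-1] holds the last
# iterate, which is then used to refit the weighted Lasso on the full data.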