def test_group_lasso_lasso(sparse_X, fit_intercept, normalize):
    """Check that a Group Lasso with groups of size 1 matches the Lasso.

    Both estimators are fitted on the same data with the same ``alpha``
    (a tenth of ``alpha_max``) and solver options; their coefficients and
    intercepts must agree.
    """
    n_features = 1000
    dataset = build_dataset(
        n_samples=100, n_features=n_features, sparse_X=sparse_X)
    X, y = dataset[:2]

    alpha_max = norm(X.T @ y, ord=np.inf) / len(y)
    alpha = alpha_max / 10

    # Options shared by the reference Lasso and the Group Lasso.
    shared_params = dict(
        tol=1e-12,
        fit_intercept=fit_intercept,
        normalize=normalize,
        verbose=0,
    )

    lasso = Lasso(alpha, **shared_params)
    lasso.fit(X, y)

    # take groups of size 1:
    group_lasso = GroupLasso(alpha=alpha, groups=1, **shared_params)
    group_lasso.fit(X, y)

    np.testing.assert_allclose(group_lasso.coef_, lasso.coef_, atol=1e-6)
    np.testing.assert_allclose(
        group_lasso.intercept_, lasso.intercept_, rtol=1e-4)
def test_GroupLasso(sparse_X):
    """Fit a GroupLasso with groups of size 10 and check its duality gap.

    The dataset is requested with as many informative features as features;
    after fitting, ``dual_gap_`` must be strictly below the solver ``tol``.
    """
    tol = 1e-4
    n_features = 50

    dataset = build_dataset(
        n_samples=11,
        n_features=n_features,
        sparse_X=sparse_X,
        n_informative_features=n_features,
    )
    X, y = dataset[:2]

    estimator = GroupLasso(alpha=0.01, groups=10, tol=tol)
    estimator.fit(X, y)

    np.testing.assert_array_less(estimator.dual_gap_, tol)
# NOTE(review): this file contains another function named ``test_GroupLasso``
# earlier on; if both live in the same module, this definition shadows it and
# the earlier test is never collected.
def test_GroupLasso(sparse_X):
    """Check GroupLasso's duality gap, then run scikit-learn's estimator checks.

    The estimator is first fitted with groups of size 10 and a tight
    tolerance, and ``dual_gap_`` must be strictly below that tolerance. The
    same instance is then reconfigured and handed to ``check_estimator``.
    """
    tol = 1e-8
    n_features = 50

    dataset = build_dataset(
        n_samples=11, n_features=n_features, sparse_X=sparse_X)
    X, y = dataset[:2]

    estimator = GroupLasso(alpha=0.8, groups=10, tol=tol)
    estimator.fit(X, y)
    np.testing.assert_array_less(estimator.dual_gap_, tol)

    # Loosen the tolerance and fall back to groups of size 1 before the
    # generic checks. Unsatisfying, but per the original author's note sklearn
    # fits on 5 features there (presumably incompatible with groups of 10).
    estimator.tol = 1e-6
    estimator.groups = 1
    check_estimator(estimator)
def test_group_lasso_multitask():
    """Group Lasso and Multitask Lasso equivalence.

    A multitask problem ``(X_, Y_)`` with ``n_tasks`` targets is rewritten as
    a single-target problem: ``Y_`` is flattened column by column into ``y``
    and ``X_`` is repeated along the diagonal of a block design ``X``. Each
    group gathers the ``n_tasks`` columns of ``X`` that correspond to the same
    original feature. The test checks that

    * ``dscal_grp`` evaluated on the stacked problem, divided by ``len(Y_)``,
      equals ``alpha_max`` computed from the multitask problem;
    * ``GroupLasso`` fitted on the stacked problem yields the same
      coefficients as ``MultiTaskLasso`` fitted on the original one.
    """
    n_tasks = 3
    n_samples, n_features = 30, 50
    X_, Y_ = build_dataset(
        n_samples, n_features, n_informative_features=n_features,
        n_targets=n_tasks)[:2]
    y = Y_.reshape(-1, order='F')

    # block filling new design: task i occupies the i-th diagonal block
    X = np.zeros([n_tasks * n_samples, n_tasks * n_features], order='F')
    for task in range(n_tasks):
        rows = slice(task * n_samples, (task + 1) * n_samples)
        cols = slice(task * n_features, (task + 1) * n_features)
        X[rows, cols] = X_

    # Column indices ordered so that each run of n_tasks consecutive entries
    # holds the columns of one original feature: [0, n_features, ..., 1, ...].
    grp_indices = np.arange(n_tasks * n_features).reshape(
        n_tasks, -1).reshape(-1, order='F').astype(np.int32)
    # Group j spans grp_indices[grp_ptr[j]:grp_ptr[j + 1]].
    grp_ptr = n_tasks * np.arange(n_features + 1).astype(np.int32)

    alpha_max = np.max(norm(X_.T @ Y_, axis=1)) / len(Y_)

    # Placeholder sparse buffers: X is dense here, so these single-element
    # arrays only fill the corresponding dscal_grp arguments.
    # NOTE(review): assumes the leading ``False`` flags a dense X -- confirm
    # against the dscal_grp signature.
    X_data = np.empty([1], dtype=X.dtype)
    X_indices = np.empty([1], dtype=np.int32)
    X_indptr = np.empty([1], dtype=np.int32)
    other = dscal_grp(
        False, y, grp_ptr, grp_indices, X, X_data, X_indices, X_indptr,
        X_data, len(grp_ptr) - 1, np.zeros(1, dtype=np.int32), False)
    np.testing.assert_allclose(alpha_max, other / len(Y_))

    alpha = alpha_max / 10
    clf = MultiTaskLasso(alpha, fit_intercept=False, tol=1e-8)
    clf.fit(X_, Y_)

    groups = [grp.tolist() for grp in grp_indices.reshape(n_features, n_tasks)]
    # NOTE(review): alpha is divided by n_tasks presumably because the stacked
    # problem has n_tasks times more rows than the multitask one -- confirm
    # against the objective normalisation of both estimators.
    clf1 = GroupLasso(alpha=alpha / n_tasks, groups=groups,
                      fit_intercept=False, tol=1e-8)
    clf1.fit(X, y)

    np.testing.assert_allclose(clf1.coef_, clf.coef_.reshape(-1), atol=1e-4)
# Create true regression coefficients with 3 groups of 5 non-zero values
w_true = np.zeros(n_features)
w_true[:5] = 1
w_true[20:25] = -2
w_true[40:45] = 1
y = X @ w_true + rng.randn(n_samples)

# Fit an adapted GroupLasso model
groups = 5  # groups are contiguous and of size 5
# irregular groups are also supported, e.g. as a list of lists of feature
# indices
clf = GroupLasso(groups=groups, alpha=1.1)
clf.fit(X, y)

###############################################################################
# Display results
fig = plt.figure(figsize=(10, 4))
# `use_line_collection` is deliberately not passed to `plt.stem`: Matplotlib
# draws the stems as a LineCollection by default since 3.3, and the keyword
# was deprecated in 3.6 and removed in 3.8, where passing it raises TypeError.
m, s, _ = plt.stem(w_true, label=r"true regression coefficients")
m, s, _ = plt.stem(clf.coef_, label=r"estimated regression coefficients",
                   markerfmt='x')
# Recolor only the estimated coefficients (marker line and stem lines) so the
# two series can be told apart.
plt.setp([m, s], color='#ff7f0e')
plt.xlabel("feature index")
plt.legend()
plt.show(block=False)