Example #1
def test_lasso_lars_vs_lasso_cd(verbose=False):
    """
    Test that LassoLars and Lasso using coordinate descent give the
    same results
    """
    alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso')
    lasso_cd = linear_model.Lasso(fit_intercept=False)
    for (c, a) in zip(lasso_path.T, alphas):
        lasso_cd.alpha = a
        lasso_cd.fit(X, y, tol=1e-8)
        error = np.linalg.norm(c - lasso_cd.coef_)
        assert error < 0.01

    # similar test, with the classifiers
    for alpha in np.linspace(1e-2, 1 - 1e-2):
        clf1 = linear_model.LassoLARS(alpha=alpha).fit(X, y)
        clf2 = linear_model.Lasso(alpha=alpha).fit(X, y, tol=1e-8)
        err = np.linalg.norm(clf1.coef_ - clf2.coef_)
        assert err < 1e-3
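
These test functions are shown without their module context: they assume numpy, the historical scikits.learn linear_model module, and module-level X, y in scope. A minimal sketch of that assumed setup, using the diabetes data as in the original test module (a reconstruction, not verbatim source):

# assumed module-level setup for the test snippets on this page
import numpy as np
from scikits.learn import linear_model, datasets

diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

Note that lars_path returns the alphas along the path, the active variable indices, and the coefficient path; the coefficients are the columns of that path matrix, which is why the loop above iterates over lasso_path.T.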
Example #2
def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False):
    """
    Test that LassoLars and Lasso using coordinate descent give the
    same results when early stopping is used.
    (test: before, in the middle, and in the last part of the path)
    """
    alphas_min = [10, 0.9, 1e-4]
    for alpha_min in alphas_min:  # renamed so the loop variable doesn't shadow the list
        alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
                                                       alpha_min=alpha_min)
        lasso_cd = linear_model.Lasso(fit_intercept=False)
        lasso_cd.alpha = alphas[-1]
        lasso_cd.fit(X, y, tol=1e-8)
        error = np.linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
        assert error < 0.01
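
Here alpha_min stops the LARS path early, so only the endpoint of the truncated path is checked: lasso_path[:, -1] holds the coefficients at the smallest alpha reached (alphas[-1]), and coordinate descent is fitted at that same alpha for comparison.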
Example #3
import numpy as np
import pylab as pl

from scikits.learn import cross_val, datasets, linear_model

diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

lasso = linear_model.Lasso()

alphas = np.logspace(-4, -1, 20)

scores = list()
scores_std = list()

for alpha in alphas:
    lasso.alpha = alpha
    this_scores = cross_val.cross_val_score(lasso, X, y, n_jobs=-1)
    scores.append(np.mean(this_scores))
    scores_std.append(np.std(this_scores))

pl.figure(1, figsize=(2.5, 2))
pl.clf()
pl.axes([.1, .25, .8, .7])
pl.semilogx(alphas, scores)
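# the dashed lines plot the mean score plus/minus std / 20 as rough error bands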
pl.semilogx(alphas, np.array(scores) + np.array(scores_std) / 20, 'b--')
pl.semilogx(alphas, np.array(scores) - np.array(scores_std) / 20, 'b--')
pl.yticks(())
pl.ylabel('CV score')
pl.xlabel('alpha')
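
A small follow-up sketch (not part of the original script; best_alpha is an illustrative name) showing how one could read off the alpha with the highest mean CV score:

# pick the alpha that maximizes the mean cross-validation score
best_alpha = alphas[np.argmax(scores)]
print('best alpha: %.4g' % best_alpha)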
Example #4
        rhos = [0]

    rc = ''
    best_alpha = 0
    best_rho = 0
    best_mean_rcor = 0
    best_mean_max_pos = features
    best_mean_max_intersect = 0

    for a, alpha in enumerate(ALPHA_VALUES):
        for r, rho in enumerate(rhos):

            if regressionModel == 'ElasticNet':
                model = lm.ElasticNet(alpha=alpha, rho=rho)
            elif regressionModel == 'Lasso':
                model = lm.Lasso(alpha=alpha)
            elif regressionModel == 'Ridge' and alpha != 0:
                model = lm.Ridge(alpha=alpha)
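            # no branch matches for 'Ridge' with alpha == 0, in which case no
            # new model is assigned for this grid point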

            for k in range(file_num):

                dy, dx = generate_data.gen_data(samples, features, impFeat)
                #dy, dx = genRedundantData(100, 6, 2, 2)
                examples, features = dx.shape

                (weights_iter, rcors, max_position_iter, intersect_size_iter,
                 deltatime) = regBoost(dx, dy, model, bootstrap_num, impFeat)

                files_rcors[k, :] = rcors
                if np.mean(rcors) > best_mean_rcor:
                    best_mean_rcor = np.mean(rcors)
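
The fragment above is truncated, but the pattern is a hand-rolled grid search over alpha (and rho for ElasticNet) that tracks the settings with the best mean score. A minimal self-contained sketch of that pattern, assuming current scikit-learn names (ElasticNet's rho was later renamed l1_ratio); the data and in-sample scoring are for illustration only:

import numpy as np
from sklearn import linear_model as lm

rng = np.random.RandomState(0)
X = rng.randn(100, 10)
y = X[:, 0] - 2 * X[:, 1] + 0.1 * rng.randn(100)

ALPHA_VALUES = np.logspace(-3, 0, 10)
rhos = [0.2, 0.5, 0.8]  # l1_ratio grid, only meaningful for ElasticNet

best_score, best_alpha, best_rho = -np.inf, None, None
for alpha in ALPHA_VALUES:
    for rho in rhos:
        model = lm.ElasticNet(alpha=alpha, l1_ratio=rho)
        score = model.fit(X, y).score(X, y)  # in-sample R^2, illustration only
        if score > best_score:
            best_score, best_alpha, best_rho = score, alpha, rho

print('best alpha=%.3g, rho=%.2g (R^2=%.3f)' % (best_alpha, best_rho, best_score))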