Exemplo n.º 1
0
def lasso_classification(table, alpha=0.3):
    """Fit a Lasso on a random half of ``table`` and score it on the rest.

    Column 0 of ``table`` is used as the target and the remaining columns as
    features.  The rows are shuffled with ``np.random.permutation`` and split
    in two: the first half trains the model, the second half is held out.
    The continuous predictions on the held-out rows are collapsed to labels
    with a 0.5 threshold (``>= 0.5`` becomes 1, anything else 0) and compared
    with the held-out targets.

    Parameters
    ----------
    table : 2-D array
        Column 0 holds the target, columns 1 onwards hold the features.
        Presumably the targets are 0/1 labels, since predictions are
        collapsed to 0 or 1 -- confirm against the caller.
    alpha : float, optional
        Passed straight through to ``Lasso(alpha=...)``.

    Returns
    -------
    float
        Fraction of held-out rows whose collapsed prediction equals the
        target.

    Notes
    -----
    The fitted coefficients are printed as a side effect.  The split uses
    NumPy's global random state, so the score varies from call to call
    unless that state is seeded beforehand.
    """
    from scikits.learn.linear_model import Lasso

    X = table[:, 1:]
    Y = table[:, 0]

    # Split data in train set and test set.  Floor division keeps the split
    # point an integer even when true division is in effect; with an odd
    # number of rows the extra row goes to the test set.
    n_samples = X.shape[0]
    half = n_samples // 2
    items = np.random.permutation(n_samples)
    training_items = items[:half]
    testing_items = items[half:]
    X_train, y_train = X[training_items], Y[training_items]
    X_test, y_test = X[testing_items], Y[testing_items]

    lasso = Lasso(alpha=alpha, fit_intercept=True)
    lasso_fit = lasso.fit(X_train, y_train)
    # Single-argument call form: prints the same text whether ``print`` is
    # the Python 2 statement or the print function.
    print(lasso_fit.coef_)

    # Collapse the regression output to 0/1 labels before scoring.
    y_pred_lasso = lasso_fit.predict(X_test)
    y_collapsed = np.zeros_like(y_pred_lasso)
    y_collapsed[y_pred_lasso >= 0.5] = 1
    test = y_collapsed == y_test
    return float(test.sum()) / test.shape[0]
Exemplo n.º 2
0
def compute_bench(alpha, n_samples, n_features, precompute):
    """Time ``Lasso`` and ``LassoLARS`` fits over a grid of problem sizes.

    For every pairing of a sample count from ``n_samples`` with a feature
    count from ``n_features``, a dataset is generated with
    ``make_regression_dataset``, each column of ``X`` is divided by its
    Euclidean norm, and both estimators are fitted on it.  The elapsed
    ``time()`` around each ``fit`` call is recorded.

    Parameters
    ----------
    alpha : float
        Passed to both ``Lasso`` and ``LassoLARS``.
    n_samples : sequence of int
        Training-set sizes to benchmark (outer loop).
    n_features : sequence of int
        Feature counts to benchmark (inner loop).  One tenth of each,
        rounded down, is requested as the number of informative features.
    precompute
        Forwarded unchanged as ``fit(..., precompute=precompute)`` to both
        estimators.

    Returns
    -------
    tuple of (list, list)
        ``(lasso_results, larslasso_results)``: one elapsed-time entry per
        grid point, in iteration order (``n_samples`` outer, ``n_features``
        inner).  Presumably seconds, if ``time`` is ``time.time``.

    Notes
    -----
    Progress is printed for every grid point.
    """
    lasso_results = []
    larslasso_results = []

    # No test rows are requested from the generator; only fits are timed.
    n_test_samples = 0

    # The nested loops visit every (ns, nf) pair, so the total shown in the
    # progress message is the product of the two lengths.
    total = len(n_samples) * len(n_features)
    it = 0

    for ns in n_samples:
        for nf in n_features:
            it += 1
            print('==================')
            print('Iteration %s of %s' % (it, total))
            print('==================')
            n_informative = nf // 10
            X, Y, _, _, _ = make_regression_dataset(
                n_train_samples=ns, n_test_samples=n_test_samples,
                n_features=nf, noise=0.1, n_informative=n_informative)

            X /= np.sqrt(np.sum(X ** 2, axis=0))  # Normalize data

            # Run the garbage collector before starting each timer.
            gc.collect()
            print("- benching Lasso")
            clf = Lasso(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, precompute=precompute)
            lasso_results.append(time() - tstart)

            gc.collect()
            print("- benching LassoLARS")
            clf = LassoLARS(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, normalize=False, precompute=precompute)
            larslasso_results.append(time() - tstart)

    return lasso_results, larslasso_results
y = np.dot(X, coef)

# Add independent Gaussian noise to every target.  The shape has to be
# passed as ``size``: given positionally it is taken as the mean (``loc``),
# which draws a single value and shifts all of y by the same offset.
y += 0.01 * np.random.normal(size=y.shape)

# Split data in train set and test set.  Floor division keeps the split
# index an integer even when true division is in effect.
n_samples = X.shape[0]
X_train, y_train = X[:n_samples // 2], y[:n_samples // 2]
X_test, y_test = X[n_samples // 2:], y[n_samples // 2:]

################################################################################
# Lasso
from scikits.learn.linear_model import Lasso

# Penalty weight for the Lasso; the same value is reused further down the
# file.
alpha = 0.1
lasso = Lasso(alpha=alpha)

# Fit on the training half and predict the held-out half in one chain.
y_pred_lasso = lasso.fit(X_train, y_train).predict(X_test)
print(lasso)

# Reported score: 1 - ||y_test - y_pred||^2 / ||y_test||^2
# (y_test is used as-is, without subtracting its mean).
print("r^2 on test data : %f" % (
    1 - (np.linalg.norm(y_test - y_pred_lasso) ** 2
         / np.linalg.norm(y_test) ** 2),))

################################################################################
# ElasticNet
from scikits.learn.linear_model import ElasticNet

enet = ElasticNet(alpha=alpha, rho=0.7)

y_pred_enet = enet.fit(X_train, y_train).predict(X_test)
print enet
print "r^2 on test data : %f" % (