def lasso_classification(table, alpha=0.3):
    """Score a Lasso regression used as a thresholded binary classifier.

    The rows of ``table`` are randomly permuted and split in two halves.
    A Lasso model is fitted on the first half; its continuous predictions
    on the second half are collapsed to 0/1 with a 0.5 cut-off and compared
    with the held-out targets.

    Parameters
    ----------
    table : 2-D array
        Column 0 is used as the target, all remaining columns as features.
        The 0.5 cut-off presumably expects targets encoded as 0/1 --
        confirm against the caller.
    alpha : float, optional
        Regularisation strength forwarded to ``Lasso``.

    Returns
    -------
    float
        Fraction of held-out rows whose collapsed prediction equals the
        target.

    Notes
    -----
    The permutation comes from numpy's global random state, so the result
    changes from call to call unless the caller seeds it.  The fitted
    coefficients are printed as a side effect.
    """
    from scikits.learn.linear_model import Lasso

    X = table[:, 1:]
    Y = table[:, 0]

    # Split data in train set and test set.
    n_samples = X.shape[0]
    # Floor division: a slice bound must be an integer, and plain ``/``
    # yields a float as soon as true division is in effect.
    half = n_samples // 2
    items = np.random.permutation(n_samples)
    training_items = items[:half]
    testing_items = items[half:]

    X_train, y_train = X[training_items], Y[training_items]
    X_test, y_test = X[testing_items], Y[testing_items]

    lasso = Lasso(alpha=alpha, fit_intercept=True)
    lasso_fit = lasso.fit(X_train, y_train)
    # Single-argument call form prints identically on Python 2 and 3.
    print(lasso_fit.coef_)

    # Collapse the regression output to hard 0/1 labels.
    y_pred_lasso = lasso_fit.predict(X_test)
    y_collapsed = np.zeros_like(y_pred_lasso)
    y_collapsed[y_pred_lasso >= 0.5] = 1

    hits = y_collapsed == y_test
    return float(hits.sum()) / hits.shape[0]
def compute_bench(alpha, n_samples, n_features, precompute):
    """Time ``Lasso`` and ``LassoLARS`` fits over a grid of problem sizes.

    For every ``(ns, nf)`` pair taken from ``n_samples`` x ``n_features`` a
    regression dataset is generated with ``make_regression_dataset``, scaled,
    and fitted once with each estimator; the wall-clock duration of each
    ``fit`` call is recorded.

    Parameters
    ----------
    alpha : float
        Regularisation strength handed to both estimators.
    n_samples : sequence of int
        Training-set sizes to benchmark (must support ``len``).
    n_features : sequence of int
        Feature counts to benchmark (must support ``len``).
    precompute : object
        Forwarded unchanged as the ``precompute`` argument of both ``fit``
        calls.

    Returns
    -------
    (lasso_results, larslasso_results) : tuple of two lists of float
        Fit durations as measured by ``time()``, one entry per grid point,
        in iteration order (``n_samples`` outer, ``n_features`` inner).
    """
    lasso_results = []
    larslasso_results = []

    n_test_samples = 0
    # The nested loops below visit every (ns, nf) combination, so the total
    # reported in the progress banner is the product of the two lengths.
    n_iterations = len(n_samples) * len(n_features)
    it = 0

    for ns in n_samples:
        for nf in n_features:
            it += 1
            print('==================')
            print('Iteration %s of %s' % (it, n_iterations))
            print('==================')

            n_informative = nf // 10
            X, Y, _, _, _ = make_regression_dataset(
                n_train_samples=ns, n_test_samples=n_test_samples,
                n_features=nf, noise=0.1, n_informative=n_informative)

            # Normalize data: divide by the root of the sum of squares
            # taken along axis 0.
            X /= np.sqrt(np.sum(X ** 2, axis=0))

            # Presumably collects up front so that garbage left over from
            # earlier work is not reclaimed inside the timed region.
            gc.collect()
            print("- benching Lasso")
            clf = Lasso(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, precompute=precompute)
            lasso_results.append(time() - tstart)

            gc.collect()
            print("- benching LassoLARS")
            clf = LassoLARS(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, normalize=False, precompute=precompute)
            larslasso_results.append(time() - tstart)

    return lasso_results, larslasso_results
y = np.dot(X, coef) # add noise y += 0.01 * np.random.normal((n_samples, )) # Split data in train set and test set n_samples = X.shape[0] X_train, y_train = X[:n_samples / 2], y[:n_samples / 2] X_test, y_test = X[n_samples / 2:], y[n_samples / 2:] ################################################################################ # Lasso from scikits.learn.linear_model import Lasso alpha = 0.1 lasso = Lasso(alpha=alpha) y_pred_lasso = lasso.fit(X_train, y_train).predict(X_test) print lasso print "r^2 on test data : %f" % ( 1 - np.linalg.norm(y_test - y_pred_lasso)**2 / np.linalg.norm(y_test)**2) ################################################################################ # ElasticNet from scikits.learn.linear_model import ElasticNet enet = ElasticNet(alpha=alpha, rho=0.7) y_pred_enet = enet.fit(X_train, y_train).predict(X_test) print enet print "r^2 on test data : %f" % (