def compute_bench(alpha, n_samples, n_features, precompute):
    """Time ``Lasso`` and ``LassoLARS`` fits over a grid of problem sizes.

    A fresh regression dataset is generated for every
    ``(n_samples, n_features)`` pair and both estimators are fitted on it
    with ``fit_intercept=False``.

    Parameters
    ----------
    alpha : value forwarded as ``alpha`` to both estimators.
    n_samples : sized sequence
        Training-set sizes (outer loop).
    n_features : sized sequence
        Feature counts (inner loop).
    precompute : value forwarded as ``precompute`` to both ``fit`` calls.

    Returns
    -------
    lasso_results, larslasso_results : tuple of two lists of float
        Elapsed fit times in seconds, one entry per pair, ordered with
        ``n_samples`` as the outer loop and ``n_features`` as the inner one.
    """
    lasso_results = []
    larslasso_results = []

    n_test_samples = 0
    # Every (ns, nf) pair is benchmarked once, so the total number of
    # iterations is the product of the two grid sizes, not their maximum.
    n_iterations = len(n_samples) * len(n_features)
    it = 0

    for ns in n_samples:
        for nf in n_features:
            it += 1
            print('==================')
            print('Iteration %s of %s' % (it, n_iterations))
            print('==================')
            n_informative = nf // 10
            X, Y, _, _, _ = make_regression_dataset(
                n_train_samples=ns, n_test_samples=n_test_samples,
                n_features=nf, noise=0.1, n_informative=n_informative)

            # Normalize data: divide in place by the Euclidean norm taken
            # along axis 0.
            X /= np.sqrt(np.sum(X ** 2, axis=0))

            # Collect garbage before starting the clock so a pending
            # collection is less likely to land inside the timed fit.
            gc.collect()
            print("- benching Lasso")
            clf = Lasso(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, precompute=precompute)
            lasso_results.append(time() - tstart)

            gc.collect()
            print("- benching LassoLARS")
            clf = LassoLARS(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, normalize=False, precompute=precompute)
            larslasso_results.append(time() - tstart)

    return lasso_results, larslasso_results
def compute_bench(alpha, n_samples, n_features, precompute):
    """Time ``Lasso`` and ``LassoLars`` fits over a grid of problem sizes.

    A fresh regression dataset is generated for every
    ``(n_samples, n_features)`` pair and both estimators are fitted on it
    with ``fit_intercept=False``.

    Parameters
    ----------
    alpha : value forwarded as ``alpha`` to both estimators.
    n_samples : sized sequence
        Training-set sizes (outer loop).
    n_features : sized sequence
        Feature counts (inner loop).
    precompute : value forwarded as ``precompute`` to both ``fit`` calls.

    Returns
    -------
    lasso_results, lars_lasso_results : tuple of two lists of float
        Elapsed fit times in seconds, one entry per pair, ordered with
        ``n_samples`` as the outer loop and ``n_features`` as the inner one.
    """
    lasso_results = []
    lars_lasso_results = []

    n_test_samples = 0
    # Every (ns, nf) pair is benchmarked once, so the total number of
    # iterations is the product of the two grid sizes, not their maximum.
    n_iterations = len(n_samples) * len(n_features)
    it = 0

    for ns in n_samples:
        for nf in n_features:
            it += 1
            print('==================')
            print('Iteration %s of %s' % (it, n_iterations))
            print('==================')
            n_informative = nf // 10
            X, Y, _, _, _ = make_regression_dataset(
                n_train_samples=ns, n_test_samples=n_test_samples,
                n_features=nf, noise=0.1, n_informative=n_informative)

            # Normalize data: divide in place by the Euclidean norm taken
            # along axis 0.
            X /= np.sqrt(np.sum(X ** 2, axis=0))

            # Collect garbage before starting the clock so a pending
            # collection is less likely to land inside the timed fit.
            gc.collect()
            print("- benching Lasso")
            clf = Lasso(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, precompute=precompute)
            lasso_results.append(time() - tstart)

            gc.collect()
            print("- benching LassoLars")
            clf = LassoLars(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, normalize=False, precompute=precompute)
            lars_lasso_results.append(time() - tstart)

    return lasso_results, lars_lasso_results
def _lars_path_with_gram(X, y):
    """Run ``lars_path`` (lasso method) from a precomputed Gram matrix.

    The Gram matrix and ``X.T . y`` are built inside this function so that
    their cost is part of whatever timing the caller wraps around the call.
    """
    G = np.dot(X.T, X)  # precomputed Gram matrix
    Xy = np.dot(X.T, y)
    lars_path(X, y, Xy=Xy, Gram=G, method='lasso')


def compute_bench(samples_range, features_range):
    """Time ``lars_path`` and ``lasso_path`` over a grid of problem sizes.

    For every ``(n_samples, n_features)`` pair a regression dataset is
    generated and four variants are timed, in this order: ``lars_path`` with
    and without a precomputed Gram matrix, then ``lasso_path`` with
    ``precompute=True`` and ``precompute=False``.

    Parameters
    ----------
    samples_range : sized sequence of int
        Training-set sizes (outer loop).
    features_range : sized sequence of int
        Feature counts (inner loop).

    Returns
    -------
    results : defaultdict(list)
        Maps each benchmark label (e.g. ``'lars_path (with Gram)'``) to the
        list of elapsed times in seconds, one entry per grid point, ordered
        with ``samples_range`` as the outer loop.
    """
    # (label, callable) pairs; each callable receives the current X and y.
    benchmarks = (
        ('lars_path (with Gram)', _lars_path_with_gram),
        ('lars_path (without Gram)',
         lambda X, y: lars_path(X, y, method='lasso')),
        ('lasso_path (with Gram)',
         lambda X, y: lasso_path(X, y, precompute=True)),
        ('lasso_path (without Gram)',
         lambda X, y: lasso_path(X, y, precompute=False)),
    )

    it = 0
    results = defaultdict(list)

    max_it = len(samples_range) * len(features_range)
    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            print('====================')
            print('Iteration %03d of %03d' % (it, max_it))
            print('====================')
            dataset_kwargs = {
                'n_train_samples': n_samples,
                'n_test_samples': 2,
                'n_features': n_features,
                # Floor division keeps these integral whatever division
                # semantics the interpreter uses for ``/``.
                'n_informative': n_features // 10,
                'effective_rank': min(n_samples, n_features) // 10,
                #'effective_rank': None,
                'bias': 0.0,
            }
            print("n_samples: %d" % n_samples)
            print("n_features: %d" % n_features)
            X, y, _, _, _ = make_regression_dataset(**dataset_kwargs)

            for label, run in benchmarks:
                # Collect garbage before starting the clock so a pending
                # collection is less likely to land inside the timed call.
                gc.collect()
                # Write the label without a newline and flush so it is
                # visible while the (possibly slow) benchmark runs; the
                # elapsed time is then printed on the same line.
                sys.stdout.write("benching %s: " % label)
                sys.stdout.flush()
                tstart = time()
                run(X, y)
                delta = time() - tstart
                print("%0.3fs" % delta)
                results[label].append(delta)

    return results
from scikits.learn.metrics import mean_square_error
from scikits.learn.datasets.samples_generator import make_regression_dataset

if __name__ == "__main__":
    # Benchmark grid: 5 training-set sizes evenly spaced between 100 and
    # 10000 (cast to integers) crossed with 3 feature counts.
    list_n_samples = np.linspace(100, 10000, 5).astype(np.int)
    list_n_features = [10, 100, 1000]
    # Settings shared by every round; n_test and noise are passed to the
    # dataset generator below, alpha is not used in the visible part.
    n_test = 1000
    noise = 0.1
    alpha = 0.01
    # One (n sample sizes) x (n feature counts) x 2 array per estimator.
    # NOTE(review): the meaning of the two slots on the last axis is not
    # visible in this part of the script -- confirm where they are filled.
    sgd_results = np.zeros((len(list_n_samples), len(list_n_features), 2))
    elnet_results = np.zeros((len(list_n_samples), len(list_n_features), 2))
    ridge_results = np.zeros((len(list_n_samples), len(list_n_features), 2))

    for i, n_train in enumerate(list_n_samples):
        for j, n_features in enumerate(list_n_features):
            # Fresh train/test split for this grid point.
            X_train, y_train, X_test, y_test, coef = make_regression_dataset(
                n_train_samples=n_train, n_test_samples=n_test,
                n_features=n_features, noise=noise)

            print "======================="
            print "Round %d %d" % (i, j)
            print "n_features:", n_features
            print "n_samples:", n_train

            # Shuffle data
            # The seed is reset every round, so the permutation drawn for a
            # given n_train is the same on every run.
            idx = np.arange(n_train)
            np.random.seed(13)
            np.random.shuffle(idx)
            X_train = X_train[idx]
            y_train = y_train[idx]

            # Statistics of the training data taken along axis 0; how they
            # are used is outside the visible part of the script.
            std = X_train.std(axis=0)
            mean = X_train.mean(axis=0)
from scikits.learn.datasets.samples_generator import make_regression_dataset

if __name__ == "__main__":
    # Benchmark grid: 5 training-set sizes evenly spaced between 100 and
    # 10000 (cast to integers) crossed with 3 feature counts.
    list_n_samples = np.linspace(100, 10000, 5).astype(np.int)
    list_n_features = [10, 100, 1000]
    # Settings shared by every round; n_test and noise are passed to the
    # dataset generator below, alpha is not used in the visible part.
    n_test = 1000
    noise = 0.1
    alpha = 0.01
    # One (n sample sizes) x (n feature counts) x 2 array per estimator.
    # NOTE(review): the meaning of the two slots on the last axis is not
    # visible in this part of the script -- confirm where they are filled.
    sgd_results = np.zeros((len(list_n_samples), len(list_n_features), 2))
    elnet_results = np.zeros((len(list_n_samples), len(list_n_features), 2))
    ridge_results = np.zeros((len(list_n_samples), len(list_n_features), 2))

    for i, n_train in enumerate(list_n_samples):
        for j, n_features in enumerate(list_n_features):
            # Fresh train/test split for this grid point.
            X_train, y_train, X_test, y_test, coef = make_regression_dataset(
                n_train_samples=n_train, n_test_samples=n_test,
                n_features=n_features, noise=noise)

            print "======================="
            print "Round %d %d" % (i, j)
            print "n_features:", n_features
            print "n_samples:", n_train

            # Shuffle data
            # The seed is reset every round, so the permutation drawn for a
            # given n_train is the same on every run.
            idx = np.arange(n_train)
            np.random.seed(13)
            np.random.shuffle(idx)
            X_train = X_train[idx]
            y_train = y_train[idx]

            # Standard deviation of the training data along axis 0; how it
            # is used is outside the visible part of the script.
            std = X_train.std(axis=0)
# pylab is imported here, right next to the plotting code that needs it.
import pylab as pl

# Scores returned by bench() for each implementation, one per iteration.
scikit_results = []
glmnet_results = []
n = 20          # number of training-set sizes to benchmark
step = 500      # increment in training-set size between iterations
n_features = 1000
# Floor division keeps this integral whatever division semantics the
# interpreter uses for ``/``.
n_informative = n_features // 10
n_test_samples = 1000
for i in range(1, n + 1):
    print('==================')
    print('Iteration %s of %s' % (i, n))
    print('==================')

    # Training size grows linearly: step, 2 * step, ..., n * step.
    X, Y, X_test, Y_test, coef = make_regression_dataset(
        n_train_samples=(i * step), n_test_samples=n_test_samples,
        n_features=n_features, noise=0.1, n_informative=n_informative)

    print("benching scikit: ")
    scikit_results.append(bench(ScikitLasso, X, Y, X_test, Y_test, coef))
    print("benching glmnet: ")
    glmnet_results.append(bench(GlmnetLasso, X, Y, X_test, Y_test, coef))

pl.clf()
# The x coordinates must be the training sizes actually benchmarked above
# (step .. n * step). Starting the range at 0 would shift every point one
# step to the left.
xx = list(range(step, (n + 1) * step, step))
pl.title('Lasso regression on sample dataset (%d features)' % n_features)
pl.plot(xx, scikit_results, 'b-', label='scikit-learn')
pl.plot(xx, glmnet_results, 'r-', label='glmnet')
pl.legend()
pl.xlabel('number of samples to classify')
def _lars_path_with_gram(X, y):
    """Run ``lars_path`` (lasso method) from a precomputed Gram matrix.

    The Gram matrix and ``X.T . y`` are built inside this function so that
    their cost is part of whatever timing the caller wraps around the call.
    """
    G = np.dot(X.T, X)  # precomputed Gram matrix
    Xy = np.dot(X.T, y)
    lars_path(X, y, Xy=Xy, Gram=G, method='lasso')


def compute_bench(samples_range, features_range):
    """Time ``lars_path`` and ``lasso_path`` over a grid of problem sizes.

    For every ``(n_samples, n_features)`` pair a regression dataset is
    generated and four variants are timed, in this order: ``lars_path`` with
    and without a precomputed Gram matrix, then ``lasso_path`` with
    ``precompute=True`` and ``precompute=False``.

    Parameters
    ----------
    samples_range : sized sequence of int
        Training-set sizes (outer loop).
    features_range : sized sequence of int
        Feature counts (inner loop).

    Returns
    -------
    results : defaultdict(list)
        Maps each benchmark label (e.g. ``'lars_path (with Gram)'``) to the
        list of elapsed times in seconds, one entry per grid point, ordered
        with ``samples_range`` as the outer loop.
    """
    # (label, callable) pairs; each callable receives the current X and y.
    benchmarks = (
        ('lars_path (with Gram)', _lars_path_with_gram),
        ('lars_path (without Gram)',
         lambda X, y: lars_path(X, y, method='lasso')),
        ('lasso_path (with Gram)',
         lambda X, y: lasso_path(X, y, precompute=True)),
        ('lasso_path (without Gram)',
         lambda X, y: lasso_path(X, y, precompute=False)),
    )

    it = 0
    results = defaultdict(list)

    max_it = len(samples_range) * len(features_range)
    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            print('====================')
            print('Iteration %03d of %03d' % (it, max_it))
            print('====================')
            dataset_kwargs = {
                'n_train_samples': n_samples,
                'n_test_samples': 2,
                'n_features': n_features,
                # Floor division keeps these integral whatever division
                # semantics the interpreter uses for ``/``.
                'n_informative': n_features // 10,
                'effective_rank': min(n_samples, n_features) // 10,
                #'effective_rank': None,
                'bias': 0.0,
            }
            print("n_samples: %d" % n_samples)
            print("n_features: %d" % n_features)
            X, y, _, _, _ = make_regression_dataset(**dataset_kwargs)

            for label, run in benchmarks:
                # Collect garbage before starting the clock so a pending
                # collection is less likely to land inside the timed call.
                gc.collect()
                # Write the label without a newline and flush so it is
                # visible while the (possibly slow) benchmark runs; the
                # elapsed time is then printed on the same line.
                sys.stdout.write("benching %s: " % label)
                sys.stdout.flush()
                tstart = time()
                run(X, y)
                delta = time() - tstart
                print("%0.3fs" % delta)
                results[label].append(delta)

    return results
# Delayed import of pylab
import pylab as pl

# Scores returned by bench() for each implementation, one per iteration.
scikit_results = []
glmnet_results = []
n = 20          # number of training-set sizes to benchmark
step = 500      # increment in training-set size between iterations
n_features = 1000
# Floor division keeps this integral whatever division semantics the
# interpreter uses for ``/``.
n_informative = n_features // 10
n_test_samples = 1000
for i in range(1, n + 1):
    print('==================')
    print('Iteration %s of %s' % (i, n))
    print('==================')

    # Training size grows linearly: step, 2 * step, ..., n * step.
    X, Y, X_test, Y_test, coef = make_regression_dataset(
        n_train_samples=(i * step), n_test_samples=n_test_samples,
        n_features=n_features, noise=0.1, n_informative=n_informative)

    print("benching scikit: ")
    scikit_results.append(bench(ScikitLasso, X, Y, X_test, Y_test, coef))
    print("benching glmnet: ")
    glmnet_results.append(bench(GlmnetLasso, X, Y, X_test, Y_test, coef))

pl.clf()
# The x coordinates must be the training sizes actually benchmarked above
# (step .. n * step). Starting the range at 0 would shift every point one
# step to the left.
xx = list(range(step, (n + 1) * step, step))
pl.title('Lasso regression on sample dataset (%d features)' % n_features)
pl.plot(xx, scikit_results, 'b-', label='scikit-learn')
pl.plot(xx, glmnet_results, 'r-', label='glmnet')
pl.legend()
pl.xlabel('number of samples to classify')
pl.ylabel('time (in seconds)')