def parallel_function(
        dataset_name, method, tol=1e-5, n_outer=50,
        tolerance_decrease='constant'):
    # model_name is expected to be defined at module level ("lasso" or
    # "logreg"), as are dict_n_outers and dict_t_max.

    # load data
    X, y = fetch_libsvm(dataset_name)
    y -= y.mean()
    # compute alpha_max
    alpha_max = np.abs(X.T @ y).max() / len(y)

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max / 10_000

    if model_name == "lasso":
        estimator = celer.Lasso(
            fit_intercept=False, max_iter=100, warm_start=True, tol=tol)
        model = Lasso(estimator=estimator)
    elif model_name == "logreg":
        # mirror the logistic estimator used elsewhere in this repo
        estimator = celer.LogisticRegression(
            warm_start=True, fit_intercept=False)
        model = SparseLogreg(estimator=estimator)

    # TODO improve this
    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2
    for _ in range(size_loop):
        if model_name == "lasso":
            sub_criterion = HeldOutMSE(None, None)
        elif model_name == "logreg":
            sub_criterion = HeldOutLogistic(None, None)
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        criterion = CrossVal(sub_criterion, cv=kf)

        algo = ImplicitForward(tol_jac=1e-3)
        monitor = Monitor()
        t_max = dict_t_max[dataset_name]
        if method == 'grid_search':
            grid_search(
                algo, criterion, model, X, y, alpha_min, alpha_max,
                monitor, max_evals=100, tol=tol)
        elif method == 'random' or method == 'bayesian':
            hyperopt_wrapper(
                algo, criterion, model, X, y, alpha_min, alpha_max,
                monitor, max_evals=30, tol=tol, method=method,
                size_space=1, t_max=t_max)
        elif method.startswith("implicit_forward"):
            # do gradient descent to find the optimal lambda
            alpha0 = alpha_max / 100
            n_outer = 30
            if method == 'implicit_forward':
                optimizer = GradientDescent(
                    n_outer=n_outer, p_grad_norm=1, verbose=True, tol=tol,
                    t_max=t_max)
            else:
                optimizer = GradientDescent(
                    n_outer=n_outer, p_grad_norm=1, verbose=True, tol=tol,
                    t_max=t_max, tol_decrease="geom")
            grad_search(
                algo, criterion, model, optimizer, X, y, alpha0, monitor)
        else:
            raise NotImplementedError

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = 0  # TODO
    monitor.alphas = np.array(monitor.alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.alphas,
            alpha_max, model_name)
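# Illustrative sketch (not part of the original script): parallel_function is
# written so it can be dispatched over several (dataset, method) pairs, e.g.
# with joblib. The dataset and method lists below are placeholders, not the
# ones used in the actual experiments.
from itertools import product

from joblib import Parallel, delayed

example_datasets = ["rcv1.binary", "news20.binary"]  # assumed libsvm names
example_methods = ["grid_search", "random", "bayesian", "implicit_forward"]

results = Parallel(n_jobs=4, verbose=10)(
    delayed(parallel_function)(dataset_name, method, tol=1e-5)
    for dataset_name, method in product(example_datasets, example_methods))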
tab = np.linspace(1, 1000, n_features)
dict_log_alpha["wLasso"] = log_alpha + np.log(tab / tab.max())
dict_log_alpha["logreg"] = (log_alpha - np.log(2))
dict_log_alpha["svm"] = 1e-4
dict_log_alpha["svr"] = np.array([1e-2, 1e-2])

# Set models to be tested
models = {}
models["lasso"] = Lasso(estimator=None)
models["enet"] = ElasticNet(estimator=None)
models["wLasso"] = WeightedLasso(estimator=None)
models["logreg"] = SparseLogreg(estimator=None)
models["svm"] = SVM(estimator=None)
models["svr"] = SVR(estimator=None)

custom_models = {}
custom_models["lasso"] = Lasso(estimator=celer.Lasso(
    warm_start=True, fit_intercept=False))
custom_models["enet"] = ElasticNet(
    estimator=linear_model.ElasticNet(warm_start=True, fit_intercept=False))
custom_models["logreg"] = SparseLogreg(
    estimator=celer.LogisticRegression(warm_start=True, fit_intercept=False))

# Compute "ground truth" with cvxpylayer
dict_cvxpy_func = {
    'lasso': lasso_cvxpy,
    'enet': enet_cvxpy,
    'wLasso': weighted_lasso_cvxpy,
    'logreg': logreg_cvxpy,
    'svm': svm_cvxpy,
    'svr': svr_cvxpy
}
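# Illustrative consistency check (a sketch, not from the original test file):
# every model with a cvxpy reference solution should also have a model
# instance and a hand-set hyperparameter, so the comparison tests can be
# parametrized uniformly. This assumes dict_log_alpha already holds entries
# for "lasso" and "enet" from the preceding setup.
import pytest


@pytest.mark.parametrize('model_name', list(dict_cvxpy_func.keys()))
def test_fixtures_are_consistent(model_name):
    assert model_name in models
    assert model_name in dict_log_alpha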
kf = KFold(n_splits=5, shuffle=True, random_state=42)

n_samples = len(y)
alpha_max = np.max(np.abs(X.T.dot(y))) / n_samples
tol = 1e-8
max_iter = 1e5

algorithms = ['grid_search10', 'grad_search', 'random', 'bayesian']
p_alpha_min = 1 / 10_000

print("Starting path computation...")
for algorithm in algorithms:
    estimator = celer.Lasso(
        fit_intercept=False, max_iter=1000, warm_start=True, tol=tol)
    print('%s started' % algorithm)

    model = Lasso(estimator=estimator)
    criterion = HeldOutMSE(None, None)
    log_alpha0 = np.log(alpha_max / 10)
    monitor = Monitor()
    cross_val_criterion = CrossVal(criterion, cv=kf)
    algo = ImplicitForward()
    optimizer = GradientDescent(n_outer=10, tol=tol, verbose=True, p_grad0=1)
    # optimizer = LineSearch(n_outer=10, tol=tol, verbose=True)

    if algorithm == 'grad_search':
        grad_search(
            algo, cross_val_criterion, model, optimizer, X, y, log_alpha0,
            monitor)
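        # Illustrative sketch (not in the original script): after grad_search
        # returns, the Monitor holds the outer-loop trajectory. times and
        # objs are the same attributes read by the benchmark function above.
        times = np.array(monitor.times)
        objs = np.array(monitor.objs)
        print("%s: %d outer iterations, best CV loss %.3e after %.2f s"
              % (algorithm, len(objs), objs.min(), times[-1]))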
print("Starting path computation...") n_samples = len(y[idx_train]) alpha_max = np.max(np.abs(X[idx_train, :].T.dot(y[idx_train]))) alpha_max /= len(idx_train) alpha0 = alpha_max / 5 n_alphas = 10 alphas = np.geomspace(alpha_max, alpha_max / 1_000, n_alphas) tol = 1e-7 ############################################################################## # Grid search with scikit-learn # ----------------------------- estimator = celer.Lasso(fit_intercept=False, warm_start=True) print('Grid search started') t0 = time.time() model = Lasso(estimator=estimator) criterion = HeldOutMSE(idx_train, idx_val) monitor_grid_sk = Monitor() grid_search(criterion, model, X, y, None, None, monitor_grid_sk, alphas=alphas,
alphas = alpha_max * np.geomspace(1, 0.1)
alpha_min = 0.0001 * alpha_max

estimator = linear_model.Lasso(
    fit_intercept=False, max_iter=10000, warm_start=True)
model = Lasso(estimator=estimator)

tol = 1e-8

# Set models to be tested
models = {}
models["lasso"] = Lasso(estimator=None)
models["lasso_custom"] = Lasso(
    estimator=celer.Lasso(warm_start=True, fit_intercept=False))


@pytest.mark.parametrize('model_name', list(models.keys()))
@pytest.mark.parametrize('XX', [X, X_s])
def test_cross_val_criterion(model_name, XX):
    model = models[model_name]
    alpha_min = alpha_max / 10
    max_iter = 10000
    n_alphas = 10
    kf = KFold(n_splits=5, shuffle=True, random_state=56)

    monitor_grid = Monitor()
    if model_name.startswith("lasso"):
        sub_crit = HeldOutMSE(None, None)
    else: