def parallel_function(
        dataset_name, method, tol=1e-8, n_outer=15):
    # load data
    X, y = fetch_libsvm(dataset_name)
    # subsample the samples and the features
    n_samples, n_features = dict_subsampling[dataset_name]
    t_max = dict_t_max[dataset_name]
    # t_max = 3600

    X, y = clean_dataset(X, y, n_samples, n_features)
    alpha_max, n_classes = get_alpha_max(X, y)
    log_alpha_max = np.log(alpha_max)  # alpha_max value may need adjustment

    algo = ImplicitForward(None, n_iter_jac=2000)
    estimator = LogisticRegression(
        C=1, fit_intercept=False, warm_start=True, max_iter=30,
        verbose=False)
    model = SparseLogreg(estimator=estimator)

    idx_train, idx_val, idx_test = get_splits(X, y)
    logit_multiclass = LogisticMulticlass(
        idx_train, idx_val, algo, idx_test=idx_test)

    monitor = Monitor()
    if method == "implicit_forward":
        log_alpha0 = np.ones(n_classes) * np.log(0.1 * alpha_max)
        optimizer = LineSearch(n_outer=100)
        grad_search(
            algo, logit_multiclass, model, optimizer, X, y, log_alpha0,
            monitor)
    elif method.startswith(('random', 'bayesian')):
        max_evals = dict_max_eval[dataset_name]
        log_alpha_min = np.log(alpha_max) - 7
        hyperopt_wrapper(
            algo, logit_multiclass, model, X, y, log_alpha_min,
            log_alpha_max, monitor, max_evals=max_evals, tol=tol,
            t_max=t_max, method=method, size_space=n_classes)
    elif method == 'grid_search':
        n_alphas = 20
        p_alphas = np.geomspace(1, 0.001, n_alphas)
        p_alphas = np.tile(p_alphas, (n_classes, 1))
        for i in range(n_alphas):
            log_alpha_i = np.log(alpha_max * p_alphas[:, i])
            logit_multiclass.get_val(
                model, X, y, log_alpha_i, None, monitor, tol)

    monitor.times = np.array(monitor.times).copy()
    monitor.objs = np.array(monitor.objs).copy()
    monitor.acc_vals = np.array(monitor.acc_vals).copy()
    monitor.acc_tests = np.array(monitor.acc_tests).copy()
    monitor.log_alphas = np.array(monitor.log_alphas).copy()
    return (
        dataset_name, method, tol, n_outer, monitor.times, monitor.objs,
        monitor.acc_vals, monitor.acc_tests, monitor.log_alphas,
        log_alpha_max, n_samples, n_features, n_classes)
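# A minimal sketch of how `parallel_function` could be dispatched with
# joblib; this driver is an assumption (it is not part of the original
# script), and the dataset and method names below are placeholders.
from itertools import product

from joblib import Parallel, delayed

datasets = ['rcv1_multiclass']                 # hypothetical dataset list
methods = ['implicit_forward', 'grid_search']

results = Parallel(n_jobs=4, verbose=100)(
    delayed(parallel_function)(dataset_name, method)
    for dataset_name, method in product(datasets, methods))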
# X, y = fetch_libsvm('sector_scale')
# X, y = fetch_libsvm('sector')
# X, y = fetch_libsvm('smallNORB')
X, y = fetch_libsvm('mnist')  # pick one dataset (here: mnist)

# clean data and subsample
n_samples, n_features = 1_000, 1_000  # assumed subsampling sizes
X, y = clean_dataset(X, y, n_samples, n_features)
idx_train, idx_val, idx_test = get_splits(X, y)
n_samples, n_features = X.shape

algo = ImplicitForward(n_iter_jac=1000)
estimator = LogisticRegression(
    C=1, fit_intercept=False, warm_start=True, max_iter=2000, verbose=False)
model = SparseLogreg(estimator=estimator)
logit_multiclass = LogisticMulticlass(
    idx_train, idx_val, algo, idx_test=idx_test)

alpha_max, n_classes = alpha_max_multiclass(X, y)
tol = 1e-5

n_alphas = 10
p_alphas = np.geomspace(1, 0.001, n_alphas)
p_alphas = np.tile(p_alphas, (n_classes, 1))

print("###################### GRID SEARCH ###################")
monitor_grid = Monitor()
for i in range(n_alphas):
    log_alpha_i = np.log(alpha_max * p_alphas[:, i])
    logit_multiclass.get_val(
        model, X, y, log_alpha_i, None, monitor_grid, tol)
n_alphas = 30
p_alphas = np.geomspace(1, 0.0001, n_alphas)
alphas = alpha_max * p_alphas
log_alphas = np.log(alphas)

##############################################################################
# Grid-search
# -----------

print('scikit started')
t0 = time.time()

estimator = LogisticRegression(
    penalty='l1', fit_intercept=False, max_iter=max_iter)
model = SparseLogreg(max_iter=max_iter, estimator=estimator)
criterion = HeldOutLogistic(idx_train, idx_val)
algo_grid = Forward()
monitor_grid = Monitor()
grid_search(
    algo_grid, criterion, model, X, y, log_alpha_min, log_alpha_max,
    monitor_grid, log_alphas=log_alphas, tol=tol)
objs = np.array(monitor_grid.objs)
p_alphas = np.geomspace(1, 0.0001, n_alphas)
alphas = alpha_max * p_alphas
log_alphas = np.log(alphas)

##############################################################################
# Grid-search
# -----------

print('scikit started')
t0 = time.time()

estimator = LogisticRegression(
    penalty='l1', fit_intercept=False, solver='saga', max_iter=max_iter)
model = SparseLogreg(X_train, y_train, max_iter=max_iter, estimator=estimator)
criterion = HeldOutLogistic(X_val, y_val, model)
algo_grid = Forward()
monitor_grid = Monitor()
grid_search(
    algo_grid, criterion, log_alpha_min, log_alpha_max, monitor_grid,
    log_alphas=log_alphas, tol=tol)
objs = np.array(monitor_grid.objs)

t_sk = time.time() - t0
print('scikit finished')
def linear_cv(dataset_name, tol=1e-3, compute_jac=True, model_name="lasso"):
    X, y = load_libsvm(dataset_name)
    X = csc_matrix(X)
    n_samples, n_features = X.shape
    p_alpha = p_alphas[dataset_name, model_name]

    max_iter = max_iters[dataset_name]
    if model_name == "lasso":
        model = Lasso(X, y, 0, max_iter=max_iter, tol=tol)
    elif model_name == "logreg":
        model = SparseLogreg(X, y, 0, max_iter=max_iter, tol=tol)

    alpha_max = np.exp(model.compute_alpha_max())
    alpha = p_alpha * alpha_max

    # compute a high-precision reference solution beta_star
    if model_name == "lasso":
        clf = Lasso_cel(
            alpha=alpha, fit_intercept=False, warm_start=True,
            tol=tol * norm(y) ** 2 / 2, max_iter=10000)
        clf.fit(X, y)
        beta_star = clf.coef_
        mask = beta_star != 0
        dense = beta_star[mask]
    elif model_name == "logreg":
        # reference solution computed with blitzl1 (scikit-learn's l1
        # LogisticRegression with the saga or liblinear solver would do too)
        blitzl1.set_use_intercept(False)
        blitzl1.set_tolerance(1e-32)
        blitzl1.set_verbose(True)
        prob = blitzl1.LogRegProblem(X, y)
        clf = prob.solve(alpha * n_samples)
        beta_star = clf.x
        mask = np.array(beta_star != 0)
        dense = beta_star[mask]

    # reference Jacobian on the support, obtained by solving the linear
    # system (Hessian restricted to the support) @ jac = v with CG
    v = - n_samples * alpha * np.sign(beta_star[mask])
    mat_to_inv = model.get_hessian(mask, dense, np.log(alpha))
    jac_temp = cg(mat_to_inv, v, tol=1e-10)
    jac_star = np.zeros(n_features)
    jac_star[mask] = jac_temp[0]

    log_alpha = np.log(alpha)
    list_beta, list_jac = get_beta_jac_iterdiff(
        X, y, log_alpha, model, save_iterates=True, tol=tol,
        max_iter=max_iter, compute_jac=compute_jac)

    diff_beta = norm(list_beta - beta_star, axis=1)
    diff_jac = norm(list_jac - jac_star, axis=1)

    # first iteration after which the support no longer changes
    supp_star = beta_star != 0
    n_iter = list_beta.shape[0]
    supp_id = 0
    for i in np.arange(n_iter)[::-1]:
        supp = list_beta[i, :] != 0
        if not np.all(supp == supp_star):
            supp_id = i + 1
            break

    return dataset_name, p_alpha, diff_beta, diff_jac, n_iter, supp_id
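# Standalone sketch of the linear system solved above, for the Lasso case on
# synthetic data (all names below are local to this example). On the support
# S of a solution beta, the Jacobian j = d beta_S / d log(alpha) satisfies
# (X_S.T @ X_S) @ j = -n_samples * alpha * sign(beta_S).
import numpy as np
from scipy.sparse.linalg import cg
from sklearn.linear_model import Lasso as skLasso

rng = np.random.default_rng(0)
n, p = 50, 20
X_toy = rng.standard_normal((n, p))
y_toy = X_toy @ rng.standard_normal(p) + 0.1 * rng.standard_normal(n)

alpha = 0.1 * np.max(np.abs(X_toy.T @ y_toy)) / n
beta = skLasso(alpha=alpha, fit_intercept=False, max_iter=100_000,
               tol=1e-12).fit(X_toy, y_toy).coef_
S = beta != 0

v = -n * alpha * np.sign(beta[S])    # right-hand side of the linear system
H = X_toy[:, S].T @ X_toy[:, S]      # Hessian restricted to the support
jac_S, info = cg(H, v)
assert info == 0                     # CG converged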
def parallel_function(dataset_name, method, tol=1e-5, n_outer=50,
                      tolerance_decrease='constant'):
    # load data
    X, y = fetch_libsvm(dataset_name)
    y -= np.mean(y)
    # compute alpha_max
    alpha_max = np.abs(X.T @ y).max() / len(y)

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max * dict_palphamin[dataset_name]

    if model_name == "enet":
        estimator = linear_model.ElasticNet(
            fit_intercept=False, max_iter=10_000, warm_start=True, tol=tol)
        model = ElasticNet(estimator=estimator)
    elif model_name == "logreg":
        # assumed inner solver: the original snippet used `estimator` here
        # without defining it in this branch
        estimator = LogisticRegression(
            penalty='l1', fit_intercept=False, max_iter=10_000,
            warm_start=True, tol=tol, solver='saga')
        model = SparseLogreg(estimator=estimator)

    # TODO improve this
    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2
    for _ in range(size_loop):
        if model_name in ("lasso", "enet"):
            sub_criterion = HeldOutMSE(None, None)
        elif model_name == "logreg":
            sub_criterion = HeldOutLogistic(None, None)
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        criterion = CrossVal(sub_criterion, cv=kf)

        algo = ImplicitForward(tol_jac=1e-3)
        monitor = Monitor()
        t_max = dict_t_max[dataset_name]
        if method == 'grid_search':
            num1D = dict_point_grid_search[dataset_name]
            alpha1D = np.geomspace(alpha_max, alpha_min, num=num1D)
            alphas = [np.array(i) for i in product(alpha1D, alpha1D)]
            grid_search(
                algo, criterion, model, X, y, alpha_min, alpha_max,
                monitor, max_evals=100, tol=tol, alphas=alphas)
        elif method in ('random', 'bayesian'):
            hyperopt_wrapper(
                algo, criterion, model, X, y, alpha_min, alpha_max,
                monitor, max_evals=30, tol=tol, method=method,
                size_space=2, t_max=t_max)
        elif method.startswith("implicit_forward"):
            # do gradient descent to find the optimal lambda
            alpha0 = np.array([alpha_max / 100, alpha_max / 100])
            n_outer = 30
            if method == 'implicit_forward':
                optimizer = GradientDescent(
                    n_outer=n_outer, p_grad_norm=1, verbose=True, tol=tol,
                    t_max=t_max)
            else:
                optimizer = GradientDescent(
                    n_outer=n_outer, p_grad_norm=1, verbose=True, tol=tol,
                    t_max=t_max, tol_decrease="geom")
            grad_search(
                algo, criterion, model, optimizer, X, y, alpha0, monitor)
        else:
            raise NotImplementedError

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = 0  # TODO
    monitor.alphas = np.array(monitor.alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.alphas, alpha_max, model_name)
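# Hypothetical post-processing, assuming the tuples returned by
# `parallel_function` have been collected in a list `results` (e.g. via
# joblib); the column names mirror the return statement above but are
# otherwise an assumption.
import pandas as pd

df = pd.DataFrame(results, columns=[
    'dataset', 'method', 'tol', 'n_outer', 'tolerance_decrease',
    'times', 'objs', 'objs_test', 'alphas', 'alpha_max', 'model_name'])
df.to_pickle('results.pkl')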
# X_train, X_val, X_test, y_train, y_val, y_test = get_leukemia()
n_samples, n_features = X_train.shape

print("Starting path computation...")
alpha_max = np.max(np.abs(X_train.T @ (- y_train)))
alpha_max /= (2 * n_samples)

n_alphas = 10
p_alphas = np.geomspace(1, 1e-4, n_alphas)
alphas = p_alphas * alpha_max
log_alphas = np.log(alphas)
tol = 1e-5

# grid search
model = SparseLogreg(X_train, y_train, log_alphas[0], max_iter=1000)
criterion = HeldOutLogistic(
    X_val, y_val, model, X_test=X_test, y_test=y_test)
algo = Forward(criterion)
monitor_grid_sk = Monitor()
grid_search(
    algo, None, None, monitor_grid_sk, log_alphas=log_alphas, tol=tol)

monitor = Monitor()
# grad_search(
#     algo, logCs[0], monitor, n_outer=5, verbose=True,
#     tolerance_decrease='constant', tol=1e-8, t_max=10000)

plt.figure()
plt.plot(monitor_grid_sk.log_alphas, monitor_grid_sk.objs)
plt.plot(monitor.log_alphas, monitor.objs, 'bo')
plt.show(block=False)
log_alpha2 = np.log(alpha_2)

dict_log_alpha = {}
dict_log_alpha["lasso"] = log_alpha
dict_log_alpha["enet"] = np.array([log_alpha1, log_alpha2])
tab = np.linspace(1, 1000, n_features)
dict_log_alpha["wLasso"] = log_alpha + np.log(tab / tab.max())
dict_log_alpha["logreg"] = (log_alpha - np.log(2))
dict_log_alpha["svm"] = 1e-4
dict_log_alpha["svr"] = np.array([1e-2, 1e-2])

# Set models to be tested
models = {}
models["lasso"] = Lasso(estimator=None)
models["enet"] = ElasticNet(estimator=None)
models["wLasso"] = WeightedLasso(estimator=None)
models["logreg"] = SparseLogreg(estimator=None)
models["svm"] = SVM(estimator=None)
models["svr"] = SVR(estimator=None)

custom_models = {}
custom_models["lasso"] = Lasso(estimator=celer.Lasso(
    warm_start=True, fit_intercept=False))
custom_models["enet"] = ElasticNet(
    estimator=linear_model.ElasticNet(warm_start=True, fit_intercept=False))
custom_models["logreg"] = SparseLogreg(
    estimator=celer.LogisticRegression(warm_start=True, fit_intercept=False))

# Compute "ground truth" with cvxpylayers
dict_cvxpy_func = {
    'lasso': lasso_cvxpy,
    'enet': enet_cvxpy,
    n_features=n_features, n_informative=50, random_state=122, flip_y=0.1,
    n_redundant=0)
X_val_s = csc_matrix(X_val)

y_train[y_train == 0.0] = -1.0
y_val[y_val == 0.0] = -1.0

alpha_max = np.max(np.abs(X_train.T @ (- y_train)))
alpha_max /= (2 * n_samples)
alpha = 0.3 * alpha_max
log_alpha = np.log(alpha)
tol = 1e-16

models = [
    SparseLogreg(X_train, y_train, max_iter=10000, estimator=None),
    SparseLogreg(X_train_s, y_train, max_iter=10000, estimator=None)
]
estimator = LogisticRegression(
    penalty="l1", tol=1e-12, fit_intercept=False, max_iter=100000,
    solver="saga")
models_custom = [
    SparseLogreg(X_train, y_train, max_iter=10000, estimator=estimator),
    SparseLogreg(X_train_s, y_train, max_iter=10000, estimator=estimator)
]
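# Quick numeric check (synthetic data, local names) that coefficients vanish
# at alpha_max = max|X.T @ y| / (2 * n) for l1-penalized logistic regression
# with labels in {-1, 1}; scikit-learn's C equals 1 / (alpha * n) in this
# parametrization.
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
X_chk = rng.standard_normal((100, 30))
y_chk = np.sign(rng.standard_normal(100))

n = X_chk.shape[0]
alpha_max_chk = np.max(np.abs(X_chk.T @ y_chk)) / (2 * n)
clf = LogisticRegression(
    penalty='l1', C=1 / (alpha_max_chk * n), fit_intercept=False,
    solver='saga', tol=1e-10, max_iter=10_000).fit(X_chk, y_chk)
print(np.max(np.abs(clf.coef_)))  # ~0: the solution is fully sparse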
    flip_y=0.1, n_redundant=0)
X_s = csc_matrix(X)
y[y == 0.0] = -1.0

idx_train = np.arange(0, 50)
idx_val = np.arange(50, 100)

alpha_max = np.max(np.abs(
    X[idx_train, :].T @ y[idx_train])) / (2 * len(idx_train))
alpha = 0.3 * alpha_max
log_alpha = np.log(alpha)
tol = 1e-16

models = [SparseLogreg(max_iter=10000, estimator=None)]
estimator = LogisticRegression(
    penalty="l1", tol=1e-12, fit_intercept=False, max_iter=100000,
    solver="saga")
models_custom = [SparseLogreg(max_iter=10000, estimator=estimator)]


def get_v(mask, dense):
    return 2 * (X[np.ix_(idx_val, mask)].T @ (
        X[np.ix_(idx_val, mask)] @ dense - y[idx_val])) / len(idx_val)
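# Finite-difference sanity check for `get_v` above (the test point and
# epsilon are arbitrary choices for this sketch): `get_v` returns the
# gradient of the validation loss ||X_val @ beta - y_val||^2 / n_val
# restricted to the support `mask`.
from numpy.linalg import norm

mask = np.ones(X.shape[1], dtype=bool)
dense = np.zeros(X.shape[1])
eps = 1e-6


def val_loss(beta):
    return norm(X[idx_val] @ beta - y[idx_val]) ** 2 / len(idx_val)


grad_fd = np.array([
    (val_loss(dense + eps * e) - val_loss(dense - eps * e)) / (2 * eps)
    for e in np.eye(X.shape[1])])
np.testing.assert_allclose(get_v(mask, dense), grad_fd, rtol=1e-4)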
def parallel_function(dataset_name, method, tol=1e-5, n_outer=50,
                      tolerance_decrease='constant'):
    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, _ = X_train.shape
    # compute alpha_max
    alpha_max = np.abs(X_train.T @ y_train).max() / n_samples
    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max / 10_000
    log_alpha_max = np.log(alpha_max)
    log_alpha_min = np.log(alpha_min)
    log_alpha0 = np.log(0.1 * alpha_max)

    if model_name == "lasso":
        model = Lasso(X_train, y_train)
    elif model_name == "logreg":
        model = SparseLogreg(X_train, y_train)

    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2
    for _ in range(size_loop):
        if model_name == "lasso":
            criterion = HeldOutMSE(
                X_val, y_val, model, X_test=X_test, y_test=y_test)
        elif model_name == "logreg":
            criterion = HeldOutLogistic(
                X_val, y_val, model, X_test=X_test, y_test=y_test)
        algo = dict_algo[method](criterion)
        monitor = Monitor()
        if method == 'grid_search':
            log_alphas = np.log(np.geomspace(alpha_max, alpha_min, num=100))
            grid_search(
                algo, None, None, monitor, log_alphas=log_alphas, tol=tol)
        elif method == 'random':
            grid_search(
                algo, log_alpha_max, log_alpha_min, monitor, tol=tol,
                max_evals=n_alphas, t_max=dict_t_max[dataset_name])
        elif method == 'bayesian':
            hyperopt_wrapper(
                algo, log_alpha_min, log_alpha_max, monitor,
                max_evals=n_alphas, tol=tol, method='bayesian',
                t_max=dict_t_max[dataset_name])
        else:
            # do line search to find the optimal lambda
            grad_search(
                algo, log_alpha0, monitor, n_outer=n_outer, tol=tol,
                tolerance_decrease=tolerance_decrease,
                t_max=dict_t_max[dataset_name])

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = np.array(monitor.objs_test)
    monitor.log_alphas = np.array(monitor.log_alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.log_alphas, norm(y_val), norm(y_test), log_alpha_max,
            model_name)
def parallel_function(
        dataset_name, method, tol=1e-5, n_outer=50,
        tolerance_decrease='exponential'):
    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, n_features = X_train.shape
    print('n_samples', n_samples)
    print('n_features', n_features)
    y_train[y_train == 0.0] = -1.0
    y_val[y_val == 0.0] = -1.0
    y_test[y_test == 0.0] = -1.0

    alpha_max = np.max(np.abs(X_train.T @ y_train))
    alpha_max /= X_train.shape[0]
    alpha_max /= 4
    log_alpha_max = np.log(alpha_max)

    alpha_min = alpha_max * 1e-4
    alphas = np.geomspace(alpha_max, alpha_min, 10)
    log_alphas = np.log(alphas)
    log_alpha0 = np.log(0.1 * alpha_max)

    n_outer = 25
    size_loop = 2
    model = SparseLogreg(
        X_train, y_train, max_iter=1000, log_alpha_max=log_alpha_max)

    for i in range(size_loop):
        monitor = Monitor()
        # held-out criterion, shared by all methods below
        criterion = HeldOutLogistic(
            X_val, y_val, model, X_test=X_test, y_test=y_test)

        if method == "implicit_forward":
            algo = ImplicitForward(criterion, tol_jac=1e-5, n_iter_jac=100)
            _, _, _ = grad_search(
                algo=algo, verbose=False, log_alpha0=log_alpha0, tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)
        elif method == "forward":
            algo = Forward(criterion)
            _, _, _ = grad_search(
                algo=algo, log_alpha0=log_alpha0, tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)
        elif method == "implicit":
            algo = Implicit(criterion)
            _, _, _ = grad_search(
                algo=algo, log_alpha0=log_alpha0, tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)
        elif method == "grid_search":
            algo = Forward(criterion)
            log_alphas = np.log(np.geomspace(alpha_max, alpha_min, num=100))
            log_alpha_opt, min_g_func = grid_search(
                algo, None, None, monitor, tol=tol, samp="grid",
                t_max=dict_t_max[dataset_name], log_alphas=log_alphas)
            print(log_alpha_opt)
        elif method == "random":
            algo = Forward(criterion)
            log_alpha_min = np.log(alpha_min)
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(alpha_max), monitor,
                max_evals=100, tol=tol, samp="random",
                t_max=dict_t_max[dataset_name])
            print(log_alpha_opt)
        elif method == "lhs":
            algo = Forward(criterion)
            log_alpha_min = np.log(alpha_min)
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(alpha_max), monitor,
                max_evals=100, tol=tol, samp="lhs",
                t_max=dict_t_max[dataset_name])
            print(log_alpha_opt)

    monitor.times = np.array(monitor.times).copy()
    monitor.objs = np.array(monitor.objs).copy()
    monitor.objs_test = np.array(monitor.objs_test).copy()
    monitor.log_alphas = np.array(monitor.log_alphas).copy()
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.log_alphas, norm(y_val), norm(y_test), log_alpha_max)