def test_grad_search(model, crit):
    """Check that Forward and ImplicitForward follow the same search path.

    ``grad_search`` is run for ``n_outer`` outer iterations with Forward,
    Implicit and ImplicitForward, each time with a freshly built criterion
    and monitor. The log-alphas, gradients and objectives recorded for
    Forward and ImplicitForward must agree, while their recorded times must
    not. The Implicit run is executed but its records are not compared.
    """
    n_outer = 2

    def run_search(make_algo):
        # Same construction order for every run: criterion, monitor, algo.
        held_out = HeldOutLogistic(X_val, y_val, model)
        recorder = Monitor()
        solver = make_algo()
        grad_search(solver, held_out, log_alpha, recorder,
                    n_outer=n_outer, tol=tol)
        return recorder

    fwd = run_search(Forward)
    run_search(Implicit)
    imp_fwd = run_search(
        lambda: ImplicitForward(tol_jac=tol, n_iter_jac=5000))

    assert np.allclose(
        np.array(fwd.log_alphas), np.array(imp_fwd.log_alphas))
    assert np.allclose(
        np.array(fwd.grads), np.array(imp_fwd.grads), atol=1e-4)
    assert np.allclose(
        np.array(fwd.objs), np.array(imp_fwd.objs))
    # Timings are wall-clock measurements and are required to differ.
    assert not np.allclose(
        np.array(fwd.times), np.array(imp_fwd.times))
def test_val_grad(model):
    """Check that every method returns the same value and gradient.

    Not all methods compute the full Jacobian, but all of them compute the
    gradient, so the outputs of Forward, ImplicitForward and Implicit are
    compared with each other. Each method gets its own criterion instance.
    """
    def evaluate(make_algo):
        # Criterion first, then the algorithm, then the evaluation.
        held_out = HeldOutLogistic(X_val, y_val, model)
        solver = make_algo()
        return held_out.get_val_grad(
            log_C, solver.get_beta_jac_v, tol=tol)

    val_fwd, grad_fwd = evaluate(Forward)
    val_imp_fwd, grad_imp_fwd = evaluate(
        lambda: ImplicitForward(tol_jac=1e-8, n_iter_jac=100))
    val_imp, grad_imp = evaluate(Implicit)

    assert np.allclose(val_fwd, val_imp_fwd)
    assert np.allclose(grad_fwd, grad_imp_fwd)
    assert np.allclose(val_imp_fwd, val_imp)
    assert np.allclose(grad_imp_fwd, grad_imp, atol=1e-5)
def test_val_grad(model):
    """Check that Forward, ImplicitForward and Implicit agree.

    Each method evaluates the held-out logistic criterion on ``(X, y)`` at
    ``log_alpha`` with its own criterion instance; the returned values and
    gradients are then compared pairwise.
    """
    def evaluate(make_algo):
        # NOTE(review): both index arguments are idx_val -- confirm that
        # this is intended.
        held_out = HeldOutLogistic(idx_val, idx_val)
        solver = make_algo()
        return held_out.get_val_grad(
            model, X, y, log_alpha, solver.get_beta_jac_v, tol=tol)

    val_fwd, grad_fwd = evaluate(Forward)
    val_imp_fwd, grad_imp_fwd = evaluate(
        lambda: ImplicitForward(tol_jac=1e-8, n_iter_jac=5000))
    val_imp, grad_imp = evaluate(Implicit)

    assert np.allclose(val_fwd, val_imp_fwd, atol=1e-4)
    assert np.allclose(grad_fwd, grad_imp_fwd, atol=1e-4)
    assert np.allclose(val_imp_fwd, val_imp, atol=1e-4)
    # For the implicit method the conjugate gradient does not converge,
    # hence the looser rtol=1e-2.
    assert np.allclose(grad_imp_fwd, grad_imp, rtol=1e-2)
def test_check_grad_sparse_ho(model_name, criterion, algo):
    """Check that all methods return a good gradient using check_grad.

    Parameters
    ----------
    model_name : key of ``models`` and ``dict_list_log_alphas``
        Selects the model and the list of points at which the gradient is
        checked.
    criterion : 'MSE', 'SURE', 'logistic' or anything else
        The three names are replaced by the matching criterion instance;
        any other value is used as the criterion unchanged.
    algo : object exposing ``get_beta_jac_v``
        Hypergradient algorithm under test.
    """
    if criterion == 'MSE':
        criterion = HeldOutMSE(idx_train, idx_val)
    elif criterion == 'SURE':
        criterion = FiniteDiffMonteCarloSure(sigma_star)
    elif criterion == 'logistic':
        criterion = HeldOutLogistic(idx_train, idx_val)

    model = models[model_name]

    def val_and_grad(point):
        # check_grad hands over an array; it is squeezed before evaluation.
        return criterion.get_val_grad(
            model, X, y, np.squeeze(point), algo.get_beta_jac_v, tol=tol)

    def get_val(log_alpha):
        val, _ = val_and_grad(log_alpha)
        return val

    def get_grad(log_alpha):
        _, grad = val_and_grad(log_alpha)
        return grad

    # The evaluation points come only from dict_list_log_alphas; the former
    # ``log_alpha = dict_log_alpha[model_name]`` local was overwritten by
    # this loop before ever being read and has been dropped.
    for point in dict_list_log_alphas[model_name]:
        grad_error = check_grad(get_val, get_grad, point)
        assert grad_error < 1e-1
def test_val_grad(model_name, criterion_name, algo):
    """Check that all methods return the same gradient as cvxpylayer.

    The value and gradient computed through ``algo.compute_beta_grad`` are
    compared with the references stored in ``dict_vals_cvxpy`` and
    ``dict_grads_cvxpy`` under the key ``(model_name, criterion_name)``.
    The 'svr' model and the 'logistic' criterion are marked as expected
    failures before anything is computed.
    """
    if model_name == 'svr':
        pytest.xfail("svr needs to be fixed")
    if criterion_name == 'logistic':
        pytest.xfail("cvxpylayer seems broken for logistic")

    if criterion_name == 'MSE':
        criterion = HeldOutMSE(idx_train, idx_val)
    elif criterion_name == 'logistic':
        criterion = HeldOutLogistic(idx_train, idx_val)
    elif criterion_name == 'SURE':
        criterion = FiniteDiffMonteCarloSure(sigma_star)

    log_alpha = dict_log_alpha[model_name]
    model = models[model_name]
    val, grad = criterion.get_val_grad(
        model, X, y, log_alpha, algo.compute_beta_grad, tol=tol)

    reference_key = (model_name, criterion_name)
    tolerances = dict(rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(
        dict_vals_cvxpy[reference_key], val, **tolerances)
    np.testing.assert_allclose(
        dict_grads_cvxpy[reference_key], grad, **tolerances)
def test_grad_search_custom(model, model_custom, crit):
    """Check that ``model`` and ``model_custom`` follow the same path.

    ``grad_search`` is run with ImplicitForward for ``n_outer`` outer
    iterations on each model, with a fresh criterion and monitor each time.
    The recorded log-alphas, gradients and objectives must agree within the
    tolerances below, while the recorded times must differ.
    """
    n_outer = 5

    def run_search(estimator_model):
        # NOTE(review): both index arguments are idx_val -- confirm that
        # this is intended.
        held_out = HeldOutLogistic(idx_val, idx_val)
        recorder = Monitor()
        solver = ImplicitForward(tol_jac=tol, n_iter_jac=5000)
        grad_search(solver, held_out, estimator_model, X, y, log_alpha,
                    recorder, n_outer=n_outer, tol=tol)
        return recorder

    monitor = run_search(model)
    monitor_custom = run_search(model_custom)

    np.testing.assert_allclose(
        np.array(monitor.log_alphas),
        np.array(monitor_custom.log_alphas), atol=1e-3)
    np.testing.assert_allclose(
        np.array(monitor.grads),
        np.array(monitor_custom.grads), atol=1e-4)
    np.testing.assert_allclose(
        np.array(monitor.objs),
        np.array(monitor_custom.objs), atol=1e-5)
    assert not np.allclose(
        np.array(monitor.times), np.array(monitor_custom.times))
def test_cross_val_criterion(model_name, XX):
    """Compare CrossVal grid-search objectives with scikit-learn's CV path.

    A grid search over ``n_alphas`` values between ``alpha_max`` and
    ``alpha_max / 10`` is run with the ``CrossVal`` criterion on ``XX``.
    The objectives recorded by the monitor are compared with the path
    reported by ``LassoCV`` (``mse_path_``) for model names starting with
    "lasso", or by ``LogisticRegressionCV`` (``scores_[1.0]``) otherwise,
    each averaged along axis 1. Both sides use the same ``KFold`` splitter.

    Parameters
    ----------
    model_name : key of ``models``
        Selects the model under test and the scikit-learn reference.
    XX : design matrix handed to ``grid_search`` and to ``reg.score``.
    """
    model = models[model_name]
    is_lasso = model_name.startswith("lasso")
    alpha_min = alpha_max / 10
    max_iter = 10000
    n_alphas = 10
    kf = KFold(n_splits=5, shuffle=True, random_state=56)

    monitor_grid = Monitor()
    if is_lasso:
        sub_crit = HeldOutMSE(None, None)
    else:
        sub_crit = HeldOutLogistic(None, None)
    criterion = CrossVal(sub_crit, cv=kf)
    grid_search(criterion, model, XX, y, alpha_min, alpha_max,
                monitor_grid, max_evals=n_alphas, tol=tol)

    # Regularization grid handed to scikit-learn, from largest to smallest.
    alphas = np.geomspace(alpha_max, alpha_min, num=n_alphas)
    # NOTE(review): the references are fitted on the module-level X while
    # the grid search runs on XX -- presumably the same data; confirm.
    if is_lasso:
        reg = linear_model.LassoCV(
            cv=kf, verbose=True, tol=tol, fit_intercept=False,
            alphas=alphas, max_iter=max_iter).fit(X, y)
    else:
        reg = linear_model.LogisticRegressionCV(
            cv=kf, verbose=True, tol=tol, fit_intercept=False,
            Cs=len(idx_train) / alphas,
            max_iter=max_iter, penalty='l1', solver='liblinear').fit(X, y)
    # The score itself is not used; the call only has to succeed on XX.
    reg.score(XX, y)

    if is_lasso:
        objs_grid_sk = reg.mse_path_.mean(axis=1)
    else:
        objs_grid_sk = reg.scores_[1.0].mean(axis=1)

    # These two values should be the same. The bare difference expression
    # that used to precede this assertion had no effect and was removed.
    np.testing.assert_allclose(objs_grid_sk, monitor_grid.objs)
def test_val_grad_custom(model, model_custom):
    """Check that ``model`` and ``model_custom`` give the same hypergradient.

    The held-out logistic criterion is evaluated at ``log_alpha`` with
    ImplicitForward for each model; values and gradients must match.
    """
    def evaluate(estimator_model):
        held_out = HeldOutLogistic(X_val, y_val, estimator_model)
        solver = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
        return held_out.get_val_grad(
            log_alpha, solver.get_beta_jac_v, tol=tol)

    val, grad = evaluate(model)
    val_custom, grad_custom = evaluate(model_custom)

    assert np.allclose(val, val_custom)
    assert np.allclose(grad, grad_custom)
# Geometric grid of regularization strengths, from alpha_max down to
# 1e-4 * alpha_max.
p_alphas = np.geomspace(1, 1e-4, n_alphas)
alphas = p_alphas * alpha_max
log_alphas = np.log(alphas)

##############################################################################
# Grid-search
# -----------
# The candidate values are handed to ``grid_search`` through ``log_alphas``;
# the timer covers model construction as well as the search itself.

print('scikit started')
t0 = time.time()

estimator = LogisticRegression(
    penalty='l1', fit_intercept=False, max_iter=max_iter)
model = SparseLogreg(max_iter=max_iter, estimator=estimator)
criterion = HeldOutLogistic(idx_train, idx_val)
algo_grid = Forward()
monitor_grid = Monitor()
grid_search(
    algo_grid, criterion, model, X, y, log_alpha_min, log_alpha_max,
    monitor_grid, log_alphas=log_alphas, tol=tol)
objs = np.array(monitor_grid.objs)

t_sk = time.time() - t0
alphas = p_alphas * alpha_max
log_alphas = np.log(alphas)

##############################################################################
# Grid-search
# -----------
# The candidate values are handed to ``grid_search`` through ``log_alphas``;
# the timer covers model construction as well as the search itself.

print('scikit started')
t0 = time.time()

estimator = LogisticRegression(
    penalty='l1', fit_intercept=False, solver='saga', max_iter=max_iter)
model = SparseLogreg(
    X_train, y_train, max_iter=max_iter, estimator=estimator)
criterion = HeldOutLogistic(X_val, y_val, model)
algo_grid = Forward()
monitor_grid = Monitor()
grid_search(
    algo_grid, criterion, log_alpha_min, log_alpha_max, monitor_grid,
    log_alphas=log_alphas, tol=tol)
objs = np.array(monitor_grid.objs)

t_sk = time.time() - t0

print('scikit finished')
print("Time to compute CV for scikit-learn: %.2f" % t_sk)
def parallel_function(dataset_name, method, tol=1e-5, n_outer=50,
                      tolerance_decrease='constant'):
    """Run one hyperparameter search on one dataset and return its trace.

    The model is selected by the module-level ``model_name``. The data are
    loaded with ``fetch_libsvm``, ``y`` is centered, and a 5-fold
    ``CrossVal`` criterion is optimized with the requested ``method``. The
    whole search is executed ``size_loop`` (2) times with fresh objects;
    only the monitor of the last pass is returned (presumably the first
    pass is a warm-up -- TODO confirm).

    Parameters
    ----------
    dataset_name : str
        Passed to ``fetch_libsvm``; also the key used in ``dict_palphamin``,
        ``dict_n_outers``, ``dict_t_max`` and ``dict_point_grid_search``.
    method : str
        ``'grid_search'``, ``'random'``, ``'bayesian'`` or a name starting
        with ``'implicit_forward'``. Exactly ``'implicit_forward'`` uses the
        default ``GradientDescent`` options; any other name with that prefix
        adds ``tol_decrease="geom"``.
    tol : float
        Tolerance given to the scikit-learn estimator and to the searches.
    n_outer : int
        Not used as given: it is replaced by
        ``dict_n_outers[dataset_name, method]`` (20 if that lookup fails)
        and by 30 for the ``implicit_forward`` methods.
    tolerance_decrease : str
        Not used by the computation; returned unchanged.

    Returns
    -------
    tuple
        ``(dataset_name, method, tol, n_outer, tolerance_decrease, times,
        objs, objs_test, alphas, alpha_max, model_name)`` where ``times``,
        ``objs`` and ``alphas`` are arrays built from the monitor and
        ``objs_test`` is the placeholder 0.

    Raises
    ------
    ValueError
        If ``model_name`` is neither "enet" nor "logreg".
    NotImplementedError
        If ``method`` is not one of the supported names.
    """
    # load data
    X, y = fetch_libsvm(dataset_name)
    y -= np.mean(y)
    # compute alpha_max
    alpha_max = np.abs(X.T @ y).max() / len(y)

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max * dict_palphamin[dataset_name]

    if model_name == "enet":
        estimator = linear_model.ElasticNet(
            fit_intercept=False, max_iter=10_000, warm_start=True, tol=tol)
        model = ElasticNet(estimator=estimator)
    elif model_name == "logreg":
        # ``estimator`` used to be referenced here without ever being
        # assigned in this branch (UnboundLocalError). An l1-penalized
        # scikit-learn estimator is built with the same options as the
        # ElasticNet one.
        # NOTE(review): solver choice is an assumption -- confirm.
        estimator = linear_model.LogisticRegression(
            penalty='l1', fit_intercept=False, solver='saga',
            max_iter=10_000, warm_start=True, tol=tol)
        model = SparseLogreg(estimator=estimator)
    else:
        # No model is defined for other names; fail with a clear message
        # instead of an unbound-variable error further down.
        raise ValueError(f"Unsupported model_name: {model_name!r}")

    # TODO improve this
    # Best-effort lookup: fall back to 20 outer iterations when no entry
    # is available for this (dataset, method) pair.
    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2
    for _ in range(size_loop):
        if model_name in ("lasso", "enet"):
            sub_criterion = HeldOutMSE(None, None)
        elif model_name == "logreg":
            # Was assigned to ``criterion``, leaving ``sub_criterion``
            # undefined for the CrossVal call below.
            sub_criterion = HeldOutLogistic(None, None)
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        criterion = CrossVal(sub_criterion, cv=kf)

        algo = ImplicitForward(tol_jac=1e-3)
        monitor = Monitor()
        t_max = dict_t_max[dataset_name]
        if method == 'grid_search':
            # Two-dimensional grid: every pair of values from alpha1D.
            num1D = dict_point_grid_search[dataset_name]
            alpha1D = np.geomspace(alpha_max, alpha_min, num=num1D)
            alphas = [np.array(i) for i in product(alpha1D, alpha1D)]
            grid_search(
                algo, criterion, model, X, y, alpha_min, alpha_max,
                monitor, max_evals=100, tol=tol, alphas=alphas)
        elif method == 'random' or method == 'bayesian':
            hyperopt_wrapper(
                algo, criterion, model, X, y, alpha_min, alpha_max,
                monitor, max_evals=30, tol=tol, method=method,
                size_space=2, t_max=t_max)
        elif method.startswith("implicit_forward"):
            # do gradient descent to find the optimal lambda
            alpha0 = np.array([alpha_max / 100, alpha_max / 100])
            n_outer = 30
            optimizer_options = dict(
                n_outer=n_outer, p_grad_norm=1, verbose=True, tol=tol,
                t_max=t_max)
            if method != 'implicit_forward':
                optimizer_options["tol_decrease"] = "geom"
            optimizer = GradientDescent(**optimizer_options)
            grad_search(
                algo, criterion, model, optimizer, X, y, alpha0, monitor)
        else:
            raise NotImplementedError(f"Unknown method: {method!r}")

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = 0  # TODO
    monitor.alphas = np.array(monitor.alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.alphas, alpha_max,
            model_name)
import pytest import numpy as np from sparse_ho.criterion import (HeldOutMSE, HeldOutLogistic, FiniteDiffMonteCarloSure) from sparse_ho.utils import Monitor from sparse_ho import Forward from sparse_ho.tests.common import (X, y, sigma_star, idx_train, idx_val, models, dict_list_log_alphas) list_model_crit = [('lasso', HeldOutMSE(idx_train, idx_val)), ('enet', HeldOutMSE(idx_train, idx_val)), ('wLasso', HeldOutMSE(idx_train, idx_val)), ('lasso', FiniteDiffMonteCarloSure(sigma_star)), ('logreg', HeldOutLogistic(idx_train, idx_val))] tol = 1e-15 @pytest.mark.parametrize('model_name,criterion', list_model_crit) def test_cross_val_criterion(model_name, criterion): # verify dtype from criterion, and the good shape algo = Forward() monitor_get_val = Monitor() monitor_get_val_grad = Monitor() model = models[model_name] for log_alpha in dict_list_log_alphas[model_name]: criterion.get_val(model, X,
# X_train, X_val, X_test, y_train, y_val, y_test = get_leukemia()
n_samples, n_features = X_train.shape

print("Starting path computation...")

# Largest regularization strength considered, and a geometric grid of
# n_alphas values going down to 1e-4 times that strength.
alpha_max = np.max(np.abs(X_train.T @ (-y_train))) / (2 * n_samples)
n_alphas = 10
p_alphas = np.geomspace(1, 1e-4, n_alphas)
alphas = alpha_max * p_alphas
log_alphas = np.log(alphas)
tol = 1e-5

# grid search
model = SparseLogreg(X_train, y_train, log_alphas[0], max_iter=1000)
criterion = HeldOutLogistic(
    X_val, y_val, model, X_test=X_test, y_test=y_test)
algo = Forward(criterion)
monitor_grid_sk = Monitor()
grid_search(
    algo, None, None, monitor_grid_sk, log_alphas=log_alphas, tol=tol)

# The gradient-based search is currently disabled, so ``monitor`` is
# plotted exactly as created:
# grad_search(
#     algo, logCs[0], monitor, n_outer=5, verbose=True,
#     tolerance_decrease='constant', tol=1e-8,
#     t_max=10000)
monitor = Monitor()

plt.figure()
plt.plot(monitor_grid_sk.log_alphas, monitor_grid_sk.objs)
plt.plot(monitor.log_alphas, monitor.objs, 'bo')
plt.show(block=False)
def parallel_function(dataset_name, method, tol=1e-5, n_outer=50,
                      tolerance_decrease='constant'):
    """Run one hyperparameter search on one dataset and return its trace.

    The model is selected by the module-level ``model_name``. The search is
    executed ``size_loop`` (2) times with a fresh criterion, algorithm and
    monitor; only the monitor of the last pass is returned (presumably the
    first pass is a warm-up -- TODO confirm).

    Parameters
    ----------
    dataset_name : str
        Passed to ``get_data``; also the key used in ``dict_n_outers`` and
        ``dict_t_max``.
    method : str
        Key of ``dict_algo``. ``'grid_search'``, ``'random'`` and
        ``'bayesian'`` select the corresponding search; every other value
        runs ``grad_search`` starting from ``log(0.1 * alpha_max)``.
    tol : float
        Tolerance forwarded to the searches.
    n_outer : int
        Not used as given: it is replaced by
        ``dict_n_outers[dataset_name, method]`` (20 if that lookup fails).
    tolerance_decrease : str
        Forwarded to ``grad_search`` and returned.

    Returns
    -------
    tuple
        ``(dataset_name, method, tol, n_outer, tolerance_decrease, times,
        objs, objs_test, log_alphas, norm(y_val), norm(y_test),
        log_alpha_max, model_name)`` with the monitor records converted to
        arrays.

    Raises
    ------
    ValueError
        If ``model_name`` is neither "lasso" nor "logreg".
    """
    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, _ = X_train.shape
    # compute alpha_max
    alpha_max = np.abs(X_train.T @ y_train).max() / n_samples

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max / 10_000
    log_alpha_max = np.log(alpha_max)
    log_alpha_min = np.log(alpha_min)
    log_alpha0 = np.log(0.1 * alpha_max)

    if model_name == "lasso":
        model = Lasso(X_train, y_train)
    elif model_name == "logreg":
        model = SparseLogreg(X_train, y_train)
    else:
        # No model is defined for other names; fail with a clear message
        # instead of an unbound-variable error further down.
        raise ValueError(f"Unsupported model_name: {model_name!r}")

    # Best-effort lookup: fall back to 20 outer iterations when no entry
    # is available for this (dataset, method) pair.
    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2
    for _ in range(size_loop):
        if model_name == "lasso":
            criterion = HeldOutMSE(
                X_val, y_val, model, X_test=X_test, y_test=y_test)
        else:
            criterion = HeldOutLogistic(
                X_val, y_val, model, X_test=X_test, y_test=y_test)
        algo = dict_algo[method](criterion)
        monitor = Monitor()
        if method == 'grid_search':
            log_alphas = np.log(np.geomspace(alpha_max, alpha_min, num=100))
            grid_search(
                algo, None, None, monitor, log_alphas=log_alphas, tol=tol)
        elif method == 'random':
            # Bounds are passed as (min, max), matching the hyperopt call
            # below; they used to be swapped here.
            # NOTE(review): no sampling option is passed, so whether this
            # call samples randomly depends on grid_search's defaults --
            # confirm.
            grid_search(
                algo, log_alpha_min, log_alpha_max, monitor, tol=tol,
                max_evals=n_alphas, t_max=dict_t_max[dataset_name])
        elif method == "bayesian":
            # Plain equality: ``method in ("bayesian")`` was a substring
            # test against the string, not a tuple membership test.
            hyperopt_wrapper(
                algo, log_alpha_min, log_alpha_max, monitor,
                max_evals=n_alphas, tol=tol, method='bayesian',
                t_max=dict_t_max[dataset_name])
        else:
            # do line search to find the optimal lambda
            grad_search(
                algo, log_alpha0, monitor, n_outer=n_outer, tol=tol,
                tolerance_decrease=tolerance_decrease,
                t_max=dict_t_max[dataset_name])

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = np.array(monitor.objs_test)
    monitor.log_alphas = np.array(monitor.log_alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.log_alphas, norm(y_val), norm(y_test), log_alpha_max,
            model_name)
def parallel_function(
        dataset_name, method, tol=1e-5, n_outer=50,
        tolerance_decrease='exponential'):
    """Run one sparse logistic regression search and return its trace.

    Labels equal to 0 are replaced by -1 in place in the arrays returned by
    ``get_data``. The search is executed ``size_loop`` (2) times on the same
    model with a fresh monitor, criterion and algorithm; only the monitor of
    the last pass is returned (presumably the first pass is a warm-up --
    TODO confirm).

    Parameters
    ----------
    dataset_name : str
        Passed to ``get_data``; also the key used in ``dict_t_max``.
    method : str
        ``'implicit_forward'``, ``'forward'`` or ``'implicit'`` run
        ``grad_search`` with the matching algorithm; ``'grid_search'``,
        ``'random'`` or ``'lhs'`` run ``grid_search`` with a Forward
        algorithm. Any other value runs nothing and yields empty records.
    tol : float
        Tolerance forwarded to the searches.
    n_outer : int
        Not used as given: it is overwritten with 25.
    tolerance_decrease : str
        Forwarded to ``grad_search`` and returned.

    Returns
    -------
    tuple
        ``(dataset_name, method, tol, n_outer, tolerance_decrease, times,
        objs, objs_test, log_alphas, norm(y_val), norm(y_test),
        log_alpha_max)`` with the monitor records converted to arrays.
    """
    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, n_features = X_train.shape
    print('n_samples', n_samples)
    print('n_features', n_features)

    # Map the 0 labels to -1, in place.
    for labels in (y_train, y_val, y_test):
        labels[labels == 0.0] = -1.0

    alpha_max = np.max(np.abs(X_train.T @ y_train))
    alpha_max /= n_samples
    alpha_max /= 4
    alpha_min = alpha_max * 1e-4
    log_alpha_max = np.log(alpha_max)
    log_alpha_min = np.log(alpha_min)
    log_alpha0 = np.log(0.1 * alpha_max)

    # NOTE: the n_outer argument is deliberately left overridden, as in the
    # original; the returned value is therefore always 25.
    n_outer = 25
    size_loop = 2

    model = SparseLogreg(
        X_train, y_train, max_iter=1000, log_alpha_max=log_alpha_max)

    gradient_methods = ("implicit_forward", "forward", "implicit")
    sampling_methods = ("grid_search", "random", "lhs")

    for _ in range(size_loop):
        monitor = Monitor()
        if method in gradient_methods:
            criterion = HeldOutLogistic(
                X_val, y_val, model, X_test=X_test, y_test=y_test)
            extra_options = {}
            if method == "implicit_forward":
                algo = ImplicitForward(
                    criterion, tol_jac=1e-5, n_iter_jac=100)
                # Only this method silences grad_search explicitly.
                extra_options["verbose"] = False
            elif method == "forward":
                algo = Forward(criterion)
            else:
                algo = Implicit(criterion)
            grad_search(
                algo=algo, log_alpha0=log_alpha0, tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease, **extra_options)
        elif method in sampling_methods:
            criterion = HeldOutLogistic(
                X_val, y_val, model, X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            if method == "grid_search":
                # Explicit grid of 100 values; no bounds are needed.
                log_alphas = np.log(
                    np.geomspace(alpha_max, alpha_min, num=100))
                log_alpha_opt, _ = grid_search(
                    algo, None, None, monitor, tol=tol, samp="grid",
                    t_max=dict_t_max[dataset_name], log_alphas=log_alphas)
            else:
                # "random" and "lhs" are also the names of the sampling
                # schemes given to grid_search.
                log_alpha_opt, _ = grid_search(
                    algo, log_alpha_min, log_alpha_max, monitor,
                    max_evals=100, tol=tol, samp=method,
                    t_max=dict_t_max[dataset_name])
            print(log_alpha_opt)

    # np.array already returns new arrays, so no extra copy is needed.
    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = np.array(monitor.objs_test)
    monitor.log_alphas = np.array(monitor.log_alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.log_alphas, norm(y_val), norm(y_test), log_alpha_max)