def test_cross_val_criterion():
    alpha_min = alpha_max / 10
    log_alpha_max = np.log(alpha_max)
    log_alpha_min = np.log(alpha_min)
    max_iter = 10000
    n_alphas = 10
    kf = KFold(n_splits=5, shuffle=True, random_state=56)
    estimator = sklearn.linear_model.Lasso(
        fit_intercept=False, max_iter=1000, warm_start=True)

    # grid search on the cross-validation criterion with sparse-ho
    monitor_grid = Monitor()
    criterion = CrossVal(X, y, Lasso, cv=kf, estimator=estimator)
    algo = Forward()
    grid_search(
        algo, criterion, log_alpha_min, log_alpha_max, monitor_grid,
        max_evals=n_alphas, tol=tol)

    # reference: scikit-learn's LassoCV on the same grid of alphas
    reg = LassoCV(
        cv=kf, verbose=True, tol=tol, fit_intercept=False,
        alphas=np.geomspace(alpha_max, alpha_min, num=n_alphas),
        max_iter=max_iter).fit(X, y)
    reg.score(X, y)
    objs_grid_sk = reg.mse_path_.mean(axis=1)

    # these two values should be the same
    assert np.allclose(objs_grid_sk, monitor_grid.objs)

def test_cross_val_criterion(model_name, XX):
    model = models[model_name]
    alpha_min = alpha_max / 10
    max_iter = 10000
    n_alphas = 10
    kf = KFold(n_splits=5, shuffle=True, random_state=56)

    # grid search on the cross-validation criterion with sparse-ho
    monitor_grid = Monitor()
    if model_name.startswith("lasso"):
        sub_crit = HeldOutMSE(None, None)
    else:
        sub_crit = HeldOutLogistic(None, None)
    criterion = CrossVal(sub_crit, cv=kf)
    grid_search(
        criterion, model, XX, y, alpha_min, alpha_max, monitor_grid,
        max_evals=n_alphas, tol=tol)

    # reference: scikit-learn's cross-validated estimators on the same grid
    if model_name.startswith("lasso"):
        reg = linear_model.LassoCV(
            cv=kf, verbose=True, tol=tol, fit_intercept=False,
            alphas=np.geomspace(alpha_max, alpha_min, num=n_alphas),
            max_iter=max_iter).fit(X, y)
    else:
        reg = linear_model.LogisticRegressionCV(
            cv=kf, verbose=True, tol=tol, fit_intercept=False,
            Cs=len(idx_train) / np.geomspace(alpha_max, alpha_min,
                                             num=n_alphas),
            max_iter=max_iter, penalty='l1', solver='liblinear').fit(X, y)
    reg.score(XX, y)
    if model_name.startswith("lasso"):
        objs_grid_sk = reg.mse_path_.mean(axis=1)
    else:
        objs_grid_sk = reg.scores_[1.0].mean(axis=1)

    # these two values should be the same
    np.testing.assert_allclose(objs_grid_sk, monitor_grid.objs)

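# Hedged sketch (assumption, not the original test module): one way the test
# above could be parametrized with pytest. The real keys of `models` and the
# dense/sparse matrices passed as `XX` are not shown in this excerpt; here `X`
# is assumed to be the dense design matrix defined at module level.
import pytest
from scipy.sparse import csc_matrix


@pytest.mark.parametrize("model_name", ["lasso", "logreg"])
@pytest.mark.parametrize("XX", [X, csc_matrix(X)])
def test_cross_val_criterion_dense_and_sparse(model_name, XX):
    # delegate to the test body defined above
    test_cross_val_criterion(model_name, XX)
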
def parallel_function(
        dataset_name, method, tol=1e-5, n_outer=50,
        tolerance_decrease='constant'):
    # load data
    X, y = fetch_libsvm(dataset_name)
    y -= np.mean(y)
    # compute alpha_max
    alpha_max = np.abs(X.T @ y).max() / len(y)
    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max * dict_palphamin[dataset_name]

    if model_name == "enet":
        estimator = linear_model.ElasticNet(
            fit_intercept=False, max_iter=10_000, warm_start=True, tol=tol)
        model = ElasticNet(estimator=estimator)
    elif model_name == "logreg":
        # NOTE: no estimator is defined for this case in the original
        # snippet; an l1-penalized scikit-learn classifier such as the one
        # below is assumed.
        estimator = linear_model.LogisticRegression(
            penalty='l1', solver='saga', fit_intercept=False,
            warm_start=True, max_iter=10_000, tol=tol)
        model = SparseLogreg(estimator=estimator)

    # TODO improve this
    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2
    for _ in range(size_loop):
        if model_name == "lasso" or model_name == "enet":
            sub_criterion = HeldOutMSE(None, None)
        elif model_name == "logreg":
            sub_criterion = HeldOutLogistic(None, None)
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        criterion = CrossVal(sub_criterion, cv=kf)
        algo = ImplicitForward(tol_jac=1e-3)
        monitor = Monitor()
        t_max = dict_t_max[dataset_name]
        if method == 'grid_search':
            num1D = dict_point_grid_search[dataset_name]
            alpha1D = np.geomspace(alpha_max, alpha_min, num=num1D)
            alphas = [np.array(i) for i in product(alpha1D, alpha1D)]
            grid_search(
                algo, criterion, model, X, y, alpha_min, alpha_max, monitor,
                max_evals=100, tol=tol, alphas=alphas)
        elif method == 'random' or method == 'bayesian':
            hyperopt_wrapper(
                algo, criterion, model, X, y, alpha_min, alpha_max, monitor,
                max_evals=30, tol=tol, method=method, size_space=2,
                t_max=t_max)
        elif method.startswith("implicit_forward"):
            # do gradient descent to find the optimal lambda
            alpha0 = np.array([alpha_max / 100, alpha_max / 100])
            n_outer = 30
            if method == 'implicit_forward':
                optimizer = GradientDescent(
                    n_outer=n_outer, p_grad_norm=1, verbose=True, tol=tol,
                    t_max=t_max)
            else:
                optimizer = GradientDescent(
                    n_outer=n_outer, p_grad_norm=1, verbose=True, tol=tol,
                    t_max=t_max, tol_decrease="geom")
            grad_search(
                algo, criterion, model, optimizer, X, y, alpha0, monitor)
        else:
            raise NotImplementedError

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = 0  # TODO
    monitor.alphas = np.array(monitor.alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.alphas,
            alpha_max, model_name)

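# Hedged sketch: the name `parallel_function` suggests a joblib-style driver.
# The dataset names, method list and n_jobs below are illustrative
# assumptions, not the exact experiment grid used by this script.
from itertools import product
from joblib import Parallel, delayed

dataset_names = ["rcv1.binary", "real-sim"]  # assumed example datasets
methods = ["grid_search", "random", "implicit_forward"]

results = Parallel(n_jobs=4, verbose=10)(
    delayed(parallel_function)(dataset_name, method, tol=1e-5)
    for dataset_name, method in product(dataset_names, methods))
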
def parallel_function(dataset_name, div_alpha, method, ind_rep,
                      random_state=10):
    maxit = dict_maxits[(dataset_name, div_alpha)][ind_rep]
    print("Dataset %s, algo %s, maxit %i" % (dataset_name, method, maxit))
    X, y = fetch_libsvm(dataset_name)
    n_samples = len(y)
    kf = KFold(n_splits=5, random_state=random_state, shuffle=True)

    for i in range(2):
        alpha_max = np.max(np.abs(X.T.dot(y))) / n_samples
        log_alpha = np.log(alpha_max / div_alpha)
        monitor = Monitor()
        if method == "celer":
            clf = Lasso_celer(
                alpha=np.exp(log_alpha), fit_intercept=False,
                # TODO maybe change this tol
                tol=1e-8, max_iter=maxit)
            model = Lasso(estimator=clf, max_iter=maxit)
            criterion = HeldOutMSE(None, None)
            cross_val = CrossVal(cv=kf, criterion=criterion)
            algo = ImplicitForward(
                tol_jac=1e-8, n_iter_jac=maxit, use_stop_crit=False)
            algo.max_iter = maxit
            val, grad = cross_val.get_val_grad(
                model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol,
                monitor=monitor, max_iter=maxit)
        elif method == "ground_truth":
            # skip if the ground truth for this setting was already computed
            for file in os.listdir("results/"):
                if file.startswith("hypergradient_%s_%i_%s" % (
                        dataset_name, div_alpha, method)):
                    return
            else:
                clf = Lasso_celer(
                    alpha=np.exp(log_alpha), fit_intercept=False,
                    warm_start=True, tol=1e-13, max_iter=10000)
                criterion = HeldOutMSE(None, None)
                cross_val = CrossVal(cv=kf, criterion=criterion)
                algo = Implicit(criterion)
                model = Lasso(estimator=clf, max_iter=10000)
                val, grad = cross_val.get_val_grad(
                    model, X, y, log_alpha, algo.get_beta_jac_v, tol=1e-13,
                    monitor=monitor)
        else:
            model = Lasso(max_iter=maxit)
            criterion = HeldOutMSE(None, None)
            cross_val = CrossVal(cv=kf, criterion=criterion)
            if method == "forward":
                algo = Forward(use_stop_crit=False)
            elif method == "implicit_forward":
                algo = ImplicitForward(
                    use_stop_crit=False, tol_jac=1e-8, n_iter_jac=maxit,
                    max_iter=1000)
            elif method == "implicit":
                algo = Implicit(use_stop_crit=False, max_iter=1000)
            elif method == "backward":
                algo = Backward()
            else:
                raise ValueError("Unknown method %r" % method)
            algo.max_iter = maxit
            algo.use_stop_crit = False
            val, grad = cross_val.get_val_grad(
                model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol,
                monitor=monitor, max_iter=maxit)

    results = (dataset_name, div_alpha, method, maxit, val, grad,
               monitor.times[0])
    df = pandas.DataFrame(results).transpose()
    df.columns = [
        'dataset', 'div_alpha', 'method', 'maxit', 'val', 'grad', 'time']
    str_results = "results/hypergradient_%s_%i_%s_%i.pkl" % (
        dataset_name, div_alpha, method, maxit)
    df.to_pickle(str_results)

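# Hedged sketch: collecting the per-run pickles written above into a single
# DataFrame. The "results/hypergradient_*.pkl" pattern matches the code above;
# the aggregation itself is an illustrative assumption.
import glob

import pandas

df_all = pandas.concat(
    [pandas.read_pickle(f) for f in glob.glob("results/hypergradient_*.pkl")],
    ignore_index=True)
df_all["time"] = df_all["time"].astype(float)
print(df_all.groupby(["dataset", "method"])["time"].mean())
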
    tol=tol)

dict_monitor = {}

all_algo_name = ['grid_search']
# all_algo_name = [
#     'grid_search', 'implicit_forward', "implicit_forward_approx",
#     'bayesian', 'random_search']
# all_algo_name = ['random_search']

for algo_name in all_algo_name:
    model = ElasticNet(estimator=estimator)
    sub_criterion = HeldOutMSE(None, None)
    alpha0 = np.array([alpha_max / 10, alpha_max / 10])
    monitor = Monitor()
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    criterion = CrossVal(sub_criterion, cv=kf)
    algo = ImplicitForward(tol_jac=1e-3)
    # optimizer = LineSearch(n_outer=10, tol=tol)
    if algo_name.startswith('implicit_forward'):
        if algo_name == "implicit_forward_approx":
            optimizer = GradientDescent(
                n_outer=30, p_grad_norm=1., verbose=True, tol=tol,
                tol_decrease="geom")
        else:
            optimizer = GradientDescent(
                n_outer=30, p_grad_norm=1., verbose=True, tol=tol)
        grad_search(
            algo, criterion, model, optimizer, X, y, alpha0, monitor)
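    # Hedged sketch: `dict_monitor` is initialized above but never filled in
    # this excerpt; presumably each run is stored under its algorithm name so
    # that objective-vs-time curves can be compared afterwards.
    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    dict_monitor[algo_name] = monitor
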
# Now do the hyperparameter optimization with implicit differentiation
# --------------------------------------------------------------------
estimator = sklearn.linear_model.Lasso(
    fit_intercept=False, max_iter=1000, warm_start=True, tol=tol)

print('sparse-ho started')

t0 = time.time()
model = Lasso()
criterion = HeldOutMSE(None, None)
alpha0 = alpha_max / 10
monitor_grad = Monitor()
cross_val_criterion = CrossVal(criterion, cv=kf)
algo = ImplicitForward()
optimizer = LineSearch(n_outer=10, tol=tol)
grad_search(
    algo, cross_val_criterion, model, optimizer, X, y, alpha0, monitor_grad)
t_grad_search = time.time() - t0

print('sparse-ho finished')

##############################################################################
# Plot results
# ------------
objs = reg.mse_path_.mean(axis=1)
p_alphas_grad = np.array(monitor_grad.alphas) / alpha_max

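# Hedged sketch of a possible plotting cell this section builds up to
# (matplotlib only; the exact figure of the original example may differ). It
# overlays the LassoCV objective on the grid with the alphas visited by
# grad_search.
import matplotlib.pyplot as plt

p_alphas_grid = reg.alphas_ / alpha_max
objs_grad = np.array(monitor_grad.objs)

plt.figure()
plt.semilogx(p_alphas_grid, objs, "o-", label="grid search (LassoCV)")
plt.semilogx(p_alphas_grad, objs_grad, "x-",
             label="gradient search (sparse-ho)")
plt.xlabel(r"$\alpha / \alpha_{\max}$")
plt.ylabel("cross-validation MSE")
plt.legend()
plt.show()
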
# Now do the hyperparameter optimization with implicit differentiation
# --------------------------------------------------------------------
estimator = sklearn.linear_model.Lasso(
    fit_intercept=False, max_iter=1000, warm_start=True, tol=tol)

print('sparse-ho started')

t0 = time.time()
Model = Lasso
Criterion = HeldOutMSE
log_alpha0 = np.log(alpha_max / 10)
monitor_grad = Monitor()
criterion = CrossVal(X, y, Model, cv=kf, estimator=estimator)
algo = ImplicitForward()
grad_search(
    algo, criterion, log_alpha0, monitor_grad, n_outer=10, tol=tol)
t_grad_search = time.time() - t0

print('sparse-ho finished')

##############################################################################
# Plot results
# ------------