import numpy as np

# `get_data` is the project's data-loading utility (imported at module level).


def test_rcv1_train():
    X_train, X_val, X_test, y_train, y_val, y_test = get_data("rcv1_train")
    # each split must have as many rows as labels
    np.testing.assert_equal(X_train.shape[0], y_train.shape[0])
    np.testing.assert_equal(X_test.shape[0], y_test.shape[0])
    np.testing.assert_equal(X_val.shape[0], y_val.shape[0])
    # all splits must share the same feature space
    np.testing.assert_equal(X_train.shape[1], X_test.shape[1])
    np.testing.assert_equal(X_train.shape[1], X_val.shape[1])
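# A minimal, hypothetical stand-in for `get_data`, handy for exercising the
# shape checks above without downloading rcv1. It only assumes the loader's
# return convention (six arrays in this exact order); it is not the project's
# real loader.
from scipy import sparse


def get_data_fake(n_train=60, n_val=20, n_test=20, n_features=100, seed=0):
    rng = np.random.RandomState(seed)
    n_total = n_train + n_val + n_test
    X = sparse.random(n_total, n_features, density=0.1,
                      random_state=rng, format="csr")
    y = rng.randn(n_total)
    X_train, y_train = X[:n_train], y[:n_train]
    X_val, y_val = X[n_train:n_train + n_val], y[n_train:n_train + n_val]
    X_test, y_test = X[n_train + n_val:], y[n_train + n_val:]
    return X_train, X_val, X_test, y_train, y_val, y_test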
import numpy as np

from sparse_ho.implicit_forward import ImplicitForward
from sparse_ho.criterion import HeldOutMSE
from sparse_ho.models import ElasticNet
from sparse_ho.ho import grad_search
from bcdsugar.utils import Monitor

dataset = "rcv1"
# dataset = "real-sim"

use_small_part = False
# use_small_part = True

#############################
print("Started to load data")
X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset)

if use_small_part:
    # keep only the 1000 features most correlated with the training target
    idx = np.abs(X_train.T @ y_train).argsort()[-1000:]
    X_train = X_train[:, idx]
    X_val = X_val[:, idx]
    X_test = X_test[:, idx]

# center the targets
y_train -= y_train.mean()
y_val -= y_val.mean()
y_test -= y_test.mean()
print("Finished loading data")

# smallest alpha for which the regularized solution is identically zero
alpha_max = np.max(np.abs(X_train.T @ y_train))
alpha_max /= X_train.shape[0]
log_alpha_max = np.log(alpha_max)
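# One plausible continuation of the script above, wiring the imports into an
# actual hyperparameter search. The calls mirror the ElasticNet experiment
# function later in this file; the initial points and the reduced keyword set
# are assumptions, not part of the original snippet.
log_alpha1_0 = np.log(0.1 * alpha_max)
log_alpha2_0 = np.log(0.1 * alpha_max)
model = ElasticNet(X_train, y_train, log_alpha1_0, log_alpha2_0,
                   log_alpha_max, max_iter=1000, tol=1e-5)
criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
algo = ImplicitForward(criterion, tol_jac=1e-3, n_iter_jac=100)
monitor = Monitor()
grad_search(algo=algo, log_alpha0=np.array([log_alpha1_0, log_alpha2_0]),
            tol=1e-5, n_outer=25, monitor=monitor)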
def parallel_function(dataset_name, method, tol=1e-5, n_outer=50,
                      tolerance_decrease='exponential'):
    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, n_features = X_train.shape
    print('n_samples', n_samples)
    print('n_features', n_features)

    # map {0, 1} labels to {-1, 1}
    y_train[y_train == 0.0] = -1.0
    y_val[y_val == 0.0] = -1.0
    y_test[y_test == 0.0] = -1.0

    # smallest alpha for which the solution is identically zero
    alpha_max = np.max(np.abs(X_train.T @ y_train))
    alpha_max /= X_train.shape[0]
    log_alpha_max = np.log(alpha_max)
    alpha_min = alpha_max * 1e-2
    log_alpha_min = np.log(alpha_min)

    # both ElasticNet hyperparameters are initialized at alpha_max / 10
    log_alpha1_0 = np.log(0.1 * alpha_max)
    log_alpha2_0 = np.log(0.1 * alpha_max)

    n_outer = 25
    size_loop = 2  # repeat the run to smooth out timing noise

    model = ElasticNet(X_train, y_train, log_alpha1_0, log_alpha2_0,
                       log_alpha_max, max_iter=1000, tol=tol)

    for i in range(size_loop):
        monitor = Monitor()
        criterion = HeldOutMSE(X_val, y_val, model,
                               X_test=X_test, y_test=y_test)
        if method == "implicit_forward":
            algo = ImplicitForward(criterion, tol_jac=1e-3, n_iter_jac=100)
            grad_search(
                algo=algo, verbose=False,
                log_alpha0=np.array([log_alpha1_0, log_alpha2_0]),
                tol=tol, n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)
        elif method == "forward":
            algo = Forward(criterion)
            grad_search(
                algo=algo,
                log_alpha0=np.array([log_alpha1_0, log_alpha2_0]),
                tol=tol, n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)
        elif method == "implicit":
            algo = Implicit(criterion)
            grad_search(
                algo=algo,
                log_alpha0=np.array([log_alpha1_0, log_alpha2_0]),
                tol=tol, n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)
        elif method == "grid_search":
            algo = Forward(criterion)
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, log_alpha_max, monitor, max_evals=10,
                tol=tol, samp="grid", t_max=dict_t_max[dataset_name],
                log_alphas=None, nb_hyperparam=2)
            print(log_alpha_opt)
        elif method == "random":
            algo = Forward(criterion)
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(alpha_max), monitor,
                max_evals=10, tol=tol, samp="random",
                t_max=dict_t_max[dataset_name], nb_hyperparam=2)
            print(log_alpha_opt)
        elif method == "lhs":
            algo = Forward(criterion)
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(alpha_max), monitor,
                max_evals=10, tol=tol, samp="lhs",
                t_max=dict_t_max[dataset_name])
            print(log_alpha_opt)

    monitor.times = np.array(monitor.times).copy()
    monitor.objs = np.array(monitor.objs).copy()
    monitor.objs_test = np.array(monitor.objs_test).copy()
    monitor.log_alphas = np.array(monitor.log_alphas).copy()

    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.log_alphas, norm(y_val), norm(y_test), log_alpha_max)
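# A minimal driver sketch for the function above, assuming joblib is
# available; the `methods` and `datasets` lists are illustrative choices,
# not part of the original script.
from itertools import product
from joblib import Parallel, delayed

methods = ["implicit_forward", "forward", "implicit", "grid_search"]
datasets = ["rcv1"]
results = Parallel(n_jobs=len(methods) * len(datasets), verbose=100)(
    delayed(parallel_function)(dataset_name, method)
    for dataset_name, method in product(datasets, methods))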
def parallel_function(dataset_name, method, tol=1e-5, n_outer=50,
                      tolerance_decrease='constant'):
    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, _ = X_train.shape

    # compute alpha_max
    alpha_max = np.abs(X_train.T @ y_train).max() / n_samples
    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max / 10_000
    log_alpha_max = np.log(alpha_max)
    log_alpha_min = np.log(alpha_min)
    log_alpha0 = np.log(0.1 * alpha_max)

    if model_name == "lasso":
        model = Lasso(X_train, y_train)
    elif model_name == "logreg":
        model = SparseLogreg(X_train, y_train)

    try:
        n_outer = dict_n_outers[dataset_name, method]
    except KeyError:
        n_outer = 20

    size_loop = 2  # repeat the run to smooth out timing noise
    for _ in range(size_loop):
        if model_name == "lasso":
            criterion = HeldOutMSE(X_val, y_val, model,
                                   X_test=X_test, y_test=y_test)
        elif model_name == "logreg":
            criterion = HeldOutLogistic(X_val, y_val, model,
                                        X_test=X_test, y_test=y_test)
        algo = dict_algo[method](criterion)
        monitor = Monitor()
        if method == 'grid_search':
            log_alphas = np.log(np.geomspace(alpha_max, alpha_min, num=100))
            grid_search(algo, None, None, monitor, log_alphas=log_alphas,
                        tol=tol)
        elif method == 'random':
            grid_search(algo, log_alpha_max, log_alpha_min, monitor, tol=tol,
                        max_evals=n_alphas, t_max=dict_t_max[dataset_name])
        elif method == 'bayesian':
            hyperopt_wrapper(algo, log_alpha_min, log_alpha_max, monitor,
                             max_evals=n_alphas, tol=tol, method='bayesian',
                             t_max=dict_t_max[dataset_name])
        else:
            # gradient-based search for the optimal lambda
            grad_search(algo, log_alpha0, monitor, n_outer=n_outer, tol=tol,
                        tolerance_decrease=tolerance_decrease,
                        t_max=dict_t_max[dataset_name])

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = np.array(monitor.objs_test)
    monitor.log_alphas = np.array(monitor.log_alphas)

    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.log_alphas, norm(y_val), norm(y_test), log_alpha_max,
            model_name)
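# `model_name`, `dict_algo`, `n_alphas`, `dict_n_outers` and `dict_t_max` are
# module-level globals in this script. A plausible definition of `dict_algo`,
# consistent with the algorithm classes used in the other snippets (an
# assumption, not shown in the original):
dict_algo = {
    "forward": Forward,
    "implicit": Implicit,
    "implicit_forward": ImplicitForward,
    # grid/random/bayesian only need plain forward solves of the inner problem
    "grid_search": Forward,
    "random": Forward,
    "bayesian": Forward,
}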
def parallel_function(dataset_name, method, tol=1e-5, n_outer=50,
                      tolerance_decrease='constant'):
    t_max = dict_tmax[dataset_name]

    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, n_features = X_train.shape

    # compute alpha_max
    alpha_max = np.abs(X_train.T @ y_train).max() / n_samples
    log_alpha0 = np.log(0.1 * alpha_max)

    # smallest squared column norm (over n) among non-zero columns; a lower
    # bound on the coordinatewise Lipschitz constants, used to constrain gamma
    idx_nz = scipy.sparse.linalg.norm(X_train, axis=0) != 0
    L_min = scipy.sparse.linalg.norm(
        X_train[:, idx_nz], axis=0).min() ** 2 / n_samples
    log_alpha0_mcp = np.array([log_alpha0, np.log(2 / L_min)])

    list_log_alphas = np.log(alpha_max * np.geomspace(1, 0.0001, 100))
    list_log_gammas = np.log(np.geomspace(1.1 / L_min, 1000 / L_min, 5))

    try:
        n_outer = dict_n_outers[dataset_name, method]
    except KeyError:
        n_outer = 50

    size_loop = 2 if dataset_name == "rcv1" else 1
    for i in range(size_loop):
        monitor = Monitor()
        warm_start = WarmStart()
        if method == 'grid_search':
            grid_searchMCP(X_train, y_train, list_log_alphas,
                           list_log_gammas, X_val, y_val, X_test, y_test,
                           tol, monitor=monitor)
        elif method in ("bayesian", "random"):
            monitor = hyperopt_lasso(X_train, y_train, log_alpha0, X_val,
                                     y_val, X_test, y_test, tol,
                                     max_evals=n_outer, method=method)
        else:
            # gradient-based search for the optimal hyperparameters
            grad_search(X_train, y_train, log_alpha0_mcp, X_val, y_val,
                        X_test, y_test, tol, monitor, method=method,
                        maxit=10000, n_outer=n_outer, warm_start=warm_start,
                        niter_jac=100, model="mcp", t_max=t_max)

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = np.array(monitor.objs_test)
    monitor.log_alphas = np.array(monitor.log_alphas)

    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.log_alphas, norm(y_val), norm(y_test))
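# For reference, a sketch of the standard MCP penalty (Zhang, 2010) that the
# (alpha, gamma) grid above parametrizes; the exact parametrization inside the
# project's MCP solver may differ slightly. Sampling gamma above 1.1 / L_min
# keeps each coordinate subproblem well behaved.
def mcp_penalty(t, alpha, gamma):
    """Minimax concave penalty, applied elementwise."""
    t = np.abs(t)
    return np.where(t <= gamma * alpha,
                    alpha * t - t ** 2 / (2 * gamma),  # concave quadratic part
                    gamma * alpha ** 2 / 2)            # constant beyond gamma*alpha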
def parallel_function(dataset_name, method, tol=1e-5, n_outer=50,
                      tolerance_decrease='exponential'):
    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(
        dataset_name, csr=True)
    n_samples, n_features = X_train.shape
    print('n_samples', n_samples)
    print('n_features', n_features)

    # map {0, 1} labels to {-1, 1}
    y_train[y_train == 0.0] = -1.0
    y_val[y_val == 0.0] = -1.0
    y_test[y_test == 0.0] = -1.0

    C_max = 100
    logC = np.log(1e-2)  # initial value of log(C)
    n_outer = 5
    size_loop = 1

    model = SVM(X_train, y_train, logC, max_iter=10000, tol=tol)

    for i in range(size_loop):
        monitor = Monitor()
        criterion = HeldOutSmoothedHinge(X_val, y_val, model,
                                         X_test=X_test, y_test=y_test)
        if method == "implicit_forward":
            algo = ImplicitForward(criterion, tol_jac=1e-3, n_iter_jac=100)
            grad_search(
                algo=algo, verbose=False, log_alpha0=logC, tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)
        elif method == "forward":
            algo = Forward(criterion)
            grad_search(
                algo=algo, log_alpha0=logC, tol=tol, n_outer=n_outer,
                monitor=monitor, t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)
        elif method == "implicit":
            algo = Implicit(criterion)
            grad_search(
                algo=algo, log_alpha0=logC, tol=tol, n_outer=n_outer,
                monitor=monitor, t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)
        elif method in ("grid_search", "random", "lhs"):
            # the three samplers differ only in the `samp` argument
            algo = Forward(criterion)
            log_alpha_min = np.log(1e-2)
            samp = "grid" if method == "grid_search" else method
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(C_max), monitor, max_evals=25,
                tol=tol, samp=samp)
            print(log_alpha_opt)

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = np.array(monitor.objs_test)
    monitor.log_alphas = np.array(monitor.log_alphas)

    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.log_alphas, norm(y_val), norm(y_test))
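# A sketch of how the tuples returned by the SVM function above could be
# collected once a parallel driver (like the one sketched after the
# ElasticNet function) has produced a `results` list; the column names and
# output path are illustrative assumptions, not part of the original script.
import pandas as pd

df = pd.DataFrame(results)
df.columns = [
    'dataset', 'method', 'tol', 'n_outer', 'tolerance_decrease', 'times',
    'objs', 'objs_test', 'log_alphas', 'norm_y_val', 'norm_y_test']
df.to_pickle("results_svm.pkl")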