# Assumed imports for this fragment; `max_iters`, `Cs`, `load_libsvm`, `SVM`
# and `compute_beta` are defined elsewhere in the original script.
import numpy as np
from numpy.linalg import norm
from scipy.sparse import csc_matrix
from scipy.sparse.linalg import cg
from lightning.classification import SDCAClassifier


def linear_cv(dataset_name, max_iter=1000, tol=1e-3, compute_jac=True):
    max_iter = max_iters[dataset_name]
    X, y = load_libsvm(dataset_name)
    X = X.tocsr()
    # Remove samples whose rows are entirely zero.
    num_nonzeros = np.diff(X.indptr)
    X = X[num_nonzeros != 0]
    y = y[num_nonzeros != 0]

    n_samples, n_features = X.shape
    C = Cs[dataset_name]

    # Compute the dual solution of the SVM with SDCA.
    clf = SDCAClassifier(
        alpha=1 / (C * n_samples), loss='hinge', verbose=True, tol=1e-16,
        max_iter=max_iter)
    clf.fit(X, y)
    beta_star = np.abs(clf.dual_coef_[0])
    primal_star = np.sum(X.T.multiply(y * beta_star), axis=1)

    # Generalized support: dual coefficients strictly between 0 and C.
    full_supp = np.logical_and(
        np.logical_not(np.isclose(beta_star, 0)),
        np.logical_not(np.isclose(beta_star, C)))

    yX = X.multiply(y[:, np.newaxis])
    yX = yX.tocsr()  # TODO to optimize

    # Right-hand side of the linear system defining the Jacobian: the
    # coefficients clipped at C contribute a constant term.
    temp3 = np.zeros(n_samples)
    temp3[np.isclose(beta_star, C)] = C
    v = temp3[full_supp] - yX[full_supp, :] @ (
        yX[np.isclose(beta_star, C), :].T @ temp3[np.isclose(beta_star, C)])

    temp = csc_matrix(yX[full_supp, :] @ yX[full_supp, :].T)

    print("size system to solve %i" % v.shape[0])
    # Solve the system restricted to the support with conjugate gradient.
    jac_dense = cg(temp, v, tol=1e-12)
    jac_star = np.zeros(n_samples)
    jac_star[full_supp] = jac_dense[0]
    jac_star[np.isclose(beta_star, C)] = C
    primal_jac_star = np.sum(X.T.multiply(y * jac_star), axis=1)

    # Iterates and Jacobians of the iterative solver, compared against the
    # SDCA reference solution in the primal.
    model = SVM(X, y, np.log(C), max_iter=max_iter, tol=tol)
    list_beta, list_jac = compute_beta(
        X, y, np.log(C), model, save_iterates=True, tol=1e-32,
        max_iter=max_iter, compute_jac=True)

    M = X.T @ (list_beta * y).T
    M_jac = X.T @ (list_jac * y).T
    diff_beta = norm(M - primal_star, axis=0)
    diff_jac = norm(M_jac - primal_jac_star, axis=0)

    # Find the first iteration after which the support no longer changes.
    full_supp_star = np.logical_and(
        np.logical_not(np.isclose(list_beta[-1], 0)),
        np.logical_not(np.isclose(list_beta[-1], C)))
    n_iter = list_beta.shape[0]
    supp_id = 0
    for i in np.arange(n_iter)[::-1]:
        full_supp = np.logical_and(
            np.logical_not(np.isclose(list_beta[i, :], 0)),
            np.logical_not(np.isclose(list_beta[i, :], C)))
        if not np.all(full_supp == full_supp_star):
            supp_id = i + 1
            break

    return dataset_name, C, diff_beta, diff_jac, n_iter, supp_id
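# Usage sketch (an assumption, not shown in the original script): collect the
# per-dataset diagnostics in parallel with joblib. The dataset names and
# `n_jobs` below are hypothetical.
if __name__ == "__main__":
    from joblib import Parallel, delayed

    dataset_names = ["rcv1.binary", "news20.binary"]  # hypothetical choice
    results = Parallel(n_jobs=2)(
        delayed(linear_cv)(name) for name in dataset_names)
    for dataset_name, C, diff_beta, diff_jac, n_iter, supp_id in results:
        print(dataset_name, C, diff_beta[-1], diff_jac[-1], n_iter, supp_id)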
# Assumed imports for this fragment; `tol`, `dict_logC` and `dict_max_iter`
# are module-level constants, and `Monitor`, `HeldOutSmoothedHinge`, `SVM`,
# `Implicit`, `ImplicitForward` and `Forward` are imported elsewhere in the
# original script.
import os

import numpy as np
import pandas
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import norm
from sklearn.model_selection import StratifiedShuffleSplit
from libsvmdata import fetch_libsvm
from lightning.classification import LinearSVC


def parallel_function(dataset_name, method):
    X, y = fetch_libsvm(dataset_name)
    if dataset_name == "real-sim":
        X = X[:, :2000]
    X = csr_matrix(X)  # very important for the SVM solver

    # Remove samples whose rows are entirely zero.
    my_bool = norm(X, axis=1) != 0
    X = X[my_bool, :]
    y = y[my_bool]

    logC = dict_logC[dataset_name]
    for max_iter in dict_max_iter[dataset_name]:
        print("Dataset %s, method %s, max iter %i" % (
            dataset_name, method, max_iter))
        for i in range(2):  # TODO change this
            # One stratified split into train and validation indices.
            sss1 = StratifiedShuffleSplit(
                n_splits=1, test_size=0.3333, random_state=0)
            idx_train, idx_val = next(sss1.split(X, y))

            monitor = Monitor()
            criterion = HeldOutSmoothedHinge(idx_train, idx_val)
            model = SVM(estimator=None, max_iter=10_000)

            if method == "ground_truth":
                # Skip if the ground truth has already been computed.
                for file in os.listdir("results_svm/"):
                    if file.startswith("hypergradient_svm_%s_%s" % (
                            dataset_name, method)):
                        return
                clf = LinearSVC(
                    C=np.exp(logC), tol=1e-32, max_iter=10_000, loss='hinge',
                    permute=False)
                algo = Implicit(criterion)
                model.estimator = clf
                val, grad = criterion.get_val_grad(
                    model, X, y, logC, algo.compute_beta_grad, tol=1e-14,
                    monitor=monitor)
            else:
                if method == "sota":
                    clf = LinearSVC(
                        C=np.exp(logC), loss='hinge', max_iter=max_iter,
                        tol=1e-32, permute=False)
                    model.estimator = clf
                    algo = ImplicitForward(
                        tol_jac=1e-32, n_iter_jac=max_iter,
                        use_stop_crit=False)
                elif method == "forward":
                    algo = Forward(use_stop_crit=False)
                elif method == "implicit_forward":
                    algo = ImplicitForward(
                        tol_jac=1e-8, n_iter_jac=max_iter,
                        use_stop_crit=False)
                else:
                    raise NotImplementedError
                algo.max_iter = max_iter
                algo.use_stop_crit = False
                val, grad = criterion.get_val_grad(
                    model, X, y, logC, algo.compute_beta_grad, tol=tol,
                    monitor=monitor, max_iter=max_iter)

            results = (
                dataset_name, method, max_iter, val, grad, monitor.times[0])
            df = pandas.DataFrame(results).transpose()
            df.columns = ['dataset', 'method', 'maxit', 'val', 'grad', 'time']
            str_results = "results_svm/hypergradient_svm_%s_%s_%i.pkl" % (
                dataset_name, method, max_iter)
            df.to_pickle(str_results)
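# Driver sketch (an assumption; the original script's entry point is not
# shown): run each method on each dataset sequentially and let
# `parallel_function` pickle its results. The lists below are illustrative.
if __name__ == "__main__":
    os.makedirs("results_svm", exist_ok=True)
    for dataset_name in ["real-sim", "rcv1.binary"]:  # hypothetical choice
        for method in ["ground_truth", "sota", "forward", "implicit_forward"]:
            parallel_function(dataset_name, method)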
X, y = datasets.make_classification(
    n_samples=n_samples, n_features=n_features, n_informative=50,
    random_state=11, flip_y=0.1, n_redundant=0)
y[y == 0.0] = -1.0

idx_train = np.arange(0, 50)
idx_val = np.arange(50, 100)

C = 0.01
log_C = np.log(C)
tol = 1e-16

models = [SVM(log_C, max_iter=10000, tol=tol)]


def get_v(mask, dense):
    return 2 * (X[np.ix_(idx_val, mask)].T @ (
        X[np.ix_(idx_val, mask)] @ dense - y[idx_val])) / idx_val.shape[0]


@pytest.mark.parametrize('model', models)
def test_beta_jac(model):
    supp1, dense1, jac1 = get_beta_jac_iterdiff(
        X[idx_train, :], y[idx_train], log_C, tol=tol, model=model,
        compute_jac=True,
dict_log_alpha = {}
dict_log_alpha["lasso"] = log_alpha
dict_log_alpha["enet"] = np.array([log_alpha1, log_alpha2])
tab = np.linspace(1, 1000, n_features)
dict_log_alpha["wLasso"] = log_alpha + np.log(tab / tab.max())
dict_log_alpha["logreg"] = (log_alpha - np.log(2))
dict_log_alpha["svm"] = 1e-4
dict_log_alpha["svr"] = np.array([1e-2, 1e-2])

# Set models to be tested
models = {}
models["lasso"] = Lasso(estimator=None)
models["enet"] = ElasticNet(estimator=None)
models["wLasso"] = WeightedLasso(estimator=None)
models["logreg"] = SparseLogreg(estimator=None)
models["svm"] = SVM(estimator=None)
models["svr"] = SVR(estimator=None)

custom_models = {}
custom_models["lasso"] = Lasso(estimator=celer.Lasso(
    warm_start=True, fit_intercept=False))
custom_models["enet"] = ElasticNet(
    estimator=linear_model.ElasticNet(warm_start=True, fit_intercept=False))
custom_models["logreg"] = SparseLogreg(
    estimator=celer.LogisticRegression(warm_start=True, fit_intercept=False))

# Compute "ground truth" with cvxpylayers
dict_cvxpy_func = {
    'lasso': lasso_cvxpy,
    'enet': enet_cvxpy,
    'wLasso': weighted_lasso_cvxpy,
def parallel_function(
        dataset_name, method, tol=1e-5, n_outer=50,
        tolerance_decrease='exponential'):
    # Load data and recode the labels as +/- 1.
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(
        dataset_name, csr=True)
    n_samples, n_features = X_train.shape
    print('n_samples', n_samples)
    print('n_features', n_features)
    y_train[y_train == 0.0] = -1.0
    y_val[y_val == 0.0] = -1.0
    y_test[y_test == 0.0] = -1.0

    C_max = 100
    logC = np.log(1e-2)
    n_outer = 5  # note: overrides the `n_outer` argument
    size_loop = 1

    model = SVM(X_train, y_train, logC, max_iter=10000, tol=tol)
    for _ in range(size_loop):
        monitor = Monitor()
        # The held-out criterion is the same for every method.
        criterion = HeldOutSmoothedHinge(
            X_val, y_val, model, X_test=X_test, y_test=y_test)
        if method in ("implicit_forward", "forward", "implicit"):
            if method == "implicit_forward":
                algo = ImplicitForward(criterion, tol_jac=1e-3, n_iter_jac=100)
            elif method == "forward":
                algo = Forward(criterion)
            else:
                algo = Implicit(criterion)
            _, _, _ = grad_search(
                algo=algo, log_alpha0=logC, tol=tol, n_outer=n_outer,
                monitor=monitor, t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)
        elif method in ("grid_search", "random", "lhs"):
            algo = Forward(criterion)
            log_alpha_min = np.log(1e-2)
            samp = "grid" if method == "grid_search" else method
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(C_max), monitor, max_evals=25,
                tol=tol, samp=samp)
            print(log_alpha_opt)

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = np.array(monitor.objs_test)
    monitor.log_alphas = np.array(monitor.log_alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.log_alphas, norm(y_val), norm(y_test))
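# Usage sketch (assumed; the original entry point is not shown): benchmark
# every method on one dataset in parallel with joblib. "rcv1" appears in the
# original experiment; `n_jobs` is illustrative.
if __name__ == "__main__":
    from joblib import Parallel, delayed

    methods = [
        "implicit_forward", "forward", "implicit", "grid_search", "random",
        "lhs"]
    all_results = Parallel(n_jobs=6)(
        delayed(parallel_function)("rcv1", method) for method in methods)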
    n_features=n_features, n_informative=50, random_state=12, flip_y=0.1,
    n_redundant=0)
X_val_s = csr_matrix(X_val)
y_train[y_train == 0.0] = -1.0
y_val[y_val == 0.0] = -1.0

C = 0.001
log_C = np.log(C)
tol = 1e-16

models = [
    SVM(X_train, y_train, log_C, max_iter=10000, tol=tol),
    SVM(X_train_s, y_train, log_C, max_iter=10000, tol=tol)]


def get_v(mask, dense):
    return 2 * (
        X_val[:, mask].T @ (X_val[:, mask] @ dense - y_val)) / X_val.shape[0]


@pytest.mark.parametrize('model', models)
def test_beta_jac(model):
    supp1, dense1, jac1 = get_beta_jac_iterdiff(
        X_train, y_train, log_C, tol=tol,