def test_val_grad():
    """Check that all hypergradient methods return the same gradient.

    Not every algorithm computes the full Jacobian, but all of them
    compute the gradient, so values and gradients must agree.
    """
    def evaluate(algo):
        # A fresh criterion per run avoids any state shared between algos.
        criterion = HeldOutMSE(idx_train, idx_val)
        return criterion.get_val_grad(
            model, X, y, np.array([log_alpha1, log_alpha2]),
            algo.get_beta_jac_v, tol=tol)

    val_fwd, grad_fwd = evaluate(Forward())
    val_imp_fwd, grad_imp_fwd = evaluate(
        ImplicitForward(tol_jac=1e-16, n_iter_jac=5000))
    val_imp_fwd_custom, grad_imp_fwd_custom = evaluate(
        ImplicitForward(tol_jac=1e-16, n_iter_jac=5000))
    val_imp, grad_imp = evaluate(Implicit())

    np.testing.assert_allclose(val_fwd, val_imp_fwd)
    np.testing.assert_allclose(grad_fwd, grad_imp_fwd)
    np.testing.assert_allclose(val_imp_fwd, val_imp)
    np.testing.assert_allclose(val_imp_fwd, val_imp_fwd_custom)
    # the implicit method's conjugate gradient may stop short of full
    # convergence, hence the loose absolute tolerance
    np.testing.assert_allclose(grad_imp_fwd, grad_imp, atol=1e-3)
    np.testing.assert_allclose(grad_imp_fwd, grad_imp_fwd_custom)
def test_val_grad(model_name, criterion_name, algo):
    """Compare value and gradient of each method against cvxpylayer."""
    # Known-broken configurations are marked as expected failures.
    if model_name == 'svr':
        pytest.xfail("svr needs to be fixed")
    if criterion_name == 'logistic':
        pytest.xfail("cvxpylayer seems broken for logistic")

    # NOTE(review): the 'logistic' branch below is unreachable while the
    # xfail above is active; kept so the test works once xfail is lifted.
    if criterion_name == 'MSE':
        criterion = HeldOutMSE(idx_train, idx_val)
    elif criterion_name == 'logistic':
        criterion = HeldOutLogistic(idx_train, idx_val)
    elif criterion_name == 'SURE':
        criterion = FiniteDiffMonteCarloSure(sigma_star)

    log_alpha = dict_log_alpha[model_name]
    model = models[model_name]
    val, grad = criterion.get_val_grad(
        model, X, y, log_alpha, algo.compute_beta_grad, tol=tol)

    expected_val = dict_vals_cvxpy[model_name, criterion_name]
    expected_grad = dict_grads_cvxpy[model_name, criterion_name]
    np.testing.assert_allclose(expected_val, val, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(expected_grad, grad, rtol=1e-5, atol=1e-5)
monitor = Monitor() if method == "celer": clf = Lasso_celer(alpha=np.exp(log_alpha), fit_intercept=False, tol=1e-12, max_iter=maxit) model = Lasso(estimator=clf, max_iter=maxit) criterion = HeldOutMSE(idx_train, idx_val) algo = ImplicitForward(tol_jac=1e-32, n_iter_jac=maxit, use_stop_crit=False) algo.max_iter = maxit val, grad = criterion.get_val_grad(model, X, y, log_alpha, algo.get_beta_jac_v, tol=1e-12, monitor=monitor, max_iter=maxit) else: model = Lasso(max_iter=maxit) criterion = HeldOutMSE(idx_train, idx_val) if method == "forward": algo = Forward() elif method == "implicit_forward": algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=maxit, max_iter=1000) elif method == "implicit": algo = Implicit(max_iter=1000) elif method == "backward":
def test_val_grad():
    """Check that Forward, ImplicitForward, Implicit and Backward agree.

    Not every method computes the full Jacobian, but all of them compute
    the gradient, so values and gradients must match across methods.
    """
    for key in models.keys():
        log_alpha = dict_log_alpha[key]
        model = models[key]

        def run_mse(make_algo):
            # Fresh criterion per run; the factory receives the criterion
            # for algorithms that need it at construction time.
            criterion = HeldOutMSE(X_val, y_val, model)
            algo = make_algo(criterion)
            return criterion.get_val_grad(
                log_alpha, algo.get_beta_jac_v, tol=tol)

        val_fwd, grad_fwd = run_mse(lambda _: Forward())
        val_imp_fwd, grad_imp_fwd = run_mse(
            lambda _: ImplicitForward(tol_jac=1e-8, n_iter_jac=5000))
        val_imp, grad_imp = run_mse(lambda _: Implicit())
        val_bwd, grad_bwd = run_mse(lambda _: Backward())

        assert np.allclose(val_fwd, val_imp_fwd)
        assert np.allclose(grad_fwd, grad_imp_fwd)
        # NOTE(review): the val_imp vs val_imp_fwd value comparison was
        # disabled upstream for HeldOutMSE; intentionally not asserted.
        assert np.allclose(val_bwd, val_fwd)
        assert np.allclose(val_bwd, val_imp_fwd)
        assert np.allclose(grad_fwd, grad_bwd)
        assert np.allclose(grad_bwd, grad_imp_fwd)
        # the implicit method's conjugate gradient may stop short of full
        # convergence, hence the loose absolute tolerance
        assert np.allclose(grad_imp_fwd, grad_imp, atol=1e-3)

    for key in models.keys():
        log_alpha = dict_log_alpha[key]
        model = models[key]

        def run_sure(make_algo):
            criterion = SURE(X_train, y_train, model, sigma_star)
            algo = make_algo(criterion)
            return criterion.get_val_grad(
                log_alpha, algo.get_beta_jac_v, tol=tol)

        val_fwd, grad_fwd = run_sure(lambda _: Forward())
        val_imp_fwd, grad_imp_fwd = run_sure(
            lambda _: ImplicitForward(tol_jac=1e-8, n_iter_jac=5000))
        # Implicit is constructed from the criterion in the SURE case.
        val_imp, grad_imp = run_sure(lambda crit: Implicit(crit))
        val_bwd, grad_bwd = run_sure(lambda _: Backward())

        assert np.allclose(val_fwd, val_imp_fwd)
        assert np.allclose(grad_fwd, grad_imp_fwd)
        assert np.allclose(val_imp_fwd, val_imp)
        assert np.allclose(val_bwd, val_fwd)
        assert np.allclose(val_bwd, val_imp_fwd)
        assert np.allclose(grad_fwd, grad_bwd)
        assert np.allclose(grad_bwd, grad_imp_fwd)
def parallel_function(name_model, div_alpha):
    """Time forward/backward hypergradients against cvxpylayers and save
    the mean times per number of columns to ``results/``.
    """
    def compute_log_alpha(index_col):
        # Regularization derived from alpha_max on the selected columns.
        alpha_max = (np.abs(
            X[np.ix_(idx_train, index_col)].T @ y[idx_train])).max()
        alpha_max /= len(idx_train)
        if name_model == "lasso":
            return np.log(alpha_max / div_alpha)
        elif name_model == "enet":
            alpha0 = alpha_max / div_alpha
            alpha1 = (1 - l1_ratio) * alpha0 / l1_ratio
            return np.log(np.array([alpha0, alpha1]))

    def run_hypergrad(algo, index_col, log_alpha):
        # One hypergradient evaluation with a fresh criterion and monitor.
        criterion = HeldOutMSE(idx_train, idx_val)
        monitor = Monitor()
        val, grad = criterion.get_val_grad(
            dict_models[name_model], X[:, index_col], y, log_alpha,
            algo.compute_beta_grad, tol=tol, monitor=monitor)
        return val, grad, monitor

    # Initial runs on 10 columns; their results are overwritten below
    # (presumably to warm caches/JIT before timing — confirm).
    index_col = np.arange(10)
    log_alpha = compute_log_alpha(index_col)
    val, grad, _ = run_hypergrad(Forward(), index_col, log_alpha)
    val, grad, _ = run_hypergrad(Backward(), index_col, log_alpha)
    val_cvxpy, grad_cvxpy = dict_cvxpy[name_model](
        X[:, index_col], y, np.exp(log_alpha), idx_train, idx_val)

    list_times_fwd = []
    list_times_bwd = []
    list_times_cvxpy = []
    for n_col in dict_ncols[div_alpha]:
        temp_fwd, temp_bwd, temp_cvxpy = [], [], []
        for i in range(repeat):
            # Deterministic column subset per repetition.
            rng = np.random.RandomState(i)
            index_col = rng.choice(n_features, n_col, replace=False)
            log_alpha = compute_log_alpha(index_col)

            val, grad, monitor = run_hypergrad(
                Forward(), index_col, log_alpha)
            temp_fwd.append(monitor.times)

            val, grad, monitor = run_hypergrad(
                Backward(), index_col, log_alpha)
            temp_bwd.append(monitor.times)

            t0 = time.time()
            val_cvxpy, grad_cvxpy = dict_cvxpy[name_model](
                X[:, index_col], y, np.exp(log_alpha), idx_train, idx_val)
            temp_cvxpy.append(time.time() - t0)
            # sanity print: discrepancy between backward and cvxpy
            # gradients (cvxpy grads are w.r.t. alpha, hence the chain
            # rule factor exp(log_alpha))
            print(np.abs(grad - grad_cvxpy * np.exp(log_alpha)))
        list_times_fwd.append(np.mean(np.array(temp_fwd)))
        list_times_bwd.append(np.mean(np.array(temp_bwd)))
        list_times_cvxpy.append(np.mean(np.array(temp_cvxpy)))

    np.save("results/times_%s_forward_%s" % (name_model, div_alpha),
            list_times_fwd)
    np.save("results/times_%s_backward_%s" % (name_model, div_alpha),
            list_times_bwd)
    np.save("results/times_%s_cvxpy_%s" % (name_model, div_alpha),
            list_times_cvxpy)
    np.save("results/nfeatures_%s_%s" % (name_model, div_alpha),
            dict_ncols[div_alpha])
def parallel_function(dataset_name, div_alpha, method):
    """Compute hypergradients of the held-out MSE for one Lasso problem.

    For each iteration budget in ``dict_maxits[(dataset_name, div_alpha)]``
    the hypergradient is computed on two random train/validation splits
    (seeds 0 and 1) with the requested ``method``, and the value, gradient
    and first recorded time are pickled to ``results/``.

    Parameters
    ----------
    dataset_name : str
        Name of the libsvm dataset to fetch.
    div_alpha : int
        Ratio alpha_max / alpha fixing the regularization strength.
    method : str
        One of "celer", "ground_truth", "forward", "implicit_forward",
        "implicit", "backward".
    """
    X, y = fetch_libsvm(dataset_name)
    n_samples = len(y)
    # Perturb the target slightly for this specific configuration
    # (presumably the unperturbed problem is degenerate — confirm).
    if dataset_name == "news20" and div_alpha == 100:
        rng = np.random.RandomState(42)
        y += rng.randn(n_samples) * 0.01
    for maxit in dict_maxits[(dataset_name, div_alpha)]:
        # BUGFIX: the original printed `method` under the "Dataset" label;
        # print the dataset name, which is what the label announces.
        print("Dataset %s, maxit %i" % (dataset_name, maxit))
        for i in range(2):
            # Deterministic half/half train-validation split per seed.
            rng = np.random.RandomState(i)
            idx_train = rng.choice(n_samples, n_samples // 2, replace=False)
            idx = np.arange(0, n_samples)
            idx_val = idx[np.logical_not(np.isin(idx, idx_train))]
            alpha_max = np.max(np.abs(X[idx_train, :].T.dot(y[idx_train])))
            alpha_max /= len(idx_train)
            log_alpha = np.log(alpha_max / div_alpha)
            monitor = Monitor()
            if method == "celer":
                clf = Lasso_celer(
                    alpha=np.exp(log_alpha), fit_intercept=False,
                    tol=1e-12, max_iter=maxit)
                model = Lasso(estimator=clf, max_iter=maxit)
                criterion = HeldOutMSE(idx_train, idx_val)
                algo = ImplicitForward(
                    tol_jac=1e-32, n_iter_jac=maxit, use_stop_crit=False)
                algo.max_iter = maxit
                val, grad = criterion.get_val_grad(
                    model, X, y, log_alpha, algo.compute_beta_grad,
                    tol=1e-12, monitor=monitor, max_iter=maxit)
            elif method == "ground_truth":
                # The ground truth is expensive: bail out of the whole call
                # if a result file for this configuration already exists.
                prefix = "hypergradient_%s_%i_%s" % (
                    dataset_name, div_alpha, method)
                if any(fname.startswith(prefix)
                       for fname in os.listdir("results/")):
                    return
                clf = Lasso_celer(
                    alpha=np.exp(log_alpha), fit_intercept=False,
                    warm_start=True, tol=1e-14, max_iter=10000)
                criterion = HeldOutMSE(idx_train, idx_val)
                if dataset_name == "news20":
                    algo = ImplicitForward(tol_jac=1e-11, n_iter_jac=100000)
                else:
                    algo = Implicit(criterion)
                model = Lasso(estimator=clf, max_iter=10000)
                val, grad = criterion.get_val_grad(
                    model, X, y, log_alpha, algo.compute_beta_grad,
                    tol=1e-14, monitor=monitor)
            else:
                model = Lasso(max_iter=maxit)
                criterion = HeldOutMSE(idx_train, idx_val)
                if method == "forward":
                    algo = Forward(use_stop_crit=False)
                elif method == "implicit_forward":
                    algo = ImplicitForward(
                        tol_jac=1e-8, n_iter_jac=maxit, use_stop_crit=False)
                elif method == "implicit":
                    algo = Implicit(max_iter=1000)
                elif method == "backward":
                    algo = Backward()
                else:
                    raise NotImplementedError
                algo.max_iter = maxit
                algo.use_stop_crit = False
                val, grad = criterion.get_val_grad(
                    model, X, y, log_alpha, algo.compute_beta_grad, tol=tol,
                    monitor=monitor, max_iter=maxit)
        # NOTE(review): only the last split's val/grad/monitor reach the
        # result file; the first pass looks like a warm-up — confirm intent.
        results = (
            dataset_name, div_alpha, method, maxit, val, grad,
            monitor.times[0])
        df = pandas.DataFrame(results).transpose()
        df.columns = [
            'dataset', 'div_alpha', 'method', 'maxit', 'val', 'grad', 'time']
        str_results = "results/hypergradient_%s_%i_%s_%i.pkl" % (
            dataset_name, div_alpha, method, maxit)
        df.to_pickle(str_results)