示例#1
0
def test_val_grad():
    """Check that all hypergradient algorithms agree on value and gradient.

    The held-out MSE value and its gradient with respect to the two
    log-hyperparameters are computed with ``Forward``, ``ImplicitForward``
    (run twice with identical settings) and ``Implicit``, and then compared.
    Not every algorithm computes the full Jacobian, but every one of them
    returns the gradient, which is what is compared here.
    """

    def run(algo_cls, **algo_kwargs):
        # Each run gets its own criterion, algorithm instance and
        # hyperparameter array, exactly like the hand-unrolled version.
        criterion = HeldOutMSE(idx_train, idx_val)
        algo = algo_cls(**algo_kwargs)
        return criterion.get_val_grad(
            model, X, y,
            np.array([log_alpha1, log_alpha2]),
            algo.get_beta_jac_v,
            tol=tol)

    val_fwd, grad_fwd = run(Forward)
    val_imp_fwd, grad_imp_fwd = run(
        ImplicitForward, tol_jac=1e-16, n_iter_jac=5000)
    val_imp_fwd_custom, grad_imp_fwd_custom = run(
        ImplicitForward, tol_jac=1e-16, n_iter_jac=5000)
    val_imp, grad_imp = run(Implicit)

    # Criterion values and the forward gradients must match at the default
    # tolerance of assert_allclose.
    np.testing.assert_allclose(val_fwd, val_imp_fwd)
    np.testing.assert_allclose(grad_fwd, grad_imp_fwd)
    np.testing.assert_allclose(val_imp_fwd, val_imp)
    np.testing.assert_allclose(val_imp_fwd, val_imp_fwd_custom)
    # For Implicit the conjugate gradient does not converge, hence the
    # looser absolute tolerance (atol=1e-3) on its gradient.
    np.testing.assert_allclose(grad_imp_fwd, grad_imp, atol=1e-3)
    np.testing.assert_allclose(grad_imp_fwd, grad_imp_fwd_custom)
示例#2
0
def test_val_grad(model_name, criterion_name, algo):
    """Check value and gradient of a criterion against the cvxpylayer results.

    Parameters
    ----------
    model_name : str
        Key into ``models``, ``dict_log_alpha`` and the cvxpy reference dicts.
    criterion_name : str
        One of ``'MSE'``, ``'logistic'`` or ``'SURE'``.
    algo : object
        Hypergradient algorithm; its ``compute_beta_grad`` method is passed
        to ``criterion.get_val_grad``.

    Raises
    ------
    ValueError
        If ``criterion_name`` is not one of the supported names.

    Notes
    -----
    The arguments are presumably supplied by ``pytest.mark.parametrize``
    decorators that are not visible here -- TODO confirm.
    """
    if model_name == 'svr':
        pytest.xfail("svr needs to be fixed")

    # pytest.xfail() stops the test at the call, so the 'logistic' branch
    # further down is currently never reached; it is kept so that the test
    # works as soon as this xfail is lifted.
    if criterion_name == 'logistic':
        pytest.xfail("cvxpylayer seems broken for logistic")

    if criterion_name == 'MSE':
        criterion = HeldOutMSE(idx_train, idx_val)
    elif criterion_name == 'logistic':
        criterion = HeldOutLogistic(idx_train, idx_val)
    elif criterion_name == 'SURE':
        criterion = FiniteDiffMonteCarloSure(sigma_star)
    else:
        # Fail with a clear message instead of an UnboundLocalError later on.
        raise ValueError(
            "Unknown criterion_name %r, expected 'MSE', 'logistic' or 'SURE'"
            % (criterion_name,))

    log_alpha = dict_log_alpha[model_name]
    model = models[model_name]
    val, grad = criterion.get_val_grad(model,
                                       X,
                                       y,
                                       log_alpha,
                                       algo.compute_beta_grad,
                                       tol=tol)

    # Reference values are indexed by the (model, criterion) pair.
    reference_key = (model_name, criterion_name)
    np.testing.assert_allclose(dict_vals_cvxpy[reference_key],
                               val,
                               rtol=1e-5,
                               atol=1e-5)
    np.testing.assert_allclose(dict_grads_cvxpy[reference_key],
                               grad,
                               rtol=1e-5,
                               atol=1e-5)
示例#3
0
 monitor = Monitor()
 if method == "celer":
     clf = Lasso_celer(alpha=np.exp(log_alpha),
                       fit_intercept=False,
                       tol=1e-12,
                       max_iter=maxit)
     model = Lasso(estimator=clf, max_iter=maxit)
     criterion = HeldOutMSE(idx_train, idx_val)
     algo = ImplicitForward(tol_jac=1e-32,
                            n_iter_jac=maxit,
                            use_stop_crit=False)
     algo.max_iter = maxit
     val, grad = criterion.get_val_grad(model,
                                        X,
                                        y,
                                        log_alpha,
                                        algo.get_beta_jac_v,
                                        tol=1e-12,
                                        monitor=monitor,
                                        max_iter=maxit)
 else:
     model = Lasso(max_iter=maxit)
     criterion = HeldOutMSE(idx_train, idx_val)
     if method == "forward":
         algo = Forward()
     elif method == "implicit_forward":
         algo = ImplicitForward(tol_jac=1e-8,
                                n_iter_jac=maxit,
                                max_iter=1000)
     elif method == "implicit":
         algo = Implicit(max_iter=1000)
     elif method == "backward":
示例#4
0
def test_val_grad():
    """Check that all hypergradient algorithms return consistent results.

    For every model in ``models`` the criterion value and its gradient are
    computed with ``Forward``, ``ImplicitForward``, ``Implicit`` and
    ``Backward``, first for the held-out MSE criterion and then for SURE.
    Not every algorithm computes the full Jacobian, but all of them return
    the gradient, which is what is compared.
    """

    def val_grad(criterion, algo, log_alpha):
        # Single place for the call shared by every criterion/algo pair.
        return criterion.get_val_grad(
            log_alpha, algo.get_beta_jac_v, tol=tol)

    # ---- held-out MSE ----------------------------------------------------
    for name, model in models.items():
        log_alpha = dict_log_alpha[name]

        val_fwd, grad_fwd = val_grad(
            HeldOutMSE(X_val, y_val, model), Forward(), log_alpha)
        val_imp_fwd, grad_imp_fwd = val_grad(
            HeldOutMSE(X_val, y_val, model),
            ImplicitForward(tol_jac=1e-8, n_iter_jac=5000),
            log_alpha)
        # The value returned by Implicit is not compared for this criterion.
        _, grad_imp = val_grad(
            HeldOutMSE(X_val, y_val, model), Implicit(), log_alpha)
        val_bwd, grad_bwd = val_grad(
            HeldOutMSE(X_val, y_val, model), Backward(), log_alpha)

        expected_equal = (
            (val_fwd, val_imp_fwd),
            (grad_fwd, grad_imp_fwd),
            (val_bwd, val_fwd),
            (val_bwd, val_imp_fwd),
            (grad_fwd, grad_bwd),
            (grad_bwd, grad_imp_fwd),
        )
        for left, right in expected_equal:
            assert np.allclose(left, right)

        # For Implicit the conjugate gradient does not converge, hence the
        # looser absolute tolerance (atol=1e-3).
        assert np.allclose(grad_imp_fwd, grad_imp, atol=1e-3)

    # ---- SURE ------------------------------------------------------------
    for name, model in models.items():
        log_alpha = dict_log_alpha[name]

        val_fwd, grad_fwd = val_grad(
            SURE(X_train, y_train, model, sigma_star), Forward(), log_alpha)
        val_imp_fwd, grad_imp_fwd = val_grad(
            SURE(X_train, y_train, model, sigma_star),
            ImplicitForward(tol_jac=1e-8, n_iter_jac=5000),
            log_alpha)
        # Here Implicit is constructed with the criterion it is used on, and
        # only its value (not its gradient) is compared.
        sure_for_implicit = SURE(X_train, y_train, model, sigma_star)
        val_imp, _ = val_grad(
            sure_for_implicit, Implicit(sure_for_implicit), log_alpha)
        val_bwd, grad_bwd = val_grad(
            SURE(X_train, y_train, model, sigma_star), Backward(), log_alpha)

        expected_equal = (
            (val_fwd, val_imp_fwd),
            (grad_fwd, grad_imp_fwd),
            (val_imp_fwd, val_imp),
            (val_bwd, val_fwd),
            (val_bwd, val_imp_fwd),
            (grad_fwd, grad_bwd),
            (grad_bwd, grad_imp_fwd),
        )
        for left, right in expected_equal:
            assert np.allclose(left, right)
示例#5
0
def _log_alpha_for(name_model, div_alpha, index_col):
    """Return the log-hyperparameter(s) for the feature subset ``index_col``.

    ``alpha_max`` is computed from the training rows and the selected
    columns only. For ``"lasso"`` a scalar is returned; otherwise (``"enet"``)
    a length-2 array holding the log of both penalties is returned.
    """
    alpha_max = (np.abs(X[np.ix_(idx_train, index_col)].T
                        @ y[idx_train])).max() / len(idx_train)
    alpha0 = alpha_max / div_alpha
    if name_model == "lasso":
        return np.log(alpha0)
    # "enet": the second penalty is derived from the first via l1_ratio.
    alpha1 = (1 - l1_ratio) * alpha0 / l1_ratio
    return np.log(np.array([alpha0, alpha1]))


def _run_hypergrad(algo_cls, name_model, index_col, log_alpha):
    """Compute the held-out MSE hypergradient once with ``algo_cls``.

    Returns the gradient and the ``Monitor`` that recorded the run.
    """
    criterion = HeldOutMSE(idx_train, idx_val)
    algo = algo_cls()
    monitor = Monitor()
    _, grad = criterion.get_val_grad(dict_models[name_model],
                                     X[:, index_col],
                                     y,
                                     log_alpha,
                                     algo.compute_beta_grad,
                                     tol=tol,
                                     monitor=monitor)
    return grad, monitor


def parallel_function(name_model, div_alpha):
    """Time Forward, Backward and the ``dict_cvxpy`` reference hypergradients.

    For every number of columns in ``dict_ncols[div_alpha]`` the three
    computations are repeated ``repeat`` times on random column subsets, and
    the mean times are saved with ``np.save`` under ``results/``.

    Parameters
    ----------
    name_model : str
        ``"lasso"`` or ``"enet"``.
    div_alpha : number
        ``alpha_max`` is divided by this value to obtain the penalty.

    Raises
    ------
    ValueError
        If ``name_model`` is not ``"lasso"`` or ``"enet"``.
    """
    # Fail fast: otherwise an unknown name only surfaces later as an unbound
    # ``log_alpha``.
    if name_model not in ("lasso", "enet"):
        raise ValueError(
            "Unknown name_model %r, expected 'lasso' or 'enet'"
            % (name_model,))

    # NOTE(review): the results of this first pass on the first 10 columns
    # are discarded; presumably a warm-up (e.g. JIT compilation) so that it
    # does not pollute the timings below -- confirm.
    index_col = np.arange(10)
    log_alpha = _log_alpha_for(name_model, div_alpha, index_col)
    _run_hypergrad(Forward, name_model, index_col, log_alpha)
    _run_hypergrad(Backward, name_model, index_col, log_alpha)
    dict_cvxpy[name_model](X[:, index_col], y, np.exp(log_alpha),
                           idx_train, idx_val)

    list_times_fwd = []
    list_times_bwd = []
    list_times_cvxpy = []
    for n_col in dict_ncols[div_alpha]:
        temp_fwd = []
        temp_bwd = []
        temp_cvxpy = []
        for seed in range(repeat):
            # Seeded per repetition so the column subsets are reproducible.
            rng = np.random.RandomState(seed)
            index_col = rng.choice(n_features, n_col, replace=False)
            log_alpha = _log_alpha_for(name_model, div_alpha, index_col)

            _, monitor = _run_hypergrad(
                Forward, name_model, index_col, log_alpha)
            temp_fwd.append(monitor.times)

            grad, monitor = _run_hypergrad(
                Backward, name_model, index_col, log_alpha)
            temp_bwd.append(monitor.times)

            # perf_counter is the clock intended for measuring intervals.
            t0 = time.perf_counter()
            _, grad_cvxpy = dict_cvxpy[name_model](X[:, index_col], y,
                                                   np.exp(log_alpha),
                                                   idx_train, idx_val)
            temp_cvxpy.append(time.perf_counter() - t0)

            # Discrepancy between the Backward gradient and the reference;
            # the factor exp(log_alpha) presumably converts a gradient in
            # alpha into a gradient in log(alpha) -- TODO confirm.
            print(np.abs(grad - grad_cvxpy * np.exp(log_alpha)))
        list_times_fwd.append(np.mean(np.array(temp_fwd)))
        list_times_bwd.append(np.mean(np.array(temp_bwd)))
        list_times_cvxpy.append(np.mean(np.array(temp_cvxpy)))

    np.save("results/times_%s_forward_%s" % (name_model, div_alpha),
            list_times_fwd)
    np.save("results/times_%s_backward_%s" % (name_model, div_alpha),
            list_times_bwd)
    np.save("results/times_%s_cvxpy_%s" % (name_model, div_alpha),
            list_times_cvxpy)
    np.save("results/nfeatures_%s_%s" % (name_model, div_alpha),
            dict_ncols[div_alpha])
示例#6
0
def _make_fixed_iter_algo(method, maxit):
    """Build the hypergradient algorithm for one of the iterative methods.

    Supports ``"forward"``, ``"implicit_forward"``, ``"implicit"`` and
    ``"backward"``. The returned object has ``max_iter`` set to ``maxit`` and
    ``use_stop_crit`` set to ``False``. Raises ``NotImplementedError`` for
    any other ``method``.
    """
    if method == "forward":
        algo = Forward(use_stop_crit=False)
    elif method == "implicit_forward":
        algo = ImplicitForward(tol_jac=1e-8,
                               n_iter_jac=maxit,
                               use_stop_crit=False)
    elif method == "implicit":
        algo = Implicit(max_iter=1000)
    elif method == "backward":
        algo = Backward()
    else:
        raise NotImplementedError("Unknown method %r" % (method,))
    # Applied to every algorithm, overriding any constructor value.
    algo.max_iter = maxit
    algo.use_stop_crit = False
    return algo


def parallel_function(dataset_name, div_alpha, method):
    """Compute and pickle the held-out MSE hypergradient for one setting.

    For every ``maxit`` in ``dict_maxits[(dataset_name, div_alpha)]`` the
    hypergradient is computed with ``method`` and one row
    (dataset, div_alpha, method, maxit, val, grad, time) is written to
    ``results/hypergradient_<dataset>_<div_alpha>_<method>_<maxit>.pkl``.

    Parameters
    ----------
    dataset_name : str
        Name passed to ``fetch_libsvm``.
    div_alpha : int
        ``alpha_max`` is divided by this value to obtain the Lasso penalty.
    method : str
        One of ``"celer"``, ``"ground_truth"``, ``"forward"``,
        ``"implicit_forward"``, ``"implicit"`` or ``"backward"``.

    Raises
    ------
    NotImplementedError
        If ``method`` is not one of the names above.
    """
    known_methods = ("celer", "ground_truth", "forward", "implicit_forward",
                     "implicit", "backward")
    # Fail fast, before the dataset is fetched.
    if method not in known_methods:
        raise NotImplementedError(
            "Unknown method %r, expected one of: %s"
            % (method, ", ".join(known_methods)))

    X, y = fetch_libsvm(dataset_name)
    n_samples = len(y)
    if dataset_name == "news20" and div_alpha == 100:
        # A small, seeded Gaussian perturbation is added to the targets for
        # this specific setting.
        rng = np.random.RandomState(42)
        y += rng.randn(n_samples) * 0.01

    for maxit in dict_maxits[(dataset_name, div_alpha)]:
        print("Dataset %s, method %s, maxit %i"
              % (dataset_name, method, maxit))
        # NOTE(review): the computation runs twice (seeds 0 and 1) but only
        # the second run is saved below; the first is presumably a warm-up
        # (e.g. JIT compilation) -- confirm.
        for i in range(2):
            rng = np.random.RandomState(i)
            # Random half/half split into train and validation indices.
            idx_train = rng.choice(n_samples, n_samples // 2, replace=False)
            idx = np.arange(0, n_samples)
            idx_val = idx[np.logical_not(np.isin(idx, idx_train))]
            alpha_max = np.max(np.abs(X[idx_train, :].T.dot(y[idx_train])))
            alpha_max /= len(idx_train)
            log_alpha = np.log(alpha_max / div_alpha)
            monitor = Monitor()
            if method == "celer":
                clf = Lasso_celer(alpha=np.exp(log_alpha),
                                  fit_intercept=False,
                                  tol=1e-12,
                                  max_iter=maxit)
                model = Lasso(estimator=clf, max_iter=maxit)
                criterion = HeldOutMSE(idx_train, idx_val)
                algo = ImplicitForward(tol_jac=1e-32,
                                       n_iter_jac=maxit,
                                       use_stop_crit=False)
                algo.max_iter = maxit
                val, grad = criterion.get_val_grad(model,
                                                   X,
                                                   y,
                                                   log_alpha,
                                                   algo.compute_beta_grad,
                                                   tol=1e-12,
                                                   monitor=monitor,
                                                   max_iter=maxit)
            elif method == "ground_truth":
                # Stop as soon as any result file for this setting exists.
                # The file written at the end of the first ``maxit``
                # iteration matches this prefix as well, so it must stay
                # inside the loops.
                prefix = "hypergradient_%s_%i_%s" % (
                    dataset_name, div_alpha, method)
                if any(name.startswith(prefix)
                       for name in os.listdir("results/")):
                    return
                clf = Lasso_celer(alpha=np.exp(log_alpha),
                                  fit_intercept=False,
                                  warm_start=True,
                                  tol=1e-14,
                                  max_iter=10000)
                criterion = HeldOutMSE(idx_train, idx_val)
                if dataset_name == "news20":
                    algo = ImplicitForward(tol_jac=1e-11, n_iter_jac=100000)
                else:
                    algo = Implicit(criterion)
                model = Lasso(estimator=clf, max_iter=10000)
                val, grad = criterion.get_val_grad(model,
                                                   X,
                                                   y,
                                                   log_alpha,
                                                   algo.compute_beta_grad,
                                                   tol=1e-14,
                                                   monitor=monitor)
            else:
                model = Lasso(max_iter=maxit)
                criterion = HeldOutMSE(idx_train, idx_val)
                algo = _make_fixed_iter_algo(method, maxit)
                val, grad = criterion.get_val_grad(model,
                                                   X,
                                                   y,
                                                   log_alpha,
                                                   algo.compute_beta_grad,
                                                   tol=tol,
                                                   monitor=monitor,
                                                   max_iter=maxit)

        # One-row frame holding the outcome of the last repetition.
        results = (dataset_name, div_alpha, method, maxit, val, grad,
                   monitor.times[0])
        df = pandas.DataFrame(results).transpose()
        df.columns = [
            'dataset', 'div_alpha', 'method', 'maxit', 'val', 'grad', 'time'
        ]
        str_results = "results/hypergradient_%s_%i_%s_%i.pkl" % (
            dataset_name, div_alpha, method, maxit)
        df.to_pickle(str_results)