Example #1
def test_grid_search():
    max_evals = 5

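    # grid sampling: evaluate the criterion on max_evals values of log(alpha)
    # between log_alpha_min and log_alpha_max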
    monitor_grid = Monitor()
    model = Lasso(estimator=estimator)
    criterion = HeldOutMSE(idx_train, idx_val)
    algo = Forward()
    log_alpha_opt_grid, _ = grid_search(
        algo, criterion, model, X, y, log_alpha_min, log_alpha_max,
        monitor_grid, max_evals=max_evals,
        tol=1e-5, samp="grid")

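    # random sampling over the same range of log(alpha)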
    monitor_random = Monitor()
    criterion = HeldOutMSE(idx_train, idx_val)
    algo = Forward()
    log_alpha_opt_random, _ = grid_search(
        algo, criterion, model, X, y, log_alpha_min, log_alpha_max,
        monitor_random,
        max_evals=max_evals, tol=1e-5, samp="random")

    assert monitor_random.log_alphas[
        np.argmin(monitor_random.objs)] == log_alpha_opt_random
    assert monitor_grid.log_alphas[
        np.argmin(monitor_grid.objs)] == log_alpha_opt_grid

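    # repeat both searches with the SURE criterion in place of the held-out MSE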
    monitor_grid = Monitor()
    model = Lasso(estimator=estimator)

    criterion = SmoothedSURE(sigma=sigma_star)
    algo = Forward()
    log_alpha_opt_grid, _ = grid_search(
        algo, criterion, model, X, y, log_alpha_min, log_alpha_max,
        monitor_grid, max_evals=max_evals,
        tol=1e-5, samp="grid")

    monitor_random = Monitor()
    criterion = SmoothedSURE(sigma=sigma_star)
    algo = Forward()
    log_alpha_opt_random, _ = grid_search(
        algo, criterion, model, X, y, log_alpha_min, log_alpha_max,
        monitor_random,
        max_evals=max_evals, tol=1e-5, samp="random")

    assert monitor_random.log_alphas[
        np.argmin(monitor_random.objs)] == log_alpha_opt_random
    assert monitor_grid.log_alphas[
        np.argmin(monitor_grid.objs)] == log_alpha_opt_grid
Example #2
def test_monitor():
    model = Lasso(estimator=estimator)
    criterion = HeldOutMSE(idx_train, idx_val)
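    # hypergradient computed by implicit forward differentiation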
    algo = ImplicitForward()
    monitor = Monitor(callback=callback)
    optimizer = LineSearch(n_outer=10, tol=tol)
    grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor)

    np.testing.assert_allclose(np.array(monitor.objs), np.array(objs))
Example #3
def test_monitor():
    model = Lasso(estimator=estimator)
    criterion = HeldOutMSE(idx_train, idx_val)
    algo = ImplicitForward()
    monitor = Monitor(callback=callback)

    grad_search(algo,
                criterion,
                model,
                X,
                y,
                np.log(alpha_max / 10),
                monitor,
                n_outer=10,
                tol=tol)

    np.testing.assert_allclose(np.array(monitor.objs), np.array(objs))
Example #4
def parallel_function(dataset_name, div_alpha, method):
    X, y = fetch_libsvm(dataset_name)
    n_samples = len(y)
    if dataset_name == "news20" and div_alpha == 100:
        rng = np.random.RandomState(42)
        y += rng.randn(n_samples) * 0.01
    for maxit in dict_maxits[(dataset_name, div_alpha)]:
        print("Dataset %s, maxit %i" % (method, maxit))
        for i in range(2):
            rng = np.random.RandomState(i)
            idx_train = rng.choice(n_samples, n_samples // 2, replace=False)
            idx = np.arange(0, n_samples)
            idx_val = idx[np.logical_not(np.isin(idx, idx_train))]
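            # alpha_max is the smallest regularization strength for which
            # the Lasso solution is identically zero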
            alpha_max = np.max(np.abs(X[idx_train, :].T.dot(y[idx_train])))
            alpha_max /= len(idx_train)
            log_alpha = np.log(alpha_max / div_alpha)
            monitor = Monitor()
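            # compute the value and hypergradient of the held-out MSE
            # with the selected differentiation method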
            if method == "celer":
                clf = Lasso_celer(alpha=np.exp(log_alpha),
                                  fit_intercept=False,
                                  tol=1e-12,
                                  max_iter=maxit)
                model = Lasso(estimator=clf, max_iter=maxit)
                criterion = HeldOutMSE(idx_train, idx_val)
                algo = ImplicitForward(tol_jac=1e-32,
                                       n_iter_jac=maxit,
                                       use_stop_crit=False)
                algo.max_iter = maxit
                val, grad = criterion.get_val_grad(model,
                                                   X,
                                                   y,
                                                   log_alpha,
                                                   algo.compute_beta_grad,
                                                   tol=1e-12,
                                                   monitor=monitor,
                                                   max_iter=maxit)
            elif method == "ground_truth":
                for file in os.listdir("results/"):
                    if file.startswith("hypergradient_%s_%i_%s" %
                                       (dataset_name, div_alpha, method)):
                        return
                clf = Lasso_celer(alpha=np.exp(log_alpha),
                                  fit_intercept=False,
                                  warm_start=True,
                                  tol=1e-14,
                                  max_iter=10000)
                criterion = HeldOutMSE(idx_train, idx_val)
                if dataset_name == "news20":
                    algo = ImplicitForward(tol_jac=1e-11, n_iter_jac=100000)
                else:
                    algo = Implicit(criterion)
                model = Lasso(estimator=clf, max_iter=10000)
                val, grad = criterion.get_val_grad(model,
                                                   X,
                                                   y,
                                                   log_alpha,
                                                   algo.compute_beta_grad,
                                                   tol=1e-14,
                                                   monitor=monitor)
            else:
                model = Lasso(max_iter=maxit)
                criterion = HeldOutMSE(idx_train, idx_val)
                if method == "forward":
                    algo = Forward(use_stop_crit=False)
                elif method == "implicit_forward":
                    algo = ImplicitForward(tol_jac=1e-8,
                                           n_iter_jac=maxit,
                                           use_stop_crit=False)
                elif method == "implicit":
                    algo = Implicit(max_iter=1000)
                elif method == "backward":
                    algo = Backward()
                else:
                    raise NotImplementedError
                algo.max_iter = maxit
                algo.use_stop_crit = False
                val, grad = criterion.get_val_grad(model,
                                                   X,
                                                   y,
                                                   log_alpha,
                                                   algo.compute_beta_grad,
                                                   tol=tol,
                                                   monitor=monitor,
                                                   max_iter=maxit)

        results = (dataset_name, div_alpha, method, maxit, val, grad,
                   monitor.times[0])
        df = pandas.DataFrame(results).transpose()
        df.columns = [
            'dataset', 'div_alpha', 'method', 'maxit', 'val', 'grad', 'time'
        ]
        str_results = "results/hypergradient_%s_%i_%s_%i.pkl" % (
            dataset_name, div_alpha, method, maxit)
        df.to_pickle(str_results)
Example #5
log_alphas = np.log(alphas)

tol = 1e-7

# grid search
# model = Lasso(X_train, y_train, np.log(alpha_max/10))
# criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
# algo = Forward(criterion)
# monitor_grid_sk = Monitor()
# grid_search(
#     algo, None, None, monitor_grid_sk, log_alphas=log_alphas,
#     tol=tol)

# np.save("p_alphas.npy", p_alphas)
# objs = np.array(monitor_grid_sk.objs)
# np.save("objs.npy", objs)

# grad_search
estimator = linear_model.Lasso(fit_intercept=False, warm_start=True)
model = Lasso(X_train, y_train, np.log(alpha_max / 10), estimator=estimator)
criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
algo = ImplicitForward(criterion)
monitor_grad = Monitor()
grad_search(algo, np.log(alpha_max / 10), monitor_grad, n_outer=10, tol=tol)

p_alphas_grad = np.exp(np.array(monitor_grad.log_alphas)) / alpha_max

np.save("p_alphas_grad.npy", p_alphas_grad)
objs_grad = np.array(monitor_grad.objs)
np.save("objs_grad.npy", objs_grad)
Example #6
alpha_max = (np.abs(X[idx_train, :].T @ y[idx_train])).max() / n_samples
p_alpha = 0.9
alpha = p_alpha * alpha_max
log_alpha = np.log(alpha)

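# 50 geometrically spaced alphas from alpha_max down to alpha_max / 10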
log_alphas = np.log(alpha_max * np.geomspace(1, 0.1))
tol = 1e-16

dict_log_alpha = {}
dict_log_alpha["lasso"] = log_alpha
tab = np.linspace(1, 1000, n_features)
dict_log_alpha["wlasso"] = log_alpha + np.log(tab / tab.max())

models = {}
models["lasso"] = Lasso(estimator=None)
models["wlasso"] = WeightedLasso(estimator=None)


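# gradient of the held-out MSE with respect to the coefficients,
# restricted to the support given by mask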
def get_v(mask, dense):
    return 2 * (X[np.ix_(idx_val, mask)].T @ (
        X[np.ix_(idx_val, mask)] @ dense - y[idx_val])) / len(idx_val)


def test_beta_jac():
    #########################################################################
    # check that the methods computing the full Jacobian compute the same sol
    # maybe we could add a test comparing with sklearn
    for key in models.keys():
        supp1, dense1, jac1 = get_beta_jac_iterdiff(X[idx_train, :],
                                                    y[idx_train],
Example #7
def linear_cv(dataset_name, tol=1e-3, compute_jac=True, model_name="lasso"):

    X, y = load_libsvm(dataset_name)
    X = csc_matrix(X)
    n_samples, n_features = X.shape
    p_alpha = p_alphas[dataset_name, model_name]

    max_iter = max_iters[dataset_name]
    if model_name == "lasso":
        model = Lasso(X, y, 0, max_iter=max_iter, tol=tol)
    elif model_name == "logreg":
        model = SparseLogreg(X, y, 0, max_iter=max_iter, tol=tol)

    alpha_max = np.exp(model.compute_alpha_max())

    alpha = p_alpha * alpha_max
    if model_name == "lasso":
        clf = Lasso_cel(alpha=alpha,
                        fit_intercept=False,
                        warm_start=True,
                        tol=tol * norm(y)**2 / 2,
                        max_iter=10000)
        clf.fit(X, y)
        beta_star = clf.coef_
        mask = beta_star != 0
        dense = beta_star[mask]
    elif model_name == "logreg":
        # clf = LogisticRegression(
        #     penalty='l1', C=(1 / (alpha * n_samples)),
        #     fit_intercept=False,
        #     warm_start=True, max_iter=10000,
        #     tol=tol, verbose=True).fit(X, y)
        # clf = LogisticRegression(
        #     penalty='l1', C=(1 / (alpha * n_samples)),
        #     fit_intercept=False,
        #     warm_start=True, max_iter=10000,
        #     tol=tol, verbose=True,
        #     solver='liblinear').fit(X, y)
        # beta_star = clf.coef_[0]

        blitzl1.set_use_intercept(False)
        blitzl1.set_tolerance(1e-32)
        blitzl1.set_verbose(True)
        # blitzl1.set_min_time(60)
        prob = blitzl1.LogRegProblem(X, y)
        # # lammax = prob.compute_lambda_max()
        clf = prob.solve(alpha * n_samples)
        beta_star = clf.x
        mask = beta_star != 0
        mask = np.array(mask)
        dense = beta_star[mask]
    # if model == "lasso":
    v = -n_samples * alpha * np.sign(beta_star[mask])
    mat_to_inv = model.get_hessian(mask, dense, np.log(alpha))
    # mat_to_inv = X[:, mask].T  @ X[:, mask]

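    # implicit differentiation: on the support, the Jacobian with respect to
    # log(alpha) solves the linear system mat_to_inv @ jac = v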
    jac_temp = cg(mat_to_inv, v, tol=1e-10)
    jac_star = np.zeros(n_features)
    jac_star[mask] = jac_temp[0]
    # elif model == "logreg":
    #     v = - n_samples * alpha * np.sign(beta_star[mask])

    log_alpha = np.log(alpha)

    list_beta, list_jac = get_beta_jac_iterdiff(X,
                                                y,
                                                log_alpha,
                                                model,
                                                save_iterates=True,
                                                tol=tol,
                                                max_iter=max_iter,
                                                compute_jac=compute_jac)

    diff_beta = norm(list_beta - beta_star, axis=1)
    diff_jac = norm(list_jac - jac_star, axis=1)

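    # find the first iteration from which the support of the iterates
    # matches the support of beta_star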
    supp_star = beta_star != 0
    n_iter = list_beta.shape[0]
    for i in np.arange(n_iter)[::-1]:
        supp = list_beta[i, :] != 0
        if not np.all(supp == supp_star):
            supp_id = i + 1
            break
        supp_id = 0

    return dataset_name, p_alpha, diff_beta, diff_jac, n_iter, supp_id
Example #8
def parallel_function(dataset_name,
                      div_alpha,
                      method,
                      ind_rep,
                      random_state=10):
    maxit = dict_maxits[(dataset_name, div_alpha)][ind_rep]
    print("Dataset %s, algo %s, maxit %i" % (dataset_name, method, maxit))
    X, y = fetch_libsvm(dataset_name)
    n_samples = len(y)

    kf = KFold(n_splits=5, random_state=random_state, shuffle=True)

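    # note: the block below runs twice and only the last pass's val and grad
    # are kept in results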
    for i in range(2):
        alpha_max = np.max(np.abs(X.T.dot(y))) / n_samples
        log_alpha = np.log(alpha_max / div_alpha)
        monitor = Monitor()
        if method == "celer":
            clf = Lasso_celer(
                alpha=np.exp(log_alpha),
                fit_intercept=False,
                # TODO maybe change this tol
                tol=1e-8,
                max_iter=maxit)
            model = Lasso(estimator=clf, max_iter=maxit)
            criterion = HeldOutMSE(None, None)
            cross_val = CrossVal(cv=kf, criterion=criterion)
            algo = ImplicitForward(tol_jac=1e-8,
                                   n_iter_jac=maxit,
                                   use_stop_crit=False)
            algo.max_iter = maxit
            val, grad = cross_val.get_val_grad(model,
                                               X,
                                               y,
                                               log_alpha,
                                               algo.get_beta_jac_v,
                                               tol=tol,
                                               monitor=monitor,
                                               max_iter=maxit)
        elif method == "ground_truth":
            for file in os.listdir("results/"):
                if file.startswith("hypergradient_%s_%i_%s" %
                                   (dataset_name, div_alpha, method)):
                    return
            # compute a high-precision reference value and hypergradient
            clf = Lasso_celer(alpha=np.exp(log_alpha),
                              fit_intercept=False,
                              warm_start=True,
                              tol=1e-13,
                              max_iter=10000)
            criterion = HeldOutMSE(None, None)
            cross_val = CrossVal(cv=kf, criterion=criterion)
            algo = Implicit(criterion)
            model = Lasso(estimator=clf, max_iter=10000)
            val, grad = cross_val.get_val_grad(model,
                                               X,
                                               y,
                                               log_alpha,
                                               algo.get_beta_jac_v,
                                               tol=1e-13,
                                               monitor=monitor)
        else:
            model = Lasso(max_iter=maxit)
            criterion = HeldOutMSE(None, None)
            cross_val = CrossVal(cv=kf, criterion=criterion)
            if method == "forward":
                algo = Forward(use_stop_crit=False)
            elif method == "implicit_forward":
                algo = ImplicitForward(use_stop_crit=False,
                                       tol_jac=1e-8,
                                       n_iter_jac=maxit,
                                       max_iter=1000)
            elif method == "implicit":
                algo = Implicit(use_stop_crit=False, max_iter=1000)
            elif method == "backward":
                algo = Backward()
            else:
                raise NotImplementedError
            algo.max_iter = maxit
            algo.use_stop_crit = False
            val, grad = cross_val.get_val_grad(model,
                                               X,
                                               y,
                                               log_alpha,
                                               algo.get_beta_jac_v,
                                               tol=tol,
                                               monitor=monitor,
                                               max_iter=maxit)

    results = (dataset_name, div_alpha, method, maxit, val, grad,
               monitor.times[0])
    df = pandas.DataFrame(results).transpose()
    df.columns = [
        'dataset', 'div_alpha', 'method', 'maxit', 'val', 'grad', 'time'
    ]
    str_results = "results/hypergradient_%s_%i_%s_%i.pkl" % (
        dataset_name, div_alpha, method, maxit)
    df.to_pickle(str_results)
Example #9
from andersoncd.data.real import get_gina_agnostic
from sparse_ho import Forward, Backward
from sparse_ho.models import Lasso, ElasticNet
from sparse_ho.tests.cvxpylayer import lasso_cvxpy, enet_cvxpy
from sparse_ho.criterion import HeldOutMSE
import time

X, y = get_gina_agnostic(normalize_y=False)
n_samples, n_features = X.shape
idx_train = np.arange(0, n_samples // 2)
idx_val = np.arange(n_samples // 2, n_samples)

name_models = ["lasso", "enet"]

dict_models = {}
dict_models["lasso"] = Lasso()
dict_models["enet"] = ElasticNet()

dict_cvxpy = {}
dict_cvxpy["lasso"] = lasso_cvxpy
dict_cvxpy["enet"] = enet_cvxpy

dict_ncols = {}
dict_ncols[10] = np.geomspace(100, n_features, num=10, dtype=int)
dict_ncols[100] = np.geomspace(100, n_features, num=10, dtype=int)

tol = 1e-6
l1_ratio = 0.8
repeat = 10
div_alphas = [10, 100]
Example #10
alpha_max = np.max(np.abs(X[idx_train, :].T @ y[idx_train])) / len(idx_train)
p_alpha = 0.7
alpha0 = p_alpha * alpha_max
# log_alpha = np.log(alpha)

log_alphas = np.log(alpha_max * np.geomspace(1, 0.1))
tol = 1e-16
max_iter = 1000

# dict_log_alpha0 = {}
# dict_log_alpha0["lasso"] = log_alpha
# tab = np.linspace(1, 1000, n_features)
# dict_log_alpha0["wlasso"] = log_alpha + np.log(tab / tab.max())

models = [
    Lasso(max_iter=max_iter, estimator=None),
]

estimator = linear_model.Lasso(fit_intercept=False,
                               max_iter=1000,
                               warm_start=True)
models_custom = [
    Lasso(max_iter=max_iter, estimator=estimator),
]


@pytest.mark.parametrize('model', models)
@pytest.mark.parametrize('crit', ['MSE', 'sure'])
def test_grad_search(model, crit):
    """check that the paths are the same in the line search"""
    if crit == 'MSE':
Example #11
X_test_s = csc_matrix(X_test)

alpha_max = (X_train.T @ y_train).max() / n_samples
p_alpha = 0.7
alpha = p_alpha * alpha_max
log_alpha = np.log(alpha)

log_alphas = np.log(alpha_max * np.geomspace(1, 0.1))
tol = 1e-16

dict_log_alpha = {}
dict_log_alpha["lasso"] = log_alpha
tab = np.linspace(1, 1000, n_features)
dict_log_alpha["wlasso"] = log_alpha + np.log(tab / tab.max())

models = [Lasso(X_train, y_train, dict_log_alpha["lasso"])]


def test_cross_val_criterion():
    alpha_min = alpha_max / 10
    log_alpha_max = np.log(alpha_max)
    log_alpha_min = np.log(alpha_min)
    max_iter = 10000
    n_alphas = 10
    kf = KFold(n_splits=5, shuffle=True, random_state=56)

    estimator = sklearn.linear_model.Lasso(fit_intercept=False,
                                           max_iter=1000,
                                           warm_start=True)
    monitor_grid = Monitor()
    criterion = CrossVal(X, y, Lasso, cv=kf, estimator=estimator)
Example #12
alpha_max = np.max(np.abs(X[idx_train, :].T @ y[idx_train])) / len(idx_train)
p_alpha = 0.7
alpha0 = p_alpha * alpha_max
# log_alpha = np.log(alpha)

log_alphas = np.log(alpha_max * np.geomspace(1, 0.1))
tol = 1e-16
max_iter = 1000

# dict_log_alpha0 = {}
# dict_log_alpha0["lasso"] = log_alpha
# tab = np.linspace(1, 1000, n_features)
# dict_log_alpha0["wlasso"] = log_alpha + np.log(tab / tab.max())

models = [
    Lasso(estimator=None),
]

estimator = linear_model.Lasso(fit_intercept=False,
                               max_iter=1000,
                               warm_start=True)
models_custom = [
    Lasso(estimator=estimator),
]

Optimizers = [LineSearch, GradientDescent]


@pytest.mark.parametrize('Optimizer', Optimizers)
@pytest.mark.parametrize('model', models)
@pytest.mark.parametrize('crit', ['MSE', 'sure'])
Example #13
tol = 1e-7
max_iter = 1e5

##############################################################################
# Grid-search with scikit-learn
# -----------------------------

estimator = linear_model.Lasso(fit_intercept=False,
                               max_iter=1000,
                               warm_start=True)

print('scikit-learn started')

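# evaluate the held-out MSE on the precomputed grid of log_alphas,
# recording objective values and times in the monitor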
t0 = time.time()
model = Lasso(X_train, y_train, estimator=estimator)
criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
algo = Forward(criterion)
monitor_grid_sk = Monitor()
grid_search(algo,
            criterion,
            None,
            None,
            monitor_grid_sk,
            log_alphas=log_alphas,
            tol=tol)
objs = np.array(monitor_grid_sk.objs)
t_sk = time.time() - t0

print('scikit-learn finished')
Example #14
def parallel_function(dataset_name,
                      method,
                      tol=1e-5,
                      n_outer=50,
                      tolerance_decrease='constant'):

    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, _ = X_train.shape
    # compute alpha_max
    alpha_max = np.abs(X_train.T @ y_train).max() / n_samples

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max / 10_000
    log_alpha_max = np.log(alpha_max)
    log_alpha_min = np.log(alpha_min)
    log_alpha0 = np.log(0.1 * alpha_max)

    if model_name == "lasso":
        model = Lasso(X_train, y_train)
    elif model_name == "logreg":
        model = SparseLogreg(X_train, y_train)

    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2

    for _ in range(size_loop):
        if model_name == "lasso":
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
        elif model_name == "logreg":
            criterion = HeldOutLogistic(X_val,
                                        y_val,
                                        model,
                                        X_test=X_test,
                                        y_test=y_test)
        algo = dict_algo[method](criterion)
        monitor = Monitor()
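        # hyperparameter search: grid search, random search, Bayesian
        # optimization (hyperopt) or gradient-based search (grad_search)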
        if method == 'grid_search':
            log_alphas = np.log(np.geomspace(alpha_max, alpha_min, num=100))
            grid_search(algo,
                        None,
                        None,
                        monitor,
                        log_alphas=log_alphas,
                        tol=tol)
        elif method == 'random':
            grid_search(algo,
                        log_alpha_max,
                        log_alpha_min,
                        monitor,
                        tol=tol,
                        max_evals=n_alphas,
                        t_max=dict_t_max[dataset_name])
        elif method in ("bayesian"):
            hyperopt_wrapper(algo,
                             log_alpha_min,
                             log_alpha_max,
                             monitor,
                             max_evals=n_alphas,
                             tol=tol,
                             method='bayesian',
                             t_max=dict_t_max[dataset_name])
        else:
            # do line search to find the optimal lambda
            grad_search(algo,
                        log_alpha0,
                        monitor,
                        n_outer=n_outer,
                        tol=tol,
                        tolerance_decrease=tolerance_decrease,
                        t_max=dict_t_max[dataset_name])

        monitor.times = np.array(monitor.times)
        monitor.objs = np.array(monitor.objs)
        monitor.objs_test = np.array(monitor.objs_test)
        monitor.log_alphas = np.array(monitor.log_alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.log_alphas,
            norm(y_val), norm(y_test), log_alpha_max, model_name)
Example #15
print('scikit finished')

##############################################################################
# Now do the hyperparameter optimization with implicit differentiation
# --------------------------------------------------------------------

estimator = sklearn.linear_model.Lasso(fit_intercept=False,
                                       max_iter=1000,
                                       warm_start=True,
                                       tol=tol)

print('sparse-ho started')

t0 = time.time()
model = Lasso()
criterion = HeldOutMSE(None, None)
alpha0 = alpha_max / 10
monitor_grad = Monitor()
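# wrap the held-out MSE into cross-validation over the folds kf
# (kf is defined outside this snippet)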
cross_val_criterion = CrossVal(criterion, cv=kf)
algo = ImplicitForward()
optimizer = LineSearch(n_outer=10, tol=tol)
grad_search(algo, cross_val_criterion, model, optimizer, X, y, alpha0,
            monitor_grad)

t_grad_search = time.time() - t0

print('sparse-ho finished')

##############################################################################
# Plot results
Example #16
def test_grid_search():
    max_evals = 5

    monitor_grid = Monitor()
    model = Lasso(X_train, y_train, estimator=estimator)
    criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
    algo = Forward()
    log_alpha_opt_grid, _ = grid_search(algo,
                                        criterion,
                                        log_alpha_min,
                                        log_alpha_max,
                                        monitor_grid,
                                        max_evals=max_evals,
                                        tol=1e-5,
                                        samp="grid")

    monitor_random = Monitor()
    criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
    algo = Forward()
    log_alpha_opt_random, _ = grid_search(algo,
                                          criterion,
                                          log_alpha_min,
                                          log_alpha_max,
                                          monitor_random,
                                          max_evals=max_evals,
                                          tol=1e-5,
                                          samp="random")

    assert (monitor_random.log_alphas[np.argmin(
        monitor_random.objs)] == log_alpha_opt_random)
    assert (monitor_grid.log_alphas[np.argmin(
        monitor_grid.objs)] == log_alpha_opt_grid)

    monitor_grid = Monitor()
    model = Lasso(X_train, y_train, estimator=estimator)
    criterion = SURE(X_train,
                     y_train,
                     model,
                     sigma=sigma_star,
                     X_test=X_test,
                     y_test=y_test)
    algo = Forward()
    log_alpha_opt_grid, _ = grid_search(algo,
                                        criterion,
                                        log_alpha_min,
                                        log_alpha_max,
                                        monitor_grid,
                                        max_evals=max_evals,
                                        tol=1e-5,
                                        samp="grid")

    monitor_random = Monitor()
    criterion = SURE(X_train,
                     y_train,
                     model,
                     sigma=sigma_star,
                     X_test=X_test,
                     y_test=y_test)
    algo = Forward()
    log_alpha_opt_random, _ = grid_search(algo,
                                          criterion,
                                          log_alpha_min,
                                          log_alpha_max,
                                          monitor_random,
                                          max_evals=max_evals,
                                          tol=1e-5,
                                          samp="random")

    assert (monitor_random.log_alphas[np.argmin(
        monitor_random.objs)] == log_alpha_opt_random)
    assert (monitor_grid.log_alphas[np.argmin(
        monitor_grid.objs)] == log_alpha_opt_grid)
Example #17
alpha_1 = p_alpha * alpha_max
alpha_2 = 0.1
log_alpha1 = np.log(alpha_1)
log_alpha2 = np.log(alpha_2)

dict_log_alpha = {}
dict_log_alpha["lasso"] = log_alpha
dict_log_alpha["enet"] = np.array([log_alpha1, log_alpha2])
tab = np.linspace(1, 1000, n_features)
dict_log_alpha["wLasso"] = log_alpha + np.log(tab / tab.max())
dict_log_alpha["logreg"] = (log_alpha - np.log(2))
dict_log_alpha["svm"] = 1e-4
dict_log_alpha["svr"] = np.array([1e-2, 1e-2])
# Set models to be tested
models = {}
models["lasso"] = Lasso(estimator=None)
models["enet"] = ElasticNet(estimator=None)
models["wLasso"] = WeightedLasso(estimator=None)
models["logreg"] = SparseLogreg(estimator=None)
models["svm"] = SVM(estimator=None)
models["svr"] = SVR(estimator=None)

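# the same models, this time backed by external solvers
# (celer, scikit-learn) used as inner estimators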
custom_models = {}
custom_models["lasso"] = Lasso(estimator=celer.Lasso(
    warm_start=True, fit_intercept=False))
custom_models["enet"] = ElasticNet(
    estimator=linear_model.ElasticNet(warm_start=True, fit_intercept=False))
custom_models["logreg"] = SparseLogreg(
    estimator=celer.LogisticRegression(warm_start=True, fit_intercept=False))

# Compute "ground truth" with cvxpylayer
Example #18
tol = 1e-7
max_iter = 1e5

##############################################################################
# Grid-search with scikit-learn
# -----------------------------

estimator = linear_model.Lasso(fit_intercept=False,
                               max_iter=1000,
                               warm_start=True)

print('scikit-learn started')

t0 = time.time()
model = Lasso(estimator=estimator)
criterion = HeldOutMSE(idx_train, idx_val)
algo = Forward()
monitor_grid_sk = Monitor()
grid_search(algo,
            criterion,
            model,
            X,
            y,
            None,
            None,
            monitor_grid_sk,
            log_alphas=log_alphas,
            tol=tol)
objs = np.array(monitor_grid_sk.objs)
t_sk = time.time() - t0
Example #19
def parallel_function(
        dataset_name, method, tol=1e-5, n_outer=50,
        tolerance_decrease='constant'):

    # load data
    X, y = fetch_libsvm(dataset_name)
    y -= y.mean()
    # compute alpha_max
    alpha_max = np.abs(X.T @ y).max() / len(y)

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max / 10_000

    if model_name == "lasso":
        estimator = celer.Lasso(
            fit_intercept=False, max_iter=100, warm_start=True, tol=tol)
        model = Lasso(estimator=estimator)
    elif model_name == "logreg":
        model = SparseLogreg(estimator=estimator)

    # TODO improve this
    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2

    for _ in range(size_loop):
        if model_name == "lasso":
            sub_criterion = HeldOutMSE(None, None)
        elif model_name == "logreg":
            sub_criterion = HeldOutLogistic(None, None)
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        criterion = CrossVal(sub_criterion, cv=kf)

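        # implicit forward differentiation; tol_jac is the tolerance used
        # when computing the Jacobian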
        algo = ImplicitForward(tol_jac=1e-3)
        monitor = Monitor()
        t_max = dict_t_max[dataset_name]
        if method == 'grid_search':
            grid_search(
                algo, criterion, model, X, y, alpha_min, alpha_max,
                monitor, max_evals=100, tol=tol)
        elif method == 'random' or method == 'bayesian':
            hyperopt_wrapper(
                algo, criterion, model, X, y, alpha_min, alpha_max,
                monitor, max_evals=30, tol=tol, method=method, size_space=1,
                t_max=t_max)
        elif method.startswith("implicit_forward"):
            # do gradient descent to find the optimal lambda
            alpha0 = alpha_max / 100
            n_outer = 30
            if method == 'implicit_forward':
                optimizer = GradientDescent(
                    n_outer=n_outer, p_grad_norm=1, verbose=True, tol=tol,
                    t_max=t_max)
            else:
                optimizer = GradientDescent(
                    n_outer=n_outer, p_grad_norm=1, verbose=True, tol=tol,
                    t_max=t_max,
                    tol_decrease="geom")
            grad_search(
                algo, criterion, model, optimizer, X, y, alpha0,
                monitor)
        else:
            raise NotImplementedError

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = 0  # TODO
    monitor.alphas = np.array(monitor.alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.alphas, alpha_max,
            model_name)
Example #20
alpha_max = (X_train.T @ y_train).max() / n_samples
p_alpha = 0.9
alpha = p_alpha * alpha_max
log_alpha = np.log(alpha)

log_alphas = np.log(alpha_max * np.geomspace(1, 0.1))
tol = 1e-16

dict_log_alpha = {}
dict_log_alpha["lasso"] = log_alpha
tab = np.linspace(1, 1000, n_features)
dict_log_alpha["wlasso"] = log_alpha + np.log(tab / tab.max())

models = {}
models["lasso"] = Lasso(X_train, y_train, estimator=None)
models["wlasso"] = WeightedLasso(X_train, y_train, estimator=None)


def get_v(mask, dense):
    return 2 * (
        X_val[:, mask].T @ (X_val[:, mask] @ dense - y_val)) / X_val.shape[0]


def test_beta_jac():
    #########################################################################
    # check that the methods computing the full Jacobian compute the same sol
    # maybe we could add a test comparing with sklearn
    for key in models.keys():
        supp1, dense1, jac1 = get_beta_jac_iterdiff(X_train,
                                                    y_train,
Example #21
alpha_max = (X_train.T @ y_train).max() / n_samples
p_alpha = 0.7
alpha = p_alpha * alpha_max
log_alpha = np.log(alpha)

log_alphas = np.log(alpha_max * np.geomspace(1, 0.1))
tol = 1e-16
max_iter = 1000

dict_log_alpha0 = {}
dict_log_alpha0["lasso"] = log_alpha
tab = np.linspace(1, 1000, n_features)
dict_log_alpha0["wlasso"] = log_alpha + np.log(tab / tab.max())

models = [
    Lasso(X_train, y_train, max_iter=max_iter, estimator=None),
]

estimator = linear_model.Lasso(fit_intercept=False,
                               max_iter=1000,
                               warm_start=True)
models_custom = [
    Lasso(X_train, y_train, max_iter=max_iter, estimator=estimator),
]


@pytest.mark.parametrize('model', models)
@pytest.mark.parametrize('crit', ['cv', 'sure'])
def test_grad_search(model, crit):
    """check that the paths are the same in the line search"""
    if crit == 'cv':
Example #22
dict_res = {}

for maxit in maxits:
    for method in methods:
        print("Dataset %s, maxit %i" % (method, maxit))
        for i in range(2):
            alpha_max = np.max(np.abs(X.T.dot(y))) / n_samples
            log_alpha = np.log(alpha_max * p_alpha_max)
            monitor = Monitor()
            if method == "celer":
                clf = Lasso_celer(alpha=np.exp(log_alpha),
                                  fit_intercept=False,
                                  tol=1e-12,
                                  max_iter=maxit)
                model = Lasso(estimator=clf, max_iter=maxit)
                criterion = HeldOutMSE(idx_train, idx_val)
                algo = ImplicitForward(tol_jac=1e-32,
                                       n_iter_jac=maxit,
                                       use_stop_crit=False)
                algo.max_iter = maxit
                val, grad = criterion.get_val_grad(model,
                                                   X,
                                                   y,
                                                   log_alpha,
                                                   algo.get_beta_jac_v,
                                                   tol=1e-12,
                                                   monitor=monitor,
                                                   max_iter=maxit)
            else:
                model = Lasso(max_iter=maxit)
Example #23
def test_grid_search():
    max_evals = 5

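    # grid sampling: the held-out MSE is evaluated on max_evals values of
    # alpha between alpha_min and alpha_max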
    monitor_grid = Monitor()
    model = Lasso(estimator=estimator)
    criterion = HeldOutMSE(idx_train, idx_val)
    alpha_opt_grid, _ = grid_search(criterion,
                                    model,
                                    X,
                                    y,
                                    alpha_min,
                                    alpha_max,
                                    monitor_grid,
                                    max_evals=max_evals,
                                    tol=1e-5,
                                    samp="grid")

    monitor_random = Monitor()
    criterion = HeldOutMSE(idx_train, idx_val)
    alpha_opt_random, _ = grid_search(criterion,
                                      model,
                                      X,
                                      y,
                                      alpha_min,
                                      alpha_max,
                                      monitor_random,
                                      max_evals=max_evals,
                                      tol=1e-5,
                                      samp="random")

    np.testing.assert_allclose(
        monitor_random.alphas[np.argmin(monitor_random.objs)],
        alpha_opt_random)
    np.testing.assert_allclose(
        monitor_grid.alphas[np.argmin(monitor_grid.objs)], alpha_opt_grid)

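    # same checks with the finite-difference Monte Carlo SURE criterion
    # (noise level sigma_star)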
    monitor_grid = Monitor()
    model = Lasso(estimator=estimator)

    criterion = FiniteDiffMonteCarloSure(sigma=sigma_star)
    alpha_opt_grid, _ = grid_search(criterion,
                                    model,
                                    X,
                                    y,
                                    alpha_min,
                                    alpha_max,
                                    monitor_grid,
                                    max_evals=max_evals,
                                    tol=1e-5,
                                    samp="grid")

    monitor_random = Monitor()
    criterion = FiniteDiffMonteCarloSure(sigma=sigma_star)
    alpha_opt_random, _ = grid_search(criterion,
                                      model,
                                      X,
                                      y,
                                      alpha_min,
                                      alpha_max,
                                      monitor_random,
                                      max_evals=max_evals,
                                      tol=1e-5,
                                      samp="random")

    np.testing.assert_allclose(
        monitor_random.alphas[np.argmin(monitor_random.objs)],
        alpha_opt_random)
    np.testing.assert_allclose(
        monitor_grid.alphas[np.argmin(monitor_grid.objs)], alpha_opt_grid)