示例#1
0
def parallel_function(
        dataset_name, method, tol=1e-8, n_outer=15):

    # load data
    X, y = fetch_libsvm(dataset_name)
    # subsample the samples and the features
    n_samples, n_features = dict_subsampling[dataset_name]
    t_max = dict_t_max[dataset_name]
    # t_max = 3600

    X, y = clean_dataset(X, y, n_samples, n_features)
    alpha_max, n_classes = get_alpha_max(X, y)
    log_alpha_max = np.log(alpha_max)  # maybe to change alpha max value

    algo = ImplicitForward(None, n_iter_jac=2000)
    estimator = LogisticRegression(
        C=1, fit_intercept=False, warm_start=True, max_iter=30, verbose=False)

    model = SparseLogreg(estimator=estimator)
    idx_train, idx_val, idx_test = get_splits(X, y)

    logit_multiclass = LogisticMulticlass(
        idx_train, idx_val, algo, idx_test=idx_test)

    monitor = Monitor()
    if method == "implicit_forward":
        log_alpha0 = np.ones(n_classes) * np.log(0.1 * alpha_max)
        optimizer = LineSearch(n_outer=100)
        grad_search(
            algo, logit_multiclass, model, optimizer, X, y, log_alpha0,
            monitor)
    elif method.startswith(('random', 'bayesian')):
        max_evals = dict_max_eval[dataset_name]
        log_alpha_min = np.log(alpha_max) - 7
        hyperopt_wrapper(
            algo, logit_multiclass, model, X, y, log_alpha_min, log_alpha_max,
            monitor, max_evals=max_evals, tol=tol, t_max=t_max, method=method,
            size_space=n_classes)
    elif method == 'grid_search':
        n_alphas = 20
        p_alphas = np.geomspace(1, 0.001, n_alphas)
        p_alphas = np.tile(p_alphas, (n_classes, 1))
        for i in range(n_alphas):
            log_alpha_i = np.log(alpha_max * p_alphas[:, i])
            logit_multiclass.get_val(
                model, X, y, log_alpha_i, None, monitor, tol)

    monitor.times = np.array(monitor.times).copy()
    monitor.objs = np.array(monitor.objs).copy()
    monitor.acc_vals = np.array(monitor.acc_vals).copy()
    monitor.acc_tests = np.array(monitor.acc_tests).copy()
    monitor.log_alphas = np.array(monitor.log_alphas).copy()
    return (
        dataset_name, method, tol, n_outer, monitor.times, monitor.objs,
        monitor.acc_vals, monitor.acc_tests, monitor.log_alphas, log_alpha_max,
        n_samples, n_features, n_classes)
示例#2
0
# X, y = fetch_libsvm('sector_scale')
# X, y = fetch_libsvm('sector')
# X, y = fetch_libsvm('smallNORB')
# X, y = fetch_libsvm('mnist')


# clean data and subsample
X, y = clean_dataset(X, y, n_samples, n_features)
idx_train, idx_val, idx_test = get_splits(X, y)
n_samples, n_features = X.shape

algo = ImplicitForward(n_iter_jac=1000)
estimator = LogisticRegression(
    C=1, fit_intercept=False, warm_start=True, max_iter=2000, verbose=False)

model = SparseLogreg(estimator=estimator)
logit_multiclass = LogisticMulticlass(
    idx_train, idx_val, algo, idx_test=idx_test)


alpha_max, n_classes = alpha_max_multiclass(X, y)
tol = 1e-5


n_alphas = 10
p_alphas = np.geomspace(1, 0.001, n_alphas)
p_alphas = np.tile(p_alphas, (n_classes, 1))

print("###################### GRID SEARCH ###################")
monitor_grid = Monitor()
for i in range(n_alphas):
示例#3
0
n_alphas = 30
p_alphas = np.geomspace(1, 0.0001, n_alphas)
alphas = alpha_max * p_alphas
log_alphas = np.log(alphas)

##############################################################################
# Grid-search
# -----------

print('scikit started')
t0 = time.time()

estimator = LogisticRegression(penalty='l1',
                               fit_intercept=False,
                               max_iter=max_iter)
model = SparseLogreg(max_iter=max_iter, estimator=estimator)
criterion = HeldOutLogistic(idx_train, idx_val)
algo_grid = Forward()
monitor_grid = Monitor()
grid_search(algo_grid,
            criterion,
            model,
            X,
            y,
            log_alpha_min,
            log_alpha_max,
            monitor_grid,
            log_alphas=log_alphas,
            tol=tol)
objs = np.array(monitor_grid.objs)
示例#4
0
p_alphas = np.geomspace(1, 0.0001, n_alphas)
alphas = alpha_max * p_alphas
log_alphas = np.log(alphas)

##############################################################################
# Grid-search
# -----------

print('scikit started')
t0 = time.time()

estimator = LogisticRegression(penalty='l1',
                               fit_intercept=False,
                               solver='saga',
                               max_iter=max_iter)
model = SparseLogreg(X_train, y_train, max_iter=max_iter, estimator=estimator)
criterion = HeldOutLogistic(X_val, y_val, model)
algo_grid = Forward()
monitor_grid = Monitor()
grid_search(algo_grid,
            criterion,
            log_alpha_min,
            log_alpha_max,
            monitor_grid,
            log_alphas=log_alphas,
            tol=tol)
objs = np.array(monitor_grid.objs)

t_sk = time.time() - t0

print('scikit finished')
示例#5
0
def linear_cv(dataset_name, tol=1e-3, compute_jac=True, model_name="lasso"):

    X, y = load_libsvm(dataset_name)
    X = csc_matrix(X)
    n_samples, n_features = X.shape
    p_alpha = p_alphas[dataset_name, model_name]

    max_iter = max_iters[dataset_name]
    if model_name == "lasso":
        model = Lasso(X, y, 0, max_iter=max_iter, tol=tol)
    elif model_name == "logreg":
        model = SparseLogreg(X, y, 0, max_iter=max_iter, tol=tol)

    alpha_max = np.exp(model.compute_alpha_max())

    alpha = p_alpha * alpha_max
    if model_name == "lasso":
        clf = Lasso_cel(alpha=alpha,
                        fit_intercept=False,
                        warm_start=True,
                        tol=tol * norm(y)**2 / 2,
                        max_iter=10000)
        clf.fit(X, y)
        beta_star = clf.coef_
        mask = beta_star != 0
        dense = beta_star[mask]
    elif model_name == "logreg":
        # clf = LogisticRegression(
        #     penalty='l1', C=(1 / (alpha * n_samples)),
        #     fit_intercept=False,
        #     warm_start=True, max_iter=10000,
        #     tol=tol, verbose=True).fit(X, y)
        # clf = LogisticRegression(
        #     penalty='l1', C=(1 / (alpha * n_samples)),
        #     fit_intercept=False,
        #     warm_start=True, max_iter=10000,
        #     tol=tol, verbose=True,
        #     solver='liblinear').fit(X, y)
        # beta_star = clf.coef_[0]

        blitzl1.set_use_intercept(False)
        blitzl1.set_tolerance(1e-32)
        blitzl1.set_verbose(True)
        # blitzl1.set_min_time(60)
        prob = blitzl1.LogRegProblem(X, y)
        # # lammax = prob.compute_lambda_max()
        clf = prob.solve(alpha * n_samples)
        beta_star = clf.x
        mask = beta_star != 0
        mask = np.array(mask)
        dense = beta_star[mask]
    # if model == "lasso":
    v = -n_samples * alpha * np.sign(beta_star[mask])
    mat_to_inv = model.get_hessian(mask, dense, np.log(alpha))
    # mat_to_inv = X[:, mask].T  @ X[:, mask]

    jac_temp = cg(mat_to_inv, v, tol=1e-10)
    jac_star = np.zeros(n_features)
    jac_star[mask] = jac_temp[0]
    # elif model == "logreg":
    #     v = - n_samples * alpha * np.sign(beta_star[mask])

    log_alpha = np.log(alpha)

    list_beta, list_jac = get_beta_jac_iterdiff(X,
                                                y,
                                                log_alpha,
                                                model,
                                                save_iterates=True,
                                                tol=tol,
                                                max_iter=max_iter,
                                                compute_jac=compute_jac)

    diff_beta = norm(list_beta - beta_star, axis=1)
    diff_jac = norm(list_jac - jac_star, axis=1)

    supp_star = beta_star != 0
    n_iter = list_beta.shape[0]
    for i in np.arange(n_iter)[::-1]:
        supp = list_beta[i, :] != 0
        if not np.all(supp == supp_star):
            supp_id = i + 1
            break
        supp_id = 0

    return dataset_name, p_alpha, diff_beta, diff_jac, n_iter, supp_id
示例#6
0
def parallel_function(dataset_name,
                      method,
                      tol=1e-5,
                      n_outer=50,
                      tolerance_decrease='constant'):

    # load data
    X, y = fetch_libsvm(dataset_name)
    y -= np.mean(y)
    # compute alpha_max
    alpha_max = np.abs(X.T @ y).max() / len(y)

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max * dict_palphamin[dataset_name]

    if model_name == "enet":
        estimator = linear_model.ElasticNet(fit_intercept=False,
                                            max_iter=10_000,
                                            warm_start=True,
                                            tol=tol)
        model = ElasticNet(estimator=estimator)
    elif model_name == "logreg":
        model = SparseLogreg(estimator=estimator)

    # TODO improve this
    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2
    for _ in range(size_loop):
        if model_name == "lasso" or model_name == "enet":
            sub_criterion = HeldOutMSE(None, None)
        elif model_name == "logreg":
            criterion = HeldOutLogistic(None, None)
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        criterion = CrossVal(sub_criterion, cv=kf)

        algo = ImplicitForward(tol_jac=1e-3)
        monitor = Monitor()
        t_max = dict_t_max[dataset_name]
        if method == 'grid_search':
            num1D = dict_point_grid_search[dataset_name]
            alpha1D = np.geomspace(alpha_max, alpha_min, num=num1D)
            alphas = [np.array(i) for i in product(alpha1D, alpha1D)]
            grid_search(algo,
                        criterion,
                        model,
                        X,
                        y,
                        alpha_min,
                        alpha_max,
                        monitor,
                        max_evals=100,
                        tol=tol,
                        alphas=alphas)
        elif method == 'random' or method == 'bayesian':
            hyperopt_wrapper(algo,
                             criterion,
                             model,
                             X,
                             y,
                             alpha_min,
                             alpha_max,
                             monitor,
                             max_evals=30,
                             tol=tol,
                             method=method,
                             size_space=2,
                             t_max=t_max)
        elif method.startswith("implicit_forward"):
            # do gradient descent to find the optimal lambda
            alpha0 = np.array([alpha_max / 100, alpha_max / 100])
            n_outer = 30
            if method == 'implicit_forward':
                optimizer = GradientDescent(n_outer=n_outer,
                                            p_grad_norm=1,
                                            verbose=True,
                                            tol=tol,
                                            t_max=t_max)
            else:
                optimizer = GradientDescent(n_outer=n_outer,
                                            p_grad_norm=1,
                                            verbose=True,
                                            tol=tol,
                                            t_max=t_max,
                                            tol_decrease="geom")
            grad_search(algo, criterion, model, optimizer, X, y, alpha0,
                        monitor)
        else:
            raise NotImplementedError

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = 0  # TODO
    monitor.alphas = np.array(monitor.alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.alphas,
            alpha_max, model_name)
示例#7
0
# X_train, X_val, X_test, y_train, y_val, y_test = get_leukemia()
n_samples, n_features = X_train.shape

print("Starting path computation...")

alpha_max = np.max(np.abs(X_train.T @ (-y_train)))
alpha_max /= (2 * n_samples)
n_alphas = 10
p_alphas = np.geomspace(1, 1e-4, n_alphas)
alphas = p_alphas * alpha_max
log_alphas = np.log(alphas)
tol = 1e-5

# grid search
model = SparseLogreg(X_train, y_train, log_alphas[0], max_iter=1000)
criterion = HeldOutLogistic(X_val, y_val, model, X_test=X_test, y_test=y_test)
algo = Forward(criterion)
monitor_grid_sk = Monitor()
grid_search(algo, None, None, monitor_grid_sk, log_alphas=log_alphas, tol=tol)
monitor = Monitor()
# grad_search(
#     algo, logCs[0], monitor, n_outer=5, verbose=True,
#     tolerance_decrease='constant', tol=1e-8,
#     t_max=10000)

plt.figure()
plt.plot(monitor_grid_sk.log_alphas, monitor_grid_sk.objs)
plt.plot(monitor.log_alphas, monitor.objs, 'bo')
plt.show(block=False)
示例#8
0
log_alpha2 = np.log(alpha_2)

dict_log_alpha = {}
dict_log_alpha["lasso"] = log_alpha
dict_log_alpha["enet"] = np.array([log_alpha1, log_alpha2])
tab = np.linspace(1, 1000, n_features)
dict_log_alpha["wLasso"] = log_alpha + np.log(tab / tab.max())
dict_log_alpha["logreg"] = (log_alpha - np.log(2))
dict_log_alpha["svm"] = 1e-4
dict_log_alpha["svr"] = np.array([1e-2, 1e-2])
# Set models to be tested
models = {}
models["lasso"] = Lasso(estimator=None)
models["enet"] = ElasticNet(estimator=None)
models["wLasso"] = WeightedLasso(estimator=None)
models["logreg"] = SparseLogreg(estimator=None)
models["svm"] = SVM(estimator=None)
models["svr"] = SVR(estimator=None)

custom_models = {}
custom_models["lasso"] = Lasso(estimator=celer.Lasso(
    warm_start=True, fit_intercept=False))
custom_models["enet"] = ElasticNet(
    estimator=linear_model.ElasticNet(warm_start=True, fit_intercept=False))
custom_models["logreg"] = SparseLogreg(
    estimator=celer.LogisticRegression(warm_start=True, fit_intercept=False))

# Compute "ground truth" with cvxpylayer
dict_cvxpy_func = {
    'lasso': lasso_cvxpy,
    'enet': enet_cvxpy,
示例#9
0
    n_features=n_features, n_informative=50,
    random_state=122, flip_y=0.1, n_redundant=0)

X_val_s = csc_matrix(X_val)

y_train[y_train == 0.0] = -1.0
y_val[y_val == 0.0] = -1.0

alpha_max = np.max(np.abs(X_train.T @ (- y_train)))
alpha_max /= (2 * n_samples)
alpha = 0.3 * alpha_max
log_alpha = np.log(alpha)
tol = 1e-16

models = [
    SparseLogreg(
        X_train, y_train, max_iter=10000, estimator=None),
    SparseLogreg(
        X_train_s, y_train, max_iter=10000, estimator=None)
]

estimator = LogisticRegression(
    penalty="l1", tol=1e-12, fit_intercept=False, max_iter=100000,
    solver="saga")

models_custom = [
    SparseLogreg(
        X_train, y_train, max_iter=10000, estimator=estimator),
    SparseLogreg(
        X_train_s, y_train, max_iter=10000, estimator=estimator)
]
示例#10
0
                                    flip_y=0.1,
                                    n_redundant=0)
X_s = csc_matrix(X)

y[y == 0.0] = -1.0

idx_train = np.arange(0, 50)
idx_val = np.arange(50, 100)

alpha_max = np.max(np.abs(
    X[idx_train, :].T @ y[idx_train])) / (2 * len(idx_train))
alpha = 0.3 * alpha_max
log_alpha = np.log(alpha)
tol = 1e-16

models = [SparseLogreg(max_iter=10000, estimator=None)]

estimator = LogisticRegression(penalty="l1",
                               tol=1e-12,
                               fit_intercept=False,
                               max_iter=100000,
                               solver="saga")

models_custom = [SparseLogreg(max_iter=10000, estimator=estimator)]


def get_v(mask, dense):
    return 2 * (X[np.ix_(idx_val, mask)].T @ (
        X[np.ix_(idx_val, mask)] @ dense - y[idx_val])) / len(idx_val)

示例#11
0
def parallel_function(dataset_name,
                      method,
                      tol=1e-5,
                      n_outer=50,
                      tolerance_decrease='constant'):

    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, _ = X_train.shape
    # compute alpha_max
    alpha_max = np.abs(X_train.T @ y_train).max() / n_samples

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max / 10_000
    log_alpha_max = np.log(alpha_max)
    log_alpha_min = np.log(alpha_min)
    log_alpha0 = np.log(0.1 * alpha_max)

    if model_name == "lasso":
        model = Lasso(X_train, y_train)
    elif model_name == "logreg":
        model = SparseLogreg(X_train, y_train)

    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2

    for _ in range(size_loop):
        if model_name == "lasso":
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
        elif model_name == "logreg":
            criterion = HeldOutLogistic(X_val,
                                        y_val,
                                        model,
                                        X_test=X_test,
                                        y_test=y_test)
        algo = dict_algo[method](criterion)
        monitor = Monitor()
        if method == 'grid_search':
            log_alphas = np.log(np.geomspace(alpha_max, alpha_min, num=100))
            grid_search(algo,
                        None,
                        None,
                        monitor,
                        log_alphas=log_alphas,
                        tol=tol)
        elif method == 'random':
            grid_search(algo,
                        log_alpha_max,
                        log_alpha_min,
                        monitor,
                        tol=tol,
                        max_evals=n_alphas,
                        t_max=dict_t_max[dataset_name])
        elif method in ("bayesian"):
            hyperopt_wrapper(algo,
                             log_alpha_min,
                             log_alpha_max,
                             monitor,
                             max_evals=n_alphas,
                             tol=tol,
                             method='bayesian',
                             t_max=dict_t_max[dataset_name])
        else:
            # do line search to find the optimal lambda
            grad_search(algo,
                        log_alpha0,
                        monitor,
                        n_outer=n_outer,
                        tol=tol,
                        tolerance_decrease=tolerance_decrease,
                        t_max=dict_t_max[dataset_name])

        monitor.times = np.array(monitor.times)
        monitor.objs = np.array(monitor.objs)
        monitor.objs_test = np.array(monitor.objs_test)
        monitor.log_alphas = np.array(monitor.log_alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.log_alphas,
            norm(y_val), norm(y_test), log_alpha_max, model_name)
示例#12
0
文件: main.py 项目: svaiter/sparse-ho
def parallel_function(
        dataset_name, method, tol=1e-5, n_outer=50,
        tolerance_decrease='exponential'):

    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, n_features = X_train.shape
    print('n_samples', n_samples)
    print('n_features', n_features)
    y_train[y_train == 0.0] = -1.0
    y_val[y_val == 0.0] = -1.0
    y_test[y_test == 0.0] = -1.0

    alpha_max = np.max(np.abs(X_train.T @ y_train))
    alpha_max /= X_train.shape[0]
    alpha_max /= 4
    log_alpha_max = np.log(alpha_max)

    alpha_min = alpha_max * 1e-4
    alphas = np.geomspace(alpha_max, alpha_min, 10)
    log_alphas = np.log(alphas)

    log_alpha0 = np.log(0.1 * alpha_max)
    log_alpha_max = np.log(alpha_max)
    n_outer = 25

    if dataset_name == "rcv1":
        size_loop = 2
    else:
        size_loop = 2
    model = SparseLogreg(
        X_train, y_train, max_iter=1000, log_alpha_max=log_alpha_max)
    for i in range(size_loop):
        monitor = Monitor()

        if method == "implicit_forward":
            criterion = HeldOutLogistic(X_val, y_val, model, X_test=X_test, y_test=y_test)
            algo = ImplicitForward(criterion, tol_jac=1e-5, n_iter_jac=100)
            _, _, _ = grad_search(
                algo=algo, verbose=False,
                log_alpha0=log_alpha0, tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)

        elif method == "forward":
            criterion = HeldOutLogistic(X_val, y_val, model, X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            _, _, _ = grad_search(
                algo=algo,
                log_alpha0=log_alpha0, tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)

        elif method == "implicit":
            criterion = HeldOutLogistic(X_val, y_val, model, X_test=X_test, y_test=y_test)
            algo = Implicit(criterion)
            _, _, _ = grad_search(
                algo=algo,
                log_alpha0=log_alpha0, tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)

        elif method == "grid_search":
            criterion = HeldOutLogistic(X_val, y_val, model, X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            # log_alpha_min = np.log(alpha_min)
            log_alphas = np.log(np.geomspace(alpha_max, alpha_min, num=100))
            log_alpha_opt, min_g_func = grid_search(
                algo, None, None, monitor, tol=tol, samp="grid",
                t_max=dict_t_max[dataset_name], log_alphas=log_alphas)
            print(log_alpha_opt)

        elif method == "random":
            criterion = HeldOutLogistic(X_val, y_val, model, X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(alpha_min)
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(alpha_max), monitor, max_evals=100, tol=tol, samp="random",
                t_max=dict_t_max[dataset_name])
            print(log_alpha_opt)

        elif method == "lhs":
            criterion = HeldOutLogistic(X_val, y_val, model, X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(alpha_min)
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(alpha_max), monitor, max_evals=100, tol=tol, samp="lhs",
                t_max=dict_t_max[dataset_name])
            print(log_alpha_opt)

    monitor.times = np.array(monitor.times).copy()
    monitor.objs = np.array(monitor.objs).copy()
    monitor.objs_test = np.array(monitor.objs_test).copy()
    monitor.log_alphas = np.array(monitor.log_alphas).copy()
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.log_alphas, norm(y_val), norm(y_test), log_alpha_max)