Example No. 1
def test_beta_jac():
    #########################################################################
    # check that the methods computing the full Jacobian return the same
    # solution; a comparison against sklearn could be added here as well
    for key in models.keys():
        supp1, dense1, jac1 = get_beta_jac_iterdiff(X_train,
                                                    y_train,
                                                    dict_log_alpha[key],
                                                    tol=tol,
                                                    model=models[key])
        supp1sk, dense1sk, jac1sk = get_beta_jac_iterdiff(X_train,
                                                          y_train,
                                                          dict_log_alpha[key],
                                                          tol=tol,
                                                          model=models[key])
        supp2, dense2, jac2 = get_beta_jac_fast_iterdiff(X_train,
                                                         y_train,
                                                         dict_log_alpha[key],
                                                         get_v,
                                                         tol=tol,
                                                         model=models[key],
                                                         tol_jac=tol)
        supp3, dense3, jac3 = get_beta_jac_iterdiff(X_train_s,
                                                    y_train,
                                                    dict_log_alpha[key],
                                                    tol=tol,
                                                    model=models[key])
        supp4, dense4, jac4 = get_beta_jac_fast_iterdiff(X_train_s,
                                                         y_train,
                                                         dict_log_alpha[key],
                                                         get_v,
                                                         tol=tol,
                                                         model=models[key],
                                                         tol_jac=tol)

        assert np.all(supp1 == supp1sk)
        assert np.all(supp1 == supp2)
        assert np.allclose(dense1, dense1sk)
        assert np.allclose(dense1, dense2)
        assert np.allclose(jac1, jac2)

        assert np.all(supp2 == supp3)
        assert np.allclose(dense2, dense3)
        assert np.allclose(jac2, jac3)

        assert np.all(supp3 == supp4)
        assert np.allclose(dense3, dense4)
        assert np.allclose(jac3, jac4)

        get_beta_jac_t_v_implicit(X_train,
                                  y_train,
                                  dict_log_alpha[key],
                                  get_v,
                                  model=models[key])
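
A minimal sketch of the sklearn comparison the comment alludes to, for the Lasso case; it assumes the models dict has a "lasso" entry and that dict_log_alpha["lasso"] stores log(alpha) in sklearn's 1/(2n) least-squares scaling (the convention used in Examples 4 and 7):

from sklearn import linear_model

# hypothetical cross-check against sklearn's coordinate descent solver
estimator = linear_model.Lasso(alpha=np.exp(dict_log_alpha["lasso"]),
                               fit_intercept=False, tol=1e-16,
                               max_iter=100000)
estimator.fit(X_train, y_train)
supp_sk = estimator.coef_ != 0
dense_sk = estimator.coef_[supp_sk]
# the support and coefficients should then match supp1 and dense1, e.g.
# assert np.all(supp1 == supp_sk) and np.allclose(dense1, dense_sk, atol=1e-4)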
Example No. 2
    def get_val(self, log_alpha, tol=1e-3):
        # TODO add warm start
        mask, dense, _ = get_beta_jac_iterdiff(
            self.model.X, self.model.y, log_alpha, self.model,
            tol=tol, mask0=self.mask0, dense0=self.dense0, compute_jac=False)
        mask2, dense2, _ = get_beta_jac_iterdiff(
            self.model.X, self.model.y + self.epsilon * self.delta,
            log_alpha, self.model,
            tol=tol, compute_jac=False)

        val = self.value(mask, dense, mask2, dense2)

        return val
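
The second solve on the perturbed targets y + epsilon * delta is what enables a finite-difference SURE-type criterion; schematically, reusing the formulas that Example 15 applies to its two solutions (mask, dense) and (mask2, dense2):

# dof = (X[:, mask2] @ dense2 - X[:, mask] @ dense) @ delta / epsilon
# val = norm(y - X[:, mask] @ dense) ** 2 - n_samples * sigma ** 2
#       + 2 * sigma ** 2 * dof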
Example No. 3
def get_beta_jac_backward(X_train,
                          y_train,
                          log_alpha,
                          X_val,
                          y_val,
                          mask0=None,
                          dense0=None,
                          jac0=None,
                          tol=1e-3,
                          maxit=100,
                          model="lasso"):
    n_samples, n_features = X_train.shape

    mask, dense, list_sign = get_beta_jac_iterdiff(X_train,
                                                   y_train,
                                                   log_alpha,
                                                   mask0=mask0,
                                                   dense0=dense0,
                                                   jac0=jac0,
                                                   tol=tol,
                                                   maxit=maxit,
                                                   compute_jac=False,
                                                   model="lasso",
                                                   backward=True)

    v = np.zeros(n_features)
    v[mask] = 2 * X_val[:, mask].T @ (X_val[:, mask] @ dense -
                                      y_val) / X_val.shape[0]

    jac = get_only_jac_backward(X_train, np.exp(log_alpha), list_sign, v)
    return mask, dense, jac
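
A quick sanity check one could run on the returned hypergradient, sketched with a hypothetical mse_val helper and a scalar log_alpha; it only uses the function defined above:

from numpy.linalg import norm

def mse_val(la):
    # validation MSE of the solution at regularization exp(la)
    mask, dense, _ = get_beta_jac_backward(
        X_train, y_train, la, X_val, y_val, tol=1e-12)
    return norm(X_val[:, mask] @ dense - y_val) ** 2 / X_val.shape[0]

eps = 1e-4
fd = (mse_val(log_alpha + eps) - mse_val(log_alpha - eps)) / (2 * eps)
# fd should approach the returned jac as the solver tolerance tightens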
Example No. 4
def test_beta_jac():
    supp1, dense1, jac1 = get_beta_jac_iterdiff(X_train,
                                                y_train,
                                                np.array(
                                                    [log_alpha1, log_alpha2]),
                                                tol=tol,
                                                model=model,
                                                compute_jac=True,
                                                max_iter=max_iter)

    estimator = linear_model.ElasticNet(alpha=(alpha_1 + alpha_2),
                                        fit_intercept=False,
                                        l1_ratio=alpha_1 / (alpha_1 + alpha_2),
                                        tol=1e-16,
                                        max_iter=max_iter)
    estimator.fit(X_train, y_train)

    supp2, dense2, jac2 = get_beta_jac_fast_iterdiff(
        X_train,
        y_train,
        np.array([log_alpha1, log_alpha2]),
        get_v,
        tol=tol,
        model=model,
        tol_jac=1e-16,
        max_iter=max_iter,
        niter_jac=10000)
    assert np.allclose(dense1, estimator.coef_[estimator.coef_ != 0])
    assert np.all(supp1 == supp2)
    assert np.allclose(dense1, dense2)
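
The ElasticNet parameters above come from matching alpha_1 * ||w||_1 + (alpha_2 / 2) * ||w||_2^2 against sklearn's alpha * l1_ratio * ||w||_1 + (alpha * (1 - l1_ratio) / 2) * ||w||_2^2; a one-line check of the mapping:

alpha_sk = alpha_1 + alpha_2
l1_ratio = alpha_1 / (alpha_1 + alpha_2)
assert np.isclose(alpha_sk * l1_ratio, alpha_1)        # L1 weight
assert np.isclose(alpha_sk * (1 - l1_ratio), alpha_2)  # L2 weight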
Example No. 5
    def get_val(self, log_alpha, tol=1e-3):
        mask, dense, _ = get_beta_jac_iterdiff(
            self.model.X, self.model.y, log_alpha, self.model,
            max_iter=self.model.max_iter, tol=tol, compute_jac=False)
        mask, dense = self.model.get_primal(mask, dense)
        val = self.value(mask, dense)
        self.value_test(mask, dense)
        return val
Example No. 6
def test_beta_jac():
    #########################################################################
    # check that the methods computing the full Jacobian return the same
    # solution; a comparison against sklearn could be added here as well
    for model in models:
        supp1, dense1, jac1 = get_beta_jac_iterdiff(X_train,
                                                    y_train,
                                                    dict_log_alpha[model],
                                                    tol=tol,
                                                    model=model)
        supp2, dense2, jac2 = get_beta_jac_fast_iterdiff(X_train,
                                                         y_train,
                                                         dict_log_alpha[model],
                                                         X_test,
                                                         y_test,
                                                         tol=tol,
                                                         model=model,
                                                         tol_jac=tol)
        supp3, dense3, jac3 = get_beta_jac_iterdiff(X_train_s,
                                                    y_train,
                                                    dict_log_alpha[model],
                                                    tol=tol,
                                                    model=model)
        supp4, dense4, jac4 = get_beta_jac_fast_iterdiff(X_train_s,
                                                         y_train,
                                                         dict_log_alpha[model],
                                                         X_test,
                                                         y_test,
                                                         tol=tol,
                                                         model=model,
                                                         tol_jac=tol)

        assert np.all(supp1 == supp2)
        assert np.allclose(dense1, dense2)
        assert np.allclose(jac1, jac2)

        assert np.all(supp2 == supp3)
        assert np.allclose(dense2, dense3)
        assert np.allclose(jac2, jac3)

        assert np.all(supp3 == supp4)
        assert np.allclose(dense3, dense4)
        assert np.allclose(jac3, jac4)
Example No. 7
def test_beta_jac(model):
    supp1, dense1, jac1 = get_beta_jac_iterdiff(
        X_train, y_train, log_alpha, tol=tol,
        model=model, compute_jac=True, max_iter=1000)

    clf = LogisticRegression(penalty="l1", tol=1e-12, C=(
        1 / (alpha * n_samples)), fit_intercept=False, max_iter=100000,
        solver="saga")
    clf.fit(X_train, y_train)
    supp_sk = clf.coef_ != 0
    dense_sk = clf.coef_[supp_sk]

    supp2, dense2, jac2 = get_beta_jac_fast_iterdiff(
        X_train, y_train, log_alpha,
        get_v, tol=tol, model=model, tol_jac=1e-12)

    supp3, dense3, jac3 = get_beta_jac_iterdiff(
        X_train_s, y_train, log_alpha, tol=tol,
        model=model, compute_jac=True, max_iter=1000)

    supp4, dense4, jac4 = get_beta_jac_fast_iterdiff(
        X_train_s, y_train, log_alpha,
        get_v, tol=tol, model=model, tol_jac=1e-12)

    assert np.all(supp1 == supp_sk)
    assert np.allclose(dense1, dense_sk, atol=1e-4)

    assert np.all(supp1 == supp2)
    assert np.allclose(dense1, dense2)
    assert np.allclose(jac1, jac2, atol=1e-4)

    assert np.all(supp2 == supp3)
    assert np.allclose(dense2, dense3)
    assert np.allclose(jac2, jac3, atol=1e-4)

    assert np.all(supp3 == supp4)
    assert np.allclose(dense3, dense4)
    assert np.allclose(jac3, jac4, atol=1e-4)
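
The C = 1 / (alpha * n_samples) used above is the usual translation between sklearn's sum-of-losses formulation and a mean-loss formulation; sketched:

# sklearn (penalty="l1"):
#   min_w  ||w||_1 + C * sum_i log(1 + exp(-y_i * x_i @ w))
# dividing the objective by C * n_samples gives
#   min_w  1 / (C * n_samples) * ||w||_1 + mean_i log(1 + exp(-y_i * x_i @ w))
# so a mean-loss model penalized by alpha * ||w||_1 matches
#   C = 1 / (alpha * n_samples)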
Example No. 8
def test_beta_jac(model):
    supp1, dense1, jac1 = get_beta_jac_iterdiff(X_train,
                                                y_train,
                                                log_C,
                                                tol=tol,
                                                model=model,
                                                compute_jac=True,
                                                max_iter=10000)

    beta = np.zeros(n_samples)
    beta[supp1] = dense1
    full_supp = np.logical_and(beta > 0, beta < C)

    Q = (y_train[:, np.newaxis] * X_train) @ (y_train[:, np.newaxis] *
                                              X_train).T
    v = (np.eye(n_samples, n_samples) - Q)[np.ix_(
        full_supp, beta >= C)] @ (np.ones((beta >= C).sum()) * C)

    jac_dense = np.linalg.solve(Q[np.ix_(full_supp, full_supp)], v)
    assert np.allclose(jac_dense, jac1[dense1 < C])

    if issparse(model.X):
        primal = np.sum(X_train_s[supp1, :].T.multiply(y_train[supp1] *
                                                       dense1),
                        axis=1)
        primal = primal.T
    else:
        primal = np.sum(y_train[supp1] * dense1 * X_train[supp1, :].T, axis=1)
    clf = LinearSVC(loss="hinge",
                    fit_intercept=False,
                    C=C,
                    tol=tol,
                    max_iter=100000)
    clf.fit(X_train, y_train)
    supp2, dense2, jac2 = get_beta_jac_fast_iterdiff(X_train,
                                                     y_train,
                                                     log_C,
                                                     get_v,
                                                     tol=tol,
                                                     model=model,
                                                     tol_jac=1e-16,
                                                     max_iter=10000)
    assert np.allclose(primal, clf.coef_)

    assert np.all(supp1 == supp2)
    assert np.allclose(dense1, dense2)
    assert np.allclose(jac1, jac2, atol=1e-4)
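
The primal reconstruction above is the standard dual-to-primal map for the hinge-loss SVM without intercept; as a sketch:

# w = sum_i y_i * beta_i * x_i over the support supp1,
# which is what the np.sum(...) lines compute for dense and sparse X,
# and why primal is compared against clf.coef_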
Example No. 9
def get_beta_jac_fast_iterdiff(X,
                               y,
                               log_alpha,
                               get_v,
                               model,
                               mask0=None,
                               dense0=None,
                               jac0=None,
                               tol=1e-3,
                               max_iter=1000,
                               niter_jac=1000,
                               tol_jac=1e-6,
                               verbose=False):
    n_samples, n_features = X.shape

    mask, dense, _ = get_beta_jac_iterdiff(X,
                                           y,
                                           log_alpha,
                                           mask0=mask0,
                                           dense0=dense0,
                                           jac0=jac0,
                                           tol=tol,
                                           max_iter=max_iter,
                                           compute_jac=False,
                                           model=model,
                                           verbose=verbose)

    dbeta0_new = model._init_dbeta0(mask, mask0, jac0)
    reduce_alpha = model._reduce_alpha(np.exp(log_alpha), mask)

    v = None
    _, r = model._init_beta_r(X, y, mask, dense)
    jac = get_only_jac(model.reduce_X(mask),
                       model.reduce_y(mask),
                       r,
                       reduce_alpha,
                       model.sign(dense, log_alpha),
                       v,
                       dbeta=dbeta0_new,
                       niter_jac=niter_jac,
                       tol_jac=tol_jac,
                       model=model,
                       mask=mask,
                       dense=dense,
                       verbose=verbose)
    return mask, dense, jac
Example No. 10
    def get_beta_jac_v(self,
                       X,
                       y,
                       log_alpha,
                       model,
                       get_v,
                       mask0=None,
                       dense0=None,
                       quantity_to_warm_start=None,
                       max_iter=1000,
                       tol=1e-3,
                       compute_jac=False,
                       full_jac_v=False):
        mask, dense, list_sign = get_beta_jac_iterdiff(X,
                                                       y,
                                                       log_alpha,
                                                       model,
                                                       mask0=mask0,
                                                       dense0=dense0,
                                                       jac0=None,
                                                       max_iter=max_iter,
                                                       tol=tol,
                                                       compute_jac=compute_jac,
                                                       return_all=True)
        v = np.zeros(X.shape[1])
        v[mask] = get_v(mask, dense)
        jac_v = get_only_jac_backward(X,
                                      np.exp(log_alpha),
                                      list_sign,
                                      v,
                                      model,
                                      jac_v0=quantity_to_warm_start)

        if not full_jac_v:
            jac_v = model.get_mask_jac_v(mask, jac_v)
        # jac_v is also returned as the quantity to warm start the next call
        return mask, dense, jac_v, jac_v
Example No. 11
def linear_cv(dataset_name, tol=1e-3, compute_jac=True, model_name="lasso"):

    X, y = load_libsvm(dataset_name)
    X = csc_matrix(X)
    n_samples, n_features = X.shape
    p_alpha = p_alphas[dataset_name, model_name]

    max_iter = max_iters[dataset_name]
    if model_name == "lasso":
        model = Lasso(X, y, 0, max_iter=max_iter, tol=tol)
    elif model_name == "logreg":
        model = SparseLogreg(X, y, 0, max_iter=max_iter, tol=tol)

    alpha_max = np.exp(model.compute_alpha_max())

    alpha = p_alpha * alpha_max
    if model_name == "lasso":
        clf = Lasso_cel(alpha=alpha,
                        fit_intercept=False,
                        warm_start=True,
                        tol=tol * norm(y)**2 / 2,
                        max_iter=10000)
        clf.fit(X, y)
        beta_star = clf.coef_
        mask = beta_star != 0
        dense = beta_star[mask]
    elif model_name == "logreg":
        # beta_star could also be computed with sklearn's LogisticRegression
        # (penalty='l1', C=1 / (alpha * n_samples), fit_intercept=False,
        # solver='liblinear' or 'saga'), but blitzl1 is used here instead

        blitzl1.set_use_intercept(False)
        blitzl1.set_tolerance(1e-32)
        blitzl1.set_verbose(True)
        prob = blitzl1.LogRegProblem(X, y)
        clf = prob.solve(alpha * n_samples)
        beta_star = clf.x
        mask = beta_star != 0
        mask = np.array(mask)
        dense = beta_star[mask]
    v = -n_samples * alpha * np.sign(beta_star[mask])
    mat_to_inv = model.get_hessian(mask, dense, np.log(alpha))

    jac_temp = cg(mat_to_inv, v, tol=1e-10)
    jac_star = np.zeros(n_features)
    jac_star[mask] = jac_temp[0]

    log_alpha = np.log(alpha)

    list_beta, list_jac = get_beta_jac_iterdiff(X,
                                                y,
                                                log_alpha,
                                                model,
                                                save_iterates=True,
                                                tol=tol,
                                                max_iter=max_iter,
                                                compute_jac=compute_jac)

    diff_beta = norm(list_beta - beta_star, axis=1)
    diff_jac = norm(list_jac - jac_star, axis=1)

    supp_star = beta_star != 0
    n_iter = list_beta.shape[0]
    for i in np.arange(n_iter)[::-1]:
        supp = list_beta[i, :] != 0
        if not np.all(supp == supp_star):
            supp_id = i + 1
            break
        supp_id = 0

    return dataset_name, p_alpha, diff_beta, diff_jac, n_iter, supp_id
Example No. 12
def linear_cv(dataset_name, max_iter=1000, tol=1e-3, compute_jac=True):
    max_iter = max_iters[dataset_name]
    X, y = load_libsvm(dataset_name)
    X = X.tocsr()
    num_nonzeros = np.diff(X.indptr)
    X = X[num_nonzeros != 0]
    y = y[num_nonzeros != 0]
    n_samples, n_features = X.shape
    C = Cs[dataset_name]
    # Computation of the dual solution of the SVM via SDCA

    clf = SDCAClassifier(
        alpha=1/(C * n_samples), loss='hinge', verbose=True, tol=1e-16,
        max_iter=max_iter)
    clf.fit(X, y)
    beta_star = np.abs(clf.dual_coef_[0])
    primal_star = np.sum(X.T.multiply(y * beta_star), axis=1)
    # full support: dual coordinates strictly inside (0, C)
    full_supp = np.logical_and(
        np.logical_not(np.isclose(beta_star, 0)),
        np.logical_not(np.isclose(beta_star, C)))
    # Q = (X.multiply(y[:, np.newaxis]))  @  (X.multiply(y[:, np.newaxis])).T
    yX = X.multiply(y[:, np.newaxis])
    yX = yX.tocsr()

    # TODO to optimize
    temp3 = np.zeros(n_samples)
    temp3[np.isclose(beta_star, C)] = C
    v = temp3[full_supp] - yX[full_supp, :] @ (
        yX[np.isclose(beta_star, C), :].T @ temp3[np.isclose(beta_star, C)])
    temp = yX[full_supp, :] @ yX[full_supp, :].T
    temp = csc_matrix(temp)
    print("size system to solve %i" % v.shape[0])
    jac_dense = cg(temp, v, tol=1e-12)
    jac_star = np.zeros(n_samples)
    jac_star[full_supp] = jac_dense[0]
    jac_star[np.isclose(beta_star, C)] = C
    primal_jac_star = np.sum(X.T.multiply(y * jac_star), axis=1)
    model = SVM(X, y, np.log(C), max_iter=max_iter, tol=tol)
    list_beta, list_jac = get_beta_jac_iterdiff(
        X, y, np.log(C), model, save_iterates=True, tol=1e-32,
        max_iter=max_iter, compute_jac=True)

    M = X.T @ (list_beta * y).T
    M_jac = X.T @ (list_jac * y).T
    diff_beta = norm(M - primal_star, axis=0)
    diff_jac = norm(M_jac - primal_jac_star, axis=0)
    full_supp_star = np.logical_and(
        np.logical_not(np.isclose(list_beta[-1], 0)),
        np.logical_not(np.isclose(list_beta[-1], C)))
    n_iter = list_beta.shape[0]
    for i in np.arange(n_iter)[::-1]:
        full_supp = np.logical_and(
            np.logical_not(np.isclose(list_beta[i, :], 0)),
            np.logical_not(np.isclose(list_beta[i, :], C)))
        if not np.all(full_supp == full_supp_star):
            supp_id = i + 1
            break
        supp_id = 0
    return dataset_name, C, diff_beta, diff_jac, n_iter, supp_id
Example No. 13
def get_beta_jac_t_v_implicit(
        X_train, y_train, log_alpha, X_val, y_val,
        mask0=None, dense0=None, jac0=None, tol=1e-3, model="lasso",
        sk=False, maxit=1000, sol_lin_sys=None, criterion="cv", n=1,
        sigma=0, delta=0, epsilon=0):
    alpha = np.exp(log_alpha)
    n_samples, n_features = X_train.shape
    # compute beta using sklearn lasso
    if sk:
        clf = Lasso(
            alpha=alpha, fit_intercept=False, warm_start=True, tol=tol,
            max_iter=10000)
        clf.fit(X_train, y_train)
        coef_ = clf.coef_
        mask = coef_ != 0
        dense = coef_[mask]
    # compute beta using vanilla numba cd lasso
    else:
        mask, dense = get_beta_jac_iterdiff(
            X_train, y_train, log_alpha, mask0=mask0, dense0=dense0,
            maxit=maxit, tol=tol,
            compute_jac=False, jac0=None)


    if criterion == "cv":
        v = 2 * X_val[:, mask].T @ (
            X_val[:, mask] @ dense - y_val) / X_val.shape[0]
    elif criterion == "sure":
        if n == 1:
            v = 2 * X_train[:, mask].T @ (
                X_train[:, mask] @ dense -
                y_train - 2 * sigma ** 2 / epsilon * delta)
        elif n == 2:
            v = 2 * sigma ** 2 * X_train[:, mask].T @ delta / epsilon

    is_sparse = issparse(X_train)

    if not alpha.shape:
        alphas = np.ones(n_features) * alpha
    else:
        alphas = alpha.copy()

    if sol_lin_sys is not None:
        sol0 = init_dbeta0_new(sol_lin_sys, mask, mask0)
    else:
        size_mat = mask.sum()
        sol0 = np.zeros(size_mat)

    mat_to_inv = X_train[:, mask].T @ X_train[:, mask]
    size_mat = mask.sum()

    if is_sparse:
        try:
            sol = cg(
                mat_to_inv, - n_samples * v,
                x0=sol0, tol=1e-15, maxiter=int(1e5))
            if sol[1] == 0:
                jac = sol[0]
            else:
                raise ValueError('cg did not converge.')
        except Exception:
            print("Matrix to invert was badly conditioned")
            size_mat = mask.sum()
            reg_amount = 1e-7 * norm(X_train[:, mask].todense(), ord=2) ** 2
            sol = cg(
                mat_to_inv + reg_amount * identity(size_mat),
                - n_samples * v, x0=sol0,
                atol=1e-3)
            jac = sol[0]
    else:
        try:
            jac = solve(
                X_train[:, mask].T @ X_train[:, mask],
                - n_samples * v,
                assume_a='pos')
        except Exception:
            print("Matrix to invert was badly conditioned")
            size_mat = mask.sum()
            reg_amount = 1e-9 * norm(X_train[:, mask], ord=2) ** 2
            jac = solve(
                X_train[:, mask].T @ X_train[:, mask] +
                reg_amount * np.eye(size_mat),
                - n_samples * v,
                assume_a='pos')

    if model == "lasso":
        jac_t_v = alpha * np.sign(dense) @ jac
    elif model == "wlasso":
        jac_t_v = np.zeros(n_features)
        jac_t_v[mask] = alphas[mask] * np.sign(dense) * jac

    return mask, dense, jac_t_v, jac
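
For orientation, a sketch of where the linear system above comes from in the lasso case:

# optimality on the support S:
#   X_S.T @ (X_S @ beta_S - y) + n_samples * alpha * sign(beta_S) = 0
# differentiating with respect to alpha:
#   (X_S.T @ X_S) @ (d beta_S / d alpha) = -n_samples * sign(beta_S)
# hence, with jac solving (X_S.T @ X_S) @ jac = -n_samples * v,
#   jac_t_v = alpha * sign(beta_S) @ jac
# equals v.T @ (d beta_S / d log_alpha) by symmetry of X_S.T @ X_S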
Example No. 14
def get_beta_jac_t_v_implicit(X_train,
                              y_train,
                              log_alpha,
                              get_v,
                              mask0=None,
                              dense0=None,
                              tol=1e-3,
                              model="lasso",
                              sk=False,
                              max_iter=1000,
                              sol_lin_sys=None,
                              n=1,
                              sigma=0,
                              delta=0,
                              epsilon=0):
    alpha = np.exp(log_alpha)
    n_samples, n_features = X_train.shape

    mask, dense, _ = get_beta_jac_iterdiff(X_train,
                                           y_train,
                                           log_alpha,
                                           mask0=mask0,
                                           dense0=dense0,
                                           tol=tol,
                                           max_iter=max_iter,
                                           compute_jac=False,
                                           model=model)

    mat_to_inv = model.get_hessian(mask, dense, log_alpha)
    size_mat = mat_to_inv.shape[0]

    maskp, densep = model.get_primal(mask, dense)
    v = get_v(maskp, densep)

    # TODO: to clean
    is_sparse = issparse(X_train)
    if not alpha.shape:
        alphas = np.ones(n_features) * alpha
    else:
        alphas = alpha.copy()

    if sol_lin_sys is not None:
        sol0 = init_dbeta0_new(sol_lin_sys, mask, mask0)
    else:
        size_mat = mat_to_inv.shape[0]
        sol0 = np.zeros(size_mat)
    try:
        sol = cg(
            mat_to_inv,
            -model.restrict_full_supp(mask, dense, v),
            x0=sol0,
            tol=tol)
        if sol[1] == 0:
            sol_lin_sys = sol[0]
        else:
            raise ValueError('cg did not converge.')
    except Exception:
        print("Matrix to invert was badly conditioned")
        size_mat = mat_to_inv.shape[0]
        if is_sparse:
            reg_amount = 1e-7 * norm(model.reduce_X(mask).todense(), ord=2)**2
            mat_to_inv += reg_amount * identity(size_mat)
        else:
            reg_amount = 1e-7 * norm(model.reduce_X(mask), ord=2)**2
            mat_to_inv += reg_amount * np.eye(size_mat)
        sol = cg(mat_to_inv,
                 -model.restrict_full_supp(mask, dense, v),
                 x0=sol0,
                 atol=1e-3)
        sol_lin_sys = sol[0]
    jac_t_v = model._get_jac_t_v(sol_lin_sys, mask, dense, alphas, v.copy())

    return mask, dense, jac_t_v, sol[0]
Example No. 15
def get_val_grad(X_train,
                 y_train,
                 log_alpha,
                 X_val,
                 y_val,
                 X_test,
                 y_test,
                 tol,
                 monitor,
                 warm_start,
                 method="implicit",
                 maxit=1000,
                 niter_jac=1000,
                 model="lasso",
                 tol_jac=1e-3,
                 convexify=False,
                 gamma=1e-2,
                 criterion="cv",
                 C=2.0,
                 gamma_sure=0.3,
                 sigma=1,
                 random_state=42,
                 beta_star=None):
    """
    Parameters
    --------------
    X_train: np.array, shape (n_samples, n_features)
        observations used for training
    y_train: np.array, shape (n_samples,)
        targets used for training
    log_alpha: float
        log of the regularization coefficient alpha
    X_val: np.array, shape (n_samples, n_features)
        observations used for cross-validation
    y_val: np.array, shape (n_samples,)
        targets used for cross-validation
    X_test: np.array, shape (n_samples, n_features)
        observations used for testing
    y_test: np.array, shape (n_samples,)
        targets used for testing
    tol : float
        tolerance for the inner optimization solver
    monitor: Monitor object
        used to store the value of the cross-validation function
    warm_start: WarmStart object
        used for warm start for all methods
    method: string
        method used to compute the hypergradient; one of
        "implicit", "forward", "implicit_forward", "backward", "hyperopt"
    maxit: int
        maximum number of iterations of the inner optimization solver
    niter_jac: int
        maximum number of iterations in the Jacobian computation
        for the "implicit_forward" method
    model: string
        model used; one of "lasso", "wlasso", "mcp"
    tol_jac: float
        tolerance for the Jacobian loop
    convexify: bool
        True if you want to regularize the problem
    gamma: non-negative float
        convexification coefficient
    criterion: string
        criterion to optimize during hyperparameter optimization
        you may choose between "cv" and "sure"
    C: float
        constant for the sure problem
    gamma_sure: float
        constant for the sure problem
    sigma: float
        constant for the sure problem
    random_state: int
        seed used to sample the random noise for the sure criterion
    beta_star: np.array, shape (n_features,)
        true coefficients of the underlying model (if known),
        used to compute metrics
    """

    n_samples, n_features = X_train.shape

    # warm start for cross validation loss and sure
    mask0, dense0, jac0, mask20, dense20, jac20 = (warm_start.mask_old,
                                                   warm_start.beta_old,
                                                   warm_start.dbeta_old,
                                                   warm_start.mask_old2,
                                                   warm_start.beta_old2,
                                                   warm_start.dbeta_old2)

    if criterion == "cv":
        mask2 = None
        dense2 = None
        jac2 = None
        rmse = None
        if method == "implicit":
            sol_lin_sys = warm_start.sol_lin_sys
            mask, dense, jac, sol_lin_sys = get_beta_jac_t_v_implicit(
                X_train,
                y_train,
                log_alpha,
                X_val,
                y_val,
                mask0=mask0,
                dense0=dense0,
                jac0=jac0,
                tol=tol,
                model=model)
            warm_start.set_sol_lin_sys(sol_lin_sys)
        elif method == "forward":
            mask, dense, jac = get_beta_jac_iterdiff(X_train,
                                                     y_train,
                                                     log_alpha,
                                                     mask0=mask0,
                                                     dense0=dense0,
                                                     jac0=jac0,
                                                     tol=tol,
                                                     maxit=maxit,
                                                     model=model)

        elif method == "implicit_forward":
            mask, dense, jac = get_beta_jac_fast_iterdiff(X_train,
                                                          y_train,
                                                          log_alpha,
                                                          X_val,
                                                          y_val,
                                                          mask0,
                                                          dense0,
                                                          jac0,
                                                          tol,
                                                          maxit,
                                                          niter_jac=niter_jac,
                                                          tol_jac=tol_jac,
                                                          model=model)
        elif method == "backward":
            mask, dense, jac = get_beta_jac_backward(X_train,
                                                     y_train,
                                                     log_alpha,
                                                     X_val,
                                                     y_val,
                                                     mask0=mask0,
                                                     dense0=dense0,
                                                     jac0=jac0,
                                                     tol=tol,
                                                     maxit=maxit)
        elif method == "hyperopt":
            mask, dense = get_beta_jac_iterdiff(X_train,
                                                y_train,
                                                log_alpha,
                                                mask0=mask0,
                                                dense0=dense0,
                                                tol=tol,
                                                maxit=maxit,
                                                model=model,
                                                compute_jac=False)
            jac = None
        else:
            raise ValueError('No method called %s' % method)
        # value of the objective function on the validation loss
        if convexify:
            val = norm(y_val - X_val[:, mask] @ dense)**2 / X_val.shape[0]
            val += gamma * np.sum(np.exp(log_alpha)**2)
        else:
            val = norm(y_val - X_val[:, mask] @ dense)**2 / X_val.shape[0]
        # value of the objective function on the test loss
        val_test = norm(y_test - X_test[:, mask] @ dense)**2 / X_test.shape[0]

        if method in ("implicit", "backward", "hyperopt"):
            grad = jac
        else:
            if model in ("lasso", "mcp"):
                grad = 2 * jac.T @ (X_val[:, mask].T @ (
                    X_val[:, mask] @ dense - y_val)) / X_val.shape[0]

            elif model == "wlasso":
                grad = np.zeros(n_features)
                grad[mask] = 2 * jac.T @ (X_val[:, mask].T @ (
                    X_val[:, mask] @ dense - y_val)) / X_val.shape[0]
                if convexify:
                    grad += gamma * np.exp(log_alpha)
    elif criterion == "sure":
        val_test = 0
        epsilon = C * sigma / (n_samples)**gamma_sure
        # TODO properly
        rng = np.random.RandomState(random_state)
        delta = rng.randn(n_samples)  # sample random noise for MCMC step
        y_train2 = y_train + epsilon * delta
        if method == "implicit":
            # TODO
            sol_lin_sys = warm_start.sol_lin_sys
            mask, dense, jac, sol_lin_sys = get_beta_jac_t_v_implicit(
                X_train,
                y_train,
                log_alpha,
                X_val,
                y_val,
                mask0=mask0,
                dense0=dense0,
                jac0=jac0,
                tol=tol,
                model=model,
                criterion="sure",
                n=1,
                sol_lin_sys=sol_lin_sys,
                sigma=sigma,
                epsilon=epsilon,
                delta=delta)
            sol_lin_sys2 = warm_start.sol_lin_sys2
            mask2, dense2, jac2, sol_lin_sys2 = get_beta_jac_t_v_implicit(
                X_train,
                y_train,
                log_alpha,
                X_val,
                y_val,
                mask0=mask20,
                dense0=dense20,
                jac0=jac20,
                tol=tol,
                model=model,
                criterion="sure",
                n=2,
                sol_lin_sys=sol_lin_sys2,
                sigma=sigma,
                epsilon=epsilon,
                delta=delta)
            warm_start.set_sol_lin_sys(sol_lin_sys)
        elif method == "forward":
            mask, dense, jac = get_beta_jac_iterdiff(X_train,
                                                     y_train,
                                                     log_alpha,
                                                     mask0=mask0,
                                                     dense0=dense0,
                                                     jac0=jac0,
                                                     tol=tol,
                                                     maxit=maxit,
                                                     model=model)
            mask2, dense2, jac2 = get_beta_jac_iterdiff(X_train,
                                                        y_train2,
                                                        log_alpha,
                                                        mask0=mask20,
                                                        dense0=dense20,
                                                        jac0=jac20,
                                                        tol=tol,
                                                        maxit=maxit,
                                                        model=model)
        elif method == "implicit_forward":
            # TODO modify
            mask, dense, jac = get_beta_jac_fast_iterdiff(X_train,
                                                          y_train,
                                                          log_alpha,
                                                          None,
                                                          None,
                                                          mask0,
                                                          dense0,
                                                          jac0,
                                                          tol,
                                                          maxit,
                                                          criterion="sure",
                                                          niter_jac=niter_jac,
                                                          tol_jac=tol_jac,
                                                          model=model,
                                                          sigma=sigma,
                                                          epsilon=epsilon,
                                                          delta=delta,
                                                          n=1)
            mask2, dense2, jac2 = get_beta_jac_fast_iterdiff(
                X_train,
                y_train2,
                log_alpha,
                X_val,
                y_val,
                mask20,
                dense20,
                jac20,
                tol,
                maxit,
                criterion="sure",
                niter_jac=niter_jac,
                tol_jac=tol_jac,
                model=model,
                sigma=sigma,
                epsilon=epsilon,
                delta=delta,
                n=2)
        elif method == "backward":
            mask, dense, jac = get_beta_jac_backward(X_train,
                                                     y_train,
                                                     log_alpha,
                                                     X_val,
                                                     y_val,
                                                     mask0=mask0,
                                                     dense0=dense0,
                                                     jac0=jac0,
                                                     tol=tol,
                                                     maxit=maxit)
            mask2, dense2, jac2 = get_beta_jac_backward(X_train,
                                                        y_train2,
                                                        log_alpha,
                                                        X_val,
                                                        y_val,
                                                        mask0=mask20,
                                                        dense0=dense20,
                                                        jac0=jac20,
                                                        tol=tol,
                                                        maxit=maxit)
        elif method == "hyperopt":
            mask, dense = get_beta_jac_iterdiff(X_train,
                                                y_train,
                                                log_alpha,
                                                mask0=mask0,
                                                dense0=dense0,
                                                tol=tol,
                                                maxit=maxit,
                                                model=model,
                                                compute_jac=False)
            mask2, dense2 = get_beta_jac_iterdiff(X_train,
                                                  y_train2,
                                                  log_alpha,
                                                  mask0=mask20,
                                                  dense0=dense20,
                                                  tol=tol,
                                                  maxit=maxit,
                                                  model=model,
                                                  compute_jac=False)
            jac, jac2 = None, None

        # compute the degree of freedom
        dof = (X_train[:, mask2] @ dense2 - X_train[:, mask] @ dense) @ delta
        dof /= epsilon
        # compute the value of the sure
        val = norm(y_train - X_train[:, mask] @ dense)**2
        val -= n_samples * sigma**2
        val += 2 * sigma**2 * dof
        if convexify:
            val += gamma * np.sum(np.exp(log_alpha)**2)

        if beta_star is not None:
            diff_beta = beta_star.copy()
            diff_beta[mask] -= dense
            rmse = norm(diff_beta)
        else:
            rmse = None

        if method == "hyperopt":
            monitor(val, None, log_alpha, rmse=rmse)
            return val
        if method in ("implicit", "backward", "hyperopt"):
            grad = jac
        elif model == "lasso":
            grad = 2 * jac.T @ X_train[:, mask].T @ (
                X_train[:, mask] @ dense - y_train -
                delta * sigma**2 / epsilon)
            grad += (2 * sigma**2 * jac2.T @ X_train[:, mask2].T @ delta /
                     epsilon)
        elif model == "wlasso":
            grad = np.zeros(n_features)
            grad[mask] = 2 * jac.T @ X_train[:, mask].T @ (
                X_train[:, mask] @ dense - y_train -
                delta * sigma**2 / epsilon)
            grad[mask2] += (2 * sigma**2 *
                            jac2.T @ X_train[:, mask2].T @ delta / epsilon)
            if convexify:
                grad += gamma * np.exp(log_alpha)

    warm_start(mask, dense, jac, mask2, dense2, jac2)
    if model == "lasso":
        monitor(val, val_test, log_alpha, grad, rmse=rmse)
    elif model in ("mcp", "wlasso"):
        monitor(val, val_test, log_alpha.copy(), grad)
    else:
        monitor(val, val_test, log_alpha.copy(), rmse=rmse)
    if method == "hyperopt":
        return val
    else:
        return val, np.array(grad)
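
A hypothetical driver for this function, sketched under the assumption that Monitor and WarmStart can be built with no arguments and expose the attributes read above (mask_old, beta_old, dbeta_old and their *2 counterparts), and that alpha_max is known:

# hypothetical grid of alphas below an assumed alpha_max
monitor = Monitor()
warm_start = WarmStart()
for alpha in alpha_max * np.geomspace(1, 1e-3, 10):
    val, grad = get_val_grad(
        X_train, y_train, np.log(alpha), X_val, y_val, X_test, y_test,
        tol=1e-5, monitor=monitor, warm_start=warm_start,
        method="forward", model="lasso")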
Example No. 16
def get_val_grid(
        X, y, log_alpha, X_val, y_val, X_test, y_test,
        tol, monitor, warm_start, mask0=None, dense0=None, maxit=1000,
        sk=False, criterion="cv", random_state=42,
        C=2.0, gamma_sure=0.3, sigma=1, beta_star=None):
    alpha = np.exp(log_alpha)
    n_samples, n_features = X.shape

    mask0, dense0, mask20, dense20 = (
        warm_start.mask_old, warm_start.beta_old, warm_start.mask_old2,
        warm_start.beta_old2)

    if criterion == "cv":
        mask2, dense2, rmse = None, None, None
        if sk:
            clf = Lasso(
                alpha=alpha, fit_intercept=False, warm_start=True, tol=tol)
            clf.fit(X, y)
            coef_ = clf.coef_
            mask = coef_ != 0
            dense = coef_[mask]
        else:
            mask, dense = get_beta_jac_iterdiff(
                X, y, log_alpha, mask0=mask0, dense0=dense0, maxit=maxit,
                tol=tol, compute_jac=False)
    elif criterion == "sure":
        val_test = 0
        epsilon = C * sigma / (n_samples) ** gamma_sure
        rng = np.random.RandomState(random_state)
        delta = rng.randn(n_samples)  # sample random noise for MCMC step
        y2 = y + epsilon * delta

        mask, dense = get_beta_jac_iterdiff(
            X, y, log_alpha, mask0=mask0, dense0=dense0, maxit=maxit,
            tol=tol, compute_jac=False)
        mask2, dense2 = get_beta_jac_iterdiff(
            X, y2, log_alpha, mask0=mask20, dense0=dense20, maxit=maxit,
            tol=tol, compute_jac=False)

    if criterion == "cv":
        val = norm(y_val - X_val[:, mask] @ dense) ** 2 / X_val.shape[0]
        val_test = norm(
            y_test - X_test[:, mask] @ dense) ** 2 / X_test.shape[0]
    elif criterion == "sure":
        val_test = 0
        dof = (X[:, mask2] @ dense2 - X[:, mask] @ dense) @ delta
        dof /= epsilon
        val = norm(y - X[:, mask] @ dense) ** 2
        val -= n_samples * sigma ** 2
        val += 2 * sigma ** 2 * dof

    warm_start(mask, dense, None, mask2, dense2, None)

    if beta_star is not None:
        diff_beta = beta_star.copy()
        diff_beta[mask] -= dense
        rmse = norm(diff_beta)
    else:
        rmse = None

    monitor(val, val_test, log_alpha.copy(), rmse=rmse)

    return mask, dense
Example No. 17
    def get_val(self, log_alpha, tol=1e-3):
        # TODO add warm start
        mask, dense, _ = get_beta_jac_iterdiff(
            self.model.X, self.model.y, log_alpha, self.model,
            tol=tol, compute_jac=False)
        self.value_test(mask, dense)
        return self.value(mask, dense)
Example No. 18
def get_beta_jac_fast_iterdiff(X,
                               y,
                               log_alpha,
                               X_val,
                               y_val,
                               mask0=None,
                               dense0=None,
                               jac0=None,
                               tol=1e-3,
                               maxit=100,
                               niter_jac=1000,
                               tol_jac=1e-6,
                               model="lasso",
                               criterion="cv",
                               sigma=1,
                               epsilon=0.1,
                               delta=None,
                               n=1):
    n_samples, n_features = X.shape

    if model == "mcp":
        mask, dense = get_beta_jac_iterdiff(X,
                                            y,
                                            log_alpha,
                                            mask0=mask0,
                                            dense0=dense0,
                                            jac0=jac0,
                                            tol=tol,
                                            maxit=maxit,
                                            compute_jac=False,
                                            model="mcp")
    else:
        mask, dense = get_beta_jac_iterdiff(X,
                                            y,
                                            log_alpha,
                                            mask0=mask0,
                                            dense0=dense0,
                                            jac0=jac0,
                                            tol=tol,
                                            maxit=maxit,
                                            compute_jac=False,
                                            model="lasso")

    # TODO this is dirty, to improve and to jit
    size_mat = mask.sum()
    if model == "lasso":
        if jac0 is not None:
            dbeta0_new = init_dbeta0_new(jac0, mask, mask0)
        else:
            dbeta0_new = np.zeros(size_mat)
    elif model == "mcp":
        # TODO add warm start
        if jac0 is None:
            dbeta0_new = np.zeros((size_mat, 2))
        else:
            dbeta0_new = init_dbeta0_mcp(jac0, mask, mask0)
    else:
        if jac0 is None:
            dbeta0_new = np.zeros((size_mat, size_mat))
        else:
            dbeta0_new = init_dbeta0_new_p(jac0, mask, mask0)

    if criterion == "cv":
        v = 2 * X_val[:, mask].T @ (X_val[:, mask] @ dense -
                                    y_val) / X_val.shape[0]
    elif criterion == "sure":
        if n == 1:
            v = 2 * X[:, mask].T @ (X[:, mask] @ dense - y -
                                    2 * sigma**2 / epsilon * delta)
        elif n == 2:
            v = 2 * sigma**2 * X[:, mask].T @ delta / epsilon
    jac = get_only_jac(X[:, mask],
                       np.exp(log_alpha),
                       np.sign(dense),
                       v,
                       dbeta=dbeta0_new,
                       niter_jac=niter_jac,
                       tol_jac=tol_jac,
                       model=model,
                       mask=mask,
                       dense=dense)

    return mask, dense, jac