Example #1
def test_anova_sparse():
    X_sp = sp.csr_matrix(X)
    for m in (2, 3):
        dense = anova_kernel(X, P, degree=m)
        sparse = anova_kernel(X_sp, P, degree=m)
        assert_array_almost_equal(dense, sparse, err_msg=(
            "ANOVA kernel sparse != dense for degree {}".format(m)))
Example #2
def _pbcd_epoch_slow(P, X, y, loss, regularizer, lams, degree, beta, gamma,
                     eta0, indices_feature, kernel):
    sum_viol = 0
    n_features = X.shape[1]
    for j in indices_feature:
        # compute prediction
        y_pred = _poly_predict(X, P.T, lams, kernel, degree=degree)

        # compute grad and inv_step_size
        x = X[:, j]
        notj_mask = np.arange(n_features) != j
        X_notj = X[:, notj_mask]
        P_notj = P[notj_mask]
        if kernel == "anova":
            grad_kernel = anova_kernel(P_notj.T, X_notj, degree=degree-1)
        else:
            grad_kernel = all_subsets_kernel(P_notj.T, X_notj)
        grad_kernel *= x  # (n_components, n_samples)
        grad_y = grad_kernel * lams[:, None]
        l2_reg = beta
        inv_step_size = loss.mu * np.sum(grad_y*grad_y) + l2_reg

        dloss = loss.dloss(y_pred, y)
        step = np.sum(dloss*grad_y, axis=1) + l2_reg * P[j]
        step /= inv_step_size

        # update
        p_j_old = np.array(P[j])
        P[j] -= eta0 * step
        regularizer.prox_bcd(
            P, eta0*gamma/inv_step_size, degree, j
        )
        sum_viol += np.sum(np.abs(p_j_old - P[j]))

    return sum_viol
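The grad_kernel computation exploits the multilinearity of the ANOVA kernel: A^m(p, x) is affine in each coordinate p_j, so dA^m(p, x)/dp_j = x_j * A^(m-1)(p_notj, x_notj), which is exactly what the anova_kernel call on the masked arrays evaluates. A self-contained finite-difference check of that identity (anova_brute is a local brute-force helper, not part of the library):

import itertools
import numpy as np

def anova_brute(x, p, degree):
    # ANOVA kernel by enumerating all feature subsets of size `degree`
    return sum(np.prod(x[list(idx)] * p[list(idx)])
               for idx in itertools.combinations(range(len(x)), degree))

rng = np.random.RandomState(42)
x, p = rng.randn(6), rng.randn(6)
j, m, eps = 2, 3, 1e-6

# central finite difference of A^m(x, p) w.r.t. p[j]
p_hi, p_lo = p.copy(), p.copy()
p_hi[j] += eps
p_lo[j] -= eps
fd_grad = (anova_brute(x, p_hi, m) - anova_brute(x, p_lo, m)) / (2 * eps)

# closed form from multilinearity: x_j * A^(m-1)(x_notj, p_notj)
notj = np.arange(len(x)) != j
closed = x[j] * anova_brute(x[notj], p[notj], m - 1)
assert np.allclose(fd_grad, closed)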
Example #3
def _pcd_epoch_slow(P, X, y, loss, regularizer, lams, degree, beta, gamma,
                    eta0, indices_component, indices_feature, kernel):
    sum_viol = 0
    n_features = X.shape[1]
    for s in indices_component:
        p_s = P[s]
        for j in indices_feature:
            # compute prediction
            y_pred = _poly_predict(X, P, lams, kernel, degree=degree)

            # compute grad and inv_step_size
            x = X[:, j]
            notj_mask = np.arange(n_features) != j
            X_notj = X[:, notj_mask]
            ps_notj = np.atleast_2d(p_s[notj_mask])
            if kernel == "anova":
                grad_kernel = anova_kernel(ps_notj, X_notj, degree=degree-1)
            else:
                grad_kernel = all_subsets_kernel(ps_notj, X_notj)
            grad_kernel *= x
            grad_y = lams[s] * grad_kernel.ravel()
            inv_step_size = loss.mu * np.dot(grad_y, grad_y) + beta

            dloss = loss.dloss(y_pred, y)
            step = np.dot(dloss, grad_y) + beta * p_s[j]
            step /= inv_step_size

            # update
            p_sj_new = regularizer.prox_cd(
                p_s[j]-eta0*step, p_s, eta0*gamma/inv_step_size, degree, j
            )
            sum_viol += np.abs(p_sj_new - P[s, j])
            P[s, j] = p_sj_new

    return sum_viol
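prox_cd applies the regularizer's proximal operator to the single coordinate just updated. As an illustration only (the regularizer objects are not shown in this listing, so this is an assumption, not polylearn's actual prox_cd), the coordinate-wise prox of an L1 penalty is soft-thresholding:

import numpy as np

def l1_prox_cd(p_j, shrink):
    # prox of shrink * |p_j|: pull the coordinate toward zero,
    # zeroing it when its magnitude falls below the threshold
    return np.sign(p_j) * max(abs(p_j) - shrink, 0.0)

assert l1_prox_cd(0.75, 0.25) == 0.5
assert l1_prox_cd(-0.125, 0.25) == 0.0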
Example #4
def test_anova():
    for m in (2, 3):
        expected = np.zeros((n_samples, n_bases))
        for i in range(n_samples):
            for j in range(n_bases):
                expected[i, j] = dumb_anova(X[i], P[j], degree=m)
        got = anova_kernel(X, P, degree=m)
        assert_array_almost_equal(got, expected, err_msg=(
            "ANOVA kernel incorrect for degree {}".format(m)))
Example #5
def _psgd_epoch_slow(P, w, X, y, loss, regularizer, lams, degree, alpha, beta,
                     gamma, indices_samples, fit_linear, eta0, learning_rate,
                     power_t, batch_size, it, kernel):
    n_samples = X.shape[0]
    n_features = X.shape[1]
    sum_loss = 0.0
    n_minibatches = math.ceil(n_samples / batch_size)
    for ii in range(n_minibatches):
        # pick a minibatch
        minibatch_indices = np.atleast_1d(
            indices_samples[ii * batch_size:(ii + 1) * batch_size])
        X_batch = X[minibatch_indices]
        y_batch = y[minibatch_indices]
        # compute prediction and loss
        y_pred_batch = _poly_predict(X_batch, P.T, lams, kernel, degree=degree)
        y_pred_batch += np.dot(X_batch, w)
        sum_loss += np.sum(
            loss.loss(np.atleast_1d(y_pred_batch), np.atleast_1d(y_batch)))

        # compute grad and inv_step_size
        dloss = loss.dloss(np.atleast_1d(y_pred_batch), np.atleast_1d(y_batch))
        grad_P = np.zeros(P.shape)  # (n_features, n_components)
        for j in range(n_features):
            notj_mask = np.arange(n_features) != j
            X_batch_notj = X_batch[:, notj_mask]
            P_notj = P[notj_mask]
            # grad_kernel: (n_components, n_samples)
            if kernel == "anova":
                grad_kernel = anova_kernel(P_notj.T,
                                           X_batch_notj,
                                           degree=degree - 1)
            else:
                grad_kernel = all_subsets_kernel(P_notj.T, X_batch_notj)
            # dot over samples -> per-component gradient, shape (n_components,)
            grad_P[j] = np.dot(grad_kernel, dloss * X_batch[:, j])
        grad_P *= lams
        grad_P /= len(minibatch_indices)

        eta_P, eta_w = _get_eta(learning_rate, eta0, alpha, beta, power_t, it)
        P -= eta_P * grad_P
        P /= (1.0 + eta_P * beta)
        # update
        regularizer.prox(
            P,
            eta_P * gamma / (1.0 + eta_P * beta),
            degree,
        )
        if fit_linear:
            grad_w = np.dot(X_batch.T, dloss) / len(minibatch_indices)
            w -= eta_w * grad_w
            w /= (1.0 + eta_w * alpha)
        it += 1

    return sum_loss, it
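The divisions by (1.0 + eta_P * beta) and (1.0 + eta_w * alpha) after each gradient step are the closed-form proximal map of a squared-L2 penalty: argmin_p 0.5*||p - v||^2 + 0.5*t*||p||^2 equals v / (1 + t). A quick standalone numeric check of that identity (scipy is used only for the brute-force minimization):

import numpy as np
from scipy.optimize import minimize

rng = np.random.RandomState(0)
v = rng.randn(5)
t = 0.3  # plays the role of eta * beta

def obj(p):
    # proximal objective for the ridge penalty
    return 0.5 * np.sum((p - v) ** 2) + 0.5 * t * np.sum(p ** 2)

closed = v / (1.0 + t)
numeric = minimize(obj, np.zeros_like(v)).x
assert np.allclose(closed, numeric, atol=1e-5)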
Example #6
def _poly_predict(X, P, lams, kernel, degree=2):
    if kernel == "anova":
        K = anova_kernel(X, P, degree)
    elif kernel == "poly":
        K = homogeneous_kernel(X, P, degree)
    elif kernel == "all-subsets":
        K = all_subsets_kernel(X, P)
    else:
        raise ValueError(
            ("Unsuppported kernel: {}. Use one "
             "of {{'anova'|'poly'|'all-subsets'}}").format(kernel))
    return np.dot(K, lams)
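The 'poly' branch uses the homogeneous polynomial kernel, which is just the inner product <x, p> raised elementwise to the given degree. A short sketch of that equivalence, assuming polylearn.kernels exposes homogeneous_kernel as it is used above:

import numpy as np
from polylearn.kernels import homogeneous_kernel

rng = np.random.RandomState(0)
X = rng.randn(4, 6)
P = rng.randn(3, 6)
# K[i, s] should equal <X[i], P[s]> ** 3
assert np.allclose(homogeneous_kernel(X, P, degree=3),
                   np.dot(X, P.T) ** 3)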
Example #7
def test_anova_ignore_diag_equivalence():
    # predicting using anova kernel
    K = 2 * anova_kernel(X, P, degree=2)
    y_pred = np.dot(K, lams)

    # explicit
    Z = np.dot(P.T, (lams[:, np.newaxis] * P))
    y_manual = np.zeros_like(y_pred)
    for i in range(n_samples):
        x = X[i].ravel()
        xx = np.outer(x, x) - np.diag(x**2)
        y_manual[i] = np.trace(np.dot(Z.T, xx))

    assert_array_almost_equal(y_pred, y_manual)
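For degree 2, the factor 2 * A^2(x, p) equals the sum over ordered pairs i != j of x_i x_j p_i p_j, which is why the explicit form subtracts diag(x^2) from the outer product before taking the trace against Z = P.T diag(lams) P. A self-contained restatement of the same equivalence, with fixtures invented for illustration:

import numpy as np

rng = np.random.RandomState(0)
n_samples, n_components, n_features = 5, 3, 4
X = rng.randn(n_samples, n_features)
P = rng.randn(n_components, n_features)
lams = rng.randn(n_components)

# 2 * A^2(x, p) via the closed form <x, p>^2 - sum_j x_j^2 p_j^2
K2 = np.dot(X, P.T) ** 2 - np.dot(X ** 2, (P ** 2).T)
y_pred = np.dot(K2, lams)

# explicit pairwise form with the diagonal removed
Z = np.dot(P.T, lams[:, np.newaxis] * P)
XX = np.einsum('ni,nj->nij', X, X)
XX[:, np.arange(n_features), np.arange(n_features)] = 0.0
y_manual = np.einsum('nij,ij->n', XX, Z)
assert np.allclose(y_pred, y_manual)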
Example #8
def test_predict():
    # predict with homogeneous kernel
    y_pred_poly = _poly_predict(X, P, lams, kernel='poly', degree=3)
    K = homogeneous_kernel(X, P, degree=3)
    y_pred = np.dot(K, lams)
    assert_array_almost_equal(y_pred_poly, y_pred,
                              err_msg="Homogeneous prediction incorrect.")

    # predict with anova kernel
    y_pred_poly = _poly_predict(X, P, lams, kernel='anova', degree=3)
    K = anova_kernel(X, P, degree=3)
    y_pred = np.dot(K, lams)
    assert_array_almost_equal(y_pred_poly, y_pred,
                              err_msg="ANOVA prediction incorrect.")
Example #9
def cd_direct_slow(X, y, lams=None, degree=2, n_components=5, beta=1.,
                   n_iter=10, tol=1e-5, verbose=False, random_state=None):
    import warnings

    from sklearn.utils import check_random_state
    from polylearn.kernels import anova_kernel

    n_samples, n_features = X.shape

    rng = check_random_state(random_state)
    P = 0.01 * rng.randn(n_components, n_features)
    if lams is None:
        lams = np.ones(n_components)

    K = anova_kernel(X, P, degree=degree)
    pred = np.dot(lams, K.T)

    mu = 1  # squared loss
    converged = False

    for i in range(n_iter):
        sum_viol = 0
        for s in range(n_components):
            ps = P[s]
            for j in range(n_features):

                # trivial approach:
                # multilinearity allows us to isolate the term with ps_j * x_j
                x = X[:, j]
                notj_mask = np.arange(n_features) != j
                X_notj = X[:, notj_mask]
                ps_notj = ps[notj_mask]

                if degree == 2:
                    grad_y = lams[s] * x * np.dot(X_notj, ps_notj)
                elif degree == 3:
                    grad_y = lams[s] * x * anova_kernel(np.atleast_2d(ps_notj),
                                                        X_notj, degree=2)
                else:
                    raise NotImplementedError("Degree > 3 not supported.")

                # from the beta * |lam_s| * ||p_s||^2 penalty term
                l1_reg = 2 * beta * np.abs(lams[s])
                inv_step_size = mu * (grad_y ** 2).sum() + l1_reg

                dloss = pred - y  # squared loss
                step = (dloss * grad_y).sum() + l1_reg * ps[j]
                step /= inv_step_size

                P[s, j] -= step
                sum_viol += np.abs(step)

                # stupidly recompute all predictions. No rush yet.
                K = anova_kernel(X, P, degree=degree)
                pred = np.dot(lams, K.T)

        reg_obj = beta * np.sum((P ** 2).sum(axis=1) * np.abs(lams))

        if verbose:
            print("Epoch", i, "violations", sum_viol, "obj",
                  0.5 * ((pred - y) ** 2).sum() + reg_obj)

        if sum_viol < tol:
            converged = True
            break

    if not converged:
        warnings.warn("Objective did not converge. Increase max_iter.")

    return P
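A minimal usage sketch for the reference solver above, fitting synthetic degree-2 data generated from the ANOVA closed form (the synthetic setup is invented for illustration; it assumes numpy and polylearn are importable):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(50, 8)
true_P = rng.randn(3, 8)
# target: sum_s A^2(x, p_s) = sum_s (<x, p_s>^2 - sum_j x_j^2 p_sj^2) / 2
y = 0.5 * np.sum(np.dot(X, true_P.T) ** 2 - np.dot(X ** 2, (true_P ** 2).T),
                 axis=1)

P_hat = cd_direct_slow(X, y, degree=2, n_components=5, beta=1e-3,
                       n_iter=50, tol=1e-4, verbose=False, random_state=0)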