Example #1
def _pbcd_epoch_slow(P, X, y, loss, regularizer, lams, degree, beta, gamma,
                     eta0, indices_feature, kernel):
    # P has shape (n_features, n_components): one block update per feature row
    sum_viol = 0
    n_features = X.shape[1]
    for j in indices_feature:
        # compute prediction
        y_pred = _poly_predict(X, P.T, lams, kernel, degree=degree)

        # compute grad and inv_step_size
        x = X[:, j]
        notj_mask = np.arange(n_features) != j
        X_notj = X[:, notj_mask]
        P_notj = P[notj_mask]
        if kernel == "anova":
            grad_kernel = anova_kernel(P_notj.T, X_notj, degree=degree-1)
        else:
            grad_kernel = all_subsets_kernel(P_notj.T, X_notj)
        grad_kernel *= x  # (n_components, n_samples)
        grad_y = grad_kernel * lams[:, None]
        l2_reg = beta
        inv_step_size = loss.mu * np.sum(grad_y*grad_y) + l2_reg

        dloss = loss.dloss(y_pred, y)
        step = np.sum(dloss*grad_y, axis=1) + l2_reg * P[j]
        step /= inv_step_size

        # update
        p_j_old = np.array(P[j])
        P[j] -= eta0 * step
        regularizer.prox_bcd(
            P, eta0*gamma/inv_step_size, degree, j
        )
        sum_viol += np.sum(np.abs(p_j_old - P[j]))

    return sum_viol
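
The `anova_kernel` helper used above computes the degree-m ANOVA kernel A^m(p, x) = sum over i1 < ... < im of p_{i1} x_{i1} ... p_{im} x_{im}. Its gradient with respect to p_j is x_j * A^{m-1} over the remaining features, which is exactly what the loop builds by dropping column j and lowering the degree by one. A minimal single-pair sketch of the standard dynamic program (the library's actual implementation is vectorized over components and samples):

import numpy as np

def anova_kernel_sketch(p, x, degree):
    # a[t, j] holds A^t on the first j features; a[t, 0] = 0 for t >= 1
    d = len(p)
    a = np.zeros((degree + 1, d + 1))
    a[0, :] = 1.0  # the degree-0 kernel is the empty product, i.e. 1
    for t in range(1, degree + 1):
        for j in range(1, d + 1):
            # either skip feature j, or pair p[j]x[j] with a degree-(t-1) term
            a[t, j] = a[t, j - 1] + p[j - 1] * x[j - 1] * a[t - 1, j - 1]
    return a[degree, d]

With degree=1 this reduces to the dot product <p, x>, and the `degree=degree-1` calls above recover the gradient factor.
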
Example #2
def _pcd_epoch_slow(P, X, y, loss, regularizer, lams, degree, beta, gamma,
                    eta0, indices_component, indices_feature, kernel):
    # P has shape (n_components, n_features): one coordinate update at a time
    sum_viol = 0
    n_features = X.shape[1]
    for s in indices_component:
        p_s = P[s]  # a view into P, so it tracks the updates to P[s, j] below
        for j in indices_feature:
            # compute prediction
            y_pred = _poly_predict(X, P, lams, kernel, degree=degree)

            # compute grad and inv_step_size
            x = X[:, j]
            notj_mask = np.arange(n_features) != j
            X_notj = X[:, notj_mask]
            ps_notj = np.atleast_2d(p_s[notj_mask])
            if kernel == "anova":
                grad_kernel = anova_kernel(ps_notj, X_notj, degree=degree-1)
            else:
                grad_kernel = all_subsets_kernel(ps_notj, X_notj)
            grad_kernel *= x
            grad_y = lams[s] * grad_kernel.ravel()
            inv_step_size = loss.mu * np.dot(grad_y, grad_y) + beta

            dloss = loss.dloss(y_pred, y)
            step = np.dot(dloss, grad_y) + beta * p_s[j]
            step /= inv_step_size

            # update
            p_sj_new = regularizer.prox_cd(
                p_s[j]-eta0*step, p_s, eta0*gamma/inv_step_size, degree, j
            )
            sum_viol += np.abs(p_sj_new - P[s, j])
            P[s, j] = p_sj_new

    return sum_viol
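
`all_subsets_kernel` is the other kernel these epoch functions accept: K(p, x) = prod_j (1 + p_j x_j), i.e. the sum of prod_{i in S} p_i x_i over every feature subset S. Its partial derivative in p_j is x_j times the same product over the remaining features, which is why both epoch functions drop column j before calling it. A minimal sketch with the (n_components, n_samples) output shape the code above expects:

import numpy as np

def all_subsets_kernel_sketch(P, X):
    # P: (n_components, n_features), X: (n_samples, n_features)
    # K[s, i] = prod_j (1 + P[s, j] * X[i, j])  ->  (n_components, n_samples)
    return np.prod(1.0 + P[:, None, :] * X[None, :, :], axis=2)
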
Example #3
def _psgd_epoch_slow(P, w, X, y, loss, regularizer, lams, degree, alpha, beta,
                     gamma, indices_samples, fit_linear, eta0, learning_rate,
                     power_t, batch_size, it, kernel):
    n_samples = X.shape[0]
    n_features = X.shape[1]
    sum_loss = 0.0
    n_minibatches = math.ceil(n_samples / batch_size)
    for ii in range(n_minibatches):
        # pick a minibatch
        minibatch_indices = np.atleast_1d(
            indices_samples[ii * batch_size:(ii + 1) * batch_size])
        X_batch = X[minibatch_indices]
        y_batch = y[minibatch_indices]
        # compute prediction and loss
        y_pred_batch = _poly_predict(X_batch, P.T, lams, kernel, degree=degree)
        y_pred_batch += np.dot(X_batch, w)
        sum_loss += np.sum(
            loss.loss(np.atleast_1d(y_pred_batch), np.atleast_1d(y_batch)))

        # compute grad and inv_step_size
        dloss = loss.dloss(np.atleast_1d(y_pred_batch), np.atleast_1d(y_batch))
        grad_P = np.zeros(P.shape)  # (n_features, n_components)
        for j in range(n_features):
            notj_mask = np.arange(n_features) != j
            X_batch_notj = X_batch[:, notj_mask]
            P_notj = P[notj_mask]
            # grad_kernel: (n_components, n_samples)
            if kernel == "anova":
                grad_kernel = anova_kernel(P_notj.T,
                                           X_batch_notj,
                                           degree=degree - 1)
            else:
                grad_kernel = all_subsets_kernel(P_notj.T, X_batch_notj)
            grad_P[j] = np.dot(grad_kernel,
                               dloss * X_batch[:, j])  # (n_components,)
        grad_P *= lams
        grad_P /= len(minibatch_indices)

        eta_P, eta_w = _get_eta(learning_rate, eta0, alpha, beta, power_t, it)
        # update: gradient step, implicit (proximal) shrinkage for the L2
        # term, then the regularizer's prox for the sparsity penalty
        P -= eta_P * grad_P
        P /= (1.0 + eta_P * beta)
        regularizer.prox(
            P,
            eta_P * gamma / (1.0 + eta_P * beta),
            degree,
        )
        if fit_linear:
            grad_w = np.dot(X_batch.T, dloss) / len(minibatch_indices)
            w -= eta_w * grad_w
            w /= (1.0 + eta_w * alpha)  # implicit L2 shrinkage on w
        it += 1

    return sum_loss, it
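
`_get_eta` supplies separate step sizes for P and w according to the learning_rate schedule. Its exact form is internal to the library; the sketch below is a hypothetical reconstruction of common schedules (the names 'constant' and 'invscaling' and the fallback branch are assumptions, not confirmed behavior):

def get_eta_sketch(learning_rate, eta0, alpha, beta, power_t, it):
    # Hypothetical stand-in for the internal _get_eta; the real schedule
    # names and formulas may differ.
    if learning_rate == "constant":
        eta_P = eta_w = eta0
    elif learning_rate == "invscaling":
        eta_P = eta_w = eta0 / ((it + 1) ** power_t)  # polynomial decay in t
    else:
        # an "optimal"-style schedule scaled by each regularization strength
        eta_P = 1.0 / (beta * (it + 1))
        eta_w = 1.0 / (alpha * (it + 1))
    return eta_P, eta_w
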
Example #4
def test_all_subsets_same_as_slow_reg(mean, loss, regularizer):
    y = _poly_predict(X, P, lams, kernel="all-subsets")
    reg = SparseAllSubsetsRegressor(
        n_components=n_components, beta=1, gamma=1e-3, regularizer=regularizer,
        warm_start=False, tol=1e-3, max_iter=5, random_state=0,
        mean=mean, shuffle=False, solver="pbcd") 
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        reg.fit(X, y)
        P_fit_slow = pbcd_slow(
            X, y, loss=loss, regularizer=regularizer, lams=reg.lams_,
            degree=-1, n_components=n_components, beta=1, gamma=1e-3,
            eta0=0.1, max_iter=5, tol=1e-3, random_state=0, mean=mean)
    assert_array_almost_equal(reg.P_, P_fit_slow, decimal=4)
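
All of these tests generate targets with `_poly_predict(X, P, lams, kernel, degree)`, which combines the per-component kernel values into a single prediction, y(x) = sum_s lams[s] * K(p_s, x). A minimal sketch in terms of the kernel helpers sketched earlier, assuming P has shape (n_components, n_features):

import numpy as np

def poly_predict_sketch(X, P, lams, kernel, degree=2):
    # P: (n_components, n_features); returns predictions of shape (n_samples,)
    if kernel == "anova":
        K = np.array([[anova_kernel_sketch(p, x, degree) for x in X]
                      for p in P])          # (n_components, n_samples)
    else:  # "all-subsets"
        K = all_subsets_kernel_sketch(P, X)
    return lams @ K                         # weighted sum over components
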
Example #5
def test_fm_same_as_slow_clf(degree, batch_size, learning_rate, fit_linear,
                             loss, regularizer):

    y = _poly_predict(X, P, lams, kernel="anova", degree=degree)
    y = np.sign(y)

    reg = SparseFactorizationMachineClassifier(degree=degree,
                                               n_components=n_components,
                                               fit_lower=None,
                                               fit_linear=fit_linear,
                                               alpha=1e-3,
                                               beta=1e-3,
                                               gamma=0.0,
                                               regularizer=regularizer,
                                               learning_rate=learning_rate,
                                               eta0=0.01,
                                               warm_start=False,
                                               tol=1e-3,
                                               max_iter=10,
                                               random_state=0,
                                               shuffle=False,
                                               solver="psgd",
                                               batch_size=batch_size,
                                               verbose=0,
                                               loss=loss)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        reg.fit(X, y)
        P_fit_slow, w_fit_slow = psgd_slow(X,
                                           y,
                                           loss=loss,
                                           regularizer=regularizer,
                                           lams=reg.lams_,
                                           degree=degree,
                                           n_components=n_components,
                                           alpha=1e-3,
                                           beta=1e-3,
                                           gamma=0.0,
                                           learning_rate=learning_rate,
                                           eta0=0.01,
                                           shuffle=False,
                                           max_iter=10,
                                           tol=1e-3,
                                           random_state=0,
                                           fit_linear=fit_linear,
                                           batch_size=batch_size,
                                           verbose=0)
    assert_array_almost_equal(reg.P_[0, :, :], P_fit_slow, decimal=4)
    assert_array_almost_equal(reg.w_, w_fit_slow, decimal=4)
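
The `regularizer` objects threaded through these tests expose a family of proximal operators: prox (whole matrix, Example #3), prox_bcd (one row, Example #1) and prox_cd (one coordinate, Example #2). For a plain L1 penalty all of them reduce to soft-thresholding. A minimal sketch of the coordinate version matching the prox_cd call signature; treating p_s, degree and j as unused is an assumption that holds only for separable penalties like L1:

import math

def prox_cd_l1_sketch(p_sj, p_s, strength, degree, j):
    # Soft-thresholding: argmin_u 0.5 * (u - p_sj)**2 + strength * |u|.
    # p_s, degree and j appear only to mirror the prox_cd interface above.
    return math.copysign(max(abs(p_sj) - strength, 0.0), p_sj)
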
Example #6
def test_fm_same_as_slow_reg(degree, mean, loss, regularizer):

    y = _poly_predict(X, P, lams, kernel="anova", degree=degree)

    reg = SparseFactorizationMachineRegressor(
        degree=degree, n_components=n_components, fit_lower=None,
        fit_linear=False, beta=1, gamma=1e-3, regularizer=regularizer,
        warm_start=False, tol=1e-3, max_iter=5, random_state=0,
        mean=mean, shuffle=False)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        reg.fit(X, y)
        P_fit_slow = pcd_slow(
            X, y, loss=loss, regularizer=regularizer, lams=reg.lams_,
            degree=degree, n_components=n_components, beta=1, gamma=1e-3,
            max_iter=5, tol=1e-3, random_state=0, mean=mean)
    assert_array_almost_equal(reg.P_[0, :, :], P_fit_slow, decimal=4)