Example #1
def test_soft_thresholding():
    """Test soft_thresholding function."""
    # array
    array = np.arange(3)
    output = np.array([0, 0.5, 1.5])
    assert_array_equal(prox.soft_thresholding(array, .5), output)

    # matrix
    array = np.arange(9).reshape(3, 3)
    output = np.array([[0, 0, 1], [2, 3, 4], [5, 6, 7]])
    assert_array_equal(prox.soft_thresholding(array, 1), output)

    # tensor
    array = np.arange(27).reshape(3, 3, 3)
    output = array - 1
    output[0, 0, 0] = 0
    assert_array_equal(prox.soft_thresholding(array, 1), output)

    # tensor, lamda is a matrix
    array = np.arange(27).reshape(3, 3, 3)
    output = array - 1
    output[0, 0, 0] = 0
    output[1] -= 1
    output[2] -= 2
    assert_array_equal(
        prox.soft_thresholding(array,
                               np.arange(1, 4)[:, None, None]), output)
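
# A reference sketch (not the library implementation) of the behaviour these
# tests assume from prox.soft_thresholding: elementwise soft thresholding
# sign(x) * max(|x| - lamda, 0), with lamda broadcast against the input
# (a scalar, or a per-slice array as in the last test case).
import numpy as np


def _soft_thresholding_sketch(array, lamda):
    """Elementwise soft thresholding with NumPy broadcasting of lamda."""
    return np.sign(array) * np.maximum(np.abs(array) - lamda, 0)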
Example #2
def fit_each_variable(
    X,
    ix,
    alpha=1e-2,
    gamma=1e-3,
    tol=1e-3,
    max_iter=1000,
    verbose=0,
    return_history=True,
    compute_objective=True,
    return_n_iter=False,
    adjust_gamma=False,
):
    n, d = X.shape
    theta = np.zeros(d - 1) + 1e-15
    selector = [i for i in range(d) if i != ix]

    def gradient(X, theta, r, selector, n):
        XX = X[:, r].T.dot(X[:, selector])
        XXT = X[:, selector].T.dot(X[:, selector]).dot(theta)
        return -(1 / n) * XX + (1 / n) * XXT

    thetas = [theta]
    checks = []
    for iter_ in range(max_iter):
        theta_new = theta - gamma * gradient(X, theta, ix, selector, n)
        theta = soft_thresholding(theta_new, alpha * gamma)
        thetas.append(theta)

        check = convergence(
            iter=iter_,
            obj=objective(X, theta, n, ix, selector, alpha),
            iter_norm=np.linalg.norm(thetas[-2] - thetas[-1]),
            iter_r_norm=(np.linalg.norm(thetas[-2] - thetas[-1]) / np.linalg.norm(thetas[-1])),
        )
        checks.append(check)
        # if adjust_gamma: # TODO multiply or divide
        if verbose:
            print("Iter: %d, objective: %.4f, iter_norm %.4f" % (check[0], check[1], check[2]))

        if check[-2] < tol:
            break

    return_list = [thetas[-1]]
    if return_history:
        return_list.append(thetas)
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iter_)

    return return_list
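
# A hypothetical usage sketch of fit_each_variable (values and shapes chosen
# only for illustration; soft_thresholding, objective and convergence are
# assumed to be the module-level helpers this function relies on). It
# regresses column `ix` on the remaining columns with an l1 penalty via
# proximal gradient descent.
import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((200, 10))          # n samples, d variables

# with the default flags the result is [theta, thetas, checks]
theta, thetas, checks = fit_each_variable(X, ix=3, alpha=1e-2, gamma=1e-3)
print(theta.shape)                          # (9,): one weight per other column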
Example #3
def lasso(A, b, lamda=1.0, rho=1.0, alpha=1.0, max_iter=1000,
          tol=1e-4, rtol=1e-2, return_history=False):
    r"""Solves the following problem via ADMM:

        minimize 1/2*|| Ax - b ||_2^2 + \lambda || x ||_1

    Parameters
    ----------
    A : array-like, 2-dimensional
        Input matrix.
    b : array-like, 1-dimensional
        Output vector.
    lamda : float, optional
        Regularisation parameter.
    rho : float, optional
        Augmented Lagrangian parameter.
    alpha : float, optional
        Over-relaxation parameter (typically between 1.0 and 1.8).
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.

    Returns
    -------
    x : numpy.array
        Solution to the problem.
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.
    """
    n_samples, n_features = A.shape

    # save a matrix-vector multiply
    Atb = A.T.dot(b)

    # ADMM solver
    x = np.zeros(n_features)
    z = np.zeros(n_features)
    u = np.zeros(n_features)

    # cache the factorization
    L, U = lu_factor(A, rho)

    hist = []
    for _ in range(max_iter):
        # x-update
        q = Atb + rho * (z - u)  # temporary value
        if n_samples >= n_features:
            x = np.linalg.lstsq(U, np.linalg.lstsq(L, q)[0])[0]
        else:
            x = q - A.T.dot(
                np.linalg.lstsq(
                    U, np.linalg.lstsq(
                        L, A.dot(q))[0])[0]) / rho
            x /= rho

        # z-update with relaxation
        zold = z
        x_hat = alpha * x + (1 - alpha) * zold
        z = soft_thresholding(x_hat + u, lamda / rho)

        # u-update
        u += (x_hat - z)

        # diagnostics, reporting, termination checks
        history = (
            objective(A, b, lamda, x, z),  # obj

            np.linalg.norm(x - z),  # r norm
            np.linalg.norm(-rho * (z - zold)),  # s norm

            np.sqrt(n_features) * tol + rtol * max(
                np.linalg.norm(x), np.linalg.norm(-z)),  # eps pri
            np.sqrt(n_features) * tol + rtol * np.linalg.norm(rho * u)  # eps dual
        )

        hist.append(history)
        if history[1] < history[3] and history[2] < history[4]:
            break

    return (z, hist) if return_history else z
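
# A small usage sketch (synthetic data for illustration; lu_factor,
# soft_thresholding and objective are assumed to be the module-level helpers
# this solver imports):
import numpy as np

rng = np.random.default_rng(42)
A = rng.standard_normal((100, 20))
x_true = np.zeros(20)
x_true[:3] = [1.5, -2.0, 0.7]                        # sparse ground truth
b = A.dot(x_true) + 0.01 * rng.standard_normal(100)

x_hat = lasso(A, b, lamda=0.1, rho=1.0, alpha=1.0)
x_hat, history = lasso(A, b, lamda=0.1, return_history=True)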
Example #4
def lasso_kernel_admm(K,
                      y,
                      lamda=0.01,
                      rho=1.,
                      max_iter=100,
                      verbose=0,
                      rtol=1e-4,
                      tol=1e-4,
                      return_n_iter=True,
                      update_rho_options=None,
                      sample_weight=None):
    """Elastic Net kernel learning.

    Solve the following problem via ADMM:
        min sum_{i=1}^p 1/2 ||y_i - alpha_i * sum_{k=1}^{n_k} w_k * K_{ik}||^2
        + lamda ||w||_1 + beta sum_{j=1}^{c_i}||alpha_j||_2^2
    """
    n_kernels, n_samples, n_features = K.shape
    coef = np.ones(n_kernels)

    # alpha = [np.zeros(K[j].shape[2]) for j in range(n_patients)]
    # u = [np.zeros(K[j].shape[1]) for j in range(n_patients)]
    w_1 = coef.copy()
    u_1 = np.zeros(n_kernels)

    # x_old = [np.zeros(K[0].shape[1]) for j in range(n_patients)]
    w_1_old = w_1.copy()
    Y = y[:, None].dot(y[:, None].T)

    checks = []
    for iteration_ in range(max_iter):
        # update w
        KK = 2 * np.tensordot(K, K.T, axes=([1, 2], [0, 1]))
        yy = 2 * np.tensordot(Y, K, axes=([0, 1], [1, 2]))
        yy += rho * (w_1 - u_1)
        coef = _solve_cholesky_kernel(KK, yy[..., None], rho).ravel()

        w_1 = soft_thresholding(coef + u_1, lamda / rho)
        # w_2 = prox_laplacian(coef + u_2, beta / rho)

        u_1 += coef - w_1

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(squared_norm(coef - w_1))
        snorm = rho * np.sqrt(squared_norm(w_1 - w_1_old))

        obj = lasso_objective(Y, coef, K, w_1, lamda)
        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(coef.size) * tol + rtol *
            max(np.sqrt(squared_norm(coef)), np.sqrt(squared_norm(w_1))),
            e_dual=np.sqrt(coef.size) * tol + rtol * rho *
            (np.sqrt(squared_norm(u_1))))

        w_1_old = w_1.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check)

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual and iteration_ > 1:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        u_1 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    return_list = [coef]
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
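
# A usage sketch (synthetic positive semi-definite kernels for illustration;
# _solve_cholesky_kernel, soft_thresholding, lasso_objective, convergence and
# update_rho are assumed to be the module-level helpers used above):
import numpy as np

rng = np.random.default_rng(0)
n_kernels, n_samples = 5, 30
bases = rng.standard_normal((n_kernels, n_samples, n_samples))
K = np.array([b.dot(b.T) / n_samples for b in bases])   # (n_kernels, n, n)
y = rng.standard_normal(n_samples)

coef, n_iter = lasso_kernel_admm(K, y, lamda=0.1, rho=1.)
print(coef.shape)                                       # (n_kernels,)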
Example #5
def latent_time_matrix_decomposition(emp_cov,
                                     alpha=0.01,
                                     tau=1.,
                                     rho=1.,
                                     beta=1.,
                                     eta=1.,
                                     max_iter=100,
                                     verbose=False,
                                     psi='laplacian',
                                     phi='laplacian',
                                     mode='admm',
                                     tol=1e-4,
                                     rtol=1e-4,
                                     assume_centered=False,
                                     return_history=False,
                                     return_n_iter=True,
                                     update_rho_options=None,
                                     compute_objective=True):
    r"""Latent variable time-varying matrix decomposition solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T || S_i-(K_i-L_i)||^2 + alpha ||K_i||_{od,1}
            + tau ||L_i||_*
            + beta sum_{i=2}^T Psi(K_i - K_{i-1})
            + eta sum_{i=2}^T Phi(L_i - L_{i-1})

    where S is the matrix to decompose.

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Stack of matrices to decompose, one per time point.
    alpha, tau, beta, eta : float, optional
        Regularisation parameters.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.

    Returns
    -------
    K, L : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)
    phi, prox_phi, phi_node_penalty = check_norm_prox(phi)

    Z_0 = np.zeros_like(emp_cov)
    Z_1 = np.zeros_like(Z_0)[:-1]
    Z_2 = np.zeros_like(Z_0)[1:]
    W_0 = np.zeros_like(Z_0)
    W_1 = np.zeros_like(Z_1)
    W_2 = np.zeros_like(Z_2)

    X_0 = np.zeros_like(Z_0)
    X_1 = np.zeros_like(Z_1)
    X_2 = np.zeros_like(Z_2)
    U_1 = np.zeros_like(W_1)
    U_2 = np.zeros_like(W_2)

    R_old = np.zeros_like(Z_0)
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)
    W_1_old = np.zeros_like(W_1)
    W_2_old = np.zeros_like(W_2)

    # divisor for consensus variables, accounting for two fewer matrices
    divisor = np.full(emp_cov.shape[0], 3, dtype=float)
    divisor[0] -= 1
    divisor[-1] -= 1

    checks = []
    for iteration_ in range(max_iter):
        # update R
        A = Z_0 - W_0 - X_0
        R = (rho * A + 2 * emp_cov) / (2 + rho)

        # update Z_0
        A = R + W_0 + X_0
        A[:-1] += Z_1 - X_1
        A[1:] += Z_2 - X_2
        A /= divisor[:, None, None]
        # soft_thresholding_ = partial(soft_thresholding, lamda=alpha / rho)
        # Z_0 = np.array(map(soft_thresholding_, A))
        Z_0 = soft_thresholding(A,
                                lamda=alpha / (rho * divisor[:, None, None]))

        # update Z_1, Z_2
        A_1 = Z_0[:-1] + X_1
        A_2 = Z_0[1:] + X_2
        if not psi_node_penalty:
            prox_e = prox_psi(A_2 - A_1, lamda=2. * beta / rho)
            Z_1 = .5 * (A_1 + A_2 - prox_e)
            Z_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            Z_1, Z_2 = prox_psi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 * beta / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update W_0
        A = Z_0 - R - X_0
        A[:-1] += W_1 - U_1
        A[1:] += W_2 - U_2
        A /= divisor[:, None, None]
        A += A.transpose(0, 2, 1)
        A /= 2.

        W_0 = np.array([
            prox_trace_indicator(a, lamda=tau / (rho * div))
            for a, div in zip(A, divisor)
        ])

        # update W_1, W_2
        A_1 = W_0[:-1] + U_1
        A_2 = W_0[1:] + U_2
        if not phi_node_penalty:
            prox_e = prox_phi(A_2 - A_1, lamda=2. * eta / rho)
            W_1 = .5 * (A_1 + A_2 - prox_e)
            W_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            W_1, W_2 = prox_phi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 * eta / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update residuals
        X_0 += R - Z_0 + W_0
        X_1 += Z_0[:-1] - Z_1
        X_2 += Z_0[1:] - Z_2
        U_1 += W_0[:-1] - W_1
        U_2 += W_0[1:] - W_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(R - Z_0 + W_0) + squared_norm(Z_0[:-1] - Z_1) +
            squared_norm(Z_0[1:] - Z_2) + squared_norm(W_0[:-1] - W_1) +
            squared_norm(W_0[1:] - W_2))

        snorm = rho * np.sqrt(
            squared_norm(R - R_old) + squared_norm(Z_1 - Z_1_old) +
            squared_norm(Z_2 - Z_2_old) + squared_norm(W_1 - W_1_old) +
            squared_norm(W_2 - W_2_old))

        obj = objective(emp_cov, R, Z_0, Z_1, Z_2, W_0, W_1, W_2,
                        alpha, tau, beta, eta, psi, phi) \
            if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(R.size + 4 * Z_1.size) * tol + rtol * max(
                np.sqrt(
                    squared_norm(R) + squared_norm(Z_1) + squared_norm(Z_2) +
                    squared_norm(W_1) + squared_norm(W_2)),
                np.sqrt(
                    squared_norm(Z_0 - W_0) + squared_norm(Z_0[:-1]) +
                    squared_norm(Z_0[1:]) + squared_norm(W_0[:-1]) +
                    squared_norm(W_0[1:]))),
            e_dual=np.sqrt(R.size + 4 * Z_1.size) * tol + rtol * rho *
            (np.sqrt(
                squared_norm(X_0) + squared_norm(X_1) + squared_norm(X_2) +
                squared_norm(U_1) + squared_norm(U_2))))

        R_old = R.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()
        W_1_old = W_1.copy()
        W_2_old = W_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check)

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        X_0 *= rho / rho_new
        X_1 *= rho / rho_new
        X_2 *= rho / rho_new
        U_1 *= rho / rho_new
        U_2 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    return_list = [Z_0, W_0]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
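
# A usage sketch (synthetic symmetric input for illustration; check_norm_prox,
# soft_thresholding, prox_trace_indicator, update_rho, convergence and
# objective are assumed to be the module-level helpers used above):
import numpy as np

rng = np.random.default_rng(0)
T, d = 4, 6
S = rng.standard_normal((T, d, d))
S = 0.5 * (S + S.transpose(0, 2, 1))     # stack of T symmetric matrices

K, L, n_iter = latent_time_matrix_decomposition(S, alpha=0.1, tau=1.)
print(K.shape, L.shape)                  # (4, 6, 6) (4, 6, 6)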
Example #6
def time_graphical_lasso(
    emp_cov,
    alpha=0.01,
    rho=1,
    beta=1,
    max_iter=100,
    n_samples=None,
    verbose=False,
    psi="laplacian",
    tol=1e-4,
    rtol=1e-4,
    return_history=False,
    return_n_iter=True,
    mode="admm",
    compute_objective=True,
    stop_at=None,
    stop_when=1e-4,
    update_rho_options=None,
    init="empirical",
):
    """Time-varying graphical lasso solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T -n_i log_likelihood(S_i, K_i) + alpha*||K_i||_{od,1}
            + beta sum_{i=2}^T Psi(K_i - K_{i-1})

    where S_i = (1/n_i) X_i^T \times X_i is the empirical covariance of data
    matrix X (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance matrices, one per time point.
    alpha, beta : float, optional
        Regularisation parameter.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iteration before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zero', ndarray}
        Choose how to initialize the precision matrix, with the inverse
        empirical covariance, zero matrix or precomputed.

    Returns
    -------
    K : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)

    Z_0 = init_precision(emp_cov, mode=init)
    Z_1 = Z_0.copy()[:-1]  # np.zeros_like(emp_cov)[:-1]
    Z_2 = Z_0.copy()[1:]  # np.zeros_like(emp_cov)[1:]

    U_0 = np.zeros_like(Z_0)
    U_1 = np.zeros_like(Z_1)
    U_2 = np.zeros_like(Z_2)

    Z_0_old = np.zeros_like(Z_0)
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)

    # divisor for consensus variables, accounting for two fewer matrices
    divisor = np.full(emp_cov.shape[0], 3, dtype=float)
    divisor[0] -= 1
    divisor[-1] -= 1

    if n_samples is None:
        n_samples = np.ones(emp_cov.shape[0])

    checks = [convergence(obj=objective(n_samples, emp_cov, Z_0, Z_0, Z_1, Z_2, alpha, beta, psi))]
    for iteration_ in range(max_iter):
        # update K
        A = Z_0 - U_0
        A[:-1] += Z_1 - U_1
        A[1:] += Z_2 - U_2
        A /= divisor[:, None, None]
        # soft_thresholding_ = partial(soft_thresholding, lamda=alpha / rho)
        # K = np.array(map(soft_thresholding_, A))
        A += A.transpose(0, 2, 1)
        A /= 2.0

        A *= -rho * divisor[:, None, None] / n_samples[:, None, None]
        A += emp_cov

        K = np.array([prox_logdet(a, lamda=ni / (rho * div)) for a, div, ni in zip(A, divisor, n_samples)])

        # update Z_0
        A = K + U_0
        A += A.transpose(0, 2, 1)
        A /= 2.0
        Z_0 = soft_thresholding(A, lamda=alpha / rho)

        # other Zs
        A_1 = K[:-1] + U_1
        A_2 = K[1:] + U_2
        if not psi_node_penalty:
            prox_e = prox_psi(A_2 - A_1, lamda=2.0 * beta / rho)
            Z_1 = 0.5 * (A_1 + A_2 - prox_e)
            Z_2 = 0.5 * (A_1 + A_2 + prox_e)
        else:
            Z_1, Z_2 = prox_psi(
                np.concatenate((A_1, A_2), axis=1),
                lamda=0.5 * beta / rho,
                rho=rho,
                tol=tol,
                rtol=rtol,
                max_iter=max_iter,
            )

        # update residuals
        U_0 += K - Z_0
        U_1 += K[:-1] - Z_1
        U_2 += K[1:] - Z_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(squared_norm(K - Z_0) + squared_norm(K[:-1] - Z_1) + squared_norm(K[1:] - Z_2))

        snorm = rho * np.sqrt(squared_norm(Z_0 - Z_0_old) + squared_norm(Z_1 - Z_1_old) + squared_norm(Z_2 - Z_2_old))

        obj = objective(n_samples, emp_cov, Z_0, K, Z_1, Z_2, alpha, beta, psi) if compute_objective else np.nan

        # if np.isinf(obj):
        #     Z_0 = Z_0_old
        #     break

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(K.size + 2 * Z_1.size) * tol
            + rtol
            * max(
                np.sqrt(squared_norm(Z_0) + squared_norm(Z_1) + squared_norm(Z_2)),
                np.sqrt(squared_norm(K) + squared_norm(K[:-1]) + squared_norm(K[1:])),
            ),
            e_dual=np.sqrt(K.size + 2 * Z_1.size) * tol
            + rtol * rho * np.sqrt(squared_norm(U_0) + squared_norm(U_1) + squared_norm(U_2)),
            # precision=Z_0.copy()
        )
        Z_0_old = Z_0.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f," "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break

        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho, rnorm, snorm, iteration=iteration_, **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        U_0 *= rho / rho_new
        U_1 *= rho / rho_new
        U_2 *= rho / rho_new
        rho = rho_new

        # assert is_pos_def(Z_0)
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
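
# A usage sketch (synthetic data for illustration; init_precision, prox_logdet,
# check_norm_prox, update_rho, convergence and objective are assumed to be the
# module-level helpers used above). The empirical covariances are built per
# time point from a (T, n, d) data array:
import numpy as np

rng = np.random.default_rng(0)
T, n, d = 3, 50, 5
X = rng.standard_normal((T, n, d))
emp_cov = np.array([x.T.dot(x) / n for x in X])      # shape (T, d, d)

K, cov, n_iter = time_graphical_lasso(
    emp_cov, alpha=0.1, beta=1., n_samples=np.full(T, n))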
Example #7
def logistic_alternating(K, y, lamda=0.01, beta=0.01, gamma=.5,
                         max_iter=100, l1_ratio_lamda=0.1, l1_ratio_beta=0.1,
                         deep=True, verbose=0, tol=1e-4, return_n_iter=True,
                         fit_intercept=True, lr_p2=None):
    # multiple patient
    n_patients = len(K)
    n_kernels = len(K[0])
    coef = np.ones(n_kernels)
    alpha = [np.zeros(K[j].shape[2]) for j in range(n_patients)]
    objective_new = 0

    max_iter_deep = max_iter // 3 if deep else 1

    if lr_p2 is None:
        raise ValueError("lr_p2 cant be None")

    for iteration_ in range(max_iter):
        w_old = coef.copy()
        alpha_old = [a.copy() for a in alpha]
        objective_old = objective_new

        for i in range(n_patients):
            lr_p2[i].fit(np.tensordot(coef, K[i], axes=1), y[i])

        alpha = [log.coef_.ravel() for log in lr_p2]
        intercepts = [log.intercept_.ravel() for log in lr_p2]
        alpha_intercept = [np.hstack((a, c)) for a, c in zip(alpha, intercepts)]

        # X = np.tensordot(alpha, K, axes=([0], [2])).T
        # X = sum(K[j].dot(alpha[j]).T for j in range(n_patients))
        # coef = lr_p1.fit(X, y).coef_.ravel()

        for it in range(max_iter_deep):
            coef_old = coef.copy()

            l2_reg = beta * (1 - l1_ratio_beta)
            loss, gradient = _logistic_loss_and_grad(
                coef, alpha_intercept, K, y, l2_reg)
            l1_reg = beta * l1_ratio_beta
            coef = soft_thresholding(coef - gamma * gradient, gamma * l1_reg)
            coef = np.maximum(coef, 0.)

            if np.linalg.norm(coef - coef_old) < tol:
                break

        obj = objective_new = logistic_objective(K, y, alpha, coef, lamda, beta)
        objective_difference = abs(objective_new - objective_old)
        # snorm = np.sqrt(squared_norm(coef - w_old) +
        #                 squared_norm(alpha - alpha_old))

        diff_w = np.linalg.norm(coef - w_old)
        diff_a = np.sqrt(
            sum(squared_norm(a - a_old) for a, a_old in zip(alpha, alpha_old)))

        if verbose:  # and iteration_ % 10 == 0:
            # print("obj: %.4f, snorm: %.4f" % (obj, snorm))
            print("obj: %.4f, loss: %.4f, diff_w: %.4f, diff_a: %.4f" % (
                obj, logistic_loss(K, y, alpha, coef, lamda, beta), diff_w,
                diff_a))

        if diff_a < tol and objective_difference < tol:
            break
        if np.isnan(diff_w) or np.isnan(diff_a) or np.isnan(objective_difference):
            raise ValueError('Non-finite (NaN) values encountered during optimisation.')
    else:
        warnings.warn("Objective did not converge.")
    return_list = [alpha, coef, intercepts]
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
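
# A hypothetical usage sketch (shapes guessed from the code; kernels should be
# positive semi-definite in practice, random matrices are used here only to
# illustrate shapes; _logistic_loss_and_grad, logistic_objective, logistic_loss
# and soft_thresholding are assumed to be module-level helpers):
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
n_patients, n_kernels, n = 3, 4, 40
K = [rng.standard_normal((n_kernels, n, n)) for _ in range(n_patients)]
y = [rng.integers(0, 2, n) for _ in range(n_patients)]

# one per-patient logistic model, refitted at every outer iteration
lr_p2 = [LogisticRegression() for _ in range(n_patients)]
alpha, coef, intercepts, n_iter = logistic_alternating(K, y, lr_p2=lr_p2)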
Example #8
def enet_kernel_learning_admm2(
        K, y, lamda=0.01, beta=0.01, rho=1., max_iter=100, verbose=0, rtol=1e-4,
        tol=1e-4, return_n_iter=True, update_rho_options=None):
    """Elastic Net kernel learning.

    Solve the following problem via ADMM:
        min sum_{i=1}^p 1/2 ||y_i - alpha_i * sum_{k=1}^{n_k} w_k * K_{ik}||^2
        + lamda ||w||_1 + beta sum_{j=1}^{c_i}||alpha_j||_2^2
    """
    n_patients = len(K)
    n_kernels = len(K[0])
    coef = np.ones(n_kernels)
    alpha = [np.zeros(K[j].shape[2]) for j in range(n_patients)]

    u = [np.zeros(K[j].shape[1]) for j in range(n_patients)]
    u_1 = np.zeros(n_kernels)
    w_1 = np.zeros(n_kernels)

    x_old = [np.zeros(K[0].shape[1]) for j in range(n_patients)]
    w_1_old = w_1.copy()
    # w_2_old = w_2.copy()

    checks = []
    for iteration_ in range(max_iter):
        # update x
        A = [K[j].T.dot(coef) for j in range(n_patients)]
        x = [prox_laplacian(y[j] + rho * (A[j].T.dot(alpha[j]) - u[j]), rho / 2.)
             for j in range(n_patients)]

        # update alpha
        # solve (rho * A A^T + 2 * beta * I)^-1 (rho * A (x + u)) with A = sum_k w_k K_k
        KK = [rho * A[j].dot(A[j].T) for j in range(n_patients)]
        yy = [rho * A[j].dot(x[j] + u[j]) for j in range(n_patients)]
        alpha = [_solve_cholesky_kernel(
            KK[j], yy[j][..., None], 2 * beta).ravel() for j in range(n_patients)]
        # equivalent to alpha_dot_K
        # solve (sum_j A_j A_j^T + I)^-1 (sum_j A_j (x_j + u_j) + w_1 - u_1)
        # with A_j = K_j * alpha_j
        A = [K[j].dot(alpha[j]) for j in range(n_patients)]
        KK = sum(A[j].dot(A[j].T) for j in range(n_patients))
        yy = sum(A[j].dot(x[j] + u[j]) for j in range(n_patients))
        yy += w_1 - u_1
        coef = _solve_cholesky_kernel(KK, yy[..., None], 1).ravel()

        w_1 = soft_thresholding(coef + u_1, lamda / rho)
        # w_2 = prox_laplacian(coef + u_2, beta / rho)

        # update residuals
        alpha_coef_K = [
            alpha[j].dot(K[j].T.dot(coef)) for j in range(n_patients)]
        residuals = [x[j] - alpha_coef_K[j] for j in range(n_patients)]
        u = [u[j] + residuals[j] for j in range(n_patients)]
        u_1 += coef - w_1

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(coef - w_1) +
            sum(squared_norm(residuals[j]) for j in range(n_patients)))
        snorm = rho * np.sqrt(
            squared_norm(w_1 - w_1_old) +
            sum(squared_norm(x[j] - x_old[j]) for j in range(n_patients)))

        obj = objective_admm2(x, y, alpha, lamda, beta, w_1)
        check = convergence(
            obj=obj, rnorm=rnorm, snorm=snorm,
            e_pri=np.sqrt(coef.size + sum(
                x[j].size for j in range(n_patients))) * tol + rtol * max(
                    np.sqrt(squared_norm(coef) + sum(squared_norm(
                        alpha_coef_K[j]) for j in range(n_patients))),
                    np.sqrt(squared_norm(w_1) + sum(squared_norm(
                        x[j]) for j in range(n_patients)))),
            e_dual=np.sqrt(coef.size + sum(
                x[j].size for j in range(n_patients))) * tol + rtol * rho * (
                    np.sqrt(squared_norm(u_1) + sum(squared_norm(
                        u[j]) for j in range(n_patients)))))

        w_1_old = w_1.copy()
        x_old = [x[j].copy() for j in range(n_patients)]

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check)

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual and iteration_ > 1:
            break

        rho_new = update_rho(rho, rnorm, snorm, iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        u = [u[j] * (rho / rho_new) for j in range(n_patients)]
        u_1 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    return_list = [alpha, coef]
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
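
# A usage sketch of the per-patient elastic-net kernel learner (synthetic
# kernels for illustration; prox_laplacian, _solve_cholesky_kernel,
# soft_thresholding, objective_admm2, convergence and update_rho are assumed
# to be module-level helpers). K is a list with one (n_kernels, n, n) stack
# of kernel matrices per patient:
import numpy as np

rng = np.random.default_rng(0)
n_patients, n_kernels, n = 3, 4, 25
K = [np.array([m.dot(m.T) / n for m in rng.standard_normal((n_kernels, n, n))])
     for _ in range(n_patients)]
y = [rng.standard_normal(n) for _ in range(n_patients)]

alpha, coef, n_iter = enet_kernel_learning_admm2(K, y, lamda=0.1, beta=0.01)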
def kernel_latent_time_graphical_lasso(
    emp_cov,
    alpha=0.01,
    tau=1.0,
    rho=1.0,
    kernel_psi=None,
    kernel_phi=None,
    max_iter=100,
    verbose=False,
    psi="laplacian",
    phi="laplacian",
    mode="admm",
    tol=1e-4,
    rtol=1e-4,
    assume_centered=False,
    n_samples=None,
    return_history=False,
    return_n_iter=True,
    update_rho_options=None,
    compute_objective=True,
    init="empirical",
):
    r"""Time-varying latent variable graphical lasso solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T -n_i log_likelihood(S_i, K_i-L_i) + alpha ||K_i||_{od,1}
            + tau ||L_i||_*
            + sum_{s>t}^T k_psi(s,t) Psi(K_s - K_t)
            + sum_{s>t}^T k_phi(s,t) Phi(L_s - L_t)

    where S is the empirical covariance of the data
    matrix D (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance matrices, one per time point.
    alpha, tau : float, optional
        Regularisation parameters.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.

    Returns
    -------
    K, L : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)
    phi, prox_phi, phi_node_penalty = check_norm_prox(phi)
    n_times, _, n_features = emp_cov.shape

    if kernel_psi is None:
        kernel_psi = np.eye(n_times)
    if kernel_phi is None:
        kernel_phi = np.eye(n_times)

    Z_0 = init_precision(emp_cov, mode=init)
    W_0 = np.zeros_like(Z_0)
    X_0 = np.zeros_like(Z_0)
    R_old = np.zeros_like(Z_0)

    Z_M, Z_M_old = {}, {}
    Y_M = {}
    W_M, W_M_old = {}, {}
    U_M = {}
    for m in range(1, n_times):
        Z_L = Z_0.copy()[:-m]
        Z_R = Z_0.copy()[m:]
        Z_M[m] = (Z_L, Z_R)

        W_L = np.zeros_like(Z_L)
        W_R = np.zeros_like(Z_R)
        W_M[m] = (W_L, W_R)

        Y_L = np.zeros_like(Z_L)
        Y_R = np.zeros_like(Z_R)
        Y_M[m] = (Y_L, Y_R)

        U_L = np.zeros_like(W_L)
        U_R = np.zeros_like(W_R)
        U_M[m] = (U_L, U_R)

        Z_L_old = np.zeros_like(Z_L)
        Z_R_old = np.zeros_like(Z_R)
        Z_M_old[m] = (Z_L_old, Z_R_old)

        W_L_old = np.zeros_like(W_L)
        W_R_old = np.zeros_like(W_R)
        W_M_old[m] = (W_L_old, W_R_old)

    if n_samples is None:
        n_samples = np.ones(n_times)

    checks = []
    for iteration_ in range(max_iter):
        # update R
        A = Z_0 - W_0 - X_0
        A += A.transpose(0, 2, 1)
        A /= 2.0
        A *= -rho / n_samples[:, None, None]
        A += emp_cov
        # A = emp_cov / rho - A

        R = np.array(
            [prox_logdet(a, lamda=ni / rho) for a, ni in zip(A, n_samples)])

        # update Z_0
        A = R + W_0 + X_0
        for m in range(1, n_times):
            A[:-m] += Z_M[m][0] - Y_M[m][0]
            A[m:] += Z_M[m][1] - Y_M[m][1]

        A /= n_times
        Z_0 = soft_thresholding(A, lamda=alpha / (rho * n_times))

        # update W_0
        A = Z_0 - R - X_0
        for m in range(1, n_times):
            A[:-m] += W_M[m][0] - U_M[m][0]
            A[m:] += W_M[m][1] - U_M[m][1]

        A /= n_times
        A += A.transpose(0, 2, 1)
        A /= 2.0

        W_0 = np.array(
            [prox_trace_indicator(a, lamda=tau / (rho * n_times)) for a in A])

        # update residuals
        X_0 += R - Z_0 + W_0

        for m in range(1, n_times):
            # other Zs
            Y_L, Y_R = Y_M[m]
            A_L = Z_0[:-m] + Y_L
            A_R = Z_0[m:] + Y_R
            if not psi_node_penalty:
                prox_e = prox_psi(A_R - A_L,
                                  lamda=2.0 *
                                  np.diag(kernel_psi, m)[:, None, None] / rho)
                Z_L = 0.5 * (A_L + A_R - prox_e)
                Z_R = 0.5 * (A_L + A_R + prox_e)
            else:
                Z_L, Z_R = prox_psi(
                    np.concatenate((A_L, A_R), axis=1),
                    lamda=0.5 * np.diag(kernel_psi, m)[:, None, None] / rho,
                    rho=rho,
                    tol=tol,
                    rtol=rtol,
                    max_iter=max_iter,
                )
            Z_M[m] = (Z_L, Z_R)

            # update other residuals
            Y_L += Z_0[:-m] - Z_L
            Y_R += Z_0[m:] - Z_R

            # other Ws
            U_L, U_R = U_M[m]
            A_L = W_0[:-m] + U_L
            A_R = W_0[m:] + U_R
            if not phi_node_penalty:
                prox_e = prox_phi(A_R - A_L,
                                  lamda=2.0 *
                                  np.diag(kernel_phi, m)[:, None, None] / rho)
                W_L = 0.5 * (A_L + A_R - prox_e)
                W_R = 0.5 * (A_L + A_R + prox_e)
            else:
                W_L, W_R = prox_phi(
                    np.concatenate((A_L, A_R), axis=1),
                    lamda=0.5 * np.diag(kernel_phi, m)[:, None, None] / rho,
                    rho=rho,
                    tol=tol,
                    rtol=rtol,
                    max_iter=max_iter,
                )
            W_M[m] = (W_L, W_R)

            # update other residuals
            U_L += W_0[:-m] - W_L
            U_R += W_0[m:] - W_R

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(R - Z_0 + W_0) + sum(
                squared_norm(Z_0[:-m] - Z_M[m][0]) +
                squared_norm(Z_0[m:] - Z_M[m][1]) +
                squared_norm(W_0[:-m] - W_M[m][0]) +
                squared_norm(W_0[m:] - W_M[m][1]) for m in range(1, n_times)))

        snorm = rho * np.sqrt(
            squared_norm(R - R_old) + sum(
                squared_norm(Z_M[m][0] - Z_M_old[m][0]) +
                squared_norm(Z_M[m][1] - Z_M_old[m][1]) +
                squared_norm(W_M[m][0] - W_M_old[m][0]) +
                squared_norm(W_M[m][1] - W_M_old[m][1])
                for m in range(1, n_times)))

        obj = (objective(emp_cov, n_samples, R, Z_0, Z_M, W_0, W_M, alpha, tau,
                         kernel_psi, kernel_phi, psi, phi)
               if compute_objective else np.nan)

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=n_features * np.sqrt(n_times * (2 * n_times - 1)) * tol +
            rtol * max(
                np.sqrt(
                    squared_norm(R) + sum(
                        squared_norm(Z_M[m][0]) + squared_norm(Z_M[m][1]) +
                        squared_norm(W_M[m][0]) + squared_norm(W_M[m][1])
                        for m in range(1, n_times))),
                np.sqrt(
                    squared_norm(Z_0 - W_0) + sum(
                        squared_norm(Z_0[:-m]) + squared_norm(Z_0[m:]) +
                        squared_norm(W_0[:-m]) + squared_norm(W_0[m:])
                        for m in range(1, n_times))),
            ),
            e_dual=n_features * np.sqrt(n_times * (2 * n_times - 1)) * tol +
            rtol * rho * np.sqrt(
                squared_norm(X_0) + sum(
                    squared_norm(Y_M[m][0]) + squared_norm(Y_M[m][1]) +
                    squared_norm(U_M[m][0]) + squared_norm(U_M[m][1])
                    for m in range(1, n_times))),
        )

        R_old = R.copy()
        for m in range(1, n_times):
            Z_M_old[m] = (Z_M[m][0].copy(), Z_M[m][1].copy())
            W_M_old[m] = (W_M[m][0].copy(), W_M[m][1].copy())

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        X_0 *= rho / rho_new
        for m in range(1, n_times):
            Y_L, Y_R = Y_M[m]
            Y_L *= rho / rho_new
            Y_R *= rho / rho_new

            U_L, U_R = U_M[m]
            U_L *= rho / rho_new
            U_R *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, W_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
Example #10
def group_lasso(A,
                b,
                lamda=1.0,
                groups=None,
                rho=1.0,
                alpha=1.0,
                max_iter=1000,
                tol=1e-4,
                rtol=1e-2,
                return_history=False):
    r"""Group Lasso solver.

    Solves the following problem via ADMM
       minimize 1/2*|| Ax - b ||_2^2 + \lambda sum(norm(x_i))

    The input `groups` is a list of index arrays, one per group, so that
    x_i = x[groups[i]].

    Parameters
    ----------
    A : array-like, 2-dimensional
        Input matrix.
    b : array-like, 1-dimensional
        Output vector.
    lamda : float, optional
        Regularisation parameter.
    groups : list
        Groups of variables.
    rho : float, optional
        Augmented Lagrangian parameter.
    alpha : float, optional
        Over-relaxation parameter (typically between 1.0 and 1.8).
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.

    Returns
    -------
    x : numpy.array
        Solution to the problem.
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    n_samples, n_features = A.shape

    # check valid partition
    if not np.allclose(flatten(groups), np.arange(n_features)):
        raise ValueError("Invalid partition in groups. "
                         "Groups must be non-overlapping and each variables "
                         "must be selected")

    # save a matrix-vector multiply
    Atb = A.T.dot(b)

    # ADMM solver
    x = np.zeros(n_features)
    z = np.zeros(n_features)
    u = np.zeros(n_features)

    # pre-factor
    L, U = lu_factor(A, rho)

    hist = []
    for _ in range(max_iter):
        # x-update
        q = Atb + rho * (z - u)  # temporary value
        if n_samples >= n_features:
            x = np.linalg.lstsq(U, np.linalg.lstsq(L, q)[0])[0]
        else:
            x = q - A.T.dot(
                np.linalg.lstsq(U,
                                np.linalg.lstsq(L, A.dot(q))[0])[0]) / rho
            x /= rho

        # z-update with relaxation
        zold = z
        x_hat = alpha * x + (1 - alpha) * zold
        for group in groups:
            z[group] = soft_thresholding(x_hat[group] + u[group], lamda / rho)

        # u-update
        u += (x_hat - z)

        # diagnostics, reporting, termination checks
        history = (
            objective(A, b, lamda, groups, x, z),  # obj
            np.linalg.norm(x - z),  # r norm
            np.linalg.norm(-rho * (z - zold)),  # s norm
            np.sqrt(n_features) * tol +
            rtol * max(np.linalg.norm(x), np.linalg.norm(-z)),  # eps pri
            np.sqrt(n_features) * tol +
            rtol * np.linalg.norm(rho * u)  # eps dual
        )

        hist.append(history)
        if history[1] < history[3] and history[2] < history[4]:
            break

    return (z, hist) if return_history else z
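
# A usage sketch (synthetic data; flatten, lu_factor, soft_thresholding and
# objective are assumed to be the module-level helpers used above). groups
# must partition all feature indices:
import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((60, 9))
x_true = np.zeros(9)
x_true[3:6] = [1., -1., 0.5]                        # one active group
b = A.dot(x_true) + 0.01 * rng.standard_normal(60)

groups = [np.arange(0, 3), np.arange(3, 6), np.arange(6, 9)]
x_hat = group_lasso(A, b, lamda=0.1, groups=groups)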
def group_lasso_overlap(A,
                        b,
                        lamda=1.0,
                        groups=None,
                        rho=1.0,
                        max_iter=100,
                        tol=1e-4,
                        verbose=False,
                        rtol=1e-2):
    r"""Group Lasso with Overlap solver.

    Solves the following problem via ADMM
       minimize 1/2*|| Ax - b ||_2^2 + \lambda sum(norm(x_i))

    The input `groups` is a list of index arrays, one per (possibly
    overlapping) group, so that x_i = x[groups[i]].

    Parameters
    ----------
    A : array-like, 2-dimensional
        Input matrix.
    b : array-like, 1-dimensional
        Output vector.
    lamda : float, optional
        Regularisation parameter.
    groups : list
        Groups of variables.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    verbose : bool, optional
        Print info at each iteration.
    rtol : float, optional
        Relative tolerance for convergence.

    Returns
    -------
    x : numpy.array
        Solution to the problem.
    history : list
        Structure that contains the objective value, the primal and dual
        residual norms, and tolerances for the primal and dual residual
        norms at each iteration.
    n_iter : int
        Number of iterations run.

    """
    n, d = A.shape

    x = [np.zeros(len(g)) for g in groups]  # local variables
    z = np.zeros(d)
    y = [np.zeros(len(g)) for g in groups]

    D = np.diag(D_function(d, groups))
    Atb = A.T.dot(b)
    inv = np.linalg.inv(A.T.dot(A) + rho * D)
    hist = []
    count = 0
    for k in range(max_iter):
        # x update
        for i, g in enumerate(groups):
            x[i] = soft_thresholding(x[i] - y[i] / rho, lamda / rho)

        # z update
        zold = z
        x_consensus = P_star_x_bar_function(x, d, groups)
        y_consensus = P_star_x_bar_function(y, d, groups)
        z = inv.dot(Atb + D.dot(y_consensus + rho * x_consensus))

        for i, g in enumerate(groups):
            y[i] += rho * (x[i] - z[g])

        # diagnostics, reporting, termination checks
        history = (
            objective(A, b, lamda, x, z),  # objective
            np.linalg.norm(x_consensus - z),  # rnorm
            np.linalg.norm(-rho * (z - zold)),  # snorm
            np.sqrt(d) * tol + rtol *
            max(np.linalg.norm(x_consensus), np.linalg.norm(-z)),  # eps primal
            np.sqrt(d) * tol + rtol * np.linalg.norm(rho * y_consensus)
            # eps dual
        )

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % history)

        hist.append(history)
        if history[1] < history[3] and history[2] < history[4]:
            if count > 10:
                break
            else:
                count += 1
        else:
            count = 0

    return z, hist, k
def kernel_time_graphical_lasso(
    emp_cov,
    alpha=0.01,
    rho=1,
    kernel=None,
    max_iter=100,
    n_samples=None,
    verbose=False,
    psi="laplacian",
    tol=1e-4,
    rtol=1e-4,
    return_history=False,
    return_n_iter=True,
    mode="admm",
    update_rho_options=None,
    compute_objective=True,
    stop_at=None,
    stop_when=1e-4,
    init="empirical",
):
    """Time-varying graphical lasso solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T -n_i log_likelihood(S_i, K_i) + alpha ||K_i||_{od,1}
            + sum_{s>t}^T k_psi(s,t) Psi(K_s - K_t)

    where S is the empirical covariance of the data
    matrix D (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance matrices, one per time point.
    alpha : float, optional
        Regularisation parameter.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    init : {'empirical', 'zeros', ndarray}, default 'empirical'
        How to initialise the inverse covariance matrix. The default is to
        invert the empirical covariance.

    Returns
    -------
    K : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)
    n_times, _, n_features = emp_cov.shape

    if kernel is None:
        kernel = np.eye(n_times)

    Z_0 = init_precision(emp_cov, mode=init)
    U_0 = np.zeros_like(Z_0)
    Z_0_old = np.zeros_like(Z_0)

    Z_M, Z_M_old = {}, {}
    U_M = {}
    for m in range(1, n_times):
        # all possible Markovian jumps
        Z_L = Z_0.copy()[:-m]
        Z_R = Z_0.copy()[m:]
        Z_M[m] = (Z_L, Z_R)

        U_L = np.zeros_like(Z_L)
        U_R = np.zeros_like(Z_R)
        U_M[m] = (U_L, U_R)

        Z_L_old = np.zeros_like(Z_L)
        Z_R_old = np.zeros_like(Z_R)
        Z_M_old[m] = (Z_L_old, Z_R_old)

    if n_samples is None:
        n_samples = np.ones(n_times)

    checks = [
        convergence(obj=objective(n_samples, emp_cov, Z_0, Z_0, Z_M, alpha,
                                  kernel, psi))
    ]
    for iteration_ in range(max_iter):
        # update K
        A = Z_0 - U_0
        for m in range(1, n_times):
            A[:-m] += Z_M[m][0] - U_M[m][0]
            A[m:] += Z_M[m][1] - U_M[m][1]

        A /= n_times
        # soft_thresholding_ = partial(soft_thresholding, lamda=alpha / rho)
        # K = np.array(map(soft_thresholding_, A))
        A += A.transpose(0, 2, 1)
        A /= 2.0

        A *= -rho * n_times / n_samples[:, None, None]
        A += emp_cov

        K = np.array([
            prox_logdet(a, lamda=ni / (rho * n_times))
            for a, ni in zip(A, n_samples)
        ])

        # update Z_0
        A = K + U_0
        A += A.transpose(0, 2, 1)
        A /= 2.0
        Z_0 = soft_thresholding(A, lamda=alpha / rho)

        # update residuals
        U_0 += K - Z_0

        # other Zs
        for m in range(1, n_times):
            U_L, U_R = U_M[m]
            A_L = K[:-m] + U_L
            A_R = K[m:] + U_R
            if not psi_node_penalty:
                prox_e = prox_psi(A_R - A_L,
                                  lamda=2.0 *
                                  np.diag(kernel, m)[:, None, None] / rho)
                Z_L = 0.5 * (A_L + A_R - prox_e)
                Z_R = 0.5 * (A_L + A_R + prox_e)
            else:
                Z_L, Z_R = prox_psi(
                    np.concatenate((A_L, A_R), axis=1),
                    lamda=0.5 * np.diag(kernel, m)[:, None, None] / rho,
                    rho=rho,
                    tol=tol,
                    rtol=rtol,
                    max_iter=max_iter,
                )
            Z_M[m] = (Z_L, Z_R)

            # update other residuals
            U_L += K[:-m] - Z_L
            U_R += K[m:] - Z_R

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(K - Z_0) + sum(
                squared_norm(K[:-m] - Z_M[m][0]) +
                squared_norm(K[m:] - Z_M[m][1]) for m in range(1, n_times)))

        snorm = rho * np.sqrt(
            squared_norm(Z_0 - Z_0_old) + sum(
                squared_norm(Z_M[m][0] - Z_M_old[m][0]) +
                squared_norm(Z_M[m][1] - Z_M_old[m][1])
                for m in range(1, n_times)))

        obj = objective(n_samples, emp_cov, Z_0, K, Z_M, alpha, kernel,
                        psi) if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=n_features * n_times * tol + rtol * max(
                np.sqrt(
                    squared_norm(Z_0) + sum(
                        squared_norm(Z_M[m][0]) + squared_norm(Z_M[m][1])
                        for m in range(1, n_times))),
                np.sqrt(
                    squared_norm(K) + sum(
                        squared_norm(K[:-m]) + squared_norm(K[m:])
                        for m in range(1, n_times))),
            ),
            e_dual=n_features * n_times * tol + rtol * rho * np.sqrt(
                squared_norm(U_0) + sum(
                    squared_norm(U_M[m][0]) + squared_norm(U_M[m][1])
                    for m in range(1, n_times))),
        )
        Z_0_old = Z_0.copy()
        for m in range(1, n_times):
            Z_M_old[m] = (Z_M[m][0].copy(), Z_M[m][1].copy())

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break

        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        U_0 *= rho / rho_new
        for m in range(1, n_times):
            U_L, U_R = U_M[m]
            U_L *= rho / rho_new
            U_R *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
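
# A usage sketch with a squared-exponential temporal kernel weighting the
# Psi(K_s - K_t) terms (synthetic data; init_precision, prox_logdet,
# check_norm_prox, update_rho, convergence and objective are assumed to be the
# module-level helpers used above):
import numpy as np

rng = np.random.default_rng(0)
T, n, d = 4, 60, 5
X = rng.standard_normal((T, n, d))
emp_cov = np.array([x.T.dot(x) / n for x in X])      # shape (T, d, d)

times = np.arange(T)
kernel = np.exp(-0.5 * (times[:, None] - times[None, :]) ** 2)

K, cov, n_iter = kernel_time_graphical_lasso(
    emp_cov, alpha=0.1, kernel=kernel, n_samples=np.full(T, n))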
def latent_time_graphical_lasso(emp_cov,
                                alpha=0.01,
                                tau=1.,
                                rho=1.,
                                beta=1.,
                                eta=1.,
                                max_iter=100,
                                n_samples=None,
                                verbose=False,
                                psi='laplacian',
                                phi='laplacian',
                                mode='admm',
                                tol=1e-4,
                                rtol=1e-4,
                                return_history=False,
                                return_n_iter=True,
                                update_rho_options=None,
                                compute_objective=True,
                                init='empirical'):
    r"""Latent variable time-varying graphical lasso solver.

    Solves the following problem via ADMM:
      min sum_{i=1}^T -n_i log_likelihood(S_i, K_i-L_i) + alpha ||K_i||_{od,1}
          + tau ||L_i||_*
          + beta sum_{i=2}^T Psi(K_i - K_{i-1})
          + eta sum_{i=2}^T Phi(L_i - L_{i-1})

    where S_i = (1/n_i) X_i^T \times X_i is the empirical covariance of data
    matrix X (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance matrices, one per time point.
    alpha, tau, beta, eta : float, optional
        Regularisation parameters.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iteration before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zeros', ndarray}, default 'empirical'
        How to initialise the inverse covariance matrix. The default is to
        invert the empirical covariance.

    Returns
    -------
    K, L : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)
    phi, prox_phi, phi_node_penalty = check_norm_prox(phi)

    Z_0 = init_precision(emp_cov, mode=init)
    Z_1 = Z_0.copy()[:-1]
    Z_2 = Z_0.copy()[1:]
    W_0 = np.zeros_like(Z_0)
    W_1 = np.zeros_like(Z_1)
    W_2 = np.zeros_like(Z_2)

    X_0 = np.zeros_like(Z_0)
    X_1 = np.zeros_like(Z_1)
    X_2 = np.zeros_like(Z_2)
    U_1 = np.zeros_like(W_1)
    U_2 = np.zeros_like(W_2)

    R_old = np.zeros_like(Z_0)
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)
    W_1_old = np.zeros_like(W_1)
    W_2_old = np.zeros_like(W_2)

    # divisor for consensus variables, accounting for two fewer matrices
    divisor = np.full(emp_cov.shape[0], 3, dtype=float)
    divisor[0] -= 1
    divisor[-1] -= 1

    if n_samples is None:
        n_samples = np.ones(emp_cov.shape[0])

    checks = []
    for iteration_ in range(max_iter):
        # update R
        A = Z_0 - W_0 - X_0
        A += A.transpose(0, 2, 1)
        A /= 2.
        A *= -rho / n_samples[:, None, None]
        A += emp_cov
        # A = emp_cov / rho - A

        R = np.array(
            [prox_logdet(a, lamda=ni / rho) for a, ni in zip(A, n_samples)])

        # update Z_0
        A = R + W_0 + X_0
        A[:-1] += Z_1 - X_1
        A[1:] += Z_2 - X_2
        A /= divisor[:, None, None]
        # soft_thresholding_ = partial(soft_thresholding, lamda=alpha / rho)
        # Z_0 = np.array(map(soft_thresholding_, A))
        Z_0 = soft_thresholding(A,
                                lamda=alpha / (rho * divisor[:, None, None]))

        # update Z_1, Z_2
        A_1 = Z_0[:-1] + X_1
        A_2 = Z_0[1:] + X_2
        if not psi_node_penalty:
            prox_e = prox_psi(A_2 - A_1, lamda=2. * beta / rho)
            Z_1 = .5 * (A_1 + A_2 - prox_e)
            Z_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            Z_1, Z_2 = prox_psi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 * beta / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update W_0
        A = Z_0 - R - X_0
        A[:-1] += W_1 - U_1
        A[1:] += W_2 - U_2
        A /= divisor[:, None, None]
        A += A.transpose(0, 2, 1)
        A /= 2.

        W_0 = np.array([
            prox_trace_indicator(a, lamda=tau / (rho * div))
            for a, div in zip(A, divisor)
        ])

        # update W_1, W_2
        A_1 = W_0[:-1] + U_1
        A_2 = W_0[1:] + U_2
        if not phi_node_penalty:
            prox_e = prox_phi(A_2 - A_1, lamda=2. * eta / rho)
            W_1 = .5 * (A_1 + A_2 - prox_e)
            W_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            W_1, W_2 = prox_phi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 * eta / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update residuals
        X_0 += R - Z_0 + W_0
        X_1 += Z_0[:-1] - Z_1
        X_2 += Z_0[1:] - Z_2
        U_1 += W_0[:-1] - W_1
        U_2 += W_0[1:] - W_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(R - Z_0 + W_0) + squared_norm(Z_0[:-1] - Z_1) +
            squared_norm(Z_0[1:] - Z_2) + squared_norm(W_0[:-1] - W_1) +
            squared_norm(W_0[1:] - W_2))

        snorm = rho * np.sqrt(
            squared_norm(R - R_old) + squared_norm(Z_1 - Z_1_old) +
            squared_norm(Z_2 - Z_2_old) + squared_norm(W_1 - W_1_old) +
            squared_norm(W_2 - W_2_old))

        obj = objective(emp_cov, n_samples, R, Z_0, Z_1, Z_2, W_0, W_1, W_2,
                        alpha, tau, beta, eta, psi, phi) \
            if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(R.size + 4 * Z_1.size) * tol + rtol * max(
                np.sqrt(
                    squared_norm(R) + squared_norm(Z_1) + squared_norm(Z_2) +
                    squared_norm(W_1) + squared_norm(W_2)),
                np.sqrt(
                    squared_norm(Z_0 - W_0) + squared_norm(Z_0[:-1]) +
                    squared_norm(Z_0[1:]) + squared_norm(W_0[:-1]) +
                    squared_norm(W_0[1:]))),
            e_dual=np.sqrt(R.size + 4 * Z_1.size) * tol + rtol * rho *
            (np.sqrt(
                squared_norm(X_0) + squared_norm(X_1) + squared_norm(X_2) +
                squared_norm(U_1) + squared_norm(U_2))))

        R_old = R.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()
        W_1_old = W_1.copy()
        W_2_old = W_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f, "
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        X_0 *= rho / rho_new
        X_1 *= rho / rho_new
        X_2 *= rho / rho_new
        U_1 *= rho / rho_new
        U_2 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, W_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
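The solver above expects emp_cov as a T x d x d stack of per-time empirical covariances and, optionally, an n_samples vector with the number of observations behind each time point. A minimal sketch of assembling those inputs is shown below; the synthetic data_list and its sizes are illustrative assumptions, not part of the example.

import numpy as np

rng = np.random.default_rng(0)
data_list = [rng.standard_normal((50, 10)) for _ in range(4)]  # T=4 windows, d=10 features

emp_cov = np.array([np.cov(X_t, rowvar=False) for X_t in data_list])    # shape (T, d, d)
n_samples = np.array([X_t.shape[0] for X_t in data_list], dtype=float)  # shape (T,)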
Example #14
def logistic_alternating(K,
                         y,
                         lamda=0.01,
                         beta=0.01,
                         gamma=.5,
                         max_iter=100,
                         l1_ratio_lamda=0.1,
                         l1_ratio_beta=0.1,
                         deep=True,
                         verbose=0,
                         tol=1e-4,
                         return_n_iter=True,
                         fit_intercept=True,
                         lr_p2=None):
    # multiple patients
    n_patients = len(K)
    n_kernels = len(K[0])
    coef = np.ones(n_kernels)
    alpha = [np.zeros(K[j].shape[2]) for j in range(n_patients)]
    objective_new = 0

    max_iter_deep = max_iter // 3 if deep else 1

    if lr_p2 is None:
        raise ValueError("lr_p2 cannot be None")

    for iteration_ in range(max_iter):
        w_old = coef.copy()
        alpha_old = [a.copy() for a in alpha]
        objective_old = objective_new

        for i in range(n_patients):
            lr_p2[i].fit(np.tensordot(coef, K[i], axes=1), y[i])

        alpha = [log.coef_.ravel() for log in lr_p2]
        intercepts = [log.intercept_.ravel() for log in lr_p2]
        alpha_intercept = [
            np.hstack((a, c)) for a, c in zip(alpha, intercepts)
        ]

        # X = np.tensordot(alpha, K, axes=([0], [2])).T
        # X = sum(K[j].dot(alpha[j]).T for j in range(n_patients))
        # coef = lr_p1.fit(X, y).coef_.ravel()

        for it in range(max_iter_deep):
            coef_old = coef.copy()

            l2_reg = beta * (1 - l1_ratio_beta)
            loss, gradient = _logistic_loss_and_grad(coef, alpha_intercept, K,
                                                     y, l2_reg)
            l1_reg = beta * l1_ratio_beta
            coef = soft_thresholding(coef - gamma * gradient, gamma * l1_reg)
            coef = np.maximum(coef, 0.)

            if np.linalg.norm(coef - coef_old) < tol:
                break

        objective_new = logistic_objective(K, y, alpha, coef, lamda, beta)
        objective_difference = abs(objective_new - objective_old)
        # snorm = np.sqrt(squared_norm(coef - w_old) +
        #                 squared_norm(alpha - alpha_old))

        diff_w = np.linalg.norm(coef - w_old)
        diff_a = np.sqrt(
            sum(squared_norm(a - a_old) for a, a_old in zip(alpha, alpha_old)))

        if verbose:  # and iteration_ % 10 == 0:
            # print("obj: %.4f, snorm: %.4f" % (obj, snorm))
            print("obj: %.4f, loss: %.4f, diff_w: %.4f, diff_a: %.4f" %
                  (objective_new, logistic_loss(K, y, alpha, coef, lamda,
                                                beta), diff_w, diff_a))

        if diff_a < tol and objective_difference < tol:
            break
        if np.isnan(diff_w) or np.isnan(diff_a) or np.isnan(
                objective_difference):
            raise ValueError('something is nan')
    else:
        warnings.warn("Objective did not converge.")
    return_list = [alpha, coef, intercepts]
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
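logistic_alternating requires a pre-built list of per-patient logistic estimators in lr_p2 (it raises if lr_p2 is None) and reads their coef_ and intercept_ attributes after fitting. A hedged sketch of plausible inputs follows; the kernel stacks, labels, and the choice of scikit-learn's LogisticRegression are assumptions made only for illustration.

import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
n_patients, n_kernels, n_i = 3, 5, 40

# K[i]: stack of n_kernels PSD kernel matrices for patient i; y[i]: binary labels.
K = [rng.standard_normal((n_kernels, n_i, n_i)) for _ in range(n_patients)]
K = [np.array([k.dot(k.T) for k in K_i]) for K_i in K]
y = [rng.integers(0, 2, size=n_i) for _ in range(n_patients)]

# One estimator per patient; the solver refits each on the coef-weighted kernel.
lr_p2 = [LogisticRegression(max_iter=200) for _ in range(n_patients)]

# alpha, coef, intercepts, n_iter = logistic_alternating(K, y, lr_p2=lr_p2)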
Example #15
def enet_kernel_learning_admm(K,
                              y,
                              lamda=0.01,
                              beta=0.01,
                              rho=1.,
                              max_iter=100,
                              verbose=0,
                              rtol=1e-4,
                              tol=1e-4,
                              return_n_iter=True,
                              update_rho_options=None):
    """Elastic Net kernel learning.

    Solve the following problem via ADMM:
        min sum_{i=1}^p 1/2 ||alpha_i * w * K_i - y_i||^2
            + lamda ||w||_1 + beta ||w||_2^2
    """
    n_patients = len(K)
    n_kernels = len(K[0])
    coef = np.ones(n_kernels)
    u_1 = np.zeros(n_kernels)
    u_2 = np.zeros(n_kernels)
    w_1 = np.zeros(n_kernels)
    w_2 = np.zeros(n_kernels)

    w_1_old = w_1.copy()
    w_2_old = w_2.copy()

    checks = []
    for iteration_ in range(max_iter):
        # update alpha
        # solve (AtA + 2I)^-1 (Aty) with A = wK
        A = [K[j].T.dot(coef) for j in range(n_patients)]
        KK = [A[j].dot(A[j].T) for j in range(n_patients)]
        yy = [y[j].dot(A[j]) for j in range(n_patients)]

        alpha = [
            _solve_cholesky_kernel(KK[j], yy[j][..., None], 2).ravel()
            for j in range(n_patients)
        ]
        # alpha = [_solve_cholesky_kernel(
        #     K_dot_coef[j], y[j][..., None], 0).ravel() for j in range(n_patients)]

        w_1 = soft_thresholding(coef + u_1, lamda / rho)
        w_2 = prox_laplacian(coef + u_2, beta / rho)

        # equivalent to alpha_dot_K
        # solve (sum(AtA) + 2*rho I)^-1 (sum(Aty) + rho(w1+w2-u1-u2))
        # with A = K * alpha
        A = [K[j].dot(alpha[j]) for j in range(n_patients)]
        KK = sum(A[j].dot(A[j].T) for j in range(n_patients))
        yy = sum(y[j].dot(A[j].T) for j in range(n_patients))
        yy += rho * (w_1 + w_2 - u_1 - u_2)

        coef = _solve_cholesky_kernel(KK, yy[..., None], 2 * rho).ravel()

        # update residuals
        u_1 += coef - w_1
        u_2 += coef - w_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(squared_norm(coef - w_1) + squared_norm(coef - w_2))
        snorm = rho * np.sqrt(
            squared_norm(w_1 - w_1_old) + squared_norm(w_2 - w_2_old))

        obj = objective_admm(K, y, alpha, lamda, beta, coef, w_1, w_2)

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(2 * coef.size) * tol +
            rtol * max(np.sqrt(squared_norm(coef) + squared_norm(coef)),
                       np.sqrt(squared_norm(w_1) + squared_norm(w_2))),
            e_dual=np.sqrt(2 * coef.size) * tol + rtol * rho *
            (np.sqrt(squared_norm(u_1) + squared_norm(u_2))))

        w_1_old = w_1.copy()
        w_2_old = w_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f, "
                  "eps_pri: %.4f, eps_dual: %.4f" % check)

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual and iteration_ > 1:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        u_1 *= rho / rho_new
        u_2 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    return_list = [alpha, coef]
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
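The ADMM split above handles the elastic-net penalty through two consensus copies of the kernel weights: w_1 absorbs the l1 term via soft_thresholding and w_2 absorbs the squared l2 term via prox_laplacian. A self-contained sketch of those two proximal operators, under the assumption that prox_laplacian here denotes the proximal map of lamda * ||x||_2^2:

import numpy as np

def soft_thresholding_sketch(x, lamda):
    # prox of lamda * ||x||_1: shrink every entry towards zero by lamda.
    return np.sign(x) * np.maximum(np.abs(x) - lamda, 0.0)

def prox_squared_l2_sketch(x, lamda):
    # prox of lamda * ||x||_2^2: a uniform rescaling of x.
    return x / (1.0 + 2.0 * lamda)

x = np.array([-1.5, 0.2, 3.0])
print(soft_thresholding_sketch(x, 0.5))  # [-1.   0.   2.5]
print(prox_squared_l2_sketch(x, 0.5))    # [-0.75  0.1   1.5 ]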
Example #16
def infimal_convolution(
    S,
    alpha=1.0,
    tau=1.0,
    rho=1.0,
    max_iter=100,
    verbose=False,
    tol=1e-4,
    rtol=1e-2,
    return_history=False,
    return_n_iter=True,
    update_rho_options=None,
    compute_objective=True,
):
    r"""Latent variable graphical lasso solver.

    Solves the following problem via ADMM:
        min - log_likelihood(S, K - L) + alpha ||K||_{od,1} + tau ||L||_*

    where S is the empirical covariance of the data
    matrix D (training observations by features).

    Parameters
    ----------
    S : array-like
        Empirical covariance matrix.
    alpha, tau : float, optional
        Regularisation parameters.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iterations before convergence.
    verbose : bool, default False
        Print info at each iteration.

    Returns
    -------
    K, L : np.array, 2-dimensional, size (d x d)
        Solution to the problem.
    covariance : np.array, 2-dimensional
        Covariance estimate, the pseudo-inverse of K.
    n_iter : int
        If return_n_iter, returns the number of iterations before convergence.
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    K = np.zeros_like(S)
    L = np.zeros_like(S)
    U = np.zeros_like(S)
    R_old = np.zeros_like(S)

    checks = []
    for iteration_ in range(max_iter):
        # update R
        A = K - L - U
        A += A.T
        A /= 2.0
        R = prox_laplacian(S + rho * A, lamda=rho / 2.0)

        A = L + R + U
        K = soft_thresholding(A, lamda=alpha / rho)

        A = K - R - U
        A += A.T
        A /= 2.0
        L = prox_trace_indicator(A, lamda=tau / rho)

        # update residuals
        U += R - K + L

        # diagnostics, reporting, termination checks
        obj = objective(S, R, K, L, alpha,
                        tau) if compute_objective else np.nan
        rnorm = np.linalg.norm(R - K + L)
        snorm = rho * np.linalg.norm(R - R_old)
        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(R.size) * tol +
            rtol * max(np.linalg.norm(R), np.linalg.norm(K - L)),
            e_dual=np.sqrt(R.size) * tol + rtol * rho * np.linalg.norm(U),
        )
        R_old = R.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f, "
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break
        if check.obj == np.inf:
            break
        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        U *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = linalg.pinvh(K)
    return_list = [K, L, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
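A minimal usage sketch for infimal_convolution, assuming the empirical covariance S is computed from a data matrix D (observations by features); the synthetic data and the chosen alpha/tau values are illustrative assumptions.

import numpy as np

rng = np.random.default_rng(0)
D = rng.standard_normal((200, 8))   # 200 observations, 8 features
S = np.cov(D, rowvar=False)         # empirical covariance, shape (8, 8)

# With the default flags the call returns [K, L, covariance_, n_iter]:
# K, L, covariance_, n_iter = infimal_convolution(S, alpha=0.1, tau=0.5)
# The objective treats K - L as the precision matrix; L collects the latent contribution.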
Example #17
def enet_kernel_learning_admm2(K,
                               y,
                               lamda=0.01,
                               beta=0.01,
                               rho=1.,
                               max_iter=100,
                               verbose=0,
                               rtol=1e-4,
                               tol=1e-4,
                               return_n_iter=True,
                               update_rho_options=None):
    """Elastic Net kernel learning.

    Solve the following problem via ADMM:
        min sum_{i=1}^p 1/2 ||y_i - alpha_i * sum_{k=1}^{n_k} w_k * K_{ik}||^2
        + lamda ||w||_1 + beta sum_{j=1}^{c_i}||alpha_j||_2^2
    """
    n_patients = len(K)
    n_kernels = len(K[0])
    coef = np.ones(n_kernels)
    alpha = [np.zeros(K[j].shape[2]) for j in range(n_patients)]

    u = [np.zeros(K[j].shape[1]) for j in range(n_patients)]
    u_1 = np.zeros(n_kernels)
    w_1 = np.zeros(n_kernels)

    x_old = [np.zeros(K[j].shape[1]) for j in range(n_patients)]
    w_1_old = w_1.copy()
    # w_2_old = w_2.copy()

    checks = []
    for iteration_ in range(max_iter):
        # update x
        A = [K[j].T.dot(coef) for j in range(n_patients)]
        x = [
            prox_laplacian(y[j] + rho * (A[j].T.dot(alpha[j]) - u[j]),
                           rho / 2.) for j in range(n_patients)
        ]

        # update alpha
        # solve (AtA + 2I)^-1 (Aty) with A = wK
        KK = [rho * A[j].dot(A[j].T) for j in range(n_patients)]
        yy = [rho * A[j].dot(x[j] + u[j]) for j in range(n_patients)]
        alpha = [
            _solve_cholesky_kernel(KK[j], yy[j][..., None], 2 * beta).ravel()
            for j in range(n_patients)
        ]
        # equivalent to alpha_dot_K
        # solve (sum(AtA) + 2*rho I)^-1 (sum(Aty) + rho(w1+w2-u1-u2))
        # with A = K * alpha
        A = [K[j].dot(alpha[j]) for j in range(n_patients)]
        KK = sum(A[j].dot(A[j].T) for j in range(n_patients))
        yy = sum(A[j].dot(x[j] + u[j]) for j in range(n_patients))
        yy += w_1 - u_1
        coef = _solve_cholesky_kernel(KK, yy[..., None], 1).ravel()

        w_1 = soft_thresholding(coef + u_1, lamda / rho)
        # w_2 = prox_laplacian(coef + u_2, beta / rho)

        # update residuals
        alpha_coef_K = [
            alpha[j].dot(K[j].T.dot(coef)) for j in range(n_patients)
        ]
        residuals = [x[j] - alpha_coef_K[j] for j in range(n_patients)]
        u = [u[j] + residuals[j] for j in range(n_patients)]
        u_1 += coef - w_1

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(coef - w_1) +
            sum(squared_norm(residuals[j]) for j in range(n_patients)))
        snorm = rho * np.sqrt(
            squared_norm(w_1 - w_1_old) +
            sum(squared_norm(x[j] - x_old[j]) for j in range(n_patients)))

        obj = objective_admm2(x, y, alpha, lamda, beta, w_1)
        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(coef.size + sum(x[j].size
                                          for j in range(n_patients))) * tol +
            rtol * max(
                np.sqrt(
                    squared_norm(coef) + sum(
                        squared_norm(alpha_coef_K[j])
                        for j in range(n_patients))),
                np.sqrt(
                    squared_norm(w_1) +
                    sum(squared_norm(x[j]) for j in range(n_patients)))),
            e_dual=np.sqrt(coef.size + sum(x[j].size
                                           for j in range(n_patients))) * tol +
            rtol * rho * (np.sqrt(
                squared_norm(u_1) +
                sum(squared_norm(u[j]) for j in range(n_patients)))))

        w_1_old = w_1.copy()
        x_old = [x[j].copy() for j in range(n_patients)]

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f, "
                  "eps_pri: %.4f, eps_dual: %.4f" % check)

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual and iteration_ > 1:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        u = [u[j] * (rho / rho_new) for j in range(n_patients)]
        u_1 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    return_list = [alpha, coef]
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
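Every ADMM loop in these examples rescales its scaled dual variables by rho / rho_new whenever the penalty parameter changes, which keeps the underlying unscaled duals unchanged. Below is a hedged sketch of a residual-balancing rho update in the spirit of Boyd et al.; the constants and the helper itself are assumptions for illustration, not regain's actual update_rho implementation.

def update_rho_sketch(rho, rnorm, snorm, mu=10.0, tau_inc=2.0, tau_dec=2.0, **kwargs):
    # Grow rho when the primal residual dominates, shrink it when the dual one does.
    if rnorm > mu * snorm:
        return rho * tau_inc
    if snorm > mu * rnorm:
        return rho / tau_dec
    return rho

# update_rho_sketch(1.0, rnorm=0.5, snorm=0.01)  ->  2.0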
Example #18
def enet_kernel_learning_admm(
        K, y, lamda=0.01, beta=0.01, rho=1., max_iter=100, verbose=0, rtol=1e-4,
        tol=1e-4, return_n_iter=True, update_rho_options=None):
    """Elastic Net kernel learning.

    Solve the following problem via ADMM:
        min sum_{i=1}^p 1/2 ||alpha_i * w * K_i - y_i||^2
            + lamda ||w||_1 + beta ||w||_2^2
    """
    n_patients = len(K)
    n_kernels = len(K[0])
    coef = np.ones(n_kernels)
    u_1 = np.zeros(n_kernels)
    u_2 = np.zeros(n_kernels)
    w_1 = np.zeros(n_kernels)
    w_2 = np.zeros(n_kernels)

    w_1_old = w_1.copy()
    w_2_old = w_2.copy()

    checks = []
    for iteration_ in range(max_iter):
        # update alpha
        # solve (AtA + 2I)^-1 (Aty) with A = wK
        A = [K[j].T.dot(coef) for j in range(n_patients)]
        KK = [A[j].dot(A[j].T) for j in range(n_patients)]
        yy = [y[j].dot(A[j]) for j in range(n_patients)]

        alpha = [_solve_cholesky_kernel(
            KK[j], yy[j][..., None], 2).ravel() for j in range(n_patients)]
        # alpha = [_solve_cholesky_kernel(
        #     K_dot_coef[j], y[j][..., None], 0).ravel() for j in range(n_patients)]

        w_1 = soft_thresholding(coef + u_1, lamda / rho)
        w_2 = prox_laplacian(coef + u_2, beta / rho)

        # equivalent to alpha_dot_K
        # solve (sum(AtA) + 2*rho I)^-1 (sum(Aty) + rho(w1+w2-u1-u2))
        # with A = K * alpha
        A = [K[j].dot(alpha[j]) for j in range(n_patients)]
        KK = sum(A[j].dot(A[j].T) for j in range(n_patients))
        yy = sum(y[j].dot(A[j].T) for j in range(n_patients))
        yy += rho * (w_1 + w_2 - u_1 - u_2)

        coef = _solve_cholesky_kernel(KK, yy[..., None], 2 * rho).ravel()

        # update residuals
        u_1 += coef - w_1
        u_2 += coef - w_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(squared_norm(coef - w_1) + squared_norm(coef - w_2))
        snorm = rho * np.sqrt(
            squared_norm(w_1 - w_1_old) + squared_norm(w_2 - w_2_old))

        obj = objective_admm(K, y, alpha, lamda, beta, coef, w_1, w_2)

        check = convergence(
            obj=obj, rnorm=rnorm, snorm=snorm,
            e_pri=np.sqrt(2 * coef.size) * tol + rtol * max(
                np.sqrt(squared_norm(coef) + squared_norm(coef)),
                np.sqrt(squared_norm(w_1) + squared_norm(w_2))),
            e_dual=np.sqrt(2 * coef.size) * tol + rtol * rho * (
                np.sqrt(squared_norm(u_1) + squared_norm(u_2))))

        w_1_old = w_1.copy()
        w_2_old = w_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f, "
                  "eps_pri: %.4f, eps_dual: %.4f" % check)

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual and iteration_ > 1:
            break

        rho_new = update_rho(rho, rnorm, snorm, iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        u_1 *= rho / rho_new
        u_2 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    return_list = [alpha, coef]
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
Example #19
def enet_kernel_learning(K,
                         y,
                         lamda=0.01,
                         beta=0.01,
                         gamma='auto',
                         max_iter=100,
                         verbose=0,
                         tol=1e-4,
                         return_n_iter=True):
    """Elastic Net kernel learning.

    Solve the following problem via alternating minimisation:
        min sum_{i=1}^p 1/2 ||alpha_i * w * K_i - y_i||^2
            + lamda ||w||_1 + beta ||w||_2^2
    """
    n_patients = len(K)
    n_kernels = len(K[0])
    coef = np.ones(n_kernels)

    alpha = [np.zeros(K[j].shape[2]) for j in range(n_patients)]
    # KKT = [K[j].T.dot(K[j]) for j in range(len(K))]
    # print(KKT[0].shape)
    if gamma == 'auto':
        lipschitz_constant = np.array([
            sum(
                np.linalg.norm(K_j[i].dot(K_j[i].T))
                for i in range(K_j.shape[0])) for K_j in K
        ])
        gamma = 1. / (lipschitz_constant)

    objective_new = 0
    for iteration_ in range(max_iter):
        w_old = coef.copy()
        alpha_old = [a.copy() for a in alpha]
        objective_old = objective_new

        # update w
        A = [K[j].dot(alpha[j]) for j in range(n_patients)]
        alpha_coef_K = [
            alpha[j].dot(K[j].T.dot(coef)) for j in range(n_patients)
        ]
        gradient = sum(
            (alpha_coef_K[j] - y[j]).dot(A[j].T) for j in range(n_patients))

        # gradient_2 = coef.dot(sum(
        #     np.dot(K[j].dot(alpha[j]), K[j].dot(alpha[j]).T)
        #     for j in range(len(K)))) - sum(
        #         y[j].dot(K[j].dot(alpha[j]).T) for j in range(len(K)))

        # gradient = coef.dot(sum(
        #     alpha[j].dot(KKT[j].dot(alpha[j])) for j in range(len(K)))) - sum(
        #         y[j].dot(K[j].dot(alpha[j]).T) for j in range(len(K)))

        # gradient += 2 * beta * coef
        coef = soft_thresholding(coef - gamma * gradient, lamda=lamda * gamma)

        # update alpha
        # for j in range(len(K)):
        #     alpha[j] = _solve_cholesky_kernel(
        #         K[j].T.dot(coef), y[j][..., None], lamda).ravel()
        A = [K[j].T.dot(coef) for j in range(n_patients)]
        alpha_coef_K = [
            alpha[j].dot(K[j].T.dot(coef)) for j in range(n_patients)
        ]
        gradient = [(alpha_coef_K[j] - y[j]).dot(A[j].T) + 2 * beta * alpha[j]
                    for j in range(n_patients)]
        alpha = [alpha[j] - gamma * gradient[j] for j in range(n_patients)]

        objective_new = objective(K, y, alpha, lamda, beta, coef)
        objective_difference = abs(objective_new - objective_old)
        snorm = np.sqrt(
            squared_norm(coef - w_old) +
            sum(squared_norm(a - a_old) for a, a_old in zip(alpha, alpha_old)))

        obj = objective(K, y, alpha, lamda, beta, coef)

        if verbose and iteration_ % 10 == 0:
            print("obj: %.4f, snorm: %.4f" % (obj, snorm))

        if snorm < tol and objective_difference < tol:
            break
        if np.isnan(snorm) or np.isnan(objective_difference):
            raise ValueError('Objective or iterate difference became NaN.')
    else:
        warnings.warn("Objective did not converge.")

    return_list = [alpha, coef]
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
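With gamma='auto' the step size above is the reciprocal of a per-patient Lipschitz estimate of the smooth part of the objective. A hedged sketch of the same idea on a single quadratic loss 0.5 * ||A w - b||^2, whose gradient is Lipschitz with constant ||A||_2^2 (the toy matrix and loop length are assumptions):

import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((50, 8))
b = rng.standard_normal(50)

lipschitz = np.linalg.norm(A, 2) ** 2   # squared largest singular value of A
gamma = 1.0 / lipschitz                 # 'auto'-style step size

w = np.zeros(8)
for _ in range(500):
    w -= gamma * A.T.dot(A.dot(w) - b)  # plain gradient step; stable with this gamma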
Example #20
def enet_kernel_learning(
        K, y, lamda=0.01, beta=0.01, gamma='auto', max_iter=100, verbose=0,
        tol=1e-4, return_n_iter=True):
    """Elastic Net kernel learning.

    Solve the following problem via alternating minimisation:
        min sum_{i=1}^p 1/2 ||alpha_i * w * K_i - y_i||^2
            + lamda ||w||_1 + beta ||w||_2^2
    """
    n_patients = len(K)
    n_kernels = len(K[0])
    coef = np.ones(n_kernels)

    alpha = [np.zeros(K[j].shape[2]) for j in range(n_patients)]
    # KKT = [K[j].T.dot(K[j]) for j in range(len(K))]
    # print(KKT[0].shape)
    if gamma == 'auto':
        lipschitz_constant = np.array([
            sum(np.linalg.norm(K_j[i].dot(K_j[i].T))
                for i in range(K_j.shape[0]))
            for K_j in K])
        gamma = 1. / (lipschitz_constant)

    objective_new = 0
    for iteration_ in range(max_iter):
        w_old = coef.copy()
        alpha_old = [a.copy() for a in alpha]
        objective_old = objective_new

        # update w
        A = [K[j].dot(alpha[j]) for j in range(n_patients)]
        alpha_coef_K = [alpha[j].dot(K[j].T.dot(coef))
                        for j in range(n_patients)]
        gradient = sum((alpha_coef_K[j] - y[j]).dot(A[j].T)
                       for j in range(n_patients))

        # gradient_2 = coef.dot(sum(
        #     np.dot(K[j].dot(alpha[j]), K[j].dot(alpha[j]).T)
        #     for j in range(len(K)))) - sum(
        #         y[j].dot(K[j].dot(alpha[j]).T) for j in range(len(K)))

        # gradient = coef.dot(sum(
        #     alpha[j].dot(KKT[j].dot(alpha[j])) for j in range(len(K)))) - sum(
        #         y[j].dot(K[j].dot(alpha[j]).T) for j in range(len(K)))

        # gradient += 2 * beta * coef
        coef = soft_thresholding(coef - gamma * gradient, lamda=lamda * gamma)

        # update alpha
        # for j in range(len(K)):
        #     alpha[j] = _solve_cholesky_kernel(
        #         K[j].T.dot(coef), y[j][..., None], lamda).ravel()
        A = [K[j].T.dot(coef) for j in range(n_patients)]
        alpha_coef_K = [alpha[j].dot(K[j].T.dot(coef))
                        for j in range(n_patients)]
        gradient = [(alpha_coef_K[j] - y[j]).dot(A[j].T) + 2 * beta * alpha[j]
                    for j in range(n_patients)]
        alpha = [alpha[j] - gamma * gradient[j] for j in range(n_patients)]

        objective_new = objective(K, y, alpha, lamda, beta, coef)
        objective_difference = abs(objective_new - objective_old)
        snorm = np.sqrt(squared_norm(coef - w_old) + sum(
            squared_norm(a - a_old) for a, a_old in zip(alpha, alpha_old)))

        obj = objective(K, y, alpha, lamda, beta, coef)

        if verbose and iteration_ % 10 == 0:
            print("obj: %.4f, snorm: %.4f" % (obj, snorm))

        if snorm < tol and objective_difference < tol:
            break
        if np.isnan(snorm) or np.isnan(objective_difference):
            raise ValueError('Objective or iterate difference became NaN.')
    else:
        warnings.warn("Objective did not converge.")

    return_list = [alpha, coef]
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
Example #21
def fit_each_variable(
    X,
    ix,
    alpha=1e-2,
    gamma=1,
    tol=1e-3,
    max_iter=100,
    verbose=0,
    update_gamma=0.5,
    return_history=True,
    compute_objective=True,
    return_n_iter=False,
    adjust_gamma=False,
    A=None,
    T=0,
    rho=1,
):
    n, d = X.shape
    theta = np.zeros(d - 1)
    selector = [i for i in range(d) if i != ix]

    def gradient(X, theta, r, selector, n, A, T, rho):
        XTX = X[:, selector].T.dot(X[:, r])
        EXK = X[:, selector].T.dot(np.exp(X[:, selector].dot(theta)))
        to_add = 0
        if A is not None:
            to_add = (rho * T) * (theta - A[r, selector]) / n
        return -(1 / n) * (XTX - EXK) + to_add

    thetas = [theta]
    checks = []
    for iter_ in range(max_iter):
        theta_old = thetas[-1]
        grad = gradient(X, theta, ix, selector, n, A, T, rho)
        loss_old = loss_single_variable(X, theta_old, n, ix, selector)
        # Backtracking line search: shrink gamma until the proximal step
        # satisfies the sufficient-decrease condition.
        while True:
            theta = soft_thresholding(theta_old - gamma * grad, alpha * gamma)
            loss_new = loss_single_variable(X, theta, n, ix, selector)

            diff_theta2 = np.linalg.norm(theta_old - theta) ** 2
            grad_diff = grad.dot(theta_old - theta)
            bound = loss_old - grad_diff + diff_theta2 / (2 * gamma)

            if loss_new > bound or np.isinf(loss_new) or np.isnan(loss_new):
                gamma = update_gamma * gamma
            else:
                break
        thetas.append(theta)
        if iter_ > 0:
            check = convergence(
                iter=iter_,
                obj=objective_single_variable(X, theta, n, ix, selector,
                                              alpha),
                iter_norm=np.linalg.norm(thetas[-2] - thetas[-1]),
                iter_r_norm=(np.linalg.norm(thetas[-2] - thetas[-1]) /
                             np.linalg.norm(thetas[-2])),
            )
            checks.append(check)
            # if adjust_gamma: # TODO multiply or divide
            if verbose:
                print("Iter: %d, objective: %.4f, iter_norm %.4f,"
                      " iter_norm_normalized: %.4f" %
                      (check[0], check[1], check[2], check[3]))

            if np.abs(check[2]) < tol:
                break

    return_list = [thetas[-1]]
    if return_history:
        return_list.append(thetas)
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iter_)

    return return_list
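fit_each_variable estimates the neighbourhood of a single node ix by proximal gradient descent with backtracking on the remaining columns of X; the exponential term in the gradient suggests a Poisson-type node-wise model. A hedged usage sketch with synthetic count data (the data, ix and parameter values are assumptions):

import numpy as np

rng = np.random.default_rng(0)
X = rng.poisson(1.0, size=(200, 6)).astype(float)   # 200 samples, d = 6 variables

# theta, thetas, checks = fit_each_variable(X, ix=0, alpha=0.05, gamma=1.0)
# theta has length d - 1: one coefficient for every variable other than ix.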