Example #1
def eRPCA(m,
          n,
          u,
          v,
          vecM,
          vecEpsilon,
          maxRank,
          lam=None,
          mu=None,
          rho=None,
          epsilon1=None,
          epsilon2=None,
          truncateK=0,
          maxIteration=1000,
          verbose=True,
          hasWeave=True):
    """This is an optimized code based on:

    Paffenroth, R., Du Toit, P., Nong, R., Scharf, L., Jayasumana,
    A. P., & Bandara, V. (2013). Space-time signal processing for
    distributed pattern detection in sensor networks. IEEE Journal of
    Selected Topics in Signal Processing, 7(1), 38-49.

    and

    Paffenroth, R. C., Nong, R., & Du Toit, P. C. (2013, September).
    On covariance structure in noisy, big data. In SPIE Optical
    Engineering + Applications (pp. 88570E-88570E). International
    Society for Optics and Photonics.

    Args:

       m, n: the full size of the input matrix M.

       u, v, vecM: the samples of M as indices and values of a sparse
                    matrix.  All are one dimensional arrays.

       vecEpsilon: the pointwise error bounds.

       maxRank: the maximum rank of M to consider for completion.
                 (note, Lin-Chen-Ma have a way to predict this, which
                 we are not using here)

       lam: the value of the coupling constant between L and S

       mu: the initial value for the augmented Lagrangian
            parameter.  (optional, defaults to value from
            Lin-Chen-Ma)

       rho: the growth factor for the augmented Lagrangian
             parameter.  (optional, defaults to value from
             Lin-Chen-Ma)

       epsilon1: the first error criterion that controls for the
                  error in the constraint.  (The idea for this is from
                  Lin-Chen-Ma)

       epsilon2: the second error criterion that controls for the
                  convergence of the method.  (The idea for this is from
                  Lin-Chen-Ma)

       maxIteration: the maximum number of iterations to
                      use. (optional, defaults to 1000)

       verbose: print out the convergence history. (optional,
                 defaults to True)

    Returns:

       U,E,VT: the SVD of the recovered low rank matrix.

       S: the recovered sparse outlier component, stored as a sparse
          matrix on the observed entries.
    """
    assert len(u.shape) == 1, 'u must be one dimensional'
    assert len(v.shape) == 1, 'v must be one dimensional'
    assert len(vecM.shape) == 1, 'vecM must be one dimensional'
    assert 0 <= np.max(u) < m, 'An entry in u is invalid'
    assert 0 <= np.max(v) < n, 'An entry in v is invalid'

    # The minimum value of the observed entries of M
    minM = np.min(vecM)

    if epsilon1 is None:
        # The default value for epsilon1 is from the bottom of page
        # 12 in Lin-Chen-Ma.
        # FIXME: Are these good for eRPCA?
        epsilon1 = 1e-5
    if epsilon2 is None:
        # The default value for epsilon2 is from the bottom of page
        # 12 in Lin-Chen-Ma.
        # FIXME: Are these good for eRPCA?
        epsilon2 = 1e-4

    # We want to keep around a sparse matrix version of vecM, but we need
    # to be careful about 0 values in vecM; we don't want them to get
    # discarded when we convert to a sparse matrix!  In particular, we
    # are using the sparse matrix in a slightly odd way.  We intend
    # that M stores both 0 and non-zero values, and that the entries
    # of M which are not stored are *unknown* (and not necessarily 0).
    # Therefore, we process the input vecM to make all 0 entries "small"
    # numbers relative to its smallest value.
    for i in range(len(vecM)):
        if vecM[i] == 0:
            vecM[i] = minM * epsilon1
        if vecEpsilon[i] == 0:
            vecEpsilon[i] = minM * epsilon1
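    # For example, an observed value of exactly 0.0 at some index (i, j)
    # would otherwise be indistinguishable from an unsampled entry once
    # stored in sparse format, so it is replaced with the tiny surrogate
    # minM * epsilon1.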

    # Create the required sparse matrices.  Note, u, v, vecM might have
    # repeats, and that is ok since sp.coo_matrix handles
    # that case, and we don't actually use vecM after here.
    M = sp.csc_matrix(sp.coo_matrix((vecM, (u, v)), shape=[m, n]))
    Epsilon = sp.csc_matrix(sp.coo_matrix((vecEpsilon, (u, v)), shape=[m, n]))

    # The SVD of the low rank part of the answer.
    U = np.matrix(np.zeros([m, maxRank]))
    E = np.zeros([maxRank])
    VT = np.matrix(np.zeros([maxRank, n]))

    # Compute the largest singular values of M (assuming the
    # unobserved entries are 0).  I am not convinced this is
    # principled, but I believe it is what they do in the paper.
    dummy, E0, dummy = sparseSVDUpdate(M, U[:, 0], np.array([E[0]]), VT[0, :])

    if mu is None:
        # The default value for mu_0 is from the bottom of page
        # 12 in Lin-Chen-Ma.  I believe that they use the
        # spectral norm of M (the largest singular value), where
        # the unobserved entries are assumed to be 0.
        # FIXME:  I am not sure this is principled.  I mean, why is 0 special?
        # I am pretty sure that I can break this with an improperly scaled M.
        # FIXME: Are these good for eRPCA?
        mu = 1. / E0[0]
    if rho is None:
        # The default value for rho is from the bottom of page
        # 12 in Lin-Chen-Ma, where rho_s below is the fraction of
        # observed entries (the sampling density).
        # FIXME: Are these good for eRPCA?
        rho_s = len(vecM) / float(m * n)
        rho = 1.2172 + 1.8588 * rho_s
    if lam is None:
        # FIXME:  Double check this with Candes "RPCA?"
        lam = 1. / np.sqrt(np.max([m, n]))
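    # For reference, the defaults assumed above are the Lin-Chen-Ma
    # inexact ALM choices mu_0 = 1/||M||_2 (spectral norm with the
    # unobserved entries taken as 0) and rho = 1.2172 + 1.8588*rho_s,
    # with rho_s the sampling density, while lam = 1/sqrt(max(m, n)) is
    # the coupling constant suggested in Candes, Li, Ma, and Wright,
    # "Robust Principal Component Analysis?" (2011).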

    # The sparse Lagrange multipliers
    Y = M * 0.0

    # The sparse matrix S
    S = M * 0.0

    # The projection of L onto Omega.  This is not required
    # but is convenient to have.
    LOmega = M * 0.0

    # We keep the previous answer to check convergence
    LS0 = S + LOmega

    # We also want this to check convergence
    partialFrobeniusM = sparseFrobeniusNorm(M)

    iteration = 0

    while True:
        # Break if we use too many iterations
        iteration += 1
        if iteration > maxIteration:
            break

        # This is the mathematical content of the algorithm
        ###################################################

        # # DEBUG ############################
        # print 'lagrangian before min L with S fixed',
        # vecLagrangianValue0 = vecLagrangian(E, S, M, LOmega,
        #                                     Epsilon.data, Y, mu, lam,
        #                                     truncateK=truncateK)
        # # DEBUG ############################

        # Minimize the Lagrangian with respect to L with S fixed
        [U, E, VT] = minNucPlusFrob(Y / mu + M - LOmega - S,
                                    U,
                                    E,
                                    VT,
                                    mu,
                                    truncateK=truncateK)
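        # For intuition, a dense sketch of the subproblem being solved
        # here (the rewrite below mirrors the author's own comments in
        # the MC code; how much of it minNucPlusFrob performs internally
        # is an assumption):
        #   Z = (Y / mu + M - LOmega - S).toarray() + U * np.diag(E) * VT
        #   Uz, s, VTz = np.linalg.svd(Z, full_matrices=False)
        #   E = np.maximum(s - 1.0 / mu, 0.0)
        # i.e. soft-thresholding the singular values of Z by 1/mu, which
        # is the closed-form minimizer of ||L||_* + (mu/2)*||L - Z||_F**2.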

        # If the smallest singular value we compute is too large
        # then we might have maxRank too small (and run into error problems).
        # We check that here.
        if (E[0] > epsilon2) and (E[-1] / E[0] > epsilon2):
            print('Smallest singular value may be too big; consider')
            print('increasing maxRank.  This will make the solver slower,')
            print('but improve convergence.')

        # Compute the projection of L onto Omega
        LOmega = projSVD(U, E, VT, u, v)

        # FIXME  I need a good test here.  Because of my approximations
        #        I suspect that the Lagrangian can go up based upon
        #        the above minimization.
        # # DEBUG ############################
        # print 'lagrangian before min S with L fixed',
        # vecLagrangianValue1 = vecLagrangian(E, S, M, LOmega,
        #                                     Epsilon.data, Y, mu, lam,
        #                                     truncateK=truncateK)
        # assert vecLagrangianValue1 <= vecLagrangianValue0, \
        #    'Lagrangian went up!'
        # # DEBUG ############################

        # Minimize the Lagrangian with respect to S with L fixed
        S.data = minShrink1Plus2Norm(Y.data / mu + M.data - LOmega.data,
                                     Epsilon.data, lam, mu)
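        # In standard RPCA (with no entrywise error bound Epsilon) this
        # step would be plain elementwise soft-thresholding,
        #   Z = Y.data / mu + M.data - LOmega.data
        #   S.data = np.sign(Z) * np.maximum(np.abs(Z) - lam / mu, 0.0)
        # The minShrink1Plus2Norm call above presumably generalizes this
        # to respect the pointwise bounds in Epsilon.data.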

        # # DEBUG ############################
        # print 'lagrangian before Y update',
        # vecLagrangianValue2 = vecLagrangian(E, S, M, LOmega,
        #                                     Epsilon.data, Y, mu, lam,
        #                                     truncateK=truncateK)
        # assert vecLagrangianValue2 <= vecLagrangianValue1, \
        #    'Lagrangian went up!'
        # # DEBUG ############################

        # Update the Lagrange multiplier
        Y.data = Y.data + mu * (M.data - S.data - LOmega.data)

        ###################################################
        # If the method is converging well then increase mu to focus
        # more on the constraint.  The test in Lin-Chen-Ma is
        #   mu*np.linalg.norm(POA_1-POA_0, ord=2)/||D||_F < epsilon2
        # but I don't know how to compute the spectral norm of a
        # partially observed matrix, so I replace it with the
        # Frobenius norm on the observed entries.
        # FIXME: Attempt to justify later.
        if mu * sparseFrobeniusNorm(LS0 - LOmega -
                                    S) / partialFrobeniusM < epsilon2:
            mu = rho * mu

        # Stopping criteria from page 12 of Lin, Chen, and Ma.
        # criterion1 = np.linalg.norm(D-A_1-E_1,
        #                             ord='fro')/np.linalg.norm(D, ord='fro')
        # criterion1 = np.linalg.norm(D-A_1-(POA_1 - A_1),
        #                             ord='fro')/np.linalg.norm(D, ord='fro')
        # criterion1 = np.linalg.norm(D-POA_1,
        #                             ord='fro')/np.linalg.norm(D, ord='fro')
        # FIXME: I may need to justify the change from the full
        #        Frobenius norm to the partial one.
        criterion1 = sparseFrobeniusNorm(M - LOmega - S) / partialFrobeniusM
        # criterion2 = np.min([mu_0, np.sqrt(mu_0)])*\
        #              np.linalg.norm(E_1-E_0,
        #                             ord='fro')/np.linalg.norm(D, ord='fro')
        # This is the one place where I depart from Lin-Chen-Ma.  The
        # stopping criterion there uses both A and POA.  As I want
        # the algorithm to be fast I ignore the A part, since that
        # would be O(mn).
        # FIXME:  Need to justify
        # FIXME: I may need to justify the change from the full
        #        Frobenius norm to the partial one.
        criterion2 = (np.min([mu, np.sqrt(mu)]) *
                      sparseFrobeniusNorm(LS0 - LOmega - S) /
                      partialFrobeniusM)

        if verbose:
            if iteration == 1:
                print()
                print('criterion1 is the constraint')
                print('criterion2 is the solution')
                print('iteration criterion1 epsilon1 ', end='')
                print('criterion2 epsilon2 rho      mu')
            if iteration % 10 == 0:
                print('%9d %10.2e %8.2e ' % (iteration, criterion1, epsilon1),
                      end='')
                print('%10.2e %8.2e %8.2e %8.2e' %
                      (criterion2, epsilon2, rho, mu))

        # If both error criteria are satisfied stop the algorithm
        if criterion1 < epsilon1 and criterion2 < epsilon2:
            if verbose:
                print('%9d %10.2e %8.2e ' % (iteration, criterion1, epsilon1),
                      end='')
                print('%10.2e %8.2e %8.2e %8.2e' %
                      (criterion2, epsilon2, rho, mu))
            break

        # Keep around the old answer for convergence testing.
        LS0 = LOmega + S

    S.data = shrink(Epsilon.data, S.data)
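    # The final shrink presumably removes the part of S that is already
    # explained by the pointwise error bounds, e.g. something like
    #   np.sign(S.data) * np.maximum(np.abs(S.data) - Epsilon.data, 0.0)
    # so that only violations larger than Epsilon survive as genuine
    # sparse outliers.  (This is an assumption about shrink's behavior.)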

    return [U, E, VT, S]
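
# A hedged usage sketch, not part of the original source: recover a low
# rank matrix from noisy, partially observed entries.  It assumes that
# numpy (as np) and the helper routines called by eRPCA are available in
# the surrounding module.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    m, n, rank = 50, 40, 2
    L_true = rng.randn(m, rank).dot(rng.randn(rank, n))
    # Observe 60% of the entries, with a small known error bound on each.
    mask = rng.rand(m, n) < 0.6
    u, v = np.nonzero(mask)
    vecM = L_true[u, v] + 0.01 * rng.randn(len(u))
    vecEpsilon = np.full(len(u), 0.05)
    U, E, VT, S = eRPCA(m, n, u, v, vecM, vecEpsilon, maxRank=5)
    # Recovered low rank matrix from its SVD factors.
    L_hat = (np.array(U) * E) @ np.array(VT)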
Example #2
def MC(m,
       n,
       u,
       v,
       d,
       maxRank,
       mu_0=None,
       rho=None,
       epsilon1=None,
       epsilon2=None,
       maxIteration=100,
       verbose=True,
       hasWeave=True):
    """ This is an optimized code from:
    "The Augmented Lagrange Multipler Method for Exact Recovery
     of Corrupted Low-Rank Matrices"
    by Zhouchen Lin, Minming Chen, and Yi Ma
    http://arxiv.org/abs/1009.5055

    Args:

        m, n: the full size of D.

        u, v, d: the samples of D as indices and values of a sparse matrix.
            All are one dimensional arrays.

        maxRank: the maximum rank of D to consider for completion.
          (note, Lin-Chen-Ma have a way to predict this,
           which we are not using here)

        mu_0: the initial value for the augmented Lagrangian parameter.
          (optional, defaults to value from
               Lin-Chen-Ma)

        rho: the growth factor for the augmented Lagrangian parameter.
          (optional, defaults to value from Lin-Chen-Ma)

        epsilon1: the first error criterion that controls for the error in
          the constraint.  (optional, defaults to value from Lin-Chen-Ma)

        epsilon2: the second error criterion that controls for the convergence
          of the method. (optional, defaults to value from Lin-Chen-Ma)

        maxIteration: the maximum number of iterations to use.
          (optional, defaults to 100)

        verbose: print out the convergence history.
          (optional, defaults to True)

    Returns:

        U, S, VT: the SVD of the recovered low rank matrix A, so that
          A = U*diag(S)*VT.
          (Note, Lin-Chen-Ma also track E, the difference between the
           input matrix and the recovered matrix with A+E=D, but E is
           not returned here.)
    """
    assert len(u.shape) == 1, 'u must be one dimensional'
    assert len(v.shape) == 1, 'v must be one dimensional'
    assert len(d.shape) == 1, 'd must be one dimensional'
    assert 0 <= np.max(u) < m, 'An entry in u is invalid'
    assert 0 <= np.max(v) < n, 'An entry in v is invalid'

    if epsilon1 is None:
        # The default value for epsilon1 is from the bottom of page
        # 12 in Lin-Chen-Ma.
        epsilon1 = 1e-7
    if epsilon2 is None:
        # The default value for epsilon2 is from the bottom of page
        # 12 in Lin-Chen-Ma.
        epsilon2 = 1e-6

    # The minimum value of the observed entries of D
    minD = np.min(d)

    # We want to keep around a sparse matrix version of D, but we need to be
    # careful about 0 values in d, we don't want them to get discarded when we
    # convert to a sparse matrix! In particular, we are using the sparse matrix
    # in a slightly odd way.  We intend that D stores both 0 and non-zero
    # values, and that the entries of D which are not stored are *unknown* (and
    # not necessarily 0).  Therefore, we process the input d to make all 0
    # entries "small" numbers relative to its smallest value.
    for i in range(len(d)):
        if d[i] == 0:
            d[i] = minD * epsilon1

    # Create the required sparse matrices.  Note, u,v,d might have
    # repeats, and that is ok since the sp.coo_matrix handles
    # that case, and we don't actually use d after here.
    D = sp.csc_matrix(sp.coo_matrix((d, (u, v)), shape=[m, n]))

    # The Frobenius norm of the observed entries of D.  This is
    # just the 2-norm of the *vector* of entries.
    partialFrobeniusD = sparseFrobeniusNorm(D)

    # The SVD of the answer A
    U = np.matrix(np.zeros([m, maxRank]))
    S = np.zeros([maxRank])
    VT = np.matrix(np.zeros([maxRank, n]))

    # Compute the largest singular values of D (assuming the unobserved entries
    # are 0).  I am not convinced this is principled, but I believe it is what
    # they do in the paper.
    dummy, S0, dummy = sparseSVDUpdate(D, U[:, 0], np.array([S[0]]), VT[0, :])

    if mu_0 is None:
        # The default value for mu_0 is from the bottom of page
        # 12 in Lin-Chen-Ma.  I believe that they use the
        # spectral norm of D (the largest singular value), where
        # the unobserved entries are assumed to be 0.
        # FIXME:  I am not sure this is principled.  I mean, why is 0 special?
        # I am pretty sure that I can break this with an improperly scaled D.
        mu_0 = 1. / S0[0]
    if rho is None:
        # The default value for rho is from the bottom of page
        # 12 in Lin-Chen-Ma, where rho_s below is the fraction of
        # observed entries (the sampling density).
        rho_s = len(d) / float(m * n)
        rho = 1.2172 + 1.8588 * rho_s

    # The sparse Lagrange multipliers
    Y_0 = D * 0.0

    # The projection of A onto Omega.  This is not required
    # but is convenient to have.
    POA_0 = D * 0.0
    POA_1 = D * 0.0

    iteration = 0
    while True:
        # Break if we use too many iterations
        iteration += 1
        if iteration > maxIteration:
            break

        # This is the mathematical content of the algorithm
        ###################################################
        # Note, full_matrices=False is required below for non-square matrices.
        # We know that E_0 = POA_0 - A_0 = POA_0 - U_0*S_0*VT_0
        # So,
        # [U,S,VT] = np.linalg.svd(D-E_0+Y_0/mu_0, full_matrices=False)
        # can be rewritten as
        # [U,S,VT] = np.linalg.svd(D-(POA_0 - U_0*S_0*VT_0)+Y_0/mu_0,
        #                          full_matrices=False)
        # Combining sparse terms we get
        # [U,S,VT] = np.linalg.svd( (D-POA_0+Y_0/mu_0) + U_0*S_0*VT_0,
        #                          full_matrices=False)

        [U, S, VT] = minNucPlusFrob(D - POA_0 + Y_0 / mu_0, U, S, VT, mu_0)
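        # As in eRPCA above, the closed-form minimizer of
        #   ||A||_* + (mu_0/2)*||A - Z||_F**2
        # is obtained by soft-thresholding the singular values of Z by
        # 1/mu_0; it is assumed that minNucPlusFrob applies that
        # thresholding to the SVD of the combined sparse and low rank
        # terms described in the comment above.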

        # and we compute the projection of A onto Omega
        # Note, making the temp array and then creating the sparse
        # matrix all at once is *much* faster.
        POA_1 = projSVD(U, S, VT, u, v)

        # POATmp = np.zeros([len(d)])
        # # FIXME:  Needs to be numba
        # for i in range(len(d)):
        #     POATmp[i] = U[u[i], :] * np.diag(S) * VT[:, v[i]]
        # POA_1 = sp.csc_matrix(sp.coo_matrix((POATmp, (u, v)), shape=[m, n]))

        # Update the Lagrange multiplier
        # We have that
        # E_1 = POA_1 - A_1 = POA_1 - U_1*S_1*VT_1
        # So we can plug into
        # Y_1 = Y_0 + mu_0*(D-A_1-E_1)
        # to get
        # Y_1 = Y_0 + mu_0*(D-A_1-(POA_1 - A_1))
        # so
        # Y_1 = Y_0 + mu_0*(D-POA_1)

        Y_1 = Y_0 + mu_0 * (D - POA_1)
        ###################################################

        # If the method is converging well then increase mu_0 to focus
        # more on the constraint.  The test in Lin-Chen-Ma is
        #   mu_0*np.linalg.norm(POA_1-POA_0, ord=2)/||D||_F < epsilon2
        # but I don't know how to compute the spectral norm of a
        # partially observed matrix, so I replace it with the
        # Frobenius norm on the observed entries.
        # FIXME: Attempt to justify later.
        if (mu_0 * sparseFrobeniusNorm(POA_1 - POA_0) / partialFrobeniusD <
                epsilon2):
            mu_0 = rho * mu_0

        # Stopping criteria from page 12 of Lin, Chen, and Ma.
        # criterion1 = np.linalg.norm(D-A_1-E_1, ord='fro')
        #   /np.linalg.norm(D, ord='fro')
        # criterion1 = np.linalg.norm(D-A_1-(POA_1 - A_1), ord='fro')
        #   /np.linalg.norm(D, ord='fro')
        # criterion1 = np.linalg.norm(D-POA_1, ord='fro')
        #   /np.linalg.norm(D, ord='fro')
        # FIXME:  I may need to justify the change from the full Frobenius
        #         norm to the partial one.
        criterion1 = sparseFrobeniusNorm(D - POA_1) / partialFrobeniusD
        # criterion2 = np.min([mu_0,np.sqrt(mu_0)])
        #   *np.linalg.norm(E_1-E_0, ord='fro')/np.linalg.norm(D, ord='fro')
        # This is the one place where I depart from Lin-Chen-Ma.  The stopping
        # criterion there uses both A and POA.  As I want the algorithm to be
        # fast I ignore the A part, since that would be O(mn).
        # FIXME:  Need to justify
        # FIXME:  I may need to justify the change from the full Frobenius
        #         norm to the partial one.
        criterion2 = np.min([mu_0, np.sqrt(mu_0)]) * \
            sparseFrobeniusNorm(POA_1 - POA_0) / partialFrobeniusD

        if verbose:
            if iteration == 1:
                print("printing")
                print(("iteration criterion1 epsilon1 " +
                       "criterion2 epsilon2 rho      mu"))
            if iteration % 10 == 0:
                print(('%9d %10.2e %8.2e %10.2e %8.2e %8.2e %8.2e' %
                       (iteration, criterion1, epsilon1, criterion2, epsilon2,
                        rho, mu_0)))

        # If both error criteria are satisfied stop the algorithm
        if criterion1 < epsilon1 and criterion2 < epsilon2:
            break

        Y_0 = Y_1.copy()
        POA_0 = POA_1.copy()

    return [U, S, VT]
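
# A hedged usage sketch, not part of the original source: complete a low
# rank matrix from a subset of its entries.  It assumes that numpy (as
# np) and the helper routines called by MC are available in the
# surrounding module.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    m, n, rank = 60, 50, 2
    A_true = rng.randn(m, rank).dot(rng.randn(rank, n))
    # Observe half of the entries at random.
    mask = rng.rand(m, n) < 0.5
    u, v = np.nonzero(mask)
    d = A_true[u, v]
    U, S, VT = MC(m, n, u, v, d, maxRank=5)
    # Recovered matrix from its SVD factors.
    A_hat = (np.array(U) * S) @ np.array(VT)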