Example #1
 def testUpdateSvd(self): 
     """
     Let's see if the update to the SVD works. 
     """
     numRuns = 10        
     
     for i in range(numRuns): 
         m, n = numpy.random.randint(10, 100), numpy.random.randint(10, 100) 
         k = 3
         
         X = numpy.random.rand(m, n)
         
         U, s, V = RandomisedSVD.svd(X, k)
         
         E = numpy.random.randn(m, n) * 0.2 
         
         U2, s2, V2 = RandomisedSVD.svd(X + E, k)
         U3, s3, V3 = RandomisedSVD.updateSvd(X, U, s, V, E, k)
         
         XE = X + E
         error1 = numpy.linalg.norm(XE - (U*s).dot(V.T))
         error2 = numpy.linalg.norm(XE - (U2*s2).dot(V2.T))
         error3 = numpy.linalg.norm(XE - (U3*s3).dot(V3.T))
 
         self.assertTrue(error1 >= error3)
         #print(error1, error2, error3)
         
         #Test use of linear operators 
         X = GeneralLinearOperator.asLinearOperator(X)
         E = GeneralLinearOperator.asLinearOperator(E) 
         
         U3, s3, V3 = RandomisedSVD.updateSvd(X, U, s, V, E, k)
         error4 = numpy.linalg.norm(XE - (U3*s3).dot(V3.T))
         #The operator-based update should also improve on the stale factors 
         self.assertTrue(error1 >= error4)
Example #2
 def updateSvd(A, U, s, V, E, k, p=10): 
     """
     Given a matrix A whose approximate SVD is U s V.T, compute the SVD 
     of the new matrix A + E, reusing the previous factors. A and E are 
     sparse matrices or linear operators. The rank of the approximation 
     is k, and p is an oversampling parameter. 
     """
     Parameter.checkInt(k, 1, float("inf"))
     Parameter.checkInt(p, 0, float("inf"))     
                
     if isinstance(A, GeneralLinearOperator): 
         L = A 
     else: 
         L = GeneralLinearOperator.asLinearOperator(A)                    
                
     if isinstance(E, GeneralLinearOperator): 
         M = E 
     else: 
         M = GeneralLinearOperator.asLinearOperator(E) 
         
     N = GeneralLinearOperator.asLinearOperatorSum(L, M)
     
     n = A.shape[1]
     omega = numpy.random.randn(n, p)
     
     Y = U*s + M.matmat(V)
     Y = numpy.c_[Y, N.matmat(omega)]
     
     Q, R = numpy.linalg.qr(Y)
     del omega, Y, R
     gc.collect() 
     
     B = N.rmatmat(Q).T 
     U, s, V = numpy.linalg.svd(B, full_matrices=False)
     del B 
     V = V.T
     U = Q.dot(U)
 
     U = U[:, 0:k]
     s = s[0:k]
     V = V[:, 0:k]        
     
     return U, s, V 
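
As a usage sketch (a minimal, hypothetical driver; it assumes numpy and the RandomisedSVD class containing this method are importable, and the shapes are illustrative):

import numpy

# Hypothetical driver: factorise X once, then refresh the factors after a
# perturbation E instead of recomputing the SVD from scratch.
X = numpy.random.rand(50, 40)
E = numpy.random.randn(50, 40) * 0.1
k = 5

U, s, V = RandomisedSVD.svd(X, k)
U2, s2, V2 = RandomisedSVD.updateSvd(X, U, s, V, E, k)

# The refreshed factors should approximate X + E better than the stale ones.
print(numpy.linalg.norm(X + E - (U2 * s2).dot(V2.T)))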
     
     
Example #3
    def lowRankOp(U, s, V):
        """
        Note that U, s, V are typically numpy arrays, so we get parallelism for free. We assume the 
        given matrix is U s V.T and operate on it. 
        """
        def matvec(w):
            return (U * s).dot(V.T.dot(w))

        def rmatvec(w):
            return (V * s).dot(U.T.dot(w))

        def matmat(W):
            return (U * s).dot(V.T.dot(W))

        def rmatmat(W):
            return (V * s).dot(U.T.dot(W))

        return GeneralLinearOperator((U.shape[0], V.shape[0]),
                                     matvec,
                                     rmatvec,
                                     matmat,
                                     rmatmat,
                                     dtype=U.dtype)
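
A short sketch of applying the operator, assuming this method lives on LinOperatorUtils (as the soft-impute code below calls it) and that U, s, V come from one of the SVD routines above:

# Wrap existing factors as an operator and apply it to a vector,
# without ever forming U s V.T densely.
L = LinOperatorUtils.lowRankOp(U, s, V)
w = numpy.random.rand(V.shape[0])
y = L.matvec(w)            # equivalent to (U * s).dot(V.T.dot(w))
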
Example #4
    def sparseLowRankOp(X, U, s, V, parallel=False):
        if X.shape[0] != U.shape[0] or X.shape[1] != V.shape[0]:
            raise ValueError("X and U s V^T should have the same shape")

        if not parallel:

            def matvec(w):
                return X.dot(w) + (U * s).dot(V.T.dot(w))

            def rmatvec(w):
                return X.T.dot(w) + (V * s).dot(U.T.dot(w))

            def matmat(W):
                return X.dot(W) + (U * s).dot(V.T.dot(W))

            def rmatmat(W):
                return X.T.dot(W) + (V * s).dot(U.T.dot(W))
        else:

            def matvec(w):
                return X.pdot(w) + (U * s).dot(V.T.dot(w))

            def rmatvec(w):
                return X.T.pdot(w) + (V * s).dot(U.T.dot(w))

            def matmat(W):
                return X.pdot(W) + (U * s).dot(V.T.dot(W))

            def rmatmat(W):
                return X.T.pdot(W) + (V * s).dot(U.T.dot(W))

        return GeneralLinearOperator(X.shape,
                                     matvec,
                                     rmatvec,
                                     matmat,
                                     rmatmat,
                                     dtype=X.dtype)
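
A usage sketch under the same assumptions as above; note that parallel=True routes multiplications through X.pdot, so it presumably requires a matrix type exposing a parallel dot, while the default path only needs the usual .dot of a scipy.sparse matrix:

# Represent X + U s V.T as a single operator; nothing dense is materialised.
M = LinOperatorUtils.sparseLowRankOp(X, U, s, V)
y = M.matvec(w)            # X.dot(w) + (U * s).dot(V.T.dot(w))
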
Example #5
File: core.py Project: charanpald/sppy
def rsvd(A, k, p=10, q=2, omega=None):
    """
    Compute the randomised SVD using the algorithm on page 9 of Halko et al., 
    "Finding structure with randomness: Probabilistic algorithms for constructing 
    approximate matrix decompositions", 2009.         
    
    Finds the partial SVD of a sparse or dense matrix A, resolving the largest k 
    singular vectors/values, using exponent q and k+p projections. Returns the 
    left and right singular vectors, and the singular values. The resulting matrix 
    can be approximated using A ~ U s V.T. To improve the approximation quality 
    for a fixed k, increase p or q.
    
    :param A: A sparse or dense matrix or GeneralLinearOperator 
    
    :param k: The number of singular values and vectors to compute
    
    :param p: The oversampling parameter 
    
    :param q: The number of power iterations (the exponent in the algorithm).
    
    :param omega: An initial matrix to perform random projections onto with at least k columns 
    
    :return U: The left singular vectors 
    
    :return s: The singular values 
    
    :return V: The right singular vectors
    """
    Parameter.checkInt(k, 1, float("inf"))
    Parameter.checkInt(p, 0, float("inf"))
    Parameter.checkInt(q, 0, float("inf"))

    if isinstance(A, GeneralLinearOperator):
        L = A
    else:
        L = GeneralLinearOperator.asLinearOperator(A)

    n = L.shape[1]
    if omega is None:
        omega = numpy.random.randn(n, k + p)
    else:
        omega = numpy.c_[omega, numpy.random.randn(n, p + k - omega.shape[1])]

    Y = L.matmat(omega)
    Q, R = numpy.linalg.qr(Y)
    del omega

    for i in range(q):
        Y = L.rmatmat(Q)
        Q, R = numpy.linalg.qr(Y)
        gc.collect()

        Y = L.matmat(Q)
        Q, R = numpy.linalg.qr(Y)
        gc.collect()

    del Y
    del R
    gc.collect()

    B = L.rmatmat(Q).T
    U, s, V = numpy.linalg.svd(B, full_matrices=False)
    del B
    V = V.T
    U = Q.dot(U)

    U = U[:, 0:k]
    s = s[0:k]
    V = V[:, 0:k]

    return U, s, V
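
A minimal sketch of calling rsvd (scipy assumed available; the shapes are illustrative):

import numpy
import scipy.sparse

# A minimal call on a random sparse matrix: rank 10 with the default
# oversampling p=10 and q=2 power iterations.
A = scipy.sparse.rand(1000, 500, density=0.01, format="csc")
U, s, V = rsvd(A, k=10)

# For a fixed k, raising p or q tightens the reconstruction error.
error = numpy.linalg.norm(A.toarray() - (U * s).dot(V.T))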
Example #6
            def next(self):
                X = self.XIterator.next()
                logging.debug("Learning on matrix with shape: " +
                              str(X.shape) + " and " + str(X.nnz) +
                              " non-zeros")

                if self.iterativeSoftImpute.weighted:
                    #Compute row and col probabilities
                    up, vp = SparseUtils.nonzeroRowColsProbs(X)
                    nzuInds = up == 0
                    nzvInds = vp == 0
                    u = numpy.sqrt(1 / (up + numpy.array(nzuInds, numpy.int)))
                    v = numpy.sqrt(1 / (vp + numpy.array(nzvInds, numpy.int)))
                    u[nzuInds] = 0
                    v[nzvInds] = 0

                if self.rhos is not None:
                    self.iterativeSoftImpute.setRho(self.rhos.next())

                if not scipy.sparse.isspmatrix_csc(X):
                    raise ValueError("X must be a csc_matrix not " +
                                     str(type(X)))

                #Figure out what lambda should be
                #PROPACK has problems with convergence
                Y = scipy.sparse.csc_matrix(X, dtype=numpy.float)
                U, s, V = ExpSU.SparseUtils.svdArpack(Y, 1, kmax=20)
                del Y
                #U, s, V = SparseUtils.svdPropack(X, 1, kmax=20)
                maxS = s[0]
                logging.debug("Largest singular value : " + str(maxS))

                (n, m) = X.shape

                if self.j == 0:
                    self.oldU = numpy.zeros((n, 1))
                    self.oldS = numpy.zeros(1)
                    self.oldV = numpy.zeros((m, 1))
                else:
                    oldN = self.oldU.shape[0]
                    oldM = self.oldV.shape[0]

                    if self.iterativeSoftImpute.updateAlg == "initial":
                        if n > oldN:
                            self.oldU = Util.extendArray(
                                self.oldU, (n, self.oldU.shape[1]))
                        elif n < oldN:
                            self.oldU = self.oldU[0:n, :]

                        if m > oldM:
                            self.oldV = Util.extendArray(
                                self.oldV, (m, self.oldV.shape[1]))
                        elif m < oldM:
                            self.oldV = self.oldV[0:m, :]
                    elif self.iterativeSoftImpute.updateAlg == "zero":
                        self.oldU = numpy.zeros((n, 1))
                        self.oldS = numpy.zeros(1)
                        self.oldV = numpy.zeros((m, 1))
                    else:
                        raise ValueError("Unknown SVD update algorithm: " +
                                         self.updateAlg)

                rowInds, colInds = X.nonzero()

                gamma = self.iterativeSoftImpute.eps + 1
                i = 0

                self.iterativeSoftImpute.measures = numpy.zeros(
                    (self.iterativeSoftImpute.maxIterations, 4))

                while gamma > self.iterativeSoftImpute.eps:
                    if i == self.iterativeSoftImpute.maxIterations:
                        logging.debug("Maximum number of iterations reached")
                        break

                    ZOmega = SparseUtilsCython.partialReconstructPQ(
                        (rowInds, colInds), self.oldU * self.oldS, self.oldV)
                    Y = X - ZOmega
                    #Y = Y.tocsc()
                    #del ZOmega
                    Y = csarray(Y, storagetype="row")
                    gc.collect()

                    #os.system('taskset -p 0xffffffff %d' % os.getpid())

                    if self.iterativeSoftImpute.svdAlg == "propack":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=False)
                        newU, newS, newV = SparseUtils.svdPropack(
                            L,
                            k=self.iterativeSoftImpute.k,
                            kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg == "arpack":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=False)
                        newU, newS, newV = SparseUtils.svdArpack(
                            L,
                            k=self.iterativeSoftImpute.k,
                            kmax=self.iterativeSoftImpute.kmax)
                    elif self.iterativeSoftImpute.svdAlg == "svdUpdate":
                        newU, newS, newV = SVDUpdate.addSparseProjected(
                            self.oldU, self.oldS, self.oldV, Y,
                            self.iterativeSoftImpute.k)
                    elif self.iterativeSoftImpute.svdAlg == "rsvd":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=True)
                        newU, newS, newV = RandomisedSVD.svd(
                            L,
                            self.iterativeSoftImpute.k,
                            p=self.iterativeSoftImpute.p,
                            q=self.iterativeSoftImpute.q)
                    elif self.iterativeSoftImpute.svdAlg == "rsvdUpdate":
                        L = LinOperatorUtils.sparseLowRankOp(Y,
                                                             self.oldU,
                                                             self.oldS,
                                                             self.oldV,
                                                             parallel=True)
                        if self.j == 0:
                            newU, newS, newV = RandomisedSVD.svd(
                                L,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p,
                                q=self.iterativeSoftImpute.q)
                        else:
                            newU, newS, newV = RandomisedSVD.svd(
                                L,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p,
                                q=self.iterativeSoftImpute.qu,
                                omega=self.oldV)
                    elif self.iterativeSoftImpute.svdAlg == "rsvdUpdate2":

                        if self.j == 0:
                            L = LinOperatorUtils.sparseLowRankOp(Y,
                                                                 self.oldU,
                                                                 self.oldS,
                                                                 self.oldV,
                                                                 parallel=True)
                            newU, newS, newV = RandomisedSVD.svd(
                                L,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p,
                                q=self.iterativeSoftImpute.q)
                        else:
                            #Need linear operator which is U s V
                            L = LinOperatorUtils.lowRankOp(
                                self.oldU, self.oldS, self.oldV)
                            Y = GeneralLinearOperator.asLinearOperator(
                                Y, parallel=True)
                            newU, newS, newV = RandomisedSVD.updateSvd(
                                L,
                                self.oldU,
                                self.oldS,
                                self.oldV,
                                Y,
                                self.iterativeSoftImpute.k,
                                p=self.iterativeSoftImpute.p)
                    else:
                        raise ValueError("Unknown SVD algorithm: " +
                                         self.iterativeSoftImpute.svdAlg)

                    if self.iterativeSoftImpute.weighted and i == 0:
                        delta = numpy.diag((u * newU.T).dot(newU))
                        pi = numpy.diag((v * newV.T).dot(newV))
                        lmbda = (maxS / numpy.max(
                            delta * pi)) * self.iterativeSoftImpute.rho
                        lmbdav = lmbda * delta * pi
                    elif not self.iterativeSoftImpute.weighted:
                        lmbda = maxS * self.iterativeSoftImpute.rho
                        if i == 0:
                            logging.debug("lambda: " + str(lmbda))
                        lmbdav = lmbda

                    newS = newS - lmbdav
                    #Soft threshold
                    newS = numpy.clip(newS, 0, numpy.max(newS))

                    normOldZ = (self.oldS**2).sum()
                    normNewZmOldZ = (self.oldS**2).sum() + (
                        newS**2).sum() - 2 * numpy.trace(
                            (self.oldV.T.dot(newV * newS)).dot(
                                newU.T.dot(self.oldU * self.oldS)))

                    #We can get newZ == oldZ in which case we break
                    if normNewZmOldZ < self.tol:
                        gamma = 0
                    elif abs(normOldZ) < self.tol:
                        gamma = self.iterativeSoftImpute.eps + 1
                    else:
                        gamma = normNewZmOldZ / normOldZ

                    if self.iterativeSoftImpute.verbose:
                        theta1 = (
                            self.iterativeSoftImpute.k -
                            numpy.linalg.norm(self.oldU.T.dot(newU), 'fro')**
                            2) / self.iterativeSoftImpute.k
                        theta2 = (
                            self.iterativeSoftImpute.k -
                            numpy.linalg.norm(self.oldV.T.dot(newV), 'fro')**
                            2) / self.iterativeSoftImpute.k
                        thetaS = numpy.linalg.norm(
                            newS - self.oldS)**2 / numpy.linalg.norm(newS)**2
                        self.iterativeSoftImpute.measures[i, :] = numpy.array(
                            [gamma, theta1, theta2, thetaS])

                    self.oldU = newU.copy()
                    self.oldS = newS.copy()
                    self.oldV = newV.copy()

                    logging.debug("Iteration " + str(i) + " gamma=" +
                                  str(gamma))
                    i += 1

                if self.iterativeSoftImpute.postProcess:
                    #Add the mean vectors
                    previousS = newS
                    newU = numpy.c_[newU, numpy.array(X.mean(1)).ravel()]
                    newV = numpy.c_[newV, numpy.array(X.mean(0)).ravel()]
                    newS = self.iterativeSoftImpute.unshrink(X, newU, newV)

                    #Note that this increases the rank of U and V by 1
                    #print("Difference in s after postprocessing: " + str(numpy.linalg.norm(previousS - newS[0:-1])))
                    logging.debug("Difference in s after postprocessing: " +
                                  str(numpy.linalg.norm(previousS -
                                                        newS[0:-1])))

                logging.debug("Number of iterations for rho=" +
                              str(self.iterativeSoftImpute.rho) + ": " +
                              str(i))
                self.j += 1
                return (newU, newS, newV)
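
The core of each iteration above is soft-thresholding the singular values of the current reconstruction, which is the proximal step for the nuclear norm; isolated as a sketch (the name softThreshold is illustrative):

def softThreshold(s, lmbda):
    # Shrink each singular value by lmbda and clip at zero: this is what
    # "newS = newS - lmbdav" followed by the numpy.clip above implements.
    return numpy.clip(s - lmbda, 0, None)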
Example #7
    def parallelSparseOp(X):
        """
        Return the parallel linear operator corresponding to left and right multiply of 
        csc_matrix X. Note that there is a significant overhead for creating and waiting 
        for locked processes. 
        """
        if not scipy.sparse.isspmatrix_csc(X):
            raise ValueError("Currently only supports csc_matrices")

        #Converting to csr doubles memory here, but saves memory when spread over many CPUs and makes matmat faster
        Xr = X.tocsr()
        numProcesses = multiprocessing.cpu_count()
        numJobs = numProcesses
        rowInds = numpy.array(numpy.linspace(0, X.shape[0], numJobs + 1),
                              numpy.int)
        colInds = numpy.array(numpy.linspace(0, X.shape[1], numJobs + 1),
                              numpy.int)

        XrData = multiprocessing.RawArray("d", numpy.array(Xr.data))
        XrIndices = multiprocessing.RawArray("i", Xr.indices)
        XrIndptr = multiprocessing.RawArray("i", Xr.indptr)

        def matvec(w):
            pool = multiprocessing.Pool(processes=numProcesses)
            paramList = []
            for i in range(numJobs):
                paramList.append((Xr[rowInds[i]:rowInds[i + 1], :], w))

            iterator = pool.imap(dot, paramList, chunksize=1)
            #iterator = itertools.imap(dot, paramList)
            p = numpy.zeros(X.shape[0])

            for i in range(numJobs):
                p[rowInds[i]:rowInds[i + 1]] = iterator.next()

            pool.terminate()
            return p

        def rmatvec(w):
            pool = multiprocessing.Pool(processes=numProcesses)
            paramList = []
            for i in range(numJobs):
                paramList.append((X[:, colInds[i]:colInds[i + 1]], w))

            iterator = pool.imap(dotT, paramList, chunksize=1)
            #iterator = itertools.imap(dotT, paramList)
            p = numpy.zeros(X.shape[1])

            for i in range(numJobs):
                p[colInds[i]:colInds[i + 1]] = iterator.next()

            pool.terminate()
            return p

        def matmat(W):
            WArray = multiprocessing.RawArray("d", W.flatten())
            pool = multiprocessing.Pool(processes=numProcesses,
                                        initializer=initProcess,
                                        initargs=(XrData, XrIndices, XrIndptr,
                                                  X.shape, WArray, W.shape))
            params = []

            for i in range(numJobs):
                params.append((rowInds, i))

            iterator = pool.map(dot2, params)
            P = numpy.zeros((X.shape[0], W.shape[1]))

            for i in range(numJobs):
                P[rowInds[i]:rowInds[i + 1], :] = iterator[i]

            return P

        def rmatmat(W):
            pool = multiprocessing.Pool(processes=numProcesses)
            paramList = []
            for i in range(numJobs):
                paramList.append((X[:, colInds[i]:colInds[i + 1]], W))

            iterator = pool.imap(dotT, paramList, chunksize=1)
            #iterator = itertools.imap(dotT, paramList)
            P = numpy.zeros((X.shape[1], W.shape[1]))

            for i in range(numJobs):
                P[colInds[i]:colInds[i + 1], :] = iterator.next()

            pool.terminate()
            return P

        return GeneralLinearOperator(X.shape,
                                     matvec,
                                     rmatvec,
                                     matmat,
                                     rmatmat,
                                     dtype=X.dtype)
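
A usage sketch, assuming as before that this method is exposed on LinOperatorUtils; W stands for a dense 2-D array:

# Only worthwhile for large products, since every call creates (and tears
# down) a fresh process pool.
L = LinOperatorUtils.parallelSparseOp(X)      # X a scipy.sparse csc_matrix
P = L.matmat(W)                               # block-rows of X.dot(W) in parallel
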
Example #8
    def parallelSparseLowRankOp(X, U, s, V):
        numProcesses = multiprocessing.cpu_count()
        colInds = numpy.array(numpy.linspace(0, X.shape[1], numProcesses + 1),
                              numpy.int)

        def matvec(w):
            pool = multiprocessing.Pool(processes=numProcesses)
            paramList = []
            for i in range(numProcesses):
                paramList.append((X[:, colInds[i]:colInds[i + 1]], U, s,
                                  V[colInds[i]:colInds[i + 1], :],
                                  w[colInds[i]:colInds[i + 1]]))

            iterator = pool.imap(dotSVD, paramList, chunksize=1)

            #iterator = itertools.imap(dotSVD, paramList)
            p = numpy.zeros(X.shape[0])

            for i in range(numProcesses):
                p += iterator.next()

            pool.terminate()

            return p

        def rmatvec(w):
            pool = multiprocessing.Pool(processes=numProcesses)
            paramList = []
            for i in range(numProcesses):
                paramList.append((X[:, colInds[i]:colInds[i + 1]], U, s,
                                  V[colInds[i]:colInds[i + 1], :], w))

            iterator = pool.imap(dotSVDT, paramList, chunksize=1)

            #iterator = itertools.imap(dotSVDT, paramList)
            p = numpy.zeros(X.shape[1])

            for i in range(numProcesses):
                p[colInds[i]:colInds[i + 1]] = iterator.next()

            pool.terminate()

            return p

        def matmat(W):
            pool = multiprocessing.Pool(processes=numProcesses)
            paramList = []
            for i in range(numProcesses):
                paramList.append((X[:, colInds[i]:colInds[i + 1]], U, s,
                                  V[colInds[i]:colInds[i + 1], :],
                                  W[colInds[i]:colInds[i + 1], :]))

            iterator = pool.map(dotSVD, paramList, chunksize=1)

            #iterator = itertools.imap(dotSVD, paramList)
            P = numpy.zeros((X.shape[0], W.shape[1]))

            for i in range(numProcesses):
                P += iterator[i]

            pool.terminate()

            return P

        def rmatmat(W):
            pool = multiprocessing.Pool(processes=numProcesses)
            paramList = []
            for i in range(numProcesses):
                paramList.append((X[:, colInds[i]:colInds[i + 1]], U, s,
                                  V[colInds[i]:colInds[i + 1], :], W))

            iterator = pool.imap(dotSVDT, paramList, chunksize=1)

            #iterator = itertools.imap(dotSVDT, paramList)
            P = numpy.zeros((X.shape[1], W.shape[1]))

            for i in range(numProcesses):
                P[colInds[i]:colInds[i + 1], :] = iterator.next()

            pool.terminate()

            return P

        return GeneralLinearOperator(X.shape,
                                     matvec,
                                     rmatvec,
                                     matmat,
                                     rmatmat,
                                     dtype=X.dtype)
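
Usage mirrors sparseLowRankOp, with the work split over column blocks; a hedged sketch under the same LinOperatorUtils assumption:

# X + U s V.T with each column block of X and V handled by a worker process.
M = LinOperatorUtils.parallelSparseLowRankOp(X, U, s, V)
p = M.matvec(w)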