def testUpdateSvd(self):
    """
    Check that updating an existing SVD gives a better approximation of
    X + E than the stale SVD of X does.

    For several random matrices X and perturbations E, the rank-k SVD of X
    is updated with RandomisedSVD.updateSvd and the Frobenius reconstruction
    error against X + E is compared with that of the un-updated factors. The
    same check is repeated with X and E wrapped as linear operators.
    """
    numRuns = 10

    for _ in range(numRuns):
        m, n = numpy.random.randint(10, 100), numpy.random.randint(10, 100)
        k = 3

        X = numpy.random.rand(m, n)
        U, s, V = RandomisedSVD.svd(X, k)

        E = numpy.random.randn(m, n) * 0.2
        XE = X + E

        U3, s3, V3 = RandomisedSVD.updateSvd(X, U, s, V, E, k)

        error1 = numpy.linalg.norm(XE - (U * s).dot(V.T))
        error3 = numpy.linalg.norm(XE - (U3 * s3).dot(V3.T))
        self.assertTrue(error1 >= error3)

        # Test use of linear operators. The update draws fresh random
        # projections, so the result is compared against the stale SVD
        # rather than for equality with a previous run.
        X = GeneralLinearOperator.asLinearOperator(X)
        E = GeneralLinearOperator.asLinearOperator(E)

        U3, s3, V3 = RandomisedSVD.updateSvd(X, U, s, V, E, k)
        # The original computed this from U2, s2, V2, which made the
        # assertion compare a value with itself.
        error4 = numpy.linalg.norm(XE - (U3 * s3).dot(V3.T))

        self.assertTrue(error1 >= error4)
def updateSvd(A, U, s, V, E, k, p=10):
    """
    Compute an approximate SVD of A + E, reusing the approximate SVD
    U s V.T already known for A.

    The range of A + E is sampled with the columns of U*s + E V together
    with p random Gaussian projections of A + E. The SVD is then computed in
    that subspace and truncated to the leading k components.

    :param A: A matrix or GeneralLinearOperator
    :param U: Left singular vectors of the approximation of A
    :param s: Singular values of the approximation of A
    :param V: Right singular vectors of the approximation of A
    :param E: The perturbation, a matrix or GeneralLinearOperator
    :param k: The number of singular values/vectors to return
    :param p: The number of random projections used in addition to U and V
    :return: The tuple (U, s, V) approximating A + E
    """
    Parameter.checkInt(k, 1, float("inf"))
    Parameter.checkInt(p, 0, float("inf"))

    # Wrap plain matrices so that both terms expose matmat/rmatmat
    opA = A if isinstance(A, GeneralLinearOperator) else GeneralLinearOperator.asLinearOperator(A)
    opE = E if isinstance(E, GeneralLinearOperator) else GeneralLinearOperator.asLinearOperator(E)
    opSum = GeneralLinearOperator.asLinearOperatorSum(opA, opE)

    numCols = A.shape[1]
    projections = numpy.random.randn(numCols, p)

    # Sample the range: updated left factor first, then the random projections
    sample = numpy.c_[U * s + opE.matmat(V), opSum.matmat(projections)]
    Q, R = numpy.linalg.qr(sample)

    # Release the large temporaries before the next allocation
    del projections
    del sample
    del R
    gc.collect()

    projected = opSum.rmatmat(Q).T
    smallU, sigma, Vt = numpy.linalg.svd(projected, full_matrices=False)
    del projected

    return Q.dot(smallU)[:, 0:k], sigma[0:k], Vt.T[:, 0:k]
def lowRankOp(U, s, V):
    """
    Build a GeneralLinearOperator that acts as the matrix U s V.T without
    ever forming it.

    Note that U, s, V are typically numpy arrays so we get parallelism for
    free. The same closure serves vectors and matrices, since the product is
    written identically in both cases.

    :param U: Left factor, one column per component
    :param s: Scaling applied to the columns of U (or V for the transpose)
    :param V: Right factor, one column per component
    :return: A GeneralLinearOperator of shape (U.shape[0], V.shape[0])
    """
    def forward(W):
        # (U s V.T) W, evaluated right to left
        return (U * s).dot(V.T.dot(W))

    def adjoint(W):
        # (V s U.T) W, evaluated right to left
        return (V * s).dot(U.T.dot(W))

    shape = (U.shape[0], V.shape[0])
    return GeneralLinearOperator(shape, forward, adjoint, forward, adjoint, dtype=U.dtype)
def sparseLowRankOp(X, U, s, V, parallel=False):
    """
    Build a GeneralLinearOperator that acts as X + U s V.T without forming
    the low-rank term explicitly.

    :param X: A matrix exposing dot (and pdot when parallel is set)
    :param U: Left factor of the low-rank term
    :param s: Scaling of the low-rank term
    :param V: Right factor of the low-rank term
    :param parallel: If true, multiply with X through X.pdot rather than X.dot
    :return: A GeneralLinearOperator with the shape and dtype of X
    :raises ValueError: If the shape of X differs from that of U s V.T
    """
    if (X.shape[0], X.shape[1]) != (U.shape[0], V.shape[0]):
        raise ValueError("X and U s V^T should have the same shape")

    # Select how products with X (or X.T) are carried out
    if parallel:
        def sparseDot(M, W):
            return M.pdot(W)
    else:
        def sparseDot(M, W):
            return M.dot(W)

    def forward(W):
        return sparseDot(X, W) + (U * s).dot(V.T.dot(W))

    def adjoint(W):
        return sparseDot(X.T, W) + (V * s).dot(U.T.dot(W))

    # The vector and matrix products share the same expression
    return GeneralLinearOperator(X.shape, forward, adjoint, forward, adjoint, dtype=X.dtype)
def rsvd(A, k, p=10, q=2, omega=None):
    """
    Compute the randomised SVD using the algorithm on page 9 of Halko et al.,
    Finding Structure with randomness: stochastic algorithms for constructing
    approximate matrix decompositions, 2009.

    Finds the partial SVD of a sparse or dense matrix A, resolving the largest
    k singular vectors/values, using exponent q and k+p projections. Returns
    the left and right singular vectors, and the singular values. The
    resulting matrix can be approximated using A ~ U s V.T. To improve the
    approximation quality for a fixed k, increase p or q.

    :param A: A sparse or dense matrix or GeneralLinearOperator
    :param k: The number of singular values and random projections
    :param p: The oversampling parameter
    :param q: The exponent for the projections.
    :param omega: An initial matrix to perform random projections onto. It is
        padded with random columns up to k+p columns; if it already has k+p
        columns or more it is used as given.
    :return U: The left singular vectors
    :return s: The singular values
    :return V: The right singular vectors
    """
    Parameter.checkInt(k, 1, float("inf"))
    Parameter.checkInt(p, 0, float("inf"))
    Parameter.checkInt(q, 0, float("inf"))

    if isinstance(A, GeneralLinearOperator):
        L = A
    else:
        L = GeneralLinearOperator.asLinearOperator(A)

    n = L.shape[1]
    numProjections = k + p

    if omega is None:
        omega = numpy.random.randn(n, numProjections)
    else:
        # Clamp at zero: a negative column count makes randn raise, which
        # previously rejected any omega wider than k+p.
        numExtra = max(0, numProjections - omega.shape[1])
        omega = numpy.c_[omega, numpy.random.randn(n, numExtra)]

    Y = L.matmat(omega)
    Q, R = numpy.linalg.qr(Y)
    del omega

    # Power iterations, re-orthogonalising after every multiplication
    for _ in range(q):
        Y = L.rmatmat(Q)
        Q, R = numpy.linalg.qr(Y)
        gc.collect()

        Y = L.matmat(Q)
        Q, R = numpy.linalg.qr(Y)
        gc.collect()

    del Y
    del R
    gc.collect()

    # SVD of the projection of A onto the subspace spanned by Q
    B = L.rmatmat(Q).T
    U, s, V = numpy.linalg.svd(B, full_matrices=False)
    del B
    V = V.T
    U = Q.dot(U)

    # Keep only the leading k components
    U = U[:, 0:k]
    s = s[0:k]
    V = V[:, 0:k]

    return U, s, V
def next(self):
    """
    Take the next matrix from self.XIterator and fit it with iterative
    soft-thresholded SVD, returning the factors (newU, newS, newV).

    The largest singular value of X (via svdArpack) scaled by rho gives the
    threshold lambda. The factors from the previous call are used as the
    starting point according to iterativeSoftImpute.updateAlg: "initial"
    resizes them to the shape of the new matrix and "zero" discards them.
    The inner loop repeatedly takes the SVD of (residual on the observed
    entries) + (current low-rank estimate) with the algorithm named by
    iterativeSoftImpute.svdAlg, soft-thresholds the singular values, and
    stops when the relative change gamma drops to eps or maxIterations is
    reached. With postProcess set, the row/column mean vectors are appended
    to U and V and the singular values are recomputed by unshrink, which
    increases the rank by one.

    :return: The tuple (newU, newS, newV)
    :raises ValueError: If X is not a csc_matrix, or if updateAlg or svdAlg
        is not one of the recognised names
    """
    learner = self.iterativeSoftImpute

    # NOTE(review): relies on the iterators exposing a .next() method
    X = self.XIterator.next()
    logging.debug("Learning on matrix with shape: " + str(X.shape) + " and " + str(X.nnz) + " non-zeros")

    if learner.weighted:
        # Compute row and col probabilities
        up, vp = SparseUtils.nonzeroRowColsProbs(X)
        nzuInds = up == 0
        nzvInds = vp == 0
        # Adding the zero mask avoids a division by zero; those entries are
        # reset to 0 immediately afterwards.
        u = numpy.sqrt(1 / (up + numpy.array(nzuInds, int)))
        v = numpy.sqrt(1 / (vp + numpy.array(nzvInds, int)))
        u[nzuInds] = 0
        v[nzvInds] = 0

    if self.rhos is not None:
        learner.setRho(self.rhos.next())

    if not scipy.sparse.isspmatrix_csc(X):
        raise ValueError("X must be a csc_matrix not " + str(type(X)))

    # Figure out what lambda should be
    # PROPACK has problems with convergence
    Y = scipy.sparse.csc_matrix(X, dtype=float)
    U, s, V = ExpSU.SparseUtils.svdArpack(Y, 1, kmax=20)
    del Y
    maxS = s[0]
    logging.debug("Largest singular value : " + str(maxS))

    (n, m) = X.shape

    if self.j == 0:
        self.oldU = numpy.zeros((n, 1))
        self.oldS = numpy.zeros(1)
        self.oldV = numpy.zeros((m, 1))
    else:
        oldN = self.oldU.shape[0]
        oldM = self.oldV.shape[0]

        if learner.updateAlg == "initial":
            # Resize the previous factors to the shape of the new matrix
            if n > oldN:
                self.oldU = Util.extendArray(self.oldU, (n, self.oldU.shape[1]))
            elif n < oldN:
                self.oldU = self.oldU[0:n, :]

            if m > oldM:
                self.oldV = Util.extendArray(self.oldV, (m, self.oldV.shape[1]))
            elif m < oldM:
                # Previously compared against oldN (the row count of U)
                self.oldV = self.oldV[0:m, :]
        elif learner.updateAlg == "zero":
            self.oldU = numpy.zeros((n, 1))
            self.oldS = numpy.zeros(1)
            self.oldV = numpy.zeros((m, 1))
        else:
            raise ValueError("Unknown SVD update algorithm: " + learner.updateAlg)

    rowInds, colInds = X.nonzero()

    gamma = learner.eps + 1
    i = 0

    learner.measures = numpy.zeros((learner.maxIterations, 4))

    # NOTE(review): if maxIterations is 0 the loop body never assigns
    # newU/newS/newV and the return below fails - confirm callers forbid it
    while gamma > learner.eps:
        if i == learner.maxIterations:
            logging.debug("Maximum number of iterations reached")
            break

        # Residual of the current estimate on the observed entries only
        ZOmega = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), self.oldU * self.oldS, self.oldV)
        Y = X - ZOmega
        Y = csarray(Y, storagetype="row")
        gc.collect()

        svdAlg = learner.svdAlg

        if svdAlg == "propack":
            L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=False)
            newU, newS, newV = SparseUtils.svdPropack(L, k=learner.k, kmax=learner.kmax)
        elif svdAlg == "arpack":
            L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=False)
            newU, newS, newV = SparseUtils.svdArpack(L, k=learner.k, kmax=learner.kmax)
        elif svdAlg == "svdUpdate":
            newU, newS, newV = SVDUpdate.addSparseProjected(self.oldU, self.oldS, self.oldV, Y, learner.k)
        elif svdAlg == "rsvd":
            L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
            newU, newS, newV = RandomisedSVD.svd(L, learner.k, p=learner.p, q=learner.q)
        elif svdAlg == "rsvdUpdate":
            L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
            if self.j == 0:
                newU, newS, newV = RandomisedSVD.svd(L, learner.k, p=learner.p, q=learner.q)
            else:
                # Seed the projections with the previous right factor
                newU, newS, newV = RandomisedSVD.svd(L, learner.k, p=learner.p, q=learner.qu, omega=self.oldV)
        elif svdAlg == "rsvdUpdate2":
            if self.j == 0:
                L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                newU, newS, newV = RandomisedSVD.svd(L, learner.k, p=learner.p, q=learner.q)
            else:
                # Need linear operator which is U s V
                L = LinOperatorUtils.lowRankOp(self.oldU, self.oldS, self.oldV)
                Y = GeneralLinearOperator.asLinearOperator(Y, parallel=True)
                newU, newS, newV = RandomisedSVD.updateSvd(L, self.oldU, self.oldS, self.oldV, Y, learner.k, p=learner.p)
        else:
            raise ValueError("Unknown SVD algorithm: " + learner.svdAlg)

        # In the weighted case lambda is fixed on the first iteration and
        # reused afterwards.
        if learner.weighted and i == 0:
            delta = numpy.diag((u * newU.T).dot(newU))
            pi = numpy.diag((v * newV.T).dot(newV))
            lmbda = (maxS / numpy.max(delta * pi)) * learner.rho
            lmbdav = lmbda * delta * pi
        elif not learner.weighted:
            lmbda = maxS * learner.rho
            if i == 0:
                logging.debug("lambda: " + str(lmbda))
            lmbdav = lmbda

        newS = newS - lmbdav
        # Soft threshold. Only a lower bound is applied: using max(newS) as
        # the upper bound returned negative values when every entry was
        # below zero.
        newS = numpy.clip(newS, 0, None)

        # ||newZ - oldZ||_F^2 expanded in terms of the factors
        normOldZ = (self.oldS**2).sum()
        normNewZmOldZ = (self.oldS**2).sum() + (newS**2).sum() - 2 * numpy.trace(
            (self.oldV.T.dot(newV * newS)).dot(newU.T.dot(self.oldU * self.oldS)))

        # We can get newZ == oldZ in which case we break
        if normNewZmOldZ < self.tol:
            gamma = 0
        elif abs(normOldZ) < self.tol:
            gamma = learner.eps + 1
        else:
            gamma = normNewZmOldZ / normOldZ

        if learner.verbose:
            theta1 = (learner.k - numpy.linalg.norm(self.oldU.T.dot(newU), 'fro')**2) / learner.k
            theta2 = (learner.k - numpy.linalg.norm(self.oldV.T.dot(newV), 'fro')**2) / learner.k
            thetaS = numpy.linalg.norm(newS - self.oldS)**2 / numpy.linalg.norm(newS)**2
            learner.measures[i, :] = numpy.array([gamma, theta1, theta2, thetaS])

        self.oldU = newU.copy()
        self.oldS = newS.copy()
        self.oldV = newV.copy()

        logging.debug("Iteration " + str(i) + " gamma=" + str(gamma))
        i += 1

    if learner.postProcess:
        # Add the mean vectors
        previousS = newS
        newU = numpy.c_[newU, numpy.array(X.mean(1)).ravel()]
        newV = numpy.c_[newV, numpy.array(X.mean(0)).ravel()]
        newS = learner.unshrink(X, newU, newV)

        # Note that this increases the rank of U and V by 1
        logging.debug("Difference in s after postprocessing: " + str(numpy.linalg.norm(previousS - newS[0:-1])))

    logging.debug("Number of iterations for rho=" + str(learner.rho) + ": " + str(i))

    self.j += 1
    return (newU, newS, newV)
def next(self):
    """
    Take the next matrix from self.XIterator and fit it with iterative
    soft-thresholded SVD, returning the factors (newU, newS, newV).

    The largest singular value of X (via svdArpack) scaled by rho gives the
    threshold lambda. The factors from the previous call are used as the
    starting point according to iterativeSoftImpute.updateAlg: "initial"
    resizes them to the shape of the new matrix and "zero" discards them.
    The inner loop repeatedly takes the SVD of (residual on the observed
    entries) + (current low-rank estimate) with the algorithm named by
    iterativeSoftImpute.svdAlg, soft-thresholds the singular values, and
    stops when the relative change gamma drops to eps or maxIterations is
    reached. With postProcess set, the row/column mean vectors are appended
    to U and V and the singular values are recomputed by unshrink, which
    increases the rank by one.

    :return: The tuple (newU, newS, newV)
    :raises ValueError: If X is not a csc_matrix, or if updateAlg or svdAlg
        is not one of the recognised names
    """
    learner = self.iterativeSoftImpute

    # NOTE(review): relies on the iterators exposing a .next() method
    X = self.XIterator.next()
    logging.debug("Learning on matrix with shape: " + str(X.shape) + " and " + str(X.nnz) + " non-zeros")

    if learner.weighted:
        # Compute row and col probabilities
        up, vp = SparseUtils.nonzeroRowColsProbs(X)
        nzuInds = up == 0
        nzvInds = vp == 0
        # Adding the zero mask avoids a division by zero; those entries are
        # reset to 0 immediately afterwards.
        u = numpy.sqrt(1 / (up + numpy.array(nzuInds, int)))
        v = numpy.sqrt(1 / (vp + numpy.array(nzvInds, int)))
        u[nzuInds] = 0
        v[nzvInds] = 0

    if self.rhos is not None:
        learner.setRho(self.rhos.next())

    if not scipy.sparse.isspmatrix_csc(X):
        raise ValueError("X must be a csc_matrix not " + str(type(X)))

    # Figure out what lambda should be
    # PROPACK has problems with convergence
    Y = scipy.sparse.csc_matrix(X, dtype=float)
    U, s, V = ExpSU.SparseUtils.svdArpack(Y, 1, kmax=20)
    del Y
    maxS = s[0]
    logging.debug("Largest singular value : " + str(maxS))

    (n, m) = X.shape

    if self.j == 0:
        self.oldU = numpy.zeros((n, 1))
        self.oldS = numpy.zeros(1)
        self.oldV = numpy.zeros((m, 1))
    else:
        oldN = self.oldU.shape[0]
        oldM = self.oldV.shape[0]

        if learner.updateAlg == "initial":
            # Resize the previous factors to the shape of the new matrix
            if n > oldN:
                self.oldU = Util.extendArray(self.oldU, (n, self.oldU.shape[1]))
            elif n < oldN:
                self.oldU = self.oldU[0:n, :]

            if m > oldM:
                self.oldV = Util.extendArray(self.oldV, (m, self.oldV.shape[1]))
            elif m < oldM:
                # Previously compared against oldN (the row count of U)
                self.oldV = self.oldV[0:m, :]
        elif learner.updateAlg == "zero":
            self.oldU = numpy.zeros((n, 1))
            self.oldS = numpy.zeros(1)
            self.oldV = numpy.zeros((m, 1))
        else:
            raise ValueError("Unknown SVD update algorithm: " + learner.updateAlg)

    rowInds, colInds = X.nonzero()

    gamma = learner.eps + 1
    i = 0

    learner.measures = numpy.zeros((learner.maxIterations, 4))

    # NOTE(review): if maxIterations is 0 the loop body never assigns
    # newU/newS/newV and the return below fails - confirm callers forbid it
    while gamma > learner.eps:
        if i == learner.maxIterations:
            logging.debug("Maximum number of iterations reached")
            break

        # Residual of the current estimate on the observed entries only
        ZOmega = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), self.oldU * self.oldS, self.oldV)
        Y = X - ZOmega
        Y = csarray(Y, storagetype="row")
        gc.collect()

        svdAlg = learner.svdAlg

        if svdAlg == "propack":
            L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=False)
            newU, newS, newV = SparseUtils.svdPropack(L, k=learner.k, kmax=learner.kmax)
        elif svdAlg == "arpack":
            L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=False)
            newU, newS, newV = SparseUtils.svdArpack(L, k=learner.k, kmax=learner.kmax)
        elif svdAlg == "svdUpdate":
            newU, newS, newV = SVDUpdate.addSparseProjected(self.oldU, self.oldS, self.oldV, Y, learner.k)
        elif svdAlg == "rsvd":
            L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
            newU, newS, newV = RandomisedSVD.svd(L, learner.k, p=learner.p, q=learner.q)
        elif svdAlg == "rsvdUpdate":
            L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
            if self.j == 0:
                newU, newS, newV = RandomisedSVD.svd(L, learner.k, p=learner.p, q=learner.q)
            else:
                # Seed the projections with the previous right factor
                newU, newS, newV = RandomisedSVD.svd(L, learner.k, p=learner.p, q=learner.qu, omega=self.oldV)
        elif svdAlg == "rsvdUpdate2":
            if self.j == 0:
                L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                newU, newS, newV = RandomisedSVD.svd(L, learner.k, p=learner.p, q=learner.q)
            else:
                # Need linear operator which is U s V
                L = LinOperatorUtils.lowRankOp(self.oldU, self.oldS, self.oldV)
                Y = GeneralLinearOperator.asLinearOperator(Y, parallel=True)
                newU, newS, newV = RandomisedSVD.updateSvd(L, self.oldU, self.oldS, self.oldV, Y, learner.k, p=learner.p)
        else:
            raise ValueError("Unknown SVD algorithm: " + learner.svdAlg)

        # In the weighted case lambda is fixed on the first iteration and
        # reused afterwards.
        if learner.weighted and i == 0:
            delta = numpy.diag((u * newU.T).dot(newU))
            pi = numpy.diag((v * newV.T).dot(newV))
            lmbda = (maxS / numpy.max(delta * pi)) * learner.rho
            lmbdav = lmbda * delta * pi
        elif not learner.weighted:
            lmbda = maxS * learner.rho
            if i == 0:
                logging.debug("lambda: " + str(lmbda))
            lmbdav = lmbda

        newS = newS - lmbdav
        # Soft threshold. Only a lower bound is applied: using max(newS) as
        # the upper bound returned negative values when every entry was
        # below zero.
        newS = numpy.clip(newS, 0, None)

        # ||newZ - oldZ||_F^2 expanded in terms of the factors
        normOldZ = (self.oldS**2).sum()
        normNewZmOldZ = (self.oldS**2).sum() + (newS**2).sum() - 2 * numpy.trace(
            (self.oldV.T.dot(newV * newS)).dot(newU.T.dot(self.oldU * self.oldS)))

        # We can get newZ == oldZ in which case we break
        if normNewZmOldZ < self.tol:
            gamma = 0
        elif abs(normOldZ) < self.tol:
            gamma = learner.eps + 1
        else:
            gamma = normNewZmOldZ / normOldZ

        if learner.verbose:
            theta1 = (learner.k - numpy.linalg.norm(self.oldU.T.dot(newU), 'fro')**2) / learner.k
            theta2 = (learner.k - numpy.linalg.norm(self.oldV.T.dot(newV), 'fro')**2) / learner.k
            thetaS = numpy.linalg.norm(newS - self.oldS)**2 / numpy.linalg.norm(newS)**2
            learner.measures[i, :] = numpy.array([gamma, theta1, theta2, thetaS])

        self.oldU = newU.copy()
        self.oldS = newS.copy()
        self.oldV = newV.copy()

        logging.debug("Iteration " + str(i) + " gamma=" + str(gamma))
        i += 1

    if learner.postProcess:
        # Add the mean vectors
        previousS = newS
        newU = numpy.c_[newU, numpy.array(X.mean(1)).ravel()]
        newV = numpy.c_[newV, numpy.array(X.mean(0)).ravel()]
        newS = learner.unshrink(X, newU, newV)

        # Note that this increases the rank of U and V by 1
        logging.debug("Difference in s after postprocessing: " + str(numpy.linalg.norm(previousS - newS[0:-1])))

    logging.debug("Number of iterations for rho=" + str(learner.rho) + ": " + str(i))

    self.j += 1
    return (newU, newS, newV)
def parallelSparseOp(X):
    """
    Return the parallel linear operator corresponding to left and right
    multiply of csc_matrix X. Note that there is a significant overhead for
    creating and waiting for locked processes.

    Every product creates a fresh multiprocessing pool with one process per
    CPU, splits the rows (for X) or columns (for X.T) into one contiguous
    slice per job, and writes each job's result into the matching slice of
    the output. The pool is always terminated, including when a job fails.

    :param X: A scipy.sparse csc_matrix
    :return: A GeneralLinearOperator with the shape and dtype of X
    :raises ValueError: If X is not a csc_matrix
    """
    if not scipy.sparse.isspmatrix_csc(X):
        raise ValueError("Currently only supports csc_matrices")

    # This doubles memory here but saves memory when on many CPUs and results
    # in faster calculations when we do matmat
    Xr = X.tocsr()

    numProcesses = multiprocessing.cpu_count()
    numJobs = numProcesses
    # Slice boundaries; the builtin int replaces the removed numpy.int alias
    rowInds = numpy.array(numpy.linspace(0, X.shape[0], numJobs + 1), int)
    colInds = numpy.array(numpy.linspace(0, X.shape[1], numJobs + 1), int)

    # Raw shared buffers holding the CSR representation, handed to the
    # workers through the pool initializer in matmat
    XrData = multiprocessing.RawArray("d", numpy.array(Xr.data))
    XrIndices = multiprocessing.RawArray("i", Xr.indices)
    XrIndptr = multiprocessing.RawArray("i", Xr.indptr)

    def matvec(w):
        pool = multiprocessing.Pool(processes=numProcesses)
        try:
            paramList = [(Xr[rowInds[i]:rowInds[i + 1], :], w) for i in range(numJobs)]
            p = numpy.zeros(X.shape[0])
            # imap yields results in submission order
            for i, part in enumerate(pool.imap(dot, paramList, chunksize=1)):
                p[rowInds[i]:rowInds[i + 1]] = part
        finally:
            pool.terminate()
        return p

    def rmatvec(w):
        pool = multiprocessing.Pool(processes=numProcesses)
        try:
            paramList = [(X[:, colInds[i]:colInds[i + 1]], w) for i in range(numJobs)]
            p = numpy.zeros(X.shape[1])
            for i, part in enumerate(pool.imap(dotT, paramList, chunksize=1)):
                p[colInds[i]:colInds[i + 1]] = part
        finally:
            pool.terminate()
        return p

    def matmat(W):
        WArray = multiprocessing.RawArray("d", W.flatten())
        pool = multiprocessing.Pool(processes=numProcesses, initializer=initProcess,
                                    initargs=(XrData, XrIndices, XrIndptr, X.shape, WArray, W.shape))
        try:
            params = [(rowInds, i) for i in range(numJobs)]
            results = pool.map(dot2, params)
        finally:
            # This pool was previously never shut down, leaving its worker
            # processes alive after every call.
            pool.terminate()

        P = numpy.zeros((X.shape[0], W.shape[1]))
        for i in range(numJobs):
            P[rowInds[i]:rowInds[i + 1], :] = results[i]
        return P

    def rmatmat(W):
        pool = multiprocessing.Pool(processes=numProcesses)
        try:
            paramList = [(X[:, colInds[i]:colInds[i + 1]], W) for i in range(numJobs)]
            P = numpy.zeros((X.shape[1], W.shape[1]))
            for i, part in enumerate(pool.imap(dotT, paramList, chunksize=1)):
                P[colInds[i]:colInds[i + 1], :] = part
        finally:
            pool.terminate()
        return P

    return GeneralLinearOperator(X.shape, matvec, rmatvec, matmat, rmatmat, dtype=X.dtype)
def parallelSparseLowRankOp(X, U, s, V):
    """
    Return a GeneralLinearOperator for products involving X together with
    the factors U, s, V, evaluated with one process per CPU.

    The columns of X (and the matching rows of V) are split into one
    contiguous slice per process. Forward products sum the per-slice results
    of dotSVD, and transposed products write the per-slice results of dotSVDT
    into the matching slice of the output. Every product creates a fresh
    pool, which is always terminated, including when a job fails.

    NOTE(review): presumably this represents X + U s V.T like
    sparseLowRankOp, but that depends on dotSVD/dotSVDT, which are defined
    outside this block - confirm.

    :param X: A matrix supporting column slicing X[:, a:b]
    :param U: Left factor passed to every job
    :param s: Scaling passed to every job
    :param V: Right factor, sliced by rows to match the columns of X
    :return: A GeneralLinearOperator with the shape and dtype of X
    """
    numProcesses = multiprocessing.cpu_count()
    # Slice boundaries; the builtin int replaces the removed numpy.int alias
    colInds = numpy.array(numpy.linspace(0, X.shape[1], numProcesses + 1), int)

    def matvec(w):
        pool = multiprocessing.Pool(processes=numProcesses)
        try:
            paramList = [(X[:, colInds[i]:colInds[i + 1]], U, s, V[colInds[i]:colInds[i + 1], :],
                          w[colInds[i]:colInds[i + 1]]) for i in range(numProcesses)]
            p = numpy.zeros(X.shape[0])
            # Each job contributes a full-length partial product
            for part in pool.imap(dotSVD, paramList, chunksize=1):
                p += part
        finally:
            pool.terminate()
        return p

    def rmatvec(w):
        pool = multiprocessing.Pool(processes=numProcesses)
        try:
            paramList = [(X[:, colInds[i]:colInds[i + 1]], U, s, V[colInds[i]:colInds[i + 1], :], w)
                         for i in range(numProcesses)]
            p = numpy.zeros(X.shape[1])
            # imap yields results in submission order
            for i, part in enumerate(pool.imap(dotSVDT, paramList, chunksize=1)):
                p[colInds[i]:colInds[i + 1]] = part
        finally:
            pool.terminate()
        return p

    def matmat(W):
        pool = multiprocessing.Pool(processes=numProcesses)
        try:
            paramList = [(X[:, colInds[i]:colInds[i + 1]], U, s, V[colInds[i]:colInds[i + 1], :],
                          W[colInds[i]:colInds[i + 1], :]) for i in range(numProcesses)]
            results = pool.map(dotSVD, paramList, chunksize=1)
            P = numpy.zeros((X.shape[0], W.shape[1]))
            for part in results:
                P += part
        finally:
            pool.terminate()
        return P

    def rmatmat(W):
        pool = multiprocessing.Pool(processes=numProcesses)
        try:
            paramList = [(X[:, colInds[i]:colInds[i + 1]], U, s, V[colInds[i]:colInds[i + 1], :], W)
                         for i in range(numProcesses)]
            P = numpy.zeros((X.shape[1], W.shape[1]))
            for i, part in enumerate(pool.imap(dotSVDT, paramList, chunksize=1)):
                P[colInds[i]:colInds[i + 1], :] = part
        finally:
            pool.terminate()
        return P

    return GeneralLinearOperator(X.shape, matvec, rmatvec, matmat, rmatmat, dtype=X.dtype)