def testMatrixApprox(self):
    """Check Nystrom.matrixApprox improves with more sampled indices and agrees between dense/sparse inputs."""
    eps = 10**-6
    A = numpy.random.rand(10, 10)
    A = A.dot(A.T)

    # A 5-column approximation should be worse than a full 10-column one
    sampleSize = 5
    sampleInds = numpy.sort(numpy.random.permutation(A.shape[0])[0:sampleSize])
    partialHat = Nystrom.matrixApprox(A, sampleInds)
    sampleSize = 10
    fullHat = Nystrom.matrixApprox(A, sampleSize)
    self.assertTrue(numpy.linalg.norm(A - fullHat) < numpy.linalg.norm(A - partialHat))
    self.assertTrue(numpy.linalg.norm(A - fullHat) < eps)

    # Same behaviour expected for a sparse input matrix
    As = scipy.sparse.csr_matrix(A)
    sampleSize = 5
    sampleInds = numpy.sort(numpy.random.permutation(A.shape[0])[0:sampleSize])
    partialHatS = Nystrom.matrixApprox(As, sampleInds)
    sampleSize = 10
    fullHatS = Nystrom.matrixApprox(As, sampleSize)
    self.assertTrue(SparseUtils.norm(As - fullHatS) < SparseUtils.norm(As - partialHatS))
    self.assertTrue(SparseUtils.norm(As - fullHatS) < eps)

    # Dense and sparse approximations must coincide for the same indices
    for sampleSize in range(1, 9):
        sampleInds = numpy.sort(numpy.random.permutation(A.shape[0])[0:sampleSize])
        sparseHat = Nystrom.matrixApprox(As, sampleInds)
        denseHat = Nystrom.matrixApprox(A, sampleInds)
        self.assertTrue(numpy.linalg.norm(denseHat - numpy.array(sparseHat.todense())) < eps)
def setWeightMatrix(self, W):
    """
    Set the weight matrix of this graph. Requires as input an ndarray or a
    scipy sparse matrix with the same dimensions as the current weight
    matrix. Edges are represented by non-zero entries.

    :param W: The weight matrix to use.
    :type W: :class:`ndarray` or :class:`scipy.sparse` matrix
    """
    numVertices = self.vList.getNumVertices()
    if W.shape != (numVertices, numVertices):
        raise ValueError("Weight matrix has wrong shape : " + str(W.shape))

    # An undirected graph requires a symmetric weight matrix; the check
    # differs for dense and sparse representations.
    if self.undirected:
        if type(W) == numpy.ndarray and (W != W.T).any():
            raise ValueError("Weight matrix of undirected graph must be symmetric")
        if scipy.sparse.issparse(W) and not SparseUtils.equals(W, W.T):
            raise ValueError("Weight matrix of undirected graph must be symmetric")

    # Stored internally as a dense numpy array
    if scipy.sparse.issparse(W):
        W = W.todense()
    self.W = numpy.array(W)
def recommend(learner):
    """
    Take a list of coauthors and read in the complete graph into a sparse
    matrix X such that X_ij = k means author i has worked with j, k times.
    Then do matrix factorisation on the resulting methods.

    :param learner: a recommendation learner; must provide a name() method
        used to build the results file name.
    """
    outputDir = PathDefaults.getOutputDir() + "erasm/"
    matrixFileName = outputDir + "Toy"
    numExamples = 50
    numFolds = 5

    X = scipy.io.mmread(matrixFileName)
    X = scipy.sparse.csr_matrix(X)
    logging.debug("Loaded matrix " + str(X.shape) + " with " + str(X.getnnz()) + " non zeros")
    X = X.tocsr()
    X = X[0:numExamples, :]
    X, maxS = preprocess(X)

    # Take out some ratings to form a training set
    rowInds, colInds = X.nonzero()
    randInds = numpy.random.permutation(rowInds.shape[0])
    indexList = Sampling.crossValidation(numFolds, rowInds.shape[0])

    paramList = []
    for trnIdx, tstIdx in indexList:
        trainInds = randInds[trnIdx]
        testInds = randInds[tstIdx]
        trainX = SparseUtils.selectMatrix(X, rowInds[trainInds], colInds[trainInds]).tocsr()
        testX = SparseUtils.selectMatrix(X, rowInds[testInds], colInds[testInds]).tocsr()
        paramList.append((trainX, testX, learner))

    # Close and join the pool even if a worker raises, so processes are not
    # leaked (the original code never released the pool).
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    try:
        results = pool.map(computeTestError, paramList)
    finally:
        pool.close()
        pool.join()

    testErrors = numpy.array(results)
    meanTestErrors = testErrors.mean()
    logging.debug("Test errors = " + str(meanTestErrors))

    errorFileName = outputDir + "results_" + learner.name()
    numpy.savez(errorFileName, meanTestErrors)
    logging.debug("Saved results as " + errorFileName)
def testDiag(self):
    """SparseUtils.diag should return the diagonal entries of a sparse matrix."""
    numRows = 10
    numCols = 10
    mat = scipy.sparse.rand(numRows, numCols, 0.5, "csr")
    diagVals = SparseUtils.diag(mat)
    for idx in range(numRows):
        self.assertEquals(diagVals[idx], mat[idx, idx])
def testResize(self):
    """SparseUtils.resize should truncate when shrinking and zero-pad when growing."""
    numRows = 10
    numCols = 10
    A = scipy.sparse.rand(numRows, numCols, 0.1, "csr")

    # Shrinking keeps the top-left 5x5 sub-matrix
    smaller = SparseUtils.resize(A, (5, 5))
    self.assertEquals(smaller.shape, (5, 5))
    for row in range(5):
        for col in range(5):
            self.assertEquals(smaller[row, col], A[row, col])

    # Growing preserves every original entry (same nnz) and pads with zeros
    larger = SparseUtils.resize(A, (15, 15))
    self.assertEquals(larger.shape, (15, 15))
    self.assertEquals(larger.nnz, A.nnz)
    for row in range(10):
        for col in range(10):
            self.assertEquals(larger[row, col], A[row, col])
def getNumEdges(self):
    """
    :returns: the total number of edges in this graph (an int).
    """
    if self.getNumVertices() == 0:
        return 0

    #Note that self.W.getnnz() doesn't seem to work correctly
    numNonZeros = self.W.nonzero()[0].shape[0]
    if self.undirected == True:
        # Each off-diagonal edge appears twice in a symmetric W, while
        # self-loops appear once; adding the diagonal count makes the sum
        # even. Use integer division so the result stays an int under
        # Python 3 (plain / would return a float).
        return (numNonZeros + numpy.sum(SparseUtils.diag(self.W) != 0)) // 2
    else:
        return numNonZeros
def testEquals(self):
    """SparseUtils.equals should detect equality and any single-entry difference."""
    A = scipy.sparse.csr_matrix(numpy.array([[4, 2, 1], [6, 3, 9], [3, 6, 0]]))
    B = scipy.sparse.csr_matrix(numpy.array([[4, 2, 1], [6, 3, 9], [3, 6, 0]]))
    self.assertTrue(SparseUtils.equals(A, B))

    # Changing one entry on either side breaks equality
    A[0, 1] = 5
    self.assertFalse(SparseUtils.equals(A, B))
    A[0, 1] = 2
    B[0, 1] = 5
    self.assertFalse(SparseUtils.equals(A, B))
    A[2, 2] = -1
    self.assertFalse(SparseUtils.equals(A, B))

    #Test two empty graphs
    emptyA = scipy.sparse.csr_matrix((5, 5))
    emptyB = scipy.sparse.csr_matrix((5, 5))
    self.assertTrue(SparseUtils.equals(emptyA, emptyB))
def testSelectMatrix(self):
    """SparseUtils.selectMatrix should keep only the requested (row, col) entries."""
    numRows = 10
    numCols = 10
    A = scipy.sparse.rand(numRows, numCols, 0.5, "csr")

    #Select first row
    # Indices must be an integer array: numpy.zeros defaults to float64,
    # which is rejected as an index dtype by modern numpy/scipy.
    rowInds = numpy.zeros(numCols, numpy.int64)
    colInds = numpy.arange(10)
    newA = SparseUtils.selectMatrix(A, rowInds, colInds)

    # The selected row is preserved...
    for i in range(numCols):
        self.assertEquals(A[0, i], newA[0, i])
    # ...and everything else is zero
    for i in range(1, numRows):
        for j in range(numCols):
            self.assertEquals(newA[i, j], 0)
def testNorm(self):
    """SparseUtils.norm should match both a manual Frobenius norm and numpy's."""
    numRows = 10
    numCols = 10
    for trial in range(10):
        A = scipy.sparse.rand(numRows, numCols, 0.1, "csr")
        computed = SparseUtils.norm(A)

        # Entry-by-entry Frobenius norm as the reference value
        sumSquares = 0
        for i in range(numRows):
            for j in range(numCols):
                sumSquares += A[i, j]**2
        expected = numpy.sqrt(sumSquares)

        denseNorm = numpy.linalg.norm(numpy.array(A.todense()))
        self.assertAlmostEquals(computed, expected)
        self.assertAlmostEquals(computed, denseNorm)
def setWeightMatrix(self, W):
    """
    Set the weight matrix of this graph. Requires as input an ndarray or a
    scipy sparse matrix with the same dimensions as the current weight
    matrix. Edges are represented by non-zero entries.

    :param W: The weight matrix to use.
    :type W: :class:`ndarray` or :class:`scipy.sparse` matrix
    """
    expectedShape = (self.vList.getNumVertices(), self.vList.getNumVertices())
    if W.shape != expectedShape:
        raise ValueError("Weight matrix has wrong shape : " + str(W.shape))

    if self.undirected:
        # Symmetry check: dense arrays compare element-wise, sparse matrices
        # go through SparseUtils.
        if type(W) == numpy.ndarray:
            if (W != W.T).any():
                raise ValueError("Weight matrix of undirected graph must be symmetric")
        elif scipy.sparse.issparse(W):
            if not SparseUtils.equals(W, W.T):
                raise ValueError("Weight matrix of undirected graph must be symmetric")

    # The graph always stores a dense numpy array internally
    self.W = numpy.array(W.todense() if scipy.sparse.issparse(W) else W)
def cluster(self, graphIterator, verbose=False):
    """
    Find a set of clusters for each graph yielded by graphIterator, using an
    eigen-decomposition of the normalised random-walk Laplacian followed by
    k-means on the (whitened) eigenvectors.

    Every self.T-th graph the eigensystem is recomputed from scratch; in
    between it is updated incrementally from the previous graph's
    decomposition.

    :param graphIterator: iterable yielding square weight matrices W
        (sparse; sizes may grow or shrink between iterations — TODO confirm
        expected matrix type against callers).
    :param verbose: if True also return per-graph timings and boundList.

    :returns: a list with one cluster-assignment array per graph; when
        verbose is True, the tuple (clustersList, times, boundList) where
        times has rows (decomposition time, k-means time). NOTE(review):
        boundList is never filled in this method.
    """
    tol = 10**-6
    clustersList = []
    decompositionTimeList = []
    kMeansTimeList = []
    boundList = []
    numpy.random.seed(self.seed)
    iter = 0

    for W in graphIterator:
        startTime = time.time()
        logging.debug("Graph index:" + str(iter))
        startTime = time.time()

        if iter % self.T != 0:
            # Incremental branch: reuse (lmbda, Q), the decomposition of the
            # previous graph lastW. Only reached when iter > 0, so lastW, Q
            # and lmbda are guaranteed to exist.

            # --- Figure out the similarity changes in existing edges ---
            n = lastW.shape[0]
            deltaW = W.copy()
            # Vertices are removed: truncate the new graph to the old shape
            if n > W.shape[0]:
                deltaW = SparseUtils.resize(deltaW, lastW.shape)
            # Vertices are added: rebuild lastW from its non-zero entries so
            # it is zero-padded up to the new, larger shape
            elif n < W.shape[0]:
                lastWInds = lastW.nonzero()
                lastWVal = scipy.zeros(len(lastWInds[0]))
                for i,j,k in zip(lastWInds[0], lastWInds[1], range(len(lastWInds[0]))):
                    lastWVal[k] = lastW[i,j]
                lastW = scipy.sparse.csr_matrix((lastWVal, lastWInds), shape=W.shape)
            deltaW = deltaW - lastW

            # --- Update the decomposition ---
            if n < W.shape[0]:
                # Pad the eigenvectors with zero rows for the new vertices
                Q = numpy.r_[Q, numpy.zeros((W.shape[0]-Q.shape[0], Q.shape[1]))]
            lmbda, Q = self.__updateEigenSystem(lmbda, Q, deltaW, lastW)

            # --- resize the decomposition if the graph is losing vertices ---
            if n > W.shape[0]:
                Q = Q[0:W.shape[0], :]
        else:
            logging.debug("Recomputing eigensystem")
            # We want to solve the generalized eigen problem $L.v = lambda.D.v$
            # with L and D hermitians. scipy.sparse.linalg does not actually
            # solve this problem (it solves it, forgetting about hermitian
            # information, from version 0.11). So we solve
            # $D^{-1}.L.v = lambda.v$, where $D^{-1}.L$ is no longer hermitian.
            L = GraphUtils.normalisedLaplacianRw(W)
            lmbda, Q = scipy.sparse.linalg.eigs(L, min(self.k, L.shape[0]-1), which="SM", ncv = min(20*self.k, L.shape[0]), v0=numpy.random.rand(L.shape[0]))
            # eigs can return complex values for a non-hermitian operator;
            # only the real parts are used
            lmbda = lmbda.real
            Q = Q.real
        decompositionTimeList.append(time.time()-startTime)

        # Now do actual clustering: k-means on the whitened eigenvector rows
        startTime = time.time()
        V = VqUtils.whiten(Q)
        centroids, distortion = vq.kmeans(V, self.k, iter=self.kmeansIter)
        clusters, distortion = vq.vq(V, centroids)
        clustersList.append(clusters)
        kMeansTimeList.append(time.time()-startTime)

        # Remember this graph for the next incremental update
        lastW = W.copy()
        iter += 1

    if verbose:
        return clustersList, numpy.array((decompositionTimeList, kMeansTimeList)).T, boundList
    else:
        return clustersList