Example #1
0
    def testResize(self):
        # Assumes scipy.sparse is imported and SparseUtils (the project's sparse
        # matrix helper) provides a resize(A, shape) method.
        numRows = 10
        numCols = 10

        A = scipy.sparse.rand(numRows, numCols, 0.1, "csr")

        # Shrinking: the overlapping entries are preserved
        B = SparseUtils.resize(A, (5, 5))

        self.assertEqual(B.shape, (5, 5))
        for i in range(5):
            for j in range(5):
                self.assertEqual(B[i, j], A[i, j])

        # Growing: every existing entry (and hence nnz) is preserved
        B = SparseUtils.resize(A, (15, 15))

        self.assertEqual(B.shape, (15, 15))
        self.assertEqual(B.nnz, A.nnz)
        for i in range(10):
            for j in range(10):
                self.assertEqual(B[i, j], A[i, j])
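
A minimal standalone sketch of the behaviour this test expects from SparseUtils.resize (not the project's actual implementation): shrinking keeps only the entries that overlap the new shape, while growing keeps every existing entry, so nnz is unchanged.

import scipy.sparse

def resizeSketch(A, shape):
    """Return a copy of sparse matrix A with the given shape, keeping overlapping entries."""
    numRows, numCols = shape
    B = scipy.sparse.lil_matrix(shape)
    rows, cols = A.nonzero()
    for i, j in zip(rows, cols):
        if i < numRows and j < numCols:
            B[i, j] = A[i, j]
    return B.tocsr()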
    def cluster(self, graphIterator, verbose=False):
        """
        Find a set of clusters for each graph yielded by graphIterator.
        """
        clustersList = []
        decompositionTimeList = []
        kMeansTimeList = []
        boundList = []
        numpy.random.seed(self.seed)

        iterNum = 0

        for W in graphIterator:
            logging.debug("Graph index: " + str(iterNum))

            startTime = time.time()
            if iterNum % self.T != 0:
                # --- Figure out the similarity changes in existing edges ---
                n = lastW.shape[0]   # number of vertices in the previous graph
                deltaW = W.copy()
                # Vertices were removed: pad deltaW back up to the previous shape
                if n > W.shape[0]:
                    deltaW = SparseUtils.resize(deltaW, lastW.shape)
                # Vertices were added: grow lastW to the current shape, keeping its entries
                elif n < W.shape[0]:
                    lastWInds = lastW.nonzero()
                    lastWVal = numpy.zeros(len(lastWInds[0]))
                    for i, j, k in zip(lastWInds[0], lastWInds[1], range(len(lastWInds[0]))):
                        lastWVal[k] = lastW[i, j]
                    lastW = scipy.sparse.csr_matrix((lastWVal, lastWInds), shape=W.shape)
                # Change in edge weights since the previous graph
                deltaW = deltaW - lastW
                
                # --- Update the decomposition ---
                if n < W.shape[0]:
                    Q = numpy.r_[Q, numpy.zeros((W.shape[0]-Q.shape[0], Q.shape[1]))]
                lmbda, Q = self.__updateEigenSystem(lmbda, Q, deltaW, lastW)
                
                # --- resize the decomposition if the graph is losing vertices ---
                if n > W.shape[0]:
                    Q = Q[0:W.shape[0], :]
            else:
                logging.debug("Recomputing eigensystem")
                # We want to solve the generalised eigenproblem L.v = lambda.D.v
                # with L and D Hermitian. scipy.sparse.linalg only solves the
                # generalised problem from version 0.11, and even then it ignores
                # the Hermitian structure. So we solve the standard problem
                # D^{-1}.L.v = lambda.v instead, where D^{-1}.L is no longer
                # Hermitian. (A standalone sketch of this random-walk normalised
                # Laplacian follows after cluster().)
                L = GraphUtils.normalisedLaplacianRw(W) 
                lmbda, Q = scipy.sparse.linalg.eigs(L, min(self.k, L.shape[0]-1), which="SM", ncv = min(20*self.k, L.shape[0]), v0=numpy.random.rand(L.shape[0]))
                
                lmbda = lmbda.real
                Q = Q.real
            decompositionTimeList.append(time.time()-startTime)

            # Now do actual clustering 
            
            startTime = time.time()
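            # Whiten the eigenvector coordinates (presumably scaling each column
            # of Q to unit variance, as scipy.cluster.vq.whiten does) before k-means.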
            V = VqUtils.whiten(Q)
            centroids, distortion = vq.kmeans(V, self.k, iter=self.kmeansIter)
            clusters, distortion = vq.vq(V, centroids)
            clustersList.append(clusters)
            kMeansTimeList.append(time.time()-startTime)

            lastW = W.copy()
            iterNum += 1

        if verbose:
            return clustersList, numpy.array((decompositionTimeList, kMeansTimeList)).T, boundList
        else:
            return clustersList
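
A small standalone sketch of the random-walk normalised Laplacian used above, L_rw = D^{-1}(D - W) = I - D^{-1}W (GraphUtils.normalisedLaplacianRw is the project's own helper; this is only an illustration and assumes no isolated vertices):

import numpy
import scipy.sparse

def normalisedLaplacianRwSketch(W):
    """Return I - D^{-1} W for a sparse adjacency matrix W with no zero-degree vertices."""
    d = numpy.asarray(W.sum(axis=1)).flatten()   # vertex degrees
    Dinv = scipy.sparse.diags(1.0 / d)           # D^{-1}
    return scipy.sparse.eye(W.shape[0]) - Dinv.dot(W)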