def testEigWeight(self):
        tol = 10**-3
        
        n = 100
        W = numpy.random.rand(n, n)
        W = W.dot(W.T)
        w, U = numpy.linalg.eig(W)
        
        W = scipy.sparse.csr_matrix(W)
        
        k = 4 
        m = 5 
        lmbda, V = EfficientNystrom.eigWeight(W, m, k)
        

        MHat = V.dot(numpy.diag(lmbda)).dot(V.T)
        
        I = scipy.sparse.eye(n, n)
        L = GraphUtils.normalisedLaplacianSym(W) 
        M = I - L 
        
        #print(V)
        numpy.linalg.norm(M.todense() - MHat)
        #print(numpy.linalg.norm(M.todense()))
        #self.assertTrue(numpy.linalg.norm(W - WHat) < tol)
        
        #For fixed k, increasing m should improve the approximation, though not always
        lastError = 10        
        
        for m in range(k+1, n+1, 10): 
            lmbda, V = EfficientNystrom.eigWeight(W, m, k)
            #print(V)
            MHat = V.dot(numpy.diag(lmbda)).dot(V.T)
        
            
            error = numpy.linalg.norm(M.todense() - MHat)
            
            self.assertTrue(error <= lastError)
            lastError = error 
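
For reference, here is a minimal sketch of the plain Nystrom eigen-approximation that this test exercises, assuming a dense symmetric input and uniform column sampling; the sampling and scaling details of EfficientNystrom.eigWeight may differ.

import numpy

def nystromEig(M, m, k):
    # Approximate the dominant k eigenpairs of a symmetric n x n matrix M from
    # m uniformly sampled landmark columns (the standard Nystrom extension).
    n = M.shape[0]
    inds = numpy.random.permutation(n)[:m]
    C = M[:, inds]                              # n x m block of sampled columns
    A = C[inds, :]                              # m x m landmark-landmark block
    lmbda, U = numpy.linalg.eigh(A)
    order = numpy.argsort(-numpy.abs(lmbda))[:k]
    lmbda, U = lmbda[order], U[:, order]        # keep the k dominant pairs
    # Extend the eigenvectors to all n rows and rescale the eigenvalues, so that
    # V.dot(numpy.diag(lmbdaHat)).dot(V.T) recovers C A^+ C^T
    V = numpy.sqrt(m / float(n)) * C.dot(U) / lmbda
    lmbdaHat = (n / float(m)) * lmbda
    return lmbdaHat, V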
Example #3
                graphIterator = ThreeClustIterator(p, numClusters, r).getIterator()
                clustListNings = ningsClusterer.cluster(graphIterator)
                
            logging.debug("Running random SVD method")
            graphIterator = ThreeClustIterator(p, numClusters, r).getIterator()
            clustListRandSVD = randSvdCluster.clusterFromIterator(graphIterator, False)
    
            # compute rand index error for each iteration
            # error: proportion of pairs of vertices (x,y) s.t.
            #    (cl(x) == cl(y)) != (learned_cl(x) == learned_cl(y))
            for it in range(len(ThreeClustIterator().subgraphIndicesList)):
                indicesList = ThreeClustIterator().subgraphIndicesList[it]
                numUsedVertices = len(indicesList)

                for i in range(len(k2s)):
                    clustErrApprox[t, it, r, i] += GraphUtils.randIndex(clustListApprox[i][it], indicesList)
                clustErrExact[t, it, r] += GraphUtils.randIndex(clustListExact[it], indicesList)
                clustErrNystrom[t, it, r] += GraphUtils.randIndex(clustListNystrom[it], indicesList)
                if do_Nings:
                    clustErrNings[t, it, r] += GraphUtils.randIndex(clustListNings[it], indicesList)

                clustErrRandSvd[t, it, r] += GraphUtils.randIndex(clustListRandSVD[it], indicesList)
    
    numpy.savez(fileName, clustErrApprox, clustErrExact, clustErrNystrom, clustErrNings, clustErrRandSvd)
    logging.debug("Saved results as " + fileName)
else:  
    errors = numpy.load(fileName)
    clustErrApprox, clustErrExact, clustErrNystrom, clustErrNings, clustErrRandSvd = errors["arr_0"], errors["arr_1"], errors["arr_2"], errors["arr_3"], errors["arr_4"]
    
    meanClustErrExact = clustErrExact.mean(2)
    meanClustErrApprox = clustErrApprox.mean(2)
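
A hedged sketch of the pairwise error described in the comment above; GraphUtils.randIndex may differ in details, e.g. in how indicesList encodes the ground-truth clustering.

import itertools

def pairwiseClusterError(learnedLabels, trueLabels):
    # Proportion of vertex pairs (x, y) on which the two clusterings disagree
    # about co-membership: (cl(x) == cl(y)) != (learned_cl(x) == learned_cl(y))
    n = len(trueLabels)
    disagreements = 0
    for x, y in itertools.combinations(range(n), 2):
        if (trueLabels[x] == trueLabels[y]) != (learnedLabels[x] == learnedLabels[y]):
            disagreements += 1
    return disagreements / float(n * (n - 1) / 2)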
Example #4
 if not os.path.exists(resultsDir): 
    logging.warn("Directory did not exist: " + resultsDir + ", created.")
    os.makedirs(resultsDir)
    
 iterator = getIterator()
 
 subgraphIndicesList = []
 for W in iterator: 
     logging.debug("Graph size " + str(W.shape[0]))
     subgraphIndicesList.append(range(W.shape[0])) 
 
 #Try to find number of clusters at end of sequence by looking at eigengap 
 k = 2    
 
 if findEigs: 
     L = GraphUtils.normalisedLaplacianSym(W)
     
     logging.debug("Computing eigenvalues")
     omega, Q = scipy.sparse.linalg.eigsh(L, min(k, L.shape[0]-1), which="SM", ncv = min(20*k, L.shape[0]))
     
     omegaDiff = numpy.diff(omega)
 else: 
     omega = numpy.zeros(k)
     omegaDiff = numpy.zeros(k-1)
     
 #No obvious number of clusters and there are many edges 
 graph = SparseGraph(W.shape[0], W=W)
 
 logging.debug("Computing graph statistics")
 graphStats = GraphStatistics()
 statsMatrix = graphStats.sequenceScalarStats(graph, subgraphIndicesList, slowStats=False)
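
A minimal sketch of the eigengap heuristic referred to in the comment above, assuming omega holds the smallest eigenvalues of the normalised Laplacian in ascending order (as returned by eigsh with which="SM"):

import numpy

def numClustersFromEigengap(omega):
    # In spectral clustering the number of near-zero Laplacian eigenvalues
    # estimates the number of clusters; the largest gap omega[k] - omega[k-1]
    # marks the cut-off.
    omegaDiff = numpy.diff(omega)
    return int(numpy.argmax(omegaDiff)) + 1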
Example #5
import numpy
import scipy.sparse
from apgl.graph import GraphUtils
from sandbox.util.Util import Util

numpy.set_printoptions(suppress=True, precision=3)
n = 10
W1 = scipy.sparse.rand(n, n, 0.5).todense()
W1 = W1.T.dot(W1)
W2 = W1.copy()

W2[1, 2] = 1
W2[2, 1] = 1

print("W1=" + str(W1))
print("W2=" + str(W2))

L1 = GraphUtils.normalisedLaplacianSym(scipy.sparse.csr_matrix(W1))
L2 = GraphUtils.normalisedLaplacianSym(scipy.sparse.csr_matrix(W2))

deltaL = L2 - L1


print("L1=" + str(L1.todense()))
print("L2=" + str(L2.todense()))
print("deltaL=" + str(deltaL.todense()))

print("rank(deltaL)=" + str(Util.rank(deltaL.todense())))
Example #7
             
         logging.debug("Running random SVD method")
         resRandSVDList = []
         for i in range(len(k4s)): 
             graphIterator = ThreeClustIterator(p, numClusters, r).getIterator()
             resRandSVDList.append(randSvdClusterers[i].clusterFromIterator(graphIterator, True))
 
         # compute rand index error for each iteration
         # error: proportion of pairs of vertices (x,y) s.t.
         #    (cl(x) == cl(y)) != (learned_cl(x) == learned_cl(y))
         for it in range(len(ThreeClustIterator().subgraphIndicesList)):
             indicesList = ThreeClustIterator().subgraphIndicesList[it]
             numUsedVertices = len(indicesList)

             for k in range(len(k2s)):
                 clustErrApprox[t, it, r, k] = GraphUtils.randIndex(resApproxList[k][0][it], indicesList)
             clustErrExact[t, it, r] = GraphUtils.randIndex(resExact[0][it], indicesList)
             for k in range(len(k3s)):
                 clustErrNystrom[t, it, r, k] = GraphUtils.randIndex(resNystromList[k][0][it], indicesList)
             if do_Nings:
                 clustErrNings[t, it, r] = GraphUtils.randIndex(resNings[0][it], indicesList)
             for k in range(len(k4s)):
                 clustErrRandSvd[t, it, r, k] = GraphUtils.randIndex(resRandSVDList[k][0][it], indicesList)
 
         # store sin(Theta)
         for k in range(len(k2s)): 
             sinThetaApprox[t, :, r, k] = resApproxList[k][2]["sinThetaList"]
         sinThetaExact[t, :, r] = resExact[2]["sinThetaList"]
         for k in range(len(k3s)): 
             sinThetaNystrom[t, :, r, k] = resNystromList[k][2]["sinThetaList"]
         if do_Nings:
             sinThetaNings[t, :, r] = resNings[2]["sinThetaList"]
Example #8
    def cluster(self, graphIterator, verbose=False):
        """
        Find a set of clusters using the graph and list of subgraph indices. 
        """
        tol = 10**-6
        clustersList = []
        decompositionTimeList = []
        kMeansTimeList = []
        boundList = []
        sinThetaList = []
        numpy.random.seed(self.seed)

        iter = 0

        for W in graphIterator:
            logging.debug("Graph index:" + str(iter))

            startTime = time.time()
            if iter % self.T != 0:
                # --- Figure out the similarity changes in existing edges ---
                n = lastW.shape[0]
                deltaW = W.copy()
                #Vertices are removed
                if n > W.shape[0]:
                    #deltaW = Util.extendArray(deltaW, lastW.shape)
                    deltaW = SparseUtils.resize(deltaW, lastW.shape)

                #Vertices added
                elif n < W.shape[0]:
                    lastWInds = lastW.nonzero()
                    lastWVal = numpy.zeros(len(lastWInds[0]))
                    for i, j, k in zip(lastWInds[0], lastWInds[1],
                                       range(len(lastWInds[0]))):
                        lastWVal[k] = lastW[i, j]
                    lastW = scipy.sparse.csr_matrix((lastWVal, lastWInds),
                                                    shape=W.shape)
                deltaW = deltaW - lastW

                # --- Update the decomposition ---
                if n < W.shape[0]:
                    # Pad the eigenvector matrix with zero rows for the new vertices
                    Q = numpy.r_[Q, numpy.zeros((W.shape[0] - Q.shape[0], Q.shape[1]))]
                lmbda, Q = self.__updateEigenSystem(lmbda, Q, deltaW, lastW)

                # --- resize the decomposition if the graph is losing vertices ---
                if n > W.shape[0]:
                    Q = Q[0:W.shape[0], :]
            else:
                logging.debug("Recomputing eigensystem")
                # We want to solve the generalised eigenproblem $L.v = lambda.D.v$
                # with L and D Hermitian. scipy.sparse.linalg does not really
                # support this problem (from version 0.11 it solves it, but it
                # ignores the Hermitian structure), so we solve
                # $D^{-1}.L.v = lambda.v$ instead, where $D^{-1}.L$ is no
                # longer Hermitian.
                L = GraphUtils.normalisedLaplacianRw(W)
                lmbda, Q = scipy.sparse.linalg.eigs(
                    L,
                    min(self.k, L.shape[0] - 1),
                    which="SM",
                    ncv=min(20 * self.k, L.shape[0]),
                    v0=numpy.random.rand(L.shape[0]))
                #                n = L.shape[0]
                #                inds = list(range(n))
                #                Lprime = 2*scipy.sparse.csr_matrix( ([1]*n, (inds,inds)), shape=(n,n))-L
                #                lmbda, Q = scipy.sparse.linalg.eigs(Lprime, min(self.k, L.shape[0]-1), which="LM", ncv = min(20*self.k, L.shape[0]), v0=numpy.random.rand(L.shape[0]))
                #                lmbda = 2-lmbda
                lmbda = lmbda.real
                Q = Q.real

            if self.computeSinTheta:
                L = GraphUtils.normalisedLaplacianRw(W)
                lmbdaExact, QExact = scipy.linalg.eig(L.todense())
                lmbdaExact = lmbdaExact.real
                QExact = QExact.real
                indsExact = numpy.argsort(lmbdaExact)
                QExactKbot = QExact[:, indsExact[self.k:]]
                #                UQExactKbot, sQExactKbot, VhQExactKbot = scipy.linalg.svd(QExactKbot)
                inds = numpy.argsort(lmbda)
                QApproxK = Q[:, inds[:self.k]]
                #                UQApproxK, sQApproxK, VhQApproxK = scipy.linalg.svd(QApproxK)
                #                sinThetaList.append(scipy.linalg.norm(UQExactKbot.T.dot(UQApproxK)))
                sinThetaList.append(
                    scipy.linalg.norm(QExactKbot.T.dot(QApproxK)))
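                # The norm of QExactKbot^T QApproxK measures the principal
                # angles between the exact and the approximate k-dimensional
                # eigenspaces (a Davis-Kahan style sin(Theta) quantity); it is
                # zero when the approximate subspace matches the exact one.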
#                print("blop", UQExactKbot.shape, UQApproxK.shape, sinThetaList[-1])
#                UQExactK, sQExactK, VhQExactK = scipy.linalg.svd(QExact[:, indsExact[:self.k]])
#                print("blop", scipy.linalg.norm(UQExactKbot.T.dot(UQExactK)))
#                print("blop", lmbdaExact[indsExact[:10]], lmbda[inds[:10]], sep = "\n")
#                quit()

            decompositionTimeList.append(time.time() - startTime)

            # Now do actual clustering

            startTime = time.time()
            V = VqUtils.whiten(Q)
            centroids, distortion = vq.kmeans(V, self.k, iter=self.kmeansIter)
            clusters, distortion = vq.vq(V, centroids)
            clustersList.append(clusters)
            kMeansTimeList.append(time.time() - startTime)

            lastW = W.copy()
            iter += 1

        if verbose:
            eigenQuality = {
                "boundList": boundList,
                "sinThetaList": sinThetaList
            }
            return clustersList, numpy.array(
                (decompositionTimeList, kMeansTimeList)).T, eigenQuality
        else:
            return clustersList
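
A hedged numerical check of the comment in the recomputation branch above: the non-symmetric D^{-1}L has real eigenvalues because it is similar to the symmetric D^{-1/2}LD^{-1/2}, so dropping the Hermitian structure loses efficiency but not correctness.

import numpy
import scipy.linalg

n = 20
W = numpy.random.rand(n, n)
W = W + W.T                                # symmetric weights, positive degrees
d = W.sum(0)
L = numpy.diag(d) - W                      # unnormalised Laplacian
Lrw = L / d[:, None]                       # D^{-1} L (random walk), not symmetric
Lsym = L / numpy.sqrt(numpy.outer(d, d))   # D^{-1/2} L D^{-1/2}, symmetric
print(numpy.allclose(numpy.sort(scipy.linalg.eigvals(Lrw).real),
                     scipy.linalg.eigvalsh(Lsym)))   # True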
Example #9
    def cluster(self, graphIterator, verbose=False):
        """
        Find a set of clusters using the graph and list of subgraph indices. 
        """
        tol = 10**-6 
        clustersList = []
        decompositionTimeList = [] 
        kMeansTimeList = [] 
        boundList = []
        numpy.random.seed(self.seed)

        iter = 0 

        for W in graphIterator:
            logging.debug("Graph index:" + str(iter))

            startTime = time.time()
            if iter % self.T != 0:
                # --- Figure out the similarity changes in existing edges ---
                n = lastW.shape[0] 
                deltaW = W.copy()
                #Vertices are removed 
                if n > W.shape[0]:  
                    #deltaW = Util.extendArray(deltaW, lastW.shape)
                    deltaW = SparseUtils.resize(deltaW, lastW.shape)
                    
                #Vertices added 
                elif n < W.shape[0]: 
                    lastWInds = lastW.nonzero()
                    lastWVal = numpy.zeros(len(lastWInds[0]))
                    for i, j, k in zip(lastWInds[0], lastWInds[1], range(len(lastWInds[0]))):
                        lastWVal[k] = lastW[i, j]
                    lastW = scipy.sparse.csr_matrix((lastWVal, lastWInds), shape=W.shape)
                deltaW = deltaW - lastW
                
                # --- Update the decomposition ---
                if n < W.shape[0]:
                    # Pad the eigenvector matrix with zero rows for the new vertices
                    Q = numpy.r_[Q, numpy.zeros((W.shape[0]-Q.shape[0], Q.shape[1]))]
                lmbda, Q = self.__updateEigenSystem(lmbda, Q, deltaW, lastW)
                
                # --- resize the decomposition if the graph is losing vertices ---
                if n > W.shape[0]:
                    Q = Q[0:W.shape[0], :]
            else:
                logging.debug("Recomputing eigensystem")
                # We want to solve the generalised eigenproblem $L.v = lambda.D.v$
                # with L and D Hermitian. scipy.sparse.linalg does not really
                # support this problem (from version 0.11 it solves it, but it
                # ignores the Hermitian structure), so we solve
                # $D^{-1}.L.v = lambda.v$ instead, where $D^{-1}.L$ is no
                # longer Hermitian.
                L = GraphUtils.normalisedLaplacianRw(W) 
                lmbda, Q = scipy.sparse.linalg.eigs(L, min(self.k, L.shape[0]-1), which="SM", ncv = min(20*self.k, L.shape[0]), v0=numpy.random.rand(L.shape[0]))
                
                lmbda = lmbda.real
                Q = Q.real
            decompositionTimeList.append(time.time()-startTime)

            # Now do actual clustering 
            
            startTime = time.time()
            V = VqUtils.whiten(Q)
            centroids, distortion = vq.kmeans(V, self.k, iter=self.kmeansIter)
            clusters, distortion = vq.vq(V, centroids)
            clustersList.append(clusters)
            kMeansTimeList.append(time.time()-startTime)

            lastW = W.copy()
            iter += 1

        if verbose:
            return clustersList, numpy.array((decompositionTimeList, kMeansTimeList)).T, boundList
        else:
            return clustersList
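
Finally, a hedged usage sketch following the call pattern of the earlier examples; clusterer stands for a hypothetical instance of the class defining this method, and ThreeClustIterator comes from Example #3 and Example #7.

graphIterator = ThreeClustIterator(p, numClusters, r).getIterator()
clustersList, timings, boundList = clusterer.cluster(graphIterator, verbose=True)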