Exemplo n.º 1
0
         clustListExact = exactClusterer.clusterFromIterator(graphIterator, False)
         
         logging.debug("Running approximate method")
         clustListApprox = []
         for i in range(len(k2s)): 
             graphIterator = ThreeClustIterator(p, numClusters, r).getIterator()
             clustListApprox.append(iascClusterers[i].clusterFromIterator(graphIterator, False)) 
         
         logging.debug("Running Nystrom method")
         graphIterator = ThreeClustIterator(p, numClusters, r).getIterator()
         clustListNystrom = nystromClusterer.clusterFromIterator(graphIterator, False)
 
         if do_Nings:
             logging.debug("Running Nings method")
             graphIterator = ThreeClustIterator(p, numClusters, r).getIterator()
             clustListNings = ningsClusterer.cluster(graphIterator)
             
         logging.debug("Running random SVD method")
         graphIterator = ThreeClustIterator(p, numClusters, r).getIterator()
         clustListRandSVD = randSvdCluster.clusterFromIterator(graphIterator, False)
 
         # compute the Rand index error for each iteration
         # error: proportion of pairs of vertices (x,y) s.t.
         #    (cl(x) == cl(y)) != (learned_cl(x) == learned_cl(y))
         for it in range(len(ThreeClustIterator().subgraphIndicesList)):
               indicesList = ThreeClustIterator().subgraphIndicesList[it]
               numUsedVertices = len(indicesList)
               
               for i in range(len(k2s)): 
                   clustErrApprox[t, it, r, i] += GraphUtils.randIndex(clustListApprox[i][it], indicesList)
               clustErrExact[t, it, r] += GraphUtils.randIndex(clustListExact[it], indicesList)
    def testCluster(self):
        """Run NingSpectralClustering.cluster on three small graph sequences.

        The sequences are:
          1. a weight matrix that gains one vertex (and one edge) per step;
          2. a graph that gains edges, with a larger leading sub-matrix taken
             at each step;
          3. the final matrix of sequence 2 followed by two smaller leading
             sub-matrices of it.

        For sequence 3 the clustering of the smallest graph is compared with
        the first four labels of the preceding graph, and the clustering of
        the first graph is compared with a spectral clustering computed
        directly from the random-walk normalised Laplacian.
        """
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print("< testCluster >")

        # --- Sequence 1: vertices 6 and 7 are attached one at a time ---
        numVertices = 8
        graph = SparseGraph(GeneralVertexList(numVertices))

        # Triangle (0, 1, 2), triangle (3, 4, 5), and the edge (0, 3) joining them.
        for i, j in [(0, 1), (0, 2), (1, 2), (3, 4), (3, 5), (4, 5), (0, 3)]:
            graph.addEdge(i, j)

        W = graph.getWeightMatrix()

        # Each snapshot is copied because W is modified in place between steps.
        graphList = [W[0:6, 0:6].copy()]
        W[1, 6] += 1
        W[6, 1] += 1
        graphList.append(W[0:7, 0:7].copy())
        W[4, 7] += 1
        W[7, 4] += 1
        graphList.append(W.copy())
        graphIterator = iter(graphList)

        k = 2
        clusterer = NingSpectralClustering(k)
        # The result of this run is not asserted on; the call only has to complete.
        clustersList = clusterer.cluster(toSparseGraphListIterator(graphIterator))

        # Open question: why are the bottom rows of Q still zero?

        # --- Sequence 2: edges are added between snapshots ---
        numVertices = 7
        graph = SparseGraph(GeneralVertexList(numVertices))

        for i, j in [(0, 1), (0, 2), (1, 2), (3, 4)]:
            graph.addEdge(i, j)

        WList = []
        W = graph.getWeightMatrix()
        WList.append(W[0:5, 0:5].copy())

        for i, j in [(3, 5), (4, 5)]:
            graph.addEdge(i, j)
        W = graph.getWeightMatrix()
        WList.append(W[0:6, 0:6].copy())

        for i, j in [(0, 6), (1, 6), (2, 6)]:
            graph.addEdge(i, j)
        W = graph.getWeightMatrix()
        WList.append(W[0:7, 0:7].copy())

        iterator = iter(WList)
        # Seems to work, amazingly; again the result is not asserted on.
        clustersList = clusterer.cluster(toSparseGraphListIterator(iterator))
        #print(clustersList)

        # --- Sequence 3: rows/columns are removed between snapshots ---
        W2 = W[0:5, 0:5]
        W3 = W[0:4, 0:4]
        WList = [W, W2, W3]
        iterator = iter(WList)
        clustersList = clusterer.cluster(toSparseGraphListIterator(iterator))

        # Labels of the vertices shared by the last two graphs must agree.
        # The analogous check between the first two graphs is disabled:
        #nptst.assert_array_equal(clustersList[0][0:5], clustersList[1])
        nptst.assert_array_equal(clustersList[1][0:4], clustersList[2])

        # Make sure the 1st clustering (without updates) is correct by
        # recomputing it directly: eigenvectors of the normalised Laplacian,
        # whitened, then k-means.
        L = GraphUtils.normalisedLaplacianRw(scipy.sparse.csr_matrix(W))
        # Seed before drawing v0 so the starting vector passed to eigs is
        # reproducible.
        numpy.random.seed(21)
        lmbda, Q = scipy.sparse.linalg.eigs(
            L,
            min(k, L.shape[0]-1),
            which="SM",
            ncv=min(20*k, L.shape[0]),
            v0=numpy.random.rand(L.shape[0]),
        )

        V = VqUtils.whiten(Q)
        centroids, distortion = vq.kmeans(V, k, iter=20)
        clusters, distortion = vq.vq(V, centroids)

        # This should be equal but the eigenvector computation is unstable
        # even with repeated runs (and no way to set the seed).
        # NOTE(review): the comparison is on raw label arrays, so it also
        # presumably depends on both sides numbering the clusters the same
        # way -- confirm before relying on it.
        nptst.assert_array_equal(clusters, clustersList[0])
Exemplo n.º 3
0
        graphIterator = getGraphIterator()
        clustListNystrom, timeListNystrom, eigenQualityNystrom = nystromClusterer.clusterFromIterator(
            graphIterator, verbose=True
        )

    if args.runRandomisedSvd:
        # run with randomised SVD approximation
        logging.info("Running randomised svd method")
        graphIterator = getGraphIterator()
        clustListRSvd, timeListRSvd, eigenQualityRSvd = RSvdClusterer.clusterFromIterator(graphIterator, verbose=True)

    if args.runNing:
        # run with Ning's incremental approximation
        logging.info("Running Nings method")
        graphIterator = getGraphIterator()
        clustListNings, timeListNings, eigenQualityNings = ningsClusterer.cluster(graphIterator, verbose=True)

    # print clusters
    if args.runExact:
        logging.info("learned clustering with exact eigenvalue decomposition")
        for i in range(len(clustersExact)):
            clusters = clustersExact[i]
            print(clusters)
    if args.runIASC:
        logging.info("learned clustering with our approximation approach")
        for i in range(len(clustListApprox)):
            clusters = clustListApprox[i]
            print(clusters)
    if args.runNing:
        logging.info("learned clustering with Nings approximation approach")
        for i in range(len(clustListNings)):
Exemplo n.º 4
0
         logging.debug("Running approximate method")
         resApproxList = []
         for i in range(len(k2s)): 
             graphIterator = ThreeClustIterator(p, numClusters, r).getIterator()
             resApproxList.append(iascClusterers[i].clusterFromIterator(graphIterator, True)) 
         
         logging.debug("Running Nystrom method")
         resNystromList = []
         for i in range(len(k3s)): 
             graphIterator = ThreeClustIterator(p, numClusters, r).getIterator()
             resNystromList.append(nystromClusterers[i].clusterFromIterator(graphIterator, True))
 
         if do_Nings:
             logging.debug("Running Nings method")
             graphIterator = ThreeClustIterator(p, numClusters, r).getIterator()
             resNings = ningsClusterer.cluster(graphIterator, True)
             
         logging.debug("Running random SVD method")
         resRandSVDList = []
         for i in range(len(k4s)): 
             graphIterator = ThreeClustIterator(p, numClusters, r).getIterator()
             resRandSVDList.append(randSvdClusterers[i].clusterFromIterator(graphIterator, True))
 
         # compute the Rand index error for each iteration
         # error: proportion of pairs of vertices (x,y) s.t.
         #    (cl(x) == cl(y)) != (learned_cl(x) == learned_cl(y))
         for it in range(len(ThreeClustIterator().subgraphIndicesList)):
               indicesList = ThreeClustIterator().subgraphIndicesList[it]
               numUsedVertices = len(indicesList)
               
               for k in range(len(k2s)):