Exemplo n.º 1
0
    def _build(self, nodeId, targetEdge):
        '''
        attach targetEdge to the node identified by nodeId, creating the
        node the first time its id is seen

        a node that is not yet in self._nodeLookup is created, registered
        in the lookup and placed alone in a brand new cluster. Both the
        node and the cluster are built with the current value of
        self._clusterId, which is then incremented

        arguments:
            nodeId: key used to find the node in self._nodeLookup
            targetEdge: edge object passed to the node's _addEdge()
        '''
        if nodeId not in self._nodeLookup:
            freshNode = Node(self._clusterId, nodeId)
            self._nodeLookup[nodeId] = freshNode

            # every new node starts out in its own single node cluster
            singleton = Cluster(self._clusterId, [freshNode])
            self._clusterId += 1
            # keyed by the id the cluster itself reports
            self._clusters[singleton._clusterId] = singleton

        self._nodeLookup[nodeId]._addEdge(targetEdge)
Exemplo n.º 2
0
    def _fixThisBugUseTrueOOEncapsliations(self, nodeEdgesDict,
                                           betweenEdgeWeightsDict):
        '''
        build the node / cluster object graph from the edge dictionaries,
        then force every cached value on the nodes and clusters to be
        computed

        the full object graph has to exist before any value is calculated,
        so the work is done in separate passes:
            1. set each edge weight to the sum of the weights recorded for
               its (node id, edge._targetId) pair
            2. create one node and one single node cluster per node id.
               The cluster id is the node id
            3. attach the edges to their nodes
            4. initialise each node's kiin cache
            5. trigger the cached sums on the nodes, then on the clusters

        arguments:
            nodeEdgesDict:
                key: node id
                value: iterable of edges for that node. Each edge's
                       _weight is overwritten in place

            betweenEdgeWeightsDict:
                key: (node id, edge._targetId)
                value: iterable of weights that is summed to give the
                       edge weight. Looked up with plain indexing, so
                       every pair is expected to be present

        original author's warning: do not be lazy, it leads to bugs. This
        was coded in haste assuming that all the code lives in the same
        package, so no accessor functions were implemented and private
        members of Node / Cluster / Edge are touched directly. That has
        led to lots of issues
        '''
        # NOTE: "lovainId" is misspelt (the END line uses "louvainId").
        # The text is left alone so existing log output does not change
        self.logger.info("BEGIN lovainId:{}".format(self._louvainId))

        # calculate edge weights
        for nodeId, edges in nodeEdgesDict.items():
            for e in edges:
                key = (nodeId, e._targetId)
                e._weight = sum(betweenEdgeWeightsDict[key])

        for k, v in nodeEdgesDict.items():
            self.logger.debug("nodeEdgesDict nodeId:{} edges:{}".format(k, v))

        for k, v in betweenEdgeWeightsDict.items():
            self.logger.debug(
                "betweenEdgeWeightsDict key:{} listOfWeights:{}".format(k, v))

        # create nodes and clusters. Each node starts in its own cluster
        # and the cluster id is simply the node id
        for newNodeId in nodeEdgesDict:
            newClusterId = newNodeId
            newNode = Node(newClusterId, newNodeId)
            self._nodeLookup[newNodeId] = newNode
            self._clusters[newClusterId] = Cluster(newClusterId, [newNode])

        # add edges to nodes. Done only after every node exists
        for newNodeId, edgeSet in nodeEdgesDict.items():
            newNode = self._nodeLookup[newNodeId]
            for e in edgeSet:
                newNode._addEdge(e)

        # init node caches
        for node in self._nodeLookup.values():
            # because we used _addEdge() instead of addEdges()
            # we need to make sure cache is set up
            node._initKiinCache(self._nodeLookup)

        # force nodes to calc cached values
        for node in self._nodeLookup.values():
            node.getSumAdjWeights()
            # pass the id of the cluster the node belongs to. For the nodes
            # created above this equals the node id, but using the node's
            # own _clusterId does not rely on that coincidence
            node.getSumOfWeightsInsideCluster(node._clusterId,
                                              self._nodeLookup)

        # force clusters to calc cached values
        for cluster in self._clusters.values():
            # run lazy eval
            cluster.getSumOfWeights()
            cluster.getSumOfWeightsInsideCluster(self._nodeLookup)

        self.logger.info("END louvainId:{}\n".format(self._louvainId))
Exemplo n.º 3
0
    def createChangeQGraph(self):
        '''
        build a five node, two cluster test graph. All edge weights are 1

        cluster "c0" holds nodes 0, 1, 2 wired as a triangle
        cluster "c1" holds nodes 3, 4 joined by a single edge
        nodes 0 and 3 are joined by an edge that runs between the clusters

        every connection is stored as two Edge objects, one per direction,
        each added to its source node. Before returning, the kiin cache of
        every node is initialised and the getSum* methods of the nodes and
        clusters are called so that their values are already computed

        returns (level0Louvain,
                 [cluster0, cluster1],
                 [n0, n1, n2, n3, n4],
                 [e0, e1, e2, e3, e4, e5, e6, e7, e8, e9],
                 graphNodesLookup)
        where graphNodesLookup maps node id to node
        '''

        def _link(node, srcId, targetId):
            # create a weight 1 edge and add it to the given node
            edge = Edge(weight=1.0, srcId=srcId, targetId=targetId)
            node._addEdge(edge)
            return edge

        n0 = Node(clusterId="c0", nodeId=0)
        n1 = Node(clusterId="c0", nodeId=1)
        n2 = Node(clusterId="c0", nodeId=2)
        n3 = Node(clusterId="c1", nodeId=3)
        n4 = Node(clusterId="c1", nodeId=4)

        # 0 - 1
        e0 = _link(n0, 0, 1)
        e1 = _link(n1, 1, 0)

        # 0 - 2
        e2 = _link(n0, 0, 2)
        e3 = _link(n2, 2, 0)

        # 0 - 3, the edge between the two clusters
        e4 = _link(n0, 0, 3)
        e5 = _link(n3, 3, 0)

        # NOTE: cluster0 is constructed before the 1 - 2 edges below are
        # added to n1 and n2. That ordering is kept on purpose
        cluster0 = Cluster(clusterId="c0", nodeList=[n0, n1, n2])

        # 4 - 3
        e6 = _link(n4, 4, 3)
        e7 = _link(n3, 3, 4)

        cluster1 = Cluster(clusterId="c1", nodeList=[n3, n4])
        clusters = [cluster0, cluster1]

        # 2 - 1
        e8 = _link(n2, 2, 1)
        e9 = _link(n1, 1, 2)

        edgeList = [e0, e1, e2, e3, e4, e5, e6, e7, e8, e9]
        # the edges come in pairs, print a blank line ahead of each pair
        for position, edge in enumerate(edgeList, start=1):
            if position % 2:
                print()
            self.logger.info(edge)
        print()

        nodeList = [n0, n1, n2, n3, n4]
        graphNodesLookup = {node._nodeId: node for node in nodeList}

        for node in nodeList:
            # _addEdge() was used instead of addEdges(), so the cache
            # has to be set up explicitly
            node._initKiinCache(graphNodesLookup)

        self.logger.debug("")
        for node in nodeList:
            # call the getters so the lazy evaluation runs
            node.getSumAdjWeights()
            node.getSumOfWeightsInsideCluster(node._clusterId,
                                              graphNodesLookup)
            self.logger.debug("node:{}".format(node))

        self.logger.debug("")
        for cluster in clusters:
            # call the getters so the lazy evaluation runs
            cluster.getSumOfWeights()
            cluster.getSumOfWeightsInsideCluster(graphNodesLookup)
            self.logger.info("cluster:{}".format(cluster))

        # Louvain is handed its own list, not the `clusters` list that is
        # returned to the caller
        level0 = Louvain("changeQGraph", [cluster0, cluster1])

        self.logger.info("END\n")
        return (level0, clusters, nodeList, edgeList, graphNodesLookup)
Exemplo n.º 4
0
    def createSimpleGraph(self):
        '''
        creates a triangle (nodes 0, 1, 2) and a pair (nodes 3, 4), then
        joins the two with an edge between node 0 and node 3

        creates two clusters: "c0" holds the triangle, "c1" holds the pair

        all edge weights are 1. Every connection is stored as two Edge
        objects, one per direction, each added to its source node

        the node caches are NOT initialised here

        returns (level0Louvain,
                 [cluster0, cluster1],
                 [n0, n1, n2, n3, n4],
                 [e0, e1, e2, e3, e4, e5, e6],
                 graphNodesLookup)
        where graphNodesLookup maps node id to node

        NOTE(review): the returned edge list is incomplete. e0 - e5 are
        the triangle edges and e6 is the 4 -> 3 edge. The 3 -> 4 edge and
        the two edges between node 0 and node 3 are attached to their
        nodes but are not in the list. This looks accidental, confirm
        with the callers before changing what is returned
        '''
        self.logger.info("BEGIN")

        def _link(node, srcId, targetId):
            # create a weight 1 edge and add it to the given node
            edge = Edge(weight=1.0, srcId=srcId, targetId=targetId)
            node._addEdge(edge)
            return edge

        n0 = Node(clusterId="c0", nodeId=0)
        n1 = Node(clusterId="c0", nodeId=1)
        n2 = Node(clusterId="c0", nodeId=2)

        # undirected triangle graph
        e0 = _link(n0, 0, 1)
        e1 = _link(n1, 1, 0)
        e2 = _link(n0, 0, 2)
        e3 = _link(n2, 2, 0)
        e4 = _link(n1, 1, 2)
        e5 = _link(n2, 2, 1)

        # second cluster graph. The 3 -> 4 edge is only reachable through
        # n3, no local name is kept for it
        n3 = Node(clusterId="c1", nodeId=3)
        _link(n3, 3, 4)
        n4 = Node(clusterId="c1", nodeId=4)
        e6 = _link(n4, 4, 3)

        # a node can not be moved to a cluster unless it is connected to
        # something in that cluster, there would be no gain in Q.
        # So connect a node in c0 with a node in c1
        ea = Edge(weight=1.0, srcId=n0._nodeId, targetId=n3._nodeId)
        eb = Edge(weight=1.0, srcId=n3._nodeId, targetId=n0._nodeId)
        n0._addEdge(ea)
        n3._addEdge(eb)

        nodeList = [n0, n1, n2, n3, n4]
        graphNodesLookup = {node._nodeId: node for node in nodeList}

        # create clusters
        cluster0 = Cluster(clusterId="c0", nodeList=[n0, n1, n2])
        cluster1 = Cluster(clusterId="c1", nodeList=[n3, n4])
        clusters = [cluster0, cluster1]

        # Louvain is handed its own list, not the `clusters` list that is
        # returned to the caller
        level0 = Louvain("simple", [cluster0, cluster1])

        self.logger.info("END\n")

        edgeList = [e0, e1, e2, e3, e4, e5, e6]
        return (level0, clusters, nodeList, edgeList, graphNodesLookup)
Exemplo n.º 5
0
    def testChangeInQSlow(self):
        '''
        check the modularity Louvain reports before and after node 2 is
        moved from the second cluster to the first one

        graph, all edge weights are 1, each connection is stored as two
        Edge objects, one per direction:
            0 - 1, 0 - 2, 0 - 3, 2 - 4, 4 - 5

        first partition:  [0, 1, 3] and [2, 4, 5]
        second partition: [0, 1, 2, 3] and [4, 5]

        the move is done "the slow way": the clusters and the Louvain
        object are rebuilt from scratch for the second partition

        the Q values are floats, so they are compared with
        assertAlmostEqual instead of exact equality. An exact comparison
        breaks as soon as the order of the floating point operations
        changes

        NOTE(review): the nodes are created with clusterId "c1" / "c2"
        while the Cluster objects use "1" / "2". The expected Q values
        were recorded with that mismatch in place, so it is left
        untouched. Confirm whether it is intended
        '''
        self.logger.info("BEGIN")

        n0 = Node(clusterId="c1", nodeId=0)
        n1 = Node(clusterId="c1", nodeId=1)
        n3 = Node(clusterId="c1", nodeId=3)

        # node 0 is connected to 1, 2 and 3
        e1 = Edge(weight=1.0, srcId=0, targetId=1)
        n0._addEdge(e1)
        e2 = Edge(weight=1.0, srcId=0, targetId=2)
        n0._addEdge(e2)
        e3 = Edge(weight=1.0, srcId=0, targetId=3)
        n0._addEdge(e3)

        e4 = Edge(weight=1.0, srcId=1, targetId=0)
        n1._addEdge(e4)

        e5 = Edge(weight=1.0, srcId=3, targetId=0)
        n3._addEdge(e5)

        cluster1 = Cluster(clusterId="1", nodeList=[n0, n1, n3])

        # 2 - 0, this connection runs between the two clusters
        n2 = Node(clusterId="c2", nodeId=2)
        e6 = Edge(weight=1.0, srcId=2, targetId=0)
        n2._addEdge(e6)

        # 4 - 5
        n4 = Node(clusterId="c2", nodeId=4)
        n5 = Node(clusterId="c2", nodeId=5)
        e7 = Edge(weight=1.0, srcId=4, targetId=5)
        n4._addEdge(e7)
        e8 = Edge(weight=1.0, srcId=5, targetId=4)
        n5._addEdge(e8)

        # 4 - 2
        e9 = Edge(weight=1.0, srcId=4, targetId=2)
        n4._addEdge(e9)

        e10 = Edge(weight=1.0, srcId=2, targetId=4)
        n2._addEdge(e10)

        cluster2 = Cluster(clusterId="2", nodeList=[n2, n4, n5])

        louvain1 = Louvain("changeInQ1", [cluster1, cluster2])

        # calculate modularity of original graph
        self.logger.info("louvain1._Q:{}".format(louvain1._Q))
        self.assertAlmostEqual(louvain1._Q, 0.56)

        # move node 2 from cluster 2 to cluster 1
        n2._clusterId = "c1"
        cluster1 = Cluster(clusterId="1", nodeList=[n0, n1, n2, n3])
        cluster2 = Cluster(clusterId="2", nodeList=[n4, n5])

        # calculate modularity
        louvain2 = Louvain("changeInQ2", [cluster1, cluster2])
        self.logger.info("louvain2._Q:{}".format(louvain2._Q))
        self.assertAlmostEqual(louvain2._Q, 0.52)

        self.logger.info("change in modularity:{}".format(louvain1._Q -
                                                          louvain2._Q))
Exemplo n.º 6
0
    def testNode(self):
        '''
        exercise Node, Edge, Cluster and Louvain on a small graph

        a triangle (nodes 0, 1, 2) is built one connection at a time and
        getSumAdjWeights() is checked after each step. The triangle is
        then wrapped in cluster "c0", a separate pair (nodes 3, 4) is
        wrapped in cluster "c1", and _getM() is checked on both clusters
        and on the Louvain object built from them. Finally the modularity
        _Q of the Louvain object is checked

        all edge weights are 1. Each connection is stored as two Edge
        objects, one per direction

        _Q is a float, so it is compared with assertAlmostEqual instead
        of exact equality
        '''
        self.logger.info("BEGIN")

        n0 = Node(clusterId="c0", nodeId=0)
        n1 = Node(clusterId="c0", nodeId=1)
        n2 = Node(clusterId="c0", nodeId=2)

        # undirected triangle graph, first side: 0 - 1
        e0 = Edge(weight=1.0, srcId=0, targetId=1)
        n0._addEdge(e0)

        e1 = Edge(weight=1.0, srcId=1, targetId=0)
        n1._addEdge(e1)

        self.assertEqual(1, n0.getSumAdjWeights())
        self.assertEqual(1, n1.getSumAdjWeights())

        # second side: 0 - 2
        e2 = Edge(weight=1.0, srcId=0, targetId=2)
        n0._addEdge(e2)

        e3 = Edge(weight=1.0, srcId=2, targetId=0)
        n2._addEdge(e3)

        # test print functions
        self.logger.info("e3:{}".format(e3))

        # the sum has to reflect the edge that was just added
        self.assertEqual(2, n0.getSumAdjWeights())
        self.assertEqual(1, n2.getSumAdjWeights())

        # test print functions
        self.logger.info("n2:{}".format(n2))

        # third side: 1 - 2
        e4 = Edge(weight=1.0, srcId=1, targetId=2)
        n1._addEdge(e4)

        e5 = Edge(weight=1.0, srcId=2, targetId=1)
        n2._addEdge(e5)

        self.assertEqual(2, n1.getSumAdjWeights())
        self.assertEqual(2, n2.getSumAdjWeights())

        # create cluster0
        cluster0 = Cluster(clusterId="c0", nodeList=[n0, n1, n2])
        self.assertEqual(3, cluster0._getM())

        # test print functions
        self.logger.info("cluster0:{}".format(cluster0))

        # create disjoint graph: 3 - 4
        n3 = Node(clusterId="c1", nodeId=3)
        e6 = Edge(weight=1.0, srcId=3, targetId=4)
        n3._addEdge(e6)

        n4 = Node(clusterId="c1", nodeId=4)
        e7 = Edge(weight=1.0, srcId=4, targetId=3)
        n4._addEdge(e7)

        cluster1 = Cluster(clusterId="c1", nodeList=[n3, n4])
        self.assertEqual(1, cluster1._getM())

        # test modularity calculation
        level0 = Louvain("testNode", [cluster0, cluster1])
        self.assertEqual(4, level0._getM())

        self.logger.info("level0._Q:{}".format(level0._Q))
        self.assertAlmostEqual(level0._Q, 0.59375)

        self.logger.info("END\n")