def _build(self, nodeId, targetEdge):
    '''
    attach targetEdge to the node registered under nodeId, creating the
    node on first sight

    a node that is not yet in self._nodeLookup is created together with a
    new single node Cluster. both use the current value of self._clusterId,
    which is then incremented. the cluster is stored in self._clusters
    under its own _clusterId

    arguments:
        nodeId:     key used in self._nodeLookup
        targetEdge: edge object handed to the node's _addEdge()
    '''
    lookup = self._nodeLookup

    if nodeId not in lookup:
        # first time we see this id: the node starts out alone in a new cluster
        freshNode = Node(self._clusterId, nodeId)
        lookup[nodeId] = freshNode

        singleton = Cluster(self._clusterId, [freshNode])
        self._clusterId += 1
        self._clusters[singleton._clusterId] = singleton

    # existing and freshly created nodes both receive the edge
    lookup[nodeId]._addEdge(targetEdge)
def _fixThisBugUseTrueOOEncapsliations(self, nodeEdgesDict, betweenEdgeWeightsDict):
    '''
    build the node / cluster object graph for this level and then force
    every cached value to be calculated

    the full object graph must be constructed before any of the values are
    calculated. this method reaches into private members of Node, Cluster
    and the edge objects instead of going through accessor functions; that
    shortcut has caused bugs before, so keep the phases below in order

    phases:
        1. set each edge's _weight to the sum of the weights listed for
           (nodeId, edge._targetId) in betweenEdgeWeightsDict
        2. create one Node and one single node Cluster per key of
           nodeEdgesDict. the cluster id is the node id
        3. add the edges to their nodes
        4. init the caches of every node in self._nodeLookup
        5. call the lazy getters on every node and cluster so the cached
           values are populated

    arguments:
        nodeEdgesDict:
            key is a node id, value is an iterable of edge objects

        betweenEdgeWeightsDict:
            key is a (nodeId, targetId) tuple, value is an iterable of
            weights

    raises:
        KeyError if an edge's (nodeId, targetId) pair is not a key of
        betweenEdgeWeightsDict
    '''
    # the BEGIN marker used to be spelled "lovainId"; it now matches END
    self.logger.info("BEGIN louvainId:{}".format(self._louvainId))

    # calculate edge weights
    for nodeId, edges in nodeEdgesDict.items():
        for e in edges:
            e._weight = sum(betweenEdgeWeightsDict[(nodeId, e._targetId)])

    for k, v in nodeEdgesDict.items():
        self.logger.debug("nodeEdgesDict nodeId:{} edges:{}".format(k, v))

    for k, v in betweenEdgeWeightsDict.items():
        self.logger.debug(
            "betweenEdgeWeightsDict key:{} listOfWeights:{}".format(k, v))

    # create nodes and clusters. every new node starts out alone in a
    # cluster that shares its id
    for newNodeId in nodeEdgesDict:
        newNode = Node(newNodeId, newNodeId)
        self._nodeLookup[newNodeId] = newNode
        self._clusters[newNodeId] = Cluster(newNodeId, [newNode])

    # add edges to nodes. done only after every node exists
    for newNodeId, edgeSet in nodeEdgesDict.items():
        newNode = self._nodeLookup[newNodeId]
        for e in edgeSet:
            newNode._addEdge(e)

    # init node caches
    # because we used _addEdge() instead of addEdges()
    # we need to make sure cache is set up
    for node in self._nodeLookup.values():
        node._initKiinCache(self._nodeLookup)

    # force nodes to calc cached values
    # the lookup key is passed as the cluster id; for the nodes created
    # above the two ids are the same value
    for nodeId, node in self._nodeLookup.items():
        node.getSumAdjWeights()
        node.getSumOfWeightsInsideCluster(nodeId, self._nodeLookup)

    # force clusters to calc cached values (run lazy eval)
    for cluster in self._clusters.values():
        cluster.getSumOfWeights()
        cluster.getSumOfWeightsInsideCluster(self._nodeLookup)

    self.logger.info("END louvainId:{}\n".format(self._louvainId))
def createChangeQGraph(self):
    '''
    build a five node, two cluster test graph and run the lazy evaluation
    on its nodes and clusters

    cluster "c0" holds nodes 0, 1, 2 connected as a triangle
    cluster "c1" holds nodes 3, 4 connected by one edge
    edge 0 - 3 runs between the clusters

    every undirected edge is stored as two directed Edge objects of
    weight 1.0, one on each end node

    returns
        (level0Louvain, [cluster0, cluster1], [n0, n1, n2, n3, n4],
         [e0, e1, e2, e3, e4, e5, e6, e7, e8, e9], graphNodesLookup)
    '''
    def connect(owner, src, dst):
        # one directed, unit weight edge, registered on its source node
        link = Edge(weight=1.0, srcId=src, targetId=dst)
        owner._addEdge(link)
        return link

    n0, n1, n2 = [Node(clusterId="c0", nodeId=i) for i in (0, 1, 2)]
    n3, n4 = [Node(clusterId="c1", nodeId=i) for i in (3, 4)]

    # 0 - 1
    e0 = connect(n0, 0, 1)
    e1 = connect(n1, 1, 0)

    # 0 - 2
    e2 = connect(n0, 0, 2)
    e3 = connect(n2, 2, 0)

    # 0 - 3, the edge between the clusters
    e4 = connect(n0, 0, 3)
    e5 = connect(n3, 3, 0)

    # cluster 0 is constructed here, before the 1 - 2 edges further down
    # are attached. the construction order is kept as it was
    cluster0 = Cluster(clusterId="c0", nodeList=[n0, n1, n2])

    # 4 - 3
    e6 = connect(n4, 4, 3)
    e7 = connect(n3, 3, 4)

    cluster1 = Cluster(clusterId="c1", nodeList=[n3, n4])
    clusters = [cluster0, cluster1]

    # 2 - 1, closes the triangle
    e8 = connect(n2, 2, 1)
    e9 = connect(n1, 1, 2)

    edgeList = [e0, e1, e2, e3, e4, e5, e6, e7, e8, e9]

    # blank line on stdout in front of every pair of directed edges
    for position, edge in enumerate(edgeList, start=1):
        if position % 2:
            print()
        self.logger.info(edge)
    print()

    nodeList = [n0, n1, n2, n3, n4]
    graphNodesLookup = dict((node._nodeId, node) for node in nodeList)

    # edges were attached with _addEdge() rather than addEdges(),
    # so each node's cache has to be set up by hand
    for node in nodeList:
        node._initKiinCache(graphNodesLookup)

    self.logger.debug("")
    for node in nodeList:
        # trigger the lazy evaluation
        node.getSumAdjWeights()
        node.getSumOfWeightsInsideCluster(node._clusterId, graphNodesLookup)
        self.logger.debug("node:{}".format(node))

    self.logger.debug("")
    for c in clusters:
        # trigger the lazy evaluation
        c.getSumOfWeights()
        c.getSumOfWeightsInsideCluster(graphNodesLookup)
        self.logger.info("cluster:{}".format(c))

    level0 = Louvain("changeQGraph", [cluster0, cluster1])

    self.logger.info("END\n")
    return (level0, clusters, nodeList, edgeList, graphNodesLookup)
def createSimpleGraph(self):
    '''
    build a small two cluster test graph. all edge weights are 1.0

    cluster "c0" is a triangle of nodes 0, 1, 2
    cluster "c1" is the pair of nodes 3, 4 joined by one edge
    an extra edge 0 - 3 connects the two clusters

    every undirected edge is stored as two directed Edge objects, one on
    each end node

    this method does not call _initKiinCache() on the nodes and does not
    run any of the lazy getters

    returns
        (level0Louvain, [cluster0, cluster1], [n0, n1, n2, n3, n4],
         [e0, e1, e2, e3, e4, e5, e6], graphNodesLookup)

        the returned edge list holds the six triangle edges plus the
        4 -> 3 edge. the 3 -> 4 edge and the two 0 - 3 edges are attached
        to their nodes but are not part of the list
    '''
    self.logger.info("BEGIN")

    def connect(owner, src, dst):
        # one directed, unit weight edge, registered on its source node
        link = Edge(weight=1.0, srcId=src, targetId=dst)
        owner._addEdge(link)
        return link

    n0, n1, n2 = [Node(clusterId="c0", nodeId=i) for i in (0, 1, 2)]

    # undirected triangle graph
    e0 = connect(n0, 0, 1)
    e1 = connect(n1, 1, 0)
    e2 = connect(n0, 0, 2)
    e3 = connect(n2, 2, 0)
    e4 = connect(n1, 1, 2)
    e5 = connect(n2, 2, 1)

    # second cluster graph. only the 4 -> 3 direction is kept in a local
    # for the returned edge list
    n3 = Node(clusterId="c1", nodeId=3)
    connect(n3, 3, 4)
    n4 = Node(clusterId="c1", nodeId=4)
    e6 = connect(n4, 4, 3)

    # a node can only be moved into a cluster it is connected to,
    # otherwise there is no gain in Q. so add an edge between a node
    # in c0 and a node in c1
    bridgeOut = Edge(weight=1.0, srcId=n0._nodeId, targetId=n3._nodeId)
    bridgeBack = Edge(weight=1.0, srcId=n3._nodeId, targetId=n0._nodeId)
    n0._addEdge(bridgeOut)
    n3._addEdge(bridgeBack)

    nodeList = [n0, n1, n2, n3, n4]
    graphNodesLookup = dict((node._nodeId, node) for node in nodeList)

    # create clusters
    cluster0 = Cluster(clusterId="c0", nodeList=[n0, n1, n2])
    cluster1 = Cluster(clusterId="c1", nodeList=[n3, n4])
    clusters = [cluster0, cluster1]

    level0 = Louvain("simple", [cluster0, cluster1])
    self.logger.info("END\n")

    returnedEdges = [e0, e1, e2, e3, e4, e5, e6]
    return (level0, clusters, nodeList, returnedEdges, graphNodesLookup)
def testChangeInQSlow(self):
    '''
    check the modularity (Q) of a six node graph before and after node 2
    is moved from the second cluster into the first one

    this is the "slow" way: after the move the clusters and the Louvain
    object are rebuilt from scratch and Q is read from the new object

    Q is a computed float, so it is compared with assertAlmostEqual()
    (7 decimal places) instead of exact equality. the old exact literals
    0.5599999999999999 and 0.5199999999999999 were round off artifacts
    of 0.56 and 0.52
    '''
    self.logger.info("BEGIN")

    # NOTE(review): the nodes carry clusterId "c1" / "c2" while the Cluster
    # objects are created with clusterId "1" / "2". left untouched because
    # the expected Q values were recorded with these ids; confirm the
    # mismatch is intended
    n0 = Node(clusterId="c1", nodeId=0)
    n1 = Node(clusterId="c1", nodeId=1)
    n3 = Node(clusterId="c1", nodeId=3)

    # node 0 is connected to nodes 1, 2 and 3
    n0._addEdge(Edge(weight=1.0, srcId=0, targetId=1))
    n0._addEdge(Edge(weight=1.0, srcId=0, targetId=2))
    n0._addEdge(Edge(weight=1.0, srcId=0, targetId=3))

    n1._addEdge(Edge(weight=1.0, srcId=1, targetId=0))
    n3._addEdge(Edge(weight=1.0, srcId=3, targetId=0))
    cluster1 = Cluster(clusterId="1", nodeList=[n0, n1, n3])

    n2 = Node(clusterId="c2", nodeId=2)
    n2._addEdge(Edge(weight=1.0, srcId=2, targetId=0))

    n4 = Node(clusterId="c2", nodeId=4)
    n5 = Node(clusterId="c2", nodeId=5)

    # 4 - 5
    n4._addEdge(Edge(weight=1.0, srcId=4, targetId=5))
    n5._addEdge(Edge(weight=1.0, srcId=5, targetId=4))

    # 4 - 2
    n4._addEdge(Edge(weight=1.0, srcId=4, targetId=2))
    n2._addEdge(Edge(weight=1.0, srcId=2, targetId=4))
    cluster2 = Cluster(clusterId="2", nodeList=[n2, n4, n5])

    # calculate modularity of original graph
    louvain1 = Louvain("changeInQ1", [cluster1, cluster2])
    self.logger.info("louvain1._Q:{}".format(louvain1._Q))
    self.assertAlmostEqual(louvain1._Q, 0.56)

    # move node 2 from cluster 2 to cluster 1
    n2._clusterId = "c1"
    cluster1 = Cluster(clusterId="1", nodeList=[n0, n1, n2, n3])
    cluster2 = Cluster(clusterId="2", nodeList=[n4, n5])

    # calculate modularity after the move
    louvain2 = Louvain("changeInQ2", [cluster1, cluster2])
    self.logger.info("louvain2._Q:{}".format(louvain2._Q))
    self.assertAlmostEqual(louvain2._Q, 0.52)

    self.logger.info("change in modularity:{}".format(louvain1._Q - louvain2._Q))
def testNode(self):
    '''
    exercise Node, Edge, Cluster and Louvain on a triangle graph plus a
    separate two node graph

    checks, in this order:
        - getSumAdjWeights() of each node as edges are added one undirected
          edge at a time
        - Cluster._getM() of the triangle (3) and of the pair (1)
        - Louvain._getM() (4) and the modularity _Q of the two clusters

    _Q is a computed float, so it is compared with assertAlmostEqual()
    (7 decimal places) instead of exact equality

    the logger calls on e3, n2 and cluster0 are there to exercise the
    print functions of those classes
    '''
    self.logger.info("BEGIN")

    n0 = Node(clusterId="c0", nodeId=0)
    n1 = Node(clusterId="c0", nodeId=1)
    n2 = Node(clusterId="c0", nodeId=2)

    # undirected triangle graph
    # 0 - 1
    e0 = Edge(weight=1.0, srcId=0, targetId=1)
    n0._addEdge(e0)
    e1 = Edge(weight=1.0, srcId=1, targetId=0)
    n1._addEdge(e1)
    self.assertEqual(1, n0.getSumAdjWeights())
    self.assertEqual(1, n1.getSumAdjWeights())

    # 0 - 2
    e2 = Edge(weight=1.0, srcId=0, targetId=2)
    n0._addEdge(e2)
    e3 = Edge(weight=1.0, srcId=2, targetId=0)
    n2._addEdge(e3)

    # test print functions
    self.logger.info("e3:{}".format(e3))

    self.assertEqual(2, n0.getSumAdjWeights())
    self.assertEqual(1, n2.getSumAdjWeights())

    # test print functions
    self.logger.info("n2:{}".format(n2))

    # 1 - 2
    e4 = Edge(weight=1.0, srcId=1, targetId=2)
    n1._addEdge(e4)
    e5 = Edge(weight=1.0, srcId=2, targetId=1)
    n2._addEdge(e5)
    self.assertEqual(2, n1.getSumAdjWeights())
    self.assertEqual(2, n2.getSumAdjWeights())

    # create cluster0
    cluster0 = Cluster(clusterId="c0", nodeList=[n0, n1, n2])
    self.assertEqual(3, cluster0._getM())

    # test print functions
    self.logger.info("cluster0:{}".format(cluster0))

    # create disjoint graph, 3 - 4
    # the two directions get their own names; the name e6 used to be
    # rebound for the second edge
    n3 = Node(clusterId="c1", nodeId=3)
    e6 = Edge(weight=1.0, srcId=3, targetId=4)
    n3._addEdge(e6)

    n4 = Node(clusterId="c1", nodeId=4)
    e7 = Edge(weight=1.0, srcId=4, targetId=3)
    n4._addEdge(e7)

    cluster1 = Cluster(clusterId="c1", nodeList=[n3, n4])
    self.assertEqual(1, cluster1._getM())

    # test modularity calculation
    level0 = Louvain("testNode", [cluster0, cluster1])
    self.assertEqual(4, level0._getM())

    self.logger.info("level0._Q:{}".format(level0._Q))
    self.assertAlmostEqual(level0._Q, 0.59375)

    self.logger.info("END\n")