def simpleTests():
    """Smoke-test UnionFind: makeSet, find, union and countGroups.

    Ten singleton sets (0..9) are created and then merged step by step.
    After each step the parent pointers, the stored order value and the
    group count are checked.  Judging by the assertion messages,
    ``uf.data[x][0]`` is the stored parent of ``x`` and ``uf.data[x][1]``
    its order.

    Raises:
        AssertionError: at the first check that does not hold.
    """
    uf = UnionFind()
    members = range(10)

    def expectGroups(expected):
        # Shared group-count check used after every stage below.
        assert uf.countGroups() == expected, "Counted wrong number of groups."

    # Register every element first, then verify each is its own parent.
    for member in members:
        uf.makeSet(member)
    for member in members:
        assert uf.find(member) == member, "Parent not initialized correctly."
    expectGroups(10)

    # Merging two singletons (equal trees).
    uf.union(0, 1)
    assert uf.find(1) == 0, "Parent not updated correctly."
    assert uf.data[0][1] == 1, "Order not updated for equal trees correctly."
    expectGroups(9)

    # Merging a singleton into the existing tree (unequal trees).
    uf.union(1, 2)
    assert uf.find(2) == 0, "Parent not updated correctly."
    assert uf.data[0][1] == 1, "Order not updated for unequal trees correctly."
    expectGroups(8)

    # Build a second tree {3, 4, 5} and merge it with the first one.
    for left, right in ((3, 4), (4, 5), (0, 3)):
        uf.union(left, right)
    assert uf.data[0][1] == 2, "Order not updated for unequal trees."

    # The stored parent of 5 is only expected to change once find(5) runs.
    assert uf.data[5][0] == 3, "Parent should not be updated until find operation."
    assert uf.find(5) == 0, "Find operation returned wrong parent."
    assert uf.data[5][0] == 0, "Parent should have been updated."
    expectGroups(5)
class Kruskal:
    """Kruskal's algorithm on an edge list, for MSTs and k-clustering.

    Attributes:
        ufSet: UnionFind holding one set per node (nodes are 0-based).
        edges: list of ``(u, v, weight)`` tuples sorted by ascending weight.
        mst: edges chosen by the last ``mstKruskal`` call (set by that call).
    """

    def __init__(self, data):
        """Parse the input lines and prepare the sorted edge list.

        Args:
            data: sequence of text lines.  The first token of ``data[0]`` is
                the node count; every following line is ``"u v weight"``
                with 1-based node numbers, which are converted to 0-based.
                Blank lines are ignored.
        """
        nodes = int(data[0].split()[0])
        self.ufSet = UnionFind()
        for n in range(nodes):
            self.ufSet.makeSet(n)

        self.edges = []
        for line in data[1:]:
            # A list (not a lazy map object) so it can be indexed on
            # Python 3 as well as Python 2.
            row = [int(token) for token in line.split()]
            if not row:
                # Tolerate blank / trailing empty lines in the input.
                continue
            self.edges.append((row[0] - 1, row[1] - 1, row[2]))
        self.edges.sort(key=itemgetter(2))

    def mstKruskal(self):
        """Build a minimum spanning forest and return its total weight.

        The selected edges are stored in ``self.mst``.  The union-find in
        ``self.ufSet`` is modified in the process.
        """
        mst = []
        total = 0
        for edge in self.edges:
            # An edge inside one component would close a cycle: skip it.
            if self.ufSet.find(edge[0]) == self.ufSet.find(edge[1]):
                continue
            self.ufSet.union(edge[0], edge[1])
            mst.append(edge)
            total += edge[2]
        self.mst = mst
        return total

    def clusterKruskal(self, k):
        """Merge the closest clusters until ``k`` remain; return the spacing.

        Returns:
            The weight of the cheapest edge that still joins two different
            clusters once exactly ``k`` clusters are left, or ``None`` when
            that state is never reached with such an edge remaining (for
            example when there are fewer than ``k`` clusters to begin with).
        """
        print("Running Clustering, k={0}".format(k))
        for edge in self.edges:
            if self.ufSet.find(edge[0]) == self.ufSet.find(edge[1]):
                continue
            # Check the count BEFORE merging.  The previous version only
            # checked after a merge, so starting with exactly k clusters
            # merged everything and returned None.
            if self.ufSet.countGroups() == k:
                print("Smallest unallocated edge: {0}".format(edge))
                return edge[2]
            self.ufSet.union(edge[0], edge[1])
        return None
def segmentIsland(flatFaces, island):
    """Group faces that are linked through their ``fromFace`` attribute.

    Each face in ``island`` is unioned with ``flatFaces[face].fromFace``
    whenever that attribute is not ``None``.

    Args:
        flatFaces: indexable collection whose items expose ``fromFace``
            (the key of a linked face, or ``None``).
        island: face keys to process; when empty, every index of
            ``flatFaces`` is processed.

    Returns:
        ``(sets.group, sets.leader)`` taken from the UnionFind instance --
        presumably leader -> members and member -> leader; verify against
        the UnionFind implementation.
    """
    sets = UnionFind(True)
    if len(island) == 0:
        island = range(len(flatFaces))

    for face in island:
        # Test membership on the mapping itself: ``.keys()`` builds a list
        # on Python 2, which made every test a linear scan.
        if face not in sets.leader:
            sets.makeSet([face])
        neighbor = flatFaces[face].fromFace
        if neighbor is not None:
            if neighbor not in sets.leader:
                sets.makeSet([neighbor])
            sets.union(face, neighbor)
    return sets.group, sets.leader
def segmentIsland(flatFaces, island):
    """Partition faces into sets connected via their ``fromFace`` links.

    Args:
        flatFaces: indexable collection of face objects; each one exposes
            ``fromFace``, which is either ``None`` or the key of another
            face.
        island: the face keys to consider.  An empty ``island`` means
            "all faces", i.e. ``range(len(flatFaces))``.

    Returns:
        The ``group`` and ``leader`` attributes of the UnionFind that was
        built, as a 2-tuple.  (Their exact structure is defined by
        UnionFind and is not visible here.)
    """
    sets = UnionFind(True)
    if len(island) == 0:
        island = range(len(flatFaces))

    for face in island:
        # ``in`` on the mapping is a hash lookup; the former
        # ``in sets.leader.keys()`` scanned a freshly built list on
        # Python 2 for every face.
        if face not in sets.leader:
            sets.makeSet([face])

        neighbor = flatFaces[face].fromFace
        if neighbor is None:
            continue
        if neighbor not in sets.leader:
            sets.makeSet([neighbor])
        sets.union(face, neighbor)

    return sets.group, sets.leader
def build(self):
    """Build the TreeIndex bottom-up using a union-find (disjoint-set).

    Reads the graph from ``ShellStructIndex.G`` and stores its results on
    the ``ShellStructIndex`` class: ``coreDict`` (vertex -> core number),
    ``vertexTNodelist`` (list indexed by vertex id holding that vertex's
    TNode) and ``root`` (the TNode with core 0).

    NOTE(review): the visible part of this method ends at the step-5
    marker; the remainder (if any) is not shown here.
    """
    N = nx.number_of_nodes(ShellStructIndex.G)  # number of nodes in the graph
    # Step 1: compute the k-core decomposition and group vertices by coreness.
    '步骤1:计算k-core,按coreness分组'
    # k-core decomposition
    ShellStructIndex.coreDict = nx.core_number(ShellStructIndex.G)
    # Group the vertices by core number.
    Vk = defaultdict(list)  # the dict values are lists
    for key, value in ShellStructIndex.coreDict.iteritems():
        # Author note (2017.3.5): vertices that are not in the graph were
        # seen here; nx.core_number was suspected.
        Vk[value].append(key)
    # Vk is traversed by coreness in descending order (see step 3).
    # Step 2: initialise the union-find and the supporting data structures.
    '步骤2:初始化并查集和一些需要的数据结构'
    # Tree nodes that currently have no parent; in step 4 they become the
    # children of the core-0 root.
    restNodeList = []
    # Vertex ids may be non-contiguous, so the largest id is looked up and
    # maxID is used wherever N would otherwise size an array.
    '为了处理节点不连续的问题,找iD最大的节点,将maxID替换所有的N'
    maxID = 0
    for nodeID in ShellStructIndex.G.nodes():
        if nodeID > maxID:
            maxID = nodeID
    # List mapping graph vertex id -> TNode.
    ShellStructIndex.vertexTNodelist = [None] * (maxID + 1)
    core0List = []  # vertices with coreness 0; they form the root of the tree
    # ---- initialise the union-find ----
    unodeArr = []  # union-find nodes, indexed by vertex id (id -> UNode)
    uf = UnionFind()  # object providing the union-find operations
    for i in range(maxID + 1):  # +1 because numbering may start at 1
        unode = UNode(i)
        uf.makeSet(unode)
        unodeArr.append(unode)
    # Step 3: build the tree bottom-up, level by level.
    '步骤3:自底向上建立树'
    tnodeCounter = 0  # counts the TNodes created so far
    for key in sorted(Vk.keys(), reverse=True):  # core values, largest first
        curcore = key
        vkList = Vk[key]
        if curcore > 0:
            # (id -> UNode).  A dict is used here, whereas unodeArr is a
            # list, because the ids in this map are not necessarily
            # contiguous (temporary union-find mapping for this level).
            idUFMap = {}
            # Step 3.1: find the connected components among the vertices of
            # this core value, using the temporary nodes in idUFMap.
            '步骤3.1: 先在同一个core值节点中找连通分量,利用一个临时并查集idUFMap'
            for id in vkList:
                if not idUFMap.has_key(id):  # add to Vk
                    unode = UNode(id)
                    uf.makeSet(unode)
                    idUFMap[id] = unode
                for ngid in ShellStructIndex.G.neighbors(id):
                    # Only neighbours whose core is at least as large matter.
                    if ShellStructIndex.coreDict[ngid] >= ShellStructIndex.coreDict[id]:
                        if ShellStructIndex.coreDict[ngid] > ShellStructIndex.coreDict[id]:
                            # A neighbour with a larger core was handled on
                            # an earlier level: substitute the value of the
                            # node that find() returns for it.
                            ngid = uf.find(unodeArr[ngid]).value
                        if not idUFMap.has_key(ngid):  # add to V'
                            unode = UNode(ngid)
                            uf.makeSet(unode)
                            idUFMap[ngid] = unode
                        # Merge id with its neighbour (or the neighbour's
                        # substitute from above).
                        uf.union(idUFMap[id], idUFMap[ngid])
            # Step 3.2: using the temporary union-find result, group the
            # graph vertices and collect the child tree nodes.
            '步骤3.2:按照上面临时并查集的结果,给图节点分组,找树节点孩子'
            # (UNode -> [vertex]): vertices of this level per component.
            ufGNodeMap = defaultdict(list)
            # (UNode -> [TNode]): existing tree nodes per component.
            ufTNodeMap = defaultdict(list)
            for reId, reUNode in idUFMap.iteritems():
                newParent = uf.find(reUNode)  # what find() returns for the temporary node
                if ShellStructIndex.coreDict[reId] == curcore:
                    # Vertices of the current core value form one group.
                    ufGNodeMap[newParent].append(reId)
                if ShellStructIndex.coreDict[reId] > curcore:
                    # Bottom-up order: this reId was handled on an earlier
                    # level.  Its UNode is read from the outer array (no
                    # find() is applied here).
                    oldParent = unodeArr[reId]
                    tnode = ShellStructIndex.vertexTNodelist[oldParent.represent]
                    ufTNodeMap[newParent].append(tnode)
            # Step 3.3: create the new TNodes and link the tree nodes.
            '步骤3.3:产生新的TNode节点并建立树节点之间的联系'
            for parent, nodeList in ufGNodeMap.iteritems():
                childList = ufTNodeMap[parent]
                tnodeCounter = tnodeCounter + 1
                # New tree node, given its coreness and its running number
                # (revised 2017.2.26).
                tnode = TNode(curcore, tnodeCounter)
                tnode.nodeList = nodeList
                if childList:  # attach children only when there are any
                    tnode.childList = childList  # author's open question: deep copy needed?
                    # Give each child its parent, to ease later retrieval
                    # (revised 2017.2.26).
                    for chid in childList:
                        chid.parent = tnode
                restNodeList.append(tnode)  # this node has no parent for now
                # Update the (id -> TNode) mapping.
                for nodeId in nodeList:
                    ShellStructIndex.vertexTNodelist[nodeId] = tnode
                # The children are no longer parentless.
                for subTNode in tnode.childList:
                    restNodeList.remove(subTNode)
            # Step 3.4: update the outer union-find that covers all vertices.
            '步骤3.4: 更新外面包含所有节点的并查集'
            for id in vkList:
                x = unodeArr[id]  # UNode of the current vertex
                for ngid in ShellStructIndex.G.neighbors(id):
                    # Only edges towards cores >= the current one are merged;
                    # the author notes this keeps the construction bottom-up.
                    if ShellStructIndex.coreDict[ngid] >= curcore:
                        y = unodeArr[ngid]
                        uf.union(x, y)
                # Update the represent vertex: id replaces it whenever the
                # current represent has a larger core number than id.
                # NOTE(review): the nesting of this step was reconstructed
                # from whitespace-collapsed source; confirm it belongs after
                # the neighbour loop.
                xRoot = uf.find(x)
                xRepresent = uf.find(x).represent
                if ShellStructIndex.coreDict[xRepresent] > ShellStructIndex.coreDict[id]:
                    xRoot.represent = id
        else:  # vertices with core 0 form the root
            core0List = vkList
    # Step 4: create the root node.
    '步骤4:建立root节点'
    tnodeCounter = tnodeCounter + 1  # (revised 2017.2.26)
    ShellStructIndex.root = TNode(core=0, data=tnodeCounter)
    ShellStructIndex.root.nodeList = core0List
    # Author's open question: is copy.deepcopy(restNodeList) needed here?
    ShellStructIndex.root.childList = restNodeList
    # (revised 2017.2.26) the root becomes the parent of every remaining node.
    for chid in ShellStructIndex.root.childList:
        chid.parent = ShellStructIndex.root
    # Also point the core-0 vertices at the root in the vertex -> TNode map.
    for v in core0List:
        ShellStructIndex.vertexTNodelist[v] = ShellStructIndex.root
    # Step 5: build the inverted index of the nodeList attributes on the
    # tree nodes.
    '步骤5:在树节点上获得nodeList的属性的倒排'
# Count the groups formed by the integers A..B when two numbers belong
# together if they share a prime factor >= P.  A, B, P, sieve() and
# UnionFind come from the surrounding code.
interval = range(A, B + 1)

# Two numbers inside [A, B] can only share a prime that divides their
# difference, so primes up to B - A are enough.
# The "%s %s" form reproduces the Python 2 output of
# ``print "sieve for ", B - A`` exactly and also works on Python 3.
print("%s %s" % ("sieve for ", B - A))
primes = sieve(B - A)
print("trimming primes")
primes = [a for a in primes if a >= P]

print("calculating factors")
set_dict2 = {}
for prime in primes:
    # Step through the multiples directly instead of testing every number
    # of the interval against every prime.
    first_multiple = -(-A // prime) * prime  # smallest multiple >= A
    set_dict2[prime] = list(range(first_multiple, B + 1, prime))

print("creating UnionFind")
set_list = UnionFind()
for item in interval:
    set_list.makeSet([item])

print("reducing sets")
for temp_list in set_dict2.values():
    # Merge every further multiple into the first one.  Lists with fewer
    # than two entries need no merge, and the first element is no longer
    # unioned with itself.
    for new_item in temp_list[1:]:
        set_list.union(temp_list[0], new_item)
results = set_list.getNumGroups()
class ClusterHamming:
    """Cluster bit strings whose Hamming distance is below a threshold.

    Attributes:
        bits: number of bits per node, read from the header line.
        uf: UnionFind with one set per node index.
        hammingData: maps each bit string (spaces removed) to the list of
            node indices that carry it.
    """

    def __init__(self, data):
        """Parse the input lines.

        Args:
            data: sequence of lines.  ``data[0]`` is ``"<nodes> <bits>"``;
                the next ``nodes`` lines hold one bit string each, with the
                bits optionally separated by spaces.  The sequence is not
                modified (the header used to be popped off the caller's
                list).
        """
        nodes, self.bits = (int(token) for token in data[0].split())
        self.uf = UnionFind()
        for n in range(nodes):
            self.uf.makeSet(n)

        self.hammingData = defaultdict(list)
        for n in range(nodes):
            # Node n lives on line n + 1 because line 0 is the header.
            s = data[n + 1].replace(' ', '')
            self.hammingData[s].append(n)

    def flip(self, s, flipbits):
        """
        Given an input string (s) and tuple of indices (flipbits), returns
        a new string with bits at specified indices flipped.  The length of
        (flipbits) determines the resulting hamming distance.

        A '1' becomes '0'; any other character at a flipped index becomes
        '1'.
        """
        targets = set(flipbits)
        return ''.join(
            ('0' if c == '1' else '1') if i in targets else c
            for i, c in enumerate(s)
        )

    def getHammingPermutations(self, s, n):
        """
        Generate permutations of s whose distance is less than or equal
        to n.  The result starts with s itself (distance 0).
        """
        result = [s]
        for d in range(1, n + 1):
            for flipbits in combinations(range(self.bits), d):
                result.append(self.flip(s, flipbits))
        return result

    def printSummary(self):
        """Print every cluster followed by the bit strings it contains."""
        resultMap = defaultdict(list)
        for key, nodeIds in self.hammingData.items():
            # All nodes that share a bit string are interchangeable here,
            # so the first index is looked up.  (The whole list used to be
            # passed to find(), which is not a valid element.)
            cluster = self.uf.find(nodeIds[0])
            resultMap[cluster].append(key)

        for cluster, keys in resultMap.items():
            print("\n\nCluster {0}:".format(cluster))
            for key in keys:
                print("\t{0}".format(key))

    def run(self, minDist):
        """Merge all nodes closer than ``minDist`` and count the clusters.

        Args:
            minDist: nodes whose Hamming distance is at most
                ``minDist - 1`` end up in the same cluster.

        Returns:
            ``self.uf.countGroups()`` after all merges.
        """
        # Shallow working copy: keys are removed as they are processed so
        # every pair of bit strings is examined only once.
        data = copy(self.hammingData)
        while data:
            nodeKey, refNodes = data.popitem()
            anchor = refNodes[0]

            # Nodes with an identical bit string are at distance 0.
            for duplicate in refNodes[1:]:
                self.uf.union(anchor, duplicate)

            for testNodeKey in self.getHammingPermutations(nodeKey, minDist - 1):
                if testNodeKey not in data:
                    continue
                for n in self.hammingData[testNodeKey]:
                    if self.uf.find(n) != self.uf.find(anchor):
                        self.uf.union(anchor, n)
        return self.uf.countGroups()
def build(G):
    """Build the tree index bottom-up from the k-core decomposition of G.

    Vertices are processed level by level from the largest core number
    down to 1.  On each level the vertices of that core number are grouped
    into connected components (a union-find is used for this), one TNode
    is created per component, and the tree nodes built on earlier levels
    that the component touches become its children.  Vertices of core 0
    are stored on the root.

    Args:
        G: graph accepted by ``nx.core_number``.  Vertex ids are used as
            list indices and must therefore be non-negative integers; they
            no longer have to be exactly ``0 .. N-1``.

    Returns:
        ``(root, vertexTNodelist, coreDict)``:
        ``root`` is the TNode with core 0; ``vertexTNodelist`` is a list
        indexed by vertex id (length ``max id + 1``) holding the TNode
        created for each vertex of core > 0 and ``None`` elsewhere;
        ``coreDict`` maps vertex -> core number.
    """
    # Step 1: k-core decomposition; bucket the vertices by core number.
    coreDict = nx.core_number(G)
    Vk = defaultdict(list)
    for vertex, coreness in coreDict.items():
        Vk[coreness].append(vertex)

    # Step 2: initialise the union-find and the supporting structures.
    # Arrays are sized by the largest vertex id rather than by the number
    # of vertices, so graphs numbered from 1 or with gaps in their ids no
    # longer run off the end of unodeArr / vertexTNodelist.
    nodeIds = list(G.nodes())
    slotCount = max(nodeIds) + 1 if nodeIds else 0

    uf = UnionFind()
    unodeArr = []  # vertex id -> UNode of the union-find over all vertices
    for i in range(slotCount):
        unode = UNode(i)
        uf.makeSet(unode)
        unodeArr.append(unode)

    vertexTNodelist = [None] * slotCount  # vertex id -> TNode
    restNodeList = []  # tree nodes without a parent yet; end up under root
    core0List = []  # vertices with core 0; stored on the root

    # Step 3: build the tree bottom-up, largest core value first.
    for curcore in sorted(Vk, reverse=True):
        vkList = Vk[curcore]
        if curcore <= 0:
            # Vertices with core 0 belong to the root.
            core0List = vkList
            continue

        # Step 3.1: connected components among the vertices of this level.
        # Temporary nodes are kept in a dict because the ids that occur
        # here are not contiguous.
        idUFMap = {}
        for vid in vkList:
            if vid not in idUFMap:
                unode = UNode(vid)
                uf.makeSet(unode)
                idUFMap[vid] = unode
            for ngid in G.neighbors(vid):
                if coreDict[ngid] < coreDict[vid]:
                    continue
                if coreDict[ngid] > coreDict[vid]:
                    # A neighbour with a larger core was handled on an
                    # earlier level: substitute the value of the node that
                    # find() returns for it.
                    ngid = uf.find(unodeArr[ngid]).value
                if ngid not in idUFMap:
                    unode = UNode(ngid)
                    uf.makeSet(unode)
                    idUFMap[ngid] = unode
                uf.union(idUFMap[vid], idUFMap[ngid])

        # Step 3.2: per component, collect this level's vertices and the
        # already-built tree nodes that will become children.
        ufGNodeMap = defaultdict(list)  # UNode -> [vertex]
        ufTNodeMap = defaultdict(list)  # UNode -> [TNode]
        for reId, reUNode in idUFMap.items():
            newParent = uf.find(reUNode)
            if coreDict[reId] == curcore:
                ufGNodeMap[newParent].append(reId)
            if coreDict[reId] > curcore:
                # Handled on an earlier level; its tree node is found
                # through the represent vertex stored on its UNode.
                oldParent = unodeArr[reId]
                tnode = vertexTNodelist[oldParent.represent]
                ufTNodeMap[newParent].append(tnode)

        # Step 3.3: create the TNodes of this level and link the children.
        for parent, nodeList in ufGNodeMap.items():
            childList = ufTNodeMap[parent]
            tnode = TNode(curcore)
            tnode.nodeList = nodeList
            if childList:
                tnode.childList = childList
            restNodeList.append(tnode)  # no parent yet
            for nodeId in nodeList:
                vertexTNodelist[nodeId] = tnode
            # The children just attached are no longer parentless.
            for subTNode in tnode.childList:
                restNodeList.remove(subTNode)

        # Step 3.4: update the union-find over all vertices.
        for vid in vkList:
            x = unodeArr[vid]
            for ngid in G.neighbors(vid):
                if coreDict[ngid] >= curcore:
                    uf.union(x, unodeArr[ngid])
            # Make vid the represent vertex whenever the current represent
            # has a larger core number.
            # NOTE(review): the nesting of this step was reconstructed from
            # whitespace-collapsed source; confirm it belongs after the
            # neighbour loop.
            xRoot = uf.find(x)
            xRepresent = uf.find(x).represent
            if coreDict[xRepresent] > coreDict[vid]:
                xRoot.represent = vid

    # Step 4: create the root node.
    root = TNode(0)
    root.nodeList = core0List
    # NOTE(review): the deep copy means the tree under root consists of
    # copies, while vertexTNodelist still refers to the original TNodes --
    # confirm that this is intended.
    root.childList = copy.deepcopy(restNodeList)

    # Step 5: build the inverted index of the nodeList attributes on the
    # tree nodes.
    attachKw(root, G)
    return root, vertexTNodelist, coreDict
# Group the integers A..B: two numbers are put into the same set when they
# have a common prime factor >= P; ``results`` is the number of sets left.
# A, B, P, sieve() and UnionFind are provided by the surrounding code.
interval = range(A, B + 1)

# A prime shared by two numbers of [A, B] divides their difference, hence
# only primes up to B - A are needed.
# ``"%s %s"`` keeps the output identical to the Python 2 statement
# ``print "sieve for ", B-A`` while remaining valid on Python 3.
print("%s %s" % ("sieve for ", B - A))
primes = sieve(B - A)
print("trimming primes")
primes = [a for a in primes if a >= P]

print("calculating factors")
set_dict2 = {}
for prime in primes:
    # Enumerate the multiples with a stepped range; scanning the whole
    # interval once per prime was needlessly quadratic.
    first_multiple = -(-A // prime) * prime  # smallest multiple >= A
    set_dict2[prime] = list(range(first_multiple, B + 1, prime))

print("creating UnionFind")
set_list = UnionFind()
for item in interval:
    set_list.makeSet([item])

print("reducing sets")
for temp_list in set_dict2.values():
    # Everything after the first multiple is merged into the first one;
    # lists with a single entry (or none) are skipped automatically.
    for new_item in temp_list[1:]:
        set_list.union(temp_list[0], new_item)
results = set_list.getNumGroups()
from UnionFind import UnionFind
import random

# Cluster the bit strings in data/clustering_big.txt: nodes whose Hamming
# distance is 1 or 2 are merged, and the number of resulting clusters is
# printed at the end.

# build the graph
with open('data/clustering_big.txt') as f:
    nodes = {}
    clusters = UnionFind()
    # Iterate the file lazily instead of loading every line up front.
    for i, line in enumerate(f):
        if i == 0:
            # Header line: "<number of nodes> <bits per node>".
            N, numbits = line.split()
            N, numbits = int(N), int(numbits)
        else:
            bits = line.strip().replace(" ", "")
            # Skip blank lines (they used to create an empty node that
            # failed later with IndexError) and repeated bit strings
            # (distance 0, i.e. the same node -- register it only once).
            if not bits or bits in nodes:
                continue
            nodes[bits] = True
            clusters.makeSet([bits])

# run the algorithm
for k, node in enumerate(nodes):
    if k % 1000 == 0:
        print(k)  # progress indicator
    for i in range(numbits):
        # Neighbour at distance 1: bit i flipped.
        alt = node[:i] + str(1 - int(node[i])) + node[i + 1:]
        if alt in nodes:
            clusters.union(node, alt)
        for j in range(i + 1, numbits):
            # Neighbour at distance 2: bits i and j flipped.
            alt2 = alt[:j] + str(1 - int(alt[j])) + alt[j + 1:]
            if alt2 in nodes:
                clusters.union(node, alt2)

print(clusters.getNumGroups())