Пример #1
0
def simpleTests():
    """Exercise the basic UnionFind operations with plain assertions.

    Ten singleton sets (0..9) are created and then merged step by step.
    After each step the representative returned by ``find``, the "order"
    value stored at ``uf.data[x][1]``, the parent stored at
    ``uf.data[x][0]`` and the group count are checked.
    """
    elements = range(10)
    uf = UnionFind()

    # Every element starts out in its own set ...
    for item in elements:
        uf.makeSet(item)
    # ... and is therefore its own representative.
    for item in elements:
        assert uf.find(item) == item, "Parent not initialized correctly."
    assert uf.countGroups() == 10, "Counted wrong number of groups."

    # Merging two single-element trees: 0 becomes the root, its order grows.
    uf.union(0, 1)
    assert uf.find(1) == 0, "Parent not updated correctly."
    assert uf.data[0][1] == 1, "Order not updated for equal trees correctly."
    assert uf.countGroups() == 9, "Counted wrong number of groups."

    # Merging a smaller tree into a bigger one leaves the order untouched.
    uf.union(1, 2)
    assert uf.find(2) == 0, "Parent not updated correctly."
    assert uf.data[0][1] == 1, "Order not updated for unequal trees correctly."
    assert uf.countGroups() == 8, "Counted wrong number of groups."

    # Build a second tree {3, 4, 5} and merge it into the first one.
    uf.union(3, 4)
    uf.union(4, 5)
    uf.union(0, 3)
    assert uf.data[0][1] == 2, "Order not updated for unequal trees."
    # The stored parent of 5 is only rewritten once find(5) has been called.
    assert uf.data[5][0] == 3, "Parent should not be updated until find operation."
    assert uf.find(5) == 0, "Find operation returned wrong parent."
    assert uf.data[5][0] == 0, "Parent should have been updated."
    assert uf.countGroups() == 5, "Counted wrong number of groups."
class Kruskal:
    """Kruskal's algorithm over an edge list: MST weight and k-clustering.

    ``data`` is a sequence of text lines.  The first whitespace-separated
    token of ``data[0]`` is the number of nodes; every following line holds
    ``u v weight`` as integers, with 1-based node numbers that are stored
    0-based.  Both algorithms merge sets inside the shared ``self.ufSet``,
    so they are not independent of each other on the same instance.
    """

    def __init__(self, data):
        """Create one singleton set per node and load the edges by weight."""
        nodes = int(data[0].split()[0])
        self.ufSet = UnionFind()
        for n in range(nodes):
            self.ufSet.makeSet(n)

        self.edges = []
        for k in data[1:]:
            # Build a real list so the fields can be indexed even where
            # map() returns a lazy iterator.
            row = [int(field) for field in k.strip().split()]
            self.edges.append((row[0] - 1, row[1] - 1, row[2]))

        # Ascending by weight, as Kruskal's algorithm requires.
        self.edges.sort(key=itemgetter(2))

    def mstKruskal(self):
        """Return the total weight of the minimum spanning forest.

        The accepted edges are stored, in the order they were taken, in
        ``self.mst``.
        """
        mst = []
        total = 0
        for edge in self.edges:
            if self.ufSet.find(edge[0]) == self.ufSet.find(edge[1]):
                # Both endpoints are already connected; the edge would
                # close a cycle.
                continue

            self.ufSet.union(edge[0], edge[1])
            mst.append(edge)
            total += edge[2]

        self.mst = mst
        return total

    def clusterKruskal(self, k):
        """Merge the closest groups until ``k`` remain and return the spacing.

        The spacing is the weight of the cheapest edge that still joins two
        different groups once exactly ``k`` groups are left; that edge is
        also printed.  Returns None when no such edge exists.

        The group count is checked *before* each merge, so a ``k`` equal to
        the current number of groups performs no merge at all and yields
        the cheapest crossing edge.
        """
        print("Running Clustering, k={0}".format(k))
        for edge in self.edges:
            if self.ufSet.find(edge[0]) == self.ufSet.find(edge[1]):
                continue

            if self.ufSet.countGroups() == k:
                # Target reached: this is the cheapest edge between two
                # distinct groups.
                print("Smallest unallocated edge: {0}".format(edge))
                return edge[2]

            self.ufSet.union(edge[0], edge[1])

        return None
Пример #3
0
def segmentIsland(flatFaces, island):
    """Group the faces of an island by following their ``fromFace`` links.

    Each face in ``island`` (an iterable of indices into ``flatFaces``) is
    put in a set and unioned with ``flatFaces[face].fromFace`` whenever
    that link is not None.  An empty ``island`` means "all faces".

    Returns the ``(group, leader)`` attributes of the UnionFind that was
    built.  Assumes ``sets.leader`` is a dict-like mapping keyed by face
    (the original code called ``.keys()`` on it) -- TODO confirm.
    """
    sets = UnionFind(True)
    if len(island) == 0:
        island = range(len(flatFaces))
    for face in island:
        # Membership is tested on the mapping itself; going through
        # .keys() builds a full key list on every iteration in Python 2.
        if face not in sets.leader:
            sets.makeSet([face])
        neighbor = flatFaces[face].fromFace
        if neighbor is not None:
            if neighbor not in sets.leader:
                sets.makeSet([neighbor])
            sets.union(face, neighbor)
    return sets.group, sets.leader
Пример #4
0
 def segmentIsland(flatFaces, island):
     """Partition an island's faces into sets linked through ``fromFace``.

     ``island`` is an iterable of indices into ``flatFaces``; when it is
     empty every face of ``flatFaces`` is processed.  A face is unioned
     with ``flatFaces[face].fromFace`` unless that attribute is None.

     Returns ``(sets.group, sets.leader)`` from the UnionFind built here.
     Assumes ``sets.leader`` is a dict-like mapping keyed by face (the
     original code called ``.keys()`` on it) -- TODO confirm.
     """
     sets = UnionFind(True)
     if len(island) == 0:
         island = range(len(flatFaces))
     for face in island:
         # Test membership on the mapping directly: in Python 2 .keys()
         # materialises a list, making each check linear in its size.
         if face not in sets.leader:
             sets.makeSet([face])
         neighbor = flatFaces[face].fromFace
         if neighbor is not None:
             if neighbor not in sets.leader:
                 sets.makeSet([neighbor])
             sets.union(face, neighbor)
     return sets.group, sets.leader
Пример #5
0
    def build(self):
        """Build the TreeIndex bottom-up using a union-find (disjoint-set).

        Reads the graph ``ShellStructIndex.G`` and stores its results on the
        class: ``ShellStructIndex.coreDict`` (the result of
        ``nx.core_number``), ``ShellStructIndex.vertexTNodelist`` (vertex id
        -> TNode) and ``ShellStructIndex.root`` (the core-0 root TNode).

        NOTE(review): ``iteritems`` and ``has_key`` below are Python 2 dict
        methods.
        """
        # NOTE(review): N is not used again in the visible part of this
        # method (only in a commented-out print).
        N = nx.number_of_nodes(ShellStructIndex.G)  # number of nodes in the graph
        # Step 1: compute the k-core decomposition and group vertices by coreness.
        '步骤1:计算k-core,按coreness分组'
        ## k-core decomposition
        ShellStructIndex.coreDict = nx.core_number(ShellStructIndex.G)
        # group the vertices by core number
        Vk = defaultdict(list)  # the dict values are lists
        for key, value in ShellStructIndex.coreDict.iteritems(
        ):  ### (2017-03-05: found vertices that are not in the graph; nx.core_number is suspected)
            Vk[value].append(key)
        # sort Vk by coreness (the key), descending
        # sortedVk=sorted(Vk.items(),key=lambda d:d[0],reverse=True)
        # Step 2: initialise the union-find and the auxiliary data structures.
        '步骤2:初始化并查集和一些需要的数据结构'
        restNodeList = []  # tree nodes without a parent yet; at the end they are attached directly under the core-0 root as its children
        # Vertex ids may be non-contiguous, so find the largest id and use
        # maxID wherever N was used before.
        '为了处理节点不连续的问题,找iD最大的节点,将maxID替换所有的N'
        maxID = 0
        for nodeID in ShellStructIndex.G.nodes():
            if nodeID > maxID:
                maxID = nodeID
        ShellStructIndex.vertexTNodelist = [None] * (maxID + 1
                                                     )  # list mapping graph vertex id -> TNode
        # print str(N+1)
        core0List = []  # vertices with coreness 0; they form the root of this tree
        ############# initialise the union-find #############
        unodeArr = []  # union-find nodes, indexed by vertex id (id -> UNode)
        uf = UnionFind()  # class providing all the union-find operations
        for i in range(maxID + 1):  # +1 because numbering may start from 1
            unode = UNode(i)
            uf.makeSet(unode)
            unodeArr.append(unode)
        # Step 3: build the tree bottom-up.
        '步骤3:自底向上建立树'
        #level by level,
        tnodeCounter = 0  ## counter for the number of TNodes created
        for key in sorted(Vk.keys(), reverse=True):  # walk Vk from the largest core value to the smallest
            curcore = key
            vkList = Vk[key]
            if curcore > 0:
                idUFMap = {
                }  # (id -> UNode) a dict here, whereas unodeArr is a list, because these ids are not necessarily contiguous (a temporary union-find mapping)
                # Step 3.1: find connected components among the vertices of
                # the same core value, using the temporary mapping idUFMap.
                '步骤3.1: 先在同一个core值节点中找连通分量,利用一个临时并查集idUFMap'
                for id in vkList:
                    if not idUFMap.has_key(id):  # add to Vk
                        unode = UNode(id)
                        uf.makeSet(unode)
                        idUFMap[id] = unode
                    for ngid in ShellStructIndex.G.neighbors(id):
                        if ShellStructIndex.coreDict[
                                ngid] >= ShellStructIndex.coreDict[
                                    id]:  # handle the larger-core neighbours first
                            if ShellStructIndex.coreDict[
                                    ngid] > ShellStructIndex.coreDict[id]:
                                ngid = uf.find(
                                    unodeArr[ngid]
                                ).value  # a neighbour with a larger core was already processed, so substitute its union-find parent
                            if not idUFMap.has_key(ngid):  # add to V'
                                unode = UNode(ngid)
                                uf.makeSet(unode)
                                idUFMap[ngid] = unode
                            uf.union(idUFMap[id],
                                     idUFMap[ngid])  # merge id with its neighbour (or with the neighbour's parent)
                # Step 3.2: using the temporary union-find result, group the
                # graph vertices and find the child tree nodes.
                '步骤3.2:按照上面临时并查集的结果,给图节点分组,找树节点孩子'
                ufGNodeMap = defaultdict(
                    list)  # (UNode -> [vertex]) maps a union-find root to the graph vertices of its group
                ufTNodeMap = defaultdict(list)  # (UNode -> [TNode]) maps a union-find root to TNodes
                for reId, reUNode in idUFMap.iteritems():
                    newParent = uf.find(reUNode)  # the node's parent in the new (temporary) union-find
                    if ShellStructIndex.coreDict[
                            reId] == curcore:  # vertices with the same core value form one group
                        ufGNodeMap[newParent].append(reId)
                    if ShellStructIndex.coreDict[
                            reId] > curcore:  # because we go bottom-up, this reId should already have been processed
                        oldParent = unodeArr[reId]  # reId's parent as recorded in the outer union-find
                        tnode = ShellStructIndex.vertexTNodelist[
                            oldParent.represent]
                        ufTNodeMap[newParent].append(tnode)
                # Step 3.3: create the new TNodes and link the tree nodes.
                '步骤3.3:产生新的TNode节点并建立树节点之间的联系'
                for parent, nodeList in ufGNodeMap.iteritems():
                    childList = ufTNodeMap[parent]
                    tnodeCounter = tnodeCounter + 1  #
                    # print 'tnodeCounter:',tnodeCounter
                    tnode = TNode(
                        curcore,
                        tnodeCounter)  # create a new tree node (given its coreness and tree-node number) (re: 2017-02-26)
                    tnode.nodeList = nodeList
                    if childList:  # if there are children, attach them to the tree node
                        tnode.childList = childList  # is a deep copy needed here?
                        ##### set the parent on each child, to ease later retrieval (re: 2017-02-26) ########
                        for chid in childList:
                            chid.parent = tnode
                    restNodeList.append(tnode)  # assume for now that this node has no parent
                    # update the (id -> TNode) mapping
                    for nodeId in nodeList:
                        # print nodeId
                        ShellStructIndex.vertexTNodelist[nodeId] = tnode
                    # update the list of parentless tree nodes
                    for subTNode in tnode.childList:
                        restNodeList.remove(subTNode)
                # Step 3.4: update the outer union-find that contains all vertices.
                '步骤3.4: 更新外面包含所有节点的并查集'
                for id in vkList:
                    x = unodeArr[id]  # UNode of the current vertex
                    for ngid in ShellStructIndex.G.neighbors(id):
                        if ShellStructIndex.coreDict[
                                ngid] >= curcore:  # edge-traversal priority: larger cores are checked first, which keeps the construction bottom-up
                            y = unodeArr[ngid]
                            uf.union(x, y)
                    # update the represent vertex
                    xRoot = uf.find(x)
                    xRepresent = uf.find(x).represent
                    if ShellStructIndex.coreDict[
                            xRepresent] > ShellStructIndex.coreDict[id]:
                        xRoot.represent = id
            else:  # vertices with core 0 become the root
                core0List = vkList

        # Step 4: create the root node.
        '步骤4:建立root节点'
        tnodeCounter = tnodeCounter + 1  # (re: 2017-02-26)
        # print 'tnodeCounter:', tnodeCounter
        ShellStructIndex.root = TNode(core=0, data=tnodeCounter)
        ShellStructIndex.root.nodeList = core0List
        ShellStructIndex.root.childList = restNodeList  # is a deep copy (copy.deepcopy(restNodeList)) needed here?
        #### (re: 2017-02-26)
        for chid in ShellStructIndex.root.childList:
            chid.parent = ShellStructIndex.root
        ##### also update the vertex -> tree-node mapping ####
        for v in core0List:
            ShellStructIndex.vertexTNodelist[v] = ShellStructIndex.root
        # Step 5: build the inverted index of nodeList attributes on the tree
        # nodes (no code for this step is visible in this excerpt).
        '步骤5:在树节点上获得nodeList的属性的倒排'
Пример #6
0
 interval = range(A, B + 1)
 #print A, B, P
 print "sieve for ", B - A
 primes = sieve(B - A)
 print "trimming primes"
 primes = [a for a in primes if a >= P]
 print "calculating factors"
 set_dict2 = {}
 for prime in primes:
     set_dict2[prime] = [a for a in interval if a % prime == 0]
 #primes_dict = {item: calc_prime_factor(item, primes, P) for item in range(A, B+1)}
 #print primes_dict
 print "creating UnionFind"
 set_list = UnionFind()
 for item in range(A, B + 1):
     set_list.makeSet([item])
 #set_dict = {prime: [] for prime in primes}
 #print "creating sets"
 #for item in range(A,B+1):
 #    for prime in primes_dict[item]:
 #        set_dict[prime].append(item)
 #print set_dict
 #print set_list.getNumGroups()
 print "reducing sets"
 for item in set_dict2:
     temp_list = set_dict2[item]
     if len(temp_list) > 1:
         for new_item in temp_list:
             set_list.union(temp_list[0], new_item)
 results = set_list.getNumGroups()
 #print results
Пример #7
0
class ClusterHamming:
    """Cluster bit-string nodes that lie within a given Hamming distance.

    ``data`` is a list of text lines.  The first line holds two integers,
    the node count and the number of bits per node; the following lines
    hold one node each, written as bits that may be separated by spaces.
    """

    def __init__(self, data):
        """Read the header, create one set per node and index the nodes.

        NOTE: the header line is removed from ``data`` in place
        (``data.pop(0)``), so the caller's list is modified.

        ``self.hammingData`` maps each distinct bit string to the list of
        node numbers that carry it.
        """
        (nodes, self.bits) = map(int, data.pop(0).split())
        self.uf = UnionFind()
        for n in range(nodes):
            self.uf.makeSet(n)

        self.hammingData = defaultdict(list)
        for n in range(nodes):
            s = data[n].replace(' ', '')
            self.hammingData[s].append(n)

    def flip(self, s, flipbits):
        """
        Given an input string (s) and tuple of indices (flipbits), returns a new
        string with bits at specified indices flipped.

        The length of (flipbits) determines the resulting hamming distance.
        """
        # '1' becomes '0'; any other character at a flipped index becomes '1'.
        return ''.join(
            ('0' if c == '1' else '1') if i in flipbits else c
            for i, c in enumerate(s))

    def getHammingPermutations(self, s, n):
        """
        Generate the variants of s whose distance is less than or equal to n.

        The result starts with s itself, followed by every string obtained
        by flipping 1, 2, ..., n of the first ``self.bits`` positions.
        """
        result = [s]
        for d in range(1, n + 1):
            result.extend(
                self.flip(s, flipbits)
                for flipbits in combinations(range(self.bits), d))
        return result

    def printSummary(self):
        """Print every cluster followed by the bit strings it contains."""
        resultMap = defaultdict(list)
        for k, v in self.hammingData.items():
            # v is the list of node numbers sharing bit string k.  The
            # union-find stores node numbers, so look up one of those
            # nodes rather than the list itself.
            cluster = self.uf.find(v[0])
            resultMap[cluster].append(k)

        for k, v in resultMap.items():
            print("\n\nCluster {0}:".format(k))
            for key in v:
                print("\t{0}".format(key))

    def run(self, minDist):
        """Merge all nodes closer than ``minDist`` and return the group count.

        Bit strings are taken one at a time from a shallow copy of
        ``self.hammingData``.  Nodes with the identical string are merged
        first, then every not-yet-processed string within distance
        ``minDist - 1`` is merged into the same group.
        """
        data = copy(self.hammingData)
        while data:
            (nodeKey, refNodes) = data.popitem()
            # Nodes carrying exactly the same bit string are at distance 0.
            for duplicate in refNodes[1:]:
                self.uf.union(refNodes[0], duplicate)

            nearestNodes = self.getHammingPermutations(nodeKey, minDist - 1)
            for testNodeKey in nearestNodes:
                # Only strings still waiting in `data` need checking; the
                # processed ones already looked at this string themselves.
                if testNodeKey not in data:
                    continue

                for n in self.hammingData[testNodeKey]:
                    if self.uf.find(n) == self.uf.find(refNodes[0]):
                        continue
                    self.uf.union(refNodes[0], n)

        return self.uf.countGroups()
Пример #8
0
def build(G):
    """Build the TreeIndex bottom-up using a union-find (disjoint-set).

    Returns the tuple ``(root, vertexTNodelist, coreDict)``: the core-0
    root TNode, the list mapping vertex id -> TNode, and the dict returned
    by ``nx.core_number(G)``.

    NOTE(review): ``vertexTNodelist`` and ``unodeArr`` have length N and are
    indexed by vertex id, so this presumably expects ids 0..N-1 -- confirm.
    NOTE(review): ``iteritems`` and ``has_key`` below are Python 2 dict
    methods.
    """
    N = nx.number_of_nodes(G)  # number of nodes in the graph
    # Step 1: compute the k-core decomposition and group vertices by coreness.
    '步骤1:计算k-core,按coreness分组'
    coreDict = nx.core_number(G)
    # group the vertices by core number
    Vk = defaultdict(list)  # the dict values are lists
    for key, value in coreDict.iteritems():
        Vk[value].append(key)
    # sort Vk by coreness (the key), descending
    # sortedVk=sorted(Vk.items(),key=lambda d:d[0],reverse=True)
    # Step 2: initialise the union-find and the auxiliary data structures.
    '步骤2:初始化并查集和一些需要的数据结构'
    unodeArr = []  # holds the union-find nodes
    uf = UnionFind()  # class providing all the union-find operations
    restNodeList = []  # tree nodes without a parent yet; at the end they are attached directly under the core-0 root as its children
    vertexTNodelist = [None] * N  # list mapping graph vertex id -> TNode
    core0List = []  # vertices with coreness 0; they form the root of this tree
    for i in range(N):
        unode = UNode(i)
        uf.makeSet(unode)
        unodeArr.append(unode)
    # Step 3: build the tree bottom-up.
    '步骤3:自底向上建立树'
    #level by level,
    for key in sorted(Vk.keys(), reverse=True):
        curcore = key
        vkList = Vk[key]
        if curcore > 0:
            idUFMap = {}  # (id -> UNode) a dict here, whereas unodeArr is a list, because these ids are not necessarily contiguous
            # Step 3.1: find connected components among the vertices of the
            # same core value, using the temporary mapping idUFMap.
            '步骤3.1: 先在同一个core值节点中找连通分量,利用一个临时并查集idUFMap'
            for id in vkList:
                if not idUFMap.has_key(id):  # add to Vk
                    unode = UNode(id)
                    uf.makeSet(unode)
                    idUFMap[id] = unode
                for ngid in G.neighbors(id):
                    if coreDict[ngid] >= coreDict[id]:  # handle the larger-core neighbours first
                        if coreDict[ngid] > coreDict[id]:
                            ngid = uf.find(unodeArr[ngid]
                                           ).value  # a neighbour with a larger core was already processed, so substitute its union-find parent
                        if not idUFMap.has_key(ngid):  # add to V'
                            unode = UNode(ngid)
                            uf.makeSet(unode)
                            idUFMap[ngid] = unode
                        uf.union(idUFMap[id], idUFMap[ngid])
            # Step 3.2: using the temporary union-find result, group the graph
            # vertices and find the child tree nodes.
            '步骤3.2:按照上面临时并查集的结果,给图节点分组,找树节点孩子'
            ufGNodeMap = defaultdict(
                list)  # (UNode -> [vertex]) maps a union-find root to the graph vertices of its group
            ufTNodeMap = defaultdict(list)  # (UNode -> [TNode]) maps a union-find root to TNodes
            for reId, reUNode in idUFMap.iteritems():
                newParent = uf.find(reUNode)  # the node's parent in the new (temporary) union-find
                if coreDict[reId] == curcore:  # vertices with the same core value form one group
                    ufGNodeMap[newParent].append(reId)
                if coreDict[reId] > curcore:  # because we go bottom-up, this reId should already have been processed
                    oldParent = unodeArr[reId]  # reId's parent as recorded in the outer union-find
                    tnode = vertexTNodelist[oldParent.represent]
                    ufTNodeMap[newParent].append(tnode)
            # Step 3.3: create the new TNodes and link the tree nodes.
            '步骤3.3:产生新的TNode节点并建立树节点之间的联系'
            for parent, nodeList in ufGNodeMap.iteritems():
                childList = ufTNodeMap[parent]
                tnode = TNode(curcore)  # create a new tree node
                tnode.nodeList = nodeList
                if childList:  # if there are children, attach them to the tree node
                    tnode.childList = childList  # is a deep copy needed here?
                restNodeList.append(tnode)  # assume for now that this node has no parent
                # update the (id -> TNode) mapping
                for nodeId in nodeList:
                    vertexTNodelist[nodeId] = tnode
                # update the list of parentless tree nodes
                for subTNode in tnode.childList:
                    restNodeList.remove(subTNode)
            # Step 3.4: update the outer union-find.
            '步骤3.4: 更新外面的并查集'
            for id in vkList:
                x = unodeArr[id]  # UNode of the current vertex
                for ngid in G.neighbors(id):
                    if coreDict[ngid] >= curcore:  # edge-traversal priority: larger cores are checked first, which keeps the construction bottom-up
                        y = unodeArr[ngid]
                        uf.union(x, y)
                # update the represent vertex
                xRoot = uf.find(x)
                xRepresent = uf.find(x).represent
                if coreDict[xRepresent] > coreDict[id]:
                    xRoot.represent = id
        else:  # vertices with core 0 become the root
            core0List = vkList

    # Step 4: create the root node.
    '步骤4:建立root节点'
    root = TNode(0)
    root.nodeList = core0List
    # NOTE(review): the root receives a deep copy of restNodeList, while
    # vertexTNodelist keeps references to the pre-copy TNode objects --
    # confirm that this is intended.
    root.childList = copy.deepcopy(restNodeList)
    # Step 5: build the inverted index of nodeList attributes on the tree nodes.
    '步骤5:在树节点上获得nodeList的属性的倒排'
    attachKw(root, G)
    return root, vertexTNodelist, coreDict
Пример #9
0
 interval = range(A, B + 1)
 #print A, B, P
 print "sieve for ", B-A
 primes = sieve(B - A)
 print "trimming primes"
 primes = [a for a in primes if a >= P]
 print "calculating factors"
 set_dict2 = {}
 for prime in primes:
     set_dict2[prime] = [a for a in interval if a % prime == 0]
 #primes_dict = {item: calc_prime_factor(item, primes, P) for item in range(A, B+1)}
 #print primes_dict
 print "creating UnionFind"
 set_list = UnionFind()
 for item in range(A, B+1):
     set_list.makeSet([item])
 #set_dict = {prime: [] for prime in primes}
 #print "creating sets"
 #for item in range(A,B+1):
 #    for prime in primes_dict[item]:
 #        set_dict[prime].append(item)
 #print set_dict
 #print set_list.getNumGroups()
 print "reducing sets"
 for item in set_dict2:
     temp_list = set_dict2[item]
     if len(temp_list) > 1:
         for new_item in temp_list:
             set_list.union(temp_list[0],new_item)
 results = set_list.getNumGroups()
 #print results
Пример #10
0
from UnionFind import UnionFind
import random

# Load the nodes: the header line gives the node count and the number of
# bits per node; each remaining line is one node written as spaced bits.
with open('data/clustering_big.txt') as f:
    nodes = {}
    clusters = UnionFind()
    for i, line in enumerate(f):
        if i == 0:
            N, numbits = [int(field) for field in line.split()]
            continue
        # Collapse "1 0 1 ..." into "101..." and give it its own cluster.
        bits = line.strip().replace(" ", "")
        nodes[bits] = True
        clusters.makeSet([bits])

# Merge every pair of known nodes that differ in exactly one or two bits,
# then report how many clusters are left.
for k, node in enumerate(nodes):
    if k % 1000 == 0:
        print(k)  # progress indicator
    for i in xrange(numbits):
        # Variant of `node` with bit i inverted.
        alt = node[:i] + str(1 - int(node[i])) + node[i+1:]
        if alt in nodes:
            clusters.union(node, alt)
        for j in xrange(i + 1, numbits):
            # Variant with bits i and j both inverted.
            alt2 = alt[:j] + str(1 - int(alt[j])) + alt[j+1:]
            if alt2 in nodes:
                clusters.union(node, alt2)

print(clusters.getNumGroups())