def cluster(Vlist, dim):
    '''
    Count the clusters left after merging every pair of nodes that differ in
    at most two bit positions, so that any two remaining clusters are at
    least 3 bit flips apart.

    Input:
        Vlist: bit string representation of all nodes
        dim  : dimension of each node (length of bit string)
    Output:
        Largest number of clusters such that the spacing between any pair of
        clusters is at least 3 (taken from len() of the disjoint set, which
        is presumably its number of sets -- confirm against DisjointSet).
    '''
    # Give every distinct node value a dense index; duplicate bit strings
    # collapse onto the same index.
    distinct_V = {}
    for V in Vlist:
        intV = int(V, 2)
        if intV not in distinct_V:
            distinct_V[intV] = len(distinct_V)
    dset = DisjointSet(len(distinct_V))

    # Every mask of width `dim` with exactly one or exactly two bits set.
    # The masks are derived from `dim`; the previous string-based version
    # padded with a hard-coded 23 and was therefore only right for dim == 24.
    masks = [1 << i for i in range(dim)]
    masks.extend(
        (1 << i) | (1 << j)
        for i in range(dim - 1)
        for j in range(i + 1, dim)
    )

    # XOR-ing a node with a mask flips one or two bits; any existing node
    # reached this way belongs to the same cluster.
    for V, index in distinct_V.items():
        for mask in masks:
            neighbour = distinct_V.get(V ^ mask)
            if neighbour is not None:  # index 0 is a valid hit
                dset.union(index, neighbour)
    return len(dset)
def solve(board):
    """Flip to "X", in place, every "O" that is not connected to the border.

    `board` is a list of rows of single-character cells. Cells are connected
    through their right and lower neighbours. Nothing is returned.
    """
    if not board:
        return  # an empty board needs no processing

    nrow = len(board)
    ncol = len(board[0])
    last_row, last_col = nrow - 1, ncol - 1

    # Record the position of every "O", split into border and interior cells.
    border, inner = [], []
    for r in range(nrow):
        for c in range(ncol):
            if board[r][c] != "O":
                continue
            on_edge = r in (0, last_row) or c in (0, last_col)
            (border if on_edge else inner).append((r, c))

    # No "O" on the border: every interior "O" gets converted.
    if not border:
        for r, c in inner:
            board[r][c] = "X"
        return

    cells = border + inner
    ds = DisjointSet(cells)

    # Put all border cells into one set, represented by the first of them.
    anchor = border[0]
    for cell in border[1:]:
        ds.Union(anchor, cell)

    # Connect every "O" with the "O" below it and the "O" to its right.
    for r, c in cells:
        if r < last_row and board[r + 1][c] == "O":
            ds.Union((r, c), (r + 1, c))
        if c < last_col and board[r][c + 1] == "O":
            ds.Union((r, c), (r, c + 1))

    # Interior cells that are not in the border's set get converted.
    for r, c in inner:
        if ds.Find((r, c)) != ds.Find(anchor):
            board[r][c] = "X"
def numIslands(grid):
    """Return the number of islands of "1" cells in `grid`.

    Cells are joined with their right and lower neighbours; the answer is
    read from the disjoint set's `SetCnt` attribute. An empty grid, or a grid
    whose first row is empty, yields 0.
    """
    nrow = len(grid)
    if nrow == 0:
        return 0
    ncol = len(grid[0])
    if ncol == 0:
        return 0

    # Coordinates of every "1" cell.
    lands = {
        (i, j)
        for i in range(nrow)
        for j in range(ncol)
        if grid[i][j] == "1"
    }

    ds = DisjointSet(list(lands))

    # Union each land cell with its right and lower neighbour when those are
    # land as well.
    for row, col in lands:
        right = (row, col + 1)
        below = (row + 1, col)
        if right in lands:
            ds.Union((row, col), right)
        if below in lands:
            ds.Union((row, col), below)

    return ds.SetCnt
def kruskal(nodes, edges):
    """
    implementation of kruskal's algorithm

    :param nodes: nodes for input.
    :param edges: edges for input, as (src, dst, weight) triples.
    :return: edges of the minimum spanning tree, in the order they were
             accepted (ascending weight).
    """
    # edges of the minimum spanning tree
    mst = []

    # initialize a forest for all nodes
    forest = DisjointSet(nodes)

    # A spanning tree over n nodes has exactly n - 1 edges. The previous
    # value, len(nodes), could never be reached, so the early exit below was
    # dead code.
    num_edges = len(nodes) - 1

    # visit the edges from lightest to heaviest
    for (src, dst, weight) in sorted(edges, key=lambda x: x[2]):
        # skip edges whose source or destination node doesn't exist
        if src not in nodes or dst not in nodes:
            continue

        # unite() is relied on to return a truthy value only when src and
        # dst were in different sets, i.e. the edge closes no circuit
        if forest.unite(src, dst):
            mst.append((src, dst, weight))

            # terminate early once the tree is complete
            if len(mst) == num_edges:
                break

    # return the minimum spanning tree
    return mst
def parallel_prim(sc, nodes, edges, num_partition=4):
    """
    Build a minimum spanning tree with the help of a Spark context.

    Each round, every vertex proposes the lightest incident edge that leaves
    its current disjoint set; the driver then merges the proposals in
    ascending weight order. Rounds repeat until no vertex proposes an edge.

    :param sc: Spark context used to parallelize, broadcast and collect.
    :param nodes: nodes for input.
    :param edges: edges for input, as (src, dst, weight) triples.
    :param num_partition: number of partitions.
    :return: list of accepted (src, dst, weight) edges.
    """
    mst = []
    forest = DisjointSet(nodes)

    def to_adjacency_pairs(partition):
        # emit each undirected edge once per endpoint
        for edge in partition:
            yield (edge[0], (edge[1], edge[2]))
            yield (edge[1], (edge[0], edge[2]))

    # adjacency list keyed by vertex, neighbours sorted by weight
    adjacent = (
        sc.parallelize(edges, num_partition)
        .mapPartitions(to_adjacency_pairs, preservesPartitioning=True)
        .groupByKey(numPartitions=num_partition)
        .mapValues(lambda x: sorted(x, key=lambda y: y[1]))
        .persist()
    )

    while True:
        # ship the current forest to the workers
        connection = sc.broadcast(forest)

        def cheapest_outgoing(partition):
            # for each vertex, yield its lightest edge that crosses into a
            # different disjoint set, with endpoints in ascending order
            for group in partition:
                src = group[0]
                for (dst, weight) in group[1]:
                    if connection.value.find(src) != connection.value.find(dst):
                        if src < dst:
                            yield (src, dst, weight)
                        else:
                            yield (dst, src, weight)
                        break

        proposals = adjacent.mapPartitions(cheapest_outgoing).distinct().collect()
        if not proposals:
            # no set has an outgoing edge any more
            break

        # merge the proposals, lightest first; unite() is relied on to report
        # whether the edge actually joined two different sets
        for candidate in sorted(proposals, key=lambda x: x[2]):
            if forest.unite(candidate[0], candidate[1]):
                mst.append(candidate)

    return mst
def is_forest(g):
    """
    Decide whether an undirected graph is acyclic (a forest).

    :param g: undirected graph
    :return: True if graph is a forest, otherwise False (also False when `g`
             is not a Graph instance)
    """
    if not isinstance(g, Graph):
        return False

    forest = DisjointSet()
    for vertex in g.vertices:
        forest.make_set(vertex)

    # Keep one orientation of each edge: (a, b) is dropped when (b, a) has
    # already been recorded.
    unique_edges = set()
    for a, b in g.edges:
        if (b, a) in unique_edges:
            continue
        unique_edges.add((a, b))

    # An edge whose endpoints already share a set closes a cycle.
    for a, b in unique_edges:
        if forest.find_set(a) == forest.find_set(b):
            return False
        forest.union(a, b)
    return True
def kruskals(inputFile):
    """Run Kruskal's algorithm on a whitespace-separated edge file.

    Each non-blank line of `inputFile` is read as "<node> <node> <weight>".

    Returns a tuple `(total, solutions)`: the summed integer weight of the
    accepted edges and the accepted edges themselves as
    `(weight, node, node)` tuples of strings, in the order they were taken.
    """
    edges = []
    nodes = {}

    # Read the edge list and give every vertex a dense integer id. The file
    # is closed as soon as reading is done.
    with open(inputFile) as graph:
        for line in graph:
            entry = line.split()
            if not entry:
                continue  # tolerate blank lines
            edges.append((entry[2], entry[0], entry[1]))
            # Register BOTH endpoints; a vertex that only ever appears as the
            # second column would otherwise raise KeyError further down.
            for name in entry[:2]:
                if name not in nodes:
                    nodes[name] = len(nodes)

    total = 0
    solutions = []
    disjoint = DisjointSet(len(nodes))

    # Weights are kept as strings in the tuples, so order by their numeric
    # value; a plain sort would put "10" before "9".
    edges.sort(key=lambda e: (int(e[0]), e[1], e[2]))

    for edge in edges:
        first, second = nodes[edge[1]], nodes[edge[2]]
        root_first = disjoint.find(first)
        root_second = disjoint.find(second)
        if root_first == root_second:
            continue  # edge would close a cycle
        # Pass the vertex with the smaller representative first.
        if root_first < root_second:
            disjoint.union(first, second)
        else:
            disjoint.union(second, first)
        solutions.append(edge)
        total += int(edge[0])
    return total, solutions
def test_compress_path(self):
    """getPath(1) shortens from [1, 2, 4] to [1, 4] after find(1)."""
    ds = DisjointSet(10)
    for a, b in ((1, 2), (3, 4), (1, 3)):
        ds.union(a, b)

    # path reported for element 1 before any find() has been issued
    self.assertEqual(ds.getPath(1), [1, 2, 4])

    ds.find(1)

    # after the lookup the reported path is shorter
    self.assertEqual(ds.getPath(1), [1, 4])
def testUnionFind(self):
    """Elements 0..99 share a set exactly when they agree modulo 5."""
    ds = DisjointSet(range(100))

    # join every element with its residue modulo 5
    for member in range(100):
        ds.union(member, member % 5)

    for a in range(100):
        for b in range(a, 100):
            same_set = ds.find_set(a) == ds.find_set(b)
            same_residue = a % 5 == b % 5
            self.assertEqual(same_set, same_residue)
def findRedundantConnection(edges):
    """Return the first edge whose insertion does not reduce the set count.

    The disjoint set is created over the labels 1..len(edges). Edges are
    added in order; when `SetCnt` is unchanged by a Union, that edge joined
    two already-connected points (it closes a cycle) and is returned.
    Falls through (returning None) when no such edge exists.
    """
    n = len(edges)  # number of points
    ds = DisjointSet(range(1, n + 1))  # labels start at 1

    for edge in edges:
        count_before = ds.SetCnt
        ds.Union(edge[0], edge[1])
        if ds.SetCnt == count_before:
            return edge
def __init__(self, length, width, initial_open=0.40):
    """Store the map dimensions, create empty state and build the first map.

    `initial_open` is handed unchanged to `__gen_initial_map`.
    """
    self.__length, self.__width = length, width
    self.__area = length * width
    # centre of the map, truncated to whole coordinates
    self.center_pt = (int(length / 2), int(width / 2))

    self.__map = []
    self.__up_loc = 0
    self.__ds = DisjointSet()

    # must run last: the attributes above are already in place by then
    self.__gen_initial_map(initial_open)
def DisjointCycle(self, vertices):
    """Return True if `self.edges` contains a cycle over `vertices`.

    Edges are processed in order; an edge whose endpoints already have the
    same representative closes a cycle.
    """
    components = DisjointSet(vertices)
    for start, end in self.edges:
        root_start = components.find(start)
        root_end = components.find(end)
        if root_start == root_end:
            return True
        components.union(start, end)
    return False
def connected_components_dj(g):
    """Group the vertices of `g` by connectivity using a disjoint set.

    Every vertex starts in its own set, the endpoints of every edge are
    merged, and the result of the disjoint set's `get_set()` is returned.
    """
    components = DisjointSet()
    for vertex in g.vertices:
        components.make_set(vertex)

    for a, b in g.edges:
        if components.find_set(a) == components.find_set(b):
            continue  # already in the same component
        components.union(a, b)

    return components.get_set()
def __init__(self, width, height, initial_open=0.60):
    """Store the map dimensions, create empty state and build the first map.

    `width`, `height` and `initial_open` are all forwarded to
    `__gen_initial_map`.
    """
    self.__height, self.__width = height, width
    self.__area = height * width
    # centre point as (height, width) halves, truncated to whole numbers
    self.center_pt = (int(height / 2), int(width / 2))

    self.__CAmap = []
    self.__rooms = []
    self.__leveldata = {}
    self.__up_loc = 0
    self.__ds = DisjointSet()

    # must run last: the attributes above are already in place by then
    self.__gen_initial_map(width, height, initial_open)
def mst_kruskal(g):
    """Return a new Graph holding a minimum spanning forest of `g`.

    Edges are visited in ascending `g.weight` order and added to the result
    whenever their endpoints are still in different sets.
    """
    forest = DisjointSet()
    tree = Graph()

    for vertex in g.vertices:
        forest.make_set(vertex)

    def edge_weight(edge):
        return g.weight(edge[0], edge[1])

    for u, v in sorted(g.edges, key=edge_weight):
        if forest.find_set(u) == forest.find_set(v):
            continue  # would close a cycle
        forest.union(u, v)
        tree.add_edge(u, v, g.weight(u, v))

    return tree
def findCircleNum(M):
    """Return the number of groups described by adjacency matrix `M`.

    Only the strict lower triangle of `M` is read; every entry equal to 1
    merges its row and column index. The answer is the disjoint set's
    `SetCnt`. An empty matrix yields 0.
    """
    size = len(M)
    if size == 0:
        return 0

    groups = DisjointSet(range(size))

    # scan below the diagonal and merge connected indices
    for row in range(size):
        for col in range(row):
            if M[row][col] == 1:
                groups.Union(row, col)

    return groups.SetCnt
def __init__(self, width, height, levelnum, initial_open=0.60):
    """Store the level parameters, create empty state and generate the map.

    `initial_open` is handed unchanged to `gen_map`.
    """
    self.__height, self.__width = height, width
    self.__area = height * width
    self.__levelnum = levelnum
    # centre point as (height, width) halves, truncated to whole numbers
    self.center_pt = (int(height / 2), int(width / 2))

    self.__map = []
    self.__rooms = []
    self.__up_loc = 0
    self.__ds = DisjointSet()
    self.objects = []
    self.eventdict = {}

    # must run last: the attributes above are already in place by then
    self.gen_map(initial_open)
def _create_sets(game_map, width, height):
    """Build a disjoint set over the FLOOR tiles of `game_map`.

    For every FLOOR tile at `game_map[x][y]`, `find((x, y))` is called on the
    disjoint set (presumably registering the tile -- confirm against
    DisjointSet) and the tile is handed to `_join_adjacent_tiles`.
    Returns the disjoint set.
    """
    ds = DisjointSet()
    for x in range(width):
        for y in range(height):
            if game_map[x][y] != FLOOR:
                continue
            tile = (x, y)
            ds.find(tile)
            _join_adjacent_tiles(x, y, game_map, ds)
    return ds
def kruskals(inputFile):
    """Run Kruskal's algorithm on a whitespace-separated edge file.

    Each non-blank line of `inputFile` is read as
    "<vertex> <vertex> <integer cost>". Vertices must be single characters:
    they are mapped to integers with `ord(vertex) - 96`, so 'a' becomes 1.

    Returns `(totalCost, mst)`: the summed cost of the accepted edges and the
    accepted edges as `(cost, vertex, vertex)` tuples, cheapest first.
    """
    edges = []

    # Read every edge with the cost first, so tuples order by cost. The file
    # is closed as soon as reading is done.
    with open(inputFile) as graph:
        for line in graph:
            entry = line.split()
            if not entry:
                continue  # tolerate blank lines
            edges.append((int(entry[2]), entry[0], entry[1]))

    # Ascending tuple order is the same order a heap would pop them in.
    edges.sort()

    # The disjoint set is indexed by vertex number, so it has to cover the
    # highest index in use. The edge count alone (the previous size) can be
    # too small; it is kept as a lower bound so the structure never shrinks
    # relative to the old behaviour.
    highest_index = max(
        (ord(name) - 96 for edge in edges for name in edge[1:]),
        default=-1,
    )
    frontier = DisjointSet(max(len(edges), highest_index + 1))

    mst = []
    totalCost = 0
    for newEdge in edges:
        v1 = frontier.find(ord(newEdge[1]) - 96)
        v2 = frontier.find(ord(newEdge[2]) - 96)
        if v1 != v2:
            # different representatives: the edge joins two components
            totalCost = newEdge[0] + totalCost
            mst.append(newEdge)
            frontier.union(v1, v2)

    return totalCost, mst
def minimum_spanning_tree(graph):
    """
    Find the minimum spanning tree in the given graph using Kruskal's algorithm

    :param graph: the graph to find the MST in
    :return: the set of all edges in the MST
    """
    forest = DisjointSet()
    forest.make_sets(graph.get_vertices())

    # sort all edges of the graph in place (by weight, ascending, per the
    # quick_sort helper)
    edges = graph.get_edges()
    quick_sort(edges, 0, len(edges) - 1)

    solution = set()
    for edge in edges:
        if forest.find(edge[0]) == forest.find(edge[1]):
            continue  # endpoints already connected: edge would form a cycle
        forest.union(edge[0], edge[1])
        solution.add(edge)
    return solution
def MST(G):
    '''
    Build a spanning structure of graph G and return it as a new Graph.

    Walks every adjacency list of G. An edge joining two different disjoint
    sets is added to the result; an edge inside one set replaces the edge
    reported by getMaxWeightEdge when it is lighter than that edge.
    '''
    vertices = G.V
    adjacency = G.Adjlist
    tree_vertices = set()
    tree_adj = {}
    components = DisjointSet(vertices)

    for vertex in vertices:
        tree_adj[vertex] = Linklist()

    def add_tree_edge(a, b, weight):
        # record the undirected edge in both adjacency lists
        tree_adj[a].pushback(Node(b, weight))
        tree_adj[b].pushback(Node(a, weight))

    for u in tree_adj.keys():
        node = adjacency[u].head
        while node != 0:  # 0 marks the end of the linked list
            v = node.data

            # use the disjoint set to tell whether (u, v) would form a cycle
            root_u = components.find(u)
            root_v = components.find(v)

            if root_u != root_v:
                # no cycle: take the edge and merge the two sets
                tree_vertices.add(u)
                tree_vertices.add(v)
                add_tree_edge(u, v, node.weight)
                components.union(root_u, root_v)
            else:
                # adding the edge would form a cycle: swap it in only if it
                # is lighter than the edge getMaxWeightEdge reports
                heaviest = getMaxWeightEdge(tree_vertices, tree_adj, u, v)
                if node.weight < heaviest.w:
                    tree_adj[heaviest.u].remove(heaviest.v)
                    tree_adj[heaviest.v].remove(heaviest.u)
                    add_tree_edge(u, v, node.weight)
                    components.union(root_u, root_v)

            node = node.next

    return Graph(tree_vertices, None, tree_adj, kind='nodirect')
def makeKruskal(self):
    """Run Kruskal's algorithm over self.nodes / self.edges and print it.

    Prints each accepted edge with its weight, followed by the total weight.
    Nothing is returned.
    """
    forest = DisjointSet()
    for node in self.nodes:
        forest.makeSet(node)

    chosen = []
    total_weight = 0
    for edge in sorted(self.edges):
        if forest.findSet(edge.node_from) == forest.findSet(edge.node_to):
            continue  # endpoints already connected
        total_weight += edge.weight
        chosen.append((edge.node_from, edge.node_to, edge.weight))
        forest.union(edge.node_from, edge.node_to)

    print('Caminho:')
    for start, end, weight in chosen:
        print(start, ' -- ', end, 'Peso:', weight)
    print('Peso do caminho gerado pelo Kruskal:', total_weight)
def kruskal(graph):
    """Return the total weight of the edges Kruskal's algorithm accepts.

    `graph.get_edges()` must yield items indexable as (source, dest, weight);
    the disjoint set is sized from `len(graph.nodes)`.
    """
    djs = DisjointSet(numel=len(graph.nodes))
    weight = 0

    # Lightest edges first. The unused `all_nodes` and `fs` sets of the
    # earlier version were never read and have been dropped.
    for e in sorted(graph.get_edges(), key=lambda tup: tup[2]):
        source, dest, wt = e[0], e[1], e[2]
        if not djs.same_set(source, dest):
            weight += wt
            djs.union(source, dest)
    return weight
def constructSpanningTree(self, vertexList, edgeList):
    """Build a spanning tree with Kruskal's algorithm and print its edges.

    `edgeList` is sorted in place. Each printed line has the form
    "<start name> - <target name>". Nothing is returned.
    """
    forest = DisjointSet(vertexList)
    edgeList.sort()

    tree = []
    for edge in edgeList:
        start = edge.startVertex
        target = edge.targetVertex

        # Same representative on both sides means the edge would form a
        # cycle. parentNode is re-read for every call on purpose, in case
        # find() updates it.
        if forest.find(start.parentNode) == forest.find(target.parentNode):
            continue

        tree.append(edge)
        forest.union(start.parentNode, target.parentNode)

    # print the spanning tree edge by edge
    for edge in tree:
        print(edge.startVertex.name, "-", edge.targetVertex.name)
def eliminate_insiders(components):
    '''Eliminate all components whose bounding boxes lie inside of others.

    The components object is manipulated in place: `components.chars` is
    replaced by the entries selected through the label set's
    `final_labels()`.

    Args:
        components: Components instance
    '''
    ordered = by_bbox_size(components)
    count = len(ordered)
    labels = DisjointSet(n_labels=count)

    # check each unordered pair of bounding boxes exactly once
    for first in range(count):
        for second in range(first + 1, count):
            if is_inside(ordered[first], ordered[second]):
                labels.unite(first, second)

    components.chars = [ordered[i] for i in labels.final_labels()]
def cluster(Elist, nV, k):
    '''
    Single-linkage clustering in the manner of Kruskal's algorithm.

    Input:
        Elist: edge list; each entry holds the two end vertices and the cost.
               The list is sorted in place by cost.
        nV   : total number of vertices
        k    : number of clusters
    Output:
        Cost of the first remaining edge (in ascending cost order) whose end
        vertices lie in different clusters once only k clusters are left.
    '''
    assert nV >= k, "Number of clusters can't be greater than number of nodes!"

    Elist.sort(key=lambda x: x[2])
    dset = DisjointSet(nV)

    # merge along the cheapest edges until k clusters remain
    pos = 0
    while len(dset) > k:
        edge = Elist[pos]
        pos += 1
        if dset.find(edge[0]) != dset.find(edge[1]):
            dset.union(edge[0], edge[1])

    # skip edges that stay inside one cluster; the next one gives the spacing
    while True:
        if dset.find(Elist[pos][0]) != dset.find(Elist[pos][1]):
            return Elist[pos][2]
        pos += 1
def __init__(self, points, edges):
    """Compute `self.h0`, a list of lifetime records sorted by persistence.

    Each point starts with the life `[time, inf]`. Edges are sorted in place
    by their 'time'; when an edge joins two different sets, the end of the
    first representative's life is set to the edge's time. Only records with
    positive persistence (end - start) are kept. Each record is a dict with
    the keys 'index', 'life' and 'persistence'.
    """
    self.h0 = {p['index']: [p['time'], math.inf] for p in points}

    djs = DisjointSet()
    edges.sort(key=itemgetter('time'))
    for edge in edges:
        i0, i1 = edge['p0']['index'], edge['p1']['index']
        root0, root1 = djs.find(i0), djs.find(i1)
        if root0 != root1:
            djs.union(root0, root1)
            self.h0[root0][1] = edge['time']

    records = []
    for index, life in self.h0.items():
        persistence = life[1] - life[0]
        if persistence > 0:
            records.append({
                'index': index,
                'life': life,
                'persistence': persistence
            })
    records.sort(key=itemgetter('persistence'))
    self.h0 = records
from components import *
from DisjointSet import DisjointSet

# Demo: build a small graph, run connected_components on it and print it.

# undirected edges between single-letter vertices
eages = [
    ('b', 'd'),
    ('e', 'g'),
    ('a', 'c'),
    ('h', 'i'),
    ('a', 'b'),
    ('e', 'f'),
    ('b', 'c'),
    ('a', 'g'),
]

# vertex names 'a' through 'j'
vs = list('abcdefghij')

# one Node per vertex, keyed by its name
ves = {}
for v in vs:
    ves[v] = Node(v)

G = Graph(ves, eages)

dset = DisjointSet(G)
dset.connected_components(G)

print(G)
'cd': 7, 'de': 9, 'bh': 11, 'hi': 7, 'ic': 2, 'ig': 6, 'cf': 4, 'df': 14, 'ah': 8, 'hg': 1, 'gf': 2, 'fe': 10 } vertexes = [a, b, c, d, e, f, g, h, i] v_map = {i.identifier: DisjointSet(i.identifier) for i in vertexes} def kl_mst(): edges = list(i for i in e_map.items()) edges.sort(key=lambda x: x[1]) total = 0 for edge in edges: key = edge[0] first_s, second_s = key first = v_map[first_s] second = v_map[second_s] if first.find().identifier != second.find().identifier: print(edge)
def __init__(self, V):
    """Keep `V` and create the GraphAdj and DisjointSet helpers from it."""
    self.V = V
    self.graph = GraphAdj(V)
    self.disjointSet = DisjointSet(V)