예제 #1
0
def two():
    """
    Calculates largest value k such that there is a k-clustering
    with spacing >= 3.

    Vertex labels are read from ``clustering_big.txt`` (the first line is
    consumed as the header, every remaining value is passed through
    ``strip``).  Each vertex is united with every vertex that ``nearby``
    reports for it, and the number of leaders left in the union-find is
    returned.
    """

    # Read in the file; ``strip`` converts each raw label
    vert_df = pd.read_csv('clustering_big.txt',
                          header=0,
                          names=['vid'],
                          converters={'vid': strip})

    vertices = vert_df['vid'].unique()
    uf = UnionFind(vertices)
    to_visit = set(vertices)

    while to_visit:
        # Every iteration pulls off a new vertex label and merges it
        # with all still-unvisited vertices connected at cost <= 2
        this_v = to_visit.pop()

        for v in nearby(this_v, to_visit):
            if uf[this_v] != uf[v]:
                uf.union(this_v, v)
            # v deliberately stays in ``to_visit``: it must still get its
            # own turn (and stay discoverable by later vertices), otherwise
            # vertices that are close to v but not to this_v would never
            # be merged into the same cluster.

    return uf.numleaders()
예제 #2
0
    def test_union_with_invalid_values(self):
        """union() is expected to raise ValueError for the arguments -1 and 11."""
        uf = UnionFind(10)

        for bad_value in (-1, 11):
            with self.assertRaises(ValueError):
                uf.union(bad_value, 1)
예제 #3
0
    def test_same_set_with_invalid_values(self):
        """same_set() is expected to raise ValueError for the arguments -1 and 11."""
        uf = UnionFind(10)

        for first, second in ((-1, 1), (11, 0)):
            with self.assertRaises(ValueError):
                uf.same_set(first, second)
def weak_connected_components(nodes):
    """Group the nodes of a directed graph into weakly connected sets.

    Args:
        nodes: list of directed graph nodes; each one exposes ``label`` and
            ``neighbors``.

    Returns:
        The result of ``transform_to_cluster`` applied to the populated
        union-find (annotated in the original as ``int[][]``).
    """
    labels = [vertex.label for vertex in nodes]
    components = UnionFind(labels)

    # Direction is ignored: an edge puts both endpoints in the same set.
    for vertex in nodes:
        for successor in vertex.neighbors:
            components.union(vertex.label, successor.label)

    return transform_to_cluster(components)
예제 #5
0
    def test_union_when_already_united(self):
        """Uniting two members of one group must leave their leader at 1."""
        uf = UnionFind([[1,2,3],[4,5],[6,7,8,9,0]])

        def assert_leader_is_one():
            for member in (1, 2):
                self.assertEqual(uf.find(member), 1)

        assert_leader_is_one()
        uf.union(1,2)
        assert_leader_is_one()
    def __mst(self):
        """Select spanning-tree edges Kruskal-style.

        Walks the edges returned by ``self.__sorted_edges()``; every edge
        whose endpoints are not yet connected is recorded in ``self.mst`` as
        ``(u, v)`` and its weight is added to ``self.w``.
        """
        components = UnionFind(self.graph.V())

        for u, v, weight in self.__sorted_edges():
            if components.connected(u, v):
                # Endpoints already joined: the edge would close a cycle.
                continue
            self.mst.append((u, v))
            components.union(u, v)
            self.w += weight
예제 #7
0
def Kruskal():
    """Return the total weight of the edges accepted by Kruskal's algorithm.

    Reads the module-level names ``es`` (edge records indexed as
    ``[u, v, weight]``), ``V`` (handed to ``UnionFind``) and ``E`` (how many
    of the sorted edges are examined).
    """
    by_weight = sorted(es, key=lambda record: record[2])
    components = UnionFind(V)
    total = 0
    for position in xrange(E):
        record = by_weight[position]
        u, v = record[0], record[1]
        if components.same(u, v):
            continue
        components.union(u, v)
        total += record[2]
    return total
예제 #8
0
    def test_union(self):
        """Unite element i with partners[i] and check the id table and a find()."""
        uf = UnionFind(list(range(10)))

        partners = (0, 2, 3, 3, 5, 5, 5, 5, 5, 5)
        for element, partner in enumerate(partners):
            uf.union(element, partner)

        expected_ids = [0, 1, 1, 1, 4, 4, 4, 4, 4, 4]
        self.assertEqual(uf.ids, expected_ids,
                         msg='Union Find merge incorrect')

        representative = uf.find(2)
        self.assertEqual(
            representative,
            1,
            msg='Union find incorrect finds the representative class')
예제 #9
0
def community_size_distribution(vertices, edges):  # @author_pairs and @vertices must have reduced id
    """Return a histogram ``{community size: number of communities}``.

    Every pair in ``edges`` is united in a ``UnionFind`` created with
    ``len(vertices)``; the sizes are then read from ``get_trees()``.
    """
    communities = UnionFind(len(vertices))
    for a, b in edges:
        communities.union(a, b)

    trees = communities.get_trees()
    histogram = {}
    for root in trees:
        members = len(trees[root])
        histogram[members] = histogram.get(members, 0) + 1
    return histogram
예제 #10
0
def one():
    """Return the maximum spacing of a K-clustering of ``clustering1.txt``.

    The file is read as space-separated ``v1 v2 cost`` records (its first
    line is consumed as the header).  Edges are processed in order of
    increasing cost, Kruskal-style, until the union-find holds ``K`` groups
    (``K`` is a module-level name); the cost of the next edge that joins two
    different groups is the answer.

    Raises:
        StopIteration: if the edges run out before such an edge is found.
    """
    edge_df = pd.read_csv('clustering1.txt',
                          sep=" ",
                          header=0,
                          names=['v1', 'v2', 'cost'])

    # Sort in order of least cost.  DataFrame.sort() no longer exists in
    # current pandas; sort_values() is the supported spelling.
    edge_df.sort_values(['cost'], inplace=True)
    edge_df.index = range(1, len(edge_df) + 1)

    # The unique set of vertices...
    vertices = pd.concat([edge_df['v1'],
                          edge_df['v2']]).unique()

    # Initialize the UnionFind structure with
    # unmerged set of unique vertices
    uf = UnionFind(vertices)

    it = edge_df.iterrows()

    # Phase 1: run Kruskal's until the number of groups is K.
    while uf.numleaders() > K:
        rec = next(it)[1]
        v1, v2 = rec['v1'], rec['v2']
        if uf[v1] != uf[v2]:
            uf.union(v1, v2)

    # Phase 2: the next edge that would merge two groups (forming the
    # K-1th cluster) defines the spacing.
    while True:
        rec = next(it)[1]
        if uf[rec['v1']] != uf[rec['v2']]:
            return rec['cost']
    def minSwapsCouples(self, row):
        """
        Return N minus the number of groups left after uniting, for every
        adjacent seat pair, the couples (person id // 2) sitting on it.

        :type row: List[int]
        :rtype: int
        """
        # Floor division keeps N and the couple ids integral on Python 3
        # as well (plain ``/`` would produce floats there).
        N = len(row) // 2
        uf = UnionFind(N)

        for x, y in zip(row[::2], row[1::2]):
            uf.union(x // 2, y // 2)

        return N - uf.num_groups
예제 #12
0
def kruskal_mst(graph):
    """Lazily yield the edges chosen by Kruskal's algorithm.

    Edges come from ``sorted(graph.edges())``; an edge is yielded only when
    its endpoints (``either()`` / ``other()``) are not connected yet.
    """
    components = UnionFind(graph.num_vertices)

    for edge in sorted(graph.edges()):
        v = edge.either()
        w = edge.other(v)

        if not components.connected(v, w):
            components.union(v, w)
            yield edge
예제 #13
0
def kruskal(graph):
    """Run Kruskal's algorithm with a heap of ``(cost, edge)`` entries.

    Returns:
        ``(total_cost, min_cost_tree)`` where ``min_cost_tree`` is a new
        ``Graph`` holding every edge that ``is_valid_edge`` accepted.
    """
    total_cost = 0
    tree = Graph()
    components = UnionFind(graph.get_all_vertices())

    pending = graph.get_all_edges()
    heapq.heapify(pending)  # heapifies the list returned by the graph in place

    while pending:
        cost, edge = heapq.heappop(pending)
        if not is_valid_edge(edge, tree, components):
            continue
        total_cost += cost
        tree.add_edge(edge[0], edge[1], cost)
        components.union(edge[0], edge[1])

    return total_cost, tree
예제 #14
0
def kruskals(N, edges):
    """Returns a list of edges of the graph that make up a minimum spanning tree using Kruskal's algorithm.

    N (int): the number of nodes in the graph
    edges [(int, int, float)]: the edges in the graph represented as (node1, node2, weight) where 0 <= node < N

    The caller's ``edges`` list is left untouched (a sorted copy is used).
    Returns ``False`` when fewer than N - 1 edges could be selected, i.e. the
    graph was not connected.
    """
    UF = UnionFind(list(range(N)))  # create union find data structure on the nodes
    mst = []  # will store list of edges
    # Iterate over a sorted copy so the argument is not reordered in place.
    for edge in sorted(edges, key=lambda edge: edge[2]):
        if UF.union(edge[0], edge[1]):
            mst.append(edge)
            if len(mst) == N - 1:
                # A spanning tree is complete; remaining edges cannot be added.
                break
    if len(mst) < N - 1:  # graph was not connected
        return False
    return mst
예제 #15
0
    def kruskal(self, k=4):
        """Merge clusters along the cheapest edges and return the resulting spacing.

        ``self.triplets`` (``(u, v, w)`` records) is sorted in place by
        ``(w, u, v)``.  Edges joining two different leaders are accepted until
        ``self.num_nodes - 1 - k`` of them have been taken; the smallest weight
        among edges whose endpoints still have different leaders is returned
        (``inf`` when there is none).

        NOTE(review): starting from singleton clusters, ``num_nodes - 1 - k``
        merges would leave k + 1 clusters rather than k -- confirm intended.
        """
        def compute_spacing(uf):
            # Cheapest edge that still crosses between two clusters.
            best = float('inf')
            for u, v, w in self.triplets:
                if uf.children[u] == uf.children[v]:
                    continue
                if w < best:
                    best = w
            return best

        self.triplets.sort(key=lambda x: (x[2], x[0], x[1]))

        uf = UnionFind()
        for node in self.nodes:
            uf.children[node] = node
            uf.leaders[node] = [node]

        mst = []
        idx = 0
        merges_needed = self.num_nodes - 1 - k
        while len(mst) < merges_needed:
            triplet = self.triplets[idx]
            idx += 1
            u, v = triplet[:2]
            leader_u, leader_v = uf.children[u], uf.children[v]
            if leader_u == leader_v:
                # Same cluster already: the edge would form a cycle.
                continue
            # The leader with the larger (or equal) member list goes first.
            if len(uf.leaders[leader_u]) >= len(uf.leaders[leader_v]):
                uf.union(leader_u, leader_v)
            else:
                uf.union(leader_v, leader_u)
            mst.append(triplet)
        return compute_spacing(uf)
예제 #16
0
def solution(n, library, road, edges):
    """Return the cost of libraries plus roads for ``n`` cities.

    When a road costs at least as much as a library, every city gets its own
    library.  Otherwise each edge (1-based endpoints) that merges two
    components counts as one road, and one library is charged per remaining
    component (``uf.count``).
    """
    if road >= library:
        return n * library

    uf = UnionFind(n)
    roads_built = 0

    for edge in edges:
        if not uf.union(edge[0] - 1, edge[1] - 1):
            continue
        roads_built += 1
        if uf.count == 1:
            # Everything is connected; no further edge can merge anything.
            break

    return roads_built * road + uf.count * library
예제 #17
0
def kruskal_mst(g, num_nodes):
    """Return the total cost of the edges accepted by Kruskal's algorithm.

    ``g`` is a list of ``(cost, u, v)`` records and is sorted in place.
    Edges are consumed in order until ``uf.size()`` drops to 1.
    """
    g.sort()
    uf = UnionFind(num_nodes)

    total_cost = 0
    position = 0
    while uf.size() > 1:
        cost, u, v = g[position]
        position += 1
        if uf.union(u, v):
            total_cost += cost

    return total_cost
예제 #18
0
def solution(n, library, road, edges):
    """Return the cost of libraries plus roads for ``n`` cities.

    When a road costs at least as much as a library, every city gets its own
    library.  Otherwise each edge (1-based endpoints) that merges two
    components counts as one road, and one library is charged per remaining
    component (``uf.count``).
    """
    if road >= library:
        return n * library

    uf = UnionFind(n)
    roads_built = 0

    for edge in edges:
        if not uf.union(edge[0] - 1, edge[1] - 1):
            continue
        roads_built += 1
        if uf.count == 1:
            # Everything is connected; no further edge can merge anything.
            break

    return roads_built * road + uf.count * library
    def findCircleNum(self, M):
        """
        Unite i and j for every 1 above the diagonal of M and return the
        number of groups that remain.

        :type M: List[List[int]]
        :rtype: int
        """
        size = len(M)
        circles = UnionFind(size)

        for i, row in enumerate(M):
            # Only the upper triangle is scanned.
            for j in range(i + 1, size):
                if row[j] == 1:
                    circles.union(i, j)

        return circles.num_groups
예제 #20
0
def test_connected():
    """connected() must track each union, including the transitive 0-2 link."""
    union = UnionFind(3)

    def expect(a, b, outcome):
        assert_equals(union.connected(a, b), outcome)

    expect(0, 1, False)
    expect(1, 2, False)

    union.union(0,1)
    expect(0, 1, True)
    expect(1, 2, False)

    union.union(1,2)
    expect(2, 0, True)
    expect(0, 2, True)
    expect(1, 2, True)
예제 #21
0
def main(argv):
    """Cluster vertices whose Hamming distance is at most 2 and print the count.

    ``construct(argv[0])`` supplies ``(vtotal, vertices)``; each vertex record
    is indexed as ``[0]`` (value given to ``hamming_distance``) and ``[1]``
    (id given to the union-find).  Every pair of the first ``vtotal`` sorted
    records is compared.

    Written so that it runs unchanged on Python 2 and Python 3
    (``print(...)`` with a single argument, ``range`` instead of ``xrange``).
    """
    vtotal, vertices = construct(argv[0])

    vertices = sorted(vertices)
    uf = UnionFind(vtotal)

    for i in range(vtotal):
        for j in range(i + 1, vtotal):
            # Cheap connectivity test first; the distance is only computed
            # for pairs that are still in different clusters.
            if not uf.connected(vertices[i][1], vertices[j][1]):
                if hamming_distance(vertices[i][0], vertices[j][0]) <= 2:
                    uf.union(vertices[i][1], vertices[j][1])

    print('')
    print('%s clusters' % (uf.count()))
    print('')
    def areSentencesSimilarTwo(self, words1, words2, pairs):
        """
        Decide whether two sentences are similar word by word, where
        similarity is the transitive closure of ``pairs``.

        :type words1: List[str]
        :type words2: List[str]
        :type pairs: List[List[str]]
        :rtype: bool
        """
        if len(words1) != len(words2):
            return False

        uf = UnionFind()
        for s1, s2 in pairs:
            uf.union(s1, s2)

        for w1, w2 in zip(words1, words2):
            if w1 == w2:
                continue
            # A word that never appeared in ``pairs`` has no entry in
            # ``uf.parents`` and can only match itself.
            both_known = w1 in uf.parents and w2 in uf.parents
            if not both_known or uf.find(w1) != uf.find(w2):
                return False

        return True
예제 #23
0
class UnionFind2:
    """Adapter around ``UnionFind`` that stores ``fn(item)`` instead of ``item``."""

    def __init__(self, groups, fn):
        """Create the wrapped ``UnionFind`` from ``groups`` with ``fn`` applied to every member.

        Args:
            groups: iterable of iterables of items.
            fn: callable mapping an item to the key stored in the union-find.
        """
        self.fn = fn
        # Real lists are built on purpose: on Python 3 ``map`` returns a
        # one-shot iterator, which is exhausted after a single pass.
        self.uf = UnionFind([[fn(member) for member in group]
                             for group in groups])

    def find(self, item):
        """Return the wrapped structure's ``find`` result for ``fn(item)``."""
        return self.uf.find(self.fn(item))

    def union(self, item_1, item_2):
        """Unite the keys of ``item_1`` and ``item_2``."""
        self.uf.union(self.fn(item_1), self.fn(item_2))

    def get_clusters(self):
        """Return the values of the wrapped structure's ``leader_to_group``."""
        return self.uf.leader_to_group.values()
예제 #24
0
    def minimum_spanning_tree(self) -> WeightedUndirectedGraph:
        """
        Build the minimum spanning tree (MST) of the graph with Kruskal's algorithm
        :return: a WeightedUndirectedGraph created from this graph's vertices and the chosen edges
        """
        # order a private copy of the edges by weight (index 2) with qsort
        by_weight = list(self.edges.values())
        qsort(by_weight, lambda x: x[2])

        # every vertex starts out as its own tree
        trees = UnionFind(list(self.vertices.keys()))

        mst_edges = []
        edges_needed = len(self.vertices) - 1

        for edge in by_weight:
            v0, v1, _ = edge
            if trees.neighbors(v0, v1):
                # endpoints already share a tree
                continue
            trees.union(v0, v1)
            mst_edges.append(edge)
            if len(mst_edges) == edges_needed:
                break

        # wrap the chosen edges in a WeightedUndirectedGraph
        return WeightedUndirectedGraph.index_edges(
            list(self.vertices.keys()),
            mst_edges)  # there is no edge saved for the starting vertex
예제 #25
0
class UnionFind2:
    """Adapter around ``UnionFind`` that stores ``fn(item)`` instead of ``item``."""

    def __init__(self, groups, fn):
        """Create the wrapped ``UnionFind`` from ``groups`` with ``fn`` applied to every member.

        Args:
            groups: iterable of iterables of items.
            fn: callable mapping an item to the key stored in the union-find.
        """
        self.fn = fn
        # Real lists are built on purpose: on Python 3 ``map`` returns a
        # one-shot iterator, which is exhausted after a single pass.
        self.uf = UnionFind([[fn(member) for member in group]
                             for group in groups])

    def find(self, item):
        """Return the wrapped structure's ``find`` result for ``fn(item)``."""
        return self.uf.find(self.fn(item))

    def union(self, item_1, item_2):
        """Unite the keys of ``item_1`` and ``item_2``."""
        self.uf.union(self.fn(item_1), self.fn(item_2))

    def get_clusters(self):
        """Return the values of the wrapped structure's ``leader_to_group``."""
        return self.uf.leader_to_group.values()
예제 #26
0
    def clustering(self, k=4) -> float:
        """
        Perform maximum spacing clustering on the graph, stopping at k clusters
        :param k: number of clusters to stop at
        :return: the cluster spacing, i.e. the weight of the first edge between
                 two different clusters once ``len(clusters) == k``; ``None`` if
                 the edges run out before that happens
        """
        # order a private copy of the edges by weight (index 2) with qsort
        by_weight = list(self.edges.values())
        qsort(by_weight, lambda x: x[2])

        # every vertex starts out as its own cluster
        clusters = UnionFind(list(self.vertices.keys()))

        for v0, v1, weight in by_weight:
            if clusters.neighbors(v0, v1):
                # edges within a cluster are ignored
                continue
            if len(clusters) == k:
                # target reached: this edge is the next one between clusters
                return weight
            clusters.union(v0, v1)

        return None
예제 #27
0
def main(argv):
    """Cluster vertices that differ by one of the generated bit masks and print the count.

    ``construct(argv[0])`` returns a mapping from vertex value to the id used
    in the union-find.  Each vertex is XOR-ed with every mask from
    ``generate_distances(24, 2)`` and ``generate_distances(24, 1)``; when the
    result is also a known vertex the two ids are united.

    Written so that it runs unchanged on Python 2 and Python 3
    (``print(...)`` with a single argument).
    """
    vertices = construct(argv[0])
    distances = generate_distances(24, 2) + generate_distances(24, 1)

    uf = UnionFind(len(vertices))

    for vertex in vertices:
        for distance in distances:
            candidate = vertex ^ distance
            if candidate in vertices:
                if not uf.connected(vertices[vertex], vertices[candidate]):
                    uf.union(vertices[vertex], vertices[candidate])

    print('')
    print('%s clusters' % (uf.count()))
    print('')
예제 #28
0
def karger_min_cut(G, edges):
    """Contract ``G`` down to two vertices and return the edges crossing the cut.

    Args:
        G: adjacency mapping ``vertex -> list of 2-item entries``; the second
            item of an entry is the vertex tested against the cut.  ``G`` is
            modified by ``contract``.
        edges: edge collection handed through to ``contract``.

    Returns:
        list: the entries of one remaining vertex's adjacency list whose
        second item is not connected to that vertex (self loops removed).

    Runs on both Python 2 and Python 3: the tuple-parameter lambda and
    ``xrange`` of the earlier version are Python-2-only.
    """
    n = max(G.keys())
    cuts = UnionFind(n + 1)

    edges_index = 0
    for _ in range(n - 2):
        edges_index = contract(G, edges, cuts, edges_index)

    assert len(G) == 2
    u, _ = G.keys()

    # Before returning the cut we must remove the self loops from the
    # adjacency list.  Each edge is stored twice, so one of the two
    # vertices' adjacency lists is enough.
    return [entry for entry in G[u] if not cuts.connected(u, entry[1])]
예제 #29
0
def kruskal(g):
    """Return the ``(src, des)`` pairs of a spanning tree chosen by Kruskal's algorithm.

    Edges come from ``g.edges(data="weight")`` sorted by weight; the loop
    runs until ``g.number_of_nodes() - 1`` edges have been accepted.
    """
    V = g.number_of_nodes()
    edges = sorted(g.edges(data="weight"), key=lambda x: x[2])
    uf = UnionFind(V)

    mst = []
    cursor = 0
    while len(mst) < V - 1:
        src, des, _ = edges[cursor]
        cursor += 1
        if uf.find(src) != uf.find(des):
            uf.union(src, des)
            mst.append((src, des))

    return mst
예제 #30
0
파일: union_find_test.py 프로젝트: qpzm/PS
 def test0(self):
     """After uniting 0-1, 0-2 and 2-3, element 1 shares a set with 2 and with 3."""
     u = UnionFind(3)
     for a, b in ((0, 1), (0, 2), (2, 3)):
         u.union(a, b)
     for other in (2, 3):
         self.assertTrue(u.same_set(1, other))
예제 #31
0
def kruskal(g):
    """Kruskal's algorithm driven by a ``Heap`` of ``[cost, (v1, v2)]`` entries.

    Returns:
        ``(MST, MST_weight)``: the accepted edges as ``[v1, v2, cost]`` lists
        and the sum of their costs.
    """
    # one set per vertex
    U = UnionFind()
    U.initialize(g.V)

    # load every edge into the heap as [cost, edge]
    heap = Heap()
    for e in g.E:
        heap.push([e[2], (e[0], e[1])])

    MST = []
    MST_weight = 0

    # stop when the tree has g.n - 1 edges or the heap runs dry
    while len(MST) != g.n - 1 and not heap.is_empty():
        cost, endpoints = heap.pop()[:2]
        edge = [endpoints[0], endpoints[1], cost]  # v1, v2, cost
        root_a = U.find(edge[0])
        root_b = U.find(edge[1])

        if root_a == root_b:
            continue

        MST.append(edge)
        MST_weight += cost
        U.union(root_a, root_b)

    return MST, MST_weight
예제 #32
0
 def minMalwareSpread(self, graph, initial):
     """
     Pick the initially infected node to remove; ``initial`` is sorted in place.

     :type graph: List[List[int]]
     :type initial: List[int]
     :rtype: int
     """
     if not initial:
         return -1

     # sorted order makes the smallest index win on ties below
     initial.sort()

     # build the connected groups from the upper triangle of the matrix
     n = len(graph)
     uf = UnionFind(n)
     for i in range(n):
         for j in range(i + 1, n):
             if graph[i][j] == 1:
                 uf.union(i, j)

     # group root => number of initially infected nodes in that group
     roots = [uf.find(node) for node in initial]
     infected_per_group = collections.Counter(roots)

     # removing a node only helps when it is the sole infected node of its
     # group; the saving is then the size of that group
     sole_infected = [node for node, root in zip(initial, roots)
                      if infected_per_group[root] == 1]
     if not sole_infected:
         return min(initial)
     return max(sole_infected, key=lambda i: uf.sizes[uf.find(i)])
def min_spanning_tree(g):
    """Return the edges of a minimum spanning tree (forest) of ``g``, Kruskal-style.

    Args:
        g: graph exposing ``nodes`` and ``edges``; each edge has ``cost``,
            ``node1`` and ``node2``.  ``g.edges`` itself is not reordered.

    Returns:
        list: accepted edges in order of increasing cost.
    """
    # Initially each node is in its own group.  A real list of lists is
    # built because ``map`` is a one-shot iterator on Python 3.
    uf = UnionFind([[node] for node in g.nodes])

    tree_edges = []
    for edge in sorted(g.edges, key=lambda e: e.cost):
        # Equal leaders mean the edge would close a cycle.
        if uf.find(edge.node1) != uf.find(edge.node2):
            tree_edges.append(edge)
            uf.union(edge.node1, edge.node2)

    return tree_edges
예제 #34
0
def min_spanning_tree(g):
    """Return the edges of a minimum spanning tree (forest) of ``g``, Kruskal-style.

    Args:
        g: graph exposing ``nodes`` and ``edges``; each edge has ``cost``,
            ``node1`` and ``node2``.  ``g.edges`` itself is not reordered.

    Returns:
        list: accepted edges in order of increasing cost.
    """
    # Initially each node is in its own group.  A real list of lists is
    # built because ``map`` is a one-shot iterator on Python 3.
    uf = UnionFind([[node] for node in g.nodes])

    tree_edges = []
    for edge in sorted(g.edges, key=lambda e: e.cost):
        # Equal leaders mean the edge would close a cycle.
        if uf.find(edge.node1) != uf.find(edge.node2):
            tree_edges.append(edge)
            uf.union(edge.node1, edge.node2)

    return tree_edges
def mst_kruskal(graph):
    '''Kruskal's algorithm using a disjoint-set structure to select edges.

    Returns ``(total_cost, mst)`` where ``mst`` lists the accepted
    ``(start, end)`` pairs in order of increasing weight.
    '''
    total_cost = 0
    mst = []
    u = UnionFind()

    by_weight = sorted(graph.get_edges(), key=lambda x: x[2])

    for start, end, weight in by_weight:
        root_start, root_end = u[start], u[end]
        if root_start == root_end:
            # both ends already belong to the same subset
            continue
        u.union(u[start], u[end])
        total_cost += weight
        mst.append((start, end))

    return total_cost, mst
예제 #36
0
def main(argv):
    """Print the max spacing of a k-clustering built from the cheapest edges.

    ``construct_edges(argv[0])`` supplies ``(vtotal, edges)``; each edge is
    indexed as ``[0]`` (cost) and ``[1]``/``[2]`` (endpoints).  ``argv[1]`` is
    ``k``.  Edges are merged in sorted order while ``uf.count() >= k``; the
    cost of the last merging edge is reported.

    Raises:
        IndexError: if the edges run out while ``uf.count() >= k``.

    Runs on both Python 2 and Python 3 (``print(...)`` with one argument).
    """
    from collections import deque

    vtotal, edges = construct_edges(argv[0])

    # A deque gives O(1) removal from the front; list.pop(0) is O(n) each time.
    edges = deque(sorted(edges))
    uf = UnionFind(vtotal)
    k = int(argv[1])

    max_spacing = 0
    while uf.count() >= k:
        edge = edges.popleft()
        if not uf.connected(edge[1], edge[2]):
            uf.union(edge[1], edge[2])
            max_spacing = edge[0]

    print('')
    print('For %s clustering: max spacing = %s' % (k, max_spacing))
    print('')
 def test_union_find(self):
     """Fresh elements are their own leader; union() returns the merged leader."""
     elements = [1, 2, 3, 4, 6, 7, 8]
     u1 = UnionFind(elements)
     for e in elements:
         self.assertEqual(e, u1.find(e))

     # (a, b, leader expected back from union(a, b))
     for a, b, leader in ((1, 2, 1), (3, 4, 3), (2, 4, 1)):
         self.assertEqual(leader, u1.union(a, b))

     self.assertEqual(1, u1.find(4))

     # everything touched from here on already hangs under leader 1
     for a, b in ((3, 4), (3, 8), (8, 3)):
         self.assertEqual(1, u1.union(a, b))
    def test_find(self):
        """After Union(five, seven) both handles must resolve to ``seven``."""
        uf = UnionFind()
        five, seven = uf.MakeSet(5), uf.MakeSet(7)
        uf.Union(five, seven)

        for handle in (five, seven):
            self.assertEqual(uf.Find(handle), seven)
    def kruskal_mst_improved(self) -> float:
        """
        Computes the cost of the minimum spanning tree (MST) with the
        Union-Find based variant of Kruskal's MST Algorithm.
        :return: float
        """
        # Step 1: edges in order of increasing cost   [O(mlog m)]
        ordered_edges = sorted(self._edge_list)

        # Step 2: T, the spanning tree built so far, starts out empty
        chosen = []

        # Step 3: a Union Find over the vertices
        #   object -> vertex
        #   group  -> connected component w.r.t. the edges in T
        # Initially every vertex is its own isolated component.
        union_find = UnionFind(self._vtx_list)

        # Step 4: scan the sorted edges e = (v, w)   [O(nlog n)]
        for edge in ordered_edges:
            leader_v = edge.end1.leader
            leader_w = edge.end2.leader
            # Adding e creates a cycle exactly when a v-w path already
            # exists in T, i.e. when v and w have the same leader.
            if leader_v is leader_w:
                continue
            chosen.append(edge)
            # Fuse the two connected components into one
            union_find.union(leader_v.obj_name, leader_w.obj_name)
        # Why this is O(nlog n) leader updates rather than O(mn): take a
        # "vertex-centric" view. Whenever the leader of a vertex changes, the
        # size of its component at least doubles, so after x updates
        #     2^x <= n   =>   x <= log2 n
        # Each vertex therefore sees O(log n) leader updates, O(nlog n) in
        # total.

        return sum(edge.cost for edge in chosen)
예제 #40
0
def karger_min_cut(G, edges):
    """Contract ``G`` down to two vertices and return the edges crossing the cut.

    Args:
        G: adjacency mapping ``vertex -> list of 2-item entries``; the second
            item of an entry is the vertex tested against the cut.  ``G`` is
            modified by ``contract``.
        edges: edge collection handed through to ``contract``.

    Returns:
        list: the entries of one remaining vertex's adjacency list whose
        second item is not connected to that vertex (self loops removed).

    Runs on both Python 2 and Python 3: the tuple-parameter lambda and
    ``xrange`` of the earlier version are Python-2-only.
    """
    n = max(G.keys())
    cuts = UnionFind(n + 1)

    edges_index = 0
    for _ in range(n - 2):
        edges_index = contract(G, edges, cuts, edges_index)

    assert len(G) == 2
    u, _ = G.keys()

    # Before returning the cut we must remove the self loops from the
    # adjacency list.  Each edge is stored twice, so one of the two
    # vertices' adjacency lists is enough.
    return [entry for entry in G[u] if not cuts.connected(u, entry[1])]
예제 #41
0
    def test_union(self):
        """Uniting 2 and 5 must put the members of both groups under leader 1."""
        uf = UnionFind([[1,2,3],[4,5],[6,7,8,9,0]])

        # leaders before the merge
        self.assertEqual(uf.find(2), 1)
        self.assertEqual(uf.find(5), 4)

        uf.union(2,5)

        for member in (1, 2, 3, 4, 5):
            self.assertEqual(uf.find(member), 1)
예제 #42
0
def find_best_delta_by_num_ccs_for_given_k(permuted_sim, edges, k):
    """Find the edge weights at which the number of components of size >= k peaks.

    Edges are united one by one in the given order.  After each union the
    roots whose ``uf.weights`` entry is at least ``k`` are counted.

    Args:
        permuted_sim: not used by this function.
        edges: sequence of edges exposing ``node1``, ``node2`` and ``weight``.
        k: minimum component size; must be at least 2.

    Returns:
        ``(max_num_ccs, bestDeltas)``: the largest count seen and the weights
        of the edges at which that count was observed.

    Raises:
        ValueError: if ``k`` is smaller than 2.
    """
    if k < 2:
        raise ValueError("k must be at least 2")

    # every node starts as a component of size 1, so none has size >= k yet
    best_count = 0
    best_weights = [edges[0].weight]
    uf = UnionFind()

    for edge in edges:
        uf.union(edge.node1, edge.node2)
        big_components = sum(1 for root in uf.roots if uf.weights[root] >= k)
        if big_components > best_count:
            best_count = big_components
            best_weights = [edge.weight]
        elif big_components == best_count:
            best_weights.append(edge.weight)

    return best_count, best_weights
예제 #43
0
class Graph():

    """Edge list loaded from a file, plus a Kruskal-style MST cost computation."""

    def __init__(self, file_name):
        raw_lines = self.open_file(file_name)
        edge_lines = self.get_edges_size(raw_lines)
        parsed_edges = self.process_line(edge_lines)
        # Sorting drains the generator chain, which also creates the union-find.
        self.sorted_edges = tuple(sorted(parsed_edges,
                                         key=lambda edge: edge[2]))

    def open_file(self, file_name):
        """Yield the lines of ``file_name`` one by one."""
        with open(file_name) as handle:
            for text in handle:
                yield text

    def get_edges_size(self, line_seq):
        """Consume the header and pass the remaining lines through.

        The first token of the first line is the vertex count used to create
        the union-find; the second line is read and discarded.
        """
        header = next(line_seq)
        num_verticies = int(header.split()[0])
        next(line_seq)
        self.__union_find = UnionFind(num_verticies)
        for remaining in line_seq:
            yield remaining

    def process_line(self, line_seq):
        """Yield each line as a tuple converted with ``(int, int, float)``."""
        converters = (int, int, float)
        for line in line_seq:
            yield tuple(convert(token)
                        for convert, token in zip(converters, line.split()))

    def kruskal_mst(self):
        """Return the total cost of the edges that merge two sets.

        Edges are taken in sorted order until a single set remains.
        """
        total_cost = 0
        remaining = iter(self.sorted_edges)
        while self.__union_find.sets > 1:
            edge = next(remaining)
            if not self.__union_find.connected(edge[0], edge[1]):
                total_cost += edge[2]
                self.__union_find.union(edge[0], edge[1])
        return total_cost
예제 #44
0
 def get_edges_size(self, line_seq):
     """Consume the header and pass the remaining lines through.

     The first token of the first line is the vertex count used to create
     the union-find; the second line is read and discarded.
     """
     header = next(line_seq)
     num_verticies = int(header.split()[0])
     next(line_seq)
     self.__union_find = UnionFind(num_verticies)
     for remaining in line_seq:
         yield remaining
예제 #45
0
def test_init():
    """Membership checks after building a UnionFind from a size and from ``letter_data``."""
    uf = UnionFind(3)
    uf.add(('a', 1))
    assert all(i in uf for i in xrange(3))
    assert ('a', 1) in uf

    uf = UnionFind(letter_data)
    assert all(letter in uf for letter in letter_data)
    assert 1 not in uf

    # copy() is called and its result is replaced right away
    uf = uf.copy()
    uf = UnionFind(letter_data)
    assert all(letter in uf for letter in letter_data)
예제 #46
0
class Graph():

    """simple Graph class to run clustering algorithm"""

    def __init__(self, file_name):
        lines = self.get_edges_size(self.open_file(file_name))
        # Sorting drains the generator chain, which also creates the union-find.
        self.sorted_edges = tuple(sorted(self.process_line(lines),
                                         key=lambda edge: edge[2])
                                  )

    def open_file(self, file_name):
        """simple lines generator"""
        with open(file_name) as myfile:
            for line in myfile:
                yield line

    def get_edges_size(self, line_seq):
        """Read the vertex count from the first line, then pass the rest through.

        The first token of the first line is used to create the union-find;
        every following line is yielded unchanged.
        """
        num_verticies = int(next(line_seq).split()[0])
        self._union_find = UnionFind(num_verticies)
        for line in line_seq:
            yield line

    def process_line(self, line_seq):
        """Yield each line as a tuple converted with ``(int, int, int)``."""
        types = (int, int, int)
        for line in line_seq:
            yield tuple(fun(val) for fun, val in zip(types, line.split()))

    def clustering(self, k=4):
        """Merge along the cheapest edges down to ``k`` sets and return the next crossing edge.

        Args:
            k: number of clusters to stop at (defaults to 4).

        Returns:
            The first edge, in sorted order, that joins two different sets
            once no more than ``k`` sets remain.

        Raises:
            ValueError: if the edges run out before such an edge is found.
        """
        for edge in self.sorted_edges:
            if self._union_find.connected(edge[0], edge[1]):
                # Edge inside a cluster: irrelevant for merging and spacing.
                continue
            if self._union_find.sets > k:
                self._union_find.union(edge[0], edge[1])
            else:
                return edge
        raise ValueError("no edge separates the clusters for k=%s" % (k,))
예제 #47
0
def kruskals(g):
    """Return the edges of ``g.edges`` accepted by Kruskal's algorithm.

    Edges are ``(city, city, weight)`` triples processed by increasing
    weight; every city in ``g.cities`` starts as its own set.
    """
    ordered = sorted(g.edges, key=lambda e: e[2])

    # one tree per city in the union-find
    u = UnionFind()
    for city in g.cities:
        u.makeset(city)

    mst = []
    for e in ordered:
        q, v, _ = e
        if u.find(q) == u.find(v):
            continue
        mst.append(e)
        u.union(q, v)
    return mst
예제 #48
0
    def accountsMerge(self, accounts):
        """
        Merge accounts that share at least one email.

        Each account is ``[name, email, email, ...]``.  The ids come from
        ``self.create_mappings``; emails of one account are united and then
        grouped by the root of their tree.

        :type accounts: List[List[str]]
        :rtype: List[List[str]]
        """
        import collections

        email_to_id, id_to_name = self.create_mappings(accounts)
        uf = UnionFind(len(email_to_id))

        # union emails within an account
        for account in accounts:
            if len(account) < 2:
                # a name without any email has nothing to unite
                continue
            p = email_to_id[account[1]]
            for email in account[2:]:
                uf.union(p, email_to_id[email])

        # collect emails by tree; this mapping was never created before,
        # and items() works on Python 2 and 3 alike
        id_to_emails = collections.defaultdict(list)
        for email, p in email_to_id.items():
            id_to_emails[uf.find(p)].append(email)

        return [[id_to_name[p]] + sorted(emails) for p, emails in id_to_emails.items()]
예제 #49
0
class ClusterGraph():
    """Edge list plus union-find used to merge down to a number of clusters."""

    def __init__(self):
        self.edges = []
        self.union_find = UnionFind()

    def add_edge(self, edge):
        """Store ``edge`` and register both endpoints with the union-find."""
        self.edges.append(edge)
        for endpoint in (edge[0], edge[1]):
            self.union_find.add_element(endpoint)

    def clusterfy(self, num_clusters=4):
        """Merge along the cheapest edges until ``num_clusters`` clusters remain.

        Every examined edge is removed from ``self.edges``.
        """
        self.sort_edges()
        while len(self.union_find.clusters) > num_clusters:
            # edges are sorted by descending weight, so the cheapest is last
            cheapest = self.edges.pop()
            leader1 = self.union_find.find(cheapest[0])
            leader2 = self.union_find.find(cheapest[1])
            if leader1 == leader2:
                continue
            self.union_find.union(leader1, leader2)

    def sort_edges(self):
        """Sort ``self.edges`` in place by weight (index 2), heaviest first."""
        self.edges.sort(key=lambda edge: edge[2], reverse=True)

    def get_maximum_spacing(self):
        """Return the smallest weight among remaining edges that cross clusters."""
        find = self.union_find.find
        crossing_weights = [edge[2] for edge in self.edges
                            if find(edge[0]) != find(edge[1])]
        return min(crossing_weights)
예제 #50
0
def construct_graph(seq_dict, match_dict, threshold=90):
    """Group sequences into components linked by sufficiently strong matches.

    Args:
        seq_dict: mapping ``name -> sequence``; each sequence has ``name``
            and a ``label`` mapping that receives the ``'component'`` entry.
        match_dict: mapping whose values expose ``q_name``, ``r_name``,
            ``q_global_identity`` and ``r_global_identity``.
        threshold: a match links its two sequences when either identity is
            strictly greater than this value.

    Returns:
        ``(uf, component_dict)``: the union-find and a mapping
        ``component label -> Component`` holding the member sequences.
    """
    uf = UnionFind(seq_dict)

    for match in match_dict.values():
        q_seq = seq_dict[match.q_name]
        r_seq = seq_dict[match.r_name]
        if match.q_global_identity > threshold or match.r_global_identity > threshold:
            uf.union(q_seq.name, r_seq.name)

    uf.rename_component()

    component_dict = dict()
    for seq_name, seq in seq_dict.items():
        component_label = uf.component_label[seq_name]
        seq.label['component'] = component_label

        # Create a Component only the first time a label shows up instead of
        # building a throwaway one for every sequence.
        if component_label not in component_dict:
            component_dict[component_label] = Component(component_label)
        component_dict[component_label].add_member(seq)

    return uf, component_dict
    def numIslands2(self, m, n, positions):
        """
        Return the number of islands after each land cell in ``positions``
        is added to an m x n grid.

        A position that was already added does not change the count.

        :type m: int
        :type n: int
        :type positions: List[List[int]]
        :rtype: List[int]
        """
        uf = UnionFind(m * n)
        added = set()
        count = 0  # current number of islands
        res = []

        for i, j in positions:
            if (i, j) in added:
                # Repeated position: the cell is land already, so counting
                # it again would report an island that does not exist.
                res.append(count)
                continue

            p = i * n + j
            added.add((i, j))
            count += 1

            for x, y in [(i - 1, j), (i, j - 1), (i + 1, j), (i, j + 1)]:
                if 0 <= x < m and 0 <= y < n and (x, y) in added:
                    q = x * n + y
                    # reduce count if two nodes are connected
                    # but currently in different trees
                    if uf.find(p) != uf.find(q):
                        count -= 1
                        uf.union(p, q)

            res.append(count)

        return res
예제 #52
0
파일: mst.py 프로젝트: jeshiihu/maze_pygame
def minSpanningTree(nodes, edges):
    '''
    Input:
    A set of nodes and a collection of edges, where each edge is
    specified by a tuple (u,v) where u,v are distinct nodes
    in "nodes". "edges" is shuffled in place.

    Output:
    A list of (u, v, 1) tuples, one for every edge whose union
    succeeded; each kept edge is given a weight of 1. Because the
    edges are shuffled first, the tree differs from call to call.

    Running time: O( |E|*log |E| )
    '''
    shuffle(edges)  # random order decides which edges get connected

    uf = UnionFind(nodes)

    # union() is truthy only when the edge joins two separate components
    return [(e[0], e[1], 1) for e in edges if uf.union(e[0], e[1])]
예제 #53
0
def find_k(input_list):
    """Return ``clusters.size`` after uniting every number with its known neighbors.

    Each number of ``input_list`` is added to the union-find if missing, then
    united with every value from ``generate_neighbors(number)`` that is found
    in ``numbers``.

    NOTE(review): ``numbers`` is not defined in this function -- presumably a
    module-level collection of the input values; confirm.
    """
    clusters = UnionFind()
    for number in input_list:
        if not clusters.in_union(number):
            clusters.add(number)
        for candidate in generate_neighbors(number):
            if candidate not in numbers:
                continue
            clusters.union(number, candidate)
    return clusters.size
예제 #54
0
 def min_tree(self):
     """Build a new ``WeightedGraph`` holding the spanning links chosen Kruskal-style.

     ``self.vertexes`` is read as ``vertex -> {neighbor: weight}``.
     """
     tree = WeightedGraph()
     components = UnionFind()
     weighted_links = []

     # register every vertex and collect its outgoing links as (weight, (a, b))
     for source in self.vertexes.keys():
         components.creation(source)
         tree.add_vertex(source)
         adjacency = self.vertexes[source]
         for target in adjacency:
             weighted_links.append((adjacency[target], (source, target)))

     for _, (ver, vertex) in sorted(weighted_links):
         if components.search(ver) == components.search(vertex):
             continue
         tree.add_direct_link(ver, vertex, self.vertexes[ver][vertex])
         components.union_sets(ver, vertex)
     return tree
예제 #55
0
    def test_find(self):
        """find() must return the first member of the group an element was created in."""
        uf = UnionFind([[1,2,3],[4,5],[6,7,8,9,0]])

        expected_leader = (
            (1, (1, 2, 3)),
            (4, (4, 5)),
            (6, (6, 7, 8, 9, 0)),
        )
        for leader, members in expected_leader:
            for member in members:
                self.assertEqual(uf.find(member), leader)
예제 #56
0
def test_init():
    """Membership checks after building a UnionFind from a size and from ``letter_data``."""
    uf = UnionFind(3)
    uf.add(('a', 1))
    assert all(i in uf for i in xrange(3))
    assert ('a', 1) in uf

    uf = UnionFind(letter_data)
    assert all(letter in uf for letter in letter_data)
    assert 1 not in uf

    # copy() is called and its result is replaced right away
    uf = uf.copy()
    uf = UnionFind(letter_data)
    assert all(letter in uf for letter in letter_data)
예제 #57
0
def kruskal(nodes:List(Int), edges:List(Tuple(Int, Int, Int)), edges_to_check:List(Tuple(Int, Int)))\
        ->List(Tuple(Int, Int, Int)):
    """Run Kruskal's algorithm and report which accepted edges are in ``edges_to_check``.

    All accepted edges are united in the union-find, but only those whose
    endpoint pair (in either order) appears in ``edges_to_check`` are
    returned.
    """
    sets = UnionFind({})
    for node in nodes:
        sets.add_node(node)

    mst = []
    for edge in sorted(edges, key=itemgetter(2)):
        leader_a = sets.find(edge[0])
        leader_b = sets.find(edge[1])
        if leader_a == leader_b:
            continue
        (end_a, end_b, _) = edge
        wanted = ((end_a, end_b) in edges_to_check) or (end_b, end_a) in edges_to_check
        if wanted:
            mst.append(edge)
        sets.union(leader_a, leader_b)
    return mst
    def __cluster(self):
        """Merge down to ``self.k`` components and store the spacing in ``self.spacing``.

        Edges from ``self.__sorted_edges()`` whose endpoints are not connected
        are merged while more than ``self.k`` components exist; after that,
        the smallest weight among the remaining crossing edges is kept
        (``1e100`` when there is none).
        """
        uf = UnionFind(self.graph.V())
        smallest_crossing = 1e100

        for u, v, w in self.__sorted_edges():
            if uf.connected(u, v):
                continue

            if uf.count_components() > self.k:
                uf.union(u, v)
            elif w < smallest_crossing:
                # k clusters reached: track the cheapest cross-cluster edge
                smallest_crossing = w

        self.spacing = smallest_crossing