Пример #1
0
 def processSaving(self, saving):
     """Apply one saving by merging the two paths it refers to, if permitted.

     ``saving`` is a 4-tuple ``(k, i, j, s)``: ``i`` and ``j`` index
     ``self.paths``, ``k`` is appended to ``self.used_savings`` and ``s`` is
     subtracted from ``self.solution`` when the merge goes ahead. Nothing is
     modified when ``uf.joinable`` rejects the pair.
     """
     key, first_idx, second_idx, amount = saving
     first_path = self.paths[first_idx]
     second_path = self.paths[second_idx]

     # Guard clause: leave all state untouched for a pair that may not join.
     if not uf.joinable(self.capacity, first_path, second_path):
         return

     uf.union(first_path, second_path)
     self.used_savings.append(key)
     self.solution -= amount
Пример #2
0
 def compute_mst(self):
     """Build a spanning forest over the datapaths from ``self.link_list``.

     Links are examined in list order. A link is kept when ``find`` reports
     different sets for its two endpoints, after which the sets are merged
     with ``union``. No link weight is consulted, so the result depends on
     the order of ``self.link_list``.

     Returns:
         A list of ``(id, id)`` pairs; every kept link appears twice, once in
         each direction.

     Raises:
         KeyError: if a link names an id for which no node was created from
             ``self.datapaths``.
     """
     # Iterating the mapping directly yields its keys on both Python 2 and 3;
     # ``iterkeys()`` exists only on Python 2.
     # NOTE(review): assumes self.datapaths is a dict-like mapping -- confirm.
     node_set = [make_set(i) for i in self.datapaths]

     # Index the nodes by id once instead of scanning node_set for every link
     # endpoint. setdefault keeps the first node for an id, matching the
     # first-match behaviour of a linear search.
     nodes_by_id = {}
     for node in node_set:
         nodes_by_id.setdefault(node.id, node)

     # Resolve every link before merging anything, so an unknown id is
     # reported before any set has been modified.
     link_set = [(nodes_by_id[ln1], nodes_by_id[ln2])
                 for (ln1, ln2) in self.link_list]

     mst = []
     for n1, n2 in link_set:
         if find(n1) is not find(n2):
             # Store both directions of the kept link.
             mst.append((n1, n2))
             mst.append((n2, n1))
             union(n1, n2)
     return [(n1.id, n2.id) for (n1, n2) in mst]
Пример #3
0
def main():
    """Cluster the graph in ``clustering1.txt`` and return the spacing.

    The first line of the file is the node count; every following line is
    ``node_a node_b distance`` with 1-based node labels. Edges are taken
    smallest-first and merged with ``union`` until fewer than ``K`` clusters
    remain.

    Returns:
        The distance of the edge that merged K clusters into K - 1, i.e. the
        smallest distance between two different clusters at the moment exactly
        K clusters existed.

    Raises:
        IndexError: if the edges run out before that final merge happens.
    """
    K = 4  # Desired number of clusters

    with open('clustering1.txt', 'rt') as infile:
        node_cnt = int(infile.readline())  # First line is count of nodes
        # Skip blank lines (e.g. a trailing newline at the end of the file);
        # they would otherwise become empty tuples and break the sort key.
        edges = [tuple(int(x) for x in line.split())
                 for line in infile if line.strip()]

    # Sort edges in decreasing order of distance. Sorting in reverse order
    # lets us pop() the next smallest edge in O(1) time.
    edges.sort(key=lambda edge: -edge[2])

    # Size the node list from the file header instead of a fixed 500 so that
    # inputs of any size work.
    nodes = [Node(label) for label in range(1, node_cnt + 1)]
    cluster_cnt = node_cnt
    distance = 0  # Assuming distance between nodes is always nonnegative

    # >= because we want the minimum distance between clusters AFTER
    # only K clusters are left, which is the edge of the next merge.
    while cluster_cnt >= K:
        node_label_a, node_label_b, distance = edges.pop()
        node_a, node_b = nodes[node_label_a - 1], nodes[node_label_b - 1]
        if union(node_a, node_b):
            # node_a and node_b were in separate clusters that have been
            # merged by union().
            cluster_cnt -= 1
    return distance
def main():
    """Count the clusters left after joining all nodes with close bit strings.

    Reads ``clustering_big.txt``: the header holds the node count and a second
    integer, every following line is one node's bits separated by whitespace.
    Nodes whose strings are returned by ``get_close_values(..., max_diff=2)``
    for one another are connected, all resulting edges are merged with
    ``union``, and the remaining cluster count is printed.
    """
    with open('clustering_big.txt', 'rt') as handle:
        node_cnt, _bits = [int(tok) for tok in handle.readline().split()]
        bs_list = ["".join(row.split()) for row in handle.readlines()]

    # Map every binary string to the list of line indices carrying it; the
    # indices double as node labels. Duplicated strings yield several labels.
    bs_dict = {}
    for label, bit_string in enumerate(bs_list):
        bs_dict.setdefault(bit_string, []).append(label)

    # One union-find node per label.
    uf_nodes = [Node(i) for i in range(node_cnt)]

    # Collect edges as (smaller label, larger label, bit difference).
    edges = set()
    for cur_label, bit_string in enumerate(bs_list):
        for close_str in get_close_values(bit_string, max_diff=2):
            nearby_nodes = bs_dict.get(close_str, [])
            if not nearby_nodes:
                continue
            distance = get_diff_bits(bit_string, close_str)
            for nearby_label in nearby_nodes:
                if nearby_label == cur_label:
                    # A node is never connected to itself.
                    continue
                low = min(cur_label, nearby_label)
                high = max(cur_label, nearby_label)
                edges.add((low, high, distance))

    # Largest difference first, so pop() hands out the smallest edge next.
    ordered_edges = list(edges)
    ordered_edges.sort(key=lambda edge: -edge[2])

    # Kruskal-style merging until every edge has been consumed.
    cluster_cnt = node_cnt
    while ordered_edges:
        label_a, label_b, distance = ordered_edges.pop()
        if union(uf_nodes[label_a], uf_nodes[label_b]):
            # The two nodes were in separate clusters until this union().
            cluster_cnt -= 1

    print("Cluster count: {}".format(cluster_cnt))
Пример #5
0
def min_spanning_tree(graph):
    """Return a spanning forest of ``graph`` built lightest-edge-first.

    ``graph`` maps each vertex to a mapping of neighbour -> edge weight.
    Edges are visited in ascending ``(weight, v1, v2)`` order and kept when
    ``union_find.find`` gives different results for the two endpoints.

    Returns:
        ``(tree, total)``: ``tree`` has the same nested-mapping shape as
        ``graph`` with every kept edge stored in both directions, ``total``
        is the sum of the kept weights.
    """
    vertices = set()
    candidates = []
    for src in graph:
        adjacent = graph[src]
        for dst in adjacent:
            candidates.append((adjacent[dst], src, dst))
            vertices.add(src)
            vertices.add(dst)

    # Descending order, so pop() yields the lightest remaining edge.
    candidates.sort(reverse=True)

    sets = {vertex: None for vertex in vertices}
    tree = {vertex: {} for vertex in vertices}
    total = 0
    while candidates:
        weight, src, dst = candidates.pop()
        if union_find.find(sets, src) != union_find.find(sets, dst):
            tree[src][dst] = weight
            tree[dst][src] = weight
            total += weight
            union_find.union(sets, src, dst)
    return tree, total
Пример #6
0
def min_spanning_tree(graph):
    """Return a spanning forest of ``graph`` built lightest-edge-first.

    ``graph`` maps each vertex to a mapping of neighbour -> edge weight.
    Edges are visited in ascending ``(weight, v1, v2)`` order and kept when
    ``union_find.find`` gives different results for the two endpoints.

    Returns:
        ``(tree, total)``: ``tree`` has the same nested-mapping shape as
        ``graph`` with every kept edge stored in both directions, ``total``
        is the sum of the kept weights.
    """
    vertices = set()
    candidates = []
    for src in graph:
        adjacent = graph[src]
        for dst in adjacent:
            candidates.append((adjacent[dst], src, dst))
            vertices.add(src)
            vertices.add(dst)

    # Descending order, so pop() yields the lightest remaining edge.
    candidates.sort(reverse=True)

    sets = {vertex: None for vertex in vertices}
    tree = {vertex: {} for vertex in vertices}
    total = 0
    while candidates:
        weight, src, dst = candidates.pop()
        if union_find.find(sets, src) != union_find.find(sets, dst):
            tree[src][dst] = weight
            tree[dst][src] = weight
            total += weight
            union_find.union(sets, src, dst)
    return tree, total
Пример #7
0
def compute(nodes, edges):
	"""Union the endpoints of every edge line and resolve each entry's root.

	Args:
		nodes: sized collection; only ``len(nodes)`` is used, to size the
			structure returned by ``make_sets``.
		edges: sized sequence of text lines whose first two tab-separated
			fields are integer node ids. Further fields are ignored and
			blank lines are skipped.

	Returns:
		The ``parents`` structure from ``make_sets`` with every entry
		replaced by the result of ``find`` for it.

	Raises:
		ValueError: if a non-blank line has fewer than two tab-separated
			fields or a field is not an integer.
	"""
	parents = make_sets(len(nodes))

	bar = StatusBar(len(edges))
	for counter, line in enumerate(edges, 1):
		if line.strip():
			# Split on tabs rather than slicing by find() offsets: with no
			# third field find() returns -1 and the slice would silently
			# drop the last character of the second id.
			left_text, right_text = line.split("\t", 2)[:2]
			# int() tolerates surrounding whitespace such as a newline.
			union(parents, int(left_text), int(right_text))
		if counter % 5000 == 0: bar.update(counter)

	bar.close()
	bar = StatusBar(len(parents))
	# Replace every entry by the root that find() reports for it.
	for counter, x in enumerate(parents):
		parents[counter] = find(parents, x)
		if counter % 10000 == 0: bar.update(counter)
	bar.close()
	return parents
Пример #8
0
def largest_k_clusters(file, k):
    """Return the spacing of the k-clustering of the graph stored in ``file``.

    The first line of the file is the node count (nodes are labelled 1..n);
    each following line is ``node1 node2 cost``. Edges are merged
    cheapest-first until ``k`` clusters remain; the spacing is the cost of the
    cheapest edge that still joins two different clusters at that point.

    Args:
        file: path of the edge-list file.
        k: number of clusters to stop at; must satisfy ``2 <= k <= n``.

    Returns:
        The cost of the first edge, in ascending cost order, whose endpoints
        lie in different clusters once exactly ``k`` clusters remain.

    Raises:
        ValueError: if ``k`` is out of range, or the edges run out before a
            separating edge of the k-clustering is found.
    """
    # Only the parsing needs the file; close it before running the algorithm.
    with open(file) as f:
        n_nodes = int(f.readline())

        # tuple in the form (cost, node1, node2) so plain sorting is by cost
        edges = []
        for line in f:
            edge = [int(x) for x in line.split()]
            if not edge:
                # Blank line, e.g. a trailing newline at the end of the file.
                continue
            edges.append((edge[2], edge[0], edge[1]))

    # With a single cluster no separating edge exists, and more clusters than
    # nodes can never be reached; fail early with a clear message.
    if not 2 <= k <= n_nodes:
        raise ValueError(
            "k must be between 2 and the node count (%d), got %r"
            % (n_nodes, k))

    # initialize subsets for union find
    subsets = {i: union_find.Subset(i) for i in range(1, n_nodes + 1)}
    edges.sort()

    # Single pass in ascending cost order. Every edge seen before the cluster
    # count reaches k ends up inside one cluster, so the first edge that still
    # crosses clusters afterwards is the answer; no rescan is needed.
    n_clusters = n_nodes
    for cost, node1, node2 in edges:
        subset1 = union_find.find(subsets, node1)
        subset2 = union_find.find(subsets, node2)
        if subset1 != subset2:
            if n_clusters == k:
                return cost
            union_find.union(subsets, subset1, subset2)
            n_clusters -= 1

    raise ValueError(
        "not enough edges to find the spacing of a %r-clustering" % (k,))
Пример #9
0
    print('number of disjoint sets: %s' %
        (len([ i for i in itertools.groupby(sets) ])))
    print()

# From here on the name ``union_find`` refers to a UnionFind instance rather
# than to the module it was created from.
union_find = union_find.UnionFind()

nodes = [Node(ch) for ch in 'abcdefg']

print('labels: %s' % [str(i) for i in nodes])
for node in nodes:
    union_find.make_set(node)

print_sets(nodes)

# Joining 'a' and 'c' must put them into the same set.
assert union_find.find(nodes[0]) != union_find.find(nodes[2])
union_find.union(nodes[0], nodes[2])
assert union_find.find(nodes[0]) == union_find.find(nodes[2])

print_sets(nodes)

# Joining 'a' and 'b' must also connect 'b' with 'c' through 'a'.
assert union_find.find(nodes[0]) != union_find.find(nodes[1])
assert union_find.find(nodes[1]) != union_find.find(nodes[2])
union_find.union(nodes[0], nodes[1])
assert union_find.find(nodes[0]) == union_find.find(nodes[1])
assert union_find.find(nodes[1]) == union_find.find(nodes[2])

print_sets(nodes)

# Joining the last two nodes, 'f' and 'g'.
assert union_find.find(nodes[-2]) != union_find.find(nodes[-1])
union_find.union(nodes[-2], nodes[-1])
assert union_find.find(nodes[-2]) == union_find.find(nodes[-1])
Пример #10
0
def run_union_find(onStepUpdate=None):
    """Walk the maze from index 0 until it is connected to index ``size - 1``.

    Each iteration records the current index, asks ``find_next_steps`` for
    candidate next indices, picks one at random and joins it to the current
    index with ``union``. When there are no candidates the walk backtracks
    through the recorded steps; if no earlier step has candidates either, the
    search is abandoned. Progress and the visited coordinates are printed.

    Uses ``data``, ``size``, ``hashTable``, ``connected``, ``find_next_steps``,
    ``union`` and ``randrange`` from the enclosing scope.

    Args:
        onStepUpdate: optional callable invoked with a dict holding
            ``"status"`` (``"DEAD_END"`` or ``"NEXT_STEP"``) and ``"value"``
            (``hashTable[currect_index]``).

    Returns:
        None; the result is only printed.
    """
    # Start from the beginning of the maze and look for the next connection.
    currect_index = 0  # index in the data array
    # While the start and the end of the maze are not connected, try to find
    # the next connected item of the path.
    steps = []
    while not connected(data, 0, size - 1):
        # For the current cell, fetch the candidate next cells and randomly
        # select one of them as the next step.
        # NOTE(review): presumably find_next_steps already excludes cells that
        # are connected to the current one or are a "WALL" -- confirm there.

        # Record the index on every pass, including revisits after
        # backtracking.
        steps.append(currect_index)

        next_steps = find_next_steps(currect_index)

        if len(next_steps) == 0:
            """
            Dead end reached. Need to get back and look at previous connections next steps.
            """
            print(
                "Dead end at index:",
                currect_index,
                "and coordinate:",
                hashTable[currect_index],
            )
            if onStepUpdate:
                onStepUpdate({
                    "status": "DEAD_END",
                    "value": hashTable[currect_index]
                })
            # list.index returns the FIRST occurrence of the current index, so
            # backtracking starts just before the first time it was visited.
            prev_step = steps.index(currect_index) - 1
            while (prev_step >= 0
                   and len(find_next_steps(steps[prev_step])) == 0):
                # Check for a new route starting one step earlier; keep going
                # back until a step with possible next steps is found.
                prev_step -= 1
            if prev_step >= 0:
                print("Loogin for new route at index", steps[prev_step])
                currect_index = steps[prev_step]
                continue
            else:
                print("Could not find a route from start to end... :(")
            # Reached only when no earlier step offers a way forward.
            break

        # Pick a random candidate and join it to the current cell.
        next_index = next_steps[randrange(len(next_steps))]
        union(data, currect_index, next_index)
        print("Iteration at index", currect_index)
        if onStepUpdate:
            onStepUpdate({
                "status": "NEXT_STEP",
                "value": hashTable[currect_index]
            })
        # Prepare for the next loop.
        currect_index = next_index

    # NOTE(review): the lines below also run after the `break` above, so the
    # last index is reported even when no route was found -- confirm intended.
    print("Iteration at last index", size - 1)
    print("--------------------------------------------------------")
    # Append the last index of the array.
    steps.append(size - 1)

    # Translate the recorded indices into coordinates for display.
    step_coordinates = list(map(lambda item: hashTable[item], steps))
    print("Iteration traversed the following coordinates:")
    print(step_coordinates)