Exemplos de UnionFind em Python, exemplos de unionfind.UnionFind em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: solution.py Projeto: rawg/bitsandfytes

def merge():
    """Merge categories whose article sets overlap strongly.

    Every pair of categories in the module-level ``cat_arts`` mapping is
    scored with ``jaccard``; pairs scoring above ``args.threshold`` are
    unioned.  In each resulting group the category holding the most
    articles becomes the parent and absorbs the others, unless the group
    is skipped because ``random.random()`` falls below ``args.handicap``.

    Side effects: mutates ``cat_arts``, calls ``skill_counts.decr`` and
    adds ``len(group) - 1`` to the global ``mg_ops`` for every group.
    """
    global mg_ops     # dirty :(
    cats = list(cat_arts.keys())
    uf = UnionFind(cats)

    for i, cat1 in enumerate(cats):
        for cat2 in cats[i + 1:]:
            if jaccard(cat_arts[cat1], cat_arts[cat2]) > args.threshold:
                uf.union([cat1, cat2])

    for group in uf.sets():
        mg_ops += len(group) - 1

        # Snapshot the members so the parent search and the merge loop walk
        # the group in the same order.
        members = list(group)
        # max() keeps the first largest member, and still yields a real
        # category when every article set is empty (a manual "l > size" scan
        # starting from size 0 would leave the parent as None in that case).
        parent = max(members, key=lambda cat: len(cat_arts[cat]), default=None)
        children = [cat for cat in members if cat != parent]

        if random.random() >= args.handicap:
            for cat in children:
                logging.info("MERGE: %s -> %s" % (cat, parent))
                skill_counts.decr(cat_arts[cat] & cat_arts[parent])
                cat_arts[parent] |= cat_arts[cat]
                del cat_arts[cat]
        else:
            # Report every category whose merge is skipped, not just whichever
            # one a previous loop happened to visit last.
            for cat in children:
                logging.info("HANDICAP: Skipping merge of %s -> %s" % (cat, parent))

Exemplo n.º 2

0

Exibir arquivo

class Tree:
    """Adjacency-dict graph that refuses edges which would close a cycle.

    ``tree`` maps each vertex to a ``{neighbour: weight}`` dict, and
    ``components`` is a UnionFind used to test whether two vertices are
    already connected.
    """

    def __init__(self, directed=False, weighted=False):
        """Create an empty tree; the two flags are stored as given."""
        self.directed = directed
        self.weighted = weighted
        self.tree = {}
        self.vertex_num = 0
        self.vertex = []
        self.components = UnionFind()

    def add_edge(self, origin, destiny, weight=0):
        """Add an edge from ``origin`` to ``destiny``, registering new vertices.

        Raises:
            Exception: if the two endpoints are already connected.  Vertices
                registered by this call stay registered in that case.
        """
        for endpoint in (origin, destiny):
            if endpoint in self.tree:
                continue
            self.components.add(endpoint)
            self.tree[endpoint] = {}
            self.vertex_num += 1
            self.vertex.append(endpoint)

        if self.components.connected(origin, destiny):
            raise Exception("Cannot add edge, would create a cicle")

        self.tree[origin][destiny] = weight
        if not self.directed:
            # Undirected trees store the edge in both directions.
            self.tree[destiny][origin] = weight

        self.components.union(origin, destiny)

Exemplo n.º 3

0

Exibir arquivo

def connected_components_diff(game, player):
    """
    Score the player by comparing component values of both players.

    Blank cells are unioned with their neighbours, then each player's
    location is unioned with its own neighbours.  The score is
    ``uf.components(...)`` for the player minus the same value for the
    opponent, both converted to float.

    :param game: board object exposing width, height, blank spaces and
        player locations
    :param player: the player to score
    :return: float difference (player minus opponent)
    """
    uf = UnionFind(game.width * game.height)

    for cell in game.get_blank_spaces():
        for adjacent in neighbors(game, cell):
            uf.union(cell, adjacent)

    own_loc = game.get_player_location(player)
    rival_loc = game.get_player_location(game.get_opponent(player))
    for loc in (own_loc, rival_loc):
        for adjacent in neighbors(game, loc):
            uf.union(adjacent, loc)

    own_score = float(uf.components(own_loc))
    rival_score = float(uf.components(rival_loc))
    return own_score - rival_score

Exemplo n.º 4

0

Exibir arquivo

Arquivo: 3_week2.py Projeto: zackAttack3/coursera_1_hw

    def __init__(self):
        """Load ``clustering.txt`` and union nodes whose codes are close.

        The first line of the file is parsed as a list of ints stored in
        ``self.input``; index 0 sizes the UnionFind and index 1 is the number
        of bits per code.  Each following non-blank line is a bit string; nodes
        are bucketed in ``self.alpha`` by its integer value.  Buckets whose
        keys differ in at most two bits are unioned, and the UnionFind's
        ``n_comps`` and ``n_elts`` are printed.
        """
        self.alpha = {}
        self.input = None
        with open('clustering.txt') as f:
            self.input = [int(tok) for tok in f.readline().rstrip().split(' ')]
            node_id = 0
            for row in f:
                if row.isspace():
                    continue
                code = int(row.rstrip().replace(' ', ''), 2)
                self.alpha.setdefault(code, set()).add(node_id)
                node_id += 1

        uf = UnionFind(list(range(self.input[0])))

        # XOR masks with zero, two, then one bit set (same order as built).
        single_bits = [1 << bit for bit in range(self.input[1])]
        masks = [0]
        masks += [a ^ b for a, b in it.combinations(single_bits, 2)]
        masks += single_bits

        for key in self.alpha:
            for mask in masks:
                partner = self.alpha.get(mask ^ key)
                if partner is None:
                    continue
                merged = self.alpha[key].union(partner)
                leader = merged.pop()
                for member in merged:
                    uf.union(leader, member)
        print(uf.n_comps, uf.n_elts)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: lsh.py Projeto: LeosonH/lshhdc

class Cluster(object):
    """Clusters sets with Jaccard similarity above threshold with high
    probability.

    Algorithm based on Rajaraman, "Mining of Massive Datasets":
    1. Generate set signature
    2. Use LSH to map similar signatures to same buckets
    3. Use UnionFind to merge buckets containing same values
    """
    def __init__(self, width=10, threshold=0.5):
        """Build the signer, the LSH hasher and one bucket map per band."""
        self.width = width
        self.unionfind = UnionFind()
        self.signer = MinHashSignature(width)
        self.hasher = LSH(width, threshold)
        self.hashmaps = [defaultdict(list)
                         for _ in range(self.hasher.get_n_bands())]

    def add_set(self, s, label=None):
        """Register set ``s`` under ``label`` and union it with bucket mates.

        ``label`` defaults to ``s`` itself only when it is omitted (``None``),
        so falsy labels such as ``0`` or ``""`` are kept as given.
        """
        # A label for this set
        if label is None:
            label = s

        # Add to unionfind structure (the lookup itself registers the label)
        self.unionfind[label]

        # Get signature
        sig = self.signer.sign(s)

        # Union labels with same LSH key in same band
        for band_idx, hshval in enumerate(self.hasher.hash(sig)):
            bucket = self.hashmaps[band_idx][hshval]
            bucket.append(label)
            # The first label ever stored in a bucket acts as its anchor.
            self.unionfind.union(label, bucket[0])

    def get_sets(self):
        """Return the groups reported by the underlying UnionFind."""
        return self.unionfind.sets()

Exemplo n.º 6

0

Exibir arquivo

    def simplify(self):
        """Collapse productions with equal right-hand sides until stable.

        Works on a deep copy of ``self.productions``.  In each pass, every
        key whose rule equals that of another (not yet substituted) key is
        replaced by that key; merged keys are unioned in a UnionFind.  After
        a pass that merged something, all remaining rules are rewritten with
        ``substitute``.

        Returns:
            A ``(Grammar, mapping)`` tuple, where ``mapping`` sends each
            surviving key to ``uf.component(key)``.
        """
        p = copy.deepcopy(self.productions)
        uf = UnionFind(list(p.keys()))

        changed = True
        while changed:
            changed = False
            subs, to_add, to_delete = {}, {}, set()

            for k1, r1 in p.items():
                for k2, r2 in p.items():
                    if k1 == k2 or k1 in subs or k2 in subs or not r1 == r2:
                        continue
                    subs[k2] = k1
                    to_add[k1] = r2
                    to_delete.add(k2)
                    uf.union(k1, k2)
                    changed = True

            p = dict(p)
            p.update(to_add)
            for obsolete in to_delete:
                del p[obsolete]

            if changed:
                p = {name: rule.substitute(subs) for name, rule in p.items()}

        return Grammar(productions=p), {k: uf.component(k) for k in p}

Exemplo n.º 7

0

Exibir arquivo

Arquivo: solution.py Projeto: wushuzh/leetcode

def equations_possible_using_union_find(equations: List[str]) -> bool:
    """Return True when no inequality contradicts the equalities.

    Each equation is unpacked into four items; the second one ('=' or '!')
    selects equality or inequality.  Equalities are unioned first, then any
    inequality between connected variables makes the result False.
    """
    uf = UnionFind()
    for equation in equations:
        lhs, op, _, rhs = equation
        if op == '=':
            uf.union(lhs, rhs)

    for equation in equations:
        lhs, op, _, rhs = equation
        if op == '!' and uf.is_connect(lhs, rhs):
            return False
    return True

Exemplo n.º 8

0

Exibir arquivo

Arquivo: lsh.py Projeto: LeosonH/lshhdc

 def __init__(self, width=10, threshold=0.5):
     """Set up the UnionFind, signer, hasher and per-band bucket maps."""
     self.width = width
     self.unionfind = UnionFind()
     self.signer = MinHashSignature(width)
     self.hasher = LSH(width, threshold)
     # One defaultdict(list) per band reported by the hasher.
     band_count = self.hasher.get_n_bands()
     self.hashmaps = [defaultdict(list) for _ in range(band_count)]

Exemplo n.º 9

0

Exibir arquivo

Arquivo: contouring.py Projeto: xtracthub/temp-xtract-maps

def combine_boxes(boxes, img_dim, dist_threshold=15, padding=0):
    """Group nearby boxes with UnionFind and enclose each group in one box.

    Parameters:
    boxes (list(numpy array)): Boxes as returned by enclosing_box; each one
    is iterated to obtain its coordinates.
    img_dim (tuple(int)): Image dimensions, forwarded to enclosing_box.
    dist_threshold (int): Two boxes are grouped when closest_distance
    between them is strictly below this value.
    padding (int): Padding forwarded to enclosing_box.

    Return:
    (numpy array): The combined boxes that pass fits_criteria_box.
    """
    count = len(boxes)
    uf = UnionFind(count)

    for first, second in combinations(range(count), 2):
        if closest_distance(boxes[first], boxes[second]) < dist_threshold:
            uf.union(first, second)

    # Flatten the coordinates of every box in a group into one list.
    box_groups = []
    for group in uf.groups():
        points = []
        for idx in group:
            points.extend(boxes[idx])
        box_groups.append(points)

    combined_boxes = []
    for points in box_groups:
        combined_boxes.append(enclosing_box(points, img_dim, padding=padding))

    filtered_boxes = []
    for candidate in combined_boxes:
        if fits_criteria_box(candidate):
            filtered_boxes.append(candidate)
    return np.array(filtered_boxes)

Exemplo n.º 10

0

Exibir arquivo

def kruskal(edges):
    """Build a spanning forest from ``(i, j, w)`` edges, cheapest first.

    Returns a ``(tree_edges, cost)`` tuple: the accepted ``(n1, n2)`` pairs
    in the order they were accepted, and the sum of their weights.
    """
    # Collect the node set and push every edge onto a heap keyed by weight.
    all_nodes = set()
    hq = []
    for i, j, w in edges:
        all_nodes.update((i, j))
        heappush(hq, (w, i, j))

    # One UnionFind entry per node seen in the edge list.
    forest = UnionFind(all_nodes)

    tree_edges = []
    cost = 0

    # Pop edges in heap order and keep those joining two different sets.
    while hq:
        weight, n1, n2 = heappop(hq)
        if not forest.is_same_set(n1, n2):
            cost += weight
            forest.union(n1, n2)
            tree_edges.append((n1, n2))

    return (tree_edges, cost)

Exemplo n.º 11

0

Exibir arquivo

def flip_inside_O_by_UF(board: List[List[str]]) -> None:
    """Overwrite with 'X' every cell not connected to a border 'O', in place.

    All 'O' cells on the border are unioned with one extra dummy node, and
    every 'O' is unioned with its right and lower 'O' neighbours.  Any cell
    whose root differs from the dummy node's root is then set to 'X'.
    """
    if not board:
        # Nothing to do; rebinding the local name would not affect the caller.
        return

    uf = UnionFind(board)
    # Index one past the last cell, reserved for the border dummy node.
    dummyIdx = uf.r * uf.c

    for i in range(uf.r):
        for j in range(uf.c):
            if board[i][j] != 'O':
                continue
            ijIdx = i * uf.c + j
            # connect all boundary nodes to the ONE dummy node
            if i in (0, uf.r - 1) or j in (0, uf.c - 1):
                uf.union(ijIdx, dummyIdx)
            # connect to the right neighbour when it is also an 'O'
            if j + 1 < uf.c and board[i][j + 1] == 'O':
                uf.union(ijIdx, ijIdx + 1)
            # connect to the lower neighbour when it is also an 'O'
            if i + 1 < uf.r and board[i + 1][j] == 'O':
                uf.union(ijIdx, ijIdx + uf.c)

    for i in range(uf.r):
        for j in range(uf.c):
            # Compare against the same dummy index that the unions used.
            if uf.find_root(i * uf.c + j) != uf.find_root(dummyIdx):
                board[i][j] = 'X'

Exemplo n.º 12

0

Exibir arquivo

    def __init__(self, directed=False, weighted=False):
        """Start with no vertices, no edges and a fresh UnionFind."""
        self.directed, self.weighted = directed, weighted
        self.tree = {}
        self.vertex = []
        self.vertex_num = 0
        self.components = UnionFind()

Exemplo n.º 13

0

Exibir arquivo

def define_shells_structure(me):
    """Unify the two vertices of every edge of ``me`` and return the UnionFind.

    The UnionFind is sized by ``len(me.vertices)``.
    """
    union_find = UnionFind(len(me.vertices))
    # Gather every edge's vertex pair before unifying any of them.
    endpoint_pairs = [edge.vertices for edge in me.edges]
    for v1, v2 in endpoint_pairs:
        union_find.unify(v1, v2)
    return union_find

Exemplo n.º 14

0

Exibir arquivo

Arquivo: solution.py Projeto: wushuzh/leetcode

def find_circle_num_using_union_find(M: List[List[int]]) -> int:
    """Count entries of ``uf.parent`` that are their own parent.

    Indices ``row`` and ``col`` are unioned whenever ``M[row][col] == 1``;
    only the part of the matrix below the diagonal is read.
    """
    uf = UnionFind(len(M))
    for row in range(uf.num):
        for col in range(row):
            if M[row][col] == 1:
                uf.union(row, col)

    return sum(1 for idx, parent in enumerate(uf.parent) if idx == parent)

Exemplo n.º 15

0

Exibir arquivo

 def UnionFindCommunity(self, G):
     """Union the endpoints of every edge of ``G`` and store the components.

     The components are passed as a list to ``addGNodesAttr`` under the
     attribute name "Union find".
     """
     uf = UnionFind(G.nodes())
     for source, target in G.edges():
         uf.union(source, target)
     score = list(uf.components())
     self.addGNodesAttr(G, score, "Union find")

Exemplo n.º 16

0

Exibir arquivo

Arquivo: Trip_Chaining.py Projeto: MTI-Data/FHWA-EAR-OD-Project

def trip_roster_merged(trip_roster_file, colname_file, trip_chain,
                       park_pair_file, gas_pair_file):
    """Split a trip roster into unmatched trips and origin/destination rows.

    Args:
        trip_roster_file: Header-less CSV of trips.
        colname_file: CSV whose header row supplies the roster column names.
        trip_chain: When equal to ``False`` the matched pairs are used as-is
            (including ``StopId``); otherwise pairs are chained into
            components with UnionFind.
        park_pair_file: CSV of matched pairs from parking stops.
        gas_pair_file: CSV of matched pairs from gas stops.

    Returns:
        ``(trip_unmatched, trip_od)``: roster rows whose ``TripId`` appears in
        no matched pair, and one row per pair/chain with start and end
        coordinates merged in plus a ``TripId`` built by joining the pair ids
        with ``'_'``.
    """
    col_names = pd.read_csv(colname_file)
    trip_roster = pd.read_csv(trip_roster_file,
                              header=None,
                              names=col_names.columns)
    # Kept as an equality test (not "not trip_chain") so that None still
    # selects the chaining branch, as before.
    if trip_chain == False:  # noqa: E712
        park_pairs = pd.read_csv(park_pair_file)
        park_pairs.columns = ['TripId', 'StopId', 'EndTripId', 'StartTripId']
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        matched_trip_pair = pd.concat([
            park_pairs[['EndTripId', 'StartTripId', 'StopId']],
            pd.read_csv(gas_pair_file),
        ])
        trip_pair_id = ['EndTripId', 'StopId', 'StartTripId']
        new_pair_df = matched_trip_pair
    else:
        park_pairs = pd.read_csv(park_pair_file, usecols=['end', 'start'])
        park_pairs.columns = ['EndTripId', 'StartTripId']
        matched_trip_pair = pd.concat([
            park_pairs,
            pd.read_csv(gas_pair_file, usecols=['EndTripId', 'StartTripId']),
        ])
        trip_pair_id = ['EndTripId', 'StartTripId']
        # trip chaining
        start_time = time.time()
        uf = UnionFind(list(set(matched_trip_pair.values.flatten())))
        for _, row in matched_trip_pair.iterrows():
            uf.union(row['EndTripId'], row['StartTripId'])
        result = uf.components()
        print('Trip chaining takes %s secs for %s trip pairs.' %
              (time.time() - start_time, len(matched_trip_pair)))

        def set_first_last(input_set):
            # NOTE(review): if components are plain sets, "first" and "last"
            # follow set iteration order, not trip order - confirm intent.
            tmp = list(input_set)
            return [tmp[0], tmp[-1]]

        # Build a concrete list rather than handing a lazy map object to the
        # DataFrame constructor.
        new_pair = [set_first_last(component) for component in result]
        new_pair_df = pd.DataFrame(new_pair,
                                   columns=['EndTripId', 'StartTripId'])
    trip_unmatched = trip_roster.loc[~trip_roster['TripId'].isin(
        matched_trip_pair[['EndTripId', 'StartTripId']].values.flatten())]
    # create od file for matched trips/ trip chain
    trip_od = new_pair_df.merge(
        trip_roster[['TripId', 'StartLocLat',
                     'StartLocLon']].rename(columns={'TripId': 'EndTripId'}),
        how='left',
        sort=False)
    trip_od = trip_od.merge(
        trip_roster[['TripId', 'EndLocLat',
                     'EndLocLon']].rename(columns={'TripId': 'StartTripId'}),
        how='left',
        sort=False)
    trip_od['TripId'] = trip_od[trip_pair_id].apply(
        lambda row: '_'.join(row.tolist()), axis=1)
    return trip_unmatched, trip_od

Exemplo n.º 17

0

Exibir arquivo

Arquivo: clustering.py Projeto: doug-wade/AlgorithmsGreatestHits

def clustering(edge_list, count_nodes, clusters):
    """Union edge endpoints in list order until ``clusters`` groups remain.

    Each entry of ``edge_list`` is indexed as ``entry[0]`` (the value that
    is returned) and ``entry[1]`` (the pair of endpoints).  Nodes are
    numbered ``1..count_nodes``.

    Returns ``(entry[0], u)`` for the first entry that joins two different
    groups once the group count is down to ``clusters``.  Running past the
    end of ``edge_list`` raises IndexError.
    """
    u = UnionFind(list(range(1, count_nodes + 1)))
    position = 0
    while True:
        entry = edge_list[position]
        head, tail = entry[1][0], entry[1][1]
        if not u.find(head) == u.find(tail):
            if count_nodes <= clusters:
                return entry[0], u
            u.union(head, tail)
            count_nodes -= 1
        position += 1
Exemplo n.º 18

0

Exibir arquivo

def rm_stones_using_union_find(stones: List[List[int]]) -> int:
    """Return ``len(uf.p)`` minus the number of distinct stone roots.

    Every pair of stones sharing an x or a y coordinate is unioned; a stone
    at ``(x, y)`` is keyed as ``10000 * x + y``.  An empty input yields 0.
    """
    if not stones:
        return 0

    def key_of(x, y):
        # Pack both coordinates into the single integer used as the UF key.
        return 10000 * x + y

    uf = UnionFind()
    for x, y in stones:
        for i, j in stones:
            if x == i or y == j:
                uf.union(key_of(x, y), key_of(i, j))

    tracked = len(uf.p)
    roots = {uf.find_root(key_of(x, y)) for x, y in stones}
    return tracked - len(roots)

Exemplo n.º 19

0

Exibir arquivo

Arquivo: kruskal.py Projeto: ninjaboynaru/my-python-demo

def kruskal(g, w):
    """Pick edges of ``w`` in ascending weight order that join new components.

    ``g`` supplies the nodes via ``g.keys()`` and ``w`` maps each ``(a, b)``
    edge to its weight.  Scanning stops as soon as ``len(g) - 1`` edges have
    been collected.  Returns the list of chosen edges.
    """
    uf = UnionFind(g.keys())
    by_weight = sorted(w, key=w.__getitem__)
    chosen = []
    for edge in by_weight:
        a, b = edge
        if not uf.connected(a, b):
            uf.union(a, b)
            chosen.append(edge)
        if len(chosen) == len(g) - 1:
            break
    return chosen

Exemplo n.º 20

0

Exibir arquivo

 def _transitive_closure(self):
     """Group the indices ``0..n-1`` into chunklets from the links in ``self.ml``.

     Sets ``uf``, ``chunklets``, ``n_chunklets``, ``chunklet_shapes`` (a
     column of chunklet sizes) and ``chunklet_means`` (mean of ``self.X``
     rows per chunklet), then asserts the means have shape
     ``(n_chunklets, d)``.
     """
     self.uf = UnionFind(np.arange(self.n))
     for link in self.ml:
         self.uf.union(link[0], link[1])

     members = [np.array(list(component))
                for component in self.uf.components()]
     self.chunklets = np.array(members)
     self.n_chunklets = self.chunklets.shape[0]

     sizes = [chunk.shape[0] for chunk in self.chunklets]
     self.chunklet_shapes = np.array(sizes).reshape(-1, 1)

     means = [np.mean(self.X[chunk], axis=0) for chunk in self.chunklets]
     self.chunklet_means = np.array(means)
     assert self.chunklet_means.shape == (self.n_chunklets, self.d)

Exemplo n.º 21

0

Exibir arquivo

    def findCircleNum(self, M):
        """
        Return ``uf.componentsNum()`` after linking every pair flagged in M.

        Only entries below the diagonal are read; ``uf.add(row, col)`` is
        called whenever ``M[row][col] == 1``.

        :type M: List[List[int]]
        :rtype: int
        """
        size = len(M)
        uf = UnionFind(size)
        for row in range(size):
            for col in range(row):
                if M[row][col] == 1:
                    uf.add(row, col)

        return uf.componentsNum()

Exemplo n.º 22

0

Exibir arquivo

    def mspEdges(self, heuristicFn):
        """Return the edges kept while scanning in ``argsort(weights)`` order.

        Weights come from ``self.generate_weights(heuristicFn)``.  An edge is
        kept only when its endpoints are not yet connected in the UnionFind.
        """
        node_count = len(self.nodes)
        uf = UnionFind(node_count)
        weights = self.generate_weights(heuristicFn)

        chosen = []
        for edge_idx in argsort(weights):
            u, v = self.edges[edge_idx]
            if uf.connected(u, v):
                continue
            chosen.append((u, v))
            uf.union(u, v)
        return chosen

Exemplo n.º 23

0

Exibir arquivo

Arquivo: graph2.py Projeto: tohyongcheng/Paper3D

  def mspEdges(self, heuristicFn):
    """Collect edges that join unconnected nodes, in argsort(weights) order.

    Weights are produced by self.generate_weights(heuristicFn); the result
    is a list of (u, v) tuples.
    """
    uf = UnionFind(len(self.nodes))
    weights = self.generate_weights(heuristicFn)

    kept = []
    for position in argsort(weights):
      u, v = self.edges[position]
      already_joined = uf.connected(u, v)
      if not already_joined:
        kept.append((u, v))
        uf.union(u, v)
    return kept

Exemplo n.º 24

0

Exibir arquivo

Arquivo: solution.py Projeto: wushuzh/leetcode

def longest_consecutive_by_uf(nums: List[int]) -> int:
    """Union the index of each value with the index of value + 1.

    Returns 0 for empty input, otherwise ``uf.largest_one_union()``.  For
    duplicated values the last index wins.
    """
    if not nums:
        return 0

    uf = UnionFind(nums)
    index_of = {value: idx for idx, value in enumerate(nums)}
    for value in index_of:
        successor = value + 1
        # Membership test, not truthiness: index 0 is a valid position.
        if successor in index_of:
            uf.union(index_of[value], index_of[successor])
    return uf.largest_one_union()

Exemplo n.º 25

0

Exibir arquivo

Arquivo: clustering.py Projeto: Inevitable-Marzipan/algo_design

def max_space_clustering(edges, vertices, clusters):
    """Merge the closest vertices until ``clusters`` groups remain.

    Edges are processed in ascending ``dist`` order.  After
    ``len(vertices) - clusters`` unions, the next edge that still joins two
    different groups defines the maximum spacing.

    Returns:
        ``(root_parents, max_spacing)`` where ``root_parents`` maps each
        vertex to its root.  ``max_spacing`` is ``None`` when no such
        separating edge exists (previously this raised UnboundLocalError).
    """
    union_find = UnionFind()
    for vertex in vertices:
        union_find.add_vertex(vertex)
    edges = sorted(edges, key=(lambda x: x.dist))

    max_spacing = None
    merges_done = 0
    merges_needed = len(vertices) - clusters
    for edge in edges:
        rootu = union_find.path_compress_find(edge.u)
        rootv = union_find.path_compress_find(edge.v)

        # An edge inside a single group never changes anything.
        if rootu == rootv:
            continue

        if merges_done == merges_needed:
            max_spacing = edge.dist
            break

        union_find.union(edge.u, edge.v)
        merges_done += 1

    root_parents = {
        vertex: union_find.path_compress_find(vertex)
        for vertex in list(vertices)
    }

    return root_parents, max_spacing

Exemplo n.º 26

0

Exibir arquivo

Arquivo: superbase.py Projeto: sequba/text_mining_assignments

    def __init__(self, lemat_dict_file):
        """Build a UnionFind that links lemmas listed together by the lematizer.

        Every lemma from ``all_lemats()`` gets its own set; then, for each
        item of the lematizer, consecutive lemmas are unioned.
        """
        self.lematizer = Lematizer(lemat_dict_file)
        self.superbase = UnionFind()

        for lemma in self.lematizer.all_lemats():
            self.superbase.make_set(lemma)

        for _, lems in self.lematizer.items():
            previous_root = None
            for lemma in lems:
                if previous_root:
                    self.superbase.union(previous_root, lemma)
                previous_root = self.superbase.find(lemma)

Exemplo n.º 27

0

Exibir arquivo

	def kruskalMST(self):
		"""Run Kruskal's algorithm and return the MST cost, or None.

		Edges are popped from self.PQ (filled by generatePQ); each edge whose
		endpoints are not yet connected is unified, added to self.mst and its
		cost accumulated in self.mstCost.  The tree exists only when the
		component containing node 0 ends up with self.N members; otherwise
		None is returned.
		"""
		self.generatePQ()
		uf = UnionFind()
		uf.WeightedQuickUnionUF(self.N)
		self.mst = [None] * self.N

		index = 0

		while self.PQ:
			edge = heapq.heappop(self.PQ)
			if uf.connected(edge.u, edge.v):
				continue
			uf.unify(edge.v, edge.u)
			self.mstCost += edge.cost
			self.mst[index] = edge
			index += 1
			# Stop early once every node has joined node 0's component.
			if uf.size1(0) == self.N:
				break

		# Report the cost only for a spanning tree; a hard-coded "solved"
		# flag used to make the None result unreachable.
		mstExists = (uf.size1(0) == self.N)
		if mstExists:
			return self.mstCost
		return None

Exemplo n.º 28

0

Exibir arquivo

Arquivo: graph.py Projeto: amirfarhat/utilities

    def kruskal_mst(self):
        """Return ``(mst, w)``: the MST edge set and its total weight.

        Edges are deduplicated so that only one orientation of each
        ``(u, v)`` / ``(v, u)`` pair is considered, then scanned in ascending
        weight order.

        Raises:
            AssertionError: if the graph is not connected.
        """
        assert self.is_connected(), "Can only find MST of a connected graph"
        uf = UnionFind()
        mst = set()

        for v in self.vs:
            uf.make_set(v)

        # Keep a single orientation of every edge.
        half = set()
        for u, v in sorted(self.es):
            if (v, u) not in half:
                half.add((u, v))

        w = 0
        vs = set()
        for u, v in sorted(half, key=lambda e: self.weights[e]):
            if len(vs) == len(self.vs):
                break

            if uf.find_set(u) != uf.find_set(v):
                uf.union(u, v)
                mst.add((u, v))
                vs.add(u)
                vs.add(v)
                w += self.weights[(u, v)]

        # Always return the result, including when the final edge completed
        # the tree and the loop simply ran out of edges.
        return mst, w

Exemplo n.º 29

0

Exibir arquivo

def accounts_merge_using_union_find(
        accounts: List[List[str]]) -> List[List[str]]:
    """Merge accounts that share an email address.

    Each account is ``[name, email, ...]``.  Every email is unioned with the
    first email of its account.  For each ``(key, emails)`` item of
    ``uf.groups()`` the result holds the name recorded for ``key`` followed
    by the sorted emails.
    """
    uf = UnionFind()
    email_to_name = dict()

    for account in accounts:
        name = account[0]
        emails = account[1:]
        for email in emails:
            email_to_name[email] = name
            uf.union(email, emails[0])

    merged = []
    for pmail, emails in uf.groups().items():
        merged.append([email_to_name[pmail]] + sorted(emails))
    return merged

Exemplo n.º 30

0

Exibir arquivo

Arquivo: gen_model.py Projeto: mquezada/news-model

def gen_model(dataset_name):
    """Link tweets to their URLs and replied-to tweets with UnionFind.

    Loads the dataset via ``load_data``, builds ``(tweet_id, url)`` and
    ``(tweet_id, reply_id)`` pairs, unions every pair and logs counts.

    Returns:
        ``(uf, event_data)``: the populated UnionFind and the loaded data.
    """
    event_data, missing_urls_amount = load_data(dataset_name)

    ##########

    _info(
        "create pairs (t, u) or (t, t') for each tweet t and url u or replied/retweeted tweet t'"
    )
    replies_amount = 0
    retweets_amount = 0
    quotes_amount = 0
    missing_replies_amount = 0
    pairs = []
    for tweet_id, tweet in event_data.items():
        pairs.extend(
            (tweet_id, url) for url in tweet.expanded_urls.values() if url)

        # retweets ARE considered, due to be exact text copies of the retweeted tweet
        if tweet.retweet_id != 'NULL':
            retweets_amount += 1
        if tweet.quote_id != 'NULL':
            quotes_amount += 1
        if tweet.reply_id != 'NULL':
            replies_amount += 1
            if tweet.reply_id in event_data:
                pairs.append((tweet_id, tweet.reply_id))
            else:
                # The replied-to tweet is absent from the dataset; count it
                # in the figure that the log line below reports.
                missing_replies_amount += 1
    _info(
        f'total pairs: {len(pairs)}, retweets: {retweets_amount}, quotes: {quotes_amount}, replies: {replies_amount} '
        f'(missing: {missing_replies_amount})')

    ##########
    # All keys must be of the same type (in this case, strings): unionfind
    # vectorizes operations and casts everything in the array to one type, so
    # mixing integers and strings casts everything to string and comparisons
    # fail when calling uf.components().

    _info('applying union-find')
    uf = UnionFind()
    for u, v in pairs:
        uf.union(u, v)
    _info(f'total components: {len(uf.components())}')

    return uf, event_data

Exemplo n.º 31

0

Exibir arquivo

Arquivo: hammond_distances.py Projeto: doug-wade/AlgorithmsGreatestHits

def hammond_distances(file_path):
    """Count the clusters formed by the codes listed in ``file_path``.

    The first line holds two space-separated ints: the number of code lines
    to read and the number of bits per code.  Each code (spaces and newline
    stripped) is added to a UnionFind and passed to ``update_singles`` and
    ``update_doubles``.

    Returns:
        The number of distinct roots among the UnionFind's nodes.
    """
    # "with" guarantees the file is closed even if parsing or a helper raises.
    with open(file_path) as file_stream:
        line_one = file_stream.readline().split(' ')
        count_edges, count_bits = int(line_one[0]), int(line_one[1])
        uf = UnionFind([])
        for _ in range(count_edges):
            code = file_stream.readline()
            code = code.replace(' ', '').replace('\n', '')
            uf.add(code)
            update_singles(uf, code, count_bits)
            update_doubles(uf, code, count_bits)

    clusters = {uf.find(k) for k in uf._node_titles.keys()}
    return len(clusters)

Exemplo n.º 32

0

Exibir arquivo

Arquivo: lsh.py Projeto: svebk/dig-lsh-clustering

 def __init__(self, minHashLen=13, numRowsInBucket=2, threshold=None):
     """Set up the UnionFind, signer, hasher, per-band buckets and lshmap."""
     self.unionfind = UnionFind()
     self.signer = MinHashSignature(minHashLen)
     self.hasher = LSH(minHashLen, numRowsInBucket, threshold)
     # One defaultdict(list) per band reported by the hasher.
     band_count = self.hasher.get_n_bands()
     self.hashmaps = [defaultdict(list) for _ in range(band_count)]
     self.lshmap = {}

Exemplo n.º 33

0

Exibir arquivo

Arquivo: test_unionfind.py Projeto: bjorns/algo

    def test_add_node(self):
        """Each added node is its own leader and starts with no followers."""
        u = UnionFind()
        nodes = [Node(label) for label in ("foo", "bar", "baz")]
        for node in nodes:
            u.add(node)

        self.assertEqual(3, len(u.leader))
        for node in nodes:
            self.assertEqual(node, u.leader[node])

        self.assertEqual(3, len(u.followers))
        for node in nodes:
            self.assertEqual(set(), u.followers[node])

Exemplo n.º 34

0

Exibir arquivo

Arquivo: map.py Projeto: sassani/traveller-salesman-problem

    def __init__(self, cities_data, data_limit: int = 0, print: bool = False):
        """Read cities from a CSV and build the loops.

        ``cities_data`` is passed to ``pd.read_csv``; rows need ``CityId``,
        ``X`` and ``Y``.  A non-zero ``data_limit`` keeps only that many
        leading rows.  ``print`` is stored in ``self._print``.
        """
        self.cities = {}
        self._loops = []
        self._borders = []
        self.loops = UnionFind()
        self._print = print

        frame = pd.read_csv(cities_data)
        row_cap = data_limit if data_limit != 0 else len(frame)

        for _, record in frame[0:row_cap].iterrows():
            city = City(record.CityId, record.X, record.Y)
            self.cities[city.id] = city
        self._create_loops()

Exemplo n.º 35

0

Exibir arquivo

Arquivo: hamming_distance_clustering.py Projeto: nikipi/coursera_algorithms

def Clustering(nodes):
    """Union every node with each of its neighbours that is also in ``nodes``.

    Neighbours come from ``getNeiborhood``; progress is shown through
    ``tqdm``.  Returns the resulting UnionFind.
    """
    forest = UnionFind(nodes.keys())

    for node in tqdm(nodes):
        for neighbour in getNeiborhood(node):
            if neighbour in nodes and not forest.connected(node, neighbour):
                forest.union(node, neighbour)

    return forest

Exemplo n.º 36

0

Exibir arquivo

Arquivo: test_unionfind.py Projeto: bjorns/algo

    def test_union(self):
        """After union(foo, bar), both nodes resolve to bar."""
        u = UnionFind()
        foo = Node("foo")
        u.add(foo)
        bar = Node("bar")
        u.add(bar)

        # Before the union every node is its own representative.
        for node in (foo, bar):
            self.assertEqual(node, u.find(node))

        u.union(foo, bar)

        for node in (foo, bar):
            self.assertEqual(bar, u.find(node))

Exemplo n.º 37

0

Exibir arquivo

Arquivo: hamming.py Projeto: bjorns/algo

def cluster(vertices, radix):
    """Union every vertex with its Hamming neighbours that are also vertices.

    Candidates come from ``hamming_neighbours(v, radix=radix, dist=2)``.
    A progress line is written to stdout every 100 vertices.

    Returns:
        The populated UnionFind.
    """
    V = frozenset(vertices)
    u = UnionFind()
    for v in V:
        u.add(v)
    # Single-argument call form: valid on both Python 2 and Python 3.
    print("Starting with {} clusters".format(u.clusters))
    for i, v in enumerate(V):
        if i % 100 == 0:
            # float() keeps this a true division on Python 2 as well.
            percent = 100*(float(i) / len(vertices))
            sys.stdout.write("\r%f%% (%d)" % (percent, len(V)))
            sys.stdout.flush()
        for candidate in hamming_neighbours(v, radix=radix, dist=2):
            if candidate in V:
                u.union(v, candidate)
    sys.stdout.write("\n")
    return u

Exemplo n.º 38

0

Exibir arquivo

Arquivo: phylo_tree.py Projeto: ebenmichael/genomics_project

 def mst(self, b_lengths, weights):
     """Kruskal's algorithm for minimum spanning tree
     Input:
         b_lengths: dictionary with keys (node,node) and values branch length
         weights: dictionary maps (node,node) to negative log likelihood
     Output:
         adj_mat: adjacency matrix. dict (node,node) keys weight values
     """
     # Reset the parent links of every node so the tree can be rebuilt;
     # children are cleared only for nodes that have more than one child.
     for i in range(len(self.nodes)):
         node = self.nodes[i]
         has_many_children = len(node.children) > 1
         node.parent = None
         node.parent_weight = None
         if has_many_children:
             node.children = []

     d_set = UnionFind()
     # edges in non-decreasing weight order
     by_weight = sorted(weights.items(), key=lambda item: item[1])
     # the new graph, stored symmetrically
     adj_mat = {}
     for (u, v), _ in by_weight:
         if d_set[u] == d_set[v]:
             continue
         d_set.union(u, v)
         length = b_lengths[(u, v)]
         adj_mat[(u, v)] = length
         adj_mat[(v, u)] = length

     return adj_mat

Exemplo n.º 39

0

Exibir arquivo

Arquivo: cluster.py Projeto: bjorns/algo

def cluster(graph, k):
    """Merge nodes along popped edges until ``u.clusters`` is at most ``k``.

    Edges for which ``cycle(u, edge)`` is true are skipped.  Returns
    ``(mindist, u.followers)`` where ``mindist`` comes from ``get_mindist``
    applied to the remaining edges.
    """
    edges = heapify(graph.edges)

    u = UnionFind()
    for node in graph.nodes.values():
        u.add(node)

    while u.clusters > k:
        cost, edge = heappop(edges)
        if not cycle(u, edge):
            u.union(u.find(edge.v0), u.find(edge.v1))

    mindist = get_mindist(u, edges)
    return mindist, u.followers

Exemplo n.º 40

0

Exibir arquivo

Arquivo: clustering.py Projeto: NicovincX2/Python-3.5

def max_k_clustering(gr, k):
    """Merge the closest nodes until at most ``k`` groups remain.

    Edges from ``gr.get_edge_weights()`` are processed in sorted order.

    Returns:
        ``uf.get_sets()`` once the group count is at most ``k`` - including
        when the very last edge achieves that, or when there are no edges to
        process at all.  ``None`` if the edges run out while more than ``k``
        groups remain.
    """
    sorted_edges = sorted(gr.get_edge_weights())
    uf = UnionFind()
    # initialize each node as its cluster
    for n in gr.nodes():
        uf.insert(n)

    for (w, (u, v)) in sorted_edges:
        if uf.count_groups() <= k:
            break
        leader_u = uf.get_leader(u)
        leader_v = uf.get_leader(v)
        if leader_u != leader_v:
            uf.make_union(leader_u, leader_v)

    # Re-check after the loop so a target reached on the final edge is not
    # lost by falling off the end of the function.
    if uf.count_groups() <= k:
        return uf.get_sets()
    return None

Exemplo n.º 41

0

Exibir arquivo

Arquivo: lsh.py Projeto: cjdd3b/lshhdc-names

 def __init__(self, width=10, threshold=0.5):
     """Set up an empty hashmap plus the UnionFind, signer and hasher."""
     self.hashmap = {}
     self.width = width
     self.unionfind = UnionFind()
     self.signer = MinHashSignature(width)
     self.hasher = LSH(width, threshold)

Exemplo n.º 42

0

Exibir arquivo

Arquivo: gamsout2color.py Projeto: CompBioUIC/CommDy

#					g = open(gtm_fname, 'r')
#					g.close()
#				except IOError:
#					gtm_fname = None
#				if gtm_fname is not None: break
#			if gtm_fname is None:
#				sys.stderr.write("Specify a gtm file\n")
#				exit(1)
#			sys.stderr.write("using " + gtm_fname + "\n")

# Load the gtm file and remember how many groups it declares.
gtm = GtmFile(gtm_fname)
group_count = len(gtm.groups)

# find connected components which have to be paths
# wanna check if they are paths?
uf = UnionFind()
# Each data line of f holds at least two integers separated by a comma or a
# space; the first two are unioned.  Reading stops at the "# y" marker line.
for line in f.readlines():
	line = line.strip()
	if line.startswith("# y"): break
	# skip blank lines and other comment lines
	if line=="" or line.startswith("#"): continue
	# from here on, "line" is rebound to the list of fields
	if line.find(',')>=0: line = line.split(',')
	elif line.find(' ')>=0: line = line.split(' ')
	else: raise Exception("Invalid line: "+line)
	if len(line)>=2:
		u,v = int(line[0]), int(line[1])
	else:
		raise Exception("ERROR line: %s"%line)
	uf.union(u,v)

# make lists of groups (no dummies) in each component
# group ids run from 1 to group_count inclusive
vertices = range(1, group_count+1)

Exemplo n.º 43

0

Exibir arquivo

Arquivo: unionfind_test.py Projeto: NicovincX2/Python-3.5

 def setUp(self):
     """Create a UnionFind pre-loaded with the pairs a-b, b-c and i-j."""
     self.uf = UnionFind()
     for first, second in (("a", "b"), ("b", "c"), ("i", "j")):
         self.uf.insert(first, second)

Exemplo n.º 44

0

Exibir arquivo

Arquivo: unionfind_test.py Projeto: NicovincX2/Python-3.5

class test_unionfind(unittest.TestCase):
    """Tests for UnionFind's insert, get_leader, make_union and count_groups."""

    def setUp(self):
        """Start every test with the pairs a-b, b-c and i-j inserted."""
        self.uf = UnionFind()
        for first, second in (("a", "b"), ("b", "c"), ("i", "j")):
            self.uf.insert(first, second)

    def test_get_parent_method(self):
        """Leaders are "a" for a/b/c and "i" for i/j."""
        expected_leader = (
            ("a", "a"),
            ("b", "a"),
            ("c", "a"),
            ("j", "i"),
            ("i", "i"),
        )
        for member, leader in expected_leader:
            self.assertEqual(leader, self.uf.get_leader(member))
        self.assertNotEqual(self.uf.get_leader("a"), self.uf.get_leader("i"))

    def test_insert_method(self):
        """Inserting c-d puts d in the same group as c and a."""
        self.uf.insert("c", "d")
        leader_of_d = self.uf.get_leader("d")
        self.assertEqual(self.uf.get_leader("c"), leader_of_d)
        self.assertEqual(self.uf.get_leader("a"), self.uf.get_leader("d"))

    def test_insert_one_node(self):
        """A node inserted alone leads itself and adds one group."""
        self.uf.insert('z')
        self.assertEqual(self.uf.get_leader('z'), 'z')
        self.assertEqual(self.uf.count_groups(), 3)

    def test_make_union_method(self):
        """Joining the two leaders gives a and i a common leader."""
        leader_a = self.uf.get_leader("a")
        leader_i = self.uf.get_leader("i")
        self.uf.make_union(leader_a, leader_i)
        self.assertEqual(self.uf.get_leader("a"), self.uf.get_leader("i"))

    def test_make_union_with_invalid_leader_raises_exception(self):
        """make_union raises when given "a" and the unknown "z"."""
        with self.assertRaises(Exception):
            self.uf.make_union("a", "z")

    def test_get_count(self):
        """Inserting the new pair z-y brings the group count to three."""
        self.uf.insert("z", "y")
        self.assertEqual(self.uf.count_groups(), 3)

Exemplo n.º 45

0

Exibir arquivo

Arquivo: utilities.py Projeto: tohyongcheng/Paper3D

def makeUnionFind(_set,N):
    """Return a UnionFind built with N and united over every pair in _set.

    _set is iterated once; each item must unpack into exactly two values,
    which are passed to ``union`` in that order.
    """
    forest = UnionFind(N)
    for pair in _set:
        first, second = pair
        forest.union(first, second)
    return forest

Exemplo n.º 46

0

Exibir arquivo

Arquivo: lsh.py Projeto: svebk/dig-lsh-clustering

class Cluster(object):
    """Clusters sets with Jaccard similarity above threshold with high
    probability.

    Algorithm based on Rajaraman, "Mining of Massive Datasets":
    1. Generate set signature
    2. Use LSH to map similar signatures to same buckets
    3. Use UnionFind to merge buckets containing same values
    """

    def __init__(self, minHashLen=13, numRowsInBucket=2, threshold=None):
        """Set up the signer, the LSH hasher and the per-band bucket tables.

        minHashLen is passed to MinHashSignature and LSH; numRowsInBucket
        and threshold are passed to LSH only.
        """
        self.unionfind = UnionFind()
        self.signer = MinHashSignature(minHashLen)
        self.hasher = LSH(minHashLen, numRowsInBucket, threshold)
        # One table per LSH band: hash value -> labels, in insertion order.
        self.hashmaps = [defaultdict(list)
                         for _ in range(self.hasher.get_n_bands())]
        # label -> that label's hash values, one per band.
        self.lshmap = {}

    def add_set(self, s, label=None):
        """Sign and hash ``s``, then file it under ``label`` in every band.

        When ``label`` is None the set itself is used as the label, so it
        must then be usable as a dictionary key.  Falsy labels such as 0 or
        "" are legitimate and are kept as given.
        """
        # Only a missing label falls back to the set; `not label` would also
        # discard valid falsy labels like 0.
        if label is None:
            label = s

        # Add to unionfind structure
        self.unionfind[label]

        # Get signature
        sig = self.signer.sign(s)

        # Union labels with same LSH key in same band
        lsh_keys = self.hasher.hash(sig)
        self.lshmap[label] = []

        for band_idx, hshval in enumerate(lsh_keys):
            bucket = self.hashmaps[band_idx][hshval]
            bucket.append(label)
            # The first label filed in a bucket acts as its anchor.
            self.unionfind.union(label, bucket[0])
            self.lshmap[label].append(hshval)

    def get_clusters(self, min_cluster_len):
        """Yield every bucket holding more than ``min_cluster_len`` labels.

        Buckets are visited band by band; the comparison is strict.
        """
        for hashmap in self.hashmaps:
            for members in hashmap.values():
                if len(members) > min_cluster_len:
                    yield members

    def get_clusters_with_hashes(self, min_cluster_len):
        """Like get_clusters, but pair each label with its LSH hash values.

        Each yielded list holds ``(label, hashes)`` tuples; a label whose
        recorded hash list is empty is yielded bare.
        """
        for hashmap in self.hashmaps:
            for members in hashmap.values():
                if len(members) > min_cluster_len:
                    yield [
                        (label, self.lshmap[label])
                        if self.lshmap[label] else label
                        for label in members
                    ]

    def get_cluster_unions(self, min_cluster_len):
        """Yield each union-find set with more than ``min_cluster_len`` items."""
        for group in self.unionfind.sets():
            if len(group) > min_cluster_len:
                yield group

    # The parameter name `object` shadows the builtin but is kept below so
    # that existing keyword callers continue to work.
    def get_min_hash(self, object):
        """Return the signature of ``object`` as a list."""
        return list(self.signer.sign(object))

    def get_lsh_hash(self, object):
        """Return the LSH hash values of ``object``'s signature as a list."""
        sig = self.signer.sign(object)
        return list(self.hasher.hash(sig))

Exemplo n.º 47

0

Exibir arquivo

Arquivo: Color2.py Projeto: CompBioUIC/CommDy

    def recolor_by_connected_components(self):
        """Renumber group and individual colors by connected component.

        A union-find is built over group ids ``g`` and ``(individual, time)``
        pairs.  A group is joined with a member at time ``t`` when their
        current colors match, and an individual is joined with itself at the
        previous time step when its color did not change.  Each resulting
        component gets a new color numbered from 1 in order of first
        encounter, and ``self.group_color`` / ``self.ind_color`` are
        overwritten in place.

        NOTE(review): the ``g-1`` / ``i-1`` / ``t-1`` indexing suggests ids
        and times are 1-based -- confirm against the gtm model.
        """
        from unionfind import UnionFind
        uf = UnionFind()
        gtm = self.gtm

        for t in gtm.times:
            # Look up every node of this time step first; presumably find()
            # registers items it has not seen yet -- TODO confirm.
            for g in gtm.time[t]:
                uf.find(g)
            for i in gtm.inds:
                uf.find((i, t))

            # Join each group with the members that share its color at t.
            for g in gtm.time[t]:
                for i in gtm.group[g]:
                    if self.group_color[g-1] == self.ind_color[i-1][t-1]:
                        uf.union(g, (i, t))
                        uf.find(g)  # result unused; call kept as in the original

            # Join an individual across consecutive steps when its color
            # is unchanged.
            if t > 1:
                for i in gtm.inds:
                    if self.ind_color[i-1][t-1] == self.ind_color[i-1][t-2]:
                        uf.union((i, t-1), (i, t))
                        uf.find((i, t-1))  # result unused; call kept as in the original

        new_color = {}

        def assign_color(node):
            # Give the node's component the next free color on first sight.
            root = uf.find(node)
            if root not in new_color:
                new_color[root] = len(new_color) + 1

        for t in gtm.times:
            for g in gtm.time[t]:
                assign_color(g)
            for i in gtm.inds:
                assign_color((i, t))

        # Write the component colors back over the old ones.
        for g in gtm.groups:
            self.group_color[g-1] = new_color[uf.find(g)]
        for i in gtm.inds:
            for t in gtm.times:
                self.ind_color[i-1][t-1] = new_color[uf.find((i, t))]

Exemplo n.º 48

0

Exibir arquivo

Arquivo: test_unionfind.py Projeto: bjorns/algo

 def test_find(self):
     """find() on a freshly added node is expected to return that node."""
     node = Node("foo")
     forest = UnionFind()
     forest.add(node)
     found = forest.find(node)
     self.assertEqual(node, found)

Exemplo n.º 49

0

Exibir arquivo

Arquivo: gen_maze.py Projeto: DavideCanton/pyIA

def maze(w, h, size=2):
    """Build a ``Labyrinth(w, h)`` by carving a random spanning tree.

    A coarse grid of nodes is laid over the labyrinth, with node (x, y)
    sitting at cell ``(size * x, size * y)``.  Edges between neighbouring
    nodes are shuffled and popped one at a time; an edge whose endpoints are
    still in different union-find sets is accepted and its corridor is
    written into the labyrinth as ``True`` cells.  This repeats until
    ``len(uf)`` drops to 1 (presumably the number of disjoint sets -- TODO
    confirm against UnionFind).

    Finally cells (0, 0) and (lab.w - 2, lab.h - 2) are set to 1 and
    recorded as ``lab.start`` and ``lab.goal``.  Returns the labyrinth.
    """
    def conv_size(n):
        # Number of coarse nodes along a dimension of n cells.
        return (n - 1) // size + 1

    nw, nh = conv_size(w), conv_size(h)
    ns = size // 2 - 1  # extra cells carved on each side of a corridor line
    uf = UnionFind(nw * nh)
    lab = Labyrinth(w, h)

    # Start with every cell set to 0.
    for x in range(w):
        for y in range(h):
            lab[x, y] = 0

    # Candidate edges, in a fixed order because shuffle() below depends on it.
    edges = []
    for row in range(nh - 1):
        for col in range(nw - 1):
            node = flatten(row, col, nw, nh)
            edges.extend([(node, node + 1), (node, node + nw)])  # right, down

    for row in range(nh - 1):
        # Last column: only the downward edge exists.
        node = flatten(row, nw - 1, nw, nh)
        edges.append((node, node + nw))

    for col in range(nw - 1):
        # Last row: only the rightward edge exists.
        node = flatten(nh - 1, col, nw, nh)
        edges.append((node, node + 1))

    shuffle(edges)

    while len(uf) > 1:
        u, v = edges.pop()
        y1, x1 = unflatten(u, nw, nh)
        _y2, x2 = unflatten(v, nw, nh)
        if uf.find(u) == uf.find(v):
            # Endpoints already connected; this edge would close a cycle.
            continue
        uf.union(u, v)

        base_x, base_y = size * x1, size * y1
        if x2 - x1 == 1:
            # Edge to the right: walk along x, widening along y.
            for step in range(size + 1):
                px = base_x + step
                for off in range(1, ns + 1):
                    py = base_y - off
                    if py < 0:
                        break
                    lab[px, py] = True
                lab[px, base_y] = True
                for off in range(1, ns + 1):
                    py = base_y + off
                    if py >= h:
                        break
                    lab[px, py] = True
        else:
            # Edge downwards: walk along y, widening along x.
            for step in range(size + 1):
                py = base_y + step
                for off in range(1, ns + 1):
                    px = base_x - off
                    if px < 0:
                        break
                    lab[px, py] = True
                lab[base_x, py] = True
                for off in range(1, ns + 1):
                    px = base_x + off
                    if px >= w:
                        break
                    lab[px, py] = True

    lab[0, 0] = 1
    lab.start = 0, 0
    lab[lab.w - 2, lab.h - 2] = 1
    lab.goal = lab.w - 2, lab.h - 2

    return lab

Exemplo n.º 50

0

Exibir arquivo

Arquivo: edges.py Projeto: ad1cted/UTEM

class Edge(object):
    """A graph edge between two nodes, with a cost and an optional mark.

    Edges order by ``cost``.  ``__cmp__`` serves Python 2; ``__lt__`` gives
    the same ordering on Python 3, where ``__cmp__`` is ignored, so that
    ``sorted()``, ``min()`` and ``<`` work there too.
    """

    def __init__(self, node1, node2, cost=0, marked=None):
        """Store the two endpoints, the cost (default 0) and the mark."""
        self.node1 = node1
        self.node2 = node2
        self.cost = cost
        self.marked = marked

    def __cmp__(self, y):
        """Return the cost difference: negative, zero or positive."""
        return self.cost - y.cost

    def __lt__(self, y):
        """Return True when this edge is cheaper than ``y``."""
        if not isinstance(y, Edge):
            # Let Python try the reflected operation or raise TypeError.
            return NotImplemented
        return self.cost < y.cost

    def __repr__(self):
        """Return ``<Edge(node1, node2), cost:cost>``."""
        return '<Edge(%s, %s), cost:%s>' % (self.node1, self.node2, self.cost)

# Minimum spanning tree cost by Kruskal's algorithm: take edges in order of
# increasing cost and keep each one whose endpoints are in different sets.
#
# Input: a header line with two fields, then one "<node1> <node2> <cost>"
# record per line.
with open('./edges.txt', 'r') as f:
    # The header values are not used below; the unpack still rejects a
    # header that does not have exactly two fields.
    n_nodes, n2 = f.readline().strip().split()
    edges = []
    for line in f:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing one) instead of crashing.
            continue
        a, b, c = fields
        edges.append(Edge(a, b, cost=int(c)))

edges.sort(key=lambda x: x.cost)
U = UnionFind()
T = []  # edges accepted into the spanning tree
for e in edges:
    # Presumably U[x] returns the representative of x's set -- verify
    # against the UnionFind implementation in use.
    if U[e.node1] != U[e.node2]:
        T.append(e)
        U.union(e.node1, e.node2)
# Parenthesised single-argument form runs on both Python 2 and Python 3.
print(sum(e.cost for e in T))