class graph(object):
    """Weighted graph on which a minimum spanning tree (MST) is calculated.

    Edges are ``(vertex_1, vertex_2, weight)`` triples; they are stored
    sorted by ascending weight so ``kruskal`` can consume them in order.
    """

    def __init__(self, vertices, edges):
        self.vertices = vertices
        # Kruskal's algorithm processes edges from lightest to heaviest.
        self.edges = sorted(edges, key=lambda edge: edge[2])

    def kruskal(self):
        """Compute the MST with Kruskal's algorithm.

        The disjoint set used to track components is stored on ``self.ds``
        and the resulting edge list on ``self.mst``.

        Returns:
            list: ``[vertex_1, vertex_2, weight]`` entries.  If the graph is
            disconnected the edges run out before ``|V| - 1`` of them are
            selected; a spanning forest is returned in that case instead of
            an ``IndexError`` being raised.
        """
        self.ds = DisjointSet()
        self.mst = []
        # A spanning tree over |V| vertices contains exactly |V| - 1 edges.
        edges_needed = len(self.vertices) - 1
        for vertex_1, vertex_2, weight in self.edges:
            if len(self.mst) >= edges_needed:
                break
            cluster_1 = self.ds.find(vertex_1)
            cluster_2 = self.ds.find(vertex_2)
            # Endpoints in different clusters: the edge cannot close a cycle.
            if cluster_1 != cluster_2:
                self.ds.union(cluster_1, cluster_2)
                self.mst.append([vertex_1, vertex_2, weight])
        return self.mst
def _setup_lemma_merges(self):
    """Build ``self.merged_lemma_table`` mapping each word to a group name.

    Words are merged with their lemmas in ``self.lemma_merge_ds``; every
    word is then mapped to the member of its group that occurs most often
    as a lemma (``None`` when no member of the group ever occurs as one).
    E.g. [voyage, voyages, voyagerai, ...] should map to the same lemma.
    """
    merges = DisjointSet()
    self.lemma_merge_ds = merges
    for word, lemma, _ in self._iterate_words():
        merges.union(word, lemma)

    # How often each string is used as a lemma.
    lemma_freq = Counter(lemma for _, lemma, _ in self._iterate_words())

    # Collect the words belonging to each merged group, keyed by its root.
    members_by_root = defaultdict(set)
    for word, _, _ in self._iterate_words():
        members_by_root[merges.find(word)].add(word)

    table = {}
    self.merged_lemma_table = table
    for word, _, _ in self._iterate_words():
        if word in table:
            continue
        best, best_count = None, 0
        for candidate in members_by_root[merges.find(word)]:
            count = lemma_freq[candidate]
            # Strict comparison: the first candidate reaching the top count wins.
            if count > best_count:
                best, best_count = candidate, count
        table[word] = best
def get_correlated_columns(corr_mat, thresh=0.8):
    """Split correlated columns into representatives and redundant ones.

    Columns whose absolute correlation is at least ``thresh`` are linked;
    within each linked group the column with the smallest positional index
    is kept and the others are marked for removal (mirrors the behaviour of
    ``drop_duplicates(keep="first")``).

    Returns:
        tuple: ``(keep_cols, remove_cols)`` taken from ``corr_mat.columns``.
        Columns that are not linked to any other column appear in neither.
    """
    strength = corr_mat.abs().values
    size = len(strength)

    # Union every pair in the upper triangle that passes the threshold.
    linked = DisjointSet()
    for i in range(size - 1):
        for j in range(i + 1, size):
            if strength[i][j] >= thresh:
                linked.union(i, j)

    keep_cols_idx, remove_cols_idx = [], []
    for members in linked.itersets():
        representative = min(members)
        members.remove(representative)
        keep_cols_idx.append(representative)
        remove_cols_idx.extend(members)

    col_names = corr_mat.columns
    return col_names[keep_cols_idx], col_names[remove_cols_idx]
def mst_sekihara_method(xs, ys, zs, radii, alpha, **keywords):
    """Link points with a Kruskal-style spanning tree over Sekihara costs.

    A directed candidate edge ``i -> j`` is created for every ordered pair
    unless ``radii[i] > 1.3 * radii[j]``.  Its weight is
    ``(2 - radii[i] / max_radius) ** 2 * sekihara_method(src, dst, center, 0, alpha)``
    where ``center`` is point 0.  Edges are then accepted in ascending
    weight order whenever they join two different components.

    Args:
        xs, ys, zs: coordinates of the points (same length).
        radii: per-point radius, indexable like the coordinates.
        alpha: forwarded to ``sekihara_method``.
        **keywords: accepted for interface compatibility; unused here.

    Returns:
        tuple: ``(links, 0)`` where ``links`` is a list of ``[u, v]`` pairs.
    """
    n = len(xs)
    # Starts from 0 so the result is defined even for empty input.
    radi_max = max([0] + [radii[i] for i in range(n)])
    # Point 0 is the reference centre for every cost evaluation.
    center = [xs[0], ys[0], zs[0]] if n else None

    edge = []
    for i in range(n):
        src = [xs[i], ys[i], zs[i]]
        for j in range(n):
            if i == j:
                continue
            if radii[i] > 1.3 * radii[j]:
                continue
            dst = [xs[j], ys[j], zs[j]]
            weight = ((2 - radii[i] / radi_max) ** 2
                      * sekihara_method(src, dst, center, 0, alpha))
            edge.append([i, j, weight])
    edge.sort(key=lambda x: x[2])

    links = []
    UF = DisjointSet(n)
    for u, v, _ in edge:
        # Skip edges whose endpoints are already connected (would form a cycle).
        if UF.same(u, v):
            continue
        links.append([u, v])
        UF.merge(u, v)
    return links, 0
def __init__(self, angled_camera, ceil_camera, queue, total_pigs, pen_name):
    """Set up the process state for one pen observed by two cameras.

    Both cameras must be ``Camera`` instances.  The homography matrix and
    the two warp entries ('penc', 'ceil') are read, in that order, from
    ``data/homography/matrices.pickle``.
    """
    multiproc.context.Process.__init__(self)
    for camera in (angled_camera, ceil_camera):
        assert isinstance(camera, Camera)

    self.ceil_camera = ceil_camera
    self.angled_camera = angled_camera
    self.local_buffer = {'ceil': {}, 'angled': {}}
    self.warp_dict = {}

    # The pickle file holds three consecutive objects: H, then the two warps.
    with open("data/homography/matrices.pickle", "rb") as f:
        self.H = pickle.load(f)
        for key in ('penc', 'ceil'):
            self.warp_dict[key] = pickle.load(f)

    self.queue = queue
    self.union_find = DisjointSet()
    self.total_pigs = total_pigs
    self.pigs_seen = 0
    self.pen_name = pen_name
    # Pen "C" uses a ceiling offset of 30; every other pen uses none.
    self.ceil_offset = 30 if self.pen_name == "C" else 0
def kruskal_minimum_spanning_tree(graph: AdjacentListGraph) -> AdjacentListGraph:
    """Return a new graph holding the Kruskal spanning tree of ``graph``.

    Edges of ``graph`` are ``(v_from, v_to, weight)`` triples; they are
    visited in ascending weight order and added to the result whenever
    their endpoints are not yet connected.
    """
    components = DisjointSet(graph.vertices)
    mst_graph = AdjacentListGraph(graph.vertices)
    edges_by_weight = sorted(graph.edges, key=lambda edge: edge[2])
    for v_from, v_to, w in edges_by_weight:
        if components.is_connected(v_from, v_to):
            # Adding this edge would close a cycle.
            continue
        mst_graph.add_edge(v_from, v_to, w)
        components.connect(v_from, v_to)
    return mst_graph
def build_gene_groups(self):
    """Append one ``Group`` per connected set of linked genes.

    Genes are connected through the query/target uids of the links stored
    in ``self._links``.  Each new group is labelled with the number of
    groups that existed just before it was appended.
    """
    linked = DisjointSet()
    for link in self._links.values():
        linked.union(link.query.uid, link.target.uid)

    for members in linked.itersets():
        label = f"Group {len(self.groups)}"
        self.groups.append(Group(label=label, genes=list(members)))
def kruskal_min_upper_bound(lower_bound, edges, n):
    """Return the weight at which vertex 0 and vertex ``n - 1`` get connected.

    Only edges with weight >= ``lower_bound`` are merged, in the order
    given (presumably ascending by weight -- confirm with the caller).

    Args:
        lower_bound: minimum weight an edge needs to be considered.
        edges: iterable of ``(w, u, v)`` triples.
        n: number of vertices.

    Returns:
        The weight of the edge whose merge first connects 0 and ``n - 1``,
        or ``float('inf')`` if they never become connected.
    """
    sets = DisjointSet(n)
    first, last = 0, n - 1
    for w, u, v in edges:
        if w >= lower_bound:
            sets.merge(u, v)
            if sets.find(first) == sets.find(last):
                return w
    return float('inf')
def create_epistemic(self):
    """Create ``self.disjoint_set`` over all places and apply fixed unions."""
    self.disjoint_set = DisjointSet(len(self.places))
    # The unions (0, 1) and (1, 3) were already disabled (commented out)
    # in the original code and stay disabled here.
    linked_pairs = ((3, 2), (4, 8), (7, 9), (11, 10), (10, 5))
    for first, second in linked_pairs:
        self.disjoint_set.union(first, second)
def __init__(self, boardsize):
    """Create an empty ``boardsize`` x ``boardsize`` board stored flat.

    ``legal_moves`` holds the six index offsets used on the flat board;
    one disjoint set is created per player.
    """
    self.boardsize = boardsize
    self.board = np.zeros(boardsize**2)
    self.start_player = 1
    diagonal = boardsize - 1
    self.legal_moves = [-1, 1, -boardsize, boardsize, -diagonal, diagonal]
    self.executedMoves = []
    self.disjoint_set1, self.disjoint_set2 = DisjointSet(), DisjointSet()
def kruskals(graph):
    """Return the MST edges of ``graph`` as ``(u, v, dist)`` tuples.

    ``graph`` is an adjacency mapping ``{u: {v: dist}}``.  All edges are
    processed even when the graph is disconnected, but in that case an
    exception is raised instead of returning the spanning forest.

    Raises:
        Exception: if fewer than |V| - 1 edges could be selected.
    """
    disjoint_set = DisjointSet()

    # Flatten the adjacency mapping into an edge list, registering vertices.
    edges = []
    for u in graph:
        for v, dist in graph[u].items():
            edges.append((u, v, dist))
            disjoint_set.make_set(v)
        disjoint_set.make_set(u)

    # Lightest edges first.
    edges.sort(key=lambda edge: edge[2])

    mst = []
    for edge in edges:
        u, v, _ = edge
        if not disjoint_set.is_connected(u, v):
            disjoint_set.union(u, v)
            mst.append(edge)

    # A spanning tree has exactly |V| - 1 edges; fewer means a forest.
    if len(mst) != len(disjoint_set) - 1:
        raise Exception("The graph is disconnected.")
    return mst
def kruskal(edges, n):
    """Run Kruskal's algorithm on ``n`` vertices.

    Args:
        edges: iterable of ``(u, v, w)`` triples.
        n: number of vertices handed to ``DisjointSet``.

    Returns:
        tuple: ``(selected, total)`` -- the chosen ``(u, v, w)`` edges in
        the order they were accepted, and the sum of their weights.
    """
    # Weight goes first so that plain tuple ordering sorts by weight.
    by_weight = [(w, u, v) for u, v, w in edges]
    by_weight.sort()

    sets = DisjointSet(n)
    selected = []
    for w, u, v in by_weight:
        if sets.find(u) != sets.find(v):
            sets.merge(u, v)
            selected.append((u, v, w))

    total = sum(w for _, _, w in selected)
    return selected, total
def construct_one_vs_one_cost(**keywords):
    """Link every point to its cheapest admissible neighbour.

    Points other than point 0 are processed from the farthest to the
    nearest (squared distance to point 0).  For each point ``i`` the
    candidate ``j`` with the lowest ``sekihara_method`` cost is chosen,
    skipping candidates that are already in the same component (to avoid
    cycles) and, for ``j != 0``, candidates whose
    ``radii[j] / radii[i]`` is below ``radius_ratio``.

    Keyword Args:
        xs, ys, zs: point coordinates (checked to have the same size).
        radii: per-point radius.
        param: weighting factor used inside the cost function.
        sekihara_cos: selects the cosine-based cost when truthy, otherwise
            the ``acos``-based cost.
        radius_ratio: minimum ``radii[j] / radii[i]`` for a candidate.

    Returns:
        tuple: ``(links, 0)`` where ``links`` is a list of ``(i, j)`` pairs.
    """
    xs, ys, zs = keywords["xs"], keywords["ys"], keywords["zs"]
    radii, param = keywords["radii"], keywords["param"]
    sekihara_cos = keywords["sekihara_cos"]
    radius_ratio = keywords["radius_ratio"]
    common.assert_same_size(xs=xs, ys=ys, zs=zs)

    links = []
    n = len(xs)
    UF = DisjointSet(n)
    max_d = compute_max_distance(xs, ys, zs)

    if sekihara_cos:
        def cost_func(cos_theta, abs_dst):
            return param * (1.0 - cos_theta) + abs_dst / max_d
    else:
        def cost_func(cos_theta, abs_dst):
            return math.acos(cos_theta) + param * abs_dst / max_d

    # Process the points starting with those farthest from the centre.
    order_by_dist = []
    for i in range(1, n):
        order_by_dist.append(
            [i, (xs[i]-xs[0])**2 + (ys[i]-ys[0])**2 + (zs[i] - zs[0])**2])
    order_by_dist.sort(key=lambda x: x[1], reverse=True)

    center = [xs[0], ys[0], zs[0]]
    for i, _ in order_by_dist:
        cost = float('inf')
        next_index = -1
        src = [xs[i], ys[i], zs[i]]
        for j in range(n):
            if i == j:
                continue
            if j != 0 and not (radii[j] / radii[i] >= radius_ratio):
                continue
            # Never link two points of the same component: no cycles.
            if UF.same(i, j):
                continue
            dst = [xs[j], ys[j], zs[j]]
            c = sekihara_method(src, dst, center, cost_func)
            if cost > c:
                cost = c
                next_index = j
        if next_index != -1:
            links.append((i, next_index))
            UF.merge(i, next_index)
    return links, 0
def Kruskals(self):
    """Generate the maze with randomized Kruskal's algorithm.

    Every wall is encoded as a number from which its two neighbouring
    cells can be recovered: "row" walls are ``cell + 0.5`` (cells ``k``
    and ``k + 1``), "column" walls are ``cell + offset`` with
    ``offset = self._rows / 2``.  Walls are removed at random until the
    disjoint set reports a single component.

    Raises:
        ValueError: if all walls have been used but the cells are still
            not all connected.
    """
    Dset = DisjointSet(self._numVertices)

    # Generate numbers that will act as a wall between two cells in a row
    rows = set()
    pre = .5
    for i in range(self._columns):
        for j in range(self._rows - 1):
            rows.add(pre)
            pre += 1
        # Skip the boundary between the last cell of one run and the next.
        pre += 1

    # Generate numbers that will act as a wall between two cells in a column
    columns = set()
    offset = self._rows / 2
    pre = offset
    for i in range(self._rows):
        for j in range(self._columns - 1):
            columns.add(pre)
            pre += 1

    while Dset.nsets != 1:
        if not rows and not columns:
            raise ValueError("no walls left but the maze is still disconnected")

        pick_row = random() < 0.5
        # One kind of wall may run out first; fall back to the other kind
        # instead of sampling from an empty population.
        if pick_row and not rows:
            pick_row = False
        elif not pick_row and not columns:
            pick_row = True

        # random.sample() needs a sequence (sets are rejected on Python 3.11+).
        if pick_row:
            wall = sample(tuple(rows), 1)[0]
            rows.remove(wall)
            left_cell = int(wall - .5)
            right_cell = int(wall + .5)
        else:
            wall = sample(tuple(columns), 1)[0]
            columns.remove(wall)
            left_cell = int(wall - offset)
            right_cell = int(wall + offset)

        # Only knock the wall down if the two cells are not connected yet.
        if Dset.find(left_cell) != Dset.find(right_cell):
            Dset.merge(left_cell, right_cell)
            self.add_edge((left_cell, right_cell))
            self.genTile(left_cell)
            self.genTile(right_cell)
def k_clusters(items, k):
    """Groups items in k clusters.

    An item should be a collection of 2 points or points with distance
    between them.

    :param items: sorted collection of (node1, node2, distance) objects.
        It is iterated twice, so it must not be a one-shot iterator.
    :param k: int, number of clusters we want to have returned.
    :return: {cluster1: [node1, node2], cluster2: [node3] ...}
        The clusters are always returned: if the nodes cannot be merged
        down to ``k`` clusters (too few connecting items) or there are
        fewer than ``k`` nodes to begin with, the current clustering is
        returned as is.
    """
    union_find = DisjointSet()

    # Initial setting of all points, where each node belongs to its own cluster.
    for item in items:
        union_find.make_set(item[0])
        union_find.make_set(item[1])

    # After every union operation the clusters counter is decreased by 1.
    counter = len(union_find)
    for item in items:
        if counter <= k:
            break
        node1, node2 = item[:2]
        if union_find.is_connected(node1, node2):
            continue
        union_find.union(node1, node2)
        counter -= 1

    return union_find.get_clusters()
def kruskal(graph: DisjointSet, edges):
    '''
    Build a spanning tree from ``edges`` using ``graph`` as the disjoint set.

    ``edges`` is a list of (source, destination, weight) items whose
    endpoints are nodes of the disjoint set.  Edges are taken in ascending
    weight order; an edge is kept when its endpoints' sets are not the
    same object, and the two sets are then united.
    Returns the kept (source, destination, weight) tuples.
    '''
    tree = []
    for edge in sorted(edges, key=itemgetter(2)):
        u, v, w = edge
        # Identity comparison on purpose: set representatives are objects.
        if graph.find_set(u) is graph.find_set(v):
            continue
        tree.append((u, v, w))
        graph.union(u, v)
    return tree
def clustering(agraph, k):
    """
    Max-Spacing k clustering

    Edges are merged in ascending cost order for as long as the disjoint
    set still holds at least ``k`` sets.

    Return ``(max_cost, mst)``: the cost of the last edge that merged two
    sets (``None`` if no edge was merged) and the list of merged
    ``(u, v, cost)`` edges.
    """
    # minimum spanning tree
    mst = []
    # Stays None when nothing gets merged (no edges, or fewer than k sets).
    max_cost = None

    # every vertex starts in its own set
    disjoint_set = DisjointSet()
    for vertex in agraph.Vertices():
        disjoint_set.make_set(vertex)

    # sorted() leaves the graph's own edge container untouched
    edges = sorted(agraph.edges(), key=lambda tup: tup[2])

    for u, v, cost in edges:
        if len(disjoint_set) < k:
            break
        if disjoint_set.find_set(u) != disjoint_set.find_set(v):
            mst.append((u, v, cost))
            max_cost = cost
            disjoint_set.union(u, v)
    return max_cost, mst
def view_similar_image_groups(similar_groups: DisjointSet):
    """Show each group of similar images by staging it in a scratch folder.

    For every group with at least two existing files, the files are moved
    into ``SIMILAR_IMAGE_FOLDER``, the first one is opened through the
    shell, and afterwards the files are moved back to the current
    directory.  On Ctrl-C the staged files are moved back to the parent of
    the folder and the process exits with status 2.  The folder is removed
    in all cases.
    """
    # On Windows the file is launched through ``call``.
    cmd = 'call "{}"' if os.name == 'nt' else '"{}"'
    folder = SIMILAR_IMAGE_FOLDER
    if not os.path.isdir(folder):
        os.mkdir(folder)
    try:
        for group in list(similar_groups.itersets()):
            real_files = [name for name in group if os.path.isfile(name)]
            if len(real_files) < 2:
                continue
            for name in real_files:
                shutil.move(name, folder)
            os.system(cmd.format(os.path.join(folder, real_files[0])))
            for name in real_files:
                try:
                    shutil.move(os.path.join(folder, name), '.')
                except (FileNotFoundError, shutil.Error):
                    # Best effort: the file may have been deleted while viewing.
                    pass
    except KeyboardInterrupt:
        with fstk.ctx_pushd(folder):
            for leftover in os.listdir():
                shutil.move(leftover, '..')
        sys.exit(2)
    finally:
        os.removedirs(folder)
def view_similar_images_auto(thresholds: list = None, hashtype: str = None, hashsize: int = None, trans: bool = True, stat: bool = True, dryrun: bool = False, **kwargs):
    """Hash all images, then group and view similar ones threshold by threshold.

    Args:
        thresholds: similarity thresholds; defaults to ``[0.95, 0.8, 0.65]``.
            The caller's list is not modified.
        hashtype: image hash type; defaults to ``DEFAULT_IMAGE_HASHTYPE``.
        hashsize: image hash size; defaults to ``DEFAULT_IMAGE_HASHSIZE``.
        trans: forwarded to the hashing/pairing/grouping helpers.
        stat: forwarded to the helpers; also prints each hamming distance.
        dryrun: when true, groups are computed but never shown.
        **kwargs: accepted for interface compatibility; unused here.
    """
    # sorted() copies, so a list passed in by the caller is left untouched.
    thresholds = sorted(thresholds or [0.95, 0.8, 0.65], reverse=True)
    hashtype = hashtype or DEFAULT_IMAGE_HASHTYPE
    hashsize = hashsize or DEFAULT_IMAGE_HASHSIZE
    common_kwargs = {
        'hashtype': hashtype,
        'hashsize': hashsize,
        'trans': trans,
        'stat': stat
    }

    db = hash_all_image_files(hash_db=read_imagehash_file(), **common_kwargs)
    write_imagehash_file(db)
    similar_pairs_ll = pair_similar_images(db, min(thresholds), **common_kwargs)

    # Thresholds converted to hamming distances, smallest distance first.
    hd_l = sorted(ist2hd(th, hashsize=hashsize) for th in thresholds)

    last_hd = 0
    gs = DisjointSet()
    for hd in hd_l:
        # Pairs whose distance falls in (previous level, this level].
        sp_ll = similar_pairs_ll[last_hd:hd + 1]
        last_hd = hd + 1
        if stat:
            print('hamming distance:', hd)
        gs = group_similar_images(sp_ll, groups_ds=gs, **common_kwargs)
        if not dryrun:
            view_similar_image_groups(gs)
def Ellers(self):
    """Generate the maze row by row with Eller's algorithm.

    ``self._numTiles`` is read as (cells per row, number of rows).  For
    each row, adjacent cells in different sets are merged at random (always
    on the last row); then, except on the last row, every set gets at least
    one downward connection and the disjoint set is rebuilt so that cells
    without a downward connection start the next row in a fresh set.
    """
    width, height = self._numTiles[0], self._numTiles[1]
    last_row = height - 1
    Dset = DisjointSet(width)

    for row in range(height):
        on_last_row = row == last_row
        self.genTile(row * width)

        # Horizontal pass: optionally join each cell with its left neighbour.
        for col in range(1, width):
            already_joined = Dset.find(col) == Dset.find(col - 1)
            cell = self.toIndex((col, row))
            if not already_joined:
                # randint(1, 1) on the last row forces the merge.
                shouldMerge = bool(randint(int(on_last_row), 1))
                if shouldMerge:
                    Dset.merge(col - 1, col)
                    self.add_edge((cell - 1, cell))
            self.genTile(cell)

        if on_last_row:
            continue

        # Vertical pass: each set opens one or more passages downwards.
        remainders = list(range(width))
        for members in Dset.Sets:
            if members is None:
                continue
            pool = members.copy()
            numDownward = randint(1, len(pool))
            for _ in range(numDownward):
                pick = randint(0, len(pool) - 1)
                chosen_col = pool[pick]
                upper = self.toIndex((chosen_col, row))
                lower = self.toIndex((chosen_col, row + 1))
                self.add_edge((upper, lower))
                self.genTile(lower)
                pool.pop(pick)
                remainders.remove(chosen_col)

        # recreate the disjoint set with the correct set/cell locations
        for col in remainders:
            free_slot = next(
                slot for slot, owner in enumerate(Dset.Sets) if owner is None)
            Dset.Cells[col] = free_slot
            Dset.Sets[free_slot] = [free_slot]

        Dset.Sets = [None] * width
        for cell_id, set_id in enumerate(Dset.Cells):
            if Dset.Sets[set_id] is None:
                Dset.Sets[set_id] = [cell_id]
            if cell_id not in Dset.Sets[set_id]:
                Dset.Sets[set_id].append(cell_id)
def kruskal(self):
    """Compute the minimum spanning tree with Kruskal's algorithm.

    ``self.edges`` must hold ``(vertex_1, vertex_2, weight)`` triples in
    ascending weight order.  The disjoint set is stored on ``self.ds`` and
    the result on ``self.mst``.

    Returns:
        list: ``[vertex_1, vertex_2, weight]`` entries.  For a disconnected
        graph the edges run out early and a spanning forest is returned
        instead of an ``IndexError`` being raised.
    """
    self.ds = DisjointSet()
    self.mst = []
    # A spanning tree over |V| vertices contains exactly |V| - 1 edges.
    edges_needed = len(self.vertices) - 1
    for vertex_1, vertex_2, weight in self.edges:
        if len(self.mst) >= edges_needed:
            break
        cluster_1 = self.ds.find(vertex_1)
        cluster_2 = self.ds.find(vertex_2)
        # Endpoints in different clusters: the edge cannot close a cycle.
        if cluster_1 != cluster_2:
            self.ds.union(cluster_1, cluster_2)
            self.mst.append([vertex_1, vertex_2, weight])
    return self.mst
def _dbscan(points, eps, min_pts):
    """Cluster ``points`` using the labels produced by ``label``.

    Instead of building a graph, a disjoint set records which group each
    point belongs to: CORE points within ``eps`` of each other share a
    group, and each BORDER point joins the group of the first CORE point
    found within ``eps``.

    Returns:
        tuple: ``(groups, noise)`` -- a list of lists of point indices and
        an array with the indices labelled NOISE.
    """
    N = len(points)
    labels = label(points, eps=eps, min_pts=min_pts)
    indices = np.arange(N)
    cores = indices[labels == CORE]

    clusters = DisjointSet()

    # Join every pair of neighbouring CORE points.
    for position, i in enumerate(cores):
        for j in cores[position + 1:]:
            if norm(points[i], points[j]) <= eps:
                clusters.union(i, j)

    # Attach each BORDER point to the first CORE point in its vicinity.
    for i in indices[labels == BORDER]:
        nearby = next(
            (j for j in cores if norm(points[i], points[j]) <= eps), None)
        if nearby is not None:
            clusters.union(i, nearby)

    # Turn the disjoint set into explicit groups keyed by representative.
    results = {}
    for i in indices[labels != NOISE]:
        results.setdefault(clusters.find(i), []).append(i)

    return list(results.values()), indices[labels == NOISE]
def __init__(self, offset_lst, dst_v_lst, deg_lst, eps, min_pts):
    """Store the graph (offset/destination arrays) and reset all state.

    Args:
        offset_lst: per-vertex offsets into ``dst_v_lst``; the edges of
            vertex ``u`` are ``dst_v_lst[offset_lst[u]:offset_lst[u + 1]]``.
        dst_v_lst: destination vertex of every edge.
        deg_lst: degree of every vertex.
        eps: similarity threshold parameter.
        min_pts: minimum-points parameter.
    """
    # parameters
    self.eps = eps
    self.min_pts = min_pts
    self.n = len(deg_lst)

    # offset and vertex properties
    self.offset_lst = offset_lst
    # A real list (not a lazy ``map``) so that len() and indexing work.
    self.inc_degree_lst = [degree_val + 1 for degree_val in deg_lst]
    self.similar_degree_lst = [0] * len(self.inc_degree_lst)

    # dst_v and edge properties
    self.dst_v_lst = dst_v_lst
    self.min_cn_lst = [PScan.not_sure] * len(self.dst_v_lst)

    # source vertex of every edge, derived from the offsets
    self.src_lst = [0] * len(dst_v_lst)
    for u in range(0, len(offset_lst) - 1):
        for i in range(offset_lst[u], offset_lst[u + 1]):
            self.src_lst[i] = u
    self.el_lst = list(zip(self.src_lst, self.dst_v_lst))

    # disjoint set
    self.disjoint_set = DisjointSet(self.n)

    # non-core clustering
    self.cluster_dict = [self.n] * self.n
    self.non_core_cluster = []

    # 1. statistics for prune
    self.prune0 = 0  # definitely not reachable
    self.prune1 = 0  # definitely reachable

    # 2.1 statistics for check core 1st bsp: set intersection with early stop
    self.intersect = 0
    self.cmp0 = 0
    self.cmp1 = 0
    self.cmp_equ = 0

    # 2.2 statistics for check core 2nd bsp: binary search
    self.binary_search_call = 0

    # 3 statistics for disjoint set
    self.result_lines = []
def select(data, epsilon, measurement_log, cliques=None):
    """Select attribute pairs that extend ``cliques`` to a spanning tree.

    Each candidate pair is weighted by the L1 distance between the true
    marginal and the marginal projected from the model estimated on
    ``measurement_log``.  One pair joining two different components is then
    drawn per round with ``permute_and_flip`` until the graph over the
    attributes is connected.

    Args:
        data: dataset exposing ``domain.attrs`` and ``project``.
        epsilon: budget, split evenly over the ``r - 1`` selection rounds.
        measurement_log: measurements used to fit the estimate.
        cliques: optional iterable of attribute pairs already selected;
            treated as empty when omitted.

    Returns:
        list: edges of the resulting graph.
    """
    if cliques is None:
        cliques = []

    engine = FactoredInference(data.domain, iters=1000)
    est = engine.estimate(measurement_log)

    weights = {}
    candidates = list(itertools.combinations(data.domain.attrs, 2))
    for a, b in candidates:
        xhat = est.project([a, b]).datavector()
        x = data.project([a, b]).datavector()
        weights[a, b] = np.linalg.norm(x - xhat, 1)

    T = nx.Graph()
    T.add_nodes_from(data.domain.attrs)
    ds = DisjointSet()

    for e in cliques:
        T.add_edge(*e)
        ds.union(*e)

    # r components need exactly r - 1 additional edges to become one tree.
    r = len(list(nx.connected_components(T)))

    for i in range(r - 1):
        # Only pairs bridging two different components remain eligible.
        candidates = [e for e in candidates if not ds.connected(*e)]
        wgts = np.array([weights[e] for e in candidates])
        idx = permute_and_flip(wgts, epsilon / (r - 1), sensitivity=1.0)
        e = candidates[idx]
        T.add_edge(*e)
        ds.union(*e)

    return list(T.edges)
def MST_kruskal(self):
    """Build and return the minimum spanning tree with Kruskal's algorithm.

    ``self.edge_list`` is sorted in place by weight.  If the graph is
    disconnected the edge list is exhausted before ``vertex_num - 1``
    edges are found; the partial result (a spanning forest) is returned in
    that case instead of an ``IndexError`` being raised.

    Returns:
        Graph: a new graph with ``self.vertex_num`` vertices holding the
        selected edges.
    """
    # The MST that is built up and finally returned.
    mst = Graph()
    mst.add_vertex(self.vertex_num)
    # Disjoint set: vertex sets used to detect whether an edge forms a cycle.
    ds = DisjointSet(self.vertex_num)
    # Sort the edges by weight.
    self.edge_list.sort(key=lambda e: e.weight)
    # Number of edges currently in the MST.
    mst_edge_num = 0
    # Take the edges in ascending weight order.
    for edge in self.edge_list:
        # Done once |TE| = |TV| - 1.
        if mst_edge_num >= self.vertex_num - 1:
            break
        root_1 = ds.collapsing_find(edge.v1)
        root_2 = ds.collapsing_find(edge.v2)
        # FIND(u) != FIND(v) means the edge does not form a cycle.
        if root_1 != root_2:
            # TE = TE U {(u, v)}
            mst.insert_edge(edge.v1, edge.v2, edge.weight)
            # UNION(u, v)
            ds.weighted_union(root_1, root_2)
            mst_edge_num += 1
    return mst
def disjoint_set(items, join_checker):
    """ Group items according to a pairwise connection rule.

    :param items: the items to group
    :param join_checker: callable telling whether two items are connected;
        connected items end up in the same group
    :return: list of groups, each a list of the original items

    Algorithm: items are replaced by their indices and all merging is done
    on indices, so duplicated values and unhashable objects are supported.

    >>> disjoint_set([-1, -2, 2, 0, 0, 1], lambda x, y: x*y>0)
    [[-1, -2], [2, 1], [0], [0]]
    """
    # 1 register every index as its own set
    ds = DisjointSet()
    items = tuple(items)
    count = len(items)
    for index in range(count):
        ds.find(index)

    # 2 merge the indices of connected items
    for left, right in combinations(range(count), 2):
        if join_checker(items[left], items[right]):
            ds.union(left, right)

    # 3 translate index groups back to the items themselves
    return [[items[index] for index in group] for group in ds.itersets()]
def __init__(self, graph):
    """Compute the minimum spanning tree of ``graph`` with Kruskal's algorithm.

    Edges are pulled from a priority queue and kept when their endpoints
    are not connected yet.  The selected edges are stored in ``self.mst``;
    for a disconnected graph this is a spanning forest.
    """
    pq = PriorityQueue()
    for edge in graph.get_edges():
        pq.put(edge)

    V = graph.get_num_node()
    disjoint_set = DisjointSet(V)
    minimum_spaning_tree = []
    # Stop when the tree is complete OR the edges run out.  Both conditions
    # must hold to continue; otherwise pq.get() would be called on an empty
    # queue when the graph is disconnected.
    while not pq.empty() and len(minimum_spaning_tree) < V - 1:
        edge = pq.get()
        v = edge.either()
        w = edge.other(v)
        if not disjoint_set.isConnected(v, w):
            disjoint_set.union(v, w)
            minimum_spaning_tree.append(edge)
    self.mst = minimum_spaning_tree
def init_epistemic_relation(self):
    """Create the per-agent epistemic structures.

    One entry per agent is created in ``imperfect_information`` and
    ``can_go_there`` (via ``ArrayTools.create_array_of_size``) and one
    ``DisjointSet`` over all states in ``epistemic_class_disjoint``.
    """
    agents = self.number_of_agents
    self.imperfect_information = ArrayTools.create_array_of_size(agents, [])
    self.epistemic_class_disjoint = [
        DisjointSet(self.number_of_states) for _ in range(agents)
    ]
    self.can_go_there = ArrayTools.create_array_of_size(agents, [])
def __init__(self,length,width,walls=0.40):
    """Create a ``length`` x ``width`` map and fill it at random.

    ``walls`` is passed to the initial map generation.  The disjoint set
    starts empty and the centre point is ``(length/2, width/2)`` truncated
    to integers.
    """
    self.__length, self.__width = length, width
    self.__exits, self.__map, self.__buf_map = [], [], []
    self.__gen_initial_map(walls)
    self.__ds = DisjointSet()
    half_length = int(self.__length/2)
    half_width = int(self.__width/2)
    self.__cpt = (half_length, half_width)
def kruskal(graph):
    """Return the set of ``(u, v)`` edges chosen by Kruskal's algorithm.

    ``graph.V`` lists the vertices and ``graph.edges`` holds
    ``(u, v, w)`` triples, which are visited in ascending ``w``.
    """
    ds = DisjointSet()
    # Touch every vertex once so that it is known to the disjoint set.
    for vertex in graph.V:
        ds.find(vertex)

    chosen = set()
    for u, v, _ in sorted(graph.edges, key=lambda edge: edge[2]):
        if ds.find(u) == ds.find(v):
            continue
        chosen.add((u, v))
        ds.union(u, v)
    return chosen
def populate_kruskal(self):
    """Carve the ``self.h`` x ``self.w`` grid with randomized Kruskal.

    All edges between vertically/horizontally adjacent cells are shuffled;
    whenever ``union`` of the two cells reports success, the passage is
    opened in both directions through ``set_dir``.
    """
    height, width = self.h, self.w
    cells = [(i, j) for i in range(height) for j in range(width)]

    edges = []
    for i, j in cells:
        if i + 1 < height:
            edges.append(((i, j), (i + 1, j)))
        if j + 1 < width:
            edges.append(((i, j), (i, j + 1)))

    ds = DisjointSet(cells)
    random.shuffle(edges)
    for first, second in edges:
        if not ds.union(first, second):
            continue
        (a, b), (c, d) = first, second
        # Offset between the cells selects the direction to open.
        nd = self.OFF_DIR[(c - a, d - b)]
        self.set_dir(a, b, nd)
        self.set_dir(c, d, self.OPOSITE[nd])
def group_similar_images(
        similar_pairs_ll: list,
        groups_ds: DisjointSet = None,
        stat: bool = True,
        **kwargs
) -> DisjointSet:
    """Union every similar pair into a disjoint set of image groups.

    Args:
        similar_pairs_ll: list of lists of pairs; all pairs are flattened.
        groups_ds: disjoint set to extend.  A new one is created only when
            this is ``None``, so an existing (even empty) set passed by the
            caller is always the one that gets updated and returned.
        stat: print a progress line after every pair.
        **kwargs: accepted for interface compatibility; unused here.

    Returns:
        DisjointSet: the updated disjoint set.
    """
    if groups_ds is None:
        groups_ds = DisjointSet()

    pairs_l = [pair for pairs in similar_pairs_ll for pair in pairs]
    round_cnt, total_cnt = 0, len(pairs_l)
    for pair in pairs_l:
        groups_ds.union(*pair)
        if stat:
            round_cnt += 1
            print('group:', percentage(round_cnt / total_cnt), total_cnt,
                  round_cnt, len(list(groups_ds.itersets())), end='\r')
    if stat:
        # Terminate the carriage-return progress line.
        print()
    return groups_ds
def print_opcode_groups(similarities):
    """Print opcode groups whose pairwise similarity exceeds 0.85.

    ``similarities`` is iterated for ``(i, j)`` keys and indexed with
    ``similarities[i, j]``; every opcode seen is registered, so opcodes
    without a similar partner are printed as singleton groups.
    """
    print("\nOpcode grouping based on the computed cosine similarity values:")
    threshold = 0.85
    groups = DisjointSet()
    for first, second in similarities:
        # Register both opcodes even if they end up not being merged.
        groups.find(first)
        groups.find(second)
        if similarities[first, second] > threshold:
            groups.union(first, second)
    print(list(groups.itersets()))
def kruskal(args):
    """Return the spanning tree edges found by Kruskal's algorithm.

    ``args`` is a ``(vertex_list, edge_list)`` pair; edges are
    ``(weight, u, v)`` tuples and are processed in plain sorted order.
    The returned list contains the accepted edge tuples unchanged.
    """
    vertex_list, edge_list = args

    disj_set = DisjointSet()
    for vertex in vertex_list:
        disj_set.make_set(vertex)

    min_span_tree = []
    for edge in sorted(edge_list):
        _, u, v = edge
        if disj_set.find(u) == disj_set.find(v):
            continue
        disj_set.union(u, v)
        min_span_tree.append(edge)
    return min_span_tree
def boruvka(adj_list):
    """Return spanning tree edges computed with Boruvka's algorithm.

    ``adj_list`` maps ``u -> {v: weight}``.  In every round the cheapest
    edge leaving each component is recorded, then those edges are merged;
    the rounds stop when no component has an outgoing edge left.

    Returns:
        list: ``(weight, u, v)`` tuples of the accepted edges.
    """
    disj_set = DisjointSet()
    for vertex in adj_list.keys():
        disj_set.make_set(vertex)

    min_span_tree = []
    while True:
        # Cheapest outgoing edge per component, keyed by component root.
        minima = {}
        for u in adj_list.keys():
            root = disj_set.find(u)
            neighbours = adj_list[u]
            for v in neighbours:
                if disj_set.find(v) == root:
                    continue
                weight = neighbours[v]
                if root not in minima or weight < minima[root][0]:
                    minima[root] = (weight, u, v)

        if not minima:
            break

        for root, best in minima.items():
            # union() reports whether the two components were really merged.
            if disj_set.union(root, best[2]):
                min_span_tree.append(best)
    return min_span_tree
from disjoint_set import DisjointSet

if __name__ == '__main__':
    # Small manual demo; print() with one argument behaves the same on
    # Python 2 and Python 3.
    test_set = DisjointSet([1, 2, 3, 3, 3, 4, 5, 6, 7, 7, 7, 7])
    print(test_set.get())
    print(test_set.union(2, 3))
    print(test_set.union(6, 7))
    print(test_set.union(7, 10))
    print(test_set.union(2, 6))
# Match the first N 'yosemite' images pairwise, group the images that
# correlate, and pickle the file list, the matches and the groups.
N = 20
# only need to download once:
# source.download_images('yosemite', N, -119.583650, 37.720424, -119.563650, 37.740424)
files = source.get_images('yosemite')[:N]
features = [detect.get_features(f, sx=0.5, sy=0.5) for f in files]

# Normalise every descriptor vector; keep the matrix and its transpose.
norm_features = []
for f in features:
    f_normalized = np.array([v/np.linalg.norm(v) for v in f[1]])
    norm_features.append((f_normalized, f_normalized.T))

grid = np.zeros((len(features), len(features)), 'int')
matches = {}  # {filename: [(filename, correlation), ...]}
groups = DisjointSet(files)
for i, f1 in enumerate(norm_features):
    print("matching images with image", i)
    for j, f2 in enumerate(norm_features):
        if i >= j:  # do not double compute
            grid[i, j] = correlation(f1, f2)
            if i != j and grid[i, j] >= 2:
                print(files[i], files[j])
                matches.setdefault(files[i], []).append((files[j], grid[i, j]))
                matches.setdefault(files[j], []).append((files[i], grid[i, j]))
                groups.union(files[i], files[j])

# pickle writes bytes, so the files must be opened in binary mode; the
# ``with`` blocks make sure every file is flushed and closed.
with open('files.txt', 'wb') as out:
    pickle.dump(files, out)
with open('matches.txt', 'wb') as out:
    pickle.dump(matches, out)
with open('groups.txt', 'wb') as out:
    pickle.dump(groups.get_sets(), out)
class new:
    """Cave-like map generator based on repeated neighbour-count smoothing.

    The map is a list of ``length`` rows with ``width`` cells each
    (``self.__map[r][c]``); a cell is WALL, FLOOR or TUNNEL.  Separate
    caves are joined by carving tunnels towards the centre point, with a
    disjoint set tracking which cells are connected.

    NOTE(review): several methods mix (row, col) and (x, y) ordering when
    indexing the map (exits, ``__adj_flr_count``); this is only harmless
    for square maps -- confirm the intended convention.
    """

    def __init__(self, length, width, walls=0.40):
        """Create a ``length`` x ``width`` map; ``walls`` is the probability
        that a cell of the initial random map is a wall."""
        self.__length = length
        self.__width = width
        self.__exits = []
        self.__map = []
        self.__buf_map = []
        self.__gen_initial_map(walls)
        self.__ds = DisjointSet()
        self.__cpt = (int(self.__length/2), int(self.__width/2))

    def resize_map(self, new_length, new_width, center=True):
        """Copy the map, centred, into a new all-wall map of the given size.

        Cells falling outside the new bounds are dropped.  Exits are
        cleared and the centre point is recomputed.  ``center`` is accepted
        but not used.
        """
        new_map = [[WALL for i in range(new_width)]
                   for j in range(new_length)]
        ox = int(new_width/2.0-self.__width/2.0+0.5)
        oy = int(new_length/2.0-self.__length/2.0+0.5)
        # Rows are bounded by the lengths and columns by the widths.
        for r in range(self.__length):
            for c in range(self.__width):
                r2 = oy + r
                c2 = ox + c
                if 0 <= r2 < new_length and 0 <= c2 < new_width:
                    new_map[r2][c2] = self.__map[r][c]
        self.__map = new_map
        self.__length = new_length
        self.__width = new_width
        self.__exits = []
        self.__cpt = (int(self.__length/2), int(self.__width/2))

    def print_map(self):
        """Write the map to stdout: '#' wall, '+' tunnel, ' ' otherwise."""
        for c in range(0, self.__width):
            for r in range(0, self.__length):
                if self.__map[r][c] == WALL:
                    sys.stdout.write('#')
                elif self.__map[r][c] == TUNNEL:
                    sys.stdout.write('+')
                else:
                    sys.stdout.write(' ')
            sys.stdout.write('\n')
        sys.stdout.write('\n')

    def iterate_walls(self):
        """Yield ``(c, r)`` for every wall cell with an adjacent floor cell."""
        for c in range(0, self.__width):
            for r in range(0, self.__length):
                if self.__map[r][c] == WALL:
                    if (self.__adj_flr_count(r, c) > 0):
                        yield (c, r)

    def iterate_map(self, cell_type):
        """Yield ``(c, r)`` for every cell equal to ``cell_type``."""
        for c in range(0, self.__width):
            for r in range(0, self.__length):
                if self.__map[r][c] == cell_type:
                    yield (c, r)

    def add_exit(self, pt1, pt2):
        """Register as exits the points stepped through from ``pt1`` towards
        ``pt2`` (``pt2`` itself is not added).

        Exits the program (``SystemExit``) when a point is out of range.
        """
        while (pt1 != pt2):
            if (pt1[0] < 0 or pt1[0] >= self.__width or
                    pt1[1] < 0 or pt1[1] >= self.__length):
                # sys.exit() accepts a single argument only.
                sys.exit('WARN: Exit out of range %s' % (pt1,))
            self.__exits.append(pt1)
            # Move one step (per axis) towards pt2.
            step = self.__get_tunnel_dir(pt1, pt2)
            pt1 = (pt1[0] + step[0], pt1[1] + step[1])

    def purge_exits(self):
        """Forget all exits and turn every border cell back into a wall."""
        self.__exits = []
        for c in range(0, self.__width):
            for r in range(0, self.__length):
                if (c == 0 or c == self.__width-1 or
                        r == 0 or r == self.__length-1):
                    self.__map[r][c] = WALL

    def grow_map(self):
        """Run one generation with cutoffs (2, -1)."""
        self.__generation(1, 2, -1)

    def reduce_map(self):
        """Run one generation with cutoffs (7, -1)."""
        self.__generation(1, 7, -1)

    def gen_map(self, mode='default'):
        """Generate the cave; ``mode='room'`` yields one large cavern,
        any other value yields winding passages."""
        if mode == 'room':
            # One large cavern room
            self.__generation(4, 5, -1)
            self.__join_rooms()
            self.__generation(1, 5, -1)
        else:
            # Windey passages.
            # Repeat 4: W?(p) = R1(p) ? 5 || R2(p) ? 2
            # Repeat 3: W?(p) = R1(p) ? 5
            # We do the above, with a cave join pass right before the final
            # iteration. This helps smooth out any sharp edges after the join
            # pass.
            self.__generation(4, 5, 2)
            self.__generation(2, 5, -1)
            self.__join_rooms()
            self.__generation(1, 5, -1)

    def __generation(self, count, r1_cutoff, r2_cutoff):
        """Apply ``count`` smoothing passes.

        An interior cell becomes WALL when its range-1 wall count is at
        least ``r1_cutoff`` or its range-2 wall count is at most
        ``r2_cutoff``; otherwise it becomes FLOOR.
        """
        while (count > 0):
            self.__buf_map = [[WALL for i in range(self.__width)]
                              for j in range(self.__length)]
            self.__gen_walls(self.__buf_map)
            self.__gen_walls(self.__map)
            for r in range(1, self.__length-1):
                for c in range(1, self.__width-1):
                    adjcount_r1 = self.__adj_wall_count(r, c, 1)
                    adjcount_r2 = self.__adj_wall_count(r, c, 2)
                    if (adjcount_r1 >= r1_cutoff or
                            adjcount_r2 <= r2_cutoff):
                        self.__buf_map[r][c] = WALL
                    else:
                        self.__buf_map[r][c] = FLOOR
            self.__map = list(self.__buf_map)
            count -= 1

    def __gen_initial_map(self, fillprob):
        """Fill the map at random (WALL with probability ``fillprob``)."""
        def rwall(fillprob):
            if (random() < fillprob):
                return WALL
            return FLOOR

        self.__map = [[rwall(fillprob) for i in range(self.__width)]
                      for j in range(self.__length)]
        self.__gen_walls(self.__map)

    def __gen_walls(self, a_map):
        """Wall off the border of ``a_map`` and re-open the exits."""
        for j in range(0, self.__length):
            a_map[j][0] = WALL
            a_map[j][self.__width-1] = WALL
        for j in range(0, self.__width):
            a_map[0][j] = WALL
            a_map[self.__length-1][j] = WALL
        # Force the exits to be floor. We grow them out from the edge a bit to
        # make sure they don't get sealed off.
        for pos in self.__exits:
            a_map[pos[0]][pos[1]] = FLOOR
            for pos2 in ((-1, 0), (1, 0), (0, -1), (0, 1),
                         (-2, 0), (2, 0), (0, -2), (0, 2)):
                p = (pos[0]+pos2[0], pos[1]+pos2[1])
                if (p[0] < 1 or p[1] < 1):
                    continue
                if (p[0] >= self.__width-1 or p[1] >= self.__length-1):
                    continue
                a_map[p[0]][p[1]] = FLOOR

    def __adj_flr_count(self, sr, sc):
        """Count the FLOOR cells among the four cardinal neighbours."""
        count = 0
        for pos in ((-1, 0), (1, 0), (0, -1), (0, 1)):
            p = (sr+pos[0], sc+pos[1])
            if (p[0] < 0 or p[1] < 0):
                continue
            if (p[0] > self.__width-1 or p[1] > self.__length-1):
                continue
            if (self.__map[p[0]][p[1]] == FLOOR):
                count += 1
        return count

    def __adj_wall_count(self, sr, sc, rng=1):
        """Count WALL cells within ``rng`` of (sr, sc), the cell itself
        included; the four far corners of a range-2 square are skipped."""
        count = 0
        for r in range(-rng, rng+1):
            for c in range(-rng, rng+1):
                if (abs(r) == 2 and abs(c) == 2):
                    continue
                if (sr + r < 0 or sc + c < 0):
                    continue
                if (sr + r >= self.__length or sc + c >= self.__width):
                    continue
                if self.__map[sr + r][sc + c] == WALL:
                    count += 1
        return count

    def __join_rooms(self):
        """Connect all caves by tunnelling until a single set remains."""
        # Divide all cells into joined sets
        for r in range(0, self.__length):
            for c in range(0, self.__width):
                if self.__map[r][c] != WALL:
                    self.__union_adj_sqr(r, c)

        all_caves = self.__ds.split_sets()
        while len(all_caves) > 1:
            # list() so that choice() also works with dict views (Python 3).
            cave = choice(list(all_caves.keys()))
            self.__join_points(all_caves[cave][0])
            all_caves = self.__ds.split_sets()

    def __union_adj_sqr(self, sr, sc):
        """Union cell (sr, sc) with its FLOOR neighbours."""
        loc = (sr, sc)
        root1 = self.__ds.find(loc)
        # A cell is connected to other cells only in cardinal directions.
        # (diagonals don't count for movement).
        for pos in ((-1, 0), (1, 0), (0, -1), (0, 1)):
            if (sr+pos[0] < 0 or sc+pos[1] < 0):
                continue
            if (sr+pos[0] >= self.__length or sc+pos[1] >= self.__width):
                continue
            nloc = (sr+pos[0], sc+pos[1])
            if self.__map[nloc[0]][nloc[1]] == FLOOR:
                root2 = self.__ds.find(nloc)
                if root1 != root2:
                    self.__ds.union(root1, root2)

    def __join_points(self, pt1):
        """Carve a randomly wandering tunnel from ``pt1`` towards the centre
        until ``__stop_drawing`` reports that it can stop."""
        next_pt = pt1
        while 1:
            step = self.__get_tunnel_dir(pt1, self.__cpt)
            move = randrange(0, 3)
            if move == 0:
                next_pt = (pt1[0] + step[0], pt1[1])
            elif move == 1:
                next_pt = (pt1[0], pt1[1] + step[1])
            else:
                next_pt = (pt1[0] + step[0], pt1[1] + step[1])

            root1 = self.__ds.find(next_pt)
            root2 = self.__ds.find(pt1)
            if root1 != root2:
                self.__ds.union(root1, root2)

            # Open the new cell and its cardinal neighbours as tunnel.
            for pos in ((0, 0), (-1, 0), (1, 0), (0, -1), (0, 1)):
                nr = next_pt[0] + pos[0]
                nc = next_pt[1] + pos[1]
                if (nr < 0 or nc < 0 or
                        nr >= self.__length or nc >= self.__width):
                    continue
                if (self.__map[nr][nc] == WALL):
                    self.__map[nr][nc] = TUNNEL

            if self.__stop_drawing(pt1, next_pt, self.__cpt):
                return
            pt1 = next_pt

    def __stop_drawing(self, pt, npt, cpt):
        """Return 1 when the tunnel may stop at ``npt``, else 0."""
        if self.__ds.find(npt) == self.__ds.find(cpt):
            return 1
        if (self.__ds.find(pt) != self.__ds.find(npt) and
                self.__map[npt[0]][npt[1]] != WALL):
            return 1
        return 0

    def __get_tunnel_dir(self, pt1, pt2):
        """Return the per-axis step (-1, 0 or +1) leading from pt1 to pt2."""
        if pt1[0] < pt2[0]:
            h_dir = +1
        elif pt1[0] > pt2[0]:
            h_dir = -1
        else:
            h_dir = 0

        if pt1[1] < pt2[1]:
            v_dir = +1
        elif pt1[1] > pt2[1]:
            v_dir = -1
        else:
            v_dir = 0
        return (h_dir, v_dir)