def get_trie(s):
    """Build a Huffman-style trie for the symbols of ``s``.

    Counts how often every symbol occurs, seeds a min-heap with one leaf
    ``Node(frequency, symbol)`` per distinct symbol, then keeps merging the
    two nodes extracted first under a new parent whose frequency is their
    sum, until at most one node is left.

    Returns whatever ``heap.extractmin()`` yields for the final heap (the
    root of the trie when ``s`` is non-empty).
    """
    # Tally symbol occurrences; dict order follows first appearance.
    frequencies = {}
    for symbol in s:
        frequencies[symbol] = frequencies.get(symbol, 0) + 1

    # Seed the heap with one leaf per distinct symbol.
    heap = MinHeap()
    for symbol, count in frequencies.items():
        heap.insert(Node(count, symbol))

    # Merge until a single root remains.
    while heap.size() > 1:
        left = heap.extractmin()
        right = heap.extractmin()
        assert (left is not None)
        assert (right is not None)
        assert (left.frequency >= 0)
        assert (right.frequency >= 0)
        parent = Node(left.frequency + right.frequency)
        parent.left = left
        parent.right = right
        heap.insert(parent)

    return heap.extractmin()
def BFS(self, v):
    """
    Breadth-first search starting at vertex v.

    Returns a list indexed by vertex holding the number of edges on the
    path found from v (float("inf") for vertices that were never reached),
    or None when v is not smaller than the vertex count.
    """
    vertex_total = self.vertexCount()
    if v >= vertex_total:
        return None  # the source vertex does not exist

    # frontier ordered by distance, the set of discovered vertices, and the
    # distance table (everything starts out unreachable)
    frontier = MinHeap()
    seen = set()
    distance = [float("inf")] * vertex_total

    # the source is discovered at distance 0
    frontier.insert(KeyValuePair(0, v))
    seen.add(v)
    distance[v] = 0

    # pop vertices in order of distance until the frontier yields nothing
    while True:
        entry = frontier.extractMin()
        if not entry:
            break
        hops = entry.key + 1
        for edge in self._adj[entry.value]:
            target = edge.dest
            if target in seen:
                continue
            # first time this vertex is reached: fix its distance
            seen.add(target)
            distance[target] = hops
            frontier.insert(KeyValuePair(hops, target))

    return distance
class Median:
    """Running median kept in two heaps.

    ``h_low`` is a max-heap and ``h_high`` a min-heap; ``add_element`` sends
    values below ``h_low.max()`` to ``h_low`` and everything else to
    ``h_high``, moving one element across whenever the sizes differ by more
    than one.
    """

    def __init__(self):
        self.h_low = MaxHeap()
        self.h_high = MinHeap()

    def add_element(self, value):
        """Insert ``value`` into the matching half and rebalance if needed."""
        low, high = self.h_low, self.h_high
        if low.heap_size == 0 or value < low.max():
            low.insert(value)
            if low.heap_size - high.heap_size > 1:
                high.insert(low.extract_max())
            return
        high.insert(value)
        if high.heap_size - low.heap_size > 1:
            low.insert(high.extract_min())

    def get_median(self):
        """Return the median.

        With an even element count the two middle values are returned as a
        ``(low_max, high_min)`` tuple; with an odd count the top of the
        larger heap is returned.
        """
        low_size = self.h_low.heap_size
        high_size = self.h_high.heap_size
        if (low_size + high_size) % 2 == 0:
            return self.h_low.max(), self.h_high.min()
        if low_size > high_size:
            return self.h_low.max()
        return self.h_high.min()

    def get_maxheap_elements(self):
        """Return the ``heap`` attribute of the lower-half heap."""
        return self.h_low.heap

    def get_minheap_elements(self):
        """Return the ``heap`` attribute of the upper-half heap."""
        return self.h_high.heap
def k_smallest(arr, k):
    """Load ``arr`` into a MinHeap and print ``k`` successive extractions.

    Nothing is returned; each extracted item is printed on its own line.
    """
    heap = MinHeap()
    for element in arr:
        heap.insert(element)

    for _ in range(k):
        print(heap.extract())
def single_item_test():
    """A heap with one item stores it at index 0 and hands it back on extract."""
    heap = MinHeap()
    heap.insert(1)

    assert heap.heap_data[0] == 1
    assert_size(heap, 1)

    extracted = heap.extract()
    assert extracted == 1
    assert_size(heap, 0)
def test_insert_and_get_many_random_items(self):
    """After every insert, size() and get_min() must match the items so far."""
    heap = MinHeap()
    items = random.sample(range(1000), 50)

    smallest_so_far = None
    for inserted, item in enumerate(items, start=1):
        heap.insert(item)
        assert heap.size() == inserted
        # Track the running minimum instead of re-scanning the prefix.
        if smallest_so_far is None or item < smallest_so_far:
            smallest_so_far = item
        assert heap.get_min() == smallest_so_far

    assert heap.size() == len(items)
def sort_k_sorted(array, k):
    """Return the items of ``array`` in the order a sliding min-heap emits them.

    The heap is seeded with the first ``k + 2`` items; each step extracts the
    minimum and, while input remains, feeds in the next unseen item.
    """
    window = k + 2
    min_heap = MinHeap(array[:window])
    result = []
    # ``upcoming`` is the index of the next item to feed into the heap.
    for upcoming in range(window, window + len(array)):
        result.append(min_heap.extract_min())
        if upcoming < len(array):
            min_heap.insert(array[upcoming])
    return result
def test_insert_and_get_many_items(self):
    """Insert a fixed sequence and check size, minimum and final layout."""
    heap = MinHeap()
    items = [9, 25, 86, 3, 29, 5, 55]

    for count, item in enumerate(items, start=1):
        heap.insert(item)
        assert heap.size() == count
        assert heap.get_min() == min(items[:count])

    assert heap.size() == len(items)
    # Expected internal array after all inserts.
    assert heap.items == [3, 9, 5, 25, 29, 86, 55]
def test_insert_and_remove_many_random_items(self):
    """Removing every item from a randomly filled heap yields sorted order."""
    heap = MinHeap()
    items = random.sample(range(1000), 50)
    for item in items:
        heap.insert(item)
    assert heap.size() == len(items)

    for expected in sorted(items):
        removed = heap.remove_min()
        assert expected == removed

    assert heap.size() == 0
def test_insert_and_remove_many_items(self):
    """Removing every item from a fixed heap yields them in sorted order."""
    heap = MinHeap()
    items = [9, 25, 86, 3, 29, 5, 55]
    for item in items:
        heap.insert(item)
    assert heap.size() == len(items)

    for expected in sorted(items):
        removed = heap.remove_min()
        assert expected == removed

    assert heap.size() == 0
def incomplete_tree_test():
    """Two items (a root with a single child) are stored and extracted in order."""
    heap = MinHeap()
    for value in (2, 1):
        heap.insert(value)

    assert heap.heap_data[0] == 1
    assert heap.heap_data[1] == 2
    assert_size(heap, 2)

    first = heap.extract()
    second = heap.extract()
    assert [first, second] == [1, 2]
    assert_size(heap, 0)
def sort_k_sorted(arr, k):
    """Sort a sequence in which every item is at most ``k`` slots out of place.

    A min-heap of ``k + 1`` items is slid over ``arr``: each step extracts the
    smallest buffered item and pushes the next unseen one, and the heap is
    drained at the end.

    Unlike the previous version, inputs shorter than ``k + 1`` (including an
    empty one) are handled instead of raising ``IndexError``; a negative
    ``k`` is treated as 0.

    :param arr: sequence supporting ``len`` and slicing
    :param k: maximum displacement of any item from its sorted position
    :return: new list with the extracted items
    """
    if len(arr) == 0:
        return []

    # Slicing clamps to the sequence length, so short inputs are safe.
    window = max(k, 0) + 1

    ans = []
    h = MinHeap()
    for item in arr[:window]:
        h.insert(item)

    # Emit the current minimum before admitting each further item.
    for item in arr[window:]:
        ans.append(h.extract())
        h.insert(item)

    # Drain whatever is still buffered.
    while not h.is_empty():
        ans.append(h.extract())
    return ans
def top_k(nums, k):
    """Return the k largest elements of ``nums``.

    When ``nums`` has at most ``k`` elements it is returned unchanged (the
    very same list object). Otherwise a min-heap of capacity ``k`` is seeded
    with the first ``k`` elements and every later element that beats the
    heap top replaces it; the result is ``get_data()`` of that heap.
    """
    if len(nums) <= k:
        return nums

    heap = MinHeap(nums[:k], k)
    for idx in range(k, len(nums)):
        weakest_kept = heap.get_top()
        if not nums[idx] > weakest_kept:
            continue
        # The newcomer beats the weakest survivor: swap them.
        heap.remove_top()
        heap.insert(nums[idx])
    return heap.get_data()
def merge(lists):
    """Lazily merge several lists through a min-heap.

    The heap holds, per input list, its current head value as key together
    with ``(list index, position)`` as payload. Each step yields the
    smallest key and pushes the successor from the same list, if any.

    Empty input lists are skipped; previously they raised ``IndexError``
    while seeding the heap.

    :param lists: indexable collection of lists (each presumably sorted
        ascending, otherwise the output is not globally sorted)
    :return: generator over the merged values
    """
    h = MinHeap()
    for i, l in enumerate(lists):
        # store list index and position of last element taken from
        # that list in the min heap; an empty list has nothing to seed.
        if len(l) > 0:
            h.insert(l[0], (i, 0))

    # NOTE(review): relies on MinHeap being falsy when empty — confirm.
    while h:
        min_val, (li, pos) = h.extract_min()
        yield min_val
        source = lists[li]
        pos += 1
        if pos < len(source):
            h.insert(source[pos], (li, pos))
def median_maintenance(data):
    """Yield the running median after each element of ``data``.

    Two heaps are maintained: ``h_low`` (max-heap, lower half) and ``h_high``
    (min-heap, upper half). For an even number of seen elements the lower of
    the two middle values is reported.

    Inputs with fewer than two elements are now supported: an empty input
    yields nothing and a single element yields just that element.
    Previously both cases ended in ``IndexError``.

    :param data: sequence supporting ``len``, indexing and slicing
    :return: generator of medians, one per input element
    """
    if len(data) == 0:
        return
    yield data[0]
    if len(data) == 1:
        return

    # Seed each heap with one of the first two elements, smaller one low.
    if data[0] < data[1]:
        h_high, h_low = MinHeap([data[1]]), MaxHeap([data[0]])
    else:
        h_high, h_low = MinHeap([data[0]]), MaxHeap([data[1]])

    # The heaps are peeked by extracting and re-inserting the top element.
    median = h_low.extract_max()
    h_low.insert(median)
    yield median

    for k in data[2:]:
        lower, upper = h_low.extract_max(), h_high.extract_min()
        if k <= lower:
            h_low.insert(k)
        else:
            h_high.insert(k)
        h_low.insert(lower)
        h_high.insert(upper)

        # Keep the heap sizes within one of each other.
        if abs(h_high.size() - h_low.size()) > 1:
            if h_high.size() > h_low.size():
                h_low.insert(h_high.extract_min())
            else:
                h_high.insert(h_low.extract_max())

        # Even total or larger lower half: the median is the top of h_low.
        if (h_high.size() + h_low.size()) % 2 == 0 or h_low.size() > h_high.size():
            median = h_low.extract_max()
            h_low.insert(median)
        else:
            median = h_high.extract_min()
            h_high.insert(median)
        yield median
class HuffmanEncoder:
    """Build Huffman codes from a ``{character: frequency}`` mapping.

    Fixes over the previous version:

    * a single-symbol alphabet used to return the placeholder ``Node(0)``
      instead of the symbol's leaf; the symbol now gets the code ``'0'``;
    * an empty mapping now produces empty code tables instead of popping
      from an empty heap;
    * the finished root is removed from ``self.nodes`` so a second call to
      ``generate_coding`` no longer mixes the old root with fresh leaves.
    """

    # Class-level default kept for backward compatibility; every instance
    # replaces it with its own mapping in __init__.
    frequencies = {}

    def __init__(self, frequencies):
        self.frequencies = frequencies
        self.nodes = MinHeap()

    def to_nodes(self):
        """Insert one leaf ``Node`` per character into ``self.nodes``."""
        for character, weight in self.frequencies.items():
            self.nodes.insert(Node(weight, None, None, character))

    def generate_coding(self):
        """Return ``(d, e)``: character -> code and code -> character."""
        d = {}
        e = {}
        tree = self.construct_huffman_tree()
        if tree is None:
            # Nothing to encode.
            return d, e
        if tree.left is None and tree.right is None:
            # A lone leaf has no branches to derive bits from; give it a
            # one-bit code so encoded output is not empty.
            d[tree.character] = '0'
            e['0'] = tree.character
            return d, e
        self.encode_huffman_tree_r(tree, d, e)
        return d, e

    def encode_huffman_tree_r(self, node, d, e, val=''):
        """Walk the tree, appending '0' for left and '1' for right branches.

        Leaves record their accumulated code in ``d`` and ``e``; both dicts
        are mutated in place and also returned.
        """
        if node.right is None and node.left is None:
            d[node.character] = val
            e[val] = node.character
        if node.left is not None:
            self.encode_huffman_tree_r(node.left, d, e, val + '0')
        if node.right is not None:
            self.encode_huffman_tree_r(node.right, d, e, val + '1')
        return d, e

    def construct_huffman_tree(self):
        """Merge the two lightest nodes until one root remains.

        Returns the root node, or ``None`` when there are no characters.
        """
        self.to_nodes()
        if self.nodes.length() == 0:
            return None
        while self.nodes.length() > 1:
            left = self.nodes.pop()
            right = self.nodes.pop()
            merged_value = left.value + right.value
            # When exactly one of the two is a leaf, keep the leaf on the left.
            if right.character is not None and left.character is None:
                parent = Node(merged_value, right, left)
            else:
                parent = Node(merged_value, left, right)
            self.nodes.insert(parent)
        # Exactly one node is left: the root (or the only leaf).
        return self.nodes.pop()
def algorithm(self, graph):
    """Grow ``self.tree`` edge by edge, starting from the lightest edge.

    Edges are popped from a min-heap and removed from ``graph``. An edge
    whose endpoints are both already in the tree is discarded; any other
    edge is added to the tree, its weight is added to ``self.min_weight``
    and the edges ``graph`` still reports for its endpoints are queued.
    Note that ``graph`` is mutated.
    """
    by_weight = sorted(graph.E, key=self.edge_wt_sort)
    frontier = MinHeap()
    frontier.insert(by_weight[0])

    while not frontier.empty():
        edge = frontier.pop()
        graph.remove_edge(edge)

        reaches_new_vertex = (edge.v1 not in self.tree.V()
                              or edge.v2 not in self.tree.V())
        if reaches_new_vertex:
            self.tree.add_edge(edge)
            self.min_weight += edge.wt
            frontier.insert_all(graph.edges(edge.v1) + graph.edges(edge.v2))
def several_elements_test():
    """Eight unordered inserts come back out in ascending order."""
    heap = MinHeap()
    for value in (5, 3, 4, 122, 100, 2, 8, 18):
        heap.insert(value)
    assert_size(heap, 8)

    drained = [heap.extract() for _ in range(8)]
    assert drained == [2, 3, 4, 5, 8, 18, 100, 122]
    assert_size(heap, 0)
def top_k(nums, k):
    """
    Return the k largest elements of the array.

    :param nums: input sequence
    :param k: how many of the largest elements to keep
    :return: ``nums`` itself when it has at most k elements, otherwise the
        data of a size-k min-heap holding the survivors
    """
    if len(nums) <= k:
        return nums

    survivors = MinHeap(nums[:k], k)
    position = k
    while position < len(nums):
        threshold = survivors.get_top()
        if nums[position] > threshold:
            survivors.remove_top()
            survivors.insert(nums[position])
        position += 1
    return survivors.get_data()
def get_static_top_k(nums, k):
    """
    Get the top-K elements of a static data set.

    :param nums: input sequence
    :param k: how many of the largest elements to keep
    :return: ``nums`` itself when it has at most k elements, otherwise the
        data of a size-k min-heap holding the survivors
    """
    if len(nums) <= k:
        return nums

    heap = MinHeap(nums[:k], k)
    # The heap is only built once, right after seeding.
    heap.build_heap()

    # Challenge the current heap top with every remaining element.
    for idx in range(k, len(nums)):
        current_floor = heap.get_top()
        if not nums[idx] > current_floor:
            continue
        # Larger than the weakest survivor: replace it.
        heap.remove_top()
        heap.insert(nums[idx])
    return heap.get_data()
def top_k(nums, k):
    """
    Return the top k elements of the array.

    :param nums: input sequence
    :param k: how many of the largest elements to keep
    :return: ``nums`` itself when it has at most k elements, otherwise the
        data of the size-k min-heap built below
    """
    if len(nums) <= k:
        return nums

    kept = MinHeap(nums[:k], k)
    for offset in range(len(nums) - k):
        index = k + offset
        smallest_kept = kept.get_top()
        if nums[index] > smallest_kept:
            # Evict the smallest kept value in favour of the larger one.
            kept.remove_top()
            kept.insert(nums[index])
    return kept.get_data()
def top_k(nums, k):
    """
    Return the k largest elements of the array.

    :param nums: input sequence
    :param k: how many of the largest elements to keep
    :return: ``nums`` itself when it has at most k elements, otherwise the
        data of the size-k min-heap built below
    """
    if len(nums) <= k:
        return nums

    # First build a min-heap from the first k elements of nums: k-1 of them
    # are >= the heap top, so the heap as a whole is the current top k.
    top_heap = MinHeap(nums[:k], k)

    # For each remaining element: if it is larger than the heap top, remove
    # the top, add the new element and let the heap re-heapify bottom-up.
    for pos in range(k, len(nums)):
        heap_min = top_heap.get_top()
        if not nums[pos] > heap_min:
            continue
        top_heap.remove_top()
        top_heap.insert(nums[pos])

    return top_heap.get_data()
def test_exch(count):
    """Test exchange method.

    NOTE(review): ``exch`` is called with the two stored values, while the
    assertions check the two positions — confirm which one ``exch`` expects.
    ``count`` is not used by the body.
    """
    heap = MinHeap()
    for _ in range(MAX_ITER):
        heap.insert(randint(0, 100))

    # Pick two random slots and remember what they hold.
    pos_a = randint(0, len(heap.heap) - 1)
    val_a = heap.heap[pos_a]
    pos_b = randint(0, len(heap.heap) - 1)
    val_b = heap.heap[pos_b]

    heap.exch(val_a, val_b)

    swapped_a = heap.heap[pos_a] == val_b
    assert swapped_a and heap.heap[pos_b] == val_a
def test_min_heap_sorting(self):
    """Popping a heap filled by insert() or heapify() yields sorted order.

    Uses ``range`` rather than the Python-2-only ``xrange`` so the test
    runs on both Python 2 and Python 3.
    """
    # seed for consistent testing and reproducibility
    for seed in range(10):
        random.seed(seed)
        heap = MinHeap()
        shuffled_nums = [int(random.random() * 20 - 10) for _ in range(1000)]
        nums = sorted(shuffled_nums)

        # Fill one value at a time, then drain.
        for n in shuffled_nums:
            heap.insert(n)
        for n in nums:
            self.assertEqual(n, heap.pop())

        # The heap is empty again: refill in bulk, then drain.
        heap.heapify(shuffled_nums)
        for n in nums:
            self.assertEqual(n, heap.pop())
def get_top_k(nums, k):
    """
    Return the k largest elements of the array.

    :param nums: input sequence
    :param k: how many of the largest elements to keep
    :return: ``nums`` itself when it has fewer than k elements, otherwise
        the data of the size-k min-heap built below
    """
    if len(nums) < k:
        return nums

    min_heap = MinHeap(nums[:k], k)
    for idx in range(k, len(nums)):
        heap_top = min_heap.get_top()
        if not nums[idx] > heap_top:
            continue
        # Larger than the smallest kept value: swap it in.
        min_heap.remove_top()
        min_heap.insert(nums[idx])
    return min_heap.get_data()
class TopK:
    """Track the k largest integers of a stream in a capacity-k min-heap."""

    def __init__(self, k):
        self.k = k
        self.heap = MinHeap(capacity=k)

    def add_data(self, num):
        """Offer ``num``; it is kept if the heap is not full or it beats the top."""
        assert type(num) is int

        if self.heap.get_length() < self.k:
            # Still room: keep unconditionally.
            self.heap.insert(num)
            return

        smallest_kept = self.heap.get_top()
        if num > smallest_kept:
            # Larger than the weakest survivor: replace it.
            self.heap.remove_top()
            self.heap.insert(num)

    def get_top_k(self):
        """Return a human-readable string with the heap's current data."""
        data = self.heap.get_data()
        return 'current top-k is :' + str(data)

    def __repr__(self):
        return self.heap.__repr__()
def shortest_paths(self, v):
    '''
    Computes the shortest path distances from a source vertex to all other
    vertices using Dijkstra's algorithm.

    Returns a tuple ``(processed, PathFinder(trace))`` where ``processed``
    maps every handled vertex to its final score and ``trace`` is the list
    of ``Edge`` objects created in processing order. Vertices that were
    never given a finite score are processed with a score of
    ``float('inf')``.
    '''
    processed = {} # mapping of processed vertices to geodesic distance
    candidates = {} # mapping of candidate vertices to their Dijkstra scores; exists for convenience of O(1) lookups
    trace = [] # stores edges in order of processing; used to extract shortest paths

    # Score of reaching dest through an already-processed src.
    def dijkstra_score(src, dest):
        return processed[src] + self.getWeight(src, dest)

    # Initialize Dijkstra scores: the source is processed at distance 0 and
    # seeds its neighbours; every other vertex starts at infinity unless it
    # already received a score from the source.
    for n in self.nodes:
        if n == v:
            processed[n] = 0
            for dest in self.edges[n]:
                score = dijkstra_score(n, dest)
                if dest not in candidates or score < candidates[dest]:
                    candidates[dest] = score
        else:
            if n not in candidates:
                candidates[n] = float('inf')

    # heapify node/score tuples, provide comparison key
    unprocessed = MinHeap(list(candidates.items()), lambda x:x[1])

    # compute shortest paths
    while not unprocessed.is_empty():
        n,s = unprocessed.extract_min()
        processed[n] = s
        candidates.pop(n)
        # NOTE(review): each trace edge starts at the destination of the
        # previously recorded edge, not necessarily at n's predecessor on
        # its shortest path — confirm this is what PathFinder expects.
        if len(trace) == 0:
            trace.append(Edge(v, n)) # Investigate KeyError when using WeightedEdge
        else:
            src = trace[-1].getDestination()
            trace.append(Edge(src, n)) # Investigate KeyError when using WeightedEdge
        # Relax edges into still-unprocessed vertices: the stale heap entry
        # is deleted and re-inserted with the better of old and new score.
        for dest in self.edges[n]:
            if dest in candidates:
                unprocessed.delete((dest, candidates[dest]))
                score = dijkstra_score(n, dest)
                best = min(candidates[dest], score)
                candidates[dest] = best
                unprocessed.insert((dest, best))
    return (processed, PathFinder(trace))
class MedianMaintenance:
    """Median of a stream, maintained with a max-heap and a min-heap.

    ``hlow_heap`` keeps the lower values, ``hhigh_heap`` the higher ones.
    """

    def __init__(self):
        self.hlow_heap = MaxHeap()
        self.hhigh_heap = MinHeap()

    def compute_median(self, i):
        """Add ``i`` to the stream and return the updated median."""
        self.insert_heap(i)
        self.balance_heap()
        return self.median()

    def balance_heap(self):
        """Move one element across when the sizes differ by more than one."""
        surplus_high = self.hhigh_heap.size - self.hlow_heap.size
        if surplus_high > 1:
            self.hlow_heap.insert(self.hhigh_heap.extract_min())
        elif surplus_high < -1:
            self.hhigh_heap.insert(self.hlow_heap.extract_max())

    def insert_heap(self, i):
        """Insert ``i`` into the upper heap only when it clearly belongs there.

        ``i`` goes to ``hhigh_heap`` when the lower heap is non-empty, ``i``
        is not below its maximum, and ``i`` exceeds the upper heap's
        minimum; in every other case it goes to ``hlow_heap``.
        """
        low = None if self.hlow_heap.is_empty() else self.hlow_heap.peek_max()
        high = None if self.hhigh_heap.is_empty() else self.hhigh_heap.peek_min()

        belongs_high = (
            low is not None
            and not i < low
            and high is not None
            and i > high
        )
        target = self.hhigh_heap if belongs_high else self.hlow_heap
        target.insert(i)

    def median(self):
        """Return the upper heap's minimum if it is one larger, else the lower heap's maximum."""
        if self.hhigh_heap.size - self.hlow_heap.size == 1:
            return self.hhigh_heap.peek_min()
        # default choice when hlow is bigger than / the same size as hhigh
        return self.hlow_heap.peek_max()
def Dijkstra(self, v):
    """
    Dijkstra shortest paths from source v.

    Returns the list of distances from v indexed by vertex
    (float("inf") where no path was found), or None when the graph
    reports a negative edge weight.
    """
    if self.hasNegativeWeight():
        return None  # the algorithm is not valid with negative weights

    # pending heap entries, finished vertices and current best distances
    pending = MinHeap()
    done = set()
    d = [float("inf")] * self.vertexCount()
    d[v] = 0

    # Only the source is queued up front; improved vertices are re-queued
    # later because the basic heap has no decrease-key operation.
    pending.insert(KeyValuePair(d[v], v))

    while True:
        kvp = pending.extractMin()
        if not kvp:
            break
        vert = kvp.value
        if vert in done:
            continue  # stale entry for an already finished vertex
        done.add(vert)

        # relax every edge leading to a vertex that is not finished yet
        for e in self._adj[vert]:
            if e.dest in done:
                continue
            candidate = d[vert] + e.weight
            if candidate < d[e.dest]:
                d[e.dest] = candidate
                pending.insert(KeyValuePair(candidate, e.dest))

    return d
def get_max_pairs(A, B, M=None):
    """Yield the largest pairwise sums ``A[i] + B[j]`` in non-increasing order.

    Starting from the pair of both maxima, a min-heap keyed on the negated
    sum is expanded towards ``(i - 1, j)`` and ``(i, j - 1)``; a set
    prevents a pair from being queued twice.

    Changes over the previous version:

    * empty ``A`` or ``B`` yields nothing instead of raising ``IndexError``;
    * ``M`` is capped at ``len(A) * len(B)``, so the heap is never asked
      for more pairs than exist;
    * ``B`` is indexed by its own length, so the lists may differ in size.

    Side effect (unchanged): ``A`` and ``B`` are sorted in place when the
    generator is first advanced.

    :param A: list of numbers
    :param B: list of numbers
    :param M: number of sums to produce; a falsy value (``None`` or 0)
        means ``len(A)``
    :return: generator of sums
    """
    if not M:
        M = len(A)
    A.sort()
    B.sort()
    if len(A) == 0 or len(B) == 0:
        return
    M = min(M, len(A) * len(B))

    h = MinHeap()
    start = (len(A) - 1, len(B) - 1)
    # Keys are negated sums so the min-heap surfaces the largest sum first.
    h.insert(-A[start[0]] - B[start[1]], start)
    used_pairs = {start}

    for _ in range(M):
        key, (i, j) = h.extract_min()
        yield -key
        for pair in ((i - 1, j), (i, j - 1)):
            if pair[0] < 0 or pair[1] < 0 or pair in used_pairs:
                continue
            h.insert(-A[pair[0]] - B[pair[1]], pair)
            used_pairs.add(pair)
from heap import MinHeap, MaxHeap
from math import floor
import sys

# Invariant: the min heap holds floor(n/2) of the n values read so far, so
# the running median is always the top of the max heap.
with open('Median.txt') as f:
#with open('median10_test.txt') as f:
    a = [int(row) for row in f]

minHeap = MinHeap([])
maxHeap = MaxHeap([])
medians = []

for seen, value in enumerate(a, start=1):
    # Route the new value to one of the two halves.
    if minHeap.size() == 0 and maxHeap.size() == 0:
        maxHeap.insert(value)
    elif value < maxHeap.top():
        maxHeap.insert(value)
    else:
        minHeap.insert(value)

    # Restore the size invariant by moving one element across if needed.
    target_min_size = floor(seen / 2)
    if minHeap.size() > target_min_size:
        maxHeap.insert(minHeap.extract())
    elif minHeap.size() < target_min_size:
        minHeap.insert(maxHeap.extract())

    medians.append(maxHeap.top())

# Report the sum of all running medians modulo 10000.
print(sum(medians) % 10000)
def find_shortest(self):
    """Return the length of this object's ``encoding``."""
    return len(self.encoding)


def find_longest(self):
    """Return the length of this object's ``encoding``."""
    return len(self.encoding)


# Read the symbol weights; the first line of the file is skipped.
with open("huffman.txt", "r") as source:
    weights = [int(row) for row in source.readlines()[1:]]
infile = list(enumerate(weights))
##infile = [(0,1), (1,5), (2,7), (3,2), (4,3)]

# One leaf per (symbol index, weight) pair, keyed on the weight.
min_heap = MinHeap()
for symbol, weight in infile:
    min_heap.insert(weight, HCLeafNode(symbol, weight))

# Merge until only one node is left, the root.
while min_heap.size() > 1:
    # Take the two smallest nodes and tag each with its branch bit.
    small = min_heap.pop().get_data()[0]
    small.add_prefix("0")
    two_small = min_heap.pop().get_data()[0]
    two_small.add_prefix("1")
    # Merge them under a new middle node.
    merged = HCMiddleNode(small, two_small)
    min_heap.insert(merged.get_frequency(), merged)

tree = min_heap.pop().get_data()[0]
def test_heap():
    """Pytest fixture: a MinHeap filled with MAX_ITER random ints in [0, 100]."""
    heap = MinHeap()
    inserted = 0
    while inserted < MAX_ITER:
        heap.insert(randint(0, 100))
        inserted += 1
    return heap
def FindKthSmallestEle(orig, k):
    """Return the k-th item removed from the heap ``orig`` via ``deleteMin``.

    Returns -1 when ``k`` exceeds ``orig.size``. The heap is consumed:
    items are deleted until the k-th one is reached. If the heap runs
    empty before that (e.g. ``k <= 0``), the function falls through and
    returns None.
    """
    if k > orig.size:
        return -1

    removed = 0
    while orig.size > 0:
        x = orig.deleteMin()
        removed += 1
        if removed == k:
            return x


if __name__ == '__main__':
    # time: O(k logn) - k for k times, logn for each deletion
    orig = MinHeap()
    for value in (1, 20, 5, 100, 1000, 12, 18, 16):
        orig.insert(value)
    print(orig.heapList)
    print(FindKthSmallestEle(orig, 6))
def test_insert_and_get_one_item(self):
    """A single insert is reflected in size(), get_min() and items."""
    min_heap = MinHeap()
    min_heap.insert(5)

    observed_size = min_heap.size()
    assert observed_size == 1
    observed_min = min_heap.get_min()
    assert observed_min == 5
    assert min_heap.items == [5]
class TestMinHeap(unittest.TestCase):
    """Unit tests for MinHeap: repr, insert, get_min, extract_min, build_heap."""

    def setUp(self):
        self.heap = MinHeap()

    def test_basic_initialization_and_repr(self):
        self.assertEqual(repr(self.heap), '[]')

    def test_insert(self):
        # (value to insert, expected repr afterwards); the expected size is
        # simply the number of inserts performed so far.
        steps = [
            (4, '[4]'),
            (4, '[4, 4]'),
            (6, '[4, 4, 6]'),
            (1, '[1, 4, 6, 4]'),
            (3, '[1, 3, 6, 4, 4]'),
        ]
        for expected_size, (value, expected_repr) in enumerate(steps, start=1):
            self.heap.insert(value)
            self.assertEqual(repr(self.heap), expected_repr)
            self.assertEqual(self.heap.size, expected_size)

    def test_get_min(self):
        # An empty heap reports no minimum.
        self.assertEqual(self.heap.get_min(), None)
        # (value to insert, expected minimum afterwards)
        for value, expected_min in ((4, 4), (7, 4), (2, 2), (-1, -1)):
            self.heap.insert(value)
            self.assertEqual(self.heap.get_min(), expected_min)

    def test_extract_min(self):
        for value in (4, 5, 7, 2, -1):
            self.heap.insert(value)
        # Ascending order, then None once the heap is exhausted.
        for expected in (-1, 2, 4, 5, 7, None):
            self.assertEqual(self.heap.extract_min(), expected)

    def test_build_heap(self):
        self.heap.build_heap([4, 4, 6, 1, 3])
        self.assertEqual(repr(self.heap), '[1, 3, 6, 4, 4]')
class PathFinderTieBreak:
    """Grid path finder that remembers goal distances between searches.

    The maze file is read as: first non-blank line = start coordinate,
    second non-blank line = goal coordinate (both parsed by
    ``line_to_high_coord``), remaining non-blank lines = grid rows in which
    the character '1' marks a blocked cell.

    The search repeatedly expands the state extracted from the open heap.
    Distances recorded in ``known_distances`` by an earlier successful
    search replace the Manhattan estimate for the same cells.

    Compared with the previous version only Python-2-only constructs were
    replaced (``print`` statement, ``dict.has_key``); the class now parses
    and runs on both Python 2 and Python 3 with unchanged behaviour.
    """

    def __init__(self, maze_file):
        self.close = dict()
        self.open = MinHeap()
        self.open_dict = dict()
        self.start_state = None
        self.goal_state = None
        self.known_distances = dict()
        # Pre-allocate max_size empty rows.
        self.grid = [[] for _ in range(max_size)]
        with open(maze_file, 'r') as f:
            line_row = 0
            for line in f:
                # One spare row per line read keeps grid[line_row] valid
                # even when the file holds more than max_size rows.
                self.grid.append([])
                line = line.rstrip()
                if not line:
                    continue
                if not self.start_state:
                    self.start_state = line_to_high_coord(line)
                    self.start_state.g = 0
                    continue
                if not self.goal_state:
                    self.goal_state = line_to_high_coord(line)
                    continue
                for pos in line:
                    self.grid[line_row].append(pos == '1')
                line_row += 1

    def find_new_start(self):
        """Move the start to the first open cell that is not the current start.

        Cells are scanned row by row; when one is found the new start is
        printed and the method returns. If none exists nothing changes.
        """
        for yind, row in enumerate(self.grid):
            for xind, blocked in enumerate(row):
                is_current_start = (self.start_state.x == xind
                                    and self.start_state.y == yind)
                if not blocked and not is_current_start:
                    self.start_state = line_to_high_coord(
                        str(xind) + "," + str(yind))
                    self.start_state.g = 0
                    print("New start is " + str(self.start_state))
                    return

    def find_path(self):
        """Reset the open/closed bookkeeping and search from start to goal.

        ``known_distances`` is deliberately kept between calls.
        """
        self.open_dict.clear()
        self.close.clear()
        self.open = MinHeap()
        return self.find_path_internal(self.start_state, self.goal_state, [], [])

    def find_path_internal(self, start, goal, path, final_path):
        """Expand ``start`` and recurse on the next state from the open heap.

        ``path`` collects every expanded state; ``final_path`` collects the
        states whose successor was directly adjacent, plus the goal. Both
        lists are mutated in place. Returns ``(path, final_path)``; when the
        open heap is exhausted the returned path is an empty list.

        NOTE: the recursion depth grows with the number of expanded states.
        """
        path.append(start)
        if start == goal:
            final_path.append(start)
            # Fill in h values with what we learned: the first entry gets
            # len(final_path), counting down to 1 for the goal.
            for remaining, coord in zip(range(len(final_path), 0, -1), final_path):
                self.known_distances[coord] = remaining
            return (path, final_path)

        self.close[start] = True
        for n in self.compute_valid_neighbors(start):
            n.g = start.g + 1
            if self.grid[n.y][n.x]:
                continue  # blocked cell
            if n in self.known_distances:
                n.distance = self.known_distances[n]
            else:
                n.distance = calculate_manhattan_distance(n, goal)
            self.open.insert(n)
            self.open_dict[n] = True

        nextC = self.open.extract()
        if not nextC:
            return ([], [])
        # Only states whose successor is a direct neighbour join final_path.
        if calculate_manhattan_distance(start, nextC) == 1:
            final_path.append(start)
        path, _ = self.find_path_internal(nextC, goal, path, final_path)
        return (path, final_path)

    def compute_valid_neighbors(self, start):
        """Return the in-bounds 4-neighbours of ``start`` not yet closed or queued.

        Order is left, up (y - 1), down (y + 1), right.
        """
        candidates = (
            (start.x - 1 >= 0, start.x - 1, start.y),
            (start.y - 1 >= 0, start.x, start.y - 1),
            (start.y + 1 < max_size, start.x, start.y + 1),
            (start.x + 1 < max_size, start.x + 1, start.y),
        )
        neighbors = []
        for in_bounds, x, y in candidates:
            if not in_bounds:
                continue
            n = HighCoord(x, y)
            if n not in self.close and n not in self.open_dict:
                neighbors.append(n)
        return neighbors
class PathFinderNormal:
    """Grid path finder driven by an open heap of candidate states.

    The maze file is read as: first non-blank line = start coordinate,
    second non-blank line = goal coordinate (both parsed by
    ``line_to_coord``), remaining non-blank lines = grid rows in which the
    character '1' marks a blocked cell.

    Changes over the previous version:

    * ``dict.has_key`` (Python 2 only) is replaced by the ``in`` operator;
    * the left neighbour is now also skipped when it is already in the
      open set, exactly like the other three directions. Previously it
      could be queued repeatedly.
    """

    def __init__(self, maze_file):
        self.close = dict()
        self.open = MinHeap()
        self.open_dict = dict()
        self.start_state = None
        self.goal_state = None
        # Pre-allocate max_size empty rows.
        self.grid = [[] for _ in range(max_size)]
        with open(maze_file, 'r') as f:
            line_row = 0
            for line in f:
                # One spare row per line read keeps grid[line_row] valid
                # even when the file holds more than max_size rows.
                self.grid.append([])
                line = line.rstrip()
                if not line:
                    continue
                if not self.start_state:
                    self.start_state = line_to_coord(line)
                    self.start_state.g = 0
                    continue
                if not self.goal_state:
                    self.goal_state = line_to_coord(line)
                    continue
                for pos in line:
                    self.grid[line_row].append(pos == '1')
                line_row += 1

    def find_path(self):
        """Search from the start state to the goal state.

        The open/closed bookkeeping is not reset here, so state from an
        earlier call carries over.
        """
        return self.find_path_internal(self.start_state, self.goal_state, [])

    def find_path_internal(self, start, goal, path):
        """Expand ``start`` and recurse on the next state from the open heap.

        ``path`` is mutated in place and collects every expanded state.
        Returns ``path`` once the goal is reached, or an empty list when
        the open heap is exhausted.

        NOTE: the recursion depth grows with the number of expanded states.
        """
        path.append(start)
        if start == goal:
            return path

        self.close[start] = True
        for n in self.compute_valid_neighbors(start):
            n.g = start.g + 1
            if self.grid[n.y][n.x]:
                continue  # blocked cell
            n.distance = calculate_manhattan_distance(n, goal)
            self.open.insert(n)
            self.open_dict[n] = True

        nextC = self.open.extract()
        if not nextC:
            return []
        return self.find_path_internal(nextC, goal, path)

    def compute_valid_neighbors(self, start):
        """Return the in-bounds 4-neighbours of ``start`` not yet closed or queued.

        Order is left, up (y - 1), down (y + 1), right.
        """
        candidates = (
            (start.x - 1 >= 0, start.x - 1, start.y),
            (start.y - 1 >= 0, start.x, start.y - 1),
            (start.y + 1 < max_size, start.x, start.y + 1),
            (start.x + 1 < max_size, start.x + 1, start.y),
        )
        neighbors = []
        for in_bounds, x, y in candidates:
            if not in_bounds:
                continue
            n = Coord(x, y)
            if n not in self.close and n not in self.open_dict:
                neighbors.append(n)
        return neighbors