def _select_maxheap(data, k):
    """Select an element from ``data`` using a max-heap of ``k + 1`` items.

    Refer to <<Data structure and algorithm analysis in C>>.

    The heap is seeded with the first ``k + 1`` items. Every later item that
    compares smaller than the heap's current top replaces that top, and the
    top that remains after the scan is returned.

    The runtime complexity quoted by the original author is
    O(k + (N-k) * log(k)) = O(Nlog(k)).

    NOTE(review): relies on the project-local ``heap.MaxHeap`` exposing
    ``peek``/``deleteMax``/``insert``; presumably ``peek`` returns the largest
    stored item, which would make the result the (k+1)-th smallest element --
    confirm against the ``heap`` module.
    """
    import heap

    seed_size = k + 1
    bounded = heap.MaxHeap(data[0:seed_size])
    for position in range(seed_size, len(data)):
        candidate = data[position]
        if candidate < bounded.peek():
            # Swap the current top for the smaller newcomer.
            bounded.deleteMax()
            bounded.insert(candidate)
    return bounded.peek()
def transaction_heaps_add_number(self, identifier, number):
    """Route ``number`` into one heap of the pair stored under ``identifier``.

    ``self.transaction_amt_heaps`` maps an identifier to a
    ``(lower_heap, higher_heap)`` tuple. When the identifier is new, a fresh
    ``(heap.MaxHeap(), heap.MinHeap())`` pair is registered first. The number
    is then pushed according to:

    1) lower_heap is empty, or number < lower_heap.peek() -> lower_heap;
    2) anything else -> higher_heap.

    Args:
        identifier: Key selecting the heap pair. The format should be
            'CMTE_ID|TRANSACTION_DT' or 'CMTE_ID|ZIPCODE'.
        number: (float) The number to be added.
    """
    heaps_by_identifier = self.transaction_amt_heaps
    if identifier not in heaps_by_identifier:
        heaps_by_identifier[identifier] = (heap.MaxHeap(), heap.MinHeap())

    lower_heap, higher_heap = heaps_by_identifier[identifier]

    # Short-circuit matters: peek() is only consulted on a non-empty heap.
    belongs_low = lower_heap.empty() or number < lower_heap.peek()
    destination = lower_heap if belongs_low else higher_heap
    destination.push(number)
def heap_test():
    """Print a manual smoke test of ``h.Heap``, ``h.MinHeap`` and ``h.MaxHeap``.

    A plain heap is filled with nine fixed values and merged into a fresh
    min-heap and a fresh max-heap. Each of the three heaps is then reported
    in turn: a banner, its size, its ``pretty_print()`` rendering, and every
    element popped until it is empty.
    """
    heap = h.Heap()
    for value in (100, 25, 17, 2, 19, 3, 36, 7, 1):
        heap.insert(value)

    minheap = h.MinHeap()
    maxheap = h.MaxHeap()
    minheap.merge(heap)
    maxheap.merge(heap)

    report_plan = (
        # (banner, size label, heap) -- expected pop order noted per row
        ("------ heap ------", "Heap Size: ", heap),           # increasing
        ("------ minheap ------", "MinHeap Size: ", minheap),  # increasing
        ("------ maxheap ------", "MaxHeap Size: ", maxheap),  # decreasing
    )
    for banner, size_label, current in report_plan:
        print(banner)
        print(size_label + str(current.size()))
        current.pretty_print()
        while not current.empty():
            print(current.pop())
def heapsort(l):
    """Run the project's heap sort over ``l`` and return that same object.

    A ``heap.MaxHeap`` is built from the module-level comparator ``_comp``
    and ``l``, and its ``heapsort()`` method is invoked.

    NOTE(review): presumably ``MaxHeap.heapsort()`` reorders ``l`` in place,
    since its own return value is discarded -- confirm in the ``heap`` module.
    """
    sorter = heap.MaxHeap(_comp, l)
    sorter.heapsort()
    return l
def median_maintainance(filename):
    """Return the sum of the running medians of an integer stream, mod 10000.

    The file is read line by line, one integer per line (blank lines are
    skipped). After each number, the median of everything read so far is
    recorded; for an even count the lower of the two middle values is used.

    Two heaps hold the data: ``lower`` is a max-heap of the smaller half and
    ``upper`` is a min-heap of the larger half, so the median is always one
    of the two roots and each step costs O(log n).

    Before returning, two diagnostic lines are printed: the root of the
    lower heap, the root of the upper heap and both heap sizes (a root is
    shown as ``None`` when its heap is empty), followed by the un-reduced
    sum of the medians.

    Args:
        filename: Path of a text file containing one integer per line.

    Returns:
        ``sum(running medians) % 10000``; ``0`` for a file with no numbers.

    Raises:
        ValueError: If a non-blank line cannot be parsed as an integer.
    """
    # Function-scope import keeps this block self-contained.
    import heapq

    medians = []  # running median after each value read
    upper = []    # min-heap: values above the current median
    lower = []    # max-heap of the smaller values, stored negated for heapq

    with open(filename, 'r') as stream:
        for line in stream:
            text = line.strip()
            if not text:
                # Tolerate blank lines such as a trailing newline.
                continue
            value = int(text)

            if not lower and not upper:
                # The very first value seeds the upper heap and is its own median.
                heapq.heappush(upper, value)
                medians.append(value)
                continue

            # `upper` is never empty past this point, so upper[0] is safe.
            if value <= upper[0]:
                heapq.heappush(lower, -value)
            else:
                heapq.heappush(upper, value)

            # Whenever the sizes differ, move one root from the larger heap
            # to the smaller one; sizes then differ by at most one.
            if len(upper) > len(lower):
                heapq.heappush(lower, -heapq.heappop(upper))
            elif len(lower) > len(upper):
                heapq.heappush(upper, -heapq.heappop(lower))

            # Equal sizes -> lower median (root of `lower`); otherwise the
            # root of whichever heap holds the extra element.
            if len(lower) >= len(upper):
                medians.append(-lower[0])
            else:
                medians.append(upper[0])

    total = sum(medians)

    # Diagnostics; formatted through one string so the output is the same
    # "a b c d" line under both Python 2 and Python 3.
    lower_root = -lower[0] if lower else None
    upper_root = upper[0] if upper else None
    print("%s %s %s %s" % (lower_root, upper_root, len(lower), len(upper)))
    print(total)
    return total % 10000