def read_txt(self): # 读取已经选中的文本序号 chosen_indices = [] fi = 0 try: fi = file(self.chosen_indices, 'r') line = fi.readline().rstrip('\n') idx_list = line.split('\t') for idx in idx_list: chosen_indices.append(idx) fi.close() except IOError: pass # 读取rawset文档 fr = file(self.rawset_filename, 'r') while True: line = fr.readline().decode("utf-8") if len(line) == 0: # Zero length indicates EOF break seg_list = self.proc_line(line) if len(seg_list) == 1: # 预处理后文本为空 continue index = seg_list[0] text = seg_list[1] if index not in chosen_indices: probs = self.classifier.classify_proba(text) doc = [index, text, math.fabs(probs[0] - probs[1])] heapq.heappush(self.docset, doc) fr.close()
def slidingPuzzle(self, board: List[List[int]]) -> int:
    """
    Solve the 2x3 sliding puzzle with A* and return the number of moves.

    Queue entries are ``(heuristic + distance, distance, board)``.  A board
    is marked as visited when it is popped; neighbours whose serialized form
    is already visited are not queued.  Returns -1 when the queue runs dry
    without reaching the target layout.
    """
    goal = [
        [1, 2, 3],
        [4, 5, 0],
    ]
    rows, cols = len(board), len(board[0])
    expanded = set()
    frontier = [(self.heuristic_dist(board) + 0, 0, board)]
    while frontier:
        _, steps, board = heapq.heappop(frontier)
        expanded.add(self.ser(board))
        if board == goal:
            return steps

        steps += 1
        zr, zc = self.zero_pos(board)
        for dr, dc in dirs:
            nr, nc = zr + dr, zc + dc
            if not (0 <= nr < rows and 0 <= nc < cols):
                continue
            # Each queued state needs its own copy of the grid.
            candidate = deepcopy(board)
            candidate[nr][nc], candidate[zr][zc] = (
                candidate[zr][zc], candidate[nr][nc])
            if self.ser(candidate) in expanded:
                continue
            heapq.heappush(
                frontier,
                (self.heuristic_dist(candidate) + steps, steps, candidate),
            )

    return -1
def next(self):
    """
    Advance the iterator by one line.

    While the underlying file still has lines, one line is read, parsed as
    tab-separated ``chrom<TAB>pos<TAB>...`` and pushed onto ``self.heap`` as
    ``(chrom, pos, line)``.  The smallest heap entry is then popped into
    ``self.chrom``, ``self.pos`` and ``self.line`` for use by the comparison
    function.  Raises ``StopIteration`` once both file and heap are empty.
    """
    if not self.finishedReadingFile:
        try:
            line = self.theFile.next()
        except StopIteration:
            self.finishedReadingFile = True
        else:
            cols = line.strip().split("\t")
            chrom = cols[0]

            # Where possible, convert chromosome names into integers for
            # sorting; otherwise keep the original name.  Only a failed
            # integer conversion is tolerated here.
            # NOTE(review): strip("CHR") removes any leading/trailing C, H
            # or R characters, not just a "CHR" prefix.
            try:
                chrom = int(chrom.upper().strip("CHR"))
            except ValueError:
                pass

            pos = int(cols[1])
            heapq.heappush(self.heap, (chrom, pos, line))

    if len(self.heap) != 0:
        # Now take the top line
        self.chrom, self.pos, self.line = heapq.heappop(self.heap)
    else:
        raise StopIteration
def insert(self, node, priority=0):
    """Add ``node`` with ``priority``, replacing any existing entry for it.

    An existing entry is first removed through ``self.delete``.  The new
    entry ``(priority, sequence, node)`` is recorded in ``self.entry_finder``
    and pushed onto ``self.heap``; the sequence number comes from
    ``self.counter``.
    """
    already_queued = node in self.entry_finder
    if already_queued:
        self.delete(node)

    sequence = next(self.counter)
    record = (priority, sequence, node)
    self.entry_finder[node] = record
    heapq.heappush(self.heap, record)
def _get_server(self):
    """
    Get server to use for request.

    Also processes the inactive server heap: every server whose
    ``ts + self.retry_interval`` has passed is moved back into the active
    pool.  If the active pool is still empty afterwards, the oldest inactive
    server is restored regardless of its age.

    Raises ``IndexError`` when there is neither an active nor an inactive
    server.
    """
    with self._lock:
        now = time()
        # The heap is ordered by timestamp, so as soon as the root is not
        # due yet nothing behind it can be due either; stop there instead
        # of popping and re-pushing the same entry repeatedly.
        while self._inactive_servers:
            ts, server, message = self._inactive_servers[0]
            if (ts + self.retry_interval) > now:
                break
            heapq.heappop(self._inactive_servers)
            self._active_servers.append(server)
            logger.warning("Restored server %s into active pool", server)

        # if none is old enough, use oldest
        if not self._active_servers:
            ts, server, message = heapq.heappop(self._inactive_servers)
            self._active_servers.append(server)
            logger.info("Restored server %s into active pool", server)

        server = self._active_servers[0]
        self._roundrobin()

        return server
def set_resource_check(self, resource_id, check_time):
    """Schedule a check for ``resource_id`` at ``check_time``.

    Nothing happens when ``self.checks`` already holds exactly this time for
    the resource.  Otherwise the time is recorded and a
    ``(check_time, resource_id)`` entry is pushed onto ``self.check_heap``.
    """
    already_scheduled = self.checks.get(resource_id) == check_time
    if not already_scheduled:
        self.checks[resource_id] = check_time
        heapq.heappush(self.check_heap, (check_time, resource_id))
def on_elect(self, event):
    """Acknowledge an election event and queue it for consensus.

    Sends a ``worker-elect-ack`` carrying the event id, then pushes
    ``(clock, "hostname.pid", topic, action)`` onto the heap kept for that id
    in ``self.consensus_requests``.
    """
    request_id = event['id']
    self.dispatcher.send('worker-elect-ack', id=request_id)

    clock, hostname, pid, topic, action = self._cons_stamp_fields(event)
    origin = '%s.%s' % (hostname, pid)
    stamp = (clock, origin, topic, action)
    heappush(self.consensus_requests[request_id], stamp)
def test_connect_cascade(self):
    """get_conn() skips stale and closed pooled connections in turn."""
    now = time.time()
    db = TestDB('testing', stale_timeout=10)

    stale = (now - 15, 1)          # Skipped due to being stale.
    closed_old = (now - 5, 2)      # In the 'closed' set.
    usable = (now - 3, 3)
    closed_new = (now, 4)          # In the 'closed' set.

    for conn_id in (2, 4):
        db._closed.add(conn_id)
    db.counter = 4  # The next connection we create will have id=5.

    for entry in (stale, closed_old, usable, closed_new):
        heapq.heappush(db._connections, entry)

    # Conn 3 is not stale or closed, so we will get it.
    self.assertEqual(db.get_conn(), 3)
    self.assertEqual(db._in_use, {3: now - 3})
    self.assertEqual(db._connections, [closed_new])

    # Since conn 4 is closed, we will open a new conn.
    db.connect()
    self.assertEqual(db.get_conn(), 5)
    self.assertEqual(sorted(db._in_use.keys()), [3, 5])
    self.assertEqual(db._connections, [])
def set_timer(self, pause, msg):
    """Register a timer that expires ``pause`` ticks from now.

    The timer is pushed onto the module-level ``timers`` heap as
    ``(stoptime, tref, weakref-to-self)`` and remembered in ``self.timers``
    together with ``msg``.  Returns the timer reference from
    ``self.gettref()``.
    """
    handle = self.gettref()
    deadline = ticks() + pause
    owner = weakref.ref(self)
    heapq.heappush(timers, (deadline, handle, owner))
    self.timers[handle] = (deadline, msg)
    return handle
def search(self, board, hole):
    """Best-first search for a sequence of moves that solves ``board``.

    ``board`` is converted to a tuple and ``hole`` is the flat index of the
    empty cell.  Candidates are ordered by ``self.score`` and each board is
    queued at most once.  Returns the move history of the first goal state
    popped, or ``None`` when every reachable board has been examined without
    finding the goal.
    """
    board = tuple(board)
    # q stores: (score, board, hole, history = [move1, move2, ...])
    q = [(self.score(board, hole, []), board, hole, [])]
    seen = set([board])
    while q:
        (priority, board, hole, history) = heapq.heappop(q)
        if self.is_goal(board, hole):
            print('solution is %d steps long' % len(history))
            return history

        # Floor division keeps the row index an integer on every Python
        # version; the hole position does not depend on the direction tried.
        (x, y) = (hole % self.size, hole // self.size)

        # generate new candidates
        for (dx, dy) in ((-1, 0), (1, 0), (0, -1), (0, 1)):
            # only consider legal moves
            if not (0 <= x + dx < self.size and 0 <= y + dy < self.size):
                continue
            # generate the new candidate board
            (newboard, newhole, newhistory) = \
                self.nextCandidate(board, hole, history, dx, dy)
            # ignore if we have seen this board before
            if newboard in seen:
                continue
            seen.add(newboard)
            # add it to the priority queue of candidates
            heapq.heappush(q, (self.score(newboard, newhole, newhistory),
                               newboard, newhole, newhistory))

    # Search space exhausted without reaching the goal.
    return None
def _cases(doc, form):
    """Yield tokens of ``doc`` matching ``form``, merged by position.

    ``form`` may be a ``String`` naming one form, a ``System`Alternatives``
    of strings, or a two-leaf ``System`Containing`` expression (delegated to
    ``_containing``).  Any other shape yields nothing.  For the first two,
    the per-form token streams are merged with a heap keyed on
    ``(_position(token), stream index)``.
    """
    if isinstance(form, String):
        generators = [_forms.get(form.get_string_value())]
    else:
        head = form.get_head_name()
        if head == 'System`Alternatives':
            if not all(isinstance(f, String) for f in form.leaves):
                return  # error
            generators = [_forms.get(f.get_string_value())
                          for f in form.leaves]
        elif head == 'System`Containing':
            if len(form.leaves) == 2:
                for t in _containing(doc, *form.leaves):
                    yield t
            # wrong arity is an error; either way we are done
            return
        else:
            return  # error

    # Start every stream before pulling a token from any of them.
    streams = [iter(generator(doc)) for generator in generators]

    feeds = []
    for rank, stream in enumerate(streams):
        first = next(stream, None)
        if first:
            feeds.append((_position(first), rank, first, stream))
    heapq.heapify(feeds)

    while feeds:
        _, rank, token, stream = heapq.heappop(feeds)
        yield token
        following = next(stream, None)
        if following:
            heapq.heappush(
                feeds, (_position(following), rank, following, stream))
def occurrences_after(self, after=None, amount=0):
    """
    Generate upcoming occurrences across all of ``self.events``.

    Occurrences from every event's ``_occurrences_after_generator(after)``
    are merged through a heap and yielded one at a time after being passed
    through an ``OccurrenceReplacer``.  ``after`` defaults to the current
    time.  ``amount`` caps how many occurrences are produced; ``0`` means
    the built-in ceiling of 100000.
    """
    from schedule.models import Occurrence
    if after is None:
        after = datetime.datetime.now()
    occ_replacer = OccurrenceReplacer(
        Occurrence.objects.filter(event__in=self.events))
    generators = [event._occurrences_after_generator(after)
                  for event in self.events]

    occurrences = []
    for generator in generators:
        try:
            heapq.heappush(occurrences, (next(generator), generator))
        except StopIteration:
            # This event has no occurrence after ``after``.
            pass

    if amount == 0:
        amount = 100000

    while True:
        amount -= 1
        if not occurrences or amount < 0:
            # A plain return ends the generator; raising StopIteration here
            # would surface as RuntimeError under PEP 479.
            return

        generator = occurrences[0][1]
        try:
            # Swap the root for its generator's following occurrence.
            upcoming = heapq.heapreplace(
                occurrences, (next(generator), generator))[0]
        except StopIteration:
            # Generator exhausted: just remove the root.
            upcoming = heapq.heappop(occurrences)[0]
        yield occ_replacer.get_occurrence(upcoming)
def __init__(self, cache_path, *, max_size=None):
    """Set up the cache directory and index the feather files already in it.

    ``max_size`` is multiplied by 1048576 to obtain a byte limit; ``None``
    leaves the limit unset.  Existing ``*feather`` files are registered in
    random order, each stamped with the current time, and the cache is then
    pruned once.
    """
    self._cache_path = cache_path
    # convert to bytes
    self.max_size = None if max_size is None else max_size * 1048576

    # TODO 2k compat
    os.makedirs(cache_path, exist_ok=True)

    self._fn_cache = dict()
    self._sz_cache = dict()
    # TODO replace this with a double linked list like boltons LRU
    self._heap_map = dict()
    self._heap = []

    # put files in to heap in random order
    existing = glob(os.path.join(self._cache_path, '*feather'))
    shuffle(existing)
    for path in existing:
        key = self._key_from_filename(path)
        self._fn_cache[key] = path
        self._sz_cache[key] = os.stat(path).st_size
        entry = [time.time(), key]
        heapq.heappush(self._heap, entry)
        self._heap_map[key] = entry

    # prune up front just in case
    self.__prune_files()
def calculate_sn_degree_ratio(self):
    """Compute normalized rank distances based on the degree ratio.

    For every node of ``self._graph`` with non-zero degree, the ratio of its
    degree to the average degree of its neighbours is computed.  Nodes are
    ranked by ``(ratio, node)`` in descending order, and the distance
    between two nodes is the absolute rank difference divided by the number
    of ranked nodes.  Each unordered pair is stored once, under the node
    that comes first in ``G.nodes()`` order.  The result is stored in
    ``self._deg_ratio_distances``; nothing is returned.
    """
    G = self._graph

    scored = []
    for node in G.nodes():
        deg = G.degree(node)
        if deg == 0:
            continue
        # Materialize so len() also works when neighbors() is an iterator.
        neighbors = list(G.neighbors(node))
        tot = 0.0
        for ng in neighbors:
            tot += G.degree(ng)
        ng_avg = tot / len(neighbors)
        scored.append((deg / ng_avg, node))

    # Highest ratio first.
    ordered = sorted(scored, reverse=True)
    positions = {}
    for i, elm in enumerate(ordered):
        positions[elm[1]] = i

    # The following dict contains NORMALIZED distances
    distances = {}
    for n1 in G.nodes():
        if n1 not in positions:
            continue
        distances[n1] = {}
        for n2 in G.nodes():
            # Nodes that already own a row (n1 included) are skipped so each
            # pair appears only once.
            if n2 in distances:
                continue
            if n2 not in positions:
                continue
            distances[n1][n2] = (
                abs(positions[n1] - positions[n2]) + 0.0) / len(scored)
    self._deg_ratio_distances = distances
    return
def getSkyline(self, buildings):
    """
    Compute the skyline key points of ``buildings``.

    :type buildings: List[List[int]]  -- each entry is ``[left, right, height]``
    :rtype: list of ``(x, height)`` tuples, ordered by x

    Left edges enter the sweep as ``(x, -h)`` and right edges as ``(x, h)``
    so that, at equal x, taller starts sort before shorter ones and starts
    sort before ends.  ``q`` is a min-heap of negated active heights seeded
    with 0 for the ground level.
    """
    heights = []
    result = []
    q = [0]
    pre = 0
    for building in buildings:
        l, r, h = building
        heights.append((l, -h))
        heights.append((r, h))
    heights.sort()
    for height in heights:
        index, h = height
        if h < 0:
            heapq.heappush(q, h)
        else:
            # list.remove shifts elements and can break the heap invariant,
            # so the heap must be rebuilt before q[0] is trusted again.
            q.remove(-h)
            heapq.heapify(q)
        if len(q) > 0 and q[0] != pre:
            pre = q[0]
            result.append((index, -q[0]))
    return result
def addIdle(self, clients, clientid, count):
    '''
    Mark ``clientid`` as idle and serve one waiting request, if any.

    The client is pushed onto ``clients['idle']`` as ``(-count, clientid)``.
    When ``clients['defer']`` is non-empty, its first entry is removed and
    its ``callback`` is invoked with the entry popped from the idle heap.
    '''
    idle_heap = clients['idle']
    heapq.heappush(idle_heap, (-count, clientid))

    waiting = clients['defer']
    if not waiting:
        return
    deferred = waiting.pop(0)
    deferred.callback(heapq.heappop(idle_heap))
def update(self, state, newPriority):
    """Lower the priority of ``state`` if ``newPriority`` improves on it.

    A state that has no recorded priority yet is always accepted.  On
    acceptance the priority is stored in ``self.priorities`` and a
    ``(newPriority, state)`` entry is pushed onto ``self.heap`` (older
    entries for the same state are left in the heap).

    Returns ``True`` when the priority was recorded, ``False`` otherwise.
    """
    oldPriority = self.priorities.get(state)
    # Identity test: a priority object with a custom __eq__ must not be
    # mistaken for "missing".
    if oldPriority is None or newPriority < oldPriority:
        self.priorities[state] = newPriority
        heapq.heappush(self.heap, (newPriority, state))
        return True
    return False
def Min_Span_Tree(outside_tree):
    """Return the total weight of a minimum spanning tree (Prim's algorithm).

    ``outside_tree`` is a set of points; the graph is complete and the
    weight of an edge is ``Edge((p, q))``, converted with ``float`` when it
    is added to the total.  The set is consumed: it is empty on return.
    An empty input yields 0.

    NOTE(review): assumes ``Edge`` is symmetric in the order of the pair --
    confirm against its definition.
    """
    answer = 0
    if not outside_tree:
        return answer

    # Grow the tree from an arbitrary starting point.
    start = outside_tree.pop()
    frontier = [(Edge((start, point)), (start, point))
                for point in outside_tree]
    heapq.heapify(frontier)

    while outside_tree:
        weight, (_, point) = heapq.heappop(frontier)
        if point not in outside_tree:
            # Stale entry: this point was connected through a cheaper edge.
            continue
        outside_tree.remove(point)
        answer += float(weight)
        # The newly connected point offers edges to everything still outside.
        for other in outside_tree:
            heapq.heappush(frontier, (Edge((point, other)), (point, other)))
    return answer
def _search(self, pt, within, node, accum, depth=0, max_results=None, distance=euclidean_distance):
    """Collect ``(distance, key)`` hits within ``within`` of ``pt``.

    The distance to ``node`` is scaled by ``self.magnitude`` before it is
    compared with ``within``.  When ``max_results`` is set, the accumulator
    is replaced by its ``max_results`` smallest entries after each hit.  A
    child subtree is visited only if the single-axis distance between the
    child's point and ``pt`` is within range.  Returns the accumulator, or
    ``None`` when ``node`` is falsy.
    """
    if not node:
        return

    scaled = distance(pt, node.pt) * self.magnitude
    if scaled <= within:
        heapq.heappush(accum, (scaled, node.key,))
        if max_results:
            accum = heapq.nsmallest(max_results, accum,
                                    key=lambda hit: hit[0])

    dims = len(pt)
    axis = depth % dims
    # Query point projected onto the splitting axis.
    here = [0] * dims
    here[axis] = pt[axis]

    for side in ('left', 'right'):
        subtree = getattr(node, side)
        if not subtree:
            continue
        # are we within range at the axis level?
        there = [0] * dims
        there[axis] = subtree.pt[axis]
        if distance(there, here) <= within:
            accum = self._search(pt, within, subtree, accum, depth + 1,
                                 max_results=max_results, distance=distance)

    return accum
def BFS(start):
    """Greedy best-first search from ``start`` to the goal ``"*12345678"``.

    The fringe is a heap of ``[manhattanDistance(board), path]`` entries,
    where ``path`` is the list of boards from ``start`` to that board.

    Returns the popped ``[priority, path]`` entry when the goal is popped,
    ``[0, path]`` when the goal is generated as a neighbour, or ``False``
    when the fringe empties without finding the goal.
    """
    fringe = []        # Fringe of exploration
    closed = set()     # Already expanded boards; a set gives O(1) lookups
    goal = "*12345678"  # goal state

    # Push the start state on the heap.
    heapq.heappush(fringe, [manhattanDistance(start), [start]])

    while fringe:
        # pop the most promising node
        currNode = heapq.heappop(fringe)
        parentList = currNode[1]
        currBoard = parentList[-1]

        # Check goal state
        if currBoard == goal:
            return currNode

        closed.add(currBoard)

        for x in moves(currBoard):
            if x == goal:
                return [0, parentList + [x]]
            if x not in closed:
                heapq.heappush(fringe,
                               [manhattanDistance(x), parentList + [x]])

    return False  # No solution
def skeleton(Gin):
    """Build a skeleton graph of ``Gin`` favouring large-magnitude edges.

    Edges are taken in order of decreasing ``abs(weight)``.  Whenever an
    edge has an endpoint that is not yet in the output, a heap-driven
    expansion starts from that edge: the edge with the largest absolute
    weight is popped, added to the result if it reaches a new node, and the
    edges around its endpoints are queued.  Returns a new ``WGraph``.
    """
    ew = Gin.edge_weights
    Gout = WGraph()
    neighbors = Gin.neighbors_dict()
    added = set()
    edges = sorted([(abs(ew[e]), e) for e in ew])
    while edges:
        (weight, next_edge) = edges.pop()
        (a, b) = next_edge
        if a not in added or b not in added:
            # Seed the expansion with the edge just taken from the sorted
            # list (previously this relied on a leaked comprehension
            # variable and started from an unrelated edge).
            H = [(-weight, ew[next_edge], next_edge)]
            while H:
                (abs_weight, next_weight, next_e) = heapq.heappop(H)
                (a, b) = next_e
                if a not in added or b not in added:
                    for c in next_e:
                        for cn in neighbors[c]:
                            (cw, ce) = Gin.unordered_weight(c, cn)
                            heapq.heappush(H, (-abs(cw), cw, ce))
                    Gout.add_edge(a, b, ew[next_e])
                    added.add(a)
                    added.add(b)
    return Gout
def shortest_path(G, start, end, depth):
    """Dijkstra shortest path from ``start`` to ``end`` in ``G``.

    ``G`` maps each vertex to a mapping ``{neighbour: edge_cost}``.

    Returns the path as a list of vertices when it contains fewer than
    ``depth`` vertices.  Returns ``None`` when the cheapest path is too long
    or when ``end`` cannot be reached.

    Based on
    http://code.activestate.com/recipes/119466-dijkstras-algorithm-for-shortest-paths/
    """
    import heapq

    def flatten(L):
        # Flatten a linked list of the form (0, (1, (2, ())))
        while len(L) > 0:
            yield L[0]
            L = L[1]

    q = [(0, start, ())]  # Heap of (cost, path_head, path_rest).
    visited = set()       # Visited vertices.
    while q:
        (cost, v1, path) = heapq.heappop(q)
        if v1 in visited or v1 not in G:
            continue
        visited.add(v1)
        if v1 == end:
            final_path = list(flatten(path))[::-1] + [v1]
            if len(final_path) < depth:
                return final_path
            return None
        path = (v1, path)
        for (v2, cost2) in G[v1].items():
            if v2 not in visited:
                heapq.heappush(q, (cost + cost2, v2, path))

    # Heap exhausted: ``end`` is not reachable from ``start``.
    return None
def prune(vertex, s, threshold, delta):
    '''
    Check if deltaPFS computation for the vertex can be skipped.

    s is the sum of distances to all reachable nodes.
    threshold is the k'th largest value in the closeness centrality
    priority queue.
    delta is the distance upper bound.

    Updates the global 'phi' dict with the smallest margin seen per vertex
    and adds vertices that can be pruned to the global 'pruned' dict.
    '''
    frontier = [(0, vertex)]
    done = set()
    while frontier:
        dist, node = heapq.heappop(frontier)
        if node in done:
            continue
        done.add(node)

        # simplifications for undirected graphs
        if DIRECTED:
            upper_bound = upper_bounds.get(node, V)
            lower_bound = lower_bounds.get(node, 0)
        else:
            upper_bound = V
            lower_bound = V

        s_prime = s - (V - lower_bound)*delta - dist * upper_bound
        c_prime = float(upper_bound - 1)**2 / (V-1)*(s_prime)

        # Keep the smallest margin observed for this vertex.
        margin = c_prime - threshold
        if node not in phi or margin < phi[node]:
            phi[node] = margin

        for nxt in G[node]:
            heapq.heappush(frontier, (dist + G[node][nxt]['weight'], nxt))

        if len(schedule[node]) == 0 and c_prime < threshold:
            pruned[node] = True
def _run_until_current(self):
    """Run every pending timed call whose scheduled time has been reached.

    Newly registered calls are merged in first via
    ``_insert_new_calledlaters``.  Calls are then popped from the
    ``_pending_timed_calls`` heap for as long as the earliest one is due.
    Exceptions listed in ``_reraised_exceptions`` propagate; any other
    exception raised by a call is logged and swallowed.
    """
    self._insert_new_calledlaters()
    # The deadline is sampled once; calls that become due while this method
    # runs are left for the next invocation.
    now = time.time()
    while self._pending_timed_calls and (self._pending_timed_calls[0].time <= now):
        call = heapq.heappop(self._pending_timed_calls)
        if call.cancelled:
            # Cancelled calls stay in the heap until popped; account for
            # this one having been removed.
            self._cancellations -= 1
            continue
        if call.delayed_time > 0:
            # The call was postponed: let it apply its delay and queue it
            # again instead of running it now.
            call.activate_delay()
            heapq.heappush(self._pending_timed_calls, call)
            continue
        try:
            call.called = 1
            call.func(*call.args, **call.kw)
        except _reraised_exceptions:
            raise
        except:
            getlog().exception("CallLater failed")
    # Compact the heap once cancelled entries number more than 50 and more
    # than half of what is pending.
    if self._cancellations > 50 and self._cancellations > len(self._pending_timed_calls) >> 1:
        self._cancellations = 0
        self._pending_timed_calls = [x for x in self._pending_timed_calls if not x.cancelled]
        heapq.heapify(self._pending_timed_calls)
def fill(gw): if gw < maxSingleW: return nonlocal maxRate, bestSize, bestPos rowRests = [] heapq.heappush(rowRests, (-gw, 0)) rowYs = [0] rowHeight = sizes[0][0][1] for (w, h), id in sizes: maxW = -rowRests[0][0] if maxW >= w: # Add to this row i = rowRests[0][1] heapq.heapreplace(rowRests, (- (maxW - w), i)) pos[id] = (gw - maxW, rowYs[i]) else: # Create new row heapq.heappush(rowRests, (-(gw - w), len(rowRests))) rowYs.append(rowYs[-1] + rowHeight) rowHeight = h pos[id] = (0, rowYs[-1]) size = (gw, rowYs[-1] + rowHeight) rate = area / (size[0] * size[1]) if rate > maxRate: maxRate = rate bestSize = size bestPos = pos[:]
def PFS(v):
    '''
    Perform Priority First Search from vertex v.

    PFS is a modified version of Dijkstra's algorithm that calculates
    additional values required for centrality computation and pruning.
    Tentative distances are written to the global ``L``.

    Returns
        s: sum of distances to all reachable vertices
        delta: distance upper bound
    '''
    L[v] = 0
    total = 0
    farthest = 0
    frontier = [(0, v)]
    settled = set()
    while frontier:
        dist, node = heapq.heappop(frontier)
        # skip if node already encountered (stale entry left behind by a
        # previous "decrease key")
        if node in settled:
            continue
        settled.add(node)

        total += dist
        farthest = max(farthest, dist)

        for nbr in G[node]:
            candidate = dist + G[node][nbr]['weight']
            # heapq has no decrease-key operation: push the better value and
            # let the stale entry be skipped when it is popped.
            if L.get(nbr) is None or candidate < L[nbr]:
                L[nbr] = candidate
                heapq.heappush(frontier, (candidate, nbr))

    return total, farthest
def get_path(self, start, end, pred):
    """Find a path from ``start`` to ``end`` over squares accepted by ``pred``.

    A* style search: the queue is ordered by the path length travelled so
    far plus the straight-line distance to ``end``.  Straight steps cost 1,
    diagonal steps cost ``sqrt(2)`` and are only taken when
    ``self.is_free_corner`` allows cutting that corner.

    Returns a tuple of squares from the first step after ``start`` up to
    and including ``end``, or ``None`` when ``end`` is rejected by ``pred``
    or cannot be reached.
    """
    if not pred(end):
        return
    open_lst = [(0, 0, start)]
    visited = {start: None}  # square -> square it was reached from
    while open_lst:
        cost, length, sq = heappop(open_lst)
        if sq == end:
            break
        for n, info in self.neighbors(sq, True):
            if n in visited or not pred(n):
                continue
            is_corner = self.is_corner(sq, n)
            if is_corner and not self.is_free_corner(sq, n, pred):
                continue
            step_length = sqrt(2) if is_corner else 1
            nlength = length + step_length
            # Both the estimate and the queued length must include the step
            # just taken; otherwise the travelled distance never accumulates
            # and the search degenerates into greedy best-first.
            estimate = nlength + hypot(end[0] - n[0], end[1] - n[1])
            heappush(open_lst, (estimate, nlength, n))
            visited[n] = sq
    if end not in visited:
        return
    # Walk the parent links back from the end, then drop the start square.
    parent = visited[end]
    path = [end]
    while parent is not None:
        path.append(parent)
        parent = visited[parent]
    path.reverse()
    return tuple(path[1:])
def k_smallest_matrix(matrix, k):
    """
    Given a n x n matrix where each of the rows and columns are sorted in
    ascending order, find the kth smallest element in the matrix.

    :param matrix: A square matrix of size n x n in sorted order for its rows and cols
    :param k: The kth-smallest element to look for (1-based)
    :return: The k-th smallest element in the matrix.  ``None`` when ``k``
             is not positive or the matrix is empty; the largest element
             when ``k`` exceeds the number of elements.
    """
    n = len(matrix)
    if n == 0 or k <= 0:
        return None

    # Seed the heap with the first row only.  Column c has at least c
    # elements that are <= its head in that row, so columns beyond k can
    # never contain the answer.
    open_list = [(matrix[0][x], 0, x) for x in range(min(n, k))]
    heapq.heapify(open_list)

    current_min = None
    # Take the min element and enqueue the element below it if possible;
    # after k pops the last value taken is the k-th smallest.
    while k > 0 and open_list:
        current_min, y, x = heapq.heappop(open_list)
        if y + 1 < n:
            heapq.heappush(open_list, (matrix[y + 1][x], y + 1, x))
        k -= 1

    return current_min
def create_binary_tree(self):
    """
    Create a binary Huffman tree using stored vocabulary word counts.
    Frequent words will have shorter binary codes.  Called internally from
    `build_vocab()`.

    Inner nodes get indexes starting at ``len(self.vocab)``; every leaf
    receives its ``code`` (branch bits) and ``point`` (inner-node offsets
    along the path from the root).
    """
    vocab_size = len(self.vocab)
    logger.info("constructing a huffman tree from %i words" % vocab_size)

    # build the huffman tree: repeatedly merge the two rarest nodes
    heap = self.vocab.values()
    heapq.heapify(heap)
    for i in xrange(vocab_size - 1):
        rarest = heapq.heappop(heap)
        second = heapq.heappop(heap)
        merged = Vocab(count=rarest.count + second.count,
                       index=i + vocab_size, left=rarest, right=second)
        heapq.heappush(heap, merged)

    if not heap:
        return

    # walk the tree with an explicit stack, assigning a binary code to each
    # vocabulary word
    max_depth = 0
    pending = [(heap[0], [], [])]
    while pending:
        node, codes, points = pending.pop()
        if node.index < vocab_size:
            # leaf node => store its path from the root
            node.code, node.point = codes, points
            max_depth = max(len(codes), max_depth)
            continue
        # inner node => continue with both children
        points = array(list(points) + [node.index - vocab_size],
                       dtype=uint32)
        pending.append(
            (node.left, array(list(codes) + [0], dtype=uint8), points))
        pending.append(
            (node.right, array(list(codes) + [1], dtype=uint8), points))

    logger.info("built huffman tree with maximum node depth %i" % max_depth)
def directed_havel_hakimi_graph(in_deg_sequence, out_deg_sequence, create_using=None):
    """Return a directed graph with the given degree sequences.

    Parameters
    ----------
    in_deg_sequence : list of integers
        Each list entry corresponds to the in-degree of a node.
    out_deg_sequence : list of integers
        Each list entry corresponds to the out-degree of a node.
    create_using : graph, optional (default DiGraph)
        Return graph of this type. The instance will be cleared.

    Returns
    -------
    G : DiGraph
        A graph with the specified degree sequences.
        Nodes are labeled starting at 0 with an index
        corresponding to the position in deg_sequence

    Raises
    ------
    NetworkXError
        If the degree sequences are not digraphical, contain a negative
        value, or have different sums.

    See Also
    --------
    configuration_model

    Notes
    -----
    Algorithm as described by Kleitman and Wang [1]_.  If the two sequences
    differ in length, the shorter one is treated as padded with zeros.

    References
    ----------
    .. [1] D.J. Kleitman and D.L. Wang
       Algorithms for Constructing Graphs and Digraphs with Given Valences
       and Factors
       Discrete Mathematics, 6(1), pp. 79-88 (1973)
    """
    assert(nx.utils.is_list_of_ints(in_deg_sequence))
    assert(nx.utils.is_list_of_ints(out_deg_sequence))

    if create_using is None:
        create_using = nx.DiGraph()

    # Process the sequences and form two heaps to store degree pairs with
    # either zero or nonzero out degrees
    sumin, sumout = 0, 0
    nin, nout = len(in_deg_sequence), len(out_deg_sequence)
    maxn = max(nin, nout)
    G = nx.empty_graph(maxn, create_using)
    if maxn == 0:
        return G
    maxin = 0
    # stubheap: (-out_deg, -in_deg, node) for nodes with in-degree > 0.
    # zeroheap: (-out_deg, node) for nodes with in-degree 0 and out-degree > 0.
    # Degrees are negated so that the min-heap yields the largest first.
    stubheap, zeroheap = [], []
    for n in range(maxn):
        in_deg, out_deg = 0, 0
        if n < nout:
            out_deg = out_deg_sequence[n]
        if n < nin:
            in_deg = in_deg_sequence[n]
        if in_deg < 0 or out_deg < 0:
            raise nx.NetworkXError(
                'Invalid degree sequences. Sequence values must be positive.')
        sumin, sumout, maxin = sumin + in_deg, sumout + out_deg, max(maxin, in_deg)
        if in_deg > 0:
            stubheap.append((-1 * out_deg, -1 * in_deg, n))
        elif out_deg > 0:
            zeroheap.append((-1 * out_deg, n))
    if sumin != sumout:
        raise nx.NetworkXError(
            'Invalid degree sequences. Sequences must have equal sums.')
    heapq.heapify(stubheap)
    heapq.heapify(zeroheap)

    # Scratch buffer for sources popped in one round that still have out
    # stubs left; at most maxin sources are popped per round.
    modstubs = [(0, 0, 0)] * (maxin + 1)
    # Successively reduce degree sequence by removing the maximum
    while stubheap:
        # Remove first value in the sequence with a non-zero in degree
        (freeout, freein, target) = heapq.heappop(stubheap)
        freein *= -1
        if freein > len(stubheap) + len(zeroheap):
            raise nx.NetworkXError('Non-digraphical integer sequence')

        # Attach arcs from the nodes with the most stubs
        mslen = 0
        for i in range(freein):
            # Take the source from whichever heap currently offers the
            # larger out-degree (values are negated, hence the '>').
            if zeroheap and (not stubheap or stubheap[0][0] > zeroheap[0][0]):
                (stubout, stubsource) = heapq.heappop(zeroheap)
                stubin = 0
            else:
                (stubout, stubin, stubsource) = heapq.heappop(stubheap)
            if stubout == 0:
                raise nx.NetworkXError('Non-digraphical integer sequence')
            G.add_edge(stubsource, target)
            # Check if source is now totally connected
            if stubout + 1 < 0 or stubin < 0:
                modstubs[mslen] = (stubout + 1, stubin, stubsource)
                mslen += 1

        # Add the nodes back to the heaps that still have available stubs
        for i in range(mslen):
            stub = modstubs[i]
            if stub[1] < 0:
                heapq.heappush(stubheap, stub)
            else:
                heapq.heappush(zeroheap, (stub[0], stub[2]))
        # The target's in-degree is now satisfied; keep it available as a
        # source if it still has out stubs.
        if freeout < 0:
            heapq.heappush(zeroheap, (freeout, target))

    return G
# Print the top-left 25x25 corner of the grid (the full maxRows x maxCols
# grid produced too much output).
# Legend: B = blocked, G = goal, P = cell in closedList, O = any other open cell.
# NOTE(review): closedList is read here but assigned only further down --
# confirm it is defined before this point in the file.
for x in range(25):
    for y in range(25):
        temp = ' '
        cell = toody[x][y]
        if cell.blocked == 1:
            poo('B' + temp)
        elif cell.blocked == 0:
            # The goal check comes first so a goal cell is never printed as P/O.
            if cell == goal:
                poo('G' + temp)
            elif cell in closedList:
                poo('P' + temp)
            else:
                poo('O' + temp)
    poo("\n")

openList = []
heapq.heappush(openList, (start.f, start))
closedList = []


# backtrack from goal to start
def reconstructPath(curCell):
    """Print "x, y" for ``curCell`` and every ancestor reached via ``prevCell``."""
    poo(str(curCell.x) + ', ' + str(curCell.y) + '\n')
    while curCell and curCell.prevCell is not None:
        curCell = curCell.prevCell
        poo(str(curCell.x) + ', ' + str(curCell.y) + '\n')


def findpath(curCell):
    """Print the path back from ``curCell``; same output as reconstructPath."""
    # Kept as a separate name for existing callers, without duplicating the
    # loop.
    reconstructPath(curCell)
def put(self, item, priority):
    """Queue ``item`` on ``self.elements`` ordered by ``priority`` (lowest first)."""
    entry = (priority, item)
    heapq.heappush(self.elements, entry)
def put(self, item, total_cost):
    """Queue ``item`` on ``self.elements`` ordered by ``total_cost`` (lowest first)."""
    entry = (total_cost, item)
    heapq.heappush(self.elements, entry)
def procChunk(args):
    """Find the nearest neighbours for the frames ``begin`` .. ``end - 1``.

    ``args`` is a single ``(threadId, begin, end)`` tuple, so the function
    can be handed directly to a worker-pool ``map``.

    For every frame in the range, all frames of a *different* category are
    compared against it (both as-is and flipped, keeping the smaller squared
    distance) and the ``NUMBER_OF_NNS`` closest ones are kept.  Progress is
    written to ``proc_<threadId>_log.txt``; a snapshot is saved every 900
    frames and once more at the end.
    """
    threadId, begin, end = args
    NUMBER_OF_NNS = 1000

    chunkSize = end - begin
    nns = [[] for _ in range(chunkSize)]
    distances = [[] for _ in range(chunkSize)]
    isFlipped = [[] for _ in range(chunkSize)]

    logFileName = "proc_%d_log.txt" % threadId
    # The with-block guarantees the log is closed even if a frame fails.
    with open(logFileName, 'w') as f:
        f.write("started worker %d\n" % threadId)
        print("\nstarted worker %d" % threadId)

        for searchedIndex in range(begin, end):
            offset = searchedIndex - begin
            currentCategory = Shared.categoryLookuptable[searchedIndex]
            # Max-heap on distance (stored negated) holding the best
            # candidates seen so far.
            best = nns[offset]
            for i in range(0, len(Shared.hogVectors)):
                if i == searchedIndex or Shared.categoryLookuptable[
                        i] == currentCategory:
                    continue
                isImageFlipped = 0
                dist = getDistanceSquared(Shared.hogVectors[searchedIndex],
                                          Shared.hogVectors[i])
                distWithFlipped = getDistanceSquared(
                    Shared.hogVectors[searchedIndex],
                    Shared.hogVectorsFlipped[i])
                if distWithFlipped < dist:
                    isImageFlipped = 1
                    dist = distWithFlipped
                # i + 1 to make indices 1-based (for matlab)
                entry = (-dist, i + 1, isImageFlipped)
                if len(best) < NUMBER_OF_NNS:
                    heapq.heappush(best, entry)
                else:
                    # Full: insert and drop the farthest in one step.
                    heapq.heappushpop(best, entry)

            # Distances are negated, so descending tuple order lists the
            # closest neighbour first.
            ranked = sorted(best, reverse=True)
            nns[offset] = [item[1] for item in ranked]         # neighbour indices
            distances[offset] = [-item[0] for item in ranked]  # distances to the neighbours
            isFlipped[offset] = [item[2] for item in ranked]   # was neighbour image flipped?

            if (offset + 1) % 30 == 0:
                timestamp = datetime.now().strftime('%d.%m %H:%M:%S')
                f.write(
                    "%s. %d: Frames processed: %d / %d\n" %
                    (timestamp, threadId, offset + 1, end - begin))
            if (offset + 1) % 900 == 0:
                saveSnapshot(
                    threadId, "snapshot_%d_nns_all_%05d_%05d.mat" %
                    (offset + 1, begin, end), nns, distances, isFlipped)
                f.write("%d: saved snapshot" % threadId)

    saveSnapshot(threadId, "nns_all_%05d_%05d.mat" % (begin, end), nns,
                 distances, isFlipped)
def construct_exemplar_set(self, dataset, n):
    '''Construct set of [n] exemplars from [dataset] using 'herding'.

    Note that [dataset] should be from specific class; selected sets are
    added to [self.exemplar_sets] in order.  At most ``len(dataset)``
    exemplars are selected.  Without herding the exemplars are drawn at
    random without replacement.  The model is switched to eval mode for the
    duration of the call and restored afterwards.'''

    # set model to eval()-mode
    logging.info(
        "entered ExemplarHandler.construct_exemplar_set(self, dataset, n)"
    )
    mode = self.training
    self.eval()

    n_max = len(dataset)
    exemplar_set = []

    if self.herding:
        logging.info("herding enabled")
        # compute features for each example in [dataset], batch by batch
        first_entry = True
        dataloader = utils.get_data_loader(dataset, 128, cuda=self._is_on_cuda())
        for (image_batch, _) in dataloader:
            image_batch = image_batch.to(self._device())
            with torch.no_grad():
                feature_batch = self.feature_extractor(image_batch).cpu()
            if first_entry:
                features = feature_batch
                first_entry = False
            else:
                features = torch.cat([features, feature_batch], dim=0)
        if self.norm_exemplars:
            features = F.normalize(features, p=2, dim=1)

        # NOTE(review): exemplar_features is filled below but never stored
        # or returned within this method.
        exemplar_features = torch.zeros_like(features[:min(n, n_max)])

        # initialize a min pq for getting rid of most familiar items:
        # score each of the first num_exemplars features against the others
        num_exemplars = min(n, n_max)
        start_set = features[0:num_exemplars]
        heap = []
        for feature_idx, feature in enumerate(start_set):
            mod_set = self.exclude_idx(start_set, feature_idx)
            startle = self.startle(mod_set, feature)
            heap.append((startle, feature_idx))
        heapq.heapify(heap)
        # The lowest-scoring entry is held outside the heap as the current
        # replacement candidate.
        min_startle, cur_min_idx = heapq.heappop(heap)
        # logging.info("heap: "+str(heap))

        # iterate through remaining features, greedily maximizing startle
        idxs = [v for k, v in heap]
        cur_set = features[idxs]
        for idx in range(num_exemplars, len(features)):
            feature = features[idx]
            # NOTE(review): feature_idx still holds the last index of the
            # enumerate() loop above, and the comparison set is start_set
            # rather than cur_set -- confirm this is intended.
            mod_set = self.exclude_idx(start_set, feature_idx)
            startle = self.startle(mod_set, feature)
            if startle > min_startle:
                min_startle = startle
                heapq.heappush(heap, (startle, idx))
                min_startle, cur_min_idx = heapq.heappop(heap)
                idxs = [v for k, v in heap]
                cur_set = features[idxs]
        # Selection = everything left in the heap plus the held-out minimum.
        all_idxs = idxs + [cur_min_idx]
        for k, idx in enumerate(all_idxs):
            exemplar_set.append(dataset[idx][0].numpy())
            exemplar_features[k] = copy.deepcopy(features[idx])
    else:
        logging.info("herding not enabled")
        indeces_selected = np.random.choice(n_max, size=min(n, n_max), replace=False)
        for k in indeces_selected:
            exemplar_set.append(dataset[k][0].numpy())

    # add this [exemplar_set] as a [n]x[ich]x[isz]x[isz] to the list of [exemplar_sets]
    self.exemplar_sets.append(np.array(exemplar_set))

    # set mode of model back
    self.train(mode=mode)
# Min-heap
# https://www.acmicpc.net/problem/1927
import sys
import heapq as hq

# Read the test input from a local file instead of the console.
sys.stdin = open("input.txt", "r")

heap = []
n = int(input())
for _ in range(n):
    x = int(input())
    if x == -1:
        # terminate
        break
    if x == 0:
        # output: smallest stored value, or 0 when nothing is stored
        print(hq.heappop(heap) if len(heap) != 0 else 0)
    else:
        # insert x into the heap
        hq.heappush(heap, x)
def parse(self, ip_body):
    """Process one incoming TCP segment and advance the connection state.

    ``ip_body`` is the raw TCP segment (header followed by payload).  The
    first 16 bytes are unpacked as source port, destination port, sequence
    number, acknowledgement number, data-offset byte, flag byte and window.
    Depending on ``self.state`` and the flags, the method answers with a
    reset, starts a connection, processes the acknowledgement (window and
    congestion bookkeeping), delivers in-order payload to ``self.writer``,
    buffers out-of-order payload in the ``self.src_win`` heap and handles
    FIN.
    """
    src_port, dst_port, seq, ack, offset, flag, window = struct.unpack('>HHIIBBH', ip_body[:16])
    # The header length in 32-bit words sits in the high nibble of the
    # offset byte; ">> 2" turns that directly into a byte count.
    tcp_body = ip_body[offset>>2:]
    self.rwnd = window
    self.update = time.perf_counter()
    # print('RECV', self.dst_name, self.dst_port, self.state, Control(flag), seq, ack, len(tcp_body))
    if self.state == State.CLOSED:
        # Closed: ignore resets, answer everything else with a reset.
        if flag & Control.RST:
            pass
        elif flag & Control.ACK:
            self.send(seq=ack, flag=Control.RST)
        else:
            self.send(seq=0, ack=seq+len(tcp_body), flag=Control.RST|Control.ACK)
    elif self.state == State.INITIAL:
        if flag & Control.RST:
            pass
        elif flag & Control.ACK:
            self.send(seq=ack, flag=Control.RST)
        elif flag & Control.SYN:
            # Incoming SYN: pick a random initial sequence number and start
            # the connect and retransmit coroutines.
            self.state = State.SYN_RECEIVED
            self.src_seq = seq+1
            self.dst_seq = self.dst_ack = random.randrange(0x100000000)
            asyncio.ensure_future(self.connect())
            asyncio.ensure_future(self.retransmit())
    elif flag & Control.RST:
        # Reset on a live connection: tear down and wake any waiters.
        self.close()
        self.wait_ack.set()
        self.wait_send.set()
        try:
            self.writer.close()
        except Exception:
            pass
    elif flag & Control.SYN:
        pass
    elif flag & Control.ACK == 0:
        pass
    else:
        if self.state == State.SYN_RECEIVED:
            self.state = State.ESTABLISHED
        # Lift the 32-bit wire ack into the range of the local counter.
        # NOTE(review): presumably local counters are unbounded ints that
        # keep growing past 2**32 -- confirm.
        while self.dst_ack-ack > 0x20000000:
            ack += 0x100000000
        if self.dst_ack < self.dst_seq:
            diff_ack = ack-self.dst_ack
            if diff_ack == 0 and not tcp_body:
                # Duplicate ack without payload.
                self.fast_resend += 1
                if self.fast_resend < 3:
                    self.wait_send.set()
                elif self.fast_resend == 3:
                    self.wait_fast.set()
                else:
                    self.cwnd += SMSS
            if diff_ack > 0:
                self.fast_resend = 0
                counter = 0
                # Drop every fully acknowledged entry from the send window.
                # NOTE(review): this rebinds ``seq``, which the payload and
                # FIN handling below also read -- confirm that is intended.
                while self.dst_win and self.dst_win[0][0]+self.dst_win[0][1] <= ack:
                    seq, length, tp, counter = self.dst_win.popleft()
                del self.dst_win_buf[:diff_ack]
                self.dst_ack = ack
                self.wait_fast.set()
                if self.dst_seq-self.dst_ack <= min(self.cwnd, self.rwnd):
                    self.wait_send.set()
                # Grow the congestion window: by up to SMSS below ssthresh,
                # by SMSS*SMSS//cwnd at or above it.
                if self.cwnd < self.ssthresh:
                    self.cwnd += min(diff_ack, SMSS)
                else:
                    self.cwnd += SMSS*SMSS//self.cwnd
                if counter == 0:
                    self.cwnd = self.ssthresh
                if counter > 0:
                    # ``counter`` is compared against perf_counter(), so it
                    # is treated as a timestamp for the RTO estimate.
                    time_diff = time.perf_counter() - counter
                    self.calc_rto(time_diff)
        if self.state == State.FIN_WAIT_1:
            if self.dst_ack >= self.dst_seq:
                self.state = State.FIN_WAIT_2
        if tcp_body and self.state in (State.ESTABLISHED, State.FIN_WAIT_1, State.FIN_WAIT_2):
            # Same lifting as above, for the incoming sequence number.
            while self.src_seq-seq > 0x20000000:
                seq += 0x100000000
            if seq+len(tcp_body) <= self.src_seq:
                # Entirely old data: nothing new to deliver.
                pass
            elif seq <= self.src_seq:
                # In-order (possibly overlapping) data: deliver the new part.
                self.logwrite(tcp_body[self.src_seq-seq:])
                self.writer.write(tcp_body[self.src_seq-seq:])
                self.src_seq = seq+len(tcp_body)
                # Drain buffered segments that have now become contiguous.
                while self.src_win and self.src_win[0][0] <= self.src_seq:
                    seq, tcp_body = heapq.heappop(self.src_win)
                    if seq+len(tcp_body) > self.src_seq:
                        self.logwrite(tcp_body[self.src_seq-seq:])
                        self.writer.write(tcp_body[self.src_seq-seq:])
                        self.src_seq = seq+len(tcp_body)
            elif seq-self.src_seq < 0x20000000:
                # Out-of-order data ahead of us: keep it, ordered by seq.
                heapq.heappush(self.src_win, (seq, tcp_body))
            self.send()
        if flag & Control.FIN:
            while self.src_seq-seq > 0x20000000:
                seq += 0x100000000
            if seq+1 <= self.src_seq:
                pass
            elif seq <= self.src_seq:
                # FIN occupies one sequence number.
                self.src_seq = seq+1
                self.send()
                if self.state in (State.SYN_RECEIVED, State.ESTABLISHED):
                    self.state = State.CLOSE_WAIT
                    try:
                        self.writer.close()
                    except Exception:
                        pass
                elif self.state == State.FIN_WAIT_2:
                    self.close()
                    self.wait_ack.set()
def _get_tree(q, tree={}): while len(q) > 1: (ap, al), (bp, bl) = heappop(q), heappop(q) heappush(q, (ap + bp, al + bl)) tree[al + bl] = (al, bl) return tree, q[0][1]
split = line.rstrip().split() split = list(map(int, split)) row = [] for i in range(0, len(split), 3): row.append(split[i:i + 3]) image.append(row) print(image) len_row = len(image) len_col = len(image[0]) pixnum = len_col * len_row heap = [] for i in range(len_col * len_row): column = i % len_col row = i // len_col brightness = sum([image[row][column][i]**2 for i in range(3)]) heappush(heap, (brightness, i, image[row][column])) k = 4 idxs = [pixnum * i / k for i in range(k)] colors = [] assign = [0] * pixnum num = [0] * k # 代表画素の初期値 for time in range(pixnum): brightness, i, pix = heappop(heap) if time in idxs: colors.append(pix) # Cluster分類 for time in range(pixnum): column = time % len_col row = time // len_col r, g, b = image[row][column]
def push(self, item):
    """Queue ``item`` ordered by ``item.priority``.

    The running ``self._index`` is stored as the second tuple field so
    that entries with equal priority are ordered by insertion and the
    items themselves never need to be compared.
    """
    entry = (item.priority, self._index, item)
    heapq.heappush(self._queue, entry)
    self._index = self._index + 1
# Perform BFS from node N instead dist = [N + 1] * (N + 1) dist[N] = 0 queue = deque([N]) while queue: node = queue.popleft() for nxt in adj[node]: # Still unvisited? if dist[nxt] == N + 1: dist[nxt] = dist[node] + 1 queue.append(nxt) # Insert everything into the heap ans = [] for i in range(N): heapq.heappush(ans, (i + dist[subway[i]], i, subway[i])) output = [] for _ in range(D): x, y = list(map(int, input().split())) x -= 1 y -= 1 subway[x], subway[y] = subway[y], subway[x] # Insert the newly updated elements into the heapq heapq.heappush(ans, (x + dist[subway[x]], x, subway[x])) heapq.heappush(ans, (y + dist[subway[y]], y, subway[y])) # Find the lowest time that is still available while True: # ans[0] returns the smallest time (thanks heapq)
def adaptive_astar(self, steps, grid_o, tie_break, show):
    """Repeatedly plan and walk toward ``self.goal``, re-planning when blocked.

    Each outer iteration runs a heap-based search from the agent's current
    cell, reconstructs a path, then walks it until a cell flagged
    ``is_seen`` is met, after which a new search is started from the cell
    reached. Heuristic values of previously searched cells are replaced by
    ``goal_g - g`` when a previous search reached the goal.

    NOTE(review): this block was recovered from whitespace-collapsed text;
    the nesting below is the most plausible reading and should be confirmed
    against the original file.

    Args:
        steps: When truthy, display the partial path in a Tk window each
            time the walk stops at an ``is_seen`` cell.
        grid_o: Object providing ``create_maze`` and ``display_path``.
        tie_break: Multiplier applied to ``f`` before ``g`` is subtracted
            to form the heap key.
        show: When truthy, display the final path in a Tk window.

    Returns:
        ``(total_time, numexpanded)`` when the goal was reached, otherwise
        ``(0, 0)``.
    """
    open_set = []
    closed_set = []
    path = {}
    cost = {}
    counter = 0
    delta = []
    delta.append(counter)
    reached = False
    # Moves actually taken so far: maps a cell to the cell it was entered from.
    final_path = {}
    start = (self.start.x, self.start.y)
    goal = (self.goal.x, self.goal.y)
    heapq.heappush(open_set, (0, start))
    path[start] = None
    cost[start] = 0
    # grid_info is indexed [y][x].
    snode = self.grid_info[start[1]][start[0]]
    gnode = self.grid_info[goal[1]][goal[0]]
    snode.g = 0
    snode.h = manhattan_distance(snode, gnode)
    snode.f = snode.g + snode.h
    numexpanded = 0
    current = heapq.heappop(open_set)[1]
    numexpanded = numexpanded + 1
    start_time = time.time()
    # g-value of the goal found by the previous search (0 means "none").
    goal_g = 0
    while current != goal:
        path = {}
        counter += 1
        cnode = self.grid_info[current[1]][current[0]]
        cnode.search = counter
        # Reuse the previous search's result as a heuristic when available.
        if goal_g > 0 and cnode.g:
            cnode.h = goal_g - cnode.g
        else:
            cnode.h = manhattan_distance(cnode, gnode)
        # NOTE(review): assumed to run unconditionally (not only in the else
        # branch), since the search restarts from this cell — confirm.
        cnode.g = 0
        cnode.f = cnode.h + cnode.g
        gnode.g = sys.maxsize
        gnode.search = counter
        open_set = []
        # NOTE(review): this is a one-element list holding the dict_keys view
        # itself, so the membership test below never matches final_path
        # cells; `list(final_path.keys())` may have been intended.
        closed_set = [final_path.keys()]
        heapq.heappush(open_set, (cnode.f, current))
        while open_set and gnode.g > open_set[0][0]:
            numexpanded = numexpanded + 1
            current = heapq.heappop(open_set)[1]
            cnode = self.grid_info[current[1]][current[0]]
            closed_set.append(current)
            for next in self.neighbors(current):
                if next not in closed_set:
                    nextnode = self.grid_info[next[1]][next[0]]
                    # First touch of this cell in the current search:
                    # refresh its heuristic and reset its g-value.
                    if nextnode.search < counter:
                        if goal_g > 0 and nextnode.g and nextnode.search == counter - 1:
                            nextnode.h = goal_g - nextnode.g
                        nextnode.g = sys.maxsize
                        nextnode.search = counter
                    # Cells flagged is_seen get a prohibitive step cost.
                    costCur = cnode.g + (sys.maxsize if nextnode.is_seen else 1)
                    if nextnode.g is None or costCur < nextnode.g:
                        # Drop the stale heap entry, then restore heap order.
                        if (nextnode.f, next) in open_set:
                            open_set.remove((nextnode.f, next))
                            heapq.heapify(open_set)
                        if nextnode.h == 0:
                            nextnode.h = manhattan_distance(gnode, nextnode)
                        nextnode.g = costCur
                        nextnode.f = nextnode.g + nextnode.h
                        # Heap key used for tie-breaking; overwrites f.
                        nextnode.f = (tie_break * nextnode.f) - nextnode.g
                        heapq.heappush(open_set, (nextnode.f, next))
                        path[next] = current
        if not open_set:
            print("blocked target")
            r = tk.Tk()
            grid_o.create_maze(r)
            reached = False
            break
        elif goal in path:
            reached = True
            goal_g = gnode.g
        else:
            goal_g = 0
        # Rebuild the planned path goal -> start through the parent links;
        # final_path entries override those of the current search.
        cur_p = goal
        pathOrder = []
        path = {**path, **final_path}
        while cur_p is not None and reached:
            x, y = cur_p
            # Stop if the parent links loop back on a cell already listed.
            if any(r.x == x and r.y == y for r in pathOrder):
                break
            pathOrder.insert(0, self.grid_info[y][x])
            if (cur_p in path):
                cur_p = path[cur_p]
            else:
                cur_p = None
        # move
        # NOTE(review): raises IndexError when pathOrder is empty, which
        # happens whenever `reached` is False here — confirm intended.
        located = pathOrder[0]
        invalid = False
        for i in range(1, len(pathOrder)):
            n = self.neighbors((located.x, located.y), True)
            potential_next = pathOrder[i]
            if potential_next.is_seen:
                if steps:
                    r = tk.Tk()
                    grid_o.display_path(r, pathOrder[:i][::-1])
                    r.title("intermediate representation")
                    r.mainloop()
                # NOTE(review): `break` is read as belonging to the is_seen
                # branch, with the else below pairing with it — confirm.
                break
            else:
                final_path[(potential_next.x, potential_next.y)] = (located.x, located.y)
                located = potential_next
        current = (located.x, located.y)
    if current == goal:
        # Rebuild the path that was actually walked from final_path.
        cur_p = goal
        pathOrder = []
        while cur_p is not None and reached:
            x, y = cur_p
            if any(r.x == x and r.y == y for r in pathOrder):
                break
            pathOrder.insert(0, self.grid_info[y][x])
            if (cur_p in final_path):
                cur_p = final_path[cur_p]
            else:
                cur_p = None
        reached = True
    else:
        reached = False
    if reached:
        end_time = time.time()
        total_time = end_time - start_time
        if show:
            r = tk.Tk()
            grid_o.display_path(r, pathOrder[::-1])
            r.title("final representation")
            r.mainloop()
        # return reached, pathOrder[::-1], self.grid_info[goal[1]][goal[0]].g, len(cost)
        return total_time, numexpanded
    return 0, 0
# 11279: max heap
# priority queue
import heapq
import sys

input = sys.stdin.readline

n = int(input())
heap = []
for _ in range(n):
    x = int(input())
    if x != 0:
        # Keyed by -x so the min-heap yields the largest value first; the
        # original value rides along as the payload.
        heapq.heappush(heap, (-x, x))
    elif heap:
        print(heapq.heappop(heap)[1])
    else:
        # A query on an empty heap prints 0.
        print(0)
def push(self, data):
    """Insert ``data`` into ``self.heap`` while keeping the heap invariant."""
    target = self.heap
    heapq.heappush(target, data)
import heapq
from collections import defaultdict
from sys import stdin

input = stdin.readline

# Input: node count, edge count, then one "a b c" line per directed edge
# a -> b with weight c, and finally the start and end nodes.
N = int(input())
M = int(input())
dic = defaultdict(list)
for _ in range(M):
    a, b, c = map(int, input().split())
    dic[a].append((c, b))
s, e = map(int, input().split())

Q = [(0, s)]
dp = [float('inf')] * (N + 1)
while Q:
    dist, node = heapq.heappop(Q)
    # The first time a node is popped its distance is final; later
    # (larger) entries for the same node are discarded.
    if dp[node] != float('inf'):
        continue
    dp[node] = dist
    for weight, nxt in dic[node]:
        heapq.heappush(Q, (dist + weight, nxt))
print(dp[e])
input = stdin.readline
INF = int(1e9)
result = 0

n = int(input())
parents = list(range(n))
# One "x y" coordinate pair per input line.
coo = [tuple(map(float, input().split())) for _ in range(n)]

indegree = []  # in-degree (used here as a heap of candidate edges)
results = []
for i in range(n):
    xi, yi = coo[i][0], coo[i][1]
    for j in range(i + 1, n):
        dx = xi - coo[j][0]
        dy = yi - coo[j][1]
        dist = round(math.sqrt(dx**2 + dy**2), 2)
        heappush(indegree, (dist, i, j))  # (distance, node i, node j)


def find(a, parents):
    """Return the root of ``a`` and point every node on the way at it."""
    root = a
    while parents[root] != root:
        root = parents[root]
    # Second pass: path compression.
    while parents[a] != a:
        parents[a], a = root, parents[a]
    return root


def union(a, b, parents):
    """Join the sets of ``a`` and ``b``; the smaller root becomes the parent."""
    root_a = find(a, parents)
    root_b = find(b, parents)
    low, high = min(root_a, root_b), max(root_a, root_b)
    parents[high] = low
def insert(self, item, priority):
    """Wrap ``item`` with ``priority`` in a ``HeapItem`` and push it onto the heap."""
    entry = HeapItem(item, priority)
    heappush(self.heap, entry)
import heapq


def merge_cost(values):
    """Return the total cost of repeatedly merging the two smallest values.

    Each merge removes the two smallest numbers, adds their sum to the
    running cost and puts the sum back, until at most one number is left.

    Args:
        values: Iterable of numbers; it is copied, not modified.

    Returns:
        The accumulated cost (0 for zero or one value).
    """
    heap = list(values)
    heapq.heapify(heap)
    total = 0
    while len(heap) > 1:
        # Two pops already yield the two smallest values; a separate
        # nsmallest() scan per iteration made the loop quadratic.
        merged = heapq.heappop(heap) + heapq.heappop(heap)
        total += merged
        heapq.heappush(heap, merged)
    return total


if __name__ == "__main__":
    T = int(input())
    for tt in range(T):
        n = int(input())  # declared count; the value line is read in full
        a = list(map(int, input().split()))
        print(merge_cost(a))
def addNum(self, num: int) -> None:
    """Add ``num`` to the two-heap structure and refresh ``self.median``.

    ``self.lowers`` stores negated values (so its root is the largest of
    the lower half) and ``self.uppers`` stores values as-is; ``self.ln``
    and ``self.up`` are their sizes and ``self.leng`` the total count.
    """
    self.leng += 1
    belongs_low = num <= self.median

    if self.ln < self.up:
        # The lower half is the short one, so it must gain an element.
        if belongs_low:
            heappush(self.lowers, -num)
        else:
            heappush(self.uppers, num)
            moved = heappop(self.uppers)
            heappush(self.lowers, -moved)
        self.ln += 1
    elif self.ln > self.up:
        # The upper half is the short one, so it must gain an element.
        if belongs_low:
            heappush(self.lowers, -num)
            moved = heappop(self.lowers)
            heappush(self.uppers, -moved)
        else:
            heappush(self.uppers, num)
        self.up += 1
    elif belongs_low:
        heappush(self.lowers, -num)
        self.ln += 1
    else:
        heappush(self.uppers, num)
        self.up += 1

    if self.leng % 2 == 0:
        # lowers[0] is negated, so the subtraction adds the two middles.
        self.median = (self.uppers[0] - self.lowers[0]) / 2
    elif self.ln > self.up:
        self.median = -self.lowers[0]
    else:
        self.median = self.uppers[0]
from heapq import heappop, heappush
def process_summary(instance_name, metric_name, prefixes, trajectories, plot_type, agglomeration, scale_uncertainty, value_multiplier, cmap):
    """Merge several run trajectories into center/lower/upper plot curves.

    All trajectories are walked simultaneously in order of increasing
    ``times_finished`` using a heap. After every step at which each
    trajectory has produced at least one value, the current values are
    passed to ``get_plot_values_funcs[instance_name]`` and the mean and a
    scaled standard deviation of the result are recorded per
    ``(config, name)`` key.

    Args:
        instance_name: Key into ``get_plot_values_funcs``.
        metric_name: Metric part of the trajectory name.
        prefixes: Prefixes combined with ``metric_name`` as
            ``"<prefix>_<metric>"`` (a falsy prefix means the bare metric).
        trajectories: Nested mapping
            ``name -> config -> instance -> list of run trajectories``,
            where each run trajectory holds a ``"times_finished"`` sequence
            and a ``plot_type`` sequence.
        plot_type: Key of the value sequence to read from each trajectory.
        agglomeration: ``"median"`` selects ``np.median``; anything else
            selects ``np.mean``.
        scale_uncertainty: Factor applied to the standard deviation for
            the lower/upper curves.
        value_multiplier: Factor applied to every value before aggregation.
        cmap: Callable mapping a number in [0, 1) to a color.

    Returns:
        ``(plot_empty, plot_data)``: ``plot_empty`` is True when no data
        point was produced; ``plot_data`` maps a label to a dict with the
        curves, color and shared ``finishing_times``.

    Raises:
        AssertionError: If ``instance_name`` is not a known key.
    """
    assert instance_name in get_plot_values_funcs.keys()
    trajectory_names_to_prefix = {
        (("%s_%s" % (prefix, metric_name)) if prefix else metric_name): prefix
        for prefix in prefixes}
    trajectory_names = [t for t in trajectory_names_to_prefix.keys() if t in trajectories]

    # save pointers for each trajectory to iterate over them simultaneously
    # (name is the trajectory name, which consists of prefix and metric)
    trajectory_pointers = {
        (config, name): {instance: ([0] * len(run_trajectories))
                         for instance, run_trajectories in instance_trajectories.items()}
        for name in trajectory_names
        for config, instance_trajectories in trajectories[name].items()}
    trajectory_values = {
        (config, name): {instance: ([None] * len(run_trajectories))
                         for instance, run_trajectories in instance_trajectories.items()}
        for name in trajectory_names
        for config, instance_trajectories in trajectories[name].items()}
    heap = [(run_trajectories[j]["times_finished"][0], config, name, instance, j)
            for name in trajectory_names
            for config, instance_trajectories in trajectories[name].items()
            for instance, run_trajectories in instance_trajectories.items()
            for j in range(len(run_trajectories))]
    heapq.heapify(heap)

    # data to plot
    center = {(config, name): [] for name in trajectory_names for config in trajectories[name].keys()}
    upper = {(config, name): [] for name in trajectory_names for config in trajectories[name].keys()}
    lower = {(config, name): [] for name in trajectory_names for config in trajectories[name].keys()}
    finishing_times = []
    plot_empty = True

    # iterate simultaneously over all trajectories with increasing finishing times
    while heap:
        # get trajectory with lowest finishing time
        times_finished, current_config, current_name, current_instance, trajectory_id = heapq.heappop(heap)

        # update trajectory values and pointers
        current_key = (current_config, current_name)
        current_trajectory = trajectories[current_name][current_config][current_instance][trajectory_id]
        pointers = trajectory_pointers[current_key][current_instance]
        current_value = current_trajectory[plot_type][pointers[trajectory_id]]
        trajectory_values[current_key][current_instance][trajectory_id] = current_value
        pointers[trajectory_id] += 1

        # re-queue the trajectory with its next finishing time, if any
        if pointers[trajectory_id] < len(current_trajectory[plot_type]):
            heapq.heappush(
                heap,
                (current_trajectory["times_finished"][pointers[trajectory_id]],
                 current_config, current_name, current_instance, trajectory_id))

        # wait until every trajectory has reported at least one value
        if any(value is None
               for _, instance_values in trajectory_values.items()
               for _, values in instance_values.items()
               for value in values):
            continue

        # a point at (nearly) the same time replaces the previous one
        if finishing_times and np.isclose(times_finished, finishing_times[-1]):
            finishing_times.pop()
            for curves in (center, upper, lower):
                for curve_key in curves.keys():
                    curves[curve_key].pop()

        # calculate ranks
        values = to_dict([(instance, (config, name), value * value_multiplier)
                          for (config, name), instance_values in trajectory_values.items()
                          for instance, values in instance_values.items()
                          for value in values if value is not None])
        plot_values = get_plot_values_funcs[instance_name](
            values, center.keys(), np.median if agglomeration == "median" else np.mean)

        # populate plotting data
        for key in center.keys():
            if not plot_values[key]:
                center[key].append(float("nan"))
                lower[key].append(float("nan"))
                upper[key].append(float("nan"))
                # Exactly one point per finishing time: without this the
                # statements below appended a second (NaN) point and the
                # curves grew longer than finishing_times.
                continue

            center[key].append(np.mean(plot_values[key]))
            lower[key].append(-1 * scale_uncertainty * np.std(plot_values[key]) + center[key][-1])
            upper[key].append(scale_uncertainty * np.std(plot_values[key]) + center[key][-1])
        finishing_times.append(times_finished)
        plot_empty = False

    # do the plotting
    plot_data = dict()
    for i, (config, name) in enumerate(center.keys()):
        prefix = trajectory_names_to_prefix[name]
        label = ("%s: %s" % (prefix, config)) if prefix else config
        color = cmap(i / len(center))
        plot_data[label] = {
            "individual_trajectory": None,
            "individual_times_finished": None,
            "color": color,
            "linestyle": "-",
            "center": center[(config, name)],
            "lower": lower[(config, name)],
            "upper": upper[(config, name)],
            "finishing_times": finishing_times
        }
    return plot_empty, plot_data
def push(self, item):
    """Place ``item`` on ``self.heap``, preserving the heap ordering."""
    storage = self.heap
    heappush(storage, item)
def push(self, value):
    """Add ``value`` to the heap kept in ``self.heap``."""
    heap_list = self.heap
    heappush(heap_list, value)
def k_shortest_paths(G, source, target, k=1, weight='weight'):
    """Returns the k-shortest paths from source to target in a weighted graph G.

    Parameters
    ----------
    G : NetworkX graph

    source : node
       Starting node

    target : node
       Ending node

    k : integer, optional (default=1)
        The number of shortest paths to find

    weight: string, optional (default='weight')
       Edge data key corresponding to the edge weight

    Returns
    -------
    lengths, paths : lists
       Returns a tuple with two lists.
       The first list stores the length of each k-shortest path.
       The second list stores each k-shortest path.

    Raises
    ------
    NetworkXNoPath
       If no path exists between source and target.

    Examples
    --------
    >>> G=nx.complete_graph(5)
    >>> print(k_shortest_paths(G, 0, 4, 4))
    ([1, 2, 2, 2], [[0, 4], [0, 1, 4], [0, 2, 4], [0, 3, 4]])

    Notes
    ------
    Edge weight attributes must be numerical and non-negative.
    Distances are calculated as sums of weighted edges traversed.

    G is modified temporarily while spur paths are searched; every removed
    edge is added back before the next spur node is processed.
    """
    if source == target:
        return ([0], [[source]])

    length, path = nx.single_source_dijkstra(G, source, target, weight=weight)
    if target not in length:
        raise nx.NetworkXNoPath("node %s not reachable from %s" % (source, target))

    lengths = [length[target]]
    paths = [path[target]]
    # Monotonic counter: tie-breaker so the heap never compares path lists.
    c = count()
    # Heap of candidate paths as (length, tie-breaker, path).
    B = []
    G_original = G.copy()

    for i in range(1, k):
        for j in range(len(paths[-1]) - 1):
            spur_node = paths[-1][j]
            root_path = paths[-1][:j + 1]

            edges_removed = []
            # Remove the edge that follows the shared root in every path
            # already found, so the spur search must deviate from them.
            for c_path in paths:
                if len(c_path) > j and root_path == c_path[:j + 1]:
                    u = c_path[j]
                    v = c_path[j + 1]
                    if G.has_edge(u, v):
                        edge_attr = G.edge[u][v]
                        G.remove_edge(u, v)
                        edges_removed.append((u, v, edge_attr))

            # Cut the root path's earlier nodes out of the graph.
            for n in range(len(root_path) - 1):
                node = root_path[n]
                # out-edges
                # Snapshot first: removing edges while the live iterator is
                # being consumed mutates the adjacency dict under it.
                for u, v, edge_attr in list(G.edges_iter(node, data=True)):
                    G.remove_edge(u, v)
                    edges_removed.append((u, v, edge_attr))

                if G.is_directed():
                    # in-edges
                    for u, v, edge_attr in list(G.in_edges_iter(node, data=True)):
                        G.remove_edge(u, v)
                        edges_removed.append((u, v, edge_attr))

            spur_path_length, spur_path = nx.single_source_dijkstra(G, spur_node, target, weight=weight)
            if target in spur_path and spur_path[target]:
                total_path = root_path[:-1] + spur_path[target]
                total_path_length = get_path_length(G_original, root_path, weight) + spur_path_length[target]
                heappush(B, (total_path_length, next(c), total_path))

            # Restore the graph for the next spur node.
            for e in edges_removed:
                u, v, edge_attr = e
                G.add_edge(u, v, edge_attr)

        if B:
            (l, _, p) = heappop(B)
            lengths.append(l)
            paths.append(p)
        else:
            break

    return (lengths, paths)
def insert(self, value):
    """Push ``value`` onto the heap list held in ``self.value``.

    The previous call passed only ``self.value`` to ``heapq.heappush``,
    which requires both the heap and the item and therefore always raised
    ``TypeError``.

    NOTE(review): ``self.value`` is taken to be the heap list because it
    is the only attribute the original call read — confirm against the
    class initializer.
    """
    heapq.heappush(self.value, value)
def trypush(i, p, q):
    """Push pair (A_i[p], B_i[q]) onto the heap ``h`` if possible.

    Reads ``ABs``, ``n``, ``used`` and ``h`` from the enclosing scope. The
    pair is pushed only when both indices are below ``n`` and the
    ``(i, p, q)`` triple has not been pushed before.

    Heap entries are ``(A[p] + B[q], i, p, q, (A[p], B[q]))``. The sum
    previously read ``b[q]`` (lower-case), a name this function never
    binds, while the stored pair used ``B[q]``.
    """
    A, B = ABs[i]  # A_i, B_i
    key = (i, p, q)
    if p < n and q < n and key not in used:
        heappush(h, (A[p] + B[q], i, p, q, (A[p], B[q])))
        used.add(key)
def append(self, path):
    """Push ``path`` onto ``self.heap``.

    The heap key is ``path.f`` when ``path.using_f`` is truthy and
    ``path.cost`` otherwise.
    """
    key = path.f if path.using_f else path.cost
    heapq.heappush(self.heap, (key, path))
def ward_tree(X, connectivity=None, n_components=None, copy=None,
              n_clusters=None):
    """Ward clustering based on a Feature matrix.

    Recursively merges the pair of clusters that minimally increases
    within-cluster variance.

    The inertia matrix uses a Heapq-based representation.

    This is the structured version, that takes into account some topological
    structure between samples.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        feature matrix representing n_samples samples to be clustered

    connectivity : sparse matrix (optional).
        connectivity matrix. Defines for each sample the neighboring samples
        following a given structure of the data. The matrix is assumed to
        be symmetric and only the upper triangular half is used.
        Default is None, i.e, the Ward algorithm is unstructured.

    n_components : int (optional)
        Number of connected components. If None the number of connected
        components is estimated from the connectivity matrix.

    copy : deprecated
        Ignored; passing any non-None value only emits a
        DeprecationWarning.

    n_clusters : int (optional)
        Stop early the construction of the tree at n_clusters. This is
        useful to decrease computation time if the number of clusters is
        not small compared to the number of samples. In this case, the
        complete tree is not computed, thus the 'children' output is of
        limited use, and the 'parents' output should rather be used.
        This option is valid only when specifying a connectivity matrix.

    Returns
    -------
    children : 2D array, shape (n_nodes, 2)
        The children of each non-leaf node. Values less than `n_samples` refer
        to leaves of the tree. A greater value `i` indicates a node with
        children `children[i - n_samples]`.

    n_components : int
        The number of connected components in the graph.

    n_leaves : int
        The number of leaves in the tree

    parents : 1D array, shape (n_nodes, ) or None
        The parent of each node. Only returned when a connectivity matrix
        is specified, elsewhere 'None' is returned.

    Raises
    ------
    ValueError
        If ``n_clusters`` is larger than the number of samples (structured
        case only).
    """
    if copy is not None:
        warnings.warn("The copy argument is deprecated and will be removed "
                      "in 0.16. The connectivity is now always copied.",
                      DeprecationWarning)

    X = np.asarray(X)
    if X.ndim == 1:
        X = np.reshape(X, (-1, 1))
    n_samples, n_features = X.shape

    if connectivity is None:
        from scipy.cluster import hierarchy  # imports PIL

        if n_clusters is not None:
            warnings.warn('Partial build of the tree is implemented '
                          'only for structured clustering (i.e. with '
                          'explicit connectivity). The algorithm '
                          'will build the full tree and only '
                          'retain the lower branches required '
                          'for the specified number of clusters',
                          stacklevel=2)
        out = hierarchy.ward(X)
        children_ = out[:, :2].astype(np.intp)
        return children_, 1, n_samples, None

    connectivity = _fix_connectivity(X, connectivity,
                                     n_components=n_components)
    if n_clusters is None:
        n_nodes = 2 * n_samples - 1
    else:
        if n_clusters > n_samples:
            raise ValueError('Cannot provide more clusters than samples. '
                             '%i n_clusters was asked, and there are %i samples.'
                             % (n_clusters, n_samples))
        n_nodes = 2 * n_samples - n_clusters

    # create inertia matrix
    coord_row = []
    coord_col = []
    A = []
    for ind, row in enumerate(connectivity.rows):
        A.append(row)
        # We keep only the upper triangular for the moments
        # Generator expressions are faster than arrays on the following
        row = [i for i in row if i < ind]
        coord_row.extend(len(row) * [ind, ])
        coord_col.extend(row)

    coord_row = np.array(coord_row, dtype=np.intp, order='C')
    coord_col = np.array(coord_col, dtype=np.intp, order='C')

    # build moments as a list
    moments_1 = np.zeros(n_nodes, order='C')
    moments_1[:n_samples] = 1
    moments_2 = np.zeros((n_nodes, n_features), order='C')
    moments_2[:n_samples] = X
    # np.float64 is the dtype the removed ``np.float`` alias resolved to.
    inertia = np.empty(len(coord_row), dtype=np.float64, order='C')
    _hierarchical.compute_ward_dist(moments_1, moments_2, coord_row, coord_col,
                                    inertia)
    inertia = list(six.moves.zip(inertia, coord_row, coord_col))
    heapify(inertia)

    # prepare the main fields
    parent = np.arange(n_nodes, dtype=np.intp)
    used_node = np.ones(n_nodes, dtype=bool)
    children = []

    not_visited = np.empty(n_nodes, dtype=np.int8, order='C')

    # recursive merge loop
    for k in range(n_samples, n_nodes):
        # identify the merge: skip heap entries that mention a node which
        # has already been merged into another cluster
        while True:
            inert, i, j = heappop(inertia)
            if used_node[i] and used_node[j]:
                break
        parent[i], parent[j] = k, k
        children.append((i, j))
        used_node[i] = used_node[j] = False

        # update the moments
        moments_1[k] = moments_1[i] + moments_1[j]
        moments_2[k] = moments_2[i] + moments_2[j]

        # update the structure matrix A and the inertia matrix
        coord_col = []
        not_visited.fill(1)
        not_visited[k] = 0
        _hierarchical._get_parents(A[i], coord_col, parent, not_visited)
        _hierarchical._get_parents(A[j], coord_col, parent, not_visited)
        for l in coord_col:
            A[l].append(k)
        A.append(coord_col)
        coord_col = np.array(coord_col, dtype=np.intp, order='C')
        coord_row = np.empty(coord_col.shape, dtype=np.intp, order='C')
        coord_row.fill(k)
        n_additions = len(coord_row)
        ini = np.empty(n_additions, dtype=np.float64, order='C')

        _hierarchical.compute_ward_dist(moments_1, moments_2,
                                        coord_row, coord_col, ini)

        for idx in range(n_additions):
            heappush(inertia, (ini[idx], k, coord_col[idx]))

    # Separate leaves in children (empty lists up to now)
    n_leaves = n_samples
    children = np.array(children)  # return numpy array for efficient caching

    return children, n_components, n_leaves, parent
def _top_documents(scores, limit=10):
    """Return up to ``limit`` docIDs, best score first, ties by smaller docID."""
    ranked = sorted(scores.items(), key=lambda pair: (-pair[1], pair[0]))
    return [doc_id for doc_id, _ in ranked[:limit]]


def run_search(dict_file, postings_file, queries_file, results_file):
    """
    using the given dictionary file and postings file,
    perform searching on the given queries file and output the results to a file

    One line is written to ``results_file`` per query line: the docIDs of
    the ten highest-scoring documents separated by spaces (an empty line
    for a blank query or when nothing matches).

    Args:
        dict_file: Pickled mapping ``term -> (document_frequency, offset, ...)``
            where ``offset`` is the position of the term's postings list
            in ``postings_file``.
        postings_file: File of pickled postings lists; each posting's
            first element is a docID.
        queries_file: UTF-8 text file with one query per line.
        results_file: Output path; overwritten.

    Also reads ``lengths.txt`` from the current working directory, a
    pickled mapping ``docID -> {term: weight}``.

    Exits the process with status 2 if ``queries_file`` cannot be opened.
    """
    print('running search on the queries...')
    stemmer = stem.PorterStemmer()

    # NOTE(review): pickle.load can execute arbitrary code; these index
    # files must come from a trusted indexing run.
    with open(dict_file, "rb") as dictionary_f:
        dictionary = pickle.load(dictionary_f)
    with open(os.path.join(os.getcwd(), "lengths.txt"), "rb") as lengths_f:
        lengths = pickle.load(lengths_f)

    try:
        fd = open(queries_file, 'r', encoding="utf8")
    except OSError:
        error_opening_file(queries_file)
        sys.exit(2)

    # The context managers guarantee both files are actually closed (the
    # previous code referenced ``close`` without calling it).
    with fd, open(results_file, "w") as output_file:
        for line in fd:
            # If its blank just write nothing
            if line in (" ", "\n", "\t"):
                output_file.write('\n')
                continue

            stemmed_tokens = [stemmer.stem(token.lower())
                              for token in nltk.word_tokenize(line)]
            scores = defaultdict(lambda: 0)

            # COSINE SCORE: accumulate, per document, the product of the
            # document weight and the query weight of every query term.
            for token in set(stemmed_tokens):
                docFreq_pointer = dictionary.get(token, -1)
                if docFreq_pointer == -1:
                    continue
                document_frequency = docFreq_pointer[0]

                # Read only the postings list stored at the term's offset.
                with open(postings_file, "rb") as postings_f:
                    postings_f.seek(docFreq_pointer[1])
                    token_postings_list = pickle.load(postings_f)

                # The query-side weight depends on the term only, so it is
                # computed once rather than once per posting.
                query_idf = (len(lengths) + 1) / (document_frequency + 1)
                query_weight = (
                    (1 + math.log(stemmed_tokens.count(token), 10))
                    * math.log(query_idf, 10))

                for docID_termF in token_postings_list:
                    doc_id = docID_termF[0]
                    scores[doc_id] += lengths[doc_id][token] * query_weight

            # Rank on the final scores. Slicing the first ten slots of a
            # heap does not give the ten smallest entries, and pushing
            # after every partial update left stale duplicates behind.
            result = _top_documents(scores)

            # Write the result with the specified format
            output_file.write(' '.join(map(str, result)))
            output_file.write("\n")
def Push(self, elem):
    """Offer ``elem`` to a bounded heap that keeps the ``self.k`` largest.

    While fewer than ``self.k`` elements are stored, ``elem`` is simply
    pushed. Afterwards it replaces the current smallest element only when
    its first field is strictly greater than that element's first field.
    """
    if len(self.data) < self.k:
        heapq.heappush(self.data, elem)
        return
    smallest_key = self.data[0][0]
    if elem[0] > smallest_key:
        heapq.heapreplace(self.data, elem)