def proc_unigram_feats(): mat,key,regy,_ = rs.extract_feats([rs.unigram_feats]) inv_key = {v:k for k,v in key.items()} num_movies,num_words = mat.get_shape() movies = [(regy[i],i) for i in range(num_movies)] min_movies = heap.nsmallest(MOVIE_TARGET,movies) max_movies = heap.nlargest(MOVIE_TARGET,movies) tot_min = 0. tot_max = 0. for mv in min_movies: tot_min += mat[mv[1]].sum() for mv in max_movies: tot_max += mat[mv[1]].sum() fix = tot_max/tot_min diffs = np.zeros((num_words)) for mv in min_movies: diffs += -1.*fix*mat[mv[1]] for mv in max_movies: diffs += mat[mv[1]] with open("english.stop") as f: stop_words = set([line.strip() for line in f.readlines()]) words = [(diffs[0,i],inv_key[i]) for i in range(num_words) if inv_key[i] not in stop_words] worst_words = heap.nsmallest(WORD_TARGET, words) worst_words.sort() best_words = heap.nlargest(WORD_TARGET, words) best_words.sort() for wd in worst_words: print wd[1] + '\t' + str(wd[0]) print '---------------------------------' for wd in best_words: print wd[1] + '\t' + str(wd[0])
def constrained_decoder(voc, predictions, beam, constraints): heap = [State(score=0, label='O', prev=None, roles=set())] for i, prediction in enumerate(predictions): next_generation = list() for prev in heapq.nsmallest(beam, heap, key=_get_score): for j, prob in enumerate(prediction): label = voc[j] score = -math.log2(prob + sys.float_info.min) if score > _PRUNING_THRESHOLD and next_generation: continue next_state = State(score=score + prev.score, label=label, prev=prev, roles=prev.roles) constraints_violated = [not check(next_state) for check in constraints] if any(constraints_violated): continue next_generation.append( State(next_state.score, next_state.label, next_state.prev, next_state.roles | {next_state.label[2:]})) heap = next_generation head = heapq.nsmallest(1, heap, key=_get_score)[0] backtrack = list() while head: backtrack.append(head.label) head = head.prev return list(reversed(backtrack[:-1]))
def recommendTopFive(bskt, hshdct): # recommend top 5 items based on distance print "###### calculating distance ######" rec_list = [] # Recommendation list avgdct = {} # store the distance between all other items to each poi's item mindct = {} for item in bskt: # Consider only items in poi's basket others = [one for one in hshdct.keys() if one not in bskt] # all other items for com in others: dist = distanceSQ(hshdct[item], hshdct[com]) if com not in avgdct: avgdct[com] = dist mindct[com] = dist # store the distance else: avgdct[com] += dist mindct[com] = min(mindct[com], dist) # update the distance #if len(dist_dict[item]) < 10: print "sku %s has <10 similar items" % str(item) rec_list_avg = heapq.nsmallest(5, avgdct, key=avgdct.get) rec_list_min = heapq.nsmallest(5, mindct, key=mindct.get) print "Based on avg distance, recommend user %s: " % str(sys.argv[1]), rec_list_avg print "Based on min distance, recommend user %s: " % str(sys.argv[1]), rec_list_min avg_dist = [math.sqrt(avgdct[com])/(len(bskt)*len(hshdct[com])) for com in rec_list_avg] print "avg dist. = ", ["{0:0.4f}".format(i) for i in avg_dist] min_dist = [math.sqrt(mindct[com])/(len(bskt)*len(hshdct[com])) for com in rec_list_min] print "min dist. = ", ["{0:0.4f}".format(i) for i in min_dist] return rec_list_avg, rec_list_min
def Counting(self, LuckyCombo): ##this function is to count all the tickets by the combination of numbers global List_of_Numbers, List_of_MegaNumbers, list_of_combos lucky = LuckyCombo if lucky == "most": ##chooses the most occuring NUMBERS number_to_count = (x for x in List_of_Numbers) c = Counter(number_to_count) d = c.most_common(5) d.sort() mega_to_count = (x for x in List_of_MegaNumbers) CC = Counter(mega_to_count) DD = CC.most_common(1) return d, DD elif lucky == "combo": ##chooses the most and least occurring combinations combo_to_count = (tuple(x) for x in list_of_combos) combo_count = Counter(combo_to_count) print 'most', combo_count.most_common(1), 'least', list(combo_count.most_common())[-1] dcombo = list(combo_count)[-1:] ecombo = list(x for x in dcombo) return dcombo else: ##chooses the least occurring NUMBERS number_to_count = (x for x in List_of_Numbers) c = Counter(number_to_count) reverse = heapq.nsmallest(5, c.items(), key=itemgetter(1)) d = reverse d.sort() mega_to_count = (x for x in List_of_MegaNumbers) CC = Counter(mega_to_count) Reverse = heapq.nsmallest(1, CC.items(), key=itemgetter(1)) DD = Reverse return d, DD
def orderall(): """ Find intersection of bestvals and lastvals """ global bestvals, lastvals,bestvalsdict,lastvalsdict print '\nbest', k, 'vals overall' nbestvals = H.nsmallest(k,bestvals) nlastvals = H.nsmallest(k,lastvals) bestexps = [] exps = {} for i in xrange(len(nlastvals)): lastval, explast = nlastvals[i] exps[explast] = i for j in xrange(len(nbestvals)): bestval, info = nbestvals[j] expbest, epcnum = info if expbest == explast: exps[expbest] += j for pair in exps.items(): H.heappush(bestexps,(pair[1],pair[0])) nbestexps = H.nsmallest(k,bestexps) for pair in nbestexps: rank, exp = pair lastval = lastvalsdict[exp] bestval, epcnum = bestvalsdict[exp] print exp,'\tbestval',bestval,'at valtest',epcnum,'\tlastval',lastval
def main(): sent_file = open(sys.argv[1]) tweet_file = open(sys.argv[2]) sent_scores = parse_sentiment_scores(sent_file) tweets = parse_tweets(tweet_file) state_happiness = {} for tweet in tweets: if "place" in tweet \ and tweet["place"] != None \ and "country_code" in tweet["place"] \ and tweet["place"]["country_code"] != None \ and "US" == tweet["place"]["country_code"]: state = determine_state(tweet) if state != None: sentiment = tweet_sentiment(tweet, sent_scores) if not state in state_happiness: state_happiness[state] = [] state_happiness[state].append(-sentiment) for state in state_happiness: state_happiness[state] = float(reduce(lambda x,y: x+y, state_happiness[state]))/len(state_happiness[state]) print heapq.nsmallest(1, state_happiness.iteritems(), operator.itemgetter(1))[0][0]
def counting(kyle): global tit, megatit, list_of_combos lucky = kyle if lucky == "most": number_to_count = (x for x in tit) c = Counter(number_to_count) d = c.most_common(5) d.sort() mega_to_count = (x for x in megatit) CC = Counter(mega_to_count) DD = CC.most_common(1) return d, DD elif lucky == "combo": combo_to_count = (tuple(x) for x in list_of_combos) combo_count = Counter(combo_to_count) print 'most', combo_count.most_common(1), 'least', list(combo_count.most_common())[-1] dcombo = list(combo_count)[-1:] return dcombo else: number_to_count = (x for x in tit) c = Counter(number_to_count) reverse = heapq.nsmallest(5, c.items(), key=itemgetter(1)) d = reverse d.sort() mega_to_count = (x for x in megatit) CC = Counter(mega_to_count) Reverse = heapq.nsmallest(1, CC.items(), key=itemgetter(1)) DD = Reverse return d, DD
def get_next_element_when_ready(self): self.first_element_changed.acquire() try: isNotReady = True while isNotReady: if self._qsize() > 0: first_element = heapq.nsmallest(1, self.queue)[0] if isinstance(first_element, SystemExit): first_element = self._get() break if not first_element.flag_alive: log.debug("Early termination of dead metric") first_element = self._get() break timeout = ( first_element.get_next_run_time() - getUTCmillis() ) / 1000.0 log.debug("Waiting on acquired first_element_changed LOCK " + "for: %.2f" % timeout) self.first_element_changed.wait(timeout) else: self.first_element_changed.wait() first_element = heapq.nsmallest(1, self.queue)[0] if isinstance(first_element, SystemExit): first_element = self._get() break if (first_element.get_next_run_time() - getUTCmillis()) <= 0 \ or not first_element.flag_alive: isNotReady = False first_element = self._get() return first_element finally: self.first_element_changed.release()
def count_mon_min_data_avg(fileObj, year, cat_flag): count = 0 flag = 0 avg_data = [] year_data = [] fd = open(fileObj, 'r') for line in fd.readlines(): a = re.split(',|\n| ', line) # Notice the None value if ((int)(a[YEAR]) == year): if (flag == 0): temp = (int)(a[MON]) flag = 1 if (len(a[cat_flag]) != 0): year_data.append((float)(a[cat_flag])) count = count + 1 if(temp != (int)(a[MON])): length = (int)(count * RATE) if (length == 0): length = 1 value = heapq.nsmallest(length, year_data) avg = mean(value) avg_data.append(avg) year_data = [] count = 0 temp = (int)(a[MON]) length = (int)(count * RATE) value = heapq.nsmallest(length, year_data) avg = mean(value) avg_data.append(avg) year_data = [] count = 0 fd.close() return avg_data
def addNum(self, num): """ Adds a num into the data structure. :type num: int :rtype: void """ if not self.leftHeap: heapq.heappush(self.leftHeap,num*-1) return elif not self.rightHeap: if heapq.nsmallest(1,self.leftHeap)[0]*-1 > num: heapq.heappush(self.rightHeap,heapq.heappop(self.leftHeap)*-1) heapq.heappush(self.leftHeap,num*-1) else: heapq.heappush(self.rightHeap,num) return if num<= heapq.nsmallest(1,self.leftHeap)[0]*-1: heapq.heappush(self.leftHeap,num*-1) else: heapq.heappush(self.rightHeap,num) while len(self.leftHeap)-len(self.rightHeap)>1: heapq.heappush(self.rightHeap,heapq.heappop(self.leftHeap)*-1) while len(self.rightHeap)-len(self.leftHeap)>1: heapq.heappush(self.leftHeap,heapq.heappop(self.rightHeap)*-1)
def findMedian(self, A):
    # Integer division so the count passed to nsmallest is an int on both Python 2 and 3.
    half = len(A) // 2
    if len(A) & 1 == 1:
        res = heapq.nsmallest(half + 1, A)[-1]
    else:
        res = 0.5 * (heapq.nsmallest(half + 1, A)[-1] + heapq.nsmallest(half, A)[-1])
    return res
def test_nsmallest(self):
    data = [(random.randrange(2000), i) for i in range(1000)]
    for f in (None, lambda x: x[0] * 547 % 2000):
        for n in (0, 1, 2, 10, 100, 400, 999, 1000, 1100):
            self.assertEqual(nsmallest(n, data), sorted(data)[:n])
            self.assertEqual(nsmallest(n, data, key=f), sorted(data, key=f)[:n])
def persona_interp(EcmRange,Vinp,Eout,ens): """ here I define a brute force interpolation which just takes the average of nearing neighbors EcmRange = range of input energies where Vinp is defined Vinp = is the function to interpolate Eout = is the energy where it is going to be evaluated """ EcmL=list(EcmRange) if shape(Eout)==(): "meaning, if Eout is a number" Ecm0s=nsmallest(2, EcmL, key=lambda x: abs(x-Eout)) "Ecm0s is the two energies in EcmL that are closest to Eout" out=0 ne0=EcmL.index(Ecm0s[0]) ne1=EcmL.index(Ecm0s[1]) return (Vinp[ne0]+Vinp[ne1])/2.0 else: "is Eout is a list, we just loop over its components and repeat what we did above" out=zeros(len(Eout)) for i0 in range(len(Eout)): E0=Eout[i0] Ecm0s=nsmallest(2, EcmL, key=lambda x: abs(x-E0)) ne0=EcmL.index(Ecm0s[0]) ne1=EcmL.index(Ecm0s[1]) out[i0]=(Vinp[ne0]+Vinp[ne1])/2.0 return out
def put_and_notify(self, item, block=True, timeout=None): log.debug("Adding Event:" + str(item)) self.not_full.acquire() try: first_element_before_insertion = None if self._qsize() > 0: first_element_before_insertion = heapq.nsmallest(1, self.queue)[ 0] if self.maxsize > 0: if not block: if self._qsize() == self.maxsize: raise Full elif timeout is None: while self._qsize() == self.maxsize: self.not_full.wait() elif timeout < 0: raise ValueError("'timeout' must be a non-negative number") else: endtime = _time() + timeout while self._qsize() == self.maxsize: remaining = endtime - _time() if remaining <= 0.0: raise Full self.not_full.wait(remaining) self._put(item) self.unfinished_tasks += 1 self.not_empty.notify() first_element_after_insertion = heapq.nsmallest(1, self.queue)[0] if first_element_before_insertion != first_element_after_insertion: self.first_element_changed.notify() finally: self.not_full.release()
def loop(self): gen = 0 m = 300000 m = m * m p = None cnt = 0 while 1: print "gen ", gen, g = [u for u in self.group if u.select(m)] g0 = heapq.nsmallest( self.groupsize / 2, g, key=lambda x: x.judge()) m0 = g0[0].judge() g = [u0.cross(u1) for u1 in g for u0 in g] g = [u.mutation() for u in g] g = heapq.nsmallest(self.groupsize / 2, g, key=lambda x: x.judge()) m1 = g[0].judge() m = min(m0, m1) print "%.4f %.4f" % (m, m / m0), g = g0 + g self.group = g gen += 1 if m != m0: cnt = 0 if p != None: p.terminate() p = multiprocessing.Process( target=plot.plotThread, args=(pts, g[0].seq)) p.start() else: cnt += 1 print cnt if cnt == 20: break p.terminate() return g[0].seq
def heap_median_maintenance(read_in): starting_list = [] median = [] for i in read_in: starting_list.append(i) #If it's the first element being read in, that is the median if len(starting_list) == 1: low_heap = heapq.nsmallest(len(starting_list), starting_list) high_heap = heapq.nlargest(len(starting_list)-1, starting_list) #if even then split half way elif len(starting_list)%2 ==0: low_heap = heapq.nsmallest(len(starting_list)/2, starting_list) high_heap = heapq.nlargest(len(starting_list)/2, starting_list) #if odd give the larger portion to low heap else: low_list_amount = int(math.ceil(float(len(starting_list))/2)) high_list_amount = int(len(starting_list) - math.ceil(float(len(starting_list))/2)) low_heap = heapq.nsmallest(low_list_amount, starting_list) high_heap = heapq.nlargest(high_list_amount, starting_list) #print("Low heap has {} and high heap has {}".format(len(low_heap), len(high_heap))) #print("Low heap {}".format(low_heap)) #print("high heap {}".format(high_heap)) #print("Median is {}".format(heapq.nlargest(1, low_heap)[0])) #append median from the largest element of the low_heap median.append(heapq.nlargest(1, low_heap)[0]) return median
def DumpAudioDiagnostics(self, dir_name="./data/", top_k=10, bot_k=10): # utterance level diag import heapq utt_largest = heapq.nlargest(top_k, self.utt_feature, key=self.utt_feature.get) i = 0 for utt in utt_largest: utt_id = string.join(utt.split("_")[0:-2], "_") t_beg = float(utt.split("_")[-2]) / self.samp_period t_end = float(utt.split("_")[-1]) / self.samp_period file_id = self.list_files[self.map_utt_idx[utt_id]] out_file = "./data/" + repr(i) + "large_srate_" + os.path.basename(file_id).split(".")[0] + ".wav" util.cmdconvert(file_id, out_file, t_beg, t_end) i += 1 utt_smallest = heapq.nsmallest(bot_k, self.utt_feature, key=self.utt_feature.get) i = 0 for utt in utt_smallest: utt_id = string.join(utt.split("_")[0:-2], "_") t_beg = float(utt.split("_")[-2]) / self.samp_period t_end = float(utt.split("_")[-1]) / self.samp_period file_id = self.list_files[self.map_utt_idx[utt_id]] out_file = "./data/" + repr(i) + "small_srate_" + os.path.basename(file_id).split(".")[0] + ".wav" util.cmdconvert(file_id, out_file, t_beg, t_end) i += 1 # glob level diag glob_largest = heapq.nlargest(top_k, self.glob_feature, key=self.glob_feature.get) for utt_id in glob_largest: file_id = self.list_files[self.map_utt_idx[utt_id]] out_file = "./data/glob_large_srate_" + os.path.basename(file_id).split(".")[0] + ".wav" util.cmdconvert(file_id, out_file) glob_smallest = heapq.nsmallest(top_k, self.glob_feature, key=self.glob_feature.get) for utt_id in glob_smallest: file_id = self.list_files[self.map_utt_idx[utt_id]] out_file = "./data/glob_small_srate_" + os.path.basename(file_id).split(".")[0] + ".wav" util.cmdconvert(file_id, out_file)
def getmedian(l, r):
    # print('Median:', l, '-', r)
    if len(l) > len(r):
        return heapq.nlargest(1, l)[0]
    elif len(l) == len(r):
        return (heapq.nlargest(1, l)[0] + heapq.nsmallest(1, r)[0]) / 2
    else:
        return heapq.nsmallest(1, r)[0]
def get_min(self):
    print "request list:", len(self.req_list), ",node list:", len(self.node_list)
    import sys
    # Use == for the emptiness test; 'is 0' relies on CPython's small-integer caching.
    rmin = sys.maxint if len(self.req_list) == 0 else heapq.nsmallest(1, self.req_list, key=lambda x: x['time'])[0]['time']
    nmin = sys.maxint if len(self.node_list) == 0 else heapq.nsmallest(1, self.node_list, key=lambda x: x['time'])[0]['time']
    return min(int(rmin), int(nmin))
def main(): # Take input dir baseDir = sys.argv[1] if baseDir[-1:] != '/' : baseDir += '/' print baseDir neg_review = readAllFolds(baseDir + NEG + FALSE) neg_review += readAllFolds(baseDir + NEG + TRUTH_NEG) pos_review = readAllFolds(baseDir + POS + FALSE) pos_review += readAllFolds(baseDir + POS + TRUTH_POS) false_review = readAllFolds(baseDir + NEG + FALSE) false_review += readAllFolds(baseDir + POS + FALSE) true_review = readAllFolds(baseDir + NEG + TRUTH_NEG) true_review += readAllFolds(baseDir + POS + TRUTH_POS) print "Done with reading all data.." print "Calculating priors.." prior_pos = log_nb(len(pos_review) / (len(pos_review) + len(neg_review) * 1.0)) prior_neg = log_nb(len(neg_review) / (len(pos_review) + len(neg_review) * 1.0)) prior_truth = log_nb(len(true_review) / (len(true_review) + len(false_review) * 1.0)) prior_false = log_nb(len(false_review) / (len(true_review) + len(false_review) * 1.0)) # list of all the tokens in each class true_review_token = collectAllTokens(true_review) false_review_token = collectAllTokens(false_review) pos_review_token = collectAllTokens(pos_review) neg_review_token = collectAllTokens(neg_review) # # Initializing counters for fast counts true_review_token_ctr = Counter(true_review_token) false_review_token_ctr = Counter(false_review_token) pos_review_token_ctr = Counter(pos_review_token) neg_review_token_ctr = Counter(neg_review_token) # my_stop = set([ite for ite, it in true_review_token_ctr.most_common(50)]) # # my_stop = my_stop.union(set([ite for ite, it in false_review_token_ctr.most_common(50)])) # # print my_stop # print len(my_stop) # # print my_stop.intersection(stop_words) # print len(my_stop.intersection(stop_words)) # # print my_stop.difference(stop_words) # print len(my_stop.difference(stop_words)) least_a = set([ite+str(it) for ite, it in heapq.nsmallest(5050, true_review_token_ctr.items(), key=itemgetter(1)) ]) least_b = set([ite+str(it) for ite, it in heapq.nsmallest(4300, false_review_token_ctr.items(), key=itemgetter(1)) ]) print least_a print least_b print len(least_a.intersection(least_b)) print [ite[:-1] for ite in least_a.intersection(least_b) ]
def search_algorithm():
    '''Find the largest or smallest N items in a collection.

    **NOTE**: If you are looking for the _N_ smallest or largest items, and _N_ is
    small compared to the overall size of the collection, the `nsmallest()` and
    `nlargest()` functions of the `heapq` module provide superior performance.
    For larger _N_, it is more efficient to use the `sorted()` function first and
    take a slice. Also, when `N == 1`, it is more efficient to use the built-in
    `min()` and `max()` functions.

    **NOTE**: When doing these calculations, be aware that `zip()` creates an
    iterator that can only be consumed once.
    '''
    import heapq

    # Find in a list of integers
    seq = [1, 8, 2, 23, 7, -2, 18, 23, 42, 37, 2]
    assert heapq.nlargest(3, seq) == [42, 37, 23]
    assert heapq.nsmallest(3, seq) == [-2, 1, 2]

    # Find in a list of dictionaries
    list_of_dict = [
        {'name': 'IBM', 'shares': 100, 'price': 91.1},
        {'name': 'AAPL', 'shares': 50, 'price': 543.22},
        {'name': 'FB', 'shares': 200, 'price': 21.09},
        {'name': 'HPQ', 'shares': 35, 'price': 31.74},
        {'name': 'YHOO', 'shares': 45, 'price': 16.35},
        {'name': 'ACME', 'shares': 75, 'price': 115.65}
    ]
    assert heapq.nsmallest(3, list_of_dict, key=lambda s: s['price']) == [
        {'name': 'YHOO', 'shares': 45, 'price': 16.35},
        {'name': 'FB', 'shares': 200, 'price': 21.09},
        {'name': 'HPQ', 'shares': 35, 'price': 31.74}
    ]

    # Find in a dictionary
    d = {
        'IBM': 91.1,
        'AAPL': 543.22,
        'FB': 21.09
    }
    assert min(zip(d.values(), d.keys())) == (21.09, 'FB')
    assert max(zip(d.values(), d.keys())) == (543.22, 'AAPL')

    # Order a list as a heap, transformed in-place, in linear time
    heapq.heapify(seq)

    # Pop and return the smallest item from the heap, maintaining the heap
    # invariant.
    try:
        assert heapq.heappop(seq) == -2
    except IndexError:
        # Heap is empty
        pass
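# A small sketch of the alternatives mentioned in the docstring above: the built-ins for
# N == 1 and sorted() with a slice when N approaches the collection size. The names seq2
# and n are illustrative and not part of the original snippet.
import heapq

seq2 = [1, 8, 2, 23, 7, -2, 18, 23, 42, 37, 2]

# N == 1: the built-ins are the cheapest option.
assert max(seq2) == 42
assert min(seq2) == -2

# N close to len(seq2): sort once and slice instead of using a heap.
n = 9
assert sorted(seq2)[:n] == heapq.nsmallest(n, seq2)
assert sorted(seq2, reverse=True)[:n] == heapq.nlargest(n, seq2)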
def compare_point(self, point_dict, clf):
    new_point = Point.init_from_dict(point_dict, point_dict['activity'])
    smallest = heapq.nsmallest(10, self.min_heap_list)
    largest = heapq.nsmallest(10, self.max_heap_list)
    sim_center = []
    sim_boundry = []
    for small in smallest:
        sim_center.append(self.similarity_check(small, new_point, clf))
    for large in largest:
        sim_boundry.append(self.similarity_check(large, new_point, clf))
    return (sim_center, sim_boundry)
def do_action(self): """ check timer event then implement it""" now = self.get_min() while True: if len(self.req_list) is 0: break rmin = heapq.nsmallest(1,self.req_list,key=lambda x:x['time'])[0]['time'] if rmin - now > 1: break obj = heapq.heappop(self.req_list) strid = obj['dataid'] if obj['op'] == RequestInfo.CHUNK_REQ_UPLOAD: obj['obj'].upload_end(obj['dataid'],obj['size'],self.nodeid) c = CDataInfo() c.size = obj['size'] c.nodeid = strid self.node[strid] = c self.data_dict[strid] = c elif obj['op'] == RequestInfo.CHUNK_REQ_DOWNLOAD: obj['obj'].download_end(obj['dataid'],obj['size'],self.nodeid) elif obj['op'] == RequestInfo.CHUNK_REQ_UPDATE: obj['obj'].update_end(obj['dataid'],obj['size'],self.nodeid) v = self.node.get(strid) print 'update file key:',strid if v is None: logging.error('fail to update file key:%s,nodeid:%s',strid,self.nodeid) return v.size = obj['size'] self.data_dict[strid] = v self.node[strid] = v else: pass while True: if len(self.node_list) is 0: break nmin = heapq.nsmallest(1,self.node_list,key=lambda x:x['time'])[0]['time'] if nmin - now > 1: break obj = heapq.heappop(self.node_list) if not obj['obj'] is None: c = CDataInfo() c.size = obj['size'] c.nodeid = obj['dataid'] self.node[c.nodeid] = c self.data_dict[c.nodeid] = c obj['obj'].migarate_end(obj['dataid'],self.nodeid)
def findMedian(self):
    """
    Returns the median of the current data stream
    :rtype: float
    """
    if len(self.leftHeap) == len(self.rightHeap):
        return float(heapq.nsmallest(1, self.leftHeap)[0] * -1 + heapq.nsmallest(1, self.rightHeap)[0]) / 2
    elif len(self.leftHeap) > len(self.rightHeap):
        return float(heapq.nsmallest(1, self.leftHeap)[0] * -1)
    else:
        return float(heapq.nsmallest(1, self.rightHeap)[0])
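# Note on the two-heap median snippets above (addNum/findMedian): heapq.nsmallest(1, heap)[0]
# re-scans the whole heap, while heap[0] already holds the smallest element, so peeking is an
# O(1) index. A minimal standalone sketch of the same idiom using heap[0]; MedianFinder here
# is an illustrative name, not taken from the original code.
import heapq

class MedianFinder(object):
    def __init__(self):
        self.left = []   # max-heap via negated values
        self.right = []  # min-heap

    def add_num(self, num):
        heapq.heappush(self.left, -num)
        heapq.heappush(self.right, -heapq.heappop(self.left))
        if len(self.right) > len(self.left):
            heapq.heappush(self.left, -heapq.heappop(self.right))

    def find_median(self):
        if len(self.left) > len(self.right):
            return float(-self.left[0])
        return (-self.left[0] + self.right[0]) / 2.0

mf = MedianFinder()
for x in (2, 7, 4, 1):
    mf.add_num(x)
print(mf.find_median())  # 3.0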
def calcAvgMinMaxDiff(self, twoSamples):
    avgMinMaxDiff = 0
    if len(twoSamples['testSeq']) > 4 and len(twoSamples['grndTruthSeq']) > 4:
        # nlargest/nsmallest return lists, so reduce them to the average of the
        # 5 largest / 5 smallest values before taking the difference.
        avg5max_test = sum(heapq.nlargest(5, twoSamples['testSeq'])) / 5.0
        avg5min_test = sum(heapq.nsmallest(5, twoSamples['testSeq'])) / 5.0
        avg5max_grnd = sum(heapq.nlargest(5, twoSamples['grndTruthSeq'])) / 5.0
        avg5min_grnd = sum(heapq.nsmallest(5, twoSamples['grndTruthSeq'])) / 5.0
        avgMinMaxDiff = abs(avg5max_test - avg5min_test) - abs(avg5max_grnd - avg5min_grnd)
    return avgMinMaxDiff
def ack_thread(self, content_sz): ''' Function that receives the acks and updates the window's limits. ''' acked = [] while True: self.mutex.acquire() if self.begin_window == content_sz: self.mutex.release() break self.mutex.release() time.sleep(0.005) try: data, addr = self.udp.recvfrom(64) self.total_bytes_received += len(data) except socket.timeout: continue pck = self.unmount_package(data) if not self.check_package(pck): continue self.total_acks_received += 1 neue_id = int(pck[1]) self.mutex.acquire() # If the id number is inside the limits of the window... if neue_id >= self.begin_window and neue_id <= self.end_window: heappush(acked, neue_id) # Putting -1 on the time spans vector to make sure # this package won't be sent anymore if self.begin_window != nsmallest(1, acked)[0]: self.time_spans[neue_id] = -1 # Updating the window's limits accordingly. while len(acked) > 0 and self.begin_window == nsmallest(1, acked)[0]: heappop(acked) del self.time_spans[self.begin_window] self.begin_window+=1 # Putting zero on the new packages to be sent to the receiver. while True: if not self.end_window in self.time_spans: self.time_spans[self.end_window] = 0 if self.end_window == content_sz-1: break if self.end_window >= self.begin_window+self.window_sz-1: break self.end_window += 1 self.mutex.release()
def _remove_stale(self):
    'Remove nonces that are too old'
    self._dblock.acquire()
    iterSmallest = heapq.nsmallest(1, self._pq)
    while (len(iterSmallest) == 1) and ((time() - iterSmallest[0][0]) > self._timeoutSeconds):
        try:
            self._entry_finder.pop(iterSmallest[0][1])
        except KeyError:
            pass  # it may be already gone and that's OK
        heapq.heappop(self._pq)
        iterSmallest = heapq.nsmallest(1, self._pq)
    self._dblock.release()
def predict_y(self, point): """ Predict Y value based on k nearest neighbours. The method uses a heapq to keep track of the k nearest neighbours. The heap queue is using the negative euclidean distance for priority. That way, the larger distances have smaller value (and priority). The heapq contains pairs (K, V), where: K is negated euclidean distance between query and data vectors. V is the Y-value of the data vector. -------------------------------------- Implemented in the following way: I) Fill the heap with the first k vectors from data set II) Keep a current minimum (the vector farthest from query vector). III) For each of the remaining vectors in data set. 1) Compute the distance between query and data vector. 2) Negate the distance (larger distances have smaller value). 3) If the current value is larger than current minimum: - Remove current min from heap and add current pair. - Update current min. Note: Size of heap is preserved (always == k) by always removing element with smallest priority (largest distance) before adding new one. """ heap = [] # Add first k elements to fill heapq for val in zip(self.dataX[:self.k], self.dataY[:self.k]): diff = point-val[0] # coordinates are at pos 0 n_summed = np.sum((np.square(diff))) # sum the squares of diff euclid_dist = -math.sqrt(n_summed) # negate the distance pair = (euclid_dist, val[1]) # y values are at position 1 heappush(heap, pair) # Compute the current minimum (largest distance). # Updated whenever the smallest element is removed. curr_min = nsmallest(1, heap)[0][0] # Check remaining elements for val in zip(self.dataX[self.k:], self.dataY[self.k:]): diff = point-val[0] n_summed = (np.sum(np.square(diff))) euclid_dist = -math.sqrt(n_summed) if euclid_dist > curr_min: pair = (euclid_dist, val[1]) heapreplace(heap, pair) # Remove smallest, add current curr_min = nsmallest(1, heap)[0][0] # Update current min elem # Get the Y-values from the heap and return the mean. result = [x[1] for x in heap] return sum(result) / float(len(result))
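# The docstring above describes a k-NN predictor that keeps a bounded heap of size k with
# negated distances, so the farthest retained neighbour sits at the root and heapreplace
# evicts it. A minimal standalone sketch of that idea with an illustrative function name
# (k_nearest_mean) and made-up data; it is not the original class method.
import heapq
import numpy as np

def k_nearest_mean(query, data_x, data_y, k):
    heap = []  # entries are (-distance, y): the root is the farthest neighbour kept so far
    for x, y in zip(data_x, data_y):
        neg_dist = -float(np.sqrt(np.sum(np.square(query - x))))
        if len(heap) < k:
            heapq.heappush(heap, (neg_dist, y))
        elif neg_dist > heap[0][0]:
            heapq.heapreplace(heap, (neg_dist, y))  # evict the farthest kept neighbour
    return sum(y for _, y in heap) / float(len(heap))

data_x = np.array([[0.0, 0.1], [0.0, 0.2], [5.0, 5.0], [6.0, 6.0]])
data_y = [1.0, 3.0, 10.0, 12.0]
print(k_nearest_mean(np.array([0.0, 0.0]), data_x, data_y, k=2))  # 2.0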
def findCurrentMedian(bigHalf, smallHalf):
    ## This finds the current median
    numBig = len(bigHalf)
    numSmall = len(smallHalf)
    currentMedian = 0
    if numBig > numSmall:
        return heapq.nsmallest(1, bigHalf)[0]
    elif numSmall > 0:
        return -heapq.nsmallest(1, smallHalf)[0]
    else:
        return 0
def _computeGridRatio(coord, shape):
    """
    coord (list of tuple of 2 floats): coordinates
    shape (2 ints): X and Y number of coordinates
    return (float): ratio X/Y
    """
    x_cors = [i[0] for i in coord]
    y_cors = [i[1] for i in coord]
    x_max_cors = numpy.mean(heapq.nlargest(shape[0], x_cors))
    x_min_cors = numpy.mean(heapq.nsmallest(shape[0], x_cors))
    y_max_cors = numpy.mean(heapq.nlargest(shape[1], y_cors))
    y_min_cors = numpy.mean(heapq.nsmallest(shape[1], y_cors))
    x_scale = x_max_cors - x_min_cors
    y_scale = y_max_cors - y_min_cors
    return x_scale / y_scale
def pca_cum_variance_analysis(self, show_plot=False, custom_dpi=600, percentage=70, number_of_components=20): self.called_feat_pca_analysis = True print('PCA Cumulative Variance analysis has been called\n') print('-------------------------------\n') sns.set(style="ticks", context='paper') # fig = plt.figure(figsize=(10, 10)) fig = plt.figure(figsize=plot_tools.cm2inch(8.4, 8.4)) sns.set(font_scale=1) if number_of_components is not None: pca1 = PCA(n_components=number_of_components) else: pca1 = PCA(n_components=len(self.selection)) TEST = 1 # self.pca_transformed_data = pca1.fit_transform(self.scaled_data) self.pca_transformed_data = pca1.fit_transform(self.pca_data) # The amount of variance that each PC explains var = pca1.explained_variance_ratio_ print('Explained variance ratio: ', var) self.md_pre_feat_analysis_data = { 'varExplainedRatio': pca1.explained_variance_ratio_, 'varExplained': pca1.explained_variance_, 'mean': pca1.mean_, } # Cumulative Variance explains var1 = np.cumsum( np.round(pca1.explained_variance_ratio_, decimals=4) * 100) print("Cumulative Variance explains ", var1) # plt.plot(var) plt.plot(var1) plt.xlabel("Principal Component") plt.ylabel("Cumulative Proportion of Variance Explained") fig.savefig(self.simulation_name + 'PCA_cumsum_analysis_' + '.png', dpi=custom_dpi, bbox_inches='tight') if show_plot is True: plt.show() import heapq max_num_list = 3 var_array = np.array(var1) best_score = 0 best_index = 0 for i in range(len(var_array)): if var_array[i] >= percentage: best_score = var_array[i] best_index = i break bottom_var = heapq.nsmallest(max_num_list, range(len(var_array)), var_array.take) print('Bottom Var', bottom_var) # self.md_pca_analysis_data.update({selection_text: self.reduced_cartesian}) # self.number_pca = bottom_var[-1] + 1 self.number_pca = best_index + 1 print('Percentage of PCA : ', best_score) if best_score == 0: self.number_pca += 1 print('Number of PCA : ', self.number_pca) return self.number_pca print("PCA transformation finished successfully") print('-----------------------------------\n')
def MAP_calculation(orignal, simplified):
    MAP = []
    assert DATA_AMOUNT == len(simplified)
    index_set = random.sample(xrange(DATA_AMOUNT), RANDOM_TEST_AMOUNT)
    for random_index in index_set:
        original_distance_measure = []
        simplified_distance_measure = []
        for index_1 in xrange(DATA_AMOUNT):
            # compute the distance between this data point and every other point
            if (random_index == index_1):
                continue
            original_distance = np.linalg.norm(
                (orignal[index_1], orignal[random_index]), ord=2)
            original_distance_measure.append((original_distance, index_1))
            simplified_distance = np.linalg.norm(
                (simplified[index_1], simplified[random_index]), ord=2)
            simplified_distance_measure.append((simplified_distance, index_1))
        '''
        original_distance_measure   -> true distances between this point and the others
        simplified_distance_measure -> predicted distances between this point and the others
        '''
        original_NearestNeighbor = heapq.nsmallest(MAP_LIST_LENGTH,
                                                   original_distance_measure)
        original_NearestNeighbor = set(
            [vals[1] for vals in original_NearestNeighbor])
        # print(original_NearestNeighbor)
        simplified_NearestNeighbor = heapq.nsmallest(
            MAP_LIST_LENGTH, simplified_distance_measure)
        simplified_NearestNeighbor = set(
            [vals[1] for vals in simplified_NearestNeighbor])
        # print(simplified_NearestNeighbor)
        correct_prediction = 0
        stats_array = []
        if (index_1 >= 48):
            print("nearest-neighbour list from the simplified data")
            for item in simplified_NearestNeighbor:
                print(item)
            print("nearest-neighbour list from the original data")
            for item in original_NearestNeighbor:
                print(item)
            return
        for index, prediction in enumerate(simplified_NearestNeighbor):
            if (prediction in original_NearestNeighbor):
                correct_prediction += 1
                stats_array.append((correct_prediction) / (index + 1))
        if (len(stats_array) == 0):
            print("got an empty result array: index {}".format(random_index))
            stats = 0
        else:
            stats = np.mean(stats_array)
        MAP.append(stats)
        '''
        NearestNeighbor -> set of neighbouring points
        stats_array     -> per-hit precision values used for the MAP computation
        stats           -> the MAP for this single data point
        MAP             -> the MAP over all data points
        '''
    overall_score = np.mean(MAP)
    return overall_score
more = [] less = [] for num in lists: if num >= 0: more.append(num) else: less.append(num) if len(lists) == 3: print(lists[0] * lists[1] * lists[2]) else: if len(more) == 0: result = heapq.nlargest(3, less) print(result[0] * result[1] * result[2]) else: overZero = max(more) more.remove(overZero) if len(more) >= 2 and len(less) >= 2: result1 = heapq.nlargest(2, more) result2 = heapq.nsmallest(2, less) result = max(result1[0] * result1[1], result2[0] * result2[1]) else: if len(more) >= 2: result1 = heapq.nlargest(2, more) result = (result1[0] * result1[1]) else: result2 = heapq.nsmallest(2, less) result = result2[0] * result2[1] print(overZero * result)
""" 3. Get n largest/smallest elts of the array of dicts """ import heapq arr_dicts = [{"name": "John", "age": 23, "city": "Oakland", "state": "CA"}, {"name": "Mary", "age": 33, "city": "San Jose", "state": "CA"}, {"name": "Henock", "age": 27, "city": "Las Vegas", "state": "NV"}, {"name": "James", "age": 19, "city": "Seattle", "state": "WA"}] # print(arr_dicts) largests = heapq.nlargest(2, arr_dicts, lambda dict: dict['age']) print(largests) smallests = heapq.nsmallest(2, arr_dicts, lambda dict: dict['age']) print(smallests) states = heapq.nsmallest(2, arr_dicts, lambda d: d['state']) print(states) states = heapq.nlargest(2, arr_dicts, lambda d: d['state']) print(states)
str((queried_location.latitude, queried_location.longitude))) user_lat_long = (queried_location.latitude, queried_location.longitude) print('Now searching parks!') for key, value in nps_dict.items(): try: distances = (distance.distance(user_lat_long, value).miles) distances_dict[key] = distances except ValueError: #Some national parks don't have this data available. SKIP! pass except AttributeError: print( "Sorry, we can't find that location! Try simplifying it- We don't need your exact street address." ) return (user_location()) user_location() result = nsmallest(10, distances_dict.items(), key=itemgetter(1)) print( "Here are the ten closest national parks to you and their distance from you in miles:" ) pprint.pprint(result)
def method8(dists, N):
    # Select the indices of the N smallest distances (rather than the distance values
    # themselves) so they can be used to index the triangle coordinates below.
    closest = heapq.nsmallest(N, range(len(dists)), key=lambda i: dists[i])
    n = int(np.ceil(np.sqrt(2 * len(dists))))
    ti = np.triu_indices(n, 1)
    r = zip(ti[0][closest] + 1, ti[1][closest] + 1)
    return r
def nsmallest_tr(self_):
    return heapq.nsmallest(n, iter(self_), key=key)
def main(): list_1 = [1, 3, 5, 7, 9] # convert list into min heap heapq.heapify(list_1) print(list_1) """" Output: [1, 3, 5, 7, 9] After heapifying the list. 1 /\ 3 5 /\ 7 9 """ # heappush pushes data into heap and organizes it # appropriately. heapq.heappush(list_1, 6) print(list(list_1)) """" Output: [1, 3, 5, 7, 9, 6] After heapifying the list. 1 / \ 3 5 /\ / 7 9 6 """ # we also have heappop, which pops the first element # in the list and heapifies the list. heapq.heappop(list_1) print(list_1) """ heap gets rearranged as follows: 3 / \ 6 5 / \ 7 9 Here 3 becomes parent since 3 is minimum of all. """ # heappushpop simultaneously pushes and pops same element. heapq.heappushpop(list_1, 2) print(list_1) # heapreplace simultaneously pushes and pops same element. heapq.heapreplace(list_1, 2) print(list_1) """ heap gets rearranged as follows: 2 / \ 6 5 / \ 7 9 Here 3 is popped out, 2 is replaced as parent. """ # nlargest print("{} are the 3 largest elements in that order".format( heapq.nlargest(3, list_1))) # nsmallest print("{} are the 3 smallest elements in that order".format( heapq.nsmallest(3, list_1)))
def kSmallestPairs_pythonic(nums1, nums2, k):
    list1 = []
    for i in range(len(nums1)):
        for j in range(len(nums2)):
            list1.append([nums1[i], nums2[j]])
    return heapq.nsmallest(k, list1, key=lambda x: sum(x))
def __iter__(self):
    nodes = heapq.nsmallest(self.maxsize, self.heap)
    return iter(map(itemgetter(1), nodes))
q.append(2)
q.append(3)
print(q)
q.append(4)
print(q)
# A deque inserts or removes elements at either end in O(1), whereas a list takes O(n)
# to insert or delete at the front.

# 4. Finding the largest or smallest N elements
"""When the number of elements is small, nlargest/nsmallest are a good choice; if you only
need one element, max or min is faster; and if N is large, close to the size of the
collection, sorted(items)[:N] or sorted(items)[N:] works better."""
import heapq

nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
largest_3 = heapq.nlargest(3, nums)
smallest_3 = heapq.nsmallest(3, nums)
print(largest_3)
print(smallest_3)

# More complex data structures
portfolio = [{
    'name': 'IBM',
    'shares': 100,
    'price': 91.1
}, {
    'name': 'AAPL',
    'shares': 50,
    'price': 543.22
}, {
    'name': 'FB',
    'shares': 200,
import heapq

nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums))   # Prints [42, 37, 23]
print(heapq.nsmallest(3, nums))  # Prints [-4, 1, 2]

portfolio = [{'name': 'IBM', 'shares': 100, 'price': 91.1},
             {'name': 'AAPL', 'shares': 50, 'price': 543.22},
             {'name': 'FB', 'shares': 200, 'price': 21.09},
             {'name': 'HPQ', 'shares': 35, 'price': 31.75},
             {'name': 'YHOO', 'shares': 45, 'price': 16.35},
             {'name': 'ACME', 'shares': 75, 'price': 115.65}]
li2 = [5, 7, 9, 4, 3] # using heapify() to convert list into heap heapq.heapify(li1) heapq.heapify(li2) # using heappushpop() to push and pop items simultaneously # pops 2 print("The popped item using heappushpop() is : ", end="") print(heapq.heappushpop(li1, 2)) # using heapreplace() to push and pop items simultaneously # pops 3 print("The popped item using heapreplace() is : ", end="") print(heapq.heapreplace(li2, 2)) li1 = [6, 7, 9, 4, 3, 5, 8, 10, 1] # using heapify() to convert list into heap heapq.heapify(li1) # using nlargest to print 3 largest numbers # prints 10, 9 and 8 print("The 3 largest numbers in list are : ", end="") print(heapq.nlargest(3, li1)) # using nsmallest to print 3 smallest numbers # prints 1, 3 and 4 print("The 3 smallest numbers in list are : ", end="") print(heapq.nsmallest(3, li1))
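# The two calls above differ in ordering: heappushpop pushes the new item first and then pops
# the smallest (so it may hand back the item it was just given), while heapreplace pops the
# current smallest first and then pushes (so it always returns an element that was already in
# the heap, even if the new item is smaller). A tiny illustration with made-up values:
import heapq

h1 = [5, 7, 9]
heapq.heapify(h1)
print(heapq.heappushpop(h1, 2))  # 2: the new value is smaller than the root, so it comes straight back
print(h1)                        # [5, 7, 9]

h2 = [5, 7, 9]
heapq.heapify(h2)
print(heapq.heapreplace(h2, 2))  # 5: the old root is returned and 2 is pushed in its place
print(h2)                        # [2, 7, 9]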
def kClosest(self, points, K):
    import heapq
    return heapq.nsmallest(K, points, key=lambda p: p[0]**2 + p[1]**2)
'''
import heapq
from pprint import pprint

items = [{'name': 'Item-1', 'price': 101.1},
         {'name': 'Item-2', 'price': 555.22},
         {'name': 'Item-3', 'price': 45.09},
         {'name': 'Item-4', 'price': 22.75},
         {'name': 'Item-5', 'price': 16.30},
         {'name': 'Item-6', 'price': 110.65}]

cheap = heapq.nsmallest(3, items, key=lambda s: s['price'])
expensive = heapq.nlargest(3, items, key=lambda s: s['price'])

print("Original datasets:")
pprint(items)
print("\nFirst 3 expensive items:")
pprint(expensive)
print("\nFirst 3 cheap items:")
pprint(cheap)
# publication year
years = set()
plot_data = {i: 0 for i in range(1900, 2030)}
for file in [file_abs]:
    for index, row in file.iterrows():
        year = row['anno']
        if year == ' dcterms_issued:: @@MISSING-DATA' or year == 'dcterms_issued:: @@MISSING-DATA':
            continue
        years.add(int(year))
        try:
            plot_data[int(year)] = plot_data[int(year)] + 1
        except KeyError:
            # plot_data[int(year)] = 1
            continue

print(nsmallest(10, list(years)))
print(nlargest(10, list(years)))

x = [k for k, v in plot_data.items()]
y = [v for k, v in plot_data.items()]
plt.clf()
plt.plot(x, y, 'r')
plt.ylabel(xx)
plt.xlabel(yy)
plt.title(tit)

years = set()
plot_data = {i: 0 for i in range(1900, 2030)}
for file in [file_nabs]:
    for index, row in file.iterrows():
        year = row['anno']
import heapq
import random

mylist = list(random.sample(range(100), 10))
print(mylist)
k = 3
largest = heapq.nlargest(k, mylist)
print('3 largest elements', largest)
smallest = heapq.nsmallest(k, mylist)
print('3 smallest elements', smallest)

# Heapify the original list in place
heapq.heapify(mylist)
print('list after heapify', mylist)

heapq.heappush(mylist, 105)
print('after pushing an element onto the heap', mylist)
heapq.heappop(mylist)
print('after popping the smallest element', mylist)
heapq.heappushpop(mylist, 130)
print('push then pop in one call', mylist)
heapq.heapreplace(mylist, 2)
print('pop then push in one call', mylist)

from functools import reduce
a = reduce(lambda x, y: x * y, [1, 3, 5, 7, 9])
def min_L2dist(vec, vectors, num_cand):
    L2dist_with_index = [(cal_L2dist(vec, v), i) for i, v in enumerate(vectors)]
    closest = heapq.nsmallest(num_cand, L2dist_with_index)
    index_L2 = [index for (dist, index) in closest]
    return index_L2
#!/usr/bin/env python
__author__ = 'Chocolee'

import heapq
import random

heap = []
data = list(range(10000))
random.shuffle(data)

# for num in data:
#     heapq.heappush(heap, num)
# for i in range(len(heap)):
#     print(heapq.heappop(heap))

print(heapq.nsmallest(10, data))
def plot_least_10_hellinger_neurons(hellinger_stats, model1_data, model2_data, color1, color2, modelname1, modelname2, data_dict, foldername, n_tokens=0, process_data_flag=False): """ :param hellinger_stats: path to the savd file for the hellinger statistics from calculate_hellinger_distance function :param model1_data:data from trained model 1(dtype:dataframe) :param model2_data:data from trained model 2(dtype:dataframe) :param color1:color for model 1(dtype:str) :param color2:color for model 2(dtype:str) :param modelname1:model1 label(dtype:str) :param modelname2:model2 label(dtype:str) :param data_dict: dictionary containing input instructions(dtype:dict) :param foldername: pickled file name and directory to store the results :param n_tokens: number of tokens you want to plot(dtype:int) :param process_data_flag: True if the pickle files need to be generated, False if you want to load the pickle files. :Description: Generates the plot for the least 10 neurons with highest hellinger distances in hellinger_stats """ # removing the whitespaces model1_data['POS'] = model1_data['POS'].apply(lambda x: x.replace(" ", "")) model2_data['POS'] = model2_data['POS'].apply(lambda x: x.replace(" ", "")) # Getting all the POS tags activated model1_pos = list(model1_data['POS'].unique()) model1_pos = list(model2_data['POS'].unique()) all_pos = set(model1_pos + model1_pos) # all_pos = [pos.strip() for pos in all_pos] # loading the Hellinger distance dictionary with open(hellinger_stats, 'rb') as handle: hellinger_dict = pickle.load(handle) least_10_neurons = heapq.nsmallest(10, hellinger_dict, key=hellinger_dict.get) for neuron in least_10_neurons: path = os.path.join(data_dict["visualize"]["plot_directory"], foldername, "least_10", str(neuron)) if not os.path.exists(path): os.makedirs(path) model1_data_temp = model1_data[model1_data['max_activation_index'] == neuron] model2_data_temp = model2_data[model2_data['max_activation_index'] == neuron] # Getting the pos stats from all the dictionaries model1_pos_dict = dict(Counter(model1_data_temp['POS'])) model2_pos_dict = dict(Counter(model2_data_temp['POS'])) # Creating dataframe from the dictionaries model1_pos = pd.DataFrame.from_dict(model1_pos_dict, orient='index', columns=[modelname1]) model2_pos = pd.DataFrame.from_dict(model2_pos_dict, orient='index', columns=[modelname2]) # Normalizing the statistics model1_pos[modelname1] = model1_pos[modelname1].apply( lambda x: x / model1_pos[modelname1].sum()) model2_pos[modelname2] = model2_pos[modelname2].apply( lambda x: x / model2_pos[modelname2].sum()) # Merging dataframe data = [model1_pos[modelname1], model2_pos[modelname2]] df = pd.concat(data, axis=1) # Again converting the dataframe to dictionary for further computations. all_pos_stats = df.to_dict() # Getting all the pos stats into a dictionary for viz_data in all_pos_stats.keys(): for tags in all_pos: if tags not in all_pos_stats[viz_data].keys(): all_pos_stats[viz_data][tags] = None # Converting pos stats to a dataframe # all_pos_stats = pd.DataFrame.from_dict(all_pos_stats) if process_data_flag == True: # Getting the data. 
model1_neurondata = model1_data[model1_data['max_activation_index'] == neuron] model1_neurondata['POS'] = model1_neurondata['POS'].apply( lambda x: x.strip()) model2_neurondata = model2_data[model2_data['max_activation_index'] == neuron] model2_neurondata['POS'] = model2_neurondata['POS'].apply( lambda x: x.strip()) # Converting the other pos tags to the least three ones model1_least_pos = choose_top_pos_from_data(model1_neurondata) model2_least_pos = choose_top_pos_from_data(model2_neurondata) model1_tokens = list(model1_neurondata['inputs']) model1_pos = list(model1_neurondata['POS']) model2_tokens = list(model2_neurondata['inputs']) model2_pos = list(model2_neurondata['POS']) for index, pos in enumerate(model1_pos): if pos not in model1_least_pos[model1_tokens[index]]: model1_pos[index] = model1_least_pos[ model1_tokens[index]][0] for index, pos in enumerate(model2_pos): if pos not in model2_least_pos[model2_tokens[index]]: model2_pos[index] = model2_least_pos[ model2_tokens[index]][0] model1_neurondata['POS'] = model1_pos model2_neurondata['POS'] = model2_pos # Getting all the unique tokens model1_unique_tokens = model1_neurondata["inputs"].unique() model2_unique_tokens = model2_neurondata["inputs"].unique() model1_dict, model2_dict = ({} for i in range(2)) # Generating model1 visualization # Getting mean for all the unique tokens for tokens in model1_unique_tokens: temp_df = model1_neurondata[model1_neurondata["inputs"] == tokens] pos = list(temp_df["POS"].unique()) activation_temp = [] for unique_pos in pos: activation_temp.append( temp_df[temp_df['POS'] == unique_pos]["max_activations"].mean()) model1_dict[tokens] = { "POS": pos, "activation": activation_temp } # Getting the least 20 activation tokens model1_least_20 = {} temp_activations, temp_tokens = ([] for i in range(2)) for key, value in model1_dict.items(): for index in range(len(value['POS'])): temp_tokens.append(key) temp_activations.append(value['activation'][index]) model1_least_20_activation_index = sorted( range(len(temp_activations)), key=lambda x: temp_activations[x])[-n_tokens:] for indexes in model1_least_20_activation_index: model1_least_20[temp_tokens[indexes]] = model1_dict[ temp_tokens[indexes]] # Flipping the dictionary to get it in the order of {pos-tags:list(tuple(token,mean_activations))} model1_token_dict = defaultdict(list) for token, stats in model1_least_20.items(): for index, value in enumerate(stats['POS']): model1_token_dict[stats['POS'][index]].append( (token, stats['activation'][index])) # Adding the null features for the tags not present for tags in all_pos: if tags not in model1_token_dict.keys(): model1_token_dict[tags].append((' ', 0.0)) # Sorting dict on the basis of the names sorted_model1_dict = {} for key in sorted(model1_token_dict.keys()): sorted_model1_dict[key] = model1_token_dict[key] with open(os.path.join(path, 'model1_data.pickle'), 'wb') as handle: pickle.dump(sorted_model1_dict, handle, protocol=pickle.HIGHEST_PROTOCOL) # Generating model2 visualization # Getting mean for all the unique tokens for tokens in model2_unique_tokens: temp_df = model2_neurondata[model2_neurondata["inputs"] == tokens] pos = list(temp_df["POS"].unique()) activation_temp = [] for unique_pos in pos: activation_temp.append( temp_df[temp_df['POS'] == unique_pos]["max_activations"].mean()) model2_dict[tokens] = { "POS": pos, "activation": activation_temp } # Getting the least 20 activation tokens model2_least_20 = {} temp_activations, temp_tokens = ([] for i in range(2)) for key, value in model2_dict.items(): 
for index in range(len(value['POS'])): temp_tokens.append(key) temp_activations.append(value['activation'][index]) model2_least_20_activation_index = sorted( range(len(temp_activations)), key=lambda x: temp_activations[x])[-n_tokens:] for indexes in model2_least_20_activation_index: model2_least_20[temp_tokens[indexes]] = model2_dict[ temp_tokens[indexes]] # Flipping the dictionary to get it in the order of {pos-tags:list(tuple(token,mean_activations))} model2_token_dict = defaultdict(list) for token, stats in model2_least_20.items(): for index, value in enumerate(stats['POS']): model2_token_dict[stats['POS'][index]].append( (token, stats['activation'][index])) # Adding the null features for the tags not present for tags in all_pos: if tags not in model2_token_dict.keys(): model2_token_dict[tags].append((' ', 0.0)) # Sorting dict on the basis of the names sorted_model2_dict = {} for key in sorted(model2_token_dict.keys()): sorted_model2_dict[key] = model2_token_dict[key] with open(os.path.join(path, 'model2_data.pickle'), 'wb') as handle: pickle.dump(sorted_model2_dict, handle, protocol=pickle.HIGHEST_PROTOCOL) else: # loading the dictionary with open(os.path.join(path, 'model1_data.pickle'), 'rb') as handle: sorted_model1_dict = pickle.load(handle) with open(os.path.join(path, 'model2_data.pickle'), 'rb') as handle: sorted_model2_dict = pickle.load(handle) fig = go.Figure() # Plotting the bar plot fig.add_trace( go.Bar(x=list(all_pos_stats[modelname1].keys()), y=list(all_pos_stats[modelname1].values()), name=modelname1, marker_color=color1, opacity=0.6)) fig.add_trace( go.Bar(x=list(all_pos_stats[modelname2].keys()), y=list(all_pos_stats[modelname2].values()), name=modelname2, marker_color=color2, opacity=0.6)) # Plotting the tokens on the bar plot pos_model1 = list(sorted_model1_dict.keys()) values_model1 = list(sorted_model1_dict.values()) pos_model2 = list(sorted_model2_dict.keys()) values_model2 = list(sorted_model2_dict.values()) model1_value = [[(value[0], np.nan) if value[1] == 0.0 else (value[0], value[1]) for value in pairs] for pairs in values_model1] model2_value = [[(value[0], np.nan) if value[1] == 0.0 else (value[0], value[1]) for value in pairs] for pairs in values_model2] model1_token = [[value[0] for value in pairs] for pairs in model1_value] model1_activations = [[value[1] for value in pairs] for pairs in model1_value] model2_token = [[value[0] for value in pairs] for pairs in model2_value] model2_activations = [[value[1] for value in pairs] for pairs in model2_value] pos_model1_list, activation_model1_list, token_model1_list = ( [] for i in range(3)) for index in range(len(pos_model1)): for activation_list_index, activation in enumerate( model1_activations[index]): pos_model1_list.append(pos_model1[index]) activation_model1_list.append(activation) token_model1_list.append( model1_token[index][activation_list_index]) fig.add_trace( go.Scatter(x=pos_model1_list, y=activation_model1_list, text=token_model1_list, mode='markers+text', marker_color=color1, name=modelname1, textfont={'color': color1})) pos_model2_list, activation_model2_list, token_model2_list = ( [] for i in range(3)) for index in range(len(pos_model2)): for activation_list_index, activation in enumerate( model2_activations[index]): pos_model2_list.append(pos_model2[index]) activation_model2_list.append(activation) token_model2_list.append( model2_token[index][activation_list_index]) fig.add_trace( go.Scatter(x=pos_model2_list, y=activation_model2_list, text=token_model2_list, mode='markers+text', 
marker_color=color2, name=modelname2, textfont={'color': color2})) fig.update_layout(title_text='Hellinger plot for ' + str(neuron) + "-neuron", xaxis_title="POS-tags", yaxis_title="Activation", xaxis=go.XAxis(showticklabels=True), yaxis=go.YAxis(showticklabels=True)) plotly.offline.plot(fig, filename=os.path.join(path, str(neuron) + ".pdf"), auto_open=False) fig.show()
def top_k(k, stream):
    # Entries are (length, string) so the min-heap orders strings by length.
    min_heap = [(len(s), s) for s in itertools.islice(stream, k)]
    heapq.heapify(min_heap)
    for next_string in stream:
        # heappushpop keeps the heap at size k by evicting the current shortest entry,
        # so only the k longest strings seen so far remain.
        heapq.heappushpop(min_heap, (len(next_string), next_string))
    return [p[1] for p in heapq.nsmallest(k, min_heap)]
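# A quick usage sketch for the top_k function above, assuming heapq and itertools are
# imported as the function requires; the sample words are made up for illustration.
words = iter(["a", "bbb", "cc", "dddd", "e", "ffff"])
print(top_k(3, words))  # ['bbb', 'dddd', 'ffff']: the three longest strings, shortest first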
def step(self, episode, action, index, done, label='T'): # update state and compute reward #print('self.F_ward', self.F_ward) #print('check, state, heap', self.check, self.state, self.heap) if action >= len(self.check): rem = self.check[0] else: rem = self.check[action] # point index in ori traj #print('remove point index and value', self.state, rem, self.F_ward[rem][0]) NEXT_P = self.F_ward[rem][1] NEXT_V = self.B_ward[NEXT_P][0] LAST_P = self.B_ward[rem][1] LAST_V = self.F_ward[LAST_P][0] if LAST_P > self.link_head: self.delete_heap(self.heap, (LAST_V, LAST_P)) #s = self.ori_traj_set[episode][self.B_ward[LAST_P][1]] #m1 = self.ori_traj_set[episode][LAST_P] #m2 = self.ori_traj_set[episode][rem] #e = self.ori_traj_set[episode][NEXT_P] self.err_record[(self.B_ward[LAST_P][1], NEXT_P)] = F.sed_op( self.ori_traj_set[episode][self.B_ward[LAST_P][1]:NEXT_P + 1]) #self.err_record[(self.B_ward[LAST_P][1], NEXT_P)] = F.sed_op([s,m1,m2,e]) self.F_ward[LAST_P][0] = self.err_record[(self.B_ward[LAST_P][1], NEXT_P)] self.B_ward[LAST_P][0] = self.err_record[(self.B_ward[LAST_P][1], NEXT_P)] heapq.heappush(self.heap, (self.F_ward[LAST_P][0], LAST_P)) if NEXT_P < self.link_tail: self.delete_heap(self.heap, (NEXT_V, NEXT_P)) #s = self.ori_traj_set[episode][LAST_P] #m1 = self.ori_traj_set[episode][rem] #m2 = self.ori_traj_set[episode][NEXT_P] #e = self.ori_traj_set[episode][self.F_ward[NEXT_P][1]] self.err_record[(LAST_P, self.F_ward[NEXT_P][1])] = F.sed_op( self.ori_traj_set[episode][LAST_P:self.F_ward[NEXT_P][1] + 1]) #self.err_record[(LAST_P, self.F_ward[NEXT_P][1])] = F.sed_op([s,m1,m2,e]) self.F_ward[NEXT_P][0] = self.err_record[(LAST_P, self.F_ward[NEXT_P][1])] self.B_ward[NEXT_P][0] = self.err_record[(LAST_P, self.F_ward[NEXT_P][1])] heapq.heappush(self.heap, (self.F_ward[NEXT_P][0], NEXT_P)) #self.copy_traj.remove(self.ori_traj_set[episode][rem]) if label == 'T': self.reward_update(episode, rem) ''' self.copy_traj.remove(self.ori_traj_set[episode][rem]) _, self.current = F.sed_error(self.ori_traj_set[episode], self.copy_traj) ''' self.rw = self.last_error - self.current self.last_error = self.current #print('self.current',self.current) self.F_ward[LAST_P][1] = NEXT_P self.B_ward[NEXT_P][1] = LAST_P self.delete_heap(self.heap, (self.F_ward[rem][0], rem)) del self.F_ward[rem] del self.B_ward[rem] if not done: if action >= len(self.check): self.INX = min(index + 2 + action - len(self.check), len(self.ori_traj_set[episode]) - 1) self.read(self.INX, episode) if label == 'T': self.reward_update(episode, [index, self.INX], 'skip') ''' for skip in range(index + 1, self.INX): self.copy_traj.remove(self.ori_traj_set[episode][skip]) _, self.current = F.sed_error(self.ori_traj_set[episode], self.copy_traj) ''' self.rw += self.last_error - self.current self.last_error = self.current else: self.read(index + 1, episode) t = heapq.nsmallest(self.n_features, self.heap) if len(t) < self.n_features: self.check = [t[0][1], t[0][1], t[1][1]] self.state = [t[0][0], t[0][0], t[1][0], t[0][0], t[0][0]] else: self.check = [t[0][1], t[1][1], t[2][1]] if self.INX + 4 <= self.steps: J1 = F.sed_op( self.ori_traj_set[episode][self.INX:self.INX + 3]) J2 = F.sed_op( self.ori_traj_set[episode][self.INX:self.INX + 4]) self.state = [t[0][0], t[1][0], t[2][0], J1, J2] else: self.state = [t[0][0], t[1][0], t[2][0], t[0][0], t[0][0]] # self.check = [self.heap[0][1], self.heap[1][1]] # self.state = [self.heap[0][0], self.heap[1][0]] #f.write('--->'+str(rw)+'\n') #self.state = [max(self.heap[0][0] - self.current, 0.0)] #cannot remove the 
starting and ending # if self.current_left == self.link_head: # self.check.append(self.current_right) # self.state.append(self.B_ward[self.current_right][0]) # elif self.current_right == self.link_tail: # self.check.append(self.current_left) # self.state.append(self.F_ward[self.current_left][0]) # elif self.F_ward[self.current_left][0] < self.B_ward[self.current_right][0]: # self.check.append(self.current_left) # self.state.append(self.F_ward[self.current_left][0]) # else: # self.check.append(self.current_right) # self.state.append(self.B_ward[self.current_right][0]) #self.state.append(self.current) #self.state[1] = self.state[1] - self.current #print('check and state', self.check, self.state) return np.array(self.state).reshape(1, -1), self.rw
import heapq

grades = [23, 45, 21, 56, 43, 76, 99, 43, 34, 65, 69, 74]
print(heapq.nlargest(3, grades))

custom_grades = [
    {'name': 'Sumeet', 'percentage': 92.5},
    {'name': 'Sagar', 'percentage': 90.1},
    {'name': 'Aman', 'percentage': 81.54},
    {'name': 'Nilesh', 'percentage': 36.54},
    {'name': 'Satish', 'percentage': 83.43},
    {'name': 'Manoj', 'percentage': 45.2},
    {'name': 'Vishal', 'percentage': 95.34}
]

print(heapq.nsmallest(2, custom_grades, key=lambda custom_grades: custom_grades['percentage']))
def _tester(): # note: we run everything in a separate process to re-initialize all global states from scratch # this helps us avoid undesirable side-effects when running multiple tests in sequence loop = asyncio.get_event_loop() me = loop.run_until_complete(DHTNode.create(initial_peers=random.sample(dht.keys(), 5), parallel_rpc=10, cache_refresh_before_expiry=False)) # test 1: find self nearest = loop.run_until_complete(me.find_nearest_nodes([me.node_id], k_nearest=1))[me.node_id] assert len(nearest) == 1 and ':'.join(nearest[me.node_id].split(':')[-2:]) == f"{LOCALHOST}:{me.port}" # test 2: find others for i in range(10): ref_endpoint, query_id = random.choice(list(dht.items())) nearest = loop.run_until_complete(me.find_nearest_nodes([query_id], k_nearest=1))[query_id] assert len(nearest) == 1 found_node_id, found_endpoint = next(iter(nearest.items())) assert found_node_id == query_id and ':'.join(found_endpoint.split(':')[-2:]) == ref_endpoint # test 3: find neighbors to random nodes accuracy_numerator = accuracy_denominator = 0 # top-1 nearest neighbor accuracy jaccard_numerator = jaccard_denominator = 0 # jaccard similarity aka intersection over union all_node_ids = list(dht.values()) for i in range(100): query_id = DHTID.generate() k_nearest = random.randint(1, 20) exclude_self = random.random() > 0.5 nearest = loop.run_until_complete( me.find_nearest_nodes([query_id], k_nearest=k_nearest, exclude_self=exclude_self))[query_id] nearest_nodes = list(nearest) # keys from ordered dict assert len(nearest_nodes) == k_nearest, "beam search must return exactly k_nearest results" assert me.node_id not in nearest_nodes or not exclude_self, "if exclude, results shouldn't contain self" assert np.all(np.diff(query_id.xor_distance(nearest_nodes)) >= 0), "results must be sorted by distance" ref_nearest = heapq.nsmallest(k_nearest + 1, all_node_ids, key=query_id.xor_distance) if exclude_self and me.node_id in ref_nearest: ref_nearest.remove(me.node_id) if len(ref_nearest) > k_nearest: ref_nearest.pop() accuracy_numerator += nearest_nodes[0] == ref_nearest[0] accuracy_denominator += 1 jaccard_numerator += len(set.intersection(set(nearest_nodes), set(ref_nearest))) jaccard_denominator += k_nearest accuracy = accuracy_numerator / accuracy_denominator print("Top-1 accuracy:", accuracy) # should be 98-100% jaccard_index = jaccard_numerator / jaccard_denominator print("Jaccard index (intersection over union):", jaccard_index) # should be 95-100% assert accuracy >= 0.9, f"Top-1 accuracy only {accuracy} ({accuracy_numerator} / {accuracy_denominator})" assert jaccard_index >= 0.9, f"Jaccard index only {accuracy} ({accuracy_numerator} / {accuracy_denominator})" # test 4: find all nodes dummy = DHTID.generate() nearest = loop.run_until_complete(me.find_nearest_nodes([dummy], k_nearest=len(dht) + 100))[dummy] assert len(nearest) == len(dht) + 1 assert len(set.difference(set(nearest.keys()), set(all_node_ids) | {me.node_id})) == 0 # test 5: node without peers detached_node = loop.run_until_complete(DHTNode.create()) nearest = loop.run_until_complete(detached_node.find_nearest_nodes([dummy]))[dummy] assert len(nearest) == 1 and nearest[detached_node.node_id] == f"{LOCALHOST}:{detached_node.port}" nearest = loop.run_until_complete(detached_node.find_nearest_nodes([dummy], exclude_self=True))[dummy] assert len(nearest) == 0 # test 6 store and get value true_time = get_dht_time() + 1200 assert loop.run_until_complete(me.store("mykey", ["Value", 10], true_time)) that_guy = 
loop.run_until_complete(DHTNode.create(initial_peers=random.sample(dht.keys(), 3), parallel_rpc=10, cache_refresh_before_expiry=False, cache_locally=False)) for node in [me, that_guy]: val, expiration_time = loop.run_until_complete(node.get("mykey")) assert val == ["Value", 10], "Wrong value" assert expiration_time == true_time, f"Wrong time" assert loop.run_until_complete(detached_node.get("mykey")) is None # test 7: bulk store and bulk get keys = 'foo', 'bar', 'baz', 'zzz' values = 3, 2, 'batman', [1, 2, 3] store_ok = loop.run_until_complete(me.store_many(keys, values, expiration_time=get_dht_time() + 999)) assert all(store_ok.values()), "failed to store one or more keys" response = loop.run_until_complete(me.get_many(keys[::-1])) for key, value in zip(keys, values): assert key in response and response[key][0] == value # test 8: store dictionaries as values (with sub-keys) upper_key, subkey1, subkey2, subkey3 = 'ololo', 'k1', 'k2', 'k3' now = get_dht_time() assert loop.run_until_complete(me.store(upper_key, subkey=subkey1, value=123, expiration_time=now + 10)) assert loop.run_until_complete(me.store(upper_key, subkey=subkey2, value=456, expiration_time=now + 20)) for node in [that_guy, me]: value, time = loop.run_until_complete(node.get(upper_key)) assert isinstance(value, dict) and time == now + 20 assert value[subkey1] == (123, now + 10) assert value[subkey2] == (456, now + 20) assert len(value) == 2 assert not loop.run_until_complete(me.store(upper_key, subkey=subkey2, value=345, expiration_time=now + 10)) assert loop.run_until_complete(me.store(upper_key, subkey=subkey2, value=567, expiration_time=now + 30)) assert loop.run_until_complete(me.store(upper_key, subkey=subkey3, value=890, expiration_time=now + 50)) loop.run_until_complete(asyncio.sleep(0.1)) # wait for cache to refresh for node in [that_guy, me]: value, time = loop.run_until_complete(node.get(upper_key)) assert isinstance(value, dict) and time == now + 50, (value, time) assert value[subkey1] == (123, now + 10) assert value[subkey2] == (567, now + 30) assert value[subkey3] == (890, now + 50) assert len(value) == 3 test_success.set()
print(heapsort([1, 3, 5, 7, 9, 2, 4, 6, 8, 0])) # initializing list li = [5, 7, 9, 1, 3] # li = [[1, 2], [1, 4], [1, 6], [7, 2], [7, 4], [7, 6], [11, 2], [11, 4], [11, 6]] # li = [1, 3, 5, 7, 9, 2, 4, 6, 8, 0] # li = [5, 7, 9, 4, 3] # using heapify to convert list into heap print('heapify') heapq.heapify(li) print(li) print() # print(heapq.heapify(li)) # doesnt work for some reason # initializing list li1 = [6, 7, 9, 4, 3, 5, 8, 10, 1] # using heapify() to convert list into heap heapq.heapify(li1) print(li1) # using nlargest to print 3 largest numbers # prints 10, 9 and 8 print("The 3 largest numbers in list are : ", end="") print(heapq.nlargest(3, li1)) # using nsmallest to print 3 smallest numbers # prints 1, 3 and 4 print("The 3 smallest numbers in list are : ", end="") print(heapq.nsmallest(3, li1)) print(heapq.nsmallest(3, li1)[-1])
    'shares': 200,
    'price': 21.09
}, {
    'name': 'HPQ',
    'shares': 35,
    'price': 31.75
}, {
    'name': 'YHOO',
    'shares': 45,
    'price': 16.35
}, {
    'name': 'ACME',
    'shares': 75,
    'price': 115.65
}]

cheap = heapq.nsmallest(1, portfolio, key=lambda s: s['price'])
print(cheap)
'''
[{'name': 'YHOO', 'shares': 45, 'price': 16.35}]
'''


# method 3: use while to push min element
def heapilize_list(x):
    n = len(x)
    # Collect the indices of the nodes that have children and apply min-heap sifting
    # to each of them.
    for i in reversed(range(n // 2)):
        raiseup_node(x, i)


def put_down_node(heap, startpos, pos):
# heapq_extremes.py
import heapq
from heapq_heapdata import data

print('all       :', data)
print('3 largest :', heapq.nlargest(3, data))
print('from sort :', list(reversed(sorted(data)[-3:])))
print('3 smallest:', heapq.nsmallest(3, data))
print('from sort :', sorted(data)[:3])
import heapq

n, c, f = input().split()
n, c, f = int(n), int(c), int(f)
count = int((n - 1) / 2)
grade, mo = [], []
for i in range(c):
    g, m = input().split()
    g, m = int(g), int(m)
    grade.append(g)
    mo.append(m)
dic = dict(zip(grade, mo))
sodi = sorted(dic.items(), key=lambda d: d[0], reverse=False)
nemo = []
for i in sodi:
    nemo.append(i[1])
print(nemo)
ans = 0
for i in range(count, len(sodi) - count):
    left = sum(heapq.nsmallest(count, nemo[:i]))
    right = sum(heapq.nsmallest(count, nemo[i + 1:]))
    if left + right + nemo[i] <= f:
        ans = max(ans, sodi[i][0])
print(ans, end='')
def simulate(): ham = {} hamSum = 0 bitC = 0 bitW = 0 for p in range(8): keyset = format(p,"03b") beforeS = [] beforeC = [] before = [] #print('A B Cin SumOut Cout') for i in range(8): K1.next = 0 K2.next = 0 K3.next = 0 set = format(i,"03b") A.next = int(set[0]) B.next = int(set[1]) C_in.next = int(set[2]) yield delay(10) #print ('{} {} {} {} {} BEFORE'.format(bin(A,1),bin(B,1),bin(C_in,1),bin(Sum_out,1),bin(C_out,1))) beforeS.append(int(bin(Sum_out))) beforeC.append(int(bin(C_out))) before = beforeS + beforeC print('Before:') print(beforeS) print(beforeC) print(before) K1.next = int(keyset[0]) K2.next = int(keyset[1]) K3.next = int(keyset[2]) afterS = [] afterC = [] after = [] for i in range(8): set = format(i,"03b") A.next = int(set[0]) B.next = int(set[1]) C_in.next = int(set[2]) yield delay(10) afterS.append(int(bin(Sum_out))) afterC.append(int(bin(C_out))) after = afterS + afterC print('After:') print(afterS) print(afterC) print(after) t = [] for i in range(len(before)): bitC += 1 if before[i] != after[i]: bitW += 1 t.append(before[i]) hamming = len(t)/len(before) print('Key1: {} Key2: {} Key3: {}'.format(keyset[0],keyset[1],keyset[2])) print('Hamming = {}'.format(hamming)) ham[hamming] = p hamSum += hamming print('Hamming List:') print(ham) smallest = nsmallest(1, ham, key=lambda x: abs(x-0.5)) key = format(ham[smallest[0]],"03b") print('Best Key Combination: {}'.format(key)) print('Hamming Distance: {}'.format(smallest)) print('Hamming Average = {}'.format(hamSum/7)) print('Wrong {} / {}'.format(bitW,bitC))
def second_highest_point(self):
    return nsmallest(2, self.points, key=lambda x: x[1])[-1]