Example #1
File: prelim.py Project: rchen152/CS181
def proc_unigram_feats():
    mat,key,regy,_ = rs.extract_feats([rs.unigram_feats])
    inv_key = {v:k for k,v in key.items()}
    num_movies,num_words = mat.get_shape()

    movies = [(regy[i],i) for i in range(num_movies)]
    min_movies = heap.nsmallest(MOVIE_TARGET,movies)
    max_movies = heap.nlargest(MOVIE_TARGET,movies)
    tot_min = 0.
    tot_max = 0.
    for mv in min_movies:
        tot_min += mat[mv[1]].sum()
    for mv in max_movies:
        tot_max += mat[mv[1]].sum()
    fix = tot_max/tot_min
    diffs = np.zeros((num_words))
    for mv in min_movies:
        diffs += -1.*fix*mat[mv[1]]
    for mv in max_movies:
        diffs += mat[mv[1]]

    with open("english.stop") as f:
        stop_words = set([line.strip() for line in f.readlines()])
        words = [(diffs[0,i],inv_key[i]) for i in range(num_words)
                 if inv_key[i] not in stop_words]
        worst_words = heap.nsmallest(WORD_TARGET, words)
        worst_words.sort()
        best_words = heap.nlargest(WORD_TARGET, words)
        best_words.sort()

        for wd in worst_words:
            print wd[1] + '\t' + str(wd[0])
        print '---------------------------------'
        for wd in best_words:
            print wd[1] + '\t' + str(wd[0])
Example #2
def constrained_decoder(voc, predictions, beam, constraints):
    heap = [State(score=0, label='O', prev=None, roles=set())]
    for i, prediction in enumerate(predictions):
        next_generation = list()
        for prev in heapq.nsmallest(beam, heap, key=_get_score):
            for j, prob in enumerate(prediction):
                label = voc[j]
                score = -math.log2(prob + sys.float_info.min)
                if score > _PRUNING_THRESHOLD and next_generation:
                    continue

                next_state = State(score=score + prev.score,
                                   label=label, prev=prev,
                                   roles=prev.roles)

                constraints_violated = [not check(next_state) for check in
                                        constraints]
                if any(constraints_violated):
                    continue

                next_generation.append(
                    State(next_state.score, next_state.label, next_state.prev,
                          next_state.roles | {next_state.label[2:]}))

        heap = next_generation

    head = heapq.nsmallest(1, heap, key=_get_score)[0]

    backtrack = list()
    while head:
        backtrack.append(head.label)
        head = head.prev

    return list(reversed(backtrack[:-1]))
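The decoder above leans on several names defined elsewhere in its module. Below is a minimal sketch of what they might look like, assuming State is a simple record and _get_score orders states by accumulated negative log-probability; State, _get_score, and the _PRUNING_THRESHOLD value are illustrative assumptions, not the original definitions.

import heapq
import math
import sys
from collections import namedtuple

# Hypothetical stand-ins for the names constrained_decoder expects.
State = namedtuple('State', ['score', 'label', 'prev', 'roles'])

_PRUNING_THRESHOLD = 5.0  # assumed cutoff on a single step's -log2(prob)

def _get_score(state):
    # Smaller accumulated score means a more probable path.
    return state.score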
def recommendTopFive(bskt, hshdct):       # recommend top 5 items based on distance
    print "###### calculating distance ######"
    rec_list = []         # Recommendation list
    avgdct = {}           # store the distance between all other items to each poi's item
    mindct = {}
    for item in bskt:               # Consider only items in poi's basket
        others = [one for one in hshdct.keys() if one not in bskt]  # all other items
        for com in others:
            dist = distanceSQ(hshdct[item], hshdct[com])
            if com not in avgdct:
                avgdct[com] = dist
                mindct[com] = dist                      # store the distance
            else:
                avgdct[com] += dist
                mindct[com] = min(mindct[com], dist)    # update the distance
        #if len(dist_dict[item]) < 10: print "sku %s has <10 similar items" % str(item)
    rec_list_avg = heapq.nsmallest(5, avgdct, key=avgdct.get)
    rec_list_min = heapq.nsmallest(5, mindct, key=mindct.get)
    print "Based on avg distance, recommend user %s: " % str(sys.argv[1]), rec_list_avg
    print "Based on min distance, recommend user %s: " % str(sys.argv[1]), rec_list_min
    avg_dist = [math.sqrt(avgdct[com])/(len(bskt)*len(hshdct[com])) for com in rec_list_avg]
    print "avg dist. = ", ["{0:0.4f}".format(i) for i in avg_dist]
    min_dist = [math.sqrt(mindct[com])/(len(bskt)*len(hshdct[com])) for com in rec_list_min]
    print "min dist. = ", ["{0:0.4f}".format(i) for i in min_dist]
    return rec_list_avg, rec_list_min
Example #4
    def Counting(self, LuckyCombo): ##this function counts all the tickets by their number combinations
        global List_of_Numbers, List_of_MegaNumbers, list_of_combos
        lucky = LuckyCombo
        if lucky == "most": ##chooses the most occurring NUMBERS
            number_to_count = (x for x in List_of_Numbers)
            c = Counter(number_to_count)
            d =  c.most_common(5)
            d.sort()

            mega_to_count = (x for x in List_of_MegaNumbers)
            CC = Counter(mega_to_count)
            DD = CC.most_common(1)
            return d, DD
        elif lucky == "combo": ##chooses the most and least occurring combinations
            combo_to_count = (tuple(x) for x in list_of_combos)
            combo_count = Counter(combo_to_count)
            print 'most', combo_count.most_common(1), 'least', list(combo_count.most_common())[-1]
            dcombo = list(combo_count)[-1:]
            ecombo = list(x for x in dcombo)
            return dcombo
        else: ##chooses the least occurring NUMBERS
            number_to_count = (x for x in List_of_Numbers)
            c = Counter(number_to_count)
            reverse = heapq.nsmallest(5, c.items(), key=itemgetter(1))
            d = reverse
            d.sort()

            mega_to_count = (x for x in List_of_MegaNumbers)
            CC = Counter(mega_to_count)
            Reverse = heapq.nsmallest(1, CC.items(), key=itemgetter(1))
            DD = Reverse
            return d, DD
def orderall():
    """ Find intersection of bestvals and lastvals """
    global bestvals, lastvals,bestvalsdict,lastvalsdict
    print '\nbest', k, 'vals overall'
    nbestvals = H.nsmallest(k,bestvals)
    nlastvals = H.nsmallest(k,lastvals)
    bestexps = []    

    exps = {}
    for i in xrange(len(nlastvals)):
        lastval, explast = nlastvals[i]
        exps[explast] = i
        for j in xrange(len(nbestvals)):
            bestval, info = nbestvals[j]
            expbest, epcnum = info
            if expbest == explast:
                exps[expbest] += j
    for pair in exps.items():
        H.heappush(bestexps,(pair[1],pair[0]))
    nbestexps = H.nsmallest(k,bestexps)
    for pair in nbestexps:
        rank, exp = pair
        lastval = lastvalsdict[exp]
        bestval, epcnum = bestvalsdict[exp]
        print exp,'\tbestval',bestval,'at valtest',epcnum,'\tlastval',lastval  
Example #6
def main():
    sent_file = open(sys.argv[1])
    tweet_file = open(sys.argv[2])
    sent_scores = parse_sentiment_scores(sent_file)
    tweets = parse_tweets(tweet_file)

    state_happiness = {}

    for tweet in tweets:
        if "place" in tweet \
        and tweet["place"] != None \
        and "country_code" in tweet["place"] \
        and tweet["place"]["country_code"] != None \
        and "US" == tweet["place"]["country_code"]:
            state = determine_state(tweet)
            if state != None:
                sentiment = tweet_sentiment(tweet, sent_scores)
                if not state in state_happiness: 
                    state_happiness[state] = []
                state_happiness[state].append(-sentiment)

    for state in state_happiness:
        state_happiness[state] = float(reduce(lambda x,y: x+y, state_happiness[state]))/len(state_happiness[state])

    print heapq.nsmallest(1, state_happiness.iteritems(), operator.itemgetter(1))[0][0]
Example #7
def counting(kyle):
    global tit, megatit, list_of_combos
    lucky = kyle
    if lucky == "most":
        number_to_count = (x for x in tit)
        c = Counter(number_to_count)
        d =  c.most_common(5)
        d.sort()

        mega_to_count = (x for x in megatit)
        CC = Counter(mega_to_count)
        DD = CC.most_common(1)
        return d, DD
    elif lucky == "combo":
        combo_to_count = (tuple(x) for x in list_of_combos)
        combo_count = Counter(combo_to_count)
        print 'most', combo_count.most_common(1), 'least', list(combo_count.most_common())[-1]
        dcombo = list(combo_count)[-1:]
        return dcombo
    else:
        number_to_count = (x for x in tit)
        c = Counter(number_to_count)
        reverse = heapq.nsmallest(5, c.items(), key=itemgetter(1))
        d = reverse
        d.sort()

        mega_to_count = (x for x in megatit)
        CC = Counter(mega_to_count)
        Reverse = heapq.nsmallest(1, CC.items(), key=itemgetter(1))
        DD = Reverse
        return d, DD
Example #8
 def get_next_element_when_ready(self):
     self.first_element_changed.acquire()
     try:
         isNotReady = True
         while isNotReady:
             if self._qsize() > 0:
                 first_element = heapq.nsmallest(1, self.queue)[0]
                 if isinstance(first_element, SystemExit):
                     first_element = self._get()
                     break
                 if not first_element.flag_alive:
                     log.debug("Early termination of dead metric")
                     first_element = self._get()
                     break
                 timeout = (
                     first_element.get_next_run_time() - getUTCmillis()
                 ) / 1000.0
                 log.debug("Waiting on acquired first_element_changed LOCK "
                          + "for: %.2f" % timeout)
                 self.first_element_changed.wait(timeout)
             else:
                 self.first_element_changed.wait()
                 first_element = heapq.nsmallest(1, self.queue)[0]
             if isinstance(first_element, SystemExit):
                 first_element = self._get()
                 break
             if (first_element.get_next_run_time() - getUTCmillis()) <= 0 \
                     or not first_element.flag_alive:
                 isNotReady = False
                 first_element = self._get()
         return first_element
     finally:
         self.first_element_changed.release()
def count_mon_min_data_avg(fileObj, year, cat_flag):
    count = 0
    flag = 0
    avg_data = []
    year_data = []
    fd = open(fileObj, 'r')
    for line in fd.readlines():
        a = re.split(',|\n| ', line)
        # Notice the None value
        if int(a[YEAR]) == year:
            if flag == 0:
                temp = int(a[MON])
                flag = 1
            if len(a[cat_flag]) != 0:
                year_data.append(float(a[cat_flag]))
                count = count + 1
            if temp != int(a[MON]):
                length = int(count * RATE)
                if length == 0:
                    length = 1
                value = heapq.nsmallest(length, year_data)
                avg = mean(value)
                avg_data.append(avg)
                year_data = []
                count = 0
            temp = int(a[MON])
    length = int(count * RATE)
    value = heapq.nsmallest(length, year_data)
    avg = mean(value)
    avg_data.append(avg)
    year_data = []
    count = 0
    fd.close()
    return avg_data
 def addNum(self, num):
     """
     Adds a num into the data structure.
     :type num: int
     :rtype: void
     """
     if not self.leftHeap:
         heapq.heappush(self.leftHeap,num*-1)
         return
     elif not self.rightHeap:
         if heapq.nsmallest(1,self.leftHeap)[0]*-1 > num:
             heapq.heappush(self.rightHeap,heapq.heappop(self.leftHeap)*-1)
             heapq.heappush(self.leftHeap,num*-1)
         else:
             heapq.heappush(self.rightHeap,num)
         return
     
     if num<= heapq.nsmallest(1,self.leftHeap)[0]*-1:
         heapq.heappush(self.leftHeap,num*-1)
     else:
         heapq.heappush(self.rightHeap,num)
     
     while len(self.leftHeap)-len(self.rightHeap)>1:
         heapq.heappush(self.rightHeap,heapq.heappop(self.leftHeap)*-1)
         
     while len(self.rightHeap)-len(self.leftHeap)>1:
         heapq.heappush(self.leftHeap,heapq.heappop(self.rightHeap)*-1)
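A compact, self-contained variant of the same two-heap pattern (a hypothetical Python 3 class, not the original; it peeks with heap[0] rather than heapq.nsmallest(1, ...)):

import heapq

class TwoHeapMedian(object):
    def __init__(self):
        self.left = []   # max-heap of the lower half, values stored negated
        self.right = []  # min-heap of the upper half

    def add(self, num):
        # Push onto the lower half unless num exceeds its maximum.
        if self.left and num > -self.left[0]:
            heapq.heappush(self.right, num)
        else:
            heapq.heappush(self.left, -num)
        # Rebalance so the halves differ by at most one element.
        if len(self.left) - len(self.right) > 1:
            heapq.heappush(self.right, -heapq.heappop(self.left))
        elif len(self.right) - len(self.left) > 1:
            heapq.heappush(self.left, -heapq.heappop(self.right))

    def median(self):
        if len(self.left) == len(self.right):
            return (-self.left[0] + self.right[0]) / 2.0
        if len(self.left) > len(self.right):
            return float(-self.left[0])
        return float(self.right[0])

m = TwoHeapMedian()
for n in [5, 2, 8, 1]:
    m.add(n)
print(m.median())  # 3.5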
Example #11
 def findMedian(self, A):
    half = len(A) / 2
    if len(A) & 1 == 1:
       res = heapq.nsmallest(half + 1, A)[-1]
    else:
       res = 0.5 * (heapq.nsmallest(half + 1, A)[-1] + heapq.nsmallest(half, A)[-1])
    return res
Example #12
 def test_nsmallest(self):
     data = [(random.randrange(2000), i) for i in range(1000)]
     for f in (None, lambda x:  x[0] * 547 % 2000):
         for n in (0, 1, 2, 10, 100, 400, 999, 1000, 1100):
             self.assertEqual(nsmallest(n, data), sorted(data)[:n])
             self.assertEqual(nsmallest(n, data, key=f),
                              sorted(data, key=f)[:n])
Example #13
def persona_interp(EcmRange,Vinp,Eout,ens):
	"""
	here I define a brute-force interpolation which just takes the average of the nearest neighbors
	EcmRange = range of input energies where Vinp is defined
	Vinp = is the function to interpolate
	Eout = is the energy where it is going to be evaluated 
	"""
	EcmL=list(EcmRange)

	if shape(Eout)==():
		"meaning, if Eout is a number"
		Ecm0s=nsmallest(2, EcmL, key=lambda x: abs(x-Eout))
		"Ecm0s is the two energies in EcmL that are closest to Eout"
		out=0
		ne0=EcmL.index(Ecm0s[0])
		ne1=EcmL.index(Ecm0s[1])
		return (Vinp[ne0]+Vinp[ne1])/2.0

	else:
		"is Eout is a list, we just loop over its components and repeat what we did above"
		out=zeros(len(Eout))
		for i0 in range(len(Eout)):	
			E0=Eout[i0]
			Ecm0s=nsmallest(2, EcmL, key=lambda x: abs(x-E0))
			ne0=EcmL.index(Ecm0s[0])
			ne1=EcmL.index(Ecm0s[1])
			out[i0]=(Vinp[ne0]+Vinp[ne1])/2.0
		return out
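A quick harness for persona_interp, assuming the surrounding module star-imports numpy (for shape and zeros) along with heapq.nsmallest; the inputs here are hypothetical, and the ens argument is unused by the excerpt:

from numpy import linspace, sin
Ecm = linspace(0.0, 10.0, 101)
V = sin(Ecm)
print(persona_interp(Ecm, V, 3.14, None))        # scalar query
print(persona_interp(Ecm, V, [1.0, 2.0], None))  # vectorized query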
Example #14
    def put_and_notify(self, item, block=True, timeout=None):
        log.debug("Adding Event:" + str(item))
        self.not_full.acquire()
        try:
            first_element_before_insertion = None
            if self._qsize() > 0:
                first_element_before_insertion = heapq.nsmallest(1, self.queue)[
                    0]

            if self.maxsize > 0:
                if not block:
                    if self._qsize() == self.maxsize:
                        raise Full
                elif timeout is None:
                    while self._qsize() == self.maxsize:
                        self.not_full.wait()
                elif timeout < 0:
                    raise ValueError("'timeout' must be a non-negative number")
                else:
                    endtime = _time() + timeout
                    while self._qsize() == self.maxsize:
                        remaining = endtime - _time()
                        if remaining <= 0.0:
                            raise Full
                        self.not_full.wait(remaining)
            self._put(item)
            self.unfinished_tasks += 1
            self.not_empty.notify()

            first_element_after_insertion = heapq.nsmallest(1, self.queue)[0]
            if first_element_before_insertion != first_element_after_insertion:
                self.first_element_changed.notify()
        finally:
            self.not_full.release()
Example #15
 def loop(self):
     gen = 0
     m = 300000
     m = m * m
     p = None
     cnt = 0
     while 1:
         print "gen ", gen,
         g = [u for u in self.group if u.select(m)]
         g0 = heapq.nsmallest(
             self.groupsize / 2, g, key=lambda x: x.judge())
         m0 = g0[0].judge()
         g = [u0.cross(u1) for u1 in g for u0 in g]
         g = [u.mutation() for u in g]
         g = heapq.nsmallest(self.groupsize / 2, g, key=lambda x: x.judge())
         m1 = g[0].judge()
         m = min(m0, m1)
         print "%.4f %.4f" % (m, m / m0),
         g = g0 + g
         self.group = g
         gen += 1
         if m != m0:
             cnt = 0
             if p != None:
                 p.terminate()
             p = multiprocessing.Process(
                 target=plot.plotThread, args=(pts, g[0].seq))
             p.start()
         else:
             cnt += 1
         print cnt
         if cnt == 20:
             break
     p.terminate()
     return g[0].seq
def heap_median_maintenance(read_in):
    starting_list = []
    median = []
    for i in read_in:
        starting_list.append(i)
        #If it's the first element being read in, that is the median
        if len(starting_list) == 1:
            low_heap = heapq.nsmallest(len(starting_list), starting_list)
            high_heap = heapq.nlargest(len(starting_list)-1, starting_list)
        #if even then split half way
        elif len(starting_list)%2 ==0:
            low_heap = heapq.nsmallest(len(starting_list)/2, starting_list)
            high_heap = heapq.nlargest(len(starting_list)/2, starting_list)
        #if odd give the larger portion to low heap
        else:
            low_list_amount = int(math.ceil(float(len(starting_list))/2))
            high_list_amount = int(len(starting_list) - math.ceil(float(len(starting_list))/2))
            low_heap = heapq.nsmallest(low_list_amount, starting_list)
            high_heap = heapq.nlargest(high_list_amount, starting_list)
            #print("Low heap has {} and high heap has {}".format(len(low_heap), len(high_heap)))
        #print("Low heap {}".format(low_heap))
        #print("high heap {}".format(high_heap))
        #print("Median is {}".format(heapq.nlargest(1, low_heap)[0]))
        #append median from the largest element of the low_heap
        median.append(heapq.nlargest(1, low_heap)[0])
    return median
Example #17
    def DumpAudioDiagnostics(self, dir_name="./data/", top_k=10, bot_k=10):
        # utterance level diag
        import heapq

        utt_largest = heapq.nlargest(top_k, self.utt_feature, key=self.utt_feature.get)
        i = 0
        for utt in utt_largest:
            utt_id = string.join(utt.split("_")[0:-2], "_")
            t_beg = float(utt.split("_")[-2]) / self.samp_period
            t_end = float(utt.split("_")[-1]) / self.samp_period
            file_id = self.list_files[self.map_utt_idx[utt_id]]
            out_file = "./data/" + repr(i) + "large_srate_" + os.path.basename(file_id).split(".")[0] + ".wav"
            util.cmdconvert(file_id, out_file, t_beg, t_end)
            i += 1
        utt_smallest = heapq.nsmallest(bot_k, self.utt_feature, key=self.utt_feature.get)
        i = 0
        for utt in utt_smallest:
            utt_id = string.join(utt.split("_")[0:-2], "_")
            t_beg = float(utt.split("_")[-2]) / self.samp_period
            t_end = float(utt.split("_")[-1]) / self.samp_period
            file_id = self.list_files[self.map_utt_idx[utt_id]]
            out_file = "./data/" + repr(i) + "small_srate_" + os.path.basename(file_id).split(".")[0] + ".wav"
            util.cmdconvert(file_id, out_file, t_beg, t_end)
            i += 1
        # glob level diag
        glob_largest = heapq.nlargest(top_k, self.glob_feature, key=self.glob_feature.get)
        for utt_id in glob_largest:
            file_id = self.list_files[self.map_utt_idx[utt_id]]
            out_file = "./data/glob_large_srate_" + os.path.basename(file_id).split(".")[0] + ".wav"
            util.cmdconvert(file_id, out_file)
        glob_smallest = heapq.nsmallest(top_k, self.glob_feature, key=self.glob_feature.get)
        for utt_id in glob_smallest:
            file_id = self.list_files[self.map_utt_idx[utt_id]]
            out_file = "./data/glob_small_srate_" + os.path.basename(file_id).split(".")[0] + ".wav"
            util.cmdconvert(file_id, out_file)
def getmedian(l, r):
    # print('Median:', l, '-', rightq)
    if len(l) > len(r):
        return heapq.nlargest(1, l)[0]
    elif len(l) == len(r):
        return (heapq.nlargest(1, l)[0] + heapq.nsmallest(1, r)[0]) / 2
    else:
        return heapq.nsmallest(1, r)[0]
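A quick check, assuming l holds the lower half of the data and r the upper half, both as plain heapified lists (hypothetical inputs; Python 3 division):

print(getmedian([1, 2], [5, 8]))  # (2 + 5) / 2 == 3.5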
Example #19
File: ChNode.py Project: ytaoWang/case
    def get_min(self):
        print "request list:",len(self.req_list),",node list:",len(self.node_list)

        import sys
        rmin = sys.maxint if len(self.req_list) == 0 else heapq.nsmallest(1,self.req_list,key=lambda x:x['time'])[0]['time']
        nmin = sys.maxint if len(self.node_list) == 0 else heapq.nsmallest(1,self.node_list,key=lambda x:x['time'])[0]['time']

        return min(int(rmin),int(nmin))
Example #20
def main():
    # Take input dir
    baseDir = sys.argv[1]
    if baseDir[-1:] != '/' : baseDir += '/'
    
    print baseDir
    
    neg_review = readAllFolds(baseDir + NEG + FALSE)
    neg_review += readAllFolds(baseDir + NEG + TRUTH_NEG)
    
    pos_review = readAllFolds(baseDir + POS + FALSE)
    pos_review += readAllFolds(baseDir + POS + TRUTH_POS)
    
    false_review = readAllFolds(baseDir + NEG + FALSE)
    false_review += readAllFolds(baseDir + POS + FALSE)
    
    true_review = readAllFolds(baseDir + NEG + TRUTH_NEG)
    true_review += readAllFolds(baseDir + POS + TRUTH_POS)
    
    print "Done with reading all data.."
    print "Calculating priors.."
    prior_pos = log_nb(len(pos_review) / (len(pos_review) + len(neg_review) * 1.0))
    prior_neg = log_nb(len(neg_review) / (len(pos_review) + len(neg_review) * 1.0))
    
    prior_truth = log_nb(len(true_review) / (len(true_review) + len(false_review) * 1.0))
    prior_false = log_nb(len(false_review) / (len(true_review) + len(false_review) * 1.0))
    
    
    # list of all the tokens in each class
    true_review_token = collectAllTokens(true_review)
    false_review_token = collectAllTokens(false_review)
    pos_review_token = collectAllTokens(pos_review)
    neg_review_token = collectAllTokens(neg_review)
    
    # # Initializing counters for fast counts
    true_review_token_ctr = Counter(true_review_token)
    false_review_token_ctr = Counter(false_review_token)
    pos_review_token_ctr = Counter(pos_review_token)
    neg_review_token_ctr = Counter(neg_review_token)
        
#     my_stop = set([ite for ite, it in true_review_token_ctr.most_common(50)]) 
#     
#     my_stop = my_stop.union(set([ite for ite, it in false_review_token_ctr.most_common(50)]))
#     
#     print my_stop
#     print len(my_stop)
# 
#     print my_stop.intersection(stop_words)
#     print len(my_stop.intersection(stop_words))
# 
#     print my_stop.difference(stop_words)
#     print len(my_stop.difference(stop_words))
    least_a = set([ite+str(it) for ite, it in heapq.nsmallest(5050, true_review_token_ctr.items(), key=itemgetter(1)) ])
    least_b = set([ite+str(it) for ite, it in heapq.nsmallest(4300, false_review_token_ctr.items(), key=itemgetter(1)) ])
    print least_a
    print least_b
    print len(least_a.intersection(least_b))
    print [ite[:-1] for ite in least_a.intersection(least_b) ]
Example #21
File: TODO.py Project: leven-cn/python
def search_algorithm():
    '''Find the largest or smallest N items in a collection.

    **NOTE**: If you are looking for the _N_ smallest or largest items, and _N_
    is small compared to the overall size of the collection, the `nsmallest()`
    and `nlargest()` functions of the `heapq` module provide superior performance.

    For larger _N_, it is more efficient to use the `sorted()` function first,
    and take a slice. Also, when `N==1`, it is more efficient to use the
    built-in `min()` and `max()` functions.

    **NOTE**: When doing these calculations, be aware that `zip()` creates an
    iterator that can only be consumed once.
    '''
    import heapq

    # Find in a list of integers
    seq = [1, 8, 2, 23, 7, -2, 18, 23, 42, 37, 2]
    assert heapq.nlargest(3, seq) == [42, 37, 23]
    assert heapq.nsmallest(3, seq) == [-2, 1, 2]

    # Find in a list of dictionaries
    list_of_dict = [
        {'name': 'IBM', 'shares': 100, 'price': 91.1},
        {'name': 'AAPL', 'shares': 50, 'price': 543.22},
        {'name': 'FB', 'shares': 200, 'price': 21.09},
        {'name': 'HPQ', 'shares': 35, 'price': 31.74},
        {'name': 'YHOO', 'shares': 45, 'price': 16.35},
        {'name': 'ACME', 'shares': 75, 'price': 115.65}
    ]
    assert heapq.nsmallest(3, list_of_dict, key=lambda s: s['price']) == [
            {'name': 'YHOO', 'shares': 45, 'price': 16.35},
            {'name': 'FB', 'shares': 200, 'price': 21.09},
            {'name': 'HPQ', 'shares': 35, 'price': 31.74}
        ]
            
    # Find in a dictionary
    d = {
        'IBM': 91.1,
        'AAPL': 543.22,
        'FB': 21.09
    }
    assert min(zip(d.values(), d.keys())) == (21.09, 'FB')
    assert max(zip(d.values(), d.keys())) == (543.22, 'AAPL')

    # Order a list as a heap, transformed in-place, in linear time
    heapq.heapify(seq)

    # Pop and return the smallest item from the heap, maintaining the heap
    # invariant.
    try:
        assert heapq.heappop(seq) == -2
    except IndexError as e:
        # Heap is empty
        pass
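A quick illustration of the zip() caveat from the note above (Python 3 semantics; in Python 2 zip() returned a list and could be reused):

d = {'IBM': 91.1, 'AAPL': 543.22, 'FB': 21.09}
prices = zip(d.values(), d.keys())
print(min(prices))   # (21.09, 'FB')
try:
    print(max(prices))
except ValueError:
    print('zip() iterator already consumed')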
 def compare_point(self, point_dict,clf):
     new_point = Point.init_from_dict(point_dict, point_dict['activity'])
     smallest = heapq.nsmallest(10, self.min_heap_list)
     largest = heapq.nsmallest(10, self.max_heap_list)
     sim_center = []
     sim_boundry = []
     for small in smallest:
         sim_center.append(self.similarity_check(small, new_point,clf))
     for large in largest:
         sim_boundry.append(self.similarity_check(large, new_point,clf))
     return (sim_center,sim_boundry)
Example #23
File: ChNode.py Project: ytaoWang/case
    def do_action(self):
        """ check timer event then implement it"""
        now = self.get_min()        

        while True:
            if len(self.req_list) == 0:
                break
            rmin = heapq.nsmallest(1,self.req_list,key=lambda x:x['time'])[0]['time']
            if rmin - now > 1:
                break

            obj = heapq.heappop(self.req_list)
            strid = obj['dataid']

            if obj['op'] == RequestInfo.CHUNK_REQ_UPLOAD:
                obj['obj'].upload_end(obj['dataid'],obj['size'],self.nodeid)
                c = CDataInfo()
                c.size = obj['size']
                c.nodeid = strid
                self.node[strid] = c
                self.data_dict[strid] = c

            elif obj['op'] == RequestInfo.CHUNK_REQ_DOWNLOAD:
                obj['obj'].download_end(obj['dataid'],obj['size'],self.nodeid)

            elif obj['op'] == RequestInfo.CHUNK_REQ_UPDATE:
                obj['obj'].update_end(obj['dataid'],obj['size'],self.nodeid)
                v = self.node.get(strid)
                print 'update file key:',strid
                if v is None:
                    logging.error('fail to update file key:%s,nodeid:%s',strid,self.nodeid)
                    return
                v.size = obj['size']
                self.data_dict[strid] = v
                self.node[strid] = v
            else:
                pass

        while True:

            if len(self.node_list) == 0:
                break
            nmin = heapq.nsmallest(1,self.node_list,key=lambda x:x['time'])[0]['time']
            if nmin - now > 1:
                break

            obj = heapq.heappop(self.node_list)
            if not obj['obj'] is None:
                c = CDataInfo()
                c.size = obj['size']
                c.nodeid = obj['dataid']
                self.node[c.nodeid] = c
                self.data_dict[c.nodeid] = c
                obj['obj'].migarate_end(obj['dataid'],self.nodeid)
 def findMedian(self):
     """
     Returns the median of current data stream
     :rtype: float
     """
     if len(self.leftHeap) == len(self.rightHeap):
         return float(heapq.nsmallest(1,self.leftHeap)[0]*-1+heapq.nsmallest(1,self.rightHeap)[0])/2
     elif len(self.leftHeap) > len(self.rightHeap):
         return float(heapq.nsmallest(1,self.leftHeap)[0]*-1)
     else:
         return float(heapq.nsmallest(1,self.rightHeap)[0])
    def calcAvgMinMaxDiff(self, twoSamples):
        avgMinMaxDiff = 0
        if len(twoSamples['testSeq']) > 4 and len(twoSamples['grndTruthSeq'])>4:
            # nlargest/nsmallest return lists; average them so the
            # subtraction below operates on scalars.
            avg5max_test = sum(heapq.nlargest(5, twoSamples['testSeq'])) / 5.0
            avg5min_test = sum(heapq.nsmallest(5, twoSamples['testSeq'])) / 5.0

            avg5max_grnd = sum(heapq.nlargest(5, twoSamples['grndTruthSeq'])) / 5.0
            avg5min_grnd = sum(heapq.nsmallest(5, twoSamples['grndTruthSeq'])) / 5.0

            avgMinMaxDiff = abs(avg5max_test - avg5min_test) - abs(avg5max_grnd - avg5min_grnd)
        return avgMinMaxDiff
Example #26
 def ack_thread(self, content_sz):
   '''
     Function that receives the acks and updates the window's limits.
   '''
   acked = []
   while True:
     self.mutex.acquire()
     if self.begin_window == content_sz:
       self.mutex.release()
       break
     self.mutex.release()
     
     time.sleep(0.005)
     try:
       data, addr = self.udp.recvfrom(64)
       self.total_bytes_received += len(data)
     except socket.timeout:
       continue
     
     pck = self.unmount_package(data)
     if not self.check_package(pck):
       continue
     self.total_acks_received += 1
     neue_id = int(pck[1])
     
     self.mutex.acquire()
     # If the id number is inside the limits of the window...
     if neue_id >= self.begin_window and neue_id <= self.end_window:
       heappush(acked, neue_id)
       
       # Putting -1 on the time spans vector to make sure
       # this package won't be sent anymore
       if self.begin_window != nsmallest(1, acked)[0]:
         self.time_spans[neue_id] = -1
       
       # Updating the window's limits accordingly.
       while len(acked) > 0 and self.begin_window == nsmallest(1, acked)[0]:
         heappop(acked)
         del self.time_spans[self.begin_window]
         self.begin_window+=1
       
       # Putting zero on the new packages to be sent to the receiver.
       while True:
         if not self.end_window in self.time_spans: 
           self.time_spans[self.end_window] = 0
         
         if self.end_window == content_sz-1:
           break
         if self.end_window >= self.begin_window+self.window_sz-1:
           break
         self.end_window += 1
      
     self.mutex.release()
Example #27
 def _remove_stale(self):
     'Remove nonces that are too old'
     self._dblock.acquire()
     iterSmallest = heapq.nsmallest(1, self._pq)
     while((len(iterSmallest)==1) and ((time() - iterSmallest[0][0]) > self._timeoutSeconds)):
         try:
             self._entry_finder.pop(iterSmallest[0][1])
         except KeyError:
             pass #it may be already gone and that's OK.
         heapq.heappop(self._pq)
         iterSmallest = heapq.nsmallest(1, self._pq)
     self._dblock.release()
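One performance note on the pattern above: heapq.nsmallest(1, pq) scans the entire iterable on every call, while a binary heap already keeps its smallest element at index 0. A hedged equivalent peek:

def peek_smallest(pq):
    # Same result as heapq.nsmallest(1, pq)[0] for a heapified list,
    # but O(1): the heap invariant keeps the minimum at index 0.
    return pq[0] if pq else None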
    def predict_y(self, point):
        """ Predict Y value based on k nearest neighbours.
            The method uses a heapq to keep track of the k nearest neighbours.
            
            The heap queue uses the negative Euclidean distance as the priority.
            That way, the larger distances have smaller value (and priority).

            The heapq contains pairs (K, V), where:
            K is negated euclidean distance between query and data vectors.
            V is the Y-value of the data vector.
            --------------------------------------
            Implemented in the following way:
            I) Fill the heap with the first k vectors from data set
            II) Keep a current minimum (the vector farthest from query vector).
            III) For each of the remaining vectors in data set.
                1) Compute the distance between query and data vector.
                2) Negate the distance (larger distances have smaller value).
                3) If the current value is larger than current minimum:
                    - Remove current min from heap and add current pair.
                    - Update current min.

            Note: Size of heap is preserved (always == k) by always removing element
            with smallest priority (largest distance) before adding new one.
        """
        heap = []
        
        # Add first k elements to fill heapq
        for val in zip(self.dataX[:self.k], self.dataY[:self.k]):
            diff = point-val[0] # coordinates are at pos 0
            n_summed = np.sum((np.square(diff))) # sum the squares of diff
            euclid_dist = -math.sqrt(n_summed) # negate the distance
            pair = (euclid_dist, val[1]) # y values are at position 1
            heappush(heap, pair)

        # Compute the current minimum (largest distance).
        # Updated whenever the smallest element is removed.
        curr_min = nsmallest(1, heap)[0][0]
        
        # Check remaining elements
        for val in zip(self.dataX[self.k:], self.dataY[self.k:]): 
            diff = point-val[0]
            n_summed = (np.sum(np.square(diff)))
            euclid_dist = -math.sqrt(n_summed)
            
            if euclid_dist > curr_min:
                pair = (euclid_dist, val[1])
                heapreplace(heap, pair) # Remove smallest, add current
                curr_min = nsmallest(1, heap)[0][0] # Update current min elem
        
        # Get the Y-values from the heap and return the mean.
        result = [x[1] for x in heap]
        return sum(result) / float(len(result))
Example #29
def findCurrentMedian(bigHalf, smallHalf):
## This finds the current median
    
    numBig = len(bigHalf)
    numSmall = len(smallHalf)
    currentMedian = 0
    
    if numBig > numSmall:
        return (heapq.nsmallest(1, bigHalf))[0]
    elif numSmall > 0:
        return -(heapq.nsmallest(1, smallHalf))[0]
    else:
        return 0
Example #30
def _computeGridRatio(coord, shape):
    """
    coord (list of tuple of 2 floats): coordinates
    shape (2 ints): X and Y number of coordinates
    return (float): ratio X/Y
    """
    x_cors = [i[0] for i in coord]
    y_cors = [i[1] for i in coord]
    x_max_cors = numpy.mean(heapq.nlargest(shape[0], x_cors))
    x_min_cors = numpy.mean(heapq.nsmallest(shape[0], x_cors))
    y_max_cors = numpy.mean(heapq.nlargest(shape[1], y_cors))
    y_min_cors = numpy.mean(heapq.nsmallest(shape[1], y_cors))
    x_scale = x_max_cors - x_min_cors
    y_scale = y_max_cors - y_min_cors
    return x_scale / y_scale
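A small sanity check with hypothetical coordinates: a 2 x 2 grid spanning 1 unit in X and 2 units in Y should yield a ratio of 0.5.

coord = [(0, 0), (1, 0), (0, 2), (1, 2)]
print(_computeGridRatio(coord, (2, 2)))  # 1 unit in X / 2 units in Y == 0.5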
Example #31
    def pca_cum_variance_analysis(self,
                                  show_plot=False,
                                  custom_dpi=600,
                                  percentage=70,
                                  number_of_components=20):

        self.called_feat_pca_analysis = True
        print('PCA Cumulative Variance analysis has been called\n')
        print('-------------------------------\n')

        sns.set(style="ticks", context='paper')
        # fig = plt.figure(figsize=(10, 10))
        fig = plt.figure(figsize=plot_tools.cm2inch(8.4, 8.4))

        sns.set(font_scale=1)

        if number_of_components is not None:
            pca1 = PCA(n_components=number_of_components)
        else:
            pca1 = PCA(n_components=len(self.selection))

        TEST = 1

        # self.pca_transformed_data = pca1.fit_transform(self.scaled_data)
        self.pca_transformed_data = pca1.fit_transform(self.pca_data)

        # The amount of variance that each PC explains
        var = pca1.explained_variance_ratio_
        print('Explained variance ratio: ', var)

        self.md_pre_feat_analysis_data = {
            'varExplainedRatio': pca1.explained_variance_ratio_,
            'varExplained': pca1.explained_variance_,
            'mean': pca1.mean_,
        }

        # Cumulative Variance explains
        var1 = np.cumsum(
            np.round(pca1.explained_variance_ratio_, decimals=4) * 100)

        print("Cumulative Variance explains ", var1)

        # plt.plot(var)
        plt.plot(var1)
        plt.xlabel("Principal Component")
        plt.ylabel("Cumulative Proportion of Variance Explained")

        fig.savefig(self.simulation_name + 'PCA_cumsum_analysis_' + '.png',
                    dpi=custom_dpi,
                    bbox_inches='tight')

        if show_plot is True:
            plt.show()

        import heapq

        max_num_list = 3
        var_array = np.array(var1)

        best_score = 0
        best_index = 0
        for i in range(len(var_array)):
            if var_array[i] >= percentage:
                best_score = var_array[i]
                best_index = i
                break

        bottom_var = heapq.nsmallest(max_num_list, range(len(var_array)),
                                     var_array.take)
        print('Bottom Var', bottom_var)

        # self.md_pca_analysis_data.update({selection_text: self.reduced_cartesian})
        # self.number_pca = bottom_var[-1] + 1
        self.number_pca = best_index + 1
        print('Percentage of PCA : ', best_score)

        if best_score == 0:
            self.number_pca += 1

        print('Number of PCA : ', self.number_pca)
        print("PCA transformation finished successfully")
        print('-----------------------------------\n')
        return self.number_pca
Example #32
def MAP_calculation(orignal, simplified):

    MAP = []
    assert DATA_AMOUNT == len(simplified)
    index_set = random.sample(xrange(DATA_AMOUNT), RANDOM_TEST_AMOUNT)

    for random_index in index_set:
        original_distance_measure = []
        simplified_distance_measure = []

        for index_1 in xrange(DATA_AMOUNT):
            # Compute the distance between this data point and all the others

            if (random_index == index_1): continue

            original_distance = np.linalg.norm(
                (orignal[index_1], orignal[random_index]), ord=2)
            original_distance_measure.append((original_distance, index_1))

            simplified_distance = np.linalg.norm(
                (simplified[index_1], simplified[random_index]), ord=2)
            simplified_distance_measure.append((simplified_distance, index_1))
            '''
                original_distance_measure   -> the true distances between this point and the others
                simplified_distance_measure -> the predicted distances between this point and the others
            '''

        original_NearestNeighbor = heapq.nsmallest(MAP_LIST_LENGTH,
                                                   original_distance_measure)
        original_NearestNeighbor = set(
            [vals[1] for vals in original_NearestNeighbor])
        # print(original_NearestNeighbor)

        simplified_NearestNeighbor = heapq.nsmallest(
            MAP_LIST_LENGTH, simplified_distance_measure)
        simplified_NearestNeighbor = set(
            [vals[1] for vals in simplified_NearestNeighbor])
        # print(simplified_NearestNeighbor)

        correct_prediction = 0
        stats_array = []

        if (index_1 >= 48):
            print("简化的最临近list")
            for item in simplified_NearestNeighbor:
                print(item)
            print("原始的最临近list")
            for item in original_NearestNeighbor:
                print(item)
            return

        for index, prediction in enumerate(simplified_NearestNeighbor):
            if (prediction in original_NearestNeighbor):
                correct_prediction += 1
                stats_array.append(float(correct_prediction) / (index + 1))

        if (len(stats_array) == 0):
            print("出现了全空数组: 序号为{}".format(random_index))
            stats = 0
        else:
            stats = np.mean(stats_array)
        MAP.append(stats)
        '''
            NearestNeighbor -> the set of neighboring points
            stats_array -> the running precisions computed for MAP
            stats  -> the MAP of this single data point
            MAP  -> the MAP over all data points
        '''

    overall_score = np.mean(MAP)
    return overall_score
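For intuition on the per-point statistic above: with a cutoff of 3, a predicted neighbor list that hits the true neighbor set at ranks 1 and 3 scores (1/1 + 2/3) / 2 ≈ 0.83. A toy check of that arithmetic (conceptual only; note the snippet enumerates a set, so its ranks are not order-stable):

true_nn = {4, 7, 9}
predicted = [4, 2, 9]          # hits at ranks 1 and 3
hits, precisions = 0, []
for rank, idx in enumerate(predicted, start=1):
    if idx in true_nn:
        hits += 1
        precisions.append(hits / float(rank))
print(sum(precisions) / len(precisions))  # 0.8333...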
Example #33
more = []
less = []

for num in lists:
    if num >= 0:
        more.append(num)
    else:
        less.append(num)

if len(lists) == 3:
    print(lists[0] * lists[1] * lists[2])
else:
    if len(more) == 0:
        result = heapq.nlargest(3, less)
        print(result[0] * result[1] * result[2])
    else:
        overZero = max(more)
        more.remove(overZero)
        if len(more) >= 2 and len(less) >= 2:
            result1 = heapq.nlargest(2, more)
            result2 = heapq.nsmallest(2, less)
            result = max(result1[0] * result1[1], result2[0] * result2[1])
        else:
            if len(more) >= 2:
                result1 = heapq.nlargest(2, more)
                result = (result1[0] * result1[1])
            else:
                result2 = heapq.nsmallest(2, less)
                result = result2[0] * result2[1]
        print(overZero * result)
Example #34
""" 3. Get n largest/smallest elts of the array of dicts
"""
import heapq
arr_dicts = [{"name": "John", "age": 23, "city": "Oakland", "state": "CA"},
{"name": "Mary", "age": 33, "city": "San Jose", "state": "CA"},
{"name": "Henock", "age": 27, "city": "Las Vegas", "state": "NV"},
{"name": "James", "age": 19, "city": "Seattle", "state": "WA"}]
# print(arr_dicts)
largests = heapq.nlargest(2, arr_dicts, lambda dict: dict['age'])
print(largests)
smallests = heapq.nsmallest(2, arr_dicts, lambda dict: dict['age'])
print(smallests)
states = heapq.nsmallest(2, arr_dicts, lambda d: d['state'])
print(states)
states = heapq.nlargest(2, arr_dicts, lambda d: d['state'])
print(states)
            str((queried_location.latitude, queried_location.longitude)))

        user_lat_long = (queried_location.latitude, queried_location.longitude)
        print('Now searching parks!')

        for key, value in nps_dict.items():

            try:

                distances = (distance.distance(user_lat_long, value).miles)
                distances_dict[key] = distances

            except ValueError:  #Some national parks don't have this data available. SKIP!
                pass

    except AttributeError:
        print(
            "Sorry, we can't find that location! Try simplifying it- We don't need your exact street address."
        )
        return (user_location())


user_location()

result = nsmallest(10, distances_dict.items(), key=itemgetter(1))

print(
    "Here are the ten closest national parks to you and their distance from you in miles:"
)
pprint.pprint(result)
Example #36
def method8(dists, N):
    closest = heapq.nsmallest(N, dists)
    n = np.ceil(np.sqrt(2* len(dists)))
    ti = np.triu_indices(n, 1)
    r  = zip(ti[0][closest] + 1, ti[1][closest] + 1)
    return r
Example #37
 def nsmallest_tr(self_):
     return heapq.nsmallest(n, iter(self_), key=key)
Example #38
def main():
    list_1 = [1, 3, 5, 7, 9]

    # convert list into min heap
    heapq.heapify(list_1)
    print(list_1)
    """"
    Output:
    [1, 3, 5, 7, 9]
    After heapifying the list.
       1
       /\
      3  5
     /\
    7  9
    """

    # heappush pushes data into heap and organizes it
    # appropriately.
    heapq.heappush(list_1, 6)
    print(list(list_1))
    """"
    Output:
    [1, 3, 5, 7, 9, 6]
    After heapifying the list.
        1
       / \
      3   5
     /\   /
    7  9 6
    """
    # we also have heappop, which pops the smallest element
    # from the heap and restores the heap invariant.
    heapq.heappop(list_1)
    print(list_1)
    """
    heap gets rearranged as follows:
                3
               / \
              6   5
             / \
            7   9
            
    Here 3 becomes the root since it is the minimum of all.
    """
    # heappushpop pushes the item, then pops and returns the smallest.
    heapq.heappushpop(list_1, 2)
    print(list_1)

    # heapreplace pops and returns the smallest, then pushes the item.
    heapq.heapreplace(list_1, 2)
    print(list_1)
    """
        heap gets rearranged as follows:
                    2
                   / \
                  6   5
                 / \
                7   9

        Here 3 is popped out and 2 becomes the new root.
        """

    # nlargest
    print("{} are the 3 largest elements in that order".format(
        heapq.nlargest(3, list_1)))

    # nsmallest
    print("{} are the 3 smallest elements in that order".format(
        heapq.nsmallest(3, list_1)))
def kSmallestPairs_pythonic(nums1, nums2, k):
    list1 = [] 
    for i in range(len(nums1)):
        for j in range(len(nums2)):
            list1.append([nums1[i], nums2[j]])        
    return heapq.nsmallest(k, list1, key = lambda x : sum(x))
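A quick usage sketch; note the helper materializes all len(nums1) * len(nums2) pairs before heapq.nsmallest prunes them, so it favors brevity over memory:

print(kSmallestPairs_pythonic([1, 7, 11], [2, 4, 6], 3))
# [[1, 2], [1, 4], [1, 6]]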
Example #40
 def __iter__(self):
     nodes = heapq.nsmallest(self.maxsize, self.heap)
     return iter(map(itemgetter(1), nodes))
Example #41
q.append(2)
q.append(3)
print(q)
q.append(4)
print(q)
# A deque inserts or removes elements at either end in O(1), while a list takes O(n) to insert or remove at the front

# 4. Find the largest or smallest N elements
"""When the number of elements is small, nlargest and nsmallest are a good choice, but if
you only need one, max or min is faster; if N is large, close to the size of the
collection, sorted(items)[:N] or sorted(items)[-N:] works better
"""
import heapq

nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
largest_3 = heapq.nlargest(3, nums)
smallest_3 = heapq.nsmallest(3, nums)
print(largest_3)
print(smallest_3)

# More complex data structures
portfolio = [{
    'name': 'IBM',
    'shares': 100,
    'price': 91.1
}, {
    'name': 'AAPL',
    'shares': 50,
    'price': 543.22
}, {
    'name': 'FB',
    'shares': 200,
import heapq

nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums))  # Prints [42, 37, 23]
print(heapq.nsmallest(3, nums))  # Prints [-4, 1, 2]

portfolio = [{
    'name': 'IBM',
    'shares': 100,
    'price': 91.1
}, {
    'name': 'AAPL',
    'shares': 50,
    'price': 543.22
}, {
    'name': 'FB',
    'shares': 200,
    'price': 21.09
}, {
    'name': 'HPQ',
    'shares': 35,
    'price': 31.75
}, {
    'name': 'YHOO',
    'shares': 45,
    'price': 16.35
}, {
    'name': 'ACME',
    'shares': 75,
    'price': 115.65
}]
Example #43
li2 = [5, 7, 9, 4, 3]

# using heapify() to convert list into heap
heapq.heapify(li1)
heapq.heapify(li2)

# using heappushpop() to push and pop items simultaneously
# pops 2
print("The popped item using heappushpop() is : ", end="")
print(heapq.heappushpop(li1, 2))

# using heapreplace() to push and pop items simultaneously
# pops 3
print("The popped item using heapreplace() is : ", end="")
print(heapq.heapreplace(li2, 2))

li1 = [6, 7, 9, 4, 3, 5, 8, 10, 1]

# using heapify() to convert list into heap
heapq.heapify(li1)

# using nlargest to print 3 largest numbers
# prints 10, 9 and 8
print("The 3 largest numbers in list are : ", end="")
print(heapq.nlargest(3, li1))

# using nsmallest to print 3 smallest numbers
# prints 1, 3 and 4
print("The 3 smallest numbers in list are : ", end="")
print(heapq.nsmallest(3, li1))
Example #44
 def kClosest(self, points, K):
     import heapq
     return heapq.nsmallest(K, points, key=lambda p: p[0]**2 + p[1]**2)
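For example (hedged; assumes the method sits on a LeetCode-style Solution class, which is not shown in the excerpt):

points = [[1, 3], [-2, 2], [5, 8], [0, 1]]
print(Solution().kClosest(points, 2))  # [[0, 1], [-2, 2]]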
import heapq
from pprint import pprint
items = [{
    'name': 'Item-1',
    'price': 101.1
}, {
    'name': 'Item-2',
    'price': 555.22
}, {
    'name': 'Item-3',
    'price': 45.09
}, {
    'name': 'Item-4',
    'price': 22.75
}, {
    'name': 'Item-5',
    'price': 16.30
}, {
    'name': 'Item-6',
    'price': 110.65
}]

cheap = heapq.nsmallest(3, items, key=lambda s: s['price'])
expensive = heapq.nlargest(3, items, key=lambda s: s['price'])
print("Original datasets:")
pprint(items)
print("\nFirst 3 expensive items:")
pprint(expensive)
print("\nFirst 3 cheap items:")
pprint(cheap)
Example #46
    # publication year
    years = set()
    plot_data = {i: 0 for i in range(1900, 2030)}
    for file in [file_abs]:
        for index, row in file.iterrows():
            year = row['anno']
            if year == ' dcterms_issued:: @@MISSING-DATA' or year == 'dcterms_issued:: @@MISSING-DATA':
                continue
            years.add(int(year))
            try:
                plot_data[int(year)] = plot_data[int(year)] + 1
            except KeyError:
                #plot_data[int(year)] = 1
                continue

    print(nsmallest(10, list(years)))
    print(nlargest(10, list(years)))

    x = [k for k, v in plot_data.items()]
    y = [v for k, v in plot_data.items()]
    plt.clf()
    plt.plot(x, y, 'r')
    plt.ylabel(xx)
    plt.xlabel(yy)
    plt.title(tit)

    years = set()
    plot_data = {i: 0 for i in range(1900, 2030)}
    for file in [file_nabs]:
        for index, row in file.iterrows():
            year = row['anno']
Example #47
import heapq
import random

mylist = list(random.sample(range(100), 10))
print(mylist)

k = 3
largest = heapq.nlargest(k, mylist)
print('3 largest:', largest)
smallest = heapq.nsmallest(k, mylist)
print('3 smallest:', smallest)

# Heapify the original list in place

heapq.heapify(mylist)
print('List after heapify:', mylist)

heapq.heappush(mylist, 105)
print('After pushing an element:', mylist)
heapq.heappop(mylist)
print('After popping an element:', mylist)

heapq.heappushpop(mylist, 130)

print('After push-then-pop:', mylist)

heapq.heapreplace(mylist, 2)
print('After pop-then-push:', mylist)

from functools import reduce
a = reduce(lambda x, y: x*y, [1, 3, 5, 7, 9])
Example #48
def min_L2dist(vec, vectors, num_cand):
    L2dist_with_index = [(cal_L2dist(vec, v), i)
                         for i, v in enumerate(vectors)]
    closest = heapq.nsmallest(num_cand, L2dist_with_index)
    index_L2 = [index for (dist, index) in closest]
    return index_L2
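cal_L2dist is not shown in the excerpt; a plausible stand-in (an assumption, not the original helper):

import numpy as np

def cal_L2dist(vec, v):
    # Hypothetical Euclidean distance between two vectors.
    return np.linalg.norm(np.asarray(vec) - np.asarray(v))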
Example #49
#!/usr/bin/env python
__author__ = 'Chocolee'

import heapq
import random

heap = []
data = list(range(10000))
random.shuffle(data)
# for num in data:
#     heapq.heappush(heap, num)
# for i in range(len(heap)):
#     print(heapq.heappop(heap))
print(heapq.nsmallest(10, data))
Example #50
def plot_least_10_hellinger_neurons(hellinger_stats,
                                    model1_data,
                                    model2_data,
                                    color1,
                                    color2,
                                    modelname1,
                                    modelname2,
                                    data_dict,
                                    foldername,
                                    n_tokens=0,
                                    process_data_flag=False):
    """
    :param hellinger_stats: path to the saved file for the Hellinger statistics from the calculate_hellinger_distance function
    :param model1_data:data from trained model 1(dtype:dataframe)
    :param model2_data:data from trained model 2(dtype:dataframe)
    :param color1:color for model 1(dtype:str)
    :param color2:color for model 2(dtype:str)
    :param modelname1:model1 label(dtype:str)
    :param modelname2:model2 label(dtype:str)
    :param data_dict: dictionary containing input instructions(dtype:dict)
    :param foldername: pickled file name and directory to store the results
    :param n_tokens: number of tokens you want to plot(dtype:int)
    :param process_data_flag: True if the pickle files need to be generated, False if you want to load the pickle 
                              files.
    :Description: Generates the plots for the 10 neurons with the smallest Hellinger distances in hellinger_stats
    """
    # removing the whitespaces
    model1_data['POS'] = model1_data['POS'].apply(lambda x: x.replace(" ", ""))
    model2_data['POS'] = model2_data['POS'].apply(lambda x: x.replace(" ", ""))

    # Getting all the POS tags activated
    model1_pos = list(model1_data['POS'].unique())
    model2_pos = list(model2_data['POS'].unique())
    all_pos = set(model1_pos + model2_pos)
    # all_pos = [pos.strip() for pos in all_pos]

    # loading the Hellinger distance dictionary
    with open(hellinger_stats, 'rb') as handle:
        hellinger_dict = pickle.load(handle)

    least_10_neurons = heapq.nsmallest(10,
                                       hellinger_dict,
                                       key=hellinger_dict.get)
    for neuron in least_10_neurons:
        path = os.path.join(data_dict["visualize"]["plot_directory"],
                            foldername, "least_10", str(neuron))

        if not os.path.exists(path):
            os.makedirs(path)

        model1_data_temp = model1_data[model1_data['max_activation_index'] ==
                                       neuron]
        model2_data_temp = model2_data[model2_data['max_activation_index'] ==
                                       neuron]

        # Getting the pos stats from all the dictionaries
        model1_pos_dict = dict(Counter(model1_data_temp['POS']))
        model2_pos_dict = dict(Counter(model2_data_temp['POS']))
        # Creating dataframe from the dictionaries
        model1_pos = pd.DataFrame.from_dict(model1_pos_dict,
                                            orient='index',
                                            columns=[modelname1])
        model2_pos = pd.DataFrame.from_dict(model2_pos_dict,
                                            orient='index',
                                            columns=[modelname2])
        # Normalizing the statistics
        model1_pos[modelname1] = model1_pos[modelname1].apply(
            lambda x: x / model1_pos[modelname1].sum())
        model2_pos[modelname2] = model2_pos[modelname2].apply(
            lambda x: x / model2_pos[modelname2].sum())
        # Merging dataframe
        data = [model1_pos[modelname1], model2_pos[modelname2]]
        df = pd.concat(data, axis=1)
        # Again converting the dataframe to dictionary for further computations.
        all_pos_stats = df.to_dict()

        # Getting all the pos stats into a dictionary
        for viz_data in all_pos_stats.keys():
            for tags in all_pos:
                if tags not in all_pos_stats[viz_data].keys():
                    all_pos_stats[viz_data][tags] = None

        # Converting pos stats to a dataframe
        # all_pos_stats = pd.DataFrame.from_dict(all_pos_stats)

        if process_data_flag == True:
            # Getting the data.
            model1_neurondata = model1_data[model1_data['max_activation_index']
                                            == neuron]
            model1_neurondata['POS'] = model1_neurondata['POS'].apply(
                lambda x: x.strip())
            model2_neurondata = model2_data[model2_data['max_activation_index']
                                            == neuron]
            model2_neurondata['POS'] = model2_neurondata['POS'].apply(
                lambda x: x.strip())

            # Converting the other pos tags to the least three ones
            model1_least_pos = choose_top_pos_from_data(model1_neurondata)
            model2_least_pos = choose_top_pos_from_data(model2_neurondata)

            model1_tokens = list(model1_neurondata['inputs'])
            model1_pos = list(model1_neurondata['POS'])
            model2_tokens = list(model2_neurondata['inputs'])
            model2_pos = list(model2_neurondata['POS'])

            for index, pos in enumerate(model1_pos):
                if pos not in model1_least_pos[model1_tokens[index]]:
                    model1_pos[index] = model1_least_pos[
                        model1_tokens[index]][0]
            for index, pos in enumerate(model2_pos):
                if pos not in model2_least_pos[model2_tokens[index]]:
                    model2_pos[index] = model2_least_pos[
                        model2_tokens[index]][0]

            model1_neurondata['POS'] = model1_pos
            model2_neurondata['POS'] = model2_pos

            # Getting all the unique tokens
            model1_unique_tokens = model1_neurondata["inputs"].unique()
            model2_unique_tokens = model2_neurondata["inputs"].unique()

            model1_dict, model2_dict = ({} for i in range(2))

            # Generating model1 visualization
            # Getting mean for all the unique tokens
            for tokens in model1_unique_tokens:
                temp_df = model1_neurondata[model1_neurondata["inputs"] ==
                                            tokens]
                pos = list(temp_df["POS"].unique())
                activation_temp = []
                for unique_pos in pos:
                    activation_temp.append(
                        temp_df[temp_df['POS'] ==
                                unique_pos]["max_activations"].mean())
                model1_dict[tokens] = {
                    "POS": pos,
                    "activation": activation_temp
                }

            # Getting the least 20 activation tokens
            model1_least_20 = {}
            temp_activations, temp_tokens = ([] for i in range(2))
            for key, value in model1_dict.items():
                for index in range(len(value['POS'])):
                    temp_tokens.append(key)
                    temp_activations.append(value['activation'][index])
            model1_least_20_activation_index = sorted(
                range(len(temp_activations)),
                key=lambda x: temp_activations[x])[-n_tokens:]
            for indexes in model1_least_20_activation_index:
                model1_least_20[temp_tokens[indexes]] = model1_dict[
                    temp_tokens[indexes]]

            # Flipping the dictionary to get it in the order of {pos-tags:list(tuple(token,mean_activations))}
            model1_token_dict = defaultdict(list)
            for token, stats in model1_least_20.items():
                for index, value in enumerate(stats['POS']):
                    model1_token_dict[stats['POS'][index]].append(
                        (token, stats['activation'][index]))

            # Adding the null features for the tags not present
            for tags in all_pos:
                if tags not in model1_token_dict.keys():
                    model1_token_dict[tags].append((' ', 0.0))

            # Sort the dict by POS tag name
            sorted_model1_dict = {
                key: model1_token_dict[key]
                for key in sorted(model1_token_dict)
            }

            with open(os.path.join(path, 'model1_data.pickle'),
                      'wb') as handle:
                pickle.dump(sorted_model1_dict,
                            handle,
                            protocol=pickle.HIGHEST_PROTOCOL)

            # Generating model2 visualization
            # Getting mean for all the unique tokens
            for tokens in model2_unique_tokens:
                temp_df = model2_neurondata[model2_neurondata["inputs"] ==
                                            tokens]
                pos = list(temp_df["POS"].unique())
                activation_temp = []
                for unique_pos in pos:
                    activation_temp.append(
                        temp_df[temp_df['POS'] ==
                                unique_pos]["max_activations"].mean())
                model2_dict[tokens] = {
                    "POS": pos,
                    "activation": activation_temp
                }

            # Selecting the n_tokens tokens with the highest mean activation
            # (despite the 'least_20' names, [-n_tokens:] takes the largest)
            model2_least_20 = {}
            temp_activations, temp_tokens = [], []
            for key, value in model2_dict.items():
                for index in range(len(value['POS'])):
                    temp_tokens.append(key)
                    temp_activations.append(value['activation'][index])
            model2_least_20_activation_index = sorted(
                range(len(temp_activations)),
                key=lambda x: temp_activations[x])[-n_tokens:]
            for indexes in model2_least_20_activation_index:
                model2_least_20[temp_tokens[indexes]] = model2_dict[
                    temp_tokens[indexes]]

            # Invert the dict into {pos_tag: [(token, mean_activation), ...]}
            model2_token_dict = defaultdict(list)
            for token, stats in model2_least_20.items():
                for index, value in enumerate(stats['POS']):
                    model2_token_dict[stats['POS'][index]].append(
                        (token, stats['activation'][index]))

            # Add placeholder entries for POS tags with no surviving tokens
            for tags in all_pos:
                if tags not in model2_token_dict.keys():
                    model2_token_dict[tags].append((' ', 0.0))

            # Sort the dict by POS tag name
            sorted_model2_dict = {
                key: model2_token_dict[key]
                for key in sorted(model2_token_dict)
            }

            with open(os.path.join(path, 'model2_data.pickle'),
                      'wb') as handle:
                pickle.dump(sorted_model2_dict,
                            handle,
                            protocol=pickle.HIGHEST_PROTOCOL)

        else:
            # loading the dictionary
            with open(os.path.join(path, 'model1_data.pickle'),
                      'rb') as handle:
                sorted_model1_dict = pickle.load(handle)
            with open(os.path.join(path, 'model2_data.pickle'),
                      'rb') as handle:
                sorted_model2_dict = pickle.load(handle)

        fig = go.Figure()
        # Plotting the bar plot
        fig.add_trace(
            go.Bar(x=list(all_pos_stats[modelname1].keys()),
                   y=list(all_pos_stats[modelname1].values()),
                   name=modelname1,
                   marker_color=color1,
                   opacity=0.6))
        fig.add_trace(
            go.Bar(x=list(all_pos_stats[modelname2].keys()),
                   y=list(all_pos_stats[modelname2].values()),
                   name=modelname2,
                   marker_color=color2,
                   opacity=0.6))

        # Plotting the tokens on the bar plot
        pos_model1 = list(sorted_model1_dict.keys())
        values_model1 = list(sorted_model1_dict.values())

        pos_model2 = list(sorted_model2_dict.keys())
        values_model2 = list(sorted_model2_dict.values())
        model1_value = [[(value[0], np.nan) if value[1] == 0.0 else
                         (value[0], value[1]) for value in pairs]
                        for pairs in values_model1]
        model2_value = [[(value[0], np.nan) if value[1] == 0.0 else
                         (value[0], value[1]) for value in pairs]
                        for pairs in values_model2]

        model1_token = [[value[0] for value in pairs]
                        for pairs in model1_value]
        model1_activations = [[value[1] for value in pairs]
                              for pairs in model1_value]

        model2_token = [[value[0] for value in pairs]
                        for pairs in model2_value]
        model2_activations = [[value[1] for value in pairs]
                              for pairs in model2_value]

        pos_model1_list, activation_model1_list, token_model1_list = [], [], []
        for index in range(len(pos_model1)):
            for activation_list_index, activation in enumerate(
                    model1_activations[index]):
                pos_model1_list.append(pos_model1[index])
                activation_model1_list.append(activation)
                token_model1_list.append(
                    model1_token[index][activation_list_index])
        fig.add_trace(
            go.Scatter(x=pos_model1_list,
                       y=activation_model1_list,
                       text=token_model1_list,
                       mode='markers+text',
                       marker_color=color1,
                       name=modelname1,
                       textfont={'color': color1}))

        pos_model2_list, activation_model2_list, token_model2_list = [], [], []
        for index in range(len(pos_model2)):
            for activation_list_index, activation in enumerate(
                    model2_activations[index]):
                pos_model2_list.append(pos_model2[index])
                activation_model2_list.append(activation)
                token_model2_list.append(
                    model2_token[index][activation_list_index])
        fig.add_trace(
            go.Scatter(x=pos_model2_list,
                       y=activation_model2_list,
                       text=token_model2_list,
                       mode='markers+text',
                       marker_color=color2,
                       name=modelname2,
                       textfont={'color': color2}))

        fig.update_layout(title_text=f'Hellinger plot for neuron {neuron}',
                          xaxis_title="POS-tags",
                          yaxis_title="Activation",
                          xaxis=dict(showticklabels=True),
                          yaxis=dict(showticklabels=True))

        # plotly.offline.plot writes an HTML file, so use a .html extension
        plotly.offline.plot(fig,
                            filename=os.path.join(path,
                                                  str(neuron) + ".html"),
                            auto_open=False)
        fig.show()
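The helper choose_top_pos_from_data is referenced but not shown in this snippet. A minimal sketch consistent with how it is used above (each token maps to a list of its most frequent POS tags, and tags outside that list are folded into the first entry) might look like the following; the column names and the top-3 cutoff are assumptions:

from collections import Counter

def choose_top_pos_from_data(neurondata, keep=3):
    # Hypothetical sketch: map each token to its `keep` most frequent POS
    # tags. Assumes the frame has 'inputs' (token) and 'POS' columns, as
    # in the usage above.
    token_to_pos = {}
    for token, group in neurondata.groupby('inputs'):
        counts = Counter(group['POS'])
        token_to_pos[token] = [pos for pos, _ in counts.most_common(keep)]
    return token_to_pos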
Example #51
0
import heapq
import itertools


def top_k(k, stream):
    # Keep a min-heap of the k longest strings seen so far, keyed by length.
    min_heap = [(len(s), s) for s in itertools.islice(stream, k)]
    heapq.heapify(min_heap)
    for next_string in stream:
        # heappushpop (not heappush) keeps the heap at size k by evicting
        # the current shortest string on every insertion
        heapq.heappushpop(min_heap, (len(next_string), next_string))
    return [p[1] for p in heapq.nsmallest(k, min_heap)]
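A quick run of the fixed function; strings are compared by length, with the tuple's second element breaking ties lexicographically:

# note: pass an iterator, not a list, so islice() actually consumes
# the first k items before the main loop starts
words = iter(['go', 'python', 'rust', 'c', 'haskell', 'ocaml'])
print(top_k(3, words))  # ['ocaml', 'python', 'haskell']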
Example #52
0
    def step(self, episode, action, index, done, label='T'):
        # update state and compute reward
        #print('self.F_ward', self.F_ward)
        #print('check, state, heap', self.check, self.state, self.heap)
        if action >= len(self.check):
            rem = self.check[0]
        else:
            rem = self.check[action]  # point index in ori traj
        #print('remove point index and value', self.state, rem, self.F_ward[rem][0])
        NEXT_P = self.F_ward[rem][1]
        NEXT_V = self.B_ward[NEXT_P][0]
        LAST_P = self.B_ward[rem][1]
        LAST_V = self.F_ward[LAST_P][0]

        if LAST_P > self.link_head:
            self.delete_heap(self.heap, (LAST_V, LAST_P))
            #s = self.ori_traj_set[episode][self.B_ward[LAST_P][1]]
            #m1 = self.ori_traj_set[episode][LAST_P]
            #m2 = self.ori_traj_set[episode][rem]
            #e = self.ori_traj_set[episode][NEXT_P]
            self.err_record[(self.B_ward[LAST_P][1], NEXT_P)] = F.sed_op(
                self.ori_traj_set[episode][self.B_ward[LAST_P][1]:NEXT_P + 1])
            #self.err_record[(self.B_ward[LAST_P][1], NEXT_P)] = F.sed_op([s,m1,m2,e])
            self.F_ward[LAST_P][0] = self.err_record[(self.B_ward[LAST_P][1],
                                                      NEXT_P)]
            self.B_ward[LAST_P][0] = self.err_record[(self.B_ward[LAST_P][1],
                                                      NEXT_P)]
            heapq.heappush(self.heap, (self.F_ward[LAST_P][0], LAST_P))
        if NEXT_P < self.link_tail:
            self.delete_heap(self.heap, (NEXT_V, NEXT_P))
            #s = self.ori_traj_set[episode][LAST_P]
            #m1 = self.ori_traj_set[episode][rem]
            #m2 = self.ori_traj_set[episode][NEXT_P]
            #e = self.ori_traj_set[episode][self.F_ward[NEXT_P][1]]
            self.err_record[(LAST_P, self.F_ward[NEXT_P][1])] = F.sed_op(
                self.ori_traj_set[episode][LAST_P:self.F_ward[NEXT_P][1] + 1])
            #self.err_record[(LAST_P, self.F_ward[NEXT_P][1])] = F.sed_op([s,m1,m2,e])
            self.F_ward[NEXT_P][0] = self.err_record[(LAST_P,
                                                      self.F_ward[NEXT_P][1])]
            self.B_ward[NEXT_P][0] = self.err_record[(LAST_P,
                                                      self.F_ward[NEXT_P][1])]
            heapq.heappush(self.heap, (self.F_ward[NEXT_P][0], NEXT_P))

        #self.copy_traj.remove(self.ori_traj_set[episode][rem])
        if label == 'T':
            self.reward_update(episode, rem)
            '''
            self.copy_traj.remove(self.ori_traj_set[episode][rem])
            _,  self.current = F.sed_error(self.ori_traj_set[episode], self.copy_traj)
            '''
            self.rw = self.last_error - self.current
            self.last_error = self.current
            #print('self.current',self.current)

        self.F_ward[LAST_P][1] = NEXT_P
        self.B_ward[NEXT_P][1] = LAST_P
        self.delete_heap(self.heap, (self.F_ward[rem][0], rem))
        del self.F_ward[rem]
        del self.B_ward[rem]

        if not done:
            if action >= len(self.check):
                self.INX = min(index + 2 + action - len(self.check),
                               len(self.ori_traj_set[episode]) - 1)
                self.read(self.INX, episode)
                if label == 'T':
                    self.reward_update(episode, [index, self.INX], 'skip')
                    '''
                    for skip in range(index + 1, self.INX):
                        self.copy_traj.remove(self.ori_traj_set[episode][skip])
                    _,  self.current = F.sed_error(self.ori_traj_set[episode], self.copy_traj)
                    '''
                    self.rw += self.last_error - self.current
                    self.last_error = self.current
            else:
                self.read(index + 1, episode)
            t = heapq.nsmallest(self.n_features, self.heap)
            if len(t) < self.n_features:
                self.check = [t[0][1], t[0][1], t[1][1]]
                self.state = [t[0][0], t[0][0], t[1][0], t[0][0], t[0][0]]
            else:
                self.check = [t[0][1], t[1][1], t[2][1]]
                if self.INX + 4 <= self.steps:
                    J1 = F.sed_op(
                        self.ori_traj_set[episode][self.INX:self.INX + 3])
                    J2 = F.sed_op(
                        self.ori_traj_set[episode][self.INX:self.INX + 4])
                    self.state = [t[0][0], t[1][0], t[2][0], J1, J2]
                else:
                    self.state = [t[0][0], t[1][0], t[2][0], t[0][0], t[0][0]]
#            self.check = [self.heap[0][1], self.heap[1][1]]
#            self.state = [self.heap[0][0], self.heap[1][0]]
#f.write('--->'+str(rw)+'\n')
#self.state = [max(self.heap[0][0] - self.current, 0.0)]
#cannot remove the starting and ending
#        if self.current_left == self.link_head:
#            self.check.append(self.current_right)
#            self.state.append(self.B_ward[self.current_right][0])
#        elif self.current_right == self.link_tail:
#            self.check.append(self.current_left)
#            self.state.append(self.F_ward[self.current_left][0])
#        elif self.F_ward[self.current_left][0] < self.B_ward[self.current_right][0]:
#            self.check.append(self.current_left)
#            self.state.append(self.F_ward[self.current_left][0])
#        else:
#            self.check.append(self.current_right)
#            self.state.append(self.B_ward[self.current_right][0])

#self.state.append(self.current)

#self.state[1] = self.state[1] - self.current

#print('check and state', self.check, self.state)
        return np.array(self.state).reshape(1, -1), self.rw
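delete_heap is used throughout this method but not defined in the excerpt. A minimal sketch, assuming it removes one known (value, index) entry and then restores the heap invariant:

import heapq

def delete_heap(self, heap, entry):
    # hypothetical helper: drop an arbitrary known entry, then re-heapify
    heap.remove(entry)   # O(n) linear scan
    heapq.heapify(heap)  # O(n) rebuild of the heap invariant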
Example #53
0
import heapq

grades = [23,45,21,56,43,76,99,43,34,65,69,74]

print(heapq.nlargest(3,grades))

custom_grades = [
    { 'name': 'Sumeet', 'percentage': 92.5 },
    { 'name': 'Sagar', 'percentage': 90.1 },
    { 'name': 'Aman', 'percentage': 81.54 },
    { 'name': 'Nilesh', 'percentage': 36.54 },
    { 'name': 'Satish', 'percentage': 83.43 },
    { 'name': 'Manoj', 'percentage': 45.2 },
    { 'name': 'Vishal', 'percentage': 95.34 }
]

print(heapq.nsmallest(2, custom_grades, key=lambda g: g['percentage']))
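For a single extreme, min() or max() with the same key is clearer than a one-element heap query:

top_student = max(custom_grades, key=lambda g: g['percentage'])
print(top_student)  # {'name': 'Vishal', 'percentage': 95.34}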
Example #54
0
    def _tester():
        # note: we run everything in a separate process to re-initialize all global states from scratch
        # this helps us avoid undesirable side-effects when running multiple tests in sequence
        loop = asyncio.get_event_loop()
        me = loop.run_until_complete(DHTNode.create(initial_peers=random.sample(dht.keys(), 5), parallel_rpc=10,
                                                    cache_refresh_before_expiry=False))

        # test 1: find self
        nearest = loop.run_until_complete(me.find_nearest_nodes([me.node_id], k_nearest=1))[me.node_id]
        assert len(nearest) == 1 and ':'.join(nearest[me.node_id].split(':')[-2:]) == f"{LOCALHOST}:{me.port}"

        # test 2: find others
        for i in range(10):
            ref_endpoint, query_id = random.choice(list(dht.items()))
            nearest = loop.run_until_complete(me.find_nearest_nodes([query_id], k_nearest=1))[query_id]
            assert len(nearest) == 1
            found_node_id, found_endpoint = next(iter(nearest.items()))
            assert found_node_id == query_id and ':'.join(found_endpoint.split(':')[-2:]) == ref_endpoint

        # test 3: find neighbors to random nodes
        accuracy_numerator = accuracy_denominator = 0  # top-1 nearest neighbor accuracy
        jaccard_numerator = jaccard_denominator = 0  # jaccard similarity aka intersection over union
        all_node_ids = list(dht.values())

        for i in range(100):
            query_id = DHTID.generate()
            k_nearest = random.randint(1, 20)
            exclude_self = random.random() > 0.5
            nearest = loop.run_until_complete(
                me.find_nearest_nodes([query_id], k_nearest=k_nearest, exclude_self=exclude_self))[query_id]
            nearest_nodes = list(nearest)  # keys from ordered dict

            assert len(nearest_nodes) == k_nearest, "beam search must return exactly k_nearest results"
            assert me.node_id not in nearest_nodes or not exclude_self, "if exclude, results shouldn't contain self"
            assert np.all(np.diff(query_id.xor_distance(nearest_nodes)) >= 0), "results must be sorted by distance"

            ref_nearest = heapq.nsmallest(k_nearest + 1, all_node_ids, key=query_id.xor_distance)
            if exclude_self and me.node_id in ref_nearest:
                ref_nearest.remove(me.node_id)
            if len(ref_nearest) > k_nearest:
                ref_nearest.pop()

            accuracy_numerator += nearest_nodes[0] == ref_nearest[0]
            accuracy_denominator += 1

            jaccard_numerator += len(set.intersection(set(nearest_nodes), set(ref_nearest)))
            jaccard_denominator += k_nearest

        accuracy = accuracy_numerator / accuracy_denominator
        print("Top-1 accuracy:", accuracy)  # should be 98-100%
        jaccard_index = jaccard_numerator / jaccard_denominator
        print("Jaccard index (intersection over union):", jaccard_index)  # should be 95-100%
        assert accuracy >= 0.9, f"Top-1 accuracy only {accuracy} ({accuracy_numerator} / {accuracy_denominator})"
        assert jaccard_index >= 0.9, f"Jaccard index only {jaccard_index} ({jaccard_numerator} / {jaccard_denominator})"

        # test 4: find all nodes
        dummy = DHTID.generate()
        nearest = loop.run_until_complete(me.find_nearest_nodes([dummy], k_nearest=len(dht) + 100))[dummy]
        assert len(nearest) == len(dht) + 1
        assert len(set.difference(set(nearest.keys()), set(all_node_ids) | {me.node_id})) == 0

        # test 5: node without peers
        detached_node = loop.run_until_complete(DHTNode.create())
        nearest = loop.run_until_complete(detached_node.find_nearest_nodes([dummy]))[dummy]
        assert len(nearest) == 1 and nearest[detached_node.node_id] == f"{LOCALHOST}:{detached_node.port}"
        nearest = loop.run_until_complete(detached_node.find_nearest_nodes([dummy], exclude_self=True))[dummy]
        assert len(nearest) == 0

        # test 6: store and get value
        true_time = get_dht_time() + 1200
        assert loop.run_until_complete(me.store("mykey", ["Value", 10], true_time))
        that_guy = loop.run_until_complete(DHTNode.create(initial_peers=random.sample(dht.keys(), 3), parallel_rpc=10,
                                                          cache_refresh_before_expiry=False, cache_locally=False))

        for node in [me, that_guy]:
            val, expiration_time = loop.run_until_complete(node.get("mykey"))
            assert val == ["Value", 10], "Wrong value"
            assert expiration_time == true_time, "Wrong time"

        assert loop.run_until_complete(detached_node.get("mykey")) is None

        # test 7: bulk store and bulk get
        keys = 'foo', 'bar', 'baz', 'zzz'
        values = 3, 2, 'batman', [1, 2, 3]
        store_ok = loop.run_until_complete(me.store_many(keys, values, expiration_time=get_dht_time() + 999))
        assert all(store_ok.values()), "failed to store one or more keys"
        response = loop.run_until_complete(me.get_many(keys[::-1]))
        for key, value in zip(keys, values):
            assert key in response and response[key][0] == value

        # test 8: store dictionaries as values (with sub-keys)
        upper_key, subkey1, subkey2, subkey3 = 'ololo', 'k1', 'k2', 'k3'
        now = get_dht_time()
        assert loop.run_until_complete(me.store(upper_key, subkey=subkey1, value=123, expiration_time=now + 10))
        assert loop.run_until_complete(me.store(upper_key, subkey=subkey2, value=456, expiration_time=now + 20))
        for node in [that_guy, me]:
            value, time = loop.run_until_complete(node.get(upper_key))
            assert isinstance(value, dict) and time == now + 20
            assert value[subkey1] == (123, now + 10)
            assert value[subkey2] == (456, now + 20)
            assert len(value) == 2

        assert not loop.run_until_complete(me.store(upper_key, subkey=subkey2, value=345, expiration_time=now + 10))
        assert loop.run_until_complete(me.store(upper_key, subkey=subkey2, value=567, expiration_time=now + 30))
        assert loop.run_until_complete(me.store(upper_key, subkey=subkey3, value=890, expiration_time=now + 50))
        loop.run_until_complete(asyncio.sleep(0.1))  # wait for cache to refresh

        for node in [that_guy, me]:
            value, time = loop.run_until_complete(node.get(upper_key))
            assert isinstance(value, dict) and time == now + 50, (value, time)
            assert value[subkey1] == (123, now + 10)
            assert value[subkey2] == (567, now + 30)
            assert value[subkey3] == (890, now + 50)
            assert len(value) == 3

        test_success.set()
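The reference computation above fetches k_nearest + 1 candidates so that excluding the caller's own id still leaves k results. A standalone illustration of the same trick, with toy integer ids and absolute distance standing in for xor_distance:

import heapq

def k_nearest_excluding(query, ids, k, me):
    ref = heapq.nsmallest(k + 1, ids, key=lambda i: abs(i - query))
    if me in ref:
        ref.remove(me)
    if len(ref) > k:
        ref.pop()  # drop the extra candidate if `me` was not among them
    return ref

print(k_nearest_excluding(10, [1, 8, 9, 10, 12, 20], k=2, me=10))  # [9, 8]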
Example #55
0
import heapq


# heapsort definition assumed by the print below; this is the standard
# recipe from the heapq docs (push everything, then pop in ascending order)
def heapsort(iterable):
    h = []
    for value in iterable:
        heapq.heappush(h, value)
    return [heapq.heappop(h) for _ in range(len(h))]


print(heapsort([1, 3, 5, 7, 9, 2, 4, 6, 8, 0]))

# initializing list
li = [5, 7, 9, 1, 3]
# li = [[1, 2], [1, 4], [1, 6], [7, 2], [7, 4], [7, 6], [11, 2], [11, 4], [11, 6]]
# li = [1, 3, 5, 7, 9, 2, 4, 6, 8, 0]
# li = [5, 7, 9, 4, 3]

# using heapify to convert list into heap
print('heapify')
heapq.heapify(li)
print(li)
print()
# print(heapq.heapify(li))  # heapify() mutates in place and returns None, so this would just print None

# initializing list
li1 = [6, 7, 9, 4, 3, 5, 8, 10, 1]

# using heapify() to convert list into heap
heapq.heapify(li1)
print(li1)
# using nlargest to print 3 largest numbers
# prints 10, 9 and 8
print("The 3 largest numbers in list are : ", end="")
print(heapq.nlargest(3, li1))

# using nsmallest to print 3 smallest numbers
# prints 1, 3 and 4
print("The 3 smallest numbers in list are : ", end="")
print(heapq.nsmallest(3, li1))
print(heapq.nsmallest(3, li1)[-1])
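The last line uses a handy idiom: heapq.nsmallest(k, data)[-1] is the k-th smallest element without a full sort. A quick check against sorted():

import heapq

data = [6, 7, 9, 4, 3, 5, 8, 10, 1]
k = 3
assert heapq.nsmallest(k, data)[-1] == sorted(data)[k - 1]  # both give 4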
Example #56
0
import heapq

portfolio = [{
    # the snippet's opening was truncated; the list header and this entry's
    # name are reconstructed from the classic Python Cookbook example
    'name': 'FB',
    'shares': 200,
    'price': 21.09
}, {
    'name': 'HPQ',
    'shares': 35,
    'price': 31.75
}, {
    'name': 'YHOO',
    'shares': 45,
    'price': 16.35
}, {
    'name': 'ACME',
    'shares': 75,
    'price': 115.65
}]
cheap = heapq.nsmallest(1, portfolio, key=lambda s: s['price'])
print(cheap)
'''
[{'name': 'YHOO', 'shares': 45, 'price': 16.35}]
'''


# method 3: build the min-heap in place by sifting nodes (while-loop version)
def heapilize_list(x):
    n = len(x)
    # collect the indices of nodes that have children, then min-heapify each subtree
    for i in reversed(range(n // 2)):
        raiseup_node(x, i)


def put_down_node(heap, startpos, pos):
Example #57
0
# heapq_extremes.py

import heapq
from heapq_heapdata import data

print('all       :', data)
print('3 largest :', heapq.nlargest(3, data))
print('from sort :', list(reversed(sorted(data)[-3:])))
print('3 smallest:', heapq.nsmallest(3, data))
print('from sort :', sorted(data)[:3])
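Per the heapq documentation, nlargest()/nsmallest() perform best for small n; when n == 1, min()/max() are more efficient, and when n approaches len(data) a full sorted() is usually better:

import heapq

data = [19, 9, 4, 10, 11, 8, 2]
assert min(data) == heapq.nsmallest(1, data)[0]      # n == 1: use min()
assert sorted(data)[:5] == heapq.nsmallest(5, data)  # large n: just sort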
Example #58
0
import heapq

n, c, f = map(int, input().split())
count = (n - 1) // 2

applicants = []
for _ in range(c):
    g, m = map(int, input().split())
    applicants.append((g, m))

# Sort by grade ascending. (The original zip/dict approach silently
# dropped applicants with duplicate grades.)
applicants.sort(key=lambda p: p[0])
nemo = [m for _, m in applicants]

ans = 0
for i in range(count, len(applicants) - count):
    # cheapest `count` costs strictly below and above the median candidate
    left = sum(heapq.nsmallest(count, nemo[:i]))
    right = sum(heapq.nsmallest(count, nemo[i + 1:]))
    if left + right + nemo[i] <= f:
        ans = max(ans, applicants[i][0])

print(ans, end='')
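A worked run with made-up input (first line "n c f", then c lines of "grade money"); under the rewrite above this prints 40:

# hypothetical stdin:
#   3 5 100
#   10 40
#   20 30
#   30 20
#   40 30
#   50 40
# count = 1; the grade-40 candidate needs the cheapest helper on each side
# (20 below, 40 above), and 20 + 40 + 30 = 90 <= 100, so the answer is 40.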
Example #59
0
    def simulate():
        ham = {}
        hamSum = 0
        bitC = 0
        bitW = 0
        for p in range(8):
            keyset = format(p,"03b")
            
            beforeS = []
            beforeC = []
            before = []
        #print('A  B  Cin  SumOut  Cout')
            for i in range(8):
                K1.next = 0
                K2.next = 0
                K3.next = 0
                bits = format(i, "03b")  # avoid shadowing the built-in set()
                A.next = int(bits[0])
                B.next = int(bits[1])
                C_in.next = int(bits[2])
                yield delay(10)
                #print ('{}  {}  {}    {}        {} BEFORE'.format(bin(A,1),bin(B,1),bin(C_in,1),bin(Sum_out,1),bin(C_out,1)))
                beforeS.append(int(bin(Sum_out)))
                beforeC.append(int(bin(C_out)))

            before = beforeS + beforeC
            print('Before:')
            print(beforeS)
            print(beforeC)
            print(before)

            K1.next = int(keyset[0])
            K2.next = int(keyset[1])
            K3.next = int(keyset[2])
            afterS = []
            afterC = []
            after = []

            for i in range(8):
                bits = format(i, "03b")  # avoid shadowing the built-in set()
                A.next = int(bits[0])
                B.next = int(bits[1])
                C_in.next = int(bits[2])
                yield delay(10)
                afterS.append(int(bin(Sum_out)))
                afterC.append(int(bin(C_out)))
            
            after = afterS + afterC
            print('After:')
            print(afterS)
            print(afterC)
            print(after)

            t = []

            for i in range(len(before)):
                bitC += 1
                if before[i] != after[i]:
                    bitW += 1
                    t.append(before[i])

            hamming = len(t)/len(before)
            print('Key1: {} Key2: {} Key3: {}'.format(keyset[0],keyset[1],keyset[2]))
            print('Hamming = {}'.format(hamming))
            ham[hamming] = p
            hamSum += hamming

        print('Hamming List:')
        print(ham)

        smallest = nsmallest(1, ham, key=lambda x: abs(x-0.5))
        key = format(ham[smallest[0]],"03b")
        print('Best Key Combination: {}'.format(key))
        print('Hamming Distance: {}'.format(smallest))
        print('Hamming Average = {}'.format(hamSum/7))
        print('Wrong {} / {}'.format(bitW,bitC))
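The key=lambda x: abs(x - 0.5) selection picks the Hamming fraction closest to 0.5 (the ideal avalanche behavior). In isolation:

import heapq

ham_scores = [0.125, 0.5, 0.625, 0.875]  # illustrative values
print(heapq.nsmallest(1, ham_scores, key=lambda x: abs(x - 0.5)))  # [0.5]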
Example #60
0
    def second_highest_point(self):
        # assuming y grows downward (screen coordinates), the two smallest
        # y-values are the two highest points and [-1] is the second of them
        return nsmallest(2, self.points, key=lambda x: x[1])[-1]
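A minimal harness for the method above, assuming self.points is a list of (x, y) pairs in a y-down (screen) coordinate system:

from heapq import nsmallest

class Shape:
    def __init__(self, points):
        self.points = points  # (x, y) pairs; smaller y = higher on screen

    def second_highest_point(self):
        # two smallest y-values are the two highest points; [-1] is the second
        return nsmallest(2, self.points, key=lambda x: x[1])[-1]

print(Shape([(0, 5), (1, 2), (2, 9)]).second_highest_point())  # (0, 5)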