def word_distance(w1, w2, wv):
    """Spectral word distance between two words.

    Parameters
    ----------
    w1 : str
        First word; must be a key of ``wv``.
    w2 : str
        Second word; must be a key of ``wv``.
    wv : dict
        Maps words (str) to embedding vectors.

    Returns
    -------
    float
        The value of the cosine function ``cs`` (imported at the top of
        this file) applied to the two embedding vectors.
        NOTE(review): the original docstring claims the result lies in
        (0, 1) — that depends on what ``cs`` actually computes; confirm
        against the import at the top of the file.
    """
    return cs(wv[w1], wv[w2])
def calc_distances(dic1, dic2, task, topks, distances, GT_samples):
    """Evaluate retrieval quality (mAP) for several distances and top-k cutoffs.

    For every distance name in ``distances`` and every cutoff in ``topks``,
    each sample of ``dic1`` queries all other samples of ``dic2``; candidates
    are ranked by the pairwise score and average precision is accumulated via
    ``get_AP`` (defined elsewhere in this file), then averaged over queries.

    Parameters
    ----------
    dic1 : dict
        Query samples -> representation vectors.
    dic2 : dict
        Gallery samples -> representation vectors.
    task : str
        Task label, used for logging and stored in the result table.
    topks : list
        Top-k cutoffs passed to ``get_AP``.
    distances : list
        Distance names: 'cos', 'sp', 'corr', anything else -> euclidean.
    GT_samples : object
        Ground-truth structure forwarded to ``get_AP``.

    Returns
    -------
    pandas.DataFrame
        One row per distance; one column per top-k value, plus the
        'distances' and 'task' columns.
    """
    logger = logging.getLogger("my_logger")
    logger.info('TASK: %s', task)

    def _pair_score(metric, a, b):
        # Pairwise score for one candidate under the requested metric.
        if metric == 'cos':
            return 1 - cs(a, b)
        if metric == 'sp':
            return sp.entropy(a, b)
        if metric == 'corr':
            return pearsonr(a, b)[0]  # 1 --> correlated, 0 --> no correlated
        return eu(a, b)

    num_queries = len(dic1.keys()) * 1.0
    rows = []
    for metric in distances:
        logger.info('distance: %s', metric)
        row = []
        for k in topks:
            logger.info('K: %s', k)
            queries = sorted(dic1.keys())
            gallery = sorted(dic2.keys())
            ap_sum = 0
            for query in queries:
                reps_q = dic1[query]
                scored = [(cand, _pair_score(metric, reps_q, dic2[cand]))
                          for cand in gallery if cand != query]
                # 'corr' is a similarity (bigger = closer) -> rank decreasing;
                # every other metric is a distance -> rank increasing.
                ranked = sorted(scored, key=lambda p: p[1],
                                reverse=(metric == 'corr'))
                ap_sum = ap_sum + get_AP(ranked, query, GT_samples, top_k=k)
            row.append(ap_sum / num_queries)
            logger.info('mAP %s %s', task, str(row[-1]))
        rows.append(row)

    table = pd.DataFrame(rows)
    table.columns = topks
    table['distances'] = distances
    table['task'] = task
    return table