def dist_mean_cosine(self, to_log):
    """
    Mean-cosine model selection criterion.
    """
    mean_cosines = []
    # get normalized embeddings
    for l1, l2 in itertools.permutations(self.params.langs, 2):
        logger.info('compute mean cosine languages: {},{}'.format(l1, l2))
        # map embeddings to shared space
        src_emb = apply_mapping(self.mapping[l1], self.embs[l1].weight).data
        tgt_emb = apply_mapping(self.mapping[l2], self.embs[l2].weight).data
        # normalize mapped embeddings
        src_emb = src_emb / src_emb.norm(2, 1, keepdim=True).expand_as(src_emb)
        tgt_emb = tgt_emb / tgt_emb.norm(2, 1, keepdim=True).expand_as(tgt_emb)

        # build dictionary
        # for dico_method in ['nn', 'csls_knn_10']:
        for dico_method in ['csls_knn_10']:
            dico_build = 'S2T'
            dico_max_size = 10000
            # temp params / dictionary generation
            _params = deepcopy(self.params)
            _params.dico_method = dico_method
            _params.dico_build = dico_build
            _params.dico_threshold = 0
            _params.dico_max_rank = 10000
            _params.dico_min_size = 0
            _params.dico_max_size = dico_max_size
            s2t_candidates = get_candidates(src_emb, tgt_emb, _params)
            t2s_candidates = get_candidates(tgt_emb, src_emb, _params)
            dico = build_pairwise_dictionary(src_emb, tgt_emb, _params,
                                             s2t_candidates, t2s_candidates,
                                             True)
            # mean cosine
            if dico is None:
                mean_cosine = -1e9
            else:
                mean_cosine = (src_emb[dico[:dico_max_size, 0]] *
                               tgt_emb[dico[:dico_max_size, 1]]).sum(1).mean()
                mean_cosine = mean_cosine.item()
            logger.info(
                "Mean cosine (%s method, %s build, %i max size): %.5f"
                % (dico_method, _params.dico_build, dico_max_size,
                   mean_cosine))
            to_log['mean_cosine-%s-%s-%i_%s_%s'
                   % (dico_method, _params.dico_build, dico_max_size,
                      l1, l2)] = mean_cosine
            mean_cosines.append(mean_cosine)
    # average cosine across language pairs
    to_log['mean_cosine-%s-%s-%i'
           % (dico_method, _params.dico_build,
              dico_max_size)] = np.mean(mean_cosines)
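# --- Illustrative sketch (not part of the original evaluator) ---
# A minimal, self-contained sketch of the mean-cosine criterion computed
# above, assuming row-normalized embeddings and a (pairs x 2) index tensor
# as produced by the dictionary builder. `mean_cosine_sketch` and the toy
# tensors below are hypothetical stand-ins for the mapped embeddings and
# the build_pairwise_dictionary output.
import torch

def mean_cosine_sketch(src_emb, tgt_emb, dico, max_size=10000):
    # row-normalize so the dot product of two rows equals their cosine
    src = src_emb / src_emb.norm(2, 1, keepdim=True)
    tgt = tgt_emb / tgt_emb.norm(2, 1, keepdim=True)
    pairs = dico[:max_size]
    return (src[pairs[:, 0]] * tgt[pairs[:, 1]]).sum(1).mean().item()

# toy usage: 5 source and 5 target vectors with an identity dictionary
toy_src = torch.randn(5, 4)
toy_tgt = torch.randn(5, 4)
toy_dico = torch.arange(5).unsqueeze(1).repeat(1, 2)  # [[0,0],[1,1],...]
print(mean_cosine_sketch(toy_src, toy_tgt, toy_dico))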
def dist_mean_cosine(self, to_log):
    """
    Mean-cosine model selection criterion.
    """
    # all-pair refinement
    mean_cosines = []
    for i, src_lang in enumerate(self.params.src_langs):
        # mapped word embeddings
        src_emb = apply_mapping(self.mappings[src_lang],
                                self.embs[src_lang].weight)
        src_emb = src_emb / src_emb.norm(2, 1, keepdim=True).expand_as(src_emb)
        for j in range(i + 1, len(self.params.all_langs)):
            tgt_lang = self.params.all_langs[j]
            tgt_emb = apply_mapping(self.mappings[tgt_lang],
                                    self.embs[tgt_lang].weight)
            tgt_emb = tgt_emb / tgt_emb.norm(
                2, 1, keepdim=True).expand_as(tgt_emb)

            # build dictionary
            # for dico_method in ['nn', 'csls_knn_10']:
            for dico_method in ['csls_knn_10']:
                dico_build = 'S2T'
                dico_max_size = 10000
                # temp params / dictionary generation
                _params = deepcopy(self.params)
                _params.dico_method = dico_method
                _params.dico_build = dico_build
                _params.dico_threshold = 0
                _params.dico_max_rank = 10000
                _params.dico_min_size = 0
                _params.dico_max_size = dico_max_size
                s2t_candidates = get_candidates(src_emb, tgt_emb, _params)
                t2s_candidates = get_candidates(tgt_emb, src_emb, _params)
                dico = build_dictionary(src_emb, tgt_emb, _params,
                                        s2t_candidates, t2s_candidates)
                # mean cosine
                if dico is None:
                    mean_cosine = -1e9
                else:
                    mean_cosine = (src_emb[dico[:dico_max_size, 0]] *
                                   tgt_emb[dico[:dico_max_size, 1]]
                                   ).sum(1).mean()
                mean_cosine = mean_cosine.item() if isinstance(
                    mean_cosine, torch.Tensor) else mean_cosine
                logger.info(
                    "%s-%s: Mean cosine (%s method, %s build, %i max size): %.5f"
                    % (src_lang, tgt_lang, dico_method, _params.dico_build,
                       dico_max_size, mean_cosine))
                to_log['%s-%s-mean_cosine-%s-%s-%i'
                       % (src_lang, tgt_lang, dico_method,
                          _params.dico_build, dico_max_size)] = mean_cosine
                mean_cosines.append(mean_cosine)
    # average cosine across language pairs
    to_log['mean_cosine-%s-%s-%i'
           % (dico_method, _params.dico_build,
              dico_max_size)] = np.mean(mean_cosines)
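# --- Illustrative sketch (not part of the original evaluator) ---
# Hedged sketch of the CSLS scoring that 'csls_knn_10' refers to, using the
# standard definition with k=10 nearest neighbors; the repo's get_candidates
# helper may differ in batching and tie-breaking. Embeddings are assumed
# row-normalized, with at least k rows on each side.
import torch

def csls_scores_sketch(src_emb, tgt_emb, k=10):
    sims = src_emb @ tgt_emb.t()               # pairwise cosine similarities
    r_src = sims.topk(k, dim=1)[0].mean(1)     # avg sim of each src to its kNN targets
    r_tgt = sims.topk(k, dim=0)[0].mean(0)     # avg sim of each tgt to its kNN sources
    # CSLS(x, y) = 2*cos(x, y) - r_T(x) - r_S(y): penalizes "hub" targets
    return 2 * sims - r_src.unsqueeze(1) - r_tgt.unsqueeze(0)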
def monolingual_wordanalogy(self, to_log):
    """
    Evaluation on monolingual word analogy.
    """
    analogy_monolingual_scores = {}
    for lang in self.params.all_langs:
        analogy_scores = get_wordanalogy_scores(
            lang, self.vocabs[lang].word2id,
            apply_mapping(self.mappings[lang],
                          self.embs[lang].weight.detach()).cpu().numpy())
        if analogy_scores is None:
            continue
        analogy_monolingual_scores[lang] = np.mean(
            list(analogy_scores.values()))
        logger.info("Monolingual %s word analogy score average: %.5f"
                    % (lang, analogy_monolingual_scores[lang]))
        to_log[f'{lang}_analogy_monolingual_scores'] = \
            analogy_monolingual_scores[lang]
    if len(analogy_monolingual_scores) == 0:
        return
    avg_analogy_monolingual_score = (
        sum(analogy_monolingual_scores.values())
        / len(analogy_monolingual_scores))
    logger.info("Monolingual word analogy score average: %.5f"
                % avg_analogy_monolingual_score)
    to_log['analogy_monolingual_scores'] = avg_analogy_monolingual_score
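# --- Illustrative sketch (not part of the original evaluator) ---
# A minimal sketch of the analogy prediction behind get_wordanalogy_scores,
# assuming the common 3CosAdd rule (a : b :: c : ?) over row-normalized
# numpy embeddings; the actual helper may batch queries differently.
import numpy as np

def analogy_3cosadd_sketch(emb, word2id, a, b, c):
    q = emb[word2id[b]] - emb[word2id[a]] + emb[word2id[c]]
    q = q / np.linalg.norm(q)
    scores = emb @ q
    for w in (a, b, c):                # exclude the query words themselves
        scores[word2id[w]] = -np.inf
    return int(scores.argmax())        # id of the predicted answer word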
def monolingual_wordsim(self, to_log):
    """
    Evaluation on monolingual word similarity.
    """
    ws_monolingual_scores = {}
    for lang in self.params.all_langs:
        ws_scores = get_wordsim_scores(
            lang, self.vocabs[lang].word2id,
            apply_mapping(self.mappings[lang],
                          self.embs[lang].weight.detach()).cpu().numpy())
        if ws_scores is None:
            continue
        ws_monolingual_scores[lang] = np.mean(list(ws_scores.values()))
        logger.info("Monolingual %s word similarity score average: %.5f"
                    % (lang, ws_monolingual_scores[lang]))
        to_log[f'{lang}_ws_monolingual_scores'] = ws_monolingual_scores[lang]
        to_log.update({f'{lang}_{k}': v for k, v in ws_scores.items()})
    if len(ws_monolingual_scores) == 0:
        return
    avg_ws_monolingual_score = (sum(ws_monolingual_scores.values())
                                / len(ws_monolingual_scores))
    logger.info("Monolingual word similarity score average: %.5f"
                % avg_ws_monolingual_score)
    to_log['ws_monolingual_scores'] = avg_ws_monolingual_score
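# --- Illustrative sketch (not part of the original evaluator) ---
# Hedged sketch of what a word-similarity score typically is: the Spearman
# correlation between human ratings and model cosine similarities. The
# `triples` structure (word1, word2, rating) is hypothetical here;
# get_wordsim_scores may also track OOV rates per dataset.
import numpy as np
from scipy.stats import spearmanr

def wordsim_score_sketch(emb, word2id, triples):
    gold, pred = [], []
    for w1, w2, rating in triples:
        if w1 not in word2id or w2 not in word2id:
            continue                   # skip out-of-vocabulary pairs
        v1, v2 = emb[word2id[w1]], emb[word2id[w2]]
        cos = v1 @ v2 / (np.linalg.norm(v1) * np.linalg.norm(v2))
        gold.append(rating)
        pred.append(cos)
    return spearmanr(gold, pred).correlation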
def word_translation(self, to_log):
    """
    Evaluation on word translation.
    """
    # mapped word embeddings
    for l1, l2 in itertools.permutations(self.params.langs, 2):
        torch.cuda.empty_cache()
        path = get_dict_path(self.params.dico_eval,
                             self.params.dicts_eval_path, l1, l2)
        if not os.path.exists(path):
            logger.info(
                'Warning: Test dictionary for {}-{} does not exist. '
                'Skipping this pair.'.format(l1, l2))
            continue
        src_emb = apply_mapping(self.mapping[l1], self.embs[l1].weight).data
        src_emb = src_emb.cuda() if self.params.cuda else src_emb.cpu()
        tgt_emb = apply_mapping(self.mapping[l2], self.embs[l2].weight).data
        tgt_emb = tgt_emb.cuda() if self.params.cuda else tgt_emb.cpu()
        for method in ['nn', 'csls_knn_10']:
            results = get_word_translation_accuracy(
                l1, self.lang_dico[l1].word2id, src_emb,
                l2, self.lang_dico[l2].word2id, tgt_emb,
                method=method,
                dico_eval=self.params.dico_eval,
                dicts_eval_path=self.params.dicts_eval_path)
            to_log.update([('%s-%s_%s-%s' % (k, method, l1, l2), v)
                           for k, v in results])
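# --- Illustrative sketch (not part of the original evaluator) ---
# Minimal sketch of the precision@k bookkeeping inside
# get_word_translation_accuracy, assuming `predictions` maps a source word
# id to a ranked list of candidate target ids and `gold` maps it to the set
# of acceptable translations (both hypothetical structures).
def precision_at_k_sketch(predictions, gold, k=1):
    hits = sum(1 for src_id, cands in predictions.items()
               if set(cands[:k]) & gold[src_id])
    return 100.0 * hits / len(predictions)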
def word_translation(self):
    """
    Evaluation on word translation.
    """
    # mapped word embeddings
    all_emb = {
        l2: apply_mapping(self.mapping[l2], self.embs[l2].weight).data.cpu()
        for l2 in self.params.langs
    }
    results = defaultdict(dict)
    # for computational efficiency, iterate over source languages and
    # calculate all methods for each one
    for src_lang in self.params.langs:
        logger.info('\n\n\n\nSource Language: {}\n\n\n\n'.format(src_lang))
        torch.cuda.empty_cache()
        # get source queries
        paths = self.get_dico_paths(src_lang)
        query_ids = self.aggregate_query_ids(paths, src_lang)
        if query_ids is None:
            logger.info(
                'Warning: No test dictionary was found for source language '
                '{}. Skipping!'.format(src_lang))
            continue
        method = 'csls_knn_10'
        # initial bilingual translation
        init_trans, top_scores = BI_translation(src_lang, query_ids, method,
                                                all_emb,
                                                cuda=self.params.cuda)
        for inf_met in self.params.multilingual_inference_method:
            logger.info(
                '\n\nMultilingual inference method: {}\n\n'.format(inf_met))
            # improve the source word representation, then re-translate
            if inf_met != 'BI':
                updated_trans, used_langs = update_translation_for_all_langs(
                    self.params.langs, src_lang, query_ids, all_emb,
                    init_trans, method, inf_met, top_scores,
                    cuda=self.params.cuda)
            else:
                used_langs = None
                updated_trans = init_trans
            # re-arrange translations for convenience
            translation_by_src_id, used_langs_by_src_id = \
                self.translation_by_src_id(updated_trans, used_langs,
                                           src_lang)
            # calculate accuracy and matching per source word
            for tgt_lang, path in paths.items():
                pair_result = self.get_pair_accuracy(
                    path, src_lang, self.lang_dico[src_lang].word2id,
                    tgt_lang, self.lang_dico[tgt_lang].word2id,
                    translation_by_src_id, method)
                if inf_met != 'BI':
                    self.print_aux_statistics(src_lang, tgt_lang, path,
                                              used_langs_by_src_id)
                results[(src_lang, tgt_lang)][inf_met] = pair_result
    save_results(self.params, results,
                 self.params.multilingual_inference_method)
def crosslingual_wordsim(self, to_log, src_lang=None, tgt_lang=None):
    """
    Evaluation on cross-lingual word similarity.
    If src_lang and tgt_lang are not specified, evaluate all src_langs
    to tgt_lang.
    """
    # evaluate all src langs to tgt_lang by default
    if src_lang is None and tgt_lang is None:
        ws_crosslingual_scores = []
        tgt_lang = self.params.tgt_lang
        tgt_emb = self.embs[tgt_lang].weight.detach().cpu().numpy()
        for src_lang in self.params.src_langs:
            src_emb = apply_mapping(
                self.mappings[src_lang],
                self.embs[src_lang].weight.detach()).cpu().numpy()
            # cross-lingual wordsim evaluation
            ws_scores = get_crosslingual_wordsim_scores(
                src_lang, self.vocabs[src_lang].word2id, src_emb,
                tgt_lang, self.vocabs[tgt_lang].word2id, tgt_emb,
                ignore_oov=self.params.semeval_ignore_oov)
            if ws_scores is None:
                continue
            ws_crosslingual_score = np.mean(list(ws_scores.values()))
            ws_crosslingual_scores.append(ws_crosslingual_score)
            logger.info("%s-%s cross-lingual word similarity score: %.5f"
                        % (src_lang, tgt_lang, ws_crosslingual_score))
            to_log[f'{src_lang}_{tgt_lang}_ws_crosslingual_scores'] = \
                ws_crosslingual_score
            to_log.update({
                f'{src_lang}_{tgt_lang}_{k}': v
                for k, v in ws_scores.items()
            })
        avg_ws_crosslingual_score = np.mean(ws_crosslingual_scores)
        logger.info("Cross-lingual word similarity score average: %.5f"
                    % avg_ws_crosslingual_score)
        to_log['ws_crosslingual_scores'] = avg_ws_crosslingual_score
    else:
        # only evaluate src_lang to tgt_lang; bridge as necessary
        assert src_lang is not None and tgt_lang is not None
        # encode src
        src_emb = apply_mapping(
            self.mappings[src_lang],
            self.embs[src_lang].weight.detach()).cpu().numpy()
        # encode tgt
        tgt_emb = apply_mapping(
            self.mappings[tgt_lang],
            self.embs[tgt_lang].weight.detach()).cpu().numpy()
        # cross-lingual wordsim evaluation
        ws_scores = get_crosslingual_wordsim_scores(
            src_lang, self.vocabs[src_lang].word2id, src_emb,
            tgt_lang, self.vocabs[tgt_lang].word2id, tgt_emb)
        if ws_scores is None:
            return
        ws_crosslingual_score = np.mean(list(ws_scores.values()))
        logger.info("%s-%s cross-lingual word similarity score: %.5f"
                    % (src_lang, tgt_lang, ws_crosslingual_score))
        to_log[f'{src_lang}_{tgt_lang}_ws_crosslingual_scores'] = \
            ws_crosslingual_score
        to_log.update({
            f'{src_lang}_{tgt_lang}_{k}': v
            for k, v in ws_scores.items()
        })
def sent_translation(self, to_log, src_lang=None, tgt_lang=None):
    """
    Evaluation on sentence translation.
    If src_lang and tgt_lang are not specified, evaluate all src_langs
    to tgt_lang.
    Only available on Europarl, for en - {de, es, fr, it} language pairs.
    """
    # parameters
    n_keys = 200000
    n_queries = 2000
    n_idf = 300000

    # load Europarl data
    if not hasattr(self, 'europarl_data'):
        self.europarl_data = {}

    # evaluate all src langs to tgt_lang by default
    if src_lang is None and tgt_lang is None:
        tgt_lang = self.params.tgt_lang
        for src_lang in self.params.src_langs:
            lang_pair = (src_lang, tgt_lang)
            # load Europarl data
            if lang_pair not in self.europarl_data:
                self.europarl_data[lang_pair] = load_europarl_data(
                    src_lang, tgt_lang, n_max=(n_keys + 2 * n_idf))
            # if no Europarl data for this language pair
            if not self.europarl_data or lang_pair not in self.europarl_data \
                    or self.europarl_data[lang_pair] is None:
                logger.info(
                    f'Europarl data not found for {src_lang}-{tgt_lang}.')
                continue
            # mapped word embeddings
            src_emb = apply_mapping(self.mappings[src_lang],
                                    self.embs[src_lang].weight)
            tgt_emb = self.embs[tgt_lang].weight
            # get idf weights
            idf = get_idf(self.europarl_data[lang_pair], src_lang, tgt_lang,
                          n_idf=n_idf)
            for method in ['nn', 'csls_knn_10']:
                # source <- target sentence translation
                results = get_sent_translation_accuracy(
                    self.europarl_data[lang_pair],
                    src_lang, self.vocabs[src_lang].word2id, src_emb,
                    tgt_lang, self.vocabs[tgt_lang].word2id, tgt_emb,
                    n_keys=n_keys, n_queries=n_queries,
                    method=method, idf=idf)
                to_log.update([
                    ('%s_to_%s_%s-%s' % (tgt_lang, src_lang, k, method), v)
                    for k, v in results
                ])
                # target <- source sentence translation
                results = get_sent_translation_accuracy(
                    self.europarl_data[lang_pair],
                    tgt_lang, self.vocabs[tgt_lang].word2id, tgt_emb,
                    src_lang, self.vocabs[src_lang].word2id, src_emb,
                    n_keys=n_keys, n_queries=n_queries,
                    method=method, idf=idf)
                to_log.update([
                    ('%s_to_%s_%s-%s' % (src_lang, tgt_lang, k, method), v)
                    for k, v in results
                ])
    else:
        # only evaluate src_lang to tgt_lang; bridge as necessary
        assert src_lang is not None and tgt_lang is not None
        lang_pair = (src_lang, tgt_lang)
        # load Europarl data
        if lang_pair not in self.europarl_data:
            self.europarl_data[lang_pair] = load_europarl_data(
                src_lang, tgt_lang, n_max=(n_keys + 2 * n_idf))
        # if no Europarl data for this language pair
        if not self.europarl_data or lang_pair not in self.europarl_data \
                or self.europarl_data[lang_pair] is None:
            logger.info(f'Europarl data not found for {src_lang}-{tgt_lang}.')
            return
        # encode src
        src_emb = apply_mapping(self.mappings[src_lang],
                                self.embs[src_lang].weight)
        # encode tgt
        tgt_emb = apply_mapping(self.mappings[tgt_lang],
                                self.embs[tgt_lang].weight)
        # get idf weights
        idf = get_idf(self.europarl_data[lang_pair], src_lang, tgt_lang,
                      n_idf=n_idf)
        for method in ['nn', 'csls_knn_10']:
            # source <- target sentence translation
            results = get_sent_translation_accuracy(
                self.europarl_data[lang_pair],
                src_lang, self.vocabs[src_lang].word2id, src_emb,
                tgt_lang, self.vocabs[tgt_lang].word2id, tgt_emb,
                n_keys=n_keys, n_queries=n_queries,
                method=method, idf=idf)
            to_log.update([
                ('%s_to_%s_%s-%s' % (tgt_lang, src_lang, k, method), v)
                for k, v in results
            ])
            # target <- source sentence translation
            results = get_sent_translation_accuracy(
                self.europarl_data[lang_pair],
                tgt_lang, self.vocabs[tgt_lang].word2id, tgt_emb,
                src_lang, self.vocabs[src_lang].word2id, src_emb,
                n_keys=n_keys, n_queries=n_queries,
                method=method, idf=idf)
            to_log.update([
                ('%s_to_%s_%s-%s' % (src_lang, tgt_lang, k, method), v)
                for k, v in results
            ])
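# --- Illustrative sketch (not part of the original evaluator) ---
# Hedged sketch of the idf-weighted bag-of-words sentence encoding that the
# Europarl retrieval above relies on; get_idf and the real encoder may
# differ in smoothing and OOV handling. `idf` is assumed to map a word to
# its idf weight.
import numpy as np

def encode_sentence_sketch(sentence, emb, word2id, idf):
    vecs, weights = [], []
    for w in sentence.split():
        if w in word2id and w in idf:
            vecs.append(emb[word2id[w]])
            weights.append(idf[w])
    if not vecs:
        return None                    # no in-vocabulary words to encode
    weights = np.array(weights)
    # idf-weighted average of word vectors
    return (np.array(vecs) * weights[:, None]).sum(0) / weights.sum()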
def word_translation(self, to_log, src_lang=None, tgt_lang=None):
    """
    Evaluation on word translation.
    If src_lang and tgt_lang are not specified, evaluate all src_langs
    to tgt_lang.
    """
    # evaluate all src langs to tgt_lang by default
    if src_lang is None and tgt_lang is None:
        wt_precisions = []
        tgt_lang = self.params.tgt_lang
        tgt_emb = self.embs[tgt_lang].weight.detach()
        for src_lang in self.params.src_langs:
            # mapped word embeddings
            src_emb = apply_mapping(self.mappings[src_lang],
                                    self.embs[src_lang].weight.detach())
            for method in ['nn', 'csls_knn_10']:
                results = get_word_translation_accuracy(
                    src_lang, self.vocabs[src_lang].word2id, src_emb,
                    tgt_lang, self.vocabs[tgt_lang].word2id, tgt_emb,
                    method=method, dico_eval=self.params.dico_eval)
                if results is None:
                    continue
                to_log.update([
                    ('%s-%s_%s-%s' % (src_lang, tgt_lang, k, method), v)
                    for k, v in results
                ])
                if method == 'csls_knn_10':
                    for k, v in results:
                        if k == 'precision_at_1':
                            wt_precisions.append(v)
        to_log['precision_at_1-csls_knn_10'] = np.mean(wt_precisions)
        logger.info("word translation precision@1: %.5f"
                    % np.mean(wt_precisions))
    else:
        # only evaluate src_lang to tgt_lang; bridge as necessary
        assert src_lang is not None and tgt_lang is not None
        # encode src
        src_emb = apply_mapping(self.mappings[src_lang],
                                self.embs[src_lang].weight).cpu()
        # encode tgt
        tgt_emb = apply_mapping(self.mappings[tgt_lang],
                                self.embs[tgt_lang].weight).cpu()
        for method in ['nn', 'csls_knn_10']:
            results = get_word_translation_accuracy(
                src_lang, self.vocabs[src_lang].word2id, src_emb,
                tgt_lang, self.vocabs[tgt_lang].word2id, tgt_emb,
                method=method, dico_eval=self.params.dico_eval)
            if results is None:
                continue
            to_log.update([
                ('%s-%s_%s-%s' % (src_lang, tgt_lang, k, method), v)
                for k, v in results
            ])
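# --- Hypothetical usage sketch (not part of the original evaluator) ---
# The evaluators above all fill a plain dict, so a training loop can rank
# checkpoints by the model selection criterion. `evaluator` is an assumed
# instance of the class these methods belong to; the key format follows the
# 'mean_cosine-%s-%s-%i' pattern written by dist_mean_cosine.
to_log = {}
evaluator.dist_mean_cosine(to_log)
evaluator.word_translation(to_log)
criterion = to_log.get('mean_cosine-csls_knn_10-S2T-10000')
print('selection criterion:', criterion)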