def main():
    """Write Japanese neighbour words for English vocabulary entries to a file.

    Builds ``FastVector`` objects from the Japanese and English ``.vec``
    files, calls ``apply_transform`` on each with its alignment matrix, and
    then walks the English vocabulary (``en_dic.word2id`` keys, in their
    stored order).  For every English word it asks the Japanese dictionary
    for ``translate_k_nearest_neighbour(..., k=15)`` and writes one line to
    ``en_ja_multifast.txt``::

        <1-based index>,<english word>,<neighbour>,<neighbour>,...

    The output file is written as UTF-8 so the Japanese words are encoded
    the same way on every platform.

    Raises:
        SystemExit: raised right after the 5001st line has been written
            (the check is ``idx > 5000``), which terminates the process.
            When the vocabulary has 5000 entries or fewer the function
            returns normally.
    """
    ja_dic = FastVector(vector_file='../vecmap/data/wiki.ja.vec')
    en_dic = FastVector(vector_file='../vecmap/data/wiki.en.vec')
    print("loaded the dictionaries")

    # NOTE(review): presumably these matrices map both languages into one
    # shared vector space -- confirm against the FastVector implementation.
    ja_dic.apply_transform('alignment_matrices/ja.txt')
    en_dic.apply_transform('alignment_matrices/en.txt')
    print("transformed the dictionaries")

    en_word_list = list(en_dic.word2id.keys())
    print("The total length of English pretrained vector : " + str(len(en_word_list)))

    # The context manager closes (and flushes) the file on every exit path,
    # including the SystemExit raised below.
    with open("en_ja_multifast.txt", "w", encoding="utf-8") as result_f:
        for idx, en_word in enumerate(tqdm(en_word_list), start=1):
            ja_words = ja_dic.translate_k_nearest_neighbour(en_dic[en_word], k=15)
            fields = [str(idx), en_word] + list(ja_words)
            result_f.write(",".join(fields) + "\n")

            # Stop the whole program once entry 5001 is on disk; this keeps
            # the process-terminating behaviour of the previous exit() call.
            if idx > 5000:
                raise SystemExit
# Writes nearest-neighbour lookups for a small hand-picked set of Japanese and
# English words to multi_fast.txt, one "<query word>,<neighbour>,..." line each.
# NOTE(review): ja_dic and en_dic are used here but not created in this span --
# confirm which enclosing scope provides them.
print("loaded the dictionaries")

ja_dic.apply_transform('alignment_matrices/ja.txt')
en_dic.apply_transform('alignment_matrices/en.txt')
print("transformed the dictionaries")

en_word_list = [
    "cat", "dog", "apple", "car", "train", "school", "student", "teacher",
]
ja_word_list = ["猫", "犬", "りんご", "車", "電車", "学校", "生徒", "先生"]

# Maps each query word (Japanese or English) to the neighbours returned for it.
result = {}

# UTF-8 is requested explicitly because Japanese text is written; the context
# manager closes the file even if a lookup raises.
with open("multi_fast.txt", "w", encoding="utf-8") as result_f:
    # Japanese query words: look up neighbours in the English dictionary (k=20).
    for ja_word in ja_word_list:
        en_words = en_dic.translate_k_nearest_neighbour(ja_dic[ja_word], k=20)
        result[ja_word] = en_words
        result_f.write(ja_word + "," + ",".join(en_words) + "\n")

    # English query words: look up neighbours in the Japanese dictionary (k=20).
    for en_word in en_word_list:
        ja_words = ja_dic.translate_k_nearest_neighbour(en_dic[en_word], k=20)
        result[en_word] = ja_words
        result_f.write(en_word + "," + ",".join(ja_words) + "\n")

# NOTE: `result` is only filled in this span; a JSON dump of it to result.json
# (ensure_ascii=False, indent=2) is currently disabled.