def get_topics(self, word):
    """Collect the distinct candidates found by following 'hype' links.

    Looks ``word`` up by lemma, takes the first matching word record and,
    for each of its senses, calls
    ``self.find_synlinks_recursively(sense, 'hype')``, which is expected to
    append its hits to ``self.candidates``.

    Args:
        word: Lemma to look up.

    Returns:
        A list of the distinct entries of ``self.candidates`` in first-seen
        order, or ``None`` when no word record matches ``word``.

    Side effects:
        Resets ``self.candidates``; after the call it holds the raw
        (possibly duplicated) hits.
    """
    self.candidates = []
    with WordLoader() as word_loader, SenseLoader() as sense_loader:
        words = word_loader.load_words_with_lemma(word)
        if not words:
            # Unknown lemma: keep the historical "no result" value.
            return None
        for sense in sense_loader.load_senses_with_synset(words[0]):
            self.find_synlinks_recursively(sense, 'hype')
    # dict.fromkeys de-duplicates while keeping insertion order, so the
    # result is stable between runs (a set's iteration order is not).
    return list(dict.fromkeys(self.candidates))
def find_synlinks_recursively(self, senses, link, lang='jpn', _depth=0):
    """Walk the chain of first ``link`` synlinks starting at one sense.

    For the given sense the lemma of its word is printed (followed by a
    space, without a newline) and, when that lemma is in
    ``self.categories``, appended to ``self.candidates``. The walk then
    continues with the sense loaded for the ``synset2`` of the first
    synlink, and stops when there is no synlink or no sense.

    Args:
        senses: A single sense record (despite the plural name), or
            ``None`` to do nothing.
        link: Synlink type to follow.
        lang: Forwarded to the recursive call only; it is not applied to
            the sense lookup, which keeps using the loader's default.
        _depth: Recursion depth, incremented on each recursive call; not
            otherwise used.

    Returns:
        None. Results are accumulated in ``self.candidates``.
    """
    if senses is None:
        return
    with SynlinkLoader() as synlink_loader, \
            WordLoader() as word_loader, \
            SenseLoader() as sense_loader:
        word = word_loader.load_word_with_wordid(senses.wordid).lemma
        synlinks = synlink_loader.load_synlinks_with_sense_and_link(
            senses, link)
        print(f"{word} ", end='')
        if word in self.categories:
            self.candidates.append(word)
        if not synlinks:
            return
        next_sense = sense_loader.load_sense_with_synset(synlinks[0].synset2)
    # Recurse only after the loaders above have been released, so a long
    # chain does not keep three open loaders per level alive at once.
    self.find_synlinks_recursively(next_sense, link, lang, _depth + 1)
def get_hypos(self, word):
    """Gather the lemmas reached through 'hypo' synlinks from ``word``.

    The first word record matching ``word`` is used. For each of its
    senses, every 'hypo' synlink is followed to the sense loaded for its
    ``synset2``; when such a sense exists, the lemma of its word is
    appended to ``self.hypos``.

    Args:
        word: Lemma to look up.

    Returns:
        ``self.hypos`` (the same list object, duplicates are NOT removed),
        or ``None`` when no word record matches ``word``.
    """
    self.hypos = []
    with SynlinkLoader() as link_db, \
            WordLoader() as word_db, \
            SenseLoader() as sense_db:
        matches = word_db.load_words_with_lemma(word)
        if not matches:
            return
        for origin in sense_db.load_senses_with_synset(matches[0]):
            edges = link_db.load_synlinks_with_sense_and_link(origin, 'hypo')
            for edge in edges:
                target = sense_db.load_sense_with_synset(edge.synset2)
                if not target:
                    # No sense available for the linked synset; skip it.
                    continue
                target_word = word_db.load_word_with_wordid(target.wordid)
                self.hypos.append(target_word.lemma)
    return self.hypos
def get_senses(word_to_check, synset_data):
    """Return all senses of a lemma together with their vectors, memoised.

    Results are cached in the module-level ``GET_SENSE_CALCULATED`` dict,
    keyed by ``word_to_check``; a cached entry is returned as-is.

    Args:
        word_to_check: Lemma to look up.
        synset_data: Passed through unchanged to ``get_vecs_from_senses``.

    Returns:
        A ``(senses, senses_vecs)`` tuple of two lists: the senses of every
        word record matching the lemma, and the concatenated results of
        ``get_vecs_from_senses`` for those senses, accumulated word by word.
    """
    if word_to_check in GET_SENSE_CALCULATED:
        return GET_SENSE_CALCULATED[word_to_check]

    senses = []
    senses_vecs = []
    # Open each loader once and release it deterministically instead of
    # creating throw-away loaders (one per word) that are never closed.
    with WordLoader() as word_loader, SenseLoader() as sense_loader:
        for word in word_loader.load_words_with_lemma(word_to_check):
            word_senses = sense_loader.load_senses_with_synset(word)
            senses.extend(word_senses)
            senses_vecs.extend(get_vecs_from_senses(synset_data, word_senses))

    result = (senses, senses_vecs)
    GET_SENSE_CALCULATED[word_to_check] = result
    return result
def print_synlinks_recursively(senses, link, lang='jpn', _depth=0):
    """Print an indented tree of the senses reachable through ``link``.

    For every sense that has at least one ``link`` synlink, one line is
    printed: two spaces per ``_depth`` level, the lemma of the sense's
    word, a space, and the name of the sense's synset. The senses loaded
    for each synlink's ``synset2`` (in language ``lang``) are then
    processed the same way one level deeper.

    Args:
        senses: Iterable of sense records to start from.
        link: Synlink type to follow.
        lang: Language passed to the sense lookup of linked synsets.
        _depth: Current nesting level; controls the indentation.
    """
    for current in senses:
        with SynlinkLoader() as synlink_loader:
            outgoing = synlink_loader.load_synlinks_with_sense_and_link(
                current, link)

        if outgoing:
            with WordLoader() as word_loader, SynsetLoader() as synset_loader:
                indent = ' ' * 2 * _depth
                lemma = word_loader.load_word_with_wordid(current.wordid).lemma
                synset_name = synset_loader.load_synset_with_synset(
                    current.synset).name
                print(''.join((indent, lemma, ' ', synset_name)))

        children = []
        for edge in outgoing:
            with SenseLoader() as sense_loader:
                target = sense_loader.load_sense_with_synset(edge.synset2, lang)
            if target:
                children.append(target)

        print_synlinks_recursively(children, link, lang, _depth + 1)
for synLink in synlinks: with SenseLoader() as sense_loader: sense = sense_loader.load_sense_with_synset( synLink.synset2, lang) if sense: _senses.append(sense) print_synlinks_recursively(_senses, link, lang, _depth + 1) if __name__ == '__main__': if len(sys.argv) >= 3: with WordLoader() as word_loader: words = word_loader.load_words_with_lemma(sys.argv[1]) if words: with SenseLoader() as sense_loader: senses = sense_loader.load_senses_with_synset(words[0]) if len(sys.argv) >= 3: link = sys.argv[2] else: link = 'hypo' if len(sys.argv) == 4: lang = sys.argv[3] else: lang = 'jpn' print_synlinks_recursively(senses, link, lang) sys.exit()
if count == 0: return total return total / count if __name__ == '__main__': word2vec_dic = load_word2vec_dic() synset_data = load_synset_data() total = 0 correct = 0 for wordid in range(155288, 249121): if wordid % 1000 == 0: print(wordid) word = WordLoader().load_word_with_wordid(wordid) lemma_senses = SenseLoader().load_senses_with_synset(word) lemma_senses_vecs = get_vecs_from_senses(synset_data, lemma_senses) chosen_sense_index = Random().randint(0, len(lemma_senses) - 1) chosen_synset_id = lemma_senses[chosen_sense_index].synset synset = SynsetLoader().load_synset_with_synset(chosen_synset_id) similar_synset_ids = get_similar_synset_ids(synset) similar_synset_avg_vector = average_synset_vectors( similar_synset_ids, synset_data) sense_ranking = get_closest_sense(similar_synset_avg_vector, lemma_senses, lemma_senses_vecs, euclidean) if sense_ranking[0][0].synset == chosen_synset_id: #correct correct += 1