def gen_wn_lemmas(pos):
    """Generates POS-tag, synset ID, lemma.

    That is all the info that we currently want from WordNet.
    """
    for synset in wn.all_synsets(pos=POS_TO_WN.get(pos)):
        for lemma in synset.lemmas():
            yield pos, synset.name, lemma.name
def gen_wn_lemmas(pos=wn.NOUN):
    """Generates POS-tag, synset ID, lemma.

    That is all the info that we currently want from WordNet.

    TODO: get ILI links too to be able to make bilingual lexicons.
    """
    for synset in wn.all_synsets(pos=pos):
        for lemma in synset.lemmas():
            yield pos, synset.name, lemma.name
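# A minimal sketch of how gen_wn_lemmas might be consumed, e.g. to dump the
# lemma table to disk. The tab-separated layout, the helper name
# dump_wn_lemmas, and the file name "wn_lemmas.tsv" are illustrative
# assumptions, not part of the original script; wn is assumed to be imported
# as above.
def dump_wn_lemmas(path="wn_lemmas.tsv", pos=wn.NOUN):
    with open(path, "w", encoding="UTF-8") as out:
        for pos_tag, synset_id, lemma in gen_wn_lemmas(pos):
            out.write("%s\t%s\t%s\n" % (pos_tag, synset_id, lemma))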
if is_tag_allowed(word_analysis) and word not in basic \
        and word not in foreign_words and has_relations(synsets):
    foreign_words[word] = None

print("Compiling the list of processed foreign words")
with open("foreign_processed.txt", "w", encoding="UTF-8") as file:
    for word in foreign_words:
        # Keep only single-token entries.
        if len(word.split(" ")) <= 1:
            if foreign_words[word] is None:
                file.write(word + "\n")
            else:
                file.write(word + " " + foreign_words[word] + "\n")
foreign_words.clear()
print("Foreign word list processed")

if __name__ == '__main__':
    # Warning!
    # The full processing may take a couple of hours.
    print("Loading all synsets into memory...")
    wn.all_synsets()
    print("Loaded")
    process_frequency_list()
    process_basic_list()
    process_foreign_list()
import sys, os

_MY_DIR = os.path.dirname(__file__)
sys.path.insert(1, os.path.join(_MY_DIR, '..', '..'))

from estnltk.wordnet import wn

FILE_ = os.path.join(_MY_DIR, '..', 'max_tax_depths.cnf')

# Find the maximum taxonomy depth for each part-of-speech tag.
max_depths = {}
for pos in 'bavn':
    max_depth = -1
    for synset in wn.all_synsets(pos):
        depth = synset._min_depth()
        if depth > max_depth:
            max_depth = depth
    max_depths[pos] = max_depth

# Write one "<pos>:<depth>" line per part of speech.
with open(FILE_, 'w') as fout:
    for pos in max_depths:
        fout.write("%s:%d\n" % (pos, max_depths[pos]))
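# A hedged sketch of reading max_tax_depths.cnf back in: each line has the
# "<pos>:<depth>" format written above. The helper name read_max_depths is
# hypothetical and not part of the original script.
def read_max_depths(path=FILE_):
    depths = {}
    with open(path) as fin:
        for line in fin:
            pos, depth = line.strip().split(':')
            depths[pos] = int(depth)
    return depths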
from estnltk.wordnet import wn
from pprint import pprint

# Total number of synsets in the Estonian WordNet.
print(len(wn.all_synsets()))

# 'koer' means 'dog'; restricting the query to verbs should yield nothing.
pprint(wn.synsets("koer", pos=wn.VERB))
pprint(wn.synsets('koer'))

# 'king' means 'shoe'.
synset = wn.synset("king.n.01")
pprint(synset.name)
pprint(synset.pos)
pprint(synset.definition())
pprint(synset.examples())

# Semantic relations of the synset.
pprint(synset.hypernyms())
pprint(synset.hyponyms())
pprint(synset.meronyms())
pprint(synset.holonyms())
pprint(synset.get_related_synsets('fuzzynym'))

# Similarity scores against 'kinnas' ('glove').
target_synset = wn.synset('kinnas.n.01')
pprint(synset.path_similarity(target_synset))
pprint(synset.lch_similarity(target_synset))
pprint(synset.wup_similarity(target_synset))
pprint(synset.lowest_common_hypernyms(target_synset))