def derivational_forms(first, second):
    # Checks whether any derivationally related forms of the two words' lemmas match.
    f_syns = wn.synsets(first)
    s_syns = wn.synsets(second)
    try:
        for i in f_syns:
            sub_i = str(i)[8:-2]  # synset name, e.g. 'dog.n.01'
            curr_lemma = wn.lemma(sub_i + "." + sub_i[0:sub_i.index(".")])
            derived_forms = curr_lemma.derivationally_related_forms()
            for derived in derived_forms:
                rep1 = str(derived)[7:str(derived).index(".")]   # head word of the derived lemma's synset
                index = str(derived).index(rep1) + len(rep1) + 6
                rep2 = str(derived)[index:-2]                    # the derived lemma name itself
                if rep1 == second:
                    return 1
                elif rep1 != rep2:
                    if rep2 == second:
                        return 1
                for s in s_syns:
                    s_str = str(s)[8:str(s).index(".")]
                    sub_s = str(s)[8:-2]
                    if rep1 == s_str:
                        return 1.5
                    elif rep1 != rep2:
                        if rep2 == s_str:
                            return 1.5
                    s_curr_lemma = wn.lemma(sub_s + "." + s_str)
                    s_derived_forms = s_curr_lemma.derivationally_related_forms()
                    for match in s_derived_forms:
                        # parse `match` here (the original re-parsed `derived`)
                        match_rep1 = str(match)[7:str(match).index(".")]
                        match_index = str(match).index(match_rep1) + len(match_rep1) + 6
                        match_rep2 = str(match)[match_index:-2]
                        if match_rep1 == first:
                            return 2
                        elif match_rep1 != match_rep2:
                            if match_rep2 == first:
                                return 2
                        if match in derived_forms or match_rep1 == rep1 or match_rep1 == rep2:
                            return 1
    except Exception:
        pass
    return 0
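A minimal alternative sketch (not the original author's code): the same derivational check can be written against the Lemma API, comparing lemma names instead of slicing repr() strings.

from nltk.corpus import wordnet as wn

def shares_derivational_form(first, second):
    # True if some lemma of `first` has a derivationally related form whose name
    # matches `second` or one of `second`'s own lemma names.
    second_names = {l.name() for s in wn.synsets(second) for l in s.lemmas()}
    for syn in wn.synsets(first):
        for lem in syn.lemmas():
            for derived in lem.derivationally_related_forms():
                if derived.name() == second or derived.name() in second_names:
                    return True
    return False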
def lemma(self, name, lang='eng'):
    lemma = wn.lemma(name, lang=lang)
    # lemma._vector = self._vector(lemma)
    # lemma._freqs = {}
    # for t in self._TOPICS:
    #     lemma._freqs[t] = self._lemma_freq(lemma, t)
    return lemma
def _mk_synset(w):
    #
    # (synset form) cat.n.01 into the Synset object form
    # (lemma form)  syndicate.n.01.crime_syndicate
    #
    word = w.strip().replace(' ', '_')
    pat_regular_form = re.compile(r".*[.]\d{2}$")
    pat_regular_lemma_form = re.compile(r".*[.]\d{2}[.].+$")
    if pat_regular_form.match(word):
        try:
            return wordnet.synset(word)
        except Exception as ex:
            try:
                # try the first for the stem word
                return wordnet.synsets(word.split('.')[0])[0]
            except Exception as ex:
                return None
    elif pat_regular_lemma_form.match(word):
        try:
            return wordnet.lemma(word).synset()
        except Exception as ex:
            return None
    else:
        print(' * Error, invalid synset name: [{}] skipping'.format(w))
        return None
def _mk_synset(w):
    #
    # (synset form) cat.n.01 into the Synset object form
    # (lemma form)  syndicate.n.01.crime_syndicate
    #
    word = w.strip().replace(" ", "_")
    if word.count(".") == 2:
        try:
            return wordnet.synset(word)
        except Exception as ex:
            try:
                # try the first for the stem word
                return wordnet.synsets(word.split(".")[0])[0]
            except Exception as ex:
                return None
    elif word.count(".") == 3:
        try:
            return wordnet.lemma(word).synset()
        except Exception as ex:
            return None
    else:
        print(" * Error, invalid synset name", w, "skipping...")
        return None
def _antonyms(self):
    try:
        return wn.lemma("%s.%s.1.%s" % (self.wnbase, self.postag, self.lemma)).antonyms()
    except:
        return []
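For reference, a hedged standalone example of the same lookup (the lemma name is built as word.pos.sense.lemma); the antonym shown is the usual WordNet pairing but depends on the installed data.

from nltk.corpus import wordnet as wn
print(wn.lemma('good.a.01.good').antonyms())   # typically [Lemma('bad.a.01.bad')]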
def lemma(name_synsets):
    """
    This function returns the lemma object with the given name.

    .. note::
        Supports only the English language (*eng*).

    :param str name_synsets: name of the synset
    :return: lemma object with the given name
    :rtype: :class:`Lemma`

    :Example:

        >>> from pythainlp.corpus.wordnet import lemma
        >>>
        >>> lemma('practice.v.01.exercise')
        Lemma('practice.v.01.exercise')
        >>>
        >>> lemma('drill.v.03.exercise')
        Lemma('drill.v.03.exercise')
        >>>
        >>> lemma('exercise.n.01.exercise')
        Lemma('exercise.n.01.exercise')
    """
    return wordnet.lemma(name_synsets)
def get_antonym(word):
    print "Antonym for: " + word
    if len(word.split()) > 1:
        word = word.replace(" ", "_")
    # the slow part
    wnsynset = wn.synsets(word)
    print "WYNSET" + str(wnsynset)
    antonym = None
    # only getting one antonym
    for i in wnsynset:
        for el in i.lemmas():
            x = el.antonyms()
            if len(x) > 0:
                print "Antonym"
                antonym = x[0].name()
                break
    syn_set = []
    if antonym is not None:
        print "synonyms for antonym " + str(antonym)
        if len(antonym.split()) > 1:
            antonym = antonym.replace(" ", "_")  # was assigned to `word`, which was never used afterwards
        # the slow part
        wnsynset = wn.synsets(antonym)
        print "WYNSET" + str(wnsynset)
        for i in range(0, len(wnsynset)):
            for lemma in wnsynset[i].lemma_names():
                print "LEMMA"
                print lemma
                syn_set.append(lemma)
                deriv = wn.lemma(wnsynset[i].name() + "." + lemma)
                print "DERIVATIONS"
                for x in deriv.derivationally_related_forms():
                    print x.name()  # name() is a method; bare x.name prints the bound method
                    syn_set.append(x.name())
            print "Hyponym function: "
            for hypo in wnsynset[i].hyponyms():
                syn_set.append(re.findall(r"[a-zA-Z]*", hypo.name())[0])
                print re.findall(r"[a-zA-Z]*", hypo.name())[0]
            '''
            print "Hypernym function: "
            for hyper in wnsynset[i].hypernyms():
                syn_set.append(re.findall(r"[a-zA-Z]*", hyper.name())[0])
                print re.findall(r"[a-zA-Z]*", hyper.name())[0]
            '''
    return syn_set
def patched_lemma_from_key(key, wordnet=wordnet):
    try:
        lemma = wordnet.lemma_from_key(key)
    except WordNetError as e:
        if key in patching_data:
            lemma = wordnet.lemma(patching_data[key])
        elif '%3' in key:
            lemma = wordnet.lemma_from_key(key.replace('%3', '%5'))
        else:
            raise e
    return lemma
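A hedged usage sketch: `patching_data` is assumed to be a dict mapping broken sense keys to full lemma names, and the sense key below is only illustrative.

patching_data = {}  # hypothetical, e.g. {'broken%1:00:00::': 'some.n.01.some'}
lem = patched_lemma_from_key('dog%1:05:00::')
print(lem)   # expected: Lemma('dog.n.01.dog')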
def _generate_fingerprint(self, lemma):
    wordnet_lemma = wn.lemma(lemma)
    wordnet_lemmas = gen_fingerprints.get_related_lemmas(wordnet_lemma)
    lemmas = [l.synset().name() + "." + l.name() for l in wordnet_lemmas]
    fp = set()
    for lemma in lemmas:
        bits = self._get_bits_for_lemma(lemma)
        fp.update(bits)
    return fp
def extractWordsAndSynsets(self, filenameWords, filenameSynsets, filenameLexemes):
    # output files
    fWords = codecs.open(filenameWords, 'w', 'utf-8')
    fSynsets = codecs.open(filenameSynsets, 'w', 'utf-8')
    fLexemes = codecs.open(filenameLexemes, 'w', 'utf-8')
    wordCounter = 0
    wordCounterAll = 0
    synsetCounter = 0
    synsetCounterAll = 0
    lexemCounter = 0
    lexemCounterAll = 0
    ovv = []
    for pos in self.pos_list:
        for word in wn.all_lemma_names(pos=pos, lang=self.lang):
            wordCounterAll += 1
            self.WordIndex[word] = wordCounterAll
            fWords.write(word + " ")
            synsetInWord = 0
            for synset in wn.synsets(word, lang=self.lang):
                lexemCounterAll += 1
                synsetId = synset.name()
                if self.Shared.in_vocab(synsetId):
                    synsetInWord += 1
                    if synsetId not in self.SynsetIndex:
                        fSynsets.write(synsetId + " " + self.Shared.getVectorAsString(self.Shared.model[synsetId]) + "\n")
                        synsetCounter += 1
                        self.SynsetIndex[synsetId] = synsetCounter
                    lexemCounter += 1
                    # sense key for this lemma of the synset
                    sensekey = wn.lemma(synset.name() + '.' + word).key()
                    fWords.write(sensekey + ",")
                    fLexemes.write(str(self.SynsetIndex[synsetId]) + " " + str(wordCounterAll) + "\n")
                else:
                    ovv.append(synsetId)
            fWords.write("\n")
            if synsetInWord != 0:  # `is not 0` relies on int identity; use != for the comparison
                wordCounter += 1
            else:
                self.WordIndex[word] = -1
    fWords.close()
    fSynsets.close()
    fLexemes.close()
    print(" Words: %d / %d\n" % (wordCounter, wordCounterAll))
    print(" Synset: %d / %d\n" % (synsetCounter, synsetCounter + len(ovv)))
    print(" Lexems: %d / %d\n" % (lexemCounter, lexemCounterAll))
def find_antonym(word, pos_tag):
    global print_statements
    tag = pos_tag[0:2]
    if tag != 'JJ':
        return word
    s = str(wn.lemma(word + ".a.01." + word).antonyms())
    if print_statements:
        print "Found antonym:", s
    start = s.find("'")
    end = s.find(".")
    result = s[start + 1:end]
    return result
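A hedged alternative sketch: the antonym can be read through the Lemma API directly instead of slicing the printed list; it keeps the word.a.01.word naming pattern assumed by find_antonym above and assumes wn is nltk.corpus.wordnet as in that snippet.

def find_antonym_via_api(word, pos_tag):
    # Sketch only: falls back to the original word when no antonym is found.
    if pos_tag[0:2] != 'JJ':
        return word
    try:
        ants = wn.lemma(word + ".a.01." + word).antonyms()
        return ants[0].name() if ants else word
    except Exception:
        return word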
def tokens(sent, palabra):
    keyword = 0
    sent
    for word in sent:
        keyword = keyword + 1
        if palabra == word:
            synsent_palabra = wn.synset(palabra + ".n.01")
            s_palabra = str(synsent_palabra)
            su_hiponimo = synsent_palabra.hyponyms()
            if su_hiponimo:
                print("These are the hyponyms of", palabra + ":", sorted(su_hiponimo[0:]))
            else:
                print("No hyponyms were found")
            su_hiperonimos = synsent_palabra.hypernyms()
            if su_hiperonimos:
                print("These are the hypernyms of", palabra + ":", sorted(su_hiperonimos[0:]))
            else:
                print("No hypernyms were found")
            su_holonimo = synsent_palabra.member_holonyms()
            if su_holonimo:
                print("These are the holonyms of", palabra + ":", su_holonimo)
            else:
                print("No holonyms were found for that word")
            su_consecutivo_logico = synsent_palabra.entailments()
            if su_consecutivo_logico:
                print("These are the entailments of", palabra + ":", su_consecutivo_logico)
            else:
                print("No entailments were found for that word")
            antonimo = [str(lemma.name()) for lemma in synsent_palabra.lemmas()]
            el_antonimo = str(palabra + ".n.01." + antonimo[0])
            su_antonimo = wn.lemma(el_antonimo).antonyms()
            print(antonimo)
            if su_antonimo:
                print("These are the antonyms of", palabra + ":", su_antonimo)
            else:
                print("No antonyms were found for that word")
def relations():
    wn.synset('tree.n.01').part_meronyms()
    wn.synset('tree.n.01').substance_meronyms()
    wn.synset('tree.n.01').member_holonyms()
    for synset in wn.synsets('mint', wn.NOUN):
        print synset.name + ':', synset.definition
    wn.synset('mint.n.04').part_holonyms()
    wn.synset('mint.n.04').substance_holonyms()
    wn.synset('walk.v.01').entailments()
    wn.synset('eat.v.01').entailments()
    wn.synset('tease.v.03').entailments()
    wn.lemma('supply.n.02.supply').antonyms()
    wn.lemma('rush.v.01.rush').antonyms()
    wn.lemma('horizontal.a.01.horizontal').antonyms()
    wn.lemma('staccato.r.01.staccato').antonyms()
def antonym_dict(word_list):
    CSAT_ant_dict = {}
    for syn in word_list:
        name_list = []
        syns = wn.synsets(syn)
        names = [s.name() for s in syns]
        for name in names:
            if syn in name:
                name_list.append(name)
        try:
            for n in name_list:
                stem_name = n.split('.')[0]
                atn = wn.lemma('{}.{}'.format(n, stem_name)).antonyms()
                if atn:  # `atn is not []` is always True; test for a non-empty list instead
                    print('The antonym of {} is {}'.format(stem_name, atn))
                    CSAT_ant_dict[stem_name] = atn  # was the undefined name `ant`
        except:
            pass
    return CSAT_ant_dict
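A hedged usage sketch for antonym_dict; the input words are illustrative and the exact output depends on the installed WordNet data.

result = antonym_dict(['increase', 'horizontal'])
print(result)
# e.g. {'horizontal': [Lemma('vertical.a.01.vertical'), Lemma('inclined.a.02.inclined')], ...}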
def wordnet():
    wn.synsets('motorcar')
    wn.synset('car.n.01').lemma_names
    wn.synset('car.n.01').definition
    wn.synset('car.n.01').examples
    wn.synset('car.n.01').lemmas
    wn.lemma('car.n.01.automobile')
    wn.lemma('car.n.01.automobile').synset
    wn.lemma('car.n.01.automobile').name
    wn.synsets('car')
    for synset in wn.synsets('car'):
        print synset.lemma_names
    wn.lemmas('car')
def playWithWordNet(word):
    syn = wn.synsets(word)
    print(syn)
    syns = wn.synset('strange.a.01').lemma_names()
    syns2 = wn.synset('strange.s.02').lemma_names()
    defn = wn.synset('strange.s.02').definition()
    ex = wn.synset('strange.s.02').examples()
    lems = wn.synset('strange.s.02').lemmas()
    # name = wn.lemma('strange.s.02').name()
    print(syns, defn, ex)
    for synset in syn:
        print(synset.lemma_names())
    stranges = wn.lemmas('strange')
    print(stranges)
    synset1 = wn.synset('strange.s.02')
    types_of_strange = synset1.hyponyms()
    supersets_of_strange = synset1.hypernyms()
    root_hypernyms = synset1.root_hypernyms()
    paths = synset1.hypernym_paths()
    path1 = [synset.name() for synset in paths[0]]
    print(types_of_strange, supersets_of_strange, root_hypernyms, paths, path1)
    tree = wn.synset('human.n.01')
    parts = tree.part_meronyms()
    subst_parts = tree.substance_meronyms()
    wholes = tree.member_holonyms()
    print(tree, parts, subst_parts, wholes)
    entails = wn.synset('walk.v.01').entailments()
    antys = wn.lemma('rush.v.01.rush').antonyms()
    specificity = wn.synset('baleen_whale.n.01').min_depth()
def semanticScore(word):
    pluralizer = inflect.engine()
    syn_set = []
    wnsynset = wn.synsets(word)
    syn_set_final = []
    for i in range(0, len(wnsynset)):
        for lemma in wnsynset[i].lemma_names():
            syn_set.append(lemma)
            deriv = wn.lemma(wnsynset[i].name() + "." + lemma)
            for x in deriv.derivationally_related_forms():
                syn_set.append(x.name())
        # print "Hypernym function: "
        for hyper in wnsynset[i].hypernyms():
            syn_set.append(re.findall(r"[a-zA-Z]*", hyper.name())[0])
        # print "Hyponym function: "
        for hypo in wnsynset[i].hyponyms():
            syn_set.append(re.findall(r"[a-zA-Z]*", hypo.name())[0])
    # adds plurals and removes dups
    syn_setnodup = []
    for item in syn_set:
        if item not in syn_setnodup:
            syn_setnodup.append(item)
    syn_set_final = []
    for item in syn_setnodup:
        syn_set_final.append(item)
        syn_set_final.append(pluralizer.plural(item))
    return syn_set_final
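A hedged usage sketch for semanticScore; it assumes inflect, re, and wn are imported at module level as the function body expects, and the input word is illustrative.

related = semanticScore('finance')
print(len(related), related[:10])   # a mix of synonyms, derived forms, hyper/hyponym heads, plus plurals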
from nltk.corpus import wordnet as wn


def get_antonyms(lemma):
    antonyms = [ant.name for ant in lemma.antonyms()]
    antonyms.extend([ant.name
                     for similar in lemma.synset.similar_tos()
                     for lemmas in similar.lemmas
                     for ant in lemmas.antonyms()])
    return antonyms


if __name__ == "__main__":
    print(get_antonyms(wn.lemma('alacritous.s.01.alacritous')))
    print(get_antonyms(wn.lemma('sluggish.s.01.sluggish')))
    print(get_antonyms(wn.lemma('adust.s.01.parched')))
#!/usr/bin/python
#===================================================================
# This codelet reads the vocabulary lemmas and verifies that each
# is found in NLTK WordNet. Some lemmas in WordNet cannot be looked
# up because of parsing errors due to dots (.) in the lemma name.
# Copyright 2014, IEEE ENCS Humanoid Robot Project
#===================================================================
from nltk.corpus import wordnet as wn

with open('vocab_lemmas.txt', 'r') as f:
    for line in f:
        try:
            wn.lemma(line.strip())  # will blow up if line isn't a lemma
        except:
            print line.strip()
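A narrower variant (sketch) that catches WordNetError explicitly instead of using a bare except; malformed names, such as lemmas containing extra dots, may also raise ValueError, so both are handled.

from nltk.corpus import wordnet as wn
from nltk.corpus.reader.wordnet import WordNetError

def is_parseable_lemma(name):
    try:
        wn.lemma(name)
        return True
    except (WordNetError, ValueError):
        return False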
print()
syns = wn.synsets("dog")
print(syns)
print(wn.synsets('dog', pos=wn.VERB))  # verb sense: "to chase"
print(wn.synset('dog.n.01').definition(), "\n")
print(len(wn.synset('dog.n.01').examples()))  # 1
print(wn.synset('dog.n.01').examples()[0], "\n")  # The dog barked all night
print("lemmas: ", wn.synset('dog.n.01').lemmas())
# [Lemma('dog.n.01.dog'), Lemma('dog.n.01.domestic_dog'), Lemma('dog.n.01.Canis_familiaris')]
[str(lemma.name()) for lemma in wn.synset('dog.n.01').lemmas()]  # ['dog', 'domestic_dog', 'Canis_familiaris']
print(wn.lemma('dog.n.01.dog').synset(), "\n")  # Synset('dog.n.01')

"""For example, pigeon, crow, eagle and seagull are all hyponyms of bird (their hypernym);
which, in turn, is a hyponym of animal.[3]"""
dog = wn.synset('dog.n.01')
print("hypernyms : ", dog.hypernyms())  # [Synset('canine.n.02'), Synset('domestic_animal.n.01')]
print("hyponyms: ", dog.hyponyms())  # [Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'), Synset('dalmatian.n.02'), ...]
print("member_holonyms: ", dog.member_holonyms())  # [Synset('canis.n.01'), Synset('pack.n.06')]
print("root_hypernyms: ", dog.root_hypernyms())  # [Synset('entity.n.01')]
print("lowest_common_hypernyms cat/dog: ",
      wn.synset('dog.n.01').lowest_common_hypernyms(wn.synset('cat.n.01')))  # [Synset('carnivore.n.01')]
def extractFeatureValues(sent, j, usePredictedLabels=True, orders={0,1}, indexer=None, candidatesThisSentence=None): ''' Extracts a map of feature names to values for a particular token in a sentence. These can be aggregated to get the feature vector or score for a whole sentence. These replicate the features used in Ciaramita and Altun, 2006 @param sent: the labeled sentence object to extract features from @param j: index of the word in the sentence to extract features for @param usePredictedLabels: whether to use predicted labels or gold labels (if available) for the previous tag. This only applies to first-order features. @param orders: list of orders; e.g. if {1}, only first-order (tag bigram) features will be extracted @return: feature name -> value ''' (lexiconCandidates, listCandidates), supersenseCandidates = candidatesThisSentence or (({}, {}), []) ff = IndexedFeatureMap(indexer) if indexer is not None else {} # note: in the interest of efficiency, we use tuples rather than string concatenation for feature names # previous label feature (first-order Markov dependency) if 1 in orders and hasFirstOrderFeatures() and j>0: ff["prevLabel=",(sent[j-1].prediction if usePredictedLabels else sent[j-1].gold)] = 1 if 0 in orders: # bias ff[()] = 1 # original token, token position-in-sentence features if sent[j].token[0].isupper(): #ff['capitalized_BOS' if j==0 else 'capitalized_!BOS'] = 1 # old version of feature (in mweFeatures) nCap = sum(1 for tkn in sent if tkn.token[0].isupper()) if j==0: ff['capitalized_BOS'] = 1 if nCap>=(len(sent)-nCap): ff['capitalized_BOS_majcap'] = 1 else: ff['capitalized_!BOS'] = 1 if nCap>=(len(sent)-nCap): ff['capitalized_!BOS_majcap'] = 1 if sent[0].token[0].islower(): ff['capitalized_!BOS_BOSlower'] = 1 ff['shape', sent[j].shape] = 1 if j<2: ff['offset_in_sent=',str(j)] = 1 if len(sent)-j<2: ff['offset_in_sent=',str(j-len(sent))] = 1 # lowercased token features w = sent[j].token.lower() # - prefix (up to 4) # - suffix (up to 4) for k in range(4): ff['w[:{}]'.format(k+1), w[:k+1]] = 1 ff['w[{}:]'.format(-k-1), w[-k-1:]] = 1 # - special characters for c in w: if c.isdigit(): ff['has-digit'] = 1 elif not c.isalpha(): ff['has-char', c] = 1 # - context word up to 2 away # - context POS up to 2 words away # - context word bigram # - context POS bigram # - current lemma and context lemma up to 2 words away, if one of them is a verb # and the other is a noun, verb, adjective, adverb, preposition, or particle for k in range(j-2,j+3): if k<0: continue elif k>len(sent)-1: break ff['w_{:+}'.format(k-j), sent[k].token.lower()] = 1 ff['pos_{:+}'.format(k-j), sent[k].pos] = 1 if k!=j and ( \ (sent[k].pos[0]=='V' and sent[j].pos[0] in {'V','N','J','I','R','T'}) \ or (sent[j].pos[0]=='V' and sent[k].pos[0] in {'V','N','J','I','R','T'})): ff['lemma_+0,{:+}'.format(k-j), sent[j].stem, sent[k].stem] = 1 if k<j+2 and k<len(sent)-1: if useTokenBigrams: ff['w_{:+},{:+}'.format(k-j,k-j+1), sent[k].token.lower(), sent[k+1].token.lower()] = 1 ff['pos_{:+},{:+}'.format(k-j,k-j+1), sent[k].pos, sent[k+1].pos] = 1 if clusterMap and (k==j or abs(k-j)==1): # current and neighbor clusters clustid, keywords = wordClusterID(sent[k].token.lower()) ff['c_{:+1}'.format(k-j), clustid, keywords or ''] = 1 if k!=j: ff['lemma_+0,c_{:+}'.format(k-j), sent[j].stem, clustid, keywords or ''] = 1 # - word + context POS # - POS + context word if j>0: ff['w_+0_pos_-1', sent[j].token.lower(), sent[j-1].pos] = 1 ff['w_-1_pos_+0', sent[j-1].token.lower(), sent[j].pos] = 1 if j<len(sent)-1: ff['w_+0_pos_+1', 
sent[j].token.lower(), sent[j+1].pos] = 1 ff['w_+1_pos_+0', sent[j+1].token.lower(), sent[j].pos] = 1 # - auxiliary verb/main verb (new relative to mweFeatures) if coarsen(sent[j].pos)=='V': cposes = [coarsen(tok.pos) for tok in sent[j:]] if len(cposes)>1 and cposes[1]=='V': # followed by another verb: probably an aux (though there are exceptions: # "try giving", "all people want is", etc.) ff['auxverb'] = 1 elif len(cposes)>2 and cposes[1]=='R' and cposes[2]=='V': # followed by an adverb followed by a verb: probably an aux ff['auxverb'] = 1 else: ff['mainverb'] = 1 # lexicon features if not wn.lemmas(sent[j].stem): if useWNOOV: ff['OOV',sent[j].pos] = 1 wn_pos_setS = '{}' else: wn_pos_set = frozenset({lem.synset().pos().replace('s','a') for lem in wn.lemmas(sent[j].stem)}) wn_pos_setS = '{'+repr(tuple(wn_pos_set))[1:-1]+'}' # - WordNet supersense (new relative to mweFeatures) extractWNSupersenseFeat(ff, j, supersenseCandidates) if useWNCompound: # - compound if sent[j].pos.isalnum(): prevtok = None for tok in sent[j-1::-1]: if tok.pos=='HYPH': continue elif tok.pos.isalnum(): prevtok = tok break nexttok = None for tok in sent[j+1:]: if tok.pos=='HYPH': continue elif tok.pos.isalnum(): nexttok = tok break if sent[j].pos=='HYPH': if isCompound(prevtok,nexttok): ff['compound_left_right'] = 1 else: if isCompound(prevtok,sent[j]): ff['compound_left'] = 1 if isCompound(sent[j],nexttok): ff['compound_right'] = 1 nMatches = Counter() for lexiconname,segmentation in lexiconCandidates.items(): toffset,tag,expr_tokens,is_gappy_expr,entry = segmentation[j] assert toffset==j if lexiconname=='wordnet_mwes': if entry: try: mw_pos_set = frozenset(wn.lemma(wnlemma).synset().pos().replace('s','a') for wnlemma in entry["wnlemmas"]) except: print(entry, file=sys.stderr) raise mw_pos_setS = '{'+repr(tuple(mw_pos_set))[1:-1]+'}' ff['wn',wn_pos_setS,tag,mw_pos_setS] = 1 else: ff['wn',wn_pos_setS,tag] = 1 if tag.upper()!='O': lbl = entry["label"] if not lbl.startswith('NE:') and SENSENUM.search(lbl): lbl = '<sense-tagged>' ff['lex',lexiconname,tag.upper(),str(is_gappy_expr),lbl] = 1 if True or entry["datasource"].lower()!='wikimwe': # TODO: OK to remove constraint for wikimwe? p1 = sent[expr_tokens[0]].pos p2 = sent[expr_tokens[-1]].pos ff['lex',lexiconname,tag.upper(),str(is_gappy_expr),lbl,p1,'...',p2] = 1 nMatches[p1,p2] += 1 nMatches[None,None] += 1 else: ff['lex',lexiconname,'O'] = 1 if nMatches[None,None]==0: ff['#lex-matches=','0'] = 1 else: for n in range(1,nMatches[None,None]+1): ff['#lex-matches>=',str(n)] = 1 for (p1,p2),N in nMatches.items(): if (p1,p2)!=(None,None): for n in range(1,N+1): ff['#lex-matches',p1,'...',p2,'>=',str(n)] = 1 #sentpos = ''.join(coarsen(w.pos) for w in sent) #cposj = coarsen(sent[j].pos) # - collocation extraction lists # lists for 6 collocation classes: adj-noun noun-noun preposition-noun verb-noun verb-preposition verb-particle # each list ranks lemma pairs using the t-test. # considering each list separately, we segment the sentence preferring higher-ranked items # (requiring lemmas and coarse POSes to match). # fire features indicating (a) B vs. I match, and (b) whether the rank in the top # {25,50,75,100,150,200,300,...,900,1000,2000,...,9000,10k,20k,...90k,100k,200k,...}, # (c) gappiness? 
for listname,segmentation in listCandidates.items(): toffset,tag,expr_tokens,is_gappy_expr,entry = segmentation[j] assert toffset==j if tag.upper()!='O': lbl = entry["label"] is_phrasinator = (entry["datasource"].lower().startswith('phrasinator')) ff['list',listname,tag.upper(),str(is_gappy_expr),lbl] = 1 p1 = sent[expr_tokens[0]].pos p2 = sent[expr_tokens[-1]].pos if is_phrasinator: ff['list',listname,tag.upper(),str(is_gappy_expr),lbl,p1,'...',p2] = 1 r = entry["rank"] for t in THRESHOLDS: if r>t: break ff['list',listname,'rank<={}'.format(t), tag.upper(),str(is_gappy_expr),lbl] = 1 if is_phrasinator: ff['list',listname,'rank<={}'.format(t), tag.upper(),str(is_gappy_expr),lbl,p1,'...',p2] = 1 else: ff['list',listname,'O'] = 1 return ff
def latihan_wordnet(): wn.synsets('motorcar') wn.synset('car.n.01').lemma_names wn.synset('car.n.01').lemmas wn.synset('car.n.01').definition wn.synset('car.n.01').examples wn.synset('car.n.01').definition #'a motor vehicle with four wheels; usually propelled by an internal combustion engine' wn.synset('car.n.01').examples #['he needs a car to get to work'] #atau wn.lemmas('car') #[Lemma('car.n.01.car'), Lemma('car.n.02.car'), Lemma('car.n.03.car'), #Lemma('car.n.04.car'), Lemma('cable_car.n.01.car')] wn.synsets('car') #[Synset('car.n.01'), Synset('car.n.02'), Synset('car.n.03'), Synset('car.n.04'), #Synset('cable_car.n.01')] for synset in wn.synsets('car'): print synset.lemma_names #['car', 'auto', 'automobile', 'machine', 'motorcar'] #['car', 'railcar', 'railway_car', 'railroad_car'] #['car', 'gondola'] #['car', 'elevator_car'] #['cable_car', 'car'] motorcar = wn.synset('car.n.01') types_of_motorcar = motorcar.hyponyms() len(types_of_motorcar) #31 types_of_motorcar[26] #Synset('ambulance.n.01') sorted([lemma.name for synset in types_of_motorcar for lemma in synset.lemmas]) #['Model_T', 'S.U.V.', 'SUV', 'Stanley_Steamer', 'ambulance', 'beach_waggon', #...] motorcar.hypernyms() #[Synset('motor_vehicle.n.01')] paths = motorcar.hypernym_paths() len(paths) #2 [synset.name for synset in paths[0]] #['entity.n.01', 'physical_entity.n.01', 'object.n.01', 'whole.n.02', 'artifact.n.01', #'instrumentality.n.03', 'container.n.01', 'wheeled_vehicle.n.01', #'self-propelled_vehicle.n.01', 'motor_vehicle.n.01', 'car.n.01'] [synset.name for synset in paths[1]] #['entity.n.01', 'physical_entity.n.01', 'object.n.01', 'whole.n.02', 'artifact.n.01', #'instrumentality.n.03', 'conveyance.n.03', 'vehicle.n.01', 'wheeled_vehicle.n.01', #'self-propelled_vehicle.n.01', 'motor_vehicle.n.01', 'car.n.01'] motorcar.root_hypernyms() #[Synset('entity.n.01')] wn.synset('tree.n.01').part_meronyms() #[Synset('burl.n.02'), Synset('crown.n.07'), Synset('stump.n.01'), #Synset('trunk.n.01'), Synset('limb.n.02')] wn.synset('tree.n.01').substance_meronyms() #[Synset('heartwood.n.01'), Synset('sapwood.n.01')] wn.synset('tree.n.01').member_holonyms() #[Synset('forest.n.01')] for synset in wn.synsets('mint', wn.NOUN): print synset.name + ':', synset.definition #batch.n.02: (often followed by `of') a large number or amount or extent #mint.n.02: any north temperate plant of the genus Mentha with aromatic leaves and #small mauve flowers #mint.n.03: any member of the mint family of plants #mint.n.04: the leaves of a mint plant used fresh or candied #mint.n.05: a candy that is flavored with a mint oil #mint.n.06: a plant where money is coined by authority of the government wn.synset('mint.n.04').part_holonyms() #[Synset('mint.n.02')] wn.synset('mint.n.04').substance_holonyms() #[Synset('mint.n.05')] wn.synset('walk.v.01').entailments() #[Synset('step.v.01')] wn.synset('eat.v.01').entailments() #[Synset('swallow.v.01'), Synset('chew.v.01')] wn.synset('tease.v.03').entailments() #[Synset('arouse.v.07'), Synset('disappoint.v.01')] #Antonym wn.lemma('supply.n.02.supply').antonyms() #[Lemma('demand.n.02.demand')] wn.lemma('rush.v.01.rush').antonyms() #[Lemma('linger.v.04.linger')] wn.lemma('horizontal.a.01.horizontal').antonyms() #[Lemma('vertical.a.01.vertical'), Lemma('inclined.a.02.inclined')] wn.lemma('staccato.r.01.staccato').antonyms() #[Lemma('legato.r.01.legato')] #Semantic Similarity #Semakin dekat path antara dua lemma, semakin mirip makna semantik kedua lemma tersebut right = wn.synset('right_whale.n.01') orca = 
wn.synset('orca.n.01') minke = wn.synset('minke_whale.n.01') tortoise = wn.synset('tortoise.n.01') novel = wn.synset('novel.n.01') print right.lowest_common_hypernyms(minke) #[Synset('baleen_whale.n.01')] print right.lowest_common_hypernyms(orca) #[Synset('whale.n.02')] print right.lowest_common_hypernyms(tortoise) #[Synset('vertebrate.n.01')] print right.lowest_common_hypernyms(novel) #[Synset('entity.n.01')] print wn.synset('baleen_whale.n.01').min_depth() #14 print wn.synset('whale.n.02').min_depth() #13 print wn.synset('vertebrate.n.01').min_depth() #8 print wn.synset('entity.n.01').min_depth() #0 print right.path_similarity(minke) #0.25 print right.path_similarity(orca) #0.16666666666666666 print right.path_similarity(tortoise) #0.076923076923076927 print right.path_similarity(novel) #0.043478260869565216 ##nltk web #from __future__ import division import nltk, re, pprint #from urllib import urlopen url = "http://www.gutenberg.org/files/2554/2554.txt" raw = urlopen(url).read() len(raw) raw[:75] #from __future__ import division #import nltk, re, pprint #from urllib import urlopen url = "http://www.gutenberg.org/files/2554/2554.txt" print "Accessing gutenberg #2554..." raw = urlopen(url).read() tokens = nltk.word_tokenize(raw) text = nltk.Text(tokens) text.concorddance("Gutenberg") text.collocations() text.similarity() #Mengakses data dengan tag HTML url = 'http://news.bbc.co.uk/2/hi/health/2284783.stm' htmlsite = urlopen(url) htmldata = htmlsite.read() htmlraw = nltk.clean_html(htmldata) htmltokens = nltk.word_tokenize(htmlraw) htmltexts = nltk.Text(htmltokens) htmltexts.concordance('gene') #Mengakses Berkas Lokal f = open('document.txt', 'r') data = f.read() f.close() tokens = nltk.word_tokenize(data) texts = nltk.Text(tokens) texts.concordance('gene') #Menulis Berkas Lokal f = open('document.txt', 'w') for word in sorted(htmltexts): f.write(word + '\n') #Mengakses RSS Feed import feedparser url = 'http://news.bbc.co.uk/2/hi/health/2284783.stm' htmlsite = urlopen(url) htmldata = htmlsite.read() htmlraw = nltk.clean_html(htmldata) htmltokens = nltk.word_tokenize(htmlraw) htmltexts = nltk.Text(htmltokens) htmltexts.concordance('gene') #Python dan PyScripter import os os.chdir('path\to\tugas') import tugas reload(tugas) #NLTK dan Teks import nltk data = 'Sebuah contoh kalimat yang ingin dianalisis menggunakan NLTK' tokens = nltk.word_tokenize(data) text = nltk.Text(tokens)
import nltk
from nltk.corpus import wordnet as wn


def supergloss(s):
    res = s.definition()
    for hyper in s.hypernyms():
        res += ";" + hyper.definition()
    for hypo in s.hyponyms():
        res += ";" + hypo.definition()
    return res


print supergloss(wn.lemma('car.n.01.automobile').synset())
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 17 22:14:04 2015

@author: mongolia19
"""
from nltk.corpus import wordnet as wn

wordAList = wn.synset('thin.a.02')
print wordAList
#keyA = wordAList[0]
wordBList = wn.synsets('fat')
print wordBList
keyB = wordBList[0]
wordCList = wn.synsets('people')
keyC = wordCList[0]
#print keyA
#print keyB
#print keyC
score = wordAList.path_similarity(keyB)
print wn.lemma('fat.a.01.fat').antonyms()
#scoreA = keyC.path_similarity(keyB)
print score
#!/usr/bin/python
#coding:utf-8
# 2013/02/27
from nltk.corpus import reuters
from nltk.corpus import wordnet as wn
import nltk

wn.synsets('motorcar')                    # list of Synsets (synonym sets) for 'motorcar'
wn.synset('car.n.01').lemma_names         # the synonyms (synonymous lemma names); a list of strings
wn.synset('car.n.01').definition          # the definition
wn.synset('car.n.01').examples            # example sentences
wn.synset('car.n.01').lemmas              # all lemmas in the synset; a list of Lemma objects
wn.lemma('car.n.01.automobile')           # look up a specific lemma; a Lemma
wn.lemma('car.n.01.automobile').synset    # the synset the lemma belongs to; a Synset
wn.lemma('car.n.01.automobile').name      # the lemma's name; a string
wn.synsets('car')                         # list of Synsets for 'car'
for synset in wn.synsets('car'):          # iterate over each synset of 'car'
    print synset.lemma_names              # print the synset's list of synonyms
wn.lemmas('car')                          # all lemmas for the word 'car'; a list of Lemma objects

motorcar = wn.synset('car.n.01')          # a specific Synset
types_of_motorcar = motorcar.hyponyms()   # the synset's hyponym Synsets
types_of_motorcar[26]                     # one hyponym Synset
sorted(lemma.name for synset in types_of_motorcar for lemma in synset.lemmas)  # sort the lemma names of every hyponym synset
motorcar.hypernyms()                      # list of hypernym Synsets
paths = motorcar.hypernym_paths()         # hypernym paths
len(paths)
[synset.name for synset in paths[0]]
from nltk.corpus import wordnet as wn

print wn.synsets("motorcar")
print wn.synset("car.n.01").lemma_names
print wn.synset("car.n.01").definition
print wn.synset("car.n.01").examples
print wn.synset("car.n.01").lemmas
print wn.lemma("supply.n.02.supply").antonyms()
----------------------------------------------------------------------
""")
from nltk.corpus import wordnet as wn

print(wn.synsets('motorcar'))
print("-" * 40)
print(wn.synset('car.n.01').lemma_names())
print("-" * 40)
print(wn.synset('car.n.01').definition())
print(wn.synset('car.n.01').examples())
print("-" * 40)
print(wn.synset('car.n.01').lemmas())
print(wn.lemma('car.n.01.automobile'))
print(wn.lemma('car.n.01.automobile').synset())
print(wn.lemma('car.n.01.automobile').name())
print("-" * 40)
print(wn.synsets('car'))  # wn.synset('car') would fail: synset() needs a full name like 'car.n.01'
for synset in wn.synsets('car'):
    print(synset.lemma_names())
print("-" * 40)
print(wn.lemmas('car'))
print("-" * 40)
print(wn.synsets('dish'))
print(wn.synset('dish.n.01').lemma_names())
print(wn.synset('dish.n.01').definition())
import nltk
from nltk.corpus import wordnet
from nltk.corpus import wordnet as wn

wn.synsets('cat')
wn.synsets('cat', pos=wn.VERB)
wn.synset('cat.n.01')
print(wn.synset('cat.n.01').definition())
print(len(wn.synset('cat.n.01').examples()))
print(wn.synset('cat.n.01').lemmas())
print([str(lemma.name()) for lemma in wn.synset('cat.n.01').lemmas()])
print(wn.lemma('cat.n.01.cat').synset())
import nltk
from nltk.corpus import wordnet as wn

# for each sense of a word, there is a synset with an id consisting of one of the words,
# whether it is noun, verb, adj or adverb, and a number among the synsets of that word

# given word "dog", returns the ids of the synsets
wn.synsets('dog')

# given a synset id, find words/lemma names (the synonyms) of the first noun sense of "dog"
wn.synset('dog.n.01').lemma_names()

# given a synset id, find lemmas of the synset (a lemma pairs a word with a synset)
wn.synset('dog.n.01').lemmas()

# find synset of a lemma
wn.lemma('dog.n.01.domestic_dog').synset()

# find lemma names for all senses of a word
for synset in wn.synsets('dog'):
    print(synset, ": ", synset.lemma_names())

# find definition of the first noun sense of dog, or namely, the dog.n.01 synset
wn.synset('dog.n.01').definition()

# display an example of the synset
wn.synset('dog.n.01').examples()

# or show the definitions for all the synsets of a word
for synset in wn.synsets('dog'):
    print(synset, ": ", synset.definition())
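A small hedged sketch tying the pieces above together: a full lemma name is just the synset name plus a lemma name, which is the string wn.lemma() expects.

syn = wn.synset('dog.n.01')
for name in syn.lemma_names():
    print(wn.lemma(syn.name() + '.' + name))
# Lemma('dog.n.01.dog'), Lemma('dog.n.01.domestic_dog'), Lemma('dog.n.01.Canis_familiaris')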
exit()

for synset in list(wn.all_synsets('n'))[:10]:
    print(synset)
print(wn.synsets('dog', pos=wn.VERB))
print("*" * 111)
print(wn.synset('dog.n.01'))
print(wn.synset('dog.n.01').definition())
print(len(wn.synset('dog.n.01').examples()))
print(wn.synset('dog.n.01').examples()[0])
print(wn.synset('dog.n.01').lemmas())
a = [str(lemma.name()) for lemma in wn.synset('dog.n.01').lemmas()]
print(a)
print(wn.lemma('dog.n.01.dog').synset())
print("*" * 111)
print(sorted(wn.langs()))
print(wn.synsets(b'\xe7\x8a\xac'.decode('utf-8'), lang='jpn'))
print(wn.synset('spy.n.01').lemma_names('jpn'))
print(wn.synset('dog.n.01').lemma_names('ita'))
print("*" * 111)
dog = wn.synset('dog.n.01')
print(dog.hypernyms())
print(dog.hyponyms())
print(dog.member_holonyms())
print(dog.root_hypernyms())
print(wn.synset('dog.n.01').lowest_common_hypernyms(wn.synset('cat.n.01')))
print("*" * 111)
def get_semantic_score(word): print "STARTING semanticScore for" + word if len(word.split()) > 1: word = word.replace(" ","_") pluralizer = inflect.engine() syn_set = [] # the slow part wnsynset = wn.synsets(word) print "WYNSET" + str(wnsynset) syn_set_final = [] # not suitable for synonyms but good for relations abstractions = [] for i in range(0, len(wnsynset)): for lemma in wnsynset[i].lemma_names(): print "LEMMA" print lemma syn_set.append(lemma) deriv = wn.lemma(wnsynset[i].name() +"."+ lemma) print "DERIVATIONS" for x in deriv.derivationally_related_forms(): print x.name() syn_set.append(x.name()) syn_set_b = noDup(syn_set) if len(syn_set_b) < 11: print "FULL SYNONYMS INCLUDING ABSTRACTIONS" print syn_set_b for i in range(0, len(wnsynset)): print "Hypernym function: " for hyper in wnsynset[i].hypernyms(): # 15 in random - did it for fund to finance hyper = re.findall(r"[a-zA-Z]*",hyper.name())[0] if len(syn_set_b) > 10: abstractions.append(hyper) else: syn_set.append(hyper) print hyper print "Hyponym function: " for hypo in wnsynset[i].hyponyms(): hypo = re.findall(r"[a-zA-Z]*",hypo.name())[0] if len(syn_set_b) > 10: abstractions.append(hypo) else: syn_set.append(hypo) print hypo # adds plurals and removes dups syn_setnodup = noDup(syn_set) syn_set_final = [] for item in syn_setnodup: syn_set_final.append(item.lower()) syn_set_final.append(pluralizer.plural(item).lower()) abstractions = noDup(abstractions) abstractions_final = [] for item in abstractions: abstractions_final.append(item.lower()) abstractions_final.append(pluralizer.plural(item).lower()) uselesswords = ["issues", "issues", "organization", "organizations"] abstractions_final = [w for w in abstractions_final if w.lower() not in uselesswords] syn_set_final = [w for w in syn_set_final if w.lower() not in uselesswords] print "END semanticScore" return [syn_set_final, abstractions_final]
from nltk.corpus import wordnet as wn

res = wn.synset('locomotive.n.01').lemma_names()
print(res)

resdef = wn.synset('ocean.n.01').definition()
print(resdef)

res_exm = wn.synset('good.n.01').examples()
print(res_exm)

res_a = wn.lemma('horizontal.a.01.horizontal').antonyms()
print(res_a)
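A short hedged addition: a Lemma can also be round-tripped through its sense key, which is handy when tools exchange sense keys rather than lemma names; the exact key string depends on the installed WordNet data.

lem = wn.lemma('horizontal.a.01.horizontal')
key = lem.key()                    # a sense key string, e.g. 'horizontal%3:00:00::'
print(wn.lemma_from_key(key))      # Lemma('horizontal.a.01.horizontal')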
def lemma(name_synsets):
    # Thin wrapper around NLTK's wordnet.lemma() lookup by full lemma name.
    return wordnet.lemma(name_synsets)
for synset in tqdm(list(wn.all_synsets())):
    synset_set.add(synset)
    for item in synset.hypernyms():
        hypernyms.append((to_str(synset), to_str(item)))
    for item in synset.hyponyms():
        hyponyms.append((to_str(synset), to_str(item)))
    for item in synset.member_holonyms():
        member_holonyms.append((to_str(synset), to_str(item)))
    # lemma_set
    for item in synset.lemmas():
        lemma_set.add(to_str(item))

# lemma edges
for lemma in tqdm(lemma_set):
    lemma = wn.lemma(lemma)
    for item in lemma.derivationally_related_forms():
        derivationally_related_forms.append((to_str(lemma), to_str(item)))
    for item in lemma.pertainyms():
        pertainyms.append((to_str(lemma), to_str(item)))
    for item in lemma.antonyms():
        antonyms.append((to_str(lemma), to_str(item)))

# nodes
lemmas = []
for item in lemma_set:
    lemmas.append(item)
# lemmas = [to_str(it) for it in lemma_set]
synsets = [to_str(it) for it in synset_set]
words = list(wn.all_lemma_names())
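`to_str` is not defined in the fragment above; a hypothetical definition consistent with how its output is later passed back into wn.lemma() might look like this.

def to_str(obj):
    # Hypothetical helper: Lemmas become 'synset_name.lemma_name', Synsets keep their own name.
    if hasattr(obj, 'synset'):
        return obj.synset().name() + '.' + obj.name()
    return obj.name()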
dog[0].definition()
dog[0].examples()
type(dog[0].examples())
len(dog[0].examples())

## LEMMA: represents a specific sense of a specific word
## =====
## ---------------------------------------------------
dog[0].lemmas()
type(dog[0].lemmas())
len(dog[0].lemmas())
for w in dog[0].lemmas():
    print(w)
wn.lemma('dog.n.01.dog')
wn.lemma('dog.n.01.dog').synset()

#######################################################
print(wn.synset('dog.n.02').definition())  # the second noun sense of "dog" (the original misplaced the closing paren)
for i in wn.synsets('dog'):
    print(i)
print(wn.synset('frump.n.01').definition())
for i in wn.synsets('dog'):
    print(i.definition())
def readFile(): input_file = open( "C:\\Users\\Sergio\\Dropbox\\QMUL\\Data\\choicesNHS\\nhsChoices.txt", "r") #input_file = open("C:\\Users\\Sergio\\Dropbox\\QMUL\\Data\\choicesNHS\\nhsChoicesDiagnosis.txt", "r") #input_file = open("C:\\Users\\Sergio\\Dropbox\\QMUL\\Data\\choicesNHS\\nhsChoicesDiabetesWhole.txt", "r") lines = input_file.readlines() input_file.close() annotationsX = [] annotationsSLR = [] annotationsNER = [] for x in lines: annotationX = x annotationSLR = annotator.getAnnotations(x, dep_parse=True)['srl'] #annotationNER = annotator.getAnnotations(x,dep_parse=True)['ner'] annotationsX.append(annotationX) annotationsSLR.append(annotationSLR) #annotationsNER.append(annotationNER) size = len(annotationsSLR) print size A0 = 0 A1 = 0 pbroles = [] annotationsA0 = [] annotationsA1 = [] for an in range(5): print annotationsX[an] print annotationsSLR[an] sizeIn = len(annotationsSLR[an]) #print sizeIn for an2 in range(sizeIn): print "--------------------------------------------------------------------------------------------------------" print annotationsSLR[an][an2]["V"] w = Word(annotationsSLR[an][an2]["V"]).lemmatize("v") #print w #print wn.synset(w+'.v.01') try: for role in propbank.roleset(w + '.01').findall("roles/role"): print(role.attrib['f'], role.attrib['n'], role.attrib['descr']) pbroles.append(role.attrib['descr']) #for role in propbank.roleset(w+'.01').findall("aliases/alias"): #print(role.attrib['framenet'], role.attrib['pos'], role.attrib['verbnet']) except: pass try: print( wn.lemma(w + '.v.01.' + w).derivationally_related_forms()) except: pass if "A0" in annotationsSLR[an][an2]: print annotationsSLR[an][an2]["A0"] A0 = annotationsSLR[an][an2]["A0"] #try: #A0 = TextBlob(A0, np_extractor=extractor) #A0 = A0.noun_phrases[0] #print A0 #except: #pass try: annotationsA0 = WordNet.spotlightSearch(A0) annotationsA0 = annotationsA0[0].get('URI') except: annotationsA0 = "unknown" pass if "A1" in annotationsSLR[an][an2]: print annotationsSLR[an][an2]["A1"] A1 = annotationsSLR[an][an2]["A1"] #try: #A1 = TextBlob(A1, np_extractor=extractor) #A1 = A1.noun_phrases[0] #print A1 #except: #pass try: annotationsA1 = WordNet.spotlightSearch(A1) annotationsA1 = annotationsA1[0].get('URI') except: annotationsA1 = "unknown" pass print pbroles print "--------------------------------------------------------------------------------------------------------" CreateGraphNeo4J.createGraph(w, A0, A1, pbroles, annotationsA0, annotationsA1) del pbroles[:] annotationsA0 = [] annotationsA1 = [] A0 = 0 A1 = 0
import nltk
nltk.download('punkt')
nltk.download('wordnet')
from nltk.corpus import wordnet as wn

motorcar = wn.synsets('motorcar')
print('synsets that motorcar belongs to: ' + repr(motorcar))

cars = wn.synset('car.n.01')
print('synset of car sense 1: ' + str(cars))
print('car sense 1 lemma names: ' + repr(cars.lemma_names()))
print('car sense 1 definition: ' + cars.definition())
print('car sense 1 example sentences: ' + repr(cars.examples()))

car_lemmas = cars.lemmas()
print('car sense 1 lemmas: ' + repr(car_lemmas))

automobile = wn.lemma('car.n.01.automobile')
print('synset of automobile (car sense 1): ' + str(automobile.synset()))
print('name of the automobile lemma: ' + automobile.name())

all_noun_synsets = wn.all_synsets('n')
print('number of noun synsets: ' + str(len(list(all_noun_synsets))))

car_synsets = wn.synsets('car')
print('synsets that car belongs to: ' + repr(car_synsets))
for synset in car_synsets:
    print(str(synset) + ' ' + repr(synset.lemma_names()))
print('synsets in which car is a lemma: ' + repr(wn.lemmas('car')))

motorcar = wn.synset('car.n.01')
types_of_motorcar = motorcar.hyponyms()
        for l in s.lemmas():
            ls.add(l)
    for s in lemma.synset().substance_meronyms():
        for l in s.lemmas():
            ls.add(l)
    return ls


def generate_fp(lemma, lemmas):
    fp = set()
    ls = get_related_lemmas(lemma, lemmas)
    for l in ls:
        set_bit(fp, lemmas, l)
    return fp


if __name__ == "__main__":
    lemmas, lemmas_list = read_lemmas('vocab_lemmas.txt')
    N = len(lemmas)
    with open('fingerprints.txt', 'w') as f:
        for lookup in lemmas_list:
            lemma = wn.lemma(lookup)
            print lemmas[lookup], lemma
            fp = generate_fp(lemma, lemmas)
            f.write(lookup + ":" + str(fp) + "\n")
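read_lemmas and set_bit are not shown in the fragment above; hypothetical definitions consistent with the usage (a name-to-index map plus a list of names, and one bit index added per related lemma) could look like this.

def read_lemmas(path):
    # Hypothetical: one full lemma name (e.g. 'dog.n.01.dog') per line.
    with open(path) as handle:
        names = [line.strip() for line in handle if line.strip()]
    return {name: i for i, name in enumerate(names)}, names

def set_bit(fp, lemmas, lemma):
    # Hypothetical: record the index of a related lemma, skipping lemmas outside the vocabulary.
    name = lemma.synset().name() + '.' + lemma.name()
    if name in lemmas:
        fp.add(lemmas[name])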
languages = ['en', 'de', 'nl', 'es', 'fr', 'pt', 'la']
for i in [139, 140, 141, 142]:
    print(swadesh.entries(languages)[i])

from nltk.corpus import toolbox
toolbox.entries('rotokas.dic')

from nltk.corpus import wordnet as wn
wn.synsets('motorcar')
wn.synset('car.n.01').lemma_names()
wn.synset('car.n.01').definition()
wn.synset('car.n.01').examples()
wn.synset('car.n.01').lemmas()
wn.lemma('car.n.01.automobile')
wn.lemma('car.n.01.automobile').synset()
wn.lemma('car.n.01.automobile').name()
wn.synsets('car')
for synset in wn.synsets('car'):
    print(synset.lemma_names())
wn.lemmas('car')
motorcar = wn.synset('car.n.01')
types_of_motorcar = motorcar.hyponyms()
types_of_motorcar[0]
sorted(lemma.name() for synset in types_of_motorcar for lemma in synset.lemmas())
motorcar.hypernyms()
# In[118]:

wn.synset('car.n.01').definition()


# In[119]:

wn.synset('car.n.01').examples()


# In[120]:

wn.synset('car.n.01').lemmas()


# In[123]:

print(wn.lemma('car.n.01.automobile'))
print(wn.lemma('car.n.01.automobile').synset())
print(wn.lemma('car.n.01.automobile').name())


# In[124]:

# the word car is ambiguous, having five synsets:
wn.synsets('car')


# In[125]:

for synset in wn.synsets('car'):
    print(synset.lemma_names())


# In[126]:
#three relations: part, substance, member
print wn.synset('tree.n.01').part_meronyms()        #burl, crown, stump, trunk, limb is part of tree
print wn.synset('tree.n.01').substance_meronyms()   #heartwood and sapwood is substance of tree
print wn.synset('forest.n.01').member_meronyms()    #the member of forest is tree
print wn.synset('trunk.n.01').part_holonyms()       #tree
print wn.synset('heartwood.n.01').substance_holonyms()  #tree
print wn.synset('tree.n.01').member_holonyms()      #forest

for synset in wn.synsets('mint', wn.NOUN):
    print synset.name() + ':', synset.definition()
#batch.n.02: (often followed by `of') a large number or amount or extent
#mint.n.02: any north temperate plant of the genus Mentha with aromatic leaves and small mauve flowers
#mint.n.03: any member of the mint family of plants
#mint.n.04: the leaves of a mint plant used fresh or candied
#mint.n.05: a candy that is flavored with a mint oil
#mint.n.06: a plant where money is coined by authority of the government

print wn.synset('eat.v.01').entailments()        #eat entails swallow and chew
print wn.lemma('supply.n.02.supply').antonyms()  #supply vs demand