def augmentKeywordsFromSentence(self, stopWordList, tokens):
    """Collect WordNet-derived keywords for the given tokens.

    For every token, looks up its synsets and gathers the synonym forms
    of the synset itself, of its hyponyms, and of its hypernyms (the
    '@' relation), with underscores replaced by spaces via the
    ``self.underscore`` regex.

    stopWordList -- not used in this method; kept for interface
                    compatibility with existing callers.
    tokens       -- iterable of word forms to look up.

    Returns a set of unique keyword strings.
    """
    from stdwn import impl

    def _synonymForms(synset):
        # one keyword per synonym, underscores mapped to spaces
        return [self.underscore.sub(" ", synonym.form)
                for synonym in synset.synonyms]

    # build the set directly instead of list-then-set; the three
    # hand-unrolled copies of the synonym loop collapse into one helper
    keywords = set()
    for token in tokens:
        for synset in impl.lookupSynsetsByForm(token):
            # the synset's own synonyms
            keywords.update(_synonymForms(synset))
            # synonyms of all hyponyms
            for hyponym in synset.hyponyms():
                keywords.update(_synonymForms(hyponym))
            # synonyms of all hypernyms ('@' relation)
            for hypernym in synset.relations("@"):
                keywords.update(_synonymForms(hypernym))
    return keywords
def augmentKeywordsFromSentence(self, stopWordList, tokens):
    """Collect WordNet-derived keywords for the given tokens.

    For every token, looks up its synsets and gathers the synonym forms
    of the synset itself, of its hyponyms, and of its hypernyms (the
    '@' relation), with underscores replaced by spaces via the
    ``self.underscore`` regex.

    stopWordList -- not used in this method; kept for interface
                    compatibility with existing callers.
    tokens       -- iterable of word forms to look up.

    Returns a set of unique keyword strings.
    """
    # NOTE(review): this copy referenced `impl` without a visible import;
    # the sibling copy of this method imports it locally, so the same
    # local import is added here for consistency.
    from stdwn import impl

    keywords = []
    for token in tokens:
        # get synsets for this word form
        synsets = impl.lookupSynsetsByForm(token)
        for synset in synsets:
            # synonyms of the synset itself
            for synonym in synset.synonyms:
                keywords.append(self.underscore.sub(' ', synonym.form))
            # synonyms of every hyponym
            for hyponym in synset.hyponyms():
                for synonym in hyponym.synonyms:
                    keywords.append(self.underscore.sub(' ', synonym.form))
            # synonyms of every hypernym ('@' relation)
            for hypernym in synset.relations('@'):
                for synonym in hypernym.synonyms:
                    keywords.append(self.underscore.sub(' ', synonym.form))
    # deduplicate before returning
    return set(keywords)
# train tagging model model = tag.Bigram() model.train(brown.tagged([textCategories['pressReportage'], textCategories['pressEditorial'], textCategories['pressReviews'], textCategories['skillsAndHobbies'], textCategories['popularLore']])) # tag text text = 'I want to buy a camera' tokens = list(tokenizer.processWhitespacesWithoutStopWords(text, 1)) taggedTokens = list(model.tag(tokens)) print tokens print taggedTokens # get WordNet information for each noun for taggedToken in taggedTokens: if taggedToken[1] == 'nn' or taggedToken[1] == None: # get synsets synsets = impl.lookupSynsetsByForm(taggedToken[0]) # print gloss for synset in synsets: # go through synonyms print '--------------------------------------------------------------' for synonym in synset.synonyms: print synonym.form # go through hyponyms print '- Hyponyms ------------------------------------------------------------' for hyponym in synset.hyponyms(): for hypoSynonym in hyponym.synonyms: print synonym.form, hypoSynonym.form # go through hypernyms
from stdwn import impl # get synsets synsets = impl.lookupSynsetsByForm('camera') # print for synset in synsets: for item in synset: print item
# NOTE(review): this chunk is a fragment — it begins mid-expression (inside a
# brown.tagged([...]) call whose opening lies outside this view) and its
# whitespace has been collapsed onto one line; left byte-identical.
textCategories['pressReviews'], textCategories['skillsAndHobbies'], textCategories['popularLore'] ])) # tag text text = 'I want to buy a camera' tokens = list(tokenizer.processWhitespacesWithoutStopWords(text, 1)) taggedTokens = list(model.tag(tokens)) print tokens print taggedTokens # get WordNet information for each noun for taggedToken in taggedTokens: if taggedToken[1] == 'nn' or taggedToken[1] == None: # get synsets synsets = impl.lookupSynsetsByForm(taggedToken[0]) # print gloss for synset in synsets: # go through synonyms print '--------------------------------------------------------------' for synonym in synset.synonyms: print synonym.form # go through hyponyms print '- Hyponyms ------------------------------------------------------------' for hyponym in synset.hyponyms(): for hypoSynonym in hyponym.synonyms: print synonym.form, hypoSynonym.form # go through hypernyms