def _search_wn( self, word ): ''' Search the wordnet for this word, based on user options. @return: A list of related words. >>> wn.synsets('blue')[0].hypernyms() [Synset('chromatic_color.n.01')] >>> wn.synsets('blue')[0].hypernyms()[0].hyponyms() [ Synset('orange.n.02'), Synset('brown.n.01'), Synset('green.n.01'), Synset('salmon.n.04'), Synset('red.n.01'), Synset('blue.n.01'), Synset('blond.n.02'), Synset('purple.n.01'), Synset('olive.n.05'), Synset('yellow.n.01'), Synset('pink.n.01'), Synset('pastel.n.01'), Synset('complementary_color.n.01')] >>> ''' result = [] # Now the magic that gets me a lot of results: try: result.extend( wn.synsets(word)[0].hypernyms()[0].hyponyms() ) except: pass synset_list = wn.synsets( word ) for synset in synset_list: # first I add the synsec as it is: result.append( synset ) # Now some variations... result.extend( synset.hypernyms() ) result.extend( synset.hyponyms() ) result.extend( synset.member_holonyms() ) result.extend( synset.lemmas[0].antonyms() ) # Now I have a results list filled up with a lot of words, the problem is that # this words are really Synset objects, so I'll transform them to strings: result = [ i.name.split('.')[0] for i in result] # Another problem with Synsets is that the name is "underscore separated" # so, for example: # "big dog" is "big_dog" result = [ i.replace('_', ' ') for i in result] # Now I make a "uniq" result = list(set(result)) if word in result: result.remove(word) # The next step is to order each list by popularity, so I only send to the web # the most common words, not the strange and unused words. result = self._popularity_contest( result ) # left here for debugging! #print word, result return result
def _search_wn(self, word): ''' Search the wordnet for this word, based on user options. :return: A list of related words. ''' result = [] if not word or word.isdigit(): return result with self._plugin_lock: # Now the magic that gets me a lot of results: try: result.extend(wn.synsets(word)[0].hypernyms()[0].hyponyms()) except: pass synset_list = wn.synsets(word) for synset in synset_list: # first I add the synset as it is: result.append(synset) # Now some variations... result.extend(synset.hypernyms()) result.extend(synset.hyponyms()) result.extend(synset.member_holonyms()) result.extend(synset.lemmas[0].antonyms()) # Now I have a results list filled up with a lot of words, the problem is that # this words are really Synset objects, so I'll transform them to strings: result = [i.name.split('.')[0] for i in result] # Another problem with Synsets is that the name is "underscore separated" # so, for example: # "big dog" is "big_dog" result = [i.replace('_', ' ') for i in result] # Now I make a "uniq" result = list(set(result)) if word in result: result.remove(word) # The next step is to order each list by popularity, so I only send to the web # the most common words, not the strange and unused words. result = self._popularity_contest(result) # Respect the user settings result = result[:self._wordnet_results] return result
def test_simple(self):
    # Sanity check of the wordnet data: the hyponyms of the first hypernym
    # of the first 'blue' synset must be the known set of color names.
    expected_names = set((
        'orange', 'brown', 'green', 'salmon', 'pink', 'red', 'blue',
        'blond', 'purple', 'olive', 'yellow', 'pastel',
        'complementary_color',
    ))

    blue_synset = wn.synsets('blue')[0]
    parent_synset = blue_synset.hypernyms()[0]

    # Synset names look like "orange.n.02", keep only the word itself.
    sibling_names = set(
        sibling.name.split('.')[0] for sibling in parent_synset.hyponyms()
    )

    self.assertEqual(sibling_names, expected_names)
def test_simple(self):
    # Walk from 'blue' up to its first hypernym and back down to all of
    # that hypernym's hyponyms, then compare the bare words with the
    # expected color names.
    siblings = wn.synsets('blue')[0].hypernyms()[0].hyponyms()

    found = set()
    for sibling in siblings:
        # "orange.n.02" -> "orange"
        bare_word = sibling.name.split('.')[0]
        found.add(bare_word)

    wanted = set((
        'orange',
        'brown',
        'green',
        'salmon',
        'pink',
        'red',
        'blue',
        'blond',
        'purple',
        'olive',
        'yellow',
        'pastel',
        'complementary_color',
    ))

    self.assertEqual(found, wanted)