예제 #1
0
 def word_sim(self, word1, word2):
     """Return the similarity of two words, or None if it cannot be scored.

     Every spelling variation of ``word1`` (as produced by
     ``self.get_spell_variations``) is compared with every variation of
     ``word2``; only pairs where both variants are present in
     ``self.model`` are scored, and the highest ``self.model.similarity``
     value wins.

     Post-processing of the best score:

     * scores of at most 0.1 are flattened to 0.0;
     * scores above 0.1 are increased by ``0.5 * exp(-0.25 * D)`` when
       ``self.wordnet_boost`` is set and ``Wordnet.get_boost`` returns a
       non-None ``D`` for the best-scoring pair of variants;
     * the final value is capped at 1.0.

     The result (including None) is stored in ``self.sim_cache`` under
     both ``(word1, word2)`` and ``(word2, word1)``.

     :param word1: first word
     :param word2: second word
     :return: the processed similarity, or None when no pair of variants
         is known to the model
     """
     cache_key = (word1, word2)
     if cache_key in self.sim_cache:
         return self.sim_cache[cache_key]

     cand1 = self.get_spell_variations(word1)
     cand2 = self.get_spell_variations(word2)
     max_pair = (word1, word2)
     max_sim = None
     for c1 in cand1:
         for c2 in cand2:
             if c1 in self.model and c2 in self.model:
                 s = self.model.similarity(c1, c2)
                 # Compare against None explicitly: a legitimate score of
                 # 0.0 must not be mistaken for "nothing scored yet".
                 if max_sim is None or s > max_sim:
                     # Remember the variants that produced the best score
                     # so the boost below is looked up for them.
                     max_pair = (c1, c2)
                     max_sim = s

     if max_sim is not None:
         if max_sim > 0.1:
             if self.wordnet_boost:
                 D = Wordnet.get_boost(max_pair[0], max_pair[1])
                 if D is not None:
                     max_sim += 0.5 * math.exp(-0.25 * D)
         else:
             max_sim = 0.0
         # The boost can push the score past 1.0; cap it.
         if max_sim > 1.0:
             max_sim = 1.0

     # The result is cached under both argument orders.
     self.sim_cache[(word1, word2)] = max_sim
     self.sim_cache[(word2, word1)] = max_sim
     return max_sim
예제 #2
0
 def add_wordnet_senses(self, tokens):
     """Store a 'senses' entry on every token in ``tokens`` (in place).

     When the ``enrich_with_senses`` option of the ``wordnet`` config
     section is true, the entry is the result of ``Wordnet.get_senses``
     called with the token text and the configured ``sense_threshold``.
     Otherwise it is a one-element set holding just the token text.

     The options are read once per call rather than once per token, so
     they are read even when ``tokens`` is empty.

     :param tokens: iterable of mutable mappings with a 'token' key
     :return: None
     """
     enrich = self.conf.getboolean('wordnet', 'enrich_with_senses')
     if enrich:
         # Only needed (and only read) when enrichment is switched on.
         threshold = self.conf.getint('wordnet', 'sense_threshold')
     for token in tokens:
         if enrich:
             token['senses'] = Wordnet.get_senses(token['token'], threshold)
         else:
             token['senses'] = {token['token']}
예제 #3
0
 def _featurize(self, w1, w2):
     """Lazily yield ``(feature_name, value)`` pairs for a word pair.

     The synsets of both words are fetched via
     ``Wordnet.get_significant_synsets``; each feature is the float value
     of one ``Wordnet`` relation check and is only computed when the
     generator is advanced to it.

     :param w1: first word
     :param w2: second word
     """
     synsets1 = Wordnet.get_significant_synsets(w1)
     synsets2 = Wordnet.get_significant_synsets(w2)
     # Each check is wrapped in a lambda so it runs only when its feature
     # is actually requested, in the order listed here.
     checks = (
         ('wordnet_hyp',
          lambda: Wordnet.is_hypernym(synsets1, synsets2)),
         ('wordnet_2-hyp',
          lambda: Wordnet.is_two_link_hypernym(synsets1, synsets2)),
         ('wordnet_deriv_rel',
          lambda: Wordnet.is_derivationally_related(synsets1, synsets2)),
         ('wordnet_in_glosses',
          lambda: Wordnet.in_glosses(w1, w2, synsets1, synsets2)),
     )
     for feature_name, check in checks:
         yield feature_name, float(check())
예제 #4
0
 def add_wordnet_senses(self, tokens):
     """Annotate each token mapping in ``tokens`` with a 'senses' value.

     If the ``wordnet`` / ``enrich_with_senses`` option is true, the value
     comes from ``Wordnet.get_senses(token text, sense_threshold)``;
     otherwise it is a set containing only the token text. Tokens are
     modified in place and nothing is returned.

     The configuration is looked up once per call instead of once per
     token, so it is read even for an empty ``tokens``.

     :param tokens: iterable of mutable mappings with a 'token' key
     :return: None
     """
     use_wordnet = self.conf.getboolean('wordnet', 'enrich_with_senses')
     # The threshold is read only when enrichment is enabled.
     sense_threshold = (
         self.conf.getint('wordnet', 'sense_threshold')
         if use_wordnet else None)
     for token in tokens:
         if use_wordnet:
             token['senses'] = Wordnet.get_senses(
                 token['token'], sense_threshold)
         else:
             token['senses'] = {token['token']}
예제 #5
0
 def add_wordnet_senses(self, tokens):
     """Set ``token["senses"]`` on every token mapping in ``tokens``.

     With the ``enrich_with_senses`` option of the ``wordnet`` section
     enabled, the senses are whatever ``Wordnet.get_senses`` returns for
     the token text and the configured ``sense_threshold``. With it
     disabled, the senses are a single-element set of the token text.

     Both options are invariant across the loop, so they are read once
     up front (the threshold only if enrichment is enabled).

     :param tokens: iterable of mutable mappings with a "token" key
     :return: None; the tokens are updated in place
     """
     enrich = self.conf.getboolean("wordnet", "enrich_with_senses")
     if not enrich:
         for token in tokens:
             token["senses"] = {token["token"]}
         return

     threshold = self.conf.getint("wordnet", "sense_threshold")
     for token in tokens:
         token["senses"] = Wordnet.get_senses(token["token"], threshold)