def word_sim(self, word1, word2):
    """Return the similarity of two words, or None if no candidate pair is in the model.

    All spelling variations of both words are looked up in the embedding
    model and the best-scoring candidate pair wins.  Scores <= 0.1 are
    clamped to 0.0; when ``self.wordnet_boost`` is set, a WordNet-distance
    bonus is added and the final score is capped at 1.0.  Results are
    cached symmetrically in ``self.sim_cache``.
    """
    if (word1, word2) in self.sim_cache:
        return self.sim_cache[(word1, word2)]
    cand1 = self.get_spell_variations(word1)
    cand2 = self.get_spell_variations(word2)
    max_pair = (word1, word2)
    max_sim = None
    for c1 in cand1:
        for c2 in cand2:
            if c1 in self.model and c2 in self.model:
                s = self.model.similarity(c1, c2)
                # "is None" rather than truthiness, so a legitimate 0.0
                # (or negative) score is not mistaken for "no score yet"
                if max_sim is None or s > max_sim:
                    # record the winning *candidate* pair; the original
                    # stored (word1, word2) here, which made the WordNet
                    # boost below ignore spelling variations entirely
                    max_pair = (c1, c2)
                    max_sim = s
    if max_sim is not None:
        if max_sim > 0.1:
            if self.wordnet_boost:
                D = Wordnet.get_boost(max_pair[0], max_pair[1])
                if D is not None:
                    # boost decays exponentially with WordNet distance D
                    max_sim += 0.5 * math.exp(-0.25 * D)
        else:
            # weak similarities are treated as noise
            max_sim = 0.0
    if max_sim is not None and max_sim > 1.0:
        max_sim = 1.0
    self.sim_cache[(word1, word2)] = max_sim
    self.sim_cache[(word2, word1)] = max_sim
    return max_sim
def add_wordnet_senses(self, tokens):
    """Annotate each token dict in *tokens* with a 'senses' set, in place.

    When sense enrichment is enabled in the config, senses are pulled from
    WordNet (subject to the configured threshold); otherwise each token's
    only sense is the token itself.
    """
    for tok in tokens:
        if not self.conf.getboolean('wordnet', 'enrich_with_senses'):
            tok['senses'] = set([tok['token']])
        else:
            tok['senses'] = Wordnet.get_senses(
                tok['token'],
                self.conf.getint('wordnet', 'sense_threshold'))
def _featurize(self, w1, w2):
    """Yield (feature_name, value) pairs describing a word pair via WordNet.

    Features (each 0.0 or 1.0): direct hypernymy, two-link hypernymy,
    derivational relatedness, and gloss co-occurrence.
    """
    syns1 = Wordnet.get_significant_synsets(w1)
    syns2 = Wordnet.get_significant_synsets(w2)
    yield 'wordnet_hyp', float(Wordnet.is_hypernym(syns1, syns2))
    yield 'wordnet_2-hyp', float(
        Wordnet.is_two_link_hypernym(syns1, syns2))
    yield 'wordnet_deriv_rel', float(
        Wordnet.is_derivationally_related(syns1, syns2))
    yield 'wordnet_in_glosses', float(
        Wordnet.in_glosses(w1, w2, syns1, syns2))
def add_wordnet_senses(self, tokens):
    """Populate entry['senses'] for every token dict, modifying them in place.

    Uses WordNet sense lookup when 'enrich_with_senses' is switched on in
    the config; otherwise falls back to the singleton set of the token.
    """
    for entry in tokens:
        use_wordnet = self.conf.getboolean('wordnet', 'enrich_with_senses')
        if use_wordnet:
            limit = self.conf.getint('wordnet', 'sense_threshold')
            entry['senses'] = Wordnet.get_senses(entry['token'], limit)
        else:
            entry['senses'] = set([entry['token']])
def add_wordnet_senses(self, tokens):
    """Attach a "senses" set to every token dict in *tokens*, in place.

    Senses come from WordNet when sense enrichment is enabled in the
    config; otherwise a token is its own sole sense.
    """
    for item in tokens:
        # the condition is evaluated first, so getboolean still precedes
        # getint, matching the original call order
        item["senses"] = (
            Wordnet.get_senses(
                item["token"],
                self.conf.getint("wordnet", "sense_threshold"))
            if self.conf.getboolean("wordnet", "enrich_with_senses")
            else set([item["token"]]))