def distance(self, query, entity):
    """Return a best-match alignment distance between two weighted bags.

    Both bags are trimmed to their 100 highest-valued entries and expanded
    through ``self.iter_word_reps``.  Every representation on the ``b``
    side is paired with its most similar representation on the ``a`` side
    (similarities are floored at 0.0), and the result is ``1.0`` minus the
    mean of those best-match similarities.

    Args:
        query: Mapping whose values are sortable weights (presumably
            term -> weight; confirm against callers).
        entity: Mapping of the same shape for the entity side.

    Returns:
        A float.  ``1.0`` is returned when either input is empty or when
        ``iter_word_reps`` yields nothing for either side.
    """
    q_len = len(query)
    e_len = len(entity)
    if q_len == 0 or e_len == 0:
        return 1.0

    by_weight = operator.itemgetter(1)
    # .items() instead of .iteritems() so this runs on Python 2 and 3.
    query = dict(sorted(query.items(), key=by_weight, reverse=True)[:100])
    entity = dict(sorted(entity.items(), key=by_weight, reverse=True)[:100])

    query_wrs = list(self.iter_word_reps(query))
    entity_wrs = list(self.iter_word_reps(entity))

    # `a` comes from the bag that had more entries before trimming, `b`
    # from the other one; on a tie the entity side is `a`.
    if e_len >= q_len:
        a, b = entity_wrs, query_wrs
    else:
        a, b = query_wrs, entity_wrs

    # Nothing to align: avoid dividing by len(b) == 0 and report the same
    # maximal distance used for empty input.
    if not a or not b:
        return 1.0

    total = 0.0
    for wb in b:
        # NOTE(review): compares the individual representations rather
        # than the whole bags, and converts the distance to a similarity.
        # Assumes dense_cosine_distance returns 1 - cosine similarity and
        # accepts the items yielded by iter_word_reps -- confirm.
        sims = [1.0 - dense_cosine_distance(wa, wb) for wa in a]
        # The leading 0.0 keeps negative similarities from lowering the sum.
        total += max(0.0, *sims)
    return 1.0 - (total / len(b))
def distance(self, query, entity):
    """Return a best-match alignment distance between two weighted bags.

    Both bags are trimmed to their 100 highest-valued entries and expanded
    through ``self.iter_word_reps``.  Every representation on the ``b``
    side is paired with its most similar representation on the ``a`` side
    (similarities are floored at 0.0), and the result is ``1.0`` minus the
    mean of those best-match similarities.

    Args:
        query: Mapping whose values are sortable weights (presumably
            term -> weight; confirm against callers).
        entity: Mapping of the same shape for the entity side.

    Returns:
        A float.  ``1.0`` is returned when either input is empty or when
        ``iter_word_reps`` yields nothing for either side.
    """
    q_len = len(query)
    e_len = len(entity)
    if q_len == 0 or e_len == 0:
        return 1.0

    by_weight = operator.itemgetter(1)
    # .items() instead of .iteritems() so this runs on Python 2 and 3.
    query = dict(sorted(query.items(), key=by_weight, reverse=True)[:100])
    entity = dict(sorted(entity.items(), key=by_weight, reverse=True)[:100])

    query_wrs = list(self.iter_word_reps(query))
    entity_wrs = list(self.iter_word_reps(entity))

    # `a` comes from the bag that had more entries before trimming, `b`
    # from the other one; on a tie the entity side is `a`.
    if e_len >= q_len:
        a, b = entity_wrs, query_wrs
    else:
        a, b = query_wrs, entity_wrs

    # Nothing to align: avoid dividing by len(b) == 0 and report the same
    # maximal distance used for empty input.
    if not a or not b:
        return 1.0

    total = 0.0
    for wb in b:
        # NOTE(review): compares the individual representations rather
        # than the whole bags, and converts the distance to a similarity.
        # Assumes dense_cosine_distance returns 1 - cosine similarity and
        # accepts the items yielded by iter_word_reps -- confirm.
        sims = [1.0 - dense_cosine_distance(wa, wb) for wa in a]
        # The leading 0.0 keeps negative similarities from lowering the sum.
        total += max(0.0, *sims)
    return 1.0 - (total / len(b))
def distance(self, query, entity):
    """Return ``dense_cosine_distance`` of the two converted inputs.

    Each argument is passed through ``self.bow_to_dbow`` (query first,
    then entity) and the two results are handed to
    ``dense_cosine_distance``, whose return value is passed back unchanged.

    Args:
        query: Query-side input accepted by ``self.bow_to_dbow``
            (presumably a bag of words -- confirm against callers).
        entity: Entity-side input of the same kind.
    """
    return dense_cosine_distance(
        self.bow_to_dbow(query),
        self.bow_to_dbow(entity),
    )
def distance(self, query, entity):
    """Return ``dense_cosine_distance`` of the two converted inputs.

    Each argument is passed through ``self.bow_to_dbow`` (query first,
    then entity) and the two results are handed to
    ``dense_cosine_distance``, whose return value is passed back unchanged.

    Args:
        query: Query-side input accepted by ``self.bow_to_dbow``
            (presumably a bag of words -- confirm against callers).
        entity: Entity-side input of the same kind.
    """
    return dense_cosine_distance(
        self.bow_to_dbow(query),
        self.bow_to_dbow(entity),
    )