def calc_text_token_distance(cls, attr_name, item_list, selected_item):
    """Score every item in ``item_list`` by text closeness to ``selected_item``.

    The ``attr_name`` attribute of the items is turned into token vectors and
    clustered through ``vector_utils``; the selected item's tokens are
    classified against those clusters, and the distance from the resulting
    target vector to each item vector is mapped through ``1 - log(d)`` so
    that smaller distances give larger scores.

    Args:
        cls: Class providing ``normalize``.
        attr_name: Name of the attribute to compare (concatenated into the
            error message, so presumably a ``str``).
        item_list: Items handed to ``vector_utils.to_vector``.
        selected_item: Item the others are compared against.

    Returns:
        Whatever ``cls.normalize`` returns for the list of inverse-distance
        scores (one score per vector from ``make_text_clusters``).

    Raises:
        NotCalculatable: If ``selected_item`` is falsy or its ``attr_name``
            attribute is falsy.
    """
    # Guard: nothing to compare against without a usable attribute value.
    if not (selected_item and getattr(selected_item, attr_name)):
        raise NotCalculatable("selected item's " + attr_name + " is None")

    item_tokens = vector_utils.to_vector(attr_name, item_list)
    selected_tokens = vector_utils.to_value(getattr(selected_item, attr_name))

    clusters, vectors = vector_utils.make_text_clusters(item_tokens)
    target_vector = vector_utils.classify_text_tokens(selected_tokens, clusters)

    distances = []
    for item_vector in vectors:
        distances.append(
            vector_utils.calc_vector_distance(target_vector, item_vector)
        )

    # log(0) is undefined, so an exact match gets a fixed score instead.
    # The original author considered 4 "large enough" for f(x) = 1 - log(x).
    zero_distance_score = 4
    inv_distance = []
    for dist in distances:
        if dist == 0:
            inv_distance.append(zero_distance_score)
        else:
            inv_distance.append(1 - math.log(dist))

    return cls.normalize(inv_distance)
def test_calc_vector_distance(self):
    """Check ``vector_utils.calc_vector_distance`` against known distances.

    The expected values (sqrt(2) and 2) are the straight-line distances
    between the given 2-D points. The comparison is symmetric: a result that
    is too large fails just like one that is too small. A one-sided
    ``expected - distance < eps`` check would accept any over-estimate,
    because the difference becomes negative.
    """
    tolerance = math.pow(0.1, 10)

    # (0, 0) -> (1, 1): offset (1, 1), length sqrt(2).
    distance = vector_utils.calc_vector_distance([0, 0], [1, 1])
    self.assertAlmostEqual(distance, math.sqrt(2), delta=tolerance)

    # (1, 1) -> (2, 1 + sqrt(3)): offset (1, sqrt(3)), length 2.
    distance = vector_utils.calc_vector_distance([1, 1], [2, 1 + math.sqrt(3)])
    self.assertAlmostEqual(distance, 2, delta=tolerance)