Пример #1
0
def compare_vectors(word_vector1, word_vector2):
    """Score how alike two lists of words are; a larger score means more alike.

    Each list is turned into a vector of per-word frequencies over the
    combined vocabulary of both lists, and the two vectors are handed to
    ``similarity`` (cosine similarity).

    Expected range: 0 (bad) up to 1 (both lists use the same words in the
    same proportions).

    """
    # The vocabulary is every distinct word seen in either list; both
    # frequency vectors are laid out in this one shared order.
    vocabulary = set(word_vector1) | set(word_vector2)

    counts_first = word_frequencies(word_vector1)
    counts_second = word_frequencies(word_vector2)

    def as_vector(counts):
        # Words missing from one list contribute a frequency of 0.
        return [counts.get(term, 0) for term in vocabulary]

    vector_first = as_vector(counts_first)
    vector_second = as_vector(counts_second)
    return similarity(vector_first, vector_second)
Пример #2
0
 def test_similarity(self):
     """Check ``similarity`` on opposite, identical and orthogonal vectors."""
     # (left vector, right vector, expected score)
     cases = (
         ([1, 1], [-1, -1], -1),  # opposite directions
         ([1, 1], [1, 1], 1),     # identical vectors
         ([0, 1], [1, 0], 0),     # orthogonal vectors
     )
     for left, right, expected in cases:
         self.assertAlmostEqual(similarity(left, right), expected)