def test_ngram_distances_one_word():
    """Test the n-gram distance measures on single-word strings.

    Expected values pinned by this test:

    * unigram distance is 0 for identical strings and 1 for different ones;
    * bigram and trigram distances are 1 in every case, even when a string
      is compared with itself, because a single word yields no bigrams or
      trigrams and such inputs are regarded as different.
    """
    s1 = "string1"
    s2 = "string2"

    # Unigrams: identical strings match fully, different strings not at all.
    assert string_distance_measures.distance_unigrams_same(s1, s1) == 0
    assert string_distance_measures.distance_unigrams_same(s1, s2) == 1

    # print(x) with a single argument produces the same output on
    # Python 2 and Python 3, unlike the Python 2-only print statement.
    dist = string_distance_measures.distance_bigrams_same(s1, s1)
    print(dist)
    assert dist == 1  # no bigrams so regarded as being different!

    dist = string_distance_measures.distance_bigrams_same(s1, s2)
    print(dist)
    assert dist == 1

    dist = string_distance_measures.distance_trigrams_same(s1, s1)
    print(dist)
    assert dist == 1  # no trigrams so regarded as being different

    dist = string_distance_measures.distance_trigrams_same(s1, s2)
    print(dist)
    assert dist == 1
def test_ngram_distances_four_words():
    """Test the n-gram distance measures on four-word sentences.

    The two sentences differ only in their first word. Expected values
    pinned by this test:

    * every measure returns 0 when a sentence is compared with itself;
    * unigram distance between the two sentences is 0.4;
    * bigram distance between the two sentences is 0.5;
    * trigram distance between the two sentences is roughly 2/3.
    """
    s1 = "string1 some thing else"
    s2 = "string2 some thing else"
    # Absolute tolerance for comparing computed float distances; exact
    # equality on non-integral floats depends on how the value is derived.
    tolerance = 1e-9

    assert string_distance_measures.distance_unigrams_same(s1, s1) == 0

    # print(x) with a single argument produces the same output on
    # Python 2 and Python 3, unlike the Python 2-only print statement.
    dist = string_distance_measures.distance_unigrams_same(s1, s2)
    print(dist)
    assert abs(dist - 0.4) < tolerance

    dist = string_distance_measures.distance_bigrams_same(s1, s1)
    print(dist)
    assert dist == 0

    dist = string_distance_measures.distance_bigrams_same(s1, s2)
    print(dist)
    assert abs(dist - 0.5) < tolerance

    dist = string_distance_measures.distance_trigrams_same(s1, s1)
    print(dist)
    assert dist == 0

    dist = string_distance_measures.distance_trigrams_same(s1, s2)
    print(dist)
    assert 0.6 < dist < 0.7  # approx. 0.66666667