def test_cosine_distance():
    """Test the Vector Model's cosine similarity distance measurement.

    Calls ``string_distance_measures.distance_cosine_measure`` on:

    * a string compared with itself (expected distance 0),
    * "string1" compared with "string2" (expected distance 1),
    * two sentence pairs that differ by a single word (expected distances
      of approximately 0.2 and 0.105572809),
    * two empty strings (expected distance 0).
    """
    cosine = string_distance_measures.distance_cosine_measure

    s1 = "string1"
    s2 = "string2"

    # NOTE: print is called with one parenthesised argument throughout so the
    # same line is valid as a Python 2 print statement and as a Python 3
    # function call, and emits the same text in both.
    dist = cosine(s1, s1)
    print(dist)
    assert dist == 0

    dist = cosine(s1, s2)
    print(dist)
    assert dist == 1

    # One word replaced ("a" -> "another").
    s3 = "mary had a little lamb"
    s4 = "mary had another little lamb"
    dist = cosine(s3, s4)
    print("distance: %s" % (dist,))
    assert 0.19 < dist < 0.21  # approx. 0.2

    # One word removed ("a").
    s3 = "mary had a little lamb"
    s4 = "mary had little lamb"
    dist = cosine(s3, s4)
    print("distance: %s" % (dist,))
    assert 0.1 < dist < 0.11  # approx. 0.105572809

    # Edge case: both inputs empty.
    dist = cosine("", "")
    print("distance: %s" % (dist,))
    assert dist == 0
def test_calls_work():
    """Test the calls to check that the signatures work as expected.

    Each distance function in ``string_distance_measures`` is called once
    with identical inputs and once with the differing inputs "string1" and
    "string2"; only coarse properties of the result are asserted.
    """
    # NOTE(review): another function named test_calls_work is defined later
    # in this file; the later definition rebinds the name, so this one is
    # never collected by the test runner. Consider removing one of them.
    sdm = string_distance_measures

    s1 = "string1"
    s2 = "string2"

    # Character-level measures: zero for identical input, positive otherwise.
    assert sdm.distance_levenshtein_distance(s1, s1) == 0
    assert sdm.distance_levenshtein_distance(s1, s2) > 0
    assert sdm.distance_levenshtein_jaro_winkler(s1, s1) == 0
    assert sdm.distance_levenshtein_jaro_winkler(s1, s2) > 0
    assert sdm.distance_levenshtein_ratio(s1, s1) == 0
    assert sdm.distance_levenshtein_ratio(s1, s2) > 0

    assert sdm.distance_title_len(s1, s1) == 0
    assert sdm.distance_title_len(s1, s2) == 0  # note same length strings!

    # From here on s1/s2 hold the output of make_terms_from_string.
    s1 = sdm.make_terms_from_string(s1)
    s2 = sdm.make_terms_from_string(s2)
    assert sdm.distance_nbr_title_terms(s1, s1) == 0
    assert sdm.distance_nbr_title_terms(s1, s2) == 0  # note same length strings!

    # NOTE(review): the cosine measure is called here with the
    # make_terms_from_string results, whereas test_cosine_distance passes raw
    # strings -- confirm both input forms are intended to be supported.
    # print uses a single parenthesised argument so the line is valid on both
    # Python 2 and Python 3.
    dist = sdm.distance_cosine_measure(s1, s1)
    print(dist)
    assert dist == 0

    dist = sdm.distance_cosine_measure(s1, s2)
    print(dist)
    assert dist == 1
def test_calls_work():
    """Test the calls to check that the signatures work as expected.

    Exercises every distance function of ``string_distance_measures`` with a
    pair of identical inputs and a pair of differing inputs ("string1" and
    "string2"), asserting only coarse properties of the returned distance.
    """
    # NOTE(review): this definition shares its name with an earlier function
    # in this file and replaces it when the module is imported, so only this
    # copy is run. Consider removing the duplicate.
    s1 = "string1"
    s2 = "string2"

    # Levenshtein-based measures: identical input gives 0, different input
    # gives a positive distance.
    assert string_distance_measures.distance_levenshtein_distance(s1, s1) == 0
    assert string_distance_measures.distance_levenshtein_distance(s1, s2) > 0
    assert string_distance_measures.distance_levenshtein_jaro_winkler(s1, s1) == 0
    assert string_distance_measures.distance_levenshtein_jaro_winkler(s1, s2) > 0
    assert string_distance_measures.distance_levenshtein_ratio(s1, s1) == 0
    assert string_distance_measures.distance_levenshtein_ratio(s1, s2) > 0

    # note same length strings!
    assert string_distance_measures.distance_title_len(s1, s1) == 0
    assert string_distance_measures.distance_title_len(s1, s2) == 0

    # Rebind s1/s2 to the output of make_terms_from_string for the
    # term-based measures below.
    s1 = string_distance_measures.make_terms_from_string(s1)
    s2 = string_distance_measures.make_terms_from_string(s2)

    # note same length strings!
    assert string_distance_measures.distance_nbr_title_terms(s1, s1) == 0
    assert string_distance_measures.distance_nbr_title_terms(s1, s2) == 0

    # NOTE(review): distance_cosine_measure receives the
    # make_terms_from_string results here rather than raw strings as in
    # test_cosine_distance -- confirm this is intended.
    # Single parenthesised argument keeps print valid on Python 2 and 3.
    dist = string_distance_measures.distance_cosine_measure(s1, s1)
    print(dist)
    assert dist == 0

    dist = string_distance_measures.distance_cosine_measure(s1, s2)
    print(dist)
    assert dist == 1