def test_ngram_distances_one_word():
    """Check the n-gram distance measures on single-word strings.

    Unigram distance is expected to be 0 for identical words and 1 for
    different words. Bigram and trigram distances are expected to be 1 even
    when a string is compared with itself: a single word yields no
    bigrams/trigrams, and the measures are expected to treat that case as
    "different".
    """
    s1 = "string1"
    s2 = "string2"

    assert string_distance_measures.distance_unigrams_same(s1, s1) == 0
    assert string_distance_measures.distance_unigrams_same(s1, s2) == 1

    # NOTE: print(x) with a single argument behaves the same on Python 2 and
    # Python 3; the bare `print x` statement is a SyntaxError on Python 3.
    dist = string_distance_measures.distance_bigrams_same(s1, s1)
    print(dist)
    assert dist == 1  # no bigrams so regarded as being different!
    dist = string_distance_measures.distance_bigrams_same(s1, s2)
    print(dist)
    assert dist == 1

    dist = string_distance_measures.distance_trigrams_same(s1, s1)
    print(dist)
    assert dist == 1  # no trigrams so regarded as being different
    dist = string_distance_measures.distance_trigrams_same(s1, s2)
    print(dist)
    assert dist == 1
# Example no. 2
# 0
def test_ngram_distances_one_word():
    """Check the n-gram distance measures on single-word strings.

    Unigram distance is expected to be 0 for identical words and 1 for
    different words. Bigram and trigram distances are expected to be 1 even
    when a string is compared with itself: a single word yields no
    bigrams/trigrams, and the measures are expected to treat that case as
    "different".
    """
    s1 = "string1"
    s2 = "string2"

    assert string_distance_measures.distance_unigrams_same(s1, s1) == 0
    assert string_distance_measures.distance_unigrams_same(s1, s2) == 1

    # NOTE: print(x) with a single argument behaves the same on Python 2 and
    # Python 3; the bare `print x` statement is a SyntaxError on Python 3.
    dist = string_distance_measures.distance_bigrams_same(s1, s1)
    print(dist)
    assert dist == 1  # no bigrams so regarded as being different!
    dist = string_distance_measures.distance_bigrams_same(s1, s2)
    print(dist)
    assert dist == 1

    dist = string_distance_measures.distance_trigrams_same(s1, s1)
    print(dist)
    assert dist == 1  # no trigrams so regarded as being different
    dist = string_distance_measures.distance_trigrams_same(s1, s2)
    print(dist)
    assert dist == 1
def test_ngram_distances_four_words():
    """Check the n-gram distance measures on four-word sentences.

    The two sentences differ only in their first word. Comparing a sentence
    with itself is expected to give distance 0 for unigrams, bigrams and
    trigrams. Comparing the two sentences is expected to give 0.4 (unigrams),
    0.5 (bigrams) and roughly 0.667 (trigrams).

    NOTE(review): these values look consistent with a Jaccard-style distance
    (1 - shared / union) -- confirm against string_distance_measures.
    """
    s1 = "string1 some thing else"
    s2 = "string2 some thing else"
    # Fractional expectations are compared with a tolerance, because exact
    # float equality depends on how the implementation computes the ratio.
    tolerance = 1e-9

    assert string_distance_measures.distance_unigrams_same(s1, s1) == 0
    dist = string_distance_measures.distance_unigrams_same(s1, s2)
    # NOTE: print(x) with a single argument behaves the same on Python 2 and
    # Python 3; the bare `print x` statement is a SyntaxError on Python 3.
    print(dist)
    assert abs(dist - 0.4) < tolerance

    dist = string_distance_measures.distance_bigrams_same(s1, s1)
    print(dist)
    assert dist == 0
    dist = string_distance_measures.distance_bigrams_same(s1, s2)
    print(dist)
    assert abs(dist - 0.5) < tolerance

    dist = string_distance_measures.distance_trigrams_same(s1, s1)
    print(dist)
    assert dist == 0
    dist = string_distance_measures.distance_trigrams_same(s1, s2)
    print(dist)
    assert 0.6 < dist < 0.7  # approx. 0.66666667
# Example no. 4
# 0
def test_ngram_distances_four_words():
    """Check the n-gram distance measures on four-word sentences.

    The two sentences differ only in their first word. Comparing a sentence
    with itself is expected to give distance 0 for unigrams, bigrams and
    trigrams. Comparing the two sentences is expected to give 0.4 (unigrams),
    0.5 (bigrams) and roughly 0.667 (trigrams).

    NOTE(review): these values look consistent with a Jaccard-style distance
    (1 - shared / union) -- confirm against string_distance_measures.
    """
    s1 = "string1 some thing else"
    s2 = "string2 some thing else"
    # Fractional expectations are compared with a tolerance, because exact
    # float equality depends on how the implementation computes the ratio.
    tolerance = 1e-9

    assert string_distance_measures.distance_unigrams_same(s1, s1) == 0
    dist = string_distance_measures.distance_unigrams_same(s1, s2)
    # NOTE: print(x) with a single argument behaves the same on Python 2 and
    # Python 3; the bare `print x` statement is a SyntaxError on Python 3.
    print(dist)
    assert abs(dist - 0.4) < tolerance

    dist = string_distance_measures.distance_bigrams_same(s1, s1)
    print(dist)
    assert dist == 0
    dist = string_distance_measures.distance_bigrams_same(s1, s2)
    print(dist)
    assert abs(dist - 0.5) < tolerance

    dist = string_distance_measures.distance_trigrams_same(s1, s1)
    print(dist)
    assert dist == 0
    dist = string_distance_measures.distance_trigrams_same(s1, s2)
    print(dist)
    assert 0.6 < dist < 0.7  # approx. 0.66666667