Exemplo n.º 1
0
def all_metrics(string1, string2, longer_prob):
    """Return the raw match statistics and every derived weight for a pair.

    ``string_metrics`` is called with fixed settings (boost threshold 0.7,
    prefix length 4, prefix scale 0.1, typo table ``adjwt``, typo scale 10)
    and its result is fed through ``fn_jaro``, ``fn_winkler`` and -- only
    when ``string_metrics`` reports ``adjust_long`` -- ``fn_longer``.

    Returns a 12-tuple: the seven values produced by ``string_metrics``
    (len1, len2, num_matches, half_transposes, typo_score, pre_matches,
    adjust_long) followed by weight_jaro, weight_typo, weight_winkler,
    weight_winkler_typo and weight_longer.
    """
    prefix_scale = 0.1
    typo_weight = 10

    (len1, len2, num_matches, half_transposes,
     typo_score, pre_matches, adjust_long) = string_metrics(
        string1, string2,
        boost_threshold=0.7, pre_len=4,
        typo_table=adjwt, typo_scale=typo_weight,
        pre_scale=prefix_scale, longer_prob=longer_prob)

    # The long-string adjustment may only be reported when the caller
    # actually asked for it.
    assert longer_prob or not adjust_long

    # Plain Jaro weight: no typo contribution (score 0, scale 1).
    weight_jaro = fn_jaro(len1, len2, num_matches, half_transposes, 0, 1)
    # Same formula, this time including the typo score.
    weight_typo = fn_jaro(
        len1, len2, num_matches, half_transposes, typo_score, typo_weight)

    weight_winkler = fn_winkler(weight_jaro, pre_matches, prefix_scale)
    weight_winkler_typo = fn_winkler(weight_typo, pre_matches, prefix_scale)

    # Without the adjustment the "longer" weight is just the typo-aware
    # Winkler weight.
    weight_longer = (
        fn_longer(weight_winkler_typo, len1, len2, num_matches, pre_matches)
        if adjust_long else weight_winkler_typo
    )

    return (
        len1, len2, num_matches, half_transposes,
        typo_score, pre_matches, adjust_long,
        weight_jaro, weight_typo, weight_winkler,
        weight_winkler_typo, weight_longer,
    )
Exemplo n.º 2
0
def all_metrics(string1, string2, longer_prob):
    """Collect the ``string_metrics`` output plus all weights derived from it.

    The comparison uses fixed settings: boost threshold 0.7, prefix length 4,
    prefix scale 0.1, and the ``adjwt`` typo table with a typo scale of 10.

    The result is a 12-tuple made of the seven ``string_metrics`` values
    (len1, len2, num_matches, half_transposes, typo_score, pre_matches,
    adjust_long) followed by weight_jaro, weight_typo, weight_winkler,
    weight_winkler_typo and weight_longer. ``weight_longer`` equals
    ``weight_winkler_typo`` unless ``adjust_long`` is set, in which case it
    is passed through ``fn_longer``.
    """
    scale_prefix = 0.1
    scale_typo = 10

    raw = string_metrics(
        string1, string2,
        boost_threshold=0.7,
        pre_len=4,
        typo_table=adjwt,
        typo_scale=scale_typo,
        pre_scale=scale_prefix,
        longer_prob=longer_prob)

    len1, len2, num_matches, half_transposes = raw[0], raw[1], raw[2], raw[3]
    (len1, len2, num_matches, half_transposes,
     typo_score, pre_matches, adjust_long) = raw

    # adjust_long must never be set unless longer_prob was requested.
    assert longer_prob or not adjust_long

    stats = (len1, len2, num_matches, half_transposes,
             typo_score, pre_matches, adjust_long)

    # Jaro weight without typos (score 0, scale 1) and with them.
    weight_jaro = fn_jaro(len1, len2, num_matches, half_transposes, 0, 1)
    weight_typo = fn_jaro(len1, len2, num_matches, half_transposes,
                          typo_score, scale_typo)

    # Winkler prefix boost applied to each of the two Jaro variants.
    weight_winkler = fn_winkler(weight_jaro, pre_matches, scale_prefix)
    weight_winkler_typo = fn_winkler(weight_typo, pre_matches, scale_prefix)

    if adjust_long:
        weight_longer = fn_longer(weight_winkler_typo, len1, len2,
                                  num_matches, pre_matches)
    else:
        weight_longer = weight_winkler_typo

    weights = (weight_jaro, weight_typo, weight_winkler,
               weight_winkler_typo, weight_longer)
    return stats + weights
Exemplo n.º 3
0
def test():
    """Check the ``jaro`` metrics against every record in ``jaro_tests``.

    Each record is read as two strings followed by the expected values.
    Those expected values are compared with ``[num_matches,
    half_transposes]`` followed by the results of ``jaro.metric_jaro``,
    ``jaro.metric_jaro_winkler`` and ``jaro.metric_original``, each formatted
    with ``'%7.5f'``. One aligned summary line is printed per passing record.

    Raises:
        AssertionError: on the first record whose computed values differ
            from the expected ones; the two strings and both value lists
            are printed before raising.
    """
    import jaro

    # 'case' rather than 'test', so the loop variable does not shadow this
    # function's own name.
    for case in jaro_tests:
        s1, s2 = case[:2]
        expected = case[2:]

        # Only the match and half-transposition counts are checked, but
        # unpacking all seven fields also confirms the size of the result.
        (_len1, _len2, num_matches, half_transposes,
         _typo_score, _pre_matches, _adjust_long) = jaro.string_metrics(s1, s2)

        weights = [
            jaro.metric_jaro(s1, s2),
            jaro.metric_jaro_winkler(s1, s2),
            jaro.metric_original(s1, s2),
        ]
        # TODO: Test for the custom function?

        check = [num_matches, half_transposes]
        check.extend('%7.5f' % w for w in weights)

        if check != list(expected):
            # Single-argument print(...) behaves the same whether print is a
            # statement (Python 2) or a function (Python 3).
            print('')
            print('%s %s' % (s1, s2))
            print(check)
            print(expected)
            raise AssertionError(
                'jaro metrics mismatch for %r / %r' % (s1, s2))

        columns = []
        for s in (s1, s2):
            # Show blank strings as a run of dashes so the column is visible.
            if not s.strip():
                s = '-' * (len(s) + 1)
            columns.append(s.ljust(12))
        columns.extend(str(n).rjust(2) for n in (num_matches, half_transposes))
        columns.extend(' %7.5f' % w for w in weights)

        print(' '.join(columns))
Exemplo n.º 4
0
def test():
    """Run every record of ``jaro_tests`` through the ``jaro`` metrics.

    A record holds two strings followed by the expected values. The expected
    values are compared with ``[num_matches, half_transposes]`` plus the
    outputs of ``jaro.metric_jaro``, ``jaro.metric_jaro_winkler`` and
    ``jaro.metric_original`` rendered with ``'%7.5f'``. A formatted summary
    line is printed for each record that matches.

    Raises:
        AssertionError: as soon as a record's computed values differ from
            the expected ones; the strings, the computed list and the
            expected values are printed first.
    """
    import jaro

    # The loop variable is deliberately not called 'test', which would
    # shadow this function's name.
    for record in jaro_tests:
        s1, s2 = record[:2]
        expected = record[2:]

        # Unpack all seven fields (this also validates the result's size),
        # although only two of them are compared below.
        (_len1, _len2, num_matches, half_transposes,
         _typo_score, _pre_matches, _adjust_long) = jaro.string_metrics(s1, s2)

        weight_jaro = jaro.metric_jaro(s1, s2)
        weight_winkler = jaro.metric_jaro_winkler(s1, s2)
        weight_original = jaro.metric_original(s1, s2)
        # TODO: Test for the custom function?

        weights = [weight_jaro, weight_winkler, weight_original]
        counts = [num_matches, half_transposes]

        check = counts + ['%7.5f' % w for w in weights]

        if check != list(expected):
            # print(...) with exactly one argument is valid and equivalent
            # under both Python 2 and Python 3.
            print('')
            print('%s %s' % (s1, s2))
            print(check)
            print(expected)
            raise AssertionError(
                'jaro metrics mismatch for %r / %r' % (s1, s2))

        fields = []
        for s in (s1, s2):
            # Blank input is displayed as dashes, one longer than the string.
            if not s.strip():
                s = '-' * (len(s) + 1)
            fields.append(s.ljust(12))
        fields.extend(str(n).rjust(2) for n in counts)
        fields.extend(' %7.5f' % w for w in weights)

        print(' '.join(fields))