Пример #1
0
def similarity2(a, b):
    """Calculate the similarity of two multi-word strings.

    Both strings are lower-cased and split into words with
    ``_split_words_re``; empty fragments are dropped.  Each word of the
    shorter word list is then paired with the best-scoring word (as rated
    by ``astrcmp``) still available in the longer list.

    Args:
        a: First string.
        b: Second string.

    Returns:
        The sum of the best per-word scores divided by the number of words
        in the shorter list plus 0.4 for every word left over in the longer
        list, or 0.0 when neither string contains any word.
    """
    # Build real lists: on Python 3 ``filter`` returns a lazy iterator,
    # which supports neither ``len()`` nor ``del seq[i]`` used below.
    alist = [word for word in _split_words_re.split(a.lower()) if word]
    blist = [word for word in _split_words_re.split(b.lower()) if word]

    # Always drive the outer loop with the shorter list.
    if len(alist) > len(blist):
        alist, blist = blist, alist

    score = 0.0
    for aword in alist:
        ms = 0.0  # best score found for this word
        mp = None  # index in blist of the best-scoring word
        for position, bword in enumerate(blist):
            s = astrcmp(aword, bword)
            if s > ms:
                ms = s
                mp = position
        if mp is not None:
            score += ms
            # A sufficiently good match is consumed so that it cannot be
            # paired with another word later on.
            if ms > 0.6:
                del blist[mp]

    # Every word of the shorter list counts fully; words remaining in the
    # longer list count with a weight of 0.4 each.
    total = len(alist) + len(blist) * 0.4
    if total:
        return score / total
    return 0.0
Пример #2
0
def similarity2(a, b):
    """Score how similar two multi-word strings are.

    The inputs are lower-cased and split with ``_split_words_re`` (empty
    pieces are discarded).  Every word of the shorter list is matched
    against the longer list using ``astrcmp``; the best score per word is
    accumulated, and a matched word is removed from the longer list when
    its score exceeds 0.6.  The accumulated score is divided by the length
    of the shorter list plus 0.4 times the number of words remaining in
    the longer list.  Returns 0 when that divisor is zero.
    """
    shorter = [piece for piece in _split_words_re.split(a.lower()) if piece]
    longer = [piece for piece in _split_words_re.split(b.lower()) if piece]
    if len(shorter) > len(longer):
        shorter, longer = longer, shorter

    matched_sum = 0.0
    for word in shorter:
        best_score, best_index = 0.0, None
        for index, candidate in enumerate(longer):
            current = astrcmp(word, candidate)
            if current > best_score:
                best_score, best_index = current, index
        if best_index is None:
            # no candidate scored above zero for this word
            continue
        matched_sum += best_score
        if best_score > 0.6:
            # consume the match so it is not reused for another word
            longer.pop(best_index)

    weight = len(shorter) + len(longer) * 0.4
    return matched_sum / weight if weight else 0
Пример #3
0
def similarity2(a, b):
    """Score how similar two multi-word strings are.

    Returns 0.0 if either argument is falsy and 1.0 if the arguments are
    equal.  Otherwise both strings are lower-cased and split with
    ``_split_words_re`` (empty pieces are discarded); 0.0 is returned if
    either resulting word list is empty.  Each word of the shorter list is
    then matched against the longer list with ``astrcmp``, the best score
    per word is summed, and a matched word is removed from the longer list
    when its score exceeds 0.6.  The result is that sum divided by the
    length of the shorter list plus 0.4 times the number of words left in
    the longer list.
    """
    if not (a and b):
        return 0.0
    if a == b:
        return 1.0

    words_a = [piece for piece in _split_words_re.split(a.lower()) if piece]
    words_b = [piece for piece in _split_words_re.split(b.lower()) if piece]
    if not (words_a and words_b):
        return 0.0

    # The shorter list drives the outer loop; the longer one is consumed.
    if len(words_a) > len(words_b):
        shorter, longer = words_b, words_a
    else:
        shorter, longer = words_a, words_b

    matched_sum = 0.0
    for word in shorter:
        best_score, best_index = 0.0, None
        for index, candidate in enumerate(longer):
            current = astrcmp(word, candidate)
            if current > best_score:
                best_score, best_index = current, index
        if best_index is None:
            continue
        matched_sum += best_score
        if best_score > 0.6:
            longer.pop(best_index)

    # The divisor is never zero: ``shorter`` is non-empty at this point.
    return matched_sum / (len(shorter) + len(longer) * 0.4)
Пример #4
0
def similarity(a1, b1):
    """Return the ``astrcmp`` score of two single words after normalization.

    ``a1`` is always passed through ``normalize``.  ``b1`` is normalized
    only when the normalized ``a1`` is truthy; otherwise an empty string is
    compared instead.
    """
    first = normalize(a1)
    second = normalize(b1) if first else ""
    return astrcmp(first, second)
Пример #5
0
def similarity(a1, b1):
    """Compare two single words with ``astrcmp`` after normalizing them.

    The first word is normalized with ``normalize``.  If that yields a
    falsy value, the second word is not normalized at all and the
    comparison is made against an empty string.
    """
    normalized_a = normalize(a1)
    if not normalized_a:
        # Skip normalizing b1 when a1 normalized to nothing.
        return astrcmp(normalized_a, "")
    return astrcmp(normalized_a, normalize(b1))