def similarity2(a, b):
    """Calculates similarity of multi-word strings.

    Both strings are lower-cased and split into words with
    ``_split_words_re``; empty fragments are discarded.  Each word of the
    shorter word list is compared (via ``astrcmp``) against every word still
    remaining in the longer list and its best score is added to the total.
    A matched word is removed from the longer list only when its score
    exceeds 0.6, so it cannot be matched again.

    The summed score is divided by the number of words in the shorter list
    plus 0.4 for every word left over in the longer list.

    Args:
        a: First string.
        b: Second string.

    Returns:
        The weighted similarity as a float, or 0.0 when neither string
        contains any word.
    """
    # Materialize real lists: on Python 3 ``filter`` returns a lazy iterator,
    # which supports neither ``len()`` nor ``del blist[mp]`` below.
    alist = list(filter(bool, _split_words_re.split(a.lower())))
    blist = list(filter(bool, _split_words_re.split(b.lower())))
    total = 0
    score = 0.0
    # Always iterate over the shorter list and consume from the longer one.
    if len(alist) > len(blist):
        alist, blist = blist, alist
    # Loop variables are named distinctly so they do not shadow the
    # parameters ``a`` and ``b``.
    for aword in alist:
        ms = 0.0   # best score seen for this word
        mp = None  # position in blist of that best score
        for position, bword in enumerate(blist):
            s = astrcmp(aword, bword)
            if s > ms:
                ms = s
                mp = position
        if mp is not None:
            score += ms
            # Only a sufficiently good match consumes the word in blist.
            if ms > 0.6:
                del blist[mp]
        total += 1
    # Unmatched words of the longer list count with a reduced weight.
    total += len(blist) * 0.4
    if total:
        return score / total
    # Neither input produced any word; keep the result type a float.
    return 0.0
def similarity2(a, b):
    """Return a similarity score for two multi-word strings.

    The inputs are lower-cased and tokenized with ``_split_words_re``
    (empty tokens dropped).  Every word of the shorter token list is paired
    with its best-scoring counterpart (per ``astrcmp``) among the words still
    available in the longer list; a counterpart is consumed only if the
    score is above 0.6.  The accumulated score is normalized by the length
    of the shorter list plus 0.4 per word remaining in the longer list.

    Returns 0 when there is nothing to compare (both token lists empty).
    """
    shorter = [word for word in _split_words_re.split(a.lower()) if word]
    longer = [word for word in _split_words_re.split(b.lower()) if word]
    if len(shorter) > len(longer):
        shorter, longer = longer, shorter

    accumulated = 0.0
    weight = 0
    for word in shorter:
        best, best_at = 0.0, None
        for index, candidate in enumerate(longer):
            current = astrcmp(word, candidate)
            if current > best:
                best, best_at = current, index
        if best_at is not None:
            accumulated += best
            # A strong enough match takes the candidate out of the pool.
            if best > 0.6:
                del longer[best_at]
        weight += 1

    # Leftover words of the longer list are penalized at a reduced weight.
    weight += len(longer) * 0.4
    if not weight:
        return 0
    return accumulated / weight
def similarity2(a, b):
    """Return a similarity score for two multi-word strings.

    Shortcuts: a falsy argument yields 0.0 and equal arguments yield 1.0
    without any tokenization.  Otherwise both strings are lower-cased and
    split with ``_split_words_re`` (empty tokens dropped); if either side has
    no words the result is 0.0.

    Each word of the shorter token list is paired with its best-scoring
    counterpart (per ``astrcmp``) among the words still available in the
    longer list; a counterpart is consumed only if the score is above 0.6.
    The accumulated score is divided by the length of the shorter list plus
    0.4 per word remaining in the longer list.
    """
    if not (a and b):
        return 0.0
    if a == b:
        return 1.0

    shorter = [word for word in _split_words_re.split(a.lower()) if word]
    longer = [word for word in _split_words_re.split(b.lower()) if word]
    if not shorter or not longer:
        return 0.0
    if len(shorter) > len(longer):
        shorter, longer = longer, shorter
    # Captured before the loop: only ``longer`` shrinks while matching.
    word_count = len(shorter)

    accumulated = 0.0
    for word in shorter:
        best, best_at = 0.0, None
        for index, candidate in enumerate(longer):
            current = astrcmp(word, candidate)
            if current > best:
                best, best_at = current, index
        if best_at is None:
            continue
        accumulated += best
        # A strong enough match takes the candidate out of the pool.
        if best > 0.6:
            del longer[best_at]

    # The divisor is never zero: word_count >= 1 is guaranteed above.
    return accumulated / (word_count + len(longer) * 0.4)
def similarity(a1, b1):
    """Calculates similarity of single words as a function of their edit distance.

    Both inputs are passed through ``normalize`` before being compared with
    ``astrcmp``.  When the first input normalizes to something falsy, the
    second one is not normalized at all and an empty string is compared
    instead.
    """
    first = normalize(a1)
    # Skip normalizing the second word when the first is already empty.
    second = normalize(b1) if first else ""
    return astrcmp(first, second)