def _passes_selected_metrics(word, candidate, limit, first_flag, second_flag):
    """Return the combined verdict of the metrics selected by ``limit``.

    All seven similarity scores between ``word`` and ``candidate`` are computed
    with ``textdistance`` and handed to ``metric.qualified`` together with the
    two boolean flags. The verdicts at the 1-based positions named by the
    digits of ``limit`` are then combined with ``&``.
    """
    # Order matters: the digits in ``limit`` index into this table (1-based).
    scorers = (
        ("jw_sim", textdistance.jaro_winkler),
        ("levenshtein_sim", textdistance.levenshtein.normalized_similarity),
        ("ro_sim", textdistance.ratcliff_obershelp),
        ("needleman_wunsch", textdistance.needleman_wunsch),
        ("smith_waterman", textdistance.smith_waterman),
        ("gotoh", textdistance.gotoh),
        ("strcmp95", textdistance.strcmp95),
    )
    metric_pool = [
        metric.qualified(label, scorer(word, candidate), first_flag, second_flag)
        for label, scorer in scorers
    ]

    verdict = True
    for digit in limit:
        # NOTE(review): a "0" digit indexes -1 (the last metric) and "8"/"9"
        # raise IndexError; callers are presumably expected to pass 1-7 only.
        verdict &= metric_pool[int(digit) - 1]
    return verdict


def filter_blends(word, first_blend, second_blend, limit="1237"):
    """Keep only the blend candidates that pass the selected similarity metrics.

    Args:
        word: The reference string every candidate is compared against.
        first_blend: Iterable of candidates; each is checked by calling
            ``metric.qualified(name, score, True, False)``.
        second_blend: Iterable of candidates; each is checked by calling
            ``metric.qualified(name, score, False, True)``.
        limit: String of digits, each a 1-based index into the metric list
            (1 jaro-winkler, 2 normalized levenshtein, 3 ratcliff-obershelp,
            4 needleman-wunsch, 5 smith-waterman, 6 gotoh, 7 strcmp95).
            A candidate is kept only if every selected metric qualifies.

    Returns:
        A tuple ``(refined_first_blend, refined_second_blend)`` of lists that
        preserve the input order of the surviving candidates.
    """
    refined_first_blend = [
        w for w in first_blend
        if _passes_selected_metrics(word, w, limit, True, False)
    ]
    refined_second_blend = [
        w for w in second_blend
        if _passes_selected_metrics(word, w, limit, False, True)
    ]
    return refined_first_blend, refined_second_blend
def needleman_wunsch_of_blends():
    """Plot Needleman-Wunsch scores for every row of ``data/blends.txt``.

    Each usable row supplies three whitespace-separated fields: an origin word
    followed by two blend words. The function computes
    ``textdistance.needleman_wunsch(origin, blend)`` for both blend words and
    draws the two score series against the row index (red triangles for the
    first blend, blue squares for the second), then shows the figure.

    Rows with fewer than three fields (for example blank lines) are skipped
    instead of raising ``IndexError``.
    """
    blend1, blend2 = [], []
    with open("data/blends.txt", 'r') as f:
        for line in f:
            fields = line.split()
            if len(fields) < 3:
                # Blank or incomplete row: nothing to score.
                continue
            origin, first, second = fields[0], fields[1], fields[2]
            blend1.append(textdistance.needleman_wunsch(origin, first))
            blend2.append(textdistance.needleman_wunsch(origin, second))

    # One x position per scored row.
    x = np.arange(len(blend1))
    y1 = np.array(blend1)
    y2 = np.array(blend2)

    plt.plot(x, y1, color="r", linestyle="-", marker="^", linewidth=1)
    plt.plot(x, y2, color="b", linestyle="-", marker="s", linewidth=1)
    plt.xlabel("x")
    plt.ylabel("y")
    plt.title("needleman_wunsch", fontsize=12, color='g')
    # NOTE(review): presumably the score ranges observed on the original data
    # set; the values are hard-coded, not derived from the data read above.
    print("first blend: -2.5 ~ 5.0\nsecond blend: 0.0 ~ 8.0")
    plt.show()
def compare(self, str1, str2):
    """Compare two strings with ``needleman_wunsch`` and fill ``self.result``.

    The raw score is stored in ``self.result.distance``, the longer input
    length in ``self.result.nos``, a fixed threshold of 90 in
    ``self.result.threshold`` and a percentage in ``self.result.similarity``.
    The scoring call is bracketed by ``self.start_time()`` and
    ``self.end_time()``.

    Args:
        str1: First string to compare.
        str2: Second string to compare.

    Returns:
        ``self.result`` after its fields have been updated.
    """
    if self.debug:
        # NOTE(review): the message mentions "winkler" although the scorer is
        # needleman_wunsch; left unchanged because it is runtime log text.
        self.log("needleman wunsch winkler comparison")

    self.start_time()
    self.result.distance = needleman_wunsch(str1, str2)
    self.end_time()

    self.result.nos = max(len(str1), len(str2))
    self.result.threshold = 90

    if self.result.nos == 0:
        # Both inputs are empty, so the percentage formula would divide by
        # zero. Two empty strings are identical, hence full similarity.
        self.result.similarity = 100.0
    else:
        self.result.similarity = 100 - (
            (100.0 / float(self.result.nos))
            * (self.result.nos - self.result.distance))
    return self.result
import yara
import textdistance

# Try Monk comparison and filter with YARA and then post the results here: https://github.com/Finch4/Monk/issues with the tag "Results"
rule = yara.compile(
    source='rule foo: bar {strings: $a = "0d0000706f100000" condition: $a}')

# Read the input inside a context manager so the file handle is always closed.
with open("YOUR_TXT.txt", "r") as txt_file:
    lines = txt_file.readlines()

# The rule is matched against the string form of the list of lines (not the
# raw file text), exactly as before.
matches = rule.match(data=f'{lines}')

# Print each match once; printing `matches` here would repeat the whole list
# for every match.
for match in matches:
    print(match)

# Some of the algorithms in textdistance; actually there are 30+
jaro = textdistance.jaro_winkler("0a00027b02000004", "0a00027b02000004")
jaccard = textdistance.jaccard("a", "a")
cosine = textdistance.cosine("a", "a")
wunsch = textdistance.needleman_wunsch("a", "a")
mra = textdistance.mra("a", "a")

print(f""" Jaro: {jaro}, Jaccard: {jaccard}, Cosine: {cosine}, Wunsch: {wunsch}, Mra: {mra} """)
# Methods every textdistance algorithm object is exercised with, in print order.
_STANDARD_METHODS = (
    "distance",
    "similarity",
    "normalized_distance",
    "normalized_similarity",
)


def _print_algorithm_forms(name, str1, str2, class_label=None,
                           make_instance=None):
    """Print the results of every call form of one textdistance algorithm.

    Prints, in order: the direct call ``textdistance.<name>(str1, str2)``, the
    four methods in ``_STANDARD_METHODS``, and, when ``make_instance`` is
    given, ``make_instance().distance(str1, str2)`` labelled with
    ``class_label``.
    """
    algorithm = getattr(textdistance, name)
    print("textdistance.{}({}, {}) = {}.".format(
        name, str1, str2, algorithm(str1, str2)))
    for method in _STANDARD_METHODS:
        print("textdistance.{}.{}({}, {}) = {}.".format(
            name, method, str1, str2,
            getattr(algorithm, method)(str1, str2)))
    if make_instance is not None:
        print("{}.distance({}, {}) = {}.".format(
            class_label, str1, str2,
            make_instance().distance(str1, str2)))


def simple_example():
    """Print textdistance results for the pair 'test' / 'text'.

    Algorithms are grouped into sections, each with an on/off switch. Only the
    edit-based section is enabled. For every algorithm in an enabled section
    the function prints the direct call, ``distance``, ``similarity``,
    ``normalized_distance``, ``normalized_similarity`` and, where listed, the
    ``distance`` of an explicitly constructed class instance.

    Each entry is ``(function_name, class_label, factory)``. ``class_label``
    is the text printed for the explicit constructor call (``{qval}`` is
    filled in) and ``factory`` builds that instance lazily, so disabled
    sections never construct anything.
    """
    str1, str2 = 'test', 'text'
    qval = 2

    sections = (
        # --------------------
        # Edit-based.
        (True, (
            ("hamming",
             "textdistance.Hamming(qval={qval}, test_func=None, truncate=False, external=True)",
             lambda: textdistance.Hamming(qval=qval, test_func=None,
                                          truncate=False, external=True)),
            ("mlipns",
             "textdistance.MLIPNS(threshold=0.25, maxmismatches=2, qval={qval}, external=True)",
             lambda: textdistance.MLIPNS(threshold=0.25, maxmismatches=2,
                                         qval=qval, external=True)),
            ("levenshtein",
             "textdistance.Levenshtein(qval={qval}, test_func=None, external=True)",
             lambda: textdistance.Levenshtein(qval=qval, test_func=None,
                                              external=True)),
            ("damerau_levenshtein",
             "textdistance.DamerauLevenshtein(qval={qval}, test_func=None, external=True)",
             lambda: textdistance.DamerauLevenshtein(qval=qval,
                                                     test_func=None,
                                                     external=True)),
            ("jaro",
             "textdistance.Jaro(long_tolerance=False, qval={qval}, external=True)",
             lambda: textdistance.Jaro(long_tolerance=False, qval=qval,
                                       external=True)),
            ("jaro_winkler",
             "textdistance.JaroWinkler(long_tolerance=False, winklerize=True, qval={qval}, external=True)",
             lambda: textdistance.JaroWinkler(long_tolerance=False,
                                              winklerize=True, qval=qval,
                                              external=True)),
            ("strcmp95",
             "textdistance.StrCmp95(long_strings=False, external=True)",
             lambda: textdistance.StrCmp95(long_strings=False,
                                           external=True)),
            ("needleman_wunsch",
             "textdistance.NeedlemanWunsch(gap_cost=1.0, sim_func=None, qval={qval}, external=True)",
             lambda: textdistance.NeedlemanWunsch(gap_cost=1.0,
                                                  sim_func=None, qval=qval,
                                                  external=True)),
            ("gotoh",
             "textdistance.Gotoh(gap_open=1, gap_ext=0.4, sim_func=None, qval={qval}, external=True)",
             lambda: textdistance.Gotoh(gap_open=1, gap_ext=0.4,
                                        sim_func=None, qval=qval,
                                        external=True)),
            ("smith_waterman",
             "textdistance.SmithWaterman(gap_cost=1.0, sim_func=None, qval={qval}, external=True)",
             lambda: textdistance.SmithWaterman(gap_cost=1.0, sim_func=None,
                                                qval=qval, external=True)),
        )),
        # --------------------
        # Token-based.
        (False, (
            ("jaccard",
             "textdistance.Jaccard(qval={qval}, as_set=False, external=True)",
             lambda: textdistance.Jaccard(qval=qval, as_set=False,
                                          external=True)),
            ("sorensen",
             "textdistance.Sorensen(qval={qval}, as_set=False, external=True)",
             lambda: textdistance.Sorensen(qval=qval, as_set=False,
                                           external=True)),
            # No explicit class instance is exercised for sorensen_dice.
            ("sorensen_dice", None, None),
            ("tversky",
             "textdistance.Tversky(qval={qval}, ks=None, bias=None, as_set=False, external=True)",
             lambda: textdistance.Tversky(qval=qval, ks=None, bias=None,
                                          as_set=False, external=True)),
            ("overlap",
             "textdistance.Overlap(qval={qval}, as_set=False, external=True)",
             lambda: textdistance.Overlap(qval=qval, as_set=False,
                                          external=True)),
            # This is identical to the Jaccard similarity coefficient and the
            # Tversky index for alpha=1 and beta=1.
            ("tanimoto",
             "textdistance.Tanimoto(qval={qval}, as_set=False, external=True)",
             lambda: textdistance.Tanimoto(qval=qval, as_set=False,
                                           external=True)),
            ("cosine",
             "textdistance.Cosine(qval={qval}, as_set=False, external=True)",
             lambda: textdistance.Cosine(qval=qval, as_set=False,
                                         external=True)),
            ("monge_elkan",
             "textdistance.MongeElkan(algorithm=textdistance.DamerauLevenshtein(), symmetric=False, qval={qval}, external=True)",
             lambda: textdistance.MongeElkan(
                 algorithm=textdistance.DamerauLevenshtein(),
                 symmetric=False, qval=qval, external=True)),
            ("bag",
             "textdistance.Bag(qval={qval})",
             lambda: textdistance.Bag(qval=qval)),
        )),
        # --------------------
        # Sequence-based.
        (False, (
            ("lcsseq",
             "textdistance.LCSSeq()",
             lambda: textdistance.LCSSeq()),
            ("lcsstr",
             "textdistance.LCSStr(qval={qval})",
             lambda: textdistance.LCSStr(qval=qval)),
            ("ratcliff_obershelp",
             "textdistance.RatcliffObershelp()",
             lambda: textdistance.RatcliffObershelp()),
        )),
        # --------------------
        # Compression-based.
        (False, (
            ("arith_ncd",
             "textdistance.ArithNCD()",
             lambda: textdistance.ArithNCD()),
            ("rle_ncd",
             "textdistance.RLENCD()",
             lambda: textdistance.RLENCD()),
            # The backslash is escaped so the label shows '\0' as text rather
            # than embedding a NUL character in the output.
            ("bwtrle_ncd",
             "textdistance.BWTRLENCD(terminator='\\0')",
             lambda: textdistance.BWTRLENCD(terminator='\0')),
            ("sqrt_ncd",
             "textdistance.SqrtNCD(qval={qval})",
             lambda: textdistance.SqrtNCD(qval=qval)),
            ("entropy_ncd",
             "textdistance.EntropyNCD(qval={qval}, coef=1, base=2)",
             lambda: textdistance.EntropyNCD(qval=qval, coef=1, base=2)),
            ("bz2_ncd",
             "textdistance.BZ2NCD()",
             lambda: textdistance.BZ2NCD()),
            ("lzma_ncd",
             "textdistance.LZMANCD()",
             lambda: textdistance.LZMANCD()),
            ("zlib_ncd",
             "textdistance.ZLIBNCD()",
             lambda: textdistance.ZLIBNCD()),
        )),
        # --------------------
        # Phonetic.
        (False, (
            ("mra",
             "textdistance.MRA()",
             lambda: textdistance.MRA()),
            ("editex",
             "textdistance.Editex(local=False, match_cost=0, group_cost=1, mismatch_cost=2, groups=None, ungrouped=None, external=True)",
             lambda: textdistance.Editex(local=False, match_cost=0,
                                         group_cost=1, mismatch_cost=2,
                                         groups=None, ungrouped=None,
                                         external=True)),
        )),
        # --------------------
        # Simple.
        (False, (
            ("prefix",
             "textdistance.Prefix(qval={qval}, sim_test=None)",
             lambda: textdistance.Prefix(qval=qval, sim_test=None)),
            ("postfix",
             "textdistance.Postfix()",
             lambda: textdistance.Postfix()),
            ("length",
             "textdistance.Length()",
             lambda: textdistance.Length()),
            ("identity",
             "textdistance.Identity()",
             lambda: textdistance.Identity()),
            ("matrix",
             "textdistance.Matrix(mat=None, mismatch_cost=0, match_cost=1, symmetric=True, external=True)",
             lambda: textdistance.Matrix(mat=None, mismatch_cost=0,
                                         match_cost=1, symmetric=True,
                                         external=True)),
        )),
    )

    for enabled, entries in sections:
        if not enabled:
            continue
        for name, label_template, factory in entries:
            class_label = (None if label_template is None
                           else label_template.format(qval=qval))
            _print_algorithm_forms(name, str1, str2, class_label, factory)