def partial_ratio(s1, s2):
    """Score the best alignment of the shorter string inside the longer one.

    The shorter of the two inputs is compared against same-length windows
    of the longer input; the windows tried are the ones that line up with
    a matching block reported by ``SequenceMatcher``.

    Returns an int between 0 and 100.  Any window whose ratio exceeds
    0.995 short-circuits to 100.  If either input is empty the result is 0.

    Raises TypeError if either argument is None.
    """
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")

    # Nothing can be aligned against an empty string.  Without this guard
    # the empty needle is compared with an empty window, the ratio is 1.0
    # and the function would report a perfect match.
    if len(s1) == 0 or len(s2) == 0:
        return 0

    if len(s1) <= len(s2):
        shorter, longer = s1, s2
    else:
        shorter, longer = s2, s1

    blocks = SequenceMatcher(None, shorter, longer).get_matching_blocks()

    # Each block is (idx_in_shorter, idx_in_longer, length).  The best
    # partial match lines up with at least one of those blocks, e.g.
    #   shorter = "abcd", longer = "XXXbcdeEEE"  ->  block (1, 3, 3)
    #   best score == ratio("abcd", "Xbcd")
    window = len(shorter)
    best = 0.0
    for block in blocks:
        # Shift the window so the block sits at the same offset in both
        # strings, clamped so it never starts before the longer string.
        long_start = max(0, block[1] - block[0])
        long_substr = longer[long_start:long_start + window]

        r = SequenceMatcher(None, shorter, long_substr).ratio()
        if r > .995:
            return 100
        best = max(best, r)

    return int(100 * best)
def partial_ratio(s1, s2):
    """Return a 0-100 score for the most similar substring alignment.

    The shorter input is matched against slices of the longer input that
    have the shorter input's length.  Candidate slices are positioned from
    the matching blocks that ``SequenceMatcher`` finds between the two.

    A slice whose ratio is above 0.995 returns 100 immediately; otherwise
    the highest ratio seen is scaled to 0-100 and truncated to an int.
    Empty input on either side yields 0.

    Raises TypeError if either argument is None.
    """
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")

    # Guard against empty input: an empty needle would otherwise be
    # compared with an empty slice and come out as a perfect 100.
    if len(s1) == 0 or len(s2) == 0:
        return 0

    if len(s1) <= len(s2):
        shorter = s1
        longer = s2
    else:
        shorter = s2
        longer = s1

    m = SequenceMatcher(None, shorter, longer)
    blocks = m.get_matching_blocks()

    # Each block is a run of matching characters of the form
    # (idx_1, idx_2, len).  The best partial match will block-align with
    # at least one of them, e.g.
    #   shorter = "abcd", longer = "XXXbcdeEEE"
    #   block = (1, 3, 3)
    #   best score == ratio("abcd", "Xbcd")
    scores = [0.0]  # seeded so max() is always defined
    for idx_short, idx_long, _length in blocks:
        # Clamp at 0: the block may sit earlier in `longer` than in `shorter`.
        long_start = max(0, idx_long - idx_short)
        long_end = long_start + len(shorter)
        long_substr = longer[long_start:long_end]

        r = SequenceMatcher(None, shorter, long_substr).ratio()
        if r > .995:
            return 100
        scores.append(r)

    return int(100 * max(scores))
def ratio(s1, s2):
    """Return the SequenceMatcher ratio of two sequences scaled to 0-100.

    The float ratio is multiplied by 100 and passed through ``intr``
    (defined outside this block) to produce the returned value.

    Raises TypeError if either argument is None.
    """
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")

    similarity = SequenceMatcher(None, s1, s2).ratio()
    return intr(100 * similarity)
def seq_matcher(name1, name2):
    """Return the edit distance between the Soundex codes of two names.

    Both names are stripped to ASCII (NFKD normalisation, then encoding
    with non-ASCII characters ignored), run through ``fuzzy.Soundex(4)``,
    and the two resulting codes are compared with
    ``StringMatcher.distance()``.

    Note the asymmetry in input handling: ``name1`` is handed straight to
    ``unicodedata.normalize`` while ``name2`` is first decoded from UTF-8,
    so ``name1`` is expected to already be a unicode object and ``name2``
    a UTF-8 byte string.  NOTE(review): confirm against callers.

    Returns whatever ``StringMatcher.distance()`` yields for the two codes
    (presumably the Levenshtein distance -- verify against the
    StringMatcher in use).
    """
    name1 = unicode(
        unicodedata.normalize('NFKD', name1).encode('ascii', 'ignore'),
        'utf-8')

    name2 = unicode(name2, 'utf-8')
    name2 = unicode(
        unicodedata.normalize('NFKD', name2).encode('ascii', 'ignore'),
        'utf-8')

    # Phonetic encoding.  fuzzy.DMetaphone and fuzzy.nysiis were tried here
    # previously as alternatives to Soundex.
    soundex = fuzzy.Soundex(4)
    name1 = soundex(name1)
    name2 = soundex(name2)

    # Only the StringMatcher distance is returned; the SequenceMatcher and
    # editdist results that used to be computed here were never used.
    sm = StringMatcher(seq1=name1, seq2=name2)
    return sm.distance()
def processText(self, text1, text2):
    """Return the StringMatcher distance of two texts, length-normalised.

    Identical inputs return 0.0 without any matcher being built.
    Otherwise the distance reported by ``StringMatcher`` is divided by the
    length of the longer text.
    """
    if text1 == text2:
        return 0.0

    matcher = StringMatcher()
    matcher.set_seqs(text1, text2)
    raw_distance = matcher.distance()

    # The inputs differ here, so at least one is non-empty and the
    # denominator cannot be zero.
    longest = max(len(text1), len(text2))
    return float(raw_distance) / longest
def processText(self, text1, text2):
    """Compute a normalised edit distance between ``text1`` and ``text2``.

    Equal texts short-circuit to 0.0.  For differing texts the value of
    ``StringMatcher.distance()`` is divided by ``max(len(text1),
    len(text2))``.
    """
    texts_match = (text1 == text2)
    if texts_match:
        return 0.0

    sm = StringMatcher()
    sm.set_seqs(text1, text2)
    edit_distance = float(sm.distance())

    # Differing texts imply a non-zero maximum length.
    return edit_distance / max(len(text1), len(text2))
def ratio(s1, s2):
    """Return the SequenceMatcher ratio of ``s1`` and ``s2`` scaled to 0-100.

    Both inputs first go through ``make_type_consistent`` (defined outside
    this block; presumably it coerces them to a common string type --
    verify).  If either input is then empty the result is 0; otherwise the
    float ratio times 100 is passed through ``intr``.

    Raises TypeError if either argument is None.
    """
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")

    s1, s2 = make_type_consistent(s1, s2)

    # An empty side short-circuits to 0 instead of being handed to the matcher.
    if not (len(s1) and len(s2)):
        return 0

    matcher = SequenceMatcher(None, s1, s2)
    scaled = 100 * matcher.ratio()
    return intr(scaled)
def partial_ratio(s1, s2):
    """Return the ratio of the most similar substring as a number between 0 and 100.

    The shorter input is compared against slices of the longer input that
    have the shorter input's length; the slices are positioned from the
    matching blocks ``SequenceMatcher`` reports between the two strings.

    Both inputs first go through ``utils.make_type_consistent``
    (presumably coercing them to a common string type -- verify).  Empty
    input on either side returns 0.  A slice whose ratio exceeds 0.995
    returns 100 immediately; otherwise the best ratio is scaled to 0-100
    and truncated to an int.

    Raises TypeError if either argument is None.
    """
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")

    s1, s2 = utils.make_type_consistent(s1, s2)
    if len(s1) == 0 or len(s2) == 0:
        return 0

    if len(s1) <= len(s2):
        shorter = s1
        longer = s2
    else:
        shorter = s2
        longer = s1

    m = SequenceMatcher(None, shorter, longer)
    blocks = m.get_matching_blocks()

    # Each block represents a run of matching characters, of the form
    # (idx_1, idx_2, len).  The best partial match will block-align with
    # at least one of those blocks, e.g.
    #   shorter = "abcd", longer = "XXXbcdeEEE"
    #   block = (1, 3, 3)
    #   best score == ratio("abcd", "Xbcd")
    scores = []
    for block in blocks:
        # Align the block at the same offset in both strings, clamped so
        # the slice never starts before the beginning of `longer`.
        long_start = max(0, block[1] - block[0])
        long_end = long_start + len(shorter)
        long_substr = longer[long_start:long_end]

        m2 = SequenceMatcher(None, shorter, long_substr)
        r = m2.ratio()
        if r > 0.995:
            return 100
        scores.append(r)

    return int(100 * max(scores))
def processText(self, text1, text2):
    """Return ``1 - jaro()`` for the two texts.

    A ``StringMatcher`` is loaded with both texts and its ``jaro()``
    result (presumably a Jaro similarity in [0, 1] -- verify against the
    StringMatcher in use) is subtracted from 1.
    """
    matcher = StringMatcher()
    matcher.set_seqs(text1, text2)
    similarity = matcher.jaro()
    return 1 - similarity
def editdist(self, string1, string2):
    """Return the case-insensitive StringMatcher distance of two strings.

    Both inputs are lower-cased before being handed to ``StringMatcher``;
    the value of its ``distance()`` method is returned unchanged.
    """
    matcher = StringMatcher(None, string1.lower(), string2.lower())
    distance = matcher.distance()
    return distance
def processText(self, text1, text2):
    """Turn the matcher's ``jaro()`` value for two texts into ``1 - value``.

    The texts are set on a fresh ``StringMatcher`` via ``set_seqs``; the
    ``jaro()`` result (presumably a similarity in [0, 1] -- verify) is
    then subtracted from 1.
    """
    sm = StringMatcher()
    sm.set_seqs(text1, text2)
    jaro_value = sm.jaro()
    return 1 - jaro_value