def test_valid_input(self):
    """Algorithm should return correct values under valid input

    Each pair of strings is passed to ``fuzzycomp.jaro_distance`` and the
    result is compared with the expected score to three decimal places.
    """
    # (first string, second string, expected Jaro score)
    expectations = (
        ("MARTHA", "MARHTA", 0.944),
        ("DWAYNE", "DUANE", 0.822),
        ("DIXON", "DICKSONX", 0.767),
    )
    for first, second, expected in expectations:
        score = fuzzycomp.jaro_distance(first, second)
        self.assertAlmostEqual(score, expected, places=3)
def test_iterable_input(self):
    """Function should return correct values for list and tuple input

    The same character sequences are passed to ``fuzzycomp.jaro_distance``
    first as lists and then as tuples of single-character strings; each
    result is compared with the expected score to three decimal places.
    No exception is expected for these inputs.
    """
    # (first sequence, second sequence, expected Jaro score)
    cases = (
        ("MARTHA", "MARHTA", 0.944),
        ("DWAYNE", "DUANE", 0.822),
        ("DIXON", "DICKSONX", 0.767),
    )
    # list(...) / tuple(...) split each word into its single-character
    # elements, e.g. list("DUANE") == ["D", "U", "A", "N", "E"].
    for container in (list, tuple):
        for first, second, expected in cases:
            self.assertAlmostEqual(
                fuzzycomp.jaro_distance(container(first), container(second)),
                expected,
                places=3,
                # identify which container type / word pair failed
                msg="%s input: %r vs %r" % (container.__name__, first, second),
            )
def test_iterable_input(self):
    """Function should return correct values for list and tuple input

    Lists and tuples of single-character strings are passed to
    ``fuzzycomp.jaro_distance``; every score is compared with its expected
    value to three decimal places. These inputs are not expected to raise.
    """
    # (first word, second word, expected Jaro score)
    word_pairs = (
        ("MARTHA", "MARHTA", 0.944),
        ("DWAYNE", "DUANE", 0.822),
        ("DIXON", "DICKSONX", 0.767),
    )

    def check_all(as_sequence):
        # as_sequence is list or tuple; calling it on a word yields that
        # word's characters as separate elements.
        for left, right, expected in word_pairs:
            score = fuzzycomp.jaro_distance(as_sequence(left), as_sequence(right))
            self.assertAlmostEqual(
                score,
                expected,
                places=3,
                # say which sequence type / word pair produced the mismatch
                msg="%s input: %r vs %r" % (as_sequence.__name__, left, right),
            )

    check_all(list)
    check_all(tuple)
def extractVenueFeatures(A, B):
    """Return four comparison features for the strings ``A`` and ``B``.

    Both inputs are normalised first: every character that is neither a
    word character nor whitespace is removed, runs of spaces are collapsed
    to one space, and an empty result is replaced by ``'-'``.

    The returned list holds, in order:

    1. a letter-histogram measure: the sum of absolute differences between
       the per-letter counts of lowercase ``a``-``z`` in the two strings,
       divided by the larger of the two total letter counts (``0`` when
       neither string contains such a letter);
    2. ``fuzzycomp.levenshtein_distance(A, B)``;
    3. ``fuzzycomp.jaccard_distance(A, B)``;
    4. ``fuzzycomp.jaro_distance(A, B)``.
    """
    normalised = []
    for text in (A, B):
        text = re.sub(r'[^\w\s]', '', text)  # remove punctuation
        text = re.sub(' +', ' ', text)  # remove multiple spaces
        normalised.append(text if text != '' else '-')
    A, B = normalised

    # One 26-slot histogram per string; only lowercase a-z are counted.
    histograms = []
    for text in normalised:
        counts = numpy.zeros(26)
        for letter in text:
            if 'a' <= letter <= 'z':
                counts[ord(letter) - 97] += 1
        histograms.append(counts)
    counts_a, counts_b = histograms

    distance = numpy.sum(numpy.absolute(counts_a - counts_b))
    largest_total = max(numpy.sum(counts_a), numpy.sum(counts_b))
    # Guard against dividing by zero when no lowercase letters were found.
    measure = distance / float(largest_total) if largest_total != 0 else 0

    return [
        measure,
        fuzzycomp.levenshtein_distance(A, B),
        fuzzycomp.jaccard_distance(A, B),
        fuzzycomp.jaro_distance(A, B),
    ]
def extractVenueFeatures(A, B):
    """Compute a list of four similarity features for ``A`` and ``B``.

    Each input string has punctuation (anything that is not a word
    character or whitespace) stripped and repeated spaces squeezed into a
    single space; a string that ends up empty becomes ``'-'``.

    Result, in order: the letter-count measure described below, then the
    values of ``fuzzycomp.levenshtein_distance``,
    ``fuzzycomp.jaccard_distance`` and ``fuzzycomp.jaro_distance`` for the
    normalised strings.

    The letter-count measure tallies lowercase ``a``-``z`` in each string,
    sums the absolute per-letter differences and divides by the larger of
    the two tallies' totals; it is ``0`` if both totals are zero.
    """
    def normalise(raw):
        # remove punctuation, then remove multiple spaces
        without_punctuation = re.sub(r'[^\w\s]', '', raw)
        squeezed = re.sub(' +', ' ', without_punctuation)
        return squeezed or '-'

    def letter_tally(text):
        # 26 slots, one per lowercase letter; other characters are skipped
        tally = numpy.zeros(26)
        offset = ord('a')
        for symbol in text:
            if 'a' <= symbol <= 'z':
                tally[ord(symbol) - offset] += 1
        return tally

    A = normalise(A)
    B = normalise(B)

    tally_a = letter_tally(A)
    tally_b = letter_tally(B)

    difference = numpy.absolute(tally_a - tally_b)
    distance = numpy.sum(difference)
    denominator = max(numpy.sum(tally_a), numpy.sum(tally_b))

    if denominator == 0:
        # neither string has a lowercase letter: avoid dividing by zero
        measure = 0
    else:
        measure = distance / float(denominator)

    features = [measure]
    features.append(fuzzycomp.levenshtein_distance(A, B))
    features.append(fuzzycomp.jaccard_distance(A, B))
    features.append(fuzzycomp.jaro_distance(A, B))
    return features