def test_iterable_input(self):
    """Lists and tuples of characters should be accepted as input."""
    # Each entry is (first sequence, second sequence, expected distance);
    # the entries are checked in order and the first mismatch fails the test.
    cases = (
        (["H", "e", "l", "l", "o"], ["H", "e", "l", "l", "o"], 0),
        (
            ["S", "a", "t", "u", "r", "d", "a", "y"],
            ["S", "u", "n", "d", "a", "y"],
            3,
        ),
        (("H", "e", "l", "l", "o"), ("H", "e", "l", "l", "o"), 0),
        (
            ("S", "a", "t", "u", "r", "d", "a", "y"),
            ("S", "u", "n", "d", "a", "y"),
            3,
        ),
    )
    for first, second, expected in cases:
        self.assertEqual(
            fuzzycomp.levenshtein_distance(first, second), expected
        )
def extractVenueFeatures(A, B):
    """Build four dissimilarity features for a pair of venue strings.

    Both strings are normalised first: every character that is neither a
    word character nor whitespace is removed, runs of spaces are collapsed
    to a single space, and an empty result is replaced by '-'.

    Args:
        A: first venue string.
        B: second venue string.

    Returns:
        A list ``[measure, levenshtein, jaccard, jaro]``. ``measure`` is the
        summed absolute difference between the two strings' lowercase a-z
        letter counts, divided by the larger of the two letter totals (0
        when neither string contains such a letter). The other three entries
        are whatever the corresponding ``fuzzycomp`` functions return for
        the normalised strings.
    """

    def normalise(text):
        # Strip punctuation, then squeeze repeated spaces.
        without_punctuation = re.sub(r'[^\w\s]', '', text)
        single_spaced = re.sub(' +', ' ', without_punctuation)
        # fuzzycomp is never handed an empty string; '-' stands in for it.
        return single_spaced if single_spaced != '' else '-'

    def letter_histogram(text):
        # One bucket per lowercase ASCII letter; uppercase letters, digits
        # and every other character are ignored.
        buckets = numpy.zeros(26)
        for symbol in text:
            if 'a' <= symbol <= 'z':
                buckets[ord(symbol) - 97] += 1
        return buckets

    A = normalise(A)
    B = normalise(B)

    histogram_a = letter_histogram(A)
    histogram_b = letter_histogram(B)

    gap = numpy.sum(numpy.absolute(histogram_a - histogram_b))
    largest_total = max(numpy.sum(histogram_a), numpy.sum(histogram_b))

    # Guard against dividing by zero when no lowercase letter was counted.
    measure = gap / float(largest_total) if largest_total != 0 else 0

    return [
        measure,
        fuzzycomp.levenshtein_distance(A, B),
        fuzzycomp.jaccard_distance(A, B),
        fuzzycomp.jaro_distance(A, B),
    ]
def test_iterable_input(self):
    """Lists and tuples of characters should be accepted as input."""
    # Each entry is (first sequence, second sequence, expected distance);
    # the entries are checked in order and the first mismatch fails the test.
    cases = (
        (["H", "e", "l", "l", "o"], ["H", "e", "l", "l", "o"], 0),
        (
            ["S", "a", "t", "u", "r", "d", "a", "y"],
            ["S", "u", "n", "d", "a", "y"],
            3,
        ),
        (("H", "e", "l", "l", "o"), ("H", "e", "l", "l", "o"), 0),
        (
            ("S", "a", "t", "u", "r", "d", "a", "y"),
            ("S", "u", "n", "d", "a", "y"),
            3,
        ),
    )
    for first, second, expected in cases:
        self.assertEqual(
            fuzzycomp.levenshtein_distance(first, second), expected
        )
def extractCathegoryFeatures(A, B):
    """Return the smallest Levenshtein distance between category words.

    Every single-quoted fragment is pulled out of ``A`` and ``B``, stripped
    of characters that are neither word characters nor whitespace, and split
    into words. A side that yields no words is represented by the single
    placeholder word "-".

    Args:
        A: string holding single-quoted category names (presumably the
            textual form of a list of names; confirm against the caller).
        B: second string in the same format.

    Returns:
        The minimum of ``fuzzycomp.levenshtein_distance(x, y)`` over every
        word ``x`` from ``A`` and every word ``y`` from ``B``.
    """

    def quoted_words(text):
        # Join the quoted fragments with spaces so each keeps its own words.
        joined = " ".join(re.findall(r"\'(.+?)\'", text))
        # Dropping non-word, non-space characters already removes "&", and
        # split() without arguments absorbs runs of whitespace and never
        # yields an empty string, so no further clean-up is required.
        words = re.sub(r'[^\w\s]', '', joined).split()
        return words or ["-"]

    words_a = quoted_words(A)
    words_b = quoted_words(B)

    # min() instead of a 99999 sentinel: the sentinel version reported 0
    # whenever every pair was at least 99999 edits apart. Both word lists are
    # non-empty here, and ties resolve to the first pair, as before.
    return min(
        fuzzycomp.levenshtein_distance(word_a, word_b)
        for word_a in words_a
        for word_b in words_b
    )
def extractVenueFeatures(A, B):
    """Build four dissimilarity features for a pair of venue strings.

    Both strings are normalised first: every character that is neither a
    word character nor whitespace is removed, runs of spaces are collapsed
    to a single space, and an empty result is replaced by '-'.

    Args:
        A: first venue string.
        B: second venue string.

    Returns:
        A list ``[measure, levenshtein, jaccard, jaro]``. ``measure`` is the
        summed absolute difference between the two strings' lowercase a-z
        letter counts, divided by the larger of the two letter totals (0
        when neither string contains such a letter). The other three entries
        are whatever the corresponding ``fuzzycomp`` functions return for
        the normalised strings.
    """

    def normalise(text):
        # Strip punctuation, then squeeze repeated spaces.
        without_punctuation = re.sub(r'[^\w\s]', '', text)
        single_spaced = re.sub(' +', ' ', without_punctuation)
        # fuzzycomp is never handed an empty string; '-' stands in for it.
        return single_spaced if single_spaced != '' else '-'

    def letter_histogram(text):
        # One bucket per lowercase ASCII letter; uppercase letters, digits
        # and every other character are ignored.
        buckets = numpy.zeros(26)
        for symbol in text:
            if 'a' <= symbol <= 'z':
                buckets[ord(symbol) - 97] += 1
        return buckets

    A = normalise(A)
    B = normalise(B)

    histogram_a = letter_histogram(A)
    histogram_b = letter_histogram(B)

    gap = numpy.sum(numpy.absolute(histogram_a - histogram_b))
    largest_total = max(numpy.sum(histogram_a), numpy.sum(histogram_b))

    # Guard against dividing by zero when no lowercase letter was counted.
    measure = gap / float(largest_total) if largest_total != 0 else 0

    return [
        measure,
        fuzzycomp.levenshtein_distance(A, B),
        fuzzycomp.jaccard_distance(A, B),
        fuzzycomp.jaro_distance(A, B),
    ]
def extractCathegoryFeatures(A, B):
    """Return the smallest Levenshtein distance between category words.

    Every single-quoted fragment is pulled out of ``A`` and ``B``, stripped
    of characters that are neither word characters nor whitespace, and split
    into words. A side that yields no words is represented by the single
    placeholder word "-".

    Args:
        A: string holding single-quoted category names (presumably the
            textual form of a list of names; confirm against the caller).
        B: second string in the same format.

    Returns:
        The minimum of ``fuzzycomp.levenshtein_distance(x, y)`` over every
        word ``x`` from ``A`` and every word ``y`` from ``B``.
    """

    def quoted_words(text):
        # Join the quoted fragments with spaces so each keeps its own words.
        joined = " ".join(re.findall(r"\'(.+?)\'", text))
        # Dropping non-word, non-space characters already removes "&", and
        # split() without arguments absorbs runs of whitespace and never
        # yields an empty string, so no further clean-up is required.
        words = re.sub(r'[^\w\s]', '', joined).split()
        return words or ["-"]

    words_a = quoted_words(A)
    words_b = quoted_words(B)

    # min() instead of a 99999 sentinel: the sentinel version reported 0
    # whenever every pair was at least 99999 edits apart. Both word lists are
    # non-empty here, and ties resolve to the first pair, as before.
    return min(
        fuzzycomp.levenshtein_distance(word_a, word_b)
        for word_a in words_a
        for word_b in words_b
    )
def test_case_difference(self):
    """Strings that differ only in letter case must not count as identical."""
    distance = fuzzycomp.levenshtein_distance("HELLO", "hello")
    self.assertNotEqual(distance, 0)
def test_valid_input(self):
    """Known string pairs should produce their expected distances."""
    # Identical strings are zero edits apart.
    identical = fuzzycomp.levenshtein_distance("Hello", "Hello")
    self.assertEqual(identical, 0)

    # "Saturday" -> "Sunday" is expected to take three edits.
    weekdays = fuzzycomp.levenshtein_distance("Saturday", "Sunday")
    self.assertEqual(weekdays, 3)