def extractVenueFeatures(A, B):
    """Build a four-element distance feature vector for two strings.

    Both inputs are normalized first: every character that is neither a
    regex word character nor whitespace is removed, runs of spaces are
    collapsed into a single space, and a string that ends up empty is
    replaced by '-'.

    Args:
        A: first string to compare (presumably a venue name, going by the
            function name -- confirm against callers).
        B: second string to compare.

    Returns:
        A list ``[measure, levenshtein, jaccard, jaro]``. ``measure`` is the
        sum of absolute differences between the per-letter counts of the two
        normalized strings divided by the larger of the two letter totals,
        or the integer 0 when neither string contains a counted letter. The
        remaining entries are the results of
        ``fuzzycomp.levenshtein_distance``, ``fuzzycomp.jaccard_distance``
        and ``fuzzycomp.jaro_distance`` on the normalized strings.
    """

    def _normalize(text):
        # Remove punctuation, then squeeze repeated spaces.
        text = re.sub(r'[^\w\s]', '', text)
        text = re.sub(' +', ' ', text)
        # An empty string is replaced by a one-character placeholder.
        if text == '':
            return '-'
        return text

    def _letter_counts(text):
        # Only lowercase 'a'..'z' are counted; any other character,
        # uppercase letters included, is ignored.
        counts = numpy.zeros(26)
        for ch in text:
            if 'a' <= ch <= 'z':
                counts[ord(ch) - 97] += 1
        return counts

    A = _normalize(A)
    B = _normalize(B)

    counts_a = _letter_counts(A)
    counts_b = _letter_counts(B)

    distance = numpy.sum(numpy.absolute(counts_a - counts_b))
    max_chars = max(numpy.sum(counts_a), numpy.sum(counts_b))

    # Guard the division: neither string may contain a counted letter.
    measure = 0 if max_chars == 0 else distance / float(max_chars)

    return [
        measure,
        fuzzycomp.levenshtein_distance(A, B),
        fuzzycomp.jaccard_distance(A, B),
        fuzzycomp.jaro_distance(A, B),
    ]
def extractVenueFeatures(A, B):
    """Build a four-element distance feature vector for two strings.

    Both inputs are normalized first: every character that is neither a
    regex word character nor whitespace is removed, runs of spaces are
    collapsed into a single space, and a string that ends up empty is
    replaced by '-'.

    Args:
        A: first string to compare (presumably a venue name, going by the
            function name -- confirm against callers).
        B: second string to compare.

    Returns:
        A list ``[measure, levenshtein, jaccard, jaro]``. ``measure`` is the
        sum of absolute differences between the per-letter counts of the two
        normalized strings divided by the larger of the two letter totals,
        or the integer 0 when neither string contains a counted letter. The
        remaining entries are the results of
        ``fuzzycomp.levenshtein_distance``, ``fuzzycomp.jaccard_distance``
        and ``fuzzycomp.jaro_distance`` on the normalized strings.
    """

    def _normalize(text):
        # Remove punctuation, then squeeze repeated spaces.
        text = re.sub(r'[^\w\s]', '', text)
        text = re.sub(' +', ' ', text)
        # An empty string is replaced by a one-character placeholder.
        if text == '':
            return '-'
        return text

    def _letter_counts(text):
        # Only lowercase 'a'..'z' are counted; any other character,
        # uppercase letters included, is ignored.
        counts = numpy.zeros(26)
        for ch in text:
            if 'a' <= ch <= 'z':
                counts[ord(ch) - 97] += 1
        return counts

    A = _normalize(A)
    B = _normalize(B)

    counts_a = _letter_counts(A)
    counts_b = _letter_counts(B)

    distance = numpy.sum(numpy.absolute(counts_a - counts_b))
    max_chars = max(numpy.sum(counts_a), numpy.sum(counts_b))

    # Guard the division: neither string may contain a counted letter.
    measure = 0 if max_chars == 0 else distance / float(max_chars)

    return [
        measure,
        fuzzycomp.levenshtein_distance(A, B),
        fuzzycomp.jaccard_distance(A, B),
        fuzzycomp.jaro_distance(A, B),
    ]
def test_iterable_input(self):
    """jaccard_distance must accept lists of integers as input.

    Lists holding the same elements in a different order are expected at
    distance 0.0; lists with no element in common are expected at 1.0.
    """
    # Same five elements, reversed order.
    same_elements = fuzzycomp.jaccard_distance([1, 2, 3, 4, 5],
                                               [5, 4, 3, 2, 1])
    self.assertEqual(same_elements, 0.0)

    # No element shared between the two lists.
    no_overlap = fuzzycomp.jaccard_distance([1, 2, 3, 4, 5],
                                            [6, 7, 8, 9, 10])
    self.assertEqual(no_overlap, 1.0)
def test_valid_input(self):
    """jaccard_distance must give the expected values for plain strings.

    Identical strings are expected at 0.0, "foo"/"bar" at 1.0, and
    "Hello"/"World" at 0.7142857 when compared to 7 decimal places.
    """
    identical = fuzzycomp.jaccard_distance("Hello", "Hello")
    self.assertEqual(identical, 0.0)

    # Inexact expected value, so compare with rounding instead of equality.
    partial = fuzzycomp.jaccard_distance("Hello", "World")
    self.assertAlmostEqual(partial, 0.7142857, places=7)

    disjoint = fuzzycomp.jaccard_distance("foo", "bar")
    self.assertEqual(disjoint, 1.0)
def test_iterable_input(self):
    """jaccard_distance must accept lists of integers as input.

    Each case is ``(first, second, expected distance)``: lists with the same
    elements in reversed order are expected at 0.0, lists with no element in
    common at 1.0.
    """
    cases = (
        ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1], 0.0),
        ([1, 2, 3, 4, 5], [6, 7, 8, 9, 10], 1.0),
    )
    # Cases run in order; the first failing assertion aborts the test.
    for first, second, expected in cases:
        self.assertEqual(fuzzycomp.jaccard_distance(first, second), expected)