示例#1
0
 def test_valid_input(self):
     """Check jaro_distance against reference values for plain strings."""
     # ((first, second), expected value compared to 3 decimal places)
     reference_pairs = (
         (("MARTHA", "MARHTA"), 0.944),
         (("DWAYNE", "DUANE"), 0.822),
         (("DIXON", "DICKSONX"), 0.767),
     )
     for (first, second), expected in reference_pairs:
         self.assertAlmostEqual(fuzzycomp.jaro_distance(first, second),
                                expected,
                                places=3)
示例#2
0
    def test_iterable_input(self):
        """Function should accept lists and tuples of characters.

        The same three reference pairs are passed first as lists and then
        as tuples of single-character strings; each result must match the
        expected Jaro value to three decimal places.
        """
        # (first word, second word, expected jaro_distance)
        cases = (
            ("MARTHA", "MARHTA", 0.944),
            ("DWAYNE", "DUANE", 0.822),
            ("DIXON", "DICKSONX", 0.767),
        )
        # list("MARTHA") == ["M", "A", "R", "T", "H", "A"], and likewise for
        # tuple(), so the words are expanded into character sequences here.
        for as_sequence in (list, tuple):
            for first, second, expected in cases:
                self.assertAlmostEqual(
                    fuzzycomp.jaro_distance(as_sequence(first), as_sequence(second)),
                    expected,
                    places=3,
                )
示例#3
0
    def test_iterable_input(self):
        """Function should handle list and tuple input like string input.

        Each reference word pair is split into single-character items and
        passed to ``fuzzycomp.jaro_distance`` as lists, then as tuples; the
        result must equal the expected value to three decimal places.
        """
        expected_by_words = (
            (("MARTHA", "MARHTA"), 0.944),
            (("DWAYNE", "DUANE"), 0.822),
            (("DIXON", "DICKSONX"), 0.767),
        )
        # All list cases run before any tuple case.
        for container in (list, tuple):
            for (left, right), expected in expected_by_words:
                result = fuzzycomp.jaro_distance(container(left),
                                                 container(right))
                self.assertAlmostEqual(result, expected, places=3)
示例#4
0
def extractVenueFeatures(A, B):
    """Compute four similarity features for a pair of venue strings.

    Both inputs are normalised first: every character that is neither a
    word character nor whitespace is removed, runs of spaces are collapsed
    to a single space, and an empty result is replaced by ``'-'``.

    Returns a list ``[measure, levenshtein, jaccard, jaro]``. ``measure``
    is the summed absolute difference of the per-letter counts (lowercase
    ``a``-``z`` only) divided by the larger of the two letter totals, or
    ``0`` when neither string contains such a letter. The other entries
    are the results of the corresponding ``fuzzycomp`` functions applied
    to the normalised strings.
    """
    normalised = []
    for raw in (A, B):
        text = re.sub(r'[^\w\s]', '', raw)  # strip punctuation
        text = re.sub(' +', ' ', text)  # squeeze repeated spaces
        normalised.append(text if text != '' else '-')
    A, B = normalised

    # One 26-slot histogram per string; anything outside a-z is ignored.
    histograms = []
    for text in (A, B):
        counts = numpy.zeros(26)
        for letter in text:
            if 'a' <= letter <= 'z':
                counts[ord(letter) - 97] += 1
        histograms.append(counts)
    counts_a, counts_b = histograms

    distance = numpy.sum(numpy.absolute(counts_a - counts_b))
    largest_total = max(numpy.sum(counts_a), numpy.sum(counts_b))
    # Guard the division when no lowercase letter was seen at all.
    measure = 0 if largest_total == 0 else distance / float(largest_total)

    features = [measure]
    features.append(fuzzycomp.levenshtein_distance(A, B))
    features.append(fuzzycomp.jaccard_distance(A, B))
    features.append(fuzzycomp.jaro_distance(A, B))
    return features
示例#5
0
def extractVenueFeatures(A, B):
    """Return ``[measure, levenshtein, jaccard, jaro]`` for two venue names.

    Each name is cleaned before comparison: characters matching
    ``[^\\w\\s]`` are dropped, consecutive spaces become one space, and a
    name left empty is replaced with ``'-'``.

    ``measure`` compares lowercase ``a``-``z`` letter frequencies: the sum
    of absolute per-letter count differences divided by the larger letter
    total (``0`` if both totals are zero). The three remaining values come
    from ``fuzzycomp.levenshtein_distance``, ``fuzzycomp.jaccard_distance``
    and ``fuzzycomp.jaro_distance`` on the cleaned names.
    """

    def clean(name):
        # Punctuation removal, then space squeezing, then empty fallback.
        name = re.sub(' +', ' ', re.sub(r'[^\w\s]', '', name))
        if name == '':
            return '-'
        return name

    def letter_counts(name):
        # Slot i holds the number of occurrences of chr(97 + i).
        tally = numpy.zeros(26)
        for slot in (ord(c) - 97 for c in name if c >= 'a' and c <= 'z'):
            tally[slot] += 1
        return tally

    A = clean(A)
    B = clean(B)

    charactersA = letter_counts(A)
    charactersB = letter_counts(B)

    distance = numpy.sum(numpy.absolute(charactersA - charactersB))
    max_chars = max(numpy.sum(charactersA), numpy.sum(charactersB))

    if max_chars != 0:
        measure = distance / float(max_chars)
    else:
        measure = 0

    return [
        measure,
        fuzzycomp.levenshtein_distance(A, B),
        fuzzycomp.jaccard_distance(A, B),
        fuzzycomp.jaro_distance(A, B),
    ]
示例#6
0
 def test_valid_input(self):
     """Verify jaro_distance returns the expected scores for string pairs."""
     cases = [
         ("MARTHA", "MARHTA", 0.944),
         ("DWAYNE", "DUANE", 0.822),
         ("DIXON", "DICKSONX", 0.767),
     ]
     for left, right, want in cases:
         got = fuzzycomp.jaro_distance(left, right)
         self.assertAlmostEqual(got, want, places=3)