def testWinkler(self):  # - - - - - - - - - - - - - - - - - - - - - - - - - -
    """Test 'Winkler' approximate string comparator

    For every entry of ``self.string_pairs`` (indexed at [0] and [1]) the
    value returned by ``stringcmp.winkler`` must be a float within
    [0.0, 1.0], must be the same when the two arguments are swapped, must
    be exactly 1.0 when both strings are equal, and must not be smaller
    than the value ``stringcmp.jaro`` returns for the same arguments.
    """
    for pair in self.string_pairs:
        first, second = pair[0], pair[1]

        # Type and range of the returned similarity value
        #
        score = stringcmp.winkler(first, second)

        assert isinstance(score, float), (
            '"Winkler" does not return a floating point number for:'
            + str(pair)
        )
        assert 0.0 <= score, (
            '"Winkler" returns a negative number for:' + str(pair)
        )
        assert score <= 1.0, (
            '"Winkler" returns a number larger than 1.0 for:' + str(pair)
        )

        # Swapping the two arguments must not change the result
        #
        forward = stringcmp.winkler(first, second)
        backward = stringcmp.winkler(second, first)

        assert forward == backward, (
            '"Winkler" returns different values for pair and swapped '
            + "pair: "
            + str(pair)
            + ": "
            + ", ".join((str(forward), str(backward)))
        )

        # Two equal strings have to give a similarity of exactly 1.0
        #
        if first == second:
            assert score == 1.0, (
                '"Winkler" does not return 1.0 if strings are equal: '
                + str(pair)
            )

        # Winkler must never fall below the Jaro value for the same pair
        #
        winkler_score = stringcmp.winkler(first, second)
        jaro_score = stringcmp.jaro(first, second)

        assert winkler_score >= jaro_score, (
            '"Winkler" value smaller than "Jaro" value for:' + str(pair)
        )
def testWinkler(self):  # - - - - - - - - - - - - - - - - - - - - - - - - - -
    """Test 'Winkler' approximate string comparator

    Walks over ``self.string_pairs`` and checks, for each pair, that
    ``stringcmp.winkler`` returns a float between 0.0 and 1.0, that the
    argument order does not matter, that equal strings score 1.0, and that
    the score is at least the one given by ``stringcmp.jaro``.
    """
    for pair in self.string_pairs:
        in_order = (pair[0], pair[1])
        swapped = (pair[1], pair[0])
        pair_is_equal = (pair[0] == pair[1])

        sim = stringcmp.winkler(*in_order)

        # The result has to be a float within the interval [0.0, 1.0]
        #
        assert isinstance(sim, float), \
            '"Winkler" does not return a floating point number for:' + \
            str(pair)
        assert sim >= 0.0, \
            '"Winkler" returns a negative number for:' + str(pair)
        assert sim <= 1.0, \
            '"Winkler" returns a number larger than 1.0 for:' + str(pair)

        # The comparator has to be symmetric in its two arguments
        #
        sim_in_order = stringcmp.winkler(*in_order)
        sim_swapped = stringcmp.winkler(*swapped)

        assert sim_in_order == sim_swapped, \
            '"Winkler" returns different values for pair and swapped ' + \
            'pair: ' + str(pair) + ': ' + str(sim_in_order) + ', ' + \
            str(sim_swapped)

        # Identical strings must result in the maximum value 1.0
        #
        if pair_is_equal:
            assert sim == 1.0, \
                '"Winkler" does not return 1.0 if strings are equal: ' + \
                str(pair)

        # The Winkler value must be equal to or larger than the Jaro value
        #
        sim_winkler = stringcmp.winkler(*in_order)
        sim_jaro = stringcmp.jaro(*in_order)

        assert sim_winkler >= sim_jaro, \
            '"Winkler" value smaller than "Jaro" value for:' + str(pair)
def testPermWinkler(self):  # - - - - - - - - - - - - - - - - - - - - - - - -
    """Test 'PermWinkler' approximate string comparator

    For each pair in ``self.string_pairs`` the value returned by
    ``stringcmp.permwinkler`` is required to be a float in [0.0, 1.0], to
    be unchanged when the arguments are swapped, to equal 1.0 for two
    equal strings, and to be no smaller than the ``stringcmp.winkler``
    value for the same pair.
    """
    for pair in self.string_pairs:
        str_a, str_b = pair[0], pair[1]

        perm_value = stringcmp.permwinkler(str_a, str_b)

        # Returned value: correct type, inside the interval [0.0, 1.0]
        #
        assert isinstance(perm_value, float), (
            '"PermWinkler" does not return a floating point number for:'
            + str(pair)
        )
        assert perm_value >= 0.0, (
            '"PermWinkler" returns a negative number for:' + str(pair)
        )
        assert perm_value <= 1.0, (
            '"PermWinkler" returns a number larger than 1.0 for:'
            + str(pair)
        )

        # Same value expected for (a, b) and (b, a)
        #
        value_ab = stringcmp.permwinkler(str_a, str_b)
        value_ba = stringcmp.permwinkler(str_b, str_a)

        assert value_ab == value_ba, (
            '"PermWinkler" returns different values for pair and swapped '
            + 'pair: ' + str(pair) + ': '
            + str(value_ab) + ', ' + str(value_ba)
        )

        # Equal strings must give exactly 1.0
        #
        if str_a == str_b:
            assert perm_value == 1.0, (
                '"PermWinkler" does not return 1.0 if strings are equal: '
                + str(pair)
            )

        # PermWinkler must be equal to or larger than plain Winkler
        #
        value_permwinkler = stringcmp.permwinkler(str_a, str_b)
        value_winkler = stringcmp.winkler(str_a, str_b)

        assert value_permwinkler >= value_winkler, (
            '"PermWinkler" value smaller than "Winkler" value for:'
            + str(pair)
        )
# Compare the first two comma-separated fields of every input line with
# several string comparators and time each comparator call on its own.
#
# NOTE(review): 'f_open' is presumably an already opened text file holding
# one comma-separated name pair per line - confirm against the code that
# opens it.
#
# For each line, 's' starts with the two names (right-aligned, width 13) and
# is extended, per comparator, with the similarity value (3 decimals)
# followed by the elapsed time in seconds (10 decimals).
#
# time.perf_counter() is used instead of time.time(): a single comparator
# call is very short, and the wall clock may be too coarse (or may be
# adjusted while running) to measure such durations reliably.
#
for line in f_open:
    twoNames = line.rstrip('\n\r').split(',')

    s = '%13s,%13s,' % (twoNames[0], twoNames[1])

    ##------Jaro------##
    start_time = time.perf_counter()
    s += ' %.3f' % (stringcmp.jaro(twoNames[0], twoNames[1]))
    time_used = time.perf_counter() - start_time
    s += ' %.10f' % (time_used)

    ##------winkler------##
    start_time1 = time.perf_counter()
    s += ' %.3f' % (stringcmp.winkler(twoNames[0], twoNames[1]))
    time_used1 = time.perf_counter() - start_time1
    s += ' %.10f' % (time_used1)

    ##------qgram 1------##
    start_time2 = time.perf_counter()
    s += ' %.3f' % (stringcmp.qgram(twoNames[0], twoNames[1], 1))
    time_used2 = time.perf_counter() - start_time2
    s += ' %.10f' % (time_used2)

    ##------qgram 2------##
    start_time3 = time.perf_counter()
    s += ' %.3f' % (stringcmp.qgram(twoNames[0], twoNames[1], 2))
    time_used3 = time.perf_counter() - start_time3
    s += ' %.10f' % (time_used3)