Exemplo n.º 1
0
    def testWinkler(self):
        """Test 'Winkler' approximate string comparator

        For every entry of self.string_pairs the value returned by
        stringcmp.winkler() must be a float in the range 0.0 to 1.0, must be
        the same when the two strings are swapped, must be exactly 1.0 when
        both strings are equal, and must not be smaller than the value
        returned by stringcmp.jaro() for the same two strings.
        """

        for pair in self.string_pairs:
            left, right = pair[0], pair[1]

            score = stringcmp.winkler(left, right)

            # Returned value has to be a float within [0.0, 1.0]
            assert isinstance(score, float), (
                '"Winkler" does not return a floating point number for:'
                + str(pair))
            assert score >= 0.0, (
                '"Winkler" returns a negative number for:' + str(pair))
            assert score <= 1.0, (
                '"Winkler" returns a number larger than 1.0 for:'
                + str(pair))

            # Swapping the two arguments must not change the result
            forward = stringcmp.winkler(left, right)
            backward = stringcmp.winkler(right, left)
            assert forward == backward, (
                '"Winkler" returns different values for pair and swapped '
                + "pair: " + str(pair) + ": " + str(forward) + ", "
                + str(backward))

            # Two equal strings have to result in the maximum value
            if left == right:
                assert score == 1.0, (
                    '"Winkler" does not return 1.0 if strings are equal: '
                    + str(pair))

            # Jaro is the lower bound for Winkler on the same two strings
            winkler_score = stringcmp.winkler(left, right)
            jaro_score = stringcmp.jaro(left, right)
            assert winkler_score >= jaro_score, (
                '"Winkler" value smaller than "Jaro" value for:' + str(pair))
Exemplo n.º 2
0
  def testWinkler(self):
    """Test 'Winkler' approximate string comparator

    For every entry of self.string_pairs the value returned by
    stringcmp.winkler() must be a float in the range 0.0 to 1.0, must be
    the same when the two strings are swapped, must be exactly 1.0 when
    both strings are equal, and must not be smaller than the value
    returned by stringcmp.jaro() for the same two strings.
    """

    for pair in self.string_pairs:
      left, right = pair[0], pair[1]

      score = stringcmp.winkler(left, right)

      # Returned value has to be a float within [0.0, 1.0]
      assert isinstance(score, float), (
        '"Winkler" does not return a floating point number for:'
        + str(pair))
      assert score >= 0.0, (
        '"Winkler" returns a negative number for:' + str(pair))
      assert score <= 1.0, (
        '"Winkler" returns a number larger than 1.0 for:' + str(pair))

      # Swapping the two arguments must not change the result
      forward = stringcmp.winkler(left, right)
      backward = stringcmp.winkler(right, left)
      assert forward == backward, (
        '"Winkler" returns different values for pair and swapped '
        + 'pair: ' + str(pair) + ': ' + str(forward) + ', '
        + str(backward))

      # Two equal strings have to result in the maximum value
      if left == right:
        assert score == 1.0, (
          '"Winkler" does not return 1.0 if strings are equal: '
          + str(pair))

      # Jaro is the lower bound for Winkler on the same two strings
      winkler_score = stringcmp.winkler(left, right)
      jaro_score = stringcmp.jaro(left, right)
      assert winkler_score >= jaro_score, (
        '"Winkler" value smaller than "Jaro" value for:' + str(pair))
    def testPermWinkler(self):
        """Test 'PermWinkler' approximate string comparator

        For every entry of self.string_pairs the value returned by
        stringcmp.permwinkler() must be a float in the range 0.0 to 1.0,
        must be the same when the two strings are swapped, must be exactly
        1.0 when both strings are equal, and must not be smaller than the
        value returned by stringcmp.winkler() for the same two strings.
        """

        for pair in self.string_pairs:
            left, right = pair[0], pair[1]

            score = stringcmp.permwinkler(left, right)

            # Returned value has to be a float within [0.0, 1.0]
            assert isinstance(score, float), (
                '"PermWinkler" does not return a floating point number for:'
                + str(pair))
            assert score >= 0.0, (
                '"PermWinkler" returns a negative number for:' + str(pair))
            assert score <= 1.0, (
                '"PermWinkler" returns a number larger than 1.0 for:'
                + str(pair))

            # Swapping the two arguments must not change the result
            forward = stringcmp.permwinkler(left, right)
            backward = stringcmp.permwinkler(right, left)
            assert forward == backward, (
                '"PermWinkler" returns different values for pair and swapped '
                + 'pair: ' + str(pair) + ': ' + str(forward) + ', '
                + str(backward))

            # Two equal strings have to result in the maximum value
            if left == right:
                assert score == 1.0, (
                    '"PermWinkler" does not return 1.0 if strings are equal: '
                    + str(pair))

            # Winkler is the lower bound for PermWinkler on the same two
            # strings
            permwinkler_score = stringcmp.permwinkler(left, right)
            winkler_score = stringcmp.winkler(left, right)
            assert permwinkler_score >= winkler_score, (
                '"PermWinkler" value smaller than "Winkler" value for:'
                + str(pair))
Exemplo n.º 4
0
    # data =  [line.rstrip('\n\r').split(',') for line in f_open]

    # Build one report row per input line.  Each line is stripped of trailing
    # newline / carriage-return characters and split on commas; the first two
    # fields are the strings to compare.  The row 's' starts with both strings
    # right-aligned in 13-character columns and then receives, for every
    # comparator, the similarity value (3 decimals) followed by the elapsed
    # time in seconds as measured with time.time() (10 decimals).
    #
    # NOTE: every timed region also covers the '%' formatting and the string
    # concatenation onto 's', not only the comparator call itself.
    for line in f_open:
        twoNames = line.rstrip('\n\r').split(',')
        #print(twoNames)
        s = '%13s,%13s,' % (twoNames[0], twoNames[1])

        ##------Jaro------##
        start_time = time.time()
        s += ' %.3f' % (stringcmp.jaro(twoNames[0], twoNames[1]))
        time_used = time.time() - start_time
        s += ' %.10f' % (time_used)

        ##------winkler------##
        start_time1 = time.time()
        s += ' %.3f' % (stringcmp.winkler(twoNames[0], twoNames[1]))
        time_used1 = time.time() - start_time1
        s += ' %.10f' % (time_used1)

        ##------qgram 1------##
        # Third argument is 1 here and 2 below -- presumably the q-gram
        # length; confirm against stringcmp.qgram.
        start_time2 = time.time()
        s += ' %.3f' % (stringcmp.qgram(twoNames[0], twoNames[1], 1))
        time_used2 = time.time() - start_time2
        s += ' %.10f' % (time_used2)

        ##------qgram 2------##
        start_time3 = time.time()
        s += ' %.3f' % (stringcmp.qgram(twoNames[0], twoNames[1], 2))
        time_used3 = time.time() - start_time3
        s += ' %.10f' % (time_used3)