Exemplo n.º 1
0
 def get_similarity(self, test_string=''):
     """
     Custom similarity for train stations, allowing for the fact that many people use abbreviated names

     Returns the plain name similarity between this station's name and test_string, unless
     test_string is shorter than the name and scores better (85 or above) against the start
     of the name, in which case that better score is returned, capped at 99
     """
     full_score = get_name_similarity(self.name, test_string)

     # A query at least as long as our name cannot be an abbreviation of it
     query_length = len(test_string)
     if query_length >= len(self.name):
         return full_score

     # Compare the query against only the leading part of our name that is the same length as it,
     # so that e.g. Kings Cross can match King's Cross St Pancras
     prefix_score = get_name_similarity(self.name[:query_length], test_string)
     if prefix_score < 85 or prefix_score <= full_score:
         return full_score

     # Never 100, in case it overrides an exact match
     return min(prefix_score, 99)
Exemplo n.º 2
0
    def get_similarity(self, test_string=''):
        """
        Custom similarity match for bus stops - takes into account many of them will be from train stations or bus stations

        Both this stop's name and test_string are normalised via get_normalised_name() before comparison.
        Returns, in order of precedence:
          100 - the normalised names are identical
          95  - the query contains STN or BUSSTN and our name starts with the query
          94  - the query contains STN or BUSSTN and our name ends with the query
          91  - our name starts with the query followed by STN or BUSSTN
          90  - our name ends with the query followed by STN or BUSSTN
        otherwise whatever get_name_similarity() gives for the two normalised names
        """
        # Use the above function to normalise our names and facilitate easier comparison
        my_name = self.get_normalised_name()
        their_name = BusStop(test_string).get_normalised_name()
        # Exact match is obviously best
        if my_name == their_name:
            return 100

        # If user has specified a station or bus station, then a partial match at start or end of string works for us
        # We prioritise, just slightly, names that have the match at the beginning
        if re.search("(BUS)?STN", their_name):
            if my_name.startswith(their_name):
                return 95
            if my_name.endswith(their_name):
                return 94

        # If on the other hand, we add station or bus station to their name and it matches, that's also pretty good
        # The query is user-supplied text, so escape it before building a pattern from it; otherwise any
        # regular expression metacharacter left in it would be interpreted as syntax rather than matched literally
        escaped_name = re.escape(their_name)
        if re.search("^%s(BUS)?STN" % escaped_name, my_name):
            return 91
        if re.search("%s(BUS)?STN$" % escaped_name, my_name):
            return 90

        # Else fall back on name similarity
        return get_name_similarity(my_name, their_name)
Exemplo n.º 3
0
    def test_stringutils(self):
        """
        Unit test for stringutils' methods: capitalisation, name cleanup, name similarity,
        fuzzy matching and GMT to local time conversion
        """
        def random_string(lowest, highest):
            # Ten random characters, each with a character code between lowest and highest inclusive
            return "".join([chr(random.Random().randint(lowest, highest)) for _i in range(0, 10)])

        # Check capwords - correctly-capitalised names must come back unchanged whatever case they go in as,
        # and the result must never be the all-lower or all-upper form
        for expected in ("Bank", "Morden East", "King's Cross St. Pancras", "Kennington Oval via Charing X"):
            for variant in (expected, expected.lower(), expected.upper()):
                self.assertEqual(expected, capwords(variant))
            for wrong_case in (expected.lower(), expected.upper()):
                self.assertNotEqual(wrong_case, capwords(expected))

        # Check to see cleanup string is working - none of the undesirable patterns may survive, in any case
        undesirables = ("a", "b+", "[0-9]", "^x")
        for _i in range(0, 10):
            cleaned_string = cleanup_name_from_undesirables(random_string(48, 122), undesirables)
            for undesirable in undesirables:
                self.assertIsNone(re.search(undesirable, cleaned_string, flags=re.I))

        # Check string similarities - 100 for identical strings, 90 or more for one character change
        # and nothing at all for a totally unidentical string (here, one made only of digits)
        reference = random_string(65, 122)
        self.assertEqual(get_name_similarity(reference, reference), 100)
        self.assertGreaterEqual(get_name_similarity(reference, reference[:-1]), 90)
        self.assertEqual(get_name_similarity(reference, random_string(48, 57)), 0)

        # Check to see most similar string gets picked out of a list of similar-looking strings
        # (the nine-character prefix is the closest), and that with very dissimilar strings,
        # there is no candidate at all
        near_matches = tuple(reference[:length] for length in (3, 5, 9)) + ("z" * 10,)
        self.assertEqual(get_best_fuzzy_match(reference, near_matches), near_matches[2])
        digit_only_candidates = [random_string(48, 57) for _i in range(0, 10)]
        self.assertIsNone(get_best_fuzzy_match(reference, digit_only_candidates))

        # Check GMT to local time conversion - expect a one hour shift if this machine is
        # currently observing daylight saving time, and no shift otherwise
        gmt_times = ("2359", "23:59", "Tue 00:01")
        if time.localtime().tm_isdst:
            local_times = ("0059", "0059", "0101")
        else:
            local_times = ("2359", "2359", "0001")
        for gmt_time, local_time in zip(gmt_times, local_times):
            self.assertEqual(gmt_to_localtime(gmt_time), local_time)