def get_abbreviated_name(self):
    """
    Take this station's name and abbreviate it to make it fit on Twitter better

    Reads self.name and returns the shortened name as a string. The name goes through, in order:
    a table of hand-written replacements for whole names, removal of punctuation via
    cleanup_name_from_undesirables(), word-by-word abbreviation (e.g. Road -> Rd), and finally
    anything after an ampersand is cut down to a single initial (e.g. "Elephant & C")
    """
    # Stations we just have to cut down by hand
    translations = {
        "High Street Kensington": "High St Ken",
        "King's Cross St. Pancras": "Kings X St P",
        "Kensington (Olympia)": "Olympia",
        "W'wich Arsenal": "Woolwich A",
    }
    station_name = translations.get(self.name, self.name)

    # Punctuation marks can be cut down. Full stops and brackets are escaped because the
    # entries are used as regular expression patterns
    punctuation_to_remove = (r'\.', ', ', r'\(', r'\)', "'",)
    station_name = cleanup_name_from_undesirables(station_name, punctuation_to_remove)

    # Words like Road and Park can be slimmed down as well. Only whole words that match a key
    # exactly (case-sensitively) are replaced
    abbreviations = {
        'Bridge': 'Br',
        'Broadway': 'Bdwy',
        'Central': 'Ctrl',
        'Court': 'Ct',
        'Cross': 'X',
        'Crescent': 'Cresc',
        'East': 'E',
        'Gardens': 'Gdns',
        'Green': 'Grn',
        'Heathway': 'Hthwy',
        'Junction': 'Jct',
        'Market': 'Mkt',
        'North': 'N',
        'Park': 'Pk',
        'Road': 'Rd',
        'South': 'S',
        'Square': 'Sq',
        'Street': 'St',
        'Terminal': 'T',
        'Terminals': 'T',
        'West': 'W',
    }
    station_name = ' '.join([abbreviations.get(word, word) for word in station_name.split(' ')])

    # Any station with & in it gets only the initial of the second word - e.g. Elephant & C
    ampersand_index = station_name.find('&')
    if ampersand_index > -1:
        # Keep three characters from the ampersand onwards: the "&" itself, the space after it
        # and the initial. Keeping only two stops at the space and yields "Elephant & "
        station_name = station_name[:ampersand_index + 3]
    return station_name
def __init__(self, destination_name, direction, departure_time, line_code, set_number):
    """
    Set up this train, tidying the raw destination name first

    destination_name is normalised before being handed to Train.__init__() along with
    departure_time: known oddities are translated by hand, "and" becomes "&", line names and
    unknowns collapse to "Unknown", and instructions, depot names and platform numbers are
    stripped out. A trailing "via X" clause is removed from the destination and, if present,
    stored as a RailStation in self.via.

    direction, line_code and set_number are stored unchanged as attributes of the same name
    """
    manual_translations = {"Heathrow T123 + 5": "Heathrow Terminal 5",
                           "Olympia": "Kensington (Olympia)"}
    destination_name = manual_translations.get(destination_name, destination_name)

    # Get rid of TfL's odd designations in the Destination field to make it compatible with our list of stations in the database
    # Destination names are full of garbage. What I would like is a database mapping codes to canonical names, but this does not exist
    destination_name = re.sub(r"\band\b", "&", destination_name, flags=re.I)

    # Destinations that are line names or Unknown get boiled down to Unknown
    if destination_name in ("Unknown", "Circle & Hammersmith & City") \
            or destination_name.startswith("Circle Line") \
            or destination_name.endswith(("Train", "Line")):
        destination_name = "Unknown"
    else:
        # Regular expressions of instructions, depot names (presumably instructions for shunting after arrival), or platform numbers
        # These are all raw strings, so that backslash escapes such as \( reach the regular
        # expression engine untouched rather than being treated as (invalid) string escapes
        undesirables = (
            r'\(rev to .*\)',
            r'\(Rev\) Bank Branch',
            r'sidings?\b',
            r'(then )?depot',
            r'ex (barnet|edgware) branch',
            r'\(ex .*\)',
            r'/ london road',
            r'27 Road',
            r'24r/25r',
            r'\(plat\. [0-9]+\)',
            r' loop',
            r'\(circle\)',
            r'\(district\)',
            r' TOC',
        )
        destination_name = cleanup_name_from_undesirables(destination_name, undesirables)

    # Split any trailing "via X" or "(via X)" off the destination; via stays empty if there is none
    via = ""
    via_match = re.search(r" \(?via ([^)]*)\)?$", destination_name, flags=re.I)
    if via_match:
        via_translations = {"CX": "Charing Cross", "T4": "Heathrow Terminal 4"}
        via = via_translations.get(via_match.group(1), via_match.group(1))
        destination_name = re.sub(r" \(?via .*$", "", destination_name, flags=re.I)

    Train.__init__(self, destination_name, departure_time)
    # self.via is only assigned here when a via station was actually found
    if via:
        self.via = RailStation(via)
    self.direction = direction
    self.line_code = line_code
    self.set_number = set_number
def test_stringutils(self):
    """
    Unit test for stringutils' methods
    """
    def random_string(lowest, highest):
        """
        Ten random characters, each with a code point between lowest and highest inclusive
        """
        return "".join([chr(random.Random().randint(lowest, highest)) for _i in range(0, 10)])

    # capwords should return the canonical capitalisation whatever the case of its input,
    # and so should never hand back the all-lower or all-upper version of these names
    for expected in ("Bank", "Morden East", "King's Cross St. Pancras", "Kennington Oval via Charing X"):
        for variant in (expected, expected.lower(), expected.upper()):
            self.assertEqual(expected, capwords(variant))
        for wrong_case in (expected.lower(), expected.upper()):
            self.assertNotEqual(wrong_case, capwords(expected))

    # After a cleanup, none of the undesirable patterns should still match (ignoring case)
    noisy_strings = [random_string(48, 122) for _i in range(0, 10)]
    patterns = ("a", "b+", "[0-9]", "^x")
    for noisy_string in noisy_strings:
        tidied = cleanup_name_from_undesirables(noisy_string, patterns)
        for pattern in patterns:
            self.assertIsNone(re.search(pattern, tidied, flags=re.I))

    # Similarity scores: 100 for identical strings, 90 or more with one character dropped,
    # and 0 against a string of digits, which shares no characters with the reference
    reference = random_string(65, 122)
    self.assertEqual(get_name_similarity(reference, reference), 100)
    self.assertGreaterEqual(get_name_similarity(reference, reference[:-1]), 90)
    all_digits = random_string(48, 57)
    self.assertEqual(get_name_similarity(reference, all_digits), 0)

    # The best fuzzy match among similar-looking candidates is the longest prefix (second to
    # last in the tuple); among all-digit candidates there should be no match at all
    lookalikes = (reference[:3], reference[:5], reference[:9], "z" * 10)
    self.assertEqual(get_best_fuzzy_match(reference, lookalikes), lookalikes[-2])
    digit_strings = [random_string(48, 57) for _i in range(0, 10)]
    self.assertIsNone(get_best_fuzzy_match(reference, digit_strings))

    # GMT to local time: expect an hour's shift when the local clock is on daylight saving
    # time, and no shift otherwise
    if time.localtime().tm_isdst:
        expected_conversions = (("2359", "0059"), ("23:59", "0059"), ("Tue 00:01", "0101"))
    else:
        expected_conversions = (("2359", "2359"), ("23:59", "2359"), ("Tue 00:01", "0001"))
    for gmt_time, local_time in expected_conversions:
        self.assertEqual(gmt_to_localtime(gmt_time), local_time)
def get_clean_name(self):
    """
    Get rid of TfL's ASCII symbols for Tube, National Rail, DLR & Tram from this stop's name
    """
    # The square brackets around DLR are escaped, as they would be for a regular expression
    transport_symbols = ('<>', '#', r'\[DLR\]', '>T<')
    return cleanup_name_from_undesirables(self.name, transport_symbols)