def _write_stats(self):
    G = GTFS(self.day_db_path)
    net = combined_stop_to_stop_transit_network(G)
    sections = net.edges(data=True)
    n_links = len(sections)
    section_lengths = []
    vehicle_kilometers_per_section = []
    for from_I, to_I, data in sections:
        section_lengths.append(data['d'])
        vehicle_kilometers_per_section.append(data['n_vehicles'] * data['d'] / 1000.)
    stats = {
        "n_stops": len(G.stops(require_reference_in_stop_times=True)),
        "n_connections": len(G.get_transit_events()),
        "n_links": n_links,
        "network_length_m": sum(section_lengths),
        "link_distance_avg_m": int(sum(section_lengths) / len(section_lengths)),
        "vehicle_kilometers": sum(vehicle_kilometers_per_section),
        "buffer_center_lat": self.lat,
        "buffer_center_lon": self.lon,
        "buffer_radius_km": self.buffer_distance,
        "extract_start_date": self.get_weekly_extract_start_date().strftime("%Y-%m-%d")
    }
    self.__verify_stats(stats)
    df = pandas.DataFrame.from_dict({key: [value] for key, value in stats.items()})
    df.to_csv(self.stats_fname, sep=";", columns=list(sorted(stats.keys())), index=False)
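
# A minimal, self-contained sketch of the per-link arithmetic above, assuming
# edge data dicts carry 'd' (link length in meters) and 'n_vehicles' (vehicle
# count per link), as produced by combined_stop_to_stop_transit_network.
# The toy edge list is hypothetical.
toy_edges = [
    (1, 2, {"d": 500.0, "n_vehicles": 40}),
    (2, 3, {"d": 1200.0, "n_vehicles": 25}),
]
network_length_m = sum(data["d"] for _, _, data in toy_edges)
vehicle_kilometers = sum(data["n_vehicles"] * data["d"] / 1000.
                         for _, _, data in toy_edges)
assert network_length_m == 1700.0    # 500 + 1200
assert vehicle_kilometers == 50.0    # 40 * 0.5 km + 25 * 1.2 km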
def add_zone_to_stop_table(zone_shape_path=DEMAND_ZONES):
    """
    Creates a table that relates stop_Is to TAZ zones and counts the number of stops.

    :return:
    """
    crs = {"init": "epsg:4326"}
    zones = gpd.read_file(zone_shape_path, crs=crs)
    for (name, gtfs_dict) in FEED_LIST:
        gtfs = GTFS(gtfs_dict["gtfs_dir"])
        df = gtfs.stops()
        geometry = [Point(xy) for xy in zip(df.lon, df.lat)]
        df = df.drop(["lon", "lat"], axis=1)
        gdf = gpd.GeoDataFrame(df, crs=crs, geometry=geometry)
        zones_and_stops = gpd.sjoin(gdf, zones, how="inner", op='intersects')
        try:
            gtfs.execute_custom_query("""ALTER TABLE stops ADD COLUMN n_stops INT;""")
            gtfs.execute_custom_query("""ALTER TABLE stops ADD COLUMN zone_id INT;""")
        except OperationalError:
            # the columns already exist
            pass
        subset = zones_and_stops[['WSP_ENN', 'stop_I']]
        tuples = [tuple(x) for x in subset.values]
        gtfs.conn.executemany("""UPDATE stops SET zone_id = ? WHERE stop_I = ?""", tuples)
        gtfs.conn.commit()
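
# A hedged sketch of the spatial-join pattern used above: build point geometries
# from stop coordinates and intersect them with zone polygons. All data here is
# hypothetical; 'zone_id' stands in for the WSP_ENN zone column, and op=
# matches the geopandas API used in this codebase (newer releases use predicate=).
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point, Polygon

toy_stops = pd.DataFrame({"stop_I": [1, 2], "lon": [24.94, 25.10], "lat": [60.17, 60.20]})
stops_gdf = gpd.GeoDataFrame(
    toy_stops.drop(["lon", "lat"], axis=1),
    geometry=[Point(xy) for xy in zip(toy_stops.lon, toy_stops.lat)],
    crs={"init": "epsg:4326"})
zones_gdf = gpd.GeoDataFrame(
    {"zone_id": [101]},
    geometry=[Polygon([(24.9, 60.1), (25.0, 60.1), (25.0, 60.2), (24.9, 60.2)])],
    crs={"init": "epsg:4326"})
joined = gpd.sjoin(stops_gdf, zones_gdf, how="inner", op="intersects")
print(joined[["stop_I", "zone_id"]])  # stop 1 falls inside zone 101; stop 2 is dropped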
def analysis_zones(as_dict=False):
    """
    Returns data containers that pair each zone type with a set of stops.

    :param as_dict:
    :return:
    """
    gtfs_old = GTFS(OLD_DICT["gtfs_dir"])
    gtfs_lm = GTFS(LM_DICT["gtfs_dir"])
    station_distance = 600
    upstream_ratio = 0.5
    df_old = gtfs_old.get_stops_for_route_type(1)
    df_lm = gtfs_lm.get_stops_for_route_type(1)
    new_metro = difference_of_pandas_dfs(df_old, df_lm, ["stop_I"])
    old_metro = difference_of_pandas_dfs(new_metro, df_lm, ["stop_I"])
    train = gtfs_lm.get_stops_for_route_type(2)
    feeder_area = pd.DataFrame()
    other_stops = gtfs_lm.stops()
    jda = JourneyDataAnalyzer(LM_DICT["journey_dir"], LM_DICT["gtfs_dir"])
    # jda = JourneyDataAnalyzer(OLD_DICT["journey_dir"], OLD_DICT["gtfs_dir"])
    areas_to_remove = stops_to_exclude(return_sqlite_list=False)
    df = jda.get_upstream_stops_ratio(
        1040, [str(i.stop_I) for i in new_metro.itertuples()], upstream_ratio)
    feeder_area = feeder_area.append(df)
    # df = jda.get_upstream_stops_ratio(7193, 563, 0.7)

    print("new metro")
    for i in new_metro.itertuples():
        df = gtfs_lm.get_stops_within_distance(i.stop_I, station_distance)
        new_metro = new_metro.append(df)
    print("old metro")
    for i in old_metro.itertuples():
        df = gtfs_lm.get_stops_within_distance(i.stop_I, station_distance)
        old_metro = old_metro.append(df)
    print("train")
    for i in train.itertuples():
        df = gtfs_lm.get_stops_within_distance(i.stop_I, station_distance)
        train = train.append(df)

    new_metro = new_metro.drop_duplicates().reset_index(drop=True)
    old_metro = old_metro.drop_duplicates().reset_index(drop=True)
    train = train.drop_duplicates().reset_index(drop=True)
    feeder_area = feeder_area.drop_duplicates().reset_index(drop=True)

    # cleaning up borders
    new_metro = difference_of_pandas_dfs(old_metro, new_metro, ["stop_I"])
    for zone in [new_metro, old_metro, areas_to_remove]:
        train = difference_of_pandas_dfs(zone, train, ["stop_I"])
    for zone in [new_metro, train, old_metro, areas_to_remove]:
        feeder_area = difference_of_pandas_dfs(zone, feeder_area, ["stop_I"])
    spec_areas = pd.concat([new_metro, old_metro, train, feeder_area, areas_to_remove])
    other_stops = difference_of_pandas_dfs(spec_areas, other_stops, ["stop_I"])

    old_metro = old_metro.assign(stop_cat=1)
    new_metro = new_metro.assign(stop_cat=2)
    train = train.assign(stop_cat=3)
    feeder_area = feeder_area.assign(stop_cat=4)
    other_stops = other_stops.assign(stop_cat=5)
    all_stops = pd.concat(
        [new_metro, old_metro, train, feeder_area, other_stops]).reset_index(drop=True)
    if as_dict:
        all_dfs = {
            "new_metro_stations": new_metro,
            "feeder_bus_area": feeder_area,
            "old_metro_stations": old_metro,
            "commuter_train_stations": train,
            "other_stops": other_stops
        }
    else:
        all_dfs = [("new_metro_stations", new_metro),
                   ("feeder_bus_area", feeder_area),
                   ("old_metro_stations", old_metro),
                   ("commuter_train_stations", train),
                   ("other_stops", other_stops)]
    return all_dfs, all_stops
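
# A hedged usage sketch: iterate over the zone -> stops pairing returned above.
# analysis_zones() depends on module-level settings (OLD_DICT, LM_DICT, ...),
# so this only runs within the project's own environment.
zone_dfs, all_stops = analysis_zones(as_dict=True)
for zone_name, zone_stops in zone_dfs.items():
    print(zone_name, len(zone_stops), "stops")
print(all_stops.groupby("stop_cat").size())  # stop counts per category 1..5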
license = license.replace("CC BY0", "CC0")
license = license.replace("_", " ")
city_data_dict["License"] = license
feeds = get_feeds_from_to_publish_tuple(city_data)
pipeline = ExtractPipeline(city_data, feeds)
try:
    day_G = GTFS(pipeline.day_db_path)
    trip_counts_per_day = day_G.get_trip_counts_per_day()
    print(trip_counts_per_day)
    assert len(trip_counts_per_day) <= 3
    city_data_dict["Extract date"] = str(trip_counts_per_day.loc[
        trip_counts_per_day['trip_counts'] == max(
            trip_counts_per_day['trip_counts'])].iloc[0]['date'])
    print(city_data_dict["Extract date"].replace(" 00:00:00", ""))
    city_data_dict["n_stops"] = len(day_G.stops(require_reference_in_stop_times=True))
    city_data_dict["n_connections"] = len(day_G.get_transit_events())
    n_links = len(combined_stop_to_stop_transit_network(day_G).edges(data=True))
    city_data_dict["n_links"] = int(n_links)
except FileNotFoundError:
    print("File " + pipeline.day_db_path + " was not found")
    city_data_dict["Extract date"] = "NaN"
cities.append(city_data_dict)

pickle.dump(cities, open(pickle_cache_file, 'wb'), -1)


def spaces(x):
    try:
        num_as_str_reversed = str(int(x))[::-1]
        num_with_spaces = ',\\'.join(
class TimetableValidator(object):

    def __init__(self, gtfs, buffer_params=None):
        """
        Parameters
        ----------
        gtfs: GTFS, or path to a GTFS object
            A GTFS object
        """
        if not isinstance(gtfs, GTFS):
            self.gtfs = GTFS(gtfs)
        else:
            self.gtfs = gtfs
        self.buffer_params = buffer_params
        self.warnings_container = WarningsContainer()

    def validate_and_get_warnings(self):
        """
        Validates/checks a given GTFS feed with respect to a number of different issues.

        The set of warnings that are checked for can be found in gtfs_validator.ALL_WARNINGS.

        Returns
        -------
        warnings: WarningsContainer
        """
        self.warnings_container.clear()
        self._validate_stops_with_same_stop_time()
        self._validate_speeds_and_trip_times()
        self._validate_stop_spacings()
        self._validate_stop_sequence()
        self._validate_misplaced_stops()
        return self.warnings_container

    def _validate_misplaced_stops(self):
        if self.buffer_params:
            p = self.buffer_params
            center_lat = p['lat']
            center_lon = p['lon']
            buffer_distance = p['buffer_distance'] * 1000 * 1.002  # some error margin for rounding
            for stop_row in self.gtfs.stops().itertuples():
                if buffer_distance < wgs84_distance(center_lat, center_lon,
                                                    stop_row.lat, stop_row.lon):
                    self.warnings_container.add_warning(
                        WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY, stop_row)
                    print(WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY, stop_row)

    def _validate_stops_with_same_stop_time(self):
        n_stops_with_same_time = 5
        # this query returns the trips where there are N or more stops with the same stop time
        rows = self.gtfs.get_cursor().execute(
            'SELECT '
            'trip_I, '
            'arr_time, '
            'N '
            'FROM '
            '(SELECT trip_I, arr_time, count(*) as N FROM stop_times GROUP BY trip_I, arr_time) q1 '
            'WHERE N >= ?', (n_stops_with_same_time,))
        for row in rows:
            self.warnings_container.add_warning(
                WARNING_5_OR_MORE_CONSECUTIVE_STOPS_WITH_SAME_TIME, row)

    def _validate_stop_spacings(self):
        self.gtfs.conn.create_function("find_distance", 4, wgs84_distance)
        # this query calculates the distance and travel time between consecutive stops
        rows = self.gtfs.execute_custom_query(
            'SELECT '
            'q1.trip_I, '
            'type, '
            'q1.stop_I as stop_1, '
            'q2.stop_I as stop_2, '
            'CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT) as distance, '
            'q2.arr_time_ds - q1.arr_time_ds as traveltime '
            'FROM '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q1, '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q2, '
            'trips, '
            'routes '
            'WHERE q1.trip_I = q2.trip_I '
            'AND q1.seq + 1 = q2.seq '
            'AND q1.trip_I = trips.trip_I '
            'AND trips.route_I = routes.route_I ').fetchall()
        for row in rows:
            if row[4] > MAX_ALLOWED_DISTANCE_BETWEEN_CONSECUTIVE_STOPS:
                self.warnings_container.add_warning(WARNING_LONG_STOP_SPACING, row)
            if row[5] > MAX_TIME_BETWEEN_STOPS:
                self.warnings_container.add_warning(WARNING_LONG_TRAVEL_TIME_BETWEEN_STOPS, row)

    def _validate_speeds_and_trip_times(self):
        # These are the mode - feasible speed combinations used here:
        # https://support.google.com/transitpartners/answer/1095482?hl=en
        self.gtfs.conn.create_function("find_distance", 4, wgs84_distance)
        # this query returns the total distance and travel time for each trip,
        # calculated for each stop spacing separately
        rows = pandas.read_sql(
            'SELECT '
            'q1.trip_I, '
            'type, '
            'sum(CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT)) AS total_distance, '
            'sum(q2.arr_time_ds - q1.arr_time_ds) AS total_traveltime, '
            'count(*) '  # for getting the total number of stops
            'FROM '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q1, '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q2, '
            'trips, '
            'routes '
            'WHERE q1.trip_I = q2.trip_I AND q1.seq + 1 = q2.seq AND q1.trip_I = trips.trip_I '
            'AND trips.route_I = routes.route_I GROUP BY q1.trip_I',
            self.gtfs.conn)
        for row in rows.itertuples():
            avg_velocity_km_per_h = row.total_distance / max(row.total_traveltime, 1) * 3.6
            if avg_velocity_km_per_h > GTFS_TYPE_TO_MAX_SPEED[row.type]:
                self.warnings_container.add_warning(
                    WARNING_TRIP_UNREALISTIC_AVERAGE_SPEED + " (route_type=" + str(row.type) + ")",
                    row)
            if row.total_traveltime > MAX_TRIP_TIME:
                self.warnings_container.add_warning(
                    WARNING_LONG_TRIP_TIME.format(MAX_TRIP_TIME=MAX_TRIP_TIME), row, 1)

    def _validate_stop_sequence(self):
        # This function checks that the seq values in stop_times increase with departure_time,
        # and that seq always increases by one.
        rows = self.gtfs.execute_custom_query(
            'SELECT trip_I, dep_time_ds, seq '
            'FROM stop_times '
            'ORDER BY trip_I, dep_time_ds, seq').fetchall()
        old_trip_id = None
        old_seq = None
        for row in rows:
            new_trip_id = int(row[0])
            new_seq = int(row[2])
            if old_trip_id == new_trip_id:
                if old_seq + 1 != new_seq:
                    self.warnings_container.add_warning(WARNING_STOP_SEQUENCE_NOT_INCREMENTAL, row)
                if old_seq >= new_seq:
                    self.warnings_container.add_warning(WARNING_STOP_SEQUENCE_ORDER_ERROR, row)
            old_trip_id = row[0]
            old_seq = row[2]
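
# A hedged usage sketch of the validator above; the feed path and buffer
# parameters are hypothetical. buffer_params is optional and only enables
# the misplaced-stop check.
validator = TimetableValidator(
    "path/to/feed.sqlite",  # or an already-constructed GTFS object
    buffer_params={"lat": 60.17, "lon": 24.94, "buffer_distance": 20})  # buffer in km
warnings = validator.validate_and_get_warnings()
warnings.print_summary()  # WarningsContainer, as used elsewhere in this codebase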
class TestGTFSFilter(unittest.TestCase):

    def setUp(self):
        self.gtfs_source_dir = os.path.join(os.path.dirname(__file__), "test_data")
        self.gtfs_source_dir_filter_test = os.path.join(self.gtfs_source_dir, "filter_test_feed/")
        # self.G = GTFS.from_directory_as_inmemory_db(self.gtfs_source_dir)

        # some preparations:
        self.fname = self.gtfs_source_dir + "/test_gtfs.sqlite"
        self.fname_copy = self.gtfs_source_dir + "/test_gtfs_copy.sqlite"
        self.fname_filter = self.gtfs_source_dir + "/test_gtfs_filter_test.sqlite"
        self._remove_temporary_files()
        self.assertFalse(os.path.exists(self.fname_copy))
        conn = sqlite3.connect(self.fname)
        import_gtfs(self.gtfs_source_dir, conn, preserve_connection=True, print_progress=False)
        conn_filter = sqlite3.connect(self.fname_filter)
        import_gtfs(self.gtfs_source_dir_filter_test, conn_filter,
                    preserve_connection=True, print_progress=False)
        self.G = GTFS(conn)
        self.G_filter_test = GTFS(conn_filter)
        self.hash_orig = hashlib.md5(open(self.fname, 'rb').read()).hexdigest()

    def _remove_temporary_files(self):
        for fn in [self.fname, self.fname_copy, self.fname_filter]:
            if os.path.exists(fn) and os.path.isfile(fn):
                os.remove(fn)

    def tearDown(self):
        self._remove_temporary_files()

    def test_copy(self):
        # do a simple copy
        FilterExtract(self.G, self.fname_copy, update_metadata=False).create_filtered_copy()
        # check that the copying has been properly performed:
        hash_copy = hashlib.md5(open(self.fname_copy, 'rb').read()).hexdigest()
        self.assertTrue(os.path.exists(self.fname_copy))
        self.assertEqual(self.hash_orig, hash_copy)

    def test_filter_change_metadata(self):
        # a simple test that changing update_metadata to True does update some stuff:
        FilterExtract(self.G, self.fname_copy, update_metadata=True).create_filtered_copy()
        # check that the copying has been properly performed:
        hash_orig = hashlib.md5(open(self.fname, 'rb').read()).hexdigest()
        hash_copy = hashlib.md5(open(self.fname_copy, 'rb').read()).hexdigest()
        self.assertTrue(os.path.exists(self.fname_copy))
        self.assertNotEqual(hash_orig, hash_copy)
        os.remove(self.fname_copy)

    def test_filter_by_agency(self):
        FilterExtract(self.G, self.fname_copy, agency_ids_to_preserve=['DTA']).create_filtered_copy()
        hash_copy = hashlib.md5(open(self.fname_copy, 'rb').read()).hexdigest()
        self.assertNotEqual(self.hash_orig, hash_copy)
        G_copy = GTFS(self.fname_copy)
        agency_table = G_copy.get_table("agencies")
        assert "EXA" not in agency_table['agency_id'].values, "EXA agency should not be preserved"
        assert "DTA" in agency_table['agency_id'].values, "DTA agency should be preserved"
        routes_table = G_copy.get_table("routes")
        assert "EXR1" not in routes_table['route_id'].values, "EXR1 route_id should not be preserved"
        assert "AB" in routes_table['route_id'].values, "AB route_id should be preserved"
        trips_table = G_copy.get_table("trips")
        assert "EXT1" not in trips_table['trip_id'].values, "EXT1 trip_id should not be preserved"
        assert "AB1" in trips_table['trip_id'].values, "AB1 trip_id should be preserved"
        calendar_table = G_copy.get_table("calendar")
        assert "FULLW" in calendar_table['service_id'].values, "FULLW service_id should be preserved"
        # stop_times
        stop_times_table = G_copy.get_table("stop_times")
        # 01:32:45 corresponds to 3600 + (32 * 60) + 45 [in day seconds]
        assert 3600 + (32 * 60) + 45 not in stop_times_table['arr_time'].values
        os.remove(self.fname_copy)

    def test_filter_by_start_and_end_full_range(self):
        # untested tables with filtering: stops, shapes
        # test filtering by start and end time, copy full range
        FilterExtract(self.G, self.fname_copy,
                      start_date=u"2007-01-01",
                      end_date=u"2011-01-01",
                      update_metadata=False).create_filtered_copy()
        G_copy = GTFS(self.fname_copy)
        dsut_end = G_copy.get_day_start_ut("2010-12-31")
        dsut_to_trip_I = G_copy.get_tripIs_within_range_by_dsut(dsut_end, dsut_end + 24 * 3600)
        self.assertGreater(len(dsut_to_trip_I), 0)
        os.remove(self.fname_copy)

    def test_filter_end_date_not_included(self):
        # the end date should not be included:
        FilterExtract(self.G, self.fname_copy,
                      start_date="2007-01-02",
                      end_date="2010-12-31").create_filtered_copy()
        hash_copy = hashlib.md5(open(self.fname_copy, 'rb').read()).hexdigest()
        self.assertNotEqual(self.hash_orig, hash_copy)
        G_copy = GTFS(self.fname_copy)
        dsut_end = G_copy.get_day_start_ut("2010-12-31")
        dsut_to_trip_I = G_copy.get_tripIs_within_range_by_dsut(dsut_end, dsut_end + 24 * 3600)
        self.assertEqual(len(dsut_to_trip_I), 0)
        calendar_copy = G_copy.get_table("calendar")
        max_date_calendar = max([datetime.datetime.strptime(el, "%Y-%m-%d")
                                 for el in calendar_copy["end_date"].values])
        min_date_calendar = min([datetime.datetime.strptime(el, "%Y-%m-%d")
                                 for el in calendar_copy["start_date"].values])
        end_date_not_included = datetime.datetime.strptime("2010-12-31", "%Y-%m-%d")
        start_date_not_included = datetime.datetime.strptime("2007-01-01", "%Y-%m-%d")
        self.assertLess(max_date_calendar, end_date_not_included,
                        msg="the last date should not be included in calendar")
        self.assertLess(start_date_not_included, min_date_calendar)
        os.remove(self.fname_copy)

    def test_filter_spatially(self):
        # test that the db is split by a given spatial boundary
        FilterExtract(self.G, self.fname_copy,
                      buffer_lat=36.914893,
                      buffer_lon=-116.76821,
                      buffer_distance_km=50).create_filtered_copy()
        G_copy = GTFS(self.fname_copy)
        stops_table = G_copy.get_table("stops")
        self.assertNotIn("FUR_CREEK_RES", stops_table['stop_id'].values)
        self.assertIn("AMV", stops_table['stop_id'].values)
        self.assertEqual(len(stops_table['stop_id'].values), 8)
        conn_copy = sqlite3.connect(self.fname_copy)
        stop_ids_df = pandas.read_sql(
            'SELECT stop_id from stop_times '
            'left join stops '
            'on stops.stop_I = stop_times.stop_I', conn_copy)
        stop_ids = stop_ids_df["stop_id"].values
        self.assertNotIn("FUR_CREEK_RES", stop_ids)
        self.assertIn("AMV", stop_ids)
        trips_table = G_copy.get_table("trips")
        self.assertNotIn("BFC1", trips_table['trip_id'].values)
        routes_table = G_copy.get_table("routes")
        self.assertNotIn("BFC", routes_table['route_id'].values)
        # cases:
        # whole trip excluded
        # whole route excluded
        # whole agency excluded
        # part of trip excluded
        # part of route excluded
        # part of agency excluded
        # not removing stops from a trip that returns into area
        # test higher-order removals
        #   stop A preserved
        #   -> stop B preserved
        #   -> stop C preserved

    def test_filter_spatially_2(self):
        n_rows_before = {
            "routes": 4,
            "stop_times": 14,
            "trips": 4,
            "stops": 6,
            "shapes": 4
        }
        n_rows_after_1000 = {  # within "soft buffer" in the feed data
            "routes": 1,
            "stop_times": 2,
            "trips": 1,
            "stops": 2,
            "shapes": 0
        }
        n_rows_after_3000 = {  # within "hard buffer" in the feed data
            "routes": len(["t1", "t3", "t4"]),
            "stop_times": 11,
            "trips": 4,
            "stops": len({"P", "H", "V", "L", "B"}),  # for some reason, the first
            "shapes": 4
        }
        paris_lat = 48.832781
        paris_lon = 2.360734
        SELECT_MIN_MAX_SHAPE_BREAKS_BY_TRIP_I_SQL = \
            "SELECT trips.trip_I, shape_id, min(shape_break) as min_shape_break, " \
            "max(shape_break) as max_shape_break FROM trips, stop_times " \
            "WHERE trips.trip_I=stop_times.trip_I GROUP BY trips.trip_I"
        trip_min_max_shape_seqs = pandas.read_sql(SELECT_MIN_MAX_SHAPE_BREAKS_BY_TRIP_I_SQL,
                                                  self.G_filter_test.conn)

        for distance_km, n_rows_after in zip([1000, 3000],
                                             [n_rows_after_1000, n_rows_after_3000]):
            try:
                os.remove(self.fname_copy)
            except FileNotFoundError:
                pass
            FilterExtract(self.G_filter_test, self.fname_copy,
                          buffer_lat=paris_lat,
                          buffer_lon=paris_lon,
                          buffer_distance_km=distance_km).create_filtered_copy()
            for table_name, n_rows in n_rows_before.items():
                self.assertEqual(len(self.G_filter_test.get_table(table_name)), n_rows,
                                 "Row counts before differ in " + table_name +
                                 ", distance: " + str(distance_km))
            G_copy = GTFS(self.fname_copy)
            for table_name, n_rows in n_rows_after.items():
                table = G_copy.get_table(table_name)
                self.assertEqual(len(table), n_rows,
                                 "Row counts after differ in " + table_name +
                                 ", distance: " + str(distance_km) + "\n" + str(table))
            # assert that stop_times are re-sequenced starting from one
            counts = pandas.read_sql(
                "SELECT count(*) FROM stop_times GROUP BY trip_I ORDER BY trip_I", G_copy.conn)
            max_values = pandas.read_sql(
                "SELECT max(seq) FROM stop_times GROUP BY trip_I ORDER BY trip_I", G_copy.conn)
            self.assertTrue((counts.values == max_values.values).all())

    def test_remove_all_trips_fully_outside_buffer(self):
        stops = self.G.stops()
        stop_1 = stops[stops['stop_I'] == 1]
        n_trips_before = len(self.G.get_table("trips"))
        remove_all_trips_fully_outside_buffer(self.G.conn,
                                              float(stop_1.lat), float(stop_1.lon), 100000)
        self.assertEqual(len(self.G.get_table("trips")), n_trips_before)
        # 0.002 (= max 2 meters from the stop); rounding errors can take place...
        remove_all_trips_fully_outside_buffer(self.G.conn,
                                              float(stop_1.lat), float(stop_1.lon), 0.002)
        self.assertEqual(len(self.G.get_table("trips")), 2)  # the value "2" comes from the data
class TimetableValidator(object):

    def __init__(self, gtfs, buffer_params=None):
        """
        Parameters
        ----------
        gtfs: GTFS, or path to a GTFS object
            A GTFS object
        """
        if not isinstance(gtfs, GTFS):
            self.gtfs = GTFS(gtfs)
        else:
            self.gtfs = gtfs
        self.buffer_params = buffer_params
        self.warnings_container = WarningsContainer()

    def get_warnings(self):
        """
        Validates/checks a given GTFS feed with respect to a number of different issues.

        The set of warnings that are checked for can be found in gtfs_validator.ALL_WARNINGS.

        Returns
        -------
        warnings: WarningsContainer
        """
        self.warnings_container.clear()
        self._validate_stops_with_same_stop_time()
        self._validate_speeds_and_trip_times()
        self._validate_stop_spacings()
        self._validate_stop_sequence()
        self._validate_misplaced_stops()
        self.warnings_container.print_summary()
        return self.warnings_container

    def _validate_misplaced_stops(self):
        if self.buffer_params:
            p = self.buffer_params
            center_lat = p['lat']
            center_lon = p['lon']
            distance = p['buffer_distance'] * 2 * 1000
            for stop_row in self.gtfs.stops().itertuples():
                if distance < wgs84_distance(center_lat, center_lon,
                                             stop_row.lat, stop_row.lon):
                    self.warnings_container.add_warning(
                        stop_row, WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY)
                    print(WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY, stop_row)

    def _validate_stops_with_same_stop_time(self):
        n_stops_with_same_time = 5
        # this query returns the trips where there are N or more stops with the same stop time
        rows = self.gtfs.get_cursor().execute(
            'SELECT '
            'trip_I, '
            'arr_time, '
            'N '
            'FROM '
            '(SELECT trip_I, arr_time, count(*) as N FROM stop_times GROUP BY trip_I, arr_time) q1 '
            'WHERE N >= ?', (n_stops_with_same_time,))
        for row in rows:
            self.warnings_container.add_warning(
                row, WARNING_5_OR_MORE_CONSECUTIVE_STOPS_WITH_SAME_TIME)

    def _validate_stop_spacings(self):
        self.gtfs.conn.create_function("find_distance", 4, wgs84_distance)
        max_stop_spacing = 20000  # meters
        max_time_between_stops = 1800  # seconds
        # this query calculates the distance and travel time between consecutive stops
        rows = self.gtfs.execute_custom_query(
            'SELECT '
            'q1.trip_I, '
            'type, '
            'q1.stop_I as stop_1, '
            'q2.stop_I as stop_2, '
            'CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT) as distance, '
            'q2.arr_time_ds - q1.arr_time_ds as traveltime '
            'FROM '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q1, '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q2, '
            'trips, '
            'routes '
            'WHERE q1.trip_I = q2.trip_I '
            'AND q1.seq + 1 = q2.seq '
            'AND q1.trip_I = trips.trip_I '
            'AND trips.route_I = routes.route_I ').fetchall()
        for row in rows:
            if row[4] > max_stop_spacing:
                self.warnings_container.add_warning(row, WARNING_LONG_STOP_SPACING)
            if row[5] > max_time_between_stops:
                self.warnings_container.add_warning(row, WARNING_LONG_TRAVEL_TIME_BETWEEN_STOPS)

    def _validate_speeds_and_trip_times(self):
        # These are the mode - feasible speed combinations used here:
        # https://support.google.com/transitpartners/answer/1095482?hl=en
        gtfs_type_to_max_speed = {
            route_types.TRAM: 100,
            route_types.SUBWAY: 150,
            route_types.RAIL: 300,
            route_types.BUS: 100,
            route_types.FERRY: 80,
            route_types.CABLE_CAR: 50,
            route_types.GONDOLA: 50,
            route_types.FUNICULAR: 50,
            route_types.AIRCRAFT: 1000
        }
        max_trip_time = 7200  # seconds
        self.gtfs.conn.create_function("find_distance", 4, wgs84_distance)
        # this query returns the total distance and travel time for each trip,
        # calculated for each stop spacing separately
        rows = self.gtfs.execute_custom_query(
            'SELECT '
            'q1.trip_I, '
            'type, '
            'sum(CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT)) AS total_distance, '
            'sum(q2.arr_time_ds - q1.arr_time_ds) AS total_traveltime '
            'FROM '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q1, '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q2, '
            'trips, '
            'routes '
            'WHERE q1.trip_I = q2.trip_I '
            'AND q1.seq + 1 = q2.seq AND q1.trip_I = trips.trip_I '
            'AND trips.route_I = routes.route_I GROUP BY q1.trip_I').fetchall()
        for row in rows:
            avg_velocity = row[2] / max(row[3], 1) * 3.6  # km/h
            if avg_velocity > gtfs_type_to_max_speed[row[1]]:
                self.warnings_container.add_warning(row, WARNING_UNREALISTIC_AVERAGE_SPEED)
            if row[3] > max_trip_time:
                self.warnings_container.add_warning(row, WARNING_LONG_TRIP_TIME)

    def _validate_stop_sequence(self):
        # this function checks that the stop sequence value increases by +1 for each stop;
        # this is not (yet) enforced
        rows = self.gtfs.execute_custom_query(
            'SELECT trip_I, dep_time_ds, seq '
            'FROM stop_times '
            'ORDER BY trip_I, dep_time_ds, seq').fetchall()
        old_trip_id = None
        old_seq = None
        for row in rows:
            new_trip_id = row[0]
            new_seq = row[2]
            if old_trip_id == new_trip_id:
                if old_seq + 1 != new_seq:
                    self.warnings_container.add_warning(row, WARNING_STOP_SEQUENCE_ERROR)
            old_trip_id = row[0]
            old_seq = row[2]
# imports assumed by this fragment (gtfspy module paths as of the time of writing):
import numpy
import matplotlib.pyplot as plt
from gtfspy.gtfs import GTFS
from gtfspy.mapviz import plot_route_network, ROUTE_TYPE_TO_ZORDER

from settings import FIGS_DIRECTORY
import settings

from matplotlib import rc
rc('legend', framealpha=0.8)
rc("text", usetex=True)

fname = "../data/main.sqlite"  # a database imported using gtfspy
g = GTFS(fname)

fig = plt.figure(figsize=(5, 3.5))
ax = fig.add_subplot(111)
plt.subplots_adjust(left=0, right=1, top=1, bottom=0)

stops = g.stops()
lats = stops["lat"].values
lons = stops["lon"].values

ROUTE_TYPE_TO_ZORDER[1] = 10  # set subway on top

# crop the map to percentiles of the stop coordinates to drop outliers
spatial_bounds = {
    "lat_min": numpy.percentile(lats, 2),
    "lat_max": numpy.percentile(lats, 94),
    "lon_min": numpy.percentile(lons, 5),
    "lon_max": numpy.percentile(lons, 95)
}
ax, smopy_map = plot_route_network(g,
                                   ax,
                                   spatial_bounds=spatial_bounds,
                                   map_alpha=0.8,
                                   scalebar=True,
                                   return_smopy_map=True)
stop_lats = []
class GenericJourneyDataPipeline:

    def __init__(self):
        self.G = GTFS(GTFS_DATA_BASEDIR)
        self.day_start_ut = self.G.get_suitable_date_for_daily_extract(ut=True) + 3600
        self.start_time = self.day_start_ut + 8 * 3600
        self.end_time = self.day_start_ut + 11 * 3600
        self.profiles = {}
        self.journey_analyzer = None
        # self.analysis_start_time
        # self.analysis_end_time
        makedirs(RESULTS_DIRECTORY)

        print("Retrieving transit events")
        self.connections = []
        for e in self.G.generate_routable_transit_events(start_time_ut=self.start_time,
                                                         end_time_ut=self.end_time):
            self.connections.append(Connection(int(e.from_stop_I),
                                               int(e.to_stop_I),
                                               int(e.dep_time_ut),
                                               int(e.arr_time_ut),
                                               int(e.trip_I)))
        print("Retrieving walking network")
        self.net = self.G.get_walk_transfer_stop_to_stop_network()

    def script(self):
        self.get_profile_data()
        journey_analyzer = JourneyDataManager(TARGET_STOPS,
                                              JOURNEY_DATA_DIR,
                                              GTFS_DATA_BASEDIR,
                                              ROUTING_PARAMS,
                                              track_route=True,
                                              close_connection=False)
        journey_analyzer.import_journey_data_for_target_stop(self.profiles)
        journey_analyzer.create_indices()
        if False:
            journey_analyzer.add_fastest_path_column()
        """
        all_geoms = journey_analyzer.get_all_geoms()
        journey_path = os.path.join(RESULTS_DIRECTORY,
                                    "all_routes_to_" + target_list_to_str(TARGET_STOPS) + ".geojson")
        with open(journey_path, 'w') as f:
            dump(journey_analyzer.extract_geojson(all_geoms), f)
        """

    def get_profile_data(self, targets=TARGET_STOPS, recompute=False):
        node_profiles_fname = os.path.join(
            RESULTS_DIRECTORY, "node_profile_" + target_list_to_str(targets) + ".pickle")
        if not recompute and os.path.exists(node_profiles_fname):
            print("Loading precomputed data")
            self.profiles = pickle.load(open(node_profiles_fname, 'rb'))
            print("Loaded precomputed data")
        else:
            print("Recomputing profiles")
            self._compute_profile_data()
            pickle.dump(self.profiles, open(node_profiles_fname, 'wb'), -1)
            print("Recomputed profiles")

    def _compute_profile_data(self):
        csp = MultiObjectivePseudoCSAProfiler(self.connections,
                                              TARGET_STOPS,
                                              walk_network=self.net,
                                              transfer_margin=TRANSFER_MARGIN,
                                              walk_speed=WALK_SPEED,
                                              verbose=True,
                                              track_vehicle_legs=False,
                                              track_time=True,
                                              track_route=True)
        print("CSA Profiler running...")
        csp.run()
        print("CSA profiler finished")
        self.profiles = dict(csp.stop_profiles)

    def key_measures_as_csv(self, csv_path="stop_data.csv"):
        """
        Combines key temporal distance measures for each node with stop data
        from the GTFS database and stores them in CSV format.

        :return:
        """
        node_profiles_list = []
        # iterate through all node profiles and add the NodeProfileAnalyzer data to a list of dicts
        for node, profile in self.profiles.items():
            npa = NodeProfileAnalyzerTimeAndVehLegs.from_profile(profile,
                                                                 self.start_time,
                                                                 self.end_time)
            node_profile_dict = npa.get_node_profile_measures_as_dict()
            node_profile_dict["node"] = node
            node_profiles_list.append(node_profile_dict)
        node_profiles = DataFrame(node_profiles_list)
        stops = self.G.stops()
        stops.join(node_profiles.set_index("node"), on='stop_I').to_csv(path_or_buf=csv_path)
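
# A hedged usage sketch for the pipeline above. It relies on module-level
# settings (GTFS_DATA_BASEDIR, TARGET_STOPS, RESULTS_DIRECTORY, ...), so it
# only runs within this project's environment.
if __name__ == "__main__":
    pipeline = GenericJourneyDataPipeline()
    pipeline.get_profile_data(recompute=False)     # load cached profiles, or compute them
    pipeline.key_measures_as_csv("stop_data.csv")  # per-stop temporal distance measures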