def reset_feed_dates(daterange):
    """Overwrite the feed_info start/end dates with the bounds of `daterange`.

    Does nothing when the feed has no feed_info table.

    Args:
        daterange: mapping with 'start' and 'end' keys, passed to
            GTFSDateRange.
    """
    if gtfs.has_table('feed_info'):
        new_range = GTFSDateRange(daterange['start'], daterange['end'])
        info = gtfs.get_table('feed_info')

        # Both columns are set to a single value across the whole table.
        for column, bound in (('feed_start_date', new_range.start),
                              ('feed_end_date', new_range.end)):
            info[column] = bound.datestring()

        gtfs.update_table('feed_info', info)
def remove_trips_with_nonexistent_calendars():
    """Drop trips whose service_id is absent from the calendar table.

    When a frequencies table exists, its rows are also restricted to the
    trips that survive the filter (matched against the trips index).
    """
    known_services = gtfs.get_table('calendar', index=False)['service_id']
    all_trips = gtfs.get_table('trips')

    has_calendar = all_trips['service_id'].isin(known_services)
    kept_trips = all_trips[has_calendar]

    if gtfs.has_table('frequencies'):
        freq = gtfs.get_table('frequencies')
        surviving_trip_ids = kept_trips.index.to_series()
        still_referenced = freq['trip_id'].isin(surviving_trip_ids)
        gtfs.update_table('frequencies', freq[still_referenced])

    gtfs.update_table('trips', kept_trips)
def filter_board_alight_by_daterange(daterange):
    """Drop board_alight rows whose service_date is outside `daterange`.

    Does nothing when the feed has no board_alight table, or when that table
    has no service_date column.

    Args:
        daterange: mapping with 'start' and 'end' keys, passed to
            GTFSDateRange.
    """
    if not gtfs.has_table('board_alight'):
        return

    records = gtfs.get_table('board_alight', index=False)
    if 'service_date' not in records.columns:
        return

    window = GTFSDateRange(daterange['start'], daterange['end'])

    def served_in_window(record):
        return window.includes(record['service_date'])

    # The helper column remains on the frame handed to update_table.
    records['_inrange'] = records.apply(served_in_window, axis=1)
    gtfs.update_table('board_alight', records.loc[records['_inrange']])
def filter_trips_by_date(date):
    """Remove trips that do not occur on the specified date.

    A trip is kept when its calendar range contains `date` and the matching
    day-of-week column is GTFSBool.TRUE. If a calendar_dates table exists,
    services removed on `date` are excluded and services added on `date` are
    included; without a calendar table only the added services are kept.

    The frame returned by get_trips_extended() is used throughout, so it must
    provide start_date, end_date, service_id and the day-of-week columns.

    Args:
        date: the service date to keep; compared directly against the
            start_date / end_date / calendar_dates.date values.
    """
    # TODO consider replacing with filter_calendars_by_date, prune
    schedule = get_trips_extended()
    weekday_column = GTFSDate(date).dow()

    within_bounds = (schedule['start_date'] <= date) & (date <= schedule['end_date'])
    runs_on_weekday = schedule[weekday_column] == GTFSBool.TRUE
    regular_service = within_bounds & runs_on_weekday

    if not gtfs.has_table('calendar_dates'):
        keep = regular_service
    else:
        # filter calendar_dates for relevant calendar exceptions
        exceptions = gtfs.get_table('calendar_dates')
        on_date = exceptions['date'] == date
        is_addition = exceptions['exception_type'] == GTFSExceptionType.ADDED
        is_removal = exceptions['exception_type'] == GTFSExceptionType.REMOVED

        added_ids = exceptions[on_date & is_addition]['service_id']
        removed_ids = exceptions[on_date & is_removal]['service_id']

        trip_added = schedule['service_id'].isin(added_ids)
        trip_removed = schedule['service_id'].isin(removed_ids)

        if gtfs.has_table('calendar'):
            keep = (regular_service & ~trip_removed) | trip_added
        else:
            keep = trip_added

    gtfs.update_table('trips', schedule[keep])
def filter_calendar_dates_by_daterange(daterange):
    """Drop calendar_dates rows whose date is outside `daterange`.

    Does nothing when the feed has no calendar_dates table.

    Args:
        daterange: mapping with 'start' and 'end' keys, passed to
            GTFSDateRange.
    """
    if not gtfs.has_table('calendar_dates'):
        return

    calendar_dates = gtfs.get_table('calendar_dates')
    filter_daterange = GTFSDateRange(daterange['start'], daterange['end'])

    # Membership is tested on the raw `date` value; no per-row GTFSDate
    # object is needed for the filter.
    calendar_dates['_inrange'] = calendar_dates.apply(
        lambda row: filter_daterange.includes(row['date']), axis=1)

    calendar_dates_filtered = calendar_dates[calendar_dates['_inrange']]
    gtfs.update_table('calendar_dates', calendar_dates_filtered)
def filter_calendars_by_daterange(daterange):
    """Restrict the calendar table to entries relevant to `daterange`.

    A calendar entry is kept when either:
      * its start_date..end_date range overlaps `daterange` AND at least one
        of the days of the week reported by `daterange` is GTFSBool.TRUE, or
      * its index value appears in the service_id column of a calendar_dates
        row whose date falls inside `daterange`.

    Kept entries that overlap `daterange` have start_date / end_date trimmed
    to the overlap. Entries kept only because of an exception have no overlap
    to trim to, so their original bounds are left untouched.

    Args:
        daterange: mapping with 'start' and 'end' keys, passed to
            GTFSDateRange.
    """
    calendar = gtfs.get_table('calendar')
    filter_daterange = GTFSDateRange(daterange['start'], daterange['end'])

    calendar['_gtfs_daterange'] = calendar.apply(
        lambda row: GTFSDateRange(row['start_date'], row['end_date']), axis=1)
    # get_overlap is treated as yielding a null value when the ranges do not
    # intersect (the keep-filter below relies on .notnull()).
    calendar['_overlap'] = calendar['_gtfs_daterange'].apply(
        lambda dr: filter_daterange.get_overlap(dr))

    # we want to remove calendar entries that don't overlap DOWs
    calendar['_dows_overlap'] = calendar.apply(
        lambda row: GTFSBool.TRUE in (
            row[dow] for dow in filter_daterange.days_of_week()),
        axis=1)

    # we want to keep calendar entries that are used in overlapping exceptions
    if gtfs.has_table('calendar_dates'):
        calendar_dates = gtfs.get_table('calendar_dates')
        calendar_dates['_date_overlap'] = calendar_dates.apply(
            lambda row: filter_daterange.includes(row['date']), axis=1)
        calendar_dates = calendar_dates[calendar_dates['_date_overlap']]
        calendar['_exception_overlap'] = calendar.index.to_series().isin(
            calendar_dates['service_id'])
    else:
        calendar['_exception_overlap'] = False

    keep = (calendar['_overlap'].notnull() & calendar['_dows_overlap']) \
        | calendar['_exception_overlap']
    # Explicit copy: the column writes below then target an independent frame
    # instead of a slice of the unfiltered table.
    calendar = calendar[keep].copy()

    # trim bounds to fit within daterange
    # Rows retained only through _exception_overlap may have a null _overlap;
    # dereferencing .start/.end on those would raise, so keep their bounds.
    has_overlap = calendar['_overlap'].notnull()
    calendar['start_date'] = [
        overlap.start.datestring() if trimmable else original
        for overlap, trimmable, original in zip(
            calendar['_overlap'], has_overlap, calendar['start_date'])
    ]
    calendar['end_date'] = [
        overlap.end.datestring() if trimmable else original
        for overlap, trimmable, original in zip(
            calendar['_overlap'], has_overlap, calendar['end_date'])
    ]

    gtfs.update_table('calendar', calendar)
def get_feed_start_end_daterange():
    """Build a GTFSDateRange from the feed_info start and end dates.

    Returns:
        A GTFSDateRange built from feed_start_date / feed_end_date of the
        feed_info row labelled 0, or None when there is no feed_info table.
    """
    if gtfs.has_table('feed_info'):
        info = gtfs.get_table('feed_info')
        first_day = info.loc[0, 'feed_start_date']
        last_day = info.loc[0, 'feed_end_date']
        return GTFSDateRange(first_day, last_day)

    return None
def interpolate_stop_times():
    """Fill missing stop_times arrival/departure values by interpolation.

    Rows with an arrival_time or a departure_time act as timepoints. Every
    stop_sequence from a timepoint up to (but excluding) the next timepoint
    of the same trip forms a chunk. A missing time is interpolated linearly
    between the chunk start's departure_time and the chunk end's
    arrival_time, in proportion to shape_dist_traveled.

    Returns:
        False if interpolation is not possible (no shapes table, an empty
        shapes table, or stop_times without any shape_dist_traveled value);
        True once the stop_times table has been updated.
    """
    stop_times = gtfs.get_table('stop_times')

    # Check for the table before fetching it; short-circuiting keeps
    # get_table('shapes') from being called on a feed without shapes.
    if not gtfs.has_table('shapes') or gtfs.get_table('shapes').empty:
        return False

    if 'shape_dist_traveled' not in stop_times.columns \
            or stop_times['shape_dist_traveled'].isna().all():
        return False

    # build table with chunk information
    df = stop_times.copy()
    df['has_arrival'] = df['arrival_time'].notna()
    df['has_departure'] = df['departure_time'].notna()
    # keep timepoints only
    df = df[df['has_arrival'] | df['has_departure']].copy()

    # Forward-looking rolling max: reverse, roll over 2 rows, reverse back, so
    # each timepoint sees the stop_sequence of the next timepoint in its trip.
    # https://stackoverflow.com/questions/50411098/how-to-do-forward-rolling-sum-in-pandas
    df['next_stop_sequence'] = (
        df.sort_values(by=['trip_id', 'stop_sequence'])
        .iloc[::-1]
        .groupby('trip_id')['stop_sequence']
        .transform(lambda x: x.rolling(2).max())
        .iloc[::-1]
    )

    # cleanup: the last timepoint of a trip has no successor, so it points
    # at itself
    df['next_stop_sequence'] = df['next_stop_sequence'] \
        .fillna(df['stop_sequence']).astype('int64')
    df['stop_sequence_list'] = df.apply(
        lambda row: list(
            range(row['stop_sequence'], row['next_stop_sequence'])
            if row['stop_sequence'] != row['next_stop_sequence']
            else [row['stop_sequence']]
        ),
        axis=1)
    df = df.explode('stop_sequence_list')
    df = df.rename(columns={
        'stop_sequence': 'start_seq',
        'next_stop_sequence': 'end_seq',
        'stop_sequence_list': 'stop_sequence',
    })
    chunks = df.set_index(['trip_id', 'stop_sequence'])[['start_seq', 'end_seq']]

    stop_times = stop_times.set_index(['trip_id', 'stop_sequence'])
    stop_times = stop_times.merge(
        chunks, how='left', right_index=True, left_index=True)

    # Look up the time and distance at each chunk's start and end stop. All
    # four lookups are taken before any of the merges below runs.
    lookups = (
        (stop_times['departure_time'].rename('start_time'), 'start_seq'),
        (stop_times['arrival_time'].rename('end_time'), 'end_seq'),
        (stop_times['shape_dist_traveled'].rename('start_sdt'), 'start_seq'),
        (stop_times['shape_dist_traveled'].rename('end_sdt'), 'end_seq'),
    )
    for values, seq_column in lookups:
        stop_times = stop_times.merge(
            values, left_on=['trip_id', seq_column], right_index=True)

    def interpolate_row(row):
        # happens if last stop or on 1-stop chunks (consecutive timepoints)
        if row['start_time'] == row['end_time']:
            return row['start_time']

        # NOTE(review): the divisor is end_sdt - start_sdt; a chunk whose two
        # ends share the same distance but have different times would divide
        # by zero here -- confirm whether such feeds need handling.
        fraction = (row['shape_dist_traveled'] - row['start_sdt']) \
            / (row['end_sdt'] - row['start_sdt'])
        start_seconds = seconds_since_zero(row['start_time'])
        duration = seconds_since_zero(row['end_time']) - start_seconds
        return seconds_to_military(
            start_seconds + int(round(fraction * duration)))

    stop_times['interp'] = stop_times.apply(interpolate_row, axis=1)
    # Only gaps are filled; times already present are left as they are.
    stop_times['arrival_time'] = \
        stop_times['arrival_time'].fillna(stop_times['interp'])
    stop_times['departure_time'] = \
        stop_times['departure_time'].fillna(stop_times['interp'])

    gtfs.update_table('stop_times', stop_times.reset_index(), cascade=False)
    return True