def remove_trips_with_nonexistent_calendars():
    """Drop trips whose service_id has no calendar entry, along with any
    frequencies rows that referenced the dropped trips."""
    calendar = gtfs.get_table('calendar', index=False)
    trips = gtfs.get_table('trips')
    kept_trips = trips[trips['service_id'].isin(calendar['service_id'])]
    if gtfs.has_table('frequencies'):
        frequencies = gtfs.get_table('frequencies')
        kept_frequencies = frequencies[
            frequencies['trip_id'].isin(kept_trips.index.to_series())]
        gtfs.update_table('frequencies', kept_frequencies)
    gtfs.update_table('trips', kept_trips)
def print_table(tablename, message='', all=False):
    """Debug-print a GTFS table; with all=True, print every row via to_string()."""
    df = gtfs.get_table(tablename)
    prefix = 'DEBUG ' + tablename + ' ' + message + ': '
    print(prefix, df.to_string() if all else df)
def reset_feed_dates(daterange):
    """Overwrite feed_info.txt start/end dates with the given daterange
    (dict with 'start' and 'end' keys). No-op if feed_info is absent."""
    if not gtfs.has_table('feed_info'):
        return
    new_range = GTFSDateRange(daterange['start'], daterange['end'])
    feed_info = gtfs.get_table('feed_info')
    feed_info['feed_start_date'] = new_range.start.datestring()
    feed_info['feed_end_date'] = new_range.end.datestring()
    gtfs.update_table('feed_info', feed_info)
def filter_board_alight_by_daterange(daterange):
    """Keep only board_alight records whose service_date lies within daterange.

    No-op when board_alight.txt is missing or lacks a service_date column.
    """
    if not gtfs.has_table('board_alight'):
        return
    board_alight = gtfs.get_table('board_alight', index=False)
    if 'service_date' not in board_alight.columns:
        return
    window = GTFSDateRange(daterange['start'], daterange['end'])
    board_alight['_inrange'] = board_alight.apply(
        lambda row: window.includes(row['service_date']), axis=1)
    gtfs.update_table('board_alight', board_alight[board_alight['_inrange']])
def filter_calendars_by_daterange(daterange):
    """Restrict calendar.txt entries to the given date range.

    Keeps entries whose date span AND active days-of-week overlap the range,
    plus entries referenced by calendar_dates exceptions inside the range;
    then trims start_date/end_date to fit within the range.
    """
    calendar = gtfs.get_table('calendar')
    filter_daterange = GTFSDateRange(daterange['start'], daterange['end'])
    calendar['_gtfs_daterange'] = calendar.apply(
        lambda row: GTFSDateRange(row['start_date'], row['end_date']), axis=1)
    calendar['_overlap'] = calendar['_gtfs_daterange'].apply(
        lambda dr: filter_daterange.get_overlap(dr))
    # we want to remove calendar entries that don't overlap DOWs
    calendar['_dows_overlap'] = calendar.apply(
        lambda row: GTFSBool.TRUE in (row[dow] for dow in filter_daterange.days_of_week()),
        axis=1)
    # we want to keep calendar entries that are used in overlapping exceptions
    if gtfs.has_table('calendar_dates'):
        calendar_dates = gtfs.get_table('calendar_dates')
        calendar_dates['_date_overlap'] = calendar_dates.apply(
            lambda row: filter_daterange.includes(row['date']), axis=1)
        calendar_dates = calendar_dates[calendar_dates['_date_overlap']]
        calendar['_exception_overlap'] = calendar.index.to_series().isin(
            calendar_dates['service_id'])
    else:
        calendar['_exception_overlap'] = False
    calendar = calendar[(calendar['_overlap'].notnull() & calendar['_dows_overlap'])
                        | calendar['_exception_overlap']]
    # trim bounds to fit within daterange
    # BUG FIX: rows kept only because of a calendar_dates exception can have a
    # null _overlap; calling .start/.end on None raised AttributeError. Only
    # trim rows that actually overlap the filter range; exception-only rows
    # keep their original start_date/end_date.
    has_overlap = calendar['_overlap'].notnull()
    calendar.loc[has_overlap, 'start_date'] = calendar.loc[has_overlap, '_overlap'].apply(
        lambda dr: dr.start.datestring())
    calendar.loc[has_overlap, 'end_date'] = calendar.loc[has_overlap, '_overlap'].apply(
        lambda dr: dr.end.datestring())
    gtfs.update_table('calendar', calendar)
def filter_calendar_dates_by_daterange(daterange):
    """Drop calendar_dates exceptions falling outside the given daterange.

    No-op when calendar_dates.txt is absent.
    """
    if not gtfs.has_table('calendar_dates'):
        return
    calendar_dates = gtfs.get_table('calendar_dates')
    window = GTFSDateRange(daterange['start'], daterange['end'])
    calendar_dates['_gtfs_date'] = calendar_dates.apply(
        lambda row: GTFSDate(row['date']), axis=1)
    calendar_dates['_inrange'] = calendar_dates.apply(
        lambda row: window.includes(row['date']), axis=1)
    kept = calendar_dates[calendar_dates['_inrange']]
    gtfs.update_table('calendar_dates', kept)
def filter_single_trips_by_timerange(timerange, trim_trips=False):
    """Filter non-repeating trips by the time range provided in config.

    IMPORTANT: if trim_trips is True, trips partially within range will have
    out-of-range stops AND stops without arrival and departure times removed.
    This is to avoid inferring stop service time when not supplied, and if
    used it is recommended to clean data beforehand by interpolating stops.
    """
    trips_extended = triphelpers.get_trips_extended()
    # If trim_trips is False, we only want to keep trips completely within the
    # time range. If trim_trips is True, we want to keep partially-in-range
    # trips so we can trim them in stop_times.
    wholly_within = not trim_trips
    # add range information
    trips_extended['inrange'] = get_inrange(
        trips_extended, 'start_time', 'end_time', timerange,
        wholly_within=wholly_within)
    # filter trips and write to table
    # BUG FIX: parenthesize both comparisons — `|` binds tighter than `==`,
    # so the original parsed as `((inrange == True) | is_repeating) == True`
    # and only worked by coincidence for boolean columns.
    trips_filtered_df = trips_extended[
        (trips_extended['inrange'] == True)
        | (trips_extended['is_repeating'] == True)]
    gtfs.update_table('trips', trips_filtered_df)
    # filter stop_times if trim_trips is True
    if trim_trips:
        stop_times = gtfs.get_table('stop_times')
        stop_times = stop_times.merge(
            trips_extended['is_repeating'].reset_index(),
            how='left',
            left_on='trip_id',
            right_on='trip_id'
        )
        start = timerange['start']
        end = timerange['end']
        # IMPORTANT: to avoid inferring stop service time when not supplied,
        # this will remove all stops outside of range AND all stops without
        # stop times.
        kwargs = {'inrange': lambda df: service_in_range(
            df['arrival_time'].apply(safe_seconds_since_zero),
            df['departure_time'].apply(safe_seconds_since_zero),
            seconds_since_zero(start),
            seconds_since_zero(end)
        )}
        stop_times = stop_times.assign(**kwargs)
        stop_times = stop_times[(stop_times['inrange'] == True)
                                | (stop_times['is_repeating'] == True)]
        gtfs.update_table('stop_times', stop_times)
def filter_stops_by_multipolygon(multipolygon):
    """Keep only stops located inside the given shapely MultiPolygon.

    Note: this does not clean up the gtfs after removing stops.
    """
    stops = gtfs.get_table('stops')
    points = gpd.points_from_xy(stops['stop_lon'], stops['stop_lat'])
    stops_gdf = gpd.GeoDataFrame(stops, geometry=points, crs='EPSG:4326')
    in_area = stops_gdf.geometry.transform(
        lambda g: multipolygon.contains(g)).rename('in_area')
    stops = stops.merge(in_area.to_frame(),
                        how='left',
                        left_index=True,
                        right_index=True)
    gtfs.update_table('stops', stops[stops['in_area']])
def filter_trips_by_date(date):
    """Remove trips that do not run on the specified date.

    Combines calendar span + day-of-week service with calendar_dates
    ADDED/REMOVED exceptions for that date.
    TODO consider replacing with filter_calendars_by_date, prune
    """
    trips_extended = get_trips_extended()
    dow = GTFSDate(date).dow()
    date_in_range = (trips_extended['start_date'] <= date) \
        & (date <= trips_extended['end_date'])
    dow_in_service = trips_extended[dow] == GTFSBool.TRUE
    trips_filter = date_in_range & dow_in_service
    # fold in calendar exceptions relevant to this date
    if gtfs.has_table('calendar_dates'):
        calendar_dates = gtfs.get_table('calendar_dates')
        on_date = calendar_dates['date'] == date
        added_services = calendar_dates[
            on_date & (calendar_dates['exception_type'] == GTFSExceptionType.ADDED)
        ]['service_id']
        removed_services = calendar_dates[
            on_date & (calendar_dates['exception_type'] == GTFSExceptionType.REMOVED)
        ]['service_id']
        service_added_on_date = trips_extended['service_id'].isin(added_services)
        service_removed_on_date = trips_extended['service_id'].isin(removed_services)
        if gtfs.has_table('calendar'):
            trips_filter = (date_in_range & dow_in_service
                            & ~service_removed_on_date) | service_added_on_date
        else:
            # no calendar.txt: only exception-added services run
            trips_filter = service_added_on_date
    gtfs.update_table('trips', trips_extended[trips_filter])
def calculate_average_headways(date, time_range):
    """Compute average headway per route/direction for `date` and `time_range`.

    For each route and each specified time period the output row contains:
    agency id, agency name, date, start time, end time, route id, route name,
    average headway (minutes), and the list of trip start times during the
    period. Returns a DataFrame with route_id/direction_id as columns.
    """
    trips_extended = triphelpers.get_trips_extended().reset_index()
    trips = gtfs.get_table('trips', original=True)

    # direction_id is optional in GTFS; normalize to an empty string
    if not ('direction_id' in trips.columns):
        trips['direction_id'] = ''
        trips_extended['direction_id'] = ''

    route_direction_pairs = trips[['route_id', 'direction_id']].drop_duplicates()
    route_direction_pairs = route_direction_pairs.set_index(['route_id', 'direction_id'])

    agency_info = gtfs.get_table('agency')[['agency_id', 'agency_name']]

    if 'agency_id' in gtfs.get_columns('routes'):
        route_info = gtfs.get_table('routes', original=True, index=False)[
            ['route_id', 'agency_id', 'route_long_name']]
        output = route_direction_pairs.reset_index() \
            .merge(route_info, how='left', on='route_id') \
            .set_index(['route_id', 'direction_id'])
        output = output.reset_index() \
            .merge(agency_info, how='left', on='agency_id') \
            .set_index(['route_id', 'direction_id'])
    else:
        # No agency id in routes.txt means there is only one agency
        route_info = gtfs.get_table('routes', original=True, index=False)[
            ['route_id', 'route_long_name']]
        # BUG FIX: route_info is already a DataFrame (two-column selection);
        # the original called route_info.to_frame(), which raised
        # AttributeError because DataFrame has no to_frame method.
        output = route_direction_pairs.reset_index() \
            .merge(route_info, how='left', on='route_id') \
            .set_index(['route_id', 'direction_id'])
        output['agency_id'] = agency_info['agency_id'].iloc[0]
        output['agency_name'] = agency_info['agency_name'].iloc[0]

    output['date'] = date
    output['start_time'] = time_range['start'] if time_range else ''
    output['end_time'] = time_range['end'] if time_range else ''

    if trips_extended.empty:
        # no trips at all: emit empty start-time lists and zero headways
        output['trip_start_times'] = np.empty((len(output), 0)).tolist()
        output['average_headway_minutes'] = 0
        return output.reset_index()

    unwrapped_repeating_trips = triphelpers.get_unwrapped_repeating_trips()
    trip_start_times = trips_extended[['trip_id', 'start_time']]
    if not unwrapped_repeating_trips.empty:
        frequency_trip_ids = unwrapped_repeating_trips['trip_id']
        # remove stop_times for trips in frequencies as they should be ignored
        trip_start_times = trip_start_times[
            ~trip_start_times['trip_id'].isin(frequency_trip_ids)]
        unwrapped_repeating_trips = unwrapped_repeating_trips[['trip_id', 'trip_start']]
        unwrapped_repeating_trips = unwrapped_repeating_trips.rename(columns={
            'trip_start': 'start_time'
        })
        trip_start_times = pd.concat([trip_start_times, unwrapped_repeating_trips])

    trip_start_times = trip_start_times.merge(
        trips_extended.reset_index()[['trip_id', 'route_id', 'direction_id']],
        how='left', on='trip_id')
    trip_start_times['start_time_seconds'] = \
        trip_start_times['start_time'].transform(seconds_since_zero)

    # calculate deltas between consecutive departures within a route/direction
    trip_start_times.sort_values(
        ['route_id', 'direction_id', 'start_time_seconds'], inplace=True)
    trip_start_times['delta_seconds'] = trip_start_times['start_time_seconds'].diff()
    # the first trip of each route/direction group has no predecessor
    first_trip_in_route_dir = \
        (trip_start_times['route_id'] != trip_start_times['route_id'].shift(1)) \
        | (
            (trip_start_times['direction_id'] != '')
            & (trip_start_times['direction_id'] != trip_start_times['direction_id'].shift(1))
        )
    trip_start_times.loc[first_trip_in_route_dir, 'delta_seconds'] = np.nan

    route_avg_headway_minutes = trip_start_times \
        .groupby(['route_id', 'direction_id'])['delta_seconds'].mean() \
        .fillna(0) \
        .transform(lambda x: np.round(x / 60, decimals=3)) \
        .rename('average_headway_minutes')
    route_trip_starts_list = trip_start_times \
        .groupby(['route_id', 'direction_id'])['start_time'].apply(list) \
        .rename('trip_start_times')

    output['average_headway_minutes'] = route_avg_headway_minutes
    output = output.merge(route_trip_starts_list, how='left',
                          left_index=True, right_index=True)
    # fill empty trip start times with empty list
    output['trip_start_times'] = output['trip_start_times'].apply(
        lambda d: d if isinstance(d, list) else [])
    return output.reset_index()
def get_feed_start_end_daterange():
    """Return the feed_info.txt start/end dates as a GTFSDateRange.

    Returns None when feed_info.txt is absent. Uses the first row of the
    table; GTFS permits at most one feed_info record per feed.
    """
    if not gtfs.has_table('feed_info'):
        return None
    feed_info = gtfs.get_table('feed_info')
    # ROBUSTNESS: use positional indexing — .loc[0, ...] raised KeyError
    # whenever the table's index did not contain the label 0.
    return GTFSDateRange(feed_info['feed_start_date'].iloc[0],
                         feed_info['feed_end_date'].iloc[0])
def get_feed_calendar_service_daterange():
    """Return the full date range spanned by calendar.txt service entries."""
    calendar = gtfs.get_table('calendar')
    earliest_start = calendar['start_date'].min()
    latest_end = calendar['end_date'].max()
    return GTFSDateRange(earliest_start, latest_end)
def filter_repeating_trips_by_timerange(time_range, trim_trips=False):
    """Filter frequency-based (repeating) trips by a time range.

    Edits start_time and end_time of frequencies partially in range (at least
    one but not all trips occur in range), and edits stop_times for a trip if
    its start_time has changed. When trim_trips is True, partially-in-range
    runs are materialized as new single trips with transposed stop times.

    NOTE(review): this function iteratively mutates `unwrapped_long` while
    looping over out-of-range rows; statement order is load-bearing.
    """
    unwrapped_long = get_long_form_unwrapped_frequencies_inrange_df(time_range)
    # do nothing if no repeating trips
    if (unwrapped_long.empty):
        return
    unwrapped_grouped = unwrapped_long.groupby(['frequency_start', 'trip_id'])
    # if trimming trips, we want to keep partially in range trips in trips.txt
    trip_filter_on = 'partially_in_range' if trim_trips else 'wholly_in_range'
    # Remove frequencies with no trips in range
    any_trip_in_frequency_in_range_series = unwrapped_grouped[trip_filter_on].any() \
        .rename('any_frequency_trip_in_range')
    unwrapped_long = unwrapped_long \
        .merge(any_trip_in_frequency_in_range_series.to_frame().reset_index(),
               on=['frequency_start', 'trip_id'])
    unwrapped_long = unwrapped_long[unwrapped_long['any_frequency_trip_in_range'] == True] \
        .drop('any_frequency_trip_in_range', axis='columns')
    # Remove trip from trips.txt if trip_id not in any range in frequencies
    trips_not_in_any_range_whole = unwrapped_long.groupby(['trip_id'])['wholly_in_range'].any()
    trips_not_in_any_range_whole = trips_not_in_any_range_whole[trips_not_in_any_range_whole == False]
    trips_not_in_any_range_partial = unwrapped_long.groupby(['trip_id'])['partially_in_range'].any()
    trips_not_in_any_range_partial = trips_not_in_any_range_partial[trips_not_in_any_range_partial == False]
    trips_df = gtfs.get_table('trips', index=False)
    trips_filtered_df = trips_df[~trips_df['trip_id'].isin(trips_not_in_any_range_partial.index.to_series())]
    # if trimming, we need to create trimmed single trips for partially-in-range runs
    if trim_trips:
        partial_trips = unwrapped_long.copy().loc[
            (unwrapped_long['partially_in_range'] == True)
            & (unwrapped_long['wholly_in_range'] == False)]
        # synthesize a unique trip_id per partial run, e.g. "<trip>_freq_<order>"
        partial_trips['new_trip_id'] = partial_trips['trip_id'] + '_freq_' + \
            partial_trips['trip_order'].apply(str)
        # add new rows to trips for each partial trip
        partial_trips_rows = trips_filtered_df.merge(
            partial_trips,
            left_on='trip_id',
            right_on='trip_id'
        )
        partial_trips_rows = partial_trips_rows.drop(columns=['trip_id']).rename(columns={
            'new_trip_id': 'trip_id'
        })
        # now that new partial trips have been created, we can remove trips that were partially within
        # the timerange but not wholly within
        trips_filtered_df = trips_df[~trips_df['trip_id'].isin(trips_not_in_any_range_whole.index.to_series())]
        trips_filtered_df = pd.concat(
            [trips_filtered_df, partial_trips_rows],
            axis=0
        )
        # add new rows in range into stoptimes for new trips
        stop_times = gtfs.get_table('stop_times')
        partial_stop_times = stop_times.merge(
            partial_trips[['trip_id', 'new_trip_id', 'trip_start']],
            left_on='trip_id',
            right_on='trip_id'
        ).sort_values(['new_trip_id', 'stop_sequence'])
        trip_bounds = triphelpers.get_trip_bounds()
        trip_bounds = trip_bounds.rename(columns={
            'start_time': 'first_arrival'
        })
        partial_stop_times = partial_stop_times.merge(
            trip_bounds['first_arrival'].to_frame(),
            how='left',
            left_on='trip_id',
            right_on='trip_id'
        )

        def safe_transpose(val, diff_secs):
            # Shift a military-time string by diff_secs seconds; NaN-safe for
            # missing arrival/departure values ('' or float NaN).
            isnan = (type(val) == str and val == '') or (type(val) == float and np.isnan(val))
            if isnan:
                return np.nan
            transpose_secs = seconds_since_zero(val) + diff_secs
            return seconds_to_military(transpose_secs)

        # transpose stop times
        partial_stop_times['arrival_time'] = partial_stop_times.apply(
            lambda row: safe_transpose(
                row['arrival_time'],
                (seconds_since_zero(row['trip_start']) -
                 seconds_since_zero(row['first_arrival']))
            ), axis=1
        )
        partial_stop_times['departure_time'] = partial_stop_times.apply(
            lambda row: safe_transpose(
                row['departure_time'],
                (seconds_since_zero(row['trip_start']) -
                 seconds_since_zero(row['first_arrival']))
            ), axis=1
        )
        partial_stop_times = partial_stop_times.rename(columns={
            'trip_id': 'old_trip_id',
            'new_trip_id': 'trip_id'
        })
        # keep only the transposed stops that fall inside the time range
        kwargs = {'inrange': lambda df: service_in_range(
            df['arrival_time'].apply(safe_seconds_since_zero),
            df['departure_time'].apply(safe_seconds_since_zero),
            seconds_since_zero(time_range['start']),
            seconds_since_zero(time_range['end'])
        )}
        partial_stop_times = partial_stop_times.assign(**kwargs)
        partial_stop_times = partial_stop_times[partial_stop_times['inrange'] == True]
        stop_times_updated = pd.concat(
            [stop_times, partial_stop_times],
            axis=0
        )
        # remove original partial trips from trips and stoptimes
        stop_times_updated = stop_times_updated[stop_times_updated['trip_id'].isin(trips_filtered_df['trip_id'])]
        stop_times_updated = stop_times_updated.sort_values(['trip_id', 'stop_sequence'])
        gtfs.update_table('stop_times', stop_times_updated)
    gtfs.update_table('trips', trips_filtered_df.set_index('trip_id'))
    # Shorten and/or push back frequencies if needed
    unwrapped_grouped = unwrapped_long.groupby(['frequency_start', 'trip_id'])
    last_trip_order = unwrapped_grouped['trip_order'].max().rename('last_trip_order')
    # only keep runs in frequencies that are completely in range
    unwrapped_in_range_only_grouped = unwrapped_long[unwrapped_long['wholly_in_range'] == True] \
        .groupby(['frequency_start', 'trip_id'])
    # TODO: handle if unwrapped_in_range_only_grouped is empty here (all frequencies out of range), causes error
    last_trip_order_in_range = unwrapped_in_range_only_grouped.apply(
        lambda g: g[g['trip_order'] == g['trip_order'].max()]) \
        [['frequency_start', 'trip_id', 'trip_order', 'trip_end']]
    last_trip_order_in_range = last_trip_order_in_range \
        .rename(columns={
            'trip_order': 'last_trip_order_in_range',
            'trip_end': 'last_trip_end_in_range'
        }) \
        .reset_index(drop=True)
    first_trip_order_in_range = unwrapped_in_range_only_grouped.apply(
        lambda g: g[g['trip_order'] == g['trip_order'].min()]) \
        [['frequency_start', 'trip_id', 'trip_order', 'trip_start']]
    first_trip_order_in_range = first_trip_order_in_range \
        .rename(columns={
            'trip_order': 'first_trip_order_in_range',
            'trip_start': 'first_trip_start_in_range'
        }) \
        .reset_index(drop=True)
    unwrapped_long = unwrapped_long.merge(last_trip_order,
                                          left_on=['frequency_start', 'trip_id'],
                                          right_on=['frequency_start', 'trip_id'])
    unwrapped_long = unwrapped_long.merge(first_trip_order_in_range,
                                          left_on=['frequency_start', 'trip_id'],
                                          right_on=['frequency_start', 'trip_id'])
    unwrapped_long = unwrapped_long.merge(last_trip_order_in_range,
                                          left_on=['frequency_start', 'trip_id'],
                                          right_on=['frequency_start', 'trip_id'])
    # shorten the frequency end when the tail of the run is out of range
    unwrapped_long.loc[unwrapped_long['last_trip_order'] > unwrapped_long['last_trip_order_in_range'],
                       'frequency_end'] = unwrapped_long['last_trip_end_in_range']
    # push back the frequency start when the head of the run is out of range
    unwrapped_long.loc[unwrapped_long['first_trip_order_in_range'] > 0,
                       'frequency_start'] = unwrapped_long['first_trip_start_in_range']
    unwrapped_long = unwrapped_long[
        (unwrapped_long['trip_order'] <= unwrapped_long['last_trip_order_in_range'])
        & (unwrapped_long['trip_order'] >= unwrapped_long['first_trip_order_in_range'])
    ]
    # split frequencies on gaps
    # agg individual in_range status
    unwrapped_trips_out_of_range = unwrapped_long[unwrapped_long['wholly_in_range'] == False]
    # copy over index columns we need to iteratively update
    unwrapped_long = unwrapped_long \
        .reset_index() \
        .set_index(['frequency_start', 'trip_id', 'trip_order'], drop=False)
    unwrapped_long = unwrapped_long.rename(columns={
        'frequency_start': 'new_frequency_start',
        'trip_order': 'new_trip_order'
    })
    # Perform update for each out-of-range trip on adjacent in-range trips
    for index, current_row in unwrapped_trips_out_of_range.iterrows():
        cur_frequency_start = current_row['frequency_start']
        cur_trip_id = current_row['trip_id']
        cur_trip_order = current_row['trip_order']
        # if next trip in range
        if (unwrapped_long.loc[cur_frequency_start, cur_trip_id, cur_trip_order + 1]['wholly_in_range'] == True):
            # update frequency start for all future trips in frequency
            new_trip_start = unwrapped_long.loc[
                (unwrapped_long['new_frequency_start'] == cur_frequency_start)
                & (unwrapped_long['trip_id'] == cur_trip_id)
                & (unwrapped_long['new_trip_order'] == cur_trip_order + 1),
            ]['trip_start'].tolist()[0]
            unwrapped_long.loc[
                (unwrapped_long['new_frequency_start'] == cur_frequency_start)
                & (unwrapped_long['trip_id'] == cur_trip_id)
                & (unwrapped_long['new_trip_order'] >= cur_trip_order),
                'new_frequency_start'
            ] = new_trip_start
            # update trip order for all future trips in frequency
            unwrapped_long['new_trip_order'] = unwrapped_long.apply(
                lambda unwrapped_long:
                    unwrapped_long['new_trip_order'] - (cur_trip_order + 1) if (
                        (unwrapped_long['new_frequency_start'] == cur_frequency_start)
                        & (unwrapped_long['trip_id'] == cur_trip_id)
                        & (unwrapped_long['new_trip_order'] >= cur_trip_order)
                    ) else unwrapped_long['new_trip_order'],
                axis='columns')
        # if previous trip in range
        if (unwrapped_long.loc[cur_frequency_start, cur_trip_id, cur_trip_order - 1]['wholly_in_range'] == True):
            # update frequency end for all previous trips in frequency
            new_trip_end = unwrapped_long.loc[
                (unwrapped_long['new_frequency_start'] == cur_frequency_start)
                & (unwrapped_long['trip_id'] == cur_trip_id)
                & (unwrapped_long['new_trip_order'] == cur_trip_order - 1),
            ]['trip_end'].tolist()[0]
            unwrapped_long.loc[
                (unwrapped_long['new_frequency_start'] == cur_frequency_start)
                & (unwrapped_long['trip_id'] == cur_trip_id)
                & (unwrapped_long['new_trip_order'] <= cur_trip_order),
                'frequency_end'
            ] = new_trip_end
    # Now we can finally remove all out-of-range entries and reshape back into frequencies
    unwrapped_long = unwrapped_long[unwrapped_long['wholly_in_range'] == True]
    unwrapped_long = unwrapped_long \
        .reset_index(drop=True) \
        .rename(columns={
            'new_frequency_start': 'start_time',
            'frequency_end': 'end_time'
        })
    filtered_frequencies_df = unwrapped_long[gtfs.get_columns('frequencies')] \
        .drop_duplicates()
    gtfs.update_table('frequencies', filtered_frequencies_df)
def interpolate_stop_times():
    """Fill missing stop arrival/departure times by distance interpolation.

    Interpolates between consecutive timepoints proportionally to
    shape_dist_traveled. Returns False if interpolation is not possible
    (no shape_dist_traveled data or no shapes.txt), True otherwise.
    """
    # returns false if interpolation not possible
    stop_times = gtfs.get_table('stop_times')
    shapes = gtfs.get_table('shapes')
    no_shape_dist_traveled = 'shape_dist_traveled' not in stop_times.columns \
        or stop_times['shape_dist_traveled'].isna().all()
    no_shapes_txt = not gtfs.has_table('shapes') or shapes.empty
    if (no_shape_dist_traveled or no_shapes_txt):
        return False
    # build table with chunk information
    df = stop_times.copy()
    df['has_arrival'] = df['arrival_time'].notna()
    df['has_departure'] = df['departure_time'].notna()
    # keep only rows that carry at least one timepoint
    df = df[df['has_arrival'] | df['has_departure']]
    timepoints_only = df[df['has_arrival'] | df['has_departure']]
    # each timepoint's "chunk" ends at the next timepoint in the trip
    # https://stackoverflow.com/questions/50411098/how-to-do-forward-rolling-sum-in-pandas
    df['next_stop_sequence'] = timepoints_only.sort_values(by=['trip_id', 'stop_sequence']) \
        .iloc[::-1] \
        .groupby('trip_id')['stop_sequence'].transform(lambda x: x.rolling(2).max()) \
        .iloc[::-1]
    # cleanup
    df['next_stop_sequence'] = df['next_stop_sequence'].fillna(df['stop_sequence']).astype('int64')
    # expand each chunk into one row per stop_sequence it covers
    df['stop_sequence_list'] = df.apply(lambda row:
        list(range(row['stop_sequence'], row['next_stop_sequence'])
             if row['stop_sequence'] != row['next_stop_sequence']
             else [row['stop_sequence']]
             ), axis=1)
    df = df.explode('stop_sequence_list')
    df = df.rename(columns={'stop_sequence': 'start_seq',
                            'next_stop_sequence': 'end_seq',
                            'stop_sequence_list': 'stop_sequence'})
    chunks = df.set_index(['trip_id', 'stop_sequence']) \
        [['start_seq', 'end_seq']]
    stop_times = stop_times.set_index(['trip_id', 'stop_sequence'])
    # attach chunk bounds to every stop
    stop_times = stop_times.merge(chunks,
                                  how='left',
                                  right_index=True,
                                  left_index=True,
                                  )
    # look up the bounding times and distances of each chunk
    start_time = stop_times['departure_time'].rename('start_time')
    end_time = stop_times['arrival_time'].rename('end_time')
    start_sdt = stop_times['shape_dist_traveled'].rename('start_sdt')
    end_sdt = stop_times['shape_dist_traveled'].rename('end_sdt')
    stop_times = stop_times.merge(start_time,
                                  left_on=['trip_id', 'start_seq'],
                                  right_index=True
                                  )
    stop_times = stop_times.merge(end_time,
                                  left_on=['trip_id', 'end_seq'],
                                  right_index=True
                                  )
    stop_times = stop_times.merge(start_sdt,
                                  left_on=['trip_id', 'start_seq'],
                                  right_index=True
                                  )
    stop_times = stop_times.merge(end_sdt,
                                  left_on=['trip_id', 'end_seq'],
                                  right_index=True
                                  )

    def interpolate_row(row):
        # Linear interpolation of time by fraction of distance covered
        # within the chunk.
        # happens if last stop or on 1-stop chunks (consecutive timepoints)
        if (row['start_time'] == row['end_time']):
            return row['start_time']
        return seconds_to_military(
            seconds_since_zero(row['start_time']) +
            int(round(
                (
                    (row['shape_dist_traveled'] - row['start_sdt']) / (row['end_sdt'] - row['start_sdt'])
                ) * (
                    seconds_since_zero(row['end_time']) - seconds_since_zero(row['start_time'])
                )
            ))
        )

    stop_times['interp'] = stop_times.apply(lambda row: interpolate_row(row), axis=1)
    # only fill where the original values were missing
    stop_times['arrival_time'] = stop_times['arrival_time'].fillna(stop_times['interp'])
    stop_times['departure_time'] = stop_times['departure_time'].fillna(stop_times['interp'])
    gtfs.update_table('stop_times', stop_times.reset_index(), cascade=False)
    return True