def remove_trips_with_nonexistent_calendars():
    """Drop trips whose service_id has no calendar entry, along with any
    frequencies rows that referenced the dropped trips."""
    calendar = gtfs.get_table('calendar', index=False)
    trips = gtfs.get_table('trips')
    kept_trips = trips[trips['service_id'].isin(calendar['service_id'])]
    if gtfs.has_table('frequencies'):
        frequencies = gtfs.get_table('frequencies')
        kept_frequencies = frequencies[
            frequencies['trip_id'].isin(kept_trips.index.to_series())]
        gtfs.update_table('frequencies', kept_frequencies)
    gtfs.update_table('trips', kept_trips)
def print_table(tablename, message='', all=False):
    """Debug-print a GTFS table; with all=True, print every row via to_string()."""
    df = gtfs.get_table(tablename)
    prefix = 'DEBUG ' + tablename + ' ' + message + ': '
    print(prefix, df.to_string() if all else df)
def reset_feed_dates(daterange):
    """Overwrite feed_info.txt start/end dates with the given daterange
    (dict with 'start' and 'end' keys). No-op if feed_info is absent."""
    if not gtfs.has_table('feed_info'):
        return
    new_range = GTFSDateRange(daterange['start'], daterange['end'])
    feed_info = gtfs.get_table('feed_info')
    feed_info['feed_start_date'] = new_range.start.datestring()
    feed_info['feed_end_date'] = new_range.end.datestring()
    gtfs.update_table('feed_info', feed_info)
def filter_board_alight_by_daterange(daterange):
    """Keep only board_alight records whose service_date lies within daterange.

    No-op when board_alight.txt is missing or lacks a service_date column.
    """
    if not gtfs.has_table('board_alight'):
        return
    board_alight = gtfs.get_table('board_alight', index=False)
    if 'service_date' not in board_alight.columns:
        return
    window = GTFSDateRange(daterange['start'], daterange['end'])
    board_alight['_inrange'] = board_alight.apply(
        lambda row: window.includes(row['service_date']), axis=1)
    gtfs.update_table('board_alight', board_alight[board_alight['_inrange']])
def filter_calendars_by_daterange(daterange):
    """Restrict calendar.txt entries to the given date range.

    Keeps entries whose date span AND active days-of-week overlap the range,
    plus entries referenced by calendar_dates exceptions inside the range;
    then trims start_date/end_date to fit within the range.
    """
    calendar = gtfs.get_table('calendar')
    filter_daterange = GTFSDateRange(daterange['start'], daterange['end'])
    calendar['_gtfs_daterange'] = calendar.apply(
        lambda row: GTFSDateRange(row['start_date'], row['end_date']), axis=1)
    calendar['_overlap'] = calendar['_gtfs_daterange'].apply(
        lambda dr: filter_daterange.get_overlap(dr))
    # we want to remove calendar entries that don't overlap DOWs
    calendar['_dows_overlap'] = calendar.apply(
        lambda row: GTFSBool.TRUE in (row[dow] for dow in filter_daterange.days_of_week()),
        axis=1)
    # we want to keep calendar entries that are used in overlapping exceptions
    if gtfs.has_table('calendar_dates'):
        calendar_dates = gtfs.get_table('calendar_dates')
        calendar_dates['_date_overlap'] = calendar_dates.apply(
            lambda row: filter_daterange.includes(row['date']), axis=1)
        calendar_dates = calendar_dates[calendar_dates['_date_overlap']]
        calendar['_exception_overlap'] = calendar.index.to_series().isin(
            calendar_dates['service_id'])
    else:
        calendar['_exception_overlap'] = False
    calendar = calendar[(calendar['_overlap'].notnull() & calendar['_dows_overlap'])
                        | calendar['_exception_overlap']]
    # trim bounds to fit within daterange
    # BUG FIX: rows kept only because of a calendar_dates exception can have a
    # null _overlap; calling .start/.end on None raised AttributeError. Only
    # trim rows that actually overlap the filter range; exception-only rows
    # keep their original start_date/end_date.
    has_overlap = calendar['_overlap'].notnull()
    calendar.loc[has_overlap, 'start_date'] = calendar.loc[has_overlap, '_overlap'].apply(
        lambda dr: dr.start.datestring())
    calendar.loc[has_overlap, 'end_date'] = calendar.loc[has_overlap, '_overlap'].apply(
        lambda dr: dr.end.datestring())
    gtfs.update_table('calendar', calendar)
def filter_calendar_dates_by_daterange(daterange):
    """Drop calendar_dates exceptions falling outside the given daterange.

    No-op when calendar_dates.txt is absent.
    """
    if not gtfs.has_table('calendar_dates'):
        return
    calendar_dates = gtfs.get_table('calendar_dates')
    window = GTFSDateRange(daterange['start'], daterange['end'])
    calendar_dates['_gtfs_date'] = calendar_dates.apply(
        lambda row: GTFSDate(row['date']), axis=1)
    calendar_dates['_inrange'] = calendar_dates.apply(
        lambda row: window.includes(row['date']), axis=1)
    kept = calendar_dates[calendar_dates['_inrange']]
    gtfs.update_table('calendar_dates', kept)
def filter_single_trips_by_timerange(timerange, trim_trips=False):
    """Filter non-repeating trips by the time range provided in config.

    IMPORTANT: if trim_trips is True, trips partially within range will have
    out-of-range stops AND stops without arrival and departure times removed.
    This is to avoid inferring stop service time when not supplied, and if
    used it is recommended to clean data beforehand by interpolating stops.
    """
    trips_extended = triphelpers.get_trips_extended()
    # If trim_trips is False, we only want to keep trips completely within the
    # time range. If trim_trips is True, we want to keep partially-in-range
    # trips so we can trim them in stop_times.
    wholly_within = not trim_trips
    # add range information
    trips_extended['inrange'] = get_inrange(
        trips_extended, 'start_time', 'end_time', timerange,
        wholly_within=wholly_within)
    # filter trips and write to table
    # BUG FIX: parenthesize both comparisons — `|` binds tighter than `==`,
    # so the original parsed as `((inrange == True) | is_repeating) == True`
    # and only worked by coincidence for boolean columns.
    trips_filtered_df = trips_extended[
        (trips_extended['inrange'] == True)
        | (trips_extended['is_repeating'] == True)]
    gtfs.update_table('trips', trips_filtered_df)
    # filter stop_times if trim_trips is True
    if trim_trips:
        stop_times = gtfs.get_table('stop_times')
        stop_times = stop_times.merge(
            trips_extended['is_repeating'].reset_index(),
            how='left',
            left_on='trip_id',
            right_on='trip_id'
        )
        start = timerange['start']
        end = timerange['end']
        # IMPORTANT: to avoid inferring stop service time when not supplied,
        # this will remove all stops outside of range AND all stops without
        # stop times.
        kwargs = {'inrange': lambda df: service_in_range(
            df['arrival_time'].apply(safe_seconds_since_zero),
            df['departure_time'].apply(safe_seconds_since_zero),
            seconds_since_zero(start),
            seconds_since_zero(end)
        )}
        stop_times = stop_times.assign(**kwargs)
        stop_times = stop_times[(stop_times['inrange'] == True)
                                | (stop_times['is_repeating'] == True)]
        gtfs.update_table('stop_times', stop_times)
def filter_stops_by_multipolygon(multipolygon):
    """Keep only stops located inside the given shapely MultiPolygon.

    Note: this does not clean up the gtfs after removing stops.
    """
    stops = gtfs.get_table('stops')
    points = gpd.points_from_xy(stops['stop_lon'], stops['stop_lat'])
    stops_gdf = gpd.GeoDataFrame(stops, geometry=points, crs='EPSG:4326')
    in_area = stops_gdf.geometry.transform(
        lambda g: multipolygon.contains(g)).rename('in_area')
    stops = stops.merge(in_area.to_frame(),
                        how='left',
                        left_index=True,
                        right_index=True)
    gtfs.update_table('stops', stops[stops['in_area']])
def filter_trips_by_date(date):
    """Remove trips that do not run on the specified date.

    Combines calendar span + day-of-week service with calendar_dates
    ADDED/REMOVED exceptions for that date.
    TODO consider replacing with filter_calendars_by_date, prune
    """
    trips_extended = get_trips_extended()
    dow = GTFSDate(date).dow()
    date_in_range = (trips_extended['start_date'] <= date) \
        & (date <= trips_extended['end_date'])
    dow_in_service = trips_extended[dow] == GTFSBool.TRUE
    trips_filter = date_in_range & dow_in_service
    # fold in calendar exceptions relevant to this date
    if gtfs.has_table('calendar_dates'):
        calendar_dates = gtfs.get_table('calendar_dates')
        on_date = calendar_dates['date'] == date
        added_services = calendar_dates[
            on_date & (calendar_dates['exception_type'] == GTFSExceptionType.ADDED)
        ]['service_id']
        removed_services = calendar_dates[
            on_date & (calendar_dates['exception_type'] == GTFSExceptionType.REMOVED)
        ]['service_id']
        service_added_on_date = trips_extended['service_id'].isin(added_services)
        service_removed_on_date = trips_extended['service_id'].isin(removed_services)
        if gtfs.has_table('calendar'):
            trips_filter = (date_in_range & dow_in_service
                            & ~service_removed_on_date) | service_added_on_date
        else:
            # no calendar.txt: only exception-added services run
            trips_filter = service_added_on_date
    gtfs.update_table('trips', trips_extended[trips_filter])
def calculate_average_headways(date, time_range):
    """Compute average headway per route/direction for `date` and `time_range`.

    For each route and each specified time period the output row contains:
    agency id, agency name, date, start time, end time, route id, route name,
    average headway (minutes), and the list of trip start times during the
    period. Returns a DataFrame with route_id/direction_id as columns.
    """
    trips_extended = triphelpers.get_trips_extended().reset_index()
    trips = gtfs.get_table('trips', original=True)

    # direction_id is optional in GTFS; normalize to an empty string
    if not ('direction_id' in trips.columns):
        trips['direction_id'] = ''
        trips_extended['direction_id'] = ''

    route_direction_pairs = trips[['route_id', 'direction_id']].drop_duplicates()
    route_direction_pairs = route_direction_pairs.set_index(['route_id', 'direction_id'])

    agency_info = gtfs.get_table('agency')[['agency_id', 'agency_name']]

    if 'agency_id' in gtfs.get_columns('routes'):
        route_info = gtfs.get_table('routes', original=True, index=False)[
            ['route_id', 'agency_id', 'route_long_name']]
        output = route_direction_pairs.reset_index() \
            .merge(route_info, how='left', on='route_id') \
            .set_index(['route_id', 'direction_id'])
        output = output.reset_index() \
            .merge(agency_info, how='left', on='agency_id') \
            .set_index(['route_id', 'direction_id'])
    else:
        # No agency id in routes.txt means there is only one agency
        route_info = gtfs.get_table('routes', original=True, index=False)[
            ['route_id', 'route_long_name']]
        # BUG FIX: route_info is already a DataFrame (two-column selection);
        # the original called route_info.to_frame(), which raised
        # AttributeError because DataFrame has no to_frame method.
        output = route_direction_pairs.reset_index() \
            .merge(route_info, how='left', on='route_id') \
            .set_index(['route_id', 'direction_id'])
        output['agency_id'] = agency_info['agency_id'].iloc[0]
        output['agency_name'] = agency_info['agency_name'].iloc[0]

    output['date'] = date
    output['start_time'] = time_range['start'] if time_range else ''
    output['end_time'] = time_range['end'] if time_range else ''

    if trips_extended.empty:
        # no trips at all: emit empty start-time lists and zero headways
        output['trip_start_times'] = np.empty((len(output), 0)).tolist()
        output['average_headway_minutes'] = 0
        return output.reset_index()

    unwrapped_repeating_trips = triphelpers.get_unwrapped_repeating_trips()
    trip_start_times = trips_extended[['trip_id', 'start_time']]
    if not unwrapped_repeating_trips.empty:
        frequency_trip_ids = unwrapped_repeating_trips['trip_id']
        # remove stop_times for trips in frequencies as they should be ignored
        trip_start_times = trip_start_times[
            ~trip_start_times['trip_id'].isin(frequency_trip_ids)]
        unwrapped_repeating_trips = unwrapped_repeating_trips[['trip_id', 'trip_start']]
        unwrapped_repeating_trips = unwrapped_repeating_trips.rename(columns={
            'trip_start': 'start_time'
        })
        trip_start_times = pd.concat([trip_start_times, unwrapped_repeating_trips])

    trip_start_times = trip_start_times.merge(
        trips_extended.reset_index()[['trip_id', 'route_id', 'direction_id']],
        how='left', on='trip_id')
    trip_start_times['start_time_seconds'] = \
        trip_start_times['start_time'].transform(seconds_since_zero)

    # calculate deltas between consecutive departures within a route/direction
    trip_start_times.sort_values(
        ['route_id', 'direction_id', 'start_time_seconds'], inplace=True)
    trip_start_times['delta_seconds'] = trip_start_times['start_time_seconds'].diff()
    # the first trip of each route/direction group has no predecessor
    first_trip_in_route_dir = \
        (trip_start_times['route_id'] != trip_start_times['route_id'].shift(1)) \
        | (
            (trip_start_times['direction_id'] != '')
            & (trip_start_times['direction_id'] != trip_start_times['direction_id'].shift(1))
        )
    trip_start_times.loc[first_trip_in_route_dir, 'delta_seconds'] = np.nan

    route_avg_headway_minutes = trip_start_times \
        .groupby(['route_id', 'direction_id'])['delta_seconds'].mean() \
        .fillna(0) \
        .transform(lambda x: np.round(x / 60, decimals=3)) \
        .rename('average_headway_minutes')
    route_trip_starts_list = trip_start_times \
        .groupby(['route_id', 'direction_id'])['start_time'].apply(list) \
        .rename('trip_start_times')

    output['average_headway_minutes'] = route_avg_headway_minutes
    output = output.merge(route_trip_starts_list, how='left',
                          left_index=True, right_index=True)
    # fill empty trip start times with empty list
    output['trip_start_times'] = output['trip_start_times'].apply(
        lambda d: d if isinstance(d, list) else [])
    return output.reset_index()
def get_feed_start_end_daterange():
    """Return the feed_info.txt start/end dates as a GTFSDateRange.

    Returns None when feed_info.txt is absent. Uses the first row of the
    table; GTFS permits at most one feed_info record per feed.
    """
    if not gtfs.has_table('feed_info'):
        return None
    feed_info = gtfs.get_table('feed_info')
    # ROBUSTNESS: use positional indexing — .loc[0, ...] raised KeyError
    # whenever the table's index did not contain the label 0.
    return GTFSDateRange(feed_info['feed_start_date'].iloc[0],
                         feed_info['feed_end_date'].iloc[0])
def get_feed_calendar_service_daterange():
    """Return the full date range spanned by calendar.txt service entries."""
    calendar = gtfs.get_table('calendar')
    earliest_start = calendar['start_date'].min()
    latest_end = calendar['end_date'].max()
    return GTFSDateRange(earliest_start, latest_end)
def filter_repeating_trips_by_timerange(time_range, trim_trips=False):
    """Filter frequency-based (repeating) trips by a time range.

    Edits start_time and end_time of frequencies partially in range (at least
    one but not all trips occur in range), and edits stop_times for a trip if
    its start_time has changed. When trim_trips is True, partially-in-range
    runs are materialized as new single trips with transposed stop times.

    NOTE(review): this function iteratively mutates `unwrapped_long` while
    looping over out-of-range rows; statement order is load-bearing.
    """
    unwrapped_long = get_long_form_unwrapped_frequencies_inrange_df(time_range)
    # do nothing if no repeating trips
    if (unwrapped_long.empty):
        return
    unwrapped_grouped = unwrapped_long.groupby(['frequency_start', 'trip_id'])
    # if trimming trips, we want to keep partially in range trips in trips.txt
    trip_filter_on = 'partially_in_range' if trim_trips else 'wholly_in_range'
    # Remove frequencies with no trips in range
    any_trip_in_frequency_in_range_series = unwrapped_grouped[trip_filter_on].any() \
        .rename('any_frequency_trip_in_range')
    unwrapped_long = unwrapped_long \
        .merge(any_trip_in_frequency_in_range_series.to_frame().reset_index(),
               on=['frequency_start', 'trip_id'])
    unwrapped_long = unwrapped_long[unwrapped_long['any_frequency_trip_in_range'] == True] \
        .drop('any_frequency_trip_in_range', axis='columns')
    # Remove trip from trips.txt if trip_id not in any range in frequencies
    trips_not_in_any_range_whole = unwrapped_long.groupby(['trip_id'])['wholly_in_range'].any()
    trips_not_in_any_range_whole = trips_not_in_any_range_whole[trips_not_in_any_range_whole == False]
    trips_not_in_any_range_partial = unwrapped_long.groupby(['trip_id'])['partially_in_range'].any()
    trips_not_in_any_range_partial = trips_not_in_any_range_partial[trips_not_in_any_range_partial == False]
    trips_df = gtfs.get_table('trips', index=False)
    trips_filtered_df = trips_df[~trips_df['trip_id'].isin(trips_not_in_any_range_partial.index.to_series())]
    # if trimming, we need to create trimmed single trips for partially-in-range runs
    if trim_trips:
        partial_trips = unwrapped_long.copy().loc[
            (unwrapped_long['partially_in_range'] == True)
            & (unwrapped_long['wholly_in_range'] == False)]
        # synthesize a unique trip_id per partial run, e.g. "<trip>_freq_<order>"
        partial_trips['new_trip_id'] = partial_trips['trip_id'] + '_freq_' + \
            partial_trips['trip_order'].apply(str)
        # add new rows to trips for each partial trip
        partial_trips_rows = trips_filtered_df.merge(
            partial_trips,
            left_on='trip_id',
            right_on='trip_id'
        )
        partial_trips_rows = partial_trips_rows.drop(columns=['trip_id']).rename(columns={
            'new_trip_id': 'trip_id'
        })
        # now that new partial trips have been created, we can remove trips that were partially within
        # the timerange but not wholly within
        trips_filtered_df = trips_df[~trips_df['trip_id'].isin(trips_not_in_any_range_whole.index.to_series())]
        trips_filtered_df = pd.concat(
            [trips_filtered_df, partial_trips_rows],
            axis=0
        )
        # add new rows in range into stoptimes for new trips
        stop_times = gtfs.get_table('stop_times')
        partial_stop_times = stop_times.merge(
            partial_trips[['trip_id', 'new_trip_id', 'trip_start']],
            left_on='trip_id',
            right_on='trip_id'
        ).sort_values(['new_trip_id', 'stop_sequence'])
        trip_bounds = triphelpers.get_trip_bounds()
        trip_bounds = trip_bounds.rename(columns={
            'start_time': 'first_arrival'
        })
        partial_stop_times = partial_stop_times.merge(
            trip_bounds['first_arrival'].to_frame(),
            how='left',
            left_on='trip_id',
            right_on='trip_id'
        )

        def safe_transpose(val, diff_secs):
            # Shift a military-time string by diff_secs seconds; NaN-safe for
            # missing arrival/departure values ('' or float NaN).
            isnan = (type(val) == str and val == '') or (type(val) == float and np.isnan(val))
            if isnan:
                return np.nan
            transpose_secs = seconds_since_zero(val) + diff_secs
            return seconds_to_military(transpose_secs)

        # transpose stop times
        partial_stop_times['arrival_time'] = partial_stop_times.apply(
            lambda row: safe_transpose(
                row['arrival_time'],
                (seconds_since_zero(row['trip_start']) -
                 seconds_since_zero(row['first_arrival']))
            ), axis=1
        )
        partial_stop_times['departure_time'] = partial_stop_times.apply(
            lambda row: safe_transpose(
                row['departure_time'],
                (seconds_since_zero(row['trip_start']) -
                 seconds_since_zero(row['first_arrival']))
            ), axis=1
        )
        partial_stop_times = partial_stop_times.rename(columns={
            'trip_id': 'old_trip_id',
            'new_trip_id': 'trip_id'
        })
        # keep only the transposed stops that fall inside the time range
        kwargs = {'inrange': lambda df: service_in_range(
            df['arrival_time'].apply(safe_seconds_since_zero),
            df['departure_time'].apply(safe_seconds_since_zero),
            seconds_since_zero(time_range['start']),
            seconds_since_zero(time_range['end'])
        )}
        partial_stop_times = partial_stop_times.assign(**kwargs)
        partial_stop_times = partial_stop_times[partial_stop_times['inrange'] == True]
        stop_times_updated = pd.concat(
            [stop_times, partial_stop_times],
            axis=0
        )
        # remove original partial trips from trips and stoptimes
        stop_times_updated = stop_times_updated[stop_times_updated['trip_id'].isin(trips_filtered_df['trip_id'])]
        stop_times_updated = stop_times_updated.sort_values(['trip_id', 'stop_sequence'])
        gtfs.update_table('stop_times', stop_times_updated)
    gtfs.update_table('trips', trips_filtered_df.set_index('trip_id'))
    # Shorten and/or push back frequencies if needed
    unwrapped_grouped = unwrapped_long.groupby(['frequency_start', 'trip_id'])
    last_trip_order = unwrapped_grouped['trip_order'].max().rename('last_trip_order')
    # only keep runs in frequencies that are completely in range
    unwrapped_in_range_only_grouped = unwrapped_long[unwrapped_long['wholly_in_range'] == True] \
        .groupby(['frequency_start', 'trip_id'])
    # TODO: handle if unwrapped_in_range_only_grouped is empty here (all frequencies out of range), causes error
    last_trip_order_in_range = unwrapped_in_range_only_grouped.apply(
        lambda g: g[g['trip_order'] == g['trip_order'].max()]) \
        [['frequency_start', 'trip_id', 'trip_order', 'trip_end']]
    last_trip_order_in_range = last_trip_order_in_range \
        .rename(columns={
            'trip_order': 'last_trip_order_in_range',
            'trip_end': 'last_trip_end_in_range'
        }) \
        .reset_index(drop=True)
    first_trip_order_in_range = unwrapped_in_range_only_grouped.apply(
        lambda g: g[g['trip_order'] == g['trip_order'].min()]) \
        [['frequency_start', 'trip_id', 'trip_order', 'trip_start']]
    first_trip_order_in_range = first_trip_order_in_range \
        .rename(columns={
            'trip_order': 'first_trip_order_in_range',
            'trip_start': 'first_trip_start_in_range'
        }) \
        .reset_index(drop=True)
    unwrapped_long = unwrapped_long.merge(last_trip_order,
                                          left_on=['frequency_start', 'trip_id'],
                                          right_on=['frequency_start', 'trip_id'])
    unwrapped_long = unwrapped_long.merge(first_trip_order_in_range,
                                          left_on=['frequency_start', 'trip_id'],
                                          right_on=['frequency_start', 'trip_id'])
    unwrapped_long = unwrapped_long.merge(last_trip_order_in_range,
                                          left_on=['frequency_start', 'trip_id'],
                                          right_on=['frequency_start', 'trip_id'])
    # shorten the frequency end when the tail of the run is out of range
    unwrapped_long.loc[unwrapped_long['last_trip_order'] > unwrapped_long['last_trip_order_in_range'],
                       'frequency_end'] = unwrapped_long['last_trip_end_in_range']
    # push back the frequency start when the head of the run is out of range
    unwrapped_long.loc[unwrapped_long['first_trip_order_in_range'] > 0,
                       'frequency_start'] = unwrapped_long['first_trip_start_in_range']
    unwrapped_long = unwrapped_long[
        (unwrapped_long['trip_order'] <= unwrapped_long['last_trip_order_in_range'])
        & (unwrapped_long['trip_order'] >= unwrapped_long['first_trip_order_in_range'])
    ]
    # split frequencies on gaps
    # agg individual in_range status
    unwrapped_trips_out_of_range = unwrapped_long[unwrapped_long['wholly_in_range'] == False]
    # copy over index columns we need to iteratively update
    unwrapped_long = unwrapped_long \
        .reset_index() \
        .set_index(['frequency_start', 'trip_id', 'trip_order'], drop=False)
    unwrapped_long = unwrapped_long.rename(columns={
        'frequency_start': 'new_frequency_start',
        'trip_order': 'new_trip_order'
    })
    # Perform update for each out-of-range trip on adjacent in-range trips
    for index, current_row in unwrapped_trips_out_of_range.iterrows():
        cur_frequency_start = current_row['frequency_start']
        cur_trip_id = current_row['trip_id']
        cur_trip_order = current_row['trip_order']
        # if next trip in range
        if (unwrapped_long.loc[cur_frequency_start, cur_trip_id, cur_trip_order + 1]['wholly_in_range'] == True):
            # update frequency start for all future trips in frequency
            new_trip_start = unwrapped_long.loc[
                (unwrapped_long['new_frequency_start'] == cur_frequency_start)
                & (unwrapped_long['trip_id'] == cur_trip_id)
                & (unwrapped_long['new_trip_order'] == cur_trip_order + 1),
            ]['trip_start'].tolist()[0]
            unwrapped_long.loc[
                (unwrapped_long['new_frequency_start'] == cur_frequency_start)
                & (unwrapped_long['trip_id'] == cur_trip_id)
                & (unwrapped_long['new_trip_order'] >= cur_trip_order),
                'new_frequency_start'
            ] = new_trip_start
            # update trip order for all future trips in frequency
            unwrapped_long['new_trip_order'] = unwrapped_long.apply(
                lambda unwrapped_long:
                    unwrapped_long['new_trip_order'] - (cur_trip_order + 1) if (
                        (unwrapped_long['new_frequency_start'] == cur_frequency_start)
                        & (unwrapped_long['trip_id'] == cur_trip_id)
                        & (unwrapped_long['new_trip_order'] >= cur_trip_order)
                    ) else unwrapped_long['new_trip_order'],
                axis='columns')
        # if previous trip in range
        if (unwrapped_long.loc[cur_frequency_start, cur_trip_id, cur_trip_order - 1]['wholly_in_range'] == True):
            # update frequency end for all previous trips in frequency
            new_trip_end = unwrapped_long.loc[
                (unwrapped_long['new_frequency_start'] == cur_frequency_start)
                & (unwrapped_long['trip_id'] == cur_trip_id)
                & (unwrapped_long['new_trip_order'] == cur_trip_order - 1),
            ]['trip_end'].tolist()[0]
            unwrapped_long.loc[
                (unwrapped_long['new_frequency_start'] == cur_frequency_start)
                & (unwrapped_long['trip_id'] == cur_trip_id)
                & (unwrapped_long['new_trip_order'] <= cur_trip_order),
                'frequency_end'
            ] = new_trip_end
    # Now we can finally remove all out-of-range entries and reshape back into frequencies
    unwrapped_long = unwrapped_long[unwrapped_long['wholly_in_range'] == True]
    unwrapped_long = unwrapped_long \
        .reset_index(drop=True) \
        .rename(columns={
            'new_frequency_start': 'start_time',
            'frequency_end': 'end_time'
        })
    filtered_frequencies_df = unwrapped_long[gtfs.get_columns('frequencies')] \
        .drop_duplicates()
    gtfs.update_table('frequencies', filtered_frequencies_df)
def interpolate_stop_times():
    """Fill missing stop arrival/departure times by distance interpolation.

    Interpolates between consecutive timepoints proportionally to
    shape_dist_traveled. Returns False if interpolation is not possible
    (no shape_dist_traveled data or no shapes.txt), True otherwise.
    """
    # returns false if interpolation not possible
    stop_times = gtfs.get_table('stop_times')
    shapes = gtfs.get_table('shapes')
    no_shape_dist_traveled = 'shape_dist_traveled' not in stop_times.columns \
        or stop_times['shape_dist_traveled'].isna().all()
    no_shapes_txt = not gtfs.has_table('shapes') or shapes.empty
    if (no_shape_dist_traveled or no_shapes_txt):
        return False
    # build table with chunk information
    df = stop_times.copy()
    df['has_arrival'] = df['arrival_time'].notna()
    df['has_departure'] = df['departure_time'].notna()
    # keep only rows that carry at least one timepoint
    df = df[df['has_arrival'] | df['has_departure']]
    timepoints_only = df[df['has_arrival'] | df['has_departure']]
    # each timepoint's "chunk" ends at the next timepoint in the trip
    # https://stackoverflow.com/questions/50411098/how-to-do-forward-rolling-sum-in-pandas
    df['next_stop_sequence'] = timepoints_only.sort_values(by=['trip_id', 'stop_sequence']) \
        .iloc[::-1] \
        .groupby('trip_id')['stop_sequence'].transform(lambda x: x.rolling(2).max()) \
        .iloc[::-1]
    # cleanup
    df['next_stop_sequence'] = df['next_stop_sequence'].fillna(df['stop_sequence']).astype('int64')
    # expand each chunk into one row per stop_sequence it covers
    df['stop_sequence_list'] = df.apply(lambda row:
        list(range(row['stop_sequence'], row['next_stop_sequence'])
             if row['stop_sequence'] != row['next_stop_sequence']
             else [row['stop_sequence']]
             ), axis=1)
    df = df.explode('stop_sequence_list')
    df = df.rename(columns={'stop_sequence': 'start_seq',
                            'next_stop_sequence': 'end_seq',
                            'stop_sequence_list': 'stop_sequence'})
    chunks = df.set_index(['trip_id', 'stop_sequence']) \
        [['start_seq', 'end_seq']]
    stop_times = stop_times.set_index(['trip_id', 'stop_sequence'])
    # attach chunk bounds to every stop
    stop_times = stop_times.merge(chunks,
                                  how='left',
                                  right_index=True,
                                  left_index=True,
                                  )
    # look up the bounding times and distances of each chunk
    start_time = stop_times['departure_time'].rename('start_time')
    end_time = stop_times['arrival_time'].rename('end_time')
    start_sdt = stop_times['shape_dist_traveled'].rename('start_sdt')
    end_sdt = stop_times['shape_dist_traveled'].rename('end_sdt')
    stop_times = stop_times.merge(start_time,
                                  left_on=['trip_id', 'start_seq'],
                                  right_index=True
                                  )
    stop_times = stop_times.merge(end_time,
                                  left_on=['trip_id', 'end_seq'],
                                  right_index=True
                                  )
    stop_times = stop_times.merge(start_sdt,
                                  left_on=['trip_id', 'start_seq'],
                                  right_index=True
                                  )
    stop_times = stop_times.merge(end_sdt,
                                  left_on=['trip_id', 'end_seq'],
                                  right_index=True
                                  )

    def interpolate_row(row):
        # Linear interpolation of time by fraction of distance covered
        # within the chunk.
        # happens if last stop or on 1-stop chunks (consecutive timepoints)
        if (row['start_time'] == row['end_time']):
            return row['start_time']
        return seconds_to_military(
            seconds_since_zero(row['start_time']) +
            int(round(
                (
                    (row['shape_dist_traveled'] - row['start_sdt']) / (row['end_sdt'] - row['start_sdt'])
                ) * (
                    seconds_since_zero(row['end_time']) - seconds_since_zero(row['start_time'])
                )
            ))
        )

    stop_times['interp'] = stop_times.apply(lambda row: interpolate_row(row), axis=1)
    # only fill where the original values were missing
    stop_times['arrival_time'] = stop_times['arrival_time'].fillna(stop_times['interp'])
    stop_times['departure_time'] = stop_times['departure_time'].fillna(stop_times['interp'])
    gtfs.update_table('stop_times', stop_times.reset_index(), cascade=False)
    return True