Exemplo n.º 1
0
def get_representative_feed(file_loc: str,
                            day_type: str='busiest') -> ptg.gtfs.Feed:
    """
    Load a partridge feed limited to one representative service day.

    The GTFS zip file is scanned for the service ids and trip counts
    available on each date. One date is selected according to
    ``day_type`` and the feed is loaded with its trips filtered down to
    the service ids running on that date.

    Parameters
    ----------
    file_loc : str
        The location (filepath) of the GTFS zip file.
    day_type : str
        The name of the type of representative feed desired. Currently,
        only one type is supported, ``'busiest'``, which selects the date
        with the highest trip count.

    Returns
    -------
    feed : ptg.gtfs.Feed
        The result of ``ptg.load_feed`` with a view restricting
        ``trips.txt`` to the service ids of the selected date.

    Raises
    ------
    InvalidGTFS
        If partridge raises an ``AssertionError`` while reading the
        per-date data, or if no trip counts by date are returned.
    NotImplementedError
        If ``day_type`` is anything other than ``'busiest'``.
    """

    # Extract service ids and then trip counts by those dates
    try:
        service_ids_by_date = ptg.read_service_ids_by_date(file_loc)
        trip_counts_by_date = ptg.read_trip_counts_by_date(file_loc)

    # Raised by partridge if no valid dates returned
    except AssertionError as err:
        # Chain explicitly so the partridge failure is recorded as the
        # cause rather than as an unrelated error during handling
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.') from err

    # TODO: Due to partridge's assertion error being raised, this
    #       check may no longer be needed.
    if not trip_counts_by_date:
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    # At this point, different methods can be implemented to help select how
    # to pick which date/schedule id to use
    if day_type == 'busiest':
        # Choose the date that has the most trips associated with it
        selected_date, trip_count = max(trip_counts_by_date.items(),
                                        key=lambda p: p[1])
    else:
        raise NotImplementedError('Unsupported day type string supplied.')

    log('Selected_date: {}'.format(selected_date))
    log('Number of trips on that date: {}'.format(trip_count))

    # Look the service ids up once; they feed both the log line and the query
    selected_service_ids = service_ids_by_date[selected_date]
    all_service_ids = '\n\t'.join(selected_service_ids)
    log('\nAll related service IDs: \n\t{}'.format(all_service_ids))

    feed_query = {'trips.txt': {'service_id': selected_service_ids}}
    return ptg.load_feed(file_loc, view=feed_query)
Exemplo n.º 2
0
def get_representative_feed(file_loc: str, day_type: str = 'busiest'):
    """
    Build a partridge feed filtered to a single representative date.

    Parameters
    ----------
    file_loc : str
        The location (filepath) of the GTFS zip file.
    day_type : str
        Selection strategy. Only ``'busiest'`` is handled, which picks the
        date with the highest trip count.

    Returns
    -------
    The result of ``ptg.feed`` called with a view that restricts
    ``trips.txt`` to the service ids of the selected date.

    Raises
    ------
    InvalidGTFS
        If no trip counts by date are returned for the file.
    NotImplementedError
        If ``day_type`` is anything other than ``'busiest'``.
    """
    # Per-date lookups: service ids first, then trip counts
    ids_by_date = ptg.read_service_ids_by_date(file_loc)
    counts_by_date = ptg.read_trip_counts_by_date(file_loc)

    # Nothing to choose from means the feed is unusable
    if len(counts_by_date.items()) == 0:
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    # Other selection strategies could be added here; reject unknown ones
    if day_type != 'busiest':
        raise NotImplementedError('Unsupported day type string supplied.')

    # The (date, count) pair with the largest trip count
    busiest_pair = max(counts_by_date.items(), key=lambda pair: pair[1])
    selected_date, trip_count = busiest_pair

    log('Selected_date: {}'.format(selected_date))
    log('Number of trips on that date: {}'.format(trip_count))

    sub = ids_by_date[selected_date]
    all_service_ids = '\n\t'.join(sub)
    log('\nAll related service IDs: \n\t{}'.format(all_service_ids))

    return ptg.feed(file_loc, view={'trips.txt': {'service_id': sub}})
Exemplo n.º 3
0
def test_trip_counts_by_date(path):
    """Check the per-date trip counts partridge reads from ``path``.

    ``path`` is presumably supplied by a test fixture pointing at a known
    GTFS feed -- confirm against the test suite's fixtures.
    """
    # Expected trip count for each service date in the feed
    expected = {
        datetime.date(2017, 8, 1): 442,
        datetime.date(2017, 8, 2): 442,
        datetime.date(2017, 8, 3): 442,
        datetime.date(2017, 8, 4): 442,
        datetime.date(2017, 8, 5): 1,
        datetime.date(2017, 8, 7): 442,
    }

    assert ptg.read_trip_counts_by_date(path) == expected
Exemplo n.º 4
0
 def get_representative_feed(self, file_loc: str, the_date: str):
     """
     Load a partridge feed filtered to the service ids of a given date.

     Parameters
     ----------
     file_loc : str
         The location (filepath) of the GTFS zip file.
     the_date : str
         The date to select, written as three integer fields separated
         by ``/`` in year/month/day order.

     Returns
     -------
     The result of ``ptg.load_feed`` called with a view that restricts
     ``trips.txt`` to the service ids of the requested date.

     Raises
     ------
     ValueError
         If ``the_date`` does not split into exactly three integer fields.
     InvalidGTFS
         If no trip counts by date are returned for the file.
     """
     # Parse "year/month/day" into a date object
     year, month, day = (int(part) for part in the_date.split("/"))
     selected_date = date(year, month, day)

     # Per-date lookups: service ids first, then trip counts
     ids_by_date = ptg.read_service_ids_by_date(file_loc)
     counts_by_date = ptg.read_trip_counts_by_date(file_loc)

     # The trip counts are only used to confirm the feed has usable dates
     if len(counts_by_date.items()) == 0:
         raise InvalidGTFS('No valid trip counts by date '
                           'were identified in GTFS.')

     # NOTE(review): a date absent from the mapping presumably raises
     # KeyError here -- confirm the mapping type partridge returns
     feed_query = {'trips.txt': {'service_id': ids_by_date[selected_date]}}
     return ptg.load_feed(file_loc, view=feed_query)