Пример #1
0
def get_representative_feed(file_loc: str, day_type: str = 'busiest'):
    """Build a partridge feed for a representative (busiest) service day."""
    # Pull the calendar information needed to rank service days.
    ids_by_date = ptg.read_service_ids_by_date(file_loc)
    counts_by_date = ptg.read_trip_counts_by_date(file_loc)

    # An empty trip-count mapping means the GTFS gave us nothing usable.
    if not counts_by_date:
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    # Only the 'busiest' selection strategy is implemented so far.
    if day_type != 'busiest':
        raise NotImplementedError('Unsupported day type string supplied.')

    # The busiest date is the one carrying the largest trip total.
    busiest_date, busiest_count = max(counts_by_date.items(),
                                      key=lambda pair: pair[1])

    log('Selected_date: {}'.format(busiest_date))
    log('Number of trips on that date: {}'.format(busiest_count))

    target_ids = ids_by_date[busiest_date]
    log('\nAll related service IDs: \n\t{}'.format('\n\t'.join(target_ids)))

    # Restrict the feed view to trips running on the chosen date.
    return ptg.feed(file_loc, view={'trips.txt': {'service_id': target_ids}})
Пример #2
0
def test_add_shape_dist_traveled(zip_file, scenario_results, scenario_date):
    """Verify Trip.add_shape_dist_traveled computes expected distances.

    For each trip in ``scenario_results`` (mapping trip_id -> expected
    distance array), the computed shape_dist_traveled values must match
    the expected array to within an absolute tolerance of 1e-5.
    """
    service_ids_by_date = ptg.read_service_ids_by_date(zip_file)
    service_ids = service_ids_by_date[scenario_date]

    # Restrict the feed to trips active on the scenario date.
    feed = ptg.feed(zip_file,
                    view={
                        'trips.txt': {
                            'service_id': service_ids,
                        },
                    })

    stop_times_df = Trip.add_shape_dist_traveled(feed.stop_times, feed.stops)
    # Deterministic ordering so values line up with the expected arrays.
    stop_times_df.sort_values(
        [Trip.TRIPS_COLUMN_TRIP_ID, Trip.STOPTIMES_COLUMN_STOP_SEQUENCE],
        inplace=True)

    # BUG FIX: this block was Python 2 (`print` statement, `.iteritems()`,
    # both invalid/removed in modern Python + pandas); modernized to
    # `print(...)` and `.items()`.
    for trip_id, expected_array in scenario_results.items():
        actual = stop_times_df[stop_times_df[
            Trip.TRIPS_COLUMN_TRIP_ID] == trip_id][
                Trip.STOPTIMES_COLUMN_SHAPE_DIST_TRAVELED].values
        print(actual.tolist())
        np.testing.assert_allclose(
            actual,
            expected_array,
            rtol=0,
            atol=0.00001)
Пример #3
0
def get_representative_feed(file_loc: str,
                            day_type: str='busiest') -> ptg.gtfs.Feed:
    """
    Load a partridge feed restricted to a representative service day.

    Parameters
    ----------
    file_loc : str
        Filepath of the GTFS zip archive to read.
    day_type : str
        Strategy used to pick the representative day. Only 'busiest' is
        supported: the calendar date carrying the largest number of
        scheduled trips.

    Returns
    -------
    feed : ptg.gtfs.Feed
        A partridge feed whose view is limited to the service ids
        active on the selected (busiest) date.
    """

    # partridge raises an AssertionError when the archive contains no
    # valid service dates; surface that as our own error type.
    try:
        service_ids_by_date = ptg.read_service_ids_by_date(file_loc)
        trip_counts_by_date = ptg.read_trip_counts_by_date(file_loc)
    except AssertionError:
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    # Defensive double-check; likely redundant given the AssertionError
    # handling above, but kept to preserve existing behavior.
    if not trip_counts_by_date:
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    if day_type == 'busiest':
        # The busiest date is the one with the maximum trip count.
        selected_date, trip_count = max(trip_counts_by_date.items(),
                                        key=lambda item: item[1])
    else:
        raise NotImplementedError('Unsupported day type string supplied.')

    log('Selected_date: {}'.format(selected_date))
    log('Number of trips on that date: {}'.format(trip_count))

    related_ids = service_ids_by_date[selected_date]
    log('\nAll related service IDs: \n\t{}'.format('\n\t'.join(related_ids)))

    # Limit the feed to trips whose service_id runs on the chosen date.
    return ptg.load_feed(file_loc,
                         view={'trips.txt': {'service_id': related_ids}})
Пример #4
0
def get_partridge_filter_for_date(zip_path: str, date: datetime.date):
    """Return a partridge view dict selecting trips active on *date*."""
    # A KeyError here means the GTFS has no service on the requested date.
    active_ids = ptg.read_service_ids_by_date(zip_path)[date]
    return {'trips.txt': {'service_id': active_ids}}
Пример #5
0
def write_filtered_feed_by_date(zip_path, date, output_path):
    """Extract a GTFS zip containing only the trips active on *date*."""
    ids_for_date = ptg.read_service_ids_by_date(zip_path)[date]
    view = {'trips.txt': {'service_id': ids_for_date}}
    ptg.writers.extract_feed(zip_path, output_path, view)
Пример #6
0
def get_partridge_feed_by_date(zip_path, date):
    """Load a partridge feed limited to services running on *date*."""
    active_ids = ptg.read_service_ids_by_date(zip_path)[date]
    trip_filter = {'trips.txt': {'service_id': active_ids}}
    return ptg.feed(zip_path, view=trip_filter)
Пример #7
0
def write_filtered_feed_by_date(zip_path, date, output_path):
    """Write a filtered copy of the GTFS zip keeping only *date*'s trips."""
    by_date = ptg.read_service_ids_by_date(zip_path)
    ptg.writers.extract_feed(
        zip_path,
        output_path,
        {'trips.txt': {'service_id': by_date[date]}})
Пример #8
0
def test_filtered_columns(path):
    """Trip columns must be identical across full, filtered, empty views."""
    first_service_ids = list(ptg.read_service_ids_by_date(path).values())[0]

    unfiltered = Feed(path)
    filtered = Feed(path,
                    view={"trips.txt": {"service_id": first_service_ids}})
    empty = Feed(path, view={"trips.txt": {"service_id": "never-match"}})

    expected = set(unfiltered.trips.columns)
    assert expected == set(filtered.trips.columns)
    assert expected == set(empty.trips.columns)
Пример #9
0
def test_service_ids_by_date(path):
    """The fixture feed maps each August 2017 service date to its ids."""
    both = frozenset({"1", "0"})
    # Aug 5 runs service "1" only; Aug 6 has no service at all.
    expected = {datetime.date(2017, 8, day): both for day in (1, 2, 3, 4, 7)}
    expected[datetime.date(2017, 8, 5)] = frozenset({"1"})

    assert ptg.read_service_ids_by_date(path) == expected
Пример #10
0
def get_partridge_feed_by_date(zip_path, date):
    """Return a partridge feed filtered to the services active on *date*."""
    # NOTE: a custom load_feed accepting encoding='utf-8' exists but is
    # not yet upstream; the default encoding is used here.
    services = ptg.read_service_ids_by_date(zip_path)[date]
    view = {'trips.txt': {'service_id': services}}
    return ptg.load_feed(zip_path, view=view)
Пример #11
0
def get_gtfs_feed(network, network_date):
    """Load a fast-trips-configured feed for *network* on *network_date*."""
    from fasttrips.Assignment import Assignment
    from fasttrips.Util import Util

    # Side effect: record the build date on the global Assignment state.
    Assignment.NETWORK_BUILD_DATE = network_date

    services = ptg.read_service_ids_by_date(network)[network_date]
    return ptg.load_feed(network,
                         config=Util.get_fast_trips_config(),
                         view={'trips.txt': {'service_id': services}})
Пример #12
0
def gtfs_feed(zip_file, network_date):
    """Yield a fast-trips-configured partridge feed (pytest fixture style).

    Filters the feed to the service ids active on *network_date* and
    records that date on the global Assignment state, which downstream
    fasttrips code reads.
    """
    from fasttrips.Assignment import Assignment
    # BUG FIX: Util was referenced below without being imported anywhere
    # in this block (sibling examples import it from fasttrips.Util).
    from fasttrips.Util import Util

    Assignment.NETWORK_BUILD_DATE = network_date
    service_ids_by_date = ptg.read_service_ids_by_date(zip_file)
    service_ids = service_ids_by_date[network_date]
    # os.path.join(zip_file) with a single argument was a no-op; pass the
    # path straight through.
    feed = ptg.feed(zip_file,
                    config=Util.get_fast_trips_config(),
                    view={
                        'trips.txt': {
                            'service_id': service_ids
                        },
                    })
    yield feed
Пример #13
0
def test_filtered_columns(path):
    """Column sets of trips must not depend on the applied view."""
    some_ids = list(ptg.read_service_ids_by_date(path).values())[0]

    baseline = ptg.feed(path)
    matching = ptg.feed(path, view={'trips.txt': {'service_id': some_ids}})
    empty = ptg.feed(path,
                     view={'trips.txt': {'service_id': 'never-match'}})

    cols = set(baseline.trips.columns)
    assert cols == set(matching.trips.columns)
    assert cols == set(empty.trips.columns)
Пример #14
0
    def read_input_files(self):
        """
        Reads in the input network and demand files and initializes the relevant data structures.

        Loads the GTFS archive via partridge, filtered to the service ids
        active on the configured network build date, then constructs the
        schedule objects in dependency order: stops, routes, transfers,
        trips, TAZs, and finally demand (passengers). All results are
        stored as attributes on ``self``.
        """
        self.performance.record_step_start(0, 0, 0, "read_input_files")

        # Read the gtfs files first
        FastTripsLogger.info("Reading GTFS schedule")

        # Restrict the feed view to the build date's services; a KeyError
        # here would mean the GTFS has no service on that date.
        service_ids_by_date = ptg.read_service_ids_by_date(
            Assignment.INPUT_NETWORK_ARCHIVE)
        service_ids = service_ids_by_date[Assignment.NETWORK_BUILD_DATE]
        gtfs_feed = ptg.load_feed(os.path.join(
            Assignment.INPUT_NETWORK_ARCHIVE),
                                  config=Util.get_fast_trips_config(),
                                  view={
                                      'trips.txt': {
                                          'service_id': service_ids
                                      },
                                  })
        # Read Stops (gtfs-required)
        self.stops = Stop(Assignment.INPUT_NETWORK_ARCHIVE,
                          Assignment.OUTPUT_DIR, gtfs_feed,
                          Assignment.NETWORK_BUILD_DATE)

        # Read routes, agencies, fares (routes need the stops built above)
        self.routes = Route(Assignment.INPUT_NETWORK_ARCHIVE,
                            Assignment.OUTPUT_DIR, gtfs_feed,
                            Assignment.NETWORK_BUILD_DATE, self.stops)

        # Read Transfers
        self.transfers = Transfer(Assignment.INPUT_NETWORK_ARCHIVE,
                                  Assignment.OUTPUT_DIR, gtfs_feed)

        # Read trips, vehicles, calendar and stoptimes
        self.trips = Trip(Assignment.INPUT_NETWORK_ARCHIVE,
                          Assignment.OUTPUT_DIR, gtfs_feed,
                          Assignment.NETWORK_BUILD_DATE, self.stops,
                          self.routes, Assignment.PREPEND_ROUTE_ID_TO_TRIP_ID)

        # read the TAZs into a TAZ instance
        self.tazs = TAZ(Assignment.OUTPUT_DIR, gtfs_feed,
                        Assignment.NETWORK_BUILD_DATE, self.stops,
                        self.transfers, self.routes)

        # Read the demand int passenger_id -> passenger instance
        self.passengers = Passenger(Assignment.INPUT_DEMAND_DIR,
                                    Assignment.OUTPUT_DIR,
                                    Assignment.NETWORK_BUILD_DATE, self.stops,
                                    self.routes,
                                    Assignment.CAPACITY_CONSTRAINT)
Пример #15
0
 def get_representative_feed(self,file_loc: str, the_date: str):
     """Load a partridge feed for a specific date given as 'YYYY/MM/DD'.

     The feed view keeps only trips whose service runs on that date.
     """
     y, m, d = (int(part) for part in the_date.split("/"))
     target_date = date(y, m, d)

     # Gather calendar info for validation and for the view filter.
     service_ids_by_date = ptg.read_service_ids_by_date(file_loc)
     trip_counts_by_date = ptg.read_trip_counts_by_date(file_loc)

     # A GTFS with no countable trips is unusable.
     if not trip_counts_by_date:
         raise InvalidGTFS('No valid trip counts by date '
                           'were identified in GTFS.')

     view = {'trips.txt': {'service_id': service_ids_by_date[target_date]}}
     return ptg.load_feed(file_loc, view=view)
Пример #16
0
def get_partridge_feed_by_date(zip_path, date):
    """Load the GTFS at *zip_path* filtered to *date*'s service ids."""
    # (A patched partridge accepting encoding='utf-8' exists but is not
    # yet published; the default encoding is used meanwhile.)
    ids_on_date = ptg.read_service_ids_by_date(zip_path)[date]
    return ptg.load_feed(zip_path,
                         view={'trips.txt': {'service_id': ids_on_date}})
Пример #17
0
def gtfs_feed(network):
    """Yield a fast-trips feed for a (project, network name, date) tuple.

    *network* is a 3-tuple of (project directory, network name, build
    date). The resulting feed is filtered to services active on the
    build date, which is also recorded on the global Assignment state.
    """
    from fasttrips.Assignment import Assignment

    # BUG FIX: the original referenced an undefined name `network_date`
    # (NameError at runtime); bind it from the tuple before use.
    network_date = network[2]
    Assignment.NETWORK_BUILD_DATE = network_date
    network_dir = os.path.join(HOME_DIR, network[0], "networks", network[1])

    service_ids_by_date = ptg.read_service_ids_by_date(network_dir)
    service_ids = service_ids_by_date[network_date]
    feed = ptg.feed(network_dir,
                    config=Util.get_fast_trips_config(),
                    view={
                        'trips.txt': {
                            'service_id': service_ids
                        },
                    })
    yield feed
Пример #18
0
def test_read_file(path, dates, shapes):
    """Each GTFS file must load with the expected DataFrame shape."""
    by_date = ptg.read_service_ids_by_date(path)

    # Union the service ids over every requested date present in the feed.
    matched_ids = set()
    for date in dates:
        if date in by_date:
            matched_ids.update(by_date[date])

    # Filter only when some service matched; otherwise load unfiltered.
    if matched_ids:
        feed = ptg.feed(path,
                        view={'trips.txt': {'service_id': matched_ids}})
    else:
        feed = ptg.feed(path)

    for filename, shape in shapes.items():
        assert feed.get(filename).shape == shape, \
            '{}/{} dataframe shape was incorrect'.format(path, filename)
Пример #19
0
def test_no_service():
    """An empty feed must raise when reading service ids by date."""
    path = fixture("empty")
    # BUG FIX: `message=` was deprecated in pytest 4.1 and removed in
    # pytest 5.0 (it only customized the failure text and was commonly
    # mistaken for `match=`); assert on the exception text instead.
    with pytest.raises(AssertionError, match="No service"):
        ptg.read_service_ids_by_date(path)
    def get_service_ids(self, date):
        """Return the service ids active on *date* for this GTFS path."""
        mapping = ptg.read_service_ids_by_date(self.gtfs_path)
        return mapping[date]
    # keep only those with TM2 Node set - then we can make them ints
    trn_stop_labels = trn_stop_labels.loc[ pandas.notnull(trn_stop_labels["TM2 Node"]) ]
    trn_stop_labels["TM2 Node"] = trn_stop_labels["TM2 Node"].astype(int)

    for operator in ["Caltrain", "San Francisco MUNI","Vallejo Baylink Ferry", "Blue and Gold", "Amtrak Capitol Cor. & Reg. Svc", "BART",
	                 "ACE", "Golden Gate Ferry", "Alameda Harbor Bay Ferry","Alameda/Oakland Ferry","Vallejo Baylink Ferry", "Santa Clara VTA", "Blue and Gold"]:
        Wrangler.WranglerLogger.info("Processing operator %s" % operator)

        # get the stop labels for this operator
        operator_stop_labels = trn_stop_labels.loc[ trn_stop_labels["Operator"] == operator ]
        Wrangler.WranglerLogger.debug("operator_stop_labels.head()\n%s" % operator_stop_labels.head())
        operator_stop_label_dict = operator_stop_labels.set_index(["TM2 Node"]).to_dict(orient="index")

        # read GTFS
        fullpath = os.path.join(GTFS_DIR, GTFS_NETWORKS[operator])
        service_ids_by_date = partridge.read_service_ids_by_date(fullpath)
        service_ids = service_ids_by_date[datetime.date(2015,03,11)]
        feed = partridge.feed(fullpath, view={'trips.txt':{'service_id':service_ids}})

        # lets see the stop_times with the stop names
        gtfs_stop_times = pandas.merge(left=feed.stop_times,
                                       right=feed.stops[["stop_id","stop_name"]]).sort_values(by=["trip_id","stop_sequence"])
        # and the route_id and direction_id
        gtfs_stop_times = pandas.merge(left=gtfs_stop_times,
                                       right=feed.trips[["trip_id","route_id","direction_id"]], how="left")
        # and route_long_name and route_type
        gtfs_stop_times = pandas.merge(left=gtfs_stop_times,
                                       right=feed.routes[["route_id","route_long_name","route_type"]], how="left")
        # => filter out buses since the travel time comes from traffic
        gtfs_stop_times = gtfs_stop_times.loc[gtfs_stop_times.route_type != 3,:]
Пример #22
0
def bus_peak_frequencies(
    gtfs_path: str,
    test_date: typing.Optional[datetime.date] = None,
    am_peak: typing.Optional[typing.Tuple[int, int]] = None,
    pm_peak: typing.Optional[typing.Tuple[int, int]] = None,
) -> geopandas.GeoDataFrame:
    """
    Compute AM and PM Peak frequencies for all the lines in a GTFS Feed.

    Parameters
    ==========
    gtfs_path: str
        The path (or URL) to a GTFS feed.
    test_date: datetime.date
        The test date for which to compute frequencies. Defaults to February
        18th, 2020, an unremarkable weekday February.
    am_peak: tuple of integers
        The two hours (out of 24) demarcating the AM peak period.
    pm_peak: tuple of integers
        The two hours (out of 24) demarcating the PM peak period.

    Returns
    =======
    geopandas.GeoDataFrame
        One row per (route_id, direction_id) with peak trip counts, peak
        headways in minutes, the route short name, the agency name, and
        the route's most common shape as geometry.

    Raises
    ======
    ValueError
        If the feed has no service entry for *test_date*.
    """

    # Set default values
    test_date = test_date or TEST_DATE
    am_peak = am_peak or (6, 9)
    pm_peak = pm_peak or (15, 19)

    # Peak durations in hours; must be positive for the headway math below.
    am_duration = am_peak[1] - am_peak[0]
    pm_duration = pm_peak[1] - pm_peak[0]

    assert am_duration > 0
    assert pm_duration > 0

    # Download and read the GTFS feed
    # NOTE(review): the feed is materialized to the module-level GTFS_FILE
    # path before parsing; concurrent calls would clobber each other.
    with fsspec.open(gtfs_path, "rb") as infile:
        data = infile.read()
    with open(GTFS_FILE, "wb") as outfile:
        outfile.write(data)
    service_by_date = partridge.read_service_ids_by_date(GTFS_FILE)
    feed = partridge.load_geo_feed(GTFS_FILE)

    # Get the service for the test date
    try:
        test_service = next(v for k, v in service_by_date.items()
                            if k == test_date)
    except StopIteration:
        raise ValueError(f"Could not find service for {test_date}")

    test_trips = feed.trips[feed.trips.service_id.isin(test_service)]
    test_stops = feed.stop_times[feed.stop_times.trip_id.isin(
        test_trips.trip_id)]

    # Get the departure, arrival, and mean time for each trip
    trip_timings = test_stops.groupby(test_stops.trip_id).agg({
        "departure_time":
        min,
        "arrival_time":
        max
    })
    # mean_time is the trip's temporal midpoint, in seconds since midnight
    # (it is compared against hour * 60 * 60 below).
    trip_timings = trip_timings.assign(
        mean_time=trip_timings.departure_time +
        (trip_timings.arrival_time - trip_timings.departure_time) / 2.0)

    # Find all of the trips that fall within the AM and PM peak times.
    am_peak_trips = trip_timings[
        (trip_timings.mean_time > am_peak[0] * 60 * 60)
        & (trip_timings.mean_time < am_peak[1] * 60 * 60)]
    pm_peak_trips = trip_timings[
        (trip_timings.mean_time > pm_peak[0] * 60 * 60)
        & (trip_timings.mean_time < pm_peak[1] * 60 * 60)]
    am_peak_trips = test_trips.merge(
        am_peak_trips,
        left_on=test_trips.trip_id,
        right_index=True,
    )
    pm_peak_trips = test_trips.merge(
        pm_peak_trips,
        left_on=test_trips.trip_id,
        right_index=True,
    )

    # Compute the peak frequency
    # (headway in minutes = peak length in minutes / number of peak trips)
    am_peak_frequency = (am_peak_trips.groupby(
        [am_peak_trips.route_id,
         am_peak_trips.direction_id]).size().to_frame("am_peak_trips"))
    am_peak_frequency = am_peak_frequency.assign(
        am_peak_frequency=am_duration * 60 / am_peak_frequency.am_peak_trips)
    pm_peak_frequency = (pm_peak_trips.groupby(
        [pm_peak_trips.route_id,
         pm_peak_trips.direction_id]).size().to_frame("pm_peak_trips"))
    pm_peak_frequency = pm_peak_frequency.assign(
        pm_peak_frequency=pm_duration * 60 / pm_peak_frequency.pm_peak_trips)
    peak_frequency = pandas.concat([am_peak_frequency, pm_peak_frequency],
                                   axis=1,
                                   sort=False)

    # Add the route short name for easier legibility.
    peak_frequency = peak_frequency.join(
        feed.routes[["route_id", "route_short_name"]].set_index("route_id"),
        how="left",
        on="route_id",
    )

    # Grab the most popular shape as the official one.
    route_shapes = (test_trips.groupby("route_id").agg({
        "shape_id":
        lambda s: s.value_counts().index[0]
    }).reset_index().merge(
        feed.shapes, how="left",
        on="shape_id").set_index("route_id").drop(columns=["shape_id"]))

    peak_frequency = peak_frequency.merge(
        route_shapes, how="left", right_index=True,
        left_index=True).assign(agency=feed.agency.agency_name.iloc[0])

    gdf = geopandas.GeoDataFrame(peak_frequency, geometry="geometry")
    # WGS84 lat/lon, the GTFS-native coordinate system.
    gdf.crs = f"EPSG:{WGS84}"
    return gdf
Пример #23
0
def test_unused_service_ids():
    """Calendar rows whose service_id has no trips must not break parsing."""
    # Feed has rows in calendar.txt and calendar_dates.txt
    # with `service_id`s that have no applicable trips
    ptg.read_service_ids_by_date(fixture("trimet-vermont-2018-02-06"))
Пример #24
0
            "Alameda/Oakland Ferry", "Vallejo Baylink Ferry",
            "Santa Clara VTA", "Blue and Gold"
    ]:
        Wrangler.WranglerLogger.info("Processing operator %s" % operator)

        # get the stop labels for this operator
        operator_stop_labels = trn_stop_labels.loc[trn_stop_labels["Operator"]
                                                   == operator]
        Wrangler.WranglerLogger.debug("operator_stop_labels.head()\n%s" %
                                      operator_stop_labels.head())
        operator_stop_label_dict = operator_stop_labels.set_index(
            ["TM2 Node"]).to_dict(orient="index")

        # read GTFS
        fullpath = os.path.join(GTFS_DIR, GTFS_NETWORKS[operator])
        service_ids_by_date = partridge.read_service_ids_by_date(fullpath)
        service_ids = service_ids_by_date[datetime.date(2015, 03, 11)]
        feed = partridge.feed(fullpath,
                              view={'trips.txt': {
                                  'service_id': service_ids
                              }})

        # lets see the stop_times with the stop names
        gtfs_stop_times = pandas.merge(
            left=feed.stop_times,
            right=feed.stops[["stop_id", "stop_name"
                              ]]).sort_values(by=["trip_id", "stop_sequence"])
        # and the route_id and direction_id
        gtfs_stop_times = pandas.merge(
            left=gtfs_stop_times,
            right=feed.trips[["trip_id", "route_id", "direction_id"]],
Пример #25
0
def test_missing_calendar_dates():
    """A feed lacking calendar_dates.txt must still parse service dates."""
    fixture_path = fixture("israel-public-transportation-route-2126")
    ptg.read_service_ids_by_date(fixture_path)
Пример #26
0
from datetime import date, datetime
import partridge as ptg
import json
import hashlib

path = 'gtfsfp20182018-09-26.zip'

print("Loading GTFS")
service_ids_by_date = ptg.read_service_ids_by_date(path)

tree = {}
dates = {"LUN": date(2018, 10, 1), "VEN": date(2018, 10, 5), "SAM": date(2018, 10, 6), "DIM": date(2018, 10, 7)}
for (day, currentDate) in dates.items():
	print("Loading " + day)
	service_ids = service_ids_by_date[currentDate]

	feed = ptg.get_filtered_feed(path, {
		'trips.txt': {
			'service_id': service_ids
	    },
		'agency.txt': {
			'agency_id': '881'
		}
	})

	trips = {}
	stop_times = feed.stop_times.values
	for time in stop_times:
		if time[0] not in trips:
			trips[time[0]] = {"passList": []}
		hours = int(time[2] // 3600)