def get_representative_feed(file_loc: str, day_type: str = 'busiest'):
    """Load a partridge feed for a representative service day of a GTFS zip.

    Parameters
    ----------
    file_loc : str
        Filepath of the GTFS zip file.
    day_type : str
        Strategy for choosing the representative day. Only 'busiest'
        (the calendar date with the most trips) is supported.

    Returns
    -------
    A partridge feed filtered to the selected date's service ids.
    """
    # Service ids and trip counts, both keyed by calendar date.
    service_ids_by_date = ptg.read_service_ids_by_date(file_loc)
    trip_counts_by_date = ptg.read_trip_counts_by_date(file_loc)

    # A feed with no usable trip counts cannot be processed further.
    if not trip_counts_by_date:
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    # Only one selection strategy exists today; guard everything else.
    if day_type != 'busiest':
        raise NotImplementedError('Unsupported day type string supplied.')

    # The date carrying the highest trip count wins.
    selected_date, trip_count = max(trip_counts_by_date.items(),
                                    key=lambda kv: kv[1])

    log('Selected_date: {}'.format(selected_date))
    log('Number of trips on that date: {}'.format(trip_count))

    related_ids = service_ids_by_date[selected_date]
    log('\nAll related service IDs: \n\t{}'.format('\n\t'.join(related_ids)))

    # Restrict the feed view to trips running on the selected date.
    return ptg.feed(file_loc, view={'trips.txt': {'service_id': related_ids}})
def test_add_shape_dist_traveled(zip_file, scenario_results, scenario_date):
    """Check Trip.add_shape_dist_traveled against expected per-trip arrays.

    Builds a feed restricted to the service ids active on
    ``scenario_date`` and compares the computed shape_dist_traveled
    values for every trip listed in ``scenario_results``.
    """
    service_ids_by_date = ptg.read_service_ids_by_date(zip_file)
    service_ids = service_ids_by_date[scenario_date]
    feed = ptg.feed(zip_file, view={
        'trips.txt': {
            'service_id': service_ids,
        },
    })
    stop_times_df = Trip.add_shape_dist_traveled(feed.stop_times, feed.stops)
    # Deterministic ordering so values line up with the expected arrays.
    stop_times_df.sort_values(
        [Trip.TRIPS_COLUMN_TRIP_ID, Trip.STOPTIMES_COLUMN_STOP_SEQUENCE],
        inplace=True)
    # FIX: `iteritems()` and the bare `print` statement are Python 2 only
    # (SyntaxError / AttributeError under Python 3); use items()/print().
    for trip_id, expected_array in scenario_results.items():
        print(stop_times_df[stop_times_df[
            Trip.TRIPS_COLUMN_TRIP_ID] == trip_id][
            Trip.STOPTIMES_COLUMN_SHAPE_DIST_TRAVELED].values.tolist())
        np.testing.assert_allclose(
            stop_times_df[stop_times_df[Trip.TRIPS_COLUMN_TRIP_ID] ==
                          trip_id][
                Trip.STOPTIMES_COLUMN_SHAPE_DIST_TRAVELED].values,
            expected_array,
            rtol=0, atol=0.00001)
def get_representative_feed(file_loc: str,
                            day_type: str = 'busiest') -> ptg.gtfs.Feed:
    """
    Given a filepath, extract a partridge feed object, holding a
    representative set of schedule patterns, extracted from the GTFS zip
    file, as a set of pandas DataFrames.

    Parameters
    ----------
    file_loc : str
        The location (filepath) of the GTFS zip file.
    day_type : str
        The name of the type of representative feed desired. Currently,
        only one type is supported, busiest. This extracts the schedule
        pattern for a day that has the most service on it, determined by
        the day with the most trips on it.

    Returns
    -------
    feed : ptg.gtfs.Feed
        A partridge feed object, holding related schedule information as
        pandas DataFrames for the busiest day in the available schedule.

    Raises
    ------
    InvalidGTFS
        If no valid trip counts by date can be extracted from the feed.
    """
    # Extract service ids and then trip counts by those dates
    try:
        service_ids_by_date = ptg.read_service_ids_by_date(file_loc)
        trip_counts_by_date = ptg.read_trip_counts_by_date(file_loc)
    # Raised by partridge if no valid dates returned
    except AssertionError:
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    # FIX: replaced the redundant `len(dict.items())` call (flagged by the
    # old TODO) with a plain truthiness check. Kept as a defensive guard
    # even though partridge's AssertionError above should cover this case.
    if not trip_counts_by_date:
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    # At this point, different methods can be implemented to help select
    # how to pick which date/schedule id to use
    if day_type == 'busiest':
        # Choose the date that has the most trips associated with it
        (selected_date,
         trip_count) = max(trip_counts_by_date.items(), key=lambda p: p[1])
    else:
        raise NotImplementedError('Unsupported day type string supplied.')

    log('Selected_date: {}'.format(selected_date))
    log('Number of trips on that date: {}'.format(trip_count))

    all_service_ids = '\n\t'.join(service_ids_by_date[selected_date])
    log('\nAll related service IDs: \n\t{}'.format(all_service_ids))

    sub = service_ids_by_date[selected_date]
    feed_query = {'trips.txt': {'service_id': sub}}

    return ptg.load_feed(file_loc, view=feed_query)
def get_partridge_filter_for_date(zip_path: str, date: datetime.date):
    """Build a partridge view dict keeping only trips active on *date*."""
    active_service_ids = ptg.read_service_ids_by_date(zip_path)[date]
    return {'trips.txt': {'service_id': active_service_ids}}
def write_filtered_feed_by_date(zip_path, date, output_path):
    """Extract a new GTFS zip containing only service active on *date*."""
    ids_for_date = ptg.read_service_ids_by_date(zip_path)[date]
    trips_view = {'trips.txt': {'service_id': ids_for_date}}
    ptg.writers.extract_feed(zip_path, output_path, trips_view)
def get_partridge_feed_by_date(zip_path, date):
    """Load a partridge feed filtered to the service ids active on *date*."""
    active_ids = ptg.read_service_ids_by_date(zip_path)[date]
    return ptg.feed(zip_path,
                    view={'trips.txt': {'service_id': active_ids}})
def write_filtered_feed_by_date(zip_path, date, output_path):
    """Write a GTFS extract at *output_path* limited to *date*'s service."""
    by_date = ptg.read_service_ids_by_date(zip_path)
    ptg.writers.extract_feed(
        zip_path,
        output_path,
        {'trips.txt': {'service_id': by_date[date]}})
def test_filtered_columns(path):
    """Filtering trips (even to nothing) must not change trips columns."""
    first_service_ids = next(
        iter(ptg.read_service_ids_by_date(path).values()))

    feed_full = Feed(path)
    feed_view = Feed(path,
                     view={"trips.txt": {"service_id": first_service_ids}})
    feed_null = Feed(path,
                     view={"trips.txt": {"service_id": "never-match"}})

    baseline = set(feed_full.trips.columns)
    assert baseline == set(feed_view.trips.columns)
    assert baseline == set(feed_null.trips.columns)
def test_service_ids_by_date(path):
    """Each date must map to the expected frozenset of service ids."""
    both_services = frozenset({"1", "0"})
    expected = {
        datetime.date(2017, 8, day): both_services for day in (1, 2, 3, 4)
    }
    expected[datetime.date(2017, 8, 5)] = frozenset({"1"})
    expected[datetime.date(2017, 8, 7)] = both_services

    assert ptg.read_service_ids_by_date(path) == expected
def get_partridge_feed_by_date(zip_path, date):
    """Return a partridge feed holding only service active on *date*."""
    ids_on_date = ptg.read_service_ids_by_date(zip_path)[date]
    trips_view = {'trips.txt': {'service_id': ids_on_date}}
    # NOTE: a custom partridge build additionally accepted encoding='utf-8'
    # here; the stock API does not.
    return ptg.load_feed(zip_path, view=trips_view)
def get_gtfs_feed(network, network_date):
    """Load a fast-trips-configured partridge feed for *network_date*."""
    from fasttrips.Assignment import Assignment
    from fasttrips.Util import Util

    # fast-trips reads the build date from this module-level setting.
    Assignment.NETWORK_BUILD_DATE = network_date

    ids_on_date = ptg.read_service_ids_by_date(network)[network_date]
    return ptg.load_feed(
        network,
        config=Util.get_fast_trips_config(),
        view={'trips.txt': {'service_id': ids_on_date}})
def gtfs_feed(zip_file, network_date):
    """Fixture: yield a fast-trips partridge feed for *network_date*.

    Side effect: sets Assignment.NETWORK_BUILD_DATE, which fast-trips
    reads globally.
    """
    from fasttrips.Assignment import Assignment
    Assignment.NETWORK_BUILD_DATE = network_date

    service_ids_by_date = ptg.read_service_ids_by_date(zip_file)
    service_ids = service_ids_by_date[network_date]
    # FIX: os.path.join() with a single argument was a no-op; pass the
    # path straight through.
    feed = ptg.feed(zip_file,
                    config=Util.get_fast_trips_config(),
                    view={
                        'trips.txt': {
                            'service_id': service_ids
                        },
                    })
    yield feed
def test_filtered_columns(path):
    """A view filter — even one matching nothing — keeps columns intact."""
    some_service_ids = next(
        iter(ptg.read_service_ids_by_date(path).values()))

    unfiltered = ptg.feed(path)
    filtered = ptg.feed(
        path, view={'trips.txt': {'service_id': some_service_ids}})
    empty = ptg.feed(
        path, view={'trips.txt': {'service_id': 'never-match'}})

    baseline = set(unfiltered.trips.columns)
    assert baseline == set(filtered.trips.columns)
    assert baseline == set(empty.trips.columns)
def read_input_files(self):
    """
    Reads in the input network and demand files and initializes the
    relevant data structures.

    Loads the GTFS archive (restricted to the configured build date),
    then constructs stops, routes, transfers, trips, TAZs, and demand
    in dependency order. Construction order matters: later objects take
    earlier ones (e.g. stops, routes) as inputs.
    """
    self.performance.record_step_start(0, 0, 0, "read_input_files")

    # Read the gtfs files first
    FastTripsLogger.info("Reading GTFS schedule")

    service_ids_by_date = ptg.read_service_ids_by_date(
        Assignment.INPUT_NETWORK_ARCHIVE)
    service_ids = service_ids_by_date[Assignment.NETWORK_BUILD_DATE]
    # Only trips whose service runs on the configured build date load.
    gtfs_feed = ptg.load_feed(os.path.join(
        Assignment.INPUT_NETWORK_ARCHIVE),
        config=Util.get_fast_trips_config(),
        view={
            'trips.txt': {
                'service_id': service_ids
            },
        })

    # Read Stops (gtfs-required)
    self.stops = Stop(Assignment.INPUT_NETWORK_ARCHIVE,
                      Assignment.OUTPUT_DIR,
                      gtfs_feed,
                      Assignment.NETWORK_BUILD_DATE)

    # Read routes, agencies, fares
    self.routes = Route(Assignment.INPUT_NETWORK_ARCHIVE,
                        Assignment.OUTPUT_DIR,
                        gtfs_feed,
                        Assignment.NETWORK_BUILD_DATE,
                        self.stops)

    # Read Transfers
    self.transfers = Transfer(Assignment.INPUT_NETWORK_ARCHIVE,
                              Assignment.OUTPUT_DIR,
                              gtfs_feed)

    # Read trips, vehicles, calendar and stoptimes
    self.trips = Trip(Assignment.INPUT_NETWORK_ARCHIVE,
                      Assignment.OUTPUT_DIR,
                      gtfs_feed,
                      Assignment.NETWORK_BUILD_DATE,
                      self.stops,
                      self.routes,
                      Assignment.PREPEND_ROUTE_ID_TO_TRIP_ID)

    # read the TAZs into a TAZ instance
    self.tazs = TAZ(Assignment.OUTPUT_DIR,
                    gtfs_feed,
                    Assignment.NETWORK_BUILD_DATE,
                    self.stops,
                    self.transfers,
                    self.routes)

    # Read the demand into passenger_id -> passenger instance
    self.passengers = Passenger(Assignment.INPUT_DEMAND_DIR,
                                Assignment.OUTPUT_DIR,
                                Assignment.NETWORK_BUILD_DATE,
                                self.stops,
                                self.routes,
                                Assignment.CAPACITY_CONSTRAINT)
def get_representative_feed(self, file_loc: str, the_date: str):
    """Load a feed filtered to the service running on *the_date*.

    *the_date* is a 'YYYY/MM/DD' string.
    """
    year, month, day = (int(part) for part in the_date.split("/"))
    selected_date = date(year, month, day)

    # Service ids and trip counts, keyed by calendar date.
    service_ids_by_date = ptg.read_service_ids_by_date(file_loc)
    trip_counts_by_date = ptg.read_trip_counts_by_date(file_loc)

    # An empty trip-count mapping means the GTFS is unusable.
    if not trip_counts_by_date:
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    selected_ids = service_ids_by_date[selected_date]
    view = {'trips.txt': {'service_id': selected_ids}}
    return ptg.load_feed(file_loc, view=view)
def get_partridge_feed_by_date(zip_path, date):
    """Return a feed restricted to trips whose service runs on *date*."""
    service_ids = ptg.read_service_ids_by_date(zip_path)[date]
    # NOTE: a custom partridge fork additionally supported
    # encoding='utf-8' here.
    return ptg.load_feed(
        zip_path,
        view={'trips.txt': {'service_id': service_ids}})
def gtfs_feed(network): from fasttrips.Assignment import Assignment Assignment.NETWORK_BUILD_DATE = network[2] network_dir = os.path.join(HOME_DIR, network[0], "networks", network[1]) service_ids_by_date = ptg.read_service_ids_by_date(network_dir) service_ids = service_ids_by_date[network_date] feed = ptg.feed(network_dir, config=Util.get_fast_trips_config(), view={ 'trips.txt': { 'service_id': service_ids }, }) yield feed
def test_read_file(path, dates, shapes):
    """Each GTFS file must load with the expected dataframe shape."""
    by_date = ptg.read_service_ids_by_date(path)

    # Union of service ids across all requested dates present in the feed.
    service_ids = set()
    for date in dates:
        if date in by_date:
            service_ids.update(by_date[date])

    if service_ids:
        feed = ptg.feed(path,
                        view={'trips.txt': {'service_id': service_ids}})
    else:
        feed = ptg.feed(path)

    for filename, shape in shapes.items():
        assert feed.get(filename).shape == shape, \
            '{}/{} dataframe shape was incorrect'.format(path, filename)
def test_no_service():
    """An empty feed must raise partridge's 'no service' assertion."""
    path = fixture("empty")
    # FIX: pytest.raises() dropped the `message` kwarg in pytest 5.0
    # (it previously set the failure message, not an expectation on the
    # exception text). Use `match` to assert on the error text instead.
    with pytest.raises(AssertionError, match="No service"):
        ptg.read_service_ids_by_date(path)
def get_service_ids(self, date):
    """Return the service ids active on *date* in this object's GTFS."""
    return ptg.read_service_ids_by_date(self.gtfs_path)[date]
# keep only those with TM2 Node set - then we can make them ints
trn_stop_labels = trn_stop_labels.loc[
    pandas.notnull(trn_stop_labels["TM2 Node"])]
trn_stop_labels["TM2 Node"] = trn_stop_labels["TM2 Node"].astype(int)

# NOTE(review): "Vallejo Baylink Ferry" and "Blue and Gold" appear twice
# in this list, so those operators are processed twice — confirm intended.
for operator in ["Caltrain", "San Francisco MUNI", "Vallejo Baylink Ferry",
                 "Blue and Gold", "Amtrak Capitol Cor. & Reg. Svc", "BART",
                 "ACE", "Golden Gate Ferry", "Alameda Harbor Bay Ferry",
                 "Alameda/Oakland Ferry", "Vallejo Baylink Ferry",
                 "Santa Clara VTA", "Blue and Gold"]:
    Wrangler.WranglerLogger.info("Processing operator %s" % operator)

    # get the stop labels for this operator
    operator_stop_labels = trn_stop_labels.loc[
        trn_stop_labels["Operator"] == operator]
    Wrangler.WranglerLogger.debug("operator_stop_labels.head()\n%s" %
                                  operator_stop_labels.head())
    operator_stop_label_dict = operator_stop_labels.set_index(
        ["TM2 Node"]).to_dict(orient="index")

    # read GTFS
    fullpath = os.path.join(GTFS_DIR, GTFS_NETWORKS[operator])
    service_ids_by_date = partridge.read_service_ids_by_date(fullpath)
    # FIX: `datetime.date(2015,03,11)` used a leading-zero integer
    # literal, which is a SyntaxError under Python 3.
    service_ids = service_ids_by_date[datetime.date(2015, 3, 11)]
    feed = partridge.feed(fullpath,
                          view={'trips.txt': {'service_id': service_ids}})

    # lets see the stop_times with the stop names
    gtfs_stop_times = pandas.merge(
        left=feed.stop_times,
        right=feed.stops[["stop_id", "stop_name"]]).sort_values(
            by=["trip_id", "stop_sequence"])
    # and the route_id and direction_id
    gtfs_stop_times = pandas.merge(
        left=gtfs_stop_times,
        right=feed.trips[["trip_id", "route_id", "direction_id"]],
        how="left")
    # and route_long_name and route_type
    gtfs_stop_times = pandas.merge(
        left=gtfs_stop_times,
        right=feed.routes[["route_id", "route_long_name", "route_type"]],
        how="left")
    # => filter out buses since the travel time comes from traffic
    gtfs_stop_times = gtfs_stop_times.loc[
        gtfs_stop_times.route_type != 3, :]
def bus_peak_frequencies(
    gtfs_path: str,
    test_date: typing.Optional[datetime.date] = None,
    am_peak: typing.Optional[typing.Tuple[int, int]] = None,
    pm_peak: typing.Optional[typing.Tuple[int, int]] = None,
) -> geopandas.GeoDataFrame:
    """
    Compute AM and PM Peak frequencies for all the lines in a GTFS Feed.

    Parameters
    ==========
    gtfs_path: str
        The path (or URL) to a GTFS feed.
    test_date: datetime.date
        The test date for which to compute frequencies. Defaults to
        February 18th, 2020, an unremarkable weekday February.
    am_peak: tuple of integers
        The two hours (out of 24) demarcating the AM peak period.
    pm_peak: tuple of integers
        The two hours (out of 24) demarcating the PM peak period.

    Returns
    =======
    geopandas.GeoDataFrame
        One row per (route_id, direction_id) with peak trip counts,
        peak headways in minutes, the route short name, the most common
        shape geometry, and the agency name.
    """
    # Set default values
    test_date = test_date or TEST_DATE

    am_peak = am_peak or (6, 9)
    pm_peak = pm_peak or (15, 19)

    am_duration = am_peak[1] - am_peak[0]
    pm_duration = pm_peak[1] - pm_peak[0]

    assert am_duration > 0
    assert pm_duration > 0

    # Download and read the GTFS feed; fsspec allows gtfs_path to be a
    # URL as well as a local path. The archive is copied to GTFS_FILE.
    with fsspec.open(gtfs_path, "rb") as infile:
        data = infile.read()
    with open(GTFS_FILE, "wb") as outfile:
        outfile.write(data)
    service_by_date = partridge.read_service_ids_by_date(GTFS_FILE)
    feed = partridge.load_geo_feed(GTFS_FILE)

    # Get the service for the test date
    try:
        test_service = next(v for k, v in service_by_date.items()
                            if k == test_date)
    except StopIteration:
        raise ValueError(f"Could not find service for {test_date}")

    test_trips = feed.trips[feed.trips.service_id.isin(test_service)]
    test_stops = feed.stop_times[feed.stop_times.trip_id.isin(
        test_trips.trip_id)]

    # Get the departure, arrival, and mean time for each trip
    trip_timings = test_stops.groupby(test_stops.trip_id).agg({
        "departure_time": min,
        "arrival_time": max
    })
    trip_timings = trip_timings.assign(
        mean_time=trip_timings.departure_time +
        (trip_timings.arrival_time - trip_timings.departure_time) / 2.0)

    # Find all of the trips that fall within the AM and PM peak times.
    # Peak hour bounds are converted to seconds (hour * 60 * 60) for
    # comparison with mean_time.
    am_peak_trips = trip_timings[
        (trip_timings.mean_time > am_peak[0] * 60 * 60) &
        (trip_timings.mean_time < am_peak[1] * 60 * 60)]
    pm_peak_trips = trip_timings[
        (trip_timings.mean_time > pm_peak[0] * 60 * 60) &
        (trip_timings.mean_time < pm_peak[1] * 60 * 60)]
    am_peak_trips = test_trips.merge(
        am_peak_trips,
        left_on=test_trips.trip_id,
        right_index=True,
    )
    pm_peak_trips = test_trips.merge(
        pm_peak_trips,
        left_on=test_trips.trip_id,
        right_index=True,
    )

    # Compute the peak frequency: trips per peak window, then headway in
    # minutes (window length in minutes / trip count).
    am_peak_frequency = (am_peak_trips.groupby(
        [am_peak_trips.route_id,
         am_peak_trips.direction_id]).size().to_frame("am_peak_trips"))
    am_peak_frequency = am_peak_frequency.assign(
        am_peak_frequency=am_duration * 60 /
        am_peak_frequency.am_peak_trips)
    pm_peak_frequency = (pm_peak_trips.groupby(
        [pm_peak_trips.route_id,
         pm_peak_trips.direction_id]).size().to_frame("pm_peak_trips"))
    pm_peak_frequency = pm_peak_frequency.assign(
        pm_peak_frequency=pm_duration * 60 /
        pm_peak_frequency.pm_peak_trips)
    peak_frequency = pandas.concat([am_peak_frequency, pm_peak_frequency],
                                   axis=1,
                                   sort=False)

    # Add the route short name for easier legibility.
    peak_frequency = peak_frequency.join(
        feed.routes[["route_id",
                     "route_short_name"]].set_index("route_id"),
        how="left",
        on="route_id",
    )

    # Grab the most popular shape as the official one.
    route_shapes = (test_trips.groupby("route_id").agg({
        "shape_id": lambda s: s.value_counts().index[0]
    }).reset_index().merge(
        feed.shapes, how="left",
        on="shape_id").set_index("route_id").drop(columns=["shape_id"]))

    peak_frequency = peak_frequency.merge(
        route_shapes, how="left", right_index=True,
        left_index=True).assign(agency=feed.agency.agency_name.iloc[0])
    gdf = geopandas.GeoDataFrame(peak_frequency, geometry="geometry")
    gdf.crs = f"EPSG:{WGS84}"
    return gdf
def test_unused_service_ids():
    """Orphan service_ids must not break date parsing.

    The fixture's calendar.txt and calendar_dates.txt contain
    service_ids that no trip references; parsing should still succeed.
    """
    ptg.read_service_ids_by_date(fixture("trimet-vermont-2018-02-06"))
"Alameda/Oakland Ferry", "Vallejo Baylink Ferry", "Santa Clara VTA", "Blue and Gold" ]: Wrangler.WranglerLogger.info("Processing operator %s" % operator) # get the stop labels for this operator operator_stop_labels = trn_stop_labels.loc[trn_stop_labels["Operator"] == operator] Wrangler.WranglerLogger.debug("operator_stop_labels.head()\n%s" % operator_stop_labels.head()) operator_stop_label_dict = operator_stop_labels.set_index( ["TM2 Node"]).to_dict(orient="index") # read GTFS fullpath = os.path.join(GTFS_DIR, GTFS_NETWORKS[operator]) service_ids_by_date = partridge.read_service_ids_by_date(fullpath) service_ids = service_ids_by_date[datetime.date(2015, 03, 11)] feed = partridge.feed(fullpath, view={'trips.txt': { 'service_id': service_ids }}) # lets see the stop_times with the stop names gtfs_stop_times = pandas.merge( left=feed.stop_times, right=feed.stops[["stop_id", "stop_name" ]]).sort_values(by=["trip_id", "stop_sequence"]) # and the route_id and direction_id gtfs_stop_times = pandas.merge( left=gtfs_stop_times, right=feed.trips[["trip_id", "route_id", "direction_id"]],
def test_missing_calendar_dates():
    """A feed without calendar_dates.txt must still parse cleanly."""
    ptg.read_service_ids_by_date(
        fixture("israel-public-transportation-route-2126"))
from datetime import date, datetime
import partridge as ptg
import json
import hashlib

# GTFS archive to process.
path = 'gtfsfp20182018-09-26.zip'
print("Loading GTFS")
service_ids_by_date = ptg.read_service_ids_by_date(path)
tree = {}
# One representative calendar date per (French) day-of-week label:
# Monday, Friday, Saturday, Sunday of the first week of October 2018.
dates = {"LUN": date(2018, 10, 1),
         "VEN": date(2018, 10, 5),
         "SAM": date(2018, 10, 6),
         "DIM": date(2018, 10, 7)}
for (day, currentDate) in dates.items():
    print("Loading " + day)
    service_ids = service_ids_by_date[currentDate]
    # Filter to the date's service ids and to agency '881'.
    # NOTE(review): get_filtered_feed is not part of the current
    # partridge API — confirm against the installed partridge version.
    feed = ptg.get_filtered_feed(path, {
        'trips.txt': {
            'service_id': service_ids
        },
        'agency.txt': {
            'agency_id': '881'
        }
    })
    trips = {}
    stop_times = feed.stop_times.values
    # Group stop_times rows by trip id (column 0); the loop body is
    # truncated here — the remainder of the script is not in view.
    for time in stop_times:
        if time[0] not in trips:
            trips[time[0]] = {"passList": []}
        hours = int(time[2] // 3600)