Пример #1
0
def ParseFile(gtfs_filename, output_prefix):
    """Extract trips, stops, and stop times for the feed's busiest date
    and write each table to "<output_prefix>_<table>.csv".
    """
    # Pick the single busiest service date in the feed.
    date, service_ids = ptg.read_busiest_date(gtfs_filename)

    print("Service id chosen = {0}".format(service_ids))

    # Each generator gets its own freshly-loaded feed so that none of them
    # can mutate a feed object shared with the others.
    trips = GenerateTrips(ptg.load_geo_feed(gtfs_filename), date, service_ids)
    stops = GenerateStops(ptg.load_geo_feed(gtfs_filename))
    stop_times = GenerateStopTimes(ptg.load_geo_feed(gtfs_filename))
    # road_segs, seg_props = GenerateRoadSegments(ptg.load_geo_feed(gtfs_filename))

    for table_name, table in (("trips", trips),
                              ("stops", stops),
                              ("stop_times", stop_times)):
        table.to_csv("{0}_{1}.csv".format(output_prefix, table_name), index=False)
Пример #2
0
def update_output(list_of_contents, list_of_names, list_of_dates):
    """Decode an uploaded zipped GTFS feed and return the short name of its
    first route.

    NOTE(review): presumably a Dash Upload callback -- each element of
    list_of_contents arrives as "<content_type>,<base64 data>"; confirm
    against the app wiring.
    """
    for content, name, date in zip(list_of_contents, list_of_names,
                                   list_of_dates):
        # the content needs to be split. It contains the type and the real content
        content_type, content_string = content.split(',')
        # Decode the base64 string
        content_decoded = base64.b64decode(content_string)
        # Use BytesIO to handle the decoded content
        zip_str = io.BytesIO(content_decoded)
        # Now you can use ZipFile to take the BytesIO output
        zip_obj = ZipFile(zip_str, 'r')

        # Extract the feed into a throwaway directory so partridge can read
        # it as a plain GTFS folder; the directory is removed on exit.
        with tempfile.TemporaryDirectory() as tmpdirname:
            zip_obj.extractall(tmpdirname)
            # NOTE(review): 'children' is assigned but never used below.
            children = 'created temporary directory' + tmpdirname
            # [1] -> just the service_ids of the busiest date (date discarded).
            service_ids = ptg.read_busiest_date(tmpdirname)[1]
            view = {'trips.txt': {'service_id': service_ids}}

            feed = ptg.load_geo_feed(tmpdirname, view)

            # Materialize the tables while the temp dir still exists; the
            # DataFrames stay valid after the directory is deleted.
            routes = feed.routes
            trips = feed.trips
            stop_times = feed.stop_times
            stops = feed.stops
            shapes = feed.shapes

        # NOTE(review): this return sits inside the for loop, so only the
        # FIRST uploaded file is ever processed -- confirm that is intended.
        return str(routes.loc[0, 'route_short_name'])
    def get_transfers(self, stop_ids):
        """Return transfers departing from the given stops, indexed by
        (from_stop_id, to_stop_id)."""
        transfer_view = {'transfers.txt': {'from_stop_id': stop_ids}}
        transfers = ptg.load_geo_feed(self.gtfs_path, transfer_view).transfers
        return transfers.set_index(['from_stop_id', 'to_stop_id'])
    def get_stops(self, stop_ids):
        """Return the stops table filtered to the given IDs, indexed by
        stop_id."""
        stop_view = {'stops.txt': {'stop_id': stop_ids}}
        stops = ptg.load_geo_feed(self.gtfs_path, stop_view).stops
        return stops.set_index('stop_id')
    def get_stop_times(self, trip_ids):
        """Return stop times for the given trips, indexed by
        (trip_id, stop_sequence)."""
        stop_time_view = {'stop_times.txt': {'trip_id': trip_ids}}
        stop_times = ptg.load_geo_feed(self.gtfs_path, stop_time_view).stop_times
        return stop_times.set_index(['trip_id', 'stop_sequence'])
    def get_trips(self, service_ids):
        """Return trips running under the given service IDs, indexed by
        trip_id."""
        trip_view = {'trips.txt': {'service_id': service_ids}}
        trips = ptg.load_geo_feed(self.gtfs_path, trip_view).trips
        return trips.set_index('trip_id')
Пример #7
0
def test_load_geo_feed_empty():
    """An empty feed yields empty GeoDataFrames for shapes and stops."""
    gpd = pytest.importorskip("geopandas")

    empty_feed = ptg.load_geo_feed(fixture("empty"))
    shapes, stops = empty_feed.shapes, empty_feed.stops

    assert isinstance(shapes, gpd.GeoDataFrame)
    assert isinstance(stops, gpd.GeoDataFrame)
    assert shapes.empty
    assert stops.empty
Пример #8
0
def GetExtents(gtfs_filename):
    """Returns a bounding box containing all of the feed's stops.

    Returns: [minlon, minlat, maxlon, maxlat]

    Raises:
        ValueError: if the stop bounds contain NaN (e.g. the feed has no
            stops with valid coordinates).
    """
    gtfs = ptg.load_geo_feed(gtfs_filename)
    # total_bounds is a numpy array [minx, miny, maxx, maxy] over all stop
    # geometries; any NaN means the extent is unusable downstream.
    bounds = gtfs.stops.total_bounds
    if np.isnan(bounds).any():
        # ValueError instead of bare Exception so callers can catch it
        # specifically (still caught by existing `except Exception`
        # handlers); the message now identifies the offending feed.
        raise ValueError(
            "Extents: bounds for {0} had a nan!".format(gtfs_filename))
    return bounds.tolist()
Пример #9
0
def get_feed_df(inpath):
    """Load a GTFS feed filtered to the service IDs of its busiest date."""
    print(inpath)
    _date, service_ids = ptg.read_busiest_date(inpath)
    # assume it'll be a typical weekday; GO rail is the same every weekday
    busiest_day_view = {
        'trips.txt': {
            'service_id': service_ids
        },
    }
    return ptg.load_geo_feed(inpath, busiest_day_view)
Пример #10
0
    def __init__(self, agency: config.Agency):
        """Download the agency's GTFS data into the local cache and load it."""
        self.agency = agency
        self.agency_id = agency_id = agency.id
        gtfs_cache_dir = f'{util.get_data_dir()}/gtfs-{agency_id}'

        download_gtfs_data(agency, gtfs_cache_dir)

        # Empty view {} -> load the full feed with no filtering.
        self.feed = ptg.load_geo_feed(gtfs_cache_dir, {})

        # Lazily-populated caches; all start out unset.
        for attr in ('stop_times_by_trip', 'stops_df', 'trips_df',
                     'routes_df', 'stop_times_df', 'shapes_df',
                     'gtfs_stop_ids_map', 'stops_map'):
            setattr(self, attr, None)
Пример #11
0
def test_load_geo_feed():
    """load_geo_feed converts shapes/stops to EPSG:4326 GeoDataFrames
    with the expected geometry types and column layout."""
    gpd = pytest.importorskip("geopandas")

    feed = ptg.load_geo_feed(fixture("amazon-2017-08-06"))
    shapes, stops = feed.shapes, feed.stops

    assert isinstance(shapes, gpd.GeoDataFrame)
    assert isinstance(stops, gpd.GeoDataFrame)
    assert set(shapes.geom_type) == {"LineString"}
    assert set(stops.geom_type) == {"Point"}
    assert shapes.crs == {"init": "EPSG:4326"}
    assert stops.crs == {"init": "EPSG:4326"}
    assert list(shapes.columns) == ["shape_id", "geometry"]

    expected_stop_columns = [
        "stop_id",
        "stop_code",
        "stop_name",
        "stop_desc",
        "zone_id",
        "stop_url",
        "location_type",
        "parent_station",
        "stop_timezone",
        "wheelchair_boarding",
        "geometry",
    ]
    assert list(stops.columns) == expected_stop_columns
Пример #12
0
 def __init__(self, inpath, agency, version):
     """Record the feed location and metadata, then eagerly load the feed."""
     self.inpath = inpath
     self.agency = agency
     self.version = version
     # Empty view {} -> load the full feed with no filtering.
     self.feed = ptg.load_geo_feed(self.inpath, {})
Пример #13
0
def save_routes_for_agency(agency: config.Agency, save_to_s3=True):
    """Build the route configuration for one agency from its GTFS feed and
    write it as compact JSON to the local cache, optionally uploading a
    gzipped copy to S3.

    For every route this collects title/type/url metadata, one direction
    entry per GTFS direction (or per configured custom direction), the
    stops along each direction's most representative shape, the shape
    coordinates, total distance, and per-stop geometry offsets.
    """
    agency_id = agency.id
    gtfs_cache_dir = f'{util.get_data_dir()}/gtfs-{agency_id}'

    # Fetch (or refresh) the agency's GTFS data, then load it unfiltered.
    download_gtfs_data(agency, gtfs_cache_dir)

    feed = ptg.load_geo_feed(gtfs_cache_dir, {})

    print(f"Loading {agency_id} routes...")
    routes_df = feed.routes
    # Multi-agency feeds: keep only this agency's routes when configured.
    if agency.gtfs_agency_id is not None:
        routes_df = routes_df[routes_df.agency_id == agency.gtfs_agency_id]

    routes_data = []

    print(f"Loading {agency_id} trips...")
    trips_df = feed.trips
    # Normalize direction_id to str so comparisons below don't mix types.
    trips_df['direction_id'] = trips_df['direction_id'].astype(str)

    print(f"Loading {agency_id} stop times...")
    stop_times_df = feed.stop_times
    print(f"Loading {agency_id} shapes...")
    shapes_df = feed.shapes

    print(f"Loading {agency_id} stops...")
    stops_df = feed.stops

    # gtfs_stop_ids_map allows looking up row from stops.txt via GTFS stop_id
    gtfs_stop_ids_map = {stop.stop_id: stop for stop in stops_df.itertuples()}

    stop_id_gtfs_field = agency.stop_id_gtfs_field

    # get OpenTransit stop ID for GTFS stop_id (may be the same)
    def normalize_gtfs_stop_id(gtfs_stop_id):
        if stop_id_gtfs_field != 'stop_id':
            return getattr(gtfs_stop_ids_map[gtfs_stop_id], stop_id_gtfs_field)
        else:
            return gtfs_stop_id

    # stops_map allows looking up row from stops.txt via OpenTransit stop ID
    if stop_id_gtfs_field != 'stop_id':
        stops_map = {getattr(stop, stop_id_gtfs_field): stop for stop in stops_df.itertuples()}
    else:
        stops_map = gtfs_stop_ids_map

    # Nextbus agencies: route order comes from the nextbus API, not GTFS.
    if agency.provider == 'nextbus':
        nextbus_route_order = [route.id for route in nextbus.get_route_list(agency.nextbus_id)]

    for route in routes_df.itertuples():

        gtfs_route_id = route.route_id

        short_name = route.route_short_name
        long_name = route.route_long_name

        # Prefer "short - long"; fall back to whichever name is a string.
        if isinstance(short_name, str) and isinstance(long_name, str):
            title = f'{short_name} - {long_name}'
        elif isinstance(short_name, str):
            title = short_name
        else:
            title = long_name

        # NOTE(review): 'type' shadows the builtin; left as-is in this
        # doc-only pass.
        type = int(route.route_type) if hasattr(route, 'route_type') else None
        url = route.route_url if hasattr(route, 'route_url') and isinstance(route.route_url, str) else None
        #color = route.route_color
        #text_color = route.route_text_color

        route_id = getattr(route, agency.route_id_gtfs_field)

        if agency.provider == 'nextbus':
            route_id = route_id.replace('-', '_') # hack to handle muni route IDs where e.g. GTFS has "T-OWL" but nextbus has "T_OWL"
            try:
                nextbus_route_config = nextbus.get_route_config(agency.nextbus_id, route_id)
                title = nextbus_route_config.title
            except Exception as ex:
                # Route not in nextbus: skip it entirely.
                print(ex)
                continue

            try:
                sort_order = nextbus_route_order.index(route_id)
            except ValueError as ex:
                print(ex)
                sort_order = None
        else:
            sort_order = int(route.route_sort_order) if hasattr(route, 'route_sort_order') else None

        print(f'route {route_id} {title}')

        route_data = {
            'id': route_id,
            'title': title,
            'url': url,
            'type': type,
            #'color': color,
            #'text_color': text_color,
            'gtfs_route_id': gtfs_route_id,
            'sort_order': sort_order,
            'stops': {},
            'directions': [],
        }

        directions = []

        route_directions_df = feed.get('route_directions.txt') # unofficial trimet gtfs extension
        if not route_directions_df.empty:
            route_directions_df = route_directions_df[route_directions_df['route_id'] == gtfs_route_id]
        else:
            route_directions_df = None
        # NOTE(review): route_directions_df (and 'directions' above) are
        # never read again in this function -- possibly dead code; confirm.

        routes_data.append(route_data)

        route_trips_df = trips_df[trips_df['route_id'] == gtfs_route_id]

        route_direction_id_values = route_trips_df['direction_id'].values

        def add_custom_direction(custom_direction_info):
            """Add a direction defined in agency config: pick the unique shape
            that contains all included_stop_ids (in order) and none of the
            excluded_stop_ids; raise if zero or multiple shapes match."""
            direction_id = custom_direction_info['id']
            print(f' custom direction = {direction_id}')

            gtfs_direction_id = custom_direction_info['gtfs_direction_id']

            direction_trips_df = route_trips_df[route_direction_id_values == gtfs_direction_id]

            included_stop_ids = custom_direction_info.get('included_stop_ids', [])
            excluded_stop_ids = custom_direction_info.get('excluded_stop_ids', [])

            shapes = get_unique_shapes(
                direction_trips_df=direction_trips_df,
                stop_times_df=stop_times_df,
                stops_map=stops_map,
                normalize_gtfs_stop_id=normalize_gtfs_stop_id
            )

            def contains_included_stops(shape_stop_ids):
                # All included stops must appear, in the given relative order.
                min_index = 0
                for stop_id in included_stop_ids:
                    try:
                        index = shape_stop_ids.index(stop_id, min_index)
                    except ValueError:
                        return False
                    min_index = index + 1 # stops must appear in same order as in included_stop_ids
                return True

            def contains_excluded_stop(shape_stop_ids):
                # True if any excluded stop appears anywhere in the shape.
                for stop_id in excluded_stop_ids:
                    try:
                        index = shape_stop_ids.index(stop_id)
                        return True
                    except ValueError:
                        pass
                return False

            matching_shapes = []
            for shape in shapes:
                shape_stop_ids = shape['stop_ids']
                if contains_included_stops(shape_stop_ids) and not contains_excluded_stop(shape_stop_ids):
                    matching_shapes.append(shape)

            # The include/exclude constraints must identify exactly one shape.
            if len(matching_shapes) != 1:
                matching_shape_ids = [shape['shape_id'] for shape in matching_shapes]
                error_message = f'{len(matching_shapes)} shapes found for route {route_id} with GTFS direction ID {gtfs_direction_id}'
                if len(included_stop_ids) > 0:
                    error_message += f" including {','.join(included_stop_ids)}"

                if len(excluded_stop_ids) > 0:
                    error_message += f" excluding {','.join(excluded_stop_ids)}"

                if len(matching_shape_ids) > 0:
                    error_message += f": {','.join(matching_shape_ids)}"

                raise Exception(error_message)

            matching_shape = matching_shapes[0]
            matching_shape_id = matching_shape['shape_id']
            matching_shape_count = matching_shape['count']

            print(f'  matching shape = {matching_shape_id} ({matching_shape_count} times)')

            add_direction(
                id=direction_id,
                gtfs_shape_id=matching_shape_id,
                gtfs_direction_id=gtfs_direction_id,
                stop_ids=matching_shape['stop_ids'],
                title=custom_direction_info.get('title', None)
            )

        def add_default_direction(direction_id):
            """Add a direction for a plain GTFS direction_id, using the most
            common shape among that direction's trips."""
            print(f' default direction = {direction_id}')

            direction_trips_df = route_trips_df[route_direction_id_values == direction_id]

            shapes = get_unique_shapes(
                direction_trips_df=direction_trips_df,
                stop_times_df=stop_times_df,
                stops_map=stops_map,
                normalize_gtfs_stop_id=normalize_gtfs_stop_id)

            # get_unique_shapes appears to return most-frequent first --
            # TODO confirm ordering contract.
            best_shape = shapes[0]
            best_shape_id = best_shape['shape_id']
            best_shape_count = best_shape['count']

            print(f'  most common shape = {best_shape_id} ({best_shape_count} times)')

            add_direction(
                id=direction_id,
                gtfs_shape_id=best_shape_id,
                gtfs_direction_id=direction_id,
                stop_ids=best_shape['stop_ids']
            )

        def add_direction(id, gtfs_shape_id, gtfs_direction_id, stop_ids, title = None):
            """Append a direction entry (title, stops, shape coords, distance,
            per-stop geometry) to the enclosing route_data."""

            if title is None:
                # Derive a title like "<prefix> to <last stop>" from config.
                default_direction_info = agency.default_directions.get(gtfs_direction_id, {})
                title_prefix = default_direction_info.get('title_prefix', None)

                last_stop_id = stop_ids[-1]
                last_stop = stops_map[last_stop_id]

                if title_prefix is not None:
                    title = f"{title_prefix} to {last_stop.stop_name}"
                else:
                    title = f"To {last_stop.stop_name}"

            print(f'  title = {title}')

            dir_data = {
                'id': id,
                'title': title,
                'gtfs_shape_id': gtfs_shape_id,
                'gtfs_direction_id': gtfs_direction_id,
                'stops': stop_ids,
                'stop_geometry': {},
            }
            route_data['directions'].append(dir_data)

            # Register every stop of this direction in the route-level map.
            for stop_id in stop_ids:
                stop = stops_map[stop_id]
                stop_data = {
                    'id': stop_id,
                    'lat': round(stop.geometry.y, 5), # stop_lat in gtfs
                    'lon': round(stop.geometry.x, 5), # stop_lon in gtfs
                    'title': stop.stop_name,
                    'url': stop.stop_url if hasattr(stop, 'stop_url') and isinstance(stop.stop_url, str) else None,
                }
                route_data['stops'][stop_id] = stop_data

            geometry = shapes_df[shapes_df['shape_id'] == gtfs_shape_id]['geometry'].values[0]

            # partridge returns GTFS geometries for each shape_id as a shapely LineString
            # (https://shapely.readthedocs.io/en/stable/manual.html#linestrings).
            # Each coordinate is an array in [lon,lat] format (note: longitude first, latitude second)
            dir_data['coords'] = [
                {
                    'lat': round(coord[1], 5),
                    'lon': round(coord[0], 5)
                } for coord in geometry.coords
            ]

            if agency.provider == 'nextbus':
                # match nextbus direction IDs with GTFS direction IDs
                best_nextbus_dir_info, best_terminal_dist = match_nextbus_direction(nextbus_route_config, geometry)
                # NOTE(review): 'direction_id' here is not a parameter of
                # add_direction; it resolves via closure to the loop variable
                # from add_default_direction's caller, and may be undefined on
                # the custom-direction path -- verify.
                print(f'  {direction_id} = {best_nextbus_dir_info.id} (terminal_dist={int(best_terminal_dist)}) {" (questionable match)" if best_terminal_dist > 300 else ""}')
                # dir_data['title'] = best_nextbus_dir_info.title
                dir_data['nextbus_direction_id'] = best_nextbus_dir_info.id

            start_lat = geometry.coords[0][1]
            start_lon = geometry.coords[0][0]

            #print(f"  start_lat = {start_lat} start_lon = {start_lon}")

            # Meters per 0.1 degree near the shape start, scaled to meters
            # per whole degree; used for a local equirectangular projection.
            deg_lat_dist = util.haver_distance(start_lat, start_lon, start_lat-0.1, start_lon)*10
            deg_lon_dist = util.haver_distance(start_lat, start_lon, start_lat, start_lon-0.1)*10

            # projection function from lon/lat coordinates in degrees (z ignored) to x/y coordinates in meters.
            # satisfying the interface of shapely.ops.transform (https://shapely.readthedocs.io/en/stable/manual.html#shapely.ops.transform).
            # This makes it possible to use shapely methods to calculate the distance in meters between geometries
            def project_xy(lon, lat, z=None):
                return (round((lon - start_lon) * deg_lon_dist, 1), round((lat - start_lat) * deg_lat_dist, 1))

            xy_geometry = shapely.ops.transform(project_xy, geometry)

            shape_lon_lat = np.array(geometry).T
            shape_lon = shape_lon_lat[0]
            shape_lat = shape_lon_lat[1]

            # Shift by one so element i pairs coordinate i with i-1
            # (first element pairs with itself, contributing distance 0).
            shape_prev_lon = np.r_[shape_lon[0], shape_lon[:-1]]
            shape_prev_lat = np.r_[shape_lat[0], shape_lat[:-1]]

            # shape_cumulative_dist[i] is the cumulative distance in meters along the shape geometry from 0th to ith coordinate
            shape_cumulative_dist = np.cumsum(util.haver_distance(shape_lon, shape_lat, shape_prev_lon, shape_prev_lat))

            # One 2-point LineString per consecutive coordinate pair, in
            # projected meters, for point-to-segment distance tests below.
            shape_lines_xy = [shapely.geometry.LineString(xy_geometry.coords[i:i+2]) for i in range(0, len(xy_geometry.coords) - 1)]

            # this is the total distance of the GTFS shape, which may not be exactly the same as the
            # distance along the route between the first and last Nextbus stop
            dir_data['distance'] = int(shape_cumulative_dist[-1])

            print(f"  distance = {dir_data['distance']}")

            # Find each stop along the route shape, so that the frontend can draw line segments between stops along the shape
            start_index = 0

            for stop_id in stop_ids:
                stop_info = route_data['stops'][stop_id]

                # Need to project lon/lat coords to x/y in order for shapely to determine the distance between
                # a point and a line (shapely doesn't support distance for lon/lat coords)

                stop_xy = shapely.geometry.Point(project_xy(stop_info['lon'], stop_info['lat']))

                stop_geometry = get_stop_geometry(stop_xy, shape_lines_xy, shape_cumulative_dist, start_index)

                # Stops more than 100 m off the shape are treated as bad
                # matches and get no geometry entry.
                if stop_geometry['offset'] > 100:
                    print(f"    !! bad geometry for stop {stop_id}: {stop_geometry['offset']} m from route line segment")
                    continue

                dir_data['stop_geometry'][stop_id] = stop_geometry

                # Resume the search after this stop so stops are matched in
                # order along the shape.
                start_index = stop_geometry['after_index']

        # Custom directions from config take precedence over plain GTFS
        # direction_ids.
        if route_id in agency.custom_directions:
            for custom_direction_info in agency.custom_directions[route_id]:
                add_custom_direction(custom_direction_info)
        else:
            for direction_id in np.unique(route_direction_id_values):
                add_default_direction(direction_id)

    # NOTE(review): routes_data[0] raises IndexError when no routes were
    # produced -- confirm that cannot happen for configured agencies.
    if routes_data[0]['sort_order'] is not None:
        sort_key = lambda route_data: route_data['sort_order']
    else:
        sort_key = lambda route_data: route_data['id']

    routes_data = sorted(routes_data, key=sort_key)

    # Compact separators keep the cached JSON small.
    data_str = json.dumps({
        'version': routeconfig.DefaultVersion,
        'routes': routes_data
    }, separators=(',', ':'))

    cache_path = routeconfig.get_cache_path(agency_id)

    with open(cache_path, "w") as f:
        f.write(data_str)

    if save_to_s3:
        # Upload a gzipped, publicly-readable copy with a 1-day cache TTL.
        s3 = boto3.resource('s3')
        s3_path = routeconfig.get_s3_path(agency_id)
        s3_bucket = config.s3_bucket
        print(f'saving to s3://{s3_bucket}/{s3_path}')
        object = s3.Object(s3_bucket, s3_path)
        object.put(
            Body=gzip.compress(bytes(data_str, 'utf-8')),
            CacheControl='max-age=86400',
            ContentType='application/json',
            ContentEncoding='gzip',
            ACL='public-read'
        )
Пример #14
0
def bus_peak_frequencies(
    gtfs_path: str,
    test_date: typing.Optional[datetime.date] = None,
    am_peak: typing.Optional[typing.Tuple[int, int]] = None,
    pm_peak: typing.Optional[typing.Tuple[int, int]] = None,
) -> geopandas.GeoDataFrame:
    """
    Compute AM and PM Peak frequencies for all the lines in a GTFS Feed.

    Parameters
    ==========
    gtfs_path: str
        The path (or URL) to a GTFS feed.
    test_date: datetime.date
        The test date for which to compute frequencies. Defaults to February
        18th, 2020, an unremarkable weekday February.
    am_peak: tuple of integers
        The two hours (out of 24) demarcating the AM peak period.
    pm_peak: tuple of integers
        The two hours (out of 24) demarcating the PM peak period.
    """

    # Set default values
    # TEST_DATE is a module-level constant (defined outside this view).
    test_date = test_date or TEST_DATE
    am_peak = am_peak or (6, 9)
    pm_peak = pm_peak or (15, 19)

    # Peak window lengths in hours; must be positive for the frequency
    # division below to make sense.
    am_duration = am_peak[1] - am_peak[0]
    pm_duration = pm_peak[1] - pm_peak[0]

    assert am_duration > 0
    assert pm_duration > 0

    # Download and read the GTFS feed
    # The feed is copied to the local GTFS_FILE path (module-level constant)
    # so partridge can read it; fsspec handles remote URLs transparently.
    with fsspec.open(gtfs_path, "rb") as infile:
        data = infile.read()
    with open(GTFS_FILE, "wb") as outfile:
        outfile.write(data)
    service_by_date = partridge.read_service_ids_by_date(GTFS_FILE)
    feed = partridge.load_geo_feed(GTFS_FILE)

    # Get the service for the test date
    try:
        test_service = next(v for k, v in service_by_date.items()
                            if k == test_date)
    except StopIteration:
        raise ValueError(f"Could not find service for {test_date}")

    # Restrict trips and stop_times to those running on the test date.
    test_trips = feed.trips[feed.trips.service_id.isin(test_service)]
    test_stops = feed.stop_times[feed.stop_times.trip_id.isin(
        test_trips.trip_id)]

    # Get the departure, arrival, and mean time for each trip
    # departure/arrival are aggregated per trip: first departure, last
    # arrival; times are compared against hour * 60 * 60 below, so they
    # are in seconds since midnight.
    trip_timings = test_stops.groupby(test_stops.trip_id).agg({
        "departure_time":
        min,
        "arrival_time":
        max
    })
    # mean_time = midpoint of the trip's overall time span.
    trip_timings = trip_timings.assign(
        mean_time=trip_timings.departure_time +
        (trip_timings.arrival_time - trip_timings.departure_time) / 2.0)

    # Find all of the trips that fall within the AM and PM peak times.
    am_peak_trips = trip_timings[
        (trip_timings.mean_time > am_peak[0] * 60 * 60)
        & (trip_timings.mean_time < am_peak[1] * 60 * 60)]
    pm_peak_trips = trip_timings[
        (trip_timings.mean_time > pm_peak[0] * 60 * 60)
        & (trip_timings.mean_time < pm_peak[1] * 60 * 60)]
    # Re-attach trip attributes (route_id, direction_id, shape_id) by
    # joining trips to the peak timing index on trip_id.
    am_peak_trips = test_trips.merge(
        am_peak_trips,
        left_on=test_trips.trip_id,
        right_index=True,
    )
    pm_peak_trips = test_trips.merge(
        pm_peak_trips,
        left_on=test_trips.trip_id,
        right_index=True,
    )

    # Compute the peak frequency
    # frequency = window length in minutes / trip count, i.e. the average
    # headway in minutes per (route_id, direction_id).
    am_peak_frequency = (am_peak_trips.groupby(
        [am_peak_trips.route_id,
         am_peak_trips.direction_id]).size().to_frame("am_peak_trips"))
    am_peak_frequency = am_peak_frequency.assign(
        am_peak_frequency=am_duration * 60 / am_peak_frequency.am_peak_trips)
    pm_peak_frequency = (pm_peak_trips.groupby(
        [pm_peak_trips.route_id,
         pm_peak_trips.direction_id]).size().to_frame("pm_peak_trips"))
    pm_peak_frequency = pm_peak_frequency.assign(
        pm_peak_frequency=pm_duration * 60 / pm_peak_frequency.pm_peak_trips)
    peak_frequency = pandas.concat([am_peak_frequency, pm_peak_frequency],
                                   axis=1,
                                   sort=False)

    # Add the route short name for easier legibility.
    peak_frequency = peak_frequency.join(
        feed.routes[["route_id", "route_short_name"]].set_index("route_id"),
        how="left",
        on="route_id",
    )

    # Grab the most popular shape as the official one.
    # value_counts().index[0] -> the shape_id used by the most trips.
    route_shapes = (test_trips.groupby("route_id").agg({
        "shape_id":
        lambda s: s.value_counts().index[0]
    }).reset_index().merge(
        feed.shapes, how="left",
        on="shape_id").set_index("route_id").drop(columns=["shape_id"]))

    peak_frequency = peak_frequency.merge(
        route_shapes, how="left", right_index=True,
        left_index=True).assign(agency=feed.agency.agency_name.iloc[0])

    # WGS84 is a module-level constant holding the EPSG code (4326).
    gdf = geopandas.GeoDataFrame(peak_frequency, geometry="geometry")
    gdf.crs = f"EPSG:{WGS84}"
    return gdf