示例#1
0
def get_closest_nodes():
    """Return, for each target swimming hall, the stop_I of its nearest node.

    Loads the Helsinki node table from HELSINKI_NODES_FNAME and performs a
    brute-force nearest-neighbour search using great-circle (WGS84)
    distances against each location in the module-level ``target_locations``.

    Returns
    -------
    list
        stop_I identifiers, one per entry of ``target_locations``, in order.
    """
    nodes = pandas.read_csv(HELSINKI_NODES_FNAME)
    # Materialize once so the same node list is scanned for every hall.
    candidates = list(nodes.itertuples())
    closest_stops = []
    for hall in target_locations:
        hall_lat = hall['latitude']
        hall_lon = hall['longitude']
        # Brute-force scan: pick the node minimizing the WGS84 distance.
        nearest = min(
            candidates,
            key=lambda node: wgs84_distance(hall_lat, hall_lon,
                                            node.lat, node.lon))
        closest_stops.append(nearest.stop_I)
    return closest_stops
示例#2
0
def calc_transfers(conn, threshold_meters=1000):
    """Populate the ``stop_distances`` table with all stop pairs within range.

    For every pair of distinct stops whose great-circle distance is at most
    ``threshold_meters``, inserts (or replaces) a row
    (from_stop_I, to_stop_I, distance, None, None, None).

    Parameters
    ----------
    conn : sqlite3.Connection
        Connection to a gtfspy database containing a ``stops`` table.
    threshold_meters : int, optional
        Maximum straight-line distance (meters) for a pair to be recorded.
    """
    geohash_precision = _get_geo_hash_precision(threshold_meters / 1000.)
    geo_index = GeoGridIndex(precision=geohash_precision)
    g = GTFS(conn)
    stops = g.get_table("stops")
    stop_geopoints = []
    cursor = conn.cursor()

    # First pass: index every stop in a geohash grid for fast radius queries.
    for stop in stops.itertuples():
        stop_geopoint = GeoPoint(stop.lat, stop.lon, ref=stop.stop_I)
        geo_index.add_point(stop_geopoint)
        stop_geopoints.append(stop_geopoint)

    # Second pass: for each stop, fetch nearby candidates and verify them
    # with an exact distance (the "dirty" grid query can return points
    # slightly beyond the requested radius).
    for stop_geopoint in stop_geopoints:
        nearby_stop_geopoints = geo_index.get_nearest_points_dirty(
            stop_geopoint, threshold_meters / 1000.0, "km")
        from_stop_I = int(stop_geopoint.ref)
        from_lat = stop_geopoint.latitude
        from_lon = stop_geopoint.longitude

        to_stop_Is = []
        distances = []
        for nearby_stop_geopoint in nearby_stop_geopoints:
            to_stop_I = int(nearby_stop_geopoint.ref)
            if to_stop_I == from_stop_I:
                # A stop is trivially within range of itself; skip it.
                continue
            to_lat = nearby_stop_geopoint.latitude
            to_lon = nearby_stop_geopoint.longitude
            # Round up to whole meters so the stored distance never
            # understates the true separation.
            distance = math.ceil(
                wgs84_distance(from_lat, from_lon, to_lat, to_lon))
            if distance <= threshold_meters:
                to_stop_Is.append(to_stop_I)
                distances.append(distance)

        n_pairs = len(to_stop_Is)
        from_stop_Is = [from_stop_I] * n_pairs
        cursor.executemany(
            'INSERT OR REPLACE INTO stop_distances VALUES (?, ?, ?, ?, ?, ?);',
            zip(from_stop_Is, to_stop_Is, distances, [None] * n_pairs,
                [None] * n_pairs, [None] * n_pairs))

    # BUG FIX: this statement used to run inside the loop above, re-executing
    # once per stop. IF NOT EXISTS makes it idempotent, so creating the index
    # once after all inserts is equivalent and avoids the redundant work.
    cursor.execute(
        'CREATE INDEX IF NOT EXISTS idx_sd_fsid ON stop_distances (from_stop_I);'
    )
示例#3
0
File: stats.py  Project: Almoni/gtfspy
def get_stats(gtfs):
    """
    Get basic statistics of the GTFS data.

    Parameters
    ----------
    gtfs: GTFS

    Returns
    -------
    stats: dict
        A dictionary of various statistics.
        Keys should be strings, values should be inputtable to a database (int, date, str, ...)
        (but not a list)
    """
    stats = {}
    # Basic table counts
    for table in [
            'agencies', 'routes', 'stops', 'stop_times', 'trips', 'calendar',
            'shapes', 'calendar_dates', 'days', 'stop_distances',
            'frequencies', 'feed_info', 'transfers'
    ]:
        stats["n_" + table] = gtfs.get_row_count(table)

    # Agency names, joined into one database-storable string
    agencies = gtfs.get_table("agencies")
    stats["agencies"] = "_".join(agencies['name'].values)

    # Stop lat/lon range
    stops = gtfs.get_table("stops")
    lats = stops['lat'].values
    lons = stops['lon'].values
    percentiles = [0, 10, 50, 90, 100]

    # numpy.percentile raises IndexError on an empty array (older numpy);
    # fall back to Nones so every stats key is still present.
    try:
        lat_percentiles = numpy.percentile(lats, percentiles)
    except IndexError:
        lat_percentiles = [None] * 5
    lat_min, lat_10, lat_median, lat_90, lat_max = lat_percentiles
    stats["lat_min"] = lat_min
    stats["lat_10"] = lat_10
    stats["lat_median"] = lat_median
    stats["lat_90"] = lat_90
    stats["lat_max"] = lat_max

    try:
        lon_percentiles = numpy.percentile(lons, percentiles)
    except IndexError:
        lon_percentiles = [None] * 5
    lon_min, lon_10, lon_median, lon_90, lon_max = lon_percentiles
    stats["lon_min"] = lon_min
    stats["lon_10"] = lon_10
    stats["lon_median"] = lon_median
    stats["lon_90"] = lon_90
    stats["lon_max"] = lon_max

    if len(lats) > 0:
        # North-south extent measured along the median meridian.
        stats["height_km"] = wgs84_distance(lat_min, lon_median, lat_max,
                                            lon_median) / 1000.
        # East-west extent measured along the median parallel.
        # BUG FIX: the arguments were previously passed as
        # (lon_min, lat_median, lon_max, lat_median), i.e. longitudes in the
        # latitude slots; wgs84_distance expects (lat1, lon1, lat2, lon2).
        stats["width_km"] = wgs84_distance(lat_median, lon_min, lat_median,
                                           lon_max) / 1000.
    else:
        stats["height_km"] = None
        stats["width_km"] = None

    first_day_start_ut, last_day_start_ut = gtfs.get_day_start_ut_span()
    stats["start_time_ut"] = first_day_start_ut
    if last_day_start_ut is None:
        stats["end_time_ut"] = None
    else:
        # 28 (instead of 24) comes from the GTFS stANDard
        stats["end_time_ut"] = last_day_start_ut + 28 * 3600

    stats["start_date"] = gtfs.get_min_date()
    stats["end_date"] = gtfs.get_max_date()

    # Maximum activity day: the date with the most entries in `days`
    # (ties broken by earliest date).
    max_activity_date = gtfs.execute_custom_query(
        'SELECT count(*), date '
        'FROM days '
        'GROUP BY date '
        'ORDER BY count(*) DESC, date '
        'LIMIT 1;').fetchone()
    if max_activity_date:
        stats["max_activity_date"] = max_activity_date[1]
        max_activity_hour = gtfs.get_cursor().execute(
            'SELECT count(*), arr_time_hour FROM day_stop_times '
            'WHERE date=? GROUP BY arr_time_hour '
            'ORDER BY count(*) DESC;',
            (stats["max_activity_date"], )).fetchone()
        if max_activity_hour:
            stats["max_activity_hour"] = max_activity_hour[1]
        else:
            stats["max_activity_hour"] = None

    # Fleet size estimate: considering each line separately.
    # NOTE: short-circuit evaluation keeps this safe — max_activity_hour is
    # only defined (and only read) when max_activity_date is truthy.
    if max_activity_date and max_activity_hour:
        fleet_size_estimates = _fleet_size_estimate(gtfs,
                                                    stats['max_activity_hour'],
                                                    stats['max_activity_date'])
        stats.update(fleet_size_estimates)

    # Compute simple distributions of various columns that have a finite range of values.
    # Commented lines refer to values that are not imported yet, ?

    stats['routes__type__dist'] = _distribution(gtfs, 'routes', 'type')
    # stats['stop_times__pickup_type__dist'] = _distribution(gtfs, 'stop_times', 'pickup_type')
    # stats['stop_times__drop_off_type__dist'] = _distribution(gtfs, 'stop_times', 'drop_off_type')
    # stats['stop_times__timepoint__dist'] = _distribution(gtfs, 'stop_times', 'timepoint')
    stats['calendar_dates__exception_type__dist'] = _distribution(
        gtfs, 'calendar_dates', 'exception_type')
    stats['frequencies__exact_times__dist'] = _distribution(
        gtfs, 'frequencies', 'exact_times')
    stats['transfers__transfer_type__dist'] = _distribution(
        gtfs, 'transfers', 'transfer_type')
    stats['agencies__lang__dist'] = _distribution(gtfs, 'agencies', 'lang')
    stats['stops__location_type__dist'] = _distribution(
        gtfs, 'stops', 'location_type')
    # stats['stops__wheelchair_boarding__dist'] = _distribution(gtfs, 'stops', 'wheelchair_boarding')
    # stats['trips__wheelchair_accessible__dist'] = _distribution(gtfs, 'trips', 'wheelchair_accessible')
    # stats['trips__bikes_allowed__dist'] = _distribution(gtfs, 'trips', 'bikes_allowed')
    # stats[''] = _distribution(gtfs, '', '')
    stats = _feed_calendar_span(gtfs, stats)

    return stats
示例#4
0
def _add_scale_bar(ax, lat, lon_min, lon_max, width_pixels):
    """Attach a scale bar to *ax*.

    The scale passed to ScaleBar is meters-per-pixel: the real-world
    east-west extent of the axis at latitude *lat*, divided by the axis
    width in pixels.
    """
    span_meters = util.wgs84_distance(lat, lon_min, lat, lon_max)
    meters_per_pixel = span_meters / width_pixels
    ax.add_artist(ScaleBar(meters_per_pixel))
示例#5
0
def stop_to_stop_network_for_route_type(gtfs,
                                        route_type,
                                        link_attributes=None,
                                        start_time_ut=None,
                                        end_time_ut=None):
    """
    Get a stop-to-stop network describing a single mode of travel.

    Parameters
    ----------
    gtfs : gtfspy.GTFS
    route_type : int
        See gtfspy.route_types.TRANSIT_ROUTE_TYPES for the list of possible types.
    link_attributes: list[str], optional
        defaulting to use the following link attributes:
            "n_vehicles" : Number of vehicles passed
            "duration_min" : minimum travel time between stops
            "duration_max" : maximum travel time between stops
            "duration_median" : median travel time between stops
            "duration_avg" : average travel time between stops
            "d" : distance along straight line (wgs84_distance)
            "distance_shape" : minimum distance along shape
            "capacity_estimate" : approximate capacity passed through the stop
            "route_I_counts" : dict from route_I to counts
    start_time_ut: int
        start time of the time span (in unix time)
    end_time_ut: int
        end time of the time span (in unix time)

    Returns
    -------
    net: networkx.DiGraph
        A directed graph Directed graph
    """
    if link_attributes is None:
        link_attributes = DEFAULT_STOP_TO_STOP_LINK_ATTRIBUTES
    assert (route_type in route_types.TRANSIT_ROUTE_TYPES)

    stops_dataframe = gtfs.get_stops_for_route_type(route_type)
    net = networkx.DiGraph()
    _add_stops_to_net(net, stops_dataframe)

    events_df = gtfs.get_transit_events(start_time_ut=start_time_ut,
                                        end_time_ut=end_time_ut,
                                        route_type=route_type)
    # With fewer than two stops there can be no stop-to-stop events.
    if len(net.nodes()) < 2:
        assert events_df.shape[0] == 0

    # group events by links, and loop over them (i.e. each link):
    link_event_groups = events_df.groupby(['from_stop_I', 'to_stop_I'],
                                          sort=False)
    for key, link_events in link_event_groups:
        from_stop_I, to_stop_I = key
        assert isinstance(link_events, pd.DataFrame)
        # Available event columns:
        # 'dep_time_ut' 'arr_time_ut' 'shape_id' 'route_type' 'trip_I' 'duration' 'from_seq' 'to_seq'
        # BUG FIX: a former `if link_attributes is None: net.add_edge(...)`
        # branch here was unreachable -- link_attributes is always replaced by
        # the default list at the top of the function -- and has been removed.
        link_data = {}
        if "duration_min" in link_attributes:
            link_data['duration_min'] = float(
                link_events['duration'].min())
        if "duration_max" in link_attributes:
            link_data['duration_max'] = float(
                link_events['duration'].max())
        if "duration_median" in link_attributes:
            link_data['duration_median'] = float(
                link_events['duration'].median())
        if "duration_avg" in link_attributes:
            link_data['duration_avg'] = float(
                link_events['duration'].mean())
        # statistics on numbers of vehicles:
        if "n_vehicles" in link_attributes:
            link_data['n_vehicles'] = int(link_events.shape[0])
        if "capacity_estimate" in link_attributes:
            link_data['capacity_estimate'] = route_types.ROUTE_TYPE_TO_APPROXIMATE_CAPACITY[route_type] \
                                             * int(link_events.shape[0])
        if "d" in link_attributes:
            # Straight-line (WGS84) distance between the two stop nodes.
            from_lat = graph_node_attrs(net, from_stop_I)['lat']
            from_lon = graph_node_attrs(net, from_stop_I)['lon']
            to_lat = graph_node_attrs(net, to_stop_I)['lat']
            to_lon = graph_node_attrs(net, to_stop_I)['lon']
            distance = wgs84_distance(from_lat, from_lon, to_lat, to_lon)
            link_data['d'] = int(distance)
        if "distance_shape" in link_attributes:
            assert "shape_id" in link_events.columns.values
            # Use the first event of this link that carries a shape_id;
            # if none does, the shape distance is unknown.
            found = None
            for i, shape_id in enumerate(link_events["shape_id"].values):
                if shape_id is not None:
                    found = i
                    break
            if found is None:
                link_data["distance_shape"] = None
            else:
                link_event = link_events.iloc[found]
                distance = gtfs.get_shape_distance_between_stops(
                    link_event["trip_I"], int(link_event["from_seq"]),
                    int(link_event["to_seq"]))
                link_data['distance_shape'] = distance
        if "route_I_counts" in link_attributes:
            link_data["route_I_counts"] = link_events.groupby(
                "route_I").size().to_dict()
        net.add_edge(from_stop_I, to_stop_I, **link_data)
    return net
示例#6
0
    def test_get_buffered_area_of_stops(self):
        # stop1 and stop2 are far apart (their buffers cannot overlap);
        # stop1 and stop3 are close enough that their buffers can overlap.
        # compute_buffered_area_of_stops is only accurate to roughly 95-99%
        # of the analytic value, hence the 0.95 tolerance factors below.
        stop1_coords = 61.129094, 24.027896
        stop2_coords = 61.747408, 23.924279
        stop3_coords = 61.129621, 24.027363
        # Unzip (lat, lon) tuples into parallel lat / lon sequences.
        lats_1, lons_1 = list(zip(stop1_coords))
        lats_1_2, lons_1_2 = list(zip(stop1_coords, stop2_coords))
        lats_1_3, lons_1_3 = list(zip(stop1_coords, stop3_coords))

        # Single point: the buffered area approximates one circle.
        buffer_onepoint = 100  # radius in meters
        true_area = 10000 * np.pi  # pi * r^2
        area_1 = compute_buffered_area_of_stops(lats_1, lons_1,
                                                buffer_onepoint)
        self.assertLess(true_area * 0.95, area_1)
        self.assertLess(area_1, true_area)

        # Two distant points: area approximates two disjoint circles.
        # The points are "far away" to avoid overlap, but since both are in
        # the same city a really big buffer could overlap and fail the test.
        buffer_nonoverlap = 100  # radius in meters
        two_points_nonoverlap_true_area = 2 * buffer_nonoverlap**2 * np.pi
        area_1_2 = compute_buffered_area_of_stops(lats_1_2, lons_1_2,
                                                  buffer_nonoverlap)
        self.assertLess(two_points_nonoverlap_true_area * 0.95, area_1_2)
        self.assertLess(area_1_2, two_points_nonoverlap_true_area)

        # Two nearby points whose 100 m buffers overlap: the union is bigger
        # than one circle but smaller than two disjoint circles.
        buffer_overlap = 100  # radius in meters
        area_1_3 = compute_buffered_area_of_stops(lats_1_3, lons_1_3,
                                                  buffer_overlap)
        self.assertLess(area_1, area_1_3)
        self.assertLess(area_1_3, two_points_nonoverlap_true_area)

        # 'Half-overlap': pick the radius from the actual separation.
        from gtfspy.util import wgs84_distance
        lat1, lat3 = lats_1_3
        lon1, lon3 = lons_1_3

        distance = wgs84_distance(lat1, lon1, lat3, lon3)
        # Radius just above half the separation -> a sliver of overlap.
        buffer = distance / 2. + 1
        area_1_3b = compute_buffered_area_of_stops(lats_1_3,
                                                   lons_1_3,
                                                   buffer,
                                                   resolution=100)
        one_point_true_area = np.pi * buffer**2
        self.assertLess(one_point_true_area * 1.5, area_1_3b)
        self.assertLess(area_1_3b, 2 * one_point_true_area)

        # Radius just below half the separation -> circles stay disjoint.
        buffer = distance / 2. - 1
        area_1_3b = compute_buffered_area_of_stops(lats_1_3,
                                                   lons_1_3,
                                                   buffer,
                                                   resolution=100)
        two_points_nonoverlap_true_area = 2 * buffer**2 * np.pi
        self.assertGreater(area_1_3b, two_points_nonoverlap_true_area * 0.95)
        self.assertLess(area_1_3b, two_points_nonoverlap_true_area)