def _write_stats(self):
        G = GTFS(self.day_db_path)
        net = combined_stop_to_stop_transit_network(G)
        sections = net.edges(data=True)
        n_links = len(sections)
        section_lengths = []
        vehicle_kilometers_per_section = []
        for from_I, to_I, data in sections:
            section_lengths.append(data['d'])
            vehicle_kilometers_per_section.append(data['n_vehicles'] * data['d'] / 1000.)

        stats = {"n_stops": len(G.stops(require_reference_in_stop_times=True)),
                 "n_connections": len(G.get_transit_events()),
                 "n_links": n_links,
                 "network_length_m": sum(section_lengths),
                 "link_distance_avg_m": int(sum(section_lengths) / len(section_lengths)),
                 "vehicle_kilometers": sum(vehicle_kilometers_per_section),
                 "buffer_center_lat": self.lat,
                 "buffer_center_lon": self.lon,
                 "buffer_radius_km": self.buffer_distance,
                 "extract_start_date": self.get_weekly_extract_start_date().strftime("%Y-%m-%d")
                 }
        self.__verify_stats(stats)
        df = pandas.DataFrame.from_dict({key:[value] for key, value in stats.items()})
        df.to_csv(self.stats_fname, sep=";", columns=list(sorted(stats.keys())), index=False)
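
# Usage sketch (illustrative; "stats.csv" stands in for the pipeline's stats_fname): the stats file
# written above is a plain semicolon-separated CSV, so it can be read back directly with pandas.
#
#   stats_df = pandas.read_csv("stats.csv", sep=";")
#   print(stats_df["vehicle_kilometers"].iloc[0])
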
def add_zone_to_stop_table(zone_shape_path=DEMAND_ZONES):
    """
    Creates table which relates stop_Is with TAZ zones and counts the number of stops
    :return:
    """
    crs = {"init": "espg:4326"}
    zones = gpd.read_file(zone_shape_path, crs=crs)
    for (name, gtfs_dict) in FEED_LIST:
        gtfs = GTFS(gtfs_dict["gtfs_dir"])
        df = gtfs.stops()
        geometry = [Point(xy) for xy in zip(df.lon, df.lat)]
        df = df.drop(["lon", "lat"], axis=1)

        gdf = gpd.GeoDataFrame(df, crs=crs, geometry=geometry)
        zones_and_stops = gpd.sjoin(gdf, zones, how="inner", op='intersects')
        try:
            gtfs.execute_custom_query(
                """ALTER TABLE stops ADD COLUMN n_stops INT;""")
            gtfs.execute_custom_query(
                """ALTER TABLE stops ADD COLUMN zone_id INT;""")
        except OperationalError:
            pass
        subset = zones_and_stops[['WSP_ENN', 'stop_I']]
        tuples = [tuple(x) for x in subset.values]
        gtfs.conn.executemany(
            """UPDATE stops SET zone_id = ? WHERE stop_I = ?""", tuples)
        gtfs.conn.commit()
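
# A quick sanity check of the zone assignment above (illustrative only; it uses the same
# execute_custom_query API as the function):
#
#   zone_counts = gtfs.execute_custom_query(
#       """SELECT zone_id, count(*) FROM stops GROUP BY zone_id""").fetchall()
#   print(zone_counts)
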
def analysis_zones(as_dict=False):
    """
    returns data containers that pair zone type to a set of stops
    :param as_dict:
    :return:
    """
    gtfs_old = GTFS(OLD_DICT["gtfs_dir"])
    gtfs_lm = GTFS(LM_DICT["gtfs_dir"])
    station_distance = 600
    upstream_ratio = 0.5
    df_old = gtfs_old.get_stops_for_route_type(1)
    df_lm = gtfs_lm.get_stops_for_route_type(1)
    new_metro = difference_of_pandas_dfs(df_old, df_lm, ["stop_I"])
    old_metro = difference_of_pandas_dfs(new_metro, df_lm, ["stop_I"])
    train = gtfs_lm.get_stops_for_route_type(2)
    feeder_area = pd.DataFrame()
    other_stops = gtfs_lm.stops()
    jda = JourneyDataAnalyzer(LM_DICT["journey_dir"], LM_DICT["gtfs_dir"])
    # jda = JourneyDataAnalyzer(OLD_DICT["journey_dir"], OLD_DICT["gtfs_dir"])

    areas_to_remove = stops_to_exclude(return_sqlite_list=False)
    df = jda.get_upstream_stops_ratio(
        1040, [str(i.stop_I) for i in new_metro.itertuples()], upstream_ratio)
    feeder_area = feeder_area.append(df)
    # df = jda.get_upstream_stops_ratio(7193, 563, 0.7)
    print("new metro")
    for i in new_metro.itertuples():
        df = gtfs_lm.get_stops_within_distance(i.stop_I, station_distance)
        new_metro = new_metro.append(df)

    print("old metro")

    for i in old_metro.itertuples():
        df = gtfs_lm.get_stops_within_distance(i.stop_I, station_distance)
        old_metro = old_metro.append(df)
    print("train")

    for i in train.itertuples():
        df = gtfs_lm.get_stops_within_distance(i.stop_I, station_distance)
        train = train.append(df)

    new_metro = new_metro.drop_duplicates().reset_index(drop=True)
    old_metro = old_metro.drop_duplicates().reset_index(drop=True)
    train = train.drop_duplicates().reset_index(drop=True)
    feeder_area = feeder_area.drop_duplicates().reset_index(drop=True)

    # cleaning up borders
    new_metro = difference_of_pandas_dfs(old_metro, new_metro, ["stop_I"])
    for zone in [new_metro, old_metro, areas_to_remove]:
        train = difference_of_pandas_dfs(zone, train, ["stop_I"])
    for zone in [new_metro, train, old_metro, areas_to_remove]:
        feeder_area = difference_of_pandas_dfs(zone, feeder_area, ["stop_I"])

    spec_areas = pd.concat(
        [new_metro, old_metro, train, feeder_area, areas_to_remove])

    other_stops = difference_of_pandas_dfs(spec_areas, other_stops, ["stop_I"])

    old_metro = old_metro.assign(stop_cat=1)
    new_metro = new_metro.assign(stop_cat=2)
    train = train.assign(stop_cat=3)
    feeder_area = feeder_area.assign(stop_cat=4)
    other_stops = other_stops.assign(stop_cat=5)
    all_stops = pd.concat(
        [new_metro, old_metro, train, feeder_area,
         other_stops]).reset_index(drop=True)
    if as_dict:
        all_dfs = {
            "new_metro_stations": new_metro,
            "feeder_bus_area": feeder_area,
            "old_metro_stations": old_metro,
            "commuter_train_stations": train,
            "other_stops": other_stops
        }
    else:
        all_dfs = [("new_metro_stations", new_metro),
                   ("feeder_bus_area", feeder_area),
                   ("old_metro_stations", old_metro),
                   ("commuter_train_stations", train),
                   ("other_stops", other_stops)]
    return all_dfs, all_stops
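
# Usage sketch for analysis_zones (illustrative):
#
#   zone_dfs, all_stops = analysis_zones(as_dict=True)
#   for zone_name, zone_df in zone_dfs.items():
#       print(zone_name, len(zone_df))
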
        license = license.replace("CC BY0", "CC0")
        license = license.replace("_", " ")
        city_data_dict["License"] = license
        feeds = get_feeds_from_to_publish_tuple(city_data)
        pipeline = ExtractPipeline(city_data, feeds)
        try:
            day_G = GTFS(pipeline.day_db_path)
            trip_counts_per_day = day_G.get_trip_counts_per_day()
            print(trip_counts_per_day)
            assert len(trip_counts_per_day) <= 3
            city_data_dict["Extract date"] = str(trip_counts_per_day.loc[
                trip_counts_per_day['trip_counts'] == max(
                    trip_counts_per_day['trip_counts'])].iloc[0]['date'])
            print(city_data_dict["Extract date"].replace(" 00:00:00", ""))
            city_data_dict["n_stops"] = len(
                day_G.stops(require_reference_in_stop_times=True))
            city_data_dict["n_connections"] = len(day_G.get_transit_events())
            n_links = len(
                combined_stop_to_stop_transit_network(day_G).edges(data=True))
            city_data_dict["n_links"] = int(n_links)
        except FileNotFoundError as e:
            print("File " + pipeline.day_db_path + " was not found")
            city_data_dict["Extract date"] = "NaN"
        cities.append(city_data_dict)
    pickle.dump(cities, open(pickle_cache_file, 'wb'), -1)
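# The pickled cache written above can later be loaded back (illustrative sketch, assuming the
# same pickle_cache_file path):
#
#   with open(pickle_cache_file, 'rb') as f:
#       cities = pickle.load(f)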


def spaces(x):
    # Groups an integer's digits in threes with spaces (reconstructed; the original snippet was cut off here).
    try:
        num_as_str_reversed = str(int(x))[::-1]
        chunks = [num_as_str_reversed[i:i + 3] for i in range(0, len(num_as_str_reversed), 3)]
        return ' '.join(chunks)[::-1]
    except ValueError:
        return x
Example #5
class TimetableValidator(object):
    def __init__(self, gtfs, buffer_params=None):
        """
        Parameters
        ----------
        gtfs: GTFS, or path to a GTFS database
            A GTFS object
        """
        if not isinstance(gtfs, GTFS):
            self.gtfs = GTFS(gtfs)
        else:
            self.gtfs = gtfs
        self.buffer_params = buffer_params
        self.warnings_container = WarningsContainer()

    def validate_and_get_warnings(self):
        """
        Validates/checks a given GTFS feed with respect to a number of different issues.

        The set of warnings that are checked for can be found in gtfs_validator.ALL_WARNINGS

        Returns
        -------
        warnings: WarningsContainer
        """
        self.warnings_container.clear()
        self._validate_stops_with_same_stop_time()
        self._validate_speeds_and_trip_times()
        self._validate_stop_spacings()
        self._validate_stop_sequence()
        self._validate_misplaced_stops()
        return self.warnings_container

    def _validate_misplaced_stops(self):
        if self.buffer_params:
            p = self.buffer_params
            center_lat = p['lat']
            center_lon = p['lon']
            buffer_distance = p[
                'buffer_distance'] * 1000 * 1.002  # some error margin for rounding
            for stop_row in self.gtfs.stops().itertuples():
                if buffer_distance < wgs84_distance(
                        center_lat, center_lon, stop_row.lat, stop_row.lon):
                    self.warnings_container.add_warning(
                        WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY, stop_row)
                    print(WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY, stop_row)

    def _validate_stops_with_same_stop_time(self):
        n_stops_with_same_time = 5
        # this query returns the trips where there are N or more stops with the same stop time
        rows = self.gtfs.get_cursor().execute(
            'SELECT '
            'trip_I, '
            'arr_time, '
            'N '
            'FROM '
            '(SELECT trip_I, arr_time, count(*) as N FROM stop_times GROUP BY trip_I, arr_time) q1 '
            'WHERE N >= ?', (n_stops_with_same_time, ))
        for row in rows:
            self.warnings_container.add_warning(
                WARNING_5_OR_MORE_CONSECUTIVE_STOPS_WITH_SAME_TIME, row)

    def _validate_stop_spacings(self):
        self.gtfs.conn.create_function("find_distance", 4, wgs84_distance)
        # this query calculates distance and travel time between consecutive stops
        rows = self.gtfs.execute_custom_query(
            'SELECT '
            'q1.trip_I, '
            'type, '
            'q1.stop_I as stop_1, '
            'q2.stop_I as stop_2, '
            'CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT) as distance, '
            'q2.arr_time_ds - q1.arr_time_ds as traveltime '
            'FROM '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q1, '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q2, '
            'trips, '
            'routes '
            'WHERE q1.trip_I = q2.trip_I '
            'AND q1.seq + 1 = q2.seq '
            'AND q1.trip_I = trips.trip_I '
            'AND trips.route_I = routes.route_I ').fetchall()
        for row in rows:
            if row[4] > MAX_ALLOWED_DISTANCE_BETWEEN_CONSECUTIVE_STOPS:
                self.warnings_container.add_warning(WARNING_LONG_STOP_SPACING,
                                                    row)
            if row[5] > MAX_TIME_BETWEEN_STOPS:
                self.warnings_container.add_warning(
                    WARNING_LONG_TRAVEL_TIME_BETWEEN_STOPS, row)

    def _validate_speeds_and_trip_times(self):
        # These are the mode - feasible speed combinations used here:
        # https://support.google.com/transitpartners/answer/1095482?hl=en
        self.gtfs.conn.create_function("find_distance", 4, wgs84_distance)

        # this query returns, for each trip, the total distance and travel time summed over its consecutive stop spacings
        rows = pandas.read_sql(
            'SELECT '
            'q1.trip_I, '
            'type, '
            'sum(CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT)) AS total_distance, '  # sum used for getting total
            'sum(q2.arr_time_ds - q1.arr_time_ds) AS total_traveltime, '  # sum used for getting total
            'count(*)'  # for getting the total number of stops
            'FROM '
            '   (SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q1, '
            '   (SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q2, '
            '    trips, '
            '    routes '
            'WHERE q1.trip_I = q2.trip_I AND q1.seq + 1 = q2.seq AND q1.trip_I = trips.trip_I '
            'AND trips.route_I = routes.route_I GROUP BY q1.trip_I',
            self.gtfs.conn)

        for row in rows.itertuples():
            avg_velocity_km_per_h = row.total_distance / max(
                row.total_traveltime, 1) * 3.6
            if avg_velocity_km_per_h > GTFS_TYPE_TO_MAX_SPEED[row.type]:
                self.warnings_container.add_warning(
                    WARNING_TRIP_UNREALISTIC_AVERAGE_SPEED + " (route_type=" +
                    str(row.type) + ")", row)
            if row.total_traveltime > MAX_TRIP_TIME:
                self.warnings_container.add_warning(
                    WARNING_LONG_TRIP_TIME.format(MAX_TRIP_TIME=MAX_TRIP_TIME),
                    row, 1)

    def _validate_stop_sequence(self):
        # This function checks if the seq values in stop_times are increasing with departure_time,
        # and that seq always increases by one.
        rows = self.gtfs.execute_custom_query(
            'SELECT trip_I, dep_time_ds, seq '
            'FROM stop_times '
            'ORDER BY trip_I, dep_time_ds, seq').fetchall()
        old_trip_id = None
        old_seq = None
        for row in rows:
            new_trip_id = int(row[0])
            new_seq = int(row[2])
            if old_trip_id == new_trip_id:
                if old_seq + 1 != new_seq:
                    self.warnings_container.add_warning(
                        WARNING_STOP_SEQUENCE_NOT_INCREMENTAL, row)
                if old_seq >= new_seq:
                    self.warnings_container.add_warning(
                        WARNING_STOP_SEQUENCE_ORDER_ERROR, row)
            old_trip_id = row[0]
            old_seq = row[2]
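
# Example usage of the validator above (a minimal sketch; "my_feed.sqlite" is a placeholder
# path to a database imported with gtfspy):
#
#   validator = TimetableValidator("my_feed.sqlite")
#   warnings = validator.validate_and_get_warnings()
#   print(warnings)
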
Example #6
class TestGTFSFilter(unittest.TestCase):
    def setUp(self):
        self.gtfs_source_dir = os.path.join(os.path.dirname(__file__),
                                            "test_data")
        self.gtfs_source_dir_filter_test = os.path.join(
            self.gtfs_source_dir, "filter_test_feed/")

        # self.G = GTFS.from_directory_as_inmemory_db(self.gtfs_source_dir)

        # some preparations:
        self.fname = self.gtfs_source_dir + "/test_gtfs.sqlite"
        self.fname_copy = self.gtfs_source_dir + "/test_gtfs_copy.sqlite"
        self.fname_filter = self.gtfs_source_dir + "/test_gtfs_filter_test.sqlite"

        self._remove_temporary_files()
        self.assertFalse(os.path.exists(self.fname_copy))

        conn = sqlite3.connect(self.fname)
        import_gtfs(self.gtfs_source_dir,
                    conn,
                    preserve_connection=True,
                    print_progress=False)
        conn_filter = sqlite3.connect(self.fname_filter)
        import_gtfs(self.gtfs_source_dir_filter_test,
                    conn_filter,
                    preserve_connection=True,
                    print_progress=False)

        self.G = GTFS(conn)
        self.G_filter_test = GTFS(conn_filter)

        self.hash_orig = hashlib.md5(open(self.fname, 'rb').read()).hexdigest()

    def _remove_temporary_files(self):
        for fn in [self.fname, self.fname_copy, self.fname_filter]:
            if os.path.exists(fn) and os.path.isfile(fn):
                os.remove(fn)

    def tearDown(self):
        self._remove_temporary_files()

    def test_copy(self):
        # do a simple copy
        FilterExtract(self.G, self.fname_copy,
                      update_metadata=False).create_filtered_copy()

        # check that the copying has been properly performed:
        hash_copy = hashlib.md5(open(self.fname_copy, 'rb').read()).hexdigest()
        self.assertTrue(os.path.exists(self.fname_copy))
        self.assertEqual(self.hash_orig, hash_copy)

    def test_filter_change_metadata(self):
        # A simple test that changing update_metadata to True, does update some stuff:
        FilterExtract(self.G, self.fname_copy,
                      update_metadata=True).create_filtered_copy()
        # check that the copying has been properly performed:
        hash_orig = hashlib.md5(open(self.fname, 'rb').read()).hexdigest()
        hash_copy = hashlib.md5(open(self.fname_copy, 'rb').read()).hexdigest()
        self.assertTrue(os.path.exists(self.fname_copy))
        self.assertNotEqual(hash_orig, hash_copy)
        os.remove(self.fname_copy)

    def test_filter_by_agency(self):
        FilterExtract(self.G, self.fname_copy,
                      agency_ids_to_preserve=['DTA']).create_filtered_copy()
        hash_copy = hashlib.md5(open(self.fname_copy, 'rb').read()).hexdigest()
        self.assertNotEqual(self.hash_orig, hash_copy)
        G_copy = GTFS(self.fname_copy)
        agency_table = G_copy.get_table("agencies")
        assert "EXA" not in agency_table[
            'agency_id'].values, "EXA agency should not be preserved"
        assert "DTA" in agency_table[
            'agency_id'].values, "DTA agency should be preserved"
        routes_table = G_copy.get_table("routes")
        assert "EXR1" not in routes_table[
            'route_id'].values, "EXR1 route_id should not be preserved"
        assert "AB" in routes_table[
            'route_id'].values, "AB route_id should be preserved"
        trips_table = G_copy.get_table("trips")
        assert "EXT1" not in trips_table[
            'trip_id'].values, "EXR1 route_id should not be preserved"
        assert "AB1" in trips_table[
            'trip_id'].values, "AB1 route_id should be preserved"
        calendar_table = G_copy.get_table("calendar")
        assert "FULLW" in calendar_table[
            'service_id'].values, "FULLW service_id should be preserved"
        # stop_times
        stop_times_table = G_copy.get_table("stop_times")
        # 01:23:45 corresponds to 3600 + (23 * 60) + 45 [in day seconds]
        assert 3600 + (23 * 60) + 45 not in stop_times_table['arr_time'].values
        os.remove(self.fname_copy)

    def test_filter_by_start_and_end_full_range(self):
        # untested tables with filtering: stops, shapes
        # test filtering by start and end time, copy full range
        FilterExtract(self.G,
                      self.fname_copy,
                      start_date=u"2007-01-01",
                      end_date=u"2011-01-01",
                      update_metadata=False).create_filtered_copy()
        G_copy = GTFS(self.fname_copy)
        dsut_end = G_copy.get_day_start_ut("2010-12-31")
        dsut_to_trip_I = G_copy.get_tripIs_within_range_by_dsut(
            dsut_end, dsut_end + 24 * 3600)
        self.assertGreater(len(dsut_to_trip_I), 0)
        os.remove(self.fname_copy)

    def test_filter_end_date_not_included(self):
        # the end date should not be included:
        FilterExtract(self.G,
                      self.fname_copy,
                      start_date="2007-01-02",
                      end_date="2010-12-31").create_filtered_copy()

        hash_copy = hashlib.md5(open(self.fname_copy, 'rb').read()).hexdigest()
        self.assertNotEqual(self.hash_orig, hash_copy)
        G_copy = GTFS(self.fname_copy)
        dsut_end = G_copy.get_day_start_ut("2010-12-31")
        dsut_to_trip_I = G_copy.get_tripIs_within_range_by_dsut(
            dsut_end, dsut_end + 24 * 3600)
        self.assertEqual(len(dsut_to_trip_I), 0)

        calendar_copy = G_copy.get_table("calendar")
        max_date_calendar = max([
            datetime.datetime.strptime(el, "%Y-%m-%d")
            for el in calendar_copy["end_date"].values
        ])
        min_date_calendar = min([
            datetime.datetime.strptime(el, "%Y-%m-%d")
            for el in calendar_copy["start_date"].values
        ])
        end_date_not_included = datetime.datetime.strptime(
            "2010-12-31", "%Y-%m-%d")
        start_date_not_included = datetime.datetime.strptime(
            "2007-01-01", "%Y-%m-%d")
        self.assertLess(max_date_calendar,
                        end_date_not_included,
                        msg="the last date should not be included in calendar")
        self.assertLess(start_date_not_included, min_date_calendar)
        os.remove(self.fname_copy)

    def test_filter_spatially(self):
        # test that the db is split by a given spatial boundary
        FilterExtract(self.G,
                      self.fname_copy,
                      buffer_lat=36.914893,
                      buffer_lon=-116.76821,
                      buffer_distance_km=50).create_filtered_copy()
        G_copy = GTFS(self.fname_copy)

        stops_table = G_copy.get_table("stops")
        self.assertNotIn("FUR_CREEK_RES", stops_table['stop_id'].values)
        self.assertIn("AMV", stops_table['stop_id'].values)
        self.assertEqual(len(stops_table['stop_id'].values), 8)

        conn_copy = sqlite3.connect(self.fname_copy)
        stop_ids_df = pandas.read_sql(
            'SELECT stop_id from stop_times '
            'left join stops '
            'on stops.stop_I = stop_times.stop_I', conn_copy)
        stop_ids = stop_ids_df["stop_id"].values

        self.assertNotIn("FUR_CREEK_RES", stop_ids)
        self.assertIn("AMV", stop_ids)

        trips_table = G_copy.get_table("trips")
        self.assertNotIn("BFC1", trips_table['trip_id'].values)

        routes_table = G_copy.get_table("routes")
        self.assertNotIn("BFC", routes_table['route_id'].values)
        # cases:
        # whole trip excluded
        # whole route excluded
        # whole agency excluded
        # part of trip excluded
        # part of route excluded
        # part of agency excluded
        # not removing stops from a trip that returns into area

        # test higher-order removals
        # stop A preserved
        # -> stop B preserved
        # -> stop C preserved

    def test_filter_spatially_2(self):
        n_rows_before = {
            "routes": 4,
            "stop_times": 14,
            "trips": 4,
            "stops": 6,
            "shapes": 4
        }
        n_rows_after_1000 = {  # within "soft buffer" in the feed data
            "routes": 1,
            "stop_times": 2,
            "trips": 1,
            "stops": 2,
            "shapes": 0
        }
        n_rows_after_3000 = {  # within "hard buffer" in the feed data
            "routes": len(["t1", "t3", "t4"]),
            "stop_times": 11,
            "trips": 4,
            "stops": len({"P", "H", "V", "L", "B"}),
            # for some reason, the first "shapes": 4
        }
        paris_lat = 48.832781
        paris_lon = 2.360734

        SELECT_MIN_MAX_SHAPE_BREAKS_BY_TRIP_I_SQL = \
            "SELECT trips.trip_I, shape_id, min(shape_break) as min_shape_break, max(shape_break) as max_shape_break FROM trips, stop_times WHERE trips.trip_I=stop_times.trip_I GROUP BY trips.trip_I"
        trip_min_max_shape_seqs = pandas.read_sql(
            SELECT_MIN_MAX_SHAPE_BREAKS_BY_TRIP_I_SQL, self.G_filter_test.conn)

        for distance_km, n_rows_after in zip(
            [1000, 3000], [n_rows_after_1000, n_rows_after_3000]):
            try:
                os.remove(self.fname_copy)
            except FileNotFoundError:
                pass
            FilterExtract(
                self.G_filter_test,
                self.fname_copy,
                buffer_lat=paris_lat,
                buffer_lon=paris_lon,
                buffer_distance_km=distance_km).create_filtered_copy()
            for table_name, n_rows in n_rows_before.items():
                self.assertEqual(
                    len(self.G_filter_test.get_table(table_name)), n_rows,
                    "Row counts before differ in " + table_name +
                    ", distance: " + str(distance_km))
            G_copy = GTFS(self.fname_copy)
            for table_name, n_rows in n_rows_after.items():
                table = G_copy.get_table(table_name)
                self.assertEqual(
                    len(table), n_rows,
                    "Row counts after differ in " + table_name +
                    ", distance: " + str(distance_km) + "\n" + str(table))

            # assert that stop_times are resequenced starting from one
            counts = pandas.read_sql(
                "SELECT count(*) FROM stop_times GROUP BY trip_I ORDER BY trip_I",
                G_copy.conn)
            max_values = pandas.read_sql(
                "SELECT max(seq) FROM stop_times GROUP BY trip_I ORDER BY trip_I",
                G_copy.conn)
            self.assertTrue((counts.values == max_values.values).all())

    def test_remove_all_trips_fully_outside_buffer(self):
        stops = self.G.stops()
        stop_1 = stops[stops['stop_I'] == 1]

        n_trips_before = len(self.G.get_table("trips"))

        remove_all_trips_fully_outside_buffer(self.G.conn, float(stop_1.lat),
                                              float(stop_1.lon), 100000)
        self.assertEqual(len(self.G.get_table("trips")), n_trips_before)

        # 0.002 km (= at most 2 meters from the stop); rounding errors can take place...
        remove_all_trips_fully_outside_buffer(self.G.conn, float(stop_1.lat),
                                              float(stop_1.lon), 0.002)
        self.assertEqual(len(self.G.get_table("trips")),
                         2)  # value "2" comes from the data
Example #7
class TimetableValidator(object):
    def __init__(self, gtfs, buffer_params=None):
        """
        Parameters
        ----------
        gtfs: GTFS, or path to a GTFS database
            A GTFS object
        """
        if not isinstance(gtfs, GTFS):
            self.gtfs = GTFS(gtfs)
        else:
            self.gtfs = gtfs
        self.buffer_params = buffer_params
        self.warnings_container = WarningsContainer()

    def get_warnings(self):
        """
        Validates/checks a given GTFS feed with respect to a number of different issues.

        The set of warnings that are checked for can be found in gtfs_validator.ALL_WARNINGS

        Returns
        -------
        warnings: WarningsContainer
        """
        self.warnings_container.clear()
        self._validate_stops_with_same_stop_time()
        self._validate_speeds_and_trip_times()
        self._validate_stop_spacings()
        self._validate_stop_sequence()
        self._validate_misplaced_stops()
        self.warnings_container.print_summary()
        return self.warnings_container

    def _validate_misplaced_stops(self):
        if self.buffer_params:
            p = self.buffer_params
            center_lat = p['lat']
            center_lon = p['lon']
            distance = p['buffer_distance'] * 2 * 1000
            count = 0
            for stop_row in self.gtfs.stops().itertuples():
                if distance < wgs84_distance(center_lat, center_lon,
                                             stop_row.lat, stop_row.lon):
                    self.warnings_container.add_warning(
                        stop_row, WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY)
                    print(WARNING_STOP_FAR_AWAY_FROM_FILTER_BOUNDARY, stop_row)

    def _validate_stops_with_same_stop_time(self):
        n_stops_with_same_time = 5
        # this query returns the trips where there are N or more stops with the same stop time
        rows = self.gtfs.get_cursor().execute(
            'SELECT '
            'trip_I, '
            'arr_time, '
            'N '
            'FROM '
            '(SELECT trip_I, arr_time, count(*) as N FROM stop_times GROUP BY trip_I, arr_time) q1 '
            'WHERE N >= ?', (n_stops_with_same_time, ))
        for row in rows:
            self.warnings_container.add_warning(
                row, WARNING_5_OR_MORE_CONSECUTIVE_STOPS_WITH_SAME_TIME)

    def _validate_stop_spacings(self):
        self.gtfs.conn.create_function("find_distance", 4, wgs84_distance)
        max_stop_spacing = 20000  # meters
        max_time_between_stops = 1800  # seconds
        # this query calculates distance and travel time between consecutive stops
        rows = self.gtfs.execute_custom_query(
            'SELECT '
            'q1.trip_I, '
            'type, '
            'q1.stop_I as stop_1, '
            'q2.stop_I as stop_2, '
            'CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT) as distance, '
            'q2.arr_time_ds - q1.arr_time_ds as traveltime '
            'FROM '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q1, '
            '(SELECT * FROM stop_times, stops WHERE stop_times.stop_I = stops.stop_I) q2, '
            'trips, '
            'routes '
            'WHERE q1.trip_I = q2.trip_I '
            'AND q1.seq + 1 = q2.seq '
            'AND q1.trip_I = trips.trip_I '
            'AND trips.route_I = routes.route_I ').fetchall()
        for row in rows:
            if row[4] > max_stop_spacing:
                self.warnings_container.add_warning(row,
                                                    WARNING_LONG_STOP_SPACING)
            if row[5] > max_time_between_stops:
                self.warnings_container.add_warning(
                    row, WARNING_LONG_TRAVEL_TIME_BETWEEN_STOPS)

    def _validate_speeds_and_trip_times(self):
        # These are the mode - feasible speed combinations used here:
        # https://support.google.com/transitpartners/answer/1095482?hl=en
        gtfs_type_to_max_speed = {
            route_types.TRAM: 100,
            route_types.SUBWAY: 150,
            route_types.RAIL: 300,
            route_types.BUS: 100,
            route_types.FERRY: 80,
            route_types.CABLE_CAR: 50,
            route_types.GONDOLA: 50,
            route_types.FUNICULAR: 50,
            route_types.AIRCRAFT: 1000
        }
        max_trip_time = 7200  # seconds
        self.gtfs.conn.create_function("find_distance", 4, wgs84_distance)

        # this query returns, for each trip, the total distance and travel time summed over its consecutive stop spacings
        rows = self.gtfs.execute_custom_query(
            'SELECT '
            ' q1.trip_I, '
            ' type, '
            ' sum(CAST(find_distance(q1.lat, q1.lon, q2.lat, q2.lon) AS INT)) AS total_distance, '
            ' sum(q2.arr_time_ds - q1.arr_time_ds) AS total_traveltime '
            ' FROM '
            '(SELECT * FROM stop_times, '
            'stops WHERE stop_times.stop_I = stops.stop_I) q1, '
            '(SELECT * FROM stop_times, '
            'stops WHERE stop_times.stop_I = stops.stop_I) q2, trips, routes WHERE q1.trip_I = q2.trip_I '
            'AND q1.seq + 1 = q2.seq AND q1.trip_I = trips.trip_I '
            '  AND trips.route_I = routes.route_I GROUP BY q1.trip_I'
        ).fetchall()

        for row in rows:
            avg_velocity = row[2] / max(row[3], 1) * 3.6
            if avg_velocity > gtfs_type_to_max_speed[row[1]]:
                self.warnings_container.add_warning(
                    row, WARNING_UNREALISTIC_AVERAGE_SPEED)

            if row[3] > max_trip_time:
                self.warnings_container.add_warning(row,
                                                    WARNING_LONG_TRIP_TIME)

    def _validate_stop_sequence(self):
        # this function checks that the stop sequence value increases by one for each stop; this is not (yet) enforced
        rows = self.gtfs.execute_custom_query(
            'SELECT trip_I, dep_time_ds, seq '
            'FROM stop_times '
            'ORDER BY trip_I, dep_time_ds, seq').fetchall()

        old_trip_id = None
        for row in rows:
            new_trip_id = row[0]
            new_seq = row[2]
            if old_trip_id == new_trip_id:
                if old_seq + 1 != new_seq:
                    self.warnings_container.add_warning(
                        row, WARNING_STOP_SEQUENCE_ERROR)
            old_trip_id = row[0]
            old_seq = row[2]
Example #8
import numpy
import matplotlib.pyplot as plt

from gtfspy.gtfs import GTFS
# plot_route_network and ROUTE_TYPE_TO_ZORDER are assumed to come from gtfspy's plotting
# helpers; adjust these import paths if they differ in your version of the library.
from gtfspy.mapviz import plot_route_network
from gtfspy.route_types import ROUTE_TYPE_TO_ZORDER

from settings import FIGS_DIRECTORY
import settings
from matplotlib import rc

rc('legend', framealpha=0.8)
rc("text", usetex=True)


fname = "../data/main.sqlite"  # A database imported using gtfspy
g = GTFS(fname)

fig = plt.figure(figsize=(5, 3.5))
ax = fig.add_subplot(111)
plt.subplots_adjust(left=0, right=1, top=1, bottom=0)

stops = g.stops()
lats = stops["lat"].values
lons = stops["lon"].values

ROUTE_TYPE_TO_ZORDER[1] = 10  # set subway on top

spatial_bounds = {
    "lat_min": numpy.percentile(lats, 2),
    "lat_max": numpy.percentile(lats, 94),
    "lon_min": numpy.percentile(lons, 5),
    "lon_max": numpy.percentile(lons, 95)
}

ax, smopy_map = plot_route_network(g, ax, spatial_bounds=spatial_bounds, map_alpha=0.8, scalebar=True, return_smopy_map=True)

stop_lats = []
class GenericJourneyDataPipeline:
    def __init__(self):
        self.G = GTFS(GTFS_DATA_BASEDIR)
        self.day_start_ut = self.G.get_suitable_date_for_daily_extract(
            ut=True) + 3600
        self.start_time = self.day_start_ut + 8 * 3600
        self.end_time = self.day_start_ut + 11 * 3600
        self.profiles = {}
        self.journey_analyzer = None
        # self.analysis_start_time
        # self.analysis_end_time
        makedirs(RESULTS_DIRECTORY)
        print("Retrieving transit events")
        self.connections = []
        for e in self.G.generate_routable_transit_events(
                start_time_ut=self.start_time, end_time_ut=self.end_time):
            self.connections.append(
                Connection(int(e.from_stop_I), int(e.to_stop_I),
                           int(e.dep_time_ut), int(e.arr_time_ut),
                           int(e.trip_I)))
        print("Retrieving walking network")
        self.net = self.G.get_walk_transfer_stop_to_stop_network()

    def script(self):

        self.get_profile_data()
        journey_analyzer = JourneyDataManager(TARGET_STOPS,
                                              JOURNEY_DATA_DIR,
                                              GTFS_DATA_BASEDIR,
                                              ROUTING_PARAMS,
                                              track_route=True,
                                              close_connection=False)
        journey_analyzer.import_journey_data_for_target_stop(self.profiles)
        journey_analyzer.create_indices()
        if False:
            journey_analyzer.add_fastest_path_column()
        """
        all_geoms = journey_analyzer.get_all_geoms()
        journey_path = os.path.join(RESULTS_DIRECTORY, "all_routes_to_" + target_list_to_str(TARGET_STOPS) + ".geojson")
        with open(journey_path, 'w') as f:
            dump(journey_analyzer.extract_geojson(all_geoms), f)
        """

    def get_profile_data(self, targets=TARGET_STOPS, recompute=False):
        node_profiles_fname = os.path.join(
            RESULTS_DIRECTORY,
            "node_profile_" + target_list_to_str(targets) + ".pickle")
        if not recompute and os.path.exists(node_profiles_fname):
            print("Loading precomputed data")
            self.profiles = pickle.load(open(node_profiles_fname, 'rb'))
            print("Loaded precomputed data")
        else:
            print("Recomputing profiles")
            self._compute_profile_data()
            pickle.dump(self.profiles, open(node_profiles_fname, 'wb'), -1)
            print("Recomputing profiles")

    def _compute_profile_data(self):
        csp = MultiObjectivePseudoCSAProfiler(self.connections,
                                              TARGET_STOPS,
                                              walk_network=self.net,
                                              transfer_margin=TRANSFER_MARGIN,
                                              walk_speed=WALK_SPEED,
                                              verbose=True,
                                              track_vehicle_legs=False,
                                              track_time=True,
                                              track_route=True)
        print("CSA Profiler running...")
        csp.run()
        print("CSA profiler finished")

        self.profiles = dict(csp.stop_profiles)

    def key_measures_as_csv(self, csv_path="stop_data.csv"):
        """
        Combines key temporal distance measures for each node with stop data from gtfs and stores in csv format
        :return:
        """
        node_profiles_list = []
        # iterate through all node profiles and add the NodeProfileAnalyzer data to a list of dicts
        for node, profile in self.profiles.items():
            npa = NodeProfileAnalyzerTimeAndVehLegs.from_profile(
                profile, self.start_time, self.end_time)
            node_profile_dict = npa.get_node_profile_measures_as_dict()
            node_profile_dict["node"] = node
            node_profiles_list.append(node_profile_dict)

        node_profiles = DataFrame(node_profiles_list)
        stops = self.G.stops()
        stops.join(node_profiles.set_index("node"),
                   on='stop_I').to_csv(path_or_buf=csv_path)