async def get_trip_row_from_id(self, trip_id: str):
    """Fetch one ``trips`` row for ``self.system`` and *trip_id*.

    Returns the result of ``fetchone()`` on the executed select.
    """
    trips = db.get_table("trips")
    lookup = (
        trips.select()
        .where(trips.c.system == self.system.value)
        .where(trips.c.trip_id == trip_id)
    )
    async with db.acquire_conn() as conn:
        result = await conn.execute(lookup)
        return await result.fetchone()
示例#2
0
async def write_trip_paths(routes_for_shape_id, paths):
    """Replace the ``trip_paths`` rows of ``TRANSIT_SYSTEM`` with *paths*.

    Args:
        routes_for_shape_id: Mapping from shape_id to its routes. It is read
            with ``.get``, so a shape without an entry is stored with ``None``.
        paths: Mapping from shape_id to an iterable of points exposing ``x``
            and ``y``; each path is serialized as a ``LINESTRING(...)`` string.
    """
    system = TRANSIT_SYSTEM.value
    rows = [
        {
            "system": system,
            "shape_id": shape_id,
            "routes": routes_for_shape_id.get(shape_id),
            "shape": "LINESTRING({})".format(
                ", ".join("{} {}".format(pt.x, pt.y) for pt in points)
            ),
        }
        for shape_id, points in paths.items()
    ]

    trip_paths = db.get_table("trip_paths")
    upsert = insert(trip_paths).values(rows)
    upsert = upsert.on_conflict_do_update(
        index_elements=[trip_paths.c.system, trip_paths.c.shape_id],
        set_={"shape": upsert.excluded.shape},
    )
    async with db.acquire_conn() as conn:
        # Clear this system's rows first, then write the new set.
        deleted = await conn.execute(
            trip_paths.delete().where(trip_paths.c.system == system)
        )
        logging.info(
            "Deleted %d rows from %s", deleted.rowcount, trip_paths.name
        )
        await conn.execute(upsert)
        logging.info("Inserted %d rows into %s", len(rows), trip_paths.name)
示例#3
0
 async def query_realtime_stop_times(self, system: TransitSystem,
                                     route_id: str, stop_ids: List[str]):
     """Return realtime stop times from the last day for a route's stops.

     Each row carries ``stop_id``, ``departure`` and ``start_date`` from
     ``realtime_stop_times`` plus ``scheduled_departure`` from the matching
     ``stop_times`` row (left outer join on trip_id and stop_id).

     Args:
         system: Transit system whose ``value`` filters ``rst.system``.
         route_id: Route to filter on.
         stop_ids: Stops to include; an empty list yields an empty result.

     Returns:
         The fetched rows, or ``[]`` when *stop_ids* is empty.
     """
     # An empty tuple would be bound as "in ()", which is a SQL syntax error
     # (assuming psycopg2-style tuple adaptation), so there is nothing to run.
     if not stop_ids:
         return []
     async with db.acquire_conn() as conn:
         res = await conn.execute(
             """
             select
                 rst.stop_id,
                 rst.departure,
                 rst.start_date,
                 st.departure_time as scheduled_departure
             from realtime_stop_times as rst
             left outer join stop_times as st
             on
                 rst.trip_id = st.trip_id
                 and rst.stop_id = st.stop_id
             where
                 rst.system = %s
                 and rst.route_id = %s
                 and rst.stop_id in %s
                 and rst.departure >= %s
             """,
             system.value,
             route_id,
             tuple(stop_ids),
             # Only departures within the last 24 hours (UTC) are of interest.
             datetime.now(timezone.utc) - timedelta(days=1),
         )
         return await res.fetchall()
示例#4
0
async def process_feed(feed_id: str):
    """Download one NYC MTA feed and upsert it into ``realtime_raw``.

    The feed is stored both as parsed JSON and as its serialized protobuf
    bytes. If a row with the same primary key already exists, only its
    ``update_time`` is refreshed.

    Raises:
        Exception: If the downloaded FeedMessage is not initialized.
    """
    feed = await nyc.get_data(feed_id)
    fetched_at = datetime.now(timezone.utc)
    if not feed.IsInitialized():
        error = "Unable to parse NYC MTA feed {}: (FeedMessage not initialized)"
        raise Exception(error.format(feed_id))

    feed_timestamp = feed.header.timestamp
    feed_json = google.protobuf.json_format.MessageToJson(feed)
    age_seconds = time.time() - feed_timestamp
    summary = "NYC MTA feed {}: {} ({:.3f} seconds ago), {} JSON bytes".format(
        feed_id, feed_timestamp, age_seconds, len(feed_json))
    logging.info(summary)

    async with db.acquire_conn() as conn:
        raw_table = db.get_table("realtime_raw")
        upsert = insert(raw_table).values(
            system=gtfs.TransitSystem.NYC_MTA.value,
            feed_id=feed_id,
            time=datetime.fromtimestamp(feed_timestamp, timezone.utc),
            json=json.loads(feed_json),
            raw=feed.SerializeToString(),
            update_time=fetched_at,
        )
        upsert = upsert.on_conflict_do_update(
            raw_table.primary_key, set_=dict(update_time=fetched_at))
        await conn.execute(upsert)
示例#5
0
 async def query_timezone(self, system: TransitSystem):
     """Return ``gettz`` of the ``agency_timezone`` of *system*'s agency row.

     Only the first row returned for the system is consulted.
     """
     agency_table = db.get_table("agency")
     lookup = agency_table.select().where(
         agency_table.c.system == system.value)
     async with db.acquire_conn() as conn:
         result = await conn.execute(lookup)
         agency_row = await result.fetchone()
         return gettz(agency_row.agency_timezone)
示例#6
0
async def _load_edges(transit_system: TransitSystem) -> List[Edge]:
    """Load every ``map_edges`` row of *transit_system* as an ``Edge``.

    The path is selected as GeoJSON and its length is computed by the
    database via ``ST_Length(path::geography)``.
    """
    async with db.acquire_conn() as conn:
        edge_query = """
            select
                id,
                node_id1,
                node_id2,
                routes,
                ST_AsGeoJSON(path)::json as geojson,
                ST_Length(path::geography) as length
            from map_edges
            where system=%s
        """
        loaded = [
            Edge(
                id=record.id,
                node_id1=record.node_id1,
                node_id2=record.node_id2,
                geojson=record.geojson,
                length=record.length,
                route_ids=record.routes,
            )
            async for record in conn.execute(edge_query, transit_system.value)
        ]
    return loaded
 async def get_stop_exists(self, stop_id: str) -> bool:
     """Tell whether ``stops`` holds *stop_id* for ``self.system``."""
     stops = db.get_table("stops")
     lookup = (
         sa.select([stops.c.stop_id])
         .where(stops.c.system == self.system.value)
         .where(stops.c.stop_id == stop_id)
     )
     async with db.acquire_conn() as conn:
         found = await conn.scalar(lookup)
         return found is not None
示例#8
0
async def delete_tables(transit_system: gtfs.TransitSystem):
    """Delete *transit_system*'s rows from every table in ``IMPORT_SPECS``.

    Tables are processed in the reverse of their ``IMPORT_SPECS`` order.
    """
    tables = [db.get_table(spec.table) for spec in IMPORT_SPECS]
    # Order matters due to foreign keys, so go through the list back to front.
    tables.reverse()
    async with db.acquire_conn() as conn:
        for table in tables:
            purge = table.delete().where(
                table.c.system == transit_system.value)
            outcome = await conn.execute(purge)
            logging.info(
                "Deleted %d rows from %s", outcome.rowcount, table.name)
示例#9
0
 async def query_route(self, system: gtfs.TransitSystem, route_id: str):
     """Return the ``routes`` row for (*system*, *route_id*).

     Asserts that a row was found.
     """
     routes_table = db.get_table("routes")
     lookup = (
         routes_table.select()
         .where(routes_table.c.system == system.value)
         .where(routes_table.c.route_id == route_id)
     )
     async with db.acquire_conn() as conn:
         result = await conn.execute(lookup)
         found = await result.fetchone()
     assert found is not None
     return found
示例#10
0
 async def query_stop(self, system: gtfs.TransitSystem, stop_id: str):
     """Return the ``stops`` row for (*system*, *stop_id*).

     Asserts that a row was found.
     """
     stops_table = db.get_table("stops")
     lookup = (
         stops_table.select()
         .where(stops_table.c.system == system.value)
         .where(stops_table.c.stop_id == stop_id)
     )
     async with db.acquire_conn() as conn:
         result = await conn.execute(lookup)
         found = await result.fetchone()
     assert found is not None
     return found
示例#11
0
 async def query_stop_and_parents(self, system: TransitSystem,
                                  stop_id: str):
     """Return the ``stops`` rows of *system* tied to *stop_id*.

     A row matches when its ``stop_id`` or its ``parent_station`` equals
     *stop_id*.
     """
     stops_table = db.get_table("stops")
     is_stop_or_child = sa.or_(
         stops_table.c.stop_id == stop_id,
         stops_table.c.parent_station == stop_id,
     )
     lookup = (
         stops_table.select()
         .where(stops_table.c.system == system.value)
         .where(is_stop_or_child)
     )
     async with db.acquire_conn() as conn:
         result = await conn.execute(lookup)
         return await result.fetchall()
示例#12
0
    async def get_trip_row_from_descriptor(
            self, trip_descriptor: gtfs.TripDescriptor):
        """Resolve *trip_descriptor* to a trips row, or ``None``.

        The descriptor's trip_id is tried directly first. For NYC MTA only,
        a second attempt treats it as an ``alternate_trip_id`` in
        ``mta_trip_id`` for the service day derived from ``start_date``.
        Zero candidates, or more than one, give ``None``.
        """
        direct_match = await self.get_trip_row_from_id(trip_descriptor.trip_id)
        if direct_match is not None:
            return direct_match

        # NOTE: a looser LIKE '%trip_id%' lookup on ``trips`` was once tried at
        # this point; it is disabled.

        if self.system is not gtfs.TransitSystem.NYC_MTA:
            return None

        # Monday = 0, Sunday = 6
        weekday = trip_descriptor.start_date.weekday()
        if weekday == 5:
            service_day = nyc.ServiceDay.SATURDAY
        elif weekday == 6:
            service_day = nyc.ServiceDay.SUNDAY
        elif weekday < 5:
            service_day = nyc.ServiceDay.WEEKDAY
        else:
            raise ValueError("Unexpected day of week {} for {}".format(
                weekday, trip_descriptor.start_date))

        alt_ids = db.get_table("mta_trip_id")
        lookup = (
            alt_ids.select()
            .where(alt_ids.c.system == self.system.value)
            .where(alt_ids.c.alternate_trip_id == trip_descriptor.trip_id)
            .where(alt_ids.c.service_day == service_day)
        )
        async with db.acquire_conn() as conn:
            result = await conn.execute(lookup)
            candidates = await result.fetchall()

        if len(candidates) != 1:
            if candidates:
                # Ambiguous mapping: report it and give up.
                logging.info(
                    "%s: (%s, %s/%s) has multiple trip IDs: (%s)",
                    self.system,
                    trip_descriptor.trip_id,
                    trip_descriptor.start_date,
                    service_day,
                    ", ".join([match["trip_id"] for match in candidates]),
                )
            return None
        return await self.get_trip_row_from_id(candidates[0]["trip_id"])
示例#13
0
async def get_routes_for_shape_id() -> Dict[str, Set[str]]:
    """Map every non-null ``trips.shape_id`` to the set of its route_ids."""
    query = """
        select shape_id, array_agg(distinct route_id) as routes
        from trips
        where shape_id is not null
        group by shape_id
    """
    async with db.acquire_conn() as conn:
        shape_routes = {
            record.shape_id: set(record.routes)
            async for record in conn.execute(query)
        }
    logging.info("%d shapes have routes", len(shape_routes))
    return shape_routes
示例#14
0
 async def _get_stop_id_from_stop_seq(self, trip_id: str,
                                      stop_seq: int) -> str:
     """Look up the stop_id at position *stop_seq* of *trip_id*.

     Raises:
         Exception: If ``stop_times`` yields no stop_id for the pair in
             ``self.system``.
     """
     stop_times = db.get_table("stop_times")
     lookup = (
         sa.select([stop_times.c.stop_id])
         .where(stop_times.c.system == self.system.value)
         .where(stop_times.c.trip_id == trip_id)
         .where(stop_times.c.stop_sequence == stop_seq)
     )
     async with db.acquire_conn() as conn:
         found = await conn.scalar(lookup)
         if found is not None:
             return found
         raise Exception(
             "No stop_id for (trip_id, seq): ({}, {})".format(
                 trip_id, stop_seq))
示例#15
0
    async def query_stop_ids(self, system: gtfs.TransitSystem, route_id: str):
        """Return the distinct stop_ids served by *route_id* in *system*.

        Args:
            system: Transit system whose ``value`` filters the ``trips`` rows.
            route_id: Route whose trips are inspected.

        Returns:
            A list of stop_id values, one per distinct stop found in
            ``stop_times`` for the route's trips.
        """
        trips = db.get_table("trips")
        stop_times = db.get_table("stop_times")

        query = sa.select([stop_times.c.stop_id]).distinct().where(
            sa.and_(
                trips.c.system == system.value,
                trips.c.route_id == route_id,
                # Both tables are keyed per system, so match on system as
                # well as trip_id; otherwise stop times of another system
                # that reuses a trip_id would be returned too.
                stop_times.c.system == trips.c.system,
                stop_times.c.trip_id == trips.c.trip_id,
            ))
        async with db.acquire_conn() as conn:
            res = await conn.execute(query)
            rows = await res.fetchall()

        return [row["stop_id"] for row in rows]
示例#16
0
    async def write(self, transit_system: TransitSystem):
        """Replace *transit_system*'s ``map_nodes`` and ``map_edges`` rows.

        For each table in turn (nodes first, then edges) the system's
        existing rows are deleted and the rows built from ``self.nodes`` /
        ``self.edges`` are inserted.
        """
        nodes_table = db.get_table("map_nodes")
        edges_table = db.get_table("map_edges")
        system = transit_system.value

        node_rows = []
        for node in self.nodes.values():
            node_rows.append({
                "system": system,
                "id": node.id,
                "edge_ids": [edge.id for edge in node.edges],
                "loc": node.postgis_point(),
                # An empty stop set is stored as None rather than [].
                "stop_ids": list(node.stop_ids) or None,
            })

        edge_rows = []
        for edge in self.edges.values():
            first, second = edge.nodes
            # Store the endpoints in path order: if the first node sits on
            # the last point of the path, the pair is flipped.
            if first.point == edge.points[-1]:
                first, second = second, first
            edge_rows.append({
                "system": system,
                "id": edge.id,
                "node_id1": first.id,
                "node_id2": second.id,
                "path": edge.postgis_linestring(),
                "routes": list(edge.routes),
            })

        async with db.acquire_conn() as conn:
            for table, rows in ((nodes_table, node_rows),
                                (edges_table, edge_rows)):
                outcome = await conn.execute(
                    table.delete().where(table.c.system == system)
                )
                logging.info(
                    "Deleted %d rows from %s", outcome.rowcount, table.name)
                outcome = await conn.execute(table.insert().values(rows))
                logging.info(
                    "Inserted %d rows into %s", outcome.rowcount, table.name)
示例#17
0
async def import_generic(
    transit_system: gtfs.TransitSystem,
    csv_path: str,
    table: sa.Table,
    spec: ImportSpec,
):
    """Import one CSV file into *table* according to *spec*.

    Every CSV row becomes a row spec holding the system value, the columns
    listed in ``spec.copy`` (empty strings become ``None``) and the columns
    produced by the ``spec.compute`` generators. ``get_db_rows`` expands each
    spec into the rows that are finally passed to ``import_rows_batch``.

    Args:
        transit_system: System whose ``value`` is stored in the ``system``
            column.
        csv_path: Path of the CSV file to read.
        table: Destination table.
        spec: Import spec providing ``copy`` and ``compute``.
    """
    db_rows = []
    # newline="" is what the csv module asks for, so that newlines embedded
    # in quoted fields are handled by the reader itself.
    with open(csv_path, newline="") as csv_file:
        for csv_row in csv.DictReader(csv_file):
            db_row_spec = {"system": transit_system.value}
            for col in spec.copy:
                raw_value = csv_row[col]
                db_row_spec[col] = raw_value if raw_value != "" else None
            for col_name, generator in spec.compute.items():
                db_row_spec[col_name] = generator(csv_row)
            db_rows.extend(get_db_rows(db_row_spec))

    # The file is fully read and closed by now; no need to keep it open for
    # the duration of the database import.
    async with db.acquire_conn() as conn:
        await import_rows_batch(conn, table, db_rows)
示例#18
0
async def _load_nodes(transit_system: TransitSystem) -> List[Node]:
    """Load every ``map_nodes`` row of *transit_system* as a ``Node``.

    The node location is selected as GeoJSON.
    """
    async with db.acquire_conn() as conn:
        node_query = """
            select
                id,
                edge_ids,
                ST_AsGeoJSON(loc)::json as geojson,
                stop_ids
            from map_nodes
            where system=%s
        """
        loaded = [
            Node(
                id=record.id,
                edge_ids=record.edge_ids,
                geojson=record.geojson,
                stop_ids=record.stop_ids,
            )
            async for record in conn.execute(node_query, transit_system.value)
        ]
    return loaded
示例#19
0
async def import_lines(filename: str):
    """Reload ``nyc_subway_lines`` from the GeoJSON file *filename*.

    The table is emptied first. Each LineString feature is then inserted
    through its own connection, with all inserts awaited together.
    """
    with open(filename) as file:
        geodata = json.load(file)

    assert geodata["type"] == "FeatureCollection"
    features = geodata["features"]
    logging.info("%s has %d lines", filename, len(features))

    async with db.acquire_conn() as conn:
        res = await conn.execute(db.get_table("nyc_subway_lines").delete())
        logging.info("Deleted %d lines", res.rowcount)

    insert_stmt = """
        insert into nyc_subway_lines (objectid, lines, shape_len, path)
        values (
            %(objectid)s,
            %(lines)s,
            %(shape_len)s,
            ST_GeomFromGeoJSON(%(path)s)
        )
    """

    def to_params(feature):
        # Turn one GeoJSON feature into the bind parameters of insert_stmt.
        assert feature["type"] == "Feature"
        assert feature["geometry"]["type"] == "LineString"
        props = feature["properties"]
        return {
            "objectid": props["objectid"],
            # The name lists the line identifiers separated by dashes.
            "lines": props["name"].split("-"),
            "shape_len": float(props["shape_len"]),
            "path": json.dumps(feature["geometry"]),
        }

    async def insert_one(params):
        async with db.acquire_conn() as conn:
            await conn.execute(insert_stmt, params)

    all_params = [to_params(feature) for feature in features]
    await asyncio.gather(*map(insert_one, all_params))
    logging.info("Inserted %d lines", len(all_params))
示例#20
0
    async def process_trip_update(self, update: gtfs.TripUpdate,
                                  message: gtfs.FeedMessage):
        """Persist one TripUpdate from a feed message.

        Steps, in order:

        1. Resolve the trip row; warn if it is neither scheduled nor marked
           as replaced in *message*. Processing continues either way, using
           the descriptor's own trip_id when no row was found.
        2. Load up to five earlier ``realtime_raw_stop_times`` entries for
           the same trip (strictly older than ``message.timestamp``).
        3. Upsert the update's stop times into ``realtime_stop_times2``; an
           existing row is only overwritten by data that is not older.
        4. Delete stops that appeared in the most recent earlier entry but
           are missing from this update (see the conditions further down).
        5. Upsert the raw snapshot into ``realtime_raw_stop_times``.

        Args:
            update: The trip update to store.
            message: The feed message *update* came from; supplies the
                timestamp and the replaced-trip check.
        """
        trip = await self.parser.get_trip_row_from_descriptor(update.trip)
        if trip is None:
            if not message.is_trip_replaced(update.trip.route_id):
                logging.warning(
                    "TripUpdate trip  not scheduled or replaced: (%s, %s, %s, %s) at %s",
                    update.trip.trip_id,
                    update.trip.train_id,
                    update.trip.route_id,
                    update.trip.start_date,
                    message.timestamp,
                )

        table = db.get_table("realtime_stop_times2")
        raw_table = db.get_table("realtime_raw_stop_times")
        # All updates in this function will share the same update_time
        now = datetime.now(timezone.utc)
        route_id = update.trip.route_id
        start_date = update.trip.start_date
        # Prefer the resolved trip_id; fall back to the one in the descriptor
        trip_id = trip["trip_id"] if trip else update.trip.trip_id
        # train_id is a primary key and cannot be None
        train_id = update.trip.train_id or ""

        # Get last few entries for this trip (newest first, at most 5)
        async with db.acquire_conn() as conn:
            res = await conn.execute(raw_table.select(
            ).where(raw_table.c.system == self.system.value).where(
                raw_table.c.route_id == route_id).where(
                    raw_table.c.start_date == start_date).where(
                        raw_table.c.trip_id == trip_id).where(
                            raw_table.c.train_id == train_id).where(
                                raw_table.c.time < message.timestamp).order_by(
                                    raw_table.c.time.desc()).limit(5))
            rows = await res.fetchall()
        previous_stop_times = [[
            self.deserialize_stop_time(obj) for obj in row.stop_times
        ] for row in rows]

        # Determine most likely stop order
        # Remove any stops that are out of order
        # NOTE(review): neither step is implemented here; the list below is
        # just a copy of the incoming stop time updates.
        stop_time_updates = update.stop_time_updates.copy()

        # Update stop information in realtime_stop_times2
        if len(stop_time_updates) > 0:
            values = []
            for stu in stop_time_updates:
                values.append({
                    "system":
                    self.system.value,
                    "route_id":
                    route_id,
                    "start_date":
                    start_date,
                    "trip_id":
                    trip_id,
                    "train_id":
                    train_id,
                    "stop_id":
                    stu.stop_id,
                    "arrival":
                    stu.arrival,
                    "departure":
                    stu.departure,
                    "departure_or_arrival":
                    stu.departure
                    if stu.departure is not None else stu.arrival,
                    "time":
                    update.timestamp or message.timestamp,
                })
            stmt = insert(table).values(values)
            stmt = stmt.on_conflict_do_update(
                index_elements=[
                    table.c.system,
                    table.c.route_id,
                    table.c.start_date,
                    table.c.trip_id,
                    table.c.train_id,
                    table.c.stop_id,
                ],
                set_={
                    "arrival": stmt.excluded.arrival,
                    "departure": stmt.excluded.departure,
                    "departure_or_arrival": stmt.excluded.departure_or_arrival,
                    "time": stmt.excluded.time,
                },
                # Never let older data overwrite a newer row
                where=(table.c.time <= stmt.excluded.time),
            )
            async with db.acquire_conn() as conn:
                await conn.execute(stmt)

        # Delete any outdated stop info from the last entry
        # 1. If there are no stops in the current stop list, the train
        #    has completed its route and we do not remove any stops.
        # 2. If the train arrived before the current time, assume it
        #    arrived.
        # 3. Otherwise, see if it would arrive before the earliest train in the
        #    current stop list.  If it does, assume it is omitted from the
        #    current stop list because it already arrived early.
        # A stop is therefore removed only when it is absent from the current
        # list, its time is after message.timestamp, and its time is not
        # before the earliest time in the current list.
        if len(previous_stop_times) > 0 and len(stop_time_updates) > 0:
            current_stop_ids = [stu.stop_id for stu in stop_time_updates]
            earliest_stop_time = min([
                stu.arrival if stu.arrival is not None else stu.departure
                for stu in stop_time_updates
            ])
            outdated_stop_ids = []
            # Only the most recent previous entry is compared
            for stu in previous_stop_times[0]:
                time = stu.arrival if stu.arrival is not None else stu.departure
                if (stu.stop_id not in current_stop_ids
                        and time > message.timestamp
                        and time >= earliest_stop_time):
                    outdated_stop_ids.append(stu.stop_id)
            if len(outdated_stop_ids) > 0:
                logging.info(
                    "Removing stops [%s] from trip (%s, %s, %s, %s) based on "
                    "updated information at %s",
                    ", ".join(outdated_stop_ids),
                    trip_id,
                    train_id,
                    route_id,
                    start_date,
                    message.timestamp,
                )
                async with db.acquire_conn() as conn:
                    await conn.execute(table.delete().where(
                        table.c.system == self.system.value
                    ).where(table.c.route_id == route_id).where(
                        table.c.start_date == start_date).where(
                            table.c.trip_id == trip_id).where(
                                table.c.train_id == train_id).where(
                                    table.c.stop_id.in_(outdated_stop_ids)))

        # Insert stops data into realtime_raw_stop_times
        values = {
            "system":
            self.system.value,
            "route_id":
            route_id,
            "start_date":
            start_date,
            "trip_id":
            trip_id,
            "train_id":
            train_id,
            "time":
            message.timestamp,
            "stop_times":
            [self.serialize_stop_time(stu) for stu in stop_time_updates],
            "update_time":
            now,
        }
        stmt = insert(raw_table).values(values)
        # Re-processing the same message replaces the stored snapshot
        stmt = stmt.on_conflict_do_update(
            index_elements=[
                raw_table.c.system,
                raw_table.c.route_id,
                raw_table.c.start_date,
                raw_table.c.trip_id,
                raw_table.c.train_id,
                raw_table.c.time,
            ],
            set_={
                "stop_times": stmt.excluded.stop_times,
                "update_time": stmt.excluded.update_time,
            },
        )
        async with db.acquire_conn() as conn:
            await conn.execute(stmt)
示例#21
0
 async def insert(value):
     """Execute ``insert_stmt`` with *value* on a connection from ``db``."""
     async with db.acquire_conn() as connection:
         await connection.execute(insert_stmt, value)
示例#22
0
async def import_stations(filename: str):
    """Reload ``nyc_subway_stations`` from GeoJSON and rebuild ``map_stops``.

    Steps:

    1. Empty ``nyc_subway_stations`` and insert every Point feature of the
       file.
    2. Pair each parentless stop in ``stops`` with its nearest imported
       station and accept the pair when the stop is listed in
       ``STATION_EQUIVALENTS`` or when the normalized names agree.
    3. Refuse to continue if any pairing is ambiguous or mismatched.
    4. Empty ``map_stops`` and write one row per stop whose station was
       paired, using the imported station's location.

    Args:
        filename: Path of the GeoJSON FeatureCollection to import.

    Raises:
        AssertionError: If the file is not a FeatureCollection of Point
            features.
        Exception: If a ``STATION_EQUIVALENTS`` entry does not identify
            exactly one feature, if any stop name does not match its nearest
            station, or if an objectid/stop_id pairing is unexpectedly
            one-to-many.
    """
    with open(filename) as file:
        geodata = json.load(file)

    assert geodata["type"] == "FeatureCollection"
    logging.info("%s has %d stations", filename, len(geodata["features"]))

    async with db.acquire_conn() as conn:
        res = await conn.execute(db.get_table("nyc_subway_stations").delete())
        logging.info("Deleted %d stations", res.rowcount)

    insert_stmt = """
        insert into nyc_subway_stations (objectid, name, notes, lines, loc)
        values (
            %(objectid)s,
            %(name)s,
            %(notes)s,
            %(lines)s,
            ST_GeomFromGeoJSON(%(loc)s)
        )
    """
    values = []
    for feature in geodata["features"]:
        assert feature["type"] == "Feature"
        assert feature["geometry"]["type"] == "Point"
        props = feature["properties"]
        values.append({
            "objectid": props["objectid"],
            "name": props["name"],
            "notes": props["notes"],
            "lines": props["line"].split("-"),
            "loc": json.dumps(feature["geometry"]),
        })

    async def insert_station(value):
        async with db.acquire_conn() as conn:
            await conn.execute(insert_stmt, value)

    await asyncio.gather(*[insert_station(value) for value in values])
    logging.info("Inserted %d stations", len(values))

    # For every parentless stop, keep only the closest imported station
    # (distinct on stop_id, ordered by distance).
    stmt = """
        select
            distinct on (stop_id)
            name,
            objectid,
            array_to_string(lines, '-') as line,
            loc,
            stop_name,
            stop_id,
            stop_loc,
            ST_Distance(loc::geography, stop_loc::geography) as distance
        from stops s
        cross join nyc_subway_stations nss
        where parent_station is null
        order by
            stop_id,
            distance asc
    """
    async with db.acquire_conn() as conn:
        res = await conn.execute(stmt)
        rows = await res.fetchall()

    failed = False
    o_to_s = {}
    s_to_o = {}

    def link(objectid, stop_id):
        # Record the pairing in both directions.
        o_to_s.setdefault(objectid, []).append(stop_id)
        s_to_o.setdefault(stop_id, []).append(objectid)

    for row in rows:
        # Staten Island is very far away and has no subway data, so let's ignore
        # everything more than 1km away from the nearest GeoJSON station
        if row.distance >= 1000:
            continue
        # Some stops have no station
        if row.stop_id in NO_STATION:
            continue
        if row.stop_id in STATION_EQUIVALENTS:
            # Manual override: the station is identified by (name, line)
            # instead of by proximity.
            (name, line) = STATION_EQUIVALENTS[row.stop_id]
            features = [
                f for f in geodata["features"]
                if f["properties"]["name"] == name
                and f["properties"]["line"] == line
            ]
            if len(features) != 1:
                raise Exception(
                    "{}, {} does not uniquely describe station: [{}]".format(
                        name,
                        line,
                        # str() so that building the message cannot itself
                        # fail if objectids are not strings.
                        ", ".join(
                            str(f["properties"]["objectid"])
                            for f in features),
                    ))
            link(features[0]["properties"]["objectid"], row.stop_id)
            continue
        if normalize_name(row.name) == normalize_name(row.stop_name):
            link(row.objectid, row.stop_id)
            continue
        print("Name mismatch: {} ({}), {}, {}, {} ({}), {}".format(
            row.stop_name,
            row.stop_id,
            normalize_name(row.stop_name),
            normalize_name(row.name),
            row.name,
            row.line,
            row.distance,
        ))
        failed = True
    if failed:
        raise Exception("Cannot write data, resolve mismatches first")

    # Duplicates expected here because stops.txt has finer granularity
    # on some stations where platforms are stacked (e.g. a station is a single
    # level of platforms, instead of both of them)
    expected_duplicate_objectids = {
        # Queensboro Plaza
        "103": ["718", "R09"],
        # 145 St
        "295": ["A12", "D13"],
        # Coney Island - Stillwell Ave
        "469": ["D43", "N12"],
    }
    overloaded_objects = {
        k: v
        for (k, v) in o_to_s.items()
        if len(set(v)) > 1
        and (k, v) not in expected_duplicate_objectids.items()
    }
    if len(overloaded_objects) > 0:
        logging.info(
            "Multiple stop_ids for objectid: %s",
            overloaded_objects,
        )
        raise Exception("Cannot write data, resolve overloaded stop_ids")
    overloaded_stops = {k: v for (k, v) in s_to_o.items() if len(set(v)) > 1}
    if len(overloaded_stops) > 0:
        logging.info("Multiple objectids for stop_id: %s", overloaded_stops)
        raise Exception("Cannot write data, resolve overloaded objectids")

    async with db.acquire_conn() as conn:
        res = await conn.execute(db.get_table("stops").select())
        all_stops = await res.fetchall()
        res = await conn.execute(db.get_table("nyc_subway_stations").select())
        all_imported_stations = await res.fetchall()
    all_imported_stations_by_id = {
        s.objectid: s
        for s in all_imported_stations
    }

    values = []
    for stop in all_stops:
        # Child stops take the location of their parent station.
        station_id = stop.parent_station or stop.stop_id
        if station_id not in s_to_o:
            if station_id in NO_STATION:
                # This station is not in the map
                logging.info("Skipping stop %s (%s) in map", stop.stop_id,
                             stop.stop_name)
            # Otherwise no data was imported for this station (e.g. the
            # Staten Island Railway); such stops are left out of the map too.
            continue
        # We just imported this station and will use its data in the map
        objectid = c.only(s_to_o[station_id])
        values.append({
            "system": gtfs.TransitSystem.NYC_MTA.value,
            "stop_id": stop.stop_id,
            "loc": all_imported_stations_by_id[objectid].loc,
        })
    async with db.acquire_conn() as conn:
        deleted = await conn.execute(db.get_table("map_stops").delete())
        logging.info("Deleted %d stops from map_stops", deleted.rowcount)
        await conn.execute(db.get_table("map_stops").insert().values(values))
        logging.info("Wrote %d stops to map_stops", len(values))
示例#23
0
    async def _write_trip_update(self, update: gtfs.TripUpdate,
                                 message: gtfs.FeedMessage):
        """Upsert the stop times of *update* into ``realtime_stop_times``.

        A warning is logged when the trip is neither scheduled nor marked as
        replaced in *message*; the update is written regardless. An existing
        row is only overwritten by data whose ``update_time`` is not older.
        """
        trip = await self.parser.get_trip_row_from_descriptor(update.trip)
        if trip is None and not message.is_trip_replaced(
                update.trip.route_id):
            logging.warning(
                "TripUpdate trip  not scheduled or replaced: (%s, %s, %s) at %s",
                update.trip.trip_id,
                update.trip.route_id,
                update.trip.start_date,
                message.timestamp,
            )

        def keyed_row(stop_time_update) -> Optional[Tuple[str, Dict]]:
            # Build the row for one stop plus the key used for de-duplication.
            if trip is not None:
                trip_id = trip["trip_id"]
            else:
                trip_id = update.trip.trip_id
            row = {
                "system": self.system.value,
                "route_id": update.trip.route_id,
                "stop_id": stop_time_update.stop_id,
                "start_date": update.trip.start_date,
                "trip_id": trip_id,
                "timestamp": update.timestamp,
                "arrival": stop_time_update.arrival,
                "departure": stop_time_update.departure,
                "update_time": message.timestamp,
            }
            key_parts = [
                row["system"],
                row["route_id"],
                row["stop_id"],
                str(row["start_date"]),
            ]
            return ("||".join(key_parts), row)

        # Sometimes we get data that updates the same trip twice for the same
        # stop.  We can't update them both in the same DB update because that
        # can conflict.  Resolve by keeping the last row seen for each key.
        rows_by_key = {}
        for stop_time_update in update.stop_time_updates:
            key, row = keyed_row(stop_time_update)
            rows_by_key[key] = row
        if not rows_by_key:
            return
        unique_rows = list(rows_by_key.values())

        stop_times = db.get_table("realtime_stop_times")
        upsert = insert(stop_times).values(unique_rows)
        upsert = upsert.on_conflict_do_update(
            index_elements=[
                stop_times.c.system,
                stop_times.c.route_id,
                stop_times.c.stop_id,
                stop_times.c.start_date,
                stop_times.c.trip_id,
            ],
            set_={
                "timestamp": upsert.excluded.timestamp,
                "arrival": upsert.excluded.arrival,
                "departure": upsert.excluded.departure,
                "update_time": upsert.excluded.update_time,
            },
            where=(stop_times.c.update_time <= upsert.excluded.update_time),
        )
        async with db.acquire_conn() as conn:
            await conn.execute(upsert)
示例#24
0
    async def _write_vehicle_position(self, position: gtfs.VehiclePosition,
                                      message: gtfs.FeedMessage):
        """Upsert one vehicle position into ``realtime_vehicle_positions``.

        The stop is taken from ``position.stop_id`` when it is set.  Otherwise
        it is looked up from ``position.current_stop_sequence`` and the
        matching trip row.  The position is logged and skipped (nothing is
        written) when no stop can be determined or when the parser reports
        that the stop does not exist.

        An existing row with the same (system, route_id, stop_id, start_date,
        trip_id, timestamp) has its ``status`` and ``update_time`` replaced,
        but only when its ``update_time`` is not newer than the incoming one.

        Args:
            position: The vehicle position to store.
            message: The feed message the position came from.  Its timestamp
                is stored as ``update_time`` and is used as the row timestamp
                when ``position.timestamp`` is falsy.

        Returns:
            None.
        """
        trip = await self.parser.get_trip_row_from_descriptor(position.trip)
        if trip is None and not message.is_trip_replaced(
                position.trip.route_id):
            logging.warning(
                "VehiclePosition trip  not scheduled or replaced: "
                "(%s, %s, %s) at %s",
                position.trip.trip_id,
                position.trip.route_id,
                position.trip.start_date,
                message.timestamp,
            )

        if position.stop_id is not None:
            stop_id = position.stop_id
        else:
            # Guess from current_stop_sequence and stop_times
            # For some reason this is not always consistent with stop_id,
            # which is why we use stop_id if it exists (above)
            if position.current_stop_sequence is None:
                # This is feed data, so check it explicitly rather than with
                # `assert`: an assert is removed under `python -O` and would
                # otherwise abort the write with an AssertionError.
                logging.warning(
                    "Cannot write VehiclePosition: no stop_id or "
                    "current_stop_sequence for (%s, %s, %s) at %s",
                    position.trip.trip_id,
                    position.trip.route_id,
                    position.trip.start_date,
                    message.timestamp,
                )
                return

            if trip is None:
                # Without a trip row there is nothing to resolve the stop
                # sequence against.
                logging.debug(
                    "Cannot write VehiclePosition: no stop_id or trip_id for "
                    "(%s, %s, %s) at %s",
                    position.trip.trip_id,
                    position.trip.route_id,
                    position.trip.start_date,
                    message.timestamp,
                )
                return

            # Give up on current_stop_sequence because it is weird.  It starts
            # at 0 which is invalid.  It also goes over the number of stops in
            # the table.  For the L train, there are no entries with
            # current_stop_sequence larger than 22, even though there are trips
            # with 24 stops (e.g. BFA19SUPP-L047-Weekday-99_048500_L..S01R).
            # It seems like others have this problem as well:
            # https://groups.google.com/forum/#!topic/mtadeveloperresources/x8-f1biU-l0
            if self.system == gtfs.TransitSystem.NYC_MTA:
                return

            stop_id = await self.parser.get_stop_id_from_stop_seq(
                trip["trip_id"], position.current_stop_sequence)

        # For some reason, stop_id does not always exist in the MTA data.
        # In that case, don't do any writes because that will fail on the
        # foreign key constraint.
        stop_exists = await self.parser.get_stop_exists(stop_id)
        if not stop_exists:
            logging.debug(
                "Encountered nonexistent stop %s in vehicle position "
                "(%s, %s, %s) at %s",
                stop_id,
                position.trip.trip_id,
                position.trip.route_id,
                position.trip.start_date,
                message.timestamp,
            )
            return

        table = db.get_table("realtime_vehicle_positions")
        values = {
            "system": self.system.value,
            "route_id": position.trip.route_id,
            "stop_id": stop_id,
            "start_date": position.trip.start_date,
            # Prefer the trip id of the matched trip row when there is one.
            "trip_id":
            trip["trip_id"] if trip is not None else position.trip.trip_id,
            "timestamp": position.timestamp or message.timestamp,
            "status": position.current_status,
            "update_time": message.timestamp,
        }
        stmt = insert(table).values(values)
        stmt = stmt.on_conflict_do_update(
            index_elements=[
                table.c.system,
                table.c.route_id,
                table.c.stop_id,
                table.c.start_date,
                table.c.trip_id,
                table.c.timestamp,
            ],
            set_={
                "status": stmt.excluded.status,
                "update_time": stmt.excluded.update_time,
            },
            # Never overwrite a row with data from an older feed message.
            where=(table.c.update_time <= stmt.excluded.update_time),
        )
        async with db.acquire_conn() as conn:
            await conn.execute(stmt)