示例#1
0
    def set_routes(self):
        """Gen the table ROUTE.

        Reads ``routes.txt`` from ``self.dict_gtfs``, passes it through
        ``change_nan_value(df, None)``, derives a ``sha`` column from the
        long and short route names, sorts on it and attaches the
        ``start_date`` / ``end_date`` / ``gtfs_id`` columns.

        Returns:
            Whatever ``self.format_df`` produces for the "Route" columns.
        """
        df = self.dict_gtfs["routes.txt"]
        df = change_nan_value(df, None)

        # "sha" is the stringified long name followed by the short name;
        # the routes are ordered by that key.
        df["sha"] = (df["route_long_name"].map(str) +
                     df["route_short_name"].map(str))
        df = df.sort_values(by="sha")
        df["start_date"] = self.start_date
        df["end_date"] = None
        df["gtfs_id"] = self.gtfs_id

        # NOTE: routes sharing the same long + short name keep identical
        # "sha" values; no disambiguation is applied here.
        return self.format_df(df, dict_normalize["Route"].requiered)
示例#2
0
    def set_shapes(self):
        """Build the SHAPES table from ``shapes.txt``.

        The raw frame is passed through ``change_nan_value(..., None)``,
        tagged with the feed's ``gtfs_id`` and handed to ``format_df``
        together with the "Shape" column specification.
        """
        shapes = change_nan_value(self.dict_gtfs["shapes.txt"], None)
        shapes["gtfs_id"] = self.gtfs_id

        required_columns = dict_normalize["Shape"].requiered
        return self.format_df(shapes, required_columns)
示例#3
0
    def set_calendar_dates(self):
        """Build the CALENDAR_DATES table from ``calendar_dates.txt``.

        The ``date`` column is converted with ``dt.value2date`` and the
        feed's ``gtfs_id`` is attached before the frame goes through
        ``change_nan_value(..., None)`` and ``format_df``.
        """
        calendar_dates = self.dict_gtfs["calendar_dates.txt"]

        # Both assignments write into the frame object obtained from
        # self.dict_gtfs, i.e. that frame is modified in place.
        converted_dates = calendar_dates["date"].map(dt.value2date)
        calendar_dates["date"] = converted_dates
        calendar_dates["gtfs_id"] = self.gtfs_id

        cleaned = change_nan_value(calendar_dates, None)
        required_columns = dict_normalize["CalendarDate"].requiered
        return self.format_df(cleaned, required_columns)
示例#4
0
    def merge_shapes(self, vshapes):
        """Replace each shape ``Id`` with its ``ShapeScheduleId``.

        A copy of the mapped "Shape" frame is inner-joined with *vshapes*
        on ``Id``; the ``Id`` column is then overwritten with
        ``ShapeScheduleId`` and the result is passed through
        ``pt.change_nan_value(..., None)``.
        """
        mapped = self.dict_df_mapped["Shape"].copy()
        joined = pd.merge(left=mapped, right=vshapes, on="Id")
        joined["Id"] = joined["ShapeScheduleId"]

        return pt.change_nan_value(joined, None)
示例#5
0
    def merge_routes(self, vroutes):
        """Replace each route ``Id`` with its ``RouteScheduleId``.

        A copy of the mapped "Route" frame is inner-joined with *vroutes*
        on ``Id``. The ``AgencyId`` column is passed through
        ``pt.change_nan_value`` with ``self.agency_id`` as the second
        argument, and only one row per resulting ``Id`` is kept.
        """
        mapped = self.dict_df_mapped["Route"].copy()
        joined = pd.merge(left=mapped, right=vroutes, on="Id")
        joined["Id"] = joined["RouteScheduleId"]

        agency_ids = pt.change_nan_value(joined["AgencyId"], self.agency_id)
        joined["AgencyId"] = agency_ids

        deduplicated = joined.drop_duplicates(subset="Id")
        return deduplicated
示例#6
0
    def set_trips(self):
        """Build the TRIPS table from ``trips.txt`` and ``calendar.txt``.

        Trips are normalised with ``format_df`` first, then inner-joined
        with the calendar on ``service_id`` so that every trip row carries
        the ``start_date`` / ``end_date`` of its service. The feed's
        ``gtfs_id`` is attached last.
        """
        raw_trips = self.dict_gtfs["trips.txt"]
        calendar = self.dict_gtfs["calendar.txt"]

        trips = change_nan_value(raw_trips, None)
        trips = self.format_df(trips, dict_normalize["Trip"].requiered)

        # Default (inner) merge: only trips whose service_id appears in
        # the calendar frame remain in the result.
        service_period = calendar[["service_id", "start_date", "end_date"]]
        trips = pd.merge(left=trips, right=service_period, on="service_id")
        trips["gtfs_id"] = self.gtfs_id

        return trips
示例#7
0
    def set_stoptimes(self, trips):
        """Gen the table STOPTIMES.

        Args:
            trips: frame providing at least the ``trip_id`` and
                ``service_id`` columns; it is inner-joined onto the stop
                times on ``trip_id``.

        Returns:
            The stop-times frame after formatting, distance filling, time
            conversion, service-id join and null handling.
        """
        df = self.dict_gtfs["stop_times.txt"]

        def prepare_stop_times(df):
            """Convert arrival/departure times, or estimate them on failure."""
            try:
                df["arrival_time"] = df["arrival_time"].map(self.t2s)
                df["departure_time"] = df["departure_time"].map(self.t2s)
                msg = "We got all the stoptimes"
                logger.log(logging.INFO, msg)
            except Exception:
                # Any conversion error triggers the estimation fallback.
                # ``Exception`` (rather than a bare ``except``) lets
                # KeyboardInterrupt / SystemExit propagate.
                msg = "Some stoptimes are generating..."
                logger.log(logging.WARNING, msg)
                df = EstimateStopTimes.main(df)
            return df

        def fill_shape_dist_trav(df, stops):
            """Gen shape_dist_traveled if any value is missing."""
            if df["shape_dist_traveled"].isnull().values.any():
                return EstimateShapeDistTrav(df, stops).main()
            else:
                return df

        def add_service_id(df, trips):
            """Attach the service_id of each trip (inner join on trip_id)."""
            return pd.merge(df, trips[["trip_id", "service_id"]], on="trip_id")

        stops = self.dict_gtfs["stops.txt"]
        df = self.format_df(df, dict_normalize["StopTime"].requiered)
        df = fill_shape_dist_trav(df, stops)
        df = prepare_stop_times(df)
        df = add_service_id(df, trips)
        df["gtfs_id"] = self.gtfs_id
        df = change_nan_value(df, None)

        def gen_unallow_null(series):
            """Return 1 when the value is None, else the value unchanged."""
            if series is None:
                return 1
            else:
                return series

        # NOTE(review): the GTFS reference treats an empty pickup_type /
        # drop_off_type as 0 (regularly scheduled); here None becomes 1.
        # Confirm this default is intended.
        df["pickup_type"] = df["pickup_type"].map(gen_unallow_null)
        df["drop_off_type"] = df["drop_off_type"].map(gen_unallow_null)

        return df
示例#8
0
    def set_stops(self):
        """Build the STOPS table from ``stops.txt``.

        The frame is passed through ``change_nan_value(..., None)``, given
        ``start_date`` / ``end_date`` / ``gtfs_id`` columns and normalised
        with ``format_df``. Afterwards every ``location_type`` that is
        ``None`` is replaced by 0.
        """
        stops = change_nan_value(self.dict_gtfs["stops.txt"], None)
        stops["start_date"] = self.start_date
        stops["end_date"] = None
        stops["gtfs_id"] = self.gtfs_id

        stops = self.format_df(stops, dict_normalize["Stop"].requiered)

        # A missing location_type falls back to 0; other values pass through.
        stops["location_type"] = stops["location_type"].map(
            lambda value: 0 if value is None else value
        )

        return stops
示例#9
0
    def merge_stops(self, vstops):
        """Replace stop ids (and parent stop ids) with their schedule ids.

        A copy of the mapped "Stop" frame is inner-joined with *vstops* on
        ``Id``. *vstops* is then joined a second time (left join on
        ``ParentStopId``) to look up the schedule id of each parent
        station. One row per resulting ``Id`` is returned.
        """
        mapped = self.dict_df_mapped["Stop"].copy()
        joined = pd.merge(mapped, vstops, on="Id")

        # Second view of vstops, keyed by parent stop id, so the same
        # lookup table can resolve the parent station's schedule id.
        parent_lookup = vstops.copy()
        parent_lookup = parent_lookup.rename(columns={
            "Id": "ParentStopId",
            "StopScheduleId": "ParentScheduleId"
        })
        joined = pd.merge(joined, parent_lookup, on="ParentStopId",
                          how="left")

        joined["Id"] = joined["StopScheduleId"]
        joined["ParentStopId"] = joined["ParentScheduleId"]
        joined = pt.change_nan_value(joined, None)

        unique_stops = joined.drop_duplicates(subset="Id")
        return unique_stops
示例#10
0
    def set_agency(self):
        """Build the AGENCY table from ``agency.txt``.

        The raw frame is passed through ``change_nan_value(..., None)`` and
        then through ``format_df`` with the "Agency" column specification.
        """
        agencies = change_nan_value(self.dict_gtfs["agency.txt"], None)
        required_columns = dict_normalize["Agency"].requiered

        return self.format_df(agencies, required_columns)