def set_routes(self):
    """Build the ROUTE table from ``routes.txt``.

    A ``sha`` column is derived by concatenating the stringified long and
    short route names; the frame is sorted on it, stamped with the validity
    dates and the feed id, then passed to ``self.format_df`` together with
    the ``Route`` entry of ``dict_normalize``.

    Returns:
        Whatever ``self.format_df`` returns for the prepared frame.
    """
    # presumably swaps NaN cells for None -- confirm in change_nan_value
    routes = change_nan_value(self.dict_gtfs["routes.txt"], None)

    long_names = routes["route_long_name"].map(str)
    short_names = routes["route_short_name"].map(str)
    routes["sha"] = long_names + short_names
    routes = routes.sort_values(by="sha")

    routes["start_date"] = self.start_date
    routes["end_date"] = None
    routes["gtfs_id"] = self.gtfs_id

    def route_direction(serie):
        """Suffix every entry whose following entry has the same value.

        NOTE(review): currently unused -- its only call site is commented
        out right below.
        """
        same_as_next = serie.shift(-1) == serie
        lst_index = list(serie.loc[same_as_next].index)
        msg = "We have {} routes with the same name for 2 directions"
        logger.log(logging.WARNING, msg.format(len(lst_index)))
        for idx in lst_index:
            serie.loc[idx] += "_other_dire"

    # route_direction(routes["sha"])
    return self.format_df(routes, dict_normalize["Route"].requiered)
def set_shapes(self):
    """Build the SHAPES table from ``shapes.txt``.

    The raw frame goes through ``change_nan_value`` (with ``None`` as the
    replacement), gets the feed id attached, and is handed to
    ``self.format_df`` with the ``Shape`` entry of ``dict_normalize``.
    """
    shapes = change_nan_value(self.dict_gtfs["shapes.txt"], None)
    shapes["gtfs_id"] = self.gtfs_id
    required = dict_normalize["Shape"].requiered
    return self.format_df(shapes, required)
def set_calendar_dates(self):
    """Build the CALENDAR_DATES table from ``calendar_dates.txt``.

    The ``date`` column is converted with ``dt.value2date``, the feed id is
    attached, and the frame is passed through ``change_nan_value`` and
    ``self.format_df`` (with the ``CalendarDate`` entry of
    ``dict_normalize``).

    Returns:
        Whatever ``self.format_df`` returns for the prepared frame.
    """
    # Work on a copy: the column assignments below would otherwise be
    # written into the frame cached in ``self.dict_gtfs``, so a second call
    # would try to convert already-converted dates.
    df = self.dict_gtfs["calendar_dates.txt"].copy()
    df["date"] = df["date"].map(dt.value2date)
    df["gtfs_id"] = self.gtfs_id
    df = change_nan_value(df, None)
    return self.format_df(df, dict_normalize["CalendarDate"].requiered)
def merge_shapes(self, vshapes):
    """Replace each shape ``Id`` with its ``ShapeScheduleId``.

    Args:
        vshapes: frame joined on ``Id``; it must provide the
            ``ShapeScheduleId`` column that becomes the new ``Id``.

    Returns:
        The merged frame after ``pt.change_nan_value(..., None)``.
    """
    mapped = self.dict_df_mapped["Shape"].copy()
    merged = pd.merge(mapped, vshapes, on="Id")
    merged["Id"] = merged["ShapeScheduleId"]
    return pt.change_nan_value(merged, None)
def merge_routes(self, vroutes):
    """Replace each route ``Id`` with its ``RouteScheduleId``.

    Args:
        vroutes: frame joined on ``Id``; it must provide the
            ``RouteScheduleId`` column that becomes the new ``Id``.

    Returns:
        The merged frame with one row kept per resulting ``Id``.
    """
    mapped = self.dict_df_mapped["Route"].copy()
    merged = pd.merge(mapped, vroutes, on="Id")
    merged["Id"] = merged["RouteScheduleId"]
    # ``self.agency_id`` is the replacement value handed to the helper
    # (presumably used where AgencyId is missing -- confirm in pt).
    agency_ids = pt.change_nan_value(merged["AgencyId"], self.agency_id)
    merged["AgencyId"] = agency_ids
    return merged.drop_duplicates(subset="Id")
def set_trips(self):
    """Build the TRIPS table from ``trips.txt`` and ``calendar.txt``.

    The trips frame is cleaned with ``change_nan_value``, passed through
    ``self.format_df`` (``Trip`` entry of ``dict_normalize``), then joined
    with the calendar on ``service_id`` to pick up ``start_date`` and
    ``end_date``. The feed id is attached last.
    """
    raw_trips = self.dict_gtfs["trips.txt"]
    calendar = self.dict_gtfs["calendar.txt"]

    trips = change_nan_value(raw_trips, None)
    trips = self.format_df(trips, dict_normalize["Trip"].requiered)

    # NOTE(review): default (inner) merge -- trips whose service_id is
    # absent from calendar.txt are dropped; confirm this is intended.
    calendar_dates = calendar[["service_id", "start_date", "end_date"]]
    trips = pd.merge(trips, calendar_dates, on="service_id")

    trips["gtfs_id"] = self.gtfs_id
    return trips
def set_stoptimes(self, trips):
    """Build the STOPTIMES table from ``stop_times.txt``.

    Steps, in order: normalise the frame with ``self.format_df``
    (``StopTime`` entry of ``dict_normalize``), fill ``shape_dist_traveled``
    when it has nulls, convert arrival/departure times with ``self.t2s``
    (falling back to ``EstimateStopTimes`` when conversion fails), attach
    ``service_id`` from ``trips``, attach the feed id, run
    ``change_nan_value`` and default the pickup/drop-off types.

    Args:
        trips: frame providing the ``trip_id`` and ``service_id`` columns.

    Returns:
        The prepared stop-times frame.
    """
    df = self.dict_gtfs["stop_times.txt"]

    def prepare_stop_times(df):
        """Convert the arrival and departure times, or estimate them."""
        try:
            # Convert both columns before assigning either one, so the
            # fallback below never receives a half-converted frame.
            arrival = df["arrival_time"].map(self.t2s)
            departure = df["departure_time"].map(self.t2s)
        except Exception:
            # Was a bare ``except:``, which also swallowed
            # KeyboardInterrupt and SystemExit.
            msg = "Some stoptimes are generating..."
            logger.log(logging.WARNING, msg)
            return EstimateStopTimes.main(df)
        df["arrival_time"] = arrival
        df["departure_time"] = departure
        msg = "We got all the stoptimes"
        logger.log(logging.INFO, msg)
        return df

    def fill_shape_dist_trav(df, stops):
        """Estimate ``shape_dist_traveled`` when any value is null."""
        if df["shape_dist_traveled"].isnull().values.any():
            return EstimateShapeDistTrav(df, stops).main()
        return df

    def add_service_id(df, trips):
        """Join the ``service_id`` of each trip onto the stop times."""
        return pd.merge(df, trips[["trip_id", "service_id"]], on="trip_id")

    def gen_unallow_null(value):
        """Return 1 when the value is None, else the value unchanged."""
        # NOTE(review): in the GTFS reference 1 means "no pickup/drop-off
        # available" and an empty value means regular service (0) --
        # confirm that defaulting to 1 is intended.
        return 1 if value is None else value

    stops = self.dict_gtfs["stops.txt"]
    df = self.format_df(df, dict_normalize["StopTime"].requiered)
    df = fill_shape_dist_trav(df, stops)
    df = prepare_stop_times(df)
    df = add_service_id(df, trips)
    df["gtfs_id"] = self.gtfs_id
    df = change_nan_value(df, None)

    df["pickup_type"] = df["pickup_type"].map(gen_unallow_null)
    df["drop_off_type"] = df["drop_off_type"].map(gen_unallow_null)
    return df
def set_stops(self):
    """Build the STOPS table from ``stops.txt``.

    The frame is cleaned with ``change_nan_value``, stamped with the
    validity dates and the feed id, normalised by ``self.format_df``
    (``Stop`` entry of ``dict_normalize``), and finally every
    ``location_type`` that is ``None`` is replaced by 0.
    """
    stops = change_nan_value(self.dict_gtfs["stops.txt"], None)

    stops["start_date"] = self.start_date
    stops["end_date"] = None
    stops["gtfs_id"] = self.gtfs_id

    stops = self.format_df(stops, dict_normalize["Stop"].requiered)

    # A stop without a location type falls back to 0.
    stops["location_type"] = stops["location_type"].map(
        lambda value: 0 if value is None else value
    )
    return stops
def merge_stops(self, vstops):
    """Replace stop and parent-stop ids with their schedule ids.

    Args:
        vstops: frame with ``Id`` and ``StopScheduleId`` columns. It is
            joined twice: once on the stop's own ``Id`` and once, renamed,
            on ``ParentStopId``.

    Returns:
        The merged frame with one row kept per resulting ``Id``.
    """
    stops = pd.merge(self.dict_df_mapped["Stop"].copy(), vstops, on="Id")

    # Same lookup table, relabelled so it can be joined on the parent id.
    parent_lookup = vstops.copy().rename(
        columns={
            "Id": "ParentStopId",
            "StopScheduleId": "ParentScheduleId",
        }
    )
    # Left join: stops without a matching parent are kept.
    stops = pd.merge(stops, parent_lookup, on="ParentStopId", how="left")

    stops["Id"] = stops["StopScheduleId"]
    stops["ParentStopId"] = stops["ParentScheduleId"]
    stops = pt.change_nan_value(stops, None)
    return stops.drop_duplicates(subset="Id")
def set_agency(self):
    """Build the AGENCY table from ``agency.txt``.

    The raw frame goes through ``change_nan_value`` (with ``None`` as the
    replacement) and is then handed to ``self.format_df`` with the
    ``Agency`` entry of ``dict_normalize``.
    """
    agencies = change_nan_value(self.dict_gtfs["agency.txt"], None)
    required = dict_normalize["Agency"].requiered
    return self.format_df(agencies, required)