import urllib

from google.transit import gtfs_realtime_pb2


def get_realtime(agency, mode):
    # Fetch a GTFS-Realtime feed (trip updates, alerts, or vehicle
    # positions) for the given agency and parse it into a FeedMessage.
    url = transit_agencies.get(agency, mode)
    if url is None:
        return None
    response = urllib.urlopen(url)
    feed = gtfs_realtime_pb2.FeedMessage()
    feed.ParseFromString(response.read())
    return feed
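For reference, a minimal usage sketch of get_realtime. The agency name and mode key below are hypothetical examples; the real keys depend on how transit_agencies is configured, which this excerpt does not show.

# Hypothetical usage: agency name and mode key are illustrative only.
feed = get_realtime("bart", "vehicle_positions")
if feed is not None:
    # A FeedMessage holds a list of entities (trip updates, vehicle
    # positions, or alerts, depending on which feed was requested).
    print "entities: %d" % len(feed.entity)
    for entity in feed.entity[:5]:
        if entity.HasField("vehicle"):
            pos = entity.vehicle.position
            print entity.id, pos.latitude, pos.longitude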
def get_static(agency):
    feed = {}
    pathname = "./agencies/" + agency + "/"
    # if local files exist, read them instead of downloading
    if path.exists(pathname):
        for f in os.listdir(pathname):
            if f[-4:] == ".txt" or f[-4:] == ".csv":  # text/csv GTFS tables
                with open(pathname + f) as csvfile:
                    feed[f[:-4]] = csv2df(csvfile)
        return feed
    # pull new information
    request = requests.get(transit_agencies.get(agency, "static"), stream=True)
    if request.status_code != 200:
        print "Error! Could not reach the static GTFS URL"
        return None
    if not path.exists(pathname):
        os.makedirs(pathname)
    # unzip GTFS static
    buf = request.raw.read()
    zipdata = StringIO()
    zipdata.write(buf)
    with open(pathname + "gtfs.zip", "wb") as zipout:
        zipout.write(buf)
    z = zipfile.ZipFile(zipdata)
    z.extractall(pathname)
    # format static feed: strip non-ASCII characters from column names
    for f in z.namelist():
        with z.open(f) as csvfile:
            feed[f[:-4]] = csv2df(csvfile).rename(
                columns=lambda s: str(s.decode('ascii', 'ignore')))
    for f in REQUIRED_GTFS_FILES:
        if f not in feed:
            print "Incomplete GTFS dataset"
            return None
    z.close()
    return feed
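csv2df is project-internal and not shown in this excerpt. A minimal sketch of a compatible implementation, assuming it simply wraps pandas.read_csv and returns a DataFrame; the dtype choice is an assumption, not confirmed project behavior.

import pandas as pd

def csv2df(csvfile):
    # Hypothetical sketch of the csv2df helper: parse one GTFS table
    # (comma-separated text) into a pandas DataFrame. Reading everything
    # as strings avoids pandas guessing dtypes for IDs such as stop_id,
    # which can look numeric but are really opaque keys.
    return pd.read_csv(csvfile, dtype=str)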
def interpret(in_tabletype, in_agency, in_route, static_feed,
              trip_update_feed, alert_feed, vehicle_position_feed):
    tables = {}
    trip2pattern = {}
    trip2vehicle = {}
    agency_id = transit_agencies.get(in_agency, "id")  # e.g. for BART
    route_id = in_route
    if in_tabletype == "static":
        # Agencies
        tablefunctions.agencies(tables, static_feed, arg_agency_id=agency_id)
        # Routes
        tablefunctions.routes(tables, static_feed, arg_agency_id=agency_id)
        # Stops
        tablefunctions.stops(tables, static_feed, arg_agency_id=agency_id)
        # Route stop sequence
        tablefunctions.route_stop_seq(tables, static_feed,
                                      arg_agency_id=agency_id,
                                      arg_route_id=route_id,
                                      trip2pattern=trip2pattern)
        # Run pattern
        tablefunctions.runPattern(tables=tables, static_feed=static_feed,
                                  arg_agency_id=agency_id)
        # Schedules: gets some data from the RunPattern table
        tablefunctions.schedules(tables, static_feed, arg_agency_id=agency_id,
                                 trip2pattern=trip2pattern)
        # Points
        tablefunctions.points(tables, static_feed, arg_agency_id=agency_id,
                              trip2pattern=trip2pattern)
        tablefunctions.big_points(tables, static_feed, arg_agency_id=agency_id,
                                  trip2pattern=trip2pattern)
        # Route point sequence
        tablefunctions.route_point_seq(tables, static_feed,
                                       arg_agency_id=agency_id,
                                       trip2pattern=trip2pattern)
    elif in_tabletype == "transfer":
        # ---- Task 2 ----
        # Transfers
        tablefunctions.transfers(tables, static_feed, arg_agency_id=agency_id)
    elif in_tabletype == "realtime":
        # ---- Task 3 ----
        # GPS fixes
        tablefunctions.gps_fixes(tables, static_feed, trip_update_feed,
                                 alert_feed, vehicle_position_feed,
                                 arg_agency_id=agency_id,
                                 trip2pattern=trip2pattern)
        # Transit ETA
        tablefunctions.transit_eta(tables, static_feed, trip_update_feed,
                                   alert_feed, vehicle_position_feed,
                                   arg_agency_id=agency_id,
                                   trip2pattern=trip2pattern)
    return tables
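A hedged end-to-end sketch tying the fetchers and interpret together. The agency name, route identifier, and the mode keys passed to get_realtime are hypothetical; the real values depend on the transit_agencies configuration, which this excerpt does not show.

# Hypothetical driver: agency/route names and realtime mode keys are
# illustrative assumptions, not values confirmed by the project config.
static_feed = get_static("bart")
trip_updates = get_realtime("bart", "trip_updates")
alerts = get_realtime("bart", "alerts")
vehicle_positions = get_realtime("bart", "vehicle_positions")

static_tables = interpret("static", "bart", "ROUTE 1", static_feed,
                          trip_updates, alerts, vehicle_positions)
realtime_tables = interpret("realtime", "bart", "ROUTE 1", static_feed,
                            trip_updates, alerts, vehicle_positions)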
def get_static(agency, refresh):
    pathname = "./agencies/" + agency + "/"
    feed = {}
    get_new_feed = False
    if path.exists(pathname + "raw_csv/") and not refresh:
        with open(pathname + "gtfs.zip", "rb") as zipout:
            checksum = hasher(zipout, hashlib.md5())
        # read csv files
        for f in os.listdir(pathname + "raw_csv/"):
            if f[-4:] == ".csv":
                with open(pathname + "raw_csv/" + f, 'rb') as csvfile:
                    feed[f[:-4]] = df_helper.csv2df(csvfile)
        # if required GTFS data is missing, request new data anyway
        for f in REQUIRED_GTFS_FILES:
            if f not in feed:
                get_new_feed = True
        # if feed_end_date has passed, request new data anyway
        if not get_new_feed:
            feed_end_date = (feed["feed_info"].feed_end_date[0]
                             if "feed_info" in feed
                             else feed["calendar"].end_date[0])
            feed_end_date = datetime.strptime(str(feed_end_date), '%Y%m%d')
            current_date = datetime.now()
            logging.info("feed_end_date = %s", feed_end_date.strftime("%Y%m%d"))
            logging.info("current_date = %s", current_date.strftime("%Y%m%d"))
            get_new_feed = feed_end_date < current_date
        if not get_new_feed:
            logging.debug("Read from local")
            return feed, checksum
    # request GTFS-Static
    request = requests.get(transit_agencies.get(agency, "static"), stream=True)
    # if unsuccessful
    if request.status_code != 200:
        return None
    if not path.exists(pathname + "raw/"):
        os.makedirs(pathname + "raw/")
    # unzip GTFS static
    buf = request.raw.read()
    zipdata = StringIO()
    zipdata.write(buf)
    with open(pathname + "gtfs.zip", "wb") as zipout:
        zipout.write(buf)
    with open(pathname + "gtfs.zip", "rb") as zipout:
        checksum = hasher(zipout, hashlib.md5())
    z = zipfile.ZipFile(zipdata)
    z.extractall(pathname + "raw/")
    # format static feed: strip non-ASCII characters from column names
    for f in z.namelist():
        with z.open(f) as csvfile:
            feed[f[:-4]] = df_helper.csv2df(csvfile).rename(
                columns=lambda s: str(s.decode('ascii', 'ignore')))
    for f in REQUIRED_GTFS_FILES:
        if f not in feed:
            logging.error("Incomplete GTFS dataset")
            return None
    z.close()
    # write csv files
    if not path.exists(pathname + "raw_csv/"):
        os.makedirs(pathname + "raw_csv/")
    for fn, df in feed.iteritems():
        df.to_csv(pathname + "raw_csv/" + fn + ".csv", sep=',', index=False)
    logging.debug("Read from online")
    return feed, checksum
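hasher is referenced above but not shown in this excerpt. A minimal sketch of a compatible implementation, assuming it consumes an open binary file object plus a hashlib object and returns a digest string:

def hasher(fileobj, hashobj, blocksize=65536):
    # Hypothetical sketch of the hasher helper used above: feed the file
    # through the supplied hashlib object in fixed-size chunks so large
    # gtfs.zip archives are never loaded into memory at once.
    buf = fileobj.read(blocksize)
    while buf:
        hashobj.update(buf)
        buf = fileobj.read(blocksize)
    return hashobj.hexdigest()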
        logging.debug('Read from database')
        return df_helper.sql2df(table, login)

    def write_table(table):
        if local:
            tables[table].to_csv(pathname + table + '.csv', sep=',', index=False)
        else:
            df_helper.df2sql(tables[table], table, login=login,
                             exist_flag=('replace' if refresh else 'append'))

    # check if newer timestamp
    # process entity
    tables = {}
    trip2pattern = {}
    trip2vehicle = {}
    agency_id = transit_agencies.get(agency, "id")

    # Static Feed
    # ---- Task 1 ----
    # Agency
    #   int agency_id                   -> 'agency_id'       int(10) unsigned
    #   required string agency_name     -> 'agency_name'     varchar(255)
    #   required string agency_url      -> 'agency_url'      varchar(255)
    #   required string agency_timezone -> 'agency_timezone' smallint(6)
    #   optional string agency_lang     -> 'agency_lang'     varchar(255)
    #   optional string agency_phone    -> 'agency_phone'    varchar(255)
    #   required string agency_timezone -> 'timezone_name'   varchar(45)
    #   PRIMARY KEY ('agency_id')
    #   KEY ('agency_timezone')
    if can_read_table('Agency') and not refresh:
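The can_read_table guard above is not included in this excerpt. A minimal sketch of what such a check might look like, mirroring write_table's local/database split; the CSV-existence test and the sql2df probe below are assumptions, not the project's confirmed logic.

def can_read_table(table):
    # Hypothetical sketch of the can_read_table guard: report whether a
    # previously written copy of the table exists to be read back.
    if local:
        return path.exists(pathname + table + '.csv')
    try:
        # Probe the database for the table; using df_helper.sql2df here
        # is assumed from the read path shown above.
        return df_helper.sql2df(table, login) is not None
    except Exception:
        return False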