def set_config(base_dir):
    """Fill the module-level ``config`` object with paths and tuning values.

    Every path is derived from ``base_dir``.  As a side effect the
    processed-GTFS directory and the output directory are created through
    ``mkdir_p``.
    """
    join = os.path.join
    seconds_per_minute = 60

    # Work out the directory tree once, then publish it on ``config``.
    temp_dir = join(base_dir, 'tmp_data')
    gtfs_dir = join(temp_dir, 'gtfs')
    processed_dir = join(gtfs_dir, 'processed_data')
    output_dir = join(temp_dir, 'output')

    config.base = base_dir
    config.temp_data = temp_dir

    # gtfs
    config.gtfs = gtfs_dir
    config.gtfs_raw_data = join(gtfs_dir, 'data')
    config.gtfs_processed_data = processed_dir
    mkdir_p(processed_dir)
    config.gtfs_stop_file = join(processed_dir, 'stop.data')
    config.gtfs_shape_file = join(processed_dir, 'shape.data')

    # reports
    config.output_data = output_dir
    mkdir_p(output_dir)

    # params
    config.max_accuracy_radius_meters = 300
    config.min_accuracy_radius_meters = 200
    config.route_sampling__min_distance_between_points_meters = 10.0
    config.station_radius_in_meters = 300

    # Allowed deviation, in seconds, around the actual arrival / departure.
    config.early_arrival_max_seconds = 35 * seconds_per_minute    # arriving early
    config.late_arrival_max_seconds = 35 * seconds_per_minute     # arriving late
    config.early_departure_max_seconds = 15 * seconds_per_minute  # departing early
    config.late_departure_max_seconds = 35 * seconds_per_minute   # departing late

    config.shape_probability_threshold = 0.80
    config.stop_discovery_location_timeout_seconds = 60
    config.stop_discovery_probability_thresh = 0.90
    config.stop_discovery_count_thresh = 3

    # A trip list longer than this threshold means there is no match to GTFS.
    config.trip_list_length_thresh = 3

    config.no_report_timegap = datetime.timedelta(minutes=60)
    config.no_stop_timegap = datetime.timedelta(minutes=60)
def download_gtfs_file():
    """Fetch the GTFS zip from MOT into its own sub-folder of GTFS_DATA_DIR.

    The sub-folder is named after ``ot_utils.get_utc_time_underscored()``.
    ``FILE_NAME`` is retrieved from ``MOT_FTP`` into that folder and then
    unzipped in place.
    """
    # Pure path computation first; the I/O calls follow in their original order.
    timestamp = ot_utils.get_utc_time_underscored()
    target_dir = os.path.join(GTFS_DATA_DIR, timestamp)
    zip_path = os.path.join(target_dir, FILE_NAME)

    ot_utils.mkdir_p(target_dir)
    ot_utils.ftp_get_file(MOT_FTP, FILE_NAME, zip_path)
    ot_utils.unzip_file(zip_path, target_dir)
def set_config(base_dir):
    """Fill the module-level ``config`` object with paths and tuning values.

    Every path is derived from ``base_dir``.  As a side effect the
    processed-GTFS directory and the output directory are created through
    ``mkdir_p``.
    """
    join = os.path.join
    seconds_per_minute = 60

    # Work out the directory tree once, then publish it on ``config``.
    temp_dir = join(base_dir, 'tmp_data')
    gtfs_dir = join(temp_dir, 'gtfs')
    processed_dir = join(gtfs_dir, 'processed_data')
    output_dir = join(temp_dir, 'output')

    config.base = base_dir
    config.temp_data = temp_dir

    # gtfs
    config.gtfs = gtfs_dir
    config.gtfs_raw_data = join(gtfs_dir, 'data')
    config.gtfs_processed_data = processed_dir
    mkdir_p(processed_dir)
    config.gtfs_stop_file = join(processed_dir, 'stop.data')
    config.gtfs_shape_file = join(processed_dir, 'shape.data')

    # reports
    config.output_data = output_dir
    mkdir_p(output_dir)
    config.output_shelve_file = join(output_dir, 'shelve.data')

    # params
    config.max_accuracy_radius_meters = 300
    config.min_accuracy_radius_meters = 200
    config.route_sampling__min_distance_between_points_meters = 10.0
    config.station_radius_in_meters = 300

    # Allowed deviation, in seconds, around the actual arrival / departure.
    config.early_arrival_max_seconds = 35 * seconds_per_minute    # arriving early
    config.late_arrival_max_seconds = 35 * seconds_per_minute     # arriving late
    config.early_departure_max_seconds = 15 * seconds_per_minute  # departing early
    config.late_departure_max_seconds = 35 * seconds_per_minute   # departing late

    config.shape_probability_threshold = 0.80
    config.stop_discovery_location_timeout_seconds = 60
    config.stop_discovery_probability_thresh = 0.95
    config.stop_discovery_count_thresh = 10
def download_gtfs_file(force=False, gtfs_url=None):
    """Download the GTFS zip to a temp file and discard it when it is a duplicate.

    The archive is fetched with ``ot_utils.download_url`` when ``gtfs_url`` is
    given, otherwise over FTP (``MOT_FTP`` / ``FILE_NAME``).  Unless ``force``
    is true, its md5 is compared with the archive stored in the latest folder
    of ``GTFS_DATA_DIR``; if that folder contains a ``success`` marker and the
    checksums are equal, the temp file is removed.

    Returns ``None`` on every path visible in this block.

    NOTE(review): nothing here keeps or unpacks a *new* download, and the
    locally imported ``shutil`` is never used -- confirm that the tail of this
    function has not been lost.
    """
    import shutil

    stamp = ot_utils.get_utc_time_underscored()
    if not os.path.exists(GTFS_DATA_DIR):
        ot_utils.mkdir_p(GTFS_DATA_DIR)

    tmp_file = '/tmp/%s_tmp.zip' % stamp
    print('downloading GTFS to tmp file')
    if gtfs_url:
        ot_utils.download_url(gtfs_url, tmp_file)
    else:
        ot_utils.ftp_get_file(MOT_FTP, FILE_NAME, tmp_file)

    if not force:
        new_md5 = ot_utils.md5_for_file(tmp_file)
        latest_dir = ot_utils.find_lastest_in_dir(GTFS_DATA_DIR)
        if latest_dir:
            success_marker = os.path.join(latest_dir, 'success')
            if os.path.exists(success_marker):
                previous_zip = os.path.join(latest_dir, FILE_NAME)
                try:
                    previous_md5 = ot_utils.md5_for_file(previous_zip)
                except Exception as e:
                    # An unreadable previous archive is treated as "different".
                    print(e)
                    previous_md5 = 'error_in_md5'
                if previous_md5 == new_md5:
                    print('Checksum is identical - removing tmp file')
                    os.remove(tmp_file)
                    return None
            else:
                print('Last time was not success')

    return None
def _same_as_last_download(tmp_file):
    """Return True when tmp_file's md5 equals that of the last successful download."""
    tmp_md5 = ot_utils.md5_for_file(tmp_file)
    last_dir = ot_utils.find_lastest_in_dir(GTFS_DATA_DIR)
    if not last_dir:
        return False
    if not os.path.exists(os.path.join(last_dir, 'success')):
        LOGGER.info('Last time was not success')
        return False
    try:
        last_md5 = ot_utils.md5_for_file(os.path.join(last_dir, FILE_NAME))
    except Exception:
        # An unreadable previous archive can never count as identical.
        LOGGER.exception('failed in md5 for last file - ignoring')
        return False
    return last_md5 == tmp_md5


def download_gtfs_file(force=False):
    """Download the GTFS zip from MOT and unpack it into a new folder.

    The archive is first fetched over FTP to a temporary file under ``/tmp``.
    Unless ``force`` is true, its md5 is compared with the archive in the
    latest folder of ``GTFS_DATA_DIR``; when that folder has a ``success``
    marker and the checksums match, the temporary file is deleted and nothing
    else happens.

    Otherwise a folder named after ``ot_utils.get_utc_time_underscored()`` is
    created under ``GTFS_DATA_DIR``, the ``latest`` symlink is repointed to
    it, and the archive is moved there and unzipped.

    :param force: when true, skip the checksum comparison and always keep the
        new download.
    :return: path of the newly created folder, or ``None`` when the download
        was identical to the previous successful one.
    """
    import shutil

    time_suffix = ot_utils.get_utc_time_underscored()
    if not os.path.exists(GTFS_DATA_DIR):
        ot_utils.mkdir_p(GTFS_DATA_DIR)

    tmp_file = '/tmp/{0}_tmp.zip'.format(time_suffix)
    LOGGER.info('downloading GTFS to tmp file')
    ot_utils.ftp_get_file(MOT_FTP, FILE_NAME, tmp_file)

    if not force and _same_as_last_download(tmp_file):
        LOGGER.info('Checksum is identical - removing tmp file')
        os.remove(tmp_file)
        return None

    LOGGER.info('Checksum is different or force -- copying')
    local_dir = os.path.join(GTFS_DATA_DIR, time_suffix)
    ot_utils.mkdir_p(local_dir)

    latest_link = os.path.join(GTFS_DATA_DIR, 'latest')
    try:
        os.remove(latest_link)
    except (IOError, OSError):
        # Best effort: there may be no previous link to remove (e.g. first run).
        pass
    os.symlink(local_dir, latest_link)

    local_file = os.path.join(local_dir, FILE_NAME)
    shutil.move(tmp_file, local_file)
    ot_utils.unzip_file(local_file, local_dir)
    LOGGER.info('All gtfs files are in %s', local_dir)
    return local_dir
basedir = GTFS_ZIP_DIR local_dir = os.path.join(basedir,time_suffix) tmp_file = '/tmp/%s_tmp.zip' % (time_suffix) ot_utils.ftp_get_file(MOT_FTP,FILE_NAME,tmp_file) tmp_md5 = ot_utils.md5_for_file(tmp_file) last_dir = ot_utils.find_lastest_in_dir(basedir) last_file = os.path.join(last_dir,FILE_NAME) try: last_md5 = ot_utils.md5_for_file(last_file) except Exception,e: print e last_md5 = 'error_in_md5' if last_md5 != tmp_md5: print 'Checksum is different- copying' ot_utils.mkdir_p(local_dir) local_file = os.path.join(local_dir,FILE_NAME) shutil.move(tmp_file,local_file) else: print 'Checksum is identical - removing tmp file' os.remove(tmp_file) return if not download_only: ot_utils.unzip_file(local_file,local_dir) def find_gtfs_data_dir(): """ returns the lastest subfolder in DATA_DIR """ dirnames = glob.glob("%s/*" % (GTFS_DATA_DIR)) if not dirnames: