def _retrieve_file(ftp, name, store_path):
    """Fetch *name* from the current FTP directory into *store_path*.

    The local file is closed deterministically. If the transfer fails, the
    incomplete file is removed before the error is re-raised.
    """
    try:
        with open(store_path, "wb") as out_file:
            ftp.retrbinary('RETR %s' % name, out_file.write)
    except Exception:
        # A partial file left behind would be skipped as "already
        # downloaded" on the next run, so drop it before propagating.
        if os.path.exists(store_path):
            os.remove(store_path)
        raise


def download_type(type, output_dir):
    """Download the not-yet-present data files of one type via FTP.

    Changes into ``FTP_BASE_DIR/<type>/<sub dir>`` for every sub directory
    of the type and retrieves each listed entry whose name matches
    ``DATA_FILE_PATTERN``, unless a file with that name already exists in
    *output_dir*.

    Args:
        type: Name of the data type directory on the server. For "solar"
            the files are read from the type directory itself; every other
            type is read from its "recent" and "historical" sub directories.
        output_dir: Local directory the files are written to.

    Returns:
        List of local paths of the files downloaded by this call, in
        download order. Files that already existed are not included.

    Raises:
        Whatever the FTP connection or the local file operations raise.
        The connection is shut down before the error propagates.
    """
    # NOTE(review): init_ftp() presumably returns a connected ftplib.FTP;
    # only cwd/mlsd/retrbinary/quit are relied on here.
    ftp = init_ftp()
    saved = []
    if type == "solar":
        # no splitting into recent and historical here
        sub_dirs = (".",)
    else:
        sub_dirs = ("recent", "historical")

    try:
        for sub_dir in sub_dirs:
            # Start from the base directory again so the relative path
            # below resolves the same way for every sub directory.
            ftp.cwd(FTP_BASE_DIR)
            ftp.cwd(type + "/" + sub_dir)
            for file_info in ftp.mlsd():
                name = file_info[0]
                if name in (".", ".."):
                    continue
                if not DATA_FILE_PATTERN.match(name):
                    logger.debug("name '%s' no valid data file", name)
                    continue
                store_path = os.path.join(output_dir, name)
                if os.path.exists(store_path):
                    logger.info("already downloaded: '%s'", store_path)
                    continue
                _retrieve_file(ftp, name, store_path)
                saved.append(store_path)
    except Exception:
        # Release the connection, but never let a failing QUIT hide the
        # error that actually aborted the download.
        try:
            ftp.quit()
        except Exception:
            logger.debug("closing the FTP connection failed", exc_info=True)
        raise

    ftp.quit()
    return saved
def get_stations(download_dir):
    """Collect the station ids of all data files below *download_dir*.

    For every entry of ``TYPES`` the directory ``<download_dir>/<type>`` is
    listed. Each entry name matching ``DATA_FILE_PATTERN`` contributes the
    pattern's first capture group, which is treated as the station id.

    Args:
        download_dir: Directory containing one sub directory per data type.

    Returns:
        Set of the station ids found; empty if no name matched.
    """
    candidates = (
        DATA_FILE_PATTERN.match(entry)
        for data_type in TYPES
        for entry in os.listdir(os.path.join(download_dir, data_type))
    )
    # Non-matching names yield None and are dropped by the filter.
    return {hit.group(1) for hit in candidates if hit}