def collect_NeahBay_ssh(parsed_args, config, *args):
    """Download the Neah Bay sea surface height obs/forecast tarball and
    extract its csv file into the ssh directory.

    :param :py:class:`argparse.Namespace` parsed_args:
    :param :py:class:`nemo_nowcast.Config` config:

    :return: Nowcast system checklist items
    :rtype: dict
    """
    data_date = parsed_args.data_date
    forecast = parsed_args.forecast
    yyyymmdd = arrow.get(data_date).format("YYYYMMDD")
    logger.info(
        f"collecting Neah Bay ssh {forecast}Z obs/forecast for {data_date.format('YYYY-MM-DD')}"
    )
    # All download parameters come from the "ssh: download" config section
    dl_config = config["ssh"]["download"]
    tar_url = dl_config["url template"].format(yyyymmdd=yyyymmdd, forecast=forecast)
    tar_file = dl_config["tar file template"].format(yyyymmdd=yyyymmdd, forecast=forecast)
    tar_csv_member = dl_config["tarball csv file template"].format(
        yyyymmdd=yyyymmdd, forecast=forecast
    )
    # The extracted csv lands beside previously collected text files
    csv_file_path = (
        Path(config["ssh"]["ssh dir"]) / "txt" / Path(tar_file).with_suffix(".csv")
    )
    # Download the tarball into a throw-away directory; only the extracted
    # csv member is kept
    with tempfile.TemporaryDirectory() as tmp_dir:
        tar_file_path = Path(tmp_dir, tar_file)
        logger.debug(f"downloading {tar_url}")
        get_web_data(tar_url, NAME, tar_file_path)
        size = os.stat(tar_file_path).st_size
        logger.debug(f"downloaded {size} bytes from {tar_url}")
        _extract_csv(tar_csv_member, tar_file_path, csv_file_path)
    return {
        "data date": data_date.format("YYYY-MM-DD"),
        f"{forecast}": os.fspath(csv_file_path),
    }
def download_live_ocean(parsed_args, config, *args):
    """Download the day-averaged LiveOcean file that provides the Salish Sea
    model's western boundary temperature/salinity conditions.

    Waits (with retries) for the upstream process status file to signal that
    the file is ready, downloads it, confirms it is non-empty, and deflates it.

    :param :py:class:`argparse.Namespace` parsed_args:
    :param :py:class:`nemo_nowcast.Config` config:

    :return: Nowcast system checklist items
    :rtype: dict
    """
    run_date = parsed_args.run_date
    yyyymmdd = run_date.format("YYYYMMDD")
    dotted_yyyymmdd = run_date.format("YYYY.MM.DD")
    ymd = run_date.format("YYYY-MM-DD")
    logger.info(f"downloading Salish Sea western boundary day-averaged LiveOcean file for {ymd}")
    ts_download = config["temperature salinity"]["download"]
    process_status_url = ts_download["status file url template"].format(
        yyyymmdd=dotted_yyyymmdd
    )
    with requests.Session() as session:
        # Block until the upstream run reports the file is ready;
        # _is_file_ready() retries internally and raises RetryError on give-up
        try:
            _is_file_ready(process_status_url, session)
        except RetryError as exc:
            logger.error(
                f"giving up after {exc.last_attempt.attempt_number} attempts: "
                f"{exc.last_attempt.value[1]} for {process_status_url}"
            )
            raise WorkerError
        bc_file_url = ts_download["bc file url template"].format(yyyymmdd=dotted_yyyymmdd)
        dest_dir = Path(ts_download["dest dir"], yyyymmdd)
        filename = ts_download["file name"]
        lib.mkdir(dest_dir, logger, grp_name=config["file group"])
        get_web_data(
            bc_file_url, logger_name=NAME, filepath=dest_dir / filename, session=session
        )
    size = os.stat(dest_dir / filename).st_size
    logger.info(f"downloaded {size} bytes from {bc_file_url} to {dest_dir / filename}")
    # A zero-length file means the upstream product is broken; fail loudly
    if size == 0:
        logger.critical(f"Problem! 0 size file: {dest_dir / filename}")
        raise WorkerError
    nemo_cmd.api.deflate([dest_dir / filename], 1)
    return {ymd: os.fspath(dest_dir / filename)}
def _get_file(url, filename, dest_dir, session):
    """Download url into dest_dir/filename and return the resulting path.

    :type dest_dir: :class:`pathlib.Path`
    """
    destination = dest_dir / filename
    get_web_data(url, NAME, destination, session)
    n_bytes = destination.stat().st_size
    logger.debug(
        f'downloaded {n_bytes} bytes from {url}',
        extra={'url': url, 'dest_dir': dest_dir},
    )
    return destination
def _read_website(save_path):
    """Read a website with Neah Bay storm surge predictions/observations.

    The data is stored in a file in save_path.
    Returns the filename.
    """
    html = get_web_data(URL, NAME)
    logger.debug(f'downloaded Neah Bay storm surge observations & predictions '
                 f'from {URL}')
    # Parse the text table out of the HTML
    soup = BeautifulSoup(html, 'html.parser')
    # Strip stray square brackets from each table line.
    # Bug fix: the original loop rebound the loop variable
    # (for line in table: line = line.replace(...)), which discarded the
    # cleaned text and wrote the raw lines; build a new list instead so the
    # replacements actually take effect.
    table = [
        line.replace('[', '').replace(']', '')
        for line in soup.find('pre').contents
    ]
    logger.debug(
        'scraped observations & predictions table from downloaded HTML')
    # Save the table as a text file with the date it was generated as its name
    utc_now = datetime.datetime.now(pytz.timezone('UTC'))
    filepath = os.path.join(save_path, 'txt', f'sshNB_{utc_now:%Y-%m-%d_%H}.txt')
    with open(filepath, 'wt') as f:
        f.writelines(table)
    # NOTE(review): user='******' looks like a redacted/garbled permission
    # spec -- confirm the intended value (probably 'rw') against VCS history
    os.chmod(filepath, FilePerms(user='******', group='rw', other='r'))
    logger.debug(f'observations & predictions table saved to {filepath}')
    return filepath
def _get_file(url_tmpl, filename_tmpl, var, dest_dir_root, date, forecast, hr_str, session):
    """Download one forecast-hour file for a variable and return its path.

    :raises WorkerError: if the downloaded file has zero size
    """
    # date/forecast/hour feed both the file name and the URL templates
    fmt_args = {"date": date, "forecast": forecast, "hour": hr_str}
    filename = filename_tmpl.format(variable=var, **fmt_args)
    filepath = os.path.join(dest_dir_root, date, forecast, hr_str, filename)
    file_url = url_tmpl.format(filename=filename, **fmt_args)
    get_web_data(
        file_url, NAME, Path(filepath), session=session, wait_exponential_max=9000
    )
    size = os.stat(filepath).st_size
    logger.debug(f"downloaded {size} bytes from {file_url}")
    # An empty file means the upstream product is broken; fail loudly
    if size == 0:
        logger.critical(f"Problem! 0 size file: {file_url}")
        raise WorkerError
    return filepath
def _get_file(var, dest_dir_root, date, forecast, hr_str, session):
    """Download one forecast-hour file for a variable using the module's
    URL/filename templates and return its path.

    :raises WorkerError: if the downloaded file has zero size
    """
    filename = FILENAME_TEMPLATE.format(
        variable=var, date=date, forecast=forecast, hour=hr_str)
    filepath = os.path.join(dest_dir_root, date, forecast, hr_str, filename)
    fileURL = URL_TEMPLATE.format(forecast=forecast, hour=hr_str, filename=filename)
    # Tag log records with the forecast for downstream log filtering
    log_extra = {'forecast': forecast}
    get_web_data(
        fileURL, NAME, Path(filepath), session=session, wait_exponential_max=9000)
    size = os.stat(filepath).st_size
    logger.debug(f'downloaded {size} bytes from {fileURL}', extra=log_extra)
    # An empty file means the upstream product is broken; fail loudly
    if not size:
        logger.critical(f'Problem, 0 size file {fileURL}', extra=log_extra)
        raise WorkerError
    return filepath