def collect_NeahBay_ssh(parsed_args, config, *args):
    """Download the Neah Bay sea surface height obs/forecast tarball and
    extract its csv file into the ssh directory.

    :param :py:class:`argparse.Namespace` parsed_args:
    :param :py:class:`nemo_nowcast.Config` config:

    :return: Nowcast system checklist items
    :rtype: dict
    """
    data_date = parsed_args.data_date
    forecast = parsed_args.forecast
    yyyymmdd = arrow.get(data_date).format("YYYYMMDD")
    logger.info(
        f"collecting Neah Bay ssh {forecast}Z obs/forecast for {data_date.format('YYYY-MM-DD')}"
    )
    # All download parameters come from the "ssh: download" config section
    dl_config = config["ssh"]["download"]
    tar_url = dl_config["url template"].format(yyyymmdd=yyyymmdd, forecast=forecast)
    tar_file = dl_config["tar file template"].format(yyyymmdd=yyyymmdd, forecast=forecast)
    tar_csv_member = dl_config["tarball csv file template"].format(
        yyyymmdd=yyyymmdd, forecast=forecast
    )
    # The extracted csv lands beside previously collected text files
    csv_file_path = (
        Path(config["ssh"]["ssh dir"]) / "txt" / Path(tar_file).with_suffix(".csv")
    )
    # Download the tarball into a throw-away directory; only the extracted
    # csv member is kept
    with tempfile.TemporaryDirectory() as tmp_dir:
        tar_file_path = Path(tmp_dir, tar_file)
        logger.debug(f"downloading {tar_url}")
        get_web_data(tar_url, NAME, tar_file_path)
        size = os.stat(tar_file_path).st_size
        logger.debug(f"downloaded {size} bytes from {tar_url}")
        _extract_csv(tar_csv_member, tar_file_path, csv_file_path)
    return {
        "data date": data_date.format("YYYY-MM-DD"),
        f"{forecast}": os.fspath(csv_file_path),
    }
def download_live_ocean(parsed_args, config, *args):
    """Download the day-averaged LiveOcean file that provides the Salish Sea
    model's western boundary temperature/salinity conditions.

    Waits (with retries) for the upstream process status file to signal that
    the file is ready, downloads it, confirms it is non-empty, and deflates it.

    :param :py:class:`argparse.Namespace` parsed_args:
    :param :py:class:`nemo_nowcast.Config` config:

    :return: Nowcast system checklist items
    :rtype: dict
    """
    run_date = parsed_args.run_date
    yyyymmdd = run_date.format("YYYYMMDD")
    dotted_yyyymmdd = run_date.format("YYYY.MM.DD")
    ymd = run_date.format("YYYY-MM-DD")
    logger.info(f"downloading Salish Sea western boundary day-averaged LiveOcean file for {ymd}")
    ts_download = config["temperature salinity"]["download"]
    process_status_url = ts_download["status file url template"].format(
        yyyymmdd=dotted_yyyymmdd
    )
    with requests.Session() as session:
        # Block until the upstream run reports the file is ready;
        # _is_file_ready() retries internally and raises RetryError on give-up
        try:
            _is_file_ready(process_status_url, session)
        except RetryError as exc:
            logger.error(
                f"giving up after {exc.last_attempt.attempt_number} attempts: "
                f"{exc.last_attempt.value[1]} for {process_status_url}"
            )
            raise WorkerError
        bc_file_url = ts_download["bc file url template"].format(yyyymmdd=dotted_yyyymmdd)
        dest_dir = Path(ts_download["dest dir"], yyyymmdd)
        filename = ts_download["file name"]
        lib.mkdir(dest_dir, logger, grp_name=config["file group"])
        get_web_data(
            bc_file_url, logger_name=NAME, filepath=dest_dir / filename, session=session
        )
    size = os.stat(dest_dir / filename).st_size
    logger.info(f"downloaded {size} bytes from {bc_file_url} to {dest_dir / filename}")
    # A zero-length file means the upstream product is broken; fail loudly
    if size == 0:
        logger.critical(f"Problem! 0 size file: {dest_dir / filename}")
        raise WorkerError
    nemo_cmd.api.deflate([dest_dir / filename], 1)
    return {ymd: os.fspath(dest_dir / filename)}
def _get_file(url, filename, dest_dir, session):
    """Download url into dest_dir/filename and return the resulting path.

    :type dest_dir: :class:`pathlib.Path`
    """
    destination = dest_dir / filename
    get_web_data(url, NAME, destination, session)
    n_bytes = destination.stat().st_size
    logger.debug(
        f'downloaded {n_bytes} bytes from {url}',
        extra={'url': url, 'dest_dir': dest_dir},
    )
    return destination
def _read_website(save_path):
    """Read a website with Neah Bay storm surge predictions/observations.

    The data is stored in a file in save_path.
    Returns the filename.
    """
    html = get_web_data(URL, NAME)
    logger.debug(f'downloaded Neah Bay storm surge observations & predictions '
                 f'from {URL}')
    # Parse the text table out of the HTML
    soup = BeautifulSoup(html, 'html.parser')
    # Strip stray square brackets from each table line.
    # Bug fix: the original loop rebound the loop variable
    # (for line in table: line = line.replace(...)), which discarded the
    # cleaned text and wrote the raw lines; build a new list instead so the
    # replacements actually take effect.
    table = [
        line.replace('[', '').replace(']', '')
        for line in soup.find('pre').contents
    ]
    logger.debug(
        'scraped observations & predictions table from downloaded HTML')
    # Save the table as a text file with the date it was generated as its name
    utc_now = datetime.datetime.now(pytz.timezone('UTC'))
    filepath = os.path.join(save_path, 'txt', f'sshNB_{utc_now:%Y-%m-%d_%H}.txt')
    with open(filepath, 'wt') as f:
        f.writelines(table)
    # NOTE(review): user='******' looks like a redacted/garbled permission
    # spec -- confirm the intended value (probably 'rw') against VCS history
    os.chmod(filepath, FilePerms(user='******', group='rw', other='r'))
    logger.debug(f'observations & predictions table saved to {filepath}')
    return filepath
def _get_file(url_tmpl, filename_tmpl, var, dest_dir_root, date, forecast, hr_str, session):
    """Download one forecast-hour file for a variable and return its path.

    :raises WorkerError: if the downloaded file has zero size
    """
    # date/forecast/hour feed both the file name and the URL templates
    fmt_args = {"date": date, "forecast": forecast, "hour": hr_str}
    filename = filename_tmpl.format(variable=var, **fmt_args)
    filepath = os.path.join(dest_dir_root, date, forecast, hr_str, filename)
    file_url = url_tmpl.format(filename=filename, **fmt_args)
    get_web_data(
        file_url, NAME, Path(filepath), session=session, wait_exponential_max=9000
    )
    size = os.stat(filepath).st_size
    logger.debug(f"downloaded {size} bytes from {file_url}")
    # An empty file means the upstream product is broken; fail loudly
    if size == 0:
        logger.critical(f"Problem! 0 size file: {file_url}")
        raise WorkerError
    return filepath
def _get_file(var, dest_dir_root, date, forecast, hr_str, session):
    """Download one forecast-hour file for a variable using the module's
    URL/filename templates and return its path.

    :raises WorkerError: if the downloaded file has zero size
    """
    filename = FILENAME_TEMPLATE.format(
        variable=var, date=date, forecast=forecast, hour=hr_str)
    filepath = os.path.join(dest_dir_root, date, forecast, hr_str, filename)
    fileURL = URL_TEMPLATE.format(forecast=forecast, hour=hr_str, filename=filename)
    # Tag log records with the forecast for downstream log filtering
    log_extra = {'forecast': forecast}
    get_web_data(
        fileURL, NAME, Path(filepath), session=session, wait_exponential_max=9000)
    size = os.stat(filepath).st_size
    logger.debug(f'downloaded {size} bytes from {fileURL}', extra=log_extra)
    # An empty file means the upstream product is broken; fail loudly
    if not size:
        logger.critical(f'Problem, 0 size file {fileURL}', extra=log_extra)
        raise WorkerError
    return filepath