Example No. 1
0
def main():
    """Write an ``.md5`` checksum file next to every netCDF file in the cwd.

    For each ``*.nc`` file found, the MD5 checksum of its contents
    (via ``file_md5_checksum``) is written to ``<stem>.md5``.
    """
    files = glob.glob('*.nc')
    for ncf in files:
        # rsplit on the last dot keeps any extra dots in the file name,
        # so 'a.b.nc' maps to 'a.b.md5' instead of colliding at 'a.md5'
        # (the original split('.')[0] truncated multi-dot names).
        prefix = ncf.rsplit('.', 1)[0]
        outf = '{}.md5'.format(prefix)
        with open(outf, 'w') as f:
            f.write(file_md5_checksum(ncf))
Example No. 2
0
def main():
    """For every '*.nc' file in the current directory, emit a
    '<stem>.md5' companion file holding the file's MD5 checksum."""
    for nc_name in glob.glob('*.nc'):
        stem = nc_name.split('.')[0]
        checksum_path = '{}.md5'.format(stem)
        with open(checksum_path, 'w') as out:
            out.write(file_md5_checksum(nc_name))
Example No. 3
0
def _get(
    fullname: Path,
    github_url: str,
    branch: str,
    suffix: str,
    cache_dir: Path,
) -> Path:
    """Fetch a remote file into the local cache and verify its checksum.

    Parameters
    ----------
    fullname : Path
        Path of the file relative to the repository root.
    github_url : str
        Base URL of the GitHub repository holding the data.
    branch : str
        Git branch to download from.
    suffix : str
        Suffix of the data file; its checksum companion is ``<suffix>.md5``.
    cache_dir : Path
        Root of the local cache directory.

    Returns
    -------
    Path
        The verified, locally cached file.

    Raises
    ------
    FileNotFoundError
        If the remote ``.md5`` companion file does not exist.
    OSError
        If the checksum file cannot be read, or the downloaded file's
        checksum does not match the remote checksum.
    """
    cache_dir = cache_dir.absolute()
    local_file = cache_dir / branch / fullname
    md5name = fullname.with_suffix("{}.md5".format(suffix))
    md5file = cache_dir / branch / md5name

    if not local_file.is_file():
        # This will always leave this directory on disk.
        # We may want to add an option to remove it.
        local_file.parent.mkdir(parents=True, exist_ok=True)

        url = "/".join((github_url, "raw", branch, fullname.as_posix()))
        LOGGER.info("Fetching remote file: %s" % fullname.as_posix())
        urlretrieve(url, local_file)
        try:
            url = "/".join((github_url, "raw", branch, md5name.as_posix()))
            LOGGER.info("Fetching remote file md5: %s" % md5name.as_posix())
            urlretrieve(url, md5file)
        except HTTPError as e:
            msg = f"{md5name.as_posix()} not found. Aborting file retrieval."
            local_file.unlink()
            raise FileNotFoundError(msg) from e

        localmd5 = file_md5_checksum(local_file)
        try:
            with open(md5file) as f:
                remotemd5 = f.read()
        except OSError as e:
            # Log and re-raise. Previously this handler also swallowed the
            # checksum-mismatch OSError below, so the function logged the
            # error and then returned the path of a file it had just
            # deleted (or never verified).
            LOGGER.error(e)
            raise
        if localmd5.strip() != remotemd5.strip():
            # Remove the corrupt download so a retry re-fetches it.
            local_file.unlink()
            msg = """
                MD5 checksum does not match, try downloading dataset again.
                """
            raise OSError(msg)
    return local_file
Example No. 4
0
def main():
    """Create a '<name>.nc.md5' checksum file for each netCDF file
    ('*.nc') found in the current working directory."""
    for dataset in glob.glob('*.nc'):
        md5_path = '{}.md5'.format(dataset)
        with open(md5_path, 'w') as handle:
            handle.write(file_md5_checksum(dataset))
Example No. 5
0
def open_dataset(
    name,
    cache: bool = True,
    cache_dir: Path = _default_cache_dir,
    github_url: str = "https://github.com/Ouranosinc/xclim-testdata",
    branch: str = "main",
    **kws,
):
    """
    Open a dataset from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Parameters
    ----------
    name : str
        Name of the file containing the dataset. If no suffix is given, assumed
        to be netCDF ('.nc' is appended). The name may contain a path relative
        to the repository root.
    cache_dir : Path
        The directory in which to search for and write cached data.
    cache : bool
        If True, then cache data locally for use on subsequent calls
    github_url : str
        Github repository where the data is stored
    branch : str
        The git branch to download from
    kws : dict, optional
        Passed to xarray.open_dataset

    Raises
    ------
    FileNotFoundError
        If the remote ``.md5`` companion file does not exist.
    OSError
        If the downloaded file's checksum does not match the remote one.

    See Also
    --------
    xarray.open_dataset

    """
    name = Path(name)
    fullname = name.with_suffix(".nc")
    cache_dir = cache_dir.absolute()
    local_file = cache_dir / fullname
    md5name = fullname.with_suffix(".nc.md5")
    md5file = cache_dir / md5name

    if not local_file.is_file():
        # This will always leave this directory on disk.
        # We may want to add an option to remove it.
        local_file.parent.mkdir(parents=True, exist_ok=True)

        url = "/".join((github_url, "raw", branch, fullname.as_posix()))
        urlretrieve(url, local_file)
        try:
            url = "/".join((github_url, "raw", branch, md5name.as_posix()))
            urlretrieve(url, md5file)
        except HTTPError as e:
            # Also remove the data file: leaving it behind meant the next
            # call found it via is_file() and returned it unverified.
            msg = f"{md5name.as_posix()} not found. Aborting file retrieval."
            local_file.unlink()
            raise FileNotFoundError(msg) from e

        localmd5 = file_md5_checksum(local_file)
        with open(md5file) as f:
            remotemd5 = f.read()
        # Strip surrounding whitespace before comparing, as the sibling
        # _get() does: remote .md5 files end with a newline, which made
        # the raw string comparison fail spuriously.
        if localmd5.strip() != remotemd5.strip():
            local_file.unlink()
            msg = """
            MD5 checksum does not match, try downloading dataset again.
            """
            raise OSError(msg)

    ds = _open_dataset(local_file, **kws)

    if not cache:
        # Load into memory and drop the on-disk copy when caching is off.
        ds = ds.load()
        local_file.unlink()

    return ds