示例#1
0
def _create_image_fetcher():
    """Build the pooch fetcher used for the sample images.

    Returns
    -------
    tuple
        ``(image_fetcher, data_dir)``. When pooch cannot be imported the
        result is ``(None, legacy_data_dir)``. Otherwise it is the object
        returned by ``pooch.create`` together with the ``data`` sub-folder
        of its ``abspath``.
    """
    try:
        import pooch
    except ImportError:
        # pooch is optional: fall back on the standard data directory.
        return None, legacy_data_dir

    # ".dev" in the package version is rewritten to "+" before it is given
    # to pooch.
    version_for_pooch = __version__.replace(".dev", "+")
    remote_template = (
        "https://github.com/histolab/histolab/raw/{version}/histolab/"
    )

    # The cache root comes from pooch.os_cache("histolab-images"); the
    # HISTOLAB_DATADIR name is passed as ``env``.
    image_fetcher = pooch.create(
        path=pooch.os_cache("histolab-images"),
        base_url=remote_template,
        version=version_for_pooch,
        env="HISTOLAB_DATADIR",
        registry=registry,
        urls=registry_urls,
    )

    return image_fetcher, os.path.join(str(image_fetcher.abspath), "data")
示例#2
0
    def data_path(self,
                  subject,
                  path=None,
                  force_update=False,
                  update_path=None,
                  verbose=None):
        """Fetch the files of one subject and return their local paths.

        The storage folder is derived from the ``MNE_DATASETS_<sign>_PATH``
        config entry, which is initialised to ``~/mne_data`` when unset.
        The ``path`` argument is overwritten before it is read, and
        ``force_update``, ``update_path`` and ``verbose`` are not used here.

        Raises
        ------
        ValueError
            If ``subject`` is not in ``self.subject_list``.
        """
        if subject not in self.subject_list:
            raise ValueError("Invalid subject number")

        subject_tag = "{:02d}".format(subject)
        sign = self.code.split()[1]
        config_key = "MNE_DATASETS_{:s}_PATH".format(sign)
        folder_name = "MNE-{:s}-data".format(sign.lower())
        if get_config(config_key) is None:
            set_config(config_key, osp.join(osp.expanduser("~"), "mne_data"))
        path = osp.join(_get_path(None, config_key, sign), folder_name)

        remote_files = fs_get_file_list(self.figshare_id)
        checksums = fs_get_file_hash(remote_files)
        file_ids = fs_get_file_id(remote_files)
        fetcher = pooch.create(path=path,
                               base_url=MAMEM_URL,
                               registry=checksums)

        # _do_path_update(path, update_path, key, sign)
        # Keep only the entries whose characters 2-3 match the subject tag.
        return [
            fetcher.fetch(file_ids[name])
            for name in file_ids.keys()
            if name[2:4] == subject_tag
        ]
示例#3
0
def create_image_fetcher():
    """Build the pooch fetcher used for the scikit-image sample data.

    Returns
    -------
    tuple
        ``(image_fetcher, data_dir)``. When pooch cannot be imported the
        result is ``(None, legacy_data_dir)``. Otherwise it is the object
        returned by ``pooch.create`` together with the ``data`` sub-folder
        of its ``abspath``.
    """
    try:
        import pooch
    except ImportError:
        # pooch is optional: fall back on the standard data directory.
        return None, legacy_data_dir

    # Pooch expects a `+` in development versions, so `.dev` is replaced
    # with `+` when present.
    version_for_pooch = __version__.replace('.dev', '+')
    remote_template = (
        "https://github.com/scikit-image/scikit-image/raw/{version}/skimage/"
    )

    # The cache root comes from pooch.os_cache("scikit-image"); the
    # SKIMAGE_DATADIR name is passed as ``env``.
    image_fetcher = pooch.create(
        path=pooch.os_cache("scikit-image"),
        base_url=remote_template,
        version=version_for_pooch,
        env="SKIMAGE_DATADIR",
        registry=registry,
        urls=registry_urls,
    )

    return image_fetcher, osp.join(str(image_fetcher.abspath), 'data')
示例#4
0
def retrieve(cache_key, pooch_kwargs=None):
    """Download and cache the file registered under ``cache_key``.

    The cache location is read from the ``data_dir`` config key. See
    :ref:`data_dir_setting` for more information.

    Args:
        cache_key (str): Cache key returned by
            :func:`~satpy.aux_download.register_file`.
        pooch_kwargs (dict or None): Extra keyword arguments to pass to
            :meth:`pooch.Pooch.fetch`.

    Returns:
        Local path of the cached file.

    Raises:
        RuntimeError: If ``_should_download`` rejects ``cache_key``.

    """
    data_dir = satpy.config.get('data_dir')

    # With downloads disabled the lookup is delegated to the offline helper.
    if not satpy.config.get('download_aux'):
        return _retrieve_offline(data_dir, cache_key)

    if not _should_download(cache_key):
        raise RuntimeError("Auxiliary data download is not allowed during "
                           "tests. Mock the appropriate components of your "
                           "tests to not need the 'retrieve' function.")

    # The data directory doubles as the default base URL; the per-file URLS
    # are supplied through ``urls``.
    fetcher = pooch.create(data_dir, data_dir, registry=_FILE_REGISTRY,
                           urls=_FILE_URLS)
    fetch_options = pooch_kwargs if pooch_kwargs else {}
    return fetcher.fetch(cache_key, **fetch_options)
示例#5
0
def create_image_fetcher():
    """Build the pooch fetcher used for the scikit-image sample data.

    Returns
    -------
    tuple
        ``(image_fetcher, data_dir)``. When pooch cannot be imported the
        result is ``(None, legacy_data_dir)``. Otherwise it is the object
        returned by ``pooch.create`` together with the ``data`` sub-folder
        of its ``abspath``.
    """
    try:
        import pooch
        # ``retry_if_failed`` is only passed when the installed pooch
        # reports a version of at least 1.3.0; older releases may not even
        # expose ``__version__``.
        # Keep version check in synch with
        # scikit-image/requirements/optional.txt
        retry = {}
        if hasattr(pooch, '__version__'):
            installed = version.parse(pooch.__version__.lstrip('v'))
            if not installed < version.parse('1.3.0'):
                retry = {'retry_if_failed': 3}
    except ImportError:
        # pooch is optional: fall back on the standard data directory.
        return None, legacy_data_dir

    # Pooch expects a `+` in development versions, so the `.dev` marker is
    # rewritten; versions that already carry `+git` only lose `.dev0`.
    if '+git' in __version__:
        old, new = '.dev0+git', '+git'
    else:
        old, new = '.dev', '+'
    skimage_version_for_pooch = __version__.replace(old, new)

    # Versions containing `+` use the bare version in the URL, all others
    # get a `v` prefix.
    ref = '{version}' if '+' in skimage_version_for_pooch else 'v{version}'
    url = ("https://github.com/scikit-image/scikit-image/raw/"
           + ref + "/skimage/")

    image_fetcher = pooch.create(
        path=pooch.os_cache("scikit-image"),
        base_url=url,
        version=skimage_version_for_pooch,
        version_dev="main",
        env="SKIMAGE_DATADIR",
        registry=registry,
        urls=registry_urls,
        # Expands to `retry_if_failed=3` only when the version check above
        # allowed it.
        **retry,
    )

    return image_fetcher, osp.join(str(image_fetcher.abspath), 'data')
示例#6
0
def _retrieve_all_with_pooch(pooch_kwargs):
    """Fetch every file listed in ``_FILE_REGISTRY``.

    ``pooch_kwargs`` is forwarded to each ``fetch`` call; ``None`` is
    treated as no extra arguments.
    """
    fetch_options = {} if pooch_kwargs is None else pooch_kwargs
    data_dir = satpy.config.get('data_dir')
    # The data directory is passed both as the cache path and as base URL.
    fetcher = pooch.create(data_dir, data_dir, registry=_FILE_REGISTRY,
                           urls=_FILE_URLS)
    for registered_name in _FILE_REGISTRY:
        logger.info("Downloading extra data file '%s'...", registered_name)
        fetcher.fetch(registered_name, **fetch_options)
示例#7
0
文件: data.py 项目: scipp/ess
def _make_pooch():
    """Create the pooch object for the two AMOR example files."""
    import pooch

    # File name -> md5 checksum of the files this pooch manages.
    known_files = {
        "reference.nxs": "md5:56d493c8051e1c5c86fb7a95f8ec643b",
        "sample.nxs": "md5:4e07ccc87b5c6549e190bc372c298e83"
    }
    remote = 'https://public.esss.dk/groups/scipp/ess/amor/{version}/'
    return pooch.create(
        path=pooch.os_cache('ess/amor'),
        env='ESS_AMOR_DATA_DIR',
        base_url=remote,
        version=_version,
        registry=known_files)
示例#8
0
def test_pooch():
    """Create a pooch for one gempy tutorial CSV file and print it."""
    # The remote data is on Github
    remote = ("https://raw.githubusercontent.com/cgre-aachen/gempy_data/master/data"
              "/gempy_models/Tutorial_ch1-8_Onlap_relations/")
    # Single registry entry: file name -> hash string
    known_files = {
        "Tutorial_ch1-8_Onlap_relations_faults.csv": "19uheidhlkjdwhoiwuhc0uhcwljchw9ochwochw89dcgw9dcgwc"
    }
    goodboy = pooch.create(
        # Use the default cache folder for the OS
        path=pooch.os_cache("plumbus"),
        base_url=remote,
        # If this is a development version, get the data from the master branch
        version_dev="master",
        registry=known_files,
    )
    print(goodboy)
def create_pooch(base_url, files, target):
    """
    Create POOCH class to fetch files from a website
    Args:
        base_url: Base URL for the remote data source.
        files: A record of the files that are managed by this Pooch.
        target: The path to the local data storage folder
    Returns:
        POOCH class
    """
    # Every file is registered with a ``None`` hash because the hash is
    # always changing (Sciebo problem?).
    pc = pooch.create(
        base_url=base_url, path=target, registry=dict.fromkeys(files))
    # Pass the URL as a logging argument so the message is only formatted
    # when the record is actually emitted.
    logger.info("Pooch created for url: %s", base_url)
    return pc
示例#10
0
 def __init__(self, cache_dir=None):
     """Set up the pooch object and downloader for the response files.

     Parameters
     ----------
     cache_dir : path-like, optional
         Folder used as the pooch cache. When ``None``, the configured
         ``soxs_data_dir`` is used if it is an existing directory,
         otherwise ``pooch.os_cache("soxs")``.
     """
     import json
     import pooch
     import pkg_resources
     if cache_dir is None:
         # Read the configured directory once and reuse the value.
         configured_dir = soxs_cfg.get("soxs", "soxs_data_dir")
         if os.path.isdir(configured_dir):
             cache_dir = configured_dir
         else:
             cache_dir = pooch.os_cache("soxs")

     # Close the packaged registry stream as soon as it has been parsed.
     with pkg_resources.resource_stream(
             "soxs", "file_hash_registry.json") as registry_stream:
         self._registry = json.load(registry_stream)
     self.pooch_obj = pooch.create(
         path=cache_dir,
         registry=self._registry,
         env="SOXS_DATA_DIR",
         base_url="https://hea-www.cfa.harvard.edu/soxs/soxs_responses/"
     )
     self.dl = pooch.HTTPDownloader(progressbar=True)
示例#11
0
def _make_pooch():
    """Create the pooch object for the scippneutron example files."""
    import pooch

    # File name -> md5 checksum of the files this pooch manages.
    known_files = {
        'iris26176_graphite002_sqw.nxs':
        'md5:7ea63f9137602b7e9b604fe30f0c6ec2',
        'loki-at-larmor.hdf5': 'md5:7fc48639bb2c409adb9a1dedd53d1c83',
        'powder-event.h5': 'md5:f92ca1da4c2d42c3d8a5d1b1d79aa5a4',
        'mcstas_sans.h5': 'md5:a608255dd757905490882eb35e209d12',
        'CNCS_51936_event.nxs': 'md5:5ba401e489260a44374b5be12b780911',
        'GEM40979.raw': 'md5:6df0f1c2fc472af200eec43762e9a874',
        'PG3_4844_calibration.h5': 'md5:290f5108aa9ff0b1c5a2ac8dc2c1cb15',
        'PG3_4844_event.nxs': 'md5:d5ae38871d0a09a28ae01f85d969de1e',
        'PG3_4866_event.nxs': 'md5:3d543bc6a646e622b3f4542bc3435e7e',
        'PG3_4871_event.nxs': 'md5:a3d0edcb36ab8e9e3342cd8a4440b779',
        'WISH00016748.raw': 'md5:37ecc6f99662b57e405ed967bdc068af',
    }
    remote = 'https://public.esss.dk/groups/scipp/scippneutron/{version}/'
    return pooch.create(
        path=pooch.os_cache('scippneutron'),
        env='SCIPPNEUTRON_DATA_DIR',
        retry_if_failed=3,
        base_url=remote,
        version=_version,
        registry=known_files)
示例#12
0
    def data_path(self,
                  subject,
                  path=None,
                  force_update=False,
                  update_path=None,
                  verbose=None):
        """Fetch the files of one subject and return their local paths.

        ``path`` is handed to ``get_dataset_path`` to locate the storage
        folder. ``force_update``, ``update_path`` and ``verbose`` are not
        used by this method.

        Raises
        ------
        ValueError
            If ``subject`` is not in ``self.subject_list``.
        """
        if subject not in self.subject_list:
            raise ValueError("Invalid subject number")

        subject_tag = "{:02d}".format(subject)
        sign = self.code.split()[1]
        folder_name = "MNE-{:s}-data".format(sign.lower())
        path = osp.join(get_dataset_path(sign, path), folder_name)

        remote_files = fs_get_file_list(self.figshare_id)
        checksums = fs_get_file_hash(remote_files)
        file_ids = fs_get_file_id(remote_files)
        fetcher = pooch.create(path=path,
                               base_url=MAMEM_URL,
                               registry=checksums)

        # Keep only the entries whose characters 2-3 match the subject tag.
        return [
            fetcher.fetch(file_ids[name])
            for name in file_ids.keys()
            if name[2:4] == subject_tag
        ]
示例#13
0
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
#
"""
Functions to load sample datasets used in the Harmonica docs.
"""
import pandas as pd
import pkg_resources
import pooch
import xarray as xr

from .._version import __version__ as version

# Module-level pooch object for the sample datasets. The cache folder comes
# from pooch.os_cache("harmonica"), base_url carries a {version} placeholder,
# and HARMONICA_DATA_DIR is passed as the ``env`` name.
REGISTRY = pooch.create(
    path=pooch.os_cache("harmonica"),
    base_url="https://github.com/fatiando/harmonica/raw/{version}/data/",
    version=version,
    version_dev="main",
    env="HARMONICA_DATA_DIR",
)
# Load the file registry shipped inside the harmonica.datasets package; the
# ``with`` block closes the resource stream afterwards.
with pkg_resources.resource_stream("harmonica.datasets",
                                   "registry.txt") as registry_file:
    REGISTRY.load_registry(registry_file)


def locate():
    r"""
    The absolute path to the sample data storage location on disk.

    This is where the data are saved on your computer. The location is
    dependent on the operating system. The folder locations are defined by the
    ``appdirs``  package (see the `appdirs documentation
示例#14
0
from kikuchipy.signals import EBSD, EBSDMasterPattern
from kikuchipy import load
from kikuchipy.release import version
from kikuchipy.data._registry import registry, registry_urls

# Names exported by this module.
__all__ = [
    "nickel_ebsd_small",
    "nickel_ebsd_large",
    "nickel_ebsd_master_pattern_small",
]

# Module-level pooch object. base_url is empty and download locations are
# supplied through ``urls``; ".dev" in the release version is rewritten to
# "+" before it is passed as ``version``.
fetcher = ppooch.create(
    path=ppooch.os_cache("kikuchipy"),
    base_url="",
    version=version.replace(".dev", "+"),
    env="KIKUCHIPY_DATA_DIR",
    registry=registry,
    urls=registry_urls,
)
# "data" sub-folder of the fetcher's path.
cache_data_path = fetcher.path.joinpath("data")
# Absolute path of the directory containing this module.
package_data_path = Path(os.path.abspath(os.path.dirname(__file__)))


def _has_hash(path, expected_hash):
    """Check if the provided path has the expected hash."""
    if not os.path.exists(path):
        return False
    else:
        return ppooch.utils.file_hash(path) == expected_hash

示例#15
0
import pooch

# Going to set this up a bit later...
from . import __version__

# File name -> hash string for the single file managed by PLANTS.
_registry = {
    "fullSoy_2-12a.ply":
    "e12f192188058851289f0531dc456c6df31b562405b77e382e0f9e4b1c899108"
}

# Module-level pooch object. base_url carries a {version} placeholder filled
# from the package __version__; HOTHOUSE_DATA_DIR is passed as the ``env``
# name.
PLANTS = pooch.create(
    path=pooch.os_cache("hothouse"),
    base_url="https://github.com/MatthewTurk/hothouse/raw/{version}/data/",
    version=__version__,
    version_dev="master",
    env="HOTHOUSE_DATA_DIR",
    registry=_registry,
)
示例#16
0
import logging
from pathlib import Path

import pooch

from ._env import BGEN_CACHE_HOME

# Only ``get`` is exported.
__all__ = ["get"]

# Raise pooch's logger threshold so only errors are reported.
pooch.get_logger().setLevel(logging.ERROR)

# Module-level pooch object caching the example files under
# BGEN_CACHE_HOME / "test_data". The registry maps file names to hashes.
goodboy = pooch.create(
    path=BGEN_CACHE_HOME / "test_data",
    base_url="https://bgen-examples.s3.amazonaws.com/",
    registry={
        "complex.23bits.no.samples.bgen": "25d30a4e489da1aeb05f9893af98e8bf3b09d74db2982bf1828f8c8565886fc6",
        "haplotypes.bgen": "84e0b59efcc83c7c305cf5446e5dc26b49b15aeb4157a9eb36451376ce3efe4c",
        "haplotypes.bgen.metadata.corrupted": "8f55628770c1ae8155c1ced2463f15df80d32bc272a470bb1d6b68225e1604b1",
        "wrong.metadata": "f746345605150076f3234fbeea7c52e86bf95c9329b2f08e1e3e92a7918b98fb",
        "merged_487400x220000.bgen": "8dccd89a53e048ea24305cb04c3653b0eb2af265b04d05a60cfa2cca5fb7ae94",
        "merged_487400x2420000.bgen": "81aecfab787bee1cb7f1d0d21f2465c581a4db78011d8b0f0f73c868e17ec888",
        "merged_487400x4840000.bgen": "5ef82f92a001615c93bbb317a9fd2329272370c6d481405d4f8f0a2b7fddf68b",
    },
)


def get(filename: str) -> Path:
    """Fetch ``filename`` through ``goodboy`` and return it as a ``Path``."""
    fetched = goodboy.fetch(filename)
    return Path(fetched)
示例#17
0
        downloader = pooch.HTTPDownloader(auth=auth, progressbar=True)
        try:
            login = requests.get(url)
            downloader(login.url, output_file, dataset)
        except requests.exceptions.HTTPError as error:
            if 'Unauthorized' in str(error):
                pooch.get_logger().error('Wrong username/password!')
                self._username = None
                self._password = None
            raise error


# Shared downloader instance passed to the fetch calls in this module.
_earthdata_downloader = EarthDataDownloader()

# Module-level pooch object caching files under pooch.os_cache('icepack').
# base_url is empty and the registry is loaded from the packaged file below.
nsidc_data = pooch.create(path=pooch.os_cache('icepack'),
                          base_url='',
                          registry=None)

# Load the registry bundled with the package; the context manager closes the
# resource stream once it has been read.
with pkg_resources.resource_stream('icepack',
                                   'registry.txt') as registry_file:
    nsidc_data.load_registry(registry_file)


def fetch_measures_antarctica():
    r"""Fetch the MEaSUREs Antarctic velocity map"""
    velocity_file = 'antarctic_ice_vel_phase_map_v01.nc'
    return nsidc_data.fetch(velocity_file, downloader=_earthdata_downloader)


def fetch_measures_greenland():
    r"""Fetch the MEaSUREs Greenland velocity map"""
    return [
示例#18
0
import logging
from pathlib import Path

import pooch

# Only ``example_filepath`` is exported.
__all__ = ["example_filepath"]

# Raise pooch's logger threshold so only errors are reported.
pooch.get_logger().setLevel(logging.ERROR)

# Module-level pooch object caching files under pooch.os_cache("hmmer").
# The registry holds one gzip-compressed file and its hash.
goodboy = pooch.create(
    path=pooch.os_cache("hmmer"),
    base_url="https://hmmer-py.s3.eu-west-2.amazonaws.com/",
    registry={
        "Pfam-A_24.hmm.gz": "32791a1b50837cbe1fca1376a3e1c45bc84b32dd4fe28c92fd276f3f2c3a15e3"
    },
)


def example_filepath(filename: str) -> Path:
    """Fetch ``filename + ".gz"`` with a ``Decompress`` processor.

    The value returned by ``goodboy.fetch`` is wrapped in a ``Path``.
    """
    compressed_name = filename + ".gz"
    fetched = goodboy.fetch(compressed_name, processor=pooch.Decompress())
    return Path(fetched)
示例#19
0
try:
    import cartopy.feature as cfeature
    import cartopy.crs as ccrs
    from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
except ImportError:
    pass

from ..version import full_version

# Use the "default" warning filter; otherwise DeprecationWarning won't be
# shown, kind of defeating the purpose.
warnings.simplefilter("default")

# Module-level pooch object. The cache path is given as a list of path
# components, base_url carries a {version} placeholder filled from
# full_version, and VERDE_DATA_DIR is passed as the ``env`` name.
POOCH = pooch.create(
    path=["~", ".verde", "data"],
    base_url="https://github.com/fatiando/verde/raw/{version}/data/",
    version=full_version,
    version_dev="master",
    env="VERDE_DATA_DIR",
)
# Load the registry file that sits next to this module.
POOCH.load_registry(os.path.join(os.path.dirname(__file__), "registry.txt"))


def _setup_map(ax,
               xticks,
               yticks,
               crs,
               region,
               land=None,
               ocean=None,
               borders=None,
               states=None):
示例#20
0
import tempfile
from pathlib import Path
from typing import Union

import pooch
"""
Load sample data.
"""

# Module-level pooch object for the sample files.
POOCH = pooch.create(
    # Use the default cache folder for the OS
    path=pooch.os_cache("bed_reader"),
    # The remote data is on Github
    base_url="https://raw.githubusercontent.com/" +
    "fastlmm/bed-reader/master/bed_reader/tests/data/",
    # NOTE(review): no ``version`` is passed and base_url has no {version}
    # placeholder, so ``version_dev`` presumably has no effect here — confirm.
    version_dev="master",
    # Name of the environment variable passed as ``env``
    env="BED_READER_DATA_DIR",
)

# The registry file lives in the "tests" folder next to this module
registry_file = Path(__file__).parent / "tests/registry.txt"
# Load this registry file; it specifies the files that can be fetched
POOCH.load_registry(registry_file)


def sample_file(filepath: Union[str, Path]) -> str:
    """
    Retrieve a sample .bed file. (Also retrieves associated .fam and .bim files).
示例#21
0
文件: cbook.py 项目: akrherz/MetPy
# ``basestring`` only exists on Python 2; on Python 3 the lookup raises
# NameError and ``str`` is used instead.
try:
    string_type = basestring
except NameError:
    string_type = str


# TODO: This can go away when we remove Python 2
def is_string_like(s):
    """Return whether ``s`` is an instance of ``string_type``."""
    matches_string_type = isinstance(s, string_type)
    return matches_string_type


# Module-level pooch object. base_url carries a {version} placeholder filled
# with 'v' + __version__; TEST_DATA_DIR is passed as the ``env`` name.
POOCH = pooch.create(
    path=pooch.os_cache('metpy'),
    base_url='https://github.com/Unidata/MetPy/raw/{version}/staticdata/',
    version='v' + __version__,
    version_dev='master',
    env='TEST_DATA_DIR')

# Check if we're running from a git clone and if so, bash the path attribute with the path
# to git's local data store (un-versioned)
# Look for the staticdata directory (i.e. this is a git checkout)
if os.path.exists(os.path.join(os.path.dirname(__file__), '..', 'staticdata')):
    POOCH.path = os.path.join(os.path.dirname(__file__), '..', 'staticdata')

# Load the manifest file that sits next to this module as the registry.
POOCH.load_registry(os.path.join(os.path.dirname(__file__), 'static-data-manifest.txt'))


def get_test_data(fname, as_file_obj=True):
    """Access a file from MetPy's collection of test data."""
    path = POOCH.fetch(fname)
示例#22
0
from .exceptions import ParameterError


# Names exported by this module.
__all__ = [
    "find_files",
    "example",
    "ex",
    "list_examples",
    "example_info",
]


# Instantiate the pooch
# The cache folder is LIBROSA_DATA_DIR when that environment variable is set,
# otherwise pooch.os_cache("librosa").
__data_path = os.environ.get("LIBROSA_DATA_DIR", pooch.os_cache("librosa"))
__GOODBOY = pooch.create(
    __data_path, base_url="https://librosa.org/data/audio/", registry=None
)

# Load the registry from example_data/registry.txt inside this package.
__GOODBOY.load_registry(
    resource_filename(__name__, str(Path("example_data") / "registry.txt"))
)

# Parse example_data/index.json into the module-level track map.
with open(
    resource_filename(__name__, str(Path("example_data") / "index.json")), "r"
) as fdesc:
    __TRACKMAP = json.load(fdesc)


def example(key, *, hq=False):
    """Retrieve the example recording identified by 'key'.
示例#23
0
def load_data(subject, runs, path=None, force_update=False, update_path=None,
              base_url=EEGMI_URL, verbose=None):  # noqa: D301
    """Get paths to local copies of EEGBCI dataset files.

    This will fetch data for the EEGBCI dataset :footcite:`SchalkEtAl2004`, which is also
    available at PhysioNet :footcite:`GoldbergerEtAl2000`.

    Parameters
    ----------
    subject : int
        The subject to use. Can be in the range of 1-109 (inclusive).
    runs : int | list of int
        The runs to use. See Notes for details.
    path : None | str
        Location of where to look for the EEGBCI data storing location.
        If None, the environment variable or config parameter
        ``MNE_DATASETS_EEGBCI_PATH`` is used. If it doesn't exist, the
        "~/mne_data" directory is used. If the EEGBCI dataset
        is not found under the given path, the data
        will be automatically downloaded to the specified folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    update_path : bool | None
        If True, set the MNE_DATASETS_EEGBCI_PATH in mne-python
        config to the given path. If None, the user is prompted.
    base_url : str
        The URL root for the data.
    %(verbose)s

    Returns
    -------
    paths : list
        List of local data paths of the given type.

    Notes
    -----
    The run numbers correspond to:

    =========  ===================================
    run        task
    =========  ===================================
    1          Baseline, eyes open
    2          Baseline, eyes closed
    3, 7, 11   Motor execution: left vs right hand
    4, 8, 12   Motor imagery: left vs right hand
    5, 9, 13   Motor execution: hands vs feet
    6, 10, 14  Motor imagery: hands vs feet
    =========  ===================================

    For example, one could do::

        >>> from mne.datasets import eegbci
        >>> eegbci.load_data(1, [4, 10, 14], os.getenv('HOME') + '/datasets') # doctest:+SKIP

    This would download runs 4, 10, and 14 (hand/foot motor imagery) runs from
    subject 1 in the EEGBCI dataset to the 'datasets' folder, and prompt the
    user to save the 'datasets' path to the  mne-python config, if it isn't
    there already.

    References
    ----------
    .. footbibliography::
    """  # noqa: E501
    import pooch

    # a single run number is wrapped in a list
    if not hasattr(runs, '__iter__'):
        runs = [runs]

    # get local storage path
    config_key = 'MNE_DATASETS_EEGBCI_PATH'
    folder = 'MNE-eegbci-data'
    name = 'EEGBCI'
    path = _get_path(path, config_key, name)

    # extract path parts
    pattern = r'(?:https?://.*)(files)/(eegmmidb)/(\d+\.\d+\.\d+)/?'
    match = re.compile(pattern).match(base_url)
    if match is None:
        raise ValueError('base_url does not match the expected EEGMI folder '
                         'structure. Please notify MNE-Python developers.')
    # the three captured URL parts become sub-folders of the local storage
    base_path = op.join(path, folder, *match.groups())

    # create the download manager
    fetcher = pooch.create(
        path=base_path,
        base_url=base_url,
        version=None,   # Data versioning is decoupled from MNE-Python version.
        registry=None,  # Registry is loaded from file, below.
        retry_if_failed=2  # 2 retries = 3 total attempts
    )

    # load the checksum registry; the context manager closes the packaged
    # resource stream once it has been read
    with pkg_resources.resource_stream(
            'mne', op.join('data', 'eegbci_checksums.txt')) as registry:
        fetcher.load_registry(registry)

    # fetch the file(s)
    data_paths = []
    for run in runs:
        file_part = f'S{subject:03d}/S{subject:03d}R{run:02d}.edf'
        destination = op.join(base_path, file_part)
        # removing the local copy first forces a fresh download
        if force_update and op.isfile(destination):
            os.remove(destination)
        data_paths.append(fetcher.fetch(file_part))
        # update path in config if desired
        _do_path_update(path, update_path, config_key, name)
    return data_paths
示例#24
0
文件: cbook.py 项目: nawendt/MetPy
# Copyright (c) 2008,2015,2018,2019 MetPy Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
"""Collection of generally useful utility code from the cookbook."""

import os
from pathlib import Path

import numpy as np
import pooch

from . import __version__

# Module-level pooch object. base_url carries a {version} placeholder filled
# with 'v' + __version__.
POOCH = pooch.create(
    path=pooch.os_cache('metpy'),
    base_url='https://github.com/Unidata/MetPy/raw/{version}/staticdata/',
    version='v' + __version__,
    version_dev='main')

# Check if we have the data available directly from a git checkout, either from the
# TEST_DATA_DIR variable, or looking relative to the path of this module's file. Use this
# to override Pooch's path and disable downloading from GitHub.
dev_data_path = os.environ.get('TEST_DATA_DIR',
                               Path(__file__).parents[2] / 'staticdata')
if Path(dev_data_path).exists():
    POOCH.path = dev_data_path
    POOCH.base_url = 'NODOWNLOAD:'

# Load the manifest file that sits next to this module as the registry.
POOCH.load_registry(Path(__file__).parent / 'static-data-manifest.txt')

示例#25
0
from numba import jit, prange

try:
    from tqdm import tqdm
except ImportError:
    tqdm = None

# Default cache location, given as a list of path components.
INPUTDATA_DIR = ['~', '.pop_tools']

# On Cheyenne/Casper and/or CGD machines, use local inputdata directory
# See: https://github.com/NCAR/pop-tools/issues/24#issue-523701065

INPUTDATA = pooch.create(
    # This is still the default in case the environment variable isn't defined
    path=INPUTDATA_DIR,
    version_dev='master',
    base_url='https://svn-ccsm-inputdata.cgd.ucar.edu/trunk/',
    # The name of the environment variable that can overwrite the path argument
    env='CESMDATAROOT',
)

# Load the registry bundled with the package; the context manager closes the
# resource stream once it has been read.
with pkg_resources.resource_stream(
        'pop_tools', 'inputdata_registry.txt') as _registry_stream:
    INPUTDATA.load_registry(_registry_stream)

# The progress bar is only requested when tqdm could be imported.
# NOTE(review): verify=False is forwarded to the downloader and presumably
# disables TLS certificate verification — confirm this is intended.
if tqdm is not None:
    downloader = pooch.HTTPDownloader(progressbar=True,
                                      verify=False,
                                      allow_redirects=True)
else:
    downloader = pooch.HTTPDownloader(verify=False, allow_redirects=True)

grid_def_file = pkg_resources.resource_filename('pop_tools',
示例#26
0
# Create a new friend to manage your sample data storage
GOODBOY = pooch.create(
    # Folder where the data will be stored. A sensible default would be the
    # default cache folder for your OS:
    # path=pooch.os_cache("mypackage_test"),
    path=data_path,
    # Base URL of the remote data store. Will call .format on this string to
    # insert the version (see below).
    # Project: https://github.com/JustinGOSSES/predictatops/
    # Original data: https://github.com/JustinGOSSES/MannvilleGroup_Strat_Hackathon/tree/master/SPE_006_originalData
    # Previous value:
    #base_url="https://github.com/JustinGOSSES/predictatops/raw/{version}/demo/mannville_demo_data/",
    base_url="https://github.com/JustinGOSSES/predictatops/raw/{version}/demo/",
    # Pooches are versioned so that you can use multiple versions of a package
    # simultaneously. Use PEP440 compliant version number. The version will be
    # appended to the path.
    # Previous value:
    #version="v0.0.0-alpha",
    version="v0.0.3-alpha",
    # If a version has a "+XX.XXXXX" suffix, we'll assume that this is a dev
    # version and replace the version with this string.
    version_dev="master",
    # An environment variable that overwrites the path.
    # NOTE(review): this passes the path itself rather than the name of an
    # environment variable — looks unintended; confirm.
    env=data_path,
    # The cache file registry. A dictionary with all files managed by this pooch.
    # Keys are the file names (relative to *base_url*) and values are their
    # respective SHA256 hashes. Files will be downloaded automatically when needed
    # (see fetch_gravity_data).  1414057d0c5235b0ed13103c72c864ddfd34a0c8
    # registry={"OilSandsDB/LITHOLOGY_DIC.TXT": "83f3be338d6fa42eeadf60466c716e4370fe8723682c187d214a054bd695880a"}
)
# You can also load the registry from a file. Each line contains a file name and
# it's sha256 hash separated by a space. This makes it easier to manage large
示例#27
0
import numpy as np
import pandas as pd
import pooch

try:
    import cartopy.crs as ccrs
    from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
except ImportError:
    pass

from ..version import full_version

# Module-level pooch object for the sample datasets. The cache folder comes
# from pooch.os_cache("verde"), base_url carries a {version} placeholder
# filled from full_version, and VERDE_DATA_DIR is passed as the ``env`` name.
REGISTRY = pooch.create(
    path=pooch.os_cache("verde"),
    base_url="https://github.com/fatiando/verde/raw/{version}/data/",
    version=full_version,
    version_dev="master",
    env="VERDE_DATA_DIR",
)
# Load the file registry shipped inside the verde.datasets package; the
# ``with`` block closes the resource stream afterwards.
with pkg_resources.resource_stream("verde.datasets",
                                   "registry.txt") as registry_file:
    REGISTRY.load_registry(registry_file)


def locate():
    r"""
    The absolute path to the sample data storage location on disk.

    This is where the data are saved on your computer. The location is
    dependent on the operating system. The folder locations are defined by the
    ``appdirs``  package (see the `appdirs documentation
示例#28
0
"""
Functions to load sample data
"""

import os
from pathlib import Path

import pkg_resources
import pooch

# Module-level pooch object for the sample data. The cache path is given as
# a list of path components and POP_TOOLS_DATA_DIR is passed as the ``env``
# name.
DATASETS = pooch.create(
    path=['~', '.pop_tools', 'data'],
    version_dev='master',
    base_url='ftp://ftp.cgd.ucar.edu/archive/aletheia-data/cesm-data/ocn/',
    env='POP_TOOLS_DATA_DIR',
)
# Load the registry bundled with the package; the context manager closes the
# resource stream once it has been read.
with pkg_resources.resource_stream(
        'pop_tools', 'data_registry.txt') as _registry_stream:
    DATASETS.load_registry(_registry_stream)


class UnzipZarr(pooch.processors.Unzip):
    """
    Processor that unpacks a zarr store zip archive and
    returns the zarr store path.
    """
    def __call__(self, fname, action, pooch):
        """
        Extract all files from the given archive.
        Parameters
        ----------
        fname : str
示例#29
0
    def __init__(
        self,
        cache_path=None,
        registry=None,
        project_dir=None,
        check_hash=True,
        show_progress=True,
    ):
        """Class to download FARS data from the NHTSA FTP repository.

        Note that on first run, this will take a long time to fully download the data, as the repository is large.
        Expect first run to take 5-10+ minutes, depending on your setup.

        Parameters
        ----------
        cache_path: `os.path` or path-like, or str, optional
            The path to save the downloaded FARS files to.
            Default is `pooch.os_cache("fars")`, the default cache path as defined by the OS. See `pooch` and
            `appdirs` documentations.
            If `project_dir` is not `None`, files will be downloaded to `project_dir/cache_path`
        registry:
            Path to registry file. Defaults to path for packaged `registry.txt` file. Override at your own risk.
        project_dir: path-like or str, optional
            Top level directory for your current project. If a path is provided, and `cache_path` is left as default,
            files will be downloaded to `project_dir/data/fars`. If `cache_path` is not the default, files will be
            downloaded to `project_dir/cache_path`. The directory is created if it does not exist, and the value
            is stored on the instance as a `pathlib.Path`.
        check_hash: bool
            Forwarded to `pooch.create` as `allow_updates`. Defaults to True and should normally be left at that
            default. Setting it to False is meant for when the FARS database is updated before the registry
            can be modified.
        show_progress: bool
            Use pooch built-in feature to show progress bars during download. Default True.
        """
        if project_dir:
            # Normalise to Path so that a plain string also supports
            # ``.mkdir`` below.
            self.project_dir = Path(project_dir)
            if cache_path:
                self.cache_path = self.project_dir / cache_path
            else:
                self.cache_path = self.project_dir / "data" / "fars"
            self.project_dir.mkdir(parents=True, exist_ok=True)
            self.cache_path.mkdir(parents=True, exist_ok=True)
        else:
            self.project_dir = None
            if cache_path:
                self.cache_path = Path(cache_path)
                self.cache_path.mkdir(parents=True, exist_ok=True)
            else:
                self.cache_path = pooch.os_cache("fars")

        if registry:
            self.registry = Path(registry)
        else:
            # Fall back on the registry file located next to this module.
            self.registry = os.path.join(os.path.dirname(__file__),
                                         "registry.txt")

        self.check_hash = check_hash
        self.show_progress = show_progress

        # The registry starts empty and is filled from the file just below.
        self.GOODBOY = pooch.create(
            path=self.cache_path,
            base_url="https://www.nhtsa.gov/filebrowser/download/",
            registry=None,
            allow_updates=self.check_hash,
        )

        self.GOODBOY.load_registry(self.registry)
示例#30
0
BASE_URL = 'ftp://ftp.cgd.ucar.edu/archive/aletheia-data'
TUTORIAL_DATA_URL = f'{BASE_URL}/tutorial-data'

# Names of the tutorial files, in the order they appear in URLS.
_TUTORIAL_FILE_NAMES = (
    'thetao_Omon_historical_GISS-E2-1-G_r1i1p1f1_gn_185001-185512.nc',
    'woa2013v2-O2-thermocline-ann.nc',
    'NOAA_NCDC_ERSST_v3b_SST.nc',
    'sst_indices.csv',
    'air_temperature.nc',
    'rasm.nc',
    'co2.nc',
    'moc.nc',
    'aviso_madt_2015.tar.gz',
    'NARR_19930313_0000.nc',
    'MPAS.nc',
    'Oklahoma.static.nc',
    'uas.rcp85.CanESM2.CRCM5-UQAM.day.NAM-44i.raw.Colorado.nc',
    'uas.hist.CanESM2.CRCM5-UQAM.day.NAM-44i.raw.Colorado.nc',
    'uas.gridMET.NAM-44i.Colorado.nc',
    'T2_RR_F_2014_08.nc',
    'wrfinput_d02',
)

# Each file name maps to the URL of the same name under TUTORIAL_DATA_URL.
URLS = {
    file_name: f'{TUTORIAL_DATA_URL}/{file_name}'
    for file_name in _TUTORIAL_FILE_NAMES
}
# Module-level pooch object. The cache path is looked up per host in
# data_cache_map, and per-file download locations are supplied through URLS.
DATASETS = pooch.create(
    path=data_cache_map[hostname],
    version_dev='master',
    base_url='ftp://ftp.cgd.ucar.edu/archive/aletheia-data',
    urls=URLS,
)

# Load the registry file that sits next to this module.
DATASETS.load_registry(os.path.join(os.path.dirname(__file__), 'registry.txt'))
示例#31
0
# Pooch expects a `+` to exist in development versions.
# Since scikit-image doesn't follow that convention, we have to manually
# replace `.dev` with a `+` if it exists.
# This helps pooch understand that it should look in master
# to find the required files
pooch_version = __version__.replace('.dev', '+')
url = "https://github.com/scikit-image/scikit-image/raw/{version}/skimage/"

# Create a new friend to manage your sample data storage
image_fetcher = pooch.create(
    # Pooch uses appdirs to select an appropriate directory for the cache
    # on each platform.
    # https://github.com/ActiveState/appdirs
    # On linux this converges to
    # '$HOME/.cache/scikit-image'
    # With a version qualifier
    path=pooch.os_cache("scikit-image"),
    base_url=url,
    version=pooch_version,
    env="SKIMAGE_DATADIR",
    registry=registry,
    urls=registry_urls,
)

# "data" sub-folder of the fetcher's absolute cache path.
data_dir = osp.join(str(image_fetcher.abspath), 'data')

# Make sure the folder exists, then copy the packaged README.txt into it.
os.makedirs(data_dir, exist_ok=True)
shutil.copy2(osp.join(skimage_distribution_dir, 'data', 'README.txt'),
             osp.join(data_dir, 'README.txt'))


def _has_hash(path, expected_hash):