def _create_image_fetcher():
    """Build the pooch fetcher for histolab's sample images.

    Returns
    -------
    tuple
        ``(image_fetcher, data_dir)``. If pooch cannot be imported the pair is
        ``(None, legacy_data_dir)``; otherwise ``data_dir`` is the ``data``
        folder below the fetcher's absolute cache path.
    """
    try:
        import pooch
    except ImportError:
        # No pooch installed: fall back on the standard data directory, which
        # for now only ships a few limited data samples.
        return None, legacy_data_dir

    # The ``.dev`` marker of the package version is swapped for ``+`` before
    # the version is handed to pooch.
    dev_aware_version = __version__.replace(".dev", "+")

    fetcher_options = dict(
        # Pooch uses appdirs to select an appropriate cache directory on each
        # platform (https://github.com/ActiveState/appdirs). On Linux this
        # converges to '$HOME/.cache/histolab-images', with a version qualifier.
        path=pooch.os_cache("histolab-images"),
        base_url="https://github.com/histolab/histolab/raw/{version}/histolab/",
        version=dev_aware_version,
        env="HISTOLAB_DATADIR",
        registry=registry,
        urls=registry_urls,
    )
    image_fetcher = pooch.create(**fetcher_options)
    return image_fetcher, os.path.join(str(image_fetcher.abspath), "data")
def data_path(self, subject, path=None, force_update=False, update_path=None, verbose=None):
    """Fetch the files of one subject and return what pooch reports for them.

    Parameters
    ----------
    subject : int
        Subject number; must be a member of ``self.subject_list``.
    path : str | None
        Root folder for the dataset. With ``None`` the location stored under
        the ``MNE_DATASETS_<sign>_PATH`` config key is used; if that key is
        unset it is first set to ``~/mne_data``.
    force_update, update_path, verbose
        Accepted for signature compatibility; not used by this method.

    Returns
    -------
    list
        One ``fetch`` result per file whose name carries the subject number.

    Raises
    ------
    ValueError
        If ``subject`` is not in ``self.subject_list``.
    """
    if subject not in self.subject_list:
        raise ValueError("Invalid subject number")

    sub = "{:02d}".format(subject)
    sign = self.code.split()[1]
    key = "MNE_DATASETS_{:s}_PATH".format(sign)
    key_dest = "MNE-{:s}-data".format(sign.lower())

    if get_config(key) is None:
        set_config(key, osp.join(osp.expanduser("~"), "mne_data"))

    # Forward the caller's ``path``. It used to be replaced by None here, which
    # made the argument a silent no-op.
    # NOTE(review): assumes _get_path returns an explicit path as given and only
    # consults the config key when path is None - confirm against its definition.
    path = osp.join(_get_path(path, key, sign), key_dest)

    filelist = fs_get_file_list(self.figshare_id)
    reg = fs_get_file_hash(filelist)
    fsn = fs_get_file_id(filelist)
    gb = pooch.create(path=path, base_url=MAMEM_URL, registry=reg)

    # Characters 2-3 of each file name are compared with the zero-padded
    # subject number to select that subject's files.
    return [gb.fetch(fsn[name]) for name in fsn if name[2:4] == sub]
def create_image_fetcher():
    """Create the pooch fetcher for scikit-image's sample data.

    Returns
    -------
    tuple
        ``(image_fetcher, data_dir)``. Without pooch this is
        ``(None, legacy_data_dir)``; otherwise ``data_dir`` is the ``data``
        folder inside the fetcher's absolute cache path.
    """
    try:
        import pooch
    except ImportError:
        # Without pooch, fall back on the standard data directory, which for
        # now includes only a few limited data samples.
        return None, legacy_data_dir

    # Pooch expects a `+` to exist in development versions. scikit-image does
    # not follow that convention, so `.dev` is replaced by `+` when present;
    # pooch then knows to look at the development branch for the files.
    version_for_pooch = __version__.replace('.dev', '+')

    # Pooch uses appdirs (https://github.com/ActiveState/appdirs) to pick a
    # cache directory per platform; on Linux this converges to
    # '$HOME/.cache/scikit-image', with a version qualifier.
    cache_root = pooch.os_cache("scikit-image")

    image_fetcher = pooch.create(
        path=cache_root,
        base_url="https://github.com/scikit-image/scikit-image/raw/{version}/skimage/",
        version=version_for_pooch,
        env="SKIMAGE_DATADIR",
        registry=registry,
        urls=registry_urls,
    )
    return image_fetcher, osp.join(str(image_fetcher.abspath), 'data')
def retrieve(cache_key, pooch_kwargs=None):
    """Download and cache the file registered under ``cache_key``.

    The cache location is taken from the ``data_dir`` config key.
    See :ref:`data_dir_setting` for more information.

    Args:
        cache_key (str): Cache key returned by
            :func:`~satpy.aux_download.register_file`.
        pooch_kwargs (dict or None): Extra keyword arguments to pass to
            :meth:`pooch.Pooch.fetch`.

    Returns:
        Local path of the cached file.

    Raises:
        RuntimeError: If downloading is enabled in the config but
            ``_should_download`` refuses it for this key.

    """
    data_dir = satpy.config.get('data_dir')

    downloads_enabled = satpy.config.get('download_aux')
    if not downloads_enabled:
        return _retrieve_offline(data_dir, cache_key)

    if not _should_download(cache_key):
        raise RuntimeError("Auxiliary data download is not allowed during "
                           "tests. Mock the appropriate components of your "
                           "tests to not need the 'retrieve' function.")

    fetch_options = pooch_kwargs if pooch_kwargs else {}
    # The data directory doubles as the default base URL that files can be
    # downloaded from, hence it is passed twice.
    fetcher = pooch.create(data_dir, data_dir,
                           registry=_FILE_REGISTRY, urls=_FILE_URLS)
    return fetcher.fetch(cache_key, **fetch_options)
def create_image_fetcher():
    """Create the pooch fetcher for scikit-image's sample data.

    Returns
    -------
    tuple
        ``(image_fetcher, data_dir)``. Without pooch this is
        ``(None, legacy_data_dir)``; otherwise ``data_dir`` is the ``data``
        folder inside the fetcher's absolute cache path.
    """
    try:
        import pooch

        # Retrying failed downloads needs a recent pooch (>= 1.3.0). Older
        # releases may not even have a ``__version__`` attribute; those get
        # no retry option either.
        # Keep version check in synch with
        # scikit-image/requirements/optional.txt
        retry = {}
        if hasattr(pooch, '__version__'):
            installed_pooch = pooch.__version__.lstrip('v')
            too_old = version.parse(installed_pooch) < version.parse('1.3.0')
            if not too_old:
                retry = {'retry_if_failed': 3}
    except ImportError:
        # Without pooch, fall back on the standard data directory, which for
        # now includes only a few limited data samples.
        return None, legacy_data_dir

    # Pooch expects a `+` to exist in development versions. scikit-image does
    # not follow that convention, so the `.dev` marker is rewritten to `+`
    # (or `.dev0+git` to `+git`) when present.
    if '+git' in __version__:
        dev_marker, replacement = '.dev0+git', '+git'
    else:
        dev_marker, replacement = '.dev', '+'
    skimage_version_for_pooch = __version__.replace(dev_marker, replacement)

    # Versions without a `+` get a `v` in front of the version in the URL.
    tag_prefix = '' if '+' in skimage_version_for_pooch else 'v'
    url = ("https://github.com/scikit-image/scikit-image/raw/"
           + tag_prefix + "{version}/skimage/")

    # Pooch uses appdirs (https://github.com/ActiveState/appdirs) to pick a
    # cache directory per platform; on Linux this converges to
    # '$HOME/.cache/scikit-image', with a version qualifier.
    image_fetcher = pooch.create(
        path=pooch.os_cache("scikit-image"),
        base_url=url,
        version=skimage_version_for_pooch,
        version_dev="main",
        env="SKIMAGE_DATADIR",
        registry=registry,
        urls=registry_urls,
        # Note: this should read `retry_if_failed=3,`, but the option is built
        # dynamically above in case the installed pooch is too old for it.
        **retry,
    )
    return image_fetcher, osp.join(str(image_fetcher.abspath), 'data')
def _retrieve_all_with_pooch(pooch_kwargs):
    """Fetch every file listed in ``_FILE_REGISTRY``.

    Args:
        pooch_kwargs (dict or None): Extra keyword arguments forwarded to each
            ``fetch`` call; ``None`` means no extra arguments.

    """
    fetch_options = {} if pooch_kwargs is None else pooch_kwargs
    data_dir = satpy.config.get('data_dir')
    # The data directory is passed as both the local path and the base URL.
    fetcher = pooch.create(data_dir, data_dir,
                           registry=_FILE_REGISTRY, urls=_FILE_URLS)
    for fname in _FILE_REGISTRY:
        logger.info("Downloading extra data file '%s'...", fname)
        fetcher.fetch(fname, **fetch_options)
def _make_pooch():
    """Return the pooch that manages the Amor example files."""
    import pooch

    # File name -> md5 checksum of the files served for this version.
    checksums = {
        "reference.nxs": "md5:56d493c8051e1c5c86fb7a95f8ec643b",
        "sample.nxs": "md5:4e07ccc87b5c6549e190bc372c298e83",
    }
    cache_dir = pooch.os_cache('ess/amor')
    return pooch.create(
        path=cache_dir,
        env='ESS_AMOR_DATA_DIR',
        base_url='https://public.esss.dk/groups/scipp/ess/amor/{version}/',
        version=_version,
        registry=checksums,
    )
def test_pooch():
    """Create a pooch for one GemPy tutorial data file and print it."""
    # The remote data is on GitHub.
    remote_folder = (
        "https://raw.githubusercontent.com/cgre-aachen/gempy_data/master/data"
        "/gempy_models/Tutorial_ch1-8_Onlap_relations/"
    )
    # Single registry entry, given inline.
    known_files = {
        "Tutorial_ch1-8_Onlap_relations_faults.csv": "19uheidhlkjdwhoiwuhc0uhcwljchw9ochwochw89dcgw9dcgwc"
    }
    goodboy = pooch.create(
        # Use the default cache folder for the OS.
        path=pooch.os_cache("plumbus"),
        base_url=remote_folder,
        # If this is a development version, get the data from the master branch.
        version_dev="master",
        registry=known_files,
    )
    print(goodboy)
def create_pooch(base_url, files, target):
    """
    Create POOCH class to fetch files from a website

    Args:
        base_url: Base URL for the remote data source.
        files: Iterable of the file names that are managed by this Pooch.
        target: The path to the local data storage folder

    Returns:
        POOCH class
    """
    # Every file is registered with None instead of a hash, because the hash
    # is always changing (Sciebo problem?).
    registry = dict.fromkeys(files)
    pc = pooch.create(base_url=base_url, path=target, registry=registry)
    # Pass base_url as a logger argument: the message is only formatted when
    # INFO is enabled, and a tuple-valued base_url cannot break the formatting.
    logger.info("Pooch created for url: %s", base_url)
    return pc
def __init__(self, cache_dir=None):
    """Set up the pooch and downloader used for the SOXS response files.

    Parameters
    ----------
    cache_dir : path-like, optional
        Folder in which files are stored. If None, the configured
        ``soxs_data_dir`` is used when it is an existing directory,
        otherwise pooch's OS cache folder for "soxs".
    """
    import json
    import pooch
    import pkg_resources

    if cache_dir is None:
        configured_dir = soxs_cfg.get("soxs", "soxs_data_dir")
        if os.path.isdir(configured_dir):
            cache_dir = configured_dir
        else:
            cache_dir = pooch.os_cache("soxs")

    # Read the packaged hash registry inside ``with`` so the resource stream
    # is closed again instead of being left open.
    with pkg_resources.resource_stream(
            "soxs", "file_hash_registry.json") as registry_file:
        self._registry = json.load(registry_file)

    self.pooch_obj = pooch.create(
        path=cache_dir,
        registry=self._registry,
        env="SOXS_DATA_DIR",
        base_url="https://hea-www.cfa.harvard.edu/soxs/soxs_responses/"
    )
    self.dl = pooch.HTTPDownloader(progressbar=True)
def _make_pooch():
    """Return the pooch that manages the scippneutron example files."""
    import pooch

    # (file name, md5 digest) of every file served for this version.
    md5_digests = (
        ('iris26176_graphite002_sqw.nxs', '7ea63f9137602b7e9b604fe30f0c6ec2'),
        ('loki-at-larmor.hdf5', '7fc48639bb2c409adb9a1dedd53d1c83'),
        ('powder-event.h5', 'f92ca1da4c2d42c3d8a5d1b1d79aa5a4'),
        ('mcstas_sans.h5', 'a608255dd757905490882eb35e209d12'),
        ('CNCS_51936_event.nxs', '5ba401e489260a44374b5be12b780911'),
        ('GEM40979.raw', '6df0f1c2fc472af200eec43762e9a874'),
        ('PG3_4844_calibration.h5', '290f5108aa9ff0b1c5a2ac8dc2c1cb15'),
        ('PG3_4844_event.nxs', 'd5ae38871d0a09a28ae01f85d969de1e'),
        ('PG3_4866_event.nxs', '3d543bc6a646e622b3f4542bc3435e7e'),
        ('PG3_4871_event.nxs', 'a3d0edcb36ab8e9e3342cd8a4440b779'),
        ('WISH00016748.raw', '37ecc6f99662b57e405ed967bdc068af'),
    )
    registry = {name: 'md5:' + digest for name, digest in md5_digests}

    return pooch.create(
        path=pooch.os_cache('scippneutron'),
        env='SCIPPNEUTRON_DATA_DIR',
        retry_if_failed=3,
        base_url='https://public.esss.dk/groups/scipp/scippneutron/{version}/',
        version=_version,
        registry=registry,
    )
def data_path(self, subject, path=None, force_update=False, update_path=None, verbose=None):
    """Fetch the files of one subject and return what pooch reports for them.

    Parameters
    ----------
    subject : int
        Subject number; must be a member of ``self.subject_list``.
    path : str | None
        Passed on to ``get_dataset_path`` to resolve the dataset root.
    force_update, update_path, verbose
        Accepted for signature compatibility; not used by this method.

    Returns
    -------
    list
        One ``fetch`` result per file whose name carries the subject number.

    Raises
    ------
    ValueError
        If ``subject`` is not in ``self.subject_list``.
    """
    if subject not in self.subject_list:
        raise ValueError("Invalid subject number")

    dataset_sign = self.code.split()[1]
    dataset_folder = "MNE-{:s}-data".format(dataset_sign.lower())
    local_root = osp.join(get_dataset_path(dataset_sign, path), dataset_folder)

    remote_files = fs_get_file_list(self.figshare_id)
    hashes = fs_get_file_hash(remote_files)
    file_ids = fs_get_file_id(remote_files)
    fetcher = pooch.create(path=local_root, base_url=MAMEM_URL, registry=hashes)

    # Characters 2-3 of each file name are compared with the zero-padded
    # subject number to select that subject's files.
    subject_tag = "{:02d}".format(subject)
    return [
        fetcher.fetch(file_ids[name])
        for name in file_ids.keys()
        if name[2:4] == subject_tag
    ]
# This code is part of the Fatiando a Terra project (https://www.fatiando.org) # """ Functions to load sample datasets used in the Harmonica docs. """ import pandas as pd import pkg_resources import pooch import xarray as xr from .._version import __version__ as version REGISTRY = pooch.create( path=pooch.os_cache("harmonica"), base_url="https://github.com/fatiando/harmonica/raw/{version}/data/", version=version, version_dev="main", env="HARMONICA_DATA_DIR", ) with pkg_resources.resource_stream("harmonica.datasets", "registry.txt") as registry_file: REGISTRY.load_registry(registry_file) def locate(): r""" The absolute path to the sample data storage location on disk. This is where the data are saved on your computer. The location is dependent on the operating system. The folder locations are defined by the ``appdirs`` package (see the `appdirs documentation
from kikuchipy.signals import EBSD, EBSDMasterPattern
from kikuchipy import load
from kikuchipy.release import version
from kikuchipy.data._registry import registry, registry_urls


__all__ = [
    "nickel_ebsd_small",
    "nickel_ebsd_large",
    "nickel_ebsd_master_pattern_small",
]


# Pooch that manages the package's data files. The ".dev" marker of the
# release version is replaced by "+" before the version is given to pooch.
# NOTE(review): base_url is empty, so remote files presumably resolve through
# the explicit `urls` mapping only - confirm against the registry module.
fetcher = ppooch.create(
    path=ppooch.os_cache("kikuchipy"),
    base_url="",
    version=version.replace(".dev", "+"),
    env="KIKUCHIPY_DATA_DIR",
    registry=registry,
    urls=registry_urls,
)
# "data" folder inside the pooch's local storage path.
cache_data_path = fetcher.path.joinpath("data")
# Absolute path of the directory containing this module.
package_data_path = Path(os.path.abspath(os.path.dirname(__file__)))


def _has_hash(path, expected_hash):
    """Check if the provided path has the expected hash.

    Returns False when ``path`` does not exist; otherwise compares the
    result of ``pooch.utils.file_hash(path)`` with ``expected_hash``.
    """
    if not os.path.exists(path):
        return False
    else:
        return ppooch.utils.file_hash(path) == expected_hash
import pooch # Going to set this up a bit later... from . import __version__ _registry = { "fullSoy_2-12a.ply": "e12f192188058851289f0531dc456c6df31b562405b77e382e0f9e4b1c899108" } PLANTS = pooch.create( path=pooch.os_cache("hothouse"), base_url="https://github.com/MatthewTurk/hothouse/raw/{version}/data/", version=__version__, version_dev="master", env="HOTHOUSE_DATA_DIR", registry=_registry, )
import logging
from pathlib import Path

import pooch

from ._env import BGEN_CACHE_HOME

__all__ = ["get"]

# Only let pooch log errors.
pooch.get_logger().setLevel(logging.ERROR)

# Pooch for the example files; they are stored in the "test_data" folder
# below BGEN_CACHE_HOME. The registry maps file names to their hashes.
goodboy = pooch.create(
    path=BGEN_CACHE_HOME / "test_data",
    base_url="https://bgen-examples.s3.amazonaws.com/",
    registry={
        "complex.23bits.no.samples.bgen": "25d30a4e489da1aeb05f9893af98e8bf3b09d74db2982bf1828f8c8565886fc6",
        "haplotypes.bgen": "84e0b59efcc83c7c305cf5446e5dc26b49b15aeb4157a9eb36451376ce3efe4c",
        "haplotypes.bgen.metadata.corrupted": "8f55628770c1ae8155c1ced2463f15df80d32bc272a470bb1d6b68225e1604b1",
        "wrong.metadata": "f746345605150076f3234fbeea7c52e86bf95c9329b2f08e1e3e92a7918b98fb",
        "merged_487400x220000.bgen": "8dccd89a53e048ea24305cb04c3653b0eb2af265b04d05a60cfa2cca5fb7ae94",
        "merged_487400x2420000.bgen": "81aecfab787bee1cb7f1d0d21f2465c581a4db78011d8b0f0f73c868e17ec888",
        "merged_487400x4840000.bgen": "5ef82f92a001615c93bbb317a9fd2329272370c6d481405d4f8f0a2b7fddf68b",
    },
)


def get(filename: str) -> Path:
    """Fetch ``filename`` through the pooch and return the result as a ``Path``."""
    return Path(goodboy.fetch(filename))
downloader = pooch.HTTPDownloader(auth=auth, progressbar=True) try: login = requests.get(url) downloader(login.url, output_file, dataset) except requests.exceptions.HTTPError as error: if 'Unauthorized' in str(error): pooch.get_logger().error('Wrong username/password!') self._username = None self._password = None raise error _earthdata_downloader = EarthDataDownloader() nsidc_data = pooch.create(path=pooch.os_cache('icepack'), base_url='', registry=None) registry_file = pkg_resources.resource_stream('icepack', 'registry.txt') nsidc_data.load_registry(registry_file) def fetch_measures_antarctica(): r"""Fetch the MEaSUREs Antarctic velocity map""" return nsidc_data.fetch('antarctic_ice_vel_phase_map_v01.nc', downloader=_earthdata_downloader) def fetch_measures_greenland(): r"""Fetch the MEaSUREs Greenland velocity map""" return [
import logging
from pathlib import Path

import pooch

__all__ = ["example_filepath"]

# Only let pooch log errors.
pooch.get_logger().setLevel(logging.ERROR)

# Pooch for the example files, cached in the OS cache folder for "hmmer".
# The registry lists the compressed (.gz) file and its hash.
goodboy = pooch.create(
    path=pooch.os_cache("hmmer"),
    base_url="https://hmmer-py.s3.eu-west-2.amazonaws.com/",
    registry={
        "Pfam-A_24.hmm.gz": "32791a1b50837cbe1fca1376a3e1c45bc84b32dd4fe28c92fd276f3f2c3a15e3"
    },
)


def example_filepath(filename: str) -> Path:
    """Fetch ``<filename>.gz`` with the ``Decompress`` processor.

    ``filename`` is given without the ``.gz`` suffix; the suffix is appended
    before fetching. The value returned by ``fetch`` is wrapped in a ``Path``.
    """
    return Path(goodboy.fetch(filename + ".gz", processor=pooch.Decompress()))
try: import cartopy.feature as cfeature import cartopy.crs as ccrs from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter except ImportError: pass from ..version import full_version # Otherwise, DeprecationWarning won't be shown, kind of defeating the purpose. warnings.simplefilter("default") POOCH = pooch.create( path=["~", ".verde", "data"], base_url="https://github.com/fatiando/verde/raw/{version}/data/", version=full_version, version_dev="master", env="VERDE_DATA_DIR", ) POOCH.load_registry(os.path.join(os.path.dirname(__file__), "registry.txt")) def _setup_map(ax, xticks, yticks, crs, region, land=None, ocean=None, borders=None, states=None):
import tempfile from pathlib import Path from typing import Union import pooch """ Load sample data. """ POOCH = pooch.create( # Use the default cache folder for the OS path=pooch.os_cache("bed_reader"), # The remote data is on Github base_url="https://raw.githubusercontent.com/" + "fastlmm/bed-reader/master/bed_reader/tests/data/", # If this is a development version, get the data from the master branch version_dev="master", # The registry specifies the files that can be fetched env="BED_READER_DATA_DIR", ) # Get registry file from package_data registry_file = Path(__file__).parent / "tests/registry.txt" # Load this registry file POOCH.load_registry(registry_file) def sample_file(filepath: Union[str, Path]) -> str: """ Retrieve a sample .bed file. (Also retrieves associated .fam and .bim files).
try: string_type = basestring except NameError: string_type = str # TODO: This can go away when we remove Python 2 def is_string_like(s): """Check if an object is a string.""" return isinstance(s, string_type) POOCH = pooch.create( path=pooch.os_cache('metpy'), base_url='https://github.com/Unidata/MetPy/raw/{version}/staticdata/', version='v' + __version__, version_dev='master', env='TEST_DATA_DIR') # Check if we're running from a git clone and if so, bash the path attribute with the path # to git's local data store (un-versioned) # Look for the staticdata directory (i.e. this is a git checkout) if os.path.exists(os.path.join(os.path.dirname(__file__), '..', 'staticdata')): POOCH.path = os.path.join(os.path.dirname(__file__), '..', 'staticdata') POOCH.load_registry(os.path.join(os.path.dirname(__file__), 'static-data-manifest.txt')) def get_test_data(fname, as_file_obj=True): """Access a file from MetPy's collection of test data.""" path = POOCH.fetch(fname)
from .exceptions import ParameterError __all__ = [ "find_files", "example", "ex", "list_examples", "example_info", ] # Instantiate the pooch __data_path = os.environ.get("LIBROSA_DATA_DIR", pooch.os_cache("librosa")) __GOODBOY = pooch.create( __data_path, base_url="https://librosa.org/data/audio/", registry=None ) __GOODBOY.load_registry( resource_filename(__name__, str(Path("example_data") / "registry.txt")) ) with open( resource_filename(__name__, str(Path("example_data") / "index.json")), "r" ) as fdesc: __TRACKMAP = json.load(fdesc) def example(key, *, hq=False): """Retrieve the example recording identified by 'key'.
def load_data(subject, runs, path=None, force_update=False, update_path=None,
              base_url=EEGMI_URL, verbose=None):  # noqa: D301
    """Get paths to local copies of EEGBCI dataset files.

    This will fetch data for the EEGBCI dataset :footcite:`SchalkEtAl2004`, which
    is also available at PhysioNet :footcite:`GoldbergerEtAl2000`.

    Parameters
    ----------
    subject : int
        The subject to use. Can be in the range of 1-109 (inclusive).
    runs : int | list of int
        The runs to use. See Notes for details.
    path : None | str
        Location of where to look for the EEGBCI data storing location.
        If None, the environment variable or config parameter
        ``MNE_DATASETS_EEGBCI_PATH`` is used. If it doesn't exist, the
        "~/mne_data" directory is used. If the EEGBCI dataset
        is not found under the given path, the data
        will be automatically downloaded to the specified folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    update_path : bool | None
        If True, set the MNE_DATASETS_EEGBCI_PATH in mne-python
        config to the given path. If None, the user is prompted.
    base_url : str
        The URL root for the data.
    %(verbose)s

    Returns
    -------
    paths : list
        List of local data paths of the given type.

    Notes
    -----
    The run numbers correspond to:

    ========= ===================================
    run       task
    ========= ===================================
    1         Baseline, eyes open
    2         Baseline, eyes closed
    3, 7, 11  Motor execution: left vs right hand
    4, 8, 12  Motor imagery: left vs right hand
    5, 9, 13  Motor execution: hands vs feet
    6, 10, 14 Motor imagery: hands vs feet
    ========= ===================================

    For example, one could do::

        >>> from mne.datasets import eegbci
        >>> eegbci.load_data(1, [4, 10, 14], os.getenv('HOME') + '/datasets') # doctest:+SKIP

    This would download runs 4, 10, and 14 (hand/foot motor imagery) runs from
    subject 1 in the EEGBCI dataset to the 'datasets' folder, and prompt the
    user to save the 'datasets' path to the mne-python config, if it isn't
    there already.

    References
    ----------
    .. footbibliography::
    """  # noqa: E501
    import pooch

    # a single run number is accepted as well
    if not hasattr(runs, '__iter__'):
        runs = [runs]

    # get local storage path
    config_key = 'MNE_DATASETS_EEGBCI_PATH'
    folder = 'MNE-eegbci-data'
    name = 'EEGBCI'
    path = _get_path(path, config_key, name)

    # extract path parts: the local folder layout mirrors the
    # files/eegmmidb/<x.y.z> portion of the URL
    pattern = r'(?:https?://.*)(files)/(eegmmidb)/(\d+\.\d+\.\d+)/?'
    match = re.compile(pattern).match(base_url)
    if match is None:
        raise ValueError('base_url does not match the expected EEGMI folder '
                         'structure. Please notify MNE-Python developers.')
    base_path = op.join(path, folder, *match.groups())

    # create the download manager
    fetcher = pooch.create(
        path=base_path,
        base_url=base_url,
        version=None,   # Data versioning is decoupled from MNE-Python version.
        registry=None,  # Registry is loaded from file, below.
        retry_if_failed=2  # 2 retries = 3 total attempts
    )

    # load the checksum registry; the ``with`` block closes the resource
    # stream afterwards, since load_registry does not close file objects that
    # are handed to it
    with pkg_resources.resource_stream(
            'mne', op.join('data', 'eegbci_checksums.txt')) as registry:
        fetcher.load_registry(registry)

    # fetch the file(s)
    data_paths = []
    for run in runs:
        file_part = f'S{subject:03d}/S{subject:03d}R{run:02d}.edf'
        destination = op.join(base_path, file_part)
        # removing the local copy first forces a fresh download
        if force_update and op.isfile(destination):
            os.remove(destination)
        data_paths.append(fetcher.fetch(file_part))

    # update path in config if desired
    _do_path_update(path, update_path, config_key, name)
    return data_paths
# Copyright (c) 2008,2015,2018,2019 MetPy Developers. # Distributed under the terms of the BSD 3-Clause License. # SPDX-License-Identifier: BSD-3-Clause """Collection of generally useful utility code from the cookbook.""" import os from pathlib import Path import numpy as np import pooch from . import __version__ POOCH = pooch.create( path=pooch.os_cache('metpy'), base_url='https://github.com/Unidata/MetPy/raw/{version}/staticdata/', version='v' + __version__, version_dev='main') # Check if we have the data available directly from a git checkout, either from the # TEST_DATA_DIR variable, or looking relative to the path of this module's file. Use this # to override Pooch's path and disable downloading from GitHub. dev_data_path = os.environ.get('TEST_DATA_DIR', Path(__file__).parents[2] / 'staticdata') if Path(dev_data_path).exists(): POOCH.path = dev_data_path POOCH.base_url = 'NODOWNLOAD:' POOCH.load_registry(Path(__file__).parent / 'static-data-manifest.txt')
from numba import jit, prange try: from tqdm import tqdm except ImportError: tqdm = None INPUTDATA_DIR = ['~', '.pop_tools'] # On Cheyenne/Casper and/or CGD machines, use local inputdata directory # See: https://github.com/NCAR/pop-tools/issues/24#issue-523701065 INPUTDATA = pooch.create( # This is still the default in case the environment variable isn't defined path=INPUTDATA_DIR, version_dev='master', base_url='https://svn-ccsm-inputdata.cgd.ucar.edu/trunk/', # The name of the environment variable that can overwrite the path argument env='CESMDATAROOT', ) INPUTDATA.load_registry( pkg_resources.resource_stream('pop_tools', 'inputdata_registry.txt')) if tqdm is not None: downloader = pooch.HTTPDownloader(progressbar=True, verify=False, allow_redirects=True) else: downloader = pooch.HTTPDownloader(verify=False, allow_redirects=True) grid_def_file = pkg_resources.resource_filename('pop_tools',
# Create a new friend to manage your sample data storage GOODBOY = pooch.create( # Folder where the data will be stored. For a sensible default, use the default # cache folder for your OS. # path=pooch.os_cache("mypackage_test"), # path=pooch.os_cache("mypackage_test"), path=data_path, # Base URL of the remote data store. Will call .format on this string to insert # https://github.com/JustinGOSSES/predictatops/ # the version (see below). https://github.com/JustinGOSSES/MannvilleGroup_Strat_Hackathon/tree/master/SPE_006_originalData #base_url="https://github.com/JustinGOSSES/predictatops/raw/{version}/demo/mannville_demo_data/", base_url="https://github.com/JustinGOSSES/predictatops/raw/{version}/demo/", # Pooches are versioned so that you can use multiple versions of a package # simultaneously. Use PEP440 compliant version number. The version will be # appended to the path. #version="v0.0.0-alpha", version="v0.0.3-alpha", # If a version as a "+XX.XXXXX" suffix, we'll assume that this is a dev version # and replace the version with this string. version_dev="master", # An environment variable that overwrites the path. env=data_path, # The cache file registry. A dictionary with all files managed by this pooch. # Keys are the file names (relative to *base_url*) and values are their # respective SHA256 hashes. Files will be downloaded automatically when needed # (see fetch_gravity_data). 1414057d0c5235b0ed13103c72c864ddfd34a0c8 # registry={"OilSandsDB/LITHOLOGY_DIC.TXT": "83f3be338d6fa42eeadf60466c716e4370fe8723682c187d214a054bd695880a"} ) # You can also load the registry from a file. Each line contains a file name and # it's sha256 hash separated by a space. This makes it easier to manage large
import numpy as np import pandas as pd import pooch try: import cartopy.crs as ccrs from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter except ImportError: pass from ..version import full_version REGISTRY = pooch.create( path=pooch.os_cache("verde"), base_url="https://github.com/fatiando/verde/raw/{version}/data/", version=full_version, version_dev="master", env="VERDE_DATA_DIR", ) with pkg_resources.resource_stream("verde.datasets", "registry.txt") as registry_file: REGISTRY.load_registry(registry_file) def locate(): r""" The absolute path to the sample data storage location on disk. This is where the data are saved on your computer. The location is dependent on the operating system. The folder locations are defined by the ``appdirs`` package (see the `appdirs documentation
""" Functions to load sample data """ import os from pathlib import Path import pkg_resources import pooch DATASETS = pooch.create( path=['~', '.pop_tools', 'data'], version_dev='master', base_url='ftp://ftp.cgd.ucar.edu/archive/aletheia-data/cesm-data/ocn/', env='POP_TOOLS_DATA_DIR', ) DATASETS.load_registry( pkg_resources.resource_stream('pop_tools', 'data_registry.txt')) class UnzipZarr(pooch.processors.Unzip): """ Processor that unpacks a zarr store zip archive and returns the zarr store path. """ def __call__(self, fname, action, pooch): """ Extract all files from the given archive. Parameters ---------- fname : str
def __init__(
    self,
    cache_path=None,
    registry=None,
    project_dir=None,
    check_hash=True,
    show_progress=True,
):
    """Class to download FARS data from the NHTSA FTP repository.

    Note that on first run, this will take a long time to fully download the data, as the repository is large.
    Expect first run to take 5-10+ minutes, depending on your setup.

    Parameters
    ----------
    cache_path: `os.path` or path-like, or str, optional
        The path to save the downloaded FARS files to.
        Default is `pooch.os_cache("fars")`, the default cache path as defined by the OS. See `pooch` and
        `appdirs` documentations.
        If `str`, and `project_dir` is not `None`, files will be downloaded to `project_dir/cache_path`
    registry:
        Path to registry file. Defaults to path for packaged `registry.txt` file. Override at your own risk.
    project_dir: str or path-like, optional
        Top level directory for your current project. If a path is provided, and `cache_path` is left as default,
        files will be downloaded to `project_dir/data/fars`. If `cache_path` is not the default, files will be
        downloaded to `project_dir/cache_path`. Both directories are created if they do not exist.
    check_hash: bool
        Flag to enforce pooch download behavior. Defaults to True. When False, force download of FARS resources
        regardless of hash mismatch against the local registry version. Useful for when the FARS database is
        updated before the registry can be modified. Should normally be left to default (True).
    show_progress: bool
        Use pooch built-in feature to show progress bars during download. Default True.
    """
    if project_dir:
        # Normalise to Path: a plain string has no ``mkdir`` and used to fail
        # on the directory creation below.
        self.project_dir = Path(project_dir)
        if cache_path:
            self.cache_path = self.project_dir / cache_path
        else:
            self.cache_path = self.project_dir / "data" / "fars"
        self.project_dir.mkdir(parents=True, exist_ok=True)
        self.cache_path.mkdir(parents=True, exist_ok=True)
    else:
        self.project_dir = None
        if cache_path:
            self.cache_path = Path(cache_path)
            self.cache_path.mkdir(parents=True, exist_ok=True)
        else:
            self.cache_path = pooch.os_cache("fars")

    if registry:
        self.registry = Path(registry)
    else:
        # Fall back to the registry file that sits next to this module.
        self.registry = os.path.join(os.path.dirname(__file__), "registry.txt")

    self.check_hash = check_hash
    self.show_progress = show_progress

    # The registry is loaded from file right after the pooch is created.
    self.GOODBOY = pooch.create(
        path=self.cache_path,
        base_url="https://www.nhtsa.gov/filebrowser/download/",
        registry=None,
        allow_updates=self.check_hash,
    )
    self.GOODBOY.load_registry(self.registry)
BASE_URL = 'ftp://ftp.cgd.ucar.edu/archive/aletheia-data'
TUTORIAL_DATA_URL = f'{BASE_URL}/tutorial-data'

# Every tutorial file is served from TUTORIAL_DATA_URL under its own name, so
# the explicit URL table is derived from the list of file names.
URLS = {
    filename: f'{TUTORIAL_DATA_URL}/{filename}'
    for filename in (
        'thetao_Omon_historical_GISS-E2-1-G_r1i1p1f1_gn_185001-185512.nc',
        'woa2013v2-O2-thermocline-ann.nc',
        'NOAA_NCDC_ERSST_v3b_SST.nc',
        'sst_indices.csv',
        'air_temperature.nc',
        'rasm.nc',
        'co2.nc',
        'moc.nc',
        'aviso_madt_2015.tar.gz',
        'NARR_19930313_0000.nc',
        'MPAS.nc',
        'Oklahoma.static.nc',
        'uas.rcp85.CanESM2.CRCM5-UQAM.day.NAM-44i.raw.Colorado.nc',
        'uas.hist.CanESM2.CRCM5-UQAM.day.NAM-44i.raw.Colorado.nc',
        'uas.gridMET.NAM-44i.Colorado.nc',
        'T2_RR_F_2014_08.nc',
        'wrfinput_d02',
    )
}

# Pooch for the tutorial data; the cache folder is looked up per host name.
DATASETS = pooch.create(
    path=data_cache_map[hostname],
    version_dev='master',
    base_url=BASE_URL,
    urls=URLS,
)
# The registry file ships next to this module.
DATASETS.load_registry(os.path.join(os.path.dirname(__file__), 'registry.txt'))
# Pooch expects a `+` to exist in development versions. # Since scikit-image doesn't follow that convention, we have to manually # remove `.dev` with a `+` if it exists. # This helps pooch understand that it should look in master # to find the required files pooch_version = __version__.replace('.dev', '+') url = "https://github.com/scikit-image/scikit-image/raw/{version}/skimage/" # Create a new friend to manage your sample data storage image_fetcher = pooch.create( # Pooch uses appdirs to select an appropriate directory for the cache # on each platform. # https://github.com/ActiveState/appdirs # On linux this converges to # '$HOME/.cache/scikit-image' # With a version qualifier path=pooch.os_cache("scikit-image"), base_url=url, version=pooch_version, env="SKIMAGE_DATADIR", registry=registry, urls=registry_urls, ) data_dir = osp.join(str(image_fetcher.abspath), 'data') os.makedirs(data_dir, exist_ok=True) shutil.copy2(osp.join(skimage_distribution_dir, 'data', 'README.txt'), osp.join(data_dir, 'README.txt')) def _has_hash(path, expected_hash):