def _get_golf_path():
    unpack = pooch.Unzip()
    fnames = REGISTRY.fetch('golf.zip', processor=unpack)
    nc_bool = [os.path.splitext(fname)[1] == '.nc' for fname in fnames]
    nc_idx = [i for i, b in enumerate(nc_bool) if b]
    golf_path = fnames[nc_idx[0]]
    return golf_path
def _get_xslope_path():
    unpack = pooch.Unzip()
    fnames = REGISTRY.fetch('xslope.zip', processor=unpack)
    # keep only the netCDF files, sorted so the job numbering is stable
    nc_bool = [os.path.splitext(fname)[1] == '.nc' for fname in fnames]
    fnames_idx = [fnames[i] for i, b in enumerate(nc_bool) if b]
    fnames_idx.sort()
    xslope_job_003_path = fnames_idx[0]
    xslope_job_013_path = fnames_idx[1]
    return xslope_job_003_path, xslope_job_013_path
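# The two helpers above assume a module-level `REGISTRY` Pooch instance. A
# minimal sketch of how such a registry might be created; the cache name and
# base_url are hypothetical placeholders, and the None hashes skip checksum
# verification:
import pooch

REGISTRY = pooch.create(
    path=pooch.os_cache('mypackage'),      # per-user cache directory
    base_url='https://example.org/data/',  # hypothetical download host
    registry={'golf.zip': None, 'xslope.zip': None},
)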
def fetch_single(self, year):
    """
    Load the FARS data for a given year.
    """
    # upstream layout: f'{year}/National/FARS{year}NationalCSV.zip'
    fname = f'{year}.zip'
    if self.GOODBOY.is_available(fname):
        unpack = pooch.Unzip(extract_dir=f"./{fname[:-4]}.unzip")
        unzipped = self.GOODBOY.fetch(fname,
                                      processor=unpack,
                                      progressbar=self.show_progress)
    else:
        raise FileNotFoundError(
            f"{fname}: File could not be found in FARS FTP directory.")
    return {year: unzipped}
def main():
    # import the regions mask as the base template
    regions_url = ('https://github.com/RECCAP2-ocean/shared-resources/raw/'
                   'master/regions/reccap2ocean_regions.nc')
    regions_fname = pooch.retrieve(regions_url, None)
    regions = (
        xr.open_dataset(regions_fname)
        .interp(lon=np.arange(-179.875, 180, 0.25),
                lat=np.arange(-89.875, 90, 0.25))
        .rename(lat='latitude', lon='longitude'))

    # get the shapefile information
    shapefile_url = ('http://www.hydrol-earth-syst-sci.net/17/2029/2013/'
                     'hess-17-2029-2013-supplement.zip')
    shapefile_name = 'Continental_Shelf'
    shapefile_flist = pooch.retrieve(shapefile_url, None,
                                     processor=pooch.Unzip())
    shapefile_path = str(
        Path([f for f in shapefile_flist if shapefile_name in f][0]).parent)

    regions = add_shape_coord_from_data_array(regions, shapefile_path,
                                              shapefile_name)

    continental_shelf = regions[shapefile_name]
    continental_shelf = continental_shelf.coarsen(latitude=4,
                                                  longitude=4).min()
    continental_shelf = continental_shelf.to_dataset(name='continental_shelf')
    continental_shelf.attrs = dict(
        source='https://www.hydrol-earth-syst-sci.net/17/2029/2013/',
        publication=(
            'Laruelle, G. G., Dürr, H. H., Lauerwald, R., Hartmann, J., '
            'Slomp, C. P., Goossens, N., and Regnier, P. A. G.: Global '
            'multi-scale segmentation of continental and coastal waters '
            'from the watersheds to the continental margins, Hydrol. Earth '
            'Syst. Sci., 17, 2029–2051, '
            'https://doi.org/10.5194/hess-17-2029-2013, 2013.'),
        description='coastal zones as defined in the publication',
        history=(
            'data downloaded as a shapefile (Continental_shelf.shp) from '
            'the link provided in `source`. The file was then converted to '
            '1/4 deg netCDF and coarsened to 1 deg.'),
    )

    encoding = {k: {'zlib': True, 'complevel': 4}
                for k in continental_shelf.data_vars}
    continental_shelf.to_netcdf('../reccap2coastal_coscats.nc',
                                encoding=encoding)
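# A hedged note on the retrieval calls above: the second positional argument
# to pooch.retrieve is known_hash, and passing None skips checksum
# verification (pooch logs the file's SHA256 on first download so it can be
# pinned later). A sketch of a more defensive variant; the cache name is an
# assumption:
import pooch

regions_fname = pooch.retrieve(
    'https://github.com/RECCAP2-ocean/shared-resources/raw/master/'
    'regions/reccap2ocean_regions.nc',
    known_hash=None,                 # replace with the logged sha256 to pin
    path=pooch.os_cache('reccap2'),  # hypothetical cache location
    progressbar=True,
)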
def choose_processor(url):
    """Choose the pooch processor needed to uncompress the download, if any."""
    known_processors = {
        pooch.Decompress(): ('.bz2', '.gz'),
        pooch.Untar(): ('.tar', '.tgz', '.tar.gz'),
        pooch.Unzip(): ('.zip',),
        None: '*',
    }

    chosen = None
    # no break on a match: later, more specific entries override earlier
    # ones, so e.g. a '.tar.gz' URL ends up with Untar rather than Decompress
    for processor, extensions in known_processors.items():
        for ext in extensions:
            if ext in url.lower():
                chosen = processor
    return chosen
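# A quick sanity check of the dispatch above (illustrative URLs only):
import pooch

assert isinstance(choose_processor('https://example.org/d.tar.gz'), pooch.Untar)
assert isinstance(choose_processor('https://example.org/d.zip'), pooch.Unzip)
assert isinstance(choose_processor('https://example.org/d.bz2'), pooch.Decompress)
assert choose_processor('https://example.org/data.nc') is None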
def _download_mne_dataset(name, processor, path, force_update,
                          update_path, download, accept=False):
    """Aux function for downloading internal MNE datasets."""
    # import pooch library for handling the dataset downloading
    import pooch
    from mne.datasets._fetch import fetch_dataset

    dataset_params = MNE_DATASETS[name]
    dataset_params['dataset_name'] = name
    config_key = MNE_DATASETS[name]['config_key']
    folder_name = MNE_DATASETS[name]['folder_name']

    # get download path for specific dataset
    path = _get_path(path=path, key=config_key, name=name)

    # instantiate processor that unzips file
    if processor == 'nested_untar':
        processor_ = pooch.Untar(extract_dir=op.join(path, folder_name))
    elif processor == 'nested_unzip':
        processor_ = pooch.Unzip(extract_dir=op.join(path, folder_name))
    else:
        processor_ = processor

    # handle case of multiple sub-datasets with different urls
    if name == 'visual_92_categories':
        dataset_params = []
        for name in ['visual_92_categories_1', 'visual_92_categories_2']:
            this_dataset = MNE_DATASETS[name]
            this_dataset['dataset_name'] = name
            dataset_params.append(this_dataset)

    return fetch_dataset(dataset_params=dataset_params, processor=processor_,
                         path=path, force_update=force_update,
                         update_path=update_path, download=download,
                         accept=accept)
def get_jmamlr():
    url = 'http://www.data.jma.go.jp/gmd/kaiyou/data/english/co2_flux/grid/{name}'
    xds = []
    for t in pd.date_range('1990-01', '2019', freq='1AS', closed='left'):
        fname = 'JMA_co2map_{t:%Y}.ZIP'.format(t=t)
        fname = pooch.retrieve(
            url.format(t=t, name=fname), None,
            fname=fname,
            path='../data-in/JMA-MLR/',
            processor=pooch.Unzip(),
            downloader=pooch.HTTPDownloader(progressbar=True))[0]
        xda = xr.open_dataset(fname, decode_times=False).pCO2s
        # build a proper monthly time axis for the year
        y0, y1 = str(t.year), str(t.year + 1)
        time = pd.date_range(y0, y1, freq='1MS', closed='left')
        xda = xda.assign_coords(time=time)
        xds.append(xda)
    # concatenate the years and recenter longitudes on -180..180
    xda = (xr.concat(xds, dim='time')
           .assign_coords(lon=(xda.lon - 180) % 360 - 180)
           .sortby('lon'))
    return xda
def fetch_all(self):
    """
    Download the entire FARS dataset to the cache folder.
    """
    # The files will be downloaded automatically the first time this is run.
    fnames = self.GOODBOY.registry_files
    unzipped = {}
    for fname in fnames:
        if self.GOODBOY.is_available(fname):
            if "dict" in fname:
                self.GOODBOY.fetch(fname, progressbar=self.show_progress)
            else:
                unpack = pooch.Unzip(extract_dir=f"./{fname[:-4]}.unzip")
                unzipped[fname] = self.GOODBOY.fetch(
                    fname, processor=unpack, progressbar=self.show_progress)
        else:
            raise FileNotFoundError(
                f"{fname}: File could not be found in FARS FTP directory.")
    return unzipped
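# fetch_single and fetch_all above assume the class carries a GOODBOY Pooch
# instance ("GOODBOY" is pooch's conventional pet name for a fetcher). A
# minimal sketch of how it might be wired up; the base_url and registry file
# name are hypothetical placeholders:
import pooch


class FARSFetcher:
    def __init__(self, show_progress=True):
        self.show_progress = show_progress
        self.GOODBOY = pooch.create(
            path=pooch.os_cache('fars'),           # local cache folder
            base_url='https://example.org/fars/',  # assumed data host
            registry=None,
        )
        # file names and hashes shipped alongside the package
        self.GOODBOY.load_registry('fars_registry.txt')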
def data_path(path=None, force_update=False, update_path=True, download=True,
              verbose=None):  # noqa: D103
    """
    Audio speech and noise dataset with 18 participants.

    Get path to local copy of data from the article :footcite:`shader2021use`.

    Parameters
    ----------
    path : None | str
        Location of where to look for the dataset.
        If None, the environment variable or config parameter is used.
        If it doesn’t exist, the “~/mne_data” directory is used.
        If the dataset is not found under the given path, the data
        will be automatically downloaded to the specified folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    update_path : bool | None
        If True, set the MNE_DATASETS_FNIRSSPEECHNOISE_PATH in
        mne-python config to the given path.
        If None, the user is prompted.
    download : bool
        If False and the dataset has not been downloaded yet,
        it will not be downloaded and the path will be returned
        as ‘’ (empty string). This is mostly used for debugging purposes
        and can be safely ignored by most users.
    %(verbose)s

    Returns
    -------
    path : str
        Path to dataset directory.

    References
    ----------
    .. footbibliography::
    """
    dataset_params = dict(
        archive_name='2021-fNIRS-Analysis-Methods-Passive-Auditory.zip',
        hash='md5:569c0fbafa575e344e90698c808dfdd3',
        url='https://osf.io/bjfu7/download?version=1',
        folder_name='fNIRS-block-speech-noise',
        dataset_name='block_speech_noise',
        config_key='MNE_DATASETS_FNIRSSPEECHNOISE_PATH',
    )

    dpath = fetch_dataset(dataset_params, path=path,
                          force_update=force_update,
                          update_path=update_path, download=download,
                          processor=pooch.Unzip(
                              extract_dir="./fNIRS-block-speech-noise"))
    dpath = str(dpath)

    # Do some wrangling to deal with nested directories
    bad_name = os.path.join(dpath, '2021-fNIRS-Analysis-Methods-'
                                   'Passive-Auditory')
    if os.path.isdir(bad_name):
        os.rename(bad_name, dpath + '.true')
        shutil.rmtree(dpath)
        os.rename(dpath + '.true', dpath)

    return _mne_path(dpath)
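# A hedged usage sketch (assumes this data_path is importable and the dataset
# has been downloaded; the listing is illustrative only):
import os

dpath = data_path()
print(sorted(os.listdir(dpath)))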
def data_path(path=None, force_update=False, update_path=True, download=True,
              verbose=None):  # noqa: D103
    """
    Motor task experiment data with 5 participants.

    Get path to local copy of data from the article :footcite:`shader2021use`.

    Parameters
    ----------
    path : None | str
        Location of where to look for the dataset.
        If None, the environment variable or config parameter is used.
        If it doesn’t exist, the “~/mne_data” directory is used.
        If the dataset is not found under the given path, the data
        will be automatically downloaded to the specified folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    update_path : bool | None
        If True, set the MNE_DATASETS_FNIRSMOTORGROUP_PATH in
        mne-python config to the given path.
        If None, the user is prompted.
    download : bool
        If False and the dataset has not been downloaded yet,
        it will not be downloaded and the path will be returned
        as ‘’ (empty string). This is mostly used for debugging purposes
        and can be safely ignored by most users.
    %(verbose)s

    Returns
    -------
    path : str
        Path to dataset directory.

    References
    ----------
    .. footbibliography::
    """
    dataset_params = dict(
        archive_name='BIDS-NIRS-Tapping-master.zip',
        hash='md5:da3cac7252005f0a64fdba5c683cf3dd',
        url='https://github.com/rob-luke/BIDS-NIRS-Tapping/archive/v0.1.0.zip',
        folder_name='fNIRS-motor-group',
        dataset_name='fnirs_motor_group',
        config_key='MNE_DATASETS_FNIRSMOTORGROUP_PATH',
    )

    dpath = fetch_dataset(dataset_params, path=path,
                          force_update=force_update,
                          update_path=update_path, download=download,
                          processor=pooch.Unzip(
                              extract_dir="./fNIRS-motor-group"))
    dpath = str(dpath)

    # Do some wrangling to deal with nested directories
    bad_name = os.path.join(dpath, 'BIDS-NIRS-Tapping-0.1.0')
    if os.path.isdir(bad_name):
        os.rename(bad_name, dpath + '.true')
        shutil.rmtree(dpath)
        os.rename(dpath + '.true', dpath)

    return _mne_path(dpath)
def fetch_bedmap2():
    filenames = bedmap2.fetch('bedmap2_tiff.zip', processor=pooch.Unzip())
    return [f for f in filenames if os.path.splitext(f)[1] == '.tif']
def fetch_dataset(
    dataset_params,
    processor=None,
    path=None,
    force_update=False,
    update_path=True,
    download=True,
    check_version=False,
    return_version=False,
    accept=False,
    auth=None,
    token=None,
):
    """Fetch an MNE-compatible dataset.

    Parameters
    ----------
    dataset_params : list of dict | dict
        The dataset name(s) and corresponding parameters to download the
        dataset(s). The dataset parameters dict contains the following keys:
        ``archive_name``, ``url``, ``folder_name``, ``hash``,
        ``config_key`` (optional). See Notes.
    processor : None | "unzip" | "untar" | instance of pooch.Unzip | instance of pooch.Untar
        What to do after downloading the file. ``"unzip"`` and ``"untar"``
        will decompress the downloaded file in place; for custom extraction
        (e.g., only extracting certain files from the archive) pass an
        instance of :class:`pooch.Unzip` or :class:`pooch.Untar`. If ``None``
        (the default), the files are left as-is.
    path : None | str
        Directory in which to put the dataset. If ``None``, the dataset
        location is determined by first checking whether
        ``dataset_params['config_key']`` is defined, and if so, whether that
        config key exists in the MNE-Python config file. If so, the
        configured path is used; if not, the location is set to the value of
        the ``MNE_DATA`` config key (if it exists), or ``~/mne_data``
        otherwise.
    force_update : bool
        Force update of the dataset even if a local copy exists.
        Default is False.
    update_path : bool | None
        If True (default), set the mne-python config to the given path.
        If None, the user is prompted.
    download : bool
        If False and the dataset has not been downloaded yet, it will not be
        downloaded and the path will be returned as ``''`` (empty string).
        This is mostly used for testing purposes and can be safely ignored
        by most users.
    check_version : bool
        Whether to check the version of the dataset or not. Each version
        of the dataset is stored in the root with a ``version.txt`` file.
    return_version : bool
        Whether or not to return the version of the dataset or not.
        Defaults to False.
    accept : bool
        Some MNE-supplied datasets require acceptance of an additional
        license. Default is ``False``.
    auth : tuple | None
        Optional authentication tuple containing the username and
        password/token, passed to :class:`pooch.HTTPDownloader` (e.g.,
        ``auth=('foo', 012345)``).
    token : str | None
        Optional authentication token passed to
        :class:`pooch.HTTPDownloader`.

    Returns
    -------
    data_path : str
        The path to the fetched dataset.
    version : str
        Only returned if ``return_version`` is True.

    See Also
    --------
    mne.get_config
    mne.set_config
    mne.datasets.has_dataset

    Notes
    -----
    The ``dataset_params`` argument must contain the following keys:

    - ``archive_name``: The name of the (possibly compressed) file to
      download
    - ``url``: URL from which the file can be downloaded
    - ``folder_name``: the subfolder within the ``MNE_DATA`` folder in which
      to save and uncompress (if needed) the file(s)
    - ``hash``: the cryptographic hash type of the file followed by a colon
      and then the hash value (examples: "sha256:19uheid...",
      "md5:upodh2io...")
    - ``config_key`` (optional): key passed to :func:`mne.set_config` to
      store the on-disk location of the downloaded dataset (e.g.,
      ``"MNE_DATASETS_EEGBCI_PATH"``). This will only work for the provided
      datasets listed :ref:`here <datasets>`; do not use for user-defined
      datasets.

    An example would look like::

        {'dataset_name': 'sample',
         'archive_name': 'MNE-sample-data-processed.tar.gz',
         'hash': 'md5:12b75d1cb7df9dfb4ad73ed82f61094f',
         'url': 'https://osf.io/86qa2/download?version=5',
         'folder_name': 'MNE-sample-data',
         'config_key': 'MNE_DATASETS_SAMPLE_PATH'}

    For datasets where a single (possibly compressed) file must be
    downloaded, pass a single :class:`dict` as ``dataset_params``. For
    datasets where multiple files must be downloaded and (optionally)
    uncompressed separately, pass a list of dicts.
    """  # noqa E501
    # import pooch library for handling the dataset downloading
    # pooch = _soft_import("pooch", "dataset downloading", strict=True)  # JG_MOD
    import pooch

    if auth is not None:
        if len(auth) != 2:
            raise RuntimeError("auth should be a 2-tuple consisting "
                               "of a username and password/token.")

    # processor to uncompress files
    if processor == "untar":
        processor = pooch.Untar(extract_dir=path)
    elif processor == "unzip":
        processor = pooch.Unzip(extract_dir=path)

    if isinstance(dataset_params, dict):
        dataset_params = [dataset_params]

    # extract configuration parameters
    names = [params["dataset_name"] for params in dataset_params]
    name = names[0]
    dataset_dict = dataset_params[0]
    config_key = dataset_dict.get('config_key', None)
    folder_name = dataset_dict["folder_name"]

    # get download path for specific dataset
    path = _get_path(path=path, key=config_key, name=name)

    # get the actual path to each dataset folder name
    final_path = op.join(path, folder_name)

    # handle BrainStorm datasets with nested folders for datasets
    if name.startswith("bst_"):
        final_path = op.join(final_path, name)

    # additional condition: check for version.txt and parse it
    # check if testing or misc data is outdated; if so, redownload it
    want_version = RELEASES.get(name, None)
    want_version = _FAKE_VERSION if name == "fake" else want_version

    # get the version of the dataset and then check if the version is outdated
    data_version = _dataset_version(final_path, name)
    outdated = (want_version is not None and
                LooseVersion(want_version) > LooseVersion(data_version))

    if outdated:
        logger.info(f"Dataset {name} version {data_version} out of date, "
                    f"latest version is {want_version}")

    # return empty string if outdated dataset and we don't want to download
    if (not force_update) and outdated and not download:
        return ("", data_version) if return_version else ""

    # reasons to bail early (hf_sef has separate code for this):
    if ((not force_update) and (not outdated) and
            (not name.startswith("hf_sef_"))):
        # ...if target folder exists (otherwise pooch downloads every
        # time because we don't save the archive files after unpacking, so
        # pooch can't check its checksum)
        if op.isdir(final_path):
            if config_key is not None:
                _do_path_update(path, update_path, config_key, name)
            return (final_path, data_version) if return_version else final_path
        # ...if download=False (useful for debugging)
        elif not download:
            return ("", data_version) if return_version else ""
        # ...if user didn't accept the license
        elif name.startswith("bst_"):
            if accept or "--accept-brainstorm-license" in sys.argv:
                answer = "y"
            else:
                # If they don't have stdin, just accept the license
                # https://github.com/mne-tools/mne-python/issues/8513#issuecomment-726823724  # noqa: E501
                answer = _safe_input("%sAgree (y/[n])? " % _bst_license_text,
                                     use="y")
            if answer.lower() != "y":
                raise RuntimeError("You must agree to the license to use "
                                   "this dataset")

    # downloader & processors
    download_params = dict(progressbar=True)  # use tqdm
    if name == "fake":
        download_params["progressbar"] = False
    if auth is not None:
        download_params["auth"] = auth
    if token is not None:
        download_params["headers"] = {"Authorization": f"token {token}"}
    downloader = pooch.HTTPDownloader(**download_params)

    # make mappings from archive names to urls and to checksums
    urls = dict()
    registry = dict()
    for idx, this_name in enumerate(names):
        this_dataset = dataset_params[idx]
        archive_name = this_dataset["archive_name"]
        dataset_url = this_dataset["url"]
        dataset_hash = this_dataset["hash"]
        urls[archive_name] = dataset_url
        registry[archive_name] = dataset_hash

    # create the download manager
    fetcher = pooch.create(
        path=final_path if processor is None else path,
        base_url="",  # Full URLs are given in the `urls` dict.
        version=None,  # Data versioning is decoupled from MNE-Python version.
        urls=urls,
        registry=registry,
        retry_if_failed=2,  # 2 retries = 3 total attempts
    )

    # use our logger level for pooch's logger too
    pooch.get_logger().setLevel(logger.getEffectiveLevel())

    for idx in range(len(names)):
        # fetch and unpack the data
        archive_name = dataset_params[idx]["archive_name"]
        fetcher.fetch(fname=archive_name, downloader=downloader,
                      processor=processor)
        # after unpacking, remove the archive file
        if processor is not None:
            os.remove(op.join(path, archive_name))

    # remove version number from "misc" and "testing" datasets folder names
    if name == "misc":
        rmtree(final_path, ignore_errors=True)
        os.replace(op.join(path, MISC_VERSIONED), final_path)
    elif name == "testing":
        rmtree(final_path, ignore_errors=True)
        os.replace(op.join(path, TESTING_VERSIONED), final_path)

    # maybe update the config
    if config_key is not None:
        old_name = "brainstorm" if name.startswith("bst_") else name
        _do_path_update(path, update_path, config_key, old_name)

    # compare the version of the dataset and mne
    data_version = _dataset_version(path, name)
    # 0.7 < 0.7.git should be False, therefore strip
    if check_version and (LooseVersion(data_version) <
                          LooseVersion(mne_version.strip(".git"))):
        warn("The {name} dataset (version {current}) is older than "
             "mne-python (version {newest}). If the examples fail, "
             "you may need to update the {name} dataset by using "
             "mne.datasets.{name}.data_path(force_update=True)".format(
                 name=name, current=data_version, newest=mne_version))

    return (final_path, data_version) if return_version else final_path
def data_path(path=None, force_update=False, update_path=True, download=True,
              verbose=None):  # noqa: D103
    """
    Audio and visual speech dataset with 8 participants.

    Get path to local copy of data from the article :footcite:`shader2021use`.

    Parameters
    ----------
    path : None | str
        Location of where to look for the dataset.
        If None, the environment variable or config parameter is used.
        If it doesn’t exist, the “~/mne_data” directory is used.
        If the dataset is not found under the given path, the data
        will be automatically downloaded to the specified folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    update_path : bool | None
        If True, set the MNE_DATASETS_FNIRSAUDIOVISUALSPEECH_PATH in
        mne-python config to the given path.
        If None, the user is prompted.
    download : bool
        If False and the dataset has not been downloaded yet,
        it will not be downloaded and the path will be returned
        as ‘’ (empty string). This is mostly used for debugging purposes
        and can be safely ignored by most users.
    %(verbose)s

    Returns
    -------
    path : str
        Path to dataset directory.

    References
    ----------
    .. footbibliography::
    """
    dataset_params = dict(
        archive_name='2021-fNIRS-Audio-visual-speech-'
                     'Broad-vs-restricted-regions.zip',
        hash='md5:16cac6565880dae6aed9b69100399d0b',
        url='https://osf.io/xwerv/download?version=1',
        folder_name='fNIRS-audio-visual-speech',
        dataset_name='audio_or_visual_speech',
        config_key='MNE_DATASETS_FNIRSAUDIOVISUALSPEECH_PATH',
    )

    dpath = fetch_dataset(dataset_params, path=path,
                          force_update=force_update,
                          update_path=update_path, download=download,
                          processor=pooch.Unzip(
                              extract_dir="./fNIRS-audio-visual-speech"))
    dpath = str(dpath)

    # Do some wrangling to deal with nested directories
    bad_name = os.path.join(dpath, '2021-fNIRS-Audio-visual-speech-'
                                   'Broad-vs-restricted-regions')
    if os.path.isdir(bad_name):
        os.rename(bad_name, dpath + '.true')
        shutil.rmtree(dpath)
        os.rename(dpath + '.true', dpath)

    return _mne_path(dpath)
def fetch_bedmap2():
    downloader = pooch.HTTPDownloader(progressbar=True)
    filenames = bedmap2.fetch('bedmap2_tiff.zip', processor=pooch.Unzip(),
                              downloader=downloader)
    return [f for f in filenames if os.path.splitext(f)[1] == '.tif']
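# Both fetch_bedmap2 variants assume a module-level `bedmap2` Pooch instance.
# A minimal sketch of how it might be defined; the base_url is an assumption
# and the None hash skips checksum verification:
import pooch

bedmap2 = pooch.create(
    path=pooch.os_cache('bedmap2'),
    base_url='https://secure.antarctica.ac.uk/data/bedmap2/',  # assumed host
    registry={'bedmap2_tiff.zip': None},
)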