Example #1
def _get_golf_path():
    unpack = pooch.Unzip()
    fnames = REGISTRY.fetch('golf.zip', processor=unpack)
    nc_bool = [os.path.splitext(fname)[1] == '.nc' for fname in fnames]
    nc_idx = [i for i, b in enumerate(nc_bool) if b]
    golf_path = fnames[nc_idx[0]]
    return golf_path
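Examples #1 and #2 fetch from a module-level REGISTRY that is not shown. A minimal sketch of what it could be, assuming a pooch.Pooch built with pooch.create (the cache name and base URL below are placeholders, and hashes are omitted):

import pooch

# Hypothetical registry: base_url and the cache name are placeholders;
# a None hash disables checksum verification for this sketch.
REGISTRY = pooch.create(
    path=pooch.os_cache("example-project"),
    base_url="https://example.com/downloads/",
    registry={"golf.zip": None, "xslope.zip": None},
)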
Example #2
def _get_xslope_path():
    unpack = pooch.Unzip()
    fnames = REGISTRY.fetch('xslope.zip', processor=unpack)
    # keep only the netCDF files, sorted so the job ordering is deterministic
    nc_fnames = sorted(f for f in fnames
                       if os.path.splitext(f)[1] == '.nc')
    xslope_job_003_path = nc_fnames[0]
    xslope_job_013_path = nc_fnames[1]
    return xslope_job_003_path, xslope_job_013_path
Example #3
    def fetch_single(self, year):
        """
        Load the FARS data for a given year.
        """
        # fname = f'{year}/National/FARS{year}NationalCSV.zip'
        fname = f'{year}.zip'
        if self.GOODBOY.is_available(fname):
            unpack = pooch.Unzip(extract_dir=f"./{fname[:-4]}.unzip")
            unzipped = self.GOODBOY.fetch(fname,
                                          processor=unpack,
                                          progressbar=self.show_progress)
        else:
            raise FileNotFoundError(
                f"{fname}: File could not be found in FARS FTP directory.")

        return {year: unzipped}
Example #4
def main():
    # importing regions as the base template
    regions_url = 'https://github.com/RECCAP2-ocean/shared-resources/raw/master/regions/reccap2ocean_regions.nc'
    regions_fname = pooch.retrieve(regions_url, None)
    regions = (xr.open_dataset(regions_fname).interp(
        lon=np.arange(-179.875, 180, .25),
        lat=np.arange(-89.875, 90, 0.25)).rename(lat='latitude',
                                                 lon='longitude'))

    # getting shapefile information
    shapefile_url = 'http://www.hydrol-earth-syst-sci.net/17/2029/2013/hess-17-2029-2013-supplement.zip'
    shapefile_name = 'Continental_Shelf'
    shapefile_flist = pooch.retrieve(shapefile_url,
                                     None,
                                     processor=pooch.Unzip())
    shapefile_path = str(
        Path([f for f in shapefile_flist if shapefile_name in f][0]).parent)

    regions = add_shape_coord_from_data_array(regions, shapefile_path,
                                              shapefile_name)

    continental_shelf = regions[shapefile_name]
    continental_shelf = continental_shelf.coarsen(latitude=4,
                                                  longitude=4).min()
    continental_shelf = continental_shelf.to_dataset(name='continental_shelf')

    continental_shelf.attrs = dict(
        source='https://www.hydrol-earth-syst-sci.net/17/2029/2013/',
        publication=
        'Laruelle, G. G., Dürr, H. H., Lauerwald, R., Hartmann, J., Slomp, C. P., Goossens, N., and Regnier, P. A. G.: Global multi-scale segmentation of continental and coastal waters from the watersheds to the continental margins, Hydrol. Earth Syst. Sci., 17, 2029–2051, https://doi.org/10.5194/hess-17-2029-2013, 2013.',
        description='coastal zones as defined in the publication.',
        history=
        'data downloaded as shapefile (Continental_shelf.shp) from the link provided in `source`. The file was then converted to 1/4deg netCDF and then downscaled to 1deg.'
    )

    encoding = {
        k: {
            'zlib': True,
            'complevel': 4
        }
        for k in continental_shelf.data_vars
    }

    continental_shelf.to_netcdf(
        '../reccap2coastal_coscats.nc',
        encoding=encoding,
    )
Example #5
def choose_processor(url):
    """
    Choose the processor to unpack/decompress the file, based on the
    archive extension found in the URL (or None if nothing matches).
    """
    known_processors = {
        pooch.Decompress(): (".gz2", ".gz"),
        pooch.Untar(): (".tar", ".tgz", ".tar.gz"),
        pooch.Unzip(): (".zip", ),
        None: "*",
    }

    # the last processor whose extension appears in the URL wins, so a
    # ".tar.gz" URL is handled by Untar rather than Decompress
    chosen = None
    for processor, extensions in known_processors.items():
        for ext in extensions:
            if ext in url.lower():
                chosen = processor
    return chosen
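A hypothetical call site for choose_processor (the URL is a placeholder; known_hash=None skips checksum verification, and with an Untar/Unzip processor pooch.retrieve returns the list of extracted paths):

import pooch

url = "https://example.com/archive.tar.gz"
files = pooch.retrieve(url, known_hash=None, processor=choose_processor(url))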
Example #6
def _download_mne_dataset(name,
                          processor,
                          path,
                          force_update,
                          update_path,
                          download,
                          accept=False):
    """Aux function for downloading internal MNE datasets."""
    import pooch
    from mne.datasets._fetch import fetch_dataset

    # look up the parameters for the requested dataset
    dataset_params = MNE_DATASETS[name]
    dataset_params['dataset_name'] = name
    config_key = MNE_DATASETS[name]['config_key']
    folder_name = MNE_DATASETS[name]['folder_name']

    # get download path for specific dataset
    path = _get_path(path=path, key=config_key, name=name)

    # instantiate processor that unzips file
    if processor == 'nested_untar':
        processor_ = pooch.Untar(extract_dir=op.join(path, folder_name))
    elif processor == 'nested_unzip':
        processor_ = pooch.Unzip(extract_dir=op.join(path, folder_name))
    else:
        processor_ = processor

    # handle case of multiple sub-datasets with different urls
    if name == 'visual_92_categories':
        dataset_params = []
        for name in ['visual_92_categories_1', 'visual_92_categories_2']:
            this_dataset = MNE_DATASETS[name]
            this_dataset['dataset_name'] = name
            dataset_params.append(this_dataset)

    return fetch_dataset(dataset_params=dataset_params,
                         processor=processor_,
                         path=path,
                         force_update=force_update,
                         update_path=update_path,
                         download=download,
                         accept=accept)
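A hypothetical call to this helper, with arguments matching the signature above (the dataset name 'sample' and the 'untar' processor value are illustrative, not a claim about how MNE invokes it):

data_dir = _download_mne_dataset(name='sample', processor='untar',
                                 path=None, force_update=False,
                                 update_path=True, download=True)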
Example #7
def get_jmamlr():
    url = 'http://www.data.jma.go.jp/gmd/kaiyou/data/english/co2_flux/grid/{name}'

    xds = []
    for t in pd.date_range('1990-01', '2019', freq='1AS', closed='left'):
        fname = 'JMA_co2map_{t:%Y}.ZIP'.format(t=t)
        fname = pooch.retrieve(
            url.format(t=t, name=fname),
            None,
            fname=fname,
            path='../data-in/JMA-MLR/',
            processor=pooch.Unzip(),
            downloader=pooch.HTTPDownloader(progressbar=True))[0]
        xda = xr.open_dataset(fname, decode_times=False).pCO2s
        y0, y1 = str(t.year), str(t.year + 1)
        time = pd.date_range(y0, y1, freq='1MS', closed='left')
        xda = xda.assign_coords(time=time)
        xds.append(xda)

    xda = (xr.concat(xds, dim='time').assign_coords(lon=(xda.lon - 180) % 360 -
                                                    180).sortby('lon'))

    return xda
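One portability caveat: the closed keyword of pd.date_range was deprecated in pandas 1.4 and removed in 2.0 in favor of inclusive, so on a recent pandas both date ranges above would be written as, e.g.:

time = pd.date_range(y0, y1, freq='1MS', inclusive='left')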
Example #8
    def fetch_all(self):
        """
        Download the entire FARS dataset, to cache folder.
        """
        # The file will be downloaded automatically the first time this is run.

        fnames = self.GOODBOY.registry_files
        unzipped = {}

        for fname in fnames:

            if self.GOODBOY.is_available(fname):
                if "dict" in fname:
                    self.GOODBOY.fetch(fname, progressbar=self.show_progress)
                else:
                    unpack = pooch.Unzip(extract_dir=f"./{fname[:-4]}.unzip")
                    unzipped[fname] = self.GOODBOY.fetch(
                        fname,
                        processor=unpack,
                        progressbar=self.show_progress)
            else:
                raise FileNotFoundError(
                    f"{fname}: File could not be found in FARS FTP directory.")
        return unzipped
Example #9
def data_path(path=None,
              force_update=False,
              update_path=True,
              download=True,
              verbose=None):  # noqa: D103
    """
    Audio speech and noise dataset with 18 participants.

    Get path to local copy of data from the article :footcite:`shader2021use`.

    Parameters
    ----------
    path : None | str
        Location of where to look for the dataset.
        If None, the environment variable or config parameter is used.
        If it doesn't exist, the "~/mne_data" directory is used.
        If the dataset is not found under the given path,
        the data will be automatically downloaded to the specified folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    update_path : bool | None
        If True, set the MNE_DATASETS_FNIRSSPEECHNOISE_PATH in
        mne-python config to the given path. If None, the user is prompted.
    download : bool
        If False and the dataset has not been downloaded yet,
        it will not be downloaded and the path will be returned
        as '' (empty string). This is mostly used for debugging purposes
        and can be safely ignored by most users.
    %(verbose)s

    Returns
    -------
    path : str
        Path to dataset directory.

    References
    ----------
    .. footbibliography::
    """

    dataset_params = dict(
        archive_name='2021-fNIRS-Analysis-Methods-Passive-Auditory.zip',
        hash='md5:569c0fbafa575e344e90698c808dfdd3',
        url='https://osf.io/bjfu7/download?version=1',
        folder_name='fNIRS-block-speech-noise',
        dataset_name='block_speech_noise',
        config_key='MNE_DATASETS_FNIRSSPEECHNOISE_PATH',
    )

    dpath = fetch_dataset(
        dataset_params,
        path=path,
        force_update=force_update,
        update_path=update_path,
        download=download,
        processor=pooch.Unzip(extract_dir="./fNIRS-block-speech-noise"))
    dpath = str(dpath)

    # Do some wrangling to deal with nested directories
    bad_name = os.path.join(dpath, '2021-fNIRS-Analysis-Methods-'
                            'Passive-Auditory')
    if os.path.isdir(bad_name):
        os.rename(bad_name, dpath + '.true')
        shutil.rmtree(dpath)
        os.rename(dpath + '.true', dpath)

    return _mne_path(dpath)
Example #10
def data_path(path=None,
              force_update=False,
              update_path=True,
              download=True,
              verbose=None):  # noqa: D103
    """
    Motor task experiment data with 5 participants.

    Get path to local copy of data from the article :footcite:`shader2021use`.

    Parameters
    ----------
    path : None | str
        Location of where to look for the dataset.
        If None, the environment variable or config parameter is used.
        If it doesn't exist, the "~/mne_data" directory is used.
        If the dataset is not found under the given path,
        the data will be automatically downloaded to the specified folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    update_path : bool | None
        If True, set the MNE_DATASETS_FNIRSMOTORGROUP_PATH in
        mne-python config to the given path. If None, the user is prompted.
    download : bool
        If False and the dataset has not been downloaded yet,
        it will not be downloaded and the path will be returned
        as '' (empty string). This is mostly used for debugging purposes
        and can be safely ignored by most users.
    %(verbose)s

    Returns
    -------
    path : str
        Path to dataset directory.

    References
    ----------
    .. footbibliography::
    """

    dataset_params = dict(
        archive_name='BIDS-NIRS-Tapping-master.zip',
        hash='md5:da3cac7252005f0a64fdba5c683cf3dd',
        url='https://github.com/rob-luke/BIDS-NIRS-Tapping/archive/v0.1.0.zip',
        folder_name='fNIRS-motor-group',
        dataset_name='fnirs_motor_group',
        config_key='MNE_DATASETS_FNIRSMOTORGROUP_PATH',
    )

    dpath = fetch_dataset(
        dataset_params,
        path=path,
        force_update=force_update,
        update_path=update_path,
        download=download,
        processor=pooch.Unzip(extract_dir="./fNIRS-motor-group"))
    dpath = str(dpath)

    # Do some wrangling to deal with nested directories
    bad_name = os.path.join(dpath, 'BIDS-NIRS-Tapping-0.1.0')
    if os.path.isdir(bad_name):
        os.rename(bad_name, dpath + '.true')
        shutil.rmtree(dpath)
        os.rename(dpath + '.true', dpath)

    return _mne_path(dpath)
Example #11
def fetch_bedmap2():
    filenames = bedmap2.fetch('bedmap2_tiff.zip', processor=pooch.Unzip())
    return [f for f in filenames if os.path.splitext(f)[1] == '.tif']
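The bedmap2 object used here (and again in Example #14) is a pooch.Pooch defined elsewhere in the module. A sketch of what it could look like, assuming the usual British Antarctic Survey download location for bedmap2_tiff.zip (the cache path is a placeholder and the hash is omitted):

import pooch

bedmap2 = pooch.create(
    path=pooch.os_cache("bedmap2"),  # placeholder cache location
    base_url="https://secure.antarctica.ac.uk/data/bedmap2/",  # assumed source
    registry={"bedmap2_tiff.zip": None},  # hash omitted in this sketch
)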
Example #12
def fetch_dataset(
    dataset_params,
    processor=None,
    path=None,
    force_update=False,
    update_path=True,
    download=True,
    check_version=False,
    return_version=False,
    accept=False,
    auth=None,
    token=None,
):
    """Fetch an MNE-compatible dataset.

    Parameters
    ----------
    dataset_params : list of dict | dict
        The dataset name(s) and corresponding parameters to download the
        dataset(s). The dataset parameters contain the following keys:
        ``archive_name``, ``url``, ``folder_name``, ``hash``,
        ``config_key`` (optional). See Notes.
    processor : None | "unzip" | "untar" | instance of pooch.Unzip | instance of pooch.Untar
        What to do after downloading the file. ``"unzip"`` and ``"untar"`` will
        decompress the downloaded file in place; for custom extraction (e.g.,
        only extracting certain files from the archive) pass an instance of
        :class:`pooch.Unzip` or :class:`pooch.Untar`. If ``None`` (the
        default), the files are left as-is.
    path : None | str
        Directory in which to put the dataset. If ``None``, the dataset
        location is determined by first checking whether
        ``dataset_params['config_key']`` is defined, and if so, whether that
        config key exists in the MNE-Python config file. If so, the configured
        path is used; if not, the location is set to the value of the
        ``MNE_DATA`` config key (if it exists), or ``~/mne_data`` otherwise.
    force_update : bool
        Force update of the dataset even if a local copy exists.
        Default is False.
    update_path : bool | None
        If True (default), set the mne-python config to the given
        path. If None, the user is prompted.
    download : bool
        If False and the dataset has not been downloaded yet, it will not be
        downloaded and the path will be returned as ``''`` (empty string). This
        is mostly used for testing purposes and can be safely ignored by most
        users.
    check_version : bool
        Whether to check the version of the dataset or not. Each version
        of the dataset is stored in the root with a ``version.txt`` file.
    return_version : bool
        Whether or not to return the version of the dataset.
        Defaults to False.
    accept : bool
        Some MNE-supplied datasets require acceptance of an additional license.
        Default is ``False``.
    auth : tuple | None
        Optional authentication tuple containing the username and
        password/token, passed to :class:`pooch.HTTPDownloader` (e.g.,
        ``auth=('foo', '012345')``).
    token : str | None
        Optional authentication token passed to :class:`pooch.HTTPDownloader`.

    Returns
    -------
    data_path : str
        The path to the fetched dataset.
    version : str
        Only returned if ``return_version`` is True.

    See Also
    --------
    mne.get_config
    mne.set_config
    mne.datasets.has_dataset

    Notes
    -----
    The ``dataset_params`` argument must contain the following keys:

    - ``archive_name``: The name of the (possibly compressed) file to download
    - ``url``: URL from which the file can be downloaded
    - ``folder_name``: the subfolder within the ``MNE_DATA`` folder in which to
        save and uncompress (if needed) the file(s)
    - ``hash``: the cryptographic hash type of the file followed by a colon and
        then the hash value (examples: "sha256:19uheid...", "md5:upodh2io...")
    - ``config_key`` (optional): key passed to :func:`mne.set_config` to store
        the on-disk location of the downloaded dataset (e.g.,
        ``"MNE_DATASETS_EEGBCI_PATH"``). This will only work for the provided
        datasets listed :ref:`here <datasets>`; do not use for user-defined
        datasets.

    An example would look like::

        {'dataset_name': 'sample',
         'archive_name': 'MNE-sample-data-processed.tar.gz',
         'hash': 'md5:12b75d1cb7df9dfb4ad73ed82f61094f',
         'url': 'https://osf.io/86qa2/download?version=5',
         'folder_name': 'MNE-sample-data',
         'config_key': 'MNE_DATASETS_SAMPLE_PATH'}

    For datasets where a single (possibly compressed) file must be downloaded,
    pass a single :class:`dict` as ``dataset_params``. For datasets where
    multiple files must be downloaded and (optionally) uncompressed separately,
    pass a list of dicts.
    """  # noqa E501
    # import pooch library for handling the dataset downloading
    import pooch

    if auth is not None:
        if len(auth) != 2:
            raise RuntimeError("auth should be a 2-tuple consisting "
                               "of a username and password/token.")

    # processor to uncompress files
    if processor == "untar":
        processor = pooch.Untar(extract_dir=path)
    elif processor == "unzip":
        processor = pooch.Unzip(extract_dir=path)

    if isinstance(dataset_params, dict):
        dataset_params = [dataset_params]

    # extract configuration parameters
    names = [params["dataset_name"] for params in dataset_params]
    name = names[0]
    dataset_dict = dataset_params[0]
    config_key = dataset_dict.get('config_key', None)
    folder_name = dataset_dict["folder_name"]

    # get download path for specific dataset
    path = _get_path(path=path, key=config_key, name=name)

    # get the actual path to each dataset folder name
    final_path = op.join(path, folder_name)

    # handle BrainStorm datasets with nested folders for datasets
    if name.startswith("bst_"):
        final_path = op.join(final_path, name)

    # additional condition: check for version.txt and parse it
    # check if testing or misc data is outdated; if so, redownload it
    want_version = RELEASES.get(name, None)
    want_version = _FAKE_VERSION if name == "fake" else want_version

    # get the version of the dataset and then check if the version is outdated
    data_version = _dataset_version(final_path, name)
    outdated = (want_version is not None
                and LooseVersion(want_version) > LooseVersion(data_version))

    if outdated:
        logger.info(f"Dataset {name} version {data_version} out of date, "
                    f"latest version is {want_version}")

    # return empty string if outdated dataset and we don't want to download
    if (not force_update) and outdated and not download:
        return ("", data_version) if return_version else ""

    # reasons to bail early (hf_sef has separate code for this):
    if ((not force_update) and (not outdated)
            and (not name.startswith("hf_sef_"))):
        # ...if target folder exists (otherwise pooch downloads every
        # time because we don't save the archive files after unpacking, so
        # pooch can't check its checksum)
        if op.isdir(final_path):
            if config_key is not None:
                _do_path_update(path, update_path, config_key, name)
            return (final_path, data_version) if return_version else final_path
        # ...if download=False (useful for debugging)
        elif not download:
            return ("", data_version) if return_version else ""
        # ...if user didn't accept the license
        elif name.startswith("bst_"):
            if accept or "--accept-brainstorm-license" in sys.argv:
                answer = "y"
            else:
                # If they don't have stdin, just accept the license
                # https://github.com/mne-tools/mne-python/issues/8513#issuecomment-726823724  # noqa: E501
                answer = _safe_input("%sAgree (y/[n])? " % _bst_license_text,
                                     use="y")
            if answer.lower() != "y":
                raise RuntimeError("You must agree to the license to use this "
                                   "dataset")
    # downloader & processors
    download_params = dict(progressbar=True)  # use tqdm
    if name == "fake":
        download_params["progressbar"] = False
    if auth is not None:
        download_params["auth"] = auth
    if token is not None:
        download_params["headers"] = {"Authorization": f"token {token}"}
    downloader = pooch.HTTPDownloader(**download_params)

    # make mappings from archive names to urls and to checksums
    urls = dict()
    registry = dict()
    for idx, this_name in enumerate(names):
        this_dataset = dataset_params[idx]
        archive_name = this_dataset["archive_name"]
        dataset_url = this_dataset["url"]
        dataset_hash = this_dataset["hash"]
        urls[archive_name] = dataset_url
        registry[archive_name] = dataset_hash

    # create the download manager
    fetcher = pooch.create(
        path=final_path if processor is None else path,
        base_url="",  # Full URLs are given in the `urls` dict.
        version=None,  # Data versioning is decoupled from MNE-Python version.
        urls=urls,
        registry=registry,
        retry_if_failed=2,  # 2 retries = 3 total attempts
    )

    # use our logger level for pooch's logger too
    pooch.get_logger().setLevel(logger.getEffectiveLevel())

    for idx in range(len(names)):
        # fetch and unpack the data
        archive_name = dataset_params[idx]["archive_name"]
        fetcher.fetch(fname=archive_name,
                      downloader=downloader,
                      processor=processor)
        # after unpacking, remove the archive file
        if processor is not None:
            os.remove(op.join(path, archive_name))

    # remove version number from "misc" and "testing" datasets folder names
    if name == "misc":
        rmtree(final_path, ignore_errors=True)
        os.replace(op.join(path, MISC_VERSIONED), final_path)
    elif name == "testing":
        rmtree(final_path, ignore_errors=True)
        os.replace(op.join(path, TESTING_VERSIONED), final_path)

    # maybe update the config
    if config_key is not None:
        old_name = "brainstorm" if name.startswith("bst_") else name
        _do_path_update(path, update_path, config_key, old_name)

    # compare the version of the dataset and mne
    data_version = _dataset_version(path, name)
    # 0.7 < 0.7.git should be False, therefore strip
    if check_version and (LooseVersion(data_version) < LooseVersion(
            mne_version.strip(".git"))):
        warn("The {name} dataset (version {current}) is older than "
             "mne-python (version {newest}). If the examples fail, "
             "you may need to update the {name} dataset by using "
             "mne.datasets.{name}.data_path(force_update=True)".format(
                 name=name, current=data_version, newest=mne_version))
    return (final_path, data_version) if return_version else final_path
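A usage sketch for fetch_dataset, reusing the parameter dict given in its own docstring (the 'untar' shortcut is expanded to pooch.Untar near the top of the function):

sample_params = {
    'dataset_name': 'sample',
    'archive_name': 'MNE-sample-data-processed.tar.gz',
    'hash': 'md5:12b75d1cb7df9dfb4ad73ed82f61094f',
    'url': 'https://osf.io/86qa2/download?version=5',
    'folder_name': 'MNE-sample-data',
    'config_key': 'MNE_DATASETS_SAMPLE_PATH',
}
data_dir = fetch_dataset(sample_params, processor='untar')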
Example #13
def data_path(path=None,
              force_update=False,
              update_path=True,
              download=True,
              verbose=None):  # noqa: D103
    """
    Audio and visual speech dataset with 8 participants.

    Get path to local copy of data from the article :footcite:`shader2021use`.

    Parameters
    ----------
    path : None | str
        Location of where to look for the dataset.
        If None, the environment variable or config parameter is used.
        If it doesn't exist, the "~/mne_data" directory is used.
        If the dataset is not found under the given path,
        the data will be automatically downloaded to the specified folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    update_path : bool | None
        If True, set the MNE_DATASETS_FNIRSAUDIOVISUALSPEECH_PATH in
        mne-python config to the given path. If None, the user is prompted.
    download : bool
        If False and the dataset has not been downloaded yet,
        it will not be downloaded and the path will be returned
        as '' (empty string). This is mostly used for debugging purposes
        and can be safely ignored by most users.
    %(verbose)s

    Returns
    -------
    path : str
        Path to dataset directory.

    References
    ----------
    .. footbibliography::
    """

    dataset_params = dict(
        archive_name='2021-fNIRS-Audio-visual-speech-'
        'Broad-vs-restricted-regions.zip',
        hash='md5:16cac6565880dae6aed9b69100399d0b',
        url='https://osf.io/xwerv/download?version=1',
        folder_name='fNIRS-audio-visual-speech',
        dataset_name='audio_or_visual_speech',
        config_key='MNE_DATASETS_FNIRSAUDIOVISUALSPEECH_PATH',
    )

    dpath = fetch_dataset(
        dataset_params,
        path=path,
        force_update=force_update,
        update_path=update_path,
        download=download,
        processor=pooch.Unzip(extract_dir="./fNIRS-audio-visual-speech"))
    dpath = str(dpath)

    # Do some wrangling to deal with nested directories
    bad_name = os.path.join(
        dpath, '2021-fNIRS-Audio-visual-speech-'
        'Broad-vs-restricted-regions')
    if os.path.isdir(bad_name):
        os.rename(bad_name, dpath + '.true')
        shutil.rmtree(dpath)
        os.rename(dpath + '.true', dpath)

    return _mne_path(dpath)
Example #14
def fetch_bedmap2():
    downloader = pooch.HTTPDownloader(progressbar=True)
    filenames = bedmap2.fetch('bedmap2_tiff.zip',
                              processor=pooch.Unzip(),
                              downloader=downloader)
    return [f for f in filenames if os.path.splitext(f)[1] == '.tif']