Example #1
import os
import os.path as op
import zipfile as z

from pooch import retrieve


def fnirs_data_path(path, subject, accept):
    datapath = op.join(path, "NIRS", "subject {:02d}".format(subject))
    if not op.isfile(op.join(datapath, "mrk.mat")):
        # fNIRS
        if not op.isfile(op.join(path, "fNIRS.zip")):
            if not accept:
                raise AttributeError(
                    "You must accept licence term to download this dataset,"
                    "set accept=True when instanciating the dataset."
                )
            retrieve(
                "http://doc.ml.tu-berlin.de/hBCI/NIRS/NIRS_01-29.zip",
                None,
                fname="fNIRS.zip",
                path=path,
            )
        if not op.isdir(op.join(path, "NIRS")):
            os.makedirs(op.join(path, "NIRS"))
        with z.ZipFile(op.join(path, "fNIRS.zip"), "r") as f:
            f.extractall(op.join(path, "NIRS"))
        os.remove(op.join(path, "fNIRS.zip"))
    return [op.join(datapath, fn) for fn in ["cnt.mat", "mrk.mat"]]
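
# A hedged usage sketch (not part of the original snippet): the helper
# downloads and unpacks the archive on first use and returns the local paths
# to "cnt.mat" and "mrk.mat" for one subject. The base directory below is a
# placeholder.
cnt_file, mrk_file = fnirs_data_path(
    path=op.expanduser("~/mne_data/fNIRS-example"),  # placeholder location
    subject=1,
    accept=True,
)
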
def get_mpisomffn():
    url = 'https://www.nodc.noaa.gov/archive/arc0105/0160558/5.5/data/0-data/MPI_SOM-FFN_v2020/spco2_MPI-SOM_FFN_v2020.nc'
    fname = pooch.retrieve(url,
                           None,
                           fname='MPI-SOMFFN_v2020.nc',
                           path='../data-in/',
                           downloader=pooch.HTTPDownloader(progressbar=True))

    xds = xr.open_dataset(fname, drop_variables='date')
    xda = xds.spco2_raw.resample(time='1MS').mean()
    xda = xda.rename('mpi_somffn')

    return xda
def get_mpiulbsomffn():
    url = 'https://www.ncei.noaa.gov/data/oceans/ncei/ocads/data/0209633/MPI-ULB-SOM_FFN_clim.nc'
    fname = pooch.retrieve(url,
                           None,
                           fname='MPIULB-SOMFFN_clim.nc',
                           path='../data-in/',
                           downloader=pooch.HTTPDownloader(progressbar=True))

    xds = xr.open_dataset(fname)
    xda = xds.pco2.where(xds.pco2 > 0).coarsen(lat=4, lon=4).mean()
    xda = xda.rename('mpiulb_somffn').rename(time='month')

    return xda
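
# Hedged usage sketch (not in the original snippet): both helpers return
# xarray.DataArray objects, so the MPI-ULB climatology can be compared
# directly against the monthly MPI-SOMFFN field.
mpi_somffn = get_mpisomffn()
mpiulb_somffn = get_mpiulbsomffn()
print(mpi_somffn.dims, mpiulb_somffn.dims)
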
Example #4
def data_dl(url, sign, path=None, force_update=False, verbose=None):
    """Download file from url to specified path

    This function should replace data_path as the MNE will not support the download
    of dataset anymore. This version is using Pooch.

    Parameters
    ----------
    url : str
        Path to remote location of data
    sign : str
        Signifier of dataset
    path : None | str
        Location of where to look for the data storing location.
        If None, the environment variable or config parameter
        ``MNE_DATASETS_(signifier)_PATH`` is used. If it doesn't exist, the
        "~/mne_data" directory is used. If the dataset
        is not found under the given path, the data
        will be automatically downloaded to the specified folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    verbose : bool, str, int, or None
        If not None, override default verbose level (see :func:`mne.verbose`).

    Returns
    -------
    path : str
        Local path to the given data file.
    """
    path = get_dataset_path(sign, path)
    key_dest = "MNE-{:s}-data".format(sign.lower())
    destination = _url_to_local_path(url, osp.join(path, key_dest))

    # Fetch the file
    if not osp.isfile(destination) or force_update:
        if osp.isfile(destination):
            os.remove(destination)
        if not osp.isdir(osp.dirname(destination)):
            os.makedirs(osp.dirname(destination))
        known_hash = None
    else:
        known_hash = file_hash(destination)
    dlpath = retrieve(url,
                      known_hash,
                      fname=osp.basename(url),
                      path=osp.dirname(destination))
    return dlpath
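
# A minimal stand-alone sketch of the caching pattern used above, assuming
# plain pooch and some small, publicly reachable file (the URL below is only
# an illustration): retrieve() skips the download when the local copy matches
# ``known_hash``, so hashing the existing file turns repeat calls into cache
# hits.
#
# from pooch import file_hash, retrieve
#
# url = "https://example.com/some-small-file.txt"    # placeholder URL
# local = retrieve(url, known_hash=None)              # first call: download
# local = retrieve(url, known_hash=file_hash(local))  # later calls: cache hit
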
Example #5
def get_woa_basins():
    url = (
        "https://iridl.ldeo.columbia.edu/"
        "SOURCES/.NOAA/.NODC/.WOA09/.Masks/.basin/data.nc"
    )
    fname = pooch.retrieve(url, None)
    xda = (
        xr.open_dataset(fname)
        .rename({'X': 'lon', 'Y': 'lat', 'Z': 'depth'})
        .transpose('depth', 'lat', 'lon')
        .basin
        .assign_coords(lon=(np.arange(0.5, 360) - 180) % 360 - 180)
        .sortby('lon')
        .sel(depth=0)
        .drop('depth'))
    return xda
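
# Hedged usage sketch: the helper returns a 2-D DataArray of integer basin
# codes on a 1-degree grid with longitudes sorted into [-180, 180).
basins = get_woa_basins()
print(basins.sizes)                                  # lat/lon only; depth was dropped
print(sorted(basins.to_series().dropna().unique()))  # distinct basin codes
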
Example #6
    def __getitem__(self, item):
        try:
            import pooch
        except ImportError:
            raise ModuleNotFoundError(
                "pooch must be installed to load example data"
            )

        try:
            dataset, known_hash = self.registry[item]
        except KeyError:
            raise KeyError(f"'{item}' is not a valid example dataset")

        return Path(
            pooch.retrieve(url=self.base_url + dataset, known_hash=known_hash)
        )
Example #7
def get_example_data(outdir='./'):
    """
    Get example data sets and configuration files

    Parameters
    ----------
    outdir : str or Path, optional
        Location to extract the example files into.  They will be put at
        ``outdir/pyglider-example-data/``.  Default is to unpack in the
        current directory.
    """
    zipfile = pooch.retrieve("https://github.com/c-proof/pyglider-example-data/archive/refs/heads/main.zip",
                             known_hash=None)

    with ZipFile(zipfile, 'r') as zipObj:
        # Extract all the contents of zip file in outdir
        zipObj.extractall(outdir)
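
# Hedged usage sketch: fetch the example files once (pooch caches the zip)
# and unpack them into the working directory.
get_example_data(outdir='./')
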
Example #8
    def download_and_read_noaa_mbl(noaa_mbl_url):
        import re

        import numpy as np
        import pandas as pd
        import pooch

        # save to temporary location with pooch
        fname = pooch.retrieve(noaa_mbl_url, None)

        # find start line
        is_mbl_surface = False
        for start_line, line in enumerate(open(fname)):
            if re.findall('MBL.*SURFACE', line):
                is_mbl_surface = True
            if not line.startswith('#'):
                break
        if not is_mbl_surface:
            raise Exception(
                'The file at the provided url is not an MBL SURFACE file. '
                'Please check that you have provided the surface url. '
            )

        # read fixed width file CO2
        df = pd.read_fwf(fname, skiprows=start_line, header=None, index_col=0)
        df.index.name = 'date'
        # every second line is uncertainty
        df = df.iloc[:, ::2]
        # latitude is given as sin(lat)
        df.columns = np.rad2deg(np.arcsin(np.linspace(-1, 1, 41)))

        # resolve time properly
        year = (df.index.values - (df.index.values % 1)).astype(int)
        day_of_year = ((df.index.values - year) * 365 + 1).astype(int)
        date_strings = ['{}-{:03d}'.format(*a) for a in zip(year, day_of_year)]
        date = pd.to_datetime(date_strings, format='%Y-%j')
        df = df.set_index(date)

        # renaming indexes (have to stack for that)
        df = df.stack()
        index = df.index.set_names(['time', 'lat'])
        df = df.set_axis(index)

        df.source = noaa_mbl_url

        return df
def _get_southern_ocean_subregions(
        url='https://github.com/RECCAP2-ocean/shared-resources/raw/master/regions/RECCAP2_region_masks_all.nc',
        dest='../data/regions/'):
    import pooch
    import xarray as xr
    import pandas as pd
    from pathlib import Path as posixpath
    import itertools

    fname = pooch.retrieve(url, None, posixpath(url).name, dest)
    ds = xr.open_dataset(fname)

    mask = ds.southern

    atlantic = (((mask.lon > 290) | (mask.lon <= 20)) &
                (mask > 0)).astype(int) * 1
    indian = (((mask.lon > 20) & (mask.lon <= 147)) &
              (mask > 0)).astype(int) * 2
    pacific = (((mask.lon > 147) & (mask.lon <= 290)) &
               (mask > 0)).astype(int) * 3

    mask = xr.Dataset()
    mask['biomes'] = ds.southern.copy()
    mask['basins'] = (pacific + atlantic + indian).transpose('lat', 'lon')

    mask['subregions'] = (mask.basins * 3 + mask.biomes -
                          3).where(lambda a: a > 0).fillna(0).astype(int)

    basin = ['ATL', 'IND', 'PAC']
    biome = ['STSS', 'SPSS', 'ICE']
    names = ['-'.join(l) for l in itertools.product(basin, biome)]
    mask['names'] = xr.DataArray(names,
                                 coords={'idx': range(1, 10)},
                                 dims=('idx'))
    mask['names'].attrs['description'] = 'Names for the subregions'

    mask['subregions'].attrs['description'] = '(basins * 3 + biomes - 3)'
    mask['basins'].attrs[
        'description'] = 'Atlantic = 1, Indian = 2, Pacific = 3'
    mask['biomes'].attrs[
        'description'] = 'Biomes based on Fay and McKinley (2014), STSS=1, SPSS=2, ICE=3'
    mask.attrs['source'] = url
    mask.attrs['date'] = pd.Timestamp.today().strftime('%Y-%m-%d')
    return mask
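
# Hedged usage sketch: build the Southern Ocean mask dataset and inspect the
# nine basin-biome subregions (code 0 marks points outside the region).
so_mask = _get_southern_ocean_subregions()
print(so_mask.names.to_series())       # ATL/IND/PAC x STSS/SPSS/ICE labels
print(int(so_mask.subregions.max()))   # highest subregion code, expected 9
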
Example #10
def pytest_configure():

    fnames = pooch.retrieve(
        url="https://zenodo.org/record/5832607/files/Data.tar.gz?download=1",
        processor=Untar(),
        known_hash=
        "98b2bfadefa62dd223224c797354f9266b54143c2af3c4b6fe676d8547e7d5ee",
    )
    symlink_args = dict(
        src=f"{os.path.commonpath(fnames)}",
        dst="./oceanspy/tests/Data",
        target_is_directory=True,
    )
    try:
        print(f"Linking {symlink_args['src']!r} to {symlink_args['dst']!r}")
        os.symlink(**symlink_args)
    except FileExistsError:
        os.unlink("./oceanspy/tests/Data")
        os.symlink(**symlink_args)
Example #11
def _data_dl(url, destination, force_update=False, verbose=None):
    # Code taken from moabb (moabb/datasets/download.py) because ':' can occur
    # in the path and ':' is a forbidden character in folder names on Windows.

    from pooch import file_hash, retrieve  # keep soft dependency
    if not osp.isfile(destination) or force_update:
        if osp.isfile(destination):
            os.remove(destination)
        if not osp.isdir(osp.dirname(destination)):
            os.makedirs(osp.dirname(destination))
        known_hash = None
    else:
        known_hash = file_hash(destination)
    data_path = retrieve(url,
                         known_hash,
                         fname=osp.basename(url),
                         path=osp.dirname(destination))
    return data_path
def get_niesfnn():
    url = 'https://ndownloader.figshare.com/files/23907317?private_link=6dfc21bc1a2c51da8081'
    fname = pooch.retrieve(url,
                           None,
                           fname='NIES-FNN_v2020.nc',
                           path='../data-in/',
                           downloader=pooch.HTTPDownloader(progressbar=True))

    xds = xr.open_dataset(fname, drop_variables='date')

    yymm = np.meshgrid(xds.year, xds.month)
    years_months = np.c_[([y.flatten() for y in yymm])].T
    time = [pd.Timestamp(f'{y}-{m}') for y, m in years_months]

    xda = xr.DataArray(xds.co2.values.reshape(len(time), xds.lat.size,
                                              xds.lon.size),
                       coords=dict(time=time, lat=xds.lat, lon=xds.lon),
                       dims=['time', 'lat', 'lon'])

    return xda
def get_somffn_flux_params():

    url = 'https://www.nodc.noaa.gov/archive/arc0105/0160558/5.5/data/0-data/MPI_SOM-FFN_v2020/spco2_MPI-SOM_FFN_v2020.nc'
    fname = pooch.retrieve(url,
                           None,
                           fname='MPI-SOMFFN_v2020.nc',
                           path='../data-in/',
                           downloader=pooch.HTTPDownloader(progressbar=True))

    drop = [
        'date', 'dco2', 'spco2_raw', 'spco2_smoothed', 'fgco2_raw',
        'fgco2_smoothed', 'time_bnds', 'lat_bnds', 'lon_bnds'
    ]

    xds = xr.open_dataset(fname, drop_variables=drop)
    attrs = {k: xds[k].attrs for k in xds}
    xds = xds.resample(time='1MS').mean()
    for k in xds:
        xds[k].attrs = attrs[k]
    xds.attrs = {}

    return xds
Example #14
def create_seamask():
    from pooch import retrieve
    from pandas import Timestamp
    from xarray import open_dataset
    from numpy import arange

    date = Timestamp('2010-01-01')
    url = (f"https://www.ncei.noaa.gov/data/"
           f"sea-surface-temperature-optimum-interpolation"
           f"/v2.1/access/avhrr/{date:%Y%m}/"
           f"oisst-avhrr-v02r01.{date:%Y%m%d}.nc")
    fname = retrieve(url, None)

    mask = (open_dataset(fname).sst.isel(time=0, zlev=0).drop([
        'time', 'zlev'
    ]).interp(lat=arange(-89.5, 90, 1),
              lon=arange(0.5, 360)).notnull().rename('seamask').assign_attrs(
                  dict(description=(
                      "sea mask based on OISSTv2 coverage on "
                      "2010-01-01 where True is sea and False is land"))))

    return mask
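
# Hedged usage sketch: the mask is True over the ocean, so its unweighted
# mean gives a quick sanity check of the land/sea split on the 1-degree grid.
seamask = create_seamask()
print(float(seamask.mean()))
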
def get_jenamls():
    url = 'http://www.bgc-jena.mpg.de/CarboScope/oc/INVERSION/OUTPUT/oc_v1.7_pCO2_daily.nc'
    username = '******'
    password = '******'
    fname = pooch.retrieve(url,
                           None,
                           fname='Jena-MLS_v1.7_pCO2.nc',
                           path='../data-in/',
                           downloader=pooch.HTTPDownloader(progressbar=True,
                                                           auth=(username,
                                                                 password)))

    xds = xr.open_dataset(fname)
    xda = xds.pCO2.resample(mtime='1MS').mean('mtime')

    xda = xda.rename("jena_mls")
    xda = (xda.interp(
        lat=np.arange(-89.5, 90), lon=np.arange(-179.5, 180),
        method='nearest').roll(lon=180, roll_coords=False).interpolate_na(
            'lon', limit=20).roll(lon=-180,
                                  roll_coords=False).rename(mtime='time'))

    return xda
def get_jmamlr():
    url = 'http://www.data.jma.go.jp/gmd/kaiyou/data/english/co2_flux/grid/{name}'

    xds = []
    for t in pd.date_range('1990-01', '2019', freq='1AS', closed='left'):
        fname = 'JMA_co2map_{t:%Y}.ZIP'.format(t=t)
        fname = pooch.retrieve(
            url.format(t=t, name=fname),
            None,
            fname=fname,
            path='../data-in/JMA-MLR/',
            processor=pooch.Unzip(),
            downloader=pooch.HTTPDownloader(progressbar=True))[0]
        xda = xr.open_dataset(fname, decode_times=False).pCO2s
        y0, y1 = str(t.year), str(t.year + 1)
        time = pd.date_range(y0, y1, freq='1MS', closed='left')
        xda = xda.assign_coords(time=time)
        xds.append(xda)

    xda = (xr.concat(xds, dim='time').assign_coords(lon=(xda.lon - 180) % 360 -
                                                    180).sortby('lon'))

    return xda
Example #17
def open_dataset(
    name,
    cache=True,
    cache_dir=None,
    *,
    engine=None,
    **kws,
):
    """
    Open a dataset from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Available datasets:

    * ``"air_temperature"``: NCEP reanalysis subset
    * ``"air_temperature_gradient"``: NCEP reanalysis subset with approximate x,y gradients
    * ``"basin_mask"``: Dataset with ocean basins marked using integers
    * ``"ASE_ice_velocity"``: MEaSUREs InSAR-Based Ice Velocity of the Amundsen Sea Embayment, Antarctica, Version 1
    * ``"rasm"``: Output of the Regional Arctic System Model (RASM)
    * ``"ROMS_example"``: Regional Ocean Model System (ROMS) output
    * ``"tiny"``: small synthetic dataset with a 1D data variable
    * ``"era5-2mt-2019-03-uk.grib"``: ERA5 temperature data over the UK
    * ``"eraint_uvz"``: data from ERA-Interim reanalysis, monthly averages of upper level data
    * ``"ersstv5"``: NOAA's Extended Reconstructed Sea Surface Temperature monthly averages

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'air_temperature'
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls
    **kws : dict, optional
        Passed to xarray.open_dataset

    See Also
    --------
    tutorial.load_dataset
    open_dataset
    load_dataset
    """
    try:
        import pooch
    except ImportError as e:
        raise ImportError(
            "tutorial.open_dataset depends on pooch to download and manage datasets."
            " To proceed please install pooch.") from e

    logger = pooch.get_logger()
    logger.setLevel("WARNING")

    cache_dir = _construct_cache_dir(cache_dir)
    if name in external_urls:
        url = external_urls[name]
    else:
        path = pathlib.Path(name)
        if not path.suffix:
            # process the name
            default_extension = ".nc"
            if engine is None:
                _check_netcdf_engine_installed(name)
            path = path.with_suffix(default_extension)
        elif path.suffix == ".grib":
            if engine is None:
                engine = "cfgrib"

        url = f"{base_url}/raw/{version}/{path.name}"

    # retrieve the file
    filepath = pooch.retrieve(url=url, known_hash=None, path=cache_dir)
    ds = _open_dataset(filepath, engine=engine, **kws)
    if not cache:
        ds = ds.load()
        pathlib.Path(filepath).unlink()

    return ds
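
# Hedged usage sketch: fetch one of the datasets listed in the docstring; the
# file is cached under ``cache_dir``, so repeated calls avoid re-downloading.
ds_air = open_dataset("air_temperature")
print(ds_air)
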
Example #18
def open_rasterio(
    name,
    engine=None,
    cache=True,
    cache_dir=None,
    **kws,
):
    """
    Open a rasterio dataset from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Available datasets:

    * ``"RGB.byte"``: TIFF file derived from USGS Landsat 7 ETM imagery.
    * ``"shade"``: TIFF file derived from from USGS SRTM 90 data

    ``RGB.byte`` and ``shade`` are downloaded from the ``rasterio`` repository [1]_.

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'RGB.byte'
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls
    **kws : dict, optional
        Passed to xarray.open_rasterio

    See Also
    --------
    xarray.open_rasterio

    References
    ----------
    .. [1] https://github.com/rasterio/rasterio
    """
    try:
        import pooch
    except ImportError as e:
        raise ImportError(
            "tutorial.open_rasterio depends on pooch to download and manage datasets."
            " To proceed please install pooch.") from e

    logger = pooch.get_logger()
    logger.setLevel("WARNING")

    cache_dir = _construct_cache_dir(cache_dir)
    url = external_rasterio_urls.get(name)
    if url is None:
        raise ValueError(f"unknown rasterio dataset: {name}")

    # retrieve the file
    filepath = pooch.retrieve(url=url, known_hash=None, path=cache_dir)
    arr = _open_rasterio(filepath, **kws)
    if not cache:
        arr = arr.load()
        pathlib.Path(filepath).unlink()

    return arr
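
# Hedged usage sketch: fetch the Landsat-derived example raster; with
# cache=False the array is loaded eagerly and the downloaded copy removed.
rgb = open_rasterio("RGB.byte", cache=False)
print(rgb.shape)
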
Example #19
import os

import numpy as np
import pandas as pd
import pooch
import tensorflow as tf
from pooch import retrieve
from rdkit import RDLogger

from alfabet import _model_files_baseurl
from alfabet.drawing import draw_bde

RDLogger.DisableLog('rdApp.*')

model_files = retrieve(
    _model_files_baseurl + 'model.tar.gz',
    known_hash=
    'sha256:f1c2b9436f2d18c76b45d95140e6a08c096250bd5f3e2b412492ca27ab38ad0c',
    processor=pooch.Untar(extract_dir='model'))

model = tf.keras.models.load_model(os.path.dirname(model_files[0]))

bde_dft = pd.read_csv(
    retrieve(
        _model_files_baseurl + 'bonds_for_neighbors.csv.gz',
        known_hash=
        'sha256:d4fb825c42d790d4b2b4bd5dc2d87c844932e2da82992a31d7521ce51395adb1'
    ))


def validate_inputs(inputs: dict) -> (bool, np.array, np.array):
    """ Check the given SMILES to ensure it's present in the model's
Example #20
import pyproj
import pooch
import numpy as np
import xarray as xr
import verde as vd
import boule as bl
import harmonica as hm
import matplotlib.pyplot as plt

print("Harmonica version: {}".format(hm.__version__))

# Fetch gravity data and DEM
data = hm.datasets.fetch_south_africa_gravity()
url = "https://github.com/fatiando/transform21/raw/main/data/bushveld_topography.nc"
fname = pooch.retrieve(url, known_hash=None, fname="bushveld_topography.nc")
topography = xr.load_dataset(fname).bedrock

# Project the dataset coordinates
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
easting, northing = projection(data.longitude.values, data.latitude.values)
data = data.assign(easting=easting)
data = data.assign(northing=northing)

# Cut the datasets to a very small region to run the script faster
region_deg = (28, 29, -26, -25)
inside = vd.inside((data.longitude, data.latitude), region_deg)
data = data[inside]
topography = topography.sel(longitude=slice(*region_deg[:2]),
                            latitude=slice(*region_deg[2:]))

# Compute gravity disturbance
Example #21
def data_path(url, path=None, force_update=False, update_path=None, *,
              verbose=None):
    """Get path to local copy of EEGMMI dataset URL.

    This is a low-level function useful for getting a local copy of a
    remote EEGBCI dataset :footcite:`SchalkEtAl2004` which is available at PhysioNet :footcite:`GoldbergerEtAl2000`.

    Parameters
    ----------
    url : str
        The dataset to use.
    path : None | str
        Location of where to look for the EEGBCI data storing location.
        If None, the environment variable or config parameter
        ``MNE_DATASETS_EEGBCI_PATH`` is used. If it doesn't exist, the
        "~/mne_data" directory is used. If the EEGBCI dataset
        is not found under the given path, the data
        will be automatically downloaded to the specified folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    update_path : bool | None
        If True, set the MNE_DATASETS_EEGBCI_PATH in mne-python
        config to the given path. If None, the user is prompted.
    %(verbose)s

    Returns
    -------
    path : list of Path
        Local path to the given data file. This path is contained inside a list
        of length one, for compatibility.

    Notes
    -----
    For example, one could do:

        >>> from mne.datasets import eegbci
        >>> url = 'http://www.physionet.org/physiobank/database/eegmmidb/'
        >>> eegbci.data_path(url, os.getenv('HOME') + '/datasets') # doctest:+SKIP

    This would download the given EEGBCI data file to the 'datasets' folder,
    and prompt the user to save the 'datasets' path to the mne-python config,
    if it isn't there already.

    References
    ----------
    .. footbibliography::
    """  # noqa: E501
    import pooch

    key = 'MNE_DATASETS_EEGBCI_PATH'
    name = 'EEGBCI'
    path = _get_path(path, key, name)
    fname = 'MNE-eegbci-data'
    destination = _url_to_local_path(url, op.join(path, fname))
    destinations = [destination]

    # Fetch the file
    if not op.isfile(destination) or force_update:
        if op.isfile(destination):
            os.remove(destination)
        if not op.isdir(op.dirname(destination)):
            os.makedirs(op.dirname(destination))
        pooch.retrieve(
            url=url,
            known_hash=None,
            path=destination,
            fname=fname,
        )

    # Offer to update the path
    _do_path_update(path, update_path, key, name)
    destinations = [_mne_path(dest) for dest in destinations]
    return destinations
Example #22
    if not flipped:
        atoms = "{}-{}".format(
            *tuple((bond.GetBeginAtom().GetSymbol(),
                    bond.GetEndAtom().GetSymbol())))
    else:
        atoms = "{}-{}".format(
            *tuple((bond.GetEndAtom().GetSymbol(),
                    bond.GetBeginAtom().GetSymbol())))

    btype = str((bond.GetBondType(),
                 bond.GetIsConjugated()))
    ring = 'R{}'.format(get_ring_size(bond, max_size=6)) if bond.IsInRing() else ''

    return " ".join([atoms, btype, ring]).strip()


preprocessor = nfp.SmilesBondIndexPreprocessor(
    atom_features=atom_featurizer,
    bond_features=bond_featurizer,
    explicit_hs=True,
    output_dtype='int64'
)

preprocessor.from_json(retrieve(
    _model_files_baseurl + 'preprocessor.json',
    known_hash='412d15ca4d0e8b5030e9b497f566566922818ff355b8ee677a91dd23696878ac'))


def get_features(smiles: str, **kwargs) -> dict:
    return preprocessor(smiles, train=False, **kwargs)
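
# Hedged usage sketch: featurize a single SMILES string with the fitted
# preprocessor (train=False keeps the learned atom/bond vocabularies frozen).
example_features = get_features('CC(=O)O')  # acetic acid
print(list(example_features))
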
Example #23
def _update_sleep_temazepam_records(fname=TEMAZEPAM_SLEEP_RECORDS):
    """Help function to download Physionet's temazepam dataset records."""
    import pooch

    pd = _check_pandas_installed()
    tmp = _TempDir()

    # Download subjects info.
    subjects_fname = op.join(tmp, 'ST-subjects.xls')
    pooch.retrieve(url=TEMAZEPAM_RECORDS_URL,
                   known_hash=f"sha1:{TEMAZEPAM_RECORDS_URL_SHA1}",
                   path=tmp,
                   fname=op.basename(subjects_fname))

    # Load and Massage the checksums.
    sha1_df = pd.read_csv(sha1sums_fname,
                          sep='  ',
                          header=None,
                          names=['sha', 'fname'],
                          engine='python')
    select_age_records = (sha1_df.fname.str.startswith('ST')
                          & sha1_df.fname.str.endswith('edf'))
    sha1_df = sha1_df[select_age_records]
    sha1_df['id'] = [name[:6] for name in sha1_df.fname]

    # Load and massage the data.
    data = pd.read_excel(subjects_fname, header=[0, 1])
    data = data.set_index(('Subject - age - sex', 'Nr'))
    data.index.name = 'subject'
    data.columns.names = [None, None]
    data = (data.set_index([('Subject - age - sex', 'Age'),
                            ('Subject - age - sex', 'M1/F2')],
                           append=True).stack(level=0).reset_index())

    data = data.rename(
        columns={
            ('Subject - age - sex', 'Age'): 'age',
            ('Subject - age - sex', 'M1/F2'): 'sex',
            'level_3': 'drug'
        })
    data['id'] = [
        'ST7{:02d}{:1d}'.format(s, n)
        for s, n in zip(data.subject, data['night nr'])
    ]

    data = pd.merge(sha1_df, data, how='outer', on='id')
    data['record type'] = (data.fname.str.split('-', expand=True)[1].str.split(
        '.', expand=True)[0].astype('category'))

    data = data.set_index([
        'id', 'subject', 'age', 'sex', 'drug', 'lights off', 'night nr',
        'record type'
    ]).unstack()
    data.columns = [l1 + '_' + l2 for l1, l2 in data.columns]
    data = data.reset_index().drop(columns=['id'])

    data['sex'] = (data.sex.astype('category').cat.rename_categories({
        1:
        'male',
        2:
        'female'
    }))

    data['drug'] = data['drug'].str.split(expand=True)[0]
    data['subject_orig'] = data['subject']
    data['subject'] = data.index // 2  # to make sure index is from 0 to 21

    # Save the data.
    data.to_csv(fname, index=False)
Example #24
def test_constants(tmp_path):
    """Test compensation."""
    tmp_path = str(tmp_path)  # old pytest...
    fname = 'fiff.zip'
    dest = op.join(tmp_path, fname)
    pooch.retrieve(url='https://codeload.github.com/'
                   f'{REPO}/fiff-constants/zip/{COMMIT}',
                   path=tmp_path,
                   fname=fname,
                   known_hash=None)
    names = list()
    with zipfile.ZipFile(dest, 'r') as ff:
        for name in ff.namelist():
            if 'Dictionary' in name:
                ff.extract(name, tmp_path)
                names.append(op.basename(name))
                shutil.move(op.join(tmp_path, name),
                            op.join(tmp_path, names[-1]))
    names = sorted(names)
    assert names == [
        'DictionaryIOD.txt', 'DictionaryIOD_MNE.txt',
        'DictionaryStructures.txt', 'DictionaryTags.txt',
        'DictionaryTags_MNE.txt', 'DictionaryTypes.txt',
        'DictionaryTypes_MNE.txt'
    ]
    # IOD (MEGIN and MNE)
    fif = dict(iod=dict(), tags=dict(), types=dict(), defines=dict())
    con = dict(iod=dict(), tags=dict(), types=dict(), defines=dict())
    fiff_version = None
    for name in ['DictionaryIOD.txt', 'DictionaryIOD_MNE.txt']:
        with open(op.join(tmp_path, name), 'rb') as fid:
            for line in fid:
                line = line.decode('latin1').strip()
                if line.startswith('# Packing revision'):
                    assert fiff_version is None
                    fiff_version = line.split()[-1]
                if (line.startswith('#') or line.startswith('alias')
                        or len(line) == 0):
                    continue
                line = line.split('"')
                assert len(line) in (1, 2, 3)
                desc = '' if len(line) == 1 else line[1]
                line = line[0].split()
                assert len(line) in (2, 3)
                if len(line) == 2:
                    kind, id_ = line
                else:
                    kind, id_, tagged = line
                    assert tagged in ('tagged', )
                id_ = int(id_)
                if id_ not in iod_dups:
                    assert id_ not in fif['iod']
                fif['iod'][id_] = [kind, desc]
    # Tags (MEGIN)
    with open(op.join(tmp_path, 'DictionaryTags.txt'), 'rb') as fid:
        for line in fid:
            line = line.decode('ISO-8859-1').strip()
            if (line.startswith('#') or line.startswith('alias')
                    or line.startswith(':') or len(line) == 0):
                continue
            line = line.split('"')
            assert len(line) in (1, 2, 3), line
            desc = '' if len(line) == 1 else line[1]
            line = line[0].split()
            assert len(line) == 4, line
            kind, id_, dtype, unit = line
            id_ = int(id_)
            val = [kind, dtype, unit]
            assert id_ not in fif['tags'], (fif['tags'].get(id_), val)
            fif['tags'][id_] = val
    # Tags (MNE)
    with open(op.join(tmp_path, 'DictionaryTags_MNE.txt'), 'rb') as fid:
        for li, line in enumerate(fid):
            line = line.decode('ISO-8859-1').strip()
            # ignore continuation lines (*)
            if (line.startswith('#') or line.startswith('alias')
                    or line.startswith(':') or line.startswith('*')
                    or len(line) == 0):
                continue
            # weird syntax around line 80:
            if line in ('/*', '"'):
                continue
            line = line.split('"')
            assert len(line) in (1, 2, 3), line
            if len(line) == 3 and len(line[2]) > 0:
                l2 = line[2].strip()
                assert l2.startswith('/*') and l2.endswith('*/'), l2
            desc = '' if len(line) == 1 else line[1]
            line = line[0].split()
            assert len(line) == 3, (li + 1, line)
            kind, id_, dtype = line
            unit = '-'
            id_ = int(id_)
            val = [kind, dtype, unit]
            if id_ not in tag_dups:
                assert id_ not in fif['tags'], (fif['tags'].get(id_), val)
            fif['tags'][id_] = val

    # Types and enums
    in_ = None
    re_prim = re.compile(r'^primitive\((.*)\)\s*(\S*)\s*"(.*)"$')
    re_enum = re.compile(r'^enum\((\S*)\)\s*".*"$')
    re_enum_entry = re.compile(r'\s*(\S*)\s*(\S*)\s*"(.*)"$')
    re_defi = re.compile(r'#define\s*(\S*)\s*(\S*)\s*"(.*)"$')
    used_enums = list()
    for extra in ('', '_MNE'):
        with open(op.join(tmp_path, 'DictionaryTypes%s.txt' % (extra, )),
                  'rb') as fid:
            for li, line in enumerate(fid):
                line = line.decode('ISO-8859-1').strip()
                if in_ is None:
                    p = re_prim.match(line)
                    e = re_enum.match(line)
                    d = re_defi.match(line)
                    if p is not None:
                        t, s, d = p.groups()
                        s = int(s)
                        assert s not in fif['types']
                        fif['types'][s] = [t, d]
                    elif e is not None:
                        # entering an enum
                        this_enum = e.group(1)
                        if this_enum not in fif:
                            used_enums.append(this_enum)
                            fif[this_enum] = dict()
                            con[this_enum] = dict()
                        in_ = fif[this_enum]
                    elif d is not None:
                        t, s, d = d.groups()
                        s = int(s)
                        fif['defines'][t] = [s, d]
                    else:
                        assert not line.startswith('enum(')
                else:  # in an enum
                    if line == '{':
                        continue
                    elif line == '}':
                        in_ = None
                        continue
                    t, s, d = re_enum_entry.match(line).groups()
                    s = int(s)
                    if t != 'ecg' and s != 3:  # ecg defined the same way
                        assert s not in in_
                    in_[s] = [t, d]

    #
    # Assertions
    #

    # Version
    mne_version = '%d.%d' % (FIFF.FIFFC_MAJOR_VERSION,
                             FIFF.FIFFC_MINOR_VERSION)
    assert fiff_version == mne_version
    unknowns = list()

    # Assert that all our constants are in the FIF def
    assert 'FIFFV_SSS_JOB_NOTHING' in dir(FIFF)
    for name in sorted(dir(FIFF)):
        if name.startswith('_') or name in _dir_ignore_names:
            continue
        check = None
        val = getattr(FIFF, name)
        if name in fif['defines']:
            assert fif['defines'][name][0] == val
        elif name.startswith('FIFFC_'):
            # Checked above
            assert name in ('FIFFC_MAJOR_VERSION', 'FIFFC_MINOR_VERSION',
                            'FIFFC_VERSION')
        elif name.startswith('FIFFB_'):
            check = 'iod'
        elif name.startswith('FIFFT_'):
            check = 'types'
        elif name.startswith('FIFFV_'):
            if name.startswith('FIFFV_MNE_') and name.endswith('_ORI'):
                check = 'mne_ori'
            elif name.startswith('FIFFV_MNE_') and name.endswith('_COV'):
                check = 'covariance_type'
            elif name.startswith('FIFFV_MNE_COORD'):
                check = 'coord'  # weird wrapper
            elif name.endswith('_CH') or '_QUAT_' in name or name in \
                    ('FIFFV_DIPOLE_WAVE', 'FIFFV_GOODNESS_FIT',
                     'FIFFV_HPI_ERR', 'FIFFV_HPI_G', 'FIFFV_HPI_MOV'):
                check = 'ch_type'
            elif name.startswith('FIFFV_SUBJ_'):
                check = name.split('_')[2].lower()
            elif name in ('FIFFV_POINT_LPA', 'FIFFV_POINT_NASION',
                          'FIFFV_POINT_RPA', 'FIFFV_POINT_INION'):
                check = 'cardinal_point'
            else:
                for check in used_enums:
                    if name.startswith('FIFFV_' + check.upper()):
                        break
                else:
                    if name not in _tag_ignore_names:
                        raise RuntimeError('Could not find %s' % (name, ))
            assert check in used_enums, name
            if 'SSS' in check:
                raise RuntimeError
        elif name.startswith('FIFF_UNIT'):  # units and multipliers
            check = name.split('_')[1].lower()
        elif name.startswith('FIFF_'):
            check = 'tags'
        else:
            unknowns.append((name, val))
        if check is not None and name not in _tag_ignore_names:
            assert val in fif[check], '%s: %s, %s' % (check, val, name)
            if val in con[check]:
                msg = "%s='%s'  ?" % (name, con[check][val])
                assert _aliases.get(name) == con[check][val], msg
            else:
                con[check][val] = name
    unknowns = '\n\t'.join('%s (%s)' % u for u in unknowns)
    assert len(unknowns) == 0, 'Unknown types\n\t%s' % unknowns

    # Assert that all the FIF defs are in our constants
    assert set(fif.keys()) == set(con.keys())
    for key in sorted(set(fif.keys()) - {'defines'}):
        this_fif, this_con = fif[key], con[key]
        assert len(set(this_fif.keys())) == len(this_fif)
        assert len(set(this_con.keys())) == len(this_con)
        missing_from_con = sorted(set(this_con.keys()) - set(this_fif.keys()))
        assert missing_from_con == [], key
        if key not in _ignore_incomplete_enums:
            missing_from_fif = sorted(
                set(this_fif.keys()) - set(this_con.keys()))
            assert missing_from_fif == [], key

    # Assert that `coil_def.dat` has accurate descriptions of all enum(coil)
    coil_def = _read_coil_defs()
    coil_desc = np.array([c['desc'] for c in coil_def])
    coil_def = np.array([(c['coil_type'], c['accuracy']) for c in coil_def],
                        int)
    mask = (coil_def[:, 1] == FWD.COIL_ACCURACY_ACCURATE)
    coil_def = coil_def[mask, 0]
    coil_desc = coil_desc[mask]
    bad_list = []
    for key in fif['coil']:
        if key not in _missing_coil_def and key not in coil_def:
            bad_list.append(('    %s,' % key).ljust(10) + '  # ' +
                            fif['coil'][key][1])
    assert len(bad_list) == 0, \
        '\nIn fiff-constants, missing from coil_def:\n' + '\n'.join(bad_list)
    # Assert that enum(coil) has all `coil_def.dat` entries
    for key, desc in zip(coil_def, coil_desc):
        if key not in fif['coil']:
            bad_list.append(('    %s,' % key).ljust(10) + '  # ' + desc)
    assert len(bad_list) == 0, \
        'In coil_def, missing  from fiff-constants:\n' + '\n'.join(bad_list)
Example #25
def default_absorbers(
    Tatm,
    ozone_file='apeozone_cam3_5_54.nc',
    verbose=True,
):
    '''Initialize a dictionary of well-mixed radiatively active gases
    All values are volumetric mixing ratios.

    Ozone is set to a climatology.

    All other gases are assumed well-mixed:

        - CO2
        - CH4
        - N2O
        - O2
        - CFC11
        - CFC12
        - CFC22
        - CCL4

    Specific values are based on the AquaPlanet Experiment protocols,
    except for O2, which is set to the realistic value 0.21
    (affects the RRTMG scheme).
    '''
    absorber_vmr = {}
    absorber_vmr['CO2'] = 348. / 1E6
    absorber_vmr['CH4'] = 1650. / 1E9
    absorber_vmr['N2O'] = 306. / 1E9
    absorber_vmr['O2'] = 0.21
    absorber_vmr['CFC11'] = 0.
    absorber_vmr['CFC12'] = 0.
    absorber_vmr['CFC22'] = 0.
    absorber_vmr['CCL4'] = 0.

    # Ozone: start with all zeros, interpolate to data if we can
    xTatm = Tatm.to_xarray()
    O3 = 0. * xTatm
    if ozone_file is not None:
        ozonepath_http = _datapath_http + 'ozone/' + ozone_file
        ozonefilehandle = pooch.retrieve(
            url=ozonepath_http,
            known_hash=
            "bc659bfa129fafa4ed9368bb19278ae15724a5a66599affd317c143ba511ff84")
        ozonedata = xr.open_dataset(ozonefilehandle)
        ##  zonal and time average
        ozone_zon = ozonedata.OZONE.mean(dim=('time',
                                              'lon')).transpose('lat', 'lev')
        if ('lat' in xTatm.dims):
            O3source = ozone_zon
        else:
            weight = np.cos(np.deg2rad(ozonedata.lat))
            ozone_global = (ozone_zon *
                            weight).mean(dim='lat') / weight.mean(dim='lat')
            O3source = ozone_global
        try:
            O3 = O3source.interp_like(xTatm)
            # There will be NaNs for gridpoints outside the ozone file domain
            assert not np.any(np.isnan(O3))
        except:
            warnings.warn(
                'Some grid points are beyond the bounds of the ozone file. Ozone values will be extrapolated.'
            )
            try:
                # passing fill_value='extrapolate' to the underlying scipy interpolator
                # will result in extrapolation instead of NaNs
                O3 = O3source.interp_like(xTatm,
                                          kwargs={'fill_value': 'extrapolate'})
                assert not np.any(np.isnan(O3))
            except:
                warnings.warn(
                    'Interpolation of ozone data failed. Setting O3 to zero instead.'
                )
                O3 = 0. * xTatm
    absorber_vmr['O3'] = O3.values
    return absorber_vmr
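
# Hedged usage sketch, assuming climlab is installed and exposes
# climlab.column_state: build a single-column state and fill the absorber
# dictionary from its atmospheric temperature field (O3 comes from the CAM3
# ozone climatology fetched above with pooch).
import climlab

state = climlab.column_state(num_lev=30)
vmr = default_absorbers(state['Tatm'])
print(sorted(vmr))
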
Example #26
def open_dataset(
    name,
    engine=None,
    cache=True,
    cache_dir=None,
    **kws,
):
    """
    Open a dataset from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'air_temperature'
    engine : str, optional
        The engine to use.
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls
    kws : dict, optional
        Passed to xarray.open_dataset

    Notes
    -----
    Available datasets:

    * ``"air_temperature"``
    * ``"rasm"``
    * ``"ROMS_example"``
    * ``"tiny"``
    * ``"era5-2mt-2019-03-uk.grib"``
    * ``"RGB.byte"``: example rasterio file from https://github.com/mapbox/rasterio

    See Also
    --------
    xarray.open_dataset
    """
    try:
        import pooch
    except ImportError:
        raise ImportError("using the tutorial data requires pooch")

    if isinstance(cache_dir, pathlib.Path):
        cache_dir = os.fspath(cache_dir)
    elif cache_dir is None:
        cache_dir = pooch.os_cache(_default_cache_dir_name)

    if name in external_urls:
        engine_, url = external_urls[name]
        if engine is None:
            engine = engine_
    else:
        # process the name
        default_extension = ".nc"
        path = pathlib.Path(name)
        if not path.suffix:
            path = path.with_suffix(default_extension)

        url = f"{base_url}/raw/{version}/{path.name}"

    _open = overrides.get(engine, _open_dataset)
    # retrieve the file
    filepath = pooch.retrieve(url=url, known_hash=None, path=cache_dir)
    ds = _open(filepath, engine=engine, **kws)
    if not cache:
        ds = ds.load()
        pathlib.Path(filepath).unlink()

    return ds
Example #27
def open_dataset(
    name,
    cache=True,
    cache_dir=None,
    **kws,
):
    """
    Open a dataset from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Available datasets:

    * ``"air_temperature"``: NCEP reanalysis subset
    * ``"rasm"``: Output of the Regional Arctic System Model (RASM)
    * ``"ROMS_example"``: Regional Ocean Model System (ROMS) output
    * ``"tiny"``: small synthetic dataset with a 1D data variable
    * ``"era5-2mt-2019-03-uk.grib"``: ERA5 temperature data over the UK
    * ``"eraint_uvz"``: data from ERA-Interim reanalysis, monthly averages of upper level data

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'air_temperature'
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls
    **kws : dict, optional
        Passed to xarray.open_dataset

    See Also
    --------
    xarray.open_dataset
    """
    try:
        import pooch
    except ImportError:
        raise ImportError("using the tutorial data requires pooch")

    logger = pooch.get_logger()
    logger.setLevel("WARNING")

    cache_dir = _construct_cache_dir(cache_dir)
    if name in external_urls:
        url = external_urls[name]
    else:
        # process the name
        default_extension = ".nc"
        path = pathlib.Path(name)
        if not path.suffix:
            path = path.with_suffix(default_extension)

        url = f"{base_url}/raw/{version}/{path.name}"

    # retrieve the file
    filepath = pooch.retrieve(url=url, known_hash=None, path=cache_dir)
    ds = _open_dataset(filepath, **kws)
    if not cache:
        ds = ds.load()
        pathlib.Path(filepath).unlink()

    return ds
Example #28
def fetch_hcp_mmp_parcellation(subjects_dir=None,
                               combine=True,
                               *,
                               accept=False,
                               verbose=None):
    """Fetch the HCP-MMP parcellation.

    This will download and install the HCP-MMP parcellation
    :footcite:`GlasserEtAl2016` files for FreeSurfer's fsaverage
    :footcite:`Mills2016` to the specified directory.

    Parameters
    ----------
    subjects_dir : str | None
        The subjects directory to use. The file will be placed in
        ``subjects_dir + '/fsaverage/label'``.
    combine : bool
        If True, also produce the combined/reduced set of 23 labels per
        hemisphere as ``HCPMMP1_combined.annot``
        :footcite:`GlasserEtAl2016supp`.
    %(accept)s
    %(verbose)s

    Notes
    -----
    Use of this parcellation is subject to terms of use on the
    `HCP-MMP webpage <https://balsa.wustl.edu/WN56>`_.

    References
    ----------
    .. footbibliography::
    """
    import pooch

    subjects_dir = get_subjects_dir(subjects_dir, raise_error=True)
    destination = op.join(subjects_dir, 'fsaverage', 'label')
    fnames = [
        op.join(destination, '%s.HCPMMP1.annot' % hemi)
        for hemi in ('lh', 'rh')
    ]
    urls = dict(lh='https://ndownloader.figshare.com/files/5528816',
                rh='https://ndownloader.figshare.com/files/5528819')
    hashes = dict(lh='46a102b59b2fb1bb4bd62d51bf02e975',
                  rh='75e96b331940227bbcb07c1c791c2463')
    if not all(op.isfile(fname) for fname in fnames):
        if accept or '--accept-hcpmmp-license' in sys.argv:
            answer = 'y'
        else:
            answer = _safe_input('%s\nAgree (y/[n])? ' % _hcp_mmp_license_text)
        if answer.lower() != 'y':
            raise RuntimeError('You must agree to the license to use this '
                               'dataset')
    for hemi, fpath in zip(('lh', 'rh'), fnames):
        if not op.isfile(fpath):
            fname = op.basename(fpath)
            pooch.retrieve(url=urls[hemi],
                           known_hash=f"md5:{hashes[hemi]}",
                           path=destination,
                           fname=fname)

    if combine:
        fnames = [
            op.join(destination, '%s.HCPMMP1_combined.annot' % hemi)
            for hemi in ('lh', 'rh')
        ]
        if all(op.isfile(fname) for fname in fnames):
            return
        # otherwise, let's make them
        logger.info('Creating combined labels')
        groups = OrderedDict([
            ('Primary Visual Cortex (V1)', ('V1', )),
            ('Early Visual Cortex', ('V2', 'V3', 'V4')),
            ('Dorsal Stream Visual Cortex', ('V3A', 'V3B', 'V6', 'V6A', 'V7',
                                             'IPS1')),
            ('Ventral Stream Visual Cortex', ('V8', 'VVC', 'PIT', 'FFC',
                                              'VMV1', 'VMV2', 'VMV3')),
            ('MT+ Complex and Neighboring Visual Areas',
             ('V3CD', 'LO1', 'LO2', 'LO3', 'V4t', 'FST', 'MT', 'MST', 'PH')),
            ('Somatosensory and Motor Cortex', ('4', '3a', '3b', '1', '2')),
            ('Paracentral Lobular and Mid Cingulate Cortex', (
                '24dd',
                '24dv',
                '6mp',
                '6ma',
                'SCEF',
                '5m',
                '5L',
                '5mv',
            )),
            ('Premotor Cortex', ('55b', '6d', '6a', 'FEF', '6v', '6r', 'PEF')),
            ('Posterior Opercular Cortex', ('43', 'FOP1', 'OP4', 'OP1',
                                            'OP2-3', 'PFcm')),
            ('Early Auditory Cortex', ('A1', 'LBelt', 'MBelt', 'PBelt', 'RI')),
            ('Auditory Association Cortex', (
                'A4',
                'A5',
                'STSdp',
                'STSda',
                'STSvp',
                'STSva',
                'STGa',
                'TA2',
            )),
            ('Insular and Frontal Opercular Cortex',
             ('52', 'PI', 'Ig', 'PoI1', 'PoI2', 'FOP2', 'FOP3', 'MI', 'AVI',
              'AAIC', 'Pir', 'FOP4', 'FOP5')),
            ('Medial Temporal Cortex', (
                'H',
                'PreS',
                'EC',
                'PeEc',
                'PHA1',
                'PHA2',
                'PHA3',
            )),
            ('Lateral Temporal Cortex', (
                'PHT',
                'TE1p',
                'TE1m',
                'TE1a',
                'TE2p',
                'TE2a',
                'TGv',
                'TGd',
                'TF',
            )),
            ('Temporo-Parieto-Occipital Junction', (
                'TPOJ1',
                'TPOJ2',
                'TPOJ3',
                'STV',
                'PSL',
            )),
            ('Superior Parietal Cortex', (
                'LIPv',
                'LIPd',
                'VIP',
                'AIP',
                'MIP',
                '7PC',
                '7AL',
                '7Am',
                '7PL',
                '7Pm',
            )),
            ('Inferior Parietal Cortex', (
                'PGp',
                'PGs',
                'PGi',
                'PFm',
                'PF',
                'PFt',
                'PFop',
                'IP0',
                'IP1',
                'IP2',
            )),
            ('Posterior Cingulate Cortex', (
                'DVT',
                'ProS',
                'POS1',
                'POS2',
                'RSC',
                'v23ab',
                'd23ab',
                '31pv',
                '31pd',
                '31a',
                '23d',
                '23c',
                'PCV',
                '7m',
            )),
            ('Anterior Cingulate and Medial Prefrontal Cortex', (
                '33pr',
                'p24pr',
                'a24pr',
                'p24',
                'a24',
                'p32pr',
                'a32pr',
                'd32',
                'p32',
                's32',
                '8BM',
                '9m',
                '10v',
                '10r',
                '25',
            )),
            ('Orbital and Polar Frontal Cortex', (
                '47s',
                '47m',
                'a47r',
                '11l',
                '13l',
                'a10p',
                'p10p',
                '10pp',
                '10d',
                'OFC',
                'pOFC',
            )),
            ('Inferior Frontal Cortex', (
                '44',
                '45',
                'IFJp',
                'IFJa',
                'IFSp',
                'IFSa',
                '47l',
                'p47r',
            )),
            ('DorsoLateral Prefrontal Cortex', (
                '8C',
                '8Av',
                'i6-8',
                's6-8',
                'SFL',
                '8BL',
                '9p',
                '9a',
                '8Ad',
                'p9-46v',
                'a9-46v',
                '46',
                '9-46d',
            )), ('???', ('???', ))
        ])
        assert len(groups) == 23
        labels_out = list()

        for hemi in ('lh', 'rh'):
            labels = read_labels_from_annot('fsaverage',
                                            'HCPMMP1',
                                            hemi=hemi,
                                            subjects_dir=subjects_dir,
                                            sort=False)
            label_names = [
                '???'
                if label.name.startswith('???') else label.name.split('_')[1]
                for label in labels
            ]
            used = np.zeros(len(labels), bool)
            for key, want in groups.items():
                assert '\t' not in key
                these_labels = [
                    li for li, label_name in enumerate(label_names)
                    if label_name in want
                ]
                assert not used[these_labels].any()
                assert len(these_labels) == len(want)
                used[these_labels] = True
                these_labels = [labels[li] for li in these_labels]
                # take a weighted average to get the color
                # (here color == task activation)
                w = np.array([len(label.vertices) for label in these_labels])
                w = w / float(w.sum())
                color = np.dot(w, [label.color for label in these_labels])
                these_labels = sum(these_labels,
                                   Label([], subject='fsaverage', hemi=hemi))
                these_labels.name = key
                these_labels.color = color
                labels_out.append(these_labels)
            assert used.all()
        assert len(labels_out) == 46
        for hemi, side in (('lh', 'left'), ('rh', 'right')):
            table_name = './%s.fsaverage164.label.gii' % (side, )
            write_labels_to_annot(labels_out,
                                  'fsaverage',
                                  'HCPMMP1_combined',
                                  hemi=hemi,
                                  subjects_dir=subjects_dir,
                                  sort=False,
                                  table_name=table_name)
Example #29
def test_load_model_compressed_remote_fail():
    with pytest.raises(Exception):
        model_file = pooch.retrieve(url="https://nowhere.zip", known_hash=None)

        geo_model = gp.load_model(name='error', path=model_file)
Example #30
def read_noaa_mbl_url(noaa_mbl_url, dest):
    """Downloads url and reads in the MBL surface file

    Args:
        noaa_mbl_url (str): the address for the noaa surface file
        dest (str): the destination to which the raw file will be saved

    Returns:
        pd.Series: multi-indexed series of xCO2 with (time, lat) as coords.
    """
    import re

    from pathlib import Path

    import numpy as np
    import pandas as pd
    import pooch

    # save to temporary location with pooch
    print(
        f"[SeaFlux] Downloading {noaa_mbl_url} to {dest} and reading in as pd.DataFrame"
    )

    dest = Path(dest)
    fname = pooch.retrieve(
        url=noaa_mbl_url,
        known_hash=None,
        path=str(dest.parent),
        fname=str(dest.name),
    )

    # find start line
    is_mbl_surface = False
    for start_line, line in enumerate(open(fname)):
        if re.findall("MBL.*SURFACE", line):
            is_mbl_surface = True
        if not line.startswith("#"):
            break
    if not is_mbl_surface:
        raise Exception(
            "The file at the provided url is not an MBL SURFACE file. "
            "Please check that you have provided the surface url. "
        )

    # read fixed width file CO2
    df = pd.read_fwf(fname, skiprows=start_line, header=None, index_col=0)
    df.index.name = "date"
    # every second line is uncertainty
    df = df.iloc[:, ::2]
    # latitude is given as sin(lat)
    df.columns = np.rad2deg(np.arcsin(np.linspace(-1, 1, 41)))

    # resolve time properly
    year = (df.index.values - (df.index.values % 1)).astype(int)
    day_of_year = ((df.index.values - year) * 365 + 1).astype(int)
    date_strings = ["{}-{:03d}".format(*a) for a in zip(year, day_of_year)]
    date = pd.to_datetime(date_strings, format="%Y-%j")
    df = df.set_index(date)
    df = df.iloc[:-1]  # remove the last value that is for 2020-01-01

    # renaming indexes (have to stack for that)
    df = df.stack()
    index = df.index.set_names(["time", "lat"])
    df = df.set_axis(index)

    df.source = noaa_mbl_url

    return df
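
# Hedged usage sketch (URL and destination below are placeholders): download
# the NOAA MBL surface file, read it into a (time, lat) indexed series, and
# convert to xarray for gridding.
#
# xco2_mbl = read_noaa_mbl_url(
#     "https://gml.noaa.gov/ccgg/mbl/...",      # surface-file URL (elided)
#     dest="../data-in/co2_mbl_surface.txt",
# )
# xco2_gridded = xco2_mbl.to_xarray()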