Example #1
def download_dataset():
    coll = '1425'
    url = 'https://neurovault.org/collections/{0}/download'.format(coll)
    out_dir = op.join(get_resource_path(),
                      'data/neurovault-data/collection-{0}'.format(coll))

    os.makedirs(out_dir, exist_ok=True)

    # Download
    fname = download_file(url)

    # Unzip
    with zipfile.ZipFile(fname, 'r') as zip_ref:
        zip_ref.extractall(out_dir)

    collection_folders = [f for f in glob(op.join(out_dir, '*'))
                          if '.nidm' not in f]
    collection_folders = [f for f in collection_folders if op.isdir(f)]
    if len(collection_folders) > 1:
        raise Exception('More than one folder found: '
                        '{0}'.format(', '.join(collection_folders)))
    else:
        folder = collection_folders[0]
    zip_files = glob(op.join(folder, '*.zip'))
    for zf in zip_files:
        fn = op.splitext(op.basename(zf))[0]
        with zipfile.ZipFile(zf, 'r') as zip_ref:
            zip_ref.extractall(op.join(out_dir, fn))

    os.remove(fname)
    shutil.rmtree(folder)
Example #2
def test_CogAtLemmatizer():
    """A smoke test for CogAtLemmatizer."""
    cogat = extract.download_cognitive_atlas(data_dir=utils.get_resource_path(), overwrite=False)
    id_df = pd.read_csv(cogat["ids"])
    id_df = id_df.loc[id_df["id"] == "trm_4aae62e4ad209"]
    lem = annotate.cogat.CogAtLemmatizer(id_df)
    true_text = "trm_4aae62e4ad209 is great"
    test_text = "Cognitive control is great"
    assert lem.transform(test_text) == true_text
Example #3
def test_cogat(testdata_laird):
    """A smoke test for CogAt-related functions."""
    # A small test dataset with abstracts
    ns_dset_laird = testdata_laird.copy()
    cogat = extract.download_cognitive_atlas(data_dir=utils.get_resource_path(), overwrite=False)
    id_df = pd.read_csv(cogat["ids"])
    rel_df = pd.read_csv(cogat["relationships"])
    weights = {"isKindOf": 1, "isPartOf": 1, "inCategory": 1}
    counts_df, rep_text_df = annotate.cogat.extract_cogat(
        ns_dset_laird.texts, id_df, text_column="abstract"
    )
    assert "id" in ns_dset_laird.texts.columns
    expanded_df = annotate.cogat.expand_counts(counts_df, rel_df, weights)
    assert isinstance(expanded_df, pd.DataFrame)
Example #4
import os

import nibabel as nib
import numpy as np
from nilearn import image, masking, plotting

from nimare import annotate, decode
from nimare.dataset import Dataset
from nimare.utils import get_resource_path

###############################################################################
# Load dataset with abstracts
# -----------------------------------------------------------------------------
# We'll load a small dataset composed only of studies in Neurosynth with
# Angela Laird as a coauthor, for the sake of speed.
dset = Dataset(os.path.join(get_resource_path(), "neurosynth_laird_studies.json"))
dset.texts.head(2)

###############################################################################
# Generate term counts
# -----------------------------------------------------------------------------
# GCLDA uses raw word counts instead of the tf-idf values generated by
# Neurosynth.
counts_df = annotate.text.generate_counts(
    dset.texts,
    text_column="abstract",
    tfidf=False,
    max_df=0.99,
    min_df=0.01,
)
counts_df.head(5)
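###############################################################################
# Fit a small GCLDA model (hedged sketch)
# -----------------------------------------------------------------------------
# The snippet above stops at the counts. The lines below are an illustrative
# sketch of feeding those counts into ``annotate.gclda.GCLDAModel``; the
# constructor arguments and ``fit`` parameters are assumptions based on
# NiMARE's documented GCLDA API and may differ across versions.
model = annotate.gclda.GCLDAModel(
    counts_df,
    dset.coordinates,
    mask=dset.masker.mask_img,
    n_topics=10,
    n_regions=2,
)
model.fit(n_iters=100, loglikely_freq=20)  # deliberately short run, for illustration only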
Example #5
def test_get_resource_path():
    """
    Test nimare.utils.get_resource_path
    """
    print(utils.get_resource_path())
    assert op.isdir(utils.get_resource_path())
Example #6
"""
###############################################################################
# Start with the necessary imports
# -----------------------------------------------------------------------------
import os

from nilearn.plotting import plot_glass_brain

from nimare.dataset import Dataset
from nimare.meta.kernel import Peaks2MapsKernel
from nimare.utils import get_resource_path

###############################################################################
# Load Dataset
# -----------------------------------------------------------------------------
dset_file = os.path.join(get_resource_path(), "nidm_pain_dset.json")
dset = Dataset(dset_file)

###############################################################################
# Run peaks2maps
# -----------------------------------------------------------------------------
k = Peaks2MapsKernel()
imgs = k.transform(dset, return_type="image")

###############################################################################
# Plot modeled activation maps
# -----------------------------------------------------------------------------
for img in imgs:
    display = plot_glass_brain(img,
                               display_mode="lyrz",
                               plot_abs=False)
Example #7
import os

import matplotlib.pyplot as plt
from nilearn.plotting import plot_stat_map

###############################################################################
# Load Sleuth text files into Datasets
# -----------------------------------------------------------------------------
# The data for this example are a subset of studies from a meta-analysis on
# semantic cognition in children :footcite:p:`enge2021meta`.
# A first group of studies probed children's semantic world knowledge
# (e.g., correctly naming an object after hearing its auditory description)
# while a second group of studies asked children to decide if two (or more)
# words were semantically related to one another or not.
from nimare.io import convert_sleuth_to_dataset
from nimare.utils import get_resource_path

knowledge_file = os.path.join(get_resource_path(),
                              "semantic_knowledge_children.txt")
related_file = os.path.join(get_resource_path(),
                            "semantic_relatedness_children.txt")

knowledge_dset = convert_sleuth_to_dataset(knowledge_file)
related_dset = convert_sleuth_to_dataset(related_file)

###############################################################################
# Individual group ALEs
# -----------------------------------------------------------------------------
# Computing separate ALE analyses for each group is not strictly necessary for
# performing the subtraction analysis but will help the experimenter to appreciate the
# similarities and differences between the groups.
from nimare.correct import FWECorrector
from nimare.meta.cbma import ALE
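
###############################################################################
# Hedged sketch: the snippet is truncated here, so the lines below illustrate
# how the per-group ALEs and FWE correction might be run with the classes
# imported above. The correction parameters are illustrative, not prescriptive.
knowledge_ale = ALE(null_method="approximate")
knowledge_results = knowledge_ale.fit(knowledge_dset)

related_ale = ALE(null_method="approximate")
related_results = related_ale.fit(related_dset)

corr = FWECorrector(method="montecarlo", n_iters=100, n_cores=1)
knowledge_corrected = corr.transform(knowledge_results)
related_corrected = corr.transform(related_results)

# Quick look at one group's uncorrected z-statistic map
plot_stat_map(knowledge_results.get_map("z"), title="Semantic knowledge (uncorrected z)")
plt.show()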
Example #8
"""
===========================
Simple annotation from text
===========================

Perform simple term count or tf-idf value extraction from texts stored in a Dataset.
"""
import os

from nimare import annotate, dataset, utils

###############################################################################
# Load dataset with abstracts
# -----------------------------------------------------------------------------
# We'll load a small dataset composed only of studies in Neurosynth with
# Angela Laird as a coauthor, for the sake of speed.
dset = dataset.Dataset(os.path.join(utils.get_resource_path(), "neurosynth_laird_studies.json"))
dset.texts.head(2)

###############################################################################
# Generate term counts
# -----------------------------------------------------------------------------
# Let's start by extracting terms and their associated counts from article
# abstracts.
counts_df = annotate.text.generate_counts(
    dset.texts,
    text_column="abstract",
    tfidf=False,
    max_df=0.99,
    min_df=0.01,
)
counts_df.head(5)
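
###############################################################################
# Generate tf-idf values (hedged sketch)
# -----------------------------------------------------------------------------
# The same function can return tf-idf weights instead of raw counts; the call
# below simply flips the ``tfidf`` flag used above.
tfidf_df = annotate.text.generate_counts(
    dset.texts,
    text_column="abstract",
    tfidf=True,
    max_df=0.99,
    min_df=0.01,
)
tfidf_df.head(5)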
Example #9
"""
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from nimare import annotate, extract
from nimare.dataset import Dataset
from nimare.utils import get_resource_path

###############################################################################
# Load dataset with abstracts
# -----------------------------------------------------------------------------
dset = Dataset(
    os.path.join(get_resource_path(), "neurosynth_laird_studies.json"))

###############################################################################
# Download Cognitive Atlas
# -----------------------------------------------------------------------------
cogatlas = extract.download_cognitive_atlas(data_dir=get_resource_path(),
                                            overwrite=False)
id_df = pd.read_csv(cogatlas["ids"])
rel_df = pd.read_csv(cogatlas["relationships"])

###############################################################################
# ID DataFrame
id_df.head()

###############################################################################
# Relationships DataFrame
rel_df.head()
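
###############################################################################
# Extract Cognitive Atlas terms from the abstracts (hedged sketch)
# -----------------------------------------------------------------------------
# The snippet ends here, so the following lines sketch how the downloaded ID
# and relationship tables might be used, mirroring the extract_cogat and
# expand_counts calls shown in the test examples above. The relationship
# weights are illustrative.
counts_df, rep_text_df = annotate.cogat.extract_cogat(
    dset.texts, id_df, text_column="abstract"
)
weights = {"isKindOf": 1, "isPartOf": 1, "inCategory": 1}
expanded_df = annotate.cogat.expand_counts(counts_df, rel_df, weights)
expanded_df.head()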
Example #10
def make_json():
    dset_file = 'nimare/resources/nidm_pain_dset_with_subpeaks_docker.json'

    ddict = {}
    folders = sorted(glob(op.join(
        get_resource_path(),
        'data/neurovault-data/collection-1425/pain_*.nidm')))
    for folder in folders:
        name = op.basename(folder)
        ddict[name] = {}
        ddict[name]['contrasts'] = {}
        ddict[name]['contrasts']['1'] = {}
        ddict[name]['contrasts']['1']['coords'] = {}
        ddict[name]['contrasts']['1']['coords']['space'] = 'MNI'
        ddict[name]['contrasts']['1']['images'] = {}
        ddict[name]['contrasts']['1']['images']['space'] = 'MNI_2mm'
        # con file
        files = glob(op.join(folder, 'Contrast*.nii.gz'))
        files = [f for f in files if 'StandardError' not in op.basename(f)]
        if files:
            f = sorted(files)[0]
        else:
            f = None
        ddict[name]['contrasts']['1']['images']['con'] = f
        # se file
        files = glob(op.join(folder, 'ContrastStandardError*.nii.gz'))
        if files:
            f = sorted(files)[0]
        else:
            f = None
        ddict[name]['contrasts']['1']['images']['se'] = f
        # z file
        files = glob(op.join(folder, 'ZStatistic*.nii.gz'))
        if files:
            f = sorted(files)[0]
        else:
            f = None
        ddict[name]['contrasts']['1']['images']['z'] = f
        # t file
        files = glob(op.join(folder, 'TStatistic*.nii.gz'))
        if files:
            f = sorted(files)[0]
        else:
            f = None
        ddict[name]['contrasts']['1']['images']['t'] = f
        # sample size
        f = op.join(folder, 'DesignMatrix.csv')
        if op.isfile(f):
            df = pd.read_csv(f, header=None)
            n = [df.shape[0]]
        else:
            n = None
        ddict[name]['contrasts']['1']['sample_sizes'] = n
        # foci
        files = glob(op.join(folder, 'ExcursionSet*.nii.gz'))
        f = sorted(files)[0]
        img = nib.load(f)
        data = np.nan_to_num(img.get_fdata())
        # positive clusters
        binarized = np.copy(data)
        binarized[binarized > 0] = 1
        binarized[binarized < 0] = 0
        binarized = binarized.astype(int)
        labeled = ndimage.label(binarized, np.ones((3, 3, 3)))[0]
        clust_ids = sorted(list(np.unique(labeled)[1:]))

        peak_vals = np.array([np.max(data * (labeled == c)) for c in clust_ids])
        clust_ids = [clust_ids[c] for c in (-peak_vals).argsort()]  # Sort by descending max value

        ijk = []
        for c_id, c_val in enumerate(clust_ids):
            cluster_mask = labeled == c_val
            masked_data = data * cluster_mask

            # Get peaks, subpeaks and associated statistics
            subpeak_ijk, subpeak_vals = _local_max(masked_data, img.affine,
                                                   min_distance=8)

            # Only report peak and, at most, top 3 subpeaks.
            n_subpeaks = np.min((len(subpeak_vals), 4))
            subpeak_ijk = subpeak_ijk[:n_subpeaks, :]
            ijk.append(subpeak_ijk)
        ijk = np.vstack(ijk)
        xyz = nib.affines.apply_affine(img.affine, ijk)
        ddict[name]['contrasts']['1']['coords']['x'] = list(xyz[:, 0])
        ddict[name]['contrasts']['1']['coords']['y'] = list(xyz[:, 1])
        ddict[name]['contrasts']['1']['coords']['z'] = list(xyz[:, 2])

    with open(dset_file, 'w') as fo:
        json.dump(ddict, fo, sort_keys=True, indent=4)
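

def load_generated_dataset():
    """Hedged sketch: load the JSON written by make_json() as a NiMARE Dataset.

    This mirrors the Dataset usage in the other examples on this page; the
    path is the one hard-coded in make_json() above.
    """
    from nimare.dataset import Dataset

    dset_file = 'nimare/resources/nidm_pain_dset_with_subpeaks_docker.json'
    return Dataset(dset_file)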
Example #11
def _fetch_database(search_pairs, database_url, out_dir, overwrite=False):
    """Fetch generic database."""
    res_dir = get_resource_path()
    with open(op.join(res_dir, "database_file_manifest.json"), "r") as fo:
        database_file_manifest = json.load(fo)

    out_dir = op.abspath(out_dir)
    os.makedirs(out_dir, exist_ok=True)

    found_databases = []
    found_files = []
    log = True
    for database in database_file_manifest:
        coordinates_file = database["coordinates"]
        metadata_file = database["metadata"]
        if not _find_entities(coordinates_file, search_pairs, log=log):
            log = False
            continue

        log = False

        feature_dicts = database["features"]
        for feature_dict in feature_dicts:
            features_file = feature_dict["features"]
            # Other files associated with features have subset of entities,
            # so unnecessary to search them if we assume that the hard-coded manifest is valid.
            if not _find_entities(features_file, search_pairs):
                continue
            else:
                out_coordinates_file = op.join(out_dir, coordinates_file)
                out_metadata_file = op.join(out_dir, metadata_file)
                out_feature_dict = {k: op.join(out_dir, v) for k, v in feature_dict.items()}

                db_found = [
                    i_db
                    for i_db, db_dct in enumerate(found_databases)
                    if db_dct["coordinates"] == out_coordinates_file
                ]
                if len(db_found):
                    assert len(db_found) == 1

                    found_databases[db_found[0]]["features"].append(out_feature_dict)
                else:
                    found_databases.append(
                        {
                            "coordinates": out_coordinates_file,
                            "metadata": out_metadata_file,
                            "features": [out_feature_dict],
                        }
                    )
                found_files += [coordinates_file, metadata_file, *feature_dict.values()]

    found_files = sorted(list(set(found_files)))
    for found_file in found_files:
        print(f"Downloading {found_file}", flush=True)

        url = op.join(database_url, found_file + "?raw=true")
        out_file = op.join(out_dir, found_file)

        if op.isfile(out_file) and not overwrite:
            print("File exists and overwrite is False. Skipping.")
            continue

        with open(out_file, "wb") as fo:
            u = urlopen(url)

            block_size = 8192
            while True:
                buffer = u.read(block_size)
                if not buffer:
                    break
                fo.write(buffer)

    return found_databases
Example #12
def mni_mask():
    """Load MNI mask for testing."""
    return nib.load(
        os.path.join(get_resource_path(), "templates",
                     "MNI152_2x2x2_brainmask.nii.gz"))
Example #13
def generate_counts(text_df,
                    text_column="abstract",
                    tfidf=True,
                    min_df=50,
                    max_df=0.5):
    """Generate tf-idf weights for unigrams/bigrams derived from textual data.

    Parameters
    ----------
    text_df : (D x 2) :obj:`pandas.DataFrame`
        A DataFrame with two columns ('id' and 'text'). D = document.

    Returns
    -------
    weights_df : (D x T) :obj:`pandas.DataFrame`
        A DataFrame where the index is 'id' and the columns are the
        unigrams/bigrams derived from the data. D = document. T = term.
    """
    if text_column not in text_df.columns:
        raise ValueError(f"Column '{text_column}' not found in DataFrame")

    # Remove rows with empty text cells
    orig_ids = text_df["id"].tolist()
    text_df = text_df.fillna("")
    keep_ids = text_df.loc[text_df[text_column] != "", "id"]
    text_df = text_df.loc[text_df["id"].isin(keep_ids)]

    if len(keep_ids) != len(orig_ids):
        LGR.info(f"Retaining {len(keep_ids)}/{len(orig_ids)} studies")

    ids = text_df["id"].tolist()
    text = text_df[text_column].tolist()
    stoplist = op.join(get_resource_path(), "neurosynth_stoplist.txt")
    with open(stoplist, "r") as fo:
        stop_words = fo.read().splitlines()

    if tfidf:
        vectorizer = TfidfVectorizer(
            min_df=min_df,
            max_df=max_df,
            ngram_range=(1, 2),
            vocabulary=None,
            stop_words=stop_words,
        )
    else:
        vectorizer = CountVectorizer(
            min_df=min_df,
            max_df=max_df,
            ngram_range=(1, 2),
            vocabulary=None,
            stop_words=stop_words,
        )
    weights = vectorizer.fit_transform(text).toarray()

    if hasattr(vectorizer, "get_feature_names_out"):
        # scikit-learn >= 1.0.0
        names = vectorizer.get_feature_names_out()
    else:
        # scikit-learn < 1.0.0
        # To remove when we drop support for 3.6 and increase minimum sklearn version to 1.0.0.
        names = vectorizer.get_feature_names()

    names = [str(name) for name in names]
    weights_df = pd.DataFrame(weights, columns=names, index=ids)
    weights_df.index.name = "id"
    return weights_df
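

if __name__ == "__main__":
    # Hedged usage sketch (not part of generate_counts itself), mirroring the
    # gallery examples earlier on this page. Paths and thresholds are
    # illustrative.
    import os

    from nimare.dataset import Dataset
    from nimare.utils import get_resource_path

    dset = Dataset(os.path.join(get_resource_path(), "neurosynth_laird_studies.json"))

    # Raw term counts from article abstracts.
    counts_df = generate_counts(
        dset.texts,
        text_column="abstract",
        tfidf=False,
        max_df=0.99,
        min_df=0.01,
    )
    print(counts_df.head())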