Example #1
def prepare_collect(global_config=None, verbose=False):
    # --------------
    # --- CONFIG ---
    # --------------

    nv_path = Path(global_config["nv_path"])
    nv_path.mkdir(exist_ok=True)
    meta_path = Path(global_config["meta_path"])
    meta_path.mkdir(exist_ok=True)
    cache_path = Path(global_config["cache_path"])
    cache_path.mkdir(exist_ok=True)

    colls_file = str(meta_path / "colls.csv")
    fmris_file = str(meta_path / "fmris.csv")

    hcp_file = global_config["collect"]["hcp_tags"]
    download_mode = global_config["collect"]["download_mode"]

    if verbose:
        source = "disk" if download_mode == "offline" else "NeuroVault"
        print(f" > Fetch fMRIs from {source}")

    neurovault = fetch_neurovault(
        max_images=None,
        collection_terms={},
        image_terms={},
        data_dir=str(nv_path),
        mode=download_mode,
        verbose=2,
    )

    if verbose:
        print(" > Load NeuroVault metadata and images")

    neurovault_collections = load_colls(neurovault, verbose)
    neurovault_fmris = load_fmris(neurovault)

    # Remove Neurovault's first and latest IBC versions (to match Romuald's original data):
    IBC_DUPLICATES_TO_REMOVE = [2138, 4438] # 6618
    fmris = neurovault_fmris.loc[lambda df: ~df.collection_id.isin(IBC_DUPLICATES_TO_REMOVE)]
    colls = neurovault_collections.drop(IBC_DUPLICATES_TO_REMOVE, axis=0)

    if verbose:
        print(" > Adding tags from HCP")

    fmris = add_hcp_tags(fmris, hcp_file)

    if verbose:
        print(">>> Data collection OK, {} fMRIs from Neurovault, {} collections"
              .format(len(fmris), len(colls)))

    # TODO: Add DiFuMo atlas fetching when Nilearn 0.7.1 is published

    # Final dumps
    colls.to_csv(colls_file, header=True)
    fmris.to_csv(fmris_file, header=True)
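
# Sketch for the TODO above (an assumption, not part of the original pipeline):
# with nilearn >= 0.7.1, the DiFuMo atlas can be fetched directly.
from nilearn.datasets import fetch_atlas_difumo

difumo = fetch_atlas_difumo(dimension=512)  # dimension is illustrative
print(difumo.maps)  # path to the 4D probabilistic atlas image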
Example #2
def load_data(verbose=0, image_terms=None):
    """Fetch neurovault data, filtering out non MNI images.
    """
    if image_terms is None:
        image_terms = {"not_mni": False}
    neurovault_data = datasets.fetch_neurovault(max_images=None,
                                                mode="offline",
                                                verbose=verbose,
                                                image_terms=image_terms)
    return neurovault_data
def fetch_nv_collection(contrast):
    nv_data = fetch_neurovault(
        max_images=788,
        collection_id=4337,
        mode='overwrite',
        data_dir='/storage/store/data/HCP900/hcplang',
        cognitive_paradigm_cogatlas=neurovault.Contains(
            'language processing fMRI task paradigm'),
        contrast_definition=neurovault.Contains(contrast),
        map_type='Z map',
        task='LANGUAGE')

    print([meta['id'] for meta in nv_data['images_meta']])
Example #4
def fetch_nv(repo, nv_file,
             download=False,
             verbose=False):
    """
    Loads neurovault into memory, either downloading it from the web-API or
    loading it from the disk.

    :param repo: str
        Path where the data is downloaded.
    :param nv_file: str
        Pickle file where the full data is saved
        (for faster loading than the fetch_neurovault).
    :param download: bool, default=False
        If True: the data is downloaded from the web-API.
    :param verbose: bool, default=False
        Activate verbose mode.

    :return: Bunch
        A dict-like object containing the data from fMRIs fetched from
        Neurovault.
    """

    # Download and save to disk or load from disk
    if download:
        if verbose:
            print("...Download from Neurovault API...")

        neurovault = fetch_neurovault(max_images=None,
                                      collection_terms={},
                                      image_terms={},
                                      data_dir=repo,
                                      mode="download_new",
                                      verbose=2)
        with open(nv_file, 'wb') as f:
            pickle.dump(neurovault, f)
    else:
        if verbose:
            print("...Load pre-fetched data from Neurovault...")
        with open(nv_file, 'rb') as f:
            neurovault = pickle.load(f)

    n_fmri_dl = len(neurovault.images)
    if verbose:
        print("  > Number of (down)loaded fmri =", n_fmri_dl)

    return neurovault
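
# Minimal usage sketch for fetch_nv (paths are hypothetical): the first call
# downloads from the NeuroVault API and pickles the result, the second call
# reloads the pickle from disk.
nv = fetch_nv(repo="data/neurovault", nv_file="data/neurovault.pkl",
              download=True, verbose=True)
nv = fetch_nv(repo="data/neurovault", nv_file="data/neurovault.pkl",
              download=False)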
Example #5
import numpy as np

from nilearn.datasets import fetch_neurovault, load_mni152_brain_mask
from nilearn.input_data import NiftiMasker

from nilearn import plotting

######################################################################
# Get image and term data
# -----------------------

# Download images
# Here by default we only download 80 images to save time,
# but for better results I recommend using at least 200.
print("Fetching Neurovault images; "
      "if you haven't downloaded any Neurovault data before "
      "this will take several minutes.")
nv_data = fetch_neurovault(max_images=80, fetch_neurosynth_words=True)

images = nv_data['images']
term_weights = nv_data['word_frequencies']
vocabulary = nv_data['vocabulary']

# Clean and report term scores
term_weights[term_weights < 0] = 0
total_scores = np.mean(term_weights, axis=0)

print("\nTop 10 neurosynth terms from downloaded images:\n")

for term_idx in np.argsort(total_scores)[-10:][::-1]:
    print(vocabulary[term_idx])
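
# Follow-up sketch reusing the variables above: term_weights is an
# (n_images, n_terms) array, so each row can also be read per image.
for img_idx in range(min(3, len(images))):
    best_term = np.argmax(term_weights[img_idx])
    print(images[img_idx], "->", vocabulary[best_term])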

######################################################################
import os
import os.path as op

from nilearn.image import resample_to_img
from nilearn import datasets

import nibabel as nib

data_dir = '/home/mainak/Desktop/neurovault/'
base_dir = 'neurovault_resampled'
nv_data = datasets.fetch_neurovault(max_images=None,
                                    mode='offline',
                                    data_dir=data_dir)

images = nv_data['images']

if not op.exists(base_dir):
    os.mkdir(base_dir)

target_img = nib.load(images[0])
for ii, image in enumerate(images):
    collection, name = image.split('/')[-2:]
    fname = op.join(base_dir, collection, name)
    print('Resampling image %d' % ii)
    if op.exists(fname):
        continue

    if not op.exists(op.join(base_dir, collection)):
        os.mkdir(op.join(base_dir, collection))
    img = nib.load(image)
    img = resample_to_img(img, target_img)
    img.to_filename(fname)  # write the resampled image to base_dir/collection/name
Example #7
# DiFuMo atlases: https://parietal-inria.github.io/DiFuMo/
# Script to download DiFuMo atlases:
# https://github.com/Parietal-INRIA/DiFuMo/blob/master/notebook/fetcher.py

# Load a file not on the path
import runpy

import numpy as np
from nilearn.datasets import fetch_neurovault
from nilearn.image import load_img

fetcher = runpy.run_path('../../fetcher.py')
fetch_difumo = fetcher['fetch_difumo']

####################################################################
# Fetch statistical maps from Neurovault repository
# -------------------------------------------------

collection_terms = {'id': 504}
image_terms = {'not_mni': False}
pain_data = fetch_neurovault(max_images=None,
                             image_terms=image_terms,
                             collection_terms=collection_terms)
n_images = len(pain_data.images)
ref_img = load_img(pain_data.images[0])

input_images = []
y = []
groups = []
for index in range(n_images):
    input_images.append(pain_data.images[index])
    target = pain_data.images_meta[index]['PainLevel']
    subject_id = pain_data.images_meta[index]['SubjectID']
    y.append(target)
    groups.append(subject_id)
y = np.ravel(y)
groups = np.ravel(groups)
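
# Sketch (not part of the original example): the y/groups arrays above are
# meant for subject-aware cross-validation; the masker and regressor choices
# here are assumptions.
from sklearn.linear_model import Ridge
from sklearn.model_selection import GroupKFold, cross_val_score
from nilearn.input_data import NiftiMasker

masker = NiftiMasker(standardize=True)
X = masker.fit_transform(input_images)  # (n_images, n_voxels)
scores = cross_val_score(Ridge(), X, y, groups=groups, cv=GroupKFold(n_splits=5))
print("Mean cross-validated R^2:", scores.mean())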
Example #9
from nilearn import datasets

# The code below also includes thresholded images;
# with only non-thresholded images it is simpler (see the sketch after this example).

from nilearn.datasets import neurovault

# Start from the default filters and drop the one excluding thresholded images.
img_terms = neurovault.basic_image_terms().copy()

del img_terms['is_thresholded']

d = datasets.fetch_neurovault(max_images=None, image_terms=img_terms)
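
# For the simpler case mentioned above: the default filters (basic_image_terms)
# already exclude thresholded images, so no extra image_terms are needed.
d_unthresholded = datasets.fetch_neurovault(max_images=None)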
Example #10
def fetch_neurovault(max_images=np.inf, query_server=True, fetch_terms=True,
                     map_types=['F map', 'T map', 'Z map'], collection_ids=tuple(),
                     image_filters=tuple(), sort_images=True):
    """Give meaningful defaults, extra computations."""
    # Set image filters: filt_dict maps a metadata field (key) to the desired
    # entry for that field (value).
    # Since neurovault metadata are not always filled, it also includes any
    # images with missing values for any given field.
    filt_dict = {'modality': 'fMRI-BOLD', 'analysis_level': 'group',
                 'is_thresholded': False, 'not_mni': False}

    def make_fun(key, val):
        return lambda img: (img.get(key) or '') in ('', val)
    image_filters = list(image_filters) + [
        lambda img: (img.get('map_type') or '') in map_types
    ]
    image_filters = (image_filters +
                     [make_fun(key, val) for key, val in filt_dict.items()])

    # Also remove bad collections
    bad_collects = [367,   # Single image w/ large uniform area value > 0
                    1003,  # next three collections contain stat maps on
                    1011,  # parcellated brains. Likely causes odd-looking
                    1013,  # ICA component
                    1071,  # Added Oct2016-strange-looking images
                    1889]  # Added Oct2016-extreme vals on edge
    collection_ids = list(collection_ids) + bad_collects

    # Download matching images
    def image_filter(img_metadata):
        if img_metadata.get('collection_id') in collection_ids:
            return False
        for filt in image_filters:
            if not filt(img_metadata):
                return False
        return True

    ss_all = datasets.fetch_neurovault(
        mode='download_new' if query_server else 'offline',
        max_images=max_images, image_filter=image_filter,
        fetch_neurosynth_words=fetch_terms)
    images = ss_all['images_meta']

    # Post-fetcher filtering: remove duplicates, bad images from raw data.
    images = _neurovault_dedupe(images)
    images = _neurovault_remove_bad_images(images)

    # Stamp some collection properties onto images.
    colls = dict([(c['id'], c) for c in ss_all['collections_meta']])
    for image in images:
        image['DOI'] = colls.get(image['collection_id'], {}).get('DOI')

    if not fetch_terms:
        term_scores = None
    else:
        term_scores = ss_all['terms']

        # Clean & report term scores
        terms = np.array(list(term_scores.keys()))
        term_matrix = np.asarray(list(term_scores.values()))
        term_matrix[term_matrix < 0] = 0
        total_scores = np.mean(term_matrix, axis=1)

        print("Top 10 neurosynth terms from downloaded images:")
        for term_idx in np.argsort(total_scores)[-10:][::-1]:
            print('\t%-25s: %.2f' % (terms[term_idx], total_scores[term_idx]))

    if sort_images:
        # Sort images by their Neurovault id (use key=, not a Python 2 cmp function).
        idx = sorted(range(len(images)), key=lambda k: images[k]['id'])
        images = [images[ii] for ii in idx]
        if term_scores:
            # Reorder each term's per-image scores to match the new image order.
            term_scores = {term: np.asarray(scores)[idx]
                           for term, scores in term_scores.items()}
    return images, term_scores
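
# Illustrative usage of this wrapper (the image count is an arbitrary example):
images_meta, term_scores = fetch_neurovault(max_images=200, query_server=True)
print(len(images_meta), "images kept after filtering")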
Example #11
    dummy_features = np.concatenate([
        means[:, None], maxs[:, None], mins[:, None], stddevs[:, None],
        kurts[:, None]
    ], axis=1)
    # first step: exclude data with nan features:
    exclude = np.isnan(dummy_features).any(axis=1)
    dummy_features_filtered = dummy_features[~exclude]
    inds_filtered = np.arange(n_samples)[~exclude]
    names_filtered = np.asarray(neurovault_data.images)[~exclude]
    clf = OneClassSVM(nu=0.3, kernel="linear")
    clf.fit(dummy_features_filtered)

    # sanity check: we know that the data from this collection is clean
    # because it was uploaded by Bertrand
    clean = datasets.fetch_neurovault(max_images=None,
                                      mode='offline',
                                      image_terms={'collection_id': 656})
    X_bertrand = get_data_by_names(masker, clean.images)
    # the following fails if nu is too high
    np.testing.assert_equal(clf.predict(get_dummy_features(X_bertrand)), 1.)

    pred = clf.predict(dummy_features_filtered)
    # getting back the indices of the outliers in the non filtered data:
    outliers = inds_filtered[pred != 1.]
    print("# of outliers: %d" % np.sum(pred != 1.))

    # X_outliers = get_data_by_indices(neurovault_data, outliers)
    # cut_coords = (-34, -16)
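
# get_dummy_features (used above on X_bertrand) is not defined in this snippet;
# a plausible reconstruction mirroring the mean/max/min/std/kurtosis columns of
# dummy_features, offered purely as an assumption about the missing helper:
from scipy.stats import kurtosis

def get_dummy_features(X):
    # Hypothetical helper: one row of summary statistics per image.
    return np.concatenate([
        X.mean(axis=1)[:, None], X.max(axis=1)[:, None],
        X.min(axis=1)[:, None], X.std(axis=1)[:, None],
        kurtosis(X, axis=1)[:, None],
    ], axis=1)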