# NOTE: the imports below are assumed for these fetchers. The private download
# helpers (_fetch_file, _fetch_files, _get_dataset_dir, _get_dataset_descr,
# _uncompress_file) are expected to come from nilearn.datasets.utils, while
# package-specific helpers (get_data_dirs, get_credentials,
# get_collection_image_metadata, _parse_date, _reset_affines) and the file
# lists (SPM_AUDITORY_DATA_FILES, FSL_FEEDS_DATA_FILES) are assumed to be
# defined elsewhere in the package and are not imported here.
import glob
import json
import os
import re
import warnings
from os.path import join

import nibabel
import numpy as np
import pandas as pd
from sklearn.utils import Bunch

import urllib.parse
import urllib as _urllib  # fetch_localizer below calls _urllib.parse.quote

from nilearn.datasets import fetch_adhd as nilearn_fetch_adhd
from nilearn.datasets.utils import (_fetch_file, _fetch_files,
                                    _get_dataset_dir, _get_dataset_descr,
                                    _uncompress_file)


def fetch_adhd(n_subjects=40, data_dir=None, url=None, resume=True,
               modl_data_dir=None, mask_url=None, verbose=1):
    dataset = nilearn_fetch_adhd(n_subjects=n_subjects, data_dir=data_dir,
                                 url=url, resume=resume, verbose=verbose)
    root_dir = dataset.func[0]
    tail_dir = ''
    while tail_dir != 'adhd':
        root_dir, tail_dir = os.path.split(root_dir)
    root_dir = os.path.join(root_dir, tail_dir)

    modl_data_dir = get_data_dirs(modl_data_dir)[0]
    mask_data_dir = join(modl_data_dir, 'adhd')
    if mask_url is None:
        mask_url = 'http://amensch.fr/data/adhd/mask_img.nii.gz'
    _fetch_file(mask_url, mask_data_dir, resume=resume)
    mask_img = join(mask_data_dir, 'mask_img.nii.gz')

    behavioral = pd.DataFrame(dataset.phenotypic)
    behavioral.loc[:, 'Subject'] = pd.to_numeric(behavioral.loc[:, 'Subject'])
    behavioral.set_index('Subject', inplace=True)
    behavioral.index.names = ['subject']
    rest = pd.DataFrame(data=list(zip(dataset.func, dataset.confounds)),
                        columns=['filename', 'confounds'],
                        index=behavioral.index)
    return Bunch(rest=rest, behavioral=behavioral,
                 description=dataset.description, mask=mask_img,
                 root=root_dir)
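
# Hypothetical usage sketch (not part of the original fetchers): it relies
# only on the Bunch attributes returned above (rest, behavioral, mask, root);
# the subject count and the printed fields are illustrative.
def _example_fetch_adhd():
    data = fetch_adhd(n_subjects=2)
    # `rest` is a DataFrame indexed by subject with filename/confounds columns
    for subject, row in data.rest.iterrows():
        print(subject, row['filename'], row['confounds'])
    print('mask image:', data.mask)
    print('data root:', data.root)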
def fetch_fiac_first_level(data_dir=None, verbose=1):
    """Download a first-level FIAC fMRI dataset (2 sessions).

    Parameters
    ----------
    data_dir: string
        Directory where data should be downloaded and unpacked.
    """
    data_dir = _get_dataset_dir('fiac_nistats', data_dir=data_dir,
                                verbose=verbose)

    def _glob_fiac_data():
        """Glob data from subject_dir."""
        _subject_data = {}
        subject_dir = os.path.join(data_dir, 'nipy-data-0.2/data/fiac/fiac0')
        for session in [1, 2]:
            # glob func data for this session
            session_func = os.path.join(subject_dir, 'run%i.nii.gz' % session)
            if not os.path.isfile(session_func):
                print('Missing functional scan for session %i.' % session)
                return None
            _subject_data['func%i' % session] = session_func

            # glob design matrix .npz file
            sess_dmtx = os.path.join(subject_dir,
                                     'run%i_design.npz' % session)
            if not os.path.isfile(sess_dmtx):
                print('Missing session file: %s' % sess_dmtx)
                return None
            _subject_data['design_matrix%i' % session] = sess_dmtx

        # glob for mask data
        mask = os.path.join(subject_dir, 'mask.nii.gz')
        if not os.path.isfile(mask):
            print('Missing mask image.')
            return None
        _subject_data['mask'] = mask
        return Bunch(**_subject_data)

    # maybe data_dir already contains the data?
    data = _glob_fiac_data()
    if data is not None:
        return data

    # No. Download the data
    print('Data absent, downloading...')
    url = 'http://nipy.sourceforge.net/data-packages/nipy-data-0.2.tar.gz'
    archive_path = os.path.join(data_dir, os.path.basename(url))
    _fetch_file(url, data_dir)
    try:
        _uncompress_file(archive_path)
    except Exception:
        print('Archive corrupted, trying to download it again.')
        return fetch_fiac_first_level(data_dir=data_dir)
    return _glob_fiac_data()
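
# Hypothetical usage sketch: the returned Bunch exposes the func1/func2,
# design_matrix1/design_matrix2 and mask keys set in _glob_fiac_data above.
def _example_fetch_fiac_first_level():
    data = fetch_fiac_first_level()
    if data is not None:
        print(data.func1, data.design_matrix1)
        print(data.func2, data.design_matrix2)
        print('mask:', data.mask)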
def fetch_hcp_mask(data_dir=None, url=None, resume=True):
    data_dir = get_data_dirs(data_dir)[0]
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    data_dir = join(data_dir, 'parietal')
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    if url is None:
        url = 'http://amensch.fr/data/cogspaces/mask/mask_img.nii.gz'
    _fetch_file(url, data_dir, resume=resume)
    return join(data_dir, 'mask_img.nii.gz')
def _download_spm_auditory_data(data_dir, subject_dir, subject_id):
    print("Data absent, downloading...")
    url = ("http://www.fil.ion.ucl.ac.uk/spm/download/data/MoAEpilot/"
           "MoAEpilot.zip")
    archive_path = os.path.join(subject_dir, os.path.basename(url))
    _fetch_file(url, subject_dir)
    try:
        _uncompress_file(archive_path)
    except Exception:
        print("Archive corrupted, trying to download it again.")
        return fetch_spm_auditory(data_dir=data_dir, data_name="",
                                  subject_id=subject_id)
def fetch_adni(data_dir=None):
    """Fetch ADNI timeseries data from the Open Science Framework (OSF).

    Parameters
    ----------
    data_dir : string
        Path where data should be downloaded.

    Returns
    -------
    data_dir : string
        Path to the downloaded timeseries directory.
    """
    if data_dir is None:
        warnings.warn('Data downloading is requested but data_dir is not '
                      'provided. Downloading to the current directory with '
                      'folder name ADNI', stacklevel=2)
        data_dir = './ADNI'

    url = 'https://osf.io/xhrcs/download'
    # First, download the zip file
    dl_file = _fetch_file(url, data_dir=data_dir)
    # Second, uncompress the downloaded zip file
    _uncompress_file(dl_file, verbose=2)
    return data_dir
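
# Hypothetical usage sketch: fetch_adni only returns the download directory,
# so listing the extracted timeseries files is left to the caller. The glob
# pattern below is an assumption, not something the fetcher guarantees.
def _example_fetch_adni(data_dir='./ADNI'):
    adni_dir = fetch_adni(data_dir=data_dir)
    print(sorted(glob.glob(os.path.join(adni_dir, '*'))))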
def fetch_behavioral_data(data_dir=None, restricted=False, overwrite=False):
    _, _, username, password = get_credentials(data_dir=data_dir)
    data_dir = get_data_dirs(data_dir)[0]
    behavioral_dir = join(data_dir, 'behavioral')
    if not os.path.exists(behavioral_dir):
        os.makedirs(behavioral_dir)

    csv_unrestricted = join(behavioral_dir, 'hcp_unrestricted_data.csv')
    if not os.path.exists(csv_unrestricted) or overwrite:
        result = _fetch_file(data_dir=data_dir,
                             url='https://db.humanconnectome.org/REST/'
                                 'search/dict/Subject%20Information/results?'
                                 'format=csv&removeDelimitersFromFieldValues'
                                 '=true'
                                 '&restricted=0&project=HCP_900',
                             username=username, password=password)
        os.rename(result, csv_unrestricted)

    csv_restricted = join(behavioral_dir, 'hcp_restricted_data.csv')
    df_unrestricted = pd.read_csv(csv_unrestricted)
    df_unrestricted.set_index('Subject', inplace=True)
    if restricted and not os.path.exists(csv_restricted):
        warnings.warn("Cannot automatically retrieve restricted data. "
                      "Please create the file '%s' manually" % csv_restricted)
        restricted = False
    if not restricted:
        df = df_unrestricted
    else:
        df_restricted = pd.read_csv(csv_restricted)
        df_restricted.set_index('Subject', inplace=True)
        df = df_unrestricted.join(df_restricted, how='outer')
    df.sort_index(ascending=True, inplace=True)
    df.index.names = ['subject']
    return df
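
# Hypothetical usage sketch: assumes HCP credentials are available through
# get_credentials. The returned pandas DataFrame is indexed by subject; the
# 'Gender' column name is an assumption about the unrestricted HCP CSV.
def _example_fetch_behavioral_data():
    behavioral = fetch_behavioral_data(restricted=False)
    print(behavioral.shape)
    if 'Gender' in behavioral.columns:
        print(behavioral['Gender'].value_counts())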
def _download_data_spm_multimodal(data_dir, subject_dir, subject_id):
    print('Data absent, downloading...')
    urls = [
        # fmri
        ('http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/'
         'multimodal_fmri.zip'),
        # structural
        ('http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/'
         'multimodal_smri.zip')
    ]
    for url in urls:
        archive_path = os.path.join(subject_dir, os.path.basename(url))
        _fetch_file(url, subject_dir)
        try:
            _uncompress_file(archive_path)
        except Exception:
            print('Archive corrupted, trying to download it again.')
            return fetch_spm_multimodal_fmri(data_dir=data_dir, data_name='',
                                             subject_id=subject_id)
    return _glob_spm_multimodal_fmri_data(subject_dir)
def download_collection(collection=None, data_dir=None, overwrite=False,
                        resume=True, verbose=1):
    """Download images and metadata from a Neurovault collection.

    Args:
        collection (int, optional): collection id. Defaults to None.
        data_dir (str, optional): data directory. Defaults to None.
        overwrite (bool, optional): overwrite data directory.
            Defaults to False.
        resume (bool, optional): resume download. Defaults to True.
        verbose (int, optional): print diagnostic messages. Defaults to 1.

    Returns:
        (pd.DataFrame, list): (DataFrame of image metadata, list of files
        from downloaded collection)
    """
    if data_dir is None:
        data_dir = _get_dataset_dir(str(collection), data_dir=data_dir,
                                    verbose=verbose)

    # Get collection metadata
    metadata = get_collection_image_metadata(collection=collection,
                                             data_dir=data_dir)

    # Get images
    files = []
    for f in metadata["file"]:
        files.append(_fetch_file(f, data_dir, resume=resume, verbose=verbose,
                                 overwrite=overwrite))

    return (metadata, files)
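
# Hypothetical usage sketch: the collection id below is only an example; any
# public Neurovault collection id would do.
def _example_download_collection():
    metadata, files = download_collection(collection=1952)
    print(metadata.shape, len(files))
    print(files[:3])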
def fetch_openfmri_dataset(dataset_name='ds000001', dataset_revision=None,
                           data_dir=None, verbose=1):
    """Download the latest revision of a specified OpenfMRI BIDS dataset.

    Compressed files will not be uncompressed automatically due to the
    expected great size of the downloaded dataset.

    Only datasets that contain preprocessed files following the official
    conventions of the future BIDS derivatives specification can be used
    out of the box with Nistats. Otherwise custom preprocessing would need
    to be performed, optionally following the BIDS derivatives specification
    for the preprocessing output files.

    Parameters
    ----------
    dataset_name: string, optional
        Accession number as published in https://openfmri.org/dataset/.
        Downloads dataset ds000001 by default.

    dataset_revision: string, optional
        Revision as presented in the specific dataset link accessible
        from https://openfmri.org/dataset/. Looks for the latest by default.

    data_dir: string, optional
        Path to store the downloaded dataset. If None, employs the nilearn
        datasets default download directory.

    verbose: int, optional
        Verbosity level (0 means no message).

    Returns
    -------
    data_dir: string
        Path to the downloaded dataset.

    downloaded_files: list of string
        Absolute paths of the downloaded files on disk.
    """
    # We download a json file with all the api data from the openfmri server
    openfmri_api = 'https://openfmri.org/dataset/api'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    files = _fetch_file(openfmri_api, data_dir)
    json_api = json.load(open(files, 'r'))

    dataset_url_set = []
    for i in range(len(json_api)):
        # We look for the desired dataset in the json api file
        if dataset_name == json_api[i]['accession_number']:
            # Now we look for the desired revision or the last one
            if not dataset_revision:
                revision = json_api[i]['revision_set']
                if revision:
                    dataset_revision = revision[-1]['revision_number']
            # After selecting the revision we download all its files
            link_set = json_api[i]['link_set']
            for link in link_set:
                revision = link['revision']
                if revision == dataset_revision:
                    dataset_url_set.append(link['url'])
            # If a revision is specified but no file is found, raise an error
            if dataset_revision and not dataset_url_set:
                raise Exception('No files found for revision %s' %
                                dataset_revision)
            break

    if not dataset_url_set:
        raise ValueError('dataset %s not found' % dataset_name)
    else:
        # The files_spec needed for _fetch_files
        files_spec = []
        for dat_url in dataset_url_set:
            target_file = os.path.basename(dat_url)
            url = dat_url
            files_spec.append((target_file, url, {}))
        # download the files
        downloaded_files = _fetch_files(data_dir, files_spec, resume=True,
                                        verbose=verbose)
    return data_dir, downloaded_files
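
# Hypothetical usage sketch: downloads the default ds000001 revision and
# prints what landed on disk. Note that the archives are not uncompressed.
def _example_fetch_openfmri_dataset():
    data_dir, downloaded_files = fetch_openfmri_dataset(
        dataset_name='ds000001')
    print(data_dir)
    for path in downloaded_files:
        print(path)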
def fetch_localizer(subject_ids=None, get_anats=False, data_type='raw',
                    data_dir=None, url=None, resume=True, verbose=1):
    """Download and load the Brainomics Localizer dataset (94 subjects).

    "The Functional Localizer is a simple and fast acquisition procedure
    based on a 5-minute functional magnetic resonance imaging (fMRI) sequence
    that can be run as easily and as systematically as an anatomical scan.
    This protocol captures the cerebral bases of auditory and visual
    perception, motor actions, reading, language comprehension and mental
    calculation at an individual level. Individual functional maps are
    reliable and quite precise. The procedure is described in more detail on
    the Functional Localizer page."
    This code is modified from `fetch_localizer_contrasts` from
    nilearn.datasets.funcs.py. (see http://brainomics.cea.fr/localizer/)

    "Scientific results obtained using this dataset are described in
    Pinel et al., 2007" [1]

    Notes: It is better to perform several small requests than a single large
    one because the Brainomics server has no cache (a large request can lead
    to a timeout while the archive is generated on the remote server). For
    example, download n_subjects=np.array(1,10), then
    n_subjects=np.array(10,20), etc.

    Args:
        subject_ids: (list) List of Subject IDs (e.g., ['S01','S02']).
            If None is given, all 94 subjects are used.
        get_anats: (boolean) Whether individual structural images should be
            fetched or not.
        data_type: (string) Type of data to download. Valid values are
            ['raw','preprocessed'].
        data_dir: (string, optional) Path of the data directory. Used to
            force data storage in a specified location.
        url: (string, optional) Override download URL. Used for test only
            (or if you setup a mirror of the data).
        resume: (bool) Whether to resume download of a partly-downloaded
            file.
        verbose: (int) Verbosity level (0 means no message).

    Returns:
        data: (Bunch) Dictionary-like object, the interest attributes are:
            - 'functional': string list. Paths to nifti contrast maps.
            - 'structural': string. Path to nifti files corresponding to the
              subjects' structural images.

    References
    ----------
    Pinel, Philippe, et al. "Fast reproducible identification and large-scale
    databasing of individual functional cognitive networks." BMC neuroscience
    8.1 (2007): 91.
    """
    if subject_ids is None:
        subject_ids = ['S%02d' % x for x in np.arange(1, 95)]
    elif not isinstance(subject_ids, list):
        raise ValueError(
            "subject_ids must be a list of subject ids (e.g., ['S01','S02'])")

    if data_type == 'raw':
        dat_type = "raw fMRI"
        dat_label = "raw bold"
        anat_type = "raw T1"
        anat_label = "raw anatomy"
    elif data_type == 'preprocessed':
        dat_type = "preprocessed fMRI"
        dat_label = "bold"
        anat_type = "normalized T1"
        anat_label = "anatomy"
    else:
        raise ValueError(
            "Only ['raw','preprocessed'] data_types are currently supported.")

    root_url = "http://brainomics.cea.fr/localizer/"
    dataset_name = 'brainomics_localizer'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    fdescr = _get_dataset_descr(dataset_name)
    opts = {'uncompress': True}

    bold_files = []
    anat_files = []
    for subject_id in subject_ids:
        base_query = ("Any X,XT,XL,XI,XF,XD WHERE X is Scan, X type XT, "
                      "X concerns S, "
                      "X label XL, X identifier XI, "
                      "X format XF, X description XD, "
                      'S identifier = "%s", ' % (subject_id,) +
                      'X type IN(%(types)s), X label "%(label)s"')
        file_tarball_url = "%sbrainomics_data.zip?rql=%s&vid=data-zip" % (
            root_url,
            _urllib.parse.quote(base_query % {"types": "\"%s\"" % dat_type,
                                              "label": dat_label},
                                safe=',()'))
        name_aux = str.replace(str.join('_', [dat_type, dat_label]), ' ', '_')
        file_path = os.path.join("brainomics_data", subject_id,
                                 "%s.nii.gz" % name_aux)
        bold_files.append(
            _fetch_files(data_dir, [(file_path, file_tarball_url, opts)],
                         verbose=verbose))
        if get_anats:
            file_tarball_url = (
                "%sbrainomics_data_anats.zip?rql=%s&vid=data-zip" % (
                    root_url,
                    _urllib.parse.quote(
                        base_query % {"types": "\"%s\"" % anat_type,
                                      "label": anat_label},
                        safe=',()')))
            if data_type == 'raw':
                anat_name_aux = "raw_T1_raw_anat_defaced.nii.gz"
            elif data_type == 'preprocessed':
                anat_name_aux = "normalized_T1_anat_defaced.nii.gz"
            file_path = os.path.join("brainomics_data", subject_id,
                                     anat_name_aux)
            anat_files.append(
                _fetch_files(data_dir, [(file_path, file_tarball_url, opts)],
                             verbose=verbose))

    # Fetch subject characteristics (separated in two files)
    if url is None:
        url_csv = ("%sdataset/cubicwebexport.csv?rql=%s&vid=csvexport" %
                   (root_url,
                    _urllib.parse.quote("Any X WHERE X is Subject")))
        url_csv2 = ("%sdataset/cubicwebexport2.csv?rql=%s&vid=csvexport" %
                    (root_url,
                     _urllib.parse.quote("Any X,XI,XD WHERE X is "
                                         "QuestionnaireRun, X identifier XI, "
                                         "X datetime XD", safe=',')))
    else:
        url_csv = "%s/cubicwebexport.csv" % url
        url_csv2 = "%s/cubicwebexport2.csv" % url
    filenames = [("cubicwebexport.csv", url_csv, {}),
                 ("cubicwebexport2.csv", url_csv2, {})]
    csv_files = _fetch_files(data_dir, filenames, verbose=verbose)
    metadata = pd.merge(pd.read_csv(csv_files[0], sep=';'),
                        pd.read_csv(csv_files[1], sep=';'),
                        on='"subject_id"')
    metadata.to_csv(os.path.join(data_dir, 'metadata.csv'))
    for x in ['cubicwebexport.csv', 'cubicwebexport2.csv']:
        os.remove(os.path.join(data_dir, x))

    if not get_anats:
        anat_files = None

    return Bunch(functional=bold_files, structural=anat_files,
                 ext_vars=metadata, description=fdescr)
def fetch_spm_auditory(data_dir=None, data_name='spm_auditory',
                       subject_id="sub001", verbose=1):
    """Function to fetch SPM auditory single-subject data.

    Parameters
    ----------
    data_dir: string
        Path of the data directory. Used to force data storage in a
        specified location. If the data is already present there, then it
        will simply be globbed.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images
        - 'anat': string list. Path to anat image

    References
    ----------
    :download: http://www.fil.ion.ucl.ac.uk/spm/data/auditory/
    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)

    def _glob_spm_auditory_data():
        """Glob data from subject_dir."""
        if not os.path.exists(subject_dir):
            return None
        subject_data = {}
        for file_name in SPM_AUDITORY_DATA_FILES:
            file_path = os.path.join(subject_dir, file_name)
            if os.path.exists(file_path):
                subject_data[file_name] = file_path
            else:
                print("%s missing from filelist!" % file_name)
                return None

        _subject_data = {}
        _subject_data["func"] = sorted(
            [subject_data[x] for x in subject_data.keys()
             if re.match(r"^fM00223_0\d\d\.img$", os.path.basename(x))])

        # volumes for this dataset of shape (64, 64, 64, 1); let's fix this
        for x in _subject_data["func"]:
            vol = nibabel.load(x)
            if len(vol.shape) == 4:
                vol = nibabel.Nifti1Image(vol.get_data()[:, :, :, 0],
                                          vol.get_affine())
                nibabel.save(vol, x)

        _subject_data["anat"] = [
            subject_data[x] for x in subject_data.keys()
            if re.match(r"^sM00223_002\.img$", os.path.basename(x))][0]

        # ... same thing for anat
        vol = nibabel.load(_subject_data["anat"])
        if len(vol.shape) == 4:
            vol = nibabel.Nifti1Image(vol.get_data()[:, :, :, 0],
                                      vol.get_affine())
            nibabel.save(vol, _subject_data["anat"])

        return Bunch(**_subject_data)

    # maybe data_dir already contains the data?
    data = _glob_spm_auditory_data()
    if data is not None:
        return data

    # No. Download the data
    print("Data absent, downloading...")
    url = ("http://www.fil.ion.ucl.ac.uk/spm/download/data/MoAEpilot/"
           "MoAEpilot.zip")
    archive_path = os.path.join(subject_dir, os.path.basename(url))
    _fetch_file(url, subject_dir)
    try:
        _uncompress_file(archive_path)
    except Exception:
        print("Archive corrupted, trying to download it again.")
        return fetch_spm_auditory(data_dir=data_dir, data_name="",
                                  subject_id=subject_id)
    return _glob_spm_auditory_data()
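
# Hypothetical usage sketch: loads the single-subject SPM auditory data and
# inspects the first functional volume with nibabel.
def _example_fetch_spm_auditory():
    data = fetch_spm_auditory()
    print(len(data.func), 'functional volumes')
    print(nibabel.load(data.func[0]).shape)
    print('anat:', data.anat)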
def fetch_lemur_mircen_2019_t2(subjects=[0], data_dir=None, url=None,
                               resume=True, verbose=1):
    """Download and load the mouse lemur template dataset.

    Parameters
    ----------
    subjects : sequence of int or None, optional
        ids of subjects to load, default to loading one subject.

    data_dir : string, optional
        Path of the data directory. Used to force data storage in a
        specified location. Default: None

    resume : bool, optional (default True)
        If true, try resuming download if possible.

    verbose : int, optional (default 0)
        Defines the level of verbosity of the output.

    Returns
    -------
    data : sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are :
        - 'anat': string list. Paths to T2-weighted images.
        - 'phenotypic': Participants genders, birth dates and MRI scan dates.

    References
    ----------
    :Download:
        https://openneuro.org/datasets/ds001945/versions/1.0.0/download

    :Reference:
        `A 3D population-based brain atlas of the mouse lemur primate with
        examples of applications in aging studies and comparative anatomy.
        <http://doi:10.1016/j.neuroimage.2018.10.010>`_
        Neuroimage 185 (2019): 85-95.
        N. A. Nadkarni, S. Bougacha, C. Garin, M. Dhenain, and J. L. Picq.
    """
    if url is None:
        url = ('https://openneuro.org/crn/datasets/ds001945/snapshots/'
               '1.0.0/files')

    dataset_name = 'mircen2019_t2'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)

    # Check arguments
    max_subjects = 34
    if max(subjects) > max_subjects:
        warnings.warn(
            'Warning: there are only {0} subjects'.format(max_subjects))
        subjects = range(max_subjects)
    subject_ids = np.array(['sub-{0:02d}'.format(i) for i in range(1, 35)])
    subject_ids = subject_ids[subjects]

    # Generate the list of urls
    json_urls = [
        os.path.join(url, '{0}:anat:{0}_T2w.json'.format(subject_id))
        for subject_id in subject_ids]
    anat_urls = [
        os.path.join(url, '{0}:anat:{0}_T2w.nii.gz'.format(subject_id))
        for subject_id in subject_ids]

    # Generate the list of target files
    anat_basenames = ['{0}_anat_{0}_T2w.nii.gz'.format(subject_id)
                      for subject_id in subject_ids]
    anat_files = [os.path.join(animal_dir, anat_basename)
                  for (animal_dir, anat_basename) in zip(subject_ids,
                                                         anat_basenames)]
    json_basenames = ['{0}_anat_{0}_T2w.json'.format(subject_id)
                      for subject_id in subject_ids]
    json_files = [os.path.join(animal_dir, json_basename)
                  for (animal_dir, json_basename) in zip(subject_ids,
                                                         json_basenames)]

    # Call fetch_files once per subject.
    anat = []
    json = []
    for anat_u, anat_f, json_u, json_f in zip(anat_urls, anat_files,
                                              json_urls, json_files):
        a, j = _fetch_files(
            data_dir,
            [(anat_f, anat_u, {'move': anat_f}),
             (json_f, json_u, {'move': json_f})],
            verbose=verbose)
        json.append(j)
        anat.append(a)

    pheno_url = os.path.join(url, 'lemur_atlas_list_t2_bids.csv')
    pheno_file = _fetch_file(pheno_url, data_dir, verbose=verbose)
    phenotypic = np.recfromcsv(
        pheno_file, delimiter='\t', skip_header=True,
        names=['animal_id', 'gender', 'birthdate', 'mri_date'],
        dtype=['U8', 'U3', 'datetime64[D]', 'datetime64[D]'],
        converters={2: _parse_date, 3: _parse_date}, encoding='U8')
    phenotypic = phenotypic[[
        np.where(phenotypic['animal_id'] == '"' + i + '"')[0][0]
        for i in subject_ids]]
    fdescr = _get_dataset_descr(dataset_name)

    return Bunch(anat=anat, pheno=phenotypic, description=fdescr)
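
# Hypothetical usage sketch: fetches the first three lemur subjects and
# prints each anatomical path next to the phenotypic table.
def _example_fetch_lemur_mircen_2019_t2():
    data = fetch_lemur_mircen_2019_t2(subjects=[0, 1, 2])
    for anat_path in data.anat:
        print(anat_path)
    print(data.pheno)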
def fetch_spm_multimodal_fmri(data_dir=None, data_name="spm_multimodal_fmri",
                              subject_id="sub001", verbose=1):
    """Fetcher for the Multi-modal Face Dataset.

    Parameters
    ----------
    data_dir: string
        Path of the data directory. Used to force data storage in a
        specified location. If the data is already present there, then it
        will simply be globbed.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func1': string list. Paths to functional images for session 1
        - 'func2': string list. Paths to functional images for session 2
        - 'trials_ses1': string list. Path to onsets file for session 1
        - 'trials_ses2': string list. Path to onsets file for session 2
        - 'anat': string. Path to anat file

    References
    ----------
    :download: http://www.fil.ion.ucl.ac.uk/spm/data/mmfaces/
    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)
    subject_dir = os.path.join(data_dir, subject_id)

    def _glob_spm_multimodal_fmri_data():
        """Glob data from subject_dir."""
        _subject_data = {'slice_order': 'descending'}

        for session in range(2):
            # glob func data for session `session + 1`
            session_func = sorted(glob.glob(
                os.path.join(
                    subject_dir,
                    ("fMRI/Session%i/fMETHODS-000%i-*-01.img" %
                     (session + 1, session + 5)))))
            if len(session_func) < 390:
                print("Missing %i functional scans for session %i." %
                      (390 - len(session_func), session))
                return None
            _subject_data['func%i' % (session + 1)] = session_func

            # glob trials .mat file
            sess_trials = os.path.join(
                subject_dir, "fMRI/trials_ses%i.mat" % (session + 1))
            if not os.path.isfile(sess_trials):
                print("Missing session file: %s" % sess_trials)
                return None
            _subject_data['trials_ses%i' % (session + 1)] = sess_trials

        # glob for anat data
        anat = os.path.join(subject_dir, "sMRI/smri.img")
        if not os.path.isfile(anat):
            print("Missing structural image.")
            return None
        _subject_data["anat"] = anat

        return Bunch(**_subject_data)

    # maybe data_dir already contains the data?
    data = _glob_spm_multimodal_fmri_data()
    if data is not None:
        return data

    # No. Download the data
    print("Data absent, downloading...")
    urls = [
        # fmri
        ("http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/"
         "multimodal_fmri.zip"),
        # structural
        ("http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/"
         "multimodal_smri.zip")
    ]
    for url in urls:
        archive_path = os.path.join(subject_dir, os.path.basename(url))
        _fetch_file(url, subject_dir)
        try:
            _uncompress_file(archive_path)
        except Exception:
            print("Archive corrupted, trying to download it again.")
            return fetch_spm_multimodal_fmri(data_dir=data_dir, data_name="",
                                             subject_id=subject_id)
    return _glob_spm_multimodal_fmri_data()
def fetch_zurich_anesthesiant(subjects=range(30), url=None, data_dir=None,
                              resume=True, verbose=1):
    """Download and load the ETH-Zurich anesthesiant dataset.

    Parameters
    ----------
    subjects : sequence of int or None, optional
        ids of subjects to load, default to loading all subjects.

    data_dir : string, optional
        Path of the data directory. Used to force data storage in a
        specified location. Default: None

    resume : bool, optional (default True)
        If true, try resuming download if possible.

    verbose : int, optional (default 0)
        Defines the level of verbosity of the output.

    Returns
    -------
    data : sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images.
        - 'anesthesiant': string list. Information on used anesthesiant.

    Notes
    -----
    This dataset is composed of 30 male mice with different anesthesia
    protocols.

    References
    ----------
    :Download: https://central.xnat.org

    :Reference:
        `Optimization of anesthesia protocol for resting-state fMRI in mice
        based on differential effects of anesthetics on functional
        connectivity patterns.
        <http://dx.doi.org/10.1016/j.neuroimage.2014.08.043>`_
        NeuroImage 102 (2014): 838-847.
        J. Grandjean and A. Schroeter and I. Batata and M. Rudin.
    """
    if url is None:
        url = 'https://central.xnat.org'

    dataset_name = 'zurich_anest'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)

    # First, fetch the file that references all individual URLs
    json_file = _fetch_file(os.path.join(url, 'data', 'experiments.html'),
                            data_dir, verbose=verbose)

    # Return the json file contents as a dictionary
    with open(json_file) as json_data:
        rows = list(json.load(json_data).values())[0]['Result']
    names = [name for name in rows[0].keys()]
    projects = {}
    for name in names:
        projects[name] = np.array([row[name] for row in rows])

    # Collect directories for all mice in the anesthesiant dataset
    iso_ids = ['iso2273', 'iso2274', 'iso2238', 'iso2239', 'iso2250',
               'iso2270', 'iso3294', 'iso3296']
    med_ids = ['med2259', 'med2241', 'med2247', 'med2251', 'med2256',
               'med2257']
    mi_ids = ['mi272871', 'mi273299', 'mi273457', 'mi273458', 'mi273459',
              'mi273460', 'mi273461', 'mi273300']
    med_half_dose_ids = ['medHalfDose', 'medHalfDose1', 'medHalfDose2',
                         'medHalfDose3']
    iso1_c3_ids = ['iso1c3perc', 'iso1c3perc']
    iso1_c5_ids = ['iso1c5perc', 'iso2870_1c5perc']

    subjects_ids = iso_ids + med_ids + mi_ids + med_half_dose_ids + \
        iso1_c3_ids + iso1_c5_ids
    subjects_labels = ['Iso1'] * len(iso_ids) + ['Med'] * len(med_ids) + \
        ['Med-Iso'] * len(mi_ids) + \
        ['Med-half'] * len(med_half_dose_ids) + \
        ['Iso1pt3'] * len(iso1_c3_ids) + \
        ['Iso1pt5'] * len(iso1_c5_ids)

    # Check arguments
    max_subjects = len(subjects_ids)
    if subjects is None:
        subjects = range(max_subjects)
    elif max(subjects) > max_subjects:
        warnings.warn(
            'Warning: there are only {0} subjects'.format(max_subjects))
        subjects = range(max_subjects)
    unique_subjects, indices = np.unique(subjects, return_index=True)
    if len(unique_subjects) < len(subjects):
        warnings.warn('Warning: Duplicate subjects, removing them.')
        subjects = unique_subjects[np.argsort(indices)]

    subjects_ids = [subjects_ids[subject] for subject in subjects]
    subjects_labels = [subjects_labels[subject] for subject in subjects]
    mice_uris = projects['URI'][np.in1d(projects['label'], subjects_ids)]

    # Generate the list of urls by session
    img_file = 'rsfMRI.img'
    hdr_file = 'rsfMRI.hdr'
    func_path = 'scans/rs_fMRI/resources/NULL/files'
    img_urls = [os.path.join(url + b, func_path, img_file)
                for b in mice_uris]
    hdr_urls = [os.path.join(url + b, func_path, hdr_file)
                for b in mice_uris]

    # Generate the list of target files by session
    target_img = [os.path.join(label, sub, img_file)
                  for sub, label in zip(subjects_ids, subjects_labels)]
    target_hdr = [os.path.join(label, sub, hdr_file)
                  for sub, label in zip(subjects_ids, subjects_labels)]

    # Call fetch_files once per subject.
    img = []
    for img_u, hdr_u, img_f, hdr_f in zip(img_urls, hdr_urls, target_img,
                                          target_hdr):
        f, _ = _fetch_files(
            data_dir,
            [(img_f, img_u, {'move': img_f}),
             (hdr_f, hdr_u, {'move': hdr_f})],
            verbose=verbose)
        img.append(f)

    fdescr = _get_dataset_descr(dataset_name)

    return Bunch(func=img, anesthesiant=subjects_labels, description=fdescr)
def fetch_zurich_test_retest(subjects=range(15), sessions=[1], data_dir=None,
                             url=None, resume=True, verbose=1,
                             correct_headers=False):
    """Download and load the ETH-Zurich test-retest dataset.

    Parameters
    ----------
    subjects : sequence of int or None, optional
        ids of subjects to load, default to loading all subjects.

    sessions : iterable of int, optional
        The sessions to load. Load only the first session by default.

    data_dir : string, optional
        Path of the data directory. Used to force data storage in a
        specified location. Default: None

    resume : bool, optional (default True)
        If true, try resuming download if possible.

    verbose : int, optional (default 0)
        Defines the level of verbosity of the output.

    Returns
    -------
    data : sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are :
        - 'func': string list. Paths to functional images.
        - 'anat': string list. Paths to anatomic images.
        - 'session': numpy array. List of ids corresponding to images
          sessions.

    Notes
    -----
    This dataset is composed of 2 sessions of 15 male mice. For each mouse,
    2 resting-state scans of continuous EPI functional volumes were
    collected, both with their anatomical scan. Session 2 was collected
    15-20 days after session 1.

    References
    ----------
    :Download: https://central.xnat.org

    :Reference:
        `Mapping the Mouse Brain with Rs-fMRI: An Optimized Pipeline for
        Functional Network Identification
        <http://dx.doi.org/10.1016/j.neuroimage.2015.07.090>`_
        NeuroImage 123 (2015): 11-21.
        V. Zerbi, J. Grandjean, M. Rudin, and N. Wenderoth.
    """
    if url is None:
        url = 'https://central.xnat.org'

    dataset_name = 'zurich_retest'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)

    # First, fetch the file that references all individual URLs
    json_file = _fetch_file(url + '/data/experiments.html', data_dir,
                            verbose=verbose)

    # Return the json file contents as a dictionary
    with open(json_file) as json_data:
        rows = list(json.load(json_data).values())[0]['Result']
    names = [name for name in rows[0].keys()]
    projects = {}
    for name in names:
        projects[name] = np.array([row[name] for row in rows])

    # Collect directories for all mice in the test-retest dataset
    subject_ids = ['1366', '1367', '1368', '1369', '1371', '1378', '1380',
                   '1402', '1403', '1404', '1405', '1406', '1407', '1411',
                   '1412']
    baseline_subject_ids = [subject + '_baseline' for subject in subject_ids]
    post_subject_ids = [subject + '_post' for subject in subject_ids]
    baseline_uris = projects['URI'][np.in1d(projects['label'],
                                            baseline_subject_ids)]
    post_uris = projects['URI'][np.in1d(projects['label'],
                                        post_subject_ids)]

    # Generate the list of urls by session
    func_file = 'rsfMRI.nii.gz'
    anat_file = '3DRARE.nii.gz'
    func_path = 'scans/rsfMRI/resources/NIFTI/files'
    anat_path = 'scans/anatomical/resources/NIFTI/files'
    func_urls = [
        [os.path.join(url + b, func_path, func_file) for b in baseline_uris],
        [os.path.join(url + p, func_path, func_file) for p in post_uris]]
    anat_urls = [
        [os.path.join(url + b, anat_path, anat_file) for b in baseline_uris],
        [os.path.join(url + p, anat_path, anat_file) for p in post_uris]]

    # Generate the list of target files by session
    func_files = [
        [os.path.join('baseline', sub, func_file) for sub in subject_ids],
        [os.path.join('post', sub, func_file) for sub in subject_ids]]
    anat_files = [
        [os.path.join('baseline', sub, anat_file) for sub in subject_ids],
        [os.path.join('post', sub, anat_file) for sub in subject_ids]]

    # Check arguments
    max_subjects = len(subject_ids)
    if max(subjects) > max_subjects:
        warnings.warn(
            'Warning: there are only {0} subjects'.format(max_subjects))
        subjects = range(max_subjects)
    unique_subjects, indices = np.unique(subjects, return_index=True)
    if len(unique_subjects) < len(subjects):
        warnings.warn('Warning: Duplicate subjects, removing them.')
        subjects = unique_subjects[np.argsort(indices)]

    n_subjects = len(subjects)
    target_anat = []
    target_func = []
    source_anat = []
    source_func = []
    session = []
    for i in sessions:
        if not (i in [1, 2]):
            raise ValueError('Zurich dataset session id must be in [1, 2]')
        source_anat += [anat_urls[i - 1][subject] for subject in subjects]
        source_func += [func_urls[i - 1][subject] for subject in subjects]
        target_anat += [anat_files[i - 1][subject] for subject in subjects]
        target_func += [func_files[i - 1][subject] for subject in subjects]
        session += [i] * n_subjects

    # Call fetch_files once per subject.
    func = []
    anat = []
    for anat_u, anat_f, func_u, func_f in zip(source_anat, target_anat,
                                              source_func, target_func):
        a, f = _fetch_files(
            data_dir,
            [(anat_f, anat_u, {'move': anat_f}),
             (func_f, func_u, {'move': func_f})],
            verbose=verbose)
        func.append(f)
        anat.append(a)

    fdescr = _get_dataset_descr(dataset_name)

    # This data has wrong sforms and qforms in the headers,
    # so we correct them.
    if correct_headers:
        corrected_anat = []
        for a in anat:
            corrected_a = os.path.join(os.path.dirname(a),
                                       '3DRARE_corrected.nii.gz')
            _reset_affines(a, corrected_a, axes_to_permute=[(1, 2)],
                           axes_to_flip=[0], verbose=0)
            corrected_anat.append(corrected_a)
        corrected_func = []
        for f in func:
            corrected_f = os.path.join(os.path.dirname(f),
                                       'rsfMRI_corrected.nii.gz')
            _reset_affines(f, corrected_f, center_mass=(0, 0, 0),
                           xyzscale=.1, axes_to_permute=[(1, 2)],
                           axes_to_flip=[0], verbose=0)
            corrected_func.append(corrected_f)
        anat = corrected_anat
        func = corrected_func

    return Bunch(anat=anat, func=func, session=session, description=fdescr)
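
# Hypothetical usage sketch: loads both sessions for the first two mice with
# header correction enabled, then pairs each image with its session id.
def _example_fetch_zurich_test_retest():
    data = fetch_zurich_test_retest(subjects=[0, 1], sessions=[1, 2],
                                    correct_headers=True)
    for anat, func, session in zip(data.anat, data.func, data.session):
        print(session, anat, func)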
def fetch_fsl_feeds(data_dir=None, data_name="fsl_feeds", verbose=1):
    """Function to fetch the FSL FEEDS dataset (single-subject).

    Parameters
    ----------
    data_dir: string
        Path of the data directory. Used to force data storage in a
        specified location. If the data is already present there, then it
        will simply be globbed.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are:
        - 'func': string list. Paths to functional images
        - 'anat': string list. Path to anat image
    """
    data_dir = _get_dataset_dir(data_name, data_dir=data_dir,
                                verbose=verbose)

    def _glob_fsl_feeds_data(subject_dir):
        """Glob data from subject_dir."""
        if not os.path.exists(subject_dir):
            return None
        subject_data = {}
        subject_data["subject_dir"] = subject_dir
        for file_name in FSL_FEEDS_DATA_FILES:
            file_path = os.path.join(subject_dir, file_name)
            if os.path.exists(file_path) or os.path.exists(
                    file_path.rstrip(".gz")):
                file_name = re.sub(r"(?:\.nii\.gz|\.txt)", "", file_name)
                subject_data[file_name] = file_path
            else:
                if not os.path.basename(subject_dir) == 'data':
                    return _glob_fsl_feeds_data(os.path.join(subject_dir,
                                                             'feeds/data'))
                else:
                    print("%s missing from filelist!" % file_name)
                    return None
        _subject_data = {
            "func": os.path.join(subject_dir, "fmri.nii.gz"),
            "anat": os.path.join(subject_dir, "structural_brain.nii.gz")
        }
        return Bunch(**_subject_data)

    # maybe data_dir already contains the data?
    data = _glob_fsl_feeds_data(data_dir)
    if data is not None:
        return data

    # download the data
    print("Data absent, downloading...")
    url = ("http://fsl.fmrib.ox.ac.uk/fsldownloads/oldversions/"
           "fsl-4.1.0-feeds.tar.gz")
    archive_path = os.path.join(data_dir, os.path.basename(url))
    _fetch_file(url, data_dir)
    try:
        _uncompress_file(archive_path)
    except Exception:
        print("Archive corrupted, trying to download it again.")
        os.remove(archive_path)
        return fetch_fsl_feeds(data_dir=data_dir, data_name="")
    return _glob_fsl_feeds_data(data_dir)