Example #1
def test_get_data_dir(tmpdir):
    # testing folder creation under different environments, enforcing
    # a custom clean install
    os.environ.pop('MODL_DATA', None)
    os.environ.pop('MODL_SHARED_DATA', None)

    expected_base_dir = os.path.expanduser('~/modl_data')
    data_dir = get_data_dirs()[0]
    assert_equal(data_dir, expected_base_dir)

    expected_base_dir = os.path.join(tmpdir, 'modl_data')
    os.environ['MODL_DATA'] = expected_base_dir
    data_dir = get_data_dirs()[0]
    assert_equal(data_dir, expected_base_dir)

    expected_base_dir = os.path.join(tmpdir, 'modl_shared_data')
    os.environ['MODL_SHARED_DATA'] = expected_base_dir
    data_dir = get_data_dirs()[0]
    assert_equal(data_dir, expected_base_dir)

    expected_base_dir = os.path.join(tmpdir, 'modl_data')
    os.environ.pop('MODL_DATA', None)
    os.environ.pop('MODL_SHARED_DATA', None)
    data_dir = get_data_dirs(expected_base_dir)[0]
    assert_equal(data_dir, expected_base_dir)
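
The assertions above fix the lookup order: an explicit argument first, then MODL_SHARED_DATA, then MODL_DATA, then the ~/modl_data fallback. A minimal sketch of get_data_dirs consistent with these assertions (an illustration, not necessarily the project's actual implementation):

import os

def get_data_dirs(data_dir=None):
    # Sketch inferred from the test above; the real helper may differ in details.
    paths = []
    if data_dir is not None:
        # An explicit argument takes precedence over everything else.
        paths.extend(str(data_dir).split(os.pathsep))
    shared_data = os.environ.get('MODL_SHARED_DATA')
    if shared_data is not None:
        paths.extend(shared_data.split(os.pathsep))
    local_data = os.environ.get('MODL_DATA')
    if local_data is not None:
        paths.extend(local_data.split(os.pathsep))
    # Per-user fallback directory.
    paths.append(os.path.expanduser('~/modl_data'))
    return paths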
Example #2
def get_hcp_data(raw=False, data_dir=None):
    data_dir = get_data_dirs(data_dir)[0]
    if not os.path.exists(join(data_dir, 'HCP_extra')):
        raise ValueError(
            'Please download HCP_extra folder using make download-hcp_extra'
            ' first.')
    if raw:
        mask = join(data_dir, 'HCP_extra/mask_img.nii.gz')
        try:
            mapping = json.load(
                open(join(data_dir, 'HCP_unmasked/mapping.json'), 'r'))
        except FileNotFoundError:
            raise IOError('Please unmask the data using hcp_prepare.py first.')
        func_filenames = sorted(list(mapping.values()))
    else:
        hcp_dataset = fetch_hcp_rest(data_dir=data_dir, n_subjects=500)
        mask = hcp_dataset.mask
        # list of 4D nifti files for each subject
        func_filenames = hcp_dataset.func
        # Flatten it
        func_filenames = [
            record for subject in func_filenames for record in subject
        ]

        # print basic information on the dataset
        print('First functional nifti image (4D) is at: %s' %
              hcp_dataset.func[0])  # 4D data
    return mask, func_filenames
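
A typical call site, as a hedged sketch (it assumes the HCP_extra folder was already fetched with make download-hcp_extra):

from nilearn.input_data import NiftiMasker

# Usage sketch: mask the first resting-state run returned above.
mask, func_filenames = get_hcp_data(raw=False)
masker = NiftiMasker(mask_img=mask, standardize=True).fit()
X = masker.transform(func_filenames[0])  # (n_timepoints, n_voxels) array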
Example #3
def prepare_hcp_raw_data(data_dir=None,
                         dest_data_dir='HCP_unmasked',
                         n_jobs=1,
                         smoothing_fwhm=3,
                         n_subject=500):
    data_dir = get_data_dirs(data_dir)[0]
    source_dir = join(data_dir, 'HCP')
    dest_data_dir = join(data_dir, dest_data_dir)
    dataset = fetch_hcp_rest(data_dir=data_dir, n_subjects=n_subject)
    imgs = dataset.func
    imgs = [img for subject_imgs in imgs for img in subject_imgs]
    try:
        os.mkdir(dest_data_dir)
    except OSError:
        raise ValueError('%s already exists, '
                         'please delete it manually before proceeding' %
                         dest_data_dir)
    mask = join(data_dir, 'HCP_extra', 'mask_img.nii.gz')

    masker = NiftiMasker(mask_img=mask,
                         smoothing_fwhm=smoothing_fwhm,
                         standardize=True).fit()
    Parallel(n_jobs=n_jobs)(
        delayed(_single_mask)(masker, img, dest_data_dir, source_dir)
        for img in imgs)
    _gather(dest_data_dir)
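
Usage is a single call; the sketch below assumes the raw HCP data and the HCP_extra mask are already in place and that _single_mask and _gather are defined elsewhere in the module:

# Unmask 500 subjects in parallel into <data_dir>/HCP_unmasked.
prepare_hcp_raw_data(n_jobs=4, smoothing_fwhm=3, n_subject=500)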
Example #4
def load_image(source, scale=1, gray=False, memory=Memory(cachedir=None)):
    data_dir = get_data_dirs()[0]
    if source == 'face':
        image = face(gray=gray)
        image = image.astype(np.float32) / 255
        if image.ndim == 2:
            image = image[..., np.newaxis]
        if scale != 1:
            image = memory.cache(rescale)(image, scale=scale)
        return image
    elif source == 'lisboa':
        image = imread(join(data_dir, 'images', 'lisboa.jpg'), as_grey=gray)
        image = image.astype(np.float32) / 255
        if image.ndim == 2:
            image = image[..., np.newaxis]
        if scale != 1:
            image = memory.cache(rescale)(image, scale=scale)
        return image
    elif source == 'aviris':
        image = open_image(
            join(
                data_dir, 'aviris', 'f100826t01p00r05rdn_b/'
                'f100826t01p00r05rdn_b_sc01_ort_img.hdr'))
        image = np.array(image.open_memmap(), dtype=np.float32)
        good_bands = list(range(image.shape[2]))
        good_bands.remove(110)
        image = image[:, :, good_bands]
        indices = image == -50
        image[indices] = -1
        image[~indices] -= np.min(image[~indices])
        image[~indices] /= np.max(image[~indices])
        return image
    else:
        raise ValueError('Data source is not known')
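
A quick usage sketch: 'face' needs no local files, while 'lisboa' and 'aviris' assume the corresponding images exist under get_data_dirs()[0]:

# Grayscale face image, scaled to [0, 1], with a trailing channel axis.
image = load_image('face', gray=True)
print(image.shape, image.dtype)  # (768, 1024, 1) float32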
Example #5
def fetch_hcp_task(data_dir=None, n_subjects=500):
    """Nilearn like fetcher"""
    data_dir = get_data_dirs(data_dir)[0]
    source_dir = join(data_dir, 'HCP')
    extra_dir = join(data_dir, 'HCP_extra')
    mask = join(extra_dir, 'mask_img.nii.gz')
    func = []
    meta = []
    contrasts = []
    ids = []

    list_dir = sorted(glob.glob(join(source_dir, '*/*/MNINonLinear/Results')))
    for dirpath in list_dir[:n_subjects]:
        dirpath_split = dirpath.split(os.sep)
        subject_id = dirpath_split[-3]
        serie_id = dirpath_split[-4]

        subject_id = int(subject_id)

        ids.append(subject_id)

        kwargs = {'subject_id': subject_id, 'serie_id': serie_id}

        subject_func = []
        subject_contrasts = []
        for i, task_id in enumerate(task_ids):
            task = tasks[task_id]
            task_name = task[0]
            contrast_idx = task[1]
            contrast = task[2]
            this_func = join(
                dirpath, "tfMRI_%s/tfMRI_%s_hp200_s4_"
                "level2vol.feat/cope%i.feat/"
                "stats/zstat1.nii.gz" % (task_name, task_name, contrast_idx))
            if os.path.exists(this_func):
                subject_contrasts.append(contrast)
                subject_func.append(this_func)
        meta.append(kwargs)
        contrasts.append(subject_contrasts)
        func.append(subject_func)

    results = {
        'func': func,
        'contrast': contrasts,
        'meta': meta,
        'mask': mask,
        'description': "Human connectome project",
        'contrasts_description': contrasts_description
    }
    return Bunch(**results)
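
A hedged usage sketch of the returned Bunch (field names taken from the dictionary above):

task_data = fetch_hcp_task(n_subjects=10)
print(len(task_data.func))    # one entry per subject found on disk
print(task_data.contrast[0])  # contrast names for the first subject
print(task_data.func[0][0])   # path to the first z-stat map
print(task_data.mask)         # shared brain mask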
Example #6
def fetch_hcp_behavioral(data_dir=None, n_subjects=500):
    import pandas as pd
    data_dir = get_data_dirs(data_dir)[0]
    source_dir = join(data_dir, 'HCP')
    df = pd.read_csv(join(source_dir, 'restricted_scores.csv'))
    list_dir = sorted(glob.glob(join(source_dir, '*/*/MNINonLinear/Results')))
    subjects = []
    for dirpath in list_dir[:n_subjects]:
        dirpath_split = dirpath.split(os.sep)
        subject_id = int(dirpath_split[-3])
        subjects.append(subject_id)
    indices = [subject in subjects for subject in df['Subject']]
    df = df.loc[indices]
    return df
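
The row filter can also be written with pandas' isin, which performs the same membership test:

# Equivalent selection of the subjects found on disk.
df = df[df['Subject'].isin(subjects)]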
Example #7
def load_movielens(version):
    data_home = get_data_dirs()[0]

    if version == "100k":
        path = os.path.join(data_home, "movielens100k", "movielens100k.pkl")
    elif version == "1m":
        path = os.path.join(data_home, "movielens1m", "movielens1m.pkl")
    elif version == "10m":
        path = os.path.join(data_home, "movielens10m", "movielens10m.pkl")
    else:
        raise ValueError("Invalid version of movielens.")

    # FIXME: make downloader
    if not os.path.exists(path):
        raise ValueError("Dowload dataset using 'make download-movielens%s' at"
                         " project root." % version)

    X = skjoblib.load(path)
    return X
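
A usage sketch, assuming the pickle was fetched beforehand with the corresponding make target:

# Load the MovieLens 100k ratings; the exact type depends on what was pickled.
X = load_movielens("100k")
print(type(X))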
Example #8
def fetch_hcp_rest(data_dir=None, n_subjects=500):
    """Nilearn like fetcher"""
    data_dir = get_data_dirs(data_dir)[0]
    source_dir = join(data_dir, 'HCP')
    extra_dir = join(data_dir, 'HCP_extra')
    mask = join(extra_dir, 'mask_img.nii.gz')
    func = []
    meta = []
    ids = []

    list_dir = sorted(glob.glob(join(source_dir, '*/*/MNINonLinear/Results')))
    for dirpath in list_dir[:n_subjects]:
        dirpath_split = dirpath.split(os.sep)
        subject_id = dirpath_split[-3]
        serie_id = dirpath_split[-4]

        subject_id = int(subject_id)

        ids.append(subject_id)

        kwargs = {'subject_id': subject_id, 'serie_id': serie_id}

        meta.append(kwargs)

        subject_func = []

        for filename in os.listdir(dirpath):
            name, ext = os.path.splitext(filename)
            if name in ('rfMRI_REST1_RL', 'rfMRI_REST1_LR', 'rfMRI_REST2_RL',
                        'rfMRI_REST2_LR'):
                filename = join(dirpath, filename, filename + '.nii.gz')
                subject_func.append(filename)
        func.append(subject_func)

    results = {
        'func': func,
        'meta': meta,
        'mask': mask,
        'description': "'Human connectome project"
    }
    return Bunch(**results)
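
A hedged usage sketch of the resting-state fetcher:

rest_data = fetch_hcp_rest(n_subjects=2)
print(rest_data.meta[0])  # {'subject_id': ..., 'serie_id': ...}
print(rest_data.func[0])  # up to four rfMRI_REST*_{LR,RL} runs for this subject
print(rest_data.mask)     # path to mask_img.nii.gz under HCP_extra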
Example #9
def load_rest_func(dataset='adhd',
                   n_subjects=40, test_size=0.1, raw=False, random_state=None):
    data_dir = get_data_dirs()[0]
    if dataset == 'adhd':
        adhd_dataset = datasets.fetch_adhd(n_subjects=n_subjects)
        mask = join(data_dir, 'ADHD_mask', 'mask_img.nii.gz')
        data = adhd_dataset.func  # list of 4D nifti files for each subject
    elif dataset == 'hcp':
        if not os.path.exists(join(data_dir, 'HCP_extra')):
            raise ValueError(
                'Please download HCP_extra folder using make download-hcp_extra'
                ' first.')
        if raw:
            mask = join(data_dir, 'HCP_extra/mask_img.nii.gz')
            try:
                mapping = json.load(
                    open(join(data_dir, 'HCP_unmasked/mapping.json'), 'r'))
            except FileNotFoundError:
                raise ValueError(
                    'Please unmask the data using hcp_prepare.py first.')
            data = sorted(list(mapping.values()))
        else:
            hcp_dataset = fetch_hcp_rest(data_dir=data_dir,
                                         n_subjects=n_subjects)
            mask = hcp_dataset.mask
            # list of 4D nifti files for each subject
            data = hcp_dataset.func
            # Flatten it
            data = [record for subject in data for record in subject]
    else:
        raise NotImplementedError
    train_data, test_data = train_test_split(data,
                                             test_size=test_size,
                                             random_state=random_state)
    return train_data, test_data, mask
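
A usage sketch for the ADHD branch, which nilearn downloads on demand (the ADHD_mask folder under the data directory is assumed to exist):

train_imgs, test_imgs, mask = load_rest_func(dataset='adhd', n_subjects=40,
                                             test_size=0.1, random_state=0)
print(len(train_imgs), 'train runs,', len(test_imgs), 'test runs')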