def test_get_data_dir(tmpdir):
    # testing folder creation under different environments, enforcing
    # a custom clean install
    os.environ.pop('MODL_DATA', None)
    os.environ.pop('MODL_SHARED_DATA', None)

    # default location when no environment variable is set
    expected_base_dir = os.path.expanduser('~/modl_data')
    data_dir = get_data_dirs()[0]
    assert_equal(data_dir, expected_base_dir)

    # MODL_DATA overrides the default
    expected_base_dir = os.path.join(str(tmpdir), 'modl_data')
    os.environ['MODL_DATA'] = expected_base_dir
    data_dir = get_data_dirs()[0]
    assert_equal(data_dir, expected_base_dir)

    # MODL_SHARED_DATA takes precedence over MODL_DATA
    expected_base_dir = os.path.join(str(tmpdir), 'modl_shared_data')
    os.environ['MODL_SHARED_DATA'] = expected_base_dir
    data_dir = get_data_dirs()[0]
    assert_equal(data_dir, expected_base_dir)

    # an explicit data_dir argument takes precedence over everything
    expected_base_dir = os.path.join(str(tmpdir), 'modl_data')
    os.environ.pop('MODL_DATA', None)
    os.environ.pop('MODL_SHARED_DATA', None)
    data_dir = get_data_dirs(expected_base_dir)[0]
    assert_equal(data_dir, expected_base_dir)
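# The helper under test is not shown in this excerpt. A minimal sketch that is
# consistent with the assertions above (explicit argument, then
# MODL_SHARED_DATA, then MODL_DATA, then ~/modl_data) could look like the
# following; it mirrors nilearn's get_data_dirs convention, and the real
# implementation may differ.
def _get_data_dirs_sketch(data_dir=None):
    paths = []
    if data_dir is not None:
        paths.extend(str(data_dir).split(os.pathsep))
    if 'MODL_SHARED_DATA' in os.environ:
        paths.extend(os.environ['MODL_SHARED_DATA'].split(os.pathsep))
    if 'MODL_DATA' in os.environ:
        paths.extend(os.environ['MODL_DATA'].split(os.pathsep))
    paths.append(os.path.expanduser('~/modl_data'))
    return paths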
def load_netflix():
    data_home = get_data_dirs()[0]
    path = os.path.join(data_home, "nf_prize", "X_tr.pkl")
    X_tr = load(path)
    path = os.path.join(data_home, "nf_prize", "X_te.pkl")
    X_te = load(path)
    return X_tr, X_te
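# Hypothetical usage, assuming the pickled Netflix Prize matrices have already
# been placed under <data_dir>/nf_prize (e.g. via a project Makefile target);
# their exact type (likely scipy.sparse matrices) is not specified here.
def _demo_load_netflix():
    X_tr, X_te = load_netflix()
    print('train shape:', X_tr.shape, 'test shape:', X_te.shape)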
def fetch_adhd(n_subjects=40, data_dir=None, url=None, resume=True,
               modl_data_dir=None, mask_url=None, verbose=1):
    dataset = nilearn_fetch_adhd(n_subjects=n_subjects, data_dir=data_dir,
                                 url=url, resume=resume, verbose=verbose)
    # Recover the root 'adhd' directory from the path of the first subject
    root_dir = dataset.func[0]
    tail_dir = ''
    while tail_dir != 'adhd':
        root_dir, tail_dir = os.path.split(root_dir)
    root_dir = os.path.join(root_dir, tail_dir)

    # Download the mask image into the modl data directory
    modl_data_dir = get_data_dirs(modl_data_dir)[0]
    mask_data_dir = join(modl_data_dir, 'adhd')
    if mask_url is None:
        mask_url = 'http://amensch.fr/data/adhd/mask_img.nii.gz'
    _fetch_file(mask_url, mask_data_dir, resume=resume)
    mask_img = join(mask_data_dir, 'mask_img.nii.gz')

    # Phenotypic information, indexed by subject id
    behavioral = pd.DataFrame(dataset.phenotypic)
    behavioral.loc[:, 'Subject'] = pd.to_numeric(behavioral.loc[:, 'Subject'])
    behavioral.set_index('Subject', inplace=True)
    behavioral.index.names = ['subject']

    rest = pd.DataFrame(data=list(zip(dataset.func, dataset.confounds)),
                        columns=['filename', 'confounds'],
                        index=behavioral.index)
    return Bunch(rest=rest, behavioral=behavioral,
                 description=dataset.description,
                 mask=mask_img, root=root_dir)
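# Hypothetical usage: fetch a couple of subjects and mask the first rest run
# with nilearn. NiftiMasker is standard nilearn API, but the parameter choices
# below are illustrative only and not taken from the original code.
def _demo_fetch_adhd():
    from nilearn.input_data import NiftiMasker
    adhd = fetch_adhd(n_subjects=2)
    masker = NiftiMasker(mask_img=adhd.mask, standardize=True)
    first_run = adhd.rest['filename'].iloc[0]
    data = masker.fit_transform(first_run)
    print(data.shape)  # (n_timepoints, n_voxels)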
def load_movielens(version):
    data_home = get_data_dirs()[0]
    if version == "100k":
        path = os.path.join(data_home, "movielens100k", "movielens100k.pkl")
    elif version == "1m":
        path = os.path.join(data_home, "movielens1m", "movielens1m.pkl")
    elif version == "10m":
        path = os.path.join(data_home, "movielens10m", "movielens10m.pkl")
    else:
        raise ValueError("Invalid version of movielens.")
    # FIXME: make downloader
    if not os.path.exists(path):
        raise ValueError("Download dataset using 'make download-movielens%s'"
                         " at project root." % version)
    X = load(path)
    return X
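# Hypothetical usage; X is assumed to be a (n_users, n_items) ratings matrix,
# and the nnz attribute assumes a scipy.sparse format, which the downloaded
# pickles are expected (but not guaranteed here) to contain.
def _demo_load_movielens():
    X = load_movielens('100k')
    print('%d users x %d items, %d ratings'
          % (X.shape[0], X.shape[1], X.nnz))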
def load_image(source, scale=1, gray=False, memory=Memory(cachedir=None)):
    data_dir = get_data_dirs()[0]
    if source == 'face':
        image = face(gray=gray)
        image = image.astype(np.float32) / 255
        if image.ndim == 2:
            image = image[..., np.newaxis]
        if scale != 1:
            image = memory.cache(rescale)(image, scale=scale)
        return image
    elif source == 'lisboa':
        image = imread(join(data_dir, 'images', 'lisboa.jpg'), as_grey=gray)
        image = image.astype(np.float32) / 255
        if image.ndim == 2:
            image = image[..., np.newaxis]
        if scale != 1:
            image = memory.cache(rescale)(image, scale=scale)
        return image
    elif source == 'aviris':
        image = open_image(
            join(data_dir, 'aviris', 'f100826t01p00r05rdn_b/'
                                     'f100826t01p00r05rdn_b_sc01_ort_img.hdr'))
        image = np.array(image.open_memmap(), dtype=np.float32)
        # Discard band 110
        good_bands = list(range(image.shape[2]))
        good_bands.remove(110)
        image = image[:, :, good_bands]
        # Pixels equal to -50 are treated as invalid: set them to -1 and
        # rescale the remaining values to [0, 1]
        indices = image == -50
        image[indices] = -1
        image[~indices] -= np.min(image[~indices])
        image[~indices] /= np.max(image[~indices])
        return image
    else:
        raise ValueError('Data source is not known')
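# Hypothetical usage with the built-in 'face' source (no data files needed).
# The patch-extraction step is illustrative, using scikit-learn's
# extract_patches_2d on the (height, width, channels) array returned above;
# patch size and count are arbitrary choices.
def _demo_load_image():
    from sklearn.feature_extraction.image import extract_patches_2d
    image = load_image('face', gray=True)
    patches = extract_patches_2d(image, (8, 8), max_patches=1000,
                                 random_state=0)
    print(image.shape, patches.shape)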
def compute_components(n_components, batch_size, learning_rate, method,
                       reduction, alpha, step_size, n_jobs, n_epochs,
                       verbose, source, _run):
    basedir = join(_run.observers[0].basedir, str(_run._id))
    artifact_dir = join(basedir, 'artifacts')
    if not os.path.exists(artifact_dir):
        os.makedirs(artifact_dir)

    if source == 'hcp':
        # Hack to recover data from TSP
        train_size = None
        smoothing_fwhm = 3
        test_size = 2
        data_dir = get_data_dirs()[0]
        mask = fetch_hcp_mask()
        masker = MultiRawMasker(mask_img=mask, smoothing_fwhm=smoothing_fwhm,
                                detrend=True, standardize=True)
        with open(join(data_dir, 'HCP_unmasked', 'mapping.json'), 'r') as f:
            mapping = json.load(f)
        data = sorted(list(mapping.values()))
        data = list(map(lambda x: join(data_dir, x), data))
        data = pd.DataFrame(data, columns=['filename'])
    else:
        smoothing_fwhm = 6
        train_size = 4
        test_size = 4
        raw_res_dir = join(get_output_dir(), 'unmasked', source)
        try:
            masker, data = get_raw_rest_data(raw_res_dir)
        except ValueError:
            # On local machine:
            raw_res_dir = join(get_output_dir(), 'unmask', source)
            masker, data = get_raw_rest_data(raw_res_dir)

    train_imgs, test_imgs = train_test_split(data, test_size=test_size,
                                             random_state=0,
                                             train_size=train_size)
    train_imgs = train_imgs['filename'].values
    test_imgs = test_imgs['filename'].values

    cb = rfMRIDictionaryScorer(test_imgs, info=_run.info)
    dict_fact = fMRIDictFact(method=method,
                             mask=masker,
                             verbose=verbose,
                             n_epochs=n_epochs,
                             n_jobs=n_jobs,
                             random_state=1,
                             n_components=n_components,
                             smoothing_fwhm=smoothing_fwhm,
                             learning_rate=learning_rate,
                             batch_size=batch_size,
                             reduction=reduction,
                             step_size=step_size,
                             alpha=alpha,
                             callback=cb,
                             )
    dict_fact.fit(train_imgs)
    dict_fact.components_img_.to_filename(join(artifact_dir,
                                               'components.nii.gz'))

    fig = plt.figure()
    display_maps(fig, dict_fact.components_img_)
    plt.savefig(join(artifact_dir, 'components.png'))

    fig, ax = plt.subplots(1, 1)
    ax.plot(cb.cpu_time, cb.score, marker='o')
    _run.info['time'] = cb.cpu_time
    _run.info['score'] = cb.score
    _run.info['iter'] = cb.iter
    plt.savefig(join(artifact_dir, 'score.png'))
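# Hypothetical wiring of compute_components into a sacred Experiment; the
# experiment name, observer directory and config defaults below are
# illustrative only and not taken from the original script. A
# FileStorageObserver is needed because compute_components reads
# _run.observers[0].basedir to build its artifact directory.
def _experiment_sketch():
    from sacred import Experiment
    from sacred.observers import FileStorageObserver

    exp = Experiment('compute_components')
    exp.observers.append(
        FileStorageObserver.create(join(get_output_dir(), 'runs')))
    # Illustrative defaults; the real configuration may differ.
    exp.add_config(n_components=70, batch_size=200, learning_rate=0.92,
                   method='masked', reduction=12, alpha=1e-4, step_size=1e-5,
                   n_jobs=2, n_epochs=1, verbose=15, source='adhd')
    exp.main(compute_components)
    # Running would then be: _experiment_sketch().run()
    return exp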