def apply_slhyper(queryengine, dss=[ds0, ds1], return_mappers=False, **kw):
    """Helper for a common code to create/call slhyper"""
    slhyper = SearchlightHyperalignment(queryengine=queryengine, **kw)
    mappers = slhyper(dss)
    proj = [m.proj.todense() for m in mappers]
    return (proj, mappers) if return_mappers else proj
def __call__(self, datasets):
    """Estimate mappers for each dataset.

    Parameters
    ----------
    datasets : list of datasets

    Returns
    -------
    mappers_iter1 : mappers from the first HA iteration
    mappers_iter2 : mappers from the second HA iteration
    mappers_final : mappers for the original datasets from the second HA
        instance (returned alone unless ``self.get_all_mappers`` is set)
    """
    debug.active += ['SHPAL', 'SLC']
    mask_node_indices = np.concatenate(self.mask_node_indices)
    qe = self.queryengine
    nproc = self.nproc
    dtype = self.dtype
    nblocks = self.nblocks
    ha_iter1 = SearchlightHyperalignment(queryengine=qe,
                                         nproc=nproc,
                                         nblocks=nblocks,
                                         mask_node_ids=mask_node_indices,
                                         dtype=dtype)
    mappers_iter1 = ha_iter1(datasets)
    aligned_iter1_datasets = self._apply_mappers(datasets, mappers_iter1)
    h2a_input_data = self._prep_h2a_data(aligned_iter1_datasets,
                                         mask_node_indices)
    ha_iter2 = SearchlightHyperalignment(queryengine=qe,
                                         nproc=nproc,
                                         nblocks=nblocks,
                                         mask_node_ids=mask_node_indices,
                                         dtype=dtype)
    mappers_iter2 = ha_iter2(h2a_input_data)
    # push the original data through the trained model
    mappers_final = ha_iter2(datasets)
    if self.get_all_mappers:
        return mappers_iter1, mappers_iter2, mappers_final
    return mappers_final
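# Note: _apply_mappers and _prep_h2a_data are helpers of the surrounding class
# and are not shown here.  As a rough sketch of what applying the mappers
# typically involves (an illustrative assumption, not the original helper),
# each dataset is pushed through its mapper's forward() and re-zscored:
from mvpa2.mappers.zscore import zscore

def _apply_mappers_sketch(datasets, mappers):
    # forward each dataset into the common space with its own mapper
    aligned = [m.forward(ds) for m, ds in zip(mappers, datasets)]
    # re-standardize the aligned time series before the next iteration
    for ds in aligned:
        zscore(ds, chunks_attr=None)
    return aligned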
def run_hyperalignment(subjects_to_analyze, out_dir):
    # Load subject data
    ds_all = []
    for subject_label in subjects_to_analyze:
        ds_all.append(h5load('%s/sub-%s_data.hdf5' % (out_dir, subject_label)))
    # Initialize searchlight hyperalignment
    slhyper = SearchlightHyperalignment(radius=2, nblocks=10,
                                        sparse_radius=5, dtype='float16')
    hmappers = slhyper(ds_all)
    return hmappers
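# Hypothetical usage of the helper above (the subject labels and out_dir are
# placeholders, not from the original script).  Each returned mapper projects
# the matching subject's dataset into the common model space via forward().
subjects_to_analyze = ['01', '02', '03']
out_dir = '/data/derivatives'
hmappers = run_hyperalignment(subjects_to_analyze, out_dir)
ds_all = [h5load('%s/sub-%s_data.hdf5' % (out_dir, s)) for s in subjects_to_analyze]
ds_aligned = [m.forward(ds) for m, ds in zip(hmappers, ds_all)]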
def test_searchlight_hyperalignment_warnings_and_exceptions(self):
    skip_if_no_external('scipy')
    skip_if_no_external('hdf5')  # needed for default results backend hdf5
    ds_orig = datasets['3dsmall'][:, :1]  # tiny dataset just to test exceptions
    ds_orig.fa['voxel_indices'] = ds_orig.fa.myspace
    slhyper = SearchlightHyperalignment()
    self.assertRaises(ValueError, slhyper, [ds_orig])  # need more than 1
    ds_orig.samples += 1.0  # not zscored for sure
    # TODO: we need assert_warnings to also capture our own warnings,
    # currently they are just suppressed :-/ So this is just a smoke test
    mappers = slhyper([ds_orig, ds_orig.copy()])
def align_one_target(sources_train, sources_test, target_train, target_test,
                     method, masker, pairwise_method, clustering, n_pieces,
                     n_jobs, decoding_dir=None, srm_atlas=None,
                     srm_components=40, ha_radius=5, ha_sparse_radius=3,
                     smoothing_fwhm=6, surface=False):
    overhead_time = 0
    aligned_sources_test = []

    if surface == "rh":
        clustering = clustering.replace("lh", "rh")
        # clustering = load_surf_data(
        #     "/storage/store2/tbazeill/schaeffer/FreeSurfer5.3/fsaverage/label/rh.Schaefer2018_700Parcels_17Networks_order.annot")
        sources_train = np.asarray(
            [t.replace("lh", "rh") for t in sources_train])
        sources_test = np.asarray(
            [t.replace("lh", "rh") for t in sources_test])
        target_train = target_train.replace("lh", "rh")
        target_test = target_test.replace("lh", "rh")
    if surface in ["rh", "lh"]:
        from nilearn.surface import load_surf_data
        clustering = load_surf_data(clustering)

    if method == "anat_inter_subject":
        fit_start = time.process_time()
        if surface in ["lh", "rh"]:
            aligned_sources_test = load_clean(sources_test, masker)
            aligned_target_test = load_clean(target_test, masker)
        else:
            aligned_sources_test = np.vstack(
                [masker.transform(s) for s in sources_test])
            aligned_target_test = masker.transform(target_test)
    elif method == "smoothing":
        fit_start = time.process_time()
        smoothing_masker = NiftiMasker(mask_img=masker.mask_img_,
                                       smoothing_fwhm=smoothing_fwhm).fit()
        aligned_sources_test = np.vstack(
            [smoothing_masker.transform(s) for s in sources_test])
        aligned_target_test = smoothing_masker.transform(target_test)
    elif method in ["pairwise", "intra_subject"]:
        fit_start = time.process_time()
        for source_train, source_test in zip(sources_train, sources_test):
            if method == "pairwise":
                if surface in ["lh", "rh"]:
                    source_align = SurfacePairwiseAlignment(
                        alignment_method=pairwise_method,
                        clustering=clustering,
                        n_jobs=n_jobs)
                else:
                    source_align = PairwiseAlignment(
                        alignment_method=pairwise_method,
                        clustering=clustering,
                        n_pieces=n_pieces,
                        mask=masker,
                        n_jobs=n_jobs)
                source_align.fit(source_train, target_train)
                aligned_sources_test.append(
                    source_align.transform(source_test))
            elif method == "intra_subject":
                source_align = IntraSubjectAlignment(
                    alignment_method="ridge_cv",
                    clustering=clustering,
                    n_pieces=n_pieces,
                    mask=masker,
                    n_jobs=n_jobs)
                source_align.fit(source_train, source_test)
                aligned_sources_test.append(
                    source_align.transform(target_train))
        if surface in ["lh", "rh"]:
            aligned_sources_test = np.vstack(aligned_sources_test)
            aligned_target_test = load_clean(target_test, masker)
        else:
            aligned_target_test = masker.transform(target_test)
            aligned_sources_test = np.vstack(
                [masker.transform(t) for t in aligned_sources_test])
    elif method == "srm":
        common_time = time.process_time()
        fastsrm = FastSRM(atlas=srm_atlas,
                          n_components=srm_components,
                          n_iter=1000,
                          n_jobs=n_jobs,
                          aggregate="mean",
                          temp_dir=decoding_dir)
        reduced_SR = fastsrm.fit_transform(
            [masker.transform(t).T for t in sources_train])
        overhead_time = time.process_time() - common_time
        fit_start = time.process_time()
        fastsrm.aggregate = None
        fastsrm.add_subjects([masker.transform(t).T for t in [target_train]],
                             reduced_SR)
        aligned_test = fastsrm.transform([
            masker.transform(t).T
            for t in np.hstack([sources_test, [target_test]])
        ])
        aligned_sources_test = np.hstack(aligned_test[:-1]).T
        aligned_target_test = aligned_test[-1].T
    elif method == "HA":
        overhead_time = 0
        fit_start = time.process_time()
        from mvpa2.algorithms.searchlight_hyperalignment import SearchlightHyperalignment
        from mvpa2.datasets.base import Dataset
        pymvpa_datasets = []

        flat_mask = load_img(masker.mask_img_).get_fdata().flatten()
        n_voxels = flat_mask.sum()
        flat_coord_grid = make_coordinates_grid(
            masker.mask_img_.shape).reshape((-1, 3))
        masked_coord_grid = flat_coord_grid[flat_mask != 0]
        for sub, sub_data in enumerate(
                np.hstack([[target_train], sources_train])):
            d = Dataset(masker.transform(sub_data))
            d.fa['voxel_indices'] = masked_coord_grid
            pymvpa_datasets.append(d)
        ha = SearchlightHyperalignment(radius=ha_radius,
                                       nproc=1,
                                       sparse_radius=ha_sparse_radius)
        ha(pymvpa_datasets)
        aligned_sources_test = []
        for j, source_test in enumerate(sources_test):
            if surface in ["lh", "rh"]:
                array_source = load_clean(source_test, masker)
            else:
                array_source = masker.transform(source_test)
            aligned_sources_test.append(
                array_source.dot(ha.projections[j + 1].proj.toarray()))
        aligned_sources_test = np.vstack(aligned_sources_test)
        aligned_target_test = masker.transform(target_test).dot(
            ha.projections[0].proj.toarray())
    fit_time = time.process_time() - fit_start

    return aligned_sources_test, aligned_target_test, fit_time, overhead_time
# where to write out intermediate files
os.environ['TMPDIR'] = '/dartfs-hpc/scratch/f002d44/temp'
os.environ['TEMP'] = '/dartfs-hpc/scratch/f002d44/temp'
os.environ['TMP'] = '/dartfs-hpc/scratch/f002d44/temp'

t0 = time.time()
print('-------- beginning hyperalignment at {t0} --------'.format(t0=t0))

debug.active += ['SHPAL', 'SLC']

N_PROCS = 16
N_BLOCKS = 128

slhyper = SearchlightHyperalignment(
    queryengine=qe,  # pass it our surface query engine
    nproc=N_PROCS,  # the number of processes we want to use
    nblocks=N_BLOCKS,  # the number of blocks we want to divide that into (the more you have, the less memory it takes)
    mask_node_ids=node_indices,  # tell it which nodes you are masking
    dtype='float64')

transformations = slhyper(dss)

elapsed = time.time() - t0
print('-------- time elapsed: {elapsed} --------'.format(elapsed=elapsed))

h5save(outdir + 'hyperalignment_mappers.hdf5.gz', transformations, compression=9)

# 7. You did it! Way to go. That saved an HDF5 file of each subject's transformation matrices into the common space.
# Now we save each individual's mapper as an npz.
from scipy.sparse import save_npz, load_npz
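# A minimal sketch of that npz step (not from the original script): assuming
# `transformations` is the list of per-subject projection mappers returned
# above and `subjects` is a matching list of subject IDs (a hypothetical name
# here), each mapper's sparse projection matrix is written to its own .npz
# file, which can later be read back with load_npz.
for subj, T in zip(subjects, transformations):
    save_npz(os.path.join(outdir, '{0}_mapper.npz'.format(subj)), T.proj)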
def __init__(self, **kwargs):
    SearchlightHyperalignment.__init__(self, **kwargs)
    ds.fa['voxel_indices'] = range(ds.shape[1])
    # ds.sa['chunks'] = np.repeat(i, cnx_tx)
    mv.zscore(ds, chunks_attr=None)
    dss.append(ds)

print('Number of data sets in dss: ')
print(len(dss))
print('Size of data sets: ')
print(dss[0].shape)

# create SL hyperalignment instance
hyper = SearchlightHyperalignment(
    queryengine=qe,
    compute_recon=False,  # We don't need to project back from common space to subject space
    nproc=1,
    nblocks=N_BLOCKS,
    dtype='float64')

# start timer
t0 = time.time()

# hyperalign dss
mappers = hyper(dss)

# save mappers
try:
    h5save(toutdir, mappers)
    print('saved hdf5 mappers')
except:
def test_custom_qas(self):
    # Test if we could provide custom QEs per each of the datasets
    skip_if_no_external('scipy')
    skip_if_no_external('hdf5')  # needed for default results backend hdf5

    ns, nf = 10, 4  # # of samples/features -- a very BIG dataset ;)
    ds0 = Dataset(np.random.normal(size=(ns, nf)))
    zscore(ds0, chunks_attr=None)
    ds1 = ds0[:, [3, 0, 1, 2]]  # features circular shifted to the right

    qe0 = FancyQE([[0], [1], [2], [3]])  # does nothing
    qe1 = FancyQE([[1], [2], [3], [0]])  # knows to look into the right

    def apply_slhyper(queryengine, dss=[ds0, ds1], return_mappers=False, **kw):
        """Helper for a common code to create/call slhyper"""
        slhyper = SearchlightHyperalignment(queryengine=queryengine, **kw)
        mappers = slhyper(dss)
        proj = [m.proj.todense() for m in mappers]
        return (proj, mappers) if return_mappers else proj

    # since this single qe resulted in trying to match non-matching time series
    # projections should be non-identity, but no offdiagonal elements
    assert_no_offdiag(apply_slhyper(qe0))

    # both are provided
    projs, mappers = apply_slhyper([qe0, qe1], return_mappers=True)
    tprojs_shifted = [np.eye(nf), np.roll(np.eye(nf), 1, axis=0)]
    assert_array_equal(projs[0], tprojs_shifted[0])  # must be identity since we made them so
    assert_array_equal(projs[1], tprojs_shifted[1])  # pretty much incorporating that shift
    # TODO -- not identity
    # assert_array_equal(projs[0], np.eye(len(p)))  # must be identity since we made them so
    # and must restore data properly
    assert_array_almost_equal(mappers[0].forward(ds0),
                              mappers[1].forward(ds1))

    # give more than # of qes
    assert_raises(ValueError,
                  SearchlightHyperalignment(queryengine=[qe0, qe1]),
                  [ds0, ds1, ds0])

    # The one having no voxels for the "1st" id in "subj1"
    qe1_ = FancyQE([[1], [], [3], [0]])  # knows to look into the right
    projs = apply_slhyper(qe1_)
    assert_no_offdiag(projs)
    for proj in projs:
        # assess that both have '2nd' one 0
        # but not the others!
        assert_array_equal(np.diagonal(proj) != 0, [True, True, False, True])

    # smoke test whenever combine is False
    # In this case should work ok
    apply_slhyper(qe0, combine_neighbormappers=False)
    # this one ok as well since needs only matching ones in ref_ds
    apply_slhyper([qe0, qe1], combine_neighbormappers=False)
    # here since features do not match node_ids -- should raise ValueError
    assert_raises(ValueError, apply_slhyper, qe1, combine_neighbormappers=False)
    assert_raises(ValueError, apply_slhyper, [qe0, qe1],
                  ref_ds=1, combine_neighbormappers=False)

    # and now only one qe lacking for that id
    projs = apply_slhyper([qe0, qe1_])
    tproj0 = np.eye(nf)
    tproj0[1, 1] = 0
    tprojs_shifted_1st0 = [tproj0, np.roll(tproj0, 1, axis=0)]
    for proj, tproj in zip(projs, tprojs_shifted_1st0):
        # assess that both have '2nd' one 0
        # but not the others!
        assert_array_equal(proj, tproj)

    # And now a test with varying number of selected fids, no shift
    qe0 = FancyQE([[0], [1, 2], [1, 2, 3], [0, 1, 2, 3]])
    projs = apply_slhyper(qe0)
    # Test that in general we get larger coefficients for "correct" transformation
    for p, tproj in zip(projs, tprojs_shifted):
        assert (np.all(np.asarray(p)[tproj > 0] >= 1.0))
        assert_array_lequal(np.mean(np.asarray(p)[tproj == 0]), 0.3)

    qe1 = FancyQE([[0, 1, 2, 3], [1, 2, 3], [2, 3], [3]])
    # Just a smoke test, for now TODO
    projs = apply_slhyper([qe0, qe1])
def test_searchlight_hyperalignment(self):
    skip_if_no_external('scipy')
    skip_if_no_external('h5py')
    ds_orig = datasets['3dsmall'].copy()[:, :15]
    ds_orig.fa['voxel_indices'] = ds_orig.fa.myspace
    space = 'voxel_indices'
    # total number of datasets for the analysis
    nds = 5
    zscore(ds_orig, chunks_attr=None)
    dss = [ds_orig]
    # create a few distorted datasets to match the desired number of datasets
    # not sure if this truly mimics the real data, but at least we can test
    # implementation
    while len(dss) < nds - 1:
        sd = local_random_affine_transformations(
            ds_orig,
            scatter_neighborhoods(Sphere(1), ds_orig.fa[space].value,
                                  deterministic=True)[1],
            Sphere(2),
            space=space,
            scale_fac=1.0, shift_fac=0.0)
        # sometimes above function returns dataset with nans, infs, we don't want that.
        if np.sum(np.isnan(sd.samples) + np.isinf(sd.samples)) == 0 \
                and np.all(sd.samples.std(0)):
            dss.append(sd)
    ds_orig_noisy = ds_orig.copy()
    ds_orig_noisy.samples += 0.1 * np.random.random(size=ds_orig_noisy.shape)
    dss.append(ds_orig_noisy)
    _ = [zscore(sd, chunks_attr=None) for sd in dss[1:]]
    # we should have some distortion
    for ds in dss[1:]:
        assert_false(np.all(ds_orig.samples == ds.samples))
    # testing checks
    slhyp = SearchlightHyperalignment(ref_ds=1, exclude_from_model=[1])
    self.assertRaises(ValueError, slhyp, dss[:3])
    slhyp = SearchlightHyperalignment(ref_ds=3)
    self.assertRaises(ValueError, slhyp, dss[:3])
    # explicit test of exclude_from_model
    slhyp = SearchlightHyperalignment(ref_ds=2, exclude_from_model=[1],
                                      featsel=0.7)
    projs1 = slhyp(dss)
    aligned1 = [proj.forward(ds) for proj, ds in zip(projs1, dss)]
    samples = dss[1].samples.copy()
    dss[1].samples += 0.1 * np.random.random(size=dss[1].shape)
    projs2 = slhyp(dss)
    aligned2 = [proj.forward(ds) for proj, ds in zip(projs2, dss)]
    for i in [0, 2, 3, 4]:
        assert_array_almost_equal(projs1[i].proj.todense(),
                                  projs2[i].proj.todense())
        assert_array_almost_equal(aligned1[i].samples, aligned2[i].samples)
    assert_false(np.all(projs1[1].proj.todense() == projs2[1].proj.todense()))
    assert_false(np.all(aligned1[1].samples == aligned2[1].samples))
    dss[1].samples = samples
    # store projections for each mapper separately
    projs = list()
    # run the algorithm with all combinations of the two major parameters
    # for projection calculation.
    for kwargs in [{'combine_neighbormappers': True,
                    'nproc': 1 + int(externals.exists('pprocess'))},
                   {'combine_neighbormappers': True, 'dtype': 'float64',
                    'compute_recon': True},
                   {'combine_neighbormappers': True,
                    'exclude_from_model': [2, 4]},
                   {'combine_neighbormappers': False},
                   {'combine_neighbormappers': False,
                    'mask_node_ids': np.arange(dss[0].nfeatures).tolist()},
                   {'combine_neighbormappers': True, 'sparse_radius': 1},
                   {'combine_neighbormappers': True, 'nblocks': 2}]:
        slhyp = SearchlightHyperalignment(radius=2, **kwargs)
        mappers = slhyp(dss)
        # one mapper per input ds
        assert_equal(len(mappers), nds)
        projs.append(mappers)
    # some checks
    for midx in range(nds):
        # making sure mask_node_ids option works as expected
        assert_array_almost_equal(projs[3][midx].proj.todense(),
                                  projs[4][midx].proj.todense())
        # recon check
        assert_array_almost_equal(projs[0][midx].proj.todense(),
                                  projs[1][midx].recon.T.todense(),
                                  decimal=5)
        assert_equal(projs[1][midx].proj.dtype, 'float64')
        assert_equal(projs[0][midx].proj.dtype, 'float32')
    # making sure the projections make sense
    for proj in projs:
        # no .max on sparse matrices on older scipy (e.g. on precise), so convert to array first
        max_weight = proj[0].proj.toarray().max(1).squeeze()
        diag_weight = proj[0].proj.diagonal()
        # Check to make sure diagonal is the max weight, in almost all rows, for the reference subject
        assert (np.sum(max_weight == diag_weight) / float(len(diag_weight)) >= 0.80)
        # and not true for other subjects
        for i in range(1, nds - 1):
            assert (np.sum(proj[i].proj.toarray().max(1).squeeze() ==
                           proj[i].proj.diagonal()) / float(proj[i].proj.shape[0]) < 0.80)
        # Check to make sure projection weights match across duplicate datasets
        max_weight = proj[-1].proj.toarray().max(1).squeeze()
        diag_weight = proj[-1].proj.diagonal()
        # Check to make sure diagonal is the max weight, in almost all rows
        assert (np.sum(max_weight == diag_weight) / float(len(diag_weight)) >= 0.80)
    # project data
    dss_hyper = [hm.forward(sd) for hm, sd in zip(projs[0], dss)]
    _ = [zscore(sd, chunks_attr=None) for sd in dss_hyper]
    ndcss = []
    nf = ds_orig.nfeatures
    for ds_hyper in dss_hyper:
        ndcs = np.diag(np.corrcoef(ds_hyper.samples.T,
                                   ds_orig.samples.T)[nf:, :nf], k=0)
        ndcss += [ndcs]
    assert_true(np.median(ndcss[0]) > 0.9)
    # noisy copy of original dataset should be similar to original after hyperalignment
    assert_true(np.median(ndcss[-1]) > 0.9)
    assert_true(np.all([np.median(ndcs) > 0.2 for ndcs in ndcss[1:-2]]))
for nst, split in enumerate(splits):
    if nst == 0:
        runs = [1, 2, 3]
    elif nst == 1:
        runs = [4, 5]
    dss, node_indices = load_dss(lr, runs, subjects)
    for ds in dss:
        print(ds.shape)
    debug.active += ['SHPAL', 'SLC']
    ha = SearchlightHyperalignment(
        queryengine=SurfaceQueryEngine(surf, radius),
        nproc=N_JOBS,
        nblocks=128,
        compute_recon=False,
        featsel=1.0,
        mask_node_ids=node_indices,
        dtype='float64',
    )
    Ts = ha(dss)
    for T, subj in zip(Ts, subjects):
        out_fn = '/out_fn/split_{split}/{subj}_{lr}h_{radius}_{runs}.hdf5.gz'\
            ''.format(split=split, subj=subj, lr=lr, radius=radius,
                      runs='-'.join([str(_) for _ in runs]))
        h5save(out_fn, T, compression=9)

for subj in subjects:
    for lr in 'lr':
        for nst, split in enumerate(splits):
            if nst == 0:
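# The reload loop above is truncated.  As an illustrative sketch only (it
# mirrors, but is not, the original code), a saved transformation can be read
# back with h5load and applied to a dataset with its forward() method:
from mvpa2.base.hdf5 import h5load

runs = [1, 2, 3]
fn = '/out_fn/split_{split}/{subj}_{lr}h_{radius}_{runs}.hdf5.gz'.format(
    split=splits[0], subj=subjects[0], lr=lr, radius=radius,
    runs='-'.join([str(_) for _ in runs]))
T = h5load(fn)
ds_common = T.forward(dss[0])  # dataset projected into the common space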
def __init__(self, **kwargs):
    SearchlightHyperalignment.__init__(self, **kwargs)