def test_hpal_svd_combo(self):
    # get seed dataset
    ds4l = datasets['uni4large']
    ds_orig = ds4l[:, ds4l.a.nonbogus_features]
    # XXX Is this SVD mapping required?
    svm = SVDMapper()
    svm.train(ds_orig)
    ds_svs = svm.forward(ds_orig)
    ds_orig.samples = ds_svs.samples
    nf_true = ds_orig.nfeatures
    n = 4  # number of datasets to generate
    # Add non-shared dimensions for each subject
    dss_rotated = [[]] * n
    for i in range(n):
        dss_rotated[i] = hstack(
            (ds_orig, ds4l[:, ds4l.a.bogus_features[i * 4:i * 4 + 4]]))
    # rotate data
    nf = dss_rotated[0].nfeatures
    dss_rotated = [random_affine_transformation(dss_rotated[i])
                   for i in range(n)]
    # Test if it is close to doing hpal+SVD in sequence outside hpal
    # First, as we do in sequence outside hpal
    ha = Hyperalignment()
    mappers_orig = ha(dss_rotated)
    dss_back = [m.forward(ds_)
                for m, ds_ in zip(mappers_orig, dss_rotated)]
    dss_mean = np.mean([sd.samples for sd in dss_back], axis=0)
    svm = SVDMapper()
    svm.train(dss_mean)
    dss_sv = [svm.forward(sd) for sd in dss_back]
    # Test SVD dimensionality reduction even with only 2 training subjects
    for output_dim in [1, 4]:
        ha = Hyperalignment(output_dim=output_dim)
        ha.train(dss_rotated[:2])
        mappers = ha(dss_rotated)
        dss_back = [m.forward(ds_)
                    for m, ds_ in zip(mappers, dss_rotated)]
        for sd in dss_back:
            assert sd.nfeatures == output_dim
    # Check that combined hpal+SVD works as expected
    sv_corrs = []
    for sd1, sd2 in zip(dss_sv, dss_back):
        ndcs = np.diag(np.corrcoef(sd1.samples.T,
                                   sd2.samples.T)[nf:, :nf], k=0)
        sv_corrs.append(ndcs)
    self.assertTrue(
        np.all(np.abs(np.array(sv_corrs)) >= 0.95),
        msg="Hyperalignment with dimensionality reduction should have "
            "reconstructed SVD dataset. Got correlations %s." % sv_corrs)
    # Check that it recovers the original SVs
    sv_corrs_orig = []
    for sd in dss_back:
        ndcs = np.diag(np.corrcoef(sd.samples.T,
                                   ds_orig.samples.T)[nf_true:, :nf_true],
                       k=0)
        sv_corrs_orig.append(ndcs)
    self.assertTrue(
        np.all(np.abs(np.array(sv_corrs_orig)) >= 0.9),
        msg="Expected original dimensions after "
            "SVD. Got correlations %s." % sv_corrs_orig)
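# The equivalence exercised by the test above can be used directly: passing
# output_dim makes Hyperalignment append an SVD reduction of the common space,
# which should closely match running Hyperalignment and SVDMapper in sequence.
# A minimal sketch, reusing the seed dataset from the test:
dss = [random_affine_transformation(ds4l) for _ in range(4)]
ha = Hyperalignment(output_dim=4)
ha.train(dss)
mappers = ha(dss)
dss_reduced = [m.forward(sd) for m, sd in zip(mappers, dss)]
assert all(sd.nfeatures == 4 for sd in dss_reduced)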
def test_hpal_joblib(self):
    skip_if_no_external('joblib')
    # get seed dataset
    ds4l = datasets['uni4large']
    dss_rotated = [random_affine_transformation(ds4l, scale_fac=100,
                                                shift_fac=10)
                   for i in range(4)]
    ha = Hyperalignment(nproc=1, enable_ca=['residual_errors'])
    ha.train(dss_rotated[:2])
    mappers = ha(dss_rotated)
    ha_proc = Hyperalignment(nproc=2, enable_ca=['residual_errors'])
    ha_proc.train(dss_rotated[:2])
    mappers_nproc = ha_proc(dss_rotated)
    # not sure yet why results are not exact on Windows only
    cmp_ = assert_array_equal if (not on_windows) \
        else assert_array_almost_equal
    # "Mappers differ when using nproc>1."
    [cmp_(m.proj, mp.proj) for m, mp in zip(mappers, mappers_nproc)]
    cmp_(ha.ca.residual_errors.samples,
         ha_proc.ca.residual_errors.samples)
    # smoke test
    ha = Hyperalignment(nproc=0)
    mappers = ha(dss_rotated)
def test_hyper_input_dataset_check(self):
    # If supplied with only one dataset during training,
    # make sure it doesn't run multiple levels and crap out
    ha = Hyperalignment()
    ds_all = [datasets['uni4small'] for i in range(3)]
    # Make sure it raises TypeError if a list is not passed
    self.assertRaises(TypeError, ha, ds_all[0])
    self.assertRaises(TypeError, ha.train, ds_all[0])
    # And it doesn't crap out with a single dataset for training
    ha.train([ds_all[0]])
    zscore(ds_all[0], chunks_attr=None)
    assert_array_equal(ha.commonspace, ds_all[0].samples)
    # make sure it accepts tuple of ndarray
    ha = Hyperalignment()
    m = ha(tuple(ds_all))
    ha = Hyperalignment()
    dss_arr = np.empty(len(ds_all), dtype=object)
    for i in range(len(ds_all)):
        dss_arr[i] = ds_all[i]
    m = ha(dss_arr)
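# Sketch of the input contract verified above: Hyperalignment expects a
# sequence of datasets (list, tuple, or object array), never a bare dataset.
dss = [datasets['uni4small'] for _ in range(3)]
ha = Hyperalignment()
mappers = ha(dss)            # a list of datasets works
mappers = ha(tuple(dss))     # so does a tuple
try:
    ha(dss[0])               # a single bare dataset raises TypeError
except TypeError:
    pass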
def _get_hypesvs(self, sl_connectomes, local_common_model=None):
    '''Hyperalign connectomes and return mappers and a trained
    SVDMapper of the common space.

    Parameters
    ----------
    sl_connectomes: list
        Connectomes to hyperalign.
    local_common_model: array or Dataset, optional
        A reference common model to be used.

    Returns
    -------
    tuple (sl_hmappers, svm, local_common_model)
        sl_hmappers: a list of mappers corresponding to the input list,
            in that order.
        svm: an SVD mapper based on the input data; None if a common
            model was given.
        local_common_model: None if local_common_model was provided as
            input; otherwise it is computed here and returned.
    '''
    # TODO Should we z-score sl_connectomes?
    return_model = self.params.save_model is not None
    if local_common_model is not None:
        ha = Hyperalignment(level2_niter=0)
        if not is_datasetlike(local_common_model):
            local_common_model = Dataset(samples=local_common_model)
        ha.train([local_common_model])
        sl_hmappers = ha(sl_connectomes)
        return sl_hmappers, None, None
    ha = Hyperalignment()
    sl_hmappers = ha(sl_connectomes)
    sl_connectomes = [slhm.forward(slc)
                      for slhm, slc in zip(sl_hmappers, sl_connectomes)]
    _ = [zscore(slc, chunks_attr=None) for slc in sl_connectomes]
    sl_connectomes = np.dstack(sl_connectomes).mean(axis=-1)
    svm = SVDMapper(force_train=True)
    svm.train(sl_connectomes)
    if return_model:
        local_common_model = svm.forward(sl_connectomes)
    else:
        local_common_model = None
    return sl_hmappers, svm, local_common_model
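# Illustrative only -- how the two call modes of _get_hypesvs fit together
# inside the surrounding (not shown) searchlight loop; variable names here
# are assumptions, not part of the original code.
# First pass: derive a local common model from this searchlight's connectomes.
sl_hmappers, svm, common_model = self._get_hypesvs(sl_connectomes)
# Later pass (e.g., aligning new subjects): reuse the stored common model;
# only the per-subject mappers are returned.
sl_hmappers, _, _ = self._get_hypesvs(new_connectomes,
                                      local_common_model=common_model)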
myvoxels = np.nonzero(indices[PARCEL_NUMBER])
dss = []
for sub in range(len(mats)):
    ds = mats[sub][:, myvoxels[0]]
    ds = mv.Dataset(ds)
    ds.fa['voxel_indices'] = range(ds.shape[1])
    mv.zscore(ds, chunks_attr=None)
    dss.append(ds)

print('Size of training data sets: {0}'.format(dss[0].shape))
print('Beginning Hyperalignment.')

# create hyperalignment instance
hyper = Hyperalignment(nproc=1)
hyper.train(dss)
# get mappers to the common space created by hyper.train (2 Procrustes iterations)
mappers = hyper(dss)
# apply mappers back onto the training data
ds_hyper = [h.forward(sd) for h, sd in zip(mappers, dss)]
train_aa_isc = compute_average_similarity(dss)
train_ha_isc = compute_average_similarity(ds_hyper)
df_results.loc[parcel, 'Train_AA_ISC'] = np.mean(train_aa_isc)
df_results.loc[parcel, 'Train_HA_ISC'] = np.mean(train_ha_isc)

# create test dss
test_dss = []
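# compute_average_similarity is defined elsewhere in the original script.
# A minimal sketch of a plausible implementation, assuming a leave-one-out
# inter-subject correlation (ISC): correlate each subject's per-feature time
# series with the mean of the remaining subjects, then average over subjects.
import numpy as np

def compute_average_similarity(dss):
    data = np.stack([ds.samples for ds in dss])  # (n_subj, n_time, n_feat)
    n_subj, _, n_feat = data.shape
    isc = np.zeros((n_subj, n_feat))
    for s in range(n_subj):
        # mean time series of all *other* subjects
        others = data[np.arange(n_subj) != s].mean(axis=0)
        for f in range(n_feat):
            isc[s, f] = np.corrcoef(data[s, :, f], others[:, f])[0, 1]
    return isc.mean(axis=0)  # one average ISC value per feature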
for d in [aligned_dirname, mapper_dirname]:
    if not os.path.exists(d):
        os.makedirs(d)

train_dss = [utils.prep_parcelwise_data(sub, parcel_num, 'sponpain')
             for sub in sub_list]
print('-------- size of training data sets {A} -------------'.format(A=train_dss[0].shape))
print('-------- beginning hyperalignment parcel {A} --------'.format(A=parcel_num))

# train hyperalignment model on all subjects' sponpain data for this parcel
print('-------- length of train subjects={A} '.format(A=str(len(train_dss))))
ha = Hyperalignment(nproc=NPROC, joblib_backend='multiprocessing')
debug.active += ['HPAL']
t0 = time.time()
ha.train(train_dss)
mappers = ha(train_dss)
t1 = time.time()
print('-------- done training hyperalignment at {B} --------'.format(
    B=str(timedelta(seconds=t1 - t0))))
del train_dss

pool = mp.Pool(NPROC)
data_fns = [os.path.join(aligned_dirname,
                         '{s}_aligned_cleaned_bladder_ts_noZ.hdf5'.format(s=s))
            for s in sub_list]
mapper_fns = [os.path.join(mapper_dirname,
                           '{s}_trained_mapper.hdf5noZ.gz'.format(s=s))
              for s in sub_list]
iterable = zip(data_fns, mapper_fns, sub_list, mappers,
               np.repeat(parcel_num, len(mappers)))
pool.map(apply_mappers, iterable)
t2 = time.time()
print('-------- done aligning & saving test data at {B} --------'.format(
    B=str(timedelta(seconds=t2 - t1))))
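# apply_mappers is defined elsewhere in the original script. A minimal sketch
# of a plausible worker, assuming it forward-maps one subject's test data
# through that subject's trained mapper and saves both with PyMVPA's h5save;
# the 'bladder' task name and the reuse of prep_parcelwise_data are assumptions.
from mvpa2.base.hdf5 import h5save

def apply_mappers(args):
    data_fn, mapper_fn, sub, mapper, parcel_num = args
    ds = utils.prep_parcelwise_data(sub, parcel_num, 'bladder')
    aligned = mapper.forward(ds)  # project into the trained common space
    h5save(data_fn, aligned, compression=9)
    h5save(mapper_fn, mapper, compression=9)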