Exemplo n.º 1
0
 def test_hpal_svd_combo(self):
     """Hyperalignment with output_dim should match hpal followed by SVD.

     Builds rotated copies of a seed dataset with extra non-shared (bogus)
     features per subject, then checks that Hyperalignment(output_dim=...)
     (1) reduces to the requested dimensionality, (2) correlates highly with
     the external hpal-then-SVD pipeline, and (3) recovers the original
     singular vectors.
     """
     # get seed dataset
     ds4l = datasets['uni4large']
     ds_orig = ds4l[:, ds4l.a.nonbogus_features]
     # XXX Is this SVD mapping required?
     svm = SVDMapper()
     svm.train(ds_orig)
     ds_svs = svm.forward(ds_orig)
     ds_orig.samples = ds_svs.samples
     nf_true = ds_orig.nfeatures
     n = 4  # number of datasets to generate
     # Adding non-shared dimensions for each subject
     dss_rotated = [[]] * n
     for i in range(n):
         dss_rotated[i] = hstack(
             (ds_orig, ds4l[:, ds4l.a.bogus_features[i * 4:i * 4 + 4]]))
     # rotate data
     nf = dss_rotated[0].nfeatures
     # FIX: xrange is Python 2-only; use range() as the rest of this test does
     dss_rotated = [
         random_affine_transformation(dss_rotated[i]) for i in range(n)
     ]
     # Test if it is close to doing hpal+SVD in sequence outside hpal
     # First, as we do in sequence outside hpal
     ha = Hyperalignment()
     mappers_orig = ha(dss_rotated)
     dss_back = [
         m.forward(ds_) for m, ds_ in zip(mappers_orig, dss_rotated)
     ]
     dss_mean = np.mean([sd.samples for sd in dss_back], axis=0)
     svm = SVDMapper()
     svm.train(dss_mean)
     dss_sv = [svm.forward(sd) for sd in dss_back]
     # Test for SVD dimensionality reduction even with 2 training subjects
     for output_dim in [1, 4]:
         ha = Hyperalignment(output_dim=output_dim)
         ha.train(dss_rotated[:2])
         mappers = ha(dss_rotated)
         dss_back = [m.forward(ds_) for m, ds_ in zip(mappers, dss_rotated)]
         for sd in dss_back:
             # FIX: bare assert disappears under `python -O`;
             # use the unittest assertion instead
             self.assertEqual(sd.nfeatures, output_dim)
     # Check if combined hpal+SVD works as expected
     sv_corrs = []
     for sd1, sd2 in zip(dss_sv, dss_back):
         ndcs = np.diag(np.corrcoef(sd1.samples.T, sd2.samples.T)[nf:, :nf],
                        k=0)
         sv_corrs.append(ndcs)
     self.assertTrue(
         np.all(np.abs(np.array(sv_corrs)) >= 0.95),
         msg="Hyperalignment with dimensionality reduction should have "
         "reconstructed SVD dataset. Got correlations %s." % sv_corrs)
     # Check if it recovers original SVs
     sv_corrs_orig = []
     for sd in dss_back:
         ndcs = np.diag(np.corrcoef(sd.samples.T,
                                    ds_orig.samples.T)[nf_true:, :nf_true],
                        k=0)
         sv_corrs_orig.append(ndcs)
     self.assertTrue(np.all(np.abs(np.array(sv_corrs_orig)) >= 0.9),
                     msg="Expected original dimensions after "
                     "SVD. Got correlations %s." % sv_corrs_orig)
Exemplo n.º 2
0
 def test_hpal_joblib(self):
     """Hyperalignment run with nproc=2 (joblib) must match the serial run."""
     skip_if_no_external('joblib')
     # get seed dataset
     ds4l = datasets['uni4large']
     dss_rotated = [random_affine_transformation(ds4l, scale_fac=100, shift_fac=10)
                    for i in range(4)]
     ha = Hyperalignment(nproc=1, enable_ca=['residual_errors'])
     ha.train(dss_rotated[:2])
     mappers = ha(dss_rotated)
     ha_proc = Hyperalignment(nproc=2, enable_ca=['residual_errors'])
     ha_proc.train(dss_rotated[:2])
     mappers_nproc = ha_proc(dss_rotated)
     # not sure yet why on windows only is not precise
     cmp_ = assert_array_equal if (not on_windows) else assert_array_almost_equal
     # IDIOM FIX: a list comprehension executed only for its side effects
     # builds a throwaway list; use a plain loop instead.
     for m, mp in zip(mappers, mappers_nproc):
         cmp_(m.proj, mp.proj)  # "Mappers differ when using nproc>1."
     cmp_(ha.ca.residual_errors.samples, ha_proc.ca.residual_errors.samples)
     # smoke test: nproc=0 should also run without error
     ha = Hyperalignment(nproc=0)
     mappers = ha(dss_rotated)
Exemplo n.º 3
0
 def test_hyper_input_dataset_check(self):
     """Input validation: reject non-sequences, accept list/tuple/object array."""
     # If supplied with only one dataset during training,
     # make sure it doesn't run multiple levels and crap out
     ha = Hyperalignment()
     ds_all = [datasets['uni4small'] for i in range(3)]
     # Make sure it raises TypeError if a list is not passed
     self.assertRaises(TypeError, ha, ds_all[0])
     self.assertRaises(TypeError, ha.train, ds_all[0])
     # And it doesn't crap out with a single dataset for training
     ha.train([ds_all[0]])
     zscore(ds_all[0], chunks_attr=None)
     assert_array_equal(ha.commonspace, ds_all[0].samples)
     # make sure it accepts tuple of ndarray
     ha = Hyperalignment()
     m = ha(tuple(ds_all))
     ha = Hyperalignment()
     dss_arr = np.empty(len(ds_all), dtype=object)
     # IDIOM FIX: enumerate() instead of indexing via range(len(...))
     for i, ds_ in enumerate(ds_all):
         dss_arr[i] = ds_
     m = ha(dss_arr)
Exemplo n.º 4
0
 def test_hyper_input_dataset_check(self):
     """Input validation: reject non-sequences, accept list/tuple/object array."""
     # If supplied with only one dataset during training,
     # make sure it doesn't run multiple levels and crap out
     ha = Hyperalignment()
     ds_all = [datasets['uni4small'] for i in range(3)]
     # Make sure it raises TypeError if a list is not passed
     self.assertRaises(TypeError, ha, ds_all[0])
     self.assertRaises(TypeError, ha.train, ds_all[0])
     # And it doesn't crap out with a single dataset for training
     ha.train([ds_all[0]])
     zscore(ds_all[0], chunks_attr=None)
     assert_array_equal(ha.commonspace, ds_all[0].samples)
     # make sure it accepts tuple of ndarray
     ha = Hyperalignment()
     m = ha(tuple(ds_all))
     ha = Hyperalignment()
     dss_arr = np.empty(len(ds_all), dtype=object)
     # IDIOM FIX: enumerate() instead of indexing via range(len(...))
     for i, ds_ in enumerate(ds_all):
         dss_arr[i] = ds_
     m = ha(dss_arr)
Exemplo n.º 5
0
 def test_hpal_joblib(self):
     """Multi-process hyperalignment (nproc=2) must reproduce the serial result."""
     skip_if_no_external('joblib')
     # seed dataset, rotated four ways to simulate subjects
     ds4l = datasets['uni4large']
     rotated = [random_affine_transformation(ds4l, scale_fac=100, shift_fac=10)
                for _ in range(4)]
     ha_serial = Hyperalignment(nproc=1, enable_ca=['residual_errors'])
     ha_serial.train(rotated[:2])
     mappers_serial = ha_serial(rotated)
     ha_parallel = Hyperalignment(nproc=2, enable_ca=['residual_errors'])
     ha_parallel.train(rotated[:2])
     mappers_parallel = ha_parallel(rotated)
     # every projection matrix must be bit-identical across runs
     same_proj = [np.array_equal(m1.proj, m2.proj)
                  for m1, m2 in zip(mappers_serial, mappers_parallel)]
     self.assertTrue(
         np.all(same_proj),
         msg="Mappers differ when using nproc>1.")
     assert_array_equal(ha_serial.ca.residual_errors.samples,
                        ha_parallel.ca.residual_errors.samples)
     # smoke test: nproc=0 should simply run
     ha_smoke = Hyperalignment(nproc=0)
     mappers_smoke = ha_smoke(rotated)
Exemplo n.º 6
0
 def test_hpal_joblib(self):
     """Check that hyperalignment is deterministic across process counts."""
     skip_if_no_external('joblib')
     ds4l = datasets['uni4large']  # seed dataset
     dss = [random_affine_transformation(ds4l, scale_fac=100, shift_fac=10)
            for idx in range(4)]
     ha1 = Hyperalignment(nproc=1, enable_ca=['residual_errors'])
     ha1.train(dss[:2])
     maps1 = ha1(dss)
     ha2 = Hyperalignment(nproc=2, enable_ca=['residual_errors'])
     ha2.train(dss[:2])
     maps2 = ha2(dss)
     # projections from the parallel run must equal the serial ones
     all_equal = np.all([np.array_equal(a.proj, b.proj)
                         for a, b in zip(maps1, maps2)])
     self.assertTrue(all_equal, msg="Mappers differ when using nproc>1.")
     assert_array_equal(ha1.ca.residual_errors.samples,
                        ha2.ca.residual_errors.samples)
     # nproc=0 smoke test
     ha0 = Hyperalignment(nproc=0)
     maps0 = ha0(dss)
Exemplo n.º 7
0
    def _get_hypesvs(self, sl_connectomes, local_common_model=None):
        '''
        Hyperalign connectomes and return mappers
        and trained SVDMapper of common space.

        Parameters
        ----------
        sl_connectomes: a list of connectomes to hyperalign
        local_common_model: a reference common model to be used.

        Returns
        -------
        a tuple (sl_hmappers, svm, local_common_model)
        sl_hmappers: a list of mappers corresponding to input list in that order.
        svm: a svm mapper based on the input data. if given a common model, this is None.
        local_common_model: If local_common_model is provided as input, this will be None.
            Otherwise, local_common_model will be computed here and returned.
        '''
        # TODO Should we z-score sl_connectomes?
        # IDIOM FIX: "False if x is None else True" is just "x is not None"
        return_model = self.params.save_model is not None
        if local_common_model is not None:
            # Align directly to the provided common model; no level-2 iterations
            ha = Hyperalignment(level2_niter=0)
            if not is_datasetlike(local_common_model):
                local_common_model = Dataset(samples=local_common_model)
            ha.train([local_common_model])
            sl_hmappers = ha(sl_connectomes)
            return sl_hmappers, None, None
        ha = Hyperalignment()
        sl_hmappers = ha(sl_connectomes)
        # project each connectome into the common space
        sl_connectomes = [
            slhm.forward(slc) for slhm, slc in zip(sl_hmappers, sl_connectomes)
        ]
        # IDIOM FIX: zscore mutates in place; use a plain loop instead of a
        # list comprehension executed only for its side effects
        for slc in sl_connectomes:
            zscore(slc, chunks_attr=None)
        sl_connectomes = np.dstack(sl_connectomes).mean(axis=-1)
        svm = SVDMapper(force_train=True)
        svm.train(sl_connectomes)
        if return_model:
            local_common_model = svm.forward(sl_connectomes)
        else:
            local_common_model = None
        return sl_hmappers, svm, local_common_model
Exemplo n.º 8
0
    def _get_hypesvs(self, sl_connectomes, local_common_model=None):
        '''
        Hyperalign connectomes and return mappers
        and trained SVDMapper of common space.

        Parameters
        ----------
        sl_connectomes: a list of connectomes to hyperalign
        local_common_model: a reference common model to be used.

        Returns
        -------
        a tuple (sl_hmappers, svm, local_common_model)
        sl_hmappers: a list of mappers corresponding to input list in that order.
        svm: a svm mapper based on the input data. if given a common model, this is None.
        local_common_model: If local_common_model is provided as input, this will be None.
            Otherwise, local_common_model will be computed here and returned.
        '''
        # TODO Should we z-score sl_connectomes?
        # IDIOM FIX: "False if x is None else True" is just "x is not None"
        return_model = self.params.save_model is not None
        if local_common_model is not None:
            # Align directly to the provided common model; no level-2 iterations
            ha = Hyperalignment(level2_niter=0)
            if not is_datasetlike(local_common_model):
                local_common_model = Dataset(samples=local_common_model)
            ha.train([local_common_model])
            sl_hmappers = ha(sl_connectomes)
            return sl_hmappers, None, None
        ha = Hyperalignment()
        sl_hmappers = ha(sl_connectomes)
        # project each connectome into the common space
        sl_connectomes = [slhm.forward(slc) for slhm, slc in zip(sl_hmappers, sl_connectomes)]
        # IDIOM FIX: zscore mutates in place; use a plain loop instead of a
        # list comprehension executed only for its side effects
        for slc in sl_connectomes:
            zscore(slc, chunks_attr=None)
        sl_connectomes = np.dstack(sl_connectomes).mean(axis=-1)
        svm = SVDMapper(force_train=True)
        svm.train(sl_connectomes)
        if return_model:
            local_common_model = svm.forward(sl_connectomes)
        else:
            local_common_model = None
        return sl_hmappers, svm, local_common_model
Exemplo n.º 9
0
 def test_hpal_joblib(self):
     """Hyperalignment run with nproc=2 (joblib) must match the serial run."""
     skip_if_no_external('joblib')
     # get seed dataset
     ds4l = datasets['uni4large']
     dss_rotated = [
         random_affine_transformation(ds4l, scale_fac=100, shift_fac=10)
         for i in range(4)
     ]
     ha = Hyperalignment(nproc=1, enable_ca=['residual_errors'])
     ha.train(dss_rotated[:2])
     mappers = ha(dss_rotated)
     ha_proc = Hyperalignment(nproc=2, enable_ca=['residual_errors'])
     ha_proc.train(dss_rotated[:2])
     mappers_nproc = ha_proc(dss_rotated)
     # not sure yet why on windows only is not precise
     cmp_ = assert_array_equal if (
         not on_windows) else assert_array_almost_equal
     # IDIOM FIX: a list comprehension executed only for its side effects
     # builds a throwaway list; use a plain loop instead.
     for m, mp in zip(mappers, mappers_nproc):
         cmp_(m.proj, mp.proj)  # "Mappers differ when using nproc>1."
     cmp_(ha.ca.residual_errors.samples, ha_proc.ca.residual_errors.samples)
     # smoke test: nproc=0 should also run without error
     ha = Hyperalignment(nproc=0)
     mappers = ha(dss_rotated)
Exemplo n.º 10
0
 def test_hpal_svd_combo(self):
     """Hyperalignment with output_dim should match hpal followed by SVD.

     Builds rotated copies of a seed dataset with extra non-shared (bogus)
     features per subject, then checks that Hyperalignment(output_dim=...)
     (1) reduces to the requested dimensionality, (2) correlates highly with
     the external hpal-then-SVD pipeline, and (3) recovers the original
     singular vectors.
     """
     # get seed dataset
     ds4l = datasets['uni4large']
     ds_orig = ds4l[:, ds4l.a.nonbogus_features]
     # XXX Is this SVD mapping required?
     svm = SVDMapper()
     svm.train(ds_orig)
     ds_svs = svm.forward(ds_orig)
     ds_orig.samples = ds_svs.samples
     nf_true = ds_orig.nfeatures
     n = 4  # number of datasets to generate
     # Adding non-shared dimensions for each subject
     dss_rotated = [[]]*n
     for i in range(n):
         dss_rotated[i] = hstack(
             (ds_orig, ds4l[:, ds4l.a.bogus_features[i * 4: i * 4 + 4]]))
     # rotate data
     nf = dss_rotated[0].nfeatures
     # FIX: xrange is Python 2-only; use range() as the rest of this test does
     dss_rotated = [random_affine_transformation(dss_rotated[i])
                    for i in range(n)]
     # Test if it is close to doing hpal+SVD in sequence outside hpal
     # First, as we do in sequence outside hpal
     ha = Hyperalignment()
     mappers_orig = ha(dss_rotated)
     dss_back = [m.forward(ds_)
                 for m, ds_ in zip(mappers_orig, dss_rotated)]
     dss_mean = np.mean([sd.samples for sd in dss_back], axis=0)
     svm = SVDMapper()
     svm.train(dss_mean)
     dss_sv = [svm.forward(sd) for sd in dss_back]
     # Test for SVD dimensionality reduction even with 2 training subjects
     for output_dim in [1, 4]:
         ha = Hyperalignment(output_dim=output_dim)
         ha.train(dss_rotated[:2])
         mappers = ha(dss_rotated)
         dss_back = [m.forward(ds_)
                     for m, ds_ in zip(mappers, dss_rotated)]
         for sd in dss_back:
             # FIX: bare assert disappears under `python -O`;
             # use the unittest assertion instead
             self.assertEqual(sd.nfeatures, output_dim)
     # Check if combined hpal+SVD works as expected
     sv_corrs = []
     for sd1, sd2 in zip(dss_sv, dss_back):
         ndcs = np.diag(np.corrcoef(sd1.samples.T, sd2.samples.T)[nf:, :nf],
                        k=0)
         sv_corrs.append(ndcs)
     self.assertTrue(
         np.all(np.abs(np.array(sv_corrs)) >= 0.95),
         msg="Hyperalignment with dimensionality reduction should have "
             "reconstructed SVD dataset. Got correlations %s."
             % sv_corrs)
     # Check if it recovers original SVs
     sv_corrs_orig = []
     for sd in dss_back:
         ndcs = np.diag(
             np.corrcoef(sd.samples.T, ds_orig.samples.T)[nf_true:, :nf_true],
             k=0)
         sv_corrs_orig.append(ndcs)
     self.assertTrue(
         np.all(np.abs(np.array(sv_corrs_orig)) >= 0.9),
         msg="Expected original dimensions after "
             "SVD. Got correlations %s."
             % sv_corrs_orig)
    # NOTE(review): this is the visible tail of a larger function/script whose
    # header is outside this view; names like `indices`, `mats`, `sub_list`,
    # `parcel_num`, `NPROC`, `aligned_dirname`, `mapper_dirname`, `df_results`
    # are defined elsewhere — verify against the full file.
    # Select the voxels belonging to this parcel (assumes `indices` maps
    # parcel number -> voxel mask — TODO confirm).
    myvoxels = np.nonzero(indices[PARCEL_NUMBER])
    dss = []
    # Build one z-scored PyMVPA Dataset per subject restricted to the parcel.
    for sub in range(len(mats)):
        ds = mats[sub][:, myvoxels[0]]
        ds = mv.Dataset(ds)
        ds.fa['voxel_indices'] = range(ds.shape[1])
        # z-score each feature across all samples (no chunking)
        mv.zscore(ds, chunks_attr=None)
        dss.append(ds)

    print('Size of Training data sets: {0}'.format(dss[0].shape))
    print('Beginning Hyperalignment.')

    # create hyperalignment instance
    hyper = Hyperalignment(nproc=1, )
    hyper.train(dss)

    # get mappers to common space created by hyper.train (2x procrustes iteration)
    mappers = hyper(dss)

    # apply mappers back onto training data
    ds_hyper = [h.forward(sd) for h, sd in zip(mappers, dss)]

    # inter-subject correlation before (AA: anatomical alignment) vs after
    # hyperalignment (HA) — semantics assumed from names; confirm with
    # compute_average_similarity's definition.
    train_aa_isc = compute_average_similarity(dss)
    train_ha_isc = compute_average_similarity(ds_hyper)

    df_results.loc[parcel, 'Train_AA_ISC'] = np.mean(train_aa_isc)
    df_results.loc[parcel, 'Train_HA_ISC'] = np.mean(train_ha_isc)

    # create test dss
    test_dss = []
    # ensure output directories exist before writing
    for d in [aligned_dirname, mapper_dirname]:
        if not os.path.exists(d):
            os.makedirs(d)


    # load per-subject, per-parcel training data ('sponpain' condition)
    train_dss = [utils.prep_parcelwise_data(sub, parcel_num, 'sponpain') for sub in sub_list]
    print('-------- size of training data sets {A} -------------'.format(A=train_dss[0].shape))
    print('-------- beginning hyperalignment parcel {A} --------'.format(A=parcel_num))

    # train hyperalignment model on all subject's sponpain data for this parcel
    print('-------- length of train subjects={A} '.format(A=str(len(train_dss))))
    ha = Hyperalignment(nproc=NPROC, joblib_backend='multiprocessing')
    # enable PyMVPA debug output for the hyperalignment module
    debug.active += ['HPAL']
    t0 = time.time()
    ha.train(train_dss)
    mappers = ha(train_dss)
    t1 = time.time()
    print('-------- done training hyperalignment at {B} --------'.format(B=str(timedelta(seconds=t1-t0))))
    # free the training data before the next large allocation
    del train_dss

    # fan out the (data, mapper) pairs to worker processes that apply and save
    pool = mp.Pool(NPROC)
    data_fns = [os.path.join(aligned_dirname,'{s}_aligned_cleaned_bladder_ts_noZ.hdf5'.format(s=s)) for s in sub_list]
    mapper_fns = [os.path.join(mapper_dirname,'{s}_trained_mapper.hdf5noZ.gz'.format(s=s)) for s in sub_list]
    iterable = zip(data_fns, mapper_fns, sub_list, mappers, np.repeat(parcel_num, len(mappers)))
    pool.map(apply_mappers, iterable)
    t2=time.time()
    print('-------- done aligning & saving test data at {B} --------'.format(B=str(timedelta(seconds=t2-t1))))