def test_hpal_svd_combo(self):
    # get seed dataset
    ds4l = datasets['uni4large']
    ds_orig = ds4l[:, ds4l.a.nonbogus_features]
    # XXX Is this SVD mapping required?
    svm = SVDMapper()
    svm.train(ds_orig)
    ds_svs = svm.forward(ds_orig)
    ds_orig.samples = ds_svs.samples
    nf_true = ds_orig.nfeatures
    n = 4  # # of datasets to generate
    # Adding non-shared dimensions for each subject
    dss_rotated = [[]] * n
    for i in range(n):
        dss_rotated[i] = hstack(
            (ds_orig, ds4l[:, ds4l.a.bogus_features[i * 4:i * 4 + 4]]))
    # rotate data
    nf = dss_rotated[0].nfeatures
    dss_rotated = [random_affine_transformation(dss_rotated[i])
                   for i in xrange(n)]
    # Test if it is close to doing hpal+SVD in sequence outside hpal
    # First, as we do in sequence outside hpal
    ha = Hyperalignment()
    mappers_orig = ha(dss_rotated)
    dss_back = [m.forward(ds_)
                for m, ds_ in zip(mappers_orig, dss_rotated)]
    dss_mean = np.mean([sd.samples for sd in dss_back], axis=0)
    svm = SVDMapper()
    svm.train(dss_mean)
    dss_sv = [svm.forward(sd) for sd in dss_back]
    # Test for SVD dimensionality reduction even with 2 training subjects
    for output_dim in [1, 4]:
        ha = Hyperalignment(output_dim=output_dim)
        ha.train(dss_rotated[:2])
        mappers = ha(dss_rotated)
        dss_back = [m.forward(ds_)
                    for m, ds_ in zip(mappers, dss_rotated)]
        for sd in dss_back:
            assert (sd.nfeatures == output_dim)
        # Check if combined hpal+SVD works as expected
        sv_corrs = []
        for sd1, sd2 in zip(dss_sv, dss_back):
            ndcs = np.diag(np.corrcoef(
                sd1.samples.T, sd2.samples.T)[nf:, :nf], k=0)
            sv_corrs.append(ndcs)
        self.assertTrue(
            np.all(np.abs(np.array(sv_corrs)) >= 0.95),
            msg="Hyperalignment with dimensionality reduction should have "
                "reconstructed SVD dataset. Got correlations %s." % sv_corrs)
        # Check if it recovers original SVs
        sv_corrs_orig = []
        for sd in dss_back:
            ndcs = np.diag(np.corrcoef(
                sd.samples.T, ds_orig.samples.T)[nf_true:, :nf_true], k=0)
            sv_corrs_orig.append(ndcs)
        self.assertTrue(
            np.all(np.abs(np.array(sv_corrs_orig)) >= 0.9),
            msg="Expected original dimensions after "
                "SVD. Got correlations %s." % sv_corrs_orig)
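# The test above exercises Hyperalignment's built-in SVD reduction
# (output_dim).  Below is a minimal, self-contained sketch of that usage on
# toy data; the module paths and the synthetic "subjects" are assumptions for
# illustration, not part of the test suite.
import numpy as np
from mvpa2.datasets.base import Dataset
from mvpa2.algorithms.hyperalignment import Hyperalignment

rng = np.random.RandomState(0)
shared = rng.standard_normal((50, 8))      # common signal: 50 samples x 8 features
dss = []
for _ in range(3):
    # each "subject" observes the shared signal through its own random rotation
    rot = np.linalg.qr(rng.standard_normal((8, 8)))[0]
    dss.append(Dataset(samples=shared.dot(rot)))

ha = Hyperalignment(output_dim=4)          # keep only 4 common SVD dimensions
ha.train(dss)                              # derive the common space
mappers = ha(dss)                          # one trained mapper per input dataset
dss_common = [m.forward(ds) for m, ds in zip(mappers, dss)]
assert all(sd.nfeatures == 4 for sd in dss_common)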
def _test_mcasey20120222():  # pragma: no cover
    # http://lists.alioth.debian.org/pipermail/pkg-exppsy-pymvpa/2012q1/002034.html

    # This one is conditioned on allowing # of samples to be changed
    # by the mapper provided to MappedClassifier.  See
    # https://github.com/yarikoptic/PyMVPA/tree/_tent/allow_ch_nsamples

    import numpy as np
    from mvpa2.datasets.base import dataset_wizard
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.mappers.base import ChainMapper
    from mvpa2.mappers.svd import SVDMapper
    from mvpa2.mappers.fx import mean_group_sample
    from mvpa2.clfs.svm import LinearCSVMC
    from mvpa2.clfs.meta import MappedClassifier
    from mvpa2.measures.base import CrossValidation

    mapper = ChainMapper([mean_group_sample(['targets', 'chunks']),
                          SVDMapper()])
    clf = MappedClassifier(LinearCSVMC(), mapper)
    cvte = CrossValidation(clf, NFoldPartitioner(),
                           enable_ca=['repetition_results', 'stats'])

    ds = dataset_wizard(
        samples=np.arange(32).reshape((8, -1)),
        targets=[1, 1, 2, 2, 1, 1, 2, 2],
        chunks=[1, 1, 1, 1, 2, 2, 2, 2])

    errors = cvte(ds)
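# For context, a self-contained sketch of what the mapper chain above does on
# its own: average samples within each (targets, chunks) group, then project
# the group means with SVD.  The toy dataset mirrors the one in the test; the
# standalone use below is an assumption for illustration, not part of the test.
import numpy as np
from mvpa2.datasets.base import dataset_wizard
from mvpa2.mappers.base import ChainMapper
from mvpa2.mappers.fx import mean_group_sample
from mvpa2.mappers.svd import SVDMapper

ds = dataset_wizard(samples=np.arange(32).reshape((8, -1)),
                    targets=[1, 1, 2, 2, 1, 1, 2, 2],
                    chunks=[1, 1, 1, 1, 2, 2, 2, 2])
mapper = ChainMapper([mean_group_sample(['targets', 'chunks']), SVDMapper()])
mapper.train(ds)
ds_mapped = mapper.forward(ds)
print(ds_mapped.shape)   # 4 group means (2 targets x 2 chunks) in SVD space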
def test_simple_svd(self):
    pm = SVDMapper()
    # train SVD
    pm.train(self.ndlin)
    self.assertEqual(pm.proj.shape, (20, 20))

    # now project data into SVD space
    p = pm.forward(self.ndlin)

    # only first singular value significant
    self.assertTrue(pm.sv[:1] > 1.0)
    self.assertTrue((pm.sv[1:] < 0.0001).all())

    # only variance of first component significant
    var = p.var(axis=0)

    # test that only one component has variance
    self.assertTrue(var[:1] > 1.0)
    self.assertTrue((var[1:] < 0.0001).all())

    # check that the mapped data can be fully recovered by 'reverse()'
    pr = pm.reverse(p)
    self.assertEqual(pr.shape, (40, 20))
    self.assertTrue(np.abs(pm.reverse(p) - self.ndlin).sum() < 0.0001)
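# A short, self-contained sketch of the SVDMapper round trip checked above,
# outside the unittest harness.  The random toy data are an assumption for
# illustration.
import numpy as np
from mvpa2.mappers.svd import SVDMapper

data = np.random.normal(size=(40, 20))    # 40 samples, 20 features
pm = SVDMapper()
pm.train(data)                            # fit the projection
proj = pm.forward(data)                   # map into SVD component space
rec = pm.reverse(proj)                    # map back into the original space
print(np.abs(rec - data).sum())           # reconstruction error should be tiny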
def _get_hypesvs(self, sl_connectomes, local_common_model=None):
    '''
    Hyperalign connectomes and return mappers and a trained SVDMapper of
    the common space.

    Parameters
    ----------
    sl_connectomes: a list of connectomes to hyperalign
    local_common_model: a reference common model to be used.

    Returns
    -------
    a tuple (sl_hmappers, svm, local_common_model)
    sl_hmappers: a list of mappers corresponding to the input list, in that
        order.
    svm: an SVDMapper based on the input data. If a common model is given,
        this is None.
    local_common_model: if local_common_model is provided as input, this
        will be None. Otherwise, local_common_model is computed here and
        returned.
    '''
    # TODO Should we z-score sl_connectomes?
    return_model = False if self.params.save_model is None else True
    if local_common_model is not None:
        ha = Hyperalignment(level2_niter=0)
        if not is_datasetlike(local_common_model):
            local_common_model = Dataset(samples=local_common_model)
        ha.train([local_common_model])
        sl_hmappers = ha(sl_connectomes)
        return sl_hmappers, None, None
    ha = Hyperalignment()
    sl_hmappers = ha(sl_connectomes)
    sl_connectomes = [slhm.forward(slc)
                      for slhm, slc in zip(sl_hmappers, sl_connectomes)]
    _ = [zscore(slc, chunks_attr=None) for slc in sl_connectomes]
    sl_connectomes = np.dstack(sl_connectomes).mean(axis=-1)
    svm = SVDMapper(force_train=True)
    svm.train(sl_connectomes)
    if return_model:
        local_common_model = svm.forward(sl_connectomes)
    else:
        local_common_model = None
    return sl_hmappers, svm, local_common_model
def test_more_svd(self):
    pm = SVDMapper()
    # train SVD
    pm.train(self.largefeat)

    # mixing matrix cannot be square
    self.assertEqual(pm.proj.shape, (40, 10))

    # only first singular value significant
    self.assertTrue(pm.sv[:1] > 10)
    self.assertTrue((pm.sv[1:] < 10).all())

    # now project data into SVD space
    p = pm.forward(self.largefeat)

    # only variance of first component significant
    var = p.var(axis=0)

    # test that only one component has variance
    self.assertTrue(var[:1] > 1.0)
    self.assertTrue((var[1:] < 0.0001).all())

    # check that the mapped data can be fully recovered by 'reverse()'
    rp = pm.reverse(p)
    self.assertEqual(rp.shape, self.largefeat.shape)
    self.assertTrue((np.round(rp) == self.largefeat).all())

    # copy mapper
    pm2 = deepcopy(pm)

    # now make new random data and do forward->reverse check
    data = np.random.normal(size=(98, 40))
    data_f = pm.forward(data)
    self.assertEqual(data_f.shape, (98, 10))
    data_r = pm.reverse(data_f)
    self.assertEqual(data_r.shape, (98, 40))
def train(self, datasets):
    """Derive a common feature space from a series of datasets.

    Parameters
    ----------
    datasets : sequence of datasets

    Returns
    -------
    A list of trained Mappers matching the number of input datasets.
    """
    params = self.params            # for quicker access ;)
    ca = self.ca
    # Check to make sure we get a list of datasets as input.
    if not isinstance(datasets, (list, tuple, np.ndarray)):
        raise TypeError("Input datasets should be a sequence "
                        "(of type list, tuple, or ndarray) of datasets.")

    ndatasets = len(datasets)
    nfeatures = [ds.nfeatures for ds in datasets]
    alpha = params.alpha

    residuals = None
    if ca['training_residual_errors'].enabled:
        residuals = np.zeros((1 + params.level2_niter, ndatasets))
        ca.training_residual_errors = Dataset(
            samples=residuals,
            sa={'levels': ['1'] + ['2:%i' % i
                                   for i in xrange(params.level2_niter)]})

    if __debug__:
        debug('HPAL', "Hyperalignment %s for %i datasets" % (self, ndatasets))

    if params.ref_ds is None:
        ref_ds = np.argmax(nfeatures)
    else:
        ref_ds = params.ref_ds
        # Making sure that ref_ds is within range.
        # Parameter() already checks for it being a non-negative integer
        if ref_ds >= ndatasets:
            raise ValueError("Requested reference dataset %i is out of "
                             "bounds. We have only %i datasets provided"
                             % (ref_ds, ndatasets))
    ca.chosen_ref_ds = ref_ds
    # zscore all data sets
    # ds = [ zscore(ds, chunks_attr=None) for ds in datasets]

    # TODO since we are doing in-place zscoring create deep copies
    # of the datasets with pruned targets and shallow copies of
    # the collections (if they would come needed in the transformation)
    # TODO: handle floats and non-floats differently to prevent
    #       waste of memory if there is no need (e.g. no z-scoring)
    # otargets = [ds.sa.targets for ds in datasets]
    datasets = [ds.copy(deep=False) for ds in datasets]
    # datasets = [Dataset(ds.samples.astype(float), sa={'targets': [None] * len(ds)})
    # datasets = [Dataset(ds.samples, sa={'targets': [None] * len(ds)})
    #             for ds in datasets]

    if params.zscore_all:
        if __debug__:
            debug('HPAL', "Z-scoring all datasets")
        for ids in xrange(len(datasets)):
            zmapper = ZScoreMapper(chunks_attr=None)
            zmapper.train(datasets[ids])
            datasets[ids] = zmapper.forward(datasets[ids])

    if alpha < 1:
        datasets, wmappers = self._regularize(datasets, alpha)

    # initial common space is the reference dataset
    commonspace = datasets[ref_ds].samples
    # the reference dataset might have been zscored already, don't do it
    # twice
    if params.zscore_common and not params.zscore_all:
        if __debug__:
            debug('HPAL_',
                  "Creating copy of a commonspace and assuring "
                  "it is of a floating type")
        commonspace = commonspace.astype(float)
        zscore(commonspace, chunks_attr=None)
    # If there is only one dataset in training phase, there is nothing to be
    # done -- just use that data as the common space
    if len(datasets) < 2:
        self.commonspace = commonspace
    else:
        # create a mapper per dataset
        # might prefer some other way to initialize... later
        mappers = [deepcopy(params.alignment) for ds in datasets]

        #
        # Level 1 -- initial projection
        #
        lvl1_projdata = self._level1(datasets, commonspace, ref_ds,
                                     mappers, residuals)
        #
        # Level 2 -- might iterate multiple times
        #
        # this is the final common space
        self.commonspace = self._level2(datasets, lvl1_projdata,
                                        mappers, residuals)
    if params.output_dim is not None:
        mappers = self._level3(datasets)
        self._svd_mapper = SVDMapper()
        self._svd_mapper.train(self._map_and_mean(datasets, mappers))
        self._svd_mapper = StaticProjectionMapper(
            proj=self._svd_mapper.proj[:, :params.output_dim])
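# The output_dim branch at the end of train() fits a full SVDMapper on the
# mapped-and-averaged data and then keeps only the leading columns of its
# projection matrix via a StaticProjectionMapper.  Below is a small,
# self-contained sketch of that truncation pattern; the toy data, the choice
# of k, and the staticprojection module path are assumptions for illustration.
import numpy as np
from mvpa2.mappers.svd import SVDMapper
from mvpa2.mappers.staticprojection import StaticProjectionMapper

k = 3
X = np.random.normal(size=(60, 10))       # 60 samples, 10 features
svd = SVDMapper()
svd.train(X)                              # full SVD projection, shape (10, 10)
reducer = StaticProjectionMapper(proj=svd.proj[:, :k])
X_reduced = reducer.forward(X)            # keep only the k leading components
print(X_reduced.shape)                    # (60, 3)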
from mvpa2.mappers.mdp_adaptor import ICAMapper, PCAMapper
from mvpa2 import cfg

center = [10, 20]
axis_range = 7


##REF: Name was automagically refactored
def plot_proj_dir(p):
    pl.plot([0, p[0, 0]], [0, p[0, 1]],
            linewidth=3, hold=True, color='y')
    pl.plot([0, p[1, 0]], [0, p[1, 1]],
            linewidth=3, hold=True, color='k')


mappers = {
    'PCA': PCAMapper(),
    'SVD': SVDMapper(),
    'ICA': ICAMapper(alg='CuBICA'),
}

datasets = [
    noisy_2d_fx(100, lambda x: x, [lambda x: x], center, noise_std=0.5),
    noisy_2d_fx(50, lambda x: x, [lambda x: x, lambda x: -x], center,
                noise_std=0.5),
    noisy_2d_fx(50, lambda x: x, [lambda x: x, lambda x: 0], center,
                noise_std=0.5),
]

ndatasets = len(datasets)
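# A minimal sketch of how these pieces might be exercised: fit each mapper on
# one of the 2-d toy datasets and project its samples.  This continuation is
# an assumption for illustration, not the remainder of the original script.
ds = datasets[0]
for name, mapper in mappers.items():
    mapper.train(ds)                        # fit PCA/SVD/ICA on the toy data
    projected = mapper.forward(ds.samples)  # project samples onto learned axes
    print(name, projected.var(axis=0))      # variance captured per direction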