Example No. 1
 def test_hpal_svd_combo(self):
     # get seed dataset
     ds4l = datasets['uni4large']
     ds_orig = ds4l[:, ds4l.a.nonbogus_features]
     # XXX Is this SVD mapping required?
     svm = SVDMapper()
     svm.train(ds_orig)
     ds_svs = svm.forward(ds_orig)
     ds_orig.samples = ds_svs.samples
     nf_true = ds_orig.nfeatures
     n = 4  # # of datasets to generate
     # Adding non-shared dimensions for each subject
     dss_rotated = [[]] * n
     for i in range(n):
         dss_rotated[i] = hstack(
             (ds_orig, ds4l[:, ds4l.a.bogus_features[i * 4:i * 4 + 4]]))
     # rotate data
     nf = dss_rotated[0].nfeatures
     dss_rotated = [
         random_affine_transformation(dss_rotated[i]) for i in range(n)
     ]
     # Test if it is close to doing hpal+SVD in sequence outside hpal
     # First, as we do in sequence outside hpal
     ha = Hyperalignment()
     mappers_orig = ha(dss_rotated)
     dss_back = [
         m.forward(ds_) for m, ds_ in zip(mappers_orig, dss_rotated)
     ]
     dss_mean = np.mean([sd.samples for sd in dss_back], axis=0)
     svm = SVDMapper()
     svm.train(dss_mean)
     dss_sv = [svm.forward(sd) for sd in dss_back]
     # Test for SVD dimensionality reduction even with 2 training subjects
     for output_dim in [1, 4]:
         ha = Hyperalignment(output_dim=output_dim)
         ha.train(dss_rotated[:2])
         mappers = ha(dss_rotated)
         dss_back = [m.forward(ds_) for m, ds_ in zip(mappers, dss_rotated)]
         for sd in dss_back:
             assert (sd.nfeatures == output_dim)
     # Check if combined hpal+SVD works as expected
     sv_corrs = []
     for sd1, sd2 in zip(dss_sv, dss_back):
         ndcs = np.diag(np.corrcoef(sd1.samples.T, sd2.samples.T)[nf:, :nf],
                        k=0)
         sv_corrs.append(ndcs)
     self.assertTrue(
         np.all(np.abs(np.array(sv_corrs)) >= 0.95),
         msg="Hyperalignment with dimensionality reduction should have "
         "reconstructed SVD dataset. Got correlations %s." % sv_corrs)
     # Check if it recovers original SVs
     sv_corrs_orig = []
     for sd in dss_back:
         ndcs = np.diag(np.corrcoef(sd.samples.T,
                                    ds_orig.samples.T)[nf_true:, :nf_true],
                        k=0)
         sv_corrs_orig.append(ndcs)
     self.assertTrue(np.all(np.abs(np.array(sv_corrs_orig)) >= 0.9),
                     msg="Expected original dimensions after "
                     "SVD. Got correlations %s." % sv_corrs_orig)
Example No. 2
def _test_mcasey20120222():  # pragma: no cover
    # http://lists.alioth.debian.org/pipermail/pkg-exppsy-pymvpa/2012q1/002034.html

    # This one is conditioned on allowing # of samples to be changed
    # by the mapper provided to MappedClassifier.  See
    # https://github.com/yarikoptic/PyMVPA/tree/_tent/allow_ch_nsamples

    import numpy as np
    from mvpa2.datasets.base import dataset_wizard
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.mappers.base import ChainMapper
    from mvpa2.mappers.svd import SVDMapper
    from mvpa2.mappers.fx import mean_group_sample
    from mvpa2.clfs.svm import LinearCSVMC
    from mvpa2.clfs.meta import MappedClassifier
    from mvpa2.measures.base import CrossValidation

    mapper = ChainMapper([mean_group_sample(['targets','chunks']),
                          SVDMapper()])
    clf = MappedClassifier(LinearCSVMC(), mapper)
    cvte = CrossValidation(clf, NFoldPartitioner(),
                           enable_ca=['repetition_results', 'stats'])

    ds = dataset_wizard(
        samples=np.arange(32).reshape((8, -1)),
        targets=[1, 1, 2, 2, 1, 1, 2, 2],
        chunks=[1, 1, 1, 1, 2, 2, 2, 2])

    errors = cvte(ds)
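A hypothetical follow-up, reusing `errors` and `cvte` from the snippet and assuming the `allow_ch_nsamples` branch mentioned above is available so that the call actually succeeds:

import numpy as np
print(np.mean(errors.samples))   # mean classification error across the folds
print(cvte.ca.stats)             # confusion matrix enabled via 'stats' above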
Example No. 3
    def test_simple_svd(self):
        pm = SVDMapper()
        # train SVD
        pm.train(self.ndlin)

        self.assertEqual(pm.proj.shape, (20, 20))

        # now project data into PCA space
        p = pm.forward(self.ndlin)

        # only first eigenvalue significant
        self.assertTrue(pm.sv[:1] > 1.0)
        self.assertTrue((pm.sv[1:] < 0.0001).all())

        # only variance of first component significant
        var = p.var(axis=0)

        # test that only one component has variance
        self.assertTrue(var[:1] > 1.0)
        self.assertTrue((var[1:] < 0.0001).all())

        # check that the mapped data can be fully recovered by 'reverse()'
        pr = pm.reverse(p)

        self.assertEqual(pr.shape, (40, 20))
        self.assertTrue(np.abs(pm.reverse(p) - self.ndlin).sum() < 0.0001)
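A small follow-up sketch (not part of the test), reusing the trained `pm`: if the data were demeaned during training, the squared singular values in `pm.sv` are proportional to the variance captured per component, which gives a quick way to choose a cut-off.

import numpy as np
explained = pm.sv ** 2 / np.sum(pm.sv ** 2)           # fraction of variance per component
n_keep = int(np.searchsorted(np.cumsum(explained), 0.99)) + 1
print(explained[:3], n_keep)                          # here the first component should dominate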
Example No. 4
    def _get_hypesvs(self, sl_connectomes, local_common_model=None):
        '''
        Hyperalign connectomes and return the mappers
        and the trained SVDMapper of the common space.

        Parameters
        ----------
        sl_connectomes: a list of connectomes to hyperalign
        local_common_model: an optional reference common model to use.

        Returns
        -------
        a tuple (sl_hmappers, svm, local_common_model)
        sl_hmappers: a list of mappers corresponding to the input list, in that order.
        svm: an SVD mapper trained on the aligned input data; None if a common
            model was given.
        local_common_model: None if local_common_model was provided as input;
            otherwise it is computed here and returned.
        '''
        # TODO Should we z-score sl_connectomes?
        return_model = False if self.params.save_model is None else True
        if local_common_model is not None:
            ha = Hyperalignment(level2_niter=0)
            if not is_datasetlike(local_common_model):
                local_common_model = Dataset(samples=local_common_model)
            ha.train([local_common_model])
            sl_hmappers = ha(sl_connectomes)
            return sl_hmappers, None, None
        ha = Hyperalignment()
        sl_hmappers = ha(sl_connectomes)
        sl_connectomes = [
            slhm.forward(slc) for slhm, slc in zip(sl_hmappers, sl_connectomes)
        ]
        _ = [zscore(slc, chunks_attr=None) for slc in sl_connectomes]
        sl_connectomes = np.dstack(sl_connectomes).mean(axis=-1)
        svm = SVDMapper(force_train=True)
        svm.train(sl_connectomes)
        if return_model:
            local_common_model = svm.forward(sl_connectomes)
        else:
            local_common_model = None
        return sl_hmappers, svm, local_common_model
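A minimal sketch of the `local_common_model` branch above, assuming `common_model` is a Dataset holding the reference common space and `connectomes` is a list of subject Datasets with matching sample counts (both names are hypothetical):

from mvpa2.algorithms.hyperalignment import Hyperalignment

ha = Hyperalignment(level2_niter=0)   # no level-2 refinement against a fixed model
ha.train([common_model])              # the common space is taken from the given model
mappers = ha(connectomes)             # one projection per input connectome
aligned = [m.forward(c) for m, c in zip(mappers, connectomes)]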
Example No. 5
    def test_simple_svd(self):
        pm = SVDMapper()
        # train SVD
        pm.train(self.ndlin)

        self.failUnlessEqual(pm.proj.shape, (20, 20))

        # now project data into PCA space
        p = pm.forward(self.ndlin)

        # only first eigenvalue significant
        self.failUnless(pm.sv[:1] > 1.0)
        self.failUnless((pm.sv[1:] < 0.0001).all())

        # only variance of first component significant
        var = p.var(axis=0)

        # test that only one component has variance
        self.failUnless(var[:1] > 1.0)
        self.failUnless((var[1:] < 0.0001).all())

        # check that the mapped data can be fully recovered by 'reverse()'
        pr = pm.reverse(p)

        self.failUnlessEqual(pr.shape, (40,20))
        self.failUnless(np.abs(pm.reverse(p) - self.ndlin).sum() < 0.0001)
Example No. 6
    def test_more_svd(self):
        pm = SVDMapper()
        # train SVD
        pm.train(self.largefeat)

        # mixing matrix cannot be square
        self.assertEqual(pm.proj.shape, (40, 10))

        # only first singular value significant
        self.assertTrue(pm.sv[:1] > 10)
        self.assertTrue((pm.sv[1:] < 10).all())

        # now project data into SVD space
        p = pm.forward(self.largefeat)

        # only variance of first component significant
        var = p.var(axis=0)

        # test that only one component has variance
        self.assertTrue(var[:1] > 1.0)
        self.assertTrue((var[1:] < 0.0001).all())

        # check that the mapped data can be fully recovered by 'reverse()'
        rp = pm.reverse(p)
        self.assertEqual(rp.shape, self.largefeat.shape)
        self.assertTrue((np.round(rp) == self.largefeat).all())

        # copy mapper
        pm2 = deepcopy(pm)

        # now make new random data and do forward->reverse check
        data = np.random.normal(size=(98, 40))
        data_f = pm.forward(data)

        self.assertEqual(data_f.shape, (98, 10))

        data_r = pm.reverse(data_f)
        self.assertEqual(data_r.shape, (98, 40))
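A hedged sketch reusing the trained `pm` from this test: reducing fresh data to its k strongest SVD components is just a forward mapping followed by dropping the trailing columns.

import numpy as np
k = 3
new_data = np.random.normal(size=(98, 40))
reduced = pm.forward(new_data)[:, :k]   # keep only the first k components
assert reduced.shape == (98, k)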
Example No. 7
    def test_more_svd(self):
        pm = SVDMapper()
        # train SVD
        pm.train(self.largefeat)

        # mixing matrix cannot be square
        self.failUnlessEqual(pm.proj.shape, (40, 10))

        # only first singular value significant
        self.failUnless(pm.sv[:1] > 10)
        self.failUnless((pm.sv[1:] < 10).all())

        # now project data into SVD space
        p = pm.forward(self.largefeat)

        # only variance of first component significant
        var = p.var(axis=0)

        # test that only one component has variance
        self.failUnless(var[:1] > 1.0)
        self.failUnless((var[1:] < 0.0001).all())

        # check that the mapped data can be fully recovered by 'reverse()'
        rp = pm.reverse(p)
        self.failUnlessEqual(rp.shape, self.largefeat.shape)
        self.failUnless((np.round(rp) == self.largefeat).all())

        # copy mapper
        pm2 = deepcopy(pm)

        # now make new random data and do forward->reverse check
        data = np.random.normal(size=(98,40))
        data_f = pm.forward(data)

        self.failUnlessEqual(data_f.shape, (98,10))

        data_r = pm.reverse(data_f)
        self.failUnlessEqual(data_r.shape, (98,40))
Example No. 8
    def train(self, datasets):
        """Derive a common feature space from a series of datasets.

        Parameters
        ----------
        datasets : sequence of datasets

        Returns
        -------
        None
            The derived common space is stored on the instance; per-dataset
            mappers are obtained by calling the trained instance afterwards.
        """
        params = self.params  # for quicker access ;)
        ca = self.ca
        # Check to make sure we get a list of datasets as input.
        if not isinstance(datasets, (list, tuple, np.ndarray)):
            raise TypeError("Input datasets should be a sequence "
                            "(of type list, tuple, or ndarray) of datasets.")

        ndatasets = len(datasets)
        nfeatures = [ds.nfeatures for ds in datasets]
        alpha = params.alpha

        residuals = None
        if ca['training_residual_errors'].enabled:
            residuals = np.zeros((1 + params.level2_niter, ndatasets))
            ca.training_residual_errors = Dataset(
                samples=residuals,
                sa={
                    'levels':
                    ['1'] + ['2:%i' % i for i in range(params.level2_niter)]
                })

        if __debug__:
            debug('HPAL',
                  "Hyperalignment %s for %i datasets" % (self, ndatasets))

        if params.ref_ds is None:
            ref_ds = np.argmax(nfeatures)
        else:
            ref_ds = params.ref_ds
            # Make sure that ref_ds is within range.
            # Parameter() already checks for it being a non-negative integer.
            if ref_ds >= ndatasets:
                raise ValueError("Requested reference dataset %i is out of "
                                 "bounds. We have only %i datasets provided"
                                 % (ref_ds, ndatasets))
        ca.chosen_ref_ds = ref_ds
        # zscore all data sets
        # ds = [ zscore(ds, chunks_attr=None) for ds in datasets]

        # TODO since we are doing in-place zscoring create deep copies
        # of the datasets with pruned targets and shallow copies of
        # the collections (if they would come needed in the transformation)
        # TODO: handle floats and non-floats differently to prevent
        #       waste of memory if there is no need (e.g. no z-scoring)
        #otargets = [ds.sa.targets for ds in datasets]
        datasets = [ds.copy(deep=False) for ds in datasets]
        #datasets = [Dataset(ds.samples.astype(float), sa={'targets': [None] * len(ds)})
        #datasets = [Dataset(ds.samples, sa={'targets': [None] * len(ds)})
        #            for ds in datasets]

        if params.zscore_all:
            if __debug__:
                debug('HPAL', "Z-scoring all datasets")
            for ids in range(len(datasets)):
                zmapper = ZScoreMapper(chunks_attr=None)
                zmapper.train(datasets[ids])
                datasets[ids] = zmapper.forward(datasets[ids])

        if alpha < 1:
            datasets, wmappers = self._regularize(datasets, alpha)

        # initial common space is the reference dataset
        commonspace = datasets[ref_ds].samples
        # the reference dataset might have been zscored already, don't do it
        # twice
        if params.zscore_common and not params.zscore_all:
            if __debug__:
                debug(
                    'HPAL_', "Creating copy of a commonspace and assuring "
                    "it is of a floating type")
            commonspace = commonspace.astype(float)
            zscore(commonspace, chunks_attr=None)
        # If there is only one dataset in the training phase, there is nothing
        # to do: just use that data as the common space
        if len(datasets) < 2:
            self.commonspace = commonspace
        else:
            # create a mapper per dataset
            # might prefer some other way to initialize... later
            mappers = [deepcopy(params.alignment) for ds in datasets]

            #
            # Level 1 -- initial projection
            #
            lvl1_projdata = self._level1(datasets, commonspace, ref_ds,
                                         mappers, residuals)
            #
            # Level 2 -- might iterate multiple times
            #
            # this is the final common space
            self.commonspace = self._level2(datasets, lvl1_projdata, mappers,
                                            residuals)
        if params.output_dim is not None:
            mappers = self._level3(datasets)
            self._svd_mapper = SVDMapper()
            self._svd_mapper.train(self._map_and_mean(datasets, mappers))
            self._svd_mapper = StaticProjectionMapper(
                proj=self._svd_mapper.proj[:, :params.output_dim])
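For orientation, a usage sketch of the method above (the names `dss_train` and `dss_test` are hypothetical lists of Datasets with equal sample counts): `train()` only fixes the common space and, when `output_dim` is set, the truncated SVD projection; the per-dataset mappers come from calling the trained instance.

from mvpa2.algorithms.hyperalignment import Hyperalignment

ha = Hyperalignment(output_dim=10, ref_ds=0)
ha.train(dss_train)      # builds self.commonspace and the truncated SVD projection
mappers = ha(dss_test)   # one trained mapper per dataset
aligned = [m.forward(ds) for m, ds in zip(mappers, dss_test)]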
Example No. 9
# plotting backend and the mappers/data generators used below
import pylab as pl

from mvpa2 import cfg
from mvpa2.mappers.mdp_adaptor import ICAMapper, PCAMapper
from mvpa2.mappers.svd import SVDMapper
from mvpa2.misc.data_generators import noisy_2d_fx

center = [10, 20]
axis_range = 7


##REF: Name was automagically refactored
def plot_proj_dir(p):
    pl.plot([0, p[0, 0]], [0, p[0, 1]], linewidth=3, hold=True, color='y')
    pl.plot([0, p[1, 0]], [0, p[1, 1]], linewidth=3, hold=True, color='k')


mappers = {
    'PCA': PCAMapper(),
    'SVD': SVDMapper(),
    'ICA': ICAMapper(alg='CuBICA'),
}
datasets = [
    noisy_2d_fx(100, lambda x: x, [lambda x: x], center, noise_std=0.5),
    noisy_2d_fx(50,
                lambda x: x, [lambda x: x, lambda x: -x],
                center,
                noise_std=0.5),
    noisy_2d_fx(50,
                lambda x: x, [lambda x: x, lambda x: 0],
                center,
                noise_std=0.5),
]

ndatasets = len(datasets)
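The excerpt stops after the setup; a plausible continuation (a sketch, not necessarily the original script) trains a fresh SVDMapper per dataset and hands its 2x2 projection matrix to plot_proj_dir():

for ds in datasets:
    pl.figure()
    pl.scatter(ds.samples[:, 0], ds.samples[:, 1], s=5)
    pm = SVDMapper()
    pm.train(ds)              # two input features, so pm.proj is a 2x2 matrix
    plot_proj_dir(pm.proj)    # overlay both projection directions
if cfg.getboolean('examples', 'interactive', True):
    pl.show()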