def bench_pymvpa():
    #
    # .. PyMVPA ..
    #
    from mvpa.mappers.mdp_adaptor import PCAMapper as MVPA_PCA
    from mvpa.datasets import dataset_wizard
    # X and n_components are module-level globals of the benchmark script
    start = datetime.now()
    clf = MVPA_PCA(output_dim=n_components)
    data = dataset_wizard(samples=X)
    clf.train(data)
    return datetime.now() - start
def bench_pymvpa(X, y, T, valid):
    #
    # .. PyMVPA ..
    #
    from mvpa.datasets import dataset_wizard
    from mvpa.clfs import knn as mvpa_knn
    start = datetime.now()
    data = dataset_wizard(X, y)
    # n_neighbors is a module-level setting of the benchmark script
    mvpa_clf = mvpa_knn.kNN(k=n_neighbors)
    mvpa_clf.train(data)
    score = np.mean(mvpa_clf.predict(T) == valid)
    return score, datetime.now() - start
def bench_pymvpa(X, y, T, valid):
    #
    # .. PyMVPA ..
    #
    from mvpa.clfs import svm as mvpa_svm
    from mvpa.datasets import dataset_wizard
    tstart = datetime.now()
    data = dataset_wizard(X, y)
    clf = mvpa_svm.RbfCSVMC(C=1.)
    clf.train(data)
    score = np.mean(clf.predict(T) == valid)
    return score, datetime.now() - tstart
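# A minimal, hypothetical driver for the classification benchmarks above.
# The synthetic two-blob data, the sample counts, and the train/test split
# are illustrative assumptions, not part of the original benchmark scripts;
# it only relies on numpy and on the bench_pymvpa defined directly above.
import numpy as np


def make_blobs(n_per_class=200, n_features=20, seed=0):
    # two Gaussian blobs, shuffled and split into train/test halves
    rng = np.random.RandomState(seed)
    X0 = rng.randn(n_per_class, n_features) - 1.0
    X1 = rng.randn(n_per_class, n_features) + 1.0
    X = np.vstack([X0, X1])
    y = np.repeat([0, 1], n_per_class)
    perm = rng.permutation(len(y))
    X, y = X[perm], y[perm]
    half = len(y) // 2
    return X[:half], y[:half], X[half:], y[half:]


X, y, T, valid = make_blobs()
score, delta = bench_pymvpa(X, y, T, valid)
print("PyMVPA RBF C-SVM: accuracy=%.3f, time=%s" % (score, delta))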
def bench_pymvpa(X, y, T, valid):
    #
    # .. PyMVPA ..
    #
    from mvpa.clfs import svm
    from mvpa.datasets import dataset_wizard
    tstart = datetime.now()
    data = dataset_wizard(X, y)
    # sigma is a module-level setting of the benchmark script
    kernel = svm.RbfSVMKernel(gamma=1. / sigma)
    clf = svm.SVM(C=1., kernel=kernel)
    clf.train(data)
    score = np.mean(clf.predict(T) == valid)
    return score, datetime.now() - tstart
def bench_pymvpa(X, y, T, valid):
    #
    # .. PyMVPA ..
    #
    from mvpa.mappers.mdp_adaptor import PCAMapper
    from mvpa.datasets import dataset_wizard
    start = datetime.now()
    # n_components and explained_variance are defined at module level
    # in the benchmark script
    clf = PCAMapper(output_dim=n_components)
    data = dataset_wizard(samples=X)
    clf.train(data)
    delta = datetime.now() - start
    ev = explained_variance(X, clf.proj.T).sum()
    return ev, delta
def bench_pymvpa(X, y, T, valid):
    #
    # .. PyMVPA ..
    #
    from mvpa.datasets import dataset_wizard
    from mvpa.clfs import glmnet as mvpa_glmnet
    tstart = datetime.now()
    data = dataset_wizard(X, y)
    clf = mvpa_glmnet.GLMNET_R(alpha=.5)
    clf.train(data)
    mse = np.linalg.norm(clf.predict(T) - valid, 2) ** 2
    return mse, datetime.now() - tstart
def bench_pymvpa(X, y, T, valid):
    #
    # .. PyMVPA ..
    #
    from mvpa.datasets import dataset_wizard
    from mvpa.clfs import lars as mvpa_lars
    tstart = datetime.now()
    data = dataset_wizard(X, y)
    mvpa_clf = mvpa_lars.LARS()
    mvpa_clf.train(data)
    # BROKEN
    # mvpa_pred = mvpa_clf.predict(X)
    return None, datetime.now() - tstart
def bench_pymvpa(X, y, T, valid):
    #
    # .. PyMVPA ..
    #
    from mvpa.datasets import dataset_wizard
    from mvpa.clfs import lars
    start = datetime.now()
    data = dataset_wizard(X, y)
    clf = lars.LARS(model_type="lasso")
    clf.train(data)
    pred = clf.predict(T)
    delta = datetime.now() - start
    mse = np.linalg.norm(pred - valid, 2) ** 2
    return mse, delta
def bench_pymvpa(X, y, T, valid):
    #
    # .. PyMVPA ..
    #
    from mvpa.datasets import dataset_wizard
    from mvpa.clfs import glmnet
    start = datetime.now()
    data = dataset_wizard(X, y)
    clf = glmnet.GLMNET_R(alpha=.5)
    clf.train(data)
    pred = clf.predict(T)
    delta = datetime.now() - start
    mse = np.linalg.norm(pred - valid, 2) ** 2
    return mse, delta
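# A minimal, hypothetical driver for the regression benchmarks above
# (LARS/lasso and GLMNET). The random linear problem, its dimensions, and
# the noise level are illustrative assumptions; only numpy is needed on
# top of the bench_pymvpa functions defined above.
import numpy as np


def make_regression(n_train=200, n_test=200, n_features=50, seed=0):
    # dense random design matrix with a linear target plus small noise
    rng = np.random.RandomState(seed)
    coef = rng.randn(n_features)
    X = rng.randn(n_train + n_test, n_features)
    y = np.dot(X, coef) + 0.1 * rng.randn(n_train + n_test)
    return X[:n_train], y[:n_train], X[n_train:], y[n_train:]


X, y, T, valid = make_regression()
mse, delta = bench_pymvpa(X, y, T, valid)
print("PyMVPA GLMNET_R(alpha=.5): MSE=%.3f, time=%s" % (mse, delta))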
def _call(self, ds_):
    """Extract weights from GPR

    .. note::
       Input dataset is not actually used. A new dataset is
       constructed from what is known to the classifier.
    """
    clf = self.clf
    # normalize data:
    clf._train_labels = (clf._train_labels - clf._train_labels.mean()) \
                        / clf._train_labels.std()
    # clf._train_fv = (clf._train_fv - clf._train_fv.mean(0)) \
    #                 / clf._train_fv.std(0)
    ds = dataset_wizard(samples=clf._train_fv, targets=clf._train_labels)
    clf.ca.enable("log_marginal_likelihood")
    ms = ModelSelector(clf, ds)
    # Note that some kernels do not have a gradient yet!
    # XXX Make it initialize to clf's current hyperparameter values,
    #     or maybe add the ability to specify starting points in the constructor
    sigma_noise_initial = 1.0e-5
    sigma_f_initial = 1.0
    length_scale_initial = np.ones(ds.nfeatures) * 1.0e4
    # length_scale_initial = np.random.rand(ds.nfeatures) * 1.0e4
    hyp_initial_guess = np.hstack([sigma_noise_initial,
                                   sigma_f_initial,
                                   length_scale_initial])
    fixedHypers = array([0] * hyp_initial_guess.size, dtype=bool)
    # overridden right below: no hyperparameters are kept fixed
    fixedHypers = None
    problem = ms.max_log_marginal_likelihood(
        hyp_initial_guess=hyp_initial_guess,
        optimization_algorithm="scipy_lbfgsb",
        ftol=1.0e-3, fixedHypers=fixedHypers,
        use_gradient=True, logscale=True)
    if __debug__ and 'GPR_WEIGHTS' in debug.active:
        problem.iprint = 1
    lml = ms.solve()
    # weight = 1/length_scale
    weights = 1.0 / ms.hyperparameters_best[2:]
    if __debug__:
        debug("GPR",
              "%s, train: shape %s, labels %s, min:max %g:%g, "
              "sigma_noise %g, sigma_f %g"
              % (clf, clf._train_fv.shape, np.unique(clf._train_labels),
                 clf._train_fv.min(), clf._train_fv.max(),
                 ms.hyperparameters_best[0], ms.hyperparameters_best[1]))
    return weights
def test_polydetrend():
    samples_forwhole = np.array([[1.0, 2, 3, 4, 5, 6],
                                 [-2.0, -4, -6, -8, -10, -12]], ndmin=2).T
    samples_forchunks = np.array([[1.0, 2, 3, 3, 2, 1],
                                  [-2.0, -4, -6, -6, -4, -2]], ndmin=2).T
    chunks = [0, 0, 0, 1, 1, 1]
    chunks_bad = [0, 0, 1, 1, 1, 0]
    target_whole = np.array([[-3.0, -2, -1, 1, 2, 3],
                             [-6, -4, -2, 2, 4, 6]], ndmin=2).T
    target_chunked = np.array([[-1.0, 0, 1, 1, 0, -1],
                               [2, 0, -2, -2, 0, 2]], ndmin=2).T

    ds = Dataset(samples_forwhole)

    # this one will auto-train the mapper on first use
    dm = PolyDetrendMapper(polyord=1, inspace='police')
    mds = dm(ds)
    # features are linear trends, so detrending should remove all
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))
    # we get the information where each sample is assumed to be in the
    # space spanned by the polynomials
    assert_array_equal(mds.sa.police, np.arange(len(ds)))

    # hackish way to get the previous regressors into a dataset
    ds.sa['opt_reg_const'] = dm._regs[:, 0]
    ds.sa['opt_reg_lin'] = dm._regs[:, 1]
    # using these precomputed regressors, we should get the same result as
    # before even if we do not generate a regressor for linear
    dm_optreg = PolyDetrendMapper(polyord=0,
                                  opt_regs=['opt_reg_const', 'opt_reg_lin'])
    mds_optreg = dm_optreg(ds)
    assert_array_almost_equal(mds_optreg, np.zeros(mds.shape))

    ds = Dataset(samples_forchunks)
    # 'constant' detrending removes the mean
    mds = PolyDetrendMapper(polyord=0)(ds)
    assert_array_almost_equal(
        mds.samples,
        samples_forchunks - np.mean(samples_forchunks, axis=0))
    # if there is no GLOBAL linear trend it should be identical to mean removal
    # even if trying to remove linear
    mds2 = PolyDetrendMapper(polyord=1)(ds)
    assert_array_almost_equal(mds, mds2)

    # chunk-wise detrending
    ds = dataset_wizard(samples_forchunks, chunks=chunks)
    dm = PolyDetrendMapper(chunks_attr='chunks', polyord=1, inspace='police')
    mds = dm(ds)
    # features are chunkswise linear trends, so detrending should remove all
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))
    # we get the information where each sample is assumed to be in the
    # space spanned by the polynomials, which is the identical linspace in both
    # chunks
    assert_array_equal(mds.sa.police, range(3) * 2)
    # non-matching number of samples cannot be mapped
    assert_raises(ValueError, dm, ds[:-1])
    # however, if the dataset knows about the space it is possible
    ds.sa['police'] = mds.sa.police
    # XXX this should be
    #mds2 = dm(ds[1:-1])
    #assert_array_equal(mds[1:-1], mds2)
    # XXX but right now is
    assert_raises(NotImplementedError, dm, ds[1:-1])

    # Detrend must preserve the size of dataset
    assert_equal(mds.shape, ds.shape)

    # small additional test for break points
    # although they are no longer there
    ds = dataset_wizard(np.array([[1.0, 2, 3, 1, 2, 3]], ndmin=2).T,
                        targets=chunks, chunks=chunks)
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=1)(ds)
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))

    # test of different polyord on each chunk
    target_mixed = np.array([[-1.0, 0, 1, 0, 0, 0],
                             [2.0, 0, -2, 0, 0, 0]], ndmin=2).T
    ds = dataset_wizard(samples_forchunks.copy(),
                        targets=chunks, chunks=chunks)
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=[0, 1])(ds)
    assert_array_almost_equal(mds, target_mixed)

    # test irregular spacing of samples, but with corrective time info
    samples_forwhole = np.array([[1.0, 4, 6, 8, 2, 9],
                                 [-2.0, -8, -12, -16, -4, -18]], ndmin=2).T
    ds = Dataset(samples_forwhole, sa={'time': samples_forwhole[:, 0]})
    # linear detrending that makes use of temporal info from dataset
    dm = PolyDetrendMapper(polyord=1, inspace='time')
    mds = dm(ds)
    assert_array_almost_equal(mds.samples, np.zeros(mds.shape))

    # and now the same stuff, but with chunking and ordered by time
    samples_forchunks = np.array([[1.0, 3, 3, 2, 2, 1],
                                  [-2.0, -6, -6, -4, -4, -2]], ndmin=2).T
    chunks = [0, 1, 0, 1, 0, 1]
    time = [4, 4, 12, 8, 8, 12]
    ds = Dataset(samples_forchunks.copy(), sa={'chunks': chunks, 'time': time})
    mds = PolyDetrendMapper(chunks_attr='chunks', polyord=1, inspace='time')(ds)
    # the whole thing must not affect the source data
    assert_array_equal(ds, samples_forchunks)
    # but if done inplace that is no longer true
    poly_detrend(ds, chunks_attr='chunks', polyord=1, inspace='time')
    assert_array_equal(ds, mds)
def give_data():
    # 100x10, 10 chunks, 4 targets
    # NB: the chunk labels rely on Python 2 integer division
    #     (use i // 10 under Python 3)
    return dataset_wizard(np.random.normal(size=(100, 10)),
                          targets=[i % 4 for i in range(100)],
                          chunks=[i / 10 for i in range(100)])
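# A quick, hedged sanity check of the fixture above; the attribute access
# mirrors how datasets are used elsewhere in these snippets (ds.sa.targets,
# ds.sa.chunks), and the printed values are the expected ones, not asserted.
ds = give_data()
print(ds.shape)                  # expected: (100, 10)
print(np.unique(ds.sa.targets))  # expected: [0 1 2 3]
print(np.unique(ds.sa.chunks))   # expected: [0 1 2 3 4 5 6 7 8 9]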
def test_erdataset():
    # 3 chunks, 5 targets, blocks of 5 samples each
    nchunks = 3
    ntargets = 5
    blocklength = 5
    nfeatures = 10
    targets = np.tile(np.repeat(range(ntargets), blocklength), nchunks)
    chunks = np.repeat(np.arange(nchunks), ntargets * blocklength)
    samples = np.repeat(
        np.arange(nchunks * ntargets * blocklength),
        nfeatures).reshape(-1, nfeatures)
    ds = dataset_wizard(samples, targets=targets, chunks=chunks)
    # check if events are determined properly
    evs = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
    for ev in evs:
        assert_equal(ev['duration'], blocklength)
    assert_equal(ntargets * nchunks, len(evs))
    for t in range(ntargets):
        assert_equal(len([ev for ev in evs if ev['targets'] == t]), nchunks)
    # now turn `ds` into an event-related dataset
    erds = eventrelated_dataset(ds, evs)
    # the only unprefixed sample attributes are
    assert_equal(sorted([a for a in ds.sa if not a.startswith('event')]),
                 ['chunks', 'targets'])
    # samples as expected?
    assert_array_equal(erds.samples[0],
                       np.repeat(np.arange(blocklength), nfeatures))
    # that should also be the temporal feature offset
    assert_array_equal(erds.samples[0], erds.fa.event_offsetidx)
    assert_array_equal(erds.sa.event_onsetidx, np.arange(0, 71, 5))
    # finally we should see two mappers
    assert_equal(len(erds.a.mapper), 2)
    assert_true(isinstance(erds.a.mapper[0], BoxcarMapper))
    assert_true(isinstance(erds.a.mapper[1], FlattenMapper))
    #
    # now check the same dataset with event discretization
    tr = 2.5
    ds.sa['time'] = np.arange(nchunks * ntargets * blocklength) * tr
    evs = [{'onset': 4.9, 'duration': 6.2}]
    # doesn't work without conversion
    assert_raises(ValueError, eventrelated_dataset, ds, evs)
    erds = eventrelated_dataset(ds, evs, time_attr='time')
    assert_equal(len(erds), 1)
    assert_array_equal(erds.samples[0], np.repeat(np.arange(1, 5), nfeatures))
    assert_array_equal(erds.sa.orig_onset, [evs[0]['onset']])
    assert_array_equal(erds.sa.orig_duration, [evs[0]['duration']])
    assert_array_almost_equal(erds.sa.orig_offset, [2.4])
    assert_array_equal(erds.sa.time, [np.arange(2.5, 11, 2.5)])
    # now with closest match
    erds = eventrelated_dataset(ds, evs, time_attr='time', match='closest')
    expected_nsamples = 3
    assert_equal(len(erds), 1)
    assert_array_equal(erds.samples[0],
                       np.repeat(np.arange(2, 2 + expected_nsamples),
                                 nfeatures))
    assert_array_equal(erds.sa.orig_onset, [evs[0]['onset']])
    assert_array_equal(erds.sa.orig_duration, [evs[0]['duration']])
    assert_array_almost_equal(erds.sa.orig_offset, [-0.1])
    assert_array_equal(erds.sa.time, [np.arange(5.0, 11, 2.5)])
    # now test the way back
    results = np.arange(erds.nfeatures)
    assert_array_equal(erds.a.mapper.reverse1(results),
                       results.reshape(expected_nsamples, nfeatures))
    # what about multiple results?
    nresults = 5
    results = dataset_wizard([results] * nresults)
    # and let's have an attribute to make it more difficult
    results.sa['myattr'] = np.arange(5)
    rds = erds.a.mapper.reverse(results)
    assert_array_equal(rds,
                       results.samples.reshape(nresults * expected_nsamples,
                                               nfeatures))
    assert_array_equal(rds.sa.myattr,
                       np.repeat(results.sa.myattr, expected_nsamples))
def __call__(self, datasets):
    """Estimate mappers for each dataset

    Parameters
    ----------
    datasets : list or tuple of datasets

    Returns
    -------
    A list of trained Mappers of the same length as datasets
    """
    params = self.params            # for quicker access ;)
    ca = self.ca
    ndatasets = len(datasets)
    nfeatures = [ds.nfeatures for ds in datasets]

    residuals = None
    if ca['residual_errors'].enabled:
        residuals = np.zeros((2 + params.level2_niter, ndatasets))
        ca.residual_errors = Dataset(
            samples=residuals,
            sa={'levels': ['1'] +
                          ['2:%i' % i for i in xrange(params.level2_niter)] +
                          ['3']})

    if __debug__:
        debug('HPAL', "Hyperalignment %s for %i datasets"
              % (self, ndatasets))

    if params.ref_ds is None:
        ref_ds = np.argmax(nfeatures)
    else:
        ref_ds = params.ref_ds
        if ref_ds < 0 or ref_ds >= ndatasets:
            raise ValueError, "Requested reference dataset %i is out of " \
                  "bounds. We have only %i datasets provided" \
                  % (ref_ds, ndatasets)
    ca.choosen_ref_ds = ref_ds
    # might prefer some other way to initialize... later
    mappers = [deepcopy(params.alignment) for ds in datasets]
    # zscore all data sets
    # ds = [ zscore(ds, chunks_attr=None) for ds in datasets]

    # Level 1 (first)
    commonspace = np.asanyarray(datasets[ref_ds])
    if params.zscore_common:
        zscore(commonspace, chunks_attr=None)
    data_mapped = [np.asanyarray(ds) for ds in datasets]
    for i, (m, data) in enumerate(zip(mappers, data_mapped)):
        if __debug__:
            debug('HPAL_', "Level 1: ds #%i" % i)
        if i == ref_ds:
            continue
        #ZSC zscore(data, chunks_attr=None)
        ds = dataset_wizard(samples=data, targets=commonspace)
        #ZSC zscore(ds, chunks_attr=None)
        m.train(ds)
        data_temp = m.forward(data)
        #ZSC zscore(data_temp, chunks_attr=None)
        data_mapped[i] = data_temp
        if residuals is not None:
            residuals[0, i] = np.linalg.norm(data_temp - commonspace)

        ## if ds_mapped == []:
        ##     ds_mapped = [zscore(m.forward(d), chunks_attr=None)]
        ## else:
        ##     ds_mapped += [zscore(m.forward(d), chunks_attr=None)]

        # zscore before adding
        # TODO: make just a function so we don't waste space
        commonspace = params.combiner1(data_mapped[i], commonspace)
        if params.zscore_common:
            zscore(commonspace, chunks_attr=None)

    # update commonspace to mean of ds_mapped
    commonspace = params.combiner2(data_mapped)
    if params.zscore_common:
        zscore(commonspace, chunks_attr=None)

    # Level 2 -- might iterate multiple times
    for loop in xrange(params.level2_niter):
        for i, (m, ds) in enumerate(zip(mappers, datasets)):
            if __debug__:
                debug('HPAL_', "Level 2 (%i-th iteration): ds #%i"
                      % (loop, i))

            ## ds_temp = zscore( (commonspace*ndatasets - ds_mapped[i])
            ##                   /(ndatasets-1), chunks_attr=None )
            ds_new = ds.copy()
            #ZSC zscore(ds_new, chunks_attr=None)
            #PRJ ds_temp = (commonspace*ndatasets - ds_mapped[i])/(ndatasets-1)
            #ZSC zscore(ds_temp, chunks_attr=None)
            ds_new.targets = commonspace #PRJ ds_temp
            m.train(ds_new) # ds_temp)
            data_mapped[i] = m.forward(np.asanyarray(ds))
            if residuals is not None:
                residuals[1 + loop, i] = np.linalg.norm(data_mapped[i]
                                                        - commonspace)

            #ds_mapped[i] = zscore( m.forward(ds_temp), chunks_attr=None)

        commonspace = params.combiner2(data_mapped)
        if params.zscore_common:
            zscore(commonspace, chunks_attr=None)

    # Level 3 (last) to params.levels
    for i, (m, ds) in enumerate(zip(mappers, datasets)):
        if __debug__:
            debug('HPAL_', "Level 3: ds #%i" % i)

        ## ds_temp = zscore( (commonspace*ndatasets - ds_mapped[i])
        ##                   /(ndatasets-1), chunks_attr=None )
        ds_new = ds.copy()    # shallow copy so we could assign new labels
        #ZSC zscore(ds_new, chunks_attr=None)
        #PRJ ds_temp = (commonspace*ndatasets - ds_mapped[i])/(ndatasets-1)
        #ZSC zscore(ds_temp, chunks_attr=None)
        ds_new.targets = commonspace #PRJ ds_temp#
        m.train(ds_new) #ds_temp)

        if residuals is not None:
            data_mapped = m.forward(ds_new)
            residuals[-1, i] = np.linalg.norm(data_mapped - commonspace)

    return mappers
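# Hedged usage sketch for the method above: `hyper` stands for an instance
# of the class defining this __call__ (PyMVPA's Hyperalignment, judging by
# the parameter names); the constructor call and the `datasets` list are
# illustrative assumptions, not taken from the original source.
hyper = Hyperalignment()        # assumed constructor with default params
mappers = hyper(datasets)       # one trained mapper per input dataset
# project every dataset into the common space, as done internally above
aligned = [m.forward(np.asanyarray(ds))
           for m, ds in zip(mappers, datasets)]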