示例#1
0
def test_mapper_vs_zscore():
    """Compare ZScoreMapper against the older zscore() function.

    For a synthetic 40x20 dataset plus every stock test dataset, check that

    * forwarding through an untrained mapper raises ``RuntimeError``;
    * training, forwarding a dataset, and forwarding a bare samples array
      all leave ``idhash(ds)`` and ``idhash(ds.samples)`` unchanged;
    * the forwarded samples agree with what ``zscore()`` produces on an
      independent copy of the same dataset.
    """
    # data: 40 sample feature line in 20d space (40x20; samples x features)
    line = np.concatenate([np.arange(40) for _ in range(20)])
    # list() is needed because on Python 3 dict.values() is a view object
    # which cannot be concatenated with a list
    dss = [
        dataset_wizard(line.reshape(20, -1).T, targets=1, chunks=1),
    ] + list(datasets.values())

    for ds in dss:
        # independent copies: one for the mapper, one for zscore()
        ds1 = deepcopy(ds)
        ds2 = deepcopy(ds)

        zsm = ZScoreMapper(chunks_attr=None)
        # an untrained mapper must refuse to forward
        assert_raises(RuntimeError, zsm.forward, ds1.samples)
        idhashes = (idhash(ds1), idhash(ds1.samples))
        zsm.train(ds1)
        idhashes_train = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_train)

        # forward dataset; only the absence of side effects on ds1 is
        # checked here, so the result is not kept
        zsm.forward(ds1)
        idhashes_forwardds = (idhash(ds1), idhash(ds1.samples))
        # must not modify samples in place!
        assert_equal(idhashes, idhashes_forwardds)

        # forward samples explicitly
        ds1z = zsm.forward(ds1.samples)
        idhashes_forward = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_forward)

        # reference result: zscore() operates on ds2 itself
        zscore(ds2, chunks_attr=None)
        assert_array_almost_equal(ds1z, ds2.samples)
        # the mapper's input must still equal the pristine original
        assert_array_equal(ds1.samples, ds.samples)
示例#2
0
def test_dataset_summary():
    """Smoke-test ``summary()`` on the stock datasets and an object dataset.

    Checks that the summary text starts with the dataset's ``str()`` form
    (without the surrounding angle brackets), that the expected section
    headings are present by default, and that none of them appear when a
    non-existent ``targets_attr`` is requested.
    """
    # list() is needed because on Python 3 dict.values() is a view object
    # which cannot be concatenated with a list
    all_ds = list(datasets.values()) + [Dataset(np.array([None], dtype=object))]
    for ds in all_ds:
        s = ds.summary()
        ok_(s.startswith(str(ds)[1:-1]))  # we strip surrounding '<...>'
        # TODO: actual test of what was returned; to do that properly
        #       RF the summary() so it is a dictionary

        # section headings expected given the sample attributes present
        summaries = []
        if 'targets' in ds.sa:
            summaries += ['Sequence statistics']
            if 'chunks' in ds.sa:
                summaries += ['Summary for targets', 'Summary for chunks']

        # By default we should get all kinds of summaries, unless the
        # summary carries the 'Number of unique targets >' marker
        if 'Number of unique targets >' not in s:
            for summary in summaries:
                ok_(summary in s)

        # If we give "wrong" targets_attr we should see none of summaries
        s2 = ds.summary(targets_attr='bogus')
        for summary in summaries:
            ok_(summary not in s2)
示例#3
0
def test_dataset_summary():
    """Exercise ``summary()`` for every stock dataset plus an object array.

    The summary must begin with the dataset's string representation
    (angle brackets stripped), contain the section headings implied by the
    available sample attributes, and contain none of those headings when
    asked to summarize a bogus ``targets_attr``.
    """
    # Materialize the values first: a Python 3 dict view does not support
    # ``+`` with a list
    candidates = list(datasets.values())
    candidates.append(Dataset(np.array([None], dtype=object)))

    for ds in candidates:
        s = ds.summary()
        ok_(s.startswith(str(ds)[1:-1]))  # we strip surrounding '<...>'
        # TODO: actual test of what was returned; to do that properly
        #       RF the summary() so it is a dictionary

        # headings we expect to find, depending on available attributes
        summaries = []
        if 'targets' in ds.sa:
            summaries.append('Sequence statistics')
            if 'chunks' in ds.sa:
                summaries.extend(['Summary for targets',
                                  'Summary for chunks'])

        # By default we should get all kinds of summaries; skipped when the
        # summary carries the 'Number of unique targets >' marker
        if 'Number of unique targets >' not in s:
            for summary in summaries:
                ok_(summary in s)

        # If we give "wrong" targets_attr we should see none of summaries
        s2 = ds.summary(targets_attr='bogus')
        for summary in summaries:
            ok_(summary not in s2)
示例#4
0
def test_mapper_vs_zscore():
    """Verify ZScoreMapper against the results of the older zscore().

    Runs over one synthetic dataset and all stock test datasets. For each:
    an untrained mapper must raise ``RuntimeError`` on forward; training and
    forwarding must not change ``idhash`` of the dataset or of its samples;
    and the forwarded samples must match those obtained by applying
    ``zscore()`` to a separate copy.
    """
    # data: 40 sample feature line in 20d space (40x20; samples x features)
    synthetic = dataset_wizard(
        np.concatenate([np.arange(40)
                        for _ in range(20)]).reshape(20, -1).T,
        targets=1,
        chunks=1)
    # dict.values() is a non-list view on Python 3, so convert it before
    # concatenating
    dss = [synthetic] + list(datasets.values())

    for ds in dss:
        # separate copies so the mapper and zscore() cannot interfere
        ds1 = deepcopy(ds)
        ds2 = deepcopy(ds)

        zsm = ZScoreMapper(chunks_attr=None)
        # forwarding before training is an error
        assert_raises(RuntimeError, zsm.forward, ds1.samples)
        idhashes = (idhash(ds1), idhash(ds1.samples))
        zsm.train(ds1)
        idhashes_train = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_train)

        # forward dataset -- result is discarded, we only care that ds1
        # itself is left untouched
        zsm.forward(ds1)
        idhashes_forwardds = (idhash(ds1), idhash(ds1.samples))
        # must not modify samples in place!
        assert_equal(idhashes, idhashes_forwardds)

        # forward samples explicitly
        ds1z = zsm.forward(ds1.samples)
        idhashes_forward = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_forward)

        # zscore() works on ds2 directly; its samples are the reference
        zscore(ds2, chunks_attr=None)
        assert_array_almost_equal(ds1z, ds2.samples)
        # input to the mapper still matches the untouched original
        assert_array_equal(ds1.samples, ds.samples)
示例#5
0
from mvpa2.base import externals

from mvpa2.datasets import dataset_wizard
from mvpa2.measures.fx import BinaryFxFeaturewiseMeasure
from mvpa2.measures.fx import targets_dcorrcoef
if externals.exists('statsmodels'):
    from mvpa2.measures.fx import targets_mutualinfo_kde

from mvpa2.testing import sweepargs
from mvpa2.testing.datasets import datasets as tdatasets
from mvpa2.testing import assert_array_almost_equal, assert_array_less, assert_equal, ok_

if __debug__:
    from mvpa2.base import debug

@sweepargs(ds=iter(tdatasets.values()))
def test_BinaryFxFeatureMeasure(ds):
    """Check BinaryFxFeaturewiseMeasure in ``uni=False`` and ``uni=True`` modes.

    Both variants wrap the same simple function and must yield a single
    output sample with matching values and feature attributes.  Datasets
    whose samples are not a plain ``np.ndarray`` are skipped.
    """
    if not isinstance(ds.samples, np.ndarray):
        return
    # some simple function
    # NOTE: keep this lambda exactly as written -- the str(fx) check at the
    # end of the test matches the beginning of its text
    f = lambda x, y: np.sum((x.T*y).T, axis=0)
    fx = BinaryFxFeaturewiseMeasure(f, uni=False, numeric=True)
    fx_uni = BinaryFxFeaturewiseMeasure(f, uni=True, numeric=True)
    out = fx(ds)
    out_uni = fx_uni(ds)
    # assert_equal instead of a bare assert: it is not stripped under -O and
    # reports both values on failure
    assert_equal(len(out), 1)
    assert_array_almost_equal(out.samples, out_uni)
    assert_equal(out.fa, out_uni.fa)
    ok_(str(fx).startswith("<BinaryFxFeaturewiseMeasure: lambda x, y:"))

_nonlin_tests = [(dataset_wizard([0, 1-0.01, 0, 1],
示例#6
0
        # things that might behave in surprising ways
        # lists -- first axis is samples, hence single feature
        ds = Dataset(range(5))
        assert_equal(ds.nfeatures, 1)
        assert_equal(ds.shape, (5, 1))
        # arrays of objects
        data = np.array([{}, {}])
        ds = Dataset(data)
        assert_equal(ds.shape, (2, 1))
        assert_equal(ds.nsamples, 2)
        # Nothing to index, hence no features
        assert_equal(ds.nfeatures, 1)


@sweepargs(ds=datasets.values() + [
    Dataset(np.array([None], dtype=object)),
    dataset_wizard(np.arange(3), targets=['a', 'bc', 'd'], chunks=np.arange(3)),
    dataset_wizard(np.arange(4), targets=['a', 'bc', 'a', 'bc'], chunks=[1, 1, 2, 2]),
    dataset_wizard(np.arange(4), targets=['a', 'bc', 'a', None], chunks=[1, 1, 2, 2]),
    ])
def test_dataset_summary(ds):
    s = ds.summary()
    ok_(s.startswith(str(ds)[1:-1])) # we strip surrounding '<...>'
    # TODO: actual test of what was returned; to do that properly
    #       RF the summary() so it is a dictionary

    summaries = []
    if 'targets' in ds.sa:
        summaries += ['Sequence statistics']
        if 'chunks' in ds.sa:
示例#7
0
        # things that might behave in surprising ways
        # lists -- first axis is samples, hence single feature
        ds = Dataset(range(5))
        assert_equal(ds.nfeatures, 1)
        assert_equal(ds.shape, (5, 1))
        # arrays of objects
        data = np.array([{}, {}])
        ds = Dataset(data)
        assert_equal(ds.shape, (2, 1))
        assert_equal(ds.nsamples, 2)
        # Nothing to index, hence no features
        assert_equal(ds.nfeatures, 1)


@sweepargs(ds=datasets.values() + [
    Dataset(np.array([None], dtype=object)),
    dataset_wizard(np.arange(3), targets=['a', 'bc', 'd'], chunks=np.arange(3)),
    dataset_wizard(np.arange(4), targets=['a', 'bc', 'a', 'bc'], chunks=[1, 1, 2, 2]),
    dataset_wizard(np.arange(4), targets=['a', 'bc', 'a', None], chunks=[1, 1, 2, 2]),
    ])
def test_dataset_summary(ds):
    s = ds.summary()
    ok_(s.startswith(str(ds)[1:-1])) # we strip surrounding '<...>'
    # TODO: actual test of what was returned; to do that properly
    #       RF the summary() so it is a dictionary

    summaries = []
    if 'targets' in ds.sa:
        summaries += ['Sequence statistics']
        if 'chunks' in ds.sa:
示例#8
0
        # things that might behave in surprising ways
        # lists -- first axis is samples, hence single feature
        ds = Dataset(list(range(5)))
        assert_equal(ds.nfeatures, 1)
        assert_equal(ds.shape, (5, 1))
        # arrays of objects
        data = np.array([{}, {}])
        ds = Dataset(data)
        assert_equal(ds.shape, (2, 1))
        assert_equal(ds.nsamples, 2)
        # Nothing to index, hence no features
        assert_equal(ds.nfeatures, 1)


@sweepargs(ds=list(datasets.values()) + [
    Dataset(np.array([None], dtype=object)),
    dataset_wizard(np.arange(3), targets=['a', 'bc', 'd'], chunks=np.arange(3)),
    dataset_wizard(np.arange(4), targets=['a', 'bc', 'a', 'bc'], chunks=[1, 1, 2, 2]),
    dataset_wizard(np.arange(4), targets=['a', 'bc', 'a', None], chunks=[1, 1, 2, 2]),
    ])
def test_dataset_summary(ds):
    s = ds.summary()
    ok_(s.startswith(str(ds)[1:-1])) # we strip surrounding '<...>'
    # TODO: actual test of what was returned; to do that properly
    #       RF the summary() so it is a dictionary

    summaries = []
    if 'targets' in ds.sa:
        summaries += ['Sequence statistics']
        if 'chunks' in ds.sa: