def test_mapper_vs_zscore():
    """Compare ZScoreMapper output against the older zscore() function.

    For every dataset under test the mapper must

    * raise RuntimeError when asked to forward before being trained,
    * leave the dataset and its samples untouched (same idhash) while
      training and while forwarding either a dataset or a plain array,
    * yield the same values as the in-place zscore() call on a copy.
    """
    # data: 40 sample feature line in 20d space (40x20; samples x features)
    dss = [
        dataset_wizard(
            np.concatenate(
                [np.arange(40) for _ in range(20)]).reshape(20, -1).T,
            targets=1, chunks=1),
    ] + list(datasets.values())
    # list(...) is required: on Python 3 dict.values() returns a view that
    # cannot be concatenated to a list with '+'.

    for ds in dss:
        # independent copies: ds1 goes through the mapper, ds2 through
        # zscore(), and ds stays as the pristine reference
        ds1 = deepcopy(ds)
        ds2 = deepcopy(ds)

        zsm = ZScoreMapper(chunks_attr=None)
        # forwarding through an untrained mapper must fail
        assert_raises(RuntimeError, zsm.forward, ds1.samples)
        idhashes = (idhash(ds1), idhash(ds1.samples))
        zsm.train(ds1)
        idhashes_train = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_train)

        # forward dataset (result itself is not inspected, only the
        # absence of side effects on the input)
        zsm.forward(ds1)
        idhashes_forwardds = (idhash(ds1), idhash(ds1.samples))
        # must not modify samples in place!
        assert_equal(idhashes, idhashes_forwardds)

        # forward samples explicitly
        ds1z = zsm.forward(ds1.samples)
        idhashes_forward = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_forward)

        # reference result from the function-based implementation
        zscore(ds2, chunks_attr=None)
        assert_array_almost_equal(ds1z, ds2.samples)
        assert_array_equal(ds1.samples, ds.samples)
def test_dataset_summary():
    """Check which sections show up in the text returned by ds.summary().

    Runs over all shared test datasets plus a one-element object-dtype
    dataset.  The summary must start with the dataset's str() (without the
    surrounding '<...>'), contain the expected section titles by default,
    and contain none of them when a non-existing targets_attr is given.
    """
    # list(...) is required: on Python 3 dict.values() returns a view that
    # cannot be concatenated to a list with '+'.
    all_datasets = list(datasets.values()) \
        + [Dataset(np.array([None], dtype=object))]

    for ds in all_datasets:
        s = ds.summary()
        ok_(s.startswith(str(ds)[1:-1]))  # we strip surrounding '<...>'
        # TODO: actual test of what was returned; to do that properly
        #       RF the summary() so it is a dictionary

        # section titles expected for this dataset, based on which sample
        # attributes it carries
        summaries = []
        if 'targets' in ds.sa:
            summaries += ['Sequence statistics']
            if 'chunks' in ds.sa:
                summaries += ['Summary for targets', 'Summary for chunks']

        # By default we should get all kinds of summaries
        if 'Number of unique targets >' not in s:
            for summary in summaries:
                ok_(summary in s)

        # If we give "wrong" targets_attr we should see none of summaries
        s2 = ds.summary(targets_attr='bogus')
        for summary in summaries:
            ok_(summary not in s2)
def test_mapper_vs_zscore():
    """Compare ZScoreMapper output against the older zscore() function.

    For every dataset under test the mapper must

    * raise RuntimeError when asked to forward before being trained,
    * leave the dataset and its samples untouched (same idhash) while
      training and while forwarding either a dataset or a plain array,
    * yield the same values as the in-place zscore() call on a copy.
    """
    # data: 40 sample feature line in 20d space (40x20; samples x features)
    line_samples = np.concatenate(
        [np.arange(40) for _ in range(20)]).reshape(20, -1).T
    # list(...) is required: on Python 3 dict.values() returns a view that
    # cannot be concatenated to a list with '+'.
    dss = [dataset_wizard(line_samples, targets=1, chunks=1)] \
        + list(datasets.values())

    for ds in dss:
        # independent copies: ds1 goes through the mapper, ds2 through
        # zscore(), and ds stays as the pristine reference
        ds1 = deepcopy(ds)
        ds2 = deepcopy(ds)

        zsm = ZScoreMapper(chunks_attr=None)
        # forwarding through an untrained mapper must fail
        assert_raises(RuntimeError, zsm.forward, ds1.samples)
        idhashes = (idhash(ds1), idhash(ds1.samples))
        zsm.train(ds1)
        idhashes_train = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_train)

        # forward dataset (result itself is not inspected, only the
        # absence of side effects on the input)
        zsm.forward(ds1)
        idhashes_forwardds = (idhash(ds1), idhash(ds1.samples))
        # must not modify samples in place!
        assert_equal(idhashes, idhashes_forwardds)

        # forward samples explicitly
        ds1z = zsm.forward(ds1.samples)
        idhashes_forward = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_forward)

        # reference result from the function-based implementation
        zscore(ds2, chunks_attr=None)
        assert_array_almost_equal(ds1z, ds2.samples)
        assert_array_equal(ds1.samples, ds.samples)
from mvpa2.base import externals
from mvpa2.datasets import dataset_wizard
from mvpa2.measures.fx import BinaryFxFeaturewiseMeasure
from mvpa2.measures.fx import targets_dcorrcoef
# targets_mutualinfo_kde is only imported when statsmodels is available
if externals.exists('statsmodels'):
    from mvpa2.measures.fx import targets_mutualinfo_kde

from mvpa2.testing import sweepargs
from mvpa2.testing.datasets import datasets as tdatasets
from mvpa2.testing import assert_array_almost_equal, assert_array_less, assert_equal, ok_

if __debug__:
    from mvpa2.base import debug

@sweepargs(ds=iter(tdatasets.values()))
def test_BinaryFxFeatureMeasure(ds):
    """Compare BinaryFxFeaturewiseMeasure results for uni=False and uni=True.

    Swept over the shared test datasets.  Both variants are built around
    the same function and called on the same dataset; the uni=False output
    must have length 1, its samples must match the uni=True output, the
    feature attributes of both outputs must be equal, and str() of the
    measure must start with the measure name followed by the lambda source.
    """
    # datasets whose samples are not a plain numpy array are not tested
    if not isinstance(ds.samples, np.ndarray):
        return
    # some simple function: scale x.T by y, transpose back, sum over axis 0
    # NOTE(review): presumably x holds samples and y the targets -- confirm
    # against BinaryFxFeaturewiseMeasure
    f = lambda x, y: np.sum((x.T*y).T, axis=0)
    fx = BinaryFxFeaturewiseMeasure(f, uni=False, numeric=True)
    fx_uni = BinaryFxFeaturewiseMeasure(f, uni=True, numeric=True)
    out = fx(ds)
    out_uni = fx_uni(ds)
    assert(len(out) == 1)
    assert_array_almost_equal(out.samples, out_uni)
    assert_equal(out.fa, out_uni.fa)
    ok_(str(fx).startswith("<BinaryFxFeaturewiseMeasure: lambda x, y:"))

# NOTE(review): the statement below is cut off in the visible source
_nonlin_tests = [(dataset_wizard([0, 1-0.01, 0, 1],
# things that might behave in surprising ways # lists -- first axis is samples, hence single feature ds = Dataset(range(5)) assert_equal(ds.nfeatures, 1) assert_equal(ds.shape, (5, 1)) # arrays of objects data = np.array([{}, {}]) ds = Dataset(data) assert_equal(ds.shape, (2, 1)) assert_equal(ds.nsamples, 2) # Nothing to index, hence no features assert_equal(ds.nfeatures, 1) @sweepargs(ds=datasets.values() + [ Dataset(np.array([None], dtype=object)), dataset_wizard(np.arange(3), targets=['a', 'bc', 'd'], chunks=np.arange(3)), dataset_wizard(np.arange(4), targets=['a', 'bc', 'a', 'bc'], chunks=[1, 1, 2, 2]), dataset_wizard(np.arange(4), targets=['a', 'bc', 'a', None], chunks=[1, 1, 2, 2]), ]) def test_dataset_summary(ds): s = ds.summary() ok_(s.startswith(str(ds)[1:-1])) # we strip surrounding '<...>' # TODO: actual test of what was returned; to do that properly # RF the summary() so it is a dictionary summaries = [] if 'targets' in ds.sa: summaries += ['Sequence statistics'] if 'chunks' in ds.sa:
# things that might behave in surprising ways # lists -- first axis is samples, hence single feature ds = Dataset(list(range(5))) assert_equal(ds.nfeatures, 1) assert_equal(ds.shape, (5, 1)) # arrays of objects data = np.array([{}, {}]) ds = Dataset(data) assert_equal(ds.shape, (2, 1)) assert_equal(ds.nsamples, 2) # Nothing to index, hence no features assert_equal(ds.nfeatures, 1) @sweepargs(ds=list(datasets.values()) + [ Dataset(np.array([None], dtype=object)), dataset_wizard(np.arange(3), targets=['a', 'bc', 'd'], chunks=np.arange(3)), dataset_wizard(np.arange(4), targets=['a', 'bc', 'a', 'bc'], chunks=[1, 1, 2, 2]), dataset_wizard(np.arange(4), targets=['a', 'bc', 'a', None], chunks=[1, 1, 2, 2]), ]) def test_dataset_summary(ds): s = ds.summary() ok_(s.startswith(str(ds)[1:-1])) # we strip surrounding '<...>' # TODO: actual test of what was returned; to do that properly # RF the summary() so it is a dictionary summaries = [] if 'targets' in ds.sa: summaries += ['Sequence statistics'] if 'chunks' in ds.sa: