# Assumed imports for the tests below -- a sketch only. These tests originate
# from the PyMVPA test suite, where each module imports its own dependencies;
# the PyMVPA classes used here (Dataset, ChainMapper, FlattenMapper,
# BoxcarMapper, FeatureSliceMapper/StaticFeatureSelection, Splitter, Balancer,
# AttributePermutator, the partitioners, the collections, GLMNET_R, EEPBin,
# ...) come from the respective mvpa modules, whose exact paths differ
# between PyMVPA versions.
import os
import copy

import numpy as np
from numpy.testing import (assert_array_equal, assert_array_almost_equal,
                           assert_almost_equal)
from nose.tools import (ok_, assert_true, assert_false, assert_equal,
                        assert_raises)


def test_subset():
    # shallow copy() function is used below; imported locally since other
    # tests in this file use the copy module instead
    from copy import copy

    data = np.array(
        [[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
         [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
         [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
         [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]])
    # a float array doesn't work as a slicing argument
    sm = FeatureSliceMapper(np.ones(16))
    assert_raises(IndexError, sm.forward, data)

    # full mask
    sm = FeatureSliceMapper(slice(None))
    # should not change single samples
    assert_array_equal(sm.forward(data[0:1].copy()), data[0:1])
    # or multi-samples
    assert_array_equal(sm.forward(data.copy()), data)
    sm.train(data)
    # same on reverse
    assert_array_equal(sm.reverse(data[0:1].copy()), data[0:1])
    # or multi-samples
    assert_array_equal(sm.reverse(data.copy()), data)

    # identical mappers
    sm_none = FeatureSliceMapper(slice(None))
    sm_int = FeatureSliceMapper(np.arange(16))
    sm_bool = FeatureSliceMapper(np.ones(16, dtype='bool'))
    sms = [sm_none, sm_int, sm_bool]

    # test subsets
    sids = [3, 4, 5, 6]
    bsubset = np.zeros(16, dtype='bool')
    bsubset[sids] = True
    subsets = [sids, slice(3, 7), bsubset, [3, 3, 4, 4, 6, 6, 6, 5]]
    # all test subsets result in equivalent masks, hence should do the same to
    # the mapper and result in identical behavior
    for st in sms:
        for i, sub in enumerate(subsets):
            # shallow copy
            orig = copy(st)
            subsm = FeatureSliceMapper(sub)
            # should do copy-on-write for all important stuff!!
            assert_true(orig.is_mergable(subsm))
            orig += subsm
            # test if the selection did its job
            if i == 3:
                # special case of multiplying features
                assert_array_equal(orig.forward1(data[0].copy()), subsets[i])
            else:
                assert_array_equal(orig.forward1(data[0].copy()), sids)
    ## all of the above shouldn't change the original mapper
    #assert_array_equal(sm.get_mask(), np.arange(16))

    # check for some bug catchers
    # no 3D input
    #assert_raises(IndexError, sm.forward, np.ones((3,2,1)))
    # no input of wrong length
    if __debug__:
        # checked only in __debug__
        assert_raises(ValueError, sm.forward, np.ones(4))

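# NOTE: EEPBin and pymvpa_dataroot below are assumed to come from PyMVPA --
# the reader for EEP binary files and the path to the sample data shipped
# with the package; the exact import paths differ between PyMVPA versions.
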
def test_eep_bin():
    eb = EEPBin(os.path.join(pymvpa_dataroot, 'eep.bin'))

    assert_equal(eb.nchannels, 32)
    assert_equal(eb.nsamples, 2)
    assert_equal(eb.ntimepoints, 4)
    assert_true(eb.t0 - eb.dt < 0.00000001)
    assert_equal(len(eb.channels), 32)
    assert_equal(eb.data.shape, (2, 32, 4))

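# Several of the tests below (test_attrpermute, test_splitter, test_balancer)
# rely on a give_data() fixture that is not part of this excerpt. The
# following is a minimal sketch consistent with what the assertions expect
# (100 samples in 10 ordered chunks of 10, four target values tiled across
# samples, 10 features); the actual definition in the test suite may differ:
def give_data():
    return Dataset(np.random.randn(100, 10),
                   sa={'targets': np.tile([0, 1, 2, 3], 25),
                       'chunks': np.repeat(range(10), 10)})
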
def test_attrpermute():
    ds = give_data()
    ds.sa['ids'] = range(len(ds))
    pristine_data = ds.samples.copy()
    permutation = AttributePermutator(['targets', 'ids'], assure=True)
    pds = permutation(ds)
    # should not touch the data
    assert_array_equal(pristine_data, pds.samples)
    # should even keep the very same array
    assert_true(pds.samples.base is ds.samples)
    # there is no way that it can be the same attribute
    assert_false(np.all(pds.sa.ids == ds.sa.ids))
    # ids should reflect the permutation setup
    assert_array_equal(pds.sa.targets, ds.sa.targets[pds.sa.ids])
    # other attributes should remain intact
    assert_array_equal(pds.sa.chunks, ds.sa.chunks)

    # now chunk-wise permutation
    permutation = AttributePermutator('ids', limit='chunks')
    pds = permutation(ds)
    # the first ten should remain the first ten
    assert_false(np.any(pds.sa.ids[:10] > 9))

    # same thing, but only permute a single chunk
    permutation = AttributePermutator('ids', limit={'chunks': 3})
    pds = permutation(ds)
    # one chunk should change
    assert_false(np.any(pds.sa.ids[30:40] > 39))
    assert_false(np.any(pds.sa.ids[30:40] < 30))
    # the rest should not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # or a list of chunks
    permutation = AttributePermutator('ids', limit={'chunks': [3, 4]})
    pds = permutation(ds)
    # two chunks should change
    assert_false(np.any(pds.sa.ids[30:50] > 49))
    assert_false(np.any(pds.sa.ids[30:50] < 30))
    # the rest should not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # and now try generating more permutations
    nruns = 2
    permutation = AttributePermutator(['targets', 'ids'],
                                      assure=True, count=nruns)
    pds = list(permutation.generate(ds))
    assert_equal(len(pds), nruns)
    for p in pds:
        assert_false(np.all(p.sa.ids == ds.sa.ids))

    # permute feature attrs
    ds.fa['ids'] = range(ds.shape[1])
    permutation = AttributePermutator('fa.ids', assure=True)
    pds = permutation(ds)
    assert_false(np.all(pds.fa.ids == ds.fa.ids))

def test_splitter():
    ds = give_data()
    # split with defaults
    spl1 = Splitter('chunks')
    assert_raises(NotImplementedError, spl1, ds)

    splits = list(spl1.generate(ds))
    assert_equal(len(splits), len(ds.sa['chunks'].unique))

    for split in splits:
        # it should have performed basic slicing!
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.sa['chunks'].unique), 1)
        assert_true('lastsplit' in split.a)
    assert_true(splits[-1].a.lastsplit)

    # now again, more customized
    spl2 = Splitter('targets', attr_values=[0, 1, 1, 2, 3, 3, 3], count=4,
                    noslicing=True)
    splits = list(spl2.generate(ds))
    assert_equal(len(splits), 4)
    for split in splits:
        # it should NOT have performed basic slicing!
        assert_false(split.samples.base is ds.samples)
        assert_equal(len(split.sa['targets'].unique), 1)
        assert_equal(len(split.sa['chunks'].unique), 10)
    assert_true(splits[-1].a.lastsplit)

    # two should be identical
    assert_array_equal(splits[1].samples, splits[2].samples)

    # now go wild and split by a feature attribute
    ds.fa['roi'] = np.repeat([0, 1], 5)
    # the splitter should auto-detect that this is a feature attribute
    spl3 = Splitter('roi')
    splits = list(spl3.generate(ds))
    assert_equal(len(splits), 2)
    for split in splits:
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.fa['roi'].unique), 1)
        assert_equal(split.shape, (100, 5))

    # and finally test chained splitters
    cspl = ChainNode([spl2, spl3, spl1])
    splits = list(cspl.generate(ds))
    # 4 target splits x 2 roi splits x 10 chunk splits
    assert_equal(len(splits), 80)

def test_collections():
    sa = SampleAttributesCollection()
    assert_equal(len(sa), 0)

    assert_raises(ValueError, sa.__setitem__, 'test', 0)
    l = range(5)
    sa['test'] = l
    # auto-wrapped
    assert_true(isinstance(sa['test'], ArrayCollectable))
    assert_equal(len(sa), 1)

    # names which are already present in the dict interface
    assert_raises(ValueError, sa.__setitem__, 'values', range(5))

    sa_c = copy.deepcopy(sa)
    assert_equal(len(sa), len(sa_c))
    assert_array_equal(sa.test, sa_c.test)

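# NOTE: the GLMNET test below assumes two fixtures from the PyMVPA testing
# utilities: `datasets`, the warehouse of pre-generated test datasets, and
# `cfg`, PyMVPA's global configuration object. corr_error() is PyMVPA's
# error measure based on the correlation of predictions and targets.
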
def test_glmnet_r():
    # not the perfect dataset with which to test, but
    # it will do for now.
    #data = datasets['dumb2']
    # for some reason the R code fails with the dumb data
    data = datasets['chirp_linear']

    clf = GLMNET_R()

    clf.train(data)

    # prediction has to be almost perfect
    # test with a correlation
    pre = clf.predict(data.samples)
    corerr = corr_error(pre, data.targets)
    if cfg.getboolean('tests', 'labile', default='yes'):
        assert_true(corerr < .2)

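# NOTE: get_nelements_per_value() used below is assumed from PyMVPA's dataset
# helpers: it returns a dict mapping each unique attribute value to the
# number of elements carrying it, so its .values() are per-value counts.
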
def test_balancer():
    ds = give_data()
    # only mark the selection in an attribute
    bal = Balancer()
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    assert_true(ds.samples is res.samples.base)
    # should kick out 2 samples in each chunk of 10
    assert_almost_equal(np.mean(res.sa.balanced_set), 0.8)

    # same as above, but actually apply the selection
    bal = Balancer(apply_selection=True, count=5)
    # just run it once
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    # should kick out 2 samples in each chunk of 10
    assert_equal(len(res), int(0.8 * len(ds)))
    # now use it as a generator
    dses = list(bal.generate(ds))
    assert_equal(len(dses), 5)

    # with limit
    bal = Balancer(limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(res.sa['chunks'].unique, (3,))
    assert_equal(get_nelements_per_value(res.sa.targets).values(),
                 [2] * 4)
    # fixed amount
    bal = Balancer(amount=1, limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.sa.targets).values(),
                 [1] * 4)
    # fraction
    bal = Balancer(amount=0.499, limit=None, apply_selection=True)
    res = bal(ds)
    assert_array_equal(
        np.round(np.array(get_nelements_per_value(ds.sa.targets).values())
                 * 0.5),
        np.array(get_nelements_per_value(res.sa.targets).values()))

    # check on a feature attribute
    ds.fa['one'] = np.tile([1, 2], 5)
    ds.fa['chk'] = np.repeat([1, 2], 5)
    bal = Balancer(attr='one', amount=2, limit='chk', apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.fa.one).values(),
                 [4] * 2)

def test_array_collectable():
    c = ArrayCollectable()

    # empty by default
    assert_equal(c.name, None)
    assert_equal(c.value, None)

    # late assignment
    c.name = 'somename'
    assert_raises(ValueError, c._set, 12345)
    assert_equal(c.value, None)
    c.value = np.arange(5)
    assert_equal(c.name, 'somename')
    assert_array_equal(c.value, np.arange(5))

    # immediate content
    data = np.random.random(size=(3, 10))
    c = ArrayCollectable(data.copy(), 'myname', "This is a test", length=3)
    assert_equal(c.name, 'myname')
    assert_array_equal(c.value, data)
    assert_equal(c.__doc__, "This is a test")
    assert_equal(str(c), 'myname')

    # repr
    from numpy import array
    e = eval(repr(c))
    assert_equal(e.name, 'myname')
    assert_array_almost_equal(e.value, data)
    assert_equal(e.__doc__, "This is a test")

    # cannot assign an array of the wrong length
    assert_raises(ValueError, c._set, np.arange(5))
    assert_equal(len(c), 3)

    # shallow copy DOES create a view of the value array
    c.value = np.arange(3)
    d = copy.copy(c)
    assert_true(d.value.base is c.value)

    # names starting with _ are not allowed
    assert_raises(ValueError, c._set_name, "_underscore")

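# NOTE: the two test_slicing variants below are methods of unittest TestCase
# classes (a newer partitioner-based one and an older splitter-based one);
# their setUp() is assumed to provide self.data as a dataset with a 'chunks'
# sample attribute, e.g. (a hypothetical sketch):
#
#     class SplitterTests(unittest.TestCase):
#         def setUp(self):
#             self.data = give_data()
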
def test_slicing(self):
    hs = HalfPartitioner()
    spl = Splitter(attr='partitions')
    splits = list(hs.generate(self.data))
    for s in splits:
        # the partitioned dataset shares the data
        assert_true(s.samples.base is self.data.samples)
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
    for s in splits:
        # we get slicing all the time
        assert_true(s[0].samples.base.base is self.data.samples)
        assert_true(s[1].samples.base.base is self.data.samples)
    spl = Splitter(attr='partitions', noslicing=True)
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
    for s in splits:
        # we get no slicing at all
        assert_false(s[0].samples.base is self.data.samples)
        assert_false(s[1].samples.base is self.data.samples)
    nfs = NFoldPartitioner()
    spl = Splitter(attr='partitions')
    splits = [list(spl.generate(p)) for p in nfs.generate(self.data)]
    for i, s in enumerate(splits):
        # the training part gets sliced only in the first and last split
        if i == 0 or i == len(splits) - 1:
            assert_true(s[0].samples.base.base is self.data.samples)
        else:
            assert_true(s[0].samples.base is None)
        # we get slicing all the time
        assert_true(s[1].samples.base.base is self.data.samples)
    step_ds = Dataset(np.random.randn(20, 2),
                      sa={'chunks': np.tile([0, 1], 10)})
    oes = OddEvenPartitioner()
    spl = Splitter(attr='partitions')
    splits = list(oes.generate(step_ds))
    for s in splits:
        # the partitioned dataset shares the data
        assert_true(s.samples.base is step_ds.samples)
    splits = [list(spl.generate(p)) for p in oes.generate(step_ds)]
    assert_equal(len(splits), 2)
    for s in splits:
        # we get slicing all the time
        assert_true(s[0].samples.base.base is step_ds.samples)
        assert_true(s[1].samples.base.base is step_ds.samples)

def test_slicing(self):
    spl = HalfSplitter()
    splits = [(train, test) for (train, test) in spl(self.data)]
    for s in splits:
        # we get slicing all the time
        assert_true(s[0].samples.base is self.data.samples)
        assert_true(s[1].samples.base is self.data.samples)
    spl = HalfSplitter(noslicing=True)
    splits = [(train, test) for (train, test) in spl(self.data)]
    for s in splits:
        # we get no slicing at all
        assert_false(s[0].samples.base is self.data.samples)
        assert_false(s[1].samples.base is self.data.samples)
    spl = NFoldSplitter()
    splits = [(train, test) for (train, test) in spl(self.data)]
    for i, s in enumerate(splits):
        # the training part gets sliced only in the first and last split
        if i == 0 or i == len(splits) - 1:
            assert_true(s[0].samples.base is self.data.samples)
        else:
            assert_false(s[0].samples.base is self.data.samples)
        # we get slicing all the time
        assert_true(s[1].samples.base is self.data.samples)
    step_ds = Dataset(np.random.randn(20, 2),
                      sa={'chunks': np.tile([0, 1], 10)})
    spl = OddEvenSplitter()
    splits = [(train, test) for (train, test) in spl(step_ds)]
    assert_equal(len(splits), 2)
    for s in splits:
        # we get slicing all the time
        assert_true(s[0].samples.base is step_ds.samples)
        assert_true(s[1].samples.base is step_ds.samples)

def test_simpleboxcar():
    data = np.atleast_2d(np.arange(10)).T
    sp = np.arange(10)

    # check that the degenerate case doesn't work
    assert_raises(ValueError, BoxcarMapper, sp, 0)

    # now do an identity transformation
    bcm = BoxcarMapper(sp, 1)
    trans = bcm.forward(data)
    # ,0 is a feature below, so we get explicit 2D out of 1D
    assert_array_equal(trans[:, 0], data)

    # now check for illegal boxes
    if __debug__:
        # condition is checked only in __debug__
        assert_raises(ValueError, BoxcarMapper(sp, 2).train, data)

    # now something that should work
    nbox = 9
    boxlength = 2
    sp = np.arange(nbox)
    bcm = BoxcarMapper(sp, boxlength)
    trans = bcm(data)
    # check that it properly upcasts the dimensionality
    assert_equal(trans.shape, (nbox, boxlength) + data.shape[1:])
    # check actual values, squeezing the last dim for simplicity
    assert_array_equal(trans.squeeze(),
                       np.vstack((np.arange(9), np.arange(9) + 1)).T)

    # now test for proper data shape
    data = np.ones((10, 3, 4, 2))
    sp = [2, 4, 3, 5]
    trans = BoxcarMapper(sp, 4)(data)
    assert_equal(trans.shape, (4, 4, 3, 4, 2))

    # test reverse
    data = np.arange(240).reshape(10, 3, 4, 2)
    sp = [2, 4, 3, 5]
    boxlength = 2
    m = BoxcarMapper(sp, boxlength)
    m.train(data)
    mp = m.forward(data)
    assert_equal(mp.shape, (4, 2, 3, 4, 2))

    # try a full reconstruction
    mr = m.reverse(mp)
    # the shape has to match
    assert_equal(mr.shape, (len(sp) * boxlength,) + data.shape[1:])
    # only known samples are part of the results
    assert_true((mr >= 24).all())
    assert_true((mr < 168).all())
    # check proper reconstruction of a non-conflicting sample
    assert_array_equal(mr[0].ravel(), np.arange(48, 72))
    # check proper reconstruction of samples being part of multiple
    # mapped samples
    assert_array_equal(mr[1].ravel(), np.arange(72, 96))

    # test reverse of a single sample
    singlesample = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(singlesample, m.reverse1(singlesample))
    # this should not work for a shape mismatch, but it does work and is
    # useful when reverse-mapping sample attributes
    #assert_raises(ValueError, m.reverse, singlesample[0])

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    bc = m.forward1(np.arange(24).reshape(3, 4, 2))
    assert_array_equal(bc, np.array(2 * [np.arange(24).reshape(3, 4, 2)]))

def test_chainmapper():
    # the chain needs at least one mapper
    assert_raises(ValueError, ChainMapper, [])
    # a typical first mapper is to flatten
    cm = ChainMapper([FlattenMapper()])

    # a few container checks
    assert_equal(len(cm), 1)
    assert_true(isinstance(cm[0], FlattenMapper))

    # now training
    # come up with data
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape)
    pristinedata = data.copy()
    target = [[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target)

    # if it is not trained it knows nothing
    cm.train(data)

    # a new mapper should appear when doing feature selection
    cm.append(StaticFeatureSelection(range(1, 16)))
    assert_equal(cm.forward1(data[0]).shape, (15,))
    assert_equal(len(cm), 2)
    # multiple slicing
    cm.append(StaticFeatureSelection([9, 14]))
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # check reproduction
    if __debug__:
        # debug mode needs a special test as it enhances the repr output
        # with module info and an id() appendix for objects
        import mvpa
        cm_clone = eval(repr(cm))
        assert_equal("#".join(repr(cm_clone).split("#")[:-1]),
                     "#".join(repr(cm).split("#")[:-1]))
    else:
        cm_clone = eval(repr(cm))
        assert_equal(repr(cm_clone), repr(cm))

    # what happens if we retrain the whole beast on the same data as before
    cm.train(data)
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # let's map something
    mdata = cm.forward(data)
    assert_array_equal(mdata, target[:, [10, 15]])
    # and back
    rdata = cm.reverse(mdata)
    # original shape
    assert_equal(rdata.shape, data.shape)
    # content as far as it could be restored
    assert_array_equal(rdata[rdata > 0], data[rdata > 0])
    assert_equal(np.sum(rdata > 0), 8)

    # let's construct a dataset with the mapper assigned and see
    # if sub-selecting a feature adjusts the trailing StaticFeatureSelection
    # appropriately
    ds_subsel = Dataset.from_wizard(data, mapper=cm)[:, 1]
    tail_sfs = ds_subsel.a.mapper[-1]
    assert_equal(repr(tail_sfs),
                 "StaticFeatureSelection(slicearg=array([14]))")

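# The flatten test below checks that mappers preserve ndarray subclasses;
# `myarray` is such a subclass. A minimal sketch of the assumed fixture
# (the actual definition lives elsewhere in the test suite):
class myarray(np.ndarray):
    pass
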
def test_flatten():
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape).view(myarray)
    pristinedata = data.copy()
    target = [[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target).view(myarray)
    index_target = np.array([[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3],
                             [0, 1, 0], [0, 1, 1], [0, 1, 2], [0, 1, 3],
                             [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 0, 3],
                             [1, 1, 0], [1, 1, 1], [1, 1, 2], [1, 1, 3]])

    # the array subclass survives
    ok_(isinstance(data, myarray))

    # actually, there should be no difference between a plain FlattenMapper
    # and a chain that only has a FlattenMapper as its one element
    for fm in [FlattenMapper(space='voxel'),
               ChainMapper([FlattenMapper(space='voxel'),
                            StaticFeatureSelection(slice(None))])]:
        # not working if untrained
        assert_raises(RuntimeError,
                      fm.forward1,
                      np.arange(np.sum(samples_shape) + 1))

        fm.train(data)

        ok_(isinstance(fm.forward(data), myarray))
        ok_(isinstance(fm.forward1(data[2]), myarray))
        assert_array_equal(fm.forward(data), target)
        assert_array_equal(fm.forward1(data[2]), target[2])
        assert_raises(ValueError, fm.forward, np.arange(4))

        # all of that leaves the data unmodified
        assert_array_equal(data, pristinedata)

        # reverse mapping
        ok_(isinstance(fm.reverse(target), myarray))
        ok_(isinstance(fm.reverse1(target[0]), myarray))
        ok_(isinstance(fm.reverse(target[1:2]), myarray))
        assert_array_equal(fm.reverse(target), data)
        assert_array_equal(fm.reverse1(target[0]), data[0])
        assert_array_equal(fm.reverse(target[1:2]), data[1:2])
        assert_raises(ValueError, fm.reverse, np.arange(14))

        # check one-dimensional data, treated as scalar samples
        oned = np.arange(5)
        fm.train(Dataset(oned))
        # needs 2D
        assert_raises(ValueError, fm.forward, oned)
        # doesn't match the mapper, since Dataset turns `oned` into (5, 1)
        assert_raises(ValueError, fm.forward, oned)
        assert_equal(Dataset(oned).nfeatures, 1)

        # try dataset mode, with some feature attribute
        fattr = np.arange(np.prod(samples_shape)).reshape(samples_shape)
        ds = Dataset(data, fa={'awesome': fattr.copy()})
        assert_equal(ds.samples.shape, data_shape)
        fm.train(ds)
        dsflat = fm.forward(ds)
        ok_(isinstance(dsflat, Dataset))
        ok_(isinstance(dsflat.samples, myarray))
        assert_array_equal(dsflat.samples, target)
        assert_array_equal(dsflat.fa.awesome,
                           np.arange(np.prod(samples_shape)))
        assert_true(isinstance(dsflat.fa['awesome'], ArrayCollectable))
        # test index creation
        assert_array_equal(index_target, dsflat.fa.voxel)

        # and back
        revds = fm.reverse(dsflat)
        ok_(isinstance(revds, Dataset))
        ok_(isinstance(revds.samples, myarray))
        assert_array_equal(revds.samples, data)
        assert_array_equal(revds.fa.awesome, fattr)
        assert_true(isinstance(revds.fa['awesome'], ArrayCollectable))
        assert_false('voxel' in revds.fa)