Exemplo n.º 1
0
def test_repr():
    # this time give mask only by its target length
    sm = StaticFeatureSelection(slice(None), space='myspace')

    # check reproduction
    sm_clone = eval(repr(sm))
    assert_equal(repr(sm_clone), repr(sm))
Exemplo n.º 2
0
 def test_slicing(self):
     spl = HalfSplitter()
     splits = [(train, test) for (train, test) in spl(self.data)]
     for s in splits:
         # we get slicing all the time
         assert_true(s[0].samples.base is self.data.samples)
         assert_true(s[1].samples.base is self.data.samples)
     spl = HalfSplitter(noslicing=True)
     splits = [(train, test) for (train, test) in spl(self.data)]
     for s in splits:
         # we no slicing at all
         assert_false(s[0].samples.base is self.data.samples)
         assert_false(s[1].samples.base is self.data.samples)
     spl = NFoldSplitter()
     splits = [(train, test) for (train, test) in spl(self.data)]
     for i, s in enumerate(splits):
         # training only first and last split
         if i == 0 or i == len(splits) - 1:
             assert_true(s[0].samples.base is self.data.samples)
         else:
             assert_false(s[0].samples.base is self.data.samples)
         # we get slicing all the time
         assert_true(s[1].samples.base is self.data.samples)
     step_ds = Dataset(np.random.randn(20, 2),
                       sa={'chunks': np.tile([0, 1], 10)})
     spl = OddEvenSplitter()
     splits = [(train, test) for (train, test) in spl(step_ds)]
     assert_equal(len(splits), 2)
     for s in splits:
         # we get slicing all the time
         assert_true(s[0].samples.base is step_ds.samples)
         assert_true(s[1].samples.base is step_ds.samples)
Exemplo n.º 3
0
def test_sampleslicemapper():
    # this does nothing but Dataset.__getitem__ which is tested elsewhere -- but
    # at least we run it
    ds = datasets['uni2small']
    ssm = SampleSliceMapper(slice(3, 8, 2))
    sds = ssm(ds)
    assert_equal(len(sds), 3)
Exemplo n.º 4
0
def test_repr():
    # this time give mask only by its target length
    sm = FeatureSliceMapper(slice(None), inspace="myspace")

    # check reproduction
    sm_clone = eval(repr(sm))
    assert_equal(repr(sm_clone), repr(sm))
Exemplo n.º 5
0
def test_repeater():
    reps = 4
    r = Repeater(reps, space='OMG')
    dsl = [ds for ds in r.generate(Dataset([0,1]))]
    assert_equal(len(dsl), reps)
    for i, ds in enumerate(dsl):
        assert_equal(ds.a.OMG, i)
Exemplo n.º 6
0
def test_forward_dense_array_mapper():
    mask = np.ones((3, 2), dtype="bool")
    map_ = mask_mapper(mask)

    # test shape reports
    assert_equal(map_.forward1(mask).shape, (6,))

    # test 1sample mapping
    assert_array_equal(map_.forward1(np.arange(6).reshape(3, 2)), [0, 1, 2, 3, 4, 5])

    # test 4sample mapping
    foursample = map_.forward(np.arange(24).reshape(4, 3, 2))
    assert_array_equal(
        foursample, [[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16, 17], [18, 19, 20, 21, 22, 23]]
    )

    # check incomplete masks
    mask[1, 1] = 0
    map_ = mask_mapper(mask)
    assert_equal(map_.forward1(mask).shape, (5,))
    assert_array_equal(map_.forward1(np.arange(6).reshape(3, 2)), [0, 1, 2, 4, 5])

    # check that it doesn't accept wrong dataspace
    assert_raises(ValueError, map_.forward, np.arange(4).reshape(2, 2))

    # check fail if neither mask nor shape
    assert_raises(ValueError, mask_mapper)

    # check that a full mask is automatically created when providing shape
    m = mask_mapper(shape=(2, 3, 4))
    mp = m.forward1(np.arange(24).reshape(2, 3, 4))
    assert_array_equal(mp, np.arange(24))
Exemplo n.º 7
0
 def test_slicing(self):
     spl = HalfSplitter()
     splits = [ (train, test) for (train, test) in spl(self.data) ]
     for s in splits:
         # we get slicing all the time
         assert_true(s[0].samples.base is self.data.samples)
         assert_true(s[1].samples.base is self.data.samples)
     spl = HalfSplitter(noslicing=True)
     splits = [ (train, test) for (train, test) in spl(self.data) ]
     for s in splits:
         # we no slicing at all
         assert_false(s[0].samples.base is self.data.samples)
         assert_false(s[1].samples.base is self.data.samples)
     spl = NFoldSplitter()
     splits = [ (train, test) for (train, test) in spl(self.data) ]
     for i, s in enumerate(splits):
         # training only first and last split
         if i == 0 or i == len(splits) - 1:
             assert_true(s[0].samples.base is self.data.samples)
         else:
             assert_false(s[0].samples.base is self.data.samples)
         # we get slicing all the time
         assert_true(s[1].samples.base is self.data.samples)
     step_ds = Dataset(np.random.randn(20,2),
                       sa={'chunks': np.tile([0,1], 10)})
     spl = OddEvenSplitter()
     splits = [ (train, test) for (train, test) in spl(step_ds) ]
     assert_equal(len(splits), 2)
     for s in splits:
         # we get slicing all the time
         assert_true(s[0].samples.base is step_ds.samples)
         assert_true(s[1].samples.base is step_ds.samples)
Exemplo n.º 8
0
    def test_simple_n_minus_one_cv(self):
        data = get_mv_pattern(3)
        data.init_origids('samples')

        self.failUnless( data.nsamples == 120 )
        self.failUnless( data.nfeatures == 2 )
        self.failUnless(
            (data.sa.targets == \
                [0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0] * 6).all())
        self.failUnless(
            (data.sa.chunks == \
                [k for k in range(1, 7) for i in range(20)]).all())
        assert_equal(len(np.unique(data.sa.origids)), data.nsamples)

        transerror = TransferError(sample_clf_nl)
        cv = CrossValidatedTransferError(
                transerror,
                NFoldSplitter(cvtype=1),
                enable_ca=['confusion', 'training_confusion',
                               'samples_error'])

        results = cv(data)
        self.failUnless((results.samples < 0.2).all() and (results.samples >= 0.0).all())

        # TODO: test accessibility of {training_,}confusion{,s} of
        # CrossValidatedTransferError

        self.failUnless(isinstance(cv.ca.samples_error, dict))
        self.failUnless(len(cv.ca.samples_error) == data.nsamples)
        # one value for each origid
        assert_array_equal(sorted(cv.ca.samples_error.keys()),
                           sorted(data.sa.origids))
        for k, v in cv.ca.samples_error.iteritems():
            self.failUnless(len(v) == 1)
Exemplo n.º 9
0
    def test_simple_n_minus_one_cv(self):
        data = get_mv_pattern(3)
        data.init_origids('samples')

        self.failUnless(data.nsamples == 120)
        self.failUnless(data.nfeatures == 2)
        self.failUnless(
            (data.sa.targets == \
                [0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0] * 6).all())
        self.failUnless(
            (data.sa.chunks == \
                [k for k in range(1, 7) for i in range(20)]).all())
        assert_equal(len(np.unique(data.sa.origids)), data.nsamples)

        transerror = TransferError(sample_clf_nl)
        cv = CrossValidatedTransferError(
            transerror,
            NFoldSplitter(cvtype=1),
            enable_ca=['confusion', 'training_confusion', 'samples_error'])

        results = cv(data)
        self.failUnless((results.samples < 0.2).all()
                        and (results.samples >= 0.0).all())

        # TODO: test accessibility of {training_,}confusion{,s} of
        # CrossValidatedTransferError

        self.failUnless(isinstance(cv.ca.samples_error, dict))
        self.failUnless(len(cv.ca.samples_error) == data.nsamples)
        # one value for each origid
        assert_array_equal(sorted(cv.ca.samples_error.keys()),
                           sorted(data.sa.origids))
        for k, v in cv.ca.samples_error.iteritems():
            self.failUnless(len(v) == 1)
Exemplo n.º 10
0
def test_repr():
    # this time give mask only by its target length
    sm = FeatureSliceMapper(slice(None), inspace='myspace')

    # check reproduction
    sm_clone = eval(repr(sm))
    assert_equal(repr(sm_clone), repr(sm))
Exemplo n.º 11
0
def test_strip_boundary():
    ds = datasets['hollow']
    ds.sa['btest'] = np.repeat([0,1], 20)
    sn = StripBoundariesSamples('btest', 1, 2)
    sds = sn(ds)
    assert_equal(len(sds), len(ds) - 3)
    for i in [19, 20, 21]:
        assert_false(i in sds.samples.sid)
Exemplo n.º 12
0
def test_eep_bin():
    eb = EEPBin(os.path.join(pymvpa_dataroot, 'eep.bin'))

    assert_equal(eb.nchannels, 32)
    assert_equal(eb.nsamples, 2)
    assert_equal(eb.ntimepoints, 4)
    assert_true(eb.t0 - eb.dt < 0.00000001)
    assert_equal(len(eb.channels), 32)
    assert_equal(eb.data.shape, (2, 32, 4))
Exemplo n.º 13
0
def test_sphere():
    # test sphere initialization
    s = ne.Sphere(1)
    center0 = (0, 0, 0)
    center1 = (1, 1, 1)
    assert_equal(len(s(center0)), 7)
    target = array([array([-1,  0,  0]),
              array([ 0, -1,  0]),
              array([ 0,  0, -1]),
              array([0, 0, 0]),
              array([0, 0, 1]),
              array([0, 1, 0]),
              array([1, 0, 0])])
    # test of internals -- no recomputation of increments should be done
    prev_increments = s._increments
    assert_array_equal(s(center0), target)
    ok_(prev_increments is s._increments)
    # query lower dimensionality
    _ = s((0, 0))
    ok_(not prev_increments is s._increments)

    # test Sphere call
    target = [array([0, 1, 1]),
              array([1, 0, 1]),
              array([1, 1, 0]),
              array([1, 1, 1]),
              array([1, 1, 2]),
              array([1, 2, 1]),
              array([2, 1, 1])]
    res = s(center1)
    assert_array_equal(array(res), target)
    # They all should be tuples
    ok_(np.all([isinstance(x, tuple) for x in res]))

    # test for larger diameter
    s = ne.Sphere(4)
    assert_equal(len(s(center1)), 257)

    # test extent keyword
    #s = ne.Sphere(4,extent=(1,1,1))
    #assert_array_equal(array(s((0,0,0))), array([[0,0,0]]))

    # test Errors during initialisation and call
    #assert_raises(ValueError, ne.Sphere, 2)
    #assert_raises(ValueError, ne.Sphere, 1.0)

    # no longer extent available
    assert_raises(TypeError, ne.Sphere, 1, extent=(1))
    assert_raises(TypeError, ne.Sphere, 1, extent=(1.0, 1.0, 1.0))

    s = ne.Sphere(1)
    #assert_raises(ValueError, s, (1))
    if __debug__:
        # No float coordinates allowed for now...
        # XXX might like to change that ;)
        # 
        assert_raises(ValueError, s, (1.0, 1.0, 1.0))
Exemplo n.º 14
0
def test_eep_bin():
    eb = EEPBin(os.path.join(pymvpa_dataroot, 'eep.bin'))

    assert_equal(eb.nchannels, 32)
    assert_equal(eb.nsamples, 2)
    assert_equal(eb.ntimepoints, 4)
    assert_true(eb.t0 - eb.dt < 0.00000001)
    assert_equal(len(eb.channels), 32)
    assert_equal(eb.data.shape, (2, 32, 4))
Exemplo n.º 15
0
def test_attrpermute():
    ds = give_data()
    ds.sa['ids'] = range(len(ds))
    pristine_data = ds.samples.copy()
    permutation = AttributePermutator(['targets', 'ids'], assure=True)
    pds = permutation(ds)
    # should not touch the data
    assert_array_equal(pristine_data, pds.samples)
    # even keep the very same array
    assert_true(pds.samples.base is ds.samples)
    # there is no way that it can be the same attribute
    assert_false(np.all(pds.sa.ids == ds.sa.ids))
    # ids should reflect permutation setup
    assert_array_equal(pds.sa.targets, ds.sa.targets[pds.sa.ids])
    # other attribute should remain intact
    assert_array_equal(pds.sa.chunks, ds.sa.chunks)

    # now chunk-wise permutation
    permutation = AttributePermutator('ids', limit='chunks')
    pds = permutation(ds)
    # first ten should remain first ten
    assert_false(np.any(pds.sa.ids[:10] > 9))

    # same thing, but only permute single chunk
    permutation = AttributePermutator('ids', limit={'chunks': 3})
    pds = permutation(ds)
    # one chunk should change
    assert_false(np.any(pds.sa.ids[30:40] > 39))
    assert_false(np.any(pds.sa.ids[30:40] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # or a list of chunks
    permutation = AttributePermutator('ids', limit={'chunks': [3,4]})
    pds = permutation(ds)
    # two chunks should change
    assert_false(np.any(pds.sa.ids[30:50] > 49))
    assert_false(np.any(pds.sa.ids[30:50] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # and now try generating more permutations
    nruns = 2
    permutation = AttributePermutator(['targets', 'ids'], assure=True, count=nruns)
    pds = list(permutation.generate(ds))
    assert_equal(len(pds), nruns)
    for p in pds:
        assert_false(np.all(p.sa.ids == ds.sa.ids))

    # permute feature attrs
    ds.fa['ids'] = range(ds.shape[1])
    permutation = AttributePermutator('fa.ids', assure=True)
    pds = permutation(ds)
    assert_false(np.all(pds.fa.ids == ds.fa.ids))
Exemplo n.º 16
0
def test_glmnet_r_sensitivities():
    data = datasets['chirp_linear']

    clf = GLMNET_R()

    clf.train(data)

    # now ask for the sensitivities WITHOUT having to pass the dataset
    # again
    sens = clf.get_sensitivity_analyzer(force_train=False)(None)

    assert_equal(sens.shape, (1, data.nfeatures))
Exemplo n.º 17
0
def test_glmnet_r_sensitivities():
    data = datasets['chirp_linear']

    clf = GLMNET_R()

    clf.train(data)

    # now ask for the sensitivities WITHOUT having to pass the dataset
    # again
    sens = clf.get_sensitivity_analyzer(force_training=False)()

    assert_equal(sens.shape, (1, data.nfeatures))
Exemplo n.º 18
0
def test_glmnet_c_sensitivities():
    data = normal_feature_dataset(perlabel=10, nlabels=2, nfeatures=4)

    # use GLMNET on binary problem
    clf = GLMNET_C()
    clf.train(data)

    # now ask for the sensitivities WITHOUT having to pass the dataset
    # again
    sens = clf.get_sensitivity_analyzer(force_training=False)()

    #failUnless(sens.shape == (data.nfeatures,))
    assert_equal(sens.shape, (len(data.UT), data.nfeatures))
Exemplo n.º 19
0
def test_glmnet_c_sensitivities():
    data = normal_feature_dataset(perlabel=10, nlabels=2, nfeatures=4)

    # use GLMNET on binary problem
    clf = GLMNET_C()
    clf.train(data)

    # now ask for the sensitivities WITHOUT having to pass the dataset
    # again
    sens = clf.get_sensitivity_analyzer(force_train=False)(None)

    #failUnless(sens.shape == (data.nfeatures,))
    assert_equal(sens.shape, (len(data.UT), data.nfeatures))
Exemplo n.º 20
0
 def test_harvesting(self):
     # get a dataset with a very high SNR
     data = get_mv_pattern(10)
     # do crossval with default errorfx and 'mean' combiner
     transerror = TransferError(clfswh['linear'][0])
     cv = CrossValidatedTransferError(
             transerror,
             NFoldSplitter(cvtype=1),
             harvest_attribs=['transerror.clf.ca.training_time'])
     result = cv(data)
     ok_(cv.ca.harvested.has_key('transerror.clf.ca.training_time'))
     assert_equal(len(cv.ca.harvested['transerror.clf.ca.training_time']),
                  len(data.UC))
Exemplo n.º 21
0
 def test_harvesting(self):
     # get a dataset with a very high SNR
     data = get_mv_pattern(10)
     # do crossval with default errorfx and 'mean' combiner
     transerror = TransferError(clfswh['linear'][0])
     cv = CrossValidatedTransferError(
         transerror,
         NFoldSplitter(cvtype=1),
         harvest_attribs=['transerror.clf.ca.training_time'])
     result = cv(data)
     ok_(cv.ca.harvested.has_key('transerror.clf.ca.training_time'))
     assert_equal(len(cv.ca.harvested['transerror.clf.ca.training_time']),
                  len(data.UC))
Exemplo n.º 22
0
def test_attrmap_repr():
    assert_equal(repr(AttributeMap()), "AttributeMap()")
    assert_equal(repr(AttributeMap(dict(a=2, b=1))),
                 "AttributeMap({'a': 2, 'b': 1})")
    assert_equal(repr(AttributeMap(dict(a=2, b=1), mapnumeric=True)),
                 "AttributeMap({'a': 2, 'b': 1}, mapnumeric=True)")
    assert_equal(repr(AttributeMap(dict(a=2, b=1), mapnumeric=True, collisions_resolution='tuple')),
                 "AttributeMap({'a': 2, 'b': 1}, mapnumeric=True, collisions_resolution='tuple')")
Exemplo n.º 23
0
def test_sifter():
    # somewhat duplicating the doctest
    ds = Dataset(samples=np.arange(8).reshape((4,2)),
                 sa={'chunks':   [ 0 ,  1 ,  2 ,  3 ],
                     'targets':  ['c', 'c', 'p', 'p']})
    par = ChainNode([NFoldPartitioner(cvtype=2, attr='chunks'),
                     Sifter([('partitions', 2),
                             ('targets', ['c', 'p'])])
                     ])
    dss = list(par.generate(ds))
    assert_equal(len(dss), 4)
    for ds_ in dss:
        testing = ds[ds_.sa.partitions == 2]
        assert_array_equal(np.unique(testing.sa.targets), ['c', 'p'])
        # and we still have both targets  present in training
        training = ds[ds_.sa.partitions == 1]
        assert_array_equal(np.unique(training.sa.targets), ['c', 'p'])
Exemplo n.º 24
0
def test_featuregroup_mapper():
    ds = Dataset(np.arange(24).reshape(3,8))
    ds.fa['roi'] = [0, 1] * 4
    # just to check
    ds.sa['chunks'] = np.arange(3)

    # correct results
    csamples = [[3, 4], [11, 12], [19, 20]]
    croi = [0, 1]
    cchunks = np.arange(3)

    m = mean_group_feature(['roi'])
    mds = m.forward(ds)
    assert_equal(mds.shape, (3, 2))
    assert_array_equal(mds.samples, csamples)
    assert_array_equal(mds.fa.roi, np.unique([0, 1] * 4))
    # FAs should simply remain the same
    assert_array_equal(mds.sa.chunks, np.arange(3))
Exemplo n.º 25
0
    def test_discarded_boundaries(self):
        ds = datasets['hollow']
        # four runs
        ds.sa['chunks'] = np.repeat(np.arange(4), 10)
        # do odd even splitting for lots of boundaries in few splits
        part = ChainNode([OddEvenPartitioner(),
                          StripBoundariesSamples('chunks', 1, 2)])

        parts = [d.samples.sid for d in part.generate(ds)]

        # both dataset should have the same samples, because the boundaries are
        # identical and the same sample should be stripped
        assert_array_equal(parts[0], parts[1])

        # we strip 3 samples per boundary
        assert_equal(len(parts[0]), len(ds) - (3 * 3))

        for i in [9, 10, 11, 19, 20, 21, 29, 30, 31]:
            assert_false(i in parts[0])
Exemplo n.º 26
0
 def test_slicing(self):
     hs = HalfPartitioner()
     spl = Splitter(attr='partitions')
     splits = list(hs.generate(self.data))
     for s in splits:
         # partitioned dataset shared the data
         assert_true(s.samples.base is self.data.samples)
     splits = [ list(spl.generate(p)) for p in hs.generate(self.data) ]
     for s in splits:
         # we get slicing all the time
         assert_true(s[0].samples.base.base is self.data.samples)
         assert_true(s[1].samples.base.base is self.data.samples)
     spl = Splitter(attr='partitions', noslicing=True)
     splits = [ list(spl.generate(p)) for p in hs.generate(self.data) ]
     for s in splits:
         # we no slicing at all
         assert_false(s[0].samples.base is self.data.samples)
         assert_false(s[1].samples.base is self.data.samples)
     nfs = NFoldPartitioner()
     spl = Splitter(attr='partitions')
     splits = [ list(spl.generate(p)) for p in nfs.generate(self.data) ]
     for i, s in enumerate(splits):
         # training only first and last split
         if i == 0 or i == len(splits) - 1:
             assert_true(s[0].samples.base.base is self.data.samples)
         else:
             assert_true(s[0].samples.base is None)
         # we get slicing all the time
         assert_true(s[1].samples.base.base is self.data.samples)
     step_ds = Dataset(np.random.randn(20,2),
                       sa={'chunks': np.tile([0,1], 10)})
     oes = OddEvenPartitioner()
     spl = Splitter(attr='partitions')
     splits = list(oes.generate(step_ds))
     for s in splits:
         # partitioned dataset shared the data
         assert_true(s.samples.base is step_ds.samples)
     splits = [ list(spl.generate(p)) for p in oes.generate(step_ds) ]
     assert_equal(len(splits), 2)
     for s in splits:
         # we get slicing all the time
         assert_true(s[0].samples.base.base is step_ds.samples)
         assert_true(s[1].samples.base.base is step_ds.samples)
Exemplo n.º 27
0
    def test_simple_n_minus_one_cv(self):
        data = get_mv_pattern(3)
        data.init_origids('samples')

        self.failUnless( data.nsamples == 120 )
        self.failUnless( data.nfeatures == 2 )
        self.failUnless(
            (data.sa.targets == \
                [0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0] * 6).all())
        self.failUnless(
            (data.sa.chunks == \
                [k for k in range(1, 7) for i in range(20)]).all())
        assert_equal(len(np.unique(data.sa.origids)), data.nsamples)

        cv = CrossValidation(sample_clf_nl, NFoldPartitioner(),
                enable_ca=['stats', 'training_stats'])
#                               'samples_error'])

        results = cv(data)
        self.failUnless((results.samples < 0.2).all() and (results.samples >= 0.0).all())
Exemplo n.º 28
0
def test_distances():
    a = np.array([3,8])
    b = np.array([6,4])
    # test distances or yarik recalls unit testing ;)
    assert_equal(cartesian_distance(a, b), 5.0)
    assert_equal(manhatten_distance(a, b), 7)
    assert_equal(absmin_distance(a, b), 4)
Exemplo n.º 29
0
def test_distances():
    a = np.array([3, 8])
    b = np.array([6, 4])
    # test distances or yarik recalls unit testing ;)
    assert_equal(cartesian_distance(a, b), 5.0)
    assert_equal(manhatten_distance(a, b), 7)
    assert_equal(absmin_distance(a, b), 4)
Exemplo n.º 30
0
def test_samplesgroup_mapper():
    data = np.arange(24).reshape(8,3)
    labels = [0, 1] * 4
    chunks = np.repeat(np.array((0,1)),4)

    # correct results
    csamples = [[3, 4, 5], [6, 7, 8], [15, 16, 17], [18, 19, 20]]
    clabels = [0, 1, 0, 1]
    cchunks = [0, 0, 1, 1]

    ds = dataset_wizard(samples=data, targets=labels, chunks=chunks)
    # add some feature attribute -- just to check
    ds.fa['checker'] = np.arange(3)
    ds.init_origids('samples')

    m = mean_group_sample(['targets', 'chunks'])
    mds = m.forward(ds)
    assert_array_equal(mds.samples, csamples)
    # FAs should simply remain the same
    assert_array_equal(mds.fa.checker, np.arange(3))

    # now without grouping
    m = mean_sample()
    # forwarding just the samples should yield the same result
    assert_array_equal(m.forward(ds.samples),
                       m.forward(ds).samples)

    # directly apply to dataset
    # using untrained mapper
    m = mean_group_sample(['targets', 'chunks'])
    mapped = ds.get_mapped(m)

    assert_equal(mapped.nsamples, 4)
    assert_equal(mapped.nfeatures, 3)
    assert_array_equal(mapped.samples, csamples)
    assert_array_equal(mapped.targets, clabels)
    assert_array_equal(mapped.chunks, cchunks)
    # make sure origids get regenerated
    assert_array_equal([s.count('+') for s in mapped.sa.origids], [1] * 4)
Exemplo n.º 31
0
def test_partitionmapper():
    ds = give_data()
    oep = OddEvenPartitioner()
    parts = list(oep.generate(ds))
    assert_equal(len(parts), 2)
    for i, p in enumerate(parts):
        assert_array_equal(p.sa['partitions'].unique, [1, 2])
        assert_equal(p.a.partitions_set, i)
        assert_equal(len(p), len(ds))
Exemplo n.º 32
0
def test_forward_dense_array_mapper():
    mask = np.ones((3,2), dtype='bool')
    map_ = mask_mapper(mask)

    # test shape reports
    assert_equal(map_.forward1(mask).shape, (6,))

    # test 1sample mapping
    assert_array_equal(map_.forward1(np.arange(6).reshape(3,2)),
                       [0,1,2,3,4,5])

    # test 4sample mapping
    foursample = map_.forward(np.arange(24).reshape(4,3,2))
    assert_array_equal(foursample,
                       [[0,1,2,3,4,5],
                        [6,7,8,9,10,11],
                        [12,13,14,15,16,17],
                        [18,19,20,21,22,23]])

    # check incomplete masks
    mask[1,1] = 0
    map_ = mask_mapper(mask)
    assert_equal(map_.forward1(mask).shape, (5,))
    assert_array_equal(map_.forward1(np.arange(6).reshape(3,2)),
                       [0,1,2,4,5])

    # check that it doesn't accept wrong dataspace
    assert_raises(ValueError, map_.forward, np.arange(4).reshape(2,2))

    # check fail if neither mask nor shape
    assert_raises(ValueError, mask_mapper)

    # check that a full mask is automatically created when providing shape
    m = mask_mapper(shape=(2, 3, 4))
    mp = m.forward1(np.arange(24).reshape(2, 3, 4))
    assert_array_equal(mp, np.arange(24))
Exemplo n.º 33
0
def test_eep_load():
    eb = EEPBin(os.path.join(pymvpa_dataroot, 'eep.bin'))

    ds = [ eep_dataset(source, targets=[1, 2]) for source in
            (eb, os.path.join(pymvpa_dataroot, 'eep.bin')) ]

    for d in ds:
        assert_equal(d.nsamples, 2)
        assert_equal(d.nfeatures, 128)
        assert_equal(np.unique(d.fa.channels[4*23:4*23+4]), 'Pz')
        assert_array_almost_equal([np.arange(-0.002, 0.005, 0.002)] * 32,
                                  d.a.mapper.reverse1(d.fa.timepoints))
Exemplo n.º 34
0
def test_eep_load():
    eb = EEPBin(os.path.join(pymvpa_dataroot, 'eep.bin'))

    ds = [
        eep_dataset(source, targets=[1, 2])
        for source in (eb, os.path.join(pymvpa_dataroot, 'eep.bin'))
    ]

    for d in ds:
        assert_equal(d.nsamples, 2)
        assert_equal(d.nfeatures, 128)
        assert_equal(np.unique(d.fa.channels[4 * 23:4 * 23 + 4]), 'Pz')
        assert_array_almost_equal([np.arange(-0.002, 0.005, 0.002)] * 32,
                                  d.a.mapper.reverse1(d.fa.timepoints))
Exemplo n.º 35
0
def test_collections():
    sa = SampleAttributesCollection()
    assert_equal(len(sa), 0)

    assert_raises(ValueError, sa.__setitem__, 'test', 0)
    l = range(5)
    sa['test'] = l
    # auto-wrapped
    assert_true(isinstance(sa['test'], ArrayCollectable))
    assert_equal(len(sa), 1)

    # names which are already present in dict interface
    assert_raises(ValueError, sa.__setitem__, 'values', range(5))

    sa_c = copy.deepcopy(sa)
    assert_equal(len(sa), len(sa_c))
    assert_array_equal(sa.test, sa_c.test)
Exemplo n.º 36
0
def test_collections():
    sa = SampleAttributesCollection()
    assert_equal(len(sa), 0)

    assert_raises(ValueError, sa.__setitem__, 'test', 0)
    l = range(5)
    sa['test'] = l
    # auto-wrapped
    assert_true(isinstance(sa['test'], ArrayCollectable))
    assert_equal(len(sa), 1)

    # names which are already present in dict interface
    assert_raises(ValueError, sa.__setitem__, 'values', range(5))

    sa_c = copy.deepcopy(sa)
    assert_equal(len(sa), len(sa_c))
    assert_array_equal(sa.test, sa_c.test)
Exemplo n.º 37
0
def test_sphere():
    # test sphere initialization
    s = ne.Sphere(1)
    center0 = (0, 0, 0)
    center1 = (1, 1, 1)
    assert_equal(len(s(center0)), 7)
    target = array([
        array([-1, 0, 0]),
        array([0, -1, 0]),
        array([0, 0, -1]),
        array([0, 0, 0]),
        array([0, 0, 1]),
        array([0, 1, 0]),
        array([1, 0, 0])
    ])
    # test of internals -- no recomputation of increments should be done
    prev_increments = s._increments
    assert_array_equal(s(center0), target)
    ok_(prev_increments is s._increments)
    # query lower dimensionality
    _ = s((0, 0))
    ok_(not prev_increments is s._increments)

    # test Sphere call
    target = [
        array([0, 1, 1]),
        array([1, 0, 1]),
        array([1, 1, 0]),
        array([1, 1, 1]),
        array([1, 1, 2]),
        array([1, 2, 1]),
        array([2, 1, 1])
    ]
    res = s(center1)
    assert_array_equal(array(res), target)
    # They all should be tuples
    ok_(np.all([isinstance(x, tuple) for x in res]))

    # test for larger diameter
    s = ne.Sphere(4)
    assert_equal(len(s(center1)), 257)

    # test extent keyword
    #s = ne.Sphere(4,extent=(1,1,1))
    #assert_array_equal(array(s((0,0,0))), array([[0,0,0]]))

    # test Errors during initialisation and call
    #assert_raises(ValueError, ne.Sphere, 2)
    #assert_raises(ValueError, ne.Sphere, 1.0)

    # no longer extent available
    assert_raises(TypeError, ne.Sphere, 1, extent=(1))
    assert_raises(TypeError, ne.Sphere, 1, extent=(1.0, 1.0, 1.0))

    s = ne.Sphere(1)
    #assert_raises(ValueError, s, (1))
    if __debug__:
        # No float coordinates allowed for now...
        # XXX might like to change that ;)
        #
        assert_raises(ValueError, s, (1.0, 1.0, 1.0))
Exemplo n.º 38
0
def test_chainmapper():
    # the chain needs at lest one mapper
    assert_raises(ValueError, ChainMapper, [])
    # a typical first mapper is to flatten
    cm = ChainMapper([FlattenMapper()])

    # few container checks
    assert_equal(len(cm), 1)
    assert_true(isinstance(cm[0], FlattenMapper))

    # now training
    # come up with data
    samples_shape = (2, 2, 4)
    data_shape = (4, ) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape)
    pristinedata = data.copy()
    target = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target)

    # if it is not trained it knows nothing
    cm.train(data)

    # a new mapper should appear when doing feature selection
    cm.append(FeatureSliceMapper(range(1, 16)))
    assert_equal(cm.forward1(data[0]).shape, (15, ))
    assert_equal(len(cm), 2)
    # multiple slicing
    cm.append(FeatureSliceMapper([9, 14]))
    assert_equal(cm.forward1(data[0]).shape, (2, ))
    assert_equal(len(cm), 3)

    # check reproduction
    cm_clone = eval(repr(cm))
    assert_equal(repr(cm_clone), repr(cm))

    # what happens if we retrain the whole beast an same data as before
    cm.train(data)
    assert_equal(cm.forward1(data[0]).shape, (2, ))
    assert_equal(len(cm), 3)

    # let's map something
    mdata = cm.forward(data)
    assert_array_equal(mdata, target[:, [10, 15]])
    # and back
    rdata = cm.reverse(mdata)
    # original shape
    assert_equal(rdata.shape, data.shape)
    # content as far it could be restored
    assert_array_equal(rdata[rdata > 0], data[rdata > 0])
    assert_equal(np.sum(rdata > 0), 8)
Exemplo n.º 39
0
def test_array_collectable():
    c = ArrayCollectable()

    # empty by default
    assert_equal(c.name, None)
    assert_equal(c.value, None)

    # late assignment
    c.name = 'somename'
    assert_raises(ValueError, c._set, 12345)
    assert_equal(c.value, None)
    c.value = np.arange(5)
    assert_equal(c.name, 'somename')
    assert_array_equal(c.value, np.arange(5))

    # immediate content
    data = np.random.random(size=(3, 10))
    c = ArrayCollectable(data.copy(), 'myname', "This is a test", length=3)
    assert_equal(c.name, 'myname')
    assert_array_equal(c.value, data)
    assert_equal(c.__doc__, "This is a test")
    assert_equal(str(c), 'myname')

    # repr
    from numpy import array
    e = eval(repr(c))
    assert_equal(e.name, 'myname')
    assert_array_almost_equal(e.value, data)
    assert_equal(e.__doc__, "This is a test")

    # cannot assign array of wrong length
    assert_raises(ValueError, c._set, np.arange(5))
    assert_equal(len(c), 3)

    # shallow copy DOES create a view of value array
    c.value = np.arange(3)
    d = copy.copy(c)
    assert_true(d.value.base is c.value)

    # names starting with _ are not allowed
    assert_raises(ValueError, c._set_name, "_underscore")
Exemplo n.º 40
0
def test_simpleboxcar():
    data = np.atleast_2d(np.arange(10)).T
    sp = np.arange(10)

    # check if stupid thing don't work
    assert_raises(ValueError, BoxcarMapper, sp, 0)

    # now do an identity transformation
    bcm = BoxcarMapper(sp, 1)
    trans = bcm.forward(data)
    # ,0 is a feature below, so we get explicit 2D out of 1D
    assert_array_equal(trans[:, 0], data)

    # now check for illegal boxes
    if __debug__:
        # condition is checked only in __debug__
        assert_raises(ValueError, BoxcarMapper(sp, 2).train, data)

    # now something that should work
    nbox = 9
    boxlength = 2
    sp = np.arange(nbox)
    bcm = BoxcarMapper(sp, boxlength)
    trans = bcm(data)
    # check that is properly upcasts the dimensionality
    assert_equal(trans.shape, (nbox, boxlength) + data.shape[1:])
    # check actual values, squeezing the last dim for simplicity
    assert_array_equal(trans.squeeze(),
                       np.vstack((np.arange(9), np.arange(9) + 1)).T)

    # now test for proper data shape
    data = np.ones((10, 3, 4, 2))
    sp = [2, 4, 3, 5]
    trans = BoxcarMapper(sp, 4)(data)
    assert_equal(trans.shape, (4, 4, 3, 4, 2))

    # test reverse
    data = np.arange(240).reshape(10, 3, 4, 2)
    sp = [2, 4, 3, 5]
    boxlength = 2
    m = BoxcarMapper(sp, boxlength)
    m.train(data)
    mp = m.forward(data)
    assert_equal(mp.shape, (4, 2, 3, 4, 2))

    # try full reconstruct
    mr = m.reverse(mp)
    # shape has to match
    assert_equal(mr.shape, (len(sp) * boxlength, ) + data.shape[1:])
    # only known samples are part of the results
    assert_true((mr >= 24).all())
    assert_true((mr < 168).all())

    # check proper reconstruction of non-conflicting sample
    assert_array_equal(mr[0].ravel(), np.arange(48, 72))

    # check proper reconstruction of samples being part of multiple
    # mapped samples
    assert_array_equal(mr[1].ravel(), np.arange(72, 96))

    # test reverse of a single sample
    singlesample = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(singlesample, m.reverse1(singlesample))
    # should not work for shape mismatch, but it does work and is useful when
    # reverse mapping sample attributes
    #assert_raises(ValueError, m.reverse, singlesample[0])

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    bc = m.forward1(np.arange(24).reshape(3, 4, 2))
    assert_array_equal(bc, np.array(2 * [np.arange(24).reshape(3, 4, 2)]))
Exemplo n.º 41
0
def test_reverse_dense_array_mapper():
    mask = np.ones((3,2), dtype='bool')
    mask[1,1] = 0
    map_ = mask_mapper(mask)

    rmapped = map_.reverse1(np.arange(1,6))
    assert_equal(rmapped.shape, (3,2))
    assert_equal(rmapped[1,1], 0)
    assert_equal(rmapped[2,1], 5)


    # check that it doesn't accept wrong dataspace
    assert_raises(ValueError, map_, np.arange(6))

    rmapped2 = map_.reverse(np.arange(1,11).reshape(2,5))
    assert_equal(rmapped2.shape, (2,3,2))
    assert_equal(rmapped2[0,1,1], 0 )
    assert_equal(rmapped2[1,1,1], 0 )
    assert_equal(rmapped2[0,2,1], 5 )
    assert_equal(rmapped2[1,2,1], 10 )
Exemplo n.º 42
0
def test_datasetmapping():
    # 6 samples, 4 features
    data = np.arange(24).reshape(6, 4)
    ds = Dataset(data,
                 sa={
                     'timepoints': np.arange(6),
                     'multidim': data.copy()
                 },
                 fa={'fid': np.arange(4)})
    # with overlapping and non-overlapping boxcars
    startpoints = [0, 1, 4]
    boxlength = 2
    bm = BoxcarMapper(startpoints, boxlength, inspace='boxy')
    # train is critical
    bm.train(ds)
    mds = bm.forward(ds)
    assert_equal(len(mds), len(startpoints))
    assert_equal(mds.nfeatures, boxlength)
    # all samples attributes remain, but the can rotated/compressed into
    # multidimensional attributes
    assert_equal(sorted(mds.sa.keys()),
                 ['boxy_onsetidx'] + sorted(ds.sa.keys()))
    assert_equal(mds.sa.multidim.shape,
                 (len(startpoints), boxlength, ds.nfeatures))
    assert_equal(mds.sa.timepoints.shape, (len(startpoints), boxlength))
    assert_array_equal(mds.sa.timepoints.flatten(),
                       np.array([(s, s + 1) for s in startpoints]).flatten())
    assert_array_equal(mds.sa.boxy_onsetidx, startpoints)
    # feature attributes also get rotated and broadcasted
    assert_array_equal(mds.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    assert_array_equal(mds.fa.boxy_offsetidx,
                       np.repeat(np.arange(boxlength), 4).reshape(2, -1))

    # now see how it works on reverse()
    rds = bm.reverse(mds)
    # we got at least something of all original attributes back
    assert_equal(sorted(rds.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(rds.fa.keys()), sorted(ds.fa.keys()))
    # it is not possible to reconstruct the full samples array
    # some samples even might show up multiple times (when there are overlapping
    # boxcars
    assert_array_equal(
        rds.samples,
        np.array([[0, 1, 2, 3], [4, 5, 6, 7], [4, 5, 6, 7], [8, 9, 10, 11],
                  [16, 17, 18, 19], [20, 21, 22, 23]]))
    assert_array_equal(rds.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(rds.sa.multidim, ds.sa.multidim[rds.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(rds.fa.fid, ds.fa.fid)
Exemplo n.º 43
0
def test_array_collectable():
    c = ArrayCollectable()

    # empty by default
    assert_equal(c.name, None)
    assert_equal(c.value, None)

    # late assignment
    c.name = 'somename'
    assert_raises(ValueError, c._set, 12345)
    assert_equal(c.value, None)
    c.value = np.arange(5)
    assert_equal(c.name, 'somename')
    assert_array_equal(c.value, np.arange(5))

    # immediate content
    data = np.random.random(size=(3,10))
    c = ArrayCollectable(data.copy(), 'myname',
                         "This is a test", length=3)
    assert_equal(c.name, 'myname')
    assert_array_equal(c.value, data)
    assert_equal(c.__doc__, "This is a test")
    assert_equal(str(c), 'myname')

    # repr
    from numpy import array
    e = eval(repr(c))
    assert_equal(e.name, 'myname')
    assert_array_almost_equal(e.value, data)
    assert_equal(e.__doc__, "This is a test")

    # cannot assign array of wrong length
    assert_raises(ValueError, c._set, np.arange(5))
    assert_equal(len(c), 3)

    # shallow copy DOES create a view of value array
    c.value = np.arange(3)
    d = copy.copy(c)
    assert_true(d.value.base is c.value)

    # names starting with _ are not allowed
    assert_raises(ValueError, c._set_name, "_underscore")
Exemplo n.º 44
0
def test_splitter():
    ds = give_data()
    # split with defaults
    spl1 = Splitter('chunks')
    assert_raises(NotImplementedError, spl1, ds)

    splits = list(spl1.generate(ds))
    assert_equal(len(splits), len(ds.sa['chunks'].unique))

    for split in splits:
        # it should have perform basic slicing!
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.sa['chunks'].unique), 1)
        assert_true('lastsplit' in split.a)
    assert_true(splits[-1].a.lastsplit)

    # now again, more customized
    spl2 = Splitter('targets', attr_values = [0,1,1,2,3,3,3], count=4,
                   noslicing=True)
    splits = list(spl2.generate(ds))
    assert_equal(len(splits), 4)
    for split in splits:
        # it should NOT have perform basic slicing!
        assert_false(split.samples.base is ds.samples)
        assert_equal(len(split.sa['targets'].unique), 1)
        assert_equal(len(split.sa['chunks'].unique), 10)
    assert_true(splits[-1].a.lastsplit)

    # two should be identical
    assert_array_equal(splits[1].samples, splits[2].samples)

    # now go wild and split by feature attribute
    ds.fa['roi'] = np.repeat([0,1], 5)
    # splitter should auto-detect that this is a feature attribute
    spl3 = Splitter('roi')
    splits = list(spl3.generate(ds))
    assert_equal(len(splits), 2)
    for split in splits:
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.fa['roi'].unique), 1)
        assert_equal(split.shape, (100, 5))

    # and finally test chained splitters
    cspl = ChainNode([spl2, spl3, spl1])
    splits = list(cspl.generate(ds))
    # 4 target splits and 2 roi splits each and 10 chunks each
    assert_equal(len(splits), 80)
Exemplo n.º 45
0
def test_balancer():
    ds = give_data()
    # only mark the selection in an attribute
    bal = Balancer()
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    assert_true(ds.samples is res.samples.base)
    # should kick out 2 samples in each chunk of 10
    assert_almost_equal(np.mean(res.sa.balanced_set), 0.8)
    # same as above, but actually apply the selection
    bal = Balancer(apply_selection=True, count=5)
    # just run it once
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    # should kick out 2 samples in each chunk of 10
    assert_equal(len(res), int(0.8 * len(ds)))
    # now use it as a generator
    dses = list(bal.generate(ds))
    assert_equal(len(dses), 5)
    # with limit
    bal = Balancer(limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(res.sa['chunks'].unique, (3,))
    assert_equal(get_nelements_per_value(res.sa.targets).values(),
                 [2] * 4)
    # fixed amount
    bal = Balancer(amount=1, limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.sa.targets).values(),
                 [1] * 4)
    # fraction
    bal = Balancer(amount=0.499, limit=None, apply_selection=True)
    res = bal(ds)
    assert_array_equal(
            np.round(np.array(get_nelements_per_value(ds.sa.targets).values()) * 0.5),
            np.array(get_nelements_per_value(res.sa.targets).values()))
    # check on feature attribute
    ds.fa['one'] = np.tile([1,2], 5)
    ds.fa['chk'] = np.repeat([1,2], 5)
    bal = Balancer(attr='one', amount=2, limit='chk', apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.fa.one).values(),
                 [4] * 2)
Exemplo n.º 46
0
def test_attrmap():
    map_default = {'eins': 0, 'zwei': 2, 'sieben': 1}
    map_custom = {'eins': 11, 'zwei': 22, 'sieben': 33}
    literal = ['eins', 'zwei', 'sieben', 'eins', 'sieben', 'eins']
    literal_nonmatching = ['uno', 'dos', 'tres']
    num_default = [0, 2, 1, 0, 1, 0]
    num_custom = [11, 22, 33, 11, 33, 11]

    # no custom mapping given
    am = AttributeMap()
    assert_false(am)
    ok_(len(am) == 0)
    assert_array_equal(am.to_numeric(literal), num_default)
    assert_array_equal(am.to_literal(num_default), literal)
    ok_(am)
    ok_(len(am) == 3)

    #
    # Tests for recursive mapping + preserving datatype
    class myarray(np.ndarray):
        pass

    assert_raises(KeyError, am.to_literal, [(1, 2), 2, 0])
    literal_fancy = [(1, 2), 2, [0], np.array([0, 1]).view(myarray)]
    literal_fancy_tuple = tuple(literal_fancy)
    literal_fancy_array = np.array(literal_fancy, dtype=object)

    for l in (literal_fancy, literal_fancy_tuple,
              literal_fancy_array):
        res = am.to_literal(l, recurse=True)
        assert_equal(res[0], ('sieben', 'zwei'))
        assert_equal(res[1], 'zwei')
        assert_equal(res[2], ['eins'])
        assert_array_equal(res[3], ['eins', 'sieben'])

        # types of result and subsequences should be preserved
        ok_(isinstance(res, l.__class__))
        ok_(isinstance(res[0], tuple))
        ok_(isinstance(res[1], str))
        ok_(isinstance(res[2], list))
        ok_(isinstance(res[3], myarray))

    # yet another example
    a = np.empty(1, dtype=object)
    a[0] = (0, 1)
    res = am.to_literal(a, recurse=True)
    ok_(isinstance(res[0], tuple))

    #
    # with custom mapping
    am = AttributeMap(map=map_custom)
    assert_array_equal(am.to_numeric(literal), num_custom)
    assert_array_equal(am.to_literal(num_custom), literal)

    # if not numeric nothing is mapped
    assert_array_equal(am.to_numeric(num_custom), num_custom)
    # even if the map doesn't fit
    assert_array_equal(am.to_numeric(num_default), num_default)

    # need to_numeric first
    am = AttributeMap()
    assert_raises(RuntimeError, am.to_literal, [1,2,3])
    # stupid args
    assert_raises(ValueError, AttributeMap, map=num_custom)

    # map mismatch
    am = AttributeMap(map=map_custom)
    if __debug__:
        # checked only in __debug__
        assert_raises(KeyError, am.to_numeric, literal_nonmatching)
    # needs reset and should work afterwards
    am.clear()
    assert_array_equal(am.to_numeric(literal_nonmatching), [2, 0, 1])
    # and now reverse
    am = AttributeMap(map=map_custom)
    assert_raises(KeyError, am.to_literal, num_default)

    # dict-like interface
    am = AttributeMap()

    ok_([(k, v) for k, v in am.iteritems()] == [])
Exemplo n.º 47
0
def test_reverse_dense_array_mapper():
    mask = np.ones((3, 2), dtype="bool")
    mask[1, 1] = 0
    map_ = mask_mapper(mask)

    rmapped = map_.reverse1(np.arange(1, 6))
    assert_equal(rmapped.shape, (3, 2))
    assert_equal(rmapped[1, 1], 0)
    assert_equal(rmapped[2, 1], 5)

    # check that it doesn't accept wrong dataspace
    assert_raises(ValueError, map_.forward, np.arange(6))

    rmapped2 = map_.reverse(np.arange(1, 11).reshape(2, 5))
    assert_equal(rmapped2.shape, (2, 3, 2))
    assert_equal(rmapped2[0, 1, 1], 0)
    assert_equal(rmapped2[1, 1, 1], 0)
    assert_equal(rmapped2[0, 2, 1], 5)
    assert_equal(rmapped2[1, 2, 1], 10)
Exemplo n.º 48
0
def test_zscore():
    """Test z-scoring transformation
    """
    # dataset: mean=2, std=1
    samples = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)).\
        reshape((16, 1))
    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    assert_equal(data.samples.mean(), 2.0)
    assert_equal(data.samples.std(), 1.0)
    data_samples = data.samples.copy()
    zscore(data, chunks_attr='chunks')

    # copy should stay intact
    assert_equal(data_samples.mean(), 2.0)
    assert_equal(data_samples.std(), 1.0)
    # we should be able to operate on ndarrays
    # But we can't change type inplace for an array, can't we?
    assert_raises(TypeError, zscore, data_samples, chunks_attr=None)
    # so lets do manually
    data_samples = data_samples.astype(float)
    zscore(data_samples, chunks_attr=None)
    assert_array_equal(data.samples, data_samples)
    print data_samples

    # check z-scoring
    check = np.array([-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0],
                    dtype='float64').reshape(16, 1)
    assert_array_equal(data.samples, check)

    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    zscore(data, chunks_attr=None)
    assert_array_equal(data.samples, check)

    # check z-scoring taking set of labels as a baseline
    data = dataset_wizard(samples.copy(),
                   targets=[0, 2, 2, 2, 1] + [2] * 11,
                   chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples + 1.0)

    # check that zscore modifies in-place; only guaranteed if no upcasting is
    # necessary
    samples = samples.astype('float')
    data = dataset_wizard(samples,
                   targets=[0, 2, 2, 2, 1] + [2] * 11,
                   chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples)

    # these might be duplicating code above -- but twice is better than nothing

    # dataset: mean=2, std=1
    raw = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2))
    # dataset: mean=12, std=1
    raw2 = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)) + 10
    # zscore target
    check = [-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0]

    ds = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)
    pristine = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)

    zm = ZScoreMapper()
    # should do global zscore by default
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]))
    # should not modify the source
    assert_array_equal(pristine, ds)

    # if we tell it a different mean it should obey the order
    zm = ZScoreMapper(params=(3,1))
    zm.train(ds)
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]) - 1 )
    assert_array_equal(pristine, ds)

    # let's look at chunk-wise z-scoring
    ds = dataset_wizard(np.hstack((raw.copy(), raw2.copy())),
                        targets=range(32),
                        chunks=[0] * 16 + [1] * 16)
    # by default chunk-wise
    zm = ZScoreMapper()
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))
    # we should be able to do that same manually
    zm = ZScoreMapper(params={0: (2,1), 1: (12,1)})
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))
Exemplo n.º 49
0
def test_flatten():
    samples_shape = (2, 2, 4)
    data_shape = (4, ) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape).view(myarray)
    pristinedata = data.copy()
    target = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target).view(myarray)
    index_target = np.array([[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3],
                             [0, 1, 0], [0, 1, 1], [0, 1, 2], [0, 1, 3],
                             [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 0, 3],
                             [1, 1, 0], [1, 1, 1], [1, 1, 2], [1, 1, 3]])

    # array subclass survives
    ok_(isinstance(data, myarray))

    # actually, there should be no difference between a plain FlattenMapper and
    # a chain that only has a FlattenMapper as the one element
    for fm in [
            FlattenMapper(inspace='voxel'),
            ChainMapper([
                FlattenMapper(inspace='voxel'),
                FeatureSliceMapper(slice(None))
            ])
    ]:
        # not working if untrained
        assert_raises(RuntimeError, fm.forward1,
                      np.arange(np.sum(samples_shape) + 1))

        fm.train(data)

        ok_(isinstance(fm.forward(data), myarray))
        ok_(isinstance(fm.forward1(data[2]), myarray))
        assert_array_equal(fm.forward(data), target)
        assert_array_equal(fm.forward1(data[2]), target[2])
        assert_raises(ValueError, fm.forward, np.arange(4))

        # all of that leaves that data unmodified
        assert_array_equal(data, pristinedata)

        # reverse mapping
        ok_(isinstance(fm.reverse(target), myarray))
        ok_(isinstance(fm.reverse1(target[0]), myarray))
        ok_(isinstance(fm.reverse(target[1:2]), myarray))
        assert_array_equal(fm.reverse(target), data)
        assert_array_equal(fm.reverse1(target[0]), data[0])
        assert_array_equal(fm.reverse(target[1:2]), data[1:2])
        assert_raises(ValueError, fm.reverse, np.arange(14))

        # check one dimensional data, treated as scalar samples
        oned = np.arange(5)
        fm.train(Dataset(oned))
        # needs 2D
        assert_raises(ValueError, fm.forward, oned)
        # doesn't match mapper, since Dataset turns `oned` into (5,1)
        assert_raises(ValueError, fm.forward, oned)
        assert_equal(Dataset(oned).nfeatures, 1)

        # try dataset mode, with some feature attribute
        fattr = np.arange(np.prod(samples_shape)).reshape(samples_shape)
        ds = Dataset(data, fa={'awesome': fattr.copy()})
        assert_equal(ds.samples.shape, data_shape)
        fm.train(ds)
        dsflat = fm.forward(ds)
        ok_(isinstance(dsflat, Dataset))
        ok_(isinstance(dsflat.samples, myarray))
        assert_array_equal(dsflat.samples, target)
        assert_array_equal(dsflat.fa.awesome,
                           np.arange(np.prod(samples_shape)))
        assert_true(isinstance(dsflat.fa['awesome'], ArrayCollectable))
        # test index creation
        assert_array_equal(index_target, dsflat.fa.voxel)

        # and back
        revds = fm.reverse(dsflat)
        ok_(isinstance(revds, Dataset))
        ok_(isinstance(revds.samples, myarray))
        assert_array_equal(revds.samples, data)
        assert_array_equal(revds.fa.awesome, fattr)
        assert_true(isinstance(revds.fa['awesome'], ArrayCollectable))
        assert_false('voxel' in revds.fa)
Exemplo n.º 50
0
def test_basic_collectable():
    c = Collectable()

    # empty by default
    assert_equal(c.name, None)
    assert_equal(c.value, None)
    assert_equal(c.__doc__, None)

    # late assignment
    c.name = 'somename'
    c.value = 12345
    assert_equal(c.name, 'somename')
    assert_equal(c.value, 12345)

    # immediate content
    c = Collectable('value', 'myname', "This is a test")
    assert_equal(c.name, 'myname')
    assert_equal(c.value, 'value')
    assert_equal(c.__doc__, "This is a test")
    assert_equal(str(c), 'myname')

    # repr
    e = eval(repr(c))
    assert_equal(e.name, 'myname')
    assert_equal(e.value, 'value')
    assert_equal(e.__doc__, "This is a test")

    # shallow copy does not create a view of value array
    c.value = np.arange(5)
    d = copy.copy(c)
    assert_false(d.value.base is c.value)

    # names starting with _ are not allowed
    assert_raises(ValueError, c._set_name, "_underscore")