Exemplo n.º 1
0
 def test_basic(self):
     dataset = data_generators.linear1d_gaussian_noise()
     k = GeneralizedLinearKernel()
     clf = GPR(k)
     clf.train(dataset)
     y = clf.predict(dataset.samples)
     assert_array_equal(y.shape, dataset.targets.shape)
Exemplo n.º 2
0
    def test_simple_n_minus_one_cv(self):
        data = get_mv_pattern(3)
        data.init_origids('samples')

        self.failUnless(data.nsamples == 120)
        self.failUnless(data.nfeatures == 2)
        self.failUnless(
            (data.sa.targets == \
                [0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0] * 6).all())
        self.failUnless(
            (data.sa.chunks == \
                [k for k in range(1, 7) for i in range(20)]).all())
        assert_equal(len(np.unique(data.sa.origids)), data.nsamples)

        transerror = TransferError(sample_clf_nl)
        cv = CrossValidatedTransferError(
            transerror,
            NFoldSplitter(cvtype=1),
            enable_ca=['confusion', 'training_confusion', 'samples_error'])

        results = cv(data)
        self.failUnless((results.samples < 0.2).all()
                        and (results.samples >= 0.0).all())

        # TODO: test accessibility of {training_,}confusion{,s} of
        # CrossValidatedTransferError

        self.failUnless(isinstance(cv.ca.samples_error, dict))
        self.failUnless(len(cv.ca.samples_error) == data.nsamples)
        # one value for each origid
        assert_array_equal(sorted(cv.ca.samples_error.keys()),
                           sorted(data.sa.origids))
        for k, v in cv.ca.samples_error.iteritems():
            self.failUnless(len(v) == 1)
Exemplo n.º 3
0
def test_sphere_scaled():
    s1 = ne.Sphere(3)
    s = ne.Sphere(3, element_sizes=(1, 1))

    # Should give exactly the same results since element_sizes are 1s
    for p in ((0, 0), (-23, 1)):
        assert_array_equal(s1(p), s(p))
        ok_(len(s(p)) == len(set(s(p))))

    # Raise exception if query dimensionality does not match element_sizes
    assert_raises(ValueError, s, (1, ))

    s = ne.Sphere(3, element_sizes=(1.5, 2))
    assert_array_equal(s((0, 0)), [(-2, 0), (-1, -1), (-1, 0), (-1, 1),
                                   (0, -1), (0, 0), (0, 1), (1, -1), (1, 0),
                                   (1, 1), (2, 0)])

    s = ne.Sphere(1.5, element_sizes=(1.5, 1.5, 1.5))
    res = s((0, 0, 0))
    ok_(np.all([np.sqrt(np.sum(np.array(x)**2)) <= 1.5 for x in res]))
    ok_(len(res) == 7)

    # all neighbors so no more than 1 voxel away -- just a cube, for
    # some "sphere" effect radius had to be 3.0 ;)
    td = np.sqrt(3 * 1.5**2)
    s = ne.Sphere(td, element_sizes=(1.5, 1.5, 1.5))
    res = s((0, 0, 0))
    ok_(np.all([np.sqrt(np.sum(np.array(x)**2)) <= td for x in res]))
    ok_(np.all([np.sum(np.abs(x) > 1) == 0 for x in res]))
    ok_(len(res) == 27)
Exemplo n.º 4
0
    def test_simple_n_minus_one_cv(self):
        data = get_mv_pattern(3)
        data.init_origids('samples')

        self.failUnless( data.nsamples == 120 )
        self.failUnless( data.nfeatures == 2 )
        self.failUnless(
            (data.sa.targets == \
                [0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0] * 6).all())
        self.failUnless(
            (data.sa.chunks == \
                [k for k in range(1, 7) for i in range(20)]).all())
        assert_equal(len(np.unique(data.sa.origids)), data.nsamples)

        transerror = TransferError(sample_clf_nl)
        cv = CrossValidatedTransferError(
                transerror,
                NFoldSplitter(cvtype=1),
                enable_ca=['confusion', 'training_confusion',
                               'samples_error'])

        results = cv(data)
        self.failUnless((results.samples < 0.2).all() and (results.samples >= 0.0).all())

        # TODO: test accessibility of {training_,}confusion{,s} of
        # CrossValidatedTransferError

        self.failUnless(isinstance(cv.ca.samples_error, dict))
        self.failUnless(len(cv.ca.samples_error) == data.nsamples)
        # one value for each origid
        assert_array_equal(sorted(cv.ca.samples_error.keys()),
                           sorted(data.sa.origids))
        for k, v in cv.ca.samples_error.iteritems():
            self.failUnless(len(v) == 1)
Exemplo n.º 5
0
 def test_basic(self):
     dataset = data_generators.linear1d_gaussian_noise()
     k = GeneralizedLinearKernel()
     clf = GPR(k)
     clf.train(dataset)
     y = clf.predict(dataset.samples)
     assert_array_equal(y.shape, dataset.targets.shape)
Exemplo n.º 6
0
def test_sphere_scaled():
    s1 = ne.Sphere(3)
    s = ne.Sphere(3, element_sizes=(1, 1))

    # Should give exactly the same results since element_sizes are 1s
    for p in ((0, 0), (-23, 1)):
        assert_array_equal(s1(p), s(p))
        ok_(len(s(p)) == len(set(s(p))))

    # Raise exception if query dimensionality does not match element_sizes
    assert_raises(ValueError, s, (1,))

    s = ne.Sphere(3, element_sizes=(1.5, 2))
    assert_array_equal(s((0, 0)),
                       [(-2, 0), (-1, -1), (-1, 0), (-1, 1),
                        (0, -1), (0, 0), (0, 1),
                        (1, -1), (1, 0), (1, 1), (2, 0)])

    s = ne.Sphere(1.5, element_sizes=(1.5, 1.5, 1.5))
    res = s((0, 0, 0))
    ok_(np.all([np.sqrt(np.sum(np.array(x)**2)) <= 1.5 for x in res]))
    ok_(len(res) == 7)

    # all neighbors so no more than 1 voxel away -- just a cube, for
    # some "sphere" effect radius had to be 3.0 ;)
    td = np.sqrt(3*1.5**2)
    s = ne.Sphere(td, element_sizes=(1.5, 1.5, 1.5))
    res = s((0, 0, 0))
    ok_(np.all([np.sqrt(np.sum(np.array(x)**2)) <= td for x in res]))
    ok_(np.all([np.sum(np.abs(x) > 1) == 0 for x in res]))
    ok_(len(res) == 27)
Exemplo n.º 7
0
    def test_aggregation(self):
        data = dataset_wizard(np.arange( 20 ).reshape((4, 5)), targets=1, chunks=1)

        ag_data = aggregate_features(data, np.mean)

        ok_(ag_data.nsamples == 4)
        ok_(ag_data.nfeatures == 1)
        assert_array_equal(ag_data.samples[:, 0], [2, 7, 12, 17])
Exemplo n.º 8
0
 def test_size_random_prototypes(self):
     self.build_vector_based_pm()
     fraction = 0.5
     prototype_number = max(int(len(self.samples)*fraction),1)
     ## debug("MAP","Generating "+str(prototype_number)+" random prototypes.")
     self.prototypes2 = np.array(random.sample(self.samples, prototype_number))
     self.pm2 = PrototypeMapper(similarities=self.similarities, prototypes=self.prototypes2)
     self.pm2.train(self.samples)
     assert_array_equal(self.pm2.proj.shape, (self.samples.shape[0], self.pm2.prototypes.shape[0]*len(self.similarities)))
Exemplo n.º 9
0
 def test_size_random_prototypes(self):
     self.build_vector_based_pm()
     fraction = 0.5
     prototype_number = max(int(len(self.samples)*fraction),1)
     ## debug("MAP","Generating "+str(prototype_number)+" random prototypes.")
     self.prototypes2 = np.array(random.sample(self.samples, prototype_number))
     self.pm2 = PrototypeMapper(similarities=self.similarities, prototypes=self.prototypes2)
     self.pm2.train(self.samples)
     assert_array_equal(self.pm2.proj.shape, (self.samples.shape[0], self.pm2.prototypes.shape[0]*len(self.similarities)))
Exemplo n.º 10
0
def test_sphere():
    # test sphere initialization
    s = ne.Sphere(1)
    center0 = (0, 0, 0)
    center1 = (1, 1, 1)
    assert_equal(len(s(center0)), 7)
    target = array([array([-1,  0,  0]),
              array([ 0, -1,  0]),
              array([ 0,  0, -1]),
              array([0, 0, 0]),
              array([0, 0, 1]),
              array([0, 1, 0]),
              array([1, 0, 0])])
    # test of internals -- no recomputation of increments should be done
    prev_increments = s._increments
    assert_array_equal(s(center0), target)
    ok_(prev_increments is s._increments)
    # query lower dimensionality
    _ = s((0, 0))
    ok_(not prev_increments is s._increments)

    # test Sphere call
    target = [array([0, 1, 1]),
              array([1, 0, 1]),
              array([1, 1, 0]),
              array([1, 1, 1]),
              array([1, 1, 2]),
              array([1, 2, 1]),
              array([2, 1, 1])]
    res = s(center1)
    assert_array_equal(array(res), target)
    # They all should be tuples
    ok_(np.all([isinstance(x, tuple) for x in res]))

    # test for larger diameter
    s = ne.Sphere(4)
    assert_equal(len(s(center1)), 257)

    # test extent keyword
    #s = ne.Sphere(4,extent=(1,1,1))
    #assert_array_equal(array(s((0,0,0))), array([[0,0,0]]))

    # test Errors during initialisation and call
    #assert_raises(ValueError, ne.Sphere, 2)
    #assert_raises(ValueError, ne.Sphere, 1.0)

    # no longer extent available
    assert_raises(TypeError, ne.Sphere, 1, extent=(1))
    assert_raises(TypeError, ne.Sphere, 1, extent=(1.0, 1.0, 1.0))

    s = ne.Sphere(1)
    #assert_raises(ValueError, s, (1))
    if __debug__:
        # No float coordinates allowed for now...
        # XXX might like to change that ;)
        # 
        assert_raises(ValueError, s, (1.0, 1.0, 1.0))
Exemplo n.º 11
0
def test_partitionmapper():
    ds = give_data()
    oep = OddEvenPartitioner()
    parts = list(oep.generate(ds))
    assert_equal(len(parts), 2)
    for i, p in enumerate(parts):
        assert_array_equal(p.sa['partitions'].unique, [1, 2])
        assert_equal(p.a.partitions_set, i)
        assert_equal(len(p), len(ds))
Exemplo n.º 12
0
    def test_aggregation(self):
        data = dataset_wizard(np.arange(20).reshape((4, 5)),
                              targets=1,
                              chunks=1)

        ag_data = aggregate_features(data, np.mean)

        ok_(ag_data.nsamples == 4)
        ok_(ag_data.nfeatures == 1)
        assert_array_equal(ag_data.samples[:, 0], [2, 7, 12, 17])
Exemplo n.º 13
0
def test_chainmapper():
    # the chain needs at lest one mapper
    assert_raises(ValueError, ChainMapper, [])
    # a typical first mapper is to flatten
    cm = ChainMapper([FlattenMapper()])

    # few container checks
    assert_equal(len(cm), 1)
    assert_true(isinstance(cm[0], FlattenMapper))

    # now training
    # come up with data
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape)
    pristinedata = data.copy()
    target = [
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
        [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
        [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
        [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63],
    ]
    target = np.array(target)

    # if it is not trained it knows nothing
    cm.train(data)

    # a new mapper should appear when doing feature selection
    cm.append(FeatureSliceMapper(range(1, 16)))
    assert_equal(cm.forward1(data[0]).shape, (15,))
    assert_equal(len(cm), 2)
    # multiple slicing
    cm.append(FeatureSliceMapper([9, 14]))
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # check reproduction
    cm_clone = eval(repr(cm))
    assert_equal(repr(cm_clone), repr(cm))

    # what happens if we retrain the whole beast an same data as before
    cm.train(data)
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # let's map something
    mdata = cm.forward(data)
    assert_array_equal(mdata, target[:, [10, 15]])
    # and back
    rdata = cm.reverse(mdata)
    # original shape
    assert_equal(rdata.shape, data.shape)
    # content as far it could be restored
    assert_array_equal(rdata[rdata > 0], data[rdata > 0])
    assert_equal(np.sum(rdata > 0), 8)
Exemplo n.º 14
0
def test_attrpermute():
    ds = give_data()
    ds.sa['ids'] = range(len(ds))
    pristine_data = ds.samples.copy()
    permutation = AttributePermutator(['targets', 'ids'], assure=True)
    pds = permutation(ds)
    # should not touch the data
    assert_array_equal(pristine_data, pds.samples)
    # even keep the very same array
    assert_true(pds.samples.base is ds.samples)
    # there is no way that it can be the same attribute
    assert_false(np.all(pds.sa.ids == ds.sa.ids))
    # ids should reflect permutation setup
    assert_array_equal(pds.sa.targets, ds.sa.targets[pds.sa.ids])
    # other attribute should remain intact
    assert_array_equal(pds.sa.chunks, ds.sa.chunks)

    # now chunk-wise permutation
    permutation = AttributePermutator('ids', limit='chunks')
    pds = permutation(ds)
    # first ten should remain first ten
    assert_false(np.any(pds.sa.ids[:10] > 9))

    # same thing, but only permute single chunk
    permutation = AttributePermutator('ids', limit={'chunks': 3})
    pds = permutation(ds)
    # one chunk should change
    assert_false(np.any(pds.sa.ids[30:40] > 39))
    assert_false(np.any(pds.sa.ids[30:40] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # or a list of chunks
    permutation = AttributePermutator('ids', limit={'chunks': [3,4]})
    pds = permutation(ds)
    # two chunks should change
    assert_false(np.any(pds.sa.ids[30:50] > 49))
    assert_false(np.any(pds.sa.ids[30:50] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # and now try generating more permutations
    nruns = 2
    permutation = AttributePermutator(['targets', 'ids'], assure=True, count=nruns)
    pds = list(permutation.generate(ds))
    assert_equal(len(pds), nruns)
    for p in pds:
        assert_false(np.all(p.sa.ids == ds.sa.ids))

    # permute feature attrs
    ds.fa['ids'] = range(ds.shape[1])
    permutation = AttributePermutator('fa.ids', assure=True)
    pds = permutation(ds)
    assert_false(np.all(pds.fa.ids == ds.fa.ids))
Exemplo n.º 15
0
def test_chainmapper():
    # the chain needs at lest one mapper
    assert_raises(ValueError, ChainMapper, [])
    # a typical first mapper is to flatten
    cm = ChainMapper([FlattenMapper()])

    # few container checks
    assert_equal(len(cm), 1)
    assert_true(isinstance(cm[0], FlattenMapper))

    # now training
    # come up with data
    samples_shape = (2, 2, 4)
    data_shape = (4, ) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape)
    pristinedata = data.copy()
    target = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target)

    # if it is not trained it knows nothing
    cm.train(data)

    # a new mapper should appear when doing feature selection
    cm.append(FeatureSliceMapper(range(1, 16)))
    assert_equal(cm.forward1(data[0]).shape, (15, ))
    assert_equal(len(cm), 2)
    # multiple slicing
    cm.append(FeatureSliceMapper([9, 14]))
    assert_equal(cm.forward1(data[0]).shape, (2, ))
    assert_equal(len(cm), 3)

    # check reproduction
    cm_clone = eval(repr(cm))
    assert_equal(repr(cm_clone), repr(cm))

    # what happens if we retrain the whole beast an same data as before
    cm.train(data)
    assert_equal(cm.forward1(data[0]).shape, (2, ))
    assert_equal(len(cm), 3)

    # let's map something
    mdata = cm.forward(data)
    assert_array_equal(mdata, target[:, [10, 15]])
    # and back
    rdata = cm.reverse(mdata)
    # original shape
    assert_equal(rdata.shape, data.shape)
    # content as far it could be restored
    assert_array_equal(rdata[rdata > 0], data[rdata > 0])
    assert_equal(np.sum(rdata > 0), 8)
Exemplo n.º 16
0
    def test_streamline_equal_mapper(self):
        self.build_streamline_things()

        self.prototypes_equal = self.dataset.samples
        self.pm = PrototypeMapper(similarities=self.similarities,
                                  prototypes=self.prototypes_equal,
                                  demean=False)
        self.pm.train(self.dataset.samples)
        ## debug("MAP","projected data: "+str(self.pm.proj))
        # check size:
        assert_array_equal(self.pm.proj.shape, (len(self.dataset.samples), len(self.prototypes_equal)*len(self.similarities)))
        # test symmetry
        assert_array_almost_equal(self.pm.proj, self.pm.proj.T)
Exemplo n.º 17
0
    def test_streamline_equal_mapper(self):
        self.build_streamline_things()

        self.prototypes_equal = self.dataset.samples
        self.pm = PrototypeMapper(similarities=self.similarities,
                                  prototypes=self.prototypes_equal,
                                  demean=False)
        self.pm.train(self.dataset.samples)
        ## debug("MAP","projected data: "+str(self.pm.proj))
        # check size:
        assert_array_equal(self.pm.proj.shape, (len(self.dataset.samples), len(self.prototypes_equal)*len(self.similarities)))
        # test symmetry
        assert_array_almost_equal(self.pm.proj, self.pm.proj.T)
Exemplo n.º 18
0
def test_glmnet_c():
    # define binary prob
    data = datasets['dumb2']

    # use GLMNET on binary problem
    clf = GLMNET_C()
    clf.ca.enable('estimates')

    clf.train(data)

    # test predictions
    pre = clf.predict(data.samples)

    assert_array_equal(pre, data.targets)
Exemplo n.º 19
0
def test_glmnet_state():
    #data = datasets['dumb2']
    # for some reason the R code fails with the dumb data
    data = datasets['chirp_linear']

    clf = GLMNET_R()

    clf.train(data)

    clf.ca.enable('predictions')

    p = clf.predict(data.samples)

    assert_array_equal(p, clf.ca.predictions)
Exemplo n.º 20
0
    def test_custom_split(self):
        #simulate half splitter
        hs = CustomPartitioner([(None,[0,1,2,3,4]),(None,[5,6,7,8,9])])
        spl = Splitter(attr='partitions')
        splits = [ list(spl.generate(p)) for p in hs.generate(self.data) ]
        self.failUnless(len(splits) == 2)

        for i,p in enumerate(splits):
            self.failUnless( len(p) == 2 )
            self.failUnless( p[0].nsamples == 50 )
            self.failUnless( p[1].nsamples == 50 )

        assert_array_equal(splits[0][1].sa['chunks'].unique, [0, 1, 2, 3, 4])
        assert_array_equal(splits[0][0].sa['chunks'].unique, [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][1].sa['chunks'].unique, [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][0].sa['chunks'].unique, [0, 1, 2, 3, 4])


        # check fully customized split with working and validation set specified
        cs = CustomPartitioner([([0,3,4],[5,9])])
        # we want to discared the unselected partition of the data, hence attr_value
        spl = Splitter(attr='partitions', attr_values=[1,2])
        splits = [ list(spl.generate(p)) for p in cs.generate(self.data) ]
        self.failUnless(len(splits) == 1)

        for i,p in enumerate(splits):
            self.failUnless( len(p) == 2 )
            self.failUnless( p[0].nsamples == 30 )
            self.failUnless( p[1].nsamples == 20 )

        self.failUnless((splits[0][1].sa['chunks'].unique == [5, 9]).all())
        self.failUnless((splits[0][0].sa['chunks'].unique == [0, 3, 4]).all())
Exemplo n.º 21
0
def test_forward_dense_array_mapper():
    mask = np.ones((3, 2), dtype="bool")
    map_ = mask_mapper(mask)

    # test shape reports
    assert_equal(map_.forward1(mask).shape, (6,))

    # test 1sample mapping
    assert_array_equal(map_.forward1(np.arange(6).reshape(3, 2)), [0, 1, 2, 3, 4, 5])

    # test 4sample mapping
    foursample = map_.forward(np.arange(24).reshape(4, 3, 2))
    assert_array_equal(
        foursample, [[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16, 17], [18, 19, 20, 21, 22, 23]]
    )

    # check incomplete masks
    mask[1, 1] = 0
    map_ = mask_mapper(mask)
    assert_equal(map_.forward1(mask).shape, (5,))
    assert_array_equal(map_.forward1(np.arange(6).reshape(3, 2)), [0, 1, 2, 4, 5])

    # check that it doesn't accept wrong dataspace
    assert_raises(ValueError, map_.forward, np.arange(4).reshape(2, 2))

    # check fail if neither mask nor shape
    assert_raises(ValueError, mask_mapper)

    # check that a full mask is automatically created when providing shape
    m = mask_mapper(shape=(2, 3, 4))
    mp = m.forward1(np.arange(24).reshape(2, 3, 4))
    assert_array_equal(mp, np.arange(24))
Exemplo n.º 22
0
def test_glmnet_state():
    #data = datasets['dumb2']
    # for some reason the R code fails with the dumb data
    data = datasets['chirp_linear']

    clf = GLMNET_R()

    clf.train(data)

    clf.ca.enable('predictions')

    p = clf.predict(data.samples)

    assert_array_equal(p, clf.ca.predictions)
Exemplo n.º 23
0
def test_glmnet_c():
    # define binary prob
    data = datasets['dumb2']

    # use GLMNET on binary problem
    clf = GLMNET_C()
    clf.ca.enable('estimates')

    clf.train(data)

    # test predictions
    pre = clf.predict(data.samples)

    assert_array_equal(pre, data.targets)
Exemplo n.º 24
0
    def test_streamline_random_mapper(self):
        self.build_streamline_things()

        # Adding one more similarity to test multiple similarities in the streamline case:
        self.similarities.append(StreamlineSimilarity(distance=corouge))

        fraction = 0.5
        prototype_number = max(int(len(self.dataset.samples)*fraction),1)
        ## debug("MAP","Generating "+str(prototype_number)+" random prototypes.")
        self.prototypes_random = self.dataset.samples[np.random.permutation(self.dataset.samples.size)][:prototype_number]
        ## debug("MAP","prototypes: "+str(self.prototypes_random))

        self.pm = PrototypeMapper(similarities=self.similarities, prototypes=self.prototypes_random, demean=False)
        self.pm.train(self.dataset.samples) # , fraction=1.0)
        # test size:
        assert_array_equal(self.pm.proj.shape, (len(self.dataset.samples), len(self.prototypes_random)*len(self.similarities)))
Exemplo n.º 25
0
    def test_streamline_random_mapper(self):
        self.build_streamline_things()

        # Adding one more similarity to test multiple similarities in the streamline case:
        self.similarities.append(StreamlineSimilarity(distance=corouge))

        fraction = 0.5
        prototype_number = max(int(len(self.dataset.samples)*fraction),1)
        ## debug("MAP","Generating "+str(prototype_number)+" random prototypes.")
        self.prototypes_random = self.dataset.samples[np.random.permutation(self.dataset.samples.size)][:prototype_number]
        ## debug("MAP","prototypes: "+str(self.prototypes_random))

        self.pm = PrototypeMapper(similarities=self.similarities, prototypes=self.prototypes_random, demean=False)
        self.pm.train(self.dataset.samples) # , fraction=1.0)
        # test size:
        assert_array_equal(self.pm.proj.shape, (len(self.dataset.samples), len(self.prototypes_random)*len(self.similarities)))
Exemplo n.º 26
0
    def test_label_splitter(self):
        oes = OddEvenSplitter(attr='targets')

        splits = [ (first, second) for (first, second) in oes(self.data) ]

        assert_array_equal(splits[0][0].sa['targets'].unique, [0,2])
        assert_array_equal(splits[0][1].sa['targets'].unique, [1,3])
        assert_array_equal(splits[1][0].sa['targets'].unique, [1,3])
        assert_array_equal(splits[1][1].sa['targets'].unique, [0,2])
Exemplo n.º 27
0
def test_splitter():
    ds = give_data()
    # split with defaults
    spl1 = Splitter('chunks')
    assert_raises(NotImplementedError, spl1, ds)

    splits = list(spl1.generate(ds))
    assert_equal(len(splits), len(ds.sa['chunks'].unique))

    for split in splits:
        # it should have perform basic slicing!
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.sa['chunks'].unique), 1)
        assert_true('lastsplit' in split.a)
    assert_true(splits[-1].a.lastsplit)

    # now again, more customized
    spl2 = Splitter('targets', attr_values = [0,1,1,2,3,3,3], count=4,
                   noslicing=True)
    splits = list(spl2.generate(ds))
    assert_equal(len(splits), 4)
    for split in splits:
        # it should NOT have perform basic slicing!
        assert_false(split.samples.base is ds.samples)
        assert_equal(len(split.sa['targets'].unique), 1)
        assert_equal(len(split.sa['chunks'].unique), 10)
    assert_true(splits[-1].a.lastsplit)

    # two should be identical
    assert_array_equal(splits[1].samples, splits[2].samples)

    # now go wild and split by feature attribute
    ds.fa['roi'] = np.repeat([0,1], 5)
    # splitter should auto-detect that this is a feature attribute
    spl3 = Splitter('roi')
    splits = list(spl3.generate(ds))
    assert_equal(len(splits), 2)
    for split in splits:
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.fa['roi'].unique), 1)
        assert_equal(split.shape, (100, 5))

    # and finally test chained splitters
    cspl = ChainNode([spl2, spl3, spl1])
    splits = list(cspl.generate(ds))
    # 4 target splits and 2 roi splits each and 10 chunks each
    assert_equal(len(splits), 80)
Exemplo n.º 28
0
    def test_label_splitter(self):
        oes = OddEvenSplitter(attr='targets')

        splits = [(first, second) for (first, second) in oes(self.data)]

        assert_array_equal(splits[0][0].sa['targets'].unique, [0, 2])
        assert_array_equal(splits[0][1].sa['targets'].unique, [1, 3])
        assert_array_equal(splits[1][0].sa['targets'].unique, [1, 3])
        assert_array_equal(splits[1][1].sa['targets'].unique, [0, 2])
Exemplo n.º 29
0
def test_sifter():
    # somewhat duplicating the doctest
    ds = Dataset(samples=np.arange(8).reshape((4,2)),
                 sa={'chunks':   [ 0 ,  1 ,  2 ,  3 ],
                     'targets':  ['c', 'c', 'p', 'p']})
    par = ChainNode([NFoldPartitioner(cvtype=2, attr='chunks'),
                     Sifter([('partitions', 2),
                             ('targets', ['c', 'p'])])
                     ])
    dss = list(par.generate(ds))
    assert_equal(len(dss), 4)
    for ds_ in dss:
        testing = ds[ds_.sa.partitions == 2]
        assert_array_equal(np.unique(testing.sa.targets), ['c', 'p'])
        # and we still have both targets  present in training
        training = ds[ds_.sa.partitions == 1]
        assert_array_equal(np.unique(training.sa.targets), ['c', 'p'])
Exemplo n.º 30
0
def test_collections():
    sa = SampleAttributesCollection()
    assert_equal(len(sa), 0)

    assert_raises(ValueError, sa.__setitem__, 'test', 0)
    l = range(5)
    sa['test'] = l
    # auto-wrapped
    assert_true(isinstance(sa['test'], ArrayCollectable))
    assert_equal(len(sa), 1)

    # names which are already present in dict interface
    assert_raises(ValueError, sa.__setitem__, 'values', range(5))

    sa_c = copy.deepcopy(sa)
    assert_equal(len(sa), len(sa_c))
    assert_array_equal(sa.test, sa_c.test)
Exemplo n.º 31
0
def test_collections():
    sa = SampleAttributesCollection()
    assert_equal(len(sa), 0)

    assert_raises(ValueError, sa.__setitem__, 'test', 0)
    l = range(5)
    sa['test'] = l
    # auto-wrapped
    assert_true(isinstance(sa['test'], ArrayCollectable))
    assert_equal(len(sa), 1)

    # names which are already present in dict interface
    assert_raises(ValueError, sa.__setitem__, 'values', range(5))

    sa_c = copy.deepcopy(sa)
    assert_equal(len(sa), len(sa_c))
    assert_array_equal(sa.test, sa_c.test)
Exemplo n.º 32
0
    def test_label_splitter(self):
        oes = OddEvenPartitioner(attr='targets')
        spl = Splitter(attr='partitions')

        splits = [ list(spl.generate(p)) for p in oes.generate(self.data) ]

        assert_array_equal(splits[0][0].sa['targets'].unique, [0,2])
        assert_array_equal(splits[0][1].sa['targets'].unique, [1,3])
        assert_array_equal(splits[1][0].sa['targets'].unique, [1,3])
        assert_array_equal(splits[1][1].sa['targets'].unique, [0,2])
Exemplo n.º 33
0
def test_balancer():
    ds = give_data()
    # only mark the selection in an attribute
    bal = Balancer()
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    assert_true(ds.samples is res.samples.base)
    # should kick out 2 samples in each chunk of 10
    assert_almost_equal(np.mean(res.sa.balanced_set), 0.8)
    # same as above, but actually apply the selection
    bal = Balancer(apply_selection=True, count=5)
    # just run it once
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    # should kick out 2 samples in each chunk of 10
    assert_equal(len(res), int(0.8 * len(ds)))
    # now use it as a generator
    dses = list(bal.generate(ds))
    assert_equal(len(dses), 5)
    # with limit
    bal = Balancer(limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(res.sa['chunks'].unique, (3,))
    assert_equal(get_nelements_per_value(res.sa.targets).values(),
                 [2] * 4)
    # fixed amount
    bal = Balancer(amount=1, limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.sa.targets).values(),
                 [1] * 4)
    # fraction
    bal = Balancer(amount=0.499, limit=None, apply_selection=True)
    res = bal(ds)
    assert_array_equal(
            np.round(np.array(get_nelements_per_value(ds.sa.targets).values()) * 0.5),
            np.array(get_nelements_per_value(res.sa.targets).values()))
    # check on feature attribute
    ds.fa['one'] = np.tile([1,2], 5)
    ds.fa['chk'] = np.repeat([1,2], 5)
    bal = Balancer(attr='one', amount=2, limit='chk', apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.fa.one).values(),
                 [4] * 2)
Exemplo n.º 34
0
def test_subset_filler():
    sm = FeatureSliceMapper(np.arange(3))
    sm_f0 = FeatureSliceMapper(np.arange(3), filler=0)
    sm_fm1 = FeatureSliceMapper(np.arange(3), filler=-1)
    sm_fnan = FeatureSliceMapper(np.arange(3), filler=np.nan)
    data = np.arange(12).astype(float).reshape((2, -1))

    sm.train(data)
    data_forwarded = sm.forward(data)

    for m in (sm, sm_f0, sm_fm1, sm_fnan):
        m.train(data)
        assert_array_equal(data_forwarded, m.forward(data))

    data_back_fm1 = sm_fm1.reverse(data_forwarded)
    ok_(np.all(data_back_fm1[:, 3:] == -1))
    data_back_fnan = sm_fnan.reverse(data_forwarded)
    ok_(np.all(np.isnan(data_back_fnan[:, 3:])))
Exemplo n.º 35
0
def test_subset_filler():
    sm = StaticFeatureSelection(np.arange(3))
    sm_f0 = StaticFeatureSelection(np.arange(3), filler=0)
    sm_fm1 = StaticFeatureSelection(np.arange(3), filler=-1)
    sm_fnan = StaticFeatureSelection(np.arange(3), filler=np.nan)
    data = np.arange(12).astype(float).reshape((2, -1))

    sm.train(data)
    data_forwarded = sm.forward(data)

    for m in (sm, sm_f0, sm_fm1, sm_fnan):
        m.train(data)
        assert_array_equal(data_forwarded, m.forward(data))

    data_back_fm1 = sm_fm1.reverse(data_forwarded)
    ok_(np.all(data_back_fm1[:, 3:] == -1))
    data_back_fnan = sm_fnan.reverse(data_forwarded)
    ok_(np.all(np.isnan(data_back_fnan[:, 3:])))
Exemplo n.º 36
0
    def test_discarded_boundaries(self):
        ds = datasets['hollow']
        # four runs
        ds.sa['chunks'] = np.repeat(np.arange(4), 10)
        # do odd even splitting for lots of boundaries in few splits
        part = ChainNode([OddEvenPartitioner(),
                          StripBoundariesSamples('chunks', 1, 2)])

        parts = [d.samples.sid for d in part.generate(ds)]

        # both dataset should have the same samples, because the boundaries are
        # identical and the same sample should be stripped
        assert_array_equal(parts[0], parts[1])

        # we strip 3 samples per boundary
        assert_equal(len(parts[0]), len(ds) - (3 * 3))

        for i in [9, 10, 11, 19, 20, 21, 29, 30, 31]:
            assert_false(i in parts[0])
Exemplo n.º 37
0
def test_array_collectable():
    c = ArrayCollectable()

    # empty by default
    assert_equal(c.name, None)
    assert_equal(c.value, None)

    # late assignment
    c.name = 'somename'
    assert_raises(ValueError, c._set, 12345)
    assert_equal(c.value, None)
    c.value = np.arange(5)
    assert_equal(c.name, 'somename')
    assert_array_equal(c.value, np.arange(5))

    # immediate content
    data = np.random.random(size=(3,10))
    c = ArrayCollectable(data.copy(), 'myname',
                         "This is a test", length=3)
    assert_equal(c.name, 'myname')
    assert_array_equal(c.value, data)
    assert_equal(c.__doc__, "This is a test")
    assert_equal(str(c), 'myname')

    # repr
    from numpy import array
    e = eval(repr(c))
    assert_equal(e.name, 'myname')
    assert_array_almost_equal(e.value, data)
    assert_equal(e.__doc__, "This is a test")

    # cannot assign array of wrong length
    assert_raises(ValueError, c._set, np.arange(5))
    assert_equal(len(c), 3)

    # shallow copy DOES create a view of value array
    c.value = np.arange(3)
    d = copy.copy(c)
    assert_true(d.value.base is c.value)

    # names starting with _ are not allowed
    assert_raises(ValueError, c._set_name, "_underscore")
Exemplo n.º 38
0
def test_array_collectable():
    c = ArrayCollectable()

    # empty by default
    assert_equal(c.name, None)
    assert_equal(c.value, None)

    # late assignment
    c.name = 'somename'
    assert_raises(ValueError, c._set, 12345)
    assert_equal(c.value, None)
    c.value = np.arange(5)
    assert_equal(c.name, 'somename')
    assert_array_equal(c.value, np.arange(5))

    # immediate content
    data = np.random.random(size=(3, 10))
    c = ArrayCollectable(data.copy(), 'myname', "This is a test", length=3)
    assert_equal(c.name, 'myname')
    assert_array_equal(c.value, data)
    assert_equal(c.__doc__, "This is a test")
    assert_equal(str(c), 'myname')

    # repr
    from numpy import array
    e = eval(repr(c))
    assert_equal(e.name, 'myname')
    assert_array_almost_equal(e.value, data)
    assert_equal(e.__doc__, "This is a test")

    # cannot assign array of wrong length
    assert_raises(ValueError, c._set, np.arange(5))
    assert_equal(len(c), 3)

    # shallow copy DOES create a view of value array
    c.value = np.arange(3)
    d = copy.copy(c)
    assert_true(d.value.base is c.value)

    # names starting with _ are not allowed
    assert_raises(ValueError, c._set_name, "_underscore")
Exemplo n.º 39
0
def test_mapper_vs_zscore():
    """Test by comparing to results of elderly z-score function
    """
    # data: 40 sample feature line in 20d space (40x20; samples x features)
    dss = [
        dataset_wizard(np.concatenate(
            [np.arange(40) for i in range(20)]).reshape(20,-1).T,
                targets=1, chunks=1),
        ] + datasets.values()

    for ds in dss:
        ds1 = deepcopy(ds)
        ds2 = deepcopy(ds)

        zsm = ZScoreMapper(chunks_attr=None)
        assert_raises(RuntimeError, zsm.forward, ds1.samples)
        zsm.train(ds1)
        ds1z = zsm.forward(ds1.samples)

        zscore(ds2, chunks_attr=None)
        assert_array_almost_equal(ds1z, ds2.samples)
        assert_array_equal(ds1.samples, ds.samples)
Exemplo n.º 40
0
def test_mapper_vs_zscore():
    """Test by comparing to results of elderly z-score function
    """
    # data: 40 sample feature line in 20d space (40x20; samples x features)
    dss = [
        dataset_wizard(np.concatenate(
            [np.arange(40) for i in range(20)]).reshape(20,-1).T,
                targets=1, chunks=1),
        ] + datasets.values()

    for ds in dss:
        ds1 = deepcopy(ds)
        ds2 = deepcopy(ds)

        zsm = ZScoreMapper(chunks_attr=None)
        assert_raises(RuntimeError, zsm.forward, ds1.samples)
        zsm.train(ds1)
        ds1z = zsm.forward(ds1.samples)

        zscore(ds2, chunks_attr=None)
        assert_array_almost_equal(ds1z, ds2.samples)
        assert_array_equal(ds1.samples, ds.samples)
Exemplo n.º 41
0
def test_fxmapper():
    origdata = np.arange(24).reshape(3,8)
    ds = Dataset(origdata.copy())
    ds.samples *= -1

    # test a mapper that doesn't change the shape
    # it shouldn't mapper along with axis it is applied
    m_s = FxMapper('samples', np.absolute)
    m_f = FxMapper('features', np.absolute)
    a_m = absolute_features()
    assert_array_equal(m_s.forward(ds), origdata)
    assert_array_equal(a_m.forward(ds), origdata)
    assert_array_equal(m_s.forward(ds), m_f.forward(ds))
Exemplo n.º 42
0
def test_selects():
    mask = np.ones((3, 2), dtype="bool")
    mask[1, 1] = 0
    mask0 = mask.copy()
    data = np.arange(6).reshape(mask.shape)
    map_ = mask_mapper(mask)

    # check if any exception is thrown if we get
    # out of the outIds
    # assert_raises(IndexError, map_.select_out, [0,1,2,6])

    # remove 1,2
    map_.append(StaticFeatureSelection([0, 3, 4]))
    assert_array_equal(map_.forward1(data), [0, 4, 5])
    # remove 1 more
    map_.append(StaticFeatureSelection([0, 2]))
    assert_array_equal(map_.forward1(data), [0, 5])

    # check if original mask wasn't perturbed
    assert_array_equal(mask, mask0)

    # check if original mask wasn't perturbed
    assert_array_equal(mask, mask0)
Exemplo n.º 43
0
    def test_odd_even_split(self):
        oes = OddEvenSplitter()

        splits = [ (train, test) for (train, test) in oes(self.data) ]

        self.failUnless(len(splits) == 2)

        for i,p in enumerate(splits):
            self.failUnless( len(p) == 2 )
            self.failUnless( p[0].nsamples == 50 )
            self.failUnless( p[1].nsamples == 50 )

        assert_array_equal(splits[0][1].sa['chunks'].unique, [1, 3, 5, 7, 9])
        assert_array_equal(splits[0][0].sa['chunks'].unique, [0, 2, 4, 6, 8])
        assert_array_equal(splits[1][0].sa['chunks'].unique, [1, 3, 5, 7, 9])
        assert_array_equal(splits[1][1].sa['chunks'].unique, [0, 2, 4, 6, 8])

        # check if it works on pure odd and even chunk ids
        moresplits = [ (train, test) for (train, test) in oes(splits[0][0])]

        for split in moresplits:
            self.failUnless(split[0] != None)
            self.failUnless(split[1] != None)
Exemplo n.º 44
0
def test_selects():
    mask = np.ones((3,2), dtype='bool')
    mask[1,1] = 0
    mask0 = mask.copy()
    data = np.arange(6).reshape(mask.shape)
    map_ = mask_mapper(mask)

    # check if any exception is thrown if we get
    # out of the outIds
    #assert_raises(IndexError, map_.select_out, [0,1,2,6])

    # remove 1,2
    map_.append(FeatureSliceMapper([0,3,4]))
    assert_array_equal(map_.forward1(data), [0, 4, 5])
    # remove 1 more
    map_.append(FeatureSliceMapper([0,2]))
    assert_array_equal(map_.forward1(data), [0, 5])

    # check if original mask wasn't perturbed
    assert_array_equal(mask, mask0)

    # check if original mask wasn't perturbed
    assert_array_equal(mask, mask0)
Exemplo n.º 45
0
    def test_half_split(self):
        hs = HalfSplitter()

        splits = [ (train, test) for (train, test) in hs(self.data) ]

        self.failUnless(len(splits) == 2)

        for i,p in enumerate(splits):
            self.failUnless( len(p) == 2 )
            self.failUnless( p[0].nsamples == 50 )
            self.failUnless( p[1].nsamples == 50 )

        assert_array_equal(splits[0][1].sa['chunks'].unique, [0, 1, 2, 3, 4])
        assert_array_equal(splits[0][0].sa['chunks'].unique, [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][1].sa['chunks'].unique, [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][0].sa['chunks'].unique, [0, 1, 2, 3, 4])

        # check if it works on pure odd and even chunk ids
        moresplits = [ (train, test) for (train, test) in hs(splits[0][0])]

        for split in moresplits:
            self.failUnless(split[0] != None)
            self.failUnless(split[1] != None)
Exemplo n.º 46
0
    def test_odd_even_split(self):
        oes = OddEvenSplitter()

        splits = [(train, test) for (train, test) in oes(self.data)]

        self.failUnless(len(splits) == 2)

        for i, p in enumerate(splits):
            self.failUnless(len(p) == 2)
            self.failUnless(p[0].nsamples == 50)
            self.failUnless(p[1].nsamples == 50)

        assert_array_equal(splits[0][1].sa['chunks'].unique, [1, 3, 5, 7, 9])
        assert_array_equal(splits[0][0].sa['chunks'].unique, [0, 2, 4, 6, 8])
        assert_array_equal(splits[1][0].sa['chunks'].unique, [1, 3, 5, 7, 9])
        assert_array_equal(splits[1][1].sa['chunks'].unique, [0, 2, 4, 6, 8])

        # check if it works on pure odd and even chunk ids
        moresplits = [(train, test) for (train, test) in oes(splits[0][0])]

        for split in moresplits:
            self.failUnless(split[0] != None)
            self.failUnless(split[1] != None)
Exemplo n.º 47
0
    def test_half_split(self):
        hs = HalfSplitter()

        splits = [(train, test) for (train, test) in hs(self.data)]

        self.failUnless(len(splits) == 2)

        for i, p in enumerate(splits):
            self.failUnless(len(p) == 2)
            self.failUnless(p[0].nsamples == 50)
            self.failUnless(p[1].nsamples == 50)

        assert_array_equal(splits[0][1].sa['chunks'].unique, [0, 1, 2, 3, 4])
        assert_array_equal(splits[0][0].sa['chunks'].unique, [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][1].sa['chunks'].unique, [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][0].sa['chunks'].unique, [0, 1, 2, 3, 4])

        # check if it works on pure odd and even chunk ids
        moresplits = [(train, test) for (train, test) in hs(splits[0][0])]

        for split in moresplits:
            self.failUnless(split[0] != None)
            self.failUnless(split[1] != None)
Exemplo n.º 48
0
def test_forward_dense_array_mapper():
    mask = np.ones((3,2), dtype='bool')
    map_ = mask_mapper(mask)

    # test shape reports
    assert_equal(map_.forward1(mask).shape, (6,))

    # test 1sample mapping
    assert_array_equal(map_.forward1(np.arange(6).reshape(3,2)),
                       [0,1,2,3,4,5])

    # test 4sample mapping
    foursample = map_.forward(np.arange(24).reshape(4,3,2))
    assert_array_equal(foursample,
                       [[0,1,2,3,4,5],
                        [6,7,8,9,10,11],
                        [12,13,14,15,16,17],
                        [18,19,20,21,22,23]])

    # check incomplete masks
    mask[1,1] = 0
    map_ = mask_mapper(mask)
    assert_equal(map_.forward1(mask).shape, (5,))
    assert_array_equal(map_.forward1(np.arange(6).reshape(3,2)),
                       [0,1,2,4,5])

    # check that it doesn't accept wrong dataspace
    assert_raises(ValueError, map_.forward, np.arange(4).reshape(2,2))

    # check fail if neither mask nor shape
    assert_raises(ValueError, mask_mapper)

    # check that a full mask is automatically created when providing shape
    m = mask_mapper(shape=(2, 3, 4))
    mp = m.forward1(np.arange(24).reshape(2, 3, 4))
    assert_array_equal(mp, np.arange(24))
Exemplo n.º 49
0
def test_featuregroup_mapper():
    ds = Dataset(np.arange(24).reshape(3,8))
    ds.fa['roi'] = [0, 1] * 4
    # just to check
    ds.sa['chunks'] = np.arange(3)

    # correct results
    csamples = [[3, 4], [11, 12], [19, 20]]
    croi = [0, 1]
    cchunks = np.arange(3)

    m = mean_group_feature(['roi'])
    mds = m.forward(ds)
    assert_equal(mds.shape, (3, 2))
    assert_array_equal(mds.samples, csamples)
    assert_array_equal(mds.fa.roi, np.unique([0, 1] * 4))
    # FAs should simply remain the same
    assert_array_equal(mds.sa.chunks, np.arange(3))
Exemplo n.º 50
0
 def test_symmetry(self):
     self.build_vector_based_pm()
     assert_array_almost_equal(self.pm.proj[:,self.samples.shape[0]],
                               self.pm.proj.T[self.samples.shape[0],:])
     assert_array_equal(self.pm.proj[:,self.samples.shape[0]],
                        self.pm.proj.T[self.samples.shape[0],:])
Exemplo n.º 51
0
def test_subset():
    data = np.array(
        [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
         [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
         [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
         [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]])
    # float array doesn't work
    sm = FeatureSliceMapper(np.ones(16))
    assert_raises(IndexError, sm.forward, data)

    # full mask
    sm = FeatureSliceMapper(slice(None))
    # should not change single samples
    assert_array_equal(sm.forward(data[0:1].copy()), data[0:1])
    # or multi-samples
    assert_array_equal(sm.forward(data.copy()), data)
    sm.train(data)
    # same on reverse
    assert_array_equal(sm.reverse(data[0:1].copy()), data[0:1])
    # or multi-samples
    assert_array_equal(sm.reverse(data.copy()), data)

    # identical mappers
    sm_none = FeatureSliceMapper(slice(None))
    sm_int = FeatureSliceMapper(np.arange(16))
    sm_bool = FeatureSliceMapper(np.ones(16, dtype='bool'))
    sms = [sm_none, sm_int, sm_bool]

    # test subsets
    sids = [3, 4, 5, 6]
    bsubset = np.zeros(16, dtype='bool')
    bsubset[sids] = True
    subsets = [sids, slice(3, 7), bsubset, [3, 3, 4, 4, 6, 6, 6, 5]]
    # all test subset result in equivalent masks, hence should do the same to
    # the mapper and result in identical behavior
    for st in sms:
        for i, sub in enumerate(subsets):
            # shallow copy
            orig = copy(st)
            subsm = FeatureSliceMapper(sub)
            # should do copy-on-write for all important stuff!!
            assert_true(orig.is_mergable(subsm))
            orig += subsm
            # test if selection did its job
            if i == 3:
                # special case of multiplying features
                assert_array_equal(orig.forward1(data[0].copy()), subsets[i])
            else:
                assert_array_equal(orig.forward1(data[0].copy()), sids)

    ## all of the above shouldn't change the original mapper
    #assert_array_equal(sm.get_mask(), np.arange(16))

    # check for some bug catcher
    # no 3D input
    #assert_raises(IndexError, sm.forward, np.ones((3,2,1)))
    # no input of wrong length
    if __debug__:
        # checked only in __debug__
        assert_raises(ValueError, sm.forward, np.ones(4))
Exemplo n.º 52
0
def test_datasetmapping():
    # 6 samples, 4 features
    data = np.arange(24).reshape(6, 4)
    ds = Dataset(data,
                 sa={
                     'timepoints': np.arange(6),
                     'multidim': data.copy()
                 },
                 fa={'fid': np.arange(4)})
    # with overlapping and non-overlapping boxcars
    startpoints = [0, 1, 4]
    boxlength = 2
    bm = BoxcarMapper(startpoints, boxlength, inspace='boxy')
    # train is critical
    bm.train(ds)
    mds = bm.forward(ds)
    assert_equal(len(mds), len(startpoints))
    assert_equal(mds.nfeatures, boxlength)
    # all samples attributes remain, but the can rotated/compressed into
    # multidimensional attributes
    assert_equal(sorted(mds.sa.keys()),
                 ['boxy_onsetidx'] + sorted(ds.sa.keys()))
    assert_equal(mds.sa.multidim.shape,
                 (len(startpoints), boxlength, ds.nfeatures))
    assert_equal(mds.sa.timepoints.shape, (len(startpoints), boxlength))
    assert_array_equal(mds.sa.timepoints.flatten(),
                       np.array([(s, s + 1) for s in startpoints]).flatten())
    assert_array_equal(mds.sa.boxy_onsetidx, startpoints)
    # feature attributes also get rotated and broadcasted
    assert_array_equal(mds.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    assert_array_equal(mds.fa.boxy_offsetidx,
                       np.repeat(np.arange(boxlength), 4).reshape(2, -1))

    # now see how it works on reverse()
    rds = bm.reverse(mds)
    # we got at least something of all original attributes back
    assert_equal(sorted(rds.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(rds.fa.keys()), sorted(ds.fa.keys()))
    # it is not possible to reconstruct the full samples array
    # some samples even might show up multiple times (when there are overlapping
    # boxcars
    assert_array_equal(
        rds.samples,
        np.array([[0, 1, 2, 3], [4, 5, 6, 7], [4, 5, 6, 7], [8, 9, 10, 11],
                  [16, 17, 18, 19], [20, 21, 22, 23]]))
    assert_array_equal(rds.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(rds.sa.multidim, ds.sa.multidim[rds.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(rds.fa.fid, ds.fa.fid)
Exemplo n.º 53
0
    def test_custom_split(self):
        #simulate half splitter
        hs = CustomSplitter([(None, [0, 1, 2, 3, 4]), (None, [5, 6, 7, 8, 9])])
        splits = list(hs(self.data))
        self.failUnless(len(splits) == 2)

        for i, p in enumerate(splits):
            self.failUnless(len(p) == 2)
            self.failUnless(p[0].nsamples == 50)
            self.failUnless(p[1].nsamples == 50)

        assert_array_equal(splits[0][1].sa['chunks'].unique, [0, 1, 2, 3, 4])
        assert_array_equal(splits[0][0].sa['chunks'].unique, [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][1].sa['chunks'].unique, [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][0].sa['chunks'].unique, [0, 1, 2, 3, 4])

        # check fully customized split with working and validation set specified
        cs = CustomSplitter([([0, 3, 4], [5, 9])])
        splits = list(cs(self.data))
        self.failUnless(len(splits) == 1)

        for i, p in enumerate(splits):
            self.failUnless(len(p) == 2)
            self.failUnless(p[0].nsamples == 30)
            self.failUnless(p[1].nsamples == 20)

        self.failUnless((splits[0][1].sa['chunks'].unique == [5, 9]).all())
        self.failUnless((splits[0][0].sa['chunks'].unique == [0, 3, 4]).all())

        # full test with additional sampling and 3 datasets per split
        cs = CustomSplitter([([0, 3, 4], [5, 9], [2])],
                            npertarget=[3, 4, 1],
                            nrunspersplit=3)
        splits = list(cs(self.data))
        self.failUnless(len(splits) == 3)

        for i, p in enumerate(splits):
            self.failUnless(len(p) == 3)
            self.failUnless(p[0].nsamples == 12)
            self.failUnless(p[1].nsamples == 16)
            self.failUnless(p[2].nsamples == 4)

        # lets test selection of samples by ratio and combined with
        # other ways
        cs = CustomSplitter([([0, 3, 4], [5, 9], [2])],
                            npertarget=[[0.3, 0.6, 1.0, 0.5], 0.5, 'all'],
                            nrunspersplit=3)
        csall = CustomSplitter([([0, 3, 4], [5, 9], [2])], nrunspersplit=3)
        # lets craft simpler dataset
        #ds = Dataset(samples=np.arange(12), targets=[1]*6+[2]*6, chunks=1)
        splits = list(cs(self.data))
        splitsall = list(csall(self.data))

        self.failUnless(len(splits) == 3)
        ul = self.data.sa['targets'].unique

        assert_array_equal(
            (np.array(
                splitsall[0][0].get_nsamples_per_attr('targets').values()) *
             [0.3, 0.6, 1.0, 0.5]).round().astype(int),
            np.array(splits[0][0].get_nsamples_per_attr('targets').values()))

        assert_array_equal(
            (np.array(
                splitsall[0][1].get_nsamples_per_attr('targets').values()) *
             0.5).round().astype(int),
            np.array(splits[0][1].get_nsamples_per_attr('targets').values()))

        assert_array_equal(
            np.array(
                splitsall[0][2].get_nsamples_per_attr('targets').values()),
            np.array(splits[0][2].get_nsamples_per_attr('targets').values()))
Exemplo n.º 54
0
def test_zscore():
    """Test z-scoring transformation
    """
    # dataset: mean=2, std=1
    samples = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)).\
        reshape((16, 1))
    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    assert_equal(data.samples.mean(), 2.0)
    assert_equal(data.samples.std(), 1.0)
    data_samples = data.samples.copy()
    zscore(data, chunks_attr='chunks')

    # copy should stay intact
    assert_equal(data_samples.mean(), 2.0)
    assert_equal(data_samples.std(), 1.0)
    # we should be able to operate on ndarrays
    # But we can't change type inplace for an array, can't we?
    assert_raises(TypeError, zscore, data_samples, chunks_attr=None)
    # so lets do manually
    data_samples = data_samples.astype(float)
    zscore(data_samples, chunks_attr=None)
    assert_array_equal(data.samples, data_samples)
    print data_samples

    # check z-scoring
    check = np.array([-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0],
                    dtype='float64').reshape(16, 1)
    assert_array_equal(data.samples, check)

    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    zscore(data, chunks_attr=None)
    assert_array_equal(data.samples, check)

    # check z-scoring taking set of labels as a baseline
    data = dataset_wizard(samples.copy(),
                   targets=[0, 2, 2, 2, 1] + [2] * 11,
                   chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples + 1.0)

    # check that zscore modifies in-place; only guaranteed if no upcasting is
    # necessary
    samples = samples.astype('float')
    data = dataset_wizard(samples,
                   targets=[0, 2, 2, 2, 1] + [2] * 11,
                   chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples)

    # these might be duplicating code above -- but twice is better than nothing

    # dataset: mean=2, std=1
    raw = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2))
    # dataset: mean=12, std=1
    raw2 = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)) + 10
    # zscore target
    check = [-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0]

    ds = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)
    pristine = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)

    zm = ZScoreMapper()
    # should do global zscore by default
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]))
    # should not modify the source
    assert_array_equal(pristine, ds)

    # if we tell it a different mean it should obey the order
    zm = ZScoreMapper(params=(3,1))
    zm.train(ds)
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]) - 1 )
    assert_array_equal(pristine, ds)

    # let's look at chunk-wise z-scoring
    ds = dataset_wizard(np.hstack((raw.copy(), raw2.copy())),
                        targets=range(32),
                        chunks=[0] * 16 + [1] * 16)
    # by default chunk-wise
    zm = ZScoreMapper()
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))
    # we should be able to do that same manually
    zm = ZScoreMapper(params={0: (2,1), 1: (12,1)})
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))
Exemplo n.º 55
0
def test_sphere():
    # test sphere initialization
    s = ne.Sphere(1)
    center0 = (0, 0, 0)
    center1 = (1, 1, 1)
    assert_equal(len(s(center0)), 7)
    target = array([
        array([-1, 0, 0]),
        array([0, -1, 0]),
        array([0, 0, -1]),
        array([0, 0, 0]),
        array([0, 0, 1]),
        array([0, 1, 0]),
        array([1, 0, 0])
    ])
    # test of internals -- no recomputation of increments should be done
    prev_increments = s._increments
    assert_array_equal(s(center0), target)
    ok_(prev_increments is s._increments)
    # query lower dimensionality
    _ = s((0, 0))
    ok_(not prev_increments is s._increments)

    # test Sphere call
    target = [
        array([0, 1, 1]),
        array([1, 0, 1]),
        array([1, 1, 0]),
        array([1, 1, 1]),
        array([1, 1, 2]),
        array([1, 2, 1]),
        array([2, 1, 1])
    ]
    res = s(center1)
    assert_array_equal(array(res), target)
    # They all should be tuples
    ok_(np.all([isinstance(x, tuple) for x in res]))

    # test for larger diameter
    s = ne.Sphere(4)
    assert_equal(len(s(center1)), 257)

    # test extent keyword
    #s = ne.Sphere(4,extent=(1,1,1))
    #assert_array_equal(array(s((0,0,0))), array([[0,0,0]]))

    # test Errors during initialisation and call
    #assert_raises(ValueError, ne.Sphere, 2)
    #assert_raises(ValueError, ne.Sphere, 1.0)

    # no longer extent available
    assert_raises(TypeError, ne.Sphere, 1, extent=(1))
    assert_raises(TypeError, ne.Sphere, 1, extent=(1.0, 1.0, 1.0))

    s = ne.Sphere(1)
    #assert_raises(ValueError, s, (1))
    if __debug__:
        # No float coordinates allowed for now...
        # XXX might like to change that ;)
        #
        assert_raises(ValueError, s, (1.0, 1.0, 1.0))
Exemplo n.º 56
0
 def test_size(self):
     self.build_vector_based_pm()
     assert_array_equal(self.pm.proj.shape,
                        (self.samples.shape[0],
                         self.prototypes.shape[0] * len(self.similarities)))
Exemplo n.º 57
0
def test_query_engine():
    data = np.arange(54)
    # indices in 3D
    ind = np.transpose((np.ones((3, 3, 3)).nonzero()))
    # sphere generator for 3 elements diameter
    sphere = ne.Sphere(1)
    # dataset with just one "space"
    ds = Dataset([data, data], fa={'s_ind': np.concatenate((ind, ind))})
    # and the query engine attaching the generator to the "index-space"
    qe = ne.IndexQueryEngine(s_ind=sphere)
    # cannot train since the engine does not know about the second space
    assert_raises(ValueError, qe.train, ds)
    # now do it again with a full spec
    ds = Dataset([data, data],
                 fa={
                     's_ind': np.concatenate((ind, ind)),
                     't_ind': np.repeat([0, 1], 27)
                 })
    qe = ne.IndexQueryEngine(s_ind=sphere, t_ind=None)
    qe.train(ds)
    # internal representation check
    # YOH: invalid for new implementation with lookup tables (dictionaries)
    #assert_array_equal(qe._searcharray,
    #                   np.arange(54).reshape(qe._searcharray.shape) + 1)
    # should give us one corner, collapsing the 't_ind'
    assert_array_equal(qe(s_ind=(0, 0, 0)), [0, 1, 3, 9, 27, 28, 30, 36])
    # directly specifying an index for 't_ind' without having an ROI
    # generator, should give the same corner, but just once
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=0), [0, 1, 3, 9])
    # just out of the mask -- no match
    assert_array_equal(qe(s_ind=(3, 3, 3)), [])
    # also out of the mask -- but single match
    assert_array_equal(qe(s_ind=(2, 2, 3), t_ind=1), [53])
    # query by id
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=0), qe[0])
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=[0, 1]), qe(s_ind=(0, 0, 0)))
    # should not fail if t_ind is outside
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=[0, 1, 10]),
                       qe(s_ind=(0, 0, 0)))

    # should fail if asked about some unknown thing
    assert_raises(ValueError, qe.__call__, s_ind=(0, 0, 0), buga=0)

    # Test by using some literal feature atttribute
    ds.fa['lit'] = ['roi1', 'ro2', 'r3'] * 18
    # should work as well as before
    assert_array_equal(qe(s_ind=(0, 0, 0)), [0, 1, 3, 9, 27, 28, 30, 36])
    # should fail if asked about some unknown (yet) thing
    assert_raises(ValueError, qe.__call__, s_ind=(0, 0, 0), lit='roi1')

    # Create qe which can query literals as well
    qe_lit = ne.IndexQueryEngine(s_ind=sphere, t_ind=None, lit=None)
    qe_lit.train(ds)
    # should work as well as before
    assert_array_equal(qe_lit(s_ind=(0, 0, 0)), [0, 1, 3, 9, 27, 28, 30, 36])
    # and subselect nicely -- only /3 ones
    assert_array_equal(qe_lit(s_ind=(0, 0, 0), lit='roi1'),
                       [0, 3, 9, 27, 30, 36])
    assert_array_equal(qe_lit(s_ind=(0, 0, 0), lit=['roi1', 'ro2']),
                       [0, 1, 3, 9, 27, 28, 30, 36])
Exemplo n.º 58
0
    def test_n_group_split(self):
        """Test NGroupSplitter alongside with the reversal of the
        order of spit out datasets
        """
        # Test 2 groups like HalfSplitter first
        hs = NGroupSplitter(2)
        hs_reversed = NGroupSplitter(2, reverse=True)

        for isreversed, splitter in enumerate((hs, hs_reversed)):
            splits = list(splitter(self.data))
            self.failUnless(len(splits) == 2)

            for i, p in enumerate(splits):
                self.failUnless(len(p) == 2)
                self.failUnless(p[0].nsamples == 50)
                self.failUnless(p[1].nsamples == 50)

            assert_array_equal(splits[0][1 - isreversed].sa['chunks'].unique,
                               [0, 1, 2, 3, 4])
            assert_array_equal(splits[0][isreversed].sa['chunks'].unique,
                               [5, 6, 7, 8, 9])
            assert_array_equal(splits[1][1 - isreversed].sa['chunks'].unique,
                               [5, 6, 7, 8, 9])
            assert_array_equal(splits[1][isreversed].sa['chunks'].unique,
                               [0, 1, 2, 3, 4])

        # check if it works on pure odd and even chunk ids
        moresplits = list(hs(splits[0][0]))

        for split in moresplits:
            self.failUnless(split[0] != None)
            self.failUnless(split[1] != None)

        # now test more groups
        s5 = NGroupSplitter(5)
        s5_reversed = NGroupSplitter(5, reverse=True)

        # get the splits
        for isreversed, s5splitter in enumerate((s5, s5_reversed)):
            splits = list(s5splitter(self.data))

            # must have 10 splits
            self.failUnless(len(splits) == 5)

            # check split content
            assert_array_equal(splits[0][1 - isreversed].sa['chunks'].unique,
                               [0, 1])
            assert_array_equal(splits[0][isreversed].sa['chunks'].unique,
                               [2, 3, 4, 5, 6, 7, 8, 9])
            assert_array_equal(splits[1][1 - isreversed].sa['chunks'].unique,
                               [2, 3])
            assert_array_equal(splits[1][isreversed].sa['chunks'].unique,
                               [0, 1, 4, 5, 6, 7, 8, 9])
            # ...
            assert_array_equal(splits[4][1 - isreversed].sa['chunks'].unique,
                               [8, 9])
            assert_array_equal(splits[4][isreversed].sa['chunks'].unique,
                               [0, 1, 2, 3, 4, 5, 6, 7])

        # Test for too many groups
        def splitcall(spl, dat):
            return [(train, test) for (train, test) in spl(dat)]

        s20 = NGroupSplitter(20)
        self.assertRaises(ValueError, splitcall, s20, self.data)
Exemplo n.º 59
0
def test_flatten():
    samples_shape = (2, 2, 4)
    data_shape = (4, ) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape).view(myarray)
    pristinedata = data.copy()
    target = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target).view(myarray)
    index_target = np.array([[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3],
                             [0, 1, 0], [0, 1, 1], [0, 1, 2], [0, 1, 3],
                             [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 0, 3],
                             [1, 1, 0], [1, 1, 1], [1, 1, 2], [1, 1, 3]])

    # array subclass survives
    ok_(isinstance(data, myarray))

    # actually, there should be no difference between a plain FlattenMapper and
    # a chain that only has a FlattenMapper as the one element
    for fm in [
            FlattenMapper(inspace='voxel'),
            ChainMapper([
                FlattenMapper(inspace='voxel'),
                FeatureSliceMapper(slice(None))
            ])
    ]:
        # not working if untrained
        assert_raises(RuntimeError, fm.forward1,
                      np.arange(np.sum(samples_shape) + 1))

        fm.train(data)

        ok_(isinstance(fm.forward(data), myarray))
        ok_(isinstance(fm.forward1(data[2]), myarray))
        assert_array_equal(fm.forward(data), target)
        assert_array_equal(fm.forward1(data[2]), target[2])
        assert_raises(ValueError, fm.forward, np.arange(4))

        # all of that leaves that data unmodified
        assert_array_equal(data, pristinedata)

        # reverse mapping
        ok_(isinstance(fm.reverse(target), myarray))
        ok_(isinstance(fm.reverse1(target[0]), myarray))
        ok_(isinstance(fm.reverse(target[1:2]), myarray))
        assert_array_equal(fm.reverse(target), data)
        assert_array_equal(fm.reverse1(target[0]), data[0])
        assert_array_equal(fm.reverse(target[1:2]), data[1:2])
        assert_raises(ValueError, fm.reverse, np.arange(14))

        # check one dimensional data, treated as scalar samples
        oned = np.arange(5)
        fm.train(Dataset(oned))
        # needs 2D
        assert_raises(ValueError, fm.forward, oned)
        # doesn't match mapper, since Dataset turns `oned` into (5,1)
        assert_raises(ValueError, fm.forward, oned)
        assert_equal(Dataset(oned).nfeatures, 1)

        # try dataset mode, with some feature attribute
        fattr = np.arange(np.prod(samples_shape)).reshape(samples_shape)
        ds = Dataset(data, fa={'awesome': fattr.copy()})
        assert_equal(ds.samples.shape, data_shape)
        fm.train(ds)
        dsflat = fm.forward(ds)
        ok_(isinstance(dsflat, Dataset))
        ok_(isinstance(dsflat.samples, myarray))
        assert_array_equal(dsflat.samples, target)
        assert_array_equal(dsflat.fa.awesome,
                           np.arange(np.prod(samples_shape)))
        assert_true(isinstance(dsflat.fa['awesome'], ArrayCollectable))
        # test index creation
        assert_array_equal(index_target, dsflat.fa.voxel)

        # and back
        revds = fm.reverse(dsflat)
        ok_(isinstance(revds, Dataset))
        ok_(isinstance(revds.samples, myarray))
        assert_array_equal(revds.samples, data)
        assert_array_equal(revds.fa.awesome, fattr)
        assert_true(isinstance(revds.fa['awesome'], ArrayCollectable))
        assert_false('voxel' in revds.fa)
Exemplo n.º 60
0
def test_mapper_aliases():
    mm=mask_mapper(np.ones((3,4,2), dtype='bool'))
    assert_array_equal(mm(np.ones((2,3,4,2))),
                       mm.forward(np.ones((2,3,4,2))))