Example No. 1
    def test_split_featurewise_dataset_measure(self):
        ds = datasets['uni3small']
        sana = RepeatedMeasure(
            SMLR(fit_all_weights=True).get_sensitivity_analyzer(),
            ChainNode(
                [NFoldPartitioner(),
                 Splitter('partitions', attr_values=[1])]))

        sens = sana(ds)
        # a sensitivity for each chunk and each label combination
        assert_equal(sens.shape, (len(ds.sa['chunks'].unique) *
                                  len(ds.sa['targets'].unique), ds.nfeatures))

        # Let's try a more complex example with 'boosting'
        ds = datasets['uni3medium']
        ds.init_origids('samples')
        sana = RepeatedMeasure(
            SMLR(fit_all_weights=True).get_sensitivity_analyzer(),
            Balancer(amount=0.25, count=2, apply_selection=True),
            enable_ca=['datasets', 'repetition_results'])
        sens = sana(ds)

        assert_equal(sens.shape,
                     (2 * len(ds.sa['targets'].unique), ds.nfeatures))
        splits = sana.ca.datasets
        self.assertEqual(len(splits), 2)
        self.assertTrue(
            np.all([s.nsamples == ds.nsamples // 4 for s in splits]))
        # should have used different samples
        self.assertTrue(np.any([splits[0].sa.origids != splits[1].sa.origids]))
        # and should have got different sensitivities
        self.assertTrue(np.any(sens[0] != sens[3]))
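For reference, the ChainNode generator used above can also be iterated on its own. A minimal sketch, assuming the same `ds` and relying on the PyMVPA convention that `partitions == 1` marks the training portion of each fold:

        # Minimal sketch: inspect what the ChainNode generator yields by itself
        gen = ChainNode(
            [NFoldPartitioner(),
             Splitter('partitions', attr_values=[1])])
        parts = list(gen.generate(ds))
        # one training partition per chunk/fold, each smaller than the full dataset
        assert_equal(len(parts), len(ds.sa['chunks'].unique))
        self.assertTrue(all(p.nsamples < ds.nsamples for p in parts))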
Example No. 2
def test_splitclf_sensitivities():
    datasets = [
        normal_feature_dataset(perlabel=100,
                               nlabels=2,
                               nfeatures=4,
                               nonbogus_features=[0, i + 1],
                               snr=1,
                               nchunks=2) for i in xrange(2)
    ]

    sclf = SplitClassifier(SMLR(), NFoldPartitioner())
    analyzer = sclf.get_sensitivity_analyzer()

    senses1 = analyzer(datasets[0])
    senses2 = analyzer(datasets[1])

    for senses in senses1, senses2:
        # sensitivities from the two folds must differ, hence allclose is False
        assert_false(np.allclose(senses.samples[0], senses.samples[2]))
        assert_false(np.allclose(senses.samples[1], senses.samples[3]))
    # Moreover, with new data we should have got different results
    # (i.e. the classifier must have been retrained correctly)
    for s1, s2 in zip(senses1, senses2):
        assert_false(np.allclose(s1, s2))

    # and we should have "selected" "correct" voxels
    for i, senses in enumerate((senses1, senses2)):
        assert_equal(set(np.argsort(np.max(np.abs(senses), axis=0))[-2:]),
                     set((0, i + 1)))
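For orientation: each analyzer call above appears to return one sensitivity map per fold and per SMLR weight row, so with two folds and two labels the rows indexed as samples[0..3] come ordered fold-by-fold. A minimal sketch of the corresponding shape check, assuming the same `senses1`:

    # Minimal sketch: 2 folds x 2 weight rows = 4 sensitivity maps over all features
    assert_equal(senses1.shape, (2 * 2, datasets[0].nfeatures))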
Example No. 3
    def test_smlr_sensitivities(self):
        data = normal_feature_dataset(perlabel=10, nlabels=2, nfeatures=4)

        # use SMLR on binary problem, but not fitting all weights
        clf = SMLR(fit_all_weights=False)
        clf.train(data)

        # now ask for the sensitivities WITHOUT having to pass the dataset
        # again
        sens = clf.get_sensitivity_analyzer(force_train=False)(None)
        self.assertTrue(sens.shape == (len(data.UT) - 1, data.nfeatures))
Example No. 4
    def test_clf_transfer_measure(self):
        # and now on a classifier
        clf = SMLR()
        enode = BinaryFxNode(mean_mismatch_error, 'targets')
        tm = TransferMeasure(clf, Splitter('chunks', count=2),
                             enable_ca=['stats'])
        res = tm(self.dataset)
        manual_error = np.mean(res.samples.squeeze() != res.sa.targets)
        postproc_error = enode(res)
        tm_err = TransferMeasure(clf, Splitter('chunks', count=2),
                                 postproc=enode)
        auto_error = tm_err(self.dataset)
        ok_(manual_error == postproc_error.samples[0, 0])
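As a cross-check, `mean_mismatch_error` (wrapped by the BinaryFxNode above) is simply the fraction of mismatching predictions, so it should reproduce the manual computation exactly. A minimal sketch, assuming the same `res` and `manual_error`:

        # Minimal sketch: mean_mismatch_error == fraction of prediction/target mismatches
        direct_error = mean_mismatch_error(res.samples.squeeze(), res.sa.targets)
        ok_(direct_error == manual_error)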
Example No. 5
def test_smlr_state():
    data = datasets['dumb']

    clf = SMLR()

    clf.train(data)

    clf.ca.enable('estimates')
    clf.ca.enable('predictions')

    p = np.asarray(clf.predict(data.samples))

    assert_array_equal(p, clf.ca.predictions)
    assert_equal(np.array(clf.ca.estimates).shape[0], np.array(p).shape[0])
Example No. 6
    def test_pseudo_cv_measure(self):
        clf = SMLR()
        enode = BinaryFxNode(mean_mismatch_error, 'targets')
        tm = TransferMeasure(clf, Splitter('partitions'), postproc=enode)
        cvgen = NFoldPartitioner()
        rm = RepeatedMeasure(tm, cvgen)
        res = rm(self.dataset)
        # one error per fold
        assert_equal(res.shape, (len(self.dataset.sa['chunks'].unique), 1))

        # we can do the same with CrossValidation
        cv = CrossValidation(clf, cvgen, enable_ca=['stats', 'training_stats',
                                                    'datasets'])
        res = cv(self.dataset)
        assert_equal(res.shape, (len(self.dataset.sa['chunks'].unique), 1))
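With the conditional attributes enabled as above, the cross-validation also exposes an aggregated confusion matrix after the call. A minimal sketch, assuming the same `cv` and `self.dataset`:

        # Minimal sketch: the enabled 'stats' attribute holds a confusion matrix
        # summarising all folds of the cross-validation above
        cm = cv.ca.stats
        # every sample is tested exactly once across the folds
        assert_equal(cm.matrix.sum(), self.dataset.nsamples)
        # summary statistics (e.g. overall accuracy) live in the .stats dictionary
        self.assertTrue(0.0 <= cm.stats['ACC'] <= 1.0)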
Example No. 7
    def test_smlr_state(self):
        data = datasets['dumb']

        clf = SMLR()

        clf.train(data)

        clf.ca.enable('estimates')
        clf.ca.enable('predictions')

        p = np.asarray(clf.predict(data.samples))

        self.assertTrue((p == clf.ca.predictions).all())
        self.assertTrue(
            np.array(clf.ca.estimates).shape[0] == np.array(p).shape[0])
Example No. 8
    def test_smlr(self):
        data = datasets['dumb']

        clf = SMLR()

        clf.train(data)

        # prediction has to be perfect
        #
        # XXX yoh: who said that?? ;-)
        #
        # There is always a tradeoff between training and
        # generalization errors, but in this case the problem is
        # more interesting: without a bias term this data cannot be
        # learned at all -- there is no solution that passes
        # through (0, 0)
        predictions = clf.predict(data.samples)
        self.assertTrue((predictions == data.targets).all())
Example No. 9
    def test_union_feature_selection(self):
        # two methods: 5% highest F-scores, non-zero SMLR weights
        fss = [SensitivityBasedFeatureSelection(
                    OneWayAnova(),
                    FractionTailSelector(0.05, mode='select', tail='upper')),
               SensitivityBasedFeatureSelection(
                    SMLRWeights(SMLR(lm=1, implementation="C"),
                                postproc=sumofabs_sample()),
                    RangeElementSelector(mode='select'))]

        fs = CombinedFeatureSelection(fss, method='union')

        od_union = fs(self.dataset)

        self.assertTrue(fs.method == 'union')
        # check output dataset
        self.assertTrue(od_union.nfeatures <= self.dataset.nfeatures)
        # again for intersection
        fs = CombinedFeatureSelection(fss, method='intersection')
        od_intersect = fs(self.dataset)
        assert_true(od_intersect.nfeatures < od_union.nfeatures)
Example No. 10
    def descriptions(self):
        """Descriptions of registered items"""
        return list(self.__descriptions.keys())


clfswh = Warehouse(known_tags=_KNOWN_INTERNALS)  # classifiers
regrswh = Warehouse(known_tags=_KNOWN_INTERNALS)  # regressions

# NB:
#  - Nu-classifiers are turned off since for the haxby DS the default nu
#    is an 'infeasible' one
#  - Python's SMLR is turned off for the duration of development
#    since it is slow and its results should be the same as the C version's
#
clfswh += [
    SMLR(lm=0.1, implementation="C", descr="SMLR(lm=0.1)"),
    SMLR(lm=1.0, implementation="C", descr="SMLR(lm=1.0)"),
    #SMLR(lm=10.0, implementation="C", descr="SMLR(lm=10.0)"),
    #SMLR(lm=100.0, implementation="C", descr="SMLR(lm=100.0)"),
    #SMLR(implementation="Python", descr="SMLR(Python)")
]

clfswh += \
     [ MulticlassClassifier(SMLR(lm=0.1),
                            descr='Pairs+maxvote multiclass on SMLR(lm=0.1)') ]

clfswh += [
    RandomClassifier(descr="Random"),
    RandomClassifier(same=True, descr="RandomSame"),
]
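Once registered, classifiers can be pulled back out of the warehouse by tag. A minimal sketch; the 'smlr' tag is assumed to come from SMLR's own __tags__, and multi-tag lookups (as used in the next example) return only entries matching all listed tags:

# Minimal sketch: warehouse lookup by tag(s)
smlr_clfs = clfswh['smlr']                        # all SMLR-based entries
lin_sens = clfswh['linear', 'has_sensitivity']    # entries matching *all* tags
for clf in lin_sens:
    print(clf.descr)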
Example No. 11
    # fragment of a simple FeaturewiseMeasure subclass used by the tests;
    # the constructor signature below is assumed from the surrounding code
    def __init__(self, mult=1, **kwargs):
        FeaturewiseMeasure.__init__(self, **kwargs)
        self.__mult = mult

    def _call(self, dataset):
        """Train linear SVM on `dataset` and extract weights from classifier.
        """
        sens = self.__mult * (np.arange(dataset.nfeatures) -
                              int(dataset.nfeatures / 2))
        return Dataset(sens[np.newaxis])


# Sample universal classifiers (linear and non-linear) to be used whenever it
# does not matter which particular classifier is employed for testing some
# higher-level construct -- chosen to be the fastest universal ones. They
# should also not hammer the conditional-attribute machinery (state.py) the
# way kNN does...
sample_clf_lin = SMLR(lm=0.1)  #sg.svm.LinearCSVMC(svm_impl='libsvm')

#if externals.exists('shogun'):
#    sample_clf_nl = sg.SVM(kernel_type='RBF', svm_impl='libsvm')
#else:
# classical one which was used for a while
# and surprisingly it is not bad at all for the unittests
sample_clf_nl = kNN(k=5)

# and also a regression-based classifier
r = clfswh['linear', 'regression_based', 'has_sensitivity']
sample_clf_reg = r[0] if len(r) > 0 else None
Example No. 12
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Unit tests for PyMVPA sparse multinomial logistic regression classifier"""

import numpy as np

from mvpa2.testing import *
from mvpa2.testing.datasets import datasets

from mvpa2.clfs.smlr import SMLR
from mvpa2.misc.data_generators import normal_feature_dataset


@sweepargs(clf=(SMLR(), SMLR(implementation='Python')))
def test_smlr(clf):
    data = datasets['dumb']

    clf.train(data)

    # prediction has to be perfect
    #
    # XXX yoh: who said that?? ;-)
    #
    # There is always a tradeoff between training and
    # generalization errors, but in this case the problem is
    # more interesting: without a bias term this data cannot be
    # learned at all -- there is no solution that passes
    # through (0, 0)
    predictions = clf.predict(data.samples)