Exemplo n.º 1
0
    def test_SplitRFE(self):
        # just a smoke test ATM
        from mvpa2.clfs.svm import LinearCSVMC
        from mvpa2.clfs.meta import MappedClassifier
        from mvpa2.misc.data_generators import normal_feature_dataset
        #import mvpa2.featsel.rfe
        #reload(mvpa2.featsel.rfe)
        from mvpa2.featsel.rfe import RFE, SplitRFE
        from mvpa2.generators.partition import NFoldPartitioner
        from mvpa2.featsel.helpers import FractionTailSelector
        from mvpa2.testing import ok_, assert_equal

        clf = LinearCSVMC(C=1)
        dataset = normal_feature_dataset(perlabel=20,
                                         nlabels=2,
                                         nfeatures=30,
                                         snr=1.,
                                         nonbogus_features=[1, 5])
        # flip one of the meaningful features around to see
        # if we are still getting proper selection
        dataset.samples[:, dataset.a.nonbogus_features[1]] *= -1
        # 4 partitions should be enough for testing
        partitioner = NFoldPartitioner(count=4)

        rfeclf = MappedClassifier(
            clf,
            SplitRFE(clf,
                     partitioner,
                     fselector=FractionTailSelector(0.2,
                                                    mode='discard',
                                                    tail='lower')))
        r0 = repr(rfeclf)

        ok_(rfeclf.mapper.nfeatures_min == 0)
        rfeclf.train(dataset)
        ok_(rfeclf.mapper.nfeatures_min > 0)
        predictions = rfeclf(dataset).samples

        # at least 1 of the nonbogus-features should be chosen
        ok_(
            len(
                set(dataset.a.nonbogus_features).intersection(
                    rfeclf.mapper.slicearg)) > 0)
        # check repr to have all needed pieces
        r = repr(rfeclf)
        s = str(rfeclf)
        ok_(('partitioner=NFoldP' in r) or
            ('partitioner=mvpa2.generators.partition.NFoldPartitioner' in r))
        ok_('lrn=' in r)
        ok_(not 'slicearg=' in r)
        assert_equal(r, r0)
Exemplo n.º 2
0
    clfswh += \
         FeatureSelectionClassifier(
             linearSVMC.clone(),
             SensitivityBasedFeatureSelection(
                SMLRWeights(SMLR(lm=0.1, implementation="C"),
                            postproc=maxofabs_sample()),
                RangeElementSelector(mode='select')),
             descr="LinSVM on SMLR(lm=0.1) non-0")

    _rfeclf = linearSVMC.clone()
    clfswh += \
         FeatureSelectionClassifier(
             _rfeclf,
             SplitRFE(
                 _rfeclf,
                 OddEvenPartitioner(),
                 fselector=FractionTailSelector(
                     0.2, mode='discard', tail='lower')),
             descr="LinSVM with nested-CV RFE")

    _clfs_splitrfe_svm_anova = \
         FeatureSelectionClassifier(
             linearSVMC.clone(),
             SplitRFE(
                 linearSVMC.clone(),
                 OddEvenPartitioner(),
                 fselector=FractionTailSelector(
                     0.2, mode='discard', tail='lower'),
                 fmeasure=OneWayAnova()),
             descr="LinSVM with nested-CV Anova RFE")
    clfswh += _clfs_splitrfe_svm_anova
Exemplo n.º 3
0
    clfswh += \
         FeatureSelectionClassifier(
             linearSVMC.clone(),
             SensitivityBasedFeatureSelection(
                SMLRWeights(SMLR(lm=0.1, implementation="C"),
                            postproc=maxofabs_sample()),
                RangeElementSelector(mode='select')),
             descr="LinSVM on SMLR(lm=0.1) non-0")

    _rfeclf = linearSVMC.clone()
    clfswh += \
         FeatureSelectionClassifier(
             _rfeclf,
             SplitRFE(
                 _rfeclf,
                 OddEvenPartitioner(),
                 fselector=FractionTailSelector(
                     0.2, mode='discard', tail='lower')),
             descr="LinSVM with nested-CV RFE")

    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC.clone(),
            SensitivityBasedFeatureSelection(
                SMLRWeights(SMLR(lm=1.0, implementation="C"),
                            postproc=maxofabs_sample()),
                RangeElementSelector(mode='select')),
            descr="LinSVM on SMLR(lm=1) non-0")

    # "Interesting" classifiers
    clfswh += \
Exemplo n.º 4
0
    def test_SplitRFE(self, fmeasure):
        # just a smoke test ATM
        from mvpa2.clfs.svm import LinearCSVMC
        from mvpa2.clfs.meta import MappedClassifier
        from mvpa2.misc.data_generators import normal_feature_dataset
        #import mvpa2.featsel.rfe
        #reload(mvpa2.featsel.rfe)
        from mvpa2.featsel.rfe import RFE, SplitRFE
        from mvpa2.generators.partition import NFoldPartitioner
        from mvpa2.featsel.helpers import FractionTailSelector
        from mvpa2.testing import ok_, assert_equal

        clf = LinearCSVMC(C=1)
        dataset = normal_feature_dataset(perlabel=20,
                                         nlabels=2,
                                         nfeatures=11,
                                         snr=1.,
                                         nonbogus_features=[1, 5])
        # flip one of the meaningful features around to see
        # if we are still getting proper selection
        dataset.samples[:, dataset.a.nonbogus_features[1]] *= -1
        # 3 partitions should be enough for testing
        partitioner = NFoldPartitioner(count=3)

        rfeclf = MappedClassifier(
            clf,
            SplitRFE(
                clf,
                partitioner,
                fselector=FractionTailSelector(0.5,
                                               mode='discard',
                                               tail='lower'),
                fmeasure=fmeasure,
                # need to update only when using clf's sens anal
                update_sensitivity=fmeasure is None))
        r0 = repr(rfeclf)

        ok_(rfeclf.mapper.nfeatures_min == 0)
        rfeclf.train(dataset)
        ok_(rfeclf.mapper.nfeatures_min > 0)
        predictions = rfeclf(dataset).samples

        # at least 1 of the nonbogus-features should be chosen
        ok_(
            len(
                set(dataset.a.nonbogus_features).intersection(
                    rfeclf.mapper.slicearg)) > 0)

        # check repr to have all needed pieces
        r = repr(rfeclf)
        s = str(rfeclf)
        ok_(('partitioner=NFoldP' in r) or
            ('partitioner=mvpa2.generators.partition.NFoldPartitioner' in r))
        ok_('lrn=' in r)
        ok_(not 'slicearg=' in r)
        assert_equal(r, r0)

        if externals.exists('joblib'):
            rfeclf.mapper.nproc = -1
            # compare results against the one ran in parallel
            _slicearg = rfeclf.mapper.slicearg
            _predictions = predictions
            rfeclf.train(dataset)
            predictions = rfeclf(dataset).samples
            assert_array_equal(predictions, _predictions)
            assert_array_equal(_slicearg, rfeclf.mapper.slicearg)

        # Test that we can collect stats from cas within cross-validation
        sensitivities = []
        nested_errors = []
        nested_nfeatures = []

        def store_me(data, node, result):
            sens = node.measure.get_sensitivity_analyzer(
                force_train=False)(data)
            sensitivities.append(sens)
            nested_errors.append(node.measure.mapper.ca.nested_errors)
            nested_nfeatures.append(node.measure.mapper.ca.nested_nfeatures)

        cv = CrossValidation(rfeclf,
                             NFoldPartitioner(count=1),
                             callback=store_me,
                             enable_ca=['stats'])
        _ = cv(dataset)
        # just to make sure we collected them
        assert_equal(len(sensitivities), 1)
        assert_equal(len(nested_errors), 1)
        assert_equal(len(nested_nfeatures), 1)