Example #1
    def testAnalyzerWithSplitClassifier(self, clf):
        """Test analyzers in split classifier
        """
        # assuming many defaults, it is as simple as
        mclf = SplitClassifier(clf=clf,
                               enable_states=['training_confusion',
                                              'confusion'])
        sana = mclf.getSensitivityAnalyzer(transformer=Absolute,
                                           enable_states=["sensitivities"])
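        # sana is now bound to the split classifier: calling it trains
        # mclf on each split and returns per-feature sensitivities
        # combined across splits (FirstAxisMean by default, as the
        # assertions below verify)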

        # Test access to transformers and combiners
        self.failUnless(sana.transformer is Absolute)
        self.failUnless(sana.combiner is FirstAxisMean)
        # and let's look at all sensitivities

        # and we get a sensitivity analyzer which works on splits
        map_ = sana(self.dataset)
        self.failUnlessEqual(len(map_), self.dataset.nfeatures)

        if cfg.getboolean('tests', 'labile', default='yes'):
            for conf_matrix in [sana.clf.training_confusion] \
                              + sana.clf.confusion.matrices:
                self.failUnless(
                    conf_matrix.percentCorrect > 75,
                    msg="We must have trained more or less correctly "
                        "on each split. Got %f%% correct on %d labels" %
                        (conf_matrix.percentCorrect,
                         len(self.dataset.uniquelabels)))

        # NB: percentCorrect is an accuracy, not an error rate
        accuracies = [x.percentCorrect
                      for x in sana.clf.confusion.matrices]

        # XXX
        # That is too much to ask if the dataset is easy - thus
        # disabled for now
        #self.failUnless(N.min(accuracies) != N.max(accuracies),
        #                msg="Splits should have slightly different "
        #                    "generalization")

        # let's go through all sensitivities and see if we selected the
        # right features
        # XXX yoh: disabled checking of each map separately since in
        #     BoostedClassifierSensitivityAnalyzer and
        #     ProxyClassifierSensitivityAnalyzer
        #     we do not yet have a way to provide transformers, so the
        #     internal call to getSensitivityAnalyzer in their _call is
        #     not parametrized
        if 'meta' in clf._clf_internals and len(map_.nonzero()[0]) < 2:
            # Some meta classifiers (5% of ANOVA) are too harsh ;-)
            return
        for map__ in [map_]: # + sana.combined_analyzer.sensitivities:
            selected = FixedNElementTailSelector(
                self.dataset.nfeatures -
                len(self.dataset.nonbogus_features))(map__)
            if cfg.getboolean('tests', 'labile', default='yes'):
                self.failUnlessEqual(
                    list(selected),
                    list(self.dataset.nonbogus_features),
                    msg="At the end we should have selected the right features")
Example #2
    def __testFSPipelineWithAnalyzerWithSplitClassifier(self, basic_clf):
        #basic_clf = LinearNuSVMC()
        multi_clf = MulticlassClassifier(clf=basic_clf)
        #svm_weights = LinearSVMWeights(svm)

        # Proper RFE: aggregate sensitivities across multiple splits;
        # with multiple classes those also need to be aggregated
        # somehow. The transfer error here should be the 'leave-1-out'
        # error of the split classifier itself
        sclf = SplitClassifier(clf=basic_clf)
        # NB: trans_error was left undefined in the original snippet; a
        #     plausible definition, matching the comment above, is the
        #     transfer error of the split classifier itself, e.g. via
        #     TransferError from mvpa.clfs.transerror:
        trans_error = TransferError(sclf)
        rfe = RFE(sensitivity_analyzer=
                    sclf.getSensitivityAnalyzer(
                        enable_states=["sensitivities"]),
                  transfer_error=trans_error,
                  feature_selector=FeatureSelectionPipeline(
                      [FractionTailSelector(0.5),
                       FixedNElementTailSelector(1)]),
                  train_clf=True)

        # and we get a sensitivity analyzer which works on splits and
        # uses sensitivities
        selected_features = rfe(self.dataset)
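The FeatureSelectionPipeline above appears intended to make each RFE
round discard the weaker half of the surviving features
(FractionTailSelector(0.5)) while guaranteeing progress by removing at
least one element (FixedNElementTailSelector(1)). A rough,
self-contained sketch of that elimination schedule, with a hypothetical
rank function standing in for the sensitivity analyzer and ignoring the
transfer-error-based stopping criterion the real RFE uses:

import numpy as np

def rfe_schedule(sensitivities_of, n_features, min_keep=1):
    """Yield the surviving feature indices after each elimination
    round: rank survivors, drop the weaker half, always drop >= 1."""
    surviving = np.arange(n_features)
    while len(surviving) > min_keep:
        sens = sensitivities_of(surviving)    # rank current survivors
        order = np.argsort(np.abs(sens))      # weakest first
        n_drop = max(len(surviving) // 2, 1)  # half the set, but >= 1
        surviving = np.sort(surviving[order[n_drop:]])
        yield surviving

# Toy run: sensitivity of a feature is just its fixed weight
weights = np.array([0.1, 0.9, 0.2, 0.8, 0.05, 0.7, 0.3, 0.6])
for kept in rfe_schedule(lambda idx: weights[idx], len(weights)):
    print(kept)   # [1 3 5 7] -> [1 3] -> [1]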