Example #1
    def test_diff_len_labels_str_treeclassifier(self):
        # check that the classifier can handle a dataset with labels given as
        # strings of variable length
        # was failing on TreeClassifier because a fixed-width np.str dtype was
        # assumed from the first returned value
        ds = datasets['uni4small'].copy()
        newlabels = dict([(l, l + '_' * li)
                          for li, l in enumerate(ds.uniquetargets)])
        ds.targets = [newlabels[l] for l in ds.targets]

        clf = TreeClassifier(
            mvpa2.testing.clfs.SVM(), {
                'group1': (ds.uniquetargets[:2], mvpa2.testing.clfs.SVM()),
                'group2': (ds.uniquetargets[2:], mvpa2.testing.clfs.SVM())
            })
        clf.train(ds)
        predictions = clf.predict(ds)
        # predictions on the same ds as used for training should give the same labels
        assert np.all(np.unique(predictions) == ds.uniquetargets)
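The failure mode referenced in the comments above comes from NumPy inferring a fixed-width string dtype from the first value: longer labels assigned into such an array are silently truncated. A minimal standalone sketch (plain NumPy, not part of the test suite):

import numpy as np

# dtype inferred from the first label only: '<U2' (2-character strings)
preds = np.zeros(3, dtype=np.asarray('L0').dtype)
preds[:] = ['L0', 'L1_', 'L2__']   # longer labels are silently truncated on assignment
print(preds)                       # -> ['L0' 'L1' 'L2']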
Example #3
    def test_tree_classifier(self):
        """Basic tests for TreeClassifier
        """
        ds = datasets['uni4medium']
        # make the problem simple -- take only the informative features,
        # because classifiers for the tree are selected randomly, so
        # performance varies a lot and we just need to check for
        # correct operation
        ds = ds[:, ds.fa.nonbogus_targets != [None]]

        clfs = clfswh['binary']  # pool of classifiers
        # Let's permute so each time we try some different combination
        # of the classifiers, but exclude those operating on a % of
        # features since we might not have enough for that
        clfs = [
            clfs[i] for i in np.random.permutation(len(clfs))
            if '%' not in str(clfs[i])
        ]

        # NB: the same classifier instance must not be used at different
        # nodes, since it would get re-trained for a new set of targets,
        # leading to incorrect behavior/high error.
        #
        # Clone only those few leading ones which we will use
        # throughout the test
        clfs = [clf.clone() for clf in clfs[:4]]

        # Test conflicting definition
        tclf = TreeClassifier(clfs[0], {
            'L0+2': (('L0', 'L2'), clfs[1]),
            'L2+3': (('L2', 'L3'), clfs[2])
        })
        self.assertRaises(ValueError, tclf.train, ds)
        """Should raise exception since label 2 is in both"""

        # Test insufficient definition
        tclf = TreeClassifier(clfs[0], {
            'L0+5': (('L0', 'L5'), clfs[1]),
            'L2+3': (('L2', 'L3'), clfs[2])
        })
        self.assertRaises(ValueError, tclf.train, ds)
        """Should raise exception since no group for L1"""

        # proper definition now
        tclf = TreeClassifier(clfs[0], {
            'L0+1': (('L0', 'L1'), clfs[1]),
            'L2+3': (('L2', 'L3'), clfs[2])
        })

        # Lets test train/test cycle using CVTE
        cv = CrossValidation(tclf,
                             OddEvenPartitioner(),
                             postproc=mean_sample(),
                             enable_ca=['stats', 'training_stats'])
        cverror = cv(ds).samples.squeeze()
        try:
            rtclf = repr(tclf)
        except Exception:
            self.fail(msg="Could not obtain repr for TreeClassifier")

        # Test accessibility of .clfs
        self.assertTrue(tclf.clfs['L0+1'] is clfs[1])
        self.assertTrue(tclf.clfs['L2+3'] is clfs[2])

        cvtrc = cv.ca.training_stats
        cvtc = cv.ca.stats

        if cfg.getboolean('tests', 'labile', default='yes'):
            # just a dummy check to make sure everything is working
            self.assertTrue(cvtrc != cvtc)
            self.assertTrue(cverror < 0.3,
                            msg="Got too high error = %s using %s" %
                            (cverror, tclf))

        # Test trailing nodes with no classifier

        # That is why we use a separate pool of classifiers here
        # (probably outdated/not needed since we switched to using clones)
        clfs_mc = clfswh['multiclass']  # pool of classifiers
        clfs_mc = [
            clfs_mc[i] for i in np.random.permutation(len(clfs_mc))
            if '%' not in str(clfs_mc[i])
        ]
        clfs_mc = [clf.clone() for clf in clfs_mc[:4]]  # and clones again

        tclf = TreeClassifier(clfs_mc[0], {
            'L0': (('L0', ), None),
            'L1+2+3': (('L1', 'L2', 'L3'), clfs_mc[1])
        })

        cv = CrossValidation(tclf,
                             OddEvenPartitioner(),
                             postproc=mean_sample(),
                             enable_ca=['stats', 'training_stats'])
        # np.asscalar was removed in recent NumPy; squeeze as done above
        cverror = cv(ds).samples.squeeze()
        if cfg.getboolean('tests', 'labile', default='yes'):
            self.assertTrue(cverror < 0.3,
                            msg="Got too high error = %s using %s" %
                            (cverror, tclf))
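For reference, the grouping contract these tests exercise, in one place: every target must belong to exactly one group, and a group holding a single target may pass None instead of a second-level classifier. A minimal standalone sketch (import path assumed; classifier choice mirrors the tests above):

import mvpa2.testing.clfs
from mvpa2.clfs.meta import TreeClassifier   # import path assumed

tclf = TreeClassifier(
    mvpa2.testing.clfs.SVM(),                         # root classifier routes samples to a group
    {'L0':     (('L0',),            None),            # single-target group -> no node classifier
     'L1+2+3': (('L1', 'L2', 'L3'), mvpa2.testing.clfs.SVM())})
# tclf.train(ds); predictions = tclf.predict(ds)      # same protocol as any other classifier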