def test_learner_on(self, dataset):
    """Default sanity test for an Orange learner on ``dataset``.

    Trains ``self.learner`` on a random 70% subset and checks the
    resulting classifier's interface: the ``class_var`` attribute, all
    three prediction modes (value / probabilities / both), and that the
    returned class distributions are normalized.
    """
    # Stratify the train/test split only for discrete class variables.
    discrete_target = isinstance(dataset.domain.class_var,
                                 Orange.feature.Discrete)
    if discrete_target:
        indices = _MakeRandomIndices2(p0=0.3, stratified=True)(dataset)
    else:
        indices = _MakeRandomIndices2(p0=0.3)(dataset)

    learn = dataset.select(indices, 1)
    test = dataset.select(indices, 0)

    classifier = self.learner(learn)

    # The classifier must expose the class variable of the training data.
    self.assertTrue(hasattr(classifier, "class_var"))
    self.assertIs(classifier.class_var, dataset.domain.class_var)

    res = _testing.test_on_data([classifier], test)

    for instance in test:
        self.assertIsInstance(classifier(instance, Orange.core.GetValue),
                              Orange.core.Value)
        self.assertIsInstance(
            classifier(instance, Orange.core.GetProbabilities),
            Orange.core.Distribution)

        value, dist = classifier(instance, Orange.core.GetBoth)
        self.assertIsInstance(value, Orange.core.Value)
        self.assertIsInstance(dist, Orange.core.Distribution)
        self.assertIs(dist.variable, classifier.class_var)

        # Continuous distributions expose probabilities via .values();
        # discrete ones iterate over probabilities directly.
        if isinstance(dist, Orange.core.ContDistribution):
            dist_sum = sum(dist.values())
        else:
            dist_sum = sum(dist)

        # The distribution should be non-degenerate and ~normalized.
        self.assertGreater(dist_sum, 0.0)
        self.assertLess(abs(dist_sum - 1.0), 1e-3)

    # Store classifier for possible use in subclasses
    self.classifier = classifier
# Example #2
    def test_learner_on(self, dataset):
        """ Default test case for Orange learners.

        Trains ``self.learner`` on a random 70% subset of ``dataset``
        and verifies the classifier's interface: its ``class_var``
        attribute, the three prediction modes (value / probabilities /
        both), and that returned class distributions are normalized.
        """
        # Stratify the split only when the class variable is discrete.
        if isinstance(dataset.domain.class_var, Orange.feature.Discrete):
            indices = _MakeRandomIndices2(p0=0.3, stratified=True)(dataset)
        else:
            indices = _MakeRandomIndices2(p0=0.3)(dataset)
        learn = dataset.select(indices, 1)
        test = dataset.select(indices, 0)

        classifier = self.learner(learn)

        # Test for classVar: the classifier must carry the training
        # data's class variable (same object, not just an equal one).
        self.assertTrue(hasattr(classifier, "class_var"))
        self.assertIs(classifier.class_var, dataset.domain.class_var)

        # Result unused; the call itself checks evaluation doesn't raise.
        res = _testing.test_on_data([classifier], test)

        for ex in test:
            self.assertIsInstance(classifier(ex, Orange.core.GetValue),
                                  Orange.core.Value)
            self.assertIsInstance(classifier(ex, Orange.core.GetProbabilities),
                                  Orange.core.Distribution)

            value, dist = classifier(ex, Orange.core.GetBoth)

            self.assertIsInstance(value, Orange.core.Value)
            self.assertIsInstance(dist, Orange.core.Distribution)

            self.assertIs(dist.variable, classifier.class_var)

            # Continuous distributions expose probabilities via .values();
            # discrete ones iterate over probabilities directly.
            if isinstance(dist, Orange.core.ContDistribution):
                dist_sum = sum(dist.values())
            else:
                dist_sum = sum(dist)

            # Distribution should be non-degenerate and ~normalized.
            self.assertGreater(dist_sum, 0.0)
            self.assertLess(abs(dist_sum - 1.0), 1e-3)

            # just for fun also test this
#            self.assertLess(abs(dist_sum - dist.abs), 1e-3)
            # not fun because it fails

        # Store classifier for possible use in subclasses
        self.classifier = classifier
    def multi_target_test(self, learner, data):
        """Sanity-check a multi-target ``learner`` on ``data``.

        Trains on a random 70% subset, verifies the predictor's
        interface and that every per-target distribution is normalized,
        then returns the trained predictor.
        """
        indices = Orange.data.sample.SubsetIndices2(p0=0.3)(data)
        learn = data.select(indices, 1)
        test = data.select(indices, 0)

        predictor = learner(learn)
        self.assertIsInstance(predictor, Orange.classification.Classifier)
        self.multi_target_predictor_interface(predictor, learn.domain)

        from Orange.evaluation import testing as _testing

        # Result unused; the call checks evaluation doesn't raise.
        r = _testing.test_on_data([predictor], test)

        def check_values(values):
            # Every predicted target must be an Orange Value.
            for value in values:
                self.assertIsInstance(value, Orange.core.Value)

        def check_dists(dists):
            # Every per-target distribution must be an Orange Distribution.
            for distribution in dists:
                self.assertIsInstance(distribution, Orange.core.Distribution)

        for instance in test:
            check_values(predictor(instance, Orange.core.GetValue))
            check_dists(predictor(instance, Orange.core.GetProbabilities))

            preds, dist = predictor(instance, Orange.core.GetBoth)
            check_values(preds)
            check_dists(dist)

            for d in dist:
                # Continuous distributions expose probabilities via
                # .values(); discrete ones iterate probabilities directly.
                if isinstance(d, Orange.core.ContDistribution):
                    dist_sum = sum(d.values())
                else:
                    dist_sum = sum(d)

                # Each target's distribution should be ~normalized.
                self.assertGreater(dist_sum, 0.0)
                self.assertLess(abs(dist_sum - 1.0), 1e-3)

        return predictor
# Example #4
 def multi_target_test(self, learner, data):
     """Sanity-check a multi-target ``learner`` on ``data``.

     Trains on a random 70% subset, verifies the predictor's interface
     and that every per-target distribution is normalized, then returns
     the trained predictor.

     NOTE(review): this snippet sits at a one-space top-level indent --
     likely mangled by whichever tool scraped it; confirm intended
     enclosing class before use.
     """
     # Unstratified 30/70 split into test/train subsets.
     indices = Orange.data.sample.SubsetIndices2(p0=0.3)(data)
     learn = data.select(indices, 1)
     test = data.select(indices, 0)

     predictor = learner(learn)
     self.assertIsInstance(predictor, Orange.classification.Classifier)
     self.multi_target_predictor_interface(predictor, learn.domain)

     from Orange.evaluation import testing as _testing

     # Result unused; the call itself checks evaluation doesn't raise.
     r = _testing.test_on_data([predictor], test)

     def all_values(vals):
         # Every predicted target must be an Orange Value.
         for v in vals:
             self.assertIsInstance(v, Orange.core.Value)

     def all_dists(dist):
         # Every per-target distribution must be an Orange Distribution.
         for d in dist:
             self.assertIsInstance(d, Orange.core.Distribution)

     for ex in test:
         preds = predictor(ex, Orange.core.GetValue)
         all_values(preds)

         dist = predictor(ex, Orange.core.GetProbabilities)
         all_dists(dist)

         preds, dist = predictor(ex, Orange.core.GetBoth)
         all_values(preds)
         all_dists(dist)

         for d in dist:
             # Continuous distributions expose probabilities via
             # .values(); discrete ones iterate probabilities directly.
             if isinstance(d, Orange.core.ContDistribution):
                 dist_sum = sum(d.values())
             else:
                 dist_sum = sum(d)

             # Each target's distribution should be ~normalized.
             self.assertGreater(dist_sum, 0.0)
             self.assertLess(abs(dist_sum - 1.0), 1e-3)

     return predictor
# Example #5
def benchmark_data_subsets_hellinger(data, rand, conv):
    """Benchmark classifier accuracy on random data subsets, keyed by
    the Hellinger distance between each subset's and the full data's
    discretized joint distribution.

    For each target distance in ``HDISTANCES``, draws ``SAMPLE_SIZE``
    random subsets of the size ``conv.subset_size`` maps to, trains all
    ``LEARNERS`` plus three ensemble classifiers on each subset, scores
    them on the full ``data``, and records the mean scores under the
    mean observed Hellinger distance.

    Parameters:
        data: Orange data table; its class variable must be discrete
              (``data.domain.class_var.values`` is read below).
        rand: unused in this body -- presumably a random source; TODO
              confirm against callers (sampling uses module ``random``).
        conv: converter exposing ``subset_size(distance)`` -> sample size.

    Returns:
        ``(levels, results)`` where ``levels`` is the constant 1 and
        ``results`` maps mean Hellinger distance -> per-classifier mean
        scores (as produced by ``evaluate.dict_recur_mean_err``).

    NOTE(review): Python 2 code -- ``xrange`` and integer division
    (``len(data)/10``, ``factorial(...)/factorial(...)``).
    """
    # Levels: 1. Learn subset distance (2. Samples, 3. Learner)
    
    levels = 1
    results = {}
    dlen = len(data)
    
    # Distributions are estimated over attribute combinations of size
    # up to `level`, capped by the number of domain attributes.
    level = 5
    l_domain = len(data.domain)

    class_vals = tuple(float(i) for i in xrange(len(data.domain.class_var.values)))

    if level > l_domain:
        level = l_domain

    # Number of attribute combinations C(l_domain, l) for l = 1..level;
    # used to normalize the Hellinger distance below.
    #n_combinations = sum(factorial(l_domain)/factorial(l)/factorial(l_domain-l)
    #                     for l in xrange(1, level+1))*len(class_vals)
    n_combinations = sum(factorial(l_domain)/factorial(l)/factorial(l_domain-l)
                         for l in xrange(1, level+1))

    # Discretize into ~10 equal-width bins per attribute, then flatten
    # to a plain float matrix for JointDistributions.
    ddata = Orange.data.discretization.DiscretizeTable(data,
                method=Orange.feature.discretization.EqualWidth(n=len(data)/10))
    ddata = np.array([tuple(float(d[i]) for i in xrange(len(ddata.domain))) for d in ddata])
    #data = np.array([tuple(float(d[i]) for i in xrange(len(data.domain))) for d in data])
    ddata_distr = JointDistributions(ddata)


    # Reference: sqrt of the full data's combined distribution values,
    # normalized to probabilities.
    dd_sq_vals = combined_distribution(ddata_distr, level, ddata)
    dd_sq_vals /= len(ddata)
    dd_sq_vals = np.sqrt(dd_sq_vals)

    mean_dist = None

    def get_current_CA(classifier):
        # Closure over `mean_dist`: once the first HDISTANCES iteration
        # has stored results, returns that classifier's CA from the
        # previous batch; before that, optimistically returns 1.0.
        # NOTE(review): a legitimate mean_dist of 0.0 would also take
        # the fallback branch (truthiness test, not `is None`).
        if mean_dist:
            return results[mean_dist][classifier.name]['CA'][0]
        else:
            return 1.0

    for sp in HDISTANCES:
        sn = conv.subset_size(sp)
        sample_results = {}
        dists = []
        for i in xrange(SAMPLE_SIZE):
            # Draw a random subset of size sn (without replacement).
            ind = random.sample(xrange(len(data)), int(sn))
            sn_data = data.get_items(ind)
            sn_ddata = ddata[ind]
            
            # Calculating Hellinger distance.
            sn_ddata_distr = JointDistributions(sn_ddata)
            sd_vals = combined_distribution(sn_ddata_distr, level, ddata)
            sd_vals /= sn
            r = np.sqrt(sd_vals) - dd_sq_vals
            dist = np.sqrt(np.sum(np.multiply(r,r))/2/n_combinations)
            dists.append(dist)

            #print fano_min_error(sn_ddata_distr, len(ddata.domain.class_var.values))
            
            # Train the base learners on the subset and label each
            # classifier with its learner's name for result keying.
            classifiers = [l(sn_data) for l in LEARNERS]
            for j in xrange(len(LEARNERS)):
                classifiers[j].name = LEARNERS[j].name

            # Three ensembles over the base classifiers; current_best
            # picks by CA from the previous distance batch.
            majority_vote = MajorityVoteClassifier(list(classifiers), name="vote")
            wcs = WeightedConfidenceSharingClassifier(list(classifiers), name="wcs")
            current_best = BestDecidesClassifier(list(classifiers), get_current_CA,
                                                 name="current_best")
            classifiers.append(majority_vote)
            classifiers.append(wcs)
            classifiers.append(current_best)

            # Score everything on the FULL data set (train data included
            # for the base learners -- presumably intentional here).
            CAs = Orange.evaluation.scoring.CA(test_on_data(classifiers, data))
            sample_results[i] = {}
            for idx, classifier in enumerate(LEARNERS_NAMES):
                sample_results[i][classifier] = {}
                sample_results[i][classifier]['CA'] = CAs[idx]
        
        # Aggregate over samples; key results by the mean observed
        # Hellinger distance for this target distance.
        mean_results = evaluate.dict_recur_mean_err(sample_results.values())
        mean_dist = float(sum(dists))/SAMPLE_SIZE
        results[mean_dist] = mean_results

    return (levels, results)