def test_split(self):
        """Check split() for the first, middle and last of 5 folds.

        The input is range(0, 561). For each fold index the assertions
        expect element 0 of the result to hold everything outside the
        chosen fold and element 1 to hold the fold itself. The expected
        fold boundaries are multiples of 112, with the last fold running
        to the end of the input (448..561).
        """
        entire_range = range(0, 561)

        # First fold: the held-out part is the leading 112 items.
        test_beginning = split(entire_range, 0, 5)
        self.assertEqual(test_beginning[0], range(112, 561))
        self.assertEqual(test_beginning[1], range(0, 112))

        # Middle fold: the remainder is the items on both sides of it.
        # NOTE: `range(...) + range(...)` relies on range() returning a
        # list, which is only the case on Python 2.
        test_middle = split(entire_range, 2, 5)
        self.assertEqual(test_middle[0], range(0, 224) + range(336, 561))
        self.assertEqual(test_middle[1], range(224, 336))

        # Last fold: it also takes the leftover item (113 instead of 112).
        test_end = split(entire_range, 4, 5)
        self.assertEqual(test_end[0], range(0, 448))
        self.assertEqual(test_end[1], range(448, 561))
def main():
    """Compare an unpruned decision tree against two pruned variants.

    Loads the nursery data, shuffles its examples in place, and then for
    each i in range(K):

    * splits the examples with diversity_experiment.split(examples, i, K)
      and divides the second part again into a test and a validation set;
    * learns a tree from the training part;
    * builds a 'conservative' and a 'liberal' pruned tree from deep
      copies of that tree, using the validation set;
    * evaluates all three trees on the test set and records the results.

    The collected outcomes are finally passed to
    diversity_experiment.analyze together with the pairs of variants to
    compare and the output file name 'prune.csv'.

    K is presumably the number of cross-validation folds -- confirm
    against its definition.
    """
    train_data = diversity_experiment.get_nursery_data()
    shuffle(train_data.examples)
    outcomes = {'unpruned': [],
                'conservative': [],
                'liberal': []}

    for i in range(K):
        # Split into training, test, and validation sets
        training_set, the_rest = diversity_experiment.split(train_data.examples, i, K)
        test_set, validation_set = validation_split(the_rest)

        # Learn the original tree
        unpruned = decision_tree_builder.learn_tree(train_data.features,
                                                    train_data.class_vals,
                                                    DIV_FUNCT,
                                                    training_set)

        # Make pruned versions too. Each pruner gets its own deep copy,
        # so the unpruned tree object itself is never handed to a pruner.
        # print(...) with one string prints the same text on Python 2 and
        # is also valid syntax on Python 3.
        print("Pruning conservatively")
        conservative_tree = TreePruner(copy.deepcopy(unpruned), 'conservative', validation_set).root

        print("Pruning liberally.")
        liberal_tree = TreePruner(copy.deepcopy(unpruned), 'liberal', validation_set).root

        trees = {
            'unpruned': unpruned,
            'liberal':  liberal_tree,
            'conservative':  conservative_tree
        }

        # Test ALL the trees!
        for name, tree in trees.items():
            outcomes[name].append(evaluate(tree, test_set))

    pairwise = [
        ('unpruned', 'liberal'),
        ('unpruned', 'conservative'),
        ('liberal', 'conservative')
    ]

    # Get t-statistics
    diversity_experiment.analyze(outcomes, pairwise, [], 'prune.csv', K)
def main():
    """Run the pruning comparison over K splits of the nursery data.

    The examples are shuffled in place once. For every index i in
    range(K) the data is split via diversity_experiment.split, the second
    part of that split is divided into a test set and a validation set,
    and a tree is learned from the training part. Two pruned trees
    ('conservative' and 'liberal') are produced from deep copies of the
    learned tree using the validation set. Each of the three trees is
    evaluated on the test set and the result appended to its list in
    `outcomes`.

    After the loop, `outcomes` and the list of pairs to compare are
    handed to diversity_experiment.analyze, with 'prune.csv' as the file
    name argument.

    K looks like the fold count of a cross-validation -- verify where it
    is defined.
    """
    train_data = diversity_experiment.get_nursery_data()
    shuffle(train_data.examples)
    outcomes = {'unpruned': [], 'conservative': [], 'liberal': []}

    for i in range(K):
        # Split into training, test, and validation sets
        training_set, the_rest = diversity_experiment.split(
            train_data.examples, i, K)
        test_set, validation_set = validation_split(the_rest)

        # Learn the original tree
        unpruned = decision_tree_builder.learn_tree(train_data.features,
                                                    train_data.class_vals,
                                                    DIV_FUNCT, training_set)

        # Make pruned versions too; every pruner works on its own deep
        # copy rather than on `unpruned` itself.
        # The parenthesised print form gives identical output on Python 2
        # and does not break parsing on Python 3.
        print("Pruning conservatively")
        conservative_tree = TreePruner(copy.deepcopy(unpruned), 'conservative',
                                       validation_set).root

        print("Pruning liberally.")
        liberal_tree = TreePruner(copy.deepcopy(unpruned), 'liberal',
                                  validation_set).root

        trees = {
            'unpruned': unpruned,
            'liberal': liberal_tree,
            'conservative': conservative_tree
        }

        # Test ALL the trees!
        for label, candidate in trees.items():
            outcomes[label].append(evaluate(candidate, test_set))

    pairwise = [('unpruned', 'liberal'), ('unpruned', 'conservative'),
                ('liberal', 'conservative')]

    # Get t-statistics
    diversity_experiment.analyze(outcomes, pairwise, [], 'prune.csv', K)