def test_split(self):
    """Check ``split`` on the first, a middle, and the last of 5 folds.

    A 561-element sequence is split with fold indices 0, 2 and 4 out of 5.
    The assertions expect element ``[1]`` of the result to be the selected
    fold and element ``[0]`` to be everything else, with the last fold
    absorbing the remainder (448..560, i.e. 113 items instead of 112).
    """
    # list(...) keeps every value a concrete list, so the ``+`` concatenation
    # below is valid on Python 3 as well; on Python 2 it is a plain copy.
    entire_range = list(range(0, 561))

    # First fold: items 0..111 are selected, the rest follow it.
    test_beginning = split(entire_range, 0, 5)
    self.assertEqual(test_beginning[0], list(range(112, 561)))
    self.assertEqual(test_beginning[1], list(range(0, 112)))

    # Middle fold: the remainder is the two pieces on either side of it.
    test_middle = split(entire_range, 2, 5)
    self.assertEqual(test_middle[0],
                     list(range(0, 224)) + list(range(336, 561)))
    self.assertEqual(test_middle[1], list(range(224, 336)))

    # Last fold: runs to the end of the input, picking up the extra item.
    test_end = split(entire_range, 4, 5)
    self.assertEqual(test_end[0], list(range(0, 448)))
    self.assertEqual(test_end[1], list(range(448, 561)))
def main():
    """Run the K-fold comparison of unpruned and pruned decision trees.

    For each of the ``K`` folds: split the shuffled nursery examples, learn
    a tree on the training portion, build a conservatively and a liberally
    pruned copy of it using the validation set, and record each tree's
    ``evaluate`` result on the test set. The per-fold results are then
    handed to ``diversity_experiment.analyze`` together with the pairs of
    variants to compare.
    """
    train_data = diversity_experiment.get_nursery_data()
    shuffle(train_data.examples)
    outcomes = {'unpruned': [], 'conservative': [], 'liberal': []}

    for i in range(K):
        # Split into training, test, and validation sets
        training_set, the_rest = diversity_experiment.split(
            train_data.examples, i, K)
        test_set, validation_set = validation_split(the_rest)

        # Learn the original tree
        unpruned = decision_tree_builder.learn_tree(
            train_data.features, train_data.class_vals, DIV_FUNCT,
            training_set)

        # Make pruned versions too. Each pruner gets its own deep copy, so
        # the tree stored under 'unpruned' is a different object from the
        # ones handed to the pruners.
        # print(...) with a single argument behaves the same on Python 2
        # and Python 3, unlike the bare print statement.
        print("Pruning conservatively")
        conservative_tree = TreePruner(
            copy.deepcopy(unpruned), 'conservative', validation_set).root
        print("Pruning liberally.")
        liberal_tree = TreePruner(
            copy.deepcopy(unpruned), 'liberal', validation_set).root

        trees = {
            'unpruned': unpruned,
            'liberal': liberal_tree,
            'conservative': conservative_tree,
        }

        # Test ALL the trees!
        for name, tree in trees.items():
            outcomes[name].append(evaluate(tree, test_set))

    pairwise = [
        ('unpruned', 'liberal'),
        ('unpruned', 'conservative'),
        ('liberal', 'conservative'),
    ]

    # Get t-statistics
    # NOTE(review): 'prune.csv' is presumably the output file name -- confirm
    # against diversity_experiment.analyze.
    diversity_experiment.analyze(outcomes, pairwise, [], 'prune.csv', K)
def main():
    """Compare unpruned, conservatively pruned and liberally pruned trees.

    Shuffles the nursery examples once, then for every fold index below
    ``K`` learns a tree, prunes two deep copies of it against the
    validation set, and appends each variant's ``evaluate`` result on the
    test set to ``outcomes``. Finally passes the collected results and the
    three pairings of variants to ``diversity_experiment.analyze``.
    """
    train_data = diversity_experiment.get_nursery_data()
    shuffle(train_data.examples)
    outcomes = {'unpruned': [], 'conservative': [], 'liberal': []}

    for fold in range(K):
        # Split into training, test, and validation sets
        training_set, the_rest = diversity_experiment.split(
            train_data.examples, fold, K)
        test_set, validation_set = validation_split(the_rest)

        # Learn the original tree
        unpruned = decision_tree_builder.learn_tree(
            train_data.features, train_data.class_vals, DIV_FUNCT,
            training_set)

        # Make pruned versions too; the pruners are given deep copies
        # rather than the learned tree itself.
        # Parenthesised single-argument print runs unchanged on both
        # Python 2 and Python 3.
        print("Pruning conservatively")
        conservative_tree = TreePruner(
            copy.deepcopy(unpruned), 'conservative', validation_set).root
        print("Pruning liberally.")
        liberal_tree = TreePruner(
            copy.deepcopy(unpruned), 'liberal', validation_set).root

        trees = {
            'unpruned': unpruned,
            'liberal': liberal_tree,
            'conservative': conservative_tree,
        }

        # Test ALL the trees!
        for label, tree in trees.items():
            outcomes[label].append(evaluate(tree, test_set))

    pairwise = [('unpruned', 'liberal'),
                ('unpruned', 'conservative'),
                ('liberal', 'conservative')]

    # Get t-statistics
    # NOTE(review): the meaning of the empty list argument is not visible
    # here -- check diversity_experiment.analyze before changing it.
    diversity_experiment.analyze(outcomes, pairwise, [], 'prune.csv', K)