Пример #1
0
def check_ID3():
    """Run three smoke checks against ID3 and print the outcome of each.

    Check 1 builds a tree with depth limit 0 and expects the root label to be 1.
    Checks 2 and 3 build trees with depth limit 5 (split budgets of 1 and 5
    respectively) and compare, row by row, whether classify() reproduces the
    first column of the data set against a fixed list of expected booleans.
    A one-line summary is printed after the three checks.
    """

    def build_fixture(split_budget):
        # Fresh lists are created for every check, exactly as the checks were
        # originally written out; presumably ID3 may mutate its arguments
        # (TODO confirm), so nothing is shared between checks.
        metadata = [{'name': "winner", 'is_nominal': True},
                    {'name': "opprundifferential", 'is_nominal': False}]
        rows = [[1, 0.27], [0, 0.42], [0, 0.86], [0, 0.68], [0, 0.04],
                [1, 0.01], [1, 0.33], [1, 0.42], [1, 0.42], [0, 0.51],
                [1, 0.4]]
        return rows, metadata, [split_budget, split_budget]

    def tally(ok, passed_msg, failed_msg):
        # Print the verdict and return how many failures to add (0 or 1).
        if ok:
            print(passed_msg)
            return 0
        print(failed_msg)
        return 1

    failures = 0

    # Check 1: depth 0 -> the tree is judged only by its root label.
    rows, metadata, splits = build_fixture(5)
    root = ID3(rows, metadata, splits, 0)
    failures += tally(root and root.label == 1, "Passed 1", "Failed 1")

    # Check 2: depth 5, one numerical split allowed per attribute.
    rows, metadata, splits = build_fixture(1)
    root = ID3(rows, metadata, splits, 5)
    expected = [True, False, True, True, False, True, True, True, True, True, True]
    failures += tally(
        root and [root.classify(row) == row[0] for row in rows] == expected,
        "Passed 2", "Failed 2")

    # Check 3: depth 5, five numerical splits allowed per attribute.
    rows, metadata, splits = build_fixture(5)
    root = ID3(rows, metadata, splits, 5)
    expected = [True, False, True, True, True, True, True, True, True, True, True]
    failures += tally(
        root and [root.classify(row) == row[0] for row in rows] == expected,
        "Passed 3", "Failed 3")

    if failures > 0:
        print("not all tests passed, please see ID3.")
    else:
        print("all tests passed.")
def decision_tree_driver(train, validate = False, predict = False, prune = False,
    limit_splits_on_numerical = False, limit_depth = False, print_tree = False,
    print_dnf = False, learning_curve = False):
    """Train an ID3 tree and run the optional post-processing steps.

    Args:
        train: passed to parse() to obtain the training set and metadata.
        validate: if not False, passed to parse(); the tree's accuracy on the
            result is printed.
        predict: if not False, handed to create_predictions() with the tree.
        prune: if not False, passed to parse(); the result is used as the
            pruning set for reduced_error_pruning().
        limit_splits_on_numerical: if not False, the split budget used for
            every attribute; otherwise the budget is infinite.
        limit_depth: if not False, the depth limit given to ID3; otherwise
            infinite.
        print_tree: if truthy, tree.print_tree() is written to
            ./output/tree.txt.
        print_dnf: if truthy, tree.print_dnf_tree() is written to
            ./output/DNF.txt.
        learning_curve: not used by the steps documented here.
    """
    train_set, attribute_metadata = parse(train, False)

    # The options are compared with "!= False" rather than by truthiness.
    # Note that 0 == False in Python, so a limit of 0 also means "unlimited".
    if limit_splits_on_numerical != False:
        numerical_splits_count = [limit_splits_on_numerical] * len(attribute_metadata)
    else:
        numerical_splits_count = [float("inf")] * len(attribute_metadata)

    if limit_depth != False:
        depth = limit_depth
    else:
        depth = float("inf")

    print("###\n#  Training Tree\n###")

    # call the ID3 classification algorithm with the appropriate options
    tree = ID3(train_set, attribute_metadata, numerical_splits_count, depth)
    print('\n')

    # call reduced error pruning using the pruning set
    if prune != False:
        print('###\n#  Pruning\n###')
        pruning_set, _ = parse(prune, False)
        n = Node()
        reduced_error_pruning(tree,train_set,pruning_set, 0, n)
        print('')

    # print tree visually
    if print_tree:
        print('###\n#  Decision Tree\n###')
        # "with" guarantees the handle is closed even if rendering or
        # writing the tree raises.
        with open('./output/tree.txt','w+') as cursor:
            cursor.write(tree.print_tree())
        print('Decision Tree written to /output/tree')
        print('')

    # print tree in disjunctive normalized form
    if print_dnf:
        print('###\n#  Decision Tree as DNF\n###')
        with open('./output/DNF.txt','w+') as cursor:
            cursor.write(tree.print_dnf_tree())
        print('Decision Tree written to /output/DNF')
        print('')

    # test tree accuracy on validation set
    if validate != False:
        print('###\n#  Validating\n###')
        validate_set, _ = parse(validate, False)
        accuracy = validation_accuracy(tree,validate_set)
        print("Accuracy on validation set: " + str(accuracy))
        print('')

    # generate predictions on the test set
    if predict != False:
        print('###\n#  Generating Predictions on Test Set\n###')
        create_predictions(tree, predict)
        print('')

    # generate a learning curve using the validation set
    """if learning_curve and validate:
Пример #3
0
def decision_tree_driver(train, validate = False, predict = False, prune = False,
    limit_splits_on_numerical = False, limit_depth = False, print_tree = False,
    print_dnf = False, learning_curve = False):
    """Train a tree, prune it, and print accuracy before and after pruning.

    NOTE(review): the training and pruning data are read from hard-coded
    absolute paths; the train, validate, predict, prune, print_tree,
    print_dnf and learning_curve arguments are never read. Only
    limit_splits_on_numerical and limit_depth influence the run.

    Args:
        limit_splits_on_numerical: if not False, the split budget used for
            every attribute; otherwise the budget is infinite.
        limit_depth: if not False, the depth limit given to ID3; otherwise
            infinite.
    """
    train_set, attribute_metadata = parse('D:/2016 Spring/349 Machine Learning/Problem Set 2/PS2.code/data/test_btrain.csv', False)
    train_set = handle_missing_value(train_set, attribute_metadata)

    # "!= False" (not truthiness) decides whether a limit was supplied.
    split_budget = limit_splits_on_numerical if limit_splits_on_numerical != False else float("inf")
    numerical_splits_count = [split_budget] * len(attribute_metadata)
    depth = limit_depth if limit_depth != False else float("inf")

    print("###\n#  Training Tree\n###")

    # Build the tree with the configured split budget and depth limit.
    tree = ID3(train_set, attribute_metadata, numerical_splits_count, depth)

    print('\n')

    # Pruning always runs in this variant (the "if prune" guard is disabled).
    print('###\n#  Pruning\n###')
    pruning_set, _ = parse('D:/2016 Spring/349 Machine Learning/Problem Set 2/PS2.code/data/test_bvalidate.csv', False)
    pruning_set = handle_missing_value(pruning_set, attribute_metadata)

    # Baseline: accuracy of the unpruned tree on the pruning set.
    baseline_accuracy = validation_accuracy(tree, pruning_set)
    print(tree)
    print("Accuracy on validation set of original tree: " + str(baseline_accuracy))

    # Only the second element of the returned pair is used.
    _ignored, newtree = reduced_error_pruning(tree, train_set, pruning_set)
    print('')

    # Writing the tree and its DNF form to ./output is disabled in this
    # variant, and so is the "if validate" guard: validation always runs.
    print('###\n#  Validating\n###')

    pruned_accuracy = validation_accuracy(newtree, pruning_set)
    print(newtree)
    print("Accuracy on validation set of new tree: " + str(pruned_accuracy))
    print('')
def decision_tree_driver(train,
                         validate=False,
                         predict=False,
                         new=False,
                         prune=False,
                         limit_splits_on_numerical=False,
                         limit_depth=False,
                         print_tree=False,
                         print_dnf=False,
                         learning_curve=False):
    """Train an ID3 tree, optionally prune it, and report on the result.

    Args:
        train: passed to parse() to obtain the training set and metadata.
        validate: if not False, passed to parse(); validation accuracy is
            printed once right after training and once more after the
            optional pruning/printing steps.
        predict: unused here (the prediction step is disabled).
        new: unused here (it belonged to the disabled prediction step).
        prune: if not False, passed to parse(); the result is used as the
            pruning set for reduced_error_pruning().
        limit_splits_on_numerical: if not False, the split budget used for
            every attribute; otherwise the budget is infinite.
        limit_depth: if not False, the depth limit given to ID3; otherwise
            infinite.
        print_tree: if truthy, tree.print_tree() is written to
            ./output/tree.txt.
        print_dnf: if truthy, tree.print_dnf_tree() is written to
            ./output/DNF.txt.
        learning_curve: if truthy (and validate is truthy), a mapping whose
            'upper_bound' and 'increment' entries are forwarded to
            get_graph().
    """
    train_set, attribute_metadata = parse(train, False)

    # The options are compared with "!= False" rather than by truthiness.
    # Note that 0 == False in Python, so a limit of 0 also means "unlimited".
    if limit_splits_on_numerical != False:
        numerical_splits_count = [limit_splits_on_numerical
                                  ] * len(attribute_metadata)
    else:
        numerical_splits_count = [float("inf")] * len(attribute_metadata)

    if limit_depth != False:
        depth = limit_depth
    else:
        depth = float("inf")

    # Snapshot taken before training; the learning-curve step receives this
    # copy rather than the list handed to ID3 (presumably because ID3
    # consumes the budget in place -- TODO confirm).
    origin_splits_count = copy.deepcopy(numerical_splits_count)

    print("###\n#  Training Tree\n###")

    # call the ID3 classification algorithm with the appropriate options
    tree = ID3(train_set, attribute_metadata, numerical_splits_count, depth)
    print('finish')

    # accuracy of the freshly trained tree
    if validate != False:
        print('###\n#  Validating\n###')
        validate_set, _ = parse(validate, False)
        accuracy = validation_accuracy(
            tree, validate_set, attribute_metadata)
        print("Accuracy on validation set: " + str(accuracy))
        print('')

    # call reduced error pruning using the pruning set
    if prune != False:
        print('###\n#  Pruning\n###')
        pruning_set, _ = parse(prune, False)
        temptree = copy.deepcopy(tree)
        # Each tree is passed twice: once as the working node and once as
        # the root it belongs to. Both names alias the same object.
        temp_origintree = temptree
        origintree = tree
        reduced_error_pruning(temptree, temp_origintree, tree, origintree,
                              train_set, pruning_set, attribute_metadata)
        print('')

    # print tree visually
    if print_tree:
        print('###\n#  Decision Tree\n###')
        # "with" guarantees the handle is closed even if rendering or
        # writing the tree raises.
        with open('./output/tree.txt', 'w+') as cursor:
            cursor.write(tree.print_tree())
        print('Decision Tree written to /output/tree')
        print('')

    # print tree in disjunctive normalized form
    if print_dnf:
        print('###\n#  Decision Tree as DNF\n###')
        with open('./output/DNF.txt', 'w+') as cursor:
            cursor.write(tree.print_dnf_tree())
        print('Decision Tree written to /output/DNF')
        print('')

    # test tree accuracy on validation set again (after optional pruning);
    # the validation file is parsed afresh, as before.
    if validate != False:
        print('###\n#  Validating\n###')
        validate_set, _ = parse(validate, False)
        accuracy = validation_accuracy(
            tree, validate_set, attribute_metadata)
        print("Accuracy on validation set: " + str(accuracy))
        print('')

    # Generating predictions on the test set is disabled in this variant.

    # generate a learning curve using the validation set
    if learning_curve and validate:
        print('###\n#  Generating Learning Curve\n###')
        iterations = 3  # number of times to test each size
        get_graph(train_set, attribute_metadata, validate_set,
                  origin_splits_count, depth, iterations, 0,
                  learning_curve['upper_bound'], learning_curve['increment'])
        print('')
Пример #5
0
def decision_tree_driver(train, validate = False, predict = False, prune = False,
    limit_splits_on_numerical = False, limit_depth = False, print_tree = False,
    print_dnf = False, learning_curve = False):
    """Train an ID3 tree, optionally prune it, and report on the result.

    Args:
        train: passed to parse() to obtain the training set and metadata
            (and parsed again before the accuracy report).
        validate: if not False, passed to parse(); accuracy on both the
            training and the validation set is printed.
        predict: if not False, handed to create_predictions() with the tree.
        prune: if not False, passed to parse(); the result is used as the
            pruning set for reduced_error_pruning().
        limit_splits_on_numerical: if not False, the split budget used for
            every attribute; otherwise the budget is infinite.
        limit_depth: if not False, the depth limit given to ID3; otherwise
            infinite.
        print_tree: if truthy, tree.print_tree() is written to
            ./output/tree.txt.
        print_dnf: if truthy, the result of tree.print_dnf_tree() is printed
            to stdout and ./output/DNF.txt is created/truncated.
        learning_curve: if truthy (and validate is truthy), the result of
            get_graph_data() is printed.
    """
    train_set, attribute_metadata = parse(train, False)

    # The options are compared with "!= False" rather than by truthiness.
    # Note that 0 == False in Python, so a limit of 0 also means "unlimited".
    if limit_splits_on_numerical != False:
        numerical_splits_count = [limit_splits_on_numerical] * len(attribute_metadata)
    else:
        numerical_splits_count = [float("inf")] * len(attribute_metadata)

    if limit_depth != False:
        depth = limit_depth
    else:
        depth = float("inf")

    print("###\n#  Training Tree\n###")

    # call the ID3 classification algorithm with the appropriate options
    tree = ID3(train_set, attribute_metadata, numerical_splits_count, depth)
    print('\n')
    print("Nodes before pruning: " + str(tree.num_nodes()))

    # call reduced error pruning using the pruning set
    if prune != False:
        print('###\n#  Pruning\n###')
        pruning_set, _ = parse(prune, False)
        reduced_error_pruning(tree,train_set,pruning_set)
        print('')
        print("Nodes after pruning: " + str(tree.num_nodes()))

    # print tree visually
    if print_tree:
        print('###\n#  Decision Tree\n###')
        # "with" guarantees the handle is closed even if rendering or
        # writing the tree raises.
        with open('./output/tree.txt','w+') as cursor:
            cursor.write(tree.print_tree())
        print('Decision Tree written to /output/tree')
        print('')

    # print tree in disjunctive normalized form
    if print_dnf:
        print('###\n#  Decision Tree as DNF\n###')
        # The file is still created/truncated as before, but the handle is
        # now released instead of being left open.
        # NOTE(review): nothing is written to the file -- the DNF only goes
        # to stdout -- so the "written to" message below is misleading.
        with open('./output/DNF.txt','w+'):
            print(tree.print_dnf_tree())
        print('Decision Tree written to /output/DNF')
        print('')

    # test tree accuracy on training and validation sets
    if validate != False:
        print('###\n#  Validating\n###')
        # The training file is parsed afresh rather than reusing the set
        # that was handed to ID3 and the pruning step.
        train_set, _ = parse(train, False)
        accuracy = validation_accuracy(tree, train_set)
        print("Accuracy on training set: " + str(accuracy))
        validate_set, _ = parse(validate, False)
        accuracy = validation_accuracy(tree,validate_set)
        print("Accuracy on validation set: " + str(accuracy))
        print('')

    # generate predictions on the test set
    if predict != False:
        print('###\n#  Generating Predictions on Test Set\n###')
        create_predictions(tree, predict)
        print('')

    # generate a learning curve using the validation set
    if learning_curve and validate:
        print('###\n#  Generating Learning Curve\n###')
        iterations = 2 # number of times to test each size
        print(get_graph_data(train_set, attribute_metadata, validate_set, numerical_splits_count, iterations, [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]))
        print('')
Пример #6
0
from modules.predictions import *
from modules.pickled import *
from modules.parse import *
from modules.node import *
import matplotlib.pyplot as plt

# Locations of the data sets, relative to the working directory.
TRAINING_SET_PATH = 'data/btrain.csv'
VALIDATION_SET_PATH = 'data/bvalidate.csv'
TEST_SET_PATH = 'data/btest.csv'

# Load the training and validation data together with their metadata.
data, attr = parse(TRAINING_SET_PATH, True)
validate_data, validate_attr = parse(VALIDATION_SET_PATH, True)

# Question 2: train the initial tree and show it in DNF form.
# A split budget of 3 for each of 14 attributes and a depth limit of 5 are
# the values the author recorded as the current best for accuracy.
tree = ID3(data, attr, [3] * 14, 5)
print('Question 2')
print('DNF form of initial tree trained on ' + TRAINING_SET_PATH + ':')
tree.print_dnf_tree()
print('\r\n')

# Question 5: prune the initial tree against the validation data and show
# the tree returned by the pruning step.
pruned_tree = reduced_error_pruning(tree, data, validate_data)
print('Question 5')
print('DNF form of reduced-error pruned tree:')
pruned_tree.print_dnf_tree()
print('\r\n')

# Question 7: validation accuracy of the initial tree.
print('Question 7')
print('Initial tree validation accuracy:')
Пример #7
0
def _write_tree_outputs(tree, print_tree, print_dnf, tree_name, dnf_name):
    """Write the requested renderings of *tree* to ./output/<name>.txt."""
    # print tree visually
    if print_tree:
        print('###\n#  Decision Tree\n###')
        # "with" guarantees the handle is closed even if rendering or
        # writing the tree raises.
        with open('./output/' + tree_name + '.txt', 'w+') as cursor:
            cursor.write(tree.print_tree())
        print('Decision Tree written to /output/' + tree_name)
        print('')

    # print tree in disjunctive normalized form
    if print_dnf:
        print('###\n#  Decision Tree as DNF\n###')
        with open('./output/' + dnf_name + '.txt', 'w+') as cursor:
            cursor.write(tree.print_dnf_tree())
        print('Decision Tree written to /output/' + dnf_name)
        print('')


def _report_accuracy(tree, train_set, validate):
    """Parse *validate* and print accuracy on the training and validation sets."""
    print('###\n#  Validating\n###')
    validate_set, _ = parse(validate, False)
    # The validation accuracy is computed first, then the training accuracy.
    accuracy = validation_accuracy(tree, validate_set)
    print("Accuracy on training set: " + str(validation_accuracy(tree, train_set)))
    print("Accuracy on validation set: " + str(accuracy))
    print('')


def decision_tree_driver(train, validate=False, predict=False, prune=False,
                         limit_splits_on_numerical=False, limit_depth=False, print_tree=False,
                         print_dnf=False, learning_curve=False):
    """Train an ID3 tree, report on it, then optionally prune and report again.

    Args:
        train: passed to parse() to obtain the training set and metadata.
        validate: if not False, passed to parse(); accuracy on the training
            and validation sets is printed after training and again after
            pruning.
        predict: if not False, create_predictions(tree, predict) is written
            to ./output/predictions.csv under a fixed header row. This step
            used to be nested inside the pruning branch, so it was silently
            skipped unless prune was also given; it now runs whenever
            predict is supplied (after pruning, if pruning was requested).
        prune: if not False, passed to parse(); the result is used as the
            pruning set for reduced_error_pruning().
        limit_splits_on_numerical: if not False, the split budget used for
            every attribute; otherwise the budget is infinite.
        limit_depth: if not False, the depth limit given to ID3; otherwise
            infinite.
        print_tree: if truthy, the tree is written to ./output/tree.txt and,
            after pruning, to ./output/prune_tree.txt.
        print_dnf: if truthy, the DNF form is written to ./output/DNF.txt
            and, after pruning, to ./output/prune_DNF.txt.
        learning_curve: if truthy (and validate is truthy), a mapping whose
            'upper_bound' and 'increment' entries are forwarded to
            get_graph().
    """
    train_set, attribute_metadata = parse(train, False)

    # The options are compared with "!= False" rather than by truthiness.
    # Note that 0 == False in Python, so a limit of 0 also means "unlimited".
    if limit_splits_on_numerical != False:
        numerical_splits_count = [limit_splits_on_numerical] * len(attribute_metadata)
    else:
        numerical_splits_count = [float("inf")] * len(attribute_metadata)

    if limit_depth != False:
        depth = limit_depth
    else:
        depth = float("inf")

    print("###\n#  Training Tree\n###")

    # call the ID3 classification algorithm with the appropriate options
    tree = ID3(train_set, attribute_metadata, numerical_splits_count, depth)
    print('\n')

    # report on the unpruned tree
    _write_tree_outputs(tree, print_tree, print_dnf, 'tree', 'DNF')
    if validate != False:
        _report_accuracy(tree, train_set, validate)

    # call reduced error pruning using the pruning set, then report again
    if prune != False:
        print('###\n#  Pruning\n###')
        pruning_set, _ = parse(prune, False)
        reduced_error_pruning(tree, pruning_set)
        print('')
        _write_tree_outputs(tree, print_tree, print_dnf, 'prune_tree', 'prune_DNF')
        if validate != False:
            _report_accuracy(tree, train_set, validate)

    # generate predictions on the test set
    if predict != False:
        print('###\n#  Generating Predictions on Test Set\n###')
        with open('./output/predictions.csv', 'w+') as cursor:
            writer = csv.writer(cursor)
            # Header names are kept exactly as emitted before, including
            # the leading spaces.
            fieldnames = ['winner', ' winpercent', ' oppwinpercent', ' weather', ' temperature', ' numinjured', ' oppnuminjured',
                          ' startingpitcher', ' oppstartingpitcher', ' dayssincegame', ' oppdayssincegame', ' homeaway',
                          ' rundifferential', ' opprundifferential']
            writer.writerow(fieldnames)
            writer.writerows(create_predictions(tree, predict))
        print('')

    # generate a learning curve using the validation set
    if learning_curve and validate:
        print('###\n#  Generating Learning Curve\n###')
        validate_set, _ = parse(validate, False)
        iterations = 5  # number of times to test each size
        get_graph(train_set, attribute_metadata, validate_set,
                  numerical_splits_count, depth, iterations, 0, learning_curve['upper_bound'],
                  learning_curve['increment'])
        print('')