def pruning_decision_tree_test():
    """Train a tree on the pruning sample set, show it next to the reference
    tree, prune it with the test split, and show both trees again."""
    X_train, y_train, X_test, y_test = data.sample_decision_tree_pruning()

    # fit on the training split
    tree = decision_tree.DecisionTree()
    tree.train(X_train, y_train)

    # unpruned tree vs. the expected reference rendering
    print('Your decision tree:')
    Utils.print_tree(tree)
    print('My decision tree:')
    print(
        'branch 0{\n\tdeep: 0\n\tnum of samples for each class: 5 : 9 \n\tsplit by dim 0\n\tbranch 0->0{\n\t\tdeep: 1'
        '\n\t\tnum of samples for each class: 3 : 2 \n\t\tsplit by dim 1\n\t\tbranch 0->0->0{\n\t\t\tdeep: 2\n\t\t\t'
        'num of samples for each class: 3 \n\t\t\tclass:0\n\t\t}\n\t\tbranch 0->0->1{\n\t\t\tdeep: 2\n\t\t\tnum of '
        'samples for each class: 2 \n\t\t\tclass:1\n\t\t}\n\t}\n\tbranch 0->1{\n\t\tdeep: 1\n\t\tnum of samples for '
        'each class: 4 \n\t\tclass:1\n\t}\n\tbranch 0->2{\n\t\tdeep: 1\n\t\tnum of samples for each class: 2 : 3 '
        '\n\t\tsplit by dim 2\n\t\tbranch 0->2->0{\n\t\t\tdeep: 2\n\t\t\tnum of samples for each class: 3 \n\t\t\t'
        'class:1\n\t\t}\n\t\tbranch 0->2->1{\n\t\t\tdeep: 2\n\t\t\tnum of samples for each class: 2 \n\t\t\tclass:0'
        '\n\t\t}\n\t}\n}')

    # prune in place against the held-out split
    Utils.reduced_error_prunning(tree, X_test, y_test)

    # pruned tree vs. the expected reference rendering
    print('Your decision tree after pruning:')
    Utils.print_tree(tree)
    print('My decision tree after pruning:')
    print(
        'branch 0{\n\tdeep: 0\n\tnum of samples for each class: 5 : 9 \n\tsplit by dim 0\n\tbranch 0->0{\n\t\tdeep: '
        '1\n\t\tnum of samples for each class: 3 : 2 \n\t\tsplit by dim 1\n\t\tbranch 0->0->0{\n\t\t\tdeep: 2\n\t\t\t'
        'num of samples for each class: 3 \n\t\t\tclass:0\n\t\t}\n\t\tbranch 0->0->1{\n\t\t\tdeep: 2\n\t\t\tnum of '
        'samples for each class: 2 \n\t\t\tclass:1\n\t\t}\n\t}\n\tbranch 0->1{\n\t\tdeep: 1\n\t\tnum of samples for '
        'each class: 4 \n\t\tclass:1\n\t}\n\tbranch 0->2{\n\t\tdeep: 1\n\t\tnum of samples for each class: 2 : 3 '
        '\n\t\tclass:1\n\t}\n}')
def decision_tree_test():
    """Train a tree on the small sample data, show it next to the reference
    tree, then predict on the sample test set."""
    features, labels = data.sample_decision_tree_data()

    # fit
    tree = decision_tree.DecisionTree()
    tree.train(features, labels)

    # trained tree vs. the expected reference rendering
    print('Your decision tree: ')
    Utils.print_tree(tree)
    print('My decision tree: ')
    print(
        'branch 0{\n\tdeep: 0\n\tnum of samples for each class: 2 : 2 \n\tsplit by dim 0\n\tbranch 0->0{\n\t\tdeep: '
        '1\n\t\tnum of samples for each class: 1 \n\t\tclass:0\n\t}\n\tbranch 0->1{\n\t\tdeep: 1\n\t\tnum of '
        'samples for each class: 1 : 1 \n\t\tsplit by dim 0\n\t\tbranch 0->1->0{\n\t\t\tdeep: 2\n\t\t\tnum of '
        'samples for each class: 1 \n\t\t\tclass:0\n\t\t}\n\t\tbranch 0->1->1{\n\t\t\tdeep: 2\n\t\t\tnum of '
        'samples for each class: 1 \n\t\t\tclass:1\n\t\t}\n\t}\n\tbranch 0->2{\n\t\tdeep: 1\n\t\tnum of '
        'samples for each class: 1 \n\t\tclass:1\n\t}\n}')

    # predictions on the sample test set vs. the expected labels
    X_test, y_test = data.sample_decision_tree_test()
    predictions = tree.predict(X_test)
    print('Your estimate test: ', predictions)
    print('My estimate test: ', [0, 0, 1])
# Example 3
def test_big_tree():
    """Train on the full dataset, report test accuracy, prune on the test
    split, report accuracy again, and print the pruned tree."""
    X_train, y_train, X_test, y_test = data.load_decision_tree_data()

    # fit (inputs arrive as numpy arrays; the tree expects plain lists)
    tree = decision_tree.DecisionTree()
    tree.train(X_train.tolist(), y_train.tolist())

    # accuracy before pruning
    accuracy_before = accuracy_score(tree.predict(X_test), y_test)
    print('test_accu', accuracy_before)

    # prune in place against the test split
    Utils.reduced_error_prunning(tree, X_test, y_test)

    # accuracy after pruning
    accuracy_after = accuracy_score(tree.predict(X_test), y_test)
    print('test_accu', accuracy_after)

    # show the pruned tree
    Utils.print_tree(tree)
# Example 4
def test_tree():
    """Train on the sample data, print the tree, and report test accuracy
    before and after reduced-error pruning."""
    features, labels = data.sample_decision_tree_data()

    # fit and display
    tree = decision_tree.DecisionTree()
    tree.train(features, labels)
    Utils.print_tree(tree)

    # accuracy before pruning
    X_test, y_test = data.sample_decision_tree_test()
    accuracy_before = accuracy_score(tree.predict(X_test), y_test)
    print('test_accu', accuracy_before)

    # prune in place against the test split
    Utils.reduced_error_prunning(tree, X_test, y_test)

    # accuracy after pruning
    accuracy_after = accuracy_score(tree.predict(X_test), y_test)
    print('test_accu', accuracy_after)
# Example 5
# Registry mapping scaler names to their classes (project-local implementations).
scaling_classes = {
    'min_max_scale': MinMaxScaler,
    'normalize': NormalizationScaler,
}

# NOTE(review): example invocation kept commented out for reference.
#best_model, best_k, best_function, best_scaler = model_selection_with_transformation(distance_funcs, scaling_classes, Xtrain, ytrain, Xval, yval)

import data
import hw1_dt as decision_tree
import utils as Utils
from sklearn.metrics import accuracy_score

# Demo script: fit a decision tree on the sample data and score it on the
# sample test set.
features, labels = data.sample_decision_tree_data()

# fit and display the tree
dTree = decision_tree.DecisionTree()
dTree.train(features, labels)
Utils.print_tree(dTree)

# score on the sample test set
X_test, y_test = data.sample_decision_tree_test()
y_est_test = dTree.predict(X_test)
test_accu = accuracy_score(y_est_test, y_test)
print('test_accu', test_accu)
# Example 6
def _make_leaf(node):
    """Collapse *node* into a leaf in place (it then predicts node.cls_max)."""
    node.splittable = False
    node.children = []
    node.feature_uniq_split = None
    node.dim_split = None


def reduced_error_prunning(decisionTree, X_test, y_test):
    """Bottom-up reduced-error pruning of *decisionTree*, in place.

    For each child subtree, the validation rows routed to it are used to
    compare the subtree's accuracy against simply predicting the child's
    majority class (``cls_max``); the subtree is collapsed to a leaf when
    it is no more accurate than the majority vote.

    Args:
        decisionTree: tree wrapper exposing ``root_node`` (project type).
        X_test: validation feature rows (list of lists).
        y_test: validation labels, aligned with ``X_test``.
    """
    root = decisionTree.root_node
    if not root.splittable:
        return  # already a leaf — nothing to prune

    # Route each validation row to the child matching its split-feature value,
    # dropping the split column so indices match the child's reduced features.
    dim = root.dim_split
    child_X = []
    child_y = []
    for value in root.feature_uniq_split:
        xs = []
        ys = []
        for row, label in zip(X_test, y_test):
            if row[dim] == value:
                reduced = list(row)
                # BUGFIX: was `reduced.remove(value)`, which deletes the first
                # cell *equal to* the value — wrong column whenever another
                # feature happens to share it. Delete the split column itself.
                del reduced[dim]
                xs.append(reduced)
                ys.append(label)
        child_X.append(xs)
        child_y.append(ys)

    for i, child in enumerate(root.children):
        # Wrap the child so it can be pruned/predicted as a standalone tree.
        subtree = hw.DecisionTree()
        subtree.root_node = child
        # Children beyond the routed partitions, or with no validation rows,
        # cannot justify their split — collapse them.
        if i >= len(child_X) or not child_X[i] or not child_y[i]:
            _make_leaf(child)
            continue

        # Prune grandchildren first (bottom-up).
        reduced_error_prunning(subtree, child_X[i], child_y[i])

        # Hoisted: predictions were recomputed on every comparison before.
        predictions = subtree.predict(child_X[i])
        # Counts of *correct* predictions (original code misleadingly called
        # these "errors"): subtree vs. always predicting the majority class.
        subtree_correct = sum(p == t for p, t in zip(predictions, child_y[i]))
        majority_correct = sum(t == child.cls_max for t in child_y[i])
        if subtree_correct <= majority_correct:
            _make_leaf(child)
    # NOTE(review): the original trailing `else` branch was unreachable (the
    # non-splittable case returns early above) and mutated the wrapper instead
    # of the node; it has been removed.