# -*- coding: utf-8 -*-
"""ID3 decision-tree experiment on ``../data/watermelon_3.csv``.

Loads the data set into a DataFrame, generates a tree from the complete
data, and then runs a k-fold cross prediction loop that generates one tree
per training split.
"""

import pandas as pd

import decision_tree

# data: the CSV file is decoded as GB18030 before pandas parses it
data_file_encode = "gb18030"
with open("../data/watermelon_3.csv", mode='r', encoding=data_file_encode) as data_file:
    df = pd.read_csv(data_file)

# implementation of ID3: tree generated from the complete data set
root = decision_tree.TreeGenerate(df)

accuracy_scores = []

# k-folds cross prediction
n = len(df.index)
k = 5
# The fold size does not change between iterations, so compute it once;
# floor division gives the same result as int(n / k) without a float detour.
# NOTE(review): the trailing n % k rows never end up in any test fold.
m = n // k
for i in range(k):
    # positional row numbers that make up the i-th test fold
    test = list(range(i * m, i * m + m))

    # DataFrame.drop removes rows by index *label* while iloc selects by
    # *position*; translate positions to labels so both splits always refer
    # to the same rows, even if the frame's index is not 0..n-1.
    df_train = df.drop(df.index[test])
    df_test = df.iloc[test]
    root = decision_tree.TreeGenerate(df_train)

    # accuracy
    pred_true = 0
with open("../data/watermelon_2.csv", mode='r', encoding=data_file_encode) as data_file: df = pd.read_csv(data_file) ''' implementation of CART rely on decision_tree.py ''' import decision_tree # dicision tree visualization using pydotplus.graphviz index_train = [0, 1, 2, 5, 6, 9, 13, 14, 15, 16] df_train = df.iloc[index_train] df_test = df.drop(index_train) # generate a full tree root = decision_tree.TreeGenerate(df_train) decision_tree.DrawPNG(root, "decision_tree_full.png") print("accuracy of full tree: %.3f" % decision_tree.PredictAccuracy(root, df_test)) # pre-purning root = decision_tree.PrePurn(df_train, df_test) decision_tree.DrawPNG(root, "decision_tree_pre.png") print("accuracy of pre-purning tree: %.3f" % decision_tree.PredictAccuracy(root, df_test)) # # post-puring root = decision_tree.TreeGenerate(df_train) decision_tree.PostPurn(root, df_test) decision_tree.DrawPNG(root, "decision_tree_post.png") print("accuracy of post-purning tree: %.3f" %