import random

import numpy as np

import DT as decision_tree

# split into training and test sets: sample at least half of the rows
# (without replacement) for training, keep the remaining rows for testing
index = DataSet.shape[0] - 1
index_train = np.arange(index)
rand_train = np.random.choice(
    index_train,
    size=random.randint(int((index + 1) / 2), index),
    replace=False,
)
DataSet_train = DataSet.iloc[rand_train]
DataSet_test = DataSet.drop(rand_train)

# generate a full (unpruned) tree
root = decision_tree.TreeGenerate(DataSet_train)
decision_tree.DrawPNG(
    root,
    "Decision Tree/Decision Tree Based on Gini Index/Decision Tree Based on Gini Index.png",
)
print("accuracy of full tree: %.3f" % decision_tree.PredictAccuracy(root, DataSet_test))

# pre-pruning
root = decision_tree.PrePurn(DataSet_train, DataSet_test)
decision_tree.DrawPNG(
    root, "Decision Tree/Decision Tree Based on Gini Index/decision_tree_pre.png")
print("accuracy of pre-pruning tree: %.3f" % decision_tree.PredictAccuracy(root, DataSet_test))

# post-pruning
root = decision_tree.TreeGenerate(DataSet_train)
decision_tree.PostPurn(root, DataSet_test)
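# --------------------------------------------------------------------------
# NOTE: the script above uses a pandas DataFrame named DataSet without
# defining it; it is assumed to be loaded earlier with a default RangeIndex
# (so the positional indices in rand_train match the labels passed to drop).
# A minimal, hypothetical sketch of that loading step follows; the file path
# is an assumption, not taken from the original project.
# --------------------------------------------------------------------------
import pandas as pd

DataSet = pd.read_csv("data/watermelon.csv")  # hypothetical dataset path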
# k-fold cross-validation: the lines below are the body of a loop over the
# k folds (the enclosing loop that produces `test`, the index labels of the
# current fold, is not shown in this fragment)
DateSet_train = DateSet.drop(test)
DateSet_test = DateSet.iloc[test]
root = decision_tree.TreeGenerate(DateSet_train)  # generate the tree

# test the accuracy: iterate over the test set
pred_true = 0
for i in DateSet_test.index:
    label = decision_tree.Predict(root, DateSet[DateSet.index == i])
    if label == DateSet_test[DateSet_test.columns[-1]][i]:
        pred_true += 1
accuracy = pred_true / len(DateSet_test.index)
accuracy_scores.append(accuracy)

# print the per-fold prediction accuracy and the average
accuracy_sum = 0
print("accuracy: ", end="")
for i in range(k):
    print("%.3f " % accuracy_scores[i], end="")
    accuracy_sum += accuracy_scores[i]
print("\naverage accuracy: %.3f" % (accuracy_sum / k))

# decision tree visualization using pydotplus.graphviz
root = decision_tree.TreeGenerate(DateSet)
decision_tree.DrawPNG(
    root,
    "Decision Tree/Decision Trees Based on Information Entropy/Decision Trees Based on Information Entropy.png",
)
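# --------------------------------------------------------------------------
# NOTE: a minimal sketch of the enclosing k-fold loop assumed by the fragment
# above. The fold construction (shuffling row positions and slicing them into
# k roughly equal chunks with numpy) is an assumption; only the names `k`,
# `test`, and `accuracy_scores` come from the original code.
# --------------------------------------------------------------------------
import numpy as np

k = 10
accuracy_scores = []
folds = np.array_split(np.random.permutation(DateSet.shape[0]), k)  # assumed split
for test in folds:
    # the per-fold training and accuracy code from the fragment above
    # (DateSet_train = DateSet.drop(test), ..., accuracy_scores.append(accuracy))
    # runs here once per fold
    pass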