示例#1
0
import DT as decision_tree

# Split the data into a training set and a test set.
# Candidate training rows are positions 0 .. n-2: the last row is never in the
# pool, so it always ends up in the test set.
index = DataSet.shape[0] - 1
index_train = np.arange(index)
# Draw between n // 2 and n - 1 distinct row positions for training.
rand_train = np.random.choice(index_train,
                              size=random.randint(int((index + 1) / 2), index),
                              replace=False)

# rand_train holds row *positions*, so both subsets are selected positionally.
# DataFrame.drop() matches index *labels* and would remove the wrong rows (or
# raise KeyError) whenever the index is not the default 0 .. n-1 range.
is_train_row = np.zeros(DataSet.shape[0], dtype=bool)
is_train_row[rand_train] = True

DataSet_train = DataSet.iloc[rand_train]
DataSet_test = DataSet.iloc[~is_train_row]

# Grow an unpruned tree on the training set, render it to a PNG file, and
# report its accuracy on the test set.
full_tree_png = (
    "Decision Tree/Decision Tree Based on Gini Index/Decision Tree Based on Gini Index.png"
)
root = decision_tree.TreeGenerate(DataSet_train)
decision_tree.DrawPNG(root, full_tree_png)

full_tree_accuracy = decision_tree.PredictAccuracy(root, DataSet_test)
print("accuracy of full tree: %.3f" % full_tree_accuracy)

# Pre-pruning: build the tree with PrePurn (given both the training and the
# test set), render it to a PNG file, and report its accuracy on the test set.
pre_pruned_png = "Decision Tree/Decision Tree Based on Gini Index/decision_tree_pre.png"
root = decision_tree.PrePurn(DataSet_train, DataSet_test)
decision_tree.DrawPNG(root, pre_pruned_png)

pre_pruned_accuracy = decision_tree.PredictAccuracy(root, DataSet_test)
print("accuracy of pre-purning tree: %.3f" % pre_pruned_accuracy)

# Post-pruning: grow a full tree on the training set, then hand it to PostPurn
# together with the test set.
# NOTE(review): the return value of PostPurn is ignored here; presumably the
# tree is pruned in place through `root` -- confirm against the DT module.
root = decision_tree.TreeGenerate(DataSet_train)
decision_tree.PostPurn(root, DataSet_test)
示例#2
0
    # Rows selected by `test` form the test set; the remaining rows form the
    # training set. (`test` is defined outside this view -- presumably the row
    # indices of the current cross-validation fold; verify in the enclosing loop.)
    # NOTE(review): drop() matches index labels while iloc matches positions;
    # the two agree only if DateSet has a default 0..n-1 index -- confirm.
    DateSet_train = DateSet.drop(test)
    DateSet_test = DateSet.iloc[test]
    root = decision_tree.TreeGenerate(DateSet_train)  # generate the tree

    # Test the accuracy: count the correctly predicted test rows.
    pred_true = 0
    # Iterate over the test set by index label.
    for i in DateSet_test.index:
        # Predict from the row(s) of the full data set whose index label is i.
        label = decision_tree.Predict(root, DateSet[DateSet.index == i])
        # The true class is read from the last column of the test set.
        if label == DateSet_test[DateSet_test.columns[-1]][i]:
            pred_true += 1

    # Fraction of test rows predicted correctly; one score is appended per pass.
    accuracy = pred_true / len(DateSet_test.index)
    accuracy_scores.append(accuracy)

# Report the accuracy of each of the first k recorded scores on one line,
# followed by their mean on the next line.
print("accuracy: ", end="")
accuracy_sum = 0
for fold_index in range(k):
    fold_accuracy = accuracy_scores[fold_index]
    print("%.3f  " % fold_accuracy, end="")
    accuracy_sum += fold_accuracy

mean_accuracy = accuracy_sum / k
print("\naverage accuracy: %.3f" % mean_accuracy)

# Decision tree visualization: train one tree on the complete data set and
# render it to a PNG file (the original note says DrawPNG relies on
# pydotplus.graphviz).
entropy_tree_png = (
    "Decision Tree/Decision Trees Based on Information Entropy/Decision Trees Based on Information Entropy.png"
)
root = decision_tree.TreeGenerate(DateSet)
decision_tree.DrawPNG(root, entropy_tree_png)