#%% DECISION TREE
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

model = DecisionTreeClassifier(criterion='gini')
compute_score(model, X_selected_train, y_train)
compute_score_accuracy(model, X_selected_train, y_train)
compute_score_f1(model, X_selected_train, y_train)

model.fit(X_selected_train, y_train)
y_pred = model.predict(X_selected_test)
cm = confusion_matrix(y_test, y_pred)

# Tree pruning - using minimal cost complexity pruning: we recursively find the weakest link
# (effective alpha); the nodes with the smallest effective alpha are pruned first.
# cost_complexity_pruning_path returns the effective alphas and the corresponding total
# leaf impurities at each step. As alpha increases, more of the tree is pruned,
# which increases the total impurity of the leaves.
path = model.cost_complexity_pruning_path(X_selected_train, y_train)
ccp_alphas, impurities = path.ccp_alphas, path.impurities

plt.figure(figsize=(10, 6))
plt.plot(ccp_alphas, impurities, linewidth=1, color='black')
plt.xlabel("effective alpha")
plt.ylabel("total impurity of leaves")

# Fit one tree per candidate alpha to compare the resulting pruned trees.
clfs = []
for ccp_alpha in ccp_alphas:
    clf = DecisionTreeClassifier(random_state=0, ccp_alpha=ccp_alpha)
    clf.fit(X_selected_train, y_train)
    clfs.append(clf)

tree_depths = [clf.tree_.max_depth for clf in clfs]
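#%% Selecting an alpha (illustrative sketch, not part of the original notebook).
# One common way to pick ccp_alpha is to score every pruned tree in `clfs` on the
# train and test splits and keep the alpha with the best held-out accuracy
# (a validation set or cross-validation would be preferable to reusing the test set).
# Variable names (X_selected_train, X_selected_test, y_train, y_test, clfs, ccp_alphas)
# are reused from the cell above.
import numpy as np

train_scores = [clf.score(X_selected_train, y_train) for clf in clfs]
test_scores = [clf.score(X_selected_test, y_test) for clf in clfs]

plt.figure(figsize=(10, 6))
plt.plot(ccp_alphas, train_scores, marker='o', label='train accuracy')
plt.plot(ccp_alphas, test_scores, marker='o', label='test accuracy')
plt.xlabel("effective alpha")
plt.ylabel("accuracy")
plt.legend()

# Refit a single tree with the best-scoring alpha.
best_alpha = ccp_alphas[np.argmax(test_scores)]
pruned_model = DecisionTreeClassifier(random_state=0, ccp_alpha=best_alpha)
pruned_model.fit(X_selected_train, y_train)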