def learn_depths():
    """Fit one decision tree per depth setting and tabulate its accuracy.

    For every ``max_depth`` in ``range(n_attr)`` a ``DecisionTree`` is built
    from ``x_train``/``y_train`` (names taken from the enclosing module
    scope), fitted, and scored on both the training and the test split.

    Returns:
        A ``pandas.DataFrame`` with the columns ``'depth'``,
        ``'Train accuracy'`` and ``'Test accuracy'``, one row per tree.
    """
    # Row 0 collects training accuracy, row 1 collects test accuracy.
    scores = np.zeros((2, n_attr))
    for max_depth in range(n_attr):
        model = DecisionTree(x_train, y_train, max_depth=max_depth)
        model.fit()
        scores[0, max_depth] = model.accuracy(x_train, y_train)
        scores[1, max_depth] = model.accuracy(x_test, y_test)

    train_scores, test_scores = scores
    # NOTE(review): the 'depth' column is labelled 1..n_attr while the trees
    # are built with max_depth 0..n_attr-1 -- confirm against DecisionTree
    # that this shift by one is intended.
    columns = {
        'depth': range(1, n_attr + 1),
        'Train accuracy': train_scores,
        'Test accuracy': test_scores,
    }
    results = pd.DataFrame(columns)
    # Callers may persist the table, e.g. results.to_csv('res/acc.csv').
    return results
def k_fold_cross_validation(x, y, k, shf=False):
    """Score decision trees of every depth with k-fold cross-validation.

    Sample ``i`` is assigned to fold ``i % k``. For each fold ``d`` the
    samples of that fold are held out, and for every ``max_depth`` in
    ``range(n_attr)`` a ``DecisionTree`` is fitted on the remaining samples
    and scored on the held-out ones. Only validation accuracy is recorded.

    Args:
        x: Feature rows (array-like, one row per sample).
        y: Labels (array-like, one entry per sample).
        k: Number of folds.
        shf: When true, the samples are shuffled (rows and labels together)
            before the folds are formed.

    Returns:
        A NumPy array of shape ``(k, n_attr)`` where entry ``[d, depth]`` is
        the accuracy on fold ``d`` of the tree built with ``max_depth=depth``.
    """
    if shf:
        # Attach the labels as the last column so that every row keeps its
        # own label while the rows are permuted.
        # NOTE(review): stacking casts features and labels to one common
        # dtype -- confirm that DecisionTree is fine with that.
        stacked = list(np.column_stack((x, y)))
        shuffle(stacked)
        stacked = np.array(stacked)
        x = stacked[:, :-1]
        y = stacked[:, -1]

    x = np.asarray(x)
    y = np.asarray(y)

    val_acc = np.zeros((k, n_attr))
    for d in range(k):
        print(d, "th fold...")
        # Indices d, d + k, d + 2k, ... are exactly those with i % k == d.
        held_out = slice(d, None, k)
        x_val = x[held_out]
        y_val = y[held_out]
        x_train = np.delete(x, held_out, axis=0)
        y_train = np.delete(y, held_out, axis=0)

        for depth in range(n_attr):
            dtree = DecisionTree(x_train, y_train, max_depth=depth)
            dtree.fit()
            val_acc[d, depth] = dtree.accuracy(x_val, y_val)
    return val_acc
# %%
import numpy as np

from preprocess import get_train_data, get_test_data
from dtree import DecisionTree

# Load both splits once; the metric helpers below read them as globals.
x_train, y_train = get_train_data()
x_test, y_test = get_test_data()

# Fit a tree limited to max_depth=1, show its structure, and keep the
# test-set predictions around for the confusion-matrix statistics.
decision_tree = DecisionTree(x_train, y_train, max_depth=1)
decision_tree.fit()
decision_tree.traverse()
y_hat = decision_tree.predict(x_test)
print("accuracy: ", decision_tree.accuracy(x_test, y_test))


# %%
def get_stats():
    """Count the confusion-matrix cells of ``y_hat`` against ``y_test``.

    Label ``1`` is treated as the positive class and ``0`` as the negative
    class. Both arrays are read from module scope.

    Returns:
        The tuple ``(TP, FP, TN, FN)``.
    """
    actual_pos = y_test == 1
    actual_neg = y_test == 0
    predicted_pos = y_hat == 1
    predicted_neg = y_hat == 0

    true_pos = np.sum(np.logical_and(actual_pos, predicted_pos))
    false_pos = np.sum(np.logical_and(actual_neg, predicted_pos))
    true_neg = np.sum(np.logical_and(actual_neg, predicted_neg))
    false_neg = np.sum(np.logical_and(actual_pos, predicted_neg))
    return true_pos, false_pos, true_neg, false_neg


def specificity():
    """Return the true-negative rate, ``TN / (TN + FP)``.

    NOTE(review): there is no guard for a zero denominator (no negative
    samples in ``y_test``) -- confirm whether callers need one.
    """
    _, false_pos, true_neg, _ = get_stats()
    negatives = true_neg + false_pos
    return true_neg / negatives


def sensitivity():
    """Return the true-positive rate, ``TP / (TP + FN)``.

    NOTE(review): there is no guard for a zero denominator (no positive
    samples in ``y_test``) -- confirm whether callers need one.
    """
    true_pos, _, _, false_neg = get_stats()
    positives = true_pos + false_neg
    return true_pos / positives