n_estimators=1000) #96.33 accuracy with ktree =RandomForestClassifier(criterion='gini', max_features='log2',random_state=10, max_depth=15,n_estimators=1000) y = np.ravel(Y_train) ktree.fit(X_train, y) kaggle_prediction = ktree.predict(X_kaggle) #forming a dataframe with columns id and class to save the predictions into a csv file frame = pd.DataFrame(kaggle_prediction, columns=['class']) frame.index = frame.index + 1 frame.to_csv( r'C:\Users\Vinni\Desktop\Sem 3 Spring 2021\CS 529 ML\Prog1 Decision Trees\Gene\gene_1\my_gene_kaggle.csv', index_label='id') #PLOTS #1) ALPHA VALUE AND ALPHA VS ACCURACY PLOT clf = DecisionTreeClassifier(random_state=20) path = clf.cost_complexity_pruning_path(X_train, Y_train) ccp_alphas, impurities = path.ccp_alphas, path.impurities clfs = [] ccp_alphas = ccp_alphas[:-1] for ccp_alpha in ccp_alphas: clf = DecisionTreeClassifier(random_state=20, ccp_alpha=ccp_alpha) clf.fit(X_train, Y_train) clfs.append(clf) print("Number of nodes in the last tree is:{} with cpp_alpha: {}".format( clfs[-1].tree_.node_count, ccp_alphas[-1])) train_scores = [clf.score(X_train, Y_train) for clf in clfs] test_scores = [clf.score(X_test, Y_test) for clf in clfs] fig, ax = plt.subplots() ax.set_xlabel("alpha")