def random_forest_algorithm(train_df, n_trees, n_bootstrap, n_features, dt_max_depth):
    """Train a random forest as a list of independent decision trees.

    Each tree is fit on its own bootstrap sample of ``train_df`` and restricted
    to a random subspace of ``n_features`` candidate features per split.

    Args:
        train_df: Training data (target column expected by the tree builder).
        n_trees: Number of trees in the forest.
        n_bootstrap: Number of rows drawn (with replacement) per bootstrap sample.
        n_features: Size of the random feature subspace considered at each split.
        dt_max_depth: Maximum depth of every individual tree.

    Returns:
        A list of fitted trees, one per bootstrap sample.
    """
    return [
        decision_tree_algorithm(
            bootstrapping(train_df, n_bootstrap),
            max_depth=dt_max_depth,
            random_subspace=n_features,
        )
        for _ in range(n_trees)
    ]
def random_forest_algorithm(train_df, val_df, n_trees, n_bootstrap, n_features, dt_max_depth, ml_task):
    """Train a random forest where every tree is post-pruned on validation data.

    For each of the ``n_trees`` trees: draw a bootstrap sample from ``train_df``,
    fit a decision tree on it (random subspace of ``n_features`` per split,
    depth capped at ``dt_max_depth``), then prune the tree using ``val_df``.

    Args:
        train_df: Training data.
        val_df: Validation data used by the pruning step.
        n_trees: Number of trees in the forest.
        n_bootstrap: Number of rows per bootstrap sample.
        n_features: Size of the random feature subspace per split.
        dt_max_depth: Maximum tree depth before pruning.
        ml_task: Task flag passed through to tree building and pruning
            (e.g. classification vs. regression).

    Returns:
        A list of pruned trees, one per bootstrap sample.
    """
    forest = []
    for tree_idx in range(n_trees):
        # Progress indicator: trees are numbered from 1 for readability.
        print("Decision-Tree # ", tree_idx + 1)
        bootstrap_sample = bootstrapping(train_df, n_bootstrap)
        unpruned_tree = decision_tree_algorithm(
            bootstrap_sample,
            ml_task=ml_task,
            max_depth=dt_max_depth,
            random_subspace=n_features,
        )
        forest.append(post_pruning(unpruned_tree, train_df, val_df, ml_task))
    return forest
# Fix the RNG seed so the train/test split and tree construction are reproducible.
random.seed(0)

df = df_input[features]
# decision_tree_functions.py requires the target column to be named 'label'.
df = df.rename(columns={target_name: 'label'})
train_df, test_df = dtf.train_test_split(df, test_size)

###########################
# DECISION TREE ALGORITHM #
###########################
#
# sub_tree = {question: [yes_answer, no_answer]}
if is_ml_task_classification:
    # Classification
    tree = dtf.decision_tree_algorithm(train_df, ml_task="classification", max_depth=3)
else:
    # NOTE(review): the original called dtf.train_test_split(df, test_size) a
    # second time here, silently discarding the split computed above and making
    # the regression path use a different partition than the classification
    # path. The redundant re-split was removed; both branches now share the
    # single split performed before this if/else.
    tree = dtf.decision_tree_algorithm(train_df, ml_task="regression", max_depth=3)

    # Regression-only evaluation and diagnostic plots on the held-out test set.
    r_squared = dtf.calculate_r_squared(test_df, tree)
    print('r_squared: ', r_squared)

    #create_plot(test_df, tree, title="Test Data")
    plot_target_vs_feature(test_df, tree, 'sig_z_all_legs', 'Unity Check [-]')
    plot_target_vs_feature(test_df, tree, 'sig_roll', 'Unity Check [-]')
    plot_target_vs_feature(test_df, tree, 'sig_pitch', 'Unity Check [-]')
    plot_target_vs_feature(test_df, tree, 'wd', 'Unity Check [-]')