"maxspeed_entry", "vehicle_speed_entry", "vehicle_speed_exit", "curvature_exit", "oneway_exit", "lane_count_entry", "has_right_of_way", "curvature_entry" ] mse_mean = np.array([]) mse_std = np.array([]) n_est_hist = np.array([]) samples = automatic_test.load_samples('../data/training_data/samples.pickle') samples = automatic_test.normalize_features(samples) train_sample_sets, test_sample_sets = automatic_test.get_cross_validation_samples(samples, 0.8, 5) for n_est in range(1,40, 2): rf_algo = regressors.RandomForestAlgorithm(feature_list, n_estimators = n_est) results = automatic_test.test([rf_algo], train_sample_sets, test_sample_sets, cross_validation=True) result_statistics = automatic_test.get_result_statistics(results) mse_mean = np.append(mse_mean, result_statistics[rf_algo]['average_mse']) mse_std = np.append(mse_std, result_statistics[rf_algo]['std_mse']) n_est_hist = np.append(n_est_hist, n_est) # Plot the results plt.hold(True) plt.plot(n_est_hist, mse_mean) plt.plot(n_est_hist, mse_mean + mse_std, 'r-') plt.plot(n_est_hist, mse_mean - mse_std, 'r-') plt.xlabel('n_estimators') plt.ylabel('average_mse') plt.show()
N_tests = 10
# Training set sizes to evaluate: 10, 20, ... up to at most N_training_samples.
training_samples_steps = np.arange(10, N_training_samples + 1, 10)

# Raw results, one entry per (test run, training set size) pair; the three
# lists are index-aligned.
all_training_errors = []
all_cv_errors = []
all_training_samples_sizes = []

for test_i in range(N_tests):
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("====== Test number %d ======" % test_i)
    # Reshuffle in place for each run. Everything past the first
    # N_training_samples entries is held out for validation, so it never
    # overlaps with the training slices taken below.
    random.shuffle(samples)
    cv_samples = samples[N_training_samples:]
    for training_samples_size in training_samples_steps:
        print("Training samples size: %s" % training_samples_size)
        training_samples = samples[:training_samples_size]
        automatic_test.train([rf_algo], training_samples)
        # Evaluate the freshly trained algorithm on its own training data and
        # on the held-out samples.
        rs_train = automatic_test.get_result_statistics(automatic_test.predict([rf_algo], training_samples))
        rs_cv = automatic_test.get_result_statistics(automatic_test.predict([rf_algo], cv_samples))
        all_training_samples_sizes.append(training_samples_size)
        all_training_errors.append(rs_train[rf_algo]['mean_mse'])
        all_cv_errors.append(rs_cv[rf_algo]['mean_mse'])

training_errors = []
cv_errors = []
training_samples_sizes = []
# Flatten the results
for training_samples_size in training_samples_steps:
    # Positions in the all_* lists that were recorded for this training size.
    indices = [i for i,x in enumerate(all_training_samples_sizes) if x == training_samples_size]
    training_samples_sizes.append(training_samples_size)