# Random Forest baseline followed by a Gradient Boosting grid search.
# The bare string expressions are the original section labels; they have no
# runtime effect and are kept unchanged.
"Random Forest"
print("Random Forest")

# One seed for the whole run. random.seed() seeds only that module's
# generator, so each estimator also receives the seed through random_state.
seed_value = 4
random.seed(seed_value)

# 150 trees, trained on all available cores (n_jobs=-1).
regr = RandomForestRegressor(
    random_state=seed_value, n_jobs=-1, n_estimators=150)
regr.fit(X_train, y_train)
predictions_rf = regr.predict(X_test)

# Score the predictions once and derive the RMSE from that MSE instead of
# evaluating the squared error a second time.
mse_rf = mean_squared_error(y_test, predictions_rf)
rmse_rf = np.sqrt(mse_rf)
# NOTE(review): calc_rse receives the MSE here rather than the predictions --
# confirm this matches the helper's expected second argument.
rse_rf = rse.calc_rse(y_test, mse_rf)
print("mse {:.4f} rmse {:.4f} rse {:.4f}".format(mse_rf, rmse_rf, rse_rf))

"Gradient Boosting Trees"
print("Gradient Boost")

# Exhaustive search over 2 x 2 x 2 = 8 parameter combinations with 5-fold
# cross-validation on the training data.
gbt = GradientBoostingRegressor(random_state=seed_value)
param_grid = {
    'learning_rate': [0.01, 0.1],
    'n_estimators': [150, 250],
    'max_depth': [7, 8]
}
cross_val_gbt = GridSearchCV(gbt, param_grid, cv=5)
cross_val_gbt.fit(X_train, y_train)
# Transform the test features with the scaler
# (presumably fitted earlier -- not visible in this excerpt).
individuals_model_test = std_scaler_model.transform(individuals_model_test)

# The bare string expressions are the original section labels; they have no
# runtime effect and are kept unchanged.
"Division Train Test"
# Features are rebuilt as DataFrames with the selected column names.
# Targets are the "individuals" column: years 2015-2018 for training and
# 2019 held out for testing.
X_train = pd.DataFrame(data=individuals_model_train, columns=selected_features)
y_train = individuals.loc[
    individuals['year'].isin([2015, 2016, 2017, 2018])]["individuals"]
X_test = pd.DataFrame(data=individuals_model_test, columns=selected_features)
y_test = individuals.loc[individuals['year'] == 2019]["individuals"]

"Algoritmos y Evaluación"
# Algorithms and evaluation.
"Random Forest"
print("Random Forest")

# One seed for the whole run. random.seed() seeds only that module's
# generator, so the estimator also receives the seed through random_state.
seed_value = 4
random.seed(seed_value)

# 150 trees, trained on all available cores (n_jobs=-1).
regr = RandomForestRegressor(
    random_state=seed_value, n_jobs=-1, n_estimators=150)
regr.fit(X_train, y_train)
predictions_rf = regr.predict(X_test)

# Score the predictions once and derive the RMSE from that MSE instead of
# evaluating the squared error a second time.
mse_rf = mean_squared_error(y_test, predictions_rf)
rmse_rf = np.sqrt(mse_rf)
# NOTE(review): calc_rse receives the MSE here rather than the predictions --
# confirm this matches the helper's expected second argument.
rse_rf = rse.calc_rse(y_test, mse_rf)
print("mse {:.4f} rmse {:.4f} rse {:.4f}".format(mse_rf, rmse_rf, rse_rf))
pd.Series(pred_values_rf)], axis=1) results_rf.columns = ['test_index', 'prediction'] avg_results_rf = results_rf.groupby('test_index').median() results_xgb = pd.concat( [pd.Series(index_test), pd.Series(pred_values_xgb)], axis=1) results_xgb.columns = ['test_index', 'prediction'] avg_results_xgb = results_xgb.groupby('test_index').median() "Error Calculation" rmse_lr = np.sqrt(metrics.mean_squared_error(y, avg_results_lr)) mse_lr = mean_squared_error(y, avg_results_lr) rse_lr = rse.calc_rse(y, avg_results_lr.prediction) error_values_lr.append((mse_lr, rmse_lr, rse_lr)) print("mse {:.4f} rmse {:.4f} rse {:.4f}".format(mse_lr, rmse_lr, rse_lr)) rmse_rf = np.sqrt(metrics.mean_squared_error(y, avg_results_rf)) mse_rf = mean_squared_error(y, avg_results_rf) rse_rf = rse.calc_rse(y, avg_results_rf.prediction) error_values_rf.append((mse_rf, rmse_rf, rse_rf)) print("mse {:.4f} rmse {:.4f} rse {:.4f}".format(mse_rf, rmse_rf, rse_rf)) rmse_xgb = np.sqrt(metrics.mean_squared_error(y, avg_results_xgb)) mse_xgb = mean_squared_error(y, avg_results_xgb) rse_xgb = rse.calc_rse(y, avg_results_xgb.prediction)
columns=selected_features) y_test = individuals_train.loc[individuals_train['year'] == year_test]["individuals"] "Algorithms and evaluation" "Linear Regression" reg = LinearRegression() reg.fit(X_train, y_train) predictions_lr = reg.predict(X_test) rmse_lr = np.sqrt(metrics.mean_squared_error(y_test, predictions_lr)) mse_lr = mean_squared_error(y_test, predictions_lr) rse_lr = rse.calc_rse(y_test, predictions_lr) print("mse {:.4f} rmse {:.4f} rse {:.4f}".format(mse_lr, rmse_lr, rse_lr)) error_values_lr.append((year_test, mse_lr, rmse_lr, rse_lr)) "Random Forest" n_estimators = [100, 150] max_features = ['auto'] random_grid = {'n_estimators': n_estimators, 'max_features': max_features} seed_value = 4 random.seed(seed_value) regr = RandomForestRegressor(n_jobs=-1) regr_random = RandomizedSearchCV(estimator=regr,
pd.Series(pred_values_rf)], axis=1) results_rf.columns = ['test_index', 'prediction'] avg_results_rf = results_rf.groupby('test_index').median() results_xgb = pd.concat( [pd.Series(index_test), pd.Series(pred_values_xgb)], axis=1) results_xgb.columns = ['test_index', 'prediction'] avg_results_xgb = results_xgb.groupby('test_index').median() "Error Calculation" rmse_lr = np.sqrt(metrics.mean_squared_error(y_ind, avg_results_lr)) mse_lr = mean_squared_error(y_ind, avg_results_lr) rse_lr = rse.calc_rse(y_ind, avg_results_lr.prediction) print("mse {:.4f} rmse {:.4f} rse {:.4f}".format(mse_lr, rmse_lr, rse_lr)) error_values_lr.append((mse_lr, rmse_lr, rse_lr)) rmse_rf_final = np.sqrt(metrics.mean_squared_error(y_ind, avg_results_rf)) mse_rf_final = mean_squared_error(y_ind, avg_results_rf) rse_rf_final = rse.calc_rse(y_ind, avg_results_rf.prediction) if (j == 0): # Write individual predictions for firts iteration df1 = pd.DataFrame({ "real": y_ind, "prediction": avg_results_rf.prediction }) df1.to_csv('../results/TWO_STEP_rfpred.csv', index=False) print("mse {:.4f} rmse {:.4f} rse {:.4f}".format(mse_rf_final,