"Random Forest"

# Fit a 150-tree random forest on the training split and score it on the
# held-out split.
print("Random Forest")
seed_value = 4
random.seed(seed_value)

# The forest itself is seeded through ``random_state``.
regr = RandomForestRegressor(random_state=seed_value,
                             n_jobs=-1,
                             n_estimators=150)
regr.fit(X_train, y_train)
predictions_rf = regr.predict(X_test)

# Compute the MSE once and derive the RMSE from it instead of evaluating the
# metric a second time over the same arrays.
mse_rf = mean_squared_error(y_test, predictions_rf)
rmse_rf = np.sqrt(mse_rf)

# NOTE(review): the scalar MSE is passed as the second argument here; confirm
# that rse.calc_rse expects the MSE rather than the prediction vector.
rse_rf = rse.calc_rse(y_test, mse_rf)

print("mse {:.4f} rmse {:.4f} rse {:.4f}".format(mse_rf, rmse_rf, rse_rf))

"Gradient Boosting Trees"
print("Gradient Boost")

# Candidate hyper-parameters: 2 x 2 x 2 = 8 combinations, each scored with
# 5-fold cross-validation by the grid search below.
param_grid = {
    'learning_rate': [0.01, 0.1],
    'n_estimators': [150, 250],
    'max_depth': [7, 8]
}

gbt = GradientBoostingRegressor(random_state=seed_value)
cross_val_gbt = GridSearchCV(estimator=gbt, param_grid=param_grid, cv=5)
cross_val_gbt.fit(X_train, y_train)
Exemplo n.º 2
0
# Scale the test matrix with std_scaler_model.
individuals_model_test = std_scaler_model.transform(individuals_model_test)

"Division Train Test"

# Feature frames: wrap the train/test matrices with the selected feature names.
X_train = pd.DataFrame(individuals_model_train, columns=selected_features)
X_test = pd.DataFrame(individuals_model_test, columns=selected_features)

# Targets: rows from 2015-2018 are used for training, rows from 2019 for
# testing.
y_train = individuals.loc[
    individuals['year'].isin([2015, 2016, 2017, 2018]), "individuals"]
y_test = individuals.loc[individuals['year'] == 2019, "individuals"]

"Algoritmos y Evaluación"

"Random Forest"

# Fit a 150-tree random forest on the training split and score it on the
# held-out split.
print("Random Forest")
seed_value = 4
random.seed(seed_value)

# The forest itself is seeded through ``random_state``.
regr = RandomForestRegressor(random_state=seed_value,
                             n_jobs=-1,
                             n_estimators=150)
regr.fit(X_train, y_train)
predictions_rf = regr.predict(X_test)

# Compute the MSE once and derive the RMSE from it instead of evaluating the
# metric a second time over the same arrays.
mse_rf = mean_squared_error(y_test, predictions_rf)
rmse_rf = np.sqrt(mse_rf)

# NOTE(review): the scalar MSE is passed as the second argument here; confirm
# that rse.calc_rse expects the MSE rather than the prediction vector.
rse_rf = rse.calc_rse(y_test, mse_rf)

print("mse {:.4f} rmse {:.4f} rse {:.4f}".format(mse_rf, rmse_rf, rse_rf))
Exemplo n.º 3
0
                            pd.Series(pred_values_rf)],
                           axis=1)
    # Label the stacked Random Forest predictions, then collapse repeated
    # test indices to a single value each using the median.
    results_rf.columns = ['test_index', 'prediction']
    avg_results_rf = results_rf.groupby('test_index').median()

    # Same aggregation for the XGBoost predictions; the column labels are
    # supplied through ``keys`` while concatenating.
    results_xgb = pd.concat(
        [pd.Series(index_test), pd.Series(pred_values_xgb)],
        axis=1,
        keys=['test_index', 'prediction'],
    )
    avg_results_xgb = results_xgb.groupby('test_index').median()

    "Error Calculation"

    # For every model the MSE is computed once and the RMSE is derived from
    # it, rather than evaluating the metric twice on the same inputs.

    # Linear regression errors.
    mse_lr = mean_squared_error(y, avg_results_lr)
    rmse_lr = np.sqrt(mse_lr)
    rse_lr = rse.calc_rse(y, avg_results_lr.prediction)

    error_values_lr.append((mse_lr, rmse_lr, rse_lr))
    print("mse {:.4f} rmse {:.4f} rse {:.4f}".format(mse_lr, rmse_lr, rse_lr))

    # Random forest errors.
    mse_rf = mean_squared_error(y, avg_results_rf)
    rmse_rf = np.sqrt(mse_rf)
    rse_rf = rse.calc_rse(y, avg_results_rf.prediction)

    error_values_rf.append((mse_rf, rmse_rf, rse_rf))
    print("mse {:.4f} rmse {:.4f} rse {:.4f}".format(mse_rf, rmse_rf, rse_rf))

    # XGBoost errors.
    mse_xgb = mean_squared_error(y, avg_results_xgb)
    rmse_xgb = np.sqrt(mse_xgb)
    rse_xgb = rse.calc_rse(y, avg_results_xgb.prediction)
                          columns=selected_features)
    # Target values for the rows whose year equals year_test.
    y_test = individuals_train.loc[individuals_train['year'] ==
                                   year_test]["individuals"]

    "Algorithms and evaluation"

    "Linear Regression"

    reg = LinearRegression()
    reg.fit(X_train, y_train)

    predictions_lr = reg.predict(X_test)

    # Compute the MSE once and take its square root for the RMSE instead of
    # evaluating the metric twice on the same arrays.
    mse_lr = mean_squared_error(y_test, predictions_lr)
    rmse_lr = np.sqrt(mse_lr)
    rse_lr = rse.calc_rse(y_test, predictions_lr)

    print("mse {:.4f} rmse {:.4f} rse {:.4f}".format(mse_lr, rmse_lr, rse_lr))
    # Each record is tagged with the year it was evaluated on.
    error_values_lr.append((year_test, mse_lr, rmse_lr, rse_lr))

    "Random Forest"

    # Search space for the randomized search over the forest.
    n_estimators = [100, 150]
    # 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3. For a
    # regressor it meant "consider every feature", which the float 1.0
    # expresses on old and new versions alike.
    max_features = [1.0]
    random_grid = {'n_estimators': n_estimators, 'max_features': max_features}

    seed_value = 4
    random.seed(seed_value)

    # Seed the estimator explicitly so the fitted forest is reproducible.
    # Assuming ``random`` is the standard-library module, seeding it does not
    # affect scikit-learn, which is controlled through ``random_state``.
    regr = RandomForestRegressor(n_jobs=-1, random_state=seed_value)
    regr_random = RandomizedSearchCV(estimator=regr,
Exemplo n.º 5
0
                            pd.Series(pred_values_rf)],
                           axis=1)
    # Label the stacked Random Forest predictions, then collapse repeated
    # test indices to a single value each using the median.
    results_rf.columns = ['test_index', 'prediction']
    avg_results_rf = results_rf.groupby('test_index').median()

    # Same aggregation for the XGBoost predictions; the column labels are
    # supplied through ``keys`` while concatenating.
    results_xgb = pd.concat(
        [pd.Series(index_test), pd.Series(pred_values_xgb)],
        axis=1,
        keys=['test_index', 'prediction'],
    )
    avg_results_xgb = results_xgb.groupby('test_index').median()

    "Error Calculation"

    # For every model the MSE is computed once and the RMSE is derived from
    # it, rather than evaluating the metric twice on the same inputs.

    # Linear regression errors.
    mse_lr = mean_squared_error(y_ind, avg_results_lr)
    rmse_lr = np.sqrt(mse_lr)
    rse_lr = rse.calc_rse(y_ind, avg_results_lr.prediction)

    print("mse {:.4f} rmse {:.4f} rse {:.4f}".format(mse_lr, rmse_lr, rse_lr))
    error_values_lr.append((mse_lr, rmse_lr, rse_lr))

    # Random forest errors.
    mse_rf_final = mean_squared_error(y_ind, avg_results_rf)
    rmse_rf_final = np.sqrt(mse_rf_final)
    rse_rf_final = rse.calc_rse(y_ind, avg_results_rf.prediction)
    if j == 0:  # Write individual predictions for the first iteration only
        # NOTE(review): if both values are pandas Series the columns are
        # aligned on index; confirm y_ind shares the test_index labels.
        df1 = pd.DataFrame({
            "real": y_ind,
            "prediction": avg_results_rf.prediction
        })
        df1.to_csv('../results/TWO_STEP_rfpred.csv', index=False)

    print("mse {:.4f} rmse {:.4f} rse {:.4f}".format(mse_rf_final,