def test_generate_input_rows_df():
    """Check the size and column names of the frame built by generate_input_rows_df.

    The generated rows are the input later passed to the prediction models.
    The test verifies that the frame is non-empty, that every column is one of
    the expected feature names, that the number of columns equals the number
    of expected features, and that the frame holds
    (NUM_DAYS_PREDICTIONS + 1) * 24 rows.
    """
    expected_features = [
        "TEMPERATURE",
        "NIEDERSCHLAG",
        "HOUR",
        "WEEK_DAY",
        "DAY",
        "MONTH",
        "YEAR",
    ]

    frame = generate_input_rows_df(NUM_DAYS_PREDICTIONS, INDEX_WEATHER_STATION)
    assert len(frame) > 0

    # Every generated column must be one of the expected feature names.
    unexpected_columns = [
        column for column in frame.columns if column not in expected_features
    ]
    assert not unexpected_columns

    # The number of columns must match the number of expected features.
    assert len(frame.columns) == len(expected_features)

    # NOTE(review): presumably 24 hourly rows per day for
    # NUM_DAYS_PREDICTIONS + 1 days -- confirm against generate_input_rows_df.
    assert len(frame) == (NUM_DAYS_PREDICTIONS + 1) * 24
def test_perform_predictions_ensemble_next_days():
    """Exercise perform_predictions_ensemble_next_days for TRAFFIC_1 and TRAFFIC_2.

    Builds the feature rows, loads the ensemble models of both traffic labels
    from disk and checks that each returned prediction sequence has one entry
    per input row and that every entry passes is_int_valid_in_range with the
    bounds 1 and 5.
    """
    model_features = ["TEMPERATURE", "NIEDERSCHLAG", "HOUR", "WEEK_DAY"]
    all_rows = generate_input_rows_df(NUM_DAYS_PREDICTIONS, INDEX_WEATHER_STATION)
    features_X = all_rows[model_features]

    # One list of ensemble models per traffic label.
    models_traffic_1 = load_ensemble_models_from_disk(
        "TRAFFIC_1", SITI_CODSITO, MODELS_PATH
    )
    models_traffic_2 = load_ensemble_models_from_disk(
        "TRAFFIC_2", SITI_CODSITO, MODELS_PATH
    )

    predicted_traffic_1, predicted_traffic_2 = perform_predictions_ensemble_next_days(
        models_traffic_1, models_traffic_2, features_X
    )

    # One prediction per input row, for each label.
    expected_rows = len(features_X)
    assert len(predicted_traffic_1) == expected_rows
    assert len(predicted_traffic_2) == expected_rows

    # Every predicted label must be accepted by the range helper
    # (whether the bounds are inclusive is decided by that helper).
    for predicted_labels in (predicted_traffic_1, predicted_traffic_2):
        assert all(
            is_int_valid_in_range(label, 1, 5) for label in predicted_labels
        )
def test_perform_predictions_numeric_nn_next_days():
    """Exercise perform_predictions_numeric_nn_next_days for COUNT_1 and COUNT_2.

    Loads the numeric neural-network objects for both count labels, runs the
    joint prediction and checks that each returned sequence has one value per
    input row, and that every value is a non-negative integer.
    """
    input_rows_df = generate_input_rows_df(NUM_DAYS_PREDICTIONS, INDEX_WEATHER_STATION)
    input_rows_X = input_rows_df[["TEMPERATURE", "NIEDERSCHLAG", "HOUR", "WEEK_DAY"]]

    list_objects_count_1 = load_numeric_nn_model_scal_enc(
        "COUNT_1", SITI_CODSITO, models_path=MODELS_PATH
    )
    list_objects_count_2 = load_numeric_nn_model_scal_enc(
        "COUNT_2", SITI_CODSITO, models_path=MODELS_PATH
    )
    # Both labels must come with the same number of loaded objects.
    assert len(list_objects_count_1) == len(list_objects_count_2)

    y_num_pred_count_1, y_num_pred_count_2 = perform_predictions_numeric_nn_next_days(
        list_objects_count_1, list_objects_count_2, input_rows_X
    )

    expected_rows = len(input_rows_X)
    for count_label, predictions in (
        ("COUNT_1", y_num_pred_count_1),
        ("COUNT_2", y_num_pred_count_2),
    ):
        # One prediction per input row (this also implies both outputs have
        # the same length).
        assert len(predictions) == expected_rows, (
            "unexpected number of predictions for %s" % count_label
        )
        # Predicted values are non-negative: zero is a legitimate value, so
        # the comparison is >= 0 rather than > 0.
        assert all(y_num_pred >= 0 for y_num_pred in predictions), (
            "negative prediction for %s" % count_label
        )
        # Predicted values are integers.
        assert all(isinstance(y_num_pred, int) for y_num_pred in predictions), (
            "non-integer prediction for %s" % count_label
        )
def test_perform_numeric_nn_predictions():
    """Exercise perform_numeric_nn_prediction for each numeric count label.

    For COUNT_1 and COUNT_2 in turn, loads the objects returned by
    load_numeric_nn_model_scal_enc, runs a prediction over the generated
    feature rows and checks that there is one predicted value per input row
    and that every value is a non-negative integer.
    """
    input_rows_df = generate_input_rows_df(NUM_DAYS_PREDICTIONS, INDEX_WEATHER_STATION)
    input_rows_X = input_rows_df[["TEMPERATURE", "NIEDERSCHLAG", "HOUR", "WEEK_DAY"]]
    list_num_traffic_labels = ["COUNT_1", "COUNT_2"]

    for traffic_num_label in list_num_traffic_labels:
        # Load the objects needed to perform a prediction with the neural network.
        list_objects = load_numeric_nn_model_scal_enc(
            traffic_num_label, SITI_CODSITO, models_path=MODELS_PATH
        )
        # Identity check: `!= None` would invoke the object's own __ne__.
        assert list_objects is not None, (
            "no objects loaded for %s" % traffic_num_label
        )

        # Use these objects to perform the predictions.
        y_num_pred = perform_numeric_nn_prediction(list_objects, input_rows_X)

        # There must be as many predicted values as input rows.
        assert len(y_num_pred) == len(input_rows_X), (
            "unexpected number of predictions for %s" % traffic_num_label
        )
        # Predicted values must be >= 0 (zero is a legitimate value).
        assert all(value >= 0 for value in y_num_pred), (
            "negative prediction for %s" % traffic_num_label
        )
        # Predicted values must be integers.
        assert all(isinstance(value, int) for value in y_num_pred), (
            "non-integer prediction for %s" % traffic_num_label
        )
def test_perform_models_ensemble_prediction():
    """Exercise the individual ensemble models and perform_models_ensemble_prediction.

    For TRAFFIC_1 and TRAFFIC_2 in turn, loads the ensemble models from disk,
    checks that each of the three models yields one label per input row, and
    then checks the same for the combined ensemble prediction. Every label
    must pass is_int_valid_in_range with the bounds 1 and 5.
    """
    input_rows_df = generate_input_rows_df(NUM_DAYS_PREDICTIONS, INDEX_WEATHER_STATION)
    input_rows_X = input_rows_df[["TEMPERATURE", "NIEDERSCHLAG", "HOUR", "WEEK_DAY"]]
    list_traffic_labels = ["TRAFFIC_1", "TRAFFIC_2"]
    list_model_names = ["Optimized Decision Tree", "Simple Random Forest","Best KNN"]
    expected_rows = len(input_rows_X)

    for traffic_level_label in list_traffic_labels:
        # Load the ensemble models for this label.
        list_models = load_ensemble_models_from_disk(
            traffic_level_label, SITI_CODSITO, MODELS_PATH
        )
        # Check for None before len(): len(None) would raise TypeError
        # instead of producing a clean assertion failure.
        assert list_models is not None, (
            "no models loaded for %s" % traffic_level_label
        )
        assert len(list_models) == 3

        # NOTE(review): the order in which the models are stored on disk is
        # not visible here, so the models are checked by position rather than
        # by name -- confirm that it matches list_model_names.
        for position, model in enumerate(list_models):
            y_labels = model.predict(input_rows_X)
            # One label per input row.
            assert len(y_labels) == expected_rows, (
                "model #%d of %s returned an unexpected number of labels"
                % (position, traffic_level_label)
            )
            # Bounds 1 and 5 are passed to the helper; whether the upper
            # bound is inclusive is decided by is_int_valid_in_range.
            assert all(is_int_valid_in_range(y_label, 1, 5) for y_label in y_labels), (
                "model #%d of %s returned an out-of-range label"
                % (position, traffic_level_label)
            )

        # Combined ensemble prediction.
        y_labels_ensemble = perform_models_ensemble_prediction(
            input_rows_X, list_models, list_model_names
        )
        # `is not None` instead of `!= None`: on an array-like result the
        # latter compares elementwise and cannot be used as an assert condition.
        assert y_labels_ensemble is not None
        assert len(y_labels_ensemble) == expected_rows
        assert all(
            is_int_valid_in_range(y_label, 1, 5) for y_label in y_labels_ensemble
        )