def holt_winters_test(df):
    """Compare Holt-Winters fits with and without explicit start parameters.

    Takes the "total load actual" series of entry 2 of the "Summer" data
    returned by ``split_data(df)``, holds out the final 48 observations and
    fits two ``ExponentialSmoothing`` models (multiplicative seasonality,
    period 168) on the remainder: one given ``start_params`` and one fitted
    without them. The fitted parameters of both are printed and the two
    48-step forecasts are plotted against the held-out data.

    Args:
        df: Data accepted by ``split_data``.
    """
    horizon = 48
    period = 168

    data = split_data(df)["Summer"][2]["total load actual"]
    train_data = data[:-horizon]
    test_data = data[-horizon:]
    last_step = len(train_data) + horizon - 1

    # Layout noted by the original author:
    # [alpha, beta, gamma, l0, b0, phi, s0,.., s_(m - 1)]
    _, indices = deseasonalise(train_data, period, "multiplicative")
    # Use .iloc for the first observation: plain [0] on a labelled Series is
    # a label lookup (the positional fallback is deprecated in pandas).
    init_params = [0.25, 0.75, train_data.iloc[0]]
    init_params.extend(indices)

    # Fit with the hand-built start parameters
    fitted_model = ExponentialSmoothing(
        train_data, seasonal_periods=period, seasonal="mul").fit(
            use_basinhopping=True, start_params=init_params)
    init_prediction = fitted_model.predict(0, last_step)
    print(fitted_model.params)

    # Fit again without start parameters
    fitted_model = ExponentialSmoothing(
        train_data, seasonal_periods=period, seasonal="mul").fit(
            use_basinhopping=True)
    prediction = fitted_model.predict(0, last_step)
    print(fitted_model.params)

    # Plot only the forecast part of each prediction against the hold-out
    fig, ax = plt.subplots(1, 1, figsize=(20, 15), dpi=250)
    ax.plot(test_data, label="Actual Data")
    ax.plot(prediction[-horizon:], label="Non initialised")
    ax.plot(init_prediction[-horizon:], label="Initialised")
    ax.legend(loc="best")
    plt.show()
def analyse(df):
    """Plot seasonal data, ADF results and correlograms for every season.

    For each of "Winter", "Spring", "Summer" and "Autumn" in
    ``split_data(df)`` this draws, on 2x2 grids (one panel per year):
    the data with its weekly-deseasonalised "total load actual" series
    (printing an augmented Dickey-Fuller summary of the deseasonalised
    series), then the ACF and PACF of the actual series, then the ACF and
    PACF of the deseasonalised series.

    Args:
        df: Data accepted by ``split_data``.
    """
    column = "total load actual"
    period = 168
    seasonal_data = split_data(df)

    def new_grid(title):
        # Fresh 2x2 figure with a super-title; returns the four axes flat
        _, grid = plt.subplots(2, 2, figsize=(20, 15), dpi=250)
        plt.suptitle(title, y=0.99)
        return grid.flatten()

    # (title suffix, plotting function, deseasonalise first?)
    correlograms = (
        (" - ACFs (Actual)", plot_acf, False),
        (" - PACFs (Actual)", plot_pacf, False),
        (" - ACFs (Deseasonalised)", plot_acf, True),
        (" - PACFs (Deseasonalised)", plot_pacf, True),
    )

    for season in ("Winter", "Spring", "Summer", "Autumn"):
        years = seasonal_data[season]

        # Data and deseasonalised series, plus stationarity test output
        panels = new_grid(season + " - Data")
        for number, (year, ax) in enumerate(zip(years, panels), start=1):
            deseasonalised, _ = deseasonalise(year[column], period,
                                              "multiplicative")
            ax.plot(year, label="Actual")
            ax.plot(deseasonalised, label="Deseasonalised")
            ax.set_title("Year " + str(number))
            ax.set_xticks([])
            ax.legend(loc="best")

            adf = adfuller(deseasonalised, autolag='AIC')
            statistic = adf[0]
            print("Original Data")
            print("Test Statistic (rounded) = {:.3f}".format(statistic))
            print("P-value (rounded) = {:.3f}".format(adf[1]))
            print("Critical values: ")
            for level, threshold in adf[4].items():
                negation = "not " if threshold < statistic else ""
                # level is a percentage string such as "5%"
                confidence = 100 - int(level[:-1])
                print("\t{}: {:.4f} (The data is {}stationary with {}% "
                      "confidence)".format(level, threshold, negation,
                                           confidence))
            print()
        print()
        plt.show()

        # ACF / PACF figures, actual first and then deseasonalised
        for suffix, draw, remove_seasonality in correlograms:
            panels = new_grid(season + suffix)
            for year, ax in zip(years, panels):
                values = year[column]
                if remove_seasonality:
                    values, _ = deseasonalise(values, period,
                                              "multiplicative")
                draw(values, ax=ax, alpha=0.05, lags=168)
            plt.show()
def deseason(df):
    """Illustrate daily and weekly deseasonalisation on Jan-Feb 2015.

    Slices ``df`` from 2015-01-01 to 2015-02-28, removes a period-24 and a
    period-168 multiplicative seasonality from "total load actual", and
    plots the slice plus both deseasonalised series with the weekends
    shaded. A second figure shows the ACF and PACF of the period-168
    deseasonalised series.

    Args:
        df: Frame indexed so that ``df.loc`` accepts the timestamp strings
            used below, with a "total load actual" column.
    """
    column = "total load actual"
    year = df.loc["2015-01-01 00:00:00+01:00":"2015-02-28 23:00:00+01:00"]
    year_24, _ = deseasonalise(year[column], 24, "multiplicative")
    year_168, _ = deseasonalise(year[column], 168, "multiplicative")

    # (start, end) of every weekend inside the plotted window
    weekends = (
        ("2015-01-03 00:00:00+01:00", "2015-01-04 23:00:00+01:00"),
        ("2015-01-10 00:00:00+01:00", "2015-01-11 23:00:00+01:00"),
        ("2015-01-17 00:00:00+01:00", "2015-01-18 23:00:00+01:00"),
        ("2015-01-24 00:00:00+01:00", "2015-01-25 23:00:00+01:00"),
        ("2015-01-31 00:00:00+01:00", "2015-02-01 23:00:00+01:00"),
        ("2015-02-07 00:00:00+01:00", "2015-02-08 23:00:00+01:00"),
        ("2015-02-14 00:00:00+01:00", "2015-02-15 23:00:00+01:00"),
        ("2015-02-21 00:00:00+01:00", "2015-02-22 23:00:00+01:00"),
        ("2015-02-28 00:00:00+01:00", "2015-02-28 23:00:00+01:00"),
    )

    # Create figure: one wide panel on top, two panels underneath
    plt.rc('font', **{'size': 20})
    fig = plt.figure(figsize=(20, 15), dpi=250)
    gs = fig.add_gridspec(2, 2)
    ax_1 = fig.add_subplot(gs[0, :])
    ax_2 = fig.add_subplot(gs[1, 0])
    ax_3 = fig.add_subplot(gs[1, 1])
    panels = (
        (ax_1, year, "Year 1 - Winter - Actual"),
        (ax_2, year_24, "Hourly Seasonality Removed"),
        (ax_3, year_168, "Weekly Seasonality Removed"),
    )

    # Plot data
    for ax, series, _ in panels:
        ax.plot(series)

    # Add weekend highlighting
    for ax, _, _ in panels:
        for start, end in weekends:
            ax.axvspan(start, end, alpha=0.1)

    # Add titles
    for ax, _, title in panels:
        ax.set_xticks([])
        ax.set_title(title)
    plt.show()

    # Plot the ACF and PACF of the deseasonalised data
    fig, axes = plt.subplots(2, 1, figsize=(20, 15), dpi=250)
    acf_ax, pacf_ax = axes[0], axes[1]
    plot_acf(year_168, acf_ax, lags=168)
    plot_pacf(year_168, pacf_ax, lags=168)
    acf_ax.set_title("Year 1 - Winter - Deseasonalised ACF")
    pacf_ax.set_title("Year 1 - Winter - Deseasonalised PACF")
    plt.show()
def es_rnn_s(data, forecast_length, seasonality, demand_features,
             weather_features, weather, ensemble, multi_ts):
    """Build and train an ``ES_RNN_S`` model and return its forecast.

    The last ``forecast_length`` rows of ``data`` are withheld from
    training. Seasonal indices for every demand feature are estimated with
    ``deseasonalise`` and used to initialise the model, which is then
    trained and queried through ``train_and_predict_s``.

    Args:
        data: Sliceable table with one column per entry of
            ``demand_features``.
        forecast_length: Number of steps to forecast.
        seasonality: Seasonal period, used both for the model and for
            estimating the initial seasonal indices.
        demand_features: Columns of ``data`` to deseasonalise.
        weather_features: Weather columns; their count is added to the
            model input size when ``weather`` is truthy.
        weather: Whether weather inputs are used.
        ensemble: Forwarded to ``train_and_predict_s``.
        multi_ts: Selects the learning-rate settings and is forwarded to
            ``train_and_predict_s``.

    Returns:
        The first value returned by ``train_and_predict_s``.
    """
    # Model hyper parameters
    window_size = 336
    num_epochs = 35
    hidden_size = 40
    num_layers = 4
    dilations = [1, 4, 24, 168]
    level_variability_penalty = 80
    percentile = 0.49
    loss_func = pinball_loss
    grad_clipping = 20
    auto_lr = False
    variable_lr = True
    auto_rate_threshold = 1.005
    min_epochs_before_change = 2
    residuals = tuple([[1, 3]])
    init_level_smoothing = -1
    init_seasonal_smoothing = -1.1
    input_size = 1 + len(weather_features) if weather else 1
    batch_first = True
    skip_lstm = False

    # Learning rates: initial values plus per-epoch rate dictionaries
    if multi_ts:
        local_init_lr = 0.01
        global_init_lr = 0.008
        local_rates = {10: 2e-3, 20: 1e-3, 30: 5e-4}
        global_rates = {10: 5e-3, 20: 1e-3, 30: 5e-4}
    else:
        local_init_lr = 0.005
        global_init_lr = 0.008
        local_rates = {10: 1e-3, 20: 5e-4, 30: 1e-4}
        global_rates = {10: 5e-3, 20: 1e-3, 30: 5e-4}

    # Split the data
    train_data = data[:-forecast_length]
    forecast_data = data[-(forecast_length + window_size):]
    batch_size = len(train_data) - window_size - forecast_length + 1

    # Estimate the seasonal indices and initial smoothing levels. The
    # indices are estimated with the same period the model is built with
    # (previously a hard-coded 168).
    init_seas = {}
    init_l_smooth = {}
    init_s_smooth = {}
    for c in demand_features:
        _, indic = deseasonalise(train_data[c], seasonality,
                                 "multiplicative")
        init_seas[c] = indic
        init_l_smooth[c] = init_level_smoothing
        init_s_smooth[c] = init_seasonal_smoothing

    # Create the model
    lstm = ES_RNN_S(forecast_length, input_size, batch_size, hidden_size,
                    num_layers, demand_features, weather_features,
                    seasonality, dropout=0, cell_type='LSTM',
                    batch_first=batch_first, dilations=dilations,
                    residuals=residuals, init_seasonality=init_seas,
                    init_level_smoothing=init_l_smooth,
                    init_seas_smoothing=init_s_smooth).double()

    # Register gradient clipping function
    for p in lstm.parameters():
        p.register_hook(
            lambda grad: torch.clamp(grad, -grad_clipping, grad_clipping))

    # Set model in training mode
    lstm.train()

    # Train the model, and make a (possibly ensembled) prediction.
    prediction, _ = train_and_predict_s(
        lstm, train_data, window_size, forecast_length,
        level_variability_penalty, loss_func, num_epochs, local_init_lr,
        global_init_lr, percentile, auto_lr, variable_lr,
        auto_rate_threshold, min_epochs_before_change, forecast_data,
        local_rates, global_rates, ensemble, multi_ts, skip_lstm, weather)

    # Return prediction
    return prediction
def test_model_week(data, output_size, input_size, hidden_size, num_layers,
                    batch_first, dilations, demand_features, weather_features,
                    seasonality, residuals, window_size,
                    level_variability_penalty, loss_func, num_epochs,
                    local_init_lr, global_init_lr, init_level_smoothing,
                    init_seasonal_smoothing, percentile, auto_lr, variable_lr,
                    auto_rate_threshold, min_epochs_before_change, local_rates,
                    global_rates, grad_clipping, write_results, plot, year,
                    season, ensemble, multi_ts, skip_lstm, model, init_params,
                    res_base, weather):
    """Run seven rolling tests of an ES-RNN model against Naive2.

    For ``i`` from 8 down to 2 the model is trained on ``data[:-(i * 24)]``,
    a forecast of ``output_size`` steps is made, and sMAPE, MASE and OWA are
    computed against a Naive2 benchmark. Per-test details and overall OWA
    statistics are collected in a dictionary that can be written to JSON
    and/or plotted.

    Args:
        data: Table with a "total load actual" column plus every column in
            ``demand_features``.
        output_size: Forecast length.
        input_size, hidden_size, num_layers, batch_first, dilations,
            residuals: Passed to the model constructor.
        demand_features, weather_features: Feature names passed to the
            model; demand features are deseasonalised when ``init_params``.
        seasonality: Seasonal period used for the model, for
            deseasonalising and for MASE.
        window_size: Input window length.
        level_variability_penalty, loss_func, num_epochs, local_init_lr,
            global_init_lr, percentile, auto_lr, variable_lr,
            auto_rate_threshold, min_epochs_before_change, local_rates,
            global_rates, ensemble, multi_ts, skip_lstm, weather: Forwarded
            to the training function.
        init_level_smoothing, init_seasonal_smoothing: Initial smoothing
            values used when ``init_params`` is truthy.
        grad_clipping: Gradients are clamped to ``[-g, g]``.
        write_results: Write the results dictionary as JSON in ``res_base``.
        plot: Call ``plot_test`` on the results.
        year, season: Only used to build the results file name.
        model: Truthy selects ``ES_RNN_I`` / ``train_and_predict_i``,
            otherwise ``ES_RNN_S`` / ``train_and_predict_s``.
        init_params: Whether to initialise seasonality and smoothing.
        res_base: Directory for the results file.
    """
    target = "total load actual"

    # OWA of every test, and the per-test result records (keys 1..7)
    owas = []
    results = {i: {} for i in range(1, 8)}

    # Loop through each day in the week
    for i in range(8, 1, -1):
        test_no = 9 - i

        # Figure out start and end points of the training/test data.
        # NOTE: the i == 2 special case yields the same slice as the
        # formula only when output_size == 48 (offset of zero).
        end_train = -(i * 24)
        start_test = -(i * 24 + window_size)
        end_test = -(i * 24 - output_size) if i != 2 else None
        train_data = data[:end_train]
        test_data = data[start_test:end_test]
        mase_data = data[target][:end_test]

        # Initialise (or not) the parameters. Either way the deseasonalised
        # target series and its indices are kept for the Naive2 benchmark.
        if init_params:
            init_seas = {}
            init_l_smooth = {}
            init_s_smooth = {}
            for f in demand_features:
                deseas, indic = deseasonalise(train_data[f], seasonality,
                                              "multiplicative")
                init_seas[f] = indic
                init_l_smooth[f] = init_level_smoothing
                init_s_smooth[f] = init_seasonal_smoothing
                if f == target:
                    train_deseas = deseas
                    indices = indic
        else:
            train_deseas, indices = deseasonalise(train_data[target],
                                                  seasonality,
                                                  "multiplicative")
            init_seas = None
            init_l_smooth = None
            init_s_smooth = None

        # Calculate the batch size
        batch_size = len(train_data[target]) - window_size - output_size + 1

        # Create a new model; both classes take the same arguments
        model_cls = ES_RNN_I if model else ES_RNN_S
        lstm = model_cls(output_size, input_size, batch_size, hidden_size,
                         num_layers, demand_features, weather_features,
                         seasonality, dropout=0, cell_type='LSTM',
                         batch_first=batch_first, dilations=dilations,
                         residuals=residuals, init_seasonality=init_seas,
                         init_level_smoothing=init_l_smooth,
                         init_seas_smoothing=init_s_smooth).double()

        # Register gradient clipping function
        for p in lstm.parameters():
            p.register_hook(
                lambda grad: torch.clamp(grad, -grad_clipping, grad_clipping))

        # Set model in training mode
        lstm.train()

        print("----- TEST", str(test_no), "-----")

        # Train the model. The prediction returned here is discarded.
        if model:
            _, losses = train_and_predict_i(
                lstm, train_data, window_size, output_size,
                level_variability_penalty, loss_func, num_epochs,
                local_init_lr, global_init_lr, percentile, auto_lr,
                variable_lr, auto_rate_threshold, min_epochs_before_change,
                test_data, local_rates, global_rates, ensemble, weather)
        else:
            _, losses = train_and_predict_s(
                lstm, train_data, window_size, output_size,
                level_variability_penalty, loss_func, num_epochs,
                local_init_lr, global_init_lr, percentile, auto_lr,
                variable_lr, auto_rate_threshold, min_epochs_before_change,
                test_data, local_rates, global_rates, ensemble, multi_ts,
                skip_lstm, weather)

        # Set model into evaluation mode
        lstm.eval()

        # Make the model prediction
        prediction, actual, out_levels, out_seas, all_levels, all_seasons, \
            rnn_out = lstm.predict(test_data, window_size, output_size,
                                   weather)

        # Convert test data to correct form for results saving
        test_data = torch.tensor(test_data[target], dtype=torch.double)

        # Outputs carry a leading dimension of one, so remove it
        prediction = pd.Series(prediction.squeeze(0).detach().tolist())
        actual = pd.Series(actual.squeeze(0).detach().tolist())
        out_levels = out_levels.squeeze(0).detach().tolist()
        out_seas = out_seas.squeeze(0).detach().tolist()
        all_levels = [l.detach().item() for l in all_levels]
        all_seasons = [s.detach().item() for s in all_seasons]
        rnn_out = rnn_out.squeeze(0).detach().tolist()

        # Make Naive2 Prediction
        naive_fit_forecast = reseasonalise(
            naive_2(train_deseas, output_size), indices, "multiplicative")
        naive_prediction = naive_fit_forecast[-output_size:].reset_index(
            drop=True)

        # Calculate errors
        es_rnn_smape = sMAPE(prediction, actual)
        es_rnn_mase = MASE(prediction, mase_data, seasonality, output_size)
        naive_smape = sMAPE(naive_prediction, actual)
        naive_mase = MASE(naive_prediction, mase_data, seasonality,
                          output_size)
        owa = OWA(naive_smape, naive_mase, es_rnn_smape, es_rnn_mase)
        owas.append(owa)

        # Print results
        print("***** Test Results *****")
        print("ES-RNN sMAPE:", es_rnn_smape)
        print("Naive2 sMAPE:", naive_smape)
        print("ES-RNN MASE:", es_rnn_mase)
        print("Naive2 MASE:", naive_mase)
        print("OWA", owa)
        print("")

        # Save all results
        entry = results[test_no]
        entry["test_data"] = test_data.tolist()
        entry["ESRNN_prediction"] = prediction.to_list()
        entry["Naive2_prediction"] = naive_prediction.to_list()
        entry["all_levels"] = all_levels
        entry["out_levels"] = out_levels
        entry["all_seas"] = all_seasons
        entry["out_seas"] = out_seas
        entry["rnn_out"] = rnn_out
        entry["level_smoothing"] = float(
            lstm.level_smoothing_coeffs[target].data)
        entry["seasonality_smoothing"] = float(
            lstm.seasonality_smoothing_coeffs[target].data)
        entry["losses"] = losses
        sys.stderr.flush()
        sys.stdout.flush()

    # Print final results. The means are NaN when no test falls in the
    # corresponding group (mean of an empty selection).
    owas_np = np.array(owas)
    improved = owas_np[owas_np < 1.0]
    num_improved = len(improved)
    avg_improve = float(np.around(improved.mean(), decimals=3))
    avg_decline = float(np.around(owas_np[owas_np >= 1.0].mean(),
                                  decimals=3))
    avg_owa = float(np.around(np.mean(owas), decimals=3))

    print("***** OVERALL RESULTS *****")
    print("Average OWA:", avg_owa)
    print("No. Improved:", num_improved)
    print("Avg. Improvement:", avg_improve)
    print("Avg. Decline:", avg_decline)
    sys.stderr.flush()
    sys.stdout.flush()

    # Make note of final results
    results["overall"] = {
        "avg_owa": avg_owa,
        "num_improved": num_improved,
        "avg_improvement": avg_improve,
        "avg_decline": avg_decline
    }

    # Write results; the file name depends on how many CLI arguments exist
    if write_results:
        season_dict = {0: "_winter", 1: "_spring", 2: "_summer", 3: "_autumn"}
        n_args = len(sys.argv)
        # Only read argv[1] when it exists, so that a run without CLI
        # arguments reaches the "test.txt" fallback instead of raising
        # IndexError.
        name = sys.argv[1] if n_args > 1 else None

        if n_args == 2:
            filename = name + ".txt"
        elif n_args == 3:
            filename = name + "_year_" + str(year) + ".txt"
        elif n_args == 4:
            s = season_dict[season]
            filename = name + "_year_" + str(year) + s + ".txt"
        elif n_args == 6:
            s = season_dict[season]
            filename = name + "_year_" + str(year) + s + "_" +\
                str(init_level_smoothing) + "_" +\
                str(init_seasonal_smoothing) + ".txt"
        else:
            filename = "test.txt"

        res_path = os.path.join(res_base, filename)
        with open(res_path, "w") as res:
            json.dump(results, res)

    if plot:
        plot_test(results, window_size, output_size, print_results=True)
def test(demand_df, weather_df, season_no, model_no):
    """Evaluate one forecasting model on one season and save the results.

    For each of the four years of the chosen season, seven rolling 48-hour
    forecasts are produced (repeated ``num_reps`` times), and sMAPE, RMSE,
    MASE and MAE are computed for every lead time for both the model and a
    Naive2 benchmark, from which OWA is derived. Averages, raw results,
    forecasts and (where the model returns them) fitted parameters are
    written to JSON files in the ``results`` directory next to this file;
    the summary files are read first and must already exist.

    Args:
        demand_df: Demand data with "total load actual" and
            "total load forecast" columns. It is not modified.
        weather_df: Weather data whose columns are added to a copy of
            ``demand_df``.
        season_no: 1-4, mapped to Spring, Summer, Autumn, Winter.
        model_no: 1-19, key into the model table below.
    """
    demand_features = demand_df.columns
    weather_features = weather_df.columns

    # Add the weather data to the demand data. A copy is used so the
    # caller's frame is left untouched; mutating it made a second call see
    # the weather columns in demand_features.
    demand_df = demand_df.copy()
    for c in weather_df.columns:
        demand_df[c] = weather_df[c]

    # Testing hyper-parameters
    seasonality = 168
    forecast_length = 48

    # For the ES_RNN_S, for each test, train the model num_ensemble
    # times and average the predictions. Further, if internal ensembling is
    # also specified, each prediction from the model will actually be the
    # average of the predictions from the last 5 epochs
    ensemble = False
    num_ensemble = 3

    # True = use final week for testing, False = use penultimate week for
    # validation
    testing = True

    # Model No.: [Function, Name, Deseasonalise?, Additional Parameters,
    # Return Parameters, Number of Repetitions]
    test_dict = {
        1: [naive.naive_1, 'Naive1', False, None, False, 10],
        2: [naive.naive_2, 'Naive2', True, None, False, 10],
        3: [naive.naive_s, 'NaiveS', False, [seasonality], False, 10],
        4: [exponential_smoothing.ses, 'SES', True, None, True, 10],
        5: [exponential_smoothing.holt, 'Holt', True, None, True, 10],
        6: [exponential_smoothing.damped, 'Damped', True, None, True, 10],
        7: [
            exponential_smoothing.holt_winters, 'Holt-Winters', False,
            [seasonality], True, 10
        ],
        8: [exponential_smoothing.comb, 'Comb', True, None, False, 10],
        9: [arima.arima, 'ARIMA', True, "-- See arima_orders --", True, 10],
        10: [arima.sarima, 'SARIMA', False, "-- See sarima_orders --", True,
             1],
        11: [arima.auto, 'Auto', False, [168], True, 1],
        12: [theta.theta, 'Theta', True, None, True, 10],
        13: [None, 'TSO', False, None, False, 1],
        14: [
            hybrid.es_rnn_s, 'ES-RNN-S', False, [
                seasonality, demand_features, weather_features, False,
                ensemble, True
            ], False, 1
        ],
        15: [
            hybrid.es_rnn_s, 'ES-RNN-SW', False, [
                seasonality, demand_features, weather_features, True,
                ensemble, True
            ], False, 1
        ],
        16: [
            hybrid.es_rnn_s, 'ES-RNN-D', False, [
                seasonality, demand_features, weather_features, False,
                ensemble, False
            ], False, 1
        ],
        17: [
            hybrid.es_rnn_s, 'ES-RNN-DW', False, [
                seasonality, demand_features, weather_features, True,
                ensemble, False
            ], False, 1
        ],
        18: [
            hybrid.es_rnn_i, 'ES-RNN-I', False,
            [seasonality, demand_features, weather_features, False,
             ensemble], False, 1
        ],
        19: [
            hybrid.es_rnn_i, 'ES-RNN-IW', False,
            [seasonality, demand_features, weather_features, True,
             ensemble], False, 1
        ],
    }

    # Optimal SARIMA orders for each season
    sarima_orders = {
        1: [(2, 0, 0), (1, 0, 1, 168)],
        2: [(2, 0, 1), (1, 0, 1, 168)],
        3: [(2, 0, 1), (1, 0, 1, 168)],
        4: [(1, 0, 2), (1, 0, 1, 168)]
    }

    # Optimum ARIMA Parameters (automatically checked, using the
    # identify_arima function)
    arima_orders = {
        1: [[(2, 0, 0)], [(2, 0, 0)], [(1, 0, 2)], [(2, 0, 2)]],
        2: [[(2, 0, 0)], [(2, 0, 0)], [(2, 0, 2)], [(2, 0, 2)]],
        3: [[(1, 0, 1)], [(2, 0, 2)], [(2, 0, 2)], [(2, 0, 2)]],
        4: [[(2, 0, 1)], [(2, 0, 2)], [(2, 0, 2)], [(2, 0, 2)]],
    }

    seas_dict = {1: "Spring", 2: "Summer", 3: "Autumn", 4: "Winter"}

    # Get the parameters for the model
    model_func, model_name, deseasonalise, params, ret_params, num_reps = \
        test_dict[model_no]

    error_pairs = [("sMAPE", errors.sMAPE), ("RMSE", errors.RMSE),
                   ("MASE", errors.MASE), ("MAE", errors.MAE)]
    metric_names = [name for name, _ in error_pairs] + ["OWA"]
    reps = range(1, num_reps + 1)
    year_nos = range(1, 5)
    test_nos = range(1, 8)

    def empty_table():
        # metric -> repetition -> year -> test -> one slot per lead time
        return {
            e: {
                r: {
                    y: {t: [0] * forecast_length for t in test_nos}
                    for y in year_nos
                }
                for r in reps
            }
            for e in metric_names
        }

    # Build empty data structures to hold results, naive results, forecasts
    # and fitted parameters
    results = empty_table()
    n_results = empty_table()
    forecasts = {
        y: {r: {t: [] for t in test_nos} for r in reps}
        for y in year_nos
    }
    final_params = {y: [] for y in year_nos}

    all_data = stats_helpers.split_data(demand_df)
    years_df = all_data[seas_dict[season_no]]

    # The final 7 days are reserved for final testing
    if testing:
        years = [years_df[i]["total load actual"] for i in range(4)]
    else:
        years = [years_df[i]["total load actual"][:-7 * 24]
                 for i in range(4)]

    # Loop through the years
    for y_index, y in enumerate(years):
        # Specify correct ARIMA parameters
        if model_no == 9:
            params = arima_orders[season_no][y_index]
        if model_no == 10:
            params = sarima_orders[season_no]

        # Loop through the week of tests
        for t in range(8, 1, -1):
            # Get training and test data. An end offset of zero must be
            # written as None (y[:-0] would be empty).
            train_end = -(t * 24)
            test_end = -(t * 24 - forecast_length) if t > 2 else None
            train_data = y[:train_end]
            test_data = y[train_end:test_end]
            tso_data = years_df[y_index]["total load forecast"][
                train_end:test_end]

            # Deseasonalise, always required for Naive2
            train_deseas, indices = stats_helpers.deseasonalise(
                train_data, seasonality, "multiplicative")

            # Generate naive forecast used as the benchmark
            naive_fit_forecast = stats_helpers.reseasonalise(
                naive.naive_2(train_deseas, forecast_length), indices,
                "multiplicative")
            naive_forecast = naive_fit_forecast[-forecast_length:]

            # Use deseasonalised data if needed
            if deseasonalise:
                train_data = train_deseas

            # Loop through the repetitions
            for r in reps:
                # Handle the hybrid model individually
                if model_no > 13:
                    # Hybrid model is given the full dataframe up to the
                    # end of the test window
                    if testing:
                        test_end = -((t - 2) * 24) if t > 2 else None
                    else:
                        # Shifted back by the 7 reserved days
                        test_end = -((t + 5) * 24)
                    train_data = years_df[y_index][:test_end]

                    # Generate ensemble if we are ensembling
                    if ensemble:
                        pred_ensemble = [
                            model_func(train_data, forecast_length, *params)
                            for _ in range(num_ensemble)
                        ]
                        forec_results = pd.Series(
                            np.mean(pred_ensemble, axis=0))
                    else:
                        forec_results = model_func(train_data,
                                                   forecast_length, *params)

                # Handle the TSO forecast individually (no forecast method)
                elif model_no == 13:
                    forec_results = tso_data

                # Handle the statistical models. Fit the model and forecast,
                # with additional params if needed
                elif params is not None:
                    forec_results = model_func(train_data, forecast_length,
                                               *params)
                else:
                    forec_results = model_func(train_data, forecast_length)

                # Split results into fit-forecast and parameters if the
                # model also returned the values of its fitted parameters
                if ret_params:
                    fit_forecast, fit_params = forec_results
                else:
                    fit_forecast = forec_results

                # Reseasonalise if necessary
                if deseasonalise:
                    fit_forecast = stats_helpers.reseasonalise(
                        fit_forecast, indices, "multiplicative")

                # Select only the forecast, not the fitted values
                forecast = fit_forecast[-forecast_length:]

                # Loop through the error functions and the lead times
                for e_name, e_func in error_pairs:
                    for lead in range(1, forecast_length + 1):
                        if e_name == "MASE":
                            # MASE receives the series up to this lead time
                            at_series_end = (t == 2
                                             and lead == forecast_length)
                            end = None if at_series_end else -(t * 24 - lead)
                            error = e_func(forecast[:lead], y[:end],
                                           seasonality, lead)
                            n_error = e_func(naive_forecast[:lead], y[:end],
                                             seasonality, lead)
                        else:
                            error = e_func(forecast[:lead], test_data[:lead])
                            n_error = e_func(naive_forecast[:lead],
                                             test_data[:lead])

                        # Save error results for all lead times
                        results[e_name][r][y_index + 1][t - 1][lead - 1] = \
                            error
                        n_results[e_name][r][y_index + 1][t - 1][lead - 1] = \
                            n_error

                # Save 48 hour forecast
                forecasts[y_index + 1][r][t - 1] = forecast.to_list()

                # Save model params only for final repetition and train time
                if r == num_reps and t == 2 and ret_params:
                    final_params[y_index + 1] = fit_params

            print("Year:", str(y_index), "Test:", str(t), "Finished")

    # Calculate OWA for all forecasts
    for r in reps:
        for y in year_nos:
            for t in test_nos:
                for lead in range(forecast_length):
                    results["OWA"][r][y][t][lead] = errors.OWA(
                        n_results["sMAPE"][r][y][t][lead],
                        n_results["MASE"][r][y][t][lead],
                        results["sMAPE"][r][y][t][lead],
                        results["MASE"][r][y][t][lead],
                    )

    def lead_time_means(metric):
        # Mean over repetitions, years and tests for every lead time
        means = {}
        for lead in range(1, forecast_length + 1):
            values = [results[metric][r][y][t][lead - 1]
                      for r in reps for y in year_nos for t in test_nos]
            means[lead] = np.around(np.mean(values), decimals=3)
        return means

    def save_lead_times(relative_path, means):
        # Update this model's season slot in an existing lead-time file
        path = os.path.join(file_path, relative_path)
        with open(path) as file:
            stored = json.load(file)
        for lead, value in means.items():
            stored[str(lead)][model_name][season_no - 1] = value
        with open(path, "w") as file:
            json.dump(stored, file)

    # Average the single 48 hour forecast results
    all_res = [results["OWA"][r][y][t][forecast_length - 1]
               for r in reps for y in year_nos for t in test_nos]
    mean = np.around(np.mean(all_res), decimals=3)
    std = np.around(np.std(all_res), decimals=3)

    # Save averaged single 48 forecast results
    file_path = os.path.abspath(os.path.dirname(__file__))
    res_path = os.path.join(file_path, "results/results_1.txt")
    with open(res_path) as file:
        results_1 = json.load(file)
    results_1[seas_dict[season_no]][model_name] = [mean, std]
    with open(res_path, "w") as file:
        json.dump(results_1, file)

    # Average the lead time results for OWA, sMAPE and MASE
    all_res_owa = lead_time_means("OWA")
    all_res_smape = lead_time_means("sMAPE")
    all_res_mase = lead_time_means("MASE")

    # Save the lead time results
    save_lead_times("results/results_48_seasons_owa.txt", all_res_owa)
    save_lead_times("results/results_48_seasons_smape.txt", all_res_smape)
    save_lead_times("results/results_48_seasons_mase.txt", all_res_mase)

    # Save the raw forecasts and results
    res_filename = seas_dict[season_no] + "_" + model_name + "_results.txt"
    forec_filename = seas_dict[season_no] + "_" + model_name + \
        "_forecasts.txt"
    res_path = os.path.join(file_path, "results/" + res_filename)
    forec_path = os.path.join(file_path, "results/" + forec_filename)
    with open(res_path, "w") as file:
        json.dump(results, file)
    with open(forec_path, "w") as file:
        json.dump(forecasts, file)

    # Save the parameters (if model returns parameters)
    if ret_params:
        param_path = os.path.join(file_path, "results/params.txt")
        with open(param_path) as file:
            saved_params = json.load(file)
        for y in year_nos:
            saved_params[model_name][str(season_no)][str(y)] = \
                final_params[y]
        with open(param_path, "w") as file:
            json.dump(saved_params, file)