Пример #1
0
def holt_winters_test(df):
    """Compare Holt-Winters fits with and without supplied start parameters.

    Takes the "total load actual" series of the third "Summer" entry returned
    by ``split_data(df)``, holds out the final 48 observations, fits a
    multiplicative-seasonal ``ExponentialSmoothing`` model (period 168) twice
    -- once with explicit ``start_params`` and once without -- prints the
    fitted parameters of each and plots both 48-step forecasts against the
    held-out data.

    :param df: data accepted by ``split_data``; the selected entry must
        provide a "total load actual" column.
    """
    horizon = 48
    seasonal_period = 168

    data = split_data(df)["Summer"][2]["total load actual"]
    train_data = data[:-horizon]
    test_data = data[-horizon:]
    last_index = len(train_data) + horizon - 1

    # Full parameter layout used by the optimiser:
    # [alpha, beta, gamma, l0, b0, phi, s0,.., s_(m - 1)]
    # NOTE(review): only three scalars plus the seasonal indices are supplied
    # here, presumably because no trend component is fitted -- confirm
    # against the installed statsmodels version.
    _, indices = deseasonalise(train_data, seasonal_period, "multiplicative")
    # .iloc makes the positional access explicit; plain [0] on a
    # label-indexed Series relies on a deprecated positional fallback.
    init_params = [0.25, 0.75, train_data.iloc[0]]
    init_params.extend(indices)

    # Fit 1: optimiser seeded with the start values built above
    initialised_fit = ExponentialSmoothing(
        train_data, seasonal_periods=seasonal_period,
        seasonal="mul").fit(use_basinhopping=True, start_params=init_params)
    init_prediction = initialised_fit.predict(0, last_index)
    print(initialised_fit.params)

    # Fit 2: optimiser left to choose its own start values
    default_fit = ExponentialSmoothing(
        train_data, seasonal_periods=seasonal_period,
        seasonal="mul").fit(use_basinhopping=True)
    prediction = default_fit.predict(0, last_index)
    print(default_fit.params)

    # Only the out-of-sample tail of each prediction is compared
    _, ax = plt.subplots(1, 1, figsize=(20, 15), dpi=250)
    ax.plot(test_data, label="Actual Data")
    ax.plot(prediction[-horizon:], label="Non initialised")
    ax.plot(init_prediction[-horizon:], label="Initialised")
    ax.legend(loc="best")
    plt.show()
Пример #2
0
def plot_a_season(df, season):
    """Show every year of one season as four consecutive quarter plots.

    For each entry of ``split_data(df)[season]`` the "total load actual"
    series is cut at 1/4, 1/2 and 3/4 of its length (boundaries truncated to
    integers) and each piece is shown in its own figure.

    :param df: data accepted by ``split_data``.
    :param season: key into the mapping returned by ``split_data``.
    """
    by_season = split_data(df)

    for year_data in by_season[season]:
        load = year_data["total load actual"]
        n_points = len(load)

        first_cut = int(n_points / 4.0)
        second_cut = int(n_points / 2.0)
        third_cut = int(3.0 * n_points / 4.0)

        # (start, stop) pairs for the first, second, third and fourth quarter
        quarters = (
            (None, first_cut),
            (first_cut, second_cut),
            (second_cut, third_cut),
            (third_cut, None),
        )
        for start, stop in quarters:
            _, ax = plt.subplots(1, 1, figsize=(20, 15), dpi=250)
            ax.plot(load[start:stop])
            plt.show()
Пример #3
0
def plot_forecasts(df, season, year):
    """Plot saved 48-hour forecasts against the actual load for each test.

    For every test ``8, 7, ..., 2`` one figure with two stacked axes is
    shown. Both axes carry the actual "total load actual" window for that
    test; the upper axes adds the forecasts of the methods listed in ``top``
    and the lower axes those listed in ``bottom``. Methods in neither list
    are not drawn.

    Forecasts are read from every ``<season>_<method>_forecasts...`` file in
    the hard-coded results directory and are indexed as
    ``[str(year)]["1"][str(test - 1)]``.

    :param df: data accepted by ``split_data``.
    :param season: season name; must match both a key of ``split_data(df)``
        and the first "_"-separated part of the forecast file names.
    :param year: 1-based year number, matching the keys used in the forecast
        files. The actual data is taken from list position ``year - 1``, the
        same key/index pairing the other blocks of this file use.
    """
    test_path = "/Users/matt/Projects/AdvancedResearchProject/results" \
                "/non_ensemble_results/res/"

    # Methods drawn on the upper and on the lower axes respectively
    top = ["NaiveS", "TSO"]
    bottom = [
        "Theta", "Damped", "ARIMA", "SARIMA", "Holt-Winters", "Naive1"
    ]

    split = split_data(df)
    # Forecast files key years from 1; the split data is a 0-based sequence
    load = split[season][year - 1]["total load actual"]

    # Read each relevant forecast file once instead of once per test
    (_, _, filenames) = next(os.walk(test_path))
    season_forecasts = []
    for file in filenames:
        if "forecasts" not in file:
            continue

        seas, method, _ = file.split("_")
        if seas != season:
            continue

        with open(test_path + file) as f:
            season_forecasts.append((method, json.load(f)))

    for test in range(8, 1, -1):
        fig, axes = plt.subplots(2, 1, figsize=(20, 15), dpi=250)

        # 48-hour window starting `test` days before the end of the data;
        # a stop of -0 would give an empty slice, hence None for test == 2
        train_end = -(test * 24)
        test_end = -(test * 24 - 48) if test > 2 else None
        actual = load[train_end:test_end].tolist()

        axes[0].plot(actual, label="Actual")
        axes[1].plot(actual, label="Actual")

        for method, all_forecasts in season_forecasts:
            if method in top:
                target = axes[0]
            elif method in bottom:
                target = axes[1]
            else:
                continue  # To handle the empty 'Auto' forecasts

            forecast = all_forecasts[str(year)][str(1)][str(test - 1)]
            target.plot(forecast, label=method, marker='o')

        axes[0].legend(loc="best")
        axes[1].legend(loc="best")
        plt.show()
Пример #4
0
def analyse(df):
    """Plot and test the load data of every season, year by year.

    For each of "Winter", "Spring", "Summer" and "Autumn" this shows, on
    2x2 grids (one panel per year, at most four years):

    1. the actual "total load actual" series with its multiplicatively
       deseasonalised version (period 168), printing an augmented
       Dickey-Fuller test of the deseasonalised series for each year;
    2. ACFs and PACFs of the actual series;
    3. ACFs and PACFs of the deseasonalised series.

    :param df: data accepted by ``split_data``; each year entry must provide
        a "total load actual" column.
    """
    all_data = split_data(df)

    for season in ["Winter", "Spring", "Summer", "Autumn"]:
        years = all_data[season]

        # Kept so the ACF/PACF figures reuse the series instead of
        # deseasonalising every year again for each figure
        loads = []
        deseasonalised = []

        _, axes = plt.subplots(2, 2, figsize=(20, 15), dpi=250)
        plt.suptitle(season + " - Data", y=0.99)
        for i, (year, ax) in enumerate(zip(years, axes.flatten())):
            load = year["total load actual"]
            deseason, _ = deseasonalise(load, 168, "multiplicative")
            loads.append(load)
            deseasonalised.append(deseason)

            # Plot only the load column: passing the whole frame would draw
            # every column of the year under the single "Actual" label
            ax.plot(load, label="Actual")
            ax.plot(deseason, label="Deseasonalised")
            ax.set_title("Year " + str(i + 1))
            ax.set_xticks([])
            ax.legend(loc="best")

            # NOTE(review): the test runs on the deseasonalised series even
            # though the heading printed below says "Original Data".
            adf = adfuller(deseason, autolag='AIC')
            print("Original Data")
            print("Test Statistic (rounded) = {:.3f}".format(adf[0]))
            print("P-value (rounded) = {:.3f}".format(adf[1]))
            print("Critical values: ")
            for k, v in adf[4].items():
                # A test statistic above the critical value is reported as
                # "not stationary" at that level
                print("\t{}: {:.4f} (The data is {}stationary with {}% "
                      "confidence)".format(k, v, "not " if v < adf[0] else "",
                                           100 - int(k[:-1])))
            print()
        print()
        plt.show()

        # One figure per (title, plotting function, series) combination
        correlation_plots = (
            ("ACFs (Actual)", plot_acf, loads),
            ("PACFs (Actual)", plot_pacf, loads),
            ("ACFs (Deseasonalised)", plot_acf, deseasonalised),
            ("PACFs (Deseasonalised)", plot_pacf, deseasonalised),
        )
        for title, plot_func, series_per_year in correlation_plots:
            _, axes = plt.subplots(2, 2, figsize=(20, 15), dpi=250)
            plt.suptitle(season + " - " + title, y=0.99)
            for series, ax in zip(series_per_year, axes.flatten()):
                plot_func(series, ax=ax, alpha=0.05, lags=168)

            plt.show()
def run(demand_df, weather_df):
    """Configure a single hybrid-model experiment and run it.

    All experiment settings are fixed in this function; four values may be
    overridden from the command line:

    * ``sys.argv[2]`` -- year index (default: a per-season preset),
    * ``sys.argv[3]`` -- season index, 0 = Winter, 1 = Spring, 2 = Summer,
      3 = Autumn (default: 2),
    * ``sys.argv[4]`` -- initial level smoothing (default: -1),
    * ``sys.argv[5]`` -- initial seasonal smoothing (default: -1.1).

    The chosen data set and all settings are passed to ``test_model_week``.

    :param demand_df: demand data accepted by ``split_data``.
    :param weather_df: weather data; its columns are appended to a copy of
        the demand data only when the ``weather`` flag below is enabled.
    """
    # Optional command line arguments to specify year/season
    year = -1 if len(sys.argv) < 3 else int(sys.argv[2])
    season = -1 if len(sys.argv) < 4 else int(sys.argv[3])

    # Testing parameters
    window_size = 336
    output_size = 48
    plot = False
    ensemble = False
    skip_lstm = False
    init_params = True
    write_results = True
    file_location = str(os.path.abspath(os.path.dirname(__file__)))
    model = True  # True = Ingram, False = Smyl
    multiple = False  # Use multiple time series in Smyl's model
    weather = False  # Include weather data in the chosen model
    valid = True  # True = use validation set, False = use test set
    batch_first = True

    demand_features = demand_df.columns
    weather_features = weather_df.columns

    # If using weather features, add them to the demand data. A copy is used
    # so that the caller's frame is left untouched.
    if weather:
        demand_df = demand_df.copy()
        for c in weather_df.columns:
            demand_df[c] = weather_df[c]

    # all_data = {Season: [Year1, ...]}
    all_data = split_data(demand_df)

    # Year used for each season when none is given on the command line
    default_years = [("Winter", 1), ("Spring", 0), ("Summer", 3),
                     ("Autumn", 2)]

    # Each year = [<- 12 week Train -> | <- 1 week Val. -> | <- 1 week Test ->]
    test_sets = [
        all_data[name][year if year >= 0 else default_year]
        for name, default_year in default_years
    ]
    # The validation sets drop the final (test) week
    valid_sets = [full_year[:-(7 * 24)] for full_year in test_sets]

    if file_location == "/ddn/home/gkxx72/AdvancedResearchProject/dev/hybrid":
        res_base = "/ddn/home/gkxx72/AdvancedResearchProject/run/test_res/"
    else:
        res_base = "/Users/matt/Projects/AdvancedResearchProject/test/"

    chosen_sets = valid_sets if valid else test_sets
    data = chosen_sets[season if season >= 0 else 2]

    # Set the number of features
    if model:
        input_size = len(data.columns)  # My model, with or without weather
    elif weather:
        input_size = 1 + len(weather_df.columns)  # Smyl's model, with weather
    else:
        input_size = 1  # Smyl's model, without weather

    # Learning-rate schedules: {epoch: rate}
    global_rates_dict = {
        "ingram": {
            10: 5e-3,
            20: 1e-3,
            30: 5e-4
        },
        "smyl_1": {
            10: 5e-3,
            20: 1e-3,
            30: 5e-4
        },
        "smyl_m": {
            10: 5e-3,
            20: 1e-3,
            30: 5e-4
        }
    }
    global_init_rates_dict = {
        "ingram": 0.008,
        "smyl_1": 0.008,
        "smyl_m": 0.008
    }
    local_rates_dict = {
        "ingram": {
            10: 2e-3,
            20: 1e-3,
            30: 5e-4
        },
        "smyl_1": {
            10: 1e-3,
            20: 5e-4,
            30: 1e-4
        },
        "smyl_m": {
            10: 2e-3,
            20: 1e-3,
            30: 5e-4
        }
    }
    local_init_rates_dict = {"ingram": 0.01, "smyl_1": 0.005, "smyl_m": 0.01}

    # Pick the schedule matching the selected model variant
    if model:
        rates_key = "ingram"
    elif multiple:
        rates_key = "smyl_m"
    else:
        rates_key = "smyl_1"

    global_init_lr = global_init_rates_dict[rates_key]
    global_rates = global_rates_dict[rates_key]
    local_init_lr = local_init_rates_dict[rates_key]
    local_rates = local_rates_dict[rates_key]

    # Model hyper parameters
    num_epochs = 35
    hidden_size = 40
    num_layers = 4
    dilations = [1, 4, 24, 168]
    level_variability_penalty = 80
    percentile = 0.49
    loss_func = pinball_loss
    grad_clipping = 20
    auto_lr = False  # Automatically adjust learning rates
    variable_lr = True  # Use list of epoch/rate pairs
    auto_rate_threshold = 1.005  # If loss(x - 1) < 1.005 * loss(x) reduce rate
    min_epochs_before_change = 2
    residuals = tuple([[1, 3]])  # Residual connection from 2nd out -> 4th out
    seasonality = 168
    init_level_smoothing = int(sys.argv[4]) if len(sys.argv) >= 5 else -1
    # argv[5] only exists when at least six entries were passed
    init_seasonal_smoothing = int(sys.argv[5]) if len(sys.argv) >= 6 else -1.1

    test_model_week(data, output_size, input_size, hidden_size, num_layers,
                    batch_first, dilations, demand_features, weather_features,
                    seasonality, residuals, window_size,
                    level_variability_penalty, loss_func, num_epochs,
                    local_init_lr, global_init_lr, init_level_smoothing,
                    init_seasonal_smoothing, percentile, auto_lr, variable_lr,
                    auto_rate_threshold, min_epochs_before_change, local_rates,
                    global_rates, grad_clipping, write_results, plot, year,
                    season, ensemble, multiple, skip_lstm, model, init_params,
                    res_base, weather)
def check_errors(df):
    """Recompute average sMAPEs from saved forecasts for one fixed season.

    Loads the saved Naive2, NaiveS, TSO and ES-RNN-I forecasts together with
    the saved TSO results for the hard-coded season, recomputes the sMAPE of
    every 48-hour forecast (4 years x 7 tests) against the actual
    "total load actual" data and prints the mean per method, alongside the
    mean of the final-lead-time sMAPE stored in the TSO results file.

    :param df: data accepted by ``split_data``.
    """
    forecast_length = 48
    base = "/Users/matt/Projects/AdvancedResearchProject/results" \
           "/non_ensemble_results/res/"
    file_prefixes = ["Spring_", "Summer_", "Autumn_", "Winter_"]
    season_names = {1: "Spring", 2: "Summer", 3: "Autumn", 4: "Winter"}

    # Begin with a hard coded season:
    seas = 3  # 0: Spring, 1: Summer, 2: Autumn, 3: Winter
    seas_n = season_names[seas + 1]

    split = split_data(df)

    def load_json(suffix):
        # Read one JSON file belonging to the selected season
        with open(base + file_prefixes[seas] + suffix) as f:
            return json.load(f)

    # Load forecasts
    naive2_forecasts = load_json("Naive2_forecasts.txt")
    naives_forecasts = load_json("NaiveS_forecasts.txt")
    tso_forecasts = load_json("TSO_forecasts.txt")
    es_rnn_i_forecasts = load_json("ES-RNN-I_forecasts.txt")

    # TSO Results
    tso_results = load_json("TSO_results.txt")

    # (forecast table, sMAPE accumulator) pairs, in evaluation order
    tso_smapes, es_rnn_smapes = [], []
    naive2_smapes, naives_smapes = [], []
    tso_test_smapes = []
    evaluated = (
        (tso_forecasts, tso_smapes),
        (es_rnn_i_forecasts, es_rnn_smapes),
        (naive2_forecasts, naive2_smapes),
        (naives_forecasts, naives_smapes),
    )

    # Calculate sMAPES:
    for y in range(1, 5):
        year_key = str(y)
        for t in range(1, 8):
            test_key = str(t)

            # Forecast stored under key t starts t + 1 days before the end
            days_back = t + 1
            train_end = -(days_back * 24)
            if days_back > 2:
                test_end = -(days_back * 24 - forecast_length)
            else:
                test_end = None

            predictions = [(table[year_key][str(1)][test_key], smapes)
                           for table, smapes in evaluated]
            actual = split[seas_n][
                y - 1]["total load actual"][train_end:test_end].tolist()

            for predicted, smapes in predictions:
                smapes.append(sMAPE(pd.Series(actual), pd.Series(predicted)))

            # Stored results are indexed [error][repetition][year][test]
            tso_test_smapes.append(
                tso_results["sMAPE"][str(1)][year_key][test_key][
                    forecast_length - 1])

    print("Average ES-RNN-I sMAPE:", np.mean(es_rnn_smapes))
    print("Average TSO sMAPE:", np.mean(tso_smapes))
    print("Average TSO (Results):", np.mean(tso_test_smapes))
    print("Average Naive2 sMAPE:", np.mean(naive2_smapes))
    print("Average NaiveS sMAPE:", np.mean(naives_smapes))
Пример #7
0
def _empty_error_table(error_names, num_reps, forecast_length):
    """Build a zero-filled {error: {rep: {year: {test: [lead times]}}}} table.

    Repetitions are keyed 1..num_reps, years 1..4 and tests 1..7; every leaf
    is a fresh list of ``forecast_length`` zeros.
    """
    return {
        e: {
            r: {
                y: {t: [0] * forecast_length
                    for t in range(1, 8)}
                for y in range(1, 5)
            }
            for r in range(1, num_reps + 1)
        }
        for e in error_names
    }


def _mean_by_lead_time(table, num_reps, forecast_length):
    """Average one error's table over repetitions, years and tests.

    Returns {lead time (1-based): mean rounded to 3 decimals}.
    """
    means = {}
    for lead in range(1, forecast_length + 1):
        values = [
            table[r][y][t][lead - 1]
            for r in range(1, num_reps + 1)
            for y in range(1, 5)
            for t in range(1, 8)
        ]
        means[lead] = np.around(np.mean(values), decimals=3)
    return means


def _store_lead_time_means(res_path, model_name, season_no, lead_means):
    """Write per-lead-time means into an existing JSON results file."""
    with open(res_path) as file:
        results_48 = json.load(file)
    for lead, value in lead_means.items():
        results_48[str(lead)][model_name][season_no - 1] = value
    with open(res_path, "w") as file:
        json.dump(results_48, file)


def test(demand_df, weather_df, season_no, model_no):
    """Evaluate one forecasting model on one season and save the results.

    For each of the four years of the chosen season and for each of seven
    rolling tests (forecast origins 8, 7, ..., 2 days before the end of the
    data) the selected model produces a 48-hour forecast. sMAPE, RMSE, MASE
    and MAE are computed for every lead time for both the model and a Naive2
    benchmark, and OWA is derived from them.

    The following JSON files, located in ``results/`` next to this file, are
    read and/or written: ``results_1.txt``,
    ``results_48_seasons_{owa,smape,mase}.txt``,
    ``<Season>_<Model>_results.txt``, ``<Season>_<Model>_forecasts.txt`` and,
    for models that return fitted parameters, ``params.txt``.

    :param demand_df: demand data accepted by ``stats_helpers.split_data``;
        the yearly frames must provide "total load actual" and
        "total load forecast" columns. The caller's frame is not modified.
    :param weather_df: weather data whose columns are appended to a copy of
        the demand data.
    :param season_no: 1 = Spring, 2 = Summer, 3 = Autumn, 4 = Winter.
    :param model_no: key of the model in ``test_dict`` (1-19).
    :raises KeyError: if ``season_no`` or ``model_no`` is not a known key.
    """
    demand_features = demand_df.columns
    weather_features = weather_df.columns

    # Add the weather data to a copy of the demand data. Writing into the
    # caller's frame would make a later call with the same frame see the
    # weather columns as demand features.
    demand_df = demand_df.copy()
    for c in weather_df.columns:
        demand_df[c] = weather_df[c]

    # Testing hyper-parameters
    seasonality = 168
    forecast_length = 48

    # For the ES_RNN_S, for each test, train the model num_ensemble
    # times and average the predictions. Further, if internal ensembling is
    # also specified, each prediction from the model will actually be the
    # average of the predictions from the last 5 epochs
    ensemble = False
    num_ensemble = 3

    # True = use final week for testing, False = use penultimate week for
    # validation
    testing = True

    # Model No.: [Function, Name, Deseasonalise?, Additional Parameters,
    # Return Parameters, Number of Repetitions]
    test_dict = {
        1: [naive.naive_1, 'Naive1', False, None, False, 10],
        2: [naive.naive_2, 'Naive2', True, None, False, 10],
        3: [naive.naive_s, 'NaiveS', False, [seasonality], False, 10],
        4: [exponential_smoothing.ses, 'SES', True, None, True, 10],
        5: [exponential_smoothing.holt, 'Holt', True, None, True, 10],
        6: [exponential_smoothing.damped, 'Damped', True, None, True, 10],
        7: [
            exponential_smoothing.holt_winters, 'Holt-Winters', False,
            [seasonality], True, 10
        ],
        8: [exponential_smoothing.comb, 'Comb', True, None, False, 10],
        9: [arima.arima, 'ARIMA', True, "-- See arima_orders --", True, 10],
        10:
        [arima.sarima, 'SARIMA', False, "-- See sarima_orders --", True, 1],
        11: [arima.auto, 'Auto', False, [168], True, 1],
        12: [theta.theta, 'Theta', True, None, True, 10],
        13: [None, 'TSO', False, None, False, 1],
        14: [
            hybrid.es_rnn_s, 'ES-RNN-S', False,
            [
                seasonality, demand_features, weather_features, False,
                ensemble, True
            ], False, 1
        ],
        15: [
            hybrid.es_rnn_s, 'ES-RNN-SW', False,
            [
                seasonality, demand_features, weather_features, True, ensemble,
                True
            ], False, 1
        ],
        16: [
            hybrid.es_rnn_s, 'ES-RNN-D', False,
            [
                seasonality, demand_features, weather_features, False,
                ensemble, False
            ], False, 1
        ],
        17: [
            hybrid.es_rnn_s, 'ES-RNN-DW', False,
            [
                seasonality, demand_features, weather_features, True, ensemble,
                False
            ], False, 1
        ],
        18: [
            hybrid.es_rnn_i, 'ES-RNN-I', False,
            [seasonality, demand_features, weather_features, False, ensemble],
            False, 1
        ],
        19: [
            hybrid.es_rnn_i, 'ES-RNN-IW', False,
            [seasonality, demand_features, weather_features, True, ensemble],
            False, 1
        ],
    }

    # Optimal SARIMA orders for each season
    sarima_orders = {
        1: [(2, 0, 0), (1, 0, 1, 168)],
        2: [(2, 0, 1), (1, 0, 1, 168)],
        3: [(2, 0, 1), (1, 0, 1, 168)],
        4: [(1, 0, 2), (1, 0, 1, 168)]
    }

    # Optimum ARIMA Parameters (automatically checked, using the
    # identify_arima function)
    arima_orders = {
        1: [[(2, 0, 0)], [(2, 0, 0)], [(1, 0, 2)], [(2, 0, 2)]],
        2: [[(2, 0, 0)], [(2, 0, 0)], [(2, 0, 2)], [(2, 0, 2)]],
        3: [[(1, 0, 1)], [(2, 0, 2)], [(2, 0, 2)], [(2, 0, 2)]],
        4: [[(2, 0, 1)], [(2, 0, 2)], [(2, 0, 2)], [(2, 0, 2)]],
    }

    seas_dict = {1: "Spring", 2: "Summer", 3: "Autumn", 4: "Winter"}

    # Get the parameters for the model
    model_func, model_name, deseasonalise, params, ret_params, num_reps = \
        test_dict[model_no]
    error_pairs = [("sMAPE", errors.sMAPE), ("RMSE", errors.RMSE),
                   ("MASE", errors.MASE), ("MAE", errors.MAE)]
    error_names = [name for name, _ in error_pairs] + ["OWA"]

    # Build empty data structures to hold results, naive results, forecasts and
    # fitted parameters
    results = _empty_error_table(error_names, num_reps, forecast_length)
    n_results = _empty_error_table(error_names, num_reps, forecast_length)

    forecasts = {
        y: {r: {t: []
                for t in range(1, 8)}
            for r in range(1, num_reps + 1)}
        for y in range(1, 5)
    }

    final_params = {y: [] for y in range(1, 5)}

    all_data = stats_helpers.split_data(demand_df)
    years_df = all_data[seas_dict[season_no]]

    # The final 7 days are reserved for final testing
    if testing:
        years = [years_df[i]["total load actual"] for i in range(4)]
    else:
        years = [years_df[i]["total load actual"][:-7 * 24] for i in range(4)]

    # Loop through the years
    for y_index, y in enumerate(years):

        # Specify correct ARIMA parameters
        if model_no == 9:
            params = arima_orders[season_no][y_index]
        if model_no == 10:
            params = sarima_orders[season_no]

        # Loop through the week of tests
        for t in range(8, 1, -1):
            # Get training and test data. Change y[:-0] to y[:None].
            train_end = -(t * 24)
            test_end = -(t * 24 - forecast_length) if t > 2 else None
            train_data = y[:train_end]
            test_data = y[train_end:test_end]
            tso_data = years_df[y_index]["total load forecast"][
                train_end:test_end]

            # Deseasonalise, always required for Naive2
            train_deseas, indices = stats_helpers.deseasonalise(
                train_data, seasonality, "multiplicative")

            # Generate naive forecast for use in MASE calculation
            naive_fit_forecast = stats_helpers.reseasonalise(
                naive.naive_2(train_deseas, forecast_length), indices,
                "multiplicative")
            naive_forecast = naive_fit_forecast[-forecast_length:]

            # Use deseasonalised data if needed
            if deseasonalise:
                train_data = train_deseas

            # Loop through the repetitions
            for r in range(1, num_reps + 1):

                # Handle the hybrid model individually
                if model_no > 13:
                    # Hybrid model requires the dataframe and extra data
                    if testing:
                        hybrid_end = -((t - 2) * 24) if t > 2 else None
                    else:
                        hybrid_end = -((t + 5) * 24)  # See project notes
                    hybrid_data = years_df[y_index][:hybrid_end]

                    # Generate ensemble if we are ensembling
                    if ensemble:
                        pred_ensemble = [
                            model_func(hybrid_data, forecast_length, *params)
                            for _ in range(num_ensemble)
                        ]
                        forec_results = pd.Series(
                            np.mean(pred_ensemble, axis=0))
                    else:
                        forec_results = model_func(hybrid_data,
                                                   forecast_length, *params)

                # Handle the TSO forecast individually (no forecast method)
                elif model_no == 13:
                    forec_results = tso_data

                # Handle the statistical models. Fit the model and forecast,
                # with additional params if needed
                else:
                    if params is not None:
                        forec_results = model_func(train_data, forecast_length,
                                                   *params)
                    else:
                        forec_results = model_func(train_data, forecast_length)

                # Split results into fit-forecast and parameters if the
                # model also returned the values of its fitted parameters
                if ret_params:
                    fit_forecast, fit_params = forec_results
                else:
                    fit_forecast = forec_results

                # Reseasonalise if necessary
                if deseasonalise:
                    fit_forecast = stats_helpers.reseasonalise(
                        fit_forecast, indices, "multiplicative")

                # Select only the forecast, not the fitted values
                forecast = fit_forecast[-forecast_length:]

                # Loop through the error functions
                for e_name, e_func in error_pairs:

                    # Loop through the lead times
                    for lead in range(1, forecast_length + 1):
                        if e_name == "MASE":
                            # MASE gets the history up to and including the
                            # lead time; a stop of -0 would be empty, so the
                            # very last point uses None instead
                            if t == 2 and lead == forecast_length:
                                end = None
                            else:
                                end = -(t * 24 - lead)
                            error = e_func(forecast[:lead], y[:end],
                                           seasonality, lead)
                            n_error = e_func(naive_forecast[:lead], y[:end],
                                             seasonality, lead)
                        else:
                            error = e_func(forecast[:lead], test_data[:lead])
                            n_error = e_func(naive_forecast[:lead],
                                             test_data[:lead])

                        # Save error results for all lead times
                        results[e_name][r][y_index + 1][t - 1][lead - 1] = \
                            error
                        n_results[e_name][r][y_index + 1][t - 1][lead - 1] = \
                            n_error

                # Save 48 hour forecast
                forecasts[y_index + 1][r][t - 1] = forecast.to_list()

                # Save model params only for final repetition and train time
                if r == num_reps and t == 2 and ret_params:
                    final_params[y_index + 1] = fit_params
            print("Year:", str(y_index), "Test:", str(t), "Finished")

    # Calculate OWA for all forecasts
    for r in range(1, num_reps + 1):
        for y in range(1, 5):
            for t in range(1, 8):
                for lead in range(0, forecast_length):
                    results["OWA"][r][y][t][lead] = errors.OWA(
                        n_results["sMAPE"][r][y][t][lead],
                        n_results["MASE"][r][y][t][lead],
                        results["sMAPE"][r][y][t][lead],
                        results["MASE"][r][y][t][lead],
                    )

    # Average the single 48 hour forecast results
    all_res = [
        results["OWA"][r][y][t][forecast_length - 1]
        for r in range(1, num_reps + 1)
        for y in range(1, 5)
        for t in range(1, 8)
    ]

    mean = np.around(np.mean(all_res), decimals=3)
    std = np.around(np.std(all_res), decimals=3)

    # Save averaged single 48 forecast results
    file_path = os.path.abspath(os.path.dirname(__file__))
    res_path = os.path.join(file_path, "results/results_1.txt")
    with open(res_path) as file:
        results_1 = json.load(file)

    results_1[seas_dict[season_no]][model_name] = [mean, std]

    with open(res_path, "w") as file:
        json.dump(results_1, file)

    # Average the lead time results for OWA, sMAPE and MASE, then save each
    # into its own per-season results file
    lead_time_files = (("OWA", "results/results_48_seasons_owa.txt"),
                       ("sMAPE", "results/results_48_seasons_smape.txt"),
                       ("MASE", "results/results_48_seasons_mase.txt"))
    lead_time_means = {
        e_name: _mean_by_lead_time(results[e_name], num_reps, forecast_length)
        for e_name, _ in lead_time_files
    }
    for e_name, relative_path in lead_time_files:
        _store_lead_time_means(os.path.join(file_path, relative_path),
                               model_name, season_no, lead_time_means[e_name])

    # Save the raw forecasts and results
    res_filename = seas_dict[season_no] + "_" + model_name + "_results.txt"
    forec_filename = seas_dict[season_no] + "_" + model_name + "_forecasts.txt"
    res_path = os.path.join(file_path, "results/" + res_filename)
    forec_path = os.path.join(file_path, "results/" + forec_filename)

    with open(res_path, "w") as file:
        json.dump(results, file)
    with open(forec_path, "w") as file:
        json.dump(forecasts, file)

    # Save the parameters (if model returns parameters)
    if ret_params:
        param_path = os.path.join(file_path, "results/params.txt")
        with open(param_path) as file:
            saved_params = json.load(file)

        for y in range(1, 5):
            saved_params[model_name][str(season_no)][str(y)] = final_params[y]

        with open(param_path, "w") as file:
            json.dump(saved_params, file)
Пример #8
0
def plots_for_presentation(demand_df):
    """Draw the three figures used in the presentation.

    1. A 2x2 grid showing one week (2017-03-06 to 2017-03-12) of total load,
       gas generation, oil generation and price.
    2. A 4x1 figure showing the sliding input/output windows on the raw load,
       the fitted ES level and seasonality values read from the saved model,
       and the de-seasonalised, normalised series.
    3. The model's (log) output for the output window against the normalised
       actual values.

    Every figure is displayed with ``plt.show()``; nothing is returned.

    :param demand_df: Data frame handed to ``split_data``. The entry
        ``["Spring"][2]`` of its result is used and must provide the columns
        "total load actual", "generation fossil gas", "generation fossil oil"
        and "price actual".
    """
    font = {'size': 24}
    plt.rc('font', **font)

    all_data = split_data(demand_df)
    data = all_data["Spring"][2]

    # ---- Figure 1: one week of four different series ----------------------
    # Slice the week once instead of repeating the .loc lookup per panel.
    week = data.loc["2017-03-06 00:00:00+01:00":"2017-03-12 23:00:00+01:00"]
    panels = [
        ("total load actual", "Total Energy Demanded", "C0"),
        ("generation fossil gas", "Energy Generated - Gas", "C1"),
        ("generation fossil oil", "Energy Generated - Oil", "C2"),
        ("price actual", "Energy Price", "C3"),
    ]

    fig, axes = plt.subplots(2, 2, figsize=(20, 15), dpi=250)
    axes = axes.flatten()
    for ax, (column, title, colour) in zip(axes, panels):
        ax.plot(week[column], color=colour)
        ax.set_title(title)
        ax.set_xticks([])
    plt.show()

    # ---- Window offsets (in hours, relative to `start`) -------------------
    pre_end = 5 * 24                # 5 days shown before the input window
    inp_end = 14 * 24 + pre_end     # 14-day input window
    out_end = 2 * 24 + inp_end      # 2-day output window
    aft_end = 2 * 24 + out_end      # 2 days shown after the output window
    start = (64 - 19) * 24
    end = (64 + 4) * 24             # end - start == aft_end

    # These three were previously passed to model.predict() without ever
    # being defined in this function. Derive the sizes from the offsets above
    # so they always match the rows sliced into `test_data`; `weather` follows
    # the setting plots_for_poster uses with the same saved model.
    window_size = inp_end - pre_end     # 336
    output_size = out_end - inp_end     # 48
    weather = False

    input_data = data["total load actual"][start:end].tolist()
    # Exactly window_size + output_size rows: the input and output windows.
    test_data = data[start + pre_end:start + out_end]

    model = torch.load("/Users/matt/Projects/AdvancedResearchProject/models"
                       "/model_all.pt")
    model.eval()

    levels = [l.item() for l in model.levels["total load actual"]]
    seasonals = [s.item() for s in model.seasonals["total load actual"]]

    # x positions of the output window on the panels that start at pre_end.
    out_x = list(range(inp_end - pre_end, out_end - pre_end))

    # ---- Figure 2: sliding window, ES components, normalised data ---------
    fig, axes = plt.subplots(4, 1, figsize=(20, 15), dpi=250)
    axes[0].set_title("Sliding Window, Fitted ES Components, and Normalised "
                      "Data")
    axes[0].plot(input_data[:inp_end], color="C0", label="Input Data")
    axes[0].plot(list(range(inp_end, out_end)),
                 input_data[inp_end:out_end],
                 color="C0",
                 linestyle="--")
    axes[0].plot(list(range(out_end, aft_end)),
                 input_data[out_end:aft_end],
                 color="C0")
    # Rectangles outlining the input and output windows.
    axes[0].plot([pre_end, inp_end, inp_end, pre_end, pre_end],
                 [19000, 19000, 35000, 35000, 19000],
                 linestyle=":",
                 label="Input Window",
                 color="darkorange")
    axes[0].plot([inp_end, out_end, out_end, inp_end, inp_end],
                 [19000, 19000, 35000, 35000, 19000],
                 linestyle=":",
                 label="Output Window",
                 color="teal")

    axes[1].plot(levels[start + pre_end:start + inp_end],
                 color="C1",
                 label="Fitted ES Level Values")
    axes[1].plot(out_x,
                 levels[start + inp_end:start + out_end],
                 color="C1",
                 linestyle="--")
    axes[1].axvline(x=window_size, c='grey', linestyle=":")

    # NOTE(review): this solid line slices `seasonals` without the 168 offset
    # that the dashed continuation and the normalisation below apply --
    # confirm which alignment is intended.
    axes[2].plot(seasonals[start + pre_end:start + inp_end],
                 color="C2",
                 label="Fitted ES Seasonality Values")
    axes[2].plot(out_x,
                 seasonals[168 + start + inp_end:168 + start + out_end],
                 color="C2",
                 linestyle="--")
    axes[2].axvline(x=window_size, c='grey', linestyle=":")

    # Divide by the seasonality and by the level at the end of the input
    # window, then take logs.
    normalised_inp = np.log(
        np.array(input_data[pre_end:inp_end]) /
        (np.array(seasonals[168 + start + pre_end:168 + start + inp_end]) *
         levels[start + inp_end]))
    normalised_out = np.log(
        np.array(input_data[inp_end:out_end]) /
        (np.array(seasonals[168 + start + inp_end:168 + start + out_end]) *
         levels[start + inp_end]))

    axes[3].plot(normalised_inp,
                 color="C3",
                 label="De-seasonalised and "
                 "Normalised Data")
    axes[3].plot(out_x,
                 normalised_out,
                 color="C5",
                 linestyle="--")
    axes[3].axvline(x=window_size, c='grey', linestyle=":")
    for ax in axes:
        ax.legend(loc="upper left")
    plt.show()

    # ---- Figure 3: model output vs. normalised actuals --------------------
    # Only the last value returned by predict() is plotted here.
    *_, out = model.predict(test_data, window_size, output_size, weather)

    # Four rows are created but only the first is drawn on, as before.
    fig, axes = plt.subplots(4, 1, figsize=(20, 15), dpi=250)
    axes[0].set_title("dLSTM and Actual Output")
    axes[0].plot(normalised_out,
                 label="Actual Output",
                 color="C5",
                 linestyle="--")
    axes[0].plot(torch.log(out).detach().view(-1).numpy(),
                 label="dLSTM Output",
                 color="C3")
    axes[0].legend(loc="upper right")
    plt.show()
Пример #9
0
def plots_for_poster(demand_df):
    """Draw the two four-panel figures used on the poster.

    The first figure shows the input window of the load series, the last
    ``window_size`` fitted ES level and seasonality values of the saved model
    and the resulting de-seasonalised, normalised input. The second figure
    shows the values returned by ``model.predict``: raw network output, level
    values, seasonality values and the final forecast against the actuals.

    Both figures are displayed with ``plt.show()``; nothing is returned.

    :param demand_df: Data frame handed to ``split_data``; the entry
        ``["Spring"][2]`` of its result is plotted.
    """
    plt.rc('font', size=24)

    # Fixed settings for this plot.
    weather = False
    window_size, output_size = 336, 48

    spring_data = split_data(demand_df)["Spring"][2]
    # Both bounds are negative, i.e. counted back from the end of the data.
    first_row = -(15 * 24 + window_size)
    last_row = -(15 * 24 - output_size)
    test_data = spring_data[first_row:last_row]

    model = torch.load(
        "/Users/matt/Projects/AdvancedResearchProject/models/model_all.pt")
    model.eval()

    levels = [lvl.item() for lvl in model.levels["total load actual"]]
    seasonals = [seas.item() for seas in model.seasonals["total load actual"]]

    # Everything the first figure needs, computed before any drawing.
    window_load = test_data["total load actual"][:window_size]
    window_levels = levels[-window_size:]
    window_seasonals = seasonals[-window_size:]
    normalised = np.log(
        np.array(window_load) / (np.array(window_seasonals) * levels[-1]))

    # ---- Figure 1: inputs and fitted ES components ------------------------
    _, axes = plt.subplots(4, 1, figsize=(20, 15), dpi=250)
    ax_input, ax_level, ax_seas, ax_norm = axes
    ax_input.set_title(
        "Input Data, Fitted ES Components, and Normalised Data")
    ax_input.plot(window_load, color="C0", label="Input Data")
    ax_level.plot(window_levels, color="C1", label="Fitted ES Level Values")
    ax_seas.plot(window_seasonals,
                 color="C2",
                 label="Fitted ES Seasonality Values")

    # Dotted rectangle drawn on the seasonality panel.
    box_left, box_right = 7 * 24, 9 * 24
    box_low, box_high = 2.1, 3.3
    ax_seas.plot([box_left, box_right, box_right, box_left, box_left],
                 [box_low, box_low, box_high, box_high, box_low],
                 color="C2",
                 linestyle=":",
                 label="Output Window")
    ax_norm.plot(normalised,
                 color="C3",
                 label="De-seasonalised and Normalised Data")
    for panel in axes:
        panel.legend(loc="upper left")
    plt.show()

    # ---- Figure 2: model outputs -------------------------------------------
    (forecast, actuals, level_out, seas_out, _all_levels, _all_seasonals,
     net_out) = model.predict(test_data, window_size, output_size, weather)

    _, axes = plt.subplots(4, 1, figsize=(20, 15), dpi=250)
    axes[0].set_title("dLSTM Output, Repeated Level and Seasonality Values, "
                      "and Final Forecast")
    # One flattened, detached tensor per panel, top to bottom.
    tensor_panels = (
        (net_out, "C3", "dLSTM Output"),
        (level_out, "C1", "Extrapolated Level Values"),
        (seas_out, "C2", "Repeated Seasonality Values"),
        (forecast, "C4", "Final Forecast"),
    )
    for panel, (tensor, colour, caption) in zip(axes, tensor_panels):
        panel.plot(tensor.view(-1).detach(), color=colour, label=caption)
    axes[3].plot(actuals, color="C5", label="Actual Data")
    for panel in axes:
        panel.legend(loc="upper left")
    plt.show()