示例#1
0
def evaluate_simp_avg_model(X):
    """
    Evaluate a Simple Expontential Smoothing Model
    :param X: list or series containing all historical data
    :return: mse (error metric) and the fitted model
    """
    # Prepare training dataset
    train_size = int(len(X) * 0.75)
    train, test = X[0:train_size], X[train_size:]
    history = [x for x in train]

    # Make predictions
    predictions = list()
    for t in range(len(test)):
        # Fit model
        model = SimpleExpSmoothing(history)
        model_fit = model.fit(smoothing_level=0.6, optimized=False)

        # Forecast
        yhat = model_fit.forecast()[0]

        # Store prediction and move forward one time step
        predictions.append(yhat)
        history.append(test[t])

    # calculate out of sample error
    mse = mean_squared_error(test, predictions)
    return mse, model_fit
示例#2
0
def estimate_SES(dataframe, name, alpha, sizeestimate):
    
    array = np.asarray(dataframe[name])
    model = SimpleExpSmoothing(array)
    fit = model.fit(smoothing_level=alpha,optimized=False)
    forecast = fit.forecast(sizeestimate)
    for index in range ( len(forecast) ):
        forecast[index] = round(forecast[index], 4)
    return forecast
示例#3
0
    def SES_f(self, df, a):
        try:
            simpleexp = SimpleExpSmoothing(np.array(np.array(df['Actual'])))
            fit_simpleexp = simpleexp.fit(smoothing_level=a,optimized=False)
            forecast = fit_simpleexp.forecast()[0]

            Cluster, Warehouse, WF, YF = generate_attrib(df)
            self.df_forecast.append({'Cluster':Cluster, 'Warehouse':Warehouse, 'Year':YF, "Week": WF, "Forecast":forecast})
            return print(f'DEBUG:Forecast:{Cluster}:{Warehouse}:{YF}:{WF}:{forecast}')
        except:
            return print("ERROR:FORECAST-SES")
def estimate_SES(dataframe, name, alpha, sizeestimate):
    # SES requires an array to work with, so we convert the column into an array
    array = np.asarray(dataframe[name])
    model = SimpleExpSmoothing(array)
    fit = model.fit(smoothing_level=alpha,optimized=False)
    # because this model assumes no trend or seasonality
    # all forecasts can be the same, i.e. a straight line
    forecast = fit.forecast(sizeestimate)
    for index in range ( len(forecast) ):
        forecast[index] = round(forecast[index], 4)
    return forecast
示例#5
0
def ses():
    # Simple Exponential Smoothing
    ses_obj = SimpleExpSmoothing(origin_series)

    for color, smoothing_level in [('blue', 0.2), ('red', 0.6),
                                   ('green', None)]:
        if not smoothing_level:
            fit = ses_obj.fit(optimized=True)
            smoothing_level = fit.model.params['smoothing_level']
        else:
            fit = ses_obj.fit(smoothing_level=smoothing_level, optimized=False)
        forecast = fit.forecast(12).rename(rf'$\alpha={smoothing_level}$')

        # plot
        forecast.plot(marker='o', color=color, legend=True)
        # plt.show()

        fitted = pd.Series(data=fit.fittedvalues.values, index=index)
        fitted.plot(marker='o', color=color)

        # plt.show()

    plt.show()
示例#6
0
    def exp_avg(self, month=6, smooth_level=0.2):
        """Exponential averaging the numerical data

        Keyword Arguments:
            month {int} -- Time span for each exponential average (default: {6})
            smooth_level {float} -- Hyperparameter for average decay (default : {0.2})
            smaller value means slower decay (more weight on older data)
        Returns:
            pd.Series -- Exponential averaged sales data
        """
        span = month * 30
        i = 0
        fitted_data = []
        while i < len(self.train):
            split_data = self.train.iloc[i:i + span]
            es = SimpleExpSmoothing(split_data)
            es_fit = es.fit(smoothing_level=smooth_level, optimized=False)
            fitted_data.append(es_fit.fittedvalues)
            i += span
        smooth_data = pd.concat(fitted_data)
        return smooth_data
示例#7
0
from statsmodels.tsa.api import SimpleExpSmoothing
#%%
train = airline[:'1959']
test = airline['1960':]
# %%

train['Thousands of Passengers'].plot()
test['Thousands of Passengers'].plot()
# 색깔이 분리되어서 보임
# %%
ses_model = SimpleExpSmoothing(train['Thousands of Passengers'])
# %%
#SimpleExpSmoothing(np.asarray(train['Thousands of Passengers']))

#%%
ses_result = ses_model.fit()

# %%
#테스트 데이터를 오염시키지 않기위해서
y_hat = test.copy()


# %%
y_hat['SES'] = ses_result.forecast(len(test))

# %%

plt.plot(train['Thousands of Passengers'], label='Train')
plt.plot(test['Thousands of Passengers'], label='Test')
plt.plot(y_hat['SES'], label='Simple Exp Smoothing')
# 트렌드와 패턴이 반영안된 걸 확인할 수 있음
def exp_smooth(signal, smoothing_lvl = 0.2):
    X = []
    for i in range(signal.shape[0]):
        model = SimpleExpSmoothing(signal[i].copy(order = 'C'))
        X[i] = model.fit(smoothing_level=.2)
    return np.array(X)
示例#9
0
 def simple_exponential_smoothing(self, y, bias, alpha):
     smoother = SimpleExpSmoothing(y - bias)
     fit_model = smoother.fit(smoothing_level=alpha)
     fitted = fit_model.fittedvalues
     self.model_params = (fit_model, bias, len(y))
     return fitted
def temp_pred(df, season):
    print(
        "*************************************** {} Results *****************************************"
        .format(season))
    temp = df['Temperature']
    print("Number of missing values in temperature variable: {}".format(
        temp.isna().sum()))
    temp_1 = temp.diff(periods=24)
    temp_1 = temp_1[24:]
    temp_2 = temp_1.diff()
    temp_2 = temp_2[1:]

    # dependent variable versus time
    plot_func(temp, 'temperature', 'time', 'temperature in Kelvin',
              'Historical hourly Weather data 2012-2013')
    plot_func(temp_1, 'temperature', 'time', 'Magnitude',
              'Historical hourly Weather data 2012-2013 (24th diff)')
    plot_func(temp_2, 'temperature', 'time', 'Magnitude',
              'Historical hourly Weather data 2012-2013 (24th+1st diff)')

    # ACF of the dependent variable
    lags = 50
    sm.graphics.tsa.plot_acf(temp,
                             lags=lags,
                             title='Autocorrelation of temperature')
    plt.show()
    sm.graphics.tsa.plot_acf(temp_1,
                             lags=lags,
                             title='Autocorrelation of temperature(24th diff)')
    plt.show()
    sm.graphics.tsa.plot_acf(
        temp_2,
        lags=lags,
        title='Autocorrelation of temperature(24th+1st diff)')
    plt.show()
    sm.graphics.tsa.plot_pacf(temp,
                              lags=lags,
                              title='partial correlation of temperature')
    plt.show()
    sm.graphics.tsa.plot_pacf(
        temp_1,
        lags=lags,
        title='partial correlation of temperature(24th diff)')
    plt.show()
    sm.graphics.tsa.plot_pacf(
        temp_2,
        lags=lags,
        title='partial correlation of temperature(24th+1st diff)')
    plt.show()

    lags = 240
    acf_1 = sm.graphics.tsa.acf(temp_1, nlags=lags)
    plt.figure()
    plt.stem(range(0, lags + 1)[::24], acf_1[::24], use_line_collection=True)
    plt.xlabel('Lags')
    plt.ylabel('Magnitude')
    plt.title('Autocorrelation plot for {} every 24 lags'.format(
        'temperature(24th diff)'))
    plt.show()

    acf_2 = sm.graphics.tsa.acf(temp_2, nlags=lags)
    plt.figure()
    plt.stem(range(0, lags + 1)[::24], acf_2[::24], use_line_collection=True)
    plt.xlabel('Lags')
    plt.ylabel('Magnitude')
    plt.title('Autocorrelation plot for {} every 24 lags'.format(
        'temperature(24th+1st diff)'))
    plt.show()

    pacf_1 = sm.graphics.tsa.pacf(temp_1, nlags=lags)
    plt.figure()
    plt.stem(range(0, lags + 1)[::24], pacf_1[::24], use_line_collection=True)
    plt.xlabel('Lags')
    plt.ylabel('Magnitude')
    plt.title('PartialAutocorrelation plot for {} every 24 lags'.format(
        'temperature(24th diff)'))
    plt.show()

    pacf_2 = sm.graphics.tsa.pacf(temp_2, nlags=lags)
    plt.figure()
    plt.stem(range(0, lags + 1)[::24], pacf_2[::24], use_line_collection=True)
    plt.xlabel('Lags')
    plt.ylabel('Magnitude')
    plt.title('PartialAutocorrelation plot for {} every 24 lags'.format(
        'temperature(24th+1st diff)'))
    plt.show()

    # Correlation Matrix with seaborn heatmap and Pearson's correlation coefficent
    corrMatrix = df.corr()
    ax = sns.heatmap(corrMatrix,
                     vmin=-1,
                     vmax=1,
                     center=0,
                     cmap=sns.diverging_palette(20, 220, n=200),
                     square=True,
                     annot=True)
    bottom, top = ax.get_ylim()
    ax.set_ylim(bottom + 0.5, top - 0.5)
    ax.set_xticklabels(ax.get_xticklabels(),
                       rotation=45,
                       horizontalalignment='right')
    plt.show()

    r_ht = correlation_coefficent_cal(df['Humidity'], df.Temperature)
    print(
        "The correlation coefficient between the Humidity and Temperature is {:.3f}"
        .format(r_ht))
    r_wst = correlation_coefficent_cal(df['Wind Speed'], df.Temperature)
    print(
        "The correlation coefficient between the Wind Speed and Temperature is {:.3f}"
        .format(r_wst))
    r_wdt = correlation_coefficent_cal(df['Wind Direction'], df.Temperature)
    print(
        "The correlation coefficient between the Wind Direction and Temperature is  {:.3f}"
        .format(r_wdt))
    r_pt = correlation_coefficent_cal(df['Pressure'], df.Temperature)
    print(
        "The correlation coefficient between the Pressure and Temperature is  {:.3f}"
        .format(r_pt))

    df['Temperature'].plot.hist(bins=20, grid=True,
                                edgecolor='k').autoscale(enable=True,
                                                         axis='both',
                                                         tight=True)
    plt.xlabel('Temperature in Kelvin')
    plt.ylabel('Frequency')
    plt.title('Histogram plot of Temperature distribution')
    plt.show()

    adf_cal(temp)
    adf_cal(temp_1)
    adf_cal(temp_2)

    # Detrending the data using Moving Average method
    detrended_2x4, ma_2x4 = cal_moving_average(temp,
                                               ma_order=4,
                                               folding_order=2)
    adf_cal(detrended_2x4)

    # Time series decomposition using STL(Seasonal and Trend decomposition using Loess) method
    STL1 = STL(temp)
    res = STL1.fit()
    fig = res.plot()
    plt.show()

    T = res.trend
    S = res.seasonal
    R = res.resid

    plt.figure()
    plt.plot(T, label='trend')
    plt.plot(S, label='Seasonal')
    plt.plot(R, label='residuals')
    plt.xlabel('Year')
    plt.ylabel('Magnitude')
    plt.title(
        'Trend, Seasonality, Residual components using STL Decomposition')
    plt.legend()
    plt.show()

    detrended = temp - T
    plt.figure()
    plt.plot(temp, label='Original')
    plt.plot(detrended, label='detrended')
    plt.xlabel('Year')
    plt.ylabel('Magnitude')
    plt.title('Original vs detrended')
    plt.legend()
    plt.show()

    adjusted_seasonal = temp - S
    plt.figure()
    plt.plot(temp, label='Original')
    plt.plot(adjusted_seasonal, label='Seasonally Adjusted')
    plt.xlabel('Year')
    plt.ylabel('Magnitude')
    plt.title('Original vs Seasonally adjusted')
    plt.legend()
    plt.show()

    # Measuring strength of trend and seasonality
    F = np.max([0, 1 - np.var(np.array(R)) / np.var(np.array(T + R))])
    print('Strength of trend for Hourly weather dataset is {:.3f}'.format(F))

    FS = np.max([0, 1 - np.var(np.array(R)) / np.var(np.array(S + R))])
    print(
        'Strength of seasonality for Hourly weather dataset is {:.3f}'.format(
            FS))

    # Average, Naive, Drift, Simple Exponential Smoothing, Holt's Linear and Holt's winter Seasonal Methods
    print(
        "--------------Average, Naive, Drift, Simple Exponential Smoothing, Holt's Linear and Holt's winter Seasonal Methods-----------------"
    )
    train, test = train_test_split(temp, shuffle=False, test_size=0.2)
    train.index.freq = '1H'
    test.index.freq = '1H'
    h = len(test)

    train_pred_avg = []
    for i in range(1, len(train)):
        res = avg_method(train.iloc[0:i])
        train_pred_avg.append(res)

    test_forecast_avg1 = np.ones(len(test)) * avg_method(train)
    test_forecast_avg = pd.DataFrame(test_forecast_avg1).set_index(test.index)
    residual_error_avg = np.array(train[1:]) - np.array(train_pred_avg)
    forecast_error_avg = test - test_forecast_avg1
    MSE_train_avg = np.mean((residual_error_avg)**2)
    MSE_test_avg = np.mean((forecast_error_avg)**2)
    mean_pred_avg = np.mean(residual_error_avg)
    mean_forecast_avg = np.mean(forecast_error_avg)
    print('Mean of prediction errors for Average method: ', mean_pred_avg)
    print('Mean of forecast errors for Average method: ', mean_forecast_avg)
    var_pred_avg = np.var(residual_error_avg)
    var_forecast_avg = np.var(forecast_error_avg)

    naive_train_pred = []
    for i in range(0, len(train) - 1):
        res = naive_method(train[i])
        naive_train_pred.append(res)

    res = np.ones(len(test)) * train[-1]
    naive_test_forecast1 = np.ones(len(test)) * res
    naive_test_forecast = pd.DataFrame(naive_test_forecast1).set_index(
        test.index)
    residual_error_naive = np.array(train[1:]) - np.array(naive_train_pred)
    forecast_error_naive = test - naive_test_forecast1
    MSE_train_naive = np.mean((residual_error_naive)**2)
    MSE_test_naive = np.mean((forecast_error_naive)**2)
    mean_pred_naive = np.mean(residual_error_naive)
    mean_forecast_naive = np.mean(forecast_error_naive)
    print('Mean of prediction errors for Naive method: ', mean_pred_naive)
    print('Mean of forecast errors for Naive method: ', mean_forecast_naive)
    var_pred_naive = np.var(residual_error_naive)
    var_forecast_naive = np.var(forecast_error_naive)

    drift_train_forecast = []
    for i in range(1, len(train)):
        if i == 1:
            drift_train_forecast.append(train[0])
        else:
            h = 1
            res = drift_method(train[0:i], h)
            drift_train_forecast.append(res)

    drift_test_forecast1 = []
    for h in range(1, len(test) + 1):
        res = drift_method(train, h)
        drift_test_forecast1.append(res)

    drift_test_forecast = pd.DataFrame(drift_test_forecast1).set_index(
        test.index)
    residual_error_drift = np.array(train[1:]) - np.array(drift_train_forecast)
    forecast_error_drift = np.array(test) - np.array(drift_test_forecast1)
    MSE_train_drift = np.mean((residual_error_drift)**2)
    MSE_test_drift = np.mean((forecast_error_drift)**2)
    mean_pred_drift = np.mean(residual_error_drift)
    mean_forecast_drift = np.mean(forecast_error_drift)
    print('Mean of prediction errors for Drift method: ', mean_pred_drift)
    print('Mean of forecast errors for Drift method: ', mean_forecast_drift)
    var_pred_drift = np.var(residual_error_drift)
    var_forecast_drift = np.var(forecast_error_drift)

    l0 = train[0]
    ses_train_pred = ses(train, 0.50, l0)
    ses_test_forecast1 = np.ones(len(test)) * (0.5 * (train[-1]) + (1 - 0.5) *
                                               (ses_train_pred[-1]))
    ses_test_forecast = pd.DataFrame(ses_test_forecast1).set_index(test.index)
    residual_error_ses = np.array(train[1:]) - np.array(ses_train_pred)
    forecast_error_ses = np.array(test) - np.array(ses_test_forecast1)
    MSE_train_SES = np.mean((residual_error_ses)**2)
    MSE_test_SES = np.mean((forecast_error_ses)**2)
    mean_pred_SES = np.mean(residual_error_ses)
    mean_forecast_SES = np.mean(forecast_error_ses)
    print('Mean of prediction errors for SES method: ', mean_pred_SES)
    print('Mean of forecast errors for SES method: ', mean_forecast_SES)
    var_pred_SES = np.var(residual_error_ses)
    var_forecast_SES = np.var(forecast_error_ses)

    # SES Method using statsmodels for alpha=0.5
    # ses_train = train.ewm(alpha=0.5, adjust=False).mean()  # Another way of doing it
    ses_model1 = SimpleExpSmoothing(train)
    ses_fitted_model1 = ses_model1.fit(smoothing_level=0.5, optimized=False)
    ses_train_pred1 = ses_fitted_model1.fittedvalues.shift(-1)
    ses_test_forecast1 = ses_fitted_model1.forecast(steps=len(test))
    ses_test_forecast1 = pd.DataFrame(ses_test_forecast1).set_index(test.index)
    MSE_test_SES1 = np.square(
        np.subtract(test.values,
                    np.ndarray.flatten(ses_test_forecast1.values))).mean()

    # Holt's Linear Trend
    holtl_fitted_model = ets.ExponentialSmoothing(train,
                                                  trend='additive',
                                                  damped=True,
                                                  seasonal=None).fit()
    holtl_train_pred = holtl_fitted_model.fittedvalues
    holtl_test_forecast = holtl_fitted_model.forecast(steps=len(test))
    holtl_test_forecast = pd.DataFrame(holtl_test_forecast).set_index(
        test.index)
    residual_error_holtl = np.subtract(
        train.values, np.ndarray.flatten(holtl_train_pred.values))
    forecast_error_holtl = np.subtract(
        test.values, np.ndarray.flatten(holtl_test_forecast.values))
    MSE_train_holtl = np.mean((residual_error_holtl)**2)
    MSE_test_holtl = np.mean((forecast_error_holtl)**2)
    mean_pred_holtl = np.mean(residual_error_holtl)
    mean_forecast_holtl = np.mean(forecast_error_holtl)
    print("Mean of prediction errors for Holt's Linear method: ",
          mean_pred_holtl)
    print("Mean of forecast errors for Holt's Linear method: ",
          mean_forecast_holtl)
    var_pred_holtl = np.var(residual_error_holtl)
    var_forecast_holtl = np.var(forecast_error_holtl)

    # Holt's Winter Seasonal Trend
    holtw_fitted_model = ets.ExponentialSmoothing(train,
                                                  trend='add',
                                                  damped=True,
                                                  seasonal='mul',
                                                  seasonal_periods=24).fit()
    holtw_train_pred = holtw_fitted_model.fittedvalues
    holtw_test_forecast = holtw_fitted_model.forecast(steps=len(test))
    holtw_test_forecast = pd.DataFrame(holtw_test_forecast).set_index(
        test.index)
    residual_error_holtw = np.subtract(
        train.values, np.ndarray.flatten(holtw_train_pred.values))
    forecast_error_holtw = np.subtract(
        test.values, np.ndarray.flatten(holtw_test_forecast.values))
    MSE_train_holtw = np.mean((residual_error_holtw)**2)
    MSE_test_holtw = np.mean((forecast_error_holtw)**2)
    mean_pred_holtw = np.mean(residual_error_holtw)
    mean_forecast_holtw = np.mean(forecast_error_holtw)
    print("Mean of prediction errors for Holt's Winter Seasonal method: ",
          mean_pred_holtw)
    print("Mean of forecast errors for Holt's Winter Seasonal method: ",
          mean_forecast_holtw)
    var_pred_holtw = np.var(residual_error_holtw)
    var_forecast_holtw = np.var(forecast_error_holtw)

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.plot(train, label='Training set')
    ax.plot(test, label='Testing set')
    ax.plot(test_forecast_avg, label='Average h-step prediction')
    plt.xlabel('Time')
    plt.ylabel('Temperature')
    plt.title('Average Method')
    plt.legend(loc='upper left')
    plt.show()

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.plot(train, label='Training set')
    ax.plot(test, label='Testing set')
    ax.plot(naive_test_forecast, label='Naive h-step prediction')
    plt.xlabel('Time')
    plt.ylabel('Temperature')
    plt.title('Naive Method')
    plt.legend(loc='upper left')
    plt.show()

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.plot(train, label='Training set')
    ax.plot(test, label='Testing set')
    ax.plot(drift_test_forecast, label='Drift h-step prediction')
    plt.xlabel('Time')
    plt.ylabel('Temperature')
    plt.title('Drift Method')
    plt.legend(loc='upper left')
    plt.show()

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.plot(train, label='Training set')
    ax.plot(test, label='Testing set')
    ax.plot(ses_test_forecast,
            label='Simple Exponential Smoothing h-step prediction')
    plt.xlabel('Time')
    plt.ylabel('Temperature')
    plt.title('SES Method')
    plt.legend(loc='upper left')
    plt.show()

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.plot(train, label='Training set')
    ax.plot(test, label='Testing set')
    ax.plot(holtl_test_forecast, label="Holt's Linear h-step prediction")
    plt.xlabel('Time')
    plt.ylabel('Temperature')
    plt.title("Holt's Linear Method")
    plt.legend(loc='upper left')
    plt.show()

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.plot(train, label='Training set')
    ax.plot(test, label='Testing set')
    ax.plot(holtw_test_forecast,
            label="Holt's Winter Seasonal h-step prediction")
    plt.xlabel('Time')
    plt.ylabel('Temperature')
    plt.title("Holt's Winter Seasonal Method")
    plt.legend(loc='upper left')
    plt.show()

    fig, ax = plt.subplots(figsize=(10, 8))
    # ax.plot(train, label='Training set')
    ax.plot(test, label='Testing set')
    ax.plot(holtw_test_forecast,
            label="Holt's Winter Seasonal h-step prediction")
    plt.xlabel('Time')
    plt.ylabel('Temperature')
    plt.title("Holt's Winter Seasonal Method")
    plt.legend(loc='upper left')
    plt.show()

    # Auto_correlation for Residual errors and Q value for Residual errors
    #Average Method
    k = len(test)
    lags = 30
    avg_residual_acf = cal_auto_corr(residual_error_avg, lags)
    Q_residual_avg = k * np.sum(np.array(avg_residual_acf[lags:])**2)
    plt.figure()
    plt.stem(range(-(lags - 1), lags),
             avg_residual_acf,
             use_line_collection=True)
    plt.xlabel('Lags')
    plt.ylabel('Magnitude')
    plt.title('Autocorrelation plot for Residual Error (Average Method)')
    plt.show()

    # Naive method
    k = len(test)
    lags = 30
    naive_residual_acf = cal_auto_corr(residual_error_naive, lags)
    Q_residual_naive = k * np.sum(np.array(naive_residual_acf[lags:])**2)
    plt.figure()
    plt.stem(range(-(lags - 1), lags),
             naive_residual_acf,
             use_line_collection=True)
    plt.xlabel('Lags')
    plt.ylabel('Magnitude')
    plt.title('Autocorrelation plot for Residual Error (Naive Method)')
    plt.show()

    # Drift Method
    k = len(test)
    lags = 30
    drift_residual_acf = cal_auto_corr(residual_error_drift, lags)
    Q_residual_drift = k * np.sum(np.array(drift_residual_acf[lags:])**2)
    plt.figure()
    plt.stem(range(-(lags - 1), lags),
             drift_residual_acf,
             use_line_collection=True)
    plt.xlabel('Lags')
    plt.ylabel('Magnitude')
    plt.title('Autocorrelation plot for Residual Error (Drift Method)')
    plt.show()

    # SES method
    k = len(test)
    lags = 30
    ses_residual_acf = cal_auto_corr(residual_error_ses, lags)
    Q_residual_SES = k * np.sum(np.array(ses_residual_acf[lags:])**2)
    plt.figure()
    plt.stem(range(-(lags - 1), lags),
             ses_residual_acf,
             use_line_collection=True)
    plt.xlabel('Lags')
    plt.ylabel('Magnitude')
    plt.title('Autocorrelation plot for Residual Error (SES Method)')
    plt.show()

    # holt's linear method
    k = len(test)
    lags = 30
    holtl_residual_acf = cal_auto_corr(residual_error_holtl, lags)
    Q_residual_holtl = k * np.sum(np.array(holtl_residual_acf[lags:])**2)
    plt.figure()
    plt.stem(range(-(lags - 1), lags),
             holtl_residual_acf,
             use_line_collection=True)
    plt.xlabel('Lags')
    plt.ylabel('Magnitude')
    plt.title("Autocorrelation plot for Residual Error (Holt's Linear Method)")
    plt.show()

    # holt's Winter Seasonal method
    k = len(train)
    lags = 30
    holtw_residual_acf = cal_auto_corr(residual_error_holtw, lags)
    Q_residual_holtw = k * np.sum(np.array(holtw_residual_acf[lags:])**2)
    print("Q-value of Residual error for Holts winter method: {}".format(
        Q_residual_holtw))
    plt.figure()
    plt.stem(range(-(lags - 1), lags),
             holtw_residual_acf,
             use_line_collection=True)
    plt.xlabel('Lags')
    plt.ylabel('Magnitude')
    plt.title(
        "Autocorrelation plot for Residual Error (Holt's winter Seasonal Method)"
    )
    plt.show()

    sm.graphics.tsa.plot_acf(
        holtw_residual_acf,
        lags=lags,
        title=
        "Autocorrelation for Residual Error (Holt's winter Seasonal Method)")
    plt.show()

    corr_avg = correlation_coefficent_cal(forecast_error_avg, test)
    corr_naive = correlation_coefficent_cal(forecast_error_naive, test)
    corr_drift = correlation_coefficent_cal(forecast_error_drift, test)
    corr_ses = correlation_coefficent_cal(forecast_error_ses, test)
    corr_holtl = correlation_coefficent_cal(forecast_error_holtl, test)
    corr_holtw = correlation_coefficent_cal(forecast_error_holtw, test)
    d = {
        'Methods': ['Average', 'Naive', 'Drift', 'SES', "HoltL", "HoltW"],
        'Q_val': [
            round(Q_residual_avg, 2),
            round(Q_residual_naive, 2),
            round(Q_residual_drift, 2),
            round(Q_residual_SES, 2),
            round(Q_residual_holtl, 2),
            round(Q_residual_holtw, 2)
        ],
        'MSE(P)': [
            round(MSE_train_avg, 2),
            round(MSE_train_naive, 2),
            round(MSE_train_drift, 2),
            round(MSE_train_SES, 2),
            round(MSE_train_holtl, 2),
            round(MSE_train_holtw, 2)
        ],
        'MSE(F)': [
            round(MSE_test_avg, 2),
            round(MSE_test_naive, 2),
            round(MSE_test_drift, 2),
            round(MSE_test_SES, 2),
            round(MSE_test_holtl, 2),
            round(MSE_test_holtw, 2)
        ],
        'var(P)': [
            round(var_pred_avg, 2),
            round(var_pred_naive, 2),
            round(var_pred_drift, 2),
            round(var_pred_SES, 2),
            round(var_pred_holtl, 2),
            round(var_pred_holtw, 2)
        ],
        'var(F)': [
            round(var_forecast_avg, 2),
            round(var_forecast_naive, 2),
            round(var_forecast_drift, 2),
            round(var_forecast_SES, 2),
            round(var_forecast_holtl, 2),
            round(var_forecast_holtw, 2)
        ],
        'corrcoeff': [
            round(corr_avg, 2),
            round(corr_naive, 2),
            round(corr_drift, 2),
            round(corr_ses, 2),
            round(corr_holtl, 2),
            round(corr_holtw, 2)
        ]
    }
    df1 = pd.DataFrame(data=d)
    df1 = df1.set_index('Methods')
    pd.set_option('display.max_columns', None)
    print(df1)

    # Forward step regression
    df2 = df[['Humidity', 'Temperature']]
    features = df2.drop(columns='Temperature')
    target = df2['Temperature']
    features = sm.add_constant(features)
    x_train, x_test, y_train, y_test = train_test_split(features,
                                                        target,
                                                        shuffle=False,
                                                        test_size=0.2)
    model = sm.OLS(y_train, x_train).fit()
    print(model.summary())

    df2 = df[['Humidity', 'Wind Speed', 'Temperature']]
    features = df2.drop(columns='Temperature')
    target = df2['Temperature']
    features = sm.add_constant(features)
    x_train, x_test, y_train, y_test = train_test_split(features,
                                                        target,
                                                        shuffle=False,
                                                        test_size=0.2)
    model = sm.OLS(y_train, x_train).fit()
    print(model.summary())

    df2 = df[['Humidity', 'Wind Speed', 'Wind Direction', 'Temperature']]
    features = df2.drop(columns='Temperature')
    target = df2['Temperature']
    features = sm.add_constant(features)
    x_train, x_test, y_train, y_test = train_test_split(features,
                                                        target,
                                                        shuffle=False,
                                                        test_size=0.2)
    model = sm.OLS(y_train, x_train).fit()
    print(model.summary())

    df2 = df[[
        'Humidity', 'Wind Speed', 'Wind Direction', 'Pressure', 'Temperature'
    ]]
    features = df2.drop(columns='Temperature')
    target = df2['Temperature']
    features = sm.add_constant(features)
    x_train, x_test, y_train, y_test = train_test_split(features,
                                                        target,
                                                        shuffle=False,
                                                        test_size=0.2)
    model = sm.OLS(y_train, x_train).fit()
    print(model.summary())

    df2 = df[['Humidity', 'Wind Speed', 'Wind Direction', 'Temperature']]
    features = df2.drop(columns='Temperature')
    target = df2['Temperature']
    features = sm.add_constant(features)
    x_train, x_test, y_train, y_test = train_test_split(features,
                                                        target,
                                                        shuffle=False,
                                                        test_size=0.2)
    model = sm.OLS(y_train, x_train).fit()
    print(model.summary())

    # Backward step regression
    df2 = df[[
        'Humidity', 'Wind Speed', 'Wind Direction', 'Pressure', 'Temperature'
    ]]
    features = df2.drop(columns='Temperature')
    target = df2['Temperature']
    features = sm.add_constant(features)
    x_train, x_test, y_train, y_test = train_test_split(features,
                                                        target,
                                                        shuffle=False,
                                                        test_size=0.2)
    model = sm.OLS(y_train, x_train).fit()
    print(model.summary())

    df2 = df[['Humidity', 'Wind Speed', 'Wind Direction', 'Temperature']]
    features = df2.drop(columns='Temperature')
    target = df2['Temperature']
    features = sm.add_constant(features)
    x_train, x_test, y_train, y_test = train_test_split(features,
                                                        target,
                                                        shuffle=False,
                                                        test_size=0.2)
    model = sm.OLS(y_train, x_train).fit()
    print(model.summary())

    # 1-step ahead prediction
    y_hat_OLS = model.predict(x_train)
    y_test_hat_OLS = model.predict(x_test)
    LR_plot_fun(y_train, y_test, y_hat_OLS, y_test_hat_OLS,
                'OLS Regression Method')

    prediction_error = y_train - y_hat_OLS
    forecast_error = y_test - y_test_hat_OLS
    lags = 30
    prediction_error_acf = cal_auto_corr(prediction_error, lags)
    forecast_error_acf = cal_auto_corr(forecast_error, lags)
    plot_acf(prediction_error_acf, lags=lags, var_name='OLS prediction error')
    plot_acf(forecast_error_acf, lags=lags, var_name='OLS forecast error')
    Q_value = cal_Q_value(prediction_error, prediction_error_acf, lags)

    T = len(x_train)
    K = len(x_train.columns)
    pred_var = (1 / (T - K - 1)) * (np.sum((prediction_error)**2))
    pred_std = np.sqrt((1 / (T - K - 1)) * (np.sum((prediction_error)**2)))
    print("Q value of the residual error: {:.2f}".format(Q_value))
    print("mean of prediction error: {:.2f}".format(np.mean(prediction_error)))
    print("variance of prediction error: ", pred_var)
    print("standard deviation of prediction error: ", pred_std)
    print("RMSE of prediction error: ", np.sqrt(np.mean(prediction_error**2)))

    T = len(x_test)
    K = len(x_test.columns)
    forecast_var = (1 / (T - K - 1)) * (np.sum((forecast_error)**2))
    forecast_std = np.sqrt((1 / (T - K - 1)) * (np.sum((forecast_error)**2)))
    print("mean of forecast error: {:.2f}".format(np.mean(forecast_error)))
    print("variance of forecast error: ", forecast_var)
    print("standard deviation of forecast error: ", forecast_std)
    print("RMSE of forecast error: ", np.sqrt(np.mean(forecast_error**2)))

    corr_coeff = round(correlation_coefficent_cal(y_test, y_test_hat_OLS), 2)
    plt.figure(figsize=(8, 6))
    plt.scatter(y_test,
                y_test_hat_OLS,
                c='green',
                alpha=1,
                label='y_test vs y_test_hat_OLS')
    plt.xlabel('y_test')
    plt.ylabel('y_test_hat_OLS')
    plt.title(
        "Scatter plot of y_test vs y_hat_test with correlation coefficient of {}"
        .format(corr_coeff))
    plt.legend()
    plt.show()

    corr_coeff1 = round(correlation_coefficent_cal(y_train, y_hat_OLS), 2)
    plt.figure(figsize=(8, 6))
    plt.scatter(y_train,
                y_hat_OLS,
                c='green',
                alpha=1,
                label='y_test vs y_test_hat_OLS')
    plt.xlabel('y_train')
    plt.ylabel('y_hat_OLS')
    plt.title(
        "Scatter plot of y_train vs y_hat_OLS with correlation coefficient of {}"
        .format(corr_coeff1))
    plt.legend()
    plt.show()
    return temp, temp_1, temp_2