Пример #1
0
                                           smoothing_slope=lamda)
# Store the forecast of the previously fitted model (`fit1`, built above this
# fragment) for as many steps as there are rows in `test`.
predicted5['CSWS'] = fit1.forecast(len(test))

# Plot train / test / forecast with the project's `draw` helper (defined
# elsewhere). Per the original note: the series is the SPP load as a daily
# average, red = training set, green = test set, blue = forecast — TODO confirm
# against draw().
name = 'method 5'
draw(train, test, predicted5, name)
# Root-mean-squared error of the forecast against the held-out values.
rms_Method5 = sqrt(mean_squared_error(test['CSWS'], predicted5['CSWS']))
print("rms_Method5:", rms_Method5)

# Method 6: Holt-Winters method
predicted6 = test.copy()
# Additive trend and additive seasonality with a seasonal period of 7 observations.
fit1 = ExponentialSmoothing(
    np.asarray(train['CSWS']),
    seasonal_periods=7,
    trend='add',
    seasonal='add',
).fit()
predicted6['CSWS'] = fit1.forecast(len(test))
# The author expects Holt-Winters to be the best of the compared models because
# of the seasonality in the data.
# The Holt-Winters seasonal method comprises the forecast equation and three
# smoothing equations: one for the level, one for the trend and one for the
# seasonal component, each with its own smoothing parameter.

# Same plot as for method 5 (colour scheme per the original note: red =
# training set, green = test set, blue = forecast — TODO confirm against draw()).
name = 'method 6'
draw(train, test, predicted6, name)
# Root-mean-squared error of the Holt-Winters forecast on the test set.
rms_Method6 = sqrt(mean_squared_error(test['CSWS'], predicted6['CSWS']))
print("rms_Method6:", rms_Method6)
Пример #2
0
def anomaly_holt(lista_datos, desv_mse=0):
    """Run the Holt-Winters anomaly engine over ``lista_datos``.

    The series is split into full/train/test frames by
    ``h.create_train_test``. An additive-trend, additive-seasonal
    ``ExponentialSmoothing`` model is fitted on the training frame for every
    seasonal period in ``range(2, 20)`` and the period with the lowest MAE on
    the test frame is kept. The forecast of the model refitted with that
    period is compared with the test values to build anomaly scores, and a
    second, seasonal-only model fitted on the full frame forecasts 5 more
    steps.

    Args:
        lista_datos: Sequence of observations. Only its length and the frames
            that ``h.create_train_test`` derives from it are used here.
        desv_mse: Unused; kept so existing callers keep working.

    Returns:
        dict with the keys ``rmse``, ``mse``, ``mae``, ``present_status``
        (``'TRUE'`` or ``'FALSE'``), ``present_alerts``, ``past``, ``engine``,
        ``future`` and ``debug``. The list-valued entries are
        ``DataFrame.to_dict(orient='records')`` dumps.
    """
    lista_puntos = np.arange(0, len(lista_datos), 1)

    # The helper is defined elsewhere; the code below reads a 'valores' column
    # from all three frames and a 'puntos' column from the full frame.
    df, df_train, df_test = h.create_train_test(lista_puntos, lista_datos)

    engine_output = {}
    horizon = len(df_test['valores'])

    # Grid search over the seasonal period, scored by MAE on the test frame.
    mae_period = 99999999
    best_period = 0
    for period in range(2, 20):
        print("Period: " + str(period))
        candidate_fit = ExponentialSmoothing(
            df_train['valores'],
            seasonal_periods=period,
            trend='add',
            seasonal='add',
        ).fit()
        candidate_forecast = candidate_fit.forecast(horizon)

        mae_temp = mean_absolute_error(candidate_forecast.values,
                                       df_test['valores'].values)
        if mae_temp < mae_period:
            best_period = period
            mae_period = mae_temp
        else:
            print("mae:" + str(mae_temp))
    print("######best mae is " + str(mae_period) + " with the period " +
          str(best_period))

    # Refit with the winning period; this forecast drives all scores below.
    stepwise_model = ExponentialSmoothing(
        df_train['valores'],
        seasonal_periods=best_period,
        trend='add',
        seasonal='add',
    )
    fit_stepwise_model = stepwise_model.fit()

    future_forecast_pred = fit_stepwise_model.forecast(horizon)
    print(future_forecast_pred.values)

    list_test = df_test['valores'].values
    test_values = pd.DataFrame(future_forecast_pred,
                               index=df_test.index,
                               columns=['expected value'])

    print(list_test)

    mse = mean_squared_error(future_forecast_pred.values, list_test)

    print('Model_test mean error: {}'.format(mse))
    rmse = np.sqrt(mse)
    print('Model_test root error: {}'.format(rmse))

    # Computed once and reused for the score column and the engine output.
    mae = mean_absolute_error(list_test, future_forecast_pred)

    df_aler = pd.DataFrame(future_forecast_pred,
                           index=df.index,
                           columns=['expected value'])
    df_aler['step'] = df['puntos']
    df_aler['real_value'] = df_test['valores']
    df_aler['mse'] = mse
    df_aler['rmse'] = rmse
    df_aler['mae'] = mae
    # Score = absolute forecast error expressed in multiples of the MAE.
    df_aler['anomaly_score'] = abs(df_aler['expected value'] -
                                   df_aler['real_value']) / df_aler['mae']

    # NOTE(review): this slices the FIRST five rows and then keeps those whose
    # index lies within the last five index values; a trailing slice may have
    # been intended. Behaviour is left unchanged — confirm with the owner.
    last_index = df_aler.index.max()
    df_aler_ult = df_aler[:5]
    df_aler_ult = df_aler_ult[
        (df_aler_ult.index == last_index) |
        (df_aler_ult.index == (last_index - 1)) |
        (df_aler_ult.index == (last_index - 2)) |
        (df_aler_ult.index == (last_index - 3)) |
        (df_aler_ult.index == (last_index - 4))].copy()
    exists_anom_last_5 = 'FALSE' if len(df_aler_ult) == 0 else 'TRUE'

    # Keep only rows scoring above 2 and min-max normalise their scores.
    # `.copy()` makes the column assignment act on an independent frame.
    df_aler = df_aler[(df_aler['anomaly_score'] > 2)].copy()
    score_max = df_aler['anomaly_score'].max()
    score_min = df_aler['anomaly_score'].min()
    df_aler['anomaly_score'] = ((df_aler['anomaly_score'] - score_min) /
                                (score_max - score_min))

    score_max = df_aler_ult['anomaly_score'].max()
    score_min = df_aler_ult['anomaly_score'].min()
    df_aler_ult['anomaly_score'] = ((df_aler_ult['anomaly_score'] -
                                     score_min) / (score_max - score_min))

    print("Anomaly finished. Start forecasting")
    # Seasonal-only model on the full frame for the 5-step outlook.
    stepwise_model1 = ExponentialSmoothing(df['valores'],
                                           seasonal_periods=best_period,
                                           seasonal='add')
    print("Pass the training")
    fit_stepwise_model1 = stepwise_model1.fit()
    future_forecast_pred1 = fit_stepwise_model1.forecast(5)
    print("Pass the forecast")

    engine_output['rmse'] = rmse
    engine_output['mse'] = mse
    engine_output['mae'] = mae
    engine_output['present_status'] = exists_anom_last_5
    # 'records' is the full orient name; the abbreviated 'record' alias is
    # rejected by pandas >= 2.0.
    engine_output['present_alerts'] = df_aler_ult.fillna(0).to_dict(
        orient='records')
    engine_output['past'] = df_aler.fillna(0).to_dict(orient='records')
    engine_output['engine'] = 'Holtwinters'
    print("Only for future")
    df_future = pd.DataFrame(future_forecast_pred1, columns=['value'])
    df_future['value'] = df_future.value.astype("float32")
    df_future['step'] = np.arange(len(lista_datos), len(lista_datos) + 5, 1)
    engine_output['future'] = df_future.to_dict(orient='records')
    test_values['step'] = test_values.index
    print("debug de Holtwinters")
    print(test_values)
    engine_output['debug'] = test_values.to_dict(orient='records')

    print("la prediccion es")
    print(df_future)

    return engine_output
Пример #3
0
# Different combinations of windows
#%%%  Exponential smoothing
# Requires pmdarima (`pip install pmdarima`, or install it from Anaconda).
import pmdarima.datasets as pm
data2= pm.load_airpassengers(True)
data2

from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
# NOTE(review): `data` (and `np`) are not defined in this fragment — only
# `data2` is loaded above; presumably `data` comes from an earlier cell — confirm.
# Simple exponential smoothing with a fixed smoothing level of 0.6 (no optimisation).
fit2 = SimpleExpSmoothing( np.asarray(data)).fit( smoothing_level=0.6, optimized=False)
data.tail(6)
data
fit2.forecast(5)  # forecast 5 periods ahead

#%%%  The original author marked this cell with "some error".
# Holt-Winters with additive trend and additive seasonality, seasonal period 7.
fit3 = ExponentialSmoothing(np.asarray(data) ,seasonal_periods=7 , trend='add', seasonal='add',).fit()
fit3.forecast(5)

####
from statsmodels.tsa.api import ExponentialSmoothing
# Model without trend or seasonal arguments, fitted with smoothing_level=0.1.
exp = ExponentialSmoothing(data)
exp_model = exp.fit(smoothing_level=0.1)
result = exp_model.fittedvalues
dir(exp_model)
data
# The single positional argument of predict() is `start`.
exp_model.predict(30)
result
result.plot()


#%%%%
Пример #4
0
pyplot.show()

#=======================================
#utf-8  2020-03-09 16:47:04
#Exponential Smoothing for EAFV
from pandas import read_excel
from statsmodels.tsa.api import ExponentialSmoothing
from matplotlib import pyplot
# Load the workbook with the first column as a date-parsed index.
# `read_excel(..., squeeze=True)` was deprecated in pandas 1.4 and removed in
# 2.0; squeezing the loaded frame along its columns is the documented
# equivalent and works on older pandas as well.
series = read_excel('forecasting.xls',
                    index_col=0,
                    parse_dates=True).squeeze("columns")

# Seasonal exponential smoothing: additive seasonality and no trend term
# (so this is not Holt's linear-trend method), fitted with a Box-Cox transform.
# NOTE(review): newer statsmodels releases expect `use_boxcox` on the
# constructor rather than on fit() — confirm against the pinned version.
fit3 = ExponentialSmoothing(series['EAFV'],
                            seasonal='add').fit(use_boxcox=True)
# Forecast 12 steps ahead and plot forecast, observations and fitted values.
Forecasting = fit3.forecast(12).rename("Forecasting")
Forecasting.plot(color='orange', legend=True)
series['EAFV'].plot(title='Exponential Smoothing for EAFV',
                    color='blue',
                    legend=True)
fit3.fittedvalues.plot(color='orange')
pyplot.show()
print(Forecasting)

# Calculate the in-sample MSE between fitted values and observations.
from sklearn.metrics import mean_squared_error
MSE = mean_squared_error(fit3.fittedvalues, series['EAFV'])
print(MSE)

#Seperate EAFV to Testing dataset and training dataset
Пример #5
0
    # Plot train, test and the Holt linear-trend forecast on one figure.
    plt.figure(figsize=(12, 8))
    plt.plot(train['Count'], label='Train')
    plt.plot(test['Count'], label='Test')
    plt.plot(y_hat_holt['Holt_linear'], label='Holt_linear')
    plt.legend(loc='best')
    plt.title("Holt线性趋势法")

    # RMSE of the Holt forecast against the test set.
    rms = sqrt(mean_squared_error(test['Count'], y_hat_holt['Holt_linear']))
    print("霍尔特(Holt)线性趋势法RMS:" + str(rms))
    # endregion

    # region Holt-Winters seasonal forecasting model
    from statsmodels.tsa.api import ExponentialSmoothing

    y_hat_HoltWinter = test.copy()
    # Additive trend and additive seasonality with a seasonal period of 7 observations.
    fit1 = ExponentialSmoothing(np.asarray(train['Count']), seasonal_periods=7, trend='add', seasonal='add', ).fit()
    y_hat_HoltWinter['Holt_Winter'] = fit1.forecast(len(test))

    # Plot train, test and the Holt-Winters forecast on one figure.
    plt.figure(figsize=(12, 8))
    plt.plot(train['Count'], label='Train')
    plt.plot(test['Count'], label='Test')
    plt.plot(y_hat_HoltWinter['Holt_Winter'], label='Holt_Winter')
    plt.legend(loc='best')
    plt.title("Holt-Winters季节性预测法")

    # RMSE of the Holt-Winters forecast against the test set.
    rms = sqrt(mean_squared_error(test['Count'], y_hat_HoltWinter['Holt_Winter']))
    print("Holt-Winters季节性预测模型RMS:" + str(rms))
    # endregion

    # region自回归移动平均模型(ARIMA)
    import statsmodels.api as sm
Пример #6
0
def _load_pickled_model(path):
    """Unpickle the estimator stored at ``path`` and close the file afterwards."""
    # NOTE(security): unpickling executes arbitrary code; only load model
    # files that come from a trusted source.
    with open(path, "rb") as model_file:
        return pickle.load(model_file)


def _format_recommendation(probs):
    """Turn a ``(p_down, p_up)`` probability pair into the report's advice lines."""
    up_prob = round(probs[1], 2)
    down_prob = round(probs[0], 2)
    if up_prob > 0.7:
        return (f"Probabilidade de ALTA para amanhã: {up_prob}\n"
                "Ação: COMPRAR\n\n")
    if up_prob > 0.4:
        return (f"Probabilidade incerta, com chance para alta de: {up_prob}\n"
                "Ação: MANTER\n\n")
    return (f"Probabilidade de BAIXA para amanhã: {down_prob}\n"
            "Ação: VENDER.\n\n")


def play(nvers=150, plot=True):
    """Refresh the BTC/BRL history, forecast the next value and build a report.

    Steps performed:
      1. Scrape the historical table from investing.com, append rows whose
         date is not yet in ``Bitcoin/data_brl.csv`` and rewrite that file.
      2. Forecast 2 steps ahead with Holt-Winters (seasonal periods 4, 7, 12),
         SARIMAX and simple exponential smoothing (levels 0.6, 0.9, 1.2).
      3. Encode whether each model's first forecast is above the latest value,
         together with dispersion statistics, into a one-row feature frame.
      4. Feed that frame to three stored classifiers (gradient, random forest,
         Keras sequential) and append their recommendation to the report.

    Args:
        nvers: Version tag used in the stored model file names
            (``Bitcoin/<kind>_brl_<nvers>s.*``).
        plot: When true, draw the forecasts and save them to
            ``Bitcoin/bitgraphic.png``.

    Returns:
        The report as a single string.
    """
    import warnings

    import matplotlib.pyplot as plt
    from statsmodels.tsa.api import SimpleExpSmoothing, ExponentialSmoothing
    from keras.models import model_from_json
    import statsmodels.api as sm

    # --- 1. refresh the stored history -----------------------------------
    browser = RoboBrowser(
        parser="html.parser",
        user_agent=
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36",
    )
    url = "https://br.investing.com/crypto/bitcoin/btc-brl-historical-data"
    browser.open(url)
    df = pd.read_csv("Bitcoin/data_brl.csv")
    ftoday = pd.to_datetime(today(time.time()), dayfirst=True)
    # Skip the header row of the table.
    rows = browser.find(
        class_="genTbl closedTbl historicalTbl").find_all("tr")[1:]
    known_dates = df["date"].values
    new_rows = {"date": [], "val": []}
    for row in rows:
        itens = row.find_all("td")
        date = itens[0].text
        val = turnFloat(itens[1].text)
        fdate = pd.to_datetime(date, dayfirst=True)
        if date not in known_dates:
            age = ftoday - fdate
            # Accept past days; accept today's row only from 21h local time on.
            if age.days > 0 or (age.days == 0
                                and time.localtime().tm_hour >= 21):
                new_rows["date"].append(date)
                new_rows["val"].append(val)
    # The rows are reversed before appending so they follow the stored order.
    new_rows["date"].reverse()
    new_rows["val"].reverse()
    df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)
    df.to_csv("Bitcoin/data_brl.csv", index=False)

    # --- 2. report header -------------------------------------------------
    vals = df["val"]
    last = vals.iloc[-2]
    now = vals.iloc[-1]
    var = round((now - last) / last, 4)
    n_total = len(df)

    current = df["val"][n_total - 1]
    nowdate = df["date"][n_total - 1]
    result = f"Data: {nowdate}\n"
    result += f"Valor presente: {current}\n"
    result += f"Variação: {var}\n\n"
    result += "Previsão:\n\n"
    amost = 15
    n_pred = 2

    # --- 3. forecasts -----------------------------------------------------
    pred = {}
    # Padding so that each forecast lines up after the observed history.
    null = np.array([None for _ in range(n_total)])
    values = np.asarray(df["val"])

    def record(name, forecast):
        """Store the padded forecast under ``name`` and plot it if requested."""
        pred[name] = np.concatenate((null, forecast))
        if plot:
            plt.plot(pred[name], label=f"Previsão {name}")

    if plot:
        plt.figure(figsize=(16, 8))
        plt.plot(df[-amost:]["val"], label="Valor Real")

    for sp in (4, 7, 12):
        fitted = ExponentialSmoothing(
            values,
            seasonal_periods=sp,
            trend="add",
            seasonal="add",
        ).fit()
        record(f"ES_{sp}", fitted.forecast(n_pred))

    fitted = sm.tsa.statespace.SARIMAX(df["val"],
                                       order=(2, 1, 4),
                                       seasonal_order=(0, 1, 1, 7)).fit()
    record("SARIMAX", fitted.forecast(n_pred))

    for sl in (0.6, 0.9, 1.2):
        fitted = SimpleExpSmoothing(values).fit(smoothing_level=sl,
                                                optimized=False)
        record(f"SES_{sl}", fitted.forecast(n_pred))

    if plot:
        plt.legend(loc="best")
        plt.savefig("Bitcoin/bitgraphic.png")

    # --- 4. feature row for the classifiers --------------------------------
    # Key order defines the column order of the frame passed to the models.
    hist = {
        "ES_4": [],
        "ES_7": [],
        "ES_12": [],
        "SARIMAX": [],
        "SES_0.6": [],
        "SES_0.9": [],
        "SES_1.2": [],
        "DP": [],
        "DP_U": [],
        "DP_D": [],
        "NUM_U": [],
        "NUM_D": [],
        "DIST_U": [],
        "DIST_D": [],
    }

    preds = []
    ups = []
    downs = []

    for k, padded in pred.items():
        # Index n_total is the first forecast step after the history.
        p = padded[n_total]
        preds.append(p)
        up = p > current
        if up:
            ups.append(p)
            hist[k].append(1)
        else:
            downs.append(p)
            hist[k].append(0)
        result += f"{k}: {p} --- {up}\n"

    # When every model agrees, one of `ups`/`downs` is empty and its
    # statistics are NaN.
    sd = numerize(round(np.std(preds), 3))
    u_sd = numerize(round(np.std(ups), 3))
    d_sd = numerize(round(np.std(downs), 3))
    u_len = numerize(round(len(ups), 3))
    d_len = numerize(round(len(downs), 3))
    u_dif = numerize(round(abs(np.mean(ups) - current), 3))
    d_dif = numerize(round(abs(np.mean(downs) - current), 3))

    hist["DP"].append(sd)
    hist["DP_U"].append(u_sd)
    hist["DP_D"].append(d_sd)
    hist["NUM_U"].append(u_len)
    hist["NUM_D"].append(d_len)
    hist["DIST_U"].append(u_dif)
    hist["DIST_D"].append(d_dif)

    info = pd.DataFrame(hist)

    result += f"\nDesvio Padrão: {sd}\n"
    result += f"Desvio Padrão de crescentes: {u_sd}\n"
    result += f"Desvio Padrão de decrescentes: {d_sd}\n"
    result += f"Número de crescentes: {u_len}\n"
    result += f"Número de decrescentes: {d_len}\n"
    result += f"Distância de crescentes: {u_dif}\n"
    result += f"Distância de decrescentes: {d_dif}\n\n"

    # --- 5. stored classifiers ----------------------------------------------
    for family in ("gradient", "randomforest"):
        name = f"{family}_brl_{nvers}s"
        model = _load_pickled_model(f"Bitcoin/{name}.sav")
        result += f"Análise de resultados por {name}:\n\n"
        result += _format_recommendation(model.predict_proba(info)[0])

    name = f"sequential_brl_{nvers}s"
    result += f"Analise de resultados por {name}:\n"
    # Load the architecture from JSON, then the weights.
    with open(f"Bitcoin/{name}.json", "r") as json_file:
        loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)
    try:
        model.load_weights(f"Bitcoin/{name}.h5")
    except Exception as exc:
        # Still best-effort, but no longer silent: without the weights the
        # recommendation below comes from an untrained network.
        warnings.warn(
            f"could not load weights for {name}: {exc!r}", RuntimeWarning)

    result += _format_recommendation(model.predict_proba(info)[0])
    return result
Пример #7
0
from pandas import read_excel
import pandas as pd
import statsmodels.api as sm  
from statsmodels.tsa.api import ExponentialSmoothing

# Load the 'data' sheet with the first column as a date-parsed index.
# `read_excel(..., squeeze=True)` was deprecated in pandas 1.4 and removed in
# 2.0; squeezing the loaded frame along its columns is the documented
# equivalent and works on older pandas as well.
k54d_df = read_excel('K54Ddata_31404626.xls', sheet_name='data', header=0,
                     index_col=0, parse_dates=True).squeeze("columns")

# Seasonal ARIMA (0,1,2)x(0,1,2,12); one-step-ahead predictions from 2001-02-01.
mod = sm.tsa.statespace.SARIMAX(k54d_df, order=(0,1,2),
                                seasonal_order=(0,1,2,12))
results = mod.fit(disp=False)
fit1_arima = results.get_prediction(start=pd.to_datetime('2001-02-01'), dynamic=False)
Error_1 = k54d_df['2001-02-01':] - fit1_arima.predicted_mean


# Holt-Winters with additive trend and multiplicative seasonality. The
# undamped trend is the default, so the deprecated `damped=False` keyword is
# simply omitted.
fit_1 = ExponentialSmoothing(k54d_df, seasonal_periods=12, trend='add', seasonal='mul').fit()
fcast_1 = fit_1.forecast(12).rename("Holt-Winter's model")
# `Series.append` was removed in pandas 2.0; `pd.concat` gives the same series.
fit_fcast_1 = pd.concat([fit_1.fittedvalues, fcast_1]).rename("Holt-Winter's 1")
Error_2 = k54d_df['2001-02-01':] - fit_1.fittedvalues['2001-02-01':]

# Mean squared error of each model over the window starting 2001-02-01.
MSE1 = sum(Error_1 ** 2)*1.0/len(fit1_arima.predicted_mean)
MSE2 = sum(Error_2 ** 2)*1.0/len(fit_1.fittedvalues['2001-02-01':])

print("MSE ARIMA ",MSE1, "MSE HWM: ", MSE2)
# Notebook cell: number of rows in the test frame.
len(df_test)


# In[121]:


# Notebook cell: number of rows in the training frame.
len(df_train)


# In[122]:


# NOTE(review): the original note said seasonal_periods = 2 so that summer and
# winter trends are taken into account, but both calls below pass
# seasonal_periods=52 — confirm which is intended.
# Fit the Holt-Winters models on the first column of the training frame and
# forecast as many steps as there are test rows.
fit7 = ExponentialSmoothing(df_train.iloc[:,0], seasonal_periods=52, trend='add', seasonal='add').fit()
fcast7 = fit7.forecast(len(df_test)).rename("Holt-Winters Additive")

fit8 = ExponentialSmoothing(df_train.iloc[:,0], seasonal_periods=52, trend='add', seasonal='mul').fit()
fcast8 = fit8.forecast(len(df_test)).rename("Holt-Winters Multiplikativ")


# In[129]:


# Plot the results

# IPython-only: enable inline matplotlib output (this line fails outside IPython).
get_ipython().run_line_magic('matplotlib', 'inline')
plt.rcParams['figure.figsize'] = [15, 6]

plt.plot(df_test.index, df_test.values, label='Testdaten')
Пример #9
0
                                   columns="reporting_delay_rki")
# Relabel every column as "N(T-<x>,T)", where <x> is the column's level-0 label.
nowcast_ts.columns = [
    f"N(T-{x},T)" for x in nowcast_ts.columns.get_level_values(0)
]
nowcast_ts.index.name = "T"
nowcast_ts.index.freq = "d"  # set frequency to daily

# Notebook-style inspection of rows 35..39 (no effect when run as a script).
nowcast_ts.iloc[35:40, ]

# ## Exponential Smoothing
# We can use simple exponential smoothing as a normalization technique, where the actual time series is divided by the last smoothed level to obtain a normalized series close to 1. This can make differently scaled time series comparable and may increase the potential for cross-learning.

from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt

# fit1 = Holt(nowcast_ts["final"], damped=True).fit(smoothing_slope=0)
# No trend or seasonal arguments are passed; the smoothing level is fixed at 0.4.
fit1 = ExponentialSmoothing(
    nowcast_ts["N(T-30,T)"].dropna()).fit(smoothing_level=0.4)

# Side-by-side frame: original series, fitted values, series divided by the
# smoothed level, and a 20-step forecast.
# NOTE(review): the first concatenated series is selected as "N(T-30,T)", so
# the "final" key in the rename mapping looks like it will not match any
# column — confirm the intended label.
es_result = (pd.concat(
    [
        nowcast_ts["N(T-30,T)"],
        fit1.fittedvalues,
        nowcast_ts["N(T-30,T)"] / fit1.level,
        fit1.forecast(20),
    ],
    axis=1,
).rename(columns={
    "final": "original",
    0: "fitted",
    1: "normalized",
    2: "forecast"
}).reset_index().rename(columns={"index": "T"}))
# Plot the data along with ACF and PACF plots to look for seasonality/trend
bike['num_rides'].plot(figsize=(10, 6))

# Autocorrelation (left) and partial autocorrelation (right), 36 lags each.
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
fig = sm.graphics.tsa.plot_acf(bike['num_rides'], lags=36, ax=axes[0])
fig = sm.graphics.tsa.plot_pacf(bike['num_rides'], lags=36, ax=axes[1])

###############################
# Exponential Smoothing Model
###############################

# Fit a Holt-Winters model with damped additive trend and additive seasonality
# (seasonal period 12) to our data.
# NOTE(review): newer statsmodels releases name this keyword `damped_trend` —
# confirm against the pinned version.
fit_hw = ExponentialSmoothing(bike['num_rides'],
                              seasonal_periods=12,
                              trend='add',
                              seasonal='add',
                              damped=True).fit()

# Plot the H-W model's fitted values alongside the true data
bike_plot = bike['num_rides'].plot(figsize=(10, 6),
                                   title="Holt-Winters' Method Bike Share Fit")
bike_plot.set_ylabel("Number of Bike rentals")
bike_plot.set_xlabel("Year")

fit_hw.fittedvalues.plot(ax=bike_plot, style='--', color='DarkRed')
bike_plot.legend(['Bike Rentals', 'H-W Model Fit'])

# The in-sample MAPE (Mean Absolute Percentage Error) of the H-W model,
# expressed as a fraction rather than a percentage.
np.average(
    np.absolute((fit_hw.fittedvalues - bike['num_rides']) / bike['num_rides']))
Пример #11
0
    print("Estimation of the last quarter of 2018 for World Population:",
          forecast[0])
elif errors[1] == min(errors):
    #if Simple Exponential Smoothing method gives  the smallest MAPE
    fit2 = SimpleExpSmoothing(np.asarray(dfWorlddata['POPULATION'])).fit(
        smoothing_level=ses_optimal_alpha, optimized=False)
    forecast = fit2.forecast(1)
    print("We applied Holt method for World population data")
    print("Estimation of the last quarter of 2018 for World Population:",
          forecast[0])
elif errors[2] == min(errors):
    #if Holt-Winters method gives  the smallest MAPE
    seasons = 10
    fit = ExponentialSmoothing(
        np.asarray(dfWorlddata['POPULATION']),
        seasonal_periods=seasons,
        trend='add',
        seasonal='add',
    ).fit()
    forecast = fit.forecast(1)
    print("We applied Holt method for World population data")
    print("Estimation of the last quarter of 2018 for World Population:",
          forecast[0])
else:
    #if Holt method gives  the smallest MAPE
    fit1 = holt_method(np.asarray(dfWorlddata["POPULATION"])).fit(
        smoothing_level=holt_optimal_alpha, smoothing_slope=holt_optimal_slope)
    forecast = fit1.forecast(1)
    print("We applied Holt method for World population data")
    print("Estimation of the last quarter of 2018 for World Population:",
          forecast[0])
from pandas import read_excel
import pandas as pd
from numpy import log, exp, sqrt
from statsmodels.tsa.api import ExponentialSmoothing


# Load the 'data' sheet with the first column as a date-parsed index.
# `read_excel(..., squeeze=True)` was deprecated in pandas 1.4 and removed in
# 2.0; squeezing the loaded frame along its columns is the documented
# equivalent and works on older pandas as well.
eafv_df = read_excel('EAFVdata_31404626.xls', sheet_name='data', header=0,
                     index_col=0, parse_dates=True).squeeze("columns")

# Variance-stabilising transforms to compare against the raw series.
eafv_log = log(eafv_df)
eafv_sqrt = sqrt(eafv_df)

# Holt-Winters with additive trend on each series, once with multiplicative
# and once with additive seasonality (period 12).
fit_1 = ExponentialSmoothing(eafv_df, seasonal_periods=12, trend='add', seasonal='mul').fit()
fit_2 = ExponentialSmoothing(eafv_df, seasonal_periods=12, trend='add', seasonal='add').fit()
fit_3 = ExponentialSmoothing(eafv_log, seasonal_periods=12, trend='add', seasonal='mul').fit()
fit_4 = ExponentialSmoothing(eafv_log, seasonal_periods=12, trend='add', seasonal='add').fit()
fit_5 = ExponentialSmoothing(eafv_sqrt, seasonal_periods=12, trend='add', seasonal='mul').fit()
fit_6 = ExponentialSmoothing(eafv_sqrt, seasonal_periods=12, trend='add', seasonal='add').fit()

# Residuals on the original scale: fitted values of the transformed models are
# mapped back with exp() or squaring before subtraction.
Error_1 = eafv_df - fit_1.fittedvalues
Error_2 = eafv_df - fit_2.fittedvalues
Error_3 = eafv_df - exp(fit_3.fittedvalues)
Error_4 = eafv_df - exp(fit_4.fittedvalues)
Error_5 = eafv_df - fit_5.fittedvalues**2
Error_6 = eafv_df - fit_6.fittedvalues**2

# In-sample mean squared errors (the back-transform does not change the length).
MSE1 = sum(Error_1**2)*1.0/len(fit_1.fittedvalues)
MSE2 = sum(Error_2**2)*1.0/len(fit_2.fittedvalues)
MSE3 = sum(Error_3**2)*1.0/len(fit_3.fittedvalues)
MSE4 = sum(Error_4**2)*1.0/len(fit_4.fittedvalues)
MSE5 = sum(Error_5**2)*1.0/len(fit_5.fittedvalues)
def TIME_SERIES_ALGO(df, bool_stat):
    dict_rmse = dict()

    bool_log, df_log = log_transformation(df)
    col = df.columns[0]
    # 1.. NAIVE APPROACH
    # IN THIS APPROCAH WE ASSIGN RECENT VALUE TO THE TEST DATAFRAME

    try:
        train, test = train_test_split(df)

        y_prd = np.asarray([train.ix[train.shape[0] - 1].values[0]] *
                           (test.shape[0]))

        rs_naive = sqrt(mean_squared_error(test[col].values, y_prd))
        print(rs_naive)
        dict_rmse["naive"] = rs_naive
        insert_into_database("NAIVE", rs_naive, "{}")

        if bool_log:
            # PERFORM SAME ABOVE THING FOR LOG TRANSFORMED DATA
            train, test = train_test_split(df_log)

            y_prd = np.asarray([train.ix[train.shape[0] - 1].values[0]] *
                               (test.shape[0]))

            y_prd = np.exp(y_prd)

            rs_naive_log = sqrt(mean_squared_error(test[col].values, y_prd))
            print(rs_naive_log)
            dict_rmse["naive_log"] = rs_naive_log
            insert_into_database("NAIVE", rs_naive_log, "{}")

    except Exception as e:
        insert_into_database("NAIVE", None, e)
        print(("error in modelling in naive approach,{}".format(e)))

    # 2..SIMPLE AVERAGE
    try:

        train, test = train_test_split(df)
        mean_forecast = train[col].mean()
        y_prd = np.asarray([mean_forecast] * test.shape[0])
        rs_mean = sqrt(mean_squared_error(test[col].values, y_prd))
        dict_rmse["simple_avg"] = rs_mean
        insert_into_database("SIMPLE_AVG", rs_mean, "{}")

        if bool_log:
            train, test = train_test_split(df_log)
            mean_forecast = train[col].mean()
            y_prd = np.asarray([mean_forecast] * test.shape[0])

            y_prd = np.exp(y_prd)

            rs_mean = sqrt(mean_squared_error(test[col].values, y_prd))
            dict_rmse["simple_avg_log"] = rs_mean
            insert_into_database("SIMPLE_AVG", rs_mean, "{}")

    except Exception as e:
        insert_into_database("SIMPLE_AVG", None, e)
        print(("error in moving average,{}".format(e)))

    # 3..MOVING AVERAGE

    # IN PROGRESS HAVE TO MODIFY IT...
    try:
        train, test = train_test_split(df)
        for i in range(25, 90):
            # As rolling mean returns mean fo ecah row we want mean f only last row because it is onlu used to forecast
            mean_moving = train[col].rolling(i).mean().ix[train.shape[0] - 1]
            print(mean_moving)
            y_prd = np.asarray([mean_moving] * test.shape[0])
            rs_moving = sqrt(mean_squared_error(test[col].values, y_prd))
            insert_into_database("MVG_AVG", rs_moving, "{}")

    except Exception as e:
        insert_into_database("MVG_AVG", None, e)
        print(("error in moving average,{}".format(e)))
    try:

        if bool_log:
            for i in range(25, 90):
                train, test = train_test_split(df_log)

                # print(type(train[col].rolling(i).mean()))
                mean_moving = train[col].rolling(i).mean().ix[train.shape[0] -
                                                              1]

                y_prd = np.array([mean_moving] * test.shape[0])
                print(y_prd)
                y_prd = np.exp(y_prd)

                rs_moving_log = sqrt(
                    mean_squared_error(test[col].values, y_prd))
                insert_into_database("MVG_AVERAGE", rs_moving_log, "{}")

    except Exception as e:
        insert_into_database("MVG_AVERAGE", None, e)
        print(("error in log moving average model, {}".format(e)))

    # 4.. SIMPLE EXPONENTIAL SMOOTHING
    try:
        train, test = train_test_split(df)
        fit2 = SimpleExpSmoothing(df[col]).fit(smoothing_level=0.6,
                                               optimized=False)
        # print(test.index[0])
        # print(test.index[test.shape[0]-1])
        y_prd = fit2.forecast(len(test))
        print(y_prd)

        rs_simple = sqrt(mean_squared_error(test.values, y_prd))
        dict_rmse["simple"] = rs_simple
        insert_into_database("SIMPLE_EXP", rs_simple, "{}")

    except Exception as e:
        print(("error is simple exp without log,{}".format(e)))
        insert_into_database("SIMPLE_EXP", None, e)

    try:
        if bool_log:
            train, test = train_test_split(df_log)
            fit2 = SimpleExpSmoothing(df[col]).fit(smoothing_level=0.6,
                                                   optimized=False)
            y_prd = fit2.forecast(len(test))
            y_prd = np.exp(y_prd)
            rs_simple = sqrt(mean_squared_error(test.values, y_prd))
            dict_rmse["simple_log"] = rs_simple
            insert_into_database("SIMPLE_EXP", rs_simple, "{}")

    except Exception as e:
        insert_into_database("SIMPLE_EXP", None, e)
        print(("simple exponential smoothing log,{}".format(e)))

    # HOT LINEAR METHOD FOR FORECASTING
    try:
        train, test = train_test_split(df)
        fit2 = Holt(train[col], exponential=True, damped=False).fit()
        y_prd = fit2.predict(test.index.values[0],
                             test.index.values[test.shape[0] - 1])
        rs_hotl = sqrt(mean_squared_error(test[col].values, y_prd))
        dict_rmse["rs_hotl"] = rs_hotl
        insert_into_database("HOLT_LINEAR", rs_hotl, "{}")

        if bool_log:
            train, test = train_test_split(df)
            fit2 = Holt(train[col], exponential=True, damped=False).fit()
            y_prd = fit2.predict(test.index.values[0],
                                 test.index.values[test.shape[0] - 1])
            y_prd = np.exp(y_prd)
            rs_hotl_log = sqrt(mean_squared_error(test[col].values, y_prd))
            dict_rmse["rs_hotl_log"] = rs_hotl_log
            insert_into_database("HOLT_LINEAR", rs_hotl_log, "{}")

    except Exception as e:
        insert_into_database("HOLT_LINEAR", None, e)
        print((
            "error in HOLT linear forecasting in without damped.{}".format(e)))

    try:

        fit2 = Holt(train[col], exponential=True, damped=True).fit()
        y_prd = fit2.predict(test.index.values[0],
                             test.index.values[test.shape[0] - 1])
        rs_holtld = sqrt(mean_squared_error(test[col].values, y_prd))
        dict_rmse["rs_holtld"] = rs_holtld
        insert_into_database("HOLT_LINEAR", rs_holtld, "{}")

        if bool_log:
            fit2 = Holt(train[col], exponential=True, damped=True).fit()
            y_prd = fit2.predict(test.index.values[0],
                                 test.index.values[test.shape[0] - 1])
            y_prd = np.exp(y_prd)
            rs_holtld = sqrt(mean_squared_error(test[col].values, y_prd))
            dict_rmse["rs_holtld"] = rs_holtld
            insert_into_database("HOLT_LINEAR", rs_holtld, "{}")

    except Exception as e:
        print(("error in HOLT linear smoothing  damped,{}".format(e)))
        insert_into_database("HOLT_LINEAR", None, e)

    # HOLT WINTERS FORECASTING..
    try:
        train, test = train_test_split(df)
        # print("fmmf")
        fit2 = ExponentialSmoothing(test[col],
                                    trend="mul",
                                    seasonal="mul",
                                    seasonal_periods=12).fit()
        y_prd = fit2.predict(test.index.values[0],
                             test.index.values[test.shape[0] - 1])
        rs_hlw = sqrt(mean_squared_error(test[col].values, y_prd))
        print(rs_hlw)
        dict_rmse["rs_hlw"] = rs_hlw
        insert_into_database("HOLT_WINTER", rs_hlw, "{}")

        if bool_log:
            train, test = train_test_split(df_log)
            fit2 = ExponentialSmoothing(test[col],
                                        trend="add",
                                        seasonal="add",
                                        seasonal_periods=12).fit()
            y_prd = fit2.predict(test.index.values[0],
                                 test.index.values[test.shape[0] - 1])
            y_prd = np.exp(y_prd)
            rs_hlw_log = sqrt(mean_squared_error(test[col].values, y_prd))
            print(rs_hlw_log)
            dict_rmse["rs_hlw_log"] = rs_hlw_log
            insert_into_database("HOLT_WINTER", rs_hlw_log, "{}")

    except Exception as e:
        print(("error in HOLT winter forecasting,{}".format(e)))
        insert_into_database("HOLT_WINTER", None, e)
    # ARIMA MODEL....

    # try:
    #     rs = test_stationary(df, col)
    #     if rs:
    #
    #         # Here we decide the order of diffrencing the Time Series
    #         df_diff = df - df.shift()
    #         df_diff.dropna(inplace=True)
    #         rs = test_stationary(df_diff, col)
    #         if rs:
    #             df_diff = df_diff - df_diff.shift()
    #
    #     df_diff.dropna(inplace=True)
    #
    #     train, test = train_test_split(df_diff)
    #
    #     """ The acf and pacf plots are
    #         used to calculate the the parametre for AR
    #         AND MA MODELS"""
    #
    #     ar_list = get_params_p(train)
    #     ma_list = get_params_q(train)
    #
    #     for i in ma_list:
    #         for j in ar_list:
    #             try:
    #                 model = ARIMA(train, order=(j, 0, i)).fit()
    #                 y_prd = model.predict(start=test.index.values[0], end=test.index.values[test.shape[0] - 1])
    #
    #                 rs = sqrt(mean_squared_error(test[col].values, y_prd))
    #                 insert_into_database("ARIMA", rs, "{}")
    #             except Exception as e:
    #
    #                 print(("error while training arima,{}".format(e)))
    #                 insert_into_database("ARIMA", None, e)
    # except Exception as e:
    #
    #     print(("error in arima model,{}".format(e)))
    #     insert_into_database("ARIMA", None, e)

    # .. SARIMAX
    try:
        train, test = train_test_split(df)
        p = d = q = list(range(0, 2))
        non_seas = list(itertools.product(p, d, q))
        lis = [1, 3, 6, 12, 24, 56]

        for i in lis:
            sea_so = [(x[0], x[1], x[2], i)
                      for x in list(itertools.product(p, d, q))]

            for j in non_seas:
                for k in sea_so:
                    try:
                        model = SARIMAX(train,
                                        order=j,
                                        seasonal_order=k,
                                        enforce_stationarity=False,
                                        enforce_invertibility=False).fit()
                        y_prd = model.predict(
                            start=test.index.values[0],
                            end=test.index.values[test.shape[0] - 1])

                        rs = sqrt(mean_squared_error(test.values, y_prd))

                        print(rs)
                        insert_into_database("SARIMAX", rs, "{}")
                    except Exception as e:
                        print(("error while training the SARIMAX MODELS,{}".
                               format(e)))
                        insert_into_database("SARIMAX", None, e)

    except Exception as e:
        print(("error in seasonal_arima,{}".format(e)))
        insert_into_database("SARIMAX", None, e)

    # ..AUTO_ARIMA..

    try:
        train, test = train_test_split(df)
        model = auto_arima(train,
                           start_p=1,
                           start_q=1,
                           start_P=1,
                           start_Q=1,
                           max_p=5,
                           max_q=5,
                           max_P=5,
                           max_Q=1,
                           d=1,
                           D=1,
                           seasonal=True)
        model = model.fit(train)
        y_prd = model.predict(n_periods=len(test))
        rs = sqrt(mean_squared_error(test.values, y_prd))
        print("results in auto_Arima", rs)
        dict_rmse["auto_arima"] = rs
        insert_into_database("AUTO_ARIMA", rs, "{}")

    except Exception as e:

        print("error in auto_Arima,{}".format(e))
        insert_into_database("Auto_arima", None, e)
Пример #14
0
                                                smoothing_slope=0.1)
# Forecast one value per test row with the previously fitted `fit_holt` model.
pred['Holt_linear'] = fit_holt.forecast(len(test))

# Overlay the training data, the test data and the Holt forecast.
plt.plot(train['Close'], label='Train')
plt.plot(test['Close'], label='Test')
plt.plot(pred['Holt_linear'], label='Holt_linear')
plt.legend(loc='best')
plt.show()

# Accuracy: mean squared error (not its square root) of the Holt forecast.
mse_holt = mean_squared_error(test.Close, pred['Holt_linear'])
print(mse_holt)

# Holt-Winters: additive trend and additive seasonality, season length 120.
# NOTE(review): `smoothing_slope` appears to be named `smoothing_trend` in
# newer statsmodels releases - confirm against the installed version.
fit_holtwinter = ExponentialSmoothing(np.asarray(train['Close']),
                                      seasonal_periods=120,
                                      trend='add',
                                      seasonal='add').fit(smoothing_slope=0.01)
pred['Holt_Winter'] = fit_holtwinter.forecast(len(test))

# Same overlay as above, now with the Holt-Winters forecast.
plt.plot(train['Close'], label='Train')
plt.plot(test['Close'], label='Test')
plt.plot(pred['Holt_Winter'], label='Holt_Winter')
plt.legend(loc='best')
plt.show()
# Accuracy: mean squared error of the Holt-Winters forecast.
mse_holtwinter = mean_squared_error(test.Close, pred['Holt_Winter'])
print(mse_holtwinter)

# Stationarity check: run adfuller on the training closes with autolag='AIC'.
from statsmodels.tsa.stattools import adfuller
adftest = adfuller(train.Close, autolag='AIC')
Пример #15
0
                      join='inner')
# Give the joined frame short column names, one per series.
series_df.columns = ['ftse', 'eafv', 'k226', 'jq2j', 'k54d']

# Work on an independent copy of the rows from 2010-01-01 onwards and expose
# each column as its own module-level series.
series_df_2 = copy.deepcopy(series_df['2010-01-01':])
ftse_2 = series_df_2.ftse
eafv_2 = series_df_2.eafv
k226_2 = series_df_2.k226
jq2j_2 = series_df_2.jq2j
k54d_2 = series_df_2.k54d

# Regress ftse_2 on the other four series.
# NOTE(review): the formula names (`ftse_2`, ...) are not columns of `data`;
# presumably they resolve to the module-level series defined above - verify.
lm_7 = 'ftse_2 ~ eafv_2 + k226_2 + jq2j_2 + k54d_2'
results_7 = ols(lm_7, data=series_df['2010-01-01':]).fit()

# eafv: additive trend, multiplicative seasonality, period 12; forecast 12 steps.
fit_1_10 = ExponentialSmoothing(eafv_2,
                                seasonal_periods=12,
                                trend='add',
                                seasonal='mul',
                                damped=False).fit()
fcast_1_10 = fit_1_10.forecast(12).rename("eafv forecast")

# k226: Holt model with optimized parameters; forecast 12 steps.
fit_2_10 = Holt(k226_2).fit(optimized=True)
fcast_2_10 = fit_2_10.forecast(12).rename("k226 forecast")

# jq2j: same specification as eafv; forecast 12 steps.
fit_3_10 = ExponentialSmoothing(jq2j_2,
                                seasonal_periods=12,
                                trend='add',
                                seasonal='mul',
                                damped=False).fit()
fcast_3_10 = fit_3_10.forecast(12).rename("jq2j forecast")

fit_4_10 = ExponentialSmoothing(k54d_2,
Пример #16
0
from pandas import read_excel
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.api import ExponentialSmoothing

# Load the K54D sheet as a Series indexed by the (date-parsed) first column.
# `.squeeze("columns")` replaces the `squeeze=True` keyword, which was removed
# from the pandas readers in pandas 2.0; the method also exists in older pandas.
k54d_df = read_excel('K54Ddata_31404626.xls',
                     sheet_name='data',
                     header=0,
                     index_col=0,
                     parse_dates=True).squeeze("columns")

# Hold out the last 12 observations as the test window.
k54d_train = k54d_df[0:-12]
k54d_test = k54d_df[-12:]

# Holt-Winters: additive trend, multiplicative seasonality, period 12.
fit_1_train = ExponentialSmoothing(k54d_train,
                                   seasonal_periods=12,
                                   trend='add',
                                   seasonal='mul').fit()
fcast_1_train = fit_1_train.forecast(12)

# Seasonal ARIMA (0,1,2)x(0,1,2,12) fitted on the same training window.
mod_train = sm.tsa.statespace.SARIMAX(k54d_train,
                                      order=(0, 1, 2),
                                      seasonal_order=(0, 1, 2, 12))
results_train = mod_train.fit(disp=False)
fcast1_arima_train = results_train.get_forecast(steps=12).predicted_mean

# Compare both 12-step forecasts against the held-out actuals.
plt.title('Forecast 2019 K54D')
k54d_test.plot(color='black', label='Actual', legend=True)
fcast_1_train.plot(color='blue', label='HWM', legend=True)
fcast1_arima_train.plot(color='red', label='ARIMA', legend=True)
plt.show()
Пример #17
0
plt.title(' MICROSOFT STOCK FOR LAST 10 MONTH AS ON  '+ str(today))
plt.plot(data['Close'])
plt.show()

# DEFINING INPUTS
data = data['Close'].tolist()
# CHANGE DATA FOR 30 DAYS FROM THE DATE WHEN YOU ARE RUNNING THIS CODE
# The business-day range below must contain exactly len(data) dates,
# otherwise building the Series fails.
start_date = '2020-04-16'
end_date = '2021-02-03'
index = pd.date_range(start=start_date, end=end_date, freq='B')
stock_data = pd.Series(data, index)
# Number of steps to forecast; used by every forecast call below.
forecast_timestep = 2


# `run_line_magic` replaces the deprecated `get_ipython().magic(...)` call.
get_ipython().run_line_magic('matplotlib', 'inline')

# Four Holt-Winters variants: additive/multiplicative seasonality, each with
# and without a damped trend. All use period 4, Box-Cox and estimated
# initialization.
fit1 = ExponentialSmoothing(stock_data,
                            seasonal_periods=4,
                            trend='add',
                            seasonal='add',
                            use_boxcox=True,
                            initialization_method="estimated").fit()
fit2 = ExponentialSmoothing(stock_data,
                            seasonal_periods=4,
                            trend='add',
                            seasonal='mul',
                            use_boxcox=True,
                            initialization_method="estimated").fit()
fit3 = ExponentialSmoothing(stock_data,
                            seasonal_periods=4,
                            trend='add',
                            seasonal='add',
                            damped_trend=True,
                            use_boxcox=True,
                            initialization_method="estimated").fit()
fit4 = ExponentialSmoothing(stock_data,
                            seasonal_periods=4,
                            trend='add',
                            seasonal='mul',
                            damped_trend=True,
                            use_boxcox=True,
                            initialization_method="estimated").fit()

# Chart 1: observed prices, fitted values and forecasts of the undamped models.
ax = stock_data.plot(figsize=(16,10), color='black', title="Forecasts Without Damping factor" )
ax.set_ylabel("Prices $")
ax.set_xlabel("Date")
fit1.fittedvalues.plot(ax=ax, style='--', color='red')
fit2.fittedvalues.plot(ax=ax, style='--', color='green')
fit1.forecast(forecast_timestep).rename('Holt-Winters (add-seasonal)').plot(ax=ax, style='--', color='red', legend=True)
fit2.forecast(forecast_timestep).rename('Holt-Winters (mul-seasonal)').plot(ax=ax, style='--', color='green', legend=True)

# Chart 2: base axes for the damped models.
ax = stock_data.plot(figsize=(16,10), color='black', title="Forecasts with Damping Factor" )
ax.set_ylabel("Prices $ ")
ax.set_xlabel("Year")
Пример #18
0
def modelo_predictivo_fall(request):
    """Render the 20-day Holt-Winters projection of confirmed cases.

    Fits an exponential smoothing model (additive trend, multiplicative
    seasonality, period ``seasonal_periods_casos``) to the module-level
    ``casos_chile`` series, forecasts ``days_in_future_cl`` days ahead and
    renders a line chart of observed vs. projected values plus a table of
    the projections.

    Args:
        request: the incoming request, passed straight through to ``render``.

    Returns:
        The result of ``render`` for ``predicciones_fallecidos.html``.
    """
    # Single source of truth for the horizon: it drives the forecast length,
    # the date labels and the table, so they cannot drift apart.
    days_in_future_cl = 20
    n_observed = len(dates_chile)

    datewise = pd.DataFrame({
        'Days Since': np.arange(n_observed),
        'Confirmed': casos_chile
    })

    es = ExponentialSmoothing(np.asarray(datewise['Confirmed']),
                              seasonal_periods=seasonal_periods_casos,
                              trend='add',
                              seasonal='mul').fit()

    # One label per calendar day, counted from the fixed series start date and
    # covering the observed days followed by the projected days.
    start_date_cl = datetime.datetime.strptime('03/03/2020', '%m/%d/%Y')
    future_forcast_dates_cl = [
        (start_date_cl + datetime.timedelta(days=offset)).strftime('%m/%d/%Y')
        for offset in range(n_observed + days_in_future_cl)
    ]
    observed_dates_cl = future_forcast_dates_cl[:n_observed]
    projected_dates_cl = future_forcast_dates_cl[n_observed:]

    Predict_df_cl_1 = pd.DataFrame()
    Predict_df_cl_1["Fecha"] = projected_dates_cl
    Predict_df_cl_1["N° Casos"] = np.round(
        list(es.forecast(days_in_future_cl)))

    fig = go.Figure()
    # Observed series: x is limited to the observed dates so that x and y
    # have the same length.
    fig.add_trace(
        go.Scatter(x=np.array(observed_dates_cl),
                   y=datewise["Confirmed"],
                   mode='lines+markers',
                   name="Casos Reales"))
    fig.add_trace(
        go.Scatter(
            x=Predict_df_cl_1['Fecha'],
            y=Predict_df_cl_1["N° Casos"],
            mode='lines+markers',
            name="Predicción",
        ))

    fig.update_layout(title="Proyección de casos en 20 días",
                      xaxis_title="Fecha",
                      yaxis_title="Número de Casos",
                      legend=dict(x=0, y=1, traceorder="normal"))

    graph1 = fig.to_html(full_html=False)

    # Second figure: the projection table, one column per DataFrame column.
    fig2 = make_subplots(rows=1,
                         cols=1,
                         shared_xaxes=True,
                         vertical_spacing=0.03,
                         specs=[[{
                             "type": "table"
                         }]])

    fig2.add_trace(go.Table(header=dict(values=Predict_df_cl_1.columns,
                                        font=dict(size=15),
                                        align="left"),
                            cells=dict(values=[
                                Predict_df_cl_1[k].tolist()
                                for k in Predict_df_cl_1.columns
                            ],
                                       align="left",
                                       font=dict(size=13))),
                   row=1,
                   col=1)
    fig2.update_layout(
        showlegend=False,
        title_text="Tabla de Proyecciones a 20 días",
    )

    graph2 = fig2.to_html(full_html=False)

    return render(
        request, "predicciones_fallecidos.html", {
            "grafico1": graph1,
            "fecha_casos_fall": fecha_casos_fall,
            "tabla1": graph2,
            "n_casos": num_cases_cl,
            "num_rec": casos_act,
            "num_death": num_death
        })
# Holt linear-trend model on the VALUE column.
print("Holt")
# Plot the seasonal decomposition and run adfuller on the training series;
# `result` is not used again in this section.
sm.tsa.seasonal_decompose(train.VALUE).plot()
result = sm.tsa.stattools.adfuller(train.VALUE)
# plt.show()
y_hat_avg = test.copy()
alpha = 0.03
# Fixed smoothing parameters: level = alpha, slope = 0.1.
fit1 = Holt(np.asarray(train['VALUE'])).fit(smoothing_level = alpha,smoothing_slope = 0.1)
y_hat_avg['Holt_linear'] = fit1.forecast(len(test))
# Root mean squared error of the forecast against the test values.
rms = sqrt(mean_squared_error(test.VALUE, y_hat_avg.Holt_linear))
print("RMSE: ",rms)

# Holt-Winters: additive trend and seasonality with a season length of 10.
print("Holt-Winters")
y_hat_avg = test.copy()
seasons = 10
fit1 = ExponentialSmoothing(np.asarray(train['VALUE']) ,seasonal_periods=seasons ,trend='add', seasonal='add',).fit()
y_hat_avg['Holt_Winter'] = fit1.forecast(len(test))
rms = sqrt(mean_squared_error(test.VALUE, y_hat_avg.Holt_Winter))
print("RMSE: ",rms)

# Seasonal ARIMA
# This is a naive use of the technique. See - http://www.seanabu.com/2016/03/22/time-series-seasonal-ARIMA-model-in-python/
#print("Seasonal ARIMA")
#y_hat_avg = test.copy()
#fit1 = sm.tsa.statespace.SARIMAX(train.VALUE, order=(1, 0, 0),seasonal_order=(0,1,1,1)).fit()
#y_hat_avg['SARIMA'] = fit1.predict(start="2008-12-01", end="2018-11-30", dynamic=True)
#rms = sqrt(mean_squared_error(test.VALUE, y_hat_avg.SARIMA))
#print("RMSE: ",rms)
    
# Keep only the month-year label and the total passenger count for modelling.
oddata = pd.DataFrame(oddata, columns=['monthyear', 'Total_apax'])

# Split into in-sample (YEAR < 2017) and out-of-sample (YEAR >= 2017) rows.
# NOTE(review): the boolean masks are built from `data`, not `oddata`;
# presumably both share the same index - verify.
indata = oddata[(data['YEAR'] < 2017)]
indata = indata.reset_index()
indata = indata.drop("index", axis=1)

outdata = oddata[data['YEAR'] >= 2017]  # years from 2017 on are the hold-out sample used to test the model
outdata = outdata.reset_index()
outdata = outdata.drop("index", axis=1)

# Holt-Winters with additive trend, multiplicative seasonality and a 12-period cycle.
# NOTE(review): `nm` is not defined in the visible code; presumably a numpy
# alias - confirm it is not a typo for `np`.
y_hw = indata.copy()
fit2 = ExponentialSmoothing(nm.asarray(y_hw['Total_apax']), seasonal_periods=12, trend='add', seasonal='mul', ).fit()

# Plot the Holt-Winters prediction against the actual data.
y_hw_plot = pd.concat([indata, outdata])  # in-sample followed by out-of-sample rows, so the prediction spans both
y_hw_plot = y_hw_plot.reset_index()
y_hw_plot = y_hw_plot.drop("index", axis=1)
y_hw_plot['Holt_Winter'] = fit2.predict(start=0, end=len(y_hw_plot) - 1)  # predict every row of the combined frame with the fitted parameters

# RMSE of the Holt-Winters prediction over the combined in- and out-of-sample rows.
rms_holt_winters = sqrt(mean_squared_error(y_hw_plot.Total_apax, y_hw_plot.Holt_Winter))

# One line trace for the actual values and one for the prediction.
trace_real = go.Scatter(x=y_hw_plot['monthyear'], y=y_hw_plot['Total_apax'], mode='lines', name='real')
trace_predict = go.Scatter(x=y_hw_plot['monthyear'], y=y_hw_plot['Holt_Winter'], mode='lines', name='predict')

data_plot = [trace_real, trace_predict]
layout = go.Layout(
Пример #21
0
# Attach the per-hour rows of `temp2` to the merged daily predictions.
print(merge.head(4))
prediction = pd.merge(merge, temp2, on='Hour', how='left')
print(prediction.head(4))

# Hourly count = daily prediction * hourly ratio * 24.
prediction['Count'] = prediction['prediction']*prediction['ratio']*24
prediction['ID'] = prediction['ID_y']

# Drop the helper columns and write the ID/Count submission file.
submission = prediction.drop(['ID_x','day','ID_y','prediction', 'Hour', 'ratio'], axis=1)
pd.DataFrame(submission, columns=['ID', 'Count']).to_csv('Holt_linear.csv', index=False)

submission

"""**Holt's Winter Model**"""

# Holt-Winters with additive trend and seasonality, season length 7,
# forecast over the validation window.
y_hat_avg = valid.copy()
fit1 = ExponentialSmoothing(np.asarray(Train['Count']),seasonal_periods=7, trend= 'add', seasonal='add').fit()
y_hat_avg['Holt_Winter'] = fit1.forecast(len(valid))
plt.figure(figsize=(16,8))
plt.plot(Train.index, Train['Count'], label='Train')
plt.plot(valid.index, valid['Count'], label='vaild')
# NOTE(review): the x values come from `y_hat`, which is not defined in the
# visible code; presumably it shares its index with `y_hat_avg` - verify.
plt.plot(y_hat.index, y_hat_avg['Holt_Winter'], label='Holt_Winter')
plt.legend(loc='best')
plt.show()

# Root mean squared error on the validation window.
rmse = sqrt(mean_squared_error(valid.Count,y_hat_avg.Holt_Winter))
print(rmse)

# Forecast len(test) steps with the same fit and join the original test rows
# on day/month/year.
predict = fit1.forecast(len(test))
test['prediction'] = predict
print(test.head(4))
merge = pd.merge(test, test_original, on=('day','month','year'), how='left')
Пример #22
0
def _empty_forecast():
    """Return the payload used when no forecast can be produced."""
    return {
        "categories": [],
        "series": [{
            "name": "Predicted",
            "data": []
        }],
    }


def make_forecast(db_session,
                  job_type: str = None,
                  periods: int = 24,
                  grouping: str = "month"):
    """Forecast the number of jobs per month for the next 12 months.

    Jobs are counted per month, months without jobs are filled with 0, and a
    Holt-Winters model (additive trend and seasonality, period 12) is fitted
    to the resulting series.

    Args:
        db_session: session used to query ``Job`` joined with ``JobType``.
        job_type: restrict the query to this job type name; ``None``, an
            empty string or ``"all"`` means no restriction.
        periods: number of months of history to include.
        grouping: aggregation granularity; only ``"month"`` is supported.

    Returns:
        A dict with ``"categories"`` (forecast period labels) and ``"series"``
        (one entry named ``"Predicted"`` whose ``"data"`` holds the
        non-negative, rounded-up forecast counts). Both lists are empty when
        there is no data or the model cannot be fitted.

    Raises:
        ValueError: if ``grouping`` is not ``"month"``.
    """
    # Fail with a clear message instead of an UnboundLocalError on `grouper`.
    if grouping != "month":
        raise ValueError(f"Unsupported grouping: {grouping!r}")
    grouper = month_grouper

    query = db_session.query(Job).join(JobType)

    # exclude simulations
    query = query.filter(JobType.name != "Simulation")

    # exclude current month
    query = query.filter(Job.reported_at < date.today().replace(day=1))

    if job_type and job_type != "all":
        query = query.filter(JobType.name == job_type)

    # Limit the history to the requested number of months. `Query.filter`
    # returns a new query, so the result must be re-assigned to take effect.
    query = query.filter(
        Job.reported_at > date.today() + relativedelta(months=-periods))

    # groupby only merges adjacent items, hence the sort by the same key.
    jobs_sorted = sorted(query.all(), key=grouper)

    dataframe_dict = {"ds": [], "y": []}

    for (last_day, items) in groupby(jobs_sorted, grouper):
        dataframe_dict["ds"].append(str(last_day))
        dataframe_dict["y"].append(len(list(items)))

    dataframe = pd.DataFrame.from_dict(dataframe_dict)

    if dataframe.empty:
        return _empty_forecast()

    # reset index to by month and drop month column
    dataframe.index = dataframe.ds
    dataframe.index.freq = "M"
    dataframe.drop("ds", inplace=True, axis=1)

    # fill periods without jobs with 0
    idx = pd.date_range(dataframe.index[0], dataframe.index[-1], freq="M")
    dataframe = dataframe.reindex(idx, fill_value=0)

    try:
        forecaster = ExponentialSmoothing(dataframe,
                                          seasonal_periods=12,
                                          trend="add",
                                          seasonal="add").fit(use_boxcox=True)
    except Exception as e:
        # Best effort: a series the model cannot handle yields an empty result.
        log.error(f"Issue forecasting jobs: {e}")
        return _empty_forecast()

    forecast = forecaster.forecast(12)
    forecast_df = pd.DataFrame({
        "ds": forecast.index.astype("str"),
        "yhat": forecast.values
    })

    forecast_data = forecast_df.to_dict("series")

    return {
        "categories":
        list(forecast_data["ds"]),
        "series": [{
            "name":
            "Predicted",
            # Counts are whole and non-negative: round up and clamp at zero.
            "data":
            [max(math.ceil(x), 0) for x in list(forecast_data["yhat"])],
        }],
    }
# Pull the fitted coefficients of D1..D11 and `time` out of Final_result_6.
b4 = Final_result_6.params.D1
b5 = Final_result_6.params.D2
b6 = Final_result_6.params.D3
b7 = Final_result_6.params.D4
b8 = Final_result_6.params.D5
b9 = Final_result_6.params.D6
b10 = Final_result_6.params.D7
b11 = Final_result_6.params.D8
b12 = Final_result_6.params.D9
b13 = Final_result_6.params.D10
b14 = Final_result_6.params.D11
b15 = Final_result_6.params.time

# eafv: additive trend, multiplicative seasonality, period 12; forecast 12 steps.
fit_1 = ExponentialSmoothing(eafv,
                             seasonal_periods=12,
                             trend='add',
                             seasonal='mul',
                             damped=False).fit()
fcast_1 = fit_1.forecast(12).rename("eafv forecast")

# k226: Holt model with optimized parameters; forecast 12 steps.
fit_2 = Holt(k226).fit(optimized=True)
fcast_2 = fit_2.forecast(12).rename("k226 forecast")

# jq2j: same specification as eafv; forecast 12 steps.
fit_3 = ExponentialSmoothing(jq2j,
                             seasonal_periods=12,
                             trend='add',
                             seasonal='mul',
                             damped=False).fit()
fcast_3 = fit_3.forecast(12).rename("jq2j forecast")

# In-sample fitted values of the eafv model.
a1 = fit_1.fittedvalues
Пример #24
0
    66.834358381339, 40.8711884667851, 51.8285357927739, 57.4919099342262,
    65.2514698518726, 43.0612082202828, 54.7607571288007, 59.8344749355003,
    73.2570274672009, 47.6966237298, 61.0977680206996, 66.0557612187001
]

### Simple exponential smoothing on `oildata`; print a 5-step forecast.
mdlFit = SimpleExpSmoothing(oildata, initialization_method="estimated").fit()
simple_res = mdlFit.forecast(steps=5)
print(simple_res.tolist())

### Holt model on `air`; print a 5-step forecast.
mdlFit = Holt(air, initialization_method="estimated").fit()
holt_res = mdlFit.forecast(steps=5)
print(holt_res.tolist())

### Holt model with damped trend on `air`; print the summary and a 5-step forecast.
mdlFit = Holt(air, initialization_method="estimated", damped_trend=True).fit()
holt_res_damp = mdlFit.forecast(steps=5)
print(mdlFit.summary())
print(holt_res_damp.tolist())

### Holt-Winters on `aust`: additive trend and seasonality, period 4.
mdlFit = ExponentialSmoothing(aust,
                              initialization_method="estimated",
                              seasonal_periods=4,
                              seasonal="add",
                              trend="add").fit()
hw_res = mdlFit.forecast(steps=5)
print(mdlFit.summary())
print(hw_res)
Пример #25
0
import matplotlib.pylab as plt
from statsmodels.tsa.api import ExponentialSmoothing

# Load the data set as a time series indexed by the parsed first column.
# `.squeeze("columns")` replaces the `squeeze=True` keyword, which was removed
# from the pandas readers in pandas 2.0; the method also exists in older pandas.
venda_carros = pd.read_csv('bcdata.sgs.2020.csv',
                           sep=',',
                           header=0,
                           dayfirst=True,
                           index_col=0,
                           parse_dates=True).squeeze("columns")
# Drop the last observation.
venda_carros = venda_carros[:-1]

# Fit the model used for the forecast: additive trend and seasonality,
# period 12, with a Box-Cox transform.
# NOTE(review): newer statsmodels releases appear to expect `use_boxcox` on
# the constructor rather than on `fit` - confirm against the installed version.
fit1 = ExponentialSmoothing(venda_carros,
                            seasonal_periods=12,
                            trend='additive',
                            seasonal='additive').fit(use_boxcox=True)

# Forecast the next 12 periods.
dados_previstos = fit1.forecast(12)

#Visualização do gráfico da série temporal, dos valores previstos e da série temporal + previstos
plt.figure(figsize=(10, 5))
plt.xlabel('Anos')
plt.ylabel('Quantidade de comerciais leves vendidos')
fit1.fittedvalues.plot(style='--', color='red')

plt.figure(figsize=(10, 5))
plt.xlabel('Anos')
plt.ylabel('Quantidade de comerciais leves vendidos (previsão)')
fit1.forecast(12).plot(style='--',
Пример #26
0
    des_results["mse"] = np.repeat(0, len(des_results))

    # reuse the DESMSE function above to cycle through all alpha beta values.
    for alpha in np.arange(0.0, 1.1, 0.1):
        for beta in np.arange(0.0, 1.1, 0.1):
            des_results.loc[(des_results["alpha"] == alpha) &
                            (des_results["beta"] == beta),
                            "mse"] = DESMSE(alpha, beta, temp)

    # find the best alpha beta: sort and put on row 1
    des_results = des_results.sort_values("mse")

    # use the best alpha,beta to forecast for the censored days
    # post results onto df_des
    predicted_daily_arrival = ExponentialSmoothing(temp,trend="add"). \
                                         fit(smoothing_level = des_results.iloc[0,0],
                                             smoothing_slope = des_results.iloc[0,1]). \
                                         forecast(16-temp.size)

    df_des.iloc[row_number,
                temp.size:] = predicted_daily_arrival.cumsum() + df_des.iloc[
                    row_number, temp.size - 1]

# Averaging method: reload the grocery sheet, dropping the first row and
# keeping the first 16 columns.
df_grocery1 = pd.read_excel('grocery_data.xlsx')
df_grocery1 = df_grocery1.iloc[1:, :16]

# Rename the columns to 1..16.
df_grocery1.columns = np.arange(1, 17)
# Running total across the columns of each row.
df_avg = np.cumsum(df_grocery1, axis=1)

# fill null values with 40
def train(data):
    """Fit Holt-Winters to `data` and return its one-step-ahead forecast.

    The model uses an additive trend and additive seasonality with a season
    length of 7. `data` is converted with `np.asarray` before fitting.
    """
    observations = np.asarray(data)
    fitted = ExponentialSmoothing(observations,
                                  seasonal_periods=7,
                                  trend='add',
                                  seasonal='add').fit()
    return fitted.forecast(1)
Пример #28
0
# Plot the three simple-smoothing forecasts (and the fitted values of models
# 2 and 3), keeping the forecast line handles for the legend.
line1, = plt.plot(fcast1, marker='o', color='blue')
plt.plot(fit2.fittedvalues, marker='o', color='red')
line2, = plt.plot(fcast2, marker='o', color='red')
plt.plot(fit3.fittedvalues, marker='o', color='green')
line3, = plt.plot(fcast3, marker='o', color='green')
plt.legend([line1, line2, line3], [fcast1.name, fcast2.name, fcast3.name])
# Label every fifth position with a calendar date from 1 Mar to 30 Apr.
plt.xticks([0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60], [
    "1 Mar", "6 Mar", "11 Mar", "16 Mar", "21 Mar", "26 Mar", "31 Mar",
    "5 Apr", "10 Apr", "15 Apr", "20 Apr", "25 Apr", "30 Apr"
])
plt.title('SimpleExpSmoothing')
#plt.show()

# Double exponential smoothing: additive trend, multiplicative trend and
# damped additive trend, each forecast 15 steps ahead.

fit_double1 = ExponentialSmoothing(nuovi_pos, trend="add").fit()
fcast_double1 = fit_double1.forecast(
    15
)  #.rename(r'$\alpha=%s, beta=%s$', fit_double1.model.params['smoothing_level'], fit_double1.model.params['smoothing_trend'])
fit_double2 = ExponentialSmoothing(nuovi_pos, trend="mul").fit()
fcast_double2 = fit_double2.forecast(
    15
)  #.rename(r'$\alpha=%s, beta=%s$', fit_double2.model.params['smoothing_level'], fit_double2.model.params['smoothing_trend'])
fit_double3 = ExponentialSmoothing(
    nuovi_pos, trend="add", damped_trend=True).fit()  # damped does not work well!
fcast_double3 = fit_double3.forecast(
    15
)  #.rename(r'$\alpha=%s, beta=%s$', fit_double3.model.params['smoothing_level'], fit_double3.model.params['smoothing_trend'])

# New figure starting with the observed series.
plt.figure(figsize=(12, 8))
plt.plot(nuovi_pos, marker='o', color='black')
Пример #29
0
# Rows from position 26 onwards form the test set.
test = df[26:]

# Index each frame by a timestamp parsed from its Year column
# (the daily resampling steps are commented out).
# NOTE(review): `df.Timestamp = ...` sets an attribute rather than creating a
# column; confirm this is intended.
df.Timestamp = pd.to_datetime(df.Year, format='%Y-%m')
df.index = df.Timestamp
#df = df.resample('D').mean()
train.Timestamp = pd.to_datetime(train.Year, format='%Y-%m')
train.index = train.Timestamp
#train = train.resample('D').mean()
test.Timestamp = pd.to_datetime(test.Year, format='%Y-%m')
test.index = test.Timestamp
#test = test.resample('D').mean()

# Holt-Winters with additive trend and seasonality, period 12, fitted on the
# training revenue and forecast over the test window.
y_hat_avg = test.copy()
fit1 = ExponentialSmoothing(
    np.asarray(train['Revenue']),
    seasonal_periods=12,
    trend='add',
    seasonal='add',
).fit()
y_hat_avg['Holt_Winter'] = fit1.forecast(len(test))
# Overlay train, test and forecast.
plt.figure(figsize=(16, 8))
plt.plot(train['Revenue'], label='Train')
plt.plot(test['Revenue'], label='Test')
plt.plot(y_hat_avg['Holt_Winter'], label='Holt_Winter')
plt.legend(loc='best')
plt.show()

# Root mean squared error of the forecast on the test window.
rms = sqrt(mean_squared_error(test.Revenue, y_hat_avg.Holt_Winter))
print('Root Mean Square: %s' % rms)  # RMSE: 15972.5519541
Пример #30
0
from pandas import read_excel
import matplotlib.pyplot as plt
from statsmodels.tsa.api import ExponentialSmoothing
from statsmodels.graphics.tsaplots import plot_acf

# Load the EAFV sheet as a Series indexed by the (date-parsed) first column.
# `.squeeze("columns")` replaces the `squeeze=True` keyword, which was removed
# from the pandas readers in pandas 2.0; the method also exists in older pandas.
eafv_df = read_excel('EAFVdata_31404626.xls',
                     sheet_name='data',
                     header=0,
                     index_col=0,
                     parse_dates=True).squeeze("columns")

# Holt-Winters: additive trend, multiplicative seasonality, period 12.
fit_1 = ExponentialSmoothing(eafv_df,
                             seasonal_periods=12,
                             trend='add',
                             seasonal='mul').fit()

# In-sample residuals: observed minus fitted values.
Error_1 = eafv_df - fit_1.fittedvalues

# Plot the residuals over time and their autocorrelation up to lag 50.
plt.title('Time plot of EAFV Residuals')
Error_1.plot(label='EAFV Residuals', legend=True, color="black")
plot_acf(Error_1, title='ACF of EAFV Residuals', lags=50)
plt.show()
plt.show()