def forecast(data):
    """Fit a multiplicative Holt-Winters model on monthly 'RPRICE' data and
    forecast the next 12 months.

    Expects ``data`` to be a pandas DataFrame with a monthly DatetimeIndex,
    an 'RPRICE' column (strictly positive — required by seasonal='mul') and
    a 'REPORT' column. Presumably ``data`` also carries 'MDATE'/'REPORT'
    columns used by the merge below — TODO confirm against callers.

    Returns a DataFrame with columns ['MDATE', 'REPORT', 'FORECAST'].
    """
    shop_train = data['RPRICE']
    # Forecast window: the 12 months following the last observed month.
    fst_month_forecst = data.index.max() + pd.DateOffset(months=1)
    lst_month_forecst = fst_month_forecst + pd.DateOffset(months=11)
    # Rebuild the index with an explicit frequency so statsmodels can map
    # the forecast start/end dates onto the series.
    shop_train.index = pd.DatetimeIndex(shop_train.index.values,
                                        freq=shop_train.index.inferred_freq)
    model = ExponentialSmoothing(shop_train, seasonal='mul',
                                 seasonal_periods=12).fit()
    shop_forecast = model.predict(start=fst_month_forecst, end=lst_month_forecst)
    df_frc = pd.DataFrame(shop_forecast.copy(), columns=['FORECAST']) \
        .reset_index().rename(columns={"index": "MDATE"})
    # Fixed: use .iloc[0] for positional access — Series[0] on a
    # non-integer index is deprecated (removed in pandas 3.0).
    df_frc['REPORT'] = data['REPORT'].iloc[0]
    df_frc = df_frc[['MDATE', 'REPORT', 'FORECAST']]
    # make some graphics (make_graph / make_img2 are defined elsewhere)
    if make_graph:
        df_shop_forec = data.copy()
        df_shop_forec['FITVAL'] = model.fittedvalues
        df_shop_forec = df_shop_forec.merge(df_frc, how='outer',
                                            on=['MDATE', 'REPORT'])
        make_img2(df_shop_forec, False)
    return df_frc
def Holt_Winters(paramsList=['pollution.csv', '0.93', 'pm', 'humidity', 'date'],
                 specialParams=['7']):
    # Train a Holt-Winters (triple exponential smoothing) model per value
    # column of a CSV and write train rows + predictions to 'result.csv'.
    #
    # paramsList: [csv_path, train_fraction, value_col..., date_col] — the
    #   last entry is the timestamp column; entries 2..-2 are value columns.
    # specialParams: [seasonal_period], as a string.
    #
    # NOTE(review): list default arguments are shared between calls; they are
    # not mutated here, but tuples would be the safer convention.
    path = paramsList[0]
    trainRows = float(paramsList[1])
    saveto = 'result.csv'
    df = pd.read_csv(path, usecols=paramsList[2:])
    allRows = df.shape[0]
    season = specialParams[0]
    # Row-based split BEFORE resampling; the +1 skips one row between train
    # and test — presumably intentional, TODO confirm.
    train = df[0:int(allRows*trainRows)]
    test = df[int(allRows*trainRows)+1:]
    # Index each frame by timestamp and downsample to daily means.
    # (train/test are slices of df — pandas may warn about writes to copies.)
    df['Timestamp'] = pd.to_datetime(df[paramsList[-1]], format='%Y/%m/%d %H:%M')
    df.index = df['Timestamp']
    df = df.resample('D').mean()
    train['Timestamp'] = pd.to_datetime(train[paramsList[-1]], format='%Y/%m/%d %H:%M')
    train.index = train['Timestamp']
    train = train.resample('D').mean()
    test['Timestamp'] = pd.to_datetime(test[paramsList[-1]], format='%Y/%m/%d %H:%M')
    test.index = test['Timestamp']
    test = test.resample('D').mean()
    y_hat = test.copy()
    nullArray = train.copy()
    nullArray['time'] = train.index
    # Everything above is generic preprocessing ----------------------------
    for i in range(2, len(paramsList)-1):
        print("进入循环")  # "entering loop"
        fit1 = ExponentialSmoothing(np.asarray(train[paramsList[i]]),
                                    seasonal_periods=int(season),
                                    trend='add', seasonal='add').fit()
        # NOTE(review): hard-coded prediction window — only valid for the
        # sample dataset's date range; confirm before reusing on other data.
        y_hat[paramsList[i]] = fit1.predict(start="2014/7/3", end="2014/9/21")
        y_hat[paramsList[i]] = round(y_hat[paramsList[i]], 2)
        print("结束fit1")  # "fit1 finished"
        rms = sqrt(mean_squared_error(test[paramsList[i]], y_hat[paramsList[i]]))
        print(rms)
        y_hat['Holt_Winter'] = fit1.forecast(len(test))
        plt.figure(figsize=(16, 8))
        plt.plot(train[paramsList[i]], label='Train')
        plt.plot(test[paramsList[i]], label='Test')
        plt.plot(y_hat[paramsList[i]], label='Holt_Winter')
        plt.legend(loc='best')
        plt.show()
    y_hat['time'] = test.index
    # Stack train rows (values blanked below) on top of predictions and save.
    yhat_naive = np.array(y_hat)
    nArray = np.array(nullArray)
    newArray = np.concatenate((nArray, yhat_naive), axis=0)
    s = pd.DataFrame(newArray, columns=paramsList[2:])
    for i in range(2, len(paramsList)-1):
        # Chained indexing intentionally blanks the training portion in place.
        s[paramsList[i]][0:int(len(s)*trainRows)] = ""
    s.to_csv(saveto, index=False, header=True, float_format='%.2f')

# The triple-quoted string opened below comments out scratch code; its
# closing quote lies beyond this chunk.
'''
y_hat_avg = test.copy() fit1 = ExponentialSmoothing( np.asarray(train['Monthly Consumption of Type A Medicine']), seasonal_periods=12, trend='add', seasonal='add', ).fit() y_hat_avg['Holt_Winter'] = fit1.forecast(len(test)) plt.figure(figsize=(16, 8)) plt.plot(train['Monthly Consumption of Type A Medicine'], label='Train') plt.plot(test['Monthly Consumption of Type A Medicine'], label='Test') plt.plot(y_hat_avg['Holt_Winter'], label='Holt_Winter') plt.legend(loc='best') plt.show() rms = sqrt( mean_squared_error(test['Monthly Consumption of Type A Medicine'], y_hat_avg.Holt_Winter)) print(rms) p = range(35 - 45) plt.plot(y_hat_avg['Holt_Winter'], label='Holt_Winter') plt.show() pred = fit1.predict(start=0, end=40) plt.plot(pred, label="pred") plt.plot(df['Monthly Consumption of Type A Medicine'], label="df") plt.legend() plt.show() #ExponentialSmoothing.predict(np.asarray(train['Monthly Consumption of Type A Medicine']))
# In[301]: #rms = sqrt(mean_squared_error(test.Amount, y_hat_avg.Holt_Winter)) #print(rms) #RMSE = 23.9614925662 # In[316]: model.predict() # In[321]: pred = model.forecast(12) print(pred) # In[ ]: pred_imonth = input+1
# Plot the Holt-Winters prediction against train/test and report RMS error.
# 'train', 'test' and 'y_hat_HoltWinter' come from earlier code in the file.
plt.figure(figsize=(12, 8))
plt.plot(train['Count'], label='Train')
plt.plot(test['Count'], label='Test')
plt.plot(y_hat_HoltWinter['Holt_Winter'], label='Holt_Winter')
plt.legend(loc='best')
plt.title("Holt-Winters季节性预测法")  # title: "Holt-Winters seasonal forecasting"
rms = sqrt(mean_squared_error(test['Count'], y_hat_HoltWinter['Holt_Winter']))
print("Holt-Winters季节性预测模型RMS:" + str(rms))  # "Holt-Winters seasonal model RMS:"
# endregion

# region ARIMA (autoregressive integrated moving average)
import statsmodels.api as sm

y_hat_avg = test.copy()
# SARIMA with weekly (period-7) seasonality; the orders look hand-tuned for
# this dataset — TODO confirm before reuse.
fit1 = sm.tsa.statespace.SARIMAX(train.Count,
                                 order=(2, 1, 4),
                                 seasonal_order=(0, 1, 1, 7)).fit()
# dynamic=True: lags inside the window use earlier predictions rather than
# observed values (out-of-sample style forecast).
y_hat_avg['ARIMA'] = fit1.predict(start="2013-11-1", end="2013-12-31", dynamic=True)
plt.figure(figsize=(12, 8))
plt.plot(train['Count'], label='Train')
plt.plot(test['Count'], label='Test')
plt.plot(y_hat_avg['ARIMA'], label='ARIMA')
plt.legend(loc='best')
plt.title("ARIMA自回归移动平均法")  # title: "ARIMA autoregressive moving average method"
rms = sqrt(mean_squared_error(test['Count'], y_hat_avg['ARIMA']))
print("自回归移动平均模型(ARIMA)RMS:" + str(rms))  # "ARIMA model RMS:"
# endregion
plt.show()
# In-sample (training) data: drop the stale integer index left by earlier
# filtering. 'indata', 'oddata', 'data' and 'nm' come from earlier code in
# the file — NOTE(review): 'oddata' vs 'data' here looks like a possible
# typo for the same frame; confirm against the preceding chunk.
indata = indata.reset_index()
indata = indata.drop("index", axis=1)
outdata = oddata[data['YEAR'] >= 2017]  # years 2017 and 2018 become the out-of-sample data to test the model
outdata = outdata.reset_index()
outdata = outdata.drop("index", axis=1)

#### Code logic to implement HOLT-Winters method with seasonality cycle of 12
y_hw = indata.copy()
fit2 = ExponentialSmoothing(nm.asarray(y_hw['Total_apax']),
                            seasonal_periods=12,
                            trend='add',
                            seasonal='mul', ).fit()
# Plot the Holt-Winters prediction against the actual data.
y_hw_plot = pd.concat([indata, outdata])  # combine in- and out-of-sample to track prediction over the entire launch
y_hw_plot = y_hw_plot.reset_index()
y_hw_plot = y_hw_plot.drop("index", axis=1)
# Predict over the full combined range with the fitted parameters
# (indices beyond len(indata) are genuine out-of-sample forecasts).
y_hw_plot['Holt_Winter'] = fit2.predict(start=0, end=len(y_hw_plot) - 1)
# RMS error for the Holt-Winters method over the combined range.
rms_holt_winters = sqrt(mean_squared_error(y_hw_plot.Total_apax, y_hw_plot.Holt_Winter))
# Plotly traces: actuals vs prediction, written to a standalone HTML file.
trace_real = go.Scatter(x=y_hw_plot['monthyear'], y=y_hw_plot['Total_apax'], mode='lines', name='real')
trace_predict = go.Scatter(x=y_hw_plot['monthyear'], y=y_hw_plot['Holt_Winter'], mode='lines', name='predict')
data_plot = [trace_real, trace_predict]
layout = go.Layout(
    title="HoltWinter method ::: RMS :: " + str(rms_holt_winters)
)
fig = go.Figure(data=data_plot, layout=layout)
plot(fig, filename="holt_winter.html")
# Implementing auto-ARIMA for the same market
def preprocess_load_data_forec(dataframe, quarter_hour=True, short_term=True, scaler=None, n_ahead=1,
                               calendars=None):
    """Pre-process load data for forecasting: scale, split into train/test,
    de-seasonalize, and construct features.

    Expects a pandas DataFrame with a DatetimeIndex and a 'load' column
    containing the load in MW with no missing values. Resolution is
    quarter-hourly when quarter_hour=True, otherwise hourly.

    Returns (train_df, test_df, scaler); the feature frames are produced by
    ``construct_features`` (defined elsewhere).
    """
    # Use GW for convenience and readability later; the standard-scaled
    # values are also smaller.
    dataframe = dataframe / 1000
    # Split first so the scaler and the de-seasonalizing models can be
    # trained on the train set only (no test-set leakage).
    train_df_o, test_df_o = train_test_split(dataframe, test_size=0.2, shuffle=False)
    if scaler is None:
        # Fit a fresh scaler on train only; a caller-supplied scaler is
        # presumably already fitted — TODO confirm that contract.
        scaler = StandardScaler()
        scaler.fit(np.array(train_df_o['load']).reshape(-1, 1))
    train_df = pd.DataFrame(
        {
            'load': scaler.transform(np.array(train_df_o['load']).reshape(
                -1, 1)).squeeze()
        },
        index=train_df_o.index)
    test_df = pd.DataFrame(
        {
            'load': scaler.transform(np.array(test_df_o['load']).reshape(-1, 1)).squeeze()
        },
        index=test_df_o.index)
    # De-seasonalize: accumulate the removed seasonal components so callers
    # can add them back after forecasting.
    offset_train = pd.DataFrame(0, index=train_df.index, columns=['load'])
    offset_test = pd.DataFrame(0, index=test_df.index, columns=['load'])
    # Decompose and train Holt-Winters on each seasonal component:
    # daily and weekly cycles (in hours), scaled x4 for 15-minute data.
    seasonal_periods = [24, 24 * 7]
    freq = 'H'
    if quarter_hour:
        seasonal_periods = [p * 4 for p in seasonal_periods]
        freq = '15T'
    for p in seasonal_periods:
        # Extract the period-p seasonal component from train, model it with
        # additive Holt-Winters, and subtract its prediction from both
        # splits (train_df shrinks toward the residual as the loop runs).
        decomp = seasonal_decompose(train_df, period=p)
        exp = ExponentialSmoothing(decomp.seasonal, seasonal_periods=p, seasonal='add',
                                   freq=freq).fit()
        train_pred = exp.predict(start=train_df.index[0], end=train_df.index[-1])
        test_pred = exp.predict(start=test_df.index[0], end=test_df.index[-1])
        train_df['load'] = (train_df['load'] - train_pred)
        test_df['load'] = (test_df['load'] - test_pred)
        offset_train['load'] = offset_train['load'] + train_pred
        offset_test['load'] = offset_test['load'] + test_pred
    # construct features
    train_df = construct_features(dataframe=train_df, offset=offset_train, short_term=short_term,
                                  quarter_hour=quarter_hour, n_ahead=n_ahead, calendars=calendars)
    test_df = construct_features(dataframe=test_df, offset=offset_test, short_term=short_term,
                                 quarter_hour=quarter_hour, n_ahead=n_ahead, calendars=calendars)
    return train_df, test_df, scaler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt

# Variant sales export; 'Variant SKU' is kept as string so leading zeros survive.
sales_tg = pd.read_csv('forecasting/SalesTG/Variant Report.csv',
                       dtype={'Variant SKU': 'str'},
                       parse_dates=['Issued At'])

# Consumer sales of SKU '2010', ordered by issue date.
sales_2010 = sales_tg[(sales_tg['Customer Type'] == 'consumer')
                      & (sales_tg['Variant SKU'] == '2010')][
    ['Customer Name', 'Quantity', 'Issued At', 'Location Name']
].reset_index().sort_values('Issued At')
sales_2010.head(20)

# Pre-2017 total quantity (exploratory check).
sales_2010[sales_2010['Issued At'] < '2017-01-01']['Quantity'].sum()

# Monthly totals; the 'index' column is a leftover of reset_index() above.
sales_2010_month = sales_2010.set_index('Issued At').resample('M').sum().drop('index', axis=1)
sales_2010_month.plot()

# NOTE(review): seasonal_periods has no effect unless seasonal= is also set
# on ExponentialSmoothing — confirm whether a seasonal model was intended.
holt_model = ExponentialSmoothing(endog=sales_2010_month, seasonal_periods=12).fit(smoothing_level=0.6)

import datetime

# Fixed: predict() was called as predict(123, start=..., end=...), which
# passes 'start' both positionally and by keyword and raises TypeError.
holt_model.predict(start=datetime.date(2016, 6, 1), end='2017-12-31')
# Fit the (externally created) scaler and hold out the last 365 rows as test.
# 'scaler' and 'df' come from earlier code in the file.
scaler.fit(df)
train = df[8036:-365]  # NOTE(review): magic offset 8036 — presumably trims leading rows; confirm
test = df[-365:]

# ---------1. PREDICTING RELATIVE HUMIDITY------------
print("Predicting humidity")
y_hat_avg1 = test.copy()
# Holt-Winters with additive trend/seasonality; seasonal_periods=2 looks
# unusually short for daily weather data — TODO confirm it is intentional.
fit1 = ExponentialSmoothing(np.asarray(train['humidity']), seasonal_periods=2,
                            trend='add', seasonal='add').fit()
# y_hat_avg['Holt_Winter'] = fit1.forecast(len(test))
# NOTE(review): start=0/end=364 are indices into the *training* sample
# (in-sample fitted values), not a forecast of the test period — the
# commented-out forecast(len(test)) line above would be the true forecast;
# confirm which was intended.
y_hat_avg1['Holt_Winter'] = fit1.predict(start=0, end=364)
# plotting
plt.figure()
plt.plot(train['humidity'], label='Train')
plt.plot(test['humidity'], label='Test')
plt.plot(y_hat_avg1['Holt_Winter'], label='Holt-Winters')
plt.title('RH1')
plt.xlabel('date')
plt.ylabel('Relative humidity')
plt.legend(loc='best')
# plt.show()