def Prediction(self, n_days, params='Best'):
    """Forecast every county using one SARIMAX fit on the pooled data.

    A SARIMAX model with order ``params`` is fitted on ``self.Values``. Its
    estimated parameters are then applied (through ``smooth``, i.e. without
    re-estimation) to a per-county model built on ``self.Data_Dates[county]``
    and a prediction with 95% confidence bounds is extracted for each county.

    Parameters
    ----------
    n_days : int
        Predictions run from observation 0 up to observation
        ``len(series) + n_days`` of each county series.
    params : tuple or 'Best', optional
        ARIMA ``order`` passed to SARIMAX. The default ``'Best'`` uses
        ``self.BestParams`` (set by the grid search).

    Returns
    -------
    pandas.DataFrame
        Columns ``County``, ``mean``, ``mean_ci_upper``, ``mean_ci_lower``;
        index named ``date``. The same frame is stored on ``self.Pred``.

    Raises
    ------
    ValueError
        If ``params == 'Best'`` but ``self.BestParams`` has not been set.
    """
    columns = ['County', 'mean', 'mean_ci_upper', 'mean_ci_lower']

    if params == 'Best':
        try:
            params = self.BestParams
        except AttributeError as err:
            # Fail here with a clear message instead of letting the literal
            # string 'Best' reach SARIMAX as an order.
            raise ValueError(
                'Please do a grid search to find the best parameters'
            ) from err

    self.Build_Training_Data(v=0, training=False)
    pooled_model = SARIMAX(self.Values, order=params, missing='drop',
                           enforce_invertibility=False)
    pooled_fit = pooled_model.fit(disp=0)

    frames = []
    for county in self.Counties:
        series = self.Data_Dates[county]
        county_model = SARIMAX(series, order=params, missing='drop',
                               enforce_invertibility=False)
        # Reuse the pooled coefficients on this county's own series.
        county_res = county_model.smooth(pooled_fit.params)
        # NOTE(review): statsmodels documents `end` as inclusive, so this
        # yields n_days + 1 out-of-sample steps -- confirm that is intended.
        prediction = county_res.get_prediction(0, len(series) + n_days)
        frame = prediction.summary_frame(alpha=0.05)
        frame['County'] = [county] * len(frame)
        frames.append(frame[columns])

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    if frames:
        self.Pred = pd.concat(frames)
    else:
        self.Pred = pd.DataFrame(columns=columns)
    self.Pred.index.name = 'date'
    return self.Pred
def _update(self, y, X=None):
    """
    Refresh the forecasts with new data, reusing the fitted parameters.

    A fresh SARIMAX model is built on the updated series with the same
    specification as this forecaster, its state is initialised from the last
    predicted state (mean and covariance) of the previously fitted estimator,
    and the previously estimated parameters are applied via Kalman
    smoothing/filtering. No parameters are re-estimated.

    Parameters
    ----------
    y : pandas.Series
        Updated time series which to use for updating the previously fitted
        forecaster.
    X : pandas.DataFrame, shape=[n_obs, n_vars], optional (default=None)
        An optional 2-d dataframe of exogenous variables. If provided, these
        variables are used as additional features in the regression
        operation. This should not include a constant or trend. Note that if
        an ``ARIMA`` is fit on exogenous features, it must also be provided
        exogenous features for making predictions.

    Returns
    -------
    self : An instance of self
    """
    # TODO for updating see https://github.com/statsmodels/statsmodels/issues/2788 and
    # https://github.com/statsmodels/statsmodels/issues/3318

    # unnest series
    y = self._prepare_y(y)
    X = self._prepare_X(X)

    # Rebuild the model on the new data with the forecaster's specification.
    spec = dict(
        order=self.order,
        seasonal_order=self.seasonal_order,
        trend=self.trend,
        enforce_stationarity=self.enforce_stationarity,
        enforce_invertibility=self.enforce_invertibility,
    )
    refreshed = SARIMAX(y, exog=X, **spec)

    # Start from the last predicted state of the previously fitted estimator.
    previous = self._fitted_estimator
    last_state = previous.predicted_state[:, -1]
    last_state_cov = previous.predicted_state_cov[:, :, -1]
    refreshed.initialize_known(last_state, last_state_cov)

    # Filter/smooth with the already-estimated parameters.
    self._updated_estimator = refreshed.smooth(previous.params)
    return self
def GridSearch(self, n_days):
    """Grid-search the ARIMA order with the lowest mean hold-out MAE.

    For every order ``(p, d, q)`` with ``p, q`` in 1..4 and ``d`` in 0..2, a
    SARIMAX model is fitted on ``self.Values``. Its parameters are applied to
    each county series with the last ``self.v`` observations held out, and
    the forecast is scored against those held-out values with the mean
    absolute error. The order with the lowest county-averaged MAE is stored
    on ``self.BestParams``.

    Parameters
    ----------
    n_days : int
        Passed to ``self.Build_Training_Data(v=n_days, training=True)``.
        NOTE(review): the hold-out length is read from ``self.v``, presumably
        set by that call -- verify.

    Returns
    -------
    None
        The result is stored on ``self.BestParams``; the best score and
        order are printed.

    Raises
    ------
    ValueError
        If no order produced a usable (non-NaN) score.

    Notes
    -----
    Installs a process-wide ``"ignore"`` warnings filter, as before.
    """
    self.Build_Training_Data(v=n_days, training=True)
    warnings.filterwarnings("ignore")
    params = []
    scores = []
    for p in range(1, 5):
        for q in range(1, 5):
            for d in range(3):
                order = (p, d, q)
                try:
                    model = SARIMAX(self.Values, order=order, missing='drop',
                                    enforce_invertibility=False)
                    results = model.fit(disp=0)
                    scores_counties = []
                    for county in self.Counties:
                        DataCounty = self.Data_Dates[county].dropna()
                        ModelCounty = SARIMAX(DataCounty.iloc[:-self.v],
                                              order=order, missing='drop',
                                              enforce_invertibility=False)
                        res = ModelCounty.smooth(results.params)
                        fc = res.get_prediction(
                            len(DataCounty) - self.v, len(DataCounty))
                        forecast = fc.summary_frame(alpha=0.05)['mean']
                        Y = DataCounty.iloc[-self.v:].values
                        # `end` is inclusive, so the forecast holds v + 1
                        # steps starting at the first held-out observation.
                        # The FIRST v steps line up with Y; taking the last v
                        # would compare each forecast with the previous day.
                        Yhat = forecast.iloc[:self.v].values
                        MAE = np.sum(np.abs(Y - Yhat)) / self.v
                        scores_counties.append(MAE)
                except Exception:
                    # `Exception` (not a bare except) keeps Ctrl-C working
                    # during this long search. A failed order is skipped so
                    # no stale or partial score is recorded for it.
                    print('Training failed for parameters :', order)
                    continue
                scores.append(np.nanmean(scores_counties))
                params.append(order)

    if not scores or np.all(np.isnan(scores)):
        raise ValueError('Grid search produced no valid score')
    # nanargmin: plain argmin would select a NaN score as the "best" one.
    argbest = np.nanargmin(scores)
    print('Best MAE : ', scores[argbest])
    print('Best params : ', params[argbest])
    self.BestParams = params[argbest]
def update_forecast_SARIMAX(self, recent_flow):
    """Apply the stored SARIMAX parameters to recent flow data.

    Builds a SARIMAX model with order (4, 1, 1) and seasonal order
    (0, 1, 1, 24) on ``recent_flow`` and runs ``smooth`` with
    ``self.model_params`` (nothing is re-estimated here). The result is
    stored on ``self.model_fit_recent``; nothing is returned.
    """
    arima_order = (4, 1, 1)
    seasonal = (0, 1, 1, 24)
    recent_model = SARIMAX(recent_flow, order=arima_order,
                           seasonal_order=seasonal)
    smoothed = recent_model.smooth(self.model_params)
    self.model_fit_recent = smoothed
def GridSearch(Regions, Regions_Daily_Cases, Values, Food_Insecure=None,
               Pop=None, Dict_Pop=None, Dict_Food_Insec=None, exog=True,
               plot=False, v=7):
    """Grid-search the ARIMA order with the lowest mean hold-out MAE.

    For every order ``(p, d, q)`` with ``p, q`` in 1..4 and ``d`` in 0..2, a
    SARIMAX model is fitted on ``Values``. Its parameters are applied to each
    region's series with the last ``v`` observations held out, and the
    forecast is scored against those held-out values with the mean absolute
    error.

    Parameters
    ----------
    Regions : pandas.DataFrame
        Must provide ``Region`` and ``Color`` columns (region list and
        plotting palette).
    Regions_Daily_Cases : mapping of region -> pandas.Series
        Per-region series; NaNs are dropped before use.
    Values : array-like
        Pooled series used to estimate the parameters.
    Food_Insecure, Pop : array-like, optional
        Exogenous regressors for the pooled model (used when ``exog``).
    Dict_Pop, Dict_Food_Insec : mapping, optional
        Per-region constant regressor values (used when ``exog``).
    exog : bool
        Whether to use the exogenous regressors.
    plot : bool
        Whether to draw, save and show one figure per order.
    v : int
        Number of trailing observations held out for scoring.

    Returns
    -------
    tuple
        ``(best_order, best_score)``.

    Raises
    ------
    ValueError
        If no order produced a usable (non-NaN) score.

    Notes
    -----
    Installs a process-wide ``"ignore"`` warnings filter, as before.
    """
    import os  # local import: only needed to create the plot output folder

    def region_exog(region, n_rows):
        # Constant (population, food insecurity) regressors, n_rows x 2.
        return np.array([[Dict_Pop[region]] * n_rows,
                         [Dict_Food_Insec[region]] * n_rows]).transpose()

    Palette = dict(Regions[['Region', 'Color']].to_dict('split')['data'])
    warnings.filterwarnings("ignore")
    formatter = mdates.DateFormatter('%a %d/%m')
    params = []
    scoresExog = []
    List_Regions = pd.unique(Regions['Region'])

    if plot:
        # A missing folder would make savefig raise inside the try below and
        # be misreported as a training failure for every order.
        os.makedirs('PredictionCountiesDailyExog', exist_ok=True)

    for p in range(1, 5):
        for q in range(1, 5):
            for d in range(3):
                order = (p, d, q)
                try:
                    pooled_exog = (
                        np.array([Pop, Food_Insecure]).transpose()
                        if exog else None)
                    model = SARIMAX(Values, exog=pooled_exog, order=order,
                                    missing='drop',
                                    enforce_invertibility=False)
                    results = model.fit(disp=0)
                    scores_counties = []
                    if plot:
                        plt.figure()
                        ax = plt.gca()
                        plt.xticks(rotation=20)
                        ax.xaxis.set_major_locator(
                            mdates.DayLocator(interval=7))
                        ax.xaxis.set_major_formatter(formatter)
                        plt.title(
                            "Daily Cases Predicted with a single ARIMA({},{},{}) model"
                            .format(p, d, q))
                    for region in List_Regions:
                        DataCounty = Regions_Daily_Cases[region].dropna()
                        train = DataCounty.iloc[:-v]
                        ModelCounty = SARIMAX(
                            train,
                            exog=(region_exog(region, len(train))
                                  if exog else None),
                            order=order, missing='drop',
                            enforce_invertibility=False)
                        res = ModelCounty.smooth(results.params)
                        # Only pass exog when the model has exogenous
                        # regressors; previously exog=False always failed
                        # here because Dict_Pop is None in that mode.
                        pred_kwargs = (
                            {'exog': region_exog(region, v + 1)}
                            if exog else {})
                        fc = res.get_prediction(
                            len(DataCounty) - v, len(DataCounty),
                            **pred_kwargs)
                        frame = fc.summary_frame(alpha=0.05)
                        fc = frame['mean']
                        Y = DataCounty.iloc[-v:].values
                        # `end` is inclusive: the forecast holds v + 1 steps
                        # starting at the first held-out observation, so the
                        # FIRST v steps are the ones aligned with Y.
                        Yhat = fc.iloc[:v].values
                        MAE = np.sum(np.abs(Y - Yhat)) / v
                        scores_counties.append(MAE)
                        confInf = frame['mean_ci_lower']
                        confSup = frame['mean_ci_upper']
                        if plot:
                            pl = plt.plot(DataCounty, label=region,
                                          color=Palette[region])
                            plt.fill_between(confInf.index, confSup, confInf,
                                             alpha=0.3,
                                             color=pl[0].get_color())
                            plt.plot(fc, '--', color=pl[0].get_color())
                    if plot:
                        plt.text(1, 0.9,
                                 'Mean Absolute Error : {:.0f}'.format(
                                     np.nanmean(scores_counties)),
                                 transform=ax.transAxes,
                                 horizontalalignment='left')
                        plt.yscale('log')
                        plt.legend(bbox_to_anchor=(1, 0.5),
                                   loc='center left', fontsize=6)
                        plt.savefig(
                            'PredictionCountiesDailyExog/ARIMA{}{}{}_Pred.png'.
                            format(p, d, q))
                        plt.show()
                        # Release the figure; 48 orders would otherwise pile
                        # up open figures on non-interactive backends.
                        plt.close()
                    scoresExog.append(np.nanmean(scores_counties))
                    params.append(order)
                except Exception:
                    # `Exception` (not a bare except) keeps Ctrl-C working.
                    print('Training Failed for parameters :')
                    print(p, d, q)

    if not scoresExog or np.all(np.isnan(scoresExog)):
        raise ValueError('Grid search produced no valid score')
    # nanargmin: plain argmin would select a NaN score as the "best" one.
    argbest = np.nanargmin(scoresExog)
    print('Best distance : ', scoresExog[argbest])
    print('Best params : ', params[argbest])
    BestParams = params[argbest]
    return BestParams, scoresExog[argbest]
def Prediction(Regions, Regions_Daily_Cases, Values, BestParams,
               Food_Insecure=None, Pop=None, Dict_Pop=None,
               Dict_Food_Insec=None, exog=True, plot=False, v=7):
    """Forecast every region using one SARIMAX fit on the pooled data.

    A SARIMAX model of order ``BestParams`` is fitted on ``Values``. Its
    parameters are then applied (through ``smooth``, without re-estimation)
    to a per-region model, and predictions with 95% confidence bounds are
    extracted from observation 0 up to observation ``len(series) + v``.

    Parameters
    ----------
    Regions : pandas.DataFrame
        Must provide a ``Region`` column, plus a ``Color`` column when
        ``plot`` is true.
    Regions_Daily_Cases : mapping of region -> pandas.Series
        Per-region series; NaNs are dropped before use.
    Values : array-like
        Pooled series used to estimate the parameters.
    BestParams : tuple
        ARIMA ``order`` passed to SARIMAX.
    Food_Insecure, Pop : array-like, optional
        Exogenous regressors for the pooled model (used when ``exog``).
    Dict_Pop, Dict_Food_Insec : mapping, optional
        Per-region constant regressor values (used when ``exog``).
    exog : bool
        Whether to use the exogenous regressors.
    plot : bool
        Whether to plot, save and show the prediction of each region.
    v : int
        Prediction end offset past the end of each region's series.

    Returns
    -------
    pandas.DataFrame
        Columns ``region``, ``mean``, ``mean_ci_upper``, ``mean_ci_lower``;
        index named ``date``.
    """
    columns = ['region', 'mean', 'mean_ci_upper', 'mean_ci_lower']

    def region_exog(region, n_rows):
        # Constant (population, food insecurity) regressors, n_rows x 2.
        return np.array([[Dict_Pop[region]] * n_rows,
                         [Dict_Food_Insec[region]] * n_rows]).transpose()

    # The pooled model must follow `exog` too; it previously always received
    # an exog array, which is an array of None when exog=False.
    pooled_exog = np.array([Pop, Food_Insecure]).transpose() if exog else None
    BestMod = SARIMAX(Values, exog=pooled_exog, order=BestParams,
                      missing='drop', enforce_invertibility=False)
    BestRes = BestMod.fit()
    List_Regions = pd.unique(Regions['Region'])

    if plot:
        # Palette was referenced without being defined in this function;
        # build it from Regions the same way the grid search does.
        Palette = dict(Regions[['Region', 'Color']].to_dict('split')['data'])

    frames = []
    for region in List_Regions:
        DataCounty = Regions_Daily_Cases[region].dropna()
        ModelCounty = SARIMAX(
            DataCounty,
            exog=region_exog(region, len(DataCounty)) if exog else None,
            order=BestParams, missing='drop', enforce_invertibility=False)
        res = ModelCounty.smooth(BestRes.params)
        # `end` is inclusive, so v + 1 out-of-sample steps are produced and
        # the future exog needs v + 1 rows.
        pred_kwargs = {'exog': region_exog(region, v + 1)} if exog else {}
        fc = res.get_prediction(0, len(DataCounty) + v, **pred_kwargs)
        frame = fc.summary_frame(alpha=0.05)
        fc = frame['mean']
        confInf = frame['mean_ci_lower']
        confSup = frame['mean_ci_upper']
        frame['region'] = [region] * len(frame)
        frames.append(frame[columns])
        if plot:
            pl = plt.plot(DataCounty, label=region, color=Palette[region])
            plt.fill_between(confInf.index, confSup, confInf, alpha=0.3,
                             color=pl[0].get_color())
            plt.plot(fc, '--', color=pl[0].get_color())
            plt.title('Best ARIMA Predictions Cases per 100k for ' + region)
            plt.yscale('log')
            plt.savefig('PredictionsARIMABestExog/' + region)
            plt.show()

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    if frames:
        Predictions = pd.concat(frames)
    else:
        Predictions = pd.DataFrame(columns=columns)
    Predictions.index.name = 'date'
    return Predictions