def fit(self, folds=3, thetas=(-2, -1, 0, 0.25, 0.5, 0.75, 1.25, 1.5, 1.75, 2)):
    """Select the best theta for every series by time-series cross-validation.

    Theta models based on Kevin Sheppard's code. For each column of
    ``self.data`` the training slice (rows up to ``self.train_ix[series] - 1``)
    is split with ``TimeSeriesSplit``. Every candidate theta is scored with
    ``mse`` on each held-out fold, the theta with the lowest mean score is
    stored in ``self.best_theta[series]``, and the model re-estimated on the
    whole training slice is stored in ``self.fitted[series]``.

    Parameters
    ----------
    folds : int, default 3
        Number of splits passed to ``TimeSeriesSplit``.
    thetas : tuple of float
        Theta values to evaluate.

    Returns
    -------
    None
        Results are written to ``self.best_theta``, ``self.fitted`` and
        ``self.fit_success``.
    """
    splitter = TimeSeriesSplit(n_splits=folds)
    fit_args = {"disp": False, "iprint": -1, "low_memory": True}

    for series in self.data.columns:
        x = self.data.loc[:self.train_ix[series] - 1, series]
        mspes = {theta: np.empty((folds, 1)) for theta in thetas}
        # One parameter table per fold. Reusing a single DataFrame object for
        # every key would make all folds write into the same columns.
        params = {
            fold: pd.DataFrame(None, index=["a0", "b0"], dtype=np.double)
            for fold in range(folds)
        }

        for fold_ix, (tr_ix, te_ix) in enumerate(splitter.split(x)):
            x_tr, x_te = x.iloc[tr_ix], x.iloc[te_ix]
            t = x_tr.shape[0]
            k = x_te.shape[0]

            # The smoothing fit, its forecast and the unscaled trend depend
            # only on the training fold, so compute them once per fold
            # instead of once per theta.
            ses = ExponentialSmoothing(x_tr).fit(**fit_args)
            alpha = ses.params.smoothing_level
            ses_forecast = ses.forecast(k)
            base_trend = np.arange(k) + 1 / alpha - ((1 - alpha) ** t) / alpha

            for theta in thetas:
                # Estimate the theta model for this fold and read its slope.
                params[fold_ix][theta] = self.estimate(x_tr, theta)
                b0 = params[fold_ix][theta]["b0"]

                # NOTE(review): the 0.5 weight is fixed; theta influences the
                # forecast only through the b0 returned by self.estimate --
                # confirm this is intended.
                trend = base_trend * (0.5 * b0)
                forecast = np.array(ses_forecast + trend)
                mspes[theta][fold_ix] = mse(x_te, forecast)

        # Average the per-fold scores and keep the best-scoring theta.
        mean_mspe = {theta: np.mean(errs) for theta, errs in mspes.items()}
        self.best_theta[series] = min(mean_mspe, key=mean_mspe.get)
        self.fitted[series] = self.estimate(x, self.best_theta[series])

    self.fit_success = True
def forecast(self, true_vals):
    """Forecast every series with the previously fitted models.

    For each column of ``self.data`` an exponential-smoothing model is fitted
    to the training slice (rows up to ``self.train_ix[series] - 1``). Its
    forecast plus a trend term scaled by the stored ``b0`` of
    ``self.fitted[series]`` is written to the ``"Theta"`` column of a result
    frame, next to the supplied true values. The frame is stored in
    ``self.forecasts[series]``.

    Parameters
    ----------
    true_vals : pd.DataFrame
        Required. Values to forecast; its number of rows sets the forecast
        horizon and its column count must match ``self.data``. Assumed to be
        adjacent to the existing data.

    Returns
    -------
    None
        Results are written to ``self.forecasts`` and
        ``self.forecasts_generated``.

    Raises
    ------
    AssertionError
        If the model has not been fitted, or if the column counts of
        ``self.data`` and ``true_vals`` differ.
    """
    assert self.fit_success, "Please fit model before forecasting"
    assert self.data.shape[1] == true_vals.shape[1], "Dimension mismatch"

    steps = true_vals.shape[0]
    fit_args = {"disp": False, "iprint": -1, "low_memory": True}

    for series in self.data.columns:
        x = self.data.loc[:self.train_ix[series] - 1, series]
        t = x.shape[0]

        # Frame holding the true values next to the forecasts.
        res = pd.DataFrame(index=np.arange(steps), columns=[series, "Theta"])
        # NOTE(review): this assignment aligns on index labels, so true_vals
        # is presumably indexed 0..steps-1 -- confirm against callers.
        res.loc[:, series] = true_vals.loc[:, series]

        # Smoothing parameter and level forecast.
        ses = ExponentialSmoothing(x).fit(**fit_args)
        alpha = ses.params.smoothing_level
        ses_forecast = ses.forecast(steps)

        # Trend contribution scaled by the fitted slope.
        b0 = self.fitted[series]["b0"]
        trend = np.arange(steps) + 1 / alpha - ((1 - alpha) ** t) / alpha
        trend *= 0.5 * b0

        res.loc[:, "Theta"] = (ses_forecast + trend).values
        self.forecasts[series] = res

    self.forecasts_generated = True
def fit(self, use_mle: bool = False, disp: bool = False) -> "ThetaModelResults":
    r"""
    Estimate the model parameters and return a results object.

    Parameters
    ----------
    use_mle : bool, default False
        If True, estimate the parameters by MLE from an ARIMA(0,1,1) with
        a drift. If False (the default), use two-step estimation: OLS on a
        constant and a time-trend, plus a SES fitted to the model data.
    disp : bool, default False
        Display iterative output from fitting the model.

    Returns
    -------
    ThetaModelResults
        Model results and forecasting.

    Notes
    -----
    When using MLE, the parameters are estimated from the ARIMA(0,1,1)

    .. math::

       X_t = X_{t-1} + b_0 + (\alpha-1)\epsilon_{t-1} + \epsilon_t

    When estimating the model using 2-step estimation, the model
    parameters are estimated using the OLS regression

    .. math::

       X_t = a_0 + b_0 (t-1) + \eta_t

    and the SES

    .. math::

       \tilde{X}_{t+1} = \alpha X_{t} + (1-\alpha)\tilde{X}_{t}
    """
    if self._deseasonalize and self._use_test:
        self._test_seasonality()
    y, seasonal = self._deseasonalize_data()

    if not use_mle:
        # Two-step path: the slope comes from OLS on a constant and a
        # time-trend whose column is shifted down by one.
        design = add_trend(y, "ct", prepend=True)[:, :2]
        design[:, 1] -= 1
        ols_coefs = np.linalg.lstsq(design, y, rcond=None)[0]
        _, b0 = ols_coefs

        # SES with the initial level fixed to the first observation.
        ses_model = ExponentialSmoothing(
            y, initial_level=y[0], initialization_method="known"
        )
        ses_res = ses_model.fit(disp=disp)
        alpha = ses_res.params[0]
        one_step = ses_res.forecast(1)
        return ThetaModelResults(
            b0, alpha, None, one_step, seasonal, use_mle, self
        )

    # MLE path: ARIMA(0,1,1) with a constant; alpha is the MA coefficient + 1.
    arima = SARIMAX(y, order=(0, 1, 1), trend="c")
    mle_res = arima.fit(disp=disp)
    estimates = np.asarray(mle_res.params)
    alpha = estimates[1] + 1
    if alpha > 1:
        # Cap alpha just below one and refit with the MA term held fixed.
        alpha = 0.9998
        mle_res = arima.fit_constrained({"ma.L1": alpha - 1})
        estimates = np.asarray(mle_res.params)

    b0, sigma2 = estimates[0], estimates[-1]
    one_step = mle_res.forecast(1) - b0
    return ThetaModelResults(
        b0, alpha, sigma2, one_step, seasonal, use_mle, self
    )
# Get Residuals #~~~~~~~~~~~~~~ residuals = log_close - full_model.predict(0, len(close) - 1) # Residual Anlysis #~~~~~~~~~~~~~~~~~ ResidualAnalysis(datetime, residuals, nlags=252) #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ### Model Validation #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print('Running Model Validation\n------------------------') # Get Model Predictions #~~~~~~~~~~~~~~~~~~~~~~ pred_close = test_model.forecast(validation_size) # Get Erros #~~~~~~~~~~ error = validation_close - pred_close err_mu, err_sigma = error.mean(), error.std() # Plot Predictions #~~~~~~~~~~~~~~~~~ plt.figure() plt.plot(training_datetime[-validation_size:], training_close[-validation_size:], 'b', linewidth=1, label='Training') plt.plot(validation_datetime,