def test_concentrated_initialization():
    """Compare concentrated initialization with numerical MLE.

    One model concentrates the initial state out of the likelihood, the
    other uses the default initialization. After fitting both, the
    log-likelihoods and initial states are asserted to agree.
    """
    concentrated = ExponentialSmoothing(
        oildata, initialization_method='concentrated')
    estimated = ExponentialSmoothing(oildata)

    # First, hold the smoothing parameter fixed at a particular value.
    # These two results are not compared; they are overwritten below.
    res_conc = concentrated.filter([0.1])
    res_est = estimated.fit_constrained({'smoothing_level': 0.1}, disp=0)

    # Alternatively, estimate the remaining parameters and compare the fits.
    res_conc = concentrated.fit(disp=0)
    res_est = estimated.fit(disp=0)

    assert_allclose(res_conc.llf, res_est.llf)
    assert_allclose(res_conc.initial_state, res_est.initial_state,
                    rtol=1e-5)
def setup_class(cls):
    """Build the damped-trend, seasonal (period 4) model on ``aust``.

    The model uses heuristic initialization and is handed to the parent
    class setup.
    """
    spec = dict(
        trend=True,
        damped_trend=True,
        seasonal=4,
        initialization_method='heuristic',
    )
    model = ExponentialSmoothing(aust, **spec)
    super().setup_class(model)
def setup_class(cls):
    """Filter the oil data with the reference ``oil_fpp2`` alpha.

    Simple exponential smoothing (FPP: 7.1) is checked against fpp::ses,
    using a fixed coefficient (0.6) and simple initialization.
    """
    model = ExponentialSmoothing(oildata, initialization_method='simple')
    alpha = results_params['oil_fpp2']['alpha']
    filtered = model.filter([alpha])
    super().setup_class('oil_fpp2', filtered)
def simple_exponential_smoothing_ci(stochastic_process):
    """Fit SES to a simulated path and plot its forecast interval.

    A path of 200 points is simulated, the first 161 points are used to fit
    a simple exponential smoothing model with a known initial level, and the
    remaining horizon is forecast with a 95% interval. The figure is saved
    as SVG and then shown.

    Parameters
    ----------
    stochastic_process : str
        ``"GP"`` to sample from ``sample_gaussian_process`` or ``"RW"`` to
        sample from ``sample_random_walk``.

    Raises
    ------
    ValueError
        If ``stochastic_process`` is neither ``"GP"`` nor ``"RW"``.
    """
    # `alpha` is only forwarded to plot_ci (presumably a plot transparency
    # -- TODO confirm); the interval level is the separate alpha=0.05 below.
    N, t, alpha = 200, 160, 0.5

    # Choose the sampler, the known initial level and the output file in a
    # single dispatch so the two can never disagree.
    if stochastic_process == "GP":
        x0 = 20
        realisations = pd.Series(sample_gaussian_process(20, 5, N), range(N))
        figure_path = '/Users/gwren/Downloads/36_ses_prediction_intervals.svg'
    elif stochastic_process == "RW":
        x0 = 0
        realisations = pd.Series(list(sample_random_walk(0, N)), range(N))
        figure_path = '/Users/gwren/Downloads/37_ses_gaussian_random_walk.svg'
    else:
        # quit() is meant for the interactive shell; fail loudly instead.
        raise ValueError(
            'stochastic_process must be "GP" or "RW", got %r'
            % (stochastic_process,))

    mod = ExponentialSmoothing(realisations[:t + 1],
                               initialization_method='known',
                               initial_level=x0).fit(disp=False)
    print(mod.summary())

    forecasts = mod.get_forecast(N - (t + 1))
    forecasts_ci = forecasts.conf_int(alpha=0.05)

    # Pad the in-sample period with NaN so the forecast lines up with the
    # realisations. Series.append was removed in pandas 2.0; pd.concat is
    # the equivalent.
    padded_forecast = pd.concat(
        [pd.Series(np.nan, range(t + 1)), forecasts.predicted_mean])
    plot_ci(realisations, padded_forecast, forecasts_ci, alpha)

    plt.savefig(figure_path, format='svg')
    py.show()
def fit(self, x, horizon):
    """Fit exponential smoothing to ``x`` and return point forecasts.

    The prediction covers positions ``len(x)`` through
    ``len(x) + horizon - 1``. The predicted mean is printed and returned.
    """
    results = ExponentialSmoothing(x).fit()
    first = len(x)
    last = len(x) + horizon - 1
    prediction = results.get_prediction(first, last)
    print('ETS', prediction.predicted_mean)
    return prediction.predicted_mean
def fit(self, folds=3, thetas=(-2, -1, 0, 0.25, 0.5, 0.75, 1.25, 1.5, 1.75, 2)):
    """Fit theta models, choosing theta per series by cross-validation.

    Based on Kevin Sheppard's code. For every column of ``self.data`` the
    training part of the series is split with ``TimeSeriesSplit``; each
    candidate theta is scored by out-of-sample ``mse`` in each fold, and
    the theta with the lowest mean score is selected and re-estimated on
    the full training part.

    Parameters
    ----------
    folds : int
        Number of ``TimeSeriesSplit`` splits.
    thetas : tuple of float
        Candidate theta values to evaluate.

    Returns
    -------
    None
        Results are stored on the instance: ``self.best_theta[series]``,
        ``self.fitted[series]`` and ``self.fit_success = True``.
    """
    # Initialise the KFold object
    kf = TimeSeriesSplit(n_splits=folds)
    fit_args = {"disp": False, "iprint": -1, "low_memory": True}

    for series in self.data.columns:
        x = self.data.loc[:self.train_ix[series] - 1, series]
        mspes = {theta: np.empty((folds, 1)) for theta in thetas}
        # One parameter table per fold (a fresh frame each, so folds never
        # share storage).
        params = {
            fold: pd.DataFrame(None, index=["a0", "b0"], dtype=np.double)
            for fold in range(folds)
        }

        for fold_ix, (tr_ix, te_ix) in enumerate(kf.split(x)):
            # Set up data
            x_tr, x_te = x.iloc[tr_ix], x.iloc[te_ix]
            t = x_tr.shape[0]
            k = x_te.shape[0]

            # The smoothing fit, its forecast and the drift weights do not
            # depend on theta, so compute them once per fold.
            ses = ExponentialSmoothing(x_tr).fit(**fit_args)
            alpha = ses.params.smoothing_level
            ses_forecast = ses.forecast(k)
            drift = np.arange(k) + 1 / alpha - ((1 - alpha) ** t) / alpha

            for theta in thetas:
                # Estimate the theta model and pull out its slope
                params[fold_ix][theta] = self.estimate(x_tr, theta)
                b0 = params[fold_ix][theta]["b0"]

                # Actual forecasting: SES forecast plus half the drift
                forecast = np.array(ses_forecast + 0.5 * b0 * drift)
                mspes[theta][fold_ix] = mse(x_te, forecast)

        # Evaluate the KFold: average score per theta, lowest wins
        mean_mspes = {theta: np.mean(scores)
                      for theta, scores in mspes.items()}
        self.best_theta[series] = min(mean_mspes, key=mean_mspes.get)
        self.fitted[series] = self.estimate(x, self.best_theta[series])

    self.fit_success = True
def setup_class(cls):
    """Repeat the ``oil_fpp2`` setup on a copy whose name is a tuple.

    The copy of the oil series is renamed to ``("oil", "data")`` before the
    model is built with simple initialization and filtered at the reference
    alpha.
    """
    renamed = oildata.copy()
    renamed.name = ("oil", "data")
    model = ExponentialSmoothing(renamed, initialization_method='simple')
    alpha = results_params['oil_fpp2']['alpha']
    super().setup_class('oil_fpp2', model.filter([alpha]))
def forecast(self, true_vals):
    """Forecast every series using the previously fitted theta models.

    Parameters
    ----------
    true_vals : pd.DataFrame
        Realised values for the forecast period. Must have the same number
        of columns as ``self.data`` and a column for each series name; its
        number of rows sets the forecast horizon. Assumed to be adjacent to
        the existing data.

    Returns
    -------
    None
        For each series a DataFrame with columns ``[series, "Theta"]`` is
        stored in ``self.forecasts[series]``, and
        ``self.forecasts_generated`` is set to True.
    """
    assert self.fit_success, "Please fit model before forecasting"
    assert self.data.shape[1] == true_vals.shape[1], "Dimension mismatch"

    steps = true_vals.shape[0]
    fit_args = {"disp": False, "iprint": -1, "low_memory": True}

    for series in self.data.columns:
        # Set up
        x = self.data.loc[:self.train_ix[series] - 1, series]
        t = x.shape[0]

        # Generate the dataframe in which to save the forecasts
        res = pd.DataFrame(index=np.arange(steps), columns=[series, "Theta"])
        # Copy positionally: `res` is indexed 0..steps-1, so an aligned
        # assignment would produce NaN whenever `true_vals` carries any
        # other index (e.g. one continuing from the training data).
        res.loc[:, series] = true_vals.loc[:, series].values

        # Smoothing parameter and SES forecast
        ses = ExponentialSmoothing(x).fit(**fit_args)
        alpha = ses.params.smoothing_level
        ses_forecast = ses.forecast(steps)

        # Drift term scaled by half the fitted slope
        b0 = self.fitted[series]["b0"]
        trend = np.arange(steps) + 1 / alpha - ((1 - alpha) ** t) / alpha
        trend *= 0.5 * b0

        res.loc[:, "Theta"] = (ses_forecast + trend).values
        self.forecasts[series] = res

    self.forecasts_generated = True
def setup_class(cls):
    """Set up the damped-trend, seasonal (period 4) model on ``aust``.

    Explicit starting parameters, labelled with the model's parameter
    names, are passed to the parent setup with a relative tolerance of 1e-1.
    """
    model = ExponentialSmoothing(
        aust, trend=True, damped_trend=True, seasonal=4)
    initial_guess = [0.0005, 0.0004, 0.0005, 0.95, 17.0, 1.5, -0.2, 0.1, 0.4]
    start_params = pd.Series(initial_guess, index=model.param_names)
    super().setup_class(model, start_params=start_params, rtol=1e-1)
def test_parameterless_model(reset_randomstate):
    """Fit a model whose only parameter is fixed (GH 6687).

    With a known initial level and the smoothing level fixed, the standard
    errors are asserted to be all NaN and the fixed parameter is reported.
    """
    # GH 6687
    shocks = np.random.standard_normal(1000)
    path = np.cumsum(shocks)
    model = ExponentialSmoothing(path, initial_level=path[0],
                                 initialization_method="known")
    with model.fix_params({'smoothing_level': 0.5}):
        results = model.fit()

    assert np.isnan(results.bse).all()
    assert results.fixed_params == ["smoothing_level"]
def setup_class(cls):
    """Filter the oil data at the reference ``oil_ets`` parameters.

    Simple exponential smoothing (FPP: 7.1) is checked against
    forecast::ets, with estimated coefficients. The scale is not
    concentrated out, so the parameter vector is alpha, sigma2, l0.
    """
    model = ExponentialSmoothing(oildata, initialization_method='estimated',
                                 concentrate_scale=False)
    reference = results_params['oil_ets']
    params = [reference[key] for key in ('alpha', 'sigma2', 'l0')]
    super().setup_class('oil_ets', model.filter(params))
def holt_ci():
    """Fit a trend model to a simulated random walk and plot its forecast interval.

    A path of 200 points is drawn from ``sample_random_walk``; the first 161
    points are used to fit exponential smoothing with a trend and estimated
    initialization, and the remaining horizon is forecast with a 95%
    interval. The model summary is printed and the plot is shown.
    """
    N, t = 200, 160
    realisations = pd.Series(list(sample_random_walk(0, 0.1, N)), range(N))

    mod = ExponentialSmoothing(
        realisations[:t + 1], trend=True,
        initialization_method='estimated').fit(disp=False)
    print(mod.summary())

    forecasts = mod.get_forecast(N - (t + 1))
    forecasts_ci = forecasts.conf_int(alpha=0.05)

    # Pad the in-sample period with NaN so the forecast lines up with the
    # realisations. Series.append was removed in pandas 2.0; pd.concat is
    # the equivalent.
    padded_forecast = pd.concat(
        [pd.Series(np.nan, range(t + 1)), forecasts.predicted_mean])
    plot_ci(realisations, padded_forecast, forecasts_ci)
    py.show()
def setup_class(cls):
    """Filter ``aust`` at the reference ``aust_ets3`` parameters.

    Holt-Winters seasonal method (FPP: 7.5) with no trend, checked against
    forecast::ets, with estimated coefficients.
    """
    model = ExponentialSmoothing(aust, seasonal=4, concentrate_scale=False)
    reference = results_params['aust_ets3']
    # Parameter order used by the original test: smoothing parameters,
    # variance, then the initial level and seasonal states.
    ordered_keys = ('alpha', 'gamma', 'sigma2', 'l0', 's0_0', 's0_1', 's0_2')
    params = np.r_[tuple(reference[key] for key in ordered_keys)]
    super().setup_class('aust_ets3', model.filter(params))
def setup_class(cls):
    """Filter ``air`` at the reference ``air_ets`` parameters.

    Holt's linear trend method (FPP: 7.2) with a damped trend, checked
    against forecast::ets, with estimated coefficients.
    """
    model = ExponentialSmoothing(air, trend=True, damped_trend=True,
                                 concentrate_scale=False)
    reference = results_params['air_ets']
    ordered_keys = ('alpha', 'beta', 'phi', 'sigma2', 'l0', 'b0')
    params = [reference[key] for key in ordered_keys]
    super().setup_class('air_ets', model.filter(params))
def setup_class(cls):
    """Filter ``air`` at the reference ``air_fpp1`` parameters.

    Holt's linear trend method (FPP: 7.2), checked against fpp::holt, with
    fixed coefficients and simple initialization.
    """
    model = ExponentialSmoothing(air, trend=True, concentrate_scale=False,
                                 initialization_method='simple')
    reference = results_params['air_fpp1']
    alpha = reference['alpha']
    beta_star = reference['beta_star']
    sigma2 = reference['sigma2']
    # The reference stores beta^*; the model takes beta = alpha * beta^*
    params = [alpha, alpha * beta_star, sigma2]
    super().setup_class('air_fpp1', model.filter(params))
def setup_class(cls):
    """Filter ``aust`` at the reference ``aust_fpp1`` parameters.

    Holt-Winters seasonal method (FPP: 7.5), checked against fpp::hw, with
    estimated coefficients.
    """
    model = ExponentialSmoothing(aust, trend=True, seasonal=4,
                                 concentrate_scale=False)
    reference = results_params['aust_fpp1']
    # Parameter order used by the original test: smoothing parameters,
    # variance, then the initial level, trend and seasonal states.
    ordered_keys = ('alpha', 'beta', 'gamma', 'sigma2', 'l0', 'b0',
                    's0_0', 's0_1', 's0_2')
    params = np.r_[tuple(reference[key] for key in ordered_keys)]
    super().setup_class('aust_fpp1', model.filter(params))
def test_invalid():
    """Invalid model specifications raise the expected errors.

    Each constructor case is a triple of expected exception type, message
    pattern and keyword arguments; the cases run in the listed order. The
    final check covers ``clone`` being called with ``exog``.
    """
    known = {'initialization_method': 'known', 'initial_level': 0}

    cases = [
        (ValueError,
         'Cannot have a seasonal period of 1.',
         {'seasonal': 1}),
        (TypeError,
         ('seasonal must be integer_like'
          r' \(int or np.integer, but not bool or timedelta64\) or None'),
         {'seasonal': True}),
        (ValueError,
         'Invalid initialization method "invalid".',
         {'initialization_method': 'invalid'}),
        (ValueError,
         ('`initial_level` argument must be provided'
          ' when initialization method is set to'
          ' "known".'),
         {'initialization_method': 'known'}),
        (ValueError,
         ('`initial_trend` argument must be provided'
          ' for models with a trend component when'
          ' initialization method is set to "known".'),
         dict(known, trend=True)),
        (ValueError,
         ('`initial_seasonal` argument must be provided'
          ' for models with a seasonal component when'
          ' initialization method is set to "known".'),
         dict(known, seasonal=4)),
    ]

    # Initial values may not be given under the default initialization
    cases.extend(
        (ValueError,
         ('Cannot give `%s` argument when initialization is "estimated"'
          % arg),
         {arg: 0})
        for arg in ['initial_level', 'initial_trend', 'initial_seasonal'])

    cases.append(
        (ValueError,
         ('Invalid length of initial seasonal values. Must be'
          ' one of s or s-1, where s is the number of seasonal'
          ' periods.'),
         dict(known, seasonal=4, initial_seasonal=0)))

    for expected_error, pattern, kwargs in cases:
        with pytest.raises(expected_error, match=pattern):
            ExponentialSmoothing(aust, **kwargs)

    with pytest.raises(NotImplementedError,
                       match='ExponentialSmoothing does not support `exog`.'):
        base_model = ExponentialSmoothing(aust)
        base_model.clone(aust, exog=air)
def setup_class(cls):
    """Set up the seasonal (period 4) model on ``aust``.

    Explicit starting parameters, labelled with the model's parameter
    names, are passed to the parent setup with a relative tolerance of 1e-4.
    """
    model = ExponentialSmoothing(aust, seasonal=4)
    initial_guess = [0.5, 0.49, 30., 2., -2, -9]
    start_params = pd.Series(initial_guess, index=model.param_names)
    super().setup_class(model, start_params=start_params, rtol=1e-4)
def update_revenue_forecast(historicals, method, fcast_index, focus_scenario,
                            p, d, q, P, D, Q):
    """Fit revenue and opex models and simulate forward financials.

    Log revenue (optionally deflated by an index column) is modelled with
    either exponential smoothing or a seasonal ARIMA, both with a seasonal
    period of 4. Simulated and point-forecast revenue paths are then pushed
    through a median regression of log opex on log revenue and the
    ``Q1``..``Q4`` columns to derive Opex, EBIT, EBIT margin, taxes and
    NOPAT.

    Parameters
    ----------
    historicals : data accepted by ``pd.DataFrame``
        Historical records; the code reads the columns ``DT_FIM_EXERC``,
        ``Revenue``, ``Opex``, ``Q1``..``Q4`` and, when ``fcast_index`` is
        set, the upper-cased index column.
    method : str
        ``'ets'`` or ``'arima'``.
    fcast_index : str
        Name of the index column used to rescale revenue, or ``''`` for no
        rescaling.
    focus_scenario
        Value matched against ``focus['scenario']`` (``focus`` is a
        module-level name) to select the scenario rows; the number of
        matching rows is the forecast horizon.
    p, d, q, P, D, Q : int
        ARIMA orders; only used when ``method == 'arima'``.

    Returns
    -------
    tuple or dict
        ``(records, models)`` where ``records`` is the list of row dicts of
        historicals plus simulations and ``models`` holds parameters and
        diagnostics. For an unrecognised ``method`` an empty dict is
        returned instead.
    """
    historicals = pd.DataFrame(historicals)
    historicals['DT_FIM_EXERC'] = pd.to_datetime(historicals['DT_FIM_EXERC'])
    models = {}

    # Revenue time series model
    data = historicals.set_index('DT_FIM_EXERC').asfreq('Q')
    y = data['Revenue']

    # Transform: rescale by the index to its latest value, then take logs
    if fcast_index != '':
        idx = data[fcast_index.upper()]
        y = y / idx * idx.iloc[-1]
    y = np.log(y)

    # Create forecast model
    if method == 'ets':
        rev_model = ExponentialSmoothing(y,
                                         trend=True,
                                         damped_trend=True,
                                         seasonal=4)
    elif method == 'arima':
        rev_model = SARIMAX(y,
                            order=(p, d, q),
                            seasonal_order=(P, D, Q, 4),
                            trend='c')
    else:
        return {}
    rev_results = rev_model.fit()

    models['revenue'] = {
        'Params': rev_results.params,
        'diag': {
            'In-sample RMSE': np.sqrt(rev_results.mse),
            'In-sample MAE': rev_results.mae,
            'Ljung-Box':
            rev_results.test_serial_correlation('ljungbox')[0, 0, -1],
            'log-Likelihood': rev_results.llf,
            'AICc': rev_results.aicc,
            'BIC': rev_results.bic
        }
    }

    # Cross validation: refit on shrinking prefixes and score the next block
    foldsize = 1
    nfolds = round(y.shape[0] / (4 * foldsize)) - 1
    cv_errors = []
    for fold in range(nfolds, 0, -1):
        train_subset = y.iloc[:-(fold + 2) * (4 * foldsize)]
        valid_subset = y.iloc[-(fold + 2) * (4 * foldsize):-(fold + 1) *
                              (4 * foldsize)]
        if train_subset.shape[0] < 16:
            continue
        # `y` is already log-transformed above, so the subsets are used as
        # they are; taking logs again would fit and score on a log-log
        # scale that does not match the in-sample diagnostics.
        fcasts = (rev_model.clone(train_subset).fit().forecast(
            valid_subset.shape[0]))
        cv_errors = np.append(cv_errors, fcasts - valid_subset)

    if len(cv_errors) > 4:
        models['revenue']['diag']['CV RMSE'] = np.sqrt(
            np.mean(np.array(cv_errors)**2))
        models['revenue']['diag']['CV MAE'] = np.mean(np.abs(cv_errors))

    # Generate simulated forecasts
    nsim = 100
    horiz = int(np.sum(focus['scenario'] == focus_scenario))
    forecasts = (pd.DataFrame({
        'y': rev_results.forecast(horiz),
        'group': 'forecast',
        'variable_1': ''
    }).reset_index())
    simulations = (rev_results.simulate(
        horiz, repetitions=nsim,
        anchor=data.shape[0]).reset_index().melt('index', value_name='y').drop(
            columns='variable_0').assign(group='simulation'))

    simulations = (pd.concat(
        [simulations, forecasts]).reset_index(drop=True).rename(columns={
            'variable_1': 'iteration',
            'index': 'DT_FIM_EXERC'
        }).pipe(add_quarters))
    simulations['Revenue'] = np.exp(simulations['y'])
    if fcast_index != '':
        # Undo the index rescaling using the scenario's projected index
        simulations = simulations.merge(
            focus[['DT_FIM_EXERC',
                   fcast_index.upper()]][focus['scenario'] == focus_scenario],
            on="DT_FIM_EXERC",
            how="left")
        simulations['Revenue'] = simulations['Revenue'] \
            * simulations[fcast_index.upper()] \
            / data[fcast_index.upper()].iloc[-1]

    simulations['RevenueGrowth'] = 100 * (
        simulations['Revenue'] /
        simulations.groupby('iteration')['Revenue'].shift(4) - 1)
    # The first four rows of every iteration have no lag inside the
    # simulation; compare them with the last four historical revenues.
    simulations.loc[simulations['RevenueGrowth'].isna(), 'RevenueGrowth'] = \
        np.reshape(
            100 * (
                np.reshape(
                    simulations['Revenue'][
                        simulations['RevenueGrowth'].isna()].values,
                    (nsim + 1, 4)) /
                historicals['Revenue'].tail(4).values - 1
            ),
            ((nsim + 1) * 4)
        )

    # Expenses regression model
    historicals['logRevenue'] = np.log(historicals['Revenue'])
    exog = historicals[['logRevenue', 'Q1', 'Q2', 'Q3', 'Q4']]

    opex_model = QuantReg(np.log(historicals['Opex']), exog)
    opex_results = opex_model.fit(q=0.5)
    opex_coefs = opex_results.params
    rmse = np.mean(opex_results.resid**2)**.5

    models['opex'] = {
        'Params': opex_results.params,
        'diag': {
            # Root of the mean squared residual (the previous expression
            # squared the mean residual instead).
            'In-sample RMSE': rmse,
            'In-sample MAE': np.mean(np.abs(opex_results.resid)),
        }
    }

    # Simulations: coefficients are taken by position, in the column order
    # of `exog`; residual noise is added to simulated rows only.
    simulations['Opex'] = np.exp(
        opex_coefs.iloc[0] * np.log(simulations['Revenue']) +
        opex_coefs.iloc[1] * simulations['Q1'] +
        opex_coefs.iloc[2] * simulations['Q2'] +
        opex_coefs.iloc[3] * simulations['Q3'] +
        opex_coefs.iloc[4] * simulations['Q4'] +
        np.random.normal(0, rmse, simulations.shape[0]) *
        (simulations['group'] == 'simulation'))
    simulations['EBIT'] = simulations['Revenue'] - simulations['Opex']
    simulations['EBITMargin'] = (100 * simulations['EBIT'] /
                                 simulations['Revenue'])
    simulations['Taxes'] = simulations['EBIT'] * .34
    simulations['NOPAT'] = simulations['EBIT'] - simulations['Taxes']

    simulations = pd.concat(
        [historicals.assign(group='historicals', iteration=''), simulations])

    return simulations.to_dict('records'), models
def setup_class(cls):
    """Set up the trend plus seasonal (period 4) model on ``aust``.

    Explicit starting parameters, labelled with the model's parameter
    names, are passed to the parent setup with a relative tolerance of 1e-3.
    """
    model = ExponentialSmoothing(aust, trend=True, seasonal=4)
    initial_guess = [0.0005, 0.0004, 0.5, 33., 0.4, 2.5, -2., -9.]
    start_params = pd.Series(initial_guess, index=model.param_names)
    super().setup_class(model, start_params=start_params, rtol=1e-3)
def setup_class(cls):
    """Set up the damped-trend model on ``air``.

    Explicit starting parameters, labelled with the model's parameter
    names, are passed to the parent setup with a relative tolerance of 1e-1.
    """
    model = ExponentialSmoothing(air, trend=True, damped_trend=True)
    initial_guess = [0.95, 0.0005, 0.9, 15., 2.5]
    start_params = pd.Series(initial_guess, index=model.param_names)
    super().setup_class(model, start_params=start_params, rtol=1e-1)
def setup_class(cls):
    """Set up the default model on the oil data.

    Explicit starting parameters, labelled with the model's parameter
    names, are passed to the parent setup with a relative tolerance of 1e-5.
    """
    model = ExponentialSmoothing(oildata)
    initial_guess = [0.85, 445.]
    start_params = pd.Series(initial_guess, index=model.param_names)
    super().setup_class(model, start_params=start_params, rtol=1e-5)
def fit(self, use_mle: bool = False, disp: bool = False) -> "ThetaModelResults":
    r"""
    Estimate model parameters.

    Parameters
    ----------
    use_mle : bool, default False
        Estimate the parameters using MLE by fitting an ARIMA(0,1,1) with
        a drift.  If False (the default), estimates parameters using OLS
        of a constant and a time-trend and by fitting a SES to the model
        data.
    disp : bool, default False
        Display iterative output from fitting the model.

    Notes
    -----
    When using MLE, the parameters are estimated from the ARIMA(0,1,1)

    .. math::

       X_t = X_{t-1} + b_0 + (\alpha-1)\epsilon_{t-1} + \epsilon_t

    When estimating the model using 2-step estimation, the model
    parameters are estimated using the OLS regression

    .. math::

       X_t = a_0 + b_0 (t-1) + \eta_t

    and the SES

    .. math::

       \tilde{X}_{t+1} = \alpha X_{t} + (1-\alpha)\tilde{X}_{t}

    Returns
    -------
    ThetaModelResults
        Model results and forecasting
    """
    if self._deseasonalize and self._use_test:
        self._test_seasonality()
    y, seasonal = self._deseasonalize_data()
    if use_mle:
        mod = SARIMAX(y, order=(0, 1, 1), trend="c")
        res = mod.fit(disp=disp)
        params = np.asarray(res.params)
        alpha = params[1] + 1
        if alpha > 1:
            # An estimate above 1 is replaced by 0.9998 and the model is
            # refit with the MA coefficient held at that value. `disp` is
            # forwarded so this refit honours the caller's setting too.
            alpha = 0.9998
            res = mod.fit_constrained({"ma.L1": alpha - 1}, disp=disp)
            params = np.asarray(res.params)
        b0 = params[0]
        sigma2 = params[-1]
        # Remove the drift from the one-step forecast
        one_step = res.forecast(1) - b0
    else:
        # Regress on a constant and a time trend; the trend column is
        # shifted by one so it matches the (t-1) regressor in the Notes.
        ct = add_trend(y, "ct", prepend=True)[:, :2]
        ct[:, 1] -= 1
        _, b0 = np.linalg.lstsq(ct, y, rcond=None)[0]
        res = ExponentialSmoothing(
            y, initial_level=y[0], initialization_method="known"
        ).fit(disp=disp)
        alpha = res.params[0]
        sigma2 = None
        one_step = res.forecast(1)
    return ThetaModelResults(
        b0, alpha, sigma2, one_step, seasonal, use_mle, self
    )
def setup_class(cls):
    """Build the trend model on ``air`` with heuristic initialization.

    The model is handed to the parent class setup.
    """
    spec = dict(trend=True, initialization_method='heuristic')
    model = ExponentialSmoothing(air, **spec)
    super().setup_class(model)
def setup_class(cls):
    """Build the model on the oil data with heuristic initialization.

    The model is handed to the parent class setup.
    """
    spec = dict(initialization_method='heuristic')
    model = ExponentialSmoothing(oildata, **spec)
    super().setup_class(model)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
print("Building Holt-Winter's Model\n" + '-' * 28)

# Split Data into Training and Validation
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
validation_ratio = .2
training_close, validation_close = ValidationSplit(log_close,
                                                   validation_ratio)
training_datetime, validation_datetime = ValidationSplit(
    datetime, validation_ratio)
validation_size = len(validation_close)

# Full Model (Using All Data)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# The seasonal component is currently switched off; `period` is kept for
# when it is re-enabled.
period = 20
# NOTE: the optimizer's iteration cap is spelled `maxiter`; the previous
# `maxiters` keyword is not a parameter of fit(), so the intended cap was
# not applied.
full_model = ExponentialSmoothing(
    log_close,
    trend=True
    #, seasonal = period
).fit(maxiter=100000)

# Test Model (Using Training Data)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
test_model = ExponentialSmoothing(
    training_close,
    trend=True
    #seasonal = period
).fit(maxiter=100000)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
### Full Model Residuals
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Get Residuals
#~~~~~~~~~~~~~~