def train_tbats(ts):
    """Grid-search a TBATS model on ``ts`` and forecast out of sample.

    Every combination of seasonal periods, Box-Cox usage and ARMA-error usage
    is fitted on ``ts.train`` and scored against ``ts.test``. The best
    configuration is then refitted on ``ts.series`` to produce a 12-step
    out-of-sample forecast.

    Args:
        ts (stax.TimeSeries): Time series to train model on.

    Returns:
        tuple: Model experiment results as
        `(model, test_pred, test_conf, test_metrics, OOS_pred, OOS_conf)`.
        ``test_conf`` is a list of ``(lower, upper)`` pairs, whereas
        ``OOS_conf`` is the confidence object exactly as returned by
        ``forecast``.
    """
    from itertools import product

    seasonal_period_grid = [[6, 15], [12, 15], [6, 30], [12, 30]]
    box_cox_grid = [True, False]
    arma_errors_grid = [True, False]

    # Loop-invariant inputs, computed once instead of once per candidate.
    train_values = ts.train.values
    test_values = ts.test.values
    horizon = len(test_values)
    test_mean = test_values.mean()

    results = []
    for sp, bx, ae in product(seasonal_period_grid, box_cox_grid, arma_errors_grid):
        estimator = TBATS(use_box_cox=bx, use_arma_errors=ae, seasonal_periods=sp)
        model = estimator.fit(train_values)
        pred, conf = model.forecast(steps=horizon, confidence_level=0.95)
        # NOTE: this score is the MAE divided by the mean of the test values,
        # not a per-point percentage error; the key name is kept for callers.
        score = mean_absolute_error(test_values, pred) / test_mean
        results.append({
            "mape": score,
            "model": model,
            "pred": pred,
            "conf": list(zip(conf["lower_bound"], conf["upper_bound"])),
            "parameters": {
                "seasonal_period": sp,
                "use_box_cox": bx,
                "use_arma_errors": ae,
            },
        })

    # min() keeps the first of equally good candidates, like the stable sort did.
    best_results = min(results, key=lambda candidate: candidate["mape"])
    best_parameters = best_results["parameters"]
    metrics = [{"mean_absolute_percent_error": best_results["mape"]}]

    # Get OOS forecasts for the future by refitting the winner on all data.
    oos_estimator = TBATS(
        use_box_cox=best_parameters["use_box_cox"],
        use_arma_errors=best_parameters["use_arma_errors"],
        seasonal_periods=best_parameters["seasonal_period"],
    )
    oos_model = oos_estimator.fit(ts.series)
    OOS_pred, OOS_conf = oos_model.forecast(steps=12, confidence_level=0.95)

    return (
        best_results["model"],
        best_results["pred"],
        best_results["conf"],
        metrics,
        OOS_pred,
        OOS_conf,
    )
def test_trend_and_seasonal(self):
    """Check the Python model against the ``self.r_tbats`` reference.

    The series is a noisy linear trend with a single sine season of length 6;
    the last 5 observations are held out for the forecast comparison.
    """
    np.random.seed(234234)
    n_obs = 35
    holdout = 5
    smoothing = 0.1
    season_length = 6

    slope = 2.1
    level = 1.2
    series = []
    for step in range(n_obs):
        noise = np.random.normal()
        seasonal = 2 * np.sin(2 * np.pi * step / season_length)
        series.append(level + slope + noise + seasonal)
        level = level + slope + smoothing * noise

    components = {
        "use_arma_errors": False,
        "use_trend": True,
        "use_damped_trend": False,
        "use_box_cox": False,
        "seasonal_periods": [season_length],
    }

    split = n_obs - holdout
    y_for_train = series[:split]
    y_to_forecast = series[split:]

    r_summary, r_model = self.r_tbats(y_for_train, components)
    py_model = TBATS(**components).fit(y_for_train)

    self.assert_py_model_is_not_worse(y_for_train, r_summary, r_model, py_model)
    self.assert_forecast_is_not_worse(y_to_forecast, r_model, py_model)
def predict_orders():
    """Rebuild the 30-step order forecast stored in ``PredictionOrder``.

    All ``OrderAmount`` rows are loaded into a frame indexed by their
    timestamps, a TBATS model with a seasonal period of 7 and a trend is
    fitted on the order values, and one ``PredictionOrder`` row is written
    per forecast step. Rows are dated from local midnight of the current day
    onward, one day apart.

    Existing predictions are deleted only after the forecast has been
    computed, so a failing fit no longer leaves the table empty.
    """
    dates = []
    vals = []
    for order in OrderAmount.objects.all():
        dates.append(
            datetime.datetime.utcfromtimestamp(int(order.date)).strftime('%Y-%m-%d %H:%M:%S'))
        vals.append(order.value)

    order_purchase = pd.DataFrame()
    order_purchase['Datetime'] = dates
    order_purchase['order_count'] = vals
    # set_index returns a new frame; the original call discarded the result.
    order_purchase = order_purchase.set_index(pd.DatetimeIndex(order_purchase['Datetime']))

    estimator_trend = TBATS(seasonal_periods=(7,), use_trend=True)
    model_trend = estimator_trend.fit(order_purchase['order_count'])
    y_forecast_trend = model_trend.forecast(steps=30)
    print(y_forecast_trend)

    # Replace the stored forecast only now that a new one is available.
    PredictionOrder.objects.all().delete()

    day = datetime.datetime.now()
    for val in y_forecast_trend:
        # Local midnight of `day` as a POSIX timestamp.
        timestamp = time.mktime(day.date().timetuple())
        PredictionOrder(date=timestamp, value=val).save()
        day += datetime.timedelta(days=1)
def train(self, **kwargs):
    """Fit a TBATS model with trend and without ARMA errors on ``self.train_df``.

    The seasonal periods are taken from
    ``get_unique_N(season_list(self.train_df), 1)``. The fitted model is
    stored in ``self.model``. ``kwargs`` is accepted but not used.
    """
    candidate_seasons = season_list(self.train_df)
    seasonal_periods = list(get_unique_N(candidate_seasons, 1))
    estimator = TBATS(
        seasonal_periods=seasonal_periods,
        use_arma_errors=False,
        use_trend=True,
    )
    self.model = estimator.fit(self.train_df)
def test_constant_model(self):
    """A constant series must be fitted and forecast as that constant."""
    constant = 3.2
    n_obs = 20
    y = [constant] * n_obs

    model = TBATS().fit(y)

    # No residuals, fitted values equal to the input, flat forecast.
    assert np.allclose(np.zeros(n_obs), model.resid)
    assert np.allclose(y, model.y_hat)
    assert np.allclose(np.full(5, constant), model.forecast(steps=5))
def test_damped_trend(self):
    """Compare the Python model with the ``self.r_tbats`` reference on a damped-trend series."""
    components = {
        "use_arma_errors": False,
        "use_trend": True,
        "use_damped_trend": True,
        "use_box_cox": False,
    }
    level_gain = 0.4
    trend_gain = 0.6
    damping = 0.9

    np.random.seed(987)
    n_obs = 100
    trend = 0
    long_run_trend = 0.0
    level = 1

    series = []
    for _ in range(n_obs):
        noise = np.random.normal(scale=1.0)
        series.append(level + trend + noise)
        level = level + trend + level_gain * noise
        trend = (1 - damping) * long_run_trend + damping * trend + trend_gain * noise

    r_summary, r_model = self.r_tbats(series, components)
    py_model = TBATS(**components).fit(series)

    self.compare_model(r_summary, r_model, py_model)
    self.compare_forecast(r_model, py_model)
def test_conf_int(X_y_linear_trend):
    """The wrapper must return the raw TBATS forecast plus its 95% bounds as columns."""
    HORIZON = 5
    X, y = X_y_linear_trend
    X_train, y_train = X[:-HORIZON], y[:-HORIZON]

    raw_estimator = TBATS(use_arma_errors=False, use_box_cox=False)
    wrapped_estimator = TBATSWrapper(
        use_arma_errors=False,
        use_box_cox=False,
        conf_int=True,
        conf_int_level=0.95,
    )
    raw_model = raw_estimator.fit(y_train)
    wrapped_model = wrapped_estimator.fit(X_train, y_train)

    preds_orig, bounds = raw_model.forecast(steps=HORIZON, confidence_level=0.95)
    preds = wrapped_model.predict(X[-HORIZON:])

    expected_result = pd.DataFrame(preds_orig, index=X.index[-HORIZON:], columns=["TBATS"])
    expected_result = expected_result.assign(
        TBATS_lower=bounds["lower_bound"],
        TBATS_upper=bounds["upper_bound"],
    )

    print("expected_result", expected_result)
    print("preds", preds)
    print("preds_orig", preds_orig)
    assert_frame_equal(preds, expected_result)
def test_fit_predict_trigonometric_seasonal(self, seasonal_periods, seasonal_harmonics, starting_values):
    """
    Check that the model recovers a noise-free trigonometric series:
    the seasonal structure is detected, the in-sample fit is tight and the
    forecast stays close to the held-out values.
    """
    n_obs = 100
    horizon = 10
    level = 3.1
    initial_state = [[level]]

    # Build the series: a constant level plus one trigonometric season per period.
    series = [level] * n_obs
    for idx, period_length in enumerate(seasonal_periods):
        harmonics = seasonal_harmonics[idx]
        start = np.array(starting_values[idx])
        half = len(start) // 2
        s = start[:half]
        s_star = start[half:]
        initial_state.append(start)
        lambdas = 2 * np.pi * (np.arange(1, harmonics + 1)) / period_length
        # Add this season's contribution and rotate its state one step at a time.
        for t in range(n_obs):
            series[t] += np.sum(s)
            s, s_star = (
                s * np.cos(lambdas) + s_star * np.sin(lambdas),
                -s * np.sin(lambdas) + s_star * np.cos(lambdas),
            )
    initial_state = np.concatenate(initial_state)

    split = n_obs - horizon
    y_to_fit = series[:split]
    y_to_predict = series[split:]

    # pytest does not work well with spawn multiprocessing method
    # https://github.com/pytest-dev/pytest/issues/958
    estimator = TBATS(
        use_box_cox=False,
        use_arma_errors=False,
        use_trend=False,
        seasonal_periods=seasonal_periods,
        multiprocessing_start_method='fork',
    )
    fitted_model = estimator.fit(y_to_fit)
    fitted_components = fitted_model.params.components

    # seasonal model should be discovered
    assert np.array_equal(seasonal_periods, fitted_components.seasonal_periods)
    # at least as many harmonics as in original series
    assert np.all(np.asarray(seasonal_harmonics) <= fitted_components.seasonal_harmonics)

    # sequence should be modelled properly
    assert np.allclose(np.zeros(split), fitted_model.resid, atol=0.2)
    assert np.allclose(y_to_fit, fitted_model.y_hat, atol=0.2)

    # forecast should be close to actual (0.2 is a relative tolerance here)
    y_predicted = fitted_model.forecast(steps=horizon)
    assert np.allclose(y_to_predict, y_predicted, rtol=0.2)
def train(self, **kwargs):
    """Fit a Box-Cox TBATS model with trend and no ARMA errors on ``self.train_df``.

    ``self.seasons`` is used as the single seasonal period. The fitted model
    is stored in ``self.model``. ``kwargs`` is accepted but not used.
    """
    settings = {
        "seasonal_periods": [self.seasons],
        "use_arma_errors": False,
        "use_box_cox": True,
        "use_trend": True,
    }
    self.model = TBATS(**settings).fit(self.train_df)
def _history_with_tbats_forecast(csv_path, steps=5):
    """Return the integer ``col`` values of ``csv_path`` followed by a TBATS forecast.

    The CSV must contain a column named ``col``. The forecast values are
    truncated to ``int`` like the historical ones.
    """
    from tbats import TBATS

    dataset = pd.read_csv(csv_path)
    data = [int(value) for value in dataset['col']]
    # NOTE(review): the period tuple repeats the value 2; kept as in the
    # original code - confirm this is intended.
    model = TBATS(seasonal_periods=(2, 2)).fit(dataset['col'])
    data.extend(int(value) for value in model.forecast(steps=steps))
    return data


def Tbat_first():
    """Forecast three CSV series with TBATS, publish them and render the index page.

    Each of ``count_people.csv``, ``money.csv`` and ``passagers.csv`` is
    extended by a 5-step forecast and handed to ``setGraf12``, ``setGraf13``
    and ``setGraf14`` respectively.
    """
    forecast_steps = 5

    setGraf12(_history_with_tbats_forecast('count_people.csv', forecast_steps))

    money = _history_with_tbats_forecast('money.csv', forecast_steps)
    # The original code echoed the forecast values of this series to stdout.
    for value in money[-forecast_steps:]:
        print(value)
    setGraf13(money)

    setGraf14(_history_with_tbats_forecast('passagers.csv', forecast_steps))

    return render_template('index.html',
                           first_graf_link="/Tbat_first",
                           second_graf_link="/Tbat_second",
                           title="Tbat")
class Tbats(base_model.BaseModel):
    """
    Trigonometric seasonality, Box-Cox transformation, ARMA errors, Trend and Seasonal components.
    """

    def _tune(self, y, period, x=None, metric="mse", val_size=None, verbose=False):
        """
        Tune hyperparameters of the model.

        For TBATS this only resolves the seasonal period and creates the
        (not yet fitted) estimator.

        :param y: pd.Series or 1-D np.array, time series to predict.
        :param period: Int or Str, the number of observations per cycle: 1 or "annual" for yearly data, 4 or "quarterly"
        for quarterly data, 7 or "daily" for daily data, 12 or "monthly" for monthly data, 24 or "hourly" for hourly
        data, 52 or "weekly" for weekly data. First-letter abbreviations of strings work as well ("a", "q", "d", "m",
        "h" and "w", respectively). Additional reference: https://robjhyndman.com/hyndsight/seasonal-periods/.
        :param x: not used for TBATS model
        :param metric: not used for TBATS model; model selection is based on the AIC.
        :param val_size: not used for TBATS model; model selection is based on the AIC.
        :param verbose: Boolean, True for printing additional info while tuning.
        :return: None
        """
        # Resolve textual periods ("monthly", "m", ...) to their integer form.
        self.period = data_utils.period_to_int(period) if isinstance(period, str) else period
        # Pass the resolved period: the original handed the raw (possibly
        # string) value to the estimator.
        self.model = TBATS(seasonal_periods=[self.period], show_warnings=False)
        self.params["tuned"] = True

    def fit(self, y, period, x=None, metric="mse", val_size=None, verbose=False):
        """
        Build the model using best-tuned hyperparameter values.

        :param y: pd.Series or 1-D np.array, time series to predict.
        :param period: Int or Str, the number of observations per cycle: 1 or "annual" for yearly data, 4 or "quarterly"
        for quarterly data, 7 or "daily" for daily data, 12 or "monthly" for monthly data, 24 or "hourly" for hourly
        data, 52 or "weekly" for weekly data. First-letter abbreviations of strings work as well ("a", "q", "d", "m",
        "h" and "w", respectively). Additional reference: https://robjhyndman.com/hyndsight/seasonal-periods/.
        :param x: not used for TBATS model
        :param metric: not used for TBATS model; model selection is based on the AIC.
        :param val_size: not used for TBATS model; model selection is based on the AIC.
        :param verbose: Boolean, True for printing additional info while tuning.
        :return: None
        """
        self.y = y
        self.name = "TBATS"
        self.key = "tbats"
        self._tune(y=y, period=period, x=x, metric=metric, val_size=val_size, verbose=verbose)
        # _tune stored the estimator in self.model; replace it with the fitted model.
        self.model = self.model.fit(y)

    def predict(self, horizon, x=None):
        """
        Predict future values of the time series using the fitted model.

        :param horizon: Int, the number of observations in the future to predict
        :param x: not used for TBATS model
        :return: 1-D np.array with predictions
        """
        return self.model.forecast(steps=horizon)
def test_trend_and_seasonal(self):
    """A noise-free damped-trend series with one season must be recovered by TBATS."""
    n_obs = 30
    horizon = 5
    phi = 0.99
    season_length = 6

    slope = 2.1
    level = 1.2
    s = 0
    s_star = 0.2
    lam = 2 * np.pi / season_length

    series = []
    for _ in range(n_obs):
        damped_slope = phi * slope
        series.append(level + damped_slope + s)
        level = level + damped_slope
        slope = damped_slope
        # Rotate the seasonal state by one step.
        s, s_star = (
            s * np.cos(lam) + s_star * np.sin(lam),
            -s * np.sin(lam) + s_star * np.cos(lam),
        )

    split = n_obs - horizon
    y_to_fit = series[:split]
    y_to_predict = series[split:]

    # pytest does not work well with spawn multiprocessing method
    # https://github.com/pytest-dev/pytest/issues/958
    estimator = TBATS(
        use_arma_errors=False,
        use_trend=True,
        use_damped_trend=True,
        use_box_cox=False,
        seasonal_periods=[season_length],
        multiprocessing_start_method='fork',
    )
    fitted_model = estimator.fit(y_to_fit)
    fitted_components = fitted_model.params.components

    # seasonal model with 1 harmonic should be chosen
    assert np.array_equal([1], fitted_components.seasonal_harmonics)
    assert np.array_equal([season_length], fitted_components.seasonal_periods)
    assert np.isclose(phi, fitted_model.params.phi, atol=0.01)

    # from some point residuals should be close to 0
    burn_in = 10
    assert np.allclose(np.zeros(split - burn_in), fitted_model.resid[burn_in:], atol=0.06)
    assert np.allclose(y_to_fit[burn_in:], fitted_model.y_hat[burn_in:], atol=0.06)

    # forecast should be close to actual sequence
    y_predicted = fitted_model.forecast(steps=horizon)
    assert np.allclose(y_to_predict, y_predicted, atol=0.5)
def scoreCVforTBATS(series, loss_function):
    """Cross-validate a default TBATS model on ``series`` with three time-series splits.

    For each split the training window is trimmed from the front to a
    multiple of 12 observations, a model is fitted, and
    ``loss_function(predictions, actual)`` is evaluated on the test window.

    Returns:
        tuple: ``(errors, mean_error)`` - the per-split errors and their mean.
    """
    splitter = TimeSeriesSplit(n_splits=3)
    fold_errors = []
    for train_idx, test_idx in splitter.split(series):
        history = series.values[train_idx]
        # Keep only the most recent whole multiples of 12 observations.
        usable = (train_idx.shape[0] // 12) * 12
        history = history[-usable:]

        fitted = TBATS(n_jobs=1).fit(history)
        forecast = fitted.forecast(len(test_idx))
        fold_errors.append(loss_function(forecast, series.values[test_idx]))
    return fold_errors, np.mean(np.array(fold_errors))
def tbats(ts, ts_log, ts_log_diff, forget_last, periods):
    """Fit TBATS on a log-scaled series, plot the fit and the hold-out forecast.

    The model (no ARMA errors, no Box-Cox) is trained on ``ts_log`` without
    its last ``forget_last`` observations. Two figures are shown: the
    in-sample fit on the log scale, and the back-transformed forecast with
    its 95% band over the held-out tail. Finally the model summary is
    printed.

    Args:
        ts: Not used.
        ts_log: Log-transformed series (must support slicing and ``to_numpy``).
        ts_log_diff: Not used.
        forget_last (int): Number of trailing observations to hold out.
        periods: Seasonal periods passed to ``TBATS``.
    """
    last_steps = len(ts_log)
    trainset = ts_log[:-forget_last]
    holdout_axis = range(last_steps - forget_last, last_steps)

    # Fit the model
    estimator = TBATS(
        seasonal_periods=periods,
        use_arma_errors=False,  # shall try only models without ARMA
        use_box_cox=False,  # will not use Box-Cox
    )
    model = estimator.fit(trainset)

    # In-sample
    plt.plot(ts_log.to_numpy())
    plt.plot(model.y_hat, color='red')
    plt.title('TBATS RSS: %.4f' % sum((model.y_hat - trainset.to_numpy())**2))
    plt.show()

    # Forecast ahead, back-transformed from the log scale
    forecast_log, ci = model.forecast(steps=forget_last, confidence_level=0.95)
    actual = np.exp(ts_log).to_numpy()
    forecast = np.exp(forecast_log)

    plt.plot(range(0, last_steps), actual, color='blue')
    plt.plot(holdout_axis, forecast, color='orange')
    ax = plt.gca()
    ax.fill_between(holdout_axis,
                    np.exp(ci['lower_bound']),
                    np.exp(ci['upper_bound']),
                    color='b',
                    alpha=.1)
    plt.ylim(-2, 350)
    plt.axvline(x=last_steps - forget_last, color='red')
    plt.title("TBATS prediction of travel time (MAE: %.4f)" %
              mean_absolute_error(actual[-forget_last:], forecast))
    plt.show()

    print(model.summary())
def test_long_seasonality(self):
    """Check the Python model against the ``self.r_tbats`` reference with periods 7 and 30.5."""
    np.random.seed(5434)
    n_obs = 300
    holdout = 5
    smoothing = 0.1
    short_period = 7
    long_period = 30.5

    slope = 2.1
    level = 1.2
    series = []
    for step in range(n_obs):
        noise = np.random.normal()
        short_season = 2 * np.cos(2 * np.pi * step / short_period)
        long_season = 3 * np.sin(2 * np.pi * step / long_period)
        series.append(level + slope + short_season + long_season + noise)
        level = level + slope + smoothing * noise

    components = {
        "use_arma_errors": False,
        "use_trend": True,
        "use_damped_trend": False,
        "use_box_cox": False,
        "seasonal_periods": [short_period, long_period],
    }

    split = n_obs - holdout
    y_for_train = series[:split]
    y_to_forecast = series[split:]

    r_summary, r_model = self.r_tbats(y_for_train, components)
    py_model = TBATS(n_jobs=1, **components).fit(y_for_train)

    self.assert_py_model_is_not_worse(y_for_train, r_summary, r_model, py_model)
    self.assert_forecast_is_not_worse(y_to_forecast, r_model, py_model)
# Score the tuned LightGBM model on the held-out set (RMSE) and keep its predictions.
lgbm_ft_predictions = lgbm_ft_model.predict(LGBM_X_test)
lgbm_ft_rmse= np.sqrt(mean_squared_error(lgbm_ft_predictions,LGBM_Y_test))
print("Light GBM's Score:",lgbm_ft_rmse)
LGBM_result['predictions'] = lgbm_ft_predictions
# NOTE(review): the lone quote below is present in the source as-is; it looks
# like the remainder of a string delimiter - confirm against the full file.
'
from tbats import TBATS, BATS
print("\n\nTraining T-BATS ...")
# Train on the rows before val_bound; the last 48 observations are the test set.
train = df['Energy'][:val_bound]
test = df['Energy'][-48:].values
estimator = TBATS(seasonal_periods=(12,24))
model = estimator.fit(train)
TBATS_result['predictions'] = model.forecast(steps=48)
# NOTE(review): TBATS_forecast is not assigned anywhere in the visible code;
# presumably TBATS_result['predictions'] was meant - verify.
print("TBats Performace",np.sqrt(mean_squared_error(TBATS_forecast,test)))
#pip freeze > requirements.txt
"""**IMPLEMENTING LSTM (if they could help)**"""
from sklearn.preprocessing import MinMaxScaler
import keras
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Bidirectional, Flatten, BatchNormalization
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error
def plot_predictions(test,predicted):
def train_models(
        train,
        models,
        forecast_len,
        full_df=None,
        seasonality="infer_from_data",
        in_sample=None,
        freq=None,
        GPU=None,
):
    """Train every requested forecasting model on ``train``.

    Args:
        train: Training series, handed to each model (through the matching
            ``*_dataframe`` adapter where the code uses one).
        models: Iterable of model names. Handled names are "ARIMA",
            "Prophet", "HWAAS", "HWAMS", "PYAF", "Gluonts", "NBEATS", "TATS",
            "TBAT", "TBATS1", "TBATP1" and "TBATS2"; other names only
            produce the progress message.
        forecast_len (int): Forecast horizon for the models that need it at
            training time.
        full_df: Complete data set, used only by "NBEATS".
        seasonality: Forwarded to ``select_seasonality``.
        in_sample: Truthy when training for in-sample prediction; changes
            the progress message and the NBEATS data preparation.
        freq: Frequency string; "D" enables daily seasonality for Prophet.
        GPU: Truthy to run NBEATS on CUDA instead of the CPU.

    Returns:
        tuple: ``(models_dict, seasons)`` - the trained objects keyed by
        model name and the seasonality returned by ``select_seasonality``.
    """
    seasons = select_seasonality(train, seasonality)
    periods = select_seasonality(train, "periodocity")

    models_dict = {}
    for m in models:
        if in_sample:
            print("Model {} is being trained for in sample prediction".format(m))
        else:
            print("Model {} is being trained for out of sample prediction".format(m))

        if m == "ARIMA":
            models_dict[m] = pm.auto_arima(train, seasonal=True, m=seasons)

        elif m == "Prophet":
            model = Prophet(daily_seasonality=True) if freq == "D" else Prophet()
            models_dict[m] = model.fit(prophet_dataframe(train))

        elif m == "HWAAS":
            # Additive Holt-Winters; fall back to no Box-Cox if that fit fails.
            try:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend="add",
                    seasonal="add",
                    damped=True,
                ).fit(use_boxcox=True)
            except Exception:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend="add",
                    seasonal="add",
                    damped=True,
                ).fit(use_boxcox=False)

        elif m == "HWAMS":
            # Multiplicative seasonality; degrade step by step when a fit fails.
            try:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend="add",
                    seasonal="mul",
                    damped=True,
                ).fit(use_boxcox=True)
            except Exception:
                try:
                    models_dict[m] = ExponentialSmoothing(
                        train,
                        seasonal_periods=seasons,
                        trend="add",
                        seasonal="mul",
                        damped=True,
                    ).fit(use_boxcox=False)
                except Exception:
                    models_dict[m] = ExponentialSmoothing(
                        train,
                        seasonal_periods=seasons,
                        trend=None,
                        seasonal="add",
                    ).fit(use_boxcox=False)

        elif m == "PYAF":
            model = autof()
            # NOTE: the horizon passed at training time is the length of the
            # training set (flagged as questionable in the original code).
            model.train(
                iInputDS=train.reset_index(),
                iTime="Date",
                iSignal="Target",
                iHorizon=len(train),
            )
            models_dict[m] = model.forecast(iInputDS=train.reset_index(),
                                            iHorizon=forecast_len)

        elif m == "Gluonts":
            # Use a local name so the `freq` argument is not overwritten for
            # the models that come later in the loop.
            inferred_freq = pd.infer_freq(train.index)
            gluonts_freq = "M" if inferred_freq == "MS" else inferred_freq
            # NOTE(review): the trainer context is "gpu" regardless of the
            # GPU flag, as in the original code - confirm this is intended.
            estimator = DeepAREstimator(
                freq=gluonts_freq,
                prediction_length=forecast_len,
                trainer=Trainer(epochs=6, ctx="gpu"),
            )
            models_dict[m] = estimator.train(training_data=gluonts_dataframe(train))

        elif m == "NBEATS":
            device = torch.device("cuda") if GPU else torch.device("cpu")

            if os.path.isfile(CHECKPOINT_NAME):
                os.remove(CHECKPOINT_NAME)

            batch_size = 10  # greater than 4 for viz
            if in_sample:
                x_train, y_train, x_test, y_test, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=True, device=device)
                rounds = 35
            else:
                x_train, y_train, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=False, device=device)
                rounds = 5

            optimiser = optim.Adam(net.parameters())
            data = data_generator(x_train, y_train, batch_size)
            for _ in range(rounds):
                train_100_grad_steps(data, device, net, optimiser)

            if in_sample:
                models_dict[m] = {
                    "model": net,
                    "x_test": x_test,
                    "y_test": y_test,
                    "constant": norm_constant,
                }
            else:
                models_dict[m] = {
                    "model": net,
                    "tuple": (x_train, y_train, net, norm_constant),
                }

        elif m == "TATS":
            bat = TBATS(
                seasonal_periods=list(get_unique_N(season_list(train), 1)),
                use_arma_errors=False,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)

        elif m == "TBAT":
            bat = TBATS(use_arma_errors=False, use_box_cox=True, use_trend=True)
            models_dict[m] = bat.fit(train)

        elif m == "TBATS1":
            bat = TBATS(
                seasonal_periods=[seasons],
                use_arma_errors=False,
                use_box_cox=True,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)

        elif m == "TBATP1":
            bat = TBATS(
                seasonal_periods=[periods],
                use_arma_errors=False,
                use_box_cox=True,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)

        elif m == "TBATS2":
            bat = TBATS(
                seasonal_periods=list(get_unique_N(season_list(train), 2)),
                use_arma_errors=False,
                use_box_cox=True,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)

    return models_dict, seasons
def tbats_model(timeseries, train_length, s, slow=True):
    """
    Forecast with the TBATS model.

    Fits the model on the leading part of the series, prints its summary,
    plots the in-sample fit and the out-of-sample forecast with a 95% band,
    and prints the in-sample MAE and the out-of-sample MSE and MAE.

    Parameters
    ----------
    timeseries : Series
        the time series.
    train_length : int
        the length of the train set (as a ratio of the complete series).
    s : list
        the array of seasonal periods.
    slow : bool
        if False, speeds up the selection of the final model by disabling
        ARMA errors and Box-Cox (default is True).

    Returns
    -------
    None.

    """
    # weekly data if 52 appears exactly once among the periods, daily otherwise
    f = 'W-MON' if s.count(52) == 1 else 'D'

    # build the train set
    first_date = timeseries.index[0]
    last_train_date = timeseries.index[int(len(timeseries) * train_length) - 1]
    train = timeseries[pd.date_range(start=first_date, end=last_train_date, freq=f)]
    n_train = len(train)
    n_ahead = len(timeseries) - n_train

    # fit the model to the data; the fast variant skips ARMA errors and Box-Cox
    fast_options = {} if slow else {'use_arma_errors': False, 'use_box_cox': False}
    model = TBATS(seasonal_periods=s, **fast_options).fit(train)

    # print the model parameters
    print(model.summary())

    # in-sample predictions (model.y_hat = train - model.resid)
    in_sample_dates = pd.date_range(start=first_date,
                                    end=timeseries.index[n_train - 1],
                                    freq=f)
    tbats_ts = pd.Series(model.y_hat, index=in_sample_dates)

    # out-of-sample predictions
    fcast, conf_int = model.forecast(steps=n_ahead, confidence_level=0.95)
    fcast_dates = pd.date_range(start=timeseries.index[n_train],
                                periods=n_ahead,
                                freq=f)
    ts_fcast = pd.Series(fcast, index=fcast_dates)
    ts_ci_min = pd.Series(conf_int['lower_bound'], index=fcast_dates)
    ts_ci_max = pd.Series(conf_int['upper_bound'], index=fcast_dates)

    # plot of the model
    plt.figure(figsize=(40, 20), dpi=80)
    plt.title('Modello TBATS per {}'.format(timeseries.name))
    ax = train.plot(label='Train set', color='black')
    tbats_ts.plot(ax=ax, label='In-sample predictions', color='green')
    plt.legend()
    plt.show()

    print('MAE (in sample)', np.mean(np.abs(model.resid)))

    # plot of the forecasts
    plt.figure(figsize=(40, 20), dpi=80)
    plt.title('Forecasting con TBATS per {}'.format(timeseries.name))
    ax = timeseries.plot(label='Observed', color='black')
    ts_fcast.plot(ax=ax, label='Out-of-sample forecasts', alpha=.7, color='red')
    ax.fill_between(fcast_dates, ts_ci_min, ts_ci_max, color='k', alpha=.2)
    plt.legend()
    plt.show()

    # error metrics on the dates where both forecast and observation exist
    errore = (ts_fcast - timeseries).dropna()
    print('MSE=%.4f' % (errore**2).mean())
    print('MAE=%.4f' % (abs(errore)).mean())
# Printing the values of fitted models. for key, ets_model in ets_fits.items(): print("Exponential Smooting", key, "\n") print(ets_model.summary()) ######################################################### # 3. TBATS # For more information see here: https://pypi.org/project/tbats/ from tbats import TBATS, BATS # Initialization and fit for TBATS. tbats_estimator = TBATS(seasonal_periods=[12]) tbats_model = tbats_estimator.fit(price) print(tbats_model.summary()) # Forecast for 30 years ahead. y_forecast, confidence_info = tbats_model.forecast(steps=PERIODS_AHEAD, confidence_level=0.95) index_of_fc = pd.date_range(price.index[-1], periods=PERIODS_AHEAD + 1, freq='MS')[1:] fitted_series = pd.Series(y_forecast, index=index_of_fc) lower_series = pd.Series(confidence_info['lower_bound'], index=index_of_fc) upper_series = pd.Series(confidence_info['upper_bound'], index=index_of_fc) plt.plot(price, label='Initial Data')
# 线性全局模型 linear_res = [] for idx, name in enumerate(data_dir): y_forecasted = model_fit_predict(name) linear_res.append(y_forecasted) linear_res = pd.DataFrame(list(linear_res)).T linear_res.columns = ['p_pv', 'p_uv', 'r_pv', 'r_uv'] # 传统分解模型 tbats_res = [] for idx, name in enumerate(data_dir): data = pd.read_csv('processed_data/' + name + '.csv')[[name]].T print(idx) data = np.array(data)[0] estimator = TBATS(seasonal_periods=params_0['seasonal_periods'][idx]) fitted_model = estimator.fit(data) y_forecasted = fitted_model.forecast(steps=7) y_forecasted = [x * params_0['after_rate'][idx] for x in y_forecasted] tbats_res.append(y_forecasted) tbats_res = pd.DataFrame(tbats_res).T tbats_res.columns = ['p_pv', 'p_uv', 'r_pv', 'r_uv'] # 模型融合 rate = params_0['rh_rate'] res = pd.DataFrame() res['p_pv'] = linear_res['p_pv'].values * rate + tbats_res[ 'p_pv'].values * (1 - rate) res['p_uv'] = linear_res['p_uv'].values * rate + tbats_res[ 'p_uv'].values * (1 - rate) res['r_pv'] = tbats_res['r_pv']
def model_tbats(train_df, steps, kwargs):
    """Fit TBATS with seasonal periods 7 and 365.25 and forecast ``steps`` ahead.

    The fit runs in a single job. ``kwargs`` is accepted but not used.
    """
    fitted = TBATS(seasonal_periods=(7, 365.25), n_jobs=1).fit(train_df)
    return fitted.forecast(steps=steps)
def _min_max_scale(scores):
    """Rescale ``scores`` linearly onto [0, 1] (NaN where all values are equal)."""
    lowest = scores.min()
    highest = scores.max()
    return (scores - lowest) / (highest - lowest)


def anomaly_uni_TBATS(lista_datos, num_forecast=10, desv_mse=2, train='True', name='test'):
    """Detect anomalies in a univariate series with TBATS and forecast ahead.

    A TBATS model (first two periods from ``seasonal_options``) is fitted on
    the train split and evaluated on the test split. Each test point gets an
    anomaly score ``|expected - real| / MAE``; points scoring above 2 are
    reported as past anomalies with scores rescaled to [0, 1]. The model is
    then refitted on the whole series to forecast ``num_forecast`` steps.

    Args:
        lista_datos: Sequence of observed values.
        num_forecast (int): Number of future steps to forecast.
        desv_mse: Not used; the anomaly threshold is fixed at 2.
        train: Truthy to run the engine. The default is the string 'True',
            and any non-empty string is truthy.
        name: Not used.

    Returns:
        dict: Engine output with keys 'mae', 'present_status',
        'present_alerts', 'past', 'engine', 'future' and 'debug'.
    """
    lista_puntos = np.arange(0, len(lista_datos), 1)
    df, df_train, df_test = create_train_test(lista_puntos, lista_datos)

    engine_output = {}

    # NOTE(review): the source was collapsed onto one line, so the extent of
    # this `if` body could not be recovered; everything except the final
    # return is kept inside it. Behaviour for truthy `train` (including the
    # default) does not depend on that choice - verify for falsy values.
    if train:
        periods = seasonal_options(df.valores)
        estimator = TBATS(seasonal_periods=periods[:2])

        # Fit on the train split and score the test split.
        print("Starting Anomaly Model Fitted")
        fitted_model = estimator.fit(df_train['valores'])
        print("Anomaly Model Fitted")

        anomaly_forecasted = fitted_model.forecast(steps=len(df_test['valores']))
        mae = mean_absolute_error(anomaly_forecasted, df_test['valores'].values)

        df_aler = pd.DataFrame(anomaly_forecasted,
                               index=df_test.index,
                               columns=['expected value'])
        df_aler['step'] = df['puntos']
        df_aler['real_value'] = df_test['valores']
        df_aler['mae'] = mae
        df_aler['anomaly_score'] = abs(df_aler['expected value'] -
                                       df_aler['real_value']) / df_aler['mae']

        # Rows among the first five of the test window whose index is also
        # one of the last five index values.
        last_index = df_aler.index.max()
        recent_indexes = [last_index - offset for offset in range(5)]
        df_aler_ult = df_aler[:5]
        df_aler_ult = df_aler_ult[df_aler_ult.index.isin(recent_indexes)].copy()
        exists_anom_last_5 = 'FALSE' if len(df_aler_ult) == 0 else 'TRUE'

        # Keep only the anomalous points; copies avoid chained assignment.
        df_aler = df_aler[df_aler['anomaly_score'] > 2].copy()
        df_aler['anomaly_score'] = _min_max_scale(df_aler['anomaly_score'])
        df_aler_ult['anomaly_score'] = _min_max_scale(df_aler_ult['anomaly_score'])

        # Refit on the full series and forecast num_forecast steps ahead.
        fitted_model = estimator.fit(df['valores'])
        print("Forecast Model Fitted")
        y_forecasted = fitted_model.forecast(steps=num_forecast)

        df_future = pd.DataFrame(y_forecasted, columns=['value'])
        df_future['value'] = df_future.value.astype("float32")
        df_future['step'] = np.arange(len(lista_datos),
                                      len(lista_datos) + num_forecast, 1)

        # 'records' is the spelling accepted by every pandas version; the
        # abbreviation 'record' is rejected by pandas 2.x.
        engine_output['mae'] = mae
        engine_output['present_status'] = exists_anom_last_5
        engine_output['present_alerts'] = df_aler_ult.fillna(0).to_dict(orient='records')
        engine_output['past'] = df_aler.fillna(0).to_dict(orient='records')
        engine_output['engine'] = 'TBATS'
        print("Only for future")
        engine_output['future'] = df_future.to_dict(orient='records')

        test_values = pd.DataFrame(anomaly_forecasted,
                                   index=df_test.index,
                                   columns=['expected value'])
        test_values['step'] = test_values.index
        engine_output['debug'] = test_values.to_dict(orient='records')

    return engine_output
def train_tbats_model(train_set: pd.Series):
    """Fit and return a TBATS model with seasonal periods 7 and 30.4 on ``train_set``."""
    return TBATS(seasonal_periods=(7, 30.4)).fit(train_set)
def _forecast_missing_chunk(ts, horizon, method, seasonal1, seasonal2):
    """Forecast ``horizon`` values following ``ts`` with the model named ``method``."""
    if method == 'ARIMA':
        return auto_arima(ts).predict(n_periods=horizon)
    if method == 'TBATS':
        estimator = TBATS(seasonal_periods=[seasonal1, seasonal2])
        return estimator.fit(ts).forecast(steps=horizon)
    if method == 'HW':
        estimator = ExponentialSmoothing(ts,
                                         trend='add',
                                         seasonal='add',
                                         seasonal_periods=seasonal1)
        return estimator.fit().forecast(steps=horizon)

    # Regressor-based methods share one call; factories keep construction lazy.
    regressor_factories = {
        'KNN': lambda: KNeighborsRegressor(),
        'RF': lambda: RandomForestRegressor(),
        'SVR': lambda: SVR(),
        'GPR': lambda: GaussianProcessRegressor(),
    }
    if method in regressor_factories:
        res = pred.forecast(ts,
                            regressor_factories[method](),
                            horizon=horizon,
                            estac=seasonal1,
                            prt=0)
        return res[3]

    raise ValueError(
        "Unknown short imputation method {!r}; expected one of 'ARIMA', "
        "'TBATS', 'HW', 'KNN', 'RF', 'SVR' or 'GPR'.".format(method))


def recon_hybrid(df,
                 chunks,
                 steps,
                 seasonal1=96,
                 seasonal2=672,
                 short='ARIMA',
                 long='median',
                 weeks=6):
    '''
    Impute chunks of missing values with a method chosen by chunk length.

    Parameters
    ----------
    df : Pandas Dataframe
        Dataframe with only one column called "Flow" and and a DateTime index
    chunks : list
        List with the chunks of missing values, which are lists as well. This
        variable is returned by the function wrangler.data_wrangler. Empty
        chunks are skipped.
    steps : int
        Maximum number of steps that are going to be forecasted.
    seasonal1 : int, optional
        First seasonality of the time series. The default is 96, considering
        flow values every 15 minutes during a day.
    seasonal2 : int, optional
        Second seasonality of the time series. It is not used by all the
        methods. The default is 672, considering flow values every 15 minutes
        during a week.
    short : string, optional
        Defines the method used to impute whenever the chunk of missing data
        is not larger than the forecasting horizon: 'ARIMA', 'TBATS', 'HW',
        'KNN', 'RF', 'SVR' or 'GPR'. The default is 'ARIMA'.
    long : string, optional
        Same as "short", but regarding chunks larger than the forecasting
        horizon: 'median' or 'mean' of the values observed at the same time
        in the previous weeks. Any other value leaves those chunks untouched.
        The default is 'median'.
    weeks : int, optional
        Number of weeks to consider when imputing missing values. The default
        is 6.

    Returns
    -------
    dataframe : Pandas Dataframe
        Dataframe with imputed values according to the selected methods.
    elapsed_time : float
        Elapsed time to perform the imputing method.

    Raises
    ------
    ValueError
        If a chunk needs the short method and ``short`` is not a known name.

    '''
    start_time = time.time()
    dataframe = df.copy()

    for c in chunks:
        if len(c) == 0:
            # Nothing to impute; the original code failed on c[0] here.
            continue

        if len(c) > steps:
            # Long gap: aggregate the values seen 1..weeks weeks earlier.
            for n in c:
                values = [
                    dataframe.loc[n - pd.Timedelta(value=(k + 1) * 7, unit='D')]
                    for k in range(weeks)
                ]
                if long == 'median':
                    dataframe.loc[n] = np.median(values)
                elif long == 'mean':
                    dataframe.loc[n] = np.mean(values)
        else:
            # Short gap: forecast from everything before the chunk starts.
            ts = dataframe.loc[:c[0]].iloc[:-1]
            y_forecast = _forecast_missing_chunk(ts, len(c), short, seasonal1, seasonal2)
            # Pair timestamps with forecast values by position.
            for n, value in zip(c, np.asarray(y_forecast)):
                dataframe.loc[n] = value

    elapsed_time = time.time() - start_time
    return dataframe, elapsed_time
from pkg.TBATSmod import saveforecast, save_individual_graph #============================================================================== # Forecast Model appliaction and save. train_df = pd.read_excel('train.xlsx', index_col=0) y_forecast = {} lower_int = {} upper_int = {} if __name__ == '__main__': estimator = TBATS(seasonal_periods=[12]) for index in train_df.index.values: fitted_model = estimator.fit(train_df.loc[index]) y_forecasted, confidence_int = fitted_model.forecast( steps=12, confidence_level=0.90) y_forecast[index] = confidence_int['mean'] lower_int[index] = confidence_int['lower_bound'] upper_int[index] = confidence_int['upper_bound'] saveforecast( pd.DataFrame(y_forecast).T, pd.DataFrame(lower_int).T, pd.DataFrame(upper_int).T, 'forecast') #============================================================================== # Produce graphs based on the forecast
import numpy as np
import pandas as pd
from tbats import TBATS
from data_process import data_process

if __name__ == "__main__":
    process_data = data_process()
    tbats_res = []

    p_pv = []
    # 7-step forecast from the first processed series (seasonal period 7).
    temp = np.array(process_data[0])[0]
    estimator = TBATS(seasonal_periods=[7])
    fitted_model = estimator.fit(temp)
    y_1 = fitted_model.forecast(steps=7)
    # 7-step forecast from the second processed series.
    temp = np.array(process_data[1])[0]
    estimator = TBATS(seasonal_periods=[7])
    fitted_model = estimator.fit(temp)
    y_2 = fitted_model.forecast(steps=7)
    # First five steps: 65/35 blend of the two forecasts.
    for i in range(5):
        p_pv.append(0.65 * y_1[i] + 0.35 * y_2[i])
    # Last two steps: average of the observations 2 and 9 (resp. 1 and 8)
    # positions from the end of the second series.
    p_6 = (temp[-2] + temp[-9]) * 0.5
    p_7 = (temp[-1] + temp[-8]) * 0.5
    p_pv.append(p_6)
    p_pv.append(p_7)
    tbats_res.append(p_pv)

    p_uv = []
    temp = np.array(process_data[2])[0]
    estimator = TBATS(seasonal_periods=[7])
def train(self, **kwargs):
    """Fit a non-seasonal-period TBATS configuration on ``self.train_df``.

    The estimator uses Box-Cox and a trend, without ARMA errors, and is given
    no explicit seasonal periods. The fitted model is stored in
    ``self.model``. ``kwargs`` is accepted but not used.
    """
    estimator = TBATS(
        use_arma_errors=False,
        use_box_cox=True,
        use_trend=True,
    )
    fitted = estimator.fit(self.train_df)
    self.model = fitted
# Synthetic series of 160 points: two seasonal waves (periods 14.5 and 30.25),
# a power trend, noise whose scale grows with t, and a constant offset of 10.
t = np.array(range(0, 160))
y = 5 * np.sin(t * 2 * np.pi / 14.5) + 5 * np.cos(t * 2 * np.pi / 30.25) + \
    ((t / 20) ** 1.5 + np.random.normal(size=160) * t / 50) + 10
y = np.asarray(y)

# Hold out the last `steps` observations (`steps` is defined outside this section).
y_to_train = y[:(len(y) - steps)]
y_to_predict = y[(len(y) - steps):]

estimator = TBATS(
    seasonal_periods=[14.5, 30.25],
    use_arma_errors=None,  # shall try models with and without ARMA
    use_box_cox=False,  # will not use Box-Cox
    use_trend=None,  # will try models with trend and without it
    use_damped_trend=None,  # will try models with damping and without it
    show_warnings=False,  # will not be showing any warnings for chosen model
)
fitted_model = estimator.fit(y_to_train)

# Warning messages from the model, if any
for warning in fitted_model.warnings:
    print(warning)

print('Did the model fit?', fitted_model.is_fitted)  # Model may fail to fit in edge-case situations
print('AIC', fitted_model.aic)  # may be np.inf

# Lets check components used in the model
print('\n\nMODEL SUMMARY\n\n')
params = fitted_model.params
components = fitted_model.params.components

print('Smoothing parameter', params.alpha)
import time
# Wall-clock start; presumably used for timing further down - verify.
start_time = time.time()
df = pd.read_csv("DATASET.csv")
df.Date = pd.to_datetime(df.Date, format="%d/%m/%y")
# NOTE(review): attribute-style assignment only updates a column when
# 'Transakce' already exists in the CSV; otherwise pandas sets a plain
# attribute and no column is created - confirm which is intended.
df.Transakce = df['Demand'].astype(float)
# NOTE(review): this sorts by the existing index; 'Date' is not set as the
# index in the visible code - confirm the rows end up in date order.
df = df.sort_index()
y = df
# Hold out the last 90 rows as the test set.
y_to_train = y.iloc[:(len(y) - 90)]
y_to_test = y.iloc[(len(y) - 90):]
from tbats import BATS, TBATS
# Seasonal periods 7 and 365.
estimator = TBATS(seasonal_periods=(7, 365))
model = estimator.fit(y_to_train["Demand"])
y_forecast = model.forecast(steps=90)
y_test = y_to_test["Demand"]
# Reset the index so the actuals are numbered from 0 like the forecast array.
y_test = y_test.reset_index()
plt.plot(y_forecast, label="Pred", color="black", zorder=1)
plt.plot(y_test["Demand"], label="True", color="lightgray", zorder=0)
plt.legend(loc="upper right")
plt.xlabel('Days', fontsize=10)
plt.ylabel('Demand', fontsize=10)
Y_true = y_test["Demand"]
Y_pred = y_forecast
from sklearn.metrics import mean_squared_error, mean_absolute_error
if __name__ == '__main__':
    # NOTE(review): Process and freeze_support are imported but not called in
    # the visible code - confirm whether freeze_support() was intended.
    from multiprocessing import Process, freeze_support
    from tbats import TBATS, BATS
    import pandas as pd
    import matplotlib.pyplot as plt
    from pmdarima import auto_arima

    # Load part 00002 of the processed task-event files and index it by time.
    path = '/home/sownbanana/PycharmProjects/Scaler/Data/task_events/task_events_processed/part-{}-of-00500.csv'
    df = pd.read_csv(path.format(str(2).zfill(5)))
    df.index = pd.to_datetime(df['time'])
    df['arrival_rq'].plot()
    plt.show()

    # Fit TBATS with seasonal periods 14 and 30.5, then forecast 14 steps.
    y = df['arrival_rq']
    estimator = TBATS(seasonal_periods=(14, 30.5))
    model = estimator.fit(y)
    y_forecast = model.forecast(steps=14)

    # Disabled alternative kept from the original: auto-ARIMA one-step forecast.
    # arima_model = auto_arima(y, seasonal=True, m=1)
    # y_forecast = arima_model.predict(n_periods=1)
    # y_forecast.plot()
    plt.plot(y_forecast)
    plt.show()