Пример #1
0
def train_tbats(ts):
    """Grid-search TBATS settings and forecast with the best combination.

    Every combination of seasonal periods, Box-Cox usage and ARMA-error
    usage is fitted on ``ts.train.values`` and scored on ``ts.test.values``.
    The score is the mean absolute error divided by the mean of the test
    values (stored under the key ``"mape"``).  The lowest-scoring
    combination is then refitted on ``ts.series`` to produce a 12-step
    out-of-sample forecast.

    Args:
      ts (stax.TimeSeries): Time series to train model on.

    Returns:
      tuple: Model experiment results as
      `(model, test_pred, test_conf, test_metrics, OOS_pred, OOS_conf)`.
      ``test_conf`` is a list of ``(lower, upper)`` pairs, whereas
      ``OOS_conf`` is returned exactly as ``forecast`` produced it.
    """
    seasonal_period_grid = [[6, 15], [12, 15], [6, 30], [12, 30]]
    box_cox_grid = [True, False]
    arma_errors_grid = [True, False]

    candidates = []
    for periods in seasonal_period_grid:
        for box_cox in box_cox_grid:
            for arma_errors in arma_errors_grid:
                tbats = TBATS(use_box_cox=box_cox,
                              use_arma_errors=arma_errors,
                              seasonal_periods=periods)
                n_test = len(ts.test.values)
                fitted = tbats.fit(ts.train.values)
                forecast, bounds = fitted.forecast(steps=n_test,
                                                   confidence_level=0.95)
                score = mean_absolute_error(
                    ts.test.values, forecast) / ts.test.values.mean()
                interval = list(
                    zip(bounds["lower_bound"], bounds["upper_bound"]))
                candidates.append({
                    "mape": score,
                    "model": fitted,
                    "pred": forecast,
                    "conf": interval,
                    "parameters": {
                        "seasonal_period": periods,
                        "use_box_cox": box_cox,
                        "use_arma_errors": arma_errors,
                    },
                })

    # Stable sort: on ties the combination tried first wins.
    winner = sorted(candidates, key=lambda entry: entry["mape"])[0]
    chosen = winner["parameters"]
    metrics = [{"mean_absolute_percent_error": winner["mape"]}]

    # Refit the winning configuration on the full series for the
    # out-of-sample forecast.
    refit = TBATS(
        use_box_cox=chosen["use_box_cox"],
        use_arma_errors=chosen["use_arma_errors"],
        seasonal_periods=chosen["seasonal_period"]).fit(ts.series)
    OOS_pred, OOS_conf = refit.forecast(steps=12, confidence_level=0.95)
    return (winner["model"], winner["pred"], winner["conf"], metrics,
            OOS_pred, OOS_conf)
Пример #2
0
    def test_trend_and_seasonal(self):
        """Check the Python fit against the reference on a noisy trend + seasonal series.

        A seeded series with a constant slope, Gaussian noise and one sine
        seasonality of period 6 is generated; the last 5 points are held
        out.  The reference results come from ``self.r_tbats`` (presumably
        the R implementation -- confirm against the test base class).
        """
        np.random.seed(234234)
        n_obs = 35
        holdout = 5
        alpha = 0.1
        period_length = 6
        slope = 2.1
        level = 1.2

        series = []
        for t in range(n_obs):
            noise = np.random.normal()
            seasonal = 2 * np.sin(2 * np.pi * t / period_length)
            series.append(level + slope + noise + seasonal)
            level = level + slope + alpha * noise

        components = {
            "use_arma_errors": False,
            "use_trend": True,
            "use_damped_trend": False,
            "use_box_cox": False,
            "seasonal_periods": [period_length],
        }

        split = n_obs - holdout
        y_for_train = series[:split]
        y_to_forecast = series[split:]

        r_summary, r_model = self.r_tbats(y_for_train, components)
        py_model = TBATS(**components).fit(y_for_train)

        self.assert_py_model_is_not_worse(y_for_train, r_summary, r_model,
                                          py_model)
        self.assert_forecast_is_not_worse(y_to_forecast, r_model, py_model)
Пример #3
0
def predict_orders():
    """Rebuild the stored 30-step order forecast from the recorded order amounts.

    All existing ``PredictionOrder`` rows are deleted, a TBATS model with a
    seasonal period of 7 observations and a trend component is fitted to the
    values of every ``OrderAmount`` row, and one ``PredictionOrder`` row is
    saved per forecast step.  Saved rows are stamped with local midnight,
    starting today and advancing by one calendar day per step.
    """
    PredictionOrder.objects.all().delete()
    orders = OrderAmount.objects.all()

    epoch_seconds = []
    vals = []
    for order in orders:
        # ``order.date`` is treated as a Unix timestamp in seconds.
        epoch_seconds.append(int(order.date))
        vals.append(order.value)

    # Build the series with its datetime index directly.  The previous code
    # called ``DataFrame.set_index`` and discarded the result, so the index
    # was never actually applied.
    order_counts = pd.Series(
        vals,
        index=pd.to_datetime(epoch_seconds, unit="s"),
        name="order_count",
    )

    estimator_trend = TBATS(seasonal_periods=(7,), use_trend=True)
    model_trend = estimator_trend.fit(order_counts)
    y_forecast_trend = model_trend.forecast(steps=30)
    print(y_forecast_trend)

    day = datetime.date.today()
    one_day = datetime.timedelta(days=1)
    for val in y_forecast_trend:
        # Local midnight of ``day`` as a Unix timestamp.
        timestamp = time.mktime(day.timetuple())
        PredictionOrder(date=timestamp, value=val).save()
        day += one_day
Пример #4
0
 def train(self, **kwargs):
     """Fit a trend-enabled TBATS model (no ARMA errors) on ``self.train_df``.

     The seasonal periods come from
     ``get_unique_N(season_list(self.train_df), 1)``; the fitted model is
     stored in ``self.model``.  ``kwargs`` is accepted but not used.
     """
     detected_periods = list(get_unique_N(season_list(self.train_df), 1))
     estimator = TBATS(
         seasonal_periods=detected_periods,
         use_arma_errors=False,
         use_trend=True,
     )
     self.model = estimator.fit(self.train_df)
Пример #5
0
 def test_constant_model(self):
     """A constant series must give zero residuals and a constant fit and forecast."""
     constant = 3.2
     n_obs = 20
     horizon = 5
     series = [constant] * n_obs

     fitted = TBATS().fit(series)

     assert np.allclose([0.0] * n_obs, fitted.resid)
     assert np.allclose(series, fitted.y_hat)
     assert np.allclose([constant] * horizon, fitted.forecast(steps=horizon))
Пример #6
0
    def test_damped_trend(self):
        """Compare the Python and reference fits on a seeded damped-trend series.

        The series is driven by Gaussian noise with smoothing parameters
        alpha=0.4, beta=0.6 and damping phi=0.9.  The reference results come
        from ``self.r_tbats`` (presumably the R implementation -- confirm
        against the test base class).
        """
        components = {
            "use_arma_errors": False,
            "use_trend": True,
            "use_damped_trend": True,
            "use_box_cox": False,
        }

        alpha = 0.4
        beta = 0.6
        phi = 0.9
        np.random.seed(987)
        n_obs = 100

        slope = 0
        long_run_slope = 0.0
        level = 1
        series = []
        for _ in range(n_obs):
            noise = np.random.normal(scale=1.0)
            series.append(level + slope + noise)
            level = level + slope + alpha * noise
            slope = (1 - phi) * long_run_slope + phi * slope + beta * noise

        r_summary, r_model = self.r_tbats(series, components)
        py_model = TBATS(**components).fit(series)

        self.compare_model(r_summary, r_model, py_model)
        self.compare_forecast(r_model, py_model)
Пример #7
0
def test_conf_int(X_y_linear_trend):
    """The wrapper's predictions must match a raw TBATS forecast with 95% bounds.

    The last ``HORIZON`` observations are held out.  The expected frame has
    the point forecast in column ``TBATS`` and the interval in
    ``TBATS_lower`` / ``TBATS_upper``, indexed like the held-out rows of X.
    """
    HORIZON = 5
    X, y = X_y_linear_trend
    shared_settings = {"use_arma_errors": False, "use_box_cox": False}

    raw_estimator = TBATS(**shared_settings)
    wrapped_estimator = TBATSWrapper(conf_int=True,
                                     conf_int_level=0.95,
                                     **shared_settings)
    raw_model = raw_estimator.fit(y[:-HORIZON])
    wrapped_model = wrapped_estimator.fit(X[:-HORIZON], y[:-HORIZON])

    preds_orig, conf_int = raw_model.forecast(steps=HORIZON,
                                              confidence_level=0.95)
    preds = wrapped_model.predict(X[-HORIZON:])

    expected_result = pd.DataFrame(preds_orig,
                                   index=X.index[-HORIZON:],
                                   columns=["TBATS"])
    expected_result = expected_result.assign(
        TBATS_lower=conf_int["lower_bound"])
    expected_result = expected_result.assign(
        TBATS_upper=conf_int["upper_bound"])

    print("expected_result", expected_result)
    print("preds", preds)
    print("preds_orig", preds_orig)

    assert_frame_equal(preds, expected_result)
Пример #8
0
    def test_fit_predict_trigonometric_seasonal(self, seasonal_periods,
                                                seasonal_harmonics,
                                                starting_values):
        """
        Check that the model recovers a noise-free trigonometric series.

        A constant level plus one rotating harmonic block per seasonal
        period is generated, the model is fitted on all but the last 10
        points, and the fit and forecast are compared with the true values.
        The three arguments are indexed in lockstep, one entry per period
        (presumably supplied by a pytest parametrization -- confirm).
        """
        T = 100
        steps = 10
        level = 3.1
        x0 = [[level]]

        # construct trigonometric series
        y = [level] * T
        for idx, period_length in enumerate(seasonal_periods):
            n_harmonics = seasonal_harmonics[idx]
            s_harmonic = np.array(starting_values[idx])
            half = int(len(s_harmonic) / 2)
            s = s_harmonic[:half]
            s_star = s_harmonic[half:]
            x0.append(s_harmonic)
            lambdas = 2 * np.pi * (np.arange(1,
                                             n_harmonics + 1)) / period_length
            cos_lambdas = np.cos(lambdas)
            sin_lambdas = np.sin(lambdas)
            # add periodic impact to y
            for t in range(T):
                y[t] += np.sum(s)
                # rotate both state vectors from their previous values
                s, s_star = (s * cos_lambdas + s_star * sin_lambdas,
                             -s * sin_lambdas + s_star * cos_lambdas)

        x0 = np.concatenate(x0)

        split = T - steps
        y_to_fit = y[:split]
        y_to_predict = y[split:]

        # pytest does not work well with spawn multiprocessing method
        # https://github.com/pytest-dev/pytest/issues/958
        fitted_model = TBATS(
            use_box_cox=False,
            use_arma_errors=False,
            use_trend=False,
            seasonal_periods=seasonal_periods,
            multiprocessing_start_method='fork',
        ).fit(y_to_fit)
        resid = fitted_model.resid
        components = fitted_model.params.components

        # seasonal model should be discovered
        assert np.array_equal(seasonal_periods, components.seasonal_periods)
        # at least as many harmonics as in original series
        assert np.all(
            np.asarray(seasonal_harmonics) <= components.seasonal_harmonics)

        # sequence should be modelled properly
        assert np.allclose([0] * split, resid, atol=0.2)
        assert np.allclose(y_to_fit, fitted_model.y_hat, atol=0.2)

        # forecast should be close to actual; note this tolerance is
        # relative (rtol), unlike the absolute ones above
        y_predicted = fitted_model.forecast(steps=steps)
        assert np.allclose(y_to_predict, y_predicted, rtol=0.2)
Пример #9
0
 def train(self, **kwargs):
     """Fit a Box-Cox, trend-enabled TBATS model (no ARMA errors) on ``self.train_df``.

     ``self.seasons`` is used as the single seasonal period; the fitted
     model is stored in ``self.model``.  ``kwargs`` is accepted but not used.
     """
     settings = {
         "seasonal_periods": [self.seasons],
         "use_arma_errors": False,
         "use_box_cox": True,
         "use_trend": True,
     }
     estimator = TBATS(**settings)
     self.model = estimator.fit(self.train_df)
Пример #10
0
_TBAT_FIRST_STEPS = 5


def _history_with_forecast(csv_path, steps=_TBAT_FIRST_STEPS):
    """Return the ``col`` column of *csv_path* as ints, followed by a TBATS forecast.

    The model is fitted on the whole column and the ``steps`` forecast
    values are truncated to ``int`` and appended after the history.
    """
    from tbats import TBATS

    column = pd.read_csv(csv_path)['col']
    values = [int(v) for v in column]
    # NOTE(review): the same period is listed twice in (2, 2); kept as in
    # the original code -- confirm that it is intended.
    model = TBATS(seasonal_periods=(2, 2)).fit(column)
    values.extend(int(v) for v in model.forecast(steps=steps))
    return values


def Tbat_first():
    """Forecast three CSV series with TBATS, update their graphs and render the index page.

    For each of ``count_people.csv``, ``money.csv`` and ``passagers.csv``
    the history plus a 5-step forecast is passed to the matching
    ``setGraf*`` function.  The forecast values for ``money.csv`` are also
    printed, as before.
    """
    setGraf12(_history_with_forecast('count_people.csv'))

    money = _history_with_forecast('money.csv')
    for value in money[-_TBAT_FIRST_STEPS:]:
        print(value)
    setGraf13(money)

    setGraf14(_history_with_forecast('passagers.csv'))

    return render_template('index.html',first_graf_link = "/Tbat_first",second_graf_link ="/Tbat_second",title = "Tbat")
Пример #11
0
class Tbats(base_model.BaseModel):
    """
    Trigonometric seasonality, Box-Cox transformation, ARMA errors, Trend and Seasonal components.

    Thin wrapper around the ``TBATS`` estimator: ``fit`` builds and fits the estimator, ``predict`` forecasts from
    the fitted model.
    """

    def _tune(self, y, period, x=None, metric="mse", val_size=None, verbose=False):
        """
        Prepare the (unfitted) TBATS estimator; no hyperparameter search is performed here.
        :param y: pd.Series or 1-D np.array, time series to predict (not used at this stage).
        :param period: Int or Str, the number of observations per cycle: 1 or "annual" for yearly data, 4 or "quarterly"
        for quarterly data, 7 or "daily" for daily data, 12 or "monthly" for monthly data, 24 or "hourly" for hourly
        data, 52 or "weekly" for weekly data. First-letter abbreviations of strings work as well ("a", "q", "d", "m",
        "h" and "w", respectively). Additional reference: https://robjhyndman.com/hyndsight/seasonal-periods/.
        :param x: not used for TBATS model
        :param metric: not used for TBATS model; model selection is based on the AIC.
        :param val_size: not used for TBATS model; model selection is based on the AIC.
        :param verbose: not used for TBATS model.
        :return: None
        """
        # String periods ("monthly", "m", ...) are converted to their integer length.
        self.period = data_utils.period_to_int(period) if isinstance(period, str) else period
        # Use the normalised period: passing the raw argument would hand a string such as "monthly" to TBATS.
        self.model = TBATS(seasonal_periods=[self.period], show_warnings=False)
        self.params["tuned"] = True

    def fit(self, y, period, x=None, metric="mse", val_size=None, verbose=False):
        """
        Build the model and fit it to the time series.
        :param y: pd.Series or 1-D np.array, time series to predict.
        :param period: Int or Str, the number of observations per cycle: 1 or "annual" for yearly data, 4 or "quarterly"
        for quarterly data, 7 or "daily" for daily data, 12 or "monthly" for monthly data, 24 or "hourly" for hourly
        data, 52 or "weekly" for weekly data. First-letter abbreviations of strings work as well ("a", "q", "d", "m",
        "h" and "w", respectively). Additional reference: https://robjhyndman.com/hyndsight/seasonal-periods/.
        :param x: not used for TBATS model
        :param metric: not used for TBATS model; model selection is based on the AIC.
        :param val_size: not used for TBATS model; model selection is based on the AIC.
        :param verbose: not used for TBATS model.
        :return: None
        """
        self.y = y
        self.name = "TBATS"
        self.key = "tbats"
        self._tune(y=y, period=period, x=x, metric=metric, val_size=val_size, verbose=verbose)
        # After this call self.model holds the fitted model instead of the estimator.
        self.model = self.model.fit(y)

    def predict(self, horizon, x=None):
        """
        Predict future values of the time series using the fitted model.
        :param horizon: Int, the number of observations in the future to predict
        :param x: not used for TBATS model
        :return: 1-D np.array with predictions
        """
        return self.model.forecast(steps=horizon)
Пример #12
0
    def test_trend_and_seasonal(self):
        """Check that a noise-free damped trend plus one harmonic is recovered.

        The series combines a trend damped by phi=0.99 with a single
        rotating harmonic of period 6.  The fitted model must select one
        harmonic, estimate phi within 0.01, track the series after a
        10-point warm-up and forecast the 5 held-out points.
        """
        n_obs = 30
        steps = 5

        phi = 0.99
        period_length = 6
        slope = 2.1
        level = 1.2
        s = 0
        s_star = 0.2
        lam = 2 * np.pi / period_length
        cos_lam = np.cos(lam)
        sin_lam = np.sin(lam)

        y = []
        for _ in range(n_obs):
            y.append(level + phi * slope + s)
            level = level + phi * slope
            slope = phi * slope
            # rotate the harmonic pair from its previous values
            s, s_star = (s * cos_lam + s_star * sin_lam,
                         -s * sin_lam + s_star * cos_lam)

        split = n_obs - steps
        y_to_fit = y[:split]
        y_to_predict = y[split:]

        # pytest does not work well with spawn multiprocessing method
        # https://github.com/pytest-dev/pytest/issues/958
        fitted_model = TBATS(
            use_arma_errors=False,
            use_trend=True,
            use_damped_trend=True,
            use_box_cox=False,
            seasonal_periods=[period_length],
            multiprocessing_start_method='fork',
        ).fit(y_to_fit)
        resid = fitted_model.resid
        components = fitted_model.params.components

        # seasonal model with 1 harmonic should be chosen
        assert np.array_equal([1], components.seasonal_harmonics)
        assert np.array_equal([period_length], components.seasonal_periods)

        assert np.isclose(phi, fitted_model.params.phi, atol=0.01)

        # from some point residuals should be close to 0
        warm_up = 10
        assert np.allclose([0] * (split - warm_up), resid[warm_up:],
                           atol=0.06)
        assert np.allclose(y_to_fit[warm_up:], fitted_model.y_hat[warm_up:],
                           atol=0.06)

        # forecast should be close to actual sequence
        y_predicted = fitted_model.forecast(steps=steps)
        assert np.allclose(y_to_predict, y_predicted, atol=0.5)
Пример #13
0
def scoreCVforTBATS(series, loss_function):
    """Score a default TBATS model with a 3-split time-series cross-validation.

    For every split the training window is cut to its most recent multiple
    of 12 observations, a single-job TBATS model is fitted and the forecast
    over the test window is scored with ``loss_function(predictions,
    actual)``.  When a training window holds fewer than 12 observations the
    computed length is 0 and the ``[-0:]`` slice keeps the whole window.

    Returns:
      tuple: ``(errors, mean_error)`` -- the per-split errors and their mean.
    """
    fold_errors = []
    splitter = TimeSeriesSplit(n_splits=3)
    for train_idx, test_idx in splitter.split(series):
        n_train = train_idx.shape[0]
        tbats = TBATS(n_jobs=1)
        history = series.values[train_idx]
        usable = math.floor(n_train / 12) * 12
        history = history[-usable:]
        fitted = tbats.fit(history)
        forecast = fitted.forecast(len(test_idx))
        observed = series.values[test_idx]
        fold_errors.append(loss_function(forecast, observed))
    mean_error = np.mean(np.array(fold_errors))
    return fold_errors, mean_error
Пример #14
0
def tbats(ts, ts_log, ts_log_diff, forget_last, periods):
    """Fit TBATS on a log series, plot the fit and the forecast, print the summary.

    The model (no ARMA errors, no Box-Cox) is fitted on ``ts_log`` without
    its last ``forget_last`` points.  The first figure shows the in-sample
    fit on the log scale; the second shows the back-transformed (``exp``)
    series, the forecast over the held-out points and its 95% band.
    ``ts`` and ``ts_log_diff`` are accepted but not used.
    """
    n_total = len(ts_log)  #60 * 24
    trainset = ts_log[:-forget_last]
    holdout_start = n_total - forget_last
    holdout_range = range(holdout_start, n_total)

    # Fit the model
    model = TBATS(
        seasonal_periods=periods,
        use_arma_errors=False,  # shall try only models without ARMA
        use_box_cox=False  # will not use Box-Cox
    ).fit(trainset)

    # In-sample
    plt.plot(ts_log.to_numpy())
    plt.plot(model.y_hat, color='red')
    rss = sum((model.y_hat - trainset.to_numpy())**2)
    plt.title('TBATS RSS: %.4f' % rss)
    plt.show()

    # Forecast ahead
    forecast_log, ci = model.forecast(steps=forget_last,
                                      confidence_level=0.95)
    observed = np.exp(ts_log).to_numpy()
    forecast = np.exp(forecast_log)
    plt.plot(range(0, n_total), observed, color='blue')
    plt.plot(holdout_range, forecast, color='orange')
    axes = plt.gca()
    axes.fill_between(holdout_range,
                      np.exp(ci['lower_bound']),
                      np.exp(ci['upper_bound']),
                      color='b',
                      alpha=.1)
    plt.ylim(-2, 350)
    plt.axvline(x=holdout_start, color='red')
    mae = mean_absolute_error(observed[-forget_last:], forecast)
    plt.title("TBATS prediction of travel time (MAE: %.4f)" % mae)
    plt.show()
    print(model.summary())
Пример #15
0
    def test_long_seasonality(self):
        """Compare the Python and reference fits with a long, non-integer season.

        The seeded series has a constant slope, Gaussian noise and two
        seasonalities of period 7 and 30.5; the last 5 points are held out.
        The reference results come from ``self.r_tbats`` (presumably the R
        implementation -- confirm against the test base class).
        """
        np.random.seed(5434)
        n_obs = 300
        holdout = 5
        alpha = 0.1
        period_1_length = 7
        period_2_length = 30.5
        slope = 2.1
        level = 1.2

        series = []
        for t in range(n_obs):
            noise = np.random.normal()
            weekly = 2 * np.cos(2 * np.pi * t / period_1_length)
            monthly = 3 * np.sin(2 * np.pi * t / period_2_length)
            series.append(level + slope + weekly + monthly + noise)
            level = level + slope + alpha * noise

        components = {
            "use_arma_errors": False,
            "use_trend": True,
            "use_damped_trend": False,
            "use_box_cox": False,
            "seasonal_periods": [period_1_length, period_2_length],
        }

        split = n_obs - holdout
        y_for_train = series[:split]
        y_to_forecast = series[split:]

        r_summary, r_model = self.r_tbats(y_for_train, components)
        py_model = TBATS(n_jobs=1, **components).fit(y_for_train)

        self.assert_py_model_is_not_worse(y_for_train, r_summary, r_model,
                                          py_model)
        self.assert_forecast_is_not_worse(y_to_forecast, r_model, py_model)
Пример #16
0
# Predict on LGBM_X_test with the LightGBM model and report the RMSE
# against LGBM_Y_test.
lgbm_ft_predictions = lgbm_ft_model.predict(LGBM_X_test)

# RMSE = square root of the mean squared error.
lgbm_ft_rmse= np.sqrt(mean_squared_error(lgbm_ft_predictions,LGBM_Y_test))
print("Light GBM's Score:",lgbm_ft_rmse)
# Keep the predictions alongside the other LightGBM results.
LGBM_result['predictions'] = lgbm_ft_predictions

'
from tbats import TBATS, BATS

print("\n\nTraining T-BATS ...")

# Train on everything up to val_bound; score against the last 48 values.
train = df['Energy'][:val_bound]
test = df['Energy'][-48:].values

# Two seasonal periods: 12 and 24 observations.
estimator = TBATS(seasonal_periods=(12,24))
model = estimator.fit(train)
# Keep the forecast in its own variable so it can be both stored and scored;
# TBATS_forecast was previously used below without being assigned in this
# section.
TBATS_forecast = model.forecast(steps=48)
TBATS_result['predictions'] = TBATS_forecast
print("TBats Performace",np.sqrt(mean_squared_error(TBATS_forecast,test)))

#pip freeze > requirements.txt

"""**IMPLEMENTING LSTM (if they could help)**"""

from sklearn.preprocessing import MinMaxScaler
import keras
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Bidirectional, Flatten, BatchNormalization
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error

def plot_predictions(test,predicted):
Пример #17
0
def train_models(
    train,
    models,
    forecast_len,
    full_df=None,
    seasonality="infer_from_data",
    in_sample=None,
    freq=None,
    GPU=None,
):
    """Train every requested forecasting model and return them keyed by name.

    Args:
        train: Training series handed to each model (some branches first
            convert it with a helper such as ``prophet_dataframe`` or
            ``gluonts_dataframe``).
        models: Iterable of model names.  Handled names are "ARIMA",
            "Prophet", "HWAAS", "HWAMS", "PYAF", "Gluonts", "NBEATS", "TATS",
            "TBAT", "TBATS1", "TBATP1" and "TBATS2"; any other name is
            skipped without an error.
        forecast_len: Forecast horizon, used by the PYAF, Gluonts and NBEATS
            branches.
        full_df: Data passed to ``nbeats_dataframe``; only read by the
            NBEATS branch.
        seasonality: Forwarded to ``select_seasonality`` to obtain
            ``seasons``.
        in_sample: Truthy for in-sample training.  It selects the progress
            message and the NBEATS branch (which then also stores the test
            tensors).
        freq: "D" switches Prophet to daily seasonality.  Note that the
            Gluonts branch overwrites this variable.
        GPU: Truthy to place the NBEATS network on CUDA.

    Returns:
        tuple: ``(models_dict, seasons)`` where ``models_dict`` maps each
        handled model name to what its branch stored (a fitted model, a
        forecast frame for PYAF, or a dict for NBEATS).
    """

    seasons = select_seasonality(train, seasonality)

    # Second lookup with the literal mode "periodocity"; used only by TBATP1.
    periods = select_seasonality(train, "periodocity")

    models_dict = {}
    for m in models:
        if in_sample:
            print(
                "Model {} is being trained for in sample prediction".format(m))
        else:
            print("Model {} is being trained for out of sample prediction".
                  format(m))
        if m == "ARIMA":
            models_dict[m] = pm.auto_arima(train, seasonal=True, m=seasons)
        if m == "Prophet":
            if freq == "D":
                model = Prophet(daily_seasonality=True)
            else:
                model = Prophet()
            models_dict[m] = model.fit(prophet_dataframe(train))
        if m == "HWAAS":
            # Additive trend and seasonality: try the Box-Cox fit first and
            # fall back to a plain fit if it raises.
            # NOTE(review): the bare ``except`` also catches
            # KeyboardInterrupt/SystemExit -- consider narrowing it.
            try:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend="add",
                    seasonal="add",
                    damped=True,
                ).fit(use_boxcox=True)
            except:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend="add",
                    seasonal="add",
                    damped=True,
                ).fit(use_boxcox=False)
        if m == "HWAMS":
            # Multiplicative seasonality with two fallbacks: no Box-Cox, then
            # an additive-seasonal model without trend.
            # NOTE(review): bare ``except`` clauses, as above.
            try:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend="add",
                    seasonal="mul",
                    damped=True,
                ).fit(use_boxcox=True)
            except:
                try:
                    models_dict[m] = ExponentialSmoothing(
                        train,
                        seasonal_periods=seasons,
                        trend="add",
                        seasonal="mul",
                        damped=True,
                    ).fit(use_boxcox=False)
                except:
                    models_dict[m] = ExponentialSmoothing(
                        train,
                        seasonal_periods=seasons,
                        trend=None,
                        seasonal="add").fit(use_boxcox=False)

        # if m=="HOLT":
        #   models_dict["HOLT"] = Holt(train,exponential=True).fit()
        if m == "PYAF":
            model = autof()
            model.train(
                iInputDS=train.reset_index(),
                iTime="Date",
                iSignal="Target",
                iHorizon=len(train),
            )  # bad coding to have horison here
            # Unlike the other branches, the stored value is the forecast
            # output rather than the trained model.
            models_dict[m] = model.forecast(iInputDS=train.reset_index(),
                                            iHorizon=forecast_len)
        if m == "Gluonts":
            # Map pandas' inferred "MS" frequency to "M"; this overwrites the
            # ``freq`` argument for the remaining loop iterations.
            freqed = pd.infer_freq(train.index)
            if freqed == "MS":
                freq = "M"
            else:
                freq = freqed
            # NOTE(review): ``ctx="gpu"`` is hard-coded regardless of ``GPU``.
            estimator = DeepAREstimator(
                freq=freq,
                prediction_length=forecast_len,
                trainer=Trainer(epochs=6, ctx="gpu"),
            )  # use_feat_dynamic_real=True
            # NOTE(review): both branches below are identical.
            if GPU:
                models_dict[m] = estimator.train(
                    training_data=gluonts_dataframe(train))
            else:
                models_dict[m] = estimator.train(
                    training_data=gluonts_dataframe(train))
        if m == "NBEATS":

            if GPU:
                device = torch.device("cuda")
            else:
                device = torch.device("cpu")

            # Remove any existing checkpoint file before training.
            if os.path.isfile(CHECKPOINT_NAME):
                os.remove(CHECKPOINT_NAME)
            stepped = 35
            batch_size = 10
            if in_sample:
                x_train, y_train, x_test, y_test, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=True, device=device)
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                # test_losses = []
                # 35 calls to train_100_grad_steps.
                for r in range(stepped):

                    train_100_grad_steps(data, device, net,
                                         optimiser)  # test_losses
                models_dict[m] = {}
                models_dict[m]["model"] = net
                models_dict[m]["x_test"] = x_test
                models_dict[m]["y_test"] = y_test
                models_dict[m]["constant"] = norm_constant

            else:  # if out_sample train is df

                x_train, y_train, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=False, device=device)

                batch_size = 10  # greater than 4 for viz
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                # Only 5 calls to train_100_grad_steps here (35 in-sample).
                stepped = 5
                # test_losses = []
                for r in range(stepped):
                    # _, forecast = net(torch.tensor(x_train, dtype=torch.float)) ### Not Used
                    # if GPU:
                    #   p = forecast.detach().numpy()                               ### Not Used
                    # else:
                    #   p = forecast.detach().numpy()                               ### Not Used
                    train_100_grad_steps(data, device, net,
                                         optimiser)  # test_losses
                models_dict[m] = {}
                models_dict[m]["model"] = net
                models_dict[m]["tuple"] = (x_train, y_train, net,
                                           norm_constant)

        # if m=="TBA":
        #   bat = TBATS(use_arma_errors=False,use_box_cox=True)
        #   models_dict[m] = bat.fit(train)
        # TBATS variants: they differ in where the seasonal periods come
        # from and in whether Box-Cox is requested.
        if m == "TATS":
            bat = TBATS(
                seasonal_periods=list(get_unique_N(season_list(train), 1)),
                use_arma_errors=False,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)
        if m == "TBAT":
            # No seasonal periods supplied.
            bat = TBATS(use_arma_errors=False,
                        use_box_cox=True,
                        use_trend=True)
            models_dict[m] = bat.fit(train)
        if m == "TBATS1":
            bat = TBATS(
                seasonal_periods=[seasons],
                use_arma_errors=False,
                use_box_cox=True,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)
        if m == "TBATP1":
            bat = TBATS(
                seasonal_periods=[periods],
                use_arma_errors=False,
                use_box_cox=True,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)
        if m == "TBATS2":
            bat = TBATS(
                seasonal_periods=list(get_unique_N(season_list(train), 2)),
                use_arma_errors=False,
                use_box_cox=True,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)

        # if m=="ProphetGluonts":
        #   freqed = pd.infer_freq(train.index)
        #   if freqed=="MS":
        #     freq= "M"
        #   else:
        #     freq= freqed
        #   models_dict["ProphetGluonts"] = ProphetPredictor(freq=freq, prediction_length=forecast_len) #use_feat_dynamic_real=True
        #   models_dict["ProphetGluonts"] = list(models_dict["ProphetGluonts"])

    return models_dict, seasons
Пример #18
0
def tbats_model(timeseries, train_length, s, slow=True):
    """
    Forecast with the TBATS model, plot the results and print error metrics.

    Parameters
    ----------
    timeseries : Series
        the time series.
    train_length : int or float
        size of the training set as a fraction of the full series (it is
        multiplied by ``len(timeseries)``).
    s : list
        the seasonal periods.
    slow : bool
        if False, speeds up the selection of the final model by disabling
        ARMA errors and Box-Cox (default is True).

    Returns
    -------
    None.

    """

    # weekly data if 52 appears exactly once among the periods, daily otherwise
    freq = 'W-MON' if s.count(52) == 1 else 'D'

    # build the training set
    train_end = timeseries.index[int(len(timeseries) * train_length) - 1]
    train = timeseries[pd.date_range(start=timeseries.index[0],
                                     end=train_end,
                                     freq=freq)]

    # fit the model to the data
    tbats_kwargs = {'seasonal_periods': s}
    if not slow:
        # try only models without ARMA errors and without Box-Cox
        tbats_kwargs.update(use_arma_errors=False, use_box_cox=False)
    model = TBATS(**tbats_kwargs).fit(train)

    # print the model parameters
    print(model.summary())

    n_train = len(train)
    n_ahead = len(timeseries) - n_train

    # in-sample predictions (model.y_hat = train - model.resid)
    in_sample_dates = pd.date_range(start=timeseries.index[0],
                                    end=timeseries.index[n_train - 1],
                                    freq=freq)
    in_sample = pd.Series(model.y_hat, index=in_sample_dates)

    # out-of-sample predictions
    fcast, conf_int = model.forecast(steps=n_ahead, confidence_level=0.95)
    fcast_dates = pd.date_range(start=timeseries.index[n_train],
                                periods=n_ahead,
                                freq=freq)
    ts_fcast = pd.Series(fcast, index=fcast_dates)
    ci_lower = pd.Series(conf_int['lower_bound'], index=fcast_dates)
    ci_upper = pd.Series(conf_int['upper_bound'], index=fcast_dates)

    # plot of the model
    plt.figure(figsize=(40, 20), dpi=80)
    plt.title('Modello TBATS per {}'.format(timeseries.name))
    axes = train.plot(label='Train set', color='black')
    in_sample.plot(ax=axes, label='In-sample predictions', color='green')
    plt.legend()
    plt.show()
    print('MAE (in sample)', np.mean(np.abs(model.resid)))

    # plot of the forecasts
    plt.figure(figsize=(40, 20), dpi=80)
    plt.title('Forecasting con TBATS per {}'.format(timeseries.name))
    axes = timeseries.plot(label='Observed', color='black')
    ts_fcast.plot(ax=axes,
                  label='Out-of-sample forecasts',
                  alpha=.7,
                  color='red')
    axes.fill_between(fcast_dates, ci_lower, ci_upper, color='k', alpha=.2)
    plt.legend()
    plt.show()

    # error metrics (only dates present in both series survive the dropna)
    forecast_error = (ts_fcast - timeseries).dropna()
    print('MSE=%.4f' % (forecast_error**2).mean())
    print('MAE=%.4f' % (abs(forecast_error)).mean())
Пример #19
0
# Printing the values of fitted models.

# Print the summary of every fitted exponential-smoothing model in ets_fits.
for key, ets_model in ets_fits.items():
    print("Exponential Smooting", key, "\n")
    print(ets_model.summary())

#########################################################
# 3. TBATS
# For more information see here: https://pypi.org/project/tbats/

from tbats import TBATS, BATS

# Initialization and fit for TBATS with a single seasonal period of 12
# observations.
tbats_estimator = TBATS(seasonal_periods=[12])
tbats_model = tbats_estimator.fit(price)

print(tbats_model.summary())

# Forecast PERIODS_AHEAD steps ahead together with a 95% confidence interval.
y_forecast, confidence_info = tbats_model.forecast(steps=PERIODS_AHEAD,
                                                   confidence_level=0.95)

# Month-start dates for the forecast: the range begins at the last observed
# date, which is then dropped with [1:] so only future dates remain.
index_of_fc = pd.date_range(price.index[-1],
                            periods=PERIODS_AHEAD + 1,
                            freq='MS')[1:]
# Point forecast and interval bounds as series on the forecast dates.
fitted_series = pd.Series(y_forecast, index=index_of_fc)
lower_series = pd.Series(confidence_info['lower_bound'], index=index_of_fc)
upper_series = pd.Series(confidence_info['upper_bound'], index=index_of_fc)

plt.plot(price, label='Initial Data')
Пример #20
0
    # 线性全局模型
    linear_res = []
    for idx, name in enumerate(data_dir):
        y_forecasted = model_fit_predict(name)
        linear_res.append(y_forecasted)
    linear_res = pd.DataFrame(list(linear_res)).T
    linear_res.columns = ['p_pv', 'p_uv', 'r_pv', 'r_uv']

    # 传统分解模型
    tbats_res = []
    for idx, name in enumerate(data_dir):
        data = pd.read_csv('processed_data/' + name + '.csv')[[name]].T
        print(idx)
        data = np.array(data)[0]
        estimator = TBATS(seasonal_periods=params_0['seasonal_periods'][idx])
        fitted_model = estimator.fit(data)
        y_forecasted = fitted_model.forecast(steps=7)
        y_forecasted = [x * params_0['after_rate'][idx] for x in y_forecasted]
        tbats_res.append(y_forecasted)

    tbats_res = pd.DataFrame(tbats_res).T
    tbats_res.columns = ['p_pv', 'p_uv', 'r_pv', 'r_uv']

    # 模型融合
    rate = params_0['rh_rate']
    res = pd.DataFrame()
    res['p_pv'] = linear_res['p_pv'].values * rate + tbats_res[
        'p_pv'].values * (1 - rate)
    res['p_uv'] = linear_res['p_uv'].values * rate + tbats_res[
        'p_uv'].values * (1 - rate)
    res['r_pv'] = tbats_res['r_pv']
Пример #21
0
def model_tbats(train_df, steps, kwargs):
    """Fit TBATS on ``train_df`` and return its ``steps``-ahead forecast.

    The estimator is configured with seasonal periods 7 and 365.25 and
    ``n_jobs=1``. ``kwargs`` is accepted for interface compatibility but is
    not used.
    """
    fitted = TBATS(n_jobs=1, seasonal_periods=(7, 365.25)).fit(train_df)
    forecast = fitted.forecast(steps=steps)
    return forecast
Пример #22
0
def anomaly_uni_TBATS(lista_datos,
                      num_forecast=10,
                      desv_mse=2,
                      train='True',
                      name='test'):
    """Score anomalies in a univariate series with TBATS and forecast ahead.

    A TBATS model is fitted on the training split and used to predict the
    test split; every test point gets an anomaly score equal to its absolute
    error divided by the mean absolute error of the whole test forecast. The
    same estimator is then refitted on the full series to forecast
    ``num_forecast`` future steps.

    Args:
        lista_datos: Sequence of observed values.
        num_forecast: Number of future steps to forecast.
        desv_mse: Currently unused. The anomaly threshold is hard-coded to 2
            (NOTE(review): this parameter was presumably meant to supply it).
        train: Truthiness flag. NOTE(review): the default is the *string*
            ``'True'``, so any non-empty string (including ``'False'``) runs
            the model; only a falsy value skips it.
        name: Currently unused.

    Returns:
        dict with keys ``'mae'``, ``'present_status'`` (``'TRUE'``/``'FALSE'``),
        ``'present_alerts'``, ``'past'``, ``'engine'``, ``'future'`` and
        ``'debug'``; the tabular entries are lists of row dicts. Returns
        ``None`` when ``train`` is falsy.
    """
    lista_puntos = np.arange(0, len(lista_datos), 1)

    df, df_train, df_test = create_train_test(lista_puntos, lista_datos)

    engine_output = {}

    if not train:
        return None

    # Use at most the first two candidate seasonal periods.
    periods = seasonal_options(df.valores)
    estimator = TBATS(seasonal_periods=periods[:2])

    print("Starting Anomaly Model Fitted")
    fitted_model = estimator.fit(df_train['valores'])
    print("Anomaly Model Fitted")

    # Predict exactly as many steps as the test split holds.
    anomaly_forecasted = fitted_model.forecast(steps=len(df_test['valores']))

    mae = mean_absolute_error(anomaly_forecasted, df_test['valores'].values)

    df_aler = pd.DataFrame(anomaly_forecasted,
                           index=df_test.index,
                           columns=['expected value'])
    df_aler['step'] = df['puntos']
    df_aler['real_value'] = df_test['valores']
    df_aler['mae'] = mae
    df_aler['anomaly_score'] = abs(df_aler['expected value'] -
                                   df_aler['real_value']) / df_aler['mae']

    # Rows among the first five of the test split whose index also lies within
    # the last five index values.
    # NOTE(review): taking `[:5]` before filtering on the *last* five indices
    # yields an empty frame for any test split longer than nine rows, and the
    # check runs before the score threshold is applied; kept as-is because the
    # intended behaviour cannot be confirmed from this function alone.
    last_index = df_aler.index.max()
    recent_labels = [last_index - offset for offset in range(5)]
    df_aler_ult = df_aler[:5]
    df_aler_ult = df_aler_ult[df_aler_ult.index.isin(recent_labels)].copy()
    exists_anom_last_5 = 'FALSE' if len(df_aler_ult) == 0 else 'TRUE'

    # Keep only points above the anomaly threshold, then min-max scale the
    # scores. A zero range produces NaN, which fillna(0) below turns into 0.
    df_aler = df_aler[df_aler['anomaly_score'] > 2].copy()
    score_max = df_aler['anomaly_score'].max()
    score_min = df_aler['anomaly_score'].min()
    df_aler['anomaly_score'] = (df_aler['anomaly_score'] -
                                score_min) / (score_max - score_min)

    recent_max = df_aler_ult['anomaly_score'].max()
    recent_min = df_aler_ult['anomaly_score'].min()
    df_aler_ult['anomaly_score'] = (df_aler_ult['anomaly_score'] -
                                    recent_min) / (recent_max - recent_min)

    # Refit on the complete series for the out-of-sample forecast.
    fitted_model = estimator.fit(df['valores'])
    print("Forecast Model Fitted")

    y_forecasted = fitted_model.forecast(steps=num_forecast)

    df_future = pd.DataFrame(y_forecasted, columns=['value'])
    df_future['value'] = df_future.value.astype("float32")
    df_future['step'] = np.arange(len(lista_datos),
                                  len(lista_datos) + num_forecast, 1)

    # 'records' is the full orient name; the short form 'record' is rejected
    # by pandas >= 2.0.
    engine_output['mae'] = mae
    engine_output['present_status'] = exists_anom_last_5
    engine_output['present_alerts'] = df_aler_ult.fillna(0).to_dict(
        orient='records')
    engine_output['past'] = df_aler.fillna(0).to_dict(orient='records')
    engine_output['engine'] = 'TBATS'
    print("Only for future")

    engine_output['future'] = df_future.to_dict(orient='records')

    # Expose the raw test-split forecast for debugging.
    test_values = pd.DataFrame(anomaly_forecasted,
                               index=df_test.index,
                               columns=['expected value'])
    test_values['step'] = test_values.index
    engine_output['debug'] = test_values.to_dict(orient='records')

    return engine_output
def train_tbats_model(train_set: pd.Series):
    """Fit a TBATS model on ``train_set`` and return the fitted model.

    The estimator is configured with two seasonal periods, 7 and 30.4.
    """
    return TBATS(seasonal_periods=(7, 30.4)).fit(train_set)
Пример #24
0
def recon_hybrid(df,
                 chunks,
                 steps,
                 seasonal1=96,
                 seasonal2=672,
                 short='ARIMA',
                 long='median',
                 weeks=6):
    '''
    Impute chunks of missing values with a hybrid strategy: chunks longer than
    the forecasting horizon are filled from the same time on previous weeks,
    shorter chunks are filled with a forecast fitted on the preceding data.

    Parameters
    ----------
    df : Pandas Dataframe
        Dataframe with only one column called "Flow" and a DateTime index.
        It is not modified; a copy is imputed and returned.
    chunks : list
        List with the chunks of missing values, which are lists as well. This variable is returned by
        the function wrangler.data_wrangler. Empty chunks are skipped.
    steps : int
        Maximum number of steps that are going to be forecasted.
    seasonal1 : int, optional
        First seasonality of the time series. The default is 96, considering flow values every 15 minutes during a day.
    seasonal2 : int, optional
        Second seasonality of the time series. It is not used by all the methods.
        The default is 672, considering flow values every 15 minutes during a week.
    short : string, optional
        Defines the method used to impute whenever the chunk of missing data is not larger than the forecasting
        horizon. One of 'ARIMA', 'TBATS', 'HW', 'KNN', 'RF', 'SVR', 'GPR'. The default is 'ARIMA'.
    long : string, optional
        Same as "short", but regarding chunks larger than the forecasting horizon: 'median' or 'mean'.
        Any other value leaves those chunks untouched. The default is 'median'.
    weeks : int, optional
        Number of weeks to consider when imputing missing values. The default is 6.

    Returns
    -------
    dataframe : Pandas Dataframe
        Dataframe with imputed values according to the selected methods.
    elapsed_time : float
        Elapsed time to perform the imputing method.

    Raises
    ------
    ValueError
        If a short chunk has to be imputed and "short" is not a supported method.
    '''
    start_time = time.time()
    dataframe = df.copy()
    for c in chunks:
        if not c:
            # Nothing to impute; also avoids indexing c[0] below.
            continue

        if len(c) > steps:
            # Long gap: aggregate the rows at the same time of day on each of
            # the previous `weeks` weeks. Imputation is sequential, so rows
            # filled earlier can feed later ones.
            for n in c:
                values = [
                    dataframe.loc[n - pd.Timedelta(value=(k + 1) * 7,
                                                   unit='D')]
                    for k in range(weeks)
                ]

                if long == 'median':
                    dataframe.loc[n] = np.median(values)
                elif long == 'mean':
                    dataframe.loc[n] = np.mean(values)
            continue

        # Short gap: fit on everything strictly before the first missing stamp.
        ts = dataframe.loc[:c[0]].iloc[:-1]
        horizon = len(c)

        if short == 'ARIMA':
            arima_model = auto_arima(ts)
            y_forecast = arima_model.predict(n_periods=horizon)

        elif short == 'TBATS':
            estimator = TBATS(seasonal_periods=[seasonal1, seasonal2])
            fitted_model = estimator.fit(ts)
            y_forecast = fitted_model.forecast(steps=horizon)

        elif short == 'HW':
            estimator = ExponentialSmoothing(ts,
                                             trend='add',
                                             seasonal='add',
                                             seasonal_periods=seasonal1)
            fitted_model = estimator.fit()
            y_forecast = fitted_model.forecast(steps=horizon)

        elif short in ('KNN', 'RF', 'SVR', 'GPR'):
            # The regression-based methods share one call and differ only in
            # the regressor handed to pred.forecast.
            if short == 'KNN':
                regressor = KNeighborsRegressor()
            elif short == 'RF':
                regressor = RandomForestRegressor()
            elif short == 'SVR':
                regressor = SVR()
            else:
                regressor = GaussianProcessRegressor()
            res = pred.forecast(ts,
                                regressor,
                                horizon=horizon,
                                estac=seasonal1,
                                prt=0)
            y_forecast = res[3]

        else:
            raise ValueError(
                "Unsupported short-chunk imputation method: %r" % (short,))

        # Index by position: some back-ends return a label-indexed Series,
        # for which integer [] lookups are label-based or deprecated.
        y_forecast = np.asarray(y_forecast)
        for j, n in enumerate(c):
            dataframe.loc[n] = y_forecast[j]

    elapsed_time = time.time() - start_time
    return dataframe, elapsed_time
Пример #25
0
from pkg.TBATSmod import saveforecast, save_individual_graph

#==============================================================================
# Forecast model application and save.

# Training data; each row (selected by index label below) is fitted as one
# series.
train_df = pd.read_excel('train.xlsx', index_col=0)

# Per-series results, keyed by the row's index label.
y_forecast = {}
lower_int = {}
upper_int = {}

if __name__ == '__main__':
    # One estimator with a seasonal period of 12, reused for every series.
    estimator = TBATS(seasonal_periods=[12])

    for index in train_df.index.values:
        fitted_model = estimator.fit(train_df.loc[index])
        # 12-step forecast with a 90% confidence interval.
        y_forecasted, confidence_int = fitted_model.forecast(
            steps=12, confidence_level=0.90)

        # The stored point forecast is taken from the interval dict's 'mean'
        # entry; `y_forecasted` itself is not used.
        y_forecast[index] = confidence_int['mean']
        lower_int[index] = confidence_int['lower_bound']
        upper_int[index] = confidence_int['upper_bound']

# NOTE(review): this call sits outside the __main__ guard, so on import it
# runs with the three dicts still empty — confirm whether that is intended.
saveforecast(
    pd.DataFrame(y_forecast).T,
    pd.DataFrame(lower_int).T,
    pd.DataFrame(upper_int).T, 'forecast')

#==============================================================================
# Produce graphs based on the forecast
Пример #26
0
import numpy as np
import pandas as pd
from tbats import TBATS
from data_process import data_process

if __name__ == "__main__":

    process_data = data_process()

    # One list of 7 predictions per target is appended here.
    tbats_res = []

    # --- p_pv: blend of two TBATS forecasts ---------------------------------
    p_pv = []
    # Fit a weekly-seasonal (period 7) TBATS on the first row of
    # process_data[0].
    temp = np.array(process_data[0])[0]
    estimator = TBATS(seasonal_periods=[7])
    fitted_model = estimator.fit(temp)
    y_1 = fitted_model.forecast(steps=7)
    # Same model on the first row of process_data[1].
    temp = np.array(process_data[1])[0]
    estimator = TBATS(seasonal_periods=[7])
    fitted_model = estimator.fit(temp)
    y_2 = fitted_model.forecast(steps=7)
    # Steps 1-5: 0.65 / 0.35 weighted blend of the two forecasts.
    for i in range(5):
        p_pv.append(0.65 * y_1[i] + 0.35 * y_2[i])
    # Steps 6-7: average of the observations 7 positions apart at the end of
    # `temp`, which at this point holds the process_data[1] series.
    p_6 = (temp[-2] + temp[-9]) * 0.5
    p_7 = (temp[-1] + temp[-8]) * 0.5
    p_pv.append(p_6)
    p_pv.append(p_7)
    tbats_res.append(p_pv)

    # --- p_uv ----------------------------------------------------------------
    p_uv = []
    temp = np.array(process_data[2])[0]
    estimator = TBATS(seasonal_periods=[7])
Пример #27
0
 def train(self, **kwargs):
     """Fit TBATS on ``self.train_df`` and store the result in ``self.model``.

     The estimator enables trend and Box-Cox and disables ARMA errors.
     ``kwargs`` is accepted but not used.
     """
     estimator = TBATS(use_trend=True,
                       use_box_cox=True,
                       use_arma_errors=False)
     self.model = estimator.fit(self.train_df)
Пример #28
0
    # Synthetic series of 160 points: two sinusoids with periods 14.5 and
    # 30.25, a t**1.5 trend term, Gaussian noise scaled by t, and an offset
    # of 10.
    t = np.array(range(0, 160))
    y = 5 * np.sin(t * 2 * np.pi / 14.5) + 5 * np.cos(t * 2 * np.pi / 30.25) + \
        ((t / 20) ** 1.5 + np.random.normal(size=160) * t / 50) + 10
    y = np.asarray(y)
    # Hold out the last `steps` points (`steps` is defined outside this
    # fragment).
    y_to_train = y[:(len(y) - steps)]
    y_to_predict = y[(len(y) - steps):]

    estimator = TBATS(
        seasonal_periods=[14.5, 30.25],
        use_arma_errors=None,  # will try models with and without ARMA errors
        use_box_cox=False,  # will not use Box-Cox
        use_trend=None,  # will try models with trend and without it
        use_damped_trend=None,  # will try models with damping and without it
        show_warnings=False,  # will not show any warnings for the chosen model
    )
    fitted_model = estimator.fit(y_to_train)

    # Warning messages from the model, if any
    for warning in fitted_model.warnings:
        print(warning)

    print('Did the model fit?', fitted_model.is_fitted)  # Model may fail to fit in edge-case situations
    print('AIC', fitted_model.aic)  # may be np.inf

    # Let's check the components used in the model
    print('\n\nMODEL SUMMARY\n\n')
    params = fitted_model.params
    components = fitted_model.params.components

    print('Smoothing parameter', params.alpha)
Пример #29
0
import time
# Start a wall-clock timer for the script.
start_time = time.time()

df = pd.read_csv("DATASET.csv")
# Parse the Date column from day/month/two-digit-year strings.
df.Date = pd.to_datetime(df.Date, format="%d/%m/%y")
# NOTE(review): if the CSV has no 'Transakce' column, this attribute
# assignment sets a plain attribute on the DataFrame object instead of
# creating a column; the rest of the script reads 'Demand' directly.
df.Transakce = df['Demand'].astype(float)
# NOTE(review): no index is set above, so this sorts by the default index —
# confirm whether sorting by Date was intended.
df = df.sort_index()
y = df

# Hold out the last 90 rows for testing.
y_to_train = y.iloc[:(len(y) - 90)]
y_to_test = y.iloc[(len(y) - 90):]

from tbats import BATS, TBATS

# TBATS with seasonal periods 7 and 365, fitted on the training 'Demand'
# values; forecast the 90 held-out steps.
estimator = TBATS(seasonal_periods=(7, 365))
model = estimator.fit(y_to_train["Demand"])
y_forecast = model.forecast(steps=90)

# Reset the index so the test values are positioned 0..89 for plotting.
y_test = y_to_test["Demand"]
y_test = y_test.reset_index()

# Forecast drawn on top (zorder=1) of the true values (zorder=0).
plt.plot(y_forecast, label="Pred", color="black", zorder=1)
plt.plot(y_test["Demand"], label="True", color="lightgray", zorder=0)
plt.legend(loc="upper right")
plt.xlabel('Days', fontsize=10)
plt.ylabel('Demand', fontsize=10)

# Aliases for the true and predicted values.
Y_true = y_test["Demand"]
Y_pred = y_forecast
from sklearn.metrics import mean_squared_error, mean_absolute_error
Пример #30
0
if __name__ == '__main__':
    from multiprocessing import Process, freeze_support
    from tbats import TBATS, BATS
    import pandas as pd
    import matplotlib.pyplot as plt
    from pmdarima import auto_arima

    # Path template for the processed task-event CSV parts; the placeholder is
    # filled with a zero-padded 5-digit part number (here part 00002).
    # NOTE(review): absolute, machine-specific path.
    path = '/home/sownbanana/PycharmProjects/Scaler/Data/task_events/task_events_processed/part-{}-of-00500.csv'
    df = pd.read_csv(path.format(str(2).zfill(5)))
    # Index the frame by the parsed 'time' column.
    df.index = pd.to_datetime(df['time'])
    # Show the raw arrival series.
    df['arrival_rq'].plot()
    plt.show()

    # Fit TBATS with seasonal periods 14 and 30.5 and forecast 14 steps.
    y = df['arrival_rq']
    estimator = TBATS(seasonal_periods=(14, 30.5))
    model = estimator.fit(y)
    y_forecast = model.forecast(steps=14)

    # Alternative kept for reference: auto-ARIMA one-step forecast.
    # arima_model = auto_arima(y, seasonal=True, m=1)
    # y_forecast = arima_model.predict(n_periods=1)

    # y_forecast.plot()
    # Plot the forecast values.
    plt.plot(y_forecast)
    plt.show()