Example #1
def train_models(
    train,
    models,
    forecast_len,
    full_df=None,
    seasonality="infer_from_data",
    in_sample=None,
    freq=None,
    GPU=None,
):

    seasons = select_seasonality(train, seasonality)

    periods = select_seasonality(train, "periodocity")

    models_dict = {}
    for m in models:
        if in_sample:
            print(
                "Model {} is being trained for in-sample prediction".format(m))
        else:
            print("Model {} is being trained for out-of-sample prediction".
                  format(m))
        if m == "ARIMA":
            models_dict[m] = pm.auto_arima(train, seasonal=True, m=seasons)
        if m == "Prophet":
            if freq == "D":
                model = Prophet(daily_seasonality=True)
            else:
                model = Prophet()
            models_dict[m] = model.fit(prophet_dataframe(train))
        if m == "HWAAS":
            try:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend="add",
                    seasonal="add",
                    damped=True,
                ).fit(use_boxcox=True)
            except Exception:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend="add",
                    seasonal="add",
                    damped=True,
                ).fit(use_boxcox=False)
        if m == "HWAMS":
            try:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend="add",
                    seasonal="mul",
                    damped=True,
                ).fit(use_boxcox=True)
            except Exception:
                try:
                    models_dict[m] = ExponentialSmoothing(
                        train,
                        seasonal_periods=seasons,
                        trend="add",
                        seasonal="mul",
                        damped=True,
                    ).fit(use_boxcox=False)
                except Exception:
                    models_dict[m] = ExponentialSmoothing(
                        train,
                        seasonal_periods=seasons,
                        trend=None,
                        seasonal="add").fit(use_boxcox=False)

        # if m=="HOLT":
        #   models_dict["HOLT"] = Holt(train,exponential=True).fit()
        if m == "PYAF":
            model = autof()
            model.train(
                iInputDS=train.reset_index(),
                iTime="Date",
                iSignal="Target",
                iHorizon=len(train),
            )  # bad practice to use the training length as the horizon here
            models_dict[m] = model.forecast(iInputDS=train.reset_index(),
                                            iHorizon=forecast_len)
        if m == "Gluonts":
            freqed = pd.infer_freq(train.index)
            if freqed == "MS":
                freq = "M"
            else:
                freq = freqed
            estimator = DeepAREstimator(
                freq=freq,
                prediction_length=forecast_len,
                trainer=Trainer(epochs=6, ctx="gpu" if GPU else "cpu"),  # honour the GPU flag
            )  # use_feat_dynamic_real=True
            models_dict[m] = estimator.train(
                training_data=gluonts_dataframe(train))
        if m == "NBEATS":

            if GPU:
                device = torch.device("cuda")
            else:
                device = torch.device("cpu")

            if os.path.isfile(CHECKPOINT_NAME):
                os.remove(CHECKPOINT_NAME)
            stepped = 35
            batch_size = 10
            if in_sample:
                x_train, y_train, x_test, y_test, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=True, device=device)
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                # test_losses = []
                for r in range(stepped):

                    train_100_grad_steps(data, device, net,
                                         optimiser)  # test_losses
                models_dict[m] = {}
                models_dict[m]["model"] = net
                models_dict[m]["x_test"] = x_test
                models_dict[m]["y_test"] = y_test
                models_dict[m]["constant"] = norm_constant

            else:  # out-of-sample: training windows are built from full_df

                x_train, y_train, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=False, device=device)

                batch_size = 10  # greater than 4 for viz
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                stepped = 5
                # test_losses = []
                for r in range(stepped):
                    # _, forecast = net(torch.tensor(x_train, dtype=torch.float)) ### Not Used
                    # if GPU:
                    #   p = forecast.detach().numpy()                               ### Not Used
                    # else:
                    #   p = forecast.detach().numpy()                               ### Not Used
                    train_100_grad_steps(data, device, net,
                                         optimiser)  # test_losses
                models_dict[m] = {}
                models_dict[m]["model"] = net
                models_dict[m]["tuple"] = (x_train, y_train, net,
                                           norm_constant)

        # if m=="TBA":
        #   bat = TBATS(use_arma_errors=False,use_box_cox=True)
        #   models_dict[m] = bat.fit(train)
        if m == "TATS":
            bat = TBATS(
                seasonal_periods=list(get_unique_N(season_list(train), 1)),
                use_arma_errors=False,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)
        if m == "TBAT":
            bat = TBATS(use_arma_errors=False,
                        use_box_cox=True,
                        use_trend=True)
            models_dict[m] = bat.fit(train)
        if m == "TBATS1":
            bat = TBATS(
                seasonal_periods=[seasons],
                use_arma_errors=False,
                use_box_cox=True,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)
        if m == "TBATP1":
            bat = TBATS(
                seasonal_periods=[periods],
                use_arma_errors=False,
                use_box_cox=True,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)
        if m == "TBATS2":
            bat = TBATS(
                seasonal_periods=list(get_unique_N(season_list(train), 2)),
                use_arma_errors=False,
                use_box_cox=True,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)

        # if m=="ProphetGluonts":
        #   freqed = pd.infer_freq(train.index)
        #   if freqed=="MS":
        #     freq= "M"
        #   else:
        #     freq= freqed
        #   models_dict["ProphetGluonts"] = ProphetPredictor(freq=freq, prediction_length=forecast_len) #use_feat_dynamic_real=True
        #   models_dict["ProphetGluonts"] = list(models_dict["ProphetGluonts"])

    return models_dict, seasons
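
A minimal usage sketch for this first variant (not part of the original listing). It assumes train_models and its helpers (select_seasonality, prophet_dataframe, gluonts_dataframe, and so on) are importable from the surrounding module, and that the series carries a datetime index named "Date" with the signal in a "Target" column, as the PYAF call (iTime="Date", iSignal="Target") implies; the data below is a toy stand-in.

import pandas as pd

# Toy monthly series; any data with a "Date" index and a "Target" signal works the same way.
idx = pd.date_range("2015-01-01", periods=48, freq="MS")
series = pd.Series(range(48), index=idx, name="Target", dtype="float64")
series.index.name = "Date"

forecast_len = 12
train = series.iloc[:-forecast_len]

# Out-of-sample run with a few of the lighter models, CPU only.
models_dict, seasons = train_models(
    train,
    models=["ARIMA", "HWAAS", "TBAT"],
    forecast_len=forecast_len,
    full_df=series,
    seasonality="infer_from_data",
    in_sample=False,
    freq="MS",
    GPU=False,
)
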
Example #2
def train_models(train,
                 models,
                 forecast_len,
                 full_df=None,
                 seasonality="infer_from_data",
                 in_sample=None):

    seasons = select_seasonality(train, seasonality)

    models_dict = {}
    for m in models:
        if m == "ARIMA":
            models_dict["ARIMA"] = pm.auto_arima(train,
                                                 seasonal=True,
                                                 m=seasons)
        if m == "Prophet":
            model = Prophet()
            models_dict["Prophet"] = model.fit(prophet_dataframe(train))
        if m == "HWAAS":
            models_dict["HWAAS"] = ExponentialSmoothing(
                train,
                seasonal_periods=seasons,
                trend='add',
                seasonal='add',
                damped=True).fit(use_boxcox=True)
        if m == "HWAMS":
            models_dict["HWAMS"] = ExponentialSmoothing(
                train,
                seasonal_periods=seasons,
                trend='add',
                seasonal='mul',
                damped=True).fit(use_boxcox=True)
        # if m=="HOLT":
        #   models_dict["HOLT"] = Holt(train,exponential=True).fit()
        if m == "PYAF":
            model = autof.cForecastEngine()
            model.train(iInputDS=train.reset_index(),
                        iTime='Date',
                        iSignal='Target',
                        iHorizon=len(train))  # bad practice to use the training length as the horizon here
            models_dict["PYAF"] = model.forecast(iInputDS=train.reset_index(),
                                                 iHorizon=forecast_len)
        if m == "Gluonts":
            freqed = pd.infer_freq(train.index)
            if freqed == "MS":
                freq = "M"
            else:
                freq = freqed
            estimator = DeepAREstimator(
                freq=freq,
                prediction_length=forecast_len,
                trainer=Trainer(epochs=2))  #use_feat_dynamic_real=True
            print(train)
            print(type(train))
            print(gluonts_dataframe(train))
            models_dict["Gluonts"] = estimator.train(
                training_data=gluonts_dataframe(train))
        if m == "NBEATS":

            device = torch.device('cpu')
            seasons = select_seasonality(train, seasonality)

            if os.path.isfile(CHECKPOINT_NAME):
                os.remove(CHECKPOINT_NAME)
            stepped = 5
            batch_size = 10
            if in_sample:
                x_train, y_train, x_test, y_test, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=True)
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                #test_losses = []
                for r in range(stepped):

                    train_100_grad_steps(data, device, net,
                                         optimiser)  #test_losses
                models_dict["NBEATS"] = {}
                models_dict["NBEATS"]["model"] = net
                models_dict["NBEATS"]["x_test"] = x_test
                models_dict["NBEATS"]["y_test"] = y_test
                models_dict["NBEATS"]["constant"] = norm_constant

            else:  # out-of-sample: training windows are built from full_df

                x_train, y_train, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=False)

                batch_size = 10  # greater than 4 for viz
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                stepped = 5
                #test_losses = []
                for r in range(stepped):
                    # _, forecast = net(torch.tensor(x_train, dtype=torch.float))  # not used
                    # p = forecast.detach().numpy()                                # not used
                    train_100_grad_steps(data, device, net,
                                         optimiser)  #test_losses
                models_dict["NBEATS"] = {}
                models_dict["NBEATS"]["model"] = net
                models_dict["NBEATS"]["tuple"] = (x_train, y_train, net,
                                                  norm_constant)

        # if m=="ProphetGluonts":
        #   freqed = pd.infer_freq(train.index)
        #   if freqed=="MS":
        #     freq= "M"
        #   else:
        #     freq= freqed
        #   models_dict["ProphetGluonts"] = ProphetPredictor(freq=freq, prediction_length=forecast_len) #use_feat_dynamic_real=True
        #   models_dict["ProphetGluonts"] = list(models_dict["ProphetGluonts"])


    return models_dict
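
A hypothetical in-sample call for this second, simpler variant, reusing the toy series from the sketch after Example #1. This version takes no freq or GPU arguments (DeepAR trains for two epochs and N-BEATS runs on CPU), and full_df is what the NBEATS branch actually reads, so it must be supplied whenever "NBEATS" is requested.

# In-sample run: NBEATS builds its windows from full_df and keeps a held-out x_test/y_test pair.
models_dict = train_models(
    series.iloc[:-12],
    models=["Prophet", "NBEATS"],
    forecast_len=12,
    full_df=series,
    seasonality="infer_from_data",
    in_sample=True,
)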