Пример #1
0
def test_standardized_forecasts_error():
    """
    Check the standardized forecast errors against a direct computation.

    On a univariate series the expected values are obtained by dividing the
    raw forecast errors by the square root of the matching entry of the
    forecast error covariance.
    """

    # Build the GDP frame from the reference results (quarter-start dates)
    reference = results_kalman_filter.uc_uni
    quarters = pd.date_range('1947-01-01', '1995-07-01', freq='QS')
    data = pd.DataFrame(reference['data'], index=quarters, columns=['GDP'])
    data['lgdp'] = np.log(data['GDP'])

    # ARIMA(1,1,0) on log GDP
    res = SARIMAX(data['lgdp'], order=(1, 1, 0)).fit(disp=-1)

    raw_errors = res.filter_results.forecasts_error[0]
    error_cov = res.filter_results.forecasts_error_cov[0, 0]
    expected = raw_errors / np.sqrt(error_cov)

    assert_allclose(res.filter_results.standardized_forecasts_error[0],
                    expected)
Пример #2
0
def sarima_model(feature):
    """Fit a SARIMA model on one column of ``df1`` and print its test RMSE.

    The column is cast to float32 and split chronologically 80/20. A
    SARIMAX model with order (1, 1, 1) and seasonal order (1, 1, 1, 1) is
    fitted on the first part and used to predict the held-out part.

    Args:
        feature: Key of the series to model in the module-level ``df1``.

    Returns:
        None. The RMSE is only printed.
    """
    values = df1[feature].values.astype('float32')
    # Derive the split point from the series that is actually being split
    # instead of from an unrelated column.
    split = int(len(values) * 0.8)
    train, test = values[:split], values[split:]

    model_fit = SARIMAX(train, order=(1, 1, 1),
                        seasonal_order=(1, 1, 1, 1)).fit(disp=False)
    # Out-of-sample positions len(train) .. len(train) + len(test) - 1
    yhat = model_fit.predict(len(train),
                             len(train) + len(test) - 1,
                             typ="levels")

    rmse = sqrt(mean_squared_error(test, yhat))
    print("\n\n SARIMA RMSE: %.5f" % rmse)
    print()
Пример #3
0
def sarimax_fc(train,
               test,
               order,
               seas_order,
               exog_train=None,
               exog_test=None):
    """Fit SARIMAX on ``train`` and score its forecast over ``test``.

    Args:
        train: Training series.
        test: Hold-out series; must provide ``mean()``.
        order: Non-seasonal order passed to SARIMAX.
        seas_order: Seasonal order passed to SARIMAX.
        exog_train: Optional exogenous regressors for fitting.
        exog_test: Optional exogenous regressors for the forecast horizon.

    Returns:
        dict with keys 'prediction' (the forecast, renamed to
        'sarima_predictions'), 'rmse' and 'rmse_pct' (rmse divided by the
        mean of ``test``).
    """
    fitted = SARIMAX(train,
                     order=order,
                     exog=exog_train,
                     seasonal_order=seas_order).fit()
    start = len(train)
    end = start + len(test) - 1
    pred = fitted.predict(start, end, exog=exog_test,
                          typ='levels').rename('sarima_predictions')
    # Score once and reuse the value for the relative error.
    rmse_pred = rmse(test, pred)
    return {
        'prediction': pred,
        'rmse': rmse_pred,
        'rmse_pct': rmse_pred / test.mean(),
    }
Пример #4
0
def test_small_sample_serial_correlation_test():
    # Ljung-Box serial correlation test with the small-sample df adjustment,
    # run on the Nile dataset. The expected statistic and p-value come from
    # R's forecast package:
    # library(forecast)
    # fit <- Arima(y, order=c(1,0,1), include.constant=FALSE)
    # checkresiduals(fit, lag=10)
    from statsmodels.tsa.statespace.sarimax import SARIMAX
    nile_frame = nile.data.load_pandas().data
    nile_frame.index = pd.date_range('1871-01-01', '1970-01-01', freq='AS')
    fitted = SARIMAX(endog=nile_frame['volume'],
                     order=(1, 0, 1),
                     trend='n',
                     freq=nile_frame.index.freq).fit()

    ljung_box = fitted.test_serial_correlation(method='ljungbox',
                                               lags=10,
                                               df_adjust=True)
    # Keep the entries at the last lag of the first series
    actual = ljung_box[0, :, -1]
    assert_allclose(actual, [14.116, 0.0788], atol=1e-3)
Пример #5
0
def sarima_optimizer_aic(train, pdq, seasonal_pdq):
    """Grid-search SARIMA orders and return the pair with the lowest AIC.

    Args:
        train: Training series passed to SARIMAX.
        pdq: Iterable of non-seasonal ``(p, d, q)`` candidates.
        seasonal_pdq: Iterable of seasonal order candidates.

    Returns:
        ``(best_order, best_seasonal_order)``. Both are ``None`` when no
        candidate could be fitted (including when either grid is empty).
    """
    best_aic = float("inf")
    best_order = best_seasonal_order = None
    for param in pdq:
        for param_seasonal in seasonal_pdq:
            try:
                results = SARIMAX(train,
                                  order=param,
                                  seasonal_order=param_seasonal).fit(disp=0)
                aic = results.aic
            except Exception:
                # A candidate that cannot be built or fitted is skipped.
                # Unlike a bare ``except``, KeyboardInterrupt and SystemExit
                # still propagate.
                continue
            if aic < best_aic:
                best_aic = aic
                best_order = param
                best_seasonal_order = param_seasonal
            print('SARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal,
                                                  aic))
    print('SARIMA{}x{}12 - AIC:{}'.format(best_order, best_seasonal_order,
                                          best_aic))
    return best_order, best_seasonal_order
Пример #6
0
def test_dynamic_against_sarimax():
    # Simulate an AR(1) process with coefficient 0.9; the first value is
    # scaled by sqrt(1 / (1 - 0.9**2)).
    rs = np.random.RandomState(12345678)
    n_obs = 1001
    e = rs.standard_normal(n_obs)
    y = np.empty(n_obs)
    y[0] = e[0] * np.sqrt(1.0 / (1 - 0.9**2))
    for t in range(1, n_obs):
        y[t] = 0.9 * y[t - 1] + e[t]

    sres = SARIMAX(y, order=(1, 0, 0), trend='c').fit(disp=False)
    mod = AutoReg(y, 1, old_names=False)

    # Same comparison for the default, fully dynamic and offset-dynamic
    # prediction modes, all evaluated with SARIMAX's first two parameters.
    for extra in ({}, {'dynamic': True}, {'dynamic': 50}):
        spred = sres.predict(900, 1100, **extra)
        pred = mod.predict(sres.params[:2], 900, 1100, **extra)
        assert_allclose(spred, pred)
Пример #7
0
def sarimax_forecast(df):
    '''Forecast the price for the first row of ``df`` that has no price.

    Rows with a price form the history; the first row without a price is the
    forecast target and its remaining columns are used as exogenous
    regressors. When the ``temp`` value of that row is missing, no model is
    fitted and the forecast row is filled with NaN.

    Args:
        df: DataFrame with a ``price`` column, a ``temp`` column and any
            further exogenous columns.

    Returns:
        (forecast, past): ``forecast`` is a one-row DataFrame with columns
        'price', 'lower_interval', 'upper_interval' (95 % interval) and an
        index named 'date_time'; ``past`` is the last historical row of the
        first column of ``df``.
    '''

    # split past and future
    past = df[~df.price.isnull()]
    future = df[df.price.isnull()].drop('price', axis=1)
    # forecast for next time point only
    future = future.iloc[:1, :]
    # ``.iloc[0]`` is explicit positional access; plain ``[0]`` is a label
    # lookup whenever the index is not integer based.
    if future.temp.isnull().iloc[0]:
        # weather forecast data is not available for that hour
        forecast = np.nan
        lower = np.nan
        upper = np.nan
        print('weather data is not available')
    else:
        past.index = pd.DatetimeIndex(past.index.values,
                                      freq=past.index.inferred_freq)
        # Build Model
        fitted = SARIMAX(past.price,
                         exog=past.drop('price', axis=1),
                         order=(1, 1, 1),
                         seasonal_order=(1, 0, 2, 7)).fit(maxiter=300)
        # One forecast call provides both the point forecast and its interval.
        results = fitted.get_forecast(1, exog=future)
        forecast = results.predicted_mean
        interval = results.conf_int(alpha=0.05)
        lower = interval['lower price'].iloc[0]
        upper = interval['upper price'].iloc[0]

    # create forecast df with datetimeIndex
    forecast = pd.DataFrame(dict(price=forecast,
                                 lower_interval=lower,
                                 upper_interval=upper),
                            index=future.index)
    forecast.index.name = 'date_time'
    past = past.iloc[-1:, 0]
    return forecast, past
Пример #8
0
def SARIMA_forecast(experiment, plot_fit=False):
    """
    Train a SARIMA model on the experiment's training data and forecast ahead.

    Args:
        experiment(dict): Experiment dictionary; the keys 'L_train',
            'N_train' and 'N_test' are read.
        plot_fit(bool): When true, plot the training data with the in-sample
            fit, and the forecast in a second figure.

    Returns:
        experiment(dict): The same dictionary with 'L_test_prediction' and
            'L_train_prediction' added, reshaped to (N_test, 1) and
            (N_train, 1).
    """
    L_train = experiment['L_train']
    N_train = experiment['N_train']
    N_test = experiment['N_test']

    # SARIMA(1,1,1)x(1,1,0,24) without stationarity/invertibility constraints
    model_fit = SARIMAX(L_train,
                        order=(1, 1, 1),
                        seasonal_order=(1, 1, 0, 24),
                        enforce_invertibility=False,
                        enforce_stationarity=False).fit(disp=False)
    fitted_values = model_fit.fittedvalues
    forecast_values = model_fit.forecast(steps=N_test)

    # column vectors
    L_test_prediction = forecast_values.reshape((N_test, 1))
    L_train_prediction = fitted_values.reshape((N_train, 1))

    if plot_fit:
        # first figure: training data and in-sample fit
        plt.figure()
        plt.plot(L_train)
        plt.plot(L_train_prediction, 'red')
        # second figure: forecast
        plt.figure()
        plt.plot(L_test_prediction, 'red')

    experiment['L_test_prediction'] = L_test_prediction
    experiment['L_train_prediction'] = L_train_prediction

    return experiment
Пример #9
0
    def SARIMA(self,
               idx,
               numpredictions,
               order=(0, 1, 1),
               seasonal_order=(0, 1, 1, 24),
               trend=None):
        """Fit SARIMAX on the rows before ``idx`` and forecast.

        The training window covers at most the last 7 * 4 * 24 rows of
        ``self.dataX`` before ``idx``, restricted to the column(s) selected
        by ``cfg.prediction['pos']``. The fitted results are stored in
        ``self.trained_model``.

        ``numpredictions`` is accepted but not used: the forecast horizon is
        read from ``cfg.prediction['num_predictions']``.
        """
        window = 7 * 4 * 24
        first_row = max(idx - window, 0)
        trainingData = self.dataX[first_row:idx, cfg.prediction['pos']]

        model = SARIMAX(trainingData,
                        order=order,
                        trend=trend,
                        seasonal_order=seasonal_order,
                        enforce_stationarity=False,
                        enforce_invertibility=False)
        # silence warnings for the duration of the fit only
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            self.trained_model = model.fit(disp=False)

        return self.trained_model.forecast(cfg.prediction['num_predictions'])
    def SeasonaAutoIntegratedMovingAverageExogenousRegressors(
            self, data, exodata):
        """Fit one SARIMAX(1,1,1) model per axis and predict each axis.

        For each of the three columns 0..2, column ``i`` of ``data`` is the
        endogenous series and column ``i`` of ``exodata`` the exogenous one.
        Predictions for positions ``self.start`` .. ``self.end - 1`` are
        written into column ``i`` of the returned frame.

        Returns:
            DataFrame of shape (self.end - self.start, 3).
        """
        n_rows = self.end - self.start
        datahat = pd.DataFrame(np.zeros(shape=(n_rows, 3)))

        for axis in range(3):
            endog = data[axis].values.tolist()
            exog = exodata[axis].values.tolist()
            fitted = SARIMAX(endog,
                             exog=exog,
                             order=(1, 1, 1),
                             seasonal_order=(0, 0, 0, 0)).fit(disp=False)
            # NOTE(review): the whole ``exodata`` object wrapped in a list is
            # passed as ``exog`` here, not the per-axis column used for
            # fitting; the original author marked this as uncertain.
            datahat.iloc[:, axis] = fitted.predict(self.start,
                                                   self.end - 1,
                                                   exog=[exodata])

        return datahat
Пример #11
0
def forcast(i):
    """Fit a SARIMA model on the global ``train`` series, plot and score it.

    Relies on the module-level names ``train``, ``test``,
    ``horizon_data_length`` and ``forecast_accuracy``.

    Args:
        i: Identifier that is only used in the printed messages.

    Returns:
        (yfore, horizon_data_length): the first ``horizon_data_length``
        forecast values as a list, and the horizon length itself.
    """
    sarima_model = SARIMAX(train,
                           order=(1, 0, 1),
                           seasonal_order=(0, 1, 1, 2),
                           enforce_stationarity=False,
                           enforce_invertibility=False)
    sfit = sarima_model.fit()
    print(sfit.summary())
    sfit.plot_diagnostics(figsize=(10, 6))
    plt.show()

    # In-sample prediction (not a forecast yet)
    ypred = sfit.predict(start=0, end=len(train))
    plt.plot(train)
    plt.plot(ypred)
    plt.title("trian")

    # Forecast: the fitted model is extended into the future
    forewrap = sfit.get_forecast(steps=523)
    forecast_val = forewrap.predicted_mean
    plt.plot(train)
    plt.plot(forecast_val)
    plt.plot(test)
    plt.show()

    yfore = []
    for j in range(0, horizon_data_length):
        print("Actual {} {} {:.2f} forcast {:.2f}".format(
            i, j, test[j], forecast_val[j]))
        yfore.append(forecast_val[j])

    metrics = forecast_accuracy(forecast_val, test)
    # The previous format string had three placeholders for two arguments
    # and raised IndexError.
    print("RMSE is {}={:.2f}".format(i, metrics['rmse']))

    return yfore, horizon_data_length
def sarimaxPrdict(train_y,
                  p_order,
                  p_seasonal_order,
                  vtrend,
                  steps=1,
                  disp=False):
    """Fit SARIMAX on ``train_y`` and forecast ``steps`` periods ahead.

    Args:
        train_y: Training series.
        p_order: Non-seasonal order.
        p_seasonal_order: Seasonal order.
        vtrend: Trend specification passed to SARIMAX.
        steps: Number of periods to forecast.
        disp: When true, plot the observations from '2018' onwards together
            with the forecast and a shaded band of +/- 5 % of the forecast
            value. It does not control the optimizer output.

    Returns:
        The forecast returned by the fitted model.
    """
    fitted = SARIMAX(train_y,
                     order=p_order,
                     seasonal_order=p_seasonal_order,
                     trend=vtrend,
                     enforce_stationarity=False,
                     enforce_invertibility=False).fit(disp=False)
    pred = fitted.forecast(steps=steps)

    if not disp:
        return pred

    # Band of +/- 5 % around the forecast (not a statistical interval)
    band = pd.DataFrame(index=pred.index)
    band['low'] = pred - pred * 0.05
    band['upper'] = pred + pred * 0.05

    ax = train_y['2018':].plot(label='observed')
    pred.plot(ax=ax, label='Forecast', alpha=.7)
    ax.fill_between(pred.index,
                    band.iloc[:, 0],
                    band.iloc[:, 1],
                    color='k',
                    alpha=.1)
    ax.set_xlabel('Date')
    ax.set_ylabel(train_y.name)
    plt.legend()
    plt.show()

    return pred
def sarima_orders(ts):
    """Fit every SARIMA order of a small grid on ``ts`` and print its AIC.

    The non-seasonal orders are all (p, d, q) with each entry in {0, 1}; the
    seasonal orders are the same triples with a seasonal period of 12.
    Combinations that cannot be fitted are reported and skipped.

    Args:
        ts: Time series passed to SARIMAX.
    """
    p = q = d = range(0, 2)
    pdq = list(itertools.product(p, d, q))
    seasonal_pdq = [(P, D, Q, 12) for P, D, Q in pdq]
    print('SARIMA parameters...')
    for param in pdq:
        for param_seasonal in seasonal_pdq:
            try:
                results = SARIMAX(ts,
                                  order=param,
                                  seasonal_order=param_seasonal,
                                  enforce_stationarity=False,
                                  enforce_invertibility=False).fit()
                aic = results.aic
            except Exception as exc:
                # Say which combination failed and why instead of printing
                # an uninformative placeholder.
                print('ARIMA{}x{} - failed: {}'.format(param, param_seasonal,
                                                       exc))
                continue
            print('ARIMA{}x{} - AIC:{}'.format(param, param_seasonal, aic))
Пример #14
0
def modifyFile(reader, writer, count):
    """Replace selected labels with SARIMAX predictions and write all rows.

    Batches are read with ``getData(reader, count)`` until it returns a class
    id of 0. For each batch a SARIMAX(1,1,1)x(0,1,1,7) model is fitted on the
    labels (indexed by the module-level ``index``). The labels at the
    positions listed in the module-level ``dateToModify`` are replaced by the
    rounded model prediction, floored at 0. If the model cannot be fitted
    the batch is written unchanged.

    Args:
        reader: Source passed through to ``getData``.
        writer: Object with a ``writerow`` method.
        count: Number of rows per batch.
    """
    while True:
        clas, data, label = getData(reader, count)
        if clas == 0:
            break
        series = pd.Series(label)
        series.index = pd.Index(index)
        try:
            result = SARIMAX(series,
                             order=(1, 1, 1),
                             seasonal_order=(0, 1, 1, 7)).fit()
        except Exception:
            print("%d: failed to train sarimax model, abort" % clas)
        else:
            for i in dateToModify:
                # ``.iloc[0]`` takes the single predicted value by position;
                # ``[0]`` would be a label lookup on the series index.
                label[i] = round(result.predict(i, i).iloc[0])
                if label[i] < 0:
                    label[i] = 0
        for i in range(count):
            writer.writerow(data[i] + [label[i]])
Пример #15
0
 def GridSearch(self, n_days):
     """Grid-search the ARIMA order with the lowest mean hold-out MAE.

     For every (p, d, q) with p, q in 1..4 and d in 0..2 a SARIMAX model is
     fitted on ``self.Values``. Its parameters are then applied, without
     refitting, to each county series with the last ``self.v`` observations
     held out, and the MAE of the forecast over those observations is
     averaged across counties. The best order is stored in
     ``self.BestParams``.

     Orders for which fitting or scoring fails are reported and excluded
     from the ranking.

     Args:
         n_days: Forwarded to ``Build_Training_Data`` as ``v``.

     Raises:
         ValueError: If no order could be scored.
     """
     self.Build_Training_Data(v=n_days, training=True)
     warnings.filterwarnings("ignore")
     params = []
     scores = []
     for p in range(1, 5):
         for q in range(1, 5):
             for d in range(3):
                 order = (p, d, q)
                 try:
                     results = SARIMAX(self.Values,
                                       order=order,
                                       missing='drop',
                                       enforce_invertibility=False).fit(disp=0)
                     scores_counties = []
                     for county in self.Counties:
                         DataCounty = self.Data_Dates[county].dropna()
                         n_obs = len(DataCounty)
                         ModelCounty = SARIMAX(DataCounty[:-self.v],
                                               order=order,
                                               missing='drop',
                                               enforce_invertibility=False)
                         res = ModelCounty.smooth(results.params)
                         # ``end`` is inclusive, so stopping at n_obs - 1
                         # yields exactly the self.v held-out positions and
                         # keeps forecasts aligned with the actual values.
                         fc = res.get_prediction(n_obs - self.v, n_obs - 1)
                         Yhat = fc.summary_frame(alpha=0.05)['mean'].values
                         Y = DataCounty.iloc[-self.v:].values
                         scores_counties.append(sum(abs(Y - Yhat)) / self.v)
                 except Exception:
                     print('Training failed for parameters :', (p, d, q))
                     # Do not rank this order with stale or missing scores.
                     continue
                 scores.append(np.nanmean(scores_counties))
                 params.append(order)
     if not scores:
         raise ValueError('No parameter combination could be evaluated')
     # nanargmin: a NaN score must never be selected as the minimum
     argbest = np.nanargmin(scores)
     print('Best MAE : ', scores[argbest])
     print('Best params : ', params[argbest])
     self.BestParams = params[argbest]
Пример #16
0
def arima_grid_search(dataframe, s):
    """Fit all SARIMA orders of a 0/1 grid and return the lowest-AIC fit.

    Every non-seasonal (p, d, q) with entries in {0, 1} is combined with
    every seasonal (P, D, Q) with entries in {0, 1} and seasonal period
    ``s``.

    Args:
        dataframe: Endogenous data passed to SARIMAX.
        s: Seasonal period.

    Returns:
        The fitted results object with the lowest AIC.
    """
    param_combinations = list(itertools.product(range(2), repeat=3))

    lowest_aic = None
    best_result = None
    for order in param_combinations:
        for seasonal in param_combinations:
            # Fit on the function argument; ``disp`` belongs to ``fit`` and
            # is therefore not passed to the constructor.
            model = SARIMAX(dataframe,
                            order=order,
                            seasonal_order=seasonal + (s,),
                            enforce_stationarity=False,
                            enforce_invertibility=False,
                            simple_differencing=False)
            model_result = model.fit(maxiter=500, disp=False)
            # ``is None`` so that an AIC of exactly 0.0 is not mistaken for
            # "no result yet".
            if lowest_aic is None or model_result.aic < lowest_aic:
                lowest_aic = model_result.aic
                best_result = model_result
    return best_result
    def PrintParameterMatrix(self, dataset, maxNonSeasonal, maxSeasonal,
                             lagsPerDay, noTrainingDays):
        """Fit a grid of SARIMA orders and print their AIC/BIC table.

        The training set is ``dataset`` from ``noTrainingDays`` seasons before
        the end up to one season before the end, where one season is
        ``int(lagsPerDay)`` rows. Every order combination up to the given
        maxima is fitted, except the all-zero one. Each successful fit prints
        its label and summary; failures print the exception. Finally a
        DataFrame of AIC and BIC indexed by the labels is printed.

        Args:
            dataset: Sliceable series of observations.
            maxNonSeasonal: Maximum (p, d, q), read by index 0..2.
            maxSeasonal: Maximum (P, D, Q), read by index 0..2.
            lagsPerDay: Seasonal period (converted with ``int``).
            noTrainingDays: Number of seasons counted back from the end.
        """
        max_p, max_d, max_q = (maxNonSeasonal[0], maxNonSeasonal[1],
                               maxNonSeasonal[2])
        max_P, max_D, max_Q = maxSeasonal[0], maxSeasonal[1], maxSeasonal[2]
        period = int(lagsPerDay)
        aic, bic, orders = [], [], []

        train_set = dataset[-noTrainingDays * period:-period]

        # Seasonal orders vary slowest, non-seasonal orders fastest.
        grid = itertools.product(range(max_P + 1), range(max_D + 1),
                                 range(max_Q + 1), range(max_p + 1),
                                 range(max_d + 1), range(max_q + 1))
        for P, D, Q, p, d, q in grid:
            if not (P or D or Q or p or d or q):
                continue

            try:
                result = SARIMAX(train_set,
                                 order=(p, d, q),
                                 seasonal_order=(P, D, Q, period)).fit()
                order = ('Model: Nonseanonal ({!r},{!r},{!r}) Seasonal: '
                         '({!r},{!r},{!r})').format(p, d, q, P, D, Q)
                print(order)
                print(result.summary())
                orders.append(order)
                bic.append(result.bic)
                aic.append(result.aic)
            except Exception as e:
                print(e)

        results = pd.DataFrame(index=orders, data={'aic': aic, 'bic': bic})
        print(results)
Пример #18
0
    def SARIMA_GridSearch(self, idx, numpredictions):
        """Pick a SARIMA configuration by grid search, refit it and forecast.

        The grid search runs on up to 7 * 24 + 1 rows of ``self.dataX``
        before ``idx``; the selected configuration is then fitted on up to
        7 * 24 rows before ``idx``. The fitted results are stored in
        ``self.trained_model``.

        ``numpredictions`` is accepted but not used: the forecast horizon is
        read from ``cfg.prediction['num_predictions']``.
        """
        nDays = 7
        window = nDays * 24
        column = cfg.prediction['pos']
        # Clamp at 0 like the refit window below: a negative start would wrap
        # around to the end of the array.
        searchData = self.dataX[max(idx - window - 1, 0):idx, column]
        cfg_list = sarima_configs(seasonal=[24])

        # do the grid search on the training data
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            scores = grid_search(searchData, cfg_list, 1, parallel=True)

        err = [entry[1] for entry in scores]
        # NOTE(review): assumes scores[i] belongs to cfg_list[i]; confirm that
        # grid_search neither reorders nor drops configurations.
        index = err.index(min(err))
        order, season_order, trend = (cfg_list[index][0], cfg_list[index][1],
                                      cfg_list[index][2])

        # train the model with the last 7 days and predict
        trainingData = self.dataX[max(idx - window, 0):idx, column]
        model = SARIMAX(trainingData,
                        order=order,
                        seasonal_order=season_order,
                        trend=trend,
                        enforce_invertibility=False,
                        enforce_stationarity=False)
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            self.trained_model = model.fit(disp=False)
        pred = self.trained_model.forecast(cfg.prediction['num_predictions'])
        return pred
Пример #19
0
def show_bestScore(train_set, test_set):
    """
    Search the module-level ``pList``/``dList``/``qList`` grid for the
    SARIMAX order with the lowest MAE on ``test_set``.

    The user is first asked whether p, d and q values are defined; a
    negative answer ("No", "no", "N", "n") aborts the search.

    Returns the printed summary message for the best order, or None when the
    search was aborted or no order was evaluated.
    """
    start = input("Do you have p, d and q values defined? ")
    if start in ("No", "no", "N", "n"):
        print("Please define p, d, q values and retry.")
        return None

    print("Finding out...")
    target = train_set.astype("float32")
    testVals = test_set.astype("float32")

    # None marks "nothing evaluated yet", so neither a very large error nor
    # a perfect score of 0 is treated specially.
    best_score, best_order = None, None
    for p in pList:
        for d in dList:
            for q in qList:
                order = (p, d, q)
                fit = SARIMAX(target, order=order).fit(disp=False)
                preds = fit.forecast(len(test_set))
                error = mean_absolute_error(testVals, preds)
                if best_score is None or error < best_score:
                    best_score, best_order = error, order

    if best_score is None:
        print("Invalid or missing value for MAE. Please retry.")
        return None

    # ``print`` returns None, so build the message first and return it.
    out = "Best SARIMAX: MAE = %.f :: Order = %s" % (best_score, best_order)
    print(out)
    return out  # Best MAE = 700 :: Order = (8, 3, 1)
Пример #20
0
    def train_SARIMA_model(self, df, game_id, dep_var, indep_var, target, type,
                           model_type):
        """Fit a SARIMAX model on ``df`` and save it.

        ``df[dep_var]`` is the endogenous series and ``df[indep_var]`` the
        exogenous data. When ``self.seasonal_order`` is set and either has 24
        or fewer entries, a message is printed and nothing is fitted. The
        seasonal order is passed to the model only when it is set. The
        fitted model is saved, with ``remove_data=True``, to the path built
        by ``util.build_model_save_path``.
        """
        Y = df[dep_var]
        X = df[indep_var]
        seasonal = self.seasonal_order is not None

        if len(Y) <= 24 and seasonal:
            print("Length of dependent variable is to short: {}".format(len(Y)))
            return

        if len(X) <= 24 and seasonal:
            print("Length of dependent variables is to short: {}".format(len(X)))
            return

        model_kwargs = dict(endog=Y,
                            exog=X,
                            order=self.order,
                            enforce_stationarity=False,
                            enforce_invertibility=False)
        if seasonal:
            model_kwargs['seasonal_order'] = self.seasonal_order

        fitted_model = SARIMAX(**model_kwargs).fit()
        save_path = util.build_model_save_path(game_id, target, type, model_type)
        fitted_model.save(save_path, remove_data=True)
Пример #21
0
def forecast(data):
    """
    Predict the next 7 values of ``data`` with a SARIMAX(1, 1, 1) model.

    The model is refitted before every step and each prediction, truncated
    to an int, is appended to the history used for the next step. The
    caller's ``data`` is left unmodified.

    Returns: A list of 7 integer predictions
    """
    # Work on a copy so the predictions are not appended to the caller's list.
    history = list(data)
    predictions = []
    for _ in range(7):
        # ``trace`` and ``error_action`` are not SARIMAX options and are
        # therefore not passed.
        model_fit = SARIMAX(history, order=(1, 1, 1)).fit(disp=False)
        # One-step-ahead prediction for the position right after the history
        yhat = model_fit.predict(len(history), len(history))
        value = int(yhat[0])
        history.append(value)
        predictions.append(value)
    return predictions
Пример #22
0
def sarimax_forecast(hour=11):
    '''Forecast the price for the first row without a price for one hour.

    The data is loaded with ``get_data(hour=hour)``. Rows with a price form
    the history; the first row without a price is the forecast target and
    its remaining columns are used as exogenous regressors. When the
    ``temp`` value of that row is missing, no model is fitted and the
    forecast row is filled with NaN.

    Args:
        hour: Hour of the day, forwarded to ``get_data``.

    Returns:
        (forecast, past): ``forecast`` is a one-row DataFrame with columns
        'price', 'lower', 'upper' (95 % interval); ``past`` is the last
        historical row of the first column of the loaded data.
    '''

    df_all = get_data(hour=hour)

    # split past and future
    past = df_all[~df_all.price.isnull()]
    future = df_all[df_all.price.isnull()].drop('price', axis=1)

    future = future.iloc[:1, :]
    # ``.iloc[0]`` is explicit positional access; plain ``[0]`` is a label
    # lookup whenever the index is not integer based.
    if future.temp.isnull().iloc[0]:
        forecast = np.array([np.nan])
        lower = np.nan
        upper = np.nan
    else:
        past.index = pd.DatetimeIndex(past.index.values,
                                      freq=past.index.inferred_freq)
        # Build Model
        fitted = SARIMAX(past.price,
                         past.drop('price', axis=1),
                         order=(1, 1, 1),
                         seasonal_order=(1, 0, 2, 7)).fit(maxiter=300)
        # One forecast call provides both the point forecast and its interval.
        results = fitted.get_forecast(1, exog=future)
        forecast = results.predicted_mean
        confidence_int = results.conf_int(alpha=0.05)
        lower = confidence_int['lower price'].iloc[0]
        upper = confidence_int['upper price'].iloc[0]

    # create forecast df with datetimeIndex
    forecast = pd.DataFrame(dict(price=forecast, lower=lower, upper=upper),
                            index=future.index)
    past = past.iloc[-1:, 0]
    return forecast, past
Пример #23
0
    def sarima_forecast(history, config):
        """Fit SARIMAX on ``history`` and time a one-step forecast.

        Args:
            history: Observations to fit on.
            config: ``(order, seasonal_order, trend)`` triple.

        Returns:
            (yhat, train_time, predi_time): the first predicted value, the
            seconds spent fitting and the seconds spent predicting.
        """
        order, sorder, trend = config
        blockPrint()
        try:
            # define model
            model = SARIMAX(history,
                            order=order,
                            seasonal_order=sorder,
                            trend=trend,
                            enforce_stationarity=False,
                            enforce_invertibility=False)
            # fit model
            t0 = time.time()
            model_fit = model.fit(disp=False)
            train_time = time.time() - t0

            # make one step forecast
            t0 = time.time()
            yhat = model_fit.predict(len(history), len(history))
            predi_time = time.time() - t0
        finally:
            # Always undo blockPrint(), even when fitting or predicting fails.
            enablePrint()

        return yhat[0], train_time, predi_time
def train_sarima_model(y_train,
                       order,
                       seasonal_order,
                       plot_diagnostics=True,
                       **kwargs):
    """Fit a SARIMAX model on ``y_train`` and return the fitted results.

    ``order`` and ``seasonal_order`` are the SARIMA orders; any further
    keyword arguments are forwarded to the SARIMAX constructor. When
    ``plot_diagnostics`` is true the model's diagnostics plot is shown
    before returning."""

    results = SARIMAX(y_train,
                      order=order,
                      seasonal_order=seasonal_order,
                      **kwargs).fit()

    if not plot_diagnostics:
        return results

    results.plot_diagnostics(figsize=(15, 8))
    plt.show()
    return results
Пример #25
0
def SARIMAX_forecast(series, cfg, pred_len):
    """Fit SARIMAX on all but the last ``pred_len`` points and predict them.

    Args:
        series: Indexed series; its last ``pred_len`` entries are held out.
        cfg: Sequence whose items 0, 1 and 2 are the order, the seasonal
            order and the trend.
        pred_len: Number of trailing observations to predict.

    Returns:
        Predictions from the first to the last index label of the held-out
        part.
    """
    cut = int(len(series) - pred_len)
    train = series[0:cut]
    holdout = series[cut:]

    results = SARIMAX(train,
                      order=cfg[0],
                      seasonal_order=cfg[1],
                      trend=cfg[2],
                      initialization='approximate_diffuse').fit()

    # Predict over the index range of the held-out part
    return results.predict(start=holdout.index[0], end=holdout.index[-1])
Пример #26
0
def make_arimax_model_with_params(param_dict, time_series, train_fraction, exog_var):
    '''
    Build a SARIMAX model from a parameter dictionary and fit it on the
    leading part of a time series.

    The series (and the exogenous data, if given) is split at
    ceil(train_fraction * len(time_series)); the model is fitted on the
    first part.

    It expects a parameter dictionary of the following format:
    param_dict = {'order': (p, d, q), 'seasonal': (p, d, q, m), 'trend': 'c' or 't' or 'n' or 'ct', 'enforce_stationarity': True or False, 'enforce_invertibility': True or False }

    It always returns five values: the fitted model (result),
    time_series_train, time_series_test, exog_series_train and
    exog_series_test. The last two are None when exog_var is None.

    Note: warnings are silenced process-wide via warnings.filterwarnings.
    '''
    warnings.filterwarnings("ignore")

    order_params = param_dict['order']
    seasonal_params = param_dict['seasonal']
    trend_param = param_dict['trend']
    stationarity_param = param_dict['enforce_stationarity']
    invertibility_param = param_dict['enforce_invertibility']

    # One split point shared by the endogenous and exogenous series
    split = math.ceil(train_fraction * len(time_series))
    time_series_train = time_series[0:split]
    time_series_test = time_series[split:]

    if exog_var is None:
        exog_series_train = None
        exog_series_test = None
    else:
        exog_series_train = exog_var[0:split]
        exog_series_test = exog_var[split:]

    model = SARIMAX(time_series_train,
                    exog=exog_series_train,
                    order=order_params,
                    seasonal_order=seasonal_params,
                    trend=trend_param,
                    enforce_stationarity=stationarity_param,
                    enforce_invertibility=invertibility_param)
    # ``disp`` is the fit option that controls optimizer output.
    result = model.fit(disp=0)

    return (result, time_series_train, time_series_test, exog_series_train,
            exog_series_test)
Пример #27
0
def predictTemperature2(startDate, endDate, temperature, n) -> list:
    """Forecast hourly temperatures from a single day of observations.

    Ten artificial days are placed before the real day: the observed day
    repeated, plus random offsets between -2.0 and 1.9 in steps of 0.1. A
    SARIMA(1,1,1)x(1,1,1,24) model is fitted on the combined hourly series
    and used to predict between the day after the data ends and ``n`` days
    after that; the final predicted value is dropped.

    Raises:
        ValueError: If ``temperature`` does not have exactly 24 elements.
    """
    if len(temperature) != 24:
        raise ValueError('temperature must be an array with 24 elements')

    day_format = "%Y-%m-%d"
    start_date = datetime.strptime(startDate, day_format)
    end_date = datetime.strptime(endDate, day_format) + timedelta(hours=23)

    artificial_days = 10
    artificial_start = start_date - timedelta(days=artificial_days)
    n_hours = ((end_date - artificial_start).days + 1) * 24
    date_list = [artificial_start + timedelta(hours=h) for h in range(n_hours)]

    # One day of data is too little to forecast several days, so earlier days
    # are synthesised from the observed day with a random offset.
    jitter = np.random.choice(range(-20, 20, 1), artificial_days * 24) / 10
    # NOTE(review): presumably ``temperature`` is a list, so that multiplying
    # by artificial_days repeats it; confirm with callers.
    synthetic = jitter + temperature * artificial_days
    full_temp = np.concatenate([synthetic, temperature])

    df = pd.DataFrame.from_dict({
        'date': date_list,
        'temp': full_temp
    }).set_index('date')

    sarima_fit = SARIMAX(df,
                         order=(1, 1, 1),
                         seasonal_order=(1, 1, 1, 24),
                         enforce_invertibility=False,
                         enforce_stationarity=True,
                         initialization='approximate_diffuse').fit()

    last_stamp = date_list[-1]
    new_data_points = [(last_stamp + timedelta(days=k)).strftime(day_format)
                       for k in [1, n + 1]]
    sarima_pred = sarima_fit.get_prediction(*new_data_points)
    # Drop the last value, which belongs to the day after the horizon
    return sarima_pred.prediction_results._forecasts[0][:-1]
Пример #28
0
def sarimax_cv(df):
    """Grid-search SARIMAX orders on monthly eviction notices by AIC.

    ``df`` is summed per 'Month_Year' on the 'Eviction_Notice' column; every
    combination of non-seasonal (p, d, q) and seasonal (P, D, Q, s) orders
    with p in 0..3, d and q in 0..1 and s in {6, 12} is fitted.

    Args:
        df: DataFrame with 'Month_Year' and 'Eviction_Notice' columns.

    Returns:
        (best_aic, pdq_best, seasonal_best). When no model could be fitted
        the result is ``(inf, None, None)``.
    """
    p = range(0, 4)
    d = range(0, 2)
    q = range(0, 2)
    s = [6, 12]

    # All combinations of non-seasonal (p, d, q) triplets
    pdq = list(itertools.product(p, d, q))

    # All combinations of seasonal (P, D, Q, s) quadruplets
    seasonal_pdq = list(itertools.product(p, d, q, s))

    # reformatting the data for use in SARIMAX model
    sorted_df = df[['Month_Year', 'Eviction_Notice'
                    ]].groupby('Month_Year').sum().reset_index()
    sorted_df['Eviction_Notice'] = sorted_df['Eviction_Notice'].astype(float)
    y = sorted_df[['Month_Year', 'Eviction_Notice']].set_index(['Month_Year'],
                                                               inplace=False)

    # The best AIC is tracked in its own variable. Reusing one name for both
    # the threshold and the fit result made the comparison raise TypeError,
    # which was swallowed, so no order was ever recorded.
    best_aic = float("inf")
    pdq_best = None
    seasonal_best = None
    for param in pdq:
        for param_seasonal in seasonal_pdq:
            try:
                fitted = SARIMAX(y,
                                 order=param,
                                 seasonal_order=param_seasonal,
                                 enforce_stationarity=False,
                                 enforce_invertibility=False).fit()
                aic = fitted.aic
            except Exception:
                # skip combinations that cannot be fitted
                continue
            if aic < best_aic:
                best_aic = aic
                pdq_best = param
                seasonal_best = param_seasonal
    return best_aic, pdq_best, seasonal_best
Пример #29
0
def sarima(n_input, n_preds, df):
    """Fit a fixed SARIMA model on the training split and predict the test split.

    Returns the test split, a label of the form 'SARIMA (n_input, n_preds)'
    and the predictions as a list.
    """
    # Model choice recorded by the original author:
    #   auto_arima(df['Value'], m=12).summary()
    #   -> SARIMAX(1, 1, 2)x(2, 1, 2, 12)
    train, test, _, _, _, _ = train_test_split(n_preds, df)

    results = SARIMAX(train['Value'],
                      order=(1, 1, 2),
                      seasonal_order=(2, 1, 2, 12)).fit()

    # Positions of the test observations, counted from the start of train
    first = len(train)
    last = len(train) + len(test) - 1
    predictions = results.predict(start=first,
                                  end=last,
                                  dynamic=False,
                                  typ='levels').tolist()

    label = 'SARIMA ({}, {})'.format(n_input, n_preds)
    return test, label, predictions
Пример #30
0
def modelEvaluated(data, sarimaOrder):
    """Walk-forward evaluation of one SARIMA configuration.

    The first 80 % of ``data`` (rounded down) is the initial history. For
    every remaining observation the model is refitted on the history, a
    one-step forecast is recorded, and the true observation is appended to
    the history.

    Args:
        data: Sequence of observations.
        sarimaOrder: ``(order, seasonal_order, trend)`` triple.

    Returns:
        The RMSE of the one-step forecasts over the held-out part.
    """
    cut = floor(len(data) * 0.8)
    x_train, x_test = data[0:cut], data[cut:]
    history = list(x_train)
    predictions = []
    order, sorder, trend = sarimaOrder

    for i in range(len(x_test)):
        model_fit = SARIMAX(history,
                            order=order,
                            seasonal_order=sorder,
                            trend=trend,
                            simple_differencing=True,
                            enforce_stationarity=False,
                            enforce_invertibility=False).fit(disp=False)
        predictions.append(model_fit.forecast()[0])
        # reveal the true value before the next step
        history.append(x_test[i])

    # out-of-sample error
    return sqrt(mean_squared_error(x_test, predictions))
def sarima_forecast(history, config):
    """Fit SARIMAX on ``history`` and return the one-step-ahead prediction.

    ``config`` is an ``(order, seasonal_order, trend)`` triple.
    """
    order, sorder, trend = config
    # Define the model: ``order`` holds the non-seasonal parameters,
    # ``seasonal_order`` the seasonal ones and ``trend`` the trend type.
    # The class is called SARIMAX rather than SARIMA because the "X" means
    # it also supports exogenous variables: parallel time series that are
    # not modelled through the AR, I or MA terms but enter the model as
    # weighted inputs. They are optional and are given via ``exog``, e.g.
    # SARIMAX(data, exog=other_data, ...).
    #
    # Fitting prints a lot of optimizer output; disp=0 or disp=False turns
    # it off.
    fitted = SARIMAX(history,
                     order=order,
                     seasonal_order=sorder,
                     trend=trend,
                     enforce_stationarity=False,
                     enforce_invertibility=False).fit(disp=False)
    # Two prediction methods exist: forecast(n) predicts the n values after
    # the training sample (one value when n is omitted), whereas
    # predict(start, end) counts positions from the first training
    # observation and works both in and out of sample. With 5 training
    # points, predict(8, 9) gives the 9th-10th points (out of sample) and
    # predict(3, 6) the 4th-7th points.
    next_position = len(history)
    yhat = fitted.predict(next_position, next_position)
    # Return the first entry of the prediction array
    return yhat[0]