示例#1
0
def test_innovations_algo_filter_kalman_filter(reset_randomstate):
    """Check the innovations algorithm and filter against the Kalman filter.

    Both approaches evaluate the exact likelihood of an ARMA(1, 1) process.
    The prediction errors, the innovations coefficients (compared with the
    Kalman gain) and the per-observation log-likelihood must agree.

    ``reset_randomstate`` is not used in the body; it is presumably a pytest
    fixture that seeds the random state -- confirm against conftest.
    """
    ar_params = np.array([0.5])
    ma_params = np.array([0.2])
    # TODO could generalize to sigma2 != 1, if desired, after #5324 is merged
    # and there is a sigma2 argument to arma_acovf
    # (but maybe this is not really necessary for the point of this test)
    sigma2 = 1

    endog = np.random.normal(size=10)

    # Innovations algorithm approach
    acovf = arma_acovf(np.r_[1, -ar_params], np.r_[1, ma_params],
                       nobs=len(endog))

    theta, v = innovations_algo(acovf)
    u = innovations_filter(endog, theta)
    # Use the same scaled variance in the quadratic term and in the log term
    # so the expression stays a valid Gaussian log-density if sigma2 != 1.
    forecast_var = sigma2 * v
    llf_obs = (-0.5 * u**2 / forecast_var
               - 0.5 * np.log(2 * np.pi * forecast_var))

    # Kalman filter approach
    mod = SARIMAX(endog, order=(len(ar_params), 0, len(ma_params)))
    res = mod.filter(np.r_[ar_params, ma_params, sigma2])

    # Test that the two approaches are identical
    # (a small absolute tolerance is only allowed when PLATFORM_WIN is set)
    atol = 1e-6 if PLATFORM_WIN else 0.0
    assert_allclose(u, res.forecasts_error[0], atol=atol)
    assert_allclose(theta[1:, 0], res.filter_results.kalman_gain[0, 0, :-1],
                    atol=atol)
    assert_allclose(llf_obs, res.llf_obs, atol=atol)
def test_standardized_forecasts_error():
    """
    Check standardized forecast errors against a manual computation.

    The expected values are recomputed by hand on a univariate series:
    the forecast error divided by the square root of its variance.
    """

    # Build the log-GDP series from the stored reference data
    reference = results_kalman_filter.uc_uni
    quarters = pd.date_range('1947-01-01', '1995-07-01', freq='QS')
    data = pd.DataFrame(reference['data'], index=quarters, columns=['GDP'])
    data['lgdp'] = np.log(data['GDP'])

    # Fit an ARIMA(1,1,0) to log GDP
    res = SARIMAX(data['lgdp'], order=(1, 1, 0)).fit(disp=-1)

    # Manual standardization of the one-step forecast errors
    errors = res.filter_results.forecasts_error[0]
    error_std = np.sqrt(res.filter_results.forecasts_error_cov[0, 0])
    expected = errors / error_std

    assert_allclose(
        res.filter_results.standardized_forecasts_error[0],
        expected,
    )
def test_innovations_algo_direct_filter_kalman_filter(ar_params, ma_params,
                                                      sigma2):
    """Check the direct ARMA innovations function against the Kalman filter.

    The prediction errors and per-observation log-likelihood from
    ``arma_innovations.arma_innovations`` and from
    ``_arma_innovations.darma_loglikeobs_fast`` are compared with the
    values produced by filtering a SARIMAX model with the same parameters.
    """
    endog = np.random.normal(size=10)

    # Innovations algorithm approach (direct function)
    raw_errors, raw_mse = arma_innovations.arma_innovations(
        endog, ar_params, ma_params, sigma2)
    errors = np.array(raw_errors)
    variances = np.array(raw_mse) * sigma2

    quadratic_term = -0.5 * errors**2 / variances
    log_term = 0.5 * np.log(2 * np.pi * variances)
    expected_llf_obs = quadratic_term - log_term

    # Kalman filter approach
    arma_order = (len(ar_params), 0, len(ma_params))
    res = SARIMAX(endog, order=arma_order).filter(
        np.r_[ar_params, ma_params, sigma2])

    # The two approaches must agree; the Kalman gain is not compared here
    # because no innovations coefficients are returned by the direct call.
    assert_allclose(errors, res.forecasts_error[0])
    assert_allclose(expected_llf_obs, res.llf_obs)

    # The fast routine returns the per-observation log-likelihood directly
    fast_llf_obs = _arma_innovations.darma_loglikeobs_fast(
        endog, ar_params, ma_params, sigma2)

    assert_allclose(fast_llf_obs, res.llf_obs)
def test_integrated_process(ar_params, diff, ma_params, sigma2):
    """Check the log-likelihood computation when the model is integrated.

    The innovations-based log-likelihood of the manually differenced series
    must match the Kalman filter output of a SARIMAX model that applies the
    same differencing itself (``simple_differencing=True``).
    """
    n_samples = 100
    endog = np.cumsum(np.random.normal(size=n_samples))

    # Innovations algorithm approach, on the differenced data
    differenced = np.diff(endog, diff)
    llf_innovations = arma_innovations.arma_loglikeobs(
        differenced, ar_params, ma_params, sigma2)

    # Kalman filter approach, differencing handled by the model
    arima_order = (len(ar_params), diff, len(ma_params))
    param_vector = np.r_[ar_params, ma_params, sigma2]
    res = SARIMAX(endog, order=arima_order,
                  simple_differencing=True).filter(param_vector)

    # Both approaches must give the same values
    assert_allclose(llf_innovations, res.llf_obs)
示例#5
0
def sarimax_eval(train_df,
                 test_df,
                 train_column,
                 test_column,
                 start,
                 end,
                 p,
                 d,
                 q,
                 S,
                 P=0,
                 D=0,
                 Q=0):
    """Fit a SARIMAX model and report its MAE and AIC with the orders used.

    ``train_df`` and ``test_df`` are accepted but not used in the body.

    :param train_column: endogenous series the model is fitted on
    :param test_column: true values the predictions are scored against
    :param start: first point passed to ``predict``
    :param end: last point passed to ``predict``
    :param p, d, q: non-seasonal order
    :param S: seasonal period
    :param P, D, Q: seasonal order (default 0 each)
    :return: dict with keys 'mae', 'AIC', 'p', 'd', 'q', 'P', 'D', 'Q', 'S'
    """
    # Stationarity and invertibility are not enforced during estimation.
    fitted = SARIMAX(
        endog=train_column,
        order=(p, d, q),
        seasonal_order=(P, D, Q, S),
        enforce_stationarity=False,
        enforce_invertibility=False,
    ).fit()

    # Predict over the requested window and score against the test values.
    preds = fitted.predict(start=start, end=end)

    # Metrics first, then the orders, so the search setup can be recovered.
    parameters = {
        'mae': mean_absolute_error(test_column, preds),
        'AIC': fitted.aic,
    }
    parameters.update(p=p, d=d, q=q, P=P, D=D, Q=Q, S=S)
    return parameters
示例#6
0
def test_small_sample_serial_correlation_test():
    """Check the df-adjusted Ljung-Box test on the Nile dataset.

    The statistic and p-value at lag 10 are compared to reference values
    obtained in R with the forecast package:

        library(forecast)
        fit <- Arima(y, order=c(1,0,1), include.constant=FALSE)
        checkresiduals(fit, lag=10)
    """
    from statsmodels.tsa.statespace.sarimax import SARIMAX

    niledata = nile.data.load_pandas().data
    niledata.index = pd.date_range('1871-01-01', '1970-01-01', freq='AS')

    res = SARIMAX(endog=niledata['volume'],
                  order=(1, 0, 1),
                  trend='n',
                  freq=niledata.index.freq).fit()

    diagnostics = res.test_serial_correlation(
        method='ljungbox', lags=10, df_adjust=True)
    # Keep the last lag entry of both the statistic and the p-value
    actual = diagnostics[0, :, -1]
    expected = [14.116, 0.0788]
    assert_allclose(actual, expected, atol=1e-3)
    def sarimax_model_fit(self, x_train, y_train, df_time):
        """Fit a SARIMAX(0, 1, 0) regression of ``y_train`` on ``x_train``.

        Both inputs have their index replaced by ``df_time`` in place, so
        the caller's objects are modified. The fit summary is printed.

        :param x_train: exogenous regressors
        :param y_train: endogenous series
        :param df_time: index assigned to both inputs
        :return: the fitted results object
        """
        # Put both inputs on the same index before building the model
        for frame in (x_train, y_train):
            frame.index = df_time

        fitted = SARIMAX(y_train,
                         exog=x_train,
                         order=(0, 1, 0),
                         seasonal_order=(0, 0, 0, 0)).fit(disp=-1)
        print(fitted.summary())
        return fitted
示例#8
0
def test_integrated_process(ar_params, diff, ma_params, sigma2):
    """Check the log-likelihood computation for an integrated model.

    The series is differenced by hand for the innovations algorithm, while
    the SARIMAX model differences it internally; the per-observation
    log-likelihood values must match.
    """
    sample_size = 100
    endog = np.cumsum(np.random.normal(size=sample_size))

    # Innovations algorithm approach
    manually_differenced = np.diff(endog, diff)
    llf_from_innovations = arma_innovations.arma_loglikeobs(
        manually_differenced, ar_params, ma_params, sigma2)

    # Kalman filter approach
    mod = SARIMAX(endog,
                  order=(len(ar_params), diff, len(ma_params)),
                  simple_differencing=True)
    filtered = mod.filter(np.r_[ar_params, ma_params, sigma2])

    # Test that the two approaches are identical
    assert_allclose(llf_from_innovations, filtered.llf_obs)
 def order(self):
     """Print the AIC of every SARIMA candidate in a small grid.

     Each of p, d, q and P, D, Q takes the values 0 and 1; the seasonal
     period is fixed at 30. Every combination is fitted to
     ``self.original.Mean``. Candidates that fail to build or fit are
     skipped. Nothing is returned.
     """
     from statsmodels.tsa.statespace.sarimax import SARIMAX
     from itertools import product
     p = d = q = range(0, 2)
     pdq = list(product(p, d, q))
     # Reuse the non-seasonal grid for the seasonal part
     seasonal_pdq = [(x[0], x[1], x[2], 30) for x in pdq]
     for param in pdq:
         for param_seasonal in seasonal_pdq:
             try:
                 mod = SARIMAX(self.original.Mean,
                               order=param,
                               seasonal_order=param_seasonal,
                               enforce_stationarity=False,
                               enforce_invertibility=False)
                 results = mod.fit()
                 print('ARIMA{}x{} - AIC:{}'.format(param, param_seasonal,
                                                    results.aic))
             except Exception:
                 # Best effort: skip a failing candidate, but do not swallow
                 # KeyboardInterrupt / SystemExit as a bare ``except`` would.
                 continue
示例#10
0
def seasonalAutoregressiveIntegratedMovingAverage2(day):
    """Fit a SARIMA model on 2018 per-movie daily gross and print a forecast.

    The average gross per tracked movie is computed for every 2018 record
    of the ``daily`` collection. The model is fitted on the first ``day``
    values and the one-step prediction for the next position is printed,
    together with the true value at index ``day``. Nothing is returned.

    :param day: number of leading observations to train on; must be a
        valid index into the 2018 records, otherwise an ``IndexError`` is
        raised.
    """
    col_daily = db['daily']
    # Gross is stored as a string with thousands separators
    dailyGrossSet = [
        int(record['Gross($)'].replace(",", "")) / int(record['MoviesTracked'])
        for record in col_daily.find({"Year": 2018})
    ]
    print(dailyGrossSet[day])
    dailyGrossSet = dailyGrossSet[0:day]
    print(dailyGrossSet)
    # fit model
    # NOTE(review): a seasonal period of 1 is unusual and may be rejected by
    # some statsmodels versions -- confirm the intended periodicity.
    model = SARIMAX(dailyGrossSet,
                    order=(1, 1, 1),
                    seasonal_order=(1, 1, 1, 1))
    model_fit = model.fit(disp=False)
    # make prediction for the first out-of-sample position
    yhat = model_fit.predict(len(dailyGrossSet), len(dailyGrossSet))
    print(yhat)
示例#11
0
def sarimax_forecast(train_data, sent_data, valid_sent_data, config):
    '''
    Return a one-step SARIMAX prediction using exogenous data.

    train_data : endogenous series the model is fitted on
    sent_data : exogenous regressors aligned with ``train_data``
    valid_sent_data : exogenous values for the forecast step
    config : (order, seasonal_order, trend) triple
    '''
    # Pull out configuration terms
    order, sorder, trend = config

    # Build and fit the model without enforcing stationarity/invertibility
    fitted = SARIMAX(endog=train_data,
                     exog=sent_data,
                     order=order,
                     seasonal_order=sorder,
                     trend=trend,
                     enforce_stationarity=False,
                     enforce_invertibility=False).fit(disp=0)

    # One-step forecast; only its first element is returned
    one_step = fitted.forecast(exog=valid_sent_data)
    return one_step[0]
    def test_02(self):
        """Export a seasonal ARIMA model to PMML and compare server scores.

        The model is fitted on the Johnson & Johnson series, exported,
        uploaded, and its one-step and five-step forecasts are compared to
        the values scored by the server.
        """
        # NOTE(review): this test uses both ``self.adapaUtilities`` and
        # ``self.adapa_utility`` -- confirm both attributes exist on the
        # test class, or whether one is a misspelling of the other.
        pmml_file = 'jnj_seasonal_arima.pmml'

        data = pd.read_csv("nyoka/tests/JohnsonJohnsonWithDate.csv")
        data['index'] = pd.to_datetime(data['index'], format='%Y-%m-%d')
        data.set_index(['index'], inplace=True)

        result = SARIMAX(data, order=(1, 0, 0),
                         seasonal_order=(1, 0, 0, 4)).fit(disp=False)

        ArimaToPMML(result, pmml_file)
        model_name = self.adapaUtilities.upload_to_zserver(pmml_file)

        # One-step forecast against a single scored record
        single_score = self.adapaUtilities.score_single_record(model_name)
        one_step = result.forecast()[0]
        self.assertEqual(one_step, single_score['predicted_value'])

        # Last of five forecast steps against the file-based score
        file_score = self.adapa_utility.score_in_zserver(
            model_name, 'nyoka/tests/test_jnj.csv', 'TS')
        fifth_step = result.forecast(5)[-1]
        self.assertEqual(fifth_step, file_score)
示例#13
0
def sarimax_forecast(train_data, sent_data, valid_sent_data, config):
    '''
    Return a one-step SARIMAX prediction.

    ``sent_data`` and ``valid_sent_data`` are accepted but not used; the
    model is fitted without exogenous regressors.

    train_data : endogenous series the model is fitted on
    config : sequence whose first item holds (order, seasonal_order, trend)
    '''
    # Pull out configuration terms
    order, sorder, trend = tuple(config[0])

    # Build and fit the model without enforcing stationarity/invertibility
    fitted = SARIMAX(
        endog=train_data,
        order=order,
        seasonal_order=sorder,
        trend=trend,
        enforce_stationarity=False,
        enforce_invertibility=False,
    ).fit(disp=0)

    # One-step forecast; only its first element is returned
    one_step = fitted.forecast()
    return one_step[0]
def SARIMA_Forecast(data, config):
    """Fit a SARIMA model and return its one-step forecast.

    ``config`` is an ``(order, seasonal_order, trend)`` triple:

    - order: tuple of p, d and q for the non-seasonal part
    - seasonal_order: tuple of P, D, Q and m for the seasonal part
    - trend: deterministic trend, one of 'n', 'c', 't', 'ct' for no trend,
      constant, linear, and constant with linear trend respectively

    Returns the forecast's ``predicted_mean`` and ``se_mean``.
    """
    order, sorder, trend = config

    # Define and fit in one go; constraints on the parameters are disabled
    fitted = SARIMAX(data,
                     order=order,
                     seasonal_order=sorder,
                     trend=trend,
                     enforce_stationarity=False,
                     enforce_invertibility=False).fit(disp=False)

    # Default forecast horizon is used (one step)
    one_step = fitted.get_forecast()
    return one_step.predicted_mean, one_step.se_mean
示例#15
0
class Sarimax:
    """Thin wrapper around a SARIMAX(3, 1, 0)x(0, 0, 0, 12) model."""

    def __init__(self, df, cfg):
        """Select the target column of ``df`` and build the unfitted model.

        ``cfg['target_feature']`` names the column to model.
        """
        target_name = cfg['target_feature']
        self.series = df[target_name]
        self.model = SARIMAX(self.series,
                             order=(3, 1, 0),
                             seasonal_order=(0, 0, 0, 12))

    def fit_model(self):
        """Fit the model and print its summary.

        ``self.model`` is replaced by the object returned from ``fit``.
        """
        fitted = self.model.fit(disp=0)
        self.model = fitted
        print(fitted.summary())

    def plot_autocorrelation(self):
        """Show the autocorrelation plot of the target series."""
        autocorrelation_plot(self.series)
        plt.show()

    def predict_arima(self, series):
        """Return ``self.model.predict(series)``.

        NOTE(review): ``series`` is forwarded as the first positional
        argument of ``predict`` -- confirm what callers pass here.
        """
        prediction = self.model.predict(series)
        return prediction
示例#16
0
 def test_seasonal_arima1(self):
     """Export a fully configured seasonal ARIMA model and validate it.

     The fitted model is written to PMML and the file is checked against
     ``self.schema``.
     """
     f_name = 'seasonal_arima1.pmml'
     ts_data = self.statsmodels_data_helper.getData5()

     # Every SARIMAX option is spelled out explicitly for this export test
     model_options = {
         'exog': None,
         'order': (3, 1, 1),
         'seasonal_order': (3, 1, 1, 12),
         'trend': 't',
         'measurement_error': True,
         'time_varying_regression': True,
         'mle_regression': False,
         'simple_differencing': True,
         'enforce_stationarity': False,
         'enforce_invertibility': False,
         'hamilton_representation': True,
         'concentrate_scale': False,
     }
     result = SARIMAX(endog=ts_data, **model_options).fit()

     ArimaToPMML(result, f_name)
     self.assertEqual(self.schema.is_valid(f_name), True)
def grid_search_sarima_param(data, S=shift_2, print_params=False):
    """Grid search for the SARIMA pdq and seasonal PDQ with the lowest RMSE.

    Each of p, d, q (and P, D, Q) takes the values 0 and 1. Every candidate
    is fitted on ``data`` and its forecast is exponentiated (``np.exp``)
    before being compared against the module-level ``test_data['Airpass']``.
    Candidates that fail to build, fit or score are skipped.

    :param data: series the candidates are fitted on
    :param S: seasonal period used for every seasonal candidate
    :param print_params: print the selected configuration when true
    :return: ``(optimal_pdq, optimal_seasonal_pdq, optimal_aic, min_rmse)``;
        AIC and RMSE are rounded to two decimals. If no candidate could be
        scored the first three are ``None`` and ``min_rmse`` is ``inf``.

    Side effect: all warnings are silenced process-wide via
    ``warnings.filterwarnings("ignore")``.
    """
    p = d = q = range(0, 2)
    pdq = list(itertools.product(p, d, q))
    seasonal_PDQ = [(x[0], x[1], x[2], S) for x in pdq]

    warnings.filterwarnings("ignore")  # specify to ignore warning messages

    # Start from +inf rather than a fixed ceiling so the best candidate is
    # always selected, whatever the scale of the data, and pre-bind the
    # results so the return never hits an unbound local.
    min_rmse = float("inf")
    optimal_pdq = optimal_seasonal_pdq = optimal_aic = None

    for param in pdq:
        for param_seasonal in seasonal_PDQ:
            try:
                model = SARIMAX(data,
                                order=param,
                                seasonal_order=param_seasonal,
                                enforce_stationarity=False,
                                enforce_invertibility=False)

                results = model.fit()

                k = len(test_data)
                forecast = results.forecast(k)
                forecast = np.exp(forecast)

                rmse = np.sqrt(
                    sum((forecast - test_data['Airpass'])**2) / len(test_data))
            except Exception:
                # Best effort: skip a failing candidate, but let
                # KeyboardInterrupt / SystemExit propagate.
                continue

            if rmse < min_rmse:
                min_rmse = round(rmse, 2)
                optimal_aic = round(results.aic, 2)
                optimal_pdq = param
                optimal_seasonal_pdq = param_seasonal

    if print_params:
        print('SARIMA{}x{} - AIC:{} - RMSE:{}'.format(optimal_pdq, optimal_seasonal_pdq, optimal_aic, min_rmse))

    return optimal_pdq, optimal_seasonal_pdq, optimal_aic, min_rmse
示例#18
0
def train_test_predict():
    """Run an automatic order search and a SARIMAX fit for every index.

    For each entry of the module-level ``indices``, ``AA`` searches for an
    order on the matching column of ``transformed_system_forecasts``, then
    a SARIMAX model with the selected orders is fitted on ``train``.
    Nothing is returned.
    """
    # Settings handed to the automatic search for every series
    search_options = dict(start_p=1,
                          start_q=1,
                          max_p=3,
                          max_q=3,
                          m=12,
                          start_P=0,
                          seasonal=True,
                          d=None,
                          D=1,
                          trace=True,
                          error_action='ignore',
                          suppress_warnings=True,
                          stepwise=True)

    for i in range(len(indices)):
        name = indices[i]

        print(
            f'-------------------Now analyzing {name} -------------------'
        )
        # call AA to identify optional params and return fitted model
        stepwise_fit = AA(transformed_system_forecasts[name],
                          **search_options)

        # NOTE(review): this selects rows 0 and 12 and column ``i`` twice,
        # which does not match the original intent comment ("all but first
        # 12 elements from ith column") -- confirm the intended slice.
        row_positions = [0, 12]
        column_positions = [i, i]
        train = transformed_system_forecasts.iloc[row_positions,
                                                  column_positions]
        print(train)

        selected_order = stepwise_fit.get_params()['order']
        selected_seasonal = stepwise_fit.get_params()['seasonal_order']
        model = SARIMAX(train,
                        order=selected_order,
                        seasonal_order=selected_seasonal)

        result = model.fit()
        result.summary()
示例#19
0
文件: arima.py 项目: raisulru/MMHE
    def get(self, request, *args, **kwargs):
        """Return SARIMAX predictions for the next ``nsteps`` periods.

        The ``nsteps`` query parameter (default 10) sets the number of
        monthly periods; a non-integer value raises ``ValueError`` from
        ``int``. The model is fitted on all ``UnivarientData`` rows indexed
        by date. The forecast window starts 30 days after the latest
        stored date.

        Response body: ``{'predicted_data': [[date, prediction], ...]}``.
        """
        n_steps = int(self.request.query_params.get('nsteps', 10))

        last_date = UnivarientData.objects.latest(
            'date').date + datetime.timedelta(days=30)

        # Training frame: every stored observation, indexed by its date
        history = read_frame(UnivarientData.objects.all())
        history['date'] = pd.to_datetime(history['date'])
        history = history.drop('id', axis=1)
        history = history.set_index('date')
        arima = SARIMAX(history, order=(1, 0, 2), freq='M',
                        seasonal_order=(1, 2, 1, 6),
                        enforce_stationarity=False,
                        enforce_invertibility=False).fit()

        # Forecast window and the output frame built from it
        date_index = pd.date_range(start=last_date, periods=n_steps, freq='M')
        forecast_frame = pd.DataFrame()
        forecast_frame['prediction'] = arima.predict(date_index.min(),
                                                     date_index.max())
        forecast_frame['date'] = date_index
        predicted_data = forecast_frame[['date', 'prediction']].values.tolist()
        return Response({'predicted_data': predicted_data})
示例#20
0
    def predict_product(self, product_id):
        """
        Forecast the series of one product.

        A SARIMAX(0, 1, 2)x(1, 1, 1, 11) model without trend is fitted on
        the product's series and forecast ``PREDICTION_TIME * 4`` steps
        ahead.

        :param product_id: identifier passed to the series lookup
        :return: exponentiated history (index strictly between "2015" and
            "2016", missing values filled with 0), exponentiated predicted
            mean, and exponentiated confidence interval
        """
        product_ts = self.__get_product_ts(product_id)

        fitted = SARIMAX(product_ts,
                         order=(0, 1, 2),
                         seasonal_order=(1, 1, 1, 11),
                         trend='n',
                         time_varying_regression=True,
                         mle_regression=False).fit()

        horizon = PREDICTION_TIME * 4
        forecast = fitted.get_forecast(steps=horizon, dynamic=True)

        # Historical window returned alongside the forecast
        in_window = (product_ts.index > "2015") & (product_ts.index < "2016")
        history = product_ts[in_window].fillna(0)

        # Every output is passed through np.exp before being returned
        predicted_mean = forecast.predicted_mean
        conf_int = forecast.conf_int()
        return np.exp(history), np.exp(predicted_mean), np.exp(conf_int)
    def __train_model(self,
                      series,
                      order,
                      seasonal_order,
                      exogenous=None,
                      max_iterations=50):
        """
        Fit a single SARIMAX candidate and score it by AIC.

        The seasonal order is only passed to the model when the instance is
        flagged as seasonal. The trend is "n" when the drift is 0 and "c"
        otherwise.

        :param series: time series to train model on
        :param order: (p, d, q)
        :param seasonal_order: (P, D, Q, m)
        :param exogenous: exogenous variables array
        :param max_iterations: ``maxiter`` passed to ``fit``
        :return: model, fit, score -- all ``None`` when building or fitting
            raises ``ValueError`` or ``LinAlgError`` (the error is printed)
        """
        trend = "c" if self.__drift != 0 else "n"

        # Arguments shared by the seasonal and non-seasonal variants
        model_kwargs = {
            "exog": exogenous,
            "order": order,
            "trend": trend,
            "enforce_stationarity": False,
            "enforce_invertibility": False,
        }
        if self.__seasonal:
            model_kwargs["seasonal_order"] = seasonal_order

        try:
            model = SARIMAX(series, **model_kwargs)
            fit = model.fit(maxiter=max_iterations, disp=0)
            score = fit.aic
            print(
                "Order : " + str(order), ", Seasonal Order : " +
                str(seasonal_order) + ", AIC Score : " + str(score))
        except (ValueError, LinAlgError) as error:
            print(error)
            return None, None, None

        return model, fit, score
def arima_best(fh, train, val, p_range, d_range, q_range, loss_metric="MSE",
               seasonal_order=(4, 1, 2, 8)):
    '''
    Grid-search p, d, q and return the model with the lowest validation loss.

    fh : int. Forecast horizon. While validation set can be longer than
            the forecast horizon, only the fh portion of the validation set
            will be used to calculate score/loss, instead of forecasting the
            entire length of the validation set. This is to keep consistent
            with the actual use purpose of the model which will be to
            predict only the selected forecast horizon.
    train : series every candidate is fitted on.
    val : validation series; only ``val[:fh]`` is scored.
    p_range: tuple of 2, expanded as ``range(*p_range)``
    d_range: tuple of 2, expanded as ``range(*d_range)``
    q_range: tuple of 2, expanded as ``range(*q_range)``
    loss_metric : name passed to ``loss_func`` to pick the loss.
    seasonal_order : (P, D, Q, m) used for every candidate. Defaults to
            (4, 1, 2, 8), the value that used to be hard-coded.

    Returns ``(best_model, (best_p, best_d, best_q))``; ``best_model`` is
    ``None`` and the orders are ``None`` when no candidate was evaluated or
    none produced a loss below infinity.
    '''
    # true values to be scored against
    true = val[:fh]
    min_loss = float("inf")
    best_model = None
    best_order = (None, None, None)
    for p in range(*p_range):
        for d in range(*d_range):
            for q in range(*q_range):
                model = SARIMAX(train,
                                order=(p, d, q),
                                seasonal_order=seasonal_order,
                                enforce_stationarity=False,
                                enforce_invertibility=False,
                                trend=None).fit(maxiter=100, method="powell")
                # make prediction over the forecast horizon only
                predictions = model.forecast(fh)
                loss = loss_func(loss_metric, tensor=False)(true, predictions)
                if loss < min_loss:
                    min_loss = loss
                    best_model = model
                    best_order = (p, d, q)
    return best_model, best_order
示例#23
0
def build_model(series, p, d, q, S, exog_data, P=None, D=None, Q=None):
    """
    Build and fit a SARIMAX model.

    Inputs:

        series: endogenous data passed to SARIMAX, e.g. ``df['series_name']``

        p, d, q: non-seasonal orders
            p: autoregressive component
            d: differencing component
            q: moving average of error term component

        S: seasonal lag

        P, D, Q: seasonal counterparts of p, d, q; any of them left as
        ``None`` falls back to the matching non-seasonal value

        exog_data: matrix of exogenous variables

    Output:

        SARIMAX model results (invertibility is enforced)
    """
    # Seasonal orders default to their non-seasonal counterparts
    P = p if P is None else P
    D = d if D is None else D
    Q = q if Q is None else Q

    fitted = SARIMAX(series,
                     order=(p, d, q),
                     seasonal_order=(P, D, Q, S),
                     exog=exog_data,
                     enforce_invertibility=True).fit()
    return fitted
示例#24
0
    def _getNextObs(self):
        """
        Build the observation array for the current iterator position.

        The observation concatenates, in order: the last row of the scaled
        features, the SARIMAX forecast mean, the flattened forecast
        confidence interval, and the last column of the scaled account
        history.

        :return: array of dtype float16 reshaped to ``self.obsShape``, with
            non-finite entries replaced by 0
        """

        # print('> In _getNextObs ')

        # Keep every column of the stationary frame except 'index' and 'Date'
        features = self.stacionaryDf[self.stacionaryDf.columns.difference(['index', 'Date'])]

        # Rows up to and including the current iterator position
        scaled = features[:self.iterator + 1].values
        # Replace +/- infinity by 0 (done in place on the values array)
        scaled[abs(scaled) == inf] = 0
        # Normalize; note the scaler is re-fitted on every call
        scaled = self.scaler.fit_transform(scaled.astype('float32'))
        # Back to a DataFrame with the feature column names
        scaled = pd.DataFrame(scaled, columns=features.columns)

        # Forecast the 'Close' column from the values seen so far
        pastDf = self.stacionaryDf['Close'][:self.iterator + 1]
        forecast_model = SARIMAX(pastDf.values, enforce_stationarity=False, simple_differencing=True)
        model_fit = forecast_model.fit(method='bfgs', disp=False)
        forecast = model_fit.get_forecast(
            steps=self.forecastLength, alpha=(1 - self.confidenceInterval))

        # Assemble the observation by appending each part at the end.
        # The lengths below are the original author's notes -- TODO confirm.
        obs = scaled.values[-1]                                                     # last scaled row (noted: len 44)
        obs = np.insert(obs, len(obs), forecast.predicted_mean, axis=0)             # forecast mean (noted: appends 10)
        obs = np.insert(obs, len(obs), forecast.conf_int().flatten(), axis=0)       # flattened interval (noted: appends 20)

        # The same scaler is re-fitted here on the account history
        scaled_history = self.scaler.fit_transform(
            self.accountHistory.astype('float32'))

        # Append the last column of the scaled account history
        obs = np.insert(obs, len(obs), scaled_history[:, -1], axis=0)

        # Cast to float16, reshape, then zero out any NaN / infinite entries
        obs = np.reshape(obs.astype('float16'), self.obsShape)
        obs[np.bitwise_not(np.isfinite(obs))] = 0

        # print('> Finished getNextObs ')

        return obs
示例#25
0
    def _update(self, y, X=None):
        """
        Update the forecaster with new data without re-estimating parameters.

        A new SARIMAX model is built on the updated data, initialized from
        the last predicted state and state covariance of the previously
        fitted estimator, and smoothed with the previously fitted
        parameters. The result is stored in ``self._updated_estimator``.

        Parameters
        ----------
        y : pandas.Series
            Updated time series used to update the previously fitted
            forecaster.
        X : pandas.DataFrame, shape=[n_obs, n_vars], optional (default=None)
            An optional 2-d dataframe of exogenous variables. If provided,
            these variables are used as additional features in the
            regression operation. This should not include a constant or
            trend. Note that if an ``ARIMA`` is fit on exogenous features,
            it must also be provided exogenous features for making
            predictions.

        Returns
        -------
        self : An instance of self
        """
        # TODO for updating see
        #  https://github.com/statsmodels/statsmodels/issues/2788 and
        #  https://github.com/statsmodels/statsmodels/issues/3318

        # unnest series
        y = self._prepare_y(y)
        X = self._prepare_X(X)

        # Rebuild the model on the new data with the stored configuration
        refreshed = SARIMAX(
            y,
            exog=X,
            order=self.order,
            seasonal_order=self.seasonal_order,
            trend=self.trend,
            enforce_stationarity=self.enforce_stationarity,
            enforce_invertibility=self.enforce_invertibility,
        )

        # Start from where the previously fitted estimator ended
        previous = self._fitted_estimator
        last_state = previous.predicted_state[:, -1]
        last_state_cov = previous.predicted_state_cov[:, :, -1]
        refreshed.initialize_known(last_state, last_state_cov)

        # Smooth with the already fitted parameters (no re-estimation)
        self._updated_estimator = refreshed.smooth(previous.params)
        return self
示例#26
0
def Auto_Arima(df, dirloc, filename):
    """Grid-search seasonal ARIMA orders by AIC and forecast with the best.

    Every combination of p, d, q in 0..2 and P, D, Q in 0..2 (seasonal
    period 12) is fitted on ``df``. The AIC table is sorted, the best
    configuration is refitted, and predictions from 2020-06-01 to
    2020-12-01 are printed and appended to the output file.

    :param df: series to model (monthly start frequency, ``freq="MS"``)
    :param dirloc: directory string; its last five characters are stripped
        to obtain the output directory
    :param filename: name of the file the results are appended to
    :return: DataFrame of all candidates sorted by ascending AIC
    """
    import itertools
    from statsmodels.tsa.statespace.sarimax import SARIMAX

    p = d = q = range(0, 3)
    pdq = list(itertools.product(p, d, q))
    seas_decomp = [(x[0], x[1], x[2], 12) for x in pdq]
    print("Computating AIC of Different Sesonal ARIMA.....\n")
    arima_order = []
    seas_order = []
    aic_val = []

    for params in pdq:
        for seas_par in seas_decomp:
            mod = SARIMAX(df, order=params, seasonal_order=seas_par,
                          enforce_stationarity=False,
                          enforce_invertibility=False, freq="MS").fit()
            aic = round(mod.aic, 2)
            arima_order.append(params)
            seas_order.append(seas_par)
            aic_val.append(aic)
            print("SARIMA: {} X {} | AIC = {}".format(params, seas_par, aic))

    results = pd.DataFrame({"ARIMA Order": arima_order,
                            "Seasonal Order": seas_order,
                            "AIC Value": aic_val})
    results_sorted = results.sort_values(by="AIC Value", ascending=True)
    results_sorted = results_sorted.reset_index(drop=True)
    print("Selected SARIMA Order:", results_sorted.head(2))

    best_order = results_sorted["ARIMA Order"][0]
    best_seasonal = results_sorted["Seasonal Order"][0]
    # The keyword was misspelled ("seasona_order"), so the selected seasonal
    # order never reached the final model.
    final_model = SARIMAX(df, order=best_order, seasonal_order=best_seasonal,
                          enforce_stationarity=False,
                          enforce_invertibility=False, freq="MS").fit()
    print("Final Model Result Summary {}".format(final_model.summary()))
    print(best_order)
    print(best_seasonal)
    predictions = final_model.predict(
        start=dt.datetime.strptime("2020-06-01", "%Y-%m-%d"),
        end=dt.datetime.strptime("2020-12-01", "%Y-%m-%d"))
    print("Average Monthly WTI Crude Oil Spot Price from June to Dec 2020:")
    print(predictions)
    # Write to the file named by the ``filename`` argument (the block used
    # an undefined ``outputfile`` name and ignored this parameter). The
    # ``with`` statement closes the file.
    with open(os.path.join(dirloc[:-5], filename), "a") as f:
        f.write("Simulation Result of SARIMA....\n")
        f.write(str(results_sorted))
        f.write("\n")
        f.write(str(predictions))
    return results_sorted
示例#27
0
def graph_full_model_forecast(dataframe,
                              target_column,
                              exog_forecast,
                              df_ref,
                              alpha=.05,
                              days_to_forecast=30,
                              train_days=270,
                              m_periods=1,
                              exogenous_column=None,
                              state_postal_code=None):
    '''
    Summary function whose purpose is to graph a target_column's forecast.

    A stepwise fit and forecast frame are obtained from
    ``get_exogenous_forecast_dataframe``, a SARIMAX model with the selected
    orders is fitted on ``dataframe[target_column]`` with
    ``dataframe[exogenous_column]`` as regressor, and the forecast is built
    by ``build_SARIMAX_forecast``.

    ``train_days`` is accepted but not used in the body.

    Returns the forecast object produced by ``build_SARIMAX_forecast``.

    Raises ValueError when ``exogenous_column`` is None: the whole body
    depends on an exogenous column, so the default previously failed later
    with an unrelated lookup error.
    '''
    if exogenous_column is None:
        raise ValueError(
            "graph_full_model_forecast requires an exogenous_column")

    stepwise_fit, df_forecast = get_exogenous_forecast_dataframe(
        dataframe=dataframe,
        original_dataframe=df_ref,
        exog_forecast=exog_forecast,
        target_column=target_column,
        exogenous_column=exogenous_column,
        days_to_forecast=days_to_forecast,
        m_periods=m_periods)

    full_exog_model = SARIMAX(dataframe[target_column],
                              dataframe[exogenous_column],
                              order=stepwise_fit.order,
                              seasonal_order=stepwise_fit.seasonal_order)

    model = full_exog_model.fit()

    # Only the forecast object is returned; the first item is discarded
    _, forecast_object = build_SARIMAX_forecast(
        model=model,
        dataframe=df_forecast,
        target_column=target_column,
        stepwise_fit=stepwise_fit,
        alpha=alpha,
        days_to_forecast=days_to_forecast,
        original_df=df_ref,
        exogenous_column=exogenous_column,
        state_postal_code=state_postal_code)

    return forecast_object
def arima_evaluate(model, test, fh=8, refit=None, metric=MAPE):
    '''
    Forecast ``fh`` steps with a fitted model and score against ``test``.

    model : fitted SARIMAX results object.
    test : pd Time series. Test data set; only ``test[:fh]`` is scored.
    fh : int. Forecast horizon.
    refit : pd Time series or None. New time series data to refit the model
        on. ``None`` (the default) or an empty series skips the refit.
    metric : callable taking (predicted array, true array).

    Returns ``(pred, true, loss)``.
    '''
    # ``None`` replaces the former def-time ``pd.Series()`` default, which
    # built a shared object once when the module was imported.
    if refit is not None and not refit.empty:
        params = model.params  # store previous parameters
        # NOTE(review): only the non-seasonal order is rebuilt while the
        # full previous parameter vector seeds the fit -- confirm this is
        # intended for models that had seasonal terms.
        p_d_q = (model.model.k_ar_params, model.model.k_diff,
                 model.model.k_ma_params)
        model = SARIMAX(refit,
                        order=p_d_q,
                        enforce_stationarity=False,
                        enforce_invertibility=False,
                        trend=None).fit(params, maxiter=1000)
    pred = model.forecast(steps=fh)  # forecast values
    true = test[:fh]  # true values
    loss = metric(pred.array, true.array)
    return pred, true, loss
示例#29
0
def sarimax_predictor(train_user: list, train_match: list,
                      test_match: list) -> float:
    """
    Second method: SARIMAX.

    SARIMAX is a statistical method that learns the pattern of previous
    inputs in order to predict future data.
    input : training data (total_user, with exog data = total_event) in
        list of float
    output : first element of the total user prediction, as float
    >>> sarimax_predictor([4,2,6,8], [3,1,2,4], [2])
    6.6666671111109626
    """
    # Fixed orders with a weekly seasonal period; Nelder-Mead optimizer
    fitted = SARIMAX(train_user,
                     exog=train_match,
                     order=(1, 2, 1),
                     seasonal_order=(1, 1, 0, 7)).fit(disp=False,
                                                      maxiter=600,
                                                      method="nm")
    horizon_end = len(test_match)
    forecast = fitted.predict(1, horizon_end, exog=[test_match])
    return forecast[0]
示例#30
0
def param_heatmap(ts, limit_p, limit_q, itr, s=0):
    """Collect AIC, AICc and BIC over a grid of AR and MA orders.

    For every ``i < limit_p`` and ``j < limit_q`` a model is fitted with
    differencing order ``itr``. When ``s == 0`` the grid is over the
    non-seasonal order ``(i, itr, j)``; otherwise over the seasonal order
    ``(i, itr, j, s)``.

    Returns a dict with keys 'aic', 'aicc' and 'bic', each holding a
    ``(limit_p, limit_q)`` array.
    """
    grid_shape = (limit_p, limit_q)
    heatmaps = {
        'aic': np.zeros(grid_shape),
        'aicc': np.zeros(grid_shape),
        'bic': np.zeros(grid_shape),
    }
    for i in range(limit_p):
        for j in range(limit_q):
            # Only the order keyword differs between the two variants
            if s == 0:
                order_spec = {'order': (i, itr, j)}
            else:
                order_spec = {'seasonal_order': (i, itr, j, s)}
            fitted = SARIMAX(ts,
                             initialization="approximate_diffuse",
                             **order_spec).fit(disp=0)
            # Dict keys double as the criterion attribute names
            for criterion, grid in heatmaps.items():
                grid[i, j] = getattr(fitted, criterion)
    return heatmaps
def sarima_forecast(history, config):
    """
    Produce a one-step-ahead forecast from a SARIMAX model fitted on ``history``.

    ``config`` is a 3-item sequence ``(order, seasonal_order, trend)`` that is
    forwarded to SARIMAX:
    - order -> the (p, d, q) parameters
    - seasonal_order -> the seasonal parameters
    - trend -> deterministic trend ('n' none, 'c' constant, 't' linear,
      'ct' constant plus linear)

    Stationarity and invertibility are not enforced. Returns the first
    element of the prediction for index ``len(history)``.
    """
    order, sorder, trend = config
    fitted = SARIMAX(
        history,
        order=order,
        seasonal_order=sorder,
        trend=trend,
        enforce_stationarity=False,
        enforce_invertibility=False,
    ).fit(disp=False)
    # Predict exactly one point: the step right after the last observation.
    next_step = len(history)
    return fitted.predict(next_step, next_step)[0]
示例#32
0
class SARIMAModel(SMModel):
    """
    SARIMAX-backed model with a grid-search configuration generator.

    ``fit`` stores the fitted result on ``self.model``; ``predict`` forecasts
    from that stored result.
    """
    # Model categories this class declares itself as.
    type = [ModelType.CONTINUOUS_PRICE, ModelType.UNIVARIATE]
    name = 'statsmodels.arima'
    # Default (p, d, q) order.
    default_params = {'order': (1, 1, 1)}

    @with_params
    def fit(self, x, **kwargs):
        """
        Fit a SARIMAX model to ``x`` and store the result on ``self.model``.

        Reads ``kwargs['params']`` (presumably supplied by ``with_params`` --
        confirm against the decorator), using its ``'order'`` entry and its
        optional ``'disp'`` entry (default 0).

        Returns the fitted result, or None if fitting raised ``ValueError``
        or ``LinAlgError`` (the failure is logged).
        """
        params = kwargs.get('params')
        try:
            self.model = SARIMAX(x, order=params['order']) \
                .fit(disp=params.get('disp', 0))
            return self.model
        # np.linalg.LinAlgError is the public name; the np.linalg.linalg
        # submodule is private and deprecated in NumPy 2.0.
        except (ValueError, np.linalg.LinAlgError):
            logger.error('ARIMA convergence error (order {} {} {})'.format(
                params['order'][0], params['order'][1], params['order'][2]))
            return None

    def predict(self, x, **kwargs):
        """
        Forecast ``x.shape[0]`` steps from the stored fitted model.

        The forecast is passed through ``to_discrete_double`` with thresholds
        -0.01 and 0.01. Returns None when no model is stored or when the
        forecast raised ``ValueError`` or ``LinAlgError`` (the failure is
        logged).
        """
        if not self.model:
            return None
        try:
            forecast = self.model.forecast(steps=x.shape[0])
            return to_discrete_double(forecast, -0.01, 0.01)
        except (ValueError, np.linalg.LinAlgError):
            logger.error('ARIMA convergence error (order {} {} {})'.format(
                self.params['order'][0], self.params['order'][1],
                self.params['order'][2]))
            return None

    @with_x
    def get_grid_search_configs(self, **kwargs):
        """
        Build the list of grid-search configurations over (p, d, q).

        p and q each range over 0..5. d ranges over 0..5 unless the
        Augmented Dickey-Fuller test on ``x_train`` gives a p-value below
        0.05, in which case d is fixed to 0.

        Returns a list of dicts with keys ``'params'`` (holding the
        ``'order'`` tuple), ``'x_train'`` and ``'x_test'``.
        """
        x_train = kwargs.get('x_train')
        x_test = kwargs.get('x_test')

        p_values = range(0, 6)
        d_values = range(0, 6)
        q_values = range(0, 6)
        # If series is stationary, don't apply differencing
        adf = adfuller(x_train)  # 0 is score, 1 is pvalue
        if adf[1] < 0.05:  # Null hp rejected, series is stationary and requires no differencing
            logger.info('Series is stationary, no need for differencing')
            d_values = [0]  # Set d = 0
        # All (p, d, q) combinations, p varying slowest and q fastest.
        return [
            {
                'params': {
                    'order': (p, d, q)
                },
                'x_train': x_train,
                'x_test': x_test
            }
            for p in p_values
            for d in d_values
            for q in q_values
        ]
示例#33
0
文件: _ARIMAX.py 项目: ja4343/JAModel
    def train(self, features=None):
        """
        Train the model on a chosen set of features and report the test error.

        If no features are given, the result of ``get_best_features()`` is
        used. The model is fitted on the training and validation data
        concatenated (both endog and exog), then used to forecast over the
        test set. The fitted model is stored on ``self._model``, the feature
        list on ``self._current_features`` and the test metric on
        ``self._test_error``.

        Args:
            features : list - train model with list of desired features

        Returns:
            None. Progress and the resulting test error are printed.

        Raises:
            TypeError: if the 'X_train' data is not a pd.DataFrame.
        """
        X_train_data = self.get_data('X_train')
        if not isinstance(X_train_data, pd.DataFrame):
            raise TypeError("ERROR: The input training data was not in the form of a pd.DataFrame.")
        print(' ')
        print("Training - ARIMAX")
        print("=================")
        print(" ")
        print("Running ARIMAX model on feauture set:")
        print(" ")
        # Identity check: `== None` is evaluated element-wise for array-like
        # feature collections and cannot be used as a plain boolean.
        if features is None:
            features = self.get_best_features()
        pprint.pprint(features)
        print(" ")
        self._current_features = features
        X_val_data = self.get_data('X_val')
        X_test_data = self.get_data('X_test')
        Y_train_data = self.get_data('Y_train')
        Y_val_data = self.get_data('Y_val')
        Y_test_data = self.get_data('Y_test')
        # Restrict every exogenous frame to the selected feature columns.
        X_train_data_temp = X_train_data[features]
        X_val_data_temp = X_val_data[features]
        X_test_data_temp = X_test_data[features]
        model = SARIMAX(endog=pd.concat([Y_train_data, Y_val_data]),
                        exog=pd.concat([X_train_data_temp, X_val_data_temp]),
                        order=(self.p, self.d, self.q))
        model_fit = model.fit(disp=0)
        self._model = model_fit
        # Exog for the forecast horizon as a (n_test, n_features) array.
        n_test = len(Y_test_data)
        test_exog = np.array(X_test_data_temp).reshape(
            n_test, len(X_test_data_temp.columns))
        Y_test_pred = model_fit.forecast(n_test, exog=test_exog)
        # NOTE(review): _test_metric appears to return (rmse, mean % error),
        # judging by how the two entries are printed below -- confirm.
        final_rmse_test = _test_metric(Y_test_data, Y_test_pred, 'rmse')
        self._test_error = final_rmse_test
        print(' ')
        print('The RMSE on the test set was: ', final_rmse_test[0])
        print('The mean percentage error is: ', final_rmse_test[1], '%.')
        print('\nFinished training. To access the most recent classifier, call get_model()')
示例#34
0
def process_data2():
    """
    Walk-forward forecast of the Styrene series, written to Results.csv.

    Reads the 2010-2015 history and the 2015-2018 actuals from Excel. For
    each date in the actuals, a SARIMAX(1,1,1) model is fitted on the log of
    'Styrene' with 'Oil_Lag' and 'Gas_Lag' as exogenous regressors, a
    two-step forecast is made using that date's exogenous values for both
    steps, and the second step is kept (back-transformed with exp). The
    actual row is then appended to the history before the next iteration.

    The collected forecasts are written to '../../Data/Results.csv' with
    columns 'timestamp' and 'value' (rounded to 2 decimals).
    """
    series = pd.read_excel('../../Data/Styrene-Net Industry Average 2010-2015.xlsx', header=0,
                           index_col=0, parse_dates=True)
    series.index.freq = 'MS'

    data = series.copy()

    actuals = pd.read_excel('../../Data/Styrene-Net Industry Average 2015-2018 Actuals.xlsx',
                            header=0, index_col=0, parse_dates=True)

    actuals.index.freq = 'MS'

    #Test ranges
    data = data['2010-01-01':]

    exog_cols = ['Oil_Lag', 'Gas_Lag']
    preds = []

    for i in actuals.index:
        # Refit on everything observed so far (history plus earlier actuals).
        model = SARIMAX(np.log(data['Styrene']), order=(1,1,1),
                        enforce_invertibility = False,
                        exog = data[exog_cols]).fit()

        # Current actual row as a one-row frame.
        row = pd.DataFrame(actuals.loc[i,:]).T

        # Two forecast steps need two exog rows; the same values are used
        # for both. (The original also assigned `fd.set_index = i+1`, which
        # overwrote the method rather than calling it and raises TypeError
        # for Timestamp + int on current pandas; it never altered the data.)
        future_exog = pd.concat([row[exog_cols], row[exog_cols]])

        yhat_log = model.forecast(steps = 2, exog = future_exog)
        # Keep only the second step, selected by position.
        yhat_log = yhat_log.iloc[[1]]
        yhat = np.exp(yhat_log)
        preds.append(yhat)

        # Append the observed row so the next fit includes it.
        data = pd.concat([data, row], axis = 0)

    # Each prediction is a one-element Series: its index label is the
    # forecast date and its single value is the forecast.
    df = pd.DataFrame({'timestamp': [p.index[0] for p in preds],
                       'value': [round(p.iloc[0], 2) for p in preds]})
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.to_csv('../../Data/Results.csv', index = False)
def test_innovations_algo_filter_kalman_filter(ar_params, ma_params, sigma2):
    """
    Check the fast ARMA innovations routines against the Kalman filter.

    The one-step prediction errors and per-observation log-likelihoods from
    the ``_arma_innovations`` routines must match those produced by
    filtering a SARIMAX model at the same parameters.
    """
    ar_poly = np.r_[1, -ar_params]
    ma_poly = np.r_[1, ma_params]

    endog = np.random.normal(size=10)
    n_obs = len(endog)

    # --- Innovations algorithm side ---
    process_acovf = arma_acovf(ar_poly, ma_poly, nobs=n_obs, sigma2=sigma2)
    # The transformed autocovariances are computed from the acovf with
    # sigma2 divided out.
    transformed = _arma_innovations.darma_transformed_acovf_fast(
        ar_poly, ma_poly, process_acovf / sigma2)
    acovf, acovf2 = np.array(transformed)
    theta, r = _arma_innovations.darma_innovations_algo_fast(
        n_obs, ar_params, ma_params, acovf, acovf2)
    raw_errors = _arma_innovations.darma_innovations_filter(
        endog, ar_params, ma_params, theta)

    # Scale r by sigma2 to get the prediction error variances.
    variances = np.array(r) * sigma2
    errors = np.array(raw_errors)

    expected_llf_obs = (-0.5 * errors**2 / variances
                        - 0.5 * np.log(2 * np.pi * variances))

    # --- Kalman filter side ---
    kalman_model = SARIMAX(endog, order=(len(ar_params), 0, len(ma_params)))
    kalman_res = kalman_model.filter(np.r_[ar_params, ma_params, sigma2])

    # Both approaches must agree.
    # NOTE: theta is not compared against the Kalman gain in this test.
    assert_allclose(errors, kalman_res.forecasts_error[0])
    assert_allclose(expected_llf_obs, kalman_res.llf_obs)

    # Per-observation log-likelihood obtained in a single call
    direct_llf_obs = _arma_innovations.darma_loglikeobs_fast(
        endog, ar_params, ma_params, sigma2)

    assert_allclose(direct_llf_obs, kalman_res.llf_obs)
def test_innovations_algo_filter_kalman_filter(ar_params, ma_params, sigma2):
    """
    Check the ARMA innovations likelihood and score against SARIMAX.

    The log-likelihood, per-observation log-likelihood, score and
    per-observation score from ``arma_innovations`` must match the SARIMAX
    model evaluated at the same parameters.
    """
    endog = np.random.normal(size=100)
    arma_args = (endog, ar_params, ma_params, sigma2)

    # Innovations algorithm side
    innov_llf = arma_innovations.arma_loglike(*arma_args)
    innov_llf_obs = arma_innovations.arma_loglikeobs(*arma_args)
    innov_score = arma_innovations.arma_score(*arma_args)
    innov_score_obs = arma_innovations.arma_scoreobs(*arma_args)

    # Kalman filter side
    n_ar, n_ma = len(ar_params), len(ma_params)
    kalman_model = SARIMAX(endog, order=(n_ar, 0, n_ma))
    kalman_params = np.r_[ar_params, ma_params, sigma2]

    # Likelihoods are compared at the default tolerance.
    assert_allclose(innov_llf, kalman_model.loglike(kalman_params))
    assert_allclose(innov_llf_obs, kalman_model.loglikeobs(kalman_params))
    # Scores get a looser absolute tolerance: agreement between the two
    # degrades as the number of observations grows.
    assert_allclose(innov_score, kalman_model.score(kalman_params),
                    atol=1e-5)
    assert_allclose(innov_score_obs, kalman_model.score_obs(kalman_params),
                    atol=1e-5)
def test_regression_with_arma_errors(ar_params, ma_params, sigma2):
    """
    Test log-likelihood computation when the model has regressors.

    The per-observation log-likelihood from the innovations algorithm,
    applied to the OLS residuals, must match a SARIMAX model with the same
    regressors filtered at the OLS coefficients.
    """
    nobs = 100

    # One noise draw per observation. `size=` must be passed by keyword:
    # the first positional argument of np.random.normal is `loc`, so
    # `normal(nobs)` would return a single scalar centred on nobs.
    eps = np.random.normal(size=nobs)
    exog = np.c_[np.ones(nobs), np.random.uniform(size=nobs)]
    beta = [5, -0.2]
    endog = np.dot(exog, beta) + eps

    # Innovations algorithm approach
    beta_hat = np.squeeze(np.linalg.pinv(exog).dot(endog))
    demeaned = endog - np.dot(exog, beta_hat)
    llf_obs = arma_innovations.arma_loglikeobs(
        demeaned, ar_params, ma_params, sigma2)

    # Kalman filter approach
    # (this works since we impose here that the regression coefficients are
    # beta_hat - in practice, the MLE estimates will not necessarily match
    # the OLS estimates beta_hat)
    mod = SARIMAX(endog, exog=exog, order=(len(ar_params), 0, len(ma_params)))
    res = mod.filter(np.r_[beta_hat, ar_params, ma_params, sigma2])

    # Test that the two approaches are identical
    assert_allclose(llf_obs, res.llf_obs)