Exemplo n.º 1
0
def diff_til_stationary(X):
    """Difference ``X`` repeatedly until ``is_stationary`` accepts it.

    Parameters
    ----------
    X : array_like
        Series to make stationary. It is checked as-is before any
        differencing is applied.

    Returns
    -------
    tuple
        ``(data, num_diffs)`` where ``data`` is the (possibly differenced)
        series and ``num_diffs`` is the number of differencing passes that
        were applied (0 when ``X`` already passes the check).
    """
    this_data = X
    num_diffs = 0
    while not is_stationary(this_data):
        num_diffs += 1
        # Difference the running result, not the original input.
        # Re-differencing ``X`` yields the same series on every pass, so the
        # loop could never finish once the first difference failed the check.
        this_data = diff(this_data)
    return (this_data, num_diffs)
def testIsDatasetStationary():
    """Difference the dataset from ``co2_mm_mlo.csv`` once and test it.

    The first difference of the ``interpolated`` column is stored in a new
    ``diff1`` column, which is then passed to ``agumentedDickeyFullerTest``.
    """
    co2 = importSeasonalARIMADataset("co2_mm_mlo.csv")

    # The (p, d, q) x (P, D, Q, s) order is SARIMAX(0, 1, 1)x(2, 0, [1, 2], 12).
    # d is 1, so stationarity is checked on the first difference.
    firstDifference = diff(co2["interpolated"], k_diff=1)
    co2["diff1"] = firstDifference

    agumentedDickeyFullerTest(co2["diff1"])
Exemplo n.º 3
0
 def DifferenceDataset(self,
                       dataset,
                       nonSeasonal=1,
                       seasonal=None,
                       seasonalPeriods=1):
     """Return ``dataset`` differenced by ``diff`` with the given orders.

     ``nonSeasonal`` is forwarded as ``k_diff``, ``seasonal`` as
     ``k_seasonal_diff`` and ``seasonalPeriods`` as ``seasonal_periods``.
     """
     differencing_options = dict(k_diff=nonSeasonal,
                                 k_seasonal_diff=seasonal,
                                 seasonal_periods=seasonalPeriods)
     return diff(dataset, **differencing_options)
Exemplo n.º 4
0
def differencing():
    """Plot the scaled first difference of the closing prices for ticker "V".

    The price history is requested through ``yf.Ticker``, the ``Close``
    column is first-differenced, divided by its sample standard deviation
    and shown with matplotlib.
    """
    price_history = yf.Ticker("V").history(start="2020-09-01", end="2020-10-3")
    closes = pd.Series(price_history["Close"])
    first_diff = diff(closes, k_diff=1)
    scaled = first_diff / statistics.stdev(first_diff)
    plt.plot(scaled)
    # Alternative diagnostics that were tried here and left disabled:
    #   sm.qqplot(res, line='45')
    #   plt.acorr(res, maxlags=30)
    plt.show()
Exemplo n.º 5
0
    def test_cases(self):
        """Check ``tools.diff`` against the expected result of each case.

        Every entry of ``self.cases`` is run on the input as given, as a
        pandas Series, as a two-column NumPy array and as a two-column
        pandas DataFrame.
        """
        # Basic cases
        for series, k_diff, seasonal_diff, seasonal_periods, result in self.cases:
            # `k_diff` rather than `diff`, so the loop variable does not
            # shadow the name of the function under test.
            args = (k_diff, seasonal_diff, seasonal_periods)

            # Test the input as given
            x = tools.diff(series, *args)
            assert_almost_equal(x, result)

            # Test as Pandas Series. This step used to build the Series
            # without differencing it, leaving the Series path unchecked.
            series = pd.Series(series)
            x = tools.diff(series, *args)
            assert_almost_equal(x, result)

            # Rewrite to test as n-dimensional array
            series = np.c_[series, series]
            result = np.c_[result, result]

            # Test Numpy array
            x = tools.diff(series, *args)
            assert_almost_equal(x, result)

            # Test as Pandas Dataframe
            series = pd.DataFrame(series)
            x = tools.diff(series, *args)
            assert_almost_equal(x, result)
Exemplo n.º 6
0
    def test_cases(self):
        """Check ``tools.diff`` against the expected result of each case.

        Every entry of ``self.cases`` is run on the input as given, as a
        pandas Series, as a two-column NumPy array and as a two-column
        pandas DataFrame.
        """
        # Basic cases
        for series, k_diff, seasonal_diff, k_seasons, result in self.cases:
            # `k_diff` rather than `diff`, so the loop variable does not
            # shadow the name of the function under test.
            args = (k_diff, seasonal_diff, k_seasons)

            # Test the input as given
            x = tools.diff(series, *args)
            assert_almost_equal(x, result)

            # Test as Pandas Series. This step used to build the Series
            # without differencing it, leaving the Series path unchecked.
            series = pd.Series(series)
            x = tools.diff(series, *args)
            assert_almost_equal(x, result)

            # Rewrite to test as n-dimensional array
            series = np.c_[series, series]
            result = np.c_[result, result]

            # Test Numpy array
            x = tools.diff(series, *args)
            assert_almost_equal(x, result)

            # Test as Pandas Dataframe
            series = pd.DataFrame(series)
            x = tools.diff(series, *args)
            assert_almost_equal(x, result)
Exemplo n.º 7
0
    def trend_dict(self, alpha1=.01, alpha2=.01, adf_alpha=.01):
        """Return the tickers whose differenced series has a significant mean.

        For every ticker in ``self.tickers`` the series from
        ``self.series_dict`` is differenced once. Tickers whose differenced
        series has an ADF p-value above ``adf_alpha`` are skipped. For the
        rest, the sample mean of the differences (the drift) is tested
        against zero.

        NOTE(review): the original docstring describes the series as log
        prices modelled as ARIMA(1,1,0); nothing in this method enforces
        that, so confirm against the code that fills ``series_dict``.

        Parameters
        ----------
        alpha1 : float, optional
            Significance level of the test of H0: no trend. ``1 - alpha1``
            is the coverage of the acceptance interval around zero.
        alpha2 : float, optional
            ``1 - alpha2`` is the coverage of the confidence interval
            reported for the trend.
        adf_alpha : float, optional
            Largest ADF p-value at which a differenced series is still
            analysed. Defaults to the previously hard-coded 0.01.

        Returns
        -------
        dict
            Maps each ticker with a significant trend to the ``(low, high)``
            confidence interval of that trend.
        """
        trends = dict()
        for ticker in self.tickers:
            ser = self.series_dict[ticker].values
            increments = tools.diff(ser)
            if stattools.adfuller(increments)[1] > adf_alpha:
                continue

            mu = np.mean(increments)
            sig = np.std(increments, ddof=1)
            n = len(increments)
            # The standard error of a sample mean is sig / sqrt(n). Dividing
            # by n makes the intervals far too narrow, so nearly every drift
            # would be reported as significant.
            std_err = sig / np.sqrt(n)

            dist0 = stats.norm(loc=0, scale=std_err)
            rrc = dist0.interval(1 - alpha1)
            if mu < rrc[0] or mu > rrc[1]:
                dist1 = stats.norm(loc=mu, scale=std_err)
                trends[ticker] = dist1.interval(1 - alpha2)

        return trends
Exemplo n.º 8
0
def split_data(data: pd.DataFrame,
               example_test_data_ratio: float) -> Dict[str, Any]:
    """Node for splitting the data set into training and test sets.

    The split ratio parameter is taken from conf/project/parameters.yml.
    The data and the parameters will be loaded and provided to your function
    automatically when the pipeline is executed and it is time to run this
    node.

    The ``unempl_m`` column is re-indexed on a monthly date range starting
    at 1994-01-01, missing values are dropped and the series is
    first-differenced before it is split.

    Parameters
    ----------
    data : pd.DataFrame
        Input frame; must have at least 30 rows and an ``unempl_m`` column.
    example_test_data_ratio : float
        Fraction of the differenced series assigned to the test set.

    Returns
    -------
    dict
        ``train_y`` (leading part of the differenced series), ``test_y``
        (the remaining tail) and ``n`` (length of the differenced series).

    Raises
    ------
    ValueError
        If ``data`` is empty or has fewer than 30 rows.
    """
    if data.empty or len(data) < 30:
        print('Data provided are too short!')
        raise ValueError('Data provided are too short!')

    # NOTE(review): recent pandas releases deprecate the 'M' alias in favour
    # of 'ME'; left unchanged to stay compatible with older versions.
    uem = pd.Series(data=list(data['unempl_m']),
                    index=pd.date_range('1994-01-01',
                                        periods=len(data),
                                        freq='M')).dropna()

    uemd = diff(uem)

    # Use one boundary for both slices. The test slice used to start at
    # len * ratio, which made it overlap most of the training data.
    split_at = round(len(uemd) * (1 - example_test_data_ratio))
    uemd_train = uemd.iloc[:split_at]
    uemd_test = uemd.iloc[split_at:]

    # When returning many variables, it is a good practice to give them names:
    return dict(
        train_y=uemd_train,
        test_y=uemd_test,
        n=len(uemd),
    )
Exemplo n.º 9
0
# Candidate differencing orders; `p` and `q` come from earlier in the script.
d = [0, 1]
pdq = list(itertools.product(p, d, q))
# Seasonal candidates reuse the same (p, d, q) grid with a period of 12.
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in pdq]
print('Examples of parameter for SARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1]))
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4]))

# The differenced series is the same for every candidate, so compute it once.
y_diff = diff(y)

# AIC of every candidate that could be fitted:
# {order: {seasonal_order: aic}}
rest_dict = {}

for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = SARIMAX(y_diff, order=param, seasonal_order=param_seasonal)
            results = mod.fit(maxiter=5, method='powell')
            aic = results.aic
        except Exception:
            # Best-effort search: skip specifications that cannot be fitted.
            # `Exception` (not a bare except) keeps Ctrl-C working.
            continue
        # Add to the inner dict. Assigning a fresh one-entry dict here kept
        # only the last seasonal order tried for each `param`.
        rest_dict.setdefault(param, {})[param_seasonal] = aic
print(rest_dict)

mod = SARIMAX(y_diff, order=(1, 0, 1), seasonal_order=(7, 1, 2, 12))

results = mod.fit(maxiter=100, method='powell')
print(results.summary().tables[1])

print(results)

results.plot_diagnostics(figsize=(18, 8))
Exemplo n.º 10
0
    def fit(self,
            start_params=None,
            transformed=True,
            includes_fixed=False,
            method=None,
            method_kwargs=None,
            gls=None,
            gls_kwargs=None,
            cov_type=None,
            cov_kwds=None,
            return_params=False,
            low_memory=False):
        """
        Fit (estimate) the parameters of the model.

        Parameters
        ----------
        start_params : array_like, optional
            Initial guess of the solution for the loglikelihood maximization.
            If None, the default is given by Model.start_params.
        transformed : bool, optional
            Whether or not `start_params` is already transformed. Default is
            True.
        includes_fixed : bool, optional
            If parameters were previously fixed with the `fix_params` method,
            this argument describes whether or not `start_params` also includes
            the fixed parameters, in addition to the free parameters. Default
            is False.
        method : str, optional
            The method used for estimating the parameters of the model. Valid
            options include 'statespace', 'innovations_mle', 'hannan_rissanen',
            'burg', 'innovations', and 'yule_walker'. Not all options are
            available for every specification (for example 'yule_walker' can
            only be used with AR(p) models).
        method_kwargs : dict, optional
            Arguments to pass to the fit function for the parameter estimator
            described by the `method` argument. The dict is copied before
            model-level options are added, so the caller's object is never
            modified.
        gls : bool, optional
            Whether or not to use generalized least squares (GLS) to estimate
            regression effects. The default is False if `method='statespace'`
            and is True otherwise.
        gls_kwargs : dict, optional
            Arguments to pass to the GLS estimation fit method. Only applicable
            if GLS estimation is used (see `gls` argument for details).
        cov_type : str, optional
            The `cov_type` keyword governs the method for calculating the
            covariance matrix of parameter estimates. Can be one of:

            - 'opg' for the outer product of gradient estimator
            - 'oim' for the observed information matrix estimator, calculated
              using the method of Harvey (1989)
            - 'approx' for the observed information matrix estimator,
              calculated using a numerical approximation of the Hessian matrix.
            - 'robust' for an approximate (quasi-maximum likelihood) covariance
              matrix that may be valid even in the presence of some
              misspecifications. Intermediate calculations use the 'oim'
              method.
            - 'robust_approx' is the same as 'robust' except that the
              intermediate calculations use the 'approx' method.
            - 'none' for no covariance matrix calculation.

            Default is 'opg' unless memory conservation is used to avoid
            computing the loglikelihood values for each observation, in which
            case the default is 'oim'.
        cov_kwds : dict or None, optional
            A dictionary of arguments affecting covariance matrix computation.

            **opg, oim, approx, robust, robust_approx**

            - 'approx_complex_step' : bool, optional - If True, numerical
              approximations are computed using complex-step methods. If False,
              numerical approximations are computed using finite difference
              methods. Default is True.
            - 'approx_centered' : bool, optional - If True, numerical
              approximations computed using finite difference methods use a
              centered approximation. Default is False.
        return_params : bool, optional
            Whether or not to return only the array of maximizing parameters.
            Default is False.
        low_memory : bool, optional
            If set to True, techniques are applied to substantially reduce
            memory usage. If used, some features of the results object will
            not be available (including smoothed results and in-sample
            prediction), although out-of-sample forecasting is possible.
            Default is False.

        Returns
        -------
        ARIMAResults

        Raises
        ------
        ValueError
            If parameters were fixed and `method` is not 'statespace'; if
            `method_kwargs` tries to override a model-level option; if
            `start_params` is given for a method that does not use them; if
            `exog` is present, GLS is disabled and `method` is not
            'statespace'; or if the estimated parameters violate the
            requested stationarity / invertibility restrictions.

        Examples
        --------
        >>> mod = sm.tsa.arima.ARIMA(endog, order=(1, 0, 0))
        >>> res = mod.fit()
        >>> print(res.summary())
        """
        # Determine which method to use
        # 1. If method is specified, make sure it is valid
        if method is not None:
            self._spec_arima.validate_estimator(method)
        # 2. Otherwise, use state space
        # TODO: may want to consider using innovations (MLE) if possible here,
        # (since in some cases it may be faster than state space), but it is
        # less tested.
        else:
            method = 'statespace'

        # Can only use fixed parameters with method='statespace'
        if self._has_fixed_params and method != 'statespace':
            raise ValueError('When parameters have been fixed, only the method'
                             ' "statespace" can be used; got "%s".' % method)

        # Handle kwargs related to the fit method.
        # Work on a copy: the model-level options below are inserted into the
        # dict, and writing them into the caller's object would make a second
        # `fit` call with the same dict fail the "Cannot override" check.
        method_kwargs = {} if method_kwargs is None else dict(method_kwargs)
        required_kwargs = []
        if method == 'statespace':
            required_kwargs = [
                'enforce_stationarity', 'enforce_invertibility',
                'concentrate_scale'
            ]
        elif method == 'innovations_mle':
            required_kwargs = ['enforce_invertibility']
        for name in required_kwargs:
            if name in method_kwargs:
                raise ValueError('Cannot override model level value for "%s"'
                                 ' when method="%s".' % (name, method))
            method_kwargs[name] = getattr(self, name)

        # Handle kwargs related to GLS estimation
        if gls_kwargs is None:
            gls_kwargs = {}

        # Handle starting parameters
        # TODO: maybe should have standard way of computing starting
        # parameters in this class?
        if start_params is not None:
            if method not in ['statespace', 'innovations_mle']:
                raise ValueError('Estimation method "%s" does not use starting'
                                 ' parameters, but `start_params` argument was'
                                 ' given.' % method)

            method_kwargs['start_params'] = start_params
            method_kwargs['transformed'] = transformed
            method_kwargs['includes_fixed'] = includes_fixed

        # Perform estimation, depending on whether we have exog or not
        p = None
        fit_details = None
        has_exog = self._spec_arima.exog is not None
        if has_exog or method == 'statespace':
            # Use GLS if it was explicitly requested (`gls = True`) or if it
            # was left at the default (`gls = None`) and the ARMA estimator is
            # anything but statespace.
            # Note: both GLS and statespace are able to handle models with
            # integration, so we don't need to difference endog or exog here.
            if has_exog and (gls or (gls is None and method != 'statespace')):
                p, fit_details = estimate_gls(
                    self.endog,
                    exog=self.exog,
                    order=self.order,
                    seasonal_order=self.seasonal_order,
                    include_constant=False,
                    arma_estimator=method,
                    arma_estimator_kwargs=method_kwargs,
                    **gls_kwargs)
            elif method != 'statespace':
                raise ValueError('If `exog` is given and GLS is disabled'
                                 ' (`gls=False`), then the only valid'
                                 " method is 'statespace'. Got '%s'." % method)
            else:
                method_kwargs.setdefault('disp', 0)

                res = super(ARIMA, self).fit(return_params=return_params,
                                             low_memory=low_memory,
                                             cov_type=cov_type,
                                             cov_kwds=cov_kwds,
                                             **method_kwargs)
                if not return_params:
                    res.fit_details = res.mlefit
        else:
            # Handle differencing if we have an integrated model
            # (these methods do not support handling integration internally,
            # so we need to manually do the differencing)
            endog = self.endog
            order = self._spec_arima.order
            seasonal_order = self._spec_arima.seasonal_order
            if self._spec_arima.is_integrated:
                warnings.warn('Provided `endog` series has been differenced'
                              ' to eliminate integration prior to parameter'
                              ' estimation by method "%s".' % method)
                endog = diff(
                    endog,
                    k_diff=self._spec_arima.diff,
                    k_seasonal_diff=self._spec_arima.seasonal_diff,
                    seasonal_periods=self._spec_arima.seasonal_periods)
                # The data is already differenced, so the estimators below
                # must see a model without integration.
                if order[1] > 0:
                    order = (order[0], 0, order[2])
                if seasonal_order[1] > 0:
                    seasonal_order = (seasonal_order[0], 0, seasonal_order[2],
                                      seasonal_order[3])

            # Now, estimate parameters
            if method == 'yule_walker':
                p, fit_details = yule_walker(endog,
                                             ar_order=order[0],
                                             demean=False,
                                             **method_kwargs)
            elif method == 'burg':
                p, fit_details = burg(endog,
                                      ar_order=order[0],
                                      demean=False,
                                      **method_kwargs)
            elif method == 'hannan_rissanen':
                p, fit_details = hannan_rissanen(endog,
                                                 ar_order=order[0],
                                                 ma_order=order[2],
                                                 demean=False,
                                                 **method_kwargs)
            elif method == 'innovations':
                p, fit_details = innovations(endog,
                                             ma_order=order[2],
                                             demean=False,
                                             **method_kwargs)
                # innovations computes estimates through the given order, so
                # we want to take the estimate associated with the given order
                p = p[-1]
            elif method == 'innovations_mle':
                p, fit_details = innovations_mle(endog,
                                                 order=order,
                                                 seasonal_order=seasonal_order,
                                                 demean=False,
                                                 **method_kwargs)

        # In all cases except method='statespace', we now need to extract the
        # parameters and, optionally, create a new results object
        if p is not None:
            # Need to check that fitted parameters satisfy given restrictions
            if (self.enforce_stationarity
                    and self._spec_arima.max_reduced_ar_order > 0
                    and not p.is_stationary):
                raise ValueError('Non-stationary autoregressive parameters'
                                 ' found with `enforce_stationarity=True`.'
                                 ' Consider setting it to False or using a'
                                 ' different estimation method, such as'
                                 ' method="statespace".')

            if (self.enforce_invertibility
                    and self._spec_arima.max_reduced_ma_order > 0
                    and not p.is_invertible):
                raise ValueError('Non-invertible moving average parameters'
                                 ' found with `enforce_invertibility=True`.'
                                 ' Consider setting it to False or using a'
                                 ' different estimation method, such as'
                                 ' method="statespace".')

            # Build the requested results
            if return_params:
                res = p.params
            else:
                # Handle memory conservation option
                if low_memory:
                    conserve_memory = self.ssm.conserve_memory
                    self.ssm.set_conserve_memory(MEMORY_CONSERVE)

                try:
                    # Perform filtering / smoothing
                    if (self.ssm.memory_no_predicted
                            or self.ssm.memory_no_gain
                            or self.ssm.memory_no_smoothing):
                        func = self.filter
                    else:
                        func = self.smooth
                    res = func(p.params,
                               transformed=True,
                               includes_fixed=True,
                               cov_type=cov_type,
                               cov_kwds=cov_kwds)

                    # Save any details from the fit method
                    res.fit_details = fit_details
                finally:
                    # Reset memory conservation even when filtering fails, so
                    # the model is not left in low-memory mode.
                    if low_memory:
                        self.ssm.set_conserve_memory(conserve_memory)

        return res
Exemplo n.º 11
0
def innovations_mle(endog,
                    order=(0, 0, 0),
                    seasonal_order=(0, 0, 0, 0),
                    demean=True,
                    enforce_invertibility=True,
                    start_params=None,
                    minimize_kwargs=None):
    """
    Estimate SARIMA parameters by MLE using innovations algorithm.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    order : tuple, optional
        The (p,d,q) order of the model for the number of AR parameters,
        differences, and MA parameters. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal component of the model for the
        AR parameters, differences, MA parameters, and periodicity. Default
        is (0, 0, 0, 0).
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the SARIMA coefficients. Default is True.
    enforce_invertibility : bool, optional
        Whether or not to transform the MA parameters to enforce invertibility
        in the moving average component of the model. Default is True.
    start_params : array_like, optional
        Initial guess of the solution for the loglikelihood maximization. The
        AR polynomial must be stationary. If `enforce_invertibility=True` the
        MA polynomial must be invertible. If not provided, default starting
        parameters are computed using the Hannan-Rissanen method.
    minimize_kwargs : dict, optional
        Arguments to pass to scipy.optimize.minimize. The dict (and its
        nested `options` dict) is copied before defaults are filled in, so
        the caller's object is never modified.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes four components: `spec`, containing the `SARIMAXSpecification`
        instance corresponding to the input arguments; `minimize_kwargs`,
        containing any keyword arguments passed to `minimize`; `start_params`,
        containing the untransformed starting parameters passed to `minimize`;
        and `minimize_results`, containing the output from `minimize`.

    Raises
    ------
    ValueError
        If the given `start_params` imply a non-stationary AR process, or a
        non-invertible MA process while `enforce_invertibility=True`.

    Notes
    -----
    The primary reference is [1]_, section 5.2.

    Note: we do not include `enforce_stationarity` as an argument, because this
    function requires stationarity.

    TODO: support concentrating out the scale (should be easy: use sigma2=1
          and then compute sigma2=np.sum(u**2 / v) / len(u); would then need to
          redo llf computation in the Cython function).

    TODO: add support for fixed parameters

    TODO: add support for secondary optimization that does not enforce
          stationarity / invertibility, starting from first step's parameters

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    spec = SARIMAXSpecification(endog,
                                order=order,
                                seasonal_order=seasonal_order,
                                enforce_stationarity=True,
                                enforce_invertibility=enforce_invertibility)
    endog = spec.endog
    if spec.is_integrated:
        warnings.warn('Provided `endog` series has been differenced to'
                      ' eliminate integration prior to ARMA parameter'
                      ' estimation.')
        endog = diff(endog,
                     k_diff=spec.diff,
                     k_seasonal_diff=spec.seasonal_diff,
                     seasonal_periods=spec.seasonal_periods)
    if demean:
        endog = endog - endog.mean()

    p = SARIMAXParams(spec=spec)

    if start_params is None:
        sp = SARIMAXParams(spec=spec)

        # Estimate starting parameters via Hannan-Rissanen
        hr, hr_results = hannan_rissanen(endog,
                                         ar_order=spec.ar_order,
                                         ma_order=spec.ma_order,
                                         demean=False)
        if spec.seasonal_periods == 0:
            # If no seasonal component, then `hr` gives starting parameters
            sp.params = hr.params
        else:
            # If we do have a seasonal component, estimate starting parameters
            # for the seasonal lags using the residuals from the previous step
            # NOTE(review): the constructed specification is discarded;
            # presumably kept for its validation side effects - confirm.
            _ = SARIMAXSpecification(
                endog,
                seasonal_order=seasonal_order,
                enforce_stationarity=True,
                enforce_invertibility=enforce_invertibility)

            ar_order = np.array(spec.seasonal_ar_lags) * spec.seasonal_periods
            ma_order = np.array(spec.seasonal_ma_lags) * spec.seasonal_periods
            seasonal_hr, seasonal_hr_results = hannan_rissanen(
                hr_results.resid,
                ar_order=ar_order,
                ma_order=ma_order,
                demean=False)

            # Set the starting parameters
            sp.ar_params = hr.ar_params
            sp.ma_params = hr.ma_params
            sp.seasonal_ar_params = seasonal_hr.ar_params
            sp.seasonal_ma_params = seasonal_hr.ma_params
            sp.sigma2 = seasonal_hr.sigma2

        # Then, require starting parameters to be stationary and invertible
        if not sp.is_stationary:
            sp.ar_params = [0] * sp.k_ar_params
            sp.seasonal_ar_params = [0] * sp.k_seasonal_ar_params

        if not sp.is_invertible and spec.enforce_invertibility:
            sp.ma_params = [0] * sp.k_ma_params
            sp.seasonal_ma_params = [0] * sp.k_seasonal_ma_params

        start_params = sp.params
    else:
        sp = SARIMAXParams(spec=spec)
        sp.params = start_params
        if not sp.is_stationary:
            raise ValueError('Given starting parameters imply a non-stationary'
                             ' AR process. Innovations algorithm requires a'
                             ' stationary process.')

        if spec.enforce_invertibility and not sp.is_invertible:
            raise ValueError('Given starting parameters imply a non-invertible'
                             ' MA process with `enforce_invertibility=True`.')

    def obj(params):
        # Negative log-likelihood at the constrained version of `params`.
        p.params = spec.constrain_params(params)

        return -arma_innovations.arma_loglike(
            endog,
            ar_params=-p.reduced_ar_poly.coef[1:],
            ma_params=p.reduced_ma_poly.coef[1:],
            sigma2=p.sigma2)

    # Untransform the starting parameters
    unconstrained_start_params = spec.unconstrain_params(start_params)

    # Perform the minimization.
    # Copy the kwargs and the nested options so the `maxiter` default below
    # is not written into the caller's dicts.
    minimize_kwargs = {} if minimize_kwargs is None else dict(minimize_kwargs)
    minimize_kwargs['options'] = dict(minimize_kwargs.get('options') or {})
    minimize_kwargs['options'].setdefault('maxiter', 100)
    minimize_results = minimize(obj, unconstrained_start_params,
                                **minimize_kwargs)

    # TODO: show warning if convergence failed.

    # Reverse the transformation to get the optimal parameters
    p.params = spec.constrain_params(minimize_results.x)

    # Construct other results
    other_results = Bunch({
        'spec': spec,
        'minimize_results': minimize_results,
        'minimize_kwargs': minimize_kwargs,
        'start_params': start_params
    })

    return p, other_results
Exemplo n.º 12
0
 def doStationary(self, dataframe):
     """Return ``dataframe`` after one ``diff`` call with default arguments."""
     return diff(dataframe)
Exemplo n.º 13
0
# Plot each component of the decomposition held in df2.
# `legend` is only an on/off switch, so the legend text is passed as `label`.
df2['seasonal'].plot(label='seasonal', legend=True)
df2['trend'].plot(label='trend', legend=True)
df2['seasadj'].plot(label='seasadj', legend=True)
df2['irregular'].plot(label='irregular', legend=True)
df2['seasadj_irr'].plot(label='fully adjusted', legend=True)
df2['seasadj_log'].plot()  # 1st difference model in order to eliminate trend

df2.head()

#stationarity

from statsmodels.tsa.statespace.tools import diff
from statsmodels.tsa.stattools import adfuller

# NOTE(review): `diff` is applied twice, so this is the second difference
# even though the column is named `diff_1_seasadj` - confirm the intent.
df2['diff_1_seasadj'] = diff(diff(df2['seasadj_log']))
df2['diff_1_seasadj'].plot()

# Differencing shortens the series, so the realigned column has NaN rows.
# Fill them by assignment: `np.NaN` was removed in NumPy 2.0, and an in-place
# `replace` on a column selection may not write back to df2.
df2['diff_1_seasadj'] = df2['diff_1_seasadj'].fillna(0)
adfuller(df2['diff_1_seasadj'])  #reject Ho, conclude Ha: no unit root

#ACF(MA) - PACF(AR)

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

plot_acf(df2['diff_1_seasadj'])  # MA(4)

plot_pacf(df2['diff_1_seasadj'])  # AR(0)

#self-developed ARIMA
Exemplo n.º 14
0
    '$ax$', '$0$', '$x^2$'
]
# Training window: the first `trainEnd` samples of every series.
datT = data[:, 0:trainEnd]

if plotACFs:
    # Plot the autocorrelation function of every training series, save the
    # figure and stop the script.
    for s, training_series in enumerate(datT):
        vacf = acf(training_series)
        plt.plot(vacf, label=snames[s])
    plt.title("Autocorrelation function ACF")
    plt.legend()
    plt.savefig('ACF.png', dpi=200, bbox_inches='tight')
    exit()

differenced = None
if order2 > 0:
    # Difference as many times as the fitted model integrates (d = order2).
    # The order was hard-coded to 2, so the plotted series did not match the
    # model whenever order2 != 2.
    differenced = diff(data[series], k_diff=order2)

mod = ARIMA(datT[series], order=(order1, order2, order3))
res = mod.fit()
print(res.summary())

p = mod.predict(res.params, end=100)
plt.title('ARIMA prediction (right of red line=predicted, left=training)')
plt.plot(p,
         label='ARIMA, order=(%d,%d,%d) predicted' % (order1, order2, order3))
plt.plot(data[series], label='True (%s)' % (snames[series]))
plt.plot(dataTrends[series],
         label='True (%s) trend (denoised)' % (snames[series]))
if differenced is not None:
    plt.plot(differenced, label='Differenced')
    plt.plot(data[series] - dataTrends[series], label='Random component')
Exemplo n.º 15
0
def gls(endog,
        exog=None,
        order=(0, 0, 0),
        seasonal_order=(0, 0, 0, 0),
        include_constant=None,
        n_iter=None,
        max_iter=50,
        tolerance=1e-8,
        arma_estimator='innovations_mle',
        arma_estimator_kwargs=None):
    """
    Estimate ARMAX parameters by GLS.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    exog : array_like, optional
        Array of exogenous regressors. If not included, then `include_constant`
        must be True, and then `exog` will only include the constant column.
    order : tuple, optional
        The (p,d,q) order of the ARIMA model. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal ARIMA model.
        Default is (0, 0, 0, 0).
    include_constant : bool, optional
        Whether to add a constant term in `exog` if it's not already there.
        The estimate of the constant will then appear as one of the `exog`
        parameters. If `exog` is None, then the constant will represent the
        mean of the process. Default is True if the specified model does not
        include integration and False otherwise.
    n_iter : int, optional
        Optionally iterate feasible GLS a specific number of times. Default is
        to iterate to convergence. If set, this argument overrides the
        `max_iter` and `tolerance` arguments.
    max_iter : int, optional
        Maximum number of feasible GLS iterations. Default is 50. If `n_iter`
        is set, it overrides this argument.
    tolerance : float, optional
        Tolerance for determining convergence of feasible GLS iterations. If
        `n_iter` is set, this argument has no effect.
        Default is 1e-8.
    arma_estimator : str, optional
        The estimator used for estimating the ARMA model. This option should
        not generally be used, unless the default method is failing or is
        otherwise unsuitable. Not all values will be valid, depending on the
        specified model orders (`order` and `seasonal_order`). Possible values
        are:
        * 'innovations_mle' - can be used with any specification
        * 'statespace' - can be used with any specification
        * 'hannan_rissanen' - can be used with any ARMA non-seasonal model
        * 'yule_walker' - only non-seasonal consecutive
          autoregressive (AR) models
        * 'burg' - only non-seasonal, consecutive autoregressive (AR) models
        * 'innovations' - only non-seasonal, consecutive moving
          average (MA) models.
        The default is 'innovations_mle'.
    arma_estimator_kwargs : dict, optional
        Arguments to pass to the ARMA estimator.

    Returns
    -------
    parameters : SARIMAXParams object
        Contains the parameter estimates from the final iteration.
    other_results : Bunch
        Includes eight components: `spec`, `params`, `converged`,
        `differences`, `iterations`, `arma_estimator`, 'arma_estimator_kwargs',
        and `arma_results`. `converged` is None when `n_iter` is given
        (convergence is not checked in that case).

    Raises
    ------
    ValueError
        If `include_constant` is True while the model includes integration.

    Warns
    -----
    A warning is issued when the series are differenced because the model is
    integrated, and when `max_iter` iterations complete without convergence
    (only if `n_iter` is not given).

    Notes
    -----
    The primary reference is [1]_, section 6.6. In particular, the
    implementation follows the iterative procedure described in section 6.6.2.
    Construction of the transformed variables used to compute the GLS estimator
    described in section 6.6.1 is done via an application of the innovations
    algorithm (rather than explicit construction of the transformation matrix).

    Note that if the specified model includes integration, both the `endog` and
    `exog` series will be differenced prior to estimation and a warning will
    be issued to alert the user.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    # Handle n_iter
    # (the convergence test in the loop is skipped whenever `n_iter` is set,
    # so exactly `n_iter` iterations are run)
    if n_iter is not None:
        max_iter = n_iter
        tolerance = np.inf

    # Default for include_constant is True if there is no integration and
    # False otherwise
    integrated = order[1] > 0 or seasonal_order[1] > 0
    if include_constant is None:
        include_constant = not integrated
    elif include_constant and integrated:
        raise ValueError('Cannot include a constant in an integrated model.')

    # Handle including the constant (need to do it now so that the constant
    # parameter can be included in the specification as part of `exog`.)
    if include_constant:
        exog = np.ones_like(endog) if exog is None else add_constant(exog)

    # Create the SARIMAX specification
    spec = SARIMAXSpecification(endog,
                                exog=exog,
                                order=order,
                                seasonal_order=seasonal_order)
    # From here on use the versions of the data held by the specification
    endog = spec.endog
    exog = spec.exog

    # Handle integration
    if spec.is_integrated:
        # TODO: this is the approach suggested by BD (see Remark 1 in
        # section 6.6.2 and Example 6.6.3), but maybe there are some cases
        # where we don't want to force this behavior on the user?
        warnings.warn('Provided `endog` and `exog` series have been'
                      ' differenced to eliminate integration prior to GLS'
                      ' parameter estimation.')
        endog = diff(endog,
                     k_diff=spec.diff,
                     k_seasonal_diff=spec.seasonal_diff,
                     seasonal_periods=spec.seasonal_periods)
        exog = diff(exog,
                    k_diff=spec.diff,
                    k_seasonal_diff=spec.seasonal_diff,
                    seasonal_periods=spec.seasonal_periods)
    # Stack endog (first column) and exog (remaining columns) so that both can
    # be transformed by a single call in Step 3
    augmented = np.c_[endog, exog]

    # Validate arma_estimator
    spec.validate_estimator(arma_estimator)
    if arma_estimator_kwargs is None:
        arma_estimator_kwargs = {}

    # Step 1: OLS
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    exog_params = res_ols.params
    resid = res_ols.resid

    # 0th iteration parameters
    # (regression coefficients from OLS, all ARMA coefficients set to zero)
    p = SARIMAXParams(spec=spec)
    p.exog_params = exog_params
    if spec.max_ar_order > 0:
        p.ar_params = np.zeros(spec.k_ar_params)
    if spec.max_seasonal_ar_order > 0:
        p.seasonal_ar_params = np.zeros(spec.k_seasonal_ar_params)
    if spec.max_ma_order > 0:
        p.ma_params = np.zeros(spec.k_ma_params)
    if spec.max_seasonal_ma_order > 0:
        p.seasonal_ma_params = np.zeros(spec.k_seasonal_ma_params)
    p.sigma2 = res_ols.scale

    ar_params = p.ar_params
    seasonal_ar_params = p.seasonal_ar_params
    ma_params = p.ma_params
    seasonal_ma_params = p.seasonal_ma_params
    sigma2 = p.sigma2

    # Step 2 - 4: iterate feasible GLS to convergence
    # Index 0 of each list below belongs to the initial OLS step, which has no
    # ARMA results and no parameter difference to report
    arma_results = [None]
    differences = [None]
    parameters = [p]
    converged = False if n_iter is None else None
    # Keeps `i` defined for the results even if the loop body never runs
    i = 0
    for i in range(1, max_iter + 1):
        prev = exog_params

        # Step 2: ARMA
        # TODO: allow estimator-specific kwargs?
        if arma_estimator == 'yule_walker':
            p_arma, res_arma = yule_walker(resid,
                                           ar_order=spec.ar_order,
                                           demean=False,
                                           **arma_estimator_kwargs)
        elif arma_estimator == 'burg':
            p_arma, res_arma = burg(resid,
                                    ar_order=spec.ar_order,
                                    demean=False,
                                    **arma_estimator_kwargs)
        elif arma_estimator == 'innovations':
            out, res_arma = innovations(resid,
                                        ma_order=spec.ma_order,
                                        demean=False,
                                        **arma_estimator_kwargs)
            # Only the last element of the returned sequence is used
            p_arma = out[-1]
        elif arma_estimator == 'hannan_rissanen':
            p_arma, res_arma = hannan_rissanen(resid,
                                               ar_order=spec.ar_order,
                                               ma_order=spec.ma_order,
                                               demean=False,
                                               **arma_estimator_kwargs)
        else:
            # For later iterations, use a "warm start" for parameter estimates
            # (speeds up estimation and convergence)
            start_params = (None if i == 1 else np.r_[ar_params, ma_params,
                                                      seasonal_ar_params,
                                                      seasonal_ma_params,
                                                      sigma2])
            # Note: in each case, we do not pass in the order of integration
            # since we have already differenced the series
            tmp_order = (spec.order[0], 0, spec.order[2])
            tmp_seasonal_order = (spec.seasonal_order[0], 0,
                                  spec.seasonal_order[2],
                                  spec.seasonal_order[3])
            if arma_estimator == 'innovations_mle':
                p_arma, res_arma = innovations_mle(
                    resid,
                    order=tmp_order,
                    seasonal_order=tmp_seasonal_order,
                    demean=False,
                    start_params=start_params,
                    **arma_estimator_kwargs)
            else:
                p_arma, res_arma = statespace(
                    resid,
                    order=tmp_order,
                    seasonal_order=tmp_seasonal_order,
                    include_constant=False,
                    start_params=start_params,
                    **arma_estimator_kwargs)

        ar_params = p_arma.ar_params
        seasonal_ar_params = p_arma.seasonal_ar_params
        ma_params = p_arma.ma_params
        seasonal_ma_params = p_arma.seasonal_ma_params
        sigma2 = p_arma.sigma2
        arma_results.append(res_arma)

        # Step 3: GLS
        # Compute transformed variables that satisfy OLS assumptions
        # Note: In section 6.6.1 of Brockwell and Davis (2016), these
        # transformations are developed as computed by left multiplication
        # by a matrix T. However, explicitly constructing T and then
        # performing the left-multiplications does not scale well when nobs is
        # large. Instead, we can retrieve the transformed variables as the
        # residuals of the innovations algorithm (the `normalize=True`
        # argument applies a Prais-Winsten-type normalization to the first few
        # observations to ensure homoskedasticity). Brockwell and Davis
        # mention that they also take this approach in practice.
        # NOTE(review): only the non-seasonal `ar_params` / `ma_params` are
        # passed here; the seasonal parameters are not -- confirm this is
        # intended for seasonal specifications.
        tmp, _ = arma_innovations.arma_innovations(augmented,
                                                   ar_params=ar_params,
                                                   ma_params=ma_params,
                                                   normalize=True)
        # First column is the transformed endog, the rest the transformed exog
        u = tmp[:, 0]
        x = tmp[:, 1:]

        # OLS on transformed variables
        mod_gls = OLS(u, x)
        res_gls = mod_gls.fit()
        exog_params = res_gls.params
        # Residuals for the next ARMA step use the untransformed data
        resid = endog - np.dot(exog, exog_params)

        # Construct the parameter vector for the iteration
        p = SARIMAXParams(spec=spec)
        p.exog_params = exog_params
        if spec.max_ar_order > 0:
            p.ar_params = ar_params
        if spec.max_seasonal_ar_order > 0:
            p.seasonal_ar_params = seasonal_ar_params
        if spec.max_ma_order > 0:
            p.ma_params = ma_params
        if spec.max_seasonal_ma_order > 0:
            p.seasonal_ma_params = seasonal_ma_params
        p.sigma2 = sigma2
        parameters.append(p)

        # Check for convergence
        # (element-wise change in the regression coefficients only)
        difference = np.abs(exog_params - prev)
        differences.append(difference)
        if n_iter is None and np.all(difference < tolerance):
            converged = True
            break
    else:
        # Reached only when the loop ends without `break`
        if n_iter is None:
            warnings.warn('Feasible GLS failed to converge in %d iterations.'
                          ' Consider increasing the maximum number of'
                          ' iterations using the `max_iter` argument or'
                          ' reducing the required tolerance using the'
                          ' `tolerance` argument.' % max_iter)

    # Construct final results
    p = parameters[-1]
    other_results = Bunch({
        'spec': spec,
        'params': parameters,
        'converged': converged,
        'differences': differences,
        'iterations': i,
        'arma_estimator': arma_estimator,
        'arma_estimator_kwargs': arma_estimator_kwargs,
        'arma_results': arma_results,
    })

    return p, other_results
Exemplo n.º 16
0
def simple_auto_stationarize(df,
                             verbosity=None,
                             alpha=None,
                             multitest=None,
                             get_conclusions=False,
                             get_actions=False):
    """Auto-stationarize the given time-series dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        A dataframe composed solely of numeric columns. Rows are expected to
        represent time steps and columns to represent variables.
    verbosity : int, logging.Logger, optional
        If an int is given, it is interpreted as the logging level to use. See
        https://docs.python.org/3/library/logging.html#levels for details. If a
        logging.Logger object is given, it is used for printing instead, with
        appropriate logging levels. If no value is provided, the default
        logging.Logger behaviour is used. The previous verbosity is restored
        before returning, including when an exception is raised.
    alpha : float, optional
        Family-wise error rate (FWER) or false discovery rate (FDR), depending
        on the method used for multiple hypothesis testing error control. If no
        value is provided, a default value of 0.05 (5%) is used.
    multitest : str, optional
        The multiple hypothesis testing error control method to use. If no
        value is provided, the Benjamini–Yekutieli method ('fdr_by') is used.
        See `the documentation of statsmodels' multipletests method for supported values <https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html>`.
    get_conclusions : bool, defaults to False
        If set to true, a conclusions dict is returned.
    get_actions : bool, defaults to False
        If set to true, an actions dict is returned.

    Returns
    -------
    results : pandas.DataFrame or dict
        By default, only the transformed dataframe is returned. However, if
        get_conclusions or get_actions are set to True, a dict is returned
        instead, with the following mappings:
        - `postdf` - Maps to the transformed dataframe. When any column is
          differenced, the leading rows lost to differencing are dropped from
          every column so that all columns stay aligned in time.
        - `conclusions` - Maps to a dict mapping each column name to the
          arrived conclusion regarding its stationarity.
        - `actions` - Maps to a dict mapping each column name to the
          transformations performed on it to stationarize it.

    Raises
    ------
    ValueError
        If any column of `df` is not numeric.
    """  # noqa: E501
    if alpha is None:
        alpha = DEF_ALPHA
    if multitest is None:
        multitest = 'fdr_by'

    prev_verbosity = None
    if verbosity is not None:
        prev_verbosity = set_verbosity_level(verbosity)
    try:
        return _simple_auto_stationarize(
            df, alpha, multitest, get_conclusions, get_actions)
    finally:
        # Restore the caller's verbosity even if the pipeline raised.
        if verbosity is not None:
            set_verbosity_level(prev_verbosity)


def _simple_auto_stationarize(df, alpha, multitest, get_conclusions,
                              get_actions):
    """Run the test / correct / transform pipeline with resolved settings."""
    logger = get_logger()
    logger.info("Starting to auto-stationarize a dataframe!")
    logger.info("Starting to check input data validity...")
    logger.info(f"Data shape (time, variables) is {df.shape}.")
    # the first axis - rows - is expected to represent the time dimension,
    # while the second axis - columns - is expected to represent variables;
    # thus, the first expected to be much longer than the second
    logger.info(
        "Checking current data orientation (rows=time, columns=variables)...")
    if df.shape[1] >= df.shape[0]:
        logger.warning((
            "stationarizer's input dataframe has more columns than rows! "
            "Columns are expected to represent variables, while rows represent"
            " time steps, and thus the input dataframe is expected to have "
            "more rows than columns. Either the input data is inverted, or the"
            " data has far more variables than samples."))
    else:
        logger.info("Data orientation is valid.")
    # All columns must be numeric: log the problem and actually raise it
    # (the error used to be constructed and logged, but never raised).
    if not all(np.issubdtype(dtype, np.number) for dtype in df.dtypes):
        err = ValueError(
            "All columns of stationarizer's input dataframe must be numeric!")
        logger.error(err)
        raise err

    # util var
    n = len(df.columns)

    # testing for unit root
    logger.info(
        ("Checking for the presence of a unit root in the input time series "
         "using the Augmented Dicky-Fuller test"))
    logger.info(
        ("Reminder:\n "
         "Null Hypothesis: The series has a unit root (value of a=1); meaning,"
         " it is NOT stationary.\n"
         "Alternate Hypothesis: The series has no unit root; it is either "
         "stationary or non-stationary of a different model than unit root."))
    adf_results = []
    for colname in df.columns:
        result = adfuller(df[colname], regression='ct')
        logger.info(
            (f"{colname}: test statistic={result[0]}, p-val={result[1]}."))
        adf_results.append(result)

    # testing for trend stationarity
    logger.info((
        "Testing for trend stationarity of input series using the KPSS test."))
    logger.info(("Reminder:\n"
                 "Null Hypothesis (H0): The series is trend-stationarity.\n"
                 "Alternative Hypothesis (H1): The series has a unit root."))
    kpss_results = []
    for colname in df.columns:
        result = kpss(df[colname], regression='ct')
        logger.info(
            (f"{colname}: test statistic={result[0]}, p-val={result[1]}."))
        kpss_results.append(result)

    # Multiple-testing correction over the 2 * n p-values (ADF first, then
    # KPSS). The message reports the method and alpha that are actually used.
    logger.info(
        ("Controlling the multiple hypothesis testing error rate using the "
         f"'{multitest}' method with α={alpha}."))
    pvals = [x[1] for x in adf_results] + [x[1] for x in kpss_results]
    correction = multipletests(
        pvals=pvals,
        alpha=alpha,
        method=multitest,
        is_sorted=False,
    )
    reject = correction[0]
    corrected_pvals = correction[1]
    adf_rejections = reject[:n]
    kpss_rejections = reject[n:]
    adf_corrected_pvals = corrected_pvals[:n]
    kpss_corrected_pvals = corrected_pvals[n:]

    # interpret results
    logger.info("Interpreting test results after FDR control...")
    conclusion_counts = {}
    conclusions = {}
    actions = {}
    for i, colname in enumerate(df.columns):
        conclusion = conclude_adf_and_kpss_results(
            adf_reject=adf_rejections[i], kpss_reject=kpss_rejections[i])
        conclusion_counts[conclusion] = (
            conclusion_counts.get(conclusion, 0) + 1)
        trans = CONCLUSION_TO_TRANSFORMATIONS[conclusion]
        conclusions[colname] = conclusion
        actions[colname] = trans
        logger.info((f"--{colname}--\n "
                     f"ADF corrected p-val: {adf_corrected_pvals[i]}, "
                     f"H0 rejected: {adf_rejections[i]}.\n"
                     f"KPSS corrected p-val: {kpss_corrected_pvals[i]}, "
                     f"H0 rejected: {kpss_rejections[i]}.\n"
                     f"Conclusion: {conclusion}\n Transformations: {trans}."))

    # making non-stationary series stationary!
    post_cols = {}
    logger.info("Applying transformations...")
    for colname in df.columns:
        srs = df[colname]
        if Transformation.DETREND in actions[colname]:
            logger.info(f"Detrending {colname} (len={len(srs)}).")
            srs = detrend(srs, order=1, axis=0)
        if Transformation.DIFFRENTIATE in actions[colname]:
            logger.info(f"Diffrentiating {colname} (len={len(srs)}).")
            srs = diff(srs, k_diff=1)
        post_cols[colname] = srs
        logger.info(f"{colname} transformed (len={len(post_cols[colname])}).")

    # Equalizing lengths. Differencing removes observations from the START of
    # a series, so the LAST `min_len` values of every column (and the last
    # `min_len` rows of the index) are kept to stay aligned in time. Plain
    # arrays are assigned so pandas does not re-align on the index and
    # introduce NaN.
    min_len = min(len(col) for col in post_cols.values())
    postdf = df.iloc[len(df) - min_len:].copy()
    for colname in df.columns:
        values = np.asarray(post_cols[colname])
        postdf[colname] = values[len(values) - min_len:]
    logger.info(f"Post transformation shape: {postdf.shape}")

    for k, count in conclusion_counts.items():
        ratio = 100 * (count / len(df.columns))
        logger.info(f"{count} series ({ratio}%) found with conclusion: {k}.")

    if not get_actions and not get_conclusions:
        return postdf
    results = {'postdf': postdf}
    if get_conclusions:
        results['conclusions'] = conclusions
    if get_actions:
        results['actions'] = actions
    return results
Exemplo n.º 17
0
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.tsa.statespace.tools import diff

# Three stacked panels: raw data, Box-Cox transformed, seasonally differenced.
airpass = sm.datasets.get_rdataset("AirPassengers", "datasets")
fig, axs = plt.subplots(3)
axs[0].set_title('Monthly Airline Passenger Numbers 1949-1960, in thousands')
axs[0].plot(pd.Series(airpass.data["value"]))
# boxcox returns the transformed data and the fitted lambda (unused here).
series, lmbda = stats.boxcox(airpass.data["value"])
axs[1].plot(series)
axs[1].set_title('Box Cox Transformation')
# One seasonal difference at lag 12 (y_t - y_{t-12}), matching the panel
# title. `k_diff=12` would instead apply twelve successive first differences.
differenced = diff(series, k_diff=0, k_seasonal_diff=1, seasonal_periods=12)
axs[2].plot(differenced)
axs[2].set_title('Seasonally differenced (m=12)')
fig.tight_layout()
plt.show()
Exemplo n.º 18
0
                                   seasonal='mul',
                                   seasonal_periods=12).fit()
# Forecast 36 steps ahead with the fitted model
final_predictions = final_model.forecast(36)

# Compare the observed data with the forecast on the same axes
df['Thousands of Passengers'].plot(legend=True, label='data')
final_predictions.plot(legend=True, label='final prediction')
# plt.show()

# ------- stationarity
# Transform a non-stationary series into a stationary one
df2 = pd.read_csv('data/samples.csv', index_col=0, parse_dates=True)
# Subtract from the series a copy of itself shifted by one row
# (the result is not stored; it is only evaluated for display)
df2['b'] - df2['b'].shift(1)
# ...or the same first difference via statsmodels
# (result likewise not stored)
diff(df2['b'], k_diff=1)

# ------- ACF and PACF
# Non-stationary example => df
plot_acf(df, lags=40)
# plt.show()

# Stationary example
df3 = pd.read_csv('data/DailyTotalFemaleBirths.csv',
                  index_col='Date',
                  parse_dates=True)
# Declare the index frequency as daily
df3.index.freq = 'D'

plot_acf(df3, lags=40)
plot_pacf(df3, lags=40)
plt.show()
Exemplo n.º 19
0
    # dropna(inplace=True) returns None, so assigning its result would rebind
    # `df` to None; use the copying form and keep the returned frame instead.
    df = df.dropna()

# Inspect the index (bare expressions here only display in an interactive
# session; they have no effect when run as a script)
df.index

# Declare the index frequency using the 'MS' alias
df.index.freq = 'MS'

df.head()
df.tail()

len(df)

from statsmodels.tsa.statespace.tools import diff

# First difference computed by hand (result is not stored)...
df['b'] - df['b'].shift(1)

# ...and the same first difference via statsmodels, plotted
diff(df['b'], k_diff=1).plot()

# ACF and PACF

import statsmodels.api as sm
from statsmodels.tsa.stattools import acovf, acf, pacf, pacf_yw, pacf_ols

# NOTE(review): absolute, machine-specific Windows paths -- these must be
# adapted before the script can run anywhere else.
file1 = r'C:\Damon\Udemy\Python for Time Series Data Analysis\TSA_COURSE_NOTEBOOKS\Data\airline_passengers.csv'
df1 = pd.read_csv(file1, index_col=0, parse_dates=True)
# Shorten the column name for convenience
df1.rename(columns={'Thousands of Passengers': 'Pass_K'}, inplace=True)
df1.index.freq = 'MS'

file2 = r'C:\Damon\Udemy\Python for Time Series Data Analysis\TSA_COURSE_NOTEBOOKS\Data\DailyTotalFemaleBirths.csv'
df2 = pd.read_csv(file2, index_col='Date', parse_dates=True)
df2.index.freq = 'D'
Exemplo n.º 20
0
    def fit(self, start_params=None, ensure_causality=True,
            ensure_invertibility=True, **minimize_kwargs):
        R"""
        Estimates the \vec{phi}, \vec{theta}, sigma^2 and \vec{beta} via
        fitting an SARIMA-X(p, d, q)(P, D, Q, m) to `y` via MLE with Kalman
        filter.

        Parameters
        ----------
        start_params : array_like, optional
            Includes \vec{phi}, \vec{PHI}, \vec{theta}, \vec{THETA}, \sigma^2
            and \vec{beta}:

            [\phi_1,... \phi_p, \Phi_1, ... \Phi_P, \theta_1, ... \theta_q,
            \Theta_1, ... \Theta_Q, \sigma^2, \beta_1, ... \beta_k].

            Used to kick-off the MLE. Default is to use Hannan-Rissanen
            for \phi, \PHI, \theta, \THETA and \sigma^2; \beta are initialized
            to zeros. If Hannan-Rissanen raises ValueError, all coefficients
            start at zero and \sigma^2 at the variance of the (differenced)
            endog. The supplied object is copied, never modified.

        ensure_causality : bool, optional, default: True
            If True and the starting AR parameters (seasonal and
            non-seasonal) are not stationary, they are reset to zero before
            the optimisation starts.

        ensure_invertibility : bool, optional, default: True
            If True and the starting MA parameters (seasonal and
            non-seasonal) are not invertible, they are reset to zero before
            the optimisation starts.

        **minimize_kwargs
            Passed to scipy.optimize.minimize. `method` defaults to "BFGS".

        Returns
        -------
            self
        """
        # Unpack orders (to prevent over-attribute access)
        p, d, q = self.order
        P, D, Q, m = self.seas_order

        # Difference endog and exog if needed
        # NOTE(review): this overwrites self.endog / self.exog, so calling
        # fit() twice on an integrated model differences the data twice --
        # confirm that callers only fit once per instance.
        if d != 0 or D != 0:
            self.endog = diff(self.endog, k_diff=d, k_seasonal_diff=D,
                              seasonal_periods=m)
            if self.exog is not None:
                self.exog = diff(self.exog, k_diff=d, k_seasonal_diff=D,
                                 seasonal_periods=m)

        # Get number of X-regressors
        k = self.exog.shape[1] if self.exog is not None else 0

        # Number of ARMA coefficients; sigma^2 sits right after them and the
        # k regression coefficients come last.
        n_arma = p + P + q + Q

        # If no initial params supplied, get it from Hannan-Rissanen
        if start_params is None:
            # TODO: Do this large AR fitting in one shot with reduced polys
            # as in sgd_sarimax's _get_design_mat. Until then, we `try`.
            try:
                # First for the non-seasonal part
                hr, hr_results = hannan_rissanen(self.endog, ar_order=p,
                                                 ma_order=q, demean=False)

                # Then the seasonal part, fitted on the residuals at
                # multiples of the seasonal period
                seas_hr_ar_order = m * np.arange(1, P + 1)
                seas_hr_ma_order = m * np.arange(1, Q + 1)
                seas_hr, _ = hannan_rissanen(
                    hr_results.resid, ar_order=seas_hr_ar_order,
                    ma_order=seas_hr_ma_order, demean=False,
                )
            except ValueError:
                print("series too short for large AR(p) of hannan-risanen.")
                # Keep the documented layout [ARMA..., sigma^2, beta...]:
                # sigma^2 must precede the k beta slots, otherwise (k > 0)
                # the variance lands on the last beta and sigma^2 starts at 0.
                start_params = np.r_[
                    np.zeros(n_arma),
                    self.endog.var(),
                    np.zeros(k),
                ]
            else:
                # Stack them all
                start_params = np.hstack((hr.ar_params, seas_hr.ar_params,
                                          hr.ma_params, seas_hr.ma_params,
                                          seas_hr.sigma2, np.zeros(k)))

        # Work on a private float array: accepts any array_like (e.g. a list)
        # and keeps the resets below from modifying the caller's object.
        start_params = np.array(start_params, dtype=float)

        # sigma^2 estimate is to be nonnegative; bounding it is currently
        # disabled, so no bounds are passed to the optimiser.
        bounds = None

        # Check if start_params satisfy stationarity and invertibility requests
        self.params.ar_params = start_params[:p]
        self.params.seasonal_ar_params = start_params[p:p + P]
        self.params.ma_params = start_params[p + P:p + P + q]
        self.params.seasonal_ma_params = start_params[p + P + q:n_arma]
        self.params.sigma2 = start_params[n_arma]

        if ensure_causality and not self.params.is_stationary:
            start_params[:p + P] = 0.
        if ensure_invertibility and not self.params.is_invertible:
            start_params[p + P:n_arma] = 0
        self.start_params = start_params

        # Maximize likelihood
        # NOTE(review): the objective is minimised, so self.filter(...)[0] is
        # presumably the negative log-likelihood -- confirm against filter().
        def _kalman_sarimax_loglike(params):
            return self.filter(params)[0]
        minimize_kwargs.setdefault("method", "BFGS")
        res = minimize(_kalman_sarimax_loglike, start_params, bounds=bounds,
                       **minimize_kwargs)
        self.mle_result = res

        # Put the estimated parameters to self.params
        self._set_params()

        return self
Exemplo n.º 21
0
# Root mean squared error of the test-set predictions
# (bare expression: displayed in an interactive session, not stored)
np.sqrt(mean_squared_error(test_data, test_predictions))

# Value recorded from the original session:
# 55.45564409492191

# Fit Holt-Winters with multiplicative trend and seasonality and a
# seasonal period of 12 on the training column
final_model = ExponentialSmoothing(training_data['Thousands of Passengers'],
                                   trend='mul',
                                   seasonal='mul',
                                   seasonal_periods=12).fit()

# Load the sample data, parsing the first column as a date index
df2 = pd.read_csv('samples.csv', index_col=0, parse_dates=True)

df2.info()

from statsmodels.tsa.statespace.tools import diff

# First difference of column 'b' (result not stored; sample output follows)
diff(df2['b'], k_diff=1)
'''
Out[818]: 
1950-02-01    -5.0
1950-03-01    -5.0
1950-04-01    -2.0
1950-05-01    -2.0
1950-06-01     3.0
1950-07-01     8.0
1950-08-01    -8.0
1950-09-01     9.0
1950-10-01     2.0
1950-11-01    -1.0
1950-12-01    -4.0
1951-01-01     1.0
1951-02-01    -3.0