示例#1
0
    def __init__(self,
                 endog,
                 exog=None,
                 order=(0, 0, 0),
                 seasonal_order=(0, 0, 0, 0),
                 trend=None,
                 enforce_stationarity=True,
                 enforce_invertibility=True,
                 concentrate_scale=False,
                 dates=None,
                 freq=None,
                 missing='none'):
        # Resolve the default trend: a constant ('c') when neither the regular
        # nor the seasonal difference order is positive, no trend ('n')
        # otherwise.
        # TODO: if trend='c', then we could alternatively use `demean=True` in
        # the estimation methods rather than setting up `exog` and using GLS.
        # Not sure if it's worth the trouble though.
        has_integration = order[1] > 0 or seasonal_order[1] > 0
        if trend is None:
            trend = 'n' if has_integration else 'c'

        # Arguments that are forwarded unchanged to both the specification
        # object and the base class constructor.
        shared_kwargs = dict(
            order=order,
            seasonal_order=seasonal_order,
            concentrate_scale=concentrate_scale,
            dates=dates,
            freq=freq,
            missing=missing,
        )

        # Build the specification. The stationarity / invertibility flags are
        # deliberately passed as None here: the estimators should not be
        # restricted by them; instead the resulting parameter estimates are
        # checked against these criteria afterwards.
        self._spec_arima = SARIMAXSpecification(
            endog,
            exog=exog,
            trend=trend,
            enforce_stationarity=None,
            enforce_invertibility=None,
            **shared_kwargs)

        # From here on, use the exog array as stored by the specification's
        # underlying model data.
        spec_exog = self._spec_arima._model.data.orig_exog

        # Initialize the base SARIMAX class. No trend value is handed to the
        # base class, since ARIMA standardizes the trend to always be part of
        # exog, while the base SARIMAX class puts it in the transition
        # equation.
        super(ARIMA, self).__init__(
            endog,
            spec_exog,
            enforce_stationarity=enforce_stationarity,
            enforce_invertibility=enforce_invertibility,
            **shared_kwargs)
示例#2
0
    def __init__(self, endog, exog=None, order=(1, 0, 0),
                 seas_order=(0, 0, 0, 0)):
        """
        Validate the inputs and store the model orders as attributes.

        Parameters
        ----------
        endog : object
            Observed series; validated with `_check_endog`.
        exog : object, optional
            Exogenous regressors; validated with `_check_exog`.
        order : sequence of length 3, optional
            Unpacked into the attributes `p`, `d` and `q`.
        seas_order : sequence of length 4, optional
            Unpacked into the attributes `P`, `D`, `Q` and `seas_period`.
            When the first three entries are all zero, the whole order is
            replaced by `(0, 0, 0, 0)`.
        """
        # Validate endog & exog
        self.endog = _check_endog(endog)
        self.exog = _check_exog(exog)

        # Keep copies to use in case of differencing
        self.the_endog = self.endog.copy()
        self.the_exog = self.exog.copy() if exog is not None else None

        # Orders are attributes too
        self.order = order
        self.seas_order = seas_order

        # "has" a specification and params (helps validate orders, also)
        self.spec = SARIMAXSpecification(self.the_endog, self.the_exog,
                                         self.order, self.seas_order)
        self.params = SARIMAXParams(self.spec)

        # If P == D == Q == 0, m stays the same; but should be 0, too.
        # The slice is coerced to a tuple first because a list never compares
        # equal to a tuple, so e.g. `[0, 0, 0, 12]` would otherwise slip
        # through without being normalized.
        if tuple(self.seas_order[:3]) == (0, 0, 0):
            self.seas_order = (0, 0, 0, 0)

        # After validation, unpack order
        self.p, self.d, self.q = self.order
        self.P, self.D, self.Q, self.seas_period = self.seas_order

        # For convenience
        self.m = self.seas_period
示例#3
0
def test_validate_fixed_params(ar_order, ma_order, fixed_params,
                               invalid_fixed_params):
    # Test validation both with `_validate_fixed_params` directly and
    # indirectly through `hannan_rissanen`.
    #
    # When `invalid_fixed_params` is None, both calls are expected to succeed.
    # Otherwise both are expected to raise a ValueError carrying the exact
    # message built below.

    endog = np.random.normal(size=100)
    spec = SARIMAXSpecification(endog, ar_order=ar_order, ma_order=ma_order)

    if invalid_fixed_params is None:
        _validate_fixed_params(fixed_params, spec.param_names)
        hannan_rissanen(
            endog, ar_order=ar_order, ma_order=ma_order,
            fixed_params=fixed_params, unbiased=False
        )
    else:
        valid_params = sorted(set(spec.param_names) - {'sigma2'})
        msg = (
            f"Invalid fixed parameter(s): {invalid_fixed_params}. "
            f"Please select among {valid_params}."
        )
        # Using a direct `assert` on the message instead of `match`, since
        # the error message contains regex characters.
        # The assertions must live *outside* the `pytest.raises` block:
        # statements placed after the raising call inside the block are never
        # executed. The raised exception is available as `e.value`.
        with pytest.raises(ValueError) as e:
            _validate_fixed_params(fixed_params, spec.param_names)
        assert str(e.value) == msg

        with pytest.raises(ValueError) as e:
            hannan_rissanen(
                endog, ar_order=ar_order, ma_order=ma_order,
                fixed_params=fixed_params, unbiased=False
            )
        assert str(e.value) == msg
def durbin_levinson(endog, ar_order=0, demean=True, adjusted=False):
    """
    Estimate AR parameters at multiple orders using Durbin-Levinson recursions.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Input time series array, assumed to be stationary.
    ar_order : int, optional
        Autoregressive order. Default is 0.
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the autoregressive coefficients. Default is True.
    adjusted : bool, optional
        Whether to use the "adjusted" autocovariance estimator, which uses
        n - h degrees of freedom rather than n. This option can result in
        a non-positive definite autocovariance matrix. Default is False.

    Returns
    -------
    parameters : list of SARIMAXParams objects
        List elements correspond to estimates at different `ar_order`. For
        example, parameters[0] is an `SARIMAXParams` instance corresponding to
        `ar_order=0`.
    other_results : Bunch
        Includes one component, `spec`, containing the `SARIMAXSpecification`
        instance corresponding to the input arguments (i.e. the one
        constructed from `endog` and `ar_order`).

    Raises
    ------
    ValueError
        If the specification's AR orders are not consecutive.

    Notes
    -----
    The primary reference is [1]_, section 2.5.1.

    This procedure assumes that the series is stationary.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    max_spec = SARIMAXSpecification(endog, ar_order=ar_order)
    endog = max_spec.endog

    # Make sure we have a consecutive process
    if not max_spec.is_ar_consecutive:
        raise ValueError('Durbin-Levinson estimation unavailable for models'
                         ' with seasonal or otherwise non-consecutive AR'
                         ' orders.')

    # Autocovariances at lags 0, ..., ar_order
    gamma = acovf(endog,
                  adjusted=adjusted,
                  fft=True,
                  demean=demean,
                  nlag=max_spec.ar_order)

    # If no AR component, just a variance computation
    if max_spec.ar_order == 0:
        ar_params = [None]
        sigma2 = [gamma[0]]
    # Otherwise, AR model
    else:
        # Row i of `Phi` holds (in its first i + 1 entries) the coefficients
        # of the AR(i + 1) fit; `v[i]` is the variance paired with the AR(i)
        # fit when the outputs are assembled below.
        Phi = np.zeros((max_spec.ar_order, max_spec.ar_order))
        v = np.zeros(max_spec.ar_order + 1)

        Phi[0, 0] = gamma[1] / gamma[0]
        v[0] = gamma[0]
        v[1] = v[0] * (1 - Phi[0, 0]**2)

        for i in range(1, max_spec.ar_order):
            tmp = Phi[i - 1, :i]
            Phi[i, i] = (gamma[i + 1] - np.dot(tmp, gamma[i:0:-1])) / v[i]
            Phi[i, :i] = (tmp - Phi[i, i] * tmp[::-1])
            v[i + 1] = v[i] * (1 - Phi[i, i]**2)

        ar_params = [None] + [Phi[i, :i + 1] for i in range(max_spec.ar_order)]
        sigma2 = v

    # Compute output: one parameters object per order 0, ..., ar_order
    out = []
    for i in range(max_spec.ar_order + 1):
        spec = SARIMAXSpecification(ar_order=i)
        p = SARIMAXParams(spec=spec)
        if i == 0:
            p.params = sigma2[i]
        else:
            p.params = np.r_[ar_params[i], sigma2[i]]
        out.append(p)

    # Construct other results. Report the specification built from the input
    # arguments rather than the last per-order spec left over from the loop
    # above (which was created without `endog`).
    other_results = Bunch({
        'spec': max_spec,
    })

    return out, other_results
示例#5
0
class ARIMA(sarimax.SARIMAX):
    """
    Autoregressive Integrated Moving Average (ARIMA) model, and extensions

    This model is the basic interface for ARIMA-type models, including those
    with exogenous regressors and those with seasonal components. The most
    general form of the model is SARIMAX(p, d, q)x(P, D, Q, s). It also allows
    all specialized cases, including

    - autoregressive models: AR(p)
    - moving average models: MA(q)
    - mixed autoregressive moving average models: ARMA(p, q)
    - integration models: ARIMA(p, d, q)
    - seasonal models: SARIMA(P, D, Q, s)
    - regression with errors that follow one of the above ARIMA-type models

    Parameters
    ----------
    endog : array_like
        The observed time-series process :math:`y`.
    exog : array_like, optional
        Array of exogenous regressors.
    order : tuple, optional
        The (p,d,q) order of the model for the autoregressive, differences, and
        moving average components. d is always an integer, while p and q may
        either be integers or lists of integers.
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal component of the model for the
        AR parameters, differences, MA parameters, and periodicity. Default
        is (0, 0, 0, 0). D and s are always integers, while P and Q
        may either be integers or lists of positive integers.
    trend : str{'n','c','t','ct'} or iterable, optional
        Parameter controlling the deterministic trend. Can be specified as a
        string where 'c' indicates a constant term, 't' indicates a
        linear trend in time, and 'ct' includes both. Can also be specified as
        an iterable defining a polynomial, as in `numpy.poly1d`, where
        `[1,1,0,1]` would denote :math:`a + bt + ct^3`. Default is 'c' for
        models without integration, and no trend for models with integration.
    enforce_stationarity : bool, optional
        Whether or not to require the autoregressive parameters to correspond
        to a stationarity process.
    enforce_invertibility : bool, optional
        Whether or not to require the moving average parameters to correspond
        to an invertible process.
    concentrate_scale : bool, optional
        Whether or not to concentrate the scale (variance of the error term)
        out of the likelihood. This reduces the number of parameters by one.
        This is only applicable when considering estimation by numerical
        maximum likelihood.
    trend_offset : int, optional
        The offset at which to start time trend values. Default is 1, so that
        if `trend='t'` the trend is equal to 1, 2, ..., nobs. Typically is only
        set when the model created by extending a previous dataset.
    dates : array_like of datetime, optional
        If no index is given by `endog` or `exog`, an array-like object of
        datetime objects can be provided.
    freq : str, optional
        If no index is given by `endog` or `exog`, the frequency of the
        time-series may be specified here as a Pandas offset or offset string.
    missing : str
        Available options are 'none', 'drop', and 'raise'. If 'none', no nan
        checking is done. If 'drop', any observations with nans are dropped.
        If 'raise', an error is raised. Default is 'none'.
    validate_specification : bool, optional
        Passed through unchanged to both `SARIMAXSpecification` and the base
        `SARIMAX` constructor. Default is True.

    Notes
    -----
    This model incorporates both exogenous regressors and trend components
    through "regression with ARIMA errors".

    `enforce_stationarity` and `enforce_invertibility` are specified in the
    constructor because they affect loglikelihood computations, and so should
    not be changed on the fly. This is why they are not instead included as
    arguments to the `fit` method.

    TODO: should we use concentrate_scale=True by default?

    Examples
    --------
    >>> mod = sm.tsa.arima.ARIMA(endog, order=(1, 0, 0))
    >>> res = mod.fit()
    >>> print(res.summary())
    """
    def __init__(self,
                 endog,
                 exog=None,
                 order=(0, 0, 0),
                 seasonal_order=(0, 0, 0, 0),
                 trend=None,
                 enforce_stationarity=True,
                 enforce_invertibility=True,
                 concentrate_scale=False,
                 trend_offset=1,
                 dates=None,
                 freq=None,
                 missing='none',
                 validate_specification=True):
        # Default for trend
        # 'c' if there is no integration and 'n' otherwise
        # TODO: if trend='c', then we could alternatively use `demean=True` in
        # the estimation methods rather than setting up `exog` and using GLS.
        # Not sure if it's worth the trouble though.
        integrated = order[1] > 0 or seasonal_order[1] > 0
        if trend is None and not integrated:
            trend = 'c'
        elif trend is None:
            trend = 'n'

        # Construct the specification
        # (don't pass specific values of enforce stationarity/invertibility,
        # because we don't actually want to restrict the estimators based on
        # this criteria. Instead, we'll just make sure that the parameter
        # estimates from those methods satisfy the criteria.)
        self._spec_arima = SARIMAXSpecification(
            endog,
            exog=exog,
            order=order,
            seasonal_order=seasonal_order,
            trend=trend,
            enforce_stationarity=None,
            enforce_invertibility=None,
            concentrate_scale=concentrate_scale,
            trend_offset=trend_offset,
            dates=dates,
            freq=freq,
            missing=missing,
            validate_specification=validate_specification)
        # From here on `exog` is the array stored by the specification, which
        # has the trend variables prepended (see the slicing below).
        exog = self._spec_arima._model.data.orig_exog

        # Raise an error if the model includes a trend term whose order is
        # lower than the total order of integration (e.g. a constant in an
        # integrated model)
        has_trend = len(self._spec_arima.trend_terms) > 0
        if has_trend:
            lowest_trend = np.min(self._spec_arima.trend_terms)
            if lowest_trend < order[1] + seasonal_order[1]:
                raise ValueError(
                    'In models with integration (`d > 0`) or seasonal'
                    ' integration (`D > 0`), trend terms of lower order than'
                    ' `d + D` cannot be (as they would be eliminated due to'
                    ' the differencing operation). For example, a constant'
                    ' cannot be included in an ARIMA(1, 1, 1) model, but'
                    ' including a linear trend, which would have the same'
                    ' effect as fitting a constant to the differenced data,'
                    ' is allowed.')

        # Keep the given `exog` by removing the prepended trend variables
        input_exog = None
        if exog is not None:
            if _is_using_pandas(exog, None):
                input_exog = exog.iloc[:, self._spec_arima.k_trend:]
            else:
                input_exog = exog[:, self._spec_arima.k_trend:]

        # Initialize the base SARIMAX class
        # Note: we don't pass in a trend value to the base class, since ARIMA
        # standardizes the trend to always be part of exog, while the base
        # SARIMAX class puts it in the transition equation.
        super(ARIMA,
              self).__init__(endog,
                             exog,
                             trend=None,
                             order=order,
                             seasonal_order=seasonal_order,
                             enforce_stationarity=enforce_stationarity,
                             enforce_invertibility=enforce_invertibility,
                             concentrate_scale=concentrate_scale,
                             dates=dates,
                             freq=freq,
                             missing=missing,
                             validate_specification=validate_specification)
        self.trend = trend

        # Save the input exog and input exog names, so that we can refer to
        # them later (see especially `ARIMAResults.append`)
        self._input_exog = input_exog
        if exog is not None:
            self._input_exog_names = self.exog_names[self._spec_arima.k_trend:]
        else:
            self._input_exog_names = None

        # Override the public attributes for k_exog and k_trend to reflect the
        # distinction here (for the purpose of the superclass, these are both
        # combined as `k_exog`)
        self.k_exog = self._spec_arima.k_exog
        self.k_trend = self._spec_arima.k_trend

        # Remove some init kwargs that aren't used in this model
        unused = [
            'measurement_error', 'time_varying_regression', 'mle_regression',
            'simple_differencing', 'hamilton_representation'
        ]
        self._init_keys = [key for key in self._init_keys if key not in unused]

    @property
    def _res_classes(self):
        # Results / wrapper classes associated with the 'fit' operation
        return {'fit': (ARIMAResults, ARIMAResultsWrapper)}

    def fit(self,
            start_params=None,
            transformed=True,
            includes_fixed=False,
            method=None,
            method_kwargs=None,
            gls=None,
            gls_kwargs=None,
            cov_type=None,
            cov_kwds=None,
            return_params=False,
            low_memory=False):
        """
        Fit (estimate) the parameters of the model.

        Parameters
        ----------
        start_params : array_like, optional
            Initial guess of the solution for the loglikelihood maximization.
            If None, the default is given by Model.start_params.
        transformed : bool, optional
            Whether or not `start_params` is already transformed. Default is
            True.
        includes_fixed : bool, optional
            If parameters were previously fixed with the `fix_params` method,
            this argument describes whether or not `start_params` also includes
            the fixed parameters, in addition to the free parameters. Default
            is False.
        method : str, optional
            The method used for estimating the parameters of the model. Valid
            options include 'statespace', 'innovations_mle', 'hannan_rissanen',
            'burg', 'innovations', and 'yule_walker'. Not all options are
            available for every specification (for example 'yule_walker' can
            only be used with AR(p) models).
        method_kwargs : dict, optional
            Arguments to pass to the fit function for the parameter estimator
            described by the `method` argument. The given dictionary is copied
            and is never modified by this method.
        gls : bool, optional
            Whether or not to use generalized least squares (GLS) to estimate
            regression effects. The default is False if `method='statespace'`
            and is True otherwise.
        gls_kwargs : dict, optional
            Arguments to pass to the GLS estimation fit method. Only applicable
            if GLS estimation is used (see `gls` argument for details).
        cov_type : str, optional
            The `cov_type` keyword governs the method for calculating the
            covariance matrix of parameter estimates. Can be one of:

            - 'opg' for the outer product of gradient estimator
            - 'oim' for the observed information matrix estimator, calculated
              using the method of Harvey (1989)
            - 'approx' for the observed information matrix estimator,
              calculated using a numerical approximation of the Hessian matrix.
            - 'robust' for an approximate (quasi-maximum likelihood) covariance
              matrix that may be valid even in the presence of some
              misspecifications. Intermediate calculations use the 'oim'
              method.
            - 'robust_approx' is the same as 'robust' except that the
              intermediate calculations use the 'approx' method.
            - 'none' for no covariance matrix calculation.

            Default is 'opg' unless memory conservation is used to avoid
            computing the loglikelihood values for each observation, in which
            case the default is 'oim'.
        cov_kwds : dict or None, optional
            A dictionary of arguments affecting covariance matrix computation.

            **opg, oim, approx, robust, robust_approx**

            - 'approx_complex_step' : bool, optional - If True, numerical
              approximations are computed using complex-step methods. If False,
              numerical approximations are computed using finite difference
              methods. Default is True.
            - 'approx_centered' : bool, optional - If True, numerical
              approximations computed using finite difference methods use a
              centered approximation. Default is False.
        return_params : bool, optional
            Whether or not to return only the array of maximizing parameters.
            Default is False.
        low_memory : bool, optional
            If set to True, techniques are applied to substantially reduce
            memory usage. If used, some features of the results object will
            not be available (including smoothed results and in-sample
            prediction), although out-of-sample forecasting is possible.
            Default is False.

        Returns
        -------
        ARIMAResults

        Raises
        ------
        ValueError
            If parameters have been fixed and `method` is not 'statespace';
            if `method_kwargs` tries to override a model-level setting; if
            `start_params` is given for a method that does not use starting
            parameters; if `exog` is present, GLS is disabled and `method` is
            not 'statespace'; or if the estimated parameters violate an
            enforced stationarity / invertibility restriction.

        Examples
        --------
        >>> mod = sm.tsa.arima.ARIMA(endog, order=(1, 0, 0))
        >>> res = mod.fit()
        >>> print(res.summary())
        """
        # Determine which method to use
        # 1. If method is specified, make sure it is valid
        if method is not None:
            self._spec_arima.validate_estimator(method)
        # 2. Otherwise, use state space
        # TODO: may want to consider using innovations (MLE) if possible here,
        # (since in some cases it may be faster than state space), but it is
        # less tested.
        else:
            method = 'statespace'

        # Can only use fixed parameters with method='statespace'
        if self._has_fixed_params and method != 'statespace':
            raise ValueError('When parameters have been fixed, only the method'
                             ' "statespace" can be used; got "%s".' % method)

        # Handle kwargs related to the fit method.
        # Work on a shallow copy: entries are added below, and mutating the
        # caller's dictionary would make a second `fit` call that reuses the
        # same dictionary fail the "cannot override" check.
        method_kwargs = {} if method_kwargs is None else dict(method_kwargs)
        required_kwargs = []
        if method == 'statespace':
            required_kwargs = [
                'enforce_stationarity', 'enforce_invertibility',
                'concentrate_scale'
            ]
        elif method == 'innovations_mle':
            required_kwargs = ['enforce_invertibility']
        for name in required_kwargs:
            if name in method_kwargs:
                raise ValueError('Cannot override model level value for "%s"'
                                 ' when method="%s".' % (name, method))
            method_kwargs[name] = getattr(self, name)

        # Handle kwargs related to GLS estimation
        if gls_kwargs is None:
            gls_kwargs = {}

        # Handle starting parameters
        # TODO: maybe should have standard way of computing starting
        # parameters in this class?
        if start_params is not None:
            if method not in ['statespace', 'innovations_mle']:
                raise ValueError('Estimation method "%s" does not use starting'
                                 ' parameters, but `start_params` argument was'
                                 ' given.' % method)

            method_kwargs['start_params'] = start_params
            method_kwargs['transformed'] = transformed
            method_kwargs['includes_fixed'] = includes_fixed

        # Perform estimation, depending on whether we have exog or not
        p = None
        fit_details = None
        has_exog = self._spec_arima.exog is not None
        if has_exog or method == 'statespace':
            # Use GLS if it was explicitly requested (`gls = True`) or if it
            # was left at the default (`gls = None`) and the ARMA estimator is
            # anything but statespace.
            # Note: both GLS and statespace are able to handle models with
            # integration, so we don't need to difference endog or exog here.
            if has_exog and (gls or (gls is None and method != 'statespace')):
                p, fit_details = estimate_gls(
                    self.endog,
                    exog=self.exog,
                    order=self.order,
                    seasonal_order=self.seasonal_order,
                    include_constant=False,
                    arma_estimator=method,
                    arma_estimator_kwargs=method_kwargs,
                    **gls_kwargs)
            elif method != 'statespace':
                raise ValueError('If `exog` is given and GLS is disabled'
                                 ' (`gls=False`), then the only valid'
                                 " method is 'statespace'. Got '%s'." % method)
            else:
                method_kwargs.setdefault('disp', 0)

                res = super(ARIMA, self).fit(return_params=return_params,
                                             low_memory=low_memory,
                                             cov_type=cov_type,
                                             cov_kwds=cov_kwds,
                                             **method_kwargs)
                if not return_params:
                    res.fit_details = res.mlefit
        else:
            # Handle differencing if we have an integrated model
            # (these methods do not support handling integration internally,
            # so we need to manually do the differencing)
            endog = self.endog
            order = self._spec_arima.order
            seasonal_order = self._spec_arima.seasonal_order
            if self._spec_arima.is_integrated:
                warnings.warn('Provided `endog` series has been differenced'
                              ' to eliminate integration prior to parameter'
                              ' estimation by method "%s".' % method)
                endog = diff(
                    endog,
                    k_diff=self._spec_arima.diff,
                    k_seasonal_diff=self._spec_arima.seasonal_diff,
                    seasonal_periods=self._spec_arima.seasonal_periods)
                if order[1] > 0:
                    order = (order[0], 0, order[2])
                if seasonal_order[1] > 0:
                    seasonal_order = (seasonal_order[0], 0, seasonal_order[2],
                                      seasonal_order[3])

            # Now, estimate parameters
            if method == 'yule_walker':
                p, fit_details = yule_walker(endog,
                                             ar_order=order[0],
                                             demean=False,
                                             **method_kwargs)
            elif method == 'burg':
                p, fit_details = burg(endog,
                                      ar_order=order[0],
                                      demean=False,
                                      **method_kwargs)
            elif method == 'hannan_rissanen':
                p, fit_details = hannan_rissanen(endog,
                                                 ar_order=order[0],
                                                 ma_order=order[2],
                                                 demean=False,
                                                 **method_kwargs)
            elif method == 'innovations':
                p, fit_details = innovations(endog,
                                             ma_order=order[2],
                                             demean=False,
                                             **method_kwargs)
                # innovations computes estimates through the given order, so
                # we want to take the estimate associated with the given order
                p = p[-1]
            elif method == 'innovations_mle':
                p, fit_details = innovations_mle(endog,
                                                 order=order,
                                                 seasonal_order=seasonal_order,
                                                 demean=False,
                                                 **method_kwargs)

        # In all cases except method='statespace', we now need to extract the
        # parameters and, optionally, create a new results object
        if p is not None:
            # Need to check that fitted parameters satisfy given restrictions
            if (self.enforce_stationarity
                    and self._spec_arima.max_reduced_ar_order > 0
                    and not p.is_stationary):
                raise ValueError('Non-stationary autoregressive parameters'
                                 ' found with `enforce_stationarity=True`.'
                                 ' Consider setting it to False or using a'
                                 ' different estimation method, such as'
                                 ' method="statespace".')

            if (self.enforce_invertibility
                    and self._spec_arima.max_reduced_ma_order > 0
                    and not p.is_invertible):
                raise ValueError('Non-invertible moving average parameters'
                                 ' found with `enforce_invertibility=True`.'
                                 ' Consider setting it to False or using a'
                                 ' different estimation method, such as'
                                 ' method="statespace".')

            # Build the requested results
            if return_params:
                res = p.params
            else:
                # Handle memory conservation option
                if low_memory:
                    conserve_memory = self.ssm.conserve_memory
                    self.ssm.set_conserve_memory(MEMORY_CONSERVE)

                try:
                    # Perform filtering / smoothing
                    if (self.ssm.memory_no_predicted
                            or self.ssm.memory_no_gain
                            or self.ssm.memory_no_smoothing):
                        func = self.filter
                    else:
                        func = self.smooth
                    res = func(p.params,
                               transformed=True,
                               includes_fixed=True,
                               cov_type=cov_type,
                               cov_kwds=cov_kwds)

                    # Save any details from the fit method
                    res.fit_details = fit_details
                finally:
                    # Reset memory conservation, even if filtering or
                    # smoothing raised, so the model is not left in the
                    # memory-conserving state
                    if low_memory:
                        self.ssm.set_conserve_memory(conserve_memory)

        return res
示例#6
0
    def __init__(self,
                 endog,
                 exog=None,
                 order=(0, 0, 0),
                 seasonal_order=(0, 0, 0, 0),
                 trend=None,
                 enforce_stationarity=True,
                 enforce_invertibility=True,
                 concentrate_scale=False,
                 trend_offset=1,
                 dates=None,
                 freq=None,
                 missing='none',
                 validate_specification=True):
        # Resolve the default trend: a constant ('c') when neither the regular
        # nor the seasonal difference order is positive, no trend ('n')
        # otherwise.
        # TODO: if trend='c', then we could alternatively use `demean=True` in
        # the estimation methods rather than setting up `exog` and using GLS.
        # Not sure if it's worth the trouble though.
        has_integration = order[1] > 0 or seasonal_order[1] > 0
        if trend is None:
            trend = 'n' if has_integration else 'c'

        # Arguments that are forwarded unchanged to both the specification
        # object and the base class constructor.
        shared_kwargs = dict(
            order=order,
            seasonal_order=seasonal_order,
            concentrate_scale=concentrate_scale,
            dates=dates,
            freq=freq,
            missing=missing,
            validate_specification=validate_specification,
        )

        # Build the specification. The stationarity / invertibility flags are
        # deliberately passed as None: the estimators should not be restricted
        # by them; instead the parameter estimates they produce are checked
        # against these criteria afterwards.
        self._spec_arima = SARIMAXSpecification(
            endog,
            exog=exog,
            trend=trend,
            enforce_stationarity=None,
            enforce_invertibility=None,
            trend_offset=trend_offset,
            **shared_kwargs)
        exog = self._spec_arima._model.data.orig_exog

        # Reject trend terms whose order is below the total order of
        # integration (e.g. a constant in an integrated model).
        trend_terms = self._spec_arima.trend_terms
        if (len(trend_terms) > 0
                and np.min(trend_terms) < order[1] + seasonal_order[1]):
            raise ValueError(
                'In models with integration (`d > 0`) or seasonal'
                ' integration (`D > 0`), trend terms of lower order than'
                ' `d + D` cannot be (as they would be eliminated due to'
                ' the differencing operation). For example, a constant'
                ' cannot be included in an ARIMA(1, 1, 1) model, but'
                ' including a linear trend, which would have the same'
                ' effect as fitting a constant to the differenced data,'
                ' is allowed.')

        # Recover the user-supplied regressors by dropping the trend columns
        # that were prepended to `exog`.
        if exog is None:
            input_exog = None
        elif _is_using_pandas(exog, None):
            input_exog = exog.iloc[:, self._spec_arima.k_trend:]
        else:
            input_exog = exog[:, self._spec_arima.k_trend:]

        # Initialize the base SARIMAX class. No trend value is handed to the
        # base class, since ARIMA standardizes the trend to always be part of
        # exog, while the base SARIMAX class puts it in the transition
        # equation.
        super(ARIMA, self).__init__(
            endog,
            exog,
            trend=None,
            enforce_stationarity=enforce_stationarity,
            enforce_invertibility=enforce_invertibility,
            **shared_kwargs)
        self.trend = trend

        # Remember the input exog and its column names for later use (see
        # especially `ARIMAResults.append`).
        self._input_exog = input_exog
        self._input_exog_names = (
            self.exog_names[self._spec_arima.k_trend:]
            if exog is not None else None)

        # The superclass treats trend and regressors jointly as `k_exog`;
        # expose the separate counts through the public attributes instead.
        self.k_exog = self._spec_arima.k_exog
        self.k_trend = self._spec_arima.k_trend

        # Drop the init kwargs that this model does not use.
        not_applicable = (
            'measurement_error', 'time_varying_regression', 'mle_regression',
            'simple_differencing', 'hamilton_representation',
        )
        self._init_keys = [
            name for name in self._init_keys if name not in not_applicable
        ]
示例#7
0
def innovations_mle(endog,
                    order=(0, 0, 0),
                    seasonal_order=(0, 0, 0, 0),
                    demean=True,
                    enforce_invertibility=True,
                    start_params=None,
                    minimize_kwargs=None):
    """
    Estimate SARIMA parameters by MLE using innovations algorithm.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    order : tuple, optional
        The (p,d,q) order of the model for the number of AR parameters,
        differences, and MA parameters. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal component of the model for the
        AR parameters, differences, MA parameters, and periodicity. Default
        is (0, 0, 0, 0).
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the SARIMA coefficients. Default is True.
    enforce_invertibility : bool, optional
        Whether or not to transform the MA parameters to enforce invertibility
        in the moving average component of the model. Default is True.
    start_params : array_like, optional
        Initial guess of the solution for the loglikelihood maximization. The
        AR polynomial must be stationary. If `enforce_invertibility=True` the
        MA polynomial must be invertible. If not provided, default starting
        parameters are computed using the Hannan-Rissanen method (and any
        non-stationary / non-invertible components of those defaults are
        replaced by zeros).
    minimize_kwargs : dict, optional
        Arguments to pass to scipy.optimize.minimize. The dictionary given by
        the caller is not modified; a copy is made and, unless the caller
        supplied one, `options['maxiter']` is set to 100 in that copy.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes four components: `spec`, containing the `SARIMAXSpecification`
        instance corresponding to the input arguments; `minimize_kwargs`,
        containing the keyword arguments actually passed to `minimize`
        (i.e. including the default `maxiter` option); `start_params`,
        containing the (constrained) starting parameters, which are
        untransformed before being passed to `minimize`; and
        `minimize_results`, containing the output from `minimize`.

    Raises
    ------
    ValueError
        If `start_params` is given and implies a non-stationary AR process,
        or implies a non-invertible MA process while
        `enforce_invertibility=True`.

    Notes
    -----
    The primary reference is [1]_, section 5.2.

    Note: we do not include `enforce_stationarity` as an argument, because this
    function requires stationarity.

    If the specification is integrated, `endog` is differenced before
    estimation and a warning is issued.

    TODO: support concentrating out the scale (should be easy: use sigma2=1
          and then compute sigma2=np.sum(u**2 / v) / len(u); would then need to
          redo llf computation in the Cython function).

    TODO: add support for fixed parameters

    TODO: add support for secondary optimization that does not enforce
          stationarity / invertibility, starting from first step's parameters

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    spec = SARIMAXSpecification(endog,
                                order=order,
                                seasonal_order=seasonal_order,
                                enforce_stationarity=True,
                                enforce_invertibility=enforce_invertibility)
    endog = spec.endog
    if spec.is_integrated:
        warnings.warn('Provided `endog` series has been differenced to'
                      ' eliminate integration prior to ARMA parameter'
                      ' estimation.')
        endog = diff(endog,
                     k_diff=spec.diff,
                     k_seasonal_diff=spec.seasonal_diff,
                     seasonal_periods=spec.seasonal_periods)
    if demean:
        endog = endog - endog.mean()

    # Parameter container that is updated in place by the objective function
    p = SARIMAXParams(spec=spec)

    if start_params is None:
        sp = SARIMAXParams(spec=spec)

        # Estimate starting parameters via Hannan-Rissanen
        hr, hr_results = hannan_rissanen(endog,
                                         ar_order=spec.ar_order,
                                         ma_order=spec.ma_order,
                                         demean=False)
        if spec.seasonal_periods == 0:
            # If no seasonal component, then `hr` gives starting parameters
            sp.params = hr.params
        else:
            # If we do have a seasonal component, estimate starting parameters
            # for the seasonal lags using the residuals from the previous step
            # (the specification built here is discarded; presumably it is
            # constructed only so that an invalid seasonal order raises)
            _ = SARIMAXSpecification(
                endog,
                seasonal_order=seasonal_order,
                enforce_stationarity=True,
                enforce_invertibility=enforce_invertibility)

            # Seasonal lags expressed in units of the original time index
            ar_order = np.array(spec.seasonal_ar_lags) * spec.seasonal_periods
            ma_order = np.array(spec.seasonal_ma_lags) * spec.seasonal_periods
            seasonal_hr, _ = hannan_rissanen(
                hr_results.resid,
                ar_order=ar_order,
                ma_order=ma_order,
                demean=False)

            # Set the starting parameters
            sp.ar_params = hr.ar_params
            sp.ma_params = hr.ma_params
            sp.seasonal_ar_params = seasonal_hr.ar_params
            sp.seasonal_ma_params = seasonal_hr.ma_params
            sp.sigma2 = seasonal_hr.sigma2

        # Then, require starting parameters to be stationary and invertible
        if not sp.is_stationary:
            sp.ar_params = [0] * sp.k_ar_params
            sp.seasonal_ar_params = [0] * sp.k_seasonal_ar_params

        if not sp.is_invertible and spec.enforce_invertibility:
            sp.ma_params = [0] * sp.k_ma_params
            sp.seasonal_ma_params = [0] * sp.k_seasonal_ma_params

        start_params = sp.params
    else:
        # User-provided starting values are only validated, never altered
        sp = SARIMAXParams(spec=spec)
        sp.params = start_params
        if not sp.is_stationary:
            raise ValueError('Given starting parameters imply a non-stationary'
                             ' AR process. Innovations algorithm requires a'
                             ' stationary process.')

        if spec.enforce_invertibility and not sp.is_invertible:
            raise ValueError('Given starting parameters imply a non-invertible'
                             ' MA process with `enforce_invertibility=True`.')

    def obj(params):
        # Negative loglikelihood as a function of the unconstrained parameters
        p.params = spec.constrain_params(params)

        return -arma_innovations.arma_loglike(
            endog,
            ar_params=-p.reduced_ar_poly.coef[1:],
            ma_params=p.reduced_ma_poly.coef[1:],
            sigma2=p.sigma2)

    # Untransform the starting parameters
    unconstrained_start_params = spec.unconstrain_params(start_params)

    # Perform the minimization
    # (work on copies, so that neither the caller's `minimize_kwargs` dict nor
    # its nested `options` dict is modified when the default is filled in)
    minimize_kwargs = {} if minimize_kwargs is None else dict(minimize_kwargs)
    options = dict(minimize_kwargs.get('options') or {})
    options.setdefault('maxiter', 100)
    minimize_kwargs['options'] = options
    minimize_results = minimize(obj, unconstrained_start_params,
                                **minimize_kwargs)

    # TODO: show warning if convergence failed.

    # Reverse the transformation to get the optimal parameters
    p.params = spec.constrain_params(minimize_results.x)

    # Construct other results
    other_results = Bunch({
        'spec': spec,
        'minimize_results': minimize_results,
        'minimize_kwargs': minimize_kwargs,
        'start_params': start_params
    })

    return p, other_results
示例#8
0
def innovations(endog, ma_order=0, demean=True):
    """
    Estimate MA parameters using innovations algorithm.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Input time series array, assumed to be stationary.
    ma_order : int, optional
        Maximum moving average order. Default is 0.
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the moving average coefficients. Default is True.

    Returns
    -------
    parameters : list of SARIMAXParams objects
        List elements correspond to estimates at different `ma_order`. For
        example, parameters[0] is an `SARIMAXParams` instance corresponding to
        `ma_order=0`. The list has `ma_order + 1` elements.
    other_results : Bunch
        Includes one component, `spec`, containing the `SARIMAXSpecification`
        instance corresponding to the input arguments.

    Raises
    ------
    ValueError
        If the specification's MA lags are not consecutive.

    Notes
    -----
    The primary reference is [1]_, section 5.1.3.

    This procedure assumes that the series is stationary.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    max_spec = SARIMAXSpecification(endog, ma_order=ma_order)
    endog = max_spec.endog

    if demean:
        endog = endog - endog.mean()

    if not max_spec.is_ma_consecutive:
        raise ValueError('Innovations estimation unavailable for models with'
                         ' seasonal or otherwise non-consecutive MA orders.')

    # Run the innovations algorithm once on the sample autocovariances; row
    # `i` of `theta` and element `i` of `v` are used below as the MA(i)
    # coefficients and innovation variance, respectively
    sample_acovf = acovf(endog, fft=True)
    theta, v = innovations_algo(sample_acovf, nobs=max_spec.ma_order + 1)
    ma_params = [theta[i, :i] for i in range(1, max_spec.ma_order + 1)]
    sigma2 = v

    out = []
    for i in range(max_spec.ma_order + 1):
        # Each order gets its own (endog-less) specification; it is kept under
        # a separate name so that it does not replace the specification built
        # from the input arguments, which is what we return below
        order_spec = SARIMAXSpecification(ma_order=i)
        p = SARIMAXParams(spec=order_spec)
        if i == 0:
            p.params = sigma2[i]
        else:
            p.params = np.r_[ma_params[i - 1], sigma2[i]]
        out.append(p)

    # Construct other results
    other_results = Bunch({
        'spec': max_spec,
    })

    return out, other_results
示例#9
0
def statespace(endog,
               exog=None,
               order=(0, 0, 0),
               seasonal_order=(0, 0, 0, 0),
               include_constant=True,
               enforce_stationarity=True,
               enforce_invertibility=True,
               concentrate_scale=False,
               start_params=None,
               fit_kwargs=None):
    """
    Estimate SARIMAX parameters using state space methods.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    exog : array_like, optional
        Array of exogenous regressors. If not given and
        `include_constant=True`, a column of ones is used as the only
        regressor.
    order : tuple, optional
        The (p,d,q) order of the model for the number of AR parameters,
        differences, and MA parameters. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal component of the model for the
        AR parameters, differences, MA parameters, and periodicity. Default
        is (0, 0, 0, 0).
    include_constant : bool, optional
        Whether to add a constant term in `exog` if it's not already there.
        The estimate of the constant will then appear as one of the `exog`
        parameters. If `exog` is None, then the constant will represent the
        mean of the process. Default is True.
    enforce_stationarity : boolean, optional
        Whether or not to transform the AR parameters to enforce stationarity
        in the autoregressive component of the model. Default is True.
    enforce_invertibility : boolean, optional
        Whether or not to transform the MA parameters to enforce invertibility
        in the moving average component of the model. Default is True.
    concentrate_scale : boolean, optional
        Whether or not to concentrate the scale (variance of the error term)
        out of the likelihood. This reduces the number of parameters estimated
        by maximum likelihood by one.
    start_params : array_like, optional
        Initial guess of the solution for the loglikelihood maximization. If
        `enforce_stationarity=True` the AR polynomial must be stationary. If
        `enforce_invertibility=True` the MA polynomial must be invertible. If
        not provided, `None` is forwarded to the state space model's `fit`
        method, leaving the choice of starting values to that model.
    fit_kwargs : dict, optional
        Arguments to pass to the state space model's `fit` method. The
        dictionary given by the caller is not modified; `disp=0` is used
        unless the caller specifies `disp`.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes two components, `spec`, containing the `SARIMAXSpecification`
        instance corresponding to the input arguments; and
        `statespace_results`, corresponding to the results from the underlying
        state space model and Kalman filter / smoother.

    Raises
    ------
    ValueError
        If `start_params` is given and implies a non-stationary AR process
        while `enforce_stationarity=True`, or a non-invertible MA process
        while `enforce_invertibility=True`.

    Notes
    -----
    The primary reference is [1]_.

    References
    ----------
    .. [1] Durbin, James, and Siem Jan Koopman. 2012.
       Time Series Analysis by State Space Methods: Second Edition.
       Oxford University Press.

    """
    # Handle including the constant (need to do it now so that the constant
    # parameter can be included in the specification as part of `exog`.)
    if include_constant:
        exog = np.ones_like(endog) if exog is None else add_constant(exog)

    # Create the specification
    spec = SARIMAXSpecification(endog,
                                exog=exog,
                                order=order,
                                seasonal_order=seasonal_order,
                                enforce_stationarity=enforce_stationarity,
                                enforce_invertibility=enforce_invertibility,
                                concentrate_scale=concentrate_scale)
    endog = spec.endog
    exog = spec.exog
    p = SARIMAXParams(spec=spec)

    # Check start parameters
    # (`sp` is only used for validation; the values handed to `fit` below are
    # the caller's `start_params`, unchanged)
    if start_params is not None:
        sp = SARIMAXParams(spec=spec)
        sp.params = start_params

        if spec.enforce_stationarity and not sp.is_stationary:
            raise ValueError('Given starting parameters imply a non-stationary'
                             ' AR process with `enforce_stationarity=True`.')

        if spec.enforce_invertibility and not sp.is_invertible:
            raise ValueError('Given starting parameters imply a non-invertible'
                             ' MA process with `enforce_invertibility=True`.')

    # Create and fit the state space model
    mod = SARIMAX(endog,
                  exog=exog,
                  order=spec.order,
                  seasonal_order=spec.seasonal_order,
                  enforce_stationarity=spec.enforce_stationarity,
                  enforce_invertibility=spec.enforce_invertibility,
                  concentrate_scale=spec.concentrate_scale)
    # Copy before filling in the default, so that the caller's dictionary is
    # left untouched
    fit_kwargs = {} if fit_kwargs is None else dict(fit_kwargs)
    fit_kwargs.setdefault('disp', 0)
    res_ss = mod.fit(start_params=start_params, **fit_kwargs)

    # Construct results
    p.params = res_ss.params
    res = Bunch({
        'spec': spec,
        'statespace_results': res_ss,
    })

    return p, res
示例#10
0
def hannan_rissanen(endog,
                    ar_order=0,
                    ma_order=0,
                    demean=True,
                    initial_ar_order=None,
                    unbiased=None):
    """
    Estimate ARMA parameters using Hannan-Rissanen procedure.

    Parameters
    ----------
    endog : array_like
        Input time series array, assumed to be stationary.
    ar_order : int
        Autoregressive order. (The implementation indexes by
        `spec.ar_lags`, so non-consecutive lag specifications are presumably
        also accepted -- confirm against `SARIMAXSpecification`.)
    ma_order : int
        Moving average order. (Same remark as for `ar_order`, via
        `spec.ma_lags`.)
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the ARMA coefficients. Default is True.
    initial_ar_order : int, optional
        Order of long autoregressive process used for initial computation of
        residuals. If not given, it is set to the larger of
        `floor(log(nobs)**2)` and twice the maximum of the AR and MA orders.
    unbiased : bool, optional
        Whether or not to apply the bias correction step. Default is None,
        which applies the correction if the estimated coefficients from the
        previous step imply a stationary and invertible process and silently
        skips it otherwise. If True, the correction is required and an
        exception is raised when it cannot be applied. If False, the
        correction is never applied.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes three components: `spec`, containing the
        `SARIMAXSpecification` instance corresponding to the input arguments;
        `initial_ar_order`, containing the autoregressive lag order used in the
        first step; and `resid`, which contains the computed residuals from the
        last step. The length of `resid` depends on the branch taken: it is
        the full (possibly demeaned) series when there are no AR or MA terms,
        and is computed on a trimmed sample otherwise.

    Raises
    ------
    ValueError
        If `unbiased=True` and the parameters estimated in the second step
        are non-stationary or non-invertible.

    Notes
    -----
    The primary reference is [1]_, section 5.1.4, which describes a three-step
    procedure that we implement here.

    1. Fit a large-order AR model via Yule-Walker to estimate residuals
    2. Compute AR and MA estimates via least squares
    3. (Unless the estimated coefficients from step (2) are non-stationary /
       non-invertible or `unbiased=False`) Perform bias correction

    The order used for the AR model in the first step may be given as an
    argument. If it is not, we compute it as suggested by [2]_.

    The estimate of the variance that we use is computed from the residuals
    of the least-squares regression and not from the innovations algorithm.
    This is because our fast implementation of the innovations algorithm is
    only valid for stationary processes, and the Hannan-Rissanen procedure may
    produce estimates that imply non-stationary processes. To avoid
    inconsistency, we never compute this latter variance here, even if it is
    possible. See test_hannan_rissanen::test_brockwell_davis_example_517 for
    an example of how to compute this variance manually.

    This procedure assumes that the series is stationary, but if this is not
    true, it is still possible that this procedure will return parameters that
    imply a non-stationary / non-invertible process.

    Note that the third stage will only be applied if the parameters from the
    second stage imply a stationary / invertible model. If `unbiased=True` is
    given, then non-stationary / non-invertible parameters in the second stage
    will throw an exception.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    .. [2] Gomez, Victor, and Agustin Maravall. 2001.
       "Automatic Modeling Methods for Univariate Series."
       A Course in Time Series Analysis, 171–201.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order, ma_order=ma_order)
    endog = spec.endog
    if demean:
        endog = endog - endog.mean()

    # Container for the estimates; filled in by whichever branch runs below
    p = SARIMAXParams(spec=spec)

    nobs = len(endog)
    max_ar_order = spec.max_ar_order
    max_ma_order = spec.max_ma_order

    # Default initial_ar_order is as suggested by Gomez and Maravall (2001)
    if initial_ar_order is None:
        initial_ar_order = max(
            np.floor(np.log(nobs)**2).astype(int),
            2 * max(max_ar_order, max_ma_order))
    # Create a spec, just to validate the initial autoregressive order
    _ = SARIMAXSpecification(endog, ar_order=initial_ar_order)

    # Compute lagged endog
    # (`ar_ix`, and `ma_ix` below, are to account for non-consecutive lags;
    # for indexing purposes, must have dtype int)
    # With trim='both', row 0 of `lagged_endog` presumably corresponds to
    # observation `max_ar_order` (it is regressed on `endog[max_ar_order:]`
    # below) -- verify against `lagmat`
    ar_ix = np.array(spec.ar_lags, dtype=int) - 1
    lagged_endog = lagmat(endog, max_ar_order, trim='both')[:, ar_ix]

    # If no AR or MA components, this is just a variance computation
    if max_ma_order == 0 and max_ar_order == 0:
        p.sigma2 = np.var(endog, ddof=0)
        resid = endog.copy()
    # If no MA component, this is just CSS
    elif max_ma_order == 0:
        mod = OLS(endog[max_ar_order:], lagged_endog)
        res = mod.fit()
        resid = res.resid
        p.ar_params = res.params
        p.sigma2 = res.scale
    # Otherwise ARMA model
    else:
        # Step 1: Compute long AR model via Yule-Walker, get residuals
        # NOTE(review): this call uses the `order` / `method` keywords, so it
        # presumably targets the regression-module Yule-Walker routine rather
        # than an estimator taking `ar_order` -- confirm which name is in
        # scope. `demean` is not passed here, so that routine's own default
        # applies even when `demean=False` was requested -- confirm intended.
        initial_ar_params, _ = yule_walker(endog,
                                           order=initial_ar_order,
                                           method='mle')
        X = lagmat(endog, initial_ar_order, trim='both')
        y = endog[initial_ar_order:]
        # Residuals of the long AR fit; these start at observation
        # `initial_ar_order`
        resid = y - X.dot(initial_ar_params)

        # Get lagged residuals for `exog` in least-squares regression
        ma_ix = np.array(spec.ma_lags, dtype=int) - 1
        lagged_resid = lagmat(resid, max_ma_order, trim='both')[:, ma_ix]

        # Step 2: estimate ARMA model via least squares
        # The lagged residuals only start at observation
        # `initial_ar_order + max_ma_order`, so drop the first `ix` rows of
        # `lagged_endog` (which starts at `max_ar_order`) to line both sets
        # of regressors up with the dependent variable
        ix = initial_ar_order + max_ma_order - max_ar_order
        mod = OLS(endog[initial_ar_order + max_ma_order:],
                  np.c_[lagged_endog[ix:], lagged_resid])
        res = mod.fit()
        # Regressor columns are ordered AR lags first, then MA lags
        p.ar_params = res.params[:spec.k_ar_params]
        p.ma_params = res.params[spec.k_ar_params:]
        resid = res.resid
        p.sigma2 = res.scale

        # Step 3: bias correction (if requested)
        if unbiased is True or unbiased is None:
            if p.is_stationary and p.is_invertible:
                # Note: `V` and `W` are reassigned by the `lfilter` calls
                # below; only `Z` relies on this zero initialization
                Z = np.zeros_like(endog)
                V = np.zeros_like(endog)
                W = np.zeros_like(endog)

                ar_coef = p.ar_poly.coef
                ma_coef = p.ma_poly.coef

                # Recursively build Z; entries before
                # max(max_ar_order, max_ma_order) are left at zero
                for t in range(nobs):
                    if t >= max(max_ar_order, max_ma_order):
                        # Note: in the case of non-consecutive lag orders, the
                        # polynomials have the appropriate zeros so we don't
                        # need to subset `endog[t - max_ar_order:t]` or
                        # Z[t - max_ma_order:t]
                        tmp_ar = np.dot(-ar_coef[1:],
                                        endog[t - max_ar_order:t][::-1])
                        tmp_ma = np.dot(ma_coef[1:],
                                        Z[t - max_ma_order:t][::-1])
                        Z[t] = endog[t] - tmp_ar - tmp_ma

                # Filter Z through the estimated AR polynomial (V) and
                # through the polynomial [1, -ma_coef[1:]] (W)
                V = lfilter([1], ar_coef, Z)
                W = lfilter(np.r_[1, -ma_coef[1:]], [1], Z)

                lagged_V = lagmat(V, max_ar_order, trim='both')
                lagged_W = lagmat(W, max_ma_order, trim='both')

                # Trim whichever lag matrix is longer so that both start at
                # observation max(max_ar_order, max_ma_order), matching the
                # dependent variable used in the regression below
                exog = np.c_[lagged_V[max(max_ma_order - max_ar_order, 0):,
                                      ar_ix],
                             lagged_W[max(max_ar_order - max_ma_order, 0):,
                                      ma_ix]]

                mod_unbias = OLS(Z[max(max_ar_order, max_ma_order):], exog)
                res_unbias = mod_unbias.fit()

                # The regression coefficients are additive corrections to the
                # step 2 estimates
                p.ar_params = (p.ar_params +
                               res_unbias.params[:spec.k_ar_params])
                p.ma_params = (p.ma_params +
                               res_unbias.params[spec.k_ar_params:])

                # Recompute sigma2
                # (uses the step 2 regression data `mod` with the corrected
                # parameters)
                resid = mod.endog - mod.exog.dot(np.r_[p.ar_params,
                                                       p.ma_params])
                p.sigma2 = np.inner(resid, resid) / len(resid)
            elif unbiased is True:
                # Correction was explicitly requested but cannot be applied
                raise ValueError('Cannot perform third step of Hannan-Rissanen'
                                 ' estimation to remove paramater bias,'
                                 ' because parameters estimated from the'
                                 ' second step are non-stationary or'
                                 ' non-invertible')

    # TODO: Gomez and Maravall (2001) or Gomez (1998)
    # propose one more step here to further improve MA estimates

    # Construct results
    other_results = Bunch({
        'spec': spec,
        'initial_ar_order': initial_ar_order,
        'resid': resid
    })

    return p, other_results
示例#11
0
def yule_walker(endog, ar_order=0, demean=True, unbiased=False):
    """
    Fit an autoregressive model by solving the Yule-Walker equations.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Input time series array, assumed to be stationary.
    ar_order : int, optional
        Autoregressive order. Default is 0.
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the autoregressive coefficients. Default is True.
    unbiased : bool, optional
        Whether to use the "unbiased" autocovariance estimator, which uses
        n - h degrees of freedom rather than n. Note that despite the name, it
        is only truly unbiased if the process mean is known (rather than
        estimated) and for some processes it can result in a non-positive
        definite autocovariance matrix. Default is False.

    Returns
    -------
    parameters : SARIMAXParams object
        Contains the estimated AR coefficients and the innovation variance.
    other_results : Bunch
        Includes one component, `spec`, which is the `SARIMAXSpecification`
        instance corresponding to the input arguments.

    Raises
    ------
    ValueError
        If the specification's AR lags are not consecutive.

    Notes
    -----
    The primary reference is [1]_, section 5.1.1.

    This procedure assumes that the series is stationary.

    For a description of the effect of the "unbiased" estimate of the
    autocovariance function, see 2.4.2 of [1]_.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    specification = SARIMAXSpecification(endog, ar_order=ar_order)
    series = specification.endog
    estimates = SARIMAXParams(spec=specification)

    # Only plain AR(p) models with lags 1..p can be handled
    if not specification.is_ar_consecutive:
        raise ValueError('Yule-Walker estimation unavailable for models with'
                         ' seasonal or non-consecutive AR orders.')

    # Pick the autocovariance estimator requested by the caller
    if unbiased:
        acov_method = 'unbiased'
    else:
        acov_method = 'mle'

    # Solve the Yule-Walker equations; the routine's second return value is
    # squared below to obtain the variance
    ar_coefs, sigma = linear_model.yule_walker(series,
                                               order=ar_order,
                                               demean=demean,
                                               method=acov_method)
    estimates.ar_params = ar_coefs
    estimates.sigma2 = sigma**2

    # Bundle the specification alongside the estimates
    extras = {'spec': specification}
    return estimates, Bunch(extras)
示例#12
0
def gls(endog,
        exog=None,
        order=(0, 0, 0),
        seasonal_order=(0, 0, 0, 0),
        include_constant=None,
        n_iter=None,
        max_iter=50,
        tolerance=1e-8,
        arma_estimator='innovations_mle',
        arma_estimator_kwargs=None):
    """
    Estimate ARMAX parameters by GLS.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    exog : array_like, optional
        Array of exogenous regressors. If not included, then `include_constant`
        must be True, and then `exog` will only include the constant column.
    order : tuple, optional
        The (p,d,q) order of the ARIMA model. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal ARIMA model.
        Default is (0, 0, 0, 0).
    include_constant : bool, optional
        Whether to add a constant term in `exog` if it's not already there.
        The estimate of the constant will then appear as one of the `exog`
        parameters. If `exog` is None, then the constant will represent the
        mean of the process. Default is True if the specified model does not
        include integration and False otherwise.
    n_iter : int, optional
        Optionally iterate feasible GSL a specific number of times. Default is
        to iterate to convergence. If set, this argument overrides the
        `max_iter` and `tolerance` arguments.
    max_iter : int, optional
        Maximum number of feasible GLS iterations. Default is 50. If `n_iter`
        is set, it overrides this argument.
    tolerance : float, optional
        Tolerance for determining convergence of feasible GSL iterations. If
        `iter` is set, this argument has no effect.
        Default is 1e-8.
    arma_estimator : str, optional
        The estimator used for estimating the ARMA model. This option should
        not generally be used, unless the default method is failing or is
        otherwise unsuitable. Not all values will be valid, depending on the
        specified model orders (`order` and `seasonal_order`). Possible values
        are:
        * 'innovations_mle' - can be used with any specification
        * 'statespace' - can be used with any specification
        * 'hannan_rissanen' - can be used with any ARMA non-seasonal model
        * 'yule_walker' - only non-seasonal consecutive
          autoregressive (AR) models
        * 'burg' - only non-seasonal, consecutive autoregressive (AR) models
        * 'innovations' - only non-seasonal, consecutive moving
          average (MA) models.
        The default is 'innovations_mle'.
    arma_estimator_kwargs : dict, optional
        Arguments to pass to the ARMA estimator.

    Returns
    -------
    parameters : SARIMAXParams object
        Contains the parameter estimates from the final iteration.
    other_results : Bunch
        Includes eight components: `spec`, `params`, `converged`,
        `differences`, `iterations`, `arma_estimator`, 'arma_estimator_kwargs',
        and `arma_results`.

    Notes
    -----
    The primary reference is [1]_, section 6.6. In particular, the
    implementation follows the iterative procedure described in section 6.6.2.
    Construction of the transformed variables used to compute the GLS estimator
    described in section 6.6.1 is done via an application of the innovations
    algorithm (rather than explicit construction of the transformation matrix).

    Note that if the specified model includes integration, both the `endog` and
    `exog` series will be differenced prior to estimation and a warning will
    be issued to alert the user.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    # Handle n_iter
    if n_iter is not None:
        max_iter = n_iter
        tolerance = np.inf

    # Default for include_constant is True if there is no integration and
    # False otherwise
    integrated = order[1] > 0 or seasonal_order[1] > 0
    if include_constant is None:
        include_constant = not integrated
    elif include_constant and integrated:
        raise ValueError('Cannot include a constant in an integrated model.')

    # Handle including the constant (need to do it now so that the constant
    # parameter can be included in the specification as part of `exog`.)
    if include_constant:
        exog = np.ones_like(endog) if exog is None else add_constant(exog)

    # Create the SARIMAX specification
    spec = SARIMAXSpecification(endog,
                                exog=exog,
                                order=order,
                                seasonal_order=seasonal_order)
    endog = spec.endog
    exog = spec.exog

    # Handle integration
    if spec.is_integrated:
        # TODO: this is the approach suggested by BD (see Remark 1 in
        # section 6.6.2 and Example 6.6.3), but maybe there are some cases
        # where we don't want to force this behavior on the user?
        warnings.warn('Provided `endog` and `exog` series have been'
                      ' differenced to eliminate integration prior to GLS'
                      ' parameter estimation.')
        endog = diff(endog,
                     k_diff=spec.diff,
                     k_seasonal_diff=spec.seasonal_diff,
                     seasonal_periods=spec.seasonal_periods)
        exog = diff(exog,
                    k_diff=spec.diff,
                    k_seasonal_diff=spec.seasonal_diff,
                    seasonal_periods=spec.seasonal_periods)
    augmented = np.c_[endog, exog]

    # Validate arma_estimator
    spec.validate_estimator(arma_estimator)
    if arma_estimator_kwargs is None:
        arma_estimator_kwargs = {}

    # Step 1: OLS
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    exog_params = res_ols.params
    resid = res_ols.resid

    # 0th iteration parameters
    p = SARIMAXParams(spec=spec)
    p.exog_params = exog_params
    if spec.max_ar_order > 0:
        p.ar_params = np.zeros(spec.k_ar_params)
    if spec.max_seasonal_ar_order > 0:
        p.seasonal_ar_params = np.zeros(spec.k_seasonal_ar_params)
    if spec.max_ma_order > 0:
        p.ma_params = np.zeros(spec.k_ma_params)
    if spec.max_seasonal_ma_order > 0:
        p.seasonal_ma_params = np.zeros(spec.k_seasonal_ma_params)
    p.sigma2 = res_ols.scale

    ar_params = p.ar_params
    seasonal_ar_params = p.seasonal_ar_params
    ma_params = p.ma_params
    seasonal_ma_params = p.seasonal_ma_params
    sigma2 = p.sigma2

    # Step 2 - 4: iterate feasible GLS to convergence
    arma_results = [None]
    differences = [None]
    parameters = [p]
    converged = False if n_iter is None else None
    i = 0
    for i in range(1, max_iter + 1):
        prev = exog_params

        # Step 2: ARMA
        # TODO: allow estimator-specific kwargs?
        if arma_estimator == 'yule_walker':
            p_arma, res_arma = yule_walker(resid,
                                           ar_order=spec.ar_order,
                                           demean=False,
                                           **arma_estimator_kwargs)
        elif arma_estimator == 'burg':
            p_arma, res_arma = burg(resid,
                                    ar_order=spec.ar_order,
                                    demean=False,
                                    **arma_estimator_kwargs)
        elif arma_estimator == 'innovations':
            out, res_arma = innovations(resid,
                                        ma_order=spec.ma_order,
                                        demean=False,
                                        **arma_estimator_kwargs)
            p_arma = out[-1]
        elif arma_estimator == 'hannan_rissanen':
            p_arma, res_arma = hannan_rissanen(resid,
                                               ar_order=spec.ar_order,
                                               ma_order=spec.ma_order,
                                               demean=False,
                                               **arma_estimator_kwargs)
        else:
            # For later iterations, use a "warm start" for parameter estimates
            # (speeds up estimation and convergence)
            start_params = (None if i == 1 else np.r_[ar_params, ma_params,
                                                      seasonal_ar_params,
                                                      seasonal_ma_params,
                                                      sigma2])
            # Note: in each case, we do not pass in the order of integration
            # since we have already differenced the series
            tmp_order = (spec.order[0], 0, spec.order[2])
            tmp_seasonal_order = (spec.seasonal_order[0], 0,
                                  spec.seasonal_order[2],
                                  spec.seasonal_order[3])
            if arma_estimator == 'innovations_mle':
                p_arma, res_arma = innovations_mle(
                    resid,
                    order=tmp_order,
                    seasonal_order=tmp_seasonal_order,
                    demean=False,
                    start_params=start_params,
                    **arma_estimator_kwargs)
            else:
                p_arma, res_arma = statespace(
                    resid,
                    order=tmp_order,
                    seasonal_order=tmp_seasonal_order,
                    include_constant=False,
                    start_params=start_params,
                    **arma_estimator_kwargs)

        ar_params = p_arma.ar_params
        seasonal_ar_params = p_arma.seasonal_ar_params
        ma_params = p_arma.ma_params
        seasonal_ma_params = p_arma.seasonal_ma_params
        sigma2 = p_arma.sigma2
        arma_results.append(res_arma)

        # Step 3: GLS
        # Compute transformed variables that satisfy OLS assumptions
        # Note: In section 6.1.1 of Brockwell and Davis (2016), these
        # transformations are developed as computed by left multiplication
        # by a matrix T. However, explicitly constructing T and then
        # performing the left-multiplications does not scale well when nobs is
        # large. Instead, we can retrieve the transformed variables as the
        # residuals of the innovations algorithm (the `normalize=True`
        # argument applies a Prais-Winsten-type normalization to the first few
        # observations to ensure homoskedasticity). Brockwell and Davis
        # mention that they also take this approach in practice.
        tmp, _ = arma_innovations.arma_innovations(augmented,
                                                   ar_params=ar_params,
                                                   ma_params=ma_params,
                                                   normalize=True)
        u = tmp[:, 0]
        x = tmp[:, 1:]

        # OLS on transformed variables
        mod_gls = OLS(u, x)
        res_gls = mod_gls.fit()
        exog_params = res_gls.params
        resid = endog - np.dot(exog, exog_params)

        # Construct the parameter vector for the iteration
        p = SARIMAXParams(spec=spec)
        p.exog_params = exog_params
        if spec.max_ar_order > 0:
            p.ar_params = ar_params
        if spec.max_seasonal_ar_order > 0:
            p.seasonal_ar_params = seasonal_ar_params
        if spec.max_ma_order > 0:
            p.ma_params = ma_params
        if spec.max_seasonal_ma_order > 0:
            p.seasonal_ma_params = seasonal_ma_params
        p.sigma2 = sigma2
        parameters.append(p)

        # Check for convergence
        difference = np.abs(exog_params - prev)
        differences.append(difference)
        if n_iter is None and np.all(difference < tolerance):
            converged = True
            break
    else:
        if n_iter is None:
            warnings.warn('Feasible GLS failed to converge in %d iterations.'
                          ' Consider increasing the maximum number of'
                          ' iterations using the `max_iter` argument or'
                          ' reducing the required tolerance using the'
                          ' `tolerance` argument.' % max_iter)

    # Construct final results
    p = parameters[-1]
    other_results = Bunch({
        'spec': spec,
        'params': parameters,
        'converged': converged,
        'differences': differences,
        'iterations': i,
        'arma_estimator': arma_estimator,
        'arma_estimator_kwargs': arma_estimator_kwargs,
        'arma_results': arma_results,
    })

    return p, other_results
示例#13
0
def burg(endog, ar_order=0, demean=True):
    """
    Estimate AR parameters using Burg technique.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Input time series array, assumed to be stationary.
    ar_order : int, optional
        Autoregressive order. Default is 0.
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the autoregressive coefficients. Only forwarded to the
        underlying Burg routine, i.e. it has no effect when `ar_order` is 0.

    Returns
    -------
    parameters : SARIMAXParams object
        Contains the parameter estimates from the final iteration.
    other_results : Bunch
        Includes one component, `spec`, which is the `SARIMAXSpecification`
        instance corresponding to the input arguments.

    Raises
    ------
    ValueError
        If the specification built from `ar_order` does not have consecutive
        AR lags.

    Notes
    -----
    The primary reference is [1]_, section 5.1.2.

    This procedure assumes that the series is stationary.

    This function is a light wrapper around the `burg` function of the
    `linear_model` module. When `ar_order` is 0 no AR coefficients are
    estimated and `sigma2` is simply `np.var(endog)`.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order)
    endog = spec.endog

    # Workaround for statsmodels.tsa.stattools.pacf_burg which doesn't work
    # on integer input
    # Note: test against the abstract `np.integer` so that every integer
    # width (int32, uint8, ...) is converted, not only the platform default
    # TODO: remove when possible
    if np.issubdtype(endog.dtype, np.integer):
        endog = endog * 1.0

    if not spec.is_ar_consecutive:
        raise ValueError('Burg estimation unavailable for models with'
                         ' seasonal or otherwise non-consecutive AR orders.')

    p = SARIMAXParams(spec=spec)

    if ar_order == 0:
        # Nothing to fit: only the variance of the series is reported
        p.sigma2 = np.var(endog)
    else:
        p.ar_params, p.sigma2 = linear_model.burg(endog,
                                                  order=ar_order,
                                                  demean=demean)

    # Construct other results
    other_results = Bunch({
        'spec': spec,
    })

    return p, other_results
示例#14
0
def hannan_rissanen(endog, ar_order=0, ma_order=0, demean=True,
                    initial_ar_order=None, unbiased=None,
                    fixed_params=None):
    """
    Estimate ARMA parameters using Hannan-Rissanen procedure.

    Parameters
    ----------
    endog : array_like
        Input time series array, assumed to be stationary.
    ar_order : int or list of int
        Autoregressive order. Default is 0.
    ma_order : int or list of int
        Moving average order. Default is 0.
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the ARMA coefficients. Default is True.
    initial_ar_order : int, optional
        Order of long autoregressive process used for initial computation of
        residuals.
    unbiased : bool, optional
        Whether or not to apply the bias correction step. Default is True if
        the estimated coefficients from the previous step imply a stationary
        and invertible process and False otherwise.
    fixed_params : dict, optional
        Dictionary with names of fixed parameters as keys (e.g. 'ar.L1',
        'ma.L2'), which correspond to SARIMAXSpecification.param_names.
        Dictionary values are the values of the associated fixed parameters.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes three components: `spec`, containing the
        `SARIMAXSpecification` instance corresponding to the input arguments;
        `initial_ar_order`, containing the autoregressive lag order used in the
        first step; and `resid`, which contains the computed residuals from the
        last step.

    Raises
    ------
    NotImplementedError
        If `unbiased=True` is requested together with fixed parameters.
    ValueError
        If `unbiased=True` is requested but the second-step estimates are
        non-stationary or non-invertible.

    Notes
    -----
    The primary reference is [1]_, section 5.1.4, which describes a three-step
    procedure that we implement here.

    1. Fit a large-order AR model via Yule-Walker to estimate residuals
    2. Compute AR and MA estimates via least squares
    3. (Unless the estimated coefficients from step (2) are non-stationary /
       non-invertible or `unbiased=False`) Perform bias correction

    The order used for the AR model in the first step may be given as an
    argument. If it is not, we compute it as suggested by [2]_.

    The estimate of the variance that we use is computed from the residuals
    of the least-squares regression and not from the innovations algorithm.
    This is because our fast implementation of the innovations algorithm is
    only valid for stationary processes, and the Hannan-Rissanen procedure may
    produce estimates that imply non-stationary processes. To avoid
    inconsistency, we never compute this latter variance here, even if it is
    possible. See test_hannan_rissanen::test_brockwell_davis_example_517 for
    an example of how to compute this variance manually.

    This procedure assumes that the series is stationary, but if this is not
    true, it is still possible that this procedure will return parameters that
    imply a non-stationary / non-invertible process.

    Note that the third stage will only be applied if the parameters from the
    second stage imply a stationary / invertible model. If `unbiased=True` is
    given, then non-stationary / non-invertible parameters in the second stage
    will throw an exception.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    .. [2] Gomez, Victor, and Agustin Maravall. 2001.
       "Automatic Modeling Methods for Univariate Series."
       A Course in Time Series Analysis, 171–201.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order, ma_order=ma_order)

    fixed_params = _validate_fixed_params(fixed_params, spec.param_names)

    endog = spec.endog
    if demean:
        endog = endog - endog.mean()

    p = SARIMAXParams(spec=spec)

    nobs = len(endog)
    max_ar_order = spec.max_ar_order
    max_ma_order = spec.max_ma_order

    # Default initial_ar_order is as suggested by Gomez and Maravall (2001)
    if initial_ar_order is None:
        initial_ar_order = max(np.floor(np.log(nobs)**2).astype(int),
                               2 * max(max_ar_order, max_ma_order))
    # Create a spec, just to validate the initial autoregressive order
    _ = SARIMAXSpecification(endog, ar_order=initial_ar_order)

    # Unpack fixed and free ar/ma lags, ix, and params (fixed only)
    params_info = _package_fixed_and_free_params_info(
        fixed_params, spec.ar_lags, spec.ma_lags
    )

    # Compute lagged endog
    lagged_endog = lagmat(endog, max_ar_order, trim='both')

    # If no AR or MA components, this is just a variance computation
    if max_ma_order == 0 and max_ar_order == 0:
        p.sigma2 = np.var(endog, ddof=0)
        resid = endog.copy()
    # If no MA component, this is just CSS
    elif max_ma_order == 0:
        # extract 1) lagged_endog with free params; 2) lagged_endog with fixed
        # params; 3) endog residual after applying fixed params if applicable
        X_with_free_params = lagged_endog[:, params_info.free_ar_ix]
        X_with_fixed_params = lagged_endog[:, params_info.fixed_ar_ix]
        y = endog[max_ar_order:]
        if X_with_fixed_params.shape[1] != 0:
            y = y - X_with_fixed_params.dot(params_info.fixed_ar_params)

        # no free ar params -> variance computation on the endog residual
        if X_with_free_params.shape[1] == 0:
            p.ar_params = params_info.fixed_ar_params
            p.sigma2 = np.var(y, ddof=0)
            resid = y.copy()
        # otherwise OLS with endog residual (after applying fixed params) as y,
        # and lagged_endog with free params as X
        else:
            mod = OLS(y, X_with_free_params)
            res = mod.fit()
            resid = res.resid
            p.sigma2 = res.scale
            p.ar_params = _stitch_fixed_and_free_params(
                fixed_ar_or_ma_lags=params_info.fixed_ar_lags,
                fixed_ar_or_ma_params=params_info.fixed_ar_params,
                free_ar_or_ma_lags=params_info.free_ar_lags,
                free_ar_or_ma_params=res.params,
                spec_ar_or_ma_lags=spec.ar_lags
            )
    # Otherwise ARMA model
    else:
        # Step 1: Compute long AR model via Yule-Walker, get residuals
        initial_ar_params, _ = yule_walker(
            endog, order=initial_ar_order, method='mle')
        X = lagmat(endog, initial_ar_order, trim='both')
        y = endog[initial_ar_order:]
        resid = y - X.dot(initial_ar_params)

        # Get lagged residuals for `exog` in least-squares regression
        lagged_resid = lagmat(resid, max_ma_order, trim='both')

        # Step 2: estimate ARMA model via least squares
        # `ix` drops the leading rows of `lagged_endog` so that it lines up
        # with `lagged_resid`, which starts `initial_ar_order + max_ma_order`
        # observations into the sample
        ix = initial_ar_order + max_ma_order - max_ar_order
        X_with_free_params = np.c_[
            lagged_endog[ix:, params_info.free_ar_ix],
            lagged_resid[:, params_info.free_ma_ix]
        ]
        X_with_fixed_params = np.c_[
            lagged_endog[ix:, params_info.fixed_ar_ix],
            lagged_resid[:, params_info.fixed_ma_ix]
        ]
        y = endog[initial_ar_order + max_ma_order:]
        if X_with_fixed_params.shape[1] != 0:
            y = y - X_with_fixed_params.dot(
                np.r_[params_info.fixed_ar_params, params_info.fixed_ma_params]
            )

        # Step 2.1: no free AR or MA params -> variance computation on the
        # endog residual
        if X_with_free_params.shape[1] == 0:
            p.ar_params = params_info.fixed_ar_params
            p.ma_params = params_info.fixed_ma_params
            p.sigma2 = np.var(y, ddof=0)
            resid = y.copy()
        # Step 2.2: otherwise OLS with endog residual (after applying fixed
        # params) as y, and lagged_endog and lagged_resid with free params as X
        else:
            mod = OLS(y, X_with_free_params)
            res = mod.fit()
            k_free_ar_params = len(params_info.free_ar_lags)
            p.ar_params = _stitch_fixed_and_free_params(
                fixed_ar_or_ma_lags=params_info.fixed_ar_lags,
                fixed_ar_or_ma_params=params_info.fixed_ar_params,
                free_ar_or_ma_lags=params_info.free_ar_lags,
                free_ar_or_ma_params=res.params[:k_free_ar_params],
                spec_ar_or_ma_lags=spec.ar_lags
            )
            p.ma_params = _stitch_fixed_and_free_params(
                fixed_ar_or_ma_lags=params_info.fixed_ma_lags,
                fixed_ar_or_ma_params=params_info.fixed_ma_params,
                free_ar_or_ma_lags=params_info.free_ma_lags,
                free_ar_or_ma_params=res.params[k_free_ar_params:],
                spec_ar_or_ma_lags=spec.ma_lags
            )
            resid = res.resid
            p.sigma2 = res.scale

        # Step 3: bias correction (if requested)

        # Step 3.1: validate `unbiased` argument and handle setting the default
        if unbiased is True:
            if len(fixed_params) != 0:
                raise NotImplementedError(
                    "Third step of Hannan-Rissanen estimation to remove "
                    "parameter bias is not yet implemented for the case "
                    "with fixed parameters."
                )
            elif not (p.is_stationary and p.is_invertible):
                raise ValueError(
                    "Cannot perform third step of Hannan-Rissanen estimation "
                    "to remove parameter bias, because parameters estimated "
                    "from the second step are non-stationary or "
                    "non-invertible."
                )
        elif unbiased is None:
            if len(fixed_params) != 0:
                unbiased = False
            else:
                # Coerce to a builtin bool so that the identity check below
                # still succeeds if the properties yield a NumPy boolean
                unbiased = bool(p.is_stationary and p.is_invertible)

        # Step 3.2: bias correction
        # Note: reaching this branch implies there are no fixed parameters,
        # so `mod` from Step 2.2 is always defined here
        if unbiased is True:
            # Allocate as float: with `demean=False`, `endog` keeps the dtype
            # of `spec.endog`, and an integer dtype inherited through
            # `zeros_like` would silently truncate every Z[t]
            Z = np.zeros_like(endog, dtype=float)

            ar_coef = p.ar_poly.coef
            ma_coef = p.ma_poly.coef

            # The first `start` entries of Z are left at zero
            start = max(max_ar_order, max_ma_order)
            for t in range(start, nobs):
                # Note: in the case of non-consecutive lag orders, the
                # polynomials have the appropriate zeros so we don't
                # need to subset `endog[t - max_ar_order:t]` or
                # Z[t - max_ma_order:t]
                tmp_ar = np.dot(
                    -ar_coef[1:], endog[t - max_ar_order:t][::-1])
                tmp_ma = np.dot(ma_coef[1:],
                                Z[t - max_ma_order:t][::-1])
                Z[t] = endog[t] - tmp_ar - tmp_ma

            V = lfilter([1], ar_coef, Z)
            W = lfilter(np.r_[1, -ma_coef[1:]], [1], Z)

            lagged_V = lagmat(V, max_ar_order, trim='both')
            lagged_W = lagmat(W, max_ma_order, trim='both')

            # Trim whichever lag matrix is longer so both have the same
            # number of rows as `Z[start:]`
            exog = np.c_[
                lagged_V[
                    max(max_ma_order - max_ar_order, 0):,
                    params_info.free_ar_ix
                ],
                lagged_W[
                    max(max_ar_order - max_ma_order, 0):,
                    params_info.free_ma_ix
                ]
            ]

            mod_unbias = OLS(Z[start:], exog)
            res_unbias = mod_unbias.fit()

            p.ar_params = (
                p.ar_params + res_unbias.params[:spec.k_ar_params])
            p.ma_params = (
                p.ma_params + res_unbias.params[spec.k_ar_params:])

            # Recompute sigma2
            resid = mod.endog - mod.exog.dot(
                np.r_[p.ar_params, p.ma_params])
            p.sigma2 = np.inner(resid, resid) / len(resid)

    # TODO: Gomez and Maravall (2001) or Gomez (1998)
    # propose one more step here to further improve MA estimates

    # Construct results
    other_results = Bunch({
        'spec': spec,
        'initial_ar_order': initial_ar_order,
        'resid': resid
    })
    return p, other_results
示例#15
0
    def __init__(self,
                 endog,
                 exog=None,
                 order=(0, 0, 0),
                 seasonal_order=(0, 0, 0, 0),
                 trend=None,
                 enforce_stationarity=True,
                 enforce_invertibility=True,
                 concentrate_scale=False,
                 trend_offset=1,
                 dates=None,
                 freq=None,
                 missing='none'):
        """
        Build the ARIMA specification and initialize the parent model.

        The trend is handled by the specification (and so ends up in the
        exogenous data handed to the parent class); the parent initializer
        itself is always called with `trend=None`.
        """
        # When no trend is requested, default to a constant ('c') for models
        # without integration and to no trend ('n') otherwise.
        # TODO: if trend='c', then we could alternatively use `demean=True` in
        # the estimation methods rather than setting up `exog` and using GLS.
        # Not sure if it's worth the trouble though.
        has_integration = order[1] > 0 or seasonal_order[1] > 0
        if trend is None:
            trend = 'n' if has_integration else 'c'

        # Keyword arguments forwarded unchanged to both the specification and
        # the parent initializer
        shared_kwargs = {
            'order': order,
            'seasonal_order': seasonal_order,
            'concentrate_scale': concentrate_scale,
            'dates': dates,
            'freq': freq,
            'missing': missing,
        }

        # Build the specification. The stationarity / invertibility flags are
        # deliberately passed as None: the estimators should not be restricted
        # by these criteria; instead we only make sure that the parameter
        # estimates they produce satisfy them.
        self._spec_arima = SARIMAXSpecification(
            endog,
            exog=exog,
            trend=trend,
            trend_offset=trend_offset,
            enforce_stationarity=None,
            enforce_invertibility=None,
            **shared_kwargs)
        exog = self._spec_arima._model.data.orig_exog

        # Initialize the base SARIMAX class with the exog taken from the
        # specification. No trend value is passed on, since ARIMA standardizes
        # the trend to always be part of exog, while the base SARIMAX class
        # puts it in the transition equation.
        super(ARIMA, self).__init__(
            endog,
            exog,
            trend=None,
            enforce_stationarity=enforce_stationarity,
            enforce_invertibility=enforce_invertibility,
            **shared_kwargs)
        self.trend = trend

        # The superclass combines exog and trend into a single `k_exog`;
        # override the public attributes so that they reflect the distinction
        # made by the specification.
        self.k_exog = self._spec_arima.k_exog
        self.k_trend = self._spec_arima.k_trend

        # Drop the init kwargs that aren't used in this model
        dropped_keys = frozenset((
            'measurement_error',
            'time_varying_regression',
            'mle_regression',
            'simple_differencing',
            'hamilton_representation',
        ))
        self._init_keys = [
            key for key in self._init_keys if key not in dropped_keys
        ]