Example #1
    def _fit_start_params_hr(self, order):
        """
        Get starting parameters for fit.

        Parameters
        ----------
        order : iterable
            (p,q,k) - AR lags, MA lags, and number of exogenous variables
            including the constant.

        Returns
        -------
        start_params : array
            A first guess at the starting parameters.

        Notes
        -----
        If necessary, fits an AR process with the laglength selected according
        to best BIC.  Obtain the residuals.  Then fit an ARMA(p,q) model via
        OLS using these residuals for a first approximation.  Uses a separate
        OLS regression to find the coefficients of exogenous variables.

        References
        ----------
        Hannan, E.J. and Rissanen, J.  1982.  "Recursive estimation of mixed
            autoregressive-moving average order."  `Biometrika`.  69.1.
        """
        p, q, k = order
        start_params = zeros((p + q + k))
        endog = self.endog.copy()  # copy because overwritten
        exog = self.exog
        if k != 0:
            ols_params = GLS(endog, exog).fit().params
            start_params[:k] = ols_params
            endog -= np.dot(exog, ols_params).squeeze()
        if q != 0:
            if p != 0:
                armod = AR(endog).fit(ic='bic', trend='nc')
                arcoefs_tmp = armod.params
                p_tmp = armod.k_ar
                resid = endog[p_tmp:] - np.dot(
                    lagmat(endog, p_tmp, trim='both'), arcoefs_tmp)
                if p < p_tmp + q:
                    endog_start = p_tmp + q - p
                    resid_start = 0
                else:
                    endog_start = 0
                    resid_start = p - p_tmp - q
                lag_endog = lagmat(endog, p, 'both')[endog_start:]
                lag_resid = lagmat(resid, q, 'both')[resid_start:]
                # stack ar lags and resids
                X = np.column_stack((lag_endog, lag_resid))
                coefs = GLS(endog[max(p_tmp + q, p):], X).fit().params
                start_params[k:k + p + q] = coefs
            else:
                start_params[k + p:k + p + q] = yule_walker(endog, order=q)[0]
        if q == 0 and p != 0:
            arcoefs = yule_walker(endog, order=p)[0]
            start_params[k:k + p] = arcoefs
        return start_params
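For readers who want the same Hannan-Rissanen recipe outside the class, here is a minimal standalone sketch; the simulated ARMA(1,1) series and all variable names below are illustrative, not part of the statsmodels source.

import numpy as np
from statsmodels.regression.linear_model import OLS, yule_walker
from statsmodels.tsa.arima_process import arma_generate_sample
from statsmodels.tsa.tsatools import lagmat

np.random.seed(0)
y = arma_generate_sample(ar=[1, -0.6], ma=[1, 0.3], nsample=500)

# Stage 1: a long AR fit approximates the innovations
p_long = 10
ar_long, _ = yule_walker(y, order=p_long, method='mle')
resid = y[p_long:] - lagmat(y, p_long, trim='both').dot(ar_long)

# Stage 2: OLS of y_t on its own lag and the lagged residual
X = np.column_stack((lagmat(y, 1, trim='both')[p_long:],
                     lagmat(resid, 1, trim='both')))
start = OLS(y[p_long + 1:], X).fit().params
print(start)  # rough first guesses for (phi, theta)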
Example #3
def pacf_yw(x, nlags=40, method='unbiased'):
    '''Partial autocorrelation estimated with non-recursive yule_walker

    Parameters
    ----------
    x : 1d array
        observations of time series for which pacf is calculated
    nlags : int
        largest lag for which pacf is returned
    method : 'unbiased' (default) or 'mle'
        method for the autocovariance calculations in yule walker

    Returns
    -------
    pacf : 1d array
        partial autocorrelations, maxlag+1 elements

    Notes
    -----
    This solves yule_walker for each desired lag and contains
    currently duplicate calculations.
    '''
    pacf = [1.]
    for k in range(1, nlags + 1):
        pacf.append(yule_walker(x, k, method=method)[0][-1])
    return np.array(pacf)
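A quick check of pacf_yw (assuming the function above and yule_walker are in scope): for an AR(2) series the partial autocorrelations should be near zero beyond lag 2. The simulated coefficients are illustrative.

import numpy as np
from statsmodels.regression.linear_model import yule_walker

np.random.seed(1)
e = np.random.randn(1000)
x = np.zeros(1000)
for t in range(2, 1000):
    x[t] = 0.5 * x[t - 1] + 0.3 * x[t - 2] + e[t]

# Theoretical PACF: lag 1 = 0.5/(1-0.3) ~ 0.71, lag 2 = 0.3, then ~0
print(pacf_yw(x, nlags=4))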
Example #5
 def setupClass(cls):
     from statsmodels.datasets.sunspots import load
     data = load()
     cls.rho, cls.sigma = yule_walker(data.endog, order=4,
             method="mle")
     cls.R_params = [1.2831003105694765, -0.45240924374091945,
             -0.20770298557575195, 0.047943648089542337]
Example #6
def yule_walker_acov(acov, order=1, method="unbiased", df=None, inv=False):
    """
    Estimate AR(p) parameters from acovf using Yule-Walker equation.


    Parameters
    ----------
    acov : array_like, 1d
        auto-covariance
    order : int, optional
        The order of the autoregressive process.  Default is 1.
    method : str, optional
        Method for the autocovariance estimation, passed through to
        `yule_walker`.  Default is "unbiased".
    df : int, optional
        Degrees of freedom correction, passed through to `yule_walker`.
        Default is None.
    inv : bool
        If inv is True the inverse of R is also returned.  Default is False.

    Returns
    -------
    rho : ndarray
        The estimated autoregressive coefficients
    sigma : float
        Estimate of the residual standard deviation
    Rinv : ndarray
        Inverse of the Toeplitz matrix
    """
    return yule_walker(acov,
                       order=order,
                       method=method,
                       df=df,
                       inv=inv,
                       demean=False)
Example #8
def mdl(m, n, breakpoints, data):
    # maintain the order
    timestamps = list(breakpoints.keys())
    timestamps.sort()

    terms = []
    m_log = max(1, m)
    terms.append(math.log(m_log, 2))

    terms.append(m * math.log(n, 2))

    terms.append(sum(math.log(breakpoints[i], 2) for i in timestamps))

    term3 = term4 = 0

    for i in range(1, len(breakpoints)):
        ni = timestamps[i] - timestamps[i - 1]
        term3 += (breakpoints[timestamps[i]] + 2) / 2 * math.log(ni, 2)

        data_section_values = []

        for j in range(timestamps[i - 1], timestamps[i] - 1):
            data_section_values.append(data[1][j])

        rho, sigma = yule_walker(data_section_values,
                                 breakpoints[timestamps[i - 1]])
        var = math.pow(sigma, 2)
        term4 += ni / 2 * math.log(2 * math.pi * var, 2)

    terms.append(term3)
    terms.append(term4)

    terms.append(n / 2)

    return sum(terms)
Example #10
def spec_ar(x, x_freq=1, n_freq=500, order_max=None, plot=True, **kwargs):
    x = np.r_[x]
    N = len(x)
    if order_max is None:
        order_max = min(N - 1, int(np.floor(10 * np.log10(N))))

    # Use Yule-Walker to find best AR model via AIC
    def aic(sigma2, df_model, nobs):
        return np.log(sigma2) + 2 * (1 + df_model) / nobs

    best_results = None

    for lag in range(order_max + 1):
        ar, sigma = yule_walker(x, order=lag, method='mle')
        model_aic = aic(sigma2=sigma**2, df_model=lag, nobs=N - lag)
        if best_results is None or model_aic < best_results['aic']:
            best_results = {
                'aic': model_aic,
                'order': lag,
                'ar': ar,
                'sigma2': sigma**2
            }

    order = best_results['order']
    freq = np.arange(0, n_freq) / (2 * (n_freq - 1))

    if order >= 1:
        ar, sigma2 = best_results['ar'], best_results['sigma2']

        outer_xy = np.outer(freq, np.arange(1, order + 1))
        cs = np.cos(2 * np.pi * outer_xy) @ ar
        sn = np.sin(2 * np.pi * outer_xy) @ ar

        spec = sigma2 / (x_freq * ((1 - cs)**2 + sn**2))

    else:
        sigma2 = best_results['sigma2']
        spec = (sigma2 / x_freq) * np.ones(len(freq))

    results = {
        'freq': freq,
        'spec': spec,
        'coh': None,
        'phase': None,
        'n.used': len(x),
        'method': 'AR(' + str(order) + ') spectrum'
    }

    if plot:
        plot_spec(results, coverage=None, **kwargs)

    return results
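A hedged usage sketch for spec_ar: with plot=False the plot_spec helper (not shown here) is never touched, so the snippet below should run on its own once yule_walker is imported; the AR(2) coefficients are illustrative.

import numpy as np
from statsmodels.regression.linear_model import yule_walker

np.random.seed(2)
e = np.random.randn(2000)
y = np.zeros(2000)
for t in range(2, 2000):
    y[t] = 1.5 * y[t - 1] - 0.7 * y[t - 2] + e[t]

res = spec_ar(y, plot=False)
print(res['method'])                        # e.g. 'AR(2) spectrum'
print(res['freq'][np.argmax(res['spec'])])  # peak near the AR(2) resonance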
Example #11
def get_modes(processedFreq, fs, modelOrder=10):
    ar, sigma = yule_walker(processedFreq, order=modelOrder, method="mle")
    ar *= -1

    polyCoeff = np.array([1])
    polyCoeff = np.append(polyCoeff, ar)

    raizes_est_z = np.roots(polyCoeff)
    raizes_est_s = np.log(raizes_est_z) * fs

    # Remove negative frequencies
    raizes_est_s = [mode for mode in raizes_est_s if mode.imag > 0]

    # Compute frequency in hertz and damping ratio (a fraction, not a percentage)
    freq_y = [mode.imag / (2 * np.pi) for mode in raizes_est_s]
    damp_x = [
        -np.divide(mode.real, np.absolute(mode)) for mode in raizes_est_s
    ]

    return damp_x, freq_y
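A usage sketch for get_modes (assumes yule_walker is imported as in the other examples); the 1.5 Hz damped oscillation below is illustrative.

import numpy as np
from statsmodels.regression.linear_model import yule_walker

fs = 20.0
t = np.arange(0, 15, 1 / fs)
np.random.seed(0)
sig = np.exp(-0.2 * t) * np.cos(2 * np.pi * 1.5 * t)
sig += 0.01 * np.random.randn(len(t))

damp, freq = get_modes(sig, fs, modelOrder=6)
print(freq)  # one estimated mode should lie near 1.5 Hz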
Example #12
def _get_ar_order(x):
    N = len(x)
    order_max = min(N - 1, int(np.floor(10 * np.log10(N))))

    # Use Yule-Walker to find best AR model via AIC
    def aic(sigma2, df_model, nobs):
        return np.log(sigma2) + 2 * (1 + df_model) / nobs

    best_results = None

    for lag in range(order_max+1):
        ar, sigma = yule_walker(x, order=lag, method='mle')
        model_aic = aic(sigma2=sigma**2, df_model=lag, nobs=N-lag)
        if best_results is None or model_aic < best_results['aic']:
            best_results = {
                'aic': model_aic,
                'order': lag,
                'ar': ar,
                'sigma2': sigma**2
            }

    return best_results['order']
Example #13
def spectrum0_ar(x, max_order='auto'):
    """Calculates f(0) of the spectrum of x using an AR fit."""
    n_samples = x.shape[0]

    if np.allclose(np.var(x), 0.0):
        return 0., 0.

    if max_order == 'auto':
        max_order = floor(10 * np.log10(n_samples))

    # calculate f(0) and AIC for each AR(p) model
    results = np.zeros((max_order, 3))
    for p in range(1, max_order + 1):
        coefs, sigma = yule_walker(x, order=p, demean=True, method='unbiased')
        results[p - 1] = [p, spec0_ar(sigma, coefs),
                          aic_ar(sigma, n_samples, p)]

    # return result for model minimizing the AIC
    min_id = np.argmin(results[:, -1])
    order, var0 = results[min_id, :2]

    return var0 / n_samples, order
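spectrum0_ar leans on two helpers, spec0_ar and aic_ar, that are not shown. Plausible definitions, consistent with the f(0) and AIC formulas used elsewhere on this page (an assumption, not the original source):

import numpy as np

def spec0_ar(sigma, coefs):
    # AR spectral density at frequency zero
    return sigma**2 / (1. - np.sum(coefs))**2

def aic_ar(sigma, nobs, df_model):
    # AIC of an AR fit with innovation standard deviation `sigma`
    return np.log(sigma**2) + 2 * (1 + df_model) / nobs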
Example #14
train_loss = history.history['loss']
train_loss2 = history2.history['loss']
plt.rcParams['axes.facecolor'] = 'white'
plt.plot(x, train_loss, linewidth=1, label='LSTM training')
plt.plot(x, train_loss2, linewidth=1, label='ANN training')
plt.grid(True, which='both', axis='both')
plt.title('AR Model - MSE of ANN vs LSTM')
plt.xlabel('Epochs')
plt.ylabel('MSE')
plt.legend()
if save:
    plt.savefig("./imgs/AR Model - Training MSE.png", dpi=800)
plt.show()

# Yule-Walker
rho, sigma = yule_walker(y_train, order=3, method="mle")

yw_pred = np.ndarray.flatten(y_test)[:3]
for i in range(3, 100):
    nxt = (rho[0] * yw_pred[i - 1] + rho[1] * yw_pred[i - 2] +
           rho[2] * yw_pred[i - 3] + np.random.uniform(0, 0.1))
    yw_pred = np.append(yw_pred, [nxt], axis=0)

plt.rcParams['axes.facecolor'] = 'white'
plt.plot(x_axis[:100], yw_pred, linewidth=1, label='Predictions')
plt.plot(x_axis[:100],
         y_test[:100].reshape(100, ),
         linewidth=1,
         label='Ground Truth',
         linestyle='dashed')
Example #16
def spec(x, order=2):
    from statsmodels.regression.linear_model import yule_walker
    beta, sigma = yule_walker(x, order)
    return sigma**2 / (1. - np.sum(beta))**2
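A sanity check for spec: for an AR(1) process with unit-variance innovations, the statistic should approach sigma^2 / (1 - phi)^2, the long-run variance. The simulated values below are illustrative.

import numpy as np

np.random.seed(3)
phi = 0.6
e = np.random.randn(5000)
x = np.zeros(5000)
for t in range(1, 5000):
    x[t] = phi * x[t - 1] + e[t]

print(spec(x, order=1))    # estimate
print(1.0 / (1 - phi)**2)  # theoretical value, 6.25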
Example #17
def hannan_rissanen(endog,
                    ar_order=0,
                    ma_order=0,
                    demean=True,
                    initial_ar_order=None,
                    unbiased=None):
    """
    Estimate ARMA parameters using Hannan-Rissanen procedure.

    Parameters
    ----------
    endog : array_like
        Input time series array, assumed to be stationary.
    ar_order : int
        Autoregressive order
    ma_order : int
        Moving average order
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the ARMA coefficients. Default is True.
    initial_ar_order : int, optional
        Order of long autoregressive process used for initial computation of
        residuals.
    unbiased: bool, optional
        Whether or not to apply the bias correction step. Default is True if
        the estimated coefficients from the previous step imply a stationary
        and invertible process and False otherwise.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes three components: `spec`, containing the
        `SARIMAXSpecification` instance corresponding to the input arguments;
        `initial_ar_order`, containing the autoregressive lag order used in the
        first step; and `resid`, which contains the computed residuals from the
        last step.

    Notes
    -----
    The primary reference is [1]_, section 5.1.4, which describes a three-step
    procedure that we implement here.

    1. Fit a large-order AR model via Yule-Walker to estimate residuals
    2. Compute AR and MA estimates via least squares
    3. (Unless the estimated coefficients from step (2) are non-stationary /
       non-invertible or `unbiased=False`) Perform bias correction

    The order used for the AR model in the first step may be given as an
    argument. If it is not, we compute it as suggested by [2]_.

    The estimate of the variance that we use is computed from the residuals
    of the least-squares regression and not from the innovations algorithm.
    This is because our fast implementation of the innovations algorithm is
    only valid for stationary processes, and the Hannan-Rissanen procedure may
    produce estimates that imply non-stationary processes. To avoid
    inconsistency, we never compute this latter variance here, even if it is
    possible. See test_hannan_rissanen::test_brockwell_davis_example_517 for
    an example of how to compute this variance manually.

    This procedure assumes that the series is stationary, but if this is not
    true, it is still possible that this procedure will return parameters that
    imply a non-stationary / non-invertible process.

    Note that the third stage will only be applied if the parameters from the
    second stage imply a stationary / invertible model. If `unbiased=True` is
    given, then non-stationary / non-invertible parameters in the second stage
    will throw an exception.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    .. [2] Gomez, Victor, and Agustin Maravall. 2001.
       "Automatic Modeling Methods for Univariate Series."
       A Course in Time Series Analysis, 171–201.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order, ma_order=ma_order)
    endog = spec.endog
    if demean:
        endog = endog - endog.mean()

    p = SARIMAXParams(spec=spec)

    nobs = len(endog)
    max_ar_order = spec.max_ar_order
    max_ma_order = spec.max_ma_order

    # Default initial_ar_order is as suggested by Gomez and Maravall (2001)
    if initial_ar_order is None:
        initial_ar_order = max(
            np.floor(np.log(nobs)**2).astype(int),
            2 * max(max_ar_order, max_ma_order))
    # Create a spec, just to validate the initial autoregressive order
    _ = SARIMAXSpecification(endog, ar_order=initial_ar_order)

    # Compute lagged endog
    # (`ar_ix`, and `ma_ix` below, are to account for non-consecutive lags;
    # for indexing purposes, must have dtype int)
    ar_ix = np.array(spec.ar_lags, dtype=int) - 1
    lagged_endog = lagmat(endog, max_ar_order, trim='both')[:, ar_ix]

    # If no AR or MA components, this is just a variance computation
    if max_ma_order == 0 and max_ar_order == 0:
        p.sigma2 = np.var(endog, ddof=0)
        resid = endog.copy()
    # If no MA component, this is just CSS
    elif max_ma_order == 0:
        mod = OLS(endog[max_ar_order:], lagged_endog)
        res = mod.fit()
        resid = res.resid
        p.ar_params = res.params
        p.sigma2 = res.scale
    # Otherwise ARMA model
    else:
        # Step 1: Compute long AR model via Yule-Walker, get residuals
        initial_ar_params, _ = yule_walker(endog,
                                           order=initial_ar_order,
                                           method='mle')
        X = lagmat(endog, initial_ar_order, trim='both')
        y = endog[initial_ar_order:]
        resid = y - X.dot(initial_ar_params)

        # Get lagged residuals for `exog` in least-squares regression
        ma_ix = np.array(spec.ma_lags, dtype=int) - 1
        lagged_resid = lagmat(resid, max_ma_order, trim='both')[:, ma_ix]

        # Step 2: estimate ARMA model via least squares
        ix = initial_ar_order + max_ma_order - max_ar_order
        mod = OLS(endog[initial_ar_order + max_ma_order:],
                  np.c_[lagged_endog[ix:], lagged_resid])
        res = mod.fit()
        p.ar_params = res.params[:spec.k_ar_params]
        p.ma_params = res.params[spec.k_ar_params:]
        resid = res.resid
        p.sigma2 = res.scale

        # Step 3: bias correction (if requested)
        if unbiased is True or unbiased is None:
            if p.is_stationary and p.is_invertible:
                Z = np.zeros_like(endog)
                V = np.zeros_like(endog)
                W = np.zeros_like(endog)

                ar_coef = p.ar_poly.coef
                ma_coef = p.ma_poly.coef

                for t in range(nobs):
                    if t >= max(max_ar_order, max_ma_order):
                        # Note: in the case of non-consecutive lag orders, the
                        # polynomials have the appropriate zeros so we don't
                        # need to subset `endog[t - max_ar_order:t]` or
                        # Z[t - max_ma_order:t]
                        tmp_ar = np.dot(-ar_coef[1:],
                                        endog[t - max_ar_order:t][::-1])
                        tmp_ma = np.dot(ma_coef[1:],
                                        Z[t - max_ma_order:t][::-1])
                        Z[t] = endog[t] - tmp_ar - tmp_ma

                V = lfilter([1], ar_coef, Z)
                W = lfilter(np.r_[1, -ma_coef[1:]], [1], Z)

                lagged_V = lagmat(V, max_ar_order, trim='both')
                lagged_W = lagmat(W, max_ma_order, trim='both')

                exog = np.c_[lagged_V[max(max_ma_order - max_ar_order, 0):,
                                      ar_ix],
                             lagged_W[max(max_ar_order - max_ma_order, 0):,
                                      ma_ix]]

                mod_unbias = OLS(Z[max(max_ar_order, max_ma_order):], exog)
                res_unbias = mod_unbias.fit()

                p.ar_params = (p.ar_params +
                               res_unbias.params[:spec.k_ar_params])
                p.ma_params = (p.ma_params +
                               res_unbias.params[spec.k_ar_params:])

                # Recompute sigma2
                resid = mod.endog - mod.exog.dot(np.r_[p.ar_params,
                                                       p.ma_params])
                p.sigma2 = np.inner(resid, resid) / len(resid)
            elif unbiased is True:
                raise ValueError('Cannot perform third step of Hannan-Rissanen'
                                 ' estimation to remove parameter bias,'
                                 ' because parameters estimated from the'
                                 ' second step are non-stationary or'
                                 ' non-invertible')

    # TODO: Gomez and Maravall (2001) or Gomez (1998)
    # propose one more step here to further improve MA estimates

    # Construct results
    other_results = Bunch({
        'spec': spec,
        'initial_ar_order': initial_ar_order,
        'resid': resid
    })

    return p, other_results
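This is the statsmodels implementation; in recent releases it can be called directly (module path assumed from statsmodels >= 0.12):

import numpy as np
from statsmodels.tsa.arima.estimators.hannan_rissanen import hannan_rissanen
from statsmodels.tsa.arima_process import arma_generate_sample

np.random.seed(4)
y = arma_generate_sample(ar=[1, -0.5], ma=[1, 0.4], nsample=1000)

p, other = hannan_rissanen(y, ar_order=1, ma_order=1)
print(p.ar_params, p.ma_params, p.sigma2)  # roughly [0.5], [0.4], 1.0
print(other.initial_ar_order)              # long-AR order from step 1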
Example #18
 def _spec(self, x, order=2):
     from statsmodels.regression.linear_model import yule_walker
     beta, sigma = yule_walker(x, order)
     return sigma ** 2 / (1. - np.sum(beta)) ** 2
Example #19
examples_all = list(range(10)) + ['test_copy']

examples = examples_all  # [5]

if 0 in examples:
    print('\n Example 0')
    X = np.arange(1, 8)
    X = sm.add_constant(X)
    Y = np.array((1, 3, 4, 5, 8, 10, 9))
    rho = 2
    model = GLSAR(Y, X, 2)
    for i in range(6):
        results = model.fit()
        print('AR coefficients:', model.rho)
        rho, sigma = yule_walker(results.resid, order=model.order)
        model = GLSAR(Y, X, rho)

    par0 = results.params
    print('params fit', par0)
    model0if = GLSAR(Y, X, 2)
    res = model0if.iterative_fit(6)
    print('iterativefit beta', res.params)
    results.tvalues   # XXX is this correct? it does equal params/bse
    # but isn't the same as the AR example (which was wrong in the first place..)
    print(results.t_test([0, 1]))  # are sd and t correct? vs
    print(results.f_test(np.eye(2)))


rhotrue = np.array([0.5, 0.2])
nlags = np.size(rhotrue)
Example #20
y[0] = e[0]
y[1] = 1.5 * y[0] + e[1]

for index in range(2, Ndata):
    y[index] = 1.5 * y[index - 1] - 0.7 * y[index - 2] + e[index]

# Data processing
y -= np.mean(y)

# model
# y(k) = [ y(k-1) y(k-2) ]*[ a1 ] + e(k)
#                          [ a2 ]

order = 2
ar, sigma = yule_walker(y, order=order)
ar *= -1

coeff = np.array([1])
coeff = np.append(coeff, ar)

print("modos estimados")
raizes_est_z = np.roots(coeff)
raizes_est_s = np.log(raizes_est_z) / dt
print(raizes_est_s)

print("modos reais")
raizes_reais_z = np.roots([1, -1.5, 0.7])
raizes_reais_s = np.log(raizes_reais_z) / dt
print(raizes_reais_s)
Example #21
# ax.set_title("Simulated Variance")

fig, ax = plt.subplots(1, 1)
pcm = ax.pcolormesh(lon5, lat5, vardiff)
fig.colorbar(pcm, ax=ax)
ax.set_title("Difference in Variance")
print(np.nanmax(np.abs(vardiff)))

#%% Red Noise Model Test at a single point

test_ar1 = ar1_map[klatss, klonss]
test_ssh = ssha[:, klatss, klonss]
test_var = (1 - test_ar1**2) * (np.var(test_ssh))
test_sig = np.sqrt(test_var)

lmrho, lmsigma = linear_model.yule_walker(test_ssh, order=1, method='adjusted')

simlen = 240
noisets = np.random.normal(0, test_sig, simlen)

ytest = np.zeros(simlen)
for i in range(1, simlen):
    ytest[i] = test_ar1 * ytest[i - 1] + noisets[i]

print("Simulated Correlation is %f " %
      (np.corrcoef(ytest[1:], ytest[:-1])[0, 1]))
print("Actual Correlation is %f " % (test_ar1))
print("Simulated Variance is %f" % (np.var(ytest)))
print("Actual Variance is %f" % (np.var(test_ssh)))
#%% Visualize some plots
Example #22
def hannan_rissanen(endog, ar_order=0, ma_order=0, demean=True,
                    initial_ar_order=None, unbiased=None,
                    fixed_params=None):
    """
    Estimate ARMA parameters using Hannan-Rissanen procedure.

    Parameters
    ----------
    endog : array_like
        Input time series array, assumed to be stationary.
    ar_order : int or list of int
        Autoregressive order
    ma_order : int or list of int
        Moving average order
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the ARMA coefficients. Default is True.
    initial_ar_order : int, optional
        Order of long autoregressive process used for initial computation of
        residuals.
    unbiased : bool, optional
        Whether or not to apply the bias correction step. Default is True if
        the estimated coefficients from the previous step imply a stationary
        and invertible process and False otherwise.
    fixed_params : dict, optional
        Dictionary with names of fixed parameters as keys (e.g. 'ar.L1',
        'ma.L2'), which correspond to SARIMAXSpecification.param_names.
        Dictionary values are the values of the associated fixed parameters.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes three components: `spec`, containing the
        `SARIMAXSpecification` instance corresponding to the input arguments;
        `initial_ar_order`, containing the autoregressive lag order used in the
        first step; and `resid`, which contains the computed residuals from the
        last step.

    Notes
    -----
    The primary reference is [1]_, section 5.1.4, which describes a three-step
    procedure that we implement here.

    1. Fit a large-order AR model via Yule-Walker to estimate residuals
    2. Compute AR and MA estimates via least squares
    3. (Unless the estimated coefficients from step (2) are non-stationary /
       non-invertible or `unbiased=False`) Perform bias correction

    The order used for the AR model in the first step may be given as an
    argument. If it is not, we compute it as suggested by [2]_.

    The estimate of the variance that we use is computed from the residuals
    of the least-squares regression and not from the innovations algorithm.
    This is because our fast implementation of the innovations algorithm is
    only valid for stationary processes, and the Hannan-Rissanen procedure may
    produce estimates that imply non-stationary processes. To avoid
    inconsistency, we never compute this latter variance here, even if it is
    possible. See test_hannan_rissanen::test_brockwell_davis_example_517 for
    an example of how to compute this variance manually.

    This procedure assumes that the series is stationary, but if this is not
    true, it is still possible that this procedure will return parameters that
    imply a non-stationary / non-invertible process.

    Note that the third stage will only be applied if the parameters from the
    second stage imply a stationary / invertible model. If `unbiased=True` is
    given, then non-stationary / non-invertible parameters in the second stage
    will throw an exception.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    .. [2] Gomez, Victor, and Agustin Maravall. 2001.
       "Automatic Modeling Methods for Univariate Series."
       A Course in Time Series Analysis, 171–201.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order, ma_order=ma_order)

    fixed_params = _validate_fixed_params(fixed_params, spec.param_names)

    endog = spec.endog
    if demean:
        endog = endog - endog.mean()

    p = SARIMAXParams(spec=spec)

    nobs = len(endog)
    max_ar_order = spec.max_ar_order
    max_ma_order = spec.max_ma_order

    # Default initial_ar_order is as suggested by Gomez and Maravall (2001)
    if initial_ar_order is None:
        initial_ar_order = max(np.floor(np.log(nobs)**2).astype(int),
                               2 * max(max_ar_order, max_ma_order))
    # Create a spec, just to validate the initial autoregressive order
    _ = SARIMAXSpecification(endog, ar_order=initial_ar_order)

    # Unpack fixed and free ar/ma lags, ix, and params (fixed only)
    params_info = _package_fixed_and_free_params_info(
        fixed_params, spec.ar_lags, spec.ma_lags
    )

    # Compute lagged endog
    lagged_endog = lagmat(endog, max_ar_order, trim='both')

    # If no AR or MA components, this is just a variance computation
    if max_ma_order == 0 and max_ar_order == 0:
        p.sigma2 = np.var(endog, ddof=0)
        resid = endog.copy()
    # If no MA component, this is just CSS
    elif max_ma_order == 0:
        # extract 1) lagged_endog with free params; 2) lagged_endog with fixed
        # params; 3) endog residual after applying fixed params if applicable
        X_with_free_params = lagged_endog[:, params_info.free_ar_ix]
        X_with_fixed_params = lagged_endog[:, params_info.fixed_ar_ix]
        y = endog[max_ar_order:]
        if X_with_fixed_params.shape[1] != 0:
            y = y - X_with_fixed_params.dot(params_info.fixed_ar_params)

        # no free ar params -> variance computation on the endog residual
        if X_with_free_params.shape[1] == 0:
            p.ar_params = params_info.fixed_ar_params
            p.sigma2 = np.var(y, ddof=0)
            resid = y.copy()
        # otherwise OLS with endog residual (after applying fixed params) as y,
        # and lagged_endog with free params as X
        else:
            mod = OLS(y, X_with_free_params)
            res = mod.fit()
            resid = res.resid
            p.sigma2 = res.scale
            p.ar_params = _stitch_fixed_and_free_params(
                fixed_ar_or_ma_lags=params_info.fixed_ar_lags,
                fixed_ar_or_ma_params=params_info.fixed_ar_params,
                free_ar_or_ma_lags=params_info.free_ar_lags,
                free_ar_or_ma_params=res.params,
                spec_ar_or_ma_lags=spec.ar_lags
            )
    # Otherwise ARMA model
    else:
        # Step 1: Compute long AR model via Yule-Walker, get residuals
        initial_ar_params, _ = yule_walker(
            endog, order=initial_ar_order, method='mle')
        X = lagmat(endog, initial_ar_order, trim='both')
        y = endog[initial_ar_order:]
        resid = y - X.dot(initial_ar_params)

        # Get lagged residuals for `exog` in least-squares regression
        lagged_resid = lagmat(resid, max_ma_order, trim='both')

        # Step 2: estimate ARMA model via least squares
        ix = initial_ar_order + max_ma_order - max_ar_order
        X_with_free_params = np.c_[
            lagged_endog[ix:, params_info.free_ar_ix],
            lagged_resid[:, params_info.free_ma_ix]
        ]
        X_with_fixed_params = np.c_[
            lagged_endog[ix:, params_info.fixed_ar_ix],
            lagged_resid[:, params_info.fixed_ma_ix]
        ]
        y = endog[initial_ar_order + max_ma_order:]
        if X_with_fixed_params.shape[1] != 0:
            y = y - X_with_fixed_params.dot(
                np.r_[params_info.fixed_ar_params, params_info.fixed_ma_params]
            )

        # Step 2.1: no free ar params -> variance computation on the endog
        # residual
        if X_with_free_params.shape[1] == 0:
            p.ar_params = params_info.fixed_ar_params
            p.ma_params = params_info.fixed_ma_params
            p.sigma2 = np.var(y, ddof=0)
            resid = y.copy()
        # Step 2.2: otherwise OLS with endog residual (after applying fixed
        # params) as y, and lagged_endog and lagged_resid with free params as X
        else:
            mod = OLS(y, X_with_free_params)
            res = mod.fit()
            k_free_ar_params = len(params_info.free_ar_lags)
            p.ar_params = _stitch_fixed_and_free_params(
                fixed_ar_or_ma_lags=params_info.fixed_ar_lags,
                fixed_ar_or_ma_params=params_info.fixed_ar_params,
                free_ar_or_ma_lags=params_info.free_ar_lags,
                free_ar_or_ma_params=res.params[:k_free_ar_params],
                spec_ar_or_ma_lags=spec.ar_lags
            )
            p.ma_params = _stitch_fixed_and_free_params(
                fixed_ar_or_ma_lags=params_info.fixed_ma_lags,
                fixed_ar_or_ma_params=params_info.fixed_ma_params,
                free_ar_or_ma_lags=params_info.free_ma_lags,
                free_ar_or_ma_params=res.params[k_free_ar_params:],
                spec_ar_or_ma_lags=spec.ma_lags
            )
            resid = res.resid
            p.sigma2 = res.scale

        # Step 3: bias correction (if requested)

        # Step 3.1: validate `unbiased` argument and handle setting the default
        if unbiased is True:
            if len(fixed_params) != 0:
                raise NotImplementedError(
                    "Third step of Hannan-Rissanen estimation to remove "
                    "parameter bias is not yet implemented for the case "
                    "with fixed parameters."
                )
            elif not (p.is_stationary and p.is_invertible):
                raise ValueError(
                    "Cannot perform third step of Hannan-Rissanen estimation "
                    "to remove parameter bias, because parameters estimated "
                    "from the second step are non-stationary or "
                    "non-invertible."
                )
        elif unbiased is None:
            if len(fixed_params) != 0:
                unbiased = False
            else:
                unbiased = p.is_stationary and p.is_invertible

        # Step 3.2: bias correction
        if unbiased is True:
            Z = np.zeros_like(endog)
            V = np.zeros_like(endog)
            W = np.zeros_like(endog)

            ar_coef = p.ar_poly.coef
            ma_coef = p.ma_poly.coef

            for t in range(nobs):
                if t >= max(max_ar_order, max_ma_order):
                    # Note: in the case of non-consecutive lag orders, the
                    # polynomials have the appropriate zeros so we don't
                    # need to subset `endog[t - max_ar_order:t]` or
                    # Z[t - max_ma_order:t]
                    tmp_ar = np.dot(
                        -ar_coef[1:], endog[t - max_ar_order:t][::-1])
                    tmp_ma = np.dot(ma_coef[1:],
                                    Z[t - max_ma_order:t][::-1])
                    Z[t] = endog[t] - tmp_ar - tmp_ma

            V = lfilter([1], ar_coef, Z)
            W = lfilter(np.r_[1, -ma_coef[1:]], [1], Z)

            lagged_V = lagmat(V, max_ar_order, trim='both')
            lagged_W = lagmat(W, max_ma_order, trim='both')

            exog = np.c_[
                lagged_V[
                    max(max_ma_order - max_ar_order, 0):,
                    params_info.free_ar_ix
                ],
                lagged_W[
                    max(max_ar_order - max_ma_order, 0):,
                    params_info.free_ma_ix
                ]
            ]

            mod_unbias = OLS(Z[max(max_ar_order, max_ma_order):], exog)
            res_unbias = mod_unbias.fit()

            p.ar_params = (
                p.ar_params + res_unbias.params[:spec.k_ar_params])
            p.ma_params = (
                p.ma_params + res_unbias.params[spec.k_ar_params:])

            # Recompute sigma2
            resid = mod.endog - mod.exog.dot(
                np.r_[p.ar_params, p.ma_params])
            p.sigma2 = np.inner(resid, resid) / len(resid)

    # TODO: Gomez and Maravall (2001) or Gomez (1998)
    # propose one more step here to further improve MA estimates

    # Construct results
    other_results = Bunch({
        'spec': spec,
        'initial_ar_order': initial_ar_order,
        'resid': resid
    })
    return p, other_results
Example #24
def yule_walker(endog, ar_order=0, demean=True, unbiased=False):
    """
    Estimate AR parameters using Yule-Walker equations.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Input time series array, assumed to be stationary.
    ar_order : int, optional
        Autoregressive order. Default is 0.
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the autoregressive coefficients. Default is True.
    unbiased : bool, optional
        Whether to use the "unbiased" autocovariance estimator, which uses
        n - h degrees of freedom rather than n. Note that despite the name, it
        is only truly unbiased if the process mean is known (rather than
        estimated) and for some processes it can result in a non-positive
        definite autocovariance matrix. Default is False.

    Returns
    -------
    parameters : SARIMAXParams object
        Contains the parameter estimates from the final iteration.
    other_results : Bunch
        Includes one component, `spec`, which is the `SARIMAXSpecification`
        instance corresponding to the input arguments.

    Notes
    -----
    The primary reference is [1]_, section 5.1.1.

    This procedure assumes that the series is stationary.

    For a description of the effect of the "unbiased" estimate of the
    autocovariance function, see 2.4.2 of [1]_.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order)
    endog = spec.endog
    p = SARIMAXParams(spec=spec)

    if not spec.is_ar_consecutive:
        raise ValueError('Yule-Walker estimation unavailable for models with'
                         ' seasonal or non-consecutive AR orders.')

    # Estimate parameters
    method = 'unbiased' if unbiased else 'mle'
    p.ar_params, sigma = linear_model.yule_walker(endog,
                                                  order=ar_order,
                                                  demean=demean,
                                                  method=method)
    p.sigma2 = sigma**2

    # Construct other results
    other_results = Bunch({
        'spec': spec,
    })

    return p, other_results
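As with hannan_rissanen above, this estimator is importable in recent statsmodels (path assumed); a minimal run on a simulated AR(2):

import numpy as np
from statsmodels.tsa.arima.estimators.yule_walker import yule_walker

np.random.seed(5)
e = np.random.randn(2000)
y = np.zeros(2000)
for t in range(2, 2000):
    y[t] = 0.75 * y[t - 1] - 0.25 * y[t - 2] + e[t]

p, _ = yule_walker(y, ar_order=2)
print(p.ar_params)  # roughly [0.75, -0.25]
print(p.sigma2)     # roughly 1.0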
Example #25
from statsmodels.regression.linear_model import yule_walker
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import hilbert

x_func = lambda t: np.sin(10 * 2 * np.pi * t / 500) * np.sin(
    0.91 * 2 * np.pi * t / 500 + np.sin(0.31 * 2 * np.pi * t / 500) * 0.5)

t_train = np.arange(1000)
t_test = np.arange(1000, 1200)
x_train = x_func(t_train)
x_test = x_func(t_test)

order = 50
# ar, p, k = aryule(x_func(t_train), order, norm='biased')
ar, s = yule_walker(x_train, order, 'mle')

pred = x_train.tolist()

for x in range(len(t_test)):
    # pred.append(np.roll(ar, 0)[::-1].dot(pred[-order:]))
    pred.append(ar[::-1].dot(pred[-order:]))
plt.figure(dpi=200)
plt.plot(pred)

plt.plot(t_test, x_test, '--')
plt.plot(t_train, x_train)
# plt.plot(t_train, np.real(hilbert(x_train)))

plt.ylim(-2, 2)
plt.show()
Example #26
# train_loss2 = history2.history['loss']
# plt.rcParams['axes.facecolor'] = 'white'
# plt.plot(x, train_loss, linewidth=1, label='LSTM training')
# plt.plot(x, train_loss2, linewidth=1, label='ANN training')
# plt.grid(True, which='both', axis='both')
# plt.title('MA Model - Training MSE of ANN vs LSTM')
# plt.xlabel('Epochs')
# plt.ylabel('MSE')
# plt.legend()
# if save:
#     plt.savefig("./imgs/MA Model - Training MSE.png", dpi=800)
# plt.show()

# Yule-Walker

rho5, sigma5 = yule_walker(y_train, order=5, method="mle")
rho10, sigma10 = yule_walker(y_train, order=10, method="mle")
rho50, sigma50 = yule_walker(y_train, order=50, method="mle")
rho250, sigma250 = yule_walker(y_train, order=250, method="mle")

yw5_pred = np.ndarray.flatten(y_test)[:5]
for i in range(5, 10000):
    # reverse the lag window so rho5[0] multiplies the most recent value
    yw5_pred = np.append(yw5_pred, [np.dot(rho5, yw5_pred[-5:][::-1])], axis=0)
plt.rcParams['axes.facecolor'] = 'white'
plt.plot(x_axis[5:105], yw5_pred[5:105], linewidth=1, label='Predictions')
plt.plot(x_axis[5:105], y_test[5:105].reshape(100, ), linewidth=1, label='Ground Truth', linestyle='dashed')
plt.grid(True, which='both', axis='both')
plt.title('MA Model - Yule-Walker AR5 Prediction of 100 samples')
plt.xlabel('Time')
plt.ylabel('Value')
plt.legend()