def _fit_start_params_hr(self, order):
    """
    Compute Hannan-Rissanen style starting values for the ARMA fit.

    Parameters
    ----------
    order : iterable
        ``(p, q, k)`` - number of AR lags, number of MA lags and number of
        exogenous variables including the constant.

    Returns
    -------
    start_params : array
        First guess at the parameters, laid out as ``k`` exogenous
        coefficients, then ``p`` AR coefficients, then ``q`` MA
        coefficients.

    Notes
    -----
    The exogenous coefficients come from a separate least-squares regression
    whose fitted values are removed from the series. For a mixed model an
    AR process is fitted first (lag length chosen by BIC), its residuals are
    computed, and the ARMA(p,q) coefficients are then obtained by regressing
    the series on its own lags and on the lagged residuals. Pure AR and pure
    MA orders are initialised from ``yule_walker``.

    References
    ----------
    Hannan, E.J. and Rissanen, J.  1982.  "Recursive estimation of mixed
        autoregressive-moving average order."  `Biometrika`.  69.1.
    """
    n_ar, n_ma, n_exog = order
    start_params = zeros((n_ar + n_ma + n_exog))
    endog = self.endog.copy()  # modified in place below, so work on a copy
    exog = self.exog

    # Regress out the exogenous part first; the ARMA steps use what is left.
    if n_exog != 0:
        exog_coefs = GLS(endog, exog).fit().params
        start_params[:n_exog] = exog_coefs
        endog -= np.dot(exog, exog_coefs).squeeze()

    ar_block = slice(n_exog, n_exog + n_ar)
    ma_block = slice(n_exog + n_ar, n_exog + n_ar + n_ma)

    # No MA part: either nothing left to do or a plain Yule-Walker AR fit.
    if n_ma == 0:
        if n_ar != 0:
            start_params[ar_block] = yule_walker(endog, order=n_ar)[0]
        return start_params

    # MA part only: initialise the MA slots from a Yule-Walker fit of order q.
    if n_ar == 0:
        start_params[ma_block] = yule_walker(endog, order=n_ma)[0]
        return start_params

    # Mixed model: preliminary AR fit, then regression on lags and residuals.
    long_ar = AR(endog).fit(ic='bic', trend='nc')
    long_ar_coefs = long_ar.params
    long_ar_lags = long_ar.k_ar
    resid = endog[long_ar_lags:] - np.dot(
        lagmat(endog, long_ar_lags, trim='both'), long_ar_coefs)

    # Drop leading rows from whichever lag matrix is longer so that both
    # regressor blocks cover the same observations.
    if n_ar < long_ar_lags + n_ma:
        endog_start, resid_start = long_ar_lags + n_ma - n_ar, 0
    else:
        endog_start, resid_start = 0, n_ar - long_ar_lags - n_ma
    lag_endog = lagmat(endog, n_ar, 'both')[endog_start:]
    lag_resid = lagmat(resid, n_ma, 'both')[resid_start:]

    # Stack AR lags and residual lags into one design matrix.
    regressors = np.column_stack((lag_endog, lag_resid))
    target = endog[max(long_ar_lags + n_ma, n_ar):]
    start_params[n_exog:n_exog + n_ar + n_ma] = GLS(
        target, regressors).fit().params
    return start_params
def _fit_start_params_hr(self, order):
    """
    Compute Hannan-Rissanen style starting values for the ARMA fit.

    Parameters
    ----------
    order : iterable
        ``(p, q, k)`` - number of AR lags, number of MA lags and number of
        exogenous variables including the constant.

    Returns
    -------
    start_params : array
        First guess at the parameters, laid out as ``k`` exogenous
        coefficients, then ``p`` AR coefficients, then ``q`` MA
        coefficients.

    Notes
    -----
    The exogenous coefficients come from a separate least-squares regression
    whose fitted values are removed from the series. For a mixed model an
    AR process is fitted first (lag length chosen by BIC), its residuals are
    computed, and the ARMA(p,q) coefficients are then obtained by regressing
    the series on its own lags and on the lagged residuals. Pure AR and pure
    MA orders are initialised from ``yule_walker``.

    References
    ----------
    Hannan, E.J. and Rissanen, J.  1982.  "Recursive estimation of mixed
        autoregressive-moving average order."  `Biometrika`.  69.1.
    """
    n_ar, n_ma, n_exog = order
    start_params = zeros((n_ar + n_ma + n_exog))
    endog = self.endog.copy()  # modified in place below, so work on a copy
    exog = self.exog

    # Regress out the exogenous part first; the ARMA steps use what is left.
    if n_exog != 0:
        exog_coefs = GLS(endog, exog).fit().params
        start_params[:n_exog] = exog_coefs
        endog -= np.dot(exog, exog_coefs).squeeze()

    ar_block = slice(n_exog, n_exog + n_ar)
    ma_block = slice(n_exog + n_ar, n_exog + n_ar + n_ma)

    # No MA part: either nothing left to do or a plain Yule-Walker AR fit.
    if n_ma == 0:
        if n_ar != 0:
            start_params[ar_block] = yule_walker(endog, order=n_ar)[0]
        return start_params

    # MA part only: initialise the MA slots from a Yule-Walker fit of order q.
    if n_ar == 0:
        start_params[ma_block] = yule_walker(endog, order=n_ma)[0]
        return start_params

    # Mixed model: preliminary AR fit, then regression on lags and residuals.
    long_ar = AR(endog).fit(ic='bic', trend='nc')
    long_ar_coefs = long_ar.params
    long_ar_lags = long_ar.k_ar
    resid = endog[long_ar_lags:] - np.dot(
        lagmat(endog, long_ar_lags, trim='both'), long_ar_coefs)

    # Drop leading rows from whichever lag matrix is longer so that both
    # regressor blocks cover the same observations.
    if n_ar < long_ar_lags + n_ma:
        endog_start, resid_start = long_ar_lags + n_ma - n_ar, 0
    else:
        endog_start, resid_start = 0, n_ar - long_ar_lags - n_ma
    lag_endog = lagmat(endog, n_ar, 'both')[endog_start:]
    lag_resid = lagmat(resid, n_ma, 'both')[resid_start:]

    # Stack AR lags and residual lags into one design matrix.
    regressors = np.column_stack((lag_endog, lag_resid))
    target = endog[max(long_ar_lags + n_ma, n_ar):]
    start_params[n_exog:n_exog + n_ar + n_ma] = GLS(
        target, regressors).fit().params
    return start_params
def pacf_yw(x, nlags=40, method='unbiased'):
    '''Partial autocorrelation via one Yule-Walker fit per lag.

    Parameters
    ----------
    x : 1d array
        observations of the time series for which the pacf is calculated
    nlags : int
        largest lag for which the pacf is returned
    method : 'unbiased' (default) or 'mle'
        forwarded to ``yule_walker`` as its ``method`` argument

    Returns
    -------
    pacf : 1d array
        partial autocorrelations, ``nlags + 1`` elements; the lag-0 entry is
        fixed at 1.

    Notes
    -----
    ``yule_walker`` is solved from scratch for every lag and only the last
    coefficient of each fit is kept, so work is repeated across lags.
    '''
    higher_lags = [yule_walker(x, lag, method=method)[0][-1]
                   for lag in range(1, nlags + 1)]
    return np.array([1.] + higher_lags)
def setupClass(cls):
    """Fit the fixture shared by the tests of this class.

    Runs ``yule_walker`` (order 4, ``method="mle"``) on the ``endog`` of the
    statsmodels sunspots dataset and stores the two results on the class as
    ``rho`` and ``sigma``. ``R_params`` holds the four reference
    coefficients the tests compare against (presumably produced with R,
    going by the attribute name - TODO confirm).
    """
    from statsmodels.datasets.sunspots import load

    sunspots = load()
    estimates = yule_walker(sunspots.endog, order=4, method="mle")
    cls.rho, cls.sigma = estimates
    cls.R_params = [
        1.2831003105694765,
        -0.45240924374091945,
        -0.20770298557575195,
        0.047943648089542337,
    ]
def yule_walker_acov(acov, order=1, method="unbiased", df=None, inv=False):
    """
    Estimate AR(p) parameters from autocovariances via Yule-Walker equations.

    The autocovariances are used as given: the Toeplitz system
    ``R rho = r[1:]`` with ``R[i, j] = acov[|i - j|]`` is solved directly.
    Earlier this function forwarded ``acov`` to the data-based
    ``yule_walker`` estimator, which re-estimated autocovariances *of the
    autocovariance sequence* and therefore did not solve the equations for
    the supplied ``acov``.

    Parameters
    ----------
    acov : array_like, 1d
        Autocovariances at lags ``0, 1, ..., order`` (extra lags are
        ignored).
    order : int, optional
        The order of the autoregressive process.  Default is 1.
    method : str, optional
        Unused. Kept for backward compatibility; it only affects how
        autocovariances are estimated from data, and ``acov`` is already
        given.
    df : int, optional
        Unused. Kept for backward compatibility, for the same reason as
        ``method``.
    inv : bool
        If inv is True the inverse of R is also returned.  Default is False.

    Returns
    -------
    rho : ndarray
        The estimated autoregressive coefficients.
    sigma : float
        Square root of ``acov[0] - sum(acov[1:order + 1] * rho)``, i.e. the
        implied innovation standard deviation.
    Rinv : ndarray
        Inverse of the Toeplitz matrix; only returned when ``inv`` is True.

    Raises
    ------
    ValueError
        If ``order`` is negative or fewer than ``order + 1`` autocovariances
        are supplied.
    """
    import numpy as np

    r = np.asarray(acov, dtype=np.float64).ravel()
    if order < 0 or r.shape[0] < order + 1:
        raise ValueError("acov must contain at least order + 1 "
                         "autocovariances (lags 0..order)")
    r = r[:order + 1]

    if order == 0:
        # Nothing to solve: no AR coefficients, sigma is sqrt(acov[0]).
        rho = np.zeros(0)
        R = np.zeros((0, 0))
    else:
        # Symmetric Toeplitz matrix: entry (i, j) is the autocovariance at
        # lag |i - j|.
        lag_gap = np.abs(np.subtract.outer(np.arange(order),
                                           np.arange(order)))
        R = r[lag_gap]
        rho = np.linalg.solve(R, r[1:])

    sigma = np.sqrt(r[0] - np.dot(r[1:], rho))
    if inv:
        Rinv = np.linalg.inv(R) if order else R
        return rho, sigma, Rinv
    return rho, sigma
def mdl(m, n, breakpoints, data):
    """Return a minimum-description-length style score for a segmentation.

    Parameters
    ----------
    m : number
        Enters the score as ``log2(max(1, m))`` and ``m * log2(n)``
        (presumably the number of break points - TODO confirm with caller).
    n : number
        Enters the score as ``log2(n)`` and ``n / 2`` (presumably the series
        length - TODO confirm with caller).
    breakpoints : dict
        Maps a break-point position to the order passed to ``yule_walker``
        for the segment that starts there.
    data : indexable
        ``data[1][j]`` must give the observation at position ``j``.

    Returns
    -------
    float
        Sum of the six score terms.
    """
    # Process break points in increasing order of position.
    timestamps = sorted(breakpoints.keys())

    count_term = math.log(max(1, m), 2)
    location_term = m * math.log(n, 2)
    order_term = sum(math.log(breakpoints[ts], 2) for ts in timestamps)

    term3 = term4 = 0
    for prev_ts, ts in zip(timestamps, timestamps[1:]):
        ni = ts - prev_ts
        # NOTE(review): this term takes the order stored at the segment's
        # right end, while the fit below uses the one at its left end -
        # confirm which is intended.
        term3 += (breakpoints[ts] + 2) / 2 * math.log(ni, 2)
        # NOTE(review): the range stops at ts - 2, i.e. ni - 1 samples are
        # used for a segment of length ni - confirm this is intended.
        segment = [data[1][j] for j in range(prev_ts, ts - 1)]
        rho, sigma = yule_walker(segment, breakpoints[prev_ts])
        var = math.pow(sigma, 2)
        term4 += ni / 2 * math.log(2 * math.pi * var, 2)

    return sum([count_term, location_term, order_term, term3, term4, n / 2])
def yule_walker_acov(acov, order=1, method="unbiased", df=None, inv=False):
    """
    Estimate AR(p) parameters from autocovariances via Yule-Walker equations.

    The autocovariances are used as given: the Toeplitz system
    ``R rho = r[1:]`` with ``R[i, j] = acov[|i - j|]`` is solved directly.
    Earlier this function forwarded ``acov`` to the data-based
    ``yule_walker`` estimator, which re-estimated autocovariances *of the
    autocovariance sequence* and therefore did not solve the equations for
    the supplied ``acov``.

    Parameters
    ----------
    acov : array-like, 1d
        Autocovariances at lags ``0, 1, ..., order`` (extra lags are
        ignored).
    order : integer, optional
        The order of the autoregressive process.  Default is 1.
    method : str, optional
        Unused. Kept for backward compatibility; it only affects how
        autocovariances are estimated from data, and ``acov`` is already
        given.
    df : integer, optional
        Unused. Kept for backward compatibility, for the same reason as
        ``method``.
    inv : bool
        If inv is True the inverse of R is also returned.  Default is False.

    Returns
    -------
    rho : ndarray
        The estimated autoregressive coefficients.
    sigma : float
        Square root of ``acov[0] - sum(acov[1:order + 1] * rho)``, i.e. the
        implied innovation standard deviation.
    Rinv : ndarray
        Inverse of the Toeplitz matrix; only returned when ``inv`` is True.

    Raises
    ------
    ValueError
        If ``order`` is negative or fewer than ``order + 1`` autocovariances
        are supplied.
    """
    import numpy as np

    r = np.asarray(acov, dtype=np.float64).ravel()
    if order < 0 or r.shape[0] < order + 1:
        raise ValueError("acov must contain at least order + 1 "
                         "autocovariances (lags 0..order)")
    r = r[:order + 1]

    if order == 0:
        # Nothing to solve: no AR coefficients, sigma is sqrt(acov[0]).
        rho = np.zeros(0)
        R = np.zeros((0, 0))
    else:
        # Symmetric Toeplitz matrix: entry (i, j) is the autocovariance at
        # lag |i - j|.
        lag_gap = np.abs(np.subtract.outer(np.arange(order),
                                           np.arange(order)))
        R = r[lag_gap]
        rho = np.linalg.solve(R, r[1:])

    sigma = np.sqrt(r[0] - np.dot(r[1:], rho))
    if inv:
        Rinv = np.linalg.inv(R) if order else R
        return rho, sigma, Rinv
    return rho, sigma
def spec_ar(x, x_freq=1, n_freq=500, order_max=None, plot=True, **kwargs):
    """Estimate a spectrum from the AIC-best Yule-Walker AR fit.

    Parameters
    ----------
    x : array_like
        Series to analyse; converted with ``np.r_``.
    x_freq : number, optional
        Divides the spectrum values. Default is 1.
    n_freq : int, optional
        Number of frequencies, evenly spaced from 0 to 0.5. Default is 500.
    order_max : int, optional
        Largest AR order tried. Defaults to
        ``min(N - 1, floor(10 * log10(N)))`` with ``N = len(x)``.
    plot : bool, optional
        If true, the result is passed to ``plot_spec``. Default is True.
    **kwargs
        Forwarded to ``plot_spec``.

    Returns
    -------
    dict
        Keys ``'freq'``, ``'spec'``, ``'coh'``, ``'phase'``, ``'n.used'``
        and ``'method'``.
    """
    x = np.r_[x]
    n_obs = len(x)
    if order_max is None:
        order_max = min(n_obs - 1, int(np.floor(10 * np.log10(n_obs))))

    # Try every order from 0 to order_max and keep the lowest-AIC fit.
    best = None
    for lag in range(order_max + 1):
        ar, sigma = yule_walker(x, order=lag, method='mle')
        sigma2 = sigma**2
        # AIC on the effective sample, i.e. with the first `lag` points lost.
        criterion = np.log(sigma2) + 2 * (1 + lag) / (n_obs - lag)
        if best is None or criterion < best['aic']:
            best = {'aic': criterion, 'order': lag, 'ar': ar,
                    'sigma2': sigma2}

    order = best['order']
    sigma2 = best['sigma2']
    freq = np.arange(0, n_freq) / (2 * (n_freq - 1))

    if order < 1:
        # Order 0 has no AR coefficients: the spectrum is flat.
        spec = (sigma2 / x_freq) * np.ones(len(freq))
    else:
        angles = 2 * np.pi * np.outer(freq, np.arange(1, order + 1))
        cs = np.cos(angles) @ best['ar']
        sn = np.sin(angles) @ best['ar']
        spec = sigma2 / (x_freq * ((1 - cs)**2 + sn**2))

    results = {
        'freq': freq,
        'spec': spec,
        'coh': None,
        'phase': None,
        'n.used': len(x),
        'method': 'AR(' + str(order) + ') spectrum',
    }

    if plot:
        plot_spec(results, coverage=None, **kwargs)

    return results
def get_modes(processedFreq, fs, modelOrder=10):
    """Estimate modal frequencies and damping ratios from an AR fit.

    An AR model of order ``modelOrder`` is fitted with ``yule_walker``
    (``method="mle"``). The roots of the polynomial ``[1, -a1, ..., -ap]``
    are mapped through ``log(z) * fs`` and only roots with positive
    imaginary part are kept.

    Parameters
    ----------
    processedFreq : array_like
        Signal passed to ``yule_walker``.
    fs : number
        Multiplies ``log(z)`` (presumably the sampling frequency in Hz -
        TODO confirm with caller).
    modelOrder : int, optional
        AR order. Default is 10.

    Returns
    -------
    damp_x : list
        ``-real / abs`` of each kept root (a ratio, not scaled to percent).
    freq_y : list
        ``imag / (2 * pi)`` of each kept root.
    """
    ar_coefs, _ = yule_walker(processedFreq, order=modelOrder, method="mle")

    # Leading 1 followed by the negated AR coefficients.
    char_poly = np.append(np.array([1]), -ar_coefs)
    z_roots = np.roots(char_poly)
    s_roots = np.log(z_roots) * fs

    damp_x = []
    freq_y = []
    for mode in s_roots:
        # Skip negative (and zero) frequencies.
        if mode.imag > 0:
            freq_y.append(mode.imag / (2 * np.pi))
            damp_x.append(-np.divide(mode.real, np.absolute(mode)))
    return damp_x, freq_y
def _get_ar_order(x):
    """Return the AR order with the lowest AIC among Yule-Walker fits.

    Orders ``0 .. min(N - 1, floor(10 * log10(N)))`` are tried, with
    ``N = len(x)``; ties keep the smallest order.
    """
    n_obs = len(x)
    order_max = min(n_obs - 1, int(np.floor(10 * np.log10(n_obs))))

    best_order = None
    best_aic = None
    for lag in range(order_max + 1):
        _, sigma = yule_walker(x, order=lag, method='mle')
        # AIC on the effective sample, i.e. with the first `lag` points lost.
        criterion = np.log(sigma**2) + 2 * (1 + lag) / (n_obs - lag)
        if best_order is None or criterion < best_aic:
            best_order, best_aic = lag, criterion

    return best_order
def spectrum0_ar(x, max_order='auto'):
    """Estimate f(0) of the spectrum of ``x`` from the AIC-best AR fit.

    Parameters
    ----------
    x : ndarray
        Samples; ``x.shape[0]`` is taken as the number of samples.
    max_order : int or 'auto', optional
        Largest AR order tried. ``'auto'`` uses
        ``floor(10 * log10(n_samples))``.

    Returns
    -------
    tuple
        ``(f0 / n_samples, order)`` for the order minimising the AIC, or
        ``(0., 0.)`` when the variance of ``x`` is (numerically) zero.
    """
    n_samples = x.shape[0]

    # A constant series leaves nothing to fit.
    if np.allclose(np.var(x), 0.0):
        return 0., 0.

    if max_order == 'auto':
        max_order = floor(10 * np.log10(n_samples))

    # One row per candidate order: [order, f(0), AIC].
    candidates = np.zeros((max_order, 3))
    for row, p in enumerate(range(1, max_order + 1)):
        coefs, sigma = yule_walker(x, order=p, demean=True, method='unbiased')
        candidates[row] = [p, spec0_ar(sigma, coefs),
                           aic_ar(sigma, n_samples, p)]

    best_row = np.argmin(candidates[:, -1])
    order, var0 = candidates[best_row, :2]
    return var0 / n_samples, order
train_loss = history.history['loss'] train_loss2 = history2.history['loss'] plt.rcParams['axes.facecolor'] = 'white' plt.plot(x, train_loss, linewidth=1, label='LSTM training') plt.plot(x, train_loss2, linewidth=1, label='ANN training') plt.grid(True, which='both', axis='both') plt.title('AR Model - MSE of ANN vs LSTM') plt.xlabel('Epochs') plt.ylabel('MSE') plt.legend() if save: plt.savefig("./imgs/AR Model - Training MSE.png", dpi=800) plt.show() # Yule-Walker rho, sigma = yule_walker(y_train, order=3, method="mle") yw_pred = np.ndarray.flatten(y_test)[:3] for i in range(3, 100): yw_pred = np.append(yw_pred, [ rho[0] * yw_pred[i - 1] + rho[1] * yw_pred[i - 2] + rho[2] * yw_pred[i - 3] + np.random.uniform(0, 0.1) ], axis=0) plt.rcParams['axes.facecolor'] = 'white' plt.plot(x_axis[:100], yw_pred, linewidth=1, label='Predictions') plt.plot(x_axis[:100], y_test[:100].reshape(100, ), linewidth=1, label='Ground Truth',
def spec(x, order=2):
    """Return ``sigma**2 / (1 - sum(beta))**2`` from a Yule-Walker AR fit.

    ``beta`` and ``sigma`` are the two values returned by
    ``yule_walker(x, order)``. Up to a constant factor this is the spectral
    density of the fitted AR model at frequency zero.
    """
    ar_coefs, innovation_sd = yule_walker(x, order)
    gain = 1. - np.sum(ar_coefs)
    return innovation_sd**2 / gain**2
def spec(x, order=2):
    """Return ``sigma**2 / (1 - sum(beta))**2`` from a Yule-Walker AR fit.

    ``beta`` and ``sigma`` are the two values returned by
    ``yule_walker(x, order)``. Up to a constant factor this is the spectral
    density of the fitted AR model at frequency zero.
    """
    # Imported lazily so statsmodels is only needed when this is called.
    from statsmodels.regression.linear_model import yule_walker

    ar_coefs, innovation_sd = yule_walker(x, order)
    gain = 1. - np.sum(ar_coefs)
    return innovation_sd**2 / gain**2
def hannan_rissanen(endog, ar_order=0, ma_order=0, demean=True,
                    initial_ar_order=None, unbiased=None):
    """
    Estimate ARMA parameters using Hannan-Rissanen procedure.

    Parameters
    ----------
    endog : array_like
        Input time series array, assumed to be stationary.
    ar_order : int
        Autoregressive order
    ma_order : int
        Moving average order
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the ARMA coefficients. Default is True.
    initial_ar_order : int, optional
        Order of long autoregressive process used for initial computation of
        residuals.
    unbiased : bool, optional
        Whether or not to apply the bias correction step. Default is True if
        the estimated coefficients from the previous step imply a stationary
        and invertible process and False otherwise.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes three components: `spec`, containing the
        `SARIMAXSpecification` instance corresponding to the input arguments;
        `initial_ar_order`, containing the autoregressive lag order used in the
        first step; and `resid`, which contains the computed residuals from the
        last step.

    Raises
    ------
    ValueError
        If `unbiased=True` is given for an ARMA model whose second-step
        estimates are non-stationary or non-invertible.

    Notes
    -----
    The primary reference is [1]_, section 5.1.4, which describes a three-step
    procedure that we implement here.

    1. Fit a large-order AR model via Yule-Walker to estimate residuals
    2. Compute AR and MA estimates via least squares
    3. (Unless the estimated coefficients from step (2) are non-stationary /
       non-invertible or `unbiased=False`) Perform bias correction

    The order used for the AR model in the first step may be given as an
    argument. If it is not, we compute it as suggested by [2]_.

    The estimate of the variance that we use is computed from the residuals
    of the least-squares regression and not from the innovations algorithm.
    This is because our fast implementation of the innovations algorithm is
    only valid for stationary processes, and the Hannan-Rissanen procedure may
    produce estimates that imply non-stationary processes. To avoid
    inconsistency, we never compute this latter variance here, even if it is
    possible. See test_hannan_rissanen::test_brockwell_davis_example_517 for
    an example of how to compute this variance manually.

    This procedure assumes that the series is stationary, but if this is not
    true, it is still possible that this procedure will return parameters that
    imply a non-stationary / non-invertible process.

    Note that the third stage will only be applied if the parameters from the
    second stage imply a stationary / invertible model. If `unbiased=True` is
    given, then non-stationary / non-invertible parameters in the second stage
    will throw an exception.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    .. [2] Gomez, Victor, and Agustin Maravall. 2001.
       "Automatic Modeling Methods for Univariate Series."
       A Course in Time Series Analysis, 171–201.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order, ma_order=ma_order)
    endog = spec.endog
    if demean:
        endog = endog - endog.mean()

    p = SARIMAXParams(spec=spec)

    nobs = len(endog)
    max_ar_order = spec.max_ar_order
    max_ma_order = spec.max_ma_order

    # Default initial_ar_order is as suggested by Gomez and Maravall (2001)
    if initial_ar_order is None:
        initial_ar_order = max(
            np.floor(np.log(nobs)**2).astype(int),
            2 * max(max_ar_order, max_ma_order))
    # Create a spec, just to validate the initial autoregressive order
    _ = SARIMAXSpecification(endog, ar_order=initial_ar_order)

    # Compute lagged endog
    # (`ar_ix`, and `ma_ix` below, are to account for non-consecutive lags;
    # for indexing purposes, must have dtype int)
    ar_ix = np.array(spec.ar_lags, dtype=int) - 1
    lagged_endog = lagmat(endog, max_ar_order, trim='both')[:, ar_ix]

    # If no AR or MA components, this is just a variance computation
    if max_ma_order == 0 and max_ar_order == 0:
        p.sigma2 = np.var(endog, ddof=0)
        resid = endog.copy()
    # If no MA component, this is just CSS
    elif max_ma_order == 0:
        mod = OLS(endog[max_ar_order:], lagged_endog)
        res = mod.fit()
        resid = res.resid
        p.ar_params = res.params
        p.sigma2 = res.scale
    # Otherwise ARMA model
    else:
        # Step 1: Compute long AR model via Yule-Walker, get residuals
        initial_ar_params, _ = yule_walker(
            endog, order=initial_ar_order, method='mle')
        X = lagmat(endog, initial_ar_order, trim='both')
        y = endog[initial_ar_order:]
        resid = y - X.dot(initial_ar_params)

        # Get lagged residuals for `exog` in least-squares regression
        ma_ix = np.array(spec.ma_lags, dtype=int) - 1
        lagged_resid = lagmat(resid, max_ma_order, trim='both')[:, ma_ix]

        # Step 2: estimate ARMA model via least squares
        # (`ix` drops the leading rows of `lagged_endog` so that it lines up
        # with `lagged_resid`, which starts `initial_ar_order + max_ma_order`
        # observations into the sample)
        ix = initial_ar_order + max_ma_order - max_ar_order
        mod = OLS(endog[initial_ar_order + max_ma_order:],
                  np.c_[lagged_endog[ix:], lagged_resid])
        res = mod.fit()
        p.ar_params = res.params[:spec.k_ar_params]
        p.ma_params = res.params[spec.k_ar_params:]
        resid = res.resid
        p.sigma2 = res.scale

        # Step 3: bias correction (if requested)
        if unbiased is True or unbiased is None:
            if p.is_stationary and p.is_invertible:
                Z = np.zeros_like(endog)

                ar_coef = p.ar_poly.coef
                ma_coef = p.ma_poly.coef

                # The recursion needs `max(max_ar_order, max_ma_order)`
                # earlier values, so the leading entries of Z stay at zero.
                for t in range(max(max_ar_order, max_ma_order), nobs):
                    # Note: in the case of non-consecutive lag orders, the
                    # polynomials have the appropriate zeros so we don't
                    # need to subset `endog[t - max_ar_order:t]` or
                    # Z[t - max_ma_order:t]
                    tmp_ar = np.dot(
                        -ar_coef[1:], endog[t - max_ar_order:t][::-1])
                    tmp_ma = np.dot(ma_coef[1:],
                                    Z[t - max_ma_order:t][::-1])
                    Z[t] = endog[t] - tmp_ar - tmp_ma

                V = lfilter([1], ar_coef, Z)
                W = lfilter(np.r_[1, -ma_coef[1:]], [1], Z)

                lagged_V = lagmat(V, max_ar_order, trim='both')
                lagged_W = lagmat(W, max_ma_order, trim='both')

                exog = np.c_[
                    lagged_V[max(max_ma_order - max_ar_order, 0):, ar_ix],
                    lagged_W[max(max_ar_order - max_ma_order, 0):, ma_ix]]

                mod_unbias = OLS(Z[max(max_ar_order, max_ma_order):], exog)
                res_unbias = mod_unbias.fit()

                p.ar_params = (
                    p.ar_params + res_unbias.params[:spec.k_ar_params])
                p.ma_params = (
                    p.ma_params + res_unbias.params[spec.k_ar_params:])

                # Recompute sigma2
                resid = mod.endog - mod.exog.dot(
                    np.r_[p.ar_params, p.ma_params])
                p.sigma2 = np.inner(resid, resid) / len(resid)
            elif unbiased is True:
                raise ValueError('Cannot perform third step of Hannan-Rissanen'
                                 ' estimation to remove parameter bias,'
                                 ' because parameters estimated from the'
                                 ' second step are non-stationary or'
                                 ' non-invertible')

    # TODO: Gomez and Maravall (2001) or Gomez (1998)
    # propose one more step here to further improve MA estimates

    # Construct results
    other_results = Bunch({
        'spec': spec,
        'initial_ar_order': initial_ar_order,
        'resid': resid
    })

    return p, other_results
def _spec(self, x, order=2):
    """Return ``sigma**2 / (1 - sum(beta))**2`` from a Yule-Walker AR fit.

    ``beta`` and ``sigma`` are the two values returned by
    ``yule_walker(x, order)``. Up to a constant factor this is the spectral
    density of the fitted AR model at frequency zero. No instance state is
    read.
    """
    # Imported lazily so statsmodels is only needed when this is called.
    from statsmodels.regression.linear_model import yule_walker

    ar_coefs, innovation_sd = yule_walker(x, order)
    gain = 1. - np.sum(ar_coefs)
    return innovation_sd ** 2 / gain ** 2
# list() is required on Python 3, where range() is a lazy sequence that
# cannot be concatenated with a list.
examples_all = list(range(10)) + ['test_copy']

examples = examples_all  # [5]

if 0 in examples:
    print('\n Example 0')
    X = np.arange(1, 8)
    X = sm.add_constant(X)
    Y = np.array((1, 3, 4, 5, 8, 10, 9))
    rho = 2
    model = GLSAR(Y, X, 2)
    # Manual iteration: fit, re-estimate the AR coefficients from the
    # residuals with Yule-Walker, and rebuild the model with them.
    for i in range(6):
        results = model.fit()
        print('AR coefficients:', model.rho)
        rho, sigma = yule_walker(results.resid, order=model.order)
        model = GLSAR(Y, X, rho)

    par0 = results.params
    print('params fit', par0)
    # Same number of iterations through the built-in iterative_fit.
    model0if = GLSAR(Y, X, 2)
    res = model0if.iterative_fit(6)
    print('iterativefit beta', res.params)
    results.tvalues   # XXX is this correct? it does equal params/bse
    # but isn't the same as the AR example (which was wrong in the first place..)
    print(results.t_test([0, 1]))  # are sd and t correct? vs
    print(results.f_test(np.eye(2)))


rhotrue = np.array([0.5, 0.2])
nlags = np.size(rhotrue)
# Simulate an AR(2) series y[k] = 1.5*y[k-1] - 0.7*y[k-2] + e[k].
# NOTE(review): `y`, `e`, `Ndata` and `dt` are defined outside this excerpt;
# presumably `y` and `e` are arrays with at least `Ndata` elements - confirm.
y[0] = e[0]
y[1] = 1.5 * y[0] + e[1]
for index in range(2, Ndata):
    y[index] = 1.5 * y[index - 1] - 0.7 * y[index - 2] + e[index]

# Data processing
y -= np.mean(y)

# model
# y(k) = [ y(k-1) y(k-2) ]*[ a1 ] + e(k)
#                          [ a2 ]
order = 2
ar, sigma = yule_walker(y, order=order)
# Negate so that [1, -a1, -a2] are the coefficients of the polynomial whose
# roots are compared with those of [1, -1.5, 0.7] below.
ar *= -1
coeff = np.array([1])
coeff = np.append(coeff, ar)

# Estimated modes: roots of the fitted polynomial, mapped with log(z) / dt.
print("modos estimados")
raizes_est_z = np.roots(coeff)
raizes_est_s = np.log(raizes_est_z) / dt
print(raizes_est_s)

# True modes: the same mapping applied to the simulation coefficients.
print("modos reais")
raizes_reais_z = np.roots([1, -1.5, 0.7])
raizes_reais_s = np.log(raizes_reais_z) / dt
print(raizes_reais_s)
# ax.set_title("Simulated Variance") fig, ax = plt.subplots(1, 1) pcm = ax.pcolormesh(lon5, lat5, vardiff) fig.colorbar(pcm, ax=ax) ax.set_title("Difference in Variance") print(np.nanmax(np.abs(vardiff))) #%% Red Noise Mmodel Test at a single point test_ar1 = ar1_map[klatss, klonss] test_ssh = ssha[:, klatss, klonss] test_var = (1 - test_ar1**2) * (np.var(test_ssh)) test_sig = np.sqrt(test_var) lmrho, lmsigma = linear_model.yule_walker(test_ssh, order=1, method='adjusted') simlen = 240 noisets = np.random.normal(0, test_sig, simlen) ytest = np.zeros(simlen) for i in range(1, simlen): ytest[i] = test_ar1 * ytest[i - 1] + noisets[i] print("Simulated Correlation is %f " % (np.corrcoef(ytest[1:], ytest[:-1])[0, 1])) print("Actual Correlation is %f " % (test_ar1)) print("Simulated Variance is %f" % (np.var(ytest))) print("Actual Variance is %f" % (np.var(test_ssh))) #%% Visualize some plots
def hannan_rissanen(endog, ar_order=0, ma_order=0, demean=True,
                    initial_ar_order=None, unbiased=None,
                    fixed_params=None):
    """
    Estimate ARMA parameters using Hannan-Rissanen procedure.

    Parameters
    ----------
    endog : array_like
        Input time series array, assumed to be stationary.
    ar_order : int or list of int
        Autoregressive order
    ma_order : int or list of int
        Moving average order
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the ARMA coefficients. Default is True.
    initial_ar_order : int, optional
        Order of long autoregressive process used for initial computation of
        residuals.
    unbiased : bool, optional
        Whether or not to apply the bias correction step. Default is True if
        the estimated coefficients from the previous step imply a stationary
        and invertible process and False otherwise.
    fixed_params : dict, optional
        Dictionary with names of fixed parameters as keys (e.g. 'ar.L1',
        'ma.L2'), which correspond to SARIMAXSpecification.param_names.
        Dictionary values are the values of the associated fixed parameters.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes three components: `spec`, containing the
        `SARIMAXSpecification` instance corresponding to the input arguments;
        `initial_ar_order`, containing the autoregressive lag order used in the
        first step; and `resid`, which contains the computed residuals from the
        last step.

    Raises
    ------
    NotImplementedError
        If `unbiased=True` is given together with fixed parameters for an
        ARMA model.
    ValueError
        If `unbiased=True` is given for an ARMA model whose second-step
        estimates are non-stationary or non-invertible.

    Notes
    -----
    The primary reference is [1]_, section 5.1.4, which describes a three-step
    procedure that we implement here.

    1. Fit a large-order AR model via Yule-Walker to estimate residuals
    2. Compute AR and MA estimates via least squares
    3. (Unless the estimated coefficients from step (2) are non-stationary /
       non-invertible or `unbiased=False`) Perform bias correction

    The order used for the AR model in the first step may be given as an
    argument. If it is not, we compute it as suggested by [2]_.

    The estimate of the variance that we use is computed from the residuals
    of the least-squares regression and not from the innovations algorithm.
    This is because our fast implementation of the innovations algorithm is
    only valid for stationary processes, and the Hannan-Rissanen procedure may
    produce estimates that imply non-stationary processes. To avoid
    inconsistency, we never compute this latter variance here, even if it is
    possible. See test_hannan_rissanen::test_brockwell_davis_example_517 for
    an example of how to compute this variance manually.

    This procedure assumes that the series is stationary, but if this is not
    true, it is still possible that this procedure will return parameters that
    imply a non-stationary / non-invertible process.

    Note that the third stage will only be applied if the parameters from the
    second stage imply a stationary / invertible model. If `unbiased=True` is
    given, then non-stationary / non-invertible parameters in the second stage
    will throw an exception.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    .. [2] Gomez, Victor, and Agustin Maravall. 2001.
       "Automatic Modeling Methods for Univariate Series."
       A Course in Time Series Analysis, 171–201.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order, ma_order=ma_order)

    fixed_params = _validate_fixed_params(fixed_params, spec.param_names)

    endog = spec.endog
    if demean:
        endog = endog - endog.mean()

    p = SARIMAXParams(spec=spec)

    nobs = len(endog)
    max_ar_order = spec.max_ar_order
    max_ma_order = spec.max_ma_order

    # Default initial_ar_order is as suggested by Gomez and Maravall (2001)
    if initial_ar_order is None:
        initial_ar_order = max(np.floor(np.log(nobs)**2).astype(int),
                               2 * max(max_ar_order, max_ma_order))
    # Create a spec, just to validate the initial autoregressive order
    _ = SARIMAXSpecification(endog, ar_order=initial_ar_order)

    # Unpack fixed and free ar/ma lags, ix, and params (fixed only)
    params_info = _package_fixed_and_free_params_info(
        fixed_params, spec.ar_lags, spec.ma_lags
    )

    # Compute lagged endog
    lagged_endog = lagmat(endog, max_ar_order, trim='both')

    # If no AR or MA components, this is just a variance computation
    if max_ma_order == 0 and max_ar_order == 0:
        p.sigma2 = np.var(endog, ddof=0)
        resid = endog.copy()
    # If no MA component, this is just CSS
    elif max_ma_order == 0:
        # extract 1) lagged_endog with free params; 2) lagged_endog with fixed
        # params; 3) endog residual after applying fixed params if applicable
        X_with_free_params = lagged_endog[:, params_info.free_ar_ix]
        X_with_fixed_params = lagged_endog[:, params_info.fixed_ar_ix]
        y = endog[max_ar_order:]
        if X_with_fixed_params.shape[1] != 0:
            y = y - X_with_fixed_params.dot(params_info.fixed_ar_params)

        # no free ar params -> variance computation on the endog residual
        if X_with_free_params.shape[1] == 0:
            p.ar_params = params_info.fixed_ar_params
            p.sigma2 = np.var(y, ddof=0)
            resid = y.copy()
        # otherwise OLS with endog residual (after applying fixed params) as y,
        # and lagged_endog with free params as X
        else:
            mod = OLS(y, X_with_free_params)
            res = mod.fit()
            resid = res.resid
            p.sigma2 = res.scale
            p.ar_params = _stitch_fixed_and_free_params(
                fixed_ar_or_ma_lags=params_info.fixed_ar_lags,
                fixed_ar_or_ma_params=params_info.fixed_ar_params,
                free_ar_or_ma_lags=params_info.free_ar_lags,
                free_ar_or_ma_params=res.params,
                spec_ar_or_ma_lags=spec.ar_lags
            )
    # Otherwise ARMA model
    else:
        # Step 1: Compute long AR model via Yule-Walker, get residuals
        initial_ar_params, _ = yule_walker(
            endog, order=initial_ar_order, method='mle')
        X = lagmat(endog, initial_ar_order, trim='both')
        y = endog[initial_ar_order:]
        resid = y - X.dot(initial_ar_params)

        # Get lagged residuals for `exog` in least-squares regression
        lagged_resid = lagmat(resid, max_ma_order, trim='both')

        # Step 2: estimate ARMA model via least squares
        # (`ix` drops the leading rows of `lagged_endog` so that it lines up
        # with `lagged_resid`, which starts `initial_ar_order + max_ma_order`
        # observations into the sample)
        ix = initial_ar_order + max_ma_order - max_ar_order
        X_with_free_params = np.c_[
            lagged_endog[ix:, params_info.free_ar_ix],
            lagged_resid[:, params_info.free_ma_ix]
        ]
        X_with_fixed_params = np.c_[
            lagged_endog[ix:, params_info.fixed_ar_ix],
            lagged_resid[:, params_info.fixed_ma_ix]
        ]
        y = endog[initial_ar_order + max_ma_order:]
        if X_with_fixed_params.shape[1] != 0:
            y = y - X_with_fixed_params.dot(
                np.r_[params_info.fixed_ar_params, params_info.fixed_ma_params]
            )

        # Step 2.1: no free ar params -> variance computation on the endog
        # residual
        if X_with_free_params.shape[1] == 0:
            p.ar_params = params_info.fixed_ar_params
            p.ma_params = params_info.fixed_ma_params
            p.sigma2 = np.var(y, ddof=0)
            resid = y.copy()
        # Step 2.2: otherwise OLS with endog residual (after applying fixed
        # params) as y, and lagged_endog and lagged_resid with free params as X
        else:
            mod = OLS(y, X_with_free_params)
            res = mod.fit()
            k_free_ar_params = len(params_info.free_ar_lags)
            p.ar_params = _stitch_fixed_and_free_params(
                fixed_ar_or_ma_lags=params_info.fixed_ar_lags,
                fixed_ar_or_ma_params=params_info.fixed_ar_params,
                free_ar_or_ma_lags=params_info.free_ar_lags,
                free_ar_or_ma_params=res.params[:k_free_ar_params],
                spec_ar_or_ma_lags=spec.ar_lags
            )
            p.ma_params = _stitch_fixed_and_free_params(
                fixed_ar_or_ma_lags=params_info.fixed_ma_lags,
                fixed_ar_or_ma_params=params_info.fixed_ma_params,
                free_ar_or_ma_lags=params_info.free_ma_lags,
                free_ar_or_ma_params=res.params[k_free_ar_params:],
                spec_ar_or_ma_lags=spec.ma_lags
            )
            resid = res.resid
            p.sigma2 = res.scale

        # Step 3: bias correction (if requested)

        # Step 3.1: validate `unbiased` argument and handle setting the default
        if unbiased is True:
            if len(fixed_params) != 0:
                raise NotImplementedError(
                    "Third step of Hannan-Rissanen estimation to remove "
                    "parameter bias is not yet implemented for the case "
                    "with fixed parameters."
                )
            elif not (p.is_stationary and p.is_invertible):
                raise ValueError(
                    "Cannot perform third step of Hannan-Rissanen estimation "
                    "to remove parameter bias, because parameters estimated "
                    "from the second step are non-stationary or "
                    "non-invertible."
                )
        elif unbiased is None:
            if len(fixed_params) != 0:
                unbiased = False
            else:
                # Coerce to a plain bool: the `is True` test below would
                # silently skip the correction if these properties yielded a
                # NumPy boolean instead of a Python one.
                unbiased = bool(p.is_stationary and p.is_invertible)

        # Step 3.2: bias correction
        if unbiased is True:
            Z = np.zeros_like(endog)

            ar_coef = p.ar_poly.coef
            ma_coef = p.ma_poly.coef

            # The recursion needs `max(max_ar_order, max_ma_order)` earlier
            # values, so the leading entries of Z stay at zero.
            for t in range(max(max_ar_order, max_ma_order), nobs):
                # Note: in the case of non-consecutive lag orders, the
                # polynomials have the appropriate zeros so we don't
                # need to subset `endog[t - max_ar_order:t]` or
                # Z[t - max_ma_order:t]
                tmp_ar = np.dot(
                    -ar_coef[1:], endog[t - max_ar_order:t][::-1])
                tmp_ma = np.dot(ma_coef[1:],
                                Z[t - max_ma_order:t][::-1])
                Z[t] = endog[t] - tmp_ar - tmp_ma

            V = lfilter([1], ar_coef, Z)
            W = lfilter(np.r_[1, -ma_coef[1:]], [1], Z)

            lagged_V = lagmat(V, max_ar_order, trim='both')
            lagged_W = lagmat(W, max_ma_order, trim='both')

            exog = np.c_[
                lagged_V[
                    max(max_ma_order - max_ar_order, 0):,
                    params_info.free_ar_ix
                ],
                lagged_W[
                    max(max_ar_order - max_ma_order, 0):,
                    params_info.free_ma_ix
                ]
            ]

            mod_unbias = OLS(Z[max(max_ar_order, max_ma_order):], exog)
            res_unbias = mod_unbias.fit()

            p.ar_params = (
                p.ar_params + res_unbias.params[:spec.k_ar_params])
            p.ma_params = (
                p.ma_params + res_unbias.params[spec.k_ar_params:])

            # Recompute sigma2
            resid = mod.endog - mod.exog.dot(
                np.r_[p.ar_params, p.ma_params])
            p.sigma2 = np.inner(resid, resid) / len(resid)

    # TODO: Gomez and Maravall (2001) or Gomez (1998)
    # propose one more step here to further improve MA estimates

    # Construct results
    other_results = Bunch({
        'spec': spec,
        'initial_ar_order': initial_ar_order,
        'resid': resid
    })

    return p, other_results
# Written to run unchanged on Python 2 and Python 3: list() makes the range
# concatenable, and every print takes a single pre-formatted string so the
# output is the same whether print is a statement or a function.
examples_all = list(range(10)) + ['test_copy']

examples = examples_all  # [5]

if 0 in examples:
    print('\n Example 0')
    X = np.arange(1, 8)
    X = sm.add_constant(X)
    Y = np.array((1, 3, 4, 5, 8, 10, 9))
    rho = 2
    model = GLSAR(Y, X, 2)
    # Manual iteration: fit, re-estimate the AR coefficients from the
    # residuals with Yule-Walker, and rebuild the model with them.
    for i in range(6):
        results = model.fit()
        print('AR coefficients: %s' % (model.rho,))
        rho, sigma = yule_walker(results.resid, order=model.order)
        model = GLSAR(Y, X, rho)

    par0 = results.params
    print('params fit %s' % (par0,))
    # Same number of iterations through the built-in iterative_fit.
    model0if = GLSAR(Y, X, 2)
    res = model0if.iterative_fit(6)
    print('iterativefit beta %s' % (res.params,))
    results.tvalues   # XXX is this correct? it does equal params/bse
    # but isn't the same as the AR example (which was wrong in the first place..)
    print(results.t_test([0, 1]))  # are sd and t correct? vs
    print(results.f_test(np.eye(2)))


rhotrue = np.array([0.5, 0.2])
nlags = np.size(rhotrue)
def yule_walker(endog, ar_order=0, demean=True, unbiased=False):
    """
    Estimate AR parameters using Yule-Walker equations.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Input time series array, assumed to be stationary.
    ar_order : int, optional
        Autoregressive order. Default is 0.
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the autoregressive coefficients. Default is True.
    unbiased : bool, optional
        Whether to use the "unbiased" autocovariance estimator, which uses
        n - h degrees of freedom rather than n. Note that despite the name, it
        is only truly unbiased if the process mean is known (rather than
        estimated) and for some processes it can result in a non-positive
        definite autocovariance matrix. Default is False.

    Returns
    -------
    parameters : SARIMAXParams object
        Holds the estimated AR coefficients and ``sigma2``, the square of the
        second value returned by ``linear_model.yule_walker``.
    other_results : Bunch
        Includes one component, `spec`, which is the `SARIMAXSpecification`
        instance corresponding to the input arguments.

    Raises
    ------
    ValueError
        If the specification's AR lags are not consecutive.

    Notes
    -----
    The primary reference is [1]_, section 5.1.1.

    This procedure assumes that the series is stationary.

    For a description of the effect of the "unbiased" estimate of the
    autocovariance function, see 2.4.2 of [1]_.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order)
    endog = spec.endog
    p = SARIMAXParams(spec=spec)

    if not spec.is_ar_consecutive:
        raise ValueError('Yule-Walker estimation unavailable for models with'
                         ' seasonal or non-consecutive AR orders.')

    # Translate the boolean flag into the estimator's `method` name.
    if unbiased:
        method = 'unbiased'
    else:
        method = 'mle'

    ar_params, sigma = linear_model.yule_walker(
        endog, order=ar_order, demean=demean, method=method)
    p.ar_params = ar_params
    p.sigma2 = sigma**2

    return p, Bunch({'spec': spec})
from statsmodels.regression.linear_model import yule_walker
import numpy as np
import pylab as plt
from scipy.signal import hilbert


def x_func(t):
    """Test signal: a 10-cycle sine with a slowly modulated envelope."""
    carrier = np.sin(10 * 2 * np.pi * t / 500)
    envelope = np.sin(
        0.91 * 2 * np.pi * t / 500 + np.sin(0.31 * 2 * np.pi * t / 500) * 0.5)
    return carrier * envelope


# First 1000 samples are used for fitting, the next 200 for comparison.
t_train = np.arange(1000)
t_test = np.arange(1000, 1200)

x_train = x_func(t_train)
x_test = x_func(t_test)

order = 50
ar, s = yule_walker(x_train, order, method='mle')

# ar[0] multiplies the most recent sample, so reverse the coefficients once
# to line them up with the chronological window pred[-order:].
lag_weights = ar[::-1]
pred = x_train.tolist()
for _ in t_test:
    pred.append(lag_weights.dot(pred[-order:]))

plt.figure(dpi=200)
plt.plot(pred)
plt.plot(t_test, x_test, '--')
plt.plot(t_train, x_train)
plt.ylim(-2, 2)
plt.show()
# train_loss2 = history2.history['loss']
# plt.rcParams['axes.facecolor'] = 'white'
# plt.plot(x, train_loss, linewidth=1, label='LSTM training')
# plt.plot(x, train_loss2, linewidth=1, label='ANN training')
# plt.grid(True, which='both', axis='both')
# plt.title('MA Model - Training MSE of ANN vs LSTM')
# plt.xlabel('Epochs')
# plt.ylabel('MSE')
# plt.legend()
# if save:
#     plt.savefig("./imgs/MA Model - Training MSE.png", dpi=800)
# plt.show()

# Yule-Walker
# AR fits of increasing order on the training series.
rho5, sigma5 = yule_walker(y_train, order=5, method="mle")
rho10, sigma10 = yule_walker(y_train, order=10, method="mle")
rho50, sigma50 = yule_walker(y_train, order=50, method="mle")
rho250, sigma250 = yule_walker(y_train, order=250, method="mle")

# Recursive AR(5) prediction seeded with the first five test samples.
# The output array is allocated once instead of being re-copied by
# np.append on every step.
n_pred = 10000
yw5_pred = np.empty(n_pred)
yw5_pred[:5] = np.ndarray.flatten(y_test)[:5]
for i in range(5, n_pred):
    # rho5[0] is the lag-1 coefficient, so the window is reversed to put the
    # most recent sample first (rho5[0] * y[i-1] + ... + rho5[4] * y[i-5]).
    yw5_pred[i] = np.dot(rho5, yw5_pred[i - 5:i][::-1])

plt.rcParams['axes.facecolor'] = 'white'
plt.plot(x_axis[5:105], yw5_pred[5:105], linewidth=1, label='Predictions')
plt.plot(x_axis[5:105], y_test[5:105].reshape(100, ), linewidth=1,
         label='Ground Truth', linestyle='dashed')
plt.grid(True, which='both', axis='both')
plt.title('MA Model - Yule-Walker AR5 Prediction of 100 samples')
plt.xlabel('Time')
plt.ylabel('Value')
plt.legend()