def test_add_trend_prepend_dataframe(self): n = 10 x = self.rng.randn(n, 1) x = pd.DataFrame(x, columns=["col1"]) trend_1 = add_trend(x, trend="ct", prepend=True) trend_2 = add_trend(x, trend="ct", prepend=False) assert_frame_equal(trend_1.iloc[:, :2], trend_2.iloc[:, 1:])
def p_tests(z: NDArray, lag: int, trend: str):
    x, y = z[:, 1:], z[:, 0]
    nobs = x.shape[0]
    x = add_trend(x, trend=trend)
    beta = lstsq(x, y, rcond=None)[0]
    u = y - x @ beta
    z_lead = z[1:]
    z_lag = add_trend(z[:-1], trend=trend)
    phi = lstsq(z_lag, z_lead, rcond=None)[0]
    xi = z_lead - z_lag @ phi
    omega = xi.T @ xi / nobs
    for i in range(1, lag + 1):
        w = 1 - i / (lag + 1)
        gamma = xi[i:].T @ xi[:-i] / nobs
        omega += w * (gamma + gamma.T)
    omega21 = omega[0, 1:]
    omega22 = omega[1:, 1:]
    omega112 = omega[0, 0] - np.squeeze(omega21.T @ inv(omega22) @ omega21)
    denom = u.T @ u / nobs
    p_u = nobs * omega112 / denom
    tr = add_trend(nobs=z.shape[0], trend=trend)
    if tr.shape[1]:
        z = z - tr @ lstsq(tr, z, rcond=None)[0]
    else:
        z = z - z[:1]  # Recenter on first
    m_zz = z.T @ z / nobs
    p_z = nobs * (omega @ inv(m_zz)).trace()
    return p_u, p_z
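# Hedged usage sketch (not part of the library): exercises p_tests above on
# simulated data, assuming numpy plus the module-level names p_tests relies
# on (add_trend, lstsq, inv) are importable. The data and lag choice are
# illustrative only.
import numpy as np

rng = np.random.default_rng(0)
z = np.cumsum(rng.standard_normal((500, 2)), axis=0)  # two independent random walks
p_u, p_z = p_tests(z, lag=4, trend="c")
print(p_u, p_z)  # Phillips-Ouliaris P_u and P_z statistics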
def test_add_trend_duplicate_name(self): x = pd.DataFrame(np.zeros((10, 1)), columns=["trend"]) with pytest.warns(ColumnNameConflict): add_trend(x, trend="ct") y = add_trend(x, trend="ct") assert "const" in y.columns assert "trend_0" in y.columns
def fit( self, kernel: str = "bartlett", bandwidth: Optional[float] = None, force_int: bool = True, diff: bool = False, df_adjust: bool = False, ) -> CointegrationAnalysisResults: cov_est, eta, beta = self._common_fit(kernel, bandwidth, force_int, diff) omega = np.asarray(cov_est.cov.long_run) lmbda = np.asarray(cov_est.cov.one_sided) sigma = np.asarray(cov_est.cov.short_run) lmbda2 = lmbda[:, 1:] sigma_inv = np.linalg.inv(sigma) y, x = np.asarray(self._y_df), np.asarray(self._x) x_star = x[1:] - eta @ (sigma_inv @ lmbda2) kx = x.shape[1] omega_12 = omega[:1, 1:] omega_22 = omega[1:, 1:] omega_22_inv = np.linalg.inv(omega_22) bias = np.zeros((kx + 1, 1)) bias[1:] = omega_22_inv @ omega_12.T # K x K K by 1 # K by 1 y_star = y[1:] - eta @ (sigma_inv @ lmbda2 @ beta[:, None] + bias) z_star = add_trend(x_star, trend=self._trend) params = np.linalg.lstsq(z_star, y_star, rcond=None)[0] omega_11 = omega[:1, :1] nobs, nvar = z_star.shape scale = 1.0 if not df_adjust else nobs / (nobs - nvar) omega_112 = scale * omega_11 - omega_12 @ omega_22_inv @ omega_12.T param_cov = omega_112 * np.linalg.inv(z_star.T @ z_star) cols = add_trend(self._x.iloc[:10], self._trend).columns params = pd.Series(params.squeeze(), index=cols, name="params") param_cov = pd.DataFrame(param_cov, columns=cols, index=cols) resid, r2, r2_adj = self._final_statistics(params) resid_kern = KERNEL_ESTIMATORS[kernel](resid, bandwidth=cov_est.bandwidth, force_int=cov_est.force_int) return CointegrationAnalysisResults( params, param_cov, resid, omega_112[0, 0], resid_kern, kx, self._trend, df_adjust, r2, r2_adj, "Fully Modified OLS", )
def test_add_trend_duplicate_name(self):
    x = pd.DataFrame(np.zeros((10, 1)), columns=['trend'])
    with warnings.catch_warnings(record=True) as w:
        assert_produces_warning(add_trend(x, trend='ct'), ColumnNameConflict)
        y = add_trend(x, trend='ct')  # should produce a single warning
    assert len(w) > 0
    assert 'const' in y.columns
    assert 'trend_0' in y.columns
def _po_ptests(
    z: pd.DataFrame,
    xsection: RegressionResults,
    test_type: str,
    trend: str,
    kernel: str,
    bandwidth: Optional[int],
    force_int: bool,
) -> PhillipsOuliarisTestResults:
    nobs = z.shape[0]
    z_lead = z.iloc[1:]
    z_lag = add_trend(z.iloc[:-1], trend=trend)
    phi = np.linalg.lstsq(z_lag, z_lead, rcond=None)[0]
    xi = z_lead - np.asarray(z_lag @ phi)
    ker_est = KERNEL_ESTIMATORS[kernel]
    cov_est = ker_est(xi, bandwidth=bandwidth, center=False, force_int=force_int)
    cov = cov_est.cov
    # Rescale to match definition in PO
    omega = (nobs - 1) / nobs * np.asarray(cov.long_run)
    u = np.asarray(xsection.resid)
    if test_type == "pu":
        denom = u.T @ u / nobs
        omega21 = omega[0, 1:]
        omega22 = omega[1:, 1:]
        omega22_inv = np.linalg.inv(omega22)
        omega112 = omega[0, 0] - np.squeeze(omega21.T @ omega22_inv @ omega21)
        test_stat = nobs * float(np.squeeze(omega112 / denom))
    else:  # returning p_z
        _z = np.asarray(z)
        if trend != "n":
            tr = add_trend(nobs=_z.shape[0], trend=trend)
            _z = _z - tr @ np.linalg.lstsq(tr, _z, rcond=None)[0]
        else:
            _z = _z - _z[:1]  # Ensure first observation is 0
        m_zz = _z.T @ _z / nobs
        test_stat = nobs * float(np.squeeze((omega @ np.linalg.inv(m_zz)).trace()))
    cv = phillips_ouliaris_cv(test_type, trend, z.shape[1], z.shape[0])
    pval = phillips_ouliaris_pval(test_stat, test_type, trend, z.shape[1])
    return PhillipsOuliarisTestResults(
        test_stat,
        pval,
        cv,
        order=z.shape[1],
        xsection=xsection,
        test_type=test_type,
        kernel_est=cov_est,
    )
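# Hedged usage sketch: _po_ptests is internal. The public entry point is
# assumed to be arch.unitroot.cointegration.phillips_ouliaris, with
# test_type drawn from {"Za", "Zt", "Pu", "Pz"} (an assumption about the
# public API).
import numpy as np
from arch.unitroot.cointegration import phillips_ouliaris

rng = np.random.default_rng(0)
x = np.cumsum(rng.standard_normal((500, 1)), axis=0)
y = x[:, 0] + rng.standard_normal(500)  # cointegrated pair
res = phillips_ouliaris(y, x, trend="c", test_type="Pu")
print(res.stat, res.pvalue)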
def test_add_trend_ct(self):
    n = 20
    x = np.zeros((20, 1))
    y = add_trend(x, trend='ct')
    assert np.all(y[:, 1] == 1.0)
    assert_equal(y[0, 2], 1.0)
    assert_array_almost_equal(np.diff(y[:, 2]), np.ones((n - 1)))
def test_add_time_trend_dataframe(self):
    n = 10
    x = self.rng.randn(n, 1)
    x = pd.DataFrame(x, columns=['col1'])
    trend_1 = add_trend(x, trend='t')
    assert_array_almost_equal(np.asarray(trend_1['trend']), np.arange(1.0, n + 1))
def _format_variables(self, leads: int, lags: int) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Format the variables for the regression"""
    x = self._x
    y = self._y_df
    delta_x = x.diff()
    data = [y, x]
    for lag in range(-lags, leads + 1):
        lag_data = delta_x.shift(-lag)
        typ = "LAG" if lag < 0 else "LEAD"
        lag_data.columns = [f"D.{c}.{typ}{abs(lag)}" for c in lag_data.columns]
        if lag == 0:
            # Reset the contemporaneous difference to plain D.<col> names;
            # renaming from lag_data.columns here would double the prefix.
            lag_data.columns = [f"D.{c}" for c in delta_x.columns]
        data.append(lag_data)
    data_df: pd.DataFrame = pd.concat(data, axis=1).dropna()
    lhs, rhs = data_df.iloc[:, :1], data_df.iloc[:, 1:]
    nrhs = rhs.shape[1]
    rhs = add_trend(rhs, trend=self._trend, prepend=True)
    ntrend = rhs.shape[1] - nrhs
    if ntrend:
        nx = x.shape[1]
        trend = rhs.iloc[:, :ntrend]
        rhs = pd.concat(
            [rhs.iloc[:, ntrend:ntrend + nx], trend, rhs.iloc[:, ntrend + nx:]],
            axis=1,
        )
    return lhs, rhs
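# Hedged usage sketch: _format_variables builds the lead/lag design matrix
# for dynamic OLS. The public class name DynamicOLS and its signature below
# are assumptions about arch's API.
import numpy as np
from arch.unitroot.cointegration import DynamicOLS

rng = np.random.default_rng(0)
x = np.cumsum(rng.standard_normal((500, 1)), axis=0)
y = 2.0 * x[:, 0] + rng.standard_normal(500)
res = DynamicOLS(y, x, trend="c", lags=2, leads=2).fit()
print(res.params)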
def test_add_time_trend_dataframe(self): n = 10 x = self.rng.randn(n, 1) x = pd.DataFrame(x, columns=["col1"]) trend_1 = add_trend(x, trend="t") assert_array_almost_equal(np.asarray(trend_1["trend"]), np.arange(1.0, n + 1))
def _common_fit(
    self, kernel: str, bandwidth: Optional[float], force_int: bool, diff: bool
) -> Tuple[lrcov.CovarianceEstimator, NDArray, NDArray]:
    kernel = _check_kernel(kernel)
    res = _cross_section(self._y, self._x, self._trend)
    x = np.asarray(self._x)
    eta_1 = np.asarray(res.resid)
    x_trend = self._trend if self._x_trend is None else self._x_trend
    tr = add_trend(nobs=x.shape[0], trend=x_trend)
    if tr.shape[1] > 1 and diff:
        delta_tr = np.diff(tr[:, 1:], axis=0)
        delta_x = np.diff(x, axis=0)
        gamma = np.linalg.lstsq(delta_tr, delta_x, rcond=None)[0]
        eta_2 = delta_x - delta_tr @ gamma
    else:
        if tr.shape[1]:
            gamma = np.linalg.lstsq(tr, x, rcond=None)[0]
            eps = x - tr @ gamma
        else:
            eps = x
        eta_2 = np.diff(eps, axis=0)
    eta = np.column_stack([eta_1[1:], eta_2])
    kern_est = KERNEL_ESTIMATORS[kernel]
    cov_est = kern_est(eta, bandwidth=bandwidth, center=False, force_int=force_int)
    beta = np.asarray(res.params)[:x.shape[1]]
    return cov_est, eta, beta
def _estimate_df_regression(y, trend, lags): """Helper function that estimates the core (A)DF regression Parameters ---------- y : array The data for the lag selection trend : {'nc','c','ct','ctt'} The trend order lags : int The number of lags to include in the ADF regression Returns ------- ols_res : OLSResults A results class object produced by OLS.fit() Notes ----- See statsmodels.regression.linear_model.OLS for details on the results returned """ delta_y = diff(y) rhs = lagmat(delta_y[:, None], lags, trim='both', original='in') nobs = rhs.shape[0] lhs = rhs[:, 0].copy() # lag-0 values are lhs, Is copy() necessary? rhs[:, 0] = y[-nobs - 1:-1] # replace lag 0 with level of y if trend != 'nc': rhs = add_trend(rhs[:, :lags + 1], trend) return OLS(lhs, rhs).fit()
def test_add_trend_ct(self): n = 20 x = np.zeros((20, 1)) y = add_trend(x, trend="ct") assert np.all(y[:, 1] == 1.0) assert_equal(y[0, 2], 1.0) assert_array_almost_equal(np.diff(y[:, 2]), np.ones((n - 1)))
def _estimate_df_regression(y, trend, lags): """Helper function that estimates the core (A)DF regression Parameters ---------- y : ndarray The data for the lag selection trend : {'nc','c','ct','ctt'} The trend order lags : int The number of lags to include in the ADF regression Returns ------- ols_res : OLSResults A results class object produced by OLS.fit() Notes ----- See statsmodels.regression.linear_model.OLS for details on the results returned """ delta_y = diff(y) rhs = lagmat(delta_y[:, None], lags, trim='both', original='in') nobs = rhs.shape[0] lhs = rhs[:, 0].copy() # lag-0 values are lhs, Is copy() necessary? rhs[:, 0] = y[-nobs - 1:-1] # replace lag 0 with level of y rhs = _add_column_names(rhs, lags) if trend != 'nc': rhs = add_trend(rhs.iloc[:, :lags + 1], trend) return OLS(lhs, rhs).fit()
def simulate_kpss(
    nobs: int,
    b: int,
    trend: str = "c",
    rng: Optional[RandomState] = None,
) -> np.ndarray:
    """
    Simulates the KPSS test statistic for nobs observations,
    performing b replications.
    """
    if rng is None:
        rng = RandomState()
        rng.seed(0)
    standard_normal = rng.standard_normal

    e = standard_normal((nobs, b))
    z = np.ones((nobs, 1))
    if trend == "ct":
        z = add_trend(z, trend="t")
    zinv = np.linalg.pinv(z)
    trend_coef = zinv.dot(e)
    resid = e - cast(np.ndarray, z.dot(trend_coef))
    s = np.cumsum(resid, axis=0)
    lam = (resid**2.0).mean(axis=0)
    kpss = 1 / (nobs**2.0) * (s**2.0).sum(axis=0) / lam
    return kpss
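# Hedged usage sketch: tabulating finite-sample KPSS critical values from
# simulate_kpss above. The sample size and replication count are
# illustrative only; the KPSS statistic rejects in the upper tail.
import numpy as np
from numpy.random import RandomState

stats = simulate_kpss(nobs=250, b=10_000, trend="c", rng=RandomState(0))
print(np.percentile(stats, [90, 95, 99]))  # approximate 10%, 5%, 1% critical values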
def _df_select_lags(y, trend, max_lags, method, low_memory=False):
    """
    Helper method to determine the best lag length in DF-like regressions

    Parameters
    ----------
    y : ndarray
        The data for the lag selection exercise
    trend : {'nc','c','ct','ctt'}
        The trend order
    max_lags : int
        The maximum number of lags to check.  This setting affects all
        estimation since the sample is adjusted by max_lags when fitting
        the models
    method : {'AIC','BIC','t-stat'}
        The method to use when estimating the model
    low_memory : bool
        Flag indicating whether to use the low-memory algorithm for
        lag-length selection.

    Returns
    -------
    best_ic : float
        The information criteria at the selected lag
    best_lag : int
        The selected lag

    Notes
    -----
    If max_lags is None, the default value of 12 * (nobs/100)**(1/4) is used.
    """
    nobs = y.shape[0]
    # This is the absolute maximum number of lags possible,
    # only needed for very short time series.
    max_max_lags = nobs // 2 - 1
    if trend != 'nc':
        max_max_lags -= len(trend)
    if max_lags is None:
        max_lags = int(ceil(12. * power(nobs / 100., 1 / 4.)))
    max_lags = max(min(max_lags, max_max_lags), 0)
    if low_memory:
        out = _autolag_ols_low_memory(y, max_lags, trend, method)
        return out
    delta_y = diff(y)
    rhs = lagmat(delta_y[:, None], max_lags, trim='both', original='in')
    nobs = rhs.shape[0]
    rhs[:, 0] = y[-nobs - 1:-1]  # replace 0 with level of y
    lhs = delta_y[-nobs:]
    if trend != 'nc':
        full_rhs = add_trend(rhs, trend, prepend=True)
    else:
        full_rhs = rhs
    start_lag = full_rhs.shape[1] - rhs.shape[1] + 1
    ic_best, best_lag = _autolag_ols(lhs, full_rhs, start_lag, max_lags, method)
    return ic_best, best_lag
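# Hedged usage sketch: _df_select_lags drives automatic lag selection for
# the ADF test. The public class arch.unitroot.ADF and the max_lags/method
# keywords below are assumptions about the public API.
import numpy as np
from arch.unitroot import ADF

rng = np.random.default_rng(0)
y = np.cumsum(rng.standard_normal(500))  # a random walk, so a unit root is present
adf = ADF(y, trend="c", max_lags=12, method="aic")
print(adf.stat, adf.pvalue, adf.lags)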
def test_add_trend_ctt(self):
    n = 10
    x = np.zeros((n, 1))
    y = add_trend(x, trend='ctt')
    assert np.all(y[:, 1] == 1.0)
    assert y[0, 2] == 1.0
    assert_array_almost_equal(np.diff(y[:, 2]), np.ones((n - 1)))
    assert y[0, 3] == 1.0
    assert_array_almost_equal(np.diff(y[:, 3]), np.arange(3.0, 2.0 * n, 2.0))
def _compute_statistic(self): """Core routine to estimate PP test statistics""" # 1. Estimate Regression y, trend = self._y, self._trend nobs = y.shape[0] if self._lags is None: self._lags = int(ceil(12. * power(nobs / 100., 1 / 4.))) lags = self._lags rhs = y[:-1, None] rhs = _add_column_names(rhs, 0) lhs = y[1:, None] if trend != 'nc': rhs = add_trend(rhs, trend) resols = OLS(lhs, rhs).fit() k = rhs.shape[1] n, u = resols.nobs, resols.resid lam2 = cov_nw(u, lags, demean=False) lam = sqrt(lam2) # 2. Compute components s2 = u.dot(u) / (n - k) s = sqrt(s2) gamma0 = s2 * (n - k) / n sigma = resols.bse[0] sigma2 = sigma**2.0 rho = resols.params[0] # 3. Compute statistics self._stat_tau = sqrt(gamma0 / lam2) * ((rho - 1) / sigma) \ - 0.5 * ((lam2 - gamma0) / lam) * (n * sigma / s) self._stat_rho = n * (rho - 1) \ - 0.5 * (n ** 2.0 * sigma2 / s2) * (lam2 - gamma0) self._nobs = int(resols.nobs) if self._test_type == 'rho': self._stat = self._stat_rho dist_type = 'ADF-z' else: self._stat = self._stat_tau dist_type = 'ADF-t' self._pvalue = mackinnonp(self._stat, regression=trend, dist_type=dist_type) critical_values = mackinnoncrit(regression=trend, nobs=n, dist_type=dist_type) self._critical_values = { "1%": critical_values[0], "5%": critical_values[1], "10%": critical_values[2] } self._title = self._test_name + ' (Z-' + self._test_type + ')'
def _compute_statistic(self): """Core routine to estimate PP test statistics""" # 1. Estimate Regression y, trend = self._y, self._trend nobs = y.shape[0] if self._lags is None: self._lags = int(ceil(12. * power(nobs / 100., 1 / 4.))) lags = self._lags rhs = y[:-1, None] lhs = y[1:, None] if trend != 'nc': rhs = add_trend(rhs, trend) resols = OLS(lhs, rhs).fit() k = rhs.shape[1] n, u = resols.nobs, resols.resid lam2 = cov_nw(u, lags, demean=False) lam = sqrt(lam2) # 2. Compute components s2 = u.dot(u) / (n - k) s = sqrt(s2) gamma0 = s2 * (n - k) / n sigma = resols.bse[0] sigma2 = sigma ** 2.0 rho = resols.params[0] # 3. Compute statistics self._stat_tau = sqrt(gamma0 / lam2) * ((rho - 1) / sigma) \ - 0.5 * ((lam2 - gamma0) / lam) * (n * sigma / s) self._stat_rho = n * (rho - 1) \ - 0.5 * (n ** 2.0 * sigma2 / s2) * (lam2 - gamma0) self._nobs = int(resols.nobs) if self._test_type == 'rho': self._stat = self._stat_rho dist_type = 'ADF-z' else: self._stat = self._stat_tau dist_type = 'ADF-t' self._pvalue = mackinnonp(self._stat, regression=trend, dist_type=dist_type) critical_values = mackinnoncrit(regression=trend, nobs=n, dist_type=dist_type) self._critical_values = {"1%": critical_values[0], "5%": critical_values[1], "10%": critical_values[2]} self._title = self._test_name + ' (Z-' + self._test_type + ')'
def _cross_section(y: ArrayLike1D, x: ArrayLike2D, trend: str) -> RegressionResults:
    if trend not in ("n", "c", "ct", "ctt"):
        raise ValueError('trend must be one of "n", "c", "ct" or "ctt"')
    y = ensure1d(y, "y", True)
    x = ensure2d(x, "x")

    if not isinstance(x, pd.DataFrame):
        cols = [f"x{i}" for i in range(1, x.shape[1] + 1)]
        x = pd.DataFrame(x, columns=cols, index=y.index)
    x = add_trend(x, trend)
    res = OLS(y, x).fit()
    return res
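# Hedged usage sketch: _cross_section runs the first-stage OLS of y on the
# trend-augmented regressors, assuming its module-level imports (pandas,
# OLS, ensure1d/ensure2d, add_trend) are in scope.
import numpy as np

rng = np.random.default_rng(0)
x = np.cumsum(rng.standard_normal((250, 2)), axis=0)
y = x @ np.array([1.0, 1.0]) + rng.standard_normal(250)
res = _cross_section(y, x, trend="c")
print(res.params)  # slopes for x1 and x2 followed by the constant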
def test_errors(self): n = 100 with pytest.raises(ValueError): add_trend(x=None, trend="unknown", nobs=n) with pytest.raises(ValueError): add_trend(x=None, trend="ct") x = np.ones((100, 1)) with pytest.raises(ValueError): add_trend(x, trend="ct", has_constant="raise")
def _df_select_lags(y, trend, max_lags, method): """ Helper method to determine the best lag length in DF-like regressions Parameters ---------- y : ndarray The data for the lag selection exercise trend : {'nc','c','ct','ctt'} The trend order max_lags : int The maximum number of lags to check. This setting affects all estimation since the sample is adjusted by max_lags when fitting the models method : {'AIC','BIC','t-stat'} The method to use when estimating the model Returns ------- best_ic : float The information criteria at the selected lag best_lag : int The selected lag Notes ----- If max_lags is None, the default value of 12 * (nobs/100)**(1/4) is used. """ nobs = y.shape[0] delta_y = diff(y) if max_lags is None: max_lags = int(ceil(12. * power(nobs / 100., 1 / 4.))) rhs = lagmat(delta_y[:, None], max_lags, trim='both', original='in') nobs = rhs.shape[0] rhs[:, 0] = y[-nobs - 1:-1] # replace 0 with level of y lhs = delta_y[-nobs:] if trend != 'nc': full_rhs = add_trend(rhs, trend, prepend=True) else: full_rhs = rhs start_lag = full_rhs.shape[1] - rhs.shape[1] + 1 ic_best, best_lag = _autolag_ols(lhs, full_rhs, start_lag, max_lags, method) return ic_best, best_lag
def _df_select_lags(y, trend, max_lags, method): """ Helper method to determine the best lag length in DF-like regressions Parameters ---------- y : array The data for the lag selection exercise trend : {'nc','c','ct','ctt'} The trend order max_lags : int The maximum number of lags to check. This setting affects all estimation since the sample is adjusted by max_lags when fitting the models method : {'AIC','BIC','t-stat'} The method to use when estimating the model Returns ------- best_ic : float The information criteria at the selected lag best_lag : int The selected lag Notes ----- If max_lags is None, the default value of 12 * (nobs/100)**(1/4) is used. """ nobs = y.shape[0] delta_y = diff(y) if max_lags is None: max_lags = int(ceil(12. * power(nobs / 100., 1 / 4.))) rhs = lagmat(delta_y[:, None], max_lags, trim='both', original='in') nobs = rhs.shape[0] rhs[:, 0] = y[-nobs - 1:-1] # replace 0 with level of y lhs = delta_y[-nobs:] if trend != 'nc': full_rhs = add_trend(rhs, trend, prepend=True) else: full_rhs = rhs start_lag = full_rhs.shape[1] - rhs.shape[1] + 1 ic_best, best_lag = _autolag_ols(lhs, full_rhs, start_lag, max_lags, method) return ic_best, best_lag
def _final_statistics(self, theta: pd.Series) -> Tuple[pd.Series, float, float]:
    z = add_trend(self._x, self._trend)
    nobs, nvar = z.shape
    resid = self._y - np.asarray(z @ theta)
    resid.name = "resid"
    center = 0.0
    tss_df = 0
    if "c" in self._trend:
        center = self._y.mean()
        tss_df = 1
    y_centered = self._y - center
    ssr = resid.T @ resid
    tss = y_centered.T @ y_centered
    r2 = 1.0 - ssr / tss
    r2_adj = 1.0 - (ssr / (nobs - nvar)) / (tss / (nobs - tss_df))
    return resid, r2, r2_adj
def p_tests_vec(z: NDArray, lag: int, trend: str) -> Tuple[np.ndarray, np.ndarray]:
    assert z.ndim == 3
    z_lag, z_lead = z[:, :-1], z[:, 1:]
    nobs = z.shape[1]
    if trend == "c":
        z = demean(z)
        z_lag = demean(z_lag)
        z_lead = demean(z_lead)
    elif trend in ("ct", "ctt"):
        post = []
        for v in (z, z_lag, z_lead):
            tr = add_trend(nobs=v.shape[1], trend=trend)
            tr /= np.sqrt((tr**2).mean(0) * nobs)
            trptr = tr.T @ tr
            trpv = tr.T @ v
            post.append(v - tr @ solve(trptr, trpv))
        z, z_lag, z_lead = post
    else:
        z = z - z[:, :1]
    x, y = z[..., 1:], z[..., :1]
    u = y
    if x.shape[-1]:
        beta = solve(inner_prod(x), inner_prod(x, y))
        u = y - x @ beta
    phi = solve(inner_prod(z_lag), inner_prod(z_lag, z_lead))
    xi = z_lead - z_lag @ phi
    omega = inner_prod(xi) / nobs
    for i in range(1, lag + 1):
        w = 1 - i / (lag + 1)
        gamma = inner_prod(xi[:, i:], xi[:, :-i]) / nobs
        omega += w * (gamma + cast(np.ndarray, gamma).transpose((0, 2, 1)))
    omega21 = omega[:, :1, 1:]
    omega22 = omega[:, 1:, 1:]
    omega112 = omega[:, :1, :1] - omega21 @ inv(omega22) @ omega21.transpose((0, 2, 1))
    denom = inner_prod(u) / nobs
    p_u = nobs * np.squeeze(omega112 / denom)
    # z detrended above
    m_zz = inner_prod(z) / nobs
    # ufunc trace using einsum
    p_z = nobs * np.einsum("...ii", omega @ inv(m_zz))
    return p_u, p_z
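# Hedged usage sketch: p_tests_vec evaluates P_u and P_z over many simulated
# systems at once (axis 0 indexes replications, axis 1 is time), assuming the
# module helpers demean, inner_prod, solve and inv are in scope.
import numpy as np

rng = np.random.default_rng(0)
z = np.cumsum(rng.standard_normal((1000, 250, 2)), axis=1)
p_u, p_z = p_tests_vec(z, lag=4, trend="c")
print(np.percentile(p_u, 95), np.percentile(p_z, 95))  # upper-tail 5% critical values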
def _compute_statistic(self):
    # 1. Estimate model with trend
    nobs, y, trend = self._nobs, self._y, self._trend
    z = add_trend(nobs=nobs, trend=trend)
    res = OLS(y, z).fit()
    # 2. Compute KPSS test
    u = res.resid
    if self._lags is None:
        self._lags = int(ceil(12. * power(nobs / 100., 1 / 4.)))
    lam = cov_nw(u, self._lags, demean=False)
    s = cumsum(u)
    self._stat = 1 / (nobs ** 2.0) * sum(s ** 2.0) / lam
    self._nobs = u.shape[0]
    self._pvalue, critical_values = kpss_crit(self._stat, trend)
    self._critical_values = {
        "1%": critical_values[0],
        "5%": critical_values[1],
        "10%": critical_values[2],
    }
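# Hedged usage sketch: the public wrapper for this routine is assumed to be
# arch.unitroot.KPSS, where stationarity is the null hypothesis.
import numpy as np
from arch.unitroot import KPSS

rng = np.random.default_rng(0)
y = rng.standard_normal(500)  # a stationary series, so expect a large p-value
kpss = KPSS(y, trend="c")
print(kpss.stat, kpss.pvalue)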
def _compute_statistic(self): """Core routine to estimate DF-GLS test statistic""" # 1. GLS detrend trend, c = self._trend, self._c nobs = self._y.shape[0] ct = c / nobs z = add_trend(nobs=nobs, trend=trend) delta_z = z.copy() delta_z[1:, :] = delta_z[1:, :] - (1 + ct) * delta_z[:-1, :] delta_y = self._y.copy()[:, None] delta_y[1:] = delta_y[1:] - (1 + ct) * delta_y[:-1] detrend_coef = pinv(delta_z).dot(delta_y) y = self._y y_detrended = y - z.dot(detrend_coef).ravel() # 2. determine lag length, if needed if self._lags is None: max_lags, method = self._max_lags, self._method icbest, bestlag = _df_select_lags(y_detrended, 'nc', max_lags, method, low_memory=self._low_memory) self._lags = bestlag # 3. Run Regression lags = self._lags resols = _estimate_df_regression(y_detrended, lags=lags, trend='nc') self._regression = resols self._nobs = int(resols.nobs) self._stat = resols.tvalues[0] self._pvalue = mackinnonp(self._stat, regression=trend, dist_type='DFGLS') critical_values = mackinnoncrit(regression=trend, nobs=self._nobs, dist_type='DFGLS') self._critical_values = { "1%": critical_values[0], "5%": critical_values[1], "10%": critical_values[2] }
def z_tests(z: NDArray, lag: int, trend: str):
    z = add_trend(z, trend=trend)
    u = z
    if z.shape[1] > 1:
        u = z[:, 0] - z[:, 1:] @ lstsq(z[:, 1:], z[:, 0], rcond=None)[0]
    alpha = (u[:-1].T @ u[1:]) / (u[:-1].T @ u[:-1])
    k = u[1:] - alpha * u[:-1]
    nobs = u.shape[0]
    one_sided_strict = 0.0
    for i in range(1, lag + 1):
        w = 1 - i / (lag + 1)
        one_sided_strict += 1 / nobs * w * k[i:].T @ k[:-i]
    u2 = u[:-1].T @ u[:-1]
    z = (alpha - 1) - nobs * one_sided_strict / u2
    z_a = nobs * z
    long_run = k.T @ k / nobs + 2 * one_sided_strict
    # The t-type statistic scales by the square root of the long-run variance
    z_t = np.sqrt(u2) * z / np.sqrt(long_run)
    return z_a, z_t
def z_tests(z: NDArray, lag: int, trend: str) -> Tuple[float, float]:
    z = add_trend(z, trend=trend)
    u = z
    if z.shape[1] > 1:
        delta = np.linalg.lstsq(z[:, 1:], z[:, 0], rcond=None)[0]
        u = z[:, 0] - z[:, 1:] @ delta
    alpha = (u[:-1].T @ u[1:]) / (u[:-1].T @ u[:-1])
    k = u[1:] - alpha * u[:-1]
    nobs = u.shape[0]
    one_sided_strict = 0.0
    for i in range(1, lag + 1):
        w = 1 - i / (lag + 1)
        one_sided_strict += 1 / nobs * w * k[i:].T @ k[:-i]
    u2 = u[:-1].T @ u[:-1]
    z = (alpha - 1) - nobs * one_sided_strict / u2
    z_a = nobs * z
    long_run = k.T @ k / nobs + 2 * one_sided_strict
    se = np.sqrt(long_run / u2)
    z_t = z / se
    return float(z_a), float(z_t)
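# Hedged usage sketch for z_tests above: computes the Phillips-Ouliaris
# Z_alpha and Z_t statistics on simulated data, assuming numpy and the
# module-level add_trend import are available.
import numpy as np

rng = np.random.default_rng(0)
z = np.cumsum(rng.standard_normal((500, 2)), axis=0)
z_a, z_t = z_tests(z, lag=4, trend="c")
print(z_a, z_t)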
def _compute_statistic(self): """Core routine to estimate DF-GLS test statistic""" # 1. GLS detrend trend, c = self._trend, self._c nobs = self._y.shape[0] ct = c / nobs z = add_trend(nobs=nobs, trend=trend) delta_z = z.copy() delta_z[1:, :] = delta_z[1:, :] - (1 + ct) * delta_z[:-1, :] delta_y = self._y.copy()[:, None] delta_y[1:] = delta_y[1:] - (1 + ct) * delta_y[:-1] detrend_coef = pinv(delta_z).dot(delta_y) y = self._y y_detrended = y - z.dot(detrend_coef).ravel() # 2. determine lag length, if needed if self._lags is None: max_lags, method = self._max_lags, self._method icbest, bestlag = _df_select_lags(y_detrended, 'nc', max_lags, method) self._lags = bestlag # 3. Run Regression lags = self._lags resols = _estimate_df_regression(y_detrended, lags=lags, trend='nc') self._regression = resols self._nobs = int(resols.nobs) self._stat = resols.tvalues[0] self._pvalue = mackinnonp(self._stat, regression=trend, dist_type='DFGLS') critical_values = mackinnoncrit(regression=trend, nobs=self._nobs, dist_type='DFGLS') self._critical_values = {"1%": critical_values[0], "5%": critical_values[1], "10%": critical_values[2]}
def z_tests_vec(z: NDArray, lag: int, trend: str) -> Tuple[np.ndarray, np.ndarray]:
    assert z.ndim == 3
    nobs = int(z.shape[1])
    if trend == "c":
        z = demean(z)
    elif trend in ("ct", "ctt"):
        tr = add_trend(nobs=nobs, trend=trend)
        tr /= np.sqrt((tr**2).mean(0) * nobs)
        trptr = tr.T @ tr
        trpz = tr.T @ z
        z = z - tr @ solve(trptr, trpz)
    y = z[..., :1]
    x = z[..., 1:]
    u = y
    if z.shape[-1] > 1:
        xpx = inner_prod(x)
        xpx_inv = inv(xpx)
        b = xpx_inv @ inner_prod(x, y)
        u = y - x @ b
    nseries = u.shape[0]
    u = u.reshape((nseries, -1)).T
    ulag = u[:-1]
    ulead = u[1:]
    alpha = (ulead * ulag).mean(0) / (ulag**2).mean(0)
    one_sided_strict = np.zeros_like(alpha)
    k = ulead - ulag * alpha
    for i in range(1, lag + 1):
        w = 1 - i / (lag + 1)
        one_sided_strict += 1 / nobs * w * (k[i:] * k[:-i]).sum(0)
    u2 = (u[:-1] * u[:-1]).sum(0)
    z = (alpha - 1) - nobs * one_sided_strict / u2
    z_a = nobs * z
    long_run = (k**2).sum(0) / nobs + 2 * one_sided_strict
    z_t = np.sqrt(u2) * z / np.sqrt(long_run)
    assert isinstance(z_a, np.ndarray)
    assert isinstance(z_t, np.ndarray)
    return z_a, z_t
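# Hedged usage sketch: the vectorized variant evaluates Z_alpha and Z_t over
# many replications at once (useful for tabulating critical values), assuming
# the module helpers demean, inner_prod, solve and inv are in scope.
import numpy as np

rng = np.random.default_rng(0)
z = np.cumsum(rng.standard_normal((1000, 250, 2)), axis=1)
z_a, z_t = z_tests_vec(z, lag=4, trend="c")
print(np.percentile(z_a, 5), np.percentile(z_t, 5))  # lower-tail 5% critical values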
def simulate_kpss(nobs, b, trend="c", rng=None): """ Simulated the KPSS test statistic for nobs observations, performing b replications. """ if rng is None: rng = RandomState() rng.seed(0) standard_normal = rng.standard_normal e = standard_normal((nobs, b)) z = np.ones((nobs, 1)) if trend == "ct": z = add_trend(z, trend="t") zinv = np.linalg.pinv(z) trend_coef = zinv.dot(e) resid = e - z.dot(trend_coef) s = np.cumsum(resid, axis=0) lam = np.mean(resid**2.0, axis=0) kpss = 1 / (nobs**2.0) * np.sum(s**2.0, axis=0) / lam return kpss
def test_add_trend_no_input(self):
    n = 100
    y = add_trend(x=None, trend='ct', nobs=n)
    assert np.all(y[:, 0] == 1.0)
    assert y[0, 1] == 1.0
    assert_array_almost_equal(np.diff(y[:, 1]), np.ones((n - 1)))
def test_add_trend_t(self):
    n = 20
    x = np.zeros((20, 1))
    y = add_trend(x, trend='t')
    assert y[0, 1] == 1.0
    assert_array_almost_equal(np.diff(y[:, 1]), np.ones((n - 1)))
def test_skip_constant(self):
    x = np.ones((100, 1))
    appended = add_trend(x, trend='c', has_constant='add')
    assert_array_equal(np.ones((100, 2)), appended)
    appended = add_trend(x, trend='c', has_constant='skip')
    assert_array_equal(np.ones((100, 1)), appended)
def test_add_trend_prepend(self):
    n = 10
    x = self.rng.randn(n, 1)
    trend_1 = add_trend(x, trend='ct', prepend=True)
    trend_2 = add_trend(x, trend='ct', prepend=False)
    assert_equal(trend_1[:, :2], trend_2[:, 1:])
def test_add_trend_c(self):
    x = np.zeros((10, 1))
    y = add_trend(x, trend='c')
    assert np.all(y[:, 1] == 1.0)
def test_skip_constant(self): x = np.ones((100, 1)) appended = add_trend(x, trend="c", has_constant="add") assert_array_equal(np.ones((100, 2)), appended) appended = add_trend(x, trend="c", has_constant="skip") assert_array_equal(np.ones((100, 1)), appended)