def kpss(x, regression='c', lags=None, store=False):
    """
    Kwiatkowski-Phillips-Schmidt-Shin test for stationarity.

    Computes the Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test for the null
    hypothesis that x is level or trend stationary.

    Parameters
    ----------
    x : array_like, 1d
        Data series. An (n, 1) column is also accepted; any other shape
        whose total size differs from ``len(x)`` is rejected.
    regression : str{'c', 'ct'}
        Indicates the null hypothesis for the KPSS test (case-insensitive)

        * 'c' : The data is stationary around a constant (default)
        * 'ct' : The data is stationary around a trend
    lags : int
        Indicates the number of lags to be used. If None (default), lags is
        set to int(ceil(12 * (n / 100)**(1 / 4))), following the rule that
        Kwiatkowski et al. take from Schwert (1989).
    store : bool
        If True, a result instance is returned in place of the ``lags``
        entry of the result tuple (default is False).

    Returns
    -------
    kpss_stat : float
        The KPSS test statistic
    p_value : float
        The p-value of the test. The p-value is interpolated from
        Table 1 in Kwiatkowski et al. (1992), and a boundary point
        is returned if the test statistic is outside the table of
        critical values, that is, if the p-value is outside the
        interval (0.01, 0.1).
    lags : int
        The truncation lag parameter. Only part of the returned tuple when
        ``store`` is False; otherwise it is available as ``resstore.lags``.
    crit : dict
        The critical values at 10%, 5%, 2.5% and 1%. Based on
        Kwiatkowski et al. (1992).
    resstore : (optional) instance of ResultsStore
        Only returned when ``store`` is True, as the last element of
        ``(kpss_stat, p_value, crit, resstore)``. Carries the attributes
        ``lags``, ``nobs``, ``H0`` and ``HA``.

    Raises
    ------
    ValueError
        If ``x`` is not one-dimensional (or a single column), or if
        ``regression`` is neither 'c' nor 'ct'.

    Notes
    -----
    sigma^2 is estimated by ``_sigma_est_kpss`` (the Newey-West estimator).
    If lags is None, the truncation lag parameter is set to
    int(ceil(12 * (n / 100) ** (1 / 4))).

    The p-values are interpolated from Table 1 of Kwiatkowski et al. (1992).
    If the interpolated p-value lands on either end of the table (0.10 or
    0.01), an InterpolationWarning is issued because the true p-value lies
    beyond that boundary.

    Missing values are not handled.

    References
    ----------
    D. Kwiatkowski, P. C. B. Phillips, P. Schmidt, and Y. Shin (1992): Testing
    the Null Hypothesis of Stationarity against the Alternative of a Unit Root.
    `Journal of Econometrics` 54, 159-178.
    """
    from warnings import warn

    nobs = len(x)
    x = np.asarray(x)
    hypo = regression.lower()

    # For an (n, m) input with m > 1, len(x) == n differs from
    # x.size == n * m, so this rejects anything but a single series.
    if nobs != x.size:
        raise ValueError("x of shape {0} not understood".format(x.shape))

    if hypo == 'ct':
        # p. 162 Kwiatkowski et al. (1992): y_t = beta * t + r_t + e_t,
        # where beta is the trend, r_t a random walk and e_t a stationary
        # error term.
        resids = OLS(x, add_constant(np.arange(1, nobs + 1))).fit().resid
        crit = [0.119, 0.146, 0.176, 0.216]
    elif hypo == 'c':
        # special case of the model above, where beta = 0 (so the null
        # hypothesis is that the data is stationary around r_0).
        resids = x - x.mean()
        crit = [0.347, 0.463, 0.574, 0.739]
    else:
        raise ValueError("hypothesis '{0}' not understood".format(hypo))

    if lags is None:
        # from Kwiatkowski et al. referencing Schwert (1989)
        lags = int(np.ceil(12. * np.power(nobs / 100., 1 / 4.)))

    pvals = [0.10, 0.05, 0.025, 0.01]

    # eq. 11, p. 165: squared partial sums of the residuals, scaled by n^2.
    # np.sum keeps the reduction in native code instead of a Python loop.
    eta = np.sum(resids.cumsum() ** 2) / (nobs ** 2)
    s_hat = _sigma_est_kpss(resids, nobs, lags)

    kpss_stat = eta / s_hat
    # crit is increasing while pvals is decreasing; np.interp clamps to the
    # end points, which is what the boundary checks below detect.
    p_value = np.interp(kpss_stat, crit, pvals)

    if p_value == pvals[-1]:
        warn("p-value is smaller than the indicated p-value",
             InterpolationWarning)
    elif p_value == pvals[0]:
        warn("p-value is greater than the indicated p-value",
             InterpolationWarning)

    crit_dict = {'10%': crit[0], '5%': crit[1],
                 '2.5%': crit[2], '1%': crit[3]}

    if store:
        rstore = ResultsStore()
        rstore.lags = lags
        rstore.nobs = nobs

        stationary_type = "level" if hypo == 'c' else "trend"
        rstore.H0 = "The series is {0} stationary".format(stationary_type)
        rstore.HA = "The series is not {0} stationary".format(stationary_type)

        # NOTE: lags is deliberately not in this tuple; it is on rstore.
        return kpss_stat, p_value, crit_dict, rstore
    else:
        return kpss_stat, p_value, lags, crit_dict
def run(self, x, arlags=1, regression='c', method='mle', varest='var94'):
    """
    Leybourne-McCabe stationarity test

    The Leybourne-McCabe test can be used to test for stationarity in a
    univariate process.

    Parameters
    ----------
    x : array_like
        data series; a 1d array or a 2d array with a single column
    arlags : int
        number of autoregressive terms to include, default=1. Pass None
        to have the order chosen by the autolag procedure.
    regression : {'c','ct'}
        Constant and trend order to include in regression

        * 'c'  : constant only (default)
        * 'ct' : constant and trend
    method : {'mle','ols'}
        Method used to estimate ARIMA(p, 1, 1) filter model

        * 'mle' : conditional sum of squares maximum likelihood (default)
        * 'ols' : two-stage least squares
    varest : {'var94','var99'}
        Method used for residual variance estimation

        * 'var94' : method used in original Leybourne-McCabe paper (1994)
                    (default)
        * 'var99' : method used in follow-up paper (1999)

    Returns
    -------
    lmstat : float
        test statistic
    pvalue : float
        based on MC-derived critical values
    arlags : int
        AR(p) order used to create the filtered series
    cvdict : dict
        critical values for the test statistic at the 1%, 5%, and 10%
        levels

    Raises
    ------
    ValueError
        If ``regression``, ``method`` or ``varest`` is not one of the
        listed options, if ``x`` is not a single series, or if ``arlags``
        is not an int in the range [1, len(x) / 2].

    Notes
    -----
    H0 = series is stationary

    Basic process is to create a filtered series which removes the AR(p)
    effects from the series under test followed by an auxiliary regression
    similar to that of Kwiatkowski et al (1992). The AR(p) coefficients
    are obtained by estimating an ARIMA(p, 1, 1) model. Two methods are
    provided for ARIMA estimation: MLE and two-stage least squares.

    Two methods are provided for residual variance estimation used in the
    calculation of the test statistic. The first method ('var94') is the
    mean of the squared residuals from the filtered regression. The second
    method ('var99') is the MA(1) coefficient times the mean of the squared
    residuals from the ARIMA(p, 1, 1) filtering model.

    An empirical autolag procedure is provided. In this context, the number
    of lags is equal to the number of AR(p) terms used in the filtering
    step. The number of AR(p) terms is set equal to the first PACF
    falling within the 95% confidence interval. Maximum number of AR lags is
    limited to 1/2 series length.

    References
    ----------
    Kwiatkowski, D., Phillips, P.C.B., Schmidt, P. & Shin, Y. (1992).
    Testing the null hypothesis of stationarity against the alternative of
    a unit root. Journal of Econometrics, 54: 159–178.

    Leybourne, S.J., & McCabe, B.P.M. (1994). A consistent test for a
    unit root. Journal of Business and Economic Statistics, 12: 157–166.

    Leybourne, S.J., & McCabe, B.P.M. (1999). Modified stationarity tests
    with data-dependent model-selection rules. Journal of Business and
    Economic Statistics, 17: 264-270.

    Schwert, G W. (1987). Effects of model specification on tests for unit
    roots in macroeconomic data. Journal of Monetary Economics, 20: 73–103.
    """
    if regression not in ['c', 'ct']:
        raise ValueError(
            'LM: regression option \'%s\' not understood' % regression)
    if method not in ['mle', 'ols']:
        raise ValueError('LM: method option \'%s\' not understood' % method)
    if varest not in ['var94', 'var99']:
        raise ValueError('LM: varest option \'%s\' not understood' % varest)

    x = np.asarray(x)
    if x.ndim > 2 or (x.ndim == 2 and x.shape[1] != 1):
        raise ValueError(
            'LM: x must be a 1d array or a 2d array with a single column')
    x = np.reshape(x, (-1, 1))
    nobs = len(x)
    maxlags = int(nobs / 2)

    # determine AR order if not specified
    if arlags is None:
        arlags = self._autolag(x)
    elif not isinstance(arlags, int) or arlags < 1 or arlags > maxlags:
        raise ValueError('LM: arlags must be an integer in range [1..%s]'
                         % str(maxlags))

    # estimate the reduced ARIMA(p, 1, 1) model
    if method == 'mle':
        arfit = ARIMA(x, order=(arlags, 1, 1), trend=regression).fit()
        arima_resids = arfit.resid
        arcoeffs = arfit.arparams
        theta = arfit.maparams[0]
    else:
        arcoeffs, theta, arima_resids = self._tsls_arima(
            x, arlags, model=regression)

    # variance estimator from (1999) LM paper
    var99 = abs(theta * np.sum(arima_resids ** 2) / len(arima_resids))

    # create the filtered series:
    #   z(t) = x(t) - arcoeffs[0]*x(t-1) - ... - arcoeffs[p-1]*x(t-p)
    # Work on the flat series and subtract one lag at a time with slices.
    # This keeps the per-element subtraction order of an element-wise loop
    # while avoiding assignment of shape-(1,) arrays into scalar slots.
    # Assumes len(arcoeffs) <= arlags so every slice start is >= 0.
    series = x[:, 0]
    nfilt = nobs - arlags
    z = series[arlags:].astype(float)
    for j, coeff in enumerate(arcoeffs):
        start = arlags - j - 1
        z -= coeff * series[start:start + nfilt]

    # regress the filtered series against a constant and
    # trend term (if requested)
    if regression == 'c':
        resids = z - z.mean()
    else:
        resids = OLS(z, add_constant(np.arange(1, nfilt + 1))).fit().resid

    # variance estimator from (1994) LM paper
    var94 = np.sum(resids ** 2) / len(resids)

    # compute test statistic with specified variance estimator
    eta = np.sum(resids.cumsum() ** 2) / (len(resids) ** 2)
    if varest == 'var99':
        lmstat = eta / var99
    else:
        lmstat = eta / var94

    # calculate pval
    crit = self.__leybourne_crit(lmstat, regression)
    lmpval = crit[0]
    cvdict = crit[1]
    return lmstat, lmpval, arlags, cvdict