Пример #1
0
 def test_add_trend_prepend_dataframe(self):
     """Check that prepend only changes where the "ct" columns are placed."""
     nobs = 10
     frame = pd.DataFrame(self.rng.randn(nobs, 1), columns=["col1"])
     leading = add_trend(frame, trend="ct", prepend=True)
     trailing = add_trend(frame, trend="ct", prepend=False)
     # First two columns of the prepended frame vs. everything after the
     # first column of the appended frame.
     assert_frame_equal(leading.iloc[:, :2], trailing.iloc[:, 1:])
Пример #2
0
 def test_add_trend_prepend_dataframe(self):
     """Check that prepend only changes where the 'ct' columns are placed."""
     nobs = 10
     frame = pd.DataFrame(randn(nobs, 1), columns=['col1'])
     leading = add_trend(frame, trend='ct', prepend=True)
     trailing = add_trend(frame, trend='ct', prepend=False)
     # First two columns of the prepended frame vs. everything after the
     # first column of the appended frame.
     assert_frame_equal(leading.iloc[:, :2], trailing.iloc[:, 1:])
Пример #3
0
 def test_add_trend_prepend_dataframe(self):
     """Check that prepend only changes where the 'ct' columns are placed."""
     nobs = 10
     frame = pd.DataFrame(self.rng.randn(nobs, 1), columns=['col1'])
     leading = add_trend(frame, trend='ct', prepend=True)
     trailing = add_trend(frame, trend='ct', prepend=False)
     # First two columns of the prepended frame vs. everything after the
     # first column of the appended frame.
     assert_frame_equal(leading.iloc[:, :2], trailing.iloc[:, 1:])
Пример #4
0
def p_tests(z: NDArray, lag: int, trend: str):
    """
    Compute the ``p_u`` and ``p_z`` statistics from a 2-d data array.

    Parameters
    ----------
    z : NDArray
        Data array. Column 0 is treated as the dependent variable and the
        remaining columns as regressors.
    lag : int
        Number of autocovariances added to the long-run covariance; lag ``i``
        receives the weight ``1 - i / (lag + 1)``.
    trend : str
        Trend specification forwarded to ``add_trend``.

    Returns
    -------
    tuple
        ``(p_u, p_z)``.
    """
    dep, regressors = z[:, 0], z[:, 1:]
    nobs = regressors.shape[0]
    regressors = add_trend(regressors, trend=trend)
    coefs = lstsq(regressors, dep, rcond=None)[0]
    resid = dep - regressors @ coefs

    # Regress z[t] on z[t-1] (plus trend terms) and keep the residuals
    lead = z[1:]
    lagged = add_trend(z[:-1], trend=trend)
    transition = lstsq(lagged, lead, rcond=None)[0]
    innov = lead - lagged @ transition

    # Weighted sum of autocovariances of the residuals
    omega = innov.T @ innov / nobs
    for i in range(1, lag + 1):
        weight = 1 - i / (lag + 1)
        autocov = innov[i:].T @ innov[:-i] / nobs
        omega += weight * (autocov + autocov.T)
    omega_21 = omega[0, 1:]
    omega_22 = omega[1:, 1:]
    cond_var = omega[0, 0] - np.squeeze(omega_21.T @ inv(omega_22) @ omega_21)
    resid_var = resid.T @ resid / nobs
    p_u = nobs * cond_var / resid_var

    tr = add_trend(nobs=z.shape[0], trend=trend)
    if tr.shape[1]:
        # Remove the fitted trend from every column
        detrended = z - tr @ lstsq(tr, z, rcond=None)[0]
    else:
        # No trend columns: recenter on the first observation
        detrended = z - z[:1]
    second_moment = detrended.T @ detrended / nobs
    p_z = nobs * (omega @ inv(second_moment)).trace()
    return p_u, p_z
Пример #5
0
    def test_add_trend_duplicate_name(self):
        """An existing "trend" column must warn and force a renamed trend."""
        frame = pd.DataFrame(np.zeros((10, 1)), columns=["trend"])
        with pytest.warns(ColumnNameConflict):
            add_trend(frame, trend="ct")
            augmented = add_trend(frame, trend="ct")

        for expected in ("const", "trend_0"):
            assert expected in augmented.columns
Пример #6
0
    def fit(
        self,
        kernel: str = "bartlett",
        bandwidth: Optional[float] = None,
        force_int: bool = True,
        diff: bool = False,
        df_adjust: bool = False,
    ) -> CointegrationAnalysisResults:
        """
        Estimate the model; the result is labelled "Fully Modified OLS".

        Parameters
        ----------
        kernel : str
            Kernel name. Forwarded to ``_common_fit`` and also used as the
            key into ``KERNEL_ESTIMATORS`` for the residual kernel estimator.
        bandwidth : float, optional
            Forwarded to ``_common_fit``.
        force_int : bool
            Forwarded to ``_common_fit``.
        diff : bool
            Forwarded to ``_common_fit``.
        df_adjust : bool
            When True, ``omega_11`` is scaled by ``nobs / (nobs - nvar)``
            before the conditional variance is formed.

        Returns
        -------
        CointegrationAnalysisResults
            Holds the parameters, their covariance, the residuals, the
            conditional long-run variance and the fit statistics.
        """
        cov_est, eta, beta = self._common_fit(kernel, bandwidth, force_int,
                                              diff)
        # Long-run, one-sided and short-run covariance of eta
        omega = np.asarray(cov_est.cov.long_run)
        lmbda = np.asarray(cov_est.cov.one_sided)
        sigma = np.asarray(cov_est.cov.short_run)

        # Drop the first column of the one-sided covariance
        lmbda2 = lmbda[:, 1:]
        sigma_inv = np.linalg.inv(sigma)
        y, x = np.asarray(self._y_df), np.asarray(self._x)
        # Corrected regressors; the first observation is dropped to line up
        # with eta
        x_star = x[1:] - eta @ (sigma_inv @ lmbda2)

        kx = x.shape[1]
        omega_12 = omega[:1, 1:]
        omega_22 = omega[1:, 1:]
        omega_22_inv = np.linalg.inv(omega_22)
        # First element of bias stays zero; the remaining kx rows hold
        # omega_22^{-1} omega_12'
        bias = np.zeros((kx + 1, 1))
        bias[1:] = omega_22_inv @ omega_12.T
        # K x K        K by 1
        #  K by 1
        y_star = y[1:] - eta @ (sigma_inv @ lmbda2 @ beta[:, None] + bias)
        z_star = add_trend(x_star, trend=self._trend)
        params = np.linalg.lstsq(z_star, y_star, rcond=None)[0]

        omega_11 = omega[:1, :1]
        nobs, nvar = z_star.shape
        scale = 1.0 if not df_adjust else nobs / (nobs - nvar)
        # Conditional long-run variance (1 by 1 array)
        omega_112 = scale * omega_11 - omega_12 @ omega_22_inv @ omega_12.T
        param_cov = omega_112 * np.linalg.inv(z_star.T @ z_star)
        # Only the column labels are needed here, so a 10-row slice is used
        cols = add_trend(self._x.iloc[:10], self._trend).columns
        params = pd.Series(params.squeeze(), index=cols, name="params")
        param_cov = pd.DataFrame(param_cov, columns=cols, index=cols)
        resid, r2, r2_adj = self._final_statistics(params)
        # Kernel estimator on the residuals, reusing the settings stored on
        # cov_est
        resid_kern = KERNEL_ESTIMATORS[kernel](resid,
                                               bandwidth=cov_est.bandwidth,
                                               force_int=cov_est.force_int)
        return CointegrationAnalysisResults(
            params,
            param_cov,
            resid,
            omega_112[0, 0],
            resid_kern,
            kx,
            self._trend,
            df_adjust,
            r2,
            r2_adj,
            "Fully Modified OLS",
        )
Пример #7
0
    def test_add_trend_duplicate_name(self):
        """
        A frame that already has a 'trend' column must trigger a warning.

        The added trend column is expected to be renamed to 'trend_0' and a
        ColumnNameConflict warning must be recorded.
        """
        x = pd.DataFrame(np.zeros((10, 1)), columns=['trend'])
        with warnings.catch_warnings(record=True) as w:
            # Without "always" a warning that already fired elsewhere can be
            # suppressed and never reach the recorded list.
            warnings.simplefilter('always')
            y = add_trend(x, trend='ct')

        # Check the category explicitly rather than through a bare call to
        # assert_produces_warning: assuming that is pandas' helper, it is a
        # context manager and verifies nothing unless entered with ``with``.
        assert len(w) > 0
        assert any(issubclass(rec.category, ColumnNameConflict) for rec in w)
        assert 'const' in y.columns
        assert 'trend_0' in y.columns
Пример #8
0
    def test_add_trend_duplicate_name(self):
        """
        A frame that already has a 'trend' column must trigger a warning.

        The added trend column is expected to be renamed to 'trend_0' and a
        ColumnNameConflict warning must be recorded.
        """
        x = pd.DataFrame(np.zeros((10, 1)), columns=['trend'])
        with warnings.catch_warnings(record=True) as w:
            # Without "always" a warning that already fired elsewhere can be
            # suppressed and never reach the recorded list.
            warnings.simplefilter('always')
            y = add_trend(x, trend='ct')

        # Check the category explicitly rather than through a bare call to
        # assert_produces_warning: assuming that is pandas' helper, it is a
        # context manager and verifies nothing unless entered with ``with``.
        assert len(w) > 0
        assert any(issubclass(rec.category, ColumnNameConflict) for rec in w)
        assert 'const' in y.columns
        assert 'trend_0' in y.columns
Пример #9
0
def _po_ptests(
    z: pd.DataFrame,
    xsection: RegressionResults,
    test_type: str,
    trend: str,
    kernel: str,
    bandwidth: Optional[int],
    force_int: bool,
) -> PhillipsOuliarisTestResults:
    """
    Compute the requested P statistic and wrap it in a results object.

    Parameters
    ----------
    z : DataFrame
        The data; rows are observations.
    xsection : RegressionResults
        Cross-section regression whose residuals are used when
        ``test_type`` is "pu".
    test_type : str
        "pu" selects the P-u statistic; any other value computes P-z.
    trend : str
        Trend specification forwarded to ``add_trend`` and to the critical
        value and p-value lookups.
    kernel : str
        Key into ``KERNEL_ESTIMATORS``.
    bandwidth : int, optional
        Forwarded to the kernel estimator.
    force_int : bool
        Forwarded to the kernel estimator.

    Returns
    -------
    PhillipsOuliarisTestResults
        Statistic, p-value, critical values and supporting estimators.
    """
    nobs, nvar = z.shape

    # Regress z[t] on z[t-1] (plus trend terms) and keep the residuals
    lead = z.iloc[1:]
    lagged = add_trend(z.iloc[:-1], trend=trend)
    phi = np.linalg.lstsq(lagged, lead, rcond=None)[0]
    innov = lead - np.asarray(lagged @ phi)

    estimator = KERNEL_ESTIMATORS[kernel]
    cov_est = estimator(innov,
                        bandwidth=bandwidth,
                        center=False,
                        force_int=force_int)
    # Rescale to match definition in PO
    omega = (nobs - 1) / nobs * np.asarray(cov_est.cov.long_run)

    u = np.asarray(xsection.resid)
    if test_type == "pu":
        resid_var = u.T @ u / nobs
        omega_21 = omega[0, 1:]
        omega_22_inv = np.linalg.inv(omega[1:, 1:])
        cond_var = omega[0, 0] - np.squeeze(
            omega_21.T @ omega_22_inv @ omega_21)
        test_stat = nobs * float(np.squeeze(cond_var / resid_var))
    else:
        # P-z: based on the second moment of the (detrended) data
        data = np.asarray(z)
        if trend == "n":
            # Ensure first observation is 0
            data = data - data[:1]
        else:
            tr = add_trend(nobs=data.shape[0], trend=trend)
            data = data - tr @ np.linalg.lstsq(tr, data, rcond=None)[0]
        second_moment = data.T @ data / nobs
        test_stat = nobs * float(
            np.squeeze((omega @ np.linalg.inv(second_moment)).trace()))

    cv = phillips_ouliaris_cv(test_type, trend, nvar, nobs)
    pval = phillips_ouliaris_pval(test_stat, test_type, trend, nvar)
    return PhillipsOuliarisTestResults(
        test_stat,
        pval,
        cv,
        order=nvar,
        xsection=xsection,
        test_type=test_type,
        kernel_est=cov_est,
    )
Пример #10
0
 def test_add_trend_ct(self):
     """Appending 'ct' adds a column of ones followed by a 1, 2, ... trend."""
     nobs = 20
     augmented = add_trend(np.zeros((nobs, 1)), trend='ct')
     const, time = augmented[:, 1], augmented[:, 2]
     assert np.all(const == 1.0)
     assert_equal(time[0], 1.0)
     assert_array_almost_equal(np.diff(time), np.ones((nobs - 1)))
Пример #11
0
 def test_add_time_trend_dataframe(self):
     """A 't' trend on a DataFrame adds a 'trend' column equal to 1..n."""
     nobs = 10
     frame = pd.DataFrame(self.rng.randn(nobs, 1), columns=['col1'])
     with_trend = add_trend(frame, trend='t')
     expected = np.arange(1.0, nobs + 1)
     assert_array_almost_equal(np.asarray(with_trend['trend']), expected)
Пример #12
0
 def test_add_time_trend_dataframe(self):
     """A 't' trend on a DataFrame adds a 'trend' column equal to 1..n."""
     nobs = 10
     frame = pd.DataFrame(randn(nobs, 1), columns=['col1'])
     with_trend = add_trend(frame, trend='t')
     expected = np.arange(1.0, nobs + 1)
     assert_array_almost_equal(np.asarray(with_trend['trend']), expected)
Пример #13
0
    def _format_variables(self, leads: int,
                          lags: int) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Format the variables for the regression

        Parameters
        ----------
        leads : int
            Number of leads of the differenced x variables to include.
        lags : int
            Number of lags of the differenced x variables to include.

        Returns
        -------
        lhs : DataFrame
            The first column of the aligned data (taken from ``self._y_df``).
        rhs : DataFrame
            The x columns, then any trend columns, then the lead/lag
            differences. Rows with missing values are dropped.
        """
        x = self._x
        y = self._y_df
        delta_x = x.diff()
        data = [y, x]

        for lag in range(-lags, leads + 1):
            lag_data = delta_x.shift(-lag)
            # Always derive names from the original labels. Building the
            # lag-0 name from the already renamed columns would give
            # "D.D.<col>.LEAD0" instead of "D.<col>".
            if lag == 0:
                names = [f"D.{c}" for c in delta_x.columns]
            else:
                typ = "LAG" if lag < 0 else "LEAD"
                names = [f"D.{c}.{typ}{abs(lag)}" for c in delta_x.columns]
            lag_data.columns = names
            data.append(lag_data)
        data_df: pd.DataFrame = pd.concat(data, axis=1).dropna()
        lhs, rhs = data_df.iloc[:, :1], data_df.iloc[:, 1:]
        nrhs = rhs.shape[1]
        rhs = add_trend(rhs, trend=self._trend, prepend=True)
        ntrend = rhs.shape[1] - nrhs
        if ntrend:
            # Trend columns were prepended; move them to sit directly after
            # the x columns and before the lead/lag differences
            nx = x.shape[1]
            trend = rhs.iloc[:, :ntrend]
            rhs = pd.concat(
                [
                    rhs.iloc[:, ntrend:ntrend + nx], trend,
                    rhs.iloc[:, ntrend + nx:]
                ],
                axis=1,
            )
        return lhs, rhs
Пример #14
0
 def test_add_time_trend_dataframe(self):
     """A "t" trend on a DataFrame adds a "trend" column equal to 1..n."""
     nobs = 10
     frame = pd.DataFrame(self.rng.randn(nobs, 1), columns=["col1"])
     with_trend = add_trend(frame, trend="t")
     expected = np.arange(1.0, nobs + 1)
     assert_array_almost_equal(np.asarray(with_trend["trend"]), expected)
Пример #15
0
 def _common_fit(
         self, kernel: str, bandwidth: Optional[float], force_int: bool,
         diff: bool) -> Tuple[lrcov.CovarianceEstimator, NDArray, NDArray]:
     """
     Shared first stage: residuals, kernel covariance and slope estimates.

     Parameters
     ----------
     kernel : str
         Kernel name; passed through ``_check_kernel`` and then used as the
         key into ``KERNEL_ESTIMATORS``.
     bandwidth : float, optional
         Forwarded to the kernel estimator.
     force_int : bool
         Forwarded to the kernel estimator.
     diff : bool
         When True and the x trend has more than one column, the trend
         (without its first column) and x are differenced before the
         regression. Otherwise x is regressed on the trend in levels and
         the residuals are differenced.

     Returns
     -------
     cov_est : lrcov.CovarianceEstimator
         Kernel estimator built from ``eta`` with ``center=False``.
     eta : NDArray
         Cross-section residuals (first observation dropped) stacked
         column-wise with the x innovations.
     beta : NDArray
         The first ``x.shape[1]`` parameters of the cross-section regression.
     """
     kernel = _check_kernel(kernel)
     res = _cross_section(self._y, self._x, self._trend)
     x = np.asarray(self._x)
     eta_1 = np.asarray(res.resid)
     # Fall back to the model trend when no separate x trend was given
     x_trend = self._trend if self._x_trend is None else self._x_trend
     tr = add_trend(nobs=x.shape[0], trend=x_trend)
     if tr.shape[1] > 1 and diff:
         # Difference first, then remove the differenced trend (the first
         # trend column is excluded)
         delta_tr = np.diff(tr[:, 1:], axis=0)
         delta_x = np.diff(x, axis=0)
         gamma = np.linalg.lstsq(delta_tr, delta_x, rcond=None)[0]
         eta_2 = delta_x - delta_tr @ gamma
     else:
         # Detrend in levels (when there are trend columns), then difference
         if tr.shape[1]:
             gamma = np.linalg.lstsq(tr, x, rcond=None)[0]
             eps = x - tr @ gamma
         else:
             eps = x
         eta_2 = np.diff(eps, axis=0)
     # Drop the first residual so both pieces have the same number of rows
     eta = np.column_stack([eta_1[1:], eta_2])
     # NOTE(review): kernel already went through _check_kernel above; this
     # second call looks redundant -- confirm before removing.
     kernel = _check_kernel(kernel)
     kern_est = KERNEL_ESTIMATORS[kernel]
     cov_est = kern_est(eta,
                        bandwidth=bandwidth,
                        center=False,
                        force_int=force_int)
     beta = np.asarray(res.params)[:x.shape[1]]
     return cov_est, eta, beta
Пример #16
0
def _estimate_df_regression(y, trend, lags):
    """
    Fit the core (A)DF regression by OLS

    Parameters
    ----------
    y : array
        The series in levels
    trend : {'nc','c','ct','ctt'}
        The trend order; 'nc' adds no trend terms
    lags : int
        The number of lagged differences to include in the regression

    Returns
    -------
    ols_res : OLSResults
        The object returned by OLS(...).fit()

    Notes
    -----
    The dependent variable is the differenced series; the first regressor
    is the lagged level of y.
    """
    dy = diff(y)

    regressors = lagmat(dy[:, None], lags, trim='both', original='in')
    sample = regressors.shape[0]
    # Column 0 holds the contemporaneous difference: keep a copy as the
    # dependent variable, then overwrite it with the lagged level of y
    lhs = regressors[:, 0].copy()
    regressors[:, 0] = y[-sample - 1:-1]

    if trend == 'nc':
        return OLS(lhs, regressors).fit()

    with_trend = add_trend(regressors[:, :lags + 1], trend)
    return OLS(lhs, with_trend).fit()
Пример #17
0
 def test_add_trend_ct(self):
     """Appending "ct" adds a column of ones followed by a 1, 2, ... trend."""
     nobs = 20
     augmented = add_trend(np.zeros((nobs, 1)), trend="ct")
     const, time = augmented[:, 1], augmented[:, 2]
     assert np.all(const == 1.0)
     assert_equal(time[0], 1.0)
     assert_array_almost_equal(np.diff(time), np.ones((nobs - 1)))
Пример #18
0
def _estimate_df_regression(y, trend, lags):
    """
    Fit the core (A)DF regression by OLS

    Parameters
    ----------
    y : ndarray
        The series in levels
    trend : {'nc','c','ct','ctt'}
        The trend order; 'nc' adds no trend terms
    lags : int
        The number of lagged differences to include in the regression

    Returns
    -------
    ols_res : OLSResults
        The object returned by OLS(...).fit()

    Notes
    -----
    The dependent variable is the differenced series; the first regressor
    is the lagged level of y.
    """
    dy = diff(y)

    regressors = lagmat(dy[:, None], lags, trim='both', original='in')
    sample = regressors.shape[0]
    # Column 0 holds the contemporaneous difference: keep a copy as the
    # dependent variable, then overwrite it with the lagged level of y
    lhs = regressors[:, 0].copy()
    regressors[:, 0] = y[-sample - 1:-1]
    regressors = _add_column_names(regressors, lags)

    if trend == 'nc':
        return OLS(lhs, regressors).fit()

    with_trend = add_trend(regressors.iloc[:, :lags + 1], trend)
    return OLS(lhs, with_trend).fit()
Пример #19
0
def simulate_kpss(
    nobs: int,
    b: int,
    trend: str = "c",
    rng: Optional[RandomState] = None,
) -> np.ndarray:
    """
    Simulate the KPSS test statistic for nobs observations,
    performing b replications.

    Parameters
    ----------
    nobs : int
        Number of observations in each simulated series.
    b : int
        Number of replications (one column of draws per replication).
    trend : str
        "ct" regresses on a constant and a time trend; any other value
        regresses on a constant only.
    rng : RandomState, optional
        Source of standard normal draws. When omitted, a new RandomState
        seeded with 0 is used, so repeated calls give the same values.

    Returns
    -------
    ndarray
        Array with b elements, one simulated statistic per replication.
    """
    if rng is None:
        rng = RandomState()
        rng.seed(0)

    standard_normal = rng.standard_normal

    e = standard_normal((nobs, b))
    z = np.ones((nobs, 1))
    if trend == "ct":
        z = add_trend(z, trend="t")
    # Project the draws on the deterministic terms and keep the residuals
    zinv = np.linalg.pinv(z)
    trend_coef = zinv.dot(e)
    resid = e - cast(np.ndarray, z.dot(trend_coef))
    # Partial sums of the residuals, scaled by their variance
    s = np.cumsum(resid, axis=0)
    lam = (resid**2.0).mean(axis=0)
    kpss = 1 / (nobs**2.0) * (s**2.0).sum(axis=0) / lam
    return kpss
Пример #20
0
def _df_select_lags(y, trend, max_lags, method, low_memory=False):
    """
    Choose the lag length for a DF-type regression

    Parameters
    ----------
    y : ndarray
        The series used to select the lag length
    trend : {'nc','c','ct','ctt'}
        The trend order
    max_lags : int
        The largest lag length considered. The estimation sample is
        trimmed according to max_lags for every candidate model
    method : {'AIC','BIC','t-stat'}
        The selection rule passed on to the lag-selection helper
    low_memory : bool
        If True, selection is delegated to the low-memory routine and its
        result is returned unchanged

    Returns
    -------
    best_ic : float
        The information criterion at the selected lag
    best_lag : int
        The selected lag

    Notes
    -----
    If max_lags is None, ceil(12 * (nobs/100)**(1/4)) is used, capped at
    the largest feasible lag length and floored at 0.
    """
    nobs = y.shape[0]
    # Hard upper bound on the lag length; only binds for very short series
    lag_ceiling = nobs // 2 - 1
    if trend != 'nc':
        lag_ceiling -= len(trend)
    if max_lags is None:
        default_lags = int(ceil(12. * power(nobs / 100., 1 / 4.)))
        max_lags = max(min(default_lags, lag_ceiling), 0)
    if low_memory:
        return _autolag_ols_low_memory(y, max_lags, trend, method)

    dy = diff(y)
    rhs = lagmat(dy[:, None], max_lags, trim='both', original='in')
    nobs = rhs.shape[0]
    # Column 0 is replaced by the lagged level of y
    rhs[:, 0] = y[-nobs - 1:-1]
    lhs = dy[-nobs:]

    full_rhs = rhs if trend == 'nc' else add_trend(rhs, trend, prepend=True)

    # Index of the first lag column once the trend terms are prepended
    start_lag = full_rhs.shape[1] - rhs.shape[1] + 1
    ic_best, best_lag = _autolag_ols(lhs, full_rhs, start_lag, max_lags,
                                     method)
    return ic_best, best_lag
Пример #21
0
def test_add_trend_ctt():
    """Appending "ctt" adds a constant, a linear and a quadratic trend."""
    nobs = 10
    augmented = add_trend(np.zeros((nobs, 1)), trend="ctt")
    const, linear, quadratic = (augmented[:, i] for i in (1, 2, 3))
    assert np.all(const == 1.0)
    assert linear[0] == 1.0
    assert_array_almost_equal(np.diff(linear), np.ones((nobs - 1)))
    assert quadratic[0] == 1.0
    # Differences of 1, 4, 9, ... are the odd numbers 3, 5, 7, ...
    odd_steps = np.arange(3.0, 2.0 * nobs, 2.0)
    assert_array_almost_equal(np.diff(quadratic), odd_steps)
Пример #22
0
 def test_add_trend_ctt(self):
     """Appending 'ctt' adds a constant, a linear and a quadratic trend."""
     nobs = 10
     augmented = add_trend(np.zeros((nobs, 1)), trend='ctt')
     const, linear, quadratic = (augmented[:, i] for i in (1, 2, 3))
     assert np.all(const == 1.0)
     assert linear[0] == 1.0
     assert_array_almost_equal(np.diff(linear), np.ones((nobs - 1)))
     assert quadratic[0] == 1.0
     # Differences of 1, 4, 9, ... are the odd numbers 3, 5, 7, ...
     odd_steps = np.arange(3.0, 2.0 * nobs, 2.0)
     assert_array_almost_equal(np.diff(quadratic), odd_steps)
Пример #23
0
    def _compute_statistic(self):
        """
        Estimate the PP regression and store the resulting statistics

        Sets the tau and rho statistics, the statistic selected by the
        test type, its p-value, the critical values and the title.
        """
        y, trend = self._y, self._trend

        if self._lags is None:
            # Default lag length: ceil(12 * (nobs / 100) ** (1 / 4))
            self._lags = int(ceil(12. * power(y.shape[0] / 100., 1 / 4.)))

        # 1. Regress y[t] on y[t-1], adding trend terms unless trend is 'nc'
        lhs = y[1:, None]
        rhs = _add_column_names(y[:-1, None], 0)
        if trend != 'nc':
            rhs = add_trend(rhs, trend)
        resols = OLS(lhs, rhs).fit()

        k = rhs.shape[1]
        n, u = resols.nobs, resols.resid
        lam2 = cov_nw(u, self._lags, demean=False)
        lam = sqrt(lam2)

        # 2. Components of the corrections
        s2 = u.dot(u) / (n - k)
        s = sqrt(s2)
        gamma0 = s2 * (n - k) / n
        sigma = resols.bse[0]
        sigma2 = sigma**2.0
        rho = resols.params[0]
        excess = lam2 - gamma0

        # 3. Both statistics are always computed and stored
        self._stat_tau = (sqrt(gamma0 / lam2) * ((rho - 1) / sigma)
                          - 0.5 * (excess / lam) * (n * sigma / s))
        self._stat_rho = (n * (rho - 1)
                          - 0.5 * (n ** 2.0 * sigma2 / s2) * excess)

        self._nobs = int(resols.nobs)
        use_rho = self._test_type == 'rho'
        self._stat = self._stat_rho if use_rho else self._stat_tau
        dist_type = 'ADF-z' if use_rho else 'ADF-t'

        self._pvalue = mackinnonp(self._stat,
                                  regression=trend,
                                  dist_type=dist_type)
        critical_values = mackinnoncrit(regression=trend,
                                        nobs=n,
                                        dist_type=dist_type)
        self._critical_values = {
            label: critical_values[i]
            for i, label in enumerate(("1%", "5%", "10%"))
        }

        self._title = self._test_name + ' (Z-' + self._test_type + ')'
Пример #24
0
    def _compute_statistic(self):
        """
        Estimate the PP regression and store the resulting statistics

        Sets the tau and rho statistics, the statistic selected by the
        test type, its p-value, the critical values and the title.
        """
        y, trend = self._y, self._trend

        if self._lags is None:
            # Default lag length: ceil(12 * (nobs / 100) ** (1 / 4))
            self._lags = int(ceil(12. * power(y.shape[0] / 100., 1 / 4.)))

        # 1. Regress y[t] on y[t-1], adding trend terms unless trend is 'nc'
        lhs = y[1:, None]
        rhs = y[:-1, None]
        if trend != 'nc':
            rhs = add_trend(rhs, trend)
        resols = OLS(lhs, rhs).fit()

        k = rhs.shape[1]
        n, u = resols.nobs, resols.resid
        lam2 = cov_nw(u, self._lags, demean=False)
        lam = sqrt(lam2)

        # 2. Components of the corrections
        s2 = u.dot(u) / (n - k)
        s = sqrt(s2)
        gamma0 = s2 * (n - k) / n
        sigma = resols.bse[0]
        sigma2 = sigma ** 2.0
        rho = resols.params[0]
        excess = lam2 - gamma0

        # 3. Both statistics are always computed and stored
        self._stat_tau = (sqrt(gamma0 / lam2) * ((rho - 1) / sigma)
                          - 0.5 * (excess / lam) * (n * sigma / s))
        self._stat_rho = (n * (rho - 1)
                          - 0.5 * (n ** 2.0 * sigma2 / s2) * excess)

        self._nobs = int(resols.nobs)
        use_rho = self._test_type == 'rho'
        self._stat = self._stat_rho if use_rho else self._stat_tau
        dist_type = 'ADF-z' if use_rho else 'ADF-t'

        self._pvalue = mackinnonp(self._stat,
                                  regression=trend,
                                  dist_type=dist_type)
        critical_values = mackinnoncrit(regression=trend,
                                        nobs=n,
                                        dist_type=dist_type)
        self._critical_values = {
            label: critical_values[i]
            for i, label in enumerate(("1%", "5%", "10%"))
        }

        self._title = self._test_name + ' (Z-' + self._test_type + ')'
Пример #25
0
def _cross_section(y: ArrayLike1D, x: ArrayLike2D,
                   trend: str) -> RegressionResults:
    """
    Regress y on x plus the requested trend terms using OLS.

    Parameters
    ----------
    y : ArrayLike1D
        Dependent variable; passed through ``ensure1d``.
    x : ArrayLike2D
        Regressors; passed through ``ensure2d``. Input that is not a
        DataFrame after that is wrapped in one with columns "x1", "x2", ...
        and the index of y.
    trend : str
        One of "n", "c", "ct" or "ctt".

    Returns
    -------
    RegressionResults
        The fitted OLS results.

    Raises
    ------
    ValueError
        If trend is not one of the accepted values.
    """
    valid_trends = ("n", "c", "ct", "ctt")
    if trend not in valid_trends:
        raise ValueError('trend must be one of "n", "c", "ct" or "ctt"')
    y = ensure1d(y, "y", True)
    x = ensure2d(x, "x")

    if not isinstance(x, pd.DataFrame):
        names = [f"x{i + 1}" for i in range(x.shape[1])]
        x = pd.DataFrame(x, columns=names, index=y.index)
    return OLS(y, add_trend(x, trend)).fit()
Пример #26
0
 def test_errors(self):
     """Three invalid add_trend calls must each raise ValueError."""
     nobs = 100
     # Unrecognized trend string
     with pytest.raises(ValueError):
         add_trend(x=None, trend='unknown', nobs=nobs)
     # Neither x nor nobs supplied
     with pytest.raises(ValueError):
         add_trend(x=None, trend='ct')
     # x is a column of ones and has_constant='raise'
     const_col = np.ones((nobs, 1))
     with pytest.raises(ValueError):
         add_trend(const_col, trend='ct', has_constant='raise')
Пример #27
0
 def test_errors(self):
     """Three invalid add_trend calls must each raise ValueError."""
     nobs = 100
     # Unrecognized trend string
     with pytest.raises(ValueError):
         add_trend(x=None, trend='unknown', nobs=nobs)
     # Neither x nor nobs supplied
     with pytest.raises(ValueError):
         add_trend(x=None, trend='ct')
     # x is a column of ones and has_constant='raise'
     const_col = np.ones((nobs, 1))
     with pytest.raises(ValueError):
         add_trend(const_col, trend='ct', has_constant='raise')
Пример #28
0
 def test_errors(self):
     """Three invalid add_trend calls must each raise ValueError."""
     nobs = 100
     # Unrecognized trend string
     with pytest.raises(ValueError):
         add_trend(x=None, trend="unknown", nobs=nobs)
     # Neither x nor nobs supplied
     with pytest.raises(ValueError):
         add_trend(x=None, trend="ct")
     # x is a column of ones and has_constant="raise"
     const_col = np.ones((nobs, 1))
     with pytest.raises(ValueError):
         add_trend(const_col, trend="ct", has_constant="raise")
Пример #29
0
def _df_select_lags(y, trend, max_lags, method):
    """
    Choose the lag length for a DF-type regression

    Parameters
    ----------
    y : ndarray
        The series used to select the lag length
    trend : {'nc','c','ct','ctt'}
        The trend order
    max_lags : int
        The largest lag length considered. The estimation sample is
        trimmed according to max_lags for every candidate model
    method : {'AIC','BIC','t-stat'}
        The selection rule passed on to the lag-selection helper

    Returns
    -------
    best_ic : float
        The information criterion at the selected lag
    best_lag : int
        The selected lag

    Notes
    -----
    If max_lags is None, ceil(12 * (nobs/100)**(1/4)) is used.
    """
    if max_lags is None:
        max_lags = int(ceil(12. * power(y.shape[0] / 100., 1 / 4.)))

    dy = diff(y)
    rhs = lagmat(dy[:, None], max_lags, trim='both', original='in')
    nobs = rhs.shape[0]
    # Column 0 is replaced by the lagged level of y
    rhs[:, 0] = y[-nobs - 1:-1]
    lhs = dy[-nobs:]

    full_rhs = rhs if trend == 'nc' else add_trend(rhs, trend, prepend=True)

    # Index of the first lag column once the trend terms are prepended
    start_lag = full_rhs.shape[1] - rhs.shape[1] + 1
    ic_best, best_lag = _autolag_ols(lhs, full_rhs, start_lag, max_lags,
                                     method)

    return ic_best, best_lag
Пример #30
0
def _df_select_lags(y, trend, max_lags, method):
    """
    Choose the lag length for a DF-type regression

    Parameters
    ----------
    y : array
        The series used to select the lag length
    trend : {'nc','c','ct','ctt'}
        The trend order
    max_lags : int
        The largest lag length considered. The estimation sample is
        trimmed according to max_lags for every candidate model
    method : {'AIC','BIC','t-stat'}
        The selection rule passed on to the lag-selection helper

    Returns
    -------
    best_ic : float
        The information criterion at the selected lag
    best_lag : int
        The selected lag

    Notes
    -----
    If max_lags is None, ceil(12 * (nobs/100)**(1/4)) is used.
    """
    if max_lags is None:
        max_lags = int(ceil(12. * power(y.shape[0] / 100., 1 / 4.)))

    dy = diff(y)
    rhs = lagmat(dy[:, None], max_lags, trim='both', original='in')
    nobs = rhs.shape[0]
    # Column 0 is replaced by the lagged level of y
    rhs[:, 0] = y[-nobs - 1:-1]
    lhs = dy[-nobs:]

    full_rhs = rhs if trend == 'nc' else add_trend(rhs, trend, prepend=True)

    # Index of the first lag column once the trend terms are prepended
    start_lag = full_rhs.shape[1] - rhs.shape[1] + 1
    ic_best, best_lag = _autolag_ols(lhs, full_rhs, start_lag, max_lags,
                                     method)

    return ic_best, best_lag
Пример #31
0
 def _final_statistics(self, theta: pd.Series) -> Tuple[pd.Series, float, float]:
     """
     Compute residuals, R-squared and adjusted R-squared for theta.

     Parameters
     ----------
     theta : Series
         Coefficients applied to x augmented with the model's trend terms.

     Returns
     -------
     resid : Series
         ``self._y`` minus the fitted values, named "resid".
     r2 : float
         One minus the ratio of residual to total sum of squares. The
         total is centered on the mean of y only when the trend string
         contains "c".
     r2_adj : float
         Same ratio with both sums of squares divided by their degrees of
         freedom.
     """
     design = add_trend(self._x, self._trend)
     nobs, nvar = design.shape
     resid = self._y - np.asarray(design @ theta)
     resid.name = "resid"

     # Center only when the trend includes a constant
     has_const = "c" in self._trend
     center = self._y.mean() if has_const else 0.0
     tss_df = 1 if has_const else 0

     deviations = self._y - center
     ssr = resid.T @ resid
     tss = deviations.T @ deviations
     r2 = 1.0 - ssr / tss
     r2_adj = 1.0 - (ssr / (nobs - nvar)) / (tss / (nobs - tss_df))
     return resid, r2, r2_adj
Пример #32
0
def p_tests_vec(z: NDArray, lag: int,
                trend: str) -> Tuple[np.ndarray, np.ndarray]:
    """
    Compute the ``p_u`` and ``p_z`` statistics for a stack of data arrays.

    Parameters
    ----------
    z : NDArray
        3-d array (asserted). Axis 1 is used as the observation axis; the
        first element along the last axis is the dependent variable and the
        rest are regressors.
    lag : int
        Number of autocovariances added to the long-run covariance; lag
        ``i`` receives the weight ``1 - i / (lag + 1)``.
    trend : str
        "c" applies ``demean``; "ct" and "ctt" remove a fitted trend built
        by ``add_trend``; any other value subtracts the first observation
        from ``z`` only.

    Returns
    -------
    p_u : ndarray
        The p_u statistics.
    p_z : ndarray
        The p_z statistics.
    """
    assert z.ndim == 3
    # Lagged and lead samples along the observation axis
    z_lag, z_lead = z[:, :-1], z[:, 1:]
    nobs = z.shape[1]
    if trend == "c":
        z = demean(z)
        z_lag = demean(z_lag)
        z_lead = demean(z_lead)
    elif trend in ("ct", "ctt"):
        post = []
        for v in (z, z_lag, z_lead):
            # Each array gets a trend sized to its own number of observations
            tr = add_trend(nobs=v.shape[1], trend=trend)
            # Rescale the trend columns in place before solving
            tr /= np.sqrt((tr**2).mean(0) * nobs)
            trptr = tr.T @ tr
            trpv = tr.T @ v
            # Subtract the least-squares trend fit (normal equations)
            post.append(v - tr @ solve(trptr, trpv))
        z, z_lag, z_lead = post
    else:
        # No trend: recenter z on its first observation (z_lag and z_lead
        # are left unchanged)
        z = z - z[:, :1]

    x, y = z[..., 1:], z[..., :1]
    u = y
    if x.shape[-1]:
        # Residuals from regressing y on x
        beta = solve(inner_prod(x), inner_prod(x, y))
        u = y - x @ beta
    # Regress the lead values on the lagged values and keep the residuals
    phi = solve(inner_prod(z_lag), inner_prod(z_lag, z_lead))
    xi = z_lead - z_lag @ phi

    # Weighted sum of autocovariances of xi
    omega = inner_prod(xi) / nobs
    for i in range(1, lag + 1):
        w = 1 - i / (lag + 1)
        gamma = inner_prod(xi[:, i:], xi[:, :-i]) / nobs
        # Add gamma and its transpose over the last two axes
        omega += w * (gamma + cast(np.ndarray, gamma).transpose((0, 2, 1)))
    omega21 = omega[:, :1, 1:]
    omega22 = omega[:, 1:, 1:]
    omega112 = omega[:, :1, :1] - omega21 @ inv(omega22) @ omega21.transpose(
        (0, 2, 1))
    denom = inner_prod(u) / nobs
    p_u = nobs * np.squeeze(omega112 / denom)

    # z detrended above
    m_zz = inner_prod(z) / nobs
    # ufunc trace using einsum
    p_z = nobs * np.einsum("...ii", omega @ inv(m_zz))

    return p_u, p_z
Пример #33
0
 def _compute_statistic(self):
     """Compute the KPSS statistic, its p-value and the critical values"""
     nobs, y, trend = self._nobs, self._y, self._trend

     # 1. Residuals from regressing y on the trend terms only
     resid = OLS(y, add_trend(nobs=nobs, trend=trend)).fit().resid

     # 2. Scaled sum of squared partial sums of the residuals
     if self._lags is None:
         self._lags = int(ceil(12. * power(nobs / 100., 1 / 4.)))
     long_run = cov_nw(resid, self._lags, demean=False)
     partial = cumsum(resid)
     self._stat = 1 / (nobs ** 2.0) * sum(partial ** 2.0) / long_run
     self._nobs = resid.shape[0]

     self._pvalue, critical_values = kpss_crit(self._stat, trend)
     self._critical_values = {
         label: critical_values[i]
         for i, label in enumerate(("1%", "5%", "10%"))
     }
Пример #34
0
    def _compute_statistic(self):
        """
        Estimate the DF-GLS test statistic

        Detrends y, selects the lag length when none was given, runs the
        DF regression without trend terms and stores the statistic,
        p-value and critical values.
        """
        trend, c = self._trend, self._c
        y = self._y
        nobs = y.shape[0]
        ct = c / nobs
        z = add_trend(nobs=nobs, trend=trend)

        # 1. GLS detrend: quasi-difference every row except the first
        quasi = 1 + ct
        delta_z = z.copy()
        delta_z[1:, :] = delta_z[1:, :] - quasi * delta_z[:-1, :]
        delta_y = y.copy()[:, None]
        delta_y[1:] = delta_y[1:] - quasi * delta_y[:-1]
        detrend_coef = pinv(delta_z).dot(delta_y)
        # The coefficients are applied to the trend in levels
        y_detrended = y - z.dot(detrend_coef).ravel()

        # 2. Determine the lag length, if needed
        if self._lags is None:
            _, self._lags = _df_select_lags(y_detrended,
                                            'nc',
                                            self._max_lags,
                                            self._method,
                                            low_memory=self._low_memory)

        # 3. Run the regression on the detrended series
        resols = _estimate_df_regression(y_detrended,
                                         lags=self._lags,
                                         trend='nc')
        self._regression = resols
        self._nobs = int(resols.nobs)
        self._stat = resols.tvalues[0]
        self._pvalue = mackinnonp(self._stat,
                                  regression=trend,
                                  dist_type='DFGLS')
        critical_values = mackinnoncrit(regression=trend,
                                        nobs=self._nobs,
                                        dist_type='DFGLS')
        self._critical_values = {
            label: critical_values[i]
            for i, label in enumerate(("1%", "5%", "10%"))
        }
Пример #35
0
def z_tests(z: NDArray, lag: int, trend: str):
    """
    Compute the ``z_a`` and ``z_t`` statistics from a 2-d data array.

    Parameters
    ----------
    z : NDArray
        Data array. Column 0 is the dependent variable; after the trend
        terms are added by ``add_trend`` all remaining columns are used as
        regressors.
    lag : int
        Number of autocovariances in the one-sided sum; lag ``i`` receives
        the weight ``1 - i / (lag + 1)``.
    trend : str
        Trend specification forwarded to ``add_trend``.

    Returns
    -------
    tuple
        ``(z_a, z_t)``.
    """
    z = add_trend(z, trend=trend)
    u = z
    if z.shape[1] > 1:
        # Residuals from regressing the first column on all the others
        u = z[:, 0] - z[:, 1:] @ lstsq(z[:, 1:], z[:, 0], rcond=None)[0]
    # AR(1) coefficient of the residuals and the implied innovations
    alpha = (u[:-1].T @ u[1:]) / (u[:-1].T @ u[:-1])
    k = u[1:] - alpha * u[:-1]
    nobs = u.shape[0]
    one_sided_strict = 0.0
    for i in range(1, lag + 1):
        w = 1 - i / (lag + 1)
        one_sided_strict += 1 / nobs * w * k[i:].T @ k[:-i]
    u2 = u[:-1].T @ u[:-1]

    # Bias-corrected (alpha - 1)
    adj = (alpha - 1) - nobs * one_sided_strict / u2
    z_a = nobs * adj
    long_run = k.T @ k / nobs + 2 * one_sided_strict
    # long_run is a variance, so the t-type statistic is scaled by its
    # square root, i.e. adj / sqrt(long_run / u2).
    z_t = np.sqrt(u2) * adj / np.sqrt(long_run)
    return z_a, z_t
Пример #36
0
def z_tests(z: NDArray, lag: int, trend: str) -> Tuple[float, float]:
    """
    Compute the ``z_a`` and ``z_t`` statistics from a 2-d data array.

    Parameters
    ----------
    z : NDArray
        Data array. Column 0 is the dependent variable; after the trend
        terms are added by ``add_trend`` all remaining columns are used as
        regressors.
    lag : int
        Number of autocovariances in the one-sided sum; lag ``i`` receives
        the weight ``1 - i / (lag + 1)``.
    trend : str
        Trend specification forwarded to ``add_trend``.

    Returns
    -------
    tuple of float
        ``(z_a, z_t)``.
    """
    data = add_trend(z, trend=trend)
    u = data
    if data.shape[1] > 1:
        # Residuals from regressing the first column on all the others
        dep, regressors = data[:, 0], data[:, 1:]
        delta = np.linalg.lstsq(regressors, dep, rcond=None)[0]
        u = dep - regressors @ delta

    # AR(1) coefficient of the residuals and the implied innovations
    u_lag, u_lead = u[:-1], u[1:]
    u2 = u_lag.T @ u_lag
    alpha = (u_lag.T @ u_lead) / (u_lag.T @ u_lag)
    k = u_lead - alpha * u_lag
    nobs = u.shape[0]

    one_sided_strict = 0.0
    for i in range(1, lag + 1):
        weight = 1 - i / (lag + 1)
        one_sided_strict += 1 / nobs * weight * k[i:].T @ k[:-i]

    # Bias-corrected (alpha - 1)
    adj = (alpha - 1) - nobs * one_sided_strict / u2
    long_run = k.T @ k / nobs + 2 * one_sided_strict
    se = np.sqrt(long_run / u2)
    return float(nobs * adj), float(adj / se)
Пример #37
0
    def _compute_statistic(self):
        """
        Estimate the DF-GLS test statistic

        Detrends y, selects the lag length when none was given, runs the
        DF regression without trend terms and stores the statistic,
        p-value and critical values.
        """
        trend, c = self._trend, self._c
        y = self._y
        nobs = y.shape[0]
        ct = c / nobs
        z = add_trend(nobs=nobs, trend=trend)

        # 1. GLS detrend: quasi-difference every row except the first
        quasi = 1 + ct
        delta_z = z.copy()
        delta_z[1:, :] = delta_z[1:, :] - quasi * delta_z[:-1, :]
        delta_y = y.copy()[:, None]
        delta_y[1:] = delta_y[1:] - quasi * delta_y[:-1]
        detrend_coef = pinv(delta_z).dot(delta_y)
        # The coefficients are applied to the trend in levels
        y_detrended = y - z.dot(detrend_coef).ravel()

        # 2. Determine the lag length, if needed
        if self._lags is None:
            _, self._lags = _df_select_lags(y_detrended, 'nc',
                                            self._max_lags, self._method)

        # 3. Run the regression on the detrended series
        resols = _estimate_df_regression(y_detrended,
                                         lags=self._lags,
                                         trend='nc')
        self._regression = resols
        self._nobs = int(resols.nobs)
        self._stat = resols.tvalues[0]
        self._pvalue = mackinnonp(self._stat,
                                  regression=trend,
                                  dist_type='DFGLS')
        critical_values = mackinnoncrit(regression=trend,
                                        nobs=self._nobs,
                                        dist_type='DFGLS')
        self._critical_values = {
            label: critical_values[i]
            for i, label in enumerate(("1%", "5%", "10%"))
        }
Пример #38
0
def z_tests_vec(z: NDArray, lag: int,
                trend: str) -> Tuple[np.ndarray, np.ndarray]:
    """
    Compute the ``z_a`` and ``z_t`` statistics for a stack of data arrays.

    Parameters
    ----------
    z : NDArray
        3-d array (asserted). Axis 1 is used as the observation axis; the
        first element along the last axis is the dependent variable and the
        rest are regressors.
    lag : int
        Number of autocovariances in the one-sided sum; lag ``i`` receives
        the weight ``1 - i / (lag + 1)``.
    trend : str
        "c" applies ``demean``; "ct" and "ctt" remove a fitted trend built
        by ``add_trend``; any other value leaves ``z`` unchanged.

    Returns
    -------
    z_a : ndarray
        The z_a statistics.
    z_t : ndarray
        The z_t statistics.
    """
    assert z.ndim == 3
    nobs = int(z.shape[1])
    if trend == "c":
        z = demean(z)
    elif trend in ("ct", "ctt"):
        tr = add_trend(nobs=nobs, trend=trend)
        # Rescale the trend columns in place before solving
        tr /= np.sqrt((tr**2).mean(0) * nobs)
        trptr = tr.T @ tr
        trpz = tr.T @ z
        # Subtract the least-squares trend fit (normal equations)
        z = z - tr @ solve(trptr, trpz)
    y = z[..., :1]
    x = z[..., 1:]
    u = y
    if z.shape[-1] > 1:
        # Residuals from regressing y on x
        xpx = inner_prod(x)
        xpx_inv = inv(xpx)
        b = xpx_inv @ inner_prod(x, y)
        u = y - x @ b
    nseries = u.shape[0]
    # Rearrange so rows are observations and columns are series
    u = u.reshape((nseries, -1)).T
    ulag = u[:-1]
    ulead = u[1:]
    # AR(1) coefficient per series and the implied innovations
    alpha = (ulead * ulag).mean(0) / (ulag**2).mean(0)
    one_sided_strict = np.zeros_like(alpha)
    k = ulead - ulag * alpha
    for i in range(1, lag + 1):
        w = 1 - i / (lag + 1)
        one_sided_strict += 1 / nobs * w * (k[i:] * k[:-i]).sum(0)

    u2 = (u[:-1] * u[:-1]).sum(0)
    # Bias-corrected (alpha - 1); note that the name z is reused here
    z = (alpha - 1) - nobs * one_sided_strict / u2
    z_a = nobs * z
    long_run = (k**2).sum(0) / nobs + 2 * one_sided_strict
    z_t = np.sqrt(u2) * z / np.sqrt(long_run)
    assert isinstance(z_a, np.ndarray)
    assert isinstance(z_t, np.ndarray)
    return z_a, z_t
Пример #39
0
def simulate_kpss(nobs, b, trend="c", rng=None):
    """
    Simulate the KPSS test statistic for nobs observations,
    performing b replications.

    When rng is omitted, a new RandomState seeded with 0 is used. A trend
    of "ct" regresses on a constant and a time trend; any other value
    regresses on a constant only. Returns one statistic per replication.
    """
    if rng is None:
        rng = RandomState()
        rng.seed(0)

    shocks = rng.standard_normal((nobs, b))
    design = np.ones((nobs, 1))
    if trend == "ct":
        design = add_trend(design, trend="t")

    # Project the draws on the deterministic terms and keep the residuals
    coefs = np.linalg.pinv(design).dot(shocks)
    resid = shocks - design.dot(coefs)

    # Partial sums of the residuals, scaled by their variance
    partial = np.cumsum(resid, axis=0)
    variance = np.mean(resid**2.0, axis=0)
    return 1 / (nobs**2.0) * np.sum(partial**2.0, axis=0) / variance
Пример #40
0
 def test_add_trend_no_input(self):
     """With x=None and nobs given, 'ct' returns a constant and a trend."""
     nobs = 100
     trend = add_trend(x=None, trend='ct', nobs=nobs)
     const, time = trend[:, 0], trend[:, 1]
     assert np.all(const == 1.0)
     assert time[0] == 1.0
     assert_array_almost_equal(np.diff(time), np.ones((nobs - 1)))
Пример #41
0
 def test_add_trend_t(self):
     """Appending 't' adds a trend that starts at 1 and grows by 1."""
     nobs = 20
     augmented = add_trend(np.zeros((nobs, 1)), trend='t')
     time = augmented[:, 1]
     assert time[0] == 1.0
     assert_array_almost_equal(np.diff(time), np.ones((nobs - 1)))
Пример #42
0
 def test_skip_constant(self):
     """'add' appends a second column of ones; 'skip' returns one column."""
     ones = np.ones((100, 1))
     added = add_trend(ones, trend='c', has_constant='add')
     assert_array_equal(np.ones((100, 2)), added)
     skipped = add_trend(ones, trend='c', has_constant='skip')
     assert_array_equal(np.ones((100, 1)), skipped)
Пример #43
0
 def test_add_trend_prepend(self):
     """Check that prepend only changes where the 'ct' columns are placed."""
     nobs = 10
     data = self.rng.randn(nobs, 1)
     leading = add_trend(data, trend='ct', prepend=True)
     trailing = add_trend(data, trend='ct', prepend=False)
     # First two columns of the prepended array vs. everything after the
     # first column of the appended array.
     assert_equal(leading[:, :2], trailing[:, 1:])
Пример #44
0
 def test_add_trend_c(self):
     """Appending 'c' adds a second column that is all ones."""
     augmented = add_trend(np.zeros((10, 1)), trend='c')
     const = augmented[:, 1]
     assert np.all(const == 1.0)
Пример #45
0
 def test_skip_constant(self):
     """"add" appends a second column of ones; "skip" returns one column."""
     ones = np.ones((100, 1))
     added = add_trend(ones, trend="c", has_constant="add")
     assert_array_equal(np.ones((100, 2)), added)
     skipped = add_trend(ones, trend="c", has_constant="skip")
     assert_array_equal(np.ones((100, 1)), skipped)