def kernel_optimal_bandwidth(x: Float64Array, kernel: str = "bartlett") -> int:
    """
    Select a data-dependent bandwidth for a kernel-weighted estimator.

    Parameters
    ----------
    x : ndarray
        Data used to compute the bandwidth. The number of observations is
        read from the first dimension and the array is squeezed before use.
    kernel : str, optional
        Name of the kernel. Recognized names are:

        * 'bartlett', 'newey-west' : Bartlett's kernel
        * 'parzen', 'gallant' : Parzen's kernel
        * 'qs', 'quadratic-spectral', 'andrews' : Quadratic spectral kernel

    Returns
    -------
    int
        Selected bandwidth, capped at nobs - 1.

    Raises
    ------
    ValueError
        If ``kernel`` is not one of the recognized names.

    Notes
    -----

    .. todo::

      * Explain mathematics involved
      * References

    See Also
    --------
    linearmodels.iv.covariance.kernel_weight_bartlett,
    linearmodels.iv.covariance.kernel_weight_parzen,
    linearmodels.iv.covariance.kernel_weight_quadratic_spectral
    """
    nobs = x.shape[0]
    series = x.squeeze()

    # Each row: (accepted names, q, c, exponent used for the preliminary
    # number of lags).
    kernel_table = (
        (("bartlett", "newey-west"), 1, 1.1447, 2 / 9),
        (("qs", "andrews", "quadratic-spectral"), 2, 1.3221, 2 / 25),
        (("gallant", "parzen"), 2, 2.6614, 4 / 25),
    )
    for names, q, c, exponent in kernel_table:
        if kernel in names:
            break
    else:
        raise ValueError("Unknown kernel: {0}".format(kernel))
    max_lag = int(ceil(4 * (nobs / 100) ** exponent))

    # Cross-products of the series with its own lags 0..max_lag, each
    # divided by nobs.
    autocov = empty(max_lag + 1)
    autocov[0] = series.T @ series / nobs
    for lag in range(1, max_lag + 1):
        autocov[lag] = series[lag:].T @ series[:-lag] / nobs

    lagged = autocov[1:]
    s0 = autocov[0] + 2 * lagged.sum()
    sq = 2 * npsum(lagged * arange(1, max_lag + 1) ** q)

    rate = 1 / (2 * q + 1)
    gamma = c * ((sq / s0) ** 2) ** rate
    bandwidth = int(ceil(gamma * nobs ** rate))
    # Never return a bandwidth larger than nobs - 1
    return min(bandwidth, nobs - 1)
def _post_estimation(
    self,
    params: Float64Array,
    cov_estimator: Union[
        HomoskedasticCovariance,
        HeteroskedasticCovariance,
        KernelCovariance,
        ClusteredCovariance,
    ],
    cov_type: str,
) -> Dict[str, Any]:
    """
    Assemble the dictionary of post-estimation results.

    Parameters
    ----------
    params : ndarray
        Estimated parameters. Passed to ``resids`` and ``wresids`` and
        stored (squeezed) under ``"params"``.
    cov_estimator : {HomoskedasticCovariance, HeteroskedasticCovariance,
                     KernelCovariance, ClusteredCovariance}
        Covariance estimator. Its ``cov``, ``debiased``, ``s2`` and
        ``config`` attributes are read.
    cov_type : str
        Covariance type label; stored unchanged under ``"cov_type"``.

    Returns
    -------
    dict
        Results with keys ``params``, ``eps``, ``weps``, ``cov``, ``s2``,
        ``debiased``, ``residual_ss``, ``total_ss``, ``r2``, ``fstat``,
        ``vars``, ``instruments`` (always an empty list), ``cov_config``,
        ``cov_type``, ``method``, ``cov_estimator``, ``fitted``,
        ``original_index``, ``absorbed_effects`` and ``absorbed_r2``.

    Notes
    -----
    Both R-squared values are floored at 0.0.
    """
    columns = self._columns
    index = self._index

    # Residuals and fitted values on the original (unweighted) scale
    eps = self.resids(params)
    fitted_values = self._dependent.ndarray - eps
    fitted = DataFrameWrapper(
        fitted_values,
        index=self._dependent.rows,
        columns=["fitted_values"],
    )
    assert isinstance(self._absorbed_dependent, DataFrame)
    absorbed_effects = DataFrameWrapper(
        self._absorbed_dependent.to_numpy() - fitted_values,
        columns=["absorbed_effects"],
        index=self._dependent.rows,
    )

    weps = self.wresids(params)
    cov = cov_estimator.cov
    debiased = cov_estimator.debiased

    residual_ss = (weps.T @ weps)[0, 0]

    # Total sum of squares of the weighted dependent variable, measured
    # around the weighted mean when the model has a constant.
    w = self.weights.ndarray
    root_w = sqrt(w)
    e = self._dependent.ndarray * root_w
    if self.has_constant:
        e = e - root_w * average(self._dependent.ndarray, weights=w)

    # Extract the single element explicitly with .item(); calling float()
    # directly on an array with ndim > 0 is deprecated since NumPy 1.25.
    total_ss = float((e.T @ e).item())
    r2 = max(1 - residual_ss / total_ss, 0.0)

    e = self._absorbed_dependent.to_numpy()  # already scaled by root_w
    # If absorbing contains a constant, but exog does not, no need to demean
    assert isinstance(self._absorbed_exog, DataFrame)
    if self._const_col is not None:
        col = self._const_col
        # Keep the column 2-d so the least-squares projection of e on it
        # can be removed by broadcasting.
        x = self._absorbed_exog.to_numpy()[:, col : col + 1]
        mu = (lstsq(x, e, rcond=None)[0]).squeeze()
        e = e - x * mu

    absorbed_total_ss = float((e.T @ e).item())
    r2_absorbed = max(1 - residual_ss / absorbed_total_ss, 0.0)

    fstat = self._f_statistic(params, cov, debiased)
    out = {
        "params": Series(params.squeeze(), columns, name="parameter"),
        "eps": SeriesWrapper(eps.squeeze(), index=index, name="residual"),
        "weps": SeriesWrapper(
            weps.squeeze(), index=index, name="weighted residual"
        ),
        "cov": DataFrame(cov, columns=columns, index=columns),
        "s2": float(cov_estimator.s2),
        "debiased": debiased,
        "residual_ss": float(residual_ss),
        "total_ss": float(total_ss),
        "r2": float(r2),
        "fstat": fstat,
        "vars": columns,
        "instruments": [],
        "cov_config": cov_estimator.config,
        "cov_type": cov_type,
        "method": self._method,
        "cov_estimator": cov_estimator,
        "fitted": fitted,
        "original_index": self._original_index,
        "absorbed_effects": absorbed_effects,
        "absorbed_r2": r2_absorbed,
    }

    return out