def test_string():
    """Exercise ``get_string`` on present, absent, ``None`` and badly typed values."""
    # A str stored under the requested key is returned as-is.
    assert get_string({"v": "1"}, "v") == "1"
    # Asking for a key that is not in the mapping gives None.
    assert get_string({"v": True}, "a") is None
    # Values that are neither str nor None must be rejected with a TypeError.
    for invalid in (1.0, b"1"):
        with pytest.raises(TypeError, match=r".not a str"):
            get_string({"v": invalid}, "v")
    # An explicit None value is handed back as None.
    assert get_string({"v": None}, "v") is None
def test_float():
    """Exercise ``get_float`` on present, absent, ``None`` and badly typed values."""
    # A genuine float stored under the requested key round-trips.
    assert get_float({"v": 1.0}, "v") == 1.0
    # A bool compares equal to 1.0 once retrieved.
    assert get_float({"v": True}, "v") == 1.0
    # Asking for a key that is not in the mapping gives None.
    assert get_float({"v": True}, "a") is None
    # An explicit None value is handed back as None.  This previously called
    # get_string, so get_float's None handling was never checked.
    assert get_float({"v": None}, "v") is None
    # A str is rejected even when it looks numeric.
    with pytest.raises(TypeError, match=r".not a float"):
        get_float({"v": "1.0"}, "v")
def setup_covariance_estimator(
    cov_estimators: CovarianceManager,
    cov_type: str,
    y: NDArray,
    x: NDArray,
    params: NDArray,
    entity_ids: NDArray,
    time_ids: NDArray,
    *,
    debiased: bool = False,
    extra_df: int = 0,
    **cov_config: Any,
) -> Union[HomoskedasticCovariance]:
    """
    Instantiate the covariance estimator registered under ``cov_type``.

    Parameters
    ----------
    cov_estimators : CovarianceManager
        Lookup indexed by ``cov_type`` that yields the estimator class.
    cov_type : str
        Key used to select the estimator class from ``cov_estimators``.
    y, x, params, entity_ids, time_ids : NDArray
        Forwarded positionally, in this order, to the estimator.
    debiased : bool
        Forwarded to the estimator as ``debiased``.
    extra_df : int
        Forwarded to the estimator as ``extra_df``.
    **cov_config
        Optional settings. ``clusters`` and ``group_debias`` are passed to
        ``ClusteredCovariance``; ``kernel`` and ``bandwidth`` are passed to
        ``DriscollKraay`` and ``ACCovariance``. Other keys are not read.

    Returns
    -------
    HomoskedasticCovariance
        The constructed estimator instance (annotated as the homoskedastic
        type; the concrete class depends on ``cov_type``).
    """
    estimator = cov_estimators[cov_type]

    # Every option is read before dispatching, whichever estimator was chosen,
    # so the getters always run in this order.
    kernel = get_string(cov_config, "kernel")
    bandwidth = get_float(cov_config, "bandwidth")
    group_debias = get_bool(cov_config, "group_debias")
    clusters = get_array_like(cov_config, "clusters")

    data = (y, x, params, entity_ids, time_ids)
    shared = {"debiased": debiased, "extra_df": extra_df}

    if estimator is HomoskedasticCovariance:
        return HomoskedasticCovariance(*data, **shared)
    if estimator is HeteroskedasticCovariance:
        return HeteroskedasticCovariance(*data, **shared)
    if estimator is ClusteredCovariance:
        return ClusteredCovariance(
            *data, **shared, clusters=clusters, group_debias=group_debias
        )
    if estimator is DriscollKraay:
        return DriscollKraay(*data, **shared, kernel=kernel, bandwidth=bandwidth)
    # Anything not matched above is built as an ACCovariance.
    return ACCovariance(*data, **shared, kernel=kernel, bandwidth=bandwidth)
def setup_covariance_estimator(
    cov_estimators: CovarianceManager,
    cov_type: str,
    y: Float64Array,
    x: Float64Array,
    params: Float64Array,
    entity_ids: IntArray,
    time_ids: IntArray,
    *,
    debiased: bool = False,
    extra_df: int = 0,
    **cov_config: Any,
) -> Union[HomoskedasticCovariance]:
    """
    Validate the options and instantiate the estimator for ``cov_type``.

    Parameters
    ----------
    cov_estimators : CovarianceManager
        Lookup indexed by ``cov_type`` that yields the estimator class.
    cov_type : str
        Key used to select the estimator class from ``cov_estimators``.
    y, x, params : Float64Array
        Forwarded positionally, in this order, to the estimator.
    entity_ids, time_ids : IntArray
        Forwarded positionally after ``params``.
    debiased : bool
        Forwarded to the estimator as ``debiased``.
    extra_df : int
        Forwarded to the estimator as ``extra_df``.
    **cov_config
        Optional settings. Each key must appear in the selected estimator's
        ``ALLOWED_KWARGS``. ``clusters`` and ``group_debias`` are passed to
        ``ClusteredCovariance``; ``kernel`` and ``bandwidth`` are passed to
        ``DriscollKraay`` and ``ACCovariance``.

    Returns
    -------
    HomoskedasticCovariance
        The constructed estimator instance (annotated as the homoskedastic
        type; the concrete class depends on ``cov_type``).

    Raises
    ------
    ValueError
        If ``cov_config`` contains a key that is not listed in the selected
        estimator's ``ALLOWED_KWARGS``.
    """
    estimator = cov_estimators[cov_type]

    # Collect the option names the selected estimator does not recognise.
    unknown_kwargs = []
    for key in cov_config:
        name = str(key)
        if name not in estimator.ALLOWED_KWARGS:
            unknown_kwargs.append(name)

    if unknown_kwargs:
        if not estimator.ALLOWED_KWARGS:
            kwarg_err = "does not support any keyword arguments"
        else:
            allowed = ", ".join(estimator.ALLOWED_KWARGS)
            kwarg_err = f"only supports the keyword arguments: {allowed}"
        raise ValueError(
            f"Covariance estimator {estimator.__name__} {kwarg_err}. Unknown keyword "
            f"arguments were passed to the estimator. The unknown keyword argument(s) "
            f"are: {', '.join(unknown_kwargs)} "
        )

    # Every option is read before dispatching, whichever estimator was chosen.
    kernel = get_string(cov_config, "kernel")
    bandwidth = get_float(cov_config, "bandwidth")
    group_debias = get_bool(cov_config, "group_debias")
    clusters = get_array_like(cov_config, "clusters")

    data = (y, x, params, entity_ids, time_ids)
    shared = {"debiased": debiased, "extra_df": extra_df}

    if estimator is HomoskedasticCovariance:
        return HomoskedasticCovariance(*data, **shared)
    if estimator is HeteroskedasticCovariance:
        return HeteroskedasticCovariance(*data, **shared)
    if estimator is ClusteredCovariance:
        return ClusteredCovariance(
            *data, **shared, clusters=clusters, group_debias=group_debias
        )
    if estimator is DriscollKraay:
        return DriscollKraay(*data, **shared, kernel=kernel, bandwidth=bandwidth)
    # Anything not matched above is built as an ACCovariance.
    return ACCovariance(*data, **shared, kernel=kernel, bandwidth=bandwidth)
def fit(
    self,
    center: bool = True,
    use_cue: bool = False,
    steps: int = 2,
    disp: int = 10,
    max_iter: int = 1000,
    cov_type: str = "robust",
    debiased: bool = True,
    **cov_config: Union[bool, int, str],
) -> GMMFactorModelResults:
    """
    Estimate the model parameters by GMM

    Parameters
    ----------
    center : bool, optional
        Whether the moment conditions are centered before the weighting
        matrix is computed.
    use_cue : bool, optional
        Whether to use the continuously updating estimator.
    steps : int, optional
        Number of estimation steps. 2 corresponds to the standard efficient
        GMM estimator. Larger values iterate until the objective changes by
        less than 1e-6 or the number of steps is reached.
    disp : int, optional
        Number of iterations between printed updates. 0 or negative values
        suppress output.
    max_iter : int, positive, optional
        Maximum number of iterations when minimizing the objective.
    cov_type : str, optional
        Name of the covariance estimator: 'robust', 'heteroskedastic' or
        'kernel'.
    debiased : bool, optional
        Whether to debias the covariance estimator using a degree of
        freedom adjustment.
    **cov_config
        Additional covariance-specific options. See Notes.

    Returns
    -------
    GMMFactorModelResults
        Results class with parameter estimates, covariance and test
        statistics

    Raises
    ------
    ValueError
        If ``cov_type`` is not one of the supported names.

    Notes
    -----
    The kernel covariance estimator takes the optional arguments
    ``kernel``, one of 'bartlett', 'parzen' or 'qs' (quadratic spectral)
    and ``bandwidth`` (a positive integer).
    """
    nobs, n = self.portfolios.shape
    k = self.factors.shape[1]
    excess_returns = not self._risk_free
    nrf = int(not excess_returns)

    def _optimize(objective, start, obj_args):
        # Single run of the minimizer with the progress callback attached.
        progress = callback_factory(objective, obj_args, disp=disp)
        return minimize(
            objective,
            start,
            args=obj_args,
            callback=progress,
            options={"disp": bool(disp), "maxiter": max_iter},
        )

    # 1. Starting values - use the 2 pass estimator
    two_pass = LinearFactorModel(
        self.portfolios, self.factors, risk_free=self._risk_free
    ).fit()
    betas = np.asarray(two_pass.betas).ravel()
    lam = np.asarray(two_pass.risk_premia)
    mu = self.factors.ndarray.mean(0)
    sv = np.r_[betas, lam, mu][:, None]
    g = self._moments(sv, excess_returns)
    if center:
        g -= g.mean(0)[None, :]

    # Select the weight estimator; kernel options are only read for 'kernel'.
    kernel: Optional[str] = None
    bandwidth: Optional[float] = None
    if cov_type in ("robust", "heteroskedastic"):
        use_kernel = False
        weight_est_instance = HeteroskedasticWeight(g, center=center)
    elif cov_type == "kernel":
        use_kernel = True
        kernel = get_string(cov_config, "kernel")
        bandwidth = get_float(cov_config, "bandwidth")
        weight_est_instance = KernelWeight(
            g, center=center, kernel=kernel, bandwidth=bandwidth
        )
    else:
        raise ValueError("Unknown weight: {0}".format(cov_type))

    # 2. Step 1 using the weight computed from the starting values
    w = weight_est_instance.w(g)
    opt_res = _optimize(self._j, sv, (excess_returns, w))
    params = opt_res.x
    last_obj = opt_res.fun
    iters = 1

    # 3. Either one CUE pass, or further steps that re-weight at the latest
    #    estimates until the objective stops changing
    if use_cue:
        cue_args = (excess_returns, weight_est_instance)
        opt_res = _optimize(self._j_cue, params, cue_args)
        params = opt_res.x
    else:
        while iters < steps:
            iters += 1
            g = self._moments(params, excess_returns)
            w = weight_est_instance.w(g)
            opt_res = _optimize(self._j, params, (excess_returns, w))
            params = opt_res.x
            obj = opt_res.fun
            if np.abs(obj - last_obj) < 1e-6:
                break
            last_obj = obj

    # 4. Compute final S and G for inference
    g = self._moments(params, excess_returns)
    s = g.T @ g / nobs
    jac = self._jacobian(params, excess_returns)

    cov_kwargs = {
        "jacobian": jac,
        "center": center,
        "debiased": debiased,
        "df": self.factors.shape[1],
    }
    if use_kernel:
        cov_est_inst = KernelCovariance(
            g, **cov_kwargs, kernel=kernel, bandwidth=bandwidth
        )
    else:
        cov_est_inst = HeteroskedasticCovariance(g, **cov_kwargs)

    full_vcv = cov_est_inst.cov
    # Risk premia follow the n * k stacked betas in the parameter vector.
    rp_sel = slice(n * k, n * k + k + nrf)
    rp = params[rp_sel]
    rp_cov = full_vcv[rp_sel, rp_sel]
    # Every (k + 1)-th moment, starting at 0, is used for the alphas.
    alpha_sel = slice(0, n * (k + 1), k + 1)
    alphas = g.mean(0)[alpha_sel, None]
    alpha_vcv = s[alpha_sel, alpha_sel] / nobs
    stat = self._j(params, excess_returns, w)
    jstat = WaldTestStatistic(
        stat, "All alphas are 0", n - k - nrf, name="J-statistic"
    )

    # R2 calculation
    betas = np.reshape(params[: n * k], (n, k))
    resids = self.portfolios.ndarray - self.factors.ndarray @ betas.T
    resids -= resids.mean(0)[None, :]
    residual_ss = (resids ** 2).sum()
    returns = self.portfolios.ndarray
    demeaned = returns - returns.mean(0)[None, :]
    total_ss = (demeaned ** 2).sum()
    r2 = 1.0 - residual_ss / total_ss

    # Parameter labels: betas by portfolio then factor, lambdas, then mus.
    param_names = [
        "beta-{0}-{1}".format(portfolio, factor)
        for portfolio in self.portfolios.cols
        for factor in self.factors.cols
    ]
    if not excess_returns:
        param_names.append("lambda-risk_free")
    param_names += ["lambda-{0}".format(f) for f in self.factors.cols]
    param_names += ["mu-{0}".format(f) for f in self.factors.cols]

    rp_names = list(self.factors.cols)
    if not excess_returns:
        rp_names.insert(0, "risk_free")
    params = np.c_[alphas, betas]

    # 5. Return values
    return GMMFactorModelResults(
        AttrDict(
            params=params,
            cov=full_vcv,
            betas=betas,
            rp=rp,
            rp_cov=rp_cov,
            alphas=alphas,
            alpha_vcv=alpha_vcv,
            jstat=jstat,
            rsquared=r2,
            total_ss=total_ss,
            residual_ss=residual_ss,
            param_names=param_names,
            portfolio_names=self.portfolios.cols,
            factor_names=self.factors.cols,
            name=self._name,
            cov_type=cov_type,
            model=self,
            nobs=nobs,
            rp_names=rp_names,
            iter=iters,
            cov_est=cov_est_inst,
        )
    )
def fit(
    self,
    cov_type: str = "robust",
    debiased: bool = True,
    **cov_config: Union[bool, int, str],
) -> LinearFactorModelResults:
    """
    Estimate model parameters

    Parameters
    ----------
    cov_type : str, optional
        Name of covariance estimator: 'robust', 'heteroskedastic' or
        'kernel'.
    debiased : bool, optional
        Flag indicating whether to debias the covariance estimator using
        a degree of freedom adjustment
    **cov_config
        Additional covariance-specific options. See Notes.

    Returns
    -------
    LinearFactorModelResults
        Results class with parameter estimates, covariance and test
        statistics

    Raises
    ------
    ValueError
        If ``cov_type`` is not one of the supported names.

    Notes
    -----
    The kernel covariance estimator takes the optional arguments
    ``kernel``, one of 'bartlett', 'parzen' or 'qs' (quadratic spectral)
    and ``bandwidth`` (a positive integer).
    """
    nobs, nf, nport, nrf, s1, s2, s3 = self._boundaries()
    excess_returns = not self._risk_free
    f = self.factors.ndarray
    p = self.portfolios.ndarray
    nport = p.shape[1]

    # Step 1, n regressions to get B
    fc = np.c_[np.ones((nobs, 1)), f]
    b = lstsq(fc, p, rcond=None)[0]  # nf+1 by np
    eps = p - fc @ b
    if excess_returns:
        # Drop the intercept row; only factor loadings are kept.
        betas = b[1:].T
    else:
        # Keep the intercept slot but overwrite it with ones.
        betas = b.T.copy()
        betas[:, 0] = 1.0

    # Step 2, weighted least squares of mean returns on the loadings
    sigma_m12 = self._sigma_m12
    lam = lstsq(sigma_m12 @ betas, sigma_m12 @ p.mean(0)[:, None], rcond=None)[0]
    expected = betas @ lam
    pricing_errors = p - expected.T
    # Moments
    alphas = pricing_errors.mean(0)[:, None]
    moments = self._moments(eps, betas, alphas, pricing_errors)
    # Jacobian
    jacobian = self._jacobian(betas, lam, alphas)

    if cov_type not in ("robust", "heteroskedastic", "kernel"):
        raise ValueError("Unknown weight: {0}".format(cov_type))
    if cov_type in ("robust", "heteroskedastic"):
        cov_est_inst = HeteroskedasticCovariance(
            moments,
            jacobian=jacobian,
            center=False,
            debiased=debiased,
            df=fc.shape[1],
        )
    else:  # 'kernel':
        bandwidth = get_float(cov_config, "bandwidth")
        kernel = get_string(cov_config, "kernel")
        cov_est_inst = KernelCovariance(
            moments,
            jacobian=jacobian,
            center=False,
            debiased=debiased,
            df=fc.shape[1],
            kernel=kernel,
            bandwidth=bandwidth,
        )

    # VCV
    full_vcv = cov_est_inst.cov
    alpha_vcv = full_vcv[s2:, s2:]
    # alphas is a column vector, so the quadratic form is a 1x1 array.
    # Calling float() on an array with ndim > 0 is deprecated in
    # NumPy >= 1.25, so reduce it to 0-d before converting.
    stat = float(np.squeeze(alphas.T @ np.linalg.pinv(alpha_vcv) @ alphas))
    jstat = WaldTestStatistic(
        stat, "All alphas are 0", nport - nf - nrf, name="J-statistic"
    )

    total_ss = ((p - p.mean(0)[None, :]) ** 2).sum()
    residual_ss = (eps ** 2).sum()
    r2 = 1 - residual_ss / total_ss
    rp = lam
    rp_cov = full_vcv[s1:s2, s1:s2]
    # Drop the column of ones that was inserted when a risk-free rate is
    # estimated, so the reported betas only contain factor loadings.
    betas = betas if excess_returns else betas[:, 1:]
    params = np.c_[alphas, betas]
    param_names = []
    for portfolio in self.portfolios.cols:
        param_names.append("alpha-{0}".format(portfolio))
        for factor in self.factors.cols:
            param_names.append("beta-{0}-{1}".format(portfolio, factor))
    if not excess_returns:
        param_names.append("lambda-risk_free")
    for factor in self.factors.cols:
        param_names.append("lambda-{0}".format(factor))

    # Pivot vcv to remove unnecessary and have correct order
    order = np.reshape(np.arange(s1), (nport, nf + 1))
    # The first entry of each portfolio's group is redirected to the
    # matching index in the s2:s3 range.
    order[:, 0] = np.arange(s2, s3)
    order = order.ravel()
    order = np.r_[order, s1:s2]
    full_vcv = full_vcv[order][:, order]
    factor_names = list(self.factors.cols)
    rp_names = factor_names[:]
    if not excess_returns:
        rp_names.insert(0, "risk_free")
    res = AttrDict(
        params=params,
        cov=full_vcv,
        betas=betas,
        rp=rp,
        rp_cov=rp_cov,
        alphas=alphas,
        alpha_vcv=alpha_vcv,
        jstat=jstat,
        rsquared=r2,
        total_ss=total_ss,
        residual_ss=residual_ss,
        param_names=param_names,
        portfolio_names=self.portfolios.cols,
        factor_names=factor_names,
        name=self._name,
        cov_type=cov_type,
        model=self,
        nobs=nobs,
        rp_names=rp_names,
        cov_est=cov_est_inst,
    )

    return LinearFactorModelResults(res)
def fit(
    self,
    cov_type: str = "robust",
    debiased: bool = True,
    **cov_config: Union[str, float],
) -> LinearFactorModelResults:
    """
    Estimate model parameters

    Parameters
    ----------
    cov_type : str, optional
        Name of covariance estimator
    debiased : bool, optional
        Flag indicating whether to debias the covariance estimator using
        a degree of freedom adjustment
    **cov_config : dict
        Additional covariance-specific options. See Notes.

    Returns
    -------
    LinearFactorModelResults
        Results class with parameter estimates, covariance and test
        statistics

    Raises
    ------
    ValueError
        If ``cov_type`` is not 'robust', 'heteroskedastic' or 'kernel'.

    Notes
    -----
    Supported covariance estimators are:

    * 'robust' - Heteroskedasticity-robust covariance estimator
      ('heteroskedastic' is accepted as an alias)
    * 'kernel' - Heteroskedasticity and Autocorrelation consistent (HAC)
      covariance estimator

    The kernel covariance estimator takes the optional arguments
    ``kernel``, one of 'bartlett', 'parzen' or 'qs' (quadratic spectral)
    and ``bandwidth`` (a positive integer).
    """
    p = self.portfolios.ndarray
    f = self.factors.ndarray
    nportfolio = p.shape[1]
    nobs, nfactor = f.shape
    fc = np.c_[np.ones((nobs, 1)), f]
    # Risk premia are the factor sample means.
    rp = f.mean(0)[:, None]
    fe = f - f.mean(0)
    b = np.linalg.pinv(fc) @ p
    eps = p - fc @ b
    # The first row of b holds the intercepts.
    alphas = b[:1].T

    nloading = (nfactor + 1) * nportfolio
    # Inverse Jacobian: one inv(fc'fc / nobs) block per portfolio, with an
    # identity block for the trailing nfactor demeaned-factor moments.
    xpxi = np.eye(nloading + nfactor)
    xpxi[:nloading, :nloading] = np.kron(
        np.eye(nportfolio), np.linalg.pinv(fc.T @ fc / nobs)
    )
    f_rep = np.tile(fc, (1, nportfolio))
    eps_rep = np.tile(eps, (nfactor + 1, 1))  # 1 2 3 ... 25 1 2 3 ...
    eps_rep = eps_rep.ravel(order="F")
    eps_rep = np.reshape(eps_rep, (nobs, (nfactor + 1) * nportfolio), order="F")
    xe = f_rep * eps_rep
    xe = np.c_[xe, fe]
    if cov_type in ("robust", "heteroskedastic"):
        cov_est = HeteroskedasticCovariance(
            xe, inv_jacobian=xpxi, center=False, debiased=debiased, df=fc.shape[1]
        )
        rp_cov_est = HeteroskedasticCovariance(
            fe, jacobian=np.eye(f.shape[1]), center=False, debiased=debiased, df=1
        )
    elif cov_type == "kernel":
        kernel = get_string(cov_config, "kernel")
        bandwidth = get_float(cov_config, "bandwidth")
        cov_est = KernelCovariance(
            xe,
            inv_jacobian=xpxi,
            center=False,
            debiased=debiased,
            df=fc.shape[1],
            bandwidth=bandwidth,
            kernel=kernel,
        )
        # Reuse the bandwidth reported by the full estimator so that the
        # risk premia covariance is computed with the same value.
        bw = cov_est.bandwidth
        rp_cov_est = KernelCovariance(
            fe,
            jacobian=np.eye(f.shape[1]),
            center=False,
            debiased=debiased,
            df=1,
            bandwidth=bw,
            kernel=kernel,
        )
    else:
        raise ValueError("Unknown cov_type: {0}".format(cov_type))
    full_vcv = cov_est.cov
    rp_cov = rp_cov_est.cov
    vcv = full_vcv[:nloading, :nloading]

    # Rearrange VCV
    order = np.reshape(
        np.arange((nfactor + 1) * nportfolio), (nportfolio, nfactor + 1)
    )
    # Transposing groups the first (alpha) entry of every portfolio first.
    order = order.T.ravel()
    vcv = vcv[order][:, order]

    # Return values
    alpha_vcv = vcv[:nportfolio, :nportfolio]
    # alphas is a column vector, so the quadratic form is a 1x1 array.
    # Calling float() on an array with ndim > 0 is deprecated in
    # NumPy >= 1.25, so reduce it to 0-d before converting.
    stat = float(np.squeeze(alphas.T @ np.linalg.pinv(alpha_vcv) @ alphas))
    jstat = WaldTestStatistic(
        stat, "All alphas are 0", nportfolio, name="J-statistic"
    )
    params = b.T
    betas = b[1:].T
    residual_ss = (eps ** 2).sum()
    e = p - p.mean(0)[None, :]
    total_ss = (e ** 2).sum()
    r2 = 1 - residual_ss / total_ss
    param_names = []
    for portfolio in self.portfolios.cols:
        param_names.append("alpha-{0}".format(portfolio))
        for factor in self.factors.cols:
            param_names.append("beta-{0}-{1}".format(portfolio, factor))
    for factor in self.factors.cols:
        param_names.append("lambda-{0}".format(factor))

    res = AttrDict(
        params=params,
        cov=full_vcv,
        betas=betas,
        rp=rp,
        rp_cov=rp_cov,
        alphas=alphas,
        alpha_vcv=alpha_vcv,
        jstat=jstat,
        rsquared=r2,
        total_ss=total_ss,
        residual_ss=residual_ss,
        param_names=param_names,
        portfolio_names=self.portfolios.cols,
        factor_names=self.factors.cols,
        name=self._name,
        cov_type=cov_type,
        model=self,
        nobs=nobs,
        rp_names=self.factors.cols,
        cov_est=cov_est,
    )

    return LinearFactorModelResults(res)