Example #1
import pytest

# get_string is the typed getter under test (a shared helper of the project).


def test_string():
    assert get_string({"v": "1"}, "v") == "1"
    assert get_string({"v": True}, "a") is None
    with pytest.raises(TypeError, match=r".not a str"):
        get_string({"v": 1.0}, "v")
    with pytest.raises(TypeError, match=r".not a str"):
        get_string({"v": b"1"}, "v")
    assert get_string({"v": None}, "v") is None
Example #2
import pytest

# get_float is the typed getter under test (a shared helper of the project).


def test_float():
    assert get_float({"v": True}, "v") == 1.0
    assert get_float({"v": True}, "a") is None
    assert get_float({"v": None}, "v") is None
    with pytest.raises(TypeError, match=r".not a float"):
        get_float({"v": "1.0"}, "v")
Example #3
def setup_covariance_estimator(
    cov_estimators: CovarianceManager,
    cov_type: str,
    y: NDArray,
    x: NDArray,
    params: NDArray,
    entity_ids: NDArray,
    time_ids: NDArray,
    *,
    debiased: bool = False,
    extra_df: int = 0,
    **cov_config: Any,
) -> HomoskedasticCovariance:
    estimator = cov_estimators[cov_type]
    kernel = get_string(cov_config, "kernel")
    bandwidth = get_float(cov_config, "bandwidth")
    group_debias = get_bool(cov_config, "group_debias")
    clusters = get_array_like(cov_config, "clusters")

    if estimator is HomoskedasticCovariance:
        return HomoskedasticCovariance(y,
                                       x,
                                       params,
                                       entity_ids,
                                       time_ids,
                                       debiased=debiased,
                                       extra_df=extra_df)
    elif estimator is HeteroskedasticCovariance:
        return HeteroskedasticCovariance(y,
                                         x,
                                         params,
                                         entity_ids,
                                         time_ids,
                                         debiased=debiased,
                                         extra_df=extra_df)
    elif estimator is ClusteredCovariance:
        return ClusteredCovariance(
            y,
            x,
            params,
            entity_ids,
            time_ids,
            debiased=debiased,
            extra_df=extra_df,
            clusters=clusters,
            group_debias=group_debias,
        )
    elif estimator is DriscollKraay:
        return DriscollKraay(
            y,
            x,
            params,
            entity_ids,
            time_ids,
            debiased=debiased,
            extra_df=extra_df,
            kernel=kernel,
            bandwidth=bandwidth,
        )
    else:  # ACCovariance:
        return ACCovariance(
            y,
            x,
            params,
            entity_ids,
            time_ids,
            debiased=debiased,
            extra_df=extra_df,
            kernel=kernel,
            bandwidth=bandwidth,
        )
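
A hypothetical call of this dispatcher; the estimator name "clustered" and the input arrays are illustrative assumptions, since the contents of the CovarianceManager are not shown here:

cov = setup_covariance_estimator(
    cov_estimators,        # CovarianceManager mapping names to classes
    "clustered",           # assumed name; selects ClusteredCovariance
    y, x, params, entity_ids, time_ids,
    debiased=True,
    clusters=cluster_ids,  # extracted from **cov_config via get_array_like
)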
Example #4
def setup_covariance_estimator(
    cov_estimators: CovarianceManager,
    cov_type: str,
    y: Float64Array,
    x: Float64Array,
    params: Float64Array,
    entity_ids: IntArray,
    time_ids: IntArray,
    *,
    debiased: bool = False,
    extra_df: int = 0,
    **cov_config: Any,
) -> HomoskedasticCovariance:
    estimator = cov_estimators[cov_type]
    unknown_kwargs = [
        str(key) for key in cov_config
        if str(key) not in estimator.ALLOWED_KWARGS
    ]
    if unknown_kwargs:
        if estimator.ALLOWED_KWARGS:
            allowed = ", ".join(estimator.ALLOWED_KWARGS)
            kwarg_err = f"only supports the keyword arguments: {allowed}"
        else:
            kwarg_err = "does not support any keyword arguments"
        msg = (
            f"Covariance estimator {estimator.__name__} {kwarg_err}. Unknown keyword "
            f"arguments were passed to the estimator. The unknown keyword argument(s) "
            f"are: {', '.join(unknown_kwargs)} ")
        raise ValueError(msg)
    kernel = get_string(cov_config, "kernel")
    bandwidth = get_float(cov_config, "bandwidth")
    group_debias = get_bool(cov_config, "group_debias")
    clusters = get_array_like(cov_config, "clusters")

    if estimator is HomoskedasticCovariance:
        return HomoskedasticCovariance(y,
                                       x,
                                       params,
                                       entity_ids,
                                       time_ids,
                                       debiased=debiased,
                                       extra_df=extra_df)
    elif estimator is HeteroskedasticCovariance:
        return HeteroskedasticCovariance(y,
                                         x,
                                         params,
                                         entity_ids,
                                         time_ids,
                                         debiased=debiased,
                                         extra_df=extra_df)
    elif estimator is ClusteredCovariance:
        return ClusteredCovariance(
            y,
            x,
            params,
            entity_ids,
            time_ids,
            debiased=debiased,
            extra_df=extra_df,
            clusters=clusters,
            group_debias=group_debias,
        )
    elif estimator is DriscollKraay:
        return DriscollKraay(
            y,
            x,
            params,
            entity_ids,
            time_ids,
            debiased=debiased,
            extra_df=extra_df,
            kernel=kernel,
            bandwidth=bandwidth,
        )
    else:  # ACCovariance:
        return ACCovariance(
            y,
            x,
            params,
            entity_ids,
            time_ids,
            debiased=debiased,
            extra_df=extra_df,
            kernel=kernel,
            bandwidth=bandwidth,
        )
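
This version additionally validates **cov_config against the estimator's ALLOWED_KWARGS before dispatching. A hedged sketch of the failure path, assuming a name such as "unadjusted" maps to HomoskedasticCovariance and that it allows no extra keywords:

try:
    setup_covariance_estimator(
        cov_estimators, "unadjusted",  # assumed name for HomoskedasticCovariance
        y, x, params, entity_ids, time_ids,
        bandwidth=12.0,  # not in ALLOWED_KWARGS for this estimator
    )
except ValueError as exc:
    print(exc)  # "Covariance estimator HomoskedasticCovariance does not support ..."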
Example #5
    def fit(
        self,
        center: bool = True,
        use_cue: bool = False,
        steps: int = 2,
        disp: int = 10,
        max_iter: int = 1000,
        cov_type: str = "robust",
        debiased: bool = True,
        **cov_config: Union[bool, int, str],
    ) -> GMMFactorModelResults:
        """
        Estimate model parameters

        Parameters
        ----------
        center : bool, optional
            Flag indicating to center the moment conditions before computing
            the weighting matrix.
        use_cue : bool, optional
            Flag indicating to use continuously updating estimator
        steps : int, optional
            Number of steps to use when estimating parameters.  2 corresponds
            to the standard efficient GMM estimator. Higher values will
            iterate until convergence or up to the number of steps given
        disp : int, optional
            Number of iterations between printed update. 0 or negative values
            suppresses output
        max_iter : int, positive, optional
            Maximum number of iterations when minimizing objective
        cov_type : str, optional
            Name of covariance estimator
        debiased : bool, optional
            Flag indicating whether to debias the covariance estimator using
            a degree of freedom adjustment
        **cov_config
            Additional covariance-specific options.  See Notes.

        Returns
        -------
        GMMFactorModelResults
            Results class with parameter estimates, covariance and test statistics

        Notes
        -----
        The kernel covariance estimator takes the optional arguments
        ``kernel``, one of 'bartlett', 'parzen' or 'qs' (quadratic spectral)
        and ``bandwidth`` (a positive number).
        """

        nobs, n = self.portfolios.shape
        k = self.factors.shape[1]
        excess_returns = not self._risk_free
        nrf = int(not bool(excess_returns))
        # 1. Starting Values - use 2 pass
        mod = LinearFactorModel(self.portfolios,
                                self.factors,
                                risk_free=self._risk_free)
        res = mod.fit()
        betas = np.asarray(res.betas).ravel()
        lam = np.asarray(res.risk_premia)
        mu = self.factors.ndarray.mean(0)
        sv = np.r_[betas, lam, mu][:, None]
        g = self._moments(sv, excess_returns)
        if center:
            g = g - g.mean(0)[None, :]
        kernel: Optional[str] = None
        bandwidth: Optional[float] = None
        if cov_type not in ("robust", "heteroskedastic", "kernel"):
            raise ValueError("Unknown weight: {0}".format(cov_type))
        if cov_type in ("robust", "heteroskedastic"):
            weight_est_instance = HeteroskedasticWeight(g, center=center)
            cov_est = HeteroskedasticCovariance
        else:  # 'kernel':
            kernel = get_string(cov_config, "kernel")
            bandwidth = get_float(cov_config, "bandwidth")
            weight_est_instance = KernelWeight(g,
                                               center=center,
                                               kernel=kernel,
                                               bandwidth=bandwidth)
            cov_est = KernelCovariance

        w = weight_est_instance.w(g)

        args = (excess_returns, w)

        # 2. Step 1 using w = inv(s) from SV
        callback = callback_factory(self._j, args, disp=disp)
        opt_res = minimize(
            self._j,
            sv,
            args=args,
            callback=callback,
            options={
                "disp": bool(disp),
                "maxiter": max_iter
            },
        )
        params = opt_res.x
        last_obj = opt_res.fun
        iters = 1
        # 3. Step 2 using step 1 estimates
        if not use_cue:
            while iters < steps:
                iters += 1
                g = self._moments(params, excess_returns)
                w = weight_est_instance.w(g)
                args = (excess_returns, w)

                # Re-minimize using the weighting matrix updated from the
                # previous step's estimates
                callback = callback_factory(self._j, args, disp=disp)
                opt_res = minimize(
                    self._j,
                    params,
                    args=args,
                    callback=callback,
                    options={
                        "disp": bool(disp),
                        "maxiter": max_iter
                    },
                )
                params = opt_res.x
                obj = opt_res.fun
                if np.abs(obj - last_obj) < 1e-6:
                    break
                last_obj = obj

        else:
            cue_args = (excess_returns, weight_est_instance)
            callback = callback_factory(self._j_cue, cue_args, disp=disp)
            opt_res = minimize(
                self._j_cue,
                params,
                args=cue_args,
                callback=callback,
                options={
                    "disp": bool(disp),
                    "maxiter": max_iter
                },
            )
            params = opt_res.x

        # 4. Compute final S and G for inference
        g = self._moments(params, excess_returns)
        s = g.T @ g / nobs
        jac = self._jacobian(params, excess_returns)
        if cov_est is HeteroskedasticCovariance:
            cov_est_inst = HeteroskedasticCovariance(
                g,
                jacobian=jac,
                center=center,
                debiased=debiased,
                df=self.factors.shape[1],
            )
        else:
            cov_est_inst = KernelCovariance(
                g,
                jacobian=jac,
                center=center,
                debiased=debiased,
                df=self.factors.shape[1],
                kernel=kernel,
                bandwidth=bandwidth,
            )

        full_vcv = cov_est_inst.cov
        sel = slice((n * k), (n * k + k + nrf))
        rp = params[sel]
        rp_cov = full_vcv[sel, sel]
        sel = slice(0, (n * (k + 1)), (k + 1))
        alphas = g.mean(0)[sel, None]
        alpha_vcv = s[sel, sel] / nobs
        stat = self._j(params, excess_returns, w)
        jstat = WaldTestStatistic(stat,
                                  "All alphas are 0",
                                  n - k - nrf,
                                  name="J-statistic")

        # R2 calculation
        betas = np.reshape(params[:(n * k)], (n, k))
        resids = self.portfolios.ndarray - self.factors.ndarray @ betas.T
        resids -= resids.mean(0)[None, :]
        residual_ss = (resids**2).sum()
        total = self.portfolios.ndarray
        total = total - total.mean(0)[None, :]
        total_ss = (total**2).sum()
        r2 = 1.0 - residual_ss / total_ss
        param_names = []
        for portfolio in self.portfolios.cols:
            for factor in self.factors.cols:
                param_names.append("beta-{0}-{1}".format(portfolio, factor))
        if not excess_returns:
            param_names.append("lambda-risk_free")
        param_names.extend(["lambda-{0}".format(f) for f in self.factors.cols])
        param_names.extend(["mu-{0}".format(f) for f in self.factors.cols])
        rp_names = list(self.factors.cols)[:]
        if not excess_returns:
            rp_names.insert(0, "risk_free")
        params = np.c_[alphas, betas]
        # 5. Return values
        res_dict = AttrDict(
            params=params,
            cov=full_vcv,
            betas=betas,
            rp=rp,
            rp_cov=rp_cov,
            alphas=alphas,
            alpha_vcv=alpha_vcv,
            jstat=jstat,
            rsquared=r2,
            total_ss=total_ss,
            residual_ss=residual_ss,
            param_names=param_names,
            portfolio_names=self.portfolios.cols,
            factor_names=self.factors.cols,
            name=self._name,
            cov_type=cov_type,
            model=self,
            nobs=nobs,
            rp_names=rp_names,
            iter=iters,
            cov_est=cov_est_inst,
        )

        return GMMFactorModelResults(res_dict)
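
As the Notes describe, kernel options travel through **cov_config and are recovered with the typed getters. A hypothetical call, assuming mod is a model exposing this fit():

res = mod.fit(
    steps=10,
    disp=0,              # suppress iteration output
    cov_type="kernel",
    kernel="bartlett",   # one of 'bartlett', 'parzen', 'qs'
    bandwidth=12.0,
)
print(res.jstat)  # assuming the results class exposes the J-statistic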
Example #6
    def fit(
        self,
        cov_type: str = "robust",
        debiased: bool = True,
        **cov_config: Union[bool, int, str],
    ) -> LinearFactorModelResults:
        """
        Estimate model parameters

        Parameters
        ----------
        cov_type : str, optional
            Name of covariance estimator
        debiased : bool, optional
            Flag indicating whether to debias the covariance estimator using
            a degree of freedom adjustment
        **cov_config
            Additional covariance-specific options.  See Notes.

        Returns
        -------
        LinearFactorModelResults
            Results class with parameter estimates, covariance and test statistics

        Notes
        -----
        The kernel covariance estimator takes the optional arguments
        ``kernel``, one of 'bartlett', 'parzen' or 'qs' (quadratic spectral)
        and ``bandwidth`` (a positive number).
        """
        nobs, nf, nport, nrf, s1, s2, s3 = self._boundaries()
        excess_returns = not self._risk_free
        f = self.factors.ndarray
        p = self.portfolios.ndarray
        nport = p.shape[1]

        # Step 1, n regressions to get B
        fc = np.c_[np.ones((nobs, 1)), f]
        b = lstsq(fc, p, rcond=None)[0]  # nf+1 by np
        eps = p - fc @ b
        if excess_returns:
            betas = b[1:].T
        else:
            betas = b.T.copy()
            betas[:, 0] = 1.0

        sigma_m12 = self._sigma_m12
        lam = lstsq(sigma_m12 @ betas,
                    sigma_m12 @ p.mean(0)[:, None],
                    rcond=None)[0]
        expected = betas @ lam
        pricing_errors = p - expected.T
        # Moments
        alphas = pricing_errors.mean(0)[:, None]
        moments = self._moments(eps, betas, alphas, pricing_errors)
        # Jacobian
        jacobian = self._jacobian(betas, lam, alphas)

        if cov_type not in ("robust", "heteroskedastic", "kernel"):
            raise ValueError("Unknown weight: {0}".format(cov_type))
        if cov_type in ("robust", "heteroskedastic"):
            cov_est_inst = HeteroskedasticCovariance(
                moments,
                jacobian=jacobian,
                center=False,
                debiased=debiased,
                df=fc.shape[1],
            )
        else:  # 'kernel':
            bandwidth = get_float(cov_config, "bandwidth")
            kernel = get_string(cov_config, "kernel")
            cov_est_inst = KernelCovariance(
                moments,
                jacobian=jacobian,
                center=False,
                debiased=debiased,
                df=fc.shape[1],
                kernel=kernel,
                bandwidth=bandwidth,
            )

        # VCV
        full_vcv = cov_est_inst.cov
        alpha_vcv = full_vcv[s2:, s2:]
        stat = float(alphas.T @ np.linalg.pinv(alpha_vcv) @ alphas)
        jstat = WaldTestStatistic(stat,
                                  "All alphas are 0",
                                  nport - nf - nrf,
                                  name="J-statistic")

        total_ss = ((p - p.mean(0)[None, :])**2).sum()
        residual_ss = (eps**2).sum()
        r2 = 1 - residual_ss / total_ss
        rp = lam
        rp_cov = full_vcv[s1:s2, s1:s2]
        betas = betas if excess_returns else betas[:, 1:]
        params = np.c_[alphas, betas]
        param_names = []
        for portfolio in self.portfolios.cols:
            param_names.append("alpha-{0}".format(portfolio))
            for factor in self.factors.cols:
                param_names.append("beta-{0}-{1}".format(portfolio, factor))
        if not excess_returns:
            param_names.append("lambda-risk_free")
        for factor in self.factors.cols:
            param_names.append("lambda-{0}".format(factor))

        # Pivot vcv to remove unnecessary and have correct order
        order = np.reshape(np.arange(s1), (nport, nf + 1))
        order[:, 0] = np.arange(s2, s3)
        order = order.ravel()
        order = np.r_[order, s1:s2]
        full_vcv = full_vcv[order][:, order]
        factor_names = list(self.factors.cols)
        rp_names = factor_names[:]
        if not excess_returns:
            rp_names.insert(0, "risk_free")
        res = AttrDict(
            params=params,
            cov=full_vcv,
            betas=betas,
            rp=rp,
            rp_cov=rp_cov,
            alphas=alphas,
            alpha_vcv=alpha_vcv,
            jstat=jstat,
            rsquared=r2,
            total_ss=total_ss,
            residual_ss=residual_ss,
            param_names=param_names,
            portfolio_names=self.portfolios.cols,
            factor_names=factor_names,
            name=self._name,
            cov_type=cov_type,
            model=self,
            nobs=nobs,
            rp_names=rp_names,
            cov_est=cov_est_inst,
        )

        return LinearFactorModelResults(res)
Example #7
    def fit(
        self,
        cov_type: str = "robust",
        debiased: bool = True,
        **cov_config: Union[str, float],
    ) -> LinearFactorModelResults:
        """
        Estimate model parameters

        Parameters
        ----------
        cov_type : str, optional
            Name of covariance estimator
        debiased : bool, optional
            Flag indicating whether to debias the covariance estimator using
            a degree of freedom adjustment
        **cov_config
            Additional covariance-specific options.  See Notes.

        Returns
        -------
        LinearFactorModelResults
            Results class with parameter estimates, covariance and test statistics

        Notes
        -----
        Supported covariance estimators are:

        * 'robust' - Heteroskedasticity-robust covariance estimator
        * 'kernel' - Heteroskedasticity and Autocorrelation consistent (HAC)
          covariance estimator

        The kernel covariance estimator takes the optional arguments
        ``kernel``, one of 'bartlett', 'parzen' or 'qs' (quadratic spectral)
        and ``bandwidth`` (a positive number).
        """
        p = self.portfolios.ndarray
        f = self.factors.ndarray
        nportfolio = p.shape[1]
        nobs, nfactor = f.shape
        fc = np.c_[np.ones((nobs, 1)), f]
        rp = f.mean(0)[:, None]
        fe = f - f.mean(0)
        b = np.linalg.pinv(fc) @ p
        eps = p - fc @ b
        alphas = b[:1].T

        nloading = (nfactor + 1) * nportfolio
        xpxi = np.eye(nloading + nfactor)
        xpxi[:nloading, :nloading] = np.kron(np.eye(nportfolio),
                                             np.linalg.pinv(fc.T @ fc / nobs))
        f_rep = np.tile(fc, (1, nportfolio))
        eps_rep = np.tile(eps, (nfactor + 1, 1))
        eps_rep = eps_rep.ravel(order="F")
        # Column-major ravel/reshape leaves each portfolio's residual column
        # repeated (nfactor + 1) times, aligning with the tiled regressors.
        eps_rep = np.reshape(eps_rep, (nobs, (nfactor + 1) * nportfolio),
                             order="F")
        xe = f_rep * eps_rep
        xe = np.c_[xe, fe]
        if cov_type in ("robust", "heteroskedastic"):
            cov_est = HeteroskedasticCovariance(xe,
                                                inv_jacobian=xpxi,
                                                center=False,
                                                debiased=debiased,
                                                df=fc.shape[1])
            rp_cov_est = HeteroskedasticCovariance(fe,
                                                   jacobian=np.eye(f.shape[1]),
                                                   center=False,
                                                   debiased=debiased,
                                                   df=1)
        elif cov_type == "kernel":
            kernel = get_string(cov_config, "kernel")
            bandwidth = get_float(cov_config, "bandwidth")
            cov_est = KernelCovariance(
                xe,
                inv_jacobian=xpxi,
                center=False,
                debiased=debiased,
                df=fc.shape[1],
                bandwidth=bandwidth,
                kernel=kernel,
            )
            # Reuse the bandwidth selected (or given) above so both covariance
            # estimators agree.
            bw = cov_est.bandwidth
            rp_cov_est = KernelCovariance(
                fe,
                jacobian=np.eye(f.shape[1]),
                center=False,
                debiased=debiased,
                df=1,
                bandwidth=bw,
                kernel=kernel,
            )
        else:
            raise ValueError("Unknown cov_type: {0}".format(cov_type))
        full_vcv = cov_est.cov
        rp_cov = rp_cov_est.cov
        vcv = full_vcv[:nloading, :nloading]

        # Rearrange VCV
        order = np.reshape(np.arange((nfactor + 1) * nportfolio),
                           (nportfolio, nfactor + 1))
        order = order.T.ravel()
        vcv = vcv[order][:, order]

        # Return values
        alpha_vcv = vcv[:nportfolio, :nportfolio]
        stat = float(alphas.T @ np.linalg.pinv(alpha_vcv) @ alphas)
        jstat = WaldTestStatistic(stat,
                                  "All alphas are 0",
                                  nportfolio,
                                  name="J-statistic")
        params = b.T
        betas = b[1:].T
        residual_ss = (eps**2).sum()
        e = p - p.mean(0)[None, :]
        total_ss = (e**2).sum()
        r2 = 1 - residual_ss / total_ss
        param_names = []
        for portfolio in self.portfolios.cols:
            param_names.append("alpha-{0}".format(portfolio))
            for factor in self.factors.cols:
                param_names.append("beta-{0}-{1}".format(portfolio, factor))
        for factor in self.factors.cols:
            param_names.append("lambda-{0}".format(factor))

        res = AttrDict(
            params=params,
            cov=full_vcv,
            betas=betas,
            rp=rp,
            rp_cov=rp_cov,
            alphas=alphas,
            alpha_vcv=alpha_vcv,
            jstat=jstat,
            rsquared=r2,
            total_ss=total_ss,
            residual_ss=residual_ss,
            param_names=param_names,
            portfolio_names=self.portfolios.cols,
            factor_names=self.factors.cols,
            name=self._name,
            cov_type=cov_type,
            model=self,
            nobs=nobs,
            rp_names=self.factors.cols,
            cov_est=cov_est,
        )

        return LinearFactorModelResults(res)