Exemplos de IVData.drop em Python, exemplos de linearmodels.iv.data.IVData.drop em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: test_model.py Projeto: peteos123/linearmodels

def test_drop_missing(data):
    """Dropping missing rows by hand must match the model's own handling.

    Every 33rd row of the portfolios is set to NaN.  The model is fit once
    on the raw inputs and once on ``IVData`` containers from which the rows
    flagged as null have been dropped explicitly; the estimated parameters
    are required to be equal.
    """
    portfolios = data.portfolios
    # DataFrames need positional (iloc) indexing; other inputs are sliced
    # directly.
    if not isinstance(portfolios, pd.DataFrame):
        portfolios[::33] = np.nan
    else:
        portfolios.iloc[::33] = np.nan

    automatic = TradedFactorModel(portfolios, data.factors).fit()

    port_data = IVData(portfolios)
    factor_data = IVData(data.factors)
    missing = port_data.isnull | factor_data.isnull
    for container in (port_data, factor_data):
        container.drop(missing)

    manual = TradedFactorModel(port_data, factor_data).fit()
    assert_equal(np.asarray(automatic.params), np.asarray(manual.params))

Exemplo n.º 2

0

Exibir arquivo

class Interaction(object):
    """
    Class that simplifies specifying interactions

    Parameters
    ----------
    cat : {ndarray, Series, DataFrame, DataArray}, optional
        Variables to treat as categoricals. Best format is a Categorical
        Series or DataFrame containing Categorical Series. Other formats
        are converted to Categorical Series, column-by-column. cats has
        shape (nobs, ncat).
    cont : {ndarray, Series, DataFrame, DataArray}, optional
        Variables to treat as continuous, (nobs, ncont).
    nobs : int, optional
        Number of observations. Required when both `cat` and `cont` are
        None; otherwise it is replaced by the number of rows of the
        supplied data.

    Notes
    -----
    For each variable in `cont`, computes the interaction of the variable
    and the cartesian product of the categories.

    Examples
    --------
    >>> import numpy as np
    >>> from linearmodels.iv.absorbing import Interaction
    >>> rs = np.random.RandomState(0)
    >>> n = 100000
    >>> cats = rs.randint(2, size=n)  # binary dummy
    >>> cont = rs.standard_normal((n, 3))
    >>> interact = Interaction(cats, cont)
    >>> interact.sparse.shape  # Get the shape of the dummy matrix
    (100000, 6)

    >>> rs = np.random.RandomState(0)
    >>> import pandas as pd
    >>> cats_df = pd.concat([pd.Series(pd.Categorical(rs.randint(5,size=n)))
    ...                     for _ in range(4)], axis=1)
    >>> cats_df.describe()
                 0       1       2       3
    count   100000  100000  100000  100000
    unique       5       5       5       5
    top          3       3       0       4
    freq     20251   20195   20331   20158

    >>> interact = Interaction(cats_df, cont)
    >>> interact.sparse.shape # Cart product of all cats, 5**4, times ncont, 3
    (100000, 1875)
    """

    # Shared empty placeholder used until _check_data installs real data
    _iv_data = IVData(None, "none", 1)

    def __init__(
        self,
        cat: OptionalArrayLike = None,
        cont: OptionalArrayLike = None,
        nobs: Optional[int] = None,
    ) -> None:
        self._cat = cat
        self._cont = cont
        self._cat_data = self._iv_data
        self._cont_data = self._iv_data
        self._nobs = nobs
        self._check_data()

    @property
    def nobs(self) -> int:
        """Number of observations"""
        assert self._nobs is not None
        return self._nobs

    def _check_data(self) -> None:
        """
        Wrap `cat` and `cont` in IVData containers and validate them

        Raises
        ------
        ValueError
            If `cat`, `cont` and `nobs` are all None, or if both `cat` and
            `cont` have zero columns.
        """
        cat, cont = self._cat, self._cont
        # Inputs without a shape attribute (e.g., None) count as 0 rows
        cat_nobs = getattr(cat, "shape", (0, ))[0]
        cont_nobs = getattr(cont, "shape", (0, ))[0]
        nobs = max(cat_nobs, cont_nobs)
        if cat is None and cont is None:
            if self._nobs is not None:
                self._cont_data = self._cat_data = IVData(None,
                                                          "none",
                                                          nobs=self._nobs)
            else:
                raise ValueError(
                    "nobs must be provided when cat and cont are None")
            return
        self._nobs = nobs

        self._cat_data = IVData(cat, "cat", nobs=nobs, convert_dummies=False)
        self._cont_data = IVData(cont,
                                 "cont",
                                 nobs=nobs,
                                 convert_dummies=False)
        if self._cat_data.shape[1] == self._cont_data.shape[1] == 0:
            raise ValueError("Both cat and cont are empty arrays")
        cat_data = self._cat_data.pandas
        convert = [
            col for col in cat_data
            if not (is_categorical_dtype(cat_data[col]))
        ]
        if convert:
            # At least one column is not categorical: cast every column
            # to category and rebuild the container
            cat_data = DataFrame(
                {col: cat_data[col].astype("category")
                 for col in cat_data})
            self._cat_data = IVData(cat_data, "cat", convert_dummies=False)

    @property
    def cat(self) -> DataFrame:
        """Categorical Variables"""
        return self._cat_data.pandas

    @property
    def cont(self) -> DataFrame:
        """Continuous Variables"""
        return self._cont_data.pandas

    @property
    def isnull(self) -> Series:
        """Rows where any categorical or continuous variable is null"""
        # axis must be passed by keyword; pandas 2 rejects positional axis
        return self.cat.isnull().any(axis=1) | self.cont.isnull().any(axis=1)

    def drop(self, locs: BoolArray) -> None:
        """
        Drop observations from both the categorical and continuous data

        Parameters
        ----------
        locs : ndarray
            Locations forwarded to the ``drop`` method of each container
        """
        self._cat_data.drop(locs)
        self._cont_data.drop(locs)

    @property
    def sparse(self) -> sp.csc_matrix:
        r"""
        Construct a sparse interaction matrix

        Returns
        -------
        csc_matrix
            Dummy interaction constructed from the cartesian product of
            the categories and each of the continuous variables.

        Notes
        -----
        The number of columns in the returned matrix is

        .. math::

            ncont \times \prod_{i=1}^{ncat} |c_i|

        where :math:`|c_i|` is the number of distinct categories in column i.
        """
        if self.cat.shape[1] and self.cont.shape[1]:
            out = []
            for col in self.cont:
                out.append(
                    category_continuous_interaction(self.cat,
                                                    self.cont[col],
                                                    precondition=False))
            return sp.hstack(out, format="csc")
        elif self.cat.shape[1]:
            return category_interaction(category_product(self.cat),
                                        precondition=False)
        elif self.cont.shape[1]:
            return sp.csc_matrix(self._cont_data.ndarray)
        else:  # empty interaction
            return sp.csc_matrix(empty((self._cat_data.shape[0], 0)))

    @property
    def hash(self) -> List[Tuple[str, ...]]:
        """
        Construct a hash that will be invariant for any permutation of
        inputs that produce the same fit when used as regressors"""
        # Sorted hashes of any categoricals
        hasher = hash_func()
        cat_hashes = []
        cat = self.cat
        for col in cat:
            # Use the local frame rather than re-reading the property
            hasher.update(
                ascontiguousarray(cat[col].cat.codes.to_numpy().data))
            cat_hashes.append(hasher.hexdigest())
            hasher = _reset(hasher)
        sorted_hashes = tuple(sorted(cat_hashes))

        # One entry per continuous column: categorical hashes + own hash
        hashes = []
        cont = self.cont
        for col in cont:
            hasher.update(ascontiguousarray(cont[col].to_numpy()).data)
            hashes.append(sorted_hashes + (hasher.hexdigest(), ))
            hasher = _reset(hasher)

        return sorted(hashes)

    @staticmethod
    def from_frame(frame: DataFrame) -> "Interaction":
        """
        Convenience function that simplifies using a DataFrame

        Parameters
        ----------
        frame : DataFrame
            Frame containing categorical and continuous variables. All
            categorical variables are passed to `cat` and all other
            variables are passed as `cont`.

        Returns
        -------
        Interaction
            Instance using the columns of frame

        Examples
        --------
        >>> import numpy as np
        >>> from linearmodels.iv.absorbing import Interaction
        >>> import pandas as pd
        >>> rs = np.random.RandomState(0)
        >>> n = 100000
        >>> cats = pd.concat([pd.Series(pd.Categorical(rs.randint(i+2,size=n)))
        ...                  for i in range(4)], axis=1)
        >>> cats.columns = ['cat{0}'.format(i) for i in range(4)]
        >>> columns = ['cont{0}'.format(i) for i in range(6)]
        >>> cont = pd.DataFrame(rs.standard_normal((n, 6)), columns=columns)
        >>> frame = pd.concat([cats, cont], axis=1)
        >>> interact = Interaction.from_frame(frame)
        >>> interact.sparse.shape # Cart product of all cats, 5!, times ncont, 6
        (100000, 720)
        """
        cat_cols = [col for col in frame if is_categorical_dtype(frame[col])]
        cont_cols = [col for col in frame if col not in cat_cols]
        return Interaction(frame[cat_cols],
                           frame[cont_cols],
                           nobs=frame.shape[0])

Exemplo n.º 3

0

Exibir arquivo

Arquivo: model.py Projeto: codacy-badger/linearmodels

class IVLIML(object):
    r"""
    Limited information ML and k-class estimation of IV models

    Parameters
    ----------
    dependent : array_like
        Endogenous variables (nobs by 1)
    exog : array_like
        Exogenous regressors  (nobs by nexog)
    endog : array_like
        Endogenous regressors (nobs by nendog)
    instruments : array_like
        Instrumental variables (nobs by ninstr)
    weights : array_like, default None
        Observation weights used in estimation
    fuller : float, default 0
        Fuller's alpha to modify LIML estimator. Default returns unmodified
        LIML estimator.
    kappa : float, default None
        Parameter value for k-class estimation.  If None, computed to
        produce LIML parameter estimate.

    Notes
    -----
    ``kappa`` and ``fuller`` should not be used simultaneously since Fuller's
    alpha applies an adjustment to ``kappa``, and so the same result can be
    computed using only ``kappa``. Fuller's alpha is used to adjust the
    LIML estimate of :math:`\kappa`, which is computed whenever ``kappa``
    is not provided.

    The LIML estimator is defined as

    .. math::

      \hat{\beta}_{\kappa} & =(X(I-\kappa M_{z})X)^{-1}X(I-\kappa M_{z})Y\\
      M_{z} & =I-P_{z}\\
      P_{z} & =Z(Z'Z)^{-1}Z'

    where :math:`Z` contains both the exogenous regressors and the instruments.
    :math:`\kappa` is estimated as part of the LIML estimator.

    When using Fuller's :math:`\alpha`, the value used is modified to

    .. math::

      \kappa-\alpha/(n-n_{instr})

    .. todo::

      * VCV: bootstrap

    See Also
    --------
    IV2SLS, IVGMM, IVGMMCUE
    """
    def __init__(
        self,
        dependent: IVDataLike,
        exog: Optional[IVDataLike],
        endog: Optional[IVDataLike],
        instruments: Optional[IVDataLike],
        *,
        weights: Optional[IVDataLike] = None,
        fuller: Numeric = 0,
        kappa: OptionalNumeric = None,
    ):

        self.dependent = IVData(dependent, var_name="dependent")
        nobs: int = self.dependent.shape[0]
        self.exog = IVData(exog, var_name="exog", nobs=nobs)
        self.endog = IVData(endog, var_name="endog", nobs=nobs)
        self.instruments = IVData(instruments,
                                  var_name="instruments",
                                  nobs=nobs)
        # Index before any rows are dropped for missing values
        self._original_index = self.dependent.pandas.index
        if weights is None:
            weights = ones(self.dependent.shape)
        weights = IVData(weights).ndarray
        if any(weights <= 0):
            raise ValueError("weights must be strictly positive.")
        # Normalize so that the (nan-ignoring) mean weight is 1
        weights = weights / nanmean(weights)
        self.weights = IVData(weights, var_name="weights", nobs=nobs)

        self._drop_locs = self._drop_missing()
        # dependent variable
        w = sqrt(self.weights.ndarray)
        self._y = self.dependent.ndarray
        self._wy = self._y * w
        # model regressors
        self._x = c_[self.exog.ndarray, self.endog.ndarray]
        self._wx = self._x * w
        # first-stage regressors
        self._z = c_[self.exog.ndarray, self.instruments.ndarray]
        self._wz = self._z * w

        self._has_constant = False
        self._regressor_is_exog = array([True] * self.exog.shape[1] +
                                        [False] * self.endog.shape[1])
        self._columns = self.exog.cols + self.endog.cols
        self._instr_columns = self.exog.cols + self.instruments.cols
        self._index = self.dependent.rows

        self._validate_inputs()
        # Only set the method name when it has not already been set
        if not hasattr(self, "_method"):
            self._method = "IV-LIML"
            additional = []
            if fuller != 0:
                additional.append("fuller(alpha={0})".format(fuller))
            if kappa is not None:
                additional.append("kappa={0}".format(kappa))
            if additional:
                self._method += "(" + ", ".join(additional) + ")"
        self._result_container: IVResultType = IVResults

        self._kappa = kappa
        self._fuller = fuller
        if kappa is not None and not isscalar(kappa):
            raise ValueError("kappa must be None or a scalar")
        if not isscalar(fuller):
            raise ValueError("fuller must be None or a scalar")
        if kappa is not None and fuller != 0:
            import warnings

            warnings.warn(
                "kappa and fuller should not normally be used "
                "simultaneously.  Identical results can be computed "
                "using kappa only",
                UserWarning,
            )
        # Without endogenous regressors and instruments the model is OLS
        if endog is None and instruments is None:
            self._result_container = OLSResults
            self._method = "OLS"
        # An empty string marks a model that was not built from a formula
        self._formula = ""

    @staticmethod
    def from_formula(
        formula: str,
        data: DataFrame,
        *,
        weights: Optional[IVDataLike] = None,
        fuller: float = 0,
        kappa: OptionalNumeric = None,
    ) -> "IVLIML":
        """
        Parameters
        ----------
        formula : str
            Patsy formula modified for the IV syntax described in the notes
            section
        data : DataFrame
            DataFrame containing the variables used in the formula
        weights : array_like, default None
            Observation weights used in estimation
        fuller : float, default 0
            Fuller's alpha to modify LIML estimator. Default returns unmodified
            LIML estimator.
        kappa : float, default None
            Parameter value for k-class estimation.  If not provided, computed to
            produce LIML parameter estimate.

        Returns
        -------
        IVLIML
            Model instance

        Notes
        -----
        The IV formula modifies the standard Patsy formula to include a
        block of the form [endog ~ instruments] which is used to indicate
        the list of endogenous variables and instruments.  The general
        structure is `dependent ~ exog [endog ~ instruments]` and it must
        be the case that the formula expressions constructed from blocks
        `dependent ~ exog endog` and `dependent ~ exog instruments` are both
        valid Patsy formulas.

        A constant must be explicitly included using '1 +' if required.

        Examples
        --------
        >>> import numpy as np
        >>> from linearmodels.datasets import wage
        >>> from linearmodels.iv import IVLIML
        >>> data = wage.load()
        >>> formula = 'np.log(wage) ~ 1 + exper + exper ** 2 + brthord + [educ ~ sibs]'
        >>> mod = IVLIML.from_formula(formula, data)
        """
        parser = IVFormulaParser(formula, data)
        dep, exog, endog, instr = parser.data
        mod: "IVLIML" = IVLIML(dep,
                               exog,
                               endog,
                               instr,
                               weights=weights,
                               fuller=fuller,
                               kappa=kappa)
        mod.formula = formula
        return mod

    def predict(
        self,
        params: ArrayLike,
        *,
        exog: Optional[IVDataLike] = None,
        endog: Optional[IVDataLike] = None,
        data: Optional[DataFrame] = None,
        eval_env: int = 4,
    ) -> DataFrame:
        """
        Predict values for additional data

        Parameters
        ----------
        params : array_like
            Model parameters (nvar by 1)
        exog : array_like
            Exogenous regressors (nobs by nexog)
        endog : array_like
            Endogenous regressors (nobs by nendog)
        data : DataFrame
            Values to use when making predictions from a model constructed
            from a formula
        eval_env : int
            Depth of use when evaluating formulas using Patsy.

        Returns
        -------
        DataFrame
            Fitted values from supplied data and parameters

        Raises
        ------
        ValueError
            If `data` is used with a model that has no formula, if `data`
            is combined with `exog` or `endog`, or if none of `exog`,
            `endog` and `data` is provided.

        Notes
        -----
        The number of parameters must satisfy nvar = nexog + nendog.

        When using `exog` and `endog`, regressor matrix is constructed as
        `[exog, endog]` and so parameters must be aligned to this structure.
        This is the same structure used in model estimation.

        If `data` is not none, then `exog` and `endog` must be none.
        Predictions from models constructed using formulas can
        be computed using either `exog` and `endog`, which will treat these as
        arrays of values corresponding to the formula-processed data, or using
        `data` which will be processed using the formula used to construct the
        values corresponding to the original model specification.
        """
        # The formula is "" (not None) for models built without a formula,
        # so test truthiness rather than identity with None.
        if data is not None and not self.formula:
            raise ValueError("Unable to use data when the model was not "
                             "created using a formula.")
        if data is not None and (exog is not None or endog is not None):
            raise ValueError("Predictions can only be constructed using one "
                             "of exog/endog or data, but not both.")
        if exog is not None or endog is not None:
            exog = IVData(exog).pandas
            endog = IVData(endog).pandas
        elif data is not None:
            parser = IVFormulaParser(self.formula, data, eval_env=eval_env)
            exog = parser.exog
            endog = parser.endog
        else:
            # Fail with a clear message instead of letting concat reject
            # a list that contains only None.
            raise ValueError("exog and endog or data must be provided.")
        # axis must be passed by keyword; pandas 2 rejects positional axis
        exog_endog = concat([exog, endog], axis=1)
        x = asarray(exog_endog)
        params = atleast_2d(asarray(params))
        # Accept row vectors by transposing to a column
        if params.shape[0] == 1:
            params = params.T
        pred = DataFrame(x @ params,
                         index=exog_endog.index,
                         columns=["predictions"])

        return pred

    @property
    def formula(self) -> str:
        """Formula used to create the model"""
        return self._formula

    @formula.setter
    def formula(self, value: str) -> None:
        """Formula used to create the model"""
        self._formula = value

    def _validate_inputs(self) -> None:
        """
        Check regressor and instrument dimensions and column rank

        Raises
        ------
        ValueError
            If there are no regressors, fewer instruments than endogenous
            regressors, or if either `[exog endog]` or
            `[exog instruments]` lacks full column rank.
        """
        x, z = self._x, self._z
        if x.shape[1] == 0:
            raise ValueError("Model must contain at least one regressor.")
        if self.instruments.shape[1] < self.endog.shape[1]:
            raise ValueError(
                "The number of instruments ({0}) must be at least "
                "as large as the number of endogenous regressors"
                " ({1}).".format(self.instruments.shape[1],
                                 self.endog.shape[1]))
        if matrix_rank(x) < x.shape[1]:
            raise ValueError("regressors [exog endog] do not have full "
                             "column rank")
        if matrix_rank(z) < z.shape[1]:
            raise ValueError("instruments [exog instruments]  do not have "
                             "full column rank")
        self._has_constant, self._const_loc = has_constant(x)

    def _drop_missing(self) -> NDArray:
        """
        Drop observations where any model input is null

        Returns
        -------
        ndarray
            Indicator of the observations flagged as missing

        Raises
        ------
        ValueError
            If every observation is flagged as missing
        """
        data = (self.dependent, self.exog, self.endog, self.instruments,
                self.weights)
        missing: NDArray = any(c_[[dh.isnull for dh in data]], 0)
        if any(missing):
            if npall(missing):
                raise ValueError("All observations contain missing data. "
                                 "Model cannot be estimated.")
            self.dependent.drop(missing)
            self.exog.drop(missing)
            self.endog.drop(missing)
            self.instruments.drop(missing)
            self.weights.drop(missing)

        missing_warning(missing)
        return missing

    @staticmethod
    def estimate_parameters(x: NDArray, y: NDArray, z: NDArray,
                            kappa: Numeric) -> NDArray:
        """
        Parameter estimation without error checking

        Parameters
        ----------
        x : ndarray
            Regressor matrix (nobs by nvar)
        y : ndarray
            Regressand matrix (nobs by 1)
        z : ndarray
            Instrument matrix (nobs by ninstr)
        kappa : scalar
            Parameter value for k-class estimator

        Returns
        -------
        ndarray
            Estimated parameters (nvar by 1)

        Notes
        -----
        Exposed as a static method to facilitate estimation with other data,
        e.g., bootstrapped samples.  Performs no error checking.
        """
        pinvz = pinv(z)
        p1 = (x.T @ x) * (1 - kappa) + kappa * ((x.T @ z) @ (pinvz @ x))
        p2 = (x.T @ y) * (1 - kappa) + kappa * ((x.T @ z) @ (pinvz @ y))
        return inv(p1) @ p2

    def _estimate_kappa(self) -> float:
        """Compute the LIML kappa from the weighted data"""
        y, x, z = self._wy, self._wx, self._wz
        is_exog = self._regressor_is_exog
        # Dependent variable together with the endogenous regressors
        e = c_[y, x[:, ~is_exog]]
        x1 = x[:, is_exog]

        # Residuals of e after projecting on z
        ez = e - z @ (pinv(z) @ e)
        if x1.shape[1] == 0:  # No exogenous regressors
            ex1 = e
        else:
            # Residuals of e after projecting on the exogenous regressors
            ex1 = e - x1 @ (pinv(x1) @ e)

        vpmzv_sqinv = inv_sqrth(ez.T @ ez)
        q = vpmzv_sqinv @ (ex1.T @ ex1) @ vpmzv_sqinv
        return min(eigvalsh(q))

    def fit(self,
            *,
            cov_type: str = "robust",
            debiased: bool = False,
            **cov_config: Any) -> Union[OLSResults, IVResults]:
        """
        Estimate model parameters

        Parameters
        ----------
        cov_type : str, default "robust"
            Name of covariance estimator to use. Supported covariance
            estimators are:

            * 'unadjusted', 'homoskedastic' - Classic homoskedastic inference
            * 'robust', 'heteroskedastic' - Heteroskedasticity robust inference
            * 'kernel' - Heteroskedasticity and autocorrelation robust
              inference
            * 'cluster' - One-way cluster dependent inference.
              Heteroskedasticity robust

        debiased : bool, default False
            Flag indicating whether to debias the covariance estimator using
            a degree of freedom adjustment.
        **cov_config
            Additional parameters to pass to covariance estimator. The list
            of optional parameters differ according to ``cov_type``. See
            the documentation of the alternative covariance estimators for
            the complete list of available commands.

        Returns
        -------
        IVResults
            Results container

        Notes
        -----
        Additional covariance parameters depend on specific covariance used.
        See the docstring of the specific covariance estimator for a list of
        supported options. Defaults are used if no covariance configuration
        is provided.

        See also
        --------
        linearmodels.iv.covariance.HomoskedasticCovariance
        linearmodels.iv.covariance.HeteroskedasticCovariance
        linearmodels.iv.covariance.KernelCovariance
        linearmodels.iv.covariance.ClusteredCovariance
        """
        wy, wx, wz = self._wy, self._wx, self._wz
        liml_kappa = self._estimate_kappa()
        kappa = self._kappa
        # A user-supplied kappa takes precedence over the LIML estimate
        if kappa is not None:
            est_kappa = kappa
        else:
            est_kappa = liml_kappa

        if self._fuller != 0:
            nobs, ninstr = wz.shape
            est_kappa -= self._fuller / (nobs - ninstr)

        params = self.estimate_parameters(wx, wy, wz, est_kappa)

        cov_estimator = COVARIANCE_ESTIMATORS[cov_type]
        cov_config["debiased"] = debiased
        cov_config["kappa"] = est_kappa
        # "center" is not forwarded to the covariance estimator
        cov_config_copy = dict(cov_config)
        cov_config_copy.pop("center", None)
        cov_estimator_inst = cov_estimator(wx, wy, wz, params,
                                           **cov_config_copy)

        results = {"kappa": est_kappa, "liml_kappa": liml_kappa}
        pe = self._post_estimation(params, cov_estimator_inst, cov_type)
        results.update(pe)

        return self._result_container(results, self)

    def wresids(self, params: NDArray) -> NDArray:
        """
        Compute weighted model residuals

        Parameters
        ----------
        params : ndarray
            Model parameters (nvar by 1)

        Returns
        -------
        ndarray
            Weighted model residuals

        Notes
        -----
        Uses weighted versions of data instead of raw data.  Identical to
        resids if all weights are unity.
        """
        return self._wy - self._wx @ params

    def resids(self, params: NDArray) -> NDArray:
        """
        Compute model residuals

        Parameters
        ----------
        params : ndarray
            Model parameters (nvar by 1)

        Returns
        -------
        ndarray
            Model residuals
        """
        return self._y - self._x @ params

    @property
    def has_constant(self) -> bool:
        """Flag indicating the model includes a constant or equivalent"""
        return self._has_constant

    @property
    def isnull(self) -> NDArray:
        """Locations of observations with missing values"""
        return self._drop_locs

    @property
    def notnull(self) -> NDArray:
        """Locations of observations included in estimation"""
        return logical_not(self._drop_locs)

    def _f_statistic(
            self, params: NDArray, cov: NDArray,
            debiased: bool) -> Union[WaldTestStatistic, InvalidTestStatistic]:
        """Compute the model F-statistic using nobs - nvar degrees of freedom"""
        const_loc = find_constant(self._x)
        nobs, nvar = self._x.shape
        return f_statistic(params, cov, debiased, nobs - nvar, const_loc)

    def _post_estimation(self, params: NDArray,
                         cov_estimator: CovarianceEstimator,
                         cov_type: str) -> Dict[str, Any]:
        """
        Assemble the values passed to the results container

        Parameters
        ----------
        params : ndarray
            Estimated parameters (nvar by 1)
        cov_estimator : CovarianceEstimator
            Covariance estimator instance built from the same parameters
        cov_type : str
            Name of the covariance estimator

        Returns
        -------
        dict
            Estimation results keyed by name
        """
        columns = self._columns
        index = self._index
        eps = self.resids(params)
        y = self.dependent.pandas
        fitted = DataFrame(asarray(y) - eps, y.index, ["fitted_values"])
        weps = self.wresids(params)
        cov = cov_estimator.cov
        debiased = cov_estimator.debiased

        residual_ss = weps.T @ weps

        w = self.weights.ndarray
        e = self._wy
        if self.has_constant:
            # Remove the weighted mean of y before computing the total SS
            e = e - sqrt(w) * average(self._y, weights=w)

        total_ss = float(e.T @ e)
        r2 = 1 - residual_ss / total_ss

        fstat = self._f_statistic(params, cov, debiased)
        out = {
            "params": Series(params.squeeze(), columns, name="parameter"),
            "eps": Series(eps.squeeze(), index=index, name="residual"),
            "weps": Series(weps.squeeze(),
                           index=index,
                           name="weighted residual"),
            "cov": DataFrame(cov, columns=columns, index=columns),
            "s2": float(cov_estimator.s2),
            "debiased": debiased,
            "residual_ss": float(residual_ss),
            "total_ss": float(total_ss),
            "r2": float(r2),
            "fstat": fstat,
            "vars": columns,
            "instruments": self._instr_columns,
            "cov_config": cov_estimator.config,
            "cov_type": cov_type,
            "method": self._method,
            "cov_estimator": cov_estimator,
            "fitted": fitted,
            "original_index": self._original_index,
        }

        return out

Exemplo n.º 4

0

Exibir arquivo

class TradedFactorModel(object):
    r"""Linear factor models estimator applicable to traded factors

    Parameters
    ----------
    portfolios : array-like
        Test portfolio returns (nobs by nportfolio)
    factors : array-like
        Priced factor returns (nobs by nfactor)

    Notes
    -----
    Implements both time-series estimators of risk premia, factor loadings
    and zero-alpha tests.

    The model estimated is

    .. math::

        r_{it}^e = \alpha_i + f_t \beta_i + \epsilon_{it}

    where :math:`r_{it}^e` is the excess return on test portfolio i and
    :math:`f_t` are the traded factor returns.  The model is directly
    tested using the estimated values :math:`\hat{\alpha}_i`. Risk premia,
    :math:`\lambda_i` are estimated using the sample averages of the factors,
    which must be excess returns on traded portfolios.
    """
    def __init__(self, portfolios, factors):
        self.portfolios = IVData(portfolios, var_name='portfolio')
        self.factors = IVData(factors, var_name='factor')
        self._name = self.__class__.__name__
        self._formula = None
        # Validation also drops rows with missing values in place
        self._validate_data()

    def __str__(self):
        """Return the class name with the factor and portfolio counts"""
        out = self.__class__.__name__
        f, p = self.factors.shape[1], self.portfolios.shape[1]
        out += ' with {0} factors, {1} test portfolios'.format(f, p)
        return out

    def __repr__(self):
        """Return ``str(self)`` followed by the object's id in hex"""
        return self.__str__() + '\nid: {0}'.format(hex(id(self)))

    def _drop_missing(self):
        """
        Drop observations where either portfolios or factors are missing

        Returns
        -------
        missing : ndarray
            Boolean flags marking the observations that had missing values

        Raises
        ------
        ValueError
            If every observation contains missing data
        """
        data = (self.portfolios, self.factors)
        # An observation is missing if it is null in any of the handlers
        missing = np.any(np.c_[[dh.isnull for dh in data]], 0)
        if any(missing):
            if all(missing):
                raise ValueError('All observations contain missing data. '
                                 'Model cannot be estimated.')
            self.portfolios.drop(missing)
            self.factors.drop(missing)
        missing_warning(missing)

        return missing

    def _validate_data(self):
        """
        Check shapes, drop missing observations and verify rank conditions

        Raises
        ------
        ValueError
            If the number of observations differs, if either input contains
            a constant, or if either input lacks full column rank
        """
        p = self.portfolios.ndarray
        f = self.factors.ndarray
        if p.shape[0] != f.shape[0]:
            raise ValueError('The number of observations in portfolios and '
                             'factors is not the same.')
        self._drop_missing()

        # Re-read the arrays since rows may have been dropped
        p = self.portfolios.ndarray
        f = self.factors.ndarray
        if has_constant(p)[0]:
            raise ValueError(
                'portfolios must not contains a constant or equivalent.')
        if has_constant(f)[0]:
            raise ValueError(
                'factors must not contain a constant or equivalent.')
        if matrix_rank(f) < f.shape[1]:
            raise ValueError(
                'Model cannot be estimated. factors do not have full column rank.'
            )
        if matrix_rank(p) < p.shape[1]:
            raise ValueError(
                'Model cannot be estimated. portfolios do not have full column rank.'
            )

    @property
    def formula(self):
        """Formula attached to the model, or None"""
        return self._formula

    @formula.setter
    def formula(self, value):
        """Attach a formula to the model"""
        self._formula = value

    @staticmethod
    def _prepare_data_from_formula(formula, data, portfolios):
        """
        Build factor (and optionally portfolio) design matrices from a formula

        Parameters
        ----------
        formula : str
            Either a factor-only formula (when ``portfolios`` is given) or a
            formula of the form ``portfolios ~ factors``
        data : DataFrame
            Data passed to ``dmatrix`` when evaluating the formula
        portfolios : array-like or None
            Test portfolios. When None, they are built from the left-hand
            side of ``formula``

        Returns
        -------
        factors : DataFrame
            Factor design matrix
        portfolios : array-like
            The supplied portfolios, or the portfolio design matrix
        orig_formula : str
            The formula exactly as passed in

        Raises
        ------
        ValueError
            If ``portfolios`` is None and ``formula`` does not contain
            exactly one ``~``
        """
        orig_formula = formula
        if portfolios is None:
            # Validate before any parsing so a missing '~' gives a clear
            # error rather than an IndexError on the split result
            components = formula.split('~')
            if len(components) != 2:
                raise ValueError(
                    "formula must contain exactly one '~' separating the "
                    "test portfolios from the factors when portfolios is "
                    "not provided.")
            portfolio_formula = components[0].strip()
            factor_formula = components[1].strip()
        else:
            portfolio_formula = None
            factor_formula = formula

        na_action = NAAction(on_NA='raise', NA_types=[])
        if portfolio_formula is not None:
            portfolios = dmatrix(portfolio_formula + ' + 0',
                                 data,
                                 return_type='dataframe',
                                 NA_action=na_action)
        # ' + 0' removes the intercept from the design matrix
        factors = dmatrix(factor_formula + ' + 0',
                          data,
                          return_type='dataframe',
                          NA_action=na_action)

        return factors, portfolios, orig_formula

    @classmethod
    def from_formula(cls, formula, data, *, portfolios=None):
        """
        Create a model from a formula

        Parameters
        ----------
        formula : str
            Patsy formula modified for the syntax described in the notes
        data : DataFrame
            DataFrame containing the variables used in the formula
        portfolios : array-like, optional
            Portfolios to be used in the model

        Returns
        -------
        model : TradedFactorModel
            Model instance

        Notes
        -----
        The formula can be used in one of two ways.  The first specifies only
        the factors and uses the data provided in ``portfolios`` as the test
        portfolios. The second specifies the portfolios using ``+`` to
        separate the test portfolios and ``~`` to separate the test
        portfolios from the factors.

        Examples
        --------
        >>> from linearmodels.datasets import french
        >>> from linearmodels.asset_pricing import TradedFactorModel
        >>> data = french.load()
        >>> formula = 'S1M1 + S1M5 + S3M3 + S5M1 + S5M5 ~ MktRF + SMB + HML'
        >>> mod = TradedFactorModel.from_formula(formula, data)

        Using only factors

        >>> portfolios = data[['S1M1', 'S1M5', 'S3M1', 'S3M5', 'S5M1', 'S5M5']]
        >>> formula = 'MktRF + SMB + HML'
        >>> mod = TradedFactorModel.from_formula(formula, data, portfolios=portfolios)
        """
        factors, portfolios, formula = cls._prepare_data_from_formula(
            formula, data, portfolios)
        mod = cls(portfolios, factors)
        mod.formula = formula
        return mod

    def fit(self, cov_type='robust', debiased=True, **cov_config):
        """
        Estimate model parameters

        Parameters
        ----------
        cov_type : str, optional
            Name of covariance estimator
        debiased : bool, optional
            Flag indicating whether to debias the covariance estimator using
            a degree of freedom adjustment
        **cov_config : dict
            Additional covariance-specific options.  See Notes.

        Returns
        -------
        results : LinearFactorModelResults
            Results class with parameter estimates, covariance and test statistics

        Raises
        ------
        ValueError
            If ``cov_type`` is not one of the supported estimators

        Notes
        -----
        Supported covariance estimators are:

        * 'robust' - Heteroskedasticity-robust covariance estimator
        * 'kernel' - Heteroskedasticity and Autocorrelation consistent (HAC)
          covariance estimator

        The kernel covariance estimator takes the optional arguments
        ``kernel``, one of 'bartlett', 'parzen' or 'qs' (quadratic spectral)
        and ``bandwidth`` (a positive integer).
        """
        p = self.portfolios.ndarray
        f = self.factors.ndarray
        nportfolio = p.shape[1]
        nobs, nfactor = f.shape
        # Factors augmented with a constant for the time-series regressions
        fc = np.c_[np.ones((nobs, 1)), f]
        f_mean = f.mean(0)
        # Risk premia are the factor sample means
        rp = f_mean[:, None]
        fe = f - f_mean
        b = pinv(fc) @ p
        eps = p - fc @ b
        alphas = b[:1].T

        nloading = (nfactor + 1) * nportfolio
        xpxi = np.eye(nloading + nfactor)
        xpxi[:nloading, :nloading] = np.kron(np.eye(nportfolio),
                                             pinv(fc.T @ fc / nobs))
        f_rep = np.tile(fc, (1, nportfolio))
        # Repeat each portfolio's residual column nfactor + 1 times so it
        # lines up with the tiled regressors in f_rep
        eps_rep = np.tile(eps, (nfactor + 1, 1))  # 1 2 3 ... 25 1 2 3 ...
        eps_rep = eps_rep.ravel(order='F')
        eps_rep = np.reshape(eps_rep, (nobs, nloading), order='F')
        xe = f_rep * eps_rep
        xe = np.c_[xe, fe]
        if cov_type in ('robust', 'heteroskedastic'):
            cov_est = HeteroskedasticCovariance(xe,
                                                inv_jacobian=xpxi,
                                                center=False,
                                                debiased=debiased,
                                                df=fc.shape[1])
            rp_cov_est = HeteroskedasticCovariance(fe,
                                                   jacobian=np.eye(f.shape[1]),
                                                   center=False,
                                                   debiased=debiased,
                                                   df=1)
        elif cov_type == 'kernel':
            cov_est = KernelCovariance(xe,
                                       inv_jacobian=xpxi,
                                       center=False,
                                       debiased=debiased,
                                       df=fc.shape[1],
                                       **cov_config)
            # Reuse the bandwidth chosen for the main estimator
            bw = cov_est.bandwidth
            _cov_config = dict(cov_config)
            _cov_config['bandwidth'] = bw
            rp_cov_est = KernelCovariance(fe,
                                          jacobian=np.eye(f.shape[1]),
                                          center=False,
                                          debiased=debiased,
                                          df=1,
                                          **_cov_config)
        else:
            raise ValueError('Unknown cov_type: {0}'.format(cov_type))
        full_vcv = cov_est.cov
        rp_cov = rp_cov_est.cov
        vcv = full_vcv[:nloading, :nloading]

        # Rearrange VCV
        order = np.reshape(np.arange(nloading), (nportfolio, nfactor + 1))
        order = order.T.ravel()
        vcv = vcv[order][:, order]

        # Return values
        alpha_vcv = vcv[:nportfolio, :nportfolio]
        # The quadratic form is 1 by 1; squeeze before converting since
        # float() on arrays with ndim > 0 is deprecated in NumPy
        stat = float(np.squeeze(alphas.T @ pinv(alpha_vcv) @ alphas))
        jstat = WaldTestStatistic(stat,
                                  'All alphas are 0',
                                  nportfolio,
                                  name='J-statistic')
        params = b.T
        betas = b[1:].T
        residual_ss = (eps**2).sum()
        e = p - p.mean(0)[None, :]
        total_ss = (e**2).sum()
        r2 = 1 - residual_ss / total_ss
        param_names = []
        for portfolio in self.portfolios.cols:
            param_names.append('alpha-{0}'.format(portfolio))
            for factor in self.factors.cols:
                param_names.append('beta-{0}-{1}'.format(portfolio, factor))
        for factor in self.factors.cols:
            param_names.append('lambda-{0}'.format(factor))

        res = AttrDict(params=params,
                       cov=full_vcv,
                       betas=betas,
                       rp=rp,
                       rp_cov=rp_cov,
                       alphas=alphas,
                       alpha_vcv=alpha_vcv,
                       jstat=jstat,
                       rsquared=r2,
                       total_ss=total_ss,
                       residual_ss=residual_ss,
                       param_names=param_names,
                       portfolio_names=self.portfolios.cols,
                       factor_names=self.factors.cols,
                       name=self._name,
                       cov_type=cov_type,
                       model=self,
                       nobs=nobs,
                       rp_names=self.factors.cols,
                       cov_est=cov_est)

        return LinearFactorModelResults(res)

Exemplo n.º 5

0

Exibir arquivo

class _FactorModelBase(object):
    r"""
    Base class for all factor models.

    Parameters
    ----------
    portfolios : array_like
        Test portfolio returns (nobs by nportfolio)
    factors : array_like
        Priced factor returns (nobs by nfactor)
    """
    def __init__(self, portfolios: IVDataLike, factors: IVDataLike):
        self.portfolios = IVData(portfolios, var_name="portfolio")
        self.factors = IVData(factors, var_name="factor")
        self._name = self.__class__.__name__
        self._formula: Optional[str] = None
        # Validation also drops rows with missing values in place
        self._validate_data()

    def __str__(self) -> str:
        """Return the class name with the factor and portfolio counts"""
        out = self.__class__.__name__
        f, p = self.factors.shape[1], self.portfolios.shape[1]
        out += " with {0} factors, {1} test portfolios".format(f, p)
        return out

    def __repr__(self) -> str:
        """Return ``str(self)`` followed by the object's id in hex"""
        return self.__str__() + "\nid: {0}".format(hex(id(self)))

    def _drop_missing(self) -> NDArray:
        """
        Drop observations where either portfolios or factors are missing

        Returns
        -------
        missing : ndarray
            Boolean flags marking the observations that had missing values

        Raises
        ------
        ValueError
            If every observation contains missing data
        """
        data = (self.portfolios, self.factors)
        # An observation is missing if it is null in any of the handlers
        missing = cast(NDArray, np.any(np.c_[[dh.isnull for dh in data]], 0))
        if any(missing):
            if all(missing):
                raise ValueError("All observations contain missing data. "
                                 "Model cannot be estimated.")
            self.portfolios.drop(missing)
            self.factors.drop(missing)
        missing_warning(missing)

        return missing

    def _validate_data(self) -> None:
        """
        Check shapes, drop missing observations and verify the inputs

        Raises
        ------
        ValueError
            If the number of observations differs, if either input contains
            a constant, if factors lack full column rank, or if there are
            fewer observations than the number of factors plus one
        """
        p = self.portfolios.ndarray
        f = self.factors.ndarray
        if p.shape[0] != f.shape[0]:
            raise ValueError("The number of observations in portfolios and "
                             "factors is not the same.")
        self._drop_missing()

        # Re-read the arrays since rows may have been dropped
        p = self.portfolios.ndarray
        f = self.factors.ndarray
        if has_constant(p)[0]:
            raise ValueError("portfolios must not contains a constant or "
                             "equivalent and must not have rank\n"
                             "less than the dimension of the smaller shape.")
        if has_constant(f)[0]:
            raise ValueError(
                "factors must not contain a constant or equivalent.")
        if np.linalg.matrix_rank(f) < f.shape[1]:
            raise ValueError(
                "Model cannot be estimated. factors do not have full column rank."
            )
        if p.shape[0] < (f.shape[1] + 1):
            raise ValueError(
                "Model cannot be estimated. portfolios must have factors + 1 or "
                "more returns to\nestimate the model parameters.")

    @property
    def formula(self) -> Optional[str]:
        """Formula attached to the model, or None"""
        return self._formula

    @formula.setter
    def formula(self, value: Optional[str]) -> None:
        """Attach a formula to the model"""
        self._formula = value

    @staticmethod
    def _prepare_data_from_formula(
            formula: str, data: DataFrame,
            portfolios: Optional[DataFrame]) -> Tuple[DataFrame, DataFrame, str]:
        """
        Build factor (and optionally portfolio) design matrices from a formula

        Parameters
        ----------
        formula : str
            Either a factor-only formula (when ``portfolios`` is given) or a
            formula of the form ``portfolios ~ factors``
        data : DataFrame
            Data passed to ``dmatrix`` when evaluating the formula
        portfolios : DataFrame or None
            Test portfolios. When None, they are built from the left-hand
            side of ``formula``

        Returns
        -------
        factors : DataFrame
            Factor design matrix
        portfolios : DataFrame
            The supplied portfolios, or the portfolio design matrix
        orig_formula : str
            The formula exactly as passed in

        Raises
        ------
        ValueError
            If ``portfolios`` is None and ``formula`` does not contain
            exactly one ``~``
        """
        orig_formula = formula
        portfolio_formula: Optional[str] = None
        factor_formula = formula
        if portfolios is None:
            # Validate before any parsing so a missing "~" gives a clear
            # error rather than an IndexError on the split result
            formula_components = formula.split("~")
            if len(formula_components) != 2:
                raise ValueError(
                    "formula must contain exactly one '~' separating the "
                    "test portfolios from the factors when portfolios is "
                    "not provided.")
            portfolio_formula = formula_components[0].strip()
            factor_formula = formula_components[1].strip()

        na_action = NAAction(on_NA="raise", NA_types=[])
        if portfolio_formula is not None:
            portfolios = dmatrix(
                portfolio_formula + " + 0",
                data,
                return_type="dataframe",
                NA_action=na_action,
            )
        # " + 0" removes the intercept from the design matrix
        factors = dmatrix(
            factor_formula + " + 0",
            data,
            return_type="dataframe",
            NA_action=na_action,
        )

        return factors, portfolios, orig_formula

Exemplo n.º 6

0

Exibir arquivo

Arquivo: model.py Projeto: matbuechner/linearmodels

class IVLIML(object):
    r"""
    Limited information ML and k-class estimation of IV models

    Parameters
    ----------
    dependent : array-like
        Endogenous variables (nobs by 1)
    exog : array-like
        Exogenous regressors  (nobs by nexog)
    endog : array-like
        Endogenous regressors (nobs by nendog)
    instruments : array-like
        Instrumental variables (nobs by ninstr)
    weights : array-like, optional
        Observation weights used in estimation
    fuller : float, optional
        Fuller's alpha to modify LIML estimator. Default returns unmodified
        LIML estimator.
    kappa : float, optional
        Parameter value for k-class estimation.  If not provided, computed to
        produce LIML parameter estimate.

    Notes
    -----
    ``kappa`` and ``fuller`` should not be used simultaneously since Fuller's
    alpha applies an adjustment to ``kappa``, and so the same result can be
    computed using only ``kappa``. Fuller's alpha is used to adjust the
    LIML estimate of :math:`\kappa`, which is computed whenever ``kappa``
    is not provided.

    The LIML estimator is defined as

    .. math::

      \hat{\beta}_{\kappa} & =(X(I-\kappa M_{z})X)^{-1}X(I-\kappa M_{z})Y\\
      M_{z} & =I-P_{z}\\
      P_{z} & =Z(Z'Z)^{-1}Z'

    where :math:`Z` contains both the exogenous regressors and the instruments.
    :math:`\kappa` is estimated as part of the LIML estimator.

    When using Fuller's :math:`\alpha`, the value used is modified to

    .. math::

      \kappa-\alpha/(n-n_{instr})

    .. todo::

      * VCV: bootstrap

    See Also
    --------
    IV2SLS, IVGMM, IVGMMCUE
    """
    def __init__(self,
                 dependent: ArrayLike,
                 exog: OptionalArrayLike,
                 endog: OptionalArrayLike,
                 instruments: OptionalArrayLike,
                 *,
                 weights: OptionalArrayLike = None,
                 fuller: Numeric = 0,
                 kappa: OptionalNumeric = None):

        # Wrap every input in an IVData handler; nobs from the dependent
        # variable is passed to the remaining handlers
        self.dependent = IVData(dependent, var_name='dependent')
        nobs = self.dependent.shape[0]  # type: int
        self.exog = IVData(exog, var_name='exog', nobs=nobs)
        self.endog = IVData(endog, var_name='endog', nobs=nobs)
        self.instruments = IVData(instruments,
                                  var_name='instruments',
                                  nobs=nobs)
        # Index captured before any observations are dropped
        self._original_index = self.dependent.pandas.index
        if weights is None:
            weights = ones(self.dependent.shape)
        weights = IVData(weights).ndarray
        if any(weights <= 0):
            raise ValueError('weights must be strictly positive.')
        # Rescale weights to have unit mean, ignoring NaN entries
        weights = weights / nanmean(weights)
        self.weights = IVData(weights, var_name='weights', nobs=nobs)

        # Must run before the arrays below are extracted so they only
        # contain complete observations
        self._drop_locs = self._drop_missing()
        # dependent variable
        w = sqrt(self.weights.ndarray)
        self._y = self.dependent.ndarray
        self._wy = self._y * w
        # model regressors
        self._x = c_[self.exog.ndarray, self.endog.ndarray]
        self._wx = self._x * w
        # first-stage regressors
        self._z = c_[self.exog.ndarray, self.instruments.ndarray]
        self._wz = self._z * w

        # Placeholder; _validate_inputs sets the actual value
        self._has_constant = False
        self._regressor_is_exog = array([True] * self.exog.shape[1] +
                                        [False] * self.endog.shape[1])
        self._columns = self.exog.cols + self.endog.cols
        self._instr_columns = self.exog.cols + self.instruments.cols
        self._index = self.dependent.rows

        self._validate_inputs()
        # Only set when not already present on the instance
        # NOTE(review): presumably lets subclasses pre-set these - confirm
        if not hasattr(self, '_method'):
            self._method = 'IV-LIML'
            additional = []
            if fuller != 0:
                additional.append('fuller(alpha={0})'.format(fuller))
            if kappa is not None:
                additional.append('kappa={0}'.format(kappa))
            if additional:
                self._method += '(' + ', '.join(additional) + ')'
        if not hasattr(self, '_result_container'):
            self._result_container = IVResults

        self._kappa = kappa
        self._fuller = fuller
        if kappa is not None and not isscalar(kappa):
            raise ValueError('kappa must be None or a scalar')
        if not isscalar(fuller):
            raise ValueError('fuller must be None or a scalar')
        if kappa is not None and fuller != 0:
            import warnings
            warnings.warn(
                'kappa and fuller should not normally be used '
                'simultaneously.  Identical results can be computed '
                'using kappa only', UserWarning)
        # Without endogenous regressors or instruments the model is
        # reported as OLS
        if endog is None and instruments is None:
            self._result_container = OLSResults
            self._method = 'OLS'
        self._formula = None

    @staticmethod
    def from_formula(formula, data, *, weights=None, fuller=0, kappa=None):
        """
        Construct an IVLIML model from an IV formula

        Parameters
        ----------
        formula : str
            Patsy formula modified for the IV syntax described in the notes
            section
        data : DataFrame
            DataFrame containing the variables used in the formula
        weights : array-like, optional
            Observation weights used in estimation
        fuller : float, optional
            Fuller's alpha to modify LIML estimator. Default returns
            unmodified LIML estimator.
        kappa : float, optional
            Parameter value for k-class estimation.  If not provided,
            computed to produce LIML parameter estimate.

        Returns
        -------
        model : IVLIML
            Model instance with ``formula`` attached

        Notes
        -----
        The IV formula extends the standard Patsy formula with a block of
        the form [endog ~ instruments] that lists the endogenous variables
        and their instruments.  The general structure is
        `dependent ~ exog [endog ~ instruments]`, and both
        `dependent ~ exog endog` and `dependent ~ exog instruments` must be
        valid Patsy formulas.

        A constant must be explicitly included using '1 +' if required.

        Examples
        --------
        >>> import numpy as np
        >>> from linearmodels.datasets import wage
        >>> from linearmodels.iv import IVLIML
        >>> data = wage.load()
        >>> formula = 'np.log(wage) ~ 1 + exper + exper ** 2 + brthord + [educ ~ sibs]'
        >>> mod = IVLIML.from_formula(formula, data)
        """
        dependent, exog, endog, instruments = IVFormulaParser(formula,
                                                              data).data
        model = IVLIML(dependent, exog, endog, instruments,
                       weights=weights, fuller=fuller, kappa=kappa)
        model.formula = formula
        return model

    def predict(self, params, *, exog=None, endog=None, data=None, eval_env=4):
        """
        Predict values for additional data

        Parameters
        ----------
        params : array-like
            Model parameters (nvar by 1)
        exog : array-like
            Exogenous regressors (nobs by nexog)
        endog : array-like
            Endogenous regressors (nobs by nendog)
        data : DataFrame
            Values to use when making predictions from a model constructed
            from a formula
        eval_env : int
            Depth of use when evaluating formulas using Patsy.

        Returns
        -------
        predictions : DataFrame
            Fitted values from supplied data and parameters

        Notes
        -----
        The number of parameters must satisfy nvar = nexog + nendog.

        When using `exog` and `endog`, regressor matrix is constructed as
        `[exog, endog]` and so parameters must be aligned to this structure.
        The the the same structure used in model estimation.

        If `data` is not none, then `exog` and `endog` must be none.
        Predictions from models constructed using formulas can
        be computed using either `exog` and `endog`, which will treat these are
        arrays of values corresponding to the formula-processed data, or using
        `data` which will be processed using the formula used to construct the
        values corresponding to the original model specification.
        """
        if data is not None and self.formula is None:
            raise ValueError('Unable to use data when the model was not '
                             'created using a formula.')
        if data is not None and (exog is not None or endog is not None):
            raise ValueError('Predictions can only be constructed using one '
                             'of exog/endog or data, but not both.')
        if exog is not None or endog is not None:
            exog = IVData(exog).pandas
            endog = IVData(endog).pandas
        else:
            parser = IVFormulaParser(self.formula, data, eval_env=eval_env)
            exog = parser.exog
            endog = parser.endog
        exog_endog = concat([exog, endog], 1)
        x = asarray(exog_endog)
        params = atleast_2d(asarray(params))
        if params.shape[0] == 1:
            params = params.T
        pred = DataFrame(x @ params,
                         index=exog_endog.index,
                         columns=['predictions'])

        return pred

    @property
    def formula(self):
        """Formula used to create the model"""
        return self._formula

    @formula.setter
    def formula(self, value):
        """Formula used to create the model"""
        self._formula = value

    def _validate_inputs(self):
        x, z = self._x, self._z
        if x.shape[1] == 0:
            raise ValueError('Model must contain at least one regressor.')
        if self.instruments.shape[1] < self.endog.shape[1]:
            raise ValueError(
                'The number of instruments ({0}) must be at least '
                'as large as the number of endogenous regressors'
                ' ({1}).'.format(self.instruments.shape[1],
                                 self.endog.shape[1]))
        if matrix_rank(x) < x.shape[1]:
            raise ValueError('regressors [exog endog] do not have full '
                             'column rank')
        if matrix_rank(z) < z.shape[1]:
            raise ValueError('instruments [exog instruments]  do not have '
                             'full column rank')
        self._has_constant, self._const_loc = has_constant(x)

    def _drop_missing(self):
        data = (self.dependent, self.exog, self.endog, self.instruments,
                self.weights)
        missing = any(c_[[dh.isnull for dh in data]], 0)
        if any(missing):
            if all(missing):
                raise ValueError('All observations contain missing data. '
                                 'Model cannot be estimated.')
            self.dependent.drop(missing)
            self.exog.drop(missing)
            self.endog.drop(missing)
            self.instruments.drop(missing)
            self.weights.drop(missing)

        missing_warning(missing)
        return missing

    @staticmethod
    def estimate_parameters(x, y, z, kappa):
        """
        Parameter estimation without error checking

        Parameters
        ----------
        x : ndarray
            Regressor matrix (nobs by nvar)
        y : ndarray
            Regressand matrix (nobs by 1)
        z : ndarray
            Instrument matrix (nobs by ninstr)
        kappa : scalar
            Parameter value for k-class estimator

        Returns
        -------
        params : ndarray
            Estimated parameters (nvar by 1)

        Notes
        -----
        Exposed as a static method to facilitate estimation with other data,
        e.g., bootstrapped samples.  Performs no error checking.
        """
        pinvz = pinv(z)
        p1 = (x.T @ x) * (1 - kappa) + kappa * ((x.T @ z) @ (pinvz @ x))
        p2 = (x.T @ y) * (1 - kappa) + kappa * ((x.T @ z) @ (pinvz @ y))
        return inv(p1) @ p2

    def _estimate_kappa(self):
        """
        Compute kappa as the smallest eigenvalue of a ratio of residual
        cross-products built from the weighted data

        Returns
        -------
        kappa : float
            Smallest eigenvalue of the scaled cross-product matrix
        """
        wy, wx, wz = self._wy, self._wx, self._wz
        exog_mask = self._regressor_is_exog
        # Dependent variable together with the endogenous regressors
        endog_block = c_[wy, wx[:, ~exog_mask]]
        exog_block = wx[:, exog_mask]

        # Residuals after projecting on the first-stage regressors
        resid_z = endog_block - wz @ (pinv(wz) @ endog_block)
        if exog_block.shape[1] != 0:
            # Residuals after projecting on the exogenous regressors only
            resid_exog = endog_block - exog_block @ (pinv(exog_block) @
                                                     endog_block)
        else:  # No exogenous regressors
            resid_exog = endog_block

        scale = inv_sqrth(resid_z.T @ resid_z)
        ratio = scale @ (resid_exog.T @ resid_exog) @ scale
        return min(eigvalsh(ratio))

    def fit(self, *, cov_type='robust', debiased=False, **cov_config):
        """
        Estimate model parameters

        Parameters
        ----------
        cov_type : str, optional
            Name of covariance estimator to use. Supported covariance
            estimators are:

            * 'unadjusted', 'homoskedastic' - Classic homoskedastic inference
            * 'robust', 'heteroskedastic' - Heteroskedasticity robust inference
            * 'kernel' - Heteroskedasticity and autocorrelation robust
              inference
            * 'cluster' - One-way cluster dependent inference.
              Heteroskedasticity robust

        debiased : bool, optional
            Flag indicating whether to debias the covariance estimator using
            a degree of freedom adjustment.
        **cov_config
            Additional parameters to pass to covariance estimator. The list
            of optional parameters differs according to ``cov_type``. See
            the documentation of the alternative covariance estimators for
            the complete list of available commands.

        Returns
        -------
        results : IVResults
            Results container

        Notes
        -----
        Additional covariance parameters depend on the specific covariance
        used. See the docstring of the specific covariance estimator for a
        list of supported options. Defaults are used if no covariance
        configuration is provided.

        See also
        --------
        linearmodels.iv.covariance.HomoskedasticCovariance
        linearmodels.iv.covariance.HeteroskedasticCovariance
        linearmodels.iv.covariance.KernelCovariance
        linearmodels.iv.covariance.ClusteredCovariance
        """
        wy, wx, wz = self._wy, self._wx, self._wz
        liml_kappa = self._estimate_kappa()
        # A user-supplied kappa takes precedence over the LIML estimate
        kappa = liml_kappa if self._kappa is None else self._kappa

        if self._fuller != 0:
            nobs, ninstr = wz.shape
            kappa -= self._fuller / (nobs - ninstr)

        params = self.estimate_parameters(wx, wy, wz, kappa)

        estimator_cls = COVARIANCE_ESTIMATORS[cov_type]
        cov_config['debiased'] = debiased
        cov_config['kappa'] = kappa
        # 'center' is not forwarded to the covariance estimator
        estimator_kwargs = {
            key: value
            for key, value in cov_config.items() if key != 'center'
        }
        cov_estimator = estimator_cls(wx, wy, wz, params, **estimator_kwargs)

        results = {'kappa': kappa, 'liml_kappa': liml_kappa}
        results.update(self._post_estimation(params, cov_estimator, cov_type))

        return self._result_container(results, self)

    def wresids(self, params):
        """
        Compute weighted model residuals

        Parameters
        ----------
        params : ndarray
            Model parameters (nvar by 1)

        Returns
        -------
        wresids : ndarray
            Weighted model residuals

        Notes
        -----
        Uses weighted versions of data instead of raw data.  Identical to
        resids if all weights are unity.
        """
        return self._wy - self._wx @ params

    def resids(self, params):
        """
        Compute model residuals

        Parameters
        ----------
        params : ndarray
            Model parameters (nvar by 1)

        Returns
        -------
        resids : ndarray
            Model residuals
        """
        return self._y - self._x @ params

    @property
    def has_constant(self):
        """Flag indicating the model includes a constant or equivalent"""
        return self._has_constant

    @property
    def isnull(self):
        """Locations of observations with missing values"""
        return self._drop_locs

    @property
    def notnull(self):
        """Locations of observations included in estimation"""
        return logical_not(self._drop_locs)

    def _f_statistic(self, params, cov, debiased):
        """
        Test that all parameters on non-constant regressors are zero

        Parameters
        ----------
        params : ndarray
            Estimated model parameters
        cov : ndarray
            Estimated parameter covariance
        debiased : bool
            When True, the statistic is divided by its degrees of freedom
            and nobs - nvar is passed as the denominator degrees of freedom

        Returns
        -------
        wald : WaldTestStatistic
            Test statistic for the null that all parameters excluding the
            constant are zero
        """
        # Columns with zero range are constant and excluded from the test
        varying = ptp(self._x, 0) != 0
        sub_params = params[varying]
        sub_cov = cov[varying][:, varying]
        stat = float(sub_params.T @ inv(sub_cov) @ sub_params)

        nobs, nvar = self._x.shape
        df = sub_params.shape[0]
        null = 'All parameters ex. constant are zero'
        name = 'Model F-statistic'
        if not debiased:
            return WaldTestStatistic(stat, null, df, name=name)

        return WaldTestStatistic(stat / df,
                                 null,
                                 df,
                                 nobs - nvar,
                                 name=name)

    def _post_estimation(self, params, cov_estimator, cov_type):
        """
        Assemble the values that are stored after estimation

        Parameters
        ----------
        params : ndarray
            Estimated model parameters (nvar by 1)
        cov_estimator : object
            Covariance estimator.  Its ``cov``, ``debiased``, ``s2`` and
            ``config`` attributes are read and the estimator itself is
            stored in the output.
        cov_type : object
            Label identifying the covariance estimator (presumably a str);
            stored unchanged under ``'cov_type'``

        Returns
        -------
        out : dict
            Parameters, residuals, weighted residuals, covariance, sums of
            squares, R-squared, the model F-statistic, fitted values and
            the descriptive fields of the model
        """
        columns = self._columns
        index = self._index
        eps = self.resids(params)
        y = self.dependent.pandas
        fitted = DataFrame(asarray(y) - eps,
                           index=y.index,
                           columns=['fitted_values'])
        weps = self.wresids(params)
        cov = cov_estimator.cov
        debiased = cov_estimator.debiased

        # The inner products below are 1 by 1 arrays when the residuals are
        # column vectors.  Squeeze them to 0-d before calling float() since
        # converting an array with ndim > 0 to a scalar is deprecated in
        # recent NumPy releases.
        residual_ss = (weps.T @ weps).squeeze()

        w = self.weights.ndarray
        e = self._wy
        if self.has_constant:
            # Remove the weighted mean of y, scaled by root-weights so that
            # it is comparable to the weighted dependent variable
            e = e - sqrt(w) * average(self._y, weights=w)

        total_ss = float((e.T @ e).squeeze())
        # residual_ss is kept as a NumPy value so that a zero total_ss
        # follows NumPy division rules rather than raising
        r2 = 1 - residual_ss / total_ss

        fstat = self._f_statistic(params, cov, debiased)
        out = {
            'params': Series(params.squeeze(), columns, name='parameter'),
            'eps': Series(eps.squeeze(), index=index, name='residual'),
            'weps': Series(weps.squeeze(),
                           index=index,
                           name='weighted residual'),
            'cov': DataFrame(cov, columns=columns, index=columns),
            's2': float(asarray(cov_estimator.s2).squeeze()),
            'debiased': debiased,
            'residual_ss': float(residual_ss),
            'total_ss': total_ss,
            'r2': float(r2),
            'fstat': fstat,
            'vars': columns,
            'instruments': self._instr_columns,
            'cov_config': cov_estimator.config,
            'cov_type': cov_type,
            'method': self._method,
            'cov_estimator': cov_estimator,
            'fitted': fitted,
            'original_index': self._original_index
        }

        return out