Example #1
    def hash(self) -> Tuple[Tuple[str, ...], ...]:
        hashes: List[Tuple[str, ...]] = []
        hasher = hash_func()
        if self._cat is not None:
            for col in self._cat:
                hasher.update(
                    ascontiguousarray(
                        to_numpy(get_codes(self._cat[col].cat)).data))
                hashes.append((hasher.hexdigest(),))
                hasher = _reset(hasher)
        if self._cont is not None:
            for col in self._cont:
                hasher.update(ascontiguousarray(
                    to_numpy(self._cont[col]).data))
                hashes.append((hasher.hexdigest(),))
                hasher = _reset(hasher)
        if self._interactions is not None:
            for interact in self._interactions:
                hashes.extend(interact.hash)
        # Add weight hash if provided
        if self._weights is not None:
            hasher = hash_func()
            hasher.update(ascontiguousarray(self._weights.data))
            hashes.append((hasher.hexdigest(),))
        return tuple(sorted(hashes))
Example #2
    def _post_estimation(self, params: ndarray, cov_estimator, cov_type: str):
        columns = self._columns
        index = self._index
        eps = self.resids(params)
        fitted = DataFrame(self._dependent.ndarray - eps, index=self._dependent.rows,
                           columns=['fitted_values'])
        absorbed_effects = DataFrame(to_numpy(self._absorbed_dependent) - to_numpy(fitted),
                                     columns=['absorbed_effects'], index=self._dependent.rows)

        weps = self.wresids(params)
        cov = cov_estimator.cov
        debiased = cov_estimator.debiased

        residual_ss = (weps.T @ weps)[0, 0]

        w = self.weights.ndarray
        root_w = sqrt(w)
        e = self._dependent.ndarray * root_w
        if self.has_constant:
            e = e - root_w * average(self._dependent.ndarray, weights=w)

        total_ss = float(e.T @ e)
        r2 = max(1 - residual_ss / total_ss, 0.0)

        e = to_numpy(self._absorbed_dependent)  # already scaled by root_w
        # If absorbing contains a constant, but exog does not, no need to demean
        if self._const_col is not None:
            col = self._const_col
            x = to_numpy(self._absorbed_exog)[:, col:col + 1]
        mu = (lstsq(x, e, rcond=None)[0]).squeeze()
            e = e - x * mu

        absorbed_total_ss = float(e.T @ e)
        r2_absorbed = max(1 - residual_ss / absorbed_total_ss, 0.0)

        fstat = self._f_statistic(params, cov, debiased)
        out = {'params': Series(params.squeeze(), columns, name='parameter'),
               'eps': Series(eps.squeeze(), index=index, name='residual'),
               'weps': Series(weps.squeeze(), index=index, name='weighted residual'),
               'cov': DataFrame(cov, columns=columns, index=columns),
               's2': float(cov_estimator.s2),
               'debiased': debiased,
               'residual_ss': float(residual_ss),
               'total_ss': float(total_ss),
               'r2': float(r2),
               'fstat': fstat,
               'vars': columns,
               'instruments': [],
               'cov_config': cov_estimator.config,
               'cov_type': cov_type,
               'method': self._method,
               'cov_estimator': cov_estimator,
               'fitted': fitted,
               'original_index': self._original_index,
               'absorbed_effects': absorbed_effects,
               'absorbed_r2': r2_absorbed}

        return out
Example #3
    def _drop_missing(self) -> NDArray:
        missing = to_numpy(self.dependent.isnull)
        missing |= to_numpy(self.exog.isnull)
        missing |= to_numpy(self._absorb_inter.cat.isnull().any(axis=1))
        missing |= to_numpy(self._absorb_inter.cont.isnull().any(axis=1))
        for interact in self._interaction_list:
            missing |= to_numpy(interact.isnull)
        if npany(missing):
            self.dependent.drop(missing)
            self.exog.drop(missing)
            self._absorb_inter.drop(missing)
            for interact in self._interaction_list:
                interact.drop(missing)
        missing_warning(missing)
        return missing
Example #4
def category_continuous_interaction(cat: AnyPandas,
                                    cont: AnyPandas,
                                    precondition: bool = True) -> csc_matrix:
    """
    Parameters
    ----------
    cat : Series
        Categorical series to convert to dummy variables
    cont : {Series, DataFrame}
        Continuous variable values to use in the dummy interaction
    precondition : bool
        Flag whether dummies should be preconditioned

    Returns
    -------
    csc_matrix
        Sparse matrix of dummy interactions with unit column norm
    """
    codes = get_codes(category_product(cat).cat)
    interact = csc_matrix(
        (to_numpy(cont).flat, (arange(codes.shape[0]), codes)))
    if not precondition:
        return interact
    else:
        conditioned = preconditioner(interact)[0]
        assert isinstance(conditioned, csc_matrix)
        return conditioned
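A minimal usage sketch for the function above (an addition for illustration, not part of the original listing). It assumes category_continuous_interaction is importable from the module that defines it and that the inputs follow the documented types, a categorical Series and a continuous Series.

import numpy as np
import pandas as pd

# Hypothetical import path; the function is the one defined in the snippet above.
# from linearmodels.iv.absorbing import category_continuous_interaction

cat = pd.Series(pd.Categorical(["a", "b", "a", "c", "b"]))
cont = pd.Series(np.arange(5.0), name="x")

# Each row contributes its continuous value to the dummy column of its
# category, so the result has one column per category level.
interactions = category_continuous_interaction(cat, cont, precondition=False)
print(interactions.A)

With precondition=True (the default), the columns are rescaled by preconditioner so the returned matrix has unit column norm, as the Returns section states.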
Example #5
def test_drop_missing():
    gen = generate_data(2,
                        True,
                        2,
                        factor_format="pandas",
                        ncont=0,
                        cont_interactions=1)
    gen.y[::53] = np.nan
    gen.x[::79] = np.nan
    with pytest.warns(MissingValueWarning):
        AbsorbingLS(gen.y,
                    gen.x,
                    absorb=gen.absorb,
                    interactions=gen.interactions)

    gen = generate_data(2,
                        True,
                        2,
                        factor_format="pandas",
                        ncont=0,
                        cont_interactions=1)
    for col in gen.absorb:
        gen.absorb[col] = gen.absorb[col].astype("int64").astype("object")
        col_iloc = gen.absorb.columns.get_loc(col)
        gen.absorb.iloc[::91, col_iloc] = np.nan
        gen.absorb[col] = pd.Categorical(to_numpy(gen.absorb[col]))
    with pytest.warns(MissingValueWarning):
        AbsorbingLS(gen.y,
                    gen.x,
                    absorb=gen.absorb,
                    interactions=gen.interactions)
Example #6
def test_interaction_cont_only(cont):
    interact = Interaction(cont=cont)
    assert interact.nobs == cont.shape[0]
    assert_frame_equal(cont, interact.cont)
    expected = to_numpy(cont)
    actual = interact.sparse
    assert isinstance(actual, csc_matrix)
    assert_allclose(expected, actual.A)
Example #7
def test_absorbing_regressors_hash(cat, cont, interact, weights):
    areg = AbsorbingRegressor(cat=cat,
                              cont=cont,
                              interactions=interact,
                              weights=weights)
    # Build hash
    hashes = []
    for col in cat:
        hashes.append(
            (hasher.single(to_numpy(get_codes(cat[col].cat)).data), ))
    for col in cont:
        hashes.append((hasher.single(to_numpy(cont[col]).data), ))
    hashes = sorted(hashes)
    if interact is not None:
        for inter in interact:
            hashes.extend(inter.hash)
    if weights is not None:
        hashes.append((hasher.single(weights.data), ))
    hashes = tuple(sorted(hashes))
    assert hashes == areg.hash
Example #8
    def wresids(self, params: ndarray):
        """
        Compute weighted model residuals

        Parameters
        ----------
        params : ndarray
            Model parameters (nvar by 1)

        Returns
        -------
        wresids : ndarray
            Weighted model residuals

        Notes
        -----
        Uses weighted versions of data instead of raw data.  Identical to
        resids if all weights are unity.
        """
        return to_numpy(self._absorbed_dependent) - to_numpy(self._absorbed_exog) @ params
Example #9
    def hash(self):
        """
        Construct a hash that will be invariant for any permutation of
        inputs that produce the same fit when used as regressors"""
        # Sorted hashes of any categoricals
        hasher = hash_func()
        cat_hashes = []
        cat = self.cat
        for col in cat:
            hasher.update(ascontiguousarray(to_numpy(get_codes(self.cat[col].cat)).data))
            cat_hashes.append(hasher.hexdigest())
            hasher = _reset(hasher)
        cat_hashes = tuple(sorted(cat_hashes))

        hashes = []
        cont = self.cont
        for col in cont:
            hasher.update(ascontiguousarray(to_numpy(cont[col]).data))
            hashes.append(cat_hashes + (hasher.hexdigest(),))
            hasher = _reset(hasher)

        return sorted(hashes)
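A short sketch of the invariance the docstring describes (an addition, not from the original listing): permuting the continuous columns leaves the hash unchanged because each column is hashed separately and the per-column hashes are sorted. It assumes hash is exposed as a property, as the other snippets access it without calling, and that Interaction accepts pandas inputs as in the tests above; the import path is also an assumption.

import numpy as np
import pandas as pd

# Hypothetical import path for the Interaction class shown above.
# from linearmodels.iv.absorbing import Interaction

rs = np.random.RandomState(0)
cat = pd.DataFrame({"c1": pd.Categorical(rs.randint(0, 3, 20))})
cont = pd.DataFrame({"x1": rs.standard_normal(20),
                     "x2": rs.standard_normal(20)})

base = Interaction(cat=cat, cont=cont)
# Reorder the continuous columns; the sorted per-column hashes are identical.
permuted = Interaction(cat=cat, cont=cont[["x2", "x1"]])
assert base.hash == permuted.hash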
Example #10
def test_interaction_cat_cont(cat, cont):
    interact = Interaction(cat=cat, cont=cont)
    assert interact.nobs == cat.shape[0]
    assert_frame_equal(cat, interact.cat)
    assert_frame_equal(cont, interact.cont)
    base = category_interaction(category_product(cat), precondition=False).A
    expected = []
    for i in range(cont.shape[1]):
        element = base.copy()
        element[np.where(element)] = to_numpy(cont.iloc[:, i])
        expected.append(element)
    expected = np.column_stack(expected)
    actual = interact.sparse
    assert isinstance(actual, csc_matrix)
    assert_allclose(expected, actual.A)
Example #11
def test_against_ols(ols_data):
    mod = AbsorbingLS(
        ols_data.y,
        ols_data.x,
        absorb=ols_data.absorb,
        interactions=ols_data.interactions,
        weights=ols_data.weights,
    )
    res = mod.fit()
    absorb = []
    has_dummy = False
    if ols_data.absorb is not None:
        absorb.append(to_numpy(ols_data.absorb.cont))
        if ols_data.absorb.cat.shape[1] > 0:
            dummies = dummy_matrix(ols_data.absorb.cat, precondition=False)[0]
            assert isinstance(dummies, sp.csc_matrix)
            absorb.append(dummies.A)
        has_dummy = ols_data.absorb.cat.shape[1] > 0
    if ols_data.interactions is not None:
        for interact in ols_data.interactions:
            absorb.append(interact.sparse.A)
    _x = ols_data.x
    if absorb:
        absorb = np.column_stack(absorb)
        if np.any(np.ptp(_x, 0) == 0) and has_dummy:
            if ols_data.weights is None:
                absorb = annihilate(absorb, np.ones((absorb.shape[0], 1)))
            else:
                root_w = np.sqrt(mod.weights.ndarray)
                wabsorb = annihilate(root_w * absorb, root_w)
                absorb = (1.0 / root_w) * wabsorb
        rank = np.linalg.matrix_rank(absorb)
        if rank < absorb.shape[1]:
            a, b = np.linalg.eig(absorb.T @ absorb)
            order = np.argsort(a)[::-1]
            a, b = a[order], b[:, order]
            z = absorb @ b
            absorb = z[:, :rank]
        _x = np.column_stack([_x, absorb])
    ols_mod = _OLS(ols_data.y, _x, weights=ols_data.weights)
    ols_res = ols_mod.fit()

    assert_results_equal(ols_res, res)
Example #12
    def _regressors(self) -> csc_matrix:
        regressors = []

        if self._cat is not None and self._cat.shape[1] > 0:
            regressors.append(dummy_matrix(self._cat, precondition=False)[0])
        if self._cont is not None and self._cont.shape[1] > 0:
            regressors.append(csc_matrix(to_numpy(self._cont)))
        if self._interactions is not None:
            regressors.extend([interact.sparse for interact in self._interactions])

        if regressors:
            regressor_mat = sp.hstack(regressors, format='csc')
            approx_rank = regressor_mat.shape[1]
            self._approx_rank = approx_rank
            if self._weights is not None:
                return (sp.diags(sqrt(self._weights.squeeze())).dot(regressor_mat)).asformat('csc')
            return regressor_mat
        else:
            self._approx_rank = 0
            return csc_matrix(empty((0, 0)))
Example #13
    def fit(
        self,
        *,
        cov_type: str = "robust",
        debiased: bool = False,
        lsmr_options: Optional[Dict[str, Union[float, bool]]] = None,
        use_cache: bool = True,
        **cov_config: Any,
    ) -> AbsorbingLSResults:
        """
        Estimate model parameters

        Parameters
        ----------
        cov_type : str, optional
            Name of covariance estimator to use. Supported covariance
            estimators are:

            * 'unadjusted', 'homoskedastic' - Classic homoskedastic inference
            * 'robust', 'heteroskedastic' - Heteroskedasticity robust inference
            * 'kernel' - Heteroskedasticity and autocorrelation robust
              inference
            * 'cluster' - One-way cluster dependent inference.
              Heteroskedasticity robust

        debiased : bool, optional
            Flag indicating whether to debias the covariance estimator using
            a degrees-of-freedom adjustment.
        lsmr_options : dict
            Dictionary of options to pass to scipy.sparse.linalg.lsmr
        use_cache : bool
            Flag indicating whether the variables, once purged of the
            absorbed variables and interactions, should be stored in the
            cache, and retrieved if available. Caching can dramatically
            speed up re-fitting large models when the set of absorbed
            variables and interactions is identical.
        **cov_config
            Additional parameters to pass to the covariance estimator. The
            list of optional parameters differs according to ``cov_type``.
            See the documentation of the alternative covariance estimators
            for the complete list of available options.

        Returns
        -------
        AbsorbingLSResults
            Results container

        Notes
        -----
        Additional covariance parameters depend on the specific covariance
        estimator used. See the docstring of the specific covariance estimator
        for a list of supported options. Defaults are used if no covariance
        configuration is provided.

        If use_cache is True, then variables are hashed based on their
        contents using either a 64 bit value (if xxhash is installed) or
        a 256 bit value. This allows variables to be reused in different
        models if the set of absorbing variables and interactions is held
        constant.

        See also
        --------
        linearmodels.iv.covariance.HomoskedasticCovariance
        linearmodels.iv.covariance.HeteroskedasticCovariance
        linearmodels.iv.covariance.KernelCovariance
        linearmodels.iv.covariance.ClusteredCovariance
        """

        if self._absorbed_dependent is None:
            self._first_time_fit(use_cache, lsmr_options)

        self._x = exog_resid = to_numpy(self.absorbed_exog)
        dep_resid = to_numpy(self.absorbed_dependent)
        if self._exog.shape[1] == 0:
            params = empty((0, 1))
        else:
            if exog_resid.shape[1]:
                check_absorbed(exog_resid, self.exog.cols)
            params = lstsq(exog_resid, dep_resid, rcond=None)[0]
            self._num_params += exog_resid.shape[1]

        cov_estimator = COVARIANCE_ESTIMATORS[cov_type]
        cov_config["debiased"] = debiased
        cov_config["kappa"] = 0.0
        cov_config_copy = {k: v for k, v in cov_config.items()}
        if "center" in cov_config_copy:
            del cov_config_copy["center"]
        cov_estimator_inst = cov_estimator(exog_resid, dep_resid, exog_resid,
                                           params, **cov_config_copy)

        results = {"kappa": 0.0, "liml_kappa": 0.0}
        pe = self._post_estimation(params, cov_estimator_inst, cov_type)
        results.update(pe)
        results["df_model"] = self._num_params

        return AbsorbingLSResults(results, self)
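An end-to-end sketch of how fit is typically called, illustrating the cov_type choices and the debiased flag described in the docstring above (an addition for illustration). The generated data, the import path, and the clusters keyword consumed by the clustered covariance estimator are assumptions rather than part of the listing.

import numpy as np
import pandas as pd

# Hypothetical import path for the estimator shown above.
# from linearmodels.iv.absorbing import AbsorbingLS

rs = np.random.RandomState(0)
n = 1000
firm_ids = rs.randint(0, 50, n)
firm = pd.Series(pd.Categorical(firm_ids), name="firm")
x = pd.DataFrame({"x1": rs.standard_normal(n)})
y = pd.Series(2.0 * x["x1"] + rs.standard_normal(n), name="y")

# Absorb the firm fixed effects instead of including 50 dummy columns.
mod = AbsorbingLS(y, x, absorb=pd.DataFrame({"firm": firm}))

# Heteroskedasticity-robust inference with a degrees-of-freedom adjustment.
res_robust = mod.fit(cov_type="robust", debiased=True)

# One-way clustered inference; ``clusters`` is an assumed option forwarded
# through **cov_config to the clustered covariance estimator.
res_clustered = mod.fit(cov_type="cluster", clusters=firm_ids)

# The results container is an AbsorbingLSResults; the parameter estimates
# are assumed to be exposed as ``params``.
print(res_robust.params)
print(res_clustered.params)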
Example #14
    def _post_estimation(
        self,
        params: NDArray,
        cov_estimator: Union[HomoskedasticCovariance,
                             HeteroskedasticCovariance, KernelCovariance,
                             ClusteredCovariance],
        cov_type: str,
    ) -> Dict[str, Any]:
        columns = self._columns
        index = self._index
        eps = self.resids(params)
        fitted = DataFrame(
            self._dependent.ndarray - eps,
            index=self._dependent.rows,
            columns=["fitted_values"],
        )
        absorbed_effects = DataFrame(
            to_numpy(self._absorbed_dependent) - to_numpy(fitted),
            columns=["absorbed_effects"],
            index=self._dependent.rows,
        )

        weps = self.wresids(params)
        cov = cov_estimator.cov
        debiased = cov_estimator.debiased

        residual_ss = (weps.T @ weps)[0, 0]

        w = self.weights.ndarray
        root_w = sqrt(w)
        e = self._dependent.ndarray * root_w
        if self.has_constant:
            e = e - root_w * average(self._dependent.ndarray, weights=w)

        total_ss = float(e.T @ e)
        r2 = max(1 - residual_ss / total_ss, 0.0)

        e = to_numpy(self._absorbed_dependent)  # already scaled by root_w
        # If absorbing contains a constant, but exog does not, no need to demean
        if self._const_col is not None:
            col = self._const_col
            x = to_numpy(self._absorbed_exog)[:, col:col + 1]
            mu = (lstsq(x, e, rcond=None)[0]).squeeze()
            e = e - x * mu

        absorbed_total_ss = float(e.T @ e)
        r2_absorbed = max(1 - residual_ss / absorbed_total_ss, 0.0)

        fstat = self._f_statistic(params, cov, debiased)
        out = {
            "params": Series(params.squeeze(), columns, name="parameter"),
            "eps": Series(eps.squeeze(), index=index, name="residual"),
            "weps": Series(weps.squeeze(),
                           index=index,
                           name="weighted residual"),
            "cov": DataFrame(cov, columns=columns, index=columns),
            "s2": float(cov_estimator.s2),
            "debiased": debiased,
            "residual_ss": float(residual_ss),
            "total_ss": float(total_ss),
            "r2": float(r2),
            "fstat": fstat,
            "vars": columns,
            "instruments": [],
            "cov_config": cov_estimator.config,
            "cov_type": cov_type,
            "method": self._method,
            "cov_estimator": cov_estimator,
            "fitted": fitted,
            "original_index": self._original_index,
            "absorbed_effects": absorbed_effects,
            "absorbed_r2": r2_absorbed,
        }

        return out
Example #15
def test_interaction_cat_cont_convert(cat, cont):
    base = Interaction(cat, cont)
    interact = Interaction(to_numpy(cat), cont)
    assert_allclose(base.sparse.A, interact.sparse.A)