def test_not_absorbed_const():
    """All three columns are reported when the zero column sits at ``loc``."""
    every_column = [0, 1, 2]
    data = np.random.standard_normal((200, 3))

    # Zero out column 0 and pass 0 as the location argument.
    data[:, 0] = 0
    assert not_absorbed(data, True, 0) == every_column

    # Move the zero column to position 1 and pass that location instead.
    data[:, 0] = data[:, 1]
    data[:, 1] = 0
    assert not_absorbed(data, True, 1) == every_column
def _first_time_fit(
    self,
    use_cache: bool,
    absorb_options: Optional[
        Dict[str, Union[bool, str, ArrayLike, None, Dict[str, Any]]]
    ],
    method: str,
) -> None:
    """
    Absorb the effects from the dependent and exogenous variables.

    Builds the absorbing regressors, updates the parameter count and the
    constant-related flags on the instance, residualizes the square-root
    weighted dependent and exog arrays (using pyhdfe or LSMR) and stores
    the results in ``_absorbed_dependent`` and ``_absorbed_exog``.

    Parameters
    ----------
    use_cache : bool
        Forwarded to ``lsmr_annihilate`` when the LSMR path is used.
    absorb_options : {dict, None}
        Keyword arguments forwarded to ``pyhdfe.create`` (HDFE path) or to
        ``lsmr_annihilate`` (LSMR path). ``None`` is treated as an empty
        dict. The dict passed in is copied and never modified.
    method : str
        HDFE is used only when this is "auto" or "hdfe" and the model is
        unweighted, has no continuous absorbed variables and has no
        interactions. Any other case uses LSMR.

    Raises
    ------
    RuntimeError
        If ``method`` is "hdfe" but the HDFE requirements are not met.
    ValueError
        If ``_drop_absorbed`` is set and no exog column is retained.
    """
    weights = (
        cast(Float64Array, self.weights.ndarray) if self._is_weighted else None
    )

    # HDFE is only usable for unweighted models with pure fixed effects
    use_hdfe = weights is None and method in ("auto", "hdfe")
    use_hdfe = use_hdfe and not self._absorb_inter.cont.shape[1]
    use_hdfe = use_hdfe and not self._interaction_list

    if not use_hdfe and method == "hdfe":
        raise RuntimeError(
            "HDFE has been set as the method but the model cannot be estimated "
            "using HDFE. HDFE requires that the model is unweighted and that the "
            "absorbed regressors include only fixed effects (dummy variables)."
        )

    areg = AbsorbingRegressor(
        cat=self._absorb_inter.cat,
        cont=self._absorb_inter.cont,
        interactions=self._interaction_list,
        weights=weights,
    )
    areg_constant = areg.has_constant
    self._regressors = areg.regressors
    self._num_params += areg.approx_rank
    # Do not double count intercept-like terms
    self._has_constant = self._has_constant_exog or areg_constant
    self._num_params -= min(self._has_constant_exog, areg_constant)
    self._regressors_hash = areg.hash
    self._constant_absorbed = self._has_constant_exog and areg_constant

    dep = self._dependent.ndarray
    exog = cast(Float64Array, self._exog.ndarray)

    # Scale by the square root of the weights and compute the weighted
    # means, which are added back below if the constant was absorbed
    root_w = sqrt(self._weight_data.ndarray)
    dep = root_w * dep
    exog = root_w * exog
    denom = root_w.T @ root_w
    mu_dep = (root_w.T @ dep) / denom
    mu_exog = (root_w.T @ exog) / denom

    # Work on a copy: the HDFE branch adds a key, and writing it into the
    # caller's dict would leak "drop_singletons" into later calls that
    # reuse the same options (e.g. on the LSMR path).
    absorb_options = {} if absorb_options is None else dict(absorb_options)
    assert isinstance(self._regressors, sp.csc_matrix)
    if self._regressors.shape[1] > 0:
        # Residualize dep and exog together, then split the columns apart
        dep_exog = column_stack((dep, exog))
        if use_hdfe:
            from pyhdfe import create

            # NOTE(review): presumably forced off so the residualized
            # arrays keep one row per observation -- confirm with pyhdfe
            absorb_options["drop_singletons"] = False
            algo = create(self._absorb_inter.cat, **absorb_options)
            resids = algo.residualize(dep_exog)
        else:
            self._regressors = preconditioner(self._regressors)[0]
            resids = lsmr_annihilate(
                self._regressors,
                dep_exog,
                use_cache,
                self._regressors_hash,
                **absorb_options,
            )
        dep_resid = resids[:, :1]
        exog_resid = resids[:, 1:]
    else:
        # Nothing to absorb
        dep_resid = dep
        exog_resid = exog

    if self._constant_absorbed:
        dep_resid += root_w * mu_dep
        exog_resid += root_w * mu_exog

    if not self._drop_absorbed:
        check_absorbed(exog_resid, self.exog.cols, exog)
    else:
        ncol = exog_resid.shape[1]
        retain = not_absorbed(exog_resid)
        if not retain:
            raise ValueError(
                "All columns in exog have been fully absorbed by the "
                "included effects. This model cannot be estimated."
            )
        elif len(retain) < ncol:
            # Sorted so the names in the warning follow column order
            drop = sorted(set(range(ncol)).difference(retain))
            dropped = ", ".join([str(self.exog.cols[i]) for i in drop])
            warnings.warn(
                absorbing_warn_msg.format(absorbed_variables=dropped),
                AbsorbingEffectWarning,
            )

        exog_resid = exog_resid[:, retain]
        self._columns = [self._columns[i] for i in retain]

    self._absorbed_dependent = DataFrame(
        dep_resid,
        index=self._dependent.pandas.index,
        columns=self._dependent.pandas.columns,
    )
    self._absorbed_exog = DataFrame(
        exog_resid, index=self._exog.pandas.index, columns=self._columns
    )
def test_all_absorbed_const():
    """With an all-zero array only the column at ``loc`` is reported."""
    zeros = np.zeros((200, 3))
    for loc in (0, 1):
        retained = not_absorbed(zeros, True, loc)
        assert retained == [loc]
def _first_time_fit(
    self, use_cache: bool, lsmr_options: Optional[Dict[str, Union[float, bool]]]
) -> None:
    """
    Absorb the effects from the dependent and exogenous variables.

    Builds the preconditioned absorbing regressors, updates the parameter
    count and the constant-related flags on the instance, annihilates the
    square-root weighted dependent and exog arrays with
    ``lsmr_annihilate`` and stores the results in ``_absorbed_dependent``
    and ``_absorbed_exog``.

    Parameters
    ----------
    use_cache : bool
        Forwarded to ``lsmr_annihilate``.
    lsmr_options : {dict, None}
        Extra keyword arguments forwarded to ``lsmr_annihilate``. ``None``
        is treated as no extra arguments.

    Raises
    ------
    ValueError
        If ``_drop_absorbed`` is set and no exog column is retained.
    """
    sample_weights = None
    if self._is_weighted:
        sample_weights = self.weights.ndarray

    absorber = AbsorbingRegressor(
        cat=self._absorb_inter.cat,
        cont=self._absorb_inter.cont,
        interactions=self._interaction_list,
        weights=sample_weights,
    )
    absorber_has_const = absorber.has_constant
    self._regressors = preconditioner(absorber.regressors)[0]
    self._num_params += absorber.approx_rank
    # An intercept found in both exog and the absorbed effects counts once
    self._has_constant = self._has_constant_exog or absorber_has_const
    self._num_params -= min(self._has_constant_exog, absorber_has_const)
    self._regressors_hash = absorber.hash
    self._constant_absorbed = self._has_constant_exog and absorber_has_const

    raw_dep = self._dependent.ndarray
    raw_exog = self._exog.ndarray

    # Scale by the square root of the weights and compute weighted means
    sqrt_w = sqrt(self._weight_data.ndarray)
    w_dep = sqrt_w * raw_dep
    w_exog = sqrt_w * raw_exog
    weight_total = sqrt_w.T @ sqrt_w
    mean_dep = (sqrt_w.T @ w_dep) / weight_total
    mean_exog = (sqrt_w.T @ w_exog) / weight_total

    options = lsmr_options if lsmr_options is not None else {}
    assert isinstance(self._regressors, sp.csc_matrix)
    if self._regressors.shape[1] > 0:
        # Dependent first, then exog, each against the same regressors
        dep_resid, exog_resid = (
            lsmr_annihilate(
                self._regressors,
                block,
                use_cache,
                self._regressors_hash,
                **options,
            )
            for block in (w_dep, w_exog)
        )
    else:
        # Nothing to absorb
        dep_resid, exog_resid = w_dep, w_exog

    if self._constant_absorbed:
        # Add the weighted means back when the constant was absorbed
        dep_resid += sqrt_w * mean_dep
        exog_resid += sqrt_w * mean_exog

    if self._drop_absorbed:
        total_cols = exog_resid.shape[1]
        retain = not_absorbed(exog_resid)
        if not retain:
            raise ValueError(
                "All columns in exog have been fully absorbed by the "
                "included effects. This model cannot be estimated."
            )
        if len(retain) < total_cols:
            absorbed = set(range(total_cols)).difference(retain)
            names = ", ".join(str(self.exog.cols[i]) for i in absorbed)
            warnings.warn(
                absorbing_warn_msg.format(absorbed_variables=names),
                AbsorbingEffectWarning,
            )
        exog_resid = exog_resid[:, retain]
        self._columns = [self._columns[i] for i in retain]
    else:
        check_absorbed(exog_resid, self.exog.cols, w_exog)

    dep_frame = self._dependent.pandas
    self._absorbed_dependent = DataFrame(
        dep_resid, index=dep_frame.index, columns=dep_frame.columns
    )
    self._absorbed_exog = DataFrame(
        exog_resid, index=self._exog.pandas.index, columns=self._columns
    )