def setup_class(cls):
    """Fit a Poisson GLM with frequency weights and an HC0 covariance.

    Stores the fitted results on ``cls.res1`` and a correction factor on
    ``cls.corr_fact``.
    """
    freq = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])
    total_weight = freq.sum()
    # sqrt((wsum - 1) / wsum); presumably consumed by the shared checks of
    # the enclosing test class -- confirm against the base class.
    cls.corr_fact = np.sqrt((total_weight - 1.) / total_weight)

    poisson_model = GLM(
        cpunish_data.endog,
        cpunish_data.exog,
        family=sm.families.Poisson(),
        freq_weights=freq,
    )
    cls.res1 = poisson_model.fit(cov_type='HC0')
def test_glm(self):
    """Compare GLM predictions with the WLS results stored on the instance.

    The WLS model's endog/exog are scaled by the square root of its weights
    and refit as a default GLM; the prediction summary frames of both fits
    are compared, first with ``use_t=True`` and then with ``use_t=False``.
    The test also exercises ``params_transform_univariate`` and ends with a
    smoke test of ``get_prediction`` with an explicit ``exog``.
    """
    # preliminary, getting started with basic test for GLM.get_prediction
    wls_res = self.res_wls
    wls_mod = wls_res.model
    endog, exog, weights = wls_mod.endog, wls_mod.exog, wls_mod.weights
    # notation: `weights` are the WLS weights, `w` would be the variance
    root_w = np.sqrt(weights)
    glm_mod = GLM(endog * root_w, exog * root_w[:, None])

    def frame_values(results):
        # prediction summary frame as a plain array
        return results.get_prediction().summary_frame().values

    n_compare = 30  # in glm with predict wendog

    # compare using t distribution
    glm_res = glm_mod.fit(use_t=True)
    glm_frame = frame_values(glm_res)
    wls_frame = frame_values(wls_res)
    # only the first four columns of the WLS frame are compared
    assert_allclose(glm_frame[:n_compare], wls_frame[:n_compare, :4])

    # compare using normal distribution
    glm_res = glm_mod.fit()  # default use_t=False
    glm_frame = frame_values(glm_res)
    wls_res = wls_mod.fit(use_t=False)
    wls_frame = frame_values(wls_res)
    assert_allclose(glm_frame[:n_compare], wls_frame[:n_compare, :4])

    # function for parameter transformation
    # should be separate test method
    rates = params_transform_univariate(glm_res.params,
                                        glm_res.cov_params())
    expected_rates = np.column_stack((
        np.exp(glm_res.params),
        glm_res.bse * np.exp(glm_res.params),
        np.exp(glm_res.conf_int()),
    ))
    assert_allclose(rates.summary_frame().values, expected_rates,
                    rtol=1e-13)

    # with identity transform
    identity_pt = params_transform_univariate(glm_res.params,
                                              glm_res.cov_params(),
                                              link=links.identity())
    assert_allclose(identity_pt.tvalues, glm_res.tvalues, rtol=1e-13)
    assert_allclose(identity_pt.se_mean, glm_res.bse, rtol=1e-13)
    t_result = identity_pt.t_test()
    assert_allclose(t_result[0], glm_res.tvalues, rtol=1e-13)
    assert_allclose(t_result[1], glm_res.pvalues, rtol=1e-13)

    # prediction with exog and no weights does not error (i.e. smoke test)
    glm_res = glm_mod.fit()
    glm_res.get_prediction(exog)
def setup_class(cls):
    """Fit the star98 binomial model in two parameterizations.

    ``cls.res1`` uses the two-column endog as loaded; ``cls.res2`` uses the
    first endog column divided by the row sums, with the row sums passed as
    ``var_weights``.
    """
    star98 = sm.datasets.star98.load(as_pandas=False)
    star98.exog = add_constant(star98.exog, prepend=False)

    counts_model = GLM(star98.endog, star98.exog,
                       family=sm.families.Binomial())
    cls.res1 = counts_model.fit()

    # row totals of the two endog columns serve as variance weights
    totals = star98.endog.sum(axis=1)
    proportion = star98.endog[:, 0] / totals
    proportion_model = GLM(proportion, star98.exog,
                           family=sm.families.Binomial(),
                           var_weights=totals)
    cls.res2 = proportion_model.fit()
def setup_class(cls):
    """Build a full and a reduced star98 binomial model with an offset.

    ``cls.mod1`` uses every exog column (constant prepended); ``cls.mod2``
    drops the last five columns.  Both share an offset of ones.  Finishes by
    calling ``cls.init()``, which is defined outside this block.
    """
    from sm2.datasets.star98 import load

    star98 = load(as_pandas=False)
    full_exog = add_constant(star98.exog, prepend=True)
    unit_offset = np.ones(len(star98.endog))
    # reduced design: everything except the last five columns
    reduced_exog = full_exog[:, :-5]

    cls.mod2 = GLM(star98.endog, reduced_exog,
                   family=family.Binomial(), offset=unit_offset)
    cls.mod1 = GLM(star98.endog, full_exog,
                   family=family.Binomial(), offset=unit_offset)
    cls.init()
def setup_class(cls):
    """Fit a frequency-weighted Poisson GLM with cluster-robust covariance.

    Stores the fit on ``cls.res1`` and a group-count based correction factor
    on ``cls.corr_fact``.
    """
    freq = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])

    # cluster labels for the 17 observations
    groups = np.arange(1, 17 + 1) // 2
    n_groups = len(np.unique(groups))

    # no wnobs yet in sandwich covariance calculation
    cls.corr_fact = 1 / np.sqrt(n_groups / (n_groups - 1))

    poisson_model = GLM(
        cpunish_data.endog,
        cpunish_data.exog,
        family=sm.families.Poisson(),
        freq_weights=freq,
    )
    cls.res1 = poisson_model.fit(
        cov_type='cluster',
        cov_kwds={'groups': groups, 'use_correction': False},
    )
def setup_class(cls):
    """Fit a Poisson GLM with normalized variance weights and HC0 covariance.

    The analytic weights are the frequency weights rescaled so they sum to
    the number of observations.  Stores the fit on ``cls.res1`` and a
    hand-adjusted correction factor on ``cls.corr_fact``.
    """
    # faking aweights by using normalized freq_weights
    freq = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])
    total_weight = freq.sum()
    n_obs = len(cpunish_data.endog)
    analytic_weights = freq / total_weight * n_obs

    # This is really close when corr_fact = (wsum - 1.) / wsum, but to
    # avoid having to loosen the precision of the assert_allclose, the
    # extra constant is applied manually.  It is *possible* that lowering
    # the IRLS convergence criterion in stata and here will make this less
    # sketchy.
    base_factor = np.sqrt((total_weight - 1.) / total_weight)
    cls.corr_fact = base_factor * 0.98518473599905609

    poisson_model = GLM(
        cpunish_data.endog,
        cpunish_data.exog,
        family=sm.families.Poisson(),
        var_weights=analytic_weights,
    )
    cls.res1 = poisson_model.fit(cov_type='HC0')
def setup_class(cls):
    """Fit a frequency-weighted Poisson GLM with an HC1 covariance.

    The fitted results are stored on ``cls.res1``.
    """
    freq = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])
    poisson_model = GLM(
        cpunish_data.endog,
        cpunish_data.exog,
        family=sm.families.Poisson(),
        freq_weights=freq,
    )
    cls.res1 = poisson_model.fit(cov_type='HC1')
def setup_class(cls):
    """Fit a variance-weighted Poisson GLM and prepare the reference results.

    ``cls.res1`` is the default (non-robust) fit.  ``cls.res2`` is a shallow
    copy of the stored reference results whose ``resids`` columns 3 and 4
    are rescaled by the square root of the weights.
    """
    # faking aweights by using normalized freq_weights
    freq = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])
    total_weight = freq.sum()
    n_obs = len(cpunish_data.endog)
    analytic_weights = freq / total_weight * n_obs

    poisson_model = GLM(
        cpunish_data.endog,
        cpunish_data.exog,
        family=sm.families.Poisson(),
        var_weights=analytic_weights,
    )
    cls.res1 = poisson_model.fit()

    # Copy both the holder and its resids array so the module-level
    # reference results are not modified in place.
    reference = copy.copy(res_stata.results_poisson_aweight_nonrobust)
    reference.resids = reference.resids.copy()
    # Need to adjust resids for pearson and deviance to add weights
    reference.resids[:, 3:5] *= np.sqrt(analytic_weights[:, np.newaxis])
    cls.res2 = reference
def setup_class(cls):
    """Create a seeded random DataFrame and fit a formula GLM on it.

    ``cls.data`` holds columns ``y, var1, var2, var3`` rounded to four
    decimals with string row labels; ``cls.res`` is the fit of
    ``y ~ var1 + var2``.
    """
    n_obs = 30
    np.random.seed(987128)
    regressors = np.random.randn(n_obs, 3)
    response = regressors.sum(1) + np.random.randn(n_obs)
    row_labels = ['obs%02d' % i for i in range(n_obs)]

    # add one extra column to check that it doesn't matter
    values = np.round(np.column_stack((response, regressors)), 4)
    cls.data = pd.DataFrame(values,
                            columns='y var1 var2 var3'.split(),
                            index=row_labels)
    formula_model = GLM.from_formula('y ~ var1 + var2', data=cls.data)
    cls.res = formula_model.fit()
def test_poisson_residuals():
    """Check residuals of an exposure Poisson fit against a var_weights fit.

    One model uses counts with ``exposure``; the other uses the rate
    ``counts / exposure`` with ``var_weights=exposure``.  Their residuals
    are compared, with the response residuals rescaled by the exposure.
    """
    nobs, k_exog = 100, 5
    np.random.seed(987125)
    design = add_constant(np.random.randn(nobs, k_exog - 1))
    linpred = design.sum(1) / 2
    # Unused draw, kept so the random stream feeding the Poisson sample
    # below stays the same.
    np.random.randn(nobs)
    exposure = 1 + np.arange(nobs) // 4

    counts = np.random.poisson(np.exp(linpred) * exposure)
    counts[10:15] += 10

    poisson = sm.families.Poisson()
    res_exposure = GLM(counts, design, family=poisson,
                       exposure=exposure).fit()
    res_weighted = GLM(counts / exposure, design, family=poisson,
                       var_weights=exposure).fit()

    assert_allclose(res_exposure.resid_response / exposure,
                    res_weighted.resid_response)
    assert_allclose(res_exposure.resid_pearson,
                    res_weighted.resid_pearson)
    assert_allclose(res_exposure.resid_deviance,
                    res_weighted.resid_deviance)
    assert_allclose(res_exposure.resid_anscombe,
                    res_weighted.resid_anscombe)
    assert_allclose(res_exposure.resid_anscombe_unscaled,
                    res_weighted.resid_anscombe)
def setup_class(cls):
    """Fit a log-link Gaussian GLM with variance weights via a formula.

    The cpunish exog frame gets the endog added as ``EXECUTIONS`` and
    ``INCOME`` divided by 1000; the fit is stored on ``cls.res1``.
    """
    dataset = sm.datasets.cpunish.load_pandas()
    executions = dataset.endog
    frame = dataset.exog
    frame['EXECUTIONS'] = executions
    frame['INCOME'] /= 1000

    analytic_weights = np.array(
        [1, 2, 3, 4, 5, 4, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2, 1])
    gaussian_log = sm.families.Gaussian(link=sm.families.links.log())
    formula_model = GLM.from_formula(
        'EXECUTIONS ~ INCOME + SOUTH - 1',
        data=frame,
        family=gaussian_log,
        var_weights=analytic_weights,
    )
    # very tight relative tolerance with no absolute tolerance
    cls.res1 = formula_model.fit(rtol=1e-25, atol=0)
def check_weights_as_formats(weights):
    """Assert fitted results hold weights as ndarrays for a given input.

    ``weights`` is passed once as ``freq_weights`` and once as
    ``var_weights``; after each fit the results' ``_freq_weights``,
    ``_var_weights`` and ``_iweights`` must be ``np.ndarray`` instances.
    """
    for weight_kw in ('freq_weights', 'var_weights'):
        fitted = GLM(cpunish_data.endog, cpunish_data.exog,
                     family=sm.families.Poisson(),
                     **{weight_kw: weights}).fit()
        for attr in ('_freq_weights', '_var_weights', '_iweights'):
            assert isinstance(getattr(fitted, attr), np.ndarray)
def test_incompatible_weights_input():
    """GLM must raise ValueError for weights of the wrong length or shape."""
    # TODO: GH reference?
    weights = [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3]
    exog = cpunish_data.exog
    endog = cpunish_data.endog
    family = sm.families.Poisson()

    invalid_weights = (
        weights[:-1],        # Too short
        weights + [3],       # Too long
        [weights, weights],  # Too many dimensions
    )
    for bad in invalid_weights:
        for weight_kw in ('freq_weights', 'var_weights'):
            with pytest.raises(ValueError):
                GLM(endog, exog, family=family, **{weight_kw: bad})
def test_warnings_raised():
    """Cluster-robust fits with weights must emit at least one warning.

    The check runs once with the weights passed as ``freq_weights`` and once
    as ``var_weights``.  Each fit and ``summary()`` call happens inside a
    recording ``catch_warnings`` block.
    """
    weights = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])
    gid = np.arange(1, 17 + 1) // 2
    cov_kwds = {'groups': gid, 'use_correction': False}

    for weight_kw in ('freq_weights', 'var_weights'):
        with warnings.catch_warnings(record=True) as w:
            # Without this, whether anything is recorded depends on the
            # filters active when the test runs (e.g. an "ignore" filter),
            # which would make the assertion below fail spuriously.
            warnings.simplefilter('always')
            model = GLM(cpunish_data.endog, cpunish_data.exog,
                        family=sm.families.Poisson(),
                        **{weight_kw: weights})
            res1 = model.fit(cov_type='cluster', cov_kwds=cov_kwds)
            res1.summary()
            # TODO: Should this be marked as a smoke test?
            assert len(w) >= 1
def setup_class(cls):
    """Fit a GLM on the cpunish data using the class's ``mod_kwargs``.

    The keyword arguments come from ``cls.mod_kwargs`` (defined outside this
    block); the fitted results are stored on ``cls.res1``.
    """
    cls.res1 = GLM(
        cpunish_data.endog,
        cpunish_data.exog,
        **cls.mod_kwargs
    ).fit()