Пример #1
0
 def setup_class(cls):
     """Fit a frequency-weighted Poisson GLM with an HC0 covariance.

     Sets ``cls.res1`` to the fitted results and ``cls.corr_fact`` to
     ``sqrt((wsum - 1) / wsum)``, where ``wsum`` is the sum of the
     frequency weights.
     """
     fweights = np.array(
         [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])
     weight_total = fweights.sum()
     # NOTE(review): corr_fact is presumably consumed by shared checks
     # defined on the enclosing class -- not visible from here.
     cls.corr_fact = np.sqrt((weight_total - 1.) / weight_total)

     poisson_glm = GLM(cpunish_data.endog, cpunish_data.exog,
                       family=sm.families.Poisson(),
                       freq_weights=fweights)
     cls.res1 = poisson_glm.fit(cov_type='HC0')
Пример #2
0
    def test_glm(self):
        """Compare ``GLM.get_prediction`` with the WLS prediction results.

        A GLM is fit on the WLS model's endog/exog after scaling both by
        the square root of the WLS weights.  Its prediction summary frame
        is compared with the first four columns of the WLS one, first with
        ``use_t=True`` and then with the normal distribution.  The
        univariate parameter transformation helper is checked afterwards,
        and a final call verifies that predicting with explicit exog does
        not raise.
        """
        # preliminary, getting started with basic test for GLM.get_prediction
        wls_results = self.res_wls
        wls_model = wls_results.model
        endog = wls_model.endog
        exog = wls_model.exog
        weights = wls_model.weights

        # the GLM gets no weights argument; the data is pre-scaled instead
        root_w = np.sqrt(weights)
        glm_model = GLM(endog * root_w, exog * root_w[:, None])

        n_compare = 30  # in glm with predict wendog

        # compare using t distribution
        glm_results = glm_model.fit(use_t=True)
        glm_frame = glm_results.get_prediction().summary_frame()
        wls_frame = wls_results.get_prediction().summary_frame()
        assert_allclose(glm_frame.values[:n_compare],
                        wls_frame.values[:n_compare, :4])

        # compare using normal distribution
        glm_results = glm_model.fit()  # default use_t=False
        glm_frame = glm_results.get_prediction().summary_frame()
        wls_results = wls_model.fit(use_t=False)
        wls_frame = wls_results.get_prediction().summary_frame()
        assert_allclose(glm_frame.values[:n_compare],
                        wls_frame.values[:n_compare, :4])

        # function for parameter transformation
        # should be separate test method
        rates = params_transform_univariate(glm_results.params,
                                            glm_results.cov_params())

        # expected columns: exp(params), bse * exp(params), exp(conf_int)
        exp_params = np.exp(glm_results.params)
        expected_rates = np.column_stack(
            (exp_params,
             glm_results.bse * exp_params,
             np.exp(glm_results.conf_int())))
        assert_allclose(rates.summary_frame().values, expected_rates,
                        rtol=1e-13)

        # with identity transform the helper must reproduce the fit itself
        identity_pt = params_transform_univariate(glm_results.params,
                                                  glm_results.cov_params(),
                                                  link=links.identity())

        assert_allclose(identity_pt.tvalues, glm_results.tvalues, rtol=1e-13)
        assert_allclose(identity_pt.se_mean, glm_results.bse, rtol=1e-13)
        t_stat, p_value = identity_pt.t_test()[:2]
        assert_allclose(t_stat, glm_results.tvalues, rtol=1e-13)
        assert_allclose(p_value, glm_results.pvalues, rtol=1e-13)

        # prediction with exog and no weights does not error (i.e. smoke test)
        glm_model.fit().get_prediction(exog)
Пример #3
0
 def setup_class(cls):
     """Fit two Binomial GLMs on the star98 data.

     ``cls.res1`` uses the two-column endog directly.  ``cls.res2`` uses
     the first endog column divided by the row totals as endog, with the
     row totals passed as ``var_weights``.
     """
     star98 = sm.datasets.star98.load(as_pandas=False)
     star98.exog = add_constant(star98.exog, prepend=False)

     count_model = GLM(star98.endog, star98.exog,
                       family=sm.families.Binomial())
     cls.res1 = count_model.fit()

     # NOTE(review): the endog columns are presumably success/failure
     # counts, so the row totals would be numbers of trials -- confirm.
     row_totals = star98.endog.sum(axis=1)
     first_col_share = star98.endog[:, 0] / row_totals
     # a fresh family instance is created for each model, as before
     share_model = GLM(first_col_share, star98.exog,
                       family=sm.families.Binomial(),
                       var_weights=row_totals)
     cls.res2 = share_model.fit()
Пример #4
0
    def setup_class(cls):
        """Build two Binomial GLMs with a constant offset on star98.

        ``cls.mod1`` uses every exog column (constant prepended) and
        ``cls.mod2`` uses the same exog without its last five columns.
        Both receive an offset of ones.  ``cls.init()`` is called last.
        """
        from sm2.datasets.star98 import load

        star98 = load(as_pandas=False)
        full_exog = add_constant(star98.exog, prepend=True)
        unit_offset = np.ones(len(star98.endog))

        # model on the reduced design: last five columns dropped
        reduced_exog = full_exog[:, :-5]
        cls.mod2 = GLM(star98.endog, reduced_exog,
                       family=family.Binomial(), offset=unit_offset)

        # model on the full design
        cls.mod1 = GLM(star98.endog, full_exog,
                       family=family.Binomial(), offset=unit_offset)
        cls.init()
Пример #5
0
    def setup_class(cls):
        """Fit a frequency-weighted Poisson GLM with cluster covariance.

        Group ids are ``arange(1, 18) // 2``.  ``cls.corr_fact`` is set to
        ``1 / sqrt(n_groups / (n_groups - 1))`` and ``cls.res1`` holds the
        fit obtained with ``use_correction=False``.
        """
        fweights = np.array(
            [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])

        gid = np.arange(1, 17 + 1) // 2
        n_groups = np.unique(gid).size

        # no wnobs yet in sandwich covariance calculation
        cls.corr_fact = 1 / np.sqrt(n_groups / (n_groups - 1))

        poisson_glm = GLM(cpunish_data.endog, cpunish_data.exog,
                          family=sm.families.Poisson(),
                          freq_weights=fweights)
        cls.res1 = poisson_glm.fit(
            cov_type='cluster',
            cov_kwds={'groups': gid, 'use_correction': False})
Пример #6
0
    def setup_class(cls):
        """Fit a var-weighted Poisson GLM with an HC0 covariance.

        The integer weights are rescaled by ``nobs / wsum`` and passed as
        ``var_weights``.  ``cls.res1`` holds the fit and ``cls.corr_fact``
        the manually adjusted correction factor.
        """
        # faking aweights by using normalized freq_weights
        raw_weights = np.array(
            [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])
        weight_total = raw_weights.sum()
        n_obs = len(cpunish_data.endog)
        aweights = raw_weights / weight_total * n_obs

        # This is really close when corr_fact = (wsum - 1.) / wsum, but to
        # avoid having to loosen the precision of assert_allclose, the
        # extra factor is applied manually. It is *possible* that lowering
        # the IRLS convergence criterion in Stata and here would make this
        # less sketchy.
        cls.corr_fact = (np.sqrt((weight_total - 1.) / weight_total)
                         * 0.98518473599905609)

        poisson_glm = GLM(cpunish_data.endog, cpunish_data.exog,
                          family=sm.families.Poisson(),
                          var_weights=aweights)
        cls.res1 = poisson_glm.fit(cov_type='HC0')
Пример #7
0
    def setup_class(cls):
        """Fit a frequency-weighted Poisson GLM with an HC1 covariance.

        The fitted results are stored in ``cls.res1``.
        """
        fweights = np.array(
            [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])

        poisson_glm = GLM(cpunish_data.endog, cpunish_data.exog,
                          family=sm.families.Poisson(),
                          freq_weights=fweights)
        cls.res1 = poisson_glm.fit(cov_type='HC1')
Пример #8
0
    def setup_class(cls):
        """Fit a var-weighted Poisson GLM and prepare the reference results.

        ``cls.res1`` is the fit with weights rescaled by ``nobs / wsum``.
        ``cls.res2`` is a shallow copy of the stored Stata results whose
        ``resids`` array is copied and has columns 3 and 4 multiplied by
        the square root of those weights.
        """
        # faking aweights by using normalized freq_weights
        raw_weights = np.array(
            [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])
        n_obs = len(cpunish_data.endog)
        aweights = raw_weights / raw_weights.sum() * n_obs

        poisson_glm = GLM(cpunish_data.endog, cpunish_data.exog,
                          family=sm.families.Poisson(),
                          var_weights=aweights)
        cls.res1 = poisson_glm.fit()

        # Work on copies so the shared reference results are never
        # adjusted in place.
        expected = copy.copy(res_stata.results_poisson_aweight_nonrobust)
        resids = expected.resids.copy()

        # Need to adjust resids for pearson and deviance to add weights
        resids[:, 3:5] *= np.sqrt(aweights[:, np.newaxis])
        expected.resids = resids
        cls.res2 = expected
Пример #9
0
    def setup_class(cls):
        """Create a seeded random DataFrame and fit a formula GLM on it.

        ``cls.data`` has columns ``y``, ``var1``, ``var2``, ``var3``
        (rounded to four decimals) and an ``obsNN`` index.  ``cls.res`` is
        the fit of ``y ~ var1 + var2``.
        """
        n_obs = 30
        np.random.seed(987128)
        regressors = np.random.randn(n_obs, 3)
        response = regressors.sum(1) + np.random.randn(n_obs)
        row_labels = ['obs%02d' % i for i in range(n_obs)]

        # add one extra column (var3, absent from the formula) to check
        # that it doesn't matter
        values = np.round(np.column_stack((response, regressors)), 4)
        cls.data = pd.DataFrame(values,
                                columns='y var1 var2 var3'.split(),
                                index=row_labels)

        formula_model = GLM.from_formula('y ~ var1 + var2', data=cls.data)
        cls.res = formula_model.fit()
Пример #10
0
def test_poisson_residuals():
    """Compare Poisson residuals between exposure and var_weights fits.

    One model is fit on the counts with ``exposure``; the other on
    ``counts / exposure`` with ``var_weights=exposure``.  The response
    residuals must agree after dividing the exposure model's by the
    exposure, and the pearson, deviance and anscombe residuals must agree
    directly.  The exposure model's unscaled anscombe residuals are also
    compared with the weighted model's anscombe residuals.
    """
    n_obs, n_cols = 100, 5
    np.random.seed(987125)
    design = add_constant(np.random.randn(n_obs, n_cols - 1))

    linpred = design.sum(1) / 2
    # Not used below; the draw is kept so that the random stream feeding
    # the Poisson sample stays the same.
    _noisy = linpred + 2 * np.random.randn(n_obs)
    exposure = 1 + np.arange(n_obs) // 4

    counts = np.random.poisson(np.exp(linpred) * exposure)
    counts[10:15] += 10

    fam = sm.families.Poisson()
    res_exposure = GLM(counts, design, family=fam,
                       exposure=exposure).fit()
    res_weighted = GLM(counts / exposure, design, family=fam,
                       var_weights=exposure).fit()

    assert_allclose(res_exposure.resid_response / exposure,
                    res_weighted.resid_response)
    for resid_name in ('resid_pearson', 'resid_deviance', 'resid_anscombe'):
        assert_allclose(getattr(res_exposure, resid_name),
                        getattr(res_weighted, resid_name))
    assert_allclose(res_exposure.resid_anscombe_unscaled,
                    res_weighted.resid_anscombe)
Пример #11
0
 def setup_class(cls):
     """Fit a var-weighted Gaussian GLM with log link from a formula.

     The cpunish exog frame gets the endog added as ``EXECUTIONS`` and
     ``INCOME`` divided by 1000.  ``cls.res1`` is the fit of
     ``EXECUTIONS ~ INCOME + SOUTH - 1`` with ``rtol=1e-25, atol=0``.
     """
     cpunish = sm.datasets.cpunish.load_pandas()
     frame = cpunish.exog
     frame['EXECUTIONS'] = cpunish.endog
     frame['INCOME'] /= 1000

     aweights = np.array(
         [1, 2, 3, 4, 5, 4, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2, 1])
     log_gaussian = sm.families.Gaussian(link=sm.families.links.log())
     formula_model = GLM.from_formula('EXECUTIONS ~ INCOME + SOUTH - 1',
                                      data=frame,
                                      family=log_gaussian,
                                      var_weights=aweights)
     cls.res1 = formula_model.fit(rtol=1e-25, atol=0)
Пример #12
0
def check_weights_as_formats(weights):
    """Assert that fitted results store all weight arrays as ndarrays.

    A Poisson GLM on the cpunish data is fit twice, passing ``weights``
    first as ``freq_weights`` and then as ``var_weights``.  After each
    fit, ``_freq_weights``, ``_var_weights`` and ``_iweights`` on the
    results must be ``np.ndarray`` instances.

    Parameters
    ----------
    weights : object
        Weights in whatever container format is under test; forwarded
        unchanged to ``GLM``.
    """
    for weight_kw in ('freq_weights', 'var_weights'):
        model = GLM(cpunish_data.endog, cpunish_data.exog,
                    family=sm.families.Poisson(),
                    **{weight_kw: weights})
        res = model.fit()
        for stored in ('_freq_weights', '_var_weights', '_iweights'):
            assert isinstance(getattr(res, stored), np.ndarray)
Пример #13
0
def test_incompatible_weights_input():
    """Check that GLM rejects weights with an incompatible shape.

    Weights that are one element too short, one element too long, or
    two-dimensional must raise ``ValueError`` at model construction, both
    as ``freq_weights`` and as ``var_weights``.
    """
    # TODO: GH reference?
    weights = [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3]
    exog = cpunish_data.exog
    endog = cpunish_data.endog
    poisson = sm.families.Poisson()

    bad_weights = (
        weights[:-1],        # Too short
        weights + [3],       # Too long
        [weights, weights],  # Too many dimensions
    )
    for bad in bad_weights:
        for weight_kw in ('freq_weights', 'var_weights'):
            with pytest.raises(ValueError):
                GLM(endog, exog, family=poisson, **{weight_kw: bad})
Пример #14
0
def test_warnings_raised():
    """Check that a cluster-robust fit with weights emits a warning.

    A Poisson GLM on the cpunish data is fit with ``cov_type='cluster'``,
    once with ``freq_weights`` and once with ``var_weights``.  For each
    case at least one warning must be recorded while fitting and building
    the summary.
    """
    weights = np.array(
        [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])

    gid = np.arange(1, 17 + 1) // 2
    cov_kwds = {'groups': gid, 'use_correction': False}

    for weight_kw in ('freq_weights', 'var_weights'):
        with warnings.catch_warnings(record=True) as w:
            # Without an explicit "always" filter, a warning that was
            # already issued from the same location (e.g. by an earlier
            # test or the previous loop iteration) can be suppressed, and
            # the assertion below would then fail spuriously.
            warnings.simplefilter('always')
            model = GLM(cpunish_data.endog, cpunish_data.exog,
                        family=sm.families.Poisson(),
                        **{weight_kw: weights})
            res1 = model.fit(cov_type='cluster', cov_kwds=cov_kwds)
            res1.summary()  # TODO: Should this be marked as a smoke test?
            assert len(w) >= 1
Пример #15
0
 def setup_class(cls):
     """Fit a GLM on the cpunish data with ``cls.mod_kwargs``.

     The keyword arguments come from the ``mod_kwargs`` class attribute
     and the fitted results are stored in ``cls.res1``.
     """
     cls.res1 = GLM(cpunish_data.endog, cpunish_data.exog,
                    **cls.mod_kwargs).fit()