Exemplo n.º 1
0
    def test_equivalence(self):
        """
        The Equivalence covariance structure can represent an
        exchangeable covariance structure.  Here we check that the
        results are identical using the two approaches.
        """

        np.random.seed(3424)
        endog = np.random.normal(size=20)
        exog = np.random.normal(size=(20, 2))
        exog[:, 0] = 1
        groups = np.kron(np.arange(5), np.ones(4))
        groups[12:] = 3  # Create unequal size groups

        # Set up an Equivalence covariance structure to mimic an
        # Exchangeable covariance structure.  Groups 0-2 hold four
        # observations each; group 3 holds the remaining eight.
        pairs = {}
        start = [0, 4, 8, 12]
        for k in range(4):
            size = 4 if k < 3 else 8
            offset = start[k]

            # Label 0: diagonal values (variance parameters).  Two
            # separate arrays are built so the row and column index
            # vectors never alias each other.
            diag = (offset + np.arange(size), offset + np.arange(size))

            # Label 1: off-diagonal pairs (covariance parameters),
            # taken from the strict lower triangle.
            a, b = np.tril_indices(size, -1)
            pairs[k] = {0: diag, 1: (offset + a, offset + b)}

        ex = sm.cov_struct.Exchangeable()
        model1 = sm.GEE(endog, exog, groups, cov_struct=ex)
        result1 = model1.fit()

        for return_cov in False, True:

            ec = sm.cov_struct.Equivalence(pairs, return_cov=return_cov)
            model2 = sm.GEE(endog, exog, groups, cov_struct=ec)
            result2 = model2.fit()

            # Use large atol/rtol for the correlation case since there
            # are some small differences in the results due to degree
            # of freedom differences.
            if return_cov:
                atol, rtol = 1e-6, 1e-6
            else:
                atol, rtol = 1e-3, 1e-3
            assert_allclose(result1.params, result2.params,
                            atol=atol, rtol=rtol)
            assert_allclose(result1.bse, result2.bse, atol=atol, rtol=rtol)
            assert_allclose(result1.scale, result2.scale,
                            atol=atol, rtol=rtol)
def dosim(hyp, cov_struct=None, mcrep=500):
    """
    Run a Monte Carlo study of the GEE score test.

    Each replication simulates responses, fits a null model (design
    `x0`) and an alternative model (design `x`) as Poisson GEEs with
    `scale='X2'`, and score-tests the alternative against the null.

    The function reads the module-level names `n`, `m`, `r`, `mu`,
    `scale`, `x0`, `x`, `grp`, `negbinom` and `norm`.

    Parameters
    ----------
    hyp :
        Key used to pick the mean from `mu` (``mu[hyp]``).
    cov_struct : optional
        Passed unchanged as `cov_struct` to both GEE models.
    mcrep : int
        Number of Monte Carlo replications.

    Returns
    -------
    rslt : list
        ``[mean p-value, proportion of p-values below 0.1]``.
    scales : list
        Two lists holding the scale estimates of the null fits
        (position 0) and of the alternative fits (position 1).
    """

    null_scales = []
    alt_scales = []
    pvalues = []

    for _ in range(mcrep):

        # Build correlated "probability points": an observation-level
        # draw is mixed with a cluster-level draw that is repeated m
        # times, then mapped through the normal CDF.
        obs_noise = np.random.normal(size=n)
        cluster_noise = np.kron(np.random.normal(size=n // m), np.ones(m))
        latent = r * obs_noise + np.sqrt(1 - r**2) * cluster_noise
        prob_points = norm.cdf(latent)

        # Observed responses
        y = negbinom(prob_points, mu=mu[hyp], scale=scale)

        # Null model
        null_model = sm.GEE(y, x0, groups=grp, cov_struct=cov_struct,
                            family=sm.families.Poisson())
        null_fit = null_model.fit(scale='X2')
        null_scales.append(null_fit.scale)

        # Alternative model
        alt_model = sm.GEE(y, x, groups=grp, cov_struct=cov_struct,
                           family=sm.families.Poisson())
        alt_fit = alt_model.fit(scale='X2')
        alt_scales.append(alt_fit.scale)

        # Score test of the alternative against the null fit
        score = alt_model.compare_score_test(null_fit)
        pvalues.append(score["p-value"])

    pvalues = np.asarray(pvalues)
    summary = [np.mean(pvalues), np.mean(pvalues < 0.1)]

    return summary, [null_scales, alt_scales]
Exemplo n.º 3
0
def vcfassoc(formula, covariate_df, groups=None):
    """
    Fit a binomial association model and summarise the genotype term.

    Parameters
    ----------
    formula :
        patsy formula; converted with ``str`` before it is evaluated.
    covariate_df :
        Data the formula is evaluated against.
    groups : optional
        Selects the model that is fitted:

        * ``None`` -- a binomial GLM.
        * a ``pd.DataFrame`` or ``np.ndarray`` -- a GLS fit of the
          logit-transformed response, with ``sigma`` taken from
          `groups` at the rows/columns labelled by the design index.
        * anything else -- used as a key into `covariate_df` to get
          the cluster labels of a binomial GEE with an ``Exchangeable``
          covariance structure.

    Returns
    -------
    dict
        ``'OR'``, ``'pvalue'``, ``'z'`` and ``'OR_CI'`` for the genotype
        term, plus ``'df_resid'`` when the fitted result exposes it.
    """

    y, X = patsy.dmatrices(str(formula), covariate_df, return_type='dataframe')
    # get the column containing genotype
    ix = get_genotype_ix(X)
    Binomial = sm.families.Binomial
    logit = sm.families.links.Logit()

    if groups is None:
        model = sm.GLM(y, X, missing='drop', family=Binomial())
    elif isinstance(groups, (pd.DataFrame, np.ndarray)):
        # `.ix` was removed in pandas 1.0; `.loc` performs the
        # label-based selection on both old and new pandas.
        # NOTE(review): an ndarray has no label indexer, so this branch
        # only works when `groups` is a DataFrame -- confirm whether
        # ndarray input is really meant to be supported.
        model = sm.GLS(logit(y), X, sigma=groups.loc[X.index, X.index])
    else:
        cov = Exchangeable()
        model = sm.GEE(y,
                       X,
                       groups=covariate_df[groups],
                       cov_struct=cov,
                       family=Binomial())

    result = model.fit(maxiter=1000)

    # Emulate the removed `.ix` lookup: try `ix` as a label first and
    # fall back to treating it as a position.
    conf_int = result.conf_int()
    try:
        ci_row = conf_int.loc[ix, :]
    except (KeyError, TypeError):
        ci_row = conf_int.iloc[ix, :]

    res = {
        'OR': np.exp(result.params[ix]),
        'pvalue': result.pvalues[ix],
        'z': result.tvalues[ix],
        'OR_CI': tuple(np.exp(ci_row)),
    }
    # Not every result type carries residual degrees of freedom.
    try:
        res['df_resid'] = result.df_resid
    except AttributeError:
        pass
    return res
Exemplo n.º 4
0
    def test_margins_gaussian(self):
        """
        Check marginal effects for a Gaussian GEE fit.  Marginal
        effects and ordinary effects should be equal.
        """

        n = 40
        np.random.seed(34234)
        exog = np.random.normal(size=(n, 3))
        exog[:, 0] = 1

        # Clusters of four consecutive observations.  Floor division
        # keeps the cluster count an integer.
        groups = np.kron(np.arange(n // 4), np.r_[1, 1, 1, 1])

        endog = exog[:, 1] + np.random.normal(size=n)

        model = sm.GEE(endog, exog, groups)
        result = model.fit(
            start_params=[-4.88085602e-04, 1.18501903, 4.78820100e-02])

        marg = result.get_margeff()

        # The intercept is excluded from the comparison.
        assert_allclose(marg.margeff, result.params[1:])
        assert_allclose(marg.margeff_se, result.bse[1:])
Exemplo n.º 5
0
    def test_equivalence_from_pairs(self):
        """
        Check the index pairs that an Equivalence structure builds from
        labels: per group they must account for m*(m+1)/2 positions
        (m being the group size) and no index pair may occur twice.
        Finishes with a short smoke-test fit.
        """

        np.random.seed(3424)
        endog = np.random.normal(size=50)
        exog = np.random.normal(size=(50, 2))
        exog[:, 0] = 1
        groups = np.repeat(np.arange(5.0), 10)
        groups[30:] = 3  # Create unequal size groups

        # Set up labels: five classes of ten, in random order.
        labels = np.repeat(np.arange(5), 10).astype(np.int32)
        labels = labels[np.random.permutation(len(labels))]

        eq = sm.cov_struct.Equivalence(labels=labels, return_cov=True)
        model1 = sm.GEE(endog, exog, groups, cov_struct=eq)

        # Call this directly instead of letting init do it to get the
        # result before reindexing.
        eq._pairs_from_labels()

        # Make sure the size is correct to hold every element.
        for g in model1.group_labels:
            n_pairs = sum(len(v[0]) for v in eq.pairs[g].values())
            m = sum(groups == g)
            assert_allclose(n_pairs, m * (m + 1) / 2)

        # Check for duplicates.
        seen = set()
        for g in model1.group_labels:
            for v in eq.pairs[g].values():
                for ky in zip(v[0], v[1]):
                    assert ky not in seen
                    seen.add(ky)

        # Smoke test
        eq = sm.cov_struct.Equivalence(labels=labels, return_cov=True)
        model1 = sm.GEE(endog, exog, groups, cov_struct=eq)
        result1 = model1.fit(maxiter=2)
Exemplo n.º 6
0
    def setup(self):
        """
        Simulate Poisson counts from ``self.exog``, fit a Poisson GEE
        with an Independence covariance structure and store the fit on
        ``self.results``.
        """
        # Refit for every test, because the tests modify the results.
        design = self.exog
        np.random.seed(987689)
        linear_part = design.sum(1) - design.mean()
        y_count = np.random.poisson(np.exp(linear_part))
        groups = np.random.randint(0, 4, size=design.shape[0])

        independence = sm.cov_struct.Independence()
        poisson = sm.families.Poisson()
        model = sm.GEE(y_count, design, groups, family=poisson,
                       cov_struct=independence)
        # Start values speed up the test; difficult convergence is not
        # what is being tested here.
        self.results = model.fit(start_params=np.array([0., 1., 1., 1.]))
Exemplo n.º 7
0
    def setup(self):
        """
        Simulate Poisson counts from ``self.exog``, fit a Poisson GEE
        with an Independence covariance structure using the
        'bias_reduced' covariance type, and store the fit on
        ``self.results``.
        """
        # Refit for every test, because the tests modify the results.
        design = self.exog
        np.random.seed(987689)
        # Row sums centred at their own mean form the linear predictor.
        row_sums = design.sum(1)
        y_count = np.random.poisson(np.exp(row_sums - row_sums.mean(0)))
        groups = np.random.randint(0, 4, size=design.shape[0])
        # params_est = np.array([-0.0063238 ,  0.99463752,  1.02790201,  0.98080081])

        independence = sm.cov_struct.Independence()
        poisson = sm.families.Poisson()
        mod = sm.GEE(y_count, design, groups, family=poisson,
                     cov_struct=independence)
        # Start values speed up the test; difficult convergence is not
        # what is being tested here.
        self.results = mod.fit(start_params=np.array([0., 1., 1., 1.]),
                               cov_type='bias_reduced')
Exemplo n.º 8
0
    def setup(self):
        """
        Simulate Poisson counts from ``self.exog``, fit a Poisson GEE
        with an Independence covariance structure using the 'naive'
        covariance type, and store the fit on ``self.results``.
        """
        #fit for each test, because results will be changed by test
        x = self.exog
        np.random.seed(987689)
        # Row sums centred at their own mean form the linear predictor.
        y_count = np.random.poisson(np.exp(x.sum(1) - x.sum(1).mean(0)))
        groups = np.random.randint(0, 4, size=x.shape[0])
        # use start_params to speed up test, difficult convergence not tested
        start_params = np.array([0., 1., 1., 1.])

        # Take Independence from `sm.cov_struct` rather than importing
        # it from the old `statsmodels.genmod.dependence_structures`
        # module path.
        vi = sm.cov_struct.Independence()
        family = sm.families.Poisson()
        self.results = sm.GEE(y_count, self.exog, groups, family=family,
                              cov_struct=vi).fit(start_params=start_params,
                                                 cov_type='naive')
Exemplo n.º 9
0
    def test_margins_poisson(self):
        """
        Check marginal effects for a Poisson GEE fit against fixed
        reference values.
        """

        np.random.seed(34234)
        endog = np.array([10, 15, 12, 13, 20, 18, 26, 29])
        # Intercept column plus a 0/1 indicator for the last four rows.
        indicator = np.repeat([0.0, 1.0], 4)
        exog = np.column_stack((np.ones(8), indicator))

        # Every observation is its own group.
        groups = np.arange(8)

        poisson_model = sm.GEE(endog, exog, groups,
                               family=sm.families.Poisson())
        fit = poisson_model.fit(cov_type='naive',
                                start_params=[2.52572864, 0.62057649])

        effects = fit.get_margeff()

        assert_allclose(effects.margeff, np.r_[11.0928], rtol=1e-6)
        assert_allclose(effects.margeff_se, np.r_[3.269015], rtol=1e-6)
Exemplo n.º 10
0
def test_plots():
    """
    Smoke-test the added-variable, partial-residual and CERES-residual
    plot methods of a fitted GEE result.
    """

    np.random.seed(378)
    # Keep the draw order: the 1-d response first, then the 2-column
    # design matrix.
    response = np.random.normal(size=100)
    design = np.random.normal(size=(100, 2))
    cluster_ids = np.repeat(np.arange(50), 2)

    fitted = sm.GEE(response, design, cluster_ids).fit()

    import matplotlib.pyplot as plt

    # Smoke tests: each plot is created for column 1 and closed again.
    for method_name in ("plot_added_variable",
                        "plot_partial_residuals",
                        "plot_ceres_residuals"):
        fig = getattr(fitted, method_name)(1)
        plt.close(fig)
Exemplo n.º 11
0
    def test_margins_logistic(self):
        """
        Check marginal effects for a binomial GEE fit.  Comparison
        comes from Stata.
        """

        np.random.seed(34234)
        endog = np.array([0, 0, 0, 0, 1, 1, 1, 1])
        # Intercept column plus one covariate.
        covariate = [1, 2, 1, 1, 2, 1, 2, 2]
        exog = np.column_stack((np.ones(8), covariate))

        # Every observation is its own group.
        groups = np.arange(8)

        binomial_model = sm.GEE(endog, exog, groups,
                                family=sm.families.Binomial())
        fit = binomial_model.fit(cov_type='naive',
                                 start_params=[-3.29583687,  2.19722458])

        effects = fit.get_margeff()

        assert_allclose(effects.margeff, np.r_[0.4119796])
        assert_allclose(effects.margeff_se, np.r_[0.1379962], rtol=1e-6)
Exemplo n.º 12
0
    def test_multinomial(self):
        """
        Check the 2-class multinomial (nominal) GEE fit against
        logistic regression: the coefficients must agree up to sign
        and the standard errors must match.
        """

        np.random.seed(34234)
        endog = np.array([0, 0, 0, 0, 1, 1, 1, 1])
        # Intercept column plus one covariate.
        covariate = [1, 2, 1, 1, 2, 1, 2, 2]
        exog = np.column_stack((np.ones(8), covariate))

        # Every observation is its own group.
        groups = np.arange(8)

        nominal_model = sm.NominalGEE(endog, exog, groups)
        nominal_fit = nominal_model.fit(cov_type='naive',
                                        start_params=[3.295837, -2.197225])

        binomial = sm.families.Binomial()
        logit_fit = sm.GEE(endog, exog, groups,
                           family=binomial).fit(cov_type='naive')

        assert_allclose(nominal_fit.params, -logit_fit.params, rtol=1e-5)
        assert_allclose(nominal_fit.bse, logit_fit.bse, rtol=1e-5)
Exemplo n.º 13
0
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Example script: fit a binomial GEE to the Spector dataset and print
# the fit summary.
spector_data = sm.datasets.spector.load()
# Binomial family combined with an Autoregressive covariance structure.
family = sm.families.Binomial()
va = sm.cov_struct.Autoregressive()
# NOTE(review): the group labels are read from `spector_data.group`;
# confirm that the object returned by `load()` actually exposes that
# attribute.
model = sm.GEE(spector_data.endog,
               spector_data.exog,
               spector_data.group,
               family=family,
               cov_struct=va)
result = model.fit()
print(result.summary())