Example No. 1
def logistic_regression():
    '''Logistic regression example
    chapter 7.3, p 130
    [tbd]: the cloglog values are inconsistent with those mentioned in the book.
    This is probably due to the specific definitions of "loglog" and "cloglog"
    in the respective languages.
    '''

    inFile = r'GLM_data/Table 7.2 Beetle mortality.xls'
    df = get_data(inFile)

    # adjust the unusual column names in the Excel file
    colNames = [name.split(',')[1].lstrip() for name in df.columns.values]
    df.columns = colNames

    # fit the model
    df['tested'] = df['n']
    df['killed'] = df['y']
    df['survived'] = df['tested'] - df['killed']
    model = glm('survived + killed ~ x', data=df, family=Binomial()).fit()
    print(model.summary())

    print('-' * 65)
    print('Equivalent solution:')

    model = glm('I(n - y) + y ~ x', data=df, family=Binomial()).fit()
    print(model.summary())

    # The fitted number of survivors can be obtained by
    fits = df['n'] * (1 - model.fittedvalues)
    print('Fits Logit:')
    print(fits)

    # The fits for other link functions are:
    model_probit = glm('I(n - y) + y ~ x',
                       data=df,
                       family=Binomial(links.probit)).fit()
    print(model_probit.summary())

    fits_probit = df['n'] * (1 - model_probit.fittedvalues)
    print('Fits Probit:')
    print(fits_probit)

    model_cll = glm('I(n - y) + y ~ x',
                    data=df,
                    family=Binomial(links.cloglog)).fit()
    print(model_cll.summary())
    fits_cll = df['n'] * (1 - model_cll.fittedvalues)
    print('Fits Extreme Value:')
    print(fits_cll)
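A note on the "[tbd]" in the docstring above: statsmodels defines the complementary log-log link as log(-log(1 - p)), while the log-log link is -log(-log(p)); the mismatch with the book most likely comes from this naming difference. A minimal sketch making both definitions explicit (NumPy only, hypothetical probabilities):

import numpy as np

p = np.array([0.1, 0.5, 0.9])  # hypothetical probabilities

# complementary log-log, as in statsmodels' links.cloglog
cloglog = np.log(-np.log(1 - p))

# log-log, as defined in some texts; note the flipped argument and sign
loglog = -np.log(-np.log(p))

print(cloglog)
print(loglog)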
Example No. 2
    def test_compare_logit(self):

        vs = Independence()
        family = Binomial()

        Y = 1 * (np.random.normal(size=100) < 0)
        X1 = np.random.normal(size=100)
        X2 = np.random.normal(size=100)
        X3 = np.random.normal(size=100)
        groups = np.random.randint(0, 4, size=100)

        D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

        mod1 = GEE.from_formula("Y ~ X1 + X2 + X3",
                                groups,
                                D,
                                family=family,
                                cov_struct=vs)
        rslt1 = mod1.fit()

        mod2 = sm.logit("Y ~ X1 + X2 + X3", data=D)
        rslt2 = mod2.fit(disp=False)

        assert_almost_equal(rslt1.params.values,
                            rslt2.params.values,
                            decimal=10)
Example No. 3
    def test_ordinal(self):

        family = Binomial()

        endog, exog, groups = load_data("gee_ordinal_1.csv",
                                        icept=False)

        va = GlobalOddsRatio("ordinal")

        mod = OrdinalGEE(endog, exog, groups, None, family, va)
        rslt = mod.fit()

        # Regression test
        cf = np.r_[1.09250002, 0.0217443 , -0.39851092, -0.01812116,
                   0.03023969, 1.18258516, 0.01803453, -1.10203381]
        assert_almost_equal(rslt.params, cf, decimal=5)

        # Regression test
        se = np.r_[0.10883461, 0.10330197, 0.11177088, 0.05486569,
                   0.05997153, 0.09168148, 0.05953324, 0.0853862]
        assert_almost_equal(rslt.bse, se, decimal=5)

        # Check that we get the correct results type
        assert_equal(type(rslt), OrdinalGEEResultsWrapper)
        assert_equal(type(rslt._results), OrdinalGEEResults)
Example No. 4
 def _regression(self, in_vars):
     X = self.X[in_vars]
     # collect optional GLM keyword arguments, then add frequency weights if given
     kwargs = dict(self.kw_algorithm_class_args) if self.kw_algorithm_class_args is not None else {}
     if self.fit_weight is not None:
         kwargs['freq_weights'] = self.fit_weight
     glm = GLM(self.y, sm.add_constant(X), family=Binomial(link=logit), **kwargs)
     clf = glm.fit()
     # expose sklearn-style attributes on the results object
     clf.intercept_ = [clf.params.const]
     clf.coef_ = [clf.params[1:]]
     return clf
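A hedged usage sketch of the helper above, rewritten standalone (self.X, self.y, and fit_weight belong to the enclosing class, which is not shown; the data here are illustrative):

import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.genmod.generalized_linear_model import GLM
from statsmodels.genmod.families import Binomial
from statsmodels.genmod.families.links import Logit

np.random.seed(0)
X = pd.DataFrame(np.random.normal(size=(100, 2)), columns=['x1', 'x2'])
y = (np.random.uniform(size=100) < 0.5).astype(float)

clf = GLM(y, sm.add_constant(X), family=Binomial(link=Logit())).fit()
clf.intercept_ = [clf.params.const]  # sklearn-style attributes, as in _regression
clf.coef_ = [clf.params[1:]]
print(clf.intercept_, clf.coef_)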
Example No. 5
    def test_default_time(self):
        """
        Check that the time defaults work correctly.
        """

        endog, exog, group = load_data("gee_logistic_1.csv")

        # Time values for the autoregressive model
        T = np.zeros(len(endog))
        idx = set(group)
        for ii in idx:
            jj = np.flatnonzero(group == ii)
            T[jj] = lrange(len(jj))

        family = Binomial()
        va = Autoregressive()

        md1 = GEE(endog, exog, group, family=family, cov_struct=va)
        mdf1 = md1.fit()

        md2 = GEE(endog, exog, group, time=T, family=family, cov_struct=va)
        mdf2 = md2.fit()

        assert_almost_equal(mdf1.params, mdf2.params, decimal=6)
        assert_almost_equal(mdf1.standard_errors(),
                            mdf2.standard_errors(),
                            decimal=6)
Example No. 6
def gendat_ordinal():

    os = ordinal_simulator()
    os.params = np.r_[0., 1]
    os.ngroups = 200
    os.thresholds = [1, 0, -1]
    os.dparams = [
        1.,
    ]
    os.simulate()

    data = np.concatenate((os.endog[:, None], os.exog, os.group[:, None]),
                          axis=1)

    os.endog_ex, os.exog_ex, os.intercepts, os.nthresh = \
        gee_setup_ordinal(data, 0)

    os.group_ex = os.exog_ex[:, -1]
    os.exog_ex = os.exog_ex[:, 0:-1]

    os.exog_ex = np.concatenate((os.intercepts, os.exog_ex), axis=1)

    va = GlobalOddsRatio(4, "ordinal")

    lhs = np.array([[0., 0., 0, 1., 0.], [0., 0, 0, 0, 1]])
    rhs = np.r_[0., 1]

    return os, va, Binomial(), (lhs, rhs)
Example No. 7
    def test_ordinal_pandas(self):

        family = Binomial()

        endog_orig, exog_orig, groups = load_data("gee_ordinal_1.csv",
                                                  icept=False)

        data = np.concatenate(
            (endog_orig[:, None], exog_orig, groups[:, None]), axis=1)
        data = pd.DataFrame(data)
        data.columns = ["endog", "x1", "x2", "x3", "x4", "x5", "group"]

        # Recode as cumulative indicators
        endog, exog, intercepts, nlevel = \
            gee_setup_ordinal(data, "endog")

        exog1 = np.concatenate((intercepts, exog), axis=1)
        groups = exog1[:, -1]
        exog1 = exog1[:, 0:-1]

        v = GlobalOddsRatio(nlevel, "ordinal")

        beta = gee_ordinal_starting_values(endog_orig, exog_orig.shape[1])

        md = GEE(endog, exog1, groups, None, family, v)
        mdf = md.fit(start_params=beta)

        cf = np.r_[1.09238131, 0.02148193, -0.39879146, -0.01855666,
                   0.02983409, 1.18123172, 0.01845318, -1.10233886]
        se = np.r_[0.10878752, 0.10326078, 0.11171241, 0.05488705, 0.05995019,
                   0.0916574, 0.05951445, 0.08539281]

        assert_almost_equal(mdf.params, cf, decimal=2)
        assert_almost_equal(mdf.bse, se, decimal=2)
Example No. 8
def test_calc_wdesign_mat():

    # separately tests that _calc_wdesign_mat
    # returns sensible results
    #
    # regression test

    np.random.seed(435265)
    X = np.random.normal(size=(3, 3))
    y = np.random.randint(0, 2, size=3)
    beta = np.random.normal(size=3)
    mod = OLS(y, X)
    dmat = _calc_wdesign_mat(mod, beta, {})
    assert_allclose(dmat,
                    np.array([[1.306314, -0.024897, 1.326498],
                              [-0.539219, -0.483028, -0.703503],
                              [-3.327987, 0.524541, -0.139761]]),
                    atol=1e-6,
                    rtol=0)

    mod = GLM(y, X, family=Binomial())
    dmat = _calc_wdesign_mat(mod, beta, {})
    assert_allclose(dmat,
                    np.array([[0.408616, -0.007788, 0.41493],
                              [-0.263292, -0.235854, -0.343509],
                              [-0.11241, 0.017718, -0.004721]]),
                    atol=1e-6,
                    rtol=0)
Example No. 9
    def fit(self,
            start_params=None,
            maxiter=100000,
            maxfun=5000,
            disp=False,
            method='bfgs',
            **kwds):
        """
        Fit the model.
        Parameters
        ----------
        start_params : array-like
            A vector of starting values for the regression
            coefficients.  If None, a default is chosen.
        maxiter : integer
            The maximum number of iterations
        disp : bool
            Show convergence stats.
        method : str
            The optimization method to use.
        """

        if start_params is None:
            start_params = sm.GLM(self.endog, self.exog,
                                  family=Binomial()).fit(disp=False).params
            start_params = np.append(start_params, [0.5] * self.Z.shape[1])

        return super(Beta, self).fit(start_params=start_params,
                                     maxiter=maxiter,
                                     maxfun=maxfun,
                                     method=method,
                                     disp=disp,
                                     **kwds)
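The start-up logic of fit() can be exercised on its own; a minimal sketch of how the default start_params are assembled (endog in (0, 1), and Z a hypothetical one-column precision design matrix):

import numpy as np
import statsmodels.api as sm
from statsmodels.genmod.families import Binomial

np.random.seed(2)
exog = sm.add_constant(np.random.normal(size=(100, 2)))
endog = np.clip(np.random.beta(2, 5, size=100), 1e-4, 1 - 1e-4)
Z = np.ones((100, 1))  # hypothetical precision design matrix

start_params = sm.GLM(endog, exog, family=Binomial()).fit(disp=False).params
start_params = np.append(start_params, [0.5] * Z.shape[1])
print(start_params)  # mean coefficients, then one 0.5 per precision column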
Example No. 10
def test_join_naive():

    # tests that the results of all the intermediate steps
    # remains correct for naive join, does this for OLS and GLM
    #
    # regression test

    np.random.seed(435265)
    X = np.random.normal(size=(50, 3))
    y = np.random.randint(0, 2, size=50)
    mod = OLS(y, X)
    res_l = []
    for i in range(2):
        res = _est_regularized_naive(mod, i, 2, fit_kwds={"alpha": 0.1})
        res_l.append(res)
    joined = _join_naive(res_l)
    assert_allclose(joined, np.array([-0.020757, 0., 0.]), atol=1e-6, rtol=0)

    mod = GLM(y, X, family=Binomial())
    res_l = []
    for i in range(2):
        res = _est_regularized_naive(mod, i, 2, fit_kwds={"alpha": 0.1})
        res_l.append(res)
    joined = _join_naive(res_l)
    assert_allclose(joined, np.array([0., 0., 0.]), atol=1e-6, rtol=0)
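A minimal sketch of the join the test exercises, under the assumption (consistent with statsmodels' distributed estimation code) that _join_naive simply averages the per-partition estimates:

import numpy as np

# hypothetical regularized estimates from two partitions
params_l = [np.array([-0.04, 0.0, 0.0]), np.array([0.0, 0.0, 0.0])]

# naive join: element-wise average over partitions
joined = np.sum(params_l, axis=0) / len(params_l)
print(joined)  # [-0.02, 0.0, 0.0]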
Example No. 11
def test_join_debiased():

    # tests that the results of all the intermediate steps
    # remains correct for debiased join, does this for OLS and GLM
    #
    # regression test

    np.random.seed(435265)
    X = np.random.normal(size=(50, 3))
    y = np.random.randint(0, 2, size=50)
    mod = OLS(y, X)
    res_l = []
    for i in range(2):
        res = _est_regularized_debiased(mod, i, 2, fit_kwds={"alpha": 0.1})
        res_l.append(res)
    joined = _join_debiased(res_l)
    assert_allclose(joined,
                    np.array([-0.167548, -0.016567, -0.34414]),
                    atol=1e-6,
                    rtol=0)

    mod = GLM(y, X, family=Binomial())
    res_l = []
    for i in range(2):
        res = _est_regularized_debiased(mod, i, 2, fit_kwds={"alpha": 0.1})
        res_l.append(res)
    joined = _join_debiased(res_l)
    assert_allclose(joined,
                    np.array([-0.164515, -0.412854, -0.223955]),
                    atol=1e-6,
                    rtol=0)
Example No. 12
def initial_guess_mean(endog, exog_mean, bounded_reg_link, method="Default"):
    """
    A function that obtains an initial guess for the regression 
    parameters related to the mean in a bounded data regression model. The
    initial guess is obtained from a quasi-likelihood regression.
    
    :param endog (array_like): 1d array of endogenous response variable.
    
    :param exog_mean (array_like): A nobs x k array where nobs is the number 
    of observations and k is the number of mean regressors. An intercept is 
    not included by default and should be added by the user.
    
    :param bounded_reg_link: An instance of BoundedRegLink. Recall that
    the default precision link is None.
    
    :param method (str): The method to be used to obtain the initial guesses.
    The options are: 'Default' (estimate the mean by quasi-likelihood) and 
    'R' (use the same strategy used in R's version).
    """
    if method == "Default":
        initial_guess_mean_param = (sm.GLM(
            endog,
            exog_mean,
            family=Binomial(link=bounded_reg_link.get_link_mean()),
        ).fit(disp=False).params)
    elif method == "R":
        endog_mod = bounded_reg_link.link_mean(endog)
        initial_guess_mean_param = (sm.OLS(endog_mod,
                                           exog_mean).fit(disp=False).params)
    else:
        raise ValueError(
            "Please enter a valid method for the initial guess, " +
            "the options are 'Default' and 'R'.")

    return correct_dimension(initial_guess_mean_param)
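A hedged sketch of the two strategies, with statsmodels' Logit link standing in for the (unshown) BoundedRegLink; the names and data are illustrative only:

import numpy as np
import statsmodels.api as sm
from statsmodels.genmod.families import Binomial
from statsmodels.genmod.families.links import Logit

np.random.seed(1)
exog_mean = sm.add_constant(np.random.normal(size=(100, 2)))
endog = np.clip(np.random.beta(2, 2, size=100), 1e-4, 1 - 1e-4)

# 'Default': quasi-likelihood via a binomial GLM with the mean link
params_default = sm.GLM(endog, exog_mean, family=Binomial(link=Logit())).fit().params

# 'R': OLS on the link-transformed response
params_r = sm.OLS(Logit()(endog), exog_mean).fit().params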
Example No. 13
def test_fit_joblib():

    # tests that the results of all the intermediate steps
    # remains correct for joblib fit, does this for OLS and GLM
    # and a variety of model sizes
    #
    # regression test

    np.random.seed(435265)
    X = np.random.normal(size=(50, 3))
    y = np.random.randint(0, 2, size=50)

    mod = DistributedModel(1, model_class=OLS)
    fit = mod.fit(_data_gen(y, X, 1), parallel_method="joblib",
                  fit_kwds={"alpha": 0.5})
    assert_allclose(fit.params, np.array([-0.191606, -0.012565, -0.351398]),
                    atol=1e-6, rtol=0)
    mod = DistributedModel(2, model_class=OLS)
    fit = mod.fit(_data_gen(y, X, 2), parallel_method="joblib",
                  fit_kwds={"alpha": 0.5})
    assert_allclose(fit.params, np.array([-0.157416, -0.029643, -0.471653]),
                    atol=1e-6, rtol=0)
    mod = DistributedModel(3, model_class=OLS)
    fit = mod.fit(_data_gen(y, X, 3), parallel_method="joblib",
                  fit_kwds={"alpha": 0.5})
    assert_allclose(fit.params, np.array([-0.124891, -0.050934, -0.403354]),
                    atol=1e-6, rtol=0)

    mod = DistributedModel(1, model_class=GLM,
                           init_kwds={"family": Binomial()})
    fit = mod.fit(_data_gen(y, X, 1), parallel_method="joblib",
                  fit_kwds={"alpha": 0.5})
    assert_allclose(fit.params, np.array([-0.164515, -0.412854, -0.223955]),
                    atol=1e-6, rtol=0)
    mod = DistributedModel(2, model_class=GLM,
                           init_kwds={"family": Binomial()})
    fit = mod.fit(_data_gen(y, X, 2), parallel_method="joblib",
                  fit_kwds={"alpha": 0.5})
    assert_allclose(fit.params, np.array([-0.142513, -0.360324, -0.295485]),
                    atol=1e-6, rtol=0)
    mod = DistributedModel(3, model_class=GLM,
                           init_kwds={"family": Binomial()})
    fit = mod.fit(_data_gen(y, X, 3), parallel_method="joblib",
                  fit_kwds={"alpha": 0.5})
    assert_allclose(fit.params, np.array([-0.110487, -0.306431, -0.243921]),
                    atol=1e-6, rtol=0)
Example No. 14
def test_debiased_v_average():

    # tests that the debiased method performs better than the standard
    # average.  Does this for both OLS and GLM.

    np.random.seed(435265)
    N = 200
    p = 10
    m = 4

    beta = np.random.normal(size=p)
    beta = beta * np.random.randint(0, 2, p)
    X = np.random.normal(size=(N, p))
    y = X.dot(beta) + np.random.normal(size=N)

    db_mod = DistributedModel(m)
    fitOLSdb = db_mod.fit(_data_gen(y, X, m), fit_kwds={"alpha": 0.2})
    olsdb = np.linalg.norm(fitOLSdb.params - beta)
    n_mod = DistributedModel(m,
                             estimation_method=_est_regularized_naive,
                             join_method=_join_naive)
    fitOLSn = n_mod.fit(_data_gen(y, X, m), fit_kwds={"alpha": 0.2})
    olsn = np.linalg.norm(fitOLSn.params - beta)

    assert_(olsdb < olsn)

    prob = 1 / (1 + np.exp(-X.dot(beta) + np.random.normal(size=N)))
    y = 1. * (prob > 0.5)

    db_mod = DistributedModel(m,
                              model_class=GLM,
                              init_kwds={"family": Binomial()})
    fitGLMdb = db_mod.fit(_data_gen(y, X, m), fit_kwds={"alpha": 0.2})
    glmdb = np.linalg.norm(fitGLMdb.params - beta)
    n_mod = DistributedModel(m,
                             model_class=GLM,
                             init_kwds={"family": Binomial()},
                             estimation_method=_est_regularized_naive,
                             join_method=_join_naive)
    fitGLMn = n_mod.fit(_data_gen(y, X, m), fit_kwds={"alpha": 0.2})
    glmn = np.linalg.norm(fitGLMn.params - beta)

    assert_(glmdb < glmn)
Example No. 15
 def test_logit(self):
     from statsmodels.formula.api import glm
     from statsmodels.genmod.families import Binomial
     
     inData = logit.getData()
     dfFit = logit.prepareForFit(inData)
     model = glm('ok + failed ~ temp', data=dfFit, family=Binomial()).fit()
     logit.showResults(inData, model)
     
     self.assertAlmostEqual(model.params.Intercept, -15.042902, places=5)
Example No. 16
def senility_and_WAIS():
    '''Another example of logistic regression.
    chapter 7.8, p 143
    [tbd]: I don't understand how the "Binomial model" (grouped response)
    is supposed to work, in either language'''

    inFile = r'GLM_data/Table 7.8 Senility and WAIS.xls'
    df = get_data(inFile)

    # ungrouped
    model = glm('s ~ x', data=df, family=Binomial()).fit()
    print(model.summary())
Example No. 17
    def setup_class(cls):

        family = Binomial()

        endog, exog, groups = load_data("gee_ordinal_1.csv", icept=False)

        va = GlobalOddsRatio("ordinal")

        cls.mod = OrdinalGEE(endog, exog, groups, None, family, va)
        cls.start_params = np.array([
            1.09250002, 0.0217443, -0.39851092, -0.01812116, 0.03023969,
            1.18258516, 0.01803453, -1.10203381
        ])
Example No. 18
    def test_wrapper(self):

        endog, exog, groups = load_data("gee_ordinal_1.csv", icept=False)

        endog = pd.Series(endog, name='yendog')
        exog = pd.DataFrame(exog)
        groups = pd.Series(groups, name='the_group')

        family = Binomial()
        va = GlobalOddsRatio("ordinal")
        mod = OrdinalGEE(endog, exog, groups, None, family, va)
        rslt2 = mod.fit()

        check_wrapper(rslt2)
Example No. 19
def general_logistic_regression():
    '''Example of general logistic regression,
    Example 7.4.1, p. 135'''

    # Get the data
    inFile = r'GLM_data/Table 7.5 Embryogenic anthers.xls'
    df = get_data(inFile)

    # Define the variables so that they match Dobson
    df['n_y'] = df['n'] - df['y']
    df['newstor'] = df['storage'] - 1
    df['x'] = np.log(df['centrifuge'])

    # Model 1
    model1 = glm('n_y + y ~ newstor*x', data=df, family=Binomial()).fit()
    print(model1.summary())

    # Model 2
    model2 = glm('n_y + y ~ newstor+x', data=df, family=Binomial()).fit()
    print(model2.summary())

    # Model 3
    model3 = glm('n_y + y ~ x', data=df, family=Binomial()).fit()
    print(model3.summary())
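Dobson compares these nested models through their deviances; a minimal sketch of that likelihood-ratio check for the fits above (scipy assumed available):

from scipy import stats

# dropping the interaction: model2 (additive) against model1
lr = model2.deviance - model1.deviance
df_diff = model1.df_model - model2.df_model
print('LR = %.2f on %d df, p = %.3f' % (lr, df_diff, stats.chi2.sf(lr, df_diff)))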
Example No. 20
    def __init__(self, formula=None, data=None, link=logit, **kwargs):

        if formula:
            y, X = patsy.dmatrices(formula, data, 1)
            self._y_design_info = y.design_info
            self._X_design_info = X.design_info
            self._model = GLM(y, X, family=Binomial(link), **kwargs)
            self._fit = self._model.fit()
            self._betas = self._fit.params
            self._link = link
        else:
            self._y_design_info = None
            self._X_design_info = None
            self._model = None
            self._fit = None
            self._betas = None
            self._link = link
Example No. 21
def test_est_unregularized_naive():

    # tests that the shape of all the intermediate steps
    # remains correct for unregularized naive estimation,
    # does this for OLS and GLM

    np.random.seed(435265)
    X = np.random.normal(size=(50, 3))
    y = np.random.randint(0, 2, size=50)
    beta = np.random.normal(size=3)
    mod = OLS(y, X)
    res = _est_unregularized_naive(mod, 0, 2, fit_kwds={"alpha": 0.5})

    assert_equal(res.shape, beta.shape)

    mod = GLM(y, X, family=Binomial())
    res = _est_unregularized_naive(mod, 0, 2, fit_kwds={"alpha": 0.5})

    assert_equal(res.shape, beta.shape)
Example No. 22
    def test_ordinal(self):

        family = Binomial()

        endog, exog, groups = load_data("gee_ordinal_1.csv", icept=False)

        v = GlobalOddsRatio("ordinal")

        md = GEE(endog, exog, groups, None, family, v)
        md.setup_ordinal()
        mdf = md.fit()

        cf = np.r_[1.09238131, 0.02148193, -0.39879146, -0.01855666,
                   0.02983409, 1.18123172, 0.01845318, -1.10233886]
        se = np.r_[0.10878752, 0.10326078, 0.11171241, 0.05488705, 0.05995019,
                   0.0916574, 0.05951445, 0.08539281]

        assert_almost_equal(mdf.params, cf, decimal=5)
        assert_almost_equal(mdf.bse, se, decimal=5)
Example No. 23
    def test_ordinal(self):

        family = Binomial()

        endog, exog, groups = load_data("gee_ordinal_1.csv", icept=False)

        v = GlobalOddsRatio("ordinal")

        md = OrdinalGEE(endog, exog, groups, None, family, v)
        mdf = md.fit()

        cf = np.r_[1.09250002, 0.0217443, -0.39851092, -0.01812116, 0.03023969,
                   1.18258516, 0.01803453, -1.10203381]

        se = np.r_[0.10883461, 0.10330197, 0.11177088, 0.05486569, 0.05997153,
                   0.09168148, 0.05953324, 0.0853862]

        assert_almost_equal(mdf.params, cf, decimal=5)
        assert_almost_equal(mdf.bse, se, decimal=5)
Example No. 24
def test_est_regularized_debiased():

    # tests that the shape of all the intermediate steps
    # remains correct for regularized debiased estimation,
    # does this for OLS and GLM

    np.random.seed(435265)
    X = np.random.normal(size=(50, 3))
    y = np.random.randint(0, 2, size=50)
    beta = np.random.normal(size=3)
    mod = OLS(y, X)
    res = _est_regularized_debiased(mod, 0, 2, fit_kwds={"alpha": 0.5})
    bhat = res[0]
    grad = res[1]
    ghat_l = res[2]
    that_l = res[3]

    assert_(isinstance(res, tuple))
    assert_equal(bhat.shape, beta.shape)
    assert_equal(grad.shape, beta.shape)
    assert_(isinstance(ghat_l, list))
    assert_(isinstance(that_l, list))
    assert_equal(len(ghat_l), len(that_l))
    assert_equal(ghat_l[0].shape, (2, ))
    assert_(isinstance(that_l[0], float))

    mod = GLM(y, X, family=Binomial())
    res = _est_regularized_debiased(mod, 0, 2, fit_kwds={"alpha": 0.5})
    bhat = res[0]
    grad = res[1]
    ghat_l = res[2]
    that_l = res[3]

    assert_(isinstance(res, tuple))
    assert_equal(bhat.shape, beta.shape)
    assert_equal(grad.shape, beta.shape)
    assert_(isinstance(ghat_l, list))
    assert_(isinstance(that_l, list))
    assert_equal(len(ghat_l), len(that_l))
    assert_equal(ghat_l[0].shape, (2, ))
    assert_(isinstance(that_l[0], float))
Example No. 25
    def fit_regression(self, ax=None, x_range=None, grid=None):
        """Fit the regression model."""
        # Create the grid for the regression
        if grid is None:
            if self.truncate:
                x_min, x_max = self.x_range
            else:
                if ax is None:
                    x_min, x_max = x_range
                else:
                    x_min, x_max = ax.get_xlim()
            grid = np.linspace(x_min, x_max, 100)
        ci = self.ci

        # Fit the regression
        if self.order > 1:
            yhat, yhat_boots = self.fit_poly(grid, self.order)
        elif self.logistic:
            from statsmodels.genmod.generalized_linear_model import GLM
            from statsmodels.genmod.families import Binomial
            yhat, yhat_boots = self.fit_statsmodels(grid,
                                                    GLM,
                                                    family=Binomial())
        elif self.lowess:
            ci = None
            grid, yhat = self.fit_lowess()
        elif self.robust:
            from statsmodels.robust.robust_linear_model import RLM
            yhat, yhat_boots = self.fit_statsmodels(grid, RLM)
        elif self.logx:
            yhat, yhat_boots = self.fit_logx(grid)
        else:
            yhat, yhat_boots = self.fit_fast(grid)

        # Compute the confidence interval at each grid point
        if ci is None:
            err_bands = None
        else:
            err_bands = utils.ci(yhat_boots, ci, axis=0)

        return grid, yhat, err_bands
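This is the code path seaborn takes for regplot(..., logistic=True); a minimal usage sketch:

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

np.random.seed(0)
x = np.random.normal(size=200)
y = (np.random.uniform(size=200) < 1 / (1 + np.exp(-x))).astype(int)

# logistic=True routes fit_regression() through the GLM/Binomial branch above
sns.regplot(x=x, y=y, logistic=True, n_boot=100)
plt.show()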
Example No. 26
    def test_compare_logit(self):

        vs = Independence()
        family = Binomial()

        Y = 1 * (np.random.normal(size=100) < 0)
        X1 = np.random.normal(size=100)
        X2 = np.random.normal(size=100)
        X3 = np.random.normal(size=100)
        groups = np.random.randint(0, 4, size=100)

        D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

        md = GEE.from_formula("Y ~ X1 + X2 + X3",
                              D,
                              None,
                              groups=groups,
                              family=family,
                              covstruct=vs).fit()

        sml = sm.logit("Y ~ X1 + X2 + X3", data=D).fit()

        assert_almost_equal(sml.params.values, md.params, decimal=10)
Example No. 27
    def test_margins(self):

        n = 300
        exog = np.random.normal(size=(n, 4))
        exog[:, 0] = 1
        exog[:, 1] = 1 * (exog[:, 2] < 0)

        group = np.kron(np.arange(n / 4), np.ones(4))
        time = np.zeros((n, 1))

        beta = np.r_[0, 1, -1, 0.5]
        lpr = np.dot(exog, beta)
        prob = 1 / (1 + np.exp(-lpr))

        endog = 1 * (np.random.uniform(size=n) < prob)

        fa = Binomial()
        ex = Exchangeable()

        md = GEE(endog, exog, group, time, fa, ex)
        mdf = md.fit()

        marg = GEEMargins(mdf, ())
        marg.summary()
Example No. 28
    plt.xlabel("Outside Temperature [F]")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")
    plt.tight_layout()
    
    # Plot the fit
    x = np.arange(50, 85)
    alpha = model.params[0]
    beta = model.params[1]
    y = logistic(x, beta, alpha)
    
    # overlay the fitted curve (plt.hold was removed from matplotlib; overlaying is the default)
    plt.plot(x, y, 'r')
    plt.xlim([50, 85])
    
    outFile = 'ChallengerPlain.png'
    showData(outFile)
    
    
if __name__ == '__main__':
    inData = getData()
    dfFit = prepareForFit(inData)
    
    # fit the model
    
    # --- >>> START stats <<< ---
    model = glm('ok + failed ~ temp', data=dfFit, family=Binomial()).fit()
    # --- >>> STOP stats <<< ---
    
    print(model.summary())
    
    showResults(inData, model)
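The plot calls a logistic() helper that the excerpt does not show; a hedged sketch consistent with how the fitted intercept and slope are used (the sign convention inside the exponent is an assumption):

import numpy as np

def logistic(x, beta, alpha):
    # inverse-logit of the linear predictor alpha + beta * x (assumed parameterization)
    return 1.0 / (1 + np.exp(-(alpha + beta * x)))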
Example No. 29
'''
Prediction(80)
probability prediction: 0.872046286637

Prediction(100)
probability prediction: 0.970179520648

'''

# load the data
inData = getData()
# compute the frequencies used for the fit
dfFit = prepareForFit(inData)
# fit a binomial Generalized Linear Model
model = glm('同盾多头借贷未命中 +同盾多头借贷命中 ~ 同盾分数', data=dfFit, family=Binomial()).fit()
print(model.summary())
chi2 = model.pearson_chi2
'''Out[37]: 46.893438309853522 -- the smaller this statistic, the larger the p-value, the more plausible H0 and the better the model'''
print("the smaller the chi2, the better the model")

alpha = model.params[0]
beta = model.params[1]

Plot(inData, alpha, beta, "logistic regression")

# test the predictions
Prediction(20)
Prediction(60)
Prediction(80)
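Prediction() is likewise not shown; a hedged sketch, assuming it evaluates the fitted inverse-logit at a given score (the parameterization is an assumption):

def Prediction(score):
    # probability from the fitted model at `score` (assumed parameterization)
    p = 1.0 / (1 + np.exp(-(alpha + beta * score)))
    print('probability prediction:', p)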
Example No. 30
# -*- coding: utf-8 -*-

#import library
import pandas as pd
from statsmodels.formula.api import glm
from statsmodels.genmod.families import Binomial

# load the data
crabs = pd.read_csv("horseshoe.csv")

# logistic regression
model = glm("satellite_1 ~ width + spine", data=crabs, family=Binomial()).fit()
print(model.summary())
"""
                 Generalized Linear Model Regression Results                  
==============================================================================
Dep. Variable:            satellite_1   No. Observations:                  173
Model:                            GLM   Df Residuals:                      170
Model Family:                Binomial   Df Model:                            2
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -97.218
Date:                Wed, 06 May 2020   Deviance:                       194.44
Time:                        17:13:38   Pearson chi2:                     165.
No. Iterations:                     4                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    -12.4410      2.723     -4.568      0.000     -17.779      -7.103
width          0.4980      0.102      4.887      0.000       0.298       0.698
spine          0.0282      0.220      0.128      0.898      -0.402       0.458