Example 1
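Both examples assume the imports below. fit_firth is a project-specific Firth penalized-likelihood fitter that is not shown here; its return signature is inferred from how it is unpacked in the code.

import sys
import copy

import numpy as np
import statsmodels.genmod.generalized_linear_model
import statsmodels.tools.eval_measures
from statsmodels.genmod.generalized_linear_model import GLM
from statsmodels.genmod import families
from scipy import stats
from scipy.stats import chi2

# Assumed project helper, defined elsewhere:
# fit_firth(y, X, start_vec=None) -> (intercept, betas, bse, loglik, fitted_probs)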
def fit_logistic(X_hold,Y_hold,Firth=False,resBase=None,LRtest=True):
    """
    Fits a logistic regression model using standard (when Firth = False) or Firth's method (when Firth = True).
    resBase is the result of a previous call to a regression that is used to store data for Firth's method.
    LRtest indicates if the likelihood ratio test should be reported.
    """
    if not Firth:
        # Standard logistic regression via a binomial-family GLM
        res = GLM(Y_hold, X_hold, family=families.Binomial()).fit()
        # AICc adjustment
        res.aicc = statsmodels.tools.eval_measures.aicc(res.llf, nobs=res.nobs, df_modelwc=res.df_model+1)
        # BIC recomputed from the log-likelihood (statsmodels' default GLM BIC is deviance-based)
        res.bic = statsmodels.tools.eval_measures.bic(res.llf, nobs=res.nobs, df_modelwc=res.df_model+1)
    else:
        if resBase is None:
            sys.stderr.write('resBase must be provided to do Firth regression\n')
            sys.exit(1)
        elif not isinstance(resBase, statsmodels.genmod.generalized_linear_model.GLMResultsWrapper):
            sys.stderr.write('resBase must be a statsmodels.genmod.generalized_linear_model.GLMResultsWrapper\n')
            sys.exit(2)
        else:
            res = resBase
        # Firth's penalized-likelihood logistic regression (fit_firth is a project helper defined elsewhere)
        (rint, rbeta, rbse, rfitll, pi) = fit_firth(Y_hold, X_hold, start_vec=None)
        
        if LRtest:
            # Likelihood-ratio test against the intercept-only model
            null_X = np.delete(arr=X_hold, obj=range(1, np.shape(X_hold)[1]), axis=1)
            (null_intercept, null_beta, null_bse, null_fitll, null_pi) = fit_firth(Y_hold, null_X, start_vec=None)
            lrstat = -2.*(null_fitll - rfitll)
            lrt_pvalue = 1.  # left at 1 when the statistic is negative (non-convergence)
            if lrstat > 0.:
                lrt_pvalue = stats.chi2.sf(lrstat, 1)
            res.llnull = null_fitll
            res.lrstat = lrstat
            res.lrt_pval = lrt_pvalue
        
        # AICc adjustment for Firth model
        aicc = statsmodels.tools.eval_measures.aicc(rfitll, nobs=len(Y_hold), df_modelwc=np.shape(X_hold)[1])
        # AIC
        aic = statsmodels.tools.eval_measures.aic(rfitll, nobs=len(Y_hold), df_modelwc=np.shape(X_hold)[1])
        # BIC
        bic = statsmodels.tools.eval_measures.bic(rfitll, nobs=len(Y_hold), df_modelwc=np.shape(X_hold)[1])
        #Store parameters, standard errors, likelihoods, and statistics
        rint = np.array([rint])
        rbeta = np.array(rbeta)
        res.params = np.concatenate([rint,rbeta])
        res.bse = rbse
        res.llf = rfitll
        res.aicc = aicc
        res.aic = aic
        res.bic = bic
        
        #Get Wald p vals for parameters
        res.pvalues = 1. - chi2.cdf(x=(res.params/res.bse)**2, df=1)
        
        # Fitted probabilities (note: this overwrites the results object's predict method)
        res.predict = pi
        
    return res
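A minimal usage sketch for fit_logistic, assuming a NumPy design matrix whose first column is the intercept; the variable names and simulated data are illustrative only, and the Firth call additionally requires the fit_firth helper.

# Illustrative data: intercept column plus one predictor (hypothetical names).
rng = np.random.default_rng(0)
x = rng.normal(size=100)
X_hold = np.column_stack([np.ones(100), x])
Y_hold = (rng.random(100) < 1./(1. + np.exp(-x))).astype(float)

res = fit_logistic(X_hold, Y_hold)                      # standard logistic fit
print(res.params, res.aicc, res.bic)

base = GLM(Y_hold, X_hold, family=families.Binomial()).fit()
res_firth = fit_logistic(X_hold, Y_hold, Firth=True, resBase=base)  # needs fit_firth
print(res_firth.params, res_firth.lrt_pval)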
Example 2
def iterate_logistic(X_hold,Y_hold, fixed_columns = [0], Firth=False):
    """
    Fits logistic regression to the provided data while using the fixed_columns in the regression.
    Firth specifies if Firth regression should be used.
    
    Returns matrices of fitted betas, pvalues, aic, aicc (second order aic), and bic
    """
    n_coef = np.size(fixed_columns) + 1   # coefficients per fit: the fixed columns plus one candidate
    k = np.shape(X_hold)[1]

    betas = np.zeros([k, n_coef])
    pvalues = np.zeros([k, n_coef])
    aic = np.zeros([k, 1])
    aicc = np.zeros([k, 1])
    bic = np.zeros([k, 1])
    
    # Fit constant
    if Firth:
        null_X = np.delete(arr=X_hold, obj=range(1, np.shape(X_hold)[1]), axis=1)  # intercept-only design
        (null_intercept, null_beta, null_bse, null_fitll, null_pi) = fit_firth(Y_hold, null_X, start_vec = None)
        
        # Fit a plain GLM so the result has the same class and attributes as a standard fit,
        # then overwrite its statistics with the Firth values below.
        res = GLM(Y_hold, null_X, family=families.Binomial()).fit()
        # AICc adjustment for Firth model
        res.aicc = statsmodels.tools.eval_measures.aicc(null_fitll, nobs=res.nobs, df_modelwc=res.df_model+1)
        # AIC
        res.aic = statsmodels.tools.eval_measures.aic(null_fitll, nobs=res.nobs, df_modelwc=res.df_model+1)
        # BIC
        res.bic = statsmodels.tools.eval_measures.bic(null_fitll, nobs=res.nobs, df_modelwc=res.df_model+1)
        # Store the Firth intercept estimate
        res.params = np.array([null_intercept])
        #Get Wald p vals for parameters
        res.pvalues = 1. - chi2.cdf(x=(res.params/null_bse)**2, df=1)
    else:
        res = fit_logistic(X_hold[:,0],Y_hold)
    
    betas[0,:] = res.params
    pvalues[0,:] = res.pvalues
    aic[0] = res.aic
    aicc[0] = res.aicc
    bic[0] = res.bic
    
    # Keep a copy of the baseline results object to pass as resBase to later Firth fits
    resBase = copy.deepcopy(res)
    
    no_nan = ~np.isnan(X_hold).any(axis=0)   # columns with no missing values
    for i in range(1, k):
        if no_nan[i]:
            if i not in fixed_columns:
                columns = fixed_columns.copy()
                columns.append(i)
                res = fit_logistic(X_hold[:,columns],Y_hold, Firth=Firth, resBase=resBase,LRtest=False)
                betas[i,:] = res.params
                pvalues[i,:] = res.pvalues
                aic[i] = res.aic
                aicc[i] = res.aicc
                bic[i] = res.bic
    return betas, pvalues, aic, aicc, bic
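A usage sketch for iterate_logistic under the same assumptions (imports and fit_logistic from Example 1; illustrative, simulated data; column 0 of X_hold is the intercept):

# Hypothetical design matrix: intercept plus three candidate predictors.
rng = np.random.default_rng(1)
n = 200
X_hold = np.column_stack([np.ones(n), rng.normal(size=(n, 3))])
Y_hold = (rng.random(n) < 1./(1. + np.exp(-X_hold[:, 1]))).astype(float)

betas, pvalues, aic, aicc, bic = iterate_logistic(X_hold, Y_hold, fixed_columns=[0])
# Row i holds the coefficients from the fit that adds column i to the fixed columns.
print(betas[1:, :], pvalues[1:, :])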