Пример #1
0
 def setup_class(cls):
     """Fit a logit-inflated ZIP model on the RAND HIE data and store fixtures.

     Stores on ``cls``: the loaded dataset (``data``), its ``endog``, the
     fitted result (``res1``), the expected init keys/keywords, and a
     ``RandHIE`` instance on which ``zero_inflated_poisson_logit()`` has been
     called (``res2``).
     """
     randhie = sm.datasets.randhie.load_pandas()
     cls.data = randhie
     cls.endog = randhie.endog

     # Count part uses columns 1..3, inflation part uses column 0; both get
     # a constant appended as the last column.
     count_design = sm.add_constant(randhie.exog.iloc[:, 1:4], prepend=False)
     infl_design = sm.add_constant(randhie.exog.iloc[:, 0], prepend=False)

     # we don't need to verify convergence here
     initial_guess = np.asarray([
         0.10337834587498942, -1.0459825102508549, -0.08219794475894268,
         0.00856917434709146, -0.026795737379474334, 1.4823632430107334
     ])
     fit_options = dict(start_params=initial_guess,
                        method='newton',
                        maxiter=500,
                        disp=0)
     zip_model = sm.ZeroInflatedPoisson(randhie.endog,
                                        count_design,
                                        exog_infl=infl_design,
                                        inflation='logit')
     fitted = zip_model.fit(**fit_options)
     # for llnull test
     fitted._results._attach_nullmodel = True
     cls.res1 = fitted

     cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset']
     cls.init_kwds = {'inflation': 'logit'}

     reference = RandHIE()
     reference.zero_inflated_poisson_logit()
     cls.res2 = reference
def tiny_zip(l):
    """Fit a regularized zero-inflated Poisson model to the rows of ``l``.

    The first element of every item in ``l`` is the target; the remaining
    elements are the predictors.

    Returns
    -------
    list
        ``[fitted_model, ppf_95, rmse]`` on success, where ``ppf_95`` is the
        0.95 Poisson quantile at the predicted means and ``rmse`` compares
        it with the target. The result of ``return_zeros`` is returned when
        the target has no non-zero entry, when a "Singular matrix"
        ``LinAlgError`` is raised, or when an ``AssertionError`` is raised.
        An empty list is returned for a ``ValueError`` or for any other
        ``LinAlgError``.
    """
    predictors = np.array([row[1:] for row in l])
    target = np.array([row[0] for row in l]).reshape(-1, 1)

    try:
        if np.count_nonzero(target) == 0:
            return return_zeros(target, "AllZeros")

        fitted = sm.ZeroInflatedPoisson(target, predictors).fit_regularized(
            maxiter=10000, disp=0, maxfun=10000)
        inflation_const = np.ones((len(predictors), 1))
        mean_pred = fitted.predict(predictors, exog_infl=inflation_const)
        upper_quantile = stats.poisson.ppf(q=0.95, mu=mean_pred)
        rmse_train = np.sqrt(mean_squared_error(target, upper_quantile))
        return [fitted, upper_quantile, rmse_train]

    except np.linalg.LinAlgError as err:
        if 'Singular matrix' not in str(err):
            # Any other linear-algebra failure yields an empty result.
            return []
        # Regularization is expected to avoid this; record the share of
        # zeros in the target for later inspection.
        zero_count = len(target) - np.count_nonzero(target)
        fallback = return_zeros(target, "Singular")
        zip_prop_err_singmat.append(round((100 * zero_count) / len(target), 2))
        return fallback
    except AssertionError:
        return return_zeros(target, "Assert")
    except ValueError:
        print("\t\t\tIgnored output containing np.nan or np.inf")
        return []
Пример #3
0
def fitZIP(preCellType,postCellType):
    """Fit an intercept-only zero-inflated Poisson model for one cell-type pair.

    Reads the single-column CSV ``<pre>_<post>.csv`` from the
    ``data_dense_model`` directory, fits ``sm.ZeroInflatedPoisson`` with a
    constant regressor, and writes two files into ``fit_dense_model``:

    * ``<pre>_<post>_ZIP.csv`` with the observed relative frequency of each
      count value (column ``x``) and the predicted probabilities averaged
      over observations (column ``xFit``);
    * ``<pre>_<post>_ZIP_FitResults.csv`` with the fit summary as CSV text.

    Parameters
    ----------
    preCellType, postCellType
        Values formatted with ``%s`` to build the file names.

    Returns
    -------
    None
    """
    # Local import keeps this block self-contained.
    import os

    pair = "%s_%s" % (preCellType, postCellType)

    # Get data from file. os.path.join replaces the former "dir\%s" literals,
    # which relied on the invalid escape sequence "\%" and on a hard-coded
    # Windows separator; on Windows the resulting paths are unchanged.
    filename = os.path.join("data_dense_model", pair + ".csv")
    df = pd.read_csv(filename, header=None, names=["data"])

    # Prepare data for fitting
    X = df.data
    nobs = len(X)
    exog = np.ones(nobs)
    freq = np.bincount(X) / nobs

    # Fit data
    mod_ZIP = sm.ZeroInflatedPoisson(X, exog)
    res_ZIP = mod_ZIP.fit(disp=False)

    # Get fitting results: per-observation probabilities averaged over axis 0
    probs_zip = res_ZIP.predict(which='prob')
    probsm_zip = probs_zip.mean(0)

    # Export freq and probsm_zip
    outputDF = pd.DataFrame({'x': freq, 'xFit': probsm_zip},
                            columns=['x', 'xFit'])
    outputfilename = os.path.join("fit_dense_model", pair + "_ZIP.csv")
    outputDF.to_csv(outputfilename, index=None, header=True)

    # Export fit results; the context manager closes the file even if the
    # write fails.
    summary_csv = res_ZIP.summary().as_csv()
    outputfilenameFit = os.path.join("fit_dense_model",
                                     pair + "_ZIP_FitResults.csv")
    with open(outputfilenameFit, "w") as text_file:
        text_file.write(summary_csv)
Пример #4
0
 def setup_class(cls):
     """Simulate zero-inflated Poisson data and fit a ZIP model with BFGS.

     The design has a constant column and a second column equal to 2 for
     the first half of the rows and 1 for the rest. The simulated response
     is stored as ``cls.endog`` and the fit result as ``cls.res``.
     """
     true_coefs = [1, 0.5]
     n_samples = 200

     first_half = np.arange(n_samples) < n_samples // 2
     design = np.column_stack([
         np.ones(n_samples),
         np.where(first_half, 2.0, 1.0),
     ])
     mean_true = design.dot(true_coefs)

     # Seed immediately before the only random draw so the data are
     # reproducible.
     np.random.seed(123)
     cls.endog = sm.distributions.zipoisson.rvs(mean_true, 0.05,
                                                size=mean_true.shape)

     zip_model = sm.ZeroInflatedPoisson(cls.endog, design)
     cls.res = zip_model.fit(method='bfgs', maxiter=5000, maxfun=5000, disp=0)
Пример #5
0
    def test_exposure(self):
        """Check that ``offset`` and ``exposure = exp(offset)`` are equivalent.

        A second model is fitted on the arrays of ``self.res1.model`` with
        the offset replaced by the matching exposure. Parameters and
        predictions are then compared for the full sample and for a
        five-row subset, with and without passing offset/exposure to
        ``predict``.
        """
        # use data arrays from class model
        close = dict(atol=1e-6, rtol=1e-6)
        base_res = self.res1
        base_model = base_res.model
        log_exposure = base_model.offset

        exposure_model = sm.ZeroInflatedPoisson(
            base_model.endog,
            base_model.exog,
            exog_infl=base_model.exog_infl,
            exposure=np.exp(log_exposure))
        exposure_res = exposure_model.fit(start_params=base_res.params,
                                          method='newton',
                                          maxiter=500,
                                          disp=False)

        assert_allclose(exposure_res.params, base_res.params, **close)
        pred_base = base_res.predict()
        pred_expo = exposure_res.predict()
        assert_allclose(pred_expo, pred_base, **close)

        # Full sample, everything passed explicitly; the offset goes in as a
        # plain list on the offset side.
        full_exog = base_model.exog
        full_infl = base_model.exog_infl
        log_exposure = base_model.offset
        pred_base_full = base_res.predict(exog=full_exog,
                                          exog_infl=full_infl,
                                          offset=log_exposure.tolist())
        pred_expo_full = exposure_res.predict(exog=full_exog,
                                              exog_infl=full_infl,
                                              exposure=np.exp(log_exposure))
        assert_allclose(pred_expo_full, pred_base_full, **close)

        # Every second row among the first ten.
        rows = slice(None, 10, 2)
        sub_exog = base_model.exog[rows]
        sub_infl = base_model.exog_infl[rows]
        sub_offset = log_exposure[rows]
        # # TODO: this raises with shape mismatch,
        # # i.e. uses offset or exposure from model -> fix it or not?
        # GLM.predict to setting offset and exposure to zero
        # fitted1_1 = self.res1.predict(exog=ex, exog_infl=ex_infl)
        # fitted3_1 = res3.predict(exog=ex, exog_infl=ex_infl)
        # assert_allclose(fitted3_1, fitted1_1, atol=1e-6, rtol=1e-6)

        pred_base_sub = base_res.predict(exog=sub_exog,
                                         exog_infl=sub_infl,
                                         offset=sub_offset)
        pred_expo_sub = exposure_res.predict(exog=sub_exog,
                                             exog_infl=sub_infl,
                                             exposure=np.exp(sub_offset))
        assert_allclose(pred_expo_sub, pred_base_sub, **close)
        assert_allclose(pred_base_sub, pred_base[rows], **close)
        assert_allclose(pred_expo_sub, pred_base[rows], **close)

        # without specifying offset and exposure
        pred_base_plain = base_res.predict(exog=sub_exog, exog_infl=sub_infl)
        pred_expo_plain = exposure_res.predict(exog=sub_exog,
                                               exog_infl=sub_infl)
        assert_allclose(pred_expo_plain, pred_base_plain, **close)
Пример #6
0
 def test_pd_offset_exposure(self):
     """Smoke test: models accept pandas ``offset`` / ``exposure`` inputs.

     Fits a Poisson model with a Series offset, then a zero-inflated
     Poisson model with a Series exposure for each inflation link. The test
     passes when no call raises.
     """
     counts = pd.DataFrame({'F': [0.0, 0.0, 0.0, 0.0, 1.0]})
     design = pd.DataFrame({'I': [1.0, 1.0, 1.0, 1.0, 1.0],
                            'C': [0.0, 1.0, 0.0, 1.0, 0.0]})
     offset = pd.Series([1, 1, 1, 2, 1])
     exposure = pd.Series([1., 1, 1, 2, 1])

     poisson_model = sm.Poisson(endog=counts, exog=design, offset=offset)
     poisson_model.fit()

     for link in ('logit', 'probit'):
         zip_model = sm.ZeroInflatedPoisson(endog=counts,
                                            exog=design["I"],
                                            exposure=exposure,
                                            inflation=link)
         zip_model.fit()
Пример #7
0
def test_poi_nb_zip_zinb_tiny_subset(meta, m):
    """Fit four count models on an 800-row random subset of ``m``; print RMSEs.

    Rows are drawn with replacement using a fixed seed. Column 0 of ``m`` is
    the target and the remaining columns are the predictors. The subset is
    split 60/40 into train and test parts. Poisson, Negative Binomial,
    Zero-Inflated Poisson and Zero-Inflated Negative Binomial models are
    each fitted with Newton's method (at most 50 iterations).

    For the two Poisson-type models the RMSE compares the test target with
    the 0.95 Poisson quantile at the predicted mean. For the two negative
    binomial models it compares the test target with the ``predict`` output
    directly.

    Parameters
    ----------
    meta
        Not used by this function.
    m
        2-D array indexed as ``m[rows, :]``.
    """
    # Column names of the source table with the first four entries dropped.
    # Only needed for the optional Poisson summary kept commented out below.
    exog_names = r"rowid;latitude;longitude;target;dbuiltup;dforest;drecreation;dbrr;dwrl;dwrn;dwrr;dcamping;dcaravan;dcross;dgolf;dheem;dhaven;dsafari;dwater;attr;dbath;lu;lc;maxmeanhaz;maxstdhaz".split(";")[4:]

    np.random.seed(2)

    # randint's upper bound is exclusive, so len(m) (not len(m) - 1) is
    # needed for the last row to be eligible for sampling.
    randint = np.random.randint(0, high=len(m), size=800)

    msel = m[randint, :]

    Y = msel[:, 0]
    X = msel[:, 1:]

    # Ynz, Xnz = trim_value(Y, X, 0)

    print("Msel shape: ", msel.shape)

    xtrain, xtest, ytrain, ytest = train_test_split(X, Y, train_size=0.60, random_state=42)

    print(xtrain.shape, ytrain.shape, xtest.shape, ytest.shape)

    # The bare `print` expressions used here before were Python 2 leftovers
    # that print nothing in Python 3; `print()` emits the blank line.
    print()
    print("Model: Poisson")
    poi_mod = sm.Poisson(ytrain, xtrain).fit(method="newton", maxiter=50)
    poi_mean_pred = poi_mod.predict(xtest)
    poi_ppf_obs = stats.poisson.ppf(q=0.95, mu=poi_mean_pred)
    poi_rmse = np.sqrt(mean_squared_error(ytest, poi_ppf_obs))
    # print(np.unique(poi_ppf_obs, return_counts=True))
    print("RMSE Poisson: ", poi_rmse)
    # print(poi_mod.summary(yname='tickbites', xname=exog_names))

    print()
    print("Model: Neg. Binomial")
    nb_mod = sm.NegativeBinomial(ytrain, xtrain).fit(start_params=None, method='newton', maxiter=50)
    nb_pred = nb_mod.predict(xtest)
    nb_rmse = np.sqrt(mean_squared_error(ytest, nb_pred))
    # print(np.unique(nb_pred, return_counts=True))
    print("RMSE Negative Binomial: ", nb_rmse)

    print()
    print("Model: Zero Inflated Poisson")
    zip_mod = sm.ZeroInflatedPoisson(ytrain, xtrain).fit(method="newton", maxiter=50)
    # The inflation part was fitted without exog_infl, so predict gets a
    # column of ones with one row per test observation.
    zip_mean_pred = zip_mod.predict(xtest, exog_infl=np.ones((len(xtest), 1)))
    zip_ppf_obs = stats.poisson.ppf(q=0.95, mu=zip_mean_pred)
    zip_rmse = np.sqrt(mean_squared_error(ytest, zip_ppf_obs))
    print("RMSE Zero-Inflated Poisson", zip_rmse)

    print()
    print("Model: Zero Inflated Neg. Binomial")
    zinb_mod = sm.ZeroInflatedNegativeBinomialP(ytrain, xtrain).fit(method="newton", maxiter=50)
    zinb_pred = zinb_mod.predict(xtest, exog_infl=np.ones((len(xtest), 1)))
    zinb_rmse = np.sqrt(mean_squared_error(ytest, zinb_pred))
    print("RMSE Zero-Inflated Negative Binomial: ", zinb_rmse)
Пример #8
0
    def test_names(self):
        """Check parameter names for both constant placements.

        With the constant appended (the fitted ``self.res1``), the names on
        the model, ``params`` and ``bse`` must match. With the constant
        prepended, a freshly built model must report the reordered names.
        """
        fitted = self.res1
        expected = ['inflate_lncoins', 'inflate_const', 'idp', 'lpi',
                    'fmde', 'const']
        assert_array_equal(fitted.model.exog_names, expected)
        assert_array_equal(fitted.params.index.tolist(), expected)
        assert_array_equal(fitted.bse.index.tolist(), expected)

        # Same columns, constant placed first this time.
        raw_exog = self.data.exog
        count_design = sm.add_constant(raw_exog.iloc[:, 1:4], prepend=True)
        infl_design = sm.add_constant(raw_exog.iloc[:, 0], prepend=True)
        expected = ['inflate_const', 'inflate_lncoins', 'const', 'idp',
                    'lpi', 'fmde']
        prepended_model = sm.ZeroInflatedPoisson(self.data.endog,
                                                 count_design,
                                                 exog_infl=infl_design,
                                                 inflation='logit')
        assert_array_equal(prepended_model.exog_names, expected)
 def setup_class(cls):
     """Fit a probit-inflated ZIP model on the RAND HIE arrays; store fixtures.

     Stores on ``cls``: ``endog``, the fitted result (``res1``), the
     expected init keys/keywords, and ``RandHIE.zero_inflated_poisson_probit``
     (``res2``).
     """
     randhie = sm.datasets.randhie.load(as_pandas=False)
     cls.endog = randhie.endog

     # Count part uses columns 1..3, inflation part uses column 0; both get
     # a constant appended as the last column.
     count_design = sm.add_constant(randhie.exog[:, 1:4], prepend=False)
     infl_design = sm.add_constant(randhie.exog[:, 0], prepend=False)

     zip_model = sm.ZeroInflatedPoisson(randhie.endog,
                                        count_design,
                                        exog_infl=infl_design,
                                        inflation='probit')
     fitted = zip_model.fit(method='newton', maxiter=500, disp=0)
     # for llnull test
     fitted._results._attach_nullmodel = True
     cls.res1 = fitted

     cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset']
     cls.init_kwds = {'inflation': 'probit'}
     cls.res2 = RandHIE.zero_inflated_poisson_probit
Пример #10
0
def tiny_zip(l):
    """Fit a zero-inflated Poisson model (Newton, 50 iterations) to ``l``.

    The first element of every item in ``l`` is the target; the remaining
    elements are the predictors.

    Returns
    -------
    list
        ``[model, ppf_95, rmse]``. Entries not reached because of a handled
        error keep their defaults (``None``, ``None``, ``0``). A
        ``LinAlgError`` and a ``ValueError`` are both swallowed; a message
        is printed for the latter and for singular-matrix errors.
    """
    print("\t\tRunning Zero-Inflated Poisson")
    fitted, upper_quantile, rmse = None, None, 0
    predictors = np.array([row[1:] for row in l])
    target = np.array([row[0] for row in l]).reshape(-1, 1)
    try:
        zip_model = sm.ZeroInflatedPoisson(target, predictors)
        fitted = zip_model.fit(method="newton", maxiter=50)
        inflation_const = np.ones((len(predictors), 1))
        mean_pred = fitted.predict(predictors, exog_infl=inflation_const)
        upper_quantile = stats.poisson.ppf(q=0.95, mu=mean_pred)
        rmse = np.sqrt(mean_squared_error(target, upper_quantile))
    except np.linalg.LinAlgError as err:
        if 'Singular matrix' in str(err):
            print("\t\t\tIgnored a singular matrix.")
    except ValueError:
        print("\t\t\tIgnored output containing np.nan or np.inf")
    return [fitted, upper_quantile, rmse]
Пример #11
0
def test_poi_nb_zip_zinb_raw_data(meta, m):
    """Fit four count models on all rows of ``m`` and print their test RMSEs.

    Column 0 of ``m`` is the target and the remaining columns are the
    predictors. The data are split 60/40 into train and test parts. Poisson,
    Negative Binomial, Zero-Inflated Poisson and Zero-Inflated Negative
    Binomial models are each fitted with Newton's method (at most 50
    iterations).

    For the two Poisson-type models the RMSE compares the test target with
    the 0.95 Poisson quantile at the predicted mean. For the two negative
    binomial models it compares the test target with the ``predict`` output
    directly.

    Parameters
    ----------
    meta
        Not used by this function.
    m
        2-D array indexed as ``m[:, 0]`` / ``m[:, 1:]``.
    """
    Y = m[:, 0]
    X = m[:, 1:]
    # NOTE(review): the trimmed arrays are not used below; the call is kept
    # as in the original in case trim_value matters to callers. Confirm.
    Ynz, Xnz = trim_value(Y, X, 0)
    xtrain, xtest, ytrain, ytest = train_test_split(X, Y, train_size=0.60, random_state=77)

    print("Training with: ", xtrain.shape, ytrain.shape)
    print("Testing with: ", xtest.shape, ytest.shape)

    print()
    print("Model: Poisson")
    poi_mod = sm.Poisson(ytrain, xtrain).fit(method="newton", maxiter=50)
    poi_mean_pred = poi_mod.predict(xtest)
    poi_ppf_obs = stats.poisson.ppf(q=0.95, mu=poi_mean_pred)
    poi_rmse = np.sqrt(mean_squared_error(ytest, poi_ppf_obs))

    # This block replaces a duplicated Zero-Inflated Neg. Binomial fit that
    # scored test-set predictions against ytrain and left nb_rmse undefined,
    # so the report below raised NameError.
    print()
    print("Model: Neg. Binomial")
    nb_mod = sm.NegativeBinomial(ytrain, xtrain).fit(start_params=None, method='newton', maxiter=50)
    nb_pred = nb_mod.predict(xtest)
    nb_rmse = np.sqrt(mean_squared_error(ytest, nb_pred))

    print("Model: Zero Inflated Poisson")
    zip_mod = sm.ZeroInflatedPoisson(ytrain, xtrain).fit(method="newton", maxiter=50)
    # The inflation part was fitted without exog_infl, so predict gets a
    # column of ones with one row per test observation.
    zip_mean_pred = zip_mod.predict(xtest, exog_infl=np.ones((len(xtest), 1)))
    zip_ppf_obs = stats.poisson.ppf(q=0.95, mu=zip_mean_pred)
    zip_rmse = np.sqrt(mean_squared_error(ytest, zip_ppf_obs))

    print("Model: Zero Inflated Neg. Binomial")
    zinb_mod = sm.ZeroInflatedNegativeBinomialP(ytrain, xtrain).fit(method="newton", maxiter=50)
    zinb_pred = zinb_mod.predict(xtest, exog_infl=np.ones((len(xtest), 1)))
    zinb_rmse = np.sqrt(mean_squared_error(ytest, zinb_pred))

    print("RMSE Poisson: ", poi_rmse)
    print("RMSE Negative Binomial: ", nb_rmse)
    print("RMSE Zero-Inflated Poisson", zip_rmse)
    print("RMSE Zero-Inflated Negative Binomial: ", zinb_rmse)
Пример #12
0
# Script fragment: fit four count models on (xtrain, ytrain) and score them
# on (xtest, ytest). The train/test arrays are not defined in this fragment
# (presumably created earlier in the script; verify).

# Poisson: RMSE is taken against the 0.95 Poisson quantile at the predicted
# mean, not against the mean itself.
poi_mod = sm.Poisson(ytrain, xtrain).fit(method="newton", maxiter=50)
poi_mean_pred = poi_mod.predict(xtest)
poi_ppf_obs = stats.poisson.ppf(q=0.95, mu=poi_mean_pred)
poi_rmse = np.sqrt(mean_squared_error(ytest, poi_ppf_obs))

# Negative Binomial: RMSE is taken against the predict() output directly.
print("Model: Neg. Binomial")
nb_mod = sm.NegativeBinomial(ytrain, xtrain).fit(start_params=None,
                                                 method='newton',
                                                 maxiter=50)
nb_pred = nb_mod.predict(xtest)
nb_rmse = np.sqrt(mean_squared_error(ytest, nb_pred))

# Debug output: shape of a 1-D vector of ones with one entry per test row.
print(np.ones(len(xtest)).shape)

# Zero-Inflated Poisson: predict is given a single column of ones as
# exog_infl, one row per test observation.
print("Model: Zero Inflated Poisson")
zip_mod = sm.ZeroInflatedPoisson(ytrain, xtrain).fit(method="newton",
                                                     maxiter=50)
zip_mean_pred = zip_mod.predict(xtest, exog_infl=np.ones((len(xtest), 1)))
zip_ppf_obs = stats.poisson.ppf(q=0.95, mu=zip_mean_pred)
zip_rmse = np.sqrt(mean_squared_error(ytest, zip_ppf_obs))

# Zero-Inflated Negative Binomial: same exog_infl handling; RMSE is taken
# against the predict() output directly.
print("Model: Zero Inflated Neg. Binomial")
zinb_mod = sm.ZeroInflatedNegativeBinomialP(ytrain,
                                            xtrain).fit(method="newton",
                                                        maxiter=50)
zinb_pred = zinb_mod.predict(xtest, exog_infl=np.ones((len(xtest), 1)))
zinb_rmse = np.sqrt(mean_squared_error(ytest, zinb_pred))

print(xtrain.shape, ytrain.shape, xtest.shape, ytest.shape)

# Only the Poisson and Negative Binomial RMSEs are reported in this
# fragment; zip_rmse and zinb_rmse are computed above but not printed here.
print("RMSE Poisson: ", poi_rmse)
print("RMSE Neg. Bin.: ", nb_rmse)
# Build response / design matrices for the disability model from the formula
# in expr_disab. expr_disab, train_disab and test_disab are not defined in
# this fragment (presumably created earlier in the script; verify).
y_train_disab, X_train_disab = dmatrices(expr_disab, train_disab, return_type='dataframe')
y_test_disab, X_test_disab = dmatrices(expr_disab, test_disab, return_type='dataframe')

# Train a Poisson GLM on the training split and print its summary.
poisson_training_results_disab = sm.GLM(y_train_disab, X_train_disab, family=sm.families.Poisson()).fit()
print(poisson_training_results_disab.summary())


#%% ZIP Model FOR DISABLED

# Plot the distribution of the disability rate to inspect the share of zeros.
# NOTE(review): distplot is deprecated in recent seaborn releases - confirm
# against the seaborn version this script is pinned to.
ax = sns.distplot(finalhatedata_nonNAN['Disability_p1000'])
plt.title('Distribution of Disabled hate crime rate')

# Fit a zero-inflated Poisson model with logit inflation and print its
# summary. The same design matrix is used for the count part (exog) and the
# inflation part (exog_infl).
zip_training_results_disab = sm.ZeroInflatedPoisson(endog=y_train_disab, exog=X_train_disab, exog_infl=X_train_disab, inflation='logit').fit()
print(zip_training_results_disab.summary())


#%% poisson reg - trans PLACE VS trans RATE

# List the column names. The result is not assigned, so it is only shown
# when the cell is run interactively.
list(finalhatedata_nonNAN)

# Print variance and mean of the transgender rate; a Poisson model assumes
# the two are equal, so a large gap indicates over- or under-dispersion.
print('variance='+str(finalhatedata_nonNAN['Transgender_p1000'].var()))
print('mean='+str(finalhatedata_nonNAN['Transgender_p1000'].mean()))

# Create the training data frame: each row is kept with probability 0.8.
# No seed is set in this fragment, so the split differs between runs unless
# one is set elsewhere.
mask_trans = np.random.rand(len(finalhatedata_nonNAN)) < 0.8
train_trans = finalhatedata_nonNAN[mask_trans]