def fitZINB(preCellType, postCellType):
    """Fit an intercept-only zero-inflated negative binomial model to counts.

    Reads the header-less, single-column CSV
    ``data_dense_model/<preCellType>_<postCellType>.csv``, fits
    ``sm.ZeroInflatedNegativeBinomialP`` against a column of ones, and writes
    two files into ``fit_dense_model``:

    * ``<pre>_<post>_ZINB.csv`` -- the observed relative frequency of every
      count value (column ``x``) next to the predicted probabilities of the
      fitted model averaged over observations (column ``xFit``);
    * ``<pre>_<post>_ZINB_FitResults.csv`` -- the fit summary rendered as CSV.

    Parameters
    ----------
    preCellType, postCellType
        Values interpolated into the input and output file names.

    Returns
    -------
    None
    """
    import os  # local import so the block does not rely on a file-level one

    pair = "%s_%s" % (preCellType, postCellType)

    # Build paths with os.path.join: the former "dir\%s" literals relied on an
    # invalid escape sequence and only resolved correctly on Windows.
    input_path = os.path.join("data_dense_model", pair + ".csv")
    curve_path = os.path.join("fit_dense_model", pair + "_ZINB.csv")
    summary_path = os.path.join("fit_dense_model", pair + "_ZINB_FitResults.csv")

    # Get data from file
    df = pd.read_csv(input_path, header=None, names=["data"])

    # Prepare data for fitting
    X = df.data
    nobs = len(X)
    exog = np.ones(nobs)
    freq = np.bincount(X) / nobs

    # Fit data
    mod_ZINB = sm.ZeroInflatedNegativeBinomialP(X, exog)
    res_ZINB = mod_ZINB.fit(disp=False)

    # Per-observation predicted probabilities, averaged over observations
    probs_zinb = res_ZINB.predict(which='prob')
    probsm_zinb = probs_zinb.mean(0)

    # Export freq and probsm_zinb
    outputDF = pd.DataFrame({'x': freq, 'xFit': probsm_zinb},
                            columns=['x', 'xFit'])
    outputDF.to_csv(curve_path, index=False, header=True)

    # Export fit results; the context manager closes the file even on error
    with open(summary_path, "w") as text_file:
        text_file.write(res_ZINB.summary().as_csv())
def tiny_zinb(l):
    """Fit a regularized zero-inflated negative binomial model on a small sample.

    Parameters
    ----------
    l : sequence of rows
        Each row holds the response in position 0 and the regressors in the
        remaining positions.

    Returns
    -------
    list or the result of ``return_zeros``
        ``[fit_results, predictions, rmse]`` when the fit succeeds.
        ``return_zeros(ytr, reason)`` when the response is all zeros
        (``"AllZeros"``), the fit hits a singular matrix (``"Singular"``) or
        an ``AssertionError`` is raised (``"Assert"``).
        ``[zinb_mod, zinb_pred, zinb_rmse]`` with whatever was computed so far
        (``None``/``None``/``0`` by default) when a ``ValueError`` or a
        non-singular ``LinAlgError`` is caught.
    """
    zinb_mod = None
    zinb_pred = None
    zinb_rmse = 0
    xtr = np.array([item[1:] for item in l])
    ytr = np.array([item[0] for item in l]).reshape(-1, 1)

    try:
        if np.count_nonzero(ytr) > 0:
            zinb_mod = sm.ZeroInflatedNegativeBinomialP(ytr, xtr).fit_regularized(
                maxiter=10000, disp=0, maxfun=10000)
            zinb_pred = zinb_mod.predict(
                xtr, which="mean", exog_infl=np.ones((len(xtr), 1)))
            zinb_rmse = np.sqrt(mean_squared_error(ytr, zinb_pred))
            zinb_res = [zinb_mod, zinb_pred, zinb_rmse]
        else:
            zinb_res = return_zeros(ytr, "AllZeros")
    except np.linalg.LinAlgError as e:
        if 'Singular matrix' in str(e):
            # Regularization is expected to avoid this; record the share of
            # zeros in the response for later inspection.
            nzeros = len(ytr) - np.count_nonzero(ytr)
            zinb_res = return_zeros(ytr, "Singular")
            prop = round((100 * nzeros) / len(ytr), 2)
            zinb_prop_err_singmat.append(prop)
        else:
            # Previously left zinb_res unbound, so the return statement
            # raised UnboundLocalError.
            zinb_res = [zinb_mod, zinb_pred, zinb_rmse]
    except AssertionError:
        zinb_res = return_zeros(ytr, "Assert")
    except ValueError:
        print("\t\t\tIgnored output containing np.nan or np.inf")
        # Same fix as above: always hand back a well-formed result.
        zinb_res = [zinb_mod, zinb_pred, zinb_rmse]
    return zinb_res
def test_poi_nb_zip_zinb_tiny_subset(meta, m):
    """Compare Poisson, NB, ZIP and ZINB fits on a seeded 800-row sample of ``m``.

    Rows are drawn with replacement from ``m`` (seed 2); column 0 is used as
    the response and the remaining columns as regressors. The sample is split
    60/40 into train/test (``random_state=42``), each model is fitted with
    Newton's method (at most 50 iterations) and its RMSE on the test split is
    printed. For the Poisson and zero-inflated Poisson models the RMSE is
    computed against the 0.95 quantile of a Poisson distribution whose mean is
    the model prediction, not against the predicted mean itself.

    Parameters
    ----------
    meta
        Unused; kept for interface compatibility.
    m : numpy.ndarray
        Two-dimensional data matrix, response in column 0.

    Returns
    -------
    None
    """
    # Only needed when labelling model summaries, e.g.
    # poi_mod.summary(yname='tickbites', xname=exog_names).
    exog_names = r"rowid;latitude;longitude;target;dbuiltup;dforest;drecreation;dbrr;dwrl;dwrn;dwrr;dcamping;dcaravan;dcross;dgolf;dheem;dhaven;dsafari;dwater;attr;dbath;lu;lc;maxmeanhaz;maxstdhaz".split(";")[4:]

    np.random.seed(2)
    # randint's upper bound is exclusive: len(m) makes every row eligible
    # (len(m) - 1 silently excluded the last row).
    randint = np.random.randint(0, high=len(m), size=800)
    msel = m[randint, :]
    Y = msel[:, 0]
    X = msel[:, 1:]
    print("Msel shape: ", msel.shape)

    xtrain, xtest, ytrain, ytest = train_test_split(
        X, Y, train_size=0.60, random_state=42)
    print(xtrain.shape, ytrain.shape, xtest.shape, ytest.shape)

    # A bare `print` is a no-op expression in Python 3; call it to emit the
    # intended blank separator line.
    print()
    print("Model: Poisson")
    poi_mod = sm.Poisson(ytrain, xtrain).fit(method="newton", maxiter=50)
    poi_mean_pred = poi_mod.predict(xtest)
    poi_ppf_obs = stats.poisson.ppf(q=0.95, mu=poi_mean_pred)
    poi_rmse = np.sqrt(mean_squared_error(ytest, poi_ppf_obs))
    print("RMSE Poisson: ", poi_rmse)

    print()
    print("Model: Neg. Binomial")
    nb_mod = sm.NegativeBinomial(ytrain, xtrain).fit(
        start_params=None, method='newton', maxiter=50)
    nb_pred = nb_mod.predict(xtest)
    nb_rmse = np.sqrt(mean_squared_error(ytest, nb_pred))
    print("RMSE Negative Binomial: ", nb_rmse)

    print()
    print("Model: Zero Inflated Poisson")
    zip_mod = sm.ZeroInflatedPoisson(ytrain, xtrain).fit(
        method="newton", maxiter=50)
    # The models are built without exog_infl, so the inflation part is
    # predicted with a single column of ones.
    zip_mean_pred = zip_mod.predict(xtest, exog_infl=np.ones((len(xtest), 1)))
    zip_ppf_obs = stats.poisson.ppf(q=0.95, mu=zip_mean_pred)
    zip_rmse = np.sqrt(mean_squared_error(ytest, zip_ppf_obs))
    print("RMSE Zero-Inflated Poisson", zip_rmse)

    print()
    print("Model: Zero Inflated Neg. Binomial")
    zinb_mod = sm.ZeroInflatedNegativeBinomialP(ytrain, xtrain).fit(
        method="newton", maxiter=50)
    zinb_pred = zinb_mod.predict(xtest, exog_infl=np.ones((len(xtest), 1)))
    zinb_rmse = np.sqrt(mean_squared_error(ytest, zinb_pred))
    print("RMSE Zero-Inflated Negative Binomial: ", zinb_rmse)
def test_poi_nb_zip_zinb_raw_data(meta, m):
    """Fit Poisson, NB, ZIP and ZINB models on all of ``m`` and print test RMSEs.

    Column 0 of ``m`` is the response, the remaining columns are the
    regressors. The data is split 60/40 into train/test (``random_state=77``)
    and every model is fitted with Newton's method (at most 50 iterations).
    For the Poisson and zero-inflated Poisson models the RMSE is computed
    against the 0.95 quantile of a Poisson distribution whose mean is the
    model prediction.

    Parameters
    ----------
    meta
        Unused; kept for interface compatibility.
    m : numpy.ndarray
        Two-dimensional data matrix, response in column 0.

    Returns
    -------
    None
    """
    Y = m[:, 0]
    X = m[:, 1:]

    xtrain, xtest, ytrain, ytest = train_test_split(
        X, Y, train_size=0.60, random_state=77)
    print("Training with: ", xtrain.shape, ytrain.shape)
    print("Testing with: ", xtest.shape, ytest.shape)
    print()

    print("Model: Poisson")
    poi_mod = sm.Poisson(ytrain, xtrain).fit(method="newton", maxiter=50)
    poi_mean_pred = poi_mod.predict(xtest)
    poi_ppf_obs = stats.poisson.ppf(q=0.95, mu=poi_mean_pred)
    poi_rmse = np.sqrt(mean_squared_error(ytest, poi_ppf_obs))

    # This model was missing although its RMSE is reported below, which made
    # the function fail with NameError on nb_rmse.
    print("Model: Neg. Binomial")
    nb_mod = sm.NegativeBinomial(ytrain, xtrain).fit(
        start_params=None, method='newton', maxiter=50)
    nb_pred = nb_mod.predict(xtest)
    nb_rmse = np.sqrt(mean_squared_error(ytest, nb_pred))

    print("Model: Zero Inflated Poisson")
    zip_mod = sm.ZeroInflatedPoisson(ytrain, xtrain).fit(
        method="newton", maxiter=50)
    # The models are built without exog_infl, so the inflation part is
    # predicted with a single column of ones.
    zip_mean_pred = zip_mod.predict(xtest, exog_infl=np.ones((len(xtest), 1)))
    zip_ppf_obs = stats.poisson.ppf(q=0.95, mu=zip_mean_pred)
    zip_rmse = np.sqrt(mean_squared_error(ytest, zip_ppf_obs))

    # Fitted once only: the former second fit scored test-set predictions
    # against ytrain (mismatched lengths) and overwrote this RMSE.
    print("Model: Zero Inflated Neg. Binomial")
    zinb_mod = sm.ZeroInflatedNegativeBinomialP(ytrain, xtrain).fit(
        method="newton", maxiter=50)
    zinb_pred = zinb_mod.predict(xtest, exog_infl=np.ones((len(xtest), 1)))
    zinb_rmse = np.sqrt(mean_squared_error(ytest, zinb_pred))

    print()
    print("RMSE Poisson: ", poi_rmse)
    print("RMSE Negative Binomial: ", nb_rmse)
    print("RMSE Zero-Inflated Poisson", zip_rmse)
    print("RMSE Zero-Inflated Negative Binomial: ", zinb_rmse)
def setup_class(cls):
    """Fit a ZINB-P model (``p=2``) on the RAND HIE data and store the result.

    All regressors of the dataset are used both for the count part and for
    the inflation part. The fit is started from fixed ``start_params`` and run
    with BFGS. Sets ``cls.endog`` and ``cls.res``.
    """
    # `load(as_pandas=False)` is not accepted by newer statsmodels releases;
    # load with defaults and convert explicitly, which gives plain ndarrays
    # whether the loader returns pandas objects or arrays.
    data = sm.datasets.randhie.load()
    cls.endog = np.asarray(data.endog)
    exog = np.asarray(data.exog)

    start_params = np.array([
        -2.83983767, -2.31595924, -3.9263248, -4.01816431, -5.52251843,
        -2.4351714, -4.61636366, -4.17959785, -0.12960256, -0.05653484,
        -0.21206673, 0.08782572, -0.02991995, 0.22901208, 0.0620983,
        0.06809681, 0.0841814, 0.185506, 1.36527888])

    mod = sm.ZeroInflatedNegativeBinomialP(
        cls.endog, exog, exog_infl=exog, p=2)
    res = mod.fit(start_params=start_params, method="bfgs",
                  maxiter=1000, disp=0)
    cls.res = res
def setup_class(cls):
    """Simulate zero-inflated negative binomial counts and fit a ZINB-P model.

    Draws 500 observations with ``sm.distributions.zinegbin.rvs`` (seeded),
    using a constant plus a 0/1 regressor that is 0 for the first half of the
    sample, then fits ``ZeroInflatedNegativeBinomialP`` with ``p=2`` via BFGS.
    Sets ``cls.endog``, ``cls.res`` and ``cls.prob_infl``.
    """
    np.random.seed(987123)

    n_samples = 500
    inflation_prob = 0.15
    # Mean-model coefficients first, dispersion parameter last.
    *beta, alpha = [1, 1, 0.5]

    # Constant column plus an indicator that switches on for the second half.
    second_half = (np.arange(n_samples) >= n_samples // 2).astype(float)
    design = np.column_stack((np.ones(n_samples), second_half))

    mean = np.exp(design.dot(beta))
    cls.endog = sm.distributions.zinegbin.rvs(
        mean, alpha, 2, inflation_prob, size=mean.shape)

    cls.res = sm.ZeroInflatedNegativeBinomialP(cls.endog, design, p=2).fit(
        method='bfgs', maxiter=5000, maxfun=5000, disp=0)

    # Kept on the class so it can be read back alongside the fit.
    cls.prob_infl = inflation_prob
def setup_class(cls):
    """Fit a ZINB-P model (``p=2``) on the RAND HIE data with Nelder-Mead.

    The count part uses column 1 of the regressors plus a trailing constant,
    the inflation part column 0 plus a trailing constant. Sets ``cls.endog``,
    ``cls.res1`` (the fit), ``cls.res2`` (reference values taken from
    ``RandHIE.zero_inflated_negative_binomial``), ``cls.init_keys`` and
    ``cls.init_kwds``.
    """
    # `load(as_pandas=False)` is not accepted by newer statsmodels releases;
    # load with defaults and convert explicitly so positional column slicing
    # works whether the loader returns pandas objects or arrays.
    data = sm.datasets.randhie.load()
    cls.endog = np.asarray(data.endog)
    data_exog = np.asarray(data.exog)

    exog = sm.add_constant(data_exog[:, 1], prepend=False)
    exog_infl = sm.add_constant(data_exog[:, 0], prepend=False)

    # cheating for now, parameters are not well identified in this dataset
    # see https://github.com/statsmodels/statsmodels/pull/3928#issuecomment-331724022
    sp = np.array([1.88, -10.28, -0.20, 1.14, 1.34])
    cls.res1 = sm.ZeroInflatedNegativeBinomialP(
        cls.endog, exog, exog_infl=exog_infl, p=2).fit(
            start_params=sp, method='nm', xtol=1e-6, maxiter=5000, disp=0)

    # for llnull test
    cls.res1._results._attach_nullmodel = True
    cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset', 'p']
    cls.init_kwds = {'inflation': 'logit', 'p': 2}

    res2 = RandHIE.zero_inflated_negative_binomial
    cls.res2 = res2
def setup_class(cls):
    """Fit a ZINB-P model (``p=2``) on the RAND HIE data with Nelder-Mead.

    The count part uses column 1 of the regressors plus a trailing constant,
    the inflation part column 0 plus a trailing constant. Sets ``cls.endog``,
    ``cls.res1`` (the fit), ``cls.res2`` (a ``RandHIE`` instance after calling
    its ``zero_inflated_negative_binomial`` method), ``cls.init_keys`` and
    ``cls.init_kwds``.
    """
    data = sm.datasets.randhie.load()
    # The loader may return pandas objects, on which `exog[:, 1]` fails;
    # convert to ndarrays before slicing columns by position.
    cls.endog = np.asarray(data.endog)
    data_exog = np.asarray(data.exog)

    exog = sm.add_constant(data_exog[:, 1], prepend=False)
    exog_infl = sm.add_constant(data_exog[:, 0], prepend=False)

    cls.res1 = sm.ZeroInflatedNegativeBinomialP(
        cls.endog, exog, exog_infl=exog_infl, p=2).fit(
            method='nm', maxiter=500)

    # for llnull test
    cls.res1._results._attach_nullmodel = True
    cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset', 'p']
    cls.init_kwds = {'inflation': 'logit', 'p': 2}

    res2 = RandHIE()
    res2.zero_inflated_negative_binomial()
    cls.res2 = res2
def get_oinb_estimate(scTRIP):
    """Fit an intercept-only ZINB model per entry and tabulate the results.

    For every key of ``scTRIP`` whose counts are non-empty and not all zero,
    ``sm.ZeroInflatedNegativeBinomialP`` is fitted against a column of ones
    (at most 200 iterations), and the fitted ``const`` and ``alpha``
    parameters are stored together with descriptive statistics of the counts.

    Parameters
    ----------
    scTRIP : mapping
        Maps a key to an iterable of counts.

    Returns
    -------
    pandas.DataFrame
        One row per retained key with the columns ``tBC`` (the key), ``mean``,
        ``median``, ``var``, ``auc`` (from ``get_auc``), ``mu`` (fitted
        ``const``), ``alpha``, ``ncells`` (number of counts), ``counts`` (the
        counts as a list) and ``sum``. Empty (zero rows) when no key
        qualifies.
    """
    column_order = [
        'tBC', 'mean', 'median', 'var', 'auc', 'mu', 'alpha', 'ncells',
        'counts', 'sum'
    ]
    collected = {name: [] for name in column_order}

    for key in scTRIP.keys():
        logger.info('we are dealing with cell %s', key)
        counts = list(scTRIP[key])

        # Nothing to fit for an empty or all-zero entry; the emptiness check
        # also keeps max() from raising ValueError on an empty list.
        if not counts or max(counts) == 0:
            continue

        counts = pd.Series(counts)
        res = sm.ZeroInflatedNegativeBinomialP(
            counts, np.ones_like(counts)).fit(maxiter=200)

        collected['tBC'].append(key)
        collected['alpha'].append(res.params['alpha'])
        collected['mu'].append(res.params['const'])

        # Statistics are taken on the counts exactly as supplied.
        collected['ncells'].append(len(counts))
        collected['counts'].append(list(counts))
        collected['mean'].append(np.mean(counts))
        collected['median'].append(np.median(counts))
        collected['sum'].append(np.sum(counts))
        collected['var'].append(np.var(counts))
        collected['auc'].append(get_auc(counts))

    # One list per column, transposed so that every key becomes a row.
    pop_df = pd.DataFrame([collected[name] for name in column_order])
    pop_df = pop_df.transpose()
    pop_df.columns = column_order
    return pop_df
def tiny_zinb(l):
    """Fit a zero-inflated negative binomial model on a small sample.

    Each row of ``l`` holds the response in position 0 and the regressors in
    the remaining positions. The model is fitted with Newton's method (at
    most 50 iterations) and scored on the same rows it was fitted on.

    Returns
    -------
    list
        ``[fit_results, predictions, rmse]``. Entries that were not computed
        because a ``LinAlgError`` or ``ValueError`` was caught keep their
        defaults (``None``, ``None``, ``0``).
    """
    print("\t\tRunning Zero-Inflated NegBin")

    fitted = None
    predicted = None
    rmse = 0

    features = np.array([row[1:] for row in l])
    targets = np.array([row[0] for row in l]).reshape(-1, 1)

    try:
        model = sm.ZeroInflatedNegativeBinomialP(targets, features)
        fitted = model.fit(method="newton", maxiter=50)
        # The model is built without exog_infl, so the inflation part is
        # predicted with a single column of ones.
        inflation_design = np.ones((len(features), 1))
        predicted = fitted.predict(features, exog_infl=inflation_design)
        rmse = np.sqrt(mean_squared_error(targets, predicted))
    except ValueError:
        print("\t\t\tIgnored output containing np.nan or np.inf")
    except np.linalg.LinAlgError as err:
        # Other linear-algebra failures are swallowed without a message.
        if 'Singular matrix' in str(err):
            print("\t\t\tIgnored a singular matrix.")

    return [fitted, predicted, rmse]
poi_rmse = np.sqrt(mean_squared_error(ytest, poi_ppf_obs)) print("Model: Neg. Binomial") nb_mod = sm.NegativeBinomial(ytrain, xtrain).fit(start_params=None, method='newton', maxiter=50) nb_pred = nb_mod.predict(xtest) nb_rmse = np.sqrt(mean_squared_error(ytest, nb_pred)) print(np.ones(len(xtest)).shape) print("Model: Zero Inflated Poisson") zip_mod = sm.ZeroInflatedPoisson(ytrain, xtrain).fit(method="newton", maxiter=50) zip_mean_pred = zip_mod.predict(xtest, exog_infl=np.ones((len(xtest), 1))) zip_ppf_obs = stats.poisson.ppf(q=0.95, mu=zip_mean_pred) zip_rmse = np.sqrt(mean_squared_error(ytest, zip_ppf_obs)) print("Model: Zero Inflated Neg. Binomial") zinb_mod = sm.ZeroInflatedNegativeBinomialP(ytrain, xtrain).fit(method="newton", maxiter=50) zinb_pred = zinb_mod.predict(xtest, exog_infl=np.ones((len(xtest), 1))) zinb_rmse = np.sqrt(mean_squared_error(ytest, zinb_pred)) print(xtrain.shape, ytrain.shape, xtest.shape, ytest.shape) print("RMSE Poisson: ", poi_rmse) print("RMSE Neg. Bin.: ", nb_rmse) print("RMSE ZIP", zip_rmse) print("RMSE ZINB: ", zinb_rmse)