def tiny_negbin(l):
    """Fit a regularized Negative Binomial (NBP) count model on one data slice.

    Parameters
    ----------
    l : sequence of rows, where row[0] is the count outcome and row[1:]
        are the regressors.

    Returns
    -------
    list
        ``[fitted_model, mean_predictions, train_rmse]`` on success,
        the output of ``return_zeros`` for degenerate cases (all-zero
        outcome, singular matrix, assertion failure), or ``[]`` when a
        ``ValueError`` (nan/inf in the data) was swallowed.
    """
    xtr = np.array([row[1:] for row in l])
    ytr = np.array([row[0] for row in l]).reshape(-1, 1)
    nb_res = []
    try:
        if np.count_nonzero(ytr) > 0:
            # Regularized fit: keeps the optimization stable even with
            # near-collinear regressors.
            nb_mod = sm.NegativeBinomialP(ytr, xtr).fit_regularized(
                maxiter=10000, disp=0, maxfun=10000,
                exposure=None, offset=None)
            # print(nb_mod.summary())
            nb_mean_pred = nb_mod.predict(xtr, which="mean")
            nb_rmse_tr = np.sqrt(mean_squared_error(ytr, nb_mean_pred))
            nb_res = [nb_mod, nb_mean_pred, nb_rmse_tr]
        else:
            # All-zero outcome: the NB likelihood is degenerate.
            nb_res = return_zeros(ytr, "AllZeros")
    except np.linalg.LinAlgError as e:
        if 'Singular matrix' in str(e):
            # Should be unreachable: regularization is meant to avoid
            # singular matrices. Record the zero proportion in the
            # module-level accumulator for later diagnostics.
            nzeros = len(ytr) - np.count_nonzero(ytr)
            prop = round((100 * nzeros) / len(ytr), 2)
            nb_res = return_zeros(ytr, "Singular")
            nb_prop_err_singmat.append(prop)
    except AssertionError:
        nb_res = return_zeros(ytr, "Assert")
    except ValueError:
        # statsmodels raises when the design contains nan/inf; keep the
        # empty result rather than aborting the whole sweep.
        print("\t\t\tIgnored output containing np.nan or np.inf")
    return nb_res
# count data models (w/ exposure!) y = df.y_count # DV m_poiss = sm.Poisson( y, X, exposure=df['x_timespan'].values).fit() print(m_poiss.summary2()) m_NB2 = sm.NegativeBinomial( y, X, loglike_method='nb2', exposure=df['x_timespan'].values).fit() print(m_NB2.summary2()) m_NB1 = sm.NegativeBinomial( y, X, loglike_method='nb1', exposure=df['x_timespan'].values).fit() print(m_NB1.summary2()) m_NBP = sm.NegativeBinomialP( y, X, exposure=df['x_timespan'].values).fit() print(m_NBP.summary2()) #endregion #region REGRESSION MODELS # OLS family y = df.y_count m_OLS = sm.OLS(y, X).fit() # i.i.d. errors print(m_OLS.summary2()) m_GLS = sm.GLS(y, X).fit() # arbitrary covariance between errors