def test_crossed_logit_vb_formula():

    data = gen_crossed_logit_pandas(10, 10, 1, 2)

    fml = "y ~ fe"
    fml_vc = {"a": "0 + C(a)", "b": "0 + C(b)"}
    glmm1 = BinomialBayesMixedGLM.from_formula(fml, fml_vc, data, vcp_p=0.5)
    rslt1 = glmm1.fit_vb()

    glmm2 = BinomialBayesMixedGLM(
        glmm1.endog, glmm1.exog, glmm1.exog_vc, glmm1.ident, vcp_p=0.5)
    rslt2 = glmm2.fit_vb()

    assert_allclose(rslt1.params, rslt2.params, atol=1e-4)

    rslt1.summary()
    rslt2.summary()

    # Both fits use VB, so cov_params() returns a vector of posterior
    # variances, which must all be positive.
    for rslt in rslt1, rslt2:
        cp = rslt.cov_params()
        p = len(rslt.params)
        assert_equal(cp.shape, np.r_[p, ])
        assert_equal(cp > 0, np.ones(p, dtype=bool))
def test_crossed_logit_vb():

    y, exog_fe, exog_vc, ident = gen_crossed_logit(10, 10, 1, 2)

    glmm1 = BinomialBayesMixedGLM(
        y, exog_fe, exog_vc, ident, vcp_p=0.5, fe_p=0.5)
    rslt1 = glmm1.fit_map()

    glmm2 = BinomialBayesMixedGLM(
        y, exog_fe, exog_vc, ident, vcp_p=0.5, fe_p=0.5)
    rslt2 = glmm2.fit_vb(mean=rslt1.params)

    rslt1.summary()
    rslt2.summary()

    assert_allclose(
        rslt1.params[0:5],
        np.r_[-5.43073978e-01, -2.46197518e+00, -2.36582801e+00,
              -9.64030461e-03, 2.32701078e-03],
        rtol=1e-4, atol=1e-4)

    # cov_params is a method; the MAP fit returns a full covariance matrix.
    assert_allclose(
        rslt1.cov_params().flat[0:5],
        np.r_[4.12927123e-02, -2.04448923e-04, 4.64829219e-05,
              1.20377543e-04, -1.45003234e-04],
        rtol=1e-4, atol=1e-4)

    assert_allclose(
        rslt2.params[0:5],
        np.r_[-0.70834417, -0.3571011, 0.19126823, -0.36074489, 0.058976],
        rtol=1e-4, atol=1e-4)

    # The VB fit returns a vector of posterior variances.
    assert_allclose(
        rslt2.cov_params()[0:5],
        np.r_[0.05212492, 0.04729656, 0.03916944, 0.25921842, 0.25782576],
        rtol=1e-4, atol=1e-4)

    for rslt in rslt1, rslt2:
        cp = rslt.cov_params()
        p = len(rslt.params)
        if rslt is rslt1:
            # MAP: full p x p covariance matrix, must be PSD.
            assert_equal(cp.shape, np.r_[p, p])
            np.linalg.cholesky(cp)
        else:
            # VB: vector of positive posterior variances.
            assert_equal(cp.shape, np.r_[p, ])
            assert_equal(cp > 0, np.ones(p, dtype=bool))
def test_elbo_grad():

    for f in range(2):
        for j in range(2):

            if f == 0:
                if j == 0:
                    y, exog_fe, exog_vc, ident = gen_simple_logit(10, 10, 2)
                else:
                    y, exog_fe, exog_vc, ident = gen_crossed_logit(
                        10, 10, 1, 2)
            elif f == 1:
                if j == 0:
                    y, exog_fe, exog_vc, ident = gen_simple_poisson(
                        10, 10, 0.5)
                else:
                    y, exog_fe, exog_vc, ident = gen_crossed_poisson(
                        10, 10, 1, 0.5)

            exog_vc = sparse.csr_matrix(exog_vc)

            if f == 0:
                glmm1 = BinomialBayesMixedGLM(
                    y, exog_fe, exog_vc, ident, vcp_p=0.5)
            else:
                glmm1 = PoissonBayesMixedGLM(
                    y, exog_fe, exog_vc, ident, vcp_p=0.5)

            rslt1 = glmm1.fit_map()

            for k in range(3):

                if k == 0:
                    vb_mean = rslt1.params
                    vb_sd = np.ones_like(vb_mean)
                elif k == 1:
                    vb_mean = np.zeros(len(vb_mean))
                    vb_sd = np.ones_like(vb_mean)
                else:
                    vb_mean = np.random.normal(size=len(vb_mean))
                    vb_sd = np.random.uniform(1, 2, size=len(vb_mean))

                mean_grad, sd_grad = glmm1.vb_elbo_grad(vb_mean, vb_sd)

                def elbo(vec):
                    n = len(vec) // 2
                    return glmm1.vb_elbo(vec[:n], vec[n:])

                # Compare the analytic ELBO gradient to a numerical one.
                x = np.concatenate((vb_mean, vb_sd))
                g1 = approx_fprime(x, elbo, 1e-5)
                n = len(x) // 2

                mean_grad_n = g1[:n]
                sd_grad_n = g1[n:]

                assert_allclose(mean_grad, mean_grad_n, atol=1e-2, rtol=1e-2)
                assert_allclose(sd_grad, sd_grad_n, atol=1e-2, rtol=1e-2)
def test_crossed_logit_map():

    y, exog_fe, exog_vc, ident = gen_crossed_logit(10, 10, 1, 2)
    exog_vc = sparse.csr_matrix(exog_vc)

    glmm = BinomialBayesMixedGLM(y, exog_fe, exog_vc, ident, vcp_p=0.5)
    rslt = glmm.fit_map()

    assert_allclose(
        glmm.logposterior_grad(rslt.params),
        np.zeros_like(rslt.params), atol=1e-4)

    # Check dimensions and PSD status of cov_params
    cp = rslt.cov_params()
    p = len(rslt.params)
    assert_equal(cp.shape, np.r_[p, p])
    np.linalg.cholesky(cp)
def test_scale_map():

    y, exog_fe, exog_vc, ident = gen_simple_logit(10, 10, 0)
    exog_fe -= exog_fe.mean(0)
    exog_fe /= exog_fe.std(0)
    exog_vc = sparse.csr_matrix(exog_vc)

    rslts = []
    for scale_fe in False, True:
        glmm = BinomialBayesMixedGLM(
            y, exog_fe, exog_vc, ident, vcp_p=0.5, fe_p=0.5)
        rslt = glmm.fit_map(scale_fe=scale_fe)
        rslts.append(rslt)

    # Internally rescaling the fixed-effects design matrix should not
    # change the fitted parameters.
    assert_allclose(rslts[0].params, rslts[1].params, rtol=1e-4)
def test_doc_examples():

    np.random.seed(8767)
    n = 200
    m = 20
    data = pd.DataFrame({"Year": np.random.uniform(0, 1, n),
                         "Village": np.random.randint(0, m, n)})
    data['year_cen'] = data['Year'] - data.Year.mean()

    # Binomial outcome
    lpr = np.random.normal(size=m)[data.Village]
    lpr += np.random.normal(size=m)[data.Village] * data.year_cen
    y = (np.random.uniform(size=n) < 1 / (1 + np.exp(-lpr)))
    data["y"] = y.astype(int)  # np.int is removed in recent NumPy

    # These lines should agree with the example in the class docstring.
    random = {"a": '0 + C(Village)', "b": '0 + C(Village)*year_cen'}
    model = BinomialBayesMixedGLM.from_formula(
        'y ~ year_cen', random, data)
    result = model.fit_vb()
    _ = result

    # Poisson outcome
    lpr = np.random.normal(size=m)[data.Village]
    lpr += np.random.normal(size=m)[data.Village] * data.year_cen
    data["y"] = np.random.poisson(np.exp(lpr))

    # These lines should agree with the example in the class docstring.
    random = {"a": '0 + C(Village)', "b": '0 + C(Village)*year_cen'}
    model = PoissonBayesMixedGLM.from_formula(
        'y ~ year_cen', random, data)
    result = model.fit_vb()
    _ = result
def fit_mixed_lm(subjects_data, formula, random_factors_formulas=None):
    model = BinomialBayesMixedGLM.from_formula(
        formula, random_factors_formulas, subjects_data)
    result = model.fit_vb()
    return model, result
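# A minimal usage sketch for fit_mixed_lm above.  The data frame, column
# names, and random-factor label are hypothetical, invented for
# illustration; any binary outcome with a grouping factor would do.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
subjects_data = pd.DataFrame({
    "correct": rng.integers(0, 2, 200),    # binary outcome
    "condition": rng.integers(0, 2, 200),  # fixed effect
    "subject": rng.integers(0, 20, 200),   # grouping factor
})
model, result = fit_mixed_lm(
    subjects_data,
    "correct ~ condition",
    {"subject": "0 + C(subject)"},  # random intercept per subject
)
print(result.summary())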
def _train(self, X, y):
    # Initialize the output
    mapping = {}

    # Estimate the target type, if necessary
    if self.binomial_target is None:
        binomial_target = len(y.unique()) <= 2
    else:
        binomial_target = self.binomial_target

    # The estimation does not have to converge, but with a fixed seed it
    # should at least converge to the same value on repeated runs.
    np.random.seed(2001)

    for switch in self.ordinal_encoder.category_mapping:
        col = switch.get('col')
        values = switch.get('mapping')
        data = self._rename_and_merge(X, y, col)

        try:
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore")
                if binomial_target:
                    # Classification: the (regularized) log odds per category
                    # are stored in vc_mean.
                    # Note: md.predict() returns
                    #   output = fe_mean + vcp_mean + vc_mean[category]
                    md = bgmm.from_formula(
                        'target ~ 1', {'a': '0 + C(feature)'}, data).fit_vb()
                    index_names = [
                        int(float(re.sub(r'C\(feature\)\[(\S+)\]', r'\1',
                                         index_name)))
                        for index_name in md.model.vc_names]
                    estimate = pd.Series(md.vc_mean, index=index_names)
                else:
                    # Regression: the (regularized) mean deviation of the
                    # observation's category from the global mean.
                    md = smf.mixedlm('target ~ 1', data,
                                     groups=data['feature']).fit()
                    tmp = {key: value[0]
                           for key, value in md.random_effects.items()}
                    estimate = pd.Series(tmp)
        except np.linalg.LinAlgError:
            # Singular matrix -> just return all zeros
            estimate = pd.Series(np.zeros(len(values)), index=values)

        # Ignore unique columns. This helps to prevent overfitting on
        # id-like columns.
        if len(X[col].unique()) == len(y):
            estimate[:] = 0

        if self.handle_unknown == 'return_nan':
            estimate.loc[-1] = np.nan
        elif self.handle_unknown == 'value':
            estimate.loc[-1] = 0

        if self.handle_missing == 'return_nan':
            estimate.loc[values.loc[np.nan]] = np.nan
        elif self.handle_missing == 'value':
            estimate.loc[-2] = 0

        mapping[col] = estimate

    return mapping
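# A standalone sketch of the core idea in _train above: with a binary
# target, encode a categorical feature by the regularized per-category
# log odds (the random-intercept posterior means in vc_mean).  The data
# frame and column names here are synthetic, for illustration only.
import re
import numpy as np
import pandas as pd
from statsmodels.genmod.bayes_mixed_glm import BinomialBayesMixedGLM as bgmm

rng = np.random.default_rng(2001)
data = pd.DataFrame({
    "feature": rng.integers(0, 5, 300),
    "target": rng.integers(0, 2, 300),
})

# Random intercept per category of `feature`.
md = bgmm.from_formula('target ~ 1', {'a': '0 + C(feature)'}, data).fit_vb()

# vc_names look like "C(feature)[2]"; recover the raw category values.
index_names = [int(float(re.sub(r'C\(feature\)\[(\S+)\]', r'\1', name)))
               for name in md.model.vc_names]
encoding = pd.Series(md.vc_mean, index=index_names)
print(encoding)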
def test_simple_logit_map():

    y, exog_fe, exog_vc, ident = gen_simple_logit(10, 10, 2)
    exog_vc = sparse.csr_matrix(exog_vc)

    glmm = BinomialBayesMixedGLM(y, exog_fe, exog_vc, ident, vcp_p=0.5)
    rslt = glmm.fit_map()

    assert_allclose(
        glmm.logposterior_grad(rslt.params),
        np.zeros_like(rslt.params), atol=1e-3)

    # Test the predict method
    for linear in False, True:
        for exog in None, exog_fe:
            pr1 = rslt.predict(linear=linear, exog=exog)
            pr2 = glmm.predict(rslt.params, linear=linear, exog=exog)
            assert_allclose(pr1, pr2)
            if not linear:
                assert_equal(pr1.min() >= 0, True)
                assert_equal(pr1.max() <= 1, True)
def test_simple_logit_vb():

    y, exog_fe, exog_vc, ident = gen_simple_logit(10, 10, 0)
    exog_vc = sparse.csr_matrix(exog_vc)

    glmm1 = BinomialBayesMixedGLM(
        y, exog_fe, exog_vc, ident, vcp_p=0.5, fe_p=0.5)
    rslt1 = glmm1.fit_map()

    glmm2 = BinomialBayesMixedGLM(
        y, exog_fe, exog_vc, ident, vcp_p=0.5, fe_p=0.5)
    rslt2 = glmm2.fit_vb(rslt1.params)

    rslt1.summary()
    rslt2.summary()

    assert_allclose(
        rslt1.params[0:5],
        np.r_[0.75330405, -0.71643228, -2.49091288, -0.00959806, 0.00450254],
        rtol=1e-4, atol=1e-4)

    assert_allclose(
        rslt2.params[0:5],
        np.r_[0.79338836, -0.7599833, -0.64149356, -0.24772884, 0.10775366],
        rtol=1e-4, atol=1e-4)

    for rslt in rslt1, rslt2:
        cp = rslt.cov_params()
        p = len(rslt.params)
        if rslt is rslt1:
            # MAP: full p x p covariance matrix, must be PSD.
            assert_equal(cp.shape, np.r_[p, p])
            np.linalg.cholesky(cp)
        else:
            # VB: vector of positive posterior variances.
            assert_equal(cp.shape, np.r_[p, ])
            assert_equal(cp > 0, np.ones(p, dtype=bool))
def test_logit_map_crossed_formula():

    data = gen_crossed_logit_pandas(10, 10, 1, 0.5)

    fml = "y ~ fe"
    fml_vc = {"a": "0 + C(a)", "b": "0 + C(b)"}
    glmm = BinomialBayesMixedGLM.from_formula(fml, fml_vc, data, vcp_p=0.5)
    rslt = glmm.fit_map()

    assert_allclose(
        glmm.logposterior_grad(rslt.params),
        np.zeros_like(rslt.params), atol=1e-4)

    rslt.summary()

    r = rslt.random_effects("a")
    assert_allclose(
        r.iloc[0, :].values, np.r_[-0.02004904, 0.094014], atol=1e-4)

    # Check dimensions and PSD status of cov_params
    cm = rslt.cov_params()
    p = rslt.params.shape[0]
    assert_equal(list(cm.shape), [p, p])
    np.linalg.cholesky(cm)
df = get_data(group)

for outcome in "bucketacc", "bucketcomp":

    fml = get_formula(adj_time=adj_time)

    yl = {
        "bucketacc": "target accuracy",
        "bucketcomp": "competitor accuracy"
    }[outcome]

    vcx = get_vcf(vcs, adj_time)
    fmx = outcome + " ~ " + fml

    model = BinomialBayesMixedGLM.from_formula(
        fmx, vcx, df, vcp_p=3, fe_p=3)

    with open(group + ".pkl", "rb") as fid:
        pars = pickle.load(fid)

    if adj_time:
        tm = pars["tm_adj"]
        ts = pars["ts_adj"]
    else:
        tm = pars["tm"]
        ts = pars["ts"]

    if use_vb:
        params = pd.read_csv(
            "%s_params_%d_%s_%s_vb.csv" % (group, vcs, outcome, adjs))
    else:
select_cols = [
    "play_id", "game_id", "touchdown", "yards_gained", "turnover", "posteam",
    "defteam", "yardline_100", "half_seconds_remaining", "play_type",
    "shotgun", "no_huddle", "qb_dropback", "pass_length", "pass_location",
    "run_location", "run_gap", "field_goal_result", "opp_fg_prob",
    "opp_td_prob", "fumble_forced", "fumble_not_forced", "fumble_lost",
    "penalty"
]

nfl_rush_2019 = nfl_2019[nfl_2019["play_type"] == "rush"]

# First, fit rush outcome models.
# Note: from_formula expects the variance components as a dict mapping
# component names to formulas; the labels below are arbitrary.

# 1 - penalty
rush_penalty_mod = BinomialBayesMixedGLM.from_formula(
    'penalty ~ shotgun + no_huddle + qb_dropback + run_location + run_gap',
    {'rusher': '0 + rusher_id', 'defense': '0 + def_id'},
    data=nfl_rush_2019)
rush_penalty_result = rush_penalty_mod.fit_vb()  # fit_vb must be called

# 2 - rushing yards
rush_yard_mod = PoissonBayesMixedGLM.from_formula(
    'yards_gained ~ shotgun + no_huddle + qb_dropback + run_location + run_gap',
    {'rusher': '0 + rusher_id', 'defense': '0 + def_id'},
    data=nfl_rush_2019)
rush_yard_result = rush_yard_mod.fit_vb()

# 3 - rushing turnovers (fumbles)
rush_turnover_mod = BinomialBayesMixedGLM.from_formula(
    'turnover ~ shotgun + no_huddle + qb_dropback + run_location + run_gap',
vcn = ["Sample", "Exon", "Gene", "Person"] fml = "Imprinted ~ KidRank + C(Lib) + Boy" vc_fml = {"Sample": "0 + C(Sample)", "Exon": "0 + C(Exon)", "Gene": "0 + C(Gene)", "Person": "0 + C(Person)"} if kc == 3: fml += " + Pat" fml = fml.replace("Pat", "Pat01") fml = fml.replace("C(Lib)", "C(Lib)*Pat01") fml = fml.replace("KidRank", "KidRank*Pat01") dy = dx.drop("PlacentaWeight", axis=1) if kc != 3: model = BinomialBayesMixedGLM.from_formula(fml, vc_fml, dy, vcp_p=3, fe_p=3) else: ident = [] exog_vc = [] for g in dy.Gene.unique(): ident.append(genecode[g]) exog_vc.append((dy.Gene == g).astype(np.int)) for e in dy.Exon.unique(): ident.append(exoncode[e]) exog_vc.append((dy.Exon == e).astype(np.int)) for p in dy.Person.unique(): ident.append(4) exog_vc.append((dy.Person == p).astype(np.int))
def glmm_model(data, features, y, random_effects):
    model = BinomialBayesMixedGLM.from_formula(
        f'{y} ~ {features}', random_effects, data)
    result = model.fit_vb()
    return result
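# Hypothetical usage of glmm_model above.  All names below (outcome, x1,
# group) are made up; `features` is the right-hand side of the
# fixed-effects formula as a string.
import numpy as np
import pandas as pd

rng = np.random.default_rng(42)
data = pd.DataFrame({
    "outcome": rng.integers(0, 2, 200),
    "x1": rng.normal(size=200),
    "group": rng.integers(0, 15, 200),
})
result = glmm_model(data, features="x1", y="outcome",
                    random_effects={"g": "0 + C(group)"})
print(result.summary())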
# Recode diagnosis severity ('mild' -> 0, 'severe' -> 1) and treatment
# ('standard' -> 0, 'new drug' -> 1).
tmp['诊断严重程度'] = tmp['诊断严重程度'].replace({'轻微': 0, '严重': 1})
tmp['治疗'] = tmp['治疗'].replace({'标准': 0, '新药': 1})
tmp = tmp.reset_index()
del tmp['index']
tmp = tmp.rename(columns={
    '周数': 'zhous',
    '值': 'zhi',
    '组': 'zu',
    '诊断严重程度': 'severity',
    '治疗': 'drug'
})
tmp.to_csv(r"D:/书籍资料整理/属性数据分析/抑郁症治疗_展开.csv")

random = {"a": '0 + C(zu)'}
model = BinomialBayesMixedGLM.from_formula(
    'zhi ~ severity + drug + zhous + drug:zhous', random, tmp)
result = model.fit_vb()

# The results roughly agree with those in the book.  The differences are
# presumably because the book uses Gauss-Hermite quadrature, while
# statsmodels uses a Bayesian (variational) method.  Also note that the
# reported values are variances, so take the square root to get
# standard deviations.
print(result.summary())

data = pd.read_csv(r"D:/书籍资料整理/属性数据分析/老鼠.csv")
random = {"a": '0 + C(簇)'}
model = BinomialBayesMixedGLM.from_formula('死亡 ~ C(组) ', random, data)
result = model.fit_vb()
result.summary()
import numpy as np
import pandas as pd
from scipy import sparse
from scipy.optimize import approx_fprime
from numpy.testing import assert_allclose, assert_equal
from statsmodels.genmod.bayes_mixed_glm import (BinomialBayesMixedGLM,
                                                PoissonBayesMixedGLM)

np.random.seed(8767)

n = 200
m = 20
data = pd.DataFrame({
    "Year": np.random.uniform(0, 1, n),
    "Village": np.random.randint(0, m, n)
})
data['year_cen'] = data['Year'] - data.Year.mean()

# Binomial outcome
lpr = np.random.normal(size=m)[data.Village]
lpr += np.random.normal(size=m)[data.Village] * data.year_cen
y = (np.random.uniform(size=n) < 1 / (1 + np.exp(-lpr)))
data["y"] = y.astype(int)

# These lines should agree with the example in the class docstring.
random = {"a": '0 + C(Village)'}
print(data)
model = BinomialBayesMixedGLM.from_formula('y ~ year_cen', random, data)
result = model.fit_vb()
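# Continuing the example above: inspect the VB fit.  summary() and
# random_effects() are part of the BayesMixedGLMResults API; with no
# argument, random_effects() should return the posterior means and
# standard deviations for all random effects.
print(result.summary())
re_post = result.random_effects()
print(re_post.head())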