import arviz as az


def run_validate_arviz(inferred):
    """Run standard ArviZ posterior-predictive and LOO diagnostics.

    Expects an InferenceData whose observed variable is 'y' and whose
    posterior predictive variable is 'y_hat'.
    """
    # Posterior predictive check: observed y against y_hat draws
    az.plot_ppc(inferred, data_pairs={'y': 'y_hat'})

    # Pointwise PSIS-LOO; plot_khat flags observations with high Pareto k
    loo = az.loo(inferred, pointwise=True)
    az.plot_khat(loo)

    # LOO-PIT values should be approximately uniform if the model is calibrated
    az.plot_loo_pit(inferred, y='y', y_hat='y_hat')
    loo_pit = az.loo_pit(inferred, y='y', y_hat='y_hat')

    # Energy-based diagnostic for HMC/NUTS; low values indicate poor
    # exploration of the energy distribution (0.5 used as the threshold here)
    bfmi = az.bfmi(inferred)
    if any(bfmi < 0.5):
        print("BFMI warning:", bfmi < 0.5)

    print("LOO analysis:\n", loo)
    return loo, loo_pit, bfmi

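# A minimal usage sketch with made-up data (not from the original source):
# build an InferenceData carrying the matching 'y'/'y_hat' names plus the
# log_likelihood and sample_stats groups that az.loo and az.bfmi read, then
# run the checks. All arrays below are illustrative stand-ins.
import numpy as np

rng = np.random.default_rng(0)
y = rng.normal(size=50)                      # observed data
y_hat = y + rng.normal(size=(4, 500, 50))    # (chain, draw, obs) predictive draws
log_lik = -0.5 * (y_hat - y) ** 2            # toy pointwise log-likelihood
energy = rng.normal(size=(4, 500))           # stand-in HMC energy per draw

idata_demo = az.from_dict(
    observed_data={"y": y},
    posterior_predictive={"y_hat": y_hat},
    log_likelihood={"y": log_lik},
    sample_stats={"energy": energy},
)
loo, loo_pit, bfmi = run_validate_arviz(idata_demo)
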
""" LOO-PIT ECDF Plot ================= _thumb: .5, .7 """ import matplotlib.pyplot as plt import arviz as az az.style.use("arviz-darkgrid") idata = az.load_arviz_data("radon") az.plot_loo_pit(idata, y="y", ecdf=True, color="maroon") plt.show()
""" LOO-PIT ECDF Plot ================= _thumb: .5, .7 """ import matplotlib.pyplot as plt import arviz as az az.style.use("arviz-darkgrid") idata = az.load_arviz_data("radon") log_like = idata.sample_stats.log_likelihood.sel(chain=0).values.T log_weights = az.psislw(-log_like)[0] az.plot_loo_pit(idata, y="y_like", log_weights=log_weights, ecdf=True, color="maroon") plt.show()
""" LOO-PIT Overlay Plot ==================== _thumb: .5, .7 """ import matplotlib.pyplot as plt import arviz as az az.style.use("arviz-darkgrid") idata = az.load_arviz_data("non_centered_eight") az.plot_loo_pit(idata=idata, y="obs", color="indigo") plt.show()
""" LOO-PIT ECDF Plot ================= _thumb: .5, .7 """ import arviz as az idata = az.load_arviz_data("radon") log_like = idata.sample_stats.log_likelihood.sel(chain=0).values.T log_weights = az.psislw(-log_like)[0] ax = az.plot_loo_pit(idata, y="y_like", log_weights=log_weights, ecdf=True, color="orange", backend="bokeh")
""" LOO-PIT Overlay Plot ==================== _thumb: .5, .7 """ import arviz as az idata = az.load_arviz_data("non_centered_eight") ax = az.plot_loo_pit(idata=idata, y="obs", color="green", backend="bokeh")
""" LOO-PIT ECDF Plot ================= _thumb: .5, .7 _example_title: Plot LOO predictive ECDF compared to ECDF of uniform distribution to assess predictive calibration. """ import arviz as az idata = az.load_arviz_data("radon") ax = az.plot_loo_pit(idata, y="y", ecdf=True, color="orange", backend="bokeh")
import argparse
import glob
import os

import arviz as az
import dynesty
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.special import softmax

import flipflop


def main():
    parser = argparse.ArgumentParser(
        description='Combine the individual posteriors for each S value.')
    parser.add_argument('datafile', type=str,
                        help='path to csv containing beta values')
    parser.add_argument('patientinfofile', type=str,
                        help='path to csv containing patientinfo')
    parser.add_argument('outputdir', type=str, default='~',
                        help='path to folder in which to store output')
    parser.add_argument('sample', type=str,
                        help='samplename of beta array (must be a col in '
                             'datafile and an index in patientinfo)')
    args = parser.parse_args()

    datafile = args.datafile
    patientinfofile = args.patientinfofile
    outputdir = args.outputdir
    sample = args.sample

    outsamplesdir = os.path.join(outputdir, sample, 'posterior')
    outfinaldir = os.path.join(outputdir, sample, 'outfinal')
    os.makedirs(outfinaldir, exist_ok=True)

    beta_values = pd.read_csv(datafile, index_col=0)
    patientinfo = pd.read_csv(patientinfofile, keep_default_na=False, index_col=0)
    beta = beta_values[sample].dropna().values
    age = patientinfo.loc[sample, 'age']

    # Load the per-S nested-sampling results, skipping corrupt pickles
    outsampleslist = glob.glob(os.path.join(outsamplesdir, 'sample_*.pkl'))
    S = list()
    results = dict()
    for outsamples in outsampleslist:
        s = int(outsamples.split('.pkl')[0].split('_')[-1])
        try:
            with open(outsamples, 'rb') as f:
                res = joblib.load(f)
            results[s] = res
            S.append(s)
            print(s)
        except EOFError:
            print('sample_{}.pkl is not a correctly formatted pickle file'.format(s))
    S.sort()
    n = len(beta)

    # Collect the final log-evidence (and its error) for each S; results may
    # be stored either as dynesty result objects or as plain dicts
    logZs = np.empty(len(S))
    logZerrs = np.empty(len(S))
    Nsamples = np.empty(len(S), dtype=int)
    for index, s in enumerate(S):
        try:
            logZs[index] = results[s].logz[-1]
            logZerrs[index] = results[s].logzerr[-1]
            Nsamples[index] = results[s].niter
        except AttributeError:
            logZs[index] = results[s]['logz'][-1]
            logZerrs[index] = results[s]['logzerr'][-1]
            Nsamples[index] = results[s]['niter']

    # Model probabilities P(S) from the evidences, with a parametric bootstrap
    # over the logZ errors to propagate uncertainty into P(S)
    logZs_bootstrap = np.random.normal(loc=logZs, scale=logZerrs,
                                       size=(10000, len(logZs)))
    prob_s = softmax(logZs)
    prob_s_bootstrap = softmax(logZs_bootstrap, axis=1)
    prob_s_err = np.std(prob_s_bootstrap, axis=0)

    print('\nS:P(S)')
    for i, s in enumerate(S):
        print('{}:{:.3e}'.format(s, prob_s[i]))

    df = pd.DataFrame({'S': S, 'prob': prob_s, 'prob_err': prob_s_err})
    df['S'] = df.S.astype(int)
    df.to_csv(os.path.join(outfinaldir, "prob_of_S.csv"), index=False)

    sns.set_style('white')
    sns.set_context("paper", font_scale=1.6)
    fig, ax = plt.subplots()
    ax.bar(S, prob_s, yerr=prob_s_err, color=sns.xkcd_rgb["denim blue"])
    sns.despine()
    plt.xlabel("Stem Cell Number (S)")
    plt.ylabel("Probability")
    plt.tight_layout()
    plt.savefig(os.path.join(outfinaldir, "probability_S.png"), dpi=300)
    plt.close()

    # Model averaging: draw S in proportion to P(S), then a random posterior
    # sample from the corresponding run for each draw
    Ndraws = 3000
    Ssamples = np.random.choice(S, size=Ndraws, p=prob_s)
    final_posterior = np.empty((Ndraws, 8))
    final_posterior[:, -1] = Ssamples
    beta_hat = np.empty((1, Ndraws, n))
    LL = np.empty((1, Ndraws, n))

    progress_ints = (np.arange(0.1, 1.1, 0.1) * Ndraws - 1).astype(int)
    counter = 10
    for i in range(Ndraws):
        if i in progress_ints:
            print('{}% complete'.format(counter))
            counter += 10
        s = Ssamples[i]
        try:
            posterior = dynesty.utils.resample_equal(
                results[s].samples, softmax(results[s].logwt))
        except AttributeError:
            posterior = results[s]['samples']
        random_row = np.random.randint(posterior.shape[0])
        final_posterior[i, :7] = posterior[random_row, :7]
        lamsample, musample, gammasample, deltasample, etasample = final_posterior[i, :5]
        kappasample = posterior[random_row, 7:]

        # Pointwise log-likelihood for this draw
        LL[0, i, :] = flipflop.loglikelihood_perpoint(
            posterior[random_row, :], beta, s, age)

        # Posterior predictive draw: sample methylated-allele counts, rescale,
        # then add beta-distributed measurement noise
        ProbDist = flipflop.runModel(s, lamsample, musample, gammasample, age)
        k_sample = np.random.choice(np.arange(0, 2 * s + 1), size=n, p=ProbDist)
        beta_sample = k_sample / (2 * s)
        beta_sample = flipflop.rescale_beta(beta_sample, deltasample, etasample)
        beta_hat[0, i, :] = flipflop.beta_rvs(beta_sample, kappasample[k_sample])

    with open(os.path.join(outfinaldir, "finalposterior.pkl"), 'wb') as f:
        joblib.dump(final_posterior, f)

    df = pd.DataFrame({'lam': final_posterior[:, 0],
                       'mu': final_posterior[:, 1],
                       'gamma': final_posterior[:, 2],
                       'delta': final_posterior[:, 3],
                       'eta': final_posterior[:, 4],
                       'kappamean': final_posterior[:, 5],
                       'kappadisp': final_posterior[:, 6],
                       'S': Ssamples})
    df.to_csv(os.path.join(outfinaldir, "finalposterior.csv"), index=False)

    fig, ax = plt.subplots()
    plt.hist(beta, np.linspace(0, 1, 100), density=True, alpha=0.4, linewidth=0)
    plt.hist(np.ravel(beta_hat), np.linspace(0, 1, 100), density=True,
             alpha=0.4, linewidth=0)
    plt.legend(("Data", "Posterior predictive"))
    plt.xlabel("Fraction Methylated (Beta)")
    plt.ylabel("Probability density")
    sns.despine()
    plt.tight_layout()
    plt.savefig("{}/posterior_predictive.png".format(outfinaldir), dpi=300)
    plt.close()

    inference = az.from_dict(
        posterior={'lam': final_posterior[:, 0],
                   'mu': final_posterior[:, 1],
                   'gamma': final_posterior[:, 2],
                   'delta': final_posterior[:, 3],
                   'eta': final_posterior[:, 4],
                   'kappamean': final_posterior[:, 5],
                   'kappadisp': final_posterior[:, 6],
                   'S': Ssamples},
        observed_data={'beta': beta},
        posterior_predictive={'beta_hat': beta_hat},
        sample_stats={"log_likelihood": LL},
    )
    az.to_netcdf(inference, "{}/inference.nc".format(outfinaldir))

    pairs = az.plot_pair(inference, var_names=('lam', 'mu', 'gamma', 'delta',
                                               'eta', 'kappamean', 'kappadisp'))
    plt.savefig('{}/plot_pairs.png'.format(outfinaldir), dpi=300)
    plt.close()

    az.plot_loo_pit(inference, y='beta', y_hat='beta_hat', ecdf=True)
    plt.savefig('{}/plot_loo_pit_ecdf.png'.format(outfinaldir), dpi=300)
    plt.close()

    sns.set_context("paper", font_scale=1.0)
    az.plot_loo_pit(inference, y='beta', y_hat='beta_hat')
    plt.ylabel('Leave One Out - Probability Integral Transform')
    plt.xlabel('Cumulative Density Function')
    plt.savefig('{}/plot_loo_pit.png'.format(outfinaldir), dpi=300)
    plt.close()


if __name__ == '__main__':
    main()
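
# A toy illustration of the evidence-weighting step used in main(), with
# hypothetical logZ values (not real results): softmax turns log-evidences
# into model probabilities P(S), and a parametric bootstrap over the logZ
# errors yields an error bar on each probability.
import numpy as np
from scipy.special import softmax

logZs_demo = np.array([-110.2, -108.9, -109.4])   # hypothetical log-evidences
logZerrs_demo = np.array([0.3, 0.2, 0.25])        # hypothetical logZ errors

prob_demo = softmax(logZs_demo)                   # P(S), assuming a flat prior over S
boot = softmax(np.random.normal(logZs_demo, logZerrs_demo, size=(10000, 3)), axis=1)
print("P(S):", prob_demo)
print("bootstrap std:", boot.std(axis=0))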