def convergence_diagnose_birt(m, a):
    """Run PyMC convergence diagnostics on a sampled model.

    Raftery-Lewis and Geweke diagnostics are evaluated on the node ``m.a``;
    the Geweke scores are plotted; Gelman-Rubin is evaluated on the whole
    model ``m``.  Nothing is returned: the diagnostics are used only for
    what they print or plot.

    Parameters
    ----------
    m
        Sampled PyMC model object exposing an attribute named ``a``.
    a
        Not used by this function.
        NOTE(review): possibly meant to select the node to diagnose --
        confirm with callers.
    """
    # birt_model = pymc.MCMC(...)
    node = m.a
    pymc.raftery_lewis(node, q=0.025, r=0.01)
    geweke_scores = pymc.geweke(node, intervals=20)
    pymc.Matplot.geweke_plot(geweke_scores)
    pymc.gelman_rubin(m)
def bayes_model(data, savedir, num_jobs=1):
    """Sample a three-parameter (mu, alpha, beta) model until it converges.

    The priors come from ``param_selector(data)``.  An observed stochastic
    with a hand-written log-likelihood ties them to ``data``.  The sampler
    is run ``num_jobs`` times per pass, and passes are repeated until
    ``srf_checker`` accepts the Gelman-Rubin statistics.

    Parameters
    ----------
    data
        Observed values; indexed positionally and used in array arithmetic
        (``value[-1] - value``), so presumably a 1-D NumPy array of
        ordered event times -- confirm against callers.
    savedir : str
        Prefix for the output files.  It is concatenated directly with the
        file names, so it must already end with a path separator.
    num_jobs : int, optional
        Number of ``model.sample`` calls per convergence pass.

    Side effects
    ------------
    Writes ``<savedir>full_params<i>.csv`` after every sampling run and
    ``<savedir>srf.csv`` (columns ``Parameter,SRF``) once the check passes.
    """
    mu, alpha, beta = param_selector(data)

    @pm.stochastic(observed=True)
    def custom_stochastic(value=data, mu=mu, alpha=alpha, beta=beta):
        # Recursive term: r[0] = 0, r[i] = exp(-beta * dt_i) * (1 + r[i-1]).
        r = np.zeros(len(value))
        for i in range(1, len(value)):
            r[i] = math.exp(-beta * (value[i] - value[i - 1])) * (1 + r[i - 1])
        # Calculate the log-likelihood
        loglik = -value[-1] * mu
        loglik = loglik + alpha / beta * sum(
            np.exp(-beta * (value[-1] - value)) - 1)
        loglik = loglik + np.sum(np.log(mu + alpha * r))
        return loglik

    model = pm.MCMC([mu, alpha, beta, custom_stochastic])

    srfdict = {}
    srfpass = False
    while not srfpass:
        for i in range(num_jobs):
            model.sample(300000, 60000, 7)
            model.write_csv('{0}full_params{1}.csv'.format(savedir, i))
        # Compute the statistics once per pass; the same values are used
        # for the convergence check and (on the last pass) for the report.
        srfdict = pm.gelman_rubin(model)
        srfpass = srf_checker(srfdict)

    with open('{0}srf.csv'.format(savedir), 'w') as wfile:
        print('Parameter,SRF', file=wfile)
        for k, v in srfdict.items():
            print('{0},{1}'.format(k, v), file=wfile)
def gelman_rubin(chain, labels, plt_label, nchunk=10):
    """Plot the Gelman-Rubin R statistic against chain length.

    The chain is truncated along axis 1 at ``nchunk`` evenly spaced
    lengths; ``pymc.gelman_rubin`` is evaluated on each truncation and one
    curve per parameter is drawn.  The figure is saved as
    ``gelman_rubin_<plt_label>.png`` and then closed.

    Parameters
    ----------
    chain : array
        3-D array; axis 1 is the iteration axis and axis 2 the parameter
        axis (from ``chain.shape[1]`` / ``chain.shape[2]``).  Axis 0 is
        presumably the chain/walker axis -- confirm.
    labels : sequence of str
        Legend label for each parameter, indexed like axis 2 of ``chain``.
    plt_label : str
        Suffix used in the output file name.
    nchunk : int, optional
        Number of truncation points.
    """
    nparameters = chain.shape[2]
    niter = chain.shape[1]
    chunk_len = int(niter / nchunk)

    gelman_rubin_r = np.zeros(shape=(nchunk, nparameters))
    niter_plot = np.zeros(nchunk, dtype='int')
    # ``range`` instead of ``xrange`` so the loop runs on Python 2 and 3.
    for i in range(nchunk):
        niter_plot[i] = chunk_len * i + chunk_len
        gelman_rubin_r[i, :] = pymc.gelman_rubin(chain[:, :niter_plot[i], :])

    ### plotting
    fig, ax = plt.subplots(1, 1, figsize=(8, 8))
    cmap = get_cmap(nparameters)
    for i in range(nparameters):
        ax.plot(niter_plot, gelman_rubin_r[:, i], 'o', label=labels[i],
                color=cmap(i), lw=1.5, linestyle='-', alpha=0.6)
    ax.set_ylabel('Gelman-Rubin R')
    ax.set_xlabel('iteration')

    ### limits (named so the builtins min/max are not shadowed)
    r_min, r_max = gelman_rubin_r.min(), gelman_rubin_r.max()
    ax.set_ylim(r_min * 0.95, r_max * 1.05)
    if r_max > 10:
        # NOTE(review): ``nonposy``/``subsy`` are the older Matplotlib
        # keyword spellings -- confirm against the installed version.
        ax.set_yscale('log', nonposy='clip', subsy=(1, 2, 4))
        ax.yaxis.set_minor_formatter(minorFormatter)
        ax.yaxis.set_major_formatter(majorFormatter)

    ax.axhline(1.2, linestyle='--', color='red', lw=1, zorder=-1)
    # Integer column count, never below 1: ``nparameters / 5`` is 0 for
    # fewer than five parameters and a float under Python 3.
    ax.legend(prop={'size': 10}, ncol=max(1, nparameters // 5),
              numpoints=1, markerscale=0.7)
    fig.tight_layout()
    plt.savefig('gelman_rubin_' + plt_label + '.png', dpi=150)
    plt.close()
def test_simple(self):
    """Check that every Gelman-Rubin value of a sampled copy of ``S`` is ~1.

    A shallow copy of the module-level sampler ``S`` is sampled with the
    module-level ``iterations`` / ``burnin`` settings, and each entry of
    the returned statistics is compared with 1.0 via
    ``assert_approx_equal(..., 1., 2)``.
    """
    sampler = copy.copy(S)
    sampler.sample(iterations, burnin, progress_bar=0)
    r_hat = pymc.gelman_rubin(sampler)
    for name in r_hat:
        assert_approx_equal(r_hat[name], 1., 2)
def test_simple(self):
    """Check that every Gelman-Rubin value of a sampled copy of ``S`` is ~1.

    A shallow copy of the module-level sampler ``S`` is sampled for 10000
    iterations with a burn-in of 2000, and each entry of the returned
    statistics is compared with 1.0 via ``assert_approx_equal(..., 1., 2)``.
    """
    sampler = copy.copy(S)
    sampler.sample(10000, 2000, progress_bar=0)
    r_hat = pymc.gelman_rubin(sampler)
    for name in r_hat:
        assert_approx_equal(r_hat[name], 1., 2)
def fit_two_mcmc(time, signal, height_th, one_pulse,
                 sigma0,  # signal noise
                 sum_mu, sum_tau, sum_a, sum_b,
                 diff_tau, diff_a, diff_b,
                 sampling, burn, thin,
                 Plot=False, debug=False, auto=False):
    """Fit the sum of two ``one_pulse`` templates to ``signal`` with PyMC MCMC.

    Priors
    ------
    * ``one_x_offset``: uniform from the first latched time to the time of
      the signal maximum.
    * ``two_x_offset``: uniform over the whole latched time window.
    * ``sum_amps`` / ``diff_amps``: truncated normals; the two pulse
      amplitudes are ``(sum + diff) / 2`` and ``(sum - diff) / 2``.

    The likelihood is Normal around the two-pulse model with precision
    ``1 / sigma0**2``.

    Parameters
    ----------
    time, signal
        Sample times and observed values.  Indexed with the result of
        ``pd.srlatch_rev`` and with ``np.argmax``, so presumably NumPy
        arrays of equal length -- confirm.
    height_th
        Threshold passed to ``pd.srlatch_rev`` to bound the offset search.
    one_pulse : callable
        Called as ``one_pulse(x, x_offset=..., amplitude=...)``.
    sigma0
        Noise standard deviation of the signal.
    sum_mu, sum_tau, sum_a, sum_b
        Mean, precision and truncation bounds of the amplitude-sum prior.
    diff_tau, diff_a, diff_b
        Precision and truncation bounds of the amplitude-difference prior
        (its mean is 0).
    sampling, burn, thin
        Arguments of ``MDL.sample`` when ``auto`` is false, and of the
        extra runs in ``debug`` mode.
    Plot : bool
        Plot data, current fitted values and the PyMC trace plots.
    debug : bool
        Run ten more chains, evaluate Gelman-Rubin and draw a summary plot.
    auto : bool
        Do a pilot run and derive burn / iterations / thinning from the
        Raftery-Lewis diagnostic instead of using ``sampling``/``burn``/``thin``.

    Returns
    -------
    The ``pymc.MCMC`` object (pickle database backend).  Fitted values are
    read as e.g. ``MDL.one_x_offset.value``.
    """
    # LIMIT SEARCH FOR OFFSETS
    _t_initial = time[pd.srlatch_rev(signal, 0, height_th)][0]
    _t_final = time[pd.srlatch_rev(signal, 0, height_th)][-1]

    def model(x, f):
        # NOTE: this function returns locals(); every local name defined
        # here becomes part of the model handed to pymc.MCMC, so names
        # must not be added, removed or renamed casually.
        # PRIORS
        y_err = sigma0
        # print (_t_initial,_t_final, one_x_offset_init)
        one_x_offset = pymc.Uniform("one_x_offset", _t_initial,
                                    time[np.argmax(signal)],
                                    value=_t_initial)
        two_x_offset = pymc.Uniform("two_x_offset", _t_initial, _t_final,
                                    value=_t_final)
        sum_of_amps = pymc.TruncatedNormal("sum_amps", mu=sum_mu, tau=sum_tau,
                                           a=sum_a, b=sum_b, value=sum_mu)
        # sigma/mu is the n=1 std deviation in units of n=1 amplitude
        diff_of_amps = pymc.TruncatedNormal("diff_amps", mu=0, tau=diff_tau,
                                            a=diff_a, b=diff_b, value=0)
        one_x_amplitude = (sum_of_amps + diff_of_amps) / 2
        two_x_amplitude = (sum_of_amps - diff_of_amps) / 2

        # MODEL
        @pymc.deterministic(plot=False)
        def mod_two_pulse(x=time,
                          one_x_offset=one_x_offset,
                          two_x_offset=two_x_offset,
                          one_x_amplitude=one_x_amplitude,
                          two_x_amplitude=two_x_amplitude):
            return one_pulse(x, x_offset=one_x_offset, amplitude=one_x_amplitude) + \
                one_pulse(x, x_offset=two_x_offset, amplitude=two_x_amplitude)

        # likelihood
        y = pymc.Normal("y", mu=mod_two_pulse, tau=1.0 / y_err**2,
                        value=signal, observed=True)
        return locals()

    # The sample is stored in a Python serialization (pickle) database
    MDL = pymc.MCMC(model(time, signal), db='pickle')
    # MDL.use_step_method(pymc.AdaptiveMetropolis,
    #     [MDL.sum_of_amps, MDL.diff_of_amps],
    #     scales={MDL.sum_of_amps:np.sqrt(1/sum_tau),
    #             MDL.diff_of_amps:np.sqrt(1/diff_tau)},
    #     )

    if auto:
        # uses Raftery Lewis to determine fit parameters per trace:
        # https://pymc-devs.github.io/pymc/modelchecking.html#convergence-diagnostics
        # pilot run
        InitSamples = 4 * len(time)
        InitMDL = MDL  # alias: the pilot run samples the same MCMC object
        InitMDL.sample(iter=InitSamples, burn=int(InitSamples * .5), thin=10)
        pymc_diagnostic = pymc.raftery_lewis(InitMDL, q=0.025, r=0.02,
                                             verbose=0)
        # The diagnostic of the deterministic mod_two_pulse is irrelevant.
        # Drop it by name: positional slicing of ``keys()`` depends on dict
        # ordering and does not work on Python 3 key views.
        per_trace = [diag for name, diag in pymc_diagnostic.items()
                     if name != 'mod_two_pulse']
        # Worst case over all traces; the first two diagnostics
        # (first-order Markov chain) are irrelevant here.
        [EstBurn, EstSampling, EstThin] = np.max(np.array(per_trace),
                                                 axis=0)[2:]
        # print [EstBurn, EstSampling, EstThin]
        # actual run
        MDL.sample(iter=EstSampling, burn=EstBurn, thin=EstThin, verbose=0)
    else:
        MDL.sample(iter=sampling, burn=burn, thin=thin, verbose=-1)
    # thin: consider every 'thin' samples
    # burn: number of samples to discard: decide by num of samples to run
    #       till parameters stabilise at desired precision

    if Plot:
        y_fit = MDL.mod_two_pulse.value  # get mcmc fitted values
        plt.plot(time, signal, 'b', marker='o', ls='-', lw=1,
                 label='Observed')
        plt.plot(time, y_fit, 'k', marker='+', ls='--', ms=5, mew=2,
                 label='Bayesian Fit Values')
        plt.legend()
        pymc.Matplot.plot(MDL)

    if debug:
        # Extra chains so that the multi-chain diagnostics can be evaluated.
        for i in np.arange(10):
            MDL.sample(iter=sampling, burn=burn, thin=thin, verbose=0)
        pymc.gelman_rubin(MDL)
        pymc.Matplot.summary_plot(MDL)

    return MDL  # usage: MDL.one_x_offset.value for fitted result
def get_Bayes(measurements=[], chunksize=5, Ndp=5, iter=50000, burn=5000):
    """Fit a truncated stick-breaking mixture to ``measurements`` with PyMC.

    Cluster ``n`` (n = 1..Ndp) has mean ``sc * n`` and precision
    ``1 / (sc**2 * tau**2 * n)``.  The model is sampled ``chunksize``
    times; each run contributes an ``sc`` trace, a posterior-predictive
    trace and the mode of the measurement histogram.

    Parameters
    ----------
    measurements : sequence of float
        Observed values.
    chunksize : int
        Number of independent MCMC runs.
    Ndp : int
        Number of mixture components (truncation level).
    iter, burn : int
        NOTE(review): currently NOT used -- every run samples 50000
        iterations with a burn-in of 10000.  Left as is because honouring
        the defaults would change the effective burn-in; confirm intent.

    Returns
    -------
    dict
        ``'gelman_rubin'`` (NaN when ``chunksize <= 1``, otherwise the
        Gelman-Rubin statistic of the ``sc`` traces), ``'modes'``,
        ``'simulations'`` and ``'sc_samples'``.
    """
    sc = pymc.Uniform('sc', 0.1, 2.0, value=0.24)
    tau = pymc.Uniform('tau', 0.0, 1.0, value=0.5)

    concinit = 1.0
    conclo = 0.1
    conchi = 10.0
    concentration = pymc.Uniform('concentration', lower=conclo, upper=conchi,
                                 value=concinit)

    # The stick-breaking construction: requires Ndp beta draws dependent on
    # the concentration, before the probability mass function is actually
    # constructed.
    #betas = pymc.Beta('betas', alpha=1, beta=concentration, size=Ndp)
    betas = pymc.Beta('betas', alpha=1, beta=1, size=Ndp - 1)

    @pymc.deterministic
    def pmf(betas=betas):
        "Construct a probability mass function for the truncated Dirichlet process"
        # prod = lambda x: np.exp(np.sum(np.log(x)))  # Slow but more accurate(?)
        prod = np.prod
        # Stick i takes fraction betas[i] of what the earlier sticks left.
        # (A two-argument lambda mapped over enumerate() raises TypeError.)
        value = [u * prod(1.0 - betas[:i]) for i, u in enumerate(betas)]
        value.append(1.0 - sum(value[:]))  # force value to sum to 1
        return value

    # The cluster assignments: each data point's estimated cluster ID.
    # Remove idinit to allow clusterid to be randomly initialized:
    Ndata = len(measurements)
    idinit = np.zeros(Ndata, dtype=np.int64)
    clusterid = pymc.Categorical('clusterid', p=pmf, size=Ndata, value=idinit)

    @pymc.deterministic(name='clustermean')
    def clustermean(clusterid=clusterid, sc=sc, Ndp=Ndp):
        return sc * np.arange(1, Ndp + 1)[clusterid]

    @pymc.deterministic(name='clusterprec')
    def clusterprec(clusterid=clusterid, sc=sc, tau=tau, Ndp=Ndp):
        return 1.0 / (sc * sc * tau * tau * (np.arange(1, Ndp + 1)[clusterid]))

    y = pymc.Normal('y', mu=clustermean, tau=clusterprec, observed=True,
                    value=measurements)

    ## for posterior predictive simulation
    @pymc.deterministic(name='y_sim')
    def y_sim(value=[0], sc=sc, tau=tau, clusterid=clusterid, Ndp=Ndp):
        n = np.arange(1, Ndp + 1)[np.random.choice(clusterid)]
        return np.random.normal(loc=sc * n, scale=sc * tau * n)

    m = pymc.Model({
        "scale": sc,
        "tau": tau,
        "betas": betas,
        "clusterid": clusterid,
        "normal": y,
        "pred": y_sim
    })

    sc_samples = []
    modes = []
    simulations = []
    for i in range(0, chunksize):
        mc = pymc.MCMC(m)
        mc.sample(iter=50000, burn=10000)
        plot(mc)

        sc_sample = mc.trace('sc')[:]
        sc_samples.append(sc_sample)
        simulation = mc.trace('y_sim')[:]
        simulations.append(simulation)

        plt.hist(measurements, 50, fc='gray', histtype='stepfilled',
                 alpha=0.3, normed=False)
        plt.hist(simulation, 30, fc='blue', histtype='stepfilled',
                 alpha=0.3, normed=True)

        # Mode of the measurements: centre of the fullest histogram bin.
        hist, edges = np.histogram(
            measurements, bins=100,
            range=[np.min(measurements) - 0.25, np.max(measurements) + 0.25])
        argm = hist.argmax()
        modes.append((edges[argm] + edges[argm + 1]) / 2)

    if chunksize <= 1:
        gr = np.nan
    else:
        # The result has to be kept: it is returned below.  Without the
        # assignment ``gr`` is undefined for chunksize > 1.
        gr = pymc.gelman_rubin(sc_samples)

    dic = {
        'gelman_rubin': gr,
        'modes': modes,
        'simulations': simulations,
        'sc_samples': sc_samples
    }
    return dic
def sample(self, dbname, n_runs=1, iter=10000, burn=1000, thin=10,
           gelman_rubin=False, progress_bar=False, **kwargs):
    """
    Sample from the posteriors using MCMC.

    Implementation note: This method sets the attribute `mcmc`.

    Parameters
    ----------
    dbname : str
        Path to the file the pickled MCMC object is written to.
        If the path exists, the existing database is loaded and updated.
    n_runs : int, optional (default: 1)
        The number of times MCMC is run. Must be >1 if the
        Gelman-Rubin statistic is used.
    iter : int, optional (default: 10000)
        The number of iterations per MCMC run.
    burn : int, optional (default: 1000)
        The number of samples discarded from the beginning
        of a parameter's trace.
    thin : int, optional (default: 10)
        Thinning interval passed to PyMC: only every `thin`-th sample
        is kept, to reduce auto-correlation.
    gelman_rubin : bool, optional (default: False)
        If True, compute the Gelman-Rubin statistic for each
        parameter in `self.parameters` and print it to stdout.
        n_runs must be >1.
    progress_bar : bool, optional (default: False)
        If True, show progress bar while MCMC samples.
    **kwargs
        Additional keyword arguments passed to PyMC's sample call.

    Raises
    ------
    SystemExit
        If `self.model` is None.
    """
    if self.model is None:
        error_msg = ("Model doesn't exist in sampling stage. "
                     "Please create the model before sampling.")
        sys.exit(error_msg)

    db = "pickle"
    if os.path.isfile(dbname):
        # NOTE(security): this unpickles `dbname`; only load database
        # files that come from a trusted source.
        db = pymc.database.pickle.load(dbname)

    # init MCMC sampling object
    self.mcmc = pymc.MCMC(self.model, db=db, dbname=dbname)

    # sample using MCMC
    for _ in range(n_runs):
        self.mcmc.sample(iter=iter, burn=burn, thin=thin,
                         progress_bar=progress_bar, **kwargs)

    # close the database file
    self.mcmc.db.close()

    # compute Gelman-Rubin statistic
    if gelman_rubin:
        if n_runs < 2:
            print("The Gelman-Rubin statistic requires",
                  "multiple MCMC runs.", file=sys.stderr)
        else:
            print("Gelman-Rubin statistics:")
            # One evaluation covers every variable; look each parameter
            # up in the result instead of recomputing it per parameter.
            gr_stats = pymc.gelman_rubin(self.mcmc)
            for param in self.parameters:
                print(f"\t{param} : {gr_stats[param]}")
mcmc = pymc.MCMC(make_poisson(5, 1., 20.), db='pickle') # Run 3 chains: for i in range(3): mcmc.sample(iter=10000, burn=5000, thin=1) print # to handle missing newline from progress bar # Generate a dict of Geweke test z scores for each RV, here using early # segments 10% of the chain length, a final segment 50% of the length, # and producing scores for 10 early intervals. scores = pymc.geweke(mcmc, first=0.1, last=0.5, intervals=10) # The Matplot functions automatically produce new figures for each plot. pymc.Matplot.geweke_plot(scores['rate'], 'rate') pymc.Matplot.geweke_plot(scores['mu'], 'mu') print 'Rhat values:', pymc.gelman_rubin(mcmc) # Plot credible regions and R values: pymc.Matplot.summary_plot(mcmc) def make_on_off(n_off, expo_off, n_on, expo_on, mean0): """ Make a PyMC model for inferring a Poisson signal rate parameter, `s`, for 'on-off' observations with uncertain background rate, `b`. Parameters ---------- n_off, n_on : int Event counts off-source and on-source
def plot_summary(
        sol,
        save=False,
        draw=True,
        save_as_png=False,
        dpi=None,
        ignore=subplots_to_ignore,
        fig_nb="",
):
    """
    Plots a parameter summary and Gelman-Rubin R-hat for multiple chains

    The left panel shows, for every parameter row, the per-chain mean of
    the trace; the right panel (sharing the y axis) shows the Gelman-Rubin
    R-hat of the same parameter, clipped at 5.

    Parameters
    ----------
    sol
        Solution object; this function reads ``sol.mcmc["nb_chain"]``,
        ``sol.var_dict``, ``sol.MDL``, ``sol.model_type_str`` and
        ``sol.filename``.
    save : bool
        Save the figure through ``save_figure`` (subfolder ``Summaries``).
    draw : bool
        Return the figure if True, otherwise return None.
    save_as_png : bool
        File extension: ``png`` if True, else ``pdf``.
    dpi
        Passed through to ``save_figure``.
    ignore
        Variable names to leave out of the plot.
    fig_nb : str
        Prefix of the saved file name.
    """
    ext = 'png' if save_as_png else 'pdf'
    ch_nb = sol.mcmc["nb_chain"]
    keys = sorted([k for k in sol.var_dict.keys() if k not in ignore])
    trac = [[sol.var_dict[x].trace(chain=n).mean(axis=0) for x in keys]
            for n in range(ch_nb)]
    deps = [var_depth(sol.var_dict[x]) for x in keys]
    # Row labels, top row last: vector variables expand to name1..named.
    lbls = list(
        reversed(
            flatten([[k + '%s' % (x + 1) for x in range(d)] if d > 1 else k
                     for k, d in zip(keys, deps)])))

    if ch_nb >= 2:
        rhat = [
            gelman_rubin([
                sol.MDL.trace(var, -x)[:] for x in range(sol.mcmc['nb_chain'])
            ]) for var in keys
        ]
        R = np.array(flatten(rhat))
        R[R > 5] = 5
    else:
        print(
            "\nTwo or more chains of equal length required for Gelman-Rubin convergence"
        )
        R = len(lbls) * [None]

    fig, axes = plt.subplots(figsize=(6, 4))
    gs2 = gridspec.GridSpec(3, 3)
    ax1 = plt.subplot(gs2[:, :-1])
    ax2 = plt.subplot(gs2[:, -1], sharey=ax1)

    # Flattened per-chain means, computed once instead of once per row.
    chain_means = [np.array(flatten(trac[c])) for c in range(ch_nb)]
    for i in range(len(lbls)):
        for val_m in chain_means:
            ax1.scatter(val_m[i], len(val_m) - (i + 1), color="C0",
                        marker=".", s=50, facecolor='k', edgecolors='k',
                        alpha=1)
        # R is in the same (unreversed) order as the means, so it must go
        # on the same row as the means.  Drawing it at row ``i`` would pair
        # each R-hat with the label of another parameter, because the tick
        # labels on the shared y axis are reversed.
        ax2.scatter(R[i], len(lbls) - (i + 1), color="C3", marker="<", s=50,
                    alpha=1)

    ax1.set_ylim([-1, len(lbls)])
    ax1.set_yticks(list(range(0, len(lbls))))
    ax1.set_yticklabels([parlbl_dic[l] for l in lbls])
    ax1.set_axisbelow(True)
    ax1.yaxis.grid(True)
    ax1.xaxis.grid(False)
    ax1.set_xlim(ax1.get_xlim())
    ax1.set_xlabel(r'Parameter value')

    plt.setp(ax2.get_yticklabels(), visible=False)
    ax2.set_xlim([0.5, 5.5])
    ax2.set_xticklabels(["", "1", "2", "3", "4", "5+"])
    ax2.set_xticks([
        0.5,
        1,
        2,
        3,
        4,
        5,
    ])
    ax2.set_axisbelow(True)
    ax2.yaxis.grid(True)
    ax2.xaxis.grid(False)
    ax2.set_xlabel(r'$\hat{R}$')
    ax2.axvline(1, ls='--', color='C0', zorder=0)

    plt.tight_layout()
    # Closing only deregisters the figure from pyplot; the Figure object
    # is still saved and returned below.
    plt.close(fig)
    if save:
        fn = '%sSUM-%s-%s.%s' % (fig_nb, sol.model_type_str, sol.filename,
                                 ext)
        save_figure(fig, subfolder='Summaries', fname=fn, dpi=dpi)
    if draw:
        return fig
    else:
        return None
# # It is simple to run multiple chains sequentially in DisMod-MR, although I worry that this gives a false sense of security about the convergence. # <codecell> # setup a model and run the chain once dm = new_model(data) dm.setup_model('p', rate_model='neg_binom') %time dm.fit(how='mcmc', iter=2000, burn=1000, thin=1) # <codecell> # to run it more times, use the sample method of the dm.mcmc object # use the same iter/burn/thin settings for future convenience for i in range(4): dm.mcmc.sample(iter=2000, burn=1000, thin=1) # <codecell> # calculate Gelman-Rubin statistic for all model variables R_hat = pm.gelman_rubin(dm.mcmc) # examine for gamma_p_100 R_hat['gamma_p_100'] # <codecell>
def plot_summary(sol, save=False, draw=True, save_as_png=False, dpi=None,
                 ignore=default_ignore, fig_nb="",
                 ):
    """
    Plots a parameter summary and Gelman-Rubin R-hat for multiple chains

    The left panel shows, for every parameter row, the per-chain mean of
    the trace; the right panel (sharing the y axis) shows the Gelman-Rubin
    R-hat of the same parameter, clipped at 5.

    Parameters
    ----------
    sol
        Solution object; this function reads ``sol.mcmc["nb_chain"]``,
        ``sol.var_dict``, ``sol.MDL``, ``sol.model_type_str`` and
        ``sol.filename``.
    save : bool
        Save the figure through ``save_figure`` (subfolder ``Summaries``).
    draw : bool
        Return the figure if True, otherwise return None.
    save_as_png : bool
        File extension: ``png`` if True, else ``pdf``.
    dpi
        Passed through to ``save_figure``.
    ignore
        Variable names to leave out of the plot.
    fig_nb : str
        Prefix of the saved file name.
    """
    ext = 'png' if save_as_png else 'pdf'
    ch_nb = sol.mcmc["nb_chain"]

    keys = sorted([k for k in sol.var_dict.keys() if k not in ignore])
    trac = [[sol.var_dict[x].trace(chain=n).mean(axis=0) for x in keys]
            for n in range(ch_nb)]
    deps = [var_depth(sol.var_dict[x]) for x in keys]
    # Row labels, top row last: vector variables expand to name1..named.
    lbls = list(reversed(flatten([[k+'%s'%(x+1) for x in range(d)] if d > 1 else k
                                  for k, d in zip(keys, deps)])))

    if ch_nb >= 2:
        rhat = [gelman_rubin([sol.MDL.trace(var, -x)[:]
                              for x in range(sol.mcmc['nb_chain'])])
                for var in keys]
        R = np.array(flatten(rhat))
        R[R > 5] = 5
    else:
        print("\nTwo or more chains of equal length required for Gelman-Rubin convergence")
        R = len(lbls)*[None]

    fig, axes = plt.subplots(figsize=(6, 4))
    gs2 = gridspec.GridSpec(3, 3)
    ax1 = plt.subplot(gs2[:, :-1])
    ax2 = plt.subplot(gs2[:, -1], sharey=ax1)

    # Flattened per-chain means, computed once instead of once per row.
    chain_means = [np.array(flatten(trac[c])) for c in range(ch_nb)]
    for i in range(len(lbls)):
        for val_m in chain_means:
            ax1.scatter(val_m[i], len(val_m)-(i+1), color="C0", marker=".",
                        s=50, facecolor='k', edgecolors='k', alpha=1)
        # R is in the same (unreversed) order as the means, so it must go
        # on the same row as the means.  Drawing it at row ``i`` would pair
        # each R-hat with the label of another parameter, because the tick
        # labels on the shared y axis are reversed.
        ax2.scatter(R[i], len(lbls)-(i+1), color="C3", marker="<", s=50,
                    alpha=1)

    ax1.set_ylim([-1, len(lbls)])
    ax1.set_yticks(list(range(0, len(lbls))))
    ax1.set_yticklabels([parlbl_dic[l] for l in lbls])
    ax1.set_axisbelow(True)
    ax1.yaxis.grid(True)
    ax1.xaxis.grid(False)
    ax1.set_xlim(ax1.get_xlim())
    ax1.set_xlabel(r'Parameter value')

    plt.setp(ax2.get_yticklabels(), visible=False)
    ax2.set_xlim([0.5, 5.5])
    ax2.set_xticklabels(["", "1", "2", "3", "4", "5+"])
    ax2.set_xticks([0.5, 1, 2, 3, 4, 5, ])
    ax2.set_axisbelow(True)
    ax2.yaxis.grid(True)
    ax2.xaxis.grid(False)
    ax2.set_xlabel(r'$\hat{R}$')
    ax2.axvline(1, ls='--', color='C0', zorder=0)

    plt.tight_layout()
    # Closing only deregisters the figure from pyplot; the Figure object
    # is still saved and returned below.
    plt.close(fig)
    if save:
        fn = '%sSUM-%s-%s.%s'%(fig_nb, sol.model_type_str, sol.filename, ext)
        save_figure(fig, subfolder='Summaries', fname=fn, dpi=dpi)
    if draw:
        return fig
    else:
        return None
def plot_summary(sol, save=False, save_as_png=True, fig_dpi=144):
    """Plot per-chain posterior means with 95% HPD bars and R-hat values.

    The left panel has one row per (scalar or vector-component)
    parameter, with one marker and error bar per chain.  The right panel,
    sharing the y axis, shows the Gelman-Rubin R-hat of each parameter,
    clipped at 3, when at least two chains are available.

    Parameters
    ----------
    sol
        Solution object; this function reads ``sol.MDL``,
        ``sol.mcmc["nb_chain"]`` and ``sol.filename`` and passes ``sol``
        to ``get_model_type``.
    save : bool
        Save the figure under ``<cwd>/Figures/Summaries/``.
    save_as_png : bool
        File extension: ``png`` if True, else ``pdf``.
    fig_dpi : int
        Resolution used when saving.

    Returns
    -------
    The Matplotlib figure (already closed in pyplot).
    """
    save_as = 'png' if save_as_png else 'pdf'

    MDL, ch_n = sol.MDL, sol.mcmc["nb_chain"]
    model = get_model_type(sol)
    filename = sol.filename.replace("\\", "/").split("/")[-1].split(".")[0]

    keys = sorted([x.__name__ for x in MDL.deterministics]) + sorted(
        [x.__name__ for x in MDL.stochastics])
    # Drop internal variables.  Each name is handled on its own: with one
    # try block around all removals, a missing first name would leave the
    # remaining ones in the list.
    for hidden in ("zmod", "log_m_i", "log_tau_i", "cond"):
        if hidden in keys:
            keys.remove(hidden)

    # Expand vector-valued variables into name1..nameN.
    for (i, k) in enumerate(keys):
        trace = MDL.trace(k)[:]
        vect = old_div(trace.size, len(trace))
        if vect > 1:
            keys[i] = [k + "%d" % n for n in range(1, vect + 1)]
    keys = list(reversed(sorted(flatten(keys))))

    try:
        r_hat = gelman_rubin(MDL)
    except Exception:
        print(
            "\nTwo or more chains of equal length required for Gelman-Rubin convergence"
        )
        # Remember the failure so the R-hat panel is skipped below rather
        # than failing on an undefined name.
        r_hat = None

    fig, axes = plt.subplots(figsize=(6, 4))
    gs2 = gridspec.GridSpec(3, 3)
    ax1 = plt.subplot(gs2[:, :-1])
    ax2 = plt.subplot(gs2[:, -1], sharey=ax1)
    ax2.set_xlabel("R-hat")
    ax2.plot([1, 1], [-1, len(keys)], "--", color="C7", zorder=0)

    # Half-step shift that centres the markers for an even chain count.
    o_s = 0 if ch_n % 2 != 0 else 0.5
    digits = ["%d" % d for d in range(1, 8)]
    for (i, k) in enumerate(keys):
        # A trailing digit 1-7 marks a component of a vector variable
        # ("R0" is the exception); the digit is the 1-based component.
        test = k[-1] not in digits or k == "R0"
        name = k if test else k[:-1]
        comp = None if test else int(k[-1]) - 1

        for c in range(ch_n):
            # One stats call per chain instead of three.
            chain_stats = MDL.stats(name, chain=c)[name]
            val_m = chain_stats['mean']
            hpd_lo = chain_stats['95% HPD interval'][0]
            hpd_hi = chain_stats['95% HPD interval'][1]
            if not test:
                val_m, hpd_lo, hpd_hi = val_m[comp], hpd_lo[comp], hpd_hi[comp]
            err = [[abs(hpd_lo - val_m)], [abs(hpd_hi - val_m)]]
            # Spread the chains vertically around row i.
            y_pos = (i - (old_div(ch_n, 2)) * (1. / ch_n / 1.4) +
                     (1. / ch_n / 1.4) * (c + o_s))
            ax1.scatter(val_m, y_pos, color="C0", marker="o", s=50,
                        edgecolors='C7', alpha=0.7)
            ax1.errorbar(val_m, y_pos, xerr=err, color="C7", fmt=" ",
                         zorder=0)

        if ch_n >= 2 and r_hat is not None:
            R = np.array(r_hat[name])
            R[R > 3] = 3
            if test:
                ax2.scatter(R, i, color="C1", marker="<", s=50, alpha=0.7)
            else:
                ax2.scatter(R[comp], i, color="C1", marker="<", s=50,
                            alpha=0.7)

    ax1.set_ylim([-1, len(keys)])
    ax1.set_yticks(list(range(0, len(keys))))
    ax1.set_yticklabels(keys)
    plt.setp(ax2.get_yticklabels(), visible=False)
    ax2.set_xlim([0.5, 3.5])
    ax2.set_xticklabels(["", "1", "2", "3+"])
    ax2.set_xticks([0.5, 1, 2, 3])
    ax1.set_xlabel("Parameter values")
    plt.tight_layout()

    if save:
        save_where = '/Figures/Summaries/'
        working_path = getcwd().replace("\\", "/") + "/"
        save_path = working_path + save_where
        print("\nSaving summary figure in:\n", save_path)
        if not path.exists(save_path):
            makedirs(save_path)
        fig.savefig(save_path + 'Summary-%s-%s.%s' % (model, filename, save_as),
                    dpi=fig_dpi, bbox_inches='tight')
    try:
        plt.close(fig)
    except Exception:
        # Best effort: failing to close the figure must not lose the result.
        pass
    return fig
def main(mcmc_args=None): print('Setting up parameters and priors...') params = Params() # Set up location here with command line arguments in a list. params.cmd_line_chg(['--kalbar']) assert params.site_name + 'fields.txt' == 'data/kalbarfields.txt' # Set parameters specific to Bayesian runs params.PLOT = False params.OUTPUT = False # This sends a message to CalcSol on whether or not to use CUDA if params.CUDA: globalvars.cuda = True else: globalvars.cuda = False # get wind data and day labels wind_data, days = PM.get_wind_data(*params.get_wind_params()) params.ndays = len(days) # reduce domain params.domain_info = (10000.0, 400) #25 m sided cells domain_res = params.domain_info[0] / params.domain_info[1] cell_area = domain_res**2 locinfo = LocInfo(params.dataset, params.coord, params.domain_info) ###################################################################### ##### Model Priors ##### ###################################################################### lam = pm.Beta("lam", 5, 1, value=0.95) f_a1 = pm.TruncatedNormal("f_a1", 6, 0.3, 0, 9, value=6) f_a2 = pm.TruncatedNormal("f_a2", 20, 0.3, 15, 24, value=20) f_b1_p = pm.Gamma("fb1_p", 2, 1, value=1.5, trace=False, plot=False) #alpha,beta parameterization @pm.deterministic(trace=True, plot=True) def f_b1(f_b1_p=f_b1_p): return f_b1_p + 1 f_b2_p = pm.Gamma("fb2_p", 2, 1, value=1.5, trace=False, plot=False) @pm.deterministic(trace=True, plot=True) def f_b2(f_b2_p=f_b2_p): return f_b2_p + 1 g_aw = pm.Gamma("g_aw", 2.2, 1, value=1.0) g_bw = pm.Gamma("g_bw", 5, 1, value=3.8) # flight diffusion parameters. 
note: mean is average over flight advection sig_x = pm.Gamma("sig_x", 26, 0.15, value=180) sig_y = pm.Gamma("sig_y", 15, 0.15, value=150) corr_p = pm.Beta("corr_p", 5, 5, value=0.5, trace=False, plot=False) @pm.deterministic(trace=True, plot=True) def corr(corr_p=corr_p): return corr_p * 2 - 1 # local spread paramters sig_x_l = pm.Gamma("sig_xl", 2, 0.08, value=10) sig_y_l = pm.Gamma("sig_yl", 2, 0.14, value=10) corr_l_p = pm.Beta("corr_l_p", 5, 5, value=0.5, trace=False, plot=False) @pm.deterministic(trace=True, plot=True) def corr_l(corr_l_p=corr_l_p): return corr_l_p * 2 - 1 mu_r = pm.Normal("mu_r", 1., 1, value=1) n_periods = pm.Poisson("n_periods", 30, value=30) #alpha_pow = prev. time exponent in ParasitoidModel.h_flight_prob xi = pm.Gamma("xi", 1, 1, value=0.75) # presence to oviposition/emergence factor em_obs_prob = pm.Beta("em_obs_prob", 1, 1, value=0.05) # per-wasp prob of # observing emergence in release field grid given max leaf collection # this is dependent on the size of the cell surrounding the grid point # ...not much to be done about this. grid_obs_prob = pm.Beta("grid_obs_prob", 1, 1, value=0.005) # probability of # observing a wasp present in the grid cell given max leaf sampling #card_obs_prob = pm.Beta("card_obs_prob",1,1,value=0.5) # probability of # observing a wasp present in the grid cell given max leaf sampling #### Data collection model background for sentinel fields #### # Need to fix linear units for area. Meters would be best. # Effective collection area (constant between fields) is very uncertain with warnings.catch_warnings(): # squelsh a warning based on pymc coding we don't need to worry about warnings.simplefilter("ignore", RuntimeWarning) A_collected = pm.TruncatedNormal("A_collected", 2500, 1 / 2500, 0, min(locinfo.field_sizes.values()) * cell_area, value=2500) # in m**2 # Each field has its own binomial probability. # Probabilities are likely to be small, and pm.Beta cannot handle small # parameter values. 
So we will use TruncatedNormal again. N = len(locinfo.sent_ids) sent_obs_probs = np.empty(N, dtype=object) # fix beta for the Beta distribution sent_beta = 40 # mean of Beta distribution will be A_collected/field size for n, key in enumerate(locinfo.sent_ids): sent_obs_probs[n] = pm.Beta( "sent_obs_probs_{}".format(key), A_collected / (locinfo.field_sizes[key] * cell_area) * sent_beta / (1 - A_collected / (locinfo.field_sizes[key] * cell_area)), sent_beta, value=0.1 * 3600 / (locinfo.field_sizes[key] * cell_area)) sent_obs_probs = pm.Container(sent_obs_probs) # Max a Posterirori estimates have consistantly returned a value near zero # for sprd_factor. So we will comment these sections. # if params.dataset == 'kalbar': # # factor for kalbar initial spread # sprd_factor = pm.Uniform("sprd_factor",0,1,value=0.3) # else: # sprd_factor = None sprd_factor = None #### Collect variables and setup block update #### params_ary = pm.Container( np.array([ g_aw, g_bw, f_a1, f_b1, f_a2, f_b2, sig_x, sig_y, corr, sig_x_l, sig_y_l, corr_l, lam, n_periods, mu_r ], dtype=object)) # The stochastic variables in this list (and the stochastics behind the # deterministic ones) should be block updated in order to avoid the large # computational expense of evaluating the model multiple times for each # MCMC iteration. To do this, starting step variances must be definied # for each variable. This is done via a scaling dict. 
stoc_vars = [ g_aw, g_bw, f_a1, f_b1_p, f_a2, f_b2_p, sig_x, sig_y, corr_p, sig_x_l, sig_y_l, corr_l_p, lam, n_periods, mu_r ] step_scales = { g_aw: 0.04, g_bw: 0.08, f_a1: 0.25, f_b1_p: 0.05, f_a2: 0.25, f_b2_p: 0.05, sig_x: 2, sig_y: 2, corr_p: 0.0005, sig_x_l: 2, sig_y_l: 2, corr_l_p: 0.0005, lam: 0.0005, n_periods: 1, mu_r: 0.005 } print('Getting initial model values...') ###################################################################### ##### Run Model ##### ###################################################################### @pm.deterministic(plot=False, trace=False) def pop_model(params=params, params_ary=params_ary, locinfo=locinfo, wind_data=wind_data, days=days, sprd_factor=sprd_factor): '''This function acts as an interface between PyMC and the model. Not only does it run the model, but it provides an emergence potential based on the population model result projected forward from feasible oviposition dates. To modify how this projection happens, edit popdensity_to_emergence. Returned values from this function should be nearly ready to compare to data. 
''' modeltic = time.time() ### Alter params with stochastic variables ### # g wind function parameters params.g_params = tuple(params_ary[0:2]) # f time of day function parameters params.f_params = tuple(params_ary[2:6]) # Diffusion coefficients params.Dparams = tuple(params_ary[6:9]) params.Dlparams = tuple(params_ary[9:12]) # Probability of any flight during the day under ideal circumstances params.lam = params_ary[12] # TRY BOTH SCALINGS - VARYING mu_r and n_periods # scaling flight advection to wind advection # number of time periods (based on interp_num) in one flight params.n_periods = params_ary[ 13] # if interp_num = 30, this is # of minutes params.mu_r = params_ary[14] ### PHASE ONE ### # First, get spread probability for each day as a coo sparse matrix max_shape = np.array([0, 0]) pm_args = [(days[0], wind_data, *params.get_model_params(), params.r_start)] pm_args.extend([(day, wind_data, *params.get_model_params()) for day in days[1:params.ndays]]) ##### Kalbar wind started recording a day late. Spread the population ##### locally before running full model. 
if sprd_factor is not None: res = params.domain_info[0] / params.domain_info[1] mean_drift = np.array([-25., 15.]) xdrift_int = int(mean_drift[0] // res) xdrift_r = mean_drift[0] % res ydrift_int = int(mean_drift[1] // res) ydrift_r = mean_drift[1] % res longsprd = PM.get_mvn_cdf_values( res, np.array([xdrift_r, ydrift_r]), PM.Dmat(params_ary[6], params_ary[7], params_ary[8])) shrtsprd = PM.get_mvn_cdf_values( res, np.array([0., 0.]), PM.Dmat(params_ary[9], params_ary[10], params_ary[11])) mlen = int( max(longsprd.shape[0], shrtsprd.shape[0]) + max(abs(xdrift_int), abs(ydrift_int)) * 2) sprd = np.zeros((mlen, mlen)) lbds = [ int(mlen // 2 - longsprd.shape[0] // 2), int(mlen // 2 + longsprd.shape[0] // 2 + 1) ] sprd[lbds[0] - ydrift_int:lbds[1] - ydrift_int, lbds[0] + xdrift_int:lbds[1] + xdrift_int] = longsprd * sprd_factor sbds = [ int(mlen // 2 - shrtsprd.shape[0] // 2), int(mlen // 2 + shrtsprd.shape[0] // 2 + 1) ] sprd[sbds[0]:sbds[1], sbds[0]:sbds[1]] += shrtsprd * (1 - sprd_factor) sprd[int(sprd.shape[0] // 2), int(sprd.shape[0] // 2)] += max(0, 1 - sprd.sum()) pmf_list = [sparse.coo_matrix(sprd)] else: pmf_list = [] ###################### Get pmf_list from multiprocessing pmf_list.extend(pool.starmap(PM.prob_mass, pm_args)) for pmf in pmf_list: for dim in range(2): if pmf.shape[dim] > max_shape[dim]: max_shape[dim] = pmf.shape[dim] r_spread = [] # holds the one-day spread for each release day. # Reshape the prob. mass function of each release day into solution form for ii in range(params.r_dur): offset = params.domain_info[1] - pmf_list[ii].shape[0] // 2 dom_len = params.domain_info[1] * 2 + 1 r_spread.append( sparse.coo_matrix( (pmf_list[ii].data, (pmf_list[ii].row + offset, pmf_list[ii].col + offset)), shape=(dom_len, dom_len)).tocsr()) ### PHASE TWO ### # Pass the probability list, pmf_list, and other info to convolution solver. # This will return the finished population model. 
with Capturing() as output: if sprd_factor is not None: # extend day count by one days_ext = [days[0] - 1] days_ext.extend(days) modelsol = get_populations(r_spread, pmf_list, days_ext, params.ndays + 1, dom_len, max_shape, params.r_dur, params.r_number, params.r_mthd()) # remove the first one and start where wind started. modelsol = modelsol[1:] else: modelsol = get_populations(r_spread, pmf_list, days, params.ndays, dom_len, max_shape, params.r_dur, params.r_number, params.r_mthd()) # modelsol now holds the model results for this run as CSR sparse arrays # get emergence potential (measured in expected number of wasps previously # present whose oviposition would result in emergence on the given date) # from the model result release_emerg, sentinel_emerg = popdensity_to_emergence( modelsol, locinfo) # get the expected wasp populations at grid points on sample days grid_counts = popdensity_grid(modelsol, locinfo) # get the expected wasp populations in cardinal directions '''card_counts = popdensity_card(modelsol,locinfo,params.domain_info)''' ## For the lists release_emerg and sentinel_emerg: ## Each list entry corresponds to a data collection day (one array) ## In each array: ## Each column corresponds to an emergence observation day (as in data) ## Each row corresponds to a grid point or sentinel field, respectively ## For the array grid_counts: ## Each column corresponds to an observation day ## Each row corresponds to a grid point ## For the list card_counts: ## Each list entry corresponds to a sampling day (one array) ## Each column corresponds to a step in a cardinal direction ## Each row corresponds to a cardinal direction # print('{:03.1f} sec./model at {}'.format(time.time() - modeltic, # time.strftime("%H:%M:%S %d/%m/%Y")),end='\r') # sys.stdout.flush() return (release_emerg, sentinel_emerg, grid_counts) #,card_counts) print('Parsing model output and connecting to Bayesian model...') ###################################################################### 
##### Connect Model to Data ##### ###################################################################### ### Parse the results of pop_model into separate deterministic variables ### '''Get Poisson probabilities for sentinal field emergence. Parameters: xi is constant, emerg is a list of ndarrays, betas is a 1D array of field probabilities''' Ncollections = len(locinfo.sent_DataFrames) sent_poi_rates = [] for ii in range(Ncollections): s_ndays = len(locinfo.sent_DataFrames[ii]['datePR'].unique()) sent_poi_rates.append( pm.Lambda('sent_poi_rate_{}'.format(ii), lambda xi=xi, ndays=s_ndays, betas=sent_obs_probs, emerg_model=pop_model[1][ii]: xi * emerg_model * np.tile( betas, (ndays, 1)).T, trace=False)) sent_poi_rates = pm.Container(sent_poi_rates) '''Return Poisson probabilities for release field grid emergence. Parameters: xi is constant, emerg is a list of ndarrays. collection effort is specified in locinfo.''' Ncollections = len(locinfo.release_DataFrames) rel_poi_rates = [] for ii in range(Ncollections): r_effort = locinfo.release_collection[ii] #fraction of max collection r_ndays = len(locinfo.release_DataFrames[ii]['datePR'].unique()) rel_poi_rates.append( pm.Lambda('rel_poi_rate_{}'.format(ii), lambda xi=xi, ndays=r_ndays, r_effort=r_effort, beta= em_obs_prob, emerg_model=pop_model[0][ii]: xi * emerg_model * np.tile(r_effort * beta, (ndays, 1)).T, trace=False)) rel_poi_rates = pm.Container(rel_poi_rates) @pm.deterministic(plot=False, trace=False) def grid_poi_rates(locinfo=locinfo, beta=grid_obs_prob, obs_model=pop_model[2]): '''Return Poisson probabilities for grid sampling obs_model is an ndarray, sampling effort is specified in locinfo.''' return beta * locinfo.grid_samples * obs_model '''Return Poisson probabilities for cardinal direction sampling obs_model is a list of ndarrays, sampling effort is assumed constant''' ''' card_poi_rates = [] for ii,obs in enumerate(pop_model[3]): card_poi_rates.append(pm.Lambda('card_poi_rate_{}'.format(ii), lambda 
beta=card_obs_prob, obs=obs: beta*obs)) card_poi_rates = pm.Container(card_poi_rates) ''' # Given the expected wasp densities from pop_model, actual wasp densities # are modeled as a thinned Poisson random variable about that mean. # Each wasp in the area then has a small probability of being seen. ### Connect sentinel emergence data to model ### N_sent_collections = len(locinfo.sent_DataFrames) # Create list of collection variables sent_collections = [] for ii in range(N_sent_collections): # Apparently, pymc does not play well with 2D array parameters sent_collections.append( np.empty(sent_poi_rates[ii].value.shape, dtype=object)) for n in range(sent_collections[ii].shape[0]): for m in range(sent_collections[ii].shape[1]): sent_collections[ii][n, m] = pm.Poisson( "sent_em_obs_{}_{}_{}".format(ii, n, m), sent_poi_rates[ii][n, m], value=float(locinfo.sentinel_emerg[ii][n, m]), observed=True) sent_collections = pm.Container(sent_collections) ### Connect release-field emergence data to model ### N_release_collections = len(locinfo.release_DataFrames) # Create list of collection variables rel_collections = [] for ii in range(N_release_collections): rel_collections.append( np.empty(rel_poi_rates[ii].value.shape, dtype=object)) for n in range(rel_collections[ii].shape[0]): for m in range(rel_collections[ii].shape[1]): rel_collections[ii][n, m] = pm.Poisson( "rel_em_obs_{}_{}_{}".format(ii, n, m), rel_poi_rates[ii][n, m], value=float(locinfo.release_emerg[ii][n, m]), observed=True) rel_collections = pm.Container(rel_collections) ### Connect grid sampling data to model ### grid_obs = np.empty(grid_poi_rates.value.shape, dtype=object) for n in range(grid_obs.shape[0]): for m in range(grid_obs.shape[1]): grid_obs[n, m] = pm.Poisson("grid_obs_{}_{}".format(n, m), grid_poi_rates[n, m], value=float(locinfo.grid_obs[n, m]), observed=True) grid_obs = pm.Container(grid_obs) ### Connect cardinal direction data to model ### ''' N_card_collections = len(locinfo.card_obs_DataFrames) # 
Create list of sampling variables card_collections = [] for ii in range(N_card_collections): card_collections.append(np.empty(card_poi_rates[ii].value.shape, dtype=object)) for n in range(card_collections[ii].shape[0]): for m in range(card_collections[ii].shape[1]): card_collections[ii][n,m] = pm.Poisson( "card_obs_{}_{}_{}".format(ii,n,m), card_poi_rates[ii][n,m], value=locinfo.card_obs[ii][n,m], observed=True, plot=False) card_collections = pm.Container(card_collections) ''' ###################################################################### ##### Collect Model and Run ##### ###################################################################### ### Collect model ### if sprd_factor is not None: Bayes_model = pm.Model([ lam, f_a1, f_a2, f_b1_p, f_b2_p, f_b1, f_b2, g_aw, g_bw, sig_x, sig_y, corr_p, corr, sig_x_l, sig_y_l, corr_l_p, corr_l, n_periods, mu_r, sprd_factor, grid_obs_prob, xi, em_obs_prob, A_collected, sent_obs_probs, params_ary, pop_model, grid_poi_rates, rel_poi_rates, sent_poi_rates, grid_obs, rel_collections, sent_collections ]) else: Bayes_model = pm.Model([ lam, f_a1, f_a2, f_b1_p, f_b2_p, f_b1, f_b2, g_aw, g_bw, sig_x, sig_y, corr_p, corr, sig_x_l, sig_y_l, corr_l_p, corr_l, n_periods, mu_r, grid_obs_prob, xi, em_obs_prob, A_collected, sent_obs_probs, params_ary, pop_model, grid_poi_rates, rel_poi_rates, sent_poi_rates, grid_obs, rel_collections, sent_collections ]) ### Run if parameters were passed in ### if mcmc_args is not None: if len(mcmc_args) == 3: # New run nsamples = int(mcmc_args[0]) burn = int(mcmc_args[1]) fname = mcmc_args[2] if fname[-3:] != '.h5': fname += '.h5' mcmc = pm.MCMC(Bayes_model, db='hdf5', dbname=fname, dbmode='a', dbcomplevel=0) mcmc.use_step_method(pm.AdaptiveMetropolis, stoc_vars, scales=step_scales, interval=500, shrink_if_necessary=True) try: tic = time.time() print('Sampling...') mcmc.sample(nsamples, burn) # sampling finished. 
commit to database and continue print('Sampling finished.') print('Time elapsed: {}'.format(time.time() - tic)) print('Saving...') #mcmc.save_state() mcmc.commit() print('Closing...') mcmc.db.close() except: print('Exception: database closing...') mcmc.db.close() raise return elif len(mcmc_args) == 2: # Resume run fname = mcmc_args[0] nsamples = int(mcmc_args[1]) fname = fname.strip() if fname[-3:] != '.h5': fname += '.h5' if os.path.isfile(fname): db = pm.database.hdf5.load(fname) mcmc = pm.MCMC(Bayes_model, db=db) mcmc.use_step_method(pm.AdaptiveMetropolis, stoc_vars, scales=step_scales, interval=500, shrink_if_necessary=True) # database loaded. else: print('File not found: {}'.format(fname)) return try: tic = time.time() print('Sampling...') mcmc.sample(nsamples) # sampling finished. commit to database and continue print('Sampling finished.') print('Time elapsed: {}'.format(time.time() - tic)) print('Saving...') #mcmc.save_state() mcmc.commit() print('Closing...') mcmc.db.close() except: print('Exception: database closing...') mcmc.db.close() raise return ###################################################################### ##### Start Interactive Menu ##### ###################################################################### print('--------------- MCMC MAIN MENU ---------------') print(" 'new': Start a new MCMC chain from the beginning.") print("'cont': Continue a previous MCMC chain from an hdf5 file.") #print("'plot': Plot traces/distribution from an hdf5 file.") print("'quit': Quit.") cmd = input('Enter: ') cmd = cmd.strip().lower() if cmd == 'new': print('\n\n') print('--------------- New MCMC Chain ---------------') while True: val = input("Enter number of realizations or 'quit' to quit:") val = val.strip() if val == 'q' or val == 'quit': return else: try: nsamples = int(val) val2 = input("Enter number of realizations to discard:") val2 = val2.strip() if val2 == 'q' or val2 == 'quit': return else: burn = int(val2) fname = input( "Enter filename to save 
or 'back' to cancel:") fname = fname.strip() if fname == 'q' or fname == 'quit': return elif fname == 'b' or fname == 'back': continue else: fname = fname + '.h5' break # BREAK LOOP AND RUN MCMC WITH GIVEN VALUES except ValueError: print('Unrecognized input.') continue ##### RUN FIRST MCMC HERE ##### mcmc = pm.MCMC(Bayes_model, db='hdf5', dbname=fname, dbmode='a', dbcomplevel=0) mcmc.use_step_method(pm.AdaptiveMetropolis, stoc_vars, scales=step_scales, interval=500, shrink_if_necessary=True) try: tic = time.time() print('Sampling...') mcmc.sample(nsamples, burn) # sampling finished. commit to database and continue print('Sampling finished.') print('Time elapsed: {}'.format(time.time() - tic)) print('Saving...') #mcmc.save_state() mcmc.commit() except: print('Exception: database closing...') mcmc.db.close() raise elif cmd == 'cont': # Load db and continue print('\n') while True: fname = input("Enter path to database to load, or 'q' to quit:") fname = fname.strip() if fname.lower() == 'q' or fname.lower() == 'quit': return else: if fname[-3:] != '.h5': fname += '.h5' if os.path.isfile(fname): db = pm.database.hdf5.load(fname) mcmc = pm.MCMC(Bayes_model, db=db) mcmc.use_step_method(pm.AdaptiveMetropolis, stoc_vars, scales=step_scales, interval=500, shrink_if_necessary=True) break # database loaded else: print('File not found.') #continue elif cmd == 'plot': # Get filename and pass to plotting routine. pass # return elif cmd == 'quit' or cmd == 'q': return else: print('Command not recognized.') print('Quitting....') return ##### MCMC Loop ##### # This should be reached only by cmd == 'new' or 'cont' with a database. # It resumes sampling of a previously sampled chain. 
print('\n') while True: print('--------------- MCMC ---------------') print(" 'report': generate report on traces") print("'inspect': launch IPython to inspect state") print(" 'run': conduct further sampling") print(" 'quit': Quit") cmd = input('Enter: ') cmd = cmd.strip() cmd = cmd.lower() if cmd == 'inspect': try: import IPython IPython.embed() except ImportError: print('IPython not found.') except: print('Exception: database closing...') mcmc.db.close() raise elif cmd == 'run': val = input("Enter number of realizations or 'back':") val = val.strip() if val == 'back' or val == 'b': continue else: try: nsamples = int(val) except ValueError: print('Unrecognized input.') continue # Run chain try: tic = time.time() print('Sampling...') mcmc.sample(nsamples) # sampling finished. commit to database and continue print('Sampling finished.') print('Time elapsed: {}'.format(time.time() - tic)) print('Saving...') #mcmc.save_state() mcmc.commit() except: print('Exception: database closing...') mcmc.db.close() raise elif cmd == 'report': try: import Bayes_Plot Bayes_Plot.plot_traces(db=db) print('Gelman-Rubin statistics') gr = pm.gelman_rubin(mcmc) print(gr) with open('./diagnostics/gelman-rubin.txt', 'w') as f: f.write('Variable R_hat\n') f.write('---------------------\n') for key, val in gr.items(): f.write(key + ': {}\n'.format(val)) except: print('Exception: database closing...') mcmc.db.close() raise elif cmd == 'quit' or cmd == 'q': mcmc.db.close() print('Database closed.') break else: print('Command not recognized.')
# Simple linear regression with PyMC3: declare the remaining model variables,
# sample (one multi-job run, or several single chains merged together), print
# the Gelman-Rubin statistic and the elapsed time, plot the traces and pickle
# the model.
#
# NOTE(review): this fragment begins mid-snippet -- `alpha`, `X`, `Y`, `model`,
# `t0`, `multicore`, `itenum`, `chainnum`, `saveimage` and `merge_traces` are
# all defined before it.  The statement grouping below is reconstructed from
# the flattened source; confirm it against the original script.

# Remaining variables, the likelihood and the sampler setup.  `model` is
# entered explicitly so these declarations are attached to it; presumably the
# original statements already sat inside a model context -- confirm.
with model:
    beta = pm.Normal('beta', mu=0, sd=20)
    sigma = pm.Uniform('sigma', lower=0)
    y = pm.Normal('y', mu=beta*X + alpha, sd=sigma, observed=Y)
    start = pm.find_MAP()
    step = pm.NUTS(state=start)

with model:
    if multicore:
        # One call that runs `chainnum` jobs, each with its own seed.
        # The keyword is `progressbar`, as in the single-core branch below;
        # the original `progress_bar` spelling did not match it.
        trace = pm.sample(itenum, step, start=start, njobs=chainnum,
                          random_seed=range(chainnum), progressbar=False)
    else:
        # Sample the chains one after another and merge them afterwards.
        ts = [pm.sample(itenum, step, chain=i, progressbar=False)
              for i in range(chainnum)]
        trace = merge_traces(ts)
    if saveimage:
        pm.traceplot(trace).savefig("simple_linear_trace.png")

# Single-argument print(...) behaves identically on Python 2 and Python 3.
print("Rhat=" + str(pm.gelman_rubin(trace)))
# NOTE(review): time.clock() no longer exists on Python >= 3.8; it is kept
# because `t0` is taken outside this fragment and both readings must come
# from the same clock.
t1 = time.clock()
print("elapsed time=" + str(t1 - t0))

#trace
if not multicore:
    # Without multiprocessing only the first chain is plotted.
    trace = ts[0]
with model:
    pm.traceplot(trace, model.vars)
    pm.forestplot(trace)

import pickle as pkl
# Pickle output is binary data, so the file must be opened in binary mode.
with open("simplelinearregression_model.pkl", "wb") as fpw:
    pkl.dump(model, fpw)
def bodelike_plot(pbproject=HS_PROJECT,
                  model_id='gpa3',
                  varname='phase',
                  control_genotype='VT37804_TNTin',
                  blocked_genotype='VT37804_TNTE',
                  num_chains=4,
                  takelast=10000,
                  alpha=0.05,
                  plot_control=True,
                  plot_silenced=True,
                  img_format='png',
                  show=False):
    """Plot the posterior traces of one variable across stimulus frequencies.

    For every (model_id, freq) group of the computed results, the traces of
    the group-level variable are loaded for the control and for the blocked
    genotype, and the last `takelast` samples of each chain are concatenated.
    The mixed traces of a fixed list of frequencies are then drawn side by
    side, with horizontal bars at the two bounds returned by `hpd` and the
    Gelman-Rubin statistic of each frequency written into the x tick labels.
    The figure is saved under
    ``<plots_dir>/bbode/<model_id>/<control>-vs-<blocked>/``.

    Parameters
    ----------
    pbproject : object exposing `mcmc_dir` (where results are looked up) and
        `plots_dir` (where the figure is written).
    model_id : value matched against the `model_id` column of the results.
    varname : base name of the variable to plot.
    control_genotype, blocked_genotype : genotype names; results are matched
        on the part of their `genotype` value that precedes the first '__'.
    num_chains : number of chains mixed per trace (also used to reshape the
        mixed trace for the Gelman-Rubin computation).
    takelast : number of trailing samples kept from each chain.
    alpha : second argument passed to `hpd`.
    plot_control, plot_silenced : whether to draw the respective raw traces.
    img_format : file extension of the saved figure.
    show : if true, call `plt.show()` after saving.

    Raises
    ------
    KeyError
        If one of the hard-coded frequencies has no results for `model_id`.
    IndexError
        If a frequency group lacks the control or the blocked genotype.
    """

    def varnames(result, varname):
        # Map the base name onto the group-level variable name and onto the
        # per-fly variable names; each is None when the base name is not
        # among the corresponding variables of this result.
        hyperfly, hyperfly_postfix, hyperfly_variables, flies, flies_variables = \
            flies_and_variables(result)
        hvar = varname + hyperfly_postfix if varname in set(hyperfly_variables) else None
        fvars = [varname + '_' + fly for fly in flies] if varname in set(flies_variables) else None
        return hvar, fvars

    def mix_chains(chains):
        # Concatenate the last `takelast` samples of each chain; slots that
        # receive no chain stay NaN.
        # assert len(chains) >= num_chains
        mixed = np.array([np.nan] * (num_chains * takelast))
        for i, chain in enumerate(chains):
            mixed[i * takelast: (i + 1) * takelast] = chain[-takelast:]
        return mixed

    # Available results
    results = all_computed_results(pbproject.mcmc_dir)
    results = results[results.model_id == model_id]
    ctraces = {}
    straces = {}
    # Keep only the part of the genotype name that precedes the first '__'.
    results.genotype = results.genotype.apply(lambda gen: gen.partition('__')[0])

    # Collect and mix traces for all frequencies.
    # A list (not a tuple) groups by both columns on every pandas version.
    # The model id of the group key is ignored: the results are already
    # filtered, and rebinding it would shadow the `model_id` parameter.
    for (_, freq), data in results.groupby(['model_id', 'freq']):
        print('\t\t\tCollecting traces for frequency %g' % freq)
        control = MCMCRunManager(data[data.genotype == control_genotype].iloc[0]['path'])  # ad-hoc
        silenced = MCMCRunManager(data[data.genotype == blocked_genotype].iloc[0]['path'])  # ad-hoc
        chvar, _ = varnames(control, varname)   # control hierarchical var, fly vars
        shvar, _ = varnames(silenced, varname)  # silenced hierarchical var, fly vars
        ctraces[freq] = mix_chains(control.traces(chvar))
        straces[freq] = mix_chains(silenced.traces(shvar))

    # The frequencies we are interested in...
    freqs = (0.5, 1, 2, 4, 8, 16, 32, 40)

    # Compute HPDs. Compute the rope too, see Kruschke.
    chpds = [hpd(ctraces[freq], alpha) for freq in freqs]
    shpds = [hpd(straces[freq], alpha) for freq in freqs]

    # Plot the traces
    if plot_control:
        plt.plot(np.hstack([ctraces[freq] for freq in freqs]), color='b',
                 label=control_genotype.replace('_', 'x'))
    if plot_silenced:
        plt.plot(np.hstack([straces[freq] for freq in freqs]), color='r',
                 label=blocked_genotype.replace('_', 'x'))

    # Plot the HPD regions + setup ticks
    xticklocations = []
    xticklabels = []
    for i, freq in enumerate(freqs):
        # Each frequency occupies one segment of num_chains * takelast samples.
        xmin = num_chains * takelast * i
        xmax = num_chains * takelast * (i + 1)
        plt.axvline(x=xmax, color='k')
        # Control bounds in cyan, blocked bounds in magenta.
        plt.plot((xmin, xmax), [chpds[i][0]] * 2, color='c', linewidth=4)
        plt.plot((xmin, xmax), [chpds[i][1]] * 2, color='c', linewidth=4)
        plt.plot((xmin, xmax), [shpds[i][0]] * 2, color='m', linewidth=4)
        plt.plot((xmin, xmax), [shpds[i][1]] * 2, color='m', linewidth=4)
        # Gelman-Rubin statistic, one row per chain
        # (might interest: Geweke, autocorr, put graphically in the plot)
        cgr = gelman_rubin(ctraces[freq].reshape(num_chains, -1))
        print('\t%s %s control freq %.1f; GR=%.2f' % (model_id, varname, freq, cgr))
        sgr = gelman_rubin(straces[freq].reshape(num_chains, -1))
        print('\t%s %s blocked freq %.1f; GR=%.2f' % (model_id, varname, freq, sgr))
        # xticks: centred on the segment, labelled with frequency and both GRs
        xticklocations.append(xmin + (xmax - xmin) / 2.)
        xticklabels.append('%g\nbgr=%.2f\ncgr=%.2f' % (freq, sgr, cgr))

    plt.title('Model: %s; Variable: %s' % (model_id, varname))
    # Raw string: '\o' is not a valid escape sequence in a normal literal.
    plt.xlabel(r'$\omega$')
    plt.ylabel('%s' % varname)
    # Booleans instead of the legacy 'on'/'off' strings, which newer
    # matplotlib versions no longer interpret (any non-empty string is truthy).
    plt.tick_params(axis='x',          # changes apply to the x-axis
                    which='both',      # both major and minor ticks are affected
                    top=False,         # ticks along the top edge are off
                    bottom=True,       # ticks along the bottom edge are on
                    labelbottom=True)  # labels along the bottom edge are on
    plt.xticks(xticklocations, xticklabels)
    plt.legend()
    plt.tight_layout()

    # Save (the original referenced an undefined name `model` here)
    dest_dir = op.join(pbproject.plots_dir,
                       'bbode',
                       model_id,
                       '%s-vs-%s' % (control_genotype, blocked_genotype))
    ensure_dir(dest_dir)
    plt.savefig(op.join(dest_dir, '%s-vs-%s-%s-%s.%s' %
                        (control_genotype, blocked_genotype, model_id, varname, img_format)))

    # Show
    if show:
        plt.show()