def main():
    with pm.Model() as model:
        # A strong prior: the mean is pulled towards 0 rather than towards 1
        prior = pm.Beta('prior', 0.5, 3)
        output = pm.Binomial('output', n=100, observed=50, p=prior)
        step = pm.Metropolis()
        trace = pm.sample(1000, step=step)
    pm.traceplot(trace)
    # The ROPE is an interval you define around the value you expect.
    # Check whether the ROPE falls inside the HPD: if it does, the expected
    # value is compatible with the posterior, and increasing the sample size
    # may sharpen the mean estimate.
    # (A numeric version of this check is sketched after this function.)
    pm.plot_posterior(trace, figsize=(5, 5), kde_plot=True, rope=[0.45, 0.55])
    # Gelman-Rubin statistic (R-hat)
    pm.gelman_rubin(trace)
    # Forest plot
    pm.forestplot(trace, varnames=['prior'])
    # Summary: look at mc_error here; it is the standard error and should be low
    pm.df_summary(trace)
    # Autocorrelation
    pm.autocorrplot(trace)
    # Effective sample size
    pm.effective_n(trace)['prior']
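# A minimal sketch (not part of the original) that turns the visual ROPE/HPD
# check above into a numeric one using pm.hpd; `trace` is assumed to come
# from main() and the ROPE bounds mirror the plot call.
def check_rope(trace, rope_lo=0.45, rope_hi=0.55):
    hpd_lo, hpd_hi = pm.hpd(trace['prior'])  # 95% HPD interval
    if rope_lo <= hpd_lo and hpd_hi <= rope_hi:
        return 'HPD inside ROPE: practically equivalent to the expected value'
    if hpd_hi < rope_lo or rope_hi < hpd_lo:
        return 'HPD and ROPE disjoint: posterior excludes the expected value'
    return 'HPD and ROPE overlap: a larger sample may sharpen the estimate'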
def info(_s: state, plot: bool = False):
    """
    Print info about the current state

    :param _s: the state
    :param plot: whether to plot trace diagnostics
    """
    print("=====")
    print("Iterations:", _s.iterations)
    print("-----")
    print("min durations", np.min(_s.durations))
    print("min durations[0]", np.min(_s.durations[0]))
    print("max durations", np.max(_s.durations))
    print("max durations[0,:,:]", np.max(_s.durations[0, :, :]))
    print("min durations_values", np.min(_s.durations_values))
    print("min durations_values[0]", np.min(_s.durations_values[0]))
    print("max durations_values", np.max(_s.durations_values))
    print("-----")
    pm_params = {'trace': _s.trace, 'varnames': ['mean', 'std']}
    pm.summary(**pm_params)
    if plot:
        pm.traceplot(**pm_params)
        pm.autocorrplot(**pm_params)
    print("=====\n\n")
def update_bayesian_modeling(mean_upd, var_upd, alpha_upd, beta_upd, inv_a_upd,
                             inv_b_upd, iv_upd, strategy, stock_price,
                             strike_price, risk_free, time):
    with pm.Model() as update_model:
        prior = pm.InverseGamma('bv', inv_a_upd, inv_b_upd)
        likelihood = pm.InverseGamma('like', inv_a_upd, inv_b_upd, observed=iv_upd)

    with update_model:
        # step = pm.Metropolis()
        v_trace_update = pm.sample(10000, tune=1000)

    trace_update = v_trace_update['bv'][:]

    pm.traceplot(v_trace_update)
    plt.show()
    pm.autocorrplot(v_trace_update)
    plt.show()
    pm.plot_posterior(v_trace_update[100:], color='#87ceeb', point_estimate='mean')
    plt.show()

    s = pm.summary(v_trace_update).round(2)
    print("\n Summary")
    print(s)

    # Resample posterior volatilities and convert from percentage points
    a = np.random.choice(trace_update, 10000, replace=True)
    ar = [val / 100 for val in a]

    # Price the option for each sampled volatility
    op = [BS_price(strategy, stock_price, strike_price, risk_free, vol, time)
          for vol in ar]

    plt.hist(ar, bins=50)
    plt.title("Volatility")
    plt.ylabel("Frequency")
    plt.show()

    plt.hist(op, bins=50)
    plt.title("Option Price")
    plt.ylabel("Frequency")
    plt.show()

    return trace_update
def createModel(self, xdata, ydata=None, trace_len=200):
    shape = xdata.shape
    with pm.Model() as model:
        alpha = pm.Normal('alpha', mu=0, sd=100)
        theta = pm.DensityDist('theta', self.loglikelihood_t, shape=30, testval=0)
        sigma = pm.DensityDist('sigma', self.loglikelihood_s, testval=1)

        # Linear combination of the predictors
        _sum = theta[0] * xdata[:, 0]
        for i in range(1, shape[1]):
            _sum += theta[i] * xdata[:, i]

        likelihood = pm.Normal('estimated',
                               mu=(alpha + _sum).astype('float32'),
                               sd=sigma.astype('float32'),
                               shape=100, observed=ydata)

        step = pm.Metropolis()
        trace = pm.sample(trace_len, step=step, njobs=4)

    self.trace = trace
    pm.traceplot(trace, ['sigma', 'theta'])
    pm.autocorrplot(trace, ['sigma', 'theta'])
def main():
    data = np.array([
        51.06, 55.12, 53.73, 50.24, 52.05, 56.40, 48.45, 52.34, 55.65,
        51.49, 51.86, 63.43, 53.00, 56.09, 51.93, 52.31, 52.33, 57.48,
        57.44, 55.14, 53.93, 54.62, 56.09, 68.58, 51.36, 55.47, 50.73,
        51.94, 54.95, 50.39, 52.91, 51.50, 52.68, 47.72, 49.73, 51.82,
        54.99, 52.84, 53.19, 54.52, 51.46, 53.73, 51.61, 49.81, 52.42,
        54.30, 53.84, 53.16
    ])

    # Look at the distribution of the data
    sns.kdeplot(data)

    # Several distributions can model a standard deviation: Exponential is a
    # safe default, HalfCauchy has a fat tail, HalfNormal and InverseGamma
    # also work. A higher Exponential rate (lambda) makes the prior steeper.
    # (These priors are compared visually in the sketch after this function.)
    with pm.Model() as model:
        mu = pm.Uniform('mu', 30, 80)
        sigma = pm.HalfNormal('sigma', sd=10)
        df = pm.Exponential('df', 1.5)  # lambda = 1.5 is steeper than 0.5
        output = pm.StudentT('output', mu=mu, sigma=sigma, nu=df, observed=data)
        trace = pm.sample(1000)

    # Gelman-Rubin statistic (R-hat)
    pm.gelman_rubin(trace)
    # Forest plot
    pm.forestplot(trace)
    # Summary: look at mc_error here; it is the standard error and should be low
    pm.summary(trace)
    # Autocorrelation
    pm.autocorrplot(trace)
    # Effective sample size
    pm.effective_n(trace)
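# A hedged companion sketch (not part of the original) visualizing the
# sd-prior choices discussed in the comments above with scipy.stats; note
# that scipy's exponential `scale` is 1/lambda, so a larger lambda gives a
# steeper prior. The InverseGamma parameters are purely illustrative.
from scipy import stats as st

xs = np.linspace(0.01, 30, 500)
plt.plot(xs, st.expon.pdf(xs, scale=1/1.5), label='Exponential(lam=1.5), steeper')
plt.plot(xs, st.expon.pdf(xs, scale=1/0.5), label='Exponential(lam=0.5), flatter')
plt.plot(xs, st.halfcauchy.pdf(xs, scale=5), label='HalfCauchy(5), fat tail')
plt.plot(xs, st.halfnorm.pdf(xs, scale=10), label='HalfNormal(sd=10)')
plt.plot(xs, st.invgamma.pdf(xs, a=3, scale=10), label='InverseGamma(3, 10), illustrative')
plt.legend()
plt.show()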
def plot_model_diagnostics(model, save_dir, file_id, export=True):
    """Generate and export a range of diagnostic plots for a given model."""
    # Ensure the output folder exists
    if export is True:
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

    model_name = model.__class__.__name__
    trace_df = pm.trace_to_dataframe(model.trace, varnames=model.df_params)

    sns.pairplot(trace_df)
    if export is True:
        plt.savefig(save_dir + f'{model_name}_{file_id}_pairplot.pdf',
                    format='pdf', bbox_inches='tight')
        plt.cla()

    pm.traceplot(model.trace, varnames=model.df_params)
    if export is True:
        plt.savefig(save_dir + f'{model_name}_{file_id}_traceplot.pdf',
                    format='pdf', bbox_inches='tight')
        plt.cla()

    pm.autocorrplot(model.trace, varnames=model.df_params)
    if export is True:
        plt.savefig(save_dir + f'{model_name}_{file_id}_autocorrplot.pdf',
                    format='pdf', bbox_inches='tight')
        plt.cla()

    pm.forestplot(model.trace, varnames=model.df_params)
    if export is True:
        plt.savefig(save_dir + f'{model_name}_{file_id}_forestplot.pdf',
                    format='pdf', bbox_inches='tight')
        plt.cla()

    # Close all figures, otherwise we can run out of memory
    plt.close("all")
def bayesian_modeling(mean, var, alpha, beta, inv_a, inv_b, iv):
    with pm.Model() as model:
        prior = pm.InverseGamma('bv', inv_a, inv_b)
        likelihood = pm.Gamma('like', alpha, beta, observed=iv)

    with model:
        # step = pm.Metropolis()
        v_trace = pm.sample(10000, tune=1000)

    trace = v_trace['bv'][:]

    pm.traceplot(v_trace)
    plt.show()
    pm.autocorrplot(v_trace)
    plt.show()
    # s = pm.summary(v_trace)
    # print(s)

    return trace
# Summary
pm.summary(t_0)
pm.summary(t_1)
pm.summary(t_2)

# Forest plot
pm.forestplot([t_0, t_1, t_2], figsize=(16, 12), textsize=20, markersize=20)

# Acceptance rate
print('Model 0: acc_rate = ' + str(step_0.accepted / (niter * nchains)))
print('Model 1: acc_rate = ' + str(step_1.accepted / (niter * nchains)))
print('Model 2: acc_rate = ' + str(step_2.accepted / (niter * nchains)))

# ACF
pm.autocorrplot(t_0, var_names=['m'], combined=True, textsize=20)
pm.autocorrplot(t_1, var_names=['m'], combined=True, textsize=20)
pm.autocorrplot(t_2, var_names=['m'], combined=True, textsize=20)

# ESS
print(pm.effective_n(t_0))
print(pm.effective_n(t_1))
print(pm.effective_n(t_2))

# Gelman-Rubin
print(pm.gelman_rubin(t_0))
print(pm.gelman_rubin(t_1))
print(pm.gelman_rubin(t_2))

# Geweke
plt.rcParams['figure.figsize'] = (16, 12)
# Run a small trace with just one chain so the autocorrelation plots are legible
try:
    model_trace_small = vartbl['model_trace_small']
except KeyError:
    with model:
        stepper = pm.NUTS()
        model_trace_small = pm.sample(draws=5000, step=stepper, tune=1000,
                                      chains=1, cores=16)
    vartbl['model_trace_small'] = model_trace_small

# Run the autocorrelation plots for the beta_i's individually
for i in range(n):
    pm.autocorrplot(model_trace_small, varnames=[var_names[i]])

# *************************************************************************************************
# 2.6 Based on your samples construct an estimate for the posterior mean.
# *************************************************************************************************
beta_PM = np.mean(beta_samples, axis=0)
print('Estimated Posterior Means:')
for i, beta_name in enumerate(beta_names):
    print(f'{beta_name:13} = {beta_PM[i]:+0.3f}')

# *************************************************************************************************
# 2.7 Select at least 2 datapoints and visualize a histogram of the posterior
#     probabilities. Denote the posterior mean and MAP on your plot for each datapoint.
# *************************************************************************************************
trace2 = pm.sample(2000, start=start, tune=1000)

chain2 = trace2
varnames1 = ['beta', 'beta1', 'beta2', 'sigma', 'mu']
varnames2 = ['beta', 'beta1', 'beta2', 'sigma']
pm.plot_posterior(chain2, varnames1)
plt.show()

map_estimate = pm.find_MAP(model=unpooled_model)
print(map_estimate)

# x_lim = 10
# com_pred = chain2.get_values('Observed_pred')[:].ravel()
# plt.hist(com_pred, range=[0, 2], bins=130, histtype='stepfilled')
# plt.show()

# Plot the autocorrelation curves
pm.autocorrplot(chain2, varnames2)
plt.show()

with unpooled_model:
    post_pred = pm.sample_ppc(trace2, samples=1000)

plt.figure()
ax = sns.distplot(post_pred['Observed'].mean(axis=1),
                  label='Posterior predictive means')
ax.axvline(elec_faults.mean(), color='r', ls='--', label='True mean')
ax.legend()

tracedf = pm.trace_to_dataframe(trace2, varnames=['beta1', 'beta2'])
sns.pairplot(tracedf)
plt.show()
# sns.kdeplot(trace2['beta1'])
plt.ylabel('Parameter')

# Plot beta trace
plt.subplot(212)
plt.title(r'Trace of $\beta$')
plt.plot(beta_samples, color='b')
plt.xlabel('Samples')
plt.ylabel('Parameter')
plt.tight_layout(h_pad=0.8)

figsize(20, 12)
pm.traceplot(sleep_trace, ['alpha', 'beta'])

# %%
pm.autocorrplot(sleep_trace, ['alpha', 'beta'])

# Sort the values by time offset
wake_data.sort_values('time_offset', inplace=True)

# Time is the time offset
time = np.array(wake_data.loc[:, 'time_offset'])

# Observations are the indicator
wake_obs = np.array(wake_data.loc[:, 'indicator'])

with pm.Model() as wake_model:
    # Create the alpha and beta parameters
    alpha = pm.Normal('alpha', mu=0.0, tau=0.01, testval=0.0)
    beta = pm.Normal('beta', mu=0.0, tau=0.01, testval=0.0)
         ls='--', color='black',
         label="average posterior \nprobability of diabetes")
plt.savefig("stuff_dinner.png")
plt.clf()
plt.cla()
plt.close()


def diabetes_posterior(time_offset):
    figsize(16, 8)
    prob = exponential(time_offset, beta_samples, alpha_samples)
    plt.hist(prob, bins=100, histtype='step', lw=4)
    plt.title('Probability Distribution for high blood sugar at offset %s' % time_offset)
    plt.xlabel('Probability of High Blood Sugar')
    plt.ylabel('Samples')
    plt.show()


for i in range(20, 27):
    diabetes_posterior(i * 10)

figsize(20, 12)
pm.traceplot(diabetes_trace, ['alpha', 'beta'])
plt.show()
pm.autocorrplot(diabetes_trace, ['alpha', 'beta'])
plt.show()
    # Define the prior
    theta = pm.Beta('theta', mu * kappa, (1 - mu) * kappa, shape=len(N))
    # Define the likelihood
    y = pm.Bernoulli('y', p=theta[coin], observed=y)

    # Generate an MCMC chain
    trace = pm.sample(10000, step=pm.NUTS(), random_seed=123, progressbar=False)

# Check the results
burnin = 100  # posterior samples to discard

# Print summary for each trace
# pm.df_summary(trace[burnin:])
# pm.df_summary(trace)

# Check for mixing and autocorrelation
pm.autocorrplot(trace[burnin:], varnames=['mu', 'kappa'])
# pm.autocorrplot(trace, varnames=['mu', 'kappa'])

# Plot KDE and sampled values for each parameter
pm.traceplot(trace[burnin:])
# pm.traceplot(trace)

# Create arrays with the posterior sample
theta1_sample = trace['theta'][:, 0][burnin:]
theta28_sample = trace['theta'][:, 27][burnin:]
mu_sample = trace['mu'][burnin:]
kappa_sample = trace['kappa'][burnin:]

# Plot mu histogram
fig, ax = plt.subplots(2, 2, figsize=(12, 12))
pm.plot_posterior(mu_sample, ax=ax[0, 0], color='skyblue')
    yl = pm.Normal('yl', mu[x], tau=tau, observed=z)

    # Generate an MCMC chain
    start = pm.find_MAP()
    steps = pm.Metropolis()
    trace = pm.sample(20000, steps, start, progressbar=False)

# EXAMINE THE RESULTS
burnin = 2000
thin = 50

# Print summary for each trace
# pm.summary(trace[burnin::thin])
# pm.summary(trace)

# Check for mixing and autocorrelation
pm.autocorrplot(trace[burnin::thin],
                varnames=[v.name for v in model.unobserved_RVs[:-1]])

# Plot KDE and sampled values for each parameter
# pm.traceplot(trace[burnin::thin])
pm.traceplot(trace)

a0_sample = trace['a0'][burnin::thin]
a_sample = trace['a'][burnin::thin]

# Convert baseline to the original scale
m_sample = a0_sample.repeat(NxLvl).reshape(len(a0_sample), NxLvl) + a_sample
b0_sample = m_sample.mean(axis=1)
b0_sample = b0_sample * np.std(y) + np.mean(y)

# Convert deflections to the original scale
n_sample = b0_sample.repeat(NxLvl).reshape(len(b0_sample), NxLvl)
b_sample = (m_sample - n_sample)
b_sample = b_sample * np.std(y)
pm.traceplot(chain, lines={'theta': theta_real})
'''

with first_model:
    step = pm.Metropolis()
    multi_trace = pm.sample(1000, step=step, njobs=4, cores=1)  # njobs=4

burnin = 0
multi_chain = multi_trace[burnin:]
pm.traceplot(multi_chain, lines={'theta': theta_real})

# In[6]:
pm.gelman_rubin(multi_chain)

# In[7]:
pm.forestplot(multi_chain, varnames=['theta'])

# In[8]:
pm.summary(multi_chain)

# In[11]:
pm.autocorrplot(multi_chain)  # autocorrelation

# In[12]:
pm.effective_n(multi_chain)['theta']  # effective sample size
    psi = pm.Beta('psi', 1, 1)
    alpha = pm.Normal('alpha', 0, 10)
    beta = pm.Normal('beta', 0, 10, shape=2)
    lam = pm.math.exp(alpha + beta[0] * fish_data['child'] +
                      beta[1] * fish_data['camper'])
    y = pm.ZeroInflatedPoisson('y', psi, lam, observed=fish_data['count'])
    trace_ZIP_reg = pm.sample(2000, njobs=1)

chain_ZIP_reg = trace_ZIP_reg[100:]
pm.traceplot(chain_ZIP_reg)
plt.savefig('img710.png')

plt.figure()
pm.autocorrplot(chain_ZIP_reg)
plt.savefig('img710b.png')

plt.figure()
children = [0, 1, 2, 3, 4]
fish_count_pred_0 = []
fish_count_pred_1 = []
thin = 5
for n in children:
    # without_camper = chain_ZIP_reg['alpha'][::thin] + chain_ZIP_reg['beta'][:, 0][::thin] * n
    # with_camper = without_camper + chain_ZIP_reg['beta'][:, 1][::thin]
    without_camper = chain_ZIP_reg['alpha'] + chain_ZIP_reg['beta'][:, 0] * n
    with_camper = without_camper + chain_ZIP_reg['beta'][:, 1]
    fish_count_pred_0.append(np.exp(without_camper))
    fish_count_pred_1.append(np.exp(with_camper))
    # Generate an MCMC chain
    start = pm.find_MAP()
    steps = pm.Metropolis()
    trace = pm.sample(20000, steps, start, progressbar=False)

# EXAMINE THE RESULTS
burnin = 2000
thin = 50

# Print summary for each trace
# pm.summary(trace[burnin::thin])
# pm.summary(trace)

# Check for mixing and autocorrelation
pm.autocorrplot(trace[burnin::thin],
                varnames=[v.name for v in model.unobserved_RVs[:-1]])

# Plot KDE and sampled values for each parameter
# pm.traceplot(trace[burnin::thin])
pm.traceplot(trace)

a0_sample = trace['a0'][burnin::thin]
a_sample = trace['a'][burnin::thin]

# Convert baseline to the original scale
m_sample = a0_sample.repeat(NxLvl).reshape(len(a0_sample), NxLvl) + a_sample
b0_sample = m_sample.mean(axis=1)
b0_sample = b0_sample * np.std(y) + np.mean(y)

# Convert deflections to the original scale
n_sample = b0_sample.repeat(NxLvl).reshape(len(b0_sample), NxLvl)
b_sample = (m_sample - n_sample)
b_sample = b_sample * np.std(y)
trace = pm.sample(2000, step=step, start=start)

#%% Inspect the results
trace = trace[100:]  # burn-in

# traceplot produces two panels: the left one is a kernel density estimate
# (KDE), which can be read as a smoothed histogram; the right one shows the
# sampled values along the chain, which should look like white noise,
# i.e. well mixed.
pm.traceplot(trace)

# forestplot shows R-hat (should be below 1.1 and close to 1) together with
# the parameter means, 50% HPD and 95% HPD (Highest Posterior Density)
# intervals. (A numeric R-hat check is sketched after this snippet.)
pm.forestplot(trace, varnames=['alpha'])
pm.forestplot(trace, varnames=['beta'])

# Ideally the samples should not be autocorrelated; use autocorrplot to
# inspect the degree of autocorrelation.
pm.autocorrplot(trace)

# summary gives a textual description of the posterior
pm.summary(trace)

# Visual posterior summary: Kruschke-style plots
pm.plot_posterior(trace['alpha'], kde_plot=True)
pm.plot_posterior(trace['beta'], kde_plot=True)

#%% Interpret and visualize the posterior
plt.plot(handsize, stature, 'b.')
alpha_m = trace['alpha'].mean()
beta_m = trace['beta'].mean()
plt.plot(handsize, alpha_m + beta_m * handsize, c='k',
         label='y={:.2f}+{:.2f}*x'.format(alpha_m, beta_m))
plt.xlabel('$x$', fontsize=16)
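# A small follow-up sketch (an assumption, not part of the original): read the
# R-hat values off numerically instead of from the forest plot. This requires
# a trace sampled with at least two chains.
rhats = pm.gelman_rubin(trace)
for name, value in rhats.items():
    flag = 'ok' if np.all(np.asarray(value) < 1.1) else 'check convergence'
    print(name, value, flag)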
    # Define the likelihood
    y = pm.Bernoulli('y', p=theta[coin], observed=y)

    # Generate an MCMC chain
    trace = pm.sample(1000, progressbar=False)

# Check the results

# Print summary for each trace
# pm.df_summary(trace)

# Check for mixing and autocorrelation
pm.autocorrplot(trace, varnames=['mu', 'kappa'])

# Plot KDE and sampled values for each parameter
pm.traceplot(trace)

# Create arrays with the posterior sample
theta1_sample = trace['theta'][:, 0]
theta2_sample = trace['theta'][:, 1]
theta3_sample = trace['theta'][:, 2]
mu_sample = trace['mu']
kappa_sample = trace['kappa']

# Scatter plot hyper-parameters
# Generate an MCMC chain
start = pm.find_MAP()  # find a reasonable starting point
step1 = pm.Metropolis([theta, mu])
step2 = pm.NUTS([kappa])
trace = pm.sample(10000, [step1, step2], start=start, random_seed=123,
                  progressbar=False)

# Check the results
burnin = 2000  # posterior samples to discard
thin = 10      # keep every 10th posterior sample

# Print summary for each trace
# pm.summary(trace[burnin::thin])
# pm.summary(trace)

# Check for mixing and autocorrelation
pm.autocorrplot(trace[burnin::thin], varnames=['mu', 'kappa'])
# pm.autocorrplot(trace, varnames=['mu', 'kappa'])

# Plot KDE and sampled values for each parameter
pm.traceplot(trace[burnin::thin])
# pm.traceplot(trace)

# Create arrays with the posterior sample
theta1_sample = trace['theta'][:, 0][burnin::thin]
theta28_sample = trace['theta'][:, 27][burnin::thin]
mu_sample = trace['mu'][burnin::thin]
kappa_sample = trace['kappa'][burnin::thin]

fig = plt.figure(figsize=(12, 12))
# Plot mu histogram
with pm.Model() as model_t:
    alpha = pm.Normal('alpha', mu=0, sd=100)
    beta = pm.Normal('beta', mu=0, sd=1)
    epsilon = pm.HalfCauchy('epsilon', 5)
    nu = pm.Deterministic('nu', pm.Exponential('nu_', 1/29) + 1)

    y_pred = pm.StudentT('y_pred', mu=alpha + beta * x_3, sd=epsilon, nu=nu,
                         observed=y_3)

    start = pm.find_MAP()
    step = pm.NUTS(scaling=start)
    trace_t = pm.sample(2000, step=step, start=start, njobs=1)

pm.traceplot(trace_t)
plt.savefig('img4142.png')
pm.autocorrplot(trace_t)
plt.savefig('img4143.png')
plt.clf()

beta_c, alpha_c = stats.linregress(x_3, y_3)[:2]
plt.plot(x_3, (alpha_c + beta_c * x_3), 'k', label='non-robust', alpha=0.5)
plt.plot(x_3, y_3, 'bo')
alpha_m = trace_t['alpha'].mean(0)
beta_m = trace_t['beta'].mean(0)
plt.plot(x_3, alpha_m + beta_m * x_3, c='k', label='robust')
plt.xlabel('$x$', fontsize=16)
plt.ylabel('$y$', fontsize=16, rotation=0)
plt.legend(loc=2, fontsize=14)
chain2 = data_chain2[:, i:i+1]
burn_in = 0
length = (ie - i0) // step

# Two-chain Gelman-Rubin diagnostic: W is the within-chain variance,
# B the between-chain variance. (A general m-chain version is sketched
# after this snippet.)
n = chain1[burn_in:burn_in+length].shape[0]
W = (chain1[burn_in:burn_in+length].std()**2 +
     chain2[burn_in:burn_in+length].std()**2) / 2
mean1 = chain1[burn_in:burn_in+length].mean()
mean2 = chain2[burn_in:burn_in+length].mean()
mean = (mean1 + mean2) / 2
B = n * ((mean1 - mean)**2 + (mean2 - mean)**2)
var_theta = (1 - 1/n) * W + (1/n) * B
print("Gelman-Rubin Diagnostic: ", np.sqrt(var_theta / W))

corr_plot1 = pm3.autocorrplot(data_traceplot1, var_names=names, grid=(1, N_var),
                              figsize=(12, 6.5), textsize=18, combined=True)
corr_plot1 = corr_plot1[None, :]
for i in range(N_var):
    corr_plot1[0, i].set_xlabel('Lag Index', fontsize=26)
corr_plot1[0, 0].set_ylabel('Autocorrelation Value', fontsize=26)
plt.savefig("plots/autocorrelation.png", bbox_inches='tight', pad_inches=0.01)

plt.figure(figsize=(12, 6.5))
for i in range(data_chain1.shape[1]):
    gw_plot = pm3.geweke(data_chain1[:, i], .1, .5, 20)
    plt.scatter(gw_plot[:, 0], gw_plot[:, 1], label="%s" % names[i])
plt.axhline(-1.98, c='r')
plt.axhline(1.98, c='r')
plt.xticks(fontsize=22)
plt.yticks(fontsize=22)
plt.xlabel("Subchain sample number", fontsize=26)
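# A minimal generalization (hypothetical helper, not part of the original) of
# the two-chain Gelman-Rubin computation above to m chains of equal length n:
# W is the mean within-chain variance, B/n the variance of the chain means,
# var_hat = (1 - 1/n) * W + B/n, and R-hat = sqrt(var_hat / W).
def gelman_rubin_rhat(chains):
    chains = np.asarray(chains)  # shape (m, n): m chains, n samples each
    m, n = chains.shape
    W = chains.var(axis=1, ddof=1).mean()
    B = n * chains.mean(axis=1).var(ddof=1)
    var_hat = (1 - 1 / n) * W + B / n
    return np.sqrt(var_hat / W)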
    Observed = pm.Normal("Observed", theta, sd=sigma, observed=elec_faults1)  # observations
    # Observed = pm.Gamma("Observed", theta, sigma, observed=elec_faults1)
    # Observed_pred = pm.Bound(pm.Normal, lower=0.0)('Observed_pred', mu=theta,
    #                                                sd=sigma, shape=elec_faults1.shape)

    # start = pm.find_MAP()
    # step = pm.Metropolis()
    trace2 = pm.sample(1000)

chain2 = trace2
varnames2 = ['beta', 'beta1', 'beta2']
# pm.traceplot(chain2, varnames2, kde_plot=True, text_size=14, color='#6CA6CD')
pm.traceplot(chain2, varnames2)
plt.show()

# Plot the autocorrelation curves
pm.autocorrplot(chain2)
plt.show()

with unpooled_model:
    post_pred = pm.sample_ppc(trace2, samples=1000)

plt.figure()
ax = sns.distplot(post_pred['Observed'].mean(axis=1),
                  label='Posterior predictive means')
ax.axvline(elec_faults.mean(), color='r', ls='--', label='True mean')
ax.legend()
plt.show()

# map_estimate = pm.find_MAP(model=unpooled_model)
# print(map_estimate)
# x_lim = 60
# com_pred = chain2.get_values('Observed_pred')[::10].ravel()
# plt.hist(com_pred, range=[-2, 5], bins=90, histtype='stepfilled', color='#6CA6CD')
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

plt.rcParams["figure.figsize"] = (10, 5)
np.random.seed(42)

# Prepare the data
x = uniform(0, 20).rvs(30)
eps = norm(0, 4).rvs(30)
y = 11 + 3*x + eps

# Sampling w/ Metropolis
with pm.Model() as model:
    b_0 = pm.Normal("b_0", mu=0, sd=10)
    b_1 = pm.Normal("b_1", mu=0, sd=2)
    e = pm.HalfCauchy("e", 2)

    mu = pm.Deterministic("mu", b_0 + b_1*x)
    Y = pm.Normal("Y", mu=mu, sd=e, observed=y)

    trace = pm.sample(10000, step=pm.Metropolis())

pm.autocorrplot(trace, varnames=["b_0", "b_1", "e"])
plt.savefig("./results/4-12-autocorrelation-metropolis.png")

# Sampling w/ NUTS
with pm.Model() as model:
    b_0 = pm.Normal("b_0", mu=0, sd=10)
    b_1 = pm.Normal("b_1", mu=0, sd=2)
    e = pm.HalfCauchy("e", 2)

    mu = pm.Deterministic("mu", b_0 + b_1*x)
    Y = pm.Normal("Y", mu=mu, sd=e, observed=y)

    trace = pm.sample(10000)

pm.autocorrplot(trace, varnames=["b_0", "b_1", "e"])
plt.savefig("./results/4-12-autocorrelation-nuts.png")
pm.traceplot(trace)
print('\n--- Optimal Model Parameters ---')

#%% Discarding early samples (burn-in)
fig = plt.figure(figsize=(10, 4))
plt.subplot(121)
_ = plt.title('Burnin trace')
_ = plt.ylim(freq_mu - 0.2, freq_mu + 0.2)
_ = plt.plot(trace.get_values('mu')[:1000])
fig = plt.subplot(122)
_ = plt.title('Full trace')
_ = plt.ylim(freq_mu - 0.2, freq_mu + 0.2)
_ = plt.plot(trace.get_values('mu'))

#%% Autocorrelation Test Plot
# A measure of correlation between successive samples in the MCMC sampling
# chain. When samples have low correlation with each other, they add more
# "information" to the estimate of your parameter value than samples that
# are highly correlated.
# Visually, you are looking for an autocorrelation plot that tapers off to
# zero relatively quickly and then oscillates above and below zero correlation.
# If your autocorrelation plot does not taper off, it is generally a sign of
# poor mixing and you should revisit your model selection (e.g. likelihood)
# and sampling methods (e.g. Metropolis).
# (The lag-k statistic itself is sketched after this snippet.)
_ = pm.autocorrplot(trace[:2000], var_names=['mu'])
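# A minimal sketch (not part of the original) of the statistic autocorrplot
# draws: the lag-k sample autocorrelation of a single 1-D chain, e.g.
# trace.get_values('mu').
def autocorr(samples, k):
    if k == 0:
        return 1.0
    s = samples - samples.mean()
    return np.dot(s[:-k], s[k:]) / np.dot(s, s)

# Example: [autocorr(trace.get_values('mu'), k) for k in range(20)]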
for idx, x_i in enumerate(x):
    plt.subplot(2, 2, idx + 1)
    plt.scatter(x_i, y)
    plt.xlabel('$x_{}$'.format(idx + 1), fontsize=16)
    plt.ylabel('$y$', rotation=0, fontsize=16)

plt.subplot(2, 2, idx + 2)
plt.scatter(x[0], x[1])
plt.xlabel('$x_{}$'.format(idx), fontsize=16)
plt.ylabel('$x_{}$'.format(idx + 1), rotation=0, fontsize=16)

# scatter_plot(X_centered, y)
# plt.savefig('img425.png')

with pm.Model() as model_mlr:
    alpha_tmp = pm.Normal('alpha_tmp', mu=0, sd=10)
    beta = pm.Normal('beta', mu=0, sd=1, shape=2)
    epsilon = pm.HalfCauchy('epsilon', 5)

    mu = alpha_tmp + pm.math.dot(beta, X_centered)

    alpha = pm.Deterministic('alpha', alpha_tmp - pm.math.dot(beta, X_mean))

    y_pred = pm.Normal('y_pred', mu=mu, sd=epsilon, observed=y)

    trace_mlr = pm.sample(5000, njobs=1)

varnames = ['alpha', 'beta', 'epsilon']
pm.traceplot(trace_mlr, varnames)
plt.savefig('img426.png')

pm.summary(trace_mlr[500:], varnames)
pm.autocorrplot(trace_mlr[500:], varnames)
plt.savefig('img4261.png')
""" plt.scatter(x_2, y_2) plt.xlabel('$x$', fontsize=16) plt.ylabel('$y$', fontsize=16, rotation=0) plt.savefig('img422.png') """ with pm.Model() as model_poly: alpha = pm.Normal('alpha', mu=0, sd=10) beta1 = pm.Normal('beta1', mu=0, sd=1) beta2 = pm.Normal('beta2', mu=0, sd=1) epsilon = pm.HalfCauchy('epsilon', 5) mu = alpha + beta1 * x_2 + beta2 * x_2**2 y_pred = pm.Normal('y_pred', mu=mu, sd=epsilon, observed=y_2) trace_poly = pm.sample(2000, njobs=1) pm.traceplot(trace_poly) plt.savefig('img423.png') pm.autocorrplot(trace_poly) plt.savefig('img4232.png') plt.clf() x_p = np.linspace(-6, 6) y_p = trace_poly['alpha'].mean( ) + trace_poly['beta1'].mean() * x_p + trace_poly['beta2'].mean() * x_p**2 plt.scatter(x_2, y_2) plt.xlabel('$x$', fontsize=16) plt.ylabel('$y$', fontsize=16, rotation=0) plt.plot(x_p, y_p, c='k') plt.savefig('img424.png')
    beta4 = pm.Normal('beta4', 0, 20)

    # Define the likelihood as a function of time
    theta = beta[companyABC] + beta1 * elec_year + beta2 * elec_tem1 + \
            beta3 * elec_hPa1 + beta4 * elec_RH1
    Observed = pm.StudentT("Observed", mu=theta, sd=sigma, nu=nu,
                           observed=elec_faults1)  # observations

    start = pm.find_MAP()
    # step = pm.Metropolis()
    trace3 = pm.sample(6000, start=start)

chain3 = trace3
varnames1 = ['beta', 'beta1', 'beta2', 'beta3', 'beta4']
pm.traceplot(chain3, varnames1)
plt.show()

varnames1 = ['sigma', 'mu_a', 'sigma_a']
pm.traceplot(chain3, varnames1)
plt.show()

# Plot the autocorrelation curves
pm.autocorrplot(chain3)
plt.show()

# ======================================================================
# Model comparison and posterior analysis
# ======================================================================
# Waic = pm.compare([traces_ols_glm, trace1], [mdl_ols_glm, pooled_model], ic='WAIC')
# Waic = pm.compare([trace2, trace3], [partial_model, mulpartial_model], ic='WAIC')
# print(Waic)
# Specify the model in PyMC
with pm.Model() as model:
    # Define the hyperparameters
    mu = pm.Beta('mu', 2, 2)
    kappa = pm.Gamma('kappa', 1, 0.1)
    # Define the prior
    theta = pm.Beta('theta', mu * kappa, (1 - mu) * kappa, shape=len(N))
    # Define the likelihood
    y = pm.Bernoulli('y', p=theta[coin], observed=y)
    # print(dir(theta))

    # Generate an MCMC chain
    step = pm.Metropolis()
    trace = pm.sample(5000, step, progressbar=False)

'''
# Restricted models like this can be difficult to sample. This is related
# to the censoring comment in the book. One way to detect that something is
# wrong with the sampling is to compare the autocorrelation plots and the
# sampled values under different samplers, or you can try combinations of
# samplers like this:
# step1 = pm.Metropolis([theta, mu])
# step2 = pm.Slice([kappa])
# trace = pm.sample(5000, [step1, step2], progressbar=False)
# or this (this combination was used to generate the figures):
# start = pm.find_MAP()
# step1 = pm.Metropolis([theta, mu])
# step2 = pm.NUTS([kappa])
trace = pm.sample(sample, step, start=start, progressbar=True)

# ================================
_ = pm.traceplot(trace, ['p'])

# ================================
# Geweke Test
# Returns z-scores for convergence diagnostics: the mean of the first x% of
# the series is compared with the mean of the last y%. The series is divided
# into a number of segments for which this difference is computed. If the
# chain has converged, the scores should oscillate between -1 and 1.
# ================================
score = pm.geweke(trace.get_values('p'))
figure = plt.figure(figsize=(7, 2))
ax = plt.subplot()
_ = ax.scatter(score[:, 0], score[:, 1], c='black', marker='x')
x_min = min(score[:, 0])
x_max = max(score[:, 0])
_ = ax.hlines(y=[-1], xmin=x_min, xmax=x_max, colors='#1aaf5d',
              linestyles='dashed', linewidths=1)
_ = ax.hlines(y=[1], xmin=x_min, xmax=x_max, colors='#0075c2',
              linestyles='dashed', linewidths=1)
_ = ax.set_xlim([x_min, x_max])
_ = ax.set_xlabel('First Iteration')
_ = ax.set_ylabel('Z Score for Geweke Test')
plt.show()

# ================================
pm.autocorrplot(trace, ['p'])
    y = pm.Bernoulli('y', p=theta[coin], observed=y)

    # Generate an MCMC chain
    trace = pm.sample(10000, step=pm.NUTS(), random_seed=123, progressbar=False)

# Check the results
burnin = 100  # posterior samples to discard

# Print summary for each trace
# pm.df_summary(trace[burnin:])
# pm.df_summary(trace)

# Check for mixing and autocorrelation
pm.autocorrplot(trace[burnin:], varnames=['mu', 'kappa'])
# pm.autocorrplot(trace, varnames=['mu', 'kappa'])

# Plot KDE and sampled values for each parameter
pm.traceplot(trace[burnin:])
# pm.traceplot(trace)

# Create arrays with the posterior sample
theta1_sample = trace['theta'][:, 0][burnin:]
theta28_sample = trace['theta'][:, 27][burnin:]
mu_sample = trace['mu'][burnin:]
kappa_sample = trace['kappa'][burnin:]

# Plot mu histogram
fig, ax = plt.subplots(2, 2, figsize=(12, 12))
pm.plot_posterior(mu_sample, ax=ax[0, 0], color='skyblue')
with pm.Model() as model:
    alpha = pm.Normal('alpha', mu=0, sd=10)
    beta = pm.Normal('beta', mu=0, sd=10)
    epsilon = pm.HalfCauchy('epsilon', 5)

    mu = pm.Deterministic('mu', alpha + beta * x)
    y_pred = pm.Normal('y_pred', mu=mu, sd=epsilon, observed=y)

    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(11000, step, start, njobs=1)

trace_n = trace[1000:]
pm.traceplot(trace_n)
plt.savefig('img404.png')

"""
varnames = ['alpha', 'beta', 'epsilon']
pm.autocorrplot(trace_n, varnames)
plt.savefig('img405.png')
plt.clf()

sns.kdeplot(trace_n['alpha'], trace_n['beta'])
plt.xlabel(r'$\alpha$', fontsize=16)
plt.ylabel(r'$\beta$', fontsize=16, rotation=0)
plt.savefig('img406.png')
"""

plt.clf()
plt.plot(x, y, 'b.')
alpha_m = trace_n['alpha'].mean()
def show_autocorrelation(self, end_burning_index=10000, fraction_to_show=5):
    assert self.trace is not None, "must use the method sample"
    pm.autocorrplot(self.trace[end_burning_index::fraction_to_show],
                    varnames=['sd'])
    plt.show()
    theta = pm.Beta('theta', mu * kappa, (1 - mu) * kappa, shape=len(N))
    # Define the likelihood
    y = pm.Bernoulli('y', p=theta[coin], observed=y)

    # Generate an MCMC chain
    trace = pm.sample(1000, progressbar=False)

# Check the results

# Print summary for each trace
# pm.df_summary(trace)

# Check for mixing and autocorrelation
pm.autocorrplot(trace, varnames=['mu', 'kappa'])

# Plot KDE and sampled values for each parameter
pm.traceplot(trace)

# Create arrays with the posterior sample
theta1_sample = trace['theta'][:, 0]
theta2_sample = trace['theta'][:, 1]
theta3_sample = trace['theta'][:, 2]
mu_sample = trace['mu']
kappa_sample = trace['kappa']

# Scatter plot hyper-parameters
fig, ax = plt.subplots(4, 3, figsize=(12, 12))
""" """ fig = plt.figure(figsize=(11,3)) ax = fig.add_subplot(111) x_lim = 60 mu = np.int(freq_results['x']) for i in np.arange(x_lim): plt.bar(i, stats.poisson.pmf(mu, i), color=colors[3]) _ = ax.set_xlim(0, x_lim) _ = ax.set_ylim(0, 0.1) _ = ax.set_xlabel('Response time in seconds') _ = ax.set_ylabel('Probability mass') _ = ax.set_title('Estimated Poisson distribution for Hangout chat response time') _ = plt.legend(['$\lambda$ = %s' % mu]) plt.show() """ # Gonna have to figure out how to get PYMC3 basic_model = pm.Model() with basic_model: mu = pm.Uniform('mu', lower=0, upper=60) likelihood = pm.Poisson('likelihood', mu=mu, observed=messages['time_delay_seconds'].values) start = pm.find_MAP() step = pm.Metropolis() trace = pm.sample(200000, step, start=start, progressbar=True) # _ = pm.traceplot(trace, varnames=['mu'], lines={'mu':freq_results['x']}) _ = pm.autocorrplot(trace[:2000], varnames=['mu']) plt.show()