Example #1
def main():

    with pm.Model() as model:
        # Using a strong prior: Beta(0.5, 3) pulls the mean towards 0 rather than towards 1
        prior = pm.Beta('prior', 0.5, 3)

        output = pm.Binomial('output', n=100, observed=50, p=prior)

        step = pm.Metropolis()
        trace = pm.sample(1000, step=step)
        pm.traceplot(trace)

    pm.plot_posterior(trace, figsize=(5, 5), kde_plot=True,
                      rope=[0.45, 0.55])  # The ROPE is an interval you define
    # around the value you expect. Check whether the ROPE falls within the
    # HPD: if it does, the expected value is compatible with the posterior,
    # and increasing the sample size may tighten the mean estimate
    # (see the sketch after this example).

    # gelman rubin
    pm.gelman_rubin(trace)

    # forestplot
    pm.forestplot(trace, varnames=['prior'])

    # summary (check mc_error here: the Monte Carlo standard error, which should be low)
    pm.df_summary(trace)

    #autocorrelation
    pm.autocorrplot(trace)

    # effective size
    pm.effective_n(trace)['prior']
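A minimal sketch of the ROPE-vs-HPD check described in the comments above, assuming the pm.hpd helper from the same PyMC3 vintage as this example:

hpd_lo, hpd_hi = pm.hpd(trace['prior'])  # 95% HPD interval by default
rope = (0.45, 0.55)
# The check: do the two intervals intersect?
print('ROPE overlaps HPD:', hpd_lo <= rope[1] and rope[0] <= hpd_hi)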
Example #2
def info(_s: state, plot: bool = False):
    """
    Print info about current state
    :param _s: the state
    :param plot: whether to plot something
    """
    print("=====")
    print("Iterations:", _s.iterations)
    print("-----")
    print("min durations", np.min(_s.durations))
    print("min durations[0]", np.min(_s.durations[0]))
    print("max durations", np.max(_s.durations))
    print("max durations[0,:,:]", np.max(_s.durations[0, :, :]))
    print("min durations_values", np.min(_s.durations_values))
    print("min durations_values[0]", np.min(_s.durations_values[0]))
    print("max durations_values", np.max(_s.durations_values))
    print("-----")

    pm_params = {'trace': _s.trace, 'varnames': ['mean', 'std']}

    pm.summary(**pm_params)

    if plot:
        pm.traceplot(**pm_params)
        pm.autocorrplot(**pm_params)

    print("=====\n\n")
Example #3
def update_bayesian_modeling(mean_upd, var_upd, alpha_upd, beta_upd, inv_a_upd,
                             inv_b_upd, iv_upd, strategy, stock_price,
                             strike_price, risk_free, time):
    with pm.Model() as update_model:
        prior = pm.InverseGamma('bv', inv_a_upd, inv_b_upd)

        likelihood = pm.InverseGamma('like',
                                     inv_a_upd,
                                     inv_b_upd,
                                     observed=iv_upd)

    with update_model:
        # step = pm.Metropolis()

        v_trace_update = pm.sample(10000, tune=1000)
        #print(v_trace_update['bv'][:])
        trace_update = v_trace_update['bv'][:]
        #print(trace_update)

    pm.traceplot(v_trace_update)
    plt.show()

    pm.autocorrplot(v_trace_update)
    plt.show()

    pm.plot_posterior(v_trace_update[100:],
                      color='#87ceeb',
                      point_estimate='mean')
    plt.show()

    s = pm.summary(v_trace_update).round(2)
    print("\n Summary")
    print(s)

    a = np.random.choice(trace_update, 10000, replace=True)
    ar = []
    for i in range(9999):
        t = a[i] / 100
        ar.append(t)
    #print("Bayesian Volatility Values", ar)

    op = []
    for i in range(9999):
        temp = BS_price(strategy, stock_price, strike_price, risk_free, ar[i],
                        time)
        op.append(temp)
    #print("Bayesian Option Prices", op)

    plt.hist(ar, bins=50)
    plt.title("Volatility")
    plt.ylabel("Frequency")
    plt.show()

    plt.hist(op, bins=50)
    plt.title("Option Price")
    plt.ylabel("Frequency")
    plt.show()
    return trace_update
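The two loops above can be written more compactly; a minimal sketch using the function's own variables (it assumes, as above, that BS_price takes a scalar volatility):

vols = np.random.choice(trace_update, 10000, replace=True) / 100  # percent -> decimal
prices = [BS_price(strategy, stock_price, strike_price, risk_free, v, time)
          for v in vols]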
Example #4
def createModel(self, xdata, ydata=None, trace_len=200):
    shape = xdata.shape
    with pm.Model() as model:
        alpha = pm.Normal('alpha', mu=0, sd=100)
        theta = pm.DensityDist('theta', self.loglikelihood_t, shape=30, testval=0)
        sigma = pm.DensityDist('sigma', self.loglikelihood_s, testval=1)
        _sum = theta[0] * xdata[:, 0]
        for i in range(1, shape[1]):
            _sum += theta[i] * xdata[:, i]
        likelihood = pm.Normal('estimated', mu=(alpha + _sum).astype('float32'),
                               sd=sigma.astype('float32'), shape=100, observed=ydata)
        step = pm.Metropolis()
        trace = pm.sample(trace_len, step=step, njobs=4)  # pass the Metropolis step
    self.trace = trace
    pm.traceplot(trace, ['sigma', 'theta'])
    pm.autocorrplot(trace, ['sigma', 'theta'])
def main():

    data = np.array([
        51.06, 55.12, 53.73, 50.24, 52.05, 56.40, 48.45, 52.34, 55.65, 51.49,
        51.86, 63.43, 53.00, 56.09, 51.93, 52.31, 52.33, 57.48, 57.44, 55.14,
        53.93, 54.62, 56.09, 68.58, 51.36, 55.47, 50.73, 51.94, 54.95, 50.39,
        52.91, 51.5, 52.68, 47.72, 49.73, 51.82, 54.99, 52.84, 53.19, 54.52,
        51.46, 53.73, 51.61, 49.81, 52.42, 54.3, 53.84, 53.16
    ])

    # look at the distribution of the data
    sns.kdeplot(data)

    # All of these distributions are commonly used to model a standard deviation.
    # The exponential is a safe default; the half-Cauchy has a fat tail; a larger
    # exponential rate (lambda) concentrates the prior more steeply near zero.
    # The inverse gamma is another option (see the sketch after this example).
    with pm.Model() as model:
        mu = pm.Uniform('mu', 30, 80)
        sigma = pm.HalfNormal('sigma', sd=10)
        df = pm.Exponential(
            'df', 1.5)  # lambda = 1.5 gives a steeper prior; 0.5 would be flatter
        output = pm.StudentT('output',
                             mu=mu,
                             sigma=sigma,
                             nu=df,
                             observed=data)

        trace = pm.sample(1000)

        # gelman rubin
        pm.gelman_rubin(trace)

        # forestplot
        pm.forestplot(trace)

        # summary (check mc_error here: the Monte Carlo standard error, which should be low)
        pm.summary(trace)

        #autocorrelation
        pm.autocorrplot(trace)

        # effective size
        pm.effective_n(trace)
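To see the tail behavior contrasted in the comments above, a quick SciPy sketch (an illustration, not part of the original example):

from scipy import stats
# 99th percentile under each candidate prior for the scale parameter
print(stats.expon(scale=1/1.5).ppf(0.99))    # Exponential(lambda=1.5): light tail
print(stats.halfcauchy(scale=10).ppf(0.99))  # Half-Cauchy(beta=10): fat tail
print(stats.halfnorm(scale=10).ppf(0.99))    # Half-Normal(sd=10), as used above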
Example #6
def plot_model_diagnostics(model, save_dir, file_id, export=True):
    """generate and export a range of diagnostic plots for a given model"""

    # ensure folder exists
    if export is True:
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

    model_name = model.__class__.__name__

    trace_df = pm.trace_to_dataframe(model.trace, varnames=model.df_params)

    sns.pairplot(trace_df)
    if export is True:
        plt.savefig(save_dir + f'{model_name}_{file_id}_pairplot.pdf',
                    format='pdf',
                    bbox_inches='tight')
        plt.cla()

    pm.traceplot(model.trace, varnames=model.df_params)
    if export is True:
        plt.savefig(save_dir + f'{model_name}_{file_id}_traceplot.pdf',
                    format='pdf',
                    bbox_inches='tight')
        plt.cla()

    pm.autocorrplot(model.trace, varnames=model.df_params)
    if export is True:
        plt.savefig(save_dir + f'{model_name}_{file_id}_autocorrplot.pdf',
                    format='pdf',
                    bbox_inches='tight')
        plt.cla()

    pm.forestplot(model.trace, varnames=model.df_params)
    if export is True:
        plt.savefig(save_dir + f'{model_name}_{file_id}_forestplot.pdf',
                    format='pdf',
                    bbox_inches='tight')
        plt.cla()

    # close all figs, otherwise we can run out of memory
    plt.close("all")
Example #7
def bayesian_modeling(mean, var, alpha, beta, inv_a, inv_b, iv):
    with pm.Model() as model:
        prior = pm.InverseGamma('bv', inv_a, inv_b)

        likelihood = pm.Gamma('like', alpha, beta, observed=iv)

    with model:
        # step = pm.Metropolis()

        v_trace = pm.sample(10000, tune=1000)
        #print(v_trace['bv'][:])
        trace = v_trace['bv'][:]
        #print(trace)

    pm.traceplot(v_trace)
    plt.show()

    pm.autocorrplot(v_trace)
    plt.show()

    #s = pm.summary(v_trace)
    #print(s)
    return trace
Example #8
# summary
pm.summary(t_0)
pm.summary(t_1)
pm.summary(t_2)

# forest plot
pm.forestplot([t_0, t_1, t_2], figsize=(16, 12), textsize=20, markersize=20)

# acceptance rate
print('Model 0: acc_rate = ' + str(step_0.accepted / (niter * nchains)))
print('Model 1: acc_rate = ' + str(step_1.accepted / (niter * nchains)))
print('Model 2: acc_rate = ' + str(step_2.accepted / (niter * nchains)))

# ACF
pm.autocorrplot(t_0, var_names=['m'], combined=True, textsize=20)
pm.autocorrplot(t_1, var_names=['m'], combined=True, textsize=20)
pm.autocorrplot(t_2, var_names=['m'], combined=True, textsize=20)

# ESS
print(pm.effective_n(t_0))
print(pm.effective_n(t_1))
print(pm.effective_n(t_2))

# Gelman-Rubin
print(pm.gelman_rubin(t_0))
print(pm.gelman_rubin(t_1))
print(pm.gelman_rubin(t_2))

# Geweke
plt.rcParams['figure.figsize'] = (16, 12)
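The snippet breaks off after setting the figure size; a plausible continuation, mirroring the pm.geweke usage shown in Example #30 below:

score = pm.geweke(t_0.get_values('m'))
plt.scatter(score[:, 0], score[:, 1], c='black', marker='x')
plt.axhline(-1.98, c='r')
plt.axhline(1.98, c='r')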
Example #9
# Run a small trace with just one chain so the autocorrelation plots are legible
try:
    model_trace_small = vartbl['model_trace_small']
except KeyError:
    with model:
        stepper = pm.NUTS()
        model_trace_small = pm.sample(draws=5000,
                                      step=stepper,
                                      tune=1000,
                                      chains=1,
                                      cores=16)
    vartbl['model_trace_small'] = model_trace_small

# Run the autocorrelation plots for beta_i's individually
for i in range(n):
    pm.autocorrplot(model_trace_small, varnames=[var_names[i]])

# *************************************************************************************************
# 2.6 Based on your samples construct an estimate for the posterior mean.
# *************************************************************************************************

beta_PM = np.mean(beta_samples, axis=0)
print('Estimated Posterior Means:')
for i, beta_name in enumerate(beta_names):
    print(f'{beta_name:13} = {beta_PM[i]:+0.3f}')

# *************************************************************************************************
# 2.7. Select at least 2 datapoints and visualize a histogram of the posterior probabilities.
# Denote the posterior mean and MAP on your plot for each datapoint
# *************************************************************************************************
Example #10
    trace2 = pm.sample(2000, start=start, tune=1000)
chain2 = trace2
varnames1 = ['beta', 'beta1', 'beta2', 'sigma', 'mu']
varnames2 = ['beta', 'beta1', 'beta2', 'sigma']
pm.plot_posterior(chain2, varnames1)
plt.show()

map_estimate = pm.find_MAP(model=unpooled_model)
print(map_estimate)

# x_lim = 10
# com_pred = chain2.get_values('Observed_pred')[:].ravel()
# plt.hist(com_pred,  range=[0, 2], bins=130, histtype='stepfilled')
# plt.show()
# Plot the autocorrelation curves
pm.autocorrplot(chain2, varnames2)
plt.show()

with unpooled_model:
    post_pred = pm.sample_ppc(trace2, samples=1000)
plt.figure()
ax = sns.distplot(post_pred['Observed'].mean(axis=1),
                  label='Posterior predictive means')
ax.axvline(elec_faults.mean(), color='r', ls='--', label='True mean')
ax.legend()

tracedf = pm.trace_to_dataframe(trace2, varnames=['beta1', 'beta2'])
sns.pairplot(tracedf)
plt.show()

# sns.kdeplot(trace2['beta1'])
Example #11
plt.ylabel('Parameter')

# Plot beta trace
plt.subplot(212)
plt.title(r'Trace of $\beta$')
plt.plot(beta_samples, color='b')
plt.xlabel('Samples')
plt.ylabel('Parameter')
plt.tight_layout(h_pad=0.8)

figsize(20, 12)
pm.traceplot(sleep_trace, ['alpha', 'beta'])

# %%

pm.autocorrplot(sleep_trace, ['alpha', 'beta'])

# Sort the values by time offset
wake_data.sort_values('time_offset', inplace=True)

# Time is the time offset
time = np.array(wake_data.loc[:, 'time_offset'])

# Observations are the indicator
wake_obs = np.array(wake_data.loc[:, 'indicator'])

with pm.Model() as wake_model:
    # Create the alpha and beta parameters
    alpha = pm.Normal('alpha', mu=0.0, tau=0.01, testval=0.0)
    beta = pm.Normal('beta', mu=0.0, tau=0.01, testval=0.0)
Example #12
         ls='--',
         color='black',
         label="average posterior \nprobability of diabetes")

plt.savefig("stuff_dinner.png")
plt.clf()
plt.cla()
plt.close()


def diabetes_posterior(time_offset):
    figsize(16, 8)
    prob = exponential(time_offset, beta_samples, alpha_samples)
    plt.hist(prob, bins=100, histtype='step', lw=4)
    plt.title('Probability Distribution for high blood sugar at offset %s' %
              time_offset)
    plt.xlabel('Probability of High Blood Sugar')
    plt.ylabel('Samples')
    plt.show()


for i in range(20, 27):
    diabetes_posterior(i * 10)

figsize(20, 12)
pm.traceplot(diabetes_trace, ['alpha', 'beta'])
plt.show()

pm.autocorrplot(diabetes_trace, ['alpha', 'beta'])
plt.show()
    # define the prior
    theta = pm.Beta('theta', mu * kappa, (1 - mu) * kappa, shape=len(N))
    # define the likelihood
    y = pm.Bernoulli('y', p=theta[coin], observed=y)
#   Generate a MCMC chain
    trace = pm.sample(10000, step=pm.NUTS(), random_seed=(123), progressbar=False)

## Check the results.
burnin = 100  # posterior samples to discard

## Print summary for each trace
#pm.df_summary(trace[burnin:])
#pm.df_summary(trace)

## Check for mixing and autocorrelation
pm.autocorrplot(trace[burnin:], varnames=['mu', 'kappa'])
#pm.autocorrplot(trace, vars =[mu, kappa])

## Plot KDE and sampled values for each parameter.
pm.traceplot(trace[burnin:])
#pm.traceplot(trace)

# Create arrays with the posterior sample
theta1_sample = trace['theta'][:,0][burnin:]
theta28_sample = trace['theta'][:,27][burnin:]
mu_sample = trace['mu'][burnin:]
kappa_sample = trace['kappa'][burnin:]

# Plot mu histogram
fig, ax = plt.subplots(2, 2, figsize=(12,12))
pm.plot_posterior(mu_sample, ax=ax[0, 0], color='skyblue')
    yl = pm.Normal('yl', mu[x], tau=tau, observed=z)
    # Generate a MCMC chain
    start = pm.find_MAP()
    steps = pm.Metropolis()
    trace = pm.sample(20000, steps, start, progressbar=False)

# EXAMINE THE RESULTS
burnin = 2000
thin = 50

# Print summary for each trace
#pm.summary(trace[burnin::thin])
#pm.summary(trace)

# Check for mixing and autocorrelation
pm.autocorrplot(trace[burnin::thin], vars=model.unobserved_RVs[:-1])

## Plot KDE and sampled values for each parameter.
#pm.traceplot(trace[burnin::thin])
pm.traceplot(trace)

a0_sample = trace['a0'][burnin::thin]
a_sample = trace['a'][burnin::thin]
# Convert baseline to the original scale
m_sample = a0_sample.repeat(NxLvl).reshape(len(a0_sample), NxLvl) + a_sample
b0_sample = m_sample.mean(axis=1)
b0_sample = b0_sample * np.std(y) + np.mean(y)
# Convert baseline to the original scale
n_sample = b0_sample.repeat(NxLvl).reshape(len(b0_sample), NxLvl)
b_sample = (m_sample - n_sample)
b_sample = b_sample * np.std(y)
Example #15
pm.traceplot(chain, lines={'theta':theta_real})
'''

with first_model:
    step = pm.Metropolis()
    multi_trace = pm.sample(1000, step=step, njobs=4, cores=1)  #njobs=4

burnin = 0
multi_chain = multi_trace[burnin:]
pm.traceplot(multi_chain, lines={'theta': theta_real})

# In[6]:

pm.gelman_rubin(multi_chain)

# In[7]:

pm.forestplot(multi_chain, varnames=['theta'])

# In[8]:

pm.summary(multi_chain)

# In[11]:

pm.autocorrplot(multi_chain)  # autocorrelation

# In[12]:

pm.effective_n(multi_chain)['theta']  # effective sample size
Example #16
    psi = pm.Beta('psi', 1, 1)

    alpha = pm.Normal('alpha', 0, 10)
    beta = pm.Normal('beta', 0, 10, shape=2)
    lam = pm.math.exp(alpha + beta[0] * fish_data['child'] +
                      beta[1] * fish_data['camper'])

    y = pm.ZeroInflatedPoisson('y', psi, lam, observed=fish_data['count'])
    trace_ZIP_reg = pm.sample(2000, njobs=1)

chain_ZIP_reg = trace_ZIP_reg[100:]
pm.traceplot(chain_ZIP_reg)
plt.savefig('img710.png')

plt.figure()
pm.autocorrplot(chain_ZIP_reg)
plt.savefig('img710b.png')

plt.figure()
children = [0, 1, 2, 3, 4]
fish_count_pred_0 = []
fish_count_pred_1 = []
thin = 5
for n in children:
    #	without_camper = chain_ZIP_reg['alpha'][::thin] + chain_ZIP_reg['beta'][:, 0][::thin] * n
    #	with_camper = without_camper + chain_ZIP_reg['beta'][:, 1][::thin]
    without_camper = chain_ZIP_reg['alpha'] + chain_ZIP_reg['beta'][:, 0] * n
    with_camper = without_camper + chain_ZIP_reg['beta'][:, 1]
    fish_count_pred_0.append(np.exp(without_camper))
    fish_count_pred_1.append(np.exp(with_camper))
    # Generate a MCMC chain
    start = pm.find_MAP()
    steps = pm.Metropolis()
    trace = pm.sample(20000, steps, start, progressbar=False)


# EXAMINE THE RESULTS
burnin = 2000
thin = 50

# Print summary for each trace
#pm.summary(trace[burnin::thin])
#pm.summary(trace)

# Check for mixing and autocorrelation
pm.autocorrplot(trace[burnin::thin], vars=model.unobserved_RVs[:-1])

## Plot KDE and sampled values for each parameter.
#pm.traceplot(trace[burnin::thin])
pm.traceplot(trace)

a0_sample = trace['a0'][burnin::thin]
a_sample = trace['a'][burnin::thin]
# Convert baseline to the original scale
m_sample = a0_sample.repeat(NxLvl).reshape(len(a0_sample), NxLvl) + a_sample
b0_sample = m_sample.mean(axis=1)
b0_sample = b0_sample * np.std(y) + np.mean(y)
# Convert baseline to the original scale
n_sample = b0_sample.repeat(NxLvl).reshape(len(b0_sample), NxLvl)
b_sample = (m_sample - n_sample)
b_sample = b_sample * np.std(y)
Example #18
    trace = pm.sample(2000, step=step, start=start)

#%% Inspect the results
trace = trace[100:]  # burn-in
# traceplot produces two panels: the left one is a kernel density estimate
# (KDE), which can be read as a smoothed histogram; the right one shows the
# sampled values, which should look like white noise, i.e. exhibit good mixing
pm.traceplot(trace)

# forestplot shows R_hat (which should be below 1.1 and close to 1) together
# with the parameter means, the 50% HPD and the 95% HPD
# (Highest Posterior Density) intervals
pm.forestplot(trace, varnames=['alpha'])
pm.forestplot(trace, varnames=['beta'])

# Ideally the samples should not be autocorrelated; use autocorrplot to inspect the degree of autocorrelation.
pm.autocorrplot(trace)

# summary provides a textual description of the posterior
pm.summary(trace)

# Visual posterior summary: a Kruschke-style plot
pm.plot_posterior(trace['alpha'], kde_plot=True)
pm.plot_posterior(trace['beta'], kde_plot=True)

#%% Interpret and visualize the posterior
plt.plot(handsize, stature, 'b.')
alpha_m = trace['alpha'].mean()
beta_m = trace['beta'].mean()
plt.plot(handsize, alpha_m+beta_m*handsize, c='k', \
         label='y={:.2f}+{:.2f}*x'.format(alpha_m, beta_m))
plt.xlabel('$x$', fontsize=16)
    # define the likelihood
    y = pm.Bernoulli('y', p=theta[coin], observed=y)

#   Generate a MCMC chain

    trace = pm.sample(1000, progressbar=False)


## Check the results.

## Print summary for each trace
#pm.df_summary(trace)
#pm.df_summary(trace)

## Check for mixing and autocorrelation
pm.autocorrplot(trace, varnames=['mu', 'kappa'])
#pm.autocorrplot(trace, varnames =[mu, kappa])

## Plot KDE and sampled values for each parameter.
pm.traceplot(trace)
#pm.traceplot(trace)

# Create arrays with the posterior sample
theta1_sample = trace['theta'][:,0]
theta2_sample = trace['theta'][:,1]
theta3_sample = trace['theta'][:,2]
mu_sample = trace['mu']
kappa_sample = trace['kappa']


# Scatter plot hyper-parameters
#   Generate a MCMC chain
    start = pm.find_MAP()  # find a reasonable starting point.
    step1 = pm.Metropolis([theta, mu])
    step2 = pm.NUTS([kappa])
    trace = pm.sample(10000, [step1, step2], start=start, random_seed=(123), progressbar=False)

## Check the results.
burnin = 2000  # posterior samples to discard
thin = 10  # thinning interval: keep every 10th sample

## Print summary for each trace
#pm.summary(trace[burnin::thin])
#pm.summary(trace)

## Check for mixing and autocorrelation
pm.autocorrplot(trace[burnin::thin], vars =[mu, kappa])
#pm.autocorrplot(trace, vars =[mu, kappa])

## Plot KDE and sampled values for each parameter.
pm.traceplot(trace[burnin::thin])
#pm.traceplot(trace)

# Create arrays with the posterior sample
theta1_sample = trace['theta'][:,0][burnin::thin]
theta28_sample = trace['theta'][:,27][burnin::thin]
mu_sample = trace['mu'][burnin::thin]
kappa_sample = trace['kappa'][burnin::thin]

fig = plt.figure(figsize=(12,12))

# Plot mu histogram
Example #21
with pm.Model() as model_t:
    alpha = pm.Normal('alpha', mu=0, sd=100)
    beta = pm.Normal('beta', mu=0, sd=1)
    epsilon = pm.HalfCauchy('epsilon', 5)
    nu = pm.Deterministic('nu', pm.Exponential('nu_', 1/29) + 1)

    y_pred = pm.StudentT('y_pred', mu=alpha + beta * x_3, sd=epsilon, nu=nu, observed=y_3)

    start = pm.find_MAP()
    step = pm.NUTS(scaling=start)
    trace_t = pm.sample(2000, step=step, start=start, njobs=1)  # fixed typo: 'setp' -> 'step'


pm.traceplot(trace_t)
plt.savefig('img4142.png')
pm.autocorrplot(trace_t)
plt.savefig('img4143.png')


plt.clf()

beta_c, alpha_c = stats.linregress(x_3, y_3)[:2]
plt.plot(x_3, (alpha_c + beta_c * x_3), 'k', label='non-robust', alpha=0.5)
plt.plot(x_3, y_3, 'bo')
alpha_m = trace_t['alpha'].mean(0)
beta_m = trace_t['beta'].mean(0)
plt.plot(x_3, alpha_m + beta_m * x_3, c='k', label='robust')

plt.xlabel('$x$', fontsize=16)
plt.ylabel('$y$', fontsize=16, rotation=0)
plt.legend(loc=2, fontsize=14)
Example #22
    chain2 = data_chain2[:,i:i+1]
    
    burn_in = 0
    length = (ie-i0)//step
    
    n = chain1[burn_in:burn_in+length].shape[0]

    # Within-chain variance W, between-chain variance B, and the pooled
    # variance estimate var_theta; R-hat = sqrt(var_theta / W) should be near 1.
    W = (chain1[burn_in:burn_in+length].std()**2 + chain2[burn_in:burn_in+length].std()**2)/2
    mean1 = chain1[burn_in:burn_in+length].mean()
    mean2 = chain2[burn_in:burn_in+length].mean()
    mean = (mean1 + mean2)/2
    B = n * ((mean1 - mean)**2 + (mean2 - mean)**2)
    var_theta = (1 - 1/n) * W + 1/n*B
    print("Gelman-Rubin Diagnostic: ", np.sqrt(var_theta/W))

corr_plot1 = pm3.autocorrplot(data_traceplot1,var_names=names,grid=(1,N_var),figsize=(12,6.5),textsize=18,combined=True)
corr_plot1 = corr_plot1[None,:]
for i in range(N_var):
    corr_plot1[0, i].set_xlabel('Lag Index',fontsize=26)
corr_plot1[0, 0].set_ylabel('Autocorrelation Value',fontsize=26)
plt.savefig("plots/autocorrelation.png", bbox_inches='tight', pad_inches=0.01)

plt.figure(figsize=(12,6.5))
for i in range(data_chain1.shape[1]): 
    gw_plot = pm3.geweke(data_chain1[:,i],.1,.5,20)
    plt.scatter(gw_plot[:,0],gw_plot[:,1],label="%s"%names[i])
plt.axhline(-1.98, c='r')
plt.axhline(1.98, c='r')
plt.xticks(fontsize=22)
plt.yticks(fontsize=22)
plt.xlabel("Subchain sample number",fontsize=26)
Example #23
    Observed = pm.Normal("Observed", theta, sd=sigma,
                         observed=elec_faults1)  # observed values
    # Observed = pm.Gamma("Observed", theta, sigma,  observed=elec_faults1)  # observed values
    # Observed_pred = pm.Bound(pm.Normal, lower=0.0)('Observed_pred', mu=theta, sd=sigma, shape=elec_faults1.shape)  # predicted values
    # start = pm.find_MAP()
    # step = pm.Metropolis()
    trace2 = pm.sample(1000)

chain2 = trace2
varnames2 = ['beta', 'beta1', 'beta2']
# pm.traceplot(chain2, varnames2, kde_plot=True, text_size=14, color='#6CA6CD')
pm.traceplot(chain2, varnames2)
plt.show()

# Plot the autocorrelation curves
pm.autocorrplot(chain2)
plt.show()

with unpooled_model:
    post_pred = pm.sample_ppc(trace2, samples=1000)
plt.figure()
ax = sns.distplot(post_pred['Observed'].mean(axis=1),
                  label='Posterior predictive means')
ax.axvline(elec_faults.mean(), color='r', ls='--', label='True mean')
ax.legend()
plt.show()
# map_estimate = pm.find_MAP(model=unpooled_model)
# print(map_estimate)
# x_lim = 60
# com_pred = chain2.get_values('Observed_pred')[::10].ravel()
# plt.hist(com_pred,  range=[-2, 5], bins=90, histtype='stepfilled', color='#6CA6CD')
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
plt.rcParams["figure.figsize"] = (10, 5)
np.random.seed(42)

# Prepare the data
x = uniform(0, 20).rvs(30)
eps = norm(0, 4).rvs(30)
y = 11 + 3*x + eps

# Sampling w/ Metropolis
with pm.Model() as model:
    b_0 = pm.Normal("b_0", mu=0, sd=10)
    b_1 = pm.Normal("b_1", mu=0, sd=2)
    e = pm.HalfCauchy("e", 2)
    mu = pm.Deterministic("mu", b_0 + b_1*x)
    Y = pm.Normal("Y", mu=mu, sd=e, observed=y)
    trace = pm.sample(10000, step=pm.Metropolis())
pm.autocorrplot(trace, varnames=["b_0", "b_1", "e"]);
plt.savefig("./results/4-12-autocorrelation-metropolis.png")

# Sampling w/ NUTS
with pm.Model() as model:
    b_0 = pm.Normal("b_0", mu=0, sd=10)
    b_1 = pm.Normal("b_1", mu=0, sd=2)
    e = pm.HalfCauchy("e", 2)
    mu = pm.Deterministic("mu", b_0 + b_1*x)
    Y = pm.Normal("Y", mu=mu, sd=e, observed=y)
    trace = pm.sample(10000)
pm.autocorrplot(trace, varnames=["b_0", "b_1", "e"]);
plt.savefig("./results/4-12-autocorrelation-nuts.png")
Example #25
pm.traceplot(trace)

print('\n--- Optimal Model Parameters ---')

#%% Discarding early samples (burnin)

fig = plt.figure(figsize=(10,4))
plt.subplot(121)
_ = plt.title('Burnin trace')
_ = plt.ylim(freq_mu - 0.2, freq_mu + 0.2) 
_ = plt.plot(trace.get_values('mu')[:1000])

fig = plt.subplot(122)
_ = plt.title('Full trace')
_ = plt.ylim(freq_mu - 0.2, freq_mu + 0.2) 
_ = plt.plot(trace.get_values('mu'))

#%% Autocorrelation Test Plot
# A measure of correlation between successive samples in the MCMC sampling
# chain. When samples have low correlation with each other, they add more
# "information" to the estimate of your parameter value than samples that
# are highly correlated.

# Visually, you are looking for an autocorrelation plot that tapers off to
# zero relatively quickly and then oscillates above and below zero
# correlation. If your autocorrelation plot does not taper off, it is
# generally a sign of poor mixing, and you should revisit your model
# selection (e.g. likelihood) and sampling method (e.g. Metropolis).

_ = pm.autocorrplot(trace[:2000], var_names=['mu'])
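For intuition about what these plots show, a minimal NumPy sketch of the lag-k sample autocorrelation (a hypothetical helper, not part of the PyMC3 API):

import numpy as np

def autocorr(x, lag):
    """Sample autocorrelation of a 1-D chain at the given lag."""
    x = np.asarray(x, dtype=float) - np.mean(x)
    return np.dot(x[:-lag], x[lag:]) / np.dot(x, x)

# e.g. autocorr(trace.get_values('mu'), lag=10)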
Example #26
    for idx, x_i in enumerate(x):
        plt.subplot(2, 2, idx + 1)
        plt.scatter(x_i, y)
        plt.xlabel('$x_{}$'.format(idx + 1), fontsize=16)
        plt.ylabel('$y$', rotation=0, fontsize=16)
    plt.subplot(2, 2, idx + 2)
    plt.scatter(x[0], x[1])
    plt.xlabel('$x_{}$'.format(idx), fontsize=16)
    plt.ylabel('$x_{}$'.format(idx + 1), rotation=0, fontsize=16)


#scatter_plot(X_centered, y)
#plt.savefig('img425.png')

with pm.Model() as model_mlr:
    alpha_tmp = pm.Normal('alpha_tmp', mu=0, sd=10)
    beta = pm.Normal('beta', mu=0, sd=1, shape=2)
    epsilon = pm.HalfCauchy('epsilon', 5)
    mu = alpha_tmp + pm.math.dot(beta, X_centered)
    alpha = pm.Deterministic('alpha', alpha_tmp - pm.math.dot(beta, X_mean))
    y_pred = pm.Normal('y_pred', mu=mu, sd=epsilon, observed=y)

    trace_mlr = pm.sample(5000, njobs=1)

varnames = ['alpha', 'beta', 'epsilon']
pm.traceplot(trace_mlr, varnames)
plt.savefig('img426.png')
pm.summary(trace_mlr[500:], varnames)
pm.autocorrplot(trace_mlr[500:], varnames)
plt.savefig('img4261.png')
Example #27
"""
plt.scatter(x_2, y_2)
plt.xlabel('$x$', fontsize=16)
plt.ylabel('$y$', fontsize=16, rotation=0)
plt.savefig('img422.png')
"""

with pm.Model() as model_poly:
    alpha = pm.Normal('alpha', mu=0, sd=10)
    beta1 = pm.Normal('beta1', mu=0, sd=1)
    beta2 = pm.Normal('beta2', mu=0, sd=1)
    epsilon = pm.HalfCauchy('epsilon', 5)
    mu = alpha + beta1 * x_2 + beta2 * x_2**2
    y_pred = pm.Normal('y_pred', mu=mu, sd=epsilon, observed=y_2)
    trace_poly = pm.sample(2000, njobs=1)

pm.traceplot(trace_poly)
plt.savefig('img423.png')
pm.autocorrplot(trace_poly)
plt.savefig('img4232.png')

plt.clf()
x_p = np.linspace(-6, 6)
y_p = (trace_poly['alpha'].mean() + trace_poly['beta1'].mean() * x_p +
       trace_poly['beta2'].mean() * x_p**2)
plt.scatter(x_2, y_2)
plt.xlabel('$x$', fontsize=16)
plt.ylabel('$y$', fontsize=16, rotation=0)
plt.plot(x_p, y_p, c='k')
plt.savefig('img424.png')
Example #28
    beta4 = pm.Normal('beta4', 0, 20)

    # define the likelihood, built as a function of time
    theta = (beta[companyABC] + beta1 * elec_year + beta2 * elec_tem1 +
             beta3 * elec_hPa1 + beta4 * elec_RH1)
    Observed = pm.StudentT("Observed",
                           mu=theta,
                           sd=sigma,
                           nu=nu,
                           observed=elec_faults1)  # observed values

    start = pm.find_MAP()
    # step = pm.Metropolis()
    trace3 = pm.sample(6000, start=start)
chain3 = trace3
varnames1 = ['beta', 'beta1', 'beta2', 'beta3', 'beta4']
pm.traceplot(chain3, varnames1)
plt.show()
varnames1 = ['sigma', 'mu_a', 'sigma_a']
pm.traceplot(chain3, varnames1)
plt.show()
# Plot the autocorrelation curves
pm.autocorrplot(chain3)
plt.show()
# ======================================================================
# Model comparison and posterior analysis
# ======================================================================
# Waic = pm.compare([traces_ols_glm, trace1], [mdl_ols_glm, pooled_model], ic='WAIC')
# Waic = pm.compare([trace2, trace3], [partial_model, mulpartial_model], ic='WAIC')
# print(Waic)
# Specify the model in PyMC
with pm.Model() as model:
    # define the hyperparameters
    mu = pm.Beta('mu', 2, 2)
    kappa = pm.Gamma('kappa', 1, 0.1)
    # define the prior
    theta = pm.Beta('theta', mu * kappa, (1 - mu) * kappa, shape=len(N))
    # define the likelihood
    y = pm.Bernoulli('y', p=theta[coin], observed=y)
    #print dir(theta)

#   Generate a MCMC chain
    step = pm.Metropolis()
    trace = pm.sample(5000, step, progressbar=False)
'''
#   Restricted models like this could be difficult to sample. This is related
#   to the censoring comment in the book. One way to detect that something is
#   wrong with the sampling is to compare the autocorrelation plot and the
#   sampled values under different sampler, or you can try combinations of
#   sampler like this

#    step1 = pm.Metropolis([theta, mu])
#    step2 = pm.Slice([kappa])
#    trace = pm.sample(5000, [step1, step2], progressbar=False)

#    or this (this combination was used to generate the figures)

#    start = pm.find_MAP()
#    step1 = pm.Metropolis([theta, mu])
#    step2 = pm.NUTS([kappa])
Example #30
    trace = pm.sample(sample, step, start=start, progressbar=True)

# ================================
_ = pm.traceplot(trace, ['p'])

# ================================
# Geweke Test
# Return z-scores for convergence diagnostics.

# Compare the mean of the first % of series with the mean of the last % of
# series. x is divided into a number of segments for which this difference is
# computed. If the series is converged, this score should oscillate between
# -1 and 1.
# ================================
score = pm.geweke(trace.get_values('p'))
figure = plt.figure(figsize=(7,2))
ax = plt.subplot()
_ = ax.scatter(score[:,0],score[:,1],c='black',marker='x')
x_min = min(score[:,0])
x_max = max(score[:,0])            
_ = ax.hlines(y =[-1],xmin=x_min,xmax=x_max,colors='#1aaf5d',linestyles='dashed',linewidths=1)
_ = ax.hlines(y =[1],xmin=x_min,xmax=x_max,colors='#0075c2',linestyles='dashed',linewidths=1)
_ = ax.set_xlim([x_min,x_max])     
_ = ax.set_xlabel('First Iteration')
_ = ax.set_ylabel('Z Score for Geweke Test')
plt.show()


# ================================
pm.autocorrplot(trace,['p'])
Example #31
    y = pm.Bernoulli('y', p=theta[coin], observed=y)
    #   Generate a MCMC chain
    trace = pm.sample(10000,
                      step=pm.NUTS(),
                      random_seed=(123),
                      progressbar=False)

## Check the results.
burnin = 100  # posterior samples to discard

## Print summary for each trace
#pm.df_summary(trace[burnin:])
#pm.df_summary(trace)

## Check for mixing and autocorrelation
pm.autocorrplot(trace[burnin:], varnames=['mu', 'kappa'])
#pm.autocorrplot(trace, vars =[mu, kappa])

## Plot KDE and sampled values for each parameter.
pm.traceplot(trace[burnin:])
#pm.traceplot(trace)

# Create arrays with the posterior sample
theta1_sample = trace['theta'][:, 0][burnin:]
theta28_sample = trace['theta'][:, 27][burnin:]
mu_sample = trace['mu'][burnin:]
kappa_sample = trace['kappa'][burnin:]

# Plot mu histogram
fig, ax = plt.subplots(2, 2, figsize=(12, 12))
pm.plot_posterior(mu_sample, ax=ax[0, 0], color='skyblue')
Example #32
with pm.Model() as model:
    alpha = pm.Normal('alpha', mu=0, sd=10)
    beta = pm.Normal('beta', mu=0, sd=10)
    epsilon = pm.HalfCauchy('epsilon', 5)

    mu = pm.Deterministic('mu', alpha + beta * x)
    y_pred = pm.Normal('y_pred', mu=mu, sd=epsilon, observed=y)

    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(11000, step, start, njobs=1)

trace_n = trace[1000:]
pm.traceplot(trace_n)
plt.savefig('img404.png')
"""
varnames = ['alpha', 'beta', 'epsilon']
pm.autocorrplot(trace_n, varnames)
plt.savefig('img405.png')

plt.clf()
sns.kdeplot(trace_n['alpha'], trace_n['beta'])
plt.xlabel(r'$\alpha$', fontsize=16)
plt.ylabel(r'$\beta$', fontsize=16, rotation=0)
plt.savefig('img406.png')

"""

plt.clf()
plt.plot(x, y, 'b.')
alpha_m = trace_n['alpha'].mean()
    def show_autocorrelation(self, end_burning_index=10000, fraction_to_show=5):

        assert self.trace is not None, "must use the method sample"

        pm.autocorrplot(self.trace[end_burning_index::fraction_to_show], varnames=['sd'])  # drop burn-in, then thin
        plt.show()
Example #34
    theta = pm.Beta('theta', mu * kappa, (1 - mu) * kappa, shape=len(N))
    # define the likelihood
    y = pm.Bernoulli('y', p=theta[coin], observed=y)

    #   Generate a MCMC chain

    trace = pm.sample(1000, progressbar=False)

## Check the results.

## Print summary for each trace
#pm.df_summary(trace)
#pm.df_summary(trace)

## Check for mixing and autocorrelation
pm.autocorrplot(trace, varnames=['mu', 'kappa'])
#pm.autocorrplot(trace, varnames =[mu, kappa])

## Plot KDE and sampled values for each parameter.
pm.traceplot(trace)
#pm.traceplot(trace)

# Create arrays with the posterior sample
theta1_sample = trace['theta'][:, 0]
theta2_sample = trace['theta'][:, 1]
theta3_sample = trace['theta'][:, 2]
mu_sample = trace['mu']
kappa_sample = trace['kappa']

# Scatter plot hyper-parameters
fig, ax = plt.subplots(4, 3, figsize=(12, 12))
Example #35
"""
"""
fig = plt.figure(figsize=(11,3))
ax = fig.add_subplot(111)
x_lim = 60
mu = int(freq_results['x'])  # np.int is deprecated; use the builtin int
for i in np.arange(x_lim):
    plt.bar(i, stats.poisson.pmf(i, mu), color=colors[3])  # pmf(k=i, mu)

_ = ax.set_xlim(0, x_lim)
_ = ax.set_ylim(0, 0.1)
_ = ax.set_xlabel('Response time in seconds')
_ = ax.set_ylabel('Probability mass')
_ = ax.set_title('Estimated Poisson distribution for Hangout chat response time')
_ = plt.legend([r'$\lambda$ = %s' % mu])
plt.show()
"""
# Gonna have to figure out how to get PYMC3
basic_model = pm.Model()
with basic_model:
    mu = pm.Uniform('mu', lower=0, upper=60)
    likelihood = pm.Poisson('likelihood', mu=mu, observed=messages['time_delay_seconds'].values)
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(200000, step, start=start, progressbar=True)

# _ = pm.traceplot(trace, varnames=['mu'], lines={'mu':freq_results['x']})
_ = pm.autocorrplot(trace[:2000], varnames=['mu'])
plt.show()