def main():
    # Hyperparameters
    n_flips = 125
    n_coins = 10
    n_draws = 5000
    n_init_steps = 10000
    n_burn_in_steps = 1000

    # Create Causal Distribution
    causal_probs = np.random.uniform(size=n_coins)

    # Create Observations
    X = np.array([
        np.random.choice(2, p=[1 - p_, p_], size=n_flips)
        for p_ in causal_probs
    ]).T

    # Create Model
    with pm.Model() as model:
        ps = pm.Beta('probs', alpha=1, beta=1, shape=n_coins)
        components = pm.Bernoulli.dist(p=ps, shape=n_coins)
        w = pm.Dirichlet('w', a=np.ones(n_coins))
        mix = pm.Mixture('mix', w=w, comp_dists=components, observed=X)

    # Train Model
    with model:
        trace = pm.sample(n_draws, n_init=n_init_steps, tune=n_burn_in_steps)

    # Display Results
    pm.plot_trace(trace, var_names=['w', 'probs'])
    plt.show()
    pm.plot_posterior(trace, var_names=['w', 'probs'])
    plt.show()
def show_posterior_summary(self, parameters_name, figsize=(10, 8), **kwargs):
    """Print the model summary and, for MCMC fits, plot the posterior traces."""
    self.print_model_summary(parameters_name=parameters_name)
    if not self.map:
        with self.model:
            pm.plot_trace(self.traces, compact=True)
def show_posterior_summary(self, parameters_name, figsize=(10, 8), **kwargs):
    """Print the hierarchical model summary and, for MCMC fits, plot the posterior traces."""
    self.print_model_summary(parameters_name=[
        'player_ability_mu', 'hyper_sigma', 'level_difficulty_mu'
    ])
    if not self.map:
        with self.model:
            pm.plot_trace(data=self.traces, var_names=parameters_name, **kwargs)
    return None
def show_posterior_summary(self, parameters_name, figsize=(10, 8), **kwargs):
    """Print the model summary and visualize the fitted regression lines.

    Uses the MAP point estimate when available; otherwise draws posterior
    predictive samples and plots the traces and regression lines.
    """
    self.print_model_summary(parameters_name=parameters_name)
    if self.map:
        visualize_regression_lines(
            X=self.X,
            y=self.y,
            intercepts=[self.map_estimate['Intercept']],
            slopes=[self.map_estimate['Slope']],
            figsize=figsize,
            overlay=False,
            predictions=None,
            title='Posterior MAP Regression Line',
            logistic=self.logistic,
            **kwargs)
    else:
        with self.model:
            posterior_checks = pm.sample_posterior_predictive(
                self.traces, var_names=['Intercept', 'Slope', 'y'])
            setattr(self, 'posterior_checks', posterior_checks)
            pm.plot_trace(self.traces, compact=True)
        visualize_regression_lines(
            X=self.X,
            y=self.y,
            intercepts=self.posterior_checks['Intercept'],
            slopes=self.posterior_checks['Slope'],
            figsize=figsize,
            overlay=False,
            predictions=self.posterior_checks['y'],
            title='Posterior Regression Lines',
            logistic=self.logistic,
            **kwargs)
def test_pm():
    # This takes ~5 minutes to run
    # Hiding this import in here
    import pymc3 as pm

    parm_dict = mcmc.grab_parmdict()
    outroot = os.path.join(resource_filename('frb', 'tests'), 'files', 'mcmc')

    with mcmc.pm_four_parameter_model(parm_dict, beta=3.):
        # Sample
        #trace = pm.sample(40000, tune=2000)  # This defaults to 4 chains
        trace = pm.sample(1000, tune=500)  # This defaults to 4 chains
        # Save the traces -- needs to be done before the plot
        pm.save_trace(trace, directory=outroot, overwrite=True)
        print("All done with the 4 parameter, beta=3 run")
        # Save a plot
        plt.clf()
        _ = pm.plot_trace(trace)
        #plt.savefig(os.path.join(outroot, 'traceplot.png'))
        # Parameters
        jdict = utils.jsonify(parm_dict)
        utils.savejson(os.path.join(outroot, 'parms.json'), jdict, easy_to_read=True)
alpha = pm.Normal('alpha', mu=0, sd=10)
beta = pm.Normal('beta', mu=0, sd=10, shape=2)
sigma = pm.HalfNormal('sigma', sd=1)

# Expected value of outcome
mu = alpha + beta[0] * X1 + beta[1] * X2

# Likelihood (sampling distribution) of observations
Y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=Y)

# MCMC sample the posterior distributions of the model parameters
trace_0 = pm.sample(100, nuts_kwargs={'target_accept': 0.9}, tune=1000, chains=4)

# Detailed summary of the posterior
print(pm.summary(trace_0))

# https://ericmjl.github.io/bayesian-stats-talk/
# Plot trace of parameters
pm.plot_trace(trace_0)
plt.show()
pm.plot_posterior(trace_0, color='#87ceeb')
plt.show()

# Plot joint distribution of parameters
pm.plot_joint(trace_0, kind='kde', fill_last=False)
plt.show()
# find that gradients are available. There are an impressive number of draws
# per second for a "black box" style computation! However, note that if the
# model can be represented directly by PyMC3 (like the AR(p) models
# mentioned above), then computation can be substantially faster.
#
# Inference is complete, but are the results any good? There are a number
# of ways to check. The first is to look at the posterior distributions
# (with lines showing the MLE values):

plt.tight_layout()
# Note: the syntax here for the lines argument is required for
# PyMC3 versions >= 3.7
# For versions <= 3.6 you can use lines=dict(res_mle.params) instead
_ = pm.plot_trace(
    trace,
    lines=[(k, {}, [v]) for k, v in dict(res_mle.params).items()],
    combined=True,
    figsize=(12, 12),
)

# The estimated posteriors clearly peak close to the parameters found by
# MLE. We can also see a summary of the estimated values:

pm.summary(trace)

# Here $\hat{R}$ is the Gelman-Rubin statistic. It tests for lack of
# convergence by comparing the variance between multiple chains to the
# variance within each chain. If convergence has been achieved, the
# between-chain and within-chain variances should be identical. If
# $\hat{R} < 1.2$ for all model parameters, we can have some confidence
# that convergence has been reached.
#
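# As a minimal sketch of that rule of thumb (assuming the summary is returned
# as a pandas DataFrame with an "r_hat" column, as in ArviZ-based PyMC3
# releases; older releases label the column "Rhat"), the check can also be
# done programmatically:

summary = pm.summary(trace)
rhat_col = "r_hat" if "r_hat" in summary.columns else "Rhat"
if (summary[rhat_col] < 1.2).all():
    print("All parameters satisfy R-hat < 1.2")
else:
    print("Parameters with possible convergence issues:",
          list(summary.index[summary[rhat_col] >= 1.2]))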
fig, ax = plt.subplots(figsize=(7, 7))
ax.errorbar(df['x'].values, df['y'].values, fmt='ro',
            yerr=df['y_error'].values, xerr=df['x_error'].values,
            ecolor='black')

# %%
with pm.Model() as model_robust:
    family = pm.glm.families.StudentT()
    pm.glm.GLM.from_formula('y ~ x', df, family=family)
    trace_robust = pm.sample(40000, cores=2)

# %% {"scrolled": false}
pm.plot_trace(trace_robust)

# %%
fig = plt.figure(figsize=(10, 7))
pm.plot_posterior_predictive_glm(trace_robust,
                                 label='posterior predictive regression lines')
ax = fig.axes[0]
ax.errorbar(df['x'].values, df['y'].values, fmt='ro',
            yerr=df['y_error'].values, xerr=df['x_error'].values,
            ecolor='black')

# %%
# Import libraries
import numpy as np
import pymc3 as pm
import theano

x = np.array([0, 1, 0, 1, 0, 0, 0, 0, 0, 1])
x_shared = theano.shared(x)

with pm.Model() as model:
    p = pm.Beta('mu', 1, 1)
    obs = pm.Binomial('obs', n=10, p=p, observed=x_shared)
    trace = pm.sample(1000)

pm.plot_trace(trace)

# Swap in new data and draw posterior predictive samples
x_shared.set_value([0, 0, 0])
with model:
    post_pred = pm.sample_posterior_predictive(trace, samples=5)
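# A minimal follow-up sketch (a hypothetical check, not part of the original
# snippet): sample_posterior_predictive returns a dict keyed by the observed
# variable name, and after swapping in three new data points each of the 5
# draws should have length 3, i.e. an array of shape (5, 3).
print(post_pred['obs'].shape)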
def pymc3_plot(model, trace):
    """Plot the sampling trace within the given model context."""
    with model:
        pm.plot_trace(trace)
# use a DensityDist (use a lambda function to "call" the Op)
pm.DensityDist("likelihood", loglike, observed=theta)

# Draw samples
trace = pm.sample(
    ndraws,
    tune=nburn,
    return_inferencedata=True,
    cores=1,
    compute_convergence_checks=False,
)

# ### How does the posterior distribution compare with the MLE estimation?
#
# The posteriors clearly peak around the MLE estimates.

results_dict = {
    "intercept": res_mle.params[0],
    "var.e": res_mle.params[1],
    "var.x.coeff": res_mle.params[2],
    "var.w.coeff": res_mle.params[3],
}

plt.tight_layout()
_ = pm.plot_trace(
    trace,
    lines=[(k, {}, [v]) for k, v in results_dict.items()],
    combined=True,
    figsize=(12, 12),
)
c1 = np.random.lognormal(np.log(1e-4), 0.15, n)
c2 = np.random.lognormal(np.log(1.0), 0.15, n)

expected_s1 = 0.85
expected_s2 = 1 - expected_s1
expected_m1 = 0.15
expected_m2 = 2
# c_obs = (expected_s1 ** expected_m1 * c1) + (expected_s2 ** expected_m2 * c2)
c_obs = np.random.lognormal(np.log(1.0 / 30), 0.15, n)

glover = pm.Model()
with glover:
    s1 = pm.Normal("s1", mu=0.8, sd=1)
    m1 = pm.Normal("m1", mu=0.15, sd=1)
    m2 = pm.Lognormal("m2", mu=0, sd=0.6)
    r1 = pm.Normal("r1", mu=1e-4, sd=0.02)
    r2 = pm.Normal("r2", mu=0, sd=4)

    expected_mu = (s1**m1 * r1) + ((1 - s1)**m2 * r2)
    c_measured = pm.Normal("c_measured", mu=expected_mu, observed=c_obs)

    trace = pm.sample(draws=5000, tune=4000, chains=1)
    a = pm.plot_trace(trace)
    print(pm.summary(trace))
with pm.Model() as model_0:
    alpha = pm.Normal('alpha', mu=0, sd=10)
    beta = pm.Normal('beta', mu=0, sd=10)
    mu = alpha + pm.math.dot(x_c, beta)
    theta = pm.Deterministic('theta', pm.math.sigmoid(mu))
    bd = pm.Deterministic('bd', -alpha / beta)
    yl = pm.Bernoulli('yl', p=theta, observed=y_0)
    trace_0 = pm.sample(1000)

varnames = ['alpha', 'beta', 'bd']
pm.summary(trace_0, varnames)
pm.plot_trace(trace_0, varnames)

#######################
# multi-variable logit
#######################
df = iris.query("species == ('setosa', 'versicolor')")
y_1 = pd.Categorical(df['species']).codes
x_n = ['sepal_length', 'sepal_width']  # note: not centering this time
x_1 = df[x_n].values

with pm.Model() as model_1:
    alpha = pm.Normal('alpha', mu=0, sd=10)
    beta = pm.Normal('beta', mu=0, sd=2, shape=len(x_n))
import numpy as np
from pandas import Series
import pymc3 as pm

data = Series(np.loadtxt('../data/chemical_shifts.csv'), name='shift')
data_mean = data.mean()

#########
# in book
#########
with pm.Model() as model_g:
    mu = pm.Uniform('mu', lower=40, upper=70)
    sigma = pm.HalfNormal('sigma', sd=10)
    y = pm.Normal('y', mu=mu, sd=sigma, observed=data)
    trace_g = pm.sample(1000)

pm.plot_trace(trace_g)

################
# modification 1
################
with pm.Model() as model1:
    mu = pm.Normal('mu', data_mean, 5)
    sigma = pm.HalfNormal('sigma', sd=10)
    y = pm.Normal('y', mu=mu, sd=sigma, observed=data)
    trace1 = pm.sample(1000)

pm.plot_trace(trace1)

################
# modification 2
################