Example No. 1
#------------------------------------------------------------
# perform the MCMC sampling
np.random.seed(0)
S = pymc.MCMC(model)
S.sample(iter=25000, burn=2000)

#------------------------------------------------------------
# Extract the MCMC traces
trace_mu = S.trace('mu')[:]
trace_sigma = S.trace('sigma')[:]

fig = plt.figure(figsize=(5, 3.75))
ax, = plot_mcmc([trace_mu, trace_sigma],
                fig=fig,
                limits=[(-3.2, 4.2), (0, 5)],
                bounds=(0.08, 0.12, 0.95, 0.95),
                labels=(r'$\mu$', r'$\sigma$'),
                levels=[0.683, 0.955, 0.997],
                colors='k')

#----------------------------------------------------------------------
# Compute and plot likelihood with known ei for comparison
# (Same as fig_likelihood_gaussgauss)
sigma = np.linspace(0.01, 5, 41)
mu = np.linspace(-3.2, 4.2, 41)

logL = gaussgauss_logL(xi, ei, mu, sigma[:, np.newaxis])
logL -= logL.max()

im = ax.contourf(mu, sigma, convert_to_stdev(logL),
                 levels=(0, 0.683, 0.955, 0.997),
                 cmap=plt.cm.binary_r, alpha=0.5)
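
# For reference, a minimal sketch of the log-likelihood that the comparison grid
# above is built from (this mirrors what a gaussgauss_logL-style helper computes;
# the exact astroML implementation may differ in detail): N points xi with known
# Gaussian errors ei, drawn from N(mu, sigma), evaluated on the 2-D (mu, sigma)
# grid used above,
#     ln L(mu, sigma) = -0.5 * sum_i [ ln(sigma^2 + ei^2)
#                                      + (xi - mu)^2 / (sigma^2 + ei^2) ]
# up to an additive constant, which drops out when logL.max() is subtracted.
import numpy as np  # already imported in the snippets above


def gaussgauss_logL_sketch(xi, ei, mu, sigma):
    # shapes match the call gaussgauss_logL(xi, ei, mu, sigma[:, np.newaxis]):
    # xi, ei are 1-D data arrays, mu has shape (Nmu,), sigma has shape (Nsigma, 1)
    xi = xi[:, np.newaxis, np.newaxis]
    ei = ei[:, np.newaxis, np.newaxis]
    s2 = sigma ** 2 + ei ** 2
    return -0.5 * np.sum(np.log(s2) + (xi - mu) ** 2 / s2, axis=0)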
Example No. 2
M = SigBG('M', A, x0, sigma, observed=True, value=x)

model = dict(M=M, A=A, x0=x0, log_sigma=log_sigma, sigma=sigma)

#----------------------------------------------------------------------
# Run the MCMC sampling
S = pymc.MCMC(model)
S.sample(iter=25000, burn=5000)

#------------------------------------------------------------
# Plot the results
fig = plt.figure(figsize=(5, 5))
ax_list = plot_mcmc([S.trace(s)[:] for s in ['A', 'x0', 'sigma']],
                    limits=[(0.05, 0.65), (5.75, 6.65), (0.05, 0.85)],
                    labels=[r'$A$', r'$\mu$', r'$\sigma$'],
                    bounds=(0.1, 0.1, 0.95, 0.95),
                    true_values=[A_true, x0_true, sigma_true],
                    fig=fig, colors='k')

ax = plt.axes([0.62, 0.62, 0.33, 0.33])
x_pdf = np.linspace(0, 10, 100)
y_pdf = A_true * signal.pdf(x_pdf) + (1 - A_true) * background.pdf(x_pdf)

ax.hist(x, 15, density=True, histtype='stepfilled', alpha=0.5)
ax.plot(x_pdf, y_pdf, '-k')

ax.set_xlim(0, 10)
ax.set_ylim(0, 0.5)
ax.set_xlabel('$x$')
ax.set_ylabel(r'$y_{\rm obs}$')
Example No. 3
          (0.15, 0.45),
          (0.55, 1.3),
          (0.25, 2.1)]

# we assume mu1 < mu2, but the results may be switched
#  due to the symmetry of the problem.  If so, switch back
if np.median(trace2[0]) > np.median(trace2[1]):
    trace2 = trace2[[1, 0, 3, 2, 4], :]
    N2_norm_mu = N2.mu[N2.M2_mu2, N2.M2_mu1,
                       N2.M2_sigma2, N2.M2_sigma1, N2.M2_ratio]
    N2_norm_Sig = N2.C[N2.M2_mu2, N2.M2_mu1,
                       N2.M2_sigma2, N2.M2_sigma1, N2.M2_ratio]

# Plot the simple 2-component model
ax, = plot_mcmc(trace1, fig=fig, bounds=[0.6, 0.6, 0.95, 0.95],
                limits=[(0.3, 0.8), (0.75, 1.15)],
                labels=[r'$\mu$', r'$\sigma$'], colors='k')

ax.text(0.05, 0.95, "Single Gaussian fit", va='top', ha='left',
        transform=ax.transAxes)

# Plot the 5-component model
ax_list = plot_mcmc(trace2, limits=limits, labels=labels,
                    true_values=true_values, fig=fig,
                    bounds=(0.12, 0.12, 0.95, 0.95),
                    colors='k')
for ax in ax_list:
    for axis in [ax.xaxis, ax.yaxis]:
        axis.set_major_locator(plt.MaxNLocator(4))

plt.show()
Example No. 4
M = SigBG('M', A, x0, sigma, observed=True, value=x)

model = dict(M=M, A=A, x0=x0, log_sigma=log_sigma, sigma=sigma)

#----------------------------------------------------------------------
# Run the MCMC sampling
S = pymc.MCMC(model)
S.sample(iter=25000, burn=5000)

#------------------------------------------------------------
# Plot the results
fig = plt.figure(figsize=(5, 5))
ax_list = plot_mcmc([S.trace(s)[:] for s in ['A', 'x0', 'sigma']],
                    limits=[(0.05, 0.65), (5.75, 6.65), (0.05, 0.85)],
                    labels=[r'$A$', r'$\mu$', r'$\sigma$'],
                    bounds=(0.1, 0.1, 0.95, 0.95),
                    true_values=[A_true, x0_true, sigma_true],
                    fig=fig, colors='k')

ax = plt.axes([0.62, 0.62, 0.33, 0.33])
x_pdf = np.linspace(0, 10, 100)
y_pdf = A_true * signal.pdf(x_pdf) + (1 - A_true) * background.pdf(x_pdf)

ax.hist(x, 15, density=True, histtype='stepfilled', alpha=0.5)
ax.plot(x_pdf, y_pdf, '-k')

ax.set_xlim(0, 10)
ax.set_ylim(0, 0.5)
ax.set_xlabel('$x$')
ax.set_ylabel(r'$y_{\rm obs}$')
Example No. 5
starting_guesses[:, 1] *= 20  # start sigma between 0 and 20

sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior, args=[F, e])
sampler.run_mcmc(starting_guesses, nsteps)

# sampler.chain has shape (nwalkers, nsteps, ndim);
# discard the burn-in steps and flatten the walkers into a single set of samples
sample = sampler.chain[:, nburn:, :].reshape(-1, 2)

# Now that we have the samples, we'll use a convenience routine from astroML to plot the traces and the contours representing one and two standard deviations:

# In[13]:

from astroML.plotting import plot_mcmc

fig = plt.figure()
ax = plot_mcmc(sample.T, fig=fig, labels=[r'$\mu$', r'$\sigma$'], colors='k')
ax[0].plot(sample[:, 0], sample[:, 1], ',k', alpha=0.1)
ax[0].plot([mu_true], [sigma_true], 'o', color='red', ms=10)

# The red dot indicates ground truth (from our problem setup), and the contours indicate one and two standard deviations (68% and 95% confidence levels).  In other words, based on this analysis we are 68% confident that the model lies within the inner contour, and 95% confident that the model lies within the outer contour.
#
# Note here that $\sigma = 0$ is consistent with our data within two standard deviations: that is, depending on the certainty threshold you're interested in, our data are not enough to confidently rule out the possibility of a non-varying source!
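#
# A quick numerical check of that claim (a sketch, not part of the original
# analysis): the lower edge of the central ~95.4% credible interval for the
# marginal sigma posterior, used here as a rough 1-D proxy for the 2-D
# two-standard-deviation contour.

sigma_samples = sample[:, 1]
lo, hi = np.percentile(sigma_samples, [2.275, 97.725])  # central 2-sigma interval
print("~2-sigma credible interval for sigma: [{0:.3f}, {1:.3f}]".format(lo, hi))
# a lower edge at or near zero means a non-varying source cannot be ruled out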
#
# The other thing to notice is that this posterior is definitely *not* Gaussian: this can be seen by the lack of symmetry in the vertical direction.  That means that the Gaussian approximation used within the frequentist approach may not reflect the true uncertainties in the result.  This isn't an issue with frequentism itself (i.e. there are certainly ways to account for non-Gaussianity within the frequentist paradigm), but the *vast majority* of commonly applied frequentist techniques make the explicit  or implicit assumption of Gaussianity of the distribution.  Bayesian approaches generally don't require such assumptions.
#
# <small>(Side note on priors: there are good arguments that a flat prior on $\sigma$ subtly biases the calculation in this case: i.e. a flat prior is not necessarily non-informative in the case of scale factors like $\sigma$. There are interesting arguments to be made that the [Jeffreys Prior](http://en.wikipedia.org/wiki/Jeffreys_prior) would be more applicable. Here I believe the Jeffreys prior is not suitable, because $\sigma$ is not a true scale factor (i.e. the Gaussian has contributions from $e_i$ as well). On this question, I'll have to defer to others who have more expertise. Note that subtle &mdash; some would say subjective &mdash; questions like this are among the features of Bayesian analysis that frequentists take issue with).</small>
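#
# A minimal sketch (an illustration, not part of the original analysis) of how a
# scale-invariant p(sigma) ~ 1/sigma prior could be swapped in to test how
# sensitive the result is to the choice of prior.  Here `log_likelihood(theta, F, e)`
# is assumed to be the likelihood helper from which `log_posterior` above was built.

def log_prior_scale_invariant(theta):
    mu, sigma = theta
    # improper scale-invariant prior: p(sigma) proportional to 1/sigma for sigma > 0
    return -np.log(sigma) if sigma > 0 else -np.inf

def log_posterior_scale_invariant(theta, F, e):
    lp = log_prior_scale_invariant(theta)
    if not np.isfinite(lp):
        return -np.inf
    return lp + log_likelihood(theta, F, e)  # log_likelihood: assumed helper

# passing log_posterior_scale_invariant to emcee.EnsembleSampler in place of
# log_posterior above would show how much (or how little) the contours move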

### Conclusion

# I hope I've been able to convey through this post how philosophical differences underlying frequentism and Bayesianism lead to fundamentally different approaches to simple problems, which nonetheless can often yield similar or even identical results.
#
Example No. 6
#------------------------------------------------------------
# perform the MCMC sampling
pymc.numpy.random.seed(0)
S = pymc.MCMC(model)
S.sample(iter=25000, burn=2000)

#------------------------------------------------------------
# Extract the MCMC traces
trace_mu = S.trace('mu')[:]
trace_sigma = S.trace('sigma')[:]

fig = plt.figure()
ax, = plot_mcmc([trace_mu, trace_sigma], fig=fig,
                limits=[(-3, 5), (0, 5)],
                labels=(r'$\mu$', r'$\sigma$'),
                levels=[0.683, 0.955, 0.997],
                colors='k', linewidths=2)

#----------------------------------------------------------------------
# Compute and plot likelihood with known ei for comparison
# (Same as fig_likelihood_gaussgauss)
sigma = np.linspace(0.01, 5, 41)
mu = np.linspace(-3, 5, 41)

logL = gaussgauss_logL(xi, ei, mu, sigma[:, np.newaxis])
logL -= logL.max()

im = ax.contourf(mu, sigma, convert_to_stdev(logL),
                 levels=(0, 0.683, 0.955, 0.997),
                 cmap=plt.cm.binary_r, alpha=0.5)
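
# For reference, a minimal sketch of what a convert_to_stdev-style helper does
# (the astroML utility may differ in implementation details): map each cell of
# the log-likelihood grid to the cumulative fraction of the total likelihood
# contained in cells of higher likelihood, so that the contour levels 0.683,
# 0.955 and 0.997 enclose those fractions of the distribution.
import numpy as np  # already imported in the snippets above


def convert_to_stdev_sketch(logL):
    L = np.exp(logL - logL.max())      # relative likelihood; avoids underflow
    shape = L.shape
    L = L.ravel()
    i_sort = np.argsort(L)[::-1]       # cells ordered from most to least likely
    i_unsort = np.argsort(i_sort)
    L_cumsum = L[i_sort].cumsum()
    L_cumsum /= L_cumsum[-1]           # normalize to the total likelihood
    return L_cumsum[i_unsort].reshape(shape)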
Example No. 7
#------------------------------------------------------------
# perform the MCMC sampling
pymc.numpy.random.seed(0)
S = pymc.MCMC(model)
S.sample(iter=25000, burn=2000)

#------------------------------------------------------------
# Extract the MCMC traces
trace_mu = S.trace('mu')[:]
trace_sigma = S.trace('sigma')[:]

fig = plt.figure()
ax, = plot_mcmc([trace_mu, trace_sigma],
                fig=fig,
                limits=[(-3, 5), (0, 5)],
                labels=(r'$\mu$', r'$\sigma$'),
                levels=[0.683, 0.955, 0.997],
                colors='k',
                linewidths=2)

#----------------------------------------------------------------------
# Compute and plot likelihood with known ei for comparison
# (Same as fig_likelihood_gaussgauss)
sigma = np.linspace(0.01, 5, 41)
mu = np.linspace(-3, 5, 41)

logL = gaussgauss_logL(xi, ei, mu, sigma[:, np.newaxis])
logL -= logL.max()

im = ax.contourf(mu, sigma, convert_to_stdev(logL),
                 levels=(0, 0.683, 0.955, 0.997),
                 cmap=plt.cm.binary_r, alpha=0.5)
Example No. 8
starting_guesses[:, 1] *= 20    # start sigma between 0 and 20

sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior, args=[F, e])
sampler.run_mcmc(starting_guesses, nsteps)

# sampler.chain has shape (nwalkers, nsteps, ndim);
# discard the burn-in steps and flatten the walkers into a single set of samples
sample = sampler.chain[:, nburn:, :].reshape(-1, 2)


# Now that we have the samples, we'll use a convenience routine from astroML to plot the traces and the contours representing one and two standard deviations:

# In[13]:

from astroML.plotting import plot_mcmc
fig = plt.figure()
ax = plot_mcmc(sample.T, fig=fig, labels=[r'$\mu$', r'$\sigma$'], colors='k')
ax[0].plot(sample[:, 0], sample[:, 1], ',k', alpha=0.1)
ax[0].plot([mu_true], [sigma_true], 'o', color='red', ms=10);


# The red dot indicates ground truth (from our problem setup), and the contours indicate one and two standard deviations (68% and 95% confidence levels).  In other words, based on this analysis we are 68% confident that the model lies within the inner contour, and 95% confident that the model lies within the outer contour.
# 
# Note here that $\sigma = 0$ is consistent with our data within two standard deviations: that is, depending on the certainty threshold you're interested in, our data are not enough to confidently rule out the possibility of a non-varying source!
# 
# The other thing to notice is that this posterior is definitely *not* Gaussian: this can be seen by the lack of symmetry in the vertical direction.  That means that the Gaussian approximation used within the frequentist approach may not reflect the true uncertainties in the result.  This isn't an issue with frequentism itself (i.e. there are certainly ways to account for non-Gaussianity within the frequentist paradigm), but the *vast majority* of commonly applied frequentist techniques make the explicit  or implicit assumption of Gaussianity of the distribution.  Bayesian approaches generally don't require such assumptions.
# 
# <small>(Side note on priors: there are good arguments that a flat prior on $\sigma$ subtly biases the calculation in this case: i.e. a flat prior is not necessarily non-informative in the case of scale factors like $\sigma$. There are interesting arguments to be made that the [Jeffreys Prior](http://en.wikipedia.org/wiki/Jeffreys_prior) would be more applicable. Here I believe the Jeffreys prior is not suitable, because $\sigma$ is not a true scale factor (i.e. the Gaussian has contributions from $e_i$ as well). On this question, I'll have to defer to others who have more expertise. Note that subtle &mdash; some would say subjective &mdash; questions like this are among the features of Bayesian analysis that frequentists take issue with).</small>

### Conclusion

# I hope I've been able to convey through this post how philosophical differences underlying frequentism and Bayesianism lead to fundamentally different approaches to simple problems, which nonetheless can often yield similar or even identical results.
Example No. 9
        # (snippet begins inside the custom log-likelihood: a Gaussian signal
        # N(x0, sigma) with weight A on top of a flat background of width
        # W_true with weight 1 - A)
        return tt.sum(
            np.log(A * np.exp(-0.5 * (
                (x - x0) / sigma)**2) / np.sqrt(2 * np.pi) / sigma +
                   (1 - A) / W_true))

    SigBG = pm.DensityDist('sigbg', logp=sigbg_like, observed=x)
    trace = pm.sample(draws=5000, tune=1000)

# ------------------------------------------------------------
# Plot the results
fig = plt.figure(figsize=(5, 5))
ax_list = plot_mcmc([trace['A'], trace['x0'], np.exp(trace['log_sigma'])],
                    limits=[(0.05, 0.65), (5.75, 6.65), (0.05, 0.85)],
                    labels=[r'$A$', r'$\mu$', r'$\sigma$'],
                    bounds=(0.1, 0.1, 0.95, 0.95),
                    true_values=[A_true, x0_true, sigma_true],
                    fig=fig,
                    colors='k')

ax = plt.axes([0.62, 0.62, 0.33, 0.33])
x_pdf = np.linspace(0, 10, 100)
y_pdf = A_true * signal.pdf(x_pdf) + (1 - A_true) * background.pdf(x_pdf)

ax.hist(x, 15, density=True, histtype='stepfilled', alpha=0.5)
ax.plot(x_pdf, y_pdf, '-k')

ax.set_xlim(0, 10)
ax.set_ylim(0, 0.5)
ax.set_xlabel('$x$')
Example No. 10
          (0.12, 0.45),
          (0.76, 1.3),
          (0.3, 2.5)]

# We assume mu1 < mu2, but the results may be switched
# due to the symmetry of the problem.  If so, switch back
if np.median(trace2['M2_mu1']) < np.median(trace2['M2_mu2']):
    trace2_for_plot = [np.exp(trace2[i]) if 'log_sigma' in i else trace2[i] for i in
                       ['M2_mu1', 'M2_mu2', 'M2_log_sigma1', 'M2_log_sigma2', 'ratio']]
else:
    trace2_for_plot = [np.exp(trace2[i]) if 'log_sigma' in i else trace2[i] for i in
                       ['M2_mu2', 'M2_mu1', 'M2_log_sigma2', 'M2_log_sigma1', 'ratio']]

# Plot the simple 2-component model
ax, = plot_mcmc([trace1['M1_mu'], np.exp(trace1['M1_log_sigma'])],
                fig=fig, bounds=[0.6, 0.6, 0.95, 0.95],
                limits=[(0.3, 0.65), (0.75, 1.05)],
                labels=[r'$\mu$', r'$\sigma$'], colors='k')

ax.text(0.05, 0.95, "Single Gaussian fit", va='top', ha='left',
        transform=ax.transAxes)

# Plot the 5-component model
ax_list = plot_mcmc(trace2_for_plot, limits=limits, labels=labels,
                    true_values=true_values, fig=fig,
                    bounds=(0.12, 0.12, 0.95, 0.95),
                    colors='k')
for ax in ax_list:
    for axis in [ax.xaxis, ax.yaxis]:
        axis.set_major_locator(plt.MaxNLocator(4))

plt.show()
Example No. 11
#------------------------------------------------------------
# perform the MCMC sampling
np.random.seed(0)
S = pymc.MCMC(model)
S.sample(iter=25000, burn=2000)

#------------------------------------------------------------
# Extract the MCMC traces
trace_mu = S.trace('mu')[:]
trace_sigma = S.trace('sigma')[:]

fig = plt.figure(figsize=(5, 3.75))
ax, = plot_mcmc([trace_mu, trace_sigma], fig=fig,
                limits=[(-3.2, 4.2), (0, 5)],
                bounds=(0.08, 0.12, 0.95, 0.95),
                labels=(r'$\mu$', r'$\sigma$'),
                levels=[0.683, 0.955, 0.997],
                colors='k')

#----------------------------------------------------------------------
# Compute and plot likelihood with known ei for comparison
# (Same as fig_likelihood_gaussgauss)
sigma = np.linspace(0.01, 5, 41)
mu = np.linspace(-3.2, 4.2, 41)

logL = gaussgauss_logL(xi, ei, mu, sigma[:, np.newaxis])
logL -= logL.max()

im = ax.contourf(mu, sigma, convert_to_stdev(logL),
                 levels=(0, 0.683, 0.955, 0.997),
                 cmap=plt.cm.binary_r, alpha=0.5)
Example No. 12
    def sigma_m(log_sigma, log_error):
        # total scatter: intrinsic width and measurement error added in quadrature
        return np.sqrt(np.exp(log_sigma)**2 + np.exp(log_error)**2)

    x = pm.Normal('x', mu=mu, sd=sigma_m(log_sigma, log_error), observed=xi)

    # perform the MCMC sampling
    trace = pm.sample(draws=4500, tune=1500)

# ------------------------------------------------------------
# Extract the MCMC traces

fig = plt.figure(figsize=(5, 3.75))
ax, = plot_mcmc([trace['mu'], np.exp(trace['log_sigma'])],
                fig=fig,
                limits=[(-3.2, 4.2), (0, 5)],
                bounds=(0.08, 0.12, 0.95, 0.95),
                labels=(r'$\mu$', r'$\sigma$'),
                levels=[0.683, 0.955, 0.997],
                colors='k')

# ----------------------------------------------------------------------
# Compute and plot likelihood with known ei for comparison
# (Same as fig_likelihood_gaussgauss)
sigma = np.linspace(0.01, 5, 41)
mu = np.linspace(-3.2, 4.2, 41)

logL = gaussgauss_logL(xi, ei, mu, sigma[:, np.newaxis])
logL -= logL.max()

im = ax.contourf(mu, sigma, convert_to_stdev(logL),
                 levels=(0, 0.683, 0.955, 0.997),
                 cmap=plt.cm.binary_r, alpha=0.5)
Example No. 13
          (0.15, 0.45),
          (0.55, 1.3),
          (0.25, 2.1)]

# we assume mu1 < mu2, but the results may be switched
#  due to the symmetry of the problem.  If so, switch back
if np.median(trace2[0]) > np.median(trace2[1]):
    trace2 = trace2[[1, 0, 3, 2, 4], :]
    N2_norm_mu = N2.mu[N2.M2_mu2, N2.M2_mu1,
                       N2.M2_sigma2, N2.M2_sigma1, N2.M2_ratio]
    N2_norm_Sig = N2.C[N2.M2_mu2, N2.M2_mu1,
                       N2.M2_sigma2, N2.M2_sigma1, N2.M2_ratio]

# Plot the simple 2-component model
ax, = plot_mcmc(trace1, fig=fig, bounds=[0.6, 0.6, 0.95, 0.95],
                limits=[(0.3, 0.8), (0.75, 1.15)],
                labels=[r'$\mu$', r'$\sigma$'], colors='k')

ax.text(0.05, 0.95, "Single Gaussian fit", va='top', ha='left',
        transform=ax.transAxes)

# Plot the 5-component model
ax_list = plot_mcmc(trace2, limits=limits, labels=labels,
                    true_values=true_values, fig=fig,
                    bounds=(0.12, 0.12, 0.95, 0.95),
                    colors='k')
for ax in ax_list:
    for axis in [ax.xaxis, ax.yaxis]:
        axis.set_major_locator(plt.MaxNLocator(4))

plt.show()
Example No. 14
# perform the MCMC sampling
pymc.numpy.random.seed(0)
S = pymc.MCMC(model)
S.sample(iter=25000, burn=2000)

# ------------------------------------------------------------
# Extract the MCMC traces
trace_mu = S.trace("mu")[:]
trace_sigma = S.trace("sigma")[:]

fig = plt.figure()
ax, = plot_mcmc(
    [trace_mu, trace_sigma],
    fig=fig,
    limits=[(-3.2, 4.2), (0, 5)],
    labels=(r"$\mu$", r"$\sigma$"),
    levels=[0.683, 0.955, 0.997],
    colors="k",
    linewidths=2,
)

# ----------------------------------------------------------------------
# Compute and plot likelihood with known ei for comparison
# (Same as fig_likelihood_gaussgauss)
sigma = np.linspace(0.01, 5, 41)
mu = np.linspace(-3.2, 4.2, 41)

logL = gaussgauss_logL(xi, ei, mu, sigma[:, np.newaxis])
logL -= logL.max()

im = ax.contourf(mu, sigma, convert_to_stdev(logL),
                 levels=(0, 0.683, 0.955, 0.997),
                 cmap=plt.cm.binary_r, alpha=0.5)