Example #1
def run(n_samples=3000):
    model = build_model()
    start = model.test_point
    h = pm.find_hessian(start, model=model)
    step = pm.Metropolis(model.vars, h, blocked=True, model=model)
    trace = pm.sample(n_samples, step, start, model=model)
    return trace
Example #2
def run(n_samples=3000):
    model = build_model()
    start = model.test_point
    h = pm.find_hessian(start, model=model)
    step = pm.Metropolis(model.vars, h, blocked=True, model=model)
    trace = pm.sample(n_samples, step=step, start=start, model=model)
    return trace
Example #3
def init_nuts(init='advi', n_init=500000, model=None, **kwargs):
    """Initialize and sample from posterior of a continuous model.

    This is a convenience function. NUTS convergence and sampling speed are extremely
    dependent on the choice of mass/scaling matrix. In our experience, using ADVI
    to estimate a diagonal covariance matrix and using this as the scaling matrix
    produces robust results over a wide class of continuous models.

    Parameters
    ----------
    init : str {'advi', 'advi_map', 'map', 'nuts'}
        Initialization method to use.
        * advi : Run ADVI to estimate posterior mean and diagonal covariance matrix.
        * advi_map: Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point.
        * nuts : Run NUTS and estimate posterior mean and covariance matrix.
    n_init : int
        Number of iterations of the initializer.
        If 'advi', the number of ADVI iterations; if 'nuts', the number of draws.
    model : Model (optional if in `with` context)
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start, nuts_sampler

    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """

    model = pm.modelcontext(model)

    pm._log.info('Initializing NUTS using {}...'.format(init))

    if init == 'advi':
        v_params = pm.variational.advi(n=n_init)
        start = pm.variational.sample_vp(v_params, 1, progressbar=False)[0]
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'advi_map':
        start = pm.find_MAP()
        v_params = pm.variational.advi(n=n_init, start=start)
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'map':
        start = pm.find_MAP()
        cov = pm.find_hessian(point=start)

    elif init == 'nuts':
        init_trace = pm.sample(step=pm.NUTS(), draws=n_init)
        cov = pm.trace_cov(init_trace[n_init//2:])

        start = {varname: np.mean(init_trace[varname], axis=0) for varname in init_trace.varnames}
    else:
        raise NotImplementedError('Initializer {} is not supported.'.format(init))

    step = pm.NUTS(scaling=cov, is_cov=True, **kwargs)

    return start, step
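A usage sketch for the function above (hypothetical: `my_model` stands in for any continuous pymc3 model):

import pymc3 as pm

with my_model:
    start, step = init_nuts(init='advi', n_init=50000)
    trace = pm.sample(2000, step=step, start=start)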
Example #4
def _laplace(model):
    """
    Fit a model using a Laplace approximation. Mainly for pedagogical use; ``mcmc`` and ``advi``
    give better approximations.

    Parameters
    ----------
    model: PyMC3 model

    Returns
    -------
    A dictionary mapping each variable name to a tuple of (mode, standard deviation).
    """
    with model:
        varis = [v for v in model.unobserved_RVs if not pm.util.is_transformed_name(v.name)]
        maps = pm.find_MAP(start=model.test_point, vars=varis)
        hessian = pm.find_hessian(maps, vars=varis)
        if np.linalg.det(hessian) == 0:
            raise np.linalg.LinAlgError("Singular matrix. Use mcmc or advi method")
        stds = np.diag(np.linalg.inv(hessian)) ** 0.5  # sqrt of the diagonal of the inverse Hessian
        maps = [v for (k, v) in maps.items() if not pm.util.is_transformed_name(k)]
        modes = [v.item() if v.size == 1 else v for v in maps]
        names = [v.name for v in varis]
        shapes = [np.atleast_1d(mode).shape for mode in modes]
        stds_reshaped = []
        idx0 = 0
        for shape in shapes:
            idx1 = idx0 + int(np.prod(shape))  # number of elements, not the sum of the dims
            stds_reshaped.append(np.reshape(stds[idx0:idx1], shape))
            idx0 = idx1
    return dict(zip(names, zip(modes, stds_reshaped)))
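A usage sketch for `_laplace` on a toy model (the model and data are assumptions made for illustration):

import numpy as np
import pymc3 as pm

with pm.Model() as toy:
    mu = pm.Normal('mu', 0., 10.)
    pm.Normal('y', mu=mu, sd=1., observed=np.array([0.8, 1.1, 0.9]))

print(_laplace(toy))   # {'mu': (mode, standard deviation)}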
Example #5
def test_plots_multidimensional():

    # Test single trace
    from .models import multidimensional_model

    start, model, _ = multidimensional_model()
    with model:
        h = np.diag(find_hessian(start))
        step = Metropolis(model.vars, h)
        trace = sample(3000, step, start)

        traceplot(trace)
Example #6
def test_plots():

    # Test single trace
    from pymc3.examples import arbitrary_stochastic as asmod

    with asmod.model as model:

        start = model.test_point
        h = find_hessian(start)
        step = Metropolis(model.vars, h)
        trace = sample(3000, step, start)

        traceplot(trace)
        forestplot(trace)

        autocorrplot(trace)
Example #7
def make_normal_approx(vars):
    '''
    Get the normal (Laplace) approximation to the posterior near its maximum
    for the specified variables. Useful for quick summaries without sampling
    from, e.g., linear models. Don't use it for complex models!
    Parameters:
        vars: a list of (untransformed) model variables
    Returns:
        a DataFrame with the MAP estimates, standard deviations, and central
        94% intervals estimated from the Hessian
    '''
    map_est = pm.find_MAP()
    # Elementwise reciprocal of the Hessian: only the diagonal entries are
    # used below, so posterior correlations between variables are ignored.
    std_est = (1 / pm.find_hessian(map_est, vars=vars)) ** 0.5
    cv = sp.stats.norm.ppf(0.97)  # z-score for a central 94% interval
    data = []
    for i, var in enumerate(vars):
        sd = std_est[i, i]
        data.append([map_est[var.name].round(3), sd.round(3),
                     (map_est[var.name] - sd * cv).round(3),
                     (map_est[var.name] + sd * cv).round(3)])
    return pd.DataFrame(data, columns=['map', 'sd', 'hdi_3%', 'hdi_97%'],
                        index=vars)
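A usage sketch on a toy regression (the model and data are assumptions; `pm`, `sp`, and `pd` are assumed to be imported at module level as pymc3, scipy, and pandas):

import numpy as np
import pymc3 as pm

x = np.linspace(0, 1, 50)
y_obs = 1.0 + 2.0 * x + np.random.normal(0, 0.3, size=50)

with pm.Model():
    a = pm.Normal('a', 0., 10.)
    b = pm.Normal('b', 0., 10.)
    pm.Normal('y', mu=a + b * x, sd=0.3, observed=y_obs)
    # The elementwise reciprocal can be negative off the diagonal, so the
    # square root may emit a RuntimeWarning; only the diagonal is used.
    print(make_normal_approx([a, b]))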
Example #8
def test_leapfrog_reversible():
    n = 3
    start, model, _ = models.non_normal(n)

    with model:
        h = pm.find_hessian(start, model=model)
        step = pm.HamiltonianMC(model.vars, h, model=model)

    bij = DictToArrayBijection(step.ordering, start)

    logp, dlogp = list(map(bij.mapf, step.fs))
    H = Hamiltonian(logp, dlogp, step.potential)

    q0 = bij.map(start)
    p0 = np.ones(n)*.05
    for e in [.01, .1, 1.2]:
        for L in [1, 2, 3, 4, 20]:

            q, p = q0, p0
            q, p = leapfrog(H, q, p, L, e)
            q, p = leapfrog(H, q, -p, L, e)

            close_to(q, q0, 1e-8, str((L, e)))
            close_to(-p, p0, 1e-8, str((L, e)))
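The reversibility property this test asserts does not depend on PyMC3. A self-contained NumPy sketch of the same check (a standard leapfrog integrator on a standard-normal target; every name here is local to the sketch):

import numpy as np

def leapfrog_np(grad_logp, q, p, n_steps, eps):
    # Standard leapfrog: half momentum step, alternating full steps,
    # closing half momentum step.
    p = p + 0.5 * eps * grad_logp(q)
    for _ in range(n_steps - 1):
        q = q + eps * p
        p = p + eps * grad_logp(q)
    q = q + eps * p
    p = p + 0.5 * eps * grad_logp(q)
    return q, p

grad_logp = lambda q: -q                        # gradient of log N(0, I)
q0 = np.array([0.3, -1.2, 0.7])
p0 = np.full(3, 0.05)
q, p = leapfrog_np(grad_logp, q0, p0, 20, 0.1)
q, p = leapfrog_np(grad_logp, q, -p, 20, 0.1)   # flip momentum, integrate back
print(np.allclose(q, q0), np.allclose(-p, p0))  # True True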
Example #9
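This fragment begins inside a model context; the opening lines were not captured. A plausible prelude, given only as a sketch (the priors and the DataFrame `data` with sex, weight, and height columns are assumptions):

import pymc3 as mc

with mc.Model() as model:
    intercept = mc.Normal('intercept', 125, sd=30, shape=2)
    beta = mc.Normal('beta', 0, sd=5, shape=2)
    error = mc.HalfNormal('error', sd=10)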
    # model equation
    sex_idx = data.sex.values
    height_mu = intercept[sex_idx] + beta[sex_idx] * data.weight

    mc.Normal('height', mu=height_mu, sd=error, observed=data.height)

# In[78]:

model.vars

# In[79]:

with model:
    start = mc.find_MAP()
    step = mc.NUTS(state=start)
    hessian = mc.find_hessian(start)
    trace = mc.sample(5000, step, start=start)

# In[80]:

fig, axes = plt.subplots(3, 2, figsize=(8, 6), squeeze=False)
mc.traceplot(trace, vars=['intercept', 'beta', 'error'], ax=axes)
fig.tight_layout()
fig.savefig("ch16-multilevel-sample-trace.pdf")
fig.savefig("ch16-multilevel-sample-trace.png")

# In[81]:

intercept_m, intercept_f = trace.get_values('intercept').mean(axis=0)

# In[82]:
Example #10
import pymc3 as pm

with pm.Model() as model:
    x = pm.Normal('x', 1, 1)
    x2 = pm.Potential('x2', -x**2)

    start = model.test_point
    h = pm.find_hessian(start)
    step = pm.Metropolis(model.vars, h)


def run(n=3000):
    if n == "short":
        n = 50
    with model:
        trace = pm.sample(n, step=step, start=start)


if __name__ == '__main__':
    run()
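The Potential term adds its expression directly to the model's joint log-density. A quick consistency check (a sketch; it relies on pymc3's `Model.logp`, which evaluates the compiled log-density at a point dict, and uses scipy only for the reference value):

import numpy as np
from scipy import stats

x0 = float(start['x'])
expected = stats.norm(1, 1).logpdf(x0) - x0 ** 2  # Normal logp plus the Potential
print(np.isclose(model.logp(start), expected))    # expect True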
Example #11
def init_nuts(init='auto', njobs=1, n_init=500000, model=None,
              random_seed=-1, progressbar=True, **kwargs):
    """Set up the mass matrix initialization for NUTS.

    NUTS convergence and sampling speed are extremely dependent on the
    choice of mass/scaling matrix. This function implements different
    methods for choosing or adapting the mass matrix.

    Parameters
    ----------
    init : str
        Initialization method to use.

        * auto : Choose a default initialization method automatically.
          Currently, this is `'jitter+adapt_diag'`, but this can change in
          the future. If you depend on the exact behaviour, choose an
          initialization method explicitly.
        * adapt_diag : Start with an identity mass matrix and then adapt
          a diagonal based on the variance of the tuning samples. All
          chains use the test value (usually the prior mean) as starting
          point.
        * jitter+adapt_diag : Same as `adapt_diag`, but add uniform jitter
          in [-1, 1] to the starting point in each chain.
        * advi+adapt_diag : Run ADVI and then adapt the resulting diagonal
          mass matrix based on the sample variance of the tuning samples.
        * advi+adapt_diag_grad : Run ADVI and then adapt the resulting
          diagonal mass matrix based on the variance of the gradients
          during tuning. This is **experimental** and might be removed
          in a future release.
        * advi : Run ADVI to estimate posterior mean and diagonal mass
          matrix.
        * advi_map: Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point. This is discouraged.
        * nuts : Run NUTS and estimate posterior mean and mass matrix from
          the trace.
    njobs : int
        Number of parallel jobs to start.
    n_init : int
        Number of iterations of the initializer.
        If 'advi', the number of iterations; if 'nuts', the number of draws.
    model : Model (optional if in `with` context)
    progressbar : bool
        Whether or not to display a progressbar for advi sampling.
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """
    model = pm.modelcontext(model)

    vars = kwargs.get('vars', model.vars)
    if set(vars) != set(model.vars):
        raise ValueError('Must use init_nuts on all variables of a model.')
    if not pm.model.all_continuous(vars):
        raise ValueError('init_nuts can only be used for models with only '
                         'continuous variables.')

    if not isinstance(init, str):
        raise TypeError('init must be a string.')

    if init is not None:
        init = init.lower()

    if init == 'auto':
        init = 'jitter+adapt_diag'

    pm._log.info('Initializing NUTS using {}...'.format(init))

    random_seed = int(np.atleast_1d(random_seed)[0])

    cb = [
        pm.callbacks.CheckParametersConvergence(
            tolerance=1e-2, diff='absolute'),
        pm.callbacks.CheckParametersConvergence(
            tolerance=1e-2, diff='relative'),
    ]

    if init == 'adapt_diag':
        start = [model.test_point] * njobs
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
        if njobs == 1:
            start = start[0]
    elif init == 'jitter+adapt_diag':
        start = []
        for _ in range(njobs):
            mean = {var: val.copy() for var, val in model.test_point.items()}
            for val in mean.values():
                val[...] += 2 * np.random.rand(*val.shape) - 1
            start.append(mean)
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
        if njobs == 1:
            start = start[0]
    elif init == 'advi+adapt_diag_grad':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init, method='advi', model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField
        start = approx.sample(draws=njobs)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        mean = approx.bij.rmap(approx.mean.get_value())
        mean = model.dict_to_array(mean)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdaptGrad(
            model.ndim, mean, cov, weight)
        if njobs == 1:
            start = start[0]
    elif init == 'advi+adapt_diag':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init, method='advi', model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField
        start = approx.sample(draws=njobs)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        mean = approx.bij.rmap(approx.mean.get_value())
        mean = model.dict_to_array(mean)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, cov, weight)
        if njobs == 1:
            start = start[0]
    elif init == 'advi':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init, method='advi', model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window
        )  # type: pm.MeanField
        start = approx.sample(draws=njobs)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        potential = quadpotential.QuadPotentialDiag(cov)
        if njobs == 1:
            start = start[0]
    elif init == 'advi_map':
        start = pm.find_MAP()
        approx = pm.MeanField(model=model, start=start)
        pm.fit(
            random_seed=random_seed,
            n=n_init, method=pm.KLqp(approx),
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window
        )
        start = approx.sample(draws=njobs)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds) ** 2
        potential = quadpotential.QuadPotentialDiag(cov)
        if njobs == 1:
            start = start[0]
    elif init == 'map':
        start = pm.find_MAP()
        cov = pm.find_hessian(point=start)
        start = [start] * njobs
        potential = quadpotential.QuadPotentialFull(cov)
        if njobs == 1:
            start = start[0]
    elif init == 'nuts':
        init_trace = pm.sample(draws=n_init, step=pm.NUTS(),
                               tune=n_init // 2,
                               random_seed=random_seed)
        cov = np.atleast_1d(pm.trace_cov(init_trace))
        start = list(np.random.choice(init_trace, njobs))
        potential = quadpotential.QuadPotentialFull(cov)
        if njobs == 1:
            start = start[0]
    else:
        raise NotImplementedError('Initializer {} is not supported.'.format(init))

    step = pm.NUTS(potential=potential, **kwargs)

    return start, step
Example #12
import pymc3 as pm

with pm.Model() as model:
    x = pm.Normal('x', 1, 1)
    x2 = pm.Potential('x2', -x ** 2)

    start = model.test_point
    h = pm.find_hessian(start)
    step = pm.Metropolis(model.vars, h)


def run(n=3000):
    if n == "short":
        n = 50
    with model:
        pm.sample(n, step=step, start=start)

if __name__ == '__main__':
    run()
Example #13
def init_nuts(init='ADVI',
              njobs=1,
              n_init=500000,
              model=None,
              random_seed=-1,
              progressbar=True,
              **kwargs):
    """Initialize and sample from posterior of a continuous model.

    This is a convenience function. NUTS convergence and sampling speed are extremely
    dependent on the choice of mass/scaling matrix. In our experience, using ADVI
    to estimate a diagonal covariance matrix and using this as the scaling matrix
    produces robust results over a wide class of continuous models.

    Parameters
    ----------
    init : str {'ADVI', 'ADVI_MAP', 'MAP', 'NUTS'}
        Initialization method to use.
        * ADVI : Run ADVI to estimate posterior mean and diagonal covariance matrix.
        * ADVI_MAP: Initialize ADVI with MAP and use MAP as starting point.
        * MAP : Use the MAP as starting point.
        * NUTS : Run NUTS and estimate posterior mean and covariance matrix.
    njobs : int
        Number of parallel jobs to start.
    n_init : int
        Number of iterations of the initializer.
        If 'advi', the number of iterations; if 'nuts', the number of draws.
    model : Model (optional if in `with` context)
    progressbar : bool
        Whether or not to display a progressbar for advi sampling.
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """

    model = pm.modelcontext(model)

    pm._log.info('Initializing NUTS using {}...'.format(init))

    random_seed = int(np.atleast_1d(random_seed)[0])

    if init is not None:
        init = init.lower()
    cb = [
        pm.callbacks.CheckParametersConvergence(tolerance=1e-2,
                                                diff='absolute'),
        pm.callbacks.CheckParametersConvergence(tolerance=1e-2,
                                                diff='relative'),
    ]
    if init == 'advi':
        approx = pm.fit(random_seed=random_seed,
                        n=n_init,
                        method='advi',
                        model=model,
                        callbacks=cb,
                        progressbar=progressbar,
                        obj_optimizer=pm.adagrad_window)  # type: pm.MeanField
        start = approx.sample(draws=njobs)
        stds = approx.gbij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        if njobs == 1:
            start = start[0]
    elif init == 'advi_map':
        start = pm.find_MAP()
        approx = pm.MeanField(model=model, start=start)
        pm.fit(random_seed=random_seed,
               n=n_init,
               method=pm.ADVI.from_mean_field(approx),
               callbacks=cb,
               progressbar=progressbar,
               obj_optimizer=pm.adagrad_window)
        start = approx.sample(draws=njobs)
        stds = approx.gbij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        if njobs == 1:
            start = start[0]
    elif init == 'map':
        start = pm.find_MAP()
        cov = pm.find_hessian(point=start)
    elif init == 'nuts':
        init_trace = pm.sample(draws=n_init,
                               step=pm.NUTS(),
                               tune=n_init // 2,
                               random_seed=random_seed)
        cov = np.atleast_1d(pm.trace_cov(init_trace))
        start = np.random.choice(init_trace, njobs)
        if njobs == 1:
            start = start[0]
    else:
        raise NotImplementedError(
            'Initializer {} is not supported.'.format(init))

    step = pm.NUTS(scaling=cov, is_cov=True, **kwargs)

    return start, step
Example #14
def init_nuts(init='ADVI',
              njobs=1,
              n_init=500000,
              model=None,
              random_seed=-1,
              **kwargs):
    """Initialize and sample from posterior of a continuous model.

    This is a convenience function. NUTS convergence and sampling speed are extremely
    dependent on the choice of mass/scaling matrix. In our experience, using ADVI
    to estimate a diagonal covariance matrix and using this as the scaling matrix
    produces robust results over a wide class of continuous models.

    Parameters
    ----------
    init : str {'ADVI', 'ADVI_MAP', 'MAP', 'NUTS'}
        Initialization method to use.
        * ADVI : Run ADVI to estimate posterior mean and diagonal covariance matrix.
        * ADVI_MAP: Initialize ADVI with MAP and use MAP as starting point.
        * MAP : Use the MAP as starting point.
        * NUTS : Run NUTS and estimate posterior mean and covariance matrix.
    njobs : int
        Number of parallel jobs to start.
    n_init : int
        Number of iterations of the initializer.
        If 'advi', the number of iterations; if 'nuts', the number of draws.
    model : Model (optional if in `with` context)
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start, nuts_sampler

    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """

    model = pm.modelcontext(model)

    pm._log.info('Initializing NUTS using {}...'.format(init))

    random_seed = int(np.atleast_1d(random_seed)[0])

    if init is not None:
        init = init.lower()

    if init == 'advi':
        v_params = pm.variational.advi(n=n_init, random_seed=random_seed)
        start = pm.variational.sample_vp(v_params,
                                         njobs,
                                         progressbar=False,
                                         hide_transformed=False,
                                         random_seed=random_seed)
        if njobs == 1:
            start = start[0]
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'advi_map':
        start = pm.find_MAP()
        v_params = pm.variational.advi(n=n_init,
                                       start=start,
                                       random_seed=random_seed)
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'map':
        start = pm.find_MAP()
        cov = pm.find_hessian(point=start)
    elif init == 'nuts':
        init_trace = pm.sample(step=pm.NUTS(),
                               draws=n_init,
                               random_seed=random_seed)[n_init // 2:]
        cov = np.atleast_1d(pm.trace_cov(init_trace))
        start = np.random.choice(init_trace, njobs)
        if njobs == 1:
            start = start[0]
    else:
        raise NotImplementedError(
            'Initializer {} is not supported.'.format(init))

    step = pm.NUTS(scaling=cov, is_cov=True, **kwargs)

    return start, step
Example #15
sf = dx_grid / dx_exact  # Jacobian scale factor
plt.figure()
#plt.stem(grid, posterior, use_line_collection=True)
plt.bar(grid, posterior, width=1 / n, alpha=0.2)
plt.plot(xs, post_exact * sf)
plt.title('grid approximation')
plt.yticks([])
plt.xlabel('θ')
plt.savefig('../figures/bb_grid.pdf')

# Laplace
with pm.Model() as normal_approximation:
    theta = pm.Beta('theta', 1., 1.)
    y = pm.Binomial('y', n=1, p=theta, observed=data)  # Bernoulli
    mean_q = pm.find_MAP()
    std_q = ((1 / pm.find_hessian(mean_q, vars=[theta]))**0.5)[0]
    mu = mean_q['theta']

print([mu, std_q])

plt.figure()
plt.plot(xs, stats.norm.pdf(xs, mu, std_q), '--', label='Laplace')
post_exact = stats.beta.pdf(xs, h + 1, t + 1)
plt.plot(xs, post_exact, label='exact')
plt.title('Quadratic approximation')
plt.xlabel('θ', fontsize=14)
plt.yticks([])
plt.legend()
plt.savefig('../figures/bb_laplace.pdf')

# HMC
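The fragment above begins mid-script; earlier lines evidently defined the data, the grid approximation, and the exact posterior. A minimal reconstruction consistent with the names used (a sketch; the counts h and t and the grid size n are assumptions, not the original values):

import numpy as np
import matplotlib.pyplot as plt
import pymc3 as pm
from scipy import stats

h, t = 6, 3                               # assumed heads/tails counts
data = np.repeat((1, 0), (h, t))          # Bernoulli observations
n = 20                                    # number of grid points
grid = np.linspace(0, 1, n)
like = stats.binom.pmf(h, h + t, grid)
posterior = like * np.ones(n)             # uniform prior
posterior /= posterior.sum()              # normalize to a probability mass
xs = np.linspace(0, 1, 200)
post_exact = stats.beta.pdf(xs, h + 1, t + 1)
dx_grid = grid[1] - grid[0]
dx_exact = xs[1] - xs[0]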
Example #16
def init_nuts(init='auto',
              chains=1,
              n_init=500000,
              model=None,
              random_seed=None,
              progressbar=True,
              **kwargs):
    """Set up the mass matrix initialization for NUTS.

    NUTS convergence and sampling speed are extremely dependent on the
    choice of mass/scaling matrix. This function implements different
    methods for choosing or adapting the mass matrix.

    Parameters
    ----------
    init : str
        Initialization method to use.

        * auto : Choose a default initialization method automatically.
          Currently, this is `'jitter+adapt_diag'`, but this can change in
          the future. If you depend on the exact behaviour, choose an
          initialization method explicitly.
        * adapt_diag : Start with an identity mass matrix and then adapt
          a diagonal based on the variance of the tuning samples. All
          chains use the test value (usually the prior mean) as starting
          point.
        * jitter+adapt_diag : Same as `adapt_diag`, but add uniform jitter
          in [-1, 1] to the starting point in each chain.
        * advi+adapt_diag : Run ADVI and then adapt the resulting diagonal
          mass matrix based on the sample variance of the tuning samples.
        * advi+adapt_diag_grad : Run ADVI and then adapt the resulting
          diagonal mass matrix based on the variance of the gradients
          during tuning. This is **experimental** and might be removed
          in a future release.
        * advi : Run ADVI to estimate posterior mean and diagonal mass
          matrix.
        * advi_map: Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point. This is discouraged.
        * nuts : Run NUTS and estimate posterior mean and mass matrix from
          the trace.
    chains : int
        Number of chains to start.
    n_init : int
        Number of iterations of the initializer.
        If 'advi', the number of iterations; if 'nuts', the number of draws.
    model : Model (optional if in `with` context)
    progressbar : bool
        Whether or not to display a progressbar for advi sampling.
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """
    model = pm.modelcontext(model)

    vars = kwargs.get('vars', model.vars)
    if set(vars) != set(model.vars):
        raise ValueError('Must use init_nuts on all variables of a model.')
    if not pm.model.all_continuous(vars):
        raise ValueError('init_nuts can only be used for models with only '
                         'continuous variables.')

    if not isinstance(init, str):
        raise TypeError('init must be a string.')

    if init is not None:
        init = init.lower()

    if init == 'auto':
        init = 'jitter+adapt_diag'

    pm._log.info('Initializing NUTS using {}...'.format(init))

    if random_seed is not None:
        random_seed = int(np.atleast_1d(random_seed)[0])
        np.random.seed(random_seed)

    cb = [
        pm.callbacks.CheckParametersConvergence(tolerance=1e-2,
                                                diff='absolute'),
        pm.callbacks.CheckParametersConvergence(tolerance=1e-2,
                                                diff='relative'),
    ]

    if init == 'adapt_diag':
        start = [model.test_point] * chains
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
    elif init == 'jitter+adapt_diag':
        start = []
        for _ in range(chains):
            mean = {var: val.copy() for var, val in model.test_point.items()}
            for val in mean.values():
                val[...] += 2 * np.random.rand(*val.shape) - 1
            start.append(mean)
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
    elif init == 'advi+adapt_diag_grad':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init,
            method='advi',
            model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        mean = approx.bij.rmap(approx.mean.get_value())
        mean = model.dict_to_array(mean)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdaptGrad(
            model.ndim, mean, cov, weight)
    elif init == 'advi+adapt_diag':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init,
            method='advi',
            model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        mean = approx.bij.rmap(approx.mean.get_value())
        mean = model.dict_to_array(mean)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, cov, weight)
    elif init == 'advi':
        approx = pm.fit(random_seed=random_seed,
                        n=n_init,
                        method='advi',
                        model=model,
                        callbacks=cb,
                        progressbar=progressbar,
                        obj_optimizer=pm.adagrad_window)  # type: pm.MeanField
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        potential = quadpotential.QuadPotentialDiag(cov)
    elif init == 'advi_map':
        start = pm.find_MAP(include_transformed=True)
        approx = pm.MeanField(model=model, start=start)
        pm.fit(random_seed=random_seed,
               n=n_init,
               method=pm.KLqp(approx),
               callbacks=cb,
               progressbar=progressbar,
               obj_optimizer=pm.adagrad_window)
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        potential = quadpotential.QuadPotentialDiag(cov)
    elif init == 'map':
        start = pm.find_MAP(include_transformed=True)
        cov = pm.find_hessian(point=start)
        start = [start] * chains
        potential = quadpotential.QuadPotentialFull(cov)
    elif init == 'nuts':
        init_trace = pm.sample(draws=n_init,
                               step=pm.NUTS(),
                               tune=n_init // 2,
                               random_seed=random_seed)
        cov = np.atleast_1d(pm.trace_cov(init_trace))
        start = list(np.random.choice(init_trace, chains))
        potential = quadpotential.QuadPotentialFull(cov)
    else:
        raise NotImplementedError(
            'Initializer {} is not supported.'.format(init))

    step = pm.NUTS(potential=potential, **kwargs)

    return start, step
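In recent PyMC3 this initializer is normally reached through the `init` argument of `pm.sample` rather than called directly; a typical invocation (a sketch; `model` is a placeholder):

with model:
    trace = pm.sample(draws=1000, tune=1000, chains=2, init='jitter+adapt_diag')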
Example #17
def init_nuts(init='advi', n_init=500000, model=None):
    """Initialize and sample from posterior of a continuous model.

    This is a convenience function. NUTS convergence and sampling speed are extremely
    dependent on the choice of mass/scaling matrix. In our experience, using ADVI
    to estimate a diagonal covariance matrix and using this as the scaling matrix
    produces robust results over a wide class of continuous models.

    Parameters
    ----------
    init : str {'advi', 'advi_map', 'map', 'nuts'}
        Initialization method to use.
        * advi : Run ADVI to estimate posterior mean and diagonal covariance matrix.
        * advi_map: Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point.
        * nuts : Run NUTS and estimate posterior mean and covariance matrix.
    n_init : int
        Number of iterations of the initializer.
        If 'advi', the number of iterations; if 'nuts', the number of draws.
    model : Model (optional if in `with` context)

    Returns
    -------
    start, nuts_sampler

    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """

    model = pm.modelcontext(model)

    pm._log.info('Initializing NUTS using {}...'.format(init))

    if init == 'advi':
        v_params = pm.variational.advi(n=n_init)
        start = pm.variational.sample_vp(v_params, 1, progressbar=False)[0]
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'advi_map':
        start = pm.find_MAP()
        v_params = pm.variational.advi(n=n_init, start=start)
        cov = np.power(model.dict_to_array(v_params.stds), 2)
    elif init == 'map':
        start = pm.find_MAP()
        cov = pm.find_hessian(point=start)

    elif init == 'nuts':
        init_trace = pm.sample(step=pm.NUTS(), draws=n_init)
        cov = pm.trace_cov(init_trace[n_init // 2:])

        start = {
            varname: np.mean(init_trace[varname], axis=0)
            for varname in init_trace.varnames
        }
    else:
        raise NotImplementedError('Initializer {} is not supported.'.format(init))

    step = pm.NUTS(scaling=cov, is_cov=True)

    return start, step
Example #18
_m.x = np.linspace(-0.1, 1.1, 100)
_m.f_x = 2 * _m.x**2 * (1 - _m.x)**2
plt.plot(_m.x, _m.f_x)

# As the analytical solution shows, the derivative vanishes at three points: $x=0$, $x=1$, and $x=\frac{1}{2}$. The value $x=\frac{1}{2}$ maximizes the function, and is the answer.

# # 2.6

# +
_26 = Object()
_26.data = np.repeat((0, 1), (3, 6))
with pm.Model() as _26.na:
    _26.p = pm.Uniform('p', 0, 1)
    _26.w = pm.Binomial('w', n=len(_26.data), p=_26.p, observed=_26.data.sum())
    _26.mean_p = pm.find_MAP()
    _26.std_q = ((1 / pm.find_hessian(_26.mean_p, vars=[_26.p]))**0.5)[0]

_26.mean_p['p'], _26.std_q
# -

# Assuming the posterior is Gaussian, it's maximized at $0.67$ and its standard deviation is $0.16$.

# 89% credible interval:

_26.norm_dist = stats.norm(_26.mean_p['p'], _26.std_q)
_26.z = stats.norm.ppf([(1 - .89) / 2, 1 - (1 - 0.89) / 2])
print("89% confidence interval:", _26.mean_p['p'] + _26.std_q * _26.z)

# # Medium
# ## 2M1