示例#1
0
    def get_step_for_trace(
        self,
        trace=None,
        model=None,
        regular_window=0,
        regular_variance=1e-3,
        **kwargs,
    ):
        """Get a PyMC3 NUTS step tuned for a given burn-in trace

        Args:
            trace: The ``MultiTrace`` output from a previous run of
                ``pymc3.sample``.
            regular_window: The weight (in units of number of steps) to use
                when regularizing the mass matrix estimate.
            regular_variance: The amplitude of the regularization for the mass
                matrix. This will be added to the diagonal of the covariance
                matrix with weight given by ``regular_window``.

        """
        model = pm.modelcontext(model)

        # If not given, use the trivial metric
        if trace is None or model.ndim == 1:
            potential = quad.QuadPotentialDiag(np.ones(model.ndim))

        else:
            # Loop over samples and convert to the relevant parameter space;
            # I'm sure that there's an easier way to do this, but I don't know
            # how to make something work in general...
            N = len(trace) * trace.nchains
            samples = np.empty((N, model.ndim))
            i = 0
            for chain in trace._straces.values():
                for p in chain:
                    samples[i] = model.bijection.map(p)
                    i += 1

            if self.dense:
                # Compute the regularized sample covariance
                cov = np.cov(samples, rowvar=0)
                if regular_window > 0:
                    cov = cov * N / (N + regular_window)
                    cov[np.diag_indices_from(cov)] += (regular_variance *
                                                       regular_window /
                                                       (N + regular_window))
                potential = quad.QuadPotentialFull(cov)
            else:
                var = np.var(samples, axis=0)
                if regular_window > 0:
                    var = var * N / (N + regular_window)
                    var += (regular_variance * regular_window /
                            (N + regular_window))
                potential = quad.QuadPotentialDiag(var)

        return pm.NUTS(potential=potential, **kwargs)
示例#2
0
def test_random_dense():
    np.random.seed(42)
    for _ in range(3):
        cov = np.random.rand(5, 5)
        cov += cov.T
        cov += 10 * np.eye(5)
        inv = np.linalg.inv(cov)
        assert np.allclose(inv.dot(cov), np.eye(5))

        pots = [
            quadpotential.QuadPotentialFull(cov),
            quadpotential.QuadPotentialFullInv(inv),
        ]
        if quadpotential.chol_available:
            pot = quadpotential.QuadPotential_Sparse(scipy.sparse.csc_matrix(cov))
            pots.append(pot)
        for pot in pots:
            cov_ = np.cov(np.array([pot.random() for _ in range(1000)]).T)
            assert np.allclose(cov_, inv, atol=0.1)
    times_consumed = []

    N = 100
    alphas = norm.rvs(size=N)
    betas = halfnorm.rvs(size=(N, 2))
    sigmas = halfnorm.rvs(size=N)

    print("=========================================")
    print(f"Tuning NUTS")
    print("=========================================")

    n_chains = 4
    init_trace = pm.sample(draws=1000, tune=1000, cores=n_chains)
    cov = np.atleast_1d(pm.trace_cov(init_trace))
    start = list(np.random.choice(init_trace, n_chains))
    potential = quadpotential.QuadPotentialFull(cov)
    step_size = init_trace.get_sampler_stats("step_size_bar")[-1]
    size = m.bijection.ordering.size
    step_scale = step_size * (size**0.25)

    # with pm.Model() as model_new:  # reset model. If you use theano.shared you can also update the value of model1 above
    for i in range(N):
        # No-U-Turn Sampler NUTS
        print("=========================================")
        print(f"Turn {i}")
        print("=========================================")
        # start = {"alpha": alphas[i], "beta": betas[i], "sigma": sigmas[i]}
        time_zero = default_timer()
        step = pm.NUTS(potential=potential,
                       adapt_step_size=False,
                       step_scale=step_scale)
示例#4
0
def init_nuts(init='auto',
              chains=1,
              n_init=500000,
              model=None,
              random_seed=None,
              progressbar=True,
              **kwargs):
    """Set up the mass matrix initialization for NUTS.

    NUTS convergence and sampling speed is extremely dependent on the
    choice of mass/scaling matrix. This function implements different
    methods for choosing or adapting the mass matrix.

    Parameters
    ----------
    init : str
        Initialization method to use.

        * auto : Choose a default initialization method automatically.
          Currently, this is `'jitter+adapt_diag'`, but this can change in
          the future. If you depend on the exact behaviour, choose an
          initialization method explicitly.
        * adapt_diag : Start with a identity mass matrix and then adapt
          a diagonal based on the variance of the tuning samples. All
          chains use the test value (usually the prior mean) as starting
          point.
        * jitter+adapt_diag : Same as `adapt_diag`, but add uniform jitter
          in [-1, 1] to the starting point in each chain.
        * advi+adapt_diag : Run ADVI and then adapt the resulting diagonal
          mass matrix based on the sample variance of the tuning samples.
        * advi+adapt_diag_grad : Run ADVI and then adapt the resulting
          diagonal mass matrix based on the variance of the gradients
          during tuning. This is **experimental** and might be removed
          in a future release.
        * advi : Run ADVI to estimate posterior mean and diagonal mass
          matrix.
        * advi_map: Initialize ADVI with MAP and use MAP as starting point.
        * map : Use the MAP as starting point. This is discouraged.
        * nuts : Run NUTS and estimate posterior mean and mass matrix from
          the trace.
    chains : int
        Number of jobs to start.
    n_init : int
        Number of iterations of initializer
        If 'ADVI', number of iterations, if 'nuts', number of draws.
    model : Model (optional if in `with` context)
    progressbar : bool
        Whether or not to display a progressbar for advi sampling.
    **kwargs : keyword arguments
        Extra keyword arguments are forwarded to pymc3.NUTS.

    Returns
    -------
    start : pymc3.model.Point
        Starting point for sampler
    nuts_sampler : pymc3.step_methods.NUTS
        Instantiated and initialized NUTS sampler object
    """
    model = pm.modelcontext(model)

    vars = kwargs.get('vars', model.vars)
    if set(vars) != set(model.vars):
        raise ValueError('Must use init_nuts on all variables of a model.')
    if not pm.model.all_continuous(vars):
        raise ValueError('init_nuts can only be used for models with only '
                         'continuous variables.')

    if not isinstance(init, str):
        raise TypeError('init must be a string.')

    if init is not None:
        init = init.lower()

    if init == 'auto':
        init = 'jitter+adapt_diag'

    pm._log.info('Initializing NUTS using {}...'.format(init))

    if random_seed is not None:
        random_seed = int(np.atleast_1d(random_seed)[0])
        np.random.seed(random_seed)

    cb = [
        pm.callbacks.CheckParametersConvergence(tolerance=1e-2,
                                                diff='absolute'),
        pm.callbacks.CheckParametersConvergence(tolerance=1e-2,
                                                diff='relative'),
    ]

    if init == 'adapt_diag':
        start = [model.test_point] * chains
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
    elif init == 'jitter+adapt_diag':
        start = []
        for _ in range(chains):
            mean = {var: val.copy() for var, val in model.test_point.items()}
            for val in mean.values():
                val[...] += 2 * np.random.rand(*val.shape) - 1
            start.append(mean)
        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
        var = np.ones_like(mean)
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, var, 10)
    elif init == 'advi+adapt_diag_grad':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init,
            method='advi',
            model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        mean = approx.bij.rmap(approx.mean.get_value())
        mean = model.dict_to_array(mean)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdaptGrad(
            model.ndim, mean, cov, weight)
    elif init == 'advi+adapt_diag':
        approx = pm.fit(
            random_seed=random_seed,
            n=n_init,
            method='advi',
            model=model,
            callbacks=cb,
            progressbar=progressbar,
            obj_optimizer=pm.adagrad_window,
        )  # type: pm.MeanField
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        mean = approx.bij.rmap(approx.mean.get_value())
        mean = model.dict_to_array(mean)
        weight = 50
        potential = quadpotential.QuadPotentialDiagAdapt(
            model.ndim, mean, cov, weight)
    elif init == 'advi':
        approx = pm.fit(random_seed=random_seed,
                        n=n_init,
                        method='advi',
                        model=model,
                        callbacks=cb,
                        progressbar=progressbar,
                        obj_optimizer=pm.adagrad_window)  # type: pm.MeanField
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        potential = quadpotential.QuadPotentialDiag(cov)
    elif init == 'advi_map':
        start = pm.find_MAP(include_transformed=True)
        approx = pm.MeanField(model=model, start=start)
        pm.fit(random_seed=random_seed,
               n=n_init,
               method=pm.KLqp(approx),
               callbacks=cb,
               progressbar=progressbar,
               obj_optimizer=pm.adagrad_window)
        start = approx.sample(draws=chains)
        start = list(start)
        stds = approx.bij.rmap(approx.std.eval())
        cov = model.dict_to_array(stds)**2
        potential = quadpotential.QuadPotentialDiag(cov)
    elif init == 'map':
        start = pm.find_MAP(include_transformed=True)
        cov = pm.find_hessian(point=start)
        start = [start] * chains
        potential = quadpotential.QuadPotentialFull(cov)
    elif init == 'nuts':
        init_trace = pm.sample(draws=n_init,
                               step=pm.NUTS(),
                               tune=n_init // 2,
                               random_seed=random_seed)
        cov = np.atleast_1d(pm.trace_cov(init_trace))
        start = list(np.random.choice(init_trace, chains))
        potential = quadpotential.QuadPotentialFull(cov)
    else:
        raise NotImplementedError(
            'Initializer {} is not supported.'.format(init))

    step = pm.NUTS(potential=potential, **kwargs)

    return start, step