Exemplo n.º 1
0
def initialize_sampling_state(
    model: Model,
    observed: Optional[dict] = None,
    state: Optional[flow.SamplingState] = None
) -> Tuple[flow.SamplingState, List[str]]:
    """
    Evaluate ``model`` once and convert the resulting state into a sampling state.

    The model is run through ``flow.evaluate_meta_model`` with the supplied
    ``observed`` and ``state`` arguments. The names of the deterministics are
    read from the evaluated state *before* it is converted with
    ``as_sampling_state()``.

    Parameters
    ----------
    model : pymc4.Model
        The model to evaluate.
    observed : Optional[dict]
        Forwarded unchanged as the ``observed`` argument of
        ``flow.evaluate_meta_model``.
    state : Optional[flow.SamplingState]
        Forwarded unchanged as the ``state`` argument of
        ``flow.evaluate_meta_model``.

    Returns
    -------
    state : pymc4.flow.SamplingState
        The first element returned by ``as_sampling_state()``.
    deterministic_names : List[str]
        The names of the evaluated state's deterministics, followed by the
        names returned as the second element of ``as_sampling_state()``.
    """
    # Only the state produced by the evaluation is needed; the first element is discarded.
    _, evaluated = flow.evaluate_meta_model(model, observed=observed, state=state)

    # Snapshot the deterministic names before the state is converted.
    names = list(evaluated.deterministics)

    sampling_state, transformed_names = evaluated.as_sampling_state()
    names = names + transformed_names
    return sampling_state, names
Exemplo n.º 2
0
 def logpfn(*values):
     """Return the collected log-probability of ``self.model`` for ``values[0]``.

     Only the first positional argument is read. It is split with
     ``self.order.split`` and the result is passed as ``values`` to
     ``flow.evaluate_meta_model``.
     """
     pieces = self.order.split(values[0])
     # The first element of the evaluation result is not needed here.
     _, evaluated = flow.evaluate_meta_model(self.model, values=pieces)
     log_prob = evaluated.collect_log_prob()
     return log_prob
Exemplo n.º 3
0
def sample_prior_predictive(
    model: ModelType,
    sample_shape: Union[int, Tuple[int]] = 1000,
    sample_from_observed: bool = True,
    var_names: Optional[Union[str, List[str]]] = None,
    state: Optional[SamplingState] = None,
    use_auto_batching: bool = True,
) -> InferenceData:
    """
    Draw ``sample_shape`` values from the model for the desired ``var_names``.

    Parameters
    ----------
    model : types.GeneratorType, pymc4.Model
        Model to draw samples from
    sample_shape: Union[int, Tuple[int]]
        The sample shape of the draw. Every distribution has its core dimensions
        (e.g. ``pm.Normal("x", 0, tf.ones(2))`` has a single core dimension with ``shape=(2,)``).
        The ``sample_shape`` controls the total number of draws to make from a distribution, and
        the shape that will be prepended to the core dimensions. In the above case, if
        ``sample_shape=(3, 1)``, then the resulting draw will have ``shape=(3, 1, 2)``. If an
        ``int`` is passed, it's converted to a tuple with a single entry: ``(sample_shape,)``
    sample_from_observed: bool
        If ``False``, the distributions that were assigned observed values won't be resampled, and
        the observed values will be used for computations downstream.
        If ``True``, the distributions that were assigned observed values will be resampled. This
        means that their observed value will be completely ignored (including its implied shape),
        and a new sample will be drawn from the prior distribution.
        Observed variables are only included in the returned samples if
        ``sample_from_observed`` is ``True`` or the name of the observed variable is explicitly
        provided in ``var_names``.
    var_names: Optional[Union[str, List[str]]]
        The list of variable names that will be included in the returned samples. Strings can be
        used to specify a single variable. If ``None``, the samples drawn for all untransformed
        distributions and deterministics will be returned.
        Furthermore, if ``sample_from_observed=True``, then the observed variable names will be
        added to the untransformed distributions.
    state : Optional[pymc4.flow.SamplingState]
        A ``SamplingState`` that can be used to specify distributions fixed values and change
        observed values.
    use_auto_batching: bool
        A bool value that indicates whether ``sample_prior_predictive`` should automatically batch
        the draws or not. If you are sure you have manually tuned your model to be fully
        vectorized, then you can set this to ``False``, and your sampling should be faster than
        the auto batched counterpart. If you are not sure if your model is vectorized, then auto
        batching will safely sample from it but with some additional overhead.

    Returns
    -------
    Samples: InferenceDataType
        An ArviZ's InferenceData object with a prior_predictive group

    Raises
    ------
    ValueError
        If ``var_names`` contains a name that is neither an untransformed distribution, a
        deterministic, nor an observed variable of the model.

    Examples
    --------
    Let's define a simple model to sample from

    >>> import numpy as np
    >>> import pymc4 as pm
    >>> @pm.model
    ... def model():
    ...     sd = yield pm.HalfNormal("sd", 1.)
    ...     norm = yield pm.Normal("n", 0, sd, observed=np.random.randn(10))

    Now, we may want to draw samples from the model's prior, ignoring the
    observed values.

    >>> prior_samples = sample_prior_predictive(model(), sample_shape=(20, 3))

    The samples are returned as an InferenceData object with a prior_predictive group

    >>> sorted(list(prior_samples.prior_predictive))
    ['model/n', 'model/sd']

    The drawn values are the xarray DataSet values, and their shape will depend on the supplied
    ``sample_shape``

    >>> [v.shape for v in prior_samples.prior_predictive.values()]
    [(1, 20, 3), (1, 20, 3)]

    If we only wanted to draw samples from unobserved variables we would have done the following

    >>> prior_samples = sample_prior_predictive(model(), sample_from_observed=False)
    >>> sorted(list(prior_samples.prior_predictive))
    ['model/sd']

    Notes
    -----
    If ``sample_from_observed=False``, the observed value passed to the variables will be used in
    the later stages of the model's computation.

    >>> import pymc4 as pm
    >>> @pm.model
    ... def model2():
    ...     sd = yield pm.HalfNormal("sd", 1.)
    ...     x = yield pm.Normal("x", 0, sd, observed=np.ones(10))
    ...     y = yield pm.Normal("y", x, 1e-8)
    >>> prior_samples = sample_prior_predictive(
    ...     model2(), sample_shape=(20,), sample_from_observed=False
    ... )
    >>> np.allclose(np.mean(prior_samples.prior_predictive["model2/y"]), 1)
    True

    Furthermore, this has consequences at the shape level of the drawn samples

    >>> prior_samples.prior_predictive["model2/y"].shape
    (1, 20, 10)

    If ``sample_from_observed=True`` the value of the ``x`` random variable will be drawn from its
    prior distribution, which will have consequences both at the value and shape levels of
    downstream computations

    >>> prior_samples = sample_prior_predictive(
    ...     model2(), sample_shape=(20,), sample_from_observed=True
    ... ).prior_predictive
    >>> np.allclose(np.mean(prior_samples["model2/y"]), 1)
    False
    >>> prior_samples["model2/y"].shape
    (1, 20)

    If you take special care to fully vectorize your model, you will be able
    to sample from it when you set ``use_auto_batching=False``

    >>> import numpy as np
    >>> from time import time
    >>> observed = np.ones(10, dtype="float32")
    >>> @pm.model
    ... def vect_model():
    ...     mu = yield pm.Normal("mu", 0, 1, conditionally_independent=True)
    ...     scale = yield pm.HalfNormal("scale", 1, conditionally_independent=True)
    ...     obs = yield pm.Normal(
    ...         "obs", mu, scale, event_stack=len(observed), observed=observed
    ...     )
    >>> st1 = time()
    >>> prior_samples1 = sample_prior_predictive(
    ...     vect_model(), sample_shape=(30, 20), use_auto_batching=False
    ... ).prior_predictive
    >>> st2 = en1 = time()
    >>> prior_samples2 = sample_prior_predictive(
    ...     vect_model(), sample_shape=(30, 20), use_auto_batching=True
    ... ).prior_predictive
    >>> en2 = time()
    >>> prior_samples2["vect_model/obs"].shape
    (1, 30, 20, 10)
    >>> prior_samples1["vect_model/obs"].shape
    (1, 30, 20, 10)
    >>> (en1 - st1) < (en2 - st2)
    True

    """
    if isinstance(sample_shape, int):
        sample_shape = (sample_shape,)

    # Do a single forward pass to establish the distributions, deterministics and observeds
    _, state = evaluate_meta_model(model, state=state)
    distributions_names = list(state.untransformed_values)
    deterministic_names = list(state.deterministics_values)
    observed = None
    traced_observeds: Set[str] = set()
    if sample_from_observed:
        # Every observed value is replaced by None; per the docstring above, this is the
        # mode in which the observed variables are resampled instead of reused.
        state.observed_values = observed = {k: None for k in state.observed_values}
        distributions_names = distributions_names + list(state.observed_values)

    if isinstance(var_names, str):
        var_names = [var_names]

    if var_names is None:
        var_names = distributions_names + deterministic_names
    else:
        # We can trace the observed values if their names are explicitly requested in var_names
        traced_observeds = {
            var_name for var_name in var_names if var_name in state.observed_values
        }

    # Validate the requested names. The set of unknown names is computed once and reused
    # for the message, so that explicitly requested observed variables (which are valid)
    # are never reported as unknown.
    known_names = set(distributions_names + deterministic_names) | traced_observeds
    unknown_names = set(var_names) - known_names
    if unknown_names:
        raise ValueError(
            "Some of the supplied var_names are not defined in the supplied "
            "model {}.\nList of unknown var_names: {}".format(
                model,
                # Sorted so that the error message is deterministic
                sorted(unknown_names, key=str),
            ))

    # If we don't have to auto-batch, then we can simply evaluate the model
    if not use_auto_batching:
        _, state = evaluate_model(model,
                                  observed=observed,
                                  sample_shape=sample_shape)
        all_values = collections.ChainMap(state.all_values,
                                          state.deterministics_values)
        return trace_to_arviz(
            prior_predictive={k: all_values[k].numpy()
                              for k in var_names})

    # Setup the function that makes a single draw
    @tf.function(autograph=False)
    def single_draw(index):
        # ``index`` is unused: it only exists because tf.vectorized_map maps over elements
        _, state = evaluate_model(model, observed=observed)
        return tuple(
            state.untransformed_values[k] if k in state.untransformed_values
            else (state.observed_values[k] if k in
                  traced_observeds else state.deterministics_values[k])
            for k in var_names)

    # Make draws in parallel with tf.vectorized_map
    samples = tf.vectorized_map(single_draw,
                                tf.range(int(np.prod(sample_shape))))

    # Convert the samples to ndarrays and make a dictionary with the desired sample_shape.
    # The sample shape is normalised to a tuple so that it can be concatenated with the
    # per-draw shape even when a list was supplied.
    batch_shape = tuple(sample_shape)
    output = dict()
    for name, sample in zip(var_names, samples):
        sample = sample.numpy()
        # Axis 0 holds the flattened draws; it is expanded back to ``sample_shape``
        output[name] = np.reshape(sample, batch_shape + sample.shape[1:])

    return trace_to_arviz(prior_predictive=output)