Пример #1
0
 def single_draw(index):
     """Evaluate the model once and collect the values listed in ``var_names``.

     ``index`` is accepted but never read in the body. ``model``,
     ``observed``, ``traced_observeds`` and ``var_names`` are taken from the
     enclosing scope.

     Returns a tuple with one entry per name in ``var_names``, in order.
     """
     _, st = evaluate_model(model, observed=observed)

     def _value_of(name):
         # Lookup priority: untransformed values first, then explicitly
         # traced observed values, and deterministics as the fallback.
         if name in st.untransformed_values:
             return st.untransformed_values[name]
         if name in traced_observeds:
             return st.observed_values[name]
         return st.deterministics[name]

     return tuple(_value_of(name) for name in var_names)
Пример #2
0
    def sample(self, n):
        """Generate samples from posterior distribution.

        Draws ``n`` samples from ``self.approx``, keys them by
        ``self.unobserved_keys``, adds an untransformed entry for every name
        in ``self.state.transformed_values`` and returns the result of
        ``az.from_dict`` with ``self.state.observed_values`` attached as
        ``observed_data``.
        """
        keys = self.unobserved_keys
        draws = self.approx.sample(n)
        q_samples = dict(zip(keys, draws))

        # TODO - Account for deterministics as well.
        # Map every transformed variable back through its transform so the
        # stored values match the support of the original (constrained)
        # variable. NOTE(review): which direction does that depends on the
        # transform's JacobianPreference - confirm against the transform API.
        _, st = flow.evaluate_model(self.model)
        for transformed_name in self.state.transformed_values:
            name_parts = NameParts.from_name(transformed_name)
            untransformed_name = name_parts.full_untransformed_name
            transform = st.distributions[untransformed_name].transform
            prefers_forward = transform.JacobianPreference == JacobianPreference.Forward
            mapping = transform.forward if prefers_forward else transform.inverse
            q_samples[untransformed_name] = mapping(q_samples[transformed_name])

        # Prepend a length-1 axis to every array so InferenceData sees
        # n_chains=1: handles shape issues.
        trace = {}
        for key, value in q_samples.items():
            trace[key] = value.numpy()[np.newaxis]
        return az.from_dict(trace, observed_data=self.state.observed_values)
Пример #3
0
def sample_prior_predictive(
    model: ModelType,
    sample_shape: Union[int, Tuple[int]] = 1000,
    sample_from_observed: bool = True,
    var_names: Optional[Union[str, List[str]]] = None,
    state: Optional[SamplingState] = None,
) -> Dict[str, np.ndarray]:
    """
    Draw ``sample_shape`` values from the model for the desired ``var_names``.

    Parameters
    ----------
    model : types.GeneratorType, pymc4.Model
        Model to draw samples from
    sample_shape: Union[int, Tuple[int]]
        The sample shape of the draw. Every distribution has its core
        dimensions (e.g. ``pm.Normal("x", 0, tf.ones(2))`` has a single core
        dimension with ``shape=(2,)``). The ``sample_shape`` controls the total
        number of draws to make from a distribution, and the shape that will
        be prepended to the core dimensions. In the above case, if
        ``sample_shape=(3, 1)``, then the resulting draw will have
        ``shape=(3, 1, 2)``. If an ``int`` is passed, it's converted to a tuple
        with a single entry: ``(sample_shape,)``. Any other sequence is
        converted to a tuple.
    sample_from_observed: bool
        If ``False``, the distributions that were assigned observed values won't
        be resampled, and the observed values will be used for computations
        downstream.
        If ``True``, the distributions that were assigned observed values will
        be resampled. This means that their observed value will be completely
        ignored (including its implied shape), and a new sample will be drawn
        from the prior distribution.
        Observed variables are only returned in the ``Samples`` dictionary if
        ``sample_from_observed`` is ``True`` or the name of the observed
        variable is explicitly provided in ``var_names``.
    var_names: Optional[Union[str, List[str]]]
        The list of variable names that will be included in the returned
        samples. A single string is treated as a one-element list. If
        ``None``, the samples drawn for all untransformed distributions and
        deterministics will be returned in the ``Samples`` dictionary.
        Furthermore, if ``sample_from_observed=True``, then the observed
        variable names will be added to the untransformed distributions.
    state : Optional[pymc4.flow.SamplingState]
        A ``SamplingState`` that can be used to specify distributions fixed
        values and change observed values.

    Returns
    -------
    Samples: Dict[str, np.ndarray]
        A dictionary of ``var_names`` keys and their corresponding drawn
        samples.

    Raises
    ------
    ValueError
        If any name in ``var_names`` is not an untransformed distribution, a
        deterministic or an observed variable of the model.

    Examples
    --------
    Let's define a simple model to sample from

    >>> import pymc4 as pm
    >>> @pm.model
    ... def model():
    ...     sd = yield pm.HalfNormal("sd", 1.)
    ...     norm = yield pm.Normal("n", 0, sd, observed=np.random.randn(10))

    Now, we may want to draw samples from the model's prior, ignoring the
    observed values.

    >>> prior_samples = sample_prior_predictive(model(), sample_shape=(20, 3))

    The samples are returned as a dictionary with the variable names as keys

    >>> sorted(list(prior_samples))
    ['model/n', 'model/sd']

    The drawn values are the dictionary's values, and their shape will depend
    on the supplied ``sample_shape``

    >>> [v.shape for v in prior_samples.values()]
    [(20, 3), (20, 3)]

    If we only wanted to draw samples from unobserved variables we would
    have done the following

    >>> prior_samples = sample_prior_predictive(model(), sample_from_observed=False)
    >>> sorted(list(prior_samples))
    ['model/sd']

    Notes
    -----
    If ``sample_from_observed=False``, the observed value passed to the
    variables will be used in the later stages of the model's computation.

    >>> import pymc4 as pm
    >>> @pm.model
    ... def model2():
    ...     sd = yield pm.HalfNormal("sd", 1.)
    ...     x = yield pm.Normal("x", 0, sd, observed=np.ones(10))
    ...     y = yield pm.Normal("y", x, 1e-8)
    >>> prior_samples = sample_prior_predictive(
    ...     model2(), sample_shape=(20,), sample_from_observed=False
    ... )
    >>> np.allclose(np.mean(prior_samples["model2/y"]), 1)
    True

    Furthermore, this has consequences at the shape level of the drawn samples

    >>> prior_samples["model2/y"].shape
    (20, 10)

    If ``sample_from_observed=True`` the value of the ``x`` random variable
    will be drawn from its prior distribution, which will have consequences
    both at the value and shape levels of downstream computations

    >>> prior_samples = sample_prior_predictive(
    ...     model2(), sample_shape=(20,), sample_from_observed=True
    ... )
    >>> np.allclose(np.mean(prior_samples["model2/y"]), 1)
    False
    >>> prior_samples["model2/y"].shape
    (20,)

    """
    # Normalise sample_shape to a tuple of ints so it can be concatenated with
    # the core shape of each draw further down.
    if isinstance(sample_shape, (int, np.integer)):
        sample_shape = (int(sample_shape),)
    else:
        sample_shape = tuple(sample_shape)

    # Do a single forward pass to establish the distributions, deterministics and observeds
    state = evaluate_model(model, state=state)[1]
    distributions_names = list(state.untransformed_values)
    deterministic_names = list(state.deterministics)
    observed = None
    traced_observeds: Set[str] = set()
    if sample_from_observed:
        # Overriding every observed value with None makes the later model
        # evaluations draw those variables instead of using their data.
        state.observed_values = observed = {k: None for k in state.observed_values}
        distributions_names = distributions_names + list(state.observed_values)

    if isinstance(var_names, str):
        # A bare string would otherwise be iterated character by character.
        var_names = [var_names]

    if var_names is None:
        var_names = distributions_names + deterministic_names
    else:
        var_names = list(var_names)
        # We can trace the observed values if their names are explicitly requested in var_names
        traced_observeds = {name for name in var_names if name in state.observed_values}

    # Validate against the full set of names that can be returned. Explicitly
    # requested observed variables are valid and must not be reported as
    # unknown; the unknown names keep the caller's order.
    known_names = set(distributions_names) | set(deterministic_names) | traced_observeds
    unknown_names = list(dict.fromkeys(name for name in var_names if name not in known_names))
    if unknown_names:
        raise ValueError(
            "Some of the supplied var_names are not defined in the supplied "
            "model {}.\nList of unknown var_names: {}".format(model, unknown_names)
        )

    # Setup the function that makes a single draw
    @tf.function(autograph=False)
    def single_draw(index):
        # ``index`` only exists so that tf.vectorized_map has something to map over.
        _, st = evaluate_model(model, observed=observed)
        return tuple(
            [
                (
                    st.untransformed_values[k]
                    if k in st.untransformed_values
                    else (st.observed_values[k] if k in traced_observeds else st.deterministics[k])
                )
                for k in var_names
            ]
        )

    # Make draws in parallel with tf.vectorized_map
    samples = tf.vectorized_map(single_draw, tf.range(int(np.prod(sample_shape))))

    # Convert the samples to ndarrays and make a dictionary with the desired sample_shape
    output = {}
    for name, sample in zip(var_names, samples):
        sample = sample.numpy()
        # The leading axis holds the flattened draws; unfold it into sample_shape.
        output[name] = np.reshape(sample, sample_shape + sample.shape[1:])
    return output
Пример #4
0
def sample_prior_predictive(
    model: ModelType,
    sample_shape: Union[int, Tuple[int]] = 1000,
    sample_from_observed: bool = True,
    var_names: Optional[Union[str, List[str]]] = None,
    state: Optional[SamplingState] = None,
    use_auto_batching: bool = True,
) -> InferenceData:
    """
    Draw ``sample_shape`` values from the model for the desired ``var_names``.

    Parameters
    ----------
    model : types.GeneratorType, pymc4.Model
        Model to draw samples from
    sample_shape: Union[int, Tuple[int]]
        The sample shape of the draw. Every distribution has its core dimensions
        (e.g. ``pm.Normal("x", 0, tf.ones(2))`` has a single core dimension with ``shape=(2,)``).
        The ``sample_shape`` controls the total number of draws to make from a distribution, and
        the shape that will be prepended to the core dimensions. In the above case, if
        ``sample_shape=(3, 1)``, then the resulting draw will have ``shape=(3, 1, 2)``. If an
        ``int`` is passed, it's converted to a tuple with a single entry: ``(sample_shape,)``.
        Any other sequence is converted to a tuple.
    sample_from_observed: bool
        If ``False``, the distributions that were assigned observed values won't be resampled, and
        the observed values will be used for computations downstream.
        If ``True``, the distributions that were assigned observed values will be resampled. This
        means that their observed value will be completely ignored (including its implied shape),
        and a new sample will be drawn from the prior distribution.
        Observed variables are only included in the returned samples if
        ``sample_from_observed`` is ``True`` or the name of the observed variable is explicitly
        provided in ``var_names``.
    var_names: Optional[Union[str, List[str]]]
        The list of variable names that will be included in the returned samples. Strings can be
        used to specify a single variable. If ``None``, the samples drawn for all untransformed
        distributions and deterministics will be included in the returned samples.
        Furthermore, if ``sample_from_observed=True``, then the observed variable names will be
        added to the untransformed distributions.
    state : Optional[pymc4.flow.SamplingState]
        A ``SamplingState`` that can be used to specify distributions fixed values and change
        observed values.
    use_auto_batching: bool
        A bool value that indicates whether ``sample_prior_predictive`` should automatically batch
        the draws or not. If you are sure you have manually tuned your model to be fully
        vectorized, then you can set this to ``False``, and your sampling should be faster than
        the auto batched counterpart. If you are not sure if your model is vectorized, then auto
        batching will safely sample from it but with some additional overhead.

    Returns
    -------
    Samples: InferenceDataType
        An ArviZ InferenceData object with a prior_predictive group

    Raises
    ------
    ValueError
        If any name in ``var_names`` is not an untransformed distribution, a deterministic or an
        observed variable of the model.

    Examples
    --------
    Let's define a simple model to sample from

    >>> import pymc4 as pm
    >>> @pm.model
    ... def model():
    ...     sd = yield pm.HalfNormal("sd", 1.)
    ...     norm = yield pm.Normal("n", 0, sd, observed=np.random.randn(10))

    Now, we may want to draw samples from the model's prior, ignoring the
    observed values.

    >>> prior_samples = sample_prior_predictive(model(), sample_shape=(20, 3))

    The samples are returned as an InferenceData object with a prior_predictive group

    >>> sorted(list(prior_samples.prior_predictive))
    ['model/n', 'model/sd']

    The drawn values are the xarray DataSet values, and their shape will depend on the supplied
    ``sample_shape``

    >>> [v.shape for v in prior_samples.prior_predictive.values()]
    [(1, 20, 3), (1, 20, 3)]

    If we only wanted to draw samples from unobserved variables we would have done the following

    >>> prior_samples = sample_prior_predictive(model(), sample_from_observed=False)
    >>> sorted(list(prior_samples.prior_predictive))
    ['model/sd']

    Notes
    -----
    If ``sample_from_observed=False``, the observed value passed to the variables will be used in
    the later stages of the model's computation.

    >>> import pymc4 as pm
    >>> @pm.model
    ... def model2():
    ...     sd = yield pm.HalfNormal("sd", 1.)
    ...     x = yield pm.Normal("x", 0, sd, observed=np.ones(10))
    ...     y = yield pm.Normal("y", x, 1e-8)
    >>> prior_samples = sample_prior_predictive(
    ...     model2(), sample_shape=(20,), sample_from_observed=False
    ... )
    >>> np.allclose(np.mean(prior_samples.prior_predictive["model2/y"]), 1)
    True

    Furthermore, this has consequences at the shape level of the drawn samples

    >>> prior_samples.prior_predictive["model2/y"].shape
    (1, 20, 10)

    If ``sample_from_observed=True`` the value of the ``x`` random variable will be drawn from its
    prior distribution, which will have consequences both at the value and shape levels of
    downstream computations

    >>> prior_samples = sample_prior_predictive(
    ...     model2(), sample_shape=(20,), sample_from_observed=True
    ... ).prior_predictive
    >>> np.allclose(np.mean(prior_samples["model2/y"]), 1)
    False
    >>> prior_samples["model2/y"].shape
    (1, 20)

    If you take special care to fully vectorize your model, you will be able
    to sample from it when you set ``use_auto_batching=False``

    >>> import numpy as np
    >>> from time import time
    >>> observed = np.ones(10, dtype="float32")
    >>> @pm.model
    ... def vect_model():
    ...     mu = yield pm.Normal("mu", 0, 1, conditionally_independent=True)
    ...     scale = yield pm.HalfNormal("scale", 1, conditionally_independent=True)
    ...     obs = yield pm.Normal(
    ...         "obs", mu, scale, event_stack=len(observed), observed=observed
    ...     )
    >>> st1 = time()
    >>> prior_samples1 = sample_prior_predictive(
    ...     vect_model(), sample_shape=(30, 20), use_auto_batching=False
    ... ).prior_predictive
    >>> st2 = en1 = time()
    >>> prior_samples2 = sample_prior_predictive(
    ...     vect_model(), sample_shape=(30, 20), use_auto_batching=True
    ... ).prior_predictive
    >>> en2 = time()
    >>> prior_samples2["vect_model/obs"].shape
    (1, 30, 20, 10)
    >>> prior_samples1["vect_model/obs"].shape
    (1, 30, 20, 10)
    >>> (en1 - st1) < (en2 - st2)
    True

    """
    # Normalise sample_shape to a tuple of ints so it can be concatenated with
    # the core shape of each draw further down.
    if isinstance(sample_shape, (int, np.integer)):
        sample_shape = (int(sample_shape),)
    else:
        sample_shape = tuple(sample_shape)

    # Do a single forward pass to establish the distributions, deterministics and observeds
    _, state = evaluate_meta_model(model, state=state)
    distributions_names = list(state.untransformed_values)
    deterministic_names = list(state.deterministics_values)
    observed = None
    traced_observeds: Set[str] = set()
    if sample_from_observed:
        # Overriding every observed value with None makes the later model
        # evaluations draw those variables instead of using their data.
        state.observed_values = observed = {k: None for k in state.observed_values}
        distributions_names = distributions_names + list(state.observed_values)

    if isinstance(var_names, str):
        var_names = [var_names]

    if var_names is None:
        var_names = distributions_names + deterministic_names
    else:
        var_names = list(var_names)
        # We can trace the observed values if their names are explicitly requested in var_names
        traced_observeds = {name for name in var_names if name in state.observed_values}

    # Validate against the full set of names that can be returned. Explicitly
    # requested observed variables are valid and must not be reported as
    # unknown; the unknown names keep the caller's order.
    known_names = set(distributions_names) | set(deterministic_names) | traced_observeds
    unknown_names = list(dict.fromkeys(name for name in var_names if name not in known_names))
    if unknown_names:
        raise ValueError(
            "Some of the supplied var_names are not defined in the supplied "
            "model {}.\nList of unknown var_names: {}".format(model, unknown_names)
        )

    # If we don't have to auto-batch, then we can simply evaluate the model
    if not use_auto_batching:
        _, state = evaluate_model(model, observed=observed, sample_shape=sample_shape)
        # Look names up in the state's values first, then in its deterministics.
        all_values = collections.ChainMap(state.all_values, state.deterministics_values)
        return trace_to_arviz(
            prior_predictive={k: all_values[k].numpy() for k in var_names}
        )

    # Setup the function that makes a single draw
    @tf.function(autograph=False)
    def single_draw(index):
        # ``index`` only exists so that tf.vectorized_map has something to map over.
        _, draw_state = evaluate_model(model, observed=observed)
        return tuple(
            draw_state.untransformed_values[k]
            if k in draw_state.untransformed_values
            else (
                draw_state.observed_values[k]
                if k in traced_observeds
                else draw_state.deterministics_values[k]
            )
            for k in var_names
        )

    # Make draws in parallel with tf.vectorized_map
    samples = tf.vectorized_map(single_draw, tf.range(int(np.prod(sample_shape))))

    # Convert the samples to ndarrays and make a dictionary with the desired sample_shape
    output = {}
    for name, sample in zip(var_names, samples):
        sample = sample.numpy()
        # The leading axis holds the flattened draws; unfold it into sample_shape.
        output[name] = np.reshape(sample, sample_shape + sample.shape[1:])

    return trace_to_arviz(prior_predictive=output)