Example #1
def test_HSStep_NegativeBinomial_sparse():
    np.random.seed(2032)
    M = 5
    N = 50
    X = np.random.normal(size=N * M).reshape((N, M))
    beta_true = np.array([1, 1, 2, 2, 0])
    y_nb = pm.NegativeBinomial.dist(np.exp(X.dot(beta_true)), 1).random()

    X = sp.sparse.csr_matrix(X)

    N_draws = 500
    with pm.Model():
        beta = HorseShoe("beta", tau=1, shape=M)
        pm.NegativeBinomial("y",
                            mu=at.exp(sp_dot(X, at.shape_padright(beta))),
                            alpha=1,
                            observed=y_nb)
        hsstep = HSStep([beta])
        trace = pm.sample(
            draws=N_draws,
            step=hsstep,
            chains=1,
            return_inferencedata=True,
            compute_convergence_checks=False,
        )

    beta_samples = trace.posterior["beta"][0].values
    assert beta_samples.shape == (N_draws, M)
    np.testing.assert_allclose(beta_samples.mean(0), beta_true, atol=0.5)
Example #2
def test_HSStep_sparse():
    np.random.seed(2032)
    M = 5
    N = 50
    X = np.random.normal(size=N * M).reshape((N, M))
    beta_true = np.random.normal(10, size=M)
    y = np.random.normal(X.dot(beta_true), 1)
    X = sp.sparse.csr_matrix(X)

    M = X.shape[1]
    with pm.Model():
        beta = HorseShoe("beta", tau=1, shape=M)
        pm.Normal("y",
                  mu=sp_dot(X, at.shape_padright(beta)),
                  sigma=1,
                  observed=y)
        hsstep = HSStep([beta])
        trace = pm.sample(
            draws=50,
            tune=0,
            step=hsstep,
            chains=1,
            return_inferencedata=True,
            compute_convergence_checks=False,
        )

    beta_samples = trace.posterior["beta"][0].values
    assert beta_samples.shape == (50, M)
    np.testing.assert_allclose(beta_samples.mean(0), beta_true, atol=0.3)
Example #3
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of non-pooling dimensions of the
    input to exactly `leftdims`, by adding extra dimensions on the left or
    by combining some existing dimensions on the left of the input.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).
    """
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left
        dummy_dims = tensor.as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = tensor.join(0, dummy_dims, input.shape[:non_pool_ndim], img_shape)
    else:
        # too many dimensions, combine the leading dimensions
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = tensor.prod(input.shape[:batched_ndim])
        # convert to a vector for tensor.join
        batch_size = tensor.shape_padright(batch_size, 1)
        new_shape = tensor.join(
            0, batch_size, input.shape[batched_ndim:non_pool_ndim], img_shape
        )

    # store in the required shape
    new_shape = tensor.cast(new_shape, "int64")
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND
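
The shape arithmetic spelled out in the docstring can be checked with plain NumPy. Below is a minimal sketch (a hypothetical helper `np_pad_dims`, not part of Theano) that mirrors the pad/combine logic without any GPU ops:

import numpy as np

def np_pad_dims(arr, leftdims, rightdims):
    # NumPy sketch of the same shape arithmetic; no GPU ops involved.
    if arr.ndim == leftdims + rightdims:
        return arr
    img_shape = arr.shape[-rightdims:]
    non_pool_ndim = arr.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few leading dimensions: pad with singleton axes on the left
        new_shape = (1,) * (leftdims - non_pool_ndim) + arr.shape[:non_pool_ndim] + img_shape
    else:
        # too many leading dimensions: collapse the extras into one batch axis
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = int(np.prod(arr.shape[:batched_ndim]))
        new_shape = (batch_size,) + arr.shape[batched_ndim:non_pool_ndim] + img_shape
    return arr.reshape(new_shape)

# The cases from the docstring above
assert np_pad_dims(np.zeros((3, 5, 7)), 2, 2).shape == (1, 3, 5, 7)
assert np_pad_dims(np.zeros((3, 5, 7, 9)), 2, 2).shape == (3, 5, 7, 9)
assert np_pad_dims(np.zeros((3, 5, 7, 9, 11)), 2, 2).shape == (15, 7, 9, 11)
assert np_pad_dims(np.zeros((3, 5, 7, 9)), 2, 3).shape == (1, 3, 5, 7, 9)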
Example #4
    def logp(self, states):
        r"""Create a Theano graph that computes the log-likelihood for a discrete Markov chain.

        This is the log-likelihood for the joint distribution of states, :math:`S_t`, conditional
        on state samples, :math:`s_t`, given by the following:

        .. math::

            \int_{S_0} P(S_1 = s_1 \mid S_0) dP(S_0) \prod^{T}_{t=2} P(S_t = s_t \mid S_{t-1} = s_{t-1})

        The first term (i.e. the integral) simply computes the marginal :math:`P(S_1 = s_1)`, so
        another way to express this result is as follows:

        .. math::

            P(S_1 = s_1) \prod^{T}_{t=2} P(S_t = s_t \mid S_{t-1} = s_{t-1})

        """  # noqa: E501

        states_tt = at.as_tensor(states)

        if states.ndim > 1 or self.Gammas.ndim > 3 or self.gamma_0.ndim > 1:
            raise NotImplementedError("Broadcasting not supported.")

        Gammas_tt = at_broadcast_to(self.Gammas, (states.shape[0], ) +
                                    tuple(self.Gammas.shape)[-2:])
        gamma_0_tt = self.gamma_0

        Gamma_1_tt = Gammas_tt[0]
        P_S_1_tt = at.dot(gamma_0_tt, Gamma_1_tt)[states_tt[0]]

        # def S_logp_fn(S_tm1, S_t, Gamma):
        #     return at.log(Gamma[..., S_tm1, S_t])
        #
        # P_S_2T_tt, _ = aesara.scan(
        #     S_logp_fn,
        #     sequences=[
        #         {
        #             "input": states_tt,
        #             "taps": [-1, 0],
        #         },
        #         Gammas_tt,
        #     ],
        # )
        P_S_2T_tt = Gammas_tt[at.arange(1, states.shape[0]), states[:-1],
                              states[1:]]

        log_P_S_1T_tt = at.concatenate(
            [at.shape_padright(at.log(P_S_1_tt)),
             at.log(P_S_2T_tt)])

        res = log_P_S_1T_tt.sum()
        res.name = "states_logp"

        return res
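
The expression in the docstring is easy to verify numerically. A minimal NumPy sketch, assuming a single time-homogeneous transition matrix rather than the per-time-step `Gammas` used above:

import numpy as np

gamma_0 = np.array([0.7, 0.3])               # initial state distribution P(S_0)
Gamma = np.array([[0.9, 0.1],                # time-homogeneous transition matrix
                  [0.2, 0.8]])
states = np.array([0, 0, 1, 1, 0])           # an observed state sequence s_1, ..., s_T

# First term: marginal P(S_1 = s_1) = (gamma_0 @ Gamma)[s_1]
logp = np.log((gamma_0 @ Gamma)[states[0]])

# Remaining terms: sum_t log P(S_t = s_t | S_{t-1} = s_{t-1})
logp += np.log(Gamma[states[:-1], states[1:]]).sum()

print(logp)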
Example #5
    def dist(
        cls,
        rho,
        sigma=None,
        tau=None,
        *,
        init_dist=None,
        steps=None,
        constant=False,
        ar_order=None,
        **kwargs,
    ):
        _, sigma = get_tau_sigma(tau=tau, sigma=sigma)
        sigma = at.as_tensor_variable(floatX(sigma))
        rhos = at.atleast_1d(at.as_tensor_variable(floatX(rho)))

        if "init" in kwargs:
            warnings.warn(
                "init parameter is now called init_dist. Using init will raise an error in a future release.",
                FutureWarning,
            )
            init_dist = kwargs["init"]

        ar_order = cls._get_ar_order(rhos=rhos,
                                     constant=constant,
                                     ar_order=ar_order)
        steps = get_steps(steps=steps,
                          shape=kwargs.get("shape", None),
                          step_shape_offset=ar_order)
        if steps is None:
            raise ValueError("Must specify steps or shape parameter")
        steps = at.as_tensor_variable(intX(steps), ndim=0)

        if init_dist is not None:
            if not isinstance(init_dist, TensorVariable) or not isinstance(
                    init_dist.owner.op, RandomVariable):
                raise ValueError(
                    f"Init dist must be a distribution created via the `.dist()` API, "
                    f"got {type(init_dist)}")
            check_dist_not_registered(init_dist)
            if init_dist.owner.op.ndim_supp > 1:
                raise ValueError(
                    "Init distribution must have a scalar or vector support dimension, "
                    f"got ndim_supp={init_dist.owner.op.ndim_supp}."
                )
        else:
            # Sigma must broadcast with ar_order
            init_dist = Normal.dist(sigma=at.shape_padright(sigma),
                                    size=(*sigma.shape, ar_order))

        # Tell Aeppl to ignore init_dist, as it will be accounted for in the logp term
        init_dist = ignore_logprob(init_dist)

        return super().dist(
            [rhos, sigma, init_dist, steps, ar_order, constant], **kwargs)
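
As a small aside on the `# Sigma must broadcast with ar_order` comment, the following NumPy sketch (with made-up shapes) shows how a right-padded `sigma` broadcasts against a block of `ar_order` initial values:

import numpy as np

sigma = np.array([0.5, 1.0, 2.0])   # e.g. one sigma per batched AR series
ar_order = 2

# shape_padright(sigma) -> shape (3, 1), which broadcasts against
# an init-value block of shape (*sigma.shape, ar_order) == (3, 2)
init_vals = np.random.default_rng(0).normal(
    scale=sigma[:, None], size=(*sigma.shape, ar_order)
)
assert init_vals.shape == (3, 2)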
Example #6
    def logp(self, value):
        """
        Calculate log-probability of defined ``MixtureSameFamily`` distribution at specified value.

        Parameters
        ----------
        value : numeric
            Value(s) for which log-probability is calculated. If the log probabilities for multiple
            values are desired, the values must be provided in a numpy array or Aesara tensor.

        Returns
        -------
        TensorVariable
        """

        comp_dists = self.comp_dists
        w = self.w
        mixture_axis = self.mixture_axis

        event_shape = comp_dists.shape[mixture_axis + 1:]

        # To be able to broadcast the comp_dists.logp with w and value
        # We first have to pad the shape of w to the right with ones
        # so that it can broadcast with the event_shape.

        w = at.shape_padright(w, len(event_shape))

        # Second, we have to add the mixture_axis to the value tensor
        # To insert the mixture axis at the correct location, we use the
        # negative number index. This way, we can also handle situations
        # in which, value is an observed value with more batch dimensions
        # than the ones present in the comp_dists.
        comp_dists_ndim = len(comp_dists.shape)

        value = at.shape_padaxis(value, axis=mixture_axis - comp_dists_ndim)

        comp_logp = comp_dists.logp(value)
        return bound(
            logsumexp(at.log(w) + comp_logp, axis=mixture_axis,
                      keepdims=False),
            w >= 0,
            w <= 1,
            at.allclose(w.sum(axis=mixture_axis - comp_dists_ndim), 1),
            broadcast_conditions=False,
        )
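
The marginalization this logp performs (pad, broadcast, then logsumexp over the mixture axis) can be sketched for a scalar-event mixture with NumPy and SciPy; this illustrates the math only, not the PyMC code path:

import numpy as np
from scipy.special import logsumexp
from scipy.stats import norm

# Two-component Gaussian mixture along the last (mixture) axis
w = np.array([0.3, 0.7])
mus = np.array([-1.0, 2.0])
value = np.array([0.0, 1.5, -2.0])

# Pad value with a mixture axis so it broadcasts against the components,
# then marginalize that axis with logsumexp: log sum_k w_k * p_k(value)
comp_logp = norm.logpdf(value[:, None], loc=mus, scale=1.0)   # shape (3, 2)
mix_logp = logsumexp(np.log(w) + comp_logp, axis=-1)          # shape (3,)
print(mix_logp)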
Example #7
def marginal_mixture_moment(op, rv, rng, weights, *components):
    ndim_supp = components[0].owner.op.ndim_supp
    weights = at.shape_padright(weights, ndim_supp)
    mix_axis = -ndim_supp - 1

    if len(components) == 1:
        moment_components = moment(components[0])

    else:
        moment_components = at.stack(
            [moment(component) for component in components],
            axis=mix_axis,
        )

    mix_moment = at.sum(weights * moment_components, axis=mix_axis)
    if components[0].dtype in discrete_types:
        mix_moment = at.round(mix_moment)
    return mix_moment
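
In plain NumPy the moment above is just a weighted sum of the component moments along the mixture axis; a tiny sketch with made-up numbers:

import numpy as np

weights = np.array([0.2, 0.8])
component_means = np.array([0.0, 10.0])   # stacked along the mixture axis

# Weighted sum over the mixture axis; for discrete components this
# would additionally be rounded, as in the code above.
mix_moment = np.sum(weights * component_means, axis=-1)
print(mix_moment)   # 8.0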
Example #8
    def __init__(self, w, comp_dists, mixture_axis=-1, *args, **kwargs):
        self.w = at.as_tensor_variable(w)
        if not isinstance(comp_dists, Distribution):
            raise TypeError(
                "The MixtureSameFamily distribution only accepts Distribution "
                f"instances as its components. Got {type(comp_dists)} instead."
            )
        self.comp_dists = comp_dists
        if mixture_axis < 0:
            mixture_axis = len(comp_dists.shape) + mixture_axis
            if mixture_axis < 0:
                raise ValueError(
                    "`mixture_axis` must refer to an axis of the components' distribution shape. "
                    f"Got out-of-bounds axis {mixture_axis - len(comp_dists.shape)}."
                )
        comp_shape = to_tuple(comp_dists.shape)
        self.shape = comp_shape[:mixture_axis] + comp_shape[mixture_axis + 1:]
        self.mixture_axis = mixture_axis
        kwargs.setdefault("dtype", self.comp_dists.dtype)

        # Compute the mode so we don't always have to pass a initval
        defaults = kwargs.pop("defaults", [])
        event_shape = self.comp_dists.shape[mixture_axis + 1:]
        _w = at.shape_padleft(
            at.shape_padright(w, len(event_shape)),
            len(self.comp_dists.shape) - w.ndim - len(event_shape),
        )
        mode = take_along_axis(
            self.comp_dists.mode,
            at.argmax(_w, keepdims=True),
            axis=mixture_axis,
        )
        self.mode = mode[(..., 0) + (slice(None), ) * len(event_shape)]

        if not all_discrete(comp_dists):
            mean = at.as_tensor_variable(self.comp_dists.mean)
            self.mean = (_w * mean).sum(axis=mixture_axis)
            if "mean" not in defaults:
                defaults.append("mean")
        defaults.append("mode")

        super().__init__(defaults=defaults, *args, **kwargs)
Example #9
    def rv_op(cls, weights, *components, size=None):
        # Create new rng for the mix_indexes internal RV
        mix_indexes_rng = aesara.shared(np.random.default_rng())

        single_component = len(components) == 1
        ndim_supp = components[0].owner.op.ndim_supp

        if size is not None:
            components = cls._resize_components(size, *components)
        elif not single_component:
            # We might need to broadcast components when size is not specified
            shape = tuple(at.broadcast_shape(*components))
            size = shape[:len(shape) - ndim_supp]
            components = cls._resize_components(size, *components)

        # Extract replication ndims from components and weights
        ndim_batch = components[0].ndim - ndim_supp
        if single_component:
            # One dimension is taken by the mixture axis in the single component case
            ndim_batch -= 1

        # The weights may imply extra batch dimensions that go beyond what is already
        # implied by the component dimensions (ndim_batch)
        weights_ndim_batch = max(0, weights.ndim - ndim_batch - 1)

        # If weights are large enough that they would broadcast the component distributions
        # we try to resize them. This is necessary to avoid duplicated values in the
        # random method and for equivalency with the logp method
        if weights_ndim_batch:
            new_size = at.concatenate([
                weights.shape[:weights_ndim_batch],
                components[0].shape[:ndim_batch],
            ])
            components = cls._resize_components(new_size, *components)

            # Extract support and batch ndims from components and weights
            ndim_batch = components[0].ndim - ndim_supp
            if single_component:
                ndim_batch -= 1
            weights_ndim_batch = max(0, weights.ndim - ndim_batch - 1)

        assert weights_ndim_batch == 0

        # Component RVs terms are accounted by the Mixture logprob, so they can be
        # safely ignored by Aeppl
        components = [ignore_logprob(component) for component in components]

        # Create a OpFromGraph that encapsulates the random generating process
        # Create dummy input variables with the same type as the ones provided
        weights_ = weights.type()
        components_ = [component.type() for component in components]
        mix_indexes_rng_ = mix_indexes_rng.type()

        mix_axis = -ndim_supp - 1

        # Stack components across mixture axis
        if single_component:
            # If single component, we consider it as being already "stacked"
            stacked_components_ = components_[0]
        else:
            stacked_components_ = at.stack(components_, axis=mix_axis)

        # Broadcast weights to (*batched dimensions, stack dimension), ignoring support dimensions
        weights_broadcast_shape_ = stacked_components_.shape[:ndim_batch + 1]
        weights_broadcasted_ = at.broadcast_to(weights_,
                                               weights_broadcast_shape_)

        # Draw mixture indexes and append (stack + ndim_supp) broadcastable dimensions to the right
        mix_indexes_ = at.random.categorical(weights_broadcasted_,
                                             rng=mix_indexes_rng_)
        mix_indexes_padded_ = at.shape_padright(mix_indexes_, ndim_supp + 1)

        # Index components and squeeze mixture dimension
        mix_out_ = at.take_along_axis(stacked_components_,
                                      mix_indexes_padded_,
                                      axis=mix_axis)
        mix_out_ = at.squeeze(mix_out_, axis=mix_axis)

        # Output mix_indexes rng update so that it can be updated in place
        mix_indexes_rng_next_ = mix_indexes_.owner.outputs[0]

        mix_op = MarginalMixtureRV(
            inputs=[mix_indexes_rng_, weights_, *components_],
            outputs=[mix_indexes_rng_next_, mix_out_],
        )

        # Create the actual MarginalMixture variable
        mix_out = mix_op(mix_indexes_rng, weights, *components)

        # Reference nodes to facilitate identification in other classmethods
        mix_out.tag.weights = weights
        mix_out.tag.components = components
        mix_out.tag.choices_rng = mix_indexes_rng

        return mix_out
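
The generative process that `rv_op` encapsulates (sample one component index per batch element, then select that component's draw with `take_along_axis` and squeeze the mixture axis) can be sketched in NumPy; the shapes below are illustrative assumptions:

import numpy as np

rng = np.random.default_rng(42)

# Draws from two scalar components, stacked along the last (mixture) axis
component_draws = np.stack(
    [rng.normal(-5.0, 1.0, size=10), rng.normal(5.0, 1.0, size=10)],
    axis=-1,
)                                                        # shape (10, 2)
weights = np.array([0.3, 0.7])

# One mixture index per batch element, then pad it with the mixture axis
mix_indexes = rng.choice(2, size=10, p=weights)          # shape (10,)
mix_indexes_padded = mix_indexes[:, None]                # shape (10, 1)

# Select the chosen component and squeeze the mixture axis away
mix_draws = np.take_along_axis(component_draws, mix_indexes_padded, axis=-1)
mix_draws = np.squeeze(mix_draws, axis=-1)               # shape (10,)
print(mix_draws)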
Example #10
    def _comp_logp(self, value):
        comp_dists = self.comp_dists

        if self.comp_is_distribution:
            # Value can be many things. It can be the self tensor, the mode
            # test point or it can be observed data. The latter case requires
            # careful handling of shape, as the observed's shape could look
            # like (repetitions,) + dist_shape, which does not include the last
            # mixture axis. For this reason, we try to eval the value.shape,
            # compare it with self.shape and shape_padright if we infer that
            # the value holds observed data
            try:
                val_shape = tuple(value.shape.eval())
            except AttributeError:
                val_shape = value.shape
            except aesara.graph.fg.MissingInputError:
                val_shape = None
            try:
                self_shape = tuple(self.shape)
            except AttributeError:
                # Happens in __init__ when computing self.logp(comp_modes)
                self_shape = None
            comp_shape = tuple(comp_dists.shape)
            ndim = value.ndim
            if val_shape is not None and not (
                (self_shape is not None and val_shape == self_shape)
                    or val_shape == comp_shape):
                # value is neither the test point nor the self tensor, it
                # is likely to hold observed values, so we must compute the
                # ndim discarding the dimensions that don't match
                # self_shape
                if self_shape and val_shape[-len(self_shape):] == self_shape:
                    # value has observed values for the Mixture
                    ndim = len(self_shape)
                elif comp_shape and val_shape[-len(comp_shape):] == comp_shape:
                    # value has observed for the Mixture components
                    ndim = len(comp_shape)
                else:
                    # We cannot infer what was passed, we handle this
                    # as was done in earlier versions of Mixture. We pad
                    # always if ndim is lower or equal to 1  (default
                    # legacy implementation)
                    if ndim <= 1:
                        ndim = len(comp_dists.shape) - 1
            else:
                # We reach this point if value does not hold observed data, so
                # we can use its ndim safely to determine shape padding, or it
                # holds something that we cannot infer, so we revert to using
                # the value's ndim for shape padding.
                # We will always pad a single dimension if ndim is lower or
                # equal to 1 (default legacy implementation)
                if ndim <= 1:
                    ndim = len(comp_dists.shape) - 1
            if ndim < len(comp_dists.shape):
                value_ = at.shape_padright(value, len(comp_dists.shape) - ndim)
            else:
                value_ = value
            return comp_dists.logp(value_)
        else:
            return at.squeeze(
                at.stack([comp_dist.logp(value) for comp_dist in comp_dists],
                         axis=-1))