def logp(self, states): r"""Create a Theano graph that computes the log-likelihood for a discrete Markov chain. This is the log-likelihood for the joint distribution of states, :math:`S_t`, conditional on state samples, :math:`s_t`, given by the following: .. math:: \int_{S_0} P(S_1 = s_1 \mid S_0) dP(S_0) \prod^{T}_{t=2} P(S_t = s_t \mid S_{t-1} = s_{t-1}) The first term (i.e. the integral) simply computes the marginal :math:`P(S_1 = s_1)`, so another way to express this result is as follows: .. math:: P(S_1 = s_1) \prod^{T}_{t=2} P(S_t = s_t \mid S_{t-1} = s_{t-1}) """ # noqa: E501 states_tt = at.as_tensor(states) if states.ndim > 1 or self.Gammas.ndim > 3 or self.gamma_0.ndim > 1: raise NotImplementedError("Broadcasting not supported.") Gammas_tt = at_broadcast_to(self.Gammas, (states.shape[0], ) + tuple(self.Gammas.shape)[-2:]) gamma_0_tt = self.gamma_0 Gamma_1_tt = Gammas_tt[0] P_S_1_tt = at.dot(gamma_0_tt, Gamma_1_tt)[states_tt[0]] # def S_logp_fn(S_tm1, S_t, Gamma): # return at.log(Gamma[..., S_tm1, S_t]) # # P_S_2T_tt, _ = aesara.scan( # S_logp_fn, # sequences=[ # { # "input": states_tt, # "taps": [-1, 0], # }, # Gammas_tt, # ], # ) P_S_2T_tt = Gammas_tt[at.arange(1, states.shape[0]), states[:-1], states[1:]] log_P_S_1T_tt = at.concatenate( [at.shape_padright(at.log(P_S_1_tt)), at.log(P_S_2T_tt)]) res = log_P_S_1T_tt.sum() res.name = "states_logp" return res
def tt_broadcast_arrays(*args: TensorVariable):
    """Broadcast all of the given tensors to a single common shape.

    Parameters
    ----------
    `*args` : array_likes
        The arrays to broadcast against one another.

    Returns
    -------
    A ``tuple`` containing each input broadcast to the shared shape.
    """
    common_shape = broadcast_shape(*args)
    broadcasted = [at_broadcast_to(arg, common_shape) for arg in args]
    return tuple(broadcasted)
def distribution_subset_args(dist, shape, idx):
    """Obtain subsets of a distribution parameters via indexing.

    This is used to effectively "lift" slices/`Subtensor` `Op`s up to a
    distribution's parameters.  In other words, `pm.Normal(mu, sigma)[idx]`
    becomes `pm.Normal(mu[idx], sigma[idx])`.  The former requires that the
    entire `pm.Normal(mu, sigma)` be evaluated (e.g. its `.logp` or a sample
    from `.random`), which could be very costly, whereas the latter evaluates
    only the subset of interest.

    XXX: this lifting isn't appropriate for every distribution.  It's fine
    for most scalar distributions and even some multivariate distributions,
    but some required functionality is missing in order to handle even the
    latter.

    NOTE(review): a later definition in this file re-binds this same name
    (with an extra ``point`` argument and a ``list`` return) and will shadow
    this one at import time — confirm which version is intended.

    Parameters
    ----------
    dist : Distribution
        The distribution object with the parameters to be indexed.
    shape : tuple or Shape
        The shape of the distribution's output/support.  This is used
        to (naively) determine the parameters' broadcasting pattern.
    idx : ndarray or TensorVariable
        The indices applied to the parameters of `dist`.

    Returns
    -------
    A ``dict`` of the broadcasted and indexed parameters for `dist`.
    """
    return {
        name: at_broadcast_to(getattr(dist, name), shape)[idx]
        for name in dist._distr_parameters_for_repr()
    }
def distribution_subset_args(dist, shape, idx, point=None):
    """Obtain subsets of a distribution parameters via indexing.

    This is used to effectively "lift" slices/`Subtensor` `Op`s up to a
    distribution's parameters.  In other words, `pm.Normal(mu, sigma)[idx]`
    becomes `pm.Normal(mu[idx], sigma[idx])`.  The former requires that the
    entire `pm.Normal(mu, sigma)` be evaluated (e.g. its `.logp` or a sample
    from `.random`), which could be very costly, whereas the latter evaluates
    only the subset of interest.

    XXX: this lifting isn't appropriate for every distribution.  It's fine
    for most scalar distributions and even some multivariate distributions,
    but some required functionality is missing in order to handle even the
    latter.

    Parameters
    ----------
    dist : Distribution
        The distribution object with the parameters to be indexed.
    shape : tuple or Shape
        The shape of the distribution's output/support.  This is used
        to (naively) determine the parameters' broadcasting pattern.
    idx : ndarray or TensorVariable
        The indices applied to the parameters of `dist`.
    point : dict (optional)
        A dictionary keyed on the `str` names of each parameter in `dist`,
        which are mapped to NumPy values for the corresponding parameter.
        When this is given, the Theano parameters are replaced by their
        values in the dictionary.

    Returns
    -------
    res: list
        An ordered set of broadcasted and indexed parameters for `dist`.
    """
    param_names = dist._distr_parameters_for_repr()

    if point:
        # A concrete `point` was supplied, so also try to reduce `idx` to a
        # concrete/NumPy value; silently fall back to the symbolic index
        # when no test value is available.
        try:
            idx = get_test_value(idx)
        except TestValueError:  # pragma: no cover
            pass

    res = []
    for name in param_names:
        # Prefer the (sampled) value from `point` when one was provided.
        if point is not None and name in point:
            value = point[name]
        else:
            value = getattr(dist, name, None)
            # Skip parameters the distribution object doesn't carry.
            if value is None:
                continue

        res.append(at_broadcast_to(value, shape)[idx])

    return res