def __pow__(self, other):
    if (
        isinstance(other, aesara.compile.SharedVariable)
        and other.get_value().squeeze().shape == ()
    ):
        # Scalar shared variable: drop its broadcastable dimensions
        other = at.squeeze(other)
        return Exponentiated(self, other)
    elif isinstance(other, Number):
        # Plain Python scalar
        return Exponentiated(self, other)
    elif np.asarray(other).squeeze().shape == ():
        # Anything array-like that squeezes down to a scalar
        other = np.squeeze(other)
        return Exponentiated(self, other)

    raise ValueError(
        "A covariance function can only be exponentiated by a scalar value"
    )
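A hedged usage sketch, not part of the module above: assuming this __pow__ is defined on PyMC's base covariance class (pm.gp.cov), the exponent may be a plain number, an array-like that squeezes to a scalar, or a scalar shared variable. The kernel and shapes below are illustrative.

    import numpy as np
    import aesara
    import pymc as pm

    X = np.linspace(0, 1, 5)[:, None]
    cov = pm.gp.cov.ExpQuad(1, ls=0.2)

    cov_num = cov**2                                   # Number branch
    cov_arr = cov ** np.array([[2.0]])                 # array-like scalar branch
    cov_shared = cov ** aesara.shared(np.array(2.0))   # SharedVariable branch

    print(cov_num(X).eval().shape)  # (5, 5)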
def rv_op(cls, weights, *components, size=None):
    # Create new rng for the mix_indexes internal RV
    mix_indexes_rng = aesara.shared(np.random.default_rng())

    single_component = len(components) == 1
    ndim_supp = components[0].owner.op.ndim_supp

    if size is not None:
        components = cls._resize_components(size, *components)
    elif not single_component:
        # We might need to broadcast components when size is not specified
        shape = tuple(at.broadcast_shape(*components))
        size = shape[: len(shape) - ndim_supp]
        components = cls._resize_components(size, *components)

    # Extract replication ndims from components and weights
    ndim_batch = components[0].ndim - ndim_supp
    if single_component:
        # One dimension is taken by the mixture axis in the single component case
        ndim_batch -= 1

    # The weights may imply extra batch dimensions that go beyond what is already
    # implied by the component dimensions (ndim_batch)
    weights_ndim_batch = max(0, weights.ndim - ndim_batch - 1)

    # If weights are large enough that they would broadcast the component distributions
    # we try to resize them. This is necessary to avoid duplicated values in the
    # random method and for equivalency with the logp method
    if weights_ndim_batch:
        new_size = at.concatenate(
            [
                weights.shape[:weights_ndim_batch],
                components[0].shape[:ndim_batch],
            ]
        )
        components = cls._resize_components(new_size, *components)

        # Extract support and batch ndims from components and weights
        ndim_batch = components[0].ndim - ndim_supp
        if single_component:
            ndim_batch -= 1
        weights_ndim_batch = max(0, weights.ndim - ndim_batch - 1)

    assert weights_ndim_batch == 0

    # Component RV terms are accounted for by the Mixture logprob, so they can be
    # safely ignored by Aeppl
    components = [ignore_logprob(component) for component in components]

    # Create an OpFromGraph that encapsulates the random generating process
    # Create dummy input variables with the same type as the ones provided
    weights_ = weights.type()
    components_ = [component.type() for component in components]
    mix_indexes_rng_ = mix_indexes_rng.type()

    mix_axis = -ndim_supp - 1

    # Stack components across mixture axis
    if single_component:
        # If single component, we consider it as being already "stacked"
        stacked_components_ = components_[0]
    else:
        stacked_components_ = at.stack(components_, axis=mix_axis)

    # Broadcast weights to (*batched dimensions, stack dimension), ignoring support dimensions
    weights_broadcast_shape_ = stacked_components_.shape[: ndim_batch + 1]
    weights_broadcasted_ = at.broadcast_to(weights_, weights_broadcast_shape_)

    # Draw mixture indexes and append (stack + ndim_supp) broadcastable dimensions to the right
    mix_indexes_ = at.random.categorical(weights_broadcasted_, rng=mix_indexes_rng_)
    mix_indexes_padded_ = at.shape_padright(mix_indexes_, ndim_supp + 1)

    # Index components and squeeze mixture dimension
    mix_out_ = at.take_along_axis(stacked_components_, mix_indexes_padded_, axis=mix_axis)
    mix_out_ = at.squeeze(mix_out_, axis=mix_axis)

    # Output mix_indexes rng update so that it can be updated in place
    mix_indexes_rng_next_ = mix_indexes_.owner.outputs[0]

    mix_op = MarginalMixtureRV(
        inputs=[mix_indexes_rng_, weights_, *components_],
        outputs=[mix_indexes_rng_next_, mix_out_],
    )

    # Create the actual MarginalMixture variable
    mix_out = mix_op(mix_indexes_rng, weights, *components)

    # Reference nodes to facilitate identification in other classmethods
    mix_out.tag.weights = weights
    mix_out.tag.components = components
    mix_out.tag.choices_rng = mix_indexes_rng

    return mix_out
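For context, a hedged usage sketch: in PyMC ~4.x the public entry point is pm.Mixture (or pm.Mixture.dist), which ultimately reaches this rv_op with the weights and component RVs. The component parameters and size below are illustrative, not taken from the source above.

    import numpy as np
    import pymc as pm

    # Two scalar Normal components; rv_op stacks them along the mixture axis,
    # draws categorical indexes from the weights, and selects with take_along_axis
    components = [pm.Normal.dist(mu=-2.0, sigma=1.0), pm.Normal.dist(mu=2.0, sigma=1.0)]
    mix = pm.Mixture.dist(w=[0.3, 0.7], comp_dists=components, size=(1000,))

    print(pm.draw(mix).shape)  # (1000,)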
def _comp_logp(self, value):
    comp_dists = self.comp_dists

    if self.comp_is_distribution:
        # Value can be many things. It can be the self tensor, the mode
        # test point or it can be observed data. The latter case requires
        # careful handling of shape, as the observed's shape could look
        # like (repetitions,) + dist_shape, which does not include the last
        # mixture axis. For this reason, we try to eval the value.shape,
        # compare it with self.shape and shape_padright if we infer that
        # the value holds observed data
        try:
            val_shape = tuple(value.shape.eval())
        except AttributeError:
            val_shape = value.shape
        except aesara.graph.fg.MissingInputError:
            val_shape = None
        try:
            self_shape = tuple(self.shape)
        except AttributeError:
            # Happens in __init__ when computing self.logp(comp_modes)
            self_shape = None
        comp_shape = tuple(comp_dists.shape)
        ndim = value.ndim
        if val_shape is not None and not (
            (self_shape is not None and val_shape == self_shape) or val_shape == comp_shape
        ):
            # value is neither the test point nor the self tensor, it
            # is likely to hold observed values, so we must compute the
            # ndim discarding the dimensions that don't match
            # self_shape
            if self_shape and val_shape[-len(self_shape) :] == self_shape:
                # value has observed values for the Mixture
                ndim = len(self_shape)
            elif comp_shape and val_shape[-len(comp_shape) :] == comp_shape:
                # value has observed values for the Mixture components
                ndim = len(comp_shape)
            else:
                # We cannot infer what was passed, we handle this
                # as was done in earlier versions of Mixture. We pad
                # always if ndim is lower or equal to 1 (default
                # legacy implementation)
                if ndim <= 1:
                    ndim = len(comp_dists.shape) - 1
        else:
            # We reach this point if value does not hold observed data, so
            # we can use its ndim safely to determine shape padding, or it
            # holds something that we cannot infer, so we revert to using
            # the value's ndim for shape padding.
            # We will always pad a single dimension if ndim is lower or
            # equal to 1 (default legacy implementation)
            if ndim <= 1:
                ndim = len(comp_dists.shape) - 1
        if ndim < len(comp_dists.shape):
            value_ = at.shape_padright(value, len(comp_dists.shape) - ndim)
        else:
            value_ = value
        return comp_dists.logp(value_)
    else:
        return at.squeeze(
            at.stack([comp_dist.logp(value) for comp_dist in comp_dists], axis=-1)
        )
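A small illustration, for intuition only and not part of the class above, of the shape padding this method relies on: at.shape_padright appends broadcastable dimensions on the right, so observed data that lacks the trailing mixture axis can broadcast against the per-component logp.

    import numpy as np
    import aesara.tensor as at

    # Observed data shaped (repetitions,) + dist_shape, with no mixture axis
    value = at.as_tensor_variable(np.random.normal(size=(100, 3)))

    # Append one broadcastable dimension on the right (the mixture axis)
    value_ = at.shape_padright(value, 1)
    print(value.ndim, value_.ndim)  # 2 3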
def _comp_modes(self):
    try:
        return at.as_tensor_variable(self.comp_dists.mode)
    except AttributeError:
        return at.squeeze(
            at.stack([comp_dist.mode for comp_dist in self.comp_dists], axis=-1)
        )
def __call__(self, X):
    # Affine mean function: X @ A + b, squeezed to drop any trailing singleton dimension
    return at.squeeze(at.dot(X, self.A) + self.b)
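A hedged usage sketch, assuming this __call__ belongs to a linear GP mean function such as pm.gp.mean.Linear, where self.A holds the coefficients and self.b the intercept; the shapes below are illustrative.

    import numpy as np
    import pymc as pm

    X = np.random.normal(size=(50, 2))

    # m(X) = X @ coeffs + intercept, returned as a length-50 vector
    mean_fn = pm.gp.mean.Linear(coeffs=np.array([1.0, 0.5]), intercept=2.0)
    print(mean_fn(X).eval().shape)  # (50,)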