def _compute_message_and_mask_to_parent(self, index, m, *u_parents):
    """
    Compute the message and the mask for the parent at position `index`.

    Masks normally need no attention when a message is computed.  This node,
    however, sums over some axes right here so that huge message arrays are
    never built, and therefore the mask is handled in this method too.

    Parameters
    ----------
    index : int
        Position of the receiving parent in ``self.parents``.
    m : list
        Message from the children.  Its first two entries are overwritten
        in place with the summed messages.
    *u_parents
        Moments of the parents; the entry at `index` is left out of the
        products.

    Returns
    -------
    tuple
        ``(m, mask)``: the updated message list and ``self.mask`` collapsed
        with ``np.any`` and squeezed to ``len(parent.plates)`` dimensions.
    """
    node_mask = self.mask
    receiver = self.parents[index]

    # Both messages are handled the same way; only the number of trailing
    # axes (order + 1) differs.
    for order in range(2):
        # Extra trailing axes for the message from the children
        m[order] = utils.add_trailing_axes(m[order], order + 1)

        # Factors of the product: the child message followed by the moments
        # of every parent except the receiving one.
        factors = [m[order]]
        factors.extend(
            u_k[order]
            for (k, u_k) in enumerate(u_parents)
            if k != index
        )

        # Sum over some axes already here to avoid huge message matrices.
        m[order] = _message_sum_multiply(
            receiver.plates,
            receiver.dims[order],
            *factors
        )

    # Collapse the mask over the axes returned by axes_to_collapse.
    collapsed = utils.axes_to_collapse(np.shape(node_mask), receiver.plates)
    parent_mask = np.any(node_mask, axis=collapsed, keepdims=True)
    parent_mask = utils.squeeze_to_dim(parent_mask, len(receiver.plates))

    return (m, parent_mask)
def update_u(self, u, mask=True):
    """
    Store the computed moments `u` without touching masked-out elements.

    Only the elements selected by `mask` are overwritten; the others (for
    instance, observations) keep their previous values.

    Parameters
    ----------
    u : sequence of arrays
        New moments, one array per moment.
    mask : bool or array, optional
        Mask with axes for the plates only.  Trailing axes for the variable
        dimensions are added here before it is applied.

    Raises
    ------
    Exception
        If a stored moment array has more axes than ``self.get_shape(ind)``.
    """
    for (ind, u_new) in enumerate(u):
        # Add axes to the mask for the variable dimensions (the mask
        # contains only axes for the plates).
        u_mask = utils.add_trailing_axes(mask, self.ndims[ind])

        # Enlarge self.u[ind] as necessary so that it can store the
        # broadcasted result.
        sh = utils.broadcasted_shape_from_arrays(self.u[ind], u_new, u_mask)
        self.u[ind] = utils.repeat_to_shape(self.u[ind], sh)

        # Use the mask to update only the selected plates and keep the
        # others as before.
        np.copyto(self.u[ind], u_new, where=u_mask)

        # Make sure the stored array has the correct number of dimensions.
        ndim = len(self.get_shape(ind))
        ndim_u = np.ndim(self.u[ind])
        if ndim > ndim_u:
            # Pad the stored array, not the raw input: the stored array
            # holds the mask-merged values and ndim_u was measured from it.
            # Padding u[ind] instead would throw the masked merge away and
            # could produce an array with too few axes.
            self.u[ind] = utils.add_leading_axes(self.u[ind], ndim - ndim_u)
        elif ndim < ndim_u:
            raise Exception("Weird, this shouldn't happen.. :)")
def _compute_message_and_mask_to_parent(self, index, m, *u_parents):
    """
    Compute the message and the mask for the parent at position `index`.

    Masks normally need no attention when a message is computed.  This node,
    however, sums over some axes right here so that huge message arrays are
    never built, and therefore the mask is handled in this method too.

    Parameters
    ----------
    index : int
        Position of the receiving parent in ``self.parents``.
    m : list
        Message from the children.  Its first two entries are overwritten
        in place with the summed messages.
    *u_parents
        Moments of the parents; the entry at `index` is left out of the
        products.

    Returns
    -------
    tuple
        ``(m, mask)``: the updated message list and ``self.mask`` collapsed
        with ``np.any`` and squeezed to ``len(parent.plates)`` dimensions.
    """
    node_mask = self.mask
    receiver = self.parents[index]

    # Both messages are handled the same way; only the number of trailing
    # axes (order + 1) differs.
    for order in range(2):
        # Extra trailing axes for the message from the children
        m[order] = utils.add_trailing_axes(m[order], order + 1)

        # Factors of the product: the child message followed by the moments
        # of every parent except the receiving one.
        factors = [m[order]]
        factors.extend(
            u_k[order]
            for (k, u_k) in enumerate(u_parents)
            if k != index
        )

        # Sum over some axes already here to avoid huge message matrices.
        m[order] = _message_sum_multiply(
            receiver.plates,
            receiver.dims[order],
            *factors
        )

    # Collapse the mask over the axes returned by axes_to_collapse.
    collapsed = utils.axes_to_collapse(np.shape(node_mask), receiver.plates)
    parent_mask = np.any(node_mask, axis=collapsed, keepdims=True)
    parent_mask = utils.squeeze_to_dim(parent_mask, len(receiver.plates))

    return (m, parent_mask)
def lower_bound_contribution(self, gradient=False):
    """
    Compute E[ log p(X|parents) - log q(X) ] over q(X)q(parents).

    The `gradient` argument is accepted but not used in this method.

    Returns the masked sum of the per-plate terms multiplied by
    ``self._plate_multiplier(...)``.
    """
    # Messages from parents
    u_parents = self._message_from_parents()
    phi_parents = self._distribution.compute_phi_from_parents(*u_parents)

    # Start from the G term given by the parents.
    bound = self._distribution.compute_cgf_from_parents(*u_parents)

    # Ignored variables are handled properly automatically, so only the
    # observation status is needed for the latent mask.
    is_latent = np.logical_not(self.observed)

    # F for observed, G for latent
    bound = bound + np.where(self.observed, self.f, -self.g)

    # Number of leading axes the latent mask is missing w.r.t. the plates
    n_leading = len(self.plates) - np.ndim(is_latent)

    for (phi_p, phi_q, u_q, dims) in zip(phi_parents,
                                         self.phi,
                                         self.u,
                                         self.dims):
        n_var = len(dims)

        # Mask which puts observed variables to zero and broadcasts
        # properly against the moment arrays
        mask_i = utils.add_trailing_axes(
            utils.add_leading_axes(is_latent, n_leading),
            n_var
        )
        var_axes = tuple(range(-n_var, 0))

        # TODO/FIXME: Use einsum here?
        phi_latent = np.where(mask_i, phi_q, 0)
        bound = bound + np.sum((phi_p - phi_latent) * u_q, axis=var_axes)

    total = np.sum(np.where(self.mask, bound, 0))
    multiplier = self._plate_multiplier(
        self.plates,
        np.shape(bound),
        np.shape(self.mask)
    )
    return total * multiplier
def get_message(self, index, u_parents):
    """
    Compute the message and the mask for the parent at position `index`.

    The message from the children is fetched with
    ``self.message_from_children()``; its first two entries are zeroed
    where the mask is false and then overwritten in place with the summed
    products.

    Because the summation over axes is done here already (for efficiency),
    each result is divided by the number of summed elements.  This cancels
    the plate-multiplier applied in the message_to_parent function.

    Returns the tuple ``(m, mask)``.
    """
    (m, mask) = self.message_from_children()
    receiver = self.parents[index]

    # The mask is not modified inside the loop, so negate it only once.
    masked_out = np.logical_not(mask)

    # Compute both messages
    for order in range(2):
        # Put masked elements to zero
        np.copyto(m[order], 0, where=masked_out)

        # Add extra axes to the message from children
        m[order] = utils.add_trailing_axes(m[order], order + 1)

        # Elements to multiply together: the child message and the moments
        # of all parents except the receiving one
        factors = [m[order]]
        factors.extend(
            u_k[order]
            for (k, u_k) in enumerate(u_parents)
            if k != index
        )

        # Find out which axes are summed over.
        full_shape = utils.broadcasted_shape_from_arrays(*factors)
        axes = utils.axes_to_collapse(full_shape, receiver.get_shape(order))

        # Multiplier for cancelling the plate-multiplier: the number of
        # elements that are summed together.
        divisor = 1
        for axis in axes:
            divisor *= full_shape[axis]

        # Compute dot product (and cancel plate-multiplier)
        summed = utils.sum_product(*factors, axes_to_sum=axes, keepdims=True)
        m[order] = summed / divisor

    # Compute the mask
    collapsed = utils.axes_to_collapse(np.shape(mask), receiver.plates)
    parent_mask = np.any(mask, axis=collapsed, keepdims=True)
    parent_mask = utils.squeeze_to_dim(parent_mask, len(receiver.plates))

    return (m, parent_mask)
def get_message(self, index, u_parents):
    """
    Compute the message and the mask for the parent at position `index`.

    The message from the children is fetched with
    ``self.message_from_children()``; its first two entries are zeroed
    where the mask is false and then overwritten in place with the summed
    products.

    Because the summation over axes is done here already (for efficiency),
    each result is divided by the number of summed elements.  This cancels
    the plate-multiplier applied in the message_to_parent function.

    Returns the tuple ``(m, mask)``.
    """
    (m, mask) = self.message_from_children()
    receiver = self.parents[index]

    # The mask is not modified inside the loop, so negate it only once.
    masked_out = np.logical_not(mask)

    # Compute both messages
    for order in range(2):
        # Put masked elements to zero
        np.copyto(m[order], 0, where=masked_out)

        # Add extra axes to the message from children
        m[order] = utils.add_trailing_axes(m[order], order + 1)

        # Elements to multiply together: the child message and the moments
        # of all parents except the receiving one
        factors = [m[order]]
        factors.extend(
            u_k[order]
            for (k, u_k) in enumerate(u_parents)
            if k != index
        )

        # Find out which axes are summed over.
        full_shape = utils.broadcasted_shape_from_arrays(*factors)
        axes = utils.axes_to_collapse(full_shape, receiver.get_shape(order))

        # Multiplier for cancelling the plate-multiplier: the number of
        # elements that are summed together.
        divisor = 1
        for axis in axes:
            divisor *= full_shape[axis]

        # Compute dot product (and cancel plate-multiplier)
        summed = utils.sum_product(*factors, axes_to_sum=axes, keepdims=True)
        m[order] = summed / divisor

    # Compute the mask
    collapsed = utils.axes_to_collapse(np.shape(mask), receiver.plates)
    parent_mask = np.any(mask, axis=collapsed, keepdims=True)
    parent_mask = utils.squeeze_to_dim(parent_mask, len(receiver.plates))

    return (m, parent_mask)
def _compute_phi_from_parents(*u_parents):
    """
    Compute the natural parameters as a weighted average over the clusters.

    ``u_parents[0][0]`` holds the contributions/weights/probabilities of
    the clusters and ``u_parents[1:]`` are handed to the mixed distribution
    to obtain the cluster parameters.

    Shapes (general broadcasting rules apply to Nn,..,N0: preceding
    dimensions may be missing or equal to one):
        Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        Shape(p)      = [Nn,..,N0,K]
        Shape(result) = [Nn,..,N0,Dd,..,D0]

    Returns
    -------
    list
        One weighted-average array per natural parameter.

    Raises
    ------
    RuntimeError
        If ``cluster_plate`` is not negative.  Without this check the
        cluster axis would be undefined (or stale from an earlier
        iteration) in that case.
    """
    # Cluster parameters
    Phi = distribution._compute_phi_from_parents(*(u_parents[1:]))
    # Contributions/weights/probabilities
    P = u_parents[0][0]

    phi = list()

    for ind in range(len(Phi)):
        # The cluster axis is counted from the end, so it can only be
        # shifted past the variable dimensions if it is negative.
        if not cluster_plate < 0:
            raise RuntimeError("Cluster plate should be negative")

        ndim = distribution.ndims[ind]
        cluster_axis = cluster_plate - ndim

        # Move cluster axis to the last:
        # Shape(phi) = [Nn,..,N0,Dd,..,D0,K]
        phi_k = utils.moveaxis(Phi[ind], cluster_axis, -1)

        # Add axes to p:
        # Shape(p) = [Nn,..,N0,K,1,..,1]
        p = utils.add_trailing_axes(P, ndim)
        # Move cluster axis to the last:
        # Shape(p) = [Nn,..,N0,1,..,1,K]
        p = utils.moveaxis(p, -(ndim + 1), -1)

        # Now the shapes broadcast and p*phi can be summed over the last
        # axis:
        # Shape(result) = [Nn,..,N0,Dd,..,D0]
        phi.append(utils.sum_product(p, phi_k, axes_to_sum=-1))

    return phi
def _compute_phi_from_parents(*u_parents):
    """
    Compute the natural parameters as a weighted average over the clusters.

    ``u_parents[0][0]`` holds the contributions/weights/probabilities of
    the clusters and ``u_parents[1:]`` are handed to the mixed distribution
    to obtain the cluster parameters.

    Shapes (general broadcasting rules apply to Nn,..,N0: preceding
    dimensions may be missing or equal to one):
        Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        Shape(p)      = [Nn,..,N0,K]
        Shape(result) = [Nn,..,N0,Dd,..,D0]

    Returns
    -------
    list
        One weighted-average array per natural parameter.

    Raises
    ------
    RuntimeError
        If ``cluster_plate`` is not negative.  Without this check the
        cluster axis would be undefined (or stale from an earlier
        iteration) in that case.
    """
    # Cluster parameters
    Phi = distribution._compute_phi_from_parents(*(u_parents[1:]))
    # Contributions/weights/probabilities
    P = u_parents[0][0]

    phi = list()

    for ind in range(len(Phi)):
        # The cluster axis is counted from the end, so it can only be
        # shifted past the variable dimensions if it is negative.
        if not cluster_plate < 0:
            raise RuntimeError("Cluster plate should be negative")

        ndim = distribution.ndims[ind]
        cluster_axis = cluster_plate - ndim

        # Move cluster axis to the last:
        # Shape(phi) = [Nn,..,N0,Dd,..,D0,K]
        phi_k = utils.moveaxis(Phi[ind], cluster_axis, -1)

        # Add axes to p:
        # Shape(p) = [Nn,..,N0,K,1,..,1]
        p = utils.add_trailing_axes(P, ndim)
        # Move cluster axis to the last:
        # Shape(p) = [Nn,..,N0,1,..,1,K]
        p = utils.moveaxis(p, -(ndim + 1), -1)

        # Now the shapes broadcast and p*phi can be summed over the last
        # axis:
        # Shape(result) = [Nn,..,N0,Dd,..,D0]
        phi.append(utils.sum_product(p, phi_k, axes_to_sum=-1))

    return phi
def compute_phi_from_parents(self, *u_parents, mask=True):
    """
    Compute the natural parameters as a weighted average over the clusters.

    ``u_parents[0][0]`` holds the contributions/weights/probabilities of
    the clusters and ``u_parents[1:]`` are handed to the mixed distribution
    to obtain the cluster parameters.  The `mask` argument is accepted but
    not used in this method.

    Shapes (general broadcasting rules apply to Nn,..,N0: preceding
    dimensions may be missing or equal to one):
        Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        Shape(p)      = [Nn,..,N0,K]
        Shape(result) = [Nn,..,N0,Dd,..,D0]

    Raises RuntimeError if ``self.cluster_plate`` is not negative.
    """
    # Cluster parameters
    Phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
    # Contributions/weights/probabilities
    P = u_parents[0][0]

    phi = list()

    for (ind, Phi_ind) in enumerate(Phi):
        if not self.cluster_plate < 0:
            raise RuntimeError("Cluster plate should be negative")

        ndim = self.distribution.ndims[ind]
        cluster_axis = self.cluster_plate - ndim

        # Move cluster axis to the last:
        # Shape(phi) = [Nn,..,N0,Dd,..,D0,K]
        if np.ndim(Phi_ind) >= abs(cluster_axis):
            phi_k = utils.moveaxis(Phi_ind, cluster_axis, -1)
        else:
            # The array does not reach the cluster axis, so append a new
            # last axis in its place.
            phi_k = Phi_ind[..., None]

        # Add axes to p:
        # Shape(p) = [Nn,..,N0,K,1,..,1]
        p = utils.add_trailing_axes(P, ndim)
        # Move cluster axis to the last:
        # Shape(p) = [Nn,..,N0,1,..,1,K]
        p = utils.moveaxis(p, -(ndim + 1), -1)

        # Now the shapes broadcast and p*phi can be summed over the last
        # axis:
        # Shape(result) = [Nn,..,N0,Dd,..,D0]
        phi.append(utils.sum_product(p, phi_k, axes_to_sum=-1))

    return phi
def _set_moments(self, u, mask=True):
    """
    Store the moments `u` in the elements selected by `mask`.

    Elements where the mask is false keep their previous values.  The mask
    of observations is not consulted here: this method is also used for
    setting the observations, so the caller must be careful about which
    mask is passed.  To set only latent variables, pass a mask that selects
    only them.

    Parameters
    ----------
    u : sequence of arrays
        New moments, one array per moment.
    mask : bool or array, optional
        Mask with axes for the plates only.  Trailing axes for the variable
        dimensions are added here before it is applied.

    Raises
    ------
    RuntimeError
        If a stored moment array has more axes than ``self.get_shape(ind)``.
    """
    for (ind, u_new) in enumerate(u):
        # Add axes to the mask for the variable dimensions (the mask
        # contains only axes for the plates).
        u_mask = utils.add_trailing_axes(mask, self._distribution.ndims[ind])

        # Enlarge self.u[ind] as necessary so that it can store the
        # broadcasted result.
        sh = utils.broadcasted_shape_from_arrays(self.u[ind], u_new, u_mask)
        self.u[ind] = utils.repeat_to_shape(self.u[ind], sh)

        # Use the mask to update only the selected plates and keep the
        # others as before.
        np.copyto(self.u[ind], u_new, where=u_mask)

        # Make sure the stored array has the correct number of dimensions.
        # TODO/FIXME: Maybe it would be good to also check that u has a
        # shape that is a sub-shape of get_shape.
        shape = self.get_shape(ind)
        ndim = len(shape)
        ndim_u = np.ndim(self.u[ind])
        if ndim > ndim_u:
            # Pad the stored array, not the raw input: the stored array
            # holds the mask-merged values and ndim_u was measured from it.
            # Padding u[ind] instead would throw the masked merge away and
            # could produce an array with too few axes.
            self.u[ind] = utils.add_leading_axes(self.u[ind], ndim - ndim_u)
        elif ndim < ndim_u:
            raise RuntimeError(
                "The size of the variable %s's %s-th moment "
                "array is %s which is larger than it should "
                "be, that is, %s, based on the plates %s and "
                "dimension %s. Check that you have provided "
                "plates properly."
                % (self.name,
                   ind,
                   np.shape(self.u[ind]),
                   shape,
                   self.plates,
                   self.dims[ind]))
def _set_moments(self, u, mask=True):
    """
    Store the moments `u` in the elements selected by `mask`.

    Elements where the mask is false keep their previous values.  The mask
    of observations is not consulted here: this method is also used for
    setting the observations, so the caller must be careful about which
    mask is passed.  To set only latent variables, pass a mask that selects
    only them.

    Parameters
    ----------
    u : sequence of arrays
        New moments, one array per moment.
    mask : bool or array, optional
        Mask with axes for the plates only.  Trailing axes for the variable
        dimensions are added here before it is applied.

    Raises
    ------
    RuntimeError
        If a stored moment array has more axes than ``self.get_shape(ind)``.
    """
    for (ind, u_new) in enumerate(u):
        # Add axes to the mask for the variable dimensions (the mask
        # contains only axes for the plates).
        u_mask = utils.add_trailing_axes(mask, self._distribution.ndims[ind])

        # Enlarge self.u[ind] as necessary so that it can store the
        # broadcasted result.
        sh = utils.broadcasted_shape_from_arrays(self.u[ind], u_new, u_mask)
        self.u[ind] = utils.repeat_to_shape(self.u[ind], sh)

        # Use the mask to update only the selected plates and keep the
        # others as before.
        np.copyto(self.u[ind], u_new, where=u_mask)

        # Make sure the stored array has the correct number of dimensions.
        # TODO/FIXME: Maybe it would be good to also check that u has a
        # shape that is a sub-shape of get_shape.
        shape = self.get_shape(ind)
        ndim = len(shape)
        ndim_u = np.ndim(self.u[ind])
        if ndim > ndim_u:
            # Pad the stored array, not the raw input: the stored array
            # holds the mask-merged values and ndim_u was measured from it.
            # Padding u[ind] instead would throw the masked merge away and
            # could produce an array with too few axes.
            self.u[ind] = utils.add_leading_axes(self.u[ind], ndim - ndim_u)
        elif ndim < ndim_u:
            raise RuntimeError(
                "The size of the variable %s's %s-th moment "
                "array is %s which is larger than it should "
                "be, that is, %s, based on the plates %s and "
                "dimension %s. Check that you have provided "
                "plates properly."
                % (self.name,
                   ind,
                   np.shape(self.u[ind]),
                   shape,
                   self.plates,
                   self.dims[ind]))
def lower_bound_contribution(self, gradient=False):
    """
    Compute E[ log p(X|parents) - log q(X) ] over q(X)q(parents).

    The `gradient` argument is accepted but not used in this method.

    Returns the masked sum of the per-plate terms multiplied by
    ``self._plate_multiplier(...)``.
    """
    # Messages from parents
    u_parents = self._message_from_parents()
    phi_parents = self._compute_phi_from_parents(*u_parents)

    # Start from the G term given by the parents.
    bound = self._compute_cgf_from_parents(*u_parents)

    # Ignored variables are handled properly automatically, so only the
    # observation status is needed for the latent mask.
    is_latent = np.logical_not(self.observed)

    # F for observed, G for latent
    bound = bound + np.where(self.observed, self.f, -self.g)

    # Number of leading axes the latent mask is missing w.r.t. the plates
    n_leading = len(self.plates) - np.ndim(is_latent)

    for (phi_p, phi_q, u_q, dims) in zip(phi_parents,
                                         self.phi,
                                         self.u,
                                         self.dims):
        n_var = len(dims)

        # Mask which puts observed variables to zero and broadcasts
        # properly against the moment arrays
        mask_i = utils.add_trailing_axes(
            utils.add_leading_axes(is_latent, n_leading),
            n_var
        )
        var_axes = tuple(range(-n_var, 0))

        # TODO/FIXME: Use einsum here?
        phi_latent = np.where(mask_i, phi_q, 0)
        bound = bound + np.sum((phi_p - phi_latent) * u_q, axis=var_axes)

    total = np.sum(np.where(self.mask, bound, 0))
    multiplier = self._plate_multiplier(
        self.plates,
        np.shape(bound),
        np.shape(self.mask)
    )
    return total * multiplier
def _compute_message_to_parent(parent, index, u, *u_parents):
    """
    Compute the message of the mixture to the parent at position `index`.

    For ``index == 0`` the message is the log-pdf evaluated separately for
    each cluster, returned as a one-element list:
        Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        Shape(u)      = [Nn,..,N0,Dd,..,D0]
        Shape(result) = [Nn,..,N0,K]

    For ``index >= 1`` the message of the mixed distribution to its parent
    ``index - 1`` is weighted with the responsibilities
    ``u_parents[0][0]``:
        Shape(m)      = [Nn,..,K,..,N0,Dd,..,D0]
        Shape(p)      = [Nn,..,N0,K]
        Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]

    Any other `index` yields ``None``.  The `parent` argument is not used
    in this function.
    """
    if index == 0:
        # Compute g and move the cluster axis to the last:
        # Shape(g) = [Nn,..,K,..,N0]  ->  [Nn,..,N0,K]
        g = distribution._compute_cgf_from_parents(*(u_parents[1:]))
        g = utils.moveaxis(g, cluster_plate, -1)

        # Compute phi and move the cluster axis just before the variable
        # dimensions:
        # Shape(phi) = [Nn,..,K,..,N0,Dd,..,D0]  ->  [Nn,..,N0,K,Dd,..,D0]
        phi = distribution._compute_phi_from_parents(*(u_parents[1:]))
        for (ind, phi_ind) in enumerate(phi):
            ndim = distribution.ndims[ind]
            phi[ind] = utils.moveaxis(phi_ind, cluster_plate - ndim, -1 - ndim)

        # Add a singleton cluster axis to u:
        # Shape(u) = [Nn,..,N0,1,Dd,..,D0]
        u_self = [
            np.expand_dims(u_ind, axis=(-1 - distribution.ndims[ind]))
            for (ind, u_ind) in enumerate(u)
        ]

        # Compute logpdf:
        # Shape(L) = [Nn,..,N0,K]
        # No summing over the non-cluster dimensions here; the original
        # author expected the message passing method to take care of it.
        L = distribution._compute_logpdf(u_self, phi, g, 0)
        return [L]

    if index >= 1:
        # Parent index for the distribution used for the mixture.
        index = index - 1

        # Add a singleton axis for the clusters to u:
        # Shape(u) = [Nn,..1,..,N0,Dd,..,D0]
        u_self = list()
        for (ind, u_ind) in enumerate(u):
            if cluster_plate < 0:
                cluster_axis = cluster_plate - distribution.ndims[ind]
            else:
                cluster_axis = cluster_plate
            u_self.append(np.expand_dims(u_ind, axis=cluster_axis))

        # Message from the mixed distribution
        m = distribution._compute_message_to_parent(
            index,
            u_self,
            *(u_parents[1:])
        )

        # Weigh the messages with the responsibilities
        for i in range(len(m)):
            # Number of axes for the variable dimensions of the parent
            # message
            D = distribution.ndims_parents[index][i]

            # Responsibilities are the first parent's first moment.  Move
            # the cluster axis to its place and add axes for the variable
            # dimensions:
            # Shape(p) = [Nn,..,N0,K] -> [Nn,..,K,..,N0] -> [..,1,..,1]
            p = utils.moveaxis(u_parents[0][0], -1, cluster_plate)
            p = utils.add_trailing_axes(p, D)

            # TODO: You could do summing here already so that you wouldn't
            # compute huge matrices as intermediate result. Use einsum.

            # Message contributions for each cluster:
            # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]
            m[i] = m[i] * p

        return m

    return None
def compute_message_to_parent(self, parent, index, u, *u_parents):
    """
    Compute the message of the mixture to the parent at position `index`.

    For ``index == 0`` the message is the log-pdf evaluated separately for
    each cluster, returned as a one-element list:
        Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        Shape(u)      = [Nn,..,N0,Dd,..,D0]
        Shape(result) = [Nn,..,N0,K]

    For ``index >= 1`` the message of the mixed distribution to its parent
    ``index - 1`` is weighted with the responsibilities
    ``u_parents[0][0]``:
        Shape(m)      = [Nn,..,K,..,N0,Dd,..,D0]
        Shape(p)      = [Nn,..,N0,K]
        Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]

    Any other `index` yields ``None``.

    Raises RuntimeError (in the ``index == 0`` branch) if
    ``self.cluster_plate`` is not negative.
    """
    if index == 0:
        # Compute g and move the cluster axis to the last:
        # Shape(g) = [Nn,..,K,..,N0]  ->  [Nn,..,N0,K]
        g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))
        g = utils.moveaxis(g, self.cluster_plate, -1)

        # Compute phi:
        # Shape(phi) = [Nn,..,K,..,N0,Dd,..,D0]
        phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))

        # Move phi axis:
        # Shape(phi) = [Nn,..,N0,K,Dd,..,D0]
        for (ind, phi_ind) in enumerate(phi):
            if not self.cluster_plate < 0:
                raise RuntimeError("Cluster plate axis must be negative")
            axis_from = self.cluster_plate - self.ndims[ind]
            axis_to = -1 - self.ndims[ind]

            if np.ndim(phi_ind) >= abs(axis_from):
                # Cluster plate axis exists, move it to the correct position
                phi[ind] = utils.moveaxis(phi_ind, axis_from, axis_to)
            elif np.ndim(phi_ind) >= abs(axis_to):
                # No cluster plate axis: just add a new axis to the correct
                # position, provided phi has something on that axis
                phi[ind] = np.expand_dims(phi_ind, axis=axis_to)

        # Add a singleton cluster axis to u:
        # Shape(u) = [Nn,..,N0,1,Dd,..,D0]
        u_self = [
            np.expand_dims(u_ind, axis=(-1 - self.ndims[ind]))
            for (ind, u_ind) in enumerate(u)
        ]

        # Compute logpdf:
        # Shape(L) = [Nn,..,N0,K]
        # No summing over the non-cluster dimensions here; the original
        # author expected the message passing method to take care of it.
        L = self.distribution.compute_logpdf(u_self, phi, g, 0, self.ndims)
        return [L]

    if index >= 1:
        # Parent index for the distribution used for the mixture.
        index = index - 1

        # Add a singleton axis for the clusters to u:
        # Shape(u) = [Nn,..1,..,N0,Dd,..,D0]
        u_self = list()
        for (ind, u_ind) in enumerate(u):
            if self.cluster_plate < 0:
                cluster_axis = self.cluster_plate - self.ndims[ind]
            else:
                cluster_axis = self.cluster_plate
            u_self.append(np.expand_dims(u_ind, axis=cluster_axis))

        # Message from the mixed distribution
        m = self.distribution.compute_message_to_parent(
            parent,
            index,
            u_self,
            *(u_parents[1:])
        )

        # Weigh the messages with the responsibilities
        for i in range(len(m)):
            # Number of axes for the variable dimensions of the parent
            # message
            D = self.ndims_parents[index][i]

            # Responsibilities are the first parent's first moment:
            # Shape(p) = [Nn,..,N0,K]
            p = utils.atleast_nd(u_parents[0][0], abs(self.cluster_plate))
            # Move the cluster axis to the proper place:
            # Shape(p) = [Nn,..,K,..,N0]
            p = utils.moveaxis(p, -1, self.cluster_plate)
            # Add axes for variable dimensions to the contributions:
            # Shape(p) = [Nn,..,K,..,N0,1,..,1]
            p = utils.add_trailing_axes(p, D)

            # TODO: You could do summing here already so that you wouldn't
            # compute huge matrices as intermediate result. Use einsum.

            # Message contributions for each cluster:
            # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]
            m[i] = m[i] * p

        return m

    return None
def compute_phi_from_parents(self, *u_parents, mask=True):
    """
    Compute the natural parameters as a weighted average over the clusters.

    ``u_parents[0][0]`` holds the contributions/weights/probabilities of
    the clusters and ``u_parents[1:]`` are handed to the mixed distribution
    to obtain the cluster parameters.  The `mask` argument is accepted but
    not used in this method.

    Shapes (general broadcasting rules apply to Nn,..,N0: preceding
    dimensions may be missing or equal to one):
        Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        Shape(p)      = [Nn,..,N0,K]
        Shape(result) = [Nn,..,N0,Dd,..,D0]

    A warning is issued if any resulting parameter contains NaNs.

    Raises RuntimeError if ``self.cluster_plate`` is not negative.
    """
    # Cluster parameters
    Phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
    # Contributions/weights/probabilities
    P = u_parents[0][0]

    phi = list()
    found_nans = False

    for (ind, Phi_ind) in enumerate(Phi):
        if not self.cluster_plate < 0:
            raise RuntimeError("Cluster plate should be negative")

        ndim = self.ndims[ind]
        cluster_axis = self.cluster_plate - ndim

        # Move cluster axis to the last:
        # Shape(phi) = [Nn,..,N0,Dd,..,D0,K]
        if np.ndim(Phi_ind) >= abs(cluster_axis):
            phi_k = utils.moveaxis(Phi_ind, cluster_axis, -1)
        else:
            # The array does not reach the cluster axis, so append a new
            # last axis in its place.
            phi_k = Phi_ind[..., None]

        # Add axes to p:
        # Shape(p) = [Nn,..,N0,K,1,..,1]
        p = utils.add_trailing_axes(P, ndim)
        # Move cluster axis to the last:
        # Shape(p) = [Nn,..,N0,1,..,1,K]
        p = utils.moveaxis(p, -(ndim + 1), -1)

        # Now the shapes broadcast and p*phi can be summed over the last
        # axis:
        # Shape(result) = [Nn,..,N0,Dd,..,D0]
        weighted = utils.sum_product(p, phi_k, axes_to_sum=-1)
        phi.append(weighted)

        # Remember NaNs so that a single warning is given after the loop.
        if np.any(np.isnan(weighted)):
            found_nans = True

    if found_nans:
        warnings.warn(
            "The natural parameters of mixture distribution "
            "contain nans. This may happen if you use fixed "
            "parameters in your model. Technically, one possible "
            "reason is that the cluster assignment probability "
            "for some element is zero (p=0) and the natural "
            "parameter of that cluster is -inf, thus "
            "0*(-inf)=nan. Solution: Use parameters that assign "
            "non-zero probabilities for the whole domain."
        )

    return phi
def compute_message_to_parent(self, parent, index, u, *u_parents):
    """
    Compute the message of the mixture to the parent at position `index`.

    For ``index == 0`` the message is the log-pdf evaluated separately for
    each cluster, returned as a one-element list:
        Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        Shape(u)      = [Nn,..,N0,Dd,..,D0]
        Shape(result) = [Nn,..,N0,K]

    For ``index >= 1`` the message of the mixed distribution to its parent
    ``index - 1`` is weighted with the responsibilities
    ``u_parents[0][0]``:
        Shape(m)      = [Nn,..,K,..,N0,Dd,..,D0]
        Shape(p)      = [Nn,..,N0,K]
        Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]

    Any other `index` yields ``None``.

    Raises RuntimeError (in the ``index == 0`` branch) if
    ``self.cluster_plate`` is not negative.
    """
    if index == 0:
        # Compute g and move the cluster axis to the last:
        # Shape(g) = [Nn,..,K,..,N0]  ->  [Nn,..,N0,K]
        g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))
        g = utils.moveaxis(g, self.cluster_plate, -1)

        # Compute phi:
        # Shape(phi) = [Nn,..,K,..,N0,Dd,..,D0]
        phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))

        # Move phi axis:
        # Shape(phi) = [Nn,..,N0,K,Dd,..,D0]
        for (ind, phi_ind) in enumerate(phi):
            if not self.cluster_plate < 0:
                raise RuntimeError("Cluster plate axis must be negative")
            axis_from = self.cluster_plate - self.distribution.ndims[ind]
            axis_to = -1 - self.distribution.ndims[ind]

            if np.ndim(phi_ind) >= abs(axis_from):
                # Cluster plate axis exists, move it to the correct position
                phi[ind] = utils.moveaxis(phi_ind, axis_from, axis_to)
            elif np.ndim(phi_ind) >= abs(axis_to):
                # No cluster plate axis: just add a new axis to the correct
                # position, provided phi has something on that axis
                phi[ind] = np.expand_dims(phi_ind, axis=axis_to)

        # Add a singleton cluster axis to u:
        # Shape(u) = [Nn,..,N0,1,Dd,..,D0]
        u_self = [
            np.expand_dims(u_ind, axis=(-1 - self.distribution.ndims[ind]))
            for (ind, u_ind) in enumerate(u)
        ]

        # Compute logpdf:
        # Shape(L) = [Nn,..,N0,K]
        # No summing over the non-cluster dimensions here; the original
        # author expected the message passing method to take care of it.
        L = self.distribution.compute_logpdf(u_self, phi, g, 0)
        return [L]

    if index >= 1:
        # Parent index for the distribution used for the mixture.
        index = index - 1

        # Add a singleton axis for the clusters to u:
        # Shape(u) = [Nn,..1,..,N0,Dd,..,D0]
        u_self = list()
        for (ind, u_ind) in enumerate(u):
            if self.cluster_plate < 0:
                cluster_axis = self.cluster_plate - self.distribution.ndims[ind]
            else:
                cluster_axis = self.cluster_plate
            u_self.append(np.expand_dims(u_ind, axis=cluster_axis))

        # Message from the mixed distribution
        m = self.distribution.compute_message_to_parent(
            parent,
            index,
            u_self,
            *(u_parents[1:])
        )

        # Weigh the messages with the responsibilities
        for i in range(len(m)):
            # Number of axes for the variable dimensions of the parent
            # message
            D = self.distribution.ndims_parents[index][i]

            # Responsibilities are the first parent's first moment:
            # Shape(p) = [Nn,..,N0,K]
            p = utils.atleast_nd(u_parents[0][0], abs(self.cluster_plate))
            # Move the cluster axis to the proper place:
            # Shape(p) = [Nn,..,K,..,N0]
            p = utils.moveaxis(p, -1, self.cluster_plate)
            # Add axes for variable dimensions to the contributions:
            # Shape(p) = [Nn,..,K,..,N0,1,..,1]
            p = utils.add_trailing_axes(p, D)

            # TODO: You could do summing here already so that you wouldn't
            # compute huge matrices as intermediate result. Use einsum.

            # Message contributions for each cluster:
            # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]
            m[i] = m[i] * p

        return m

    return None
def _compute_message_to_parent(parent, index, u, *u_parents):
    """
    Compute the message of the mixture to the parent at position `index`.

    For ``index == 0`` the message is the log-pdf evaluated separately for
    each cluster, returned as a one-element list:
        Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        Shape(u)      = [Nn,..,N0,Dd,..,D0]
        Shape(result) = [Nn,..,N0,K]

    For ``index >= 1`` the message of the mixed distribution to its parent
    ``index - 1`` is weighted with the responsibilities
    ``u_parents[0][0]``:
        Shape(m)      = [Nn,..,K,..,N0,Dd,..,D0]
        Shape(p)      = [Nn,..,N0,K]
        Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]

    Any other `index` yields ``None``.  The `parent` argument is not used
    in this function.
    """
    if index == 0:
        # Compute g and move the cluster axis to the last:
        # Shape(g) = [Nn,..,K,..,N0]  ->  [Nn,..,N0,K]
        g = distribution._compute_cgf_from_parents(*(u_parents[1:]))
        g = utils.moveaxis(g, cluster_plate, -1)

        # Compute phi and move the cluster axis just before the variable
        # dimensions:
        # Shape(phi) = [Nn,..,K,..,N0,Dd,..,D0]  ->  [Nn,..,N0,K,Dd,..,D0]
        phi = distribution._compute_phi_from_parents(*(u_parents[1:]))
        for (ind, phi_ind) in enumerate(phi):
            ndim = distribution.ndims[ind]
            phi[ind] = utils.moveaxis(phi_ind, cluster_plate - ndim, -1 - ndim)

        # Add a singleton cluster axis to u:
        # Shape(u) = [Nn,..,N0,1,Dd,..,D0]
        u_self = [
            np.expand_dims(u_ind, axis=(-1 - distribution.ndims[ind]))
            for (ind, u_ind) in enumerate(u)
        ]

        # Compute logpdf:
        # Shape(L) = [Nn,..,N0,K]
        # No summing over the non-cluster dimensions here; the original
        # author expected the message passing method to take care of it.
        L = distribution._compute_logpdf(u_self, phi, g, 0)
        return [L]

    if index >= 1:
        # Parent index for the distribution used for the mixture.
        index = index - 1

        # Add a singleton axis for the clusters to u:
        # Shape(u) = [Nn,..1,..,N0,Dd,..,D0]
        u_self = list()
        for (ind, u_ind) in enumerate(u):
            if cluster_plate < 0:
                cluster_axis = cluster_plate - distribution.ndims[ind]
            else:
                cluster_axis = cluster_plate
            u_self.append(np.expand_dims(u_ind, axis=cluster_axis))

        # Message from the mixed distribution
        m = distribution._compute_message_to_parent(
            index,
            u_self,
            *(u_parents[1:])
        )

        # Weigh the messages with the responsibilities
        for i in range(len(m)):
            # Number of axes for the variable dimensions of the parent
            # message
            D = distribution.ndims_parents[index][i]

            # Responsibilities are the first parent's first moment.  Move
            # the cluster axis to its place and add axes for the variable
            # dimensions:
            # Shape(p) = [Nn,..,N0,K] -> [Nn,..,K,..,N0] -> [..,1,..,1]
            p = utils.moveaxis(u_parents[0][0], -1, cluster_plate)
            p = utils.add_trailing_axes(p, D)

            # TODO: You could do summing here already so that you wouldn't
            # compute huge matrices as intermediate result. Use einsum.

            # Message contributions for each cluster:
            # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]
            m[i] = m[i] * p

        return m

    return None