def _update_phi_from_parents(self, *u_parents):
    """Recompute ``self.phi`` from the parents' moments and fix its axes.

    The parameters are obtained from
    ``self._distribution.compute_phi_from_parents`` and stored in
    ``self.phi`` as a list.  Each element ``i`` is then given exactly
    ``len(self.plates) + self.ndims[i]`` axes: missing leading axes are
    added and surplus leading axes are removed with a reshape.

    This is deliberately kept separate from the distribution/lower-bound
    update because some initialization methods may want to use it alone.

    Parameters
    ----------
    *u_parents
        Parent moments, forwarded unchanged to
        ``self._distribution.compute_phi_from_parents``.

    Raises
    ------
    ValueError
        If ``misc.is_shape_subset`` rejects the shape of ``self.phi[i]``
        against ``self.get_shape(i)``.  ``np.reshape`` also raises
        ``ValueError`` when the surplus leading axes cannot be dropped
        without changing the number of elements.
    """
    # This makes correct broadcasting
    self.phi = list(self._distribution.compute_phi_from_parents(*u_parents))

    # Make sure phi has the correct number of axes.  It makes life a bit
    # easier elsewhere.
    n_plates = len(self.plates)
    for i, phi_i in enumerate(self.phi):
        missing = n_plates + self.ndims[i] - np.ndim(phi_i)
        if missing > 0:
            # Add axes
            phi_i = misc.add_leading_axes(phi_i, missing)
        elif missing < 0:
            # Remove extra leading axes.  Slice from the front by the number
            # of surplus axes: slicing with ``[-target_ndim:]`` would keep
            # the whole shape when the target number of axes is zero,
            # because ``[-0:]`` is ``[0:]``.
            sh = np.shape(phi_i)[-missing:]
            phi_i = np.reshape(phi_i, sh)
        self.phi[i] = phi_i

        # Check that the shape is correct
        if not misc.is_shape_subset(np.shape(phi_i), self.get_shape(i)):
            raise ValueError("Incorrect shape of phi[%d] in node class %s. "
                             "Shape is %s but it should be broadcastable "
                             "to shape %s."
                             % (i,
                                self.__class__.__name__,
                                np.shape(phi_i),
                                self.get_shape(i)))
def _update_phi_from_parents(self, *u_parents):
    """Refresh ``self.phi`` from parent moments with a normalised axis count.

    ``self._distribution.compute_phi_from_parents(*u_parents)`` supplies the
    parameters, which are stored in ``self.phi`` as a list.  Afterwards every
    element ``i`` has ``len(self.plates) + self.ndims[i]`` axes: leading axes
    are added when there are too few and stripped (via reshape) when there
    are too many.

    The method is not merged into the distribution/lower-bound update
    because some initialization methods may want to call it directly.

    Parameters
    ----------
    *u_parents
        Moments of the parents, passed straight through to
        ``self._distribution.compute_phi_from_parents``.

    Raises
    ------
    ValueError
        When the resulting shape of ``self.phi[i]`` is not accepted by
        ``misc.is_shape_subset`` for ``self.get_shape(i)``, or when
        ``np.reshape`` cannot strip the surplus leading axes because that
        would change the number of elements.
    """
    # This makes correct broadcasting
    phi = self._distribution.compute_phi_from_parents(*u_parents)
    self.phi = list(phi)

    # Make sure phi has the correct number of axes.  It makes life a bit
    # easier elsewhere.
    for i, value in enumerate(self.phi):
        wanted_ndim = len(self.plates) + self.ndims[i]
        surplus = np.ndim(value) - wanted_ndim
        if surplus < 0:
            # Too few axes: add leading ones.
            self.phi[i] = misc.add_leading_axes(value, -surplus)
        elif surplus > 0:
            # Too many axes: keep only the trailing ``wanted_ndim`` ones.
            # Indexing from the front is required here; the equivalent
            # negative slice ``[-wanted_ndim:]`` silently keeps every axis
            # when ``wanted_ndim`` is 0.
            trailing_shape = np.shape(value)[surplus:]
            self.phi[i] = np.reshape(value, trailing_shape)

        # Check that the shape is correct
        if not misc.is_shape_subset(np.shape(self.phi[i]),
                                    self.get_shape(i)):
            raise ValueError(
                "Incorrect shape of phi[%d] in node class %s. "
                "Shape is %s but it should be broadcastable "
                "to shape %s." % (i, self.__class__.__name__,
                                  np.shape(self.phi[i]), self.get_shape(i)))
def _compute_message_to_parent(self, index, m_child, u_Z, u_X):
    """Compute the message sent to the parent selected by ``index``.

    Parameters
    ----------
    index : int
        ``0`` or ``1``.  For ``0`` the message is built from ``m_child`` and
        ``u_X``; for ``1`` it is built from ``m_child`` and ``u_Z[0]``.
    m_child : sequence of arrays
        Message from the children, one array per entry of ``self.dims``.
    u_Z : sequence of arrays
        Only ``u_Z[0]`` is read (presumably the moments of parent 0 --
        confirm against the caller).
    u_X : sequence of arrays
        Indexed in step with ``m_child`` (presumably the moments of
        parent 1 -- confirm against the caller).

    Returns
    -------
    list
        For ``index == 0`` a one-element list holding the summed message;
        for ``index == 1`` a list with one array per element of ``m_child``.

    Raises
    ------
    ValueError
        If ``index`` is neither 0 nor 1.
    """
    if index == 0:
        total = 0
        # Compute Child * X, sum over the variable axes and leave the gated
        # axis as the last one.  Both operands need some reshaping so that
        # they broadcast against each other.
        for i, child_msg in enumerate(m_child):
            n_var = len(self.dims[i])
            gate_slot = -n_var - 1
            # Append an axis for the gate and place it just in front of the
            # variable axes.
            child = misc.moveaxis(child_msg[..., None], -1, gate_slot)
            gated_axis = self.gated_plate - n_var
            x = u_X[i]
            if np.ndim(x) < abs(gated_axis):
                # X has too few axes to contain the gated axis: insert one.
                x = np.expand_dims(x, gate_slot)
            else:
                x = misc.moveaxis(x, gated_axis, gate_slot)
            variable_axes = tuple(range(-n_var, 0))
            total = total + misc.sum_product(child, x,
                                             axes_to_sum=variable_axes)
        # Make sure the variable axis does not use broadcasting
        total = total * np.ones(self.K)
        # Send the message
        return [total]
    elif index == 1:
        messages = []
        for i, child_msg in enumerate(m_child):
            # Make the moments of Z and the message from the children
            # broadcastable.  The gated plate is handled as the last axis
            # of the arrays and moved to its final position at the end.
            n_var = len(self.dims[i])
            # Add variable axes to the Z moments, then rotate the gate axis
            # to the end.
            z = misc.moveaxis(misc.add_trailing_axes(u_Z[0], n_var),
                              -n_var - 1,
                              -1)
            # Axis index of the gated plate
            gated_axis = self.gated_plate - n_var
            # Add the gate axis to the message from the children and
            # compute the message to the parent.
            msg = z * misc.add_trailing_axes(child_msg, 1)
            # Add extra axes if necessary
            if np.ndim(msg) < abs(gated_axis):
                msg = misc.add_leading_axes(msg,
                                            abs(gated_axis) - np.ndim(msg))
            # Move the axis to the correct position
            messages.append(misc.moveaxis(msg, -1, gated_axis))
        return messages
    else:
        raise ValueError("Invalid parent index")
def _compute_message_to_parent(self, index, m_child, u_Z, u_X):
    """Build the message for parent ``index`` from the children's message.

    Parameters
    ----------
    index : int
        Parent selector.  ``0`` combines ``m_child`` with ``u_X``; ``1``
        combines ``m_child`` with ``u_Z[0]``.
    m_child : sequence of arrays
        Message from the children; element ``i`` pairs with
        ``self.dims[i]``.
    u_Z : sequence of arrays
        Only the first element is used (assumed to be the moments of the
        first parent -- TODO confirm).
    u_X : sequence of arrays
        Element ``i`` pairs with ``m_child[i]`` (assumed to be the moments
        of the second parent -- TODO confirm).

    Returns
    -------
    list
        ``[m0]`` for ``index == 0``; one array per child-message element
        for ``index == 1``.

    Raises
    ------
    ValueError
        For any other ``index``.
    """
    if index == 0:
        # Accumulate Child * X over all moments.  The variable axes are
        # summed out, which leaves the gated axis last.
        acc = 0
        for i in range(len(m_child)):
            nd = len(self.dims[i])
            before_vars = -nd - 1

            c = m_child[i][..., None]
            c = misc.moveaxis(c, -1, before_vars)

            gated_axis = self.gated_plate - nd
            x = u_X[i]
            has_gated_axis = not (np.ndim(x) < abs(gated_axis))
            if has_gated_axis:
                x = misc.moveaxis(x, gated_axis, before_vars)
            else:
                x = np.expand_dims(x, before_vars)

            acc = acc + misc.sum_product(c,
                                         x,
                                         axes_to_sum=tuple(range(-nd, 0)))

        # Multiply by ones so the gated axis is materialised rather than
        # broadcast.
        return [acc * np.ones(self.K)]

    if index == 1:
        out = []
        for i in range(len(m_child)):
            nd = len(self.dims[i])

            # Z moments: append the variable axes, then rotate the gate
            # axis to the very end so it lines up with the child message.
            z = misc.add_trailing_axes(u_Z[0], nd)
            z = misc.moveaxis(z, -nd - 1, -1)

            # Final position of the gated plate, counted from the end.
            gated_axis = self.gated_plate - nd

            # Child message with a trailing gate axis, times Z.
            mi = z * misc.add_trailing_axes(m_child[i], 1)

            # Pad with leading axes until the gated position exists.
            shortfall = abs(gated_axis) - np.ndim(mi)
            if shortfall > 0:
                mi = misc.add_leading_axes(mi, shortfall)

            # Put the gate axis where it belongs.
            out.append(misc.moveaxis(mi, -1, gated_axis))
        return out

    raise ValueError("Invalid parent index")
def _set_moments(self, u, mask=True, broadcast=True):
    """Store the moments ``u`` into ``self.u`` where ``mask`` is true.

    The shapes are validated with ``self._check_shape`` first.  For every
    moment, ``self.u[ind]`` is enlarged so that it can hold the broadcast
    result and the new values are copied in only where the mask selects
    them; other entries keep their previous values.

    Note that this method is also used to set observations, so the mask is
    entirely the caller's responsibility: to update only latent variables,
    pass a mask that excludes the observed plates.

    Parameters
    ----------
    u : sequence of arrays
        New moments, one per element of ``self.u``.
    mask : bool or array, optional
        Plate mask (plate axes only); trailing axes for the variable
        dimensions are added here.  Defaults to ``True`` (update all).
    broadcast : bool, optional
        Forwarded to ``self._check_shape``.

    Raises
    ------
    RuntimeError
        If a stored moment ends up with more axes than
        ``self.get_shape(ind)``; the initial shape check should make this
        impossible.
    """
    self._check_shape(u, broadcast=broadcast)

    # Store the computed moments u but do not change moments that the mask
    # excludes.
    for ind, u_new in enumerate(u):
        # Add axes to the mask for the variable dimensions (mask contains
        # only axes for the plates).
        u_mask = misc.add_trailing_axes(mask, self.ndims[ind])

        # Enlarge self.u[ind] as necessary so that it can store the
        # broadcasted result.
        sh = misc.broadcasted_shape_from_arrays(self.u[ind], u_new, u_mask)
        self.u[ind] = misc.repeat_to_shape(self.u[ind], sh)

        # Use the mask to update only the selected plates and keep the
        # others as before.
        np.copyto(self.u[ind], u_new, where=u_mask)

        # Make sure u has the correct number of dimensions:
        shape = self.get_shape(ind)
        ndim = len(shape)
        ndim_u = np.ndim(self.u[ind])
        if ndim > ndim_u:
            # Pad the *stored* array.  ndim_u was measured on self.u[ind],
            # and padding the raw input instead would discard the masked
            # merge done above and alias the caller's array.
            self.u[ind] = misc.add_leading_axes(self.u[ind], ndim - ndim_u)
        elif ndim < ndim_u:
            # This should not ever happen because we already checked the
            # shape at the beginning of the function.
            raise RuntimeError(
                "This error should not happen. Fix shape checking."
                "The size of the variable %s's %s-th moment "
                "array is %s which is larger than it should "
                "be, that is, %s, based on the plates %s and "
                "dimension %s. Check that you have provided "
                "plates properly."
                % (self.name,
                   ind,
                   np.shape(self.u[ind]),
                   shape,
                   self.plates,
                   self.dims[ind]))
def lower_bound_contribution(self, gradient=False):
    """Compute E[ log p(X|parents) - log q(X) ] over q(X)q(parents).

    The per-plate value starts from the cumulant generating function
    computed from the parents, adds ``self.f`` on observed plates and
    ``-self.g`` on latent ones, and then adds, for every moment, the sum
    over the variable axes of ``(phi_p - phi_q) * u_q`` (with ``phi_q``
    zeroed on observed plates).

    Parameters
    ----------
    gradient : bool, optional
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    scalar
        Sum of the per-plate values where ``self.mask`` is true, multiplied
        by ``self._plate_multiplier(self.plates, np.shape(L),
        np.shape(self.mask))``.
    """
    # Messages from parents
    u_parents = self._message_from_parents()
    phi = self._distribution.compute_phi_from_parents(*u_parents)

    # G from parents
    L = self._distribution.compute_cgf_from_parents(*u_parents)

    # G for unobserved variables (ignored variables are handled properly
    # automatically)
    latent_mask = np.logical_not(self.observed)

    # F for observed, G for latent
    L = L + np.where(self.observed, self.f, -self.g)

    # The plate part of the latent mask is the same for every moment, so
    # give it the full number of plate axes once.
    plate_latent_mask = misc.add_leading_axes(
        latent_mask,
        len(self.plates) - np.ndim(latent_mask))

    for (phi_p, phi_q, u_q, dims) in zip(phi, self.phi, self.u, self.dims):
        # Form a mask which puts observed variables to zero and broadcasts
        # properly
        latent_mask_i = misc.add_trailing_axes(plate_latent_mask, len(dims))
        axis_sum = tuple(range(-len(dims), 0))

        # Compute the term
        phi_q = np.where(latent_mask_i, phi_q, 0)
        phi_diff = phi_p - phi_q
        # Handle 0 * -inf: a zero moment contributes nothing even when the
        # matching natural parameter is infinite; without this the product
        # is NaN and poisons the whole bound.
        phi_diff = np.where(u_q != 0, phi_diff, 0)
        # TODO/FIXME: Use einsum here?
        Z = np.sum(phi_diff * u_q, axis=axis_sum)

        L = L + Z

    return (np.sum(np.where(self.mask, L, 0))
            * self._plate_multiplier(self.plates,
                                     np.shape(L),
                                     np.shape(self.mask)))
def lower_bound_contribution(self, gradient=False, ignore_masked=True):
    r"""Compute E[ log p(X|parents) - log q(X) ]

    If deterministic annealing is used, the term E[ -log q(X) ] is
    divided by the annealing coefficient.  That is, phi and cgf of q are
    multiplied by the temperature (inverse annealing coefficient).

    Parameters
    ----------
    gradient : bool, optional
        Accepted for interface compatibility; not used by this method.
    ignore_masked : bool, optional
        If true (default), plates where ``self.mask`` is false contribute
        zero.  Otherwise every plate is summed.

    Returns
    -------
    scalar
        The summed per-plate values multiplied by
        ``self.broadcasting_multiplier(...)`` and by
        ``np.prod(self.plates_multiplier)``.
    """
    # Annealing temperature
    T = 1 / self.annealing

    # Messages from parents
    u_parents = self._message_from_parents()
    phi = self._distribution.compute_phi_from_parents(*u_parents)

    # G from parents
    L = self._distribution.compute_cgf_from_parents(*u_parents)

    # G for unobserved variables (ignored variables are handled properly
    # automatically)
    latent_mask = np.logical_not(self.observed)

    # G and F
    if np.all(self.observed):
        # Never selected by the np.where below: every plate takes self.f.
        z = np.nan
    elif T == 1:
        z = -self.g
    else:
        # NOTE: ``-T * self.g + (1 - T) * self.f`` was tried here and was
        # wrong: the optimal q distribution has f weighted by 1/T and here
        # the f of q is weighted by T, so the total weight is 1 and it
        # cancels out with the f of p.
        z = -T * self.g

    L = L + np.where(self.observed, self.f, z)

    for (phi_p, phi_q, u_q, dims) in zip(phi, self.phi, self.u, self.dims):
        # Form a mask which puts observed variables to zero and broadcasts
        # properly
        latent_mask_i = misc.add_trailing_axes(
            misc.add_leading_axes(
                latent_mask,
                len(self.plates) - np.ndim(latent_mask)),
            len(dims))
        axis_sum = tuple(range(-len(dims), 0))

        # Compute the term
        phi_q = np.where(latent_mask_i, phi_q, 0)
        # Apply annealing
        phi_diff = phi_p - T * phi_q
        # Handle 0 * -inf: a zero moment contributes nothing even when the
        # matching natural parameter is infinite; without this the product
        # is NaN and poisons the whole bound.
        phi_diff = np.where(u_q != 0, phi_diff, 0)
        # TODO/FIXME: Use einsum here?
        Z = np.sum(phi_diff * u_q, axis=axis_sum)

        L = L + Z

    if ignore_masked:
        return (np.sum(np.where(self.mask, L, 0))
                * self.broadcasting_multiplier(self.plates,
                                               np.shape(L),
                                               np.shape(self.mask))
                * np.prod(self.plates_multiplier))
    else:
        return (np.sum(L)
                * self.broadcasting_multiplier(self.plates, np.shape(L))
                * np.prod(self.plates_multiplier))
def lower_bound_contribution(self, gradient=False, ignore_masked=True):
    r"""Return this node's term E[ log p(X|parents) - log q(X) ].

    With deterministic annealing, the E[ -log q(X) ] part is divided by the
    annealing coefficient: both phi and the cgf of q are scaled by the
    temperature ``1 / self.annealing``.

    Parameters
    ----------
    gradient : bool, optional
        Not used by this method.
    ignore_masked : bool, optional
        When true (default) plates with a false ``self.mask`` are replaced
        by zero before summing; when false all plates are summed.

    Returns
    -------
    scalar
        Summed per-plate values times ``self.broadcasting_multiplier(...)``
        times ``np.prod(self.plates_multiplier)``.
    """
    temperature = 1 / self.annealing

    # Moments of the parents and the prior's natural parameters
    parent_moments = self._message_from_parents()
    phi_prior = self._distribution.compute_phi_from_parents(*parent_moments)

    # Start from the cgf given by the parents
    bound = self._distribution.compute_cgf_from_parents(*parent_moments)

    # Latent plates are the ones that are not observed (ignored variables
    # are handled properly automatically)
    is_latent = np.logical_not(self.observed)

    # cgf contribution of q on latent plates
    if np.all(self.observed):
        # Placeholder only: with everything observed the np.where below
        # always picks self.f.
        q_term = np.nan
    elif temperature == 1:
        q_term = -self.g
    else:
        # Using -T*g + (1-T)*f here was tried and is wrong: the optimal q
        # has f weighted by 1/T while the f of q is weighted by T here, so
        # the total weight is 1 and it cancels with the f of p.
        q_term = -temperature * self.g

    bound = bound + np.where(self.observed, self.f, q_term)

    n_plates = len(self.plates)
    for (phi_p, phi_q, u_q, dims) in zip(phi_prior,
                                         self.phi,
                                         self.u,
                                         self.dims):
        n_var = len(dims)

        # Latent mask with all plate axes in front and one trailing axis
        # per variable dimension, so that it broadcasts against phi_q.
        padded = misc.add_leading_axes(is_latent,
                                       n_plates - np.ndim(is_latent))
        mask_i = misc.add_trailing_axes(padded, n_var)

        # Observed entries of q's parameters do not contribute
        phi_q = np.where(mask_i, phi_q, 0)

        # Annealed difference of natural parameters
        delta = phi_p - temperature * phi_q
        # Avoid 0 * -inf = nan where the moment itself is zero
        delta = np.where(u_q != 0, delta, 0)

        # TODO/FIXME: Use einsum here?
        bound = bound + np.sum(delta * u_q,
                               axis=tuple(range(-n_var, 0)))

    if not ignore_masked:
        return (np.sum(bound)
                * self.broadcasting_multiplier(self.plates, np.shape(bound))
                * np.prod(self.plates_multiplier))

    return (np.sum(np.where(self.mask, bound, 0))
            * self.broadcasting_multiplier(self.plates,
                                           np.shape(bound),
                                           np.shape(self.mask))
            * np.prod(self.plates_multiplier))