Example #1
    def _update_phi_from_parents(self, *u_parents):

        # TODO/FIXME: Could this be combined with the function
        # _update_distribution_and_lowerbound?
        # No, because some initialization methods may want to use this.

        # This ensures correct broadcasting
        self.phi = self._distribution.compute_phi_from_parents(*u_parents)
        #self.phi = self._compute_phi_from_parents(*u_parents)
        self.phi = list(self.phi)
        # Make sure phi has the correct number of axes. It makes life
        # a bit easier elsewhere.
        for i in range(len(self.phi)):
            axes = len(self.plates) + self.ndims[i] - np.ndim(self.phi[i])
            if axes > 0:
                # Add axes
                self.phi[i] = misc.add_leading_axes(self.phi[i], axes)
            elif axes < 0:
                # Remove extra leading axes
                first = -(len(self.plates)+self.ndims[i])
                sh = np.shape(self.phi[i])[first:]
                self.phi[i] = np.reshape(self.phi[i], sh)
            # Check that the shape is correct
            if not misc.is_shape_subset(np.shape(self.phi[i]),
                                         self.get_shape(i)):
                raise ValueError("Incorrect shape of phi[%d] in node class %s. "
                                 "Shape is %s but it should be broadcastable "
                                 "to shape %s."
                                 % (i,
                                    self.__class__.__name__,
                                    np.shape(self.phi[i]),
                                    self.get_shape(i)))
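
A minimal NumPy-only sketch of the axis bookkeeping done in the loop above. The helper name align_leading_axes and the shapes are made up for illustration; plates and dims stand in for self.plates and self.dims[i], and np.reshape plays the role of misc.add_leading_axes.

import numpy as np

def align_leading_axes(phi_i, plates, dims):
    """Pad or trim leading axes so phi_i has len(plates) + len(dims) axes.

    A simplified stand-in for the loop body above; any extra leading
    axes are assumed to be singletons, which is the same assumption the
    np.reshape call in the original code makes.
    """
    target_ndim = len(plates) + len(dims)
    diff = target_ndim - np.ndim(phi_i)
    if diff > 0:
        # Add singleton leading axes (the role of misc.add_leading_axes)
        phi_i = np.reshape(phi_i, (1,) * diff + tuple(np.shape(phi_i)))
    elif diff < 0:
        # Drop the extra leading axes, keeping the trailing target_ndim axes
        phi_i = np.reshape(phi_i, np.shape(phi_i)[-target_ndim:])
    return phi_i

# A natural parameter of shape (3,) aligned to plates (4, 1) and dims (3,):
phi = np.zeros(3)
print(align_leading_axes(phi, plates=(4, 1), dims=(3,)).shape)  # (1, 1, 3)
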
Example #2
    def _compute_message_to_parent(self, index, m_child, u_Z, u_X):
        """
        """
        if index == 0:
            m0 = 0
            # Compute Child * X, sum over the variable axes and move the gated
            # axis to be the last.  Some reshaping is needed to make Child and
            # X broadcast properly.
            for i in range(len(m_child)):
                ndim = len(self.dims[i])
                c = m_child[i][...,None]
                c = misc.moveaxis(c, -1, -ndim-1)
                gated_axis = self.gated_plate - ndim
                x = u_X[i]
                if np.ndim(x) < abs(gated_axis):
                    x = np.expand_dims(x, -ndim-1)
                else:
                    x = misc.moveaxis(x, gated_axis, -ndim-1)
                axes = tuple(range(-ndim, 0))
                m0 = m0 + misc.sum_product(c, x, axes_to_sum=axes)

            # Make sure the variable axis does not use broadcasting
            m0 = m0 * np.ones(self.K)

            # Send the message
            m = [m0]
            return m

        elif index == 1:

            m = []
            for i in range(len(m_child)):
                # Make the moments of Z and the message from children
                # broadcastable. The gated plate is handled as the last axis in
                # the arrays and moved to the correct position at the end.

                # Add variable axes to Z moments
                ndim = len(self.dims[i])
                z = misc.add_trailing_axes(u_Z[0], ndim)
                z = misc.moveaxis(z, -ndim-1, -1)
                # Axis index of the gated plate
                gated_axis = self.gated_plate - ndim
                # Add the gate axis to the message from the children
                c = misc.add_trailing_axes(m_child[i], 1)
                # Compute the message to parent
                mi = z * c
                # Add extra axes if necessary
                if np.ndim(mi) < abs(gated_axis):
                    mi = misc.add_leading_axes(mi,
                                                abs(gated_axis) - np.ndim(mi))
                # Move the axis to the correct position
                mi = misc.moveaxis(mi, -1, gated_axis)
                m.append(mi)

            return m

        else:
            raise ValueError("Invalid parent index")
Example #3
    def _set_moments(self, u, mask=True, broadcast=True):

        self._check_shape(u, broadcast=broadcast)

        # Store the computed moments u but do not change moments for
        # observations, i.e., utilize the mask.
        for ind in range(len(u)):
            # Add axes to the mask for the variable dimensions (mask
            # contains only axes for the plates).
            u_mask = misc.add_trailing_axes(mask, self.ndims[ind])

            # Enlarge self.u[ind] as necessary so that it can store the
            # broadcasted result.
            sh = misc.broadcasted_shape_from_arrays(self.u[ind], u[ind], u_mask)
            self.u[ind] = misc.repeat_to_shape(self.u[ind], sh)

            # TODO/FIXME/BUG: The mask of observations is not used here, so
            # observations may be overwritten!
            #
            # Note: this function is also used to set the observations, so the
            # caller must choose the mask carefully. To update only the latent
            # variables, pass a mask that excludes the observed plates.

            # Use mask to update only unobserved plates and keep the
            # observed as before
            np.copyto(self.u[ind],
                      u[ind],
                      where=u_mask)

            # Make sure u has the correct number of dimensions:
            shape = self.get_shape(ind)
            ndim = len(shape)
            ndim_u = np.ndim(self.u[ind])
            if ndim > ndim_u:
                self.u[ind] = misc.add_leading_axes(u[ind], ndim - ndim_u)
            elif ndim < ndim_u:
                # This should not ever happen because we already checked the
                # shape at the beginning of the function.
                raise RuntimeError(
                    "This error should not happen. Fix shape checking."
                    "The size of the variable %s's %s-th moment "
                    "array is %s which is larger than it should "
                    "be, that is, %s, based on the plates %s and "
                    "dimension %s. Check that you have provided "
                    "plates properly."
                    % (self.name,
                       ind,
                       np.shape(self.u[ind]), 
                       shape,
                       self.plates,
                       self.dims[ind]))
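
A minimal sketch of the masked update above using plain NumPy and made-up shapes (four plates, one moment of dimension (2,)); the trailing axis added to the mask plays the role of misc.add_trailing_axes.

import numpy as np

old_u = np.zeros((4, 2))    # current moments: 4 plates, dims (2,)
new_u = np.ones((4, 2))     # newly computed moments
mask = np.array([True, False, True, False])   # update only these plates

# Add a trailing axis so the plate mask broadcasts over the moment
# dimensions (what misc.add_trailing_axes does above).
u_mask = mask[:, None]

# Overwrite only where the mask is True; the other plates keep their
# previous (e.g. observed) values.
np.copyto(old_u, new_u, where=u_mask)
print(old_u)   # rows 0 and 2 are ones, rows 1 and 3 stay zeros
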
Example #4
    def lower_bound_contribution(self, gradient=False):
        # Compute E[ log p(X|parents) - log q(X) ] over q(X)q(parents)
        
        # Messages from parents
        #u_parents = [parent.message_to_child() for parent in self.parents]
        u_parents = self._message_from_parents()
        phi = self._distribution.compute_phi_from_parents(*u_parents)
        # G from parents
        L = self._distribution.compute_cgf_from_parents(*u_parents)
        # L = g
        # G for unobserved variables (ignored variables are handled
        # properly automatically)
        latent_mask = np.logical_not(self.observed)
        #latent_mask = np.logical_and(self.mask, np.logical_not(self.observed))
        # F for observed, G for latent
        L = L + np.where(self.observed, self.f, -self.g)
        for (phi_p, phi_q, u_q, dims) in zip(phi, self.phi, self.u, self.dims):
            # Form a mask which puts observed variables to zero and
            # broadcasts properly
            latent_mask_i = misc.add_trailing_axes(
                                misc.add_leading_axes(
                                    latent_mask,
                                    len(self.plates) - np.ndim(latent_mask)),
                                len(dims))
            axis_sum = tuple(range(-len(dims),0))

            # Compute the term
            phi_q = np.where(latent_mask_i, phi_q, 0)
            # TODO/FIXME: Use einsum here?
            Z = np.sum((phi_p-phi_q) * u_q, axis=axis_sum)

            L = L + Z

        return (np.sum(np.where(self.mask, L, 0))
                * self._plate_multiplier(self.plates,
                                         np.shape(L),
                                         np.shape(self.mask)))
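
The TODO/FIXME above asks whether np.einsum could replace the explicit product-and-sum of (phi_p - phi_q) * u_q. For trailing variable axes the two agree; a quick check with made-up shapes (3 plates, dimension 4):

import numpy as np

phi_p = np.random.rand(3, 4)
phi_q = np.random.rand(3, 4)
u_q = np.random.rand(3, 4)

# Explicit form used above: multiply and sum over the variable axes.
Z_sum = np.sum((phi_p - phi_q) * u_q, axis=(-1,))

# einsum form suggested by the TODO (one trailing variable axis here).
Z_einsum = np.einsum('...d,...d->...', phi_p - phi_q, u_q)

print(np.allclose(Z_sum, Z_einsum))  # True
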
Example #5
    def lower_bound_contribution(self, gradient=False, ignore_masked=True):
        r"""Compute E[ log p(X|parents) - log q(X) ]

        If deterministic annealing is used, the term E[ -log q(X) ] is
        divided by the annealing coefficient.  That is, phi and cgf of q
        are multiplied by the temperature (the inverse annealing
        coefficient).
        
        """

        # Annealing temperature
        T = 1 / self.annealing
        
        # Messages from parents
        u_parents = self._message_from_parents()
        phi = self._distribution.compute_phi_from_parents(*u_parents)
        # G from parents
        L = self._distribution.compute_cgf_from_parents(*u_parents)

        # G for unobserved variables (ignored variables are handled properly
        # automatically)
        latent_mask = np.logical_not(self.observed)

        # G and F
        if np.all(self.observed):
            z = np.nan
        elif T == 1:
            z = -self.g
        else:
            z = -T * self.g
            ## TRIED THIS BUT IT WAS WRONG:
            ## z = -T * self.g + (1-T) * self.f
            ## if np.any(np.isnan(self.f)):
            ##     warnings.warn("F(x) not implemented for node %s. This "
            ##                   "is required for annealed lower bound "
            ##                   "computation." % self.__class__.__name__)
            ##
            ## It was wrong because the optimal q distribution has f which is
            ## weighted by 1/T and here the f of q is weighted by T so the
            ## total weight is 1, thus it cancels out with f of p.

        L = L + np.where(self.observed, self.f, z)

        for (phi_p, phi_q, u_q, dims) in zip(phi, self.phi, self.u, self.dims):
            # Form a mask which puts observed variables to zero and
            # broadcasts properly
            latent_mask_i = misc.add_trailing_axes(
                                misc.add_leading_axes(
                                    latent_mask,
                                    len(self.plates) - np.ndim(latent_mask)),
                                len(dims))
            axis_sum = tuple(range(-len(dims),0))

            # Compute the term
            phi_q = np.where(latent_mask_i, phi_q, 0)
            # Apply annealing
            # TODO/FIXME: Use einsum here?
            Z = np.sum((phi_p-T*phi_q) * u_q, axis=axis_sum)

            L = L + Z

        if ignore_masked:
            return (np.sum(np.where(self.mask, L, 0))
                    * self.broadcasting_multiplier(self.plates,
                                                   np.shape(L),
                                                   np.shape(self.mask))
                    * np.prod(self.plates_multiplier))
        else:
            return (np.sum(L)
                    * self.broadcasting_multiplier(self.plates,
                                                   np.shape(L))
                    * np.prod(self.plates_multiplier))
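
The final sum is scaled by broadcasting_multiplier so that plate axes over which L happened to be broadcast (missing or of size one) are still counted. Below is a simplified, hypothetical stand-in that captures the idea, not the library's actual implementation.

import numpy as np

def broadcasting_multiplier(plates, *shapes):
    # Simplified stand-in: multiply by the size of every plate axis
    # that is broadcast (missing or of size 1) in all given shapes.
    multiplier = 1
    for axis, plate in enumerate(plates):
        neg = axis - len(plates)
        sizes = [shape[neg] for shape in shapes if len(shape) >= -neg]
        if all(size == 1 for size in sizes):
            multiplier *= plate
    return multiplier

# L computed for plates (10, 3) but broadcast over the first plate:
L = np.random.rand(1, 3)
print(broadcasting_multiplier((10, 3), np.shape(L)))  # 10
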
Example #6
    def lower_bound_contribution(self, gradient=False, ignore_masked=True):
        r"""Compute E[ log p(X|parents) - log q(X) ]

        If deterministic annealing is used, the term E[ -log q(X) ] is
        divided by the annealing coefficient.  That is, phi and cgf of q
        are multiplied by the temperature (the inverse annealing
        coefficient).
        
        """

        # Annealing temperature
        T = 1 / self.annealing

        # Messages from parents
        u_parents = self._message_from_parents()
        phi = self._distribution.compute_phi_from_parents(*u_parents)
        # G from parents
        L = self._distribution.compute_cgf_from_parents(*u_parents)

        # G for unobserved variables (ignored variables are handled properly
        # automatically)
        latent_mask = np.logical_not(self.observed)

        # G and F
        if np.all(self.observed):
            z = np.nan
        elif T == 1:
            z = -self.g
        else:
            z = -T * self.g
            ## TRIED THIS BUT IT WAS WRONG:
            ## z = -T * self.g + (1-T) * self.f
            ## if np.any(np.isnan(self.f)):
            ##     warnings.warn("F(x) not implemented for node %s. This "
            ##                   "is required for annealed lower bound "
            ##                   "computation." % self.__class__.__name__)
            ##
            ## It was wrong because the optimal q distribution has f which is
            ## weighted by 1/T and here the f of q is weighted by T so the
            ## total weight is 1, thus it cancels out with f of p.

        L = L + np.where(self.observed, self.f, z)

        for (phi_p, phi_q, u_q, dims) in zip(phi, self.phi, self.u, self.dims):
            # Form a mask which puts observed variables to zero and
            # broadcasts properly
            latent_mask_i = misc.add_trailing_axes(
                misc.add_leading_axes(latent_mask,
                                      len(self.plates) - np.ndim(latent_mask)),
                len(dims))
            axis_sum = tuple(range(-len(dims), 0))

            # Compute the term
            phi_q = np.where(latent_mask_i, phi_q, 0)
            # Apply annealing
            phi_diff = phi_p - T * phi_q
            # Handle 0 * -inf
            phi_diff = np.where(u_q != 0, phi_diff, 0)
            # TODO/FIXME: Use einsum here?
            Z = np.sum(phi_diff * u_q, axis=axis_sum)

            L = L + Z

        if ignore_masked:
            return (np.sum(np.where(self.mask, L, 0)) *
                    self.broadcasting_multiplier(self.plates, np.shape(L),
                                                 np.shape(self.mask)) *
                    np.prod(self.plates_multiplier))
        else:
            return (np.sum(L) *
                    self.broadcasting_multiplier(self.plates, np.shape(L)) *
                    np.prod(self.plates_multiplier))
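
The extra np.where over phi_diff is what distinguishes this version from the previous one: in NumPy, 0 * inf evaluates to nan, so a moment that is exactly zero next to an infinite natural parameter would poison the whole sum. A tiny illustration:

import numpy as np

u_q = np.array([0.0, 0.5])
phi_diff = np.array([-np.inf, 2.0])

with np.errstate(invalid='ignore'):
    naive = np.sum(phi_diff * u_q)                        # nan
safe = np.sum(np.where(u_q != 0, phi_diff, 0) * u_q)      # 1.0
print(naive, safe)
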