Example #1
    def cost(self, Y, Y_hat):
        """
        Cost for convnets is hardcoded to be the cost for sigmoids.
        TODO: move the cost into the non-linearity class.

        Parameters
        ----------
        Y : theano.gof.Variable
            Output of `fprop`
        Y_hat : theano.gof.Variable
            Targets

        Returns
        -------
        cost : theano.gof.Variable
            0-D tensor describing the cost

        Notes
        -----
        The cost is the KL divergence KL(P || Q), where P is defined by Y
        and Q is defined by Y_hat, averaged first across units and then
        across the batch:

        KL(P || Q) = p log p - p log q + (1-p) log (1-p) - (1-p) log (1-q)
        """
        assert self.nonlin.non_lin_name == "sigmoid", (
            "ConvElemwise only supports the cost function for sigmoid "
            "layers for now.")
        batch_axis = self.output_space.get_batch_axis()
        ave_total = kl(Y=Y, Y_hat=Y_hat, batch_axis=batch_axis)
        ave = ave_total.mean()
        return ave
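
For concreteness, here is a minimal NumPy sketch of the quantity the method above computes: the elementwise KL divergence from the Notes, averaged across units and then across the batch. This is illustrative only, not pylearn2 code; the helper name binary_kl and the sample arrays are made up for this example.

import numpy as np

def binary_kl(p, q, eps=1e-7):
    # Elementwise KL(P || Q) for Bernoulli distributions parameterized by p
    # and q, following the formula in the docstring above. Clipping avoids
    # log(0); for binary p it also realizes the 0 log 0 = 0 convention.
    p = np.clip(p, eps, 1.0 - eps)
    q = np.clip(q, eps, 1.0 - eps)
    return (p * np.log(p) - p * np.log(q)
            + (1.0 - p) * np.log(1.0 - p) - (1.0 - p) * np.log(1.0 - q))

Y = np.array([[0.0, 1.0], [1.0, 0.0]])      # binary targets, shape (batch, units)
Y_hat = np.array([[0.1, 0.8], [0.6, 0.3]])  # sigmoid outputs, same shape
per_unit = binary_kl(Y, Y_hat)              # elementwise KL
per_example = per_unit.mean(axis=1)         # mean across units
cost = per_example.mean()                   # mean across batch -> scalar, like `ave`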
Example #2
def test_kl():
    """
    Test whether function kl() has properly processed the input.
    """
    init_mode = theano.config.compute_test_value
    theano.config.compute_test_value = 'raise'
    
    try:
        mlp = MLP(layers=[Sigmoid(dim=10, layer_name='Y', irange=0.1)],
                  nvis=10)
        X = mlp.get_input_space().make_theano_batch()
        Y = mlp.get_output_space().make_theano_batch()
        X.tag.test_value = np.random.random(
            get_debug_values(X)[0].shape).astype(theano.config.floatX)
        Y_hat = mlp.fprop(X)

        # This call should not raise any error:
        ave = kl(Y, Y_hat, 1)

        # The following calls should raise ValueError exceptions:
        Y.tag.test_value[2][3] = 1.1
        np.testing.assert_raises(ValueError, kl, Y, Y_hat, 1)
        Y.tag.test_value[2][3] = -0.1
        np.testing.assert_raises(ValueError, kl, Y, Y_hat, 1)
    
    finally:
        theano.config.compute_test_value = init_mode
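
The test relies on kl() inspecting the Theano test values attached to Y and raising ValueError when the targets fall outside [0, 1]. A rough sketch of such a check is shown below; it is an assumption for illustration (including the helper name check_targets_in_unit_interval), not the actual pylearn2 implementation.

from theano.gof.op import get_debug_values

def check_targets_in_unit_interval(Y):
    # Iterate over any test values attached to Y (the generator is empty
    # when test values are disabled) and reject targets that are not
    # valid probabilities.
    for Y_val in get_debug_values(Y):
        if Y_val.min() < 0.0 or Y_val.max() > 1.0:
            raise ValueError("Expected Y to contain values in [0, 1].")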
Example #3
    def kl(self, Y, Y_hat):
        """
        Computes the KL divergence.


        Parameters
        ----------
        Y : Variable
            targets for the sigmoid outputs. Currently Y must be purely binary.
            If it's not, you'll still get the right gradient, but the
            value in the monitoring channel will be wrong.
        Y_hat : Variable
            predictions made by the sigmoid layer. Y_hat must be generated by
            fprop, i.e., it must be a symbolic sigmoid.

        Returns
        -------
        ave : Variable
            average kl divergence between Y and Y_hat.

        Notes
        -----
        Warning: This function expects a sigmoid nonlinearity in the
        output layer and it uses kl function under pylearn2/expr/nnet/.
        Returns a batch (vector) of mean across units of KL
        divergence for each example,
        KL(P || Q) where P is defined by Y and Q is defined by Y_hat:

        p log p - p log q + (1-p) log (1-p) - (1-p) log (1-q)
        For binary p, some terms drop out:
        - p log q - (1-p) log (1-q)
        - p log sigmoid(z) - (1-p) log sigmoid(-z)
        p softplus(-z) + (1-p) softplus(z)
        """
        batch_axis = self.output_space.get_batch_axis()
        div = kl(Y=Y, Y_hat=Y_hat, batch_axis=batch_axis)
        return div
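
As a sanity check on the simplification in the Notes, the following standalone NumPy snippet (illustrative only, not part of pylearn2) verifies numerically that -p log sigmoid(z) - (1-p) log sigmoid(-z) equals p softplus(-z) + (1-p) softplus(z) for binary targets p and arbitrary pre-sigmoid activations z:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def softplus(z):
    return np.log1p(np.exp(z))

rng = np.random.RandomState(0)
p = rng.randint(0, 2, size=100).astype(float)   # binary targets
z = rng.randn(100) * 3.0                        # pre-sigmoid activations

lhs = -p * np.log(sigmoid(z)) - (1.0 - p) * np.log(sigmoid(-z))
rhs = p * softplus(-z) + (1.0 - p) * softplus(z)
assert np.allclose(lhs, rhs)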