Example 1
 def test_kl_multivariate_normal_0d(self):
     with self.test_session():
         loc_one = tf.constant(0.0)
         scale_one = tf.constant(1.0)
         self.assertAllClose(
             kl_multivariate_normal(loc_one, scale_one).eval(), 0.0)
         loc_one = tf.constant(10.0)
         scale_one = tf.constant(2.0)
         self.assertAllClose(
             kl_multivariate_normal(loc_one, scale_one).eval(), 50.806854)
         loc_one = tf.constant(0.0)
         scale_one = tf.constant(1.0)
         loc_two = tf.constant(0.0)
         scale_two = tf.constant(1.0)
         self.assertAllClose(
             kl_multivariate_normal(loc_one,
                                    scale_one,
                                    loc_two=loc_two,
                                    scale_two=scale_two).eval(), 0.0)
         loc_one = tf.constant(10.0)
         scale_one = tf.constant(2.0)
         loc_two = tf.constant(10.0)
         scale_two = tf.constant(5.0)
         self.assertAllClose(
             kl_multivariate_normal(loc_one,
                                    scale_one,
                                    loc_two=loc_two,
                                    scale_two=scale_two).eval(),
             0.496290802)
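The expected constants in this test follow from the closed-form KL divergence between diagonal Gaussians, KL(N(mu_1, sigma_1^2) || N(mu_2, sigma_2^2)) = log(sigma_2 / sigma_1) + (sigma_1^2 + (mu_1 - mu_2)^2) / (2 sigma_2^2) - 1/2, summed over dimensions. A minimal NumPy check of the constants, assuming kl_multivariate_normal sums these per-dimension terms (the test values are consistent with that assumption):

import numpy as np

def expected_kl(loc_one, scale_one, loc_two=0.0, scale_two=1.0):
    # Closed-form KL(N(loc_one, scale_one^2) || N(loc_two, scale_two^2)),
    # summed over dimensions for vector-valued inputs.
    loc_one, scale_one = np.asarray(loc_one), np.asarray(scale_one)
    loc_two, scale_two = np.asarray(loc_two), np.asarray(scale_two)
    return np.sum(np.log(scale_two / scale_one) +
                  (scale_one ** 2 + (loc_one - loc_two) ** 2) /
                  (2.0 * scale_two ** 2) - 0.5)

print(expected_kl(10.0, 2.0))             # about 50.806854
print(expected_kl(10.0, 2.0, 10.0, 5.0))  # about 0.4962908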
Example 2
 def test_kl_multivariate_normal_2d(self):
     with self.test_session():
         loc_one = tf.constant([[0.0, 0.0], [0.0, 0.0]])
         scale_one = tf.constant([[1.0, 1.0], [1.0, 1.0]])
         self.assertAllClose(
             kl_multivariate_normal(loc_one, scale_one).eval(),
             np.array([0.0, 0.0]))
         loc_one = tf.constant([[10.0, 10.0], [10.0, 10.0]])
         scale_one = tf.constant([[2.0, 2.0], [2.0, 2.0]])
         self.assertAllClose(
             kl_multivariate_normal(loc_one, scale_one).eval(),
             np.array([101.61370849, 101.61370849]))
         loc_one = tf.constant([[10.0, 10.0], [10.0, 10.0]])
         scale_one = tf.constant([[2.0, 2.0], [2.0, 2.0]])
         loc_two = tf.constant([[10.0, 10.0], [10.0, 10.0]])
         scale_two = tf.constant([[2.0, 2.0], [2.0, 2.0]])
         self.assertAllClose(
             kl_multivariate_normal(loc_one,
                                    scale_one,
                                    loc_two=loc_two,
                                    scale_two=scale_two).eval(),
             np.array([0.0, 0.0]))
         loc_one = tf.constant([[10.0, 10.0], [0.0, 0.0]])
         scale_one = tf.constant([[2.0, 2.0], [1.0, 1.0]])
         loc_two = tf.constant([[9.0, 9.0], [0.0, 0.0]])
         scale_two = tf.constant([[1.0, 1.0], [1.0, 1.0]])
         self.assertAllClose(
             kl_multivariate_normal(loc_one,
                                    scale_one,
                                    loc_two=loc_two,
                                    scale_two=scale_two).eval(),
             np.array([2.6137056350, 0.0]))
Example 3
 def test_kl_multivariate_normal_0d(self):
   with self.test_session():
     loc_one = tf.constant(0.0)
     scale_one = tf.constant(1.0)
     self.assertAllClose(kl_multivariate_normal(loc_one,
                                                scale_one).eval(),
                         0.0)
     loc_one = tf.constant(10.0)
     scale_one = tf.constant(2.0)
     self.assertAllClose(kl_multivariate_normal(loc_one,
                                                scale_one).eval(),
                         50.806854)
     loc_one = tf.constant(0.0)
     scale_one = tf.constant(1.0)
     loc_two = tf.constant(0.0)
     scale_two = tf.constant(1.0)
     self.assertAllClose(kl_multivariate_normal(loc_one,
                                                scale_one,
                                                loc_two=loc_two,
                                                scale_two=scale_two).eval(),
                         0.0)
     loc_one = tf.constant(10.0)
     scale_one = tf.constant(2.0)
     loc_two = tf.constant(10.0)
     scale_two = tf.constant(5.0)
     self.assertAllClose(kl_multivariate_normal(loc_one,
                                                scale_one,
                                                loc_two=loc_two,
                                                scale_two=scale_two).eval(),
                         0.496290802)
Example 4
def build_reparam_kl_loss(inference):
    """Build loss function. Its automatic differentiation
  is a stochastic gradient of

  .. math::

    -ELBO =  - ( E_{q(z; \lambda)} [ \log p(x | z) ]
          + KL(q(z; \lambda) || p(z)) )

  based on the reparameterization trick (Kingma and Welling, 2014).

  It assumes the KL is analytic.

  For model wrappers, it assumes the prior is :math:`p(z) =
  \mathcal{N}(z; 0, 1)`.

  Computed by sampling from :math:`q(z;\lambda)` and evaluating the
  expectation using Monte Carlo sampling.
  """
    p_log_lik = [0.0] * inference.n_samples
    for s in range(inference.n_samples):
        z_sample = {}
        for z, qz in six.iteritems(inference.latent_vars):
            # Copy q(z) to obtain new set of posterior samples.
            qz_copy = copy(qz, scope='inference_' + str(s))
            z_sample[z] = qz_copy.value()

        if inference.model_wrapper is None:
            # Form dictionary in order to replace conditioning on prior or
            # observed variable with conditioning on posterior sample or
            # observed data.
            dict_swap = z_sample
            for x, obs in six.iteritems(inference.data):
                if isinstance(x, RandomVariable):
                    dict_swap[x] = obs

            for x, obs in six.iteritems(inference.data):
                if isinstance(x, RandomVariable):
                    x_copy = copy(x, dict_swap, scope='inference_' + str(s))
                    p_log_lik[s] += tf.reduce_sum(x_copy.log_prob(obs))
        else:
            x = inference.data
            p_log_lik[s] = inference.model_wrapper.log_lik(x, z_sample)

    p_log_lik = tf.pack(p_log_lik)

    if inference.model_wrapper is None:
        kl = tf.reduce_sum([
            tf.reduce_sum(
                kl_multivariate_normal(qz.mu, qz.sigma, z.mu, z.sigma))
            for z, qz in six.iteritems(inference.latent_vars)
        ])
    else:
        kl = tf.reduce_sum([
            tf.reduce_sum(kl_multivariate_normal(qz.mu, qz.sigma))
            for qz in six.itervalues(inference.latent_vars)
        ])

    loss = -(tf.reduce_mean(p_log_lik) - kl)
    return loss
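The loss above pairs a Monte Carlo estimate of E_{q(z; lambda)}[log p(x | z)], computed on reparameterized posterior samples, with the analytic KL term. A self-contained NumPy sketch of the same quantity for a toy Normal-Normal model; the model, data, and variational parameters here are illustrative, not Edward code:

import numpy as np

np.random.seed(0)

# Toy model: p(z) = N(0, 1), p(x | z) = N(z, 1); variational family q(z) = N(mu, sigma^2).
x_data = np.array([0.8, 1.2, 1.0])
mu, sigma = 0.9, 0.5
n_samples = 5

# Reparameterized samples z = mu + sigma * eps keep an explicit, differentiable
# dependence on (mu, sigma), which is what the reparameterization trick relies on.
eps = np.random.randn(n_samples)
z_samples = mu + sigma * eps

def log_lik(x, z):
    # sum_i log N(x_i; z, 1)
    return np.sum(-0.5 * np.log(2.0 * np.pi) - 0.5 * (x - z) ** 2)

p_log_lik = np.array([log_lik(x_data, z) for z in z_samples])
kl = np.log(1.0 / sigma) + (sigma ** 2 + mu ** 2) / 2.0 - 0.5  # KL(q(z) || N(0, 1))
loss = -(np.mean(p_log_lik) - kl)  # same form as the returned loss above
print(loss)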
Example 5
 def test_kl_multivariate_normal_2d(self):
   with self.test_session():
     loc_one = tf.constant([[0.0, 0.0], [0.0, 0.0]])
     scale_one = tf.constant([[1.0, 1.0], [1.0, 1.0]])
     self.assertAllClose(kl_multivariate_normal(loc_one,
                                                scale_one).eval(),
                         np.array([0.0, 0.0]))
     loc_one = tf.constant([[10.0, 10.0], [10.0, 10.0]])
     scale_one = tf.constant([[2.0, 2.0], [2.0, 2.0]])
     self.assertAllClose(kl_multivariate_normal(loc_one,
                                                scale_one).eval(),
                         np.array([101.61370849, 101.61370849]))
     loc_one = tf.constant([[10.0, 10.0], [10.0, 10.0]])
     scale_one = tf.constant([[2.0, 2.0], [2.0, 2.0]])
     loc_two = tf.constant([[10.0, 10.0], [10.0, 10.0]])
     scale_two = tf.constant([[2.0, 2.0], [2.0, 2.0]])
     self.assertAllClose(kl_multivariate_normal(loc_one,
                                                scale_one,
                                                loc_two=loc_two,
                                                scale_two=scale_two).eval(),
                         np.array([0.0, 0.0]))
     loc_one = tf.constant([[10.0, 10.0], [0.0, 0.0]])
     scale_one = tf.constant([[2.0, 2.0], [1.0, 1.0]])
     loc_two = tf.constant([[9.0, 9.0], [0.0, 0.0]])
     scale_two = tf.constant([[1.0, 1.0], [1.0, 1.0]])
     self.assertAllClose(kl_multivariate_normal(loc_one,
                                                scale_one,
                                                loc_two=loc_two,
                                                scale_two=scale_two).eval(),
                         np.array([2.6137056350, 0.0]))
Example 6
    def build_score_loss_kl(self):
        """Build loss function. Its automatic differentiation
    is a stochastic gradient of

    .. math::

      -ELBO =  - ( E_{q(z; \lambda)} [ \log p(x | z) ]
             + KL(q(z; \lambda) || p(z)) )

    based on the score function estimator. (Paisley et al., 2012)

    It assumes the KL is analytic.

    For model wrappers, it assumes the prior is :math:`p(z) =
    \mathcal{N}(z; 0, 1)`.

    Computed by sampling from :math:`q(z;\lambda)` and evaluating the
    expectation using Monte Carlo sampling.
    """
        p_log_lik = [0.0] * self.n_samples
        q_log_prob = [0.0] * self.n_samples
        for s in range(self.n_samples):
            z_sample = {}
            for z, qz in six.iteritems(self.latent_vars):
                # Copy q(z) to obtain new set of posterior samples.
                qz_copy = copy(qz, scope='inference_' + str(s))
                z_sample[z] = qz_copy.value()
                q_log_prob[s] += tf.reduce_sum(
                    qz.log_prob(tf.stop_gradient(z_sample[z])))

            if self.model_wrapper is None:
                for x, obs in six.iteritems(self.data):
                    if isinstance(x, RandomVariable):
                        # Copy p(x | z), replacing any conditioning on prior with
                        # conditioning on posterior sample.
                        x_copy = copy(x,
                                      dict_swap=z_sample,
                                      scope='inference_' + str(s))
                        p_log_lik[s] += tf.reduce_sum(x_copy.log_prob(obs))
            else:
                x = self.data
                p_log_lik[s] = self.model_wrapper.log_lik(x, z_sample)

        p_log_lik = tf.pack(p_log_lik)
        q_log_prob = tf.pack(q_log_prob)

        if self.model_wrapper is None:
            kl = tf.reduce_sum([
                kl_multivariate_normal(qz.mu, qz.sigma, z.mu, z.sigma)
                for z, qz in six.iteritems(self.latent_vars)
            ])
        else:
            kl = tf.reduce_sum([
                kl_multivariate_normal(qz.mu, qz.sigma)
                for qz in six.itervalues(self.latent_vars)
            ])

        self.loss = tf.reduce_mean(p_log_lik) - kl
        return -(tf.reduce_mean(q_log_prob * tf.stop_gradient(p_log_lik)) - kl)
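Note that the returned expression is a surrogate rather than the negative ELBO itself: because p_log_lik is wrapped in tf.stop_gradient, differentiating q_log_prob * tf.stop_gradient(p_log_lik) with respect to the variational parameters reproduces the score-function identity grad E_q[f(z)] = E_q[f(z) * grad log q(z; lambda)], while self.loss holds the actual ELBO estimate for reporting. A tiny NumPy check of that identity; the target f(z) = z^2 and the parameter value are illustrative:

import numpy as np

np.random.seed(0)
mu = 1.5
z = mu + np.random.randn(100000)  # z ~ N(mu, 1)
f = z ** 2                        # stand-in for log p(x | z)
score = z - mu                    # d/d(mu) of log N(z; mu, 1)
print(np.mean(f * score))         # score-function estimate, roughly 3.0
print(2.0 * mu)                   # exact d/d(mu) E[z^2] = 2 * mu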
Example 7
    def build_reparam_loss_kl(self):
        """Build loss function. Its automatic differentiation
        is a stochastic gradient of

        .. math::

            -ELBO = -( E_{q(z; \lambda)} [ \log p(x | z) ]
                       - KL(q(z; \lambda) || p(z)) )

        based on the reparameterization trick (Kingma and Welling, 2014).

        It assumes the KL is analytic.

        It assumes the prior is :math:`p(z) = \mathcal{N}(z; 0, 1)`.

        Computed by sampling from :math:`q(z;\lambda)` and evaluating the
        expectation using Monte Carlo sampling.
        """
        x = self.data
        z = self.variational.sample(self.n_samples)

        mu = tf.pack([layer.loc for layer in self.variational.layers])
        sigma = tf.pack([layer.scale for layer in self.variational.layers])
        self.loss = tf.reduce_mean(self.model.log_lik(x, z)) - \
                    kl_multivariate_normal(mu, sigma)
        return -self.loss
Example 8
    def build_score_loss_kl(self):
        """Build loss function. Its automatic differentiation
        is a stochastic gradient of

        .. math::

            -ELBO = -( E_{q(z; \lambda)} [ \log p(x | z) ]
                       - KL(q(z; \lambda) || p(z)) )

        based on the score function estimator (Paisley et al., 2012).

        It assumes the KL is analytic.

        It assumes the prior is :math:`p(z) = \mathcal{N}(z; 0, 1)`.

        Computed by sampling from :math:`q(z;\lambda)` and evaluating the
        expectation using Monte Carlo sampling.
        """
        x = self.data
        z = self.variational.sample(self.n_samples)

        q_log_prob = self.variational.log_prob(tf.stop_gradient(z))
        p_log_lik = self.model.log_lik(x, z)
        mu = tf.pack([layer.loc for layer in self.variational.layers])
        sigma = tf.pack([layer.scale for layer in self.variational.layers])
        kl = kl_multivariate_normal(mu, sigma)
        self.loss = tf.reduce_mean(p_log_lik) - kl
        return -(tf.reduce_mean(q_log_prob * tf.stop_gradient(p_log_lik)) - kl)
Example 9
    def build_loss(self):
        # ELBO = E_{q(z | x)} [ log p(x | z) ] - KL(q(z | x) || p(z))
        with tf.variable_scope("model") as scope:
            x = self.x
            # TODO samples 1 set of latent variables for each data point
            z, _ = self.variational.sample(x, self.n_data)

            mu = tf.pack([layer.m for layer in self.variational.layers])
            sigma = tf.pack([layer.s for layer in self.variational.layers])
            self.loss = tf.reduce_sum(self.model.log_lik(x, z)) - \
                        kl_multivariate_normal(mu, sigma)

        return -self.loss
Example 11
    def build_loss(self):
        # ELBO = E_{q(z | x)} [ log p(x | z) ] - KL(q(z | x) || p(z))
        # In general, there should be a scale factor due to data
        # subsampling, so that
        # ELBO = N / M * ( ELBO using x_b )
        # where x_b is a mini-batch of x, with sizes M and N respectively.
        # This is absorbed into the learning rate.
        with tf.variable_scope("model") as scope:
            self.variational.set_params(self.variational.mapping(self.x))
            z = self.variational.sample(self.n_data)
            self.losses = tf.reduce_sum(self.model.log_likelihood(self.x, z)) - \
                          kl_multivariate_normal(self.variational.m,
                                                 self.variational.s)

        return -self.losses
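As a concrete illustration of the scale factor mentioned in the comment (numbers here are hypothetical):

N, M = 1000, 100      # hypothetical full-data and mini-batch sizes
scale = float(N) / M  # full-data ELBO is approximately scale * (ELBO using x_b)
# Since scale is a constant, minimizing scale * loss with learning rate lr is the
# same as minimizing loss with learning rate scale * lr, which is why the factor
# can be absorbed into the learning rate as the comment above notes.
print(scale)  # 10.0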
Example 13
    def build_reparam_loss_kl(self):
        """
        Loss function to minimize, whose gradient is a stochastic
        gradient based on the reparameterization trick.

        ELBO = E_{q(z; lambda)} [ log p(x | z) ] - KL(q(z; lambda) || p(z))
        where the KL term is analytic.

        It assumes the model prior is p(z) = N(z; 0, 1).
        """
        x = self.data.sample(self.n_data)
        z, self.samples = self.variational.sample(x, self.n_minibatch)

        mu = tf.pack([layer.m for layer in self.variational.layers])
        sigma = tf.pack([layer.s for layer in self.variational.layers])
        self.loss = tf.reduce_mean(self.model.log_lik(x, z)) - \
                    kl_multivariate_normal(mu, sigma)
        return -self.loss
Example 15
    def build_score_loss_kl(self):
        """
        Loss function to minimize, whose gradient is a stochastic
        gradient based on the score function estimator.

        ELBO = E_{q(z; lambda)} [ log p(x | z) ] - KL(q(z; lambda) || p(z))
        where the KL term is analytic.

        It assumes the model prior is p(z) = N(z; 0, 1).
        """
        x = self.data.sample(self.n_data)
        z, self.samples = self.variational.sample(x, self.n_minibatch)

        q_log_prob = tf.zeros([self.n_minibatch], dtype=tf.float32)
        for i in range(self.variational.num_factors):
            q_log_prob += self.variational.log_prob_zi(i, tf.stop_gradient(z))

        p_log_lik = self.model.log_lik(x, z)
        mu = tf.pack([layer.m for layer in self.variational.layers])
        sigma = tf.pack([layer.s for layer in self.variational.layers])
        kl = kl_multivariate_normal(mu, sigma)
        self.loss = tf.reduce_mean(p_log_lik) - kl
        return -(tf.reduce_mean(q_log_prob * tf.stop_gradient(p_log_lik)) - kl)
Example 17
 def test_kl_multivariate_normal_1d(self):
   with self.test_session():
     loc_one = tf.constant([0.0])
     scale_one = tf.constant([1.0])
     self.assertAllClose(kl_multivariate_normal(loc_one,
                                                scale_one).eval(),
                         0.0)
     loc_one = tf.constant([10.0])
     scale_one = tf.constant([2.0])
     self.assertAllClose(kl_multivariate_normal(loc_one,
                                                scale_one).eval(),
                         50.806854)
     loc_one = tf.constant([10.0])
     scale_one = tf.constant([2.0])
     loc_two = tf.constant([10.0])
     scale_two = tf.constant([2.0])
     self.assertAllClose(kl_multivariate_normal(loc_one,
                                                scale_one,
                                                loc_two=loc_two,
                                                scale_two=scale_two).eval(),
                         0.0)
     loc_one = tf.constant([0.0, 0.0])
     scale_one = tf.constant([1.0, 1.0])
     self.assertAllClose(kl_multivariate_normal(loc_one,
                                                scale_one).eval(),
                         0.0)
     loc_one = tf.constant([10.0, 10.0])
     scale_one = tf.constant([2.0, 2.0])
     self.assertAllClose(kl_multivariate_normal(loc_one,
                                                scale_one).eval(),
                         101.61370849)
     loc_one = tf.constant([10.0, 10.0])
     scale_one = tf.constant([2.0, 2.0])
     loc_two = tf.constant([9.0, 9.0])
     scale_two = tf.constant([1.0, 1.0])
     self.assertAllClose(kl_multivariate_normal(loc_one,
                                                scale_one,
                                                loc_two=loc_two,
                                                scale_two=scale_two).eval(),
                         2.6137056350)
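The two-element cases follow the same closed form as above: the diagonal-Gaussian KL sums per-dimension terms, so 101.61370849 is exactly twice the scalar value 50.806854, and the last constant can be checked the same way (assuming the per-dimension formula stated earlier):

import numpy as np

# Per-dimension KL(N(10, 2^2) || N(9, 1^2)), summed over the 2 dimensions.
per_dim = np.log(1.0 / 2.0) + (2.0 ** 2 + (10.0 - 9.0) ** 2) / 2.0 - 0.5
print(2.0 * per_dim)  # about 2.6137056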
Example 18
 def test_kl_multivariate_normal_1d(self):
     with self.test_session():
         loc_one = tf.constant([0.0])
         scale_one = tf.constant([1.0])
         self.assertAllClose(
             kl_multivariate_normal(loc_one, scale_one).eval(), 0.0)
         loc_one = tf.constant([10.0])
         scale_one = tf.constant([2.0])
         self.assertAllClose(
             kl_multivariate_normal(loc_one, scale_one).eval(), 50.806854)
         loc_one = tf.constant([10.0])
         scale_one = tf.constant([2.0])
         loc_two = tf.constant([10.0])
         scale_two = tf.constant([2.0])
         self.assertAllClose(
             kl_multivariate_normal(loc_one,
                                    scale_one,
                                    loc_two=loc_two,
                                    scale_two=scale_two).eval(), 0.0)
         loc_one = tf.constant([0.0, 0.0])
         scale_one = tf.constant([1.0, 1.0])
         self.assertAllClose(
             kl_multivariate_normal(loc_one, scale_one).eval(), 0.0)
         loc_one = tf.constant([10.0, 10.0])
         scale_one = tf.constant([2.0, 2.0])
         self.assertAllClose(
             kl_multivariate_normal(loc_one, scale_one).eval(),
             101.61370849)
         loc_one = tf.constant([10.0, 10.0])
         scale_one = tf.constant([2.0, 2.0])
         loc_two = tf.constant([9.0, 9.0])
         scale_two = tf.constant([1.0, 1.0])
         self.assertAllClose(
             kl_multivariate_normal(loc_one,
                                    scale_one,
                                    loc_two=loc_two,
                                    scale_two=scale_two).eval(),
             2.6137056350)
Example 19
 def test_constraint_raises(self):
   with self.test_session():
     loc_one = tf.constant(10.0)
     scale_one = tf.constant(-1.0)
     loc_two = tf.constant(10.0)
     scale_two = tf.constant(-1.0)
     with self.assertRaisesOpError('Condition'):
       kl_multivariate_normal(loc_one,
                              scale_one).eval()
       kl_multivariate_normal(loc_one,
                              scale_one,
                              loc_two=loc_two,
                              scale_two=scale_two).eval()
     loc_one = np.inf * tf.constant(10.0)
     scale_one = tf.constant(1.0)
     loc_two = tf.constant(10.0)
     scale_two = tf.constant(1.0)
     with self.assertRaisesOpError('Inf'):
       kl_multivariate_normal(loc_one,
                              scale_one).eval()
       kl_multivariate_normal(loc_one,
                              scale_one,
                              loc_two=loc_two,
                              scale_two=scale_two).eval()
     loc_one = tf.constant(10.0)
     scale_one = tf.constant(1.0)
     loc_two = np.nan * tf.constant(10.0)
     scale_two = tf.constant(1.0)
     with self.assertRaisesOpError('NaN'):
       kl_multivariate_normal(loc_one,
                              scale_one).eval()
       kl_multivariate_normal(loc_one,
                              scale_one,
                              loc_two=loc_two,
                              scale_two=scale_two).eval()
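The error substrings checked here ('Condition', 'Inf', 'NaN') are the kind produced by TensorFlow's argument-validation ops. A sketch of how such checks are commonly attached to inputs, assuming tf.assert_positive and tf.verify_tensor_all_finite; this is illustrative, not necessarily how kl_multivariate_normal implements its checks:

import tensorflow as tf

def validated_inputs(loc, scale):
    # Reject non-finite locations and non-positive scales before any KL math runs.
    checks = [tf.verify_tensor_all_finite(loc, msg='loc must be finite'),
              tf.assert_positive(scale)]
    with tf.control_dependencies(checks):
        return tf.identity(loc), tf.identity(scale)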
Example 20
File: klqp.py, Project: zizai/edward
def build_score_kl_loss_and_gradients(inference, var_list):
  """Build loss function and gradients based on the score function
  estimator (Paisley et al., 2012).

  It assumes the KL is analytic.

  For model wrappers, it assumes the prior is :math:`p(z) =
  \mathcal{N}(z; 0, 1)`.

  Computed by sampling from :math:`q(z;\lambda)` and evaluating the
  expectation using Monte Carlo sampling.
  """
  p_log_lik = [0.0] * inference.n_samples
  q_log_prob = [0.0] * inference.n_samples
  for s in range(inference.n_samples):
    scope = 'inference_' + str(id(inference)) + '/' + str(s)
    z_sample = {}
    for z, qz in six.iteritems(inference.latent_vars):
      # Copy q(z) to obtain new set of posterior samples.
      qz_copy = copy(qz, scope=scope)
      z_sample[z] = qz_copy.value()
      z_log_prob = tf.reduce_sum(qz.log_prob(tf.stop_gradient(z_sample[z])))
      if z in inference.scale:
        z_log_prob *= inference.scale[z]

      q_log_prob[s] += z_log_prob

    if inference.model_wrapper is None:
      # Form dictionary in order to replace conditioning on prior or
      # observed variable with conditioning on a specific value.
      dict_swap = z_sample
      for x, qx in six.iteritems(inference.data):
        if isinstance(x, RandomVariable):
          if isinstance(qx, RandomVariable):
            qx_copy = copy(qx, scope=scope)
            dict_swap[x] = qx_copy.value()
          else:
            dict_swap[x] = qx

      for x in six.iterkeys(inference.data):
        if isinstance(x, RandomVariable):
          x_copy = copy(x, dict_swap, scope=scope)
          x_log_lik = tf.reduce_sum(x_copy.log_prob(dict_swap[x]))
          if x in inference.scale:
            x_log_lik *= inference.scale[x]

          p_log_lik[s] += x_log_lik
    else:
      x = inference.data
      p_log_lik[s] = inference.model_wrapper.log_lik(x, z_sample)

  p_log_lik = tf.pack(p_log_lik)
  q_log_prob = tf.pack(q_log_prob)

  if inference.model_wrapper is None:
    kl = tf.reduce_sum([inference.data.get(z, 1.0) *
                        tf.reduce_sum(kl_multivariate_normal(
                            qz.mu, qz.sigma, z.mu, z.sigma))
                        for z, qz in six.iteritems(inference.latent_vars)])
  else:
    kl = tf.reduce_sum([tf.reduce_sum(kl_multivariate_normal(qz.mu, qz.sigma))
                        for qz in six.itervalues(inference.latent_vars)])

  if var_list is None:
    var_list = tf.trainable_variables()

  loss = -(tf.reduce_mean(p_log_lik) - kl)
  grads = tf.gradients(
      -(tf.reduce_mean(q_log_prob * tf.stop_gradient(p_log_lik)) - kl),
      [v.ref() for v in var_list])
  grads_and_vars = list(zip(grads, var_list))
  return loss, grads_and_vars
Example 21
            dict_swap[x] = qx_copy.value()
        else:
            dict_swap[x] = qx

for x in six.iterkeys(data):
    if isinstance(x, RandomVariable):
        x_copy = copy(x, dict_swap)
        x_log_lik = tf.reduce_sum(x_copy.log_prob(dict_swap[x]))

        p_log_lik[0] += x_log_lik

p_log_lik = tf.pack(p_log_lik)

kl = tf.reduce_sum([
    data.get(z, 1.0) *
    tf.reduce_sum(kl_multivariate_normal(qz.mu, qz.sigma, z.mu, z.sigma))
    for z, qz in six.iteritems(latent_vars)
])

loss = -(tf.reduce_mean(p_log_lik) - kl)

# benchmark the gradient time
grads = tf.gradients(loss, [v.ref() for v in var_list])[0]

init = tf.initialize_all_variables()
feed_dict = {}
for key, value in six.iteritems(data):
    if isinstance(key, tf.Tensor):
        feed_dict[key] = value
init.run(feed_dict)
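The snippet stops right after variable initialization; one minimal way to time the gradient op it builds (assuming the same grads, feed_dict, and an active default session) would be:

import time

grads.eval(feed_dict=feed_dict)  # warm-up run so graph setup is not timed

start = time.time()
n_runs = 100
for _ in range(n_runs):
    grads.eval(feed_dict=feed_dict)
print('mean gradient time: %.4f s' % ((time.time() - start) / n_runs))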
Example 22
File: klqp.py, Project: zizai/edward
def build_reparam_kl_loss_and_gradients(inference, var_list):
  """Build loss function. Its automatic differentiation
  is a stochastic gradient of

  .. math::

    -\\text{ELBO} = -( \mathbb{E}_{q(z; \lambda)} [ \log p(x \mid z) ]
                       - \\text{KL}(q(z; \lambda) \| p(z)) )

  based on the reparameterization trick (Kingma and Welling, 2014).

  It assumes the KL is analytic.

  For model wrappers, it assumes the prior is :math:`p(z) =
  \mathcal{N}(z; 0, 1)`.

  Computed by sampling from :math:`q(z;\lambda)` and evaluating the
  expectation using Monte Carlo sampling.
  """
  p_log_lik = [0.0] * inference.n_samples
  for s in range(inference.n_samples):
    scope = 'inference_' + str(id(inference)) + '/' + str(s)
    z_sample = {}
    for z, qz in six.iteritems(inference.latent_vars):
      # Copy q(z) to obtain new set of posterior samples.
      qz_copy = copy(qz, scope=scope)
      z_sample[z] = qz_copy.value()

    if inference.model_wrapper is None:
      # Form dictionary in order to replace conditioning on prior or
      # observed variable with conditioning on a specific value.
      dict_swap = z_sample
      for x, qx in six.iteritems(inference.data):
        if isinstance(x, RandomVariable):
          if isinstance(qx, RandomVariable):
            qx_copy = copy(qx, scope=scope)
            dict_swap[x] = qx_copy.value()
          else:
            dict_swap[x] = qx

      for x in six.iterkeys(inference.data):
        if isinstance(x, RandomVariable):
          x_copy = copy(x, dict_swap, scope=scope)
          x_log_lik = tf.reduce_sum(x_copy.log_prob(dict_swap[x]))
          if x in inference.scale:
            x_log_lik *= inference.scale[x]

          p_log_lik[s] += x_log_lik
    else:
      x = inference.data
      p_log_lik[s] = inference.model_wrapper.log_lik(x, z_sample)

  p_log_lik = tf.pack(p_log_lik)

  if inference.model_wrapper is None:
    kl = tf.reduce_sum([inference.data.get(z, 1.0) *
                        tf.reduce_sum(kl_multivariate_normal(
                            qz.mu, qz.sigma, z.mu, z.sigma))
                        for z, qz in six.iteritems(inference.latent_vars)])
  else:
    kl = tf.reduce_sum([tf.reduce_sum(kl_multivariate_normal(qz.mu, qz.sigma))
                        for qz in six.itervalues(inference.latent_vars)])

  loss = -(tf.reduce_mean(p_log_lik) - kl)

  if var_list is None:
    var_list = tf.trainable_variables()

  grads = tf.gradients(loss, [v.ref() for v in var_list])
  grads_and_vars = list(zip(grads, var_list))
  return loss, grads_and_vars
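A typical way the returned (loss, grads_and_vars) pair is consumed, sketched with an arbitrary optimizer choice:

optimizer = tf.train.AdamOptimizer(0.01)
train_op = optimizer.apply_gradients(grads_and_vars)
# loss can then be fetched alongside train_op inside a session for monitoring.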
Example 23
 def test_constraint_raises(self):
     with self.test_session():
         loc_one = tf.constant(10.0)
         scale_one = tf.constant(-1.0)
         loc_two = tf.constant(10.0)
         scale_two = tf.constant(-1.0)
         with self.assertRaisesOpError('Condition'):
             kl_multivariate_normal(loc_one, scale_one).eval()
             kl_multivariate_normal(loc_one,
                                    scale_one,
                                    loc_two=loc_two,
                                    scale_two=scale_two).eval()
         loc_one = np.inf * tf.constant(10.0)
         scale_one = tf.constant(1.0)
         loc_two = tf.constant(10.0)
         scale_two = tf.constant(1.0)
         with self.assertRaisesOpError('Inf'):
             kl_multivariate_normal(loc_one, scale_one).eval()
             kl_multivariate_normal(loc_one,
                                    scale_one,
                                    loc_two=loc_two,
                                    scale_two=scale_two).eval()
         loc_one = tf.constant(10.0)
         scale_one = tf.constant(1.0)
         loc_two = np.nan * tf.constant(10.0)
         scale_two = tf.constant(1.0)
         with self.assertRaisesOpError('NaN'):
             kl_multivariate_normal(loc_one, scale_one).eval()
             kl_multivariate_normal(loc_one,
                                    scale_one,
                                    loc_two=loc_two,
                                    scale_two=scale_two).eval()