Example #1
    def test_monte_carlo_objective(self):
        with self.test_session() as sess:
            log_p, log_q = prepare_test_payload()

            obj = monte_carlo_objective(log_p, log_q, axis=0)
            obj_shape = obj.get_shape().as_list()
            assert_allclose(
                *sess.run([obj, log_mean_exp(log_p - log_q, axis=0)]))

            obj_k = monte_carlo_objective(log_p, log_q, axis=0, keepdims=True)
            self.assertListEqual([1] + obj_shape, obj_k.get_shape().as_list())
            assert_allclose(*sess.run(
                [obj_k,
                 log_mean_exp(log_p - log_q, axis=0, keepdims=True)]))
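Both tests assume they live in a `tf.test.TestCase` subclass and depend on helpers that are not shown here: `prepare_test_payload`, `log_mean_exp`, and `assert_allclose` (presumably `numpy.testing.assert_allclose`). A minimal sketch of what those helpers could look like under these assumptions; the fixture shapes are hypothetical:

import numpy as np
import tensorflow as tf
from numpy.testing import assert_allclose  # presumed origin of assert_allclose

def log_mean_exp(x, axis=None, keepdims=False):
    # Numerically stable log(mean(exp(x))): shift by the max before exp.
    x = tf.convert_to_tensor(x)
    x_max = tf.reduce_max(x, axis=axis, keepdims=True)
    out = x_max + tf.log(
        tf.reduce_mean(tf.exp(x - x_max), axis=axis, keepdims=True))
    return out if keepdims else tf.squeeze(out, axis=axis)

def prepare_test_payload():
    # Hypothetical fixture: K=5 log-prob samples for a batch of 3 points.
    rng = np.random.RandomState(1234)
    log_p = tf.constant(rng.normal(size=[5, 3]), dtype=tf.float32)
    log_q = tf.constant(rng.normal(size=[5, 3]), dtype=tf.float32)
    return log_p, log_q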
Example #2
    def test_importance_sampling_log_likelihood(self):
        with self.test_session() as sess:
            log_p, log_q = prepare_test_payload()

            ll = importance_sampling_log_likelihood(log_p, log_q, axis=0)
            ll_shape = ll.get_shape().as_list()
            assert_allclose(
                *sess.run([ll, log_mean_exp(log_p - log_q, axis=0)]))

            ll_k = importance_sampling_log_likelihood(log_p,
                                                      log_q,
                                                      axis=0,
                                                      keepdims=True)
            self.assertListEqual([1] + ll_shape, ll_k.get_shape().as_list())
            assert_allclose(*sess.run([
                ll_k, log_mean_exp(log_p - log_q, axis=0, keepdims=True)
            ]))
Example #3
def monte_carlo_objective(log_joint,
                          latent_log_prob,
                          axis=None,
                          keepdims=False,
                          name=None):
    """
    Derive the Monte-Carlo objective.

    .. math::

        \\mathcal{L}_{K}(\\mathbf{x};\\theta,\\phi) =
            \\mathbb{E}_{\\mathbf{z}^{(1:K)} \\sim q_{\\phi}(\\mathbf{z}|\\mathbf{x})}\\Bigg[
                \\log \\frac{1}{K} \\sum_{k=1}^K {
                    \\frac{p_{\\theta}(\\mathbf{x},\\mathbf{z}^{(k)})}
                         {q_{\\phi}(\\mathbf{z}^{(k)}|\\mathbf{x})}
                }
            \\Bigg]

    Args:
        log_joint: Values of :math:`\\log p(\\mathbf{z},\\mathbf{x})`,
            computed with :math:`\\mathbf{z} \\sim q(\\mathbf{z}|\\mathbf{x})`.
        latent_log_prob: Values of :math:`\\log q(\\mathbf{z}|\\mathbf{x})`.
        axis: The sampling dimensions to be averaged out.
        keepdims (bool): When `axis` is specified, whether or not to keep
            the averaged dimensions.  (default :obj:`False`)
        name (str): TensorFlow name scope of the graph nodes.
            (default "monte_carlo_objective")

    Returns:
        tf.Tensor: The Monte Carlo objective.  Not applicable for training;
            use :func:`iwae_estimator` to derive a training surrogate.
    """
    _require_multi_samples(axis, 'monte carlo objective')
    log_joint = tf.convert_to_tensor(log_joint)
    latent_log_prob = tf.convert_to_tensor(latent_log_prob)
    with tf.name_scope(name,
                       default_name='monte_carlo_objective',
                       values=[log_joint, latent_log_prob]):
        likelihood = log_joint - latent_log_prob
        objective = log_mean_exp(likelihood, axis=axis, keepdims=keepdims)
        return objective
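`_require_multi_samples` is a private guard whose body is not included in these excerpts. Judging from how it is called, it presumably rejects a missing `axis`; a minimal sketch under that assumption, with an illustrative call:

def _require_multi_samples(axis, name):
    # Presumed behavior: the estimator averages over multiple samples,
    # so an explicit sampling axis is mandatory.
    if axis is None:
        raise ValueError(
            '{} requires multi-samples of latent variables, so the '
            '`axis` argument must be specified'.format(name))

# Illustrative usage (shapes are assumptions): with log_p holding
# log p(x, z) and log_q holding log q(z|x) for K samples, both of shape
# [K, batch_size], the objective has one entry per batch item:
#   obj = monte_carlo_objective(log_p, log_q, axis=0)  # shape [batch_size]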
Example #4
def iwae_estimator(log_values, axis, keepdims=False, name=None):
    """
    Derive the gradient estimator for
    :math:`\\mathbb{E}_{q(\\mathbf{z}^{(1:K)}|\\mathbf{x})}\\Big[\\log \\frac{1}{K} \\sum_{k=1}^K f\\big(\\mathbf{x},\\mathbf{z}^{(k)}\\big)\\Big]`,
    via the IWAE algorithm (Burda, Y., Grosse, R. and Salakhutdinov, R., 2015).

    .. math::

        \\begin{aligned}
            &\\nabla\\,\\mathbb{E}_{q(\\mathbf{z}^{(1:K)}|\\mathbf{x})}\\Big[\\log \\frac{1}{K} \\sum_{k=1}^K f\\big(\\mathbf{x},\\mathbf{z}^{(k)}\\big)\\Big]
                = \\nabla \\, \\mathbb{E}_{q(\\mathbf{\\epsilon}^{(1:K)})}\\Bigg[\\log \\frac{1}{K} \\sum_{k=1}^K w_k\\Bigg]
                = \\mathbb{E}_{q(\\mathbf{\\epsilon}^{(1:K)})}\\Bigg[\\nabla \\log \\frac{1}{K} \\sum_{k=1}^K w_k\\Bigg] = \\\\
                & \\quad \\mathbb{E}_{q(\\mathbf{\\epsilon}^{(1:K)})}\\Bigg[\\frac{\\nabla \\frac{1}{K} \\sum_{k=1}^K w_k}{\\frac{1}{K} \\sum_{i=1}^K w_i}\\Bigg]
                = \\mathbb{E}_{q(\\mathbf{\\epsilon}^{(1:K)})}\\Bigg[\\frac{\\sum_{k=1}^K w_k \\nabla \\log w_k}{\\sum_{i=1}^K w_i}\\Bigg]
                = \\mathbb{E}_{q(\\mathbf{\\epsilon}^{(1:K)})}\\Bigg[\\sum_{k=1}^K \\widetilde{w}_k \\nabla \\log w_k\\Bigg]
        \\end{aligned}

    Args:
        log_values: Log values of the target function given `z` and `x`, i.e.,
            :math:`\\log f(\\mathbf{x},\\mathbf{z})`.
        axis: The sampling dimensions to be averaged out.
        keepdims (bool): When `axis` is specified, whether or not to keep
            the averaged dimensions.  (default :obj:`False`)
        name (str): TensorFlow name scope of the graph nodes.
            (default "iwae_estimator")

    Returns:
        tf.Tensor: The surrogate for optimizing the target function
            with IWAE gradient estimator.
    """
    _require_multi_samples(axis, 'iwae estimator')
    log_values = tf.convert_to_tensor(log_values)
    with tf.name_scope(name, default_name='iwae_estimator',
                       values=[log_values]):
        estimator = log_mean_exp(log_values, axis=axis, keepdims=keepdims)
        return estimator
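Because the gradient of the returned `log_mean_exp` value is exactly the self-normalized sum derived above, training amounts to minimizing its negation. A minimal sketch with a toy model; all names, shapes, and the optimizer choice are assumptions:

import tensorflow as tf

# Toy model: scalar parameter `mu`, K=10 reparameterized latent samples
# for a batch of 32 points (stand-ins for real log-densities).
mu = tf.get_variable('mu', shape=[], dtype=tf.float32)
z = tf.random_normal([10, 32]) + mu           # z ~ q(z|x), reparameterized
log_joint = -0.5 * tf.square(z)               # stand-in for log p(x, z)
latent_log_prob = -0.5 * tf.square(z - mu)    # stand-in for log q(z|x)

surrogate = iwae_estimator(log_joint - latent_log_prob, axis=0)
loss = -tf.reduce_mean(surrogate)             # maximize the bound
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)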
Example #5
def importance_sampling_log_likelihood(log_joint,
                                       latent_log_prob,
                                       axis,
                                       keepdims=False,
                                       name=None):
    """
    Compute :math:`\\log p(\\mathbf{x})` by importance sampling.

    .. math::

        \\log p(\\mathbf{x}) =
            \\log \\mathbb{E}_{q(\\mathbf{z}|\\mathbf{x})} \\Big[\\exp\\big(\\log p(\\mathbf{x},\\mathbf{z}) - \\log q(\\mathbf{z}|\\mathbf{x})\\big) \\Big]

    Args:
        log_joint: Values of :math:`\\log p(\\mathbf{z},\\mathbf{x})`,
            computed with :math:`\\mathbf{z} \\sim q(\\mathbf{z}|\\mathbf{x})`.
        latent_log_prob: Values of :math:`\\log q(\\mathbf{z}|\\mathbf{x})`.
        axis: The sampling dimensions to be averaged out.
        keepdims (bool): When `axis` is specified, whether or not to keep
            the averaged dimensions.  (default :obj:`False`)
        name (str): TensorFlow name scope of the graph nodes.
            (default "importance_sampling_log_likelihood")

    Returns:
        tf.Tensor: The computed :math:`\\log p(\\mathbf{x})`.
    """
    _require_multi_samples(axis, 'importance sampling log-likelihood')
    log_joint = tf.convert_to_tensor(log_joint)
    latent_log_prob = tf.convert_to_tensor(latent_log_prob)
    with tf.name_scope(name,
                       default_name='importance_sampling_log_likelihood',
                       values=[log_joint, latent_log_prob]):
        log_p = log_mean_exp(log_joint - latent_log_prob,
                             axis=axis,
                             keepdims=keepdims)
        return log_p
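At evaluation time, the same log importance weights yield an estimate of the marginal log-likelihood, which tightens as the number of samples along `axis` grows (Burda et al., 2015). A short sketch reusing the hypothetical `prepare_test_payload` fixture from above:

log_p, log_q = prepare_test_payload()  # hypothetical [K=5, batch=3] tensors
ll = importance_sampling_log_likelihood(log_p, log_q, axis=0)

with tf.Session() as sess:
    print(sess.run(ll))  # one log p(x) estimate per data point, shape [3]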