import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

from robust_loss import util  # assumed import path for the module under test


class UtilTest(tf.test.TestCase):  # enclosing test case restored; name assumed

    def testStudentsTNllAgainstTfp(self):
        """Check that our Student's T NLL matches TensorFlow Probability."""
        for _ in range(10):
            # Random residual, and random positive df and scale parameters.
            x = np.random.normal()
            df = np.exp(4. * np.random.normal())
            scale = np.exp(4. * np.random.normal())
            nll = util.students_t_nll(x, df, scale)
            nll_true = -tfp.distributions.StudentT(
                df=df, loc=tf.zeros_like(scale), scale=scale).log_prob(x)
            self.assertAllClose(nll, nll_true)
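
For reference, the quantity under test is the negative log-likelihood of a
zero-mean Student's t-distribution. A minimal SciPy sketch of the same
computation (the helper name is illustrative, not part of the library):

import scipy.stats

def students_t_nll_reference(x, df, scale):
    # NLL of `x` under a zero-mean Student's t with `df` degrees of
    # freedom and scale `scale` -- the same quantity tested above.
    return -scipy.stats.t.logpdf(x, df, loc=0., scale=scale)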
Example #2
def lossfun_students(x, scale_lo=1e-5, scale_init=1.):
    """A variant of lossfun() that uses the NLL of a Student's t-distribution.

    Args:
      x: The residual for which the loss is being computed. Must be a rank-2
        tensor, where the first dimension is the batch index and the second
        dimension corresponds to different "channels". This function assigns
        each channel its own shape (log-df) and scale parameters, constructed
        as TF variables that can be optimized over. Must be a TF tensor or
        numpy array of single or double precision floats. The precision of `x`
        determines the precision of the latent variables used to model scale
        and log-df internally.
      scale_lo: The lowest possible value for the loss's scale parameters. Must
        be a scalar > 0. This value may have more of an effect than you expect,
        as the loss is unbounded as the scale approaches zero (that is, as the
        distribution approaches a delta function).
      scale_init: The initial value used for the loss's scale parameters. This
        also defines the zero-point of the latent representation of scales, so
        SGD may cause optimization to gravitate towards producing scales near
        this value.

    Returns:
      A tuple of the form (`loss`, `log_df`, `scale`).

      `loss`: a TF tensor of the same type and shape as input `x`, containing
      the loss at each element of `x` as a function of `x`, `log_df`, and
      `scale`. These "losses" are actually negative log-likelihoods (as
      produced by util.students_t_nll()) and so they are not bounded from
      below by zero. You'll probably want to minimize their sum or mean.

      `log_df`: a TF tensor of the same type as `x`, of size (1, x.shape[1]),
      as we construct a log-df variable for each dimension of `x` but not for
      each batch element. This contains the current estimated
      log(degrees-of-freedom) parameter for each dimension, and will change
      during optimization.

      `scale`: a TF tensor of the same type as `x`, of size (1, x.shape[1]),
      as we construct a scale variable for each dimension of `x` but not for
      each batch element. This contains the current estimated scale parameter
      for each dimension, and will change during optimization.

    Raises:
      ValueError: If any of the arguments are invalid.
    """
    _check_scale(scale_lo, scale_init)  # module-local argument validation

    float_dtype = x.dtype
    # The loss is only defined for rank-2 (batch, channel) inputs.
    assert_ops = [tf.Assert(tf.equal(tf.rank(x), 2), [tf.rank(x)])]
    with tf.control_dependencies(assert_ops):
        # One latent log-df variable per channel, initialized to 0 (df = 1).
        log_df = tf.compat.v1.get_variable(name='LogDf',
                                           initializer=tf.zeros(
                                               (1, x.shape[1]), float_dtype))
        # Module-local helper that builds per-channel scale variables, latently
        # parameterized so that scale > scale_lo with zero-point at scale_init.
        scale = _construct_scale(x, scale_lo, scale_init, float_dtype)
        loss = util.students_t_nll(x, tf.math.exp(log_df), scale)
        return loss, log_df, scale
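
A minimal usage sketch, not from the source: because lossfun_students()
creates its latent parameters via tf.compat.v1.get_variable, it is meant for
TF1-style graph execution. The placeholder shape, optimizer, and training
loop below are all assumptions for illustration:

import numpy as np
import tensorflow as tf

tf.compat.v1.disable_eager_execution()
x_ph = tf.compat.v1.placeholder(tf.float32, shape=(None, 3))
loss, log_df, scale = lossfun_students(x_ph)
train_op = tf.compat.v1.train.AdamOptimizer(1e-3).minimize(
    tf.reduce_mean(loss))
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    for _ in range(100):
        batch = np.random.normal(size=(32, 3)).astype(np.float32)
        sess.run(train_op, feed_dict={x_ph: batch})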
Example #3
    def __call__(self, x):
        """Evaluates the loss function on a matrix.

        Args:
          x: The residual for which the loss is being computed. Must be a
            rank-2 tensor, where the first dimension is the batch index and
            the second dimension corresponds to different "channels", whose
            size must equal `num_channels`.

        Returns:
          A TF tensor of the same type and shape as input `x`, containing
          the loss at each element of `x` as a function of `x`, `df`, and
          `scale`. These "losses" are actually negative log-likelihoods.
        """
        x = tf.convert_to_tensor(x)
        tf.debugging.assert_rank(x, 2)
        tf.debugging.assert_same_float_dtype([x], self._float_dtype)
        # Check that the channel dimension matches the per-channel df and
        # scale variables owned by this object.
        with tf.control_dependencies([
                tf.Assert(tf.equal(x.shape[1], self._num_channels),
                          [x.shape[1], self._num_channels])
        ]):
            return util.students_t_nll(x, self.df(), self.scale())
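
A minimal usage sketch under stated assumptions: the enclosing class (called
StudentsTLossFunction below; both that name and its constructor arguments are
assumptions from context) owns per-channel df() and scale() variables and is
a tf.Module-style object exposing trainable_variables:

import numpy as np
import tensorflow as tf

# Class name and constructor signature assumed for illustration.
loss_fn = StudentsTLossFunction(num_channels=3, float_dtype=np.float32)
optimizer = tf.keras.optimizers.Adam(1e-3)
x = tf.constant(np.random.normal(size=(32, 3)).astype(np.float32))
with tf.GradientTape() as tape:
    loss = tf.reduce_mean(loss_fn(x))  # per-element NLLs -> scalar
grads = tape.gradient(loss, loss_fn.trainable_variables)
optimizer.apply_gradients(zip(grads, loss_fn.trainable_variables))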