def testStudentsTNllAgainstTfp(self):
  """Tests that util.students_t_nll matches TFP's Student's T log-prob."""
  for _ in range(10):
    # Sample a random residual, plus df and scale drawn log-uniform-ish
    # over several orders of magnitude.
    x = np.random.normal()
    df = np.exp(4. * np.random.normal())
    scale = np.exp(4. * np.random.normal())
    nll = util.students_t_nll(x, df, scale)
    # Reference value: the negated log-prob of a zero-mean Student's
    # t-distribution as implemented by TensorFlow Probability.
    ref_dist = tfp.distributions.StudentT(
        df=df, loc=tf.zeros_like(scale), scale=scale)
    nll_true = -ref_dist.log_prob(x)
    self.assertAllClose(nll, nll_true)
def lossfun_students(x, scale_lo=1e-5, scale_init=1.):
  """A variant of lossfun() that uses the NLL of a Student's t-distribution.

  Args:
    x: The residual for which the loss is being computed. Must be a rank-2
      tensor, where the innermost dimension is the batch index, and the
      outermost dimension corresponds to different "channels", where this
      function will assign each channel its own variable shape (log-df) and
      scale parameters that are constructed as TF variables and can be
      optimized over. Must be a TF tensor or numpy array of single or double
      precision floats. The precision of `x` will determine the precision of
      the latent variables used to model scale and log-df internally.
    scale_lo: The lowest possible value for the loss's scale parameters. Must
      be > 0 and a scalar. This value may have more of an effect than you
      think, as the loss is unbounded as scale approaches zero (say, at a
      delta function).
    scale_init: The initial value used for the loss's scale parameters. This
      also defines the zero-point of the latent representation of scales, so
      SGD may cause optimization to gravitate towards producing scales near
      this value.

  Returns:
    A tuple of the form (`loss`, `log_df`, `scale`).

    `loss`: a TF tensor of the same type and shape as input `x`, containing
    the loss at each element of `x` as a function of `x`, `log_df`, and
    `scale`. These "losses" are actually negative log-likelihoods (as
    produced by distribution.nllfun()) and so they are not actually bounded
    from below by zero. You'll probably want to minimize their sum or mean.

    `scale`: a TF tensor of the same type as x, of size (1, x.shape[1]), as
    we construct a scale variable for each dimension of `x` but not for each
    batch element. This contains the current estimated scale parameter for
    each dimension, and will change during optimization.

    `log_df`: a TF tensor of the same type as x, of size (1, x.shape[1]), as
    we construct a log-DF variable for each dimension of `x` but not for
    each batch element. This contains the current estimated
    log(degrees-of-freedom) parameter for each dimension, and will change
    during optimization.

  Raises:
    ValueError: If any of the arguments are invalid.
  """
  _check_scale(scale_lo, scale_init)
  float_dtype = x.dtype
  # Guard all variable construction on `x` being rank-2, as the per-channel
  # latent variables are shaped from x.shape[1].
  rank_check = tf.Assert(tf.equal(tf.rank(x), 2), [tf.rank(x)])
  with tf.control_dependencies([rank_check]):
    # One latent log-df per channel, initialized to zero (i.e. df = 1).
    log_df = tf.compat.v1.get_variable(
        name='LogDf', initializer=tf.zeros((1, x.shape[1]), float_dtype))
    scale = _construct_scale(x, scale_lo, scale_init, float_dtype)
    loss = util.students_t_nll(x, tf.math.exp(log_df), scale)
  return loss, log_df, scale
def __call__(self, x):
  """Evaluates the loss function on a matrix.

  Args:
    x: The residual for which the loss is being computed. Must be a rank-2
      tensor, where the innermost dimension is the batch index, and the
      outermost dimension corresponds to different "channels" and whose size
      must be equal to `num_channels'.

  Returns:
    A TF tensor of the same type and shape as input `x`, containing the loss
    at each element of `x` as a function of `x`, `df`, and `scale`. These
    "losses" are actually negative log-likelihoods.
  """
  x = tf.convert_to_tensor(x)
  # `x` must be rank-2 and match the dtype this loss was constructed with.
  tf.debugging.assert_rank(x, 2)
  tf.debugging.assert_same_float_dtype([x], self._float_dtype)
  # The channel dimension must agree with the number of per-channel
  # (df, scale) parameter pairs owned by this object.
  channel_check = tf.Assert(
      tf.equal(x.shape[1], self._num_channels),
      [x.shape[1], self._num_channels])
  with tf.control_dependencies([channel_check]):
    return util.students_t_nll(x, self.df(), self.scale())