def _inv_hessian_control_inputs(inv_hessian):
  """Computes control inputs to validate a provided inverse Hessian.

  These ensure that the provided inverse Hessian is positive definite and
  symmetric.

  Args:
    inv_hessian: The starting estimate for the inverse of the Hessian at the
      initial point.

  Returns:
    A list of tf.Assert ops suitable for use with tf.control_dependencies.
  """
  # The easiest way to validate that the inverse Hessian is positive definite
  # is to compute its Cholesky decomposition.
  is_positive_definite = tf.reduce_all(
      tf.math.is_finite(tf.linalg.cholesky(inv_hessian)), axis=[-1, -2])

  # Then check that the supplied inverse Hessian is symmetric.
  is_symmetric = tf.equal(
      bfgs_utils.norm(inv_hessian - _batch_transpose(inv_hessian), dims=2), 0)

  # Simply adding control dependencies on these results is not enough to
  # trigger them; we need to add asserts on the results.
  return [
      tf.Assert(
          is_positive_definite,
          ['Initial inverse Hessian is not positive definite.', inv_hessian]),
      tf.Assert(
          is_symmetric,
          ['Initial inverse Hessian is not symmetric.', inv_hessian])
  ]
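# A minimal usage sketch, assuming the surrounding module's `bfgs_utils` and
# `_batch_transpose` helpers are importable: the returned asserts only fire
# when placed in a `tf.control_dependencies` block guarding the ops that
# consume the validated tensor.
inv_hessian = tf.constant([[2.0, 0.0], [0.0, 3.0]])
with tf.control_dependencies(_inv_hessian_control_inputs(inv_hessian)):
  validated_inv_hessian = tf.identity(inv_hessian)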
def call(self, inputs, count_weights=None):
  inputs = utils.ensure_tensor(inputs)

  if count_weights is not None:
    if self.output_mode != COUNT:
      raise ValueError(
          "`count_weights` is not used when `output_mode` is not `'count'`. "
          "Received `count_weights={}`.".format(count_weights))
    count_weights = utils.ensure_tensor(count_weights, self.compute_dtype)

  depth = self.num_tokens
  if isinstance(inputs, tf.SparseTensor):
    max_value = tf.reduce_max(inputs.values)
    min_value = tf.reduce_min(inputs.values)
  else:
    max_value = tf.reduce_max(inputs)
    min_value = tf.reduce_min(inputs)
  condition = tf.logical_and(
      tf.greater(tf.cast(depth, max_value.dtype), max_value),
      tf.greater_equal(min_value, tf.cast(0, min_value.dtype)))
  assertion = tf.Assert(condition, [
      "Input values must be in the range 0 <= values < num_tokens"
      " with num_tokens={}".format(depth)
  ])
  with tf.control_dependencies([assertion]):
    return utils.encode_categorical_inputs(
        inputs,
        output_mode=self.output_mode,
        depth=depth,
        dtype=self.compute_dtype,
        sparse=self.sparse,
        count_weights=count_weights)
def to_normalized_coordinates(boxlist, height, width,
                              check_range=True, scope=None):
  """Converts absolute box coordinates to normalized coordinates in [0, 1].

  Usually one uses the dynamic shape of the image or conv-layer tensor:
    boxlist = box_list_ops.to_normalized_coordinates(boxlist,
                                                     tf.shape(images)[1],
                                                     tf.shape(images)[2])

  This function raises an assertion-failure error at graph execution time when
  the maximum box coordinate is not greater than 1.01 (which suggests that the
  coordinates are already normalized). The value 1.01 is to deal with small
  rounding errors.

  Args:
    boxlist: BoxList with coordinates in terms of pixel-locations.
    height: Maximum value for height of absolute box coordinates.
    width: Maximum value for width of absolute box coordinates.
    check_range: If True, checks if the coordinates are normalized or not.
    scope: name scope.

  Returns:
    boxlist with normalized coordinates in [0, 1].
  """
  with tf.name_scope(scope, 'ToNormalizedCoordinates'):
    height = tf.cast(height, tf.float32)
    width = tf.cast(width, tf.float32)

    if check_range:
      max_val = tf.reduce_max(boxlist.get())
      max_assert = tf.Assert(tf.greater(max_val, 1.01),
                             ['max value is lower than 1.01: ', max_val])
      with tf.control_dependencies([max_assert]):
        width = tf.identity(width)

    return scale(boxlist, 1 / height, 1 / width)
def partition_spline_curve(alpha):
  """Applies a curve to alpha >= 0 to compress its range before interpolation.

  This is a weird hand-crafted function designed to take in alpha values and
  curve them to occupy a short finite range that works well when using spline
  interpolation to model the partition function Z(alpha). Because Z(alpha)
  only varies in [0, 4] and is especially interesting around alpha=2, this
  curve is roughly linear in [0, 4] with a slope of ~1 at alpha=0 and alpha=4
  but a slope of ~10 at alpha=2. When alpha > 4 the curve becomes logarithmic.
  Some (input, output) pairs for this function are:
    [(0, 0), (1, ~1.2), (2, 4), (3, ~6.8), (4, 8), (8, ~8.8), (400000, ~12)]
  This function is continuously differentiable.

  Args:
    alpha: A numpy array or TF tensor (float32 or float64) with values >= 0.

  Returns:
    An array/tensor of curved values >= 0 with the same type as `alpha`, to be
    used as input x-coordinates for spline interpolation.
  """
  c = lambda z: tf.cast(z, alpha.dtype)
  assert_ops = [tf.Assert(tf.reduce_all(alpha >= 0.), [alpha])]
  with tf.control_dependencies(assert_ops):
    x = tf.where(alpha < 4,
                 (c(2.25) * alpha - c(4.5)) /
                 (tf.abs(alpha - c(2)) + c(0.25)) + alpha + c(2),
                 c(5) / c(18) * util.log_safe(c(4) * alpha - c(15)) + c(8))
    return x
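# A quick sanity check of the (input, output) pairs quoted above, assuming
# TensorFlow is running eagerly. The exact values 0, 4, and 8 fall out of the
# rational term at alpha = 0, 2, and 4 respectively.
alpha = tf.constant([0., 2., 4.])
print(partition_spline_curve(alpha))  # ~[0., 4., 8.]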
def call(self, inputs, count_weights=None):
  if isinstance(inputs, (list, np.ndarray)):
    inputs = tf.convert_to_tensor(inputs)

  if inputs.shape.rank == 1:
    inputs = tf.compat.v1.expand_dims(inputs, 1)

  if count_weights is not None and self.output_mode != COUNT:
    raise ValueError(
        "`count_weights` is only used when `output_mode` is `'count'`. "
        "Received `count_weights={}`.".format(count_weights))

  out_depth = self.num_tokens
  multi_hot_output = (self.output_mode == MULTI_HOT)
  if isinstance(inputs, tf.SparseTensor):
    max_value = tf.reduce_max(inputs.values)
    min_value = tf.reduce_min(inputs.values)
  else:
    max_value = tf.reduce_max(inputs)
    min_value = tf.reduce_min(inputs)
  condition = tf.logical_and(
      tf.greater(tf.cast(out_depth, max_value.dtype), max_value),
      tf.greater_equal(min_value, tf.cast(0, min_value.dtype)))
  # The result of tf.Assert must be used as a control dependency; otherwise
  # the check is dead code in graph mode.
  assertion = tf.Assert(condition, [
      "Input values must be in the range 0 <= values < num_tokens"
      " with num_tokens={}".format(out_depth)
  ])
  with tf.control_dependencies([assertion]):
    if self.sparse:
      return sparse_bincount(inputs, out_depth, multi_hot_output,
                             count_weights)
    else:
      return dense_bincount(inputs, out_depth, multi_hot_output,
                            count_weights)
def _upsample(x, up_sz, f, direction, shift):
  """Upsample by a factor of 2 using transposed reflecting boundary conditions.

  This function undecimates `x` along the axis specified by `direction` and
  then convolves it with filter `f`, thereby upsampling it to have a size of
  `up_sz`. This function is a bit awkward, as it's written to be the transpose
  of _downsample(), which uses reflecting boundary conditions. As such, this
  function approximates *the transpose of reflecting boundary conditions*,
  which is not the same as reflecting boundary conditions.
  TODO(barron): Write out the true transpose of reflecting boundary conditions.

  Args:
    x: The input tensor (numpy or TF), of size (num_channels, width, height).
    up_sz: A tuple of ints of size (upsampled_width, upsampled_height). Care
      should be taken by the caller to match the upsampled_width/height with
      the input width/height along the axis that isn't being upsampled.
    f: The input filter, which must be an odd-length 1D numpy array.
    direction: The spatial direction in [0, 1] along which `x` will be
      convolved with `f` after being undecimated. Because `x` has a
      batch/channels dimension, `direction` == 0 corresponds to upsampling
      along axis 1 in `x`, and `direction` == 1 corresponds to upsampling
      along axis 2 in `x`.
    shift: A shift amount in [0, 1] by which `x` will be shifted along the
      axis specified by `direction` after undecimating.

  Returns:
    `x` undecimated and convolved with `f` along the spatial dimension
    `direction` with transposed reflection boundary conditions with an offset
    of `shift`, to match size `up_sz`.
  """
  _check_resample_inputs(x, f, direction, shift)
  assert_ops = [tf.Assert(tf.equal(tf.rank(f), 1), [tf.rank(f)])]
  with tf.control_dependencies(assert_ops):
    # Undecimate `x` by a factor of 2 along `direction`, by stacking it with
    # a tensor of all zeros along the right axis and then reshaping it such
    # that the zeros are interleaved.
    if direction == 0:
      sz_ex = tf.shape(x) * [1, 2, 1]
    elif direction == 1:
      sz_ex = tf.shape(x) * [1, 1, 2]
    if shift == 0:
      x_and_zeros = [x, tf.zeros_like(x)]
    elif shift == 1:
      x_and_zeros = [tf.zeros_like(x), x]
    x_undecimated = tf.reshape(tf.stack(x_and_zeros, direction + 2), sz_ex)
    # Ensure that `x_undecimated` has a size of `up_sz`, by slicing and
    # padding as needed.
    x_undecimated = x_undecimated[:, 0:up_sz[0], 0:up_sz[1]]
    x_undecimated = tf.pad(
        x_undecimated,
        [[0, 0], [0, up_sz[0] - tf.shape(x_undecimated)[1]],
         [0, up_sz[1] - tf.shape(x_undecimated)[2]]])

    # Pad `x_undecimated` with reflection boundary conditions.
    x_padded = pad_reflecting(x_undecimated, len(f) // 2, (len(f) - 1) // 2,
                              direction + 1)
    # Convolve x_undecimated with a flipped version of f.
    f_ex = tf.expand_dims(f[::-1], 1 - direction)
    y = tf.nn.conv2d(x_padded[:, :, :, tf.newaxis],
                     tf.cast(f_ex, x.dtype)[:, :, tf.newaxis, tf.newaxis],
                     [1, 1, 1, 1], 'VALID')[:, :, :, 0]
    return y
def log_base_partition_function(self, alpha):
  r"""Approximate the distribution's log-partition function with a 1D spline.

  Because the partition function (Z(\alpha) in the paper) of the distribution
  is difficult to model analytically, we approximate it with a (transformed)
  cubic hermite spline: Each alpha is pushed through a nonlinearity before
  being used to interpolate into a spline, which allows us to use a relatively
  small spline to accurately model the log partition function over the range
  of all non-negative input values.

  Args:
    alpha: A tensor or scalar of single or double precision floats containing
      the set of alphas for which we would like an approximate log partition
      function. Must be non-negative, as the partition function is undefined
      when alpha < 0.

  Returns:
    An approximation of log(Z(alpha)) accurate to within 1e-6.
  """
  float_dtype = alpha.dtype

  # The partition function is undefined when `alpha` < 0.
  assert_ops = [tf.Assert(tf.reduce_all(alpha >= 0.), [alpha])]
  with tf.control_dependencies(assert_ops):
    # Transform `alpha` to the form expected by the spline.
    x = partition_spline_curve(alpha)
    # Interpolate into the spline.
    return cubic_spline.interpolate1d(
        x * tf.cast(self._spline_x_scale, float_dtype),
        tf.cast(self._spline_values, float_dtype),
        tf.cast(self._spline_tangents, float_dtype))
def pad_to_fixed_size(input_tensor, size, constant_values=0):
  """Pads data to a fixed length at the first dimension.

  Args:
    input_tensor: `Tensor` with any dimension.
    size: `int` number for the first dimension of output Tensor.
    constant_values: `int` value assigned to the paddings.

  Returns:
    `Tensor` with the first dimension padded to `size`.
  """
  input_shape = input_tensor.get_shape().as_list()
  padding_shape = []

  # Computes the padding length on the first dimension.
  padding_length = size - tf.shape(input=input_tensor)[0]
  assert_length = tf.Assert(
      tf.greater_equal(padding_length, 0), [padding_length])
  with tf.control_dependencies([assert_length]):
    padding_shape.append(padding_length)

  # Copies shapes of the rest of input shape dimensions.
  for i in range(1, len(input_shape)):
    padding_shape.append(tf.shape(input=input_tensor)[i])

  # Pads input tensor to the fixed first dimension.
  paddings = tf.cast(constant_values * tf.ones(padding_shape),
                     input_tensor.dtype)
  padded_tensor = tf.concat([input_tensor, paddings], axis=0)
  output_shape = input_shape
  output_shape[0] = size
  padded_tensor.set_shape(output_shape)
  return padded_tensor
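# A minimal usage sketch: pad a batch of 3 rows up to a fixed batch of 5,
# filling the new rows with -1.
boxes = tf.constant([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
padded = pad_to_fixed_size(boxes, 5, constant_values=-1)
# padded.shape == (5, 2); rows 3 and 4 are all -1.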
def evaluate_interpolation(coefficients, t0, t1, t):
  """Evaluates the value of polynomial interpolation at the given time point.

  Args:
    coefficients: List of `Tensor`s that hold polynomial coefficients. Must
      have length greater or equal to 2.
    t0: Scalar floating `Tensor` giving the start of the interval.
    t1: Scalar floating `Tensor` giving the end of the interval.
    t: Scalar floating `Tensor` giving the desired interpolation point.

  Returns:
    interpolated_value: Polynomial interpolation at time `t`.

  Raises:
    ValueError: If `coefficients` has less than 2 elements.
  """
  if len(coefficients) < 2:
    raise ValueError('`coefficients` must have at least 2 elements.')
  with tf.name_scope('interp_evaluate'):
    dtype = dtype_util.common_dtype(coefficients)
    t0 = tf.convert_to_tensor(t0)
    t1 = tf.convert_to_tensor(t1)
    t = tf.convert_to_tensor(t)
    assert_op = tf.Assert(
        (t0 <= t) & (t <= t1),
        ['invalid interpolation, fails `t0 <= t <= t1`:', t0, t, t1])
    with tf.control_dependencies([assert_op]):
      x = tf.cast((t - t0) / (t1 - t0), dtype)
      xs = [tf.constant(1, dtype), x]
      for _ in range(2, len(coefficients)):
        xs.append(xs[-1] * x)
      return weighted_sum(list(reversed(xs)), coefficients)
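# A usage sketch, assuming the module's `weighted_sum` and `dtype_util`
# helpers are importable. Coefficients are ordered from the highest power of
# the normalized time x = (t - t0) / (t1 - t0) down to the constant term, so
# [2., 3.] evaluates 2 * x + 3.
coefficients = [tf.constant(2.), tf.constant(3.)]
y = evaluate_interpolation(coefficients, t0=0., t1=2., t=1.)  # x = 0.5 -> 4.0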
def __call__(self, x):
  """Evaluates the loss function on a matrix.

  Args:
    x: The residual for which the loss is being computed. Must be a rank-2
      tensor, where the first dimension is the batch index and the second
      dimension corresponds to different "channels", whose size must be equal
      to `num_channels`.

  Returns:
    A TF tensor of the same type and shape as input `x`, containing the loss
    at each element of `x` as a function of `x`, `alpha`, and `scale`. These
    "losses" are actually negative log-likelihoods (as produced by
    distribution.nllfun()) and so they are not actually bounded from below by
    zero --- it is okay if they go negative! You'll probably want to minimize
    their sum or mean.
  """
  x = tf.convert_to_tensor(x)
  tf.debugging.assert_rank(x, 2)
  tf.debugging.assert_same_float_dtype([x], self._float_dtype)
  with tf.control_dependencies([
      tf.Assert(
          tf.equal(x.shape[1], self._num_channels),
          [x.shape[1], self._num_channels])
  ]):
    return self._distribution.nllfun(x, self.alpha(), self.scale())
def assert_ops():
  """Creates a list of assert operations."""
  if not self._validate_args:
    return []
  assert_ops = []
  if ((not initial_state_missing) and
      (previous_solver_internal_state is not None)):
    assert_initial_state_matches_previous_solver_internal_state = (
        tf.assert_near(
            tf.norm(
                original_initial_state -
                previous_solver_internal_state.backward_differences[0],
                np.inf),
            0.,
            message='`previous_solver_internal_state` does not match '
            '`initial_state`.'))
    assert_ops.append(
        assert_initial_state_matches_previous_solver_internal_state)
  if solution_times_chosen_by_solver:
    assert_ops.append(
        util.assert_positive(final_time - initial_time,
                             'final_time - initial_time'))
  else:
    assert_ops += [
        util.assert_increasing(solution_times, 'solution_times'),
        util.assert_nonnegative(solution_times[0] - initial_time,
                                'solution_times[0] - initial_time'),
    ]
  if max_num_steps is not None:
    assert_ops.append(util.assert_positive(max_num_steps, 'max_num_steps'))
  if max_num_newton_iters is not None:
    assert_ops.append(
        util.assert_positive(max_num_newton_iters, 'max_num_newton_iters'))
  assert_ops += [
      util.assert_positive(rtol, 'rtol'),
      util.assert_positive(atol, 'atol'),
      util.assert_positive(first_step_size, 'first_step_size'),
      util.assert_positive(safety_factor, 'safety_factor'),
      util.assert_positive(min_step_size_factor, 'min_step_size_factor'),
      util.assert_positive(max_step_size_factor, 'max_step_size_factor'),
      tf.Assert((max_order >= 1) & (max_order <= bdf_util.MAX_ORDER), [
          '`max_order` must be between 1 and {}.'.format(bdf_util.MAX_ORDER)
      ]),
      util.assert_positive(newton_tol_factor, 'newton_tol_factor'),
      util.assert_positive(newton_step_size_factor,
                           'newton_step_size_factor'),
  ]
  return assert_ops
def nllfun(self, x, alpha, scale):
  r"""Implements the negative log-likelihood (NLL).

  Specifically, we implement -log(p(x | 0, \alpha, c)) of Equation 16 in the
  paper as nllfun(x, alpha, shape).

  Args:
    x: The residual for which the NLL is being computed. x can have any shape,
      and alpha and scale will be broadcasted to match x's shape if necessary.
      Must be a tensorflow tensor or numpy array of floats.
    alpha: The shape parameter of the NLL (\alpha in the paper), where smaller
      values make the distribution more robust (outliers "cost" less) and
      larger values make it less robust (outliers "cost" more). Alpha can be
      any non-negative value, but the gradient of the NLL with respect to
      alpha has singularities at 0 and 2, so you may want to limit usage to
      (0, 2) during gradient descent. Must be a tensorflow tensor or numpy
      array of floats. Varying alpha in that range allows for smooth
      interpolation between a Cauchy distribution (alpha = 0) and a Normal
      distribution (alpha = 2), similar to a Student's T distribution.
    scale: The scale parameter of the loss. When |x| < scale, the NLL is like
      that of a (possibly unnormalized) normal distribution, and when
      |x| > scale the NLL takes on a different shape according to alpha. Must
      be a tensorflow tensor or numpy array of floats.

  Returns:
    The NLLs for each element of x, in the same shape as x. This is returned
    as a TensorFlow graph node of floats with the same precision as x.
  """
  # `scale` and `alpha` must have the same type as `x`.
  tf.debugging.assert_type(scale, x.dtype)
  tf.debugging.assert_type(alpha, x.dtype)
  assert_ops = [
      # `scale` must be > 0.
      tf.Assert(tf.reduce_all(scale > 0.), [scale]),
      # `alpha` must be >= 0.
      tf.Assert(tf.reduce_all(alpha >= 0.), [alpha]),
  ]
  with tf.control_dependencies(assert_ops):
    loss = general.lossfun(x, alpha, scale, approximate=False)
    log_partition = (
        tf.math.log(scale) + self.log_base_partition_function(alpha))
    nll = loss + log_partition
    return nll
def call(self, inputs):
  if not self.max_tokens and self._vocab_size is None:
    raise ValueError(
        "You must set the layer's vocabulary before calling it. "
        "Either pass a `vocabulary` argument to the layer, or "
        "call `layer.adapt(dataset)` with some sample data.")
  self._called = True
  if self._key_dtype == tf.int64 and inputs.dtype == tf.int32:
    inputs = tf.cast(inputs, tf.int64)
  lookup_result = self._table_handler.lookup(inputs)

  lookup_checks = []

  if self.num_oov_indices == 0 and not self.invert:
    if tf_utils.is_sparse(inputs):
      lookup_values = lookup_result.values
      input_values = inputs.values
    elif tf_utils.is_ragged(inputs):
      lookup_values = lookup_result.flat_values
      input_values = inputs.flat_values
    else:
      lookup_values = lookup_result
      input_values = inputs
    oov_indices = tf.where(tf.equal(lookup_values, -1))
    oov_inputs = tf.compat.v1.gather_nd(input_values, oov_indices)
    msg = tf.strings.format(
        "When `num_oov_indices=0` all inputs should be in vocabulary, "
        "found OOV values {}, consider setting `num_oov_indices=1`.",
        (oov_inputs,))
    assertion = tf.Assert(
        tf.equal(tf.compat.v1.size(oov_indices), 0), [msg])
    lookup_checks.append(assertion)

  with tf.control_dependencies(lookup_checks):
    if self.output_mode == INT:
      return tf.identity(lookup_result)

    multi_hot_output = (self.output_mode == MULTI_HOT)
    if self._vocab_size and not self.pad_to_max_tokens:
      out_depth = self._vocab_size
    else:
      out_depth = self.max_tokens
    if self.sparse:
      bincounts = category_encoding.sparse_bincount(lookup_result, out_depth,
                                                    multi_hot_output)
    else:
      bincounts = category_encoding.dense_bincount(lookup_result, out_depth,
                                                   multi_hot_output)

    if self.output_mode == TF_IDF:
      return tf.multiply(bincounts, self.tf_idf_weights)

    return bincounts
def call(self, inputs, count_weights=None):
  if isinstance(inputs, (list, np.ndarray)):
    inputs = tf.convert_to_tensor(inputs)

  def expand_dims(inputs, axis):
    if tf_utils.is_sparse(inputs):
      return tf.sparse.expand_dims(inputs, axis)
    else:
      return tf.compat.v1.expand_dims(inputs, axis)

  original_shape = inputs.shape
  # In all cases, we should uprank scalar input to a single sample.
  if inputs.shape.rank == 0:
    inputs = expand_dims(inputs, -1)
  # One hot will uprank only if the final output dimension is not already 1.
  if self.output_mode == ONE_HOT:
    if inputs.shape[-1] != 1:
      inputs = expand_dims(inputs, -1)

  # TODO(b/190445202): remove output rank restriction.
  if inputs.shape.rank > 2:
    raise ValueError(
        "Received input shape {}, which would result in output rank {}. "
        "Currently only outputs up to rank 2 are supported.".format(
            original_shape, inputs.shape.rank))

  if count_weights is not None and self.output_mode != COUNT:
    raise ValueError(
        "`count_weights` is not used when `output_mode` is not `'count'`. "
        "Received `count_weights={}`.".format(count_weights))

  out_depth = self.num_tokens
  binary_output = self.output_mode in (MULTI_HOT, ONE_HOT)
  if isinstance(inputs, tf.SparseTensor):
    max_value = tf.reduce_max(inputs.values)
    min_value = tf.reduce_min(inputs.values)
  else:
    max_value = tf.reduce_max(inputs)
    min_value = tf.reduce_min(inputs)
  condition = tf.logical_and(
      tf.greater(tf.cast(out_depth, max_value.dtype), max_value),
      tf.greater_equal(min_value, tf.cast(0, min_value.dtype)))
  assertion = tf.Assert(condition, [
      "Input values must be in the range 0 <= values < num_tokens"
      " with num_tokens={}".format(out_depth)
  ])
  with tf.control_dependencies([assertion]):
    if self.sparse:
      return sparse_bincount(inputs, out_depth, binary_output, count_weights)
    else:
      return dense_bincount(inputs, out_depth, binary_output, count_weights)
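# A usage sketch via the public layer that wraps a `call` like the one above
# (assuming TF 2.x's `tf.keras.layers.CategoryEncoding`): an out-of-range
# value trips the range assert.
layer = tf.keras.layers.CategoryEncoding(num_tokens=4, output_mode="count")
layer([[0, 1, 1, 3]])  # OK: counts per token -> [[1., 2., 0., 1.]]
# layer([[0, 4]]) would fail: values must satisfy 0 <= values < num_tokens.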
def inv_partition_spline_curve(x):
  """The inverse of partition_spline_curve()."""
  c = lambda z: tf.cast(z, x.dtype)
  assert_ops = [tf.Assert(tf.reduce_all(x >= 0.), [x])]
  with tf.control_dependencies(assert_ops):
    alpha = tf.where(
        x < 8,
        c(0.5) * x + tf.where(
            x <= 4,
            c(1.25) - tf.sqrt(c(1.5625) - x + c(.25) * tf.square(x)),
            c(-1.25) + tf.sqrt(c(9.5625) - c(3) * x + c(.25) * tf.square(x))),
        c(3.75) + c(0.25) * util.exp_safe(x * c(3.6) - c(28.8)))
    return alpha
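# A round-trip sanity check (eager mode assumed): mapping through the curve
# and back should recover the original alphas up to float error.
alpha = tf.constant([0., 1., 2., 3., 4., 10.])
alpha_roundtrip = inv_partition_spline_curve(partition_spline_curve(alpha))
# alpha_roundtrip ~= alpha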
def construct(im, num_levels, wavelet_type):
  """Constructs a wavelet decomposition of an image.

  Args:
    im: A numpy or TF tensor of single or double precision floats of size
      (batch_size, width, height).
    num_levels: The number of levels (or scales) of the wavelet decomposition
      to apply. A value of 0 returns a "wavelet decomposition" that is just
      the image.
    wavelet_type: The kind of wavelet to use, see generate_filters().

  Returns:
    A wavelet decomposition of `im` that has `num_levels` levels (not
    including the coarsest residual level) and is of type `wavelet_type`. This
    decomposition is represented as a tuple of 3-tuples, with the final
    element being a tensor:
      ((band00, band01, band02), (band10, band11, band12), ..., resid)
    where band** and resid are TF tensors. Each element of these nested tuples
    is of shape [batch_size, width * 2^-(level+1), height * 2^-(level+1)],
    though the spatial dimensions may be off by 1 if width and height are not
    divisible by 2^(level+1). The residual image is of the same (rough) size
    as the last set of bands. The floating point precision of these tensors
    matches that of `im`.
  """
  if len(im.shape) != 3:
    raise ValueError(
        'Expected `im` to have a rank of 3, but is of size {}'.format(
            im.shape))
  if num_levels == 0:
    return (tf.convert_to_tensor(value=im),)
  max_num_levels = get_max_num_levels(tf.shape(im))
  assert_ops = [
      tf.Assert(
          tf.greater_equal(max_num_levels, num_levels),
          [tf.shape(im), num_levels, max_num_levels])
  ]

  with tf.control_dependencies(assert_ops):
    filters = generate_filters(wavelet_type)
    pyr = []
    for _ in range(num_levels):
      hi = _downsample(im, filters.analysis_hi, 0, 1)
      lo = _downsample(im, filters.analysis_lo, 0, 0)
      pyr.append(
          (_downsample(hi, filters.analysis_hi, 1, 1),
           _downsample(lo, filters.analysis_hi, 1, 1),
           _downsample(hi, filters.analysis_lo, 1, 0)))  # pyformat: disable
      im = _downsample(lo, filters.analysis_lo, 1, 0)
    pyr.append(im)
    pyr = tuple(pyr)
    return pyr
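# A usage sketch, with 'LeGall5/3' as a hypothetical wavelet_type (whatever
# generate_filters() actually supports applies here): decompose a batch of
# one 16x16 image into a 2-level pyramid.
im = tf.random.normal([1, 16, 16])
pyr = construct(im, num_levels=2, wavelet_type='LeGall5/3')
# len(pyr) == 3: two 3-tuples of bands plus the coarsest residual.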
def random_cropped_inputs():
  """Cropped inputs with stateless random ops."""
  input_shape = tf.compat.v1.shape(inputs)
  crop_size = tf.stack(
      [input_shape[0], self.height, self.width, input_shape[3]])
  check = tf.Assert(
      tf.reduce_all(input_shape >= crop_size), [self.height, self.width])
  with tf.control_dependencies([check]):
    limit = input_shape - crop_size + 1
    offset = stateless_random_ops.stateless_random_uniform(
        tf.compat.v1.shape(input_shape),
        dtype=crop_size.dtype,
        maxval=crop_size.dtype.max,
        seed=self._rng.make_seeds()[:, 0]) % limit
    return tf.slice(inputs, offset, crop_size)
def _lookup_dense(self, inputs):
  """Lookup table values for a dense Tensor, handling masking and OOV."""
  # When executing eagerly and tracing keras.Input objects, do not call
  # lookup. This is critical for restoring SavedModel, which will first trace
  # layer.call and then attempt to restore the table. We need the table to be
  # uninitialized for the restore to work, but calling the table
  # uninitialized would error.
  if tf.executing_eagerly() and backend.is_keras_tensor(inputs):
    lookups = tf.zeros_like(inputs, dtype=self._value_dtype)
  else:
    lookups = self.lookup_table.lookup(inputs)

  if self.mask_token is not None:
    mask_locations = tf.equal(inputs, self._mask_key)
    lookups = tf.where(mask_locations, self._mask_value, lookups)

  if self.invert:
    return lookups

  lookup_checks = []
  if self.num_oov_indices == 0:
    # If we have zero oov indices, we need to check for oov inputs.
    oov_indices = tf.where(tf.equal(lookups, -1))
    oov_inputs = tf.gather_nd(inputs, oov_indices)
    msg = tf.strings.format(
        "When `num_oov_indices=0` all inputs should be in vocabulary, "
        "found OOV values {}, consider setting `num_oov_indices=1`.",
        (oov_inputs,))
    assertion = tf.Assert(tf.equal(tf.size(oov_indices), 0), [msg])
    lookup_checks.append(assertion)
  elif self.num_oov_indices > 1:
    # If we have multiple oov indices, we need a further hashing step.
    if self._key_dtype.is_integer:
      oov_indices = tf.math.floormod(inputs, self.num_oov_indices)
    else:
      oov_indices = tf.strings.to_hash_bucket_fast(
          inputs, num_buckets=self.num_oov_indices)
    oov_indices = oov_indices + self._oov_start_index()
    oov_locations = tf.equal(lookups, self._default_value)
    lookups = tf.where(oov_locations, oov_indices, lookups)

  with tf.control_dependencies(lookup_checks):
    return tf.identity(lookups)
def call(self, inputs):
  if isinstance(inputs, (list, tuple, np.ndarray)):
    inputs = tf.convert_to_tensor(inputs)

  if not self.max_tokens and self._vocab_size is None:
    raise ValueError(
        "You must set the layer's vocabulary before calling it. "
        "Either pass a `vocabulary` argument to the layer, or "
        "call `layer.adapt(dataset)` with some sample data.")
  self._called = True
  if self._key_dtype == tf.int64 and inputs.dtype == tf.int32:
    inputs = tf.cast(inputs, tf.int64)
  lookup_result = self._table_handler.lookup(inputs)

  lookup_checks = []

  if self.num_oov_indices == 0 and not self.invert:
    if tf_utils.is_sparse(inputs):
      lookup_values = lookup_result.values
      input_values = inputs.values
    elif tf_utils.is_ragged(inputs):
      lookup_values = lookup_result.flat_values
      input_values = inputs.flat_values
    else:
      lookup_values = lookup_result
      input_values = inputs
    # tf.where needs rank > 0.
    if input_values.shape.rank == 0:
      input_values = self._expand_dims(input_values, -1)
      lookup_values = self._expand_dims(lookup_values, -1)
    oov_indices = tf.where(tf.equal(lookup_values, -1))
    oov_inputs = tf.compat.v1.gather_nd(input_values, oov_indices)
    msg = tf.strings.format(
        "When `num_oov_indices=0` all inputs should be in vocabulary, "
        "found OOV values {}, consider setting `num_oov_indices=1`.",
        (oov_inputs,))
    assertion = tf.Assert(
        tf.equal(tf.compat.v1.size(oov_indices), 0), [msg])
    lookup_checks.append(assertion)

  with tf.control_dependencies(lookup_checks):
    if self.output_mode == INT:
      return tf.identity(lookup_result)
    else:
      return self._encode_output(lookup_result)
def _downsample(x, f, direction, shift):
  """Downsample by a factor of 2 using reflecting boundary conditions.

  This function convolves `x` with filter `f` with reflecting boundary
  conditions, and then decimates by a factor of 2. This is usually done to
  downsample `x`, assuming `f` is some smoothing filter, but will also be
  used for wavelet transformations in which `f` is not a smoothing filter.

  Args:
    x: The input tensor (numpy or TF), of size (num_channels, width, height).
    f: The input filter, which must be an odd-length 1D numpy array.
    direction: The spatial direction in [0, 1] along which `x` will be
      convolved with `f` and then decimated. Because `x` has a batch/channels
      dimension, `direction` == 0 corresponds to downsampling along axis 1 in
      `x`, and `direction` == 1 corresponds to downsampling along axis 2 in
      `x`.
    shift: A shift amount in [0, 1] by which `x` will be shifted along the
      axis specified by `direction` before filtering.

  Returns:
    `x` convolved with `f` along the spatial dimension `direction` with
    reflection boundary conditions with an offset of `shift`.
  """
  _check_resample_inputs(x, f, direction, shift)
  assert_ops = [tf.Assert(tf.equal(tf.rank(f), 1), [tf.rank(f)])]
  with tf.control_dependencies(assert_ops):
    # The above and below padding amounts are different so as to support odd
    # and even length filters. An odd-length filter of length n causes a
    # padding of (n-1)/2 on both sides, while an even-length filter will pad
    # by one less below than above.
    x_padded = pad_reflecting(x, (len(f) - 1) // 2, len(f) // 2,
                              direction + 1)
    if direction == 0:
      x_padded = x_padded[:, shift:, :]
      f_ex = f[:, tf.newaxis]
      strides = [1, 2, 1, 1]
    elif direction == 1:
      x_padded = x_padded[:, :, shift:]
      f_ex = f[tf.newaxis, :]
      strides = [1, 1, 2, 1]
    y = tf.nn.conv2d(x_padded[:, :, :, tf.newaxis],
                     tf.cast(f_ex, x.dtype)[:, :, tf.newaxis, tf.newaxis],
                     strides, 'VALID')[:, :, :, 0]
    return y
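# A usage sketch, assuming the module's `pad_reflecting` and
# `_check_resample_inputs` helpers are importable: downsample a 1x4x4 tensor
# along the width axis (direction 0) with a 3-tap binomial filter.
import numpy as np
x = tf.reshape(tf.range(16, dtype=tf.float32), [1, 4, 4])
y = _downsample(x, np.array([0.25, 0.5, 0.25]), direction=0, shift=0)
# y.shape == (1, 2, 4): reflect-padded, filtered, then decimated by 2.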
def _lookup_dense(self, inputs):
  """Lookup table values for a dense Tensor, handling masking and OOV."""
  # Avoid calling lookup on the table when tracing keras.Inputs. This is
  # critical for SavedModel, which expects an uninitialized StaticHashTable
  # and will trace the model before restoring the table.
  if backend.is_keras_tensor(inputs):
    lookups = tf.zeros_like(inputs, dtype=self._value_dtype)
  else:
    lookups = self.lookup_table.lookup(inputs)

  if self.mask_token is not None:
    mask_locations = tf.equal(inputs, self._mask_key)
    lookups = tf.where(mask_locations, self._mask_value, lookups)

  if self.invert:
    return lookups

  lookup_checks = []
  if self.num_oov_indices == 0:
    # If we have zero oov indices, we need to check for oov inputs.
    oov_indices = tf.where(tf.equal(lookups, -1))
    oov_inputs = tf.gather_nd(inputs, oov_indices)
    msg = tf.strings.format(
        "When `num_oov_indices=0` all inputs should be in vocabulary, "
        "found OOV values {}, consider setting `num_oov_indices=1`.",
        (oov_inputs,))
    assertion = tf.Assert(tf.equal(tf.size(oov_indices), 0), [msg])
    lookup_checks.append(assertion)
  elif self.num_oov_indices > 1:
    # If we have multiple oov indices, we need a further hashing step.
    if self._key_dtype.is_integer:
      oov_indices = tf.math.floormod(inputs, self.num_oov_indices)
    else:
      oov_indices = tf.strings.to_hash_bucket_fast(
          inputs, num_buckets=self.num_oov_indices)
    oov_indices = oov_indices + self._oov_start_index()
    oov_locations = tf.equal(lookups, self._default_value)
    lookups = tf.where(oov_locations, oov_indices, lookups)

  with tf.control_dependencies(lookup_checks):
    return tf.identity(lookups)
def to_absolute_coordinates(boxlist,
                            height,
                            width,
                            check_range=True,
                            maximum_normalized_coordinate=1.1,
                            scope=None):
  """Converts normalized box coordinates to absolute pixel coordinates.

  This function raises an assertion-failure error when the maximum box
  coordinate value is larger than `maximum_normalized_coordinate` (in which
  case coordinates are already absolute).

  Args:
    boxlist: BoxList with coordinates in range [0, 1].
    height: Maximum value for height of absolute box coordinates.
    width: Maximum value for width of absolute box coordinates.
    check_range: If True, checks if the coordinates are normalized or not.
    maximum_normalized_coordinate: Maximum coordinate value to be considered
      as normalized, defaults to 1.1.
    scope: name scope.

  Returns:
    boxlist with absolute coordinates in terms of the image size.
  """
  with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
    height = tf.cast(height, tf.float32)
    width = tf.cast(width, tf.float32)

    # Ensure range of input boxes is correct.
    if check_range:
      box_maximum = tf.reduce_max(boxlist.get())
      max_assert = tf.Assert(
          tf.greater_equal(maximum_normalized_coordinate, box_maximum),
          ['maximum box coordinate value is larger '
           'than %f: ' % maximum_normalized_coordinate, box_maximum])
      with tf.control_dependencies([max_assert]):
        width = tf.identity(width)

    return scale(boxlist, height, width)
def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
  """Sort boxes and associated fields according to a scalar field.

  A common use case is reordering the boxes according to descending scores.

  Args:
    boxlist: BoxList holding N boxes.
    field: A BoxList field for sorting and reordering the BoxList.
    order: (Optional) descend or ascend. Default is descend.
    scope: name scope.

  Returns:
    sorted_boxlist: A sorted BoxList with the field in the specified order.

  Raises:
    ValueError: if specified field does not exist
    ValueError: if the order is not either descend or ascend
  """
  with tf.name_scope(scope, 'SortByField'):
    if order != SortOrder.descend and order != SortOrder.ascend:
      raise ValueError('Invalid sort order')

    field_to_sort = boxlist.get_field(field)
    if len(field_to_sort.shape.as_list()) != 1:
      raise ValueError('Field should have rank 1')

    num_boxes = boxlist.num_boxes()
    num_entries = tf.size(field_to_sort)
    length_assert = tf.Assert(
        tf.equal(num_boxes, num_entries),
        ['Incorrect field size: actual vs expected.', num_entries, num_boxes])

    with tf.control_dependencies([length_assert]):
      _, sorted_indices = tf.nn.top_k(field_to_sort, num_boxes, sorted=True)

    if order == SortOrder.ascend:
      sorted_indices = tf.reverse_v2(sorted_indices, [0])

    return gather(boxlist, sorted_indices)
def __call__(self, x):
  """Evaluates the loss function on a matrix.

  Args:
    x: The residual for which the loss is being computed. Must be a rank-2
      tensor, where the first dimension is the batch index and the second
      dimension corresponds to different "channels", whose size must be equal
      to `num_channels`.

  Returns:
    A TF tensor of the same type and shape as input `x`, containing the loss
    at each element of `x` as a function of `x`, `df`, and `scale`. These
    "losses" are actually negative log-likelihoods.
  """
  x = tf.convert_to_tensor(x)
  tf.debugging.assert_rank(x, 2)
  tf.debugging.assert_same_float_dtype([x], self._float_dtype)
  with tf.control_dependencies([
      tf.Assert(
          tf.equal(x.shape[1], self._num_channels),
          [x.shape[1], self._num_channels])
  ]):
    return util.students_t_nll(x, self.df(), self.scale())
def interpolate1d(x, values, tangents):
  r"""Perform cubic hermite spline interpolation on a 1D spline.

  The x coordinates of the spline knots are at [0 : 1 : len(values)-1].
  Queries outside of the range of the spline are computed using linear
  extrapolation. See https://en.wikipedia.org/wiki/Cubic_Hermite_spline for
  details, where "x" corresponds to `x`, "p" corresponds to `values`, and "m"
  corresponds to `tangents`.

  Args:
    x: A tensor of any size of single or double precision floats containing
      the set of values to be used for interpolation into the spline.
    values: A vector of single or double precision floats containing the
      value of each knot of the spline being interpolated into. Must be the
      same length as `tangents` and the same type as `x`.
    tangents: A vector of single or double precision floats containing the
      tangent (derivative) of each knot of the spline being interpolated
      into. Must be the same length as `values` and the same type as `x`.

  Returns:
    The result of interpolating along the spline defined by `values`, and
    `tangents`, using `x` as the query values. Will be the same length and
    type as `x`.
  """
  # `values` and `tangents` must have the same type as `x`.
  tf.debugging.assert_type(values, x.dtype)
  tf.debugging.assert_type(tangents, x.dtype)
  float_dtype = x.dtype
  assert_ops = [
      # `values` must be a vector.
      tf.Assert(tf.equal(tf.rank(values), 1), [tf.shape(values)]),
      # `tangents` must be a vector.
      tf.Assert(tf.equal(tf.rank(tangents), 1), [tf.shape(tangents)]),
      # `values` and `tangents` must have the same length.
      tf.Assert(
          tf.equal(tf.shape(values)[0], tf.shape(tangents)[0]),
          [tf.shape(values)[0], tf.shape(tangents)[0]]),
  ]
  with tf.control_dependencies(assert_ops):
    # Find the indices of the knots below and above each x.
    x_lo = tf.cast(
        tf.floor(
            tf.clip_by_value(x, 0.,
                             tf.cast(tf.shape(values)[0] - 2, float_dtype))),
        tf.int32)
    x_hi = x_lo + 1

    # Compute the relative distance between each `x` and the knot below it.
    t = x - tf.cast(x_lo, float_dtype)

    # Compute the cubic hermite expansion of `t`.
    t_sq = tf.square(t)
    t_cu = t * t_sq
    h01 = -2. * t_cu + 3. * t_sq
    h00 = 1. - h01
    h11 = t_cu - t_sq
    h10 = h11 - t_sq + t

    # Linearly extrapolate above and below the extents of the spline for all
    # values.
    value_before = tangents[0] * t + values[0]
    value_after = tangents[-1] * (t - 1.) + values[-1]

    # Cubically interpolate between the knots below and above each query
    # point.
    neighbor_values_lo = tf.gather(values, x_lo)
    neighbor_values_hi = tf.gather(values, x_hi)
    neighbor_tangents_lo = tf.gather(tangents, x_lo)
    neighbor_tangents_hi = tf.gather(tangents, x_hi)
    value_mid = (
        neighbor_values_lo * h00 + neighbor_values_hi * h01 +
        neighbor_tangents_lo * h10 + neighbor_tangents_hi * h11)

    # Return the interpolated or extrapolated values for each query point,
    # depending on whether or not the query lies within the span of the
    # spline.
    return tf.where(t < 0., value_before,
                    tf.where(t > 1., value_after, value_mid))
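# A usage sketch: a 3-knot spline with knots at x = 0, 1, 2. With zero
# tangents the interpolant passes through the knot values and flattens out at
# each knot; queries outside [0, 2] extrapolate linearly.
values = tf.constant([0., 1., 0.])
tangents = tf.zeros_like(values)
y = interpolate1d(tf.constant([0.5, 1.0, 2.5]), values, tangents)
# y ~= [0.5, 1.0, 0.0]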
def kl_divergence(distribution_a, distribution_b,
                  allow_nan_stats=True, name=None):
  """Get the KL-divergence KL(distribution_a || distribution_b).

  If there is no KL method registered specifically for `type(distribution_a)`
  and `type(distribution_b)`, then the class hierarchies of these types are
  searched.

  If one KL method is registered between any pairs of classes in these two
  parent hierarchies, it is used.

  If more than one such registered method exists, the method whose registered
  classes have the shortest sum MRO paths to the input types is used.

  If more than one such shortest path exists, the first method identified in
  the search is used (favoring a shorter MRO distance to
  `type(distribution_a)`).

  Args:
    distribution_a: The first distribution.
    distribution_b: The second distribution.
    allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
      (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
      result is undefined. When `False`, an exception is raised if one or more
      of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Returns:
    A Tensor with the batchwise KL-divergence between `distribution_a` and
    `distribution_b`.

  Raises:
    NotImplementedError: If no KL method is defined for distribution types of
      `distribution_a` and `distribution_b`.
  """
  kl_fn = _registered_kl(type(distribution_a), type(distribution_b))
  if kl_fn is None:
    raise NotImplementedError(
        "No KL(distribution_a || distribution_b) registered for "
        "distribution_a type {} and distribution_b type {}".format(
            type(distribution_a).__name__, type(distribution_b).__name__))

  with tf.name_scope("KullbackLeibler"):
    kl_t = kl_fn(distribution_a, distribution_b, name=name)
    if allow_nan_stats:
      return kl_t

    # Check KL for NaNs.
    kl_t = tf.identity(kl_t, name="kl")

    with tf.control_dependencies([
        tf.Assert(
            tf.logical_not(
                tf.reduce_any(input_tensor=tf.math.is_nan(kl_t))),
            [("KL calculation between {} and {} returned NaN values "
              "(and was called with allow_nan_stats=False). Values:".format(
                  distribution_a.name, distribution_b.name)), kl_t])
    ]):
      return tf.identity(kl_t, name="checked_kl")
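# A usage sketch with TensorFlow Probability distributions, assuming a
# Normal-Normal KL is registered in this module's registry (as it is in tfp):
import tensorflow_probability as tfp
tfd = tfp.distributions
kl = kl_divergence(tfd.Normal(0., 1.), tfd.Normal(1., 1.),
                   allow_nan_stats=False)
# kl == 0.5, the closed-form KL between N(0, 1) and N(1, 1).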
def _brent(objective_fn,
           left_bracket,
           right_bracket,
           value_at_left_bracket=None,
           value_at_right_bracket=None,
           absolute_root_tolerance=2e-7,
           relative_root_tolerance=None,
           function_tolerance=2e-7,
           max_iterations=100,
           stopping_policy_fn=None,
           validate_args=False,
           name=None):
  r"""Finds root(s) of a function of a single variable using Brent's method.

  [Brent's method](https://en.wikipedia.org/wiki/Brent%27s_method) is a
  root-finding algorithm combining the bisection method, the secant method
  and extrapolation. Like bisection it is guaranteed to converge towards a
  root if one exists, but that convergence is superlinear and on par with
  less reliable methods.

  This implementation is a translation of the algorithm described in the
  [original article](https://academic.oup.com/comjnl/article/14/4/422/325237).

  Args:
    objective_fn: Python callable for which roots are searched. It must be a
      callable of a single `Tensor` parameter and return a `Tensor` of the
      same shape and dtype as `left_bracket`.
    left_bracket: `Tensor` or Python float representing the first starting
      points. The function will search for roots between each pair of points
      defined by `left_bracket` and `right_bracket`. The shape of
      `left_bracket` should match that of the input to `objective_fn`.
    right_bracket: `Tensor` of the same shape and dtype as `left_bracket` or
      Python float representing the second starting points. The function will
      search for roots between each pair of points defined by `left_bracket`
      and `right_bracket`. This argument must have the same shape as
      `left_bracket`.
    value_at_left_bracket: Optional `Tensor` or Python float representing the
      value of `objective_fn` at `left_bracket`. If specified, this argument
      must have the same shape as `left_bracket`. If not specified, the value
      will be evaluated during the search.
      Default value: None.
    value_at_right_bracket: Optional `Tensor` or Python float representing
      the value of `objective_fn` at `right_bracket`. If specified, this
      argument must have the same shape as `right_bracket`. If not specified,
      the value will be evaluated during the search.
      Default value: None.
    absolute_root_tolerance: Optional `Tensor` representing the absolute
      tolerance for estimated roots, with the total tolerance being
      calculated as `(absolute_root_tolerance + relative_root_tolerance *
      |root|) / 2`. If specified, this argument must be positive, broadcast
      with the shape of `left_bracket` and have the same dtype.
      Default value: `2e-7`.
    relative_root_tolerance: Optional `Tensor` representing the relative
      tolerance for estimated roots, with the total tolerance being
      calculated as `(absolute_root_tolerance + relative_root_tolerance *
      |root|) / 2`. If specified, this argument must be positive, broadcast
      with the shape of `left_bracket` and have the same dtype.
      Default value: `None` which translates to `4 *
        numpy.finfo(left_bracket.dtype.as_numpy_dtype).eps`.
    function_tolerance: Optional `Tensor` representing the tolerance used to
      check for roots. If the absolute value of `objective_fn` is smaller
      than or equal to `function_tolerance` at a given estimate, then that
      estimate is considered a root for the function. If specified, this
      argument must broadcast with the shape of `left_bracket` and have the
      same dtype. Set to zero to match Brent's original algorithm and to
      continue the search until an exact root is found.
      Default value: `2e-7`.
    max_iterations: Optional `Tensor` of an integral dtype or Python integer
      specifying the maximum number of steps to perform for each initial
      point. Must broadcast with the shape of `left_bracket`. If an element
      is set to zero, the function will not search for any root for the
      corresponding points in `left_bracket` and `right_bracket`. Instead, it
      will return the best estimate from the inputs.
      Default value: `100`.
    stopping_policy_fn: Python `callable` controlling the algorithm
      termination. It must be a callable accepting a `Tensor` of booleans
      with the shape of `left_bracket` (each denoting whether the search is
      finished for each starting point), and returning a scalar boolean
      `Tensor` (indicating whether the overall search should stop). Typical
      values are `tf.reduce_all` (which returns only when the search is
      finished for all pairs of points), and `tf.reduce_any` (which returns
      as soon as the search is finished for any pair of points).
      Default value: `None` which translates to `tf.reduce_all`.
    validate_args: Python `bool` indicating whether to validate arguments
      such as `left_bracket`, `right_bracket`, `absolute_root_tolerance`,
      `relative_root_tolerance`, `function_tolerance`, and `max_iterations`.
      Default value: `False`.
    name: Python `str` name prefixed to ops created by this function.

  Returns:
    brent_results: A Python object containing the following attributes:
      estimated_root: `Tensor` containing the best estimate explored. If the
        search was successful within the specified tolerance, this estimate
        is a root of the objective function.
      objective_at_estimated_root: `Tensor` containing the value of the
        objective function at `estimated_root`. If the search was successful
        within the specified tolerance, then this is close to 0. It has the
        same dtype and shape as `estimated_root`.
      num_iterations: `Tensor` containing the number of iterations performed.
        It has the same dtype as `max_iterations` and shape as
        `estimated_root`.
      converged: Scalar boolean `Tensor` indicating whether `estimated_root`
        is a root within the tolerance specified for the search. It has the
        same shape as `estimated_root`.

  Raises:
    ValueError: if the `stopping_policy_fn` is not callable.
  """
  with tf.compat.v1.name_scope(
      name,
      default_name="brent_root",
      values=[
          left_bracket, right_bracket, value_at_left_bracket,
          value_at_right_bracket, max_iterations
      ]):
    state, params, constants = _prepare_brent_args(
        objective_fn, left_bracket, right_bracket, value_at_left_bracket,
        value_at_right_bracket, absolute_root_tolerance,
        relative_root_tolerance, function_tolerance, max_iterations,
        stopping_policy_fn)

    assertions = []
    if validate_args:
      assertions += [
          tf.Assert(
              tf.reduce_all(state.value_at_last_estimate *
                            state.value_at_best_estimate <=
                            constants.zero_value),
              [state.value_at_last_estimate, state.value_at_best_estimate]),
          tf.Assert(
              tf.reduce_all(params.absolute_root_tolerance > constants.zero),
              [params.absolute_root_tolerance]),
          tf.Assert(
              tf.reduce_all(params.relative_root_tolerance > constants.zero),
              [params.relative_root_tolerance]),
          tf.Assert(
              tf.reduce_all(params.function_tolerance >= constants.zero),
              [params.function_tolerance]),
          tf.Assert(
              tf.reduce_all(params.max_iterations >= state.num_iterations),
              [params.max_iterations]),
      ]

    with tf.compat.v1.control_dependencies(assertions):
      result = tf.while_loop(
          # Negate `_should_stop` to determine if the search should continue.
          # This means, in particular, that tf.reduce_*all* will return only
          # when the search is finished for *all* starting points.
          lambda loop_vars: ~_should_stop(loop_vars,
                                          params.stopping_policy_fn),
          lambda state: _brent_loop_body(state, params, constants),
          loop_vars=[state],
          maximum_iterations=max_iterations)

    state = result[0]
    converged = tf.math.abs(
        state.value_at_best_estimate) <= function_tolerance
    return BrentResults(
        estimated_root=state.best_estimate,
        objective_at_estimated_root=state.value_at_best_estimate,
        num_iterations=state.num_iterations,
        converged=converged)
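# A usage sketch (treating this private helper as callable from its module):
# find sqrt(2) by bracketing the root of x^2 - 2 in [1, 2].
results = _brent(lambda x: x * x - 2., tf.constant(1.), tf.constant(2.))
# results.estimated_root ~= 1.4142135, results.converged == True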
def softquantiles(x,
                  quantiles,
                  quantile_width=None,
                  axis=-1,
                  may_squeeze=True,
                  **kwargs):
  """Computes soft quantiles via optimal transport.

  This operator takes advantage of the fact that an exhaustive softsort is
  not required to recover a single quantile. Instead, one can transport all
  input values in x onto only 3 weighted values. Target weights are adjusted
  so that those values in x that are transported to the middle value in the
  target vector y correspond to those concentrating around the quantile of
  interest.

  This idea generalizes to more quantiles, interleaving small weights on the
  quantile indices and bigger weights in between, corresponding to the gap
  from one desired quantile to the next one.

  Args:
    x: Tensor<float> of any shape.
    quantiles: list<float> the quantiles to be returned. It can also be a
      single float.
    quantile_width: (float) mass given to the bucket supposed to attract
      points whose value concentrate around the desired quantile value.
      Bigger width means that we allow the soft quantile to be a mixture of
      more points further away from the quantile. If None, the width is set
      at 1/n where n is the number of values considered (the size along the
      `axis`).
    axis: (int) the axis along which to compute the quantile.
    may_squeeze: (bool) should we squeeze the output tensor in case of a
      single quantile.
    **kwargs: see SoftQuantilizer for possible extra parameters.

  Returns:
    A Tensor<float> similar to the input tensor, but the axis dimension is
    replaced by the number of quantiles specified in the quantiles list.
    Hence, if only one quantile is requested (quantiles is a float), only one
    value in that axis is returned. When several quantiles are requested, the
    tensor will have that many values in that axis.

  Raises:
    tf.errors.InvalidArgumentError when the quantiles and quantile width are
    not correct, namely quantiles are either not in sorted order or the
    quantile_width is too large.
  """
  if isinstance(quantiles, float):
    quantiles = [quantiles]
  quantiles = tf.constant(quantiles, tf.float32)

  # Preprocesses submitted quantiles to check that they satisfy elementary
  # constraints.
  valid_quantiles = tf.boolean_mask(
      quantiles, tf.logical_and(quantiles > 0.0, quantiles < 1.0))
  num_quantiles = tf.shape(valid_quantiles)[0]

  # Includes values on both ends of [0,1].
  extended_quantiles = tf.concat([[0.0], valid_quantiles, [1.0]], axis=0)

  # Builds filler_weights in between the target quantiles.
  filler_weights = extended_quantiles[1:] - extended_quantiles[:-1]
  if quantile_width is None:
    quantile_width = tf.reduce_min(
        tf.concat(
            [filler_weights,
             [1.0 / tf.cast(tf.shape(x)[axis], dtype=x.dtype)]],
            axis=0))

  # Takes into account quantile_width in the definition of weights.
  shift = -tf.ones(tf.shape(filler_weights), dtype=x.dtype)
  shift = shift + 0.5 * (
      tf.one_hot(0, num_quantiles + 1) +
      tf.one_hot(num_quantiles, num_quantiles + 1))
  filler_weights = filler_weights + quantile_width * shift

  assert_op = tf.Assert(
      tf.reduce_all(filler_weights >= 0.0), [filler_weights])
  with tf.control_dependencies([assert_op]):
    # Adds one more value to have tensors of the same shape to interleave
    # them.
    quantile_weights = tf.ones(num_quantiles + 1) * quantile_width

    # Interleaves the filler_weights with the quantile weights.
    weights = tf.reshape(
        tf.stack([filler_weights, quantile_weights], axis=1), (-1,))[:-1]

    # Sends only the positive weights to the softsort operator.
    positive_weights = tf.boolean_mask(weights, weights > 0.0)
    all_quantiles = softsort(
        x,
        direction='ASCENDING',
        axis=axis,
        target_weights=positive_weights,
        **kwargs)

    # Recovers the indices corresponding to the desired quantiles.
    odds = tf.math.floormod(tf.range(weights.shape[0], dtype=tf.float32), 2)
    positives = tf.cast(weights > 0.0, tf.float32)
    indices = tf.cast(tf.math.cumsum(positives) * odds, dtype=tf.int32)
    indices = tf.boolean_mask(indices, indices > 0) - 1
    result = tf.gather(all_quantiles, indices, axis=axis)

    # In the specific case where we want a single quantile, squeezes the
    # quantile dimension.
    can_squeeze = tf.equal(tf.shape(result)[axis], 1)
    if tf.math.logical_and(can_squeeze, may_squeeze):
      result = tf.squeeze(result, axis=axis)
  return result
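# A usage sketch, assuming `softsort` from this module is importable: the
# soft median of each row, computed via regularized optimal transport.
x = tf.constant([[1.0, 5.0, 2.0, 4.0, 3.0]])
med = softquantiles(x, 0.5, axis=-1)
# ~3.0, smoothed toward neighboring values by the transport regularization.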
def __call__(self, x):
  """Evaluates the loss function on a batch of images.

  Args:
    x: The image residuals for which the loss is being computed, which is
      expected to be the differences between RGB images. Must be a rank-4
      tensor, where the first dimension is the batch index, and the remaining
      3 dimensions correspond to `self._image_size` (two spatial, one
      channel).

  Returns:
    A TF tensor of the same type and shape as input `x`, containing the loss
    at each element of `x` as a function of `x`, `alpha`, and `scale`. These
    "losses" are actually negative log-likelihoods (as produced by
    distribution.nllfun()) and so they are not actually bounded from below by
    zero --- it is okay if they go negative! You'll probably want to minimize
    their sum or mean.
  """
  x = tf.convert_to_tensor(x)
  tf.debugging.assert_rank(x, 4)
  with tf.control_dependencies([
      tf.Assert(
          tf.reduce_all(tf.equal(x.shape[1:], self._image_size)),
          [x.shape[1:], self._image_size])
  ]):
    if self._color_space == 'YUV':
      x = util.rgb_to_syuv(x)
    # If `color_space` == 'RGB', do nothing.

    # Reshape `x` from
    #   (num_batches, width, height, num_channels) to
    #   (num_batches * num_channels, width, height)
    width, height, num_channels = self._image_size
    x_stack = tf.reshape(
        tf.transpose(x, perm=(0, 3, 1, 2)), (-1, width, height))

    # Turn each channel in `x_stack` into the spatial representation
    # specified by `representation`.
    if self._representation in wavelet.generate_filters():
      x_stack = wavelet.flatten(
          wavelet.rescale(
              wavelet.construct(x_stack, self._wavelet_num_levels,
                                self._representation),
              self._wavelet_scale_base))
    elif self._representation == 'DCT':
      x_stack = util.image_dct(x_stack)
    # If `representation` == 'PIXEL', do nothing.

    # Reshape `x_stack` from
    #   (num_batches * num_channels, width, height) to
    #   (num_batches, num_channels * width * height)
    x_mat = tf.reshape(
        tf.transpose(
            tf.reshape(x_stack, [-1, num_channels, width, height]),
            perm=[0, 2, 3, 1]),
        [-1, width * height * num_channels])

    # Set up the adaptive loss. Note, if `use_students_t` == True then
    # `alpha_mat` actually contains "log(df)" values.
    loss_mat = self._lossfun(x_mat)

    # Reshape the loss function's outputs to have the same shape as the
    # input.
    loss = tf.reshape(loss_mat, [-1, width, height, num_channels])

    if self._summarize_loss:
      # Summarize the `alpha` and `scale` parameters as images (normalized to
      # [0, 1]) and histograms. Note that these may look unintuitive unless
      # the colorspace is 'RGB' and the image representation is 'PIXEL', as
      # the image summaries (like most images) are rendered as RGB pixels.
      log_scale = tf.math.log(self.scale())
      log_scale_min = tf.reduce_min(log_scale)
      log_scale_max = tf.reduce_max(log_scale)
      tf.summary.image('/log_scale',
                       (log_scale[tf.newaxis] - log_scale_min) /
                       (log_scale_max - log_scale_min + 1e-10))
      tf.summary.histogram('/log_scale', log_scale)

      if not self._use_students_t:
        alpha = self.alpha()
        alpha_min = tf.reduce_min(alpha)
        alpha_max = tf.reduce_max(alpha)
        tf.summary.image('/alpha',
                         (alpha[tf.newaxis] - alpha_min) /
                         (alpha_max - alpha_min + 1e-10))
        tf.summary.histogram('/alpha', alpha)

    return loss
def draw_samples(self, alpha, scale):
  r"""Draw samples from the robust distribution.

  This function implements Algorithm 1 of the paper. This code is written to
  allow for sampling from a set of different distributions, each parametrized
  by its own alpha and scale values, as opposed to the more standard approach
  of drawing N samples from the same distribution. This is done by repeatedly
  performing N instances of rejection sampling for each of the N
  distributions until at least one proposal for each of the N distributions
  has been accepted. All samples assume a zero mean --- to get non-zero mean
  samples, just add each mean to each sample.

  Args:
    alpha: A TF tensor/scalar or numpy array/scalar of floats where each
      element is the shape parameter of that element's distribution.
    scale: A TF tensor/scalar or numpy array/scalar of floats where each
      element is the scale parameter of that element's distribution. Must be
      the same shape as `alpha`.

  Returns:
    A TF tensor with the same shape and precision as `alpha` and `scale`
    where each element is a sample drawn from the zero-mean distribution
    specified for that element by `alpha` and `scale`.
  """
  # `scale` must have the same type as `alpha`.
  float_dtype = alpha.dtype
  tf.debugging.assert_type(scale, float_dtype)
  assert_ops = [
      # `scale` must be > 0.
      tf.Assert(tf.reduce_all(scale > 0.), [scale]),
      # `alpha` must be >= 0.
      tf.Assert(tf.reduce_all(alpha >= 0.), [alpha]),
      # `alpha` and `scale` must have the same shape.
      tf.Assert(
          tf.reduce_all(tf.equal(tf.shape(alpha), tf.shape(scale))),
          [tf.shape(alpha), tf.shape(scale)]),
  ]

  with tf.control_dependencies(assert_ops):
    shape = tf.shape(alpha)

    # The distributions we will need for rejection sampling. The sqrt(2)
    # scaling of the Cauchy distribution corrects for our differing
    # conventions for standardization.
    cauchy = tfp.distributions.Cauchy(loc=0., scale=tf.sqrt(2.))
    uniform = tfp.distributions.Uniform(low=0., high=1.)

    def while_cond(_, accepted):
      """Terminate the loop only when all samples have been accepted."""
      return ~tf.reduce_all(accepted)

    def while_body(samples, accepted):
      """Generate N proposal samples, and then perform rejection sampling."""
      # Draw N samples from a Cauchy, our proposal distribution.
      cauchy_sample = tf.cast(cauchy.sample(shape), float_dtype)

      # Compute the likelihood of each sample under its target distribution.
      nll = self.nllfun(cauchy_sample, alpha, tf.cast(1, float_dtype))

      # Bound the NLL. We don't use the approximate loss as it may cause
      # unpredictable behavior in the context of sampling.
      nll_bound = general.lossfun(
          cauchy_sample,
          tf.cast(0, float_dtype),
          tf.cast(1, float_dtype),
          approximate=False) + self.log_base_partition_function(alpha)

      # Draw N samples from a uniform distribution, and use each uniform
      # sample to decide whether or not to accept each proposal sample.
      uniform_sample = tf.cast(uniform.sample(shape), float_dtype)
      accept = uniform_sample <= tf.math.exp(nll_bound - nll)

      # If a sample is accepted, replace its element in `samples` with the
      # proposal sample, and set its bit in `accepted` to True.
      samples = tf.where(accept, cauchy_sample, samples)
      accepted = accept | accepted
      return (samples, accepted)

    # Initialize the loop. The first item does not matter as it will get
    # overwritten, the second item must be all False.
    while_loop_vars = (tf.zeros(shape, float_dtype),
                       tf.zeros(shape, dtype=bool))

    # Perform rejection sampling until all N samples have been accepted.
    terminal_state = tf.while_loop(
        cond=while_cond, body=while_body, loop_vars=while_loop_vars)

    # Because our distribution is a location-scale family, we sample from
    # p(x | 0, \alpha, 1) and then scale each sample by `scale`.
    samples = tf.multiply(terminal_state[0], scale)

  return samples