def FProp(self, theta, inputs, paddings):
  """Global spatial pooling over the time and frequency axes.

  Args:
    theta: A `.NestedMap` object containing weights' values of this layer
      and its children layers.
    inputs: The inputs tensor. It is expected to be of shape [batch, time,
      frequency, channel]. The time dimension corresponds to the height
      dimension as in images and the frequency dimension corresponds to the
      width dimension as in images.
    paddings: The paddings tensor. It is expected to be of shape
      [batch, time]. Defaults to None, which means there no paddings.

  Returns:
    outputs, out_paddings pair.

    - outputs: has shape [batch, 1, 1, channel].
    - out_paddings: None or has shape [batch, 1].
  """
  p = self.params
  assert p.pooling_type in ['MAX', 'AVG'], p.pooling_type
  batch, time, freq = py_utils.GetShape(inputs, ndims=3)

  # Build a [batch, time, 1, 1] validity mask: 1.0 on real timesteps,
  # 0.0 on padded ones. With no paddings, everything is valid.
  if paddings is None:
    valid_mask = tf.ones([batch, time, 1, 1], p.dtype)
  else:
    paddings = py_utils.HasShape(paddings, [batch, time])
    valid_mask = 1.0 - paddings[..., tf.newaxis, tf.newaxis]

  if p.pooling_type == 'AVG':
    # Sum of unmasked features over (time, frequency), normalized by the
    # number of contributing cells: freq * (# of unpadded timesteps).
    # tf.maximum guards against division by zero for all-padded batches.
    masked_total = tf.reduce_sum(
        inputs * valid_mask, axis=[1, 2], keepdims=True)
    freq_count = tf.cast(tf.convert_to_tensor(freq), p.dtype)
    valid_cells = freq_count * tf.reduce_sum(
        valid_mask, axis=[1, 2], keepdims=True)
    out_feature = masked_total / tf.maximum(1.0, valid_cells)
  elif p.pooling_type == 'MAX':
    # Replace padded positions with a very large negative constant so that
    # they never win the max. (-0.7 * dtype.max avoids overflow issues
    # that -dtype.max arithmetic could cause.)
    fill_value = (
        tf.ones_like(inputs) * p.dtype.max * tf.constant(-0.7, dtype=p.dtype))
    masked_inputs = tf.where_v2(valid_mask > 0.0, inputs, fill_value)
    out_feature = tf.reduce_max(masked_inputs, axis=[1, 2], keepdims=True)

  if paddings is None:
    out_paddings = None
  else:
    # The pooled output is padded only if every timestep was padded.
    out_paddings = tf.reduce_min(paddings, axis=1, keepdims=True)
    # Zero out features of fully-padded batches to keep downstream clean.
    out_feature *= 1.0 - out_paddings[..., tf.newaxis, tf.newaxis]
  return out_feature, out_paddings
def _PaddedMaxFn(inp):
  """Max-reduce `inp.features` over points, ignoring padded entries.

  Padded feature slots are pushed to -inf before the reduction so they
  cannot win the max; groups that are entirely padded get their features
  forced to zero to avoid propagating infinities/NaNs downstream.
  """
  # Where padding > 0, -inf * padding yields -inf; elsewhere the padding
  # value (0) is kept, so adding it leaves real features untouched.
  neg_inf_fill = tf.where(inp.padding > 0, -np.inf * inp.padding, inp.padding)
  reduced = tf.reduce_max(inp.features + neg_inf_fill[..., tf.newaxis],
                          axis=-2)
  # reduce_min over the padding is 1 only when every point in the group is
  # padded; zero those groups' features since inf * 0 = NaN would otherwise
  # poison downstream computation.
  group_padding = tf.reduce_min(inp.padding, axis=-1)
  all_padded = tf.cast(group_padding[..., tf.newaxis], tf.bool)
  reduced = tf.where_v2(all_padded, tf.zeros_like(reduced), reduced)
  reduced = py_utils.CheckNumerics(reduced)
  if nested_output:
    return py_utils.NestedMap(features=reduced, padding=group_padding)
  else:
    return reduced