def NMSIndices(self,
               bboxes,
               scores,
               max_output_size,
               nms_iou_threshold=0.3,
               score_threshold=0.01):
  """Runs padded NMS on 7-DOF 3D boxes using their axis-aligned x/y footprint.

  Args:
    bboxes: A [num_boxes, 7] floating point Tensor of bounding boxes in
      [x, y, z, dx, dy, dz, phi] format.
    scores: A [num_boxes] floating point Tensor containing box scores.
    max_output_size: Maximum number of boxes to predict per input.
    nms_iou_threshold: IoU threshold above which two boxes are considered
      overlapping for the purpose of suppression.
    score_threshold: Score threshold forwarded to NMS so that low-scoring
      boxes can be ignored early.

  Returns:
    A (indices, mask) tuple. `indices` holds the selected box indices padded
    to `max_output_size`, with padded slots set to 0. `mask` is 1.0 for the
    selected slots and 0.0 for the padded ones.
  """
  bboxes = py_utils.HasShape(bboxes, [-1, 7])

  # Only the x/y center and dx/dy extent are kept. The heading (phi) is
  # dropped because the NMS op used below does not handle rotated boxes.
  center_x = bboxes[:, 0]
  center_y = bboxes[:, 1]
  extent_x = bboxes[:, 3]
  extent_y = bboxes[:, 4]
  footprint_xywh = tf.stack([center_x, center_y, extent_x, extent_y], axis=-1)
  footprint_extrema = geometry.XYWHToBBoxes(footprint_xywh)

  # The padded variant is used so the output has a fixed length (which makes
  # this function usable inside a map_fn); it also returns the scalar count of
  # valid selections. The default IoU threshold is 0.3 because rotated anchor
  # boxes can make the usual 0.5 too high.
  selected, valid_count = tf.image.non_max_suppression_padded(
      footprint_extrema,
      scores,
      iou_threshold=nms_iou_threshold,
      max_output_size=max_output_size,
      score_threshold=score_threshold,
      pad_to_max_output_size=True)

  # Expand the scalar count into a per-slot validity mask.
  valid_part = tf.ones([valid_count])
  padded_part = tf.zeros([max_output_size - valid_count])
  mask = tf.concat([valid_part, padded_part], axis=0)
  selected = tf.where(mask > 0, selected, tf.zeros_like(selected))
  return selected, mask
def _Extract(self, features):
  """Extracts per-object labels and 3D boxes, padded to p.max_num_objects.

  Args:
    features: A dict-like of parsed features, indexed by the string keys read
      below.

  Returns:
    A `.NestedMap` with labels, label ids, difficulty levels, 3D boxes, box
    masks (filtered and unfiltered), per-box point counts, speed and
    acceleration.
  """
  p = self.params
  max_objects = p.max_num_objects

  def _PaddedInt32(key):
    """Densifies features[key], casts to int32 and pads/trims to max_objects."""
    return py_utils.PadOrTrimTo(
        tf.cast(_Dense(features[key]), tf.int32), [max_objects])

  # Label values match the proto enum car.open_dataset.Label.Type. The value
  # range is [1..4] for non-background labels.
  labels = _PaddedInt32('labels')

  label_ids = py_utils.PadOrTrimTo(
      tf.reshape(_Dense(features['label_ids'], ''), [-1]), [max_objects], '')

  raw_bboxes_3d = tf.reshape(_Dense(features['bboxes_3d']), [-1, 7])
  # The mask is built from the un-padded box count, then padded with zeros.
  raw_mask = tf.ones([tf.shape(raw_bboxes_3d)[0]])
  bboxes_3d_num_points = _PaddedInt32('bboxes_3d_num_points')
  bboxes_3d = py_utils.PadOrTrimTo(raw_bboxes_3d, [max_objects, 7])
  unfiltered_bboxes_3d_mask = py_utils.PadOrTrimTo(raw_mask, [max_objects])

  label_metadata = py_utils.PadOrTrimTo(
      tf.reshape(_Dense(features['label_metadata']), [-1, 4]),
      [max_objects, 4])

  detection_difficulties = _PaddedInt32('detection_difficulties')
  combined_detection_difficulties = _PaddedInt32(
      'combined_detection_difficulties')
  tracking_difficulties = _PaddedInt32('tracking_difficulties')

  bboxes_3d_mask = unfiltered_bboxes_3d_mask
  if p.filter_labels:
    # Keep only boxes whose label is one of p.filter_labels.
    allowed = tf.constant([p.filter_labels])
    is_allowed = tf.reduce_any(
        tf.equal(tf.expand_dims(labels, 1), allowed), axis=1)
    bboxes_3d_mask = bboxes_3d_mask * tf.cast(is_allowed, tf.float32)

  return py_utils.NestedMap({
      'labels': labels,
      'label_ids': label_ids,
      'detection_difficulties': detection_difficulties,
      'combined_detection_difficulties': combined_detection_difficulties,
      'tracking_difficulties': tracking_difficulties,
      'bboxes_3d': bboxes_3d,
      'bboxes_3d_mask': bboxes_3d_mask,
      'bboxes_3d_num_points': bboxes_3d_num_points,
      'unfiltered_bboxes_3d_mask': unfiltered_bboxes_3d_mask,
      'speed': label_metadata[:, :2],
      'acceleration': label_metadata[:, 2:],
  })
def _InputBatch(self):
  """Builds a synthetic all-ones target batch of shape [batch_size, 1024].

  Returns:
    A `.NestedMap` with a `tgt` sub-map holding int32 `ids`, `labels`,
    `segment_ids` and `segment_pos`, each with a statically ensured shape.
  """
  ones = tf.ones([self.params.batch_size, 1024], dtype=tf.int32)

  tgt = py_utils.NestedMap()
  tgt.ids = tf.roll(ones, 1, axis=1)
  tgt.labels = ones
  tgt.segment_ids = tf.minimum(ones, 1)
  tgt.segment_pos = ones

  batch = py_utils.NestedMap()
  batch.tgt = tgt

  def _EnsureBatchShape(t):
    return tf.ensure_shape(t, (self.params.batch_size, 1024))

  return batch.Transform(_EnsureBatchShape)
def testLimitsOutputImagesIfBatchIsSmall(self):
  """With batch size 1 and max_outputs=3, only one image is emitted."""
  batch_size = 1
  subplot_data = [
      tf.zeros((batch_size, 3, 5)),
      tf.ones((batch_size, 2, 2)),
  ]
  with self.session() as sess:
    figure = plot.MatplotlibFigureSummary(
        'summary', self.FIGSIZE, max_outputs=3)
    for data in subplot_data:
      figure.AddSubplot([data])
    serialized = sess.run(figure.Finalize())
  parsed = tf.summary.Summary.FromString(serialized)
  self.assertEqual(len(parsed.value), 1)
def testRepeatMoEFProp(self):
  """Smoke test for RecurrentDenseBuilder.DecoderLayerStack().

  Builds a stack from a decoder self-attention layer and a decoder MoE layer
  and checks that FProp runs. Per the original note, the test fails without
  the change it was introduced with.
  """
  batch_dim = 2
  length_dim = 4
  input_dim = 4

  builder_params = gshard_builder.RecurrentDenseBuilder.Params().Set(
      model_dim=input_dim,
      num_devices=2,
      moe_hidden_dim=16,
      e_dim=2,
      attention_key_value_dim=input_dim,
      attention_num_heads=1,
      c_dim=2,
      emh_split=[-1, 0, -1, -1],
      ehm_split=[-1, 0, -1, -1])
  b = builder_params.Instantiate()

  sub_layers = [
      b.DecSelfAttention('dec_self_attention'),
      b.MoE('moe', decoder=True),
  ]
  # The trailing 2 is presumably the repeat count -- TODO confirm.
  stack_params = b.DecoderLayerStack('rep', sub_layers, 2)

  with self.session(graph=tf.Graph()) as sess:
    tf.random.set_seed(2019)
    stack = stack_params.Instantiate()

    vec = tf.ones([batch_dim, length_dim, input_dim])
    seg_ids = tf.ones([batch_dim, length_dim])
    seg_pos = tf.ones([batch_dim, length_dim])
    # The same tensors serve as both decoder inputs and encoder outputs.
    stack_inputs = py_utils.NestedMap(
        vec=vec,
        segment_id=seg_ids,
        segment_pos=seg_pos,
        encoder_output=vec,
        encoder_segment_id=seg_ids,
        encoder_segment_pos=seg_pos,
        aux_loss=tf.zeros([]))

    stack_outputs = stack.FPropDefaultTheta(stack_inputs)
    sess.run(tf.global_variables_initializer())
    sess.run(stack_outputs)
def _DerivePaddingsAndIds(src_ids, tgt_labels):
  """Derives target ids, paddings, weights and a bucket key for one example.

  tgt_ids is tgt_labels shifted right by one, with a SOS ID prepended.

  Args:
    src_ids: Source id tensor; only its shape is used.
    tgt_labels: Target label tensor; its last element is dropped when forming
      tgt_ids.

  Returns:
    A (src_paddings, tgt_ids, tgt_paddings, tgt_weights, bucket_key) tuple.
    Paddings are all zeros, weights are all ones, and bucket_key is the int32
    maximum of the source and target unpadded lengths.
  """
  all_but_last = tgt_labels[:-1]
  tgt_ids = tf.concat([[p.sos_id], all_but_last], axis=0)

  src_paddings = tf.zeros(tf.shape(src_ids), dtype=tf.float32)
  tgt_paddings = tf.zeros(tf.shape(tgt_ids), dtype=tf.float32)
  tgt_weights = tf.ones(tf.shape(tgt_ids), dtype=tf.float32)

  src_length = tf.reduce_sum(1.0 - src_paddings)
  tgt_length = tf.reduce_sum(1.0 - tgt_paddings)
  bucket_key = tf.cast(tf.maximum(src_length, tgt_length), tf.int32)

  return src_paddings, tgt_ids, tgt_paddings, tgt_weights, bucket_key
def FPropTower(self, theta, input_batch):
  """Runs the language model forward pass and assembles metrics.

  Args:
    theta: A `.NestedMap` of weights; `theta.lm` is passed to the LM.
    input_batch: A `.NestedMap` providing `ids`, `paddings`, `labels`,
      `weights` and `word_count`. If it also contains `segment_ids`, then
      `segment_pos` and `num_sentences` are read as well.

  Returns:
    A (metrics, per_example) pair. `metrics` maps metric names to
    (value, weight) tuples; `per_example` has a single 'loss' entry with the
    per-sequence loss.
  """
  p = self.params
  tf.logging.info('input_batch=%r', input_batch)

  ids, paddings, labels_ids, weights = self._TrimIfPossible(
      input_batch.ids, input_batch.paddings, input_batch.labels,
      input_batch.weights)

  fprop_dtype = py_utils.FPropDtype(p)
  paddings = tf.cast(paddings, fprop_dtype)
  weights = tf.cast(weights, fprop_dtype)
  tf.logging.info('inputs={}'.format((ids, paddings, labels_ids, weights)))

  batch_size = tf.shape(ids)[0]
  has_segments = 'segment_ids' in input_batch

  lm_kwargs = {}
  if has_segments:
    lm_kwargs['segment_ids'] = input_batch.segment_ids
    lm_kwargs['segment_pos'] = input_batch.segment_pos

  targets = py_utils.NestedMap(class_ids=labels_ids, class_weights=weights)
  initial_state = None
  xent_output, _ = self.lm.FProp(theta.lm, ids, paddings, initial_state,
                                 targets, **lm_kwargs)

  # Without segment info, every row is counted as exactly one sentence.
  if has_segments:
    num_sentences = input_batch.num_sentences
  else:
    num_sentences = tf.ones(shape=[batch_size], dtype=tf.int32)

  # +num_sentences to account for the end of sequence symbol.
  word_total = tf.reduce_sum(input_batch.word_count + num_sentences)
  num_words = tf.cast(word_total, fprop_dtype)

  predicted_labels = tf.cast(xent_output.per_example_argmax, labels_ids.dtype)
  num_preds = xent_output.total_weight
  is_correct = tf.cast(tf.equal(labels_ids, predicted_labels), fprop_dtype)
  mean_acc = tf.reduce_sum(is_correct * weights) / tf.math.maximum(
      num_preds, 1)

  per_sequence_loss = tf.reduce_sum(
      xent_output.per_example_xent * weights, axis=1)
  if p.train.sum_loss_across_tokens_in_batch:
    loss = xent_output.total_xent
  else:
    loss = xent_output.avg_xent
    # Normalize each sequence's loss by its own total weight.
    per_sequence_loss = per_sequence_loss / tf.reduce_sum(weights, axis=1)

  metrics = {
      'loss': (loss, num_preds),
      'fraction_of_correct_next_step_preds': (mean_acc, num_preds),
      'log_pplx': (xent_output.avg_xent, num_preds),
      'log_pplx_per_word': (xent_output.total_xent / num_words, num_words),
      'num_predictions': (num_preds, 1),
      'num_words': (num_words, 1),
      'num_sentences': (tf.reduce_sum(num_sentences), 1),
  }
  per_example = {
      'loss': per_sequence_loss,
  }
  return metrics, per_example
def create_projection_matrix(nb_random_projections, dim, seed=0, scaling=0):
  r"""Builds a matrix of random, block-wise orthogonal projections.

  Rows are produced in blocks of `dim` mutually orthogonal directions (via QR
  decomposition of a Gaussian matrix). Each row is then rescaled either to the
  deterministic length \sqrt{dim} or to a length drawn from the \chi(dim)
  distribution (in the latter case the marginal distributions of the
  projections are dim-dimensional Gaussian vectors with identity covariance).

  Args:
    nb_random_projections: number of random projections.
    dim: dimensionality of each random projection.
    seed: random seed used to construct projections.
    scaling: 1 if all the random projections need to be renormalized to have
      length \sqrt{dim}, 0 if the lengths of random projections should follow
      \chi(dim) distribution.

  Returns:
    The matrix of random projections of the shape [nb_random_projections, dim],
    or None if nb_random_projections is 0.

  Raises:
    ValueError: if scaling is neither 0 nor 1.
  """
  if nb_random_projections == 0:
    return None

  def _orthogonal_rows(block_seed):
    """Returns a [dim, dim] matrix whose rows come from a QR decomposition."""
    gaussian = tf.random.normal((dim, dim), seed=block_seed)
    q_factor, _ = tf.linalg.qr(gaussian)
    return tf.transpose(q_factor)

  nb_full_blocks, remaining_rows = divmod(nb_random_projections, dim)

  blocks = []
  current_seed = seed
  for _ in range(nb_full_blocks):
    blocks.append(_orthogonal_rows(current_seed))
    current_seed = next_seed(current_seed)

  if remaining_rows > 0:
    # A final partial block supplies the leftover rows.
    blocks.append(_orthogonal_rows(current_seed)[0:remaining_rows])
  final_matrix = tf.concat(blocks, 0)
  current_seed = next_seed(current_seed)

  if scaling == 0:
    # Row lengths are the norms of independent Gaussian vectors.
    gaussian_rows = tf.random.normal(
        (nb_random_projections, dim), seed=current_seed)
    squared_lengths = tf.math.reduce_sum(tf.math.square(gaussian_rows), axis=1)
    multiplier = tf.math.sqrt(squared_lengths)
  elif scaling == 1:
    multiplier = tf.math.sqrt(float(dim)) * tf.ones(nb_random_projections)
  else:
    raise ValueError("Scaling must be one of {0, 1}. Was %s" % scaling)

  return tf.linalg.matmul(tf.linalg.diag(multiplier), final_matrix)
def testBasic(self):
  """Checks attention_util.RelPositionBias on a small identity-content case."""
  with self.session():
    t = 3
    num_rel_positions = 2 * t - 1

    # [BTNH]: an identity matrix with singleton batch and head-count dims.
    identity = tf.linalg.diag(tf.ones([t]))
    content = identity[None, :, None, :]

    # [LNH]: consecutive floats 0, 1, 2, ... laid out per relative position.
    flat_emb = tf.range(t * num_rel_positions, dtype=tf.float32)
    abs_pos_emb = tf.reshape(flat_emb, [num_rel_positions, 1, t])

    tf.logging.info('content=%s abs_pos_emb=%s', content.eval(),
                    abs_pos_emb.eval())

    expected = [[[[6., 3., 0.], [10., 7., 4.], [14., 11., 8.]]]]
    actual = attention_util.RelPositionBias(content, abs_pos_emb).eval()
    self.assertAllClose(expected, actual)
def noncausal_denominator(qs, ks):
  """Computes FAVOR normalizer in noncausal attention.

  Args:
    qs: query_prime tensor of the shape [L,B,H,M].
    ks: key_prime tensor of the shape [L,B,H,M].

  Returns:
    FAVOR normalizer in noncausal attention, of shape [L,B,H].
  """
  # Sum the keys over the length axis directly. The previous form contracted
  # `ks` with a float32 ones vector sized from the static shape, which fails
  # when `ks` is not float32 or when its length dimension is not statically
  # known.
  ks_sum = tf.reduce_sum(ks, axis=0)
  return tf.einsum("lbhm,bhm->lbh", qs, ks_sum)
def testDoesNotDieOnMatplotlibError(self):
  """A rank-1 input must still yield a summary with a 1x1 dummy image."""
  bad_rank_data = tf.ones((5,))
  with self.session() as sess:
    figure = plot.MatplotlibFigureSummary(
        'summary', self.FIGSIZE, max_outputs=1)
    figure.AddSubplot([bad_rank_data])
    serialized = sess.run(figure.Finalize())
  parsed = tf.summary.Summary.FromString(serialized)
  self.assertEqual(len(parsed.value), 1)
  # The fallback is a dummy 1-pixel image.
  image = parsed.value[0].image
  self.assertEqual(image.width, 1)
  self.assertEqual(image.height, 1)
def testLargerBatch(self):
  """With max_outputs equal to the batch size, one image per example is made."""
  batch_size = 4
  subplot_data = [
      tf.ones((batch_size, 3, 5)),
      tf.ones((batch_size, 2, 2)),
  ]
  with self.session() as sess:
    figure = plot.MatplotlibFigureSummary(
        'larger_batch', self.FIGSIZE, max_outputs=batch_size)
    for data in subplot_data:
      figure.AddSubplot([data])
    serialized = sess.run(figure.Finalize())
  parsed = tf.summary.Summary.FromString(serialized)
  self.assertEqual(len(parsed.value), batch_size)

  for index, entry in enumerate(parsed.value):
    image = entry.image
    self.assertEqual(entry.tag, u'larger_batch/image/%d' % index)
    self.assertEqual(image.width, self.EXPECTED_DPI * self.FIGSIZE[0])
    self.assertEqual(image.height, self.EXPECTED_DPI * self.FIGSIZE[1])
    self.assertEqual(image.colorspace, 3)
    # Each image must differ from the default (fallback) encoding.
    self.assertNotEqual(image.encoded_image_string,
                        self.default_encoded_image)
def testSpectrumAugmenterWithPerDomainPolicyFreqMask(self):
  """Checks per-domain frequency masking against golden values.

  Each of the 6 examples carries a domain id (0, 1 or 2), and the layer is
  configured with one `freq_mask_max_bins` entry per domain id. The expected
  output is a golden array tied to the fixed random seeds below, so the order
  in which ops are created must not change.
  """
  with self.session(use_gpu=False, graph=tf.Graph()):
    tf.random.set_seed(1234)
    # All-ones input so that masked bins show up as zeros in the output.
    inputs = tf.ones([6, 5, 4, 2], dtype=tf.float32)
    # One domain id per example, repeated along the second axis.
    input_domain_ids = tf.constant(
        [[1] * 5, [2] * 5, [0] * 5, [2] * 5, [0] * 5, [1] * 5],
        dtype=tf.float32)
    # NOTE(review): paddings has leading dim 3 while inputs has leading dim 6;
    # this looks like a mismatch that the frequency-mask-only path tolerates --
    # confirm before changing, since the golden values below depend on it.
    paddings = tf.zeros([3, 5])
    p = spectrum_augmenter.SpectrumAugmenter.Params()
    p.name = 'specAug_layers'
    p.domain_ids = [0, 1, 2]
    # Listed in the same order as p.domain_ids; domain 0 gets a max of 0 bins.
    p.freq_mask_max_bins = [0, 3, 8]
    p.time_mask_max_frames = 0
    p.random_seed = 1234
    specaug_layer = p.Instantiate()
    # Golden output: examples with domain id 0 (indices 2 and 4) are left
    # untouched (all ones); the others have some frequency bins zeroed.
    expected_output = np.array([[[[0., 0.], [0., 0.], [1., 1.], [1., 1.]],
                                 [[0., 0.], [0., 0.], [1., 1.], [1., 1.]],
                                 [[0., 0.], [0., 0.], [1., 1.], [1., 1.]],
                                 [[0., 0.], [0., 0.], [1., 1.], [1., 1.]],
                                 [[0., 0.], [0., 0.], [1., 1.], [1., 1.]]],
                                [[[1., 1.], [0., 0.], [0., 0.], [0., 0.]],
                                 [[1., 1.], [0., 0.], [0., 0.], [0., 0.]],
                                 [[1., 1.], [0., 0.], [0., 0.], [0., 0.]],
                                 [[1., 1.], [0., 0.], [0., 0.], [0., 0.]],
                                 [[1., 1.], [0., 0.], [0., 0.], [0., 0.]]],
                                [[[1., 1.], [1., 1.], [1., 1.], [1., 1.]],
                                 [[1., 1.], [1., 1.], [1., 1.], [1., 1.]],
                                 [[1., 1.], [1., 1.], [1., 1.], [1., 1.]],
                                 [[1., 1.], [1., 1.], [1., 1.], [1., 1.]],
                                 [[1., 1.], [1., 1.], [1., 1.], [1., 1.]]],
                                [[[0., 0.], [0., 0.], [0., 0.], [0., 0.]],
                                 [[0., 0.], [0., 0.], [0., 0.], [0., 0.]],
                                 [[0., 0.], [0., 0.], [0., 0.], [0., 0.]],
                                 [[0., 0.], [0., 0.], [0., 0.], [0., 0.]],
                                 [[0., 0.], [0., 0.], [0., 0.], [0., 0.]]],
                                [[[1., 1.], [1., 1.], [1., 1.], [1., 1.]],
                                 [[1., 1.], [1., 1.], [1., 1.], [1., 1.]],
                                 [[1., 1.], [1., 1.], [1., 1.], [1., 1.]],
                                 [[1., 1.], [1., 1.], [1., 1.], [1., 1.]],
                                 [[1., 1.], [1., 1.], [1., 1.], [1., 1.]]],
                                [[[1., 1.], [0., 0.], [0., 0.], [1., 1.]],
                                 [[1., 1.], [0., 0.], [0., 0.], [1., 1.]],
                                 [[1., 1.], [0., 0.], [0., 0.], [1., 1.]],
                                 [[1., 1.], [0., 0.], [0., 0.], [1., 1.]],
                                 [[1., 1.], [0., 0.], [0., 0.], [1., 1.]]]])
    h, _ = specaug_layer.FPropDefaultTheta(
        inputs, paddings, domain_ids=input_domain_ids)
    actual_layer_output = self.evaluate(h)
    # Printed so that the golden array above can be regenerated if needed.
    print(np.array_repr(actual_layer_output))
    self.assertAllClose(actual_layer_output, expected_output)
def _InputPaddingValue(self, key, tensorspec):
  """Returns the scalar used to pad the tensor stored under `key`.

  This function is used by the TFDatasetBatchBySequenceLength DataSource to
  specify the value used for padding.

  Args:
    key: The NestedMap key to return padding value for.
    tensorspec: a tf.TensorSpec describing the tensor to be padded.

  Returns:
    A scalar tensor of dtype `tensorspec.dtype`: one if `key` ends with
    '_paddings', zero otherwise.
  """
  make_scalar = tf.ones if key.endswith('_paddings') else tf.zeros
  return make_scalar([], dtype=tensorspec.dtype)
def FProp(self, theta, inputs, paddings=None):
  """Apply global spatial pooling to inputs.

  Args:
    theta: A `.NestedMap` object containing weights' values of this layer and
      its children layers.
    inputs: The inputs tensor. It is expected to be of shape [batch, time,
      frequency, channel]. The time dimension corresponds to the height
      dimension as in images and the frequency dimension corresponds to the
      width dimension as in images.
    paddings: The paddings tensor. It is expected to be of shape [batch,
      time]. Defaults to None, which means there are no paddings.

  Returns:
    outputs, out_paddings pair.
     - outputs: has shape [batch, 1, 1, channel].
     - out_paddings: None or has shape [batch, 1].
  """
  p = self.params
  assert p.pooling_type in ['MAX', 'AVG'], p.pooling_type
  b, t, f = py_utils.GetShape(inputs, ndims=3)

  if paddings is not None:
    paddings = py_utils.HasShape(paddings, [b, t])
    # 1.0 at valid time steps, 0.0 at padded ones; broadcast over the
    # frequency and channel dims.
    mask = 1.0 - paddings[..., tf.newaxis, tf.newaxis]
  else:
    mask = tf.ones([b, t, 1, 1], p.dtype)

  if p.pooling_type == 'AVG':
    global_sum = tf.reduce_sum(inputs * mask, axis=[1, 2], keepdims=True)
    f = tf.cast(tf.convert_to_tensor(f), p.dtype)
    # Number of valid (time, frequency) cells per example; clamped below to
    # avoid dividing by zero for fully padded examples.
    count = f * tf.reduce_sum(mask, axis=[1, 2], keepdims=True)
    out_feature = global_sum / tf.maximum(1.0, count)
  elif p.pooling_type == 'MAX':
    # Padded positions are replaced with a large negative value so that they
    # never win the max.
    large_negative = (
        tf.ones_like(inputs) * p.dtype.max * tf.constant(-0.7, dtype=p.dtype))
    padded_inputs = tf.where_v2(mask > 0.0, inputs, large_negative)
    out_feature = tf.reduce_max(padded_inputs, axis=[1, 2], keepdims=True)

  if paddings is None:
    out_paddings = None
  else:
    # An example counts as padded only if every one of its time steps is.
    out_paddings = tf.reduce_min(paddings, axis=1, keepdims=True)
    # Zero out the pooled features of fully padded examples.
    out_feature *= 1.0 - out_paddings[..., tf.newaxis, tf.newaxis]
  return out_feature, out_paddings
def forward(inputs, alpha):
  """Computes p_m by bisection on a threshold tau, plus a gradient function.

  `axis`, `n_iter`, `ensure_sum_one` and `_calculate_probability` are taken
  from the enclosing scope.

  NOTE(review): the (value, grad_fn) return shape suggests this is meant to be
  wrapped by a custom-gradient decorator that is not visible here -- confirm.

  Args:
    inputs: The input tensor; its static shape is read, so the shape must be
      statically known.
    alpha: A value that is filled into a tensor shaped like `inputs` with size
      1 along `axis`.

  Returns:
    A (p_m, grad_fn) pair, where p_m is the result of the last bisection step
    (optionally renormalized to sum to one along `axis`) and grad_fn maps the
    upstream gradient to a pair of gradients.
  """
  with tf.name_scope("entmax_loss"):
    # Broadcast alpha to the shape of inputs with a singleton `axis` dim.
    alpha_shape = inputs.get_shape().as_list()
    alpha_shape[axis] = 1
    alpha = tf.fill(alpha_shape, alpha)
    alpha = tf.cast(alpha, dtype=inputs.dtype)
    # Static size of the reduced axis.
    d = inputs.get_shape().as_list()[axis]
    alpha_m1 = alpha - 1.0
    inputs = inputs * alpha_m1
    max_val = tf.math.reduce_max(inputs, axis=axis, keepdims=True)
    # Initial bracket [tau_lo, tau_hi] for the bisection search.
    tau_lo = max_val - tf.ones(alpha.get_shape().as_list(), dtype=inputs.dtype)
    tau_hi = max_val - tf.math.pow(
        tf.cast((1.0 / d), dtype=inputs.dtype), alpha_m1)
    # Sum of probabilities minus one, evaluated at the lower bracket end.
    f_lo = tf.math.reduce_sum(
        _calculate_probability(tf.math.subtract(inputs, tau_lo), alpha),
        axis) - 1.0
    dm = tau_hi - tau_lo
    for _ in range(n_iter):
      # Halve the bracket and evaluate at its midpoint.
      dm /= 2
      tau_m = tau_lo + dm
      p_m = _calculate_probability(inputs - tau_m, alpha)
      f_m = tf.math.reduce_sum(p_m, axis) - 1.0
      # Move the lower end up where the midpoint has the same sign as f_lo.
      mask = tf.expand_dims(tf.math.greater(f_m * f_lo, 0), axis)
      tau_lo = tf.where(mask, tau_m, tau_lo)
    if ensure_sum_one:
      p_m /= tf.expand_dims(tf.math.reduce_sum(p_m, axis), axis)

  def grad_fn(d_outputs):
    """Maps the upstream gradient d_outputs to gradients for the two inputs."""
    with tf.name_scope("entmax_grad"):
      # p_m ** (2 - alpha) where p_m is positive, zero elsewhere.
      gppr = tf.where(p_m > 0, tf.math.pow(p_m, 2.0 - alpha),
                      tf.zeros_like(p_m))
      d_inputs = d_outputs * gppr
      q = tf.math.reduce_sum(d_inputs, axis) / tf.math.reduce_sum(gppr, axis)
      q = tf.expand_dims(q, axis)
      d_inputs -= q * gppr
      # NOTE(review): the same tensor is returned as the gradient for both
      # `inputs` and `alpha` -- confirm this is intended.
      return d_inputs, d_inputs

  return p_m, grad_fn
def _ComputeConvOutputPaddingV2(paddings,
                                window,
                                stride,
                                padding_algorithm='SAME'):
  """Computes paddings for convolution and pooling output.

  - If padding_algorithm='SAME': out_padding[i] == 0 if the in_padding
    corresponding to that output is 0. This prevents the output from shrinking
    unnecessarily when striding.
  - If padding algorithm='VALID': out_padding[i] == 1 iff any in_padding
    corresponding to that output is 1.

  Args:
    paddings: The paddings tensor. It is expected to be of shape [batch, time].
    window: The size of the windows. Only used when padding_algorithm is
      'VALID'.
    stride: The time-stride between adjacent windows.
    padding_algorithm: 'SAME' or 'VALID'.

  Returns:
    out_padding, The new padding tensor of size [batch, ceil(time / stride)].
    With stride 1 and 'SAME' padding the input is returned unchanged.

  Raises:
    ValueError: if padding_algorithm is neither 'SAME' nor 'VALID'.
  """
  if stride == 1 and padding_algorithm == 'SAME':
    return paddings

  paddings, slice_len = _PadForLengthCompatibleStridesV2(
      paddings, stride, padding_algorithm, 1.0)

  expanded_paddings = tf.expand_dims(paddings, -1)

  if padding_algorithm == 'SAME':
    # Using a strided conv1d of size 1x1 we find all non-padded positions for
    # the specified stride.
    out_paddings = tf.nn.conv1d(
        expanded_paddings,
        filters=tf.ones([1, 1, 1], paddings.dtype),
        stride=stride,
        padding='SAME',
        name='padding_conv')
  elif padding_algorithm == 'VALID':
    # Max-pooling marks an output as padded if any input in its window is.
    out_paddings = tf.nn.pool(
        expanded_paddings, [window],
        'MAX',
        padding=padding_algorithm,
        strides=[stride])
  else:
    # Previously this fell through and hit an UnboundLocalError on
    # `out_paddings` below; fail with an explicit message instead.
    raise ValueError(
        "padding_algorithm must be 'SAME' or 'VALID', got %r" %
        (padding_algorithm,))

  out_paddings = tf.squeeze(out_paddings, -1)
  if stride > 1:
    # Drop the extra outputs produced by the length-compatibility padding.
    slice_end = py_utils.GetShape(out_paddings)[1] - slice_len
    out_paddings = out_paddings[:, :slice_end]
  return out_paddings
def testBasic(self):
  """Checks PositionalAttenLogits.RelPositionBias on a small identity case."""
  with self.session():
    t = 3
    num_rel_positions = 2 * t - 1

    # [BTNH]: an identity matrix with singleton batch and head-count dims.
    identity = tf.linalg.diag(tf.ones([t]))
    content = identity[None, :, None, :]

    # [LNH]: consecutive floats 0, 1, 2, ... laid out per relative position.
    flat_emb = tf.range(t * num_rel_positions, dtype=tf.float32)
    abs_pos_emb = tf.reshape(flat_emb, [num_rel_positions, 1, t])

    tf.logging.info('content=%s abs_pos_emb=%s', content.eval(),
                    abs_pos_emb.eval())

    layer_params = attention_util.PositionalAttenLogits.Params().Set(
        name='rel_pos_bias')
    layer = layer_params.Instantiate()

    expected = [[[[6., 3., 0.], [10., 7., 4.], [14., 11., 8.]]]]
    actual = layer.RelPositionBias(content, abs_pos_emb).eval()
    self.assertAllClose(expected, actual)
def _GetFurthestPoint():
  """Returns the index of the point furthest from those already selected.

  Sampling is biased towards real points: padded points get a negative
  distance so they cannot be picked, but only while real points remain.
  Reads `padding`, `distance_to_selected`, `batch_size`, `num_points`,
  `curr_idx` and `num_valid_points` from the enclosing scope.
  """
  # Distance assigned to padded points so that argmax skips them.
  negative_fill = -1.0 * tf.ones((batch_size, num_points), dtype=tf.float32)
  is_real_point = tf.equal(padding, 0.0)
  masked_distance = tf.where(is_real_point, distance_to_selected,
                             negative_fill)

  # Once the real points are used up, fall back to the unmasked distances.
  real_points_remain = tf.less(curr_idx, num_valid_points)
  effective_distance = tf.where(real_points_remain, masked_distance,
                                distance_to_selected)

  return tf.argmax(effective_distance, axis=-1, output_type=tf.int32)
def _CreateDynamicShapeInputs(self, batch_dim, length_dim, input_dim):
  """Creates inputs whose sequence length is only known at run time.

  Args:
    batch_dim: Batch size.
    length_dim: Upper bound on the sequence length before trimming.
    input_dim: Feature dimension of the inputs.

  Returns:
    An (inputs, segment_ids, segment_pos) tuple trimmed along the length axis
    to the number of ones in segment_ids; segment_pos is all zeros.
  """
  full_inputs = tf.random.normal([batch_dim, length_dim, input_dim],
                                 seed=92837472)

  # Segment ids are a random-length run of ones followed by zeros.
  ones_count = tf.random.uniform(
      shape=(), minval=1, maxval=length_dim, dtype=tf.int32)
  ones_block = tf.ones([batch_dim, ones_count])
  zeros_block = tf.zeros([batch_dim, length_dim - ones_count])
  full_segment_ids = tf.concat([ones_block, zeros_block], axis=1)

  # Trim everything past the longest run of ones.
  per_row_length = tf.reduce_sum(full_segment_ids, -1)
  max_seq_len = tf.cast(tf.reduce_max(per_row_length), tf.int32)
  inputs = full_inputs[:, :max_seq_len, :]
  segment_ids = full_segment_ids[:, :max_seq_len]

  unused_segment_pos = tf.zeros_like(segment_ids)
  return inputs, segment_ids, unused_segment_pos
def testMelFeaturesPaddedRightStacked(self):
  """Checks output paddings of the mel frontend with right-context stacking."""
  self._CreateFrontendParams()
  p = self.params
  p.stack_right_context = 2
  p.frame_stride = p.stack_right_context + 1
  frontend = p.Instantiate()

  sample_rate, pcm = self._GetPcm()
  pcm *= 32768

  # Reshape to 4D [batch, time, packet, channels], dropping the samples that
  # do not fill a whole packet.
  packet_size = 11  # A non-round number.
  num_samples = tf.shape(pcm)[1]
  usable_samples = (num_samples // packet_size) * packet_size
  packets = tf.reshape(pcm[:, 0:usable_samples], (1, -1, packet_size, 1))

  # Only the first 455 packets are unpadded.
  unpadded_part = tf.zeros([1, 455], dtype=tf.float32)
  padded_part = tf.ones([1, tf.shape(packets)[1] - 455], dtype=tf.float32)
  input_paddings = tf.concat([unpadded_part, padded_part], axis=1)

  # frame_step=240, frame_size=600, +1200 right padded frames
  # 455 packets * 11 frames rounds = 5005 frames, rounds down to 21 mel
  # frames. Divide by 3 for stacking = 7.
  # TODO(talremez): Make sure with this makes sense.
  expected_unpadded = 6

  outputs = frontend.FPropDefaultTheta(
      py_utils.NestedMap(src_inputs=packets, paddings=input_paddings))
  log_mel_t = outputs.src_inputs
  out_paddings_t = outputs.paddings

  with self.session():
    pcm_val = self.evaluate(pcm)
    tf.logging.info('pcm: ~ %s = %s', pcm_val.shape, pcm_val)
    # Sanity-check the scaled PCM range.
    self.assertGreater(33000, np.amax(pcm_val))
    self.assertGreater(np.amax(pcm_val), 2.)

    log_mel, out_paddings, sample_rate_val = self.evaluate(
        [log_mel_t, out_paddings_t, sample_rate])
    self.assertEqual(sample_rate_val, p.sample_rate)
    self.assertEqual(out_paddings.shape, log_mel.shape[0:2])

    # Leading frames are unpadded; every remaining frame is padded.
    num_padded = out_paddings.shape[1] - expected_unpadded
    self.assertAllEqual(out_paddings[:, 0:expected_unpadded],
                        np.zeros([1, expected_unpadded]))
    self.assertAllEqual(out_paddings[:, expected_unpadded:],
                        np.ones([1, num_padded]))
def testMassLayer(self):
  """Checks MASS masking counts on a constant batch of full-length sequences."""
  with self.session(use_gpu=False) as sess:
    batch_size = 3
    seq_len = 10
    mass_layer = data_augmenter.MASS(self._MassParams())

    # Every token id is 4 and every sequence uses its full length.
    seq_ids = tf.fill([batch_size, seq_len], 4)
    weights = tf.ones([batch_size, seq_len])
    actual_seq_len = tf.fill([batch_size], 10)

    mass_out = mass_layer.Mask(seq_ids, weights, actual_seq_len)
    fetches = [
        mass_out.src.ids,
        mass_out.tgt.ids,
        mass_out.tgt.labels,
        mass_out.tgt.weights,
    ]
    src_ids, tgt_ids, tgt_labels, tgt_weights = sess.run(fetches)

    # Half of each row is expected to equal 3 (presumably the mask id set by
    # _MassParams -- TODO confirm) on both the source and the target side.
    self.assertAllEqual(np.sum(src_ids == 3, axis=1), [5, 5, 5])
    self.assertAllEqual(np.sum(tgt_ids == 3, axis=1), [5, 5, 5])
    # Labels keep the original ids everywhere.
    expected_labels = 4 * np.ones([batch_size, seq_len], dtype=np.int32)
    self.assertAllEqual(tgt_labels, expected_labels)
    self.assertAllEqual(np.sum(tgt_weights, axis=1), [5., 5., 5.])
def _Extract(self, features):
  """Extracts point positions and features, optionally padded to a fixed size.

  Args:
    features: A dict-like of parsed features containing 'pointcloud/xyz' and
      'pointcloud/reflectance'.

  Returns:
    A `.NestedMap` with `points_xyz` and `points_feature`. When
    p.max_num_points is set, both are padded/trimmed to that many points and
    `points_padding` (0.0 for real points, 1.0 for padding) is added.
  """
  p = self.params
  xyz = tf.reshape(_Dense(features['pointcloud/xyz']), [-1, 3])
  feature = tf.reshape(
      _Dense(features['pointcloud/reflectance']), [-1, p.num_features])

  if p.max_num_points is None:
    return py_utils.NestedMap(points_xyz=xyz, points_feature=feature)

  original_count = tf.shape(xyz)[0]
  xyz = py_utils.PadOrTrimTo(xyz, [p.max_num_points, 3])
  feature = py_utils.PadOrTrimTo(feature, [p.max_num_points, p.num_features])
  # Ones for the original points are padded with zeros, then inverted.
  is_real = py_utils.PadOrTrimTo(tf.ones([original_count]), [p.max_num_points])

  result = py_utils.NestedMap(points_xyz=xyz, points_feature=feature)
  result.points_padding = 1.0 - is_real
  return result
def testRematerialize(self):
  """Tests the dropout consistency between fprop and bprop.

  The model is a chain of deterministic dropout layers, some of them wrapped
  in rematerialization cells. With all-ones inputs and weights, the gradient
  w.r.t. the weights must equal the forward activation, which only holds if
  the recomputed forward pass applies the same dropout masks.
  """
  b = builder.Base.Params()
  b = b.Instantiate()
  start_block = layers.DeterministicDropoutLayer.Params().Set(
      name='start_dropout', keep_prob=0.7)
  # Build 4 dropout layers, grouped 2 per cell; each cell is wrapped by
  # b._Rematerialize.
  num_blocks = 4
  blocks = []
  blocks_per_cell = 2
  for i in range(num_blocks):
    blocks.append(layers.DeterministicDropoutLayer.Params().Set(
        name='dropout_{}'.format(i), keep_prob=0.7))
  cells = []
  while blocks:
    # Peel off the next `blocks_per_cell` layers to form one cell.
    heads, blocks = blocks[:blocks_per_cell], blocks[blocks_per_cell:]
    cell_name = 'cell_{}'.format(len(cells))
    cells.append(
        b._Rematerialize(name=cell_name, body=b._Seq(cell_name, *heads)))
  with self.session(use_gpu=False, graph=tf.Graph()) as sess:
    tf.random.set_seed(12345)
    p = b._Seq('test', start_block, *cells)
    mdl = p.Instantiate()
    # y = mdl.FProp(x * w)
    # Fake input
    x = tf.ones([4, 5])
    # Construct weights.
    w = tf.get_variable(
        'w', shape=[4, 5], initializer=tf.constant_initializer([[1] * 5] * 4))
    y = mdl.FPropDefaultTheta(x * w)
    # Construct loss function such that gradients = final activation.
    # dy/dw = y = mdl.FProp(x * w) when w is 1.
    loss = tf.reduce_sum(y)
    grads = py_utils.ComputeGradients(loss, py_utils.NestedMap(w=w))
    tf.global_variables_initializer().run()
    y_val, grads_val = sess.run([y, grads.Transform(tuple)])
    # Index 1 of the per-variable tuple is taken to be the gradient value.
    grads_val = grads_val['w'][1]
    self.assertAllClose(y_val, grads_val)
    # Golden step-seed value; it depends on the exact graph built above.
    self.assertEqual(py_utils.GetStepSeed().eval(), 1553244033)
def Callback(theta, encoder_outputs, num_hyps_per_beam):
  """Wraps the init-beam-search-state callback and adds extra state fields.

  `self`, `biased`, `stochastic` and `k` are taken from the enclosing scope.

  Args:
    theta: Forwarded unchanged to self._InitBeamSearchStateCallback.
    encoder_outputs: Forwarded to the wrapped callback; its `padding` field
      (a tensor, or a dict of tensors) is also used to derive the batch size.
    num_hyps_per_beam: Number of hypotheses per beam.

  Returns:
    The (initial_results, states) pair from the wrapped callback, with
    `states` extended in place.
  """
  initial_results, states = self._InitBeamSearchStateCallback(
      theta, encoder_outputs, num_hyps_per_beam)
  assert hasattr(states, 'time_step')
  # The batch size is read from dim 1 of the padding tensor.
  if tf.is_tensor(encoder_outputs.padding):
    batch_size = tf.shape(encoder_outputs.padding)[1]
  else:  # Required for multisource models.
    batch_size = tf.shape(
        list(encoder_outputs.padding.values())[0])[1]
  num_hyps = batch_size * num_hyps_per_beam

  if biased:
    # states.consistent is initially all True
    states.consistent = tf.ones([
        num_hyps,
    ], dtype=tf.bool)

  if stochastic:
    dtype = py_utils.FPropDtype(self.params)
    states.cumulative_log_probs = tf.zeros([num_hyps, 1], dtype=dtype)
    states.perturbed_cumulative_log_probs = tf.zeros([num_hyps, 1],
                                                     dtype=dtype)
    # Temporary tensors that store information passed from
    # PreBeamSearchStepCallback to PostBeamSearchStepCallback. These are
    # used for updating states.cumulative_log_probs and
    # states.perturbed_cumulative_log_probs for the next step, which
    # requires the knowledge of the chosen IDs, which only becomes available
    # after PreBeamSearchStepCallback.
    states.tmp_states = py_utils.NestedMap(
        # Top-k (non-perturbed) log-probs. Used for updating
        # `cumulative_log_probs` in PostBeamSearchStepCallback.
        top_k_log_probs=tf.zeros([num_hyps, k], dtype=dtype),
        # Vocab ID of each item of `top_k_log_probs`.
        top_k_ids=tf.zeros([num_hyps, k], dtype=tf.int32),
        # Perturbed cumulative log-probs of the top-k IDs. Used for updating
        # `perturbed_cumulative_log_probs` in PostBeamSearchStepCallback.
        new_perturbed_cumulative_log_probs=tf.zeros([num_hyps, k],
                                                    dtype=dtype),
    )
  return initial_results, states
def _Extract(self, features):
  """Returns the laser points and features merged across lidars and returns.

  Args:
    features: A dict-like of parsed features with a 'laser_<lidar>_<return>'
      entry and a matching '..._flow' entry for every combination of
      p.lidar_names and p.lidar_returns.

  Returns:
    A `.NestedMap` with `points_xyz` and `points_feature` (the laser features
    followed by the 4 flow columns). When p.max_num_points is set, both are
    padded/trimmed to that many points and `points_padding` (0.0 for real
    points, 1.0 for padding) is added.
  """
  p = self.params
  xyz_parts = []
  feature_parts = []
  for lidar in p.lidar_names:
    for ri in p.lidar_returns:
      laser_key = 'laser_%s_%s' % (lidar, ri)
      # The laser entry and its flow entry are expected to hold the same
      # number of points.
      flow_key = laser_key + '_flow'
      laser = tf.reshape(
          _Dense(features[laser_key]), [-1, 3 + p.num_features])
      flow = tf.reshape(_Dense(features[flow_key]), [-1, 3 + 1])
      xyz_parts.append(laser[..., 0:3])
      feature_parts.append(tf.concat([laser[..., 3:], flow], axis=1))

  # Merge all of the points along the major dimension.
  xyz = tf.concat(xyz_parts, axis=0)
  feature = tf.concat(feature_parts, axis=0)

  if p.max_num_points is None:
    return py_utils.NestedMap(points_xyz=xyz, points_feature=feature)

  original_count = tf.shape(xyz)[0]
  xyz = py_utils.PadOrTrimTo(xyz, [p.max_num_points, 3])
  feature = py_utils.PadOrTrimTo(feature,
                                 [p.max_num_points, p.num_features + 4])
  # Ones for the original points are padded with zeros, then inverted.
  is_real = py_utils.PadOrTrimTo(tf.ones([original_count]), [p.max_num_points])

  result = py_utils.NestedMap(points_xyz=xyz, points_feature=feature)
  result.points_padding = 1.0 - is_real
  return result
def testSpectrumAugmenterWithFrequencyMask(self):
  """The host and on-device augmenters must agree on frequency masking."""
  with self.session(use_gpu=False, graph=tf.Graph()):
    tf.random.set_seed(1234)
    inputs = tf.ones([3, 5, 10, 1], dtype=tf.float32)
    paddings = tf.zeros([3, 5])

    # Both implementations get the same configuration and the same seed.
    candidate_params = [
        spectrum_augmenter.SpectrumAugmenter.Params(),
        spectrum_augmenter_on_device.SpectrumAugmenterOnDevice.Params(),
    ]
    augmented = []
    for layer_params in candidate_params:
      layer_params.name = 'specAug_layers'
      layer_params.freq_mask_max_bins = 6
      layer_params.freq_mask_count = 2
      layer_params.time_mask_max_frames = 0
      layer_params.random_seed = 34567
      layer = layer_params.Instantiate()
      output, _ = layer.FPropDefaultTheta(inputs, paddings)
      augmented.append(output)

    host_output, on_device_output = self.evaluate(augmented)
    self.assertAllClose(host_output, on_device_output)
def _GetBetaGamma(self, theta, inputs, **kwargs):
  """Looks up per-class beta and gamma and reshapes them for broadcasting.

  Args:
    theta: A `.NestedMap` with `beta` and `gamma` tables indexed by class id.
    inputs: The tensor to be normalized; only its batch size and rank are
      used.
    **kwargs: Must contain `class_emb`, a one-hot tensor of shape
      [batch, class_emb_dim=num_classes].

  Returns:
    A (beta, gamma) tuple, each reshaped to [batch, 1, ..., 1, dim].
  """
  assert 'class_emb' in kwargs
  one_hot_classes = kwargs['class_emb']
  class_ids = tf.math.argmax(one_hot_classes, axis=-1, output_type=tf.int32)

  # Target shape [batch, 1, ..., 1, dim], with rank(inputs) - 2 singleton dims.
  batch = py_utils.GetShape(inputs)[0]
  singleton_dims = tf.ones([py_utils.GetRank(inputs) - 2], tf.int32)
  to_shape = tf.concat([[batch], singleton_dims, [self.params.dim]], axis=0)

  def _LookUp(table):
    # Rows are gathered rather than using matmul/einsum to avoid potential
    # precision problems on TPU with sparse inputs. Result is [batch, dim]
    # before the reshape.
    return tf.reshape(tf.gather(table, class_ids), to_shape)

  return _LookUp(theta.beta), _LookUp(theta.gamma)
def testAddMultiCurveSubplot(self):
  """AddMultiCurveSubplot must yield one non-default image per example."""
  with self.session(graph=tf.Graph(), use_gpu=False) as sess:
    figure = plot.MatplotlibFigureSummary('XXX')
    batch_size = 2
    curve = tf.ones([batch_size, 3])
    paddings = tf.constant([[0., 0., 0.], [0., 1., 1.]])
    # The same curve is plotted twice, under two different labels.
    plot.AddMultiCurveSubplot(
        figure, [curve, curve],
        paddings,
        labels=['label1', 'label2'],
        xlabels=tf.constant(['a', 'b']),
        title='Title',
        ylabel='Ylabel')
    serialized = sess.run(figure.Finalize())

  parsed = tf.Summary.FromString(serialized)
  self.assertEqual(len(parsed.value), batch_size)
  for index, entry in enumerate(parsed.value):
    image = entry.image
    self.assertEqual(entry.tag, 'XXX/image/%d' % index)
    self.assertGreater(image.width, 0)
    self.assertGreater(image.height, 0)
    self.assertNotEqual(image.encoded_image_string,
                        self.default_encoded_image)
def testSpectrumAugmenterUnstacking(self):
  """The host and on-device augmenters must agree when unstacking is on."""
  with self.session(use_gpu=False, graph=tf.Graph()) as sess:
    tf.random.set_seed(1234)
    inputs = tf.ones([3, 5, 10, 1], dtype=tf.float32)
    paddings = tf.zeros([3, 5])

    # Both implementations get the same configuration and the same seed.
    candidate_params = [
        spectrum_augmenter.SpectrumAugmenter.Params(),
        spectrum_augmenter_on_device.SpectrumAugmenterOnDevice.Params(),
    ]
    augmented = []
    for layer_params in candidate_params:
      layer_params.name = 'specAug_layers'
      layer_params.unstack = True
      layer_params.stack_height = 2
      layer_params.freq_mask_max_bins = 5
      layer_params.time_mask_max_frames = 8
      layer_params.random_seed = 12345
      layer = layer_params.Instantiate()
      output, _ = layer.FPropDefaultTheta(inputs, paddings)
      augmented.append(output)

    host_output, on_device_output = sess.run(augmented)
    self.assertAllClose(host_output, on_device_output)