def testRevNetLayerFProp(self):
  with self.session() as sess:
    tf.set_random_seed(321)
    input_1 = tf.random_normal([5, 3], seed=89122)
    input_2 = tf.random_normal([5, 3], seed=19438)
    p = reversible_layers.RevNetLayer.Params()
    p.name = 'revnet_simple'
    p.f_params = layers.FCLayer.Params().Set(input_dim=3, output_dim=3)
    p.g_params = layers.FCLayer.Params().Set(input_dim=3, output_dim=3)
    revnet_layer = p.Instantiate()
    h, _, _ = revnet_layer.FPropDefaultTheta(
        py_utils.NestedMap(split1=input_1, split2=input_2))
    tf.global_variables_initializer().run()
    actual_layer_output = sess.run(h)
    expected_split1 = np.array(
        [[-0.7262997, 0.9276514, -0.20907314],
         [-0.7089523, 0.24923629, 2.5001974],
         [1.6766014, 0.26847264, -0.2510258],
         [0.9629222, -0.57908165, 0.0485389],
         [2.7580009, 0.17540382, 1.6282884]],
        dtype=np.float32)
    expected_split2 = np.array(
        [[1.1282716, 1.4266306, -0.16530532],
         [-0.3836313, 0.04922554, 0.25543338],
         [0.03718817, 1.5488712, 2.1594636],
         [-2.1252284, 3.2059612, 0.1118355],
         [3.4058936, -0.63690275, -0.95291173]],
        dtype=np.float32)
    self.assertAllClose(expected_split1, actual_layer_output.split1)
    self.assertAllClose(expected_split2, actual_layer_output.split2)

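# Background for the test above: RevNetLayer computes the standard additive
# RevNet coupling, mapping splits (x1, x2) to y1 = x1 + f(x2) and
# y2 = x2 + g(y1), so the inputs can be recomputed from the outputs instead
# of being stored for backprop. A minimal sketch of that coupling, assuming
# the standard formulation (f/g stand in for the FC sub-layers; this is not
# the layer's actual implementation):
def _RevNetCouplingSketch(x1, x2, f, g):
  y1 = x1 + f(x2)
  y2 = x2 + g(y1)
  # Reconstruction for the backward pass: x2 = y2 - g(y1); x1 = y1 - f(x2).
  return y1, y2
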
def testSoftCondLayer(self):
  num_experts = 100
  with self.session(use_gpu=False, graph=tf.Graph()) as sess:
    tf.set_random_seed(24332)
    p = layers.SoftCondLayer.Params().Set(
        name='soft_cond',
        cond_dim=2,
        num_experts=num_experts,
        body=lingvo_layers.FCLayer.Params().Set(input_dim=2, output_dim=2))
    l = p.Instantiate()
    x = tf.random_normal(shape=[1, 2, 2])
    y = l.FPropDefaultTheta(x)
    tf.global_variables_initializer().run()
    x_val, y_val, vars_val = sess.run([x, y, l.vars])

    p_nz = layers.SoftCondLayer.Params().Set(
        name='soft_cond_nonzeros',
        cond_dim=2,
        num_experts=num_experts,
        nonzeros_mean=True,
        body=lingvo_layers.FCLayer.Params().Set(input_dim=2, output_dim=2))
    l_nz = p_nz.Instantiate()
    x_nz = tf.random_normal(shape=[1, 2, 2])
    y_nz = l_nz.FPropDefaultTheta(x_nz)
    tf.global_variables_initializer().run()
    x_nz_val, y_nz_val, vars_nz_val = sess.run([x_nz, y_nz, l_nz.vars])

    # Recompute both layers in numpy: a per-expert sigmoid gate from the
    # pooled input, then a gate-weighted mixture of the expert FC weights.
    np_val = x_val[0]
    np_nz_val = x_nz_val[0]
    task_weight = np.exp(-1.0 * np.dot(np.mean(np_val, 0), vars_val.w))
    task_weight = 1.0 / (1.0 + task_weight)
    nzs = np.count_nonzero(np_nz_val, 0).astype('float32') + 1e-10
    task_weight_nz = np.exp(
        -1.0 * np.dot(np.sum(np_nz_val, 0) / nzs, vars_nz_val.w))
    task_weight_nz = 1.0 / (1.0 + task_weight_nz)
    weighted_weight = np.einsum('i,ijk->jk', task_weight, vars_val.body.w)
    weighted_weight_nz = np.einsum('i,ijk->jk', task_weight_nz,
                                   vars_nz_val.body.w)
    weighted_bias = np.einsum('i,ij->j', task_weight, vars_val.body.b)
    weighted_bias_nz = np.einsum('i,ij->j', task_weight_nz,
                                 vars_nz_val.body.b)
    np_val_out = np.maximum(0, np.dot(np_val, weighted_weight) + weighted_bias)
    np_val_out_nz = np.maximum(
        0, np.dot(np_nz_val, weighted_weight_nz) + weighted_bias_nz)
    self.assertAllClose(np_val_out, y_val[0])
    self.assertAllClose(np_val_out_nz, y_nz_val[0])

def _ForwardPass(self, p):
  tf.set_random_seed(8372749040)
  stt_enc = encoder.AsrEncoder(p)
  batch = py_utils.NestedMap()
  batch.src_inputs = tf.random_normal([2, 20, 16, 3], seed=92837472)
  batch.paddings = tf.zeros([2, 20])
  return stt_enc.FPropDefaultTheta(batch)

def _testDecoderFPropFloatHelper(self,
                                 func_inline=False,
                                 num_decoder_layers=1,
                                 target_seq_len=5,
                                 residual_start=0):
  """Computes decoder from params and computes loss with random inputs."""
  cluster = cluster_factory.ForTestingWorker(add_summary=True)
  config = tf.ConfigProto(
      graph_options=tf.GraphOptions(
          optimizer_options=tf.OptimizerOptions(
              do_function_inlining=func_inline)))
  with cluster, self.session(use_gpu=False, config=config) as sess:
    tf.set_random_seed(8372749040)
    vn_config = py_utils.VariationalNoiseParams(None, False, False)
    p = self._DecoderParams(vn_config)
    p.rnn_layers = num_decoder_layers
    p.residual_start = residual_start
    p.target_seq_len = target_seq_len
    dec = p.Instantiate()
    src_seq_len = 5
    src_enc = tf.random_normal([src_seq_len, 2, 8], seed=9283748)
    src_enc_padding = tf.constant(
        [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
        dtype=tf.float32)
    encoder_outputs = py_utils.NestedMap(
        encoded=src_enc, padding=src_enc_padding)
    target_ids = tf.transpose(
        tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 15],
                     [5, 6, 7, 8], [10, 5, 2, 5]],
                    dtype=tf.int32))
    target_labels = tf.transpose(
        tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 13],
                     [5, 7, 8, 10], [10, 5, 2, 4]],
                    dtype=tf.int32))
    target_paddings = tf.transpose(
        tf.constant([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0],
                     [1, 1, 1, 1]],
                    dtype=tf.float32))
    target_transcripts = tf.constant(['abcd', 'bcde', 'klmp', 'fghi', 'kfcf'])
    target_weights = 1.0 - target_paddings
    targets = py_utils.NestedMap({
        'ids': target_ids,
        'labels': target_labels,
        'weights': target_weights,
        'paddings': target_paddings,
        'transcripts': target_transcripts,
    })
    metrics = dec.FPropDefaultTheta(encoder_outputs, targets).metrics
    loss = metrics['loss'][0]
    correct_predicts = metrics['fraction_of_correct_next_step_preds'][0]
    summaries = tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES))
    tf.global_variables_initializer().run()
    loss_v, _ = sess.run([loss, correct_predicts])
    summaries.eval()
    return loss_v

def testUnarySequentialLayer(self):
  g = tf.Graph()
  with g.as_default():
    tf.set_random_seed(24332)
    p = layers.UnarySequentialLayer.Params().Set(
        name='seq',
        sub=[
            lingvo_layers.FCLayer.Params().Set(
                name='foo', input_dim=32, output_dim=8),
            lingvo_layers.FCLayer.Params().Set(
                name='bar', input_dim=8, output_dim=8),
            lingvo_layers.FCLayer.Params().Set(
                name='baz', input_dim=8, output_dim=32),
            lingvo_layers.DropoutLayer.Params().Set(
                name='dropout', keep_prob=0.5)
        ])
    p.is_eval = True
    l = p.Instantiate()
    x = tf.random_normal(shape=[2, 32])
    y = l.FPropDefaultTheta(x)
    l.vars.Transform(lambda x: x.shape).VLog(0, 'vars: ')

  with self.session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    x_val, y_val, w = sess.run([x, y, l.vars])
    act = x_val
    # relu(act \dot w + b)
    act = np.maximum(0, np.dot(act, w.foo.w) + w.foo.b)
    act = np.maximum(0, np.dot(act, w.bar.w) + w.bar.b)
    act = np.maximum(0, np.dot(act, w.baz.w) + w.baz.b)
    self.assertAllClose(act, y_val)

def _Sources(self):
  p = self.params
  if p.cur_iter_in_seed:
    self._cur_iter += 1
  if p.integer_source_max:
    inputs = tf.random_uniform(
        p.source_shape,
        maxval=p.integer_source_max,
        dtype=tf.int32,
        seed=p.random_seed + 1000 * self._cur_iter)
  elif p.float_source_max:
    inputs = tf.random_uniform(
        p.source_shape,
        maxval=p.float_source_max,
        seed=p.random_seed + 1000 * self._cur_iter)
  else:
    inputs = tf.random_normal(
        p.source_shape, seed=p.random_seed + 1000 * self._cur_iter)
  paddings = tf.cast(
      tf.cumsum(
          tf.random_uniform(
              p.source_shape[:2],
              seed=p.random_seed + 1001 * self._cur_iter),
          axis=1) > 0.5 * p.source_shape[1],
      tf.float32)
  paddings = self._check_paddings(paddings)
  return inputs, paddings

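# Note on the paddings above: the cumulative sum of nonnegative uniform noise
# is nondecreasing along the time axis, so `cumsum > 0.5 * seq_len` flips
# from 0 to 1 at most once per row, yielding a contiguous run of real frames
# followed by padding. A minimal numpy sketch of the same trick (shapes and
# seed are illustrative, not from the original):
def _PaddingsFromCumsumSketch(batch=2, seq_len=8, seed=0):
  rng = np.random.RandomState(seed)
  u = rng.uniform(size=(batch, seq_len))
  # Each row is monotone, e.g. [0, 0, ..., 0, 1, 1]; once a position is
  # padded, every later position is padded too.
  return (np.cumsum(u, axis=1) > 0.5 * seq_len).astype(np.float32)
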
def testParallelRepeatLayerLayer(self):
  repeat = 100
  body_p = layers.SequentialLayer.Params().Set(
      name='body',
      sub=[
          layers.LinearLayer.Params().Set(
              name='ln1', input_dims=2, output_dims=4),
          layers.FnLayer.Params().Set(
              name='relu',
              fn=tf.nn.relu,
              fn_meta=lambda x: py_utils.NestedMap(flops=1, out_shapes=(x,))),
          layers.LinearLayer.Params().Set(
              name='ln2', input_dims=4, output_dims=2)
      ])
  with self.session(use_gpu=False, graph=tf.Graph()) as sess:
    tf.set_random_seed(24332)
    p = layers.ParallelRepeatLayer.Params().Set(
        name='moe', repeat=repeat, body=body_p)
    l = p.Instantiate()
    x = tf.random_normal(shape=[repeat, 2, 2])
    y = l.FPropDefaultTheta(x)
    tf.global_variables_initializer().run()
    x_val, y_val, w = sess.run([x, y, l.vars])
    np_val = []
    for i in range(repeat):
      # relu(act \dot w_1) \dot w_2
      np_val.append(
          np.dot(
              np.maximum(0, np.dot(x_val[i], w.body.ln1.w[i])),
              w.body.ln2.w[i]))
    np_val = np.stack(np_val)
    self.assertAllClose(np_val, y_val)

def GetBeamSearchHelperResults(sess, num_hyps_per_beam):
  np.random.seed(9384758)
  tf.set_random_seed(8274758)
  vocab_size = 12
  src_len = 5
  tgt_len = 7
  src_batch_size = 2
  tgt_batch_size = src_batch_size * num_hyps_per_beam
  p = beam_search_helper.BeamSearchHelper.Params().Set(
      name='bsh', target_seq_len=tgt_len)
  bs_helper = p.Instantiate()

  def InitBeamSearchState(unused_theta, unused_encoder_outputs,
                          unused_num_hyps_per_beam):
    atten_probs = tf.constant(
        np.random.normal(size=(tgt_batch_size, src_len)), dtype=tf.float32)
    return (py_utils.NestedMap({
        'log_probs': tf.zeros([tgt_batch_size, vocab_size]),
        'atten_probs': atten_probs,
    }), py_utils.NestedMap({'atten_probs': atten_probs}))

  def PreBeamSearchStepCallback(unused_theta, unused_encoder_outputs,
                                unused_step_ids, states,
                                unused_num_hyps_per_beam):
    atten_probs = tf.identity(states.atten_probs)
    logits = tf.random_normal([tgt_batch_size, vocab_size], seed=8273747)
    return (py_utils.NestedMap({
        'atten_probs': atten_probs,
        'log_probs': logits
    }), states)

  def PostBeamSearchStepCallback(unused_theta, unused_encoder_outputs,
                                 unused_new_step_ids, states):
    return states

  src_enc = tf.random_normal([src_len, src_batch_size, 8], seed=982774838)
  src_enc_padding = tf.constant(
      [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
      dtype=tf.float32)
  encoder_outputs = py_utils.NestedMap(
      encoded=src_enc, padding=src_enc_padding)
  theta = py_utils.NestedMap()
  decoder_output = bs_helper.BeamSearchDecode(theta, encoder_outputs,
                                              num_hyps_per_beam,
                                              InitBeamSearchState,
                                              PreBeamSearchStepCallback,
                                              PostBeamSearchStepCallback)
  topk_ids, topk_lens, topk_scores = sess.run([
      decoder_output.topk_ids, decoder_output.topk_lens,
      decoder_output.topk_scores
  ])
  return topk_ids, topk_lens, topk_scores

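# A possible way to exercise the helper above from a test method (the method
# name and asserted shapes here are illustrative assumptions, not from the
# original): topk outputs are flattened over beams, so with a source batch of
# 2 and num_hyps_per_beam=3 one would expect 2 * 3 = 6 hypothesis rows of
# length target_seq_len=7.
#
#   def testBeamSearchHelper(self):
#     with self.session(use_gpu=False) as sess:
#       topk_ids, topk_lens, topk_scores = GetBeamSearchHelperResults(
#           sess, num_hyps_per_beam=3)
#       self.assertEqual((6, 7), topk_ids.shape)
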
def testDecoderWithOrientedPerClassNMS(self):
  batch_size = 4
  num_preds = 8
  num_classes = 10
  # An example of setting the score threshold high and IOU threshold low
  # for classes we don't care about.
  score_threshold = [1.0] * num_classes
  score_threshold[1] = 0.05
  nms_iou_threshold = [0.0] * num_classes
  nms_iou_threshold[1] = 0.5
  with tf.Graph().as_default():
    tf.set_random_seed(12345)
    predicted_bboxes = tf.random_normal([batch_size, num_preds, 7])
    classification_scores = tf.random_uniform(
        [batch_size, num_preds, num_classes], minval=0, maxval=1)
    bboxes, bbox_scores, valid_mask = detection_decoder.DecodeWithNMS(
        predicted_bboxes,
        classification_scores,
        nms_iou_threshold=nms_iou_threshold,
        score_threshold=score_threshold,
        use_oriented_per_class_nms=True)
    with self.session() as sess:
      outputs = sess.run([
          predicted_bboxes, classification_scores, bboxes, bbox_scores,
          valid_mask
      ])
      (input_bboxes, input_scores, output_bboxes, output_scores,
       mask) = outputs
      self.assertEqual((batch_size, num_preds, 7), input_bboxes.shape)
      self.assertEqual((batch_size, num_classes, num_preds, 7),
                       output_bboxes.shape)
      self.assertEqual((batch_size, num_preds, num_classes),
                       input_scores.shape)
      self.assertEqual((batch_size, num_classes, num_preds),
                       output_scores.shape)
      self.assertEqual((batch_size, num_classes, num_preds), mask.shape)
      # Assert that NMS did some kind of filtering for each class.
      for cls_idx in range(num_classes):
        self.assertEqual(
            mask[:, cls_idx, :].sum(),
            (input_scores[:, :, cls_idx] > score_threshold[cls_idx]).sum())
        self.assertEqual(
            mask[:, cls_idx, :].sum(),
            (output_scores[:, cls_idx, :] > score_threshold[cls_idx]).sum())

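# Why the per-class counting above works: tf.random_uniform with maxval=1
# draws scores from [0, 1), so a score threshold of 1.0 can never be met and
# every class except class 1 is effectively disabled; only class 1
# (threshold 0.05) can contribute valid detections to the mask.
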
def testCreateMask2D(self):
  width = 10
  height = 20
  with self.cached_session():
    weights = tf.Variable(
        tf.random_normal([width, height], stddev=1), name="weights")
    masked_weights = pruning.apply_mask(weights, tf.get_variable_scope())
    tf.global_variables_initializer().run()
    weights_val = weights.eval()
    masked_weights_val = masked_weights.eval()
    self.assertAllEqual(weights_val, masked_weights_val)

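# The mask variable created by pruning.apply_mask is initialized to all ones,
# so before any pruning update runs the masked weights are element-wise equal
# to the raw weights, which is what the assertAllEqual above verifies.
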
def testEmptySequentialLayerFPropMeta(self):
  g = tf.Graph()
  with g.as_default():
    p = layers.SequentialLayer.Params().Set(name='seq')
    l = p.Instantiate()
    x = py_utils.NestedMap(val=tf.random_normal(shape=[2, 32]))
    y = l.FPropDefaultTheta(x)
    self.assertIsInstance(y.val, tf.Tensor)
    y_shape = l.FPropMeta(
        p, py_utils.Transform(lambda t: tshape.Shape(t.shape),
                              x)).out_shapes[0]
    self.assertEqual(y.val.shape.as_list(),
                     y_shape.val.ToTensorShape().as_list())

def testEmptySequentialLayer(self):
  g = tf.Graph()
  with g.as_default():
    tf.set_random_seed(24332)
    p = layers.SequentialLayer.Params().Set(name='seq')
    l = p.Instantiate()
    x = tf.random_normal(shape=[2, 32])
    y = l.FPropDefaultTheta(x)
    self.assertIsInstance(y, tf.Tensor)

  with self.session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    x_val, y_val = sess.run([x, y])
    self.assertAllEqual(x_val, y_val)

def testParallelMatmulLayer(self):
  g = tf.Graph()
  with g.as_default():
    tf.set_random_seed(24332)

    def MergeFn(xs):
      result = []
      for x in zip(*xs):
        val = x[0]
        for v in x[1:]:
          val = tf.matmul(val, v)
        result.append(val)
      return tuple(result)

    p = layers.ParallelLayer.Params().Set(
        name='parallel',
        merge=MergeFn,
        sub=[
            lingvo_layers.FCLayer.Params().Set(
                name='foo', input_dim=32, output_dim=4),
            lingvo_layers.FCLayer.Params().Set(
                name='bar', input_dim=32, output_dim=4),
            lingvo_layers.FCLayer.Params().Set(
                name='baz', input_dim=32, output_dim=4)
        ])
    l = p.Instantiate()
    x = tf.random_normal(shape=[2, 4, 32])
    y = l.FPropDefaultTheta(x)

  with self.session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    x_val, y_val, w = sess.run([x, y, l.vars])
    out = []
    act = x_val
    # relu(act \dot w + b)
    out += [np.maximum(0, np.matmul(act, w.foo.w) + w.foo.b)]
    self.assertEqual(out[-1].shape, (2, 4, 4))
    out += [np.maximum(0, np.matmul(act, w.bar.w) + w.bar.b)]
    self.assertEqual(out[-1].shape, (2, 4, 4))
    out += [np.maximum(0, np.matmul(act, w.baz.w) + w.baz.b)]
    self.assertEqual(out[-1].shape, (2, 4, 4))
    np_result = out[0]
    for v in out[1:]:
      np_result = np.matmul(np_result, v)
    self.assertAllClose(np_result, y_val, atol=1e-5, rtol=1e-5)

def testBiasLayer(self):
  g = tf.Graph()
  with g.as_default():
    tf.set_random_seed(24332)
    p = layers.BiasLayer.Params().Set(name='test', dims=10)
    l = p.Instantiate()
    x = tf.random_normal(shape=[2, 10])
    y = l.FPropDefaultTheta(x)

  with self.session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    x_val, y_val, w_val = sess.run([x, y, l.vars])
    self.assertEqual(w_val.b.shape, (10,))
    self.assertAllClose(x_val + w_val.b, y_val)

def testMapLayer(self):
  g = tf.Graph()
  with g.as_default():
    tf.set_random_seed(24332)
    p = layers.MapLayer.Params().Set(
        name='map', fn=tf.reduce_max, kwargs={'axis': 1})
    l = p.Instantiate()
    x0, x1 = [tf.random_normal(shape=[2, 3, 5])] * 2
    y0, y1 = l.FPropDefaultTheta(x0, x1)

  with self.session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    vx0, vx1, vy0, vy1 = sess.run([x0, x1, y0, y1])
    self.assertAllClose(np.max(vx0, 1), vy0)
    self.assertAllClose(np.max(vx1, 1), vy1)

def testParallelLayer(self):
  g = tf.Graph()
  with g.as_default():
    tf.set_random_seed(24332)
    p = layers.ParallelLayer.Params().Set(
        name='test',
        merge=lambda xs: tuple([tf.add_n(x) for x in zip(*xs)]),
        sub=[
            lingvo_layers.FCLayer.Params().Set(
                name='foo', input_dim=32, output_dim=4),
            lingvo_layers.FCLayer.Params().Set(
                name='bar', input_dim=32, output_dim=4),
            layers.SequentialLayer.Params().Set(
                name='seq',
                sub=[
                    lingvo_layers.FCLayer.Params().Set(
                        name='baz', input_dim=32, output_dim=4),
                    lingvo_layers.DropoutLayer.Params().Set(
                        name='dropout', keep_prob=0.5)
                ])
        ])
    p.is_eval = True
    l = p.Instantiate()
    x = tf.random_normal(shape=[2, 32])
    y = l.FPropDefaultTheta(x)

  with self.session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    x_val, y_val, w = sess.run([x, y, l.vars])
    out = []
    act = x_val
    # relu(act \dot w + b)
    out += [np.maximum(0, np.matmul(act, w.foo.w) + w.foo.b)]
    self.assertEqual(out[-1].shape, (2, 4))
    out += [np.maximum(0, np.matmul(act, w.bar.w) + w.bar.b)]
    self.assertEqual(out[-1].shape, (2, 4))
    out += [np.maximum(0, np.matmul(act, w.seq.baz.w) + w.seq.baz.b)]
    self.assertEqual(out[-1].shape, (2, 4))
    np_result = out[0]
    for v in out[1:]:
      np_result = np.add(np_result, v)
    self.assertAllClose(np_result, y_val)

def _testDecoderFPropHelper(self, params):
  """Computes decoder from params and computes loss with random inputs."""
  dec = decoder.AsrDecoder(params)
  src_seq_len = 5
  src_enc = tf.random_normal(
      [src_seq_len, 2, 8], seed=982774838, dtype=py_utils.FPropDtype(params))
  src_enc_padding = tf.constant(
      [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
      dtype=py_utils.FPropDtype(params))
  encoder_outputs = py_utils.NestedMap(
      encoded=src_enc, padding=src_enc_padding)
  # shape=[4, 5]
  target_ids = tf.transpose(
      tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 15],
                   [5, 6, 7, 8], [10, 5, 2, 5]],
                  dtype=tf.int32))
  # shape=[4, 5]
  target_labels = tf.transpose(
      tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 13],
                   [5, 7, 8, 10], [10, 5, 2, 4]],
                  dtype=tf.int32))
  # shape=[4, 5]
  target_paddings = tf.transpose(
      tf.constant([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0],
                   [1, 1, 1, 0]],
                  dtype=py_utils.FPropDtype(params)))
  target_transcripts = tf.constant(['abcd', 'bcde', 'klmp', 'fghi', 'kfcf'])
  target_weights = 1.0 - target_paddings
  # ids/labels/weights/paddings are all in [batch, time] shape.
  targets = py_utils.NestedMap({
      'ids': target_ids,
      'labels': target_labels,
      'weights': target_weights,
      'paddings': target_paddings,
      'transcripts': target_transcripts,
  })
  metrics, per_sequence_loss = dec.FPropWithPerExampleLoss(
      encoder_outputs, targets)
  loss = metrics['loss']
  return loss, per_sequence_loss

def testRepeatLayer(self):
  repeat = 100
  with self.session(use_gpu=False, graph=tf.Graph()) as sess:
    tf.set_random_seed(24332)
    p = layers.RepeatLayer.Params().Set(
        name='recurrent',
        repeat=repeat,
        body=lingvo_layers.FCLayer.Params().Set(input_dim=2, output_dim=2))
    l = p.Instantiate()
    x = tf.random_normal(shape=[2, 2])
    y = l.FPropDefaultTheta(x)
    tf.global_variables_initializer().run()
    x_val, y_val, w = sess.run([x, y, l.vars])
    np_val = x_val
    # relu(act \dot w + b), applied once per repeated body layer.
    for i in range(repeat):
      np_val = np.maximum(0, np.dot(np_val, w.body.w[i]) + w.body.b[i])
    self.assertAllClose(np_val, y_val)

def testSoftCondLayer(self):
  num_experts = 100
  with self.session(use_gpu=False, graph=tf.Graph()) as sess:
    tf.set_random_seed(24332)
    p = layers.SoftCondLayer.Params().Set(
        name='soft_cond',
        cond_dim=2,
        num_experts=num_experts,
        body=lingvo_layers.FCLayer.Params().Set(input_dim=2, output_dim=2))
    l = p.Instantiate()
    x = tf.random_normal(shape=[1, 2, 2])
    y = l.FPropDefaultTheta(x)
    tf.global_variables_initializer().run()
    x_val, y_val, vars_val = sess.run([x, y, l.vars])

    # Recompute in numpy: a per-expert sigmoid gate from the summed input,
    # then a gate-weighted mixture of the expert FC weights.
    np_val = x_val[0]
    task_weight = np.exp(-1.0 * np.dot(np.sum(np_val, 0), vars_val.w))
    task_weight = 1.0 / (1.0 + task_weight)
    weighted_weight = np.einsum('i,ijk->jk', task_weight, vars_val.body.w)
    weighted_bias = np.einsum('i,ij->j', task_weight, vars_val.body.b)
    np_val = np.maximum(0, np.dot(np_val, weighted_weight) + weighted_bias)
    self.assertAllClose(np_val, y_val[0])

def _FPropChunk(self, theta, pcm_audio_chunk, pcm_audio_paddings):
  p = self.params
  pcm_audio_chunk = tf.cast(pcm_audio_chunk, tf.float32)
  # shape: [batch, time, _frame_size]
  framed_signal = tf.signal.frame(pcm_audio_chunk, self._frame_size,
                                  self._frame_step, p.pad_end)

  # Pre-emphasis.
  if p.preemph != 1.0:
    preemphasized = self._ApplyPreemphasis(framed_signal)
  else:
    # Drop the last sample of each frame so the length matches the
    # pre-emphasis branch (_frame_size - 1 samples per frame).
    preemphasized = framed_signal[..., :-1]

  # Noise.
  if p.noise_scale > 0.0:
    noise_signal = tf.random_normal(
        tf.shape(preemphasized),
        stddev=p.noise_scale,
        mean=0.0,
        seed=p.random_seed)
  else:
    noise_signal = 0.0

  # Apply window fn.
  windowed_signal = preemphasized + noise_signal
  if self._window_fn is not None:
    window = self._window_fn(self._frame_size - 1, framed_signal.dtype)
    windowed_signal *= window

  mel_spectrogram = self._MelSpectrogram(windowed_signal)

  output_floor = 1.0
  mel_spectrogram_log = tf.log(
      tf.maximum(float(output_floor), mel_spectrogram))

  # Normalize by per-bin mean and stddev.
  mel_spectrogram_norm = (
      (mel_spectrogram_log - tf.convert_to_tensor(p.per_bin_mean)) /
      tf.convert_to_tensor(p.per_bin_stddev))
  return mel_spectrogram_norm, self._GetMelPadding(pcm_audio_paddings)

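# For reference, pre-emphasis is the first-difference filter
# y[n] = x[n] - coeff * x[n-1]. Applied within a frame it yields
# frame_size - 1 samples, which is why both branches above produce frames of
# length _frame_size - 1 and the window is built with that length. A minimal
# sketch assuming the conventional formulation (not necessarily identical to
# self._ApplyPreemphasis):
def _ApplyPreemphasisSketch(framed_signal, coeff=0.97):
  # framed_signal: [batch, num_frames, frame_size] -> [..., frame_size - 1].
  return framed_signal[..., 1:] - coeff * framed_signal[..., :-1]
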
def testLinearLayer(self):
  g = tf.Graph()
  with g.as_default():
    tf.set_random_seed(24332)
    p = layers.LinearLayer.Params().Set(
        name='test', input_dims=10, output_dims=5)
    l = p.Instantiate()
    xs = []
    ys = []
    for shape in ([2, 10], [2, 3, 10], [2, 3, 5, 10], [2, 3, 5, 7, 10]):
      x = tf.random_normal(shape=shape)
      y = l.FPropDefaultTheta(x)
      xs += [x]
      ys += [y]

  with self.session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    xs_val, ys_val, w_val = sess.run([xs, ys, l.vars])
    self.assertEqual(w_val.w.shape, (10, 5))
    for (xv, yv) in zip(xs_val, ys_val):
      self.assertAllClose(np.matmul(xv, w_val.w), yv)

def testDecoderSingleClassNMS(self):
  batch_size = 4
  num_preds = 8
  num_classes = 10
  score_threshold = 0.05
  nms_iou_threshold = 0.5
  with tf.Graph().as_default():
    tf.set_random_seed(12345)
    predicted_bboxes = tf.random_normal([batch_size, num_preds, 7])
    classification_scores = tf.random_uniform(
        [batch_size, num_preds, num_classes], minval=0, maxval=1)
    bboxes, bbox_scores, valid_mask = detection_decoder.DecodeWithNMS(
        predicted_bboxes,
        classification_scores,
        nms_iou_threshold=nms_iou_threshold,
        score_threshold=score_threshold,
        use_oriented_per_class_nms=False)
    with self.session() as sess:
      outputs = sess.run([
          predicted_bboxes, classification_scores, bboxes, bbox_scores,
          valid_mask
      ])
      (input_bboxes, input_scores, output_bboxes, output_scores,
       mask) = outputs
      self.assertEqual((batch_size, num_preds, 7), input_bboxes.shape)
      self.assertEqual((batch_size, num_classes, num_preds, 7),
                       output_bboxes.shape)
      self.assertEqual((batch_size, num_preds, num_classes),
                       input_scores.shape)
      self.assertEqual((batch_size, num_classes, num_preds),
                       output_scores.shape)
      self.assertEqual((batch_size, num_classes, num_preds), mask.shape)

def testGreedySearchHelper(self):
  with self.session(use_gpu=False) as sess:
    np.random.seed(9384758)
    tf.set_random_seed(8274758)
    vocab_size = 12
    src_len = 5
    tgt_len = 7
    src_batch_size = 2
    tgt_batch_size = src_batch_size
    p = beam_search_helper.GreedySearchHelper.Params().Set(
        name='gsh', target_seq_len=tgt_len)
    gs_helper = p.Instantiate()

    def InitGreedySearchState(unused_theta, unused_encoder_outputs,
                              unused_num_hyps_per_beam):
      atten_probs = tf.constant(
          np.random.normal(size=(tgt_batch_size, src_len)), dtype=tf.float32)
      return (py_utils.NestedMap({
          'log_probs': tf.zeros([tgt_batch_size, vocab_size]),
          'atten_probs': atten_probs,
      }), py_utils.NestedMap({'atten_probs': atten_probs}))

    def PreGreedySearchStepCallback(unused_theta, unused_encoder_outputs,
                                    unused_step_ids, states,
                                    unused_num_hyps_per_beam):
      atten_probs = tf.identity(states.atten_probs)
      logits = tf.random_normal([tgt_batch_size, vocab_size], seed=8273747)
      return (py_utils.NestedMap({
          'atten_probs': atten_probs,
          'log_probs': logits
      }), states)

    def PostGreedySearchStepCallback(unused_theta, unused_encoder_outputs,
                                     unused_new_step_ids, states):
      return states

    src_enc = tf.random_normal([src_len, src_batch_size, 8], seed=982774838)
    src_enc_padding = tf.constant(
        [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
        dtype=tf.float32)
    encoder_outputs = py_utils.NestedMap(
        encoded=src_enc, padding=src_enc_padding)
    theta = py_utils.NestedMap()
    (final_hyp_ids, final_hyp_lens,
     final_done_hyps) = gs_helper.GreedySearchDecode(
         theta, encoder_outputs, InitGreedySearchState,
         PreGreedySearchStepCallback, PostGreedySearchStepCallback)
    (final_hyp_ids, final_hyp_lens, final_done_hyps) = sess.run(
        [final_hyp_ids, final_hyp_lens, final_done_hyps])

    print(np.array_repr(final_hyp_ids))
    print(np.array_repr(final_hyp_lens))
    print(np.array_repr(final_done_hyps))

    expected_hyp_ids = [[2, 2, 6, 7, 1, 9, 4], [3, 9, 3, 9, 6, 5, 10]]
    expected_hyp_lens = [1, 7]
    expected_done_hyps = [True, False]
    self.assertEqual(expected_hyp_ids, final_hyp_ids.tolist())
    self.assertEqual(expected_hyp_lens, final_hyp_lens.tolist())
    self.assertEqual(expected_done_hyps, final_done_hyps.tolist())

def testCustomStepIds(self):
  with self.session(use_gpu=False) as sess:
    np.random.seed(9384758)
    tf.set_random_seed(8274758)
    vocab_size = 12
    src_len = 5
    tgt_len = 7
    num_hyps_per_beam = 3
    src_batch_size = 2
    tgt_batch_size = src_batch_size * num_hyps_per_beam
    p = beam_search_helper.BeamSearchHelper.Params().Set(
        name='bsh', target_seq_len=tgt_len)
    bs_helper = p.Instantiate()

    def InitBeamSearchState(unused_theta, unused_encoder_outputs,
                            unused_num_hyps_per_beam):
      atten_probs = tf.constant(
          np.random.normal(size=(tgt_batch_size, src_len)), dtype=tf.float32)
      return (py_utils.NestedMap({
          'log_probs': tf.zeros([tgt_batch_size, vocab_size]),
          'atten_probs': atten_probs,
          'step_ids': tf.zeros([tgt_batch_size, 1], dtype=tf.int32)
      }), py_utils.NestedMap({'atten_probs': atten_probs}))

    def PreBeamSearchStepCallback(unused_theta, unused_encoder_outputs,
                                  unused_step_ids, states,
                                  unused_num_hyps_per_beam):
      atten_probs = tf.identity(states.atten_probs)
      logits = tf.random_normal([tgt_batch_size, vocab_size], seed=8273747)
      return (py_utils.NestedMap({
          'atten_probs': atten_probs,
          'log_probs': logits
      }), states)

    def PostBeamSearchStepCallback(unused_theta, unused_encoder_outputs,
                                   unused_new_step_ids, states):
      return states

    src_enc = tf.random_normal([src_len, src_batch_size, 8], seed=982774838)
    src_enc_padding = tf.constant(
        [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
        dtype=tf.float32)
    encoder_outputs = py_utils.NestedMap(
        encoded=src_enc, padding=src_enc_padding)
    theta = py_utils.NestedMap()
    decoder_output = bs_helper.BeamSearchDecode(theta, encoder_outputs,
                                                num_hyps_per_beam,
                                                InitBeamSearchState,
                                                PreBeamSearchStepCallback,
                                                PostBeamSearchStepCallback)
    topk_ids, topk_lens, topk_scores = sess.run([
        decoder_output.topk_ids, decoder_output.topk_lens,
        decoder_output.topk_scores
    ])

    print(np.array_repr(topk_ids))
    print(np.array_repr(topk_lens))
    print(np.array_repr(topk_scores))

    expected_topk_ids = [[4, 3, 4, 3, 2, 0, 0], [4, 3, 11, 2, 0, 0, 0],
                         [4, 3, 6, 2, 0, 0, 0], [6, 0, 4, 6, 6, 11, 2],
                         [6, 0, 4, 6, 1, 2, 0], [6, 0, 4, 6, 6, 2, 0]]
    expected_topk_lens = [5, 4, 4, 7, 6, 6]
    expected_topk_scores = [[8.27340603, 6.26949024, 5.59490776],
                            [9.74691486, 8.46679497, 7.14809656]]
    self.assertEqual(expected_topk_ids, topk_ids.tolist())
    self.assertEqual(expected_topk_lens, topk_lens.tolist())
    self.assertAllClose(expected_topk_scores, topk_scores)

def testExpandTensor(self, input_shape, block_dim):
  weights = tf.random_normal(shape=input_shape)
  self._compare_expand_tensor_with_kronecker_product(weights, block_dim)

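# A plausible numpy reference for the comparison helper used above: expanding
# a [w, h] tensor by block_dim = (bw, bh) tiles each scalar into a bw x bh
# patch, i.e. a Kronecker product with an all-ones block. Sketch under that
# assumption (the actual helper may differ):
def _ExpandTensorReferenceSketch(weights, block_dim):
  return np.kron(weights, np.ones(block_dim, dtype=weights.dtype))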