def _testDecoderFPropFloatHelper(self,
                                 func_inline=False,
                                 num_decoder_layers=1,
                                 target_seq_len=5,
                                 residual_start=0):
  """Runs a decoder forward pass on fixed inputs and returns the loss value.

  The decoder is built from `self._DecoderParams` with the overrides below and
  is fed a seeded random-normal source encoding plus hard-coded target tensors.

  Args:
    func_inline: Passed as `do_function_inlining` in the session's optimizer
      options.
    num_decoder_layers: Value assigned to `p.rnn_layers`.
    target_seq_len: Value assigned to `p.target_seq_len`.
    residual_start: Value assigned to `p.residual_start`.

  Returns:
    The evaluated first element of `metrics['loss']`.
  """
  cluster = cluster_factory.ForTestingWorker(add_summary=True)
  optimizer_options = tf.OptimizerOptions(do_function_inlining=func_inline)
  session_config = tf.ConfigProto(
      graph_options=tf.GraphOptions(optimizer_options=optimizer_options))

  with cluster, self.session(use_gpu=False, config=session_config) as sess:
    tf.set_random_seed(8372749040)

    decoder_params = self._DecoderParams(
        py_utils.VariationalNoiseParams(None, False, False))
    decoder_params.rnn_layers = num_decoder_layers
    decoder_params.residual_start = residual_start
    decoder_params.target_seq_len = target_seq_len
    decoder = decoder_params.Instantiate()

    # Source side: a [5, 2, 8] random-normal encoding and a 5 x 2 padding
    # matrix (presumably [time, batch] -- confirm against the decoder).
    source_len = 5
    encoded = tf.random_normal([source_len, 2, 8], seed=9283748)
    encoded_padding = tf.constant(
        [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
        dtype=tf.float32)
    encoder_outputs = py_utils.NestedMap(
        encoded=encoded, padding=encoded_padding)

    # Target side: every literal is written as 5 rows of 4 entries and then
    # transposed, so the resulting tensors are 4 x 5.
    ids = tf.transpose(
        tf.constant(
            [[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 15], [5, 6, 7, 8],
             [10, 5, 2, 5]],
            dtype=tf.int32))
    labels = tf.transpose(
        tf.constant(
            [[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 13], [5, 7, 8, 10],
             [10, 5, 2, 4]],
            dtype=tf.int32))
    paddings = tf.transpose(
        tf.constant(
            [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0],
             [1, 1, 1, 1]],
            dtype=tf.float32))
    transcripts = tf.constant(['abcd', 'bcde', 'klmp', 'fghi', 'kfcf'])
    # Weight is 1 wherever the padding is 0, and 0 wherever it is 1.
    weights = 1.0 - paddings
    targets = py_utils.NestedMap(
        ids=ids,
        labels=labels,
        weights=weights,
        paddings=paddings,
        transcripts=transcripts)

    metrics = decoder.FPropDefaultTheta(encoder_outputs, targets).metrics
    loss = metrics['loss'][0]
    correct_predicts = metrics['fraction_of_correct_next_step_preds'][0]
    merged_summaries = tf.summary.merge(
        tf.get_collection(tf.GraphKeys.SUMMARIES))

    tf.global_variables_initializer().run()
    # The accuracy metric is fetched together with the loss, but only the
    # loss value is kept.
    loss_value = sess.run([loss, correct_predicts])[0]
    # The merged summaries are evaluated too; their result is discarded.
    merged_summaries.eval()
    return loss_value
def testDecoderSampleTargetSequences(self):
  """Checks `SampleTargetSequences` against goldens and across random seeds.

  Verifies the static shape of the sampled ids, the sampled ids and sequence
  lengths for seed 123, that re-sampling with seed 123 reproduces the same ids
  and paddings, and that seed 123456 does not yield the golden ids.
  """
  params = self._DecoderParams(
      vn_config=py_utils.VariationalNoiseParams(None, False, False),
      num_classes=8)
  params.target_seq_len = 5
  params.random_seed = 1
  optimizer_options = tf.OptimizerOptions(do_function_inlining=False)
  session_config = tf.ConfigProto(
      graph_options=tf.GraphOptions(optimizer_options=optimizer_options))

  with self.session(use_gpu=False, config=session_config) as sess:
    tf.set_random_seed(8372740)
    np.random.seed(35315)
    decoder = params.Instantiate()

    source_sequence_length = 5
    batch_size = 4
    raw_encodings = np.random.normal(
        size=[source_sequence_length, batch_size, params.source_dim])
    source_encodings = tf.constant(raw_encodings, dtype=tf.float32)
    source_encoding_padding = tf.constant(
        [[0.0, 0.0, 0.0, 0.0],
         [0.0, 0.0, 0.0, 1.0],
         [0.0, 1.0, 1.0, 1.0],
         [0.0, 1.0, 1.0, 1.0],
         [0.0, 1.0, 1.0, 1.0]],
        dtype=tf.float32)
    encoder_outputs = py_utils.NestedMap(
        encoded=source_encodings, padding=source_encoding_padding)

    def _Sample(seed):
      """Builds the sampling ops for the given integer random seed."""
      return decoder.SampleTargetSequences(
          decoder.theta, encoder_outputs, random_seed=tf.to_int32(seed))

    sampled_sequences = _Sample(123)
    self.assertAllEqual([batch_size, params.target_seq_len],
                        sampled_sequences.ids.shape)

    tf.global_variables_initializer().run()
    decoder_output = sess.run(sampled_sequences)
    print('ids=%s' % np.array_repr(decoder_output.ids))
    # Summing (1 - padding) over axis 1 counts the non-padded positions of
    # each row.
    lens = np.sum(1 - decoder_output.paddings, axis=1)
    print('lens=%s' % lens)

    # pyformat: disable
    # pylint: disable=bad-whitespace,bad-continuation
    expected_ids = [[6, 2, 2, 2, 2],
                    [0, 0, 7, 5, 1],
                    [6, 1, 5, 1, 5],
                    [6, 7, 7, 4, 4]]
    # pylint: enable=bad-whitespace,bad-continuation
    # pyformat: enable
    expected_lens = [2, 5, 5, 5]
    self.assertAllEqual(expected_lens, lens)
    self.assertAllEqual(expected_ids, decoder_output.ids)

    # The same seed must reproduce the same sample.
    repeated_output = sess.run(_Sample(123))
    self.assertAllEqual(decoder_output.ids, repeated_output.ids)
    self.assertAllEqual(decoder_output.paddings, repeated_output.paddings)

    # A different seed must not reproduce the golden ids.
    reseeded_output = sess.run(_Sample(123456))
    self.assertNotAllClose(expected_ids, reseeded_output.ids)
def _no_opt_sess_cfg(self):
  """Returns a session `ConfigProto` with graph optimizations switched off.

  The optimizer level is set to `L0`, and common-subexpression elimination,
  function inlining and constant folding are all disabled. The config also
  carries `self._cluster_def` as its cluster definition.
  """
  # Disable constant folding for convenience.
  optimizer_options = tf.OptimizerOptions(
      opt_level=tf.OptimizerOptions.L0,
      do_common_subexpression_elimination=False,
      do_function_inlining=False,
      do_constant_folding=False)
  graph_options = tf.GraphOptions(optimizer_options=optimizer_options)
  return tf.config_pb2.ConfigProto(
      graph_options=graph_options, cluster_def=self._cluster_def)
def _testDecoderBeamSearchDecodeHelperWithOutput(self,
                                                 params,
                                                 src_seq_len=None,
                                                 src_enc_padding=None):
  """Runs beam search decoding and returns one parsed `done_hyps` entry.

  Args:
    params: Decoder params. Mutated in place: `beam_search.num_hyps_per_beam`,
      `dtype` and `target_seq_len` are overwritten before instantiation.
    src_seq_len: Length of the first dimension of the random source encoding;
      defaults to 5 when None.
    src_enc_padding: Padding tensor for the source encoding; defaults to a
      fixed 5 x 2 float32 constant when None.

  Returns:
    A `Hypothesis` parsed from the serialized entry at index [2, 5] of the
    evaluated `done_hyps`.
  """
  optimizer_options = tf.OptimizerOptions(do_function_inlining=False)
  session_config = tf.ConfigProto(
      graph_options=tf.GraphOptions(optimizer_options=optimizer_options))

  with self.session(
      use_gpu=False, config=session_config) as sess, self.SetEval(True):
    tf.set_random_seed(837274904)
    np.random.seed(837575)

    params.beam_search.num_hyps_per_beam = 4
    params.dtype = tf.float32
    params.target_seq_len = 5
    decoder = params.Instantiate()

    if src_seq_len is None:
      src_seq_len = 5
    random_encoding = np.random.uniform(size=(src_seq_len, 2, 8))
    src_enc = tf.constant(random_encoding, tf.float32)
    if src_enc_padding is None:
      src_enc_padding = tf.constant(
          [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
          dtype=tf.float32)
    encoder_outputs = py_utils.NestedMap(
        encoded=src_enc, padding=src_enc_padding)

    done_hyps = decoder.BeamSearchDecode(encoder_outputs).done_hyps
    tf.global_variables_initializer().run()

    softmax_wts = sess.run(decoder.vars.softmax)
    print('softmax wts = ', softmax_wts)

    (done_hyps_serialized,) = sess.run([done_hyps])
    print('done hyps shape = ', done_hyps_serialized.shape)
    # Debug dump: byte length of every serialized hypothesis in the 5 x 8 grid.
    for row in range(5):
      for col in range(8):
        print(row, col, len(done_hyps_serialized[row, col]))

    hyp = Hypothesis()
    hyp.ParseFromString(done_hyps_serialized[2, 5])
    print('hyp = ', hyp)
    return hyp
def _testDecoderBeamSearchDecodeHelperWithOutput(self,
                                                 params,
                                                 src_seq_len=None,
                                                 src_enc_padding=None):
  """Runs beam search decoding and returns one parsed `topk_hyps` entry.

  Args:
    params: Decoder params. Mutated in place: `beam_search.num_hyps_per_beam`,
      `beam_search.length_normalization`, `dtype` and `target_seq_len` are
      overwritten before instantiation.
    src_seq_len: Length of the first dimension of the random source encoding;
      defaults to 5 when None.
    src_enc_padding: Padding tensor for the source encoding; defaults to a
      fixed 5 x 2 float32 constant when None.

  Returns:
    A `Hypothesis` parsed from the serialized entry at index [1, 2] of the
    evaluated `topk_hyps`.
  """
  optimizer_options = tf.OptimizerOptions(do_function_inlining=False)
  session_config = tf.config_pb2.ConfigProto(
      graph_options=tf.GraphOptions(optimizer_options=optimizer_options))

  with self.session(use_gpu=False, config=session_config), self.SetEval(True):
    tf.random.set_seed(837274904)
    np.random.seed(837575)

    params.beam_search.num_hyps_per_beam = 4
    params.beam_search.length_normalization = 10.
    params.dtype = tf.float32
    params.target_seq_len = 5
    decoder = params.Instantiate()

    if src_seq_len is None:
      src_seq_len = 5
    random_encoding = np.random.uniform(size=(src_seq_len, 2, 8))
    src_enc = tf.constant(random_encoding, tf.float32)
    if src_enc_padding is None:
      src_enc_padding = tf.constant(
          [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
          dtype=tf.float32)
    encoder_outputs = py_utils.NestedMap(
        encoded=src_enc, padding=src_enc_padding)

    topk_hyps = decoder.BeamSearchDecode(encoder_outputs).topk_hyps
    self.evaluate(tf.global_variables_initializer())

    softmax_wts = self.evaluate(decoder.vars.softmax)
    print('softmax wts = ', softmax_wts)

    topk_hyps_serialized = self.evaluate([topk_hyps])[0]
    parsed_hyp = Hypothesis()
    parsed_hyp.ParseFromString(topk_hyps_serialized[1, 2])
    return parsed_hyp
def _testDecoderFPropGradientCheckerHelper(self, func_inline=False):
  """Checks the decoder's symbolic gradients against numeric gradients.

  Builds a float64 decoder from `self._DecoderParams`, runs FProp on fixed
  inputs, compares the loss to a golden value twice, and then compares the
  gradient of the loss w.r.t. each trainable variable with a numerically
  estimated gradient.

  Args:
    func_inline: Passed as `do_function_inlining` in the session's optimizer
      options.
  """
  config = tf.ConfigProto(
      graph_options=tf.GraphOptions(
          optimizer_options=tf.OptimizerOptions(
              do_function_inlining=func_inline)))
  with self.session(use_gpu=False, config=config) as sess:
    tf.set_random_seed(8372749040)
    np.random.seed(274854)
    vn_config = py_utils.VariationalNoiseParams(None, False, False)
    p = self._DecoderParams(vn_config)
    p.dtype = tf.float64

    dec = p.Instantiate()
    src_seq_len = 5
    src_enc = tf.constant(
        np.random.uniform(size=(src_seq_len, 2, 8)), tf.float64)
    src_enc_padding = tf.constant(
        [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
        dtype=tf.float64)
    encoder_outputs = py_utils.NestedMap(
        encoded=src_enc, padding=src_enc_padding)
    target_ids = tf.transpose(
        tf.constant(
            [[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 15], [5, 6, 7, 8],
             [10, 5, 2, 5]],
            dtype=tf.int32))
    target_labels = tf.transpose(
        tf.constant(
            [[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 13], [5, 7, 8, 10],
             [10, 5, 2, 4]],
            dtype=tf.int32))
    target_paddings = tf.transpose(
        tf.constant(
            [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0],
             [1, 1, 1, 1]],
            dtype=tf.float64))
    target_transcripts = tf.constant(['abcd', 'bcde', 'klmp', 'fghi', 'kfcf'])
    target_weights = 1.0 - target_paddings

    targets = py_utils.NestedMap({
        'ids': target_ids,
        'labels': target_labels,
        'weights': target_weights,
        'paddings': target_paddings,
        'transcripts': target_transcripts,
    })
    metrics = dec.FPropDefaultTheta(encoder_outputs, targets).metrics
    loss = metrics['loss'][0]
    all_vars = tf.trainable_variables()
    grads = tf.gradients(loss, all_vars)

    def DenseGrad(var, grad):
      """Returns `grad` as a dense tensor, or None when there is no gradient."""
      if isinstance(grad, tf.Tensor):
        return grad
      if isinstance(grad, tf.IndexedSlices):
        # Sum the slices into a dense tensor with one row per row of `var`.
        return tf.unsorted_segment_sum(grad.values, grad.indices,
                                       tf.shape(var)[0])
      return None

    # Keep every variable paired with its gradient and drop the pairs without
    # a gradient together. Filtering only the gradient list would shift the
    # later comparison so that gradients of different variables get compared.
    var_grad_pairs = [(v, DenseGrad(v, g)) for v, g in zip(all_vars, grads)]
    var_grad_pairs = [(v, g) for v, g in var_grad_pairs if g is not None]

    tf.global_variables_initializer().run()

    test_utils.CompareToGoldenSingleFloat(self, 3.458078, loss.eval())
    # Second run to make sure the function is deterministic.
    test_utils.CompareToGoldenSingleFloat(self, 3.458078, loss.eval())

    symbolic_grads = [g.eval() for _, g in var_grad_pairs]
    numerical_grads = [
        test_utils.ComputeNumericGradient(sess, loss, v)
        for v, _ in var_grad_pairs
    ]

    for x, y in zip(symbolic_grads, numerical_grads):
      self.assertAllClose(x, y)