def testTrainLanguageModel(self):
    # Check that language model training runs end-to-end on a toy dataset.
    src = test_util.make_data_file(
        os.path.join(self.get_temp_dir(), "src.txt"),
        ["1 2 3 4", "5 6 7 8 9", "3 2"])
    vocab = test_util.make_vocab(
        os.path.join(self.get_temp_dir(), "vocab.txt"), list(map(str, range(10))))
    config = {
        "data": {
            "train_features_file": src,
            "vocabulary": vocab,
        },
        "params": {
            "learning_rate": 0.0005,
            "optimizer": "Adam",
        },
        "train": {
            "batch_size": 10,
            "max_step": 2,
        },
    }
    model = models.LanguageModel(
        decoders.SelfAttentionDecoder(2, num_units=32, ffn_inner_dim=32),
        embedding_size=16,
        reuse_embedding=False)
    runner = Runner(model, config)
    runner.train()
def testSelfAttentionDecoderFP16Training(self):
    decoder = decoders.SelfAttentionDecoder(2, num_units=6, num_heads=2, ffn_inner_dim=12)
    self._testDecoderTraining(decoder, support_alignment_history=True, dtype=tf.float16)
def testCreateVariablesLanguageModel(self):
    _, data_config = self._makeToyLMData()
    decoder = decoders.SelfAttentionDecoder(
        2, num_units=16, num_heads=4, ffn_inner_dim=32, num_sources=0)
    model = models.LanguageModel(decoder, embedding_size=16)
    model.initialize(data_config)
    model.create_variables()
    self.assertTrue(len(model.trainable_variables) > 0)
def testSelfAttentionDecoderMultiSource(self):
    num_sources = 2
    decoder = decoders.SelfAttentionDecoder(
        2, num_units=6, num_heads=2, ffn_inner_dim=12, num_sources=num_sources)
    self._testDecoder(decoder, num_sources=num_sources)
def testLanguageModelWithMissingStart(self):
    _, data_config = self._makeToyLMData()
    decoder = decoders.SelfAttentionDecoder(
        2, num_units=16, num_heads=4, ffn_inner_dim=32, num_sources=0)
    model = models.LanguageModel(decoder, embedding_size=16)
    model.initialize(data_config)
    features, _ = model.features_inputter.make_features(tf.constant(""))
    # Without a start-of-sentence control token, calling the model on an
    # empty sequence is expected to fail.
    with self.assertRaises(tf.errors.InvalidArgumentError):
        model(features)
def testSelfAttentionDecoder(self):
    decoder = decoders.SelfAttentionDecoder(
        num_layers=2,
        num_units=6,
        num_heads=2,
        ffn_inner_dim=12,
        vocab_size=10,
    )
    self.assertTrue(decoder.initialized)
    self._testDecoder(decoder)
def testInitializeWithDropoutOverride(self):
    model = models.SequenceToSequence(
        inputters.WordEmbedder(16),
        inputters.WordEmbedder(16),
        encoders.SelfAttentionEncoder(2, 16, 4, 32),
        decoders.SelfAttentionDecoder(2, 16, 4, 32))
    self.assertEqual(model.encoder.dropout, 0.1)
    _, _, data_config = self._makeToyClassifierData()
    params = dict(dropout=0.3)
    model.initialize(data_config, params=params)
    self.assertEqual(model.encoder.dropout, 0.3)
def _seq2seq_model(training=None):
    model = models.SequenceToSequence(
        inputters.WordEmbedder(16),
        inputters.WordEmbedder(16),
        encoders.SelfAttentionEncoder(2, 16, 4, 32),
        decoders.SelfAttentionDecoder(2, 16, 4, 32))
    params = {}
    if training:
        params["optimizer"] = "SGD"
        params["learning_rate"] = 0.1
    return model, params
def __init__(self):
    # GPT-2 "small" configuration: 12 decoder layers, 768 hidden units,
    # 12 attention heads, a 3072-unit feed-forward inner dimension, and
    # learned position embeddings up to position 1024.
    super(GPT2Small, self).__init__(
        decoder=decoders.SelfAttentionDecoder(
            num_layers=12,
            num_units=768,
            num_heads=12,
            ffn_inner_dim=3072,
            ffn_activation=layers.gelu,
            position_encoder_class=lambda: layers.PositionEmbedder(maximum_position=1024),
            num_sources=0),
        embedding_size=768)
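# A minimal usage sketch for the class above (the vocabulary path and
# data_config layout are hypothetical; the initialize/create_variables calls
# mirror the language model tests elsewhere in this section):
#
#   model = GPT2Small()
#   model.initialize({"vocabulary": "vocab.txt"})
#   model.create_variables()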
def testLanguageModelServing(self):
    _, data_config = self._makeToyLMData()
    decoder = decoders.SelfAttentionDecoder(
        2, num_units=16, num_heads=4, ffn_inner_dim=32, num_sources=0)
    model = models.LanguageModel(decoder, embedding_size=16)
    model.initialize(data_config)
    function = model.serve_function()
    function.get_concrete_function()
def testLanguageModelWithStartOfSentence(self):
    _, data_config = self._makeToyLMData()
    data_config["sequence_controls"] = dict(start=True, end=False)
    decoder = decoders.SelfAttentionDecoder(
        2, num_units=16, num_heads=4, ffn_inner_dim=32, num_sources=0)
    model = models.LanguageModel(decoder, embedding_size=16)
    model.initialize(data_config, params={"maximum_decoding_length": 1})
    features, _ = model.features_inputter.make_features(tf.constant(""))
    features = tf.nest.map_structure(lambda t: tf.expand_dims(t, 0), features)  # Add batch dim.
    _, predictions = self.evaluate(model(features))
    # Predictions should not include the leading <s>.
    self.assertEqual(predictions["length"][0], 1)
    self.assertTupleEqual(predictions["tokens"].shape, (1, 1))
def testSequenceToSequenceWithSharedEmbedding(self):
    model = models.SequenceToSequence(
        inputters.WordEmbedder(16),
        inputters.WordEmbedder(16),
        encoders.SelfAttentionEncoder(2, 16, 4, 32),
        decoders.SelfAttentionDecoder(2, 16, 4, 32),
        share_embeddings=models.EmbeddingsSharingLevel.ALL)
    _, _, data_config = self._makeToyEnDeData()
    data_config["target_vocabulary"] = data_config["source_vocabulary"]
    model.initialize(data_config)
    self.assertTrue(model.decoder.initialized)
    model.build(None)
    self.assertEqual(
        model.labels_inputter.embedding.ref(),
        model.decoder.output_layer.weight.ref())
def _seq2seq_model(training=None, shared_embeddings=False):
    model = models.SequenceToSequence(
        inputters.WordEmbedder(16),
        inputters.WordEmbedder(16),
        encoders.SelfAttentionEncoder(2, 16, 4, 32),
        decoders.SelfAttentionDecoder(2, 16, 4, 32),
        share_embeddings=(
            models.EmbeddingsSharingLevel.ALL
            if shared_embeddings
            else models.EmbeddingsSharingLevel.NONE),
    )
    params = {}
    if training:
        params["optimizer"] = "SGD"
        params["learning_rate"] = 0.1
    return model, params
def testSelfAttentionDecoderWithoutSourceLength(self):
    batch_size = 4
    depth = 6
    decoder = decoders.SelfAttentionDecoder(
        num_layers=2,
        num_units=depth,
        num_heads=2,
        ffn_inner_dim=depth * 2,
        vocab_size=10,
    )
    memory, _, _ = _generate_source_context(batch_size, depth)
    inputs = tf.random.uniform([batch_size, depth])
    step = tf.constant(0)
    initial_state = decoder.initial_state(memory)
    decoder(inputs, step, state=initial_state)
def testLanguageModel(self, mode):
    # Mainly test that the code does not throw.
    decoder = decoders.SelfAttentionDecoder(
        2, num_units=16, num_heads=4, ffn_inner_dim=32, num_sources=0)
    model = models.LanguageModel(decoder, embedding_size=16)
    features_file, data_config = self._makeToyLMData()
    params = {
        "optimizer": "SGD",
        "learning_rate": 0.1}
    self._testGenericModel(
        model,
        mode,
        features_file,
        data_config=data_config,
        batch_size=1 if mode == tf.estimator.ModeKeys.PREDICT else 16,
        prediction_heads=["tokens", "length"],
        params=params)
def testSelfAttentionDecoder(self):
    decoder = decoders.SelfAttentionDecoder(
        2, num_units=6, num_heads=2, ffn_inner_dim=12)
    self._testDecoder(decoder)
def testSelfAttentionDecoderFP16(self):
    decoder = decoders.SelfAttentionDecoder(2, num_units=6, num_heads=2, ffn_inner_dim=12)
    self._testDecoder(decoder, dtype=tf.float16)
def testSelfAttentionDecoder(self):
    decoder = decoders.SelfAttentionDecoder(2, num_units=6, num_heads=2, ffn_inner_dim=12)
    self._testDecoder(decoder, support_alignment_history=False)