# Assumed imports for these snippets (the module path is an assumption; the
# test methods come from a tf.test.TestCase subclass, which provides
# assertAllEqual).
import tensorflow as tf

from official.projects.detr.modeling import transformer


def test_transformer_encoder_get_config(self):
     num_layers = 2
     num_attention_heads = 2
     intermediate_size = 256
     model = transformer.TransformerEncoder(
         num_layers=num_layers,
         num_attention_heads=num_attention_heads,
         intermediate_size=intermediate_size)
     config = model.get_config()
     expected_config = {
         'name': 'transformer_encoder',
         'trainable': True,
         'dtype': 'float32',
         'num_layers': 2,
         'num_attention_heads': 2,
         'intermediate_size': 256,
         'activation': 'relu',
         'dropout_rate': 0.0,
         'attention_dropout_rate': 0.0,
         'use_bias': False,
         'norm_first': True,
         'norm_epsilon': 1e-06,
         'intermediate_dropout': 0.0
     }
     self.assertAllEqual(expected_config, config)
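
The returned dictionary is a complete Keras layer config; assuming TransformerEncoder follows the standard Keras get_config/from_config contract, the same config can rebuild an equivalent layer. A minimal sketch, not part of the original test:

# Round-trip the config through the standard Keras from_config constructor.
rebuilt = transformer.TransformerEncoder.from_config(config)
assert rebuilt.get_config() == config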
Example #2
# build() lazily constructs the encoder/decoder stack; one shared rate drives
# the attention, hidden-layer, and intermediate dropout on both sides.
def build(self, input_shape=None):
     self._encoder = transformer.TransformerEncoder(
         attention_dropout_rate=self._dropout_rate,
         dropout_rate=self._dropout_rate,
         intermediate_dropout=self._dropout_rate,
         norm_first=False,
         num_layers=self._num_encoder_layers,
     )
     self._decoder = transformer.TransformerDecoder(
         attention_dropout_rate=self._dropout_rate,
         dropout_rate=self._dropout_rate,
         intermediate_dropout=self._dropout_rate,
         norm_first=False,
         num_layers=self._num_decoder_layers)
     super().build(input_shape)

# Forward-pass shape check: the encoder output keeps the input's
# [batch, sequence, feature] shape.
def test_transformer_encoder(self):
     batch_size = 2
     sequence_length = 100
     feature_size = 256
     num_layers = 2
     num_attention_heads = 2
     intermediate_size = 256
     model = transformer.TransformerEncoder(
         num_layers=num_layers,
         num_attention_heads=num_attention_heads,
         intermediate_size=intermediate_size)
     input_tensor = tf.ones((batch_size, sequence_length, feature_size))
     attention_mask = tf.ones(
         (batch_size, sequence_length, sequence_length), dtype=tf.int64)
     pos_embed = tf.ones((batch_size, sequence_length, feature_size))
     out = model(input_tensor, attention_mask, pos_embed)
     self.assertAllEqual(tf.shape(out),
                         (batch_size, sequence_length, feature_size))
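
The all-ones mask above lets every position attend to every other one. A short sketch of building a padding mask of the same [batch, seq, seq] shape from per-example lengths, assuming 1 means "may attend" (as the all-ones mask implies); the lengths here are illustrative:

# Mark valid positions per example, then take the outer product so a query
# may only attend to keys that are themselves valid.
lengths = tf.constant([100, 60])
valid = tf.sequence_mask(lengths, maxlen=sequence_length, dtype=tf.int64)
attention_mask = valid[:, tf.newaxis, :] * valid[:, :, tf.newaxis]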