def test_get_config(self):
   num_attention_heads = 2
   decoder_block = transformer.TransformerDecoderLayer(
       num_attention_heads=num_attention_heads,
       intermediate_size=32,
       intermediate_activation='relu',
       dropout_rate=0.1,
       attention_dropout_rate=0.1,
       use_bias=False,
       norm_first=True,
       norm_epsilon=1e-6)
   decoder_block_config = decoder_block.get_config()
   new_decoder_block = transformer.TransformerDecoderLayer.from_config(
       decoder_block_config)
   self.assertEqual(decoder_block_config, new_decoder_block.get_config())
Example No. 2
 def build(self, unused_input_shapes):
     """Implements build() for the layer."""
     self.layers = []
     for i in range(self.num_hidden_layers):
         self.layers.append(
             transformer.TransformerDecoderLayer(
                 num_attention_heads=self.num_attention_heads,
                 intermediate_size=self.intermediate_size,
                 intermediate_activation=self.intermediate_activation,
                 dropout_rate=self.hidden_dropout_prob,
                 attention_dropout_rate=self.attention_probs_dropout_prob,
                 kernel_initializer=tf.keras.initializers.TruncatedNormal(
                     stddev=self.initializer_range),
                  multi_channel_cross_attention=(
                      self.multi_channel_cross_attention),
                 name=("layer_%d" % i)))
     super(TransformerDecoder, self).build(unused_input_shapes)
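The build() method above reads several hyperparameters from self (num_hidden_layers, initializer_range, multi_channel_cross_attention, and so on). A minimal constructor sketch that would provide those attributes is shown below; the argument names and defaults are assumptions for illustration, not the library's exact signature.

import tensorflow as tf

class TransformerDecoder(tf.keras.layers.Layer):
  """Sketch only: a constructor storing the attributes build() expects."""

  def __init__(self,
               num_hidden_layers=6,
               num_attention_heads=8,
               intermediate_size=2048,
               intermediate_activation='relu',
               hidden_dropout_prob=0.1,
               attention_probs_dropout_prob=0.1,
               initializer_range=0.02,
               multi_channel_cross_attention=False,
               **kwargs):
    super(TransformerDecoder, self).__init__(**kwargs)
    # Each attribute below is read by build() when stacking the decoder layers.
    self.num_hidden_layers = num_hidden_layers
    self.num_attention_heads = num_attention_heads
    self.intermediate_size = intermediate_size
    self.intermediate_activation = intermediate_activation
    self.hidden_dropout_prob = hidden_dropout_prob
    self.attention_probs_dropout_prob = attention_probs_dropout_prob
    self.initializer_range = initializer_range
    self.multi_channel_cross_attention = multi_channel_cross_attention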
 def test_use_bias_norm_first(self):
   num_attention_heads = 2
   hidden_size = 16
   decoder_block = transformer.TransformerDecoderLayer(
       num_attention_heads=num_attention_heads,
       intermediate_size=32,
       intermediate_activation='relu',
       dropout_rate=0.1,
       attention_dropout_rate=0.1,
       use_bias=False,
       norm_first=True,
       norm_epsilon=1e-6)
   # Forward path.
   dummy_tensor = tf.zeros([2, 4, 16], dtype=tf.float32)
   dummy_mask = tf.zeros([2, 4, 4], dtype=tf.float32)
   inputs = [dummy_tensor, dummy_tensor, dummy_mask, dummy_mask]
   output, _ = decoder_block(inputs)
    self.assertEqual(output.shape, (2, 4, hidden_size))

 def test_decoder_block_with_cache(self):
   num_attention_heads = 2
   hidden_size = 16
   decoder_block = transformer.TransformerDecoderLayer(
       num_attention_heads=num_attention_heads,
       intermediate_size=32,
       intermediate_activation='relu',
       dropout_rate=0.1,
       attention_dropout_rate=0.1)
   # Forward path.
   dummy_tensor = tf.zeros([2, 4, 16], dtype=tf.float32)
   dummy_mask = tf.zeros([2, 4, 4], dtype=tf.float32)
   inputs = [dummy_tensor, dummy_tensor, dummy_mask, dummy_mask]
   cache = _create_cache(2, 0, num_attention_heads,
                         hidden_size // num_attention_heads)
   output, cache = decoder_block(inputs, cache)
   self.assertEqual(output.shape, (2, 4, hidden_size))
   self.assertEqual(cache['value'].shape, (2, 4, 2, 8))
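The _create_cache helper used above is not defined in this snippet. A minimal sketch consistent with the call _create_cache(batch_size, init_decode_length, num_heads, head_size) is given below; it is an assumption for illustration rather than the test file's exact helper.

import tensorflow as tf

def _create_cache(batch_size, init_decode_length, num_heads, head_size):
  """Sketch: zero-initialized key/value cache for incremental decoding."""
  shape = [batch_size, init_decode_length, num_heads, head_size]
  return {
      'key': tf.zeros(shape, dtype=tf.float32),
      'value': tf.zeros(shape, dtype=tf.float32),
  }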
Example No. 5
 def test_get_config(self):
     num_attention_heads = 2
     decoder_block = transformer.TransformerDecoderLayer(
         num_attention_heads=num_attention_heads,
         intermediate_size=32,
         intermediate_activation='relu',
         dropout_rate=0.1,
         attention_dropout_rate=0.1,
         use_bias=False,
         norm_first=True,
         norm_epsilon=1e-6,
         intermediate_dropout=0.1,
         attention_initializer=tf.keras.initializers.RandomUniform(
             minval=0., maxval=1.))
     decoder_block_config = decoder_block.get_config()
     new_decoder_block = transformer.TransformerDecoderLayer.from_config(
         decoder_block_config)
     self.assertEqual(decoder_block_config, new_decoder_block.get_config())