def build(self, input_shape):
  """Builds the encoder stack."""
  params = self.params

  # Self-attention sub-layer.
  self_attention_layer = attention_layer.SelfAttention(
      params["hidden_size"], params["num_heads"],
      params["attention_dropout"])

  # Position-wise feed-forward sub-layer.
  feed_forward_network = feed_forward_layer.FeedForwardNetwork(
      params["hidden_size"], params["filter_size"], params["relu_dropout"],
      params["train"])

  # Wrap each sub-layer with pre/post-processing (layer normalization,
  # dropout, and residual connection).
  self.self_attention_layer = PrePostProcessingWrapper(
      self_attention_layer, params)
  self.feed_forward_network = PrePostProcessingWrapper(
      feed_forward_network, params)

  # Create final layer normalization layer.
  self.output_normalization = LayerNormalization(params["hidden_size"])
  super(EncoderStack, self).build(input_shape)
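# A minimal sketch, assuming the surrounding EncoderStack follows the usual
# Transformer layout: the sub-layers built above are composed in the forward
# pass, with the final normalization applied to the stack output. The argument
# names `encoder_inputs`, `attention_bias`, and `inputs_padding` are
# illustrative assumptions, not necessarily the library's exact signature.
def call(self, encoder_inputs, attention_bias, inputs_padding):
  encoder_inputs = self.self_attention_layer(encoder_inputs, attention_bias)
  encoder_inputs = self.feed_forward_network(encoder_inputs, inputs_padding)
  return self.output_normalization(encoder_inputs)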
def test_feed_forward_network(self):
  # The feed-forward network should preserve the input shape
  # [batch, length, hidden_size].
  hidden_size = 64
  filter_size = 32
  relu_dropout = 0.5
  train = True
  layer = feed_forward_layer.FeedForwardNetwork(
      hidden_size, filter_size, relu_dropout, train)
  length = 2
  x = tf.ones([1, length, hidden_size])
  y = layer(x)
  self.assertEqual(y.shape, (1, length, hidden_size))
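# Assuming this test method sits inside a tf.test.TestCase subclass (e.g. a
# hypothetical FeedForwardNetworkTest), the file can be executed directly with
# TensorFlow's standard test runner:
if __name__ == "__main__":
  tf.test.main()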