def build(self, input_shape):
    """Instantiate the sublayers that make up the encoder stack.

    Creates a feed-forward network stored as ``self.projection_layer``,
    then appends one ``[self-attention, feed-forward]`` pair of
    ``PrePostProcessingWrapper`` instances to ``self.layers`` for each of
    the ``params["num_hidden_layers"]`` layers, and finally creates
    ``self.output_normalization``. Hidden and filter sizes come from the
    ``enc_hidden_size`` / ``enc_filter_size`` entries of ``self.params``.

    Args:
        input_shape: Forwarded unchanged to the parent class's ``build``.
    """
    hparams = self.params
    hidden_size = hparams["enc_hidden_size"]
    filter_size = hparams["enc_filter_size"]
    relu_dropout = hparams["relu_dropout"]

    self.projection_layer = ffn_layer.FeedForwardNetwork(
        hidden_size, filter_size, relu_dropout)

    for _ in range(hparams["num_hidden_layers"]):
        # Both raw sublayers are constructed before either wrapper, in
        # the same order as they end up in the stored pair.
        sublayers = (
            attention_layer.SelfAttention(
                hidden_size,
                hparams["num_heads"],
                hparams["attention_dropout"]),
            ffn_layer.FeedForwardNetwork(
                hidden_size, filter_size, relu_dropout),
        )
        self.layers.append([
            PrePostProcessingWrapper(sublayer, hparams, hidden_size)
            for sublayer in sublayers
        ])

    # Normalization layer created once, after all per-layer sublayers.
    self.output_normalization = LayerNormalization(hidden_size)
    super(EncoderStack, self).build(input_shape)
def build(self, input_shape):
    """Instantiate the sublayers that make up the decoder stack.

    For each of the ``params["num_hidden_layers"]`` layers, appends a
    three-element list to ``self.layers``: a ``SelfAttention`` layer, an
    ``Attention`` layer and a ``FeedForwardNetwork``, each wrapped in
    ``PrePostProcessingWrapper``. Afterwards creates
    ``self.output_normalization``.

    Args:
        input_shape: Forwarded unchanged to the parent class's ``build``.
    """
    hparams = self.params
    layer_range = range(hparams["num_hidden_layers"])
    hidden_size = hparams["hidden_size"]

    for _ in layer_range:
        # All three raw sublayers are constructed before any wrapper, in
        # the same order as they end up in the stored list.
        sublayers = (
            attention_layer.SelfAttention(
                hidden_size,
                hparams["num_heads"],
                hparams["attention_dropout"]),
            attention_layer.Attention(
                hidden_size,
                hparams["num_heads"],
                hparams["attention_dropout"]),
            ffn_layer.FeedForwardNetwork(
                hidden_size,
                hparams["filter_size"],
                hparams["relu_dropout"]),
        )
        self.layers.append([
            PrePostProcessingWrapper(sublayer, hparams)
            for sublayer in sublayers
        ])

    # Normalization layer created once, after all per-layer sublayers.
    self.output_normalization = LayerNormalization(hidden_size)
    super(DecoderStack, self).build(input_shape)
def __init__(self, params, name=None):
    """Set up the sublayers of the Transformer model.

    Creates, in order: the shared embedding/softmax layer, two encoder
    stacks (named ``"px_encoder"`` and ``"npx_encoder"``), the property
    LSTM network, the property transformation feed-forward network and
    the decoder stack.

    Args:
        params: hyperparameter object, indexed by string key, defining
            layer sizes, dropout values, etc.
        name: name of the model.
    """
    super(Transformer, self).__init__(name=name)
    self.params = params

    vocab_size = params["vocab_size"]
    hidden_size = params["hidden_size"]
    self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
        vocab_size, hidden_size)

    # Two separate encoder stacks built from the same hyperparameters.
    self.encoder_px_stack = EncoderStack(params, "px_encoder")
    self.encoder_npx_stack = EncoderStack(params, "npx_encoder")

    property_inner_size = params["property_inner_size"]
    property_size = params["property_size"]
    relu_dropout = params["relu_dropout"]
    self.property_lstm = ffn_layer.PropertyLSTMNetwork(
        property_inner_size, property_size, relu_dropout)

    enc_hidden_size = params["enc_hidden_size"]
    enc_filter_size = params["enc_filter_size"]
    self.property_transformation_dense = ffn_layer.FeedForwardNetwork(
        enc_hidden_size, enc_filter_size, relu_dropout)

    self.decoder_stack = DecoderStack(params)