def encode(self, inputs, attention_bias):
  """Generate continuous representation for inputs.

  Args:
    inputs: int tensor with shape [batch_size, input_length].
    attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

  Returns:
    float tensor with shape [batch_size, input_length, hidden_size]
  """
  with tf.name_scope("encode"):
    # Prepare inputs to the layer stack by adding positional encodings and
    # applying dropout.
    embedded_inputs = self.embedding_softmax_layer(inputs)
    inputs_padding = model_utils.get_padding(inputs)

    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(embedded_inputs)[1]
      pos_encoding = model_utils.get_position_encoding(
          length, self.params["hidden_size"])
      encoder_inputs = embedded_inputs + pos_encoding

    if self.train:
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, 1 - self.params["layer_postprocess_dropout"])

    return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
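# A minimal sketch (not the library source) of what model_utils.
# get_position_encoding is assumed to compute: the sinusoidal signal from
# "Attention Is All You Need", with the sin and cos halves concatenated along
# the hidden dimension. For illustration only; hidden_size is assumed even.
import numpy as np

def sinusoidal_position_encoding(length, hidden_size,
                                 min_timescale=1.0, max_timescale=1.0e4):
  """Returns a float32 array of shape [length, hidden_size]."""
  position = np.arange(length, dtype=np.float32)
  num_timescales = hidden_size // 2
  # Geometric progression of wavelengths from min_timescale to max_timescale.
  log_timescale_increment = (
      np.log(max_timescale / min_timescale) / max(num_timescales - 1, 1))
  inv_timescales = min_timescale * np.exp(
      np.arange(num_timescales, dtype=np.float32) * -log_timescale_increment)
  scaled_time = position[:, np.newaxis] * inv_timescales[np.newaxis, :]
  return np.concatenate([np.sin(scaled_time), np.cos(scaled_time)], axis=1)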
def encode(self, inputs, attention_bias, training):
  """Generate continuous representation for inputs.

  Args:
    inputs: int tensor with shape [batch_size, input_length].
    attention_bias: float tensor with shape [batch_size, 1, 1, input_length].
    training: boolean, whether in training mode or not.

  Returns:
    float tensor with shape [batch_size, input_length, hidden_size]
  """
  with tf.name_scope("encode"):
    # Prepare inputs to the layer stack by adding positional encodings and
    # applying dropout.
    embedded_inputs = self.embedding_softmax_layer(inputs)
    embedded_inputs = tf.cast(embedded_inputs, self.params["dtype"])
    inputs_padding = model_utils.get_padding(inputs)
    attention_bias = tf.cast(attention_bias, self.params["dtype"])

    with tf.name_scope("add_pos_encoding"):
      pos_encoding = self.position_embedding(inputs=embedded_inputs)
      pos_encoding = tf.cast(pos_encoding, self.params["dtype"])
      encoder_inputs = embedded_inputs + pos_encoding

    if training:
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, rate=self.params["layer_postprocess_dropout"])

    return self.encoder_stack(
        encoder_inputs, attention_bias, inputs_padding, training=training)
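# Note on the two encode variants above: TF1's tf.nn.dropout took keep_prob,
# so the older version passes 1 - dropout_rate, while the TF2 version passes
# rate= with the drop probability directly. A hypothetical call sketch
# (model/params construction elided; get_padding_bias is the companion helper
# assumed to build the [batch, 1, 1, length] additive bias from padded ids):
#
#   inputs = tf.constant([[5, 9, 2, 0, 0], [3, 1, 4, 7, 0]])  # 0 = padding
#   attention_bias = model_utils.get_padding_bias(inputs)
#   encoder_outputs = model.encode(inputs, attention_bias, training=True)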
def get_attention_bias(input_tensor,
                       bias_type,
                       padding_value=0,
                       max_length=None):
  """A helper function to get various attention bias tensors."""
  if bias_type not in ("single_cross", "multi_cross", "decoder_self"):
    raise ValueError("Invalid attention bias type: %s" % bias_type)
  if bias_type == "single_cross":
    length = tf_utils.get_shape_list(input_tensor, expected_rank=2)[1]
    bias = transformer_utils.get_padding_bias(
        input_tensor, padding_value=padding_value)
  elif bias_type == "multi_cross":
    length = tf_utils.get_shape_list(input_tensor, expected_rank=3)[2]
    padding = transformer_utils.get_padding(
        input_tensor, padding_value=padding_value)
    bias = padding * -1e9
  else:
    if max_length is not None:
      length = max_length
    else:
      length = tf_utils.get_shape_list(input_tensor, expected_rank=2)[1]
    bias = transformer_utils.get_decoder_self_attention_bias(length)

  return tf.where(bias < 0, tf.zeros_like(bias), tf.ones_like(bias))
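# Usage sketch (illustrative values; padding id 0 is an assumption): the
# trailing tf.where turns the additive bias into a 0/1 mask, 1 = may attend,
# 0 = masked, because only the large-negative entries fall below zero.
ids = tf.constant([[4, 7, 0, 0]])                      # rank-2 [batch, length]
cross_mask = get_attention_bias(ids, "single_cross")   # zeros out padded keys
causal_mask = get_attention_bias(ids, "decoder_self")  # lower-triangular mask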
def test_get_padding(self):
  x = tf.constant([[1, 0, 0, 0, 2], [3, 4, 0, 0, 0], [0, 5, 6, 0, 7]])
  padding = model_utils.get_padding(x, padding_value=0)
  self.assertAllEqual(
      [[0, 1, 1, 1, 0], [0, 0, 1, 1, 1], [1, 0, 0, 1, 0]], padding)
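# A minimal sketch (assumed behavior, not the library source) of the helper
# the test exercises: get_padding marks positions equal to padding_value with
# 1.0 and real tokens with 0.0, matching the expected tensor above.
def get_padding(x, padding_value=0, dtype=tf.float32):
  """Returns a float tensor: 1.0 at padding positions, 0.0 elsewhere."""
  with tf.name_scope("padding"):
    return tf.cast(tf.equal(x, padding_value), dtype)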