Example #1
  def __init__(self,
               word_vocab_size,
               word_embed_size,
               type_vocab_size,
               output_embed_size,
               max_sequence_length=512,
               normalization_type='no_norm',
               initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
               dropout_rate=0.1,
               **kwargs):
    """Class initialization.

    Args:
      word_vocab_size: Number of words in the vocabulary.
      word_embed_size: Word embedding size.
      type_vocab_size: Number of word types.
      output_embed_size: Embedding size for the final embedding output.
      max_sequence_length: Maximum length of input sequence.
      normalization_type: String. The type of normalization to apply; only
        `no_norm` and `layer_norm` are supported.
      initializer: The initializer to use for the embedding weights and
        linear projection weights.
      dropout_rate: Dropout rate.
      **kwargs: Additional keyword arguments passed to the base layer.
    """
    super(MobileBertEmbedding, self).__init__(**kwargs)
    self.word_vocab_size = word_vocab_size
    self.word_embed_size = word_embed_size
    self.type_vocab_size = type_vocab_size
    self.output_embed_size = output_embed_size
    self.max_sequence_length = max_sequence_length
    self.normalization_type = normalization_type
    self.initializer = tf.keras.initializers.get(initializer)
    self.dropout_rate = dropout_rate

    self.word_embedding = on_device_embedding.OnDeviceEmbedding(
        self.word_vocab_size,
        self.word_embed_size,
        initializer=initializer,
        name='word_embedding')
    self.type_embedding = on_device_embedding.OnDeviceEmbedding(
        self.type_vocab_size,
        self.output_embed_size,
        initializer=initializer,
        name='type_embedding')
    self.pos_embedding = position_embedding.PositionEmbedding(
        max_length=max_sequence_length,
        initializer=initializer,
        name='position_embedding')
    self.word_embedding_proj = tf.keras.layers.experimental.EinsumDense(
        'abc,cd->abd',
        output_shape=[None, self.output_embed_size],
        kernel_initializer=initializer,
        bias_axes='d',
        name='embedding_projection')
    self.layer_norm = _get_norm_layer(normalization_type, 'embedding_norm')
    self.dropout_layer = tf.keras.layers.Dropout(
        self.dropout_rate,
        name='embedding_dropout')
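
The MobileBERT-specific step in the constructor above is the einsum projection from the narrow word embedding (word_embed_size) up to the transformer width (output_embed_size). Below is a minimal sketch that isolates just that projection with plain tf.keras; the sizes are placeholder values, not taken from the snippet:

import tensorflow as tf

# Placeholder sizes for illustration only.
word_embed_size = 128
output_embed_size = 512

# The same einsum projection used in the constructor:
# (batch, seq, word_embed_size) -> (batch, seq, output_embed_size).
projection = tf.keras.layers.experimental.EinsumDense(
    'abc,cd->abd',
    output_shape=[None, output_embed_size],
    bias_axes='d',
    kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02))

word_embeddings = tf.ones((2, 16, word_embed_size))
print(projection(word_embeddings).shape)  # (2, 16, 512)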
Example #2
    def test_dynamic_layer_output_shape(self):
        max_sequence_length = 40
        test_layer = position_embedding.PositionEmbedding(
            use_dynamic_slicing=True, max_sequence_length=max_sequence_length)
        # Create a 3-dimensional input (the first dimension is implicit).
        width = 30
        input_tensor = tf.keras.Input(shape=(None, width))
        output_tensor = test_layer(input_tensor)

        # When using dynamic positional embedding shapes, the output is expected
        # to be the same as the input shape in all dimensions - but may be None if
        # the input shape is None there.
        expected_output_shape = [None, None, width]
        self.assertEqual(expected_output_shape, output_tensor.shape.as_list())
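
A position embedding layer like this one is typically added onto the token embeddings inside a model, which is where the dynamic output shape above matters. A minimal sketch of that pattern, assuming the layer can be imported from the TF Model Garden as official.nlp.modeling.layers.position_embedding (adjust the import to your checkout):

import tensorflow as tf
from official.nlp.modeling.layers import position_embedding  # assumed path

width = 30
token_ids = tf.keras.Input(shape=(None,), dtype=tf.int32)
word_embeddings = tf.keras.layers.Embedding(1000, width)(token_ids)
pos_layer = position_embedding.PositionEmbedding(
    use_dynamic_slicing=True, max_sequence_length=40)
# The position embeddings broadcast-add onto the word embeddings, so the
# model handles any sequence length up to max_sequence_length.
embeddings = word_embeddings + pos_layer(word_embeddings)
model = tf.keras.Model(token_ids, embeddings)
print(model.output_shape)  # (None, None, 30)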
Example #3
    def test_float16_dtype(self):
        test_layer = position_embedding.PositionEmbedding(dtype="float16")
        # Create a 3-dimensional input (the first dimension is implicit).
        sequence_length = 21
        width = 30
        input_tensor = tf.keras.Input(shape=(sequence_length, width))
        output_tensor = test_layer(input_tensor)

        # When using static positional embedding shapes, the output is expected
        # to be the same as the input shape in all dimensions save batch.
        expected_output_shape = [None, sequence_length, width]
        self.assertEqual(expected_output_shape, output_tensor.shape.as_list())
        # The output dtype should be tf.float16, as set in the constructor.
        self.assertEqual(tf.float16, output_tensor.dtype)
Example #4
    def test_dynamic_layer_slicing(self):
        max_sequence_length = 40
        test_layer = position_embedding.PositionEmbedding(
            use_dynamic_slicing=True, max_sequence_length=max_sequence_length)
        # Create a 3-dimensional input (the first dimension is implicit).
        width = 30
        input_tensor = tf.keras.Input(shape=(None, width))
        output_tensor = test_layer(input_tensor)

        model = tf.keras.Model(input_tensor, output_tensor)

        # Create input data that is shorter than max_sequence_length, which should
        # trigger a down-slice.
        input_length = 17
        # Note: This test explicitly uses a batch size of 1. This is to get around
        # Keras' restriction on Model invocations: inputs are expected to have the
        # same batch cardinality as outputs. In practice, this layer should be used
        # inside a model, where it can be projected when added to another tensor.
        input_data = np.ones((1, input_length, width))
        output_data = model.predict(input_data)

        self.assertAllEqual([1, input_length, width], output_data.shape)
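
The "down-slice" the test triggers amounts to taking only the first input_length rows of the learned (max_sequence_length, width) position table. A minimal sketch of that idea with plain TF ops (not the model-garden layer itself):

import tensorflow as tf

max_sequence_length, width = 40, 30
position_table = tf.Variable(
    tf.keras.initializers.TruncatedNormal(stddev=0.02)(
        shape=(max_sequence_length, width)))

def position_embeddings(inputs):
  # Slice the table down to the actual sequence length of `inputs`.
  seq_length = tf.shape(inputs)[1]
  return position_table[:seq_length, :]

x = tf.ones((1, 17, width))
print(position_embeddings(x).shape)  # (17, 30)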