# Example #1
    def __init__(self,
                 vocab_size=33708,
                 embedding_width=512,
                 dropout_rate=0.0,
                 padded_decode=False,
                 decode_max_length=None,
                 extra_decode_length=0,
                 beam_size=4,
                 alpha=0.6,
                 encoder_layer=None,
                 decoder_layer=None,
                 dtype=tf.float32,
                 eos_id=EOS_ID,
                 **kwargs):
        """Builds the sub-layers of a sequence-to-sequence Transformer.

        Args:
          vocab_size: Size of the token vocabulary.
          embedding_width: Width of the token-embedding hidden layer.
          dropout_rate: Dropout probability.
          padded_decode: If True, decoding uses max_sequence_length padding;
            if False, no such padding is used.
          decode_max_length: Maximum number of steps to decode a sequence.
          extra_decode_length: Extra steps beam search runs while decoding.
          beam_size: Number of beams used by beam search.
          alpha: Strength of length normalization in beam search.
          encoder_layer: An already-initialized encoder layer.
          decoder_layer: An already-initialized decoder layer.
          dtype: Floating-point dtype of the model.
          eos_id: Id of the end-of-sentence token.
          **kwargs: Forwarded to the base constructor.
        """
        super(Seq2SeqTransformer, self).__init__(**kwargs)
        # Plain scalar configuration, stashed for later use.
        self._vocab_size = vocab_size
        self._embedding_width = embedding_width
        self._dropout_rate = dropout_rate
        self._padded_decode = padded_decode
        self._decode_max_length = decode_max_length
        self._extra_decode_length = extra_decode_length
        self._beam_size = beam_size
        self._alpha = alpha
        self._dtype = dtype
        self._eos_id = eos_id
        # Token embedding: normal init with stddev 1/sqrt(width), outputs
        # scaled by sqrt(width) (standard Transformer embedding scaling).
        embedding_init = tf.random_normal_initializer(
            mean=0., stddev=self._embedding_width**-0.5)
        self.embedding_lookup = keras_nlp.layers.OnDeviceEmbedding(
            vocab_size=self._vocab_size,
            embedding_width=self._embedding_width,
            initializer=embedding_init,
            scale_factor=self._embedding_width**0.5)
        self.position_embedding = layers.RelativePositionEmbedding(
            hidden_size=self._embedding_width)
        self.encoder_layer = encoder_layer
        self.decoder_layer = decoder_layer
        self.encoder_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)
        self.decoder_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)
    def __init__(self, hparams):
        """Builds the captioning model's layers, metrics and word vocabulary.

        Args:
          hparams: Dict of hyperparameters. Keys read here: 'hidden_size',
            'embedding_file', 'vocab_size', 'glove_trainable',
            'word_vocab_path'.
        """
        super(ScreenCaptionModel, self).__init__()
        self._hparams = hparams
        with tf.name_scope('captioning'):
            self._word_embedding_layer = EmbeddingLayer(
                name='word',
                hidden_dim=self._hparams['hidden_size'],
                embedding_file=self._hparams['embedding_file'],
                vocab_size=self._hparams['vocab_size'],
                embedding_dim=self._hparams['hidden_size'],  # not used
                trainable=self._hparams['glove_trainable'])
            self._position_embedding_layer = layers.RelativePositionEmbedding(
                self._hparams['hidden_size'])

            self._encoder = EncoderLayer(self._hparams,
                                         self._word_embedding_layer)
            self._decoder = DecoderLayer(self._hparams,
                                         self._word_embedding_layer,
                                         self._position_embedding_layer)
            # Projects decoder output back to vocabulary logits.
            self._word_layer = tf.keras.layers.Dense(
                units=self._hparams['vocab_size'])

        self.model_metrics = {
            'loss': tf.keras.metrics.Mean(name='loss'),
            'caption_loss': tf.keras.metrics.Mean(name='caption_loss'),
            'global_norm': tf.keras.metrics.Mean(name='global_norm'),
        }

        # One mean-tracking metric per caption score; _SCORE_NAMES is
        # presumably a class-level constant — defined outside this view.
        self.caption_metrics = {}
        for score_name in self._SCORE_NAMES:
            scoped_name = 'COCO/{}'.format(score_name)
            self.caption_metrics[scoped_name] = tf.keras.metrics.Mean(
                name=scoped_name)

        # Load the first `vocab_size` lines of the word vocabulary file.
        # Fix: `tf.io.gfile.Open` does not exist in the TF2 API; the file
        # class is `tf.io.gfile.GFile` (`Open` was the TF1 `tf.gfile` alias).
        self._word_vocab = []
        with tf.io.gfile.GFile(self._hparams['word_vocab_path']) as f:
            for index, line in enumerate(f):
                if index >= self._hparams['vocab_size']:
                    break
                self._word_vocab.append(line.strip())
# Example #3
  def __init__(self,
               vocab_size=33708,
               hidden_size=512,
               dropout_rate=0.0,
               padded_decode=False,
               num_replicas=1,
               decode_batch_size=2048,
               decode_max_length=97,
               dtype=tf.float32,
               extra_decode_length=0,
               num_heads=8,
               num_layers=6,
               beam_size=4,
               alpha=0.6,
               encoder_layer=None,
               decoder_layer=None,
               name=None,
               **kwargs):
    """Creates the embedding, positional-embedding and dropout sub-layers.

    Args:
      vocab_size: Size of the token vocabulary.
      hidden_size: Width of the embedding hidden layer.
      dropout_rate: Dropout probability.
      padded_decode: If True, decoding uses max_sequence_length padding;
        if False, no such padding is used.
      num_replicas: Number of replicas for the distribution strategy.
      decode_batch_size: Batch size used while decoding.
      decode_max_length: Maximum number of steps to decode a sequence.
      dtype: Data type.
      extra_decode_length: Extra steps beam search runs while decoding.
      num_heads: Number of attention heads.
      num_layers: Number of identical Transformer layers.
      beam_size: Number of beams for beam search.
      alpha: Strength of length normalization in beam search.
      encoder_layer: An already-initialized encoder layer.
      decoder_layer: An already-initialized decoder layer.
      name: Name of the model.
      **kwargs: Forwarded to the base constructor.
    """
    # NOTE(review): `name` is accepted but not forwarded to super() —
    # preserved as-is; confirm whether it should be passed through.
    super(Seq2SeqTransformer, self).__init__(**kwargs)
    # Plain scalar configuration.
    self._vocab_size = vocab_size
    self._hidden_size = hidden_size
    self._dropout_rate = dropout_rate
    self._padded_decode = padded_decode
    self._num_replicas = num_replicas
    self._decode_batch_size = decode_batch_size
    self._decode_max_length = decode_max_length
    self._dtype = dtype
    self._extra_decode_length = extra_decode_length
    self._num_heads = num_heads
    self._num_layers = num_layers
    self._beam_size = beam_size
    self._alpha = alpha
    # Embedding table: normal init with stddev 1/sqrt(hidden_size),
    # scaled outputs enabled via use_scale.
    embedding_init = tf.random_normal_initializer(
        mean=0., stddev=self._hidden_size**-0.5)
    self.embedding_lookup = layers.OnDeviceEmbedding(
        vocab_size=self._vocab_size,
        embedding_width=self._hidden_size,
        initializer=embedding_init,
        use_scale=True)
    self.position_embedding = layers.RelativePositionEmbedding(
        hidden_size=self._hidden_size)
    self.encoder_layer = encoder_layer
    self.decoder_layer = decoder_layer
    self.encoder_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)
    self.decoder_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)