Example #1
    def decode_pass(self, targets, encoder_outputs, inputs_attention_bias):
        """Generate logits for each value in the target sequence.

    Args:
      targets: target values for the output sequence.
        int tensor with shape [batch_size, target_length]
      encoder_outputs: continuous representation of input sequence.
        float tensor with shape [batch_size, input_length, hidden_size]
      inputs_attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float32 tensor with shape [batch_size, target_length, vocab_size]
    """
        # Prepare inputs to decoder layers by shifting targets, adding positional
        # encoding and applying dropout.
        decoder_inputs = self.embedding_softmax_layer(targets)
        with tf.name_scope("shift_targets"):
            # Shift targets to the right, and remove the last element
            decoder_inputs = tf.pad(
                decoder_inputs,
                [[0, 0], [1, 0], [0, 0]],
            )[:, :-1, :]
        with tf.name_scope("add_pos_encoding"):
            length = tf.shape(decoder_inputs)[1]
            # decoder_inputs += utils.get_position_encoding(
            #    length, self.params["hidden_size"])
            decoder_inputs += tf.cast(
                utils.get_position_encoding(length,
                                            self.params["hidden_size"]),
                dtype=self.params['dtype'],
            )
        if self.mode == "train":
            decoder_inputs = tf.nn.dropout(
                decoder_inputs,
                keep_prob=1 - self.params["layer_postprocess_dropout"])

        # Build the causal (look-ahead) self-attention bias for the decoder.
        decoder_self_attention_bias = utils.get_decoder_self_attention_bias(
            length, dtype=tf.float32
            # dtype=self._params["dtype"]
        )

        # Run the decoder layer stack.
        outputs = self._call(
            decoder_inputs=decoder_inputs,
            encoder_outputs=encoder_outputs,
            decoder_self_attention_bias=decoder_self_attention_bias,
            attention_bias=inputs_attention_bias,
        )

        logits = self.embedding_softmax_layer.linear(outputs)
        return logits
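The "shift_targets" block above prepares teacher-forcing inputs: one zero timestep is padded on the left and the last timestep is dropped, so position t of the decoder input holds the embedding of target t-1. A minimal, self-contained sketch of that trick (illustrative values only, assuming a TensorFlow 1.x session; not part of the original snippet):

import tensorflow as tf

# Fake embedded targets: batch_size=1, target_length=3, hidden_size=2.
decoder_inputs = tf.constant([[[1., 1.], [2., 2.], [3., 3.]]])

# Pad one zero timestep on the left and drop the last timestep.
shifted = tf.pad(decoder_inputs, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]

with tf.Session() as sess:
    print(sess.run(shifted))
    # [[[0. 0.]
    #   [1. 1.]
    #   [2. 2.]]]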
Example #2
    def _get_symbols_to_logits_fn(self, max_decode_length):
        """Returns a decoding function that calculates logits of the next tokens."""

        timing_signal = utils.get_position_encoding(
            max_decode_length + 1,
            self.params["hidden_size"],
        )
        decoder_self_attention_bias = utils.get_decoder_self_attention_bias(
            max_decode_length, dtype=tf.float32
            # dtype=self._params["dtype"]
        )

        def symbols_to_logits_fn(ids, i, cache):
            """Generate logits for next potential IDs.

      Args:
        ids: Current decoded sequences.
          int tensor with shape [batch_size * beam_size, i + 1]
        i: Loop index
        cache: dictionary of values storing the encoder output, encoder-decoder
          attention bias, and previous decoder attention values.

      Returns:
        Tuple of
          (logits with shape [batch_size * beam_size, vocab_size],
           updated cache values)
      """
            # Set decoder input to the last generated IDs
            decoder_input = ids[:, -1:]

            # Preprocess decoder input by getting embeddings and adding timing signal.
            decoder_input = self.embedding_softmax_layer(decoder_input)
            decoder_input += tf.cast(x=timing_signal[i:i + 1],
                                     dtype=decoder_input.dtype)

            self_attention_bias = decoder_self_attention_bias[:, :,
                                                              i:i + 1, :i + 1]

            decoder_outputs = self._call(
                decoder_input,
                cache.get("encoder_outputs"),
                self_attention_bias,
                cache.get("encoder_decoder_attention_bias"),
                cache,
            )
            logits = self.embedding_softmax_layer.linear(decoder_outputs)
            logits = tf.squeeze(logits, axis=[1])
            return tf.cast(logits, tf.float32), cache

        return symbols_to_logits_fn
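In symbols_to_logits_fn the full causal bias is built once and only the row for the current step is sliced out: decoder_self_attention_bias[:, :, i:i + 1, :i + 1] has shape [1, 1, 1, i + 1] and, under the usual convention (0.0 where attention is allowed, a large negative value where it is masked), is all zeros because step i may attend to itself and every earlier position. A hedged NumPy sketch of that convention (an illustration, not the library's utils implementation):

import numpy as np

def toy_decoder_self_attention_bias(length, neg_inf=-1e9):
    # [1, 1, length, length] additive causal bias: 0.0 on and below the
    # diagonal, a large negative value above it.
    allowed = np.tril(np.ones((length, length), dtype=np.float32))
    bias = (1.0 - allowed) * neg_inf
    return bias[np.newaxis, np.newaxis, :, :]

bias = toy_decoder_self_attention_bias(4)
i = 2
step_bias = bias[:, :, i:i + 1, :i + 1]
print(step_bias.shape)  # (1, 1, 1, 3)
print(step_bias)        # [[[[0. 0. 0.]]]]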
Example #3
  def _encode(self, input_dict):
    training = (self.mode == "train")

    if len(self.layers) == 0:
      # prepare encoder graph
      self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
          self.params["src_vocab_size"], self.params["hidden_size"],
          pad_vocab_to_eight=self.params.get('pad_embeddings_2_eight', False),
      )

      for _ in range(self.params['encoder_layers']):
        # Create sublayers for each layer.
        self_attention_layer = attention_layer.SelfAttention(
          hidden_size=self.params["hidden_size"],
          num_heads=self.params["num_heads"],
          attention_dropout=self.params["attention_dropout"],
          train=training,
          regularizer=self.regularizer
        )
        feed_forward_network = ffn_layer.FeedFowardNetwork(
          hidden_size=self.params["hidden_size"],
          filter_size=self.params["filter_size"],
          relu_dropout=self.params["relu_dropout"],
          train=training,
          regularizer=self.regularizer
        )

        self.layers.append([
            PrePostProcessingWrapper(self_attention_layer, self.params,
                                     training),
            PrePostProcessingWrapper(feed_forward_network, self.params,
                                     training)
        ])

      # final normalization layer.
      print("Encoder:", self.norm_params["type"], self.mode)
      if self.norm_params["type"] =="batch_norm":
        self.output_normalization = Transformer_BatchNorm(
          training=training,
          params=self.norm_params)
      else:
        self.output_normalization = LayerNormalization(
          hidden_size=self.params["hidden_size"], params=self.norm_params)

    # actual encoder part
    with tf.name_scope("encode"):
      inputs = input_dict['source_tensors'][0]
      # Prepare inputs to the layer stack by adding positional encodings and
      # applying dropout.
      embedded_inputs = self.embedding_softmax_layer(inputs)
      if self.params["remove_padding"]:
        inputs_padding = utils.get_padding(inputs)
        #inputs_padding = utils.get_padding(inputs,dtype=self._params["dtype"])
      else:
        inputs_padding = None
      inputs_attention_bias = utils.get_padding_bias(inputs)
      # inputs_attention_bias = utils.get_padding_bias(inputs, dtype=self._params["dtype"])

      with tf.name_scope("add_pos_encoding"):
        length = tf.shape(embedded_inputs)[1]
        pos_encoding = utils.get_position_encoding(
            length, self.params["hidden_size"],
        )
        encoder_inputs = embedded_inputs + tf.cast(x=pos_encoding,
                                                   dtype=embedded_inputs.dtype)

      if self.mode == "train":
        encoder_inputs = tf.nn.dropout(
            encoder_inputs,
            keep_prob=1.0 - self.params["layer_postprocess_dropout"],
        )

      encoded = self._call(encoder_inputs, inputs_attention_bias,
                           inputs_padding)
      return {'outputs': encoded,
              'inputs_attention_bias': inputs_attention_bias,
              'state': None,
              'src_lengths': input_dict['source_tensors'][1],
              'embedding_softmax_layer': self.embedding_softmax_layer,
              'encoder_input': inputs}
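utils.get_padding_bias above returns an additive bias of shape [batch_size, 1, 1, input_length] so that attention weights on padded positions are pushed toward zero. A hedged sketch of that common convention (assuming pad id 0; an illustration, not the library's own implementation):

import tensorflow as tf

def toy_padding_bias(inputs, pad_id=0, neg_inf=-1e9):
    # inputs: int tensor [batch_size, input_length]. Padded positions get a
    # large negative additive bias so their softmax attention weight is ~0.
    padding = tf.cast(tf.equal(inputs, pad_id), tf.float32)
    bias = padding * neg_inf
    return tf.expand_dims(tf.expand_dims(bias, axis=1), axis=1)

# Example: the second sequence has two padded positions at the end.
inputs = tf.constant([[5, 7, 2, 9], [4, 3, 0, 0]])
bias = toy_padding_bias(inputs)  # shape [2, 1, 1, 4]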