Example #1
    def predict(self, encoder_outputs, encoder_decoder_attention_bias):
        """Return predicted sequence."""
        batch_size = tf.shape(input=encoder_outputs)[0]
        input_length = tf.shape(input=encoder_outputs)[1]
        max_decode_length = input_length + self.params.extra_decode_length

        symbols_to_logits_fn = self._get_symbols_to_logits_fn(
            max_decode_length)

        # Create initial set of IDs that will be passed into symbols_to_logits_fn.
        initial_ids = tf.zeros([batch_size], dtype=tf.int32)

        # Create cache storing decoder attention values for each layer.
        cache = {
            "layer_%d" % layer: {
                "k": tf.zeros([batch_size, 0, self.params.hidden_size],
                              dtype=tf.bfloat16),
                "v": tf.zeros([batch_size, 0, self.params.hidden_size],
                              dtype=tf.bfloat16),
            }
            for layer in range(self.params.num_hidden_layers)
        }

        # Initialize the encoder-decoder projection cache with the encoder
        # outputs and attention bias (this variant stores them on the decoder
        # stack instead of in `cache`).
        self.decoder_stack.cache_encdec(encoder_outputs,
                                        encoder_decoder_attention_bias)

        # Log the beam search hyperparameters, then use beam search to find
        # the top beam_size sequences and scores.
        mlperf_log.transformer_print(
            key=mlperf_log.MODEL_HP_SEQ_BEAM_SEARCH,
            value={
                "vocab_size": self.params.vocab_size,
                "beam_size": self.params.beam_size,
                "alpha": self.params.alpha,
                "extra_decode_length": self.params.extra_decode_length,
            })
        decoded_ids, scores = beam_search.sequence_beam_search(
            symbols_to_logits_fn=symbols_to_logits_fn,
            initial_ids=initial_ids,
            initial_cache=cache,
            vocab_size=self.params.vocab_size,
            beam_size=self.params.beam_size,
            alpha=self.params.alpha,
            max_decode_length=max_decode_length,
            eos_id=EOS_ID)

        # Get the top sequence for each batch element
        top_decoded_ids = decoded_ids[:, 0, 1:]
        top_scores = scores[:, 0]

        return {"outputs": top_decoded_ids, "scores": top_scores}
Example #2
    def predict(self, encoder_outputs, encoder_decoder_attention_bias):
        """Return predicted sequence."""
        batch_size = encoder_outputs.shape[0]
        input_length = encoder_outputs.shape[1]
        max_decode_length = input_length + self.param.extra_decode_length

        symbols_to_logits_fn = self._get_symbols_to_logits_fn(
            max_decode_length)

        initial_ids = nd.zeros(shape=(batch_size,), ctx=mx.cpu())
        # Create cache storing decoder attention values for each layer.
        cache = {}
        for layer in range(self.param.num_hidden_layers):
            cache["layer_%d" % layer] = {
                # Length-1 zero placeholders rather than the zero-length
                # tensors used in the TF variants (presumably because
                # zero-size dimensions are not supported here); the "init"
                # flag lets the decoder drop them on the first step.
                "k": nd.zeros(shape=(batch_size, 1, self.param.hidden_size),
                              ctx=mx.cpu()),
                "v": nd.zeros(shape=(batch_size, 1, self.param.hidden_size),
                              ctx=mx.cpu()),
                "init": 1,
            }

        cache["encoder_outputs"] = encoder_outputs.as_in_context(mx.cpu())
        cache[
            "encoder_decoder_attention_bias"] = encoder_decoder_attention_bias.as_in_context(
                mx.cpu())

        decoded_ids, scores = beam_search.sequence_beam_search(
            symbols_to_logits_fn=symbols_to_logits_fn,
            initial_ids=initial_ids,
            initial_cache=cache,
            vocab_size=self.param.vocab_size,
            beam_size=self.param.beam_size,
            alpha=self.param.alpha,
            max_decode_length=max_decode_length,
            eos_id=EOS_ID)

        top_decoded_ids = decoded_ids[:, 0, 1:]
        top_scores = scores[:, 0]

        return {"outputs": top_decoded_ids, "scores": top_scores}
Example #3
    def predict(self, encoder_outputs, encoder_decoder_attention_bias):
        """Return predicted sequence."""
        batch_size = tf.shape(encoder_outputs)[0]
        input_length = tf.shape(encoder_outputs)[1]
        max_decode_length = input_length + self.params["extra_decode_length"]

        symbols_to_logits_fn = self._get_symbols_to_logits_fn(
            max_decode_length)

        # Create initial set of IDs that will be passed into symbols_to_logits_fn.
        # <BOS>: 0
        initial_ids = tf.zeros([batch_size], dtype=tf.int32)

        # Create cache storing decoder attention values for each layer.
        cache = {
            "layer_%d" % layer: {
                # tf.Tensor([], shape=(batch_size, 0, hidden_size), dtype=float32)
                "k": tf.zeros([batch_size, 0, self.params["hidden_size"]]),
                "v": tf.zeros([batch_size, 0, self.params["hidden_size"]]),
            }
            for layer in range(self.params["num_hidden_layers"])
        }

        # Add encoder output and attention bias to the cache.
        cache["encoder_outputs"] = encoder_outputs
        cache["encoder_decoder_attention_bias"] = (
            encoder_decoder_attention_bias)

        # Use beam search to find the top beam_size sequences and scores.
        # decoded_ids's shape: [batch_size, beam_size, max_decode_length]
        # scores's shape: [batch_size, beam_size]
        decoded_ids, scores = beam_search.sequence_beam_search(
            symbols_to_logits_fn=symbols_to_logits_fn,
            initial_ids=initial_ids,
            initial_cache=cache,
            vocab_size=self.params["vocab_size"],
            beam_size=self.params["beam_size"],
            alpha=self.params["alpha"],
            max_decode_length=max_decode_length,
            eos_id=EOS_ID)

        # Get the top sequence for each batch element
        top_decoded_ids = decoded_ids[:, 0, 1:]  # without <BOS>
        top_scores = scores[:, 0]

        return {"outputs": top_decoded_ids, "scores": top_scores}
Example #4
    def predict(self, encoder_outputs, encoder_decoder_attention_bias):
        """
        :param encoder_outputs: [batch_size, input_length, hidden_size]
        :param encoder_decoder_attention_bias: [batch_size, 1, 1, length]
        :return: dict
        """
        batch_size = tf.shape(encoder_outputs)[0]
        max_decode_length = self.params.get('max_decode_length')

        symbols_to_logits_fn = self._get_symbols_to_logits_fn(
            max_decode_length)

        initial_ids = tf.zeros([batch_size], dtype=tf.int32)

        # Create cache storing decoder attention values for each layer.
        cache = {
            "layer_%d" % layer: {
                "k": tf.zeros([batch_size, 0, self.params["hidden_size"]]),
                "v": tf.zeros([batch_size, 0, self.params["hidden_size"]]),
            }
            for layer in range(self.params["num_blocks"])
        }

        # Add encoder output and attention bias to the cache.
        cache["encoder_outputs"] = encoder_outputs
        cache["encoder_decoder_attention_bias"] = (
            encoder_decoder_attention_bias)

        # decoded_ids: [batch_size, beam_size, max_decode_length]
        # scores: [batch_size, beam_size]
        decoded_ids, scores = beam_search.sequence_beam_search(
            symbols_to_logits_fn=symbols_to_logits_fn,
            initial_ids=initial_ids,
            initial_cache=cache,
            vocab_size=self.params.get('vocab_size'),
            beam_size=self.params.get('beam_size'),
            alpha=self.params.get('alpha'),
            max_decode_length=max_decode_length,
            eos_id=self.params.get('eos_id'),
        )

        top_decoded_ids = decoded_ids[:, 0, 1:]
        top_scores = scores[:, 0]

        return {"outputs": top_decoded_ids, "scores": top_scores}
Example #5
  def predict(self, encoder_outputs, encoder_decoder_attention_bias):
    """Return predicted sequence."""
    batch_size = tf.shape(encoder_outputs)[0]
    input_length = tf.shape(encoder_outputs)[1]
    max_decode_length = input_length + self.params.extra_decode_length

    symbols_to_logits_fn = self._get_symbols_to_logits_fn(max_decode_length)

    # Create initial set of IDs that will be passed into symbols_to_logits_fn.
    initial_ids = tf.zeros([batch_size], dtype=tf.int32)

    # Create cache storing decoder attention values for each layer.
    cache = {
        "layer_%d" % layer: {
            "k": tf.zeros([batch_size, 0, self.params.hidden_size]),
            "v": tf.zeros([batch_size, 0, self.params.hidden_size]),
        } for layer in range(self.params.num_hidden_layers)}

    # Add encoder output and attention bias to the cache.
    cache["encoder_outputs"] = encoder_outputs
    cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias

    # Use beam search to find the top beam_size sequences and scores.
    decoded_ids, scores = beam_search.sequence_beam_search(
        symbols_to_logits_fn=symbols_to_logits_fn,
        initial_ids=initial_ids,
        initial_cache=cache,
        vocab_size=self.params.vocab_size,
        beam_size=self.params.beam_size,
        alpha=self.params.alpha,
        max_decode_length=max_decode_length,
        eos_id=EOS_ID)

    # Get the top sequence for each batch element
    top_decoded_ids = decoded_ids[:, 0, 1:]
    top_scores = scores[:, 0]

    return {"outputs": top_decoded_ids, "scores": top_scores}
Example #6
  def predict(self, encoder_outputs, encoder_decoder_attention_bias, eos_id):
    """Return predicted sequence."""
    batch_size = tf.shape(encoder_outputs)[0]
    input_length = tf.shape(encoder_outputs)[1]
    max_decode_length = input_length + self.params.extra_decode_length

    symbols_to_logits_fn = self._get_symbols_to_logits_fn(max_decode_length)

    # Create initial set of IDs that will be passed into symbols_to_logits_fn.
    initial_ids = tf.zeros([batch_size], dtype=tf.int32)

    # Create cache storing decoder attention values for each layer.
    cache = {
        "layer_%d" % layer: {
            "k": tf.zeros([batch_size, 0, self.params.hidden_size]),
            "v": tf.zeros([batch_size, 0, self.params.hidden_size]),
        } for layer in range(self.params.num_hidden_layers)}

    # Add encoder output and attention bias to the cache.
    cache["encoder_outputs"] = encoder_outputs
    cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias

    # Use beam search to find the top beam_size sequences and scores.
    decoded_ids, scores = beam_search.sequence_beam_search(
        symbols_to_logits_fn=symbols_to_logits_fn,
        initial_ids=initial_ids,
        initial_cache=cache,
        vocab_size=self.params.vocab_size,
        beam_size=self.params.beam_size,
        alpha=self.params.alpha,
        max_decode_length=max_decode_length,
        eos_id=eos_id)

    # Keep all beam sequences for each batch element (only the leading <BOS>
    # token is stripped); scores are still reported for the top beam only.
    top_decoded_ids = decoded_ids[:, :, 1:]
    top_scores = scores[:, 0]

    return {"outputs": top_decoded_ids, "scores": top_scores}
Example #7
    def predict(self, encoder_outputs, encoder_decoder_attention_bias):
        """Return predicted sequence."""
        if ModeKeys.is_predict_one(self.mode):
            batch_size = 1
        else:
            batch_size = tf.shape(encoder_outputs)[0]
        input_length = tf.shape(encoder_outputs)[1]
        max_decode_length = input_length + self.params.extra_decode_length

        symbols_to_logits_fn = self._get_symbols_to_logits_fn(
            max_decode_length)

        # Create initial set of IDs that will be passed into symbols_to_logits_fn.
        initial_ids = tf.zeros([batch_size], dtype=tf.int32)

        # Create cache storing decoder attention values for each layer.
        cache = {
            "layer_%d" % layer: {
                "k": tf.zeros([batch_size, 0, self.params.hidden_size]),
                "v": tf.zeros([batch_size, 0, self.params.hidden_size]),
            }
            for layer in range(self.params.num_hidden_layers)
        }

        # Add encoder output and attention bias to the cache.
        cache["encoder_outputs"] = encoder_outputs
        if not ModeKeys.is_predict_one(self.mode):
            cache["encoder_decoder_attention_bias"] = (
                encoder_decoder_attention_bias)

        if self.params.beam_size > 1:
            print("Decoding with beam search, beam_size = %d" %
                  self.params.beam_size)
            # Use beam search to find the top beam_size sequences and scores.
            decoded_ids, scores = beam_search.sequence_beam_search(
                symbols_to_logits_fn=symbols_to_logits_fn,
                initial_ids=initial_ids,
                initial_cache=cache,
                vocab_size=self.params.target_vocab_size,
                beam_size=self.params.beam_size,
                alpha=self.params.alpha,
                max_decode_length=max_decode_length,
                eos_id=EOS_ID)

            # Get the top sequence for each batch element
            top_decoded_ids = decoded_ids[:, 0, 1:]
            top_scores = scores[:, 0]

            return {"outputs": top_decoded_ids, "scores": top_scores}

        else:
            # Fall back to greedy decoding inside a tf.while_loop.
            def inner_loop(i, finished, next_id, decoded_ids, cache):
                """One step of greedy decoding."""
                logits, cache = symbols_to_logits_fn(next_id, i, cache)
                next_id = tf.argmax(logits, -1, output_type=tf.int32)
                finished |= tf.equal(next_id, EOS_ID)
                next_id = tf.reshape(next_id, shape=[-1, 1])
                decoded_ids = tf.concat([decoded_ids, next_id], axis=1)
                return i + 1, finished, next_id, decoded_ids, cache

            def is_not_finished(i, finished, *_):
                return (i < max_decode_length) & tf.logical_not(
                    tf.reduce_all(finished))

            decoded_ids = tf.zeros([batch_size, 0], dtype=tf.int32)
            finished = tf.fill([batch_size], False)
            next_id = tf.zeros([batch_size, 1], dtype=tf.int32)
            _, _, _, decoded_ids, _ = tf.while_loop(
                is_not_finished,
                inner_loop,
                [tf.constant(0), finished, next_id, decoded_ids, cache],
                shape_invariants=[
                    tf.TensorShape([]),
                    tf.TensorShape([None]),
                    tf.TensorShape([None, None]),
                    tf.TensorShape([None, None]),
                    nest.map_structure(get_state_shape_invariants, cache),
                ])

            return {"outputs": decoded_ids, "scores": tf.ones([batch_size, 1])}