Exemplo n.º 1
0
    def _runBeamSearchOpHelper(self,
                               b_size,
                               num_beams,
                               seq_len,
                               lm_weight,
                               init_best_score,
                               probs,
                               init_atten_probs,
                               atten_probs,
                               beam_size=3.0,
                               ensure_full_beam=False):
        """Chains `beam_search_step` over `probs` and evaluates the result.

        The search state is zero-initialised, one `beam_search_step` op is
        added per element of `probs` (the element index is used as the step
        number), and the final state is evaluated in a CPU session.

        Args:
          b_size: Size of the per-hyp state tensors (e.g. `cumulative_scores`
            is `[b_size]`).
          num_beams: Size of `best_scores`; `b_size // num_beams` is passed to
            the op as `num_hyps_per_beam`.
          seq_len: Leading dimension of `scores`, `hyps`, `prev_hyps` and
            `done_hyps`.
          lm_weight: Forwarded to the op as `lm_weight`.
          init_best_score: Added to the zero-initialised `best_scores`.
          probs: Iterable of per-step inputs, one search step per element.
            Must be non-empty, otherwise `done` is never assigned.
          init_atten_probs: Passed unchanged as the second input of every
            step.
          atten_probs: Initial attention-probability state; replaced by each
            step's output.
          beam_size: Forwarded to the op as `beam_size`.
          ensure_full_beam: Forwarded to the op as `ensure_full_beam`.

        Returns:
          An 11-tuple of evaluated values: (best_scores, cumulative_scores,
          scores, hyps, prev_hyps, done_hyps, atten_probs, done, scores,
          atten_probs, lm_log_probs). `scores` and `atten_probs` appear twice.
        """
        eos_id = 2
        num_classes = 5
        # Floor division keeps the per-beam hyp count an int on Python 3,
        # where `/` would produce a float.
        num_hyps_per_beam = b_size // num_beams

        best_scores = tf.zeros([num_beams])
        cumulative_scores = tf.zeros([b_size])
        scores = tf.zeros([seq_len, b_size])
        hyps = tf.zeros([seq_len, b_size], dtype=tf.int32)
        prev_hyps = tf.zeros([seq_len, b_size], dtype=tf.int32)
        done_hyps = tf.as_string(tf.zeros([seq_len, b_size], dtype=tf.int32))
        lm_log_probs = tf.random_uniform([b_size, num_classes])
        best_scores += init_best_score

        for i, prob in enumerate(probs):
            # Each step consumes the previous step's outputs as its state.
            (best_scores, cumulative_scores, scores, hyps, prev_hyps,
             done_hyps, atten_probs, done) = py_x_ops.beam_search_step(
                 prob,
                 init_atten_probs,
                 best_scores,
                 cumulative_scores,
                 scores,
                 hyps,
                 prev_hyps,
                 done_hyps,
                 atten_probs, [],
                 i,
                 lm_log_probs,
                 eos_id=eos_id,
                 beam_size=beam_size,
                 ensure_full_beam=ensure_full_beam,
                 num_hyps_per_beam=num_hyps_per_beam,
                 valid_eos_max_logit_delta=0.1,
                 lm_weight=lm_weight)

        with self.session(use_gpu=False) as sess:
            # `scores` and `atten_probs` are fetched twice on purpose: callers
            # unpack an 11-element tuple in exactly this order.
            return tuple(
                sess.run([
                    best_scores, cumulative_scores, scores, hyps, prev_hyps,
                    done_hyps, atten_probs, done, scores, atten_probs,
                    lm_log_probs
                ]))
Exemplo n.º 2
0
  def _runBeamSearchOpHelper(self,
                             b_size,
                             num_beams,
                             seq_len,
                             init_best_score,
                             probs,
                             init_atten_probs,
                             atten_probs,
                             beam_size=3.0,
                             ensure_full_beam=False,
                             force_eos_in_last_step=False,
                             local_eos_threshold=-100.0):
    """Chains `beam_search_step` over `probs` and evaluates the result.

    The search state is zero-initialised, one `beam_search_step` op is added
    per element of `probs` (the element index is used as the step number), and
    the final state is evaluated in a CPU session.

    Args:
      b_size: Size of the per-hyp state tensors (e.g. `cumulative_scores` is
        `[b_size]`).
      num_beams: Size of `best_scores`; `b_size // num_beams` is passed to the
        op as `num_hyps_per_beam`.
      seq_len: Leading dimension of `scores`, `hyps`, `prev_hyps` and
        `done_hyps`.
      init_best_score: Added to the zero-initialised `best_scores`.
      probs: Iterable of per-step inputs, one search step per element. Must be
        non-empty, otherwise `done` is never assigned.
      init_atten_probs: Passed unchanged as the second input of every step.
      atten_probs: Initial attention-probability state; replaced by each
        step's output.
      beam_size: Forwarded to the op as `beam_size`.
      ensure_full_beam: Forwarded to the op as `ensure_full_beam`.
      force_eos_in_last_step: Forwarded to the op as `force_eos_in_last_step`.
      local_eos_threshold: Forwarded to the op as `local_eos_threshold`.

    Returns:
      A 10-tuple of evaluated values: (best_scores, cumulative_scores, scores,
      hyps, prev_hyps, done_hyps, atten_probs, done, scores, atten_probs).
      `scores` and `atten_probs` appear twice.
    """
    eos_id = 2
    # Floor division keeps the per-beam hyp count an int on Python 3, where
    # `/` would produce a float.
    num_hyps_per_beam = b_size // num_beams

    best_scores = tf.zeros([num_beams])
    cumulative_scores = tf.zeros([b_size])
    scores = tf.zeros([seq_len, b_size])
    hyps = tf.zeros([seq_len, b_size], dtype=tf.int32)
    prev_hyps = tf.zeros([seq_len, b_size], dtype=tf.int32)
    done_hyps = tf.as_string(tf.zeros([seq_len, b_size], dtype=tf.int32))
    best_scores += init_best_score

    for i, prob in enumerate(probs):
      # Each step consumes the previous step's outputs as its state.
      (best_scores, cumulative_scores, scores, hyps, prev_hyps, done_hyps,
       atten_probs, done) = py_x_ops.beam_search_step(
           prob,
           init_atten_probs,
           best_scores,
           cumulative_scores,
           scores,
           hyps,
           prev_hyps,
           done_hyps,
           atten_probs, [],
           i,
           eos_id=eos_id,
           beam_size=beam_size,
           ensure_full_beam=ensure_full_beam,
           num_hyps_per_beam=num_hyps_per_beam,
           valid_eos_max_logit_delta=0.1,
           force_eos_in_last_step=force_eos_in_last_step,
           local_eos_threshold=local_eos_threshold)

    with self.session(use_gpu=False) as sess:
      # `scores` and `atten_probs` are fetched twice on purpose: callers
      # unpack a 10-element tuple in exactly this order.
      return tuple(
          sess.run([
              best_scores, cumulative_scores, scores, hyps, prev_hyps,
              done_hyps, atten_probs, done, scores, atten_probs
          ]))
    def _BeamSearchStep(self, theta, source_encs, source_paddings, cur_step,
                        step_ids, core_bs_states, other_states,
                        num_hyps_per_beam, pre_beam_search_step_callback,
                        post_beam_search_step_callback,
                        additional_source_info):
        """Advance every beam search hypothesis by one decoding step.

        Terminology:
          | num_beams = Number of source sequences to be decoded.
          | num_hyps_per_beam = Number of hyps to keep per source sequence.
          | num_hyps = num_beams * num_hyps_per_beam
          | src_seq_len = Number of time steps in the source sequence.
          | tgt_seq_len = Maximum allowed time steps in the target sequence.

        The step runs the pre-step callback, feeds its results together with
        the core state into `py_x_ops.beam_search_step`, re-orders the client
        states to follow the surviving hyps, and finally runs the post-step
        callback.

        Args:
          theta: A NestedMap object containing weights' values of the decoder
            layer and its children layers.
          source_encs: A tensor of the shape [time, batch, depth]. The
            encoding of the source.
          source_paddings: A tensor of the shape [time, batch]. Padding state
            of the source.
          cur_step: A scalar int tensor, the current time step, 0-based.
          step_ids: An int tensor of shape [num_hyps, 1]. The input ids to the
            current search step.
          core_bs_states: A 7-tuple of core beam search states (best scores,
            cumulative scores, scores, hyps, prev hyps, done hyps, attention
            probs). Maintained by this helper class.
          other_states: A `.NestedMap` of other beam search states, managed
            and updated by the client. Each member tensor is expected to be of
            rank >= 1, with t[i, ...] being the state of the i-th hyp at the
            beginning of this search step.
          num_hyps_per_beam: Num of hyps to keep per beam.
          pre_beam_search_step_callback: The `PreBeamSearchStepCallback`
            callback. See class header comments for more details.
          post_beam_search_step_callback: The `PostBeamSearchStepCallback`
            callback. See class header comments for more details.
          additional_source_info: A `.NestedMap` of tensors containing extra
            context information about the source that may be useful for
            decoding.

        Returns:
          A tuple of the following elements for the next beam search step,
          (next step, all_done, step_ids, core_bs_states, other_states).
        """
        params = self.params

        step_outputs, other_states = pre_beam_search_step_callback(
            theta, source_encs, source_paddings, step_ids, other_states,
            num_hyps_per_beam, additional_source_info)

        (prev_best, prev_cumulative, prev_scores, prev_hyp_tokens,
         prev_parents, prev_done, prev_atten) = core_bs_states

        # Only models using an end-of-chunk id supply the last-chunk flags;
        # every other model passes an empty list in that slot.
        if self._model_uses_eoc_id:
            is_last_chunk = step_outputs.is_last_chunk
        else:
            is_last_chunk = []

        (next_best, next_cumulative, next_scores, next_hyp_tokens,
         next_parents, next_done, next_atten,
         all_done) = py_x_ops.beam_search_step(
             step_outputs.log_probs,
             step_outputs.atten_probs,
             prev_best,
             prev_cumulative,
             prev_scores,
             prev_hyp_tokens,
             prev_parents,
             prev_done,
             prev_atten,
             is_last_chunk,
             cur_step, [],
             eoc_id=params.target_eoc_id,
             eos_id=params.target_eos_id,
             beam_size=params.beam_size,
             num_hyps_per_beam=num_hyps_per_beam,
             valid_eos_max_logit_delta=params.valid_eos_max_logit_delta,
             merge_paths=params.merge_paths,
             allow_empty_terminated_hyp=params.allow_empty_terminated_hyp,
             ensure_full_beam=params.ensure_full_beam)

        # Ids chosen at this step become the inputs of the next step; they are
        # reshaped to the layout (and static shape) of the incoming ids.
        next_step_ids = tf.reshape(next_hyp_tokens[cur_step, :],
                                   tf.shape(step_ids))
        next_step_ids.set_shape(step_ids.get_shape())

        # Row `cur_step` of the prev-hyps output, flattened: for each
        # surviving hyp, the index of the hyp it was extended from.
        parent_row = tf.slice(next_parents, begin=[cur_step, 0], size=[1, -1])
        parent_ids = tf.reshape(parent_row, [-1])

        def _FollowParents(state):
            # Non-tensors and tensors of unknown or zero rank pass through.
            if not isinstance(state, tf.Tensor):
                return state
            if not (state.shape.ndims and state.shape.ndims > 0):
                return state
            gathered = tf.gather(state, parent_ids)
            gathered.set_shape(state.get_shape())
            return gathered

        reordered_states = other_states.Transform(_FollowParents)

        final_other_states = post_beam_search_step_callback(
            theta, source_encs, source_paddings, next_step_ids,
            reordered_states, additional_source_info)

        next_core_states = (next_best, next_cumulative, next_scores,
                            next_hyp_tokens, next_parents, next_done,
                            next_atten)

        return (cur_step + 1, all_done, next_step_ids, next_core_states,
                final_other_states)
Exemplo n.º 4
0
    def testTopKTerminatedHypsOp(self):
        """Checks `top_k_terminated_hyps` and `unpack_hyp` after two steps.

        Two `beam_search_step` ops are chained from a zero-initialised state,
        the top-2 terminated hyps per beam are extracted from the resulting
        done-hyps tensor, and the results are compared against golden values.
        """
        with self.session(use_gpu=False) as sess:
            b_size = 8
            num_beams = 2
            # Floor division keeps the per-beam hyp count an int on Python 3,
            # where `/` would produce a float.
            num_hyps_per_beam = b_size // num_beams
            seq_len = 6
            # NOTE(review): these inputs are drawn without an op-level seed,
            # yet the golden values below are hard-coded; presumably a
            # graph-level seed is set elsewhere - confirm.
            scores = tf.random_uniform([b_size, 5])
            atten_probs = tf.random_uniform([b_size, 3])
            src_seq_lengths = [3, 3]
            best_scores = tf.zeros([num_beams])
            cumulative_scores = tf.zeros([b_size])
            in_scores = tf.zeros([seq_len, b_size])
            in_hyps = tf.zeros([seq_len, b_size], dtype=tf.int32)
            in_prev_hyps = tf.zeros([seq_len, b_size], dtype=tf.int32)
            in_done_hyps = tf.as_string(
                tf.zeros([seq_len, b_size], dtype=tf.int32))
            in_atten_probs = tf.zeros([seq_len, b_size, 3])

            # Step 0: start from the all-zero state.
            (out_best_scores_0, out_cumulative_scores_0, out_scores_0,
             out_hyps_0, out_prev_hyps_0, out_done_hyps_0, out_atten_probs_0,
             _) = py_x_ops.beam_search_step(
                 scores,
                 atten_probs,
                 best_scores,
                 cumulative_scores,
                 in_scores,
                 in_hyps,
                 in_prev_hyps,
                 in_done_hyps,
                 in_atten_probs, [],
                 0,
                 eos_id=2,
                 beam_size=3.0,
                 num_hyps_per_beam=num_hyps_per_beam)

            # Step 1: continue from the outputs of step 0.
            outputs = py_x_ops.beam_search_step(
                scores,
                atten_probs,
                out_best_scores_0,
                out_cumulative_scores_0,
                out_scores_0,
                out_hyps_0,
                out_prev_hyps_0,
                out_done_hyps_0,
                out_atten_probs_0, [],
                1,
                eos_id=2,
                beam_size=3.0,
                num_hyps_per_beam=num_hyps_per_beam)

            # Get the topk terminated hyps.
            in_done_hyps = outputs[5]
            topk_hyps = py_x_ops.top_k_terminated_hyps(
                in_done_hyps,
                src_seq_lengths,
                k=2,
                num_hyps_per_beam=num_hyps_per_beam,
                length_normalization=0.2,
                coverage_penalty=0.2,
                target_seq_length_ratio=1.0)
            seq_ids, seq_lens, seq_scores = py_x_ops.unpack_hyp(
                tf.reshape(topk_hyps, [-1]), max_seq_length=5)

            k1, k2, k3, k4 = sess.run(
                [topk_hyps, seq_ids, seq_lens, seq_scores])
            print(np.array_repr(k1))
            # A TestCase assertion, unlike a bare `assert`, is not stripped
            # when Python runs with -O.
            self.assertEqual(4, k1.size)

            expected_top1_for_beam_0 = """
      beam_id: 0
      ids: 3
      ids: 2
      scores: 0.897901892662
      scores: 0.997961401939
      atten_vecs {
        prob: 0.857856750488
        prob: 0.608582258224
        prob: 0.725398182869
      }
      atten_vecs {
        prob: 0.857856750488
        prob: 0.608582258224
        prob: 0.725398182869
      }
      normalized_score: 1.35659193993
      """
            expected_top2_for_beam_1 = """
      beam_id: 1
      ids: 0
      ids: 2
      scores: 0.753268480301
      scores: 0.789751410484
      atten_vecs {
        prob: 0.689820885658
        prob: 0.216090679169
        prob: 0.40637075901
      }
      atten_vecs {
        prob: 0.452527046204
        prob: 0.374898076057
        prob: 0.127457261086
      }
      normalized_score: 1.02671170235
      """
            self._SameHyp(expected_top1_for_beam_0, k1[0, 0])
            self._SameHyp(expected_top2_for_beam_1, k1[1, 1])

            self.assertAllClose(k2, [[3, 2, 0, 0, 0], [0, 2, 0, 0, 0],
                                     [4, 2, 0, 0, 0], [0, 2, 0, 0, 0]])
            self.assertAllClose(k3, [2, 2, 2, 2])
            self.assertAllClose(
                k4, [1.35659194, 1.02759778, 1.21130753, 1.0267117])
Exemplo n.º 5
0
  def testTopKTerminatedHypsOp(self):
    """Checks `top_k_terminated_hyps` and `unpack_hyp` after two steps.

    Two `beam_search_step` ops are chained from a zero-initialised state using
    seeded random inputs, the top-2 terminated hyps per beam are extracted from
    the resulting done-hyps tensor, and the results are compared against
    golden values.
    """
    with self.session(use_gpu=False) as sess:
      b_size = 8
      num_beams = 2
      # Floor division keeps the per-beam hyp count an int on Python 3, where
      # `/` would produce a float.
      num_hyps_per_beam = b_size // num_beams
      seq_len = 6
      scores = tf.random_uniform([b_size, 5], seed=12345)
      atten_probs = tf.random_uniform([b_size, 3], seed=12345)
      src_seq_lengths = [3, 3]
      best_scores = tf.zeros([num_beams])
      cumulative_scores = tf.zeros([b_size])
      in_scores = tf.zeros([seq_len, b_size])
      in_hyps = tf.zeros([seq_len, b_size], dtype=tf.int32)
      in_prev_hyps = tf.zeros([seq_len, b_size], dtype=tf.int32)
      in_done_hyps = tf.as_string(tf.zeros([seq_len, b_size], dtype=tf.int32))
      in_atten_probs = tf.zeros([seq_len, b_size, 3])

      # Step 0: start from the all-zero state.
      (out_best_scores_0, out_cumulative_scores_0, out_scores_0, out_hyps_0,
       out_prev_hyps_0, out_done_hyps_0, out_atten_probs_0,
       _) = py_x_ops.beam_search_step(
           scores,
           atten_probs,
           best_scores,
           cumulative_scores,
           in_scores,
           in_hyps,
           in_prev_hyps,
           in_done_hyps,
           in_atten_probs, [],
           0,
           eos_id=2,
           beam_size=3.0,
           num_hyps_per_beam=num_hyps_per_beam)

      # Step 1: continue from the outputs of step 0.
      outputs = py_x_ops.beam_search_step(
          scores,
          atten_probs,
          out_best_scores_0,
          out_cumulative_scores_0,
          out_scores_0,
          out_hyps_0,
          out_prev_hyps_0,
          out_done_hyps_0,
          out_atten_probs_0, [],
          1,
          eos_id=2,
          beam_size=3.0,
          num_hyps_per_beam=num_hyps_per_beam)

      # Get the topk terminated hyps.
      in_done_hyps = outputs[5]
      topk_hyps = py_x_ops.top_k_terminated_hyps(
          in_done_hyps,
          src_seq_lengths,
          k=2,
          num_hyps_per_beam=num_hyps_per_beam,
          length_normalization=0.2,
          coverage_penalty=0.2,
          target_seq_length_ratio=1.0)
      seq_ids, seq_lens, seq_scores = py_x_ops.unpack_hyp(
          tf.reshape(topk_hyps, [-1]), max_seq_length=5)

      k1, k2, k3, k4 = sess.run([topk_hyps, seq_ids, seq_lens, seq_scores])
      print(np.array_repr(k1))
      # A TestCase assertion, unlike a bare `assert`, is not stripped when
      # Python runs with -O.
      self.assertEqual(4, k1.size)

      expected_top1_for_beam_0 = """
      beam_id: 0
      ids: 1
      ids: 2
      scores: 0.86230338
      scores: 0.65504861
      atten_vecs {
        prob: 0.45372832
        prob: 0.86230338
        prob: 0.65504861
      }
      atten_vecs {
        prob: 0.45372832
        prob: 0.86230338
        prob: 0.65504861
      }
      normalized_score: 1.002714
      """
      expected_top2_for_beam_1 = """
      beam_id: 1
      ids: 3
      ids: 2
      scores: 0.38127339
      scores: 0.57700801
      atten_vecs {
        prob: 0.38612545
        prob: 0.42067075
        prob: 0.84442794
      }
      atten_vecs {
        prob: 0.18693292
        prob: 0.17821217
        prob: 0.66380036
      }
      normalized_score: 0.480028
      """
      self._SameHyp(expected_top1_for_beam_0, k1[0, 0])
      self._SameHyp(expected_top2_for_beam_1, k1[1, 1])

      self.assertAllClose(
          k2,
          [[1, 2, 0, 0, 0], [4, 2, 0, 0, 0], [4, 2, 0, 0, 0], [3, 2, 0, 0, 0]])
      self.assertAllClose(k3, [2, 2, 2, 2])
      self.assertAllClose(k4, [1.002714, 0.684296, 0.522484, 0.480028])