Example #1
def beam_decode(model, batch, vocab, params):

    def decode_onestep(enc_inp, enc_outputs, dec_input, dec_state, enc_extended_inp,
                       batch_oov_len, enc_pad_mask, use_coverage, prev_coverage):
        """
            Method to decode the output step by step (used for beamSearch decoding)
            Args:
                sess : tf.Session object
                batch : current batch, shape = [beam_size, 1, vocab_size( + max_oov_len if pointer_gen)]
                (for the beam search decoding, batch_size = beam_size)
                enc_outputs : hiddens outputs computed by the encoder LSTM
                dec_state : beam_size-many list of decoder previous state, LSTMStateTuple objects,
                shape = [beam_size, 2, hidden_size]
                dec_input : decoder_input, the previous decoded batch_size-many words, shape = [beam_size, embed_size]
                cov_vec : beam_size-many list of previous coverage vector
            Returns: A dictionary of the results of all the ops computations (see below for more details)
        """
        final_dists, dec_hidden, attentions, p_gens = model(
            enc_outputs,  # shape=(3, 115, 256)
            dec_state,  # shape=(3, 256)
            enc_inp,  # shape=(3, 115)
            enc_extended_inp,  # shape=(3, 115)
            dec_input,  # shape=(3, 1)
            batch_oov_len,  # shape=()
            enc_pad_mask,  # shape=(3, 115)
            use_coverage,
            prev_coverage)  # shape=(3, 115, 1)

        # Take the top 2 * beam_size candidates so that, after pruning hypotheses
        # that end with [STOP], at least beam_size live hypotheses remain.
        top_k_probs, top_k_ids = tf.nn.top_k(tf.squeeze(final_dists), k=params["beam_size"] * 2)
        top_k_log_probs = tf.math.log(top_k_probs)

        results = {"dec_state": dec_hidden,
                   "attention_vec": attentions,  # [batch_sz, max_len_x, 1]
                   "top_k_ids": top_k_ids,
                   "top_k_log_probs": top_k_log_probs,
                   "p_gen": p_gens,
                   }
        return results

    # end of the nested function

    # Run the encoder once, then reuse its outputs to decode one token per time step.
    # state shape=(3, 256), enc_outputs shape=(3, 115, 256)
    enc_outputs, state = model.call_encoder(batch[0]["enc_input"])
    # Initial hypotheses (a beam_size-long list), all starting from the [START] token
    hyps = [Hypothesis(tokens=[vocab.word_to_id('[START]')],
                       log_probs=[0.0],
                       state=state[0],
                       p_gens=[],
                       attn_dists=[]) for _ in range(params['batch_size'])]  # batch_size == beam_size
    results = []  # list to hold the completed beam_size hypotheses
    steps = 0  # initial step

    while steps < params['max_dec_steps'] and len(results) < params['beam_size']:
        latest_tokens = [h.latest_token for h in hyps]  # latest token of each hypothesis, shape: [beam_size]
        # Replace every OOV id by the [UNK] token id before feeding the decoder.
        latest_tokens = [t if t in range(params['vocab_size']) else vocab.word_to_id('[UNK]') for t in latest_tokens]
        # Collect the last decoder state of each hypothesis.
        states = [h.state for h in hyps]

        # Decode the 2 x beam_size most likely tokens at time step t for each hypothesis.
        dec_input = tf.expand_dims(latest_tokens, axis=1)  # shape=(3, 1)
        dec_states = tf.stack(states, axis=0)
        returns = decode_onestep(batch[0]['enc_input'],  # shape=(3, 115)
                                 enc_outputs,  # shape=(3, 115, 256)
                                 dec_input,  # shape=(3, 1)
                                 dec_states,  # shape=(3, 256)
                                 batch[0]['extended_enc_input'],  # shape=(3, 115)
                                 batch[0]['max_oov_len'],  # shape=()
                                 batch[0]['sample_encoder_pad_mask'],  # shape=(3, 115)
                                 params['is_coverage'],  # whether coverage is enabled
                                 prev_coverage=None)  # no previous coverage is threaded through in this example
        topk_ids = returns['top_k_ids']
        topk_log_probs = returns['top_k_log_probs']
        new_states = returns['dec_state']
        attn_dists = returns['attention_vec']
        p_gens = returns["p_gen"]

        all_hyps = []
        # At step 0 all hypotheses are identical, so only the first one needs expanding.
        num_orig_hyps = 1 if steps == 0 else len(hyps)
        for i in range(num_orig_hyps):
            # new_state shape=(256,), attn_dist shape=(115,), p_gen is a scalar
            h, new_state, attn_dist, p_gen = hyps[i], new_states[i], attn_dists[i], p_gens[i]
            for j in range(params['beam_size'] * 2):
                # Extend each hypothesis with each of the top k tokens
                # (this gives 2 x beam_size new hypotheses for each of the beam_size old ones).
                new_hyp = h.extend(token=topk_ids[i, j].numpy(),
                                   log_prob=topk_log_probs[i, j],
                                   state=new_state,
                                   attn_dist=attn_dist,
                                   p_gen=p_gen,
                                   )
                all_hyps.append(new_hyp)
        # Sort all the hypotheses and keep only the beam_size most likely ones.
        hyps = []
        sorted_hyps = sorted(all_hyps, key=lambda h: h.avg_log_prob, reverse=True)
        for h in sorted_hyps:
            if h.latest_token == vocab.word_to_id('[STOP]'):
                if steps >= params['min_dec_steps']:
                    results.append(h)
            else:
                hyps.append(h)
            if len(hyps) == params['beam_size'] or len(results) == params['beam_size']:
                break
        steps += 1

    # If no hypothesis emitted [STOP] in time, fall back to the live hypotheses.
    if len(results) == 0:
        results = hyps

    # At the end of the loop we return the most likely hypothesis, which holds the most likely output
    # sequence given the input fed to the model.
    hyps_sorted = sorted(results, key=lambda h: h.avg_log_prob, reverse=True)
    best_hyp = hyps_sorted[0]
    best_hyp.abstract = " ".join(output_to_words(best_hyp.tokens, vocab, batch[0]["article_oovs"][0])[1:-1])
    best_hyp.text = batch[0]["article"].numpy()[0].decode()
    print('best_hyp is ', best_hyp.abstract)
    return best_hyp
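
A minimal driver sketch for this first example. It is hypothetical: the params keys below are simply the ones beam_decode reads, and model, batch, and vocab stand in for the project's own pointer-generator model, batcher output, and vocabulary.

# Hypothetical usage sketch, not part of the original example.
params = {
    "beam_size": 3,
    "batch_size": 3,        # for beam search decoding, batch_size == beam_size
    "max_dec_steps": 40,
    "min_dec_steps": 5,
    "vocab_size": 30000,
    "is_coverage": True,
}
best_hyp = beam_decode(model, batch, vocab, params)  # model/batch/vocab come from the project
print(best_hyp.abstract)
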
Example #2
def beam_decode(model, batch, vocab, params):
    def decode_onestep(enc_inp, enc_outputs, dec_input, dec_state,
                       enc_extended_inp, batch_oov_len, enc_pad_mask,
                       use_coverage, prev_coverage):
        """
            Method to decode the output step by step (used for beamSearch decoding)
            Args:
                sess : tf.Session object
                batch : current batch, shape = [beam_size, 1, vocab_size( + max_oov_len if pointer_gen)]
                (for the beam search decoding, batch_size = beam_size)
                enc_outputs : hiddens outputs computed by the encoder LSTM
                dec_state : beam_size-many list of decoder previous state, LSTMStateTuple objects,
                shape = [beam_size, 2, hidden_size]
                dec_input : decoder_input, the previous decoded batch_size-many words, shape = [beam_size, embed_size]
                cov_vec : beam_size-many list of previous coverage vector
            Returns: A dictionary of the results of all the ops computations (see below for more details)
        """
        # print("enc_outputs:",enc_outputs.shape)
        # print("dec_state:",dec_state.shape)
        # print("enc_inp:",enc_inp.shape)
        # print("enc_extended_inp:",enc_extended_inp.shape)
        # print("dec_input:",dec_input.shape)
        # This variant drives a plain seq2seq model: the pointer/coverage arguments are unused.
        dec_tar = tf.ones(shape=(params["beam_size"], 1))  # dummy decoder target
        final_dists, dec_hidden = model(
            enc_outputs,  # shape=(beam_size, max_enc_len, hidden_size)
            dec_input,  # shape=(beam_size, 1)
            dec_state,  # shape=(beam_size, hidden_size)
            dec_tar)  # shape=(beam_size, 1)

        top_k_probs, top_k_ids = tf.nn.top_k(tf.squeeze(final_dists),
                                             k=params["beam_size"] * 2)
        top_k_log_probs = tf.math.log(top_k_probs)

        results = {
            "dec_state": dec_hidden,
            "top_k_ids": top_k_ids,
            "top_k_log_probs": top_k_log_probs,
        }
        return results

    params["batch_size"] = params["beam_size"]  # for beam search decoding, batch_size == beam_size
    dataset = batch
    res = []
    for k in range(params["batch_size"]):
        enc_input = dataset["enc_input"][k]
        # Tile the k-th input beam_size times so every beam shares the same encoding.
        enc_ = tf.squeeze(tf.stack([[enc_input] * params["beam_size"]],
                                   axis=0))
        enc_outputs, state = model.call_encoder(enc_)  # encode the whole input once
        hyps = [
            Hypothesis(tokens=[vocab.word_to_id('[START]')],
                       log_probs=[0.0],
                       state=state[0],
                       p_gens=[],
                       attn_dists=[]) for _ in range(params['beam_size'])
        ]
        results = []  # list to hold the completed beam_size hypotheses
        steps = 0  # initial step

        while steps < params['max_dec_steps'] and len(
                results) < params['beam_size']:  # one full beam-search run
            latest_tokens = [
                h.latest_token for h in hyps
            ]  # latest token of each hypothesis, shape: [beam_size]
            latest_tokens = [
                t if t in range(params['vocab_size']) else
                vocab.word_to_id('[UNK]') for t in latest_tokens
            ]  # [batch]
            states = [h.state for h in hyps]  # [batch]
            dec_input = tf.expand_dims(latest_tokens,
                                       axis=1)  # shape=(beam, 1)
            dec_states = tf.stack(states, axis=0)  # shape=[beam,128]
            returns = decode_onestep(
                dataset['enc_input'][k],  # single example, shape=(max_enc_len,)
                enc_outputs,  # shape=(beam_size, max_enc_len, hidden_size)
                dec_input,  # shape=(beam_size, 1)
                dec_states,  # shape=(beam_size, hidden_size)
                dataset['extended_enc_input'][k],  # shape=(max_enc_len,)
                dataset['max_oov_len'],  # shape=()
                dataset['sample_encoder_pad_mask'][k],  # shape=(max_enc_len,)
                True,  # use_coverage (unused by this decoder variant)
                prev_coverage=None)
            topk_ids = returns['top_k_ids']
            topk_log_probs = returns['top_k_log_probs']
            new_states = returns['dec_state']
            all_hyps = []
            # At step 0 all hypotheses are identical, so only the first one needs expanding.
            num_orig_hyps = 1 if steps == 0 else len(hyps)
            # Collect the num_orig_hyps x (2 * beam_size) candidate extensions.
            for i in range(num_orig_hyps):
                h, new_state = hyps[i], new_states[i]
                for j in range(params['beam_size'] * 2):
                    new_hyp = h.extend(
                        token=topk_ids[i, j].numpy(),
                        log_prob=topk_log_probs[i, j],
                        state=new_state,
                        attn_dist=None,
                        p_gen=[],
                    )
                    all_hyps.append(new_hyp)
            hyps = []
            # Keep only the beam_size most likely hypotheses.
            sorted_hyps = sorted(all_hyps,
                                 key=lambda h: h.avg_log_prob,
                                 reverse=True)
            for h in sorted_hyps:
                if h.latest_token == vocab.word_to_id('[STOP]'):
                    if steps >= params['min_dec_steps']:
                        results.append(h)
                else:
                    hyps.append(h)
                if len(hyps) == params['beam_size'] or len(
                        results) == params['beam_size']:
                    break
            steps += 1

        if len(results) == 0:
            results = hyps

        hyps_sorted = sorted(results,
                             key=lambda h: h.avg_log_prob,
                             reverse=True)
        best_hyp = hyps_sorted[0]  # take the most likely hypothesis
        best_hyp.abstract = " ".join(
            output_to_words(best_hyp.tokens, vocab,
                            dataset["article_oovs"][k])[1:-1])
        best_hyp.text = dataset["article"].numpy()[k].decode()
        print('best_hyp is ', best_hyp.abstract)
        res.append(best_hyp.abstract)
    return res
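
All three examples lean on a Hypothesis helper with an extend() method and a length-normalized avg_log_prob property. Its real definition lives elsewhere in the project; the following is a minimal sketch that is merely consistent with the call sites above, not the project's actual class.

# Minimal sketch of the assumed Hypothesis helper; the project's own class may differ.
class Hypothesis:
    def __init__(self, tokens, log_probs, state, attn_dists=None, p_gens=None):
        self.tokens = tokens        # decoded token ids so far
        self.log_probs = log_probs  # one log-probability per decoded token
        self.state = state          # latest decoder state
        self.attn_dists = attn_dists if attn_dists is not None else []
        self.p_gens = p_gens if p_gens is not None else []

    def extend(self, token, log_prob, state, attn_dist=None, p_gen=None):
        # Return a new hypothesis grown by one token; the original is left untouched.
        return Hypothesis(self.tokens + [token],
                          self.log_probs + [log_prob],
                          state,
                          self.attn_dists + [attn_dist],
                          self.p_gens + [p_gen])

    @property
    def latest_token(self):
        return self.tokens[-1]

    @property
    def avg_log_prob(self):
        # Length normalization: without it, beam search would favor short outputs.
        return sum(self.log_probs) / len(self.tokens)
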
Example #3
def batch_beam_decode(model, enc_data, vocab, params):
    # Drop the unused parameters (e.g. enc_pad_mask) and keep only the ones that are
    # actually used: enc_inp, enc_outputs, dec_input, dec_state.
    def decode_onestep(enc_inp, enc_outputs, dec_input, dec_state):
        """
            Method to decode the output step by step (used for beamSearch decoding)
            Args:
                sess : tf.Session object
                batch : current batch, shape = [beam_size, 1, vocab_size( + max_oov_len if pointer_gen)]
                (for the beam search decoding, batch_size = beam_size)
                enc_outputs : hiddens outputs computed by the encoder LSTM
                dec_state : beam_size-many list of decoder previous state, LSTMStateTuple objects,
                shape = [beam_size, 2, hidden_size]
                dec_input : decoder_input, the previous decoded batch_size-many words, shape = [beam_size, embed_size]
                cov_vec : beam_size-many list of previous coverage vector
            Returns: A dictionary of the results of all the ops computations (see below for more details)
        """
        # batch_size must equal beam_size here so the GPU can run the step in parallel
        # (e.g. reshaping a 30000 x 9 matrix into 90000 x 3 for the batched computation).
        final_dists, dec_hidden, attentions, p_gens = model(
            enc_outputs,  # shape=(3, 115, 256)
            dec_state,  # shape=(3, 256)
            enc_inp,  # shape=(3, 115)
            dec_input)  # shape=(3, 1)
        # Take the largest probabilities and their token ids, then log-transform the probabilities.
        top_k_probs, top_k_ids = tf.nn.top_k(tf.squeeze(final_dists),
                                             k=params["beam_size"] * 2)
        top_k_log_probs = tf.math.log(top_k_probs)

        results = {
            "dec_state": dec_hidden,
            "top_k_ids": top_k_ids,
            "top_k_log_probs": top_k_log_probs,
        }
        return results

    # Determine the input size.

    batch_data = enc_data["enc_input"]  # shape=(batch_size, actual input sequence length)
    batch_size = enc_data["enc_input"].shape[0]
    # Result buffer: the whole batch is decoded together, one slot per example.
    predicts = [''] * batch_size
    inputs = tf.convert_to_tensor(batch_data)
    # We run the encoder once and then we use the results to decode each time step token

    enc_outputs, state = model.call_encoder(inputs)
    hyps = [
        Hypothesis(tokens=[vocab.word_to_id('[START]')],
                   log_probs=[0.0],
                   state=state[0]) for _ in range(params['batch_size'])
    ]
    results = []  # list to hold the completed beam_size hypotheses
    steps = 0  # initial step

    while steps < params['max_dec_steps']:
        latest_tokens = [
            h.latest_token for h in hyps
        ]  # latest token for each hypothesis , shape : [beam_size]
        latest_tokens = [
            t
            if t in range(params['vocab_size']) else vocab.word_to_id('[UNK]')
            for t in latest_tokens
        ]

        # Full token sequence of each hypothesis, with OOV ids mapped to [UNK].
        # (The original checked whole token lists against the vocab range, which never matches;
        # the membership test has to run per token.)
        tokens = [[t if t in range(params['vocab_size']) else vocab.word_to_id('[UNK]')
                   for t in h.tokens] for h in hyps]

        states = [h.state for h in hyps]
        # Decode the 2 x beam_size most likely tokens at time step t for each hypothesis.
        dec_input = tf.expand_dims(latest_tokens, axis=1)  # shape=(beam_size, 1)
        enc_input = tf.expand_dims(tokens, axis=1)  # shape=(beam_size, 1, current_length)
        dec_states = tf.stack(states, axis=0)
        returns = decode_onestep(
            enc_input,  # decoded-so-far tokens, fed in place of the encoder input
            enc_outputs,  # shape=(batch_size, max_enc_len, hidden_size)
            dec_input,  # shape=(beam_size, 1)
            dec_states)  # shape=(beam_size, hidden_size)
        topk_ids = returns['top_k_ids']
        topk_log_probs = returns['top_k_log_probs']
        new_states = returns['dec_state']

        all_hyps = []
        # At step 0 all hypotheses are identical, so only the first one needs expanding.
        num_orig_hyps = 1 if steps == 0 else len(hyps)
        for i in range(num_orig_hyps):
            h, new_state = hyps[i], new_states[i]
            for j in range(params['beam_size'] * 2):
                # Extend each hypothesis with each of the top k tokens
                # (this gives 2 x beam_size new hypotheses for each of the old ones).
                new_hyp = h.extend(
                    token=topk_ids[i, j].numpy(),
                    log_prob=topk_log_probs[i, j],
                    state=new_state,
                )
                all_hyps.append(new_hyp)
        # Sort all the hypotheses and keep only the beam_size most likely ones.
        hyps = []
        sorted_hyps = sorted(all_hyps,
                             key=lambda h: h.avg_log_prob,
                             reverse=True)
        for h in sorted_hyps:
            if h.latest_token == vocab.word_to_id('[STOP]'):
                if steps >= params['min_dec_steps']:
                    results.append(h)
            else:
                hyps.append(h)
            if len(hyps) == params['beam_size'] or len(
                    results) == params['beam_size']:
                break
        steps += 1

    if len(results) == 0:
        results = hyps

    # At the end of the loop we return the most likely hypothesis, which holds the most likely output
    # sequence given the input fed to the model.
    hyps_sorted = sorted(results, key=lambda h: h.avg_log_prob, reverse=True)
    best_hyp = hyps_sorted[0]
    # Note: the function's batch argument is named enc_data; referring to batch here
    # (as the original did) would raise a NameError.
    best_hyp.abstract = " ".join(
        output_to_words(best_hyp.tokens, vocab,
                        enc_data["article_oovs"][0])[1:-1])
    best_hyp.text = enc_data["article"].numpy()[0].decode()
    print('best_hyp is ', best_hyp.abstract)
    return best_hyp
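
output_to_words is the other project helper shared by these examples: it maps decoded ids back to words, resolving pointer-generator ids that fall outside the fixed vocabulary through the per-article OOV list. Below is a hedged sketch of that contract; vocab.id_to_word and vocab.size() are assumed accessor names, not confirmed project API.

# Hypothetical sketch of output_to_words; the real helper lives in the project.
def output_to_words(token_ids, vocab, article_oovs):
    words = []
    for tid in token_ids:
        if tid < vocab.size():                    # assumed size() accessor
            words.append(vocab.id_to_word(tid))   # assumed inverse of word_to_id
        else:
            # Extended-vocabulary id: copy the corresponding source OOV word.
            words.append(article_oovs[tid - vocab.size()])
    return words
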