Example #1
def draft_summary_beam_search(input_ids, enc_output, dec_padding_mask,
                              beam_size):

    log.info(f"Building: 'Draft beam search decoder'")
    input_ids = tfa.seq2seq.tile_batch(input_ids, multiplier=beam_size)
    enc_output = tfa.seq2seq.tile_batch(enc_output, multiplier=beam_size)
    dec_padding_mask = tfa.seq2seq.tile_batch(dec_padding_mask,
                                              multiplier=beam_size)

    def beam_search_decoder(output):
        # (batch_size, seq_len, d_bert)
        embeddings = model.embedding(output)
        predictions, dec_op, attention_weights = model.decoder(
            embeddings, enc_output, False, None, dec_padding_mask)
        if config.copy_gen:
            predictions = model.decoder.pointer_generator(
                dec_op[:, -1:, :],
                predictions[:, -1:, :],
                attention_weights[:, :, -1:, :],
                input_ids,
                tf.shape(input_ids)[1],
                tf.shape(predictions[:, -1:, :])[1],
                training=False,
            )
        # (batch_size, 1, target_vocab_size)
        return (predictions[:, -1:, :])

    return beam_search(beam_search_decoder, [CLS_ID] * h_parms.batch_size,
                       beam_size,
                       config.summ_length,
                       config.input_vocab_size,
                       h_parms.length_penalty,
                       stop_early=False,
                       eos_id=[[SEP_ID]])
Example #2
def beam_search_eval(document, beam_size):

    start = [tokenizer.vocab_size]
    end = [tokenizer.vocab_size + 1]
    encoder_input = tf.tile(document, multiples=[beam_size, 1])
    batch, inp_shape = encoder_input.shape

    def decoder_query(output):
        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            encoder_input, output)
        predictions, attention_weights, dec_output = model(
            encoder_input, output, enc_padding_mask, combined_mask,
            dec_padding_mask, False)

        # (batch_size, 1, target_vocab_size)
        return (predictions[:, -1:, :])

    return beam_search(decoder_query,
                       start,
                       beam_size,
                       config.summ_length,
                       config.input_vocab_size,
                       h_parms.length_penalty,
                       stop_early=True,
                       eos_id=[end])
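Most of the examples in this listing delegate the search itself to a project-specific beam_search helper with its own signature. As a neutral point of reference, and not taken from any of the projects shown here, a minimal pure-Python sketch of the core loop might look like the following; beam_search_sketch and step_fn are illustrative names, step_fn is assumed to map a token prefix to a dict of per-token log-probabilities, and no length normalization is applied.

def beam_search_sketch(step_fn, start_id, eos_id, beam_size, max_len):
    """Illustrative only: step_fn(prefix) -> {token_id: log_prob} for the next token."""
    beams = [([start_id], 0.0)]            # (token sequence, cumulative log-probability)
    for _ in range(max_len):
        candidates = []
        for seq, score in beams:
            if seq[-1] == eos_id:          # finished hypothesis: carry it over unchanged
                candidates.append((seq, score))
                continue
            for token, logp in step_fn(seq).items():
                candidates.append((seq + [token], score + logp))
        # keep only the beam_size highest-scoring hypotheses
        beams = sorted(candidates, key=lambda c: c[1], reverse=True)[:beam_size]
        if all(seq[-1] == eos_id for seq, _ in beams):
            break
    return max(beams, key=lambda c: c[1])[0]

The library calls in the surrounding examples add batching over multiple inputs, a length penalty (alpha), and early stopping on top of this basic loop.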
Example #3
def validate(model,
             model_type,
             dataloader,
             device,
             w2i,
             i2w,
             data_mode,
             max_length=15,
             beam_size=3):
    gts_dict = {}
    hyps_dict = {}
    bad_count = 0
    for i, (image, texts) in tqdm(enumerate(dataloader),
                                  total=len(dataloader)):
        hyps = beam_search(model, model_type, image, w2i, i2w, device,
                           max_length, beam_size, data_mode)
        if len(hyps) == 0:
            bad_count += 1
            continue
        hyp = hyps[0][1:]
        if hyp[-1] == w2i['<END>']:
            hyp = hyp[:-1]
        hyp = ' '.join([i2w[word] for word in hyp.tolist()])
        gts_dict[i] = texts[0]
        # Temporary
        hyps_dict[i] = [hyp]
    if len(list(gts_dict.keys())) == 0:
        print('Bad validation')
        return {}
    print('Bad hypothesis count: ', bad_count)
    return evaluation.compute_scores(gts_dict, hyps_dict)
Example #4
def predict_beam_search(beam_width):
    print("beam-search predicting with %s..." % model_stamp)
    with torch.no_grad():
        global net, test_loader
        net.eval()
        prediction = []
        for batch_idx, (utter, utter_len,
                        seq_order) in enumerate(tqdm(test_loader)):
            utter, utter_len = utter.cuda(), utter_len.cuda()
            batch_size = utter.shape[0]
            (listener_hiddens,
             (listener_h,
              listener_c)), listener_len = net.listener(utter=utter,
                                                        utter_len=utter_len)

            seqs = []
            for b in range(batch_size):
                seq, attention_weights = beam_search(
                    init_state=net.speller.init_state,
                    time_step=net.speller.time_step,
                    listener_hiddens=listener_hiddens[
                        b, :listener_len[b]].unsqueeze(0),
                    listener_h=listener_h[b].unsqueeze(0),
                    listener_c=listener_c[b].unsqueeze(0),
                    beam_width=beam_width,
                    max_len=int(utter_len[b] / 6))
                seqs.append(seq)
            plt.imshow(attention_weights.cpu().numpy(),
                       interpolation='nearest',
                       cmap='hot')
            plt.savefig("result/%s_aw_test_b%d.png" % (model_stamp, batch_idx))
            seqs = np.array(seqs)
            seqs = seqs[seq_order]
            prediction += seqs.tolist()
    return prediction
Example #5
def ae_latent_sample_beam(latents_dense_in, inputs, ed, embed, hparams):
    """Sample from the latent space in the autoencoder."""
    def symbols_to_logits_fn(ids):
        """Go from ids to logits."""
        # prepare to be right-shifted in 'decode_transformer'
        latents_discrete = tf.pad(ids[:, 1:], [[0, 0], [0, 1]])

        with tf.variable_scope(tf.get_variable_scope(), reuse=False):
            latents_dense = embed(
                tf.one_hot(latents_discrete, depth=2**hparams.bottleneck_bits))
            latents_pred = decode_transformer(inputs, ed, latents_dense,
                                              hparams, "extra")
            logits = tf.layers.dense(latents_pred,
                                     2**hparams.bottleneck_bits,
                                     name="extra_logits")
            current_output_position = commons.shape_list(ids)[1] - 1
            logits = logits[:, current_output_position, :]
        return logits

    initial_ids = tf.zeros([tf.shape(latents_dense_in)[0]], dtype=tf.int32)
    length = tf.shape(latents_dense_in)[1]
    ids, _, _ = beam_search.beam_search(symbols_to_logits_fn,
                                        initial_ids,
                                        beam_size=1,
                                        decode_length=length,
                                        vocab_size=2**hparams.bottleneck_bits,
                                        alpha=0.0,
                                        eos_id=-1,
                                        stop_early=False)

    res = ids[:, 0, :]  # Pick first beam.
    return res[:, 1:]  # Remove the added all-zeros from ids.
Example #6
def evaluate(text):
    with open('input_tokenizer.pickle', 'rb') as handle:
        input_tokenizer = pickle.load(handle)
        
    with open('output_tokenizer.pickle', 'rb') as handle:
        output_tokenizer = pickle.load(handle)
        
    input_vocab_size = len(input_tokenizer.word_index) + 1
    output_vocab_size = len(output_tokenizer.word_index) + 1
    
    text = preprocess_text(text)  
    seq = input_tokenizer.texts_to_sequences([text])
    inputs = tf.keras.preprocessing.sequence.pad_sequences(seq, truncating='post', padding='post')
    inputs = tf.convert_to_tensor(inputs)
    
    result = ""
    
    encoder = Encoder(input_vocab_size, constants.embedding_dim, constants.units, constants.BATCH_SIZE)
    decoder = Decoder(output_vocab_size, constants.embedding_dim, constants.units, constants.BATCH_SIZE)
    
    checkpoint_dir = './checkpoints'
    checkpoint = tf.train.Checkpoint(encoder=encoder, decoder=decoder)
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
    
    enc_outputs, enc_hidden = encoder(inputs)
    dec_hidden = enc_hidden
    dec_input = tf.expand_dims([output_tokenizer.word_index['<start>']], 0)
    
    result = beam_search(constants.beam_width, decoder, dec_input, dec_hidden, 
                         enc_outputs, output_tokenizer.word_index['<end>'], output_vocab_size)
    result = output_tokenizer.sequences_to_texts([result])
    print(result[0])
Example #7
def draft_summary_beam_search(model, input_ids, enc_output, dec_padding_mask,
                              beam_size):

    log.info(f"Building: 'Draft beam search decoder'")
    input_ids = tfa.seq2seq.tile_batch(input_ids, multiplier=beam_size)
    enc_output = tfa.seq2seq.tile_batch(enc_output, multiplier=beam_size)
    dec_padding_mask = tfa.seq2seq.tile_batch(dec_padding_mask,
                                              multiplier=beam_size)

    def beam_search_decoder(output):
        # (batch_size, seq_len, d_bert)
        embeddings = model.embedding(output)
        predictions, attention_weights = model.decoder(input_ids, embeddings,
                                                       enc_output, False, None,
                                                       dec_padding_mask)
        # (batch_size, 1, target_vocab_size)
        return (predictions[:, -1:, :])

    return beam_search(beam_search_decoder, [CLS_ID] * h_parms.batch_size,
                       beam_size,
                       config.summ_length,
                       config.input_vocab_size,
                       h_parms.length_penalty,
                       stop_early=False,
                       eos_id=[[SEP_ID]])
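A recurring detail in the snippets above is that the encoder inputs are repeated once per hypothesis before decoding, either with tf.tile or with tfa.seq2seq.tile_batch, so that a single decoder call scores every hypothesis in the batch. A minimal, self-contained sketch of that tiling step, assuming tensorflow and tensorflow_addons are installed and using arbitrary shapes:

import tensorflow as tf
import tensorflow_addons as tfa

enc_output = tf.random.normal([2, 7, 16])               # (batch, seq_len, d_model)
beam_size = 4
# Repeat each example's encoder output beam_size times so one decoder call
# can score all batch * beam_size hypotheses at once.
tiled = tfa.seq2seq.tile_batch(enc_output, multiplier=beam_size)
print(tiled.shape)                                       # (8, 7, 16)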
Example #8
def evaluate(path):

    with open('tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)

    vocab_size = len(tokenizer.word_index) + 1

    encoder = Encoder(config.embedding_dim)
    decoder = Decoder(config.units, config.embedding_dim, vocab_size)

    checkpoint_dir = './checkpoints'
    checkpoint = tf.train.Checkpoint(encoder=encoder, decoder=decoder)
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

    image = load_image(path)
    encoder_outputs = encoder(tf.expand_dims(image, 0))
    dec_state = tf.zeros((1, config.units))
    dec_input = tf.expand_dims([tokenizer.word_index['<start>']], 0)

    result = beam_search(config.beam_width, decoder, dec_input, dec_state,
                         encoder_outputs, tokenizer.word_index['<end>'],
                         vocab_size)

    result = tokenizer.sequences_to_texts([result])
    print(result)
Example #9
def draft_decoded_summary(model, input_ids, target_ids, beam_size):
    batch = tf.shape(input_ids)[0]
    start = [101] * batch
    end = [102]
    # (batch_size, seq_len, d_bert)
    enc_output_ = model.bert_model(input_ids)[0]
    enc_output = tf.tile(enc_output_, multiples=[beam_size, 1, 1])
    input_ids = tf.tile(input_ids, multiples=[beam_size, 1])

    # (batch_size, 1, 1, seq_len), (_), (batch_size, 1, 1, seq_len)
    def beam_search_decoder(target_ids):
        _, combined_mask, dec_padding_mask = create_masks(
            input_ids, target_ids)
        draft_logits, _ = model.draft_summary(input_ids=input_ids,
                                              enc_output=enc_output,
                                              look_ahead_mask=combined_mask,
                                              padding_mask=dec_padding_mask,
                                              target_ids=target_ids,
                                              training=False)
        # (batch_size, 1, target_vocab_size)
        return (draft_logits[:, -1:, :])

    return (beam_search(beam_search_decoder,
                        start,
                        beam_size,
                        config.target_seq_length,
                        config.input_vocab_size,
                        config.length_penalty,
                        stop_early=True,
                        eos_id=[end]), enc_output_)
Example #10
def beam_search_eval(inp_sentences, beam_size):

    start = [tokenizer_en.vocab_size] * len(inp_sentences)
    end = [tokenizer_en.vocab_size + 1]
    inp_sentences = [tokenizer_en.encode(i) for i in inp_sentences]
    encoder_input = tf.tile(inp_sentences, multiples=[beam_size, 1])
    batch, inp_shape = encoder_input.shape

    def transformer_query(output):
        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            encoder_input, output)
        predictions, attention_weights, dec_output = transformer(
            encoder_input, output, False, enc_padding_mask, combined_mask,
            dec_padding_mask)

        if config.copy_gen:
            predictions = generator(dec_output, predictions, attention_weights,
                                    encoder_input, inp_shape, output.shape[-1],
                                    batch, False)

        # select the last sequence
        return (predictions[:, -1:, :])  # (batch_size, 1, target_vocab_size)

    return beam_search(transformer_query,
                       start,
                       beam_size,
                       config.summ_length,
                       config.target_vocab_size,
                       0.6,
                       stop_early=False,
                       eos_id=[end])
Example #11
def generate(model, val_iter, TRG_TEXT, k=10, max_len=100, gpu=True):
    """
    Generates top k best sentences given trained model.
    """
    bos = TRG_TEXT.vocab.stoi['<s>']
    eos = TRG_TEXT.vocab.stoi['</s>']
    pad = TRG_TEXT.vocab.stoi['<pad>']

    filter_token = [pad]

    output = []

    for batch in tqdm(val_iter):
        trg = batch.trg
        src = batch.src
        for i in range(src.size(1)):
            src_sent = src[:, i:i + 1]
            best_options = beam_search.beam_search(model, src_sent, bos, eos,
                                                   k, max_len, filter_token,
                                                   gpu)

            sentence_trg = ""
            sentence_src = ""
            sentence = []
            for word in best_options[0][1]:
                sentence += [TRG_TEXT.vocab.itos[word]]
                sentence_src += TRG_TEXT.vocab.itos[word] + " "
            for word in trg[:, i]:
                sentence_trg += TRG_TEXT.vocab.itos[word] + " "


            output.append(sentence)

    return output
Example #12
def decode():
    with tf.Session() as sess:
        beam_size = FLAGS.beam_size
        if_beam_search = FLAGS.if_beam_search
        model = create_model(sess,
                             True,
                             beam_search=if_beam_search,
                             beam_size=beam_size)
        model.batch_size = 1
        data_path = DATA_PATH
        word2id, id2word, trainingSamples = load_dataset(data_path)

        if if_beam_search:
            sys.stdout.write("> ")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            while sentence:
                recos = beam_search(sess,
                                    sentence=sentence,
                                    word2id=word2id,
                                    id2word=id2word,
                                    model=model)
                print("Replies --------------------------------------->")
                print(recos)
                sys.stdout.write("> ")
                sys.stdout.flush()
                sentence = sys.stdin.readline()
Example #13
def evaluate():
    if request.method == 'POST':
        start = time.perf_counter()
        try:
            var_sample: VarNamingSample = request.get_json(force=True, cache=False)
            gt = var_sample["name"]
            tensorized_sample = model.tensorize(var_sample)

            minibatch = model.initialize_minibatch()
            model.extend_minibatch_with(tensorized_sample, minibatch)
            minibatch = model.finalize_minibatch(minibatch, device=DEVICE)
            minibatch["target"]["token_idxs"] = minibatch["target"]["token_idxs"][:1]
            predictions = beam_search(nn, **minibatch)
            predictions = list(map(lambda prediction:
                                   {
                                       "name": [model.vocabulary.id_to_token[
                                                    token_idx] for token_idx in prediction[1].token_idxs.squeeze()][1: -1],
                                       "p": prediction[1].p
                                   }, predictions))
        except Exception:
            return
        time_spent = time.perf_counter() - start
        global last_inference
        last_inference = LastInference(predictions, gt, time_spent)
        return json.dumps({"predictions": last_inference.predictions,
                           "gnnEvaluationTime": last_inference.time_spent})
Example #14
def beam_search_eval(document, summary, beam_size):
    batch, sum_seq_len = summary.shape
    start = [tokenizer.vocab_size] * batch
    end = [tokenizer.vocab_size + 1]
    doc_input = tf.tile(document, multiples=[beam_size, 1])
    with tf.GradientTape() as tape:

        def decoder_query(output):
            enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
                doc_input, output)
            predictions, attention_weights, dec_output = model(
                doc_input, output, enc_padding_mask, combined_mask,
                dec_padding_mask, True)

            # (batch_size, 1, target_vocab_size)
            return (predictions[:, -1:, :])

    return (beam_search(decoder_query,
                        start,
                        beam_size,
                        decode_length=sum_seq_len,
                        vocab_size=config.input_vocab_size,
                        alpha=h_parms.length_penalty,
                        stop_early=False,
                        eos_id=[end]), tape)
Example #15
def beam_test():

    models = ["124M", "1558M"]
    context = "The next part is"
    length = 20

    for model in models:
        starting_beam = 6
        ending_beam = 8
        beam_incr = 1

        for i in range(starting_beam, ending_beam, beam_incr):
            beam_search(model_name=model,
                        input_samples=[context],
                        beam_width=i,
                        length=length)
        print("Finished for model: " + model + "\n\n")
Example #16
def make_name_beam(model, vocab, hps):
    """
    Beam Search
    """

    best_seq = beam_search(model, vocab, hps)
    chars = [vocab.id2char(t) for t in best_seq.tokens[1:]]
    tokens = [t if t != '\s' else ' ' for t in chars]
    tokens = ''.join(tokens)

    print(tokens)
Example #17
    def get_arcs(self, document):
        from covington_transistion import Configuration
        arcs = []
        logger = logging.getLogger('progress_logger')
        for paragraph in range(document.get_amount_of_paragraphs()):
            entities = document.get_entities(paragraph=paragraph)
            logger.info("Paragraph {p}, entities {e}".format(p=paragraph,
                                                             e=len(entities)))
            if entities:
                config = Configuration(entities, document)
                best_end = beam_search(config, self.network)
                arcs.extend(best_end.configuration.get_arcs())
        return arcs
Example #18
    def evaluate_batch(self, article):

        self.setup_valid()
        batch = self.batcher.next_batch()
        start_id = self.vocab.word2id(data.START_DECODING)
        end_id = self.vocab.word2id(data.STOP_DECODING)
        unk_id = self.vocab.word2id(data.UNKNOWN_TOKEN)
        decoded_sents = []
        ref_sents = []
        article_sents = []
        rouge = Rouge()
        while batch is not None:
            enc_batch, enc_lens, enc_padding_mask, enc_batch_extend_vocab, extra_zeros, ct_e = get_enc_data(
                batch)
            with T.autograd.no_grad():
                enc_batch = self.model.embeds(enc_batch)
                enc_out, enc_hidden = self.model.encoder(enc_batch, enc_lens)

            #-----------------------Summarization----------------------------------------------------
            with T.autograd.no_grad():
                pred_ids = beam_search(enc_hidden, enc_out, enc_padding_mask,
                                       ct_e, extra_zeros,
                                       enc_batch_extend_vocab, self.model,
                                       start_id, end_id, unk_id)

            for i in range(len(pred_ids)):
                decoded_words = data.outputids2words(pred_ids[i], self.vocab,
                                                     batch.art_oovs[i])
                if len(decoded_words) < 2:
                    decoded_words = "xxx"
                else:
                    decoded_words = " ".join(decoded_words)
                decoded_sents.append(decoded_words)
                abstract = batch.original_abstracts[i]
                article = batch.original_articles[i]
                ref_sents.append(abstract)
                article_sents.append(article)

            batch = self.batcher.next_batch()

        load_file = self.opt.load_model

        if article:
            self.print_original_predicted(decoded_sents, ref_sents,
                                          article_sents, load_file)

        scores = rouge.get_scores(decoded_sents, ref_sents)
        rouge_1 = sum([x["rouge-1"]["f"] for x in scores]) / len(scores)
        rouge_2 = sum([x["rouge-2"]["f"] for x in scores]) / len(scores)
        rouge_l = sum([x["rouge-l"]["f"] for x in scores]) / len(scores)
        logger.info(load_file + " rouge_1:" + "%.4f" % rouge_1 + " rouge_2:" +
                    "%.4f" % rouge_2 + " rouge_l:" + "%.4f" % rouge_l)
Example #19
def evaluate(data, tokenizer, model, device):

    src = data.source
    src_mask = data.source_mask
    utter_type = data.utter_type
    with torch.no_grad():
        output_token = beam_search(device, src, src_mask, utter_type, model, tokenizer)
    while output_token[-1] == 0:
        output_token = output_token[:-1]
    while output_token[-1] == 102:
        output_token = output_token[:-1]
    text = tokenizer.decode(output_token)
    return text
Example #20
def eval_test(config, qa, dialogue, tokenizer, model, device):
    input_ids, input_mask, utter_type = create_single_sample(qa, dialogue, config.max_src_num_length, tokenizer)

    src = torch.tensor(input_ids, dtype=torch.long, device=device)
    src = src.unsqueeze(0).transpose(0, 1)
    src_mask = torch.tensor(input_mask, dtype=torch.long, device=device)
    src_mask = src_mask.unsqueeze(0).transpose(0, 1)
    utter_type = torch.tensor(utter_type, device=device)
    utter_type = utter_type.unsqueeze(0)
    with torch.no_grad():
        output_token = beam_search(device, src, src_mask, utter_type, model, tokenizer)
    text = tokenizer.decode(output_token)
    return text
Example #21
def draft_summary_beam_search(input_ids, beam_size):

    log.info(f"Building: 'Draft beam search decoder'")

    batch = tf.shape(input_ids)[0]
    end = [SEP_ID]
    # (batch_size, seq_len, d_bert)
    enc_output_ = model.bert_model(input_ids)[0]
    enc_output = tf.tile(enc_output_, multiples=[beam_size, 1, 1])
    input_ids = tf.tile(input_ids, multiples=[beam_size, 1])
    # (batch_size, 1, 1, seq_len), (_), (batch_size, 1, 1, seq_len)
    dec_input = tf.convert_to_tensor([CLS_ID] * batch)
    output = tf.expand_dims(dec_input, 0)

    def beam_search_decoder(output):
        _, _, dec_padding_mask = create_masks(input_ids, output)
        embeddings = model.embedding(output)
        predictions, dec_op, attention_weights = model.decoder(
            input_ids, embeddings, enc_output, False, None, dec_padding_mask)
        if config.copy_gen:
            predictions = model.decoder.pointer_generator(
                dec_op,
                predictions,
                attention_weights,
                input_ids,
                tf.shape(input_ids)[1],
                tf.shape(output)[-1],
                False)
        # (batch_size, 1, target_vocab_size)
        return (predictions[:, -1:, :])

    return (beam_search(beam_search_decoder,
                        dec_input,
                        beam_size,
                        config.summ_length,
                        config.input_vocab_size,
                        h_parms.length_penalty,
                        stop_early=False,
                        eos_id=[end]), enc_output_)
Example #22
    def add_decoder_predict_op(self,
                               embeddings,
                               states,
                               label_embeddings,
                               do_beam_search=True):
        state_t = states[-1]
        with tf.variable_scope("DECODE"):
            cell = LSTMCell(self.config)
            preds = []
            preds_proj = []

            U = tf.get_variable(
                name="U",
                initializer=tf.contrib.layers.xavier_initializer(),
                shape=(self.config.lstm_num_units, self.config.lstm_num_units))
            b = tf.get_variable(
                name="b",
                initializer=tf.contrib.layers.xavier_initializer(),
                shape=(self.config.lstm_num_units, ))
            word_ind = [
                self.labels_vocab.START for _ in range(self.config.batch_size)
            ]

            if do_beam_search:
                with tf.variable_scope("DECODING") as scope:
                    preds_proj = beam_search(word_ind, label_embeddings, U, b,
                                             cell, state_t,
                                             self.config.max_dec_length,
                                             self.config.batch_size,
                                             self.run_cell, scope)
            else:
                with tf.variable_scope("DECODING") as scope:
                    for time_step in range(self.config.max_dec_length):
                        word = self.run_cell(word_ind, label_embeddings, U, b,
                                             cell, time_step, state_t, scope)

                        preds.append(word)
                        word_ind = tf.argmax(word, axis=1)
                        preds_proj.append(word_ind)

                preds_proj = tf.stack(preds_proj, axis=1)
                preds = tf.stack(preds, axis=1)

            assert preds_proj.get_shape().as_list() == [
                self.config.batch_size, self.config.max_dec_length
            ]
            return preds_proj, preds
Example #23
File: main.py Project: kpsc/nlp
def test():
    du = DataLoader(**data_config)
    params['src_vcb_size'] = du.vocab_size
    params['tgt_vcb_size'] = du.vocab_size
    params['batch_size'] = 1
    tf.reset_default_graph()
    with tf.Session() as sess:
        model = Seq2Seq(params, mode='decode')
        sess.run(tf.global_variables_initializer())
        # model.load(sess, './logs/model/model_1.ckpt')
        model.load(sess, tf.train.latest_checkpoint('./logs/model/'))

        for source, source_len, target, _, _ in du.test_data(
                './data/dialog.test'):
            result = beam_search(sess, model, du.vcb, source, source_len)
            print('source: ', du.transform_indexs(source[0]))
            print('target: ', du.transform_indexs(target[0]))
            print('predict: ', du.transform_indexs(result))
            print('')
Example #24
    def predict(inp_sentence):
      start_token = [tokenizer_de.vocab_size]
      end_token = [tokenizer_de.vocab_size + 1]

      # inp sentence is german, hence adding the start and end token
      inp_sentence = start_token + tokenizer_de.encode(inp_sentence) + end_token
      encoder_input = tf.expand_dims(inp_sentence, 0)

      # as the target is english, the first word to the transformer should be the
      # english start token.
      decoder_input = [tokenizer_en.vocab_size]
      output = tf.expand_dims(decoder_input, 0)
      

      # predictions.shape == (batch_size, seq_len, vocab_size)
      def symbols_to_logits(output):          
          batched_input = tf.tile(encoder_input, [beam_width, 1])
          enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            batched_input, output)
          predictions, attention_weights = transformer1(batched_input,
                                                     output,
                                                     False,
                                                     enc_padding_mask,
                                                     combined_mask,
                                                     dec_padding_mask)
          predictions = predictions[:, -1, :]

          return  predictions
      
      finished_seq, finished_scores, states = beam_search(symbols_to_logits,
                 output,
                 beam_width,
                 MAX_LENGTH,
                 target_vocab_size,
                 alpha,
                 states=None,
                 eos_id=tokenizer_en.vocab_size+1,
                 stop_early=True,
                 use_tpu=False,
                 use_top_k_with_unique=True)
      
      return finished_seq[0, 0, :]
Example #25
def draft_decoder(self,
                  input_ids,
                  enc_output,
                  beam_size,
                  length_penalty,
                  temperature,
                  top_p,
                  top_k,
                  batch_size,
                  training=False):
    """
        Inference call, builds a draft output_sequence auto-regressively
        """
    start_ids = tf.repeat(config.CLS_ID, repeats=batch_size)
    input_ids = tfa.seq2seq.tile_batch(input_ids, multiplier=beam_size)
    enc_output = tfa.seq2seq.tile_batch(enc_output, multiplier=beam_size)

    def perform_beam_search(dec_input):

        return query_decoder(self,
                             enc_output,
                             input_ids,
                             dec_input,
                             batch_size,
                             temperature,
                             top_p,
                             top_k,
                             training=training)

    predicted_beam_search_op, _, _, attention_dist = beam_search(
        perform_beam_search,
        initial_ids=start_ids,
        beam_size=beam_size,
        decode_length=config.target_seq_length,
        vocab_size=config.target_vocab_size,
        alpha=length_penalty,
        stop_early=False,
        eos_id=config.SEP_ID)
    predicted_output_sequence = predicted_beam_search_op[:, 0, :]

    return predicted_output_sequence, attention_dist
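Beam search with a beam size of 1 reduces to greedy decoding, which the last example in this listing also implements as a fallback branch. For contrast with the beam-search calls above, a standalone greedy loop over the same kind of step function could be sketched as follows; greedy_decode_sketch and step_fn are illustrative names, not taken from any project above.

def greedy_decode_sketch(step_fn, start_id, eos_id, max_len):
    """Illustrative only: pick the single most probable next token at every step."""
    seq = [start_id]
    for _ in range(max_len):
        log_probs = step_fn(seq)                      # {token_id: log_prob}
        next_token = max(log_probs, key=log_probs.get)
        seq.append(next_token)
        if next_token == eos_id:
            break
    return seq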
Example #26
    def abstract(self, article):
        start_id = self.vocab.word2id(data.START_DECODING)
        end_id = self.vocab.word2id(data.STOP_DECODING)
        unk_id = self.vocab.word2id(data.UNKNOWN_TOKEN)
        example = Example(' '.join(jieba.cut(article)), '', self.vocab)
        batch = Batch([example], self.vocab, 1)
        enc_batch, enc_lens, enc_padding_mask, enc_batch_extend_vocab, extra_zeros, ct_e = get_enc_data(
            batch)
        with T.autograd.no_grad():
            enc_batch = self.model.embeds(enc_batch)
            enc_out, enc_hidden = self.model.encoder(enc_batch, enc_lens)
            pred_ids = beam_search(enc_hidden, enc_out, enc_padding_mask, ct_e,
                                   extra_zeros, enc_batch_extend_vocab,
                                   self.model, start_id, end_id, unk_id)

        for i in range(len(pred_ids)):
            decoded_words = data.outputids2words(pred_ids[i], self.vocab,
                                                 batch.art_oovs[i])
            decoded_words = " ".join(decoded_words)
        return decoded_words
Example #27
def treino_beam_search():
    with open('Treino/beam_search.csv', 'a+') as arq:
        arq.write(
            'Instancia,Estado,Valor,Tamanho,Execucao,num_best,num_iter\n')
    for problema in problemas:
        # valores = []
        # tempos = []
        print(problema[0], "-----")
        for m in hyper_param_beam_search:
            print(m)
            pes_max = problema[1]
            val = mochila.get_val_from_vt(problema[2])
            pes = mochila.get_pes_from_vt(problema[2])
            estado_inicial = mochila.estado_inicial_aleatorio(pes, pes_max)

            tempo_inicio = default_timer()
            solucao = beam_search(m, estado_inicial, pes, val, pes_max)
            tempo_total = default_timer() - tempo_inicio

            mochila.save_estado(solucao, val, pes, 'Treino/beam_search.csv',
                                tempo_total, problema[0], [m])
Example #28
def make_translation_predictions(model, use_bs2=False):
    print('Generating translations')
    with open('test_predictions.txt', 'w') as outfile:
        with open('source_test.txt', 'r') as infile:
            for line in tqdm(list(infile)):
                tokens = [DE.vocab.stoi[w] for w in tokenize_de(line.strip())]
                src = ntorch.tensor(tokens, names="srcSeqlen")
                if use_bs2:
                    translation = beam_search2(model,
                                               src,
                                               beam_size=5,
                                               num_results=10)[0]
                else:
                    translation = beam_search(model,
                                              src,
                                              beam_size=5,
                                              num_results=10)[0]

                assert translation[0] == BOS_IND
                sent = ' '.join(EN.vocab.itos[i] for i in translation[1:])
                outfile.write(sent + '\n')
Example #29
def run_test_beamsearch(verbose):
    opt_value = []  # List for the values found
    opt_time = []  # List for the times found
    i = 0

    for problem in problems:

        # Running the algorithm
        state, size, value, time = beam_search(max_size=problem[0],
                                               values=problem[1],
                                               m=hiperparam,
                                               max_time=300)

        # Print in case you want to follow along
        if verbose:
            print('Problem', problem_name[i],
                  'finished with (opt_value, opt_size, time) equals',
                  (value, size, time))

        # Saving the best values
        opt_value.append(value)
        opt_time.append(time)

        i = i + 1

    mean_value = np.mean(opt_value)
    std_value = np.std(opt_value)
    mean_time = np.mean(opt_time)
    std_time = np.std(opt_time)

    # Print in case you want to follow along
    if verbose:
        print('The mean and std for the values found were:', mean_value, '+-',
              std_value)
        print('The mean and std for the times found were:', mean_time, '+-',
              std_time)

    return opt_value, opt_time, [mean_value, std_value, mean_time, std_time]
Example #30
def tower_infer_dec(chars,
                    scope,
                    rnn_cell,
                    dec_cell,
                    word_emb,
                    rnn_state,
                    out_reuse_vars=False,
                    dev='/cpu:0'):

    with tf.device(dev):
        with tf.variable_scope('embatch_size', reuse=True):
            # (vocab_size, latent_dim)
            emb_char = tf.sg_emb(name='emb_char',
                                 voca_size=Hp.char_vs,
                                 dim=Hp.hd,
                                 dev=dev)
            emb_word = tf.sg_emb(name='emb_word',
                                 emb=word_emb,
                                 voca_size=Hp.word_vs,
                                 dim=300,
                                 dev=dev)

    print(chars)
    ch = chars
    ch = tf.reverse_sequence(input=ch,
                             seq_lengths=[Hp.c_maxlen] * Hp.batch_size,
                             seq_dim=1)
    reuse_vars = reuse_vars_enc = True

    # --------------------------   BYTENET ENCODER   --------------------------

    with tf.variable_scope('encoder'):
        # embed table lookup
        enc = ch.sg_lookup(emb=emb_char)  #(batch, sentlen, latentdim)
        # loop dilated conv block
        for i in range(Hp.num_blocks):
            enc = (enc.sg_res_block(size=5,
                                    rate=1,
                                    name="enc1_%d" % (i),
                                    is_first=True,
                                    reuse_vars=reuse_vars,
                                    dev=dev).sg_res_block(
                                        size=5,
                                        rate=2,
                                        name="enc2_%d" % (i),
                                        reuse_vars=reuse_vars,
                                        dev=dev).sg_res_block(
                                            size=5,
                                            rate=4,
                                            name="enc4_%d" % (i),
                                            reuse_vars=reuse_vars,
                                            dev=dev).sg_res_block(
                                                size=5,
                                                rate=8,
                                                name="enc8_%d" % (i),
                                                reuse_vars=reuse_vars,
                                                dev=dev).sg_res_block(
                                                    size=5,
                                                    rate=16,
                                                    name="enc16_%d" % (i),
                                                    reuse_vars=reuse_vars,
                                                    dev=dev))
        byte_enc = enc
        # --------------------------   QCNN + QPOOL ENCODER #1  --------------------------

        with tf.variable_scope('quazi'):

            #quasi cnn layer ZFO  [batch * 3, seqlen, dim2 ]
            conv = byte_enc.sg_quasi_conv1d(is_enc=True,
                                            size=4,
                                            name="qconv_1",
                                            dev=dev,
                                            reuse_vars=reuse_vars)
            # c = f * c + (1 - f) * z, h = o*c [batch * 4, seqlen, hd]
            pool0 = conv.sg_quasi_rnn(is_enc=False,
                                      att=False,
                                      name="qrnn_1",
                                      reuse_vars=reuse_vars,
                                      dev=dev)

            qpool_last = pool0[:, -1, :]

    # --------------------------   MAXPOOL along time dimension   --------------------------

    inpt_maxpl = tf.expand_dims(byte_enc, 1)  # [batch, 1, seqlen, channels]
    maxpool = tf.nn.max_pool(inpt_maxpl, [1, 1, Hp.c_maxlen, 1], [1, 1, 1, 1],
                             'VALID')
    maxpool = tf.squeeze(maxpool, [1, 2])

    # --------------------------   HIGHWAY   --------------------------

    concat = qpool_last + maxpool
    with tf.variable_scope('highway', reuse=reuse_vars):
        input_lstm = highway(concat, concat.get_shape()[-1], num_layers=1)

    # --------------------------   CONTEXT LSTM  --------------------------

    input_lstm = tf.nn.dropout(input_lstm, Hp.keep_prob)

    with tf.variable_scope('contx_lstm', reuse=reuse_vars):
        output, rnn_state = rnn_cell(input_lstm, rnn_state)

    beam_size = 8
    reuse_vars = out_reuse_vars

    greedy = False
    if greedy:

        dec_state = rnn_state
        dec_out = []
        d_out = tf.constant([1] * Hp.batch_size)
        for idx in range(Hp.w_maxlen):
            w_input = d_out.sg_lookup(emb=emb_word)
            dec_state = tf.contrib.rnn.LSTMStateTuple(c=dec_state.c,
                                                      h=dec_state.h)
            with tf.variable_scope('dec_lstm', reuse=idx > 0 or reuse_vars):
                d_out, dec_state = dec_cell(w_input, dec_state)

            dec_out.append(d_out)
            d_out = tf.expand_dims(d_out, 1).sg_conv1d_gpus(size=1,
                                                            dim=Hp.word_vs,
                                                            name="out_conv",
                                                            act="linear",
                                                            dev=dev,
                                                            reuse=idx > 0
                                                            or reuse_vars)
            d_out = tf.squeeze(d_out).sg_argmax()

        dec_out = tf.stack(dec_out, 1)

        dec = dec_out.sg_conv1d_gpus(size=1,
                                     dim=Hp.word_vs,
                                     name="out_conv",
                                     act="linear",
                                     dev=dev,
                                     reuse=True)
        return dec.sg_argmax(), rnn_state

    else:

        # ------------------ BEAM SEARCH --------------------
        dec_state = tf.contrib.rnn.LSTMStateTuple(
            tf.tile(tf.expand_dims(rnn_state[0], 1), [1, beam_size, 1]),
            tf.tile(tf.expand_dims(rnn_state[1], 1), [1, beam_size, 1]))
        initial_ids = tf.constant([1] * Hp.batch_size)

        def symbols_to_logits_fn(ids, dec_state):
            dec = []
            dec_c, dec_h = [], []
            # (batch x beam_size x decoded_seq)
            ids = tf.reshape(ids, [Hp.batch_size, beam_size, -1])
            print("dec_state ", dec_state[0].get_shape().as_list())
            for ind in range(beam_size):
                with tf.variable_scope('dec_lstm', reuse=ind > 0
                                       or reuse_vars):
                    w_input = ids[:, ind, -1].sg_lookup(emb=emb_word)
                    dec_state0 = tf.contrib.rnn.LSTMStateTuple(
                        c=dec_state.c[:, ind, :], h=dec_state.h[:, ind, :])
                    dec_out, dec_state_i = dec_cell(w_input, dec_state0)
                    dec_out = tf.expand_dims(dec_out, 1)
                dec_i = dec_out.sg_conv1d_gpus(size=1,
                                               dim=Hp.word_vs,
                                               name="out_conv",
                                               act="linear",
                                               dev=dev,
                                               reuse=ind > 0 or reuse_vars)

                dec.append(tf.squeeze(dec_i, 1))
                dec_c.append(dec_state_i[0])
                dec_h.append(dec_state_i[1])
            return tf.stack(dec, 1), tf.contrib.rnn.LSTMStateTuple(
                tf.stack(dec_c, 1), tf.stack(dec_h, 1))

        final_ids, final_probs = beam_search.beam_search(symbols_to_logits_fn,
                                                         dec_state,
                                                         initial_ids,
                                                         beam_size,
                                                         Hp.w_maxlen - 1,
                                                         Hp.word_vs,
                                                         3.5,
                                                         eos_id=2)

        return final_ids[:, 0, :], rnn_state