Example #1
    def _get_top_k_sequences(self, log_probs, wordpiece_mask, k):
        batch_size = log_probs.size()[0]
        seq_length = log_probs.size()[1]

        beam_search = BeamSearch(self._end_index,
                                 max_steps=seq_length,
                                 beam_size=k,
                                 per_node_beam_size=self._per_node_beam_size)
        beam_log_probs = torch.nn.functional.pad(
            log_probs, pad=(0, 2, 0, 0, 0, 0), value=-1e7
        )  # add low log probabilities for start and end tags used in the beam search
        start_predictions = beam_log_probs.new_full(
            (batch_size, ), fill_value=self._start_index).long()

        # Shape: (batch_size, beam_size, seq_length)
        top_k_predictions, seq_log_probs = beam_search.search(
            start_predictions, {
                'log_probs': beam_log_probs,
                'wordpiece_mask': wordpiece_mask,
                'step_num': beam_log_probs.new_zeros((batch_size, )).long()
            }, self.take_step)

        # get rid of start and end tags if they slipped in
        top_k_predictions[top_k_predictions > 2] = 0

        return top_k_predictions
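
AllenNLP-style BeamSearch.search expects the step callable (here self.take_step) to map the beams' last predictions plus a state dict to per-step log-probabilities. The original take_step is not shown; the following is a minimal sketch of what it could look like for this precomputed-log-probs setup (handling of wordpiece_mask is omitted and left as an assumption):

    def take_step(self, last_predictions, state):
        # BeamSearch has already expanded every state tensor to (batch * beam) rows.
        step_num = state['step_num']    # shape: (group_size,)
        log_probs = state['log_probs']  # shape: (group_size, seq_length, num_tags + 2)

        # read off the precomputed log-probabilities at the current position
        rows = torch.arange(log_probs.size(0), device=log_probs.device)
        step_log_probs = log_probs[rows, step_num]

        state['step_num'] = step_num + 1
        return step_log_probs, state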
Example #2
    def _init_graph(self):
        """
        init graph
        """
        self.ys = (self.input_y, None, None)
        self.xs = (self.input_x, None)
        self.memory = self.model.encode(self.xs, False)[0]
        self.logits = self.model.decode(self.xs, self.ys, self.memory, False)[0]

        ckpt = self.tf.train.get_checkpoint_state(self.model_dir).all_model_checkpoint_paths[-1]

        graph = self.logits.graph
        sess_config = self.tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True

        saver = self.tf.train.Saver()
        self.sess = self.tf.Session(config=sess_config, graph=graph)

        self.sess.run(self.tf.global_variables_initializer())
        self.tf.reset_default_graph()
        saver.restore(self.sess, ckpt)

        self.bs = BeamSearch(self.model,
                             self.hp.beam_size,
                             list(self.idx2token.keys())[2],
                             list(self.idx2token.keys())[3],
                             self.idx2token,
                             self.hp.maxlen2,
                             self.input_x,
                             self.input_y,
                             self.logits)
Example #3
    def beam_sample(self, image_features, beam_size=5):
        batch_size = image_features.size(0)
        beam_searcher = BeamSearch(beam_size, batch_size, 17)

        # init the result with zeros and lstm states
        states = self.init_hidden_noise(image_features)
        states = (states[0].repeat(1, beam_size, 1).cuda(),
                  states[1].repeat(1, beam_size, 1).cuda())

        # embed the start symbol
        words_feed = self.embed.word_embeddings([self.embed.START_SYMBOL] * batch_size) \
            .repeat(beam_size, 1).unsqueeze(1).cuda()

        for i in range(self.max_sentence_length):
            hidden, states = self.lstm(words_feed, states)
            outputs = self.output_linear(hidden.squeeze(1))
            beam_indices, words_indices = beam_searcher.expand_beam(
                outputs=outputs)

            if len(beam_indices) == 0 or i == 15:
                # all beams are finished (or the step cap is hit): read off the best beam
                generated_captions = beam_searcher.get_results()[:, 0]
                outcaps = self.embed.words_from_indices(
                    generated_captions.cpu().numpy())
                break
            else:
                words_feed = torch.stack([
                    self.embed.word_embeddings_from_indices(words_indices)
                ]).view(beam_size, 1, -1).cuda()
        return " ".join(outcaps)  # .split(self.embed.END_SYMBOL)[0]
Example #4
    def beam_search(self, input_seq, beam_size=3, attentionOverrideMap=None, correctionMap=None, unk_map=None,
                    beam_length=0.5, beam_coverage=0.5,
                    max_length=MAX_LENGTH):

        torch.set_grad_enabled(False)

        input_seqs = [indexes_from_sentence(self.input_lang, input_seq)]
        input_lengths = [len(seq) for seq in input_seqs]
        input_batches = Variable(torch.LongTensor(input_seqs)).transpose(0, 1)

        if use_cuda:
            input_batches = input_batches.cuda()

        self.encoder.train(False)
        self.decoder.train(False)

        encoder_outputs, encoder_hidden = self.encoder(input_batches, input_lengths, None)

        decoder_hidden = encoder_hidden

        beam_search = BeamSearch(self.decoder, encoder_outputs, decoder_hidden, self.output_lang, beam_size,
                                 attentionOverrideMap,
                                 correctionMap, unk_map, beam_length=beam_length, beam_coverage=beam_coverage)
        result = beam_search.search()

        self.encoder.train(True)
        self.decoder.train(True)

        torch.set_grad_enabled(True)

        return result  # Return a list of indexes, one for each word in the sentence, plus EOS
Example #5
def rnn_generate(gen_input_file, model_path, max_gen_len, beam_size,
                 word_dict_file):
    """
    use RNN model to generate sequences.

    :param word_id_dict: vocab.
    :type word_id_dict: dictionary with content of "{word, id}",
                        "word" is string type , "id" is int type.
    :param num_words: the number of the words to generate.
    :type num_words: int
    :param beam_size: beam width.
    :type beam_size: int
    :return: save prediction results to output_file
    """

    assert os.path.exists(gen_input_file), "test file does not exist!"
    assert os.path.exists(model_path), "trained model does not exist!"
    assert os.path.exists(
        word_dict_file), "word dictionary file does not exist!"

    # load word dictionary
    word_2_ids = load_dict(word_dict_file)
    try:
        UNK_ID = word_2_ids["<unk>"]
    except KeyError:
        logger.fatal("the word dictionary must contain a <unk> token!")
        sys.exit(-1)

    # initialize paddle
    paddle.init(use_gpu=conf.use_gpu, trainer_count=conf.trainer_count)

    # load the trained model
    pred_words = rnn_lm(
        len(word_2_ids),
        conf.emb_dim,
        conf.hidden_size,
        conf.stacked_rnn_num,
        conf.rnn_type,
        is_infer=True)

    parameters = paddle.parameters.Parameters.from_tar(
        gzip.open(model_path, "r"))

    inferer = paddle.inference.Inference(
        output_layer=pred_words, parameters=parameters)

    generator = BeamSearch(inferer, word_dict_file, beam_size, max_gen_len)
    # generate text (read from the asserted gen_input_file rather than conf.gen_file)
    with open(gen_input_file, "r") as fin, open(conf.gen_result, "w") as fout:
        for idx, line in enumerate(fin):
            fout.write("%d\t%s" % (idx, line))
            for gen_res in generator.gen_a_sentence([
                    word_2_ids.get(w, UNK_ID)
                    for w in line.lower().strip().split()
            ]):
                fout.write("%s\n" % gen_res)
            fout.write("\n")
Example #6
def main():

    english = tokenize("data/100ktok.low.en")
    spanish = tokenize("data/100ktok.low.es")

    training_set, held_out_set, test_set = get_datasets(english, spanish)
    translations = get_word_translations("100000_trans.txt")
    search = BeamSearch(training_set, held_out_set, translations)

    print(search.translate(test_set[8]))
Example #7
def main():
    """
    Creates a temporary file for the given input which is
    used to create a dataset, that is then evaluated on the given model.
    The generated summary is printed to standard out.
    """
    args, unknown_args = prepare_arg_parser().parse_known_args()
    model_file = args.model_file

    with suppress_stdout_stderr():
        model, _optimizer, vocab, _stats, cfg = train.load_model(
            model_file, unknown_args
        )

    _, filename = tempfile.mkstemp()
    try:
        with open(filename, "a") as f:
            input_ = sys.stdin.read()
            article = preprocess.parse(input_)
            print(f"{article}\tSUMMARY_STUB", file=f)

        with suppress_stdout_stderr():
            dataset = Dataset(filename, vocab, cfg)

        batch = next(dataset.generator(1, cfg.pointer))

        # don't enforce any min lengths (useful for short cmdline summaries)
        setattr(cfg, "min_summary_length", 1)
        bs = BeamSearch(model, cfg=cfg)
        summary = evaluate.batch_to_text(bs, batch)[0]
        print(f"SUMMARY:\n{summary}")
    finally:
        os.remove(filename)
Example #8
 def translate(self, docs):
     """Translate a batch of documents."""
     batch_size = docs.inp.size(0)
     spt_ids = self.spt_ids_C
     decode_strategy = BeamSearch(self.beam_size, batch_size, self.n_best,
                                  self.min_length, self.max_length, spt_ids,
                                  self.eos_mapping)
     return self._translate_batch_with_strategy(docs, decode_strategy)
Example #9
 def beam_search(self,
                 initial_sequence,
                 forbid_movies=None,
                 temperature=1,
                 **kwargs):
     """
     Beam search sentence generation
     :param initial_sequence: list giving the initial sequence of tokens
     :param kwargs: additional parameters to pass to model forward pass (e.g. a conditioning context)
     :return:
     """
     beam_search = BeamSearch(self.beam_size, initial_sequence,
                              self.word2id["</s>"])
     beams = beam_search.beams
     if forbid_movies is None:
         forbid_movies = set()  # guard: .union() below would fail on None
     for i in range(self.max_sequence_length):
         # compute probabilities for each beam
         probabilities = []
         for beam in beams:
             # add batch_dimension
             model_input = Variable(torch.LongTensor(
                 beam.sequence)).unsqueeze(0)
             if self.model.cuda_available:
                 model_input = model_input.cuda()
             beam_forbidden_movies = forbid_movies.union(
                 beam.mentioned_movies)
             prob = self.model(input=model_input,
                               lengths=[len(beam.sequence)],
                               log_probabilities=False,
                               forbid_movies=beam_forbidden_movies,
                               temperature=temperature,
                               **kwargs)
             # get probabilities for the next token to generate
             probabilities.append(prob[0, -1, :].cpu())
         # update beams
         beams = beam_search.search(probabilities,
                                    n_gram_block=self.n_gram_block)
         # replace movie names with the corresponding words
         for beam in beams:
             if beam.sequence[-1] > len(self.word2id):
                 # update the list of movies mentioned for preventing repeated recommendations
                 beam.mentioned_movies.add(beam.sequence[-1] -
                                           len(self.word2id))
                 beam.sequence[-1:] = replace_movie_with_words(
                     beam.sequence[-1], self.movieId2name, self.word2id)
     return beams
Example #10
 def __init__(self, encoder, decoder,
             decoding_style="greedy", special_tokens_dict=None,
             max_decoding_steps=128, beam_width=10):
     super().__init__()
     self.encoder = encoder
     self.decoder = decoder
      if decoding_style not in ["greedy", "beam_search"]:
          print(f"'{decoding_style}' is not an allowed decoding style; falling back to greedy")
          decoding_style = "greedy"
     self.decoding_style = decoding_style
     if special_tokens_dict is None:
         self.special_tokens_dict = {"<pad>":0, "<bos>":1, "<eos>":2, "<unk>":3}
     else:
         self.special_tokens_dict = special_tokens_dict
     self.max_decoding_steps = max_decoding_steps
     self._beam_search = BeamSearch(self.special_tokens_dict["<eos>"],
                                    max_decoding_steps,
                                    beam_width)
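
The constructor above only stores the decoding configuration; dispatching on it happens at inference time. A hypothetical sketch of such a dispatch (not from the original repository; it assumes an AllenNLP-style BeamSearch.search(start, state, step) API, a self._take_step callable, and torch in scope):

    def decode(self, start_predictions, state):
        if self.decoding_style == "beam_search":
            # search returns (predictions, log_probs); keep each batch's best beam
            predictions, _ = self._beam_search.search(
                start_predictions, state, self._take_step)
            return predictions[:, 0, :]
        # greedy: repeatedly take the arg-max token up to the step cap
        steps = [start_predictions]
        for _ in range(self.max_decoding_steps):
            log_probs, state = self._take_step(steps[-1], state)
            steps.append(log_probs.argmax(dim=-1))
        return torch.stack(steps[1:], dim=-1)

A real implementation would also stop early once every sequence has emitted self.special_tokens_dict["<eos>"].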
Example #11
def main():

    english = tokenize("data/100ktok.low.en")
    spanish = tokenize("data/100ktok.low.es")

    training_set, test_set, translated_set = get_datasets(english, spanish)
    translations = get_word_translations("3000_trans.txt")
    search = BeamSearch(training_set, translations)

    test_output = open('trans_beam.txt','w')
    true_output = open('trans_true.txt','w')

    for i in range(len(test_set)):
        print "Translating sentence", i, "..."
        test_output.write(' '.join(search.translate(test_set[i])) + "\n")
        true_output.write(' '.join(translated_set[i]) + "\n")

    test_output.close()
    true_output.close()
Example #12
def main():

    english = tokenize("data/100ktok.low.en")
    spanish = tokenize("data/100ktok.low.es")

    training_set, test_set, translated_set = get_datasets(english, spanish)
    translations = get_word_translations("3000_trans.txt")

    print "Original Sentence:", ' '.join(test_set[0])

    translator = DirectTrans(translations)
    print "Direct Translation:", ' '.join(translator.translate(test_set[0]))

    test_output = open('trans_beam.txt','w')
    true_output = open('trans_true.txt','w')

    search = BeamSearch(training_set, translations)
    print "Beam Translation:", ' '.join(search.translate(test_set[0]))
    print "True Translation:", ' '.join(translated_set[0])
Example #13
def train(dataset, dataval, y_max_length, steps_per_epoch, vocab, params):
    print("training")
    print(params.__dict__)
    # seq2seq = Seq2seq_attention(len(vocab), params)
    seq2seq = Seq2seq_attention(len(vocab),
                                params,
                                embedding_matrix=fasttext_embedding(
                                    params, sentences=None))
    if params.finetune:
        seq2seq.restore_checkpoint()
        seq2seq.encoder.embedding.trainable = True
        seq2seq.decoder.embedding.trainable = True
        seq2seq.decoder.fc1.trainable = True
    else:
        seq2seq.encoder.embedding.trainable = False
        seq2seq.decoder.embedding.trainable = False
        seq2seq.decoder.fc1.trainable = False
    # it = iter(dataval)
    # inp, out = next(it)
    beam_search = BeamSearch(seq2seq, params.beam_size, vocab.bos, vocab.eos,
                             y_max_length)
    # seq2seq.compare_input_output(
    #     inp, vocab, y_max_length, out, beam_search)
    seq2seq.summary()

    # seq2seq.encoder.summary()
    # seq2seq.decoder.summary()

    # def my_loss(truth, preds):
    #     return sum(tf_rouge_l(preds, truth, vocab.eos))
    def callback():
        print("train set:")
        it = iter(dataset.unbatch())
        for _ in range(3):
            inp, out = next(it)
            seq2seq.compare_input_output(inp, vocab, y_max_length, out,
                                         beam_search)
        print("validation set:")
        it = iter(dataval)
        for _ in range(3):
            inp, out = next(it)
            seq2seq.compare_input_output(inp, vocab, y_max_length, out,
                                         beam_search)

    # seq2seq.train_epoch(dataset, epochs, steps_per_epoch, vocab.bos,
    #                     restore_checkpoint=True, dataval=dataval, callback=None)
    seq2seq.train_epoch(dataset,
                        params.epochs,
                        steps_per_epoch,
                        vocab.bos,
                        y_max_length,
                        restore_checkpoint=params.restore,
                        dataval=dataval,
                        epoch_verbosity=params.epoch_verbosity,
                        callback=callback)
Example #14
    def generate(self, input_variable, batch_size):
        input_variable = input_variable.view(batch_size, -1)
        encoder_hidden = self.encoder.init_hidden(batch_size)
        encoder_outputs, encoder_hidden = self.encoder(input_variable,
                                                       encoder_hidden)

        decoder_hidden = encoder_hidden
        decoder_inputs = [(Variable(torch.LongTensor([[SOS_token]])),
                           decoder_hidden)]  # SOS
        beam = BeamSearch(self.vocab_size2, self.beam_size, decoder_hidden)
        # loop beam search
        for di in range(self.target_length):
            decoder_outputs = []
            for decoder_input, decoder_hidden in decoder_inputs:
                decoder_output, decoder_hidden, _ = self.decoder(
                    decoder_input, decoder_hidden,
                    encoder_outputs)  # SOS + Predict
                decoder_outputs.append((decoder_output, decoder_hidden))
            decoder_inputs = beam.beam_search(decoder_outputs)
        return beam.generate(self.generate_num)
Example #15
def eva_a_phi(phi):
    na, nnh, nh, nw = phi

    # choose a dataset to train (mscoco, flickr8k, flickr30k)
    dataset = 'mscoco'
    data_dir = osp.join(DATA_ROOT, dataset)

    from model.ra import Model
    # settings
    mb = 64  # mini-batch size
    lr = 0.0002  # learning rate
    # nh = 512  # size of LSTM's hidden size
    # nnh = 512  # hidden size of attention mlp
    # nw = 512  # size of word embedding vector
    # na = 512  # size of the region features after dimensionality reduction
    name = 'ra'  # model name, just setting it to 'ra' is ok. 'ra'='region attention'
    vocab_freq = 'freq5'  # use the vocabulary that filtered out words whose frequencies are less than 5

    print('... loading data {}'.format(dataset))
    train_set = Reader(batch_size=mb, data_split='train', vocab_freq=vocab_freq, stage='train',
                       data_dir=data_dir, feature_file='features_30res.h5', topic_switch='off') # change 0, 1000, 82783
    valid_set = Reader(batch_size=1, data_split='val', vocab_freq=vocab_freq, stage='val',
                       data_dir=data_dir, feature_file='features_30res.h5',
                       caption_switch='off', topic_switch='off') # change 0, 10, 5000

    npatch, nimg = train_set.features.shape[1:]
    nout = len(train_set.vocab)
    save_dir = '{}-nnh{}-nh{}-nw{}-na{}-mb{}-V{}'.\
        format(dataset.lower(), nnh, nh, nw, na, mb, nout)
    save_dir = osp.join(SAVE_ROOT, save_dir)

    model_file, m = find_last_snapshot(save_dir, resume_training=False)
    os.system('cp model/ra.py {}/'.format(save_dir))
    logger = Logger(save_dir)
    logger.info('... building')
    model = Model(name=name, nimg=nimg, nnh=nnh, nh=nh, na=na, nw=nw, nout=nout, npatch=npatch, model_file=model_file)

    # start training
    bs = BeamSearch([model], beam_size=1, num_cadidates=100, max_length=20)
    best = train(model, bs, train_set, valid_set, save_dir, lr,
                 display=100, starting=m, endding=20, validation=2000, life=10, logger=logger) # change dis1,100; va 2,2000; life 0,10;
    average_models(best=best, L=6, model_dir=save_dir, model_name=name+'.h5') # L 1, 6

    # evaluation
    np.save('data_dir', data_dir)
    np.save('save_dir', save_dir)

    os.system('python valid_time.py')

    scores = np.load('scores.npy')
    running_time = np.load('running_time.npy')
    print('cider:', scores[-1], 'B1-4,C:', scores, 'running time:', running_time)

    return scores, running_time
Example #16
    def __init__(self, model, batch_reader, model_config, data_config, vocab,
                 data_loader):
        self.model = model
        self.batch_reader = batch_reader
        self.model_config = model_config
        self.data_config = data_config
        self.vocab = vocab
        self.data_loader = data_loader

        self.saver = tf.train.Saver()
        self.session = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True))

        self.restore_model_flag = self.restore_model()

        self.bs = BeamSearch(
            self.model, self.model_config.beam_size,
            self.data_loader.word_to_id(self.data_config.sentence_start),
            self.data_loader.word_to_id(self.data_config.sentence_end),
            self.model_config.abstract_length)
Example #17
def transformers_generate_txt(txt, params):
    """
    txt- english sent
    """
    nlp = spacy.load('en_core_web_sm')

    txt = contractions.fix(txt)

    enc_input = tf.expand_dims(
        tf.constant([tokenize_eng[tok.text.lower()] for tok in nlp(txt)]), 0)

    depth = config['transformer']['dmodel'] // config['transformer']['num_heads']  # per-head depth must be an integer
    dmodel = config['transformer']['dmodel']
    num_blocks = config['transformer']['num_blocks']
    num_heads = config['transformer']['num_heads']

    transformer = Transformer(num_blocks=num_blocks,
                              dmodel=dmodel,
                              depth=depth,
                              num_heads=num_heads,
                              inp_vocab_size=config['dataloader']['eng_vocab'],
                              tar_vocab_size=config['dataloader']['ger_vocab'])

    trainer = TrainerTransformer(transformer, config)
    trainer.restore_checkpoint(config['transformer']['ckpt_dir'])

    dec_input = tf.reshape(tokenize_ger['<sos>'], (1, 1))
    att = []

    dec_seq_mask = unidirectional_input_mask(enc_input, dec_input)
    logits = transformer(enc_input, dec_input, dec_seq_mask=dec_seq_mask)

    # beam search: beam width read from config, defaulting to 1
    bs = BeamSearch(config['transformer'].get('k', 1), trainer.transformer)

    sents = bs.call(enc_input, logits, params.dec_max_len)

    output = [[detokenize_ger[idx] for idx in sent] for sent in sents]

    # att.append(attention_weights)
    return [" ".join(sent) for sent in output]
Example #18
def parse_options():
    parser = argparse.ArgumentParser()

    Train.add_parse_options(parser)
    Encoder.add_parse_options(parser)
    AttnDecoder.add_parse_options(parser)
    Seq2SeqModel.add_parse_options(parser)
    LMModel.add_parse_options(parser)
    BeamSearch.add_parse_options(parser)

    parser.add_argument("-dev",
                        default=False,
                        action="store_true",
                        help="Get dev set results using the last saved model")
    parser.add_argument("-test",
                        default=False,
                        action="store_true",
                        help="Get test results using the last saved model")
    args = parser.parse_args()
    args = vars(args)
    return process_args(args)
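
Each component registers its own flags on the shared parser via a static add_parse_options hook. A minimal sketch of what one such hook might look like (the flag name and default are assumptions, not the repository's actual options):

class BeamSearch:
    @staticmethod
    def add_parse_options(parser):
        # register decoding-related flags on the shared argparse parser
        parser.add_argument("-beam_size", default=4, type=int,
                            help="Beam width used during decoding")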
Example #19
def main():

    # T=5
    n_51 = Node(idx=51)
    n_52 = Node(idx=52)

    # T=4
    n_41 = Node(idx=41, next_nodes=[n_51])
    n_42 = Node(idx=42, next_nodes=[n_51])
    n_43 = Node(idx=43, next_nodes=[n_52])

    # T=3
    n_31 = Node(idx=31, next_nodes=[n_41, n_42, n_43])

    # T=2
    n_21 = Node(idx=21, next_nodes=[n_31])
    n_22 = Node(idx=22, next_nodes=[n_31])
    n_23 = Node(idx=23, next_nodes=[n_31])

    # T=1
    n_11 = Node(idx=11, next_nodes=[n_21, n_22])
    n_12 = Node(idx=12)
    n_13 = Node(idx=13, next_nodes=[n_23])

    # T=0
    n_root = Node(idx=0, next_nodes=[n_11, n_12, n_13])

    # Beam search
    n_beam = 3
    l_times = 5
    beam_search = BeamSearch(n_beam, l_times)
    seqs = beam_search.find(n_root)

    # Check nodes
    for i, seq in enumerate(seqs):
        print("Sequence {}".format(i))
        for n in seq:
            print(n.idx)
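
The toy graph above only relies on a Node carrying an index and its outgoing edges. A minimal sketch of such a class, assuming nothing beyond the attributes the example actually uses:

class Node:
    """Graph node for the toy beam search: an id plus outgoing edges."""

    def __init__(self, idx, next_nodes=None):
        self.idx = idx
        self.next_nodes = next_nodes if next_nodes is not None else []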
Example #20
    def __init__(self, embedder, hidden_dim, num_layers, dropout_p, attention,
                 beam_width):
        super(LstmDecoder, self).__init__()
        self._embedder = embedder
        self._hidden_dim = hidden_dim
        self._num_layers = num_layers
        self._attention = attention
        decoder_input_dim = self._embedder.get_embed_dim()
        decoder_input_dim += self._hidden_dim  # for input-feeding

        self._lstm = StackedLSTM(decoder_input_dim, self._hidden_dim,
                                 num_layers, dropout_p)

        self._output_projection_layer = nn.Linear(
            self._hidden_dim, self._embedder.get_vocab_size())

        self._start_index = self._embedder.get_init_token_idx()
        self._eos_index = self._embedder.get_eos_token_idx()
        self._pad_index = self._embedder.get_pad_token_idx()
        self._max_decoding_steps = 100

        self._beam_search = BeamSearch(self._eos_index,
                                       self._max_decoding_steps, beam_width)
Example #21
def beamsearch_hamcycle(pred, W, beam_size=2):
    N = W.size(-1)
    batch_size = W.size(0)
    BS = BeamSearch(beam_size, batch_size, N)
    trans_probs = pred.gather(1, BS.get_current_state())
    for step in range(N-1):
        BS.advance(trans_probs, step + 1)
        trans_probs = pred.gather(1, BS.get_current_state())
    ends = torch.zeros(batch_size, 1).type(dtype_l)
    # extract paths
    Paths = BS.get_hyp(ends)
    # Compute cost of path
    Costs = compute_cost_path(Paths, W)
    return Costs, Paths
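
compute_cost_path is not shown; a plausible sketch is below. The shapes and the cycle-closing edge are assumptions inferred from the call site, where Paths holds one node ordering per batch element and W the pairwise edge weights:

def compute_cost_path(paths, W):
    """Total edge weight of each tour, closing the cycle back to the start."""
    batch_size, N = paths.size()
    costs = W.new_zeros(batch_size)
    for b in range(batch_size):
        tour = paths[b]
        for i in range(N - 1):
            costs[b] += W[b, tour[i], tour[i + 1]]
        costs[b] += W[b, tour[N - 1], tour[0]]  # return to the starting node
    return costs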
Example #22
    def __init__(self,
                 config,
                 train_vocab,
                 labels_vocab,
                 is_train=True,
                 use_attention=True,
                 beam_search=None):
        self.use_attention = use_attention
        self.beam_search = None
        if beam_search:
            self.beam_search = BeamSearch(self)

        self.config = config
        self.encoder_inputs = None
        self.decoder_inputs = None
        self.grad_norm = None
        self.train_vocab = train_vocab
        self.labels_vocab = labels_vocab

        self.build(is_train)
Example #23
 def __init__(self,
              config,
              train_vocab,
              is_train=True,
              use_attention=True,
              beam_search=False,
              bidirectional=True,
              pointer=True):
     self.use_attention = use_attention
     self.beam_search = None
     if beam_search:
         self.beam_search = BeamSearch(self)
     self.pointer = pointer
     self.bidirectional = bidirectional
     self.config = config
     self.config.vocab_size = len(train_vocab.tok2id) + 1
     self.encoder_inputs = None
     self.decoder_inputs = None
     self.grad_norm = None
     self.vocab = train_vocab
     self.build(is_train)
Example #24
def generate_summaries(model,
                       dataset,
                       cfg,
                       limit=math.inf,
                       shuffle=False,
                       pbar=None):
    """
    Generate summaries using the given `model` on the given `dataset`.
    Expects the given model to be in eval mode.


    :param model: Use this model for evaluation
    :param dataset: The dataset to evaluate on
    :param cfg:
        The `Config` used for the given model from which we get
        info on whether it uses pointer generation or not.
    :param limit: Limit the pairs evaluated to this many
    :param shuffle: Whether to shuffle the dataset before yielding batches
    :param pbar: Optional pbar (tqdm) to update with progress
    """
    batch_size = 1  # beam_search currently only supports batch_size 1
    bs = BeamSearch(model, cfg=cfg)
    with torch.no_grad():
        generator = dataset.generator(batch_size, cfg.pointer, shuffle)
        references = []
        hypothesis = []
        for idx, batch in enumerate(generator):
            hyps = batch_to_text(bs, batch)
            refs = [" ".join(e.tgt) for e in batch.examples]
            hypothesis.extend(hyps)
            references.extend(refs)

            if batch_size * idx >= limit:
                break

            if pbar is not None:
                pbar.update(batch_size)

        if pbar is not None:
            pbar.close()
        return (hypothesis, references)
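
A hypothetical usage sketch (model, dataset, and cfg are assumed to come from the surrounding training code):

model.eval()  # generate_summaries expects the model in eval mode
hyps, refs = generate_summaries(model, dataset, cfg, limit=100)
for hyp, ref in zip(hyps[:3], refs[:3]):
    print("HYP:", hyp)
    print("REF:", ref)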
Example #25
def check_train_results(dataval, y_max_length, steps_per_epoch, vocab, params):
    print("check_train_results")
    seq2seq = Seq2seq_attention(
        len(vocab), params, embedding_matrix=fasttext_embedding(params, sentences=None))
    it = iter(dataval)
    inp, out = next(it)
    beam_search = BeamSearch(seq2seq, 9, vocab.bos,
                             vocab.eos, y_max_length)
    # seq2seq.compare_input_output(inp, vocab, y_max_length, out, beam_search)
    seq2seq.summary()

    def callback():
        for _ in range(10):
            inp, out = next(it)
            seq2seq.compare_input_output(
                inp, vocab, y_max_length, out, beam_search)

    # seq2seq.train_epoch(dataset, 5, steps_per_epoch, vocab.bos,
    #                     restore_checkpoint=True, dataval=dataval, callback=None)
    # seq2seq.train_epoch(dataset, 5, steps_per_epoch, vocab.bos,
    #                     restore_checkpoint=True, dataval=dataval, callback=callback)
    seq2seq.restore_checkpoint()
    callback()
Example #26
    def _beam_search_decoding(self, imgs, beam_size):
        B = imgs.size(0)
        # use batch_size*beam_size as new Batch
        imgs = tile(imgs, beam_size, dim=0)
        enc_outs, hiddens = self.model.encode(imgs)
        dec_states, O_t = self.model.init_decoder(enc_outs, hiddens)

        new_B = imgs.size(0)
        # first decoding step's input
        tgt = torch.ones(new_B, 1).long() * START_TOKEN
        beam = BeamSearch(beam_size, B)
        for t in range(self.max_len):
            tgt = beam.current_predictions.unsqueeze(1)
            dec_states, O_t, probs = self.step_decoding(
                dec_states, O_t, enc_outs, tgt)
            log_probs = torch.log(probs)

            beam.advance(log_probs)
            any_beam_is_finished = beam.is_finished.any()
            if any_beam_is_finished:
                beam.update_finished()
                if beam.done:
                    break

            select_indices = beam.current_origin
            if any_beam_is_finished:
                # Reorder states
                h, c = dec_states
                h = h.index_select(0, select_indices)
                c = c.index_select(0, select_indices)
                dec_states = (h, c)
                O_t = O_t.index_select(0, select_indices)
        # get results
        formulas_idx = torch.stack([hyps[1] for hyps in beam.hypotheses],
                                   dim=0)
        results = self._idx2formulas(formulas_idx)
        return results
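
tile here is the usual OpenNMT-style helper that repeats a tensor beam_size times along a dimension while keeping each example's copies contiguous, which is what the index_select-based reordering above relies on. A minimal sketch, assuming PyTorch:

def tile(x, count, dim=0):
    """Repeat x `count` times along `dim`, e.g. [a, b] -> [a, a, b, b]."""
    perm = list(range(x.dim()))
    if dim != 0:
        perm[0], perm[dim] = perm[dim], perm[0]
        x = x.permute(perm).contiguous()
    out_size = list(x.size())
    out_size[0] *= count
    batch = x.size(0)
    x = (x.view(batch, -1)
          .transpose(0, 1)
          .repeat(count, 1)
          .transpose(0, 1)
          .contiguous()
          .view(*out_size))
    if dim != 0:
        x = x.permute(perm).contiguous()
    return x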
Example #27
    def generate_beamsearch(self,
                            src: torch.Tensor,
                            maxlen: int,
                            bos_index: int,
                            pad_index: int,
                            unk_index: int,
                            eos_index: int,
                            vocab_size: int,
                            beam_size: int = 3,
                            no_repeat_ngram_size: int = 0):
        # Obtain device information
        device = next(self.parameters()).device
        _, batch_size = src.shape
        src_key_padding_mask = (src == pad_index).T  # batch_size x srclen
        memory = self.encode(src, src_key_padding_mask=src_key_padding_mask)

        # <BOS> tgt seq for generation
        tgt = torch.LongTensor(maxlen, batch_size,
                               beam_size).fill_(pad_index).to(device)
        tgt[0, :, :] = torch.LongTensor(batch_size,
                                        beam_size).fill_(bos_index).to(device)
        scores = torch.zeros(batch_size, beam_size, maxlen).to(device)
        scores[:, :, 0] = torch.ones(batch_size, beam_size).to(device)
        active_beams = [0]  # up to beam_size beams.
        search = BeamSearch(vocab_size, pad_index, unk_index, eos_index)
        ngram_blocking = NgramBlocking(no_repeat_ngram_size)

        # After eos
        log_probs_after_eos = torch.FloatTensor(batch_size, beam_size,
                                                self.out_vocab_size).fill_(
                                                    float("-inf")).cpu()
        log_probs_after_eos[:, :, eos_index] = 0.
        best_n_indices = tgt.new_full((batch_size, len(active_beams)),
                                      bos_index)

        for i in range(1, maxlen):
            # if every beam's last prediction is <eos>, we can leave the loop
            if (best_n_indices == eos_index).all():
                break

            # Generate probability for all beams, update probability for all beams (lprobs).
            lprobs = torch.zeros(batch_size, len(active_beams),
                                 vocab_size).to(device)
            for j in range(len(active_beams)):
                tgt_key_padding_mask = (tgt[:i, :,
                                            active_beams[j]] == pad_index
                                        ).T  # batch_size x len(tgt)
                tgt_mask = self.transformer.generate_square_subsequent_mask(
                    i).to(device)
                decode_prob = self.decode(
                    tgt[:i, :, active_beams[j]],
                    memory,
                    tgt_mask=tgt_mask,
                    tgt_key_padding_mask=tgt_key_padding_mask)
                pred_prob = self.linear(decode_prob)
                lprobs[:, j, :] = pred_prob[-1, :]

            # Update lprobs for n-gram blocking
            if no_repeat_ngram_size > 0:
                for batch_idx in range(batch_size):
                    for beam_idx in range(len(active_beams)):
                        lprobs[batch_idx, beam_idx] = ngram_blocking.update(
                            i - 1, tgt[:i, batch_idx, beam_idx],
                            lprobs[batch_idx, beam_idx])

            expanded_indices = best_n_indices.detach().cpu().unsqueeze(
                -1).expand(
                    (batch_size, len(active_beams), self.out_vocab_size))
            clean_lprobs = torch.where(
                expanded_indices == eos_index,
                log_probs_after_eos[:, :len(active_beams)],
                F.log_softmax(lprobs.detach().cpu(), dim=-1))
            # Run the beam search step and select the top-k beams.
            best_n_scores, best_n_indices, best_n_beams = search.step(
                i, clean_lprobs,
                scores.index_select(1, torch.tensor(
                    active_beams, device=device)).detach().cpu(), beam_size)

            # Take the top results, more optimization can be done here, e.g., avoid <eos> beams.
            best_n_scores = best_n_scores[:, :beam_size]
            best_n_indices = best_n_indices[:, :beam_size]
            best_n_beams = best_n_beams[:, :beam_size]

            # update results
            tgt = tgt.gather(
                2,
                best_n_beams.unsqueeze(0).expand(maxlen, batch_size,
                                                 -1).to(device))
            tgt[i, :, :] = best_n_indices
            scores[:, :, i] = best_n_scores
            active_beams = range(beam_size)

        return tgt[:, :, 0]
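
NgramBlocking itself is not shown. Below is a minimal sketch of the standard no-repeat-ngram trick it presumably implements; the update(step, sequence, lprobs) signature is taken from the call site above, everything else is an assumption. Any token that would complete an n-gram already present in the generated prefix has its log-probability forced to -inf:

class NgramBlocking:
    """Sketch: ban tokens that would repeat an already-generated n-gram."""

    def __init__(self, ngram_size):
        self.n = ngram_size  # block repeats of n-grams of this size

    def update(self, step, sequence, lprobs):
        # `step` mirrors the call site but is unused in this sketch.
        # sequence: 1-D LongTensor of the tokens generated so far for one beam.
        # lprobs:   1-D FloatTensor of next-token log-probs, edited in place.
        tokens = sequence.tolist()
        if self.n < 2 or len(tokens) < self.n - 1:
            return lprobs
        prefix = tuple(tokens[-(self.n - 1):])  # the n-1 most recent tokens
        # every earlier n-gram starting with this prefix bans its final token
        for j in range(len(tokens) - self.n + 1):
            if tuple(tokens[j:j + self.n - 1]) == prefix:
                lprobs[tokens[j + self.n - 1]] = float("-inf")
        return lprobs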
Example #28
    def generate(self, title='晚安', genre=3):
        if self.model is None:
            raise Exception("no model has been loaded")

        temperature = 1
        topk = 15

        context_tokens = []
        assert genre in [0, 1, 2, 3]

        text_genre_list = ['五言绝句', '七言绝句', '五言律诗', '七言律诗']  # 5/7-character jueju and 5/7-character lüshi (classical Chinese poem genres)
        genre_code_list = ['wuyanjue', 'qiyanjue', 'wuyanlv', 'qiyanlv']

        text_genre = text_genre_list[genre]
        genre_code = genre_code_list[genre]

        ids = self.title_to_ids[text_genre]
        context_tokens.append(ids)

        context_tokens.append(100)
        context_tokens.extend(
            self.tokenizer.convert_tokens_to_ids(
                self.tokenizer.tokenize(title)))
        context_tokens.append(4282)  # 4282 is #

        out = None
        while out is None:
            #generator = CheckedGenerator(model=self.model,
            #context=context_tokens,
            #tokenizer=self.tokenizer,
            #checker=self.checker,
            #genre=genre_code,
            #temperature=temperature,
            #top_k=topk, device=self.device)

            # BaseGenerator
            #generator = BaseGenerator(model=self.model,
            #context=context_tokens,
            #tokenizer=self.tokenizer,
            #temperature=temperature,
            #top_k=topk,
            #device=self.device)
            #out = generator.sample_sequence()

            # BeamSearch
            generator = BeamSearch(model=self.model,
                                   context=context_tokens,
                                   tokenizer=self.tokenizer,
                                   temperature=temperature,
                                   beam_size=3,
                                   mode=2,
                                   genre=genre,
                                   top_k=topk,
                                   device=self.device)
            out = generator.beam_sequence()

        out = out.tolist()

        text = self.tokenizer.convert_ids_to_tokens(out[0])
        text = text[:-1]
        text = ''.join(text)
        text = text.split('#')[-1]
        return text
Example #29
logging.info('# init data')
training_iter = train_batches.make_one_shot_iterator()
val_iter = eval_batches.make_initializable_iterator()

logging.info("# Load model")
m = Transformer(hp)

# get op
loss, train_op, global_step, train_summaries = m.train(xs, ys)
y_hat, eval_summaries = m.eval(xs, ys)

token2idx, idx2token = _load_vocab(hp.vocab)

bs = BeamSearch(m, hp.beam_size,
                list(idx2token.keys())[2],
                list(idx2token.keys())[3], idx2token, hp.maxlen2, m.x,
                m.decoder_inputs, m.logits)

logging.info("# Session")
saver = tf.train.Saver(max_to_keep=hp.num_epochs)
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    ckpt = tf.train.latest_checkpoint(hp.logdir)
    if ckpt is None:
        logging.info("Initializing from scratch")
        sess.run(tf.global_variables_initializer())
        save_variable_specs(os.path.join(hp.logdir, "specs"))
    else:
        saver.restore(sess, ckpt)

    summary_writer = tf.summary.FileWriter(hp.logdir, sess.graph)
Example #30
    def beam_search(enc_output, enc_bias, source_length):
        """
            beam_search
        """
        max_len = layers.fill_constant(
            shape=[1], dtype='int64', value=max_out_len)
        step_idx = layers.fill_constant(
            shape=[1], dtype='int64', value=0)
        cond = layers.less_than(x=step_idx, y=max_len)
        while_op = layers.While(cond)

        caches_batch_size = batch_size * beam_size
        init_score = np.zeros([1, beam_size]).astype('float32')
        init_score[:, 1:] = -INF
        initial_log_probs = layers.assign(init_score)

        alive_log_probs = layers.expand(initial_log_probs, [batch_size, 1])
        # alive seq [batch_size, beam_size, 1]
        initial_ids = layers.zeros([batch_size, 1, 1], 'float32')
        alive_seq = layers.expand(initial_ids, [1, beam_size, 1]) 
        alive_seq = layers.cast(alive_seq, 'int64')

        enc_output = layers.unsqueeze(enc_output, axes=[1])
        enc_output = layers.expand(enc_output, [1, beam_size, 1, 1])
        enc_output = layers.reshape(enc_output, [caches_batch_size, -1, d_model])

        tgt_src_attn_bias = layers.unsqueeze(enc_bias, axes=[1])
        tgt_src_attn_bias = layers.expand(tgt_src_attn_bias, [1, beam_size, n_head, 1, 1]) 
        enc_bias_shape = layers.shape(tgt_src_attn_bias)
        tgt_src_attn_bias = layers.reshape(tgt_src_attn_bias, [-1, enc_bias_shape[2], 
                enc_bias_shape[3], enc_bias_shape[4]])
            
        beam_search = BeamSearch(beam_size, batch_size, decode_alpha, trg_vocab_size, d_model)

        caches = [{
            "k": layers.fill_constant(
                shape=[caches_batch_size, 0, d_model],
                dtype=enc_output.dtype,
                value=0),
            "v": layers.fill_constant(
                shape=[caches_batch_size, 0, d_model],
                dtype=enc_output.dtype,
                value=0)
        } for i in range(n_layer)]
        
        finished_seq = layers.zeros_like(alive_seq)
        finished_scores = layers.fill_constant([batch_size, beam_size], 
                                                dtype='float32', value=-INF)
        finished_flags = layers.fill_constant([batch_size, beam_size], 
                                                dtype='float32', value=0)
        
        with while_op.block():
            pos = layers.fill_constant([caches_batch_size, 1, 1], dtype='int64', value=1)
            pos = layers.elementwise_mul(pos, step_idx, axis=0)

            alive_seq_1 = layers.reshape(alive_seq, [caches_batch_size, -1])
            alive_seq_2 = alive_seq_1[:, -1:] 
            alive_seq_2 = layers.unsqueeze(alive_seq_2, axes=[1])
 
            logits = wrap_decoder(
                trg_vocab_size, max_in_len, n_layer, n_head, d_key,
                d_value, d_model, d_inner_hid, prepostprocess_dropout,
                attention_dropout, relu_dropout, preprocess_cmd,
                postprocess_cmd, weight_sharing, embedding_sharing,
                dec_inputs=(alive_seq_2, alive_seq_2, pos, None, tgt_src_attn_bias),
                enc_output=enc_output, caches=caches, is_train=False, params_type=params_type)

            alive_seq_2, alive_log_probs_2, finished_seq_2, finished_scores_2, finished_flags_2, caches_2 = \
                    beam_search.inner_func(step_idx, logits, alive_seq_1, alive_log_probs, finished_seq, 
                                           finished_scores, finished_flags, caches, enc_output, 
                                           tgt_src_attn_bias)
            
            layers.increment(x=step_idx, value=1.0, in_place=True)
            finish_cond = beam_search.is_finished(step_idx, source_length, alive_log_probs_2, 
                                                  finished_scores_2, finished_flags_2) 

            layers.assign(alive_seq_2, alive_seq)
            layers.assign(alive_log_probs_2, alive_log_probs)
            layers.assign(finished_seq_2, finished_seq)
            layers.assign(finished_scores_2, finished_scores)
            layers.assign(finished_flags_2, finished_flags)

            for i in range(len(caches_2)):
                layers.assign(caches_2[i]["k"], caches[i]["k"])
                layers.assign(caches_2[i]["v"], caches[i]["v"])

            layers.logical_and(x=cond, y=finish_cond, out=cond)

        finished_flags = layers.reduce_sum(finished_flags, dim=1, keep_dim=True) / beam_size
        finished_flags = layers.cast(finished_flags, 'bool')
        mask = layers.cast(layers.reduce_any(input=finished_flags, dim=1, keep_dim=True), 'float32')
        mask = layers.expand(mask, [1, beam_size])

        mask2 = 1.0 - mask
        finished_seq = layers.cast(finished_seq, 'float32')
        alive_seq = layers.cast(alive_seq, 'float32')

        finished_seq = layers.elementwise_mul(finished_seq, mask, axis=0) + \
                        layers.elementwise_mul(alive_seq, mask2, axis = 0)
        finished_seq = layers.cast(finished_seq, 'int32')
        finished_scores = layers.elementwise_mul(finished_scores, mask, axis=0) + \
                            layers.elementwise_mul(alive_log_probs, mask2)
        finished_seq.persistable = True
        finished_scores.persistable = True

        return finished_seq, finished_scores
Example #31
class Prediction:
    def __init__(self, args):
        """
        :param model_dir: model dir path
        :param vocab_file: vocab file path
        """
        self.tf = import_tf(0)

        self.args = args
        self.model_dir = args.logdir
        self.vocab_file = args.vocab
        self.token2idx, self.idx2token = _load_vocab(args.vocab)

        hparams = Hparams()
        parser = hparams.parser
        self.hp = parser.parse_args()

        self.model = Transformer(self.hp)

        self._add_placeholder()
        self._init_graph()

    def _init_graph(self):
        """
        init graph
        """
        self.ys = (self.input_y, None, None)
        self.xs = (self.input_x, None)
        self.memory = self.model.encode(self.xs, False)[0]
        self.logits = self.model.decode(self.xs, self.ys, self.memory, False)[0]

        ckpt = self.tf.train.get_checkpoint_state(self.model_dir).all_model_checkpoint_paths[-1]

        graph = self.logits.graph
        sess_config = self.tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True

        saver = self.tf.train.Saver()
        self.sess = self.tf.Session(config=sess_config, graph=graph)

        self.sess.run(self.tf.global_variables_initializer())
        self.tf.reset_default_graph()
        saver.restore(self.sess, ckpt)

        self.bs = BeamSearch(self.model,
                             self.hp.beam_size,
                             list(self.idx2token.keys())[2],
                             list(self.idx2token.keys())[3],
                             self.idx2token,
                             self.hp.maxlen2,
                             self.input_x,
                             self.input_y,
                             self.logits)

    def predict(self, content):
        """
        abstract prediction by beam search
        :param content: article content
        :return: prediction result
        """
        input_x = content.split()
        while len(input_x) < self.args.maxlen1: input_x.append('<pad>')
        input_x = input_x[:self.args.maxlen1]

        input_x = [self.token2idx.get(s, self.token2idx['<unk>']) for s in input_x]

        memory = self.sess.run(self.memory, feed_dict={self.input_x: [input_x]})

        return self.bs.search(self.sess, input_x, memory[0])

    def _add_placeholder(self):
        """
        add tensorflow placeholder
        """
        self.input_x = self.tf.placeholder(dtype=self.tf.int32, shape=[None, self.args.maxlen1], name='input_x')
        self.input_y = self.tf.placeholder(dtype=self.tf.int32, shape=[None, None], name='input_y')
Example #32
        from model.gnic import Model
        model_list = ['mscoco-gnic-nh512-nw512-mb64-V8843/gnic.h5.merge']
        models = [Model(model_file=osp.join(SAVE_ROOT, m)) for m in model_list]

        valid_set = Reader(batch_size=1,
                           data_split='test',
                           vocab_freq='freq5',
                           stage='test',
                           data_dir=data_dir,
                           feature_file='features_1res.h5',
                           caption_switch='off',
                           topic_switch='off',
                           head=0,
                           tail=1000)

        bs = BeamSearch(models, beam_size=3, num_cadidates=500, max_length=20)
        scores = validate(bs, valid_set)

    if task == 'ss':
        from model.ss import Model
        model_list = ['mscoco-nh512-nw512-mb64-V8843/ss.h5.merge']
        models = [Model(model_file=osp.join(SAVE_ROOT, m)) for m in model_list]

        valid_set = Reader(batch_size=1,
                           data_split='test',
                           vocab_freq='freq5',
                           stage='val',
                           data_dir=data_dir,
                           feature_file='features_1res.h5',
                           topic_type='pred',
                           topic_file='lda_topics.h5',
Example #33
 def __init__(self):
     self.rev_vocab = self.load_char_vocab()
     self.beam_search = BeamSearch(self.ckpt_file, self.rev_vocab)