def draft_summary_beam_search(input_ids, enc_output, dec_padding_mask, beam_size):
    log.info("Building: 'Draft beam search decoder'")
    input_ids = tfa.seq2seq.tile_batch(input_ids, multiplier=beam_size)
    enc_output = tfa.seq2seq.tile_batch(enc_output, multiplier=beam_size)
    dec_padding_mask = tfa.seq2seq.tile_batch(dec_padding_mask, multiplier=beam_size)

    def beam_search_decoder(output):
        # (batch_size, seq_len, d_bert)
        embeddings = model.embedding(output)
        predictions, dec_op, attention_weights = model.decoder(
            embeddings, enc_output, False, None, dec_padding_mask)
        if config.copy_gen:
            predictions = model.decoder.pointer_generator(
                dec_op[:, -1:, :],
                predictions[:, -1:, :],
                attention_weights[:, :, -1:, :],
                input_ids,
                tf.shape(input_ids)[1],
                tf.shape(predictions[:, -1:, :])[1],
                training=False,
            )
        # (batch_size, 1, target_vocab_size)
        return predictions[:, -1:, :]

    return beam_search(beam_search_decoder,
                       [CLS_ID] * h_parms.batch_size,
                       beam_size,
                       config.summ_length,
                       config.input_vocab_size,
                       h_parms.length_penalty,
                       stop_early=False,
                       eos_id=[[SEP_ID]])
def beam_search_eval(document, beam_size):
    start = [tokenizer.vocab_size]
    end = [tokenizer.vocab_size + 1]
    encoder_input = tf.tile(document, multiples=[beam_size, 1])
    batch, inp_shape = encoder_input.shape

    def decoder_query(output):
        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            encoder_input, output)
        predictions, attention_weights, dec_output = model(
            encoder_input, output, enc_padding_mask, combined_mask,
            dec_padding_mask, False)
        # (batch_size, 1, target_vocab_size)
        return predictions[:, -1:, :]

    return beam_search(decoder_query,
                       start,
                       beam_size,
                       config.summ_length,
                       config.input_vocab_size,
                       h_parms.length_penalty,
                       stop_early=True,
                       eos_id=[end])
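# Hedged usage sketch for beam_search_eval above. The tokenizer, model, and the
# exact return shape of beam_search come from the surrounding module; the names
# used here (tokenizer.encode/decode, a (sequences, scores) return with
# sequences shaped (batch, beam_size, decoded_len)) are assumptions, not the
# confirmed API.
def summarize_document(raw_text, beam_size=4):
    ids = tokenizer.encode(raw_text)
    document = tf.expand_dims(ids, 0)  # add batch dim: (1, seq_len)
    seqs, scores = beam_search_eval(document, beam_size)
    best = seqs[0, 0, :]  # top-scoring beam of the first batch element
    # Drop the start/end ids (>= vocab_size) before decoding back to text.
    return tokenizer.decode([i for i in best.numpy() if i < tokenizer.vocab_size])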
def validate(model, model_type, dataloader, device, w2i, i2w, data_mode,
             max_length=15, beam_size=3):
    gts_dict = {}
    hyps_dict = {}
    bad_count = 0
    for i, (image, texts) in tqdm(enumerate(dataloader), total=len(dataloader)):
        hyps = beam_search(model, model_type, image, w2i, i2w, device,
                           max_length, beam_size, data_mode)
        if len(hyps) == 0:
            bad_count += 1
            continue
        hyp = hyps[0][1:]
        if hyp[-1] == w2i['<END>']:
            hyp = hyp[:-1]
        hyp = ' '.join([i2w[word] for word in hyp.tolist()])
        gts_dict[i] = texts[0]  # Temporary
        hyps_dict[i] = [hyp]
    if not gts_dict:
        print('Bad validation')
        return {}
    print('Bad hypothesis count: ', bad_count)
    return evaluation.compute_scores(gts_dict, hyps_dict)
def predict_beam_search(beam_width):
    print("beam-search predicting with %s..." % model_stamp)
    with torch.no_grad():
        global net, test_loader
        net.eval()
        prediction = []
        for batch_idx, (utter, utter_len, seq_order) in enumerate(tqdm(test_loader)):
            utter, utter_len = utter.cuda(), utter_len.cuda()
            batch_size = utter.shape[0]
            (listener_hiddens, (listener_h, listener_c)), listener_len = net.listener(
                utter=utter, utter_len=utter_len)
            seqs = []
            for b in range(batch_size):
                seq, attention_weights = beam_search(
                    init_state=net.speller.init_state,
                    time_step=net.speller.time_step,
                    listener_hiddens=listener_hiddens[b, :listener_len[b]].unsqueeze(0),
                    listener_h=listener_h[b].unsqueeze(0),
                    listener_c=listener_c[b].unsqueeze(0),
                    beam_width=beam_width,
                    max_len=int(utter_len[b] / 6))
                seqs.append(seq)
            plt.imshow(attention_weights.cpu().numpy(),
                       interpolation='nearest', cmap='hot')
            plt.savefig("result/%s_aw_test_b%d.png" % (model_stamp, batch_idx))
            seqs = np.array(seqs)
            seqs = seqs[seq_order]
            prediction += seqs.tolist()
    return prediction
def ae_latent_sample_beam(latents_dense_in, inputs, ed, embed, hparams):
    """Sample from the latent space in the autoencoder."""

    def symbols_to_logits_fn(ids):
        """Go from ids to logits."""
        # Prepare to be right-shifted in 'decode_transformer'.
        latents_discrete = tf.pad(ids[:, 1:], [[0, 0], [0, 1]])
        with tf.variable_scope(tf.get_variable_scope(), reuse=False):
            latents_dense = embed(
                tf.one_hot(latents_discrete, depth=2**hparams.bottleneck_bits))
            latents_pred = decode_transformer(inputs, ed, latents_dense,
                                              hparams, "extra")
            logits = tf.layers.dense(latents_pred,
                                     2**hparams.bottleneck_bits,
                                     name="extra_logits")
            current_output_position = commons.shape_list(ids)[1] - 1
            logits = logits[:, current_output_position, :]
        return logits

    initial_ids = tf.zeros([tf.shape(latents_dense_in)[0]], dtype=tf.int32)
    length = tf.shape(latents_dense_in)[1]
    ids, _, _ = beam_search.beam_search(symbols_to_logits_fn,
                                        initial_ids,
                                        beam_size=1,
                                        decode_length=length,
                                        vocab_size=2**hparams.bottleneck_bits,
                                        alpha=0.0,
                                        eos_id=-1,
                                        stop_early=False)
    res = ids[:, 0, :]  # Pick first beam.
    return res[:, 1:]  # Remove the added all-zeros from ids.
def evaluate(text):
    with open('input_tokenizer.pickle', 'rb') as handle:
        input_tokenizer = pickle.load(handle)
    with open('output_tokenizer.pickle', 'rb') as handle:
        output_tokenizer = pickle.load(handle)
    input_vocab_size = len(input_tokenizer.word_index) + 1
    output_vocab_size = len(output_tokenizer.word_index) + 1

    text = preprocess_text(text)
    seq = input_tokenizer.texts_to_sequences([text])
    inputs = tf.keras.preprocessing.sequence.pad_sequences(seq,
                                                           truncating='post',
                                                           padding='post')
    inputs = tf.convert_to_tensor(inputs)

    encoder = Encoder(input_vocab_size, constants.embedding_dim,
                      constants.units, constants.BATCH_SIZE)
    decoder = Decoder(output_vocab_size, constants.embedding_dim,
                      constants.units, constants.BATCH_SIZE)
    checkpoint_dir = './checkpoints'
    checkpoint = tf.train.Checkpoint(encoder=encoder, decoder=decoder)
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

    enc_outputs, enc_hidden = encoder(inputs)
    dec_hidden = enc_hidden
    dec_input = tf.expand_dims([output_tokenizer.word_index['<start>']], 0)
    result = beam_search(constants.beam_width, decoder, dec_input, dec_hidden,
                         enc_outputs, output_tokenizer.word_index['<end>'],
                         output_vocab_size)
    result = output_tokenizer.sequences_to_texts([result])
    print(result[0])
def draft_summary_beam_search(model, input_ids, enc_output, dec_padding_mask,
                              beam_size):
    log.info("Building: 'Draft beam search decoder'")
    input_ids = tfa.seq2seq.tile_batch(input_ids, multiplier=beam_size)
    enc_output = tfa.seq2seq.tile_batch(enc_output, multiplier=beam_size)
    dec_padding_mask = tfa.seq2seq.tile_batch(dec_padding_mask, multiplier=beam_size)

    def beam_search_decoder(output):
        # (batch_size, seq_len, d_bert)
        embeddings = model.embedding(output)
        predictions, attention_weights = model.decoder(
            input_ids, embeddings, enc_output, False, None, dec_padding_mask)
        # (batch_size, 1, target_vocab_size)
        return predictions[:, -1:, :]

    return beam_search(beam_search_decoder,
                       [CLS_ID] * h_parms.batch_size,
                       beam_size,
                       config.summ_length,
                       config.input_vocab_size,
                       h_parms.length_penalty,
                       stop_early=False,
                       eos_id=[[SEP_ID]])
def evaluate(path):
    with open('tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)
    vocab_size = len(tokenizer.word_index) + 1

    encoder = Encoder(config.embedding_dim)
    decoder = Decoder(config.units, config.embedding_dim, vocab_size)
    checkpoint_dir = './checkpoints'
    checkpoint = tf.train.Checkpoint(encoder=encoder, decoder=decoder)
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

    image = load_image(path)
    encoder_outputs = encoder(tf.expand_dims(image, 0))
    dec_state = tf.zeros((1, config.units))
    dec_input = tf.expand_dims([tokenizer.word_index['<start>']], 0)
    result = beam_search(config.beam_width, decoder, dec_input, dec_state,
                         encoder_outputs, tokenizer.word_index['<end>'],
                         vocab_size)
    result = tokenizer.sequences_to_texts([result])
    print(result)
def draft_decoded_summary(model, input_ids, target_ids, beam_size):
    batch = tf.shape(input_ids)[0]
    start = [101] * batch  # 101/102 are the BERT [CLS]/[SEP] token ids.
    end = [102]
    # (batch_size, seq_len, d_bert)
    enc_output_ = model.bert_model(input_ids)[0]
    enc_output = tf.tile(enc_output_, multiples=[beam_size, 1, 1])
    input_ids = tf.tile(input_ids, multiples=[beam_size, 1])

    def beam_search_decoder(target_ids):
        # (batch_size, 1, 1, seq_len), (_), (batch_size, 1, 1, seq_len)
        _, combined_mask, dec_padding_mask = create_masks(input_ids, target_ids)
        draft_logits, _ = model.draft_summary(input_ids=input_ids,
                                              enc_output=enc_output,
                                              look_ahead_mask=combined_mask,
                                              padding_mask=dec_padding_mask,
                                              target_ids=target_ids,
                                              training=False)
        # (batch_size, 1, target_vocab_size)
        return draft_logits[:, -1:, :]

    return (beam_search(beam_search_decoder,
                        start,
                        beam_size,
                        config.target_seq_length,
                        config.input_vocab_size,
                        config.length_penalty,
                        stop_early=True,
                        eos_id=[end]),
            enc_output_)
def beam_search_eval(inp_sentences, beam_size):
    start = [tokenizer_en.vocab_size] * len(inp_sentences)
    end = [tokenizer_en.vocab_size + 1]
    inp_sentences = [tokenizer_en.encode(i) for i in inp_sentences]
    encoder_input = tf.tile(inp_sentences, multiples=[beam_size, 1])
    batch, inp_shape = encoder_input.shape

    def transformer_query(output):
        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            encoder_input, output)
        predictions, attention_weights, dec_output = transformer(
            encoder_input, output, False, enc_padding_mask, combined_mask,
            dec_padding_mask)
        if config.copy_gen:
            predictions = generator(dec_output, predictions, attention_weights,
                                    encoder_input, inp_shape, output.shape[-1],
                                    batch, False)
        # Select the last position: (batch_size, 1, target_vocab_size).
        return predictions[:, -1:, :]

    return beam_search(transformer_query,
                       start,
                       beam_size,
                       config.summ_length,
                       config.target_vocab_size,
                       0.6,
                       stop_early=False,
                       eos_id=[end])
def generate(model, val_iter, TRG_TEXT, k=10, max_len=100, gpu=True):
    """Generates the top k best sentences given a trained model."""
    bos = TRG_TEXT.vocab.stoi['<s>']
    eos = TRG_TEXT.vocab.stoi['</s>']
    pad = TRG_TEXT.vocab.stoi['<pad>']
    filter_token = [pad]
    output = []
    for batch in tqdm(val_iter):
        trg = batch.trg
        src = batch.src
        for i in range(src.size(1)):
            src_sent = src[:, i:i + 1]
            best_options = beam_search.beam_search(model, src_sent, bos, eos,
                                                   k, max_len, filter_token,
                                                   gpu)
            sentence_trg = ""
            sentence_src = ""
            sentence = []
            for word in best_options[0][1]:
                sentence += [TRG_TEXT.vocab.itos[word]]
                sentence_src += TRG_TEXT.vocab.itos[word] + " "
            for word in trg[:, i]:
                sentence_trg += TRG_TEXT.vocab.itos[word] + " "
            output.append(sentence)
    return output
def decode():
    with tf.Session() as sess:
        beam_size = FLAGS.beam_size
        if_beam_search = FLAGS.if_beam_search
        model = create_model(sess, True, beam_search=if_beam_search,
                             beam_size=beam_size)
        model.batch_size = 1
        data_path = DATA_PATH
        word2id, id2word, trainingSamples = load_dataset(data_path)
        if if_beam_search:
            sys.stdout.write("> ")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            while sentence:
                recos = beam_search(sess, sentence=sentence, word2id=word2id,
                                    id2word=id2word, model=model)
                print("Replies --------------------------------------->")
                print(recos)
                sys.stdout.write("> ")
                sys.stdout.flush()
                sentence = sys.stdin.readline()
def evaluate():
    if request.method == 'POST':
        start = time.perf_counter()
        try:
            var_sample: VarNamingSample = request.get_json(force=True, cache=False)
            gt = var_sample["name"]
            tensorized_sample = model.tensorize(var_sample)
            minibatch = model.initialize_minibatch()
            model.extend_minibatch_with(tensorized_sample, minibatch)
            minibatch = model.finalize_minibatch(minibatch, device=DEVICE)
            minibatch["target"]["token_idxs"] = minibatch["target"]["token_idxs"][:1]
            predictions = beam_search(nn, **minibatch)
            predictions = list(map(
                lambda prediction: {
                    "name": [model.vocabulary.id_to_token[token_idx]
                             for token_idx in prediction[1].token_idxs.squeeze()][1:-1],
                    "p": prediction[1].p
                },
                predictions))
        except Exception:
            return
        time_spent = time.perf_counter() - start
        global last_inference
        last_inference = LastInference(predictions, gt, time_spent)
        return json.dumps({"predictions": last_inference.predictions,
                           "gnnEvaluationTime": last_inference.time_spent})
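# Hedged client-side sketch for the Flask handler above. The route and port are
# not shown in the snippet, so 'http://localhost:5000/evaluate' is an
# assumption, as is the request payload: only the "name" field is visible in
# the handler; the full VarNamingSample schema is defined elsewhere.
import requests

sample = {"name": "old_variable_name"}  # minimal assumed VarNamingSample payload
resp = requests.post("http://localhost:5000/evaluate", json=sample)
print(resp.json()["predictions"])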
def beam_search_eval(document, summary, beam_size):
    batch, sum_seq_len = summary.shape
    start = [tokenizer.vocab_size] * batch
    end = [tokenizer.vocab_size + 1]
    doc_input = tf.tile(document, multiples=[beam_size, 1])
    with tf.GradientTape() as tape:

        def decoder_query(output):
            enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
                doc_input, output)
            predictions, attention_weights, dec_output = model(
                doc_input, output, enc_padding_mask, combined_mask,
                dec_padding_mask, True)
            # (batch_size, 1, target_vocab_size)
            return predictions[:, -1:, :]

        return (beam_search(decoder_query,
                            start,
                            beam_size,
                            decode_length=sum_seq_len,
                            vocab_size=config.input_vocab_size,
                            alpha=h_parms.length_penalty,
                            stop_early=False,
                            eos_id=[end]),
                tape)
def beam_test():
    models = ["124M", "1558M"]
    context = "The next part is"
    length = 20
    for model in models:
        starting_beam = 6
        ending_beam = 8
        beam_incr = 1
        for i in range(starting_beam, ending_beam, beam_incr):
            beam_search(model_name=model, input_samples=[context],
                        beam_width=i, length=length)
        print("Finished for model: " + model + "\n\n")
def make_name_beam(model, vocab, hps):
    """Beam search decoding."""
    best_seq = beam_search(model, vocab, hps)
    chars = [vocab.id2char(t) for t in best_seq.tokens[1:]]
    tokens = [t if t != '\s' else ' ' for t in chars]
    tokens = ''.join(tokens)
    print(tokens)
def get_arcs(self, document):
    from covington_transistion import Configuration
    arcs = []
    logger = logging.getLogger('progress_logger')
    for paragraph in range(document.get_amount_of_paragraphs()):
        entities = document.get_entities(paragraph=paragraph)
        logger.info("Paragraph {p}, entities {e}".format(p=paragraph,
                                                         e=len(entities)))
        if entities:
            config = Configuration(entities, document)
            best_end = beam_search(config, self.network)
            arcs.extend(best_end.configuration.get_arcs())
    return arcs
def evaluate_batch(self, article):
    self.setup_valid()
    batch = self.batcher.next_batch()
    start_id = self.vocab.word2id(data.START_DECODING)
    end_id = self.vocab.word2id(data.STOP_DECODING)
    unk_id = self.vocab.word2id(data.UNKNOWN_TOKEN)
    decoded_sents = []
    ref_sents = []
    article_sents = []
    rouge = Rouge()
    while batch is not None:
        enc_batch, enc_lens, enc_padding_mask, enc_batch_extend_vocab, \
            extra_zeros, ct_e = get_enc_data(batch)
        with T.autograd.no_grad():
            enc_batch = self.model.embeds(enc_batch)
            enc_out, enc_hidden = self.model.encoder(enc_batch, enc_lens)

        # ----------------------- Summarization -----------------------
        with T.autograd.no_grad():
            pred_ids = beam_search(enc_hidden, enc_out, enc_padding_mask, ct_e,
                                   extra_zeros, enc_batch_extend_vocab,
                                   self.model, start_id, end_id, unk_id)
        for i in range(len(pred_ids)):
            decoded_words = data.outputids2words(pred_ids[i], self.vocab,
                                                 batch.art_oovs[i])
            if len(decoded_words) < 2:
                decoded_words = "xxx"
            else:
                decoded_words = " ".join(decoded_words)
            decoded_sents.append(decoded_words)
            abstract = batch.original_abstracts[i]
            article = batch.original_articles[i]
            ref_sents.append(abstract)
            article_sents.append(article)
        batch = self.batcher.next_batch()

    load_file = self.opt.load_model
    if article:
        self.print_original_predicted(decoded_sents, ref_sents, article_sents,
                                      load_file)
    scores = rouge.get_scores(decoded_sents, ref_sents)
    rouge_1 = sum([x["rouge-1"]["f"] for x in scores]) / len(scores)
    rouge_2 = sum([x["rouge-2"]["f"] for x in scores]) / len(scores)
    rouge_l = sum([x["rouge-l"]["f"] for x in scores]) / len(scores)
    logger.info(load_file + " rouge_1:" + "%.4f" % rouge_1 +
                " rouge_2:" + "%.4f" % rouge_2 +
                " rouge_l:" + "%.4f" % rouge_l)
def evaluate(data, tokenizer, model, device):
    src = data.source
    src_mask = data.source_mask
    utter_type = data.utter_type
    with torch.no_grad():
        output_token = beam_search(device, src, src_mask, utter_type, model,
                                   tokenizer)
    # Strip trailing padding (0) and [SEP] (102) token ids before decoding.
    while output_token[-1] == 0:
        output_token = output_token[:-1]
    while output_token[-1] == 102:
        output_token = output_token[:-1]
    text = tokenizer.decode(output_token)
    return text
def eval_test(config, qa, dialogue, tokenizer, model, device):
    input_ids, input_mask, utter_type = create_single_sample(
        qa, dialogue, config.max_src_num_length, tokenizer)
    src = torch.tensor(input_ids, dtype=torch.long, device=device)
    src = src.unsqueeze(0).transpose(0, 1)
    src_mask = torch.tensor(input_mask, dtype=torch.long, device=device)
    src_mask = src_mask.unsqueeze(0).transpose(0, 1)
    utter_type = torch.tensor(utter_type, device=device)
    utter_type = utter_type.unsqueeze(0)
    with torch.no_grad():
        output_token = beam_search(device, src, src_mask, utter_type, model,
                                   tokenizer)
    text = tokenizer.decode(output_token)
    return text
def draft_summary_beam_search(input_ids, beam_size):
    log.info("Building: 'Draft beam search decoder'")
    batch = tf.shape(input_ids)[0]
    end = [SEP_ID]
    # (batch_size, seq_len, d_bert)
    enc_output_ = model.bert_model(input_ids)[0]
    enc_output = tf.tile(enc_output_, multiples=[beam_size, 1, 1])
    input_ids = tf.tile(input_ids, multiples=[beam_size, 1])
    dec_input = tf.convert_to_tensor([CLS_ID] * batch)
    output = tf.expand_dims(dec_input, 0)

    def beam_search_decoder(output):
        # (batch_size, 1, 1, seq_len), (_), (batch_size, 1, 1, seq_len)
        _, _, dec_padding_mask = create_masks(input_ids, output)
        embeddings = model.embedding(output)
        predictions, dec_op, attention_weights = model.decoder(
            input_ids, embeddings, enc_output, False, None, dec_padding_mask)
        if config.copy_gen:
            predictions = model.decoder.pointer_generator(
                dec_op, predictions, attention_weights, input_ids,
                tf.shape(input_ids)[1], tf.shape(output)[-1], False)
        # (batch_size, 1, target_vocab_size)
        return predictions[:, -1:, :]

    return (beam_search(beam_search_decoder,
                        dec_input,
                        beam_size,
                        config.summ_length,
                        config.input_vocab_size,
                        h_parms.length_penalty,
                        stop_early=False,
                        eos_id=[end]),
            enc_output_)
def add_decoder_predict_op(self, embeddings, states, label_embeddings,
                           do_beam_search=True):
    state_t = states[-1]
    with tf.variable_scope("DECODE"):
        cell = LSTMCell(self.config)
        preds = []
        preds_proj = []
        U = tf.get_variable(name="U",
                            initializer=tf.contrib.layers.xavier_initializer(),
                            shape=(self.config.lstm_num_units,
                                   self.config.lstm_num_units))
        b = tf.get_variable(name="b",
                            initializer=tf.contrib.layers.xavier_initializer(),
                            shape=(self.config.lstm_num_units,))
        word_ind = [self.labels_vocab.START
                    for _ in range(self.config.batch_size)]
        if do_beam_search:
            with tf.variable_scope("DECODING") as scope:
                preds_proj = beam_search(word_ind, label_embeddings, U, b, cell,
                                         state_t, self.config.max_dec_length,
                                         self.config.batch_size, self.run_cell,
                                         scope)
        else:
            with tf.variable_scope("DECODING") as scope:
                for time_step in range(self.config.max_dec_length):
                    word = self.run_cell(word_ind, label_embeddings, U, b, cell,
                                         time_step, state_t, scope)
                    preds.append(word)
                    word_ind = tf.argmax(word, axis=1)
                    preds_proj.append(word_ind)
                preds_proj = tf.stack(preds_proj, axis=1)
                preds = tf.stack(preds, axis=1)
                assert preds_proj.get_shape().as_list() == [
                    self.config.batch_size, self.config.max_dec_length
                ]
    return preds_proj, preds
def test():
    du = DataLoader(**data_config)
    params['src_vcb_size'] = du.vocab_size
    params['tgt_vcb_size'] = du.vocab_size
    params['batch_size'] = 1
    tf.reset_default_graph()
    with tf.Session() as sess:
        model = Seq2Seq(params, mode='decode')
        sess.run(tf.global_variables_initializer())
        model.load(sess, tf.train.latest_checkpoint('./logs/model/'))
        for source, source_len, target, _, _ in du.test_data('./data/dialog.test'):
            result = beam_search(sess, model, du.vcb, source, source_len)
            print('source: ', du.transform_indexs(source[0]))
            print('target: ', du.transform_indexs(target[0]))
            print('predict: ', du.transform_indexs(result))
            print('')
def predict(inp_sentence):
    start_token = [tokenizer_de.vocab_size]
    end_token = [tokenizer_de.vocab_size + 1]

    # The input sentence is German, hence add the German start and end tokens.
    inp_sentence = start_token + tokenizer_de.encode(inp_sentence) + end_token
    encoder_input = tf.expand_dims(inp_sentence, 0)

    # As the target is English, the first token fed to the transformer should
    # be the English start token.
    decoder_input = [tokenizer_en.vocab_size]
    output = tf.expand_dims(decoder_input, 0)

    def symbols_to_logits(output):
        batched_input = tf.tile(encoder_input, [beam_width, 1])
        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            batched_input, output)
        # predictions.shape == (batch_size, seq_len, vocab_size)
        predictions, attention_weights = transformer1(batched_input, output,
                                                      False, enc_padding_mask,
                                                      combined_mask,
                                                      dec_padding_mask)
        predictions = predictions[:, -1, :]
        return predictions

    finished_seq, finished_scores, states = beam_search(
        symbols_to_logits, output, beam_width, MAX_LENGTH, target_vocab_size,
        alpha, states=None, eos_id=tokenizer_en.vocab_size + 1,
        stop_early=True, use_tpu=False, use_top_k_with_unique=True)
    return finished_seq[0, 0, :]
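# A self-contained toy of the symbols_to_logits contract that predict() relies
# on: the callback receives the partial sequences for all beams flattened into
# the batch dimension, shape (batch * beam_width, decoded_len), and must return
# next-token logits of shape (batch * beam_width, vocab_size). A
# tensor2tensor-style beam_search is assumed here; its exact import path and
# extra return values (e.g. states) vary across versions.
import tensorflow as tf

def constant_symbols_to_logits(output):
    vocab_size = 5
    n = tf.shape(output)[0]
    # Deterministically favor token id 3 so the decoded beams are predictable.
    return tf.one_hot(tf.fill([n], 3), vocab_size) * 10.0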
def draft_decoder(self, input_ids, enc_output, beam_size, length_penalty,
                  temperature, top_p, top_k, batch_size, training=False):
    """Inference call; builds a draft output sequence auto-regressively."""
    start_ids = tf.repeat(config.CLS_ID, repeats=batch_size)
    input_ids = tfa.seq2seq.tile_batch(input_ids, multiplier=beam_size)
    enc_output = tfa.seq2seq.tile_batch(enc_output, multiplier=beam_size)

    def perform_beam_search(dec_input):
        return query_decoder(self, enc_output, input_ids, dec_input,
                             batch_size, temperature, top_p, top_k,
                             training=training)

    predicted_beam_search_op, _, _, attention_dist = beam_search(
        perform_beam_search,
        initial_ids=start_ids,
        beam_size=beam_size,
        decode_length=config.target_seq_length,
        vocab_size=config.target_vocab_size,
        alpha=length_penalty,
        stop_early=False,
        eos_id=config.SEP_ID)
    # Take the top-scoring beam for each batch element.
    predicted_output_sequence = predicted_beam_search_op[:, 0, :]
    return predicted_output_sequence, attention_dist
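# Hedged usage sketch for draft_decoder above. The summarizer instance, the
# encoded input_ids/enc_output tensors, and the sampling hyperparameter values
# all come from the surrounding project and are assumptions here.
draft_ids, attn = summarizer.draft_decoder(input_ids,
                                           enc_output,
                                           beam_size=4,
                                           length_penalty=0.6,
                                           temperature=1.0,
                                           top_p=1.0,
                                           top_k=0,
                                           batch_size=1,
                                           training=False)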
def abstract(self, article):
    start_id = self.vocab.word2id(data.START_DECODING)
    end_id = self.vocab.word2id(data.STOP_DECODING)
    unk_id = self.vocab.word2id(data.UNKNOWN_TOKEN)
    example = Example(' '.join(jieba.cut(article)), '', self.vocab)
    batch = Batch([example], self.vocab, 1)
    enc_batch, enc_lens, enc_padding_mask, enc_batch_extend_vocab, \
        extra_zeros, ct_e = get_enc_data(batch)
    with T.autograd.no_grad():
        enc_batch = self.model.embeds(enc_batch)
        enc_out, enc_hidden = self.model.encoder(enc_batch, enc_lens)
        pred_ids = beam_search(enc_hidden, enc_out, enc_padding_mask, ct_e,
                               extra_zeros, enc_batch_extend_vocab, self.model,
                               start_id, end_id, unk_id)
    for i in range(len(pred_ids)):
        decoded_words = data.outputids2words(pred_ids[i], self.vocab,
                                             batch.art_oovs[i])
        decoded_words = " ".join(decoded_words)
        return decoded_words
def treino_beam_search():
    with open('Treino/beam_search.csv', 'a+') as arq:
        arq.write('Instancia,Estado,Valor,Tamanho,Execucao,num_best,num_iter\n')
    for problema in problemas:
        print(problema[0], "-----")
        for m in hyper_param_beam_search:
            print(m)
            pes_max = problema[1]
            val = mochila.get_val_from_vt(problema[2])
            pes = mochila.get_pes_from_vt(problema[2])
            estado_inicial = mochila.estado_inicial_aleatorio(pes, pes_max)
            tempo_inicio = default_timer()
            solucao = beam_search(m, estado_inicial, pes, val, pes_max)
            tempo_total = default_timer() - tempo_inicio
            mochila.save_estado(solucao, val, pes, 'Treino/beam_search.csv',
                                tempo_total, problema[0], [m])
def make_translation_predictions(model, use_bs2=False):
    print('Generating translations')
    with open('test_predictions.txt', 'w') as outfile:
        with open('source_test.txt', 'r') as infile:
            for line in tqdm(list(infile)):
                tokens = [DE.vocab.stoi[w] for w in tokenize_de(line.strip())]
                src = ntorch.tensor(tokens, names="srcSeqlen")
                if use_bs2:
                    translation = beam_search2(model, src, beam_size=5,
                                               num_results=10)[0]
                else:
                    translation = beam_search(model, src, beam_size=5,
                                              num_results=10)[0]
                assert translation[0] == BOS_IND
                sent = ' '.join(EN.vocab.itos[i] for i in translation[1:])
                outfile.write(sent + '\n')
def run_test_beamsearch(verbose):
    opt_value = []  # Best values found for each problem
    opt_time = []   # Times measured for each problem
    i = 0
    for problem in problems:
        # Run the algorithm.
        state, size, value, time = beam_search(max_size=problem[0],
                                               values=problem[1],
                                               m=hiperparam,
                                               max_time=300)
        # Optional progress output.
        if verbose:
            print('Problem', problem_name[i],
                  'finished with (opt_value, opt_size, time) equals',
                  (value, size, time))
        # Store the best values.
        opt_value.append(value)
        opt_time.append(time)
        i = i + 1
    mean_value = np.mean(opt_value)
    std_value = np.std(opt_value)
    mean_time = np.mean(opt_time)
    std_time = np.std(opt_time)
    if verbose:
        print('The mean and std for the values found were:', mean_value, '+-',
              std_value)
        print('The mean and std for the times found were:', mean_time, '+-',
              std_time)
    return opt_value, opt_time, [mean_value, std_value, mean_time, std_time]
def tower_infer_dec(chars, scope, rnn_cell, dec_cell, word_emb, rnn_state,
                    out_reuse_vars=False, dev='/cpu:0'):
    with tf.device(dev):
        with tf.variable_scope('embatch_size', reuse=True):
            # (vocab_size, latent_dim)
            emb_char = tf.sg_emb(name='emb_char', voca_size=Hp.char_vs,
                                 dim=Hp.hd, dev=dev)
            emb_word = tf.sg_emb(name='emb_word', emb=word_emb,
                                 voca_size=Hp.word_vs, dim=300, dev=dev)

        ch = chars
        ch = tf.reverse_sequence(input=ch,
                                 seq_lengths=[Hp.c_maxlen] * Hp.batch_size,
                                 seq_dim=1)
        reuse_vars = reuse_vars_enc = True

        # -------------------------- BYTENET ENCODER --------------------------
        with tf.variable_scope('encoder'):
            # Embedding table lookup: (batch, sentlen, latentdim).
            enc = ch.sg_lookup(emb=emb_char)
            # Loop over dilated conv blocks.
            for i in range(Hp.num_blocks):
                enc = (enc.sg_res_block(size=5, rate=1, name="enc1_%d" % (i),
                                        is_first=True, reuse_vars=reuse_vars,
                                        dev=dev)
                       .sg_res_block(size=5, rate=2, name="enc2_%d" % (i),
                                     reuse_vars=reuse_vars, dev=dev)
                       .sg_res_block(size=5, rate=4, name="enc4_%d" % (i),
                                     reuse_vars=reuse_vars, dev=dev)
                       .sg_res_block(size=5, rate=8, name="enc8_%d" % (i),
                                     reuse_vars=reuse_vars, dev=dev)
                       .sg_res_block(size=5, rate=16, name="enc16_%d" % (i),
                                     reuse_vars=reuse_vars, dev=dev))
            byte_enc = enc

        # -------------------------- QCNN + QPOOL ENCODER #1 --------------------------
        with tf.variable_scope('quazi'):
            # Quasi CNN layer ZFO [batch * 3, seqlen, dim2].
            conv = byte_enc.sg_quasi_conv1d(is_enc=True, size=4, name="qconv_1",
                                            dev=dev, reuse_vars=reuse_vars)
            # c = f * c + (1 - f) * z, h = o * c  [batch * 4, seqlen, hd]
            pool0 = conv.sg_quasi_rnn(is_enc=False, att=False, name="qrnn_1",
                                      reuse_vars=reuse_vars, dev=dev)
            qpool_last = pool0[:, -1, :]

        # -------------------------- MAXPOOL along time dimension --------------------------
        inpt_maxpl = tf.expand_dims(byte_enc, 1)  # [batch, 1, seqlen, channels]
        maxpool = tf.nn.max_pool(inpt_maxpl, [1, 1, Hp.c_maxlen, 1],
                                 [1, 1, 1, 1], 'VALID')
        maxpool = tf.squeeze(maxpool, [1, 2])

        # -------------------------- HIGHWAY --------------------------
        concat = qpool_last + maxpool
        with tf.variable_scope('highway', reuse=reuse_vars):
            input_lstm = highway(concat, concat.get_shape()[-1], num_layers=1)

        # -------------------------- CONTEXT LSTM --------------------------
        input_lstm = tf.nn.dropout(input_lstm, Hp.keep_prob)
        with tf.variable_scope('contx_lstm', reuse=reuse_vars):
            output, rnn_state = rnn_cell(input_lstm, rnn_state)

        beam_size = 8
        reuse_vars = out_reuse_vars
        greedy = False
        if greedy:
            dec_state = rnn_state
            dec_out = []
            d_out = tf.constant([1] * Hp.batch_size)
            for idx in range(Hp.w_maxlen):
                w_input = d_out.sg_lookup(emb=emb_word)
                dec_state = tf.contrib.rnn.LSTMStateTuple(c=dec_state.c,
                                                          h=dec_state.h)
                with tf.variable_scope('dec_lstm', reuse=idx > 0 or reuse_vars):
                    d_out, dec_state = dec_cell(w_input, dec_state)
                dec_out.append(d_out)
                d_out = tf.expand_dims(d_out, 1).sg_conv1d_gpus(
                    size=1, dim=Hp.word_vs, name="out_conv", act="linear",
                    dev=dev, reuse=idx > 0 or reuse_vars)
                d_out = tf.squeeze(d_out).sg_argmax()
            dec_out = tf.stack(dec_out, 1)
            dec = dec_out.sg_conv1d_gpus(size=1, dim=Hp.word_vs,
                                         name="out_conv", act="linear",
                                         dev=dev, reuse=True)
            return dec.sg_argmax(), rnn_state
        else:
            # -------------------------- BEAM SEARCH --------------------------
            dec_state = tf.contrib.rnn.LSTMStateTuple(
                tf.tile(tf.expand_dims(rnn_state[0], 1), [1, beam_size, 1]),
                tf.tile(tf.expand_dims(rnn_state[1], 1), [1, beam_size, 1]))
            initial_ids = tf.constant([1] * Hp.batch_size)

            def symbols_to_logits_fn(ids, dec_state):
                dec = []
                dec_c, dec_h = [], []
                # ids: (batch x beam_size x decoded_seq)
                ids = tf.reshape(ids, [Hp.batch_size, beam_size, -1])
                for ind in range(beam_size):
                    with tf.variable_scope('dec_lstm',
                                           reuse=ind > 0 or reuse_vars):
                        w_input = ids[:, ind, -1].sg_lookup(emb=emb_word)
                        dec_state0 = tf.contrib.rnn.LSTMStateTuple(
                            c=dec_state.c[:, ind, :], h=dec_state.h[:, ind, :])
                        dec_out, dec_state_i = dec_cell(w_input, dec_state0)
                        dec_out = tf.expand_dims(dec_out, 1)
                    dec_i = dec_out.sg_conv1d_gpus(size=1, dim=Hp.word_vs,
                                                   name="out_conv",
                                                   act="linear", dev=dev,
                                                   reuse=ind > 0 or reuse_vars)
                    dec.append(tf.squeeze(dec_i, 1))
                    dec_c.append(dec_state_i[0])
                    dec_h.append(dec_state_i[1])
                return tf.stack(dec, 1), tf.contrib.rnn.LSTMStateTuple(
                    tf.stack(dec_c, 1), tf.stack(dec_h, 1))

            final_ids, final_probs = beam_search.beam_search(
                symbols_to_logits_fn, dec_state, initial_ids, beam_size,
                Hp.w_maxlen - 1, Hp.word_vs, 3.5, eos_id=2)
            return final_ids[:, 0, :], rnn_state
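# The snippets above call many different beam_search implementations (keyed by
# a logits callback, by a model, or by knapsack hyperparameters). As a common
# reference point, here is a minimal, self-contained pure-Python beam search
# over a next-token log-probability callback. It is a sketch of the general
# technique, not any one of the implementations used above; note it applies no
# length normalization (the alpha / length_penalty arguments seen above), so it
# favors shorter finished sequences.
import math

def simple_beam_search(log_prob_fn, bos_id, eos_id, beam_size, max_len):
    """log_prob_fn(prefix) -> dict mapping next token id to log-probability."""
    beams = [([bos_id], 0.0)]  # (token sequence, cumulative log-prob)
    finished = []
    for _ in range(max_len):
        candidates = []
        for seq, score in beams:
            for tok, lp in log_prob_fn(seq).items():
                candidates.append((seq + [tok], score + lp))
        # Keep only the beam_size best partial hypotheses.
        candidates.sort(key=lambda c: c[1], reverse=True)
        beams = []
        for seq, score in candidates[:beam_size]:
            (finished if seq[-1] == eos_id else beams).append((seq, score))
        if not beams:
            break
    finished.extend(beams)  # fall back to unfinished beams at max_len
    return max(finished, key=lambda c: c[1])

# Example with a toy distribution: token 2 has p=0.7, the end token 3 has p=0.3.
toy = lambda seq: {2: math.log(0.7), 3: math.log(0.3)}
print(simple_beam_search(toy, bos_id=0, eos_id=3, beam_size=2, max_len=5))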