def Decode(self, article):
    """Decode a single article string into a summary.

    Args:
      article: Raw article text to summarize.

    Returns:
      The decoded summary as a space-joined string of words.
    """
    article_batch, article_lens = self.get_article_inputs(article)
    searcher = beam_search.BeamSearch(
        self._model,
        4,  # beam size
        self._vocab.WordToId(data.SENTENCE_START),
        self._vocab.WordToId(data.SENTENCE_END),
        self._hps.dec_timesteps)
    batch_copy = article_batch.copy()
    lens_copy = article_lens.copy()
    top_hyp = searcher.BeamSearch(self.sess, batch_copy, lens_copy)[0]
    # Drop the leading start token before mapping ids back to words.
    token_ids = [int(t) for t in top_hyp.tokens[1:]]
    return ' '.join(data.Ids2Words(token_ids, self._vocab))
def _Decode(self, article_text):
    """Decode one question string into an answer.

    Args:
      article_text: Raw question text.

    Returns:
      Tuple of (original question text, decoded answer text).
    """
    bs = beam_search.BeamSearch(
        self._model, self._hps.batch_size,
        self._vocab.WordToId(data.SENTENCE_START),
        self._vocab.WordToId(data.SENTENCE_END),
        self._hps.dec_timesteps)
    # Wrap the raw text in the document/paragraph/sentence markup the data
    # pipeline expects, then split it back into stripped sentences.
    article = "<d><p><s>" + article_text + "</s></p></d>"
    sentences = [s.strip()
                 for s in data.ToSentences(article, include_token=False)]
    pad_id = self._vocab.WordToId(data.PAD_TOKEN)
    # Concatenate word ids from at most the first 100 sentences.
    enc_inputs = []
    for sentence in sentences[:100]:
        enc_inputs += data.GetWordIds(sentence, self._vocab)
    enc_input_len = len(enc_inputs)
    # Right-pad with PAD ids up to the encoder timestep count.
    while len(enc_inputs) < self._hps.enc_timesteps:
        enc_inputs.append(pad_id)
    # Tile the single example across a 4-row batch for the beam search.
    article_batch_cp = [enc_inputs for _ in range(4)]
    article_lens_cp = [enc_input_len for _ in range(4)]
    best_beam = bs.BeamSearch(self._sess, article_batch_cp,
                              article_lens_cp)[0]
    decode_output = [int(t) for t in best_beam.tokens[1:]]
    answer = ' '.join(data.Ids2Words(decode_output, self._vocab))
    # Truncate at the first sentence-end marker, if any.
    end_p = answer.find(data.SENTENCE_END, 0)
    if end_p != -1:
        answer = answer[:end_p]
    answer = answer.replace('<UNK>', '')
    return article_text, answer
def call(self, inp, targ):
    """Run one decode pass over a target batch and return the loss.

    Args:
      inp: Encoder input batch.
      targ: Target token-id batch, shape (batch, time).

    Returns:
      Accumulated loss over the decoded timesteps.
    """
    loss = 0
    enc_hidden = self.encoder(inp, self.hidden)
    dec_hidden = enc_hidden
    # Every sequence in the batch starts with the BEGIN tag.
    dec_input = tf.expand_dims(
        [self.targ_lang.word2idx[BEGIN_TAG]] * self.batch_sz, 1)
    result = ''
    if self.use_beam_search:
        searcher = beam_search.BeamSearch(
            self.beam_size,
            self.targ_lang.word2idx[BEGIN_TAG],
            self.targ_lang.word2idx[END_TAG],
            self.targ_lang,
            self.max_length_tar,
            self.batch_sz,
            self.decoder)
    for step in range(1, targ.shape[1]):
        if self.use_beam_search:
            # NOTE(review): dec_hidden is deliberately not updated here in
            # the original; the beam search drives its own state.
            predictions, _ = self.decoder(dec_input, dec_hidden)
            best_beam = searcher.beam_search(dec_input, dec_hidden)
            loss += tf.reduce_mean(best_beam.log_prob)
            predicted_id = tf.argmax(predictions[0]).numpy()
        else:
            # Teacher forcing: the gold token becomes the next decoder input.
            predictions, dec_hidden = self.decoder(dec_input, dec_hidden)
            predicted_id = tf.argmax(predictions[0]).numpy()
            loss += self.loss_function(targ[:, step], predictions)
        dec_input = tf.expand_dims(targ[:, step], 1)
        predicted_word = self.targ_lang.idx2word[predicted_id]
        if self.display_result and predicted_word == END_TAG:
            print("result: ", result)
        if predicted_word == END_TAG:
            return loss
        result += ' ' + predicted_word
    return loss
def _Decode(self, saver, sess):
    """Restore the latest checkpoint and decode every batch.

    Args:
      saver: Tensorflow checkpoint saver.
      sess: Tensorflow session.

    Returns:
      True when decoding ran, False if no checkpoint exists yet.
    """
    ckpt_state = tf.train.get_checkpoint_state(FLAGS.log_root)
    if not (ckpt_state and ckpt_state.model_checkpoint_path):
        tf.logging.info('No model to decode yet at %s', FLAGS.log_root)
        return False
    tf.logging.info('checkpoint path %s', ckpt_state.model_checkpoint_path)
    ckpt_path = os.path.join(
        FLAGS.log_root, os.path.basename(ckpt_state.model_checkpoint_path))
    tf.logging.info('renamed checkpoint path %s', ckpt_path)
    saver.restore(sess, ckpt_path)
    self._decode_io.ResetFiles()
    for batch in self._batch_reader.data_iterator():
        # Only the article inputs, lengths and originals are needed here.
        (article_batch, _, _, article_lens,
         _, _, origin_articles, origin_abstracts) = batch
        for idx in xrange(self._hps.batch_size):
            searcher = beam_search.BeamSearch(
                self._model, self._hps.batch_size,
                self._vocab.get_word_id(data_manager.START_TOKEN),
                self._vocab.get_word_id(data_manager.END_TOKEN),
                self._hps.dec_timesteps)
            # Fill every batch row with sample `idx`, so beam search
            # effectively decodes one example at full batch size.
            batch_tiled = article_batch.copy()
            batch_tiled[:] = article_batch[idx:idx + 1]
            lens_tiled = article_lens.copy()
            lens_tiled[:] = article_lens[idx:idx + 1]
            top_hyp = searcher.BeamSearch(sess, batch_tiled, lens_tiled)[0]
            token_ids = [int(t) for t in top_hyp.tokens[1:]]
            self._DecodeBatch(
                origin_articles[idx], origin_abstracts[idx], token_ids)
    return True
def _Decode(self, saver, sess):
    """Restore a checkpoint and decode it.

    Args:
      saver: Tensorflow checkpoint saver.
      sess: Tensorflow session.

    Returns:
      If success, returns true, otherwise, false.
    """
    ckpt_state = tf.train.get_checkpoint_state(FLAGS.log_root)
    if not (ckpt_state and ckpt_state.model_checkpoint_path):
        tf.logging.info('No model to decode yet at %s', FLAGS.log_root)
        return False
    tf.logging.info('checkpoint path %s', ckpt_state.model_checkpoint_path)
    ckpt_path = os.path.join(
        FLAGS.log_root, os.path.basename(ckpt_state.model_checkpoint_path))
    tf.logging.info('renamed checkpoint path %s', ckpt_path)
    saver.restore(sess, ckpt_path)
    self._decode_io.ResetFiles()
    for _ in xrange(FLAGS.decode_batches_per_ckpt):
        (anoPrice_batch, article_batch, abstract_batch, targets,
         article_lens, price_lens, abstract_lens, loss_weights,
         origin_articles, origin_abstracts) = self._batch_reader.NextBatch()
        for i in xrange(self._hps.batch_size):
            bs = beam_search.BeamSearch(
                self._model, self._hps.batch_size,
                self._vocab.WordToId(data.SENTENCE_START),
                self._vocab.WordToId(data.SENTENCE_END),
                self._hps.dec_timesteps)
            # Broadcast example i across the whole batch for both the
            # price inputs and the article inputs.
            anoPrice_batch_cp = anoPrice_batch.copy()
            anoPrice_batch_cp[:] = anoPrice_batch[i:i + 1]
            price_lens_cp = price_lens.copy()
            # Fix: copy from the source array, not from the copy itself.
            # The original read price_lens_cp[i:i+1], which is inconsistent
            # with every sibling *_cp assignment and fragile under
            # overlapping-slice assignment.
            price_lens_cp[:] = price_lens[i:i + 1]
            article_batch_cp = article_batch.copy()
            article_batch_cp[:] = article_batch[i:i + 1]
            article_lens_cp = article_lens.copy()
            article_lens_cp[:] = article_lens[i:i + 1]
            best_beam = bs.BeamSearch(sess, article_batch_cp, article_lens_cp,
                                      anoPrice_batch_cp, price_lens_cp)[0]
            decode_output = [int(t) for t in best_beam.tokens[1:]]
            self._DecodeBatch(origin_articles[i], origin_abstracts[i],
                              decode_output)
    return True
def _Decode(self, saver, sess):
    """Restore every checkpoint file and decode the full dataset with each.

    Args:
      saver: Tensorflow checkpoint saver.
      sess: Tensorflow session.

    Returns:
      If success, returns true, otherwise, false.
    """
    ckpt_state = tf.train.get_checkpoint_state(FLAGS.log_root)
    if not (ckpt_state and ckpt_state.model_checkpoint_path):
        tf.logging.info('No model to decode yet at %s', FLAGS.log_root)
        return False
    tf.logging.info('checkpoint path %s', ckpt_state.model_checkpoint_path)
    filenames = self._getckptFiles()
    initWE = None
    # Fix: the original used '<>', the legacy inequality operator that was
    # removed in Python 3; use 'is not None' / '!=' which work everywhere.
    if FLAGS.word2vec is not None:
        initWE = self._model._loadWord2Vec()
    for filename in filenames:
        saver.restore(sess, filename)
        # Each checkpoint writes its decodes into its own directory.
        self._decode_io = DecodeIO(filename + "Dir")
        self._decode_io.ResetFiles()
        self._batch_reader._FillBucketInputQueueShuffle()
        # Drain the bucket queue completely for this checkpoint.
        while self._batch_reader._bucket_input_queue.qsize() != 0:
            (article_batch, _, _, article_lens, _, _,
             origin_articles, origin_abstracts,
             origin_filenames) = self._batch_reader.NextBatch()
            for i in xrange(self._hps.batch_size):
                bs = beam_search.BeamSearch(
                    self._model, self._hps.batch_size,
                    self._vocab.WordToId(data.SENTENCE_START),
                    self._vocab.WordToId(data.SENTENCE_END),
                    self._hps.dec_timesteps, initWE)
                # Tile sample i across the batch so beam search decodes a
                # single example at full batch size.
                article_batch_cp = article_batch.copy()
                article_batch_cp[:] = article_batch[i:i + 1]
                article_lens_cp = article_lens.copy()
                article_lens_cp[:] = article_lens[i:i + 1]
                best_beam = bs.BeamSearch(sess, article_batch_cp,
                                          article_lens_cp)[0]
                decode_output = [int(t) for t in best_beam.tokens[1:]]
                self._DecodeBatch(origin_articles[i], origin_abstracts[i],
                                  origin_filenames[i], decode_output)
    return True
def _decode(self, saver, sess):
    """Restore the newest checkpoint and decode a fixed number of batches.

    Args:
      saver: Tensorflow checkpoint saver.
      sess: Tensorflow session.

    Returns:
      True when decoding ran, False when no checkpoint exists yet.

    Information:
      If we want TensorFlow to automatically choose an existing and
      supported device in case the specified one doesn't exist, set
      allow_soft_placement to True when creating the session.
    """
    ckpt_state = tf.train.get_checkpoint_state(FLAGS.log_root)
    if not (ckpt_state and ckpt_state.model_checkpoint_path):
        tf.logging.info('No model to decode yet at %s', FLAGS.log_root)
        return False
    tf.logging.info('checkpoint path %s', ckpt_state.model_checkpoint_path)
    ckpt_path = os.path.join(
        FLAGS.log_root, os.path.basename(ckpt_state.model_checkpoint_path))
    tf.logging.info('renamed checkpoint path %s', ckpt_path)
    saver.restore(sess, ckpt_path)
    self._decode_io.reset_files()
    for _ in xrange(FLAGS.decode_batches_per_ckpt):
        (article_batch, _, _, article_lens, _, _,
         origin_articles, origin_abstracts) = self._batch_reader.next_batch()
        for sample_idx in xrange(self._hps.batch_size):
            searcher = beam_search.BeamSearch(
                self._model, self._hps.batch_size,
                self._vocab.WordToId(data.SENTENCE_START),
                self._vocab.WordToId(data.SENTENCE_END),
                self._hps.dec_timesteps)
            # Fill every row of the batch with this one sample so the
            # beam search decodes it at full batch size.
            batch_tiled = article_batch.copy()
            batch_tiled[:] = article_batch[sample_idx:sample_idx + 1]
            lens_tiled = article_lens.copy()
            lens_tiled[:] = article_lens[sample_idx:sample_idx + 1]
            top_hyp = searcher.BeamSearch(sess, batch_tiled, lens_tiled)[0]
            token_ids = [int(t) for t in top_hyp.tokens[1:]]
            self._decode_batch(origin_articles[sample_idx],
                               origin_abstracts[sample_idx], token_ids)
    return True
def single_decode(self, one_sent_in):
    """Decode a single input sentence with the latest checkpoint.

    Args:
      one_sent_in: Raw input sentence (whitespace-tokenized string).

    Returns:
      False if no checkpoint is available; otherwise None (the decoded
      output is printed, not returned).
    """
    time.sleep(DECODE_LOOP_DELAY_SECS)
    ckpt_state = tf.train.get_checkpoint_state(FLAGS.log_root)
    if not (ckpt_state and ckpt_state.model_checkpoint_path):
        tf.logging.info('No model to decode yet at %s', FLAGS.log_root)
        return False
    tf.logging.info('checkpoint path %s', ckpt_state.model_checkpoint_path)
    ckpt_path = os.path.join(
        FLAGS.log_root, os.path.basename(ckpt_state.model_checkpoint_path))
    tf.logging.info('renamed checkpoint path %s', ckpt_path)
    # Fix: the original created the Session unconditionally at the top and
    # never closed it, leaking a session per call (including the early
    # return above). Create it only when needed and close it when done.
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    try:
        self._saver.restore(sess, ckpt_path)
        for _ in range(FLAGS.decode_batches_per_ckpt):
            (article_batch, _, _, article_lens, _, _,
             origin_articles, origin_abstracts) = self._batch_reader.NextBatch()
            for _ in range(1):  # decode just one example per batch
                bs = beam_search.BeamSearch(
                    self._model, self._hps.batch_size,
                    self._vocab.WordToId(data.SENTENCE_START),
                    self._vocab.WordToId(data.SENTENCE_END),
                    self._hps.dec_timesteps)
                one_sent = one_sent_in.strip().split()
                # Length is taken before padding/truncation.
                article_lens_cp2 = [len(one_sent)] * 2
                one_sent = [self._vocab.WordToId(x) for x in one_sent]
                if len(one_sent) < self._hps.enc_timesteps:
                    padid = self._vocab.WordToId(data.PAD_TOKEN)
                    one_sent = one_sent + \
                        [padid] * (self._hps.enc_timesteps - len(one_sent))
                else:
                    one_sent = one_sent[:self._hps.enc_timesteps]
                article_batch_cp2 = [one_sent] * 2
                best_beam = bs.BeamSearch(sess, article_batch_cp2,
                                          article_lens_cp2)[0]
                decode_output = [int(t) for t in best_beam.tokens[1:]]
                decoded_output = ' '.join(
                    data.Ids2Words(decode_output, self._vocab))
                # Truncate at the first sentence-end marker, if any.
                end_p = decoded_output.find(data.SENTENCE_END, 0)
                if end_p != -1:
                    decoded_output = decoded_output[:end_p]
                decoded_output = decoded_output.strip()
                print("decode output = {}".format(decoded_output))
                print("##### decode over #####")
    finally:
        sess.close()
def __init__(self, vocab_inp_size, vocab_tar_size, embedding_dim, enc_units,
             batch_sz, inp_lang, targ_lang, max_length_tar=100,
             use_GloVe=False, mode=BEAM_SEARCH, use_bilstm=False,
             beam_size=2):
    """Build a seq2seq model with an optional beam-search decoder.

    Args:
      vocab_inp_size: Input vocabulary size.
      vocab_tar_size: Target vocabulary size.
      embedding_dim: Embedding dimension shared by encoder and decoder.
      enc_units: Number of encoder (and decoder) hidden units.
      batch_sz: Batch size.
      inp_lang: Input language index (provides .vocab).
      targ_lang: Target language index (provides .vocab and word2idx).
      max_length_tar: Maximum target sequence length.
      use_GloVe: Whether to initialize embeddings from GloVe.
      mode: Decoding mode constant.
      use_bilstm: Whether the encoder/decoder use a bidirectional LSTM.
      beam_size: Beam width for beam search.
    """
    super(Seq2Seq, self).__init__()
    # Plain configuration attributes.
    self.vocab_inp_size = vocab_inp_size
    self.vocab_tar_size = vocab_tar_size
    self.embedding_dim = embedding_dim
    self.batch_sz = batch_sz
    self.enc_units = enc_units
    self.targ_lang = targ_lang
    self.max_length_tar = max_length_tar
    self.mode = mode
    self.beam_size = beam_size
    self.use_bilstm = use_bilstm
    # Sub-networks and initial encoder state.
    self.encoder = Encoder(vocab_inp_size, embedding_dim, enc_units,
                           batch_sz, use_GloVe, inp_lang.vocab,
                           use_bilstm=use_bilstm)
    self.decoder = Decoder(vocab_tar_size, embedding_dim, enc_units,
                           batch_sz, use_GloVe, targ_lang.vocab,
                           use_bilstm=use_bilstm)
    self.hidden = tf.zeros((batch_sz, enc_units))
    # Shared beam-search helper bound to the decoder.
    self.bs = beam_search.BeamSearch(self.beam_size,
                                     self.targ_lang.word2idx[BEGIN_TAG],
                                     self.targ_lang.word2idx[END_TAG],
                                     self.targ_lang,
                                     self.max_length_tar,
                                     self.batch_sz,
                                     self.decoder)
def _Decode(self, saver, sess):
    """Restore the latest checkpoint and decode several dataset batches.

    Args:
      saver: Tensorflow checkpoint saver.
      sess: Tensorflow session.

    Returns:
      True when decoding ran, False if no checkpoint exists yet.
    """
    ckpt_state = tf.train.get_checkpoint_state(self._log_root)
    if not (ckpt_state and ckpt_state.model_checkpoint_path):
        tf.logging.info('No model to decode yet at %s', self._log_root)
        return False
    tf.logging.info('checkpoint path %s', ckpt_state.model_checkpoint_path)
    ckpt_path = os.path.join(
        self._log_root, os.path.basename(ckpt_state.model_checkpoint_path))
    tf.logging.info('renamed checkpoint path %s', ckpt_path)
    saver.restore(sess, ckpt_path)
    self._decode_io.ResetFiles()
    for _ in xrange(textsum_config.decode_batches_per_ckpt):
        # All articles are assumed padded to enc_timesteps, so the length
        # vector is constant for the whole batch.
        article_lens = np.full(self._hps.batch_size,
                               fill_value=self._hps.enc_timesteps,
                               dtype=np.int32)
        (article_batch, _, _,
         origin_articles, origin_abstracts) = self._dataset.next_batch(
             self._hps.batch_size)
        for idx in xrange(self._hps.batch_size):
            searcher = beam_search.BeamSearch(
                self._model, self._hps.batch_size,
                self._vocab.WordToId(data.SENTENCE_START),
                self._vocab.WordToId(data.SENTENCE_END),
                self._hps.dec_timesteps)
            # Tile sample `idx` across the batch for single-example decoding.
            batch_tiled = article_batch.copy()
            batch_tiled[:] = article_batch[idx:idx + 1]
            lens_tiled = article_lens.copy()
            lens_tiled[:] = article_lens[idx:idx + 1]
            top_hyp = searcher.BeamSearch(sess, batch_tiled, lens_tiled)[0]
            token_ids = [int(t) for t in top_hyp.tokens[1:]]
            self._DecodeBatch(origin_articles[idx], origin_abstracts[idx],
                              token_ids, idx)
    return True
def Decode(self):
    """Restore the latest checkpoint and decode ten batches.

    Returns:
      True when decoding ran, False if no checkpoint was available.
    """
    saver = self._saver
    ckpt_state = tf.train.get_checkpoint_state(FLAGS.log_root)
    if not (ckpt_state and ckpt_state.model_checkpoint_path):
        tf.logging.info('No model to decode yet at %s', FLAGS.log_root)
        return False
    tf.logging.info('checkpoint path %s', ckpt_state.model_checkpoint_path)
    ckpt_path = os.path.join(
        FLAGS.log_root, os.path.basename(ckpt_state.model_checkpoint_path))
    tf.logging.info('renamed checkpoint path %s', ckpt_path)
    # Fix: the original created the Session before the checkpoint check and
    # never closed it, leaking a session on every call. Create it only once
    # a checkpoint exists, and always close it.
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    try:
        saver.restore(sess, ckpt_path)
        self._decode_io.ResetFiles()
        for _ in xrange(10):
            (article_batch, _, _, article_lens, _, _, origin_articles,
             origin_abstracts) = self._batch_reader.NextBatch()
            for i in xrange(self._hps.batch_size):
                bs = beam_search.BeamSearch(
                    self._model, self._hps.batch_size,
                    self._vocab.WordToId(data.SENTENCE_START),
                    self._vocab.WordToId(data.SENTENCE_END),
                    self._hps.dec_timesteps)
                # Tile sample i across the batch so the beam search decodes
                # a single example at full batch size.
                article_batch_cp = article_batch.copy()
                article_batch_cp[:] = article_batch[i:i + 1]
                article_lens_cp = article_lens.copy()
                article_lens_cp[:] = article_lens[i:i + 1]
                best_beam = bs.BeamSearch(sess, article_batch_cp,
                                          article_lens_cp)[0]
                decode_output = [int(t) for t in best_beam.tokens[1:]]
                self._DecodeBatch(origin_articles[i], origin_abstracts[i],
                                  decode_output)
        return True
    finally:
        sess.close()
def DecodeOne(self, input_article): sess = self._sess (article, article_len, origin_article) = self._convertInputToModelTensor(input_article) bs = beam_search.BeamSearch( self._model, FLAGS.beam_size, self._vocab.WordToId(data.SENTENCE_START), self._vocab.WordToId(data.SENTENCE_END), self._hps.dec_timesteps) article_cp = [article]*self._hps.batch_size article_len_cp = [article_len]*self._hps.batch_size best_beam = bs.BeamSearch(sess, article_cp, article_len_cp)[0] decode_output = [int(t) for t in best_beam.tokens[1:]] summary = self._DecodeBatch(decode_output) print decode_output, summary return summary
def _Decode(self, saver, sess):
    """Restore a checkpoint and decode it.

    Args:
      saver: Tensorflow checkpoint saver.
      sess: Tensorflow session.

    Returns:
      If success, returns true, otherwise, false.
    """
    ckpt_state = tf.train.get_checkpoint_state(FLAGS.log_root)
    if not (ckpt_state and ckpt_state.model_checkpoint_path):
        tf.logging.info('No model to decode yet at %s', FLAGS.log_root)
        return False
    tf.logging.info('checkpoint path %s', ckpt_state.model_checkpoint_path)
    ckpt_path = os.path.join(
        FLAGS.log_root, os.path.basename(ckpt_state.model_checkpoint_path))
    tf.logging.info('renamed checkpoint path %s', ckpt_path)
    saver.restore(sess, ckpt_path)
    for _ in xrange(FLAGS.decode_batches_per_ckpt):
        (article_batch, _, _, article_lens, _, _, origin_articles,
         origin_abstracts, article_ids) = self._batch_reader.NextBatch()
        for i in xrange(self._hps.batch_size):
            bs = beam_search.BeamSearch(
                self._model, self._hps.batch_size,
                self._vocab.WordToId(data.START_DECODING),
                self._vocab.WordToId(data.STOP_DECODING),
                self._hps.dec_timesteps)
            # Tile sample i across the batch so beam search decodes a
            # single example at full batch size.
            article_batch_cp = article_batch.copy()
            article_batch_cp[:] = article_batch[i:i+1]
            article_lens_cp = article_lens.copy()
            article_lens_cp[:] = article_lens[i:i+1]
            # Fix: removed the dead article_ids_cp copy the original built
            # and never passed anywhere; article_ids[i] is used directly.
            best_beam = bs.run_beam_search(sess, article_batch_cp,
                                           article_lens_cp)
            decode_output = [int(t) for t in best_beam.tokens[1:]]
            self._decode_batch(
                origin_articles[i], origin_abstracts[i], decode_output,
                article_ids[i])
    return True
def start_generate_text(self, post_data):
    """Handle a JSON-RPC request: decode candidate questions and reply.

    Args:
      post_data: Parsed JSON-RPC request dict with
        post_data['params']['question'] and ['user_id'].

    Side effects:
      Writes a JSON-RPC response to self.wfile.
    """
    quest = post_data['params']['question']
    (article_batch, _, _, article_lens, _, _,
     origin_articles, origin_abstracts) = self.get_batch_quest(quest)
    searcher = beam_search.BeamSearch(
        myServer.decoder._model,
        myServer.decoder._hps.batch_size,
        myServer.decoder._vocab.WordToId(data.SENTENCE_START),
        myServer.decoder._vocab.WordToId(data.SENTENCE_END),
        myServer.decoder._hps.dec_timesteps)
    # Tile the first example across the whole batch.
    article_batch_cp = article_batch.copy()
    article_batch_cp[:] = article_batch[0]
    article_lens_cp = article_lens.copy()
    article_lens_cp[:] = article_lens[0]
    beams = searcher.BeamSearch(myServer.sess, article_batch_cp,
                                article_lens_cp)
    print("quest:%s" % (origin_articles[0].replace(' ', '')))
    questions = []
    # Keep every hypothesis, trimmed at the first sentence-end marker.
    for idx, hyp in enumerate(beams):
        ids = [int(t) for t in hyp.tokens[1:]]
        text = ''.join(data.Ids2Words(ids, myServer.decoder._vocab))
        end_p = text.find(data.SENTENCE_END, 0)
        if end_p != -1:
            text = text[:end_p]
        questions.append(text)
        print("%doutput:%s" % (idx, text))
    params = {}
    params["success"] = "true"
    params["user_id"] = post_data['params']['user_id']
    params["questions"] = questions
    posdata = {}
    posdata['id'] = post_data['id']
    posdata['jsonrpc'] = '2.0'
    posdata['result'] = params
    self.wfile.write(
        json.dumps(posdata).encode(encoding='utf_8', errors='strict'))
def _Decode(self, saver, sess, choose):
    """Restore a checkpoint and decode the test set to a result file.

    Args:
      saver: Tensorflow checkpoint saver.
      sess: Tensorflow session.
      choose: Decoding strategy; 'beam_search' runs per-example beam
        search, anything else runs whole-batch greedy max-probability
        prediction.

    Returns:
      True on completion.
    """
    '''
    #下面到saver是判断是否有saver保存了变量
    ckpt_state = tf.train.get_checkpoint_state(FLAGS.save_path)
    if not (ckpt_state and ckpt_state.model_checkpoint_path):
        #tf.logging.info('No model to decode yet at %s', FLAGS.save_path)
        print('No model to decode yet at %s', FLAGS.save_path)
        return False
    #tf.logging.info('checkpoint path %s', ckpt_state.model_checkpoint_path)
    #os.path.join(dirpath,filename) == 'dirpath/filename'
    #savePath / os.path.basename(ckpt_state.model_checkpoint_path)
    ckpt_path = os.path.join(
        FLAGS.save_path, os.path.basename(ckpt_state.model_checkpoint_path))
    #tf.logging.info('renamed checkpoint path %s', ckpt_path)
    #saver.restore(sess, ckpt_path)
    '''
    saver.restore(sess, FLAGS.save_path)
    #self._decode_io.ResetFiles()
    # Earlier output filenames:
    #NLPCCTestresult_1_0531_06050840 result_1_0531_500_06050840
    resultWriter = open("result/NLPCCTestresult_1_0531_0705_500.txt", 'w',
                        encoding='UTF-8')
    # Iterate over every batch of the test set.
    batch_gen = self._batch_reader.NextBatch()
    max_run_step = self._batch_reader.Batchsize()
    for _ in range(max_run_step):
        (article_batch, _, _, article_lens,
         _, _, _, _) = batch_gen.__next__()
        if choose == 'beam_search':
            # for i in range(self._hps.batch_size):
            print("beam_search")
            for i in range(self._hps.batch_size):
                bs = beam_search.BeamSearch(
                    self._model, self._hps.batch_size,
                    self._vocab.WordToId(wash_data.SENTENCE_START),
                    self._vocab.WordToId(wash_data.SENTENCE_END),
                    self._hps.dec_timesteps)
                article_batch_cp = article_batch.copy()
                # One sample's article from the batch, e.g. [1,2,3,...],
                # tiled over every row.
                article_batch_cp[:] = article_batch[i:i + 1]
                article_lens_cp = article_lens.copy()
                # That sample's article length, tiled the same way.
                article_lens_cp[:] = article_lens[i:i + 1]
                best_beam = bs.BeamSearch(sess, article_batch_cp,
                                          article_lens_cp)[0]
                #print("decode--output--3--best_beam == ",best_beam)
                # decode_output: the model's output as a list of token ids.
                decode_output = [int(t) for t in best_beam.tokens[1:]]
                decoded_output = ' '.join(
                    wash_data.Ids2Words(decode_output, self._vocab))
                resultWriter.write(decoded_output + '\n')
                print("decoded_output result == ", decoded_output)
        else:
            print("max_prop")
            pr = predict_result.PredictResult(
                self._model, self._hps.batch_size,
                self._vocab.WordToId(wash_data.SENTENCE_START),
                self._vocab.WordToId(wash_data.SENTENCE_END),
                self._hps.dec_timesteps)
            best_result = pr.predictSearch(sess, article_batch, article_lens)
            for i in range(len(best_result)):
                decode_output = best_result[i]
                decoded_output = ' '.join(
                    wash_data.Ids2Words(decode_output, self._vocab))
                print("decoded_output result == ", decoded_output)
    resultWriter.close()
    return True
def _Decode(self, saver, summary_writer, sess, old_global_step):
    """Restores a checkpoint and decodes it.

    Args:
      saver: Tensorflow checkpoint saver.
      summary_writer: Tensorflow summary writer.
      sess: Tensorflow session.
      old_global_step: Only decode if model is newer.

    Returns:
      global_step: Step of model that was decoded.
    """
    checkpoint_state = tf.train.get_checkpoint_state(FLAGS.log_root)
    if not (checkpoint_state and checkpoint_state.model_checkpoint_path):
        tf.logging.info('No model to decode yet at %s', FLAGS.log_root)
        return old_global_step
    checkpoint_path = os.path.join(
        FLAGS.log_root,
        os.path.basename(checkpoint_state.model_checkpoint_path))
    saver.restore(sess, checkpoint_path)
    global_step = sess.run(self._model.global_step)
    if global_step <= old_global_step:
        tf.logging.info(
            'No new model to decode. Latest model at step %d (last: %d)',
            global_step, old_global_step)
        return old_global_step
    else:
        tf.logging.info(
            'New model to decode. Loaded model at step %d (last: %d)',
            global_step, old_global_step)
    scores = []
    self._decode_io.ResetFiles()
    for _ in range(self._config.decode_batches_per_ckpt):
        next_batch = self._batch_reader.NextBatch()
        # NOTE(review): the unpacking arity below looks one short of the
        # len() checks (8 names for 9 fields, 9 for 10) — confirm against
        # the batch readers' actual return tuples.
        if len(next_batch) == 9:  # normal batch reader
            (enc_inputs, _, _, enc_input_len, _, _,
             source, targets) = next_batch
        elif len(next_batch) == 10:  # copynet batch reader
            (enc_inputs, _, _, _, enc_input_len, _, _,
             source, targets) = next_batch
        else:
            tf.logging.error('Unknow batch reader is used... check the '
                             'length of return value of '
                             '_batch_reader.NextBatch()')
            # Fix: skip this batch. The original fell through and used
            # undefined (or stale) enc_inputs/source/targets, raising a
            # NameError on the first unrecognized batch.
            continue
        for i in range(self._config.batch_size):
            bs = beam_search.BeamSearch(
                self._model, self._config.batch_size,
                self._output_vocab.WordToId(data.SENTENCE_START),
                self._output_vocab.WordToId(data.SENTENCE_END),
                self._config.max_output_len)
            # Tile sample i across the batch for single-example decoding.
            enc_inputs_copy = enc_inputs.copy()
            enc_inputs_copy[:] = enc_inputs[i:i + 1]
            enc_input_len_copy = enc_input_len.copy()
            enc_input_len_copy[:] = enc_input_len[i:i + 1]
            best_beam = bs.BeamSearch(sess, enc_inputs_copy,
                                      enc_input_len_copy)[0]
            dec_outputs = [int(t) for t in best_beam.tokens[1:]]
            score = self._DecodeBatch(source[i], targets[i], dec_outputs)
            scores.append(score)
    # Average each metric column across all decoded examples.
    avg_score = [sum(x) / float(len(x)) for x in zip(*scores)]
    self._LogTensorboardSummary(summary_writer, 'metrics/bleu-3',
                                avg_score[0], global_step)
    self._LogTensorboardSummary(summary_writer, 'metrics/f1-measure',
                                avg_score[1], global_step)
    self._LogTensorboardSummary(summary_writer, 'metrics/exact',
                                avg_score[2], global_step)
    return global_step