def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.WordToId(data.PAD_TOKEN) > 0
  assert vocab.WordToId(data.UNKNOWN_TOKEN) >= 0
  assert vocab.WordToId(data.SENTENCE_START) > 0
  assert vocab.WordToId(data.SENTENCE_END) > 0

  batch_size = 1
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=2,  # enc_layers=4
      enc_timesteps=60,  # enc_timesteps=120
      dec_timesteps=15,  # dec_timesteps=30
      min_input_len=2,  # discard articles/summaries shorter than this
      num_hidden=128,  # for rnn cell (num_hidden=256)
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=10)  # If 0, no sampled softmax. (num_softmax_samples=4096)

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()
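# The decode branches in these variants all reuse the training HParams with
# dec_timesteps=1 via _replace. A minimal sketch of that pattern, assuming
# HParams is a collections.namedtuple as in the textsum-style
# seq2seq_attention_model (field names trimmed here for illustration):
def _hparams_replace_sketch():
  import collections
  HParams = collections.namedtuple('HParams', 'mode dec_timesteps')
  hps = HParams(mode='decode', dec_timesteps=15)
  # _replace returns a new tuple with only the named field overridden;
  # the original hps is left unchanged.
  decode_hps = hps._replace(dec_timesteps=1)
  assert hps.dec_timesteps == 15
  assert decode_hps.dec_timesteps == 1
  return decode_hps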
def __init__(self, vocab_path, ckpt_path):
  self._num_gpus = 0
  self._vocab_path = vocab_path
  self._ckpt_path = ckpt_path
  self._vocab = data.Vocab(self._vocab_path, 50000)  # 1000000
  # Check for presence of required special tokens.
  assert self._vocab.WordToId(data.PAD_TOKEN) > 0
  assert self._vocab.WordToId(data.UNKNOWN_TOKEN) >= 0
  assert self._vocab.WordToId(data.SENTENCE_START) > 0
  assert self._vocab.WordToId(data.SENTENCE_END) > 0

  self._decode_hps = seq2seq_attention_model.HParams(
      mode='decode',  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=4,
      enc_layers=4,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries shorter than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.
  self._hps = self._decode_hps._replace(dec_timesteps=1)

  print("=== Initializing... ===")
  self._model = seq2seq_attention_model.Seq2SeqAttentionModel(
      self._hps, self._vocab, num_gpus=self._num_gpus)
  print("=== Finished initializing ===")
  self._decoder = seq2seq_attention_decode.BSDecoder(
      self._model, self._decode_hps, self._vocab, self._ckpt_path)
  print("==== Ready to answer questions now ====")
def train(cls):
  cls.vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  batch_size = FLAGS.beam_size
  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=4,
      enc_timesteps=120,  # 120
      dec_timesteps=120,  # 30
      min_input_len=0,  # discard articles/summaries shorter than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=0)  # 4096; if 0, no sampled softmax.

  cls.batcher = Batcher(
      cls.vocab, hps, FLAGS.article_key, FLAGS.abstract_key,
      FLAGS.max_article_sentences, FLAGS.max_abstract_sentences,
      bucketing=FLAGS.use_bucketing, truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  # Only need to restore the 1st step and reuse it since
  # we keep and feed in state for each step's output.
  decode_mdl_hps = hps._replace(dec_timesteps=1)
  model = seq2seq_attention_model.Seq2SeqAttentionModel(
      decode_mdl_hps, cls.vocab, num_gpus=FLAGS.num_gpus)
  cls.decoder = seq2seq_attention_decode.BSDecoder(
      model, cls.batcher, hps, cls.vocab)

  # Load the model from the latest checkpoint.
  cls.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
  ckpt_state = tf.train.get_checkpoint_state(FLAGS.log_root)
  if not (ckpt_state and ckpt_state.model_checkpoint_path):
    print('No model to decode yet at %s' % FLAGS.log_root)
    return
  tf.logging.info('checkpoint path %s', ckpt_state.model_checkpoint_path)
  ckpt_path = os.path.join(
      FLAGS.log_root, os.path.basename(ckpt_state.model_checkpoint_path))
  tf.logging.info('renamed checkpoint path %s', ckpt_path)
  cls.decoder._saver.restore(cls.sess, ckpt_path)
def main(unused_argv):
  vocab = wash_data.Vocab(FLAGS.vocab_path, 1000000)
  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.0001,  # min learning rate.
      lr=0.001,  # learning rate
      batch_size=FLAGS.batch_size,
      enc_layers=1,  # number of RNN layers in the encoder during training
      enc_timesteps=1500,  # encoder input length
      dec_timesteps=40,  # decoder input length
      min_input_len=1,  # discard articles/summaries shorter than this
      num_hidden=128,  # hidden size of the LSTM rnn cell
      emb_dim=256,  # vocab embedding dimension; if 0, don't use embedding
      max_grad_norm=2,  # gradient clipping norm
      num_softmax_samples=4096)  # If 0, no sampled softmax.

  batcher = batch_reader.Batcher(
      FLAGS.articleData_path, FLAGS.summaryData_path, FLAGS.decodeData_path,
      vocab, hps, FLAGS.max_article_sentences, FLAGS.max_summary_sentences,
      bucketing=FLAGS.use_bucketing, truncate_input=FLAGS.truncate_input,
      epoch=FLAGS.epoch)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    print("decode begin")
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop(choose=FLAGS.choose)
def main(unused_argv):
  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=4,
      enc_layers=4,
      enc_timesteps=80,
      dec_timesteps=20,
      min_input_len=2,  # discard articles/summaries shorter than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096,  # If 0, no sampled softmax.
      num_kw=3)

  vocab = data_manager.Vocab('pubmed')
  batcher = data_manager.DataManager(vocab, hps)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    eval_hps = hps._replace(mode='eval')
    eval_batcher = data_manager.DataManager(vocab, eval_hps)
    dropout = .9
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher, eval_batcher, dropout)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()
def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.CheckVocab(data.PAD_TOKEN) > 0
  assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
  assert vocab.CheckVocab(data.SENTENCE_START) > 0
  assert vocab.CheckVocab(data.SENTENCE_END) > 0

  batch_size = 4
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=1,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries shorter than this
      num_hidden=128,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    to_build_grapth = True
    p = preprocessing(FLAGS.vocab_path)

    # Old decode loop:
    # while True:
    #   kb_input = input('> ')
    #   if kb_input == 'c':
    #     description_str = input('Enter description > ')
    #     context_str = input('Enter context > ')
    #     input_data = p.get_data(description=description_str, context=context_str)
    #     print('Input data:')
    #     pprint(input_data)
    #   elif kb_input == 'q':
    #     break
    #   else:
    #     try:
    #       text_to_binary('yahoo_knowledge_data/decode/ver_5/dataset_ready/data_ready_' + kb_input,
    #                      'yahoo_knowledge_data/decode/decode_data')
    #     except:
    #       print('Error in the default testing data')
    #     decoder = seq2seq_attention_decode.BSDecoder(model, hps, vocab, to_build_grapth)
    #     to_build_grapth = False
    #     decoder.DecodeLoop()

    # Decode loop used for the thesis.
    file_num = 1
    while True:
      if file_num % 60 == 0:
        print('Printed 60 examples')
        break
      try:
        text_to_binary(
            'yahoo_knowledge_data/decode/ver_5/dataset_ready/data_ready_' + str(file_num),
            'yahoo_knowledge_data/decode/decode_data')
      except:
        print('Error in the default testing data')
        break
      decoder = seq2seq_attention_decode.BSDecoder(
          model, hps, vocab, to_build_grapth)
      to_build_grapth = False
      decoder.DecodeLoop()
      print('==================', file_num, '==================')
      file_num += 1
def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.CheckVocab(data.PAD_TOKEN) > 0
  assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
  assert vocab.CheckVocab(data.SENTENCE_START) > 0
  assert vocab.CheckVocab(data.SENTENCE_END) > 0

  batch_size = 64
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=4,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries shorter than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.
  # Same hyperparameters, but in eval mode.
  eval_hps = hps._replace(mode='eval')

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  eval_batcher = batch_reader.Batcher(
      FLAGS.eval_data_path, vocab, eval_hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    eval_model = seq2seq_attention_model.Seq2SeqAttentionModel(
        eval_hps, vocab, num_gpus=FLAGS.num_gpus)
    count = 0
    while count * FLAGS.eval_every_iteration < FLAGS.max_run_steps:
      _Train(model, batcher)
      # Read the running average loss from previous evaluations (if any).
      eval_avg_loss = 0
      prev_avg_loss = 0
      try:
        eval_results = tf.contrib.estimator.read_eval_metrics(FLAGS.eval_dir)
        i = 0
        for step, metrics in eval_results.items():
          eval_avg_loss += metrics['running_avg_loss']
          i += 1
        prev_avg_loss = eval_avg_loss / i
      except FileNotFoundError:
        print("Haven't run evaluation yet.")
      cur_loss = _Eval(eval_model, eval_batcher, 20, vocab=vocab)
      if eval_avg_loss != 0 and prev_avg_loss < cur_loss:
        print("Early stopping!")
        break
      count += 1
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, eval_batcher, vocab=vocab)
  elif hps.mode == 'decode':
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()
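# The training branch above stops early when the latest eval loss rises above
# the running average of previous evaluations. A minimal, TF-free sketch of
# that comparison, with made-up loss values for illustration only:
def _early_stop_sketch():
  prev_losses = [2.1, 1.8, 1.7]  # running_avg_loss values from earlier evals
  cur_loss = 1.9                 # loss from the latest _Eval run
  prev_avg_loss = sum(prev_losses) / len(prev_losses)
  # Stop only if there are prior evaluations and the loss has started rising.
  should_stop = bool(prev_losses) and prev_avg_loss < cur_loss
  return should_stop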