def test(params):
    """Run test/eval decoding for the PGN model and save the predictions.

    Expects ``params['mode']`` to be 'test' or 'eval'; when decoding with
    beam search, ``beam_size`` must equal ``batch_size``.
    """
    assert params["mode"].lower() in ["test", "eval"], \
        "change training mode to 'test' or 'eval'"
    if params['decode_mode'] == 'beam':
        assert params["beam_size"] == params["batch_size"], \
            "Beam size must be equal to batch_size, change the params"

    # Configure GPU resources.
    config_gpu()

    # Load the vocab FIRST and record its true size, so the model below is
    # constructed with the actual vocab size (the original built the model
    # before correcting params['vocab_size'], leaving it stale).
    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    params['vocab_size'] = vocab.count

    print("Building the model ...")
    model = PGN(params)

    # Restore the latest checkpoint, if any.
    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(PGN=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint, pgn_checkpoint_dir, max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    print("Model restored")

    results = predict_result(model, params, vocab, params['result_save_path'])
def train(params):
    """Train the Seq2Seq model using the settings in ``params``."""
    # Set up GPU resources.
    config_gpu()

    # Load the vocabulary and write its actual size back into the params.
    word_table = Vocab(params["vocab_path"], params["vocab_size"])
    params['vocab_size'] = word_table.count

    # Construct the model.
    print("Building the model ...")
    seq2seq = Seq2Seq(params, word_table)

    # Checkpoint bookkeeping: keep at most the five most recent saves.
    ckpt = tf.train.Checkpoint(Seq2Seq=seq2seq)
    ckpt_manager = tf.train.CheckpointManager(ckpt, params['checkpoint_dir'], max_to_keep=5)

    # Run the training loop.
    train_model(seq2seq, word_table, params, ckpt_manager)
def test(params):
    """Run test/eval decoding for the Seq2Seq model (greedy or beam search).

    Restores the latest checkpoint, then decodes either greedily or with
    beam search depending on ``params['greedy_decode']``.
    """
    assert params["mode"].lower() in ["test", "eval"], \
        "change training mode to 'test' or 'eval'"
    # The batch-size constraint only applies to beam-search decoding;
    # the original asserted it even for greedy decoding.
    if not params['greedy_decode']:
        assert params["beam_size"] == params["batch_size"], \
            "Beam size must be equal to batch_size, change the params"

    # Configure GPU resources.
    config_gpu()

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    params['vocab_size'] = vocab.count

    print("Building the model ...")
    model = Seq2Seq(params, vocab)

    # Restore the latest checkpoint, if any.
    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(Seq2Seq=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint, seq2seq_checkpoint_dir, max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    print("Model restored")

    if params['greedy_decode']:
        print('Using greedy search to decoding ...')
        predict_result(model, params, vocab)
    else:
        print('Using beam search to decoding ...')
        b = beam_test_batch_generator(params["beam_size"])
        results = []
        for batch in b:
            best_hyp = beam_decode(model, batch, vocab, params)
            results.append(best_hyp.abstract)
        # Score the beam-search hypotheses with ROUGE.
        get_rouge(results)
        print('save result to :{}'.format(params['result_save_path']))
def test(params):
    """Run test/eval decoding on CPU and save the predicted abstracts."""
    assert params["mode"].lower() in ["test", "eval"], \
        "change training mode to 'test' or 'eval'"
    assert params["beam_size"] == params["batch_size"], \
        "Beam size must be equal to batch_size, change the params"

    # Configure for CPU-only execution.
    config_gpu(use_cpu=True)

    # Load the vocab FIRST and record its true size, so the model below is
    # constructed with the actual vocab size (the original never updated
    # params['vocab_size'] and built the model before loading the vocab,
    # unlike every sibling test function in this file).
    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    params['vocab_size'] = vocab.count

    print("Building the model ...")
    model = Seq2Seq(params)

    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(Seq2Seq=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint, checkpoint_dir, max_to_keep=5)
    # Restore the most recently saved model.
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    print("Model restored")

    if params['greedy_decode']:
        # Greedy decoding.
        predict_result(model, params, vocab, params['result_save_path'])
    else:
        # Beam-search decoding.
        b = beam_test_batch_generator(params["beam_size"])
        results = []
        for batch in b:
            best_hyp = beam_decode(model, batch, vocab, params)
            results.append(best_hyp.abstract)
        save_predict_result(results, params['result_save_path'])
        print('save result to :{}'.format(params['result_save_path']))
def train(params):
    """Train the PGN model with validation, checkpointing and LR decay.

    Restores the most recent checkpoint if one exists, derives the number
    of already-trained epochs from its filename, decays the learning rate
    accordingly, and runs the training loop.
    """
    # Configure GPU resources.
    config_gpu()

    # Load the vocabulary and record its true size.
    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    params['vocab_size'] = vocab.count

    # Build the model.
    print("Building the model ...")
    model = PGN(params)

    print("Creating the batcher ...")
    train_dataset, params['train_steps_per_epoch'] = batcher(vocab, params)
    # Temporarily switch the mode to build the validation dataset.
    params["mode"] = 'val'
    val_dataset, params['val_steps_per_epoch'] = batcher(vocab, params)
    params["mode"] = 'train'

    # Checkpoint management.
    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(PGN=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint, params['checkpoint_dir'], max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
        # Checkpoint paths look like ".../ckpt-12"; parse the number after
        # the final '-'. The original used latest_checkpoint[-1] (only the
        # last character), which breaks from epoch 10 onward.
        params["trained_epoch"] = int(checkpoint_manager.latest_checkpoint.split('-')[-1])
    else:
        print("Initializing from scratch.")
        params["trained_epoch"] = 1

    # Exponential learning-rate decay based on epochs already trained.
    params["learning_rate"] *= np.power(0.95, params["trained_epoch"])
    print('learning_rate:{}'.format(params["learning_rate"]))

    # Train the model.
    print("Starting the training ...")
    train_model(model, train_dataset, val_dataset, params, checkpoint_manager)
#--------------------------------------------- # pred, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output) pred, _, _ = self.decoder(dec_input, dec_hidden, enc_output) #--------------------------------------------- dec_input = tf.expand_dims(dec_target[:, t], 1) predictions.append(pred) return tf.stack(predictions, 1), dec_hidden if __name__ == '__main__': # GPU资源配置 config_gpu() # 获得参数 params = get_params() # 读取vocab训练 vocab = Vocab(params["vocab_path"], params["vocab_size"]) # 计算vocab size input_sequence_len = 200 params = { "vocab_size": vocab.count, "embed_size": 500, "enc_units": 512, "attn_units": 512, "dec_units": 512, "batch_size": 128, "input_sequence_len": input_sequence_len
def main():
    """Parse command-line arguments, configure the GPU, and dispatch train/test."""
    parser = argparse.ArgumentParser()

    # Model hyper-parameters.
    parser.add_argument("--max_enc_len", default=200, help="Encoder input max sequence length", type=int)
    parser.add_argument("--max_dec_len", default=40, help="Decoder input max sequence length", type=int)
    parser.add_argument("--max_dec_steps", default=100,
                        help="maximum number of words of the predicted abstract", type=int)
    parser.add_argument("--min_dec_steps", default=5,
                        help="Minimum number of words of the predicted abstract", type=int)
    parser.add_argument("--batch_size", default=32, help="batch size", type=int)
    parser.add_argument("--buffer_size", default=10, help="buffer size", type=int)
    parser.add_argument("--beam_size", default=3,
                        help="beam size for beam search decoding "
                             "(must be equal to batch size in decode mode)", type=int)
    parser.add_argument("--vocab_size", default=10000, help="Vocabulary size", type=int)
    parser.add_argument("--embed_size", default=256, help="Words embeddings dimension", type=int)
    parser.add_argument("--enc_units", default=256, help="Encoder GRU cell units number", type=int)
    parser.add_argument("--dec_units", default=256, help="Decoder GRU cell units number", type=int)
    parser.add_argument("--attn_units", default=256,
                        help="[context vector, decoder state, decoder input] feedforward result dimension - "
                             "this result is used to compute the attention weights", type=int)
    parser.add_argument("--learning_rate", default=0.001, help="Learning rate", type=float)
    parser.add_argument("--adagrad_init_acc", default=0.1,
                        help="Adagrad optimizer initial accumulator value. Please refer to the Adagrad optimizer "
                             "API documentation on tensorflow site for more details.", type=float)
    parser.add_argument("--max_grad_norm", default=0.8,
                        help="Gradient norm above which gradients must be clipped", type=float)
    parser.add_argument('--eps', default=1e-12, type=float)
    parser.add_argument('--cov_loss_wt', default=0.5,
                        help='Weight of coverage loss (lambda in the paper).'
                             ' If zero, then no incentive to minimize coverage loss.', type=float)

    # Data and checkpoint paths.
    parser.add_argument("--train_seg_x_dir", default=train_x_seg_path, help="train_seg_x_dir", type=str)
    parser.add_argument("--train_seg_y_dir", default=train_y_seg_path, help="train_seg_y_dir", type=str)
    parser.add_argument("--val_seg_x_dir", default=val_x_seg_path, help="val_seg_x_dir", type=str)
    parser.add_argument("--val_seg_y_dir", default=val_y_seg_path, help="val_seg_y_dir", type=str)
    # NOTE: the original help texts below were copy-pasted from --train_seg_x_dir.
    parser.add_argument("--test_seg_x_dir", default=test_x_seg_path, help="test_seg_x_dir", type=str)
    parser.add_argument("--test_save_dir", default=save_result_dir, help="test_save_dir", type=str)
    parser.add_argument("--checkpoint_dir", default=transformer_checkpoint_dir, help="checkpoint_dir", type=str)
    parser.add_argument("--transformer_model_dir", default=transformer_checkpoint_dir, help="Model folder")
    parser.add_argument("--model_path", help="Path to a specific model", default="", type=str)
    parser.add_argument("--log_file", help="File in which to redirect console outputs", default="", type=str)
    parser.add_argument("--epochs", default=epochs, help="train epochs", type=int)
    parser.add_argument("--vocab_path", default=vocab_path, help="vocab path", type=str)

    # Miscellaneous limits.
    parser.add_argument("--checkpoints_save_steps", default=10, help="Save checkpoints every N steps", type=int)
    parser.add_argument("--max_steps", default=10000, help="Max number of iterations", type=int)
    parser.add_argument("--num_to_test", default=20000, help="Number of examples to test", type=int)
    parser.add_argument("--max_num_to_eval", default=5, help="max_num_to_eval", type=int)

    # Transformer hyper-parameters.
    parser.add_argument('--d_model', default=768, type=int, help="hidden dimension of encoder/decoder")
    parser.add_argument('--num_blocks', default=3, type=int, help="number of encoder/decoder blocks")
    parser.add_argument('--num_heads', default=8, type=int, help="number of attention heads")
    parser.add_argument('--dff', default=1024, type=int, help="hidden dimension of feedforward layer")
    parser.add_argument('--dropout_rate', default=0.1, type=float)

    # Run mode and model selection.
    parser.add_argument("--mode", default='test', help="training, eval or test options")
    parser.add_argument("--model", default='PGN', help="which model to be selected")
    # NOTE(review): these boolean flags take no type=, so any value passed on
    # the command line arrives as a non-empty (truthy) string — only the
    # defaults behave as real booleans. Left as-is to preserve behavior.
    parser.add_argument("--pointer_gen", default=False, help="pointer_gen")
    parser.add_argument("--is_coverage", default=True, help="is_coverage")
    parser.add_argument("--greedy_decode", default=False, help="greedy_decode")
    parser.add_argument("--transformer", default=False, help="transformer")
    parser.add_argument("--decode_mode", default='greedy', help="decode mode: 'greedy' or 'beam'")

    args = parser.parse_args()
    params = vars(args)

    # Configure GPU resources.
    config_gpu()

    if params["mode"] == "train":
        params["batch_size"] = 8
        params["training"] = True
        train(params)
    elif params["mode"] == "test":
        # Beam decoding requires beam_size == batch_size, so set both together.
        params["batch_size"] = params["beam_size"] = 8
        params["training"] = False
        params["decode_mode"] = 'greedy'
        params["print_info"] = True
        predict_result(params)