Example #1
0
def test(params):
    """Run PGN inference: restore the latest checkpoint and write predictions.

    Args:
        params: dict of hyper-parameters and paths; must contain "mode",
            "decode_mode", "vocab_path", "vocab_size" and "result_save_path"
            (plus "beam_size"/"batch_size" when decoding with beam search).

    Returns:
        Whatever ``predict_result`` returns for the decoded test set.
    """
    assert params["mode"].lower() in [
        "test", "eval"
    ], "change training mode to 'test' or 'eval'"
    if params['decode_mode'] == 'beam':
        assert params["beam_size"] == params[
            "batch_size"], "Beam size must be equal to batch_size, change the params"
    # GPU resource configuration
    config_gpu()

    # Build the vocab FIRST and refresh params['vocab_size'] from it.
    # (Bug fix: previously PGN(params) was constructed before vocab.count was
    # written back, so the model could be built with a stale vocab size.)
    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    params['vocab_size'] = vocab.count

    print("Building the model ...")
    model = PGN(params)

    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(PGN=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                    pgn_checkpoint_dir,
                                                    max_to_keep=5)
    # Restore the most recent weights, if any checkpoint exists.
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    print("Model restored")

    # Return the predictions instead of silently dropping them.
    return predict_result(model, params, vocab, params['result_save_path'])
Example #2
0
File: train.py  Project: marchboy/summary
def train(params):
    """Train the Seq2Seq model described by ``params``."""
    # Configure GPU resources.
    config_gpu()

    # Load the vocabulary and record its true size for the model build.
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    params['vocab_size'] = vocab.count

    # Build the model.
    print("Building the model ...")
    model = Seq2Seq(params, vocab)

    # Set up checkpointing so progress can be saved and restored.
    ckpt = tf.train.Checkpoint(Seq2Seq=model)
    ckpt_manager = tf.train.CheckpointManager(
        ckpt, params['checkpoint_dir'], max_to_keep=5)

    # Run the training loop.
    train_model(model, vocab, params, ckpt_manager)
Example #3
0
def test(params):
    """Restore the latest Seq2Seq checkpoint and decode the test set.

    Uses greedy search when params['greedy_decode'] is truthy, otherwise
    beam search followed by ROUGE scoring.
    """
    assert params["mode"].lower() in [
        "test", "eval"
    ], "change training mode to 'test' or 'eval'"
    assert params["beam_size"] == params["batch_size"], \
        "Beam size must be equal to batch_size, change the params"
    # Configure GPU resources.
    config_gpu()

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    params['vocab_size'] = vocab.count

    print("Building the model ...")
    model = Seq2Seq(params, vocab)

    print("Creating the checkpoint manager")
    ckpt = tf.train.Checkpoint(Seq2Seq=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt,
                                              seq2seq_checkpoint_dir,
                                              max_to_keep=5)
    # Restore the most recently saved weights, if any.
    ckpt.restore(ckpt_manager.latest_checkpoint)
    if ckpt_manager.latest_checkpoint:
        print("Restored from {}".format(ckpt_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    print("Model restored")

    if params['greedy_decode']:
        print('Using greedy search to decoding ...')
        predict_result(model, params, vocab)
    else:
        print('Using beam search to decoding ...')
        batches = beam_test_batch_generator(params["beam_size"])
        # Keep the best hypothesis per batch, then score with ROUGE.
        results = [beam_decode(model, batch, vocab, params).abstract
                   for batch in batches]
        get_rouge(results)
        print('save result to :{}'.format(params['result_save_path']))
Example #4
0
def test(params):
    """Restore Seq2Seq weights and write decoded results to disk.

    Decodes greedily or with beam search depending on
    params['greedy_decode']; results go to params['result_save_path'].
    """
    assert params["mode"].lower() in [
        "test", "eval"
    ], "change training mode to 'test' or 'eval'"
    assert params["beam_size"] == params["batch_size"], \
        "Beam size must be equal to batch_size, change the params"
    # Resource configuration (use_cpu=True — semantics defined in config_gpu).
    config_gpu(use_cpu=True)

    print("Building the model ...")
    model = Seq2Seq(params)

    # NOTE(review): unlike the sibling entry points, params['vocab_size'] is
    # not refreshed from vocab.count here — confirm this is intentional.
    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    print("Creating the checkpoint manager")
    ckpt = tf.train.Checkpoint(Seq2Seq=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt,
                                              checkpoint_dir,
                                              max_to_keep=5)
    # Restore the most recently saved model, if any.
    ckpt.restore(ckpt_manager.latest_checkpoint)
    if ckpt_manager.latest_checkpoint:
        print("Restored from {}".format(ckpt_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    print("Model restored")

    if params['greedy_decode']:
        # Greedy decoding.
        predict_result(model, params, vocab, params['result_save_path'])
    else:
        # Beam-search decoding: best hypothesis per batch.
        batch_gen = beam_test_batch_generator(params["beam_size"])
        results = [beam_decode(model, batch, vocab, params).abstract
                   for batch in batch_gen]
        save_predict_result(results, params['result_save_path'])
        print('save result to :{}'.format(params['result_save_path']))
Example #5
0
def train(params):
    """Train the PGN model, resuming from the latest checkpoint if present.

    Args:
        params: dict of hyper-parameters and paths; must contain
            "vocab_path", "vocab_size", "mode", "checkpoint_dir" and
            "learning_rate".  Mutated in place: "vocab_size",
            "train_steps_per_epoch", "val_steps_per_epoch", "mode",
            "trained_epoch" and "learning_rate".
    """
    # GPU resource configuration
    config_gpu()

    # Build the vocab and record its real size.
    # (Fix: this step previously logged "Building the model ..." twice.)
    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    params['vocab_size'] = vocab.count

    # Build the model
    print("Building the model ...")
    model = PGN(params)

    print("Creating the batcher ...")
    train_dataset, params['train_steps_per_epoch'] = batcher(vocab, params)
    # Temporarily switch mode to build the validation pipeline.
    params["mode"] = 'val'
    val_dataset, params['val_steps_per_epoch'] = batcher(vocab, params)
    params["mode"] = 'train'

    # Checkpoint manager for saving/restoring training progress.
    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(PGN=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                    params['checkpoint_dir'],
                                                    max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
        # BUG FIX: CheckpointManager names checkpoints ".../ckpt-N"; taking
        # only the last character truncated multi-digit epochs (12 -> 2).
        # Parse the full number after the final '-'.
        params["trained_epoch"] = int(
            checkpoint_manager.latest_checkpoint.split('-')[-1])
    else:
        print("Initializing from scratch.")
        params["trained_epoch"] = 1

    # Learning-rate decay: 0.95 ** (epochs already trained).
    params["learning_rate"] *= np.power(0.95, params["trained_epoch"])
    print('learning_rate:{}'.format(params["learning_rate"]))
    # Train the model
    print("Starting the training ...")

    train_model(model, train_dataset, val_dataset, params, checkpoint_manager)
Example #6
0
            #---------------------------------------------
            # pred, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)
            pred, _, _ = self.decoder(dec_input, dec_hidden, enc_output)

            #---------------------------------------------

            dec_input = tf.expand_dims(dec_target[:, t], 1)

            predictions.append(pred)

        return tf.stack(predictions, 1), dec_hidden


if __name__ == '__main__':
    # GPU资源配置
    config_gpu()
    # 获得参数
    params = get_params()
    # 读取vocab训练
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    # 计算vocab size
    input_sequence_len = 200

    params = {
        "vocab_size": vocab.count,
        "embed_size": 500,
        "enc_units": 512,
        "attn_units": 512,
        "dec_units": 512,
        "batch_size": 128,
        "input_sequence_len": input_sequence_len
Example #7
0
def main():
    """Parse CLI arguments into a ``params`` dict and dispatch to train/test.

    Side effects: configures the GPU via config_gpu() and, per mode,
    overrides several parsed values (batch_size, training, decode_mode)
    before dispatching.
    """
    parser = argparse.ArgumentParser()
    # Model parameters
    parser.add_argument("--max_enc_len",
                        default=200,
                        help="Encoder input max sequence length",
                        type=int)
    parser.add_argument("--max_dec_len",
                        default=40,
                        help="Decoder input max sequence length",
                        type=int)
    parser.add_argument(
        "--max_dec_steps",
        default=100,
        help="maximum number of words of the predicted abstract",
        type=int)
    parser.add_argument(
        "--min_dec_steps",
        default=5,
        help="Minimum number of words of the predicted abstract",
        type=int)
    parser.add_argument("--batch_size",
                        default=32,
                        help="batch size",
                        type=int)
    parser.add_argument("--buffer_size",
                        default=10,
                        help="buffer size",
                        type=int)
    parser.add_argument(
        "--beam_size",
        default=3,
        help=
        "beam size for beam search decoding (must be equal to batch size in decode mode)",
        type=int)
    parser.add_argument("--vocab_size",
                        default=10000,
                        help="Vocabulary size",
                        type=int)
    parser.add_argument("--embed_size",
                        default=256,
                        help="Words embeddings dimension",
                        type=int)
    parser.add_argument("--enc_units",
                        default=256,
                        help="Encoder GRU cell units number",
                        type=int)
    parser.add_argument("--dec_units",
                        default=256,
                        help="Decoder GRU cell units number",
                        type=int)
    parser.add_argument(
        "--attn_units",
        default=256,
        help=
        "[context vector, decoder state, decoder input] feedforward result dimension - "
        "this result is used to compute the attention weights",
        type=int)
    parser.add_argument("--learning_rate",
                        default=0.001,
                        help="Learning rate",
                        type=float)
    parser.add_argument(
        "--adagrad_init_acc",
        default=0.1,
        help=
        "Adagrad optimizer initial accumulator value. Please refer to the Adagrad optimizer "
        "API documentation on tensorflow site for more details.",
        type=float)
    parser.add_argument(
        "--max_grad_norm",
        default=0.8,
        help="Gradient norm above which gradients must be clipped",
        type=float)
    parser.add_argument('--eps', default=1e-12, type=float)
    parser.add_argument(
        '--cov_loss_wt',
        default=0.5,
        help='Weight of coverage loss (lambda in the paper).'
        ' If zero, then no incentive to minimize coverage loss.',
        type=float)
    # Dataset paths: defaults come from module-level constants defined
    # elsewhere in this project (train_x_seg_path, vocab_path, ...).
    parser.add_argument("--train_seg_x_dir",
                        default=train_x_seg_path,
                        help="train_seg_x_dir",
                        type=str)
    parser.add_argument("--train_seg_y_dir",
                        default=train_y_seg_path,
                        help="train_seg_y_dir",
                        type=str)

    parser.add_argument("--val_seg_x_dir",
                        default=val_x_seg_path,
                        help="val_x_seg_path",
                        type=str)
    parser.add_argument("--val_seg_y_dir",
                        default=val_y_seg_path,
                        help="val_y_seg_path",
                        type=str)

    # NOTE(review): the help strings below repeat "train_seg_x_dir" —
    # probably copy-paste residue; confirm intended descriptions.
    parser.add_argument("--test_seg_x_dir",
                        default=test_x_seg_path,
                        help="train_seg_x_dir",
                        type=str)
    parser.add_argument("--test_save_dir",
                        default=save_result_dir,
                        help="train_seg_x_dir",
                        type=str)

    parser.add_argument("--checkpoint_dir",
                        default=transformer_checkpoint_dir,
                        help="checkpoint_dir",
                        type=str)
    parser.add_argument("--transformer_model_dir",
                        default=transformer_checkpoint_dir,
                        help="Model folder")
    parser.add_argument("--model_path",
                        help="Path to a specific model",
                        default="",
                        type=str)
    parser.add_argument("--log_file",
                        help="File in which to redirect console outputs",
                        default="",
                        type=str)

    parser.add_argument("--epochs",
                        default=epochs,
                        help="train epochs",
                        type=int)
    parser.add_argument("--vocab_path",
                        default=vocab_path,
                        help="vocab path",
                        type=str)
    # others
    parser.add_argument("--checkpoints_save_steps",
                        default=10,
                        help="Save checkpoints every N steps",
                        type=int)
    parser.add_argument("--max_steps",
                        default=10000,
                        help="Max number of iterations",
                        type=int)
    parser.add_argument("--num_to_test",
                        default=20000,
                        help="Number of examples to test",
                        type=int)
    parser.add_argument("--max_num_to_eval",
                        default=5,
                        help="max_num_to_eval",
                        type=int)

    # transformer
    parser.add_argument('--d_model',
                        default=768,
                        type=int,
                        help="hidden dimension of encoder/decoder")
    parser.add_argument('--num_blocks',
                        default=3,
                        type=int,
                        help="number of encoder/decoder blocks")
    parser.add_argument('--num_heads',
                        default=8,
                        type=int,
                        help="number of attention heads")
    parser.add_argument('--dff',
                        default=1024,
                        type=int,
                        help="hidden dimension of feedforward layer")
    parser.add_argument('--dropout_rate', default=0.1, type=float)

    # mode
    # NOTE(review): the boolean flags below declare no type=, so ANY
    # non-empty value passed on the command line (including "False")
    # parses as a truthy string — confirm callers rely only on defaults.
    parser.add_argument("--mode",
                        default='test',
                        help="training, eval or test options")
    parser.add_argument("--model",
                        default='PGN',
                        help="which model to be slected")
    parser.add_argument("--pointer_gen",
                        default=False,
                        help="training, eval or test options")
    parser.add_argument("--is_coverage", default=True, help="is_coverage")
    parser.add_argument("--greedy_decode",
                        default=False,
                        help="greedy_decoder")
    parser.add_argument("--transformer", default=False, help="transformer")
    parser.add_argument("--decode_mode", default='greedy', help="transformer")
    args = parser.parse_args()
    # Convert the Namespace to a plain dict; downstream code indexes params.
    params = vars(args)

    # Configure the GPU
    # gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
    # if gpus:
    #     tf.config.experimental.set_visible_devices(devices=gpus[0], device_type='GPU')
    config_gpu()

    if params["mode"] == "train":
        # Hard-coded override of the parsed --batch_size for training.
        params["batch_size"] = 8
        params["training"] = True
        train(params)

    elif params["mode"] == "test":
        # Beam size must equal batch size in decode mode; both forced to 8.
        params["batch_size"] = params["beam_size"] = 8
        params["training"] = False
        # Overrides any --decode_mode given on the command line.
        params["decode_mode"] = 'greedy'
        # params["decode_mode"] = 'beam'
        params["print_info"] = True

        predict_result(params)