Example #1
def main():
    ## CUDA
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.cuda)

    ## Parameters
    if args.exp == "NONE":
        args.exp = args.graph_type
    args.enc_max_len = 400
    args.dec_max_len = 100
    args.vocab_limit = 50000
    exp_path = "./saved/" + args.exp + "/"
    if not os.path.exists(exp_path):
        os.makedirs(exp_path)
    model_name = args.model_name
    train_data_len = 287227
    args.data_len = 287227
    train_data_path = args.train_data
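    # batches per epoch, i.e. ceil(train_data_len / batch_size)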
    EPOCH_STEPS = (train_data_len - 1) // args.batch_size + 1
    print(args)

    ## DataLoader
    dataloader = CNNDAILY(batch_size=args.batch_size,
                          vocab_limit=args.vocab_limit,
                          max_input_len=args.enc_max_len,
                          max_output_len=args.dec_max_len)
    params = {
        'vocab_size': len(dataloader.word2idx),
        'word2idx': dataloader.word2idx,
        'idx2word': dataloader.idx2word,
        'idx2token': dataloader.idx2word,
        'loss_type': args.loss_type,
        'graph_type': args.graph_type
    }
    print('Vocab Size:', params['vocab_size'])

    ## ModelInit
    model = VAESEQ(params)
    log_path = exp_path + "log.txt"
    LOGGER = open(log_path, "a")

    ## Session
    # load some parameters
    variables = tf.contrib.framework.get_variables_to_restore()
    # print(len(variables), end=",")
    # variables = [v for v in variables if not v.name.startswith("optimizer/transformer/trans_mlp/")]
    # for v in variables_to_restore:
    #     print(type(v.name), v.name)
    print(len(variables))
    # end load
    saver = tf.train.Saver(variables)
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    summary_writer = tf.summary.FileWriter(exp_path, sess.graph)
    # tf.train.write_graph(sess.graph, './saved/vaeseq/', 'train.pbtxt')
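    # resume training if a checkpoint already exists in exp_path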
    keep_on_train_flag = False
    restore_path = tf.train.latest_checkpoint(exp_path)
    if restore_path:
        keep_on_train_flag = True
        saver.restore(sess, restore_path)
        saver = tf.train.Saver()  # new saver
        last_train_step = int(restore_path.split("-")[-1]) % EPOCH_STEPS
        print("Model restore from file: %s, last train step: %d" %
              (restore_path, last_train_step))
        LOGGER.write("Model restore from file: %s, last train step: %d\n" %
                     (restore_path, last_train_step))

    # Train Mode
    x_log, y_log, t_log, log = None, None, None, None
    for epoch in range(args.num_epoch):
        # batcher = dataloader.load_data(fpath=train_data_path)
        batcher = dataloader.load_data()
        for step in tqdm(range(EPOCH_STEPS)):
            if keep_on_train_flag and step < last_train_step:
                continue
            if keep_on_train_flag and step == last_train_step:
                keep_on_train_flag = False

            # get batch data
            try:
                (x_enc_inp, x_dec_inp_full, x_dec_out, y_enc_inp,
                 y_dec_inp_full,
                 y_dec_out), x_enc_inp_oovs, data_oovs, _ = next(batcher)
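                # word dropout on the full decoder inputs (a common VAE
                # decoder regularizer)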
                x_dec_inp = dataloader.update_word_dropout(x_dec_inp_full)
                y_dec_inp = dataloader.update_word_dropout(y_dec_inp_full)
                max_oovs_len = max((len(oov) for oov in data_oovs), default=0)
            except StopIteration:
                print("there are no more examples")
                break

            # for _ in range(2):
            # x_log = model.train_encoder(sess, x_enc_inp, x_dec_inp, x_dec_out, y_enc_inp, y_dec_inp, y_dec_out)
            # y_log = model.train_decoder(sess, x_enc_inp, x_dec_inp, x_dec_out, y_enc_inp, y_dec_inp, y_dec_out)
            # t_log = model.train_transformer(sess, x_enc_inp, x_dec_inp, x_dec_out, y_enc_inp, y_dec_inp, y_dec_out)
            # t_log = model.merged_transformer_train(sess, x_enc_inp, x_dec_inp, x_dec_out, y_enc_inp, y_dec_inp, y_dec_out)
            # log = model.merged_train(sess, x_enc_inp, x_dec_inp, x_dec_out, y_enc_inp, y_dec_inp, y_dec_out)
            log = model.merged_seq_train(sess, x_enc_inp, x_dec_inp, x_dec_out,
                                         y_enc_inp, y_dec_inp, y_dec_out,
                                         x_enc_inp_oovs, max_oovs_len)
            # model.show_parameters(sess)

            # get the summaries and iteration number so we can write summaries to tensorboard
            train_step = summary_flush(x_log, y_log, t_log, log,
                                       summary_writer)

            if step % args.display_loss_step == 0:
                print("Step %d | [%d/%d] | [%d/%d]" %
                      (train_step, epoch + 1, args.num_epoch, step,
                       train_data_len // args.batch_size),
                      end='')
                LOGGER.write("Step %d | [%d/%d] | [%d/%d]" %
                             (train_step, epoch + 1, args.num_epoch, step,
                              train_data_len // args.batch_size))
                show_loss(x_log, y_log, t_log, log, LOGGER)

            if step % args.display_info_step == 0 and step != 0:
                args.training = False
                save_path = saver.save(sess,
                                       exp_path + model_name,
                                       global_step=train_step)
                print("Model saved in file: %s" % save_path)
                # print("============= Show Encoder ===============")
                # model.show_encoder(sess, x_enc_inp[-1], x_dec_inp[-1], LOGGER)
                # print("============= Show Decoder ===============")
                # model.show_decoder(sess, y_enc_inp[-1], y_dec_inp[-1], LOGGER)
                # print("============= Show Sample ===============")
                # for i in range(3):
                #     model.show_sample(sess, x_enc_inp[i], y_dec_out[i], LOGGER)
                LOGGER.flush()
                args.training = True
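
main() reads a module-level args object that the snippet above never defines. A minimal sketch of the argparse setup it implies is shown below; the flag names come from the attributes accessed in the code, while the types and defaults are assumptions. The snippet also presumes the usual module-level imports (os, tensorflow as tf, tqdm, plus the project's CNNDAILY loader, VAESEQ model, and the summary_flush / show_loss helpers).

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--cuda", type=int, default=0)            # GPU index
parser.add_argument("--exp", type=str, default="NONE")        # experiment name
parser.add_argument("--graph_type", type=str, default="seq")
parser.add_argument("--loss_type", type=str, default="nll")
parser.add_argument("--model_name", type=str, default="vaeseq.ckpt")
parser.add_argument("--train_data", type=str, default="./corpus/cnndaily/train")
parser.add_argument("--batch_size", type=int, default=64)
parser.add_argument("--num_epoch", type=int, default=10)
parser.add_argument("--display_loss_step", type=int, default=100)
parser.add_argument("--display_info_step", type=int, default=10000)
args = parser.parse_args()
args.training = True  # toggled off around checkpoint saves in main()
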
Example #2
def main():
    ## CUDA
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    args.max_len = 400
    args.batch_size = 64
    args.max_dec_len = 100
    args.display_info_step = 10000
    args.isPointer = False
    args.vocab_limit = 50000
    test_len = 11490
    print(args)

    ## Parameters
    dataloader = CNNDAILY(batch_size=args.batch_size,
                          vocab_limit=args.vocab_limit,
                          max_input_len=args.max_len,
                          max_output_len=args.max_dec_len)
    params = {
        'vocab_size': len(dataloader.word2idx),
        'word2idx': dataloader.word2idx,
        'idx2word': dataloader.idx2word,
    }
    print('Vocab Size:', params['vocab_size'])

    ## ModelInit
    model = VRAE(params)
    exp_path = "./saved/cnndy_seq2seq/"
    model_name = "seq2seq.ckpt"

    ## Session
    saver = tf.train.Saver()
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        restore_path = tf.train.latest_checkpoint(exp_path)
        # restore_path = "./saved/seq2seq/seq2seq.ckpt-70002"
        saver.restore(sess, restore_path)
        print("Model restore from file: %s" % (restore_path))

        # Prepare dirs
        ref_file = exp_path + "test.input.txt"
        trans_file = exp_path + "test.output.txt"
        result_file = exp_path + "test." + restore_path.split("-")[-1] + ".result.txt"
        test_file = "./corpus/cnndaily/test.txt.tgt.tagged"

        # Test Dir
        dataloader.trans_in_ref(finpath=test_file, foutpath=ref_file)
        with open(trans_file, "w") as f:
            f.write("")
        print("[PAEPEAR DATASET]")

        # Test DataSet
        # batcher = dataloader._load_data(fpath=test_file)
        test_file = "./corpus/cnndaily/test"
        batcher = dataloader.load_data(fpath=test_file)
        for _ in tqdm(range((test_len - 1) // args.batch_size + 1)):
            try:
                # enc_inp, dec_inp_full, dec_out = next(batcher)
                (enc_inp, _, _, _, _,
                 _), x_enc_inp_oovs, data_oovs, _ = next(batcher)
                # dec_inp = dataloader.update_word_dropout(dec_inp_full)
            except StopIteration:
                print("there are no more examples")
                break
            model.evaluation(sess, enc_inp, trans_file)

    # Evaluation
    eval_log = {}
    for metric in ['bleu', 'rouge', 'accuracy', 'word_accuracy']:
        score = evaluation_utils.evaluate(ref_file, trans_file, metric)
        eval_log[metric] = score
        if metric == "bleu":
            print(
                "  bleu-1, bleu-2, bleu-3, bleu-4: %.5f,  %.5f,  %.5f,  %.5f" %
                score)
        elif metric == "rouge":
            print("  rouge-1, rouge-2, rouge-l: %.5f,  %.5f,  %.5f" % score)
        else:
            print("  %s: %.5f" % (metric, score))

    from measures import selfbleu
    for n in range(1, 5):
        score = selfbleu.SelfBleu(trans_file, n).get_score()
        print("  selfbleu-%d" % n, 1 - score)
        eval_log['selfbleu-%d' % n] = score

    # Record Log
    dataloader.record_result(eval_log,
                             finpath=test_file,
                             frespaht=trans_file,  # (sic) keyword kept to match the dataloader's signature
                             foutpath=result_file)
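
The Self-BLEU numbers above come from the project's measures.selfbleu module; note the script stores the raw score in eval_log but prints 1 - score, so the printed number rises with output diversity. A minimal sketch of the metric itself, assuming NLTK is available (this is not the project's implementation):

from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu

def self_bleu(path, ngram):
    """Average BLEU of each generated line against all the other lines."""
    with open(path) as f:
        sents = [line.split() for line in f if line.strip()]
    weights = tuple(1.0 / ngram for _ in range(ngram))
    smooth = SmoothingFunction().method1
    scores = [
        sentence_bleu(sents[:i] + sents[i + 1:], hyp, weights,
                      smoothing_function=smooth)
        for i, hyp in enumerate(sents)
    ]
    return sum(scores) / len(scores)
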
Example #3
def main():
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    args.max_len = 400
    args.batch_size = 64
    args.max_dec_len = 100
    args.display_info_step = 10000
    args.isPointer = False
    args.vocab_limit = 50000
    train_data_len = 287227
    print(args)

    dataloader = CNNDAILY(batch_size=args.batch_size,
                          vocab_limit=args.vocab_limit,
                          max_input_len=args.max_len,
                          max_output_len=args.max_dec_len)
    params = {
        'vocab_size': len(dataloader.word2idx),
        'word2idx': dataloader.word2idx,
        'idx2word': dataloader.idx2word,
    }
    print('Vocab Size:', params['vocab_size'])

    model = VRAE(params)
    saver = tf.train.Saver()
    exp_path = "./saved/cnndy_vrnn/"
    model_name = "seq2seq.ckpt"

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    EPOCH_STEPS = (train_data_len - 1) // args.batch_size + 1

    summary_writer = tf.summary.FileWriter(exp_path, sess.graph)
    restore_path = tf.train.latest_checkpoint(exp_path)
    if restore_path:
        saver.restore(sess, restore_path)
        last_train_step = int(restore_path.split("-")[-1]) % EPOCH_STEPS
        print("Model restore from file: %s, last train step: %d" % (restore_path, last_train_step))
    # summary_writer = tf.summary.FileWriter(exp_path)
    # saver.restore(sess, exp_path+model_name)

    for epoch in range(args.num_epoch):
        # dataloader.update_word_dropout()
        # print("\nWord Dropout")
        # dataloader.shuffle()
        # print("Data Shuffled", end='\n\n')
        batcher = dataloader.load_data()

        step = -1
        while True:
            try:
                # enc_inp, dec_inp_full, dec_out = next(dataloader.data_loader)
                (x_enc_inp, x_dec_inp_full, x_dec_out, y_enc_inp,
                 y_dec_inp_full,
                 y_dec_out), x_enc_inp_oovs, data_oovs, _ = next(batcher)
                # enc_inp, dec_inp_full, dec_out = dataloader.next_batch()
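                # take the x-side encoder input and the y-side decoder
                # targets for seq2seq training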
                enc_inp, dec_inp_full, dec_out = x_enc_inp, y_dec_inp_full, y_dec_out
                dec_inp = dataloader.update_word_dropout(dec_inp_full)
                step += 1
            except StopIteration:
                print("there are no more examples")
                break
            # print(step, "enc_inp.shape:", enc_inp.shape)
            # print(step, "dec_inp_full.shape:", dec_inp_full.shape)
            # print(step, "dec_out.shape:", dec_out.shape)

            log = model.train_session(sess, enc_inp, dec_inp, dec_out)

            # get the summaries and iteration number so we can write summaries to tensorboard
            summaries, train_step = log['summaries'], log['step']
            summary_writer.add_summary(summaries, train_step) # write the summaries
            if train_step % 100 == 0: # flush the summary writer every so often
                summary_writer.flush()

            if step % args.display_loss_step == 0:
                print("Step %d | [%d/%d] | [%d/%d]" %
                      (log['step'], epoch + 1, args.num_epoch, step,
                       train_data_len // args.batch_size),
                      end='')
                print(" | nll_loss:%.1f | kl_w:%.3f | kl_loss:%.2f" %
                      (log['nll_loss'], log['kl_w'], log['kl_loss']))

            if step % args.display_info_step == 0:
                model.reconstruct(sess, enc_inp[-1], dec_out[-1])
                save_path = saver.save(sess, exp_path+model_name, global_step=train_step)
                print("Model saved in file: %s" % save_path)

        model.reconstruct(sess, enc_inp[-1], dec_out[-1])
        save_path = saver.save(sess, exp_path+model_name, global_step=train_step)
        print("Model saved in file: %s" % save_path)