Example #1
File: train.py Project: zxlzr/TAKG
def main(opt):
    try:
        start_time = time.time()
        train_data_loader, train_bow_loader, valid_data_loader, valid_bow_loader, \
        word2idx, idx2word, vocab, bow_dictionary = load_data_and_vocab(opt, load_train=True)
        opt.bow_vocab_size = len(bow_dictionary)
        load_data_time = time_since(start_time)
        logging.info('Time for loading the data: %.1f' % load_data_time)

        start_time = time.time()
        model = Seq2SeqModel(opt).to(opt.device)
        ntm_model = NTM(opt).to(opt.device)
        optimizer_seq2seq, optimizer_ntm, optimizer_whole = init_optimizers(
            model, ntm_model, opt)

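        # Jointly train the Seq2Seq keyphrase generator and the neural topic model (NTM)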
        train_mixture.train_model(model, ntm_model, optimizer_seq2seq,
                                  optimizer_ntm, optimizer_whole,
                                  train_data_loader, valid_data_loader,
                                  bow_dictionary, train_bow_loader,
                                  valid_bow_loader, opt)

        training_time = time_since(start_time)

        logging.info('Time for training: %.1f' % training_time)

    except Exception as e:
        logging.exception("message")
    return
Example #2
def main(opt):
    try:
        start_time = time.time()
        train_data_loader, valid_data_loader, word2idx, idx2word, vocab = load_data_and_vocab(opt, load_train=True)
        load_data_time = time_since(start_time)
        logging.info('Time for loading the data: %.1f' % load_data_time)
        start_time = time.time()
        model = init_model(opt)
        optimizer_ml, optimizer_rl, criterion = init_optimizer_criterion(model, opt)
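        # Run maximum-likelihood (ML) training when opt.train_ml is set, otherwise reinforcement-learning (RL) training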
        if opt.train_ml:
            train_ml.train_model(model, optimizer_ml, optimizer_rl, criterion, train_data_loader, valid_data_loader, opt)
        else:
            train_rl.train_model(model, optimizer_ml, optimizer_rl, criterion, train_data_loader, valid_data_loader, opt)
        training_time = time_since(start_time)
        logging.info('Time for training: %.1f' % training_time)
    except Exception as e:
        logging.exception("message")
    return
Example #3
def main(opt):
    start_time = time.time()
    train_bow_loader, valid_bow_loader, word2idx, idx2word, vocab, bow_dictionary \
                                        = load_data_and_vocab(opt, load_train=True)
    opt.bow_vocab_size = len(bow_dictionary)
    load_data_time = time_since(start_time)
    logging.info('Time for loading the data: %.1f' % load_data_time)

    start_time = time.time()
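    # This variant pre-trains only the neural topic model (NTM) on the bag-of-words loaders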
    ntm_model = NTM(opt).to(opt.device)
    optimizer_ntm = init_optimizers(ntm_model, opt)

    train_model.train_model(ntm_model, optimizer_ntm, bow_dictionary,
                            train_bow_loader, valid_bow_loader, opt)

    training_time = time_since(start_time)

    logging.info('Time for training: %.1f' % training_time)

    return
Example #4
def main(opt):
    try:
        start_time = time.time()
        test_data_loader, word2idx, idx2word, vocab = load_data_and_vocab(opt, load_train=False)
        model = init_pretrained_model(opt)
        load_data_time = time_since(start_time)
        logging.info('Time for loading the data and model: %.1f' % load_data_time)
        start_time = time.time()

        predict(test_data_loader, model, opt)

        total_testing_time = time_since(start_time)
        logging.info('Time for a complete testing: %.1f' % total_testing_time)
        print('Time for a complete testing: %.1f' % total_testing_time)
        sys.stdout.flush()

    except Exception as e:
        logging.exception("message")
    return
Example #5
def main(opt):
    try:
        start_time = time.time()
        train_data_loader, valid_data_loader, word2idx, idx2word, vocab = load_data_and_vocab(
            opt, load_train=True)
        load_data_time = time_since(start_time)
        logging.info('Time for loading the data: %.1f' % load_data_time)
        start_time = time.time()
        model = init_model(opt)

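        # Optimize only the parameters that require gradients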
        optimizer = Adam(params=filter(lambda p: p.requires_grad,
                                       model.parameters()),
                         lr=opt.learning_rate)
        train_model(model, optimizer, train_data_loader, valid_data_loader,
                    opt)

        training_time = time_since(start_time)
        logging.info('Time for training: %.1f' % training_time)
    except Exception as e:
        logging.exception("")
    return
Example #6
def main(opt):
    clip = 5
    start_time = time.time()
    train_data_loader, valid_data_loader, word2idx, idx2word, vocab = load_data_and_vocab(
        opt, load_train=True)
    load_data_time = time_since(start_time)
    logging.info('Time for loading the data: %.1f' % load_data_time)

    print("______________________ Data Successfully Loaded ______________")
    model = Seq2SeqModel(opt)
    if torch.cuda.is_available():
        model.load_state_dict(torch.load(opt.model_path))
        model = model.to(opt.gpuid)
    else:
        model.load_state_dict(torch.load(opt.model_path, map_location="cpu"))

    print(
        "___________________ Generator Initialised and Loaded _________________________"
    )
    generator = SequenceGenerator(model,
                                  bos_idx=opt.word2idx[pykp.io.BOS_WORD],
                                  eos_idx=opt.word2idx[pykp.io.EOS_WORD],
                                  pad_idx=opt.word2idx[pykp.io.PAD_WORD],
                                  peos_idx=opt.word2idx[pykp.io.PEOS_WORD],
                                  beam_size=1,
                                  max_sequence_length=opt.max_length,
                                  copy_attn=opt.copy_attention,
                                  coverage_attn=opt.coverage_attn,
                                  review_attn=opt.review_attn,
                                  cuda=opt.gpuid > -1)

    init_perturb_std = opt.init_perturb_std
    final_perturb_std = opt.final_perturb_std
    perturb_decay_factor = opt.perturb_decay_factor
    perturb_decay_mode = opt.perturb_decay_mode
    hidden_dim = opt.D_hidden_dim
    embedding_dim = opt.D_embedding_dim
    n_layers = opt.D_layers

    D_model = Discriminator(opt.vocab_size, embedding_dim, hidden_dim,
                            n_layers, opt.word2idx[pykp.io.PAD_WORD])
    print("The Discriminator Description is ", D_model)

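    # Optimizer for the reward-driven (policy-gradient style) updates of the generator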
    PG_optimizer = torch.optim.Adagrad(model.parameters(),
                                       opt.learning_rate_rl)
    if torch.cuda.is_available():
        D_model.load_state_dict(torch.load(opt.Discriminator_model_path))
        D_model = D_model.to(opt.gpuid)
    else:
        D_model.load_state_dict(
            torch.load(opt.Discriminator_model_path, map_location="cpu"))

    # D_model.load_state_dict(torch.load("Discriminator_checkpts/D_model_combined1.pth.tar"))
    total_epochs = opt.epochs
    for epoch in range(total_epochs):

        total_batch = 0
        print("Starting with epoch:", epoch)
        for batch_i, batch in enumerate(train_data_loader):
            total_batch += 1

            model.train()
            PG_optimizer.zero_grad()

            if perturb_decay_mode == 0:  # do not decay
                perturb_std = init_perturb_std
            elif perturb_decay_mode == 1:  # exponential decay
                perturb_std = final_perturb_std + (
                    init_perturb_std - final_perturb_std) * math.exp(
                        -1. * total_batch * perturb_decay_factor)
            elif perturb_decay_mode == 2:  # steps decay
                perturb_std = init_perturb_std * math.pow(
                    perturb_decay_factor, math.floor((1 + total_batch) / 4000))

            avg_rewards = train_one_batch(D_model, batch, generator, opt,
                                          perturb_std)

            avg_rewards.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            PG_optimizer.step()

            if batch_i % 4000 == 0:
                print("Saving the file ...............----------->>>>>")
                print("The avg reward is", -avg_rewards.item())
                state_dfs = model.state_dict()
                torch.save(
                    state_dfs, "RL_Checkpoints/Attention_Generator_" +
                    str(epoch) + ".pth.tar")
Example #7
def main():
    clip = 5
    start_time = time.time()
    train_data_loader, valid_data_loader, word2idx, idx2word, vocab = load_data_and_vocab(
        opt, load_train=True)
    load_data_time = time_since(start_time)
    logging.info('Time for loading the data: %.1f' % load_data_time)

    model = Seq2SeqModel(opt)
    #model = model.device()
    #print("The Device is",opt.gpuid)
    model = model.to("cuda:2")

    #model.load_state_dict(torch.load("model/kp20k.ml.one2many.cat.copy.bi-directional.20190704-170553/kp20k.ml.one2many.cat.copy.bi-directional.epoch=2.batch=264.total_batch=8000.model"))
    # model.load_state_dict(torch.load("Checkpoint_individual_3.pth.tar"))
    model.load_state_dict(
        torch.load(
            "model/kp20k.ml.one2many.cat.copy.bi-directional.20190715-132016/kp20k.ml.one2many.cat.copy.bi-directional.epoch=3.batch=26098.total_batch=108000.model"
        ))
    generator = SequenceGenerator(model,
                                  bos_idx=opt.word2idx[pykp.io.BOS_WORD],
                                  eos_idx=opt.word2idx[pykp.io.EOS_WORD],
                                  pad_idx=opt.word2idx[pykp.io.PAD_WORD],
                                  peos_idx=opt.word2idx[pykp.io.PEOS_WORD],
                                  beam_size=1,
                                  max_sequence_length=opt.max_length,
                                  copy_attn=opt.copy_attention,
                                  coverage_attn=opt.coverage_attn,
                                  review_attn=opt.review_attn,
                                  cuda=opt.gpuid > -1)

    init_perturb_std = opt.init_perturb_std
    final_perturb_std = opt.final_perturb_std
    perturb_decay_factor = opt.perturb_decay_factor
    perturb_decay_mode = opt.perturb_decay_mode

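    # embedding_dim, hidden_dim and n_layers are assumed here to be module-level
    # discriminator hyperparameters defined elsewhere in this script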
    D_model = Discriminator(opt.vocab_size, embedding_dim, hidden_dim,
                            n_layers, opt.word2idx[pykp.io.PAD_WORD])

    # D_model.load_state_dict(torch.load("Discriminator_checkpts/Checkpoint_Individual_Training_4.pth.tar"))

    PG_optimizer = torch.optim.Adagrad(model.parameters(), 0.00005)

    print("The Discriminator statistics are ", D_model)

    if torch.cuda.is_available():
        D_model = D_model.to("cuda:1")

    total_epochs = 5
    for epoch in range(total_epochs):

        total_batch = 0
        print("Starting with epoch:", epoch)
        for batch_i, batch in enumerate(valid_data_loader):
            total_batch += 1

            PG_optimizer.zero_grad()

            if perturb_decay_mode == 0:  # do not decay
                perturb_std = init_perturb_std
            elif perturb_decay_mode == 1:  # exponential decay
                perturb_std = final_perturb_std + (
                    init_perturb_std - final_perturb_std) * math.exp(
                        -1. * total_batch * perturb_decay_factor)
            elif perturb_decay_mode == 2:  # steps decay
                perturb_std = init_perturb_std * math.pow(
                    perturb_decay_factor, math.floor((1 + total_batch) / 4000))

            avg_rewards = train_one_batch(D_model, batch, generator, opt,
                                          perturb_std)

            avg_rewards.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

            PG_optimizer.step()

            if batch_i % 15 == 0:
                print("The avg reward is", -avg_rewards.item())
            if batch_i % 100 == 0:
                print("Saving the file ...............----------->>>>>")
                print("The avg reward is", -avg_rewards.item())
                state_dfs = model.state_dict()
                torch.save(
                    state_dfs, "RL_Checkpoints/Checkpoint_SeqGAN_" +
                    str(epoch) + ".pth.tar")

        print("Saving the file ...............----------->>>>>")
        state_dfs = model.state_dict()
        torch.save(
            state_dfs,
            "RL_Checkpoints/Checkpoint_SeqGAN_" + str(epoch) + ".pth.tar")
Example #8
def main(opt):
    clip = 5
    start_time = time.time()
    train_data_loader, valid_data_loader, word2idx, idx2word, vocab = load_data_and_vocab(
        opt, load_train=True)
    load_data_time = time_since(start_time)
    logging.info('Time for loading the data: %.1f' % load_data_time)

    print(
        "Data Successfully Loaded __.__.__.__.__.__.__.__.__.__.__.__.__.__.")
    model = Seq2SeqModel(opt)

    if torch.cuda.is_available():
        model.load_state_dict(torch.load(opt.model_path))
        model = model.to(opt.gpuid)
    else:
        model.load_state_dict(torch.load(opt.model_path, map_location="cpu"))

    print(
        "___________________ Generator Initialised and Loaded _________________________"
    )
    generator = SequenceGenerator(model,
                                  bos_idx=opt.word2idx[pykp.io.BOS_WORD],
                                  eos_idx=opt.word2idx[pykp.io.EOS_WORD],
                                  pad_idx=opt.word2idx[pykp.io.PAD_WORD],
                                  peos_idx=opt.word2idx[pykp.io.PEOS_WORD],
                                  beam_size=1,
                                  max_sequence_length=opt.max_length,
                                  copy_attn=opt.copy_attention,
                                  coverage_attn=opt.coverage_attn,
                                  review_attn=opt.review_attn,
                                  cuda=opt.gpuid > -1)

    init_perturb_std = opt.init_perturb_std
    final_perturb_std = opt.final_perturb_std
    perturb_decay_factor = opt.perturb_decay_factor
    perturb_decay_mode = opt.perturb_decay_mode
    hidden_dim = opt.D_hidden_dim
    embedding_dim = opt.D_embedding_dim
    n_layers = opt.D_layers
    if torch.cuda.is_available():
        D_model = Discriminator(opt.vocab_size, embedding_dim, hidden_dim,
                                n_layers, opt.word2idx[pykp.io.PAD_WORD],
                                opt.gpuid)
    else:
        D_model = Discriminator(opt.vocab_size, embedding_dim, hidden_dim,
                                n_layers, opt.word2idx[pykp.io.PAD_WORD],
                                "cpu")
    print("The Discriminator Description is ", D_model)
    if opt.pretrained_Discriminator:
        if torch.cuda.is_available():
            D_model.load_state_dict(torch.load(opt.Discriminator_model_path))
            D_model = D_model.to(opt.gpuid)
        else:
            D_model.load_state_dict(
                torch.load(opt.Discriminator_model_path, map_location="cpu"))
    else:
        if torch.cuda.is_available():
            D_model = D_model.to(opt.gpuid)
    D_optimizer = torch.optim.Adam(D_model.parameters(), opt.learning_rate)
    print("Beginning with training Discriminator")
    print(
        "########################################################################################################"
    )
    total_epochs = 5
    for epoch in range(total_epochs):
        total_batch = 0
        best_valid_loss = 1000
        print("Starting with epoch:", epoch)
        for batch_i, batch in enumerate(train_data_loader):
            total_batch += 1
            D_model.train()
            D_optimizer.zero_grad()

            if perturb_decay_mode == 0:  # do not decay
                perturb_std = init_perturb_std
            elif perturb_decay_mode == 1:  # exponential decay
                perturb_std = final_perturb_std + (
                    init_perturb_std - final_perturb_std) * math.exp(
                        -1. * total_batch * perturb_decay_factor)
            elif perturb_decay_mode == 2:  # steps decay
                perturb_std = init_perturb_std * math.pow(
                    perturb_decay_factor, math.floor((1 + total_batch) / 4000))
            avg_batch_loss, _, _ = train_one_batch(D_model, batch, generator,
                                                   opt, perturb_std)
            avg_batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(D_model.parameters(), clip)

            D_optimizer.step()
            D_model.eval()

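            # Every 4000 batches, evaluate the discriminator on the validation set and
            # checkpoint it when the average validation loss improves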
            if batch_i % 4000 == 0:
                total = 0
                valid_loss_total, valid_real_total, valid_fake_total = 0, 0, 0
                for batch_j, valid_batch in enumerate(valid_data_loader):
                    total += 1
                    valid_loss, valid_real, valid_fake = train_one_batch(
                        D_model, valid_batch, generator, opt, perturb_std)
                    valid_loss_total += valid_loss.cpu().detach().numpy()
                    valid_real_total += valid_real.cpu().detach().numpy()
                    valid_fake_total += valid_fake.cpu().detach().numpy()
                    D_optimizer.zero_grad()

                print("Currently loss is ", valid_loss_total.item() / total)
                print("Currently real loss is ",
                      valid_real_total.item() / total)
                print("Currently fake loss is ",
                      valid_fake_total.item() / total)

                if best_valid_loss > valid_loss_total.item() / total:
                    print(
                        "Loss Decreases so saving the file ...............----------->>>>>"
                    )
                    state_dfs = D_model.state_dict()
                    torch.save(
                        state_dfs,
                        "Discriminator_checkpts/Attention_Disriminator_" +
                        str(epoch) + ".pth.tar")
                    best_valid_loss = valid_loss_total.item() / total
Example #9
def main():
    clip = 5
    start_time = time.time()
    train_data_loader, valid_data_loader, word2idx, idx2word, vocab = load_data_and_vocab(
        opt, load_train=True)
    load_data_time = time_since(start_time)
    print(idx2word[5])
    logging.info('Time for loading the data: %.1f' % load_data_time)

    model = Seq2SeqModel(opt)
    #model = model.device()
    #print("The Device is",opt.gpuid)
    #model = model.to(devices)
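    # `devices` is assumed to be a module-level torch.device defined elsewhere in this script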
    model = model.to(devices)

    # model.load_state_dict(torch.load("model/kp20k.ml.one2many.cat.copy.bi-directional.20190628-114655/kp20k.ml.one2many.cat.copy.bi-directional.epoch=2.batch=54573.total_batch=116000.model"))
    model.load_state_dict(
        torch.load(
            "model/kp20k.ml.one2many.cat.copy.bi-directional.20190715-132016/kp20k.ml.one2many.cat.copy.bi-directional.epoch=3.batch=26098.total_batch=108000.model"
        ))
    generator = SequenceGenerator(model,
                                  bos_idx=opt.word2idx[pykp.io.BOS_WORD],
                                  eos_idx=opt.word2idx[pykp.io.EOS_WORD],
                                  pad_idx=opt.word2idx[pykp.io.PAD_WORD],
                                  peos_idx=opt.word2idx[pykp.io.PEOS_WORD],
                                  beam_size=1,
                                  max_sequence_length=opt.max_length,
                                  copy_attn=opt.copy_attention,
                                  coverage_attn=opt.coverage_attn,
                                  review_attn=opt.review_attn,
                                  cuda=opt.gpuid > -1)

    init_perturb_std = opt.init_perturb_std
    final_perturb_std = opt.final_perturb_std
    perturb_decay_factor = opt.perturb_decay_factor
    perturb_decay_mode = opt.perturb_decay_mode

    D_model = Discriminator(opt.vocab_size, embedding_dim, hidden_dim,
                            n_layers, opt.word2idx[pykp.io.PAD_WORD])

    print("The Discriminator statistics are ", D_model)

    if torch.cuda.is_available():
        D_model = D_model.to(devices)

    D_model.train()

    D_optimizer = torch.optim.Adam(D_model.parameters(), lr=0.001)

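    # Pre-train the discriminator: train_one_batch returns the overall loss plus the
    # separate losses on real and generated (fake) keyphrase sequences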
    print("gdsf")
    total_epochs = 5
    for epoch in range(total_epochs):

        total_batch = 0
        print("Starting with epoch:", epoch)
        for batch_i, batch in enumerate(train_data_loader):
            total_batch += 1
            D_optimizer.zero_grad()

            if perturb_decay_mode == 0:  # do not decay
                perturb_std = init_perturb_std
            elif perturb_decay_mode == 1:  # exponential decay
                perturb_std = final_perturb_std + (
                    init_perturb_std - final_perturb_std) * math.exp(
                        -1. * total_batch * perturb_decay_factor)
            elif perturb_decay_mode == 2:  # steps decay
                perturb_std = init_perturb_std * math.pow(
                    perturb_decay_factor, math.floor((1 + total_batch) / 4000))

            avg_batch_loss, real_r, fake_r = train_one_batch(
                D_model, batch, generator, opt, perturb_std)
            #            print("Currently loss is",avg_batch_loss.item())
            #            print("Currently real loss is",real_r.item())
            #            print("Currently fake loss is",fake_r.item())
            #            state_dfs = D_model.state_dict()
            #            torch.save(state_dfs,"Checkpoint_" + str(epoch) + ".pth.tar")
            #

            if batch_i % 350 == 0:
                print("Currently loss is", avg_batch_loss.item())
                print("Currently real loss is", real_r.item())
                print("Currently fake loss is", fake_r.item())

                print("Saving the file ...............----------->>>>>")
                state_dfs = D_model.state_dict()
                torch.save(
                    state_dfs, "Discriminator_checkpts/D_model_combined" +
                    str(epoch) + ".pth.tar")

            avg_batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(D_model.parameters(), clip)
            D_optimizer.step()
            #sys.exit()

        print("Saving the file ...............----------->>>>>")
        state_dfs = D_model.state_dict()
        torch.save(
            state_dfs, "Discriminator_checkpts/D_model_combined" + str(epoch) +
            ".pth.tar")
Example #10
from pykp.model import Seq2SeqModel
from torch.optim import Adam
import pykp
import train_ml
import train_rl

from utils.time_log import time_since
from utils.data_loader import load_data_and_vocab
from utils.string_helper import convert_list_to_kphs
import argparse
import time
import numpy as np
import random
from torch import device 
from Discriminator_Softmax import Discriminator

#####################################################################################################
opt = argparse.Namespace(attn_mode='concat', baseline='self', batch_size=32, batch_workers=4, bidirectional=True, bridge='copy', checkpoint_interval=4000, copy_attention=True, copy_input_feeding=False, coverage_attn=False, coverage_loss=False, custom_data_filename_suffix=False, custom_vocab_filename_suffix=False, data='data/kp20k_separated/', data_filename_suffix='', dec_layers=1, decay_method='', decoder_size=300, decoder_type='rnn', delimiter_type=0, delimiter_word='<sep>', device=device(type='cuda', index=2), disable_early_stop_rl=False, dropout=0.1, dynamic_dict=True, early_stop_tolerance=4, enc_layers=1, encoder_size=150, encoder_type='rnn', epochs=20, exp='kp20k.rl.one2many.cat.copy.bi-directional', exp_path='exp/kp20k.rl.one2many.cat.copy.bi-directional.20190701-192604', final_perturb_std=0, fix_word_vecs_dec=False, fix_word_vecs_enc=False, goal_vector_mode=0, goal_vector_size=16, gpuid=1, init_perturb_std=0, input_feeding=False, lambda_coverage=1, lambda_orthogonal=0.03, lambda_target_encoder=0.03, learning_rate=0.001, learning_rate_decay=0.5, learning_rate_decay_rl=False, learning_rate_rl=5e-05, loss_normalization='tokens', manager_mode=1, match_type='exact', max_grad_norm=1, max_length=60, max_sample_length=6, max_unk_words=1000, mc_rollouts=False, model_path='model/kp20k.rl.one2many.cat.copy.bi-directional.20190701-192604', must_teacher_forcing=False, num_predictions=1, num_rollouts=3, one2many=True, one2many_mode=1, optim='adam', orthogonal_loss=False, param_init=0.1, perturb_baseline=False, perturb_decay_factor=0.0001, perturb_decay_mode=1, pre_word_vecs_dec=None, pre_word_vecs_enc=None, pretrained_model='model/kp20k.ml.one2many.cat.copy.bi-directional.20190628-114655/kp20k.ml.one2many.cat.copy.bi-directional.epoch=2.batch=54573.total_batch=116000.model', regularization_factor=0.0, regularization_type=0, remove_src_eos=False, replace_unk=True, report_every=10, review_attn=False, reward_shaping=False, reward_type=7, save_model='model', scheduled_sampling=False, scheduled_sampling_batches=10000, seed=9527, separate_present_absent=True, share_embeddings=True, source_representation_queue_size=128, source_representation_sample_size=32, start_checkpoint_at=2, start_decay_at=8, start_epoch=1, target_encoder_size=64, teacher_forcing_ratio=0, timemark='20190701-192604', title_guided=False, topk='G', train_from='', train_ml=False, train_rl=True, truncated_decoder=0, use_target_encoder=False, vocab='data/kp20k_separated/', vocab_filename_suffix='', vocab_size=50002, warmup_steps=4000, word_vec_size=100, words_min_frequency=0)


hidden_dim = 150
embedding_dim = 200
n_layers = 2 
clip = 5 

def main():
    clip = 5
    start_time = time.time()
    train_data_loader, valid_data_loader, word2idx, idx2word, vocab = load_data_and_vocab(opt, load_train=True)
    load_data_time = time_since(start_time)