def main(opt):
    try:
        start_time = time.time()
        train_data_loader, train_bow_loader, valid_data_loader, valid_bow_loader, \
            word2idx, idx2word, vocab, bow_dictionary = load_data_and_vocab(opt, load_train=True)
        opt.bow_vocab_size = len(bow_dictionary)
        load_data_time = time_since(start_time)
        logging.info('Time for loading the data: %.1f' % load_data_time)

        start_time = time.time()
        model = Seq2SeqModel(opt).to(opt.device)
        ntm_model = NTM(opt).to(opt.device)
        optimizer_seq2seq, optimizer_ntm, optimizer_whole = init_optimizers(model, ntm_model, opt)

        train_mixture.train_model(model, ntm_model, optimizer_seq2seq, optimizer_ntm,
                                  optimizer_whole, train_data_loader, valid_data_loader,
                                  bow_dictionary, train_bow_loader, valid_bow_loader, opt)

        training_time = time_since(start_time)
        logging.info('Time for training: %.1f' % training_time)
    except Exception as e:
        logging.exception("message")
    return
def main(opt):
    try:
        start_time = time.time()
        train_data_loader, valid_data_loader, word2idx, idx2word, vocab = \
            load_data_and_vocab(opt, load_train=True)
        load_data_time = time_since(start_time)
        logging.info('Time for loading the data: %.1f' % load_data_time)

        start_time = time.time()
        model = init_model(opt)
        optimizer_ml, optimizer_rl, criterion = init_optimizer_criterion(model, opt)

        if opt.train_ml:
            train_ml.train_model(model, optimizer_ml, optimizer_rl, criterion,
                                 train_data_loader, valid_data_loader, opt)
        else:
            train_rl.train_model(model, optimizer_ml, optimizer_rl, criterion,
                                 train_data_loader, valid_data_loader, opt)

        training_time = time_since(start_time)
        logging.info('Time for training: %.1f' % training_time)
    except Exception as e:
        logging.exception("message")
    return
def main(opt):
    start_time = time.time()
    train_bow_loader, valid_bow_loader, word2idx, idx2word, vocab, bow_dictionary = \
        load_data_and_vocab(opt, load_train=True)
    opt.bow_vocab_size = len(bow_dictionary)
    load_data_time = time_since(start_time)
    logging.info('Time for loading the data: %.1f' % load_data_time)

    start_time = time.time()
    ntm_model = NTM(opt).to(opt.device)
    optimizer_ntm = init_optimizers(ntm_model, opt)
    train_model.train_model(ntm_model, optimizer_ntm, bow_dictionary,
                            train_bow_loader, valid_bow_loader, opt)
    training_time = time_since(start_time)
    logging.info('Time for training: %.1f' % training_time)
    return
def main(opt):
    try:
        start_time = time.time()
        test_data_loader, word2idx, idx2word, vocab = load_data_and_vocab(opt, load_train=False)
        model = init_pretrained_model(opt)
        # Measure elapsed time only after both the data and the pretrained model are loaded.
        load_data_time = time_since(start_time)
        logging.info('Time for loading the data and model: %.1f' % load_data_time)

        start_time = time.time()
        predict(test_data_loader, model, opt)
        total_testing_time = time_since(start_time)
        logging.info('Time for a complete testing: %.1f' % total_testing_time)
        print('Time for a complete testing: %.1f' % total_testing_time)
        sys.stdout.flush()
    except Exception as e:
        logging.exception("message")
    return
def main(opt):
    try:
        start_time = time.time()
        train_data_loader, valid_data_loader, word2idx, idx2word, vocab = \
            load_data_and_vocab(opt, load_train=True)
        load_data_time = time_since(start_time)
        logging.info('Time for loading the data: %.1f' % load_data_time)

        start_time = time.time()
        model = init_model(opt)
        optimizer = Adam(params=filter(lambda p: p.requires_grad, model.parameters()),
                         lr=opt.learning_rate)
        train_model(model, optimizer, train_data_loader, valid_data_loader, opt)
        training_time = time_since(start_time)
        logging.info('Time for training: %.1f' % training_time)
    except Exception as e:
        logging.exception("")
    return
def main(opt):
    clip = 5
    start_time = time.time()
    train_data_loader, valid_data_loader, word2idx, idx2word, vocab = \
        load_data_and_vocab(opt, load_train=True)
    load_data_time = time_since(start_time)
    logging.info('Time for loading the data: %.1f' % load_data_time)
    print("______________________ Data Successfully Loaded ______________")

    # Load the pretrained Seq2Seq generator.
    model = Seq2SeqModel(opt)
    if torch.cuda.is_available():
        model.load_state_dict(torch.load(opt.model_path))
        model = model.to(opt.gpuid)
    else:
        model.load_state_dict(torch.load(opt.model_path, map_location="cpu"))
    print("___________________ Generator Initialised and Loaded _________________________")

    generator = SequenceGenerator(model,
                                  bos_idx=opt.word2idx[pykp.io.BOS_WORD],
                                  eos_idx=opt.word2idx[pykp.io.EOS_WORD],
                                  pad_idx=opt.word2idx[pykp.io.PAD_WORD],
                                  peos_idx=opt.word2idx[pykp.io.PEOS_WORD],
                                  beam_size=1,
                                  max_sequence_length=opt.max_length,
                                  copy_attn=opt.copy_attention,
                                  coverage_attn=opt.coverage_attn,
                                  review_attn=opt.review_attn,
                                  cuda=opt.gpuid > -1)

    # Schedule for the reward-perturbation standard deviation.
    init_perturb_std = opt.init_perturb_std
    final_perturb_std = opt.final_perturb_std
    perturb_decay_factor = opt.perturb_decay_factor
    perturb_decay_mode = opt.perturb_decay_mode

    # Discriminator hyper-parameters.
    hidden_dim = opt.D_hidden_dim
    embedding_dim = opt.D_embedding_dim
    n_layers = opt.D_layers

    D_model = Discriminator(opt.vocab_size, embedding_dim, hidden_dim, n_layers,
                            opt.word2idx[pykp.io.PAD_WORD])
    print("The Discriminator Description is ", D_model)

    PG_optimizer = torch.optim.Adagrad(model.parameters(), opt.learning_rate_rl)

    if torch.cuda.is_available():
        D_model.load_state_dict(torch.load(opt.Discriminator_model_path))
        D_model = D_model.to(opt.gpuid)
    else:
        D_model.load_state_dict(torch.load(opt.Discriminator_model_path, map_location="cpu"))

    total_epochs = opt.epochs
    for epoch in range(total_epochs):
        total_batch = 0
        print("Starting with epoch:", epoch)
        for batch_i, batch in enumerate(train_data_loader):
            total_batch += 1
            model.train()
            PG_optimizer.zero_grad()

            if perturb_decay_mode == 0:  # do not decay
                perturb_std = init_perturb_std
            elif perturb_decay_mode == 1:  # exponential decay
                perturb_std = final_perturb_std + (init_perturb_std - final_perturb_std) * \
                    math.exp(-1. * total_batch * perturb_decay_factor)
            elif perturb_decay_mode == 2:  # steps decay
                perturb_std = init_perturb_std * math.pow(
                    perturb_decay_factor, math.floor((1 + total_batch) / 4000))

            avg_rewards = train_one_batch(D_model, batch, generator, opt, perturb_std)
            # Backpropagate first so that gradients exist when they are clipped.
            avg_rewards.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            PG_optimizer.step()

            if batch_i % 4000 == 0:
                print("Saving the file ...............----------->>>>>")
                print("The avg reward is", -avg_rewards.item())
                state_dfs = model.state_dict()
                torch.save(state_dfs,
                           "RL_Checkpoints/Attention_Generator_" + str(epoch) + ".pth.tar")
def main():
    clip = 5
    start_time = time.time()
    train_data_loader, valid_data_loader, word2idx, idx2word, vocab = \
        load_data_and_vocab(opt, load_train=True)
    load_data_time = time_since(start_time)
    logging.info('Time for loading the data: %.1f' % load_data_time)

    # Load the pretrained Seq2Seq generator from a fixed checkpoint.
    model = Seq2SeqModel(opt)
    model = model.to("cuda:2")
    model.load_state_dict(
        torch.load(
            "model/kp20k.ml.one2many.cat.copy.bi-directional.20190715-132016/kp20k.ml.one2many.cat.copy.bi-directional.epoch=3.batch=26098.total_batch=108000.model"
        ))

    generator = SequenceGenerator(model,
                                  bos_idx=opt.word2idx[pykp.io.BOS_WORD],
                                  eos_idx=opt.word2idx[pykp.io.EOS_WORD],
                                  pad_idx=opt.word2idx[pykp.io.PAD_WORD],
                                  peos_idx=opt.word2idx[pykp.io.PEOS_WORD],
                                  beam_size=1,
                                  max_sequence_length=opt.max_length,
                                  copy_attn=opt.copy_attention,
                                  coverage_attn=opt.coverage_attn,
                                  review_attn=opt.review_attn,
                                  cuda=opt.gpuid > -1)

    init_perturb_std = opt.init_perturb_std
    final_perturb_std = opt.final_perturb_std
    perturb_decay_factor = opt.perturb_decay_factor
    perturb_decay_mode = opt.perturb_decay_mode

    # embedding_dim, hidden_dim and n_layers are module-level discriminator hyper-parameters.
    D_model = Discriminator(opt.vocab_size, embedding_dim, hidden_dim, n_layers,
                            opt.word2idx[pykp.io.PAD_WORD])
    PG_optimizer = torch.optim.Adagrad(model.parameters(), 0.00005)
    print("The Discriminator statistics are ", D_model)

    if torch.cuda.is_available():
        D_model = D_model.to("cuda:1")

    total_epochs = 5
    for epoch in range(total_epochs):
        total_batch = 0
        print("Starting with epoch:", epoch)
        # The generator is fine-tuned on batches drawn from the validation loader.
        for batch_i, batch in enumerate(valid_data_loader):
            total_batch += 1
            PG_optimizer.zero_grad()

            if perturb_decay_mode == 0:  # do not decay
                perturb_std = init_perturb_std
            elif perturb_decay_mode == 1:  # exponential decay
                perturb_std = final_perturb_std + (init_perturb_std - final_perturb_std) * \
                    math.exp(-1. * total_batch * perturb_decay_factor)
            elif perturb_decay_mode == 2:  # steps decay
                perturb_std = init_perturb_std * math.pow(
                    perturb_decay_factor, math.floor((1 + total_batch) / 4000))

            avg_rewards = train_one_batch(D_model, batch, generator, opt, perturb_std)
            avg_rewards.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            PG_optimizer.step()

            if batch_i % 15 == 0:
                print("The avg reward is", -avg_rewards.item())
            if batch_i % 100 == 0:
                print("Saving the file ...............----------->>>>>")
                print("The avg reward is", -avg_rewards.item())
                state_dfs = model.state_dict()
                torch.save(state_dfs,
                           "RL_Checkpoints/Checkpoint_SeqGAN_" + str(epoch) + ".pth.tar")

        # Save a checkpoint at the end of every epoch as well.
        print("Saving the file ...............----------->>>>>")
        state_dfs = model.state_dict()
        torch.save(state_dfs,
                   "RL_Checkpoints/Checkpoint_SeqGAN_" + str(epoch) + ".pth.tar")
def main(opt):
    clip = 5
    start_time = time.time()
    train_data_loader, valid_data_loader, word2idx, idx2word, vocab = \
        load_data_and_vocab(opt, load_train=True)
    load_data_time = time_since(start_time)
    logging.info('Time for loading the data: %.1f' % load_data_time)
    print("Data Successfully Loaded __.__.__.__.__.__.__.__.__.__.__.__.__.__.")

    # Load the pretrained Seq2Seq generator.
    model = Seq2SeqModel(opt)
    if torch.cuda.is_available():
        model.load_state_dict(torch.load(opt.model_path))
        model = model.to(opt.gpuid)
    else:
        model.load_state_dict(torch.load(opt.model_path, map_location="cpu"))
    print("___________________ Generator Initialised and Loaded _________________________")

    generator = SequenceGenerator(model,
                                  bos_idx=opt.word2idx[pykp.io.BOS_WORD],
                                  eos_idx=opt.word2idx[pykp.io.EOS_WORD],
                                  pad_idx=opt.word2idx[pykp.io.PAD_WORD],
                                  peos_idx=opt.word2idx[pykp.io.PEOS_WORD],
                                  beam_size=1,
                                  max_sequence_length=opt.max_length,
                                  copy_attn=opt.copy_attention,
                                  coverage_attn=opt.coverage_attn,
                                  review_attn=opt.review_attn,
                                  cuda=opt.gpuid > -1)

    init_perturb_std = opt.init_perturb_std
    final_perturb_std = opt.final_perturb_std
    perturb_decay_factor = opt.perturb_decay_factor
    perturb_decay_mode = opt.perturb_decay_mode

    hidden_dim = opt.D_hidden_dim
    embedding_dim = opt.D_embedding_dim
    n_layers = opt.D_layers

    if torch.cuda.is_available():
        D_model = Discriminator(opt.vocab_size, embedding_dim, hidden_dim, n_layers,
                                opt.word2idx[pykp.io.PAD_WORD], opt.gpuid)
    else:
        D_model = Discriminator(opt.vocab_size, embedding_dim, hidden_dim, n_layers,
                                opt.word2idx[pykp.io.PAD_WORD], "cpu")
    print("The Discriminator Description is ", D_model)

    if opt.pretrained_Discriminator:
        if torch.cuda.is_available():
            D_model.load_state_dict(torch.load(opt.Discriminator_model_path))
            D_model = D_model.to(opt.gpuid)
        else:
            D_model.load_state_dict(torch.load(opt.Discriminator_model_path, map_location="cpu"))
    else:
        # Train the discriminator from scratch; just move it to the GPU when one is available.
        if torch.cuda.is_available():
            D_model = D_model.to(opt.gpuid)

    D_optimizer = torch.optim.Adam(D_model.parameters(), opt.learning_rate)
    print("Beginning with training Discriminator")
    print("########################################################################################################")

    total_epochs = 5
    # Track the best validation loss across the whole run.
    best_valid_loss = 1000
    for epoch in range(total_epochs):
        total_batch = 0
        print("Starting with epoch:", epoch)
        for batch_i, batch in enumerate(train_data_loader):
            total_batch += 1
            D_model.train()
            D_optimizer.zero_grad()

            if perturb_decay_mode == 0:  # do not decay
                perturb_std = init_perturb_std
            elif perturb_decay_mode == 1:  # exponential decay
                perturb_std = final_perturb_std + (init_perturb_std - final_perturb_std) * \
                    math.exp(-1. * total_batch * perturb_decay_factor)
            elif perturb_decay_mode == 2:  # steps decay
                perturb_std = init_perturb_std * math.pow(
                    perturb_decay_factor, math.floor((1 + total_batch) / 4000))

            avg_batch_loss, _, _ = train_one_batch(D_model, batch, generator, opt, perturb_std)
            # Backpropagate first so that gradients exist when they are clipped.
            avg_batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(D_model.parameters(), clip)
            D_optimizer.step()
            D_model.eval()

            # Periodically evaluate on the validation set and checkpoint on improvement.
            if batch_i % 4000 == 0:
                total = 0
                valid_loss_total, valid_real_total, valid_fake_total = 0, 0, 0
                for batch_j, valid_batch in enumerate(valid_data_loader):
                    total += 1
                    valid_loss, valid_real, valid_fake = train_one_batch(
                        D_model, valid_batch, generator, opt, perturb_std)
                    valid_loss_total += valid_loss.cpu().detach().numpy()
                    valid_real_total += valid_real.cpu().detach().numpy()
                    valid_fake_total += valid_fake.cpu().detach().numpy()
                    D_optimizer.zero_grad()

                print("Currently loss is ", valid_loss_total.item() / total)
                print("Currently real loss is ", valid_real_total.item() / total)
                print("Currently fake loss is ", valid_fake_total.item() / total)

                if best_valid_loss > valid_loss_total.item() / total:
                    print("Loss Decreases so saving the file ...............----------->>>>>")
                    state_dfs = D_model.state_dict()
                    torch.save(state_dfs,
                               "Discriminator_checkpts/Attention_Disriminator_" + str(epoch) + ".pth.tar")
                    best_valid_loss = valid_loss_total.item() / total
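# The validation pass above computes the average discriminator losses inline. Below is a
# minimal refactoring sketch of that logic, assuming the same
# train_one_batch(D_model, batch, generator, opt, perturb_std) -> (loss, real_loss, fake_loss)
# contract; the helper name evaluate_discriminator is hypothetical and not part of the
# original code.
def evaluate_discriminator(D_model, data_loader, generator, opt, perturb_std):
    D_model.eval()
    total = 0
    loss_sum, real_sum, fake_sum = 0.0, 0.0, 0.0
    for valid_batch in data_loader:
        valid_loss, valid_real, valid_fake = train_one_batch(
            D_model, valid_batch, generator, opt, perturb_std)
        loss_sum += valid_loss.item()
        real_sum += valid_real.item()
        fake_sum += valid_fake.item()
        total += 1
    # Average the total, real and fake losses over the number of validation batches.
    return loss_sum / total, real_sum / total, fake_sum / total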
def main():
    clip = 5
    start_time = time.time()
    train_data_loader, valid_data_loader, word2idx, idx2word, vocab = \
        load_data_and_vocab(opt, load_train=True)
    load_data_time = time_since(start_time)
    logging.info('Time for loading the data: %.1f' % load_data_time)

    # Load the pretrained Seq2Seq generator from a fixed checkpoint.
    model = Seq2SeqModel(opt)
    model = model.to(devices)
    model.load_state_dict(
        torch.load(
            "model/kp20k.ml.one2many.cat.copy.bi-directional.20190715-132016/kp20k.ml.one2many.cat.copy.bi-directional.epoch=3.batch=26098.total_batch=108000.model"
        ))

    generator = SequenceGenerator(model,
                                  bos_idx=opt.word2idx[pykp.io.BOS_WORD],
                                  eos_idx=opt.word2idx[pykp.io.EOS_WORD],
                                  pad_idx=opt.word2idx[pykp.io.PAD_WORD],
                                  peos_idx=opt.word2idx[pykp.io.PEOS_WORD],
                                  beam_size=1,
                                  max_sequence_length=opt.max_length,
                                  copy_attn=opt.copy_attention,
                                  coverage_attn=opt.coverage_attn,
                                  review_attn=opt.review_attn,
                                  cuda=opt.gpuid > -1)

    init_perturb_std = opt.init_perturb_std
    final_perturb_std = opt.final_perturb_std
    perturb_decay_factor = opt.perturb_decay_factor
    perturb_decay_mode = opt.perturb_decay_mode

    # embedding_dim, hidden_dim and n_layers are module-level discriminator hyper-parameters.
    D_model = Discriminator(opt.vocab_size, embedding_dim, hidden_dim, n_layers,
                            opt.word2idx[pykp.io.PAD_WORD])
    print("The Discriminator statistics are ", D_model)

    if torch.cuda.is_available():
        D_model = D_model.to(devices)

    D_model.train()
    D_optimizer = torch.optim.Adam(D_model.parameters(), lr=0.001)

    total_epochs = 5
    for epoch in range(total_epochs):
        total_batch = 0
        print("Starting with epoch:", epoch)
        for batch_i, batch in enumerate(train_data_loader):
            total_batch += 1
            D_optimizer.zero_grad()

            if perturb_decay_mode == 0:  # do not decay
                perturb_std = init_perturb_std
            elif perturb_decay_mode == 1:  # exponential decay
                perturb_std = final_perturb_std + (init_perturb_std - final_perturb_std) * \
                    math.exp(-1. * total_batch * perturb_decay_factor)
            elif perturb_decay_mode == 2:  # steps decay
                perturb_std = init_perturb_std * math.pow(
                    perturb_decay_factor, math.floor((1 + total_batch) / 4000))

            avg_batch_loss, real_r, fake_r = train_one_batch(D_model, batch, generator, opt,
                                                             perturb_std)

            # Periodically report the losses and checkpoint the discriminator.
            if batch_i % 350 == 0:
                print("Currently loss is", avg_batch_loss.item())
                print("Currently real loss is", real_r.item())
                print("Currently fake loss is", fake_r.item())
                print("Saving the file ...............----------->>>>>")
                state_dfs = D_model.state_dict()
                torch.save(state_dfs,
                           "Discriminator_checkpts/D_model_combined" + str(epoch) + ".pth.tar")

            # Backpropagate first so that gradients exist when they are clipped.
            avg_batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(D_model.parameters(), clip)
            D_optimizer.step()

        print("Saving the file ...............----------->>>>>")
        state_dfs = D_model.state_dict()
        torch.save(state_dfs,
                   "Discriminator_checkpts/D_model_combined" + str(epoch) + ".pth.tar")
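# The same three-mode schedule for the perturbation standard deviation is repeated in each
# of the training loops above. Below is a small stand-alone sketch of that schedule; the
# function name compute_perturb_std is hypothetical and not part of the original code.
import math


def compute_perturb_std(total_batch, init_perturb_std, final_perturb_std,
                        perturb_decay_factor, perturb_decay_mode):
    if perturb_decay_mode == 0:    # do not decay
        return init_perturb_std
    elif perturb_decay_mode == 1:  # exponential decay towards final_perturb_std
        return final_perturb_std + (init_perturb_std - final_perturb_std) * \
            math.exp(-1. * total_batch * perturb_decay_factor)
    elif perturb_decay_mode == 2:  # step decay every 4000 batches
        return init_perturb_std * math.pow(perturb_decay_factor,
                                           math.floor((1 + total_batch) / 4000))
    raise ValueError("unknown perturb_decay_mode: %d" % perturb_decay_mode)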
import argparse
import logging
import math
import random
import time

import numpy as np
import torch
from torch import device
from torch.optim import Adam

import pykp
from pykp.model import Seq2SeqModel
import train_ml
import train_rl
from utils.time_log import time_since
from utils.data_loader import load_data_and_vocab
from utils.string_helper import convert_list_to_kphs
from Discriminator_Softmax import Discriminator

#####################################################################################################
opt = argparse.Namespace(
    attn_mode='concat', baseline='self', batch_size=32, batch_workers=4,
    bidirectional=True, bridge='copy', checkpoint_interval=4000, copy_attention=True,
    copy_input_feeding=False, coverage_attn=False, coverage_loss=False,
    custom_data_filename_suffix=False, custom_vocab_filename_suffix=False,
    data='data/kp20k_separated/', data_filename_suffix='', dec_layers=1, decay_method='',
    decoder_size=300, decoder_type='rnn', delimiter_type=0, delimiter_word='<sep>',
    device=device(type='cuda', index=2), disable_early_stop_rl=False, dropout=0.1,
    dynamic_dict=True, early_stop_tolerance=4, enc_layers=1, encoder_size=150,
    encoder_type='rnn', epochs=20, exp='kp20k.rl.one2many.cat.copy.bi-directional',
    exp_path='exp/kp20k.rl.one2many.cat.copy.bi-directional.20190701-192604',
    final_perturb_std=0, fix_word_vecs_dec=False, fix_word_vecs_enc=False,
    goal_vector_mode=0, goal_vector_size=16, gpuid=1, init_perturb_std=0,
    input_feeding=False, lambda_coverage=1, lambda_orthogonal=0.03,
    lambda_target_encoder=0.03, learning_rate=0.001, learning_rate_decay=0.5,
    learning_rate_decay_rl=False, learning_rate_rl=5e-05, loss_normalization='tokens',
    manager_mode=1, match_type='exact', max_grad_norm=1, max_length=60,
    max_sample_length=6, max_unk_words=1000, mc_rollouts=False,
    model_path='model/kp20k.rl.one2many.cat.copy.bi-directional.20190701-192604',
    must_teacher_forcing=False, num_predictions=1, num_rollouts=3, one2many=True,
    one2many_mode=1, optim='adam', orthogonal_loss=False, param_init=0.1,
    perturb_baseline=False, perturb_decay_factor=0.0001, perturb_decay_mode=1,
    pre_word_vecs_dec=None, pre_word_vecs_enc=None,
    pretrained_model='model/kp20k.ml.one2many.cat.copy.bi-directional.20190628-114655/kp20k.ml.one2many.cat.copy.bi-directional.epoch=2.batch=54573.total_batch=116000.model',
    regularization_factor=0.0, regularization_type=0, remove_src_eos=False,
    replace_unk=True, report_every=10, review_attn=False, reward_shaping=False,
    reward_type=7, save_model='model', scheduled_sampling=False,
    scheduled_sampling_batches=10000, seed=9527, separate_present_absent=True,
    share_embeddings=True, source_representation_queue_size=128,
    source_representation_sample_size=32, start_checkpoint_at=2, start_decay_at=8,
    start_epoch=1, target_encoder_size=64, teacher_forcing_ratio=0,
    timemark='20190701-192604', title_guided=False, topk='G', train_from='',
    train_ml=False, train_rl=True, truncated_decoder=0, use_target_encoder=False,
    vocab='data/kp20k_separated/', vocab_filename_suffix='', vocab_size=50002,
    warmup_steps=4000, word_vec_size=100, words_min_frequency=0)

# Module-level discriminator hyper-parameters and gradient-clipping norm.
hidden_dim = 150
embedding_dim = 200
n_layers = 2
clip = 5


def main():
    clip = 5
    start_time = time.time()
    train_data_loader, valid_data_loader, word2idx, idx2word, vocab = \
        load_data_and_vocab(opt, load_train=True)
    load_data_time = time_since(start_time)