def load_encoder_decoder(voc, checkpoint, configs):
    """
    Initialize encoder and decoder, and load them from file if a previous state exists
    :param voc: Vocabulary
    :param checkpoint: dict
    :param configs: dict
    :return: Encoder, LuongAttentionDecoderRNN
    """
    logging.info('Building encoder and decoder ...')
    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, configs["hidden_size"])
    # Initialize encoder & decoder models
    encoder = EncoderRNN(configs["hidden_size"], embedding, configs["encoder_n_layers"], configs["dropout"])
    decoder = LuongAttentionDecoderRNN(embedding, voc.num_words, configs)
    if checkpoint:
        voc.__dict__ = checkpoint['voc_dict']
        embedding.load_state_dict(checkpoint['embedding'])
        encoder.load_state_dict(checkpoint['en'])
        decoder.load_state_dict(checkpoint['de'])
    logging.info('Models built and ready to go!')
    return encoder.to(get_device()), decoder.to(get_device())
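# Hedged sketch (not part of the original file): a checkpoint that
# load_encoder_decoder() can consume only needs the four keys read above.
# 'save_path' is an illustrative name, not one defined in this file.
def save_encoder_decoder(voc, embedding, encoder, decoder, save_path):
    torch.save({
        'voc_dict': voc.__dict__,
        'embedding': embedding.state_dict(),
        'en': encoder.state_dict(),
        'de': decoder.state_dict(),
    }, save_path)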
def main():
    ap = argparse.ArgumentParser()
    ap.add_argument('--dev_files', default='../amr_anno_1.0/data/split/dev/*', help='dev files.')
    ap.add_argument('--log_dir', default='./log', help='log directory')
    ap.add_argument('--exp_name', default='experiment', help='experiment name')
    args = ap.parse_args()

    # read dev files
    dev_files = glob.glob(args.dev_files)
    dev_pairs = AMR.read_AMR_files(dev_files, True)

    logdir = args.log_dir
    exp_dir = logdir + '/' + args.exp_name
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    max_iter = 0
    dev_bleu = 0.0
    while True:
        # pick the newest checkpoint (highest iteration number) not yet evaluated
        load_state_file = None
        state_files = glob.glob(exp_dir + '/*')
        for sf in state_files:
            iter_num = int(sf.split('_')[1].split('.')[0])
            if iter_num > max_iter:
                max_iter = iter_num
                load_state_file = sf
        if load_state_file is not None:
            state = torch.load(load_state_file)
            amr_vocab = state['amr_vocab']
            en_vocab = state['en_vocab']
            hidden_size = state['hidden_size']
            edge_size = state['edge_size']
            drop = state['dropout']
            mlength = state['max_length']
            logging.info('loaded checkpoint %s', load_state_file)
            encoder = EncoderRNN(amr_vocab.n_nodes, hidden_size).to(device)
            child_sum = ChildSum(amr_vocab.n_edges, edge_size, hidden_size).to(device)
            decoder = AttnDecoderRNN(hidden_size, en_vocab.n_words, dropout_p=drop, max_length=mlength).to(device)
            encoder.load_state_dict(state['enc_state'])
            child_sum.load_state_dict(state['sum_state'])
            decoder.load_state_dict(state['dec_state'])
            # translate from the dev set
            translate_random_amr(encoder, child_sum, decoder, dev_pairs, amr_vocab, en_vocab, mlength, n=10)
            translated_amrs = translate_amrs(encoder, child_sum, decoder, dev_pairs, amr_vocab, en_vocab, mlength)
            references = [[pair[0]] for pair in dev_pairs[:len(translated_amrs)]]
            candidates = [sent.split() for sent in translated_amrs]
            dev_bleu = corpus_bleu(references, candidates)
            logging.info('Dev BLEU score: %.2f', dev_bleu)
        else:
            logging.info('No new checkpoint found. Last dev BLEU score: %.2f', dev_bleu)
        time.sleep(20)
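# Hedged sketch (an assumption inferred from the filename parsing above): the
# polling loop expects checkpoint paths containing a single underscore followed
# by the iteration number, e.g. 'state_12000.pt', and a dict with exactly the
# keys it reads. An illustrative, compatible save call:
def save_state(exp_dir, iter_num, encoder, child_sum, decoder,
               amr_vocab, en_vocab, hidden_size, edge_size, drop, mlength):
    torch.save({
        'amr_vocab': amr_vocab, 'en_vocab': en_vocab,
        'hidden_size': hidden_size, 'edge_size': edge_size,
        'dropout': drop, 'max_length': mlength,
        'enc_state': encoder.state_dict(),
        'sum_state': child_sum.state_dict(),
        'dec_state': decoder.state_dict(),
    }, exp_dir + '/state_%d.pt' % iter_num)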
def main():
    parser = argparse.ArgumentParser("English - Lojban translation")
    parser.add_argument("--source", default='loj', help="source language data")
    parser.add_argument("--target", default='en', help="target language data")
    parser.add_argument("--iters", type=int, default=100000, help="number of iterations to train")
    # NOTE: type=bool treats any non-empty string as True, so use a flag instead
    parser.add_argument("--no-train", action='store_true', help="Do not perform training. Only validation")
    parser.add_argument("--pretrain-encoder", help="Path to pretrained encoder")
    parser.add_argument("--pretrain-decoder", help="Path to pretrained decoder")
    parser.add_argument("--pretrain-input-words", type=int,
                        help="Number of source language words in pretrained model")
    parser.add_argument("--pretrain-output-words", type=int,
                        help="Number of target language words in pretrained model")
    parser.add_argument("--encoder-ckpt", default="encoder.pth", help="Name of encoder checkpoint filename")
    parser.add_argument("--decoder-ckpt", default="decoder.pth", help="Name of decoder checkpoint filename")
    parser.add_argument("--prefix", default='', help='Prefix, added to data files')
    args = parser.parse_args()

    input_lang, output_lang, pairs, pairs_val = prepare_data(args.source, args.target, prefix=args.prefix)
    langs = (input_lang, output_lang)
    print(random.choice(pairs))

    # size the models after the pretrained vocabularies when given, so the
    # pretrained weights below can be loaded
    input_words = args.pretrain_input_words or input_lang.n_words
    output_words = args.pretrain_output_words or output_lang.n_words
    encoder = EncoderRNN(input_words, hidden_size).to(device)
    decoder = AttnDecoderRNN(hidden_size, output_words, dropout_p=0.1).to(device)
    if args.pretrain_encoder and args.pretrain_decoder:
        load_pretrained_model(encoder, decoder, args.pretrain_encoder, args.pretrain_decoder)

    if not args.no_train:
        train(encoder, decoder, args.iters, pairs, langs, print_every=5000)
        torch.save(encoder.state_dict(), args.encoder_ckpt)
        torch.save(decoder.state_dict(), args.decoder_ckpt)
    evaluate_all(encoder, decoder, pairs_val, langs)
def main():
    # Load the vocabulary and the datasets
    voc = Lang('data/WORDMAP.json')
    print("Vocabulary size: " + str(voc.n_words))
    train_data = SaDataset('train', voc)
    val_data = SaDataset('valid', voc)

    # Initialize the model
    encoder = EncoderRNN(voc.n_words, hidden_size, encoder_n_layers, dropout)
    # Move the model to the compute device (GPU memory if cuda, host memory if cpu)
    encoder = encoder.to(device)

    # Initialize the optimizer. The optimizer performs gradient descent by
    # adjusting the model parameters; Adam is one such algorithm in torch.optim.
    print('Building optimizers ...')
    # Arguments: the parameters to optimize, and the learning rate
    optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)

    # Baseline accuracy
    best_acc = 0
    epochs_since_improvement = 0

    # epochs = number of training passes
    for epoch in range(0, epochs):
        # Decay learning rate if there is no improvement for 8 consecutive epochs,
        # and terminate training after 20
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(optimizer, 0.8)

        # Train for one epoch
        train(epoch, train_data, encoder, optimizer)

        # Validate the training result to guard against overfitting
        val_acc, val_loss = valid(val_data, encoder)
        print('\n * ACCURACY - {acc:.3f}, LOSS - {loss:.3f}\n'.format(acc=val_acc, loss=val_loss))

        # Check for improvement
        is_best = val_acc > best_acc
        best_acc = max(best_acc, val_acc)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, encoder, optimizer, val_acc, is_best)

        # Reshuffle the training and validation samples
        np.random.shuffle(train_data.samples)
        np.random.shuffle(val_data.samples)
def main(opt):
    video_path = opt["video_path"]
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    image_feats = extract_image_feats(video_path)
    image_feats = torch.from_numpy(image_feats).type(torch.FloatTensor).unsqueeze(0)

    encoder = EncoderRNN(opt["dim_vid"], opt["dim_hidden"],
                         bidirectional=bool(opt["bidirectional"]),
                         input_dropout_p=opt["input_dropout_p"],
                         rnn_dropout_p=opt["rnn_dropout_p"])
    decoder = DecoderRNN(16860,  # hard-coded vocabulary size
                         opt["max_len"], opt["dim_hidden"], opt["dim_word"],
                         input_dropout_p=opt["input_dropout_p"],
                         rnn_dropout_p=opt["rnn_dropout_p"],
                         bidirectional=bool(opt["bidirectional"]))
    model = S2VTAttModel(encoder, decoder).cuda()
    model.load_state_dict(torch.load(opt["saved_model"]))
    model.eval()

    # inference options; kept in a separate dict so the original opt is not clobbered
    infer_opt = {'child_sum': True, 'temporal_attention': True, 'multimodel_attention': True}
    with torch.no_grad():
        _, seq_preds = model(image_feats.cuda(), mode='inference', opt=infer_opt)
    vocab = json.load(open('data/info.json'))['ix_to_word']
    sent = NLUtils.decode_sequence(vocab, seq_preds)
    print(sent)
def main(opt):
    dataset = VideoDataset(opt, 'train')
    dataloader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True,
                            num_workers=0, pin_memory=True)
    global dataset_val
    global dataloader_val
    dataset_val = VideoDataset(opt, 'val')
    dataloader_val = DataLoader(dataset_val, batch_size=opt["batch_size"], shuffle=True,
                                num_workers=0, pin_memory=True)
    opt["vocab_size"] = dataset.get_vocab_size()
    encoder = EncoderRNN(
        opt["dim_vid"],
        opt["dim_hidden"],
        bidirectional=bool(opt["bidirectional"]),
        input_dropout_p=opt["input_dropout_p"],
        rnn_cell=opt['rnn_type'],
        rnn_dropout_p=opt["rnn_dropout_p"])
    decoder = DecoderRNN(
        opt["vocab_size"],
        opt["max_len"],
        opt["dim_hidden"],
        opt["dim_word"],
        input_dropout_p=opt["input_dropout_p"],
        rnn_cell=opt['rnn_type'],
        rnn_dropout_p=opt["rnn_dropout_p"],
        bidirectional=bool(opt["bidirectional"]))
    model = EncoderDecoderModel(encoder, decoder)
    model = model.cuda()
    model = nn.DataParallel(model)
    model.load_state_dict(torch.load('data/save_vatex_batch_noc3d/model_500.pth'))
    crit = utils.LanguageModelCriterion()
    rl_crit = utils.RewardCriterion()  # was missing: train() below expects rl_crit
    optimizer = optim.Adam(model.parameters(), lr=opt["learning_rate"], weight_decay=opt["weight_decay"])
    exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                                 step_size=opt["learning_rate_decay_every"],
                                                 gamma=opt["learning_rate_decay_rate"])
    print("Data Loaded")
    train(dataloader, model, crit, optimizer, exp_lr_scheduler, opt, rl_crit)
def main(opt):
    dataset = VideoDataset(opt, 'inference')
    opt["vocab_size"] = dataset.get_vocab_size()
    opt["seq_length"] = dataset.max_len

    if opt['beam_size'] != 1:
        assert opt["batch_size"] == 1
    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"],
                          opt['dim_vid'], n_layers=opt['num_layers'], rnn_cell=opt['rnn_type'],
                          bidirectional=opt["bidirectional"], rnn_dropout_p=opt["rnn_dropout_p"])
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(opt["dim_vid"], opt["dim_hidden"],
                             n_layers=opt['num_layers'], rnn_cell=opt['rnn_type'],
                             bidirectional=opt["bidirectional"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"],
                             n_layers=opt['num_layers'], rnn_cell=opt['rnn_type'],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"],
                             bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, decoder)
    else:
        return

    # if torch.cuda.device_count() > 1:
    #     print("{} devices detected, switch to parallel model.".format(torch.cuda.device_count()))
    #     model = nn.DataParallel(model)

    convnet = 'nasnetalarge'
    vocab = dataset.get_vocab()
    full_decoder = ConvS2VT(convnet, model, opt)
    tf_img_fn = ptm_utils.TransformImage(full_decoder.conv)
    load_img_fn = PIL.Image.fromarray

    for video_path in opt['videos']:
        print(video_path)
        with torch.no_grad():
            frames = skvideo.io.vread(video_path)
            batches = create_batches(frames, load_img_fn, tf_img_fn)
            seq_prob, seq_preds = full_decoder(batches, mode='inference')
            sents = utils.decode_sequence(vocab, seq_preds)
        for sent in sents:
            print(sent)
def main(args):
    config_path = os.path.join(args.config_path, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    print('[-] Loading pickles')
    dataset_path = Path(config["dataset_path"])
    input_lang = CustomUnpickler(open(dataset_path / 'input_lang.pkl', 'rb')).load()
    output_lang = CustomUnpickler(open(dataset_path / 'output_lang.pkl', 'rb')).load()
    pairs = CustomUnpickler(open(dataset_path / 'pairs.pkl', 'rb')).load()
    # input_lang = load_pkl(dataset_path / 'input_lang.pkl')
    # output_lang = load_pkl(dataset_path / 'output_lang.pkl')
    # pairs = load_pkl(dataset_path / 'pairs.pkl')

    max_len = config["max_len"]
    lr = config["model_cfg"]["lr"]
    hidden_size = config["model_cfg"]["hidden_size"]
    train_iters = args.train_iters
    device = torch.device("cuda:%s" % args.ordinal if torch.cuda.is_available() else "cpu")

    encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    attn_decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, max_len, dropout_p=0.1).to(device)

    trainer = Trainer(device, encoder, attn_decoder, input_lang, output_lang, pairs,
                      max_len, lr, ckpt_path=config["models_path"])
    if args.load_models:
        trainer.load_models()
    trainer.run_epoch(train_iters)
def main(opt):
    dataset = VideoDataset(opt, 'val', 'chinese')
    opt["vocab_size"] = 13491  # dataset.get_vocab_size() + chinDataset.get_vocab_size()
    opt["seq_length"] = dataset.max_len
    encoder = EncoderRNN(opt["dim_vid"], opt["dim_hidden"],
                         bidirectional=bool(opt["bidirectional"]),
                         input_dropout_p=opt["input_dropout_p"],
                         rnn_cell=opt['rnn_type'],
                         rnn_dropout_p=opt["rnn_dropout_p"])
    decoder = DecoderRNN(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"],
                         input_dropout_p=opt["input_dropout_p"],
                         rnn_cell=opt['rnn_type'],
                         rnn_dropout_p=opt["rnn_dropout_p"],
                         bidirectional=bool(opt["bidirectional"]))
    model = S2VTAttModel(encoder, decoder)
    # Setup the model
    model.load_state_dict(torch.load(opt["saved_model"], map_location=torch.device('cpu')))
    crit = utils.LanguageModelCriterion()
    test(model, crit, dataset, dataset.get_vocab(), opt)
def main(opt):
    dataset = VideoDataset(opt, 'test')
    opt.vocab_size = dataset.get_vocab_size()
    opt.seq_length = dataset.seq_length
    if opt.model == 'S2VTModel':
        model = S2VTModel(opt.vocab_size, opt.seq_length, opt.dim_hidden, opt.dim_word,
                          rnn_dropout_p=opt.rnn_dropout_p).cuda()
    elif opt.model == "S2VTAttModel":
        encoder = EncoderRNN(opt.dim_vid, opt.dim_hidden)
        decoder = DecoderRNN(opt.vocab_size, opt.seq_length, opt.dim_hidden, opt.dim_word,
                             rnn_dropout_p=0.2)
        model = S2VTAttModel(encoder, decoder).cuda()
    model = nn.DataParallel(model)
    # Setup the model
    model.load_state_dict(torch.load(opt.saved_model))
    model.eval()
    crit = utils.LanguageModelCriterion()
    test(model, crit, dataset, dataset.get_vocab(), opt)
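# General PyTorch note (not specific to this repo): wrapping a model in
# nn.DataParallel prefixes every state-dict key with 'module.', which is why
# the load above happens after wrapping. A hedged sketch for loading the same
# file into a bare, unwrapped model ('bare_model' is illustrative):
# state_dict = torch.load(opt.saved_model, map_location='cpu')
# state_dict = {k[len('module.'):] if k.startswith('module.') else k: v
#               for k, v in state_dict.items()}
# bare_model.load_state_dict(state_dict)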
def main(opt):
    dataset = VideoDataset(opt, "test")
    opt["vocab_size"] = dataset.get_vocab_size()
    opt["seq_length"] = dataset.max_len
    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"],
                          rnn_dropout_p=opt["rnn_dropout_p"]).cuda()
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(opt["dim_vid"], opt["dim_hidden"],
                             bidirectional=opt["bidirectional"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"],
                             bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, decoder).cuda()
    model = nn.DataParallel(model)
    # Setup the model
    model.load_state_dict(torch.load(opt["saved_model"]))
    crit = utils.LanguageModelCriterion()
    get_caption(model, crit, dataset, dataset.get_vocab(), opt)
def main():
    lang1 = "eng"
    lang2 = "fra"
    with open("../data/data/" + lang1 + "-" + lang2 + ".txt", encoding='utf-8') as f:
        lines = f.readlines()

    eng_sentences, fra_sentences = data_loaders.getSentences(lines)
    print(len(eng_sentences), len(fra_sentences))

    eng_lang = Lang(lang1)
    eng_lang.parseSentences(eng_sentences)
    fra_lang = Lang(lang2)
    fra_lang.parseSentences(fra_sentences)
    print("No of eng words: ", len(eng_lang.vocab))
    print("No of fra words: ", len(fra_lang.vocab))

    pairs = data_loaders.createPairs(eng_sentences, fra_sentences)
    print("Length of pairs: ", len(pairs))

    hidden_size = 256
    encoder1 = EncoderRNN(len(eng_lang.vocab), hidden_size).to(device)
    attn_decoder1 = DecoderRNN(len(fra_lang.vocab), hidden_size, len(fra_lang.vocab)).to(device)
    train.trainIters(encoder1, attn_decoder1, 75000, pairs, eng_lang, fra_lang, print_every=5000)
def main(opt):
    dataset = VideoDataset(opt, 'train')
    dataloader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True)
    opt["vocab_size"] = dataset.get_vocab_size()
    encoder = EncoderRNN(
        opt["dim_vid"],
        opt["dim_hidden"],
        bidirectional=bool(opt["bidirectional"]),
        input_dropout_p=opt["input_dropout_p"],
        rnn_cell=opt['rnn_type'],
        rnn_dropout_p=opt["rnn_dropout_p"])
    decoder = DecoderRNN(
        opt["vocab_size"],
        opt["max_len"],
        opt["dim_hidden"],
        opt["dim_word"],
        input_dropout_p=opt["input_dropout_p"],
        rnn_cell=opt['rnn_type'],
        rnn_dropout_p=opt["rnn_dropout_p"],
        bidirectional=bool(opt["bidirectional"]))
    model = S2VTAttModel(encoder, decoder)
    # model = S2VTModel(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"],
    #                   opt['dim_vid'], rnn_cell=opt['rnn_type'], n_layers=opt['num_layers'],
    #                   rnn_dropout_p=opt["rnn_dropout_p"])
    # model = model.cuda()
    crit = utils.LanguageModelCriterion()
    rl_crit = utils.RewardCriterion()
    optimizer = optim.Adam(
        model.parameters(),
        lr=opt["learning_rate"],
        weight_decay=opt["weight_decay"])
    exp_lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=opt["learning_rate_decay_every"],
        gamma=opt["learning_rate_decay_rate"])
    train(dataloader, model, crit, optimizer, exp_lr_scheduler, opt, rl_crit)
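# Illustrative sketch (the values are assumptions, not this project's defaults):
# a minimal opt dict covering the keys main() reads directly above. VideoDataset
# and train() will read additional keys of their own.
example_opt = {
    "batch_size": 128, "dim_vid": 2048, "dim_hidden": 512, "dim_word": 512,
    "max_len": 28, "bidirectional": 0, "rnn_type": "gru",
    "input_dropout_p": 0.2, "rnn_dropout_p": 0.5,
    "learning_rate": 4e-4, "weight_decay": 5e-4,
    "learning_rate_decay_every": 200, "learning_rate_decay_rate": 0.8,
}
# main(example_opt)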
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    SOS_token = 0
    EOS_token = 1
    MASKED_token = 2
    MAX_LENGTH = 42
    hidden_size = 325
    train_iters = 20
    pretrain_train_iters = 2000
    dataset = 'imdb'

    lang_filename = './data/' + dataset + '_lang.pkl'
    if os.path.exists(lang_filename):
        with open(lang_filename, 'rb') as file:
            (lang, lines) = pkl.load(file)
    else:
        lang, lines = prepareData(dataset)
        with open(lang_filename, 'wb') as file:
            pkl.dump((lang, lines), file)

    pretrained_filename = './pretrained/pretrained_lstm_' + dataset + '_' + str(hidden_size) + '_' + str(pretrain_train_iters) + '.pkl'
    model_filename = './pretrained/maskmle_' + dataset + '_' + str(hidden_size) + '_' + str(train_iters) + '.pkl'
    if os.path.exists(pretrained_filename):
        with open(pretrained_filename, 'rb') as file:
            pretrained_lstm = pkl.load(file)
    else:
        raise NotImplementedError('pretrained lstm is not available')

    encoder1 = EncoderRNN(lang.n_words, hidden_size).to(device)
    attn_decoder1 = AttnDecoderRNN(hidden_size, lang.n_words, dropout_p=0.1).to(device)
    print("Total number of trainable parameters:",
          count_parameters(encoder1) + count_parameters(attn_decoder1))

    def copy_lstm_weights(from_, *args):
        # NOTE: assigning the Parameter objects ties (shares) the weights
        # between the modules rather than copying them
        for to_ in args:
            to_.weight_ih_l0 = from_.weight_ih_l0
            to_.weight_hh_l0 = from_.weight_hh_l0
            to_.bias_ih_l0 = from_.bias_ih_l0
            to_.bias_hh_l0 = from_.bias_hh_l0

    copy_lstm_weights(pretrained_lstm.lstm, encoder1.lstm, attn_decoder1.lstm)
    # copy_lstm_weights(pretrained_lstm.lstm, attn_decoder1.lstm)
    encoder1.embedding.weight = pretrained_lstm.embedding.weight
    attn_decoder1.embedding.weight = pretrained_lstm.embedding.weight

    trainIters(encoder1, attn_decoder1, lang, lines, train_iters,
               print_every=train_iters // 20, plot_every=train_iters // 20)
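# Hedged alternative (a sketch, not part of the original): if independent copies
# of the pretrained LSTM weights are wanted instead of tied parameters, copy the
# tensors in place rather than reassigning the Parameter objects.
def clone_lstm_weights(from_, *args):
    with torch.no_grad():
        for to_ in args:
            to_.weight_ih_l0.copy_(from_.weight_ih_l0)
            to_.weight_hh_l0.copy_(from_.weight_hh_l0)
            to_.bias_ih_l0.copy_(from_.bias_ih_l0)
            to_.bias_hh_l0.copy_(from_.bias_hh_l0)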
def main():
    dataset = 'imdb'
    hidden_size = 325
    train_iters = 40
    pretrain_train_iters = 40

    lang, lines = cachePrepareData(dataset)

    PATH = './pretrained/'
    pretrained_filename = PATH + 'pretrained_lstm_' + dataset + '_' + str(hidden_size) + '_' + str(pretrain_train_iters) + '.pt'
    model_filename = 'maskmle_' + dataset + '_' + str(hidden_size) + '_' + str(train_iters) + '.pt'

    encoder1 = EncoderRNN(lang.n_words, hidden_size).to(device)
    encoder1.load_state_dict(torch.load(PATH + 'e_' + model_filename))
    attn_decoder1 = AttnDecoderRNN(hidden_size, lang.n_words, dropout_p=0.1).to(device)
    attn_decoder1.load_state_dict(torch.load(PATH + 'd_' + model_filename))

    print(evaluateRandomly(encoder1, attn_decoder1, lang, lines, 20, 0.5))
def main(opt):
    # use a shallow copy so the test-set sizes below don't overwrite the
    # training opt (the original code aliased the same dict)
    opt_test = dict(opt)
    test_dataset = VideoDataset(opt_test, 'test')
    opt_test["vocab_size"] = test_dataset.get_vocab_size()
    opt_test["seq_length"] = test_dataset.max_len

    dataset = VideoDataset(opt, 'train')
    dataloader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True)
    opt["vocab_size"] = dataset.get_vocab_size()

    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"],
                          opt['dim_vid'], rnn_cell=opt['rnn_type'], n_layers=opt['num_layers'],
                          rnn_dropout_p=opt["rnn_dropout_p"])
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(
            opt["dim_vid"],
            opt["dim_hidden"],
            # bidirectional=opt["bidirectional"],
            input_dropout_p=opt["input_dropout_p"],
            rnn_cell=opt['rnn_type'],
            rnn_dropout_p=opt["rnn_dropout_p"])
        second_lstm = Two_Lstm(
            opt["dim_vid"],
            opt["dim_hidden"],
            # bidirectional=opt["bidirectional"],
            input_dropout_p=opt["input_dropout_p"],
            rnn_cell=opt['rnn_type'],
            rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(
            opt["vocab_size"],
            opt["max_len"],
            opt["dim_hidden"],
            opt["dim_word"],
            input_dropout_p=opt["input_dropout_p"],
            rnn_cell=opt['rnn_type'],
            rnn_dropout_p=opt["rnn_dropout_p"])
            # bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, second_lstm, decoder)
    model = model.cuda()

    crit = utils.LanguageModelCriterion()
    rl_crit = utils.RewardCriterion()
    optimizer = optim.Adam(model.parameters(), lr=opt["learning_rate"], weight_decay=opt["weight_decay"])
    exp_lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=opt["learning_rate_decay_every"],
        gamma=opt["learning_rate_decay_rate"])
    train(dataloader, model, crit, optimizer, exp_lr_scheduler, opt, rl_crit, opt_test, test_dataset)
def main(opt):
    dataset = VideoDataset(opt, "test")
    opt["vocab_size"] = dataset.get_vocab_size()
    opt["seq_length"] = dataset.max_len
    encoder = EncoderRNN(opt["dim_vid"], opt["dim_hidden"],
                         bidirectional=bool(opt["bidirectional"]),
                         input_dropout_p=opt["input_dropout_p"],
                         rnn_dropout_p=opt["rnn_dropout_p"])
    decoder = DecoderRNN(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"],
                         input_dropout_p=opt["input_dropout_p"],
                         rnn_dropout_p=opt["rnn_dropout_p"],
                         bidirectional=bool(opt["bidirectional"]))
    model = EncoderDecoderModel(encoder, decoder).cuda()
    model = nn.DataParallel(model)
    model.load_state_dict(torch.load(opt["saved_model"]))
    crit = utils.LanguageModelCriterion()
    test(model, crit, dataset, dataset.get_vocab(), opt)
def main():
    embedding = nn.Embedding(voc.num_words, hidden_size)
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)

    # load the checkpoint once instead of three separate torch.load calls
    checkpoint = torch.load(model_save_pth, map_location=device)
    encoder.load_state_dict(checkpoint['en'])
    decoder.load_state_dict(checkpoint['de'])
    # encoder = encoder.to(device)
    # decoder = decoder.to(device)
    encoder.eval()
    decoder.eval()

    searcher = GreedySearchDecoder(encoder, decoder)
    for sentence in pick_n_valid_sentences(10):
        decoded_words = evaluate(searcher, sentence)
        print('Human: {}'.format(sentence))
        print('Bot: {}'.format(''.join(decoded_words)))
def main(opt):
    dataset = VideoDataset(opt, 'train')
    dataloader = DataLoader(dataset, batch_size=opt["batch_size"], num_workers=8, shuffle=True)
    opt["vocab_size"] = dataset.get_vocab_size()
    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"],
                          opt['dim_vid'], rnn_cell=opt['rnn_type'], n_layers=opt['num_layers'],
                          bidirectional=opt["bidirectional"],
                          rnn_dropout_p=opt["rnn_dropout_p"]).cuda()
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(opt["dim_vid"], opt["dim_hidden"],
                             n_layers=opt['num_layers'],
                             bidirectional=opt["bidirectional"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_cell=opt['rnn_type'],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"],
                             n_layers=opt['num_layers'],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_cell=opt['rnn_type'],
                             rnn_dropout_p=opt["rnn_dropout_p"],
                             bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, decoder).cuda()
    crit = utils.LanguageModelCriterion()
    rl_crit = utils.RewardCriterion()
    optimizer = optim.Adam(model.parameters(), lr=opt["learning_rate"], weight_decay=opt["weight_decay"])
    exp_lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=opt["learning_rate_decay_every"],
        gamma=opt["learning_rate_decay_rate"])
    model.load_state_dict(
        torch.load("C:\\Users\\Shumpu\\VideoCaptioningAttack\\video_caption_pytorch\\save\\vgg16_model_460.pth"))
    train(dataloader, model, crit, optimizer, exp_lr_scheduler, opt, rl_crit)
def main():
    with open("data/vocab.pkl", 'rb') as f:
        vocab = pickle.load(f)

    img_path = "data/flickr7k_images"
    cap_path = "data/factual_train.txt"
    styled_path = "data/humor/funny_train.txt"
    data_loader = get_data_loader(img_path, cap_path, vocab, 3)
    styled_data_loader = get_styled_data_loader(styled_path, vocab, 3)

    encoder = EncoderRNN(voc_size=60376, emb_size=300, hidden_size=300)
    decoder = FactoredLSTM(300, 512, 512, len(vocab))

    if torch.cuda.is_available():
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    # for i, (images, captions, lengths) in enumerate(data_loader):
    for i, (captions, lengths) in enumerate(styled_data_loader):
        # images = Variable(images, volatile=True)
        captions = Variable(captions.long())
        if torch.cuda.is_available():
            # images = images.cuda()
            captions = captions.cuda()

        # features = encoder(images)
        outputs = decoder(captions, features=None, mode="humorous")
        print(lengths - 1)
        print(outputs)
        print(captions[:, 1:])
        loss = masked_cross_entropy(outputs, captions[:, 1:].contiguous(), lengths - 1)
        print(loss)
        break
def main():
    # load vocabulary
    with open('data/vocab.pkl', 'rb') as f:
        vocab = pickle.load(f)

    # build model
    encoder = EncoderRNN(voc_size=60736, emb_size=300, hidden_size=300)
    decoder = FactoredLSTM(300, 512, 512, len(vocab))
    encoder.load_state_dict(torch.load('pretrained_models/encoder-4.pkl'))
    decoder.load_state_dict(torch.load('pretrained_models/decoder-4.pkl'))

    # prepare images
    # transform = transforms.Compose([
    #     Rescale((224, 224)),
    #     transforms.ToTensor()
    # ])
    # img_names, img_list = load_sample_images('sample_images/', transform)
    # image = to_var(img_list[30], volatile=True)

    data_loader = get_data_loader('', 'data/factual_train.txt', vocab, 1)

    # if torch.cuda.is_available():
    #     encoder = encoder.cuda()
    #     decoder = decoder.cuda()

    for i, (messages, m_lengths, targets, t_lengths) in enumerate(data_loader):
        print(''.join([vocab.i2w[x] for x in messages[0]]))
        messages = to_var(messages.long())
        targets = to_var(targets.long())

        # forward, backward and optimize
        output, features = encoder(messages, list(m_lengths))
        outputs = decoder.sample(features, mode="humorous")
        caption = [vocab.i2w[x] for x in outputs]
        print(''.join(caption))
        print('-------')
def init():
    print("\tInitialising sentences")
    print("\t\tLoading and cleaning json files")
    json_of_convs = load_all_json_conv('./Dataset/messages')
    print("\t\tLoading two person convs")
    duo_conversations = get_chat_friend_and_me(json_of_convs)
    print("\t\tMaking two person convs discussions")
    discussions = get_discussions(duo_conversations)
    print("\t\tCreating pairs for training")
    pairs_of_sentences = make_pairs(discussions)
    print(f"\t\t{len(pairs_of_sentences)} different pairs")
    print("\t\tCreating Vocabulary")
    voc = Voc()
    print("\t\tPopulating Vocabulary")
    voc.createVocFromPairs(pairs_of_sentences)
    print(f"\t\tVocabulary of : {voc.num_words} different words")

    print('\tBuilding encoder and decoder ...')
    embedding = nn.Embedding(voc.num_words, HIDDEN_SIZE)
    encoder = EncoderRNN(HIDDEN_SIZE, embedding, ENCODER_N_LAYERS, DROPOUT)
    decoder = LuongAttnDecoderRNN(ATTN_MODEL, embedding, HIDDEN_SIZE, voc.num_words, DECODER_N_LAYERS, DROPOUT)
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=LEARNING_RATE)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=LEARNING_RATE * DECODER_LEARNING_RATIO)

    checkpoint = None
    if LOADFILENAME:
        print("\t\tLoading last training")
        checkpoint = torch.load(LOADFILENAME)
        # If loading a model trained on GPU onto CPU:
        # checkpoint = torch.load(LOADFILENAME, map_location=torch.device('cpu'))
        encoder_sd = checkpoint['en']
        decoder_sd = checkpoint['de']
        encoder_optimizer_sd = checkpoint['en_opt']
        decoder_optimizer_sd = checkpoint['de_opt']
        embedding_sd = checkpoint['embedding']
        voc.__dict__ = checkpoint['voc_dict']
        print("\t\tPopulating from last training")
        embedding.load_state_dict(embedding_sd)
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)
        encoder_optimizer.load_state_dict(encoder_optimizer_sd)
        decoder_optimizer.load_state_dict(decoder_optimizer_sd)

    encoder = encoder.to(DEVICE)
    decoder = decoder.to(DEVICE)
    return (encoder, decoder, encoder_optimizer, decoder_optimizer,
            embedding, voc, pairs_of_sentences, checkpoint)
def setup():
    global char2index
    global index2char
    global SOS_token
    global EOS_token
    global PAD_token
    global model
    global device

    char2index, index2char = label_loader.load_label_json("../data/kor_syllable_zeroth.json")
    SOS_token = char2index['<s>']
    EOS_token = char2index['</s>']
    PAD_token = char2index['_']
    print(f"device: {device}")

    input_size = 161
    enc = EncoderRNN(input_size, 512, n_layers=3, dropout_p=0.3,
                     bidirectional=True, rnn_cell='LSTM', variable_lengths=False)
    dec = DecoderRNN(len(char2index), 128, 512, 512, SOS_token, EOS_token,
                     n_layers=2, rnn_cell='LSTM', dropout_p=0.3,
                     bidirectional_encoder=True)
    model = Seq2Seq(enc, dec).to(device)

    model_path = "../models/zeroth_korean_trimmed/LSTM_512x3_512x2_zeroth_korean_trimmed/final.pth"
    print("Loading checkpoint model %s" % model_path)
    state = torch.load(model_path, map_location=device)
    model.load_state_dict(state['model'])
    print('Model loaded')
def main(self, opt):
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    video_path = self.ent1.get().replace("/", "\\")
    image_feats = self.extract_image_feats(video_path)
    image_feats = torch.from_numpy(image_feats).type(torch.FloatTensor).unsqueeze(0)

    encoder = EncoderRNN(opt["dim_vid"], opt["dim_hidden"],
                         bidirectional=bool(opt["bidirectional"]),
                         input_dropout_p=opt["input_dropout_p"],
                         rnn_dropout_p=opt["rnn_dropout_p"])
    decoder = DecoderRNN(16860,  # hard-coded vocabulary size
                         opt["max_len"], opt["dim_hidden"], opt["dim_word"],
                         input_dropout_p=opt["input_dropout_p"],
                         rnn_dropout_p=opt["rnn_dropout_p"],
                         bidirectional=bool(opt["bidirectional"]))
    model = S2VTAttModel(encoder, decoder).cuda()
    model.load_state_dict(torch.load("data/save/model_500.pth"))
    model.eval()

    # inference options; kept in a separate dict so the original opt is not clobbered
    infer_opt = {'child_sum': True, 'temporal_attention': True, 'multimodel_attention': True}
    with torch.no_grad():
        _, seq_preds = model(image_feats.cuda(), mode='inference', opt=infer_opt)
    vocab = json.load(open('data/info.json'))['ix_to_word']
    self.sent = NLUtils.decode_sequence(vocab, seq_preds)

    hasil = self.translator.translate(self.sent[0], dest='id')
    print(self.sent[0])
    self.hasilPred.configure(text=self.sent[0])
    self.hasiltrans.configure(text=hasil.text)
    # coba = self.sent[0]
    self.textToSpeech(self.sent[0], hasil.text)
    del seq_preds
    torch.cuda.empty_cache()
def main(opt):
    train_dataset = VideoDataset(opt, 'train')
    train_dataloader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True)
    opt.vocab_size = train_dataset.vocab_size
    opt.seq_length = train_dataset.seq_length
    val_dataset = VideoDataset(opt, 'val')
    val_dataloader = DataLoader(val_dataset, batch_size=opt.batch_size, shuffle=True)

    if opt.model == 'S2VTModel':
        model = S2VTModel(opt.vocab_size, opt.seq_length, opt.dim_hidden, opt.dim_word,
                          rnn_dropout_p=opt.rnn_dropout_p).cuda()
    elif opt.model == "Vid2seq":
        encoder = EncoderRNN(opt.dim_vid, opt.dim_hidden)
        decoder = DecoderRNN(opt.vocab_size, opt.seq_length, opt.dim_hidden,
                             use_attention=True, rnn_dropout_p=opt.rnn_dropout_p)
        model = Vid2seq(encoder, decoder).cuda()

    crit = utils.LanguageModelCriterion()
    rl_crit = utils.RewardCriterion()
    optimizer = optim.Adam(model.parameters(), lr=opt.learning_rate, weight_decay=opt.weight_decay)
    exp_lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=opt.learning_rate_decay_every,
        gamma=opt.learning_rate_decay_rate)

    if not os.path.isdir(opt.checkpoint_path):
        os.mkdir(opt.checkpoint_path)
    train(train_dataloader, val_dataloader, model, crit, optimizer, exp_lr_scheduler, opt, rl_crit)
def main(opt):
    dataset = VideoDataset(opt, "test")
    opt["vocab_size"] = dataset.get_vocab_size()
    opt["seq_length"] = dataset.max_len
    if opt['beam_size'] != 1:
        assert opt["batch_size"] == 1
    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"],
                          opt['dim_vid'], n_layers=opt['num_layers'], rnn_cell=opt['rnn_type'],
                          bidirectional=opt["bidirectional"], rnn_dropout_p=opt["rnn_dropout_p"])
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(opt["dim_vid"], opt["dim_hidden"],
                             n_layers=opt['num_layers'], rnn_cell=opt['rnn_type'],
                             bidirectional=opt["bidirectional"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"],
                             n_layers=opt['num_layers'], rnn_cell=opt['rnn_type'],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"],
                             bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, decoder)
    else:
        return

    if torch.cuda.device_count() > 1:
        print("{} devices detected, switch to parallel model.".format(torch.cuda.device_count()))
        model = nn.DataParallel(model)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    # Setup the model
    model.load_state_dict(torch.load(opt["saved_model"]))
    crit = utils.LanguageModelCriterion()
    test(model, crit, dataset, dataset.get_vocab(), opt)
def main(opt):
    dataset_test = VideoDataset(opt, 'test')
    dataloader_test = DataLoader(dataset_test, batch_size=opt["batch_size"], shuffle=False)
    opt["obj_vocab_size"] = dataset_test.get_obj_vocab_size()
    opt["rel_vocab_size"] = dataset_test.get_rel_vocab_size()
    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"],
                          opt['dim_vid'], rnn_cell=opt['rnn_type'], n_layers=opt['num_layers'],
                          rnn_dropout_p=opt["rnn_dropout_p"])
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(opt["dim_vid"], opt["dim_hidden"],
                             bidirectional=opt["bidirectional"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_cell=opt['rnn_type'],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["obj_vocab_size"], opt["rel_vocab_size"], opt["max_len"],
                             opt["dim_hidden"], opt["dim_word"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_cell=opt['rnn_type'],
                             rnn_dropout_p=opt["rnn_dropout_p"],
                             bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, decoder)
    model = model.cuda()
    model.load_state_dict(torch.load(opt['ckpt_path']))
    crit = utils.ObjRelCriterion()
    test(model, crit, opt, dataloader_test)
def main(args):
    config_path = os.path.join(args.config_path, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    print('[-] Loading pickles')
    dataset_path = Path(config["dataset_path"])
    input_lang = CustomUnpickler(open(dataset_path / 'input_lang.pkl', 'rb')).load()
    output_lang = CustomUnpickler(open(dataset_path / 'output_lang.pkl', 'rb')).load()
    pairs = CustomUnpickler(open(dataset_path / 'pairs.pkl', 'rb')).load()
    # input_lang = load_pkl(dataset_path / 'input_lang.pkl')
    # output_lang = load_pkl(dataset_path / 'output_lang.pkl')
    # pairs = load_pkl(dataset_path / 'pairs.pkl')

    hidden_size = config["model_cfg"]["hidden_size"]
    max_len = config["max_len"]
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, max_len, dropout_p=0.1).to(device)

    print('[-] Loading models')
    ckpt = torch.load(config["models_path"] + 'models.ckpt')
    encoder.load_state_dict(ckpt['encoder'])
    encoder.to(device)
    decoder.load_state_dict(ckpt['decoder'])
    decoder.to(device)

    evaluator = Evaluater(device, encoder, decoder, input_lang, output_lang, max_len)
    # Evaluate random samples
    evaluator.evaluateRandomly(pairs)
    evaluator.evaluateAndShowAttention("elle a cinq ans de moins que moi .")
    # evaluator.evaluateAndShowAttention("elle est trop petit .")
    # evaluator.evaluateAndShowAttention("je ne crains pas de mourir .")
    # evaluator.evaluateAndShowAttention("c est un jeune directeur plein de talent .")
    plt.savefig('attention.png')
def main():
    global char2index
    global index2char
    global SOS_token
    global EOS_token
    global PAD_token

    parser = argparse.ArgumentParser(description='Speech hackathon Baseline')
    parser.add_argument('--hidden_size', type=int, default=512,
                        help='hidden size of model (default: 512)')
    parser.add_argument('--layer_size', type=int, default=3,
                        help='number of layers of model (default: 3)')
    parser.add_argument('--dropout', type=float, default=0.2,
                        help='dropout rate in training (default: 0.2)')
    parser.add_argument('--bidirectional', action='store_true',
                        help='use bidirectional RNN for encoder (default: False)')
    parser.add_argument('--use_attention', action='store_true',
                        help='use attention between encoder-decoder (default: False)')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='batch size in training (default: 32)')
    parser.add_argument('--workers', type=int, default=4,
                        help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs', type=int, default=10,
                        help='number of max epochs in training (default: 10)')
    parser.add_argument('--lr', type=float, default=1e-04,
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--teacher_forcing', type=float, default=0.5,
                        help='teacher forcing ratio in decoder (default: 0.5)')
    parser.add_argument('--max_len', type=int, default=80,
                        help='maximum characters of sentence (default: 80)')
    parser.add_argument('--no_cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--save_name', type=str, default='model',
                        help='the name of model in nsml or local')
    parser.add_argument('--mode', type=str, default='train')
    parser.add_argument('--pause', type=int, default=0)
    args = parser.parse_args()

    char2index, index2char = label_loader.load_label('./hackathon.labels')
    SOS_token = char2index['<s>']
    EOS_token = char2index['</s>']
    PAD_token = char2index['_']

    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')

    # N_FFT: defined in loader.py
    feature_size = N_FFT / 2 + 1

    enc = EncoderRNN(feature_size, args.hidden_size,
                     input_dropout_p=args.dropout, dropout_p=args.dropout,
                     n_layers=args.layer_size, bidirectional=args.bidirectional,
                     rnn_cell='gru', variable_lengths=False)
    dec = DecoderRNN(len(char2index), args.max_len,
                     args.hidden_size * (2 if args.bidirectional else 1),
                     SOS_token, EOS_token,
                     n_layers=args.layer_size, rnn_cell='gru',
                     bidirectional=args.bidirectional,
                     input_dropout_p=args.dropout, dropout_p=args.dropout,
                     use_attention=args.use_attention)
    model = Seq2seq(enc, dec)
    model.flatten_parameters()

    for param in model.parameters():
        param.data.uniform_(-0.08, 0.08)

    # lnw add: get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    model = nn.DataParallel(model).to(device)
    optimizer = optim.Adam(model.module.parameters(), lr=args.lr)
    criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=PAD_token).to(device)

    bind_model(model, optimizer)

    if args.pause == 1:
        nsml.paused(scope=locals())
    if args.mode != "train":
        return

    data_list = os.path.join(DATASET_PATH, 'train_data', 'data_list.csv')
    wav_paths = list()
    script_paths = list()
    with open(data_list, 'r') as f:
        for line in f:
            # line: "aaa.wav,aaa.label"
            wav_path, script_path = line.strip().split(',')
            wav_paths.append(os.path.join(DATASET_PATH, 'train_data', wav_path))
            script_paths.append(os.path.join(DATASET_PATH, 'train_data', script_path))

    best_loss = 1e10
    begin_epoch = 0

    # load all target scripts to reduce disk I/O
    target_path = os.path.join(DATASET_PATH, 'train_label')
    load_targets(target_path)

    # lnw: valid_ratio 0.05 -> 0.1 or 0.03
    # train_batch_num, train_dataset_list, valid_dataset = split_dataset(args, wav_paths, script_paths, valid_ratio=0.05)
    train_batch_num, train_dataset_list, valid_dataset = split_dataset(
        args, wav_paths, script_paths, valid_ratio=0.03)

    # lnw add
    lstart_time = datetime.now()
    print("Start time : " + str(lstart_time))

    # lnw block
    # logger.info('start')

    train_begin = time.time()

    for epoch in range(begin_epoch, args.max_epochs):
        # lnw add
        lepoch_start = datetime.now()
        print(epoch, "epoch Start time : " + str(lepoch_start))

        train_queue = queue.Queue(args.workers * 2)
        train_loader = MultiLoader(train_dataset_list, train_queue, args.batch_size, args.workers)
        train_loader.start()

        # lnw modified print_batch 10 -> 100, 450
        # train_loss, train_cer = train(model, train_batch_num, train_queue, criterion, optimizer, device, train_begin, args.workers, 10, args.teacher_forcing)
        train_loss, train_cer = train(model, train_batch_num, train_queue, criterion,
                                      optimizer, device, train_begin, args.workers,
                                      450, args.teacher_forcing)
        logger.info('Epoch %d (Training) Loss %0.4f CER %0.4f' % (epoch, train_loss, train_cer))

        train_loader.join()

        valid_queue = queue.Queue(args.workers * 2)
        valid_loader = BaseDataLoader(valid_dataset, valid_queue, args.batch_size, 0)
        valid_loader.start()

        eval_loss, eval_cer = evaluate(model, valid_loader, valid_queue, criterion, device)
        logger.info('Epoch %d (Evaluate) Loss %0.4f CER %0.4f' % (epoch, eval_loss, eval_cer))

        valid_loader.join()

        nsml.report(False, step=epoch,
                    train_epoch__loss=train_loss, train_epoch__cer=train_cer,
                    eval__loss=eval_loss, eval__cer=eval_cer)

        best_model = (eval_loss < best_loss)
        nsml.save(args.save_name)

        if best_model:
            nsml.save('best')
            best_loss = eval_loss
            # lnw add: save best model
            torch.save(model, 'ModelBestSave.pt')

        # lnw: epoch end time and duration
        lepoch_end = datetime.now()
        print(epoch, "epoch End time: " + str(lepoch_end),
              "Duration:", str(lepoch_end - lepoch_start),
              "StartTime-NowTime:", str(lepoch_end - lstart_time))

    # lnw add
    lend_time = datetime.now()
    print("End time : " + str(lend_time))
    print('Duration: {}'.format(lend_time - lstart_time))
def main():
    input_lang = Lang('data/WORDMAP_en.json')
    output_lang = Lang('data/WORDMAP_zh.json')
    print("input_lang.n_words: " + str(input_lang.n_words))
    print("output_lang.n_words: " + str(output_lang.n_words))

    train_data = TranslationDataset('train')
    val_data = TranslationDataset('valid')

    # Initialize encoder & decoder models
    encoder = EncoderRNN(input_lang.n_words, hidden_size, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, hidden_size, output_lang.n_words, decoder_n_layers, dropout)
    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Initialize optimizers
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

    # Initializations
    print('Initializing ...')
    train_batch_time = ExpoAverageMeter()  # forward prop. + back prop. time
    train_losses = ExpoAverageMeter()  # loss (per word decoded)
    val_batch_time = ExpoAverageMeter()
    val_losses = ExpoAverageMeter()

    best_loss = 100000
    epochs_since_improvement = 0

    # Epochs
    for epoch in range(start_epoch, epochs):
        # Decay learning rate if there is no improvement for 8 consecutive epochs,
        # and terminate training after 20
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            adjust_learning_rate(encoder_optimizer, 0.8)

        # One epoch's training
        # Ensure dropout layers are in train mode
        encoder.train()
        decoder.train()

        start = time.time()

        # Batches
        for i_batch in range(len(train_data)):
            input_variable, lengths, target_variable, mask, max_target_len = train_data[i_batch]

            train_loss = train(input_variable, lengths, target_variable, mask, max_target_len,
                               encoder, decoder, encoder_optimizer, decoder_optimizer)

            # Keep track of metrics
            train_losses.update(train_loss)
            train_batch_time.update(time.time() - start)
            start = time.time()

            # Print status
            if i_batch % print_every == 0:
                print('[{0}] Epoch: [{1}][{2}/{3}]\t'
                      'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                          timestamp(), epoch, i_batch, len(train_data),
                          batch_time=train_batch_time, loss=train_losses))

        # One epoch's validation
        start = time.time()

        # Batches
        for i_batch in range(len(val_data)):
            input_variable, lengths, target_variable, mask, max_target_len = val_data[i_batch]

            val_loss = valid(input_variable, lengths, target_variable, mask, max_target_len,
                             encoder, decoder)

            # Keep track of metrics
            val_losses.update(val_loss)
            val_batch_time.update(time.time() - start)
            start = time.time()

            # Print status
            if i_batch % print_every == 0:
                print('Validation: [{0}/{1}]\t'
                      'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                          i_batch, len(val_data),
                          batch_time=val_batch_time, loss=val_losses))

        val_loss = val_losses.avg
        print('\n * LOSS - {loss:.3f}\n'.format(loss=val_loss))

        # Check if there was an improvement
        is_best = val_loss < best_loss
        best_loss = min(best_loss, val_loss)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0

        save_checkpoint(epoch, encoder, decoder, encoder_optimizer, decoder_optimizer,
                        input_lang, output_lang, val_loss, is_best)

        # Initialize search module
        searcher = GreedySearchDecoder(encoder, decoder)
        for input_sentence, target_sentence in pick_n_valid_sentences(input_lang, output_lang, 10):
            decoded_words = evaluate(searcher, input_sentence, input_lang, output_lang)
            print('> {}'.format(input_sentence))
            print('= {}'.format(target_sentence))
            print('< {}'.format(''.join(decoded_words)))

        # Reshuffle train and valid samples
        np.random.shuffle(train_data.samples)
        np.random.shuffle(val_data.samples)