def model_load(self):
    encoder = Encoder(**self.checkpoint['encoder_parameter'])
    decoder = AttentionDecoder(**self.checkpoint['decoder_parameter'])
    model = Seq2Seq(encoder, decoder, self.seq_len, self.get_attention)
    model.load_state_dict(self.checkpoint['model_state_dict'])
    model.to(device)
    model.eval()
    return model
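# A minimal sketch of the save side that model_load() above expects: the
# checkpoint is assumed to be a dict holding the constructor kwargs for both
# sub-modules plus the trained weights. Only the three top-level keys are
# taken from the loader; the file name is an example, not the project's.
checkpoint = {
    'encoder_parameter': encoder_parameter,   # kwargs consumed by Encoder(**...)
    'decoder_parameter': decoder_parameter,   # kwargs consumed by AttentionDecoder(**...)
    'model_state_dict': model.state_dict(),
}
torch.save(checkpoint, 'checkpoint.pth')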
def main(args):
    global batch_size
    batch_size = args.batch_size
    hidden_size = args.hidden_size
    w_embed_size = args.w_embed_size
    lr = args.lr

    train_file = 'data/train_data_nv.txt'
    vocab = Vocab()
    vocab.build(train_file)

    if args.pre_trained_embed == 'n':
        encoder = Encoder(vocab.n_words, w_embed_size, hidden_size, batch_size).to(device)
        decoder = AttentionDecoder(vocab.n_words, w_embed_size, hidden_size, batch_size).to(device)
    else:
        # load pre-trained embedding
        weight = vocab.load_weight(path="data/komoran_hd_2times.vec")
        encoder = Encoder(vocab.n_words, w_embed_size, hidden_size, batch_size, weight).to(device)
        decoder = AttentionDecoder(vocab.n_words, w_embed_size, hidden_size, batch_size, weight).to(device)

    if args.encoder:
        encoder.load_state_dict(torch.load(args.encoder))
        print("[INFO] load encoder with %s" % args.encoder)
    if args.decoder:
        decoder.load_state_dict(torch.load(args.decoder))
        print("[INFO] load decoder with %s" % args.decoder)

    train_data = prep.read_train_data(train_file)
    train_loader = data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

    # ev.evaluateRandomly(encoder, decoder, train_data, vocab, batch_size)
    # ev.evaluate_with_print(encoder, vocab, batch_size)

    # initialize
    max_a_at_5, max_a_at_1 = ev.evaluate_similarity(encoder, vocab, batch_size, decoder=decoder)
    # max_a_at_5, max_a_at_1 = 0, 0
    max_bleu = 0
    total_epoch = args.epoch

    print(args)
    for epoch in range(1, total_epoch + 1):
        random.shuffle(train_data)
        trainIters(args, epoch, encoder, decoder, total_epoch, train_data, vocab,
                   train_loader, print_every=2, learning_rate=lr)

        if epoch % 20 == 0:
            a_at_5, a_at_1 = ev.evaluate_similarity(encoder, vocab, batch_size, decoder=decoder)
            if a_at_1 > max_a_at_1:
                max_a_at_1 = a_at_1
                print("[INFO] New record! accuracy@1: %.4f" % a_at_1)
            if a_at_5 > max_a_at_5:
                max_a_at_5 = a_at_5
                print("[INFO] New record! accuracy@5: %.4f" % a_at_5)
                if args.save == 'y':
                    torch.save(encoder.state_dict(), 'encoder-max.model')
                    torch.save(decoder.state_dict(), 'decoder-max.model')
                    print("[INFO] new model saved")

            bleu = ev.evaluateRandomly(encoder, decoder, train_data, vocab, batch_size)
            if bleu > max_bleu:
                max_bleu = bleu
                if args.save == 'y':
                    torch.save(encoder.state_dict(), 'encoder-max-bleu.model')
                    torch.save(decoder.state_dict(), 'decoder-max-bleu.model')
                    print("[INFO] new model saved")

    print("Done! max accuracy@5: %.4f, max accuracy@1: %.4f" % (max_a_at_5, max_a_at_1))
    print("max bleu: %.2f" % max_bleu)
    if args.save == 'y':
        torch.save(encoder.state_dict(), 'encoder-last.model')
        torch.save(decoder.state_dict(), 'decoder-last.model')
args = parser.parse_args()

global batch_size
batch_size = args.batch_size
hidden_size = args.hidden_size
w_embed_size = args.w_embed_size

train_file = 'data/train_data_nv.txt'
vocab = Vocab()
vocab.build(train_file)

if args.pre_trained_embed == 'n':
    encoder = Encoder(vocab.n_words, w_embed_size, hidden_size, batch_size).to(device)
    decoder = AttentionDecoder(vocab.n_words, w_embed_size, hidden_size, batch_size).to(device)
    # decoder = Decoder(vocab.n_words, w_embed_size, hidden_size, batch_size).to(device)
else:
    # load pre-trained embedding
    weight = vocab.load_weight(path="data/komoran_hd_2times.vec")
    encoder = Encoder(vocab.n_words, w_embed_size, hidden_size, batch_size, weight).to(device)
    decoder = AttentionDecoder(vocab.n_words, w_embed_size, hidden_size, batch_size, weight).to(device)
    # decoder = Decoder(vocab.n_words, w_embed_size, hidden_size, batch_size, weight).to(device)

if args.encoder:
    encoder.load_state_dict(torch.load(args.encoder))
    print("[INFO] load encoder with %s" % args.encoder)
if args.decoder:
    decoder.load_state_dict(torch.load(args.decoder))
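# The two blocks above read several command-line options but the parser itself
# is not shown. A minimal argparse setup consistent with those accesses might
# look like this; the defaults are guesses, not values from the original project.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--hidden_size', type=int, default=256)
parser.add_argument('--w_embed_size', type=int, default=128)
parser.add_argument('--lr', type=float, default=1e-3)
parser.add_argument('--epoch', type=int, default=100)
parser.add_argument('--pre_trained_embed', choices=['y', 'n'], default='n')
parser.add_argument('--save', choices=['y', 'n'], default='y')
parser.add_argument('--encoder', help='path to a saved encoder state_dict')
parser.add_argument('--decoder', help='path to a saved decoder state_dict')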
def test_forward(self):
    # gradcheck expects a tuple of inputs; double precision keeps the
    # numerical gradient estimates stable
    inputs = (Variable(torch.randn(3, 2, 4).double(), requires_grad=True),)
    test = gradcheck(AttentionDecoder(2, 4, 1).double(), inputs, eps=1e-6, atol=1e-4)
    print(test)
def test_forward_dimensions(self):
    inputs = Variable(torch.randn(3, 2, 4).double(), requires_grad=True)
    output = AttentionDecoder(2, 4, 1).double().forward(inputs)
    # the decoder should return a 3-D tensor
    assert len(output.size()) == 3
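# For reference, gradcheck's calling convention on a known module. This is a
# self-contained illustration using nn.Linear, not part of the test suite above.
import torch
from torch.autograd import gradcheck

lin = torch.nn.Linear(4, 3).double()           # double precision, as gradcheck recommends
x = (torch.randn(2, 4, dtype=torch.double, requires_grad=True),)
print(gradcheck(lin, x, eps=1e-6, atol=1e-4))  # True when analytic and numeric grads agree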
############### model, optimizer ########################
logging.info("loading model and optimizer...")
if torch.cuda.is_available():
    device = torch.device("cuda")
    logging.info("using {} GPU(s)".format(torch.cuda.device_count()))
else:
    device = torch.device("cpu")
    logging.info("using CPU")

if CONFIG.hyperparam.attention:
    model = AttentionDecoder(
        feature_dim=CONFIG.hyperparam.feature.dim,
        emb_dim=CONFIG.hyperparam.rnn.word_emb_dim,
        memory_dim=CONFIG.hyperparam.rnn.memory_dim,
        vocab_size=len(tokenizer),
        max_seqlen=CONFIG.hyperparam.tokenizer.max_len,
        dropout_p=CONFIG.hyperparam.rnn.dropout_prob,
        ss_prob=CONFIG.hyperparam.rnn.scheduled_sampling_prob,
        bos_idx=tokenizer.bosidx,
        pad_idx=tokenizer.padidx,
    )
else:
    model = SimpleDecoder(
        feature_dim=CONFIG.hyperparam.feature.dim,
        emb_dim=CONFIG.hyperparam.rnn.word_emb_dim,
        memory_dim=CONFIG.hyperparam.rnn.memory_dim,
        vocab_size=len(tokenizer),
        max_seqlen=CONFIG.hyperparam.tokenizer.max_len,
        dropout_p=CONFIG.hyperparam.rnn.dropout_prob,
        ss_prob=CONFIG.hyperparam.rnn.scheduled_sampling_prob,
        bos_idx=tokenizer.bosidx,
        pad_idx=tokenizer.padidx,  # the fragment is cut off here; the close is assumed to mirror the branch above
    )
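# The section header above mentions an optimizer, but its construction falls
# outside the fragment. A plausible continuation, with the CONFIG attribute
# path (hyperparam.optimizer.lr) assumed rather than taken from the project:
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=CONFIG.hyperparam.optimizer.lr)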
s = '{} {:.0f}% {} {:.4f}'.format(step, 100 * step / n_steps, since(start), loss)
print(s)

n_steps = 75000
print_every = 1000
plot_every = 100
teacher_forcing_ratio = 0.5
max_len = max_len + 1  # an <EOS> token is appended to the end of every sentence
hidden_size = 256

if __name__ == '__main__':
    encoder = Encoder(source_lang_dict.n_words, hidden_size)
    decoder = AttentionDecoder(hidden_size, target_lang_dict.n_words, max_len)
    sos_var = Variable(torch.cuda.LongTensor([target_lang_dict.word2idx['SOS']]))
    eos_idx = target_lang_dict.word2idx['EOS']
    e_optimizer = torch.optim.SGD(encoder.parameters(), lr=1e-2)
    d_optimizer = torch.optim.SGD(decoder.parameters(), lr=1e-2)
    loss_fn = torch.nn.functional.nll_loss
    encoder.cuda()
    decoder.cuda()
    print_loss = 0
    plot_loss = 0
    start = time.time()
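# since(start) is used in the log line above but not defined in this fragment;
# a minimal sketch of an elapsed-time formatter with that signature:
import time

def since(start):
    """Format the seconds elapsed since `start` as 'Mm Ss'."""
    s = time.time() - start
    m = int(s // 60)
    return '%dm %ds' % (m, s - m * 60)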
def train(self):
    encoder_parameter = self.encoder_parameter()
    decoder_parameter = self.decoder_parameter()

    encoder = Encoder(**encoder_parameter)
    decoder = AttentionDecoder(**decoder_parameter)
    model = Seq2Seq(encoder, decoder, self.args.sequence_size, self.args.get_attention)
    model.train()
    model.to(device)
    optimizer = opt.Adam(model.parameters(), lr=self.args.learning_rate)

    epoch_step = len(self.train_loader) + 1
    total_step = self.args.epochs * epoch_step
    teacher_forcing_ratios = self.cal_teacher_forcing_ratio(total_step)

    step = 0
    attention = None
    for epoch in range(self.args.epochs):
        for i, data in enumerate(self.train_loader, 0):
            try:
                src_input, trg_input, trg_output = data
                # the schedule holds one ratio per global step, so index it by
                # `step` (indexing by the per-epoch counter `i` would replay
                # the first epoch's ratios forever)
                if self.args.get_attention:
                    output, attention = model(src_input, trg_input,
                                              teacher_forcing_rate=teacher_forcing_ratios[step])
                else:
                    output = model(src_input, trg_input,
                                   teacher_forcing_rate=teacher_forcing_ratios[step])

                # Get loss & accuracy
                loss, accuracy = self.loss_accuracy(output, trg_output)

                # Training Log
                if step % self.args.train_step_print == 0:
                    self.writer.add_scalar('train/loss', loss.item(), step)
                    self.writer.add_scalar('train/accuracy', accuracy.item(), step)
                    print('[Train] epoch : {0:2d} iter: {1:4d}/{2:4d} step : {3:6d}/{4:6d} '
                          '=> loss : {5:10f} accuracy : {6:12f}'.format(
                              epoch, i, epoch_step, step, total_step,
                              loss.item(), accuracy.item()))

                # Validation Log
                if step % self.args.val_step_print == 0:
                    with torch.no_grad():
                        val_loss, val_accuracy = self.val(
                            model, teacher_forcing_ratio=teacher_forcing_ratios[step])
                        self.writer.add_scalar('val/loss', val_loss, step)
                        self.writer.add_scalar('val/accuracy', val_accuracy, step)
                        print('[ Val ] epoch : {0:2d} iter: {1:4d}/{2:4d} step : {3:6d}/{4:6d} '
                              '=> loss : {5:10f} accuracy : {6:12f}'.format(
                                  epoch, i, epoch_step, step, total_step,
                                  val_loss, val_accuracy))

                # Save Model Point
                if step % self.args.step_save == 0:
                    if self.args.get_attention:
                        self.plot_attention(step, src_input, trg_input, attention)
                    self.model_save(model=model, optimizer=optimizer, epoch=epoch, step=step)

                # Optimizer
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                step += 1

            # on KeyboardInterrupt, save the model before continuing
            except KeyboardInterrupt:
                self.model_save(model=model, optimizer=optimizer, epoch=epoch, step=step)
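# cal_teacher_forcing_ratio(total_step) is called above but not shown. One
# common choice is a linear decay from always-teacher-forcing to none over the
# whole run; this is an assumed implementation, not the original method.
def cal_teacher_forcing_ratio(self, total_step, start=1.0, end=0.0):
    # ratio at step k decays linearly from `start` to `end`
    return [start + (end - start) * k / max(total_step - 1, 1)
            for k in range(total_step)]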
hidden_size = 512
embed_size = 256
print(device)

print('Loading dataset ......')
train_iter, val_iter, test_iter, DE, EN = load_dataset(args.batch_size)
de_size, en_size = len(DE.vocab), len(EN.vocab)
print("[TRAIN]:%d (dataset:%d)\t[TEST]:%d (dataset:%d)\t[VALIDATE]:%d (dataset:%d)"
      % (len(train_iter), len(train_iter.dataset),
         len(test_iter), len(test_iter.dataset),
         len(val_iter), len(val_iter.dataset)))
print("[DE_vocab]:%d [EN_vocab]:%d" % (de_size, en_size))

print("Initialize model ......")
encoder = Encoder(de_size, embed_size, hidden_size)
decoder = AttentionDecoder(en_size, embed_size, hidden_size)
seq2seq = Seq2Seq(encoder, decoder).to(device)
optimizer = optim.Adam(seq2seq.parameters(), lr=args.lr)
print(seq2seq)

best_val_loss = None
for epoch in range(0, args.epochs):
    train(seq2seq, optimizer, train_iter, en_size, args.grad_clip, DE, EN)
    val_loss = evaluate(seq2seq, val_iter, en_size, DE, EN)
    now_time = time.time()
    print("[Epoch:{}] val_loss:{} | val_pp:{} | Time: {}h{}m{}s".format(
        epoch, val_loss, math.exp(val_loss),
        (now_time - start_time) // 3600,
        (now_time - start_time) % 3600 // 60,
        (now_time - start_time) % 60))
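# best_val_loss is initialized above but the fragment ends before it is used;
# the usual pattern is to checkpoint on improvement inside the epoch loop.
# A sketch (it belongs after the val_loss print; the file name is an example):
if best_val_loss is None or val_loss < best_val_loss:
    best_val_loss = val_loss
    torch.save(seq2seq.state_dict(), 'seq2seq-best.pt')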