def translate(model, src_vocab, trg_vocab, corpus_iter, translation_output=None):
    """Beam-search decode every batch in *corpus_iter* and write the 1-best
    hypothesis per source sentence to *translation_output*, one sentence per line.

    Args:
        model: model exposing ``eval()`` and
            ``beamsearch(src, src_mask, beam_size, normalize)``.
        src_vocab: source vocabulary used for string -> index conversion.
        trg_vocab: target vocabulary used for index -> string conversion.
        corpus_iter: iterable of batches; ``batch[0]`` is the raw source batch.
        translation_output: path of the output file. The ``None`` default is kept
            only for interface compatibility and is rejected up front.

    Raises:
        ValueError: if *translation_output* is None. (The original deferred this
            failure to ``open()`` — a TypeError — only after decoding the whole
            corpus.)
    """
    global opt
    # Fail fast before spending time on beam search.
    if translation_output is None:
        raise ValueError('translation_output path must be provided')
    model.eval()
    hyp_list = []
    for idx, batch in enumerate(corpus_iter, start=1):
        print(idx)
        batch = list(batch)
        src_raw = batch[0]
        src = batch_str2idx_with_flag(src_raw, src_vocab, unk=UNK, pad=PAD,
                                      sos=SOS, eos=EOS)
        src = to_Tensor(src, tensor_type=torch.LongTensor, cuda=opt.cuda)
        src_mask = get_batch_mask(src, src_vocab, PAD)
        with torch.no_grad():
            sentences_output, scores_output = model.beamsearch(
                src, src_mask, opt.beam_size, normalize=True)
        # Keep only the highest-scoring hypothesis of the beam.
        best_sentence, best_score = sentences_output[0], scores_output[0]
        best_sentence = batch_idx2str([best_sentence], trg_vocab)
        hyp_list.append(best_sentence[0])
    # Explicit encoding: the original relied on the locale default.
    with open(translation_output, 'w', encoding='utf-8') as f:
        for sentence in hyp_list:
            f.write(' '.join(sentence) + '\n')
def evaluate(opt, model, src_vocab, trg_vocab, corpus_iter, batch_idx, cur_epoch):
    """Decode *corpus_iter* with beam search and score it with smoothed corpus BLEU.

    Intended to run inside a worker process of a multiprocessing pool (see the
    ``os.getpid()`` logging), so any exception is caught and logged here —
    otherwise it would be silently swallowed by the pool.

    Args:
        opt: options namespace (``opt.cuda``, ``opt.beam_size``).
        model: model exposing ``eval()`` and ``beamsearch(...)``.
        src_vocab / trg_vocab: vocabularies for index <-> string conversion.
        corpus_iter: iterable of batches; ``batch[0]`` is the source batch,
            ``batch[1:]`` the reference batches.
        batch_idx: training-batch index this evaluation belongs to (passed through).
        cur_epoch: training epoch this evaluation belongs to (passed through).

    Returns:
        ``(bleu, batch_idx, cur_epoch)`` on success, ``None`` on failure.
    """
    try:
        model.eval()
        print('!!!eval', id(model))
        time1 = time.time()
        hyp_list = []
        ref_list = []
        print('sub: ', os.getpid())
        print('num: ', batch_idx)
        for idx, batch in enumerate(corpus_iter, start=1):
            print(idx)
            src_raw = batch[0]
            trg_raw = batch[1:]
            # One reference set per source sentence: the first sentence of
            # every reference column.
            ref = [x[0] for x in trg_raw]
            ref_list.append(ref)
            src = batch_str2idx_with_flag(src_raw, src_vocab, unk=UNK, pad=PAD,
                                          sos=SOS, eos=EOS)
            src = to_Tensor(src, tensor_type=torch.LongTensor, cuda=opt.cuda)
            src_mask = get_batch_mask(src, src_vocab, PAD)
            with torch.no_grad():
                sentences_output, scores_output = model.beamsearch(
                    src, src_mask, opt.beam_size, normalize=True)
            # Keep only the highest-scoring hypothesis of the beam.
            best_sentence, best_score = sentences_output[0], scores_output[0]
            best_sentence = batch_idx2str([best_sentence], trg_vocab)
            hyp_list.append(best_sentence[0])
        bleu = corpus_bleu(ref_list, hyp_list,
                           smoothing_function=SmoothingFunction().method1)
        time1 = time.time() - time1
        print('subprocess %d batch_idx %d, time: ' % (os.getpid(), batch_idx),
              time1)
        return bleu, batch_idx, cur_epoch
    except Exception as ex:
        # Log the full traceback, not just str(ex): inside a pool worker this
        # is the only place the failure context can surface. (Also fixes the
        # "subprcess" typo of the original message.)
        import traceback
        print("subprocess wrong: %s" % ex)
        traceback.print_exc()
        return None
def train(model, src_vocab, trg_vocab, optim_wrapper, train_iter, vldt_iter,
          loss_function):
    """Train the Transformer model with gradient accumulation over ``opt.interval``
    batches, periodic min-loss checkpointing and per-epoch checkpoints.

    Args:
        model: Transformer returning per-position target probabilities
            ``y_prob`` of shape (batch, trg_len, vocab) — presumably; confirm
            against the model's forward.
        src_vocab / trg_vocab: vocabularies for string -> index conversion.
        optim_wrapper: optimizer wrapper with ``step`` / ``zero_grad`` /
            ``update_lr_per_step`` / ``update_lr_per_epoch``.
        train_iter: iterable of training batches.
        vldt_iter: validation iterator (only used by the commented-out
            subprocess BLEU evaluation below).
        loss_function: criterion applied as ``loss_function(y_prob.transpose(1, 2),
            target)`` — e.g. a CrossEntropyLoss over the class dimension.

    Uses globals ``opt``, ``min_loss``, ``max_bleu``.
    """
    global opt, min_loss, max_bleu
    subprocess_pool = Pool(2)
    model.train()
    print('start training!!!', id(model))
    for epoch in range(opt.epoch, opt.nepoch):  # TODO
        cur_epoch = epoch + 1
        total_loss = 0
        print('############### epoch = %d ###############\n' % cur_epoch)
        for batch_idx, batch in enumerate(train_iter, start=1):
            sorted_batch = sort_batch(batch)
            src_raw = sorted_batch[0]
            trg_raw = sorted_batch[1]
            # Source and target sentences as word-index batches.
            src = batch_str2idx_with_flag(src_raw, src_vocab, unk=UNK, pad=PAD,
                                          sos=SOS, eos=EOS)
            f_trg = batch_str2idx_with_flag(trg_raw, trg_vocab, unk=UNK, pad=PAD,
                                            sos=SOS, eos=EOS)
            src, f_trg = to_Tensor(src, f_trg, tensor_type=torch.LongTensor,
                                   cuda=opt.cuda)
            src_mask = get_batch_mask(src, src_vocab, PAD)
            f_trg_mask = get_batch_mask(f_trg, trg_vocab, PAD)
            '''
            # b_trg = batch_str2idx_with_flag(trg_raw, trg_vocab, unk=UNK, pad=PAD, sos=SOS, eos=EOS, reverse=True)  # reversed target batch, unused for now
            # src, f_trg, b_trg = to_Tensor(src, f_trg, b_trg, tensor_type=torch.LongTensor, cuda=opt.cuda)
            # b_trg_mask = get_batch_mask(b_trg, trg_vocab, PAD)
            '''
            y_prob = model(src, src_mask, f_trg, f_trg_mask)
            # --------------------------------------- TODO
            # Append one dec_pad column so the shifted target f_trg[:, 1:] keeps
            # the same length as y_prob. new_full() inherits f_trg's device and
            # dtype — the original built the column with torch.LongTensor on the
            # CPU, which breaks torch.cat when opt.cuda puts f_trg on the GPU.
            pad_col = f_trg.new_full((f_trg.size(0), 1), dec_pad)
            f_trg = torch.cat((f_trg, pad_col), 1)
            # loss_function expects (batch, vocab, trg_len), hence the transpose.
            loss = loss_function(y_prob.transpose(1, 2), f_trg[:, 1:])
            total_loss = total_loss + float(loss)
            loss.backward()
            # ----------------------------------------
            if batch_idx % opt.interval == 0:
                total_loss = total_loss / opt.interval
                # Integer division (//): the original used /, which yields a
                # float under Python 3 for the %d formats and the checkpoint id.
                if total_loss < min_loss:
                    print('& epoch = %d batch_idx = %d min_loss = %f &\n'
                          % (cur_epoch, batch_idx // opt.interval, total_loss))
                    min_loss = total_loss
                    save_min_loss_model(model, opt.checkpoint_dir,
                                        batch_idx // opt.interval, cur_epoch,
                                        min_loss,
                                        info='Transformer_min_loss_model')
                else:
                    print('- batch_idx = %d, loss = %f -\n'
                          % (batch_idx // opt.interval, total_loss))
                # torch.nn.utils.clip_grad_norm_(model.parameters(), opt.max_norm, norm_type=2)  # clip gradients (L2 norm by default) before the update
                optim_wrapper.step()
                optim_wrapper.zero_grad()
                total_loss = 0
                optim_wrapper.update_lr_per_step()
                '''
                # Enable the lines below to test dev-set BLEU in an extra CPU process:
                # from epoch 4 on, every opt.vldt_freq batches, spawn a subprocess to compute BLEU once
                if cur_epoch >= 4 and (batch_idx * opt.interval) % opt.vldt_freq == 0:
                    cpu_model = copy.deepcopy(model).cpu()
                    subprocess_pool.apply_async(evaluate, args=(opt, cpu_model, src_vocab, trg_vocab, vldt_iter, batch_idx, cur_epoch), callback=my_callback)
                '''
                if (batch_idx // opt.interval) % 100 == 0:
                    print('- epoch = %d, min_loss = %f -\n'
                          % (cur_epoch, min_loss))
                    # ---------------------------------------
                    # Debug sample: greedy argmax decode of the first 5 sentences
                    # of the current batch, printed next to source and reference.
                    sentences = []
                    for i in range(5):
                        sentence = []
                        for j in range(y_prob.size(1)):
                            sentence.append(int(y_prob[i][j].argmax()))
                        sentences.append(sentence)
                    sentences = batch_idx2str(sentences, trg_vocab)
                    for i in range(5):
                        print('source:')
                        print(' '.join(src_raw[i]))
                        print('ref:')
                        print(' '.join(trg_raw[i]))
                        print('pred:')
                        print(' '.join(sentences[i]))
                        print('---------------------')
                    # ---------------------------------------
        # Drop any gradient left over from a partial accumulation window.
        optim_wrapper.zero_grad()
        optim_wrapper.update_lr_per_epoch()
        save_checkpoint_model(model, opt.checkpoint_dir, cur_epoch,
                              info='Transformer_checkpoint_model')
        print('$ min_loss: %f, max_bleu: %f $\n' % (min_loss, max_bleu))
    # Close the pool and wait for any dev-set BLEU evaluations to finish.
    subprocess_pool.close()
    subprocess_pool.join()
def train(model, src_vocab, trg_vocab, optim_wrapper, train_iter, vldt_iter):
    """Train the RNNSearch model with gradient accumulation over ``opt.interval``
    batches, gradient clipping, min-loss checkpointing and per-epoch checkpoints.

    NOTE(review): this module also defines an earlier ``train`` with an extra
    ``loss_function`` parameter; if both live in the same file, this definition
    shadows it — confirm that is intended.

    Args:
        model: RNNSearch model whose forward call directly returns the loss
            tensor for the batch.
        src_vocab / trg_vocab: vocabularies for string -> index conversion.
        optim_wrapper: optimizer wrapper with ``step`` / ``zero_grad`` /
            ``update_lr_per_step`` / ``update_lr_per_epoch``.
        train_iter: iterable of training batches.
        vldt_iter: validation iterator (only used by the commented-out
            subprocess BLEU evaluation below).

    Uses globals ``opt``, ``min_loss``, ``max_bleu``.
    """
    global opt, min_loss, max_bleu
    subprocess_pool = Pool(2)
    # start training
    model.train()
    print('!!!train', id(model))
    for epoch in range(opt.epoch, opt.nepoch):
        cur_epoch = epoch + 1
        total_loss = 0
        print('############### epoch = %d ###############\n' % cur_epoch)
        for batch_idx, batch in enumerate(train_iter, start=1):
            sorted_batch = sort_batch(batch)
            src_raw = sorted_batch[0]
            trg_raw = sorted_batch[1]
            # Source and target sentences as word-index batches.
            src = batch_str2idx_with_flag(src_raw, src_vocab, unk=UNK, pad=PAD,
                                          sos=SOS, eos=EOS)
            f_trg = batch_str2idx_with_flag(trg_raw, trg_vocab, unk=UNK, pad=PAD,
                                            sos=SOS, eos=EOS)
            src, f_trg = to_Tensor(src, f_trg, tensor_type=torch.LongTensor,
                                   cuda=opt.cuda)
            src_mask = get_batch_mask(src, src_vocab, PAD)
            f_trg_mask = get_batch_mask(f_trg, trg_vocab, PAD)
            '''
            # b_trg = batch_str2idx_with_flag(trg_raw, trg_vocab, unk=UNK, pad=PAD, sos=SOS, eos=EOS, reverse=True)  # reversed target batch, unused for now
            # src, f_trg, b_trg = to_Tensor(src, f_trg, b_trg, tensor_type=torch.LongTensor, cuda=opt.cuda)
            # b_trg_mask = get_batch_mask(b_trg, trg_vocab, PAD)
            '''
            loss = model(src, src_mask, f_trg, f_trg_mask)  # TODO
            total_loss = total_loss + float(loss)
            loss.backward()
            if batch_idx % opt.interval == 0:
                total_loss = total_loss / opt.interval
                # Integer division (//): the original used /, which yields a
                # float under Python 3 for the %d formats and the checkpoint id.
                if total_loss < min_loss:
                    print('& epoch = %d batch_idx = %d min_loss = %f &\n'
                          % (cur_epoch, batch_idx // opt.interval, total_loss))
                    min_loss = total_loss
                    save_min_loss_model(model, opt.checkpoint_dir,
                                        batch_idx // opt.interval, cur_epoch,
                                        min_loss,
                                        info='RNNSearch_min_loss_model')
                else:
                    print('- batch_idx = %d, loss = %f -\n'
                          % (batch_idx // opt.interval, total_loss))
                # Clip gradients (L2 norm by default) before the parameter update.
                torch.nn.utils.clip_grad_norm_(model.parameters(), opt.max_norm,
                                               norm_type=2)
                optim_wrapper.step()
                optim_wrapper.zero_grad()
                total_loss = 0
                optim_wrapper.update_lr_per_step()
                '''
                # Enable the lines below to test dev-set BLEU in an extra CPU process:
                # from epoch 4 on, every opt.vldt_freq batches, spawn a subprocess to compute BLEU once
                if cur_epoch >= 4 and (batch_idx * opt.interval) % opt.vldt_freq == 0:
                    cpu_model = copy.deepcopy(model).cpu()
                    subprocess_pool.apply_async(evaluate, args=(opt, cpu_model, src_vocab, trg_vocab, vldt_iter, batch_idx, cur_epoch), callback=my_callback)
                '''
        # Drop any gradient left over from a partial accumulation window.
        optim_wrapper.zero_grad()
        optim_wrapper.update_lr_per_epoch()
        save_checkpoint_model(model, opt.checkpoint_dir, cur_epoch,
                              info='RNNSearch_checkpoint_model')
        print('$ min_loss: %f, max_bleu: %f $\n' % (min_loss, max_bleu))
    # Close the pool and wait for any dev-set BLEU evaluations to finish.
    subprocess_pool.close()
    subprocess_pool.join()