def eval(epoch, eval_type='valid'):
    """Evaluate the logistic classifier on the validation or test loader.

    Every batch of the selected loader is pushed through ``model``; the mean
    weighted logistic loss is logged, the stacked targets and scores are
    saved as ``y.npy`` / ``y_score.npy`` under ``config.log``, and metrics are
    logged twice: at a fixed threshold of 0.5 and at the tuned threshold.

    NOTE: the name shadows the builtin ``eval``; it is kept because ``train``
    in this file calls it by this name.

    Args:
        epoch: Epoch index, only forwarded to ``logging_valid_loss``.
        eval_type: ``'valid'`` reads ``validloader`` and re-tunes the
            module-level ``threshold`` via ``E.find_best_thres``; any other
            value reads ``testloader`` and reuses the current ``threshold``
            (which therefore has to exist already at module level).

    Returns:
        The dict returned by ``E.compute`` at the tuned threshold.

    Raises:
        ZeroDivisionError: If the selected loader yields no batches.
    """
    global threshold
    # Local import so the block is self-contained; only no_grad() is needed.
    import torch

    is_valid = eval_type == 'valid'
    if is_valid:
        loader = validloader
        E.is_split_label = False
    else:
        loader = testloader
        E.is_split_label = config.eval_metrics_split_labels

    total_log_loss, total = 0., 0
    y_logistic, y = [], []

    model.eval()
    # Evaluation never back-propagates, so skip building the autograd graph.
    with torch.no_grad():
        for raw_src, src, src_len, raw_tgt, tgt_vec, tgt_rnn in loader:
            if use_cuda:
                src = src.cuda()
                tgt_vec = tgt_vec.cuda()
            log_output = model(src.transpose(0, 1), src_len)
            # Only the first tgt_vocab.size() target columns are scored.
            log_loss = logit_loss(
                log_output,
                tgt_vec[:, :tgt_vocab.size()]) * config.logistic_weight
            total_log_loss += log_loss.item()
            total += 1
            y_logistic.append(log_output.detach().cpu().numpy())
            y.append(tgt_vec.cpu().numpy()[:, :tgt_vocab.size()])

    mean_log_loss = total_log_loss / total
    logging("{} log loss :{:.5f}\n".format(eval_type, mean_log_loss))
    if is_valid:
        logging_valid_loss([epoch, updates, mean_log_loss])

    def get_score(y, y_score, typ):
        # Log a banner followed by the metrics E computes for these scores.
        logging("-" * 20 + typ + '-' * 20 + '\n')
        loss_dict = E.compute(y, y_score)
        logging(E.logging(loss_dict))
        return loss_dict

    y = np.vstack(y)
    y_score = np.vstack(y_logistic)
    np.save(os.path.join(config.log, 'y_score.npy'), y_score)
    np.save(os.path.join(config.log, 'y.npy'), y)

    # First report: fixed 0.5 threshold.
    E.set_thres(0.5)
    get_score(y, y_score, 'Logistic')

    # Second report: threshold tuned on validation data (re-tuned only when
    # this call is itself a validation run).
    if is_valid:
        _, threshold = E.find_best_thres(y, y_score)
    E.set_thres(threshold)
    loss_d = get_score(y, y_score, 'Logistic')
    logging('-' * 50 + '\n')
    return loss_d
def train(epoch):
    """Train the logistic model for one pass over ``trainloader``.

    Increments the module-level ``updates`` counter once per batch, logs the
    running mean loss every ``config.print_interval`` updates and, every
    ``config.eval_interval`` updates, runs validation, records the metrics in
    ``scores`` and writes checkpoints.

    Args:
        epoch: Epoch index, used for logging and forwarded to ``eval``.
    """
    global updates
    model.train()
    running_loss, n_batches = 0., 0

    if config.schedule:
        scheduler.step()
        print("Decaying learning rate to %g" % scheduler.get_lr()[0])

    for raw_src, src, src_len, raw_tgt, tgt_vec, tgt_rnn in trainloader:
        if use_cuda:
            src = src.cuda()
            tgt_vec = tgt_vec.cuda()

        model.zero_grad()
        log_output = model(src.transpose(0, 1), src_len)
        target = tgt_vec[:, :tgt_vocab.size()]
        log_loss = logit_loss(log_output, target) * config.logistic_weight
        log_loss.backward()
        optim.step()

        running_loss += log_loss.item()
        n_batches += 1
        updates += 1

        if updates % config.print_interval == 0:
            logging(time.strftime("[%H:%M:%S]", time.localtime()))
            logging(" Epoch: %3d, updates: %8d\n" % (epoch, updates))
            logging("Log loss : {:.5f}\n".format(running_loss / n_batches))
            logging_train_loss([epoch, updates, running_loss / n_batches])
            # Start a fresh averaging window after each report.
            running_loss, n_batches = 0., 0

        if updates % config.eval_interval != 0:
            continue

        ## TODO different model will have different decoding strategies
        score = eval(epoch, 'valid')
        logging_metric(score, epoch, updates)
        for metric, value in score.items():
            history = scores[metric]
            history.append(value)
            # The new value is already in the history, so ">=" is true
            # exactly when it ties or beats every earlier value.
            if metric == standard_metric and value >= max(history):
                save_model(log_path + 'best_' + metric + '_checkpoint.pt')
        save_model(log_path + 'checkpoint.pt')
        # eval() switched the model to eval mode; switch back.
        model.train()
def eval(epoch, decode_type='greedy', logistic_joint_decoding=False):
    """Evaluate the RNN decoder and the logistic head on ``testloader``.

    Logs the mean RNN and weighted logistic losses, then logs metrics (at a
    0.5 threshold) for the decoded sequences, for the logistic outputs and,
    under beam search, for the logistic-rescored sequences.

    NOTE: the name shadows the builtin ``eval``; it is kept because ``train``
    in this file calls it by this name.

    Args:
        epoch: Unused by this function; kept for the callers' signature.
        decode_type: ``'greedy'`` evaluates ``model`` itself with the decoder
            sampling type set to ``'max'``; ``'beam_search'`` wraps
            ``model.decoder`` in a ``TopKDecoder`` of width
            ``config.beam_size``.
        logistic_joint_decoding: Forwarded unchanged to the model call.

    Returns:
        The dict returned by ``E.compute`` for the decoded ('RNN') sequences.

    Raises:
        ValueError: If ``decode_type`` is not one of the two supported values.
        ZeroDivisionError: If ``testloader`` yields no batches.
    """
    # Local import so the block is self-contained; only no_grad() is needed.
    import torch

    if decode_type == 'beam_search':
        eval_model = AttEnc_DecRNN.AttEnc_DecRNN(
            encoder,
            top_k_decoder.TopKDecoder(model.decoder, config.beam_size))
    elif decode_type == 'greedy':
        model.decoder.set_sampling_type('max')
        eval_model = model
    else:
        # Previously this fell through and failed later on an unbound name.
        raise ValueError(
            "Unsupported decode_type: {!r} (expected 'greedy' or "
            "'beam_search')".format(decode_type))

    use_beam = decode_type == 'beam_search'
    total_rnn_loss, total_log_loss, total = 0., 0., 0
    y_logistic, y_rnn, y_rescore, y = [], [], [], []
    n_labels = config.label_set_size

    eval_model.eval()
    # Evaluation never back-propagates, so skip building the autograd graph.
    with torch.no_grad():
        for items, label_set_rnn, label_set_vec in testloader:
            items = items.float()
            if use_cuda:
                items = items.cuda()
                label_set_rnn = label_set_rnn.cuda()
                label_set_vec = label_set_vec.cuda()

            decoder_outputs, _, ret_dict, log_output = eval_model(
                items, logistic_joint_decoding=logistic_joint_decoding)

            # The 'vanilla' loss is fed the RNN-format targets; every other
            # loss type is fed the vector-format targets.
            if config.loss_type.lower() == 'vanilla':
                label_sets = label_set_rnn
            else:
                label_sets = label_set_vec

            rnn_loss = Loss(decoder_outputs, ret_dict['sequence'], label_sets)
            log_loss = logit_loss(
                log_output,
                label_set_vec[:, :n_labels]) * config.logistic_weight
            total_log_loss += log_loss.item()
            total_rnn_loss += rnn_loss.item()
            total += 1

            y_rnn.append(
                E.idx2vec(ret_dict['sequence'], n_labels, n_labels + 1, True))
            y_logistic.append(log_output.detach().cpu().numpy())
            y.append(label_set_vec.cpu().numpy()[:, :n_labels])

            if use_beam:
                seq, _ = rescore.logistic_rescore(
                    ret_dict['topk_sequence'], log_output)
                y_rescore.append(
                    E.idx2vec(seq, n_labels, n_labels + 1, True))

    logging("Decode type: {} , Logistic joint Decoding: {}\n".format(
        decode_type, logistic_joint_decoding))
    logging("Test RNN loss : {:.5f} \nLog loss :{:.5f}\n".format(
        total_rnn_loss / total, total_log_loss / total))
    E.set_thres(0.5)

    def get_score(y, y_score, typ):
        # Stack the per-batch arrays, then log a banner and the metrics.
        y_np = np.vstack(y)
        y_score_np = np.vstack(y_score)
        logging("-" * 20 + typ + '-' * 20 + '\n')
        loss_dict = E.compute(y_np, y_score_np)
        logging(E.logging(loss_dict))
        return loss_dict

    loss_d = get_score(y, y_rnn, 'RNN')
    get_score(y, y_logistic, 'Logistic')
    if use_beam:
        get_score(y, y_rescore, 'Logistic Rescore')
    logging('-' * 50 + '\n')
    return loss_d
def train(epoch):
    """Train the joint RNN + logistic model for one epoch, then evaluate.

    Increments the module-level ``updates`` counter once per batch and logs
    the running mean losses every 1000 updates and again after the last
    batch. After the epoch it updates the loss temperature for the 'ocd' loss
    type, runs greedy evaluation with and without logistic joint decoding,
    and records the metrics of the joint-decoding run in ``scores``.

    Args:
        epoch: Epoch index; drives the teacher-forcing schedule, the
            temperature update and the log messages.
    """
    global updates
    model.train()
    model.decoder.set_sampling_type(config.decoder_sampling_type)

    if config.schedule:
        scheduler.step()
        print("Decaying learning rate to %g" % scheduler.get_lr()[0])

    # Teacher forcing is only scheduled for the 'vanilla' loss: linear in the
    # epoch until teacher_forcing_final_epoch, constant afterwards.
    if config.loss_type.lower() == 'vanilla':
        ratio_start = config.teacher_forcing_ratio_start
        ratio_end = config.teacher_forcing_ratio_end
        if epoch > config.teacher_forcing_final_epoch:
            teacher_forcing_ratio = ratio_end
        else:
            teacher_forcing_ratio = ratio_start + (ratio_end - ratio_start) \
                / config.teacher_forcing_final_epoch * (epoch - 1)
        logging("Teacher forcing ratio: " + str(teacher_forcing_ratio) + '\n')
    else:
        teacher_forcing_ratio = 0

    log_sum, rnn_sum, n_batches = 0., 0., 0

    def _log_progress():
        # Timestamp, position and running mean losses since epoch start.
        logging(time.strftime("[%H:%M:%S]", time.localtime()))
        logging(" Epoch: %3d, updates: %8d\n" % (epoch, updates))
        logging("Training loss : {:.5f} \nLog loss : {:.5f}\n".format(
            rnn_sum / n_batches, log_sum / n_batches))

    for items, label_set_rnn, label_set_vec in trainloader:
        items = items.float()
        if use_cuda:
            items = items.cuda()
            label_set_rnn = label_set_rnn.cuda()
            label_set_vec = label_set_vec.cuda()

        model.zero_grad()

        # Defaults; overridden below depending on the loss type.
        target_variable = None
        candidates = None
        label_sets = label_set_vec.clone()
        loss_type = config.loss_type.lower()
        if loss_type == 'vanilla':
            target_variable = label_set_rnn
            label_sets = label_set_rnn
        elif loss_type == 'order_free':
            candidates = label_set_vec.clone()

        decoder_outputs, decoder_hidden, ret_dict, log_output = model(
            items,
            target_variable=target_variable,
            candidates=candidates,
            teacher_forcing_ratio=teacher_forcing_ratio)

        rnn_loss = Loss(decoder_outputs, ret_dict['sequence'], label_sets)
        log_loss = logit_loss(
            log_output,
            label_set_vec[:, :config.label_set_size]) * config.logistic_weight
        (rnn_loss + log_loss).backward()
        optim.step()

        log_sum += log_loss.item()
        rnn_sum += rnn_loss.item()
        n_batches += 1
        updates += 1

        if updates % 1000 == 0:
            _log_progress()

    # End-of-epoch summary.
    _log_progress()

    if config.loss_type.lower() == 'ocd':
        Loss.update_temperature(epoch)

    # Both runs are executed for their logging; only the joint-decoding
    # result is kept for model selection.
    score = eval(epoch, 'greedy', False)
    score = eval(epoch, 'greedy', True)
    #score_bs = eval(epoch, 'beam_search', config.logistic_joint_decoding)
    for metric, value in score.items():
        history = scores[metric]
        history.append(value)
        if metric == standard_metric and value >= max(history):
            save_model(log_path + 'best_' + metric + '_checkpoint.pt')
def eval(epoch, eval_type='valid', decode_type='greedy',
         logistic_joint_decoding=False):
    """Evaluate the seq2seq decoder and the logistic head on one data split.

    Logs the mean RNN and weighted logistic losses, then logs metrics for the
    decoded sequences and for the logistic outputs at a 0.5 threshold, for
    the logistic outputs again at the tuned threshold and, under beam search,
    for the logistic-rescored sequences.

    NOTE: the name shadows the builtin ``eval``; it is kept because ``train``
    in this file calls it by this name.

    Args:
        epoch: Epoch index, only forwarded to ``logging_valid_loss``.
        eval_type: ``'valid'`` reads ``validloader`` and re-tunes the
            module-level ``threshold`` via ``E.find_best_thres``; any other
            value reads ``testloader`` and reuses the current ``threshold``
            (which therefore has to exist already at module level).
        decode_type: ``'greedy'`` evaluates ``model`` itself;
            ``'beam_search'`` builds a ``Seq2seq`` around a ``TopKDecoder``.
        logistic_joint_decoding: Forwarded unchanged to the model call. The
            validation loss is only recorded when this is falsy.

    Returns:
        A pair ``(score_rnn, score_logistic)``: the ``E.compute`` dict for
        the decoded sequences at threshold 0.5 and the one for the logistic
        outputs at the tuned threshold.

    Raises:
        ValueError: If ``decode_type`` is not one of the two supported values.
        ZeroDivisionError: If the selected loader yields no batches.
    """
    global threshold
    # Local import so the block is self-contained; only no_grad() is needed.
    import torch

    is_valid = eval_type == 'valid'
    if is_valid:
        loader = validloader
        E.is_split_label = False
    else:
        loader = testloader
        E.is_split_label = config.eval_metrics_split_labels

    model.decoder.set_sampling_type('max')
    if decode_type == 'beam_search':
        topk_decoder = top_k_decoder.TopKDecoder(
            model.decoder, config.beam_size, config.beam_score_type)
        eval_model = seq2seq.Seq2seq(encoder, topk_decoder, decoderFC)
    elif decode_type == 'greedy':
        eval_model = model
    else:
        # Previously this fell through and failed later on an unbound name.
        raise ValueError(
            "Unsupported decode_type: {!r} (expected 'greedy' or "
            "'beam_search')".format(decode_type))

    use_beam = decode_type == 'beam_search'
    total_rnn_loss, total_log_loss, total = 0., 0., 0
    y_logistic, y_rnn, y_rescore, y = [], [], [], []

    eval_model.eval()
    # Evaluation never back-propagates, so skip building the autograd graph.
    with torch.no_grad():
        for raw_src, src, src_len, raw_tgt, tgt_vec, tgt_rnn in loader:
            if use_cuda:
                src = src.cuda()
                tgt_vec = tgt_vec.cuda()
                tgt_rnn = tgt_rnn.cuda()

            decoder_outputs, _, ret_dict, log_output = eval_model(
                src.transpose(0, 1), src_len,
                logistic_joint_decoding=logistic_joint_decoding)

            # The 'vanilla' loss is fed the RNN-format targets; every other
            # loss type is fed a copy of the vector-format targets.
            if config.loss_type.lower() == 'vanilla':
                label_sets = tgt_rnn
            else:
                label_sets = tgt_vec.clone()

            rnn_loss = Loss(decoder_outputs, ret_dict['sequence'], label_sets)
            log_loss = logit_loss(
                log_output,
                tgt_vec[:, :tgt_vocab.size()]) * config.logistic_weight
            total_log_loss += log_loss.item()
            total_rnn_loss += rnn_loss.item()
            total += 1

            y_rnn.append(E.idx2vec(ret_dict['sequence'], tgt_vocab.size(),
                                   tgt_vocab.size() + 1, True))
            y_logistic.append(log_output.detach().cpu().numpy())
            y.append(tgt_vec.cpu().numpy()[:, :tgt_vocab.size()])

            if use_beam:
                seq, _ = rescore.logistic_rescore(
                    ret_dict['topk_sequence'], log_output)
                y_rescore.append(E.idx2vec(seq, tgt_vocab.size(),
                                           tgt_vocab.size() + 1, True))

    logging("Decode type: {} , Logistic joint Decoding: {}\n".format(
        decode_type, logistic_joint_decoding))
    logging("{} RNN loss : {:.5f} \nLog loss :{:.5f}\n".format(
        eval_type, total_rnn_loss / total, total_log_loss / total))
    if is_valid and not logistic_joint_decoding:
        logging_valid_loss([epoch, updates,
                            total_log_loss / total, total_rnn_loss / total])

    y_np = np.vstack(y)
    y_logistic_np = np.vstack(y_logistic)
    y_rnn_np = np.vstack(y_rnn)

    def get_score(y_np, y_score_np, typ):
        # Log a banner followed by the metrics E computes for these scores.
        logging("-" * 20 + typ + '-' * 20 + '\n')
        loss_dict = E.compute(y_np, y_score_np)
        logging(E.logging(loss_dict))
        return loss_dict

    # First reports: fixed 0.5 threshold.
    E.set_thres(0.5)
    score_rnn = get_score(y_np, y_rnn_np, 'RNN')
    get_score(y_np, y_logistic_np, 'Logistic')

    # Second logistic report: threshold tuned on validation data (re-tuned
    # only when this call is itself a validation run).
    if is_valid:
        _, threshold = E.find_best_thres(y_np, y_logistic_np)
    E.set_thres(threshold)
    score_logistic = get_score(y_np, y_logistic_np, 'Logistic')

    if use_beam:
        # Logged only; the rescored metrics are not part of the return value.
        get_score(y_np, np.vstack(y_rescore), 'Logistic Rescore')
    logging('-' * 50 + '\n')
    return score_rnn, score_logistic
def train(epoch):
    """Train the joint seq2seq + logistic model for one pass over the data.

    Increments the module-level ``updates`` counter once per batch, logs the
    running mean losses every ``config.print_interval`` updates and, every
    ``config.eval_interval`` updates, runs validation, records the greedy RNN
    metrics in ``scores`` and writes checkpoints.

    Args:
        epoch: Epoch index; drives the teacher-forcing schedule, the
            temperature update and the log messages.
    """
    global updates
    model.train()
    model.decoder.set_sampling_type(config.decoder_sampling_type)
    log_sum, rnn_sum, n_batches = 0., 0., 0

    #optim.updateLearningRate(None, epoch)
    if config.schedule:
        scheduler.step()
        print("Decaying learning rate to %g" % scheduler.get_lr()[0])

    ## Update Teacher Forcing ratio
    # Linear in the epoch until teacher_forcing_final_epoch, then constant;
    # other loss types do not use teacher forcing.
    if config.loss_type.lower() in ('vanilla', 'order_free'):
        ratio_start = config.teacher_forcing_ratio_start
        ratio_end = config.teacher_forcing_ratio_end
        if epoch > config.teacher_forcing_final_epoch:
            teacher_forcing_ratio = ratio_end
        else:
            teacher_forcing_ratio = ratio_start + (ratio_end - ratio_start) \
                / config.teacher_forcing_final_epoch * (epoch - 1)
        logging("Teacher forcing ratio: " + str(teacher_forcing_ratio) + '\n')
    else:
        teacher_forcing_ratio = 0

    # Update Temperature
    if config.loss_type.lower() == 'ocd':
        Loss.update_temperature(epoch)

    for raw_src, src, src_len, raw_tgt, tgt_vec, tgt_rnn in trainloader:
        if use_cuda:
            src = src.cuda()
            tgt_vec = tgt_vec.cuda()
            tgt_rnn = tgt_rnn.cuda()

        model.zero_grad()

        # Defaults; the 'vanilla' loss swaps in the RNN-format targets.
        target_variable = None
        candidates = tgt_vec.clone()
        label_sets = tgt_vec.clone()
        if config.loss_type.lower() == 'vanilla':
            target_variable = tgt_rnn
            label_sets = tgt_rnn

        decoder_outputs, decoder_hidden, ret_dict, log_output = model(
            src.transpose(0, 1), src_len,
            target_variable=target_variable,
            candidates=candidates,
            teacher_forcing_ratio=teacher_forcing_ratio)

        rnn_loss = Loss(decoder_outputs, ret_dict['sequence'], label_sets)
        log_loss = logit_loss(
            log_output,
            tgt_vec[:, :tgt_vocab.size()]) * config.logistic_weight
        (rnn_loss + log_loss).backward()
        optim.step()

        log_sum += log_loss.item()
        rnn_sum += rnn_loss.item()
        n_batches += 1
        updates += 1

        if updates % config.print_interval == 0:
            logging(time.strftime("[%H:%M:%S]", time.localtime()))
            logging(" Epoch: %3d, updates: %8d\n" % (epoch, updates))
            logging("RNN loss : {:.5f} \nLog loss : {:.5f}\n".format(
                rnn_sum / n_batches, log_sum / n_batches))
            logging_train_loss([epoch, updates,
                                log_sum / n_batches, rnn_sum / n_batches])
            # Start a fresh averaging window after each report.
            log_sum, rnn_sum, n_batches = 0., 0., 0

        if updates % config.eval_interval != 0:
            continue

        ## TODO different model will have different decoding strategies
        score_rnn, score_logistic = eval(epoch, 'valid', 'greedy', False)
        logging_metric(score_rnn, epoch, updates)
        logging_metric_logistic(score_logistic, epoch, updates)
        if config.logistic_weight > 0:
            score_joint, _ = eval(epoch, 'valid', 'beam_search', True)
            logging_metric_joint(score_joint, epoch, updates)

        # Model selection uses the greedy RNN metrics.
        #eval(epoch, 'test', 'greedy', True)
        for metric, value in score_rnn.items():
            history = scores[metric]
            history.append(value)
            # The new value is already in the history, so ">=" is true
            # exactly when it ties or beats every earlier value.
            if metric == standard_metric and value >= max(history):
                save_model(log_path + 'best_' + metric + '_checkpoint.pt')
        save_model(log_path + 'checkpoint.pt')

        # eval() changed the mode and sampling type; restore both.
        model.train()
        model.decoder.set_sampling_type(config.decoder_sampling_type)