def dev_step(dev_tensors, batch_size):
    """Run one evaluation pass over the dev set and return the relation F1."""
    optimizer.zero_grad()
    mymodel.eval()
    predictions = []
    entity_losses = []
    relation_losses = []
    new_tensors = []
    all_ent_num = 0
    all_rel_num = 0
    for k in range(0, len(dev_tensors), batch_size):
        batch = dev_tensors[k:k + batch_size]
        (entity_loss, relation_loss, pred_entity_tags, pred_rel_tags,
         X_len, candi_rel_num, batch) = step(batch)
        all_rel_num += candi_rel_num
        all_ent_num += sum(X_len)  # running total of tokens seen
        predictions.extend(list(zip(pred_entity_tags, pred_rel_tags)))
        entity_losses.append(entity_loss.item())
        relation_losses.append(relation_loss.item())
        new_tensors.extend(batch)
    # Average the entity loss per token and the relation loss per candidate pair.
    entity_loss = sum(entity_losses) / all_ent_num
    if all_rel_num == 0:
        relation_loss = 0
    else:
        relation_loss = sum(relation_losses) / all_rel_num
    loss = entity_loss + relation_loss
    # i and j are the global epoch / minibatch counters of the training loop.
    print('Epoch : %d Minibatch : %d Loss : %.5f\t(%.5f, %.5f)'
          % (i, j, loss, entity_loss, relation_loss))
    eval_path = os.path.join(config.save_dir, "validate.dev.output")
    utils.print_predictions(new_tensors, predictions, eval_path,
                            word_vocab, chunk_vocab, rel_vocab)
    entity_score, relation_score = eval_file(eval_path)
    return relation_score
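
# A minimal sketch (not part of the original script) of how dev_step() is
# typically used for model selection: track the best dev relation F1 across
# epochs and checkpoint the model when it improves. The epoch loop and the
# best_score bookkeeping here are hypothetical.
def select_best_on_dev(n_epochs):
    best_score = 0.0
    for epoch in range(n_epochs):
        ...  # one epoch of training goes here
        score = dev_step(dev_tensors, config.batch_size)
        if score > best_score:
            best_score = score
            torch.save(mymodel.state_dict(),
                       os.path.join(config.save_dir, "best.model"))
    return best_score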
def mrt_with_single_sample(batch, optimizer, n_samples, alpha, mu):
    """Minimum risk training on a single instance: sample up to n_samples
    distinct tag sequences (gold first), score each, and combine the expected
    F1 loss (risk) with a margin loss that pushes the gold sample on top."""
    mymodel.eval()
    X, X_char, X_lstm_h, Y, Y_rel, X_len, X_mask, new_batch = utils.get_minibatch(
        batch, word_vocab, char_vocab)
    X = model.convert_long_variable(X, use_cuda)
    X_char = model.convert_long_variable(X_char, use_cuda)
    X_lstm_h = model.convert_float_variable(X_lstm_h, use_cuda)
    Y = model.convert_long_tensor(Y, use_cuda)
    X_mask = model.convert_float_variable(X_mask, use_cuda)
    eval_path = os.path.join(config.save_dir, "training.MRT.sample.output")
    sample_entity_score_list = []
    sample_f1_loss_list = []
    sample_Y_list = []
    sample_Y_set = set()
    # The gold tag sequence is always the first sample.
    sample_Y_list.append(Y.cpu().numpy())
    sample_Y_set.add(tuple(Y.cpu().numpy()[0]))
    prob_list = mymodel.entity_model.get_prob(X, X_char, X_lstm_h, X_len, X_mask, Y)
    # The batch holds a single instance, so only prob_list[0] is sampled from.
    # Cap the draws at 1000 in case n_samples distinct sequences never appear.
    i_sample = 0
    while True:
        sample_tag = mymodel.entity_model.sample_by_prob(prob_list[0])
        i_sample += 1
        if i_sample == 1000:
            break
        if tuple(sample_tag) in sample_Y_set:
            continue  # skip duplicate samples
        sample_Y_set.add(tuple(sample_tag))
        sample_Y_list.append(np.array([sample_tag]))
        if len(sample_Y_set) == n_samples:
            break
    mymodel.train()
    for k in range(len(sample_Y_list)):
        sample_Y = model.convert_long_tensor(sample_Y_list[k], use_cuda)
        sample_entity_loss, sample_relation_loss, sample_entity_tags, pred_rel_tags, _ = \
            mymodel.forward_sample(X, X_char, X_lstm_h, X_len, X_mask, sample_Y, Y_rel)
        # Score = alpha-scaled negative interpolation of entity and relation losses.
        sample_entity_score_list.append(
            -(mu * sample_entity_loss.unsqueeze(0)
              + (1 - mu) * sample_relation_loss.unsqueeze(0)) * alpha)
        predictions = list(zip(sample_entity_tags, pred_rel_tags))
        utils.print_predictions(new_batch, predictions, eval_path,
                                word_vocab, chunk_vocab, rel_vocab)
        overall_fscore = eval_file_by_sample(eval_path)
        sample_f1_loss_list.append(1 - overall_fscore)
    sample_entity_score = torch.cat(sample_entity_score_list, 0).unsqueeze(0)  # 1 x sample_size
    sample_entity_prob = Softmax(sample_entity_score)[0]  # renormalized distribution over samples
    sample_f1_loss = model.convert_float_variable(sample_f1_loss_list, use_cuda)
    modify_f1_loss = (sample_f1_loss + 1 - sample_entity_prob) / 2
    # risk = torch.dot(sample_entity_prob, sample_f1_loss)
    risk = torch.dot(sample_entity_prob, modify_f1_loss)
    zero_loss = model.convert_float_variable([0], use_cuda)
    zero_loss.requires_grad = True
    # Margin loss: the gold sample (index 0) should outscore the best non-gold sample.
    margin_loss = 1 - sample_entity_prob[0] + torch.max(sample_entity_prob[1:])
    margin_loss = torch.max(margin_loss, zero_loss)
    return risk + margin_loss
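
# A self-contained numeric sketch (made-up values, not from the original code)
# of the risk term built above: a softmax over the alpha-scaled scores yields a
# renormalized distribution Q over the sampled tag sequences, and the risk is
# the expected (1 - F1) loss under Q. Assumes torch is imported as in the
# surrounding script.
def _mrt_risk_sketch():
    scores = torch.tensor([[-0.2, -1.5, -0.9]])        # scaled scores, gold sample first
    f1_loss = torch.tensor([0.0, 0.4, 0.7])            # 1 - F1 for each sample
    q = torch.nn.functional.softmax(scores, dim=1)[0]  # distribution Q over samples
    return torch.dot(q, f1_loss)                       # expected loss under Q = risk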
# Load a previously trained model, copying over only the parameters whose
# names also exist in the current model (torch.load accepts the path directly,
# so no file handle is left open).
state_dict = torch.load(config.load_model_path_list[dom_id],
                        map_location=lambda storage, loc: storage)
cur_state_dict = mymodel.state_dict()
for k in state_dict.keys():
    if k in cur_state_dict:
        cur_state_dict[k] = state_dict[k]
mymodel.load_state_dict(cur_state_dict)
print("loaded previous model successfully [%s]" % config.load_model_path_list[dom_id])

for title, tensors in zip(
        ["train", "dev", "test"],
        [dom2corpus[dom_id]['train_tensors'],
         dom2corpus[dom_id]['dev_tensors'],
         dom2corpus[dom_id]['test_tensors']]):
    if title == "train":
        continue  # only evaluate the dev and test splits
    print("\nEvaluating %s" % title)
    predictions, new_tensors = predict_all(tensors, config.batch_size, dom_id)
    eval_path = os.path.join(config.save_dir,
                             "final.%s.output.Domain_%d" % (title, dom_id))
    utils.print_predictions(new_tensors, predictions, eval_path, word_vocab,
                            dom2corpus[dom_id]['chunk_vocab'],
                            dom2corpus[dom_id]['rel_vocab'])
    eval_file(eval_path)
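
# Sketch (not in the original): PyTorch's built-in partial loading is roughly
# equivalent to the key-by-key copy above; strict=False ignores keys that are
# missing from or unexpected by the current model (it still raises on shape
# mismatches for keys that do match).
#
#     mymodel.load_state_dict(state_dict, strict=False)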
def predict_all(tensors, batch_size):
    """Run the model over all instances and collect (entity, relation) predictions."""
    predictions = []
    new_tensors = []
    for i in range(0, len(tensors), batch_size):
        # Progress: instances processed so far out of the total.
        print("[ %d / %d ]" % (min(len(tensors), i + batch_size), len(tensors)))
        batch = tensors[i: i + batch_size]
        X, X_char, X_lstm_h, Y, Y_rel, X_len, X_mask, batch = utils.get_minibatch(
            batch, word_vocab, char_vocab)
        X = model.convert_long_variable(X, use_cuda)
        X_lstm_h = model.convert_float_variable(X_lstm_h, use_cuda)
        X_char = model.convert_long_variable(X_char, use_cuda)
        Y = model.convert_long_tensor(Y, use_cuda)
        X_mask = model.convert_float_variable(X_mask, use_cuda)
        new_tensors.extend(batch)
        _, _, pred_entity_tags, pred_rel_tags, _ = mymodel(
            X, X_char, X_lstm_h, X_len, X_mask, Y, Y_rel)
        predictions.extend(list(zip(pred_entity_tags, pred_rel_tags)))
    return predictions, new_tensors

batch_size = config.batch_size
for title, tensors in zip(
        ["train", "dev", "test"],
        [train_tensors, dev_tensors, test_tensors]):
    if title != "test":
        continue  # only evaluate the test split here
    print("\nEvaluating %s" % title)
    predictions, new_tensors = predict_all(tensors, config.batch_size)
    eval_path = os.path.join(config.save_dir, "final.%s.output" % title)
    utils.print_predictions(new_tensors, predictions, eval_path,
                            word_vocab, chunk_vocab, rel_vocab)
    eval_file(eval_path)
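
# Sketch (not in the original): predict_all() neither switches the model to
# eval() mode nor disables autograd, so any dropout stays active and gradient
# buffers are built during inference. A hypothetical wrapper that does both:
def predict_all_inference(tensors, batch_size):
    mymodel.eval()
    with torch.no_grad():
        return predict_all(tensors, batch_size)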
def dev_step(dev_tensors, batch_size, dom_id, i_epoch):
    """Evaluate one domain's dev set; returns the relation F1 used for model selection."""
    optimizer.zero_grad()
    mymodel.eval()
    predictions = []
    ent_losses = []
    rel_losses = []
    if config.add_share_loss:
        ent_span_losses = []
        rel_bin_losses = []
        share_predictions = []
        if config.add_trans_loss:
            trans_losses = []
    new_tensors = []
    all_ent_num = 0
    all_rel_num = 0
    all_rel_bin_num = 0
    for k in range(0, len(dev_tensors), batch_size):
        batch = dev_tensors[k: k + batch_size]
        if config.add_share_loss:
            if config.add_trans_loss:
                (ent_loss, ent_span_loss, rel_loss, rel_bin_loss, trans_loss,
                 pred_ent_tags, pred_ent_span_tags, pred_rel_tags, pred_rel_bin_tags,
                 candi_rel_num, candi_rel_bin_num, X_len, batch) = step(batch, dom_id, i_epoch)
                trans_losses.append(trans_loss.item())
            else:
                (ent_loss, ent_span_loss, rel_loss, rel_bin_loss,
                 pred_ent_tags, pred_ent_span_tags, pred_rel_tags, pred_rel_bin_tags,
                 candi_rel_num, candi_rel_bin_num, X_len, batch) = step(batch, dom_id, i_epoch)
            all_rel_bin_num += candi_rel_bin_num
            ent_span_losses.append(ent_span_loss.item())
            rel_bin_losses.append(rel_bin_loss.item())
            share_predictions.extend(list(zip(pred_ent_span_tags, pred_rel_bin_tags)))
        else:
            (ent_loss, rel_loss, pred_ent_tags, pred_rel_tags,
             candi_rel_num, X_len, batch) = step(batch, dom_id, i_epoch)
        all_rel_num += candi_rel_num
        all_ent_num += sum(X_len)  # running total of tokens seen
        predictions.extend(list(zip(pred_ent_tags, pred_rel_tags)))
        ent_losses.append(ent_loss.item())
        rel_losses.append(rel_loss.item())
        new_tensors.extend(batch)
    # Average each loss per decision: entity losses per token, relation losses
    # per candidate pair. i in the prints below is the global minibatch counter.
    ent_loss = sum(ent_losses) / all_ent_num
    if all_rel_num == 0:
        rel_loss = 0
    else:
        rel_loss = sum(rel_losses) / all_rel_num
    if config.add_share_loss:
        ent_span_loss = sum(ent_span_losses) / all_ent_num
        if all_rel_bin_num == 0:
            rel_bin_loss = 0
        else:
            rel_bin_loss = sum(rel_bin_losses) / all_rel_bin_num
        if config.add_trans_loss:
            trans_loss = sum(trans_losses) / len(trans_losses)
            loss = ent_loss + rel_loss + ent_span_loss + rel_bin_loss + trans_loss
            print('Domain : %d Epoch : %d Minibatch : %d Loss : %.5f\t(%.5f, %.5f, %.5f, %.5f, %.5f)' % (
                dom_id, i_epoch, i, loss, ent_loss, rel_loss, ent_span_loss, rel_bin_loss, trans_loss))
        else:
            loss = ent_loss + rel_loss + ent_span_loss + rel_bin_loss
            print('Domain : %d Epoch : %d Minibatch : %d Loss : %.5f\t(%.5f, %.5f, %.5f, %.5f)' % (
                dom_id, i_epoch, i, loss, ent_loss, rel_loss, ent_span_loss, rel_bin_loss))
    else:
        loss = ent_loss + rel_loss
        print('Domain : %d Epoch : %d Minibatch : %d Loss : %.5f\t(%.5f, %.5f)' % (
            dom_id, i_epoch, i, loss, ent_loss, rel_loss))
    eval_path = os.path.join(config.save_dir, "validate.dev.output.Domain_%d" % dom_id)
    share_eval_path = os.path.join(config.save_dir, "validate.dev.output.share.Domain_%d" % dom_id)
    utils.print_predictions(new_tensors, predictions, eval_path, word_vocab,
                            dom2corpus[dom_id]['chunk_vocab'],
                            dom2corpus[dom_id]['rel_vocab'])
    entity_score, relation_score = eval_file(eval_path)
    if config.add_share_loss:
        print("Share Task Evaluation (Dev)...")
        utils.print_share_predictions(new_tensors, share_predictions, share_eval_path,
                                      word_vocab, dom2corpus[dom_id]['ent_span_vocab'])
        eval_file(share_eval_path)
    return relation_score
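
# Worked illustration (hypothetical numbers) of the normalization above: each
# loss term is averaged over its own decision count before the terms are
# summed, so entity and relation losses are on a comparable per-decision scale.
#   ent_losses = [12.0, 9.0], all_ent_num = 300  ->  ent_loss = 21.0 / 300 = 0.07
#   rel_losses = [4.0, 2.0],  all_rel_num = 40   ->  rel_loss =  6.0 / 40  = 0.15
#   loss = ent_loss + rel_loss = 0.22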