def evaluate_batch(batch_num, eval_batch, model, tensorboard, val_type_name, goal):
  model.eval()
  loss, output_logits = model(eval_batch, val_type_name)
  output_index = get_output_index(output_logits)
  eval_loss = loss.item()  # extract the scalar loss value
  eval_loss_str = 'Eval loss: {0:.7f} at step {1:d}'.format(eval_loss, batch_num)
  gold_pred = get_gold_pred_str(output_index, eval_batch['y'].data.cpu().clone(), goal)
  # Strict accuracy: an example counts as correct only if the predicted type
  # set exactly matches the gold type set.
  eval_accu = sum([set(y) == set(yp) for y, yp in gold_pred]) * 1.0 / len(gold_pred)
  tensorboard.add_validation_scalar('eval_acc_' + val_type_name, eval_accu, batch_num)
  tensorboard.add_validation_scalar('eval_loss_' + val_type_name, eval_loss, batch_num)
  eval_str = get_eval_string(gold_pred)
  print(val_type_name + ":" + eval_loss_str)
  print(gold_pred[:3])
  print(val_type_name + ":" + eval_str)
  logging.info(val_type_name + ":" + eval_loss_str)
  logging.info(val_type_name + ":" + eval_str)
  model.train()
  return eval_loss
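# All of the evaluation helpers in this file depend on get_output_index. Below
# is a minimal sketch of one plausible implementation, assuming `logits` is a
# (batch_size, num_types) tensor of raw scores; the sigmoid, the 0.5 default,
# and the argmax fallback are assumptions, not necessarily what this repo does.
import torch

def get_output_index_sketch(logits, threshold=0.5):
  """Per example, return indices of all types whose probability clears
  `threshold`; fall back to the single highest-scoring type if none do."""
  probs = torch.sigmoid(logits).data.cpu().numpy()
  pred_idx = []
  for row in probs:
    above = [i for i, p in enumerate(row) if p > threshold]
    pred_idx.append(above if above else [int(row.argmax())])
  return pred_idx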
def evaluate_data_cv(k_fold_count, batch_num, model, tensorboard, val_type_name,
                     args, elmo, actual_f1=False):
  model.eval()
  data_gen = get_data_gen(
    'crowd/cv_3fold/dev_tree_{0}.json'.format(repr(k_fold_count)), 'test', args,
    (constant.CHAR_DICT, None), args.goal, elmo=elmo)
  gold_pred = []
  annot_ids = []
  eval_loss = 0.
  total_ex_count = 0
  print('==> evaluate_data_cv')
  for n, batch in enumerate(data_gen):
    total_ex_count += len(batch['y'])
    eval_batch, annot_id = to_torch(batch)
    loss, output_logits, _ = model(eval_batch, val_type_name)
    if actual_f1:
      output_index = get_output_index(output_logits, threshold=args.threshold)
    else:
      output_index = get_output_index_rank(output_logits, topk=args.topk)
    y = eval_batch['y'].data.cpu().clone().numpy()
    # Accumulate across batches; assigning here would keep only the last batch
    # and misalign gold_pred with annot_ids below.
    gold_pred += get_gold_pred_str(output_index, y, args.goal)
    annot_ids.extend(annot_id)
    eval_loss += loss.item()
  eval_accu = sum([set(y) == set(yp) for y, yp in gold_pred]) * 1.0 / len(gold_pred)
  eval_str = get_eval_string(gold_pred)
  eval_loss_str = 'Eval loss: {0:.7f} at step {1:d}'.format(eval_loss, batch_num)
  tensorboard.add_validation_scalar('eval_acc_' + val_type_name, eval_accu, batch_num)
  tensorboard.add_validation_scalar('eval_loss_' + val_type_name, eval_loss, batch_num)
  print('EVAL: seen ' + repr(total_ex_count) + ' examples.')
  print(val_type_name + ":" + eval_loss_str)
  print(val_type_name + ":" + eval_str)
  logging.info(val_type_name + ":" + eval_loss_str)
  logging.info(val_type_name + ":" + eval_str)
  model.train()
  data_gen = None
  output_dict = {}
  for a_id, (gold, pred) in zip(annot_ids, gold_pred):
    output_dict[a_id] = {"gold": gold, "pred": pred}
  return eval_loss, output_dict
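# evaluate_data_cv scores a single fold of the crowd/cv_3fold/dev_tree_{k}.json
# split. A small driver like the sketch below (hypothetical, not part of this
# repo) would aggregate losses and merge per-annotation outputs across the
# three folds; it assumes annotation ids are unique across folds.
def run_cv_sketch(batch_num, model, tensorboard, val_type_name, args, elmo):
  total_loss = 0.
  merged_outputs = {}
  for k in range(3):  # the dev_tree files are indexed 0..2
    fold_loss, fold_out = evaluate_data_cv(
      k, batch_num, model, tensorboard, val_type_name, args, elmo)
    total_loss += fold_loss
    merged_outputs.update(fold_out)
  return total_loss / 3., merged_outputs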
def evaluate_data(batch_num, dev_fname, model, tensorboard, val_type_name, args,
                  elmo, bert, actual_f1=True, vocab=None):
  model.eval()
  if vocab is None:
    vocab = (constant.CHAR_DICT, None)
  dev_gen = get_data_gen(dev_fname, 'test', args, vocab, args.goal, elmo=elmo, bert=bert)
  gold_pred = []
  eval_loss = 0.
  total_ex_count = 0
  cls_logits = None  # stays None unless the model also produces a binary CLS head
  if args.mode in ['train_labeler', 'test_labeler']:
    cls_correct = 0.
    cls_total = 0.
    cls_tp = 0.
    cls_t_gold = 0.
    cls_t_pred = 0.
  for n, batch in enumerate(dev_gen):
    total_ex_count += len(batch['y'])
    eval_batch, annot_ids = to_torch(batch)
    if args.mode in ['train_labeler', 'test_labeler']:
      loss, output_logits, cls_logits = model(eval_batch, val_type_name)
      if cls_logits is not None:
        # Binary CLS metrics: a logit > 0 counts as a positive prediction.
        cls_correct += sum([(1. if pred > 0. else 0.) == gold
                            for pred, gold in zip(cls_logits, batch['y_cls'])])
        cls_total += float(cls_logits.size()[0])
        cls_tp += sum([(1. if pred > 0. else 0.) == 1. and gold == 1.
                       for pred, gold in zip(cls_logits, batch['y_cls'])])
        cls_t_gold += float(sum(batch['y_cls']))
        cls_t_pred += float(sum([1. if pred > 0. else 0. for pred in cls_logits]))
    else:
      loss, output_logits, _ = model(eval_batch, val_type_name)
    output_index = get_output_index(output_logits, threshold=args.threshold)
    gold_pred += get_gold_pred_str(output_index, eval_batch['y'].data.cpu().clone(), args.goal)
    eval_loss += loss.item()
  eval_accu = sum([set(y) == set(yp) for y, yp in gold_pred]) * 1.0 / len(gold_pred)
  eval_str = get_eval_string(gold_pred)
  _, _, _, _, _, macro_f1 = eval_metric.macro(gold_pred)
  eval_loss_str = 'Eval loss: {0:.7f} at step {1:d}'.format(eval_loss, batch_num)
  tensorboard.add_validation_scalar('eval_acc_' + val_type_name, eval_accu, batch_num)
  tensorboard.add_validation_scalar('eval_loss_' + val_type_name, eval_loss, batch_num)
  print('EVAL: seen ' + repr(total_ex_count) + ' examples.')
  print(val_type_name + ":" + eval_loss_str)
  print(gold_pred[:3])
  if args.mode in ['train_labeler', 'test_labeler'] and cls_logits is not None:
    cls_accuracy = cls_correct / cls_total * 100.
    cls_precision = cls_tp / cls_t_pred
    cls_recall = cls_tp / cls_t_gold
    cls_f1 = f1(cls_precision, cls_recall)
    cls_str = ' CLS accuracy: {0:.2f}% P: {1:.3f} R: {2:.3f} F1: {3:.3f}'.format(
      cls_accuracy, cls_precision, cls_recall, cls_f1)
    print(val_type_name + ":" + eval_str + cls_str)
  else:
    print(val_type_name + ":" + eval_str)
  logging.info(val_type_name + ":" + eval_loss_str)
  logging.info(val_type_name + ":" + eval_str)
  model.train()
  dev_gen = None
  return eval_loss, macro_f1
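# The CLS metrics above call f1(precision, recall). A standard harmonic-mean
# implementation is sketched here under the assumption that this is all the
# helper does; the zero guard is our addition.
def f1_sketch(p, r):
  return 2. * p * r / (p + r) if p + r > 0. else 0.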
def _test(args):
  assert args.load
  test_fname = args.eval_data
  data_gens = get_datasets([(test_fname, 'test', args.goal)], args)
  model = models.Model(args, constant.ANSWER_NUM_DICT[args.goal])
  model.cuda()
  model.eval()
  load_model(args.reload_model_name, constant.EXP_ROOT, args.model_id, model)
  for name, dataset in [(test_fname, data_gens[0])]:
    print('Processing... ' + name)
    total_gold_pred = []
    total_annot_ids = []
    total_probs = []
    total_ys = []
    for batch_num, batch in enumerate(dataset):
      eval_batch, annot_ids = to_torch(batch)
      loss, masked_logits, raw_logits, mask = model(eval_batch, args.goal)
      print(mask)  # debug: inspect the type mask for this batch
      # Evaluate on the raw (unmasked) logits; swap in masked_logits here to
      # score the masked variant instead.
      output_index = get_output_index(raw_logits)
      output_prob = raw_logits.data.cpu().clone().numpy()
      y = eval_batch['y'].data.cpu().clone().numpy()
      gold_pred = get_gold_pred_str(output_index, y, args.goal)
      total_probs.extend(output_prob)
      total_ys.extend(y)
      total_gold_pred.extend(gold_pred)
      total_annot_ids.extend(annot_ids)
    mrr_val = mrr(total_probs, total_ys)
    print('mrr_value: ', mrr_val)
    pickle.dump({'gold_id_array': total_ys, 'pred_dist': total_probs},
                open('./{0:s}.p'.format(args.reload_model_name), "wb"))
    with open('./{0:s}.json'.format(args.reload_model_name), 'w') as f_out:
      output_dict = {}
      for a_id, (gold, pred) in zip(total_annot_ids, total_gold_pred):
        output_dict[a_id] = {"gold": gold, "pred": pred}
      json.dump(output_dict, f_out, indent=2)
    eval_str = get_eval_string(total_gold_pred)
    print(eval_str)
    logging.info('processing: ' + name)
    logging.info(eval_str)
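# _test reports mrr(total_probs, total_ys). A sketch of mean reciprocal rank
# for this multi-label setting, assuming `probs` is a list of per-type score
# arrays and `ys` a parallel list of binary gold vectors; averaging the
# reciprocal ranks of all gold types per example is our assumption.
import numpy as np

def mrr_sketch(probs, ys):
  reciprocal_ranks = []
  for scores, gold in zip(probs, ys):
    order = np.argsort(-np.asarray(scores))  # best score first
    rank_of = {idx: rank + 1 for rank, idx in enumerate(order)}
    gold_idx = [i for i, g in enumerate(gold) if g == 1]
    reciprocal_ranks.extend(1. / rank_of[i] for i in gold_idx)
  return float(np.mean(reciprocal_ranks)) if reciprocal_ranks else 0.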
def _test(args):
  assert args.load
  test_fname = args.eval_data
  data_gens = get_datasets([(test_fname, 'test', args.goal)], args)
  model = models.Model(args, constant.ANSWER_NUM_DICT[args.goal])
  model.cuda()
  model.eval()
  saved_path = constant.EXP_ROOT
  model.load_state_dict(
    torch.load(saved_path + '/' + args.model_id + '_best.pt')["state_dict"])
  for name, dataset in [(test_fname, data_gens[0])]:
    print('Processing... ' + name)
    # Sweep decision thresholds on a single evaluation batch and record the
    # precision/recall pair at each threshold.
    batch = next(dataset)
    eval_batch, annot_ids = to_torch(batch)
    loss, output_logits = model(eval_batch, args.goal)
    threshes = np.arange(0, 1, 0.005)
    p_and_r = []
    for thresh in tqdm(threshes):
      total_gold_pred = []
      total_annot_ids = []
      total_probs = []
      total_ys = []
      print('thresh {}'.format(thresh))
      output_index = get_output_index(output_logits, thresh)
      output_prob = model.sigmoid_fn(output_logits).data.cpu().clone().numpy()
      y = eval_batch['y'].data.cpu().clone().numpy()
      gold_pred = get_gold_pred_str(output_index, y, args.goal)
      total_probs.extend(output_prob)
      total_ys.extend(y)
      total_gold_pred.extend(gold_pred)
      total_annot_ids.extend(annot_ids)
      eval_str, p, r = get_eval_string(total_gold_pred)
      p_and_r.append([p, r])
      print(eval_str)
    np.save(saved_path + '/baseline_pr_dev', p_and_r)
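# The threshold sweep above saves an (n_thresholds, 2) array of [P, R] pairs
# to baseline_pr_dev.npy. A quick way to inspect it; matplotlib is assumed
# available and this helper is not part of the repo.
import numpy as np
import matplotlib.pyplot as plt

def plot_pr_curve_sketch(path):
  p_and_r = np.load(path)  # shape: (n_thresholds, 2), columns [P, R]
  plt.plot(p_and_r[:, 1], p_and_r[:, 0])  # recall on x, precision on y
  plt.xlabel('Recall')
  plt.ylabel('Precision')
  plt.savefig('baseline_pr_dev.png')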
def evaluate_data(batch_num, dev_fname, model, args, elmo, device, char_vocab,
                  dev_type='original'):
  model.eval()
  dev_gen = get_data_gen(dev_fname, 'test', args, char_vocab, elmo=elmo)
  gold_pred = []
  eval_loss = 0.
  total_ex_count = 0
  for batch in tqdm(dev_gen):
    total_ex_count += len(batch['y'])
    eval_batch, annot_ids = to_torch(batch, device)
    loss, output_logits, _ = model(eval_batch)
    output_index = get_output_index(output_logits, threshold=args.threshold)
    gold_pred += get_gold_pred_str(output_index, eval_batch['y'].data.cpu().clone(), args.goal)
    eval_loss += loss.item()
  eval_str = get_eval_string(gold_pred)
  _, _, _, _, _, macro_f1 = eval_metric.macro(gold_pred)
  eval_loss_str = 'Eval loss: {0:.7f} at step {1:d}'.format(eval_loss, batch_num)
  print('==> ' + dev_type + ' EVAL: seen ' + repr(total_ex_count) + ' examples.')
  print(eval_loss_str)
  print(gold_pred[:3])
  print('==> ' + dev_type + ' : ' + eval_str)
  logging.info(eval_loss_str)
  logging.info(eval_str)
  model.train()
  return eval_loss, macro_f1
def _test_labeler(args):
  assert args.load
  test_fname = args.eval_data
  data_gens, _ = get_datasets([(test_fname, 'test', args.goal)], args)
  if args.model_type == 'labeler':
    print('==> Labeler')
    model = denoising_models.Labeler(args, constant.ANSWER_NUM_DICT[args.goal])
  elif args.model_type == 'filter':
    print('==> Filter')
    model = denoising_models.Filter(args, constant.ANSWER_NUM_DICT[args.goal])
  else:
    print('Invalid model type: -model_type ' + args.model_type)
    raise NotImplementedError
  model.cuda()
  model.eval()
  load_model(args.reload_model_name, constant.EXP_ROOT, args.model_id, model)
  for name, dataset in [(test_fname, data_gens[0])]:
    print('Processing... ' + name)
    total_gold_pred_pcls_ycls_ynoise = []
    total_annot_ids = []
    for batch_num, batch in enumerate(dataset):
      print(batch_num)
      if not isinstance(batch, dict):
        print('==> batch: ', batch)
      eval_batch, annot_ids = to_torch(batch)
      loss, output_logits, cls_logits = model(eval_batch, args.goal)
      output_index = get_output_index(output_logits, threshold=args.threshold)
      y = eval_batch['y'].data.cpu().clone().numpy()
      y_cls = eval_batch['y_cls'].data.cpu().clone().numpy()
      y_noisy_idx = eval_batch['y_noisy_idx'].data.cpu().clone().numpy()
      gold_pred_pcls_ycls_ynoise = get_gold_pred_str(
        output_index, y, args.goal, cls_logits=cls_logits, y_cls=y_cls,
        y_noisy_idx=y_noisy_idx)
      total_gold_pred_pcls_ycls_ynoise.extend(gold_pred_pcls_ycls_ynoise)
      total_annot_ids.extend(annot_ids)
    pickle.dump((total_annot_ids, total_gold_pred_pcls_ycls_ynoise),
                open('./{0:s}_gold_pred.p'.format(args.reload_model_name), "wb"))
    with open('./{0:s}.json'.format(args.model_id), 'w') as f_out:
      output_dict = {}
      if args.model_type == 'filter':
        for a_id, (gold, pred, cls, ycls, ynoise) in zip(
            total_annot_ids, total_gold_pred_pcls_ycls_ynoise):
          output_dict[a_id] = {"gold": gold, "pred": pred, "cls_pred": cls,
                               "cls_gold": ycls, "y_noisy": ynoise}
      elif args.model_type == 'labeler':
        for a_id, (gold, pred) in zip(total_annot_ids,
                                      total_gold_pred_pcls_ycls_ynoise):
          output_dict[a_id] = {"gold": gold, "pred": pred}
      else:
        print('Invalid model type: -model_type ' + args.model_type)
        raise NotImplementedError
      json.dump(output_dict, f_out)
    # Score (gold, pred) pairs over the full dataset, not just the last batch.
    eval_str = get_eval_string(
      list(zip(*list(zip(*total_gold_pred_pcls_ycls_ynoise))[:2])))
    print(eval_str)
    logging.info('processing: ' + name)
    logging.info(eval_str)
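# _test_labeler writes both a pickle and a JSON of per-annotation outputs.
# A small reader for error analysis; this is a hypothetical helper, with file
# names following the dump calls above and gold/pred assumed to be lists of
# type strings.
import json

def load_labeler_outputs_sketch(model_id):
  with open('./{0:s}.json'.format(model_id)) as f:
    outputs = json.load(f)
  # e.g. collect annotations where the prediction missed every gold type
  misses = {a_id: rec for a_id, rec in outputs.items()
            if not set(rec['gold']) & set(rec['pred'])}
  return outputs, misses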
def _test(args):
  assert args.load
  test_fname = args.eval_data
  data_gens, _ = get_datasets([(test_fname, 'test', args.goal)], args)
  if args.model_type == 'et_model':
    print('==> Entity Typing Model')
    model = models.ETModel(args, constant.ANSWER_NUM_DICT[args.goal])
  elif args.model_type == 'bert_uncase_small':
    print('==> Bert Uncased Small')
    model = models.Bert(args, constant.ANSWER_NUM_DICT[args.goal])
  else:
    print('Invalid model type: -model_type ' + args.model_type)
    raise NotImplementedError
  model.cuda()
  model.eval()
  load_model(args.reload_model_name, constant.EXP_ROOT, args.model_id, model)
  for name, dataset in [(test_fname, data_gens[0])]:
    print('Processing... ' + name)
    total_gold_pred = []
    total_annot_ids = []
    for batch_num, batch in enumerate(dataset):
      print(batch_num)
      if not isinstance(batch, dict):
        print('==> batch: ', batch)
      eval_batch, annot_ids = to_torch(batch)
      loss, output_logits, attn_score = model(eval_batch, args.goal)
      output_index = get_output_index(output_logits, threshold=args.threshold)
      y = eval_batch['y'].data.cpu().clone().numpy()
      gold_pred = get_gold_pred_str(output_index, y, args.goal)
      total_gold_pred.extend(gold_pred)
      total_annot_ids.extend(annot_ids)
    with open('./{0:s}.json'.format(args.reload_model_name), 'w') as f_out:
      output_dict = {}
      for a_id, (gold, pred) in zip(total_annot_ids, total_gold_pred):
        output_dict[a_id] = {"gold": gold, "pred": pred}
      json.dump(output_dict, f_out)
    eval_str = get_eval_string(total_gold_pred)
    print(eval_str)
    logging.info('processing: ' + name)
    logging.info(eval_str)