def evaluation(args, data, split, model, domain, epoch, str_res='results', ner_model=True, predictor=None):
    """Evaluate the parser on ``data`` and return the accumulated metrics.

    The model is switched to eval mode, every batch is run through
    ``model.forward`` and ``model.decode``, and the per-batch statistics
    returned by ``parse.eval_`` are summed into a dict created by
    ``initialize_eval_dict()``.

    NOTE(review): this file contains a second ``def evaluation`` with a
    different signature; if both live in the same module the later one
    rebinds the name -- confirm which one callers are meant to reach.

    Args:
        args: Configuration object. Reads ``batch_size``, ``device``,
            ``alphabets['word_alphabet']``, ``alphabets['pos_alphabet']``
            and ``punct_set``.
        data: Dataset handed to
            ``prepare_data.iterate_batch_rand_bucket_choosing``.
        split: Forwarded unchanged to ``print_results``.
        model: Object providing ``eval()``, ``forward(...)`` and
            ``decode(...)``.
        domain: Forwarded unchanged to ``print_results``.
        epoch: Stored under ``eval_dict['epoch']``.
        str_res: Forwarded unchanged to ``print_results``.
        ner_model: Forwarded to the batch iterator. When it is not ``None``
            (which includes the default ``True``) each batch must carry 16
            fields and the extra ones are passed on to ``model.forward``;
            when it is ``None`` a 9-field batch is expected.
        predictor: Forwarded to the batch iterator.

    Returns:
        The evaluation dict, with the ``dp_*`` counters summed over all
        batches and ``dp_uas`` / ``dp_las`` given as percentages (0.0 when
        no token was scored).
    """
    # Function-scope import: only needed for the no_grad context below.
    import torch

    model.eval()

    eval_dict = initialize_eval_dict()
    eval_dict['epoch'] = epoch

    for batch in prepare_data.iterate_batch_rand_bucket_choosing(
            data, args.batch_size, args.device, ner_model=ner_model, predictor=predictor):
        # Inference only: no autograd graph is needed for forward/decode.
        with torch.no_grad():
            if ner_model is not None:
                (word, char, pos, _ner, heads, arc_tags, _auto_label, masks, lengths,
                 f_f, f_p, b_f, b_p, w_f, mask_v, file_no) = batch
                out_arc, out_arc_tag, masks, lengths = model.forward(
                    word, char, pos, mask=masks, length=lengths,
                    f_f=f_f, f_p=f_p, b_f=b_f, b_p=b_p, w_f=w_f,
                    file_no=file_no, mask_v=mask_v)
            else:
                word, char, pos, _ner, heads, arc_tags, _auto_label, masks, lengths = batch
                out_arc, out_arc_tag, masks, lengths = model.forward(
                    word, char, pos, mask=masks, length=lengths)
            heads_pred, arc_tags_pred, _ = model.decode(
                out_arc, out_arc_tag, mask=masks, length=lengths,
                leading_symbolic=prepare_data.NUM_SYMBOLIC_TAGS)

        # parse.eval_ is fed host-side numpy arrays.
        lengths = lengths.cpu().numpy()
        word = word.data.cpu().numpy()
        pos = pos.data.cpu().numpy()
        heads = heads.data.cpu().numpy()
        arc_tags = arc_tags.data.cpu().numpy()
        heads_pred = heads_pred.data.cpu().numpy()
        arc_tags_pred = arc_tags_pred.data.cpu().numpy()

        stats, stats_nopunc, stats_root, num_inst = parse.eval_(
            word, pos, heads_pred, arc_tags_pred, heads, arc_tags,
            args.alphabets['word_alphabet'], args.alphabets['pos_alphabet'],
            lengths, punct_set=args.punct_set, symbolic_root=True)
        ucorr, lcorr, total, ucm, lcm = stats
        ucorr_nopunc, lcorr_nopunc, total_nopunc, ucm_nopunc, lcm_nopunc = stats_nopunc
        corr_root, total_root = stats_root

        eval_dict['dp_ucorrect'] += ucorr
        eval_dict['dp_lcorrect'] += lcorr
        eval_dict['dp_total'] += total
        eval_dict['dp_ucomplete_match'] += ucm
        eval_dict['dp_lcomplete_match'] += lcm
        eval_dict['dp_ucorrect_nopunc'] += ucorr_nopunc
        eval_dict['dp_lcorrect_nopunc'] += lcorr_nopunc
        eval_dict['dp_total_nopunc'] += total_nopunc
        eval_dict['dp_ucomplete_match_nopunc'] += ucm_nopunc
        eval_dict['dp_lcomplete_match_nopunc'] += lcm_nopunc
        eval_dict['dp_root_correct'] += corr_root
        eval_dict['dp_total_root'] += total_root
        eval_dict['dp_total_inst'] += num_inst

    # Scores are computed over all tokens, punctuation included. Guard the
    # division so an empty data set reports 0.0 instead of raising.
    dp_total = eval_dict['dp_total']
    if dp_total:
        eval_dict['dp_uas'] = eval_dict['dp_ucorrect'] * 100 / dp_total
        eval_dict['dp_las'] = eval_dict['dp_lcorrect'] * 100 / dp_total
    else:
        eval_dict['dp_uas'] = 0.0
        eval_dict['dp_las'] = 0.0

    print_results(eval_dict, split, domain, str_res)
    return eval_dict
def evaluation(args, data, split, model, domain, epoch, flag, str_res='results'):
    """Evaluate the parser on ``data`` and return the accumulated metrics.

    The model is switched to eval mode, every batch from
    ``prepare_data.iterate_batch`` is run through ``model.forward`` and
    ``model.decode``, and the per-batch statistics returned by
    ``parse.eval_`` are summed into a dict created by
    ``initialize_eval_dict()``.

    NOTE(review): this file contains another ``def evaluation`` with a
    different signature; if both live in the same module the later one
    rebinds the name -- confirm which one callers are meant to reach.

    Args:
        args: Configuration object. Reads ``batch_size``, ``device``,
            ``alphabets['word_alphabet']``, ``alphabets['pos_alphabet']``
            and ``punct_set``.
        data: Dataset handed to ``prepare_data.iterate_batch``.
        split: Forwarded unchanged to ``print_results``.
        model: Object providing ``eval()``, ``forward(...)`` and
            ``decode(...)``.
        domain: Forwarded unchanged to ``print_results``.
        epoch: Stored under ``eval_dict['epoch']``.
        flag: Passed positionally to ``model.decode`` right after the
            batch's ``ner`` tensor; its meaning is defined by the model.
        str_res: Forwarded unchanged to ``print_results``.

    Returns:
        The evaluation dict, with the ``dp_*`` counters summed over all
        batches and ``dp_uas`` / ``dp_las`` given as percentages (0.0 when
        no token was scored).
    """
    # Function-scope import: only needed for the no_grad context below.
    import torch

    model.eval()

    eval_dict = initialize_eval_dict()
    eval_dict['epoch'] = epoch

    for batch in prepare_data.iterate_batch(data, args.batch_size, args.device):
        word, char, pos, ner, heads, arc_tags, _auto_label, masks, lengths = batch

        # Inference only: no autograd graph is needed for forward/decode.
        with torch.no_grad():
            out_arc, out_arc_tag, masks, lengths = model.forward(
                word, char, pos, mask=masks, length=lengths)
            heads_pred, arc_tags_pred, _ = model.decode(
                out_arc, out_arc_tag, ner, flag, mask=masks, length=lengths,
                leading_symbolic=prepare_data.NUM_SYMBOLIC_TAGS)

        # parse.eval_ is fed host-side numpy arrays.
        lengths = lengths.cpu().numpy()
        word = word.data.cpu().numpy()
        pos = pos.data.cpu().numpy()
        heads = heads.data.cpu().numpy()
        arc_tags = arc_tags.data.cpu().numpy()
        heads_pred = heads_pred.data.cpu().numpy()
        arc_tags_pred = arc_tags_pred.data.cpu().numpy()

        stats, stats_nopunc, stats_root, num_inst = parse.eval_(
            word, pos, heads_pred, arc_tags_pred, heads, arc_tags,
            args.alphabets['word_alphabet'], args.alphabets['pos_alphabet'],
            lengths, punct_set=args.punct_set, symbolic_root=True)
        ucorr, lcorr, total, ucm, lcm = stats
        ucorr_nopunc, lcorr_nopunc, total_nopunc, ucm_nopunc, lcm_nopunc = stats_nopunc
        corr_root, total_root = stats_root

        eval_dict['dp_ucorrect'] += ucorr
        eval_dict['dp_lcorrect'] += lcorr
        eval_dict['dp_total'] += total
        eval_dict['dp_ucomplete_match'] += ucm
        eval_dict['dp_lcomplete_match'] += lcm
        eval_dict['dp_ucorrect_nopunc'] += ucorr_nopunc
        eval_dict['dp_lcorrect_nopunc'] += lcorr_nopunc
        eval_dict['dp_total_nopunc'] += total_nopunc
        eval_dict['dp_ucomplete_match_nopunc'] += ucm_nopunc
        eval_dict['dp_lcomplete_match_nopunc'] += lcm_nopunc
        eval_dict['dp_root_correct'] += corr_root
        eval_dict['dp_total_root'] += total_root
        eval_dict['dp_total_inst'] += num_inst

    # Scores are computed over all tokens, punctuation included. Guard the
    # division so an empty data set reports 0.0 instead of raising.
    dp_total = eval_dict['dp_total']
    if dp_total:
        eval_dict['dp_uas'] = eval_dict['dp_ucorrect'] * 100 / dp_total
        eval_dict['dp_las'] = eval_dict['dp_lcorrect'] * 100 / dp_total
    else:
        eval_dict['dp_uas'] = 0.0
        eval_dict['dp_las'] = 0.0

    print_results(eval_dict, split, domain, str_res)
    return eval_dict