# NOTE(review): this chunk begins mid-call (it closes a parser.add_argument(...)
# whose opening line is outside this view) and ends mid-statement. It is a
# truncated, line-collapsed fragment of the factuality span-scoring script and
# duplicates part of the fuller copy found elsewhere in this file.
                    help='number of sentences to consider, 0 for all')
args = parser.parse_args()

# Emit a timestamp for the evaluation run.
# NOTE(review): `call` is presumably subprocess.call — its import is outside
# this view; confirm before relying on it.
call('date')

# Accumulates one comparison result per file, keyed by measurement name.
data = defaultdict(list)

# Warn when the response directory contains files with no counterpart in the
# key directory — those extra response files are silently skipped below.
if len(os.listdir(args.key)) < len(os.listdir(args.response)):
    sys.stderr.write(
        'WARN: response folder holds more files than key folder. Some files will be ignored.\n'
    )

# Score every key (gold) file against the same-named response (system) file.
for fname in os.listdir(args.key):
    if args.measurement == 'spans':
        sys.stderr.write("Current file name: %s\n" % fname)
        # Read the gold annotations. The file is re-opened for each reader
        # because first_n_sentences consumes the open file object.
        path = os.path.join(args.key, fname)
        with open(path) as f:
            key_event = read_event_spans_conll(
                first_n_sentences(f, args.n), path)
        with open(path) as f:
            key_polarity = read_polarity_spans_conll(
                first_n_sentences(f, args.n), path)
        with open(path) as f:
            key_certainty = read_certainty_spans_conll(
                first_n_sentences(f, args.n), path)
        with open(path) as f:
            key_tense = read_tense_spans_conll(
                first_n_sentences(f, args.n), path)
        # Read the system annotations, if a same-named response file exists.
        path = os.path.join(args.response, fname)
        if os.path.exists(path):
            with open(path) as f:
                res_event = read_event_spans_conll(
                    first_n_sentences(f, args.n), path)
            with open(path) as f:
                # NOTE(review): the fragment is cut off here, mid-statement.
# Command-line evaluation script: scores a system's response files against
# gold "key" files for factuality annotations (events, polarity, certainty,
# tense) read from CoNLL-format span files.
parser = argparse.ArgumentParser(description='Score the response of a system at factuality.')
parser.add_argument('key', help='path to a directory containing all key files')
parser.add_argument('response', help='path to a directory containing all response files')
parser.add_argument('measurement', help='measure performance on tokens or spans. possible values: tokens, spans')
parser.add_argument('-n', type=int, default=5,
                    help='number of sentences to consider, 0 for all')
args = parser.parse_args()

# Emit a timestamp for the evaluation run.
# NOTE(review): `call` is presumably subprocess.call — its import is outside
# this view; confirm before relying on it.
call('date')

# Accumulates one comparison result per file, keyed by measurement name
# ('event', 'polarity', ...).
data = defaultdict(list)

# Warn when the response directory contains files with no counterpart in the
# key directory — those extra response files are silently skipped below.
if len(os.listdir(args.key)) < len(os.listdir(args.response)):
    sys.stderr.write('WARN: response folder holds more files than key folder. Some files will be ignored.\n')

# Score every key (gold) file against the same-named response (system) file.
for fname in os.listdir(args.key):
    if args.measurement == 'spans':
        sys.stderr.write("Current file name: %s\n" % fname)
        # Read the gold annotations. The file is re-opened for each reader
        # because first_n_sentences consumes the open file object.
        path = os.path.join(args.key, fname)
        with open(path) as f:
            key_event = read_event_spans_conll(first_n_sentences(f, args.n), path)
        with open(path) as f:
            key_polarity = read_polarity_spans_conll(first_n_sentences(f, args.n), path)
        with open(path) as f:
            key_certainty = read_certainty_spans_conll(first_n_sentences(f, args.n), path)
        with open(path) as f:
            key_tense = read_tense_spans_conll(first_n_sentences(f, args.n), path)
        # Read the system annotations; a missing response file is scored as
        # an empty annotation set (all gold spans counted as missed).
        path = os.path.join(args.response, fname)
        if os.path.exists(path):
            with open(path) as f:
                res_event = read_event_spans_conll(first_n_sentences(f, args.n), path)
            with open(path) as f:
                res_polarity = read_polarity_spans_conll(first_n_sentences(f, args.n), path)
            with open(path) as f:
                res_certainty = read_certainty_spans_conll(first_n_sentences(f, args.n), path)
            with open(path) as f:
                res_tense = read_tense_spans_conll(first_n_sentences(f, args.n), path)
        else:
            res_event = res_polarity = res_certainty = res_tense = set()
        # Event spans are compared directly; polarity spans are compared
        # relative to the event spans they depend on.
        data['event'].append(compare_spans(key_event, res_event))
        data['polarity'].append(compare_dependent_spans(key_polarity, res_polarity, key_event, res_event))
        # Additionally break the polarity comparison down per annotation type.
        key_by_type, res_by_type = group_by_type(key_polarity), group_by_type(res_polarity)
        for type_ in set(key_by_type).union(set(res_by_type)):
            # NOTE(review): this chunk is cut off here — the body of this
            # per-type loop lies outside the visible source.
# Command-line evaluation script: scores a system's response files against
# gold "key" files for Named-Entity Disambiguation, comparing annotated spans
# read from CoNLL-format files under both exact and partial matching.
import argparse
parser = argparse.ArgumentParser(description='Score the response of a system at Named-Entity Disambiguation.')
parser.add_argument('key', help='path to a directory containing all key files')
parser.add_argument('response', help='path to a directory containing all response files')
parser.add_argument('-n', type=int, default=5,
                    help='number of sentences to consider, 0 for all')
args = parser.parse_args()

# Emit a timestamp for the evaluation run.
# NOTE(review): `call` is presumably subprocess.call — its import is outside
# this view; confirm before relying on it.
call('date')
test_all()  # never run evaluation script without thorough testing

# Accumulates one comparison result per file, keyed by match mode
# ('exact', 'partial').
data = defaultdict(list)

# Warn when the response directory contains files with no counterpart in the
# key directory — those extra response files are silently skipped below.
if len(os.listdir(args.key)) < len(os.listdir(args.response)):
    sys.stderr.write('WARN: response folder holds more files than key folder. Some files will be ignored.\n')

# Score every key (gold) file against the same-named response (system) file.
for fname in os.listdir(args.key):
    path = os.path.join(args.key, fname)
    with open(path) as f:
        key = read_spans_conll(first_n_sentences(f, args.n), path)
    # A missing response file is scored as an empty annotation set
    # (all gold spans counted as missed).
    path = os.path.join(args.response, fname)
    if os.path.exists(path):
        with open(path) as f:
            res = read_spans_conll(first_n_sentences(f, args.n), path)
    else:
        res = set()
    data['exact'].append(compare_spans_exact(key, res))
    data['partial'].append(compare_spans_partial(key, res))

# Aggregate per-file results and report performance for each match mode.
for name in data:
    print('\n\nPerformance (%s spans):\n' % name)
    p = compute_performance(data[name])
    # Adjacent string literals concatenate into one format string.
    # NOTE(review): this chunk is cut off here — the closing of this print
    # call (and its format arguments) lies outside the visible source.
    print('# response total: %d\n'
          '# missed: %d\n'
          '# invented: %d\n\n'
          'Micro average:\n'