示例#1
0
def main(args, classify):
    init.argcheck(args, 4, 4, 'Identify errors in parser output',
                  '<gold> <test> <prefix_for_output_files>')

    # Output setup
    out_dict = {
        'out': sys.stdout,
        'err': sys.stderr,
        'gold_trees': sys.stdout,
        'test_trees': sys.stdout,
        'error_counts': sys.stdout
    }
    prefix = args[3]
    out_dict['out'] = open(prefix + '.out', 'w')
    out_dict['err'] = open(prefix + '.log', 'w')
    out_dict['gold_trees'] = open(prefix + '.gold_trees', 'w')
    out_dict['test_trees'] = open(prefix + '.test_trees', 'w')
    out_dict['error_counts'] = open(prefix + '.error_counts', 'w')
    out_dict['init_errors'] = open(prefix + '.init_errors', 'w')
    init.header(args, out_dict.values())

    # Classification
    print >> out_dict['out'], "Printing tree transformations"
    print >> out_dict['err'], "Printing tree transformations"
    gold_in = open(args[1])
    test_in = sys.stdin if args[2] == '-' else open(args[2])
    sent_no = 0
    error_counts = defaultdict(lambda: [])
    while True:
        sent_no += 1
        gold_text = gold_in.readline()
        test_text = test_in.readline()
        if gold_text == '' and test_text == '':
            print >> out_dict['err'], "End of both input files"
            break
        elif gold_text == '':
            print >> out_dict['err'], "End of gold input"
            break
        elif test_text == '':
            print >> out_dict['err'], "End of test input"
            break

        print >> out_dict['out'], "Sentence {}:".format(sent_no)
        print >> out_dict['err'], "Sentence {}:".format(sent_no)
        print >> out_dict['init_errors'], "Sentence {}:".format(sent_no)
        compare(gold_text.strip(), test_text.strip(), out_dict, error_counts,
                classify)
        print >> out_dict['init_errors'], "\n"

    # Results
    counts_to_print = []
    for error in error_counts:
        if error == 'UNSET init':
            continue
        counts_to_print.append(
            (len(error_counts[error]), sum(error_counts[error]), error))
    counts_to_print.sort(reverse=True)
    for error in counts_to_print:
        print >> out_dict['error_counts'], "{} {} {}".format(*error)
def main(args, classify):
	init.argcheck(args, 4, 4, 'Identify errors in parser output', '<gold> <test> <prefix_for_output_files>')

	# Output setup
	out_dict = {
		'out': sys.stdout,
		'err': sys.stderr,
		'gold_trees': sys.stdout,
		'test_trees': sys.stdout,
		'error_counts': sys.stdout
	}
	prefix = args[3]
	out_dict['out'] = open(prefix + '.out', 'w')
	out_dict['err'] = open(prefix + '.log', 'w')
	out_dict['gold_trees'] = open(prefix + '.gold_trees', 'w')
	out_dict['test_trees'] = open(prefix + '.test_trees', 'w')
	out_dict['error_counts'] = open(prefix + '.error_counts', 'w')
	out_dict['init_errors'] = open(prefix + '.init_errors', 'w')
	init.header(args, out_dict.values())

	# Classification
	print >> out_dict['out'], "Printing tree transformations"
	print >> out_dict['err'], "Printing tree transformations"
	gold_in = open(args[1])
	test_in = sys.stdin if args[2] == '-' else open(args[2])
	sent_no = 0
	error_counts = defaultdict(lambda: [])
	while True:
		sent_no += 1
		gold_text = gold_in.readline()
		test_text = test_in.readline()
		if gold_text == '' and test_text == '':
			print >> out_dict['err'], "End of both input files"
			break
		elif gold_text == '':
			print >> out_dict['err'], "End of gold input"
			break
		elif test_text == '':
			print >> out_dict['err'], "End of test input"
			break

		print >> out_dict['out'], "Sentence {}:".format(sent_no)
		print >> out_dict['err'], "Sentence {}:".format(sent_no)
		print >> out_dict['init_errors'], "Sentence {}:".format(sent_no)
		compare(gold_text.strip(), test_text.strip(), out_dict, error_counts, classify)
		print >> out_dict['init_errors'], "\n"

	# Results
	counts_to_print = []
	for error in error_counts:
		if error == 'UNSET init':
			continue
		counts_to_print.append((len(error_counts[error]), sum(error_counts[error]), error))
	counts_to_print.sort(reverse=True)
	for error in counts_to_print:
		print >> out_dict['error_counts'], "{} {} {}".format(*error)
示例#3
0
def main():
    """Command-line entry point that writes <prefix>.table and its log.

    Positional arguments read from sys.argv:
        1: output prefix
        2: input handed to get_data()
        3: (optional) input handed to get_mapping()
        4: (optional) error order file
        5: (optional) system order file

    For each optional file that was not supplied, the corresponding
    ordering/mapping is written out next to the table as
    <prefix>.table.system_order, <prefix>.table.error_order or
    <prefix>.table.name_mapping.
    """
    # TODO, shift to a uniform style of module documentation, then just skip all of this!
    # The module docstring doubles as the usage text: first line is the
    # description, second line the argument summary, the rest extra detail.
    doc_lines = __doc__.split("\n")
    usage = doc_lines[1]
    details = "\n".join(doc_lines[2:])
    summary = doc_lines[0]
    init.argcheck(sys.argv, 3, 7, summary, usage, details)

    table_path = sys.argv[1] + ".table"
    out = open(table_path, "w")
    log = open(table_path + ".log", "w")
    init.header(sys.argv, log)

    argc = len(sys.argv)
    data = get_data(sys.argv[2])
    mapping = get_mapping(sys.argv[3]) if argc > 3 else {}
    system_order_file = sys.argv[5] if argc > 5 else None
    error_order_file = sys.argv[4] if argc > 4 else None
    system_order, error_order, extra_info = get_order(
        data, system_order_file, error_order_file)

    print >> log, "System order:", system_order
    print >> log, "Error order:", error_order

    print_top(error_order, extra_info, out)
    print_data(system_order, error_order, data, mapping, extra_info, out)
    print_bottom(error_order, extra_info, out)

    # Write out whatever the caller did not provide on the command line.
    if argc <= 5:
        order_handle = open(table_path + ".system_order", "w")
        print_system_order(system_order, extra_info, order_handle)
        order_handle.close()
    if argc <= 4:
        order_handle = open(table_path + ".error_order", "w")
        print_error_order(error_order, order_handle)
        order_handle.close()
    if argc <= 3:
        mapping_handle = open(table_path + ".name_mapping", "w")
        print_mapping(mapping, data, mapping_handle)
        mapping_handle.close()

    out.close()
    log.close()
	return multifile_process(path, call)

if __name__ == '__main__':
	formats = {
		'bart': read_bart,
		'cherrypicker': read_cherrypicker,
		'conll': read_conll,
		'ims': read_ims,
###		'opennlp': read_opennlp,
		'reconcile': read_reconcile,
###		'relaxcor': read_relaxcor,
		'stanford_xml': read_stanford_xml,
		'stanford': read_stanford,
		'uiuc': read_uiuc
	}
	init.argcheck(sys.argv, 5, 6, "Translate a system output into the CoNLL format", "<prefix> <[{}]> <dir | file> <gold dir>".format(','.join(formats.keys())))

	out = open(sys.argv[1] + '.out', 'w')
	log = open(sys.argv[1] + '.log', 'w')
	init.header(sys.argv, log)

	auto_src = sys.argv[3]
	gold_src = sys.argv[4]
	if sys.argv[2] not in formats:
		print "Invalid format.  Valid options are:"
		print '\n'.join(formats.keys())
		sys.exit(1)
	auto, gold = formats[sys.argv[2]](auto_src, gold_src)
	
	for doc in auto:
		for part in auto[doc]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim: set ts=2 sw=2 noet:

import sys
from nlp_util import coreference_reading, coreference_rendering, coreference, init

if __name__ == '__main__':
	init.argcheck(sys.argv, 4, 5, "Print coreference resolution errors", "<prefix> <gold_dir> <test> [resolve span errors first? T | F]")

	auto = coreference_reading.read_conll_coref_system_output(sys.argv[3])
	gold = coreference_reading.read_conll_matching_files(auto, sys.argv[2])

	out_cluster_errors = open(sys.argv[1] + '.cluster_errors', 'w')
	out_cluster_context = open(sys.argv[1] + '.cluster_context', 'w')
	out_cluster_missing = open(sys.argv[1] + '.cluster_missing', 'w')
	out_cluster_extra = open(sys.argv[1] + '.cluster_extra', 'w')
	out_mention_list = open(sys.argv[1] + '.mention_list', 'w')
	out_mention_text = open(sys.argv[1] + '.mention_text', 'w')
	out_files = [out_cluster_errors,
	             out_cluster_context,
	             out_cluster_missing,
	             out_cluster_extra,
	             out_mention_list,
	             out_mention_text]
	init.header(sys.argv, out_files)

	for function, outfile in [
		(coreference_rendering.print_mention_text, out_mention_text),
		(coreference_rendering.print_mention_list, out_mention_list),
		(coreference_rendering.print_cluster_errors, out_cluster_errors),
	coreference_rendering.print_conll_style_part(out['error: extra entity'], text, auto_mentions_extra_entity, doc_name, part_name)
	coreference_rendering.print_conll_style_part(out['error: merge'], text, auto_mentions_merge, doc_name, part_name)
	coreference_rendering.print_conll_style_part(out['error: missing mention'], text, auto_mentions_missing_mention, doc_name, part_name)
	coreference_rendering.print_conll_style_part(out['error: missing entity'], text, auto_mentions_missing_entity, doc_name, part_name)
	coreference_rendering.print_conll_style_part(out['error: extra mention prog'], text, auto_mentions_extra_mention_prog, doc_name, part_name)
	coreference_rendering.print_conll_style_part(out['error: extra entity prog'], text, auto_mentions_extra_entity_prog, doc_name, part_name)
	coreference_rendering.print_conll_style_part(out['error: merge prog'], text, auto_mentions_merge_prog, doc_name, part_name)
	coreference_rendering.print_conll_style_part(out['error: missing mention prog'], text, auto_mentions_missing_mention_prog, doc_name, part_name)
	coreference_rendering.print_conll_style_part(out['error: missing entity prog'], text, auto_mentions_missing_entity_prog, doc_name, part_name)

	return errors


if __name__ == '__main__':
	# Process params
	init.argcheck(sys.argv, 4, 5, "Print coreference resolution errors", "<output_prefix> <gold_dir> <test_file> [remove singletons? T | F (default is True)]")
	remove_singletons = True
	if len(sys.argv) == 5 and sys.argv[-1] == 'F':
		remove_singletons = False
	out = {
		'out': open(sys.argv[1] + '.classified.detailed', 'w'),
		'properties': open(sys.argv[1] + '.classified.properties', 'w'),
		'short out': open(sys.argv[1] + '.classified', 'w'),
		'summary': open(sys.argv[1] + '.summary', 'w'),
		'system output': open(sys.argv[1] + '.system', 'w'),
		'gold': open(sys.argv[1] + '.gold', 'w'),
		'error: original': open(sys.argv[1] + '.corrected.none', 'w'),
		'error: span mismatch': open(sys.argv[1] + '.corrected.span_errors', 'w'),
		'error: split': open(sys.argv[1] + '.corrected.confused_entities', 'w'),
		'error: extra mention': open(sys.argv[1] + '.corrected.extra_mention', 'w'),
		'error: extra entity': open(sys.argv[1] + '.corrected.extra_entity', 'w'),
if __name__ == '__main__':
    formats = {
        'bart': read_bart,
        'cherrypicker': read_cherrypicker,
        'conll': read_conll,
        'ims': read_ims,
        ###		'opennlp': read_opennlp,
        'reconcile': read_reconcile,
        ###		'relaxcor': read_relaxcor,
        'stanford_xml': read_stanford_xml,
        'stanford': read_stanford,
        'uiuc': read_uiuc
    }
    init.argcheck(
        sys.argv, 5, 5, "Translate a system output into the CoNLL format",
        "<prefix> <[{}]> <dir | file> <gold dir>".format(','.join(
            formats.keys())))

    out = open(sys.argv[1] + '.out', 'w')
    log = open(sys.argv[1] + '.log', 'w')
    init.header(sys.argv, log)

    auto_src = sys.argv[3]
    gold_src = sys.argv[4]
    if sys.argv[2] not in formats:
        print "Invalid format.  Valid options are:"
        print '\n'.join(formats.keys())
        sys.exit(1)
    auto, gold = formats[sys.argv[2]](auto_src, gold_src)

    for doc in auto:
示例#8
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim: set ts=2 sw=2 noet:

import sys
try:
	from nlp_util import init, coreference_reading
except ImportError:
	raise Exception("Remember to either install nlp_util or set up a symlink to the nlp_util directory")

if __name__ == '__main__':
	# Usage: <prefix> <data>
	# Writes the text of every (document, part) pair returned by
	# coreference_reading.read_all() to its own file, one line of text per
	# output line with the items of that line joined by single spaces.
	init.argcheck(sys.argv, 3, 3, "Print conll text", "<prefix> <data>")

	prefix = sys.argv[1]
	data = coreference_reading.read_all(sys.argv[2])

	for doc in data:
		for part in data[doc]:
			text = data[doc][part]['text']
			# '/' in the document name would be a path separator, so the
			# name pieces and the part are joined with '__' instead.
			filename = '__'.join(doc.split('/') + [part])
			# One file per part: close each before opening the next so the
			# number of open handles does not grow with the data size.
			with open(prefix + filename, 'w') as out:
				for line in text:
					print >> out, ' '.join(line)