def prepare_conll_lines(self, gold_corefs, sys_corefs, gold_mention_table, system_mention_table,
                        gold_2_system_one_2_one_mapping, threshold=1.0):
    """
    Produce the CoNLL style lines for both the gold standard and the system output.

    The two mention tables are first aligned through the one-to-one mapping,
    then each side is rendered with its own token map and id-to-text lookup.

    :param gold_corefs: gold coreference chain
    :param sys_corefs: system coreference chain
    :param gold_mention_table: gold mention table
    :param system_mention_table: system mention table
    :param gold_2_system_one_2_one_mapping: a mapping between gold and system
    :param threshold: how closely two mentions must match to be treated as
        aligned; the default of 1 asks for an exact match
    :return: the pair ``(gold_conll_lines, sys_conll_lines)``
    """
    aligned_gold, aligned_sys = self.create_aligned_tables(
        gold_2_system_one_2_one_mapping, gold_mention_table, system_mention_table, threshold)

    logger.debug("Preparing CoNLL files using mapping threhold %.2f" % threshold)

    gold_token_map = self.extract_token_map(gold_mention_table)
    gold_conll_lines = self.prepare_lines(gold_corefs, aligned_gold, gold_token_map, self.gold_id_2_text)

    sys_token_map = self.extract_token_map(system_mention_table)
    sys_conll_lines = self.prepare_lines(sys_corefs, aligned_sys, sys_token_map, self.sys_id_2_text)

    # Both sides are rendered before either result is checked; an empty
    # result on either side is reported through utils.terminate_with_error.
    checks = (
        (gold_conll_lines, "Gold standard has data problem for doc [%s], please refer to log. Quitting..."),
        (sys_conll_lines, "System has data problem for doc [%s], please refer to log. Quitting..."),
    )
    for conll_lines, complaint in checks:
        if not conll_lines:
            utils.terminate_with_error(complaint % self.doc_id)

    return gold_conll_lines, sys_conll_lines
def parse_line(l, invisible_ids):
    """
    Parse one mention line of a tbf file into its spans and attributes.

    The line is split on tabs: field 2 is the event id, field 3 the span
    description, field 4 the mention text, and the
    ``len(Config.attribute_names)`` fields starting at index 5 are the
    attributes. Any fields after the attributes are not read here.

    :param l: A line in the tbf file.
    :param invisible_ids: Set of invisible ids to remove (only consulted in
        token mode).
    :return: ``None`` when a type white list is active and the first attribute
        is not on it; otherwise the tuple
        ``(spans, attributes, event_id, original_spans, text)``.
    """
    fields = l.split("\t")
    num_attributes = len(Config.attribute_names)

    if len(fields) < 5 + num_attributes:
        utils.terminate_with_error("System line has too few fields:\n ---> %s" % l)

    if MutableConfig.eval_mode == EvalMethod.Token:
        spans, original_spans = parse_token_ids(fields[3], invisible_ids)
        if len(spans) == 0:
            # Logger.warn is a deprecated alias; warning() is the supported name.
            logger.warning("Find mention with only invisible words, will not be mapped to anything")
    else:
        # There is no filtering thing in the character mode.
        spans = parse_characters(fields[3])
        original_spans = spans

    attributes = [canonicalize_string(a) for a in fields[5:5 + num_attributes]]

    # With a white list in place, only mentions whose first attribute is listed are kept.
    if EvalState.white_listed_types and attributes[0] not in EvalState.white_listed_types:
        return None

    event_id = fields[2]
    text = fields[4]

    return spans, attributes, event_id, original_spans, text
def prepare_conll_lines(self, gold_corefs, sys_corefs, gold_mention_table, system_mention_table,
                        gold_2_system_one_2_one_mapping, threshold=1.0):
    """
    Turn the gold and system coreference chains into CoNLL style lines.

    The mention tables are aligned through the one-to-one mapping first; each
    chain is then rendered against its aligned table and id-to-text lookup.

    :param gold_corefs: gold coreference chain
    :param sys_corefs: system coreference chain
    :param gold_mention_table: gold mention table
    :param system_mention_table: system mention table
    :param gold_2_system_one_2_one_mapping: a mapping between gold and system
    :param threshold: how closely two mentions must match to be treated as
        aligned; the default of 1 asks for an exact match
    :return: the pair ``(gold_conll_lines, sys_conll_lines)``
    """
    aligned_gold, aligned_sys = self.create_aligned_tables(
        gold_2_system_one_2_one_mapping, gold_mention_table, system_mention_table, threshold)

    logger.debug("Preparing CoNLL files using mapping threhold %.2f" % threshold)

    gold_conll_lines = self.prepare_lines(gold_corefs, aligned_gold, self.gold_id_2_text)
    sys_conll_lines = self.prepare_lines(sys_corefs, aligned_sys, self.sys_id_2_text)

    # Validation happens only after both sides have been rendered; an empty
    # result on either side is reported through utils.terminate_with_error.
    checks = (
        (gold_conll_lines, "Gold standard has data problem for doc [%s], please refer to log. Quitting..."),
        (sys_conll_lines, "System has data problem for doc [%s], please refer to log. Quitting..."),
    )
    for conll_lines, complaint in checks:
        if not conll_lines:
            utils.terminate_with_error(complaint % self.doc_id)

    return gold_conll_lines, sys_conll_lines
def parse_line(l, invisible_ids):
    """
    Parse one mention line of a tbf file into its spans and attributes.

    The line is split on tabs: field 2 is the event id, field 3 the span
    description, field 4 the mention text, and the
    ``len(Config.attribute_names)`` fields starting at index 5 are the
    attributes. The temporal column that follows the last attribute is not
    read here.

    :param l: A line in the tbf file.
    :param invisible_ids: Set of invisible ids to remove (only consulted in
        token mode).
    :return: ``None`` when a type white list is active and the first attribute
        is not on it; otherwise the tuple
        ``(spans, attributes, event_id, original_spans, text)``.
    """
    fields = l.split("\t")
    num_attributes = len(Config.attribute_names)

    if len(fields) < 5 + num_attributes:
        utils.terminate_with_error("System line has too few fields:\n ---> %s" % l)

    if MutableConfig.eval_mode == EvalMethod.Token:
        spans, original_spans = parse_token_ids(fields[3], invisible_ids)
        if len(spans) == 0:
            # Logger.warn is a deprecated alias; warning() is the supported name.
            logger.warning("Find mention with only invisible words, will not be mapped to anything")
    else:
        # Character mode applies no invisible-id filtering.
        spans = parse_characters(fields[3])
        original_spans = spans

    attributes = [canonicalize_string(a) for a in fields[5:5 + num_attributes]]

    # With a white list in place, only mentions whose first attribute is listed are kept.
    if EvalState.white_listed_types and attributes[0] not in EvalState.white_listed_types:
        return None

    event_id = fields[2]
    text = fields[4]

    return spans, attributes, event_id, original_spans, text
def main():
    """
    Command line entry point of the event mention scorer.

    Parses the arguments, configures logging and the shared ``Config`` /
    ``MutableConfig`` / ``EvalState`` settings, reads the gold and system
    files, evaluates documents until ``evaluate`` reports there is nothing
    left, optionally runs the CoNLL coreference script and the TimeML
    evaluation, and finally prints the results.

    The evaluation mode is not chosen by a dedicated flag: giving ``-t``
    selects token mode, otherwise character mode is used.

    :return: 0 after the evaluation has finished. A missing gold or system
        file ends the process through ``sys.exit(1)``.
    """
    parser = argparse.ArgumentParser(
        description="Event mention scorer, provides support to Event Nugget scoring, Event Coreference and Event "
                    "Sequencing scoring.")
    parser.add_argument("-g", "--gold", help="Golden Standard", required=True)
    parser.add_argument("-s", "--system", help="System output", required=True)
    parser.add_argument("-d", "--comparison_output",
                        help="Compare and help show the difference between "
                             "system and gold")
    parser.add_argument("-o", "--output",
                        help="Optional evaluation result redirects, put eval result to file")
    # The original help text lacked the space between "reference" and "conll".
    parser.add_argument("-c", "--coref",
                        help="Eval Coreference result output, need to put the reference "
                             "conll coref scorer in the same folder with this scorer")
    parser.add_argument("-a", "--sequencing",
                        help="Eval Event sequencing result output (After and Subevent)")
    parser.add_argument("-nv", "--no_script_validation",
                        help="Whether to turn off script validation", action="store_true")
    parser.add_argument("-t", "--token_path",
                        help="Path to the directory containing the token mappings file, only used in token mode.")
    parser.add_argument("-m", "--coref_mapping",
                        help="Which mapping will be used to perform coreference mapping.", type=int)
    parser.add_argument("-of", "--offset_field",
                        help="A pair of integer indicates which column we should "
                             "read the offset in the token mapping file, index starts "
                             "at 0, default value will be %s" % Config.default_token_offset_fields)
    parser.add_argument("-te", "--token_table_extension",
                        help="any extension appended after docid of token table files. "
                             "Default is [%s], only used in token mode." % Config.default_token_file_ext)
    parser.add_argument("-ct", "--coreference_threshold", type=float,
                        help="Threshold for coreference mention mapping")
    parser.add_argument("-b", "--debug", help="turn debug mode on", action="store_true")
    parser.add_argument("-wl", "--type_white_list", type=argparse.FileType('r'),
                        help="Provide a file, where each line list a mention type subtype pair to be evaluated. Types "
                             "that are out of this white list will be ignored.")
    parser.add_argument("-dn", "--doc_id_to_eval", help="Provide one single doc id to evaluate.")
    parser.set_defaults(debug=False)
    args = parser.parse_args()

    if args.debug:
        stream_handler.setLevel(logging.DEBUG)
        logger.setLevel(logging.DEBUG)
        logger.debug("Entered debug mode.")
    else:
        stream_handler.setLevel(logging.INFO)
        logger.setLevel(logging.INFO)

    if args.type_white_list is not None:
        logger.info("Only the following types in the white list will be evaluated.")
        EvalState.white_listed_types = set()
        for line in args.type_white_list:
            logger.info(line.strip())
            EvalState.white_listed_types.add(canonicalize_string(line))
        # argparse opened this file for us; release it unless it is stdin ("-").
        if args.type_white_list is not sys.stdin:
            args.type_white_list.close()

    if args.output is not None:
        out_path = args.output
        utils.create_parent_dir(out_path)
        mention_eval_out = open(out_path, 'w')
        logger.info("Evaluation output will be saved at %s" % out_path)
    else:
        mention_eval_out = sys.stdout
        logger.info("Evaluation output at standard out.")

    if os.path.isfile(args.gold):
        gf = open(args.gold)
    else:
        logger.error("Cannot find gold standard file at " + args.gold)
        sys.exit(1)

    if args.coref is not None:
        Config.conll_out = args.coref
        Config.conll_gold_file = args.coref + "_gold.conll"
        Config.conll_sys_file = args.coref + "_sys.conll"
        logger.info("CoNLL script output will be output at " + Config.conll_out)
        logger.info("Gold and system conll files will generated at " +
                    Config.conll_gold_file + " and " + Config.conll_sys_file)

    if args.sequencing is not None:
        Config.script_result_dir = args.sequencing

        logger.info("Temporal files will be output at " + Config.script_result_dir)
        utils.supermakedirs(Config.script_result_dir)

        logger.info("Will evaluate link type: %s." % ",".join(Config.script_types))
        for t in Config.script_types:
            utils.supermakedirs(os.path.join(Config.script_result_dir, t))

        # The original issued this identical call twice in a row; once is kept.
        utils.remove_file_by_extension(Config.script_result_dir, ".tml")

    if args.no_script_validation:
        Config.no_script_validation = True

    if os.path.isfile(args.system):
        sf = open(args.system)
    else:
        logger.error("Cannot find system file at " + args.system)
        sys.exit(1)

    if args.coref_mapping is not None:
        # Reject negative values too: they would otherwise silently index
        # possible_coref_mapping from the end.
        if 0 <= args.coref_mapping < 4:
            Config.coref_criteria = Config.possible_coref_mapping[args.coref_mapping]
        else:
            logger.error("Possible mapping : 0: Span only 1: Mention Type 2: Realis 3 Type and Realis")
            utils.terminate_with_error("Must provide a mapping between 0 to 3")
    else:
        Config.coref_criteria = Config.possible_coref_mapping[1]

    diff_out = None
    if args.comparison_output is not None:
        diff_out_path = args.comparison_output
        utils.create_parent_dir(diff_out_path)
        diff_out = open(diff_out_path, 'w')

    token_dir = "."
    if args.token_path is not None:
        MutableConfig.eval_mode = EvalMethod.Token
        logger.info("Eval mode is set to token.")
        if os.path.isdir(args.token_path):
            logger.debug("Will search token files in " + args.token_path)
            token_dir = args.token_path
        else:
            logger.debug("Cannot find given token directory at [%s], "
                         "will try search for current directory" % args.token_path)
    else:
        MutableConfig.eval_mode = EvalMethod.Char

    token_offset_fields = Config.default_token_offset_fields
    if args.offset_field is not None:
        try:
            token_offset_fields = [int(x) for x in args.offset_field.split(",")]
        except ValueError:
            # Best effort: report the problem and keep the default fields.
            logger.error("Token offset argument should be two integer with comma in between, i.e. 2,3")

    if args.coreference_threshold is not None:
        MutableConfig.coref_mention_threshold = args.coreference_threshold

    # Read all documents.
    read_all_doc(gf, sf, args.doc_id_to_eval)

    # Take all attribute combinations, which will be used to produce scores.
    attribute_comb = get_attr_combinations(Config.attribute_names)

    logger.info("Coreference mentions need to match %s before consideration" % Config.coref_criteria[0][1])

    # Logged once instead of printed on every iteration, so standard output
    # carries only the evaluation results.
    logger.debug("Token directory is: %s", token_dir)
    while True:
        if not evaluate(token_dir, args.coref, attribute_comb, token_offset_fields,
                        args.token_table_extension, diff_out):
            break

    # Run the CoNLL script on the combined files, which is concatenated from the best alignment of all documents.
    if args.coref is not None:
        logger.debug("Running coreference script for the final scores.")
        ConllEvaluator.run_conll_script(Config.conll_gold_file, Config.conll_sys_file, Config.conll_out)
        # Get the CoNLL scores from output
        EvalState.overall_coref_scores = ConllEvaluator.get_conll_scores(Config.conll_out)

    # Run the TimeML evaluation script.
    if Config.script_result_dir:
        TemporalEval.eval_time_ml()

    print_eval_results(mention_eval_out, attribute_comb)

    # Clean up, close files.
    close_if_not_none(diff_out)
    gf.close()
    sf.close()
    if mention_eval_out is not sys.stdout:
        mention_eval_out.close()

    logger.info("Evaluation Done.")
    return 0
def main():
    """
    Command line entry point of the event mention scorer.

    Parses the arguments, configures logging and the shared ``Config`` /
    ``MutableConfig`` / ``EvalState`` settings, reads the gold and system
    files, evaluates documents until ``evaluate`` reports there is nothing
    left, optionally runs the CoNLL coreference script, runs the TimeML
    evaluation, and finally prints the results.

    The evaluation mode comes from ``--eval_mode`` (``char`` by default).
    Token mode requires the token table directory (``-t``); the run is
    terminated through ``utils.terminate_with_error`` when it is missing.

    :return: None. A missing gold or system file ends the process through
        ``sys.exit(1)``.
    """
    parser = argparse.ArgumentParser(
        description="Event mention scorer, which conducts token based "
                    "scoring, system and gold standard files should follows "
                    "the token-based format.")
    parser.add_argument("-g", "--gold", help="Golden Standard", required=True)
    parser.add_argument("-s", "--system", help="System output", required=True)
    parser.add_argument("-d", "--comparison_output",
                        help="Compare and help show the difference between "
                             "system and gold")
    parser.add_argument("-o", "--output",
                        help="Optional evaluation result redirects, put eval result to file")
    # The original help text lacked the space between "reference" and "conll".
    parser.add_argument("-c", "--coref",
                        help="Eval Coreference result output, need to put the reference "
                             "conll coref scorer in the same folder with this scorer")
    parser.add_argument("-a", "--sequencing",
                        help="Eval Event sequencing result output (After and Subevent)")
    parser.add_argument("-t", "--token_path",
                        help="Path to the directory containing the "
                             "token mappings file")
    parser.add_argument("-m", "--coref_mapping",
                        help="Which mapping will be used to perform coreference mapping.", type=int)
    parser.add_argument("-of", "--offset_field",
                        help="A pair of integer indicates which column we should "
                             "read the offset in the token mapping file, index starts "
                             "at 0, default value will be %s" % Config.default_token_offset_fields)
    parser.add_argument("-te", "--token_table_extension",
                        help="any extension appended after docid of token table files. "
                             "Default is [%s]" % Config.default_token_file_ext)
    parser.add_argument("-ct", "--coreference_threshold", type=float,
                        help="Threshold for coreference mention mapping")
    parser.add_argument("-b", "--debug", help="turn debug mode on", action="store_true")
    parser.add_argument("--eval_mode", choices=["char", "token"], default="char",
                        help="Use Span Overlap or Token Overlap mode. The Span Overlap mode will take a span as range "
                             "[start:end], while the Token Overlap mode consider each token is provided as a single "
                             "id.")
    parser.add_argument("-wl", "--type_white_list", type=argparse.FileType('r'),
                        help="Provide a file, where each line list a mention type subtype pair to be evaluated. Types "
                             "that are out of this white list will be ignored.")
    parser.add_argument("-dn", "--doc_id_to_eval", help="Provide one single doc id to evaluate.")
    parser.set_defaults(debug=False)
    args = parser.parse_args()

    if args.debug:
        stream_handler.setLevel(logging.DEBUG)
        logger.setLevel(logging.DEBUG)
        logger.debug("Entered debug mode.")
    else:
        stream_handler.setLevel(logging.INFO)
        logger.setLevel(logging.INFO)

    if args.type_white_list is not None:
        logger.info("Only the following types in the white list will be evaluated.")
        EvalState.white_listed_types = set()
        for line in args.type_white_list:
            logger.info(line.strip())
            EvalState.white_listed_types.add(canonicalize_string(line))
        # argparse opened this file for us; release it unless it is stdin ("-").
        if args.type_white_list is not sys.stdin:
            args.type_white_list.close()

    if args.eval_mode == "char":
        MutableConfig.eval_mode = EvalMethod.Char
    else:
        MutableConfig.eval_mode = EvalMethod.Token

    if args.output is not None:
        out_path = args.output
        utils.create_parent_dir(out_path)
        mention_eval_out = open(out_path, 'w')
        logger.info("Evaluation output will be saved at %s" % out_path)
    else:
        mention_eval_out = sys.stdout
        logger.info("Evaluation output at standard out.")

    if os.path.isfile(args.gold):
        gf = open(args.gold)
    else:
        logger.error("Cannot find gold standard file at " + args.gold)
        sys.exit(1)

    if args.coref is not None:
        Config.conll_out = args.coref
        Config.conll_gold_file = args.coref + "_gold.conll"
        Config.conll_sys_file = args.coref + "_sys.conll"
        logger.info("CoNLL script output will be output at " + Config.conll_out)
        logger.info("Gold and system conll files will generated at " +
                    Config.conll_gold_file + " and " + Config.conll_sys_file)

    if args.sequencing is not None:
        Config.temporal_result_dir = args.sequencing
        utils.supermakedirs(os.path.join(Config.temporal_result_dir, Config.temporal_gold_dir))
        utils.supermakedirs(os.path.join(Config.temporal_result_dir, Config.temporal_sys_dir))

    if os.path.isfile(args.system):
        sf = open(args.system)
    else:
        logger.error("Cannot find system file at " + args.system)
        sys.exit(1)

    if args.coref_mapping is not None:
        # Reject negative values too: they would otherwise silently index
        # possible_coref_mapping from the end.
        if 0 <= args.coref_mapping < 4:
            Config.coref_criteria = Config.possible_coref_mapping[args.coref_mapping]
        else:
            logger.error("Possible mapping : 0: Span only 1: Mention Type 2: Realis 3 Type and Realis")
            utils.terminate_with_error("Must provide a mapping between 0 to 3")
    else:
        Config.coref_criteria = Config.possible_coref_mapping[1]

    diff_out = None
    if args.comparison_output is not None:
        diff_out_path = args.comparison_output
        utils.create_parent_dir(diff_out_path)
        diff_out = open(diff_out_path, 'w')

    token_dir = "."
    if args.token_path is None:
        # The original test sat under "token_path is not None" and compared the
        # string args.eval_mode with EvalMethod.Token, contradicting its own
        # message. Enforce what the message states.
        if args.eval_mode == "token":
            utils.terminate_with_error("Token table (-t) must be provided in token mode")
    elif os.path.isdir(args.token_path):
        logger.debug("Will search token files in " + args.token_path)
        token_dir = args.token_path
    else:
        logger.debug("Cannot find given token directory at [%s], "
                     "will try search for current directory" % args.token_path)

    token_offset_fields = Config.default_token_offset_fields
    if args.offset_field is not None:
        try:
            token_offset_fields = [int(x) for x in args.offset_field.split(",")]
        except ValueError:
            # Best effort: report the problem and keep the default fields.
            logger.error("Token offset argument should be two integer with comma in between, i.e. 2,3")

    if args.coreference_threshold is not None:
        MutableConfig.coref_mention_threshold = args.coreference_threshold

    # Read all documents.
    read_all_doc(gf, sf, args.doc_id_to_eval)

    # Take all attribute combinations, which will be used to produce scores.
    attribute_comb = get_attr_combinations(Config.attribute_names)

    logger.info("Coreference mentions need to match %s before consideration" % Config.coref_criteria[0][1])

    while True:
        if not evaluate(token_dir, args.coref, attribute_comb, token_offset_fields,
                        args.token_table_extension, diff_out):
            break

    # Run the CoNLL script on the combined files, which is concatenated from the best alignment of all documents.
    if args.coref is not None:
        logger.debug("Running coreference script for the final scores.")
        ConllEvaluator.run_conll_script(Config.conll_gold_file, Config.conll_sys_file, Config.conll_out)
        # Get the CoNLL scores from output
        EvalState.overall_coref_scores = ConllEvaluator.get_conll_scores(Config.conll_out)

    # Run the TimeML evaluation script.
    # NOTE(review): this runs even when -a was not given; confirm that
    # eval_time_ml copes with Config.temporal_result_dir not being set here.
    TemporalEval.eval_time_ml()

    print_eval_results(mention_eval_out, attribute_comb)

    # Clean up, close files.
    close_if_not_none(diff_out)
    gf.close()
    sf.close()
    if mention_eval_out is not sys.stdout:
        mention_eval_out.close()

    logger.info("Evaluation Done.")