Пример #1
0
    def prepare_conll_lines(self, gold_corefs, sys_corefs, gold_mention_table, system_mention_table,
                            gold_2_system_one_2_one_mapping, threshold=1.0):
        """
        Build the CoNLL style lines for both the gold standard and the system output.

        The two mention tables are first aligned through ``create_aligned_tables``; the lines of each
        side are then produced by ``prepare_lines`` using the token map extracted from that side's
        mention table. When a side yields no lines, ``utils.terminate_with_error`` is invoked for it.

        :param gold_corefs: gold coreference chain
        :param sys_corefs: system coreference chain
        :param gold_mention_table: gold mention table
        :param system_mention_table: system mention table
        :param gold_2_system_one_2_one_mapping: a mapping between gold and system
        :param threshold: to what extent two mentions are treated as aligned, default 1 for exact match
        :return: the pair ``(gold_conll_lines, sys_conll_lines)``
        """
        aligned_tables = self.create_aligned_tables(
            gold_2_system_one_2_one_mapping, gold_mention_table, system_mention_table, threshold
        )
        aligned_gold_table, aligned_system_table = aligned_tables
        logger.debug("Preparing CoNLL files using mapping threhold %.2f" % threshold)

        # Gold side first, then the system side.
        gold_token_map = self.extract_token_map(gold_mention_table)
        gold_conll_lines = self.prepare_lines(
            gold_corefs, aligned_gold_table, gold_token_map, self.gold_id_2_text
        )

        system_token_map = self.extract_token_map(system_mention_table)
        sys_conll_lines = self.prepare_lines(
            sys_corefs, aligned_system_table, system_token_map, self.sys_id_2_text
        )

        # An empty result on either side is reported as a data problem for this document.
        if not gold_conll_lines:
            gold_problem = "Gold standard has data problem for doc [%s], please refer to log. Quitting..." % self.doc_id
            utils.terminate_with_error(gold_problem)

        if not sys_conll_lines:
            system_problem = "System has data problem for doc [%s], please refer to log. Quitting..." % self.doc_id
            utils.terminate_with_error(system_problem)

        return gold_conll_lines, sys_conll_lines
Пример #2
0
def parse_line(l, invisible_ids):
    """
    Parse one tab-separated mention line of a tbf file.

    Column layout as read by this function: column 2 is the event id, column 3 the span
    description, column 4 the mention text, and the following ``len(Config.attribute_names)``
    columns are the attributes. Any further columns are not read here.

    :param l: A line in the tbf file.
    :param invisible_ids: Set of invisible ids to remove (only consulted in token mode).
    :return: ``None`` when a type white list is active and the first attribute is not in it;
        otherwise the tuple ``(spans, attributes, event_id, original_spans, text)``.
    """
    fields = l.split("\t")
    num_attributes = len(Config.attribute_names)
    if len(fields) < 5 + num_attributes:
        utils.terminate_with_error(
            "System line has too few fields:\n ---> %s" % l)

    if MutableConfig.eval_mode == EvalMethod.Token:
        spans, original_spans = parse_token_ids(fields[3], invisible_ids)
        if len(spans) == 0:
            # Logger.warn is a deprecated alias; Logger.warning is the supported spelling.
            logger.warning(
                "Find mention with only invisible words, will not be mapped to anything"
            )
    else:
        # There is no filtering thing in the character mode, so both span views are the same.
        spans = parse_characters(fields[3])
        original_spans = spans

    attributes = [canonicalize_string(a) for a in fields[5:5 + num_attributes]]

    # With an active white list, mentions whose first attribute is not listed are dropped.
    if EvalState.white_listed_types and attributes[0] not in EvalState.white_listed_types:
        return None

    event_id = fields[2]
    text = fields[4]

    return spans, attributes, event_id, original_spans, text
Пример #3
0
    def prepare_conll_lines(self, gold_corefs, sys_corefs, gold_mention_table, system_mention_table,
                            gold_2_system_one_2_one_mapping, threshold=1.0):
        """
        Build the CoNLL style lines for both the gold standard and the system output.

        The two mention tables are first aligned through ``create_aligned_tables``; the lines of each
        side are then produced by ``prepare_lines`` from the aligned table and that side's id-to-text
        mapping. When a side yields no lines, ``utils.terminate_with_error`` is invoked for it.

        :param gold_corefs: gold coreference chain
        :param sys_corefs: system coreference chain
        :param gold_mention_table: gold mention table
        :param system_mention_table: system mention table
        :param gold_2_system_one_2_one_mapping: a mapping between gold and system
        :param threshold: to what extent two mentions are treated as aligned, default 1 for exact match
        :return: the pair ``(gold_conll_lines, sys_conll_lines)``
        """
        aligned_tables = self.create_aligned_tables(
            gold_2_system_one_2_one_mapping, gold_mention_table, system_mention_table, threshold
        )
        aligned_gold_table, aligned_system_table = aligned_tables
        logger.debug("Preparing CoNLL files using mapping threhold %.2f" % threshold)

        # Gold side first, then the system side.
        gold_conll_lines = self.prepare_lines(
            gold_corefs, aligned_gold_table, self.gold_id_2_text
        )
        sys_conll_lines = self.prepare_lines(
            sys_corefs, aligned_system_table, self.sys_id_2_text
        )

        # An empty result on either side is reported as a data problem for this document.
        if not gold_conll_lines:
            gold_problem = "Gold standard has data problem for doc [%s], please refer to log. Quitting..." % self.doc_id
            utils.terminate_with_error(gold_problem)

        if not sys_conll_lines:
            system_problem = "System has data problem for doc [%s], please refer to log. Quitting..." % self.doc_id
            utils.terminate_with_error(system_problem)

        return gold_conll_lines, sys_conll_lines
Пример #4
0
def parse_line(l, invisible_ids):
    """
    Parse one tab-separated mention line of a tbf file.

    Column layout as read by this function: column 2 is the event id, column 3 the span
    description, column 4 the mention text, and the following ``len(Config.attribute_names)``
    columns are the attributes. Columns after the last attribute (where the temporal column
    lives, per the original note) are not read by this function.

    :param l: A line in the tbf file.
    :param invisible_ids: Set of invisible ids to remove (only consulted in token mode).
    :return: ``None`` when a type white list is active and the first attribute is not in it;
        otherwise the tuple ``(spans, attributes, event_id, original_spans, text)``.
    """
    fields = l.split("\t")
    num_attributes = len(Config.attribute_names)
    if len(fields) < 5 + num_attributes:
        utils.terminate_with_error("System line has too few fields:\n ---> %s" % l)

    if MutableConfig.eval_mode == EvalMethod.Token:
        spans, original_spans = parse_token_ids(fields[3], invisible_ids)
        if len(spans) == 0:
            # Logger.warn is a deprecated alias; Logger.warning is the supported spelling.
            logger.warning("Find mention with only invisible words, will not be mapped to anything")
    else:
        # No invisible-id filtering happens in this branch, so both span views are the same.
        spans = parse_characters(fields[3])
        original_spans = spans

    attributes = [canonicalize_string(a) for a in fields[5:5 + num_attributes]]

    # With an active white list, mentions whose first attribute is not listed are dropped.
    if EvalState.white_listed_types and attributes[0] not in EvalState.white_listed_types:
        return None

    event_id = fields[2]
    text = fields[4]

    return spans, attributes, event_id, original_spans, text
Пример #5
0
def main():
    """
    Command line entry point of the event mention scorer.

    Parses the arguments, configures logging and the global ``Config`` / ``MutableConfig`` /
    ``EvalState`` objects, reads the gold and system files, evaluates documents one by one until
    ``evaluate`` reports there is nothing left, optionally runs the CoNLL coreference script and
    the TimeML evaluation, and finally prints the results.

    The process exits with status 1 when the gold or system file cannot be found.

    :return: 0 when the evaluation finished.
    """
    parser = argparse.ArgumentParser(
        description=
        "Event mention scorer, provides support to Event Nugget scoring, Event Coreference and Event "
        "Sequencing scoring.")
    parser.add_argument("-g", "--gold", help="Golden Standard", required=True)
    parser.add_argument("-s", "--system", help="System output", required=True)
    parser.add_argument("-d",
                        "--comparison_output",
                        help="Compare and help show the difference between "
                        "system and gold")
    parser.add_argument(
        "-o",
        "--output",
        help="Optional evaluation result redirects, put eval result to file")
    parser.add_argument(
        "-c",
        "--coref",
        help="Eval Coreference result output, need to put the reference "
        "conll coref scorer in the same folder with this scorer")
    parser.add_argument(
        "-a",
        "--sequencing",
        help="Eval Event sequencing result output (After and Subevent)")
    parser.add_argument("-nv",
                        "--no_script_validation",
                        help="Whether to turn off script validation",
                        action="store_true")
    parser.add_argument(
        "-t",
        "--token_path",
        help=
        "Path to the directory containing the token mappings file, only used in token mode."
    )
    parser.add_argument(
        "-m",
        "--coref_mapping",
        help="Which mapping will be used to perform coreference mapping.",
        type=int)
    parser.add_argument(
        "-of",
        "--offset_field",
        help="A pair of integer indicates which column we should "
        "read the offset in the token mapping file, index starts "
        "at 0, default value will be %s" % Config.default_token_offset_fields)
    parser.add_argument(
        "-te",
        "--token_table_extension",
        help=
        "any extension appended after docid of token table files. Default is [%s], only used in token mode."
        % Config.default_token_file_ext)
    parser.add_argument("-ct",
                        "--coreference_threshold",
                        type=float,
                        help="Threshold for coreference mention mapping")
    parser.add_argument("-b",
                        "--debug",
                        help="turn debug mode on",
                        action="store_true")

    # There is no explicit eval mode switch: the mode is derived from whether -t is given (below).

    parser.add_argument(
        "-wl",
        "--type_white_list",
        type=argparse.FileType('r'),
        help=
        "Provide a file, where each line list a mention type subtype pair to be evaluated. Types "
        "that are out of this white list will be ignored.")

    parser.add_argument("-dn",
                        "--doc_id_to_eval",
                        help="Provide one single doc id to evaluate.")

    parser.set_defaults(debug=False)
    args = parser.parse_args()

    if args.debug:
        stream_handler.setLevel(logging.DEBUG)
        logger.setLevel(logging.DEBUG)
        logger.debug("Entered debug mode.")
    else:
        stream_handler.setLevel(logging.INFO)
        logger.setLevel(logging.INFO)

    if args.type_white_list is not None:
        logger.info(
            "Only the following types in the white list will be evaluated.")
        EvalState.white_listed_types = set()
        for line in args.type_white_list:
            logger.info(line.strip())
            EvalState.white_listed_types.add(canonicalize_string(line))
        # argparse opened this file for us; it is fully consumed now.
        args.type_white_list.close()

    if args.output is not None:
        out_path = args.output
        utils.create_parent_dir(out_path)
        mention_eval_out = open(out_path, 'w')
        logger.info("Evaluation output will be saved at %s" % out_path)
    else:
        mention_eval_out = sys.stdout
        logger.info("Evaluation output at standard out.")

    if os.path.isfile(args.gold):
        gf = open(args.gold)
    else:
        logger.error("Cannot find gold standard file at " + args.gold)
        sys.exit(1)

    if args.coref is not None:
        Config.conll_out = args.coref
        Config.conll_gold_file = args.coref + "_gold.conll"
        Config.conll_sys_file = args.coref + "_sys.conll"

        logger.info("CoNLL script output will be output at " +
                    Config.conll_out)

        logger.info("Gold and system conll files will generated at " +
                    Config.conll_gold_file + " and " + Config.conll_sys_file)

    if args.sequencing is not None:
        Config.script_result_dir = args.sequencing

        logger.info("Temporal files will be output at " +
                    Config.script_result_dir)
        utils.supermakedirs(Config.script_result_dir)

        logger.info("Will evaluate link type: %s." %
                    ",".join(Config.script_types))
        for t in Config.script_types:
            utils.supermakedirs(os.path.join(Config.script_result_dir, t))

        # The original issued this identical call twice in a row; one call is enough.
        utils.remove_file_by_extension(Config.script_result_dir, ".tml")

        if args.no_script_validation:
            Config.no_script_validation = True

    if os.path.isfile(args.system):
        sf = open(args.system)
    else:
        logger.error("Cannot find system file at " + args.system)
        sys.exit(1)

    if args.coref_mapping is not None:
        # Reject negative values too: they would otherwise index the list from its end.
        if 0 <= args.coref_mapping < 4:
            Config.coref_criteria = Config.possible_coref_mapping[
                args.coref_mapping]
        else:
            logger.error(
                "Possible mapping : 0: Span only 1: Mention Type 2: Realis 3 Type and Realis"
            )
            utils.terminate_with_error("Must provide a mapping between 0 to 3")
    else:
        Config.coref_criteria = Config.possible_coref_mapping[1]

    diff_out = None
    if args.comparison_output is not None:
        diff_out_path = args.comparison_output
        utils.create_parent_dir(diff_out_path)
        diff_out = open(diff_out_path, 'w')

    # Supplying a token path switches the scorer to token mode; otherwise character mode is used.
    token_dir = "."
    if args.token_path is not None:
        MutableConfig.eval_mode = EvalMethod.Token
        logger.info("Eval mode is set to token.")
        if os.path.isdir(args.token_path):
            logger.debug("Will search token files in " + args.token_path)
            token_dir = args.token_path
        else:
            logger.debug("Cannot find given token directory at [%s], "
                         "will try search for current directory" %
                         args.token_path)
    else:
        MutableConfig.eval_mode = EvalMethod.Char

    token_offset_fields = Config.default_token_offset_fields
    if args.offset_field is not None:
        try:
            token_offset_fields = [
                int(x) for x in args.offset_field.split(",")
            ]
        except ValueError:
            # Best effort: report the problem and keep the default offset fields.
            logger.error(
                "Token offset argument should be two integer with comma in between, i.e. 2,3"
            )

    if args.coreference_threshold is not None:
        MutableConfig.coref_mention_threshold = args.coreference_threshold

    # Read all documents.
    read_all_doc(gf, sf, args.doc_id_to_eval)

    # Take all attribute combinations, which will be used to produce scores.
    attribute_comb = get_attr_combinations(Config.attribute_names)

    logger.info("Coreference mentions need to match %s before consideration" %
                Config.coref_criteria[0][1])

    # Logged once through the logger instead of printed on every iteration, so that the
    # evaluation output on standard out is not polluted.
    logger.debug("Token directory is: %s", token_dir)

    # Evaluate until evaluate() returns a false value.
    while True:
        if not evaluate(token_dir, args.coref, attribute_comb,
                        token_offset_fields, args.token_table_extension,
                        diff_out):
            break

    # Run the CoNLL script on the combined files, which is concatenated from the best alignment of all documents.
    if args.coref is not None:
        logger.debug("Running coreference script for the final scores.")
        ConllEvaluator.run_conll_script(Config.conll_gold_file,
                                        Config.conll_sys_file,
                                        Config.conll_out)
        # Get the CoNLL scores from output
        EvalState.overall_coref_scores = ConllEvaluator.get_conll_scores(
            Config.conll_out)

    # Run the TimeML evaluation script.
    if Config.script_result_dir:
        TemporalEval.eval_time_ml()

    print_eval_results(mention_eval_out, attribute_comb)

    # Clean up, close files.
    close_if_not_none(diff_out)
    gf.close()
    sf.close()
    if mention_eval_out is not sys.stdout:
        mention_eval_out.close()

    logger.info("Evaluation Done.")
    return 0
Пример #6
0
def main():
    """
    Command line entry point of the event mention scorer.

    Parses the arguments, configures logging and the global ``Config`` / ``MutableConfig`` /
    ``EvalState`` objects, reads the gold and system files, evaluates documents one by one until
    ``evaluate`` reports there is nothing left, optionally runs the CoNLL coreference script,
    runs the TimeML evaluation, and finally prints the results.

    The process exits with status 1 when the gold or system file cannot be found.
    """
    parser = argparse.ArgumentParser(
        description="Event mention scorer, which conducts token based "
                    "scoring, system and gold standard files should follows "
                    "the token-based format.")
    parser.add_argument("-g", "--gold", help="Golden Standard", required=True)
    parser.add_argument("-s", "--system", help="System output", required=True)
    parser.add_argument("-d", "--comparison_output",
                        help="Compare and help show the difference between "
                             "system and gold")
    parser.add_argument(
        "-o", "--output", help="Optional evaluation result redirects, put eval result to file")
    parser.add_argument(
        "-c", "--coref", help="Eval Coreference result output, need to put the reference "
                              "conll coref scorer in the same folder with this scorer")
    parser.add_argument(
        "-a", "--sequencing", help="Eval Event sequencing result output (After and Subevent)"
    )
    parser.add_argument(
        "-t", "--token_path", help="Path to the directory containing the "
                                   "token mappings file")
    parser.add_argument(
        "-m", "--coref_mapping", help="Which mapping will be used to perform coreference mapping.", type=int
    )
    parser.add_argument(
        "-of", "--offset_field", help="A pair of integer indicates which column we should "
                                      "read the offset in the token mapping file, index starts "
                                      "at 0, default value will be %s" % Config.default_token_offset_fields
    )
    parser.add_argument(
        "-te", "--token_table_extension",
        help="any extension appended after docid of token table files. "
             "Default is [%s]" % Config.default_token_file_ext)
    parser.add_argument("-ct", "--coreference_threshold", type=float, help="Threshold for coreference mention mapping")
    parser.add_argument(
        "-b", "--debug", help="turn debug mode on", action="store_true")

    parser.add_argument("--eval_mode", choices=["char", "token"], default="char",
                        help="Use Span Overlap or Token Overlap mode. The Span Overlap mode will take a span as range "
                             "[start:end], while the Token Overlap mode consider each token is provided as a single "
                             "id.")

    parser.add_argument("-wl", "--type_white_list", type=argparse.FileType('r'),
                        help="Provide a file, where each line list a mention type subtype pair to be evaluated. Types "
                             "that are out of this white list will be ignored.")

    parser.add_argument(
        "-dn", "--doc_id_to_eval", help="Provide one single doc id to evaluate."
    )

    parser.set_defaults(debug=False)
    args = parser.parse_args()

    if args.debug:
        stream_handler.setLevel(logging.DEBUG)
        logger.setLevel(logging.DEBUG)
        logger.debug("Entered debug mode.")
    else:
        stream_handler.setLevel(logging.INFO)
        logger.setLevel(logging.INFO)

    if args.type_white_list is not None:
        logger.info("Only the following types in the white list will be evaluated.")
        EvalState.white_listed_types = set()
        for line in args.type_white_list:
            logger.info(line.strip())
            EvalState.white_listed_types.add(canonicalize_string(line))
        # argparse opened this file for us; it is fully consumed now.
        args.type_white_list.close()

    if args.eval_mode == "char":
        MutableConfig.eval_mode = EvalMethod.Char
    else:
        MutableConfig.eval_mode = EvalMethod.Token

    if args.output is not None:
        out_path = args.output
        utils.create_parent_dir(out_path)
        mention_eval_out = open(out_path, 'w')
        logger.info("Evaluation output will be saved at %s" % out_path)
    else:
        mention_eval_out = sys.stdout
        logger.info("Evaluation output at standard out.")

    if os.path.isfile(args.gold):
        gf = open(args.gold)
    else:
        logger.error("Cannot find gold standard file at " + args.gold)
        sys.exit(1)

    if args.coref is not None:
        Config.conll_out = args.coref
        Config.conll_gold_file = args.coref + "_gold.conll"
        Config.conll_sys_file = args.coref + "_sys.conll"

        logger.info("CoNLL script output will be output at " + Config.conll_out)

        logger.info(
            "Gold and system conll files will generated at " + Config.conll_gold_file + " and " + Config.conll_sys_file)

    if args.sequencing is not None:
        Config.temporal_result_dir = args.sequencing
        utils.supermakedirs(os.path.join(Config.temporal_result_dir, Config.temporal_gold_dir))
        utils.supermakedirs(os.path.join(Config.temporal_result_dir, Config.temporal_sys_dir))

    if os.path.isfile(args.system):
        sf = open(args.system)
    else:
        logger.error("Cannot find system file at " + args.system)
        sys.exit(1)

    if args.coref_mapping is not None:
        # Reject negative values too: they would otherwise index the list from its end.
        if 0 <= args.coref_mapping < 4:
            Config.coref_criteria = Config.possible_coref_mapping[args.coref_mapping]
        else:
            logger.error("Possible mapping : 0: Span only 1: Mention Type 2: Realis 3 Type and Realis")
            utils.terminate_with_error("Must provide a mapping between 0 to 3")
    else:
        Config.coref_criteria = Config.possible_coref_mapping[1]

    diff_out = None
    if args.comparison_output is not None:
        diff_out_path = args.comparison_output
        utils.create_parent_dir(diff_out_path)
        diff_out = open(diff_out_path, 'w')

    token_dir = "."
    if args.token_path is not None:
        if os.path.isdir(args.token_path):
            logger.debug("Will search token files in " + args.token_path)
            token_dir = args.token_path
        else:
            logger.debug("Cannot find given token directory at [%s], "
                         "will try search for current directory" % args.token_path)
    elif args.eval_mode == "token":
        # The original performed this check only when -t WAS supplied and compared the string
        # args.eval_mode with EvalMethod.Token, which contradicts the message below. The check
        # now fires in the situation the message describes: token mode without a token path.
        utils.terminate_with_error("Token table (-t) must be provided in token mode")

    token_offset_fields = Config.default_token_offset_fields
    if args.offset_field is not None:
        try:
            token_offset_fields = [int(x) for x in args.offset_field.split(",")]
        except ValueError:
            # Best effort: report the problem and keep the default offset fields.
            logger.error("Token offset argument should be two integer with comma in between, i.e. 2,3")

    if args.coreference_threshold is not None:
        MutableConfig.coref_mention_threshold = args.coreference_threshold

    # Read all documents.
    read_all_doc(gf, sf, args.doc_id_to_eval)

    # Take all attribute combinations, which will be used to produce scores.
    attribute_comb = get_attr_combinations(Config.attribute_names)

    logger.info("Coreference mentions need to match %s before consideration" % Config.coref_criteria[0][1])

    # Evaluate until evaluate() returns a false value.
    while True:
        if not evaluate(token_dir, args.coref, attribute_comb,
                        token_offset_fields, args.token_table_extension,
                        diff_out):
            break

    # Run the CoNLL script on the combined files, which is concatenated from the best alignment of all documents.
    if args.coref is not None:
        logger.debug("Running coreference script for the final scores.")
        ConllEvaluator.run_conll_script(Config.conll_gold_file, Config.conll_sys_file, Config.conll_out)
        # Get the CoNLL scores from output
        EvalState.overall_coref_scores = ConllEvaluator.get_conll_scores(Config.conll_out)

    # Run the TimeML evaluation script.
    # NOTE(review): this runs even when -a/--sequencing was not given; whether that is safe
    # depends on the default of Config.temporal_result_dir, which is not visible here - confirm.
    TemporalEval.eval_time_ml()

    print_eval_results(mention_eval_out, attribute_comb)

    # Clean up, close files.
    close_if_not_none(diff_out)
    gf.close()
    sf.close()
    if mention_eval_out is not sys.stdout:
        mention_eval_out.close()

    logger.info("Evaluation Done.")