Example #1
def main():
    opt_parser = cmd_line_parser()
    (options, args) = opt_parser.parse_args()
    if len(args) > 0:
        opt_parser.error("all arguments must be flagged")

    if options.guessed is None or options.ref is None or options.db_filename is None:
        opt_parser.error("missing arguments. type --help for help.")
    if (options.pid is not None and options.from_xids is not None):
        opt_parser.error("inconsistent parameters. \
        you can't have both a pid and from_xids paramters.")

    keys = [options.guessed, options.ref]
    if options.from_xids:
        xmls = ucca_db.get_by_xids(options.db_filename, options.host, keys)
    else:
        xmls = ucca_db.get_xml_trees(options.db_filename, options.host,
                                     options.pid, keys)

    guessed, ref = [convert.from_site(x) for x in xmls]
    if options.units or options.fscore or options.errors:
        evaluate(guessed,
                 ref,
                 units=options.units,
                 fscore=options.fscore,
                 errors=options.errors,
                 verbose=True)
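The recurring pattern across these examples is: load a guessed and a reference passage, call evaluate() with flags selecting which reports to produce, and combine per-passage results with Scores.aggregate(). A minimal sketch of that pattern, assuming the ucca package is installed; the file paths are placeholders:

from ucca.evaluation import evaluate, Scores
from ucca.ioutil import file2passage

# Load one guessed and one reference passage (paths are placeholders)
guessed = file2passage("guessed.xml")
ref = file2passage("ref.xml")

# Compute precision/recall/F1 for this pair and print the report
score = evaluate(guessed, ref, fscore=True, units=False, errors=False, verbose=True)
print("Average labeled F1: %.3f" % score.average_f1())

# With several passage pairs, collect the per-pair scores and aggregate them
Scores.aggregate([score]).print()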
Example #2
def main(args):
    keys = [args.guessed, args.ref]
    xmls = api.get_by_xids(db_name=args.db_filename, host_name=args.host, xids=keys) if args.from_xids else \
        api.get_xml_trees(db_name=args.db_filename, host_name=args.host, pid=args.pid, usernames=keys)
    guessed, ref = [convert.from_site(x) for x in xmls]
    if args.units or args.fscore or args.errors:
        evaluate(guessed, ref, units=args.units, fscore=args.fscore, errors=args.errors,
                 constructions=args.constructions, verbose=True)
Example #3
def main(filenames, write, **kwargs):
    uploader = TaskUploader(**kwargs)
    downloader = TaskDownloader(**kwargs)
    scores = []
    try:
        for pattern in filenames:
            filenames = glob(pattern)
            if not filenames:
                raise IOError("Not found: " + pattern)
            for ref in read_files_and_dirs(filenames):
                print("Converting passage " + ref.ID + "... ", end="")
                task = uploader.upload_task(ref)
                guessed = downloader.download_task(task["id"], write=write, **kwargs)
                score = evaluate(guessed, ref, **kwargs)
                print("F1=%.3f" % score.average_f1())
                scores.append(score)
    except HTTPError as e:
        try:
            raise ValueError(e.response.json()) from e
        except JSONDecodeError:
            raise ValueError(e.response.text) from e
    print()
    if len(scores) > 1:
        print("Aggregated scores:")
    Scores.aggregate(scores).print()
Example #4
def main(args):
    guessed, ref = [
        ioutil.read_files_and_dirs((x, )) for x in (args.guessed, args.ref)
    ]
    guessed = match_by_id(guessed, ref)
    results = []
    for g, r in zip(guessed, ref):
        if len(guessed) > 1:
            sys.stdout.write("\rEvaluating %s%s" %
                             (g.ID, ":" if args.verbose else "..."))
            sys.stdout.flush()
        if args.verbose:
            print()
        result = evaluation.evaluate(g,
                                     r,
                                     constructions=args.constructions,
                                     units=args.units,
                                     fscore=args.fscore,
                                     errors=args.errors,
                                     verbose=args.verbose or len(guessed) == 1,
                                     normalize=args.normalize)
        if args.verbose:
            print("Average labeled F1 score: %.3f\n" % result.average_f1())
        results.append(result)
    summarize(args, results)
Example #5
def main(args):
    guessed, ref, ref_yield_tags = [None if x is None else ioutil.read_files_and_dirs((x,))
                                    for x in (args.guessed, args.ref, args.ref_yield_tags)]
    if args.match_by_id:
        guessed = match_by_id(guessed, ref)
        ref_yield_tags = match_by_id(ref_yield_tags, ref)
    results = []
    eval_type = evaluation.UNLABELED if args.unlabeled else evaluation.LABELED
    verbose = args.verbose or len(guessed) == 1
    for g, r, ryt in zip(guessed, ref, ref_yield_tags or repeat(None)):
        if len(guessed) > 1:
            print("Evaluating %s%s" % (g.ID, ":" if args.verbose else "..."), end="\r", flush=True)
        if args.verbose:
            print()
        result = evaluation.evaluate(g, r, constructions=args.constructions, units=args.units, fscore=args.fscore,
                                     errors=args.errors, verbose=verbose,
                                     normalize=args.normalize, ref_yield_tags=ryt,
                                     eval_type=evaluation.UNLABELED if args.unlabeled else None)
        if verbose:
            if args.errors:
                result.print_confusion_matrix(as_table=args.as_table)
            if not args.quiet:
                print_f1(result, eval_type)
        results.append(result)
    summarize(args, results, eval_type=eval_type)
Example #7
def main():
    argparser = argparse.ArgumentParser(description=desc)
    argparser.add_argument("filenames", nargs="+",
                           help="file names to convert and evaluate")
    argparser.add_argument("-f", "--format", required=True, choices=convert.CONVERTERS,
                           help="input file format")
    argparser.add_argument("-T", "--tree", action="store_true",
                           help="remove multiple parents to get a tree")
    args = argparser.parse_args()

    converter1 = convert.TO_FORMAT[args.format]
    converter2 = convert.FROM_FORMAT[args.format]
    scores = []
    for pattern in args.filenames:
        filenames = glob.glob(pattern)
        if not filenames:
            raise IOError("Not found: " + pattern)
        for filename in filenames:
            ref = file2passage(filename)
            try:
                guessed = next(converter2(converter1(ref, tree=args.tree), ref.ID))
                scores.append(evaluate(guessed, ref, fscore=True, verbose=False,
                                       units=False, errors=False))
            except Exception as e:
                raise ValueError("Error evaluating conversion of %s" % filename, e)
    if len(scores) > 1:
        print("Aggregated scores:")
    Scores.aggregate(scores).print()

    sys.exit(0)
Example #8
def main(args):
    guessed, ref, ref_yield_tags = [
        None if x is None else ioutil.read_files_and_dirs((x, ))
        for x in (args.guessed, args.ref, args.ref_yield_tags)
    ]
    if args.match_by_id:
        guessed = match_by_id(guessed, ref)
        ref_yield_tags = match_by_id(ref_yield_tags, ref)
    results = []
    for g, r, ryt in zip(guessed, ref, ref_yield_tags or repeat(None)):
        if len(guessed) > 1:
            sys.stdout.write("\rEvaluating %s%s" %
                             (g.ID, ":" if args.verbose else "..."))
            sys.stdout.flush()
        if args.verbose:
            print()
        result = evaluation.evaluate(
            g,
            r,
            constructions=args.constructions,
            units=args.units,
            fscore=args.fscore,
            errors=args.errors,
            verbose=args.verbose or len(guessed) == 1,
            normalize=args.normalize,
            ref_yield_tags=ryt,
            eval_type=evaluation.UNLABELED if args.unlabeled else None)
        if args.verbose:
            print_f1(result, args.unlabeled)
        results.append(result)
    summarize(args, results)
Example #9
def main(filenames, write, **kwargs):
    uploader = TaskUploader(**kwargs)
    downloader = TaskDownloader(**kwargs)
    scores = []
    try:
        for pattern in filenames:
            filenames = glob(pattern)
            if not filenames:
                raise IOError("Not found: " + pattern)
            for ref in read_files_and_dirs(filenames):
                print("Converting passage " + ref.ID + "... ", end="")
                task = uploader.upload_task(ref)
                guessed = downloader.download_task(task["id"],
                                                   write=write,
                                                   **kwargs)
                score = evaluate(guessed, ref, **kwargs)
                print("F1=%.3f" % score.average_f1())
                scores.append(score)
    except HTTPError as e:
        try:
            raise ValueError(e.response.json()) from e
        except JSONDecodeError:
            raise ValueError(e.response.text) from e
    print()
    if len(scores) > 1:
        print("Aggregated scores:")
    Scores.aggregate(scores).print()
Example #10
File: parse.py  Project: ml-lab/tupa
def evaluate_passage(guessed_passage, ref_passage):
    score = evaluation.evaluate(guessed_passage,
                                ref_passage,
                                constructions=Config().args.constructions,
                                verbose=Config().args.verbose
                                and guessed_passage is not None)
    print("F1=%.3f" % score.average_f1(), flush=True)
    return score
Example #11
File: parse.py  Project: viksit/ucca
def evaluate_passage(guessed_passage, ref_passage):
    score = evaluation.evaluate(guessed_passage,
                                ref_passage,
                                verbose=Config().verbose
                                and guessed_passage is not None,
                                units=False,
                                errors=False)
    print("F1=%.3f" % score.average_unlabeled_f1(), flush=True)
    return score
Example #12
def test_evaluate_self(create, units, errors, normalize):
    p = create()
    scores = evaluate(p, p, units=units, errors=errors, normalize=normalize)
    assert 1.0 == scores.average_f1()
    for eval_type, results in sorted(scores.evaluators.items()):
        for construction, stats in results.results.items():
            assert 1.0 == stats.f1, (eval_type, construction)
            assert 1.0 == stats.p, (eval_type, construction)
            assert 1.0 == stats.r, (eval_type, construction)
    check_primary_remote(scores, 1.0)
Example #13
def test_evaluate(create1, create2, f1, units, errors):
    p1 = create1()
    p2 = create2()
    validation_errors_before = [list(validate(p, linkage=False)) for p in (p1, p2)]
    scores = evaluate(p1, p2, units=units, errors=errors)
    validation_errors_after = [list(validate(p, linkage=False)) for p in (p1, p2)]
    for before, after in zip(validation_errors_before, validation_errors_after):
        if not before:
            assert not after
    check_primary_remote(scores, f1)
Example #14
File: parse.py  Project: borgr/ucca
def evaluate_passage(guessed_passage, ref_passage):
    score = evaluation.evaluate(
        guessed_passage,
        ref_passage,
        verbose=Config().verbose and guessed_passage is not None,
        units=False,
        errors=False,
    )
    print("F1=%.3f" % score.average_unlabeled_f1(), flush=True)
    return score
Example #15
def main():
    argparser = argparse.ArgumentParser(description=desc)
    argparser.add_argument("filenames",
                           nargs="+",
                           help="file names to convert and evaluate")
    argparser.add_argument("-f",
                           "--format",
                           required=True,
                           choices=convert.CONVERTERS,
                           help="input file format")
    argparser.add_argument("-T",
                           "--tree",
                           action="store_true",
                           help="remove multiple parents to get a tree")
    argparser.add_argument(
        "-s",
        "--strict",
        action="store_true",
        help="stop immediately if failed to convert or evaluate a file")
    argparser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="print evaluation results for each file separately")
    args = argparser.parse_args()

    converter1 = convert.TO_FORMAT[args.format]
    converter2 = convert.FROM_FORMAT[args.format]
    scores = []
    for pattern in args.filenames:
        filenames = glob.glob(pattern)
        if not filenames:
            raise IOError("Not found: " + pattern)
        for filename in filenames:
            sys.stdout.write("\rConverting %s" % filename)
            sys.stdout.flush()
            ref = file2passage(filename)
            try:
                guessed = next(
                    converter2(converter1(ref, tree=args.tree), ref.ID))
                scores.append(evaluate(guessed, ref, verbose=args.verbose))
            except Exception as e:
                if args.strict:
                    raise ValueError("Error evaluating conversion of %s" %
                                     filename) from e
                else:
                    print("Error evaluating conversion of %s: %s" %
                          (filename, e),
                          file=sys.stderr)
    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
    Scores.aggregate(scores).print()

    sys.exit(0)
Example #16
def main():
    opt_parser = cmd_line_parser()
    (options, args) = opt_parser.parse_args()
    if len(args) > 0:
        opt_parser.error("all arguments must be flagged")

    if (options.guessed is None) or (options.ref is None) or (options.db_filename is None):
        opt_parser.error("missing arguments. type --help for help.")
    if options.pid is not None and options.from_xids is not None:
        opt_parser.error("inconsistent parameters. \
        you can't have both a pid and from_xids paramters.")

    keys = [options.guessed, options.ref]
    if options.from_xids:
        xmls = ucca_db.get_by_xids(options.db_filename, options.host, keys)
    else:
        xmls = ucca_db.get_xml_trees(options.db_filename, options.host,
                                     options.pid, keys)

    guessed, ref = [convert.from_site(x) for x in xmls]
    if options.units or options.fscore or options.errors:
        evaluate(guessed, ref,
                 units=options.units, fscore=options.fscore, errors=options.errors, verbose=True)
Example #17
 def train_test(self, model_type, compare=True):
     passages = [self.passage]
     scores = []
     for mode in "train", "load":
         print("-- %sing %s" % (mode, model_type))
         p = Parser(model_file="test_files/%s" % model_type, model_type=model_type)
         p.train(passages if mode == "train" else None)
         guess, ref = zip(*list(p.parse(passages)))
         print()
         self.assertSequenceEqual(ref, passages)
         score = evaluation.Scores.aggregate([evaluation.evaluate(
             g, r, verbose=False, units=False, errors=False)
                                              for g, r in zip(guess, ref)])
         scores.append(score.average_f1())
     if compare:
         self.assertEqual(*scores)
     print("-- average labeled f1: %.3f, %.3f" % tuple(scores))
Example #18
def main(task_ids, by_filename=False, validate=None, log=None, **kwargs):
    kwargs["write"] = False
    if by_filename:
        task_ids_from_file = []
        for filename in task_ids:
            with open(filename, 'r') as f:
                task_ids_from_file += zip(
                    *list(map(str.split, filter(None, map(str.strip, f)))))
        task_ids = task_ids_from_file
    else:
        task_ids = [[task_id] for task_id in task_ids]
    assert len(task_ids) == 2, "Got %d lists of task IDs instead of two" % len(
        task_ids)
    downloader = TaskDownloader(**kwargs)
    scores = []
    validate_h = open(validate, "w", encoding="utf-8") if validate else None
    log_h = open(log, "w", encoding="utf-8") if log else None
    if log:
        fields = ["guessed", "ref"] + Scores.field_titles(
            eval_type=LABELED) + Scores.field_titles(eval_type=UNLABELED)
        print(*fields, file=log_h, sep="\t", flush=True)
    for task_id_pair in tqdm(list(zip(*task_ids)),
                             unit=" tasks",
                             desc="Evaluating"):
        passage_pair = []
        for task_id in task_id_pair:
            passage, *_ = downloader.download_task(task_id,
                                                   validate=validate_h,
                                                   **kwargs)
            passage_pair.append(passage)
        score = evaluate(*passage_pair, **kwargs)
        if log:
            fields = list(task_id_pair) + score.fields(
                eval_type=LABELED) + score.fields(eval_type=UNLABELED)
            print(*fields, file=log_h, sep="\t", flush=True)
        scores.append(score)
    if validate:
        validate_h.close()
    if log:
        log_h.close()
    print()
    if len(scores) > 1:
        print("Aggregated scores:")
    Scores.aggregate(scores).print()
Example #19
def main(args):
    guessed, ref = [
        ioutil.read_files_and_dirs((x, ), converters=FROM_FORMAT)
        for x in (args.guessed, args.ref)
    ]
    if len(guessed) != len(ref):
        raise ValueError(
            "Number of passages to compare does not match: %d != %d" %
            (len(guessed), len(ref)))
    if len(guessed) > 1:
        guessed_by_id = {
            g.ID: g
            for g in tqdm(
                guessed, desc="Reading " + args.guessed, unit=" passages")
        }
        try:
            guessed = [
                guessed_by_id[p.ID] for p in tqdm(
                    ref, desc="Reading " + args.ref, unit=" passages")
            ]
        except KeyError as e:
            raise ValueError("Passage IDs do not match") from e
    results = [
        evaluate(g, r, errors=True) for g, r in zip(
            tqdm(guessed, desc="Evaluating", unit=" passages"), ref)
    ]
    confusion_matrix = Scores.aggregate(
        results).evaluators[LABELED].results[PRIMARY].errors.most_common()
    label_map = {}
    for (g, r), _ in confusion_matrix:
        g, *_ = g.partition("|")
        prefix, *_ = g.partition(":")
        if not any(l.startswith(prefix)
                   for l in label_map):  # drop suffix for most common label
            g = prefix
        if g not in label_map:
            label_map[g], *_ = r.partition("|")
    with open(args.out_file, "w", encoding="utf-8") as f:
        csv.writer(f).writerows(
            tqdm(sorted(label_map.items()),
                 desc="Writing " + args.out_file,
                 unit=" rows"))
Example #20
def main(args):
    converter1 = convert.TO_FORMAT[args.format]
    converter2 = convert.FROM_FORMAT[args.format]
    scores = []
    for ref in get_passages_with_progress_bar(args.filenames,
                                              desc="Converting"):
        try:
            guessed = next(converter2(converter1(ref, tree=args.tree), ref.ID))
            scores.append(evaluate(guessed, ref, verbose=args.verbose))
        except Exception as e:
            if args.strict:
                raise ValueError("Error evaluating conversion of %s" %
                                 ref.ID) from e
            else:
                with tqdm.external_write_mode():
                    print("Error evaluating conversion of %s: %s" %
                          (ref.ID, e),
                          file=sys.stderr)
    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
    Scores.aggregate(scores).print()
Example #21
def main():
    argparser = argparse.ArgumentParser(description=desc)
    argparser.add_argument("filenames", nargs="+",
                           help="file names to convert and evaluate")
    argparser.add_argument("-f", "--format", required=True, choices=convert.CONVERTERS,
                           help="input file format")
    argparser.add_argument("-T", "--tree", action="store_true",
                           help="remove multiple parents to get a tree")
    argparser.add_argument("-s", "--strict", action="store_true",
                           help="stop immediately if failed to convert or evaluate a file")
    argparser.add_argument("-v", "--verbose", action="store_true",
                           help="print evaluation results for each file separately")
    args = argparser.parse_args()

    converter1 = convert.TO_FORMAT[args.format]
    converter2 = convert.FROM_FORMAT[args.format]
    scores = []
    for pattern in args.filenames:
        filenames = glob.glob(pattern)
        if not filenames:
            raise IOError("Not found: " + pattern)
        for filename in filenames:
            sys.stdout.write("\rConverting %s" % filename)
            sys.stdout.flush()
            ref = file2passage(filename)
            try:
                guessed = next(converter2(converter1(ref, tree=args.tree), ref.ID))
                scores.append(evaluate(guessed, ref, verbose=args.verbose))
            except Exception as e:
                if args.strict:
                    raise ValueError("Error evaluating conversion of %s" % filename) from e
                else:
                    print("Error evaluating conversion of %s: %s" % (filename, e), file=sys.stderr)
    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
    Scores.aggregate(scores).print()

    sys.exit(0)
Example #22
def main():
    argparser = argparse.ArgumentParser(description=desc)
    argparser.add_argument("filenames",
                           nargs="+",
                           help="file names to convert and evaluate")
    argparser.add_argument("-f",
                           "--format",
                           required=True,
                           choices=convert.CONVERTERS,
                           help="input file format")
    argparser.add_argument("-T",
                           "--tree",
                           action="store_true",
                           help="remove multiple parents to get a tree")
    args = argparser.parse_args()

    converter1 = convert.TO_FORMAT[args.format]
    converter2 = convert.FROM_FORMAT[args.format]
    scores = []
    for pattern in args.filenames:
        filenames = glob.glob(pattern)
        if not filenames:
            raise IOError("Not found: " + pattern)
        for filename in filenames:
            ref = file2passage(filename)
            guessed = next(converter2(converter1(ref), ref.ID))
            scores.append(
                evaluate(guessed,
                         ref,
                         fscore=True,
                         verbose=True,
                         units=False,
                         errors=False))
    if len(scores) > 1:
        print("Aggregated scores:")
        Scores.aggregate(scores).print()

    sys.exit(0)
Example #23
def main(task_ids, by_filename=False, validate=None, log=None, **kwargs):
    kwargs["write"] = False
    if by_filename:
        task_ids_from_file = []
        for filename in task_ids:
            with open(filename, 'r') as f:
                task_ids_from_file += zip(*list(map(str.split, filter(None, map(str.strip, f)))))
        task_ids = task_ids_from_file
    else:
        task_ids = [[task_id] for task_id in task_ids]
    assert len(task_ids) == 2, "Got %d lists of task IDs instead of two" % len(task_ids)
    downloader = TaskDownloader(**kwargs)
    scores = []
    validate_h = open(validate, "w", encoding="utf-8") if validate else None
    log_h = open(log, "w", encoding="utf-8") if log else None
    if log:
        fields = ["guessed", "ref"] + Scores.field_titles(eval_type=LABELED) + Scores.field_titles(eval_type=UNLABELED)
        print(*fields, file=log_h, sep="\t", flush=True)
    for task_id_pair in tqdm(list(zip(*task_ids)), unit=" tasks", desc="Evaluating"):
        passage_pair = []
        for task_id in task_id_pair:
            passage, *_ = downloader.download_task(task_id, validate=validate_h, **kwargs)
            passage_pair.append(passage)
        score = evaluate(*passage_pair, **kwargs)
        if log:
            fields = list(task_id_pair) + score.fields(eval_type=LABELED) + score.fields(eval_type=UNLABELED)
            print(*fields, file=log_h, sep="\t", flush=True)
        scores.append(score)
    if validate:
        validate_h.close()
    if log:
        log_h.close()
    print()
    if len(scores) > 1:
        print("Aggregated scores:")
    Scores.aggregate(scores).print()
Example #24
def test_evaluate(create1, create2, f1, units, errors):
    scores = evaluate(create1(), create2(), units=units, errors=errors)
    check_primary_remote(scores, f1)
Example #25
         sys.stdout.flush()
         guessed_by_id[g.ID] = g
     ids = [p.ID for p in ref]
     try:
         guessed = [guessed_by_id[i] for i in ids]
     except KeyError as e:
         raise ValueError("Passage IDs do not match") from e
 results = []
 for g, r in zip(guessed, ref):
     if len(guessed) > 1:
         sys.stdout.write("\rEvaluating %s%s" % (g.ID, ":" if args.verbose else "..."))
         sys.stdout.flush()
     if args.verbose:
         print()
     result = evaluation.evaluate(g, r, constructions=args.constructions, units=args.units, fscore=args.fscore,
                                  errors=args.errors, verbose=args.verbose or len(guessed) == 1,
                                  normalize=args.normalize)
     if args.verbose:
         print("Average labeled F1 score: %.3f\n" % result.average_f1())
     results.append(result)
 summary = evaluation.Scores.aggregate(results)
 if len(results) > 1:
     if args.verbose:
         print("Aggregated scores:")
     else:
         print(end="\r")
         if not args.quiet:
             summary.print()
     if not args.quiet:
         print("Average labeled F1 score: %.3f" % summary.average_f1())
 args_constructions = summary.evaluators
Example #26
     try:
         guessed = [guessed_by_id[i] for i in ids]
     except KeyError as e:
         raise ValueError("Passage IDs do not match") from e
 results = []
 for g, r in zip(guessed, ref):
     if len(guessed) > 1:
         sys.stdout.write("\rEvaluating %s%s" %
                          (g.ID, ":" if args.verbose else "..."))
         sys.stdout.flush()
     if args.verbose:
         print()
     result = evaluation.evaluate(g,
                                  r,
                                  constructions=args.constructions,
                                  units=args.units,
                                  fscore=args.fscore,
                                  errors=args.errors,
                                  verbose=args.verbose or len(guessed) == 1,
                                  normalize=args.normalize)
     if args.verbose:
         print("Average labeled F1 score: %.3f\n" % result.average_f1())
     results.append(result)
 summary = evaluation.Scores.aggregate(results)
 if len(results) > 1:
     if args.verbose:
         print("Aggregated scores:")
     else:
         print(end="\r")
         if not args.quiet:
             summary.print()
     if not args.quiet:
Example #27
    elif mode == 'refinement':
        convert_refinement_to_concat(passage)
        convert_refinement_to_concat(ref)

    passage_vanilla = get_vanilla_ucca(passage)
    passage_snacs = get_snacs_ucca(passage)
    passage_refined = get_refined_ucca(passage)
    ref_vanilla = get_vanilla_ucca(ref)
    ref_snacs = get_snacs_ucca(ref)
    ref_refined = get_refined_ucca(ref)

    #    print(passage_snacs)
    #    print(ref_snacs)

    integrated_results.append(
        evaluation.evaluate(passage, ref, constructions=('Non-preterm', )))
    vanilla_results.append(
        evaluation.evaluate(passage_vanilla,
                            ref_vanilla,
                            constructions=('Non-preterm', )))
    snacs_results.append(
        evaluation.evaluate(passage_snacs,
                            ref_snacs,
                            constructions=(
                                'Non-preterm',
                                'SNACS',
                                'hastags',
                            )))
    refined_results.append(
        evaluation.evaluate(passage_refined,
                            ref_refined,
Example #28
 def __call__(self, dataset_label, predicted_tree, gold_tree):
     score = evaluation.evaluate(predicted_tree, gold_tree)
     if dataset_label not in self.scores:
         self.scores[dataset_label] = []
     self.scores[dataset_label].append(score)
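Example #28 shows only the __call__ method of a scorer object; here is a hedged sketch of what the enclosing wrapper might look like. The class name EvaluationScorer and the aggregate() helper are assumptions for illustration, not part of the original snippet:

from ucca import evaluation

class EvaluationScorer:
    # Hypothetical wrapper: collects one Scores object per evaluated pair, keyed by dataset label
    def __init__(self):
        self.scores = {}

    def __call__(self, dataset_label, predicted_tree, gold_tree):
        score = evaluation.evaluate(predicted_tree, gold_tree)
        self.scores.setdefault(dataset_label, []).append(score)

    def aggregate(self, dataset_label):
        # Combine the per-passage scores collected for one dataset label
        return evaluation.Scores.aggregate(self.scores[dataset_label])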
Example #29
from argparse import ArgumentParser

from ucca.evaluation import evaluate
from ucca.ioutil import file2passage


################
# MAIN         #
################

if __name__ == "__main__":
    argparser = ArgumentParser(description="Compare two UCCA passages.")
    argparser.add_argument("guessed", help="xml/pickle file name for the guessed annotation")
    argparser.add_argument("ref", help="xml/pickle file name for the reference annotation")
    argparser.add_argument("--units", "-u", dest="units", action="store_true",
                           help="the units the annotations have in common, and those each has separately")
    argparser.add_argument("--fscore", "-f", dest="fscore", action="store_true",
                           help="outputs the traditional P,R,F instead of the scene structure evaluation")
    argparser.add_argument("--errors", "-e", dest="errors", action="store_true",
                           help="prints the error distribution according to its frequency")
    args = argparser.parse_args()

    if not (args.units or args.fscore or args.errors):
        argparser.error("At least one of -u, -f or -e is required.")

    guessed, ref = [file2passage(x) for x in (args.guessed, args.ref)]

    if args.units or args.fscore or args.errors:
        evaluate(guessed, ref,
                 units=args.units, fscore=args.fscore, errors=args.errors, verbose=True)
Example #30
    ref_vanilla = get_vanilla_ucca(ref)
    ref_snacs, ref_refined = get_snacs_refined_ucca(ref)
#    ref_refined, edges_refined = get_refined_ucca(ref)

#    with open('edges_snacs.tsv', 'a') as f:
#        for e, ts in sorted(edges_snacs,key=lambda x: str(x[0])):
#            print(name, e, ts, sep='\t', file=f)

#    with open('edges_refined.tsv', 'a') as f:
#        for e, ts in sorted(edges_refined, key=lambda x: str(x[0])):
#            print(name, e, ts, sep='\t', file=f)

#    print(passage_snacs)
#    print(ref_snacs)

    integrated_results.append(evaluation.evaluate(
        passage_full, ref_full,
        constructions=('Non-preterm', 'SNACS'), normalize=False))
    vanilla_results.append(evaluation.evaluate(
        passage_vanilla, ref_vanilla,
        constructions=('Non-preterm', 'SNACS', 'has_gold_SNACS', 'has_gold_SNACS_sibling',
                       'has_gold_SNACS_or_sibling', 'scenes', 'scene_children',
                       'scenes_and_scene_children'),
        normalize=True))
    snacs_results.append(evaluation.evaluate(
        passage_snacs, ref_snacs,
        constructions=('Non-preterm', 'SNACS', 'has_tags'), normalize=False))
    refined_results.append(evaluation.evaluate(
        passage_refined, ref_refined,
        constructions=('Non-preterm', 'has_tags'), normalize=False))



print('UCCA SNACS')

integ_aggr = evaluation.Scores.aggregate(integrated_results)
integ_aggr.print()
# integ_aggr.print_confusion_matrix()


print('\n\nUCCA')
Example #31
        "the units the annotations have in common, and those each has separately"
    )
    argparser.add_argument(
        "--fscore",
        "-f",
        dest="fscore",
        action="store_true",
        help=
        "outputs the traditional P,R,F instead of the scene structure evaluation"
    )
    argparser.add_argument(
        "--errors",
        "-e",
        dest="errors",
        action="store_true",
        help="prints the error distribution according to its frequency")
    args = argparser.parse_args()

    if not (args.units or args.fscore or args.errors):
        argparser.error("At least one of -u, -f or -e is required.")

    guessed, ref = [file2passage(x) for x in (args.guessed, args.ref)]

    if args.units or args.fscore or args.errors:
        evaluate(guessed,
                 ref,
                 units=args.units,
                 fscore=args.fscore,
                 errors=args.errors,
                 verbose=True)