def main(args):
    """Parse the passages of every spec and optionally write and evaluate them.

    For each spec produced by ``read_specs``, every passage is parsed,
    its labels are mapped with ``args.label_map`` and it is normalized.
    Depending on ``args.write`` and ``args.evaluate`` the parsed passage is
    written out and/or scored against the input passage.  The scores
    collected for a spec are printed after its last passage.

    :param args: parsed command-line arguments; this function reads
        ``verbose``, ``label_map``, ``write``, ``evaluate`` and
        ``output_format`` from it and forwards it to ``read_specs`` and
        ``write_passage``.
    """
    for spec in read_specs(args, converters=FROM_FORMAT):
        spec_scores = []

        # Show a progress bar only when detailed output is not requested.
        if not args.verbose:
            if spec.out_dir != ".":
                label = spec.out_dir
            else:
                label = spec.lang
            spec.passages = tqdm(spec.passages, unit=" passages", desc="Parsing " + label)

        parsed_pairs = parse(spec.passages, spec.lang, spec.udpipe, args.verbose)
        for passage, parsed in parsed_pairs:
            map_labels(parsed, args.label_map)
            normalize(parsed, extra=True)

            if args.write:
                write_passage(parsed, args)

            if not args.evaluate:
                continue

            evaluator = EVALUATORS.get(args.output_format)
            converter = TO_FORMAT.get(args.output_format)
            if converter is not None:
                # Bring both sides into the output format before comparing.
                passage = converter(passage)
                parsed = converter(parsed)
            if evaluator is not None:
                detailed = args.verbose > 1
                spec_scores.append(evaluator.evaluate(parsed, passage, verbose=detailed))

        if spec_scores:
            Scores(spec_scores).print()
def main(args):
    """Bootstrap-compare every guessed system against the first one.

    The paths in ``args.guessed`` followed by ``args.ref`` are glob-expanded
    (a pattern without matches is kept as is).  A directory is replaced by
    the list of non-directory entries it contains, any other path by a
    one-element list.  The last resulting entry is used as the reference
    files.

    Each entry is evaluated against the reference with ``evaluate_all``.
    Then, for every system after the first, the observed difference to the
    first system is computed with ``diff`` and ``args.nboot`` index samples
    of size ``n`` (the number of reference files) are drawn.  The printed
    p-value is the fraction of samples for which
    ``sign(d) * diff(pair, indices) > 2 * abs(d)``.

    :param args: parsed command-line arguments; ``guessed``, ``ref``,
        ``format`` and ``nboot`` are read here, and ``vars(args)`` is
        forwarded to ``evaluate_all`` as keyword arguments.
    """
    files = [
        None if path is None
        else (
            [
                os.path.join(path, entry)
                for entry in os.listdir(path)
                if not os.path.isdir(os.path.join(path, entry))
            ]
            if os.path.isdir(path)
            else [path]
        )
        for pattern in args.guessed + [args.ref]
        for path in glob(pattern) or [pattern]
    ]
    ref_files = files[-1]
    n = len(ref_files)
    evaluate = EVALUATORS.get(passage_format(ref_files[0])[1], EVALUATORS[args.format])

    # NOTE(review): zip() pairs the entries of `files` with `args.guessed`
    # positionally, which presumably assumes every pattern expands to exactly
    # one path - confirm against the callers.
    # The comprehension variable is called `name` so that it no longer shadows
    # the sample size `n` computed above.
    results = [
        list(evaluate_all(evaluate, [guessed_files, ref_files, None], name, **vars(args)))
        for guessed_files, name in zip(files, args.guessed)
    ]

    for evaluated, name in zip(results[1:], args.guessed[1:]):
        print(name)
        pair = (results[0], evaluated)  # (baseline, system under test)
        observed = diff(pair, verbose=True)
        sample = np.random.choice(n, (args.nboot, n))
        # Built-in sum() instead of np.sum(): passing a generator to np.sum()
        # is deprecated and only falls back to the built-in anyway.
        exceeding = sum(
            np.sign(observed) * diff(pair, indices) > 2 * np.abs(observed)
            for indices in tqdm(sample, unit=" samples")
        )
        print("p-value:")
        print(exceeding / args.nboot)
        print()
def main(args):
    """Run a bootstrap significance test on the evaluation results.

    Every directory in ``args.guessed`` and the directory ``args.ref`` is
    listed; the last listing is the reference.  ``evaluate_all`` is called
    once per element of ``(files[0::2], files[1:])`` paired with the
    corresponding entry of ``args.guessed``, and the observed difference
    between the results is computed with ``diff``.  ``args.nboot`` index
    samples of size ``n`` (the number of reference files) are then drawn,
    and the printed p-value is the fraction of samples for which
    ``sign(d) * diff(results, indices) > 2 * abs(d)``.

    :param args: parsed command-line arguments; ``guessed``, ``ref``,
        ``format`` and ``nboot`` are read here and ``args`` itself is
        forwarded to ``evaluate_all``.
    """
    files = [
        [os.path.join(directory, entry) for entry in os.listdir(directory)]
        for directory in args.guessed + [args.ref]
    ]
    ref_files = files[-1]
    n = len(ref_files)
    evaluate = EVALUATORS.get(passage_format(ref_files[0])[1], EVALUATORS[args.format]).evaluate

    # The comprehension variable is called `name` so that it no longer shadows
    # the sample size `n` computed above.
    results = [
        list(evaluate_all(args, evaluate, file_lists, name))
        for file_lists, name in zip((files[0::2], files[1:]), args.guessed)
    ]

    observed = diff(results, verbose=True)
    sample = np.random.choice(n, (args.nboot, n))
    # Built-in sum() instead of np.sum(): passing a generator to np.sum() is
    # deprecated and only falls back to the built-in anyway.
    exceeding = sum(
        np.sign(observed) * diff(results, indices) > 2 * np.abs(observed)
        for indices in tqdm(sample, unit=" samples")
    )
    print("p-value:")
    print(exceeding / args.nboot)
def evaluate(self, mode=ParseMode.test):
    """Score ``self.out`` against ``self.passage`` and remember the average F1.

    The evaluator registered for ``self.format`` is used, falling back to
    ``evaluate_ucca``.  ``self.eval_type`` is set to ``UNLABELED`` when the
    configuration reports the input format as unlabeled, otherwise to
    ``LABELED``.  Only that evaluation type is requested when ``mode`` is
    ``ParseMode.dev``; otherwise both labeled and unlabeled are requested.

    :param mode: the current ``ParseMode``
    :return: the score object returned by the evaluator, with its ``lang``
        attribute set to ``self.lang``
    """
    if self.format:
        self.config.print("Converting to %s and evaluating..." % self.format)

    if self.config.is_unlabeled(self.in_format):
        self.eval_type = UNLABELED
    else:
        self.eval_type = LABELED

    score_fn = EVALUATORS.get(self.format, evaluate_ucca)

    # Keyword arguments are prepared in the order the call lists them.
    converter = get_output_converter(self.format)
    verbose = self.out and self.config.args.verbose > 3
    constructions = self.config.args.constructions
    if mode is ParseMode.dev:
        eval_types = (self.eval_type,)
    else:
        eval_types = (LABELED, UNLABELED)

    score = score_fn(
        self.out,
        self.passage,
        converter=converter,
        verbose=verbose,
        constructions=constructions,
        eval_types=eval_types,
    )
    self.f1 = average_f1(score, self.eval_type)
    score.lang = self.lang
    return score
def main():
    """Convert files to passages and back, evaluating each round trip.

    Command-line arguments are parsed here.  Every file matching one of the
    given patterns is read with the converter pair registered for its
    extension (falling back to the ``"amr"`` pair), converted back, and the
    result is evaluated against the original.  With ``--out-dir`` both the
    passage XML and the converted-back text are written there.  Exits the
    process with status 0 when done.

    :raises IOError: if a pattern matches no file
    :raises ValueError: if converting back or evaluating fails
    """
    parser = configargparse.ArgParser(description=desc)
    parser.add_argument("filenames", nargs="+", help="file names to convert and evaluate")
    add_verbose_arg(parser, help="detailed evaluation output")
    add_boolean_option(parser, "wikification", "Spotlight to wikify any named node (for AMR)")
    parser.add_argument("-o", "--out-dir", help="output directory (if unspecified, files are not written)")
    args = parser.parse_args()

    all_scores = []
    for pattern in args.filenames:
        matches = glob(pattern)
        if not matches:
            raise IOError("Not found: " + pattern)

        for path in matches:
            # Progress line; terminated right away if other output will follow.
            print("\rConverting '%s'" % path, end="")
            if args.out_dir or args.verbose:
                print(flush=True)

            stem, suffix = os.path.splitext(os.path.basename(path))
            fmt = suffix.lstrip(".")
            converters = CONVERTERS.get(fmt, CONVERTERS["amr"])
            evaluator = EVALUATORS.get(fmt, EVALUATORS["amr"]).evaluate

            with open(path, encoding="utf-8") as handle:
                triples = converters[0](handle, passage_id=stem, return_original=True)
                for passage, ref, passage_id in triples:
                    if args.out_dir:
                        os.makedirs(args.out_dir, exist_ok=True)
                        xml_path = "%s/%s.xml" % (args.out_dir, passage.ID)
                        print("Writing '%s'..." % xml_path, file=sys.stderr, flush=True)
                        ioutil.passage2file(passage, xml_path)

                    try:
                        guessed = converters[1](passage, wikification=args.wikification, use_original=False)
                    except Exception as err:
                        raise ValueError("Error converting %s back from %s" % (path, fmt)) from err

                    if args.out_dir:
                        text_path = "%s/%s%s" % (args.out_dir, passage.ID, suffix)
                        print("Writing '%s'..." % text_path, file=sys.stderr, flush=True)
                        with open(text_path, "w", encoding="utf-8") as text_file:
                            print("\n".join(guessed), file=text_file)

                    try:
                        score = evaluator(guessed, ref, verbose=args.verbose > 1)
                    except Exception as err:
                        raise ValueError("Error evaluating conversion of %s" % path) from err

                    all_scores.append(score)
                    if args.verbose:
                        print(passage_id)
                        score.print()

    print()
    if args.verbose and len(all_scores) > 1:
        print("Aggregated scores:")
        Scores(all_scores).print()
    sys.exit(0)
def main(args):
    """Convert files to passages and back, evaluating each round trip.

    Every file matching one of ``args.filenames`` is read with the converter
    pair registered for its extension (falling back to the ``"amr"`` pair).
    Each passage is optionally normalized, converted back to the source
    format and evaluated against the original.  When ``args.out_dir`` is
    set, the passage XML and the converted-back text are written there.

    :param args: parsed command-line arguments; ``filenames``, ``out_dir``,
        ``verbose``, ``normalize``, ``extra_normalization`` and
        ``wikification`` are read here.
    :raises IOError: if a pattern matches no file
    :raises ValueError: if converting back or evaluating fails
    """
    all_scores = []
    for pattern in args.filenames:
        matches = glob(pattern)
        if not matches:
            raise IOError("Not found: " + pattern)

        for path in matches:
            # Progress line; terminated right away if other output will follow.
            print("\rConverting '%s'" % path, end="")
            if args.out_dir or args.verbose:
                print(flush=True)

            stem, suffix = os.path.splitext(os.path.basename(path))
            fmt = suffix.lstrip(".")
            converters = CONVERTERS.get(fmt, CONVERTERS["amr"])
            evaluator = EVALUATORS.get(fmt, EVALUATORS["amr"]).evaluate

            with open(path, encoding="utf-8") as handle:
                triples = converters[0](handle, passage_id=stem, return_original=True)
                for passage, ref, passage_id in triples:
                    if args.normalize:
                        normalize(passage, extra=args.extra_normalization)

                    if args.out_dir:
                        os.makedirs(args.out_dir, exist_ok=True)
                        xml_path = "%s/%s.xml" % (args.out_dir, passage.ID)
                        print("Writing '%s'..." % xml_path, file=sys.stderr, flush=True)
                        ioutil.passage2file(passage, xml_path)

                    try:
                        guessed = converters[1](passage, wikification=args.wikification, use_original=False)
                    except Exception as err:
                        raise ValueError("Error converting %s back from %s" % (path, fmt)) from err

                    if args.out_dir:
                        text_path = "%s/%s%s" % (args.out_dir, passage.ID, suffix)
                        print("Writing '%s'..." % text_path, file=sys.stderr, flush=True)
                        with open(text_path, "w", encoding="utf-8") as text_file:
                            print("\n".join(guessed), file=text_file)

                    try:
                        score = evaluator(guessed, ref, verbose=args.verbose > 1)
                    except Exception as err:
                        raise ValueError("Error evaluating conversion of %s" % path) from err

                    all_scores.append(score)
                    if args.verbose:
                        print(passage_id)
                        score.print()

    print()
    if args.verbose and len(all_scores) > 1:
        print("Aggregated scores:")
        Scores(all_scores).print()
def main(args):
    """Convert files to passages and back, evaluating each round trip.

    Every path matching one of ``args.filenames`` (a pattern without matches
    is used as a literal path) is read with the converter pair registered
    for its extension; ``.txt`` files and unknown extensions use
    ``args.format``.  Each passage is optionally normalized, converted back
    and evaluated against the original, while a progress bar shows the
    running average F1 of the current file.  When ``args.out_dir`` is set,
    the passage XML and the converted-back text are written there.

    :param args: parsed command-line arguments; ``filenames``, ``out_dir``,
        ``format``, ``verbose``, ``normalize``, ``extra_normalization``,
        ``wikification`` and ``units`` are read here.
    :raises ValueError: if converting back or evaluating fails
    """
    # Create the output directory once up front; it used to be re-created
    # for every single passage inside the loop as well.
    if args.out_dir:
        os.makedirs(args.out_dir, exist_ok=True)

    scores = []
    for pattern in args.filenames:
        for filename in glob(pattern) or [pattern]:
            file_scores = []
            basename, ext = os.path.splitext(os.path.basename(filename))
            passage_format = ext.lstrip(".")
            if passage_format == "txt":
                passage_format = args.format
            in_converter, out_converter = CONVERTERS.get(passage_format, CONVERTERS[args.format])
            evaluate = EVALUATORS.get(passage_format, EVALUATORS[args.format])

            with open(filename, encoding="utf-8") as f:
                desc = "Converting '%s'" % filename
                if args.out_dir:
                    desc += ", writing to '%s'" % args.out_dir
                t = tqdm(in_converter(f, passage_id=basename, return_original=True),
                         unit=" passages", desc=desc)
                for passage, ref, passage_id in t:
                    if args.normalize:
                        normalize(passage, extra=args.extra_normalization)

                    if args.out_dir:
                        outfile = os.path.join(args.out_dir, passage.ID + ".xml")
                        if args.verbose:
                            # Print through external_write_mode() while the
                            # progress bar is active.
                            with ioutil.external_write_mode():
                                print("Writing '%s'..." % outfile, file=sys.stderr, flush=True)
                        ioutil.passage2file(passage, outfile)

                    try:
                        guessed = out_converter(passage, wikification=args.wikification, use_original=False)
                    except Exception as e:
                        raise ValueError("Error converting %s back from %s" % (filename, passage_format)) from e

                    if args.out_dir:
                        outfile = os.path.join(args.out_dir, passage.ID + ext)
                        if args.verbose:
                            with ioutil.external_write_mode():
                                print("Writing '%s'..." % outfile, file=sys.stderr, flush=True)
                        with open(outfile, "w", encoding="utf-8") as f_out:
                            print("\n".join(guessed), file=f_out)

                    try:
                        s = evaluate(guessed, ref, verbose=args.verbose > 1, units=args.units)
                    except Exception as e:
                        raise ValueError("Error evaluating conversion of %s" % filename) from e

                    file_scores.append(s)
                    if args.verbose:
                        with ioutil.external_write_mode():
                            print(passage_id)
                            s.print()
                    # Show the running average F1 of this file on the bar.
                    t.set_postfix(F1="%.2f" % (100.0 * Scores(file_scores).average_f1()))

            scores += file_scores

    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
        Scores(scores).print()
metadata["competition-submission"], metadata["submitted-at"].strftime("%d.%m.%Y ") ] print("Running evaluation on %s track" % competition) # run evaluation files = [ None if d is None else [ os.path.join(d, f) for f in os.listdir(d) if not os.path.isdir(os.path.join(d, f)) ] if os.path.isdir(d) else [d] for d in (os.path.join(submission_dir, track, lang), os.path.join(truth_dir, lang), None) ] evaluate = EVALUATORS.get( passage_format(files[1][0])[1], EVALUATORS["amr"]) results = list( evaluate_all(evaluate, files, format="amr", name="Evaluating", unlabeled=False, matching_ids=True)) summary = Scores(results) # write results to html file and append to values output_html_file.write("<tr>\n" "<td>%s</td>" % competition) # labeled output_html_file.write("<td>%.3f</td>\n" %