def copy_annotation(passages, conllu, by_id=False, as_array=True, as_extra=True, verbose=False, lang=None):
    # Read the CoNLL-U passages either into an ID-keyed dict (by_id) or lazily, in order.
    conllu_sentences = {annotated.ID: annotated for annotated in get_passages_with_progress_bar(
        conllu, converters=CONVERTERS, desc="Reading '%s'" % conllu)} \
        if by_id else get_passages(conllu, converters=CONVERTERS)
    for passage in passages:
        try:
            annotated = conllu_sentences[passage.ID] if by_id else next(conllu_sentences)
        except (KeyError, StopIteration) as e:
            raise ValueError("Missing annotation for passage ID '%s', by_id=%s" % (passage.ID, by_id)) from e
        if verbose:
            with external_write_mode():
                print("Reading annotation from '%s'" % annotated.ID)
        if as_array:
            # Copy the token annotation arrays wholesale into the target passage's layer 0.
            passage.layer(layer0.LAYER_ID).docs()[:] = annotated.layer(layer0.LAYER_ID).docs()
        if as_extra:
            # Copy per-token annotation into each terminal's "extra" dict.
            for terminal, annotated_terminal in zip(passage.layer(layer0.LAYER_ID).all,
                                                    annotated.layer(layer0.LAYER_ID).all):
                copy_tok_to_extra(annotated_terminal, terminal, lang=lang)
        yield passage
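# Usage sketch (an assumption, not part of the source): copy_annotation is a
# generator, so its output must be consumed for any annotation to be copied.
# get_passages and passage2file are assumed to live in ucca.ioutil, as in the
# ucca library; the glob pattern, output path and helper name below are illustrative only.
from ucca.ioutil import get_passages, passage2file

def annotate_from_conllu(xml_pattern, conllu_path, out_dir="."):
    # Match passages to CoNLL-U sentences by ID and write the enriched passages back out.
    for passage in copy_annotation(get_passages([xml_pattern]), conllu_path,
                                   by_id=True, verbose=True, lang="en"):
        passage2file(passage, "%s/%s.xml" % (out_dir, passage.ID))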
def main(args):
    if args.out_dir:
        os.makedirs(args.out_dir, exist_ok=True)
        if not args.tikz:
            import matplotlib
            matplotlib.use('Agg')
    to_stdout = (args.tikz or args.standoff) and not args.out_dir
    t = args.passages
    t = get_passages(t) if to_stdout else get_passages_with_progress_bar(t, desc="Visualizing")
    if args.sentences:
        t = (sentence for passage in t for sentence in split2sentences(passage))
    for passage in t:
        if args.tikz:
            print_text(args, visualization.tikz(passage), passage.ID + ".tikz.txt")
        elif args.standoff:
            print_text(args, visualization.standoff(passage), passage.ID + ".ann")
        else:
            import matplotlib.pyplot as plt
            width = len(passage.layer(layer0.LAYER_ID).all) * 19 / 27
            plt.figure(passage.ID, figsize=(width, width * 10 / 19))
            visualization.draw(passage, node_ids=args.node_ids)
            if args.out_dir:
                plt.savefig(os.path.join(args.out_dir, passage.ID + "." + args.format))
                plt.close()
            else:
                plt.show()
def read(fp, text=None, prefix=None):
    parent = Path(fp.name).parent
    paths = [parent / file.strip() for file in fp]
    for passage in get_passages(map(str, paths)):
        try:
            graph = passage2graph(passage, text, prefix)
        except Exception as exception:
            print(exception)
            continue
        yield graph, None
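# Usage sketch (an assumption, not part of the source): read expects an open file
# whose lines are paths, relative to that file, of passage files; it yields
# (graph, None) pairs and skips passages that fail to convert. "passages.txt" is
# a hypothetical file name.
with open("passages.txt", encoding="utf-8") as fp:
    for graph, _ in read(fp):
        print(graph)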
def run(self, guessed: List[str], ref: List[str], **kwargs):
    del kwargs
    guessed, ref = [{p.ID: p for p in get_passages(f, converters=self.converters())}
                    for f in (guessed, ref)]
    stats = SummaryStatistics.aggregate(
        [self.evaluate(g, ref[i]) for i, g in sorted(guessed.items()) if i in ref])
    stats.print()
def main(args):
    passages = list(get_passages(args.filenames))
    if args.join_by_prefix:
        # Group passages by ID prefix (all but the last three characters).
        subsets = defaultdict(list)
        for passage in passages:
            subsets[passage.ID[:-3]].append(passage)
    else:
        subsets = {passages[0].ID: passages}
    for passage_id, subset in sorted(subsets.items()):
        print("Joining passages " + ", ".join(passage.ID for passage in subset), file=sys.stderr)
        # Join only the passages in this subset into a single passage.
        joined = ucca.convert.join_passages(subset, passage_id=passage_id, remarks=args.remarks)
        outfile = "%s/%s.%s" % (args.outdir, args.prefix + joined.ID, "pickle" if args.binary else "xml")
        print("Writing joined passage file '%s'..." % outfile, file=sys.stderr)
        passage2file(joined, outfile, args.binary)
def read_specs(args, converters=None):
    specs = [(pattern, args.out_dir, args.lang, args.udpipe, args.stanfordnlp, args.conllu, args.join)
             for pattern in args.filenames]
    if args.list_file:
        with open(args.list_file, encoding="utf-8") as f:
            specs += [l.strip().split() for l in f if not l.startswith("#")]
    for spec in specs:
        pattern = spec[0]
        filenames = sorted(glob(pattern))
        if not filenames:
            raise IOError("Not found: " + pattern)
        yield AnnotationSpecification(
            passages=get_passages(filenames, converters=converters),
            out_dir=spec[1] if len(spec) > 1 else args.out_dir,
            lang=spec[2] if len(spec) > 2 else args.lang,
            udpipe=spec[3] if len(spec) > 3 else args.udpipe,
            stanfordnlp=spec[4] if len(spec) > 4 else args.stanfordnlp,
            conllu=spec[5] if len(spec) > 5 else args.conllu,
            join=spec[6] if len(spec) > 6 else args.join)
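# Usage sketch (an assumption, not part of the source): each non-comment line of
# args.list_file is a whitespace-separated spec of up to seven columns
# (pattern, out_dir, lang, udpipe, stanfordnlp, conllu, join); missing columns fall
# back to the command-line defaults. The Namespace fields below are illustrative.
from argparse import Namespace

args = Namespace(filenames=["xml/*.xml"], list_file=None, out_dir=".", lang="en",
                 udpipe=None, stanfordnlp=None, conllu=None, join=None)
for spec in read_specs(args):
    for passage in spec.passages:
        print(spec.out_dir, spec.lang, passage.ID)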