def main(args):
    """Convert each input passage, write it out, and optionally validate it.

    The output directory ``args.out_dir`` is created if missing. Every
    passage yielded by ``iter_passages`` is label-mapped, optionally
    normalized (never when the output format is ``"txt"``), optionally tagged
    with ``args.lang``, and passed to ``write_passage`` together with all
    parsed arguments as keyword options.

    When ``args.validate`` is set, the written passage is validated; the
    first passage with validation errors has them printed and the process
    exits with status 1.

    :param args: parsed command-line namespace; the attributes read are
        ``out_dir``, ``filenames``, ``input_format``, ``prefix``,
        ``mark_aux``, ``annotate``, ``wikification``, ``label_map``,
        ``output_format``, ``normalize``, ``extra_normalization``, ``lang``,
        ``validate`` and ``ucca_validation``.
    """
    os.makedirs(args.out_dir, exist_ok=True)
    reader_options = dict(
        input_format=args.input_format,
        prefix=args.prefix,
        mark_aux=args.mark_aux,
        annotate=args.annotate,
        wikification=args.wikification,
        label_map_file=args.label_map,
        output_format=args.output_format,
    )
    for passage in iter_passages(args.filenames, desc="Converting",
                                 **reader_options):
        map_labels(passage, args.label_map)
        should_normalize = args.normalize and args.output_format != "txt"
        if should_normalize:
            normalize(passage, extra=args.extra_normalization)
        if args.lang:
            passage.attrib["lang"] = args.lang
        write_passage(passage, **vars(args))
        if not args.validate:
            continue
        try:
            problems = list(validate(passage,
                                     ucca_validation=args.ucca_validation,
                                     output_format=args.output_format))
        except ValueError:
            # A ValueError raised while validating is not reported as a
            # failure: the passage is skipped and conversion carries on.
            continue
        if problems:
            print_errors(problems, passage.ID)
            sys.exit(1)
def main(args):
    """Convert each input passage, write it out, and optionally validate it.

    All parsed arguments are forwarded as keyword options to
    ``iter_passages``, ``write_passage`` and ``validate``, so those helpers
    pick out whichever settings they need.

    The output directory ``args.out_dir`` is created if missing. Each passage
    is label-mapped, optionally normalized (never when the output format is
    ``"txt"``), optionally tagged with ``args.lang``, and written. When
    ``args.validate`` is set, the first passage with validation errors has
    them printed and the process exits with status 1.

    :param args: parsed command-line namespace; the attributes read directly
        are ``out_dir``, ``filenames``, ``label_map``, ``normalize``,
        ``output_format``, ``extra_normalization``, ``lang`` and ``validate``.
    """
    os.makedirs(args.out_dir, exist_ok=True)
    # One shared view of the namespace, reused for every helper call below.
    options = vars(args)
    for passage in iter_passages(args.filenames, desc="Converting", **options):
        map_labels(passage, args.label_map)
        should_normalize = args.normalize and args.output_format != "txt"
        if should_normalize:
            normalize(passage, extra=args.extra_normalization)
        if args.lang:
            passage.attrib["lang"] = args.lang
        write_passage(passage, **options)
        if not args.validate:
            continue
        try:
            problems = list(validate(passage, **options))
        except ValueError:
            # A ValueError raised while validating is not reported as a
            # failure: the passage is skipped and conversion carries on.
            continue
        if problems:
            print_errors(problems, passage.ID)
            sys.exit(1)
def main(args):
    """Validate every input passage and report the errors found.

    Passages are read lazily from ``iter_passages``. Errors are collected
    per passage ID; when ``args.strict`` is truthy, reading stops at the
    first passage that has any error. If anything was collected, the errors
    are printed in sorted passage-ID order (IDs padded to the longest ID's
    length) and the process exits with status 1. Otherwise
    ``"No errors found."`` is printed.

    :param args: parsed command-line namespace; the attributes read are
        ``filenames``, ``normalize``, ``extra_normalization``,
        ``ucca_validation``, ``format`` and ``strict``.
    """
    passages = iter_passages(args.filenames, desc="Validating")
    stop_at_first = bool(args.strict)
    failures = {}
    for passage in passages:
        passage_id = passage.ID
        found = list(validate(passage,
                              normalization=args.normalize,
                              extra_normalization=args.extra_normalization,
                              ucca_validation=args.ucca_validation,
                              output_format=args.format))
        if not found:
            continue
        failures[passage_id] = found
        if stop_at_first:
            # Strict mode: later passages are neither read nor validated.
            break
    if not failures:
        print("No errors found.")
        return
    id_width = max(len(passage_id) for passage_id in failures)
    for passage_id in sorted(failures):
        print_errors(failures[passage_id], passage_id, id_width)
    sys.exit(1)
def main(args):
    """Convert each input passage, write it out, and optionally validate it.

    The output directory ``args.out_dir`` is created if missing. Every
    passage yielded by ``iter_passages`` is label-mapped, optionally
    normalized, optionally tagged with ``args.lang``, and passed to
    ``write_passage`` together with the whole argument namespace.

    When ``args.validate`` is set, the written passage is validated; the
    first passage with validation errors has them printed and the process
    exits with status 1. Exceptions raised by ``validate`` are not caught.

    :param args: parsed command-line namespace; the attributes read are
        ``out_dir``, ``filenames``, ``input_format``, ``prefix``, ``split``,
        ``mark_aux``, ``annotate``, ``label_map``, ``normalize``,
        ``extra_normalization``, ``lang``, ``validate``,
        ``ucca_validation`` and ``output_format``.
    """
    os.makedirs(args.out_dir, exist_ok=True)
    reader_options = dict(
        input_format=args.input_format,
        prefix=args.prefix,
        split=args.split,
        mark_aux=args.mark_aux,
        annotate=args.annotate,
    )
    for passage in iter_passages(args.filenames, desc="Converting",
                                 **reader_options):
        map_labels(passage, args.label_map)
        if args.normalize:
            normalize(passage, extra=args.extra_normalization)
        if args.lang:
            passage.attrib["lang"] = args.lang
        write_passage(passage, args)
        if not args.validate:
            continue
        problems = list(validate(passage,
                                 ucca_validation=args.ucca_validation,
                                 output_format=args.output_format))
        if problems:
            print_errors(problems, passage.ID)
            sys.exit(1)