def main(argv):
    """Run the sentence clean-up step from the command line.

    ``argv`` is parsed by ``docopt.docopt`` against the module docstring.
    The parsed options are printed, then ``scan.scan_document`` is called
    with ``clean_sentences`` as ``fn`` and no extra ``non_sequences``.
    """
    options = docopt.docopt(__doc__, argv=argv)
    print(options)

    # Look the options up in the same order the call below consumes them.
    source = options['SOURCE']
    out_dir = options['--out']
    scan.scan_document(
        source=source,
        directory=out_dir,
        suffix='.negbio.xml',
        fn=clean_sentences,
        non_sequences=[],
    )
def main(argv):
    """Run the PTB-to-dependency conversion step from the command line.

    ``argv`` is parsed by ``docopt.docopt`` against the module docstring and
    the parsed options are printed.  A ``Ptb2DepConverter(universal=True)``
    and a ``Lemmatizer`` are created and forwarded, in that order, as
    ``non_sequences`` to ``scan.scan_document`` together with ``convert``
    as ``fn``.
    """
    options = docopt.docopt(__doc__, argv=argv)
    print(options)

    # Converter first, lemmatizer second: same construction and list order
    # as the callback receives them.
    helpers = [Ptb2DepConverter(universal=True), Lemmatizer()]

    source = options['SOURCE']
    out_dir = options['--out']
    scan.scan_document(
        source=source,
        directory=out_dir,
        suffix='.ud.xml',
        fn=convert,
        non_sequences=helpers,
    )
def main(argv):
    """Run the Bllip parsing step from the command line.

    ``argv`` is parsed by ``docopt.docopt`` against the module docstring and
    the parsed options are printed.  The ``--model`` value is passed through
    ``os.path.expanduser`` before being given to ``Bllip`` as ``model_dir``;
    the resulting parser is the single entry of ``non_sequences`` handed to
    ``scan.scan_document`` with ``parse`` as ``fn``.
    """
    options = docopt.docopt(__doc__, argv=argv)
    print(options)

    # Expand a leading "~" in the model location before building the parser.
    model_dir = os.path.expanduser(options['--model'])
    bllip_parser = Bllip(model_dir=model_dir)

    source = options['SOURCE']
    out_dir = options['--out']
    scan.scan_document(
        source=source,
        directory=out_dir,
        suffix='.bllip.xml',
        fn=parse,
        non_sequences=[bllip_parser],
    )
def main(argv):
    """Run the negation/uncertainty detection step from the command line.

    ``argv`` is parsed by ``docopt.docopt`` against the module docstring and
    the parsed options are printed.  A ``Detector`` is built from the
    ``--neg-patterns`` and ``--uncertainty-patterns`` option values and passed
    as the single ``non_sequences`` entry to ``scan.scan_document`` with
    ``detect`` as ``fn``.
    """
    options = docopt.docopt(__doc__, argv=argv)
    print(options)

    neg_rules = options['--neg-patterns']
    uncertainty_rules = options['--uncertainty-patterns']
    detector = Detector(neg_rules, uncertainty_rules)

    source = options['SOURCE']
    out_dir = options['--out']
    scan.scan_document(
        source=source,
        directory=out_dir,
        suffix='.neg.xml',
        fn=detect,
        non_sequences=[detector],
    )
def main(argv):
    """Run the sentence-splitting step from the command line.

    ``argv`` is parsed by ``docopt.docopt`` against the module docstring and
    the parsed options are printed.  An ``NltkSSplitter`` is created with its
    ``newline`` argument taken from ``--newline_is_sentence_break`` and passed
    as the single ``non_sequences`` entry to ``scan.scan_document`` with
    ``ssplit`` as ``fn``.
    """
    options = docopt.docopt(__doc__, argv=argv)
    print(options)

    newline_breaks = options['--newline_is_sentence_break']
    sentence_splitter = NltkSSplitter(newline=newline_breaks)

    source = options['SOURCE']
    out_dir = options['--out']
    scan.scan_document(
        source=source,
        directory=out_dir,
        suffix='.ss.xml',
        fn=ssplit,
        non_sequences=[sentence_splitter],
    )
def main(argv):
    """Run the combined text pipeline from the command line.

    ``argv`` is parsed by ``docopt.docopt`` against the module docstring and
    the parsed options are printed.  Five helper objects are then built and
    forwarded, in this order, as ``non_sequences`` to ``scan.scan_document``
    with ``pipeline`` as ``fn``:

    1. ``ssplit.NltkSSplitter(newline=True)``
    2. ``parse.Bllip`` with ``model_dir`` taken from ``--model``
    3. ``ptb2ud.Ptb2DepConverter(universal=True)``
    4. ``ptb2ud.Lemmatizer()``
    5. ``negdetect.Detector`` built from ``--neg-patterns`` and
       ``--uncertainty-patterns``
    """
    options = docopt.docopt(__doc__, argv=argv)
    print(options)

    # The list literal evaluates left to right, so the helpers are created
    # in the same order in which they are handed to the callback.
    stages = [
        ssplit.NltkSSplitter(newline=True),
        parse.Bllip(model_dir=options['--model']),
        ptb2ud.Ptb2DepConverter(universal=True),
        ptb2ud.Lemmatizer(),
        negdetect.Detector(options['--neg-patterns'],
                           options['--uncertainty-patterns']),
    ]

    source = options['SOURCE']
    out_dir = options['--out']
    scan.scan_document(
        source=source,
        directory=out_dir,
        suffix='.neg.xml',
        fn=pipeline,
        non_sequences=stages,
    )
""" Clean up sentences Usage: negbio_pipeline cleanup [options] --output=<directory> <file> ... Options: --suffix=<suffix> Append an additional SUFFIX to file names. [default: .negbio.xml] --verbose Print more information about progress. --output=<directory> Specify the output directory. """ from negbio.cli_utils import parse_args from negbio.pipeline.cleanup import clean_sentences from negbio.pipeline.scan import scan_document if __name__ == '__main__': argv = parse_args(__doc__) scan_document(source=argv['<file>'], directory=argv['--output'], suffix=argv['--suffix'], fn=clean_sentences)
""" Usage: negbio_pipeline normalize [options] --output=<directory> <file> ... Options: --output=<directory> Specify the output directory. --suffix=<suffix> Append an additional SUFFIX to file names. [default: .normalized.xml] --verbose Print more information about progress. """ from negbio.cli_utils import parse_args from negbio.ext.normalize_mimiccxr import normalize from negbio.pipeline.scan import scan_document if __name__ == '__main__': argv = parse_args(__doc__) scan_document(source=argv['<file>'], verbose=argv['--verbose'], suffix=argv['--suffix'], directory=argv['--output'], fn=normalize)
""" Split text into sentences Usage: negbio_pipeline ssplit [options] --output=<directory> <file> ... Options: --newline_is_sentence_break Whether to treat newlines as sentence breaks. True means that a newline is always a sentence break. False means to ignore newlines for the purpose of sentence splitting. This is appropriate for continuous text, when just the non-whitespace characters should be used to determine sentence breaks. [default=False] --suffix=<suffix> Append an additional SUFFIX to file names. [default: .ssplit.xml] --output=<directory> Specify the output directory. --verbose Print more information about progress. """ from negbio.pipeline.scan import scan_document from negbio.pipeline.ssplit import NegBioSSplitter from negbio.cli_utils import parse_args if __name__ == '__main__': argv = parse_args(__doc__) splitter = NegBioSSplitter(newline=argv['--newline_is_sentence_break']) scan_document(source=argv['<file>'], directory=argv['--output'], suffix=argv['--suffix'], fn=splitter.split_doc, non_sequences=[])
"""
Usage:
    negbio_pipeline neg [options] --output=<directory> <file> ...

Options:
    --neg-patterns=<file>           Specify negation rules [default: negbio/patterns/neg_patterns.txt]
    --uncertainty-patterns=<file>   Specify uncertainty rules [default: negbio/patterns/uncertainty_patterns.txt]
    --suffix=<suffix>               Append an additional SUFFIX to file names. [default: .neg.xml]
    --verbose                       Print more information about progress.
    --output=<directory>            Specify the output directory.
"""
# NOTE(review): the opening docstring delimiter and the "Usage:" header were
# missing from the text under review and have been restored in their minimal
# form so the module parses and the docstring carries a usage section.  If the
# original docstring had a summary line before "Usage:", re-add it.
import os

from negbio.cli_utils import parse_args, get_absolute_path
from negbio.neg.neg_detector import Detector
from negbio.pipeline.negdetect import detect
from negbio.pipeline.scan import scan_document

if __name__ == '__main__':
    # The module docstring above doubles as the command-line specification
    # handed to parse_args.
    argv = parse_args(__doc__)

    # Each pattern option is passed through get_absolute_path together with
    # its documented default value (presumably so a default can be located
    # regardless of the working directory -- confirm in negbio.cli_utils).
    argv = get_absolute_path(argv,
                             '--neg-patterns',
                             'negbio/patterns/neg_patterns.txt')
    argv = get_absolute_path(argv,
                             '--uncertainty-patterns',
                             'negbio/patterns/uncertainty_patterns.txt')

    # Canonicalise both rule-file paths before constructing the detector.
    neg_detector = Detector(os.path.realpath(argv['--neg-patterns']),
                            os.path.realpath(argv['--uncertainty-patterns']))

    # detect is the callback; the detector is forwarded to it through
    # non_sequences.
    scan_document(source=argv['<file>'],
                  directory=argv['--output'],
                  suffix=argv['--suffix'],
                  fn=detect,
                  non_sequences=[neg_detector])
"""
Usage:
    negbio_pipeline section_split [options] --output=<directory> <file> ...

Options:
    --suffix=<suffix>       Append an additional SUFFIX to file names. [default: .secsplit.xml]
    --output=<directory>    Specify the output directory.
    --verbose               Print more information about progress.
    --pattern=<file>        Specify section title list for matching.
"""
# NOTE(review): the opening docstring delimiter and the "Usage:" header were
# missing from the text under review and have been restored in their minimal
# form so the module parses and the docstring carries a usage section.  If the
# original docstring had a summary line before "Usage:", re-add it.
import re

from negbio.cli_utils import parse_args
from negbio.pipeline.scan import scan_document
from negbio.pipeline.section_split import split_document


def read_section_titles(pathname):
    """Compile the section titles listed in a file into one regex.

    The file is read line by line; every non-blank line is used verbatim as
    one regular-expression alternative.

    Args:
        pathname: path of a text file with one section-title pattern per line.

    Returns:
        A pattern compiled with ``re.MULTILINE`` that is the ``|``-alternation
        of all titles.  A file without any non-blank line yields the empty
        pattern.

    Raises:
        OSError: if the file cannot be opened.
        re.error: if a line is not a valid regular expression.
    """
    with open(pathname) as fp:
        # Lines read from a file keep their terminator.  Leaving it in would
        # make every title match only when directly followed by a line break
        # (and the last title behave differently when the file lacks a final
        # newline), so strip it.  Other whitespace is kept in case it is a
        # deliberate part of the pattern.
        titles = [line.rstrip('\r\n') for line in fp]

    # A blank line would otherwise become an alternative of its own and
    # match at unrelated places in the text.
    titles = [title for title in titles if title.strip()]
    return re.compile('|'.join(titles), re.MULTILINE)


if __name__ == '__main__':
    # The module docstring above doubles as the command-line specification
    # handed to parse_args.
    argv = parse_args(__doc__)

    # Without --pattern no title regex is built and None is forwarded.
    if argv['--pattern'] is None:
        patterns = None
    else:
        patterns = read_section_titles(argv['--pattern'])

    # split_document is the callback; the compiled pattern (or None) is
    # forwarded to it through non_sequences.
    scan_document(source=argv['<file>'],
                  verbose=argv['--verbose'],
                  suffix=argv['--suffix'],
                  directory=argv['--output'],
                  fn=split_document,
                  non_sequences=[patterns])