import filecmp
import os
import tempfile

import bioc

from negbio.pipeline2.pipeline import NegBioPipeline, Pipe


def test_scan_collection_skip():
    filenames = create_collections()
    output_dir = tempfile.mkdtemp()
    # Remove one source file so that scan has to skip it.
    os.remove(filenames[0])
    p = NegBioPipeline([('fake', FakePipe())])
    p.scan(source=filenames, directory=output_dir, suffix='.xml')
    # The skipped source must not produce an output file.
    assert not os.path.exists(os.path.join(output_dir, os.path.basename(filenames[0])))
def test_scan_document_error():
    class FakePipeError(Pipe):
        def __call__(self, doc: bioc.BioCDocument, *args, **kwargs):
            raise KeyError

    filenames = create_collections()
    output_dir = tempfile.mkdtemp()
    p = NegBioPipeline([('fake_error', FakePipeError())])
    p.scan(source=filenames, directory=output_dir, suffix='.xml')
    # When a pipe raises, scan still writes the document unchanged, so each
    # output file is byte-identical to its input.
    for filename in filenames:
        assert filecmp.cmp(filename, os.path.join(output_dir, os.path.basename(filename)))
def test_scan_collection():
    filenames = create_collections()
    output_dir = tempfile.mkdtemp()
    # Delete the directory so that scan has to create it.
    os.rmdir(output_dir)
    p = NegBioPipeline([('fake', FakePipe())])
    p.scan(source=filenames, directory=output_dir, suffix='.xml')
    for filename in filenames:
        filename = os.path.join(output_dir, os.path.basename(filename))
        with open(filename) as fp:
            c = bioc.load(fp)
        for doc in c.documents:
            assert doc.infons['fake']
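# The tests above rely on two module-level fixtures, `create_collections` and
# `FakePipe`, that are defined elsewhere in the test module. The sketch below
# is a hypothetical reconstruction assuming only what the tests exercise:
# FakePipe tags each document with a 'fake' infon (checked in
# test_scan_collection), and create_collections writes single-document BioC
# files and returns their paths. Names and file layout are illustrative.
class FakePipe(Pipe):
    def __call__(self, doc: bioc.BioCDocument, *args, **kwargs):
        # Tag the document so the tests can verify the pipe ran.
        doc.infons['fake'] = 'True'
        return doc


def create_collections(n=3):
    """Write n single-document BioC collections and return their paths."""
    dirname = tempfile.mkdtemp()
    filenames = []
    for i in range(n):
        collection = bioc.BioCCollection()
        document = bioc.BioCDocument()
        document.id = str(i)
        collection.add_document(document)
        filename = os.path.join(dirname, '{}.xml'.format(i))
        with open(filename, 'w') as fp:
            bioc.dump(collection, fp)
        filenames.append(filename)
    return filenames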
"""
Detect UMLS concepts using MetaMap.

Usage:
    negbio_dner_mm [options] --metamap=<binary> --output=<directory> <file> ...

Options:
    --metamap=<binary>          The MetaMap binary.
    --cuis=<file>               Specify the list of CUIs to keep, one per line.
    --output=<directory>        Specify the output directory.
    --suffix=<suffix>           Append an additional SUFFIX to file names. [default: .mm.xml]
    --verbose                   Print more information about progress.
    --overwrite                 Overwrite the output file.
"""
from negbio.cli_utils import parse_args
from negbio.pipeline2.dner_mm import MetaMapExtractor
from negbio.pipeline2.pipeline import NegBioPipeline
from pymetamap import MetaMap


def read_cuis(pathname):
    cuis = set()
    with open(pathname) as fp:
        for line in fp:
            line = line.strip()
            if line:
                cuis.add(line)
    return cuis


if __name__ == '__main__':
    argv = parse_args(__doc__)
    mm = MetaMap.get_instance(argv['--metamap'])
    if argv['--cuis'] is None:
        cuis = None
    else:
        cuis = read_cuis(argv['--cuis'])
    extractor = MetaMapExtractor(mm, cuis)
    pipeline = NegBioPipeline(pipeline=[('MetaMapExtractor', extractor)])
    pipeline.scan(source=argv['<file>'], directory=argv['--output'], suffix=argv['--suffix'],
                  overwrite=argv['--overwrite'])
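# Hypothetical example of the --cuis file format read_cuis expects: one UMLS
# concept ID per line; blank lines are skipped. The IDs below are
# illustrative only.
#
#     $ cat cuis.txt
#     C0032285
#
#     C0013404
#
#     >>> read_cuis('cuis.txt') == {'C0032285', 'C0013404'}
#     True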
"""
Split the report into sections.

Usage:
    negbio_section_split [options] --output=<directory> <file> ...

Options:
    --output=<directory>        Specify the output directory.
    --suffix=<suffix>           Append an additional SUFFIX to file names. [default: .secsplit.xml]
    --pattern=<file>            Specify a file of section titles, one per line.
    --verbose                   Print more information about progress.
    --workers=<n>               Number of threads [default: 1]
    --files_per_worker=<n>      Number of input files per worker [default: 8]
    --overwrite                 Overwrite the output file.
"""
import logging
import re

from negbio.cli_utils import parse_args, calls_asynchronously
from negbio.pipeline2.pipeline import NegBioPipeline
from negbio.pipeline2.section_split import SectionSplitter


def read_section_titles(pathname):
    with open(pathname) as fp:
        titles = [line.strip() for line in fp]
    p = '|'.join(titles)
    logging.debug('Section patterns: %s', p)
    return re.compile(p, re.IGNORECASE | re.MULTILINE)


if __name__ == '__main__':
    argv = parse_args(__doc__)
    workers = int(argv['--workers'])
    if workers == 1:
        if argv['--pattern'] is None:
            pattern = None
        else:
            pattern = read_section_titles(argv['--pattern'])
        splitter = SectionSplitter(pattern)
        pipeline = NegBioPipeline(pipeline=[('SectionSplitter', splitter)])
        pipeline.scan(source=argv['<file>'], suffix=argv['--suffix'], directory=argv['--output'],
                      overwrite=argv['--overwrite'])
    else:
        calls_asynchronously(argv, 'python -m negbio.negbio_section_split')
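# Hypothetical example of the --pattern file: one section title per line.
# read_section_titles joins the lines into a single case-insensitive,
# multiline alternation, so a file containing
#
#     FINDINGS:
#     IMPRESSION:
#
# yields re.compile('FINDINGS:|IMPRESSION:', re.IGNORECASE | re.MULTILINE),
# which matches headers such as 'Findings:' anywhere in the text. The titles
# shown are illustrative. Note that blank lines are not filtered, so a file
# with a trailing newline produces an empty alternative that matches at every
# position.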
"""
Parse sentences

Usage:
    negbio_parse [options] --output=<directory> <file> ...

Options:
    --model=<directory>         Bllip parser model directory.
    --output=<directory>        Specify the output directory.
    --suffix=<suffix>           Append an additional SUFFIX to file names. [default: .bllip.xml]
    --verbose                   Print more information about progress.
    --workers=<n>               Number of threads [default: 1]
    --files_per_worker=<n>      Number of input files per worker [default: 8]
    --overwrite                 Overwrite the output file.
"""
from negbio.cli_utils import parse_args, calls_asynchronously
from negbio.pipeline2.parse import NegBioParser
from negbio.pipeline2.pipeline import NegBioPipeline

if __name__ == '__main__':
    argv = parse_args(__doc__)
    workers = int(argv['--workers'])
    if workers == 1:
        parser = NegBioParser(model_dir=argv['--model'])
        pipeline = NegBioPipeline(pipeline=[('NegBioParser', parser)])
        pipeline.scan(source=argv['<file>'], directory=argv['--output'], suffix=argv['--suffix'],
                      overwrite=argv['--overwrite'])
    else:
        calls_asynchronously(argv, 'python -m negbio.negbio_parse')
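# When --workers > 1, calls_asynchronously re-runs this module as worker
# subprocesses rather than parsing in-process. A rough sketch of the assumed
# fan-out (not the actual negbio.cli_utils implementation): partition <file>
# into chunks of --files_per_worker files and launch
#
#     python -m negbio.negbio_parse --workers=1 --output=<directory> <chunk>
#
# once per chunk, waiting for all subprocesses to finish.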
""" Usage: negbio_normalize [options] --output=<directory> <file> ... Options: --output=<directory> Specify the output directory. --suffix=<suffix> Append an additional SUFFIX to file names. [default: .normalized.xml] --verbose Print more information about progress. --overwrite Overwrite the output file. """ from negbio.cli_utils import parse_args from negbio.pipeline2.pipeline import NegBioPipeline from negbio.pipeline2.normalize_mimiccxr import MIMICCXRNormalizer if __name__ == '__main__': argv = parse_args(__doc__) normalizer = MIMICCXRNormalizer() pipeline = NegBioPipeline(pipeline=[('MIMICCXRNormalizer', normalizer)]) pipeline.scan(source=argv['<file>'], directory=argv['--output'], suffix=argv['--suffix'], overwrite=argv['--overwrite'])
"""
Clean up sentences

Usage:
    negbio_cleanup [options] --output=<directory> <file> ...

Options:
    --suffix=<suffix>           Append an additional SUFFIX to file names. [default: .negbio.xml]
    --verbose                   Print more information about progress.
    --output=<directory>        Specify the output directory.
    --overwrite                 Overwrite the output file.
    --workers=<n>               Number of threads [default: 1]
    --files_per_worker=<n>      Number of input files per worker [default: 8]
"""
from negbio.cli_utils import parse_args, calls_asynchronously
from negbio.pipeline2.cleanup import CleanUp
from negbio.pipeline2.pipeline import NegBioPipeline

if __name__ == '__main__':
    argv = parse_args(__doc__)
    workers = int(argv['--workers'])
    if workers == 1:
        cleanup = CleanUp()
        pipeline = NegBioPipeline(pipeline=[('CleanUp', cleanup)])
        pipeline.scan(source=argv['<file>'], directory=argv['--output'], suffix=argv['--suffix'],
                      overwrite=argv['--overwrite'])
    else:
        calls_asynchronously(argv, 'python -m negbio.negbio_clean')
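# The single-stage CLIs above all share one pattern: wrap a Pipe in a
# NegBioPipeline and call scan. A minimal in-process sketch (assumed
# composition, not a shipped entry point) showing that stages can also be
# chained in a single pipeline instead of running one command per stage;
# the input and output paths are illustrative.
#
#     from negbio.pipeline2.cleanup import CleanUp
#     from negbio.pipeline2.section_split import SectionSplitter
#     from negbio.pipeline2.pipeline import NegBioPipeline
#
#     pipeline = NegBioPipeline(pipeline=[
#         ('CleanUp', CleanUp()),
#         ('SectionSplitter', SectionSplitter(None)),
#     ])
#     pipeline.scan(source=['report.xml'], directory='out',
#                   suffix='.negbio.xml', overwrite=True)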