from typing import Dict, Any

import stanza
from mtap import Document, processor, run_processor, GenericLabel
from mtap.processing import DocumentProcessor
from mtap.processing.descriptions import label_index, label_property

MAX_ITER = 5000


@processor('biomedicus-dependencies',
           human_name="BioMedICUS Stanza Dependency Parser",
           entry_point=__name__,
           description="Calls out to the Stanford Stanza framework for dependency parsing.",
           inputs=[
               label_index(name='sentences',
                           reference='biomedicus-sentences/sentences')
           ],
           outputs=[
               label_index(name='dependencies',
                           description="The dependent words.",
                           properties=[
                               label_property('deprel',
                                              description="The dependency relation",
                                              data_type='str'),
                               label_property('head',
                                              description="The head of this label or null if it's the root.",
                                              nullable=True,
                                              data_type='ref:dependencies'
from mtap import processor
from mtap.processing.descriptions import label_index, label_property, parameter


@processor(
    name='biomedicus-negex-triggers',
    human_name='Negex Triggers Tagger',
    description='Labels phrases which are negation triggers.',
    entry_point=__name__,
    parameters=[
        parameter(
            name='terms_index',
            data_type='str',
            description='The label index containing terms that should be '
                        'checked for negation'
        )
    ],
    inputs=[
        label_index(name='sentences',
                    reference='biomedicus-sentences/sentences'),
        label_index(name='umls_terms',
                    reference='biomedicus-concepts/umls_terms',
                    name_from_parameter='terms_index')
    ],
    outputs=[
        label_index('negation_trigger',
                    description="Spans of phrases that trigger negation.",
                    properties=[
                        label_property(
                            'tags',
                            data_type='List[str]',
                            description='The tags that apply to the trigger, '
                                        'for example: POST PREN')
                    ])
    ])
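# --- Illustrative sketch, not part of the BioMedICUS source: one way the
# 'terms_index' parameter declared above could be resolved at runtime. The
# class name and the 'umls_terms' fallback are assumptions for illustration.
from typing import Any, Dict

from mtap import Document
from mtap.processing import DocumentProcessor


class NegexTriggersSketch(DocumentProcessor):
    def process_document(self, document: Document, params: Dict[str, Any]):
        # name_from_parameter above means the caller chooses which terms
        # index to read; fall back to the referenced 'umls_terms' index.
        terms = document.get_label_index(params.get('terms_index', 'umls_terms'))
        with document.get_labeler('negation_trigger') as label_trigger:
            for term in terms:
                # A real implementation would scan each term for trigger
                # phrases and call label_trigger(start, end, tags=[...]).
                pass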
import mtap.processing
from mtap import processor
from mtap.processing.descriptions import label_index, parameter

# DeepenTagger comes from the surrounding BioMedICUS module (not shown in
# this excerpt).


@processor(
    name='biomedicus-deepen',
    human_name='DEEPEN Negation Detector',
    description='Detects which UMLS terms are negated.',
    entry_point=__name__,
    parameters=[
        parameter(
            name='terms_index',
            data_type='str',
            description='The label index containing terms that should be '
                        'checked for negation'
        )
    ],
    inputs=[
        label_index(name='sentences',
                    reference='biomedicus-sentences/sentences'),
        label_index(name='dependencies',
                    reference='biomedicus-selective-dependencies/dependencies'),
        label_index(name='umls_terms',
                    reference='biomedicus-concepts/umls_terms',
                    name_from_parameter='terms_index')
    ],
    outputs=[
        label_index("negated", description="Spans of negated terms."),
        label_index("negation_trigger",
                    description="Spans of phrases that trigger negation.")
    ])
class DeepenProcessor(mtap.processing.DocumentProcessor):
    def __init__(self):
        self.negex = DeepenTagger()
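    # --- Hypothetical method body, not the BioMedICUS implementation: a
    # sketch of how the two output indices declared above could each receive
    # a labeler inside a single process_document call.
    def process_document(self, document, params):
        with document.get_labeler('negated') as label_negated, \
                document.get_labeler('negation_trigger') as label_trigger:
            # self.negex would inspect sentences, dependencies, and terms,
            # then call label_negated(...) / label_trigger(...) on matches.
            pass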
from mtap import Document, processor, run_processor, GenericLabel
from mtap.processing import DocumentProcessor
from mtap.processing.descriptions import label_index, label_property

MAX_ITER = 5000


@processor(
    'biomedicus-selective-dependencies',
    human_name="BioMedICUS Stanza Selective Dependency Parser",
    entry_point=__name__,
    description="Calls out to the Stanford Stanza framework for dependency "
                "parsing on an appropriate subset of sentences.",
    inputs=[
        label_index(name='sentences',
                    reference='biomedicus-sentences/sentences'),
        label_index(name='umls_terms',
                    reference='biomedicus-concepts/umls_terms',
                    name_from_parameter='terms_index'),
        label_index("negation_triggers",
                    reference='biomedicus-negex-triggers')
    ],
    outputs=[
        label_index(name='dependencies',
                    description="The dependent words.",
                    properties=[
                        label_property('deprel',
                                       description="The dependency relation",
                                       data_type='str'),
                        label_property('head',
                                       description=
from argparse import ArgumentParser
from pathlib import Path
from typing import Dict, Any

from mtap import Document, processor
from mtap.processing import DocumentProcessor, Processor
from mtap.processing.descriptions import label_index

# InputMapping, BiLSTM, _split, and predict_segment come from the
# surrounding BioMedICUS sentences module (not shown in this excerpt).


def predict_text(model, input_mapper, text):
    # Enclosing function header and the prev = 0 initializer are
    # reconstructed from the call site in process_document below.
    prev = 0
    with Processor.started_stopwatch('segment_splitting') as split_timer:
        for match in _split.finditer(text):
            split_timer.stop()
            start = match.start()
            local_text = text[prev:start]
            for ss, se in predict_segment(model, input_mapper, local_text):
                yield prev + ss, prev + se
            prev = match.end()
            split_timer.start()


@processor('biomedicus-sentences',
           human_name="Sentence Detector",
           description="Labels sentences given document text.",
           entry_point=__name__,
           outputs=[label_index('sentences')])
class SentenceProcessor(DocumentProcessor):
    def __init__(self, input_mapper: InputMapping, model: BiLSTM):
        self.input_mapper = input_mapper
        self.model = model

    def process_document(self, document: Document, params: Dict[str, Any]):
        with document.get_labeler('sentences', distinct=True) as add_sentence:
            for start, end in predict_text(self.model, self.input_mapper,
                                           document.text):
                add_sentence(start, end)


def bi_lstm_hparams_parser():
    parser = ArgumentParser(add_help=False)
    parser.add_argument('--embeddings', type=Path)
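# --- Sketch (an assumption, not from this file): a parser built with
# add_help=False is usually composed into a full CLI via argparse's
# 'parents' mechanism. 'train_parser' is a hypothetical name, and this
# assumes the truncated bi_lstm_hparams_parser above returns its parser.
def train_parser():
    parser = ArgumentParser(parents=[bi_lstm_hparams_parser()])
    parser.add_argument('--epochs', type=int, default=1)
    return parser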
import re
from typing import Any, Dict, Iterator, Tuple

import mtap.processing
from mtap import Document, processor
from mtap.processing.descriptions import label_index

_pattern = re.compile(r'^[\s]*(.*?)[\s]*$', re.MULTILINE)


def get_sentences(text: str) -> Iterator[Tuple[int, int]]:
    # Yields (start, end) offsets for each line's text, with surrounding
    # whitespace trimmed via the first capture group.
    for match in _pattern.finditer(text):
        yield match.start(1), match.end(1)


@processor(
    name='biomedicus-sentences-one-per-line',
    human_name='One per Line Sentences',
    description='Labels sentences where each line in the input document '
                'is a sentence.',
    entry_point=__name__,
    outputs=[label_index(name='sentences')])
class OnePerLineSentencesProcessor(mtap.processing.DocumentProcessor):
    def process_document(self, document: Document, params: Dict[str, Any]):
        with document.get_labeler('sentences') as sentence_labeler:
            for start, end in get_sentences(document.text):
                sentence_labeler(start, end)


def main(args=None):
    proc = OnePerLineSentencesProcessor()
    mtap.run_processor(proc, args=args)


if __name__ == '__main__':
    main()
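# --- Usage sketch: exercising the processor in-process. This assumes an
# MTAP version where events and documents work locally without a running
# events service; the sample text and the _demo name are illustrative only.
def _demo():
    from mtap import Event
    with Event() as event:
        document = event.create_document('plaintext',
                                         'First sentence.\nSecond sentence.')
        OnePerLineSentencesProcessor().process_document(document, params={})
        for sentence in document.get_label_index('sentences'):
            print(sentence.start_index, sentence.end_index, sentence.text)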
from typing import Any, Dict, Optional

import mtap
from mtap import Document
from mtap.processing import DocumentProcessor
from mtap.processing.descriptions import label_index, label_property, parameter


@mtap.processor(
    'mtap-example-processor-python',
    human_name="Python Example Processor",
    description="counts the number of times the letters a and b occur in a document",
    parameters=[
        parameter('do_work',
                  required=True,
                  data_type='bool',
                  description="Whether the processor should do anything.")
    ],
    outputs=[
        label_index('mtap.examples.letter_counts',
                    properties=[
                        label_property('letter', data_type='str'),
                        label_property('count', data_type='int')
                    ])
    ])
class ExampleProcessor(DocumentProcessor):
    """Does some labeling of the counts of the letter 'a' and 'b' in a
    document.
    """

    def process_document(self,
                         document: Document,
                         params: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        if params['do_work']:
            with self.started_stopwatch('fetch_time'):
                text = document.text
            a_count = text.count('a')
            b_count = text.count('b')
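            # --- Reconstructed continuation (a sketch; the excerpt ends
            # above). The 'letter'/'count' properties declared in outputs
            # suggest labeling and returning the counts roughly like this:
            with document.get_labeler('mtap.examples.letter_counts') as add_count:
                add_count(0, len(text), letter='a', count=a_count)
                add_count(0, len(text), letter='b', count=b_count)
            return {'a_count': a_count, 'b_count': b_count}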
from typing import Any, Dict

from mtap import Document, processor
from mtap.processing import DocumentProcessor
from mtap.processing.descriptions import parameter, label_index, label_property
from mtap.processing.service import _ProcessorServicer


@processor('mtap-test-processor',
           description='Processor desc.',
           parameters=[
               parameter('a_param',
                         required=True,
                         data_type='bool',
                         description="desc.")
           ],
           inputs=[
               label_index('input_index',
                           properties=[label_property('bar', data_type='bool')])
           ],
           outputs=[
               label_index('output_index',
                           description='desc.',
                           properties=[
                               label_property('foo',
                                              data_type='str',
                                              nullable=True,
                                              description='A label property.')
                           ])
           ])
class ExampleTestProcessor(DocumentProcessor):
    def process_document(self, document: Document, params: Dict[str, Any]):
        pass
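# --- Deployment sketch (an assumption, not part of the test module): hosting
# the processor uses the same run_processor entry point seen in the
# one-per-line example above; the address below is a placeholder.
def main(args=None):
    from mtap import run_processor
    run_processor(ExampleTestProcessor(), args=args)

# A client could then call it remotely through a pipeline, along the lines of:
#
#     from mtap import Pipeline, RemoteProcessor
#     pipeline = Pipeline(
#         RemoteProcessor('mtap-test-processor', address='localhost:10101'))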