Пример #1
0
from typing import Dict, Any

import stanza
from mtap import Document, processor, run_processor, GenericLabel
from mtap.processing import DocumentProcessor
from mtap.processing.descriptions import label_index, label_property

MAX_ITER = 5000


@processor('biomedicus-dependencies',
           human_name="BioMedICUS Stanza Dependency Parser",
           entry_point=__name__,
           description="Calls out to the Stanford Stanza framework for dependency parsing.",
           inputs=[
               label_index(name='sentences', reference='biomedicus-sentences/sentences')
           ],
           outputs=[
               label_index(name='dependencies',
                           description="The dependent words.",
                           properties=[
                               label_property(
                                   'deprel',
                                   description="The dependency relation",
                                   data_type='str'
                               ),
                               label_property(
                                   'head',
                                   description="The head of this label or null if its the root.",
                                   nullable=True,
                                   data_type='ref:dependencies'
Пример #2
0
@processor(
    name='biomedicus-negex-triggers',
    human_name='Negex Triggers Tagger',
    description='Labels phrases which are negation triggers.',
    entry_point=__name__,
    parameters=[
        parameter(
            name='terms_index',
            data_type='str',
            description=
            'The label index containing terms that should be checked for negation'
        )
    ],
    inputs=[
        label_index(name='sentences',
                    reference='biomedicus-sentences/sentences'),
        label_index(name='umls_terms',
                    reference='biomedicus-concepts/umls_terms',
                    name_from_parameter='terms_index')
    ],
    outputs=[
        label_index("negation_trigger",
                    description="Spans of phrases that trigger negation.",
                    properties=[
                        label_property(
                            "tags",
                            data_type='List[str]',
                            description='The tags that apply to the trigger, '
                            'for example: POST PREN')
                    ])
    ])
Пример #3
0
        for match in _split.finditer(text):
            split_timer.stop()
            start = match.start()
            local_text = text[prev:start]
            for ss, se in predict_segment(model, input_mapper, local_text):
                yield prev + ss, prev + se
            prev = match.end()
            split_timer.start()


@processor(
    'biomedicus-sentences',
    human_name="Sentence Detector",
    description="Labels sentences given document text.",
    entry_point=__name__,
    outputs=[label_index('sentences')],
)
class SentenceProcessor(DocumentProcessor):
    """Document processor that fills the ``sentences`` label index.

    The spans are produced by ``predict_text`` using the model and input
    mapper supplied at construction time.
    """

    def __init__(self, input_mapper: InputMapping, model: BiLSTM):
        """Store the collaborators later passed to ``predict_text``.

        Args:
            input_mapper: Passed as the second argument of ``predict_text``.
            model: Passed as the first argument of ``predict_text``.
        """
        self.model = model
        self.input_mapper = input_mapper

    def process_document(self, document: Document, params: Dict[str, Any]):
        """Add one ``sentences`` label per span predicted for the document.

        Args:
            document: Document whose ``text`` is segmented and which receives
                the labels.
            params: Processing parameters; not read by this method.
        """
        with document.get_labeler('sentences', distinct=True) as label_sentence:
            spans = predict_text(self.model, self.input_mapper, document.text)
            for begin, finish in spans:
                label_sentence(begin, finish)


def bi_lstm_hparams_parser():
    parser = ArgumentParser(add_help=False)
    parser.add_argument('--embeddings', type=Path)
Пример #4
0
@processor(
    name='biomedicus-deepen',
    human_name='DEEPEN Negation Detector',
    description='Detects which UMLS terms are negated.',
    entry_point=__name__,
    parameters=[
        parameter(
            name='terms_index',
            data_type='str',
            description=
            'The label index containing terms that should be checked for negation'
        )
    ],
    inputs=[
        label_index(name='sentences',
                    reference='biomedicus-sentences/sentences'),
        label_index(
            name='dependencies',
            reference='biomedicus-selective-dependencies/dependencies'),
        label_index(name='umls_terms',
                    reference='biomedicus-concepts/umls_terms',
                    name_from_parameter='terms_index')
    ],
    outputs=[
        label_index("negated", description="Spans of negated terms."),
        label_index("negation_trigger",
                    description="Spans of phrases that trigger negation.")
    ])
class DeepenProcessor(mtap.processing.DocumentProcessor):
    def __init__(self):
        self.negex = DeepenTagger()
from mtap import Document, processor, run_processor, GenericLabel
from mtap.processing import DocumentProcessor
from mtap.processing.descriptions import label_index, label_property

MAX_ITER = 5000


@processor(
    'biomedicus-selective-dependencies',
    human_name="BioMedICUS Stanza Selective Dependency Parser",
    entry_point=__name__,
    description=
    "Calls out to the Stanford Stanza framework for dependency parsing"
    "on a appropriate subset of sentences.",
    inputs=[
        label_index(name='sentences',
                    reference='biomedicus-sentences/sentences'),
        label_index(name='umls_terms',
                    reference='biomedicus-concepts/umls_terms',
                    name_from_parameter='terms_index'),
        label_index("negation_triggers", reference='biomedicus-negex-triggers')
    ],
    outputs=[
        label_index(name='dependencies',
                    description="The dependent words.",
                    properties=[
                        label_property('deprel',
                                       description="The dependency relation",
                                       data_type='str'),
                        label_property(
                            'head',
                            description=
Пример #6
0
    with Processor.started_stopwatch('segment_splitting') as split_timer:
        for match in _split.finditer(text):
            split_timer.stop()
            start = match.start()
            local_text = text[prev:start]
            for ss, se in predict_segment(model, input_mapper, local_text):
                yield prev + ss, prev + se
            prev = match.end()
            split_timer.start()


@processor(
    'biomedicus-sentences',
    human_name="Sentence Detector",
    description="Labels sentences given document text.",
    entry_point=__name__,
    outputs=[
        label_index('sentences'),
    ],
)
class SentenceProcessor(DocumentProcessor):
    """Labels sentences on a document using a model and an input mapper.

    Each ``(start, end)`` pair produced by ``predict_text`` becomes one label
    in the ``sentences`` index.
    """

    def __init__(self, input_mapper: InputMapping, model: BiLSTM):
        """Keep references to the input mapper and the model.

        Args:
            input_mapper: Forwarded to ``predict_text`` on every document.
            model: Forwarded to ``predict_text`` on every document.
        """
        self.model = model
        self.input_mapper = input_mapper

    def process_document(self, document: Document, params: Dict[str, Any]):
        """Write the predicted sentence spans to the ``sentences`` index.

        Args:
            document: Source of the text and destination of the labels.
            params: Processing parameters; unused here.
        """
        with document.get_labeler('sentences', distinct=True) as labeler:
            predicted = predict_text(self.model, self.input_mapper, document.text)
            for span_start, span_end in predicted:
                labeler(span_start, span_end)


def bi_lstm_hparams_parser():
    parser = ArgumentParser(add_help=False)
    parser.add_argument('--embeddings', type=Path)
Пример #7
0
# Applied in MULTILINE mode; group 1 captures the text that sits between the
# optional leading and trailing whitespace of each match.
_pattern = re.compile(r'^[\s]*(.*?)[\s]*$', re.MULTILINE)


def get_sentences(text: str) -> List[Location]:
    """Yield the ``(start, end)`` offsets of every non-blank line in ``text``.

    Offsets index into ``text`` and exclude the whitespace surrounding the
    line's content.

    Args:
        text: The document text to split into one sentence per line.

    Yields:
        ``(start, end)`` tuples with ``start < end``, in document order.
    """
    # NOTE(review): this function is a generator, so ``Iterator[Location]``
    # would describe it more accurately than the declared ``List[Location]``;
    # the annotation is left unchanged to avoid requiring a new import.
    for match in _pattern.finditer(text):
        start, end = match.span(1)
        # Empty or whitespace-only input, and the position after a trailing
        # newline, produce a zero-length group; that is not a sentence, so it
        # is skipped instead of being emitted as an empty span.
        if start < end:
            yield start, end


@processor(
    name='biomedicus-sentences-one-per-line',
    human_name='One per Line Sentences',
    description='Labels sentences where each line in the input document is a sentence.',
    entry_point=__name__,
    outputs=[
        label_index(name='sentences'),
    ],
)
class OnePerLineSentencesProcessor(mtap.processing.DocumentProcessor):
    """Document processor that labels the spans returned by ``get_sentences``."""

    def process_document(self, document: Document, params: Dict[str, Any]):
        """Add a ``sentences`` label for each span found in the document text.

        Args:
            document: Document whose ``text`` is scanned and which receives
                the labels.
            params: Processing parameters; not read by this method.
        """
        with document.get_labeler('sentences') as label_sentence:
            for span_start, span_end in get_sentences(document.text):
                label_sentence(span_start, span_end)


def main(args=None):
    """Create an ``OnePerLineSentencesProcessor`` and pass it to ``mtap.run_processor``.

    Args:
        args: Forwarded unchanged as the ``args`` keyword argument of
            ``mtap.run_processor``. Defaults to ``None``.
    """
    mtap.run_processor(OnePerLineSentencesProcessor(), args=args)


if __name__ == '__main__':
    main()
Пример #8
0
@mtap.processor(
    'mtap-example-processor-python',
    human_name="Python Example Processor",
    description=
    "counts the number of times the letters a and b occur in a document",
    parameters=[
        parameter('do_work',
                  required=True,
                  data_type='bool',
                  description="Whether the processor should do anything.")
    ],
    outputs=[
        label_index('mtap.examples.letter_counts',
                    properties=[
                        label_property('letter', data_type='str'),
                        label_property('count', data_type='int')
                    ])
    ])
class ExampleProcessor(DocumentProcessor):
    """Does some labeling of the counts of the letter 'a' and 'b' in a document.
    """
    def process_document(self, document: Document,
                         params: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        if params['do_work']:
            with self.started_stopwatch('fetch_time'):
                text = document.text

            a_count = text.count('a')
            b_count = text.count('b')
Пример #9
0
from mtap.processing import DocumentProcessor
from mtap.processing.descriptions import parameter, label_index, label_property
from mtap.processing.service import _ProcessorServicer


@processor(
    'mtap-test-processor',
    description='Processor desc.',
    parameters=[
        parameter('a_param', required=True, data_type='bool', description="desc."),
    ],
    inputs=[
        label_index('input_index', properties=[label_property('bar', data_type='bool')]),
    ],
    outputs=[
        label_index(
            'output_index',
            description='desc.',
            properties=[
                label_property(
                    'foo',
                    data_type='str',
                    nullable=True,
                    description='A label property.',
                ),
            ],
        ),
    ],
)
class ExampleTestProcessor(DocumentProcessor):
    """Processor with declared metadata and a no-op ``process_document``.

    The decorator declares one required parameter, one input index and one
    output index; the processing method itself performs no work.
    """

    def process_document(self, document: Document, params: Dict[str, Any]):
        """Do nothing and return ``None``.

        Args:
            document: Ignored.
            params: Ignored.
        """
        return None