Пример #1
0
import torch
from mtap import Document, processor, run_processor
from mtap.processing import DocumentProcessor
from mtap.processing.descriptions import labels, label_property

from biomedicus.dependencies.stanza_parser import stanza_deps_and_upos_tags


@processor('biomedicus-selective-dependencies',
           human_name="BioMedICUS Stanza Selective Dependency Parser",
           entry_point=__name__,
           description=
           "Calls out to the Stanford Stanza framework for dependency parsing"
           "on a appropriate subset of sentences.",
           inputs=[
               labels(name='sentences',
                      reference='biomedicus-sentences/sentences'),
               labels(name='umls_terms',
                      reference='biomedicus-concepts/umls_terms',
                      name_from_parameter='terms_index'),
               labels("negation_triggers",
                      reference='biomedicus-negex-triggers')
           ],
           outputs=[
               labels(
                   name='dependencies',
                   description="The dependent words.",
                   properties=[
                       label_property('deprel',
                                      description="The dependency relation",
                                      data_type='str'),
                       label_property(
Пример #2
0
# Processor metadata: declares the 'terms_index' parameter, the label indices
# read as input (sentences, dependencies, UMLS terms) and the two label
# indices written as output.
@processor(
    name='biomedicus-deepen',
    human_name='DEEPEN Negation Detector',
    description='Detects which UMLS terms are negated.',
    entry_point=__name__,
    parameters=[
        parameter(
            name='terms_index',
            data_type='str',
            description=
            'The label index containing terms that should be checked for negation'
        )
    ],
    inputs=[
        labels(name='sentences', reference='biomedicus-sentences/sentences'),
        labels(name='dependencies',
               reference='biomedicus-selective-dependencies/dependencies'),
        labels(name='umls_terms',
               reference='biomedicus-concepts/umls_terms',
               name_from_parameter='terms_index')
    ],
    outputs=[
        labels("negated", description="Spans of negated terms."),
        labels("negation_trigger",
               description="Spans of phrases that trigger negation.")
    ])
class DeepenProcessor(mtap.processing.DocumentProcessor):
    """Document processor registered under the name ``biomedicus-deepen``.

    Its declared metadata lists ``sentences``, ``dependencies`` and
    ``umls_terms`` as inputs and ``negated`` / ``negation_trigger`` as
    outputs.
    """

    def __init__(self):
        # One tagger instance is created per processor and kept on the
        # instance; presumably reused for every document -- TODO confirm.
        self.negex = DeepenTagger()
Пример #3
0
        for match in _split.finditer(text):
            split_timer.stop()
            start = match.start()
            local_text = text[prev:start]
            for ss, se in predict_segment(model, input_mapper, local_text,
                                          device):
                yield prev + ss, prev + se
            prev = match.end()
            split_timer.start()


@processor(
    'biomedicus-sentences',
    human_name="Sentence Detector",
    description="Labels sentences given document text.",
    entry_point=__name__,
    outputs=[labels('sentences')],
)
class SentenceProcessor(DocumentProcessor):
    """Document processor that writes a ``sentences`` label index.

    The spans come from ``predict_text``, which is called with the model,
    input mapper and device supplied at construction time.
    """

    def __init__(self, input_mapper: InputMapping, model: BiLSTM, device):
        """Store the collaborators that are forwarded to ``predict_text``.

        Args:
            input_mapper: Mapping object passed through to ``predict_text``.
            model: Model object passed through to ``predict_text``.
            device: Device value passed through to ``predict_text``.
        """
        self.input_mapper = input_mapper
        self.model = model
        self.device = device

    def process_document(self, document: Document, params: Dict[str, Any]):
        """Add one ``sentences`` label for every span ``predict_text`` yields.

        Args:
            document: The document whose ``text`` is segmented and labeled.
            params: Processing parameters; not read by this method.
        """
        with document.get_labeler('sentences', distinct=True) as label_sentence:
            spans = predict_text(self.model, self.input_mapper, document.text,
                                 self.device)
            for begin, stop in spans:
                label_sentence(begin, stop)


def bi_lstm_hparams_parser():
    parser = ArgumentParser(add_help=False)
Пример #4
0
@processor(
    name='biomedicus-negex-triggers',
    human_name='Negex Triggers Tagger',
    description='Labels phrases which are negation triggers.',
    entry_point=__name__,
    parameters=[
        parameter(
            name='terms_index',
            data_type='str',
            description=
            'The label index containing terms that should be checked for negation'
        )
    ],
    inputs=[
        labels(name='sentences', reference='biomedicus-sentences/sentences'),
        labels(name='umls_terms',
               reference='biomedicus-concepts/umls_terms',
               name_from_parameter='terms_index')
    ],
    outputs=[
        labels("negation_trigger",
               description="Spans of phrases that trigger negation.",
               properties=[
                   label_property(
                       "tags",
                       data_type='List[str]',
                       description='The tags that apply to the trigger, '
                       'for example: POST PREN')
               ])
    ])
Пример #5
0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict, Any

import mtap
from mtap.processing.descriptions import labels


@mtap.processor('mtap-python-references-example',
                human_name='Python References Examples',
                description='Shows use of referential fields on labels.',
                outputs=[
                    labels('referenced'),
                    labels('map_references'),
                    labels('list_references'),
                    labels('references')
                ])
class ReferencesExampleProcessor(mtap.DocumentProcessor):
    def process_document(self, document: mtap.Document, params: Dict[str,
                                                                     Any]):
        referenced = [
            mtap.GenericLabel(0, 1),
            mtap.GenericLabel(1, 2),
            mtap.GenericLabel(2, 3),
            mtap.GenericLabel(3, 4)
        ]

        # references can be a map of strings to labels
Пример #6
0
from mtap.api.v1 import processing_pb2
from mtap.processing import DocumentProcessor, _runners
from mtap.processing.descriptions import parameter, labels, label_property
from mtap.processing._service import _ProcessorServicer


@processor(
    'mtap-test-processor',
    description='Processor desc.',
    parameters=[
        parameter(
            'a_param',
            required=True,
            data_type='bool',
            description="desc.",
        ),
    ],
    inputs=[
        labels(
            'input_index',
            properties=[label_property('bar', data_type='bool')],
        ),
    ],
    outputs=[
        labels(
            'output_index',
            description='desc.',
            properties=[
                label_property(
                    'foo',
                    data_type='str',
                    nullable=True,
                    description='A label property.',
                ),
            ],
        ),
    ],
)
class ExampleTestProcessor(DocumentProcessor):
    """Processor that carries descriptive metadata and performs no work.

    The decorator declares one parameter, one input index and one output
    index; ``process_document`` itself is a no-op.
    """

    def process_document(self, document: Document, params: Dict[str, Any]):
        """Do nothing with the document and return ``None``."""
        return None
Пример #7
0
                       stanza_sentence.tokens[0].start_char)
        token_end = (sentence.start_index + token.end_char -
                     stanza_sentence.tokens[0].start_char)
        sentence_upos_tags.append(
            GenericLabel(token_begin, token_end, tag=word.upos))
    return sentence_deps, sentence_upos_tags


@processor(
    'biomedicus-dependencies',
    human_name="BioMedICUS Stanza Dependency Parser",
    entry_point=__name__,
    description=
    "Calls out to the Stanford Stanza framework for dependency parsing.",
    inputs=[
        labels(name='sentences', reference='biomedicus-sentences/sentences')
    ],
    outputs=[
        labels(name='dependencies',
               description="The dependent words.",
               properties=[
                   label_property('deprel',
                                  description="The dependency relation",
                                  data_type='str'),
                   label_property(
                       'head',
                       description=
                       "The head of this label or null if its the root.",
                       nullable=True,
                       data_type='ref:dependencies'),
                   label_property(
Пример #8
0
# Matches one line at a time (MULTILINE): group 1 captures the line's content
# with the surrounding whitespace excluded.
_pattern = re.compile(r'^[\s]*(.*?)[\s]*$', re.MULTILINE)


def get_sentences(text: str) -> List[Location]:
    """Yield the ``(start, end)`` character span of each non-blank line.

    Each span covers a line's content with leading and trailing whitespace
    excluded. Zero-length spans are skipped: the pattern also produces an
    empty match for empty text, whitespace-only text, and at the very end of
    any text that finishes with a newline, and those must not become
    sentences.

    NOTE: although annotated as returning a list, this is a generator; wrap
    the call in ``list(...)`` if a concrete list is required.

    Args:
        text: The document text, one sentence per line.

    Yields:
        ``(start, end)`` offsets into ``text``, with ``start < end``.
    """
    for match in _pattern.finditer(text):
        start, end = match.span(1)
        if start < end:
            yield start, end


@processor(
    name='biomedicus-sentences-one-per-line',
    human_name='One per Line Sentences',
    description=(
        'Labels sentences where each line in the input document is a sentence.'
    ),
    entry_point=__name__,
    outputs=[labels(name='sentences')],
)
class OnePerLineSentencesProcessor(mtap.processing.DocumentProcessor):
    """Document processor that labels the spans from ``get_sentences``."""

    def process_document(self, document: Document, params: Dict[str, Any]):
        """Write a ``sentences`` label for every span of the document text.

        Args:
            document: The document whose ``text`` is split and labeled.
            params: Processing parameters; not read by this method.
        """
        with document.get_labeler('sentences') as add_sentence:
            for begin, stop in get_sentences(document.text):
                add_sentence(begin, stop)


def main(args=None):
    """Build the one-per-line processor and pass it to ``mtap.run_processor``.

    Args:
        args: Forwarded unchanged as the ``args`` keyword of
            ``mtap.run_processor``; defaults to ``None``.
    """
    sentences_processor = OnePerLineSentencesProcessor()
    mtap.run_processor(sentences_processor, args=args)


# Script entry point: run main() only when this module is executed directly,
# not when it is imported.
if __name__ == '__main__':
    main()
Пример #9
0
from typing import Dict, Any, Optional

import mtap
from mtap.processing.descriptions import parameter, labels, label_property


@mtap.processor('mtap-example-processor-python',
                human_name="Python Example Processor",
                description="counts the number of times the letters a and b occur in a document",
                parameters=[
                    parameter('do_work', required=True, data_type='bool',
                              description="Whether the processor should do anything.")
                ],
                outputs=[
                    labels('mtap.examples.letter_counts',
                           properties=[label_property('letter', data_type='str'),
                                       label_property('count', data_type='int')])
                ])
class ExampleProcessor(mtap.DocumentProcessor):
    """Does some labeling of the counts of the letter 'a' and 'b' in a document.
    """

    def process_document(self,
                         document: mtap.Document,
                         params: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        if params['do_work']:
            with self.started_stopwatch('fetch_time'):
                text = document.text

            a_count = text.count('a')
            b_count = text.count('b')
Пример #10
0
from typing import Dict, Any

import mtap
import requests
from mtap import DocumentProcessor, processor_parser, run_processor
from mtap.processing.descriptions import labels, processor

logger = logging.getLogger("biomedicus.sentences.bi_lstm_torchserve")


@processor('biomedicus-sentences',
           human_name="Sentence Detector",
           description="Labels sentences given document text.",
           entry_point=__name__,
           outputs=[
               labels('sentences')
           ])
class SentencesProcessor(DocumentProcessor):
    """Document processor that gets sentence spans from an HTTP endpoint.

    The document text is POSTed to ``torchserve_address`` and the JSON reply
    is written to the ``sentences`` label index.
    """

    def __init__(self, torchserve_address):
        """Remember the endpoint and log which one is in use.

        Args:
            torchserve_address: URL the document text is POSTed to.
        """
        self.torchserve_address = torchserve_address
        logger.info('Using "%s" as endpoint', torchserve_address)

    def process_document(self, document: 'mtap.Document', params: Dict[str, Any]):
        """POST the document text and label each returned span as a sentence.

        Args:
            document: The document whose ``text`` is sent to the endpoint.
            params: Processing parameters; not read by this method.

        Raises:
            requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        """
        payload = {'text': str(document.text)}
        response = requests.post(self.torchserve_address, data=payload)
        # Surface HTTP failures here; otherwise an error body would be parsed
        # as if it were sentence spans and fail later with an obscure
        # unpacking error.
        response.raise_for_status()
        # The loop below requires the reply to be an iterable of two-item
        # (begin, end) pairs.
        spans = response.json()
        with document.get_labeler('sentences', distinct=True) as create_sentence:
            for begin, end in spans:
                create_sentence(begin, end)