示例#1
0
    def process_document(self, document: mtap.Document, params: Dict[str, Any]):
        """Add label indices whose labels refer to other labels.

        Four one-character ``GenericLabel`` spans are created and referenced
        from three new indices ('map_references', 'list_references' and
        'references'); afterwards the spans themselves are added to the
        document as the 'referenced' index.

        Args:
            document: The document that receives the label indices.
            params: Processing parameters; not read by this method.
        """
        # Spans (0, 1), (1, 2), (2, 3) and (3, 4).
        referenced = [mtap.GenericLabel(begin, begin + 1) for begin in range(4)]
        first, second, third, fourth = referenced

        # A reference field may hold a mapping from strings to labels.
        with document.get_labeler('map_references') as label_map_references:
            label_map_references(0, 4, ref=dict(zip('abcd', referenced)))

        # A reference field may hold a list of labels.
        with document.get_labeler('list_references') as label_list_references:
            for begin, end, targets in ((0, 2, [first, second]),
                                        (2, 3, [third, fourth])):
                label_list_references(begin, end, ref=targets)

        # A reference field may hold a label directly.
        with document.get_labeler('references') as label_references:
            for begin, end, target_a, target_b in ((0, 2, first, second),
                                                   (2, 3, third, fourth)):
                label_references(begin, end, a=target_a, b=target_b)

        # The referenced labels are only added here, after the indices that
        # point at them. Per the original author's note, the document holds
        # back uploading those indices until the referenced labels are added.
        document.add_labels('referenced', referenced)
示例#2
0
def test_labeler_distinct_and_type_id_raises(mocker):
    """Check that ``get_labeler`` rejects ``distinct`` combined with ``label_adapter``.

    Args:
        mocker: The pytest-mock fixture used to build a stand-in events client.
    """
    client = mocker.Mock(EventsClient)
    event = Event(event_id='1', client=client)
    document = Document(
        document_name='plaintext',
        text='The quick brown fox jumped over the lazy dog.',
        event=event)
    # Only the call under test sits inside ``pytest.raises``: a ValueError
    # raised while building the fixtures above must fail the test rather
    # than silently satisfy the expectation.
    with pytest.raises(ValueError):
        document.get_labeler('index',
                             distinct=True,
                             label_adapter=DistinctGenericLabelAdapter)
示例#3
0
 def process_document(self, document: Document, params: Dict[str, Any]):
     """Label the negation triggers detected in each sentence.

     Every label in the 'sentences' index has its text passed to
     ``self.negex.detect_negex_triggers``; each returned
     ``(start, end, tags)`` triple is shifted by the sentence's start index
     and written to the 'negation_triggers' index with its ``tags``.

     Args:
         document: The document to read sentences from and label.
         params: Processing parameters; not read by this method.
     """
     label_trigger = document.get_labeler('negation_triggers')
     with label_trigger:
         for sentence in document.get_label_index('sentences'):
             # Trigger offsets are relative to the sentence text.
             offset = sentence.start_index
             for begin, end, tags in self.negex.detect_negex_triggers(sentence.text):
                 label_trigger(offset + begin, offset + end, tags=tags)
示例#4
0
 def process_document(self, document: Document, params: Dict[str, Any]):
     """Label negated terms and their negation triggers, sentence by sentence.

     For every label in the 'sentences' index, the terms inside it are
     converted to sentence-relative spans and handed to
     ``self.negex.check_sentence`` together with the sentence text. The
     returned negation and trigger spans are shifted back by the sentence's
     start index and written to the 'negated' and 'negation_trigger' indices.

     Args:
         document: The document to read sentences and terms from and label.
         params: Processing parameters; 'terms_index' names the term index
             and defaults to 'umls_terms'.
     """
     terms_index_name = params.get('terms_index', 'umls_terms')
     label_negated = document.get_labeler('negated')
     label_trigger = document.get_labeler('negation_trigger')
     terms = document.get_label_index(terms_index_name)
     with label_negated, label_trigger:
         for sentence in document.get_label_index('sentences'):
             offset = sentence.start_index
             # Express each term's span relative to the sentence start.
             relative_terms = [
                 (term.start_index - offset, term.end_index - offset)
                 for term in terms.inside(sentence)
             ]
             negations, triggers = self.negex.check_sentence(sentence.text,
                                                             relative_terms)
             for begin, end in negations:
                 label_negated(offset + begin, offset + end)
             for begin, end in triggers:
                 label_trigger(offset + begin, offset + end)
示例#5
0
 def process_document(self, document: Document, params: Dict[str, Any]):
     """Label negated terms in sentences that contain negation triggers.

     For each label in the 'sentences' index, the terms and the
     'negation_triggers' labels inside it are collected. Sentences without
     triggers are skipped; otherwise ``self.negex.check_sentence`` is called
     with the terms, the triggers, the 'dependencies' index and the
     'upos_tags' index, and every returned negation span is written to the
     'negated' index as-is.

     Args:
         document: The document to read the label indices from and label.
         params: Processing parameters; 'terms_index' names the term index
             and defaults to 'umls_terms'.
     """
     terms_index_name = params.get('terms_index', 'umls_terms')
     label_negated = document.get_labeler('negated')
     terms = document.get_label_index(terms_index_name)
     triggers = document.labels['negation_triggers']
     deps = document.get_label_index('dependencies')
     upos_tags = document.get_label_index('upos_tags')
     with label_negated:
         for sentence in document.get_label_index('sentences'):
             terms_in_sentence = terms.inside(sentence)
             triggers_in_sentence = triggers.inside(sentence)
             if len(triggers_in_sentence) == 0:
                 # Nothing can be negated without a trigger in the sentence.
                 continue
             negations, _ = self.negex.check_sentence(
                 terms_in_sentence, triggers_in_sentence, deps, upos_tags)
             for begin, end in negations:
                 label_negated(begin, end)
示例#6
0
    def process_document(self,
                         document: mtap.Document,
                         params: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Count the letters 'a' and 'b' in the document text and label the counts.

        When ``params['do_work']`` is truthy, the document text is read inside
        a 'fetch_time' stopwatch, and one label per letter, spanning the whole
        text and carrying ``letter`` and ``count`` fields, is added to the
        'mtap.examples.letter_counts' index.

        Args:
            document: The document whose text is counted and labeled.
            params: Processing parameters; 'do_work' is required and toggles
                the counting.

        Returns:
            Always ``{'answer': 42}``.
        """
        if params['do_work']:
            with self.started_stopwatch('fetch_time'):
                text = document.text

            # Count both letters before any label is written.
            letter_counts = [(letter, text.count(letter)) for letter in ('a', 'b')]

            with document.get_labeler('mtap.examples.letter_counts') as label_letter_count:
                for letter, count in letter_counts:
                    label_letter_count(start_index=0, end_index=len(document.text),
                                       letter=letter, count=count)

        return {'answer': 42}
示例#7
0
def test_copy_document():
    """Check that ``CopyDocument`` copies a document's labels to a new document.

    Three word labels are added to 'some_index' on the document 'first';
    after ``CopyDocument('first', 'second')`` processes the event, the
    document 'second' must exist and carry equal labels in 'some_index'.
    """
    words = [(0, 3, 'The'), (4, 9, 'quick'), (10, 15, 'brown')]

    e = Event()
    doc = Document(document_name='first',
                   text='The quick brown fox jumped over the lazy dog.')
    e.add_document(doc)
    with doc.get_labeler('some_index') as label:
        for begin, end, word in words:
            label(begin, end, word=word)

    CopyDocument('first', 'second').process(e, {})

    second = e.documents['second']
    assert second is not None
    expected = [GenericLabel(begin, end, word=word) for begin, end, word in words]
    assert second.labels['some_index'] == expected
示例#8
0
def test_labeler_distinct(mocker):
    """Check that a distinct labeler uploads its labels with the distinct adapter.

    Three labels are added through a ``distinct=True`` labeler; the mocked
    client must then have received ``add_labels`` with those labels and
    ``DistinctGenericLabelAdapter``, and the document's 'index' label index
    must compare equal to them.

    Args:
        mocker: The pytest-mock fixture used to build a stand-in events client.
    """
    client = mocker.Mock(EventsClient)
    event = Event(event_id='1', client=client)
    document = Document(document_name='plaintext',
                        text='The quick brown fox jumped over the lazy dog.',
                        event=event)

    # (start, end, x) for each label added below.
    spans = [(0, 10, 1), (11, 15, 2), (16, 20, 3)]
    with document.get_labeler('index', distinct=True) as add_generic_label:
        for begin, end, x in spans:
            add_generic_label(begin, end, x=x)

    expected = [GenericLabel(begin, end, document=document, x=x)
                for begin, end, x in spans]
    client.add_labels.assert_called_with(event_id='1',
                                         document_name='plaintext',
                                         index_name='index',
                                         labels=expected,
                                         adapter=DistinctGenericLabelAdapter)
    assert document.get_label_index('index') == expected
示例#9
0
def test_labeler_distinct(mocker):
    """Check that a distinct labeler uploads its labels with the distinct adapter.

    The mocked client is configured to return itself from
    ``get_local_instance`` and an empty list from ``get_label_index_info``.
    Three labels are added through a ``distinct=True`` labeler; the client
    must then have received ``add_labels`` with those labels and
    ``DISTINCT_GENERIC_ADAPTER``, and ``document.labels['index']`` must
    compare equal to them.

    Args:
        mocker: The pytest-mock fixture used to build a stand-in events client.
    """
    client = mocker.Mock(EventsClient)
    client.get_local_instance.return_value = client
    client.get_label_index_info.return_value = []
    event = Event(event_id='1', client=client)
    document = Document(document_name='plaintext',
                        text='The quick brown fox jumped over the lazy dog.',
                        event=event)

    # (start, end, x) for each label added below.
    spans = [(0, 10, 1), (11, 15, 2), (16, 20, 3)]
    with document.get_labeler('index', distinct=True) as add_generic_label:
        for begin, end, x in spans:
            add_generic_label(begin, end, x=x)

    expected = [GenericLabel(begin, end, document=document, x=x)
                for begin, end, x in spans]
    client.add_labels.assert_called_with(event_id='1',
                                         document_name='plaintext',
                                         index_name='index',
                                         labels=expected,
                                         adapter=DISTINCT_GENERIC_ADAPTER)
    assert document.labels['index'] == expected
示例#10
0
 def process_document(self, document: Document, params: Dict[str, Any]):
     """Label sentences using the spans produced by ``predict_text``.

     ``predict_text`` is called with ``self.model``, ``self.input_mapper``,
     the document text and ``self.device``; each ``(start, end)`` pair it
     yields becomes a label in the distinct 'sentences' index.

     Args:
         document: The document whose text is segmented and labeled.
         params: Processing parameters; not read by this method.
     """
     with document.get_labeler('sentences', distinct=True) as add_sentence:
         spans = predict_text(self.model, self.input_mapper, document.text,
                              self.device)
         for begin, end in spans:
             add_sentence(begin, end)
示例#11
0
 def process_document(self, document: Document, params: Dict[str, Any]):
     """Label sentences using the spans produced by ``get_sentences``.

     Each ``(start, end)`` pair returned for the document text becomes a
     label in the 'sentences' index.

     Args:
         document: The document whose text is segmented and labeled.
         params: Processing parameters; not read by this method.
     """
     with document.get_labeler('sentences') as sentence_labeler:
         spans = get_sentences(document.text)
         for begin, end in spans:
             sentence_labeler(begin, end)
示例#12
0
 def process_document(self, document: Document, params: Dict[str, Any]):
     """Label sentences using spans computed through ``self.pool``.

     The document text is passed to ``predict_sentences_async`` via
     ``self.pool.apply``; each ``(start, end)`` pair in the result becomes a
     label in the distinct 'sentences' index.

     Args:
         document: The document whose text is segmented and labeled.
         params: Processing parameters; not read by this method.
     """
     # The spans are computed before the labeler is opened.
     spans = self.pool.apply(predict_sentences_async, args=(document.text, ))
     with document.get_labeler('sentences', distinct=True) as add_sentence:
         for begin, end in spans:
             add_sentence(begin, end)