def test_add_document_no_client():
    """A document added to an event built without a client is retrievable by name."""
    lime_text = ("“You're no help,” he told the lime. "
                 "This was unfair. It was only a lime; "
                 "there was nothing special about it at all. "
                 "It was doing the best it could.")
    evt = Event(event_id='1')
    doc = Document('plaintext', text=lime_text)

    evt.add_document(doc)

    assert evt.documents['plaintext'] == doc
def test_add_document(mocker):
    """Adding a document to a client-backed event stores it and notifies the client.

    The client is a mock created from ``EventsClient``; the test checks that
    its ``add_document`` is called exactly once with the event id, the
    document name, and the document text.
    """
    lime_text = ("“You're no help,” he told the lime. "
                 "This was unfair. It was only a lime; "
                 "there was nothing special about it at all. "
                 "It was doing the best it could.")
    mock_client = mocker.Mock(EventsClient)
    evt = Event(event_id='1', client=mock_client)
    doc = Document('plaintext', text=lime_text)

    evt.add_document(doc)

    assert evt.documents['plaintext'] == doc
    mock_client.add_document.assert_called_once_with('1', 'plaintext', lime_text)
def test_copy_document():
    """CopyDocument('first', 'second') yields a 'second' document carrying the same labels."""
    event = Event()
    source = Document(document_name='first',
                      text='The quick brown fox jumped over the lazy dog.')
    event.add_document(source)

    # Label the first three words on the source document.
    with source.get_labeler('some_index') as add_label:
        add_label(0, 3, word='The')
        add_label(4, 9, word='quick')
        add_label(10, 15, word='brown')

    copier = CopyDocument('first', 'second')
    copier.process(event, {})

    copied = event.documents['second']
    assert copied is not None

    expected_labels = [
        GenericLabel(0, 3, word='The'),
        GenericLabel(4, 9, word='quick'),
        GenericLabel(10, 15, word='brown'),
    ]
    assert copied.get_label_index('some_index') == expected_labels
def dict_to_document(document_name: str,
                     d: Dict,
                     *,
                     event: Optional[Event] = None) -> Document:
    """Rebuild a Document from its serialized dictionary form.

    The dictionary is read for two keys: ``'text'`` (the document text) and
    ``'label_indices'`` (a mapping of label index name to a serialized label
    index, each of which is passed to ``dict_to_label_index``).

    Args:
        document_name (str): The name identifier of the document on the event.
        d (dict): The dictionary representation of the document.
        event (~typing.Optional[Event]): If given, the new document is added
            to this event before any labels are added to the document.

    Returns:
        Document: The deserialized Document object.
    """
    result = Document(document_name=document_name, text=d['text'])

    # Attach to the event first, so labels are added to an already-attached document.
    if event is not None:
        event.add_document(result)

    for index_name, serialized_index in d['label_indices'].items():
        label_index = dict_to_label_index(d=serialized_index)
        # Carry over the distinct flag from the deserialized index.
        result.add_labels(index_name, label_index, distinct=label_index.distinct)

    return result
def test_json_serializer():
    """JsonSerializer.event_to_file writes the event id, metadata, text, and label indices as JSON.

    The event is written to a temporary text file, read back with
    ``json.load``, and the resulting dictionary is compared against the
    expected structure.
    """
    evt = Event(event_id='1')
    evt.metadata['foo'] = "bar"
    doc = Document('plaintext', text='Some text.')
    evt.add_document(doc)

    doc.add_labels('one', [
        mtap.GenericLabel(start_index=0, end_index=5, x=10),
        mtap.GenericLabel(start_index=6, end_index=10, x=15),
    ])
    doc.add_labels('two', [
        mtap.GenericLabel(start_index=0, end_index=25, a='b'),
        mtap.GenericLabel(start_index=26, end_index=42, a='c'),
    ])
    doc.add_labels('three', [
        mtap.GenericLabel(start_index=0, end_index=10, foo=True),
        mtap.GenericLabel(start_index=11, end_index=15, foo=False),
    ], distinct=True)

    # Round-trip through a temporary file: write, rewind, then parse as JSON.
    with TemporaryFile('w+') as out_file:
        JsonSerializer.event_to_file(evt, out_file)
        out_file.flush()
        out_file.seek(0)
        parsed = json.load(out_file)

    assert parsed['event_id'] == '1'
    assert parsed['metadata']['foo'] == 'bar'

    serialized_doc = parsed['documents']['plaintext']
    assert serialized_doc['text'] == 'Some text.'

    indices = serialized_doc['label_indices']
    assert len(indices) == 3

    expected_one = {
        'json_labels': [
            {'start_index': 0, 'end_index': 5, 'x': 10},
            {'start_index': 6, 'end_index': 10, 'x': 15},
        ],
        'distinct': False,
    }
    assert indices['one'] == expected_one

    expected_two = {
        'json_labels': [
            {'start_index': 0, 'end_index': 25, 'a': 'b'},
            {'start_index': 26, 'end_index': 42, 'a': 'c'},
        ],
        'distinct': False,
    }
    assert indices['two'] == expected_two

    expected_three = {
        'json_labels': [
            {'start_index': 0, 'end_index': 10, 'foo': True},
            {'start_index': 11, 'end_index': 15, 'foo': False},
        ],
        'distinct': True,
    }
    assert indices['three'] == expected_three