def test_docs_serialization_format(self):
    ''' Compares Jeremia's output with the stored reference documents.

    A batch of two documents and a single document are submitted; the
    results must equal the JSON fixtures stored below ``data/``.
    '''
    import json
    from eWRT.util.module_path import get_resource

    def load_reference(relative_path):
        ''' Loads a JSON fixture located relative to this module. '''
        # the context manager closes the file handle once it has been
        # parsed instead of leaving that to the garbage collector
        with open(get_resource(__file__, relative_path)) as fixture:
            return json.load(fixture)

    DOCS = [{'id': 7,
             'body': 'Ehre sei Gott.',
             'title': '',
             'format': 'text/html',
             'header': {'test': 'testvalue'}},
            {'id': 8,
             'body': '',
             'title': 'Guten Tag!',
             'format': 'text/html',
             'header': {}}]

    REFERENCE_MULTI = load_reference(
        'data/jeremia_reference_output_documents.json')
    REFERENCE_SINGLE = load_reference(
        'data/jeremia_reference_output_single_document.json')

    # document list
    j = Jeremia()
    result = j.submit_documents(DOCS)
    # both lists are sorted so that the comparison ignores the order
    # NOTE(review): list.sort() raises a TypeError on Python 3 if the
    # entries are dicts - confirm the element type of both lists
    result.sort()
    REFERENCE_MULTI.sort()
    assert REFERENCE_MULTI == result

    # single document
    result = j.submit_document(DOCS[0])
    assert REFERENCE_SINGLE == result
def test_single_document_with_annotations(self):
    ''' Tests the handling of single document annotations.

    A document with two title and two body annotations is submitted.
    All four annotations must be present in the result, with the
    start/end indices asserted below for each sentence (identified by
    its md5sum).

    The test returns without checking anything if the Jeremia version
    is older than 0.0.4.
    '''
    import re

    def version_tuple(version):
        ''' Converts a dotted version (str or bytes) to a tuple of ints.

        Only the leading numeric part is considered; a value without
        such a part yields an empty tuple, which compares lower than
        any required version.
        '''
        if isinstance(version, bytes):
            version = version.decode('ascii', 'ignore')
        leading = re.match(r'\s*(\d+(?:\.\d+)*)', version)
        if leading is None:
            return ()
        return tuple(int(part) for part in leading.group(1).split('.'))

    DOC = {'id': 12,
           'body': 'UBS has finally succeeded. They obtained a 10% share of CS.',
           'title': 'UBS versus Credit Suisse.',
           'format': 'text/html',
           'title_annotation': [{'start': 0,
                                 'end': 3,
                                 'surfaceForm': 'UBS',
                                 'key': 'http://dbpedia.org/UBS'},
                                {'start': 11,
                                 'end': 24,
                                 'surfaceForm': 'Credit Suisse',
                                 'key': 'http://dbpedia.org/Credit Suisse'}],
           'body_annotation': [{'start': 0,
                                'end': 3,
                                'surfaceForm': 'UBS',
                                'key': 'http://dbpedia.org/UBS'},
                               {'start': 56,
                                'end': 58,
                                'surfaceForm': 'CS',
                                'key': 'http://dbpedia.org/Credit Suisse'}],
           'header': {},
           }
    j = Jeremia()

    # this test requires Jeremia version 0.0.4+
    # (compared numerically: a plain string comparison would rank
    #  "0.0.10" below "0.0.4" and fail for mixed str/bytes values)
    if version_tuple(j.version()) < (0, 0, 4):
        return

    print('submitting document with annotations...')
    result = j.submit_document(DOC)

    # check: all annotations have been preserved
    print(result)
    assert len(result['annotation']) == 4

    # check: annotations
    for annotation in result['annotation']:
        # title
        if annotation['md5sum'] == '8e3f3deac5e6c01dab521c07e3a60d7b':
            assert annotation['start'] == 0 or annotation['start'] == 11
            assert annotation['end'] == 3 or annotation['end'] == 24
        # first body sentence
        elif annotation['md5sum'] == 'ffafdc744dcda3d58ab6eafc86ad99b1':
            assert annotation['start'] == 0
            assert annotation['end'] == 3
        # second body sentence with adjusted indices
        elif annotation['md5sum'] == '25faaf0960a68ae741125ca436b330ee':
            assert annotation['start'] == 29
            assert annotation['end'] == 31
def test_docs_serialization_format(self):
    ''' Compares Jeremia's output with the stored reference documents.

    A batch of two documents and a single document are submitted; the
    results must equal the JSON fixtures stored below ``data/``.
    '''
    import json
    from eWRT.util.module_path import get_resource

    def load_reference(relative_path):
        ''' Loads a JSON fixture located relative to this module. '''
        # the context manager closes the file handle once it has been
        # parsed instead of leaving that to the garbage collector
        with open(get_resource(__file__, relative_path)) as fixture:
            return json.load(fixture)

    DOCS = [{
        'id': 7,
        'body': 'Ehre sei Gott.',
        'title': '',
        'format': 'text/html',
        'header': {
            'test': 'testvalue'
        }
    }, {
        'id': 8,
        'body': '',
        'title': 'Guten Tag!',
        'format': 'text/html',
        'header': {}
    }]

    REFERENCE_MULTI = load_reference(
        'data/jeremia_reference_output_documents.json')
    REFERENCE_SINGLE = load_reference(
        'data/jeremia_reference_output_single_document.json')

    # document list
    j = Jeremia()
    result = j.submit_documents(DOCS)
    # both lists are sorted so that the comparison ignores the order
    # NOTE(review): list.sort() raises a TypeError on Python 3 if the
    # entries are dicts - confirm the element type of both lists
    result.sort()
    REFERENCE_MULTI.sort()
    assert REFERENCE_MULTI == result

    # single document
    result = j.submit_document(DOCS[0])
    assert REFERENCE_SINGLE == result
def test_single_document_with_annotations(self):
    ''' Tests the handling of single document annotations.

    A document with two title and two body annotations is submitted.
    All four annotations must be present in the result, with the
    start/end indices asserted below for each sentence (identified by
    its md5sum).

    The test returns without checking anything if the Jeremia version
    is older than 0.0.4.
    '''
    import re

    def version_tuple(version):
        ''' Converts a dotted version (str or bytes) to a tuple of ints.

        Only the leading numeric part is considered; a value without
        such a part yields an empty tuple, which compares lower than
        any required version.
        '''
        if isinstance(version, bytes):
            version = version.decode('ascii', 'ignore')
        leading = re.match(r'\s*(\d+(?:\.\d+)*)', version)
        if leading is None:
            return ()
        return tuple(int(part) for part in leading.group(1).split('.'))

    DOC = {
        'id': 12,
        'body': 'UBS has finally succeeded. They obtained a 10% share of CS.',
        'title': 'UBS versus Credit Suisse.',
        'format': 'text/html',
        'title_annotation': [{
            'start': 0,
            'end': 3,
            'surfaceForm': 'UBS',
            'key': 'http://dbpedia.org/UBS'
        }, {
            'start': 11,
            'end': 24,
            'surfaceForm': 'Credit Suisse',
            'key': 'http://dbpedia.org/Credit Suisse'
        }],
        'body_annotation': [{
            'start': 0,
            'end': 3,
            'surfaceForm': 'UBS',
            'key': 'http://dbpedia.org/UBS'
        }, {
            'start': 56,
            'end': 58,
            'surfaceForm': 'CS',
            'key': 'http://dbpedia.org/Credit Suisse'
        }],
        'header': {},
    }
    j = Jeremia()

    # this test requires Jeremia version 0.0.4+
    # (compared numerically: a plain bytes/string comparison would rank
    #  "0.0.10" below "0.0.4" and fail for mixed str/bytes values)
    if version_tuple(j.version()) < (0, 0, 4):
        return

    print('submitting document with annotations...')
    result = j.submit_document(DOC)

    # check: all annotations have been preserved
    print(result)
    assert len(result['annotation']) == 4

    # check: annotations
    for annotation in result['annotation']:
        # title
        if annotation['md5sum'] == '8e3f3deac5e6c01dab521c07e3a60d7b':
            assert annotation['start'] == 0 or annotation['start'] == 11
            assert annotation['end'] == 3 or annotation['end'] == 24
        # first body sentence
        elif annotation['md5sum'] == 'ffafdc744dcda3d58ab6eafc86ad99b1':
            assert annotation['start'] == 0
            assert annotation['end'] == 3
        # second body sentence with adjusted indices
        elif annotation['md5sum'] == '25faaf0960a68ae741125ca436b330ee':
            assert annotation['start'] == 29
            assert annotation['end'] == 31
def test_single_document_processing(self):
    ''' Submits the document at index 1 of ``self.DOCS`` and checks that
    the returned value differs from the empty string. '''
    client = Jeremia()
    print('submitting document...')

    sample = self.DOCS[1]
    reply = client.submit_document(sample)

    is_not_empty_string = reply != ""
    self.assertTrue(is_not_empty_string)
''' add support for calling Jeremia tests as part of a test suite '''
suite = unittest.TestSuite()
# NOTE(review): unittest.makeSuite is deprecated since Python 3.11 and
# removed in 3.13; unittest.defaultTestLoader.loadTestsFromTestCase is
# the documented replacement - consider switching.
suite.addTest(unittest.makeSuite(JeremiaTest, 'test'))
return suite


# Script entry point: with a command line argument the argument is
# submitted as the body of an ad-hoc document and the result is printed;
# without arguments the unit tests are run.
if __name__ == '__main__':
    if len(argv) > 1:
        # the first command line argument becomes the document body
        txt = argv[1]
        docs = {
            'id': '192292',
            'body': txt,
            'title': '',
            'format': 'text/html',
            'header': {
                'test': 'testvalue'
            }
        }
        j = Jeremia()
        # attach a single body annotation covering the offsets 0 to 3
        docs['body_annotation'] = [{
            'start': 0,
            'end': 3,
            'key': 'test annotation'
        }]
        l = j.submit_document(docs)
        print(l)
    else:
        # no argument given: run the unit tests
        unittest.main()
# single document
result = j.submit_document(DOCS[0])
assert REFERENCE_SINGLE == result


def test_suite():
    ''' add support for calling Jeremia tests as part of a test suite

    Returns a ``unittest.TestSuite`` containing the ``test*`` methods of
    ``JeremiaTest``.
    '''
    suite = unittest.TestSuite()
    # unittest.makeSuite is deprecated since Python 3.11 and removed in
    # 3.13; the default loader collects the same 'test'-prefixed methods
    suite.addTest(
        unittest.defaultTestLoader.loadTestsFromTestCase(JeremiaTest))
    return suite


# Script entry point: with a command line argument the argument is
# submitted as the body of an ad-hoc document and the result is printed;
# without arguments the unit tests are run.
if __name__ == '__main__':
    if len(argv) > 1:
        # the first command line argument becomes the document body
        txt = argv[1]
        docs = {'id': '192292',
                'body': txt,
                'title': '',
                'format': 'text/html',
                'header': {'test': 'testvalue'}}
        j = Jeremia()
        # attach a single body annotation covering the offsets 0 to 3
        docs['body_annotation'] = [
            {'start': 0, 'end': 3, 'key': 'test annotation'}]
        submission_result = j.submit_document(docs)
        print(submission_result)
    else:
        unittest.main()