# -- Assumed imports ---------------------------------------------------------
# Module paths follow the concrete-python package layout; the subprocess
# service wrappers, docker helpers, and assertion helpers are assumed to live
# in a local test-helper module (named `helpers` here for illustration).
import pytest

from thrift.protocol import TCompactProtocol
from thrift.transport import TSocket, TTransport

from concrete import (AnnotationMetadata, LatticePath, TaggedToken, Token,
                      TokenLattice, TokenTagging)
from concrete.annotate import AnnotateCommunicationService
from concrete.inspect import print_conll_style_tags_for_communication
from concrete.util import (AL_NONE, AL_SECTION, AL_SENTENCE,
                           CommunicationContainerFetchHandler,
                           CommunicationWriterZip,
                           FetchBackedCommunicationContainer,
                           FetchCommunicationClientWrapper,
                           communication_deep_copy, create_comm, find_port,
                           generate_UUID, get_tokens,
                           read_communication_from_buffer,
                           write_communication_to_buffer)
from concrete.validate import validate_communication

from helpers import (NoopAnnotateCommunicationService,  # hypothetical module
                     SubprocessAnnotateCommunicationServiceWrapper,
                     SubprocessFetchCommunicationServiceWrapper,
                     assert_simple_comms_equal, docker_build, docker_cp,
                     docker_run, docker_volume_create, force_remove,
                     force_remove_container, force_remove_image,
                     force_remove_volume, make_stop_word_text, mktemp_path,
                     wait_concrete_service)


def test_comm_container_fetch_handler():
    comm_container = {'one': create_comm('one'), 'two': create_comm('two')}
    impl = CommunicationContainerFetchHandler(comm_container)
    host = 'localhost'
    port = find_port()
    with SubprocessFetchCommunicationServiceWrapper(impl, host, port):
        with FetchCommunicationClientWrapper(host, port) as cli:
            assert cli.getCommunicationCount()
            ids = cli.getCommunicationIDs(0, 10)
            assert 'one' in ids
            assert 'two' in ids
            assert 'foo' not in ids

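
# Hypothetical extension of the handler test above (not in the original
# suite): fetch a specific communication by id through the service.
# FetchRequest is assumed importable from concrete.access.ttypes.
from concrete.access.ttypes import FetchRequest  # assumed path


def test_comm_container_fetch_by_id_sketch():
    impl = CommunicationContainerFetchHandler({'one': create_comm('one')})
    host = 'localhost'
    port = find_port()
    with SubprocessFetchCommunicationServiceWrapper(impl, host, port):
        with FetchCommunicationClientWrapper(host, port) as cli:
            result = cli.fetch(FetchRequest(communicationIds=['one']))
            assert [c.id for c in result.communications] == ['one']
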
def test_fetch_backed_container():
    comm_container = {'one': create_comm('one'), 'two': create_comm('two')}
    impl = CommunicationContainerFetchHandler(comm_container)
    host = 'localhost'
    port = find_port()
    with SubprocessFetchCommunicationServiceWrapper(impl, host, port):
        cc = FetchBackedCommunicationContainer(host, port)
        assert len(cc) == 2
        assert 'one' in cc
        assert 'two' in cc
        for comm_id in cc:
            comm = cc[comm_id]
            assert validate_communication(comm)

def test_create_comm_complex_al_sentence():
    comm = create_comm('one', '\n\nsimple comm\t\t.\nor ...\n\nisit?\n',
                       annotation_level=AL_SENTENCE)
    assert 'one' == comm.id
    assert '\n\nsimple comm\t\t.\nor ...\n\nisit?\n' == comm.text
    assert 2 == len(comm.sectionList)

    sect = comm.sectionList[0]
    assert 2 == sect.textSpan.start
    assert 23 == sect.textSpan.ending
    assert 2 == len(sect.sentenceList)
    sent = sect.sentenceList[0]
    assert 2 == sent.textSpan.start
    assert 16 == sent.textSpan.ending
    assert sent.tokenization is None
    sent = sect.sentenceList[1]
    assert 17 == sent.textSpan.start
    assert 23 == sent.textSpan.ending
    assert sent.tokenization is None

    sect = comm.sectionList[1]
    assert 25 == sect.textSpan.start
    assert 30 == sect.textSpan.ending
    assert 1 == len(sect.sentenceList)
    sent = sect.sentenceList[0]
    assert 25 == sent.textSpan.start
    assert 30 == sent.textSpan.ending
    assert sent.tokenization is None

    assert validate_communication(comm)

def test_create_comm_complex_al_none():
    comm = create_comm('one', '\n\nsimple comm\t\t.\nor ...\n\nisit?\n',
                       annotation_level=AL_NONE)
    assert 'one' == comm.id
    assert '\n\nsimple comm\t\t.\nor ...\n\nisit?\n' == comm.text
    assert comm.sectionList is None
    assert validate_communication(comm)

def comm_with_other_tags(*additional_tagging_types):
    comm = create_comm('quick', '''\
The quick brown fox jumped
over the lazy dog .

Or did she ?
''')
    for section in comm.sectionList:
        for sentence in section.sentenceList:
            sentence.tokenization.tokenTaggingList = [
                TokenTagging(
                    uuid=generate_UUID(),
                    metadata=AnnotationMetadata(tool=u'tool', timestamp=1),
                    taggingType=u'upper',
                    taggedTokenList=[
                        TaggedToken(tokenIndex=token.tokenIndex,
                                    tag=token.text.upper())
                        for token in sentence.tokenization.tokenList.tokenList
                    ],
                ),
                TokenTagging(
                    uuid=generate_UUID(),
                    metadata=AnnotationMetadata(tool=u'tool', timestamp=1),
                    taggingType=u'lower',
                    taggedTokenList=[
                        TaggedToken(tokenIndex=token.tokenIndex,
                                    tag=token.text.lower())
                        for token in sentence.tokenization.tokenList.tokenList
                    ],
                ),
            ] + [
                TokenTagging(
                    uuid=generate_UUID(),
                    metadata=AnnotationMetadata(tool=u'tool/{}'.format(i),
                                                timestamp=1),
                    taggingType=tagging_type,
                    taggedTokenList=[
                        TaggedToken(
                            tokenIndex=token.tokenIndex,
                            tag='{}_{}/{}'.format(tagging_type,
                                                  token.tokenIndex, i),
                        )
                        for token in sentence.tokenization.tokenList.tokenList
                    ],
                )
                for (i, tagging_type) in enumerate(additional_tagging_types)
            ]
    return comm

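
# Illustrative (hypothetical) usage of comm_with_other_tags: every extra
# tagging type requested is appended once per sentence, after the built-in
# 'upper' and 'lower' taggings.
def test_comm_with_other_tags_sketch():
    comm = comm_with_other_tags('ner', 'pos')
    for section in comm.sectionList:
        for sentence in section.sentenceList:
            tagging_types = [t.taggingType
                             for t in sentence.tokenization.tokenTaggingList]
            assert tagging_types == ['upper', 'lower', 'ner', 'pos']
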
def test_annotate():
    impl = NoopAnnotateCommunicationService()
    host = 'localhost'
    port = find_port()
    timeout = 5

    comm_id = '1-2-3-4'
    comm = create_comm(comm_id)
    comm_uuid_uuidString = comm.uuid.uuidString
    comm_metadata_tool = comm.metadata.tool
    comm_metadata_timestamp = comm.metadata.timestamp

    with SubprocessAnnotateCommunicationServiceWrapper(impl, host, port,
                                                       timeout=timeout):
        transport = TSocket.TSocket(host, port)
        transport = TTransport.TFramedTransport(transport)
        protocol = TCompactProtocol.TCompactProtocolAccelerated(transport)
        cli = AnnotateCommunicationService.Client(protocol)
        transport.open()
        res = cli.annotate(comm)
        transport.close()

        assert res.id == comm_id
        assert res.uuid.uuidString == comm_uuid_uuidString
        assert res.metadata.tool == comm_metadata_tool
        assert res.metadata.timestamp == comm_metadata_timestamp

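
# The same annotate round trip, sketched with a context-managed client.
# AnnotateCommunicationClientWrapper is assumed to exist (analogous to the
# FetchCommunicationClientWrapper used elsewhere in this file), avoiding the
# manual transport/protocol plumbing above.
from concrete.util.annotate_wrapper import \
    AnnotateCommunicationClientWrapper  # assumed path


def test_annotate_with_wrapper_sketch():
    impl = NoopAnnotateCommunicationService()
    host = 'localhost'
    port = find_port()
    comm = create_comm('1-2-3-4')
    with SubprocessAnnotateCommunicationServiceWrapper(impl, host, port,
                                                       timeout=5):
        with AnnotateCommunicationClientWrapper(host, port) as cli:
            assert cli.annotate(comm).id == '1-2-3-4'
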
def test_read_write_fixed_point():
    comm = create_comm('comm-1')
    buf_1 = write_communication_to_buffer(comm)
    buf_2 = write_communication_to_buffer(
        read_communication_from_buffer(buf_1)
    )
    assert buf_1 == buf_2

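
# File-based analogue of the buffer fixed-point test above, assuming
# concrete.util also exposes write_communication_to_file and
# read_communication_from_file (per the concrete-python file_io module).
from concrete.util import (read_communication_from_file,  # assumed exports
                           write_communication_to_file)


def test_read_write_fixed_point_file_sketch(tmpdir):
    path = str(tmpdir.join('comm.concrete'))
    comm = create_comm('comm-1')
    write_communication_to_file(comm, path)
    assert (write_communication_to_buffer(read_communication_from_file(path))
            == write_communication_to_buffer(comm))
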
@pytest.fixture  # assumed: this generator is consumed as a pytest fixture
def index_volume():
    docker_build('Dockerfile.concrete_python', 'concrete-python-image')
    docker_build('Dockerfile.index', 'indexing-image')
    docker_volume_create('data')
    docker_volume_create('index')

    zip_path = mktemp_path('.zip')
    with CommunicationWriterZip(zip_path) as writer:
        writer.write(create_comm(
            'test comm 0',
            'w1\n{}\nw1\n\nw2 w4\n'.format(make_stop_word_text())))
        writer.write(create_comm(
            'test comm 1', '{}'.format(make_stop_word_text())))
        writer.write(create_comm(
            'test comm 2', '{}\nw3\nw3 w2\n'.format(make_stop_word_text())))

    docker_run(
        'concrete-python-image', 'sleep', 'infinity',
        d=True, v=('data', '/mnt/data'), name='data-copy')
    docker_cp(zip_path, ('data-copy', '/mnt/data/comms.tar.gz'))
    force_remove_container('data-copy')

    docker_run(
        'concrete-python-image', 'fetch-server.py', '/mnt/data/comms.tar.gz',
        '--host', 'localhost', '--port', '9090',
        d=True, v=('data', '/mnt/data'), name='fetch')
    wait_concrete_service('fetch', 'FetchCommunication')

    docker_run(
        'indexing-image',
        network=('container', 'fetch'), rm=True, v=('index', '/mnt/index'))

    yield 'index'

    force_remove_container('data-copy')
    force_remove_container('fetch')
    force_remove_volume('data')
    force_remove_volume('index')
    force_remove(zip_path)
    force_remove_image('indexing-image')
    force_remove_image('concrete-python-image')

def test_create_comm_one_sentence_al_section():
    comm = create_comm('one', 'simple comm\t\t.', annotation_level=AL_SECTION)
    assert 'one' == comm.id
    assert 'simple comm\t\t.' == comm.text
    assert 1 == len(comm.sectionList)
    sect = comm.sectionList[0]
    assert 0 == sect.textSpan.start
    assert 14 == sect.textSpan.ending
    assert sect.sentenceList is None
    assert validate_communication(comm)

def test_lattice_with_token_list_kind():
    comm = create_comm('comm-1', 'mambo no. 4')
    tokenization = comm.sectionList[0].sentenceList[0].tokenization
    lattice_path = LatticePath()
    lattice_path.tokenList = [Token(tokenIndex=0, text='mambo'),
                              Token(tokenIndex=0, text='no.'),
                              Token(tokenIndex=0, text='3')]
    token_lattice = TokenLattice()
    token_lattice.cachedBestPath = lattice_path
    tokenization.lattice = token_lattice
    token_texts = [t.text for t in get_tokens(tokenization)]
    assert ['mambo', 'no.', '4'] == token_texts

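
# Hypothetical counterpart to the test above: with the tokenization kind
# switched to TOKEN_LATTICE, get_tokens should read the cached best lattice
# path instead of the token list (TokenizationKind assumed importable from
# the top-level concrete package).
from concrete import TokenizationKind  # assumed export


def test_lattice_with_token_lattice_kind_sketch():
    comm = create_comm('comm-1', 'mambo no. 4')
    tokenization = comm.sectionList[0].sentenceList[0].tokenization
    tokenization.lattice = TokenLattice(cachedBestPath=LatticePath(
        tokenList=[Token(tokenIndex=0, text='mambo'),
                   Token(tokenIndex=0, text='no.'),
                   Token(tokenIndex=0, text='3')]))
    tokenization.kind = TokenizationKind.TOKEN_LATTICE
    assert ['mambo', 'no.', '3'] == [t.text for t in get_tokens(tokenization)]
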
def test_create_comm_unicode_al_sentence():
    comm = create_comm('one', u'狐狸\t\t.', annotation_level=AL_SENTENCE)
    assert 'one' == comm.id
    assert u'狐狸\t\t.' == comm.text
    assert 1 == len(comm.sectionList)
    sect = comm.sectionList[0]
    assert 0 == sect.textSpan.start
    assert 5 == sect.textSpan.ending
    assert 1 == len(sect.sentenceList)
    sent = sect.sentenceList[0]
    assert 0 == sent.textSpan.start
    assert 5 == sent.textSpan.ending
    assert sent.tokenization is None
    assert validate_communication(comm)

def test_communication_deep_copy():
    comm1 = create_comm('a-b-c', text='foo bar baz .')
    comm2 = communication_deep_copy(comm1)
    comm3 = communication_deep_copy(comm1)
    assert_simple_comms_equal(comm1, comm2)
    assert_simple_comms_equal(comm2, comm3)
    tkzn1 = comm1.sectionList[0].sentenceList[0].tokenization
    tkzn1.tokenList.tokenList[0] = Token(text='bbq', tokenIndex=0)
    tkzn2 = comm2.sectionList[0].sentenceList[0].tokenization
    assert list(map(
        lambda t: t.text, tkzn1.tokenList.tokenList
    )) != list(map(
        lambda t: t.text, tkzn2.tokenList.tokenList
    ))
    assert_simple_comms_equal(comm2, comm3)

def test_create_comm_complex():
    comm = create_comm('one', '\n\nsimple comm\t\t.\nor ...\n\nisit?\n')
    assert 'one' == comm.id
    assert '\n\nsimple comm\t\t.\nor ...\n\nisit?\n' == comm.text
    assert 2 == len(comm.sectionList)

    sect = comm.sectionList[0]
    assert 2 == sect.textSpan.start
    assert 23 == sect.textSpan.ending
    assert 2 == len(sect.sentenceList)

    sent = sect.sentenceList[0]
    assert 2 == sent.textSpan.start
    assert 16 == sent.textSpan.ending
    tl = sent.tokenization.tokenList.tokenList
    assert 3 == len(tl)
    assert 0 == tl[0].tokenIndex
    assert 'simple' == tl[0].text
    assert 'simple' == comm.text[tl[0].textSpan.start:tl[0].textSpan.ending]
    assert 1 == tl[1].tokenIndex
    assert 'comm' == tl[1].text
    assert 'comm' == comm.text[tl[1].textSpan.start:tl[1].textSpan.ending]
    assert 2 == tl[2].tokenIndex
    assert '.' == tl[2].text
    assert '.' == comm.text[tl[2].textSpan.start:tl[2].textSpan.ending]

    sent = sect.sentenceList[1]
    assert 17 == sent.textSpan.start
    assert 23 == sent.textSpan.ending
    tl = sent.tokenization.tokenList.tokenList
    assert 2 == len(tl)
    assert 0 == tl[0].tokenIndex
    assert 'or' == tl[0].text
    assert 1 == tl[1].tokenIndex
    assert '...' == tl[1].text

    sect = comm.sectionList[1]
    assert 25 == sect.textSpan.start
    assert 30 == sect.textSpan.ending
    assert 1 == len(sect.sentenceList)
    sent = sect.sentenceList[0]
    assert 25 == sent.textSpan.start
    assert 30 == sent.textSpan.ending
    tl = sent.tokenization.tokenList.tokenList
    assert 1 == len(tl)
    assert 0 == tl[0].tokenIndex
    assert 'isit?' == tl[0].text
    assert 'isit?' == comm.text[tl[0].textSpan.start:tl[0].textSpan.ending]

    assert validate_communication(comm)

def test_print_conll_missing_tags(capsys):
    # We don't use comm_with_other_tags() here because we want to test
    # the case where tokenization.tokenTaggingList is None.
    comm = create_comm('quick', '''\
The quick brown fox jumped
over the lazy dog .

Or did she ?
''')
    print_conll_style_tags_for_communication(comm, ner=True)
    (out, err) = capsys.readouterr()
    assert err == ''
    assert out.startswith('INDEX\tTOKEN\n'
                          '-----\t-----\n'
                          '1\tThe\n'
                          '2\tquick\n')

def test_create_comm_unicode():
    comm = create_comm('one', u'狐狸\t\t.')
    assert 'one' == comm.id
    assert u'狐狸\t\t.' == comm.text
    assert 1 == len(comm.sectionList)
    sect = comm.sectionList[0]
    assert 0 == sect.textSpan.start
    assert 5 == sect.textSpan.ending
    assert 1 == len(sect.sentenceList)
    sent = sect.sentenceList[0]
    assert 0 == sent.textSpan.start
    assert 5 == sent.textSpan.ending
    tl = sent.tokenization.tokenList.tokenList
    assert 2 == len(tl)
    assert 0 == tl[0].tokenIndex
    assert u'狐狸' == tl[0].text
    assert u'狐狸' == comm.text[tl[0].textSpan.start:tl[0].textSpan.ending]
    assert 1 == tl[1].tokenIndex
    assert '.' == tl[1].text
    assert validate_communication(comm)

def test_create_comm_complex_sections():
    comm = create_comm('one', '\n\n\nFOO\r\n\r\n\n\n\nBAR\n\nFU\nBAR\n\t\n\n \n')
    assert 'one' == comm.id
    assert '\n\n\nFOO\r\n\r\n\n\n\nBAR\n\nFU\nBAR\n\t\n\n \n' == comm.text
    assert 3 == len(comm.sectionList)
    sect = comm.sectionList[0]
    assert 3 == sect.textSpan.start
    assert 6 == sect.textSpan.ending
    assert 1 == len(sect.sentenceList)
    sect = comm.sectionList[1]
    assert 13 == sect.textSpan.start
    assert 16 == sect.textSpan.ending
    assert 1 == len(sect.sentenceList)
    sect = comm.sectionList[2]
    assert 18 == sect.textSpan.start
    assert 24 == sect.textSpan.ending
    assert 2 == len(sect.sentenceList)

def test_create_comm_one_sentence():
    comm = create_comm('one', 'simple comm\t\t.')
    assert 'one' == comm.id
    assert 'simple comm\t\t.' == comm.text
    assert 1 == len(comm.sectionList)
    sect = comm.sectionList[0]
    assert 0 == sect.textSpan.start
    assert 14 == sect.textSpan.ending
    assert 1 == len(sect.sentenceList)
    sent = sect.sentenceList[0]
    assert 0 == sent.textSpan.start
    assert 14 == sent.textSpan.ending
    tl = sent.tokenization.tokenList.tokenList
    assert 3 == len(tl)
    assert 0 == tl[0].tokenIndex
    assert 'simple' == tl[0].text
    assert 'simple' == comm.text[tl[0].textSpan.start:tl[0].textSpan.ending]
    assert 1 == tl[1].tokenIndex
    assert 'comm' == tl[1].text
    assert 'comm' == comm.text[tl[1].textSpan.start:tl[1].textSpan.ending]
    assert 2 == tl[2].tokenIndex
    assert '.' == tl[2].text
    assert '.' == comm.text[tl[2].textSpan.start:tl[2].textSpan.ending]
    assert validate_communication(comm)

def test_create_comm_one_sentence_al_none():
    comm = create_comm('one', 'simple comm\t\t.', annotation_level=AL_NONE)
    assert 'one' == comm.id
    assert 'simple comm\t\t.' == comm.text
    assert comm.sectionList is None
    assert validate_communication(comm)

def test_create_comm_empty():
    comm = create_comm('one')
    assert 'one' == comm.id
    assert '' == comm.text
    assert [] == comm.sectionList
    assert validate_communication(comm)

def test_no_lattice_with_no_kind():
    comm = create_comm('comm-1', 'mambo no. 4')
    tokenization = comm.sectionList[0].sentenceList[0].tokenization
    token_texts = [t.text for t in get_tokens(tokenization)]
    assert ['mambo', 'no.', '4'] == token_texts

def test_create_comm_ws_al_sentence():
    comm = create_comm('one', '\t \t\r\n\n', annotation_level=AL_SENTENCE)
    assert 'one' == comm.id
    assert '\t \t\r\n\n' == comm.text
    assert [] == comm.sectionList
    assert validate_communication(comm)

def test_create_comm_unicode_al_none():
    comm = create_comm('one', u'狐狸\t\t.', annotation_level=AL_NONE)
    assert 'one' == comm.id
    assert u'狐狸\t\t.' == comm.text
    assert comm.sectionList is None
    assert validate_communication(comm)

def test_create_comm_ws_al_none():
    comm = create_comm('one', '\t \t\r\n\n', annotation_level=AL_NONE)
    assert 'one' == comm.id
    assert '\t \t\r\n\n' == comm.text
    assert comm.sectionList is None
    assert validate_communication(comm)

def test_create_comm_empty_al_section():
    comm = create_comm('one', annotation_level=AL_SECTION)
    assert 'one' == comm.id
    assert '' == comm.text
    assert [] == comm.sectionList
    assert validate_communication(comm)

def test_create_comm_ws():
    comm = create_comm('one', '\t \t\r\n\n')
    assert 'one' == comm.id
    assert '\t \t\r\n\n' == comm.text
    assert [] == comm.sectionList
    assert validate_communication(comm)

@pytest.fixture  # assumed: consumed as a pytest fixture (takes `request`)
def comm_id_and_buf(request):
    comm_id = 'temp comm'
    return (comm_id, write_communication_to_buffer(create_comm(comm_id)))

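
# Hypothetical consumer of the fixture above: the buffer should deserialize
# back to a Communication carrying the recorded id.
def test_comm_id_and_buf_roundtrip_sketch(comm_id_and_buf):
    comm_id, buf = comm_id_and_buf
    assert read_communication_from_buffer(buf).id == comm_id
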