def test_should_tag_single_token_within_partial_prediction_at_same_scale(self):
    """Token whose box is painted COLOR_1 on a larger image must get TAG_1.

    The page and the predicted image are both ten times the default
    size; only the token-sized rectangle at the origin is painted with
    COLOR_1. After annotation the token's CV-scope tag must be TAG_1.
    """
    page_width = DEFAULT_WIDTH * 10
    page_height = DEFAULT_HEIGHT * 10

    # Predicted image: background everywhere except the token-sized region.
    predicted_image = filled_image(
        BG_COLOR, {TAG_1: COLOR_1},
        width=page_width, height=page_height
    )
    fill_rect(
        predicted_image,
        BoundingBox(0, 0, DEFAULT_WIDTH, DEFAULT_HEIGHT),
        COLOR_1
    )

    token = SimpleToken(TOKEN_TEXT_1)
    structured_document = SimpleStructuredDocument(
        lines=[SimpleLine([token])]
    )
    page = structured_document.get_pages()[0]
    structured_document.set_bounding_box(page, DEFAULT_BOUNDING_BOX)
    # NOTE(review): this second call presumably overwrites the box set on
    # the previous line, which would make that call redundant - confirm.
    structured_document.set_bounding_box(
        page, BoundingBox(0, 0, page_width, page_height)
    )
    structured_document.set_bounding_box(
        token, BoundingBox(0, 0, DEFAULT_WIDTH, DEFAULT_HEIGHT)
    )

    annotate_structured_document_using_predicted_images(
        structured_document, [predicted_image]
    )

    actual_tag = structured_document.get_tag(token, scope=CV_TAG_SCOPE)
    assert actual_tag == TAG_1
def test_should_ignore_block_without_bounding_box(self):
    """A tagged token created without a bounding box must yield no blocks."""
    unboxed_token = SimpleToken('test')
    line = SimpleLine([unboxed_token])
    structured_document = SimpleStructuredDocument(lines=[line])
    structured_document.set_tag(unboxed_token, TAG1)

    first_page = structured_document.get_pages()[0]
    blocks = annotation_document_page_to_annotation_blocks(
        structured_document, first_page
    )

    assert len(blocks) == 0
def test_should_not_tag_single_token_not_within_prediction(self):
    """Token must have no CV-scope tag when nothing is painted for it.

    The predicted image comes straight from ``filled_image(BG_COLOR, ...)``
    with no rectangle painted on it afterwards.
    """
    token = SimpleToken(TOKEN_TEXT_1)
    structured_document = SimpleStructuredDocument(
        lines=[SimpleLine([token])]
    )
    page = structured_document.get_pages()[0]
    structured_document.set_bounding_box(page, DEFAULT_BOUNDING_BOX)
    structured_document.set_bounding_box(token, DEFAULT_BOUNDING_BOX)

    predicted_images = [filled_image(BG_COLOR, {TAG_1: COLOR_1})]
    annotate_structured_document_using_predicted_images(
        structured_document, predicted_images
    )

    actual_tag = structured_document.get_tag(token, scope=CV_TAG_SCOPE)
    assert actual_tag is None
def test_should_strip_tag_prefix(self):
    """Block tags must be the bare tag, without the token's tag prefix."""
    prefixed_token = SimpleToken(
        'test',
        tag=TAG1,
        tag_prefix=B_TAG_PREFIX,
        bounding_box=DEFAULT_BOUNDING_BOX
    )
    # Sanity check: the token itself reports the prefixed tag.
    assert prefixed_token.get_tag() == B_TAG_PREFIX + TAG1

    structured_document = SimpleStructuredDocument(
        lines=[SimpleLine([prefixed_token])]
    )
    first_page = structured_document.get_pages()[0]
    blocks = annotation_document_page_to_annotation_blocks(
        structured_document, first_page
    )

    block_tags = [block.tag for block in blocks]
    assert block_tags == [TAG1]
def test_should_convert_single_token_to_block_with_same_bounding_box(self):
    """One tagged token must yield one merged block with its tag and box."""
    tagged_token = SimpleToken(
        'test', tag=TAG1, bounding_box=DEFAULT_BOUNDING_BOX
    )
    line = SimpleLine([tagged_token])
    structured_document = SimpleStructuredDocument(lines=[line])

    first_page = structured_document.get_pages()[0]
    merged_blocks = annotation_document_page_to_merged_blocks(
        structured_document, first_page
    )

    assert len(merged_blocks) == 1
    only_block = merged_blocks[0]
    assert only_block.tag == TAG1
    assert only_block.bounding_box == DEFAULT_BOUNDING_BOX