Exemplo n.º 1
0
 def test_should_tag_single_token_within_partial_prediction_at_same_scale(self):
     """Token inside the painted part of a page-sized image gets TAG_1.

     The page bounding box and the predicted image are both ten times
     the default width and height. Only the default-sized rectangle at
     the origin is painted with COLOR_1, and the token's bounding box is
     exactly that rectangle, so the token is expected to carry TAG_1 in
     the CV_TAG_SCOPE scope afterwards.
     """
     page_width = DEFAULT_WIDTH * 10
     page_height = DEFAULT_HEIGHT * 10

     token_1 = SimpleToken(TOKEN_TEXT_1)
     structured_document = SimpleStructuredDocument(lines=[SimpleLine([token_1])])
     # The page box is set once, to the same dimensions as the image
     # below, so page and image are at the same scale.
     structured_document.set_bounding_box(
         structured_document.get_pages()[0],
         BoundingBox(0, 0, page_width, page_height)
     )
     # The token only occupies the default-sized area at the origin.
     structured_document.set_bounding_box(
         token_1,
         BoundingBox(0, 0, DEFAULT_WIDTH, DEFAULT_HEIGHT)
     )

     annotated_image = filled_image(
         BG_COLOR, {TAG_1: COLOR_1},
         width=page_width,
         height=page_height
     )
     # Paint COLOR_1 over exactly the area covered by the token.
     fill_rect(
         annotated_image,
         BoundingBox(0, 0, DEFAULT_WIDTH, DEFAULT_HEIGHT),
         COLOR_1
     )

     annotate_structured_document_using_predicted_images(
         structured_document,
         [annotated_image]
     )
     assert structured_document.get_tag(token_1, scope=CV_TAG_SCOPE) == TAG_1
    def test_should_ignore_block_without_bounding_box(self):
        """A tagged token created without a bounding box yields no blocks."""
        boxless_token = SimpleToken('test')
        only_line = SimpleLine([boxless_token])
        document = SimpleStructuredDocument(lines=[only_line])
        document.set_tag(boxless_token, TAG1)

        first_page = document.get_pages()[0]
        block_count = len(
            annotation_document_page_to_annotation_blocks(document, first_page)
        )
        assert block_count == 0
 def test_should_not_tag_single_token_not_within_prediction(self):
     """Token stays untagged when the test paints no COLOR_1 region.

     Page and token share DEFAULT_BOUNDING_BOX; the predicted image comes
     straight from filled_image and no rectangle is drawn on it here, so
     the CV_TAG_SCOPE tag of the token is expected to be None.
     """
     token_1 = SimpleToken(TOKEN_TEXT_1)
     document = SimpleStructuredDocument(lines=[SimpleLine([token_1])])

     first_page = document.get_pages()[0]
     document.set_bounding_box(first_page, DEFAULT_BOUNDING_BOX)
     document.set_bounding_box(token_1, DEFAULT_BOUNDING_BOX)

     unpainted_image = filled_image(BG_COLOR, {TAG_1: COLOR_1})
     annotate_structured_document_using_predicted_images(
         document,
         [unpainted_image]
     )

     cv_tag = document.get_tag(token_1, scope=CV_TAG_SCOPE)
     assert cv_tag is None
    def test_should_strip_tag_prefix(self):
        """Blocks report the bare tag even when the token tag is prefixed."""
        prefixed_token = SimpleToken(
            'test',
            tag=TAG1,
            tag_prefix=B_TAG_PREFIX,
            bounding_box=DEFAULT_BOUNDING_BOX
        )
        # Precondition: the token itself exposes the prefixed tag.
        assert prefixed_token.get_tag() == B_TAG_PREFIX + TAG1

        document = SimpleStructuredDocument(
            lines=[SimpleLine([prefixed_token])]
        )
        first_page = document.get_pages()[0]
        blocks = annotation_document_page_to_annotation_blocks(
            document, first_page
        )

        block_tags = [block.tag for block in blocks]
        assert block_tags == [TAG1]
    def test_should_convert_single_token_to_block_with_same_bounding_box(self):
        """A lone tagged token becomes one block with its tag and box."""
        tagged_token = SimpleToken(
            'test',
            tag=TAG1,
            bounding_box=DEFAULT_BOUNDING_BOX
        )
        document = SimpleStructuredDocument(
            lines=[SimpleLine([tagged_token])]
        )

        first_page = document.get_pages()[0]
        merged_blocks = annotation_document_page_to_merged_blocks(
            document, first_page
        )

        # Check the count before indexing so a wrong count fails as an
        # assertion rather than as an IndexError.
        assert len(merged_blocks) == 1
        only_block = merged_blocks[0]
        assert only_block.tag == TAG1
        assert only_block.bounding_box == DEFAULT_BOUNDING_BOX