def test_should_not_merge_right_adjacent_block_with_same_different_tag(
            self):
        """Two blocks with different tags are both kept by merge_blocks."""
        left = AnnotationBlock(TAG1, DEFAULT_BOUNDING_BOX)
        # The second box is the first one moved by its own width.
        shifted_box = left.bounding_box.move_by(left.bounding_box.width, 0)
        right = AnnotationBlock(TAG2, shifted_box)

        result_tags = [block.tag for block in merge_blocks([left, right])]
        assert result_tags == [TAG1, TAG2]
    def test_should_merge_right_adjacent_block_with_same_tag(self):
        """Two same-tag blocks, offset by one box width, merge into one."""
        left = AnnotationBlock(TAG1, DEFAULT_BOUNDING_BOX)
        shifted_box = left.bounding_box.move_by(left.bounding_box.width, 0)
        right = AnnotationBlock(TAG1, shifted_box)

        result = merge_blocks([left, right])
        result_tags = [block.tag for block in result]
        assert result_tags == [TAG1]

        # The merged block is expected to span both input boxes.
        combined_box = left.bounding_box.include(right.bounding_box)
        assert result[0].bounding_box == combined_box
    def test_should_not_merge_too_far_away_block_with_same_tag(self):
        """Same-tag blocks one unit beyond the tolerance stay separate."""
        left = AnnotationBlock(TAG1, DEFAULT_BOUNDING_BOX)
        # One more than box width plus tolerance.
        offset = left.bounding_box.width + DEFAULT_NEARBY_TOLERANCE + 1
        right = AnnotationBlock(TAG1, left.bounding_box.move_by(offset, 0))

        result = merge_blocks(
            [left, right], nearby_tolerance=DEFAULT_NEARBY_TOLERANCE)
        result_tags = [block.tag for block in result]
        assert result_tags == [TAG1, TAG1]
    def test_should_merge_right_nearby_block_with_same_tag(self):
        """Same-tag blocks offset by width plus the tolerance are merged."""
        left = AnnotationBlock(TAG1, DEFAULT_BOUNDING_BOX)
        # Box width plus exactly the tolerance passed to merge_blocks.
        offset = left.bounding_box.width + DEFAULT_NEARBY_TOLERANCE
        right = AnnotationBlock(TAG1, left.bounding_box.move_by(offset, 0))

        result = merge_blocks(
            [left, right], nearby_tolerance=DEFAULT_NEARBY_TOLERANCE)
        result_tags = [block.tag for block in result]
        assert result_tags == [TAG1]

        # The merged block is expected to span both input boxes.
        combined_box = left.bounding_box.include(right.bounding_box)
        assert result[0].bounding_box == combined_box
    def test_should_merge_multiple_sequential_right_adjacent_blocks(self):
        """Three same-tag blocks, each offset by one width, merge into one."""
        first = AnnotationBlock(TAG1, DEFAULT_BOUNDING_BOX)
        second_box = first.bounding_box.move_by(first.bounding_box.width, 0)
        second = AnnotationBlock(TAG1, second_box)
        third_box = second.bounding_box.move_by(second.bounding_box.width, 0)
        third = AnnotationBlock(TAG1, third_box)

        result = merge_blocks([first, second, third])
        result_tags = [block.tag for block in result]
        assert result_tags == [TAG1]

        # Expected box: the first box grown to include the second, then the
        # third.
        first_two = first.bounding_box.include(second.bounding_box)
        all_three = first_two.include(third.bounding_box)
        assert result[0].bounding_box == all_three
Example #6
0
def svg_page_to_blockified_png_bytes(svg_page, color_map, image_size=None):
    """Render the annotation blocks of an SVG page as PNG bytes.

    The page is wrapped in an ``SvgStructuredDocument``. The annotation
    blocks of its first page are merged, expanded and drawn on a white
    background whose width and height come from the SVG ``viewBox``
    attribute.

    Args:
        svg_page: SVG element exposing an ``attrib`` mapping with a
            ``viewBox`` entry.
        color_map: passed through to ``annotated_blocks_to_image``.
        image_size: optional value forwarded to
            ``annotated_blocks_to_image`` as ``scale_to_size``.

    Returns:
        The bytes of the image saved in PNG format.

    Raises:
        RuntimeError: if the ``viewBox`` attribute is missing or empty.
    """
    structured_document = SvgStructuredDocument(svg_page)
    first_page = structured_document.get_pages()[0]
    blocks = expand_blocks(
        merge_blocks(
            annotation_document_page_to_annotation_blocks(
                structured_document,
                first_page
            )
        )
    )
    viewbox = svg_page.attrib.get('viewBox')
    if not viewbox:
        raise RuntimeError(
            'viewbox missing on svg, available attributes: %s' % svg_page.attrib.keys()
        )
    # SVG allows the four viewBox numbers to be separated by whitespace
    # and/or commas; turn commas into spaces so both forms parse.
    _, _, width, height = viewbox.replace(',', ' ').split()
    image = annotated_blocks_to_image(
        blocks, color_map,
        width=float(width), height=float(height), background='white',
        scale_to_size=image_size
    )
    out = BytesIO()
    image.save(out, 'png')
    return out.getvalue()
    def test_should_return_same_single_blocks(self):
        """merge_blocks on a single block equals a list of just that block."""
        only_block = AnnotationBlock(TAG1, DEFAULT_BOUNDING_BOX)

        result = merge_blocks([only_block])
        assert result == [only_block]