def test_build_string():
    """Check String.from_xml without and with an <Alternative> child."""
    # Attributes expected on the parsed String in both scenarios.
    shared = {
        "id": "string_3",
        "hpos": 712,
        "vpos": 133,
        "width": 55,
        "height": 13,
        "confidence": 0.92,
        "content": "Liberté",
    }

    # Case 1: self-closing element, so no alternatives are expected.
    bare_xml = (
        '<String ID="string_3" HPOS="712" VPOS="133" WIDTH="55" '
        'HEIGHT="13" WC="0.92" CONTENT="Liberté"/>'
    )
    bare_element = _build_xml(bare_xml)
    assert String.from_xml(bare_element) == String(**shared, alternatives=[])

    # Case 2: one nested <Alternative> is expected in `alternatives`.
    nested_xml = (
        ' <String ID="string_3" HPOS="712" VPOS="133" WIDTH="55" '
        'HEIGHT="13" WC="0.92" CONTENT="Liberté"> '
        '<Alternative>alt</Alternative> '
        '</String> '
    )
    nested_element = _build_xml(nested_xml)
    assert String.from_xml(nested_element) == String(
        **shared, alternatives=[Alternative("alt")]
    )
def test_build_text_block():
    """Check TextBlock.from_xml on a block with one line holding a String and an SP."""
    block_xml = (
        ' <TextBlock ID="block_1" HPOS="712" VPOS="129" WIDTH="235" HEIGHT="53"> '
        '<TextLine ID="line_2" HPOS="712" VPOS="129" WIDTH="235" HEIGHT="21"> '
        '<String ID="string_3" HPOS="712" VPOS="133" WIDTH="55" HEIGHT="13" '
        'WC="0.92" CONTENT="abc"/> '
        '<SP WIDTH="9" VPOS="133" HPOS="767"/> '
        '</TextLine> '
        '</TextBlock> '
    )
    parsed_from = _build_xml(block_xml)

    # Expected hierarchy, built inside-out: String + SP -> TextLine -> TextBlock.
    expected_word = String(
        id="string_3",
        hpos=712,
        vpos=133,
        width=55,
        height=13,
        confidence=0.92,
        content="abc",
        alternatives=[],
    )
    expected_space = SP(hpos=767, vpos=133, width=9)
    expected_line = TextLine(
        id="line_2",
        hpos=712,
        vpos=129,
        width=235,
        height=21,
        strings=[expected_word, expected_space],
    )
    expected_block = TextBlock("block_1", 53, 235, 712, 129, [expected_line])

    assert TextBlock.from_xml(parsed_from) == expected_block
def test_page_extract_strings():
    """Check Page.extract_strings on progressively deeper layouts.

    The result is expected to be empty until a String is actually present
    inside a TextLine.
    """

    def page_with(print_spaces):
        # Every scenario uses the same page-level arguments.
        return Page("page_0", 2339, 1654, 0, None, print_spaces)

    def print_space(blocks):
        return PrintSpace(1, 1, 1, 1, 1, blocks)

    # Page without any print space.
    assert page_with([]).extract_strings() == []

    # Print space without blocks.
    assert page_with([print_space([])]).extract_strings() == []

    # Composed block without children.
    childless = ComposedBlock("", 1, 1, 1, 1, [])
    assert page_with([print_space([childless])]).extract_strings() == []

    # Composed block wrapping a text block that has no lines.
    lineless = TextBlock("", 1, 1, 1, 1, [])
    wrapper = ComposedBlock("", 1, 1, 1, 1, [lineless])
    assert page_with([print_space([wrapper])]).extract_strings() == []

    # Full nesting down to a single String.
    single_line = TextLine("", 1, 1, 1, 1, [String("", 1, 1, 1, 1, "", 0, [])])
    populated = TextBlock("", 1, 1, 1, 1, [single_line])
    wrapper = ComposedBlock("", 1, 1, 1, 1, [populated])
    full_page = page_with([print_space([wrapper])])
    assert full_page.extract_strings() == [String("", 1, 1, 1, 1, "", 0, [])]
def _string(word: str = 'abc') -> String:
    """Build a String fixture with fixed geometry and confidence.

    Only the text content varies; it is taken from ``word``.
    """
    fixture = String(
        id="string_3",
        hpos=712,
        vpos=133,
        width=55,
        height=13,
        confidence=0.92,
        content=word,
        alternatives=[],
    )
    return fixture
def test_build_text_line():
    """Check TextLine.from_xml on a populated line and on an empty one."""

    def expected_line(children):
        # Both scenarios share the same line-level attributes.
        return TextLine(
            id="line_2", hpos=712, vpos=129, width=235, height=21, strings=children
        )

    # Line containing one String followed by one SP.
    populated_xml = (
        ' <TextLine ID="line_2" HPOS="712" VPOS="129" WIDTH="235" HEIGHT="21"> '
        '<String ID="string_3" HPOS="712" VPOS="133" WIDTH="55" HEIGHT="13" '
        'WC="0.92" CONTENT="abc"/> '
        '<SP WIDTH="9" VPOS="133" HPOS="767"/> '
        '</TextLine> '
    )
    populated = _build_xml(populated_xml)
    children = [
        String(
            id="string_3",
            hpos=712,
            vpos=133,
            width=55,
            height=13,
            confidence=0.92,
            content="abc",
            alternatives=[],
        ),
        SP(hpos=767, vpos=133, width=9),
    ]
    assert TextLine.from_xml(populated) == expected_line(children)

    # Line with no child elements.
    empty_xml = (
        ' <TextLine ID="line_2" HPOS="712" VPOS="129" WIDTH="235" HEIGHT="21"> '
        '</TextLine> '
    )
    empty = _build_xml(empty_xml)
    assert TextLine.from_xml(empty) == expected_line([])
def test_load_string_or_sp():
    """Check _load_string_or_sp on <SP>, <String>, and an unsupported element.

    An <SP> element is expected to load as SP, a <String> element as String,
    and an <Alternative> element is expected to raise ValueError.
    """
    xml_str = """<SP WIDTH="9" VPOS="133" HPOS="767"/>"""
    element = _build_xml(xml_str)
    assert _load_string_or_sp(element) == SP(hpos=767, vpos=133, width=9)

    xml_str = (
        '<String ID="string_3" HPOS="712" VPOS="133" WIDTH="55" '
        'HEIGHT="13" WC="0.92" CONTENT="abc"/>'
    )
    element = _build_xml(xml_str)
    assert _load_string_or_sp(element) == String(
        id="string_3",
        hpos=712,
        vpos=133,
        width=55,
        height=13,
        confidence=0.92,
        content="abc",
        alternatives=[],
    )

    # Build the fixture outside the `raises` block: only the call under test
    # may satisfy the expectation, so a ValueError raised while parsing the
    # fixture fails the test instead of silently passing it.
    xml_str = """<Alternative>test</Alternative>"""
    element = _build_xml(xml_str)
    with pytest.raises(ValueError):
        _load_string_or_sp(element)