def get_text_line_from_xml_element(xml_element):
    """Build a ``TextLine`` from an XML element and its ``text`` children.

    The line's bounding box is parsed from the element's ``bbox``
    attribute. Every direct ``text`` child that carries at least one
    attribute is converted with ``get_text_from_xml_element`` and appended
    to the line's ``texts``; children without attributes are skipped.
    """
    line = TextLine(bbox_from_string(xml_element.attrib['bbox']))
    for child in xml_element.findall('./text'):
        # Attribute-less children are ignored rather than converted.
        if child.attrib:
            line.texts.append(get_text_from_xml_element(child))
    return line
def get_text_from_xml_element(xml_element):
    """Build a ``Text`` from an XML element.

    Only the first character of the element's text is kept. When the
    element's text is not a string (for example ``None``) or is an empty
    string, a single space is used instead.

    The bounding box is optional: it is parsed with ``bbox_from_string``
    when the element has a ``bbox`` attribute and is ``None`` otherwise.

    The ``font``, ``size``, ``colourspace`` and ``ncolour`` attributes are
    read unconditionally, so the lookup fails if one is missing, and
    ``float`` raises ``ValueError`` if ``size`` is not numeric.
    """
    attrib = xml_element.attrib
    raw_text = xml_element.text

    # Check truthiness as well as type: indexing an empty string would
    # raise IndexError. One index is enough to take the first character.
    if isinstance(raw_text, str) and raw_text:
        text = raw_text[0]
    else:
        text = ' '

    bbox = bbox_from_string(attrib['bbox']) if 'bbox' in attrib else None

    style = TextStyle(
        attrib['font'],
        float(attrib['size']),
        attrib['colourspace'],
        attrib['ncolour'],
    )
    return Text(bbox, text, style)
def get_page_from_xml_element(xml_element):
    """Build a ``Page`` from an XML element and its ``textbox`` children.

    The page's bounding box is parsed from the element's ``bbox``
    attribute. Each direct ``textbox`` child is converted with
    ``get_text_box_from_xml_element`` and appended to the page's
    ``text_boxes`` in document order.
    """
    page_bbox = bbox_from_string(xml_element.attrib['bbox'])
    result = Page(page_bbox)
    for node in xml_element.findall('./textbox'):
        result.text_boxes.append(get_text_box_from_xml_element(node))
    return result
def setUp(self):
    """Parse a sample bbox string and store the result on ``self.bbox``."""
    sample = '459.840,753.697,462.075,765.210'
    self.bbox = bbox_from_string(sample)
def test_five_points(self):
    # A string with five comma-separated values must be rejected.
    self.assertRaises(InvalidBboxString, bbox_from_string, '1,2,3,4,5')
def test_two_points(self):
    # A string with only two comma-separated values must be rejected.
    self.assertRaises(InvalidBboxString, bbox_from_string, '1,2')
def test_one_point(self):
    # A string holding a single value must be rejected.
    self.assertRaises(InvalidBboxString, bbox_from_string, '1')