def test_root_annotations(self):
    """
    Check that the root node built from several root lines carries the annotations of every line,
    with the annotation of the second line moved past the text of the first one.
    """

    def make_root_line(text, line_id, annotation_cls, end):
        # One root-level line carrying a single annotation that covers its whole text
        return LineWithMeta(
            line=text,
            hierarchy_level=HierarchyLevel.create_root(),
            metadata=ParagraphMetadata(paragraph_type="root", predicted_classes=None, page_id=0, line_id=line_id),
            annotations=[annotation_cls(start=0, end=end, value="True")],
        )

    title_lines = [
        make_root_line("bold text\n", 0, BoldAnnotation, 10),
        make_root_line("italic text\n", 1, ItalicAnnotation, 12),
    ]

    root = TreeNode.create(lines=title_lines).get_root()
    found = root.annotations
    # Sorted in place (as the comparison below relies on the order of the start offsets)
    found.sort(key=lambda annotation: annotation.start)

    self.assertEqual(2, len(found))

    # (name, start, end): the bold range is unchanged, the italic one is shifted by the 10 characters of the first line
    expected = [
        (BoldAnnotation.name, 0, 10),
        (ItalicAnnotation.name, 10, 22),
    ]
    for (name, start, end), annotation in zip(expected, found):
        self.assertEqual(name, annotation.name)
        self.assertEqual("True", annotation.value)
        self.assertEqual(start, annotation.start)
        self.assertEqual(end, annotation.end)
def create(lines: List[LineWithMeta] = None) -> "TreeNode":
    """
    Creates a root node with the given text.

    The text of the root is the concatenation of the texts of all given lines. The annotations of each line
    are passed through ``__shift_annotations`` together with the total length of the preceding lines and
    collected into a single list for the root.

    :param lines: these lines should be the title of the document (or should be empty for documents without title);
        None (the default) is handled the same way as an empty list
    :return: root of the document tree
    """
    # The declared default is None, which supports neither len() nor iteration: normalise it first
    lines = [] if lines is None else lines

    # Smallest page/line id among the given lines, 0 when there are no lines
    page_id = min((line.metadata.page_id for line in lines), default=0)
    line_id = min((line.metadata.line_id for line in lines), default=0)

    annotations = []
    text_length = 0  # length of the text of all lines processed so far
    for line in lines:
        annotations.extend(TreeNode.__shift_annotations(line=line, text_length=text_length))
        text_length += len(line.line)

    text = "".join(line.line for line in lines)
    metadata = ParagraphMetadata(paragraph_type="root", page_id=page_id, line_id=line_id, predicted_classes=None)
    return TreeNode(
        "0",
        text,
        annotations=annotations,
        metadata=metadata,
        subparagraphs=[],
        hierarchy_level=HierarchyLevel.create_root(),
        parent=None,
    )