def test_annotation_merge_mixed(self):
    """
    Two touching "bold" spans and two touching "italic" spans are passed together;
    the test expects one merged span per annotation name.
    """
    bold_spans = [
        Annotation(start=0, end=5, name="bold", value="True"),
        Annotation(start=5, end=15, name="bold", value="True"),
    ]
    italic_spans = [
        Annotation(start=4, end=6, name="italic", value="True"),
        Annotation(start=6, end=66, name="italic", value="True"),
    ]
    expected = {(0, 15, "bold", "True"), (4, 66, "italic", "True")}
    self.assertSetEqual(expected, self.merge(bold_spans + italic_spans))
def test_annotation_merge_same_value_no_spaces(self):
    """
    Two "size" annotations with the same value where the second starts exactly
    where the first ends; the test expects a single (0, 15) span.
    """
    text = "hellomyfriend"
    left = Annotation(start=0, end=5, name="size", value="1")
    right = Annotation(start=5, end=15, name="size", value="1")
    merged = self.merge([left, right], text)
    self.assertSetEqual({(0, 15, "size", "1")}, merged)
def test_annotation_merge_same_value_separating_by_newline(self):
    """
    Two equal-valued "size" annotations with a one-character gap (index 5, the
    newline in the text); the test expects them to be joined into (0, 15).
    """
    text = "hello\nmy friend"
    before_newline = Annotation(start=0, end=5, name="size", value="1")
    after_newline = Annotation(start=6, end=15, name="size", value="1")
    merged = self.merge([before_newline, after_newline], text)
    self.assertSetEqual({(0, 15, "size", "1")}, merged)
def test_annotation_merge_same_value_separating_by_many_space(self):
    """
    Two equal-valued "size" annotations at 0-5 and 20-25; the test expects a
    single (0, 25) span.
    """
    # NOTE(review): this literal is only 15 characters long although the second
    # annotation covers 20-25 - presumably a run of spaces was collapsed at some
    # point; confirm against the intended fixture.
    text = "hello my friend"
    head = Annotation(start=0, end=5, name="size", value="1")
    tail = Annotation(start=20, end=25, name="size", value="1")
    merged = self.merge([head, tail], text)
    self.assertSetEqual({(0, 25, "size", "1")}, merged)
def test_annotation_merge_different_value(self):
    """
    A "bold" span followed immediately by an "italic" span; the test expects
    both to come back unchanged because their names differ.
    """
    text = "hello my friend"
    bold = Annotation(start=0, end=5, name="bold", value="True")
    italic = Annotation(start=5, end=15, name="italic", value="True")
    expected = {
        (0, 5, "bold", "True"),
        (5, 15, "italic", "True"),
    }
    self.assertSetEqual(expected, self.merge([bold, italic], text))
def test_annotation_merge_three_nested_annotations(self):
    """
    One "size" annotation spanning 0-15 plus two equal-valued ones lying inside
    it; the test expects only the outer (0, 15) span to remain.
    """
    text = "hello my friend"
    bounds = [(0, 15), (6, 10), (3, 8)]
    annotations = []
    for begin, finish in bounds:
        annotations.append(Annotation(start=begin, end=finish, name="size", value="1"))
    merged = self.merge(annotations, text)
    self.assertSetEqual({(0, 15, "size", "1")}, merged)
def test_annotation_merge_three_one_intersected_annotations(self):
    """
    Three equal-valued "size" annotations: the first two touch at index 3, the
    third starts at 8 after a gap. The test expects (0, 6) and (8, 15).
    """
    text = "hello my friend"
    bounds = [(0, 3), (3, 6), (8, 15)]
    annotations = []
    for begin, finish in bounds:
        annotations.append(Annotation(start=begin, end=finish, name="size", value="1"))
    expected = {(0, 6, "size", "1"), (8, 15, "size", "1")}
    self.assertSetEqual(expected, self.merge(annotations, text))
def test_merge_1000_pair_annotations(self):
    """
    Feed 1000 one-character "bold" spans interleaved with 1000 one-character
    "size" spans and expect each name to collapse into a single (0, n) span.

    The merge call runs under TestTimeout(10) - presumably a guard that fails
    the test when the call takes too long; verify against its definition.
    """
    timeout = 10
    n = 1000
    name_value_pairs = (("bold", "True"), ("size", "1"))
    # Same order as appending bold then size for every position.
    annotations = [
        Annotation(start=i, end=i + 1, name=name, value=value)
        for i in range(n)
        for name, value in name_value_pairs
    ]
    text = "x" * n
    with TestTimeout(timeout):
        result = self.merge(annotations, text)
    expected = {(0, n, "bold", "True"), (0, n, "size", "1")}
    self.assertSetEqual(expected, result)
def _merge_annotations(annotations: List[Annotation]) -> List[Annotation]:
    """
    Merge annotations of the same group when the end of the first annotation
    equals the start of the second one. Used with add_text.

    Annotations are grouped by TreeNode._group_annotations. Inside a group every
    pair (first, second) with first.end == second.start produces one new
    annotation spanning first.start..second.end, and both originals are dropped
    from the result.

    Merging is pairwise and done in a single pass: three annotations touching in
    a chain (0-3, 3-6, 6-9) yield two overlapping results (0-6 and 3-9), not one.

    :param annotations: annotations to merge (the list itself is not modified)
    :return: untouched annotations plus the merged ones, sorted by start
    """
    groups = TreeNode._group_annotations(annotations)
    merged = []
    # Keys are (end, start, name, value) of every annotation consumed by a merge.
    consumed = set()
    for group in groups.values():
        # Index the group by start so partners of `first` are found with one
        # lookup instead of rescanning the whole group for every annotation.
        by_start = {}
        for candidate in group:
            by_start.setdefault(candidate.start, []).append(candidate)
        for first in group:
            for second in by_start.get(first.end, ()):
                merged.append(Annotation(start=first.start,
                                         end=second.end,
                                         name=first.name,
                                         value=first.value))
                consumed.add((first.end, first.start, first.name, first.value))
                consumed.add((second.end, second.start, second.name, second.value))
    untouched = [
        annotation for annotation in annotations
        if (annotation.end, annotation.start, annotation.name, annotation.value) not in consumed
    ]
    return sorted(untouched + merged, key=lambda a: a.start)
def __shift_annotations(line: LineWithMeta, text_length: int) -> List[Annotation]:
    """
    Build copies of the line's annotations moved right by text_length.

    :param line: line whose annotations are copied (the line is not modified)
    :param text_length: offset added to both start and end of every annotation
    :return: new annotations in the same order as line.annotations
    """
    return [
        Annotation(start=source.start + text_length,
                   end=source.end + text_length,
                   name=source.name,
                   value=source.value)
        for source in line.annotations
    ]
def test_merge_1000_annotations(self):
    """
    Feed 1000 consecutive one-character "bold" spans and expect them to collapse
    into a single (0, n) span.

    The merge call runs under TestTimeout(10) - presumably a guard that fails
    the test when the call takes too long; verify against its definition.
    """
    timeout = 10
    n = 1000
    annotations = []
    for position in range(n):
        annotations.append(Annotation(start=position, end=position + 1, name="bold", value="True"))
    text = "x" * n
    with TestTimeout(timeout):
        result = self.merge(annotations, text)
    expected = {(0, n, "bold", "True")}
    self.assertSetEqual(expected, result)
def merge(self) -> Optional[Annotation]:
    """
    Collapse self.annotations into one annotation covering all of them.

    :return: None when there are no annotations; otherwise an annotation running
        from the smallest start to the largest end, with name and value taken
        from the first annotation in the list
    """
    if len(self.annotations) == 0:
        return None
    template = self.annotations[0]
    leftmost = min(item.start for item in self.annotations)
    rightmost = max(item.end for item in self.annotations)
    return Annotation(start=leftmost, end=rightmost, value=template.value, name=template.name)
def test_merge_1000_no_intersection(self):
    """
    Feed one-character "bold" spans placed at every second position and expect
    the merger to return them unchanged.

    The merge call runs under TestTimeout(10) - presumably a guard that fails
    the test when the call takes too long; verify against its definition.
    """
    timeout = 10
    n = 1000
    annotations = [
        Annotation(start=i, end=i + 1, name="bold", value="True")
        for i in range(0, n, 2)
    ]
    text = "x" * (2 * n)
    with TestTimeout(timeout):
        result = self.merge(annotations, text)
    expected = {(item.start, item.end, item.name, item.value) for item in annotations}
    self.assertSetEqual(expected, result)
def add_text(self, line: LineWithMeta):
    """
    Append the text of the given line to this node and take over its annotations.

    line.line is concatenated to self.text as is - no separator is inserted
    here, so a newline between lines must already be part of line.line.
    The line's annotations are copied with start and end shifted by the length
    of the text the node had before the append, then all annotations of the
    node are passed through self._merge_annotations.

    :param line: line with text (and annotations) to add
    :return: None
    """
    # The new text begins at this offset inside self.text, so both borders of
    # every incoming annotation move by exactly this amount. (Previously start
    # was shifted by one less than end, which stretched each annotation one
    # character to the left and gave start == -1 on an empty node.)
    offset = len(self.text)
    shifted = [
        Annotation(start=annotation.start + offset,
                   end=annotation.end + offset,
                   name=annotation.name,
                   value=annotation.value)
        for annotation in line.annotations
    ]
    self.text += line.line
    self.annotations.extend(shifted)
    self.annotations = self._merge_annotations(self.annotations)
def get_api_dict(api: Api, depth: int = 0, name: str = 'TreeNode') -> Model:
    """
    Register and return the API model that describes a tree node.

    The 'subparagraphs' field is described recursively: each level nests a model
    named 'refTreeNode<depth>' built by this same function with depth + 1. When
    depth equals the config value 'recursion_deep_subparagraphs' the recursion
    stops and an empty model named 'others_TreeNode' is nested instead.

    :param api: api object used to create the models
    :param depth: current nesting level of the description
    :param name: name under which the model of this level is registered
    :return: the model returned by api.model for this level
    """
    children_description = "Node childes (with type 'TreeNode') of structure tree"

    # Fields are created in the same order as they appear in the model.
    node_fields = {
        'node_id': fields.String(
            description="Document element identifier. It is unique within one tree (i.e. "
                        "there will be no other such node_id in this tree, but in attachment "
                        "it may occur) The identifier has the form 0.2.1 where each number "
                        "means a serial number at the corresponding level of the hierarchy.",
            required=True,
            example="0.2.1"),
        'text': fields.String(description="text of node", required=True, example="Закон"),
        'annotations': fields.List(
            fields.Nested(Annotation.get_api_dict(api),
                          description="Text annotations "
                                      "(font, size, bold, italic and etc)")),
        'metadata': fields.Nested(ParagraphMetadata.get_api_dict(api),
                                  skip_none=True,
                                  allow_null=False,
                                  description="Paragraph meta information"),
    }

    if depth == get_config()['recursion_deep_subparagraphs']:
        # Deepest described level: children are an opaque, empty model.
        child_model = api.model('others_TreeNode', {})
    else:
        child_model = TreeNode.get_api_dict(api, depth=depth + 1, name='refTreeNode' + str(depth))
    node_fields['subparagraphs'] = fields.List(fields.Nested(child_model),
                                               description=children_description)

    return api.model(name, node_fields)
def test_annotation_unmerge_same_value(self):
    """
    Two equal-valued "size" annotations separated by a one-position gap (4 to 5),
    merged without a text argument; the test expects both to stay separate.
    """
    first = Annotation(start=0, end=4, name="size", value="1")
    second = Annotation(start=5, end=15, name="size", value="1")
    expected = {
        (0, 4, "size", "1"),
        (5, 15, "size", "1"),
    }
    self.assertSetEqual(expected, self.merge([first, second]))
def test_annotation_merge_same_value2(self):
    """
    Two equal-valued "size" annotations touching at index 5, the first one not
    starting at zero; the test expects a single (4, 15) span.
    """
    first = Annotation(start=4, end=5, name="size", value="1")
    second = Annotation(start=5, end=15, name="size", value="1")
    merged = self.merge([first, second])
    self.assertSetEqual({(4, 15, "size", "1")}, merged)
def test_annotation_merge_one_near_space(self):
    """
    A single "size" annotation that ends right before the space at index 5;
    the test expects it to come back unchanged.
    """
    text = "hello my friend"
    only = Annotation(start=0, end=5, name="size", value="1")
    merged = self.merge([only], text)
    self.assertSetEqual({(0, 5, "size", "1")}, merged)