def test_merging_regions(self):
    """Check that ParagraphsMergingFilter collapses a body into one paragraph.

    Despite its name, this test exercises the paragraphs merging filter.
    Before filtering, the single region holds one body with two divs of two
    paragraphs each. After filtering, a single div with a single paragraph
    is expected, whose children alternate between text-bearing elements and
    ``Br`` line breaks, in the original reading order.
    """
    merger = ParagraphsMergingFilter()

    isd = ISD(None)
    region = ISD.Region("r1", isd)
    filled_body = self._get_filled_body(
        isd,
        ["Hello", "world"],
        ["Is there", "anyone here?"],
    )
    region.push_child(filled_body)
    isd.put_region(region)

    # Initial structure: 1 region > 1 body > 2 divs > 2 paragraphs each.
    initial_regions = list(isd.iter_regions())
    self.assertEqual(1, len(initial_regions))

    initial_bodies = list(initial_regions[0])
    self.assertEqual(1, len(initial_bodies))

    initial_divs = list(initial_bodies[0])
    self.assertEqual(2, len(initial_divs))

    for div_index in (0, 1):
        div_paragraphs = list(initial_divs[div_index])
        self.assertEqual(2, len(div_paragraphs))

    merger.process(isd)

    # Structure after merging: 1 region > 1 body > 1 div > 1 paragraph.
    merged_regions = list(isd.iter_regions())
    self.assertEqual(1, len(merged_regions))

    merged_bodies = list(merged_regions[0])
    self.assertEqual(1, len(merged_bodies))

    merged_divs = list(merged_bodies[0])
    self.assertEqual(1, len(merged_divs))

    merged_paragraphs = list(merged_divs[0])
    self.assertEqual(1, len(merged_paragraphs))

    # Text elements sit at even positions, separated by Br elements at the
    # odd positions in between; nothing follows the last text element.
    content = list(merged_paragraphs[0])
    expected_texts = ("Hello", "world", "Is there", "anyone here?")
    last_index = len(expected_texts) - 1

    for index, expected in enumerate(expected_texts):
        actual = self._get_text_from_children(content[2 * index])
        self.assertEqual(expected, actual)
        if index < last_index:
            self.assertIsInstance(content[2 * index + 1], Br)
def test_merging_regions(self):
    """Check that RegionsMergingFilter folds two regions into a single one.

    Two regions ("r1" and "r2"), each holding one filled body, are put into
    the ISD. After filtering, exactly one region remains, retrievable under
    the id "r1_r2", with one body containing two divs that carry the two
    original texts in order.
    """
    merger = RegionsMergingFilter()

    isd = ISD(None)

    first_region = ISD.Region("r1", isd)
    first_body = self._get_filled_body(isd, "Hello world")
    first_region.push_child(first_body)

    second_region = ISD.Region("r2", isd)
    second_body = self._get_filled_body(isd, "Is there anyone here?")
    second_region.push_child(second_body)

    for region in (first_region, second_region):
        isd.put_region(region)

    regions_before = list(isd.iter_regions())
    self.assertEqual(2, len(regions_before))

    merger.process(isd)

    regions_after = list(isd.iter_regions())
    self.assertEqual(1, len(regions_after))

    # The merged region id is the original ids joined with an underscore.
    merged = isd.get_region("r1_r2")
    self.assertIsNotNone(merged)

    bodies = list(merged)
    self.assertEqual(1, len(bodies))

    divs = list(bodies[0])
    self.assertEqual(2, len(divs))

    expected_texts = ("Hello world", "Is there anyone here?")
    for position, expected in enumerate(expected_texts):
        actual = self._get_text_from_children(divs[position])
        self.assertEqual(expected, actual)
def process(self, isd: ISD) -> None:
    """Merges the ISD document regions.

    Every child of every body found in the ISD regions is moved into one
    new ``Body``. The original regions are removed from the ISD and
    replaced by a single region whose id is the original region ids joined
    with ``"_"``, in iteration order.

    Nothing is changed when the ISD has at most one region, or when the
    sum of ``len(region)`` over all regions is at most one.

    Args:
        isd: The ISD document to process; it is modified in place.
    """
    LOGGER.debug("Apply regions merging filter to ISD.")

    # Snapshot the regions: they are removed from the ISD further below.
    original_regions = list(isd.iter_regions())

    # Total of len(region) across regions (presumably the number of bodies
    # they hold -- confirm against the Region implementation).
    not_empty_regions = sum(len(region) for region in original_regions)

    if len(original_regions) <= 1 or not_empty_regions <= 1:
        return

    LOGGER.warning("Merging ISD regions.")

    target_body = Body(isd)
    region_ids = []

    for region in original_regions:
        region_id = region.get_id()

        for body in region:
            # Iterate over a snapshot of the children: each child is
            # detached from `body` inside the loop, and mutating a
            # container while iterating over it may skip elements or end
            # the iteration early.
            for child in list(body):
                # Remove child from its parent body
                child.remove()

                # Add it to the target body
                target_body.push_child(child)

        region_ids.append(region_id)
        isd.remove_region(region_id)

    target_region = ISD.Region("_".join(region_ids), isd)
    target_region.push_child(target_body)
    isd.put_region(target_region)