Exemplo n.º 1
0
 def test_extend_AllIndexed_validate_continuity(self):
     """Extending with colliding indexes must raise when continuity is validated.

     Parses the TEMP1 PAGE asset, takes its top-level OrderedGroup and
     expects ``extend_AllIndexed(..., validate_continuity=True)`` to raise
     an exception whose message matches "@index already used: 1".
     """
     page_path = assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml')
     with open(page_path, 'r') as page_file:
         pcgts = parseString(page_file.read().encode('utf8'), silence=True)
         ordered_group = pcgts.get_Page().get_ReadingOrder().get_OrderedGroup()
         with self.assertRaisesRegex(Exception, "@index already used: 1"):
             # the references are built inside the context manager so that
             # any constructor failure is also subject to the assertion
             ordered_group.extend_AllIndexed(
                 [
                     RegionRefIndexedType(index=3, id='r3'),
                     RegionRefIndexedType(index=2, id='r2'),
                     RegionRefIndexedType(index=1, id='r1'),
                 ],
                 validate_continuity=True,
             )
Exemplo n.º 2
0
 def test_extend_AllIndexed_no_validation(self):
     """Extending without validation appends the references with fresh indexes.

     Three references carrying indexes 3, 2, 1 are appended to the TEMP1
     OrderedGroup; the last three RegionRefIndexed entries are then expected
     to carry the indexes 22, 23 and 24.
     """
     page_path = assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml')
     with open(page_path, 'r') as page_file:
         pcgts = parseString(page_file.read().encode('utf8'), silence=True)
         ordered_group = pcgts.get_Page().get_ReadingOrder().get_OrderedGroup()
         appended_refs = [
             RegionRefIndexedType(index=3, id='r3'),
             RegionRefIndexedType(index=2, id='r2'),
             RegionRefIndexedType(index=1, id='r1'),
         ]
         ordered_group.extend_AllIndexed(appended_refs)
         all_indexes = [ref.index for ref in ordered_group.get_RegionRefIndexed()]
         self.assertEqual(all_indexes[-3:], [22, 23, 24])
Exemplo n.º 3
0
def test_extend_all_indexed_validate_continuity():
    """Extending with colliding indexes must raise when continuity is validated."""
    # arrange: load the top-level OrderedGroup of the TEMP1 PAGE asset
    page_path = assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml')
    with open(page_path, 'r') as page_file:
        pcgts = parseString(page_file.read().encode('utf8'), silence=True)
        ordered_group = pcgts.get_Page().get_ReadingOrder().get_OrderedGroup()

    # act: append references whose indexes collide with existing ones
    with pytest.raises(Exception) as index_exc:
        ordered_group.extend_AllIndexed(
            [
                RegionRefIndexedType(index=3, id='r3'),
                RegionRefIndexedType(index=2, id='r2'),
                RegionRefIndexedType(index=1, id='r1'),
            ],
            validate_continuity=True,
        )

    # assert: the error message names the first colliding index
    assert "@index already used: 1" in str(index_exc.value)
Exemplo n.º 4
0
def xml_reading_order(page, order_of_texts, id_of_marginalia):
    """Attach a fresh ReadingOrder with one OrderedGroup to ``page``.

    Text regions are referenced first, in the sequence given by
    ``order_of_texts`` (each entry plus one is turned into a region id by
    the id counter); marginalia follow, referenced by the ids given in
    ``id_of_marginalia``. Every reference gets the counter's current
    'region' value (as a string) as its index, and the counter is
    incremented after each reference.

    Args:
        page: PAGE page object receiving the reading order.
        order_of_texts: sequence of zero-based text region numbers.
        id_of_marginalia: iterable of region ids of marginalia.
    """
    reading_order = ReadingOrderType()
    ordered_group = OrderedGroupType(id="ro357564684568544579089")
    page.set_ReadingOrder(reading_order)
    reading_order.set_OrderedGroup(ordered_group)

    counter = EynollahIdCounter()

    # main text regions, in reading sequence
    for text_number in order_of_texts:
        text_ref = RegionRefIndexedType(
            index=str(counter.get('region')),
            regionRef=counter.region_id(text_number + 1))
        ordered_group.add_RegionRefIndexed(text_ref)
        counter.inc('region')

    # marginalia are appended after all text regions
    for marginal_id in id_of_marginalia:
        marginal_ref = RegionRefIndexedType(
            index=str(counter.get('region')),
            regionRef=marginal_id)
        ordered_group.add_RegionRefIndexed(marginal_ref)
        counter.inc('region')
Exemplo n.º 5
0
    def process(self):
        """
        Performs the region segmentation.

        For every input file the page image is handed to Tesseract, and each
        detected block component becomes a TextRegion plus a matching
        RegionRefIndexed entry in the page's reading order. The resulting
        PAGE document is added to the workspace in the output file group.
        """
        with tesserocr.PyTessBaseAPI(path=TESSDATA_PREFIX) as tessapi:
            for n, input_file in enumerate(self.input_files):
                pcgts = page_from_file(self.workspace.download_file(input_file))
                page = pcgts.get_Page()
                image = self.workspace.resolve_image_as_pil(page.imageFilename)
                log.debug("Detecting regions with tesseract")
                tessapi.SetImage(image)
                components = tessapi.GetComponentImages(tesserocr.RIL.BLOCK, True)
                for component in components:
                    points = points_from_xywh(component[1])
                    index = component[2]
                    region_id = "region%04d" % index
                    log.debug("Detected region '%s': %s", region_id, points)

                    # <pg:ReadingOrder> (created lazily, on the first block)
                    reading_order = page.get_ReadingOrder()
                    if reading_order is None:
                        reading_order = ReadingOrderType()
                        page.set_ReadingOrder(reading_order)

                    # <pg:OrderedGroup> (created lazily as well)
                    ordered_group = reading_order.get_OrderedGroup()
                    if ordered_group is None:
                        ordered_group = OrderedGroupType(id="reading-order")
                        reading_order.set_OrderedGroup(ordered_group)

                    # <pg:RegionRefIndexed> pointing at the region added below
                    region_ref = RegionRefIndexedType(regionRef=region_id,
                                                      index=index)
                    ordered_group.add_RegionRefIndexed(region_ref)

                    # the text region itself
                    text_region = TextRegionType(
                        id=region_id, Coords=CoordsType(points=points))
                    page.add_TextRegion(text_region)

                file_id = concat_padded(self.output_file_grp, n)
                local_filename = '%s/%s' % (self.output_file_grp, file_id)
                self.workspace.add_file(
                    ID=file_id,
                    file_grp=self.output_file_grp,
                    mimetype=MIMETYPE_PAGE,
                    local_filename=local_filename,
                    content=to_xml(pcgts).encode('utf-8'),
                )
Exemplo n.º 6
0
    def _process_segment(self, page_image, page, page_xywh, page_id,
                         input_file, n, mrcnn_model, class_names, mask):
        """Detect block regions on a page image and add them to ``page``.

        Runs ``mrcnn_model.detect`` on the page image, optionally grows the
        detected boxes using a segmentation ``mask``, reduces overlaps between
        boxes, derives a reading order from the box coordinates, and finally
        adds a ReadingOrder plus one Image/Table/Graphic/Text region per
        detection to ``page``. A cropped image of every region is saved via
        ``self.workspace.save_image_file``.

        Args:
            page_image: PIL image of the page.
            page: PAGE page object that is modified in place.
            page_xywh: page coordinate information, forwarded to
                ``coordinates_for_segment``.
            page_id: page identifier, used in log messages and region ids.
            input_file: input file object, forwarded to ``make_file_id``.
            n: not used by this method.
            mrcnn_model: detection model; the first result of ``detect`` is
                expected to provide ``'rois'`` and ``'class_ids'``.
            class_names: sequence mapping class ids to region type names.
            mask: optional segmentation mask image; falsy to skip the
                mask-based box growing.

        Returns:
            None. Returns early, leaving the page untouched, when text regions
            already exist and the ``overwrite`` parameter is not set.
        """
        LOG = getLogger('OcrdAnybaseocrBlockSegmenter')
        # check for existing text regions and whether to overwrite them
        border = None
        if page.get_TextRegion():
            if self.parameter['overwrite']:
                LOG.info('removing existing TextRegions in page "%s"', page_id)
                page.set_TextRegion([])
            else:
                LOG.warning('keeping existing TextRegions in page "%s"',
                            page_id)
                return
        # check if border exists
        if page.get_Border():
            border_coords = page.get_Border().get_Coords()
            border_points = polygon_from_points(border_coords.get_points())
            border = Polygon(border_points)

        img_array = ocrolib.pil2array(page_image)
        # NOTE(review): looks like a debugging leftover (writes into the
        # current working directory on every call) - confirm and remove
        page_image.save('./checkthis.png')
        if len(img_array.shape) <= 2:
            # replicate a single-channel image into three channels
            img_array = np.stack((img_array, ) * 3, axis=-1)
        results = mrcnn_model.detect([img_array], verbose=1)
        r = results[0]

        th = self.parameter['th']
        # check for existing segmentation mask
        # this code executes only when use_deeplr is set to True in ocrd-tool.json file
        if mask:
            mask = ocrolib.pil2array(mask)
            mask = mask // 255
            mask = 1 - mask
            # label the pixels of bounding box i with the value i + 2
            # (the mask is multiplied, so only pixels that are still 1 change)
            for i in range(len(r['rois'])):

                min_x = r['rois'][i][0]
                min_y = r['rois'][i][1]
                max_x = r['rois'][i][2]
                max_y = r['rois'][i][3]
                mask[min_x:max_x, min_y:max_y] *= i + 2
            # NOTE(review): looks like a debugging leftover - confirm and remove
            cv2.imwrite('mask_check.png', mask * (255 / (len(r['rois']) + 2)))

            # check for left over pixels and add them to the bounding boxes
            pixel_added = True

            while pixel_added:

                pixel_added = False
                left_over = np.where(mask == 1)
                for x, y in zip(left_over[0], left_over[1]):
                    # NOTE(review): for x < th or y < th the slice start is
                    # negative, which makes the window wrap around or come out
                    # empty - confirm whether clamping to 0 was intended
                    local_mask = mask[x - th:x + th, y - th:y + th]
                    candidates = np.where(local_mask > 1)
                    candidates = [k for k in zip(candidates[0], candidates[1])]
                    if len(candidates) > 0:
                        pixel_added = True
                        # find closest labelled pixel (value > 1) in the window
                        candidates.sort(key=lambda j: np.sqrt((j[0] - th)**2 +
                                                              (j[1] - th)**2))
                        index = local_mask[candidates[0]] - 2

                        # add pixel to mask/bbox
                        # x,y to bbox with index
                        if x < r['rois'][index][0]:
                            r['rois'][index][0] = x

                        elif x > r['rois'][index][2]:
                            r['rois'][index][2] = x

                        if y < r['rois'][index][1]:
                            r['rois'][index][1] = y

                        elif y > r['rois'][index][3]:
                            r['rois'][index][3] = y

                        # update the mask
                        mask[x, y] = index + 2

        # resolving overlapping problem
        bbox_dict = {}  # boxes seen so far, grouped by class id
        class_id_check = []

        for i in range(len(r['rois'])):
            min_x = r['rois'][i][0]
            min_y = r['rois'][i][1]
            max_x = r['rois'][i][2]
            max_y = r['rois'][i][3]

            region_bbox = [min_y, min_x, max_y, max_x]

            for key in bbox_dict:
                for bbox in bbox_dict[key]:

                    # checking for ymax case with vertical overlapping
                    # along with y, check both for xmax and xmin
                    if (region_bbox[3] <= bbox[3] and region_bbox[3] >= bbox[1]
                            and ((region_bbox[0] >= bbox[0]
                                  and region_bbox[0] <= bbox[2]) or
                                 (region_bbox[2] >= bbox[0]
                                  and region_bbox[2] <= bbox[2]) or
                                 (region_bbox[0] <= bbox[0]
                                  and region_bbox[2] >= bbox[2]))
                            and r['class_ids'][i] != 5):

                        r['rois'][i][2] = bbox[1] - 1

                    # checking for ymin now
                    # along with y, check both for xmax and xmin
                    if (region_bbox[1] <= bbox[3] and region_bbox[1] >= bbox[1]
                            and ((region_bbox[0] >= bbox[0]
                                  and region_bbox[0] <= bbox[2]) or
                                 (region_bbox[2] >= bbox[0]
                                  and region_bbox[2] <= bbox[2]) or
                                 (region_bbox[0] <= bbox[0]
                                  and region_bbox[2] >= bbox[2]))
                            and r['class_ids'][i] != 5):

                        r['rois'][i][0] = bbox[3] + 1

            if r['class_ids'][i] not in class_id_check:
                bbox_dict[r['class_ids'][i]] = []
                class_id_check.append(r['class_ids'][i])

            bbox_dict[r['class_ids'][i]].append(region_bbox)

        # define reading order on basis of coordinates
        reading_order = []

        for i in range(len(r['rois'])):
            # NOTE(review): for a numpy image array the first axis is
            # presumably the height, so these names may be swapped - confirm
            width, height, _ = img_array.shape
            min_x = r['rois'][i][0]
            min_y = r['rois'][i][1]
            max_x = r['rois'][i][2]
            max_y = r['rois'][i][3]

            if (min_y - 5) > width and r['class_ids'][i] == 2:
                min_y -= 5
            if (max_y + 10) < width and r['class_ids'][i] == 2:
                min_y += 10
            reading_order.append((min_y, min_x, max_y, max_x))

        # sort by the first roi coordinate, then by the second
        reading_order = sorted(reading_order,
                               key=lambda box: (box[1], box[0]))
        for i in range(len(reading_order)):
            min_y, min_x, max_y, max_x = reading_order[i]
            # stretch box i to start at 0 along this axis before testing
            # for intersection with the boxes that follow it
            min_y = 0
            i_poly = Polygon([[min_x, min_y], [max_x, min_y], [max_x, max_y],
                              [min_x, max_y]])
            for j in range(i + 1, len(reading_order)):
                min_y, min_x, max_y, max_x = reading_order[j]
                j_poly = Polygon([[min_x, min_y], [max_x, min_y],
                                  [max_x, max_y], [min_x, max_y]])
                inter = i_poly.intersection(j_poly)
                if inter:
                    # move entry i behind entry j (the list is modified while
                    # its indices are being iterated; kept as in the original)
                    reading_order.insert(j + 1, reading_order[i])
                    del reading_order[i]

        # Creating Reading Order object in PageXML
        order_group = OrderedGroupType(caption="Regions reading order",
                                       id=page_id)

        for i in range(len(r['rois'])):
            min_x = r['rois'][i][0]
            min_y = r['rois'][i][1]
            max_x = r['rois'][i][2]
            max_y = r['rois'][i][3]
            # same adjustment as above, so that the tuple can be looked up
            # in reading_order
            if (min_y - 5) > width and r['class_ids'][i] == 2:
                min_y -= 5
            if (max_y + 10) < width and r['class_ids'][i] == 2:
                min_y += 10

            region_polygon = [[min_x, min_y], [max_x, min_y], [max_x, max_y],
                              [min_x, max_y]]

            if border:
                cut_region_polygon = border.intersection(
                    Polygon(region_polygon))
                if cut_region_polygon.is_empty:
                    continue
            else:
                cut_region_polygon = Polygon(region_polygon)

            order_index = reading_order.index((min_y, min_x, max_y, max_x))
            region_id = '%s_region%04d' % (page_id, i)
            regionRefIndex = RegionRefIndexedType(index=order_index,
                                                  regionRef=region_id)
            order_group.add_RegionRefIndexed(regionRefIndex)

        reading_order_object = ReadingOrderType()
        reading_order_object.set_OrderedGroup(order_group)
        page.set_ReadingOrder(reading_order_object)

        for i in range(len(r['rois'])):
            width, height, _ = img_array.shape
            min_x = r['rois'][i][0]
            min_y = r['rois'][i][1]
            max_x = r['rois'][i][2]
            max_y = r['rois'][i][3]

            if (min_y - 5) > width and r['class_ids'][i] == 2:
                min_y -= 5
            if (max_y + 10) < width and r['class_ids'][i] == 2:
                min_y += 10

            # one change here to resolve flipped coordinates
            region_polygon = [[min_y, min_x], [max_y, min_x], [max_y, max_x],
                              [min_y, max_x]]

            # Only clip against the page border when one exists: `border`
            # stays None for pages without a Border element, and calling
            # `.intersection` on it used to raise AttributeError.
            if border:
                cut_region_polygon = border.intersection(
                    Polygon(region_polygon))
                if cut_region_polygon.is_empty:
                    continue
            else:
                cut_region_polygon = Polygon(region_polygon)

            # exterior ring as (x, y) pairs, without the closing point
            cut_region_polygon = [
                j for j in zip(list(cut_region_polygon.exterior.coords.xy[0]),
                               list(cut_region_polygon.exterior.coords.xy[1]))
            ][:-1]

            region_polygon = coordinates_for_segment(cut_region_polygon,
                                                     page_image, page_xywh)
            region_points = points_from_polygon(region_polygon)

            read_order = reading_order.index((min_y, min_x, max_y, max_x))

            # crop the region from the page image array
            region_img = img_array[min_x:max_x, min_y:
                                   max_y]  # extract from points and img_array

            region_img = ocrolib.array2pil(region_img)

            file_id = make_file_id(input_file, self.output_file_grp)
            file_path = self.workspace.save_image_file(
                region_img,
                file_id + "_" + str(i),
                page_id=page_id,
                file_grp=self.output_file_grp)

            # ai = AlternativeImageType(filename=file_path, comments=page_xywh['features'])
            region_id = '%s_region%04d' % (page_id, i)
            coords = CoordsType(region_points)

            # in case of imageRegion
            if r['class_ids'][i] == 15:
                image_region = ImageRegionType(
                    custom='readingOrder {index:' + str(read_order) + ';}',
                    id=region_id,
                    Coords=coords,
                    type_=class_names[r['class_ids'][i]])
                # image_region.add_AlternativeImage(ai)
                page.add_ImageRegion(image_region)
                continue
            if r['class_ids'][i] == 16:
                table_region = TableRegionType(
                    custom='readingOrder {index:' + str(read_order) + ';}',
                    id=region_id,
                    Coords=coords,
                    type_=class_names[r['class_ids'][i]])
                # table_region.add_AlternativeImage(ai)
                page.add_TableRegion(table_region)
                continue
            if r['class_ids'][i] == 17:
                graphic_region = GraphicRegionType(
                    custom='readingOrder {index:' + str(read_order) + ';}',
                    id=region_id,
                    Coords=coords,
                    type_=class_names[r['class_ids'][i]])
                # graphic_region.add_AlternativeImage(ai)
                page.add_GraphicRegion(graphic_region)
                continue

            # every other class becomes a text region
            textregion = TextRegionType(custom='readingOrder {index:' +
                                        str(read_order) + ';}',
                                        id=region_id,
                                        Coords=coords,
                                        type_=class_names[r['class_ids'][i]])
            # textregion.add_AlternativeImage(ai)

            page.add_TextRegion(textregion)
Exemplo n.º 7
0
 def _process_page(self, it, page, page_image, page_coords, page_id):
     """Convert the layout blocks of a Tesseract iterator into PAGE regions.

     Walks ``it`` block by block, converts each block outline to page
     coordinates and adds a region of the matching type (text, image,
     separator, maths, table or noise) to ``page``. All regions except
     separators and noise are also referenced from the page's top-level
     OrderedGroup, continuing after the largest index already present.

     Args:
         it: Tesseract result iterator; a falsy value means there is
             nothing to process.
         page: PAGE page object that is modified in place.
         page_image: page image, forwarded to ``coordinates_for_segment``.
         page_coords: page coordinate information, forwarded to
             ``coordinates_for_segment``.
         page_id: not used by this method.
     """
     LOG = getLogger('processor.TesserocrSegmentRegion')
     # equivalent to GetComponentImages with raw_image=True,
     # (which would also give raw coordinates),
     # except we are also interested in the iterator's BlockType() here,
     # and its BlockPolygon()
     index = 0
     ro = page.get_ReadingOrder()
     if not ro:
         ro = ReadingOrderType()
         page.set_ReadingOrder(ro)
     og = ro.get_OrderedGroup()
     if og:
         # start counting from largest existing index
         for elem in (og.get_RegionRefIndexed() +
                      og.get_OrderedGroupIndexed() +
                      og.get_UnorderedGroupIndexed()):
             if elem.index >= index:
                 index = elem.index + 1
     else:
         # new top-level group
         og = OrderedGroupType(id="reading-order")
         ro.set_OrderedGroup(og)
     while it and not it.Empty(RIL.BLOCK):
         # (padding will be passed to both BoundingBox and GetImage)
         # (actually, Tesseract honours padding only on the left and bottom,
         #  whereas right and top are increased less!)
         bbox = it.BoundingBox(RIL.BLOCK, padding=self.parameter['padding'])
         # sometimes these polygons are not planar, which causes
         # PIL.ImageDraw.Draw.polygon (and likely others as well)
         # to misbehave; however, PAGE coordinate semantics prohibit
         # multi-path polygons!
         # (probably a bug in Tesseract itself, cf. tesseract#2826):
         if self.parameter['crop_polygons']:
             polygon = it.BlockPolygon()
         else:
             polygon = polygon_from_x0y0x1y1(bbox)
         polygon = coordinates_for_segment(polygon, page_image, page_coords)
         # polygon_for_parent returns None when the block cannot be used;
         # in that case the unmodified polygon is only needed for logging
         polygon2 = polygon_for_parent(polygon, page)
         if polygon2 is not None:
             polygon = polygon2
         points = points_from_polygon(polygon)
         coords = CoordsType(points=points)
         if polygon2 is None:
             LOG.info('Ignoring extant region: %s', points)
             it.Next(RIL.BLOCK)
             continue
         # if xywh['w'] < 30 or xywh['h'] < 30:
         #     LOG.info('Ignoring too small region: %s', points)
         #     it.Next(RIL.BLOCK)
         #     continue
         # region_image_bin = it.GetBinaryImage(RIL.BLOCK)
         # if not region_image_bin.getbbox():
         #     LOG.info('Ignoring binary-empty region: %s', points)
         #     it.Next(RIL.BLOCK)
         #     continue
         #
         # add the region reference in the reading order element
         # (will be removed again if Separator/Noise region below)
         ID = "region%04d" % index
         og.add_RegionRefIndexed(
             RegionRefIndexedType(regionRef=ID, index=index))
         #
         # region type switch
         #
         block_type = it.BlockType()
         if block_type in [
                 PT.FLOWING_TEXT,
                 PT.HEADING_TEXT,
                 PT.PULLOUT_TEXT,
                 PT.CAPTION_TEXT,
                 # TABLE is contained in PTIsTextType, but
                 # it is a bad idea to create a TextRegion
                 # for it (better set `find_tables` False):
                 # PT.TABLE,
                 # will also get a 90° @orientation
                 # (but that can be overridden by deskew/OSD):
                 PT.VERTICAL_TEXT
         ]:
             # NOTE(review): other constructor calls for PAGE region types
             # use the keyword `type_`; confirm that `type=` is honoured
             # here and not silently dropped
             region = TextRegionType(id=ID,
                                     Coords=coords,
                                     type=TextTypeSimpleType.PARAGRAPH)
             if block_type == PT.VERTICAL_TEXT:
                 region.set_orientation(90.0)
             elif block_type == PT.HEADING_TEXT:
                 region.set_type(TextTypeSimpleType.HEADING)
             elif block_type == PT.PULLOUT_TEXT:
                 region.set_type(TextTypeSimpleType.FLOATING)
             elif block_type == PT.CAPTION_TEXT:
                 region.set_type(TextTypeSimpleType.CAPTION)
             page.add_TextRegion(region)
             if self.parameter['sparse_text']:
                 # sparse text: one line spanning the whole region
                 region.set_type(TextTypeSimpleType.OTHER)
                 region.add_TextLine(
                     TextLineType(id=region.id + '_line', Coords=coords))
         elif block_type in [
                 PT.FLOWING_IMAGE, PT.HEADING_IMAGE, PT.PULLOUT_IMAGE
         ]:
             region = ImageRegionType(id=ID, Coords=coords)
             page.add_ImageRegion(region)
         elif block_type in [PT.HORZ_LINE, PT.VERT_LINE]:
             region = SeparatorRegionType(id=ID, Coords=coords)
             page.add_SeparatorRegion(region)
             # undo appending in ReadingOrder
             og.set_RegionRefIndexed(og.get_RegionRefIndexed()[:-1])
         elif block_type in [PT.INLINE_EQUATION, PT.EQUATION]:
             region = MathsRegionType(id=ID, Coords=coords)
             page.add_MathsRegion(region)
         elif block_type == PT.TABLE:
             # without API access to StructuredTable we cannot
             # do much for a TableRegionType (i.e. nrows, ncols,
             # coordinates of cells for recursive regions etc),
             # but this can be achieved afterwards by segment-table
             region = TableRegionType(id=ID, Coords=coords)
             page.add_TableRegion(region)
         else:
             region = NoiseRegionType(id=ID, Coords=coords)
             # pass the region that was just built (it was previously
             # omitted, so the noise region never reached the page)
             page.add_NoiseRegion(region)
             # undo appending in ReadingOrder
             og.set_RegionRefIndexed(og.get_RegionRefIndexed()[:-1])
         LOG.info("Detected region '%s': %s (%s)", ID, points,
                  membername(PT, block_type))
         #
         # iterator increment
         #
         index += 1
         it.Next(RIL.BLOCK)
     if (not og.get_RegionRefIndexed() and not og.get_OrderedGroupIndexed()
             and not og.get_UnorderedGroupIndexed()):
         # schema forbids empty OrderedGroup
         ro.set_OrderedGroup(None)
Exemplo n.º 8
0
    def _process_segment(self, page_image, page, page_xywh, page_id,
                         input_file, mask, dpi):
        LOG = getLogger('processor.AnybaseocrBlockSegmenter')
        # check for existing text regions and whether to overwrite them
        if page.get_TextRegion() or page.get_TableRegion():
            if self.parameter['overwrite']:
                LOG.info('removing existing text/table regions in page "%s"',
                         page_id)
                page.set_TextRegion([])
            else:
                LOG.warning('keeping existing text/table regions in page "%s"',
                            page_id)
        # check if border exists
        border_polygon = None
        if page.get_Border():
            border_coords = page.get_Border().get_Coords()
            border_points = polygon_from_points(border_coords.get_points())
            border_polygon = Polygon(border_points)

        LOG.info('detecting regions on page "%s"', page_id)
        img_array = ocrolib.pil2array(page_image)
        if len(img_array.shape) <= 2:
            img_array = np.stack((img_array, ) * 3, axis=-1)
        # convert to incidence matrix
        class_ids = np.array([[
            1 if category in self.parameter['active_classes'] else 0
            for category in CLASS_NAMES
        ]],
                             dtype=np.int32)
        results = self.mrcnn_model.detect([img_array],
                                          verbose=0,
                                          active_class_ids=class_ids)
        r = results[0]
        LOG.info('found %d candidates on page "%s"', len(r['rois']), page_id)

        th = self.parameter['th']
        # check for existing semgentation mask
        # this code executes only when the workflow had tiseg run before with use_deeplr=true
        if mask:
            mask = ocrolib.pil2array(mask)
            mask = mask // 255
            mask = 1 - mask
            # multiply all the bounding box part with 2
            for i in range(len(r['rois'])):

                min_y, min_x, max_y, max_x = r['rois'][i]
                mask[min_y:max_y, min_x:max_x] *= i + 2

            # check for left over pixels and add them to the bounding boxes
            pixel_added = True

            while pixel_added:

                pixel_added = False
                left_over = np.where(mask == 1)
                for y, x in zip(left_over[0], left_over[1]):
                    local_mask = mask[y - th:y + th, x - th:x + th]
                    candidates = np.where(local_mask > 1)
                    candidates = [k for k in zip(candidates[0], candidates[1])]
                    if len(candidates) > 0:
                        pixel_added = True
                        # find closest pixel with x>1
                        candidates.sort(key=lambda j: np.sqrt((j[0] - th)**2 +
                                                              (j[1] - th)**2))
                        index = local_mask[candidates[0]] - 2

                        # add pixel to mask/bbox
                        # y,x to bbox with index
                        if y < r['rois'][index][0]:
                            r['rois'][index][0] = y

                        elif y > r['rois'][index][2]:
                            r['rois'][index][2] = y

                        if x < r['rois'][index][1]:
                            r['rois'][index][1] = x

                        elif x > r['rois'][index][3]:
                            r['rois'][index][3] = x

                        # update the mask
                        mask[y, x] = index + 2

        for i in range(len(r['rois'])):
            class_id = r['class_ids'][i]
            if class_id >= len(CLASS_NAMES):
                raise Exception(
                    'Unexpected class id %d - model does not match' % class_id)

        # find hull contours on masks
        if self.parameter['use_masks']:
            r.setdefault('polygons', list())
            # estimate glyph scale (roughly)
            scale = int(dpi / 6)
            scale = scale + (scale + 1) % 2  # odd
            for i in range(len(r['rois'])):
                mask = r['masks'][:, :, i]
                mask = cv2.dilate(mask.astype(np.uint8),
                                  np.ones((scale, scale), np.uint8)) > 0
                # close mask until we have a single outer contour
                contours = None
                for _ in range(10):
                    mask = cv2.morphologyEx(
                        mask.astype(np.uint8), cv2.MORPH_CLOSE,
                        np.ones((scale, scale), np.uint8)) > 0
                    contours, _ = cv2.findContours(mask.astype(np.uint8),
                                                   cv2.RETR_EXTERNAL,
                                                   cv2.CHAIN_APPROX_SIMPLE)
                    if len(contours) == 1:
                        break
                r['polygons'].append(Polygon(
                    contours[0][:, 0, :]))  # already in x,y order

        # to reduce overlaps, apply IoU-based non-maximum suppression
        # (and other post-processing against overlaps) across classes,
        # but not on the raw pixels, but the smoothed hull polygons
        LOG.info('post-processing detections on page "%s"', page_id)
        worse = []
        if self.parameter['post_process']:
            active = True

            def _merge_rois(i, j):
                """merges i into j"""
                nonlocal r, active
                r['rois'][j][0] = min(r['rois'][i][0], r['rois'][j][0])
                r['rois'][j][1] = min(r['rois'][i][1], r['rois'][j][1])
                r['rois'][j][2] = max(r['rois'][i][2], r['rois'][j][2])
                r['rois'][j][3] = max(r['rois'][i][3], r['rois'][j][3])
                r['polygons'][j] = r['polygons'][i].union(r['polygons'][j])
                #r['scores'][j] = max(r['scores'][i], r['scores'][i])
                active = True

            # find overlapping pairs
            while active:
                active = False
                for i in range(len(r["class_ids"])):
                    if i in worse:
                        continue
                    for j in range(i + 1, len(r['class_ids'])):
                        if j in worse:
                            continue
                        iclass = r['class_ids'][i]
                        jclass = r['class_ids'][j]
                        iname = CLASS_NAMES[iclass]
                        jname = CLASS_NAMES[jclass]
                        if (iname == 'drop-capital') != (jname
                                                         == 'drop-capital'):
                            # ignore drop-capital overlapping with others
                            continue
                        # rs todo: lower priority for footnote?
                        if (r['rois'][i][1] > r['rois'][j][3]
                                or r['rois'][i][3] < r['rois'][j][1]
                                or r['rois'][i][0] > r['rois'][j][2]
                                or r['rois'][i][2] < r['rois'][j][0]):
                            # no overlap (cut)
                            continue
                        iscore = r['scores'][i]
                        jscore = r['scores'][j]
                        if not self.parameter['use_masks']:
                            LOG.debug(
                                "roi %d[%s] overlaps roi %d[%s] and %s (replacing)",
                                i, iname, j, jname,
                                "looses" if iscore < jscore else "wins")
                            if iscore < jscore:
                                worse.append(i)
                                break
                            else:
                                worse.append(j)
                                continue
                        # compare masks
                        ipoly = r['polygons'][i]
                        jpoly = r['polygons'][j]
                        isize = ipoly.area
                        jsize = jpoly.area
                        inter = ipoly.intersection(jpoly).area
                        union = ipoly.union(jpoly).area
                        # LOG.debug("%d/%d %dpx/%dpx shared %dpx overall %dpx",
                        #           i, j, isize, jsize, inter, union)
                        if inter / isize > self.parameter['min_share_drop']:
                            LOG.debug(
                                "roi %d[%s] contains roi %d[%s] (replacing)",
                                j, jname, i, iname)
                            worse.append(i)
                            break
                        elif inter / jsize > self.parameter['min_share_drop']:
                            LOG.debug(
                                "roi %d[%s] contains roi %d[%s] (replacing)",
                                i, iname, j, jname)
                            worse.append(j)
                        elif inter / union > self.parameter['min_iou_drop']:
                            LOG.debug(
                                "roi %d[%s] heavily overlaps roi %d[%s] and %s (replacing)",
                                i, iname, j, jname,
                                "looses" if iscore < jscore else "wins")
                            if iscore < jscore:
                                worse.append(i)
                                break
                            else:
                                worse.append(j)
                        elif inter / isize > self.parameter['min_share_merge']:
                            LOG.debug("roi %d[%s] covers roi %d[%s] (merging)",
                                      j, jname, i, iname)
                            worse.append(i)
                            _merge_rois(i, j)
                            break
                        elif inter / jsize > self.parameter['min_share_merge']:
                            LOG.debug("roi %d[%s] covers roi %d[%s] (merging)",
                                      i, iname, j, jname)
                            worse.append(j)
                            _merge_rois(j, i)
                        elif inter / union > self.parameter['min_iou_merge']:
                            LOG.debug(
                                "roi %d[%s] slightly overlaps roi %d[%s] and %s (merging)",
                                i, iname, j, jname,
                                "looses" if iscore < jscore else "wins")
                            if iscore < jscore:
                                worse.append(i)
                                _merge_rois(i, j)
                                break
                            else:
                                worse.append(j)
                                _merge_rois(j, i)

        # define reading order on basis of coordinates
        partial_order = np.zeros((len(r['rois']), len(r['rois'])), np.uint8)
        for i, (min_y_i, min_x_i, max_y_i, max_x_i) in enumerate(r['rois']):
            for j, (min_y_j, min_x_j, max_y_j,
                    max_x_j) in enumerate(r['rois']):
                if min_x_i < max_x_j and max_x_i > min_x_j:
                    # xoverlaps
                    if min_y_i < min_y_j:
                        partial_order[i, j] = 1
                else:
                    min_y = min(min_y_i, min_y_j)
                    max_y = max(max_y_i, max_y_j)
                    min_x = min(min_x_i, min_x_j)
                    max_x = max(max_x_i, max_x_j)
                    if next(
                        (False
                         for (min_y_k, min_x_k, max_y_k, max_x_k) in r['rois']
                         if (min_y_k < max_y and max_y_k > min_y
                             and min_x_k < max_x and max_x_k > min_x)), True):
                        # no k in between
                        if ((min_y_j + max_y_j) / 2 < min_y_i
                                and (min_y_i + max_y_i) / 2 > max_y_j):
                            # vertically unrelated
                            partial_order[j, i] = 1
                        elif max_x_i < min_x_j:
                            partial_order[i, j] = 1

        def _topsort(po):
            """Return the node indices of relation ``po`` in topological order.

            ``po`` is a square matrix in which a nonzero ``po[a, b]`` means
            that node ``a`` has to come before node ``b``. Nodes are started
            in ascending index order and each one is emitted only after all
            of its predecessors have been emitted.

            A node is flagged as visited *before* its predecessors are
            expanded, so a cyclic relation terminates: the cycle is cut where
            it re-enters an already visited node.

            Returns a list holding every index in ``range(po.shape[0])``
            exactly once.
            """
            # Builtin ``bool`` rather than ``np.bool``: that alias does not
            # exist in NumPy 1.24 through 1.26 and raises AttributeError there.
            visited = np.zeros(po.shape[0], bool)
            result = []

            def _visit(node):
                if visited[node]:
                    return
                visited[node] = True
                # column ``node`` holds everything that must precede it
                for predecessor in np.nonzero(po[:, node])[0]:
                    _visit(predecessor)
                result.append(node)

            for node in range(po.shape[0]):
                _visit(node)
            return result

        reading_order = _topsort(partial_order)

        # Creating Reading Order object in PageXML
        order_group = OrderedGroupType(caption="Regions reading order",
                                       id=page_id)
        reading_order_object = ReadingOrderType()
        reading_order_object.set_OrderedGroup(order_group)
        page.set_ReadingOrder(reading_order_object)

        for i in range(len(r['rois'])):
            width, height, _ = img_array.shape
            min_y, min_x, max_y, max_x = r['rois'][i]
            score = r['scores'][i]
            class_id = r['class_ids'][i]
            class_name = CLASS_NAMES[class_id]
            if i in worse:
                LOG.debug(
                    "Ignoring instance %d[%s] overlapping better/larger neighbour",
                    i, class_name)
                continue

            if self.parameter['use_masks']:
                region_polygon = r['polygons'][i].exterior.coords[:-1]
            else:
                region_polygon = polygon_from_bbox(
                    max(min_x - 5, 0) if class_name == 'paragraph' else min_x,
                    min_y,
                    min(max_x +
                        10, width) if class_name == 'paragraph' else max_x,
                    max_y)

            # convert to absolute coordinates
            region_polygon = coordinates_for_segment(region_polygon,
                                                     page_image, page_xywh)
            # intersect with parent and plausibilize
            cut_region_polygon = Polygon(region_polygon)
            if border_polygon:
                cut_region_polygon = border_polygon.intersection(
                    cut_region_polygon)
            if cut_region_polygon.is_empty:
                LOG.warning('region %d does not intersect page frame', i)
                continue
            if not cut_region_polygon.is_valid:
                LOG.warning('region %d has invalid polygon', i)
                continue
            region_polygon = cut_region_polygon.exterior.coords[:-1]
            region_coords = CoordsType(points_from_polygon(region_polygon),
                                       conf=score)
            read_order = reading_order.index(i)
            region_args = {
                'custom': 'readingOrder {index:' + str(read_order) + ';}',
                'id': 'region%04d' % i,
                'Coords': region_coords
            }
            if class_name == 'image':
                image_region = ImageRegionType(**region_args)
                page.add_ImageRegion(image_region)
            elif class_name == 'table':
                table_region = TableRegionType(**region_args)
                page.add_TableRegion(table_region)
            elif class_name == 'graphics':
                graphic_region = GraphicRegionType(**region_args)
                page.add_GraphicRegion(graphic_region)
            else:
                region_args['type_'] = class_name
                textregion = TextRegionType(**region_args)
                page.add_TextRegion(textregion)
            order_index = reading_order.index(i)
            regionRefIndex = RegionRefIndexedType(index=order_index,
                                                  regionRef=region_args['id'])
            order_group.add_RegionRefIndexed(regionRefIndex)
            LOG.info('added %s region on page "%s"', class_name, page_id)