def test_alternative_image_additions():
    """Attach an AlternativeImage at every level of the PAGE hierarchy.

    Builds PcGts -> Page -> TextRegion -> TextLine -> Word -> Glyph and adds
    an empty AlternativeImageType to each level; the test passes when none of
    the ``add_*`` calls raises.
    """
    document = PcGtsType(pcGtsId="foo")
    assert document.pcGtsId == 'foo'

    # act
    # Page/AlternativeImage
    root = PageType()
    document.set_Page(root)
    root.add_AlternativeImage(AlternativeImageType())

    # Descend one level per iteration: create the child segment, hook it
    # into its parent, then give it an AlternativeImage of its own.
    hierarchy = (
        (TextRegionType, 'add_TextRegion'),  # TextRegion/AlternativeImage
        (TextLineType, 'add_TextLine'),      # TextLine/AlternativeImage
        (WordType, 'add_Word'),              # Word/AlternativeImage
        (GlyphType, 'add_Glyph'),            # Glyph/AlternativeImage
    )
    parent = root
    for segment_type, attach_name in hierarchy:
        segment = segment_type()
        getattr(parent, attach_name)(segment)
        segment.add_AlternativeImage(AlternativeImageType())
        parent = segment
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n, mrcnn_model, class_names):
    """Detect blocks on a page image and add each one to ``page`` as a TextRegion.

    Runs ``mrcnn_model.detect`` on the page image, crops one sub-image per
    detected box, stores it with ``self.workspace.save_image_file`` and
    attaches it as an AlternativeImage of a newly created TextRegion.

    Args:
        page_image: PIL image of the page.
        page: PAGE element that receives the new TextRegions.
        page_xywh: dict whose ``'features'`` string is extended in place
            with ``',blksegmented'``.
        page_id: page identifier forwarded to ``save_image_file``.
        input_file: input file entry; its ``ID`` seeds the output file IDs.
        n: running index used to build the fallback file ID.
        mrcnn_model: model exposing ``detect(images, verbose=...)`` whose
            first result is a dict with ``'rois'`` and ``'class_ids'``.
        class_names: sequence mapping a class id to the region type string.

    Returns:
        None. ``page`` and ``page_xywh`` are modified in place.
    """
    # Horizontal padding applied to paragraph boxes so that the first/last
    # characters of a line are not clipped by a tight detection box.
    # Class id 2 is the one the original code treated as "paragraph".
    paragraph_class_id = 2
    pad_left = 5
    pad_right = 10

    img_array = ocrolib.pil2array(page_image)
    results = mrcnn_model.detect([img_array], verbose=1)
    detection = results[0]
    page_xywh['features'] += ',blksegmented'

    # NumPy image arrays are indexed (row, column): the second axis is the
    # width. Reading a single axis also works for single-channel arrays.
    width = img_array.shape[1]

    # The output file ID does not depend on the individual detection.
    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)

    for i, (roi, class_id) in enumerate(zip(detection['rois'], detection['class_ids'])):
        # Matterport Mask R-CNN reports every box as (y1, x1, y2, x2).
        # NOTE(review): assumes mrcnn_model follows that convention - confirm.
        min_y, min_x, max_y, max_x = roi[0], roi[1], roi[2], roi[3]

        # Small post-processing for paragraphs so that leading/trailing
        # characters are not cut off; the padding is clamped to the image.
        if class_id == paragraph_class_id:
            min_x = max(0, min_x - pad_left)
            max_x = min(width, max_x + pad_right)

        # Rows are selected by y, columns by x.
        region_img = ocrolib.array2pil(img_array[min_y:max_y, min_x:max_x])

        file_path = self.workspace.save_image_file(region_img,
                                                   file_id + "_" + str(i),
                                                   page_id=page_id,
                                                   file_grp=self.image_grp)
        ai = AlternativeImageType(filename=file_path, comments=page_xywh['features'])

        # PAGE points are "x,y" pairs: top-left, top-right, bottom-right,
        # bottom-left of the (padded) box.
        coords = CoordsType(
            "%i,%i %i,%i %i,%i %i,%i" % (min_x, min_y,
                                         max_x, min_y,
                                         max_x, max_y,
                                         min_x, max_y))
        textregion = TextRegionType(Coords=coords, type_=class_names[class_id])
        textregion.add_AlternativeImage(ai)
        page.add_TextRegion(textregion)
def test_alternativeImage(self):
    """Add an AlternativeImage to Page, TextRegion, TextLine, Word and Glyph.

    Each segment is created, attached to its parent and then given an empty
    AlternativeImageType; the test succeeds when no call raises.
    """
    def attach_image(node):
        # Every hierarchy level gets its own, empty AlternativeImage.
        node.add_AlternativeImage(AlternativeImageType())

    doc = PcGtsType(pcGtsId="foo")
    self.assertEqual(doc.pcGtsId, 'foo')

    # Page/AlternativeImage
    page_node = PageType()
    doc.set_Page(page_node)
    attach_image(page_node)

    # TextRegion/AlternativeImage
    region_node = TextRegionType()
    page_node.add_TextRegion(region_node)
    attach_image(region_node)

    # TextLine/AlternativeImage
    line_node = TextLineType()
    region_node.add_TextLine(line_node)
    attach_image(line_node)

    # Word/AlternativeImage
    word_node = WordType()
    line_node.add_Word(word_node)
    attach_image(word_node)

    # Glyph/AlternativeImage
    glyph_node = GlyphType()
    word_node.add_Glyph(glyph_node)
    attach_image(glyph_node)
def test_deskewing(plain_workspace):
    """Check cropping and deskewing of a skewed, non-rectangular region.

    A polygon mask is saved as the page image and a TextRegion with that
    polygon and ``orientation=-skew`` is added to the page. The region image
    is then retrieved three ways:

    1. with ``feature_filter='deskewed'`` - must equal the plain bbox crop;
    2. without a filter - must be larger than the bbox, carry a rotation in
       its transform and the ``deskewed`` feature, and keep roughly the same
       number of foreground pixels;
    3. after storing result 2 as an AlternativeImage - must match result 2
       in size, transform and features without anything being logged on
       ``ocrd_utils.crop_image``.
    """
    #from ocrd_utils import initLogging, setOverrideLogLevel
    #setOverrideLogLevel('DEBUG')
    size = (3000, 4000)
    poly = [[1403, 2573], [1560, 2573], [1560, 2598], [2311, 2598],
            [2311, 2757], [2220, 2757], [2220, 2798], [2311, 2798],
            [2311, 2908], [1403, 2908]]
    xywh = xywh_from_polygon(poly)
    bbox = bbox_from_polygon(poly)
    skew = 4.625
    image = Image.new('L', size)
    image = polygon_mask(image, poly)
    #image.show(title='image')
    pixels = np.count_nonzero(np.array(image) > 0)
    name = 'foo0'
    assert plain_workspace.save_image_file(image, name, 'IMG')
    pcgts = page_from_file(next(plain_workspace.mets.find_files(ID=name)))
    page = pcgts.get_Page()
    region = TextRegionType(
        id='nonrect',
        Coords=CoordsType(points=points_from_polygon(poly)),
        orientation=-skew)
    page.add_TextRegion(region)
    page_image, page_coords, _ = plain_workspace.image_from_page(page, '')
    #page_image.show(title='page_image')
    assert list(image.getdata()) == list(page_image.getdata())
    assert np.all(page_coords['transform'] == np.eye(3))

    # without deskewing: plain crop to the polygon's bounding box
    reg_image, reg_coords = plain_workspace.image_from_segment(
        region, page_image, page_coords, feature_filter='deskewed', fill=0)
    assert list(image.crop(bbox).getdata()) == list(reg_image.getdata())
    assert reg_image.width == xywh['w'] == 908
    assert reg_image.height == xywh['h'] == 335
    assert reg_coords['transform'][0, 2] == -xywh['x']
    assert reg_coords['transform'][1, 2] == -xywh['y']
    # same fg after cropping to minimal bbox
    reg_pixels = np.count_nonzero(np.array(reg_image) > 0)
    assert pixels == reg_pixels

    # now with deskewing (test for size after recropping)
    reg_image, reg_coords = plain_workspace.image_from_segment(
        region, page_image, page_coords, fill=0)
    #reg_image.show(title='reg_image')
    assert reg_image.width == 932 > xywh['w']
    assert reg_image.height == 382 > xywh['h']
    assert reg_coords['transform'][0, 1] != 0
    assert reg_coords['transform'][1, 0] != 0
    assert 'deskewed' in reg_coords['features']
    # same fg after cropping to minimal bbox (roughly - due to aliasing)
    reg_pixels = np.count_nonzero(np.array(reg_image) > 0)
    assert np.abs(pixels - reg_pixels) / pixels < 0.005
    reg_array = np.array(reg_image) > 0

    # now via AlternativeImage
    path = plain_workspace.save_image_file(reg_image, region.id + '_img', 'IMG')
    region.add_AlternativeImage(
        AlternativeImageType(filename=path, comments=reg_coords['features']))
    logger_capture = FIFOIO(256)
    logger_handler = logging.StreamHandler(logger_capture)
    #logger_handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT, datefmt=LOG_TIMEFMT))
    logger = logging.getLogger('ocrd_utils.crop_image')
    logger.addHandler(logger_handler)
    try:
        reg_image2, reg_coords2 = plain_workspace.image_from_segment(
            region, page_image, page_coords, fill=0)
        #reg_image2.show(title='reg_image2')
        logger_output = logger_capture.getvalue()
    finally:
        # Detach the handler before closing its stream; otherwise later
        # records on this logger would be written to a closed buffer.
        logger.removeHandler(logger_handler)
        logger_capture.close()
    assert logger_output == ''
    assert reg_image2.width == reg_image.width
    assert reg_image2.height == reg_image.height
    assert np.allclose(reg_coords2['transform'], reg_coords['transform'])
    assert reg_coords2['features'] == reg_coords['features']
    # same fg after cropping to minimal bbox (roughly - due to aliasing);
    # count the image retrieved via AlternativeImage, not the one stored
    reg_pixels2 = np.count_nonzero(np.array(reg_image2) > 0)
    assert np.abs(reg_pixels2 - reg_pixels) / reg_pixels < 0.005
    reg_array2 = np.array(reg_image2) > 0
    assert 0.98 < np.sum(reg_array == reg_array2) / reg_array.size <= 1.0