def test_hashable():
    """
    https://github.com/OCR-D/ocrd_segment/issues/45
    """
    pcgts = page_from_image(create_ocrd_file_with_defaults(
        url=assets.path_to('kant_aufklaerung_1784/data/OCR-D-IMG/INPUT_0017.tif')))
    page = pcgts.get_Page()
    testset = set()
    testset.add(pcgts)
    testset.add(page)

def test_serialize_no_empty_readingorder():
    """
    https://github.com/OCR-D/core/issues/602
    """
    pcgts = page_from_image(create_ocrd_file_with_defaults(
        url=assets.path_to('kant_aufklaerung_1784/data/OCR-D-IMG/INPUT_0017.tif')))
    pcgts.get_Page().set_ReadingOrder(ReadingOrderType())
    assert pcgts.get_Page().get_ReadingOrder()
    pcgts = parseString(to_xml(pcgts, skip_declaration=True))
    assert not pcgts.get_Page().get_ReadingOrder()

def convert(cocofile, directory):
    """Convert MS-COCO JSON to METS/PAGE XML files.

    Load JSON ``cocofile`` (in MS-COCO format) and chdir to ``directory``
    (which it refers to).

    Start a METS file mets.xml with references to the image files (under
    fileGrp ``OCR-D-IMG``) and their corresponding PAGE-XML annotations
    (under fileGrp ``OCR-D-GT-SEG-BLOCK``), as parsed from ``cocofile``
    and written using the same basename.
    """
    resolver = Resolver()
    with pushd_popd(directory):
        workspace = resolver.workspace_from_nothing('.')
        # https://github.com/ibm-aur-nlp/PubLayNet
        workspace.mets.unique_identifier = 'ocrd_PubLayNet_' + directory
        coco = json.load(cocofile)
        LOG.info('Loaded JSON for %d images with %d regions in %d categories',
                 len(coco['images']), len(coco['annotations']), len(coco['categories']))
        # build lookup tables for category names and image metadata
        categories = dict()
        for cat in coco['categories']:
            categories[cat['id']] = cat['name']
        images = dict()
        for image in coco['images']:
            images[image['id']] = image
        # group the annotations by the image they belong to
        for annotation in coco['annotations']:
            image = images[annotation['image_id']]
            regions = image.setdefault('regions', list())
            regions.append(annotation)
        del coco
        LOG.info('Parsing annotations into PAGE-XML')
        for image in images.values():
            page_id = 'p' + str(image['id'])
            file_base, file_ext = os.path.splitext(image['file_name'])
            filename = file_base + '.xml'
            image_file = workspace.add_file('OCR-D-IMG',
                                            ID='OCR-D-IMG_' + page_id,
                                            pageId=page_id,
                                            mimetype=EXT_TO_MIME[file_ext],
                                            local_filename=image['file_name'])
            LOG.info('Added page %s file %s of type %s',
                     image_file.pageId, image_file.local_filename, image_file.mimetype)
            pcgts = page_from_image(image_file)
            pcgts.set_pcGtsId(page_id)
            page = pcgts.get_Page()
            assert page.imageWidth == image['width']
            assert page.imageHeight == image['height']
            for region in image['regions']:
                # COCO segmentation is a flat list of x/y pairs
                polygon = np.array(region['segmentation'])
                polygon = np.reshape(polygon, (polygon.shape[1] // 2, 2))
                coords = CoordsType(points=points_from_polygon(polygon))
                category = categories[region['category_id']]
                region_id = 'r' + str(region['id'])
                if category == 'text':
                    region_obj = TextRegionType(id=region_id, Coords=coords,
                                                type_=TextTypeSimpleType.PARAGRAPH)
                    page.add_TextRegion(region_obj)
                elif category == 'title':
                    region_obj = TextRegionType(id=region_id, Coords=coords,
                                                type_=TextTypeSimpleType.HEADING)  # CAPTION?
                    page.add_TextRegion(region_obj)
                elif category == 'list':
                    region_obj = TextRegionType(id=region_id, Coords=coords,
                                                type_=TextTypeSimpleType.LISTLABEL)  # OTHER?
                    page.add_TextRegion(region_obj)
                elif category == 'table':
                    region_obj = TableRegionType(id=region_id, Coords=coords)
                    page.add_TableRegion(region_obj)
                elif category == 'figure':
                    region_obj = ImageRegionType(id=region_id, Coords=coords)
                    page.add_ImageRegion(region_obj)
                else:
                    raise Exception('unknown image category: %s' % category)
            page_file = workspace.add_file('OCR-D-GT-SEG-BLOCK',
                                           ID='OCR-D-GT-SEG-BLOCK_' + page_id,
                                           pageId=page_id,
                                           mimetype=MIMETYPE_PAGE,
                                           local_filename=filename,
                                           content=to_xml(pcgts))
            LOG.info('Added page %s file %s with %d regions',
                     page_file.pageId, page_file.local_filename, len(image['regions']))
        LOG.info('All done')
        workspace.save_mets()
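
# Hypothetical usage sketch for ``convert``: the path names below are
# assumptions (a local PubLayNet COCO export plus its image directory), not
# part of this module, and the helper is not wired into any CLI.
def _example_convert_publaynet(cocopath='publaynet/val.json',
                               workdir='publaynet/val'):
    """Convert a local PubLayNet split into a METS/PAGE workspace in ``workdir``."""
    with open(cocopath, 'r', encoding='utf-8') as cocofile:
        convert(cocofile, workdir)
    # Afterwards ``workdir`` contains a mets.xml with one OCR-D-IMG image and
    # one OCR-D-GT-SEG-BLOCK PAGE-XML file per page.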