def test_alternative_image_additions():
    """Check that an AlternativeImage can be attached at every PAGE hierarchy level.

    Builds the chain Page > TextRegion > TextLine > Word > Glyph and adds one
    ``AlternativeImageType`` to each element; the test passes when none of
    the calls raises.
    """
    pcgts = PcGtsType(pcGtsId="foo")
    assert pcgts.pcGtsId == 'foo'

    # Page/AlternativeImage
    page = PageType()
    pcgts.set_Page(page)
    page.add_AlternativeImage(AlternativeImageType())

    # Each entry is (element class, name of the parent's adder method),
    # listed from the outermost to the innermost level.
    nesting = (
        (TextRegionType, 'add_TextRegion'),  # TextRegion/AlternativeImage
        (TextLineType, 'add_TextLine'),      # TextLine/AlternativeImage
        (WordType, 'add_Word'),              # Word/AlternativeImage
        (GlyphType, 'add_Glyph'),            # Glyph/AlternativeImage
    )
    parent = page
    for element_class, adder_name in nesting:
        element = element_class()
        getattr(parent, adder_name)(element)
        element.add_AlternativeImage(AlternativeImageType())
        parent = element
def page_from_image(input_file):
    """
    Create `OcrdPage </../../ocrd_models/ocrd_models.ocrd_page.html>`_
    from an `OcrdFile </../../ocrd_models/ocrd_models.ocrd_file.html>`_
    representing an image (i.e. should have ``mimetype`` starting with ``image/``).

    Arguments:
        * input_file (OcrdFile): file whose ``local_filename`` points to the image

    Returns:
        A ``PcGtsType`` with fresh metadata and a page carrying the image
        dimensions and file name.

    Raises:
        ValueError: if ``input_file.local_filename`` is empty or unset.
        FileNotFoundError: if ``input_file.local_filename`` does not exist.
    """
    local_path = input_file.local_filename
    if not local_path:
        raise ValueError("input_file must have 'local_filename' property")
    if not Path(local_path).exists():
        raise FileNotFoundError("File not found: '%s' (%s)" % (local_path, input_file))

    exif = exif_from_filename(local_path)
    timestamp = datetime.now()
    metadata = MetadataType(
        Creator="OCR-D/core %s" % VERSION,
        Created=timestamp,
        LastChange=timestamp)

    # XXX brittle
    # Prefer the URL when one is set, otherwise fall back to the local path.
    if input_file.url is not None:
        image_ref = input_file.url
    else:
        image_ref = local_path

    page = PageType(
        imageWidth=exif.width,
        imageHeight=exif.height,
        imageFilename=image_ref)
    return PcGtsType(Metadata=metadata, Page=page)
def page_from_image(input_file):
    """
    Create `OcrdPage </../../ocrd_models/ocrd_models.ocrd_page.html>`_
    from an `OcrdFile </../../ocrd_models/ocrd_models.ocrd_file.html>`_
    representing an image (i.e. should have ``mimetype`` starting with ``image/``).

    Arguments:
        * input_file (OcrdFile): file whose ``local_filename`` points to the image

    Returns:
        A ``PcGtsType`` with fresh metadata and a page carrying the image
        dimensions and file name. The file name is ``input_file.url`` when
        that is not ``None``, otherwise ``'file://'`` followed by
        ``input_file.local_filename``.

    Raises:
        ValueError: if ``input_file.local_filename`` is ``None`` or empty.
    """
    # Reject an empty string as well as None: neither names a readable file,
    # and failing here gives a clearer error than failing inside the EXIF
    # reader. ValueError is still an Exception, so existing handlers keep
    # working.
    if not input_file.local_filename:
        raise ValueError("input_file must have 'local_filename' property")
    exif = exif_from_filename(input_file.local_filename)
    now = datetime.now()
    return PcGtsType(
        Metadata=MetadataType(
            Creator="OCR-D/core %s" % VERSION,
            Created=now,
            LastChange=now
        ),
        Page=PageType(
            imageWidth=exif.width,
            imageHeight=exif.height,
            # XXX brittle
            imageFilename=input_file.url if input_file.url is not None else 'file://' + input_file.local_filename
        )
    )
def render_all(self, pc_gts: PcGtsType) -> None:
    """Render the page frame, all regions and, optionally, reading-order arrows.

    The print space and border are rendered first, then every region in an
    order chosen so that the most important region types are drawn last.
    When ``Feature.ORDER`` is enabled in ``self.features``, an arrow
    operation is appended between the representative points of consecutive
    regions in reading order.

    Arguments:
        pc_gts: PAGE root object whose page is rendered.
    """
    page: PageType = pc_gts.get_Page()
    for frame_getter in (page.get_PrintSpace, page.get_Border):
        self.render_type(frame_getter())

    def paint_rank(region):
        # Regions often overlap and nothing is alpha-composited, so whatever
        # is rendered last stays unoccluded. Document order is no help here
        # (it reflects workflow or geometry, not visibility), hence a fixed
        # ranking, sorted ascending:
        # - SeparatorRegion ranks highest (rendered last, top-most)
        # - TextRegion comes right below
        # - everything else is rendered first
        if isinstance(region, SeparatorRegionType):
            return 0
        return -1 if isinstance(region, TextRegionType) else -2

    paint_queue = list(page.get_AllRegions())
    paint_queue.sort(key=paint_rank)  # stable, like sorted()
    for region_ds in paint_queue:
        self.render_type(region_ds)

    if not (self.features & Feature.ORDER):
        return

    # Chain consecutive regions in reading order with arrows.
    previous: Optional[Point] = None
    for region_ds in page.get_AllRegions(order='reading-order-only'):
        anchor = self.region_factory.create(region_ds).poly.representative_point()
        if previous:
            arrow = ArrowOperation(previous, anchor, color='#FF0000CF')
            self.operations.append(arrow)
        previous = anchor
def create_page_xml(imageFilename, height, width):
    """Build a PAGE root object for an image of the given size.

    Arguments:
        imageFilename: value stored as the page's ``imageFilename``.
        height: image height; stored as ``str(height)``.
        width: image width; stored as ``str(width)``.

    Returns:
        A ``PcGtsType`` with creator ``'SBB_QURATOR'``, creation and
        last-change timestamps set to the current time, left-to-right
        reading direction and top-to-bottom text line order.
    """
    timestamp = datetime.now()
    metadata = MetadataType(
        Creator='SBB_QURATOR',
        Created=timestamp,
        LastChange=timestamp,
    )
    page = PageType(
        imageWidth=str(width),
        imageHeight=str(height),
        imageFilename=imageFilename,
        readingDirection='left-to-right',
        textLineOrder='top-to-bottom',
    )
    return PcGtsType(Metadata=metadata, Page=page)
def test_alternativeImage(self):
    """Check that an AlternativeImage can be added at every PAGE hierarchy level.

    Links Page > TextRegion > TextLine > Word > Glyph and attaches one
    ``AlternativeImageType`` to each; the test passes when no call raises.
    """
    root = PcGtsType(pcGtsId="foo")
    self.assertEqual(root.pcGtsId, 'foo')

    # Instantiate the whole chain up front, then wire it together top-down.
    page_el = PageType()
    region_el = TextRegionType()
    line_el = TextLineType()
    word_el = WordType()
    glyph_el = GlyphType()

    # Page/AlternativeImage
    root.set_Page(page_el)
    page_el.add_AlternativeImage(AlternativeImageType())

    # TextRegion/AlternativeImage
    page_el.add_TextRegion(region_el)
    region_el.add_AlternativeImage(AlternativeImageType())

    # TextLine/AlternativeImage
    region_el.add_TextLine(line_el)
    line_el.add_AlternativeImage(AlternativeImageType())

    # Word/AlternativeImage
    line_el.add_Word(word_el)
    word_el.add_AlternativeImage(AlternativeImageType())

    # Glyph/AlternativeImage
    word_el.add_Glyph(glyph_el)
    glyph_el.add_AlternativeImage(AlternativeImageType())
def page_from_image(input_file, with_tree=False):
    """
    Create :py:class:`~ocrd_models.ocrd_page.OcrdPage`
    from an :py:class:`~ocrd_models.ocrd_file.OcrdFile`
    representing an image (i.e. should have ``@mimetype`` starting with ``image/``).

    Arguments:
        input_file (:py:class:`~ocrd_models.ocrd_file.OcrdFile`): file to open
            and produce a PAGE DOM for
    Keyword arguments:
        with_tree (boolean): whether to return XML node tree, element-node mapping
            and reverse mapping, too (cf. :py:func:`ocrd_models.ocrd_page.parseEtree`)

    Returns:
        The PAGE root object alone, or, when ``with_tree`` is true, the tuple
        ``(pcgts, etree, mapping, revmap)``.

    Raises:
        ValueError: if ``input_file.local_filename`` is empty or unset.
        FileNotFoundError: if ``input_file.local_filename`` does not exist.
    """
    local_path = input_file.local_filename
    if not local_path:
        raise ValueError("input_file must have 'local_filename' property")
    if not Path(local_path).exists():
        raise FileNotFoundError("File not found: '%s' (%s)" % (local_path, input_file))

    exif = exif_from_filename(local_path)
    timestamp = datetime.now()
    metadata = MetadataType(
        Creator="OCR-D/core %s" % VERSION,
        Created=timestamp,
        LastChange=timestamp)

    # XXX brittle
    # Prefer the URL when one is set, otherwise fall back to the local path.
    if input_file.url is not None:
        image_ref = input_file.url
    else:
        image_ref = local_path

    page = PageType(
        imageWidth=exif.width,
        imageHeight=exif.height,
        imageFilename=image_ref)
    pcgts = PcGtsType(Metadata=metadata, Page=page, pcGtsId=input_file.ID)

    if with_tree:
        # to_etree() fills `mapping` (element -> node); invert it for
        # node -> element lookups.
        mapping = {}
        etree = pcgts.to_etree(mapping_=mapping)
        revmap = {node: element for element, node in mapping.items()}
        return pcgts, etree, mapping, revmap
    return pcgts