示例#1
0
def test_alternative_image_additions():
    """Attach an AlternativeImage at every level of the PAGE hierarchy.

    Builds the chain Page > TextRegion > TextLine > Word > Glyph and adds one
    ``AlternativeImageType`` to each node. Apart from the ``pcGtsId`` check,
    the test only verifies that none of the calls raises.
    """
    document = PcGtsType(pcGtsId="foo")
    assert document.pcGtsId == 'foo'

    # act
    # Page/AlternativeImage
    parent = PageType()
    document.set_Page(parent)
    parent.add_AlternativeImage(AlternativeImageType())

    # One entry per nesting level below Page:
    # (class of the child node, name of the parent's method that attaches it)
    nesting = (
        (TextRegionType, 'add_TextRegion'),
        (TextLineType, 'add_TextLine'),
        (WordType, 'add_Word'),
        (GlyphType, 'add_Glyph'),
    )
    for child_class, adder_name in nesting:
        child = child_class()
        getattr(parent, adder_name)(child)
        # <level>/AlternativeImage
        child.add_AlternativeImage(AlternativeImageType())
        parent = child
示例#2
0
def page_from_image(input_file):
    """
    Build a new PAGE document (``PcGtsType``) describing an image file.

    The page dimensions are taken from ``exif_from_filename``. The image
    filename stored in the page is ``input_file.url`` when that is not
    ``None``, otherwise ``input_file.local_filename``.

    Arguments:
        * input_file (OcrdFile): file entry representing an image (i.e. should
          have ``mimetype`` starting with ``image/``)

    Raises:
        ValueError: if ``input_file.local_filename`` is unset or empty.
        FileNotFoundError: if ``input_file.local_filename`` does not exist.
    """
    local_path = input_file.local_filename
    if not local_path:
        raise ValueError("input_file must have 'local_filename' property")
    if not Path(local_path).exists():
        raise FileNotFoundError(
            "File not found: '%s' (%s)" % (local_path, input_file))

    image_info = exif_from_filename(local_path)
    timestamp = datetime.now()
    metadata = MetadataType(
        Creator="OCR-D/core %s" % VERSION,
        Created=timestamp,
        LastChange=timestamp,
    )

    # XXX brittle: prefer the URL and fall back to the local path
    image_name = input_file.url
    if image_name is None:
        image_name = local_path
    page = PageType(
        imageWidth=image_info.width,
        imageHeight=image_info.height,
        imageFilename=image_name,
    )
    return PcGtsType(Metadata=metadata, Page=page)
示例#3
0
def page_from_image(input_file):
    """
    Create `OcrdPage </../../ocrd_models/ocrd_models.ocrd_page.html>`_
    from an `OcrdFile </../../ocrd_models/ocrd_models.ocrd_file.html>`_
    representing an image (i.e. should have ``mimetype`` starting with ``image/``).

    The image filename stored in the page is ``input_file.url`` when that is
    not ``None``; otherwise it is ``'file://'`` followed by
    ``input_file.local_filename``.

    Arguments:
        * input_file (OcrdFile): file entry whose ``local_filename`` points to
          the image to describe

    Raises:
        ValueError: if ``input_file.local_filename`` is unset or empty.
    """
    # Reject None as well as an empty string. ValueError is a subclass of
    # Exception, so callers catching the former generic Exception still work.
    if not input_file.local_filename:
        raise ValueError("input_file must have 'local_filename' property")
    exif = exif_from_filename(input_file.local_filename)
    now = datetime.now()
    if input_file.url is not None:
        image_filename = input_file.url
    else:
        # XXX brittle
        image_filename = 'file://' + input_file.local_filename
    return PcGtsType(
        Metadata=MetadataType(
            Creator="OCR-D/core %s" % VERSION,
            Created=now,
            LastChange=now
        ),
        Page=PageType(
            imageWidth=exif.width,
            imageHeight=exif.height,
            imageFilename=image_filename
        )
    )
示例#4
0
    def render_all(self, pc_gts: PcGtsType) -> None:
        """Render the frame elements, all regions and (optionally) reading order.

        The print space and border are passed to ``render_type`` first, then
        every region from ``get_AllRegions()`` in back-to-front order. If
        ``Feature.ORDER`` is enabled in ``self.features``, an ``ArrowOperation``
        is appended to ``self.operations`` for each pair of consecutive regions
        in reading order.

        Arguments:
            pc_gts: PAGE root element whose page is rendered
        """
        page: PageType = pc_gts.get_Page()
        for frame_getter in (page.get_PrintSpace, page.get_Border):
            self.render_type(frame_getter())

        def paint_rank(region):
            # Regions often overlap and nothing is alpha-composited, so what
            # is drawn last stays unoccluded. Document order does not help
            # here (it reflects workflow or geometry, not visibility), so use
            # a fixed ranking, lowest rank drawn first:
            # - everything else (-2)
            # - TextRegion (-1)
            # - SeparatorRegion (0), i.e. top-most
            if isinstance(region, SeparatorRegionType):
                return 0
            return -1 if isinstance(region, TextRegionType) else -2

        back_to_front = list(page.get_AllRegions())
        back_to_front.sort(key=paint_rank)
        for region_ds in back_to_front:
            self.render_type(region_ds)

        if not self.features & Feature.ORDER:
            return

        # Connect consecutive regions of the reading order with arrows.
        previous: Optional[Point] = None
        for region_ds in page.get_AllRegions(order='reading-order-only'):
            current = self.region_factory.create(
                region_ds).poly.representative_point()
            # truthiness test: nothing is drawn while `previous` is still None
            if previous:
                arrow = ArrowOperation(previous, current, color='#FF0000CF')
                self.operations.append(arrow)
            previous = current
示例#5
0
def create_page_xml(imageFilename, height, width):
    """Return a new ``PcGtsType`` for an image of the given name and size.

    The metadata names ``SBB_QURATOR`` as creator and uses the current time
    for both the creation and last-change stamps. The page records the width
    and height converted with ``str``, reading direction ``left-to-right`` and
    text line order ``top-to-bottom``.
    """
    timestamp = datetime.now()
    metadata = MetadataType(
        Creator='SBB_QURATOR',
        Created=timestamp,
        LastChange=timestamp,
    )
    page = PageType(
        imageWidth=str(width),
        imageHeight=str(height),
        imageFilename=imageFilename,
        readingDirection='left-to-right',
        textLineOrder='top-to-bottom',
    )
    return PcGtsType(Metadata=metadata, Page=page)
示例#6
0
 def test_alternativeImage(self):
     """Add one AlternativeImage to each level from Page down to Glyph.

     Only the ``pcGtsId`` is asserted; the remaining calls are checked
     merely for not raising.
     """
     root = PcGtsType(pcGtsId="foo")
     self.assertEqual(root.pcGtsId, 'foo')

     # Page/AlternativeImage
     page_node = PageType()
     root.set_Page(page_node)
     page_node.add_AlternativeImage(AlternativeImageType())

     # TextRegion/AlternativeImage
     region_node = TextRegionType()
     page_node.add_TextRegion(region_node)
     region_node.add_AlternativeImage(AlternativeImageType())

     # TextLine/AlternativeImage
     line_node = TextLineType()
     region_node.add_TextLine(line_node)
     line_node.add_AlternativeImage(AlternativeImageType())

     # Word/AlternativeImage
     word_node = WordType()
     line_node.add_Word(word_node)
     word_node.add_AlternativeImage(AlternativeImageType())

     # Glyph/AlternativeImage
     glyph_node = GlyphType()
     word_node.add_Glyph(glyph_node)
     glyph_node.add_AlternativeImage(AlternativeImageType())
示例#7
0
def page_from_image(input_file, with_tree=False):
    """
    Build a new PAGE document (``PcGtsType``) for an image file.

    The page dimensions are taken from ``exif_from_filename``; the image
    filename stored in the page is ``input_file.url`` when that is not
    ``None``, otherwise ``input_file.local_filename``. The document's
    ``pcGtsId`` is set to ``input_file.ID``.

    Arguments:
        input_file (:py:class:`~ocrd_models.ocrd_file.OcrdFile`): file to open
            and produce a PAGE DOM for (i.e. should have ``@mimetype``
            starting with ``image/``)
    Keyword arguments:
        with_tree (boolean): whether to return XML node tree, element-node
            mapping and reverse mapping, too
            (cf. :py:func:`ocrd_models.ocrd_page.parseEtree`)

    Returns:
        The ``PcGtsType`` alone, or the tuple
        ``(pcgts, etree, mapping, revmap)`` when ``with_tree`` is true.

    Raises:
        ValueError: if ``input_file.local_filename`` is unset or empty.
        FileNotFoundError: if ``input_file.local_filename`` does not exist.
    """
    local_path = input_file.local_filename
    if not local_path:
        raise ValueError("input_file must have 'local_filename' property")
    if not Path(local_path).exists():
        raise FileNotFoundError(
            "File not found: '%s' (%s)" % (local_path, input_file))

    image_info = exif_from_filename(local_path)
    timestamp = datetime.now()
    metadata = MetadataType(
        Creator="OCR-D/core %s" % VERSION,
        Created=timestamp,
        LastChange=timestamp,
    )

    # XXX brittle: prefer the URL and fall back to the local path
    image_name = input_file.url
    if image_name is None:
        image_name = local_path
    page = PageType(
        imageWidth=image_info.width,
        imageHeight=image_info.height,
        imageFilename=image_name,
    )
    pcgts = PcGtsType(Metadata=metadata, Page=page, pcGtsId=input_file.ID)

    if with_tree:
        # to_etree() fills `mapping` (element -> node); invert it for lookups
        # in the other direction.
        mapping = {}
        etree = pcgts.to_etree(mapping_=mapping)
        revmap = {node: element for element, node in mapping.items()}
        return pcgts, etree, mapping, revmap
    return pcgts