Exemplo n.º 1
0
def page_from_image(input_file):
    """
    Build a fresh `OcrdPage </../../ocrd_models/ocrd_models.ocrd_page.html>`_
    for an `OcrdFile </../../ocrd_models/ocrd_models.ocrd_file.html>`_
    that represents an image (i.e. its ``mimetype`` should start with ``image/``).

    Arguments:
        * input_file (OcrdFile): image file entry; its ``local_filename`` must
          be set and must point to an existing file.

    Returns:
        A ``PcGtsType`` whose ``Metadata`` names the OCR-D/core version as
        creator and uses one shared timestamp for ``Created`` and
        ``LastChange``, and whose ``Page`` takes its width and height from the
        result of ``exif_from_filename``.

    Raises:
        ValueError: ``input_file.local_filename`` is unset or empty.
        FileNotFoundError: ``input_file.local_filename`` does not exist.
    """
    local_path = input_file.local_filename
    if not local_path:
        raise ValueError("input_file must have 'local_filename' property")
    if not Path(local_path).exists():
        raise FileNotFoundError("File not found: '%s' (%s)" %
                                (local_path, input_file))

    image_info = exif_from_filename(local_path)
    timestamp = datetime.now()
    header = MetadataType(Creator="OCR-D/core %s" % VERSION,
                          Created=timestamp,
                          LastChange=timestamp)

    # XXX brittle (flagged as such by the original author):
    # prefer the URL, fall back to the local path only when no URL is set.
    image_ref = input_file.url
    if image_ref is None:
        image_ref = local_path

    page = PageType(imageWidth=image_info.width,
                    imageHeight=image_info.height,
                    imageFilename=image_ref)
    return PcGtsType(Metadata=header, Page=page)
Exemplo n.º 2
0
def page_from_image(input_file):
    """
    Create `OcrdPage </../../ocrd_models/ocrd_models.ocrd_page.html>`_
    from an `OcrdFile </../../ocrd_models/ocrd_models.ocrd_file.html>`_
    representing an image (i.e. should have ``mimetype`` starting with ``image/``).

    Arguments:
        * input_file (OcrdFile): image file entry; its ``local_filename`` must
          be set to a non-empty value.

    Returns:
        A ``PcGtsType`` whose ``Metadata`` names the OCR-D/core version as
        creator and uses one shared timestamp for ``Created`` and
        ``LastChange``, and whose ``Page`` takes its width and height from the
        result of ``exif_from_filename``.

    Raises:
        ValueError: ``input_file.local_filename`` is ``None`` or empty.
    """
    # Reject empty values as well as None, and raise a specific exception type.
    # ValueError is a subclass of Exception, so handlers written against the
    # previous bare ``Exception`` still catch it.
    if not input_file.local_filename:
        raise ValueError("input_file must have 'local_filename' property")
    exif = exif_from_filename(input_file.local_filename)
    now = datetime.now()
    return PcGtsType(
        Metadata=MetadataType(
            Creator="OCR-D/core %s" % VERSION,
            Created=now,
            LastChange=now
        ),
        Page=PageType(
            imageWidth=exif.width,
            imageHeight=exif.height,
            # XXX brittle (flagged as such by the original author):
            # without a URL, the local path is turned into a file:// reference.
            imageFilename=input_file.url if input_file.url is not None else 'file://' + input_file.local_filename
        )
    )
Exemplo n.º 3
0
def create_page_xml(imageFilename, height, width):
    """
    Assemble a ``PcGtsType`` root object describing a single image.

    Arguments:
        imageFilename: value stored as the page's ``imageFilename``.
        height: image height; stored as ``str(height)``.
        width: image width; stored as ``str(width)``.

    Returns:
        A ``PcGtsType`` whose ``Metadata`` has creator ``SBB_QURATOR`` and one
        shared timestamp for ``Created`` and ``LastChange``, and whose ``Page``
        has reading direction ``left-to-right`` and text line order
        ``top-to-bottom``.
    """
    timestamp = datetime.now()
    header = MetadataType(
        Creator='SBB_QURATOR',
        Created=timestamp,
        LastChange=timestamp,
    )
    page = PageType(
        imageWidth=str(width),
        imageHeight=str(height),
        imageFilename=imageFilename,
        readingDirection='left-to-right',
        textLineOrder='top-to-bottom',
    )
    return PcGtsType(Metadata=header, Page=page)
Exemplo n.º 4
0
def test_alternative_image_additions():
    """
    Exercise adding an ``AlternativeImageType`` at each level of a nested
    Page / TextRegion / TextLine / Word / Glyph structure.
    """
    root = PcGtsType(pcGtsId="foo")
    assert root.pcGtsId == 'foo'

    # act
    # Page/AlternativeImage
    parent = PageType()
    root.set_Page(parent)
    parent.add_AlternativeImage(AlternativeImageType())

    # Remaining levels, outermost first: (method that attaches the child to
    # its parent, type of the child).
    nesting = (
        ('add_TextRegion', TextRegionType),  # TextRegion/AlternativeImage
        ('add_TextLine', TextLineType),      # TextLine/AlternativeImage
        ('add_Word', WordType),              # Word/AlternativeImage
        ('add_Glyph', GlyphType),            # Glyph/AlternativeImage
    )
    for attach_name, level_type in nesting:
        child = level_type()
        getattr(parent, attach_name)(child)
        child.add_AlternativeImage(AlternativeImageType())
        # The element just attached becomes the parent of the next level.
        parent = child
Exemplo n.º 5
0
def page_from_image(input_file, with_tree=False):
    """
    Build a new :py:class:`~ocrd_models.ocrd_page.OcrdPage`
    for an :py:class:`~ocrd_models.ocrd_file.OcrdFile`
    that represents an image (i.e. its ``@mimetype`` should start with ``image/``).

    Arguments:
        input_file (:py:class:`~ocrd_models.ocrd_file.OcrdFile`): file to open
            and produce a PAGE DOM for
    Keyword arguments:
        with_tree (boolean): whether to return XML node tree, element-node mapping
            and reverse mapping, too (cf. :py:func:`ocrd_models.ocrd_page.parseEtree`)

    Returns:
        The ``PcGtsType`` object alone when ``with_tree`` is false; otherwise the
        tuple ``(pcgts, etree, mapping, revmap)``, where ``mapping`` is the dict
        filled by ``to_etree`` and ``revmap`` is that dict with keys and values
        swapped.

    Raises:
        ValueError: ``input_file.local_filename`` is unset or empty.
        FileNotFoundError: ``input_file.local_filename`` does not exist.
    """
    image_path = input_file.local_filename
    if not image_path:
        raise ValueError("input_file must have 'local_filename' property")
    if not Path(image_path).exists():
        raise FileNotFoundError("File not found: '%s' (%s)" %
                                (image_path, input_file))

    image_info = exif_from_filename(image_path)
    timestamp = datetime.now()
    header = MetadataType(Creator="OCR-D/core %s" % VERSION,
                          Created=timestamp,
                          LastChange=timestamp)

    # XXX brittle (flagged as such by the original author):
    # prefer the URL, fall back to the local path only when no URL is set.
    image_ref = input_file.url
    if image_ref is None:
        image_ref = image_path

    page = PageType(imageWidth=image_info.width,
                    imageHeight=image_info.height,
                    imageFilename=image_ref)
    document = PcGtsType(Metadata=header, Page=page, pcGtsId=input_file.ID)

    if with_tree:
        forward = {}
        tree = document.to_etree(mapping_=forward)
        backward = {node: element for element, node in forward.items()}
        return document, tree, forward, backward
    return document
Exemplo n.º 6
0
 def test_alternativeImage(self):
     # Exercises adding an AlternativeImageType at each level of a nested
     # Page / TextRegion / TextLine / Word / Glyph structure.
     document = PcGtsType(pcGtsId="foo")
     self.assertEqual(document.pcGtsId, 'foo')

     # Level 1: the page, attached to the document root.
     page_node = PageType()
     document.set_Page(page_node)
     page_node.add_AlternativeImage(AlternativeImageType())

     # Level 2: a text region inside the page.
     region_node = TextRegionType()
     page_node.add_TextRegion(region_node)
     region_node.add_AlternativeImage(AlternativeImageType())

     # Level 3: a text line inside the region.
     line_node = TextLineType()
     region_node.add_TextLine(line_node)
     line_node.add_AlternativeImage(AlternativeImageType())

     # Level 4: a word inside the line.
     word_node = WordType()
     line_node.add_Word(word_node)
     word_node.add_AlternativeImage(AlternativeImageType())

     # Level 5: a glyph inside the word.
     glyph_node = GlyphType()
     word_node.add_Glyph(glyph_node)
     glyph_node.add_AlternativeImage(AlternativeImageType())