Пример #1
0
def test_add_group():
    """Adding the same file group twice must leave exactly one group."""
    empty = OcrdMets.empty_mets()
    assert len(empty.file_groups) == 0, '0 file groups'
    # The second pass re-adds 'TEST'; the group count must stay at one.
    for _ in range(2):
        empty.add_file_group('TEST')
        assert len(empty.file_groups) == 1, '1 file groups'
Пример #2
0
 def test_add_group(self):
     """Re-adding an existing file group must not create a second group."""
     fresh = OcrdMets.empty_mets()
     self.assertEqual(len(fresh.file_groups), 0, '0 file groups')
     # Both passes add 'TEST'; the count is expected to be 1 after each.
     for _ in range(2):
         fresh.add_file_group('TEST')
         self.assertEqual(len(fresh.file_groups), 1, '1 file groups')
Пример #3
0
 def test_add_file(self):
     """Add two files to one group, then reassign their physical pages."""
     mets = OcrdMets.empty_mets()
     self.assertEqual(len(mets.file_groups), 0, '0 file groups')
     self.assertEqual(len(mets.find_files(fileGrp='OUTPUT')), 0, '0 files in "OUTPUT"')
     first = mets.add_file('OUTPUT', ID="foo123", mimetype="bla/quux", pageId="foobar")
     second = mets.add_file('OUTPUT', ID="foo1232", mimetype="bla/quux", pageId="foobar")
     self.assertEqual(first.pageId, 'foobar', 'pageId set')
     self.assertEqual(len(mets.file_groups), 1, '1 file groups')
     self.assertEqual(len(mets.find_files(fileGrp='OUTPUT')), 2, '2 files in "OUTPUT"')
     # (target page, file, order, orderlabel) -- applied in exactly this sequence
     reassignments = (
         ('barfoo', first, '300', "page 300"),
         ('quux', second, '302', "page 302"),
         ('barfoo', second, '301', "page 301"),
     )
     for page_id, ocrd_file, order, orderlabel in reassignments:
         mets.set_physical_page_for_file(page_id, ocrd_file, order=order, orderlabel=orderlabel)
         self.assertEqual(ocrd_file.pageId, page_id, 'pageId changed')
     # Moving files between pages must not touch the file groups.
     self.assertEqual(len(mets.file_groups), 1, '1 file group')
Пример #4
0
    def workspace_from_nothing(self,
                               directory,
                               mets_basename='mets.xml',
                               clobber_mets=False):
        """
        Create a workspace around a freshly written, empty METS file.

        Arguments:
            directory (string): Target directory for the workspace; created \
                (including parents) if missing. If ``None``, a temporary \
                directory is created with prefix ``TMP_PREFIX``.
        Keyword Arguments:
            mets_basename (string, 'mets.xml'): Name of the METS file inside ``directory``.
            clobber_mets (boolean, False): Whether an already existing METS \
                file may be overwritten.

        Returns:
            a new ``Workspace`` for ``directory``

        Raises:
            FileExistsError: if the METS file already exists and ``clobber_mets`` is not set.
        """
        log = getLogger('ocrd.resolver.workspace_from_nothing')
        if directory is None:
            directory = mkdtemp(prefix=TMP_PREFIX)
        workspace_dir = Path(directory)
        workspace_dir.mkdir(parents=True, exist_ok=True)
        mets_path = workspace_dir / mets_basename
        if mets_path.exists() and not clobber_mets:
            message = "METS '%s' already exists in '%s' and clobber_mets not set." % (
                mets_basename, directory)
            raise FileExistsError(message)
        mets = OcrdMets.empty_mets()
        log.info("Writing METS to %s", mets_path)
        mets_path.write_bytes(mets.to_xml(xmllint=True))

        return Workspace(self, directory, mets, mets_basename=mets_basename)
Пример #5
0
def test_unique_identifier_from_nothing():
    """Check identifier handling and header data of a freshly created METS.

    An empty METS has no unique identifier; after assignment the new value is
    readable again. The serialized XML is expected to mention the ocrd/core
    version and a CREATEDATE starting with the creation date.
    """
    # Take the timestamp exactly once: calling datetime.now() again for the
    # expected value could yield a different day/month/year when the test
    # runs across midnight, making the CREATEDATE check flaky.
    now = datetime.now()
    mets = OcrdMets.empty_mets(now.isoformat())
    assert mets.unique_identifier is None, 'no identifier'
    mets.unique_identifier = 'foo'
    assert mets.unique_identifier == 'foo', 'Right identifier after change is "foo"'
    as_string = mets.to_xml().decode('utf-8')
    assert 'ocrd/core v%s' % VERSION in as_string
    assert 'CREATEDATE="%04u-%02u-%02uT' % (now.year, now.month, now.day) in as_string
Пример #6
0
 def test_page_from_file_no_existe(self):
     """Expect FileNotFoundError when loading a page for the file 'no-existe'."""
     expected_message = "File not found: 'no-existe'"
     with self.assertRaisesRegex(FileNotFoundError, expected_message):
         missing = OcrdMets.empty_mets().add_file(
             'FOO',
             ID='foo',
             local_filename='no-existe',
             mimetype='foo/bar')
         page_from_file(missing)
Пример #7
0
def test_fptr_changed_for_change_id():
    """Changing a file's ID must carry its physical page assignment along."""
    mets = OcrdMets.empty_mets()
    ocrd_file = mets.add_file('FOO', ID='FOO_1', mimetype='image/tiff', pageId='p0001')

    def pages_of(file_id):
        # Physical pages reported for a single file ID
        return mets.get_physical_pages(for_fileIds=[file_id])

    assert pages_of('FOO_1') == ['p0001']
    ocrd_file.ID = 'BAZ_1'
    # The old ID no longer resolves to a page, the new one does.
    assert pages_of('FOO_1') == [None]
    assert pages_of('BAZ_1') == ['p0001']
Пример #8
0
def test_ocrd_file_equality():
    """Compare files with differing IDs, mimetype spellings and origins."""
    mets = OcrdMets.empty_mets()
    foo_1 = mets.add_file('FOO', ID='FOO_1', mimetype='image/tiff')
    foo_2 = mets.add_file('FOO', ID='FOO_2', mimetype='image/tiff')
    assert foo_1 != foo_2
    default_tiff = create_ocrd_file_with_defaults(ID='TEMP_1', mimetype='image/tiff')
    default_tif = create_ocrd_file_with_defaults(ID='TEMP_1', mimetype='image/tif')
    # 'image/tiff' and 'image/tif' are expected to compare as equivalent
    assert default_tiff == default_tif
    # Same ID, but added through the METS instead of the defaults helper
    temp_in_mets = mets.add_file('TEMP', ID='TEMP_1', mimetype='image/tiff')
    assert default_tiff == temp_in_mets
Пример #9
0
 def test_unique_identifier_from_nothing(self):
     """Check identifier handling and header data of a freshly created METS.

     An empty METS has no unique identifier; after assignment the new value
     is readable again. The serialized XML is expected to mention the
     ocrd/core version and a CREATEDATE starting with the creation date.
     """
     # Take the timestamp exactly once: re-reading the clock for the expected
     # value could yield a different date when the test runs across midnight.
     now = datetime.now()
     mets = OcrdMets.empty_mets(now.isoformat())
     self.assertIsNone(mets.unique_identifier, 'no identifier')
     mets.unique_identifier = 'foo'
     self.assertEqual(mets.unique_identifier, 'foo', 'Right identifier after change')
     as_string = mets.to_xml().decode('utf-8')
     self.assertIn('ocrd/core v%s' % VERSION, as_string)
     self.assertIn('CREATEDATE="%04u-%02u-%02uT' % (now.year, now.month, now.day), as_string)
Пример #10
0
 def test_make_file_id_744(self):
     """
     https://github.com/OCR-D/core/pull/744
     > Often file IDs have two numbers, one of which will clash. In that case only the numerical fallback works.
     """
     mets = OcrdMets.empty_mets()
     # Only the second file is passed to make_file_id; the first just populates the group.
     mets.add_file('GRP2', ID='img1796-97_00000024_img', pageId='phys0024')
     latest = mets.add_file('GRP2', ID='img1796-97_00000025_img', pageId='phys0025')
     generated = make_file_id(latest, 'GRP2')
     self.assertEqual(generated, 'GRP2_0002')
Пример #11
0
 def test_make_file_id_mets(self):
     """Exercise make_file_id against a METS whose target group grows and shrinks."""
     mets = OcrdMets.empty_mets()
     # Nine files each in FOO and BAR, numbered 0001..0009
     for number in range(1, 10):
         mets.add_file('FOO', ID="FOO_%04d" % number, mimetype="image/tiff")
         mets.add_file('BAR', ID="BAR_%04d" % number, mimetype="image/tiff")
     bar_seven = mets.find_files(ID='BAR_0007')[0]
     self.assertEqual(make_file_id(bar_seven, 'FOO'), 'FOO_0007')
     odd_one = mets.add_file('ABC', ID="BAR_7", mimetype="image/tiff")
     self.assertEqual(make_file_id(odd_one, 'FOO'), 'FOO_0010')
     # With FOO emptied, numbering is expected to restart
     mets.remove_file(fileGrp='FOO')
     self.assertEqual(make_file_id(odd_one, 'FOO'), 'FOO_0001')
     mets.add_file('FOO', ID="FOO_0001", mimetype="image/tiff")
     # debugging aid: print('\n'.join(['%s' % of for of in mets.find_files()]))
     self.assertEqual(make_file_id(odd_one, 'FOO'), 'FOO_0002')
Пример #12
0
 def test_ocrd_file_eq(self):
     """Check ``==`` / ``!=`` between files for identity, differing IDs and mimetype spellings."""
     mets = OcrdMets.empty_mets()
     foo_1 = mets.add_file('FOO', ID='FOO_1', mimetype='image/tiff')
     # A file compared with itself
     self.assertEqual(foo_1 == foo_1, True)
     self.assertEqual(foo_1 != foo_1, False)
     foo_2 = mets.add_file('FOO', ID='FOO_2', mimetype='image/tiff')
     self.assertEqual(foo_1 == foo_2, False)
     default_tiff = create_ocrd_file_with_defaults(ID='TEMP_1', mimetype='image/tiff')
     default_tif = create_ocrd_file_with_defaults(ID='TEMP_1', mimetype='image/tif')
     # 'image/tiff' and 'image/tif' are expected to compare as equivalent
     self.assertEqual(default_tiff == default_tif, True)
     temp_in_mets = mets.add_file('TEMP', ID='TEMP_1', mimetype='image/tiff')
     self.assertEqual(default_tiff == temp_in_mets, True)
Пример #13
0
    def workspace_from_nothing(self, directory, mets_basename='mets.xml', clobber_mets=False):
        """
        Create an empty workspace.

        Arguments:
            directory (string): Target directory for the workspace; created
                (including parents) if missing. If ``None``, a temporary
                directory is created with prefix ``TMP_PREFIX``.
        Keyword Arguments:
            mets_basename (string, 'mets.xml'): Name of the METS file inside ``directory``.
            clobber_mets (boolean, False): Whether an already existing METS
                file may be overwritten.

        Returns:
            a new ``Workspace`` for ``directory``

        Raises:
            FileExistsError: if the METS file already exists and ``clobber_mets`` is not set.
        """
        if directory is None:
            directory = tempfile.mkdtemp(prefix=TMP_PREFIX)
        Path(directory).mkdir(parents=True, exist_ok=True)
        mets_path = Path(directory, mets_basename)
        if mets_path.exists() and not clobber_mets:
            raise FileExistsError("METS '%s' already exists in '%s' and clobber_mets not set." % (mets_basename, directory))
        mets = OcrdMets.empty_mets()
        log.info("Writing METS to %s", mets_path)
        mets_path.write_bytes(mets.to_xml(xmllint=True))

        # Hand the basename on: the METS was written under ``mets_basename``,
        # so the workspace must not fall back to its own default file name.
        return Workspace(self, directory, mets, mets_basename=mets_basename)
Пример #14
0
def test_add_file():
    """Add two files to one group, then reassign their physical pages."""
    mets = OcrdMets.empty_mets()

    def count_output_files():
        # Number of files currently found in the 'OUTPUT' group
        return len(list(mets.find_all_files(fileGrp='OUTPUT')))

    assert len(mets.file_groups) == 0, '0 file groups'
    assert count_output_files() == 0, '0 files in "OUTPUT"'
    first = mets.add_file('OUTPUT', ID="foo123", mimetype="bla/quux", pageId="foobar")
    second = mets.add_file('OUTPUT', ID="foo1232", mimetype="bla/quux", pageId="foobar")
    assert first.pageId == 'foobar', 'pageId set'
    assert len(mets.file_groups) == 1, '1 file groups'
    assert count_output_files() == 2, '2 files in "OUTPUT"'
    # (target page, file, order, orderlabel) -- applied in exactly this sequence
    reassignments = (
        ('barfoo', first, '300', "page 300"),
        ('quux', second, '302', "page 302"),
        ('barfoo', second, '301', "page 301"),
    )
    for page_id, ocrd_file, order, orderlabel in reassignments:
        mets.set_physical_page_for_file(page_id, ocrd_file, order=order, orderlabel=orderlabel)
        assert ocrd_file.pageId == page_id, 'pageId changed'
    # Moving files between pages must not touch the file groups.
    assert len(mets.file_groups) == 1, '1 file group'
Пример #15
0
    def workspace_from_nothing(self,
                               directory,
                               mets_basename='mets.xml',
                               clobber_mets=False):
        """
        Create an empty workspace.

        Arguments:
            directory (string): Target directory for the workspace; created
                if missing. If ``None``, a temporary directory is created
                with prefix ``TMP_PREFIX``.
        Keyword Arguments:
            mets_basename (string, 'mets.xml'): Name of the METS file inside ``directory``.
            clobber_mets (boolean, False): Whether an already existing METS
                file may be overwritten.

        Returns:
            a new ``Workspace`` for ``directory``

        Raises:
            Exception: if the METS file already exists and ``clobber_mets`` is not set.
        """
        if directory is None:
            directory = tempfile.mkdtemp(prefix=TMP_PREFIX)
        # exist_ok avoids the check-then-create race of a separate exists() test
        makedirs(directory, exist_ok=True)

        mets_fpath = join(directory, mets_basename)
        if not clobber_mets and exists(mets_fpath):
            raise Exception("Not clobbering existing mets.xml in '%s'." %
                            directory)
        mets = OcrdMets.empty_mets()
        with open(mets_fpath, 'wb') as fmets:
            log.info("Writing %s", mets_fpath)
            fmets.write(mets.to_xml(xmllint=True))

        # Hand the basename on: the METS was written under ``mets_basename``,
        # so the workspace must not fall back to its own default file name.
        return Workspace(self, directory, mets, mets_basename=mets_basename)
Пример #16
0
def create_ocrd_file(*args, **kwargs):
    """Add a file to a new, empty METS and return what ``add_file`` returns.

    All positional and keyword arguments are forwarded to ``add_file`` unchanged.
    """
    return OcrdMets.empty_mets().add_file(*args, **kwargs)
Пример #17
0
 def test_make_file_id_605(self):
     """https://github.com/OCR-D/core/pull/605"""
     mets = OcrdMets.empty_mets()
     # Only the second file is passed to make_file_id; the first just populates the METS.
     mets.add_file('1:!GRP', ID='FOO_0001', pageId='phys0001')
     second = mets.add_file('2:!GRP', ID='FOO_0002', pageId='phys0002')
     generated = make_file_id(second, '2:!GRP')
     self.assertEqual(generated, 'id_2_GRP_0002')
Пример #18
0
 def test_make_file_id_570(self):
     """https://github.com/OCR-D/core/pull/570"""
     mets = OcrdMets.empty_mets()
     source_file = mets.add_file('GRP', ID='FOO_0001', pageId='phys0001')
     # This ID already occupies 'GRP2_0001', the name the first file would otherwise get.
     mets.add_file('GRP', ID='GRP2_0001', pageId='phys0002')
     generated = make_file_id(source_file, 'GRP2')
     self.assertEqual(generated, 'GRP2_0002')
Пример #19
0
def _url_to_file(the_path):
    """Wrap a path in a file entry of a throwaway METS.

    The path is made absolute and used as the entry's ``url``; its final
    component becomes the ``ID``. The entry is added to group 'DEPRECATED'.
    """
    throwaway_mets = OcrdMets.empty_mets()
    absolute = abspath(the_path)
    file_id = Path(absolute).name
    return throwaway_mets.add_file('DEPRECATED', ID=file_id, url=absolute)