def test_add_group():
    """Check that adding the same file group twice yields a single group."""
    doc = OcrdMets.empty_mets()
    assert len(doc.file_groups) == 0, '0 file groups'
    # The second pass re-adds 'TEST'; the group count must stay at one.
    for _ in range(2):
        doc.add_file_group('TEST')
        assert len(doc.file_groups) == 1, '1 file groups'
def test_add_group(self):
    """Check that adding the same file group twice yields a single group."""
    doc = OcrdMets.empty_mets()
    self.assertEqual(len(doc.file_groups), 0, '0 file groups')
    # The second pass re-adds 'TEST'; the group count must stay at one.
    for _ in range(2):
        doc.add_file_group('TEST')
        self.assertEqual(len(doc.file_groups), 1, '1 file groups')
def test_add_file(self):
    """Add two files to one group, then move them between physical pages."""
    doc = OcrdMets.empty_mets()
    self.assertEqual(len(doc.file_groups), 0, '0 file groups')
    self.assertEqual(len(doc.find_files(fileGrp='OUTPUT')), 0, '0 files in "OUTPUT"')

    first = doc.add_file('OUTPUT', ID="foo123", mimetype="bla/quux", pageId="foobar")
    second = doc.add_file('OUTPUT', ID="foo1232", mimetype="bla/quux", pageId="foobar")
    self.assertEqual(first.pageId, 'foobar', 'pageId set')
    self.assertEqual(len(doc.file_groups), 1, '1 file groups')
    self.assertEqual(len(doc.find_files(fileGrp='OUTPUT')), 2, '2 files in "OUTPUT"')

    # (target page, file, order, orderlabel) -- applied strictly in this order.
    reassignments = (
        ('barfoo', first, '300', "page 300"),
        ('quux', second, '302', "page 302"),
        ('barfoo', second, '301', "page 301"),
    )
    for page_id, ocrd_file, order, label in reassignments:
        doc.set_physical_page_for_file(page_id, ocrd_file, order=order, orderlabel=label)
        self.assertEqual(ocrd_file.pageId, page_id, 'pageId changed')

    # Re-assigning pages must not create additional file groups.
    self.assertEqual(len(doc.file_groups), 1, '1 file group')
def workspace_from_nothing(self, directory, mets_basename='mets.xml', clobber_mets=False):
    """
    Create an empty workspace.

    Arguments:
        directory (string): Target directory for the workspace.
            If ``None``, a temporary directory is created with the prefix
            :py:data:`ocrd.constants.TMP_PREFIX`.
            (The resulting path can be retrieved via :py:attr:`ocrd.Workspace.directory`.)

    Keyword Arguments:
        mets_basename (string, ``mets.xml``): File name of the METS file inside ``directory``.
        clobber_mets (boolean, False): Whether to overwrite an existing METS file.
            By default an existing METS file raises an exception.

    Returns:
        a new :py:class:`~ocrd.workspace.Workspace`

    Raises:
        FileExistsError: if the METS file already exists and ``clobber_mets`` is not set.
    """
    log = getLogger('ocrd.resolver.workspace_from_nothing')
    target_dir = mkdtemp(prefix=TMP_PREFIX) if directory is None else directory

    # Missing parent directories are created; an existing directory is fine.
    workspace_path = Path(target_dir)
    workspace_path.mkdir(parents=True, exist_ok=True)

    mets_path = workspace_path / mets_basename
    if mets_path.exists():
        if not clobber_mets:
            raise FileExistsError(
                "METS '%s' already exists in '%s' and clobber_mets not set." % (mets_basename, target_dir))

    empty = OcrdMets.empty_mets()
    log.info("Writing METS to %s", mets_path)
    serialized = empty.to_xml(xmllint=True)
    mets_path.write_bytes(serialized)

    return Workspace(self, target_dir, empty, mets_basename=mets_basename)
def test_unique_identifier_from_nothing():
    """Check identifier get/set and the generated header of an empty METS.

    A fresh METS has no unique identifier; after setting one it is returned
    unchanged. The serialized XML must mention the ocrd/core version and carry
    a CREATEDATE starting with the date of the timestamp passed in.
    """
    # Take the timestamp once: calling datetime.now() again for the expected
    # value would make the test fail if it happens to run across midnight.
    now = datetime.now()
    mets = OcrdMets.empty_mets(now.isoformat())
    assert mets.unique_identifier is None, 'no identifier'
    mets.unique_identifier = 'foo'
    assert mets.unique_identifier == 'foo', 'Right identifier after change is "foo"'
    as_string = mets.to_xml().decode('utf-8')
    assert 'ocrd/core v%s' % VERSION in as_string
    assert 'CREATEDATE="%04u-%02u-%02uT' % (now.year, now.month, now.day) in as_string
def test_page_from_file_no_existe(self):
    """Check that ``page_from_file`` raises for a non-existent local file.

    The file entry points at the local filename ``no-existe``; the expected
    error is a ``FileNotFoundError`` whose message names that file.
    """
    # Fixture setup stays outside the context manager so that only an
    # exception raised by page_from_file itself can satisfy the assertion.
    mets = OcrdMets.empty_mets()
    ocrd_file = mets.add_file('FOO', ID='foo', local_filename='no-existe', mimetype='foo/bar')
    with self.assertRaisesRegex(FileNotFoundError, "File not found: 'no-existe'"):
        page_from_file(ocrd_file)
def test_fptr_changed_for_change_id():
    """Check that renaming a file's ID keeps its physical page lookup in sync."""
    doc = OcrdMets.empty_mets()
    renamed = doc.add_file('FOO', ID='FOO_1', mimetype='image/tiff', pageId='p0001')

    def pages_of(file_id):
        # Look up the physical page(s) for a single file ID.
        return doc.get_physical_pages(for_fileIds=[file_id])

    assert pages_of('FOO_1') == ['p0001']
    renamed.ID = 'BAZ_1'
    # The old ID no longer resolves to a page; the new one does.
    assert pages_of('FOO_1') == [None]
    assert pages_of('BAZ_1') == ['p0001']
def test_ocrd_file_equality():
    """Exercise ``==`` / ``!=`` between file entries with equal and different IDs."""
    doc = OcrdMets.empty_mets()
    foo_1 = doc.add_file('FOO', ID='FOO_1', mimetype='image/tiff')
    foo_2 = doc.add_file('FOO', ID='FOO_2', mimetype='image/tiff')
    assert foo_1 != foo_2

    standalone_tiff = create_ocrd_file_with_defaults(ID='TEMP_1', mimetype='image/tiff')
    standalone_tif = create_ocrd_file_with_defaults(ID='TEMP_1', mimetype='image/tif')
    # be tolerant of different equivalent mimetypes
    assert standalone_tiff == standalone_tif

    # Same ID, but this one is added to the METS under test.
    in_mets = doc.add_file('TEMP', ID='TEMP_1', mimetype='image/tiff')
    assert standalone_tiff == in_mets
def test_unique_identifier_from_nothing(self):
    """Check identifier get/set and the generated header of an empty METS.

    A fresh METS has no unique identifier; after setting one it is returned
    unchanged. The serialized XML must mention the ocrd/core version and carry
    a CREATEDATE starting with the date of the timestamp passed in.
    """
    # Take the timestamp once: calling datetime.now() again for the expected
    # value would make the test fail if it happens to run across midnight.
    now = datetime.now()
    mets = OcrdMets.empty_mets(now.isoformat())
    self.assertIsNone(mets.unique_identifier, 'no identifier')
    mets.unique_identifier = 'foo'
    self.assertEqual(mets.unique_identifier, 'foo', 'Right identifier after change')
    as_string = mets.to_xml().decode('utf-8')
    self.assertIn('ocrd/core v%s' % VERSION, as_string)
    self.assertIn('CREATEDATE="%04u-%02u-%02uT' % (now.year, now.month, now.day), as_string)
def test_make_file_id_744(self):
    """
    https://github.com/OCR-D/core/pull/744

    > Often file IDs have two numbers, one of which will clash. In that case only the numerical fallback works.
    """
    doc = OcrdMets.empty_mets()
    # Only the second file is passed to make_file_id; the first just has to exist.
    doc.add_file('GRP2', ID='img1796-97_00000024_img', pageId='phys0024')
    latest = doc.add_file('GRP2', ID='img1796-97_00000025_img', pageId='phys0025')
    generated = make_file_id(latest, 'GRP2')
    self.assertEqual(generated, 'GRP2_0002')
def test_make_file_id_mets(self):
    """Check ``make_file_id`` results as files in group FOO are added and removed."""
    doc = OcrdMets.empty_mets()
    # Nine files each in FOO and BAR, numbered 0001..0009.
    for seq in range(1, 10):
        doc.add_file('FOO', ID="FOO_%04d" % (seq), mimetype="image/tiff")
        doc.add_file('BAR', ID="BAR_%04d" % (seq), mimetype="image/tiff")

    bar_7 = doc.find_files(ID='BAR_0007')[0]
    self.assertEqual(make_file_id(bar_7, 'FOO'), 'FOO_0007')

    other = doc.add_file('ABC', ID="BAR_7", mimetype="image/tiff")
    self.assertEqual(make_file_id(other, 'FOO'), 'FOO_0010')

    # With FOO emptied the expected ID restarts at 0001 ...
    doc.remove_file(fileGrp='FOO')
    self.assertEqual(make_file_id(other, 'FOO'), 'FOO_0001')

    # ... and moves on to 0002 once FOO_0001 is taken again.
    doc.add_file('FOO', ID="FOO_0001", mimetype="image/tiff")
    self.assertEqual(make_file_id(other, 'FOO'), 'FOO_0002')
def test_ocrd_file_eq(self):
    """Exercise ``==`` / ``!=`` between file entries with equal and different IDs."""
    doc = OcrdMets.empty_mets()

    foo_1 = doc.add_file('FOO', ID='FOO_1', mimetype='image/tiff')
    # A file compared with itself is equal and not unequal.
    self.assertEqual(foo_1 == foo_1, True)
    self.assertEqual(foo_1 != foo_1, False)

    foo_2 = doc.add_file('FOO', ID='FOO_2', mimetype='image/tiff')
    self.assertEqual(foo_1 == foo_2, False)

    standalone_tiff = create_ocrd_file_with_defaults(ID='TEMP_1', mimetype='image/tiff')
    standalone_tif = create_ocrd_file_with_defaults(ID='TEMP_1', mimetype='image/tif')
    # be tolerant of different equivalent mimetypes
    self.assertEqual(standalone_tiff == standalone_tif, True)

    # Same ID, but this one is added to the METS under test.
    in_mets = doc.add_file('TEMP', ID='TEMP_1', mimetype='image/tiff')
    self.assertEqual(standalone_tiff == in_mets, True)
def workspace_from_nothing(self, directory, mets_basename='mets.xml', clobber_mets=False):
    """
    Create an empty workspace.

    Arguments:
        directory (string): Target directory for the workspace.
            If ``None``, a temporary directory is created with the prefix ``TMP_PREFIX``.

    Keyword Arguments:
        mets_basename (string, ``mets.xml``): File name of the METS file inside ``directory``.
        clobber_mets (boolean, False): Whether to overwrite an existing METS file.
            By default an existing METS file raises an exception.

    Returns:
        a new ``Workspace`` for ``directory``

    Raises:
        FileExistsError: if the METS file already exists and ``clobber_mets`` is not set.
    """
    if directory is None:
        directory = tempfile.mkdtemp(prefix=TMP_PREFIX)
    # Missing parent directories are created; an existing directory is fine.
    Path(directory).mkdir(parents=True, exist_ok=True)
    mets_path = Path(directory, mets_basename)
    if mets_path.exists() and not clobber_mets:
        raise FileExistsError(
            "METS '%s' already exists in '%s' and clobber_mets not set." % (mets_basename, directory))
    mets = OcrdMets.empty_mets()
    log.info("Writing METS to %s", mets_path)
    mets_path.write_bytes(mets.to_xml(xmllint=True))
    # Forward the basename: otherwise a workspace written under a custom METS
    # file name would be handed back configured with the default name.
    return Workspace(self, directory, mets, mets_basename=mets_basename)
def test_add_file():
    """Add two files to one group, then move them between physical pages."""
    doc = OcrdMets.empty_mets()
    assert len(doc.file_groups) == 0, '0 file groups'
    assert len(list(doc.find_all_files(fileGrp='OUTPUT'))) == 0, '0 files in "OUTPUT"'

    first = doc.add_file('OUTPUT', ID="foo123", mimetype="bla/quux", pageId="foobar")
    second = doc.add_file('OUTPUT', ID="foo1232", mimetype="bla/quux", pageId="foobar")
    assert first.pageId == 'foobar', 'pageId set'
    assert len(doc.file_groups) == 1, '1 file groups'
    assert len(list(doc.find_all_files(fileGrp='OUTPUT'))) == 2, '2 files in "OUTPUT"'

    # (target page, file, order, orderlabel) -- applied strictly in this order.
    reassignments = (
        ('barfoo', first, '300', "page 300"),
        ('quux', second, '302', "page 302"),
        ('barfoo', second, '301', "page 301"),
    )
    for page_id, ocrd_file, order, label in reassignments:
        doc.set_physical_page_for_file(page_id, ocrd_file, order=order, orderlabel=label)
        assert ocrd_file.pageId == page_id, 'pageId changed'

    # Re-assigning pages must not create additional file groups.
    assert len(doc.file_groups) == 1, '1 file group'
def workspace_from_nothing(self, directory, mets_basename='mets.xml', clobber_mets=False):
    """
    Create an empty workspace.

    Arguments:
        directory (string): Target directory for the workspace.
            If ``None``, a temporary directory is created with the prefix ``TMP_PREFIX``.

    Keyword Arguments:
        mets_basename (string, ``mets.xml``): File name of the METS file inside ``directory``.
        clobber_mets (boolean, False): Whether to overwrite an existing METS file.
            By default an existing METS file raises an exception.

    Returns:
        a new ``Workspace`` for ``directory``

    Raises:
        Exception: if the METS file already exists and ``clobber_mets`` is not set.
    """
    if directory is None:
        directory = tempfile.mkdtemp(prefix=TMP_PREFIX)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    makedirs(directory, exist_ok=True)
    mets_fpath = join(directory, mets_basename)
    if not clobber_mets and exists(mets_fpath):
        raise Exception("Not clobbering existing mets.xml in '%s'." % directory)
    mets = OcrdMets.empty_mets()
    with open(mets_fpath, 'wb') as fmets:
        log.info("Writing %s", mets_fpath)
        fmets.write(mets.to_xml(xmllint=True))
    # Forward the basename: otherwise a workspace written under a custom METS
    # file name would be handed back configured with the default name.
    return Workspace(self, directory, mets, mets_basename=mets_basename)
def create_ocrd_file(*args, **kwargs):
    """Add a file to a throwaway empty METS and return the result of ``add_file``.

    All positional and keyword arguments are passed through to ``add_file``.
    """
    return OcrdMets.empty_mets().add_file(*args, **kwargs)
def test_make_file_id_605(self):
    """https://github.com/OCR-D/core/pull/605"""
    doc = OcrdMets.empty_mets()
    # Only the second file is passed to make_file_id; the first just has to exist.
    doc.add_file('1:!GRP', ID='FOO_0001', pageId='phys0001')
    latest = doc.add_file('2:!GRP', ID='FOO_0002', pageId='phys0002')
    generated = make_file_id(latest, '2:!GRP')
    self.assertEqual(generated, 'id_2_GRP_0002')
def test_make_file_id_570(self):
    """https://github.com/OCR-D/core/pull/570"""
    doc = OcrdMets.empty_mets()
    source = doc.add_file('GRP', ID='FOO_0001', pageId='phys0001')
    # This ID is already taken, so 'GRP2_0001' is not the expected result below.
    doc.add_file('GRP', ID='GRP2_0001', pageId='phys0002')
    generated = make_file_id(source, 'GRP2')
    self.assertEqual(generated, 'GRP2_0002')
def _url_to_file(the_path):
    """Wrap ``the_path`` in a file entry of a throwaway METS.

    The path is made absolute and used as the entry's ``url``; its final path
    component becomes the ``ID``. The entry is added to the file group
    ``DEPRECATED`` and the result of ``add_file`` is returned.
    """
    scratch_mets = OcrdMets.empty_mets()
    absolute = abspath(the_path)
    file_id = Path(absolute).name
    return scratch_mets.add_file('DEPRECATED', ID=file_id, url=absolute)