def setUp(self):
    """Build the per-test fixture: resolver, bagger and a copy of the sample bag.

    The ``kant_aufklaerung_1784`` asset tree is copied into a directory
    obtained from ``mkdtemp()`` and a ``Workspace`` is opened on the ``data``
    sub-directory of that copy.
    """
    resolver = Resolver()
    self.resolver = resolver
    self.bagger = WorkspaceBagger(resolver)

    scratch_root = mkdtemp()
    self.tempdir = scratch_root

    bag_copy = join(scratch_root, 'kant_aufklaerung_1784')
    self.bagdir = bag_copy
    copytree(assets.path_to('kant_aufklaerung_1784'), bag_copy)

    payload_dir = join(bag_copy, 'data')
    self.workspace_dir = payload_dir
    # The single-argument join() is kept exactly as the original call had it.
    self.workspace = Workspace(resolver, directory=join(payload_dir))
def kant_ocrdzip(ocrd_identifier):
    """Yield the path of an OCRD-ZIP bagged from the Kant sample workspace.

    The workspace is loaded from the ``kant_aufklaerung_1784/data/mets.xml``
    asset and bagged with a strict ``WorkspaceBagger`` into a millisecond-
    timestamped file below ``gettempdir()``.

    Args:
        ocrd_identifier: identifier passed through to ``WorkspaceBagger.bag``.

    Yields:
        The path of the created ``.ocrd.zip`` file.

    The archive is unlinked when the generator is resumed, closed, or has an
    exception thrown into it at the yield point.
    """
    resolver = Resolver()
    bagger = WorkspaceBagger(resolver, strict=True)
    dest = join(gettempdir(), 'olahd-test-bag-%d.ocrd.zip' % int(round((time() * 1000))))
    ws = resolver.workspace_from_url(
        assets.path_to('kant_aufklaerung_1784/data/mets.xml'))
    bagger.bag(ws, ocrd_identifier, dest=dest)
    try:
        yield dest
    finally:
        # Runs on normal resumption as well as on close()/throw(), so the
        # temporary archive is not left behind when the consumer fails.
        unlink(dest)
def setUp(self):
    """Build the per-test fixture after running the parent class's ``setUp``.

    Removes ``BACKUPDIR`` if it already exists, then copies the
    ``kant_aufklaerung_1784`` asset tree into ``<tempdir>/bag`` (``tempdir``
    coming from ``mkdtemp()``) and opens a ``Workspace`` on its ``data``
    sub-directory.
    """
    super().setUp()
    # Each test starts without a pre-existing BACKUPDIR.
    if exists(BACKUPDIR):
        rmtree(BACKUPDIR)
    self.resolver = Resolver()
    self.bagger = WorkspaceBagger(self.resolver)
    self.tempdir = mkdtemp()
    self.bagdir = join(self.tempdir, 'bag')
    copytree(assets.path_to('kant_aufklaerung_1784'), self.bagdir)
    self.workspace_dir = join(self.bagdir, 'data')
    self.workspace = Workspace(self.resolver, directory=self.workspace_dir)
def process(self):
    """Bag the current workspace as an OCRD-ZIP and post it through ``OlaHdClient``.

    The client is constructed from the ``endpoint``, ``username`` and
    ``password`` entries of ``self.parameter``. The unique identifier of the
    workspace's METS is used both as the bag identifier and as the
    ``prev_pid`` argument of the post.
    """
    endpoint, username, password = (
        self.parameter[key] for key in ('endpoint', 'username', 'password')
    )
    client = OlaHdClient(endpoint, username, password)

    resolver = Resolver()
    bagger = WorkspaceBagger(resolver, strict=True)  # TODO

    tmp_root = gettempdir()
    millis = int(round((time() * 1000)))
    dest = join(tmp_root, 'bag-%d.ocrd.zip' % millis)  # TODO

    ocrd_identifier = self.workspace.mets.unique_identifier
    bagger.bag(self.workspace, ocrd_identifier, dest=dest)

    # NOTE(review): the archive written to `dest` is not removed in this
    # method -- confirm whether cleanup happens elsewhere.
    client.login()
    client.post(dest, prev_pid=ocrd_identifier)
class TestOcrdZipValidator(TestCase):
    """Tests for ``OcrdZipValidator`` run against a copy of the Kant sample bag."""

    def setUp(self):
        """Copy the sample bag below a ``mkdtemp()`` directory and open its workspace."""
        resolver = Resolver()
        self.resolver = resolver
        self.bagger = WorkspaceBagger(resolver)

        scratch_root = mkdtemp()
        self.tempdir = scratch_root

        bag_copy = join(scratch_root, 'kant_aufklaerung_1784')
        self.bagdir = bag_copy
        copytree(assets.path_to('kant_aufklaerung_1784'), bag_copy)

        payload_dir = join(bag_copy, 'data')
        self.workspace_dir = payload_dir
        # The single-argument join() is kept exactly as the original call had it.
        self.workspace = Workspace(resolver, directory=join(payload_dir))

    def tearDown(self):
        """Remove the directory tree created in ``setUp``."""
        rmtree(self.tempdir)

    def test_validation(self):
        """A bag produced with depth 'partial' validates as an OCRD-ZIP."""
        zip_path = self.bagger.bag(
            self.workspace,
            'SBB0000F29300010000',
            ocrd_manifestation_depth='partial',
        )
        validator = OcrdZipValidator(self.resolver, zip_path)
        report = validator.validate()
        self.assertEqual(report.is_valid, True)

    def test_validation_unzipped(self):
        """The bag directory validates when unzipping is skipped."""
        report = OcrdZipValidator(self.resolver, self.bagdir).validate(skip_unzip=True)
        self.assertEqual(report.is_valid, True)
        print(report)

    def test_validation_unzipped_skip_bag(self):
        """The bag directory validates when unzipping and bag validation are skipped."""
        report = OcrdZipValidator(self.resolver, self.bagdir).validate(
            skip_unzip=True,
            skip_bag=True,
        )
        self.assertEqual(report.is_valid, True)
        print(report)

    def test_fail_validation_no_such_file(self):
        """Validating a non-existent zip path raises ``IOError``."""
        validator = OcrdZipValidator(self.resolver, '/does/not/exist.ocrd.zip')
        self.assertRaisesRegex(
            IOError, "Can't find file", validator.validate, skip_unzip=False)

    def test_fail_validation_unzipped_extra_files(self):
        """A file added under ``data`` makes validation raise a Payload-Oxum error."""
        validator = OcrdZipValidator(self.resolver, self.bagdir)
        extrapath = join(self.bagdir, 'data', 'EXTRA')
        with open(extrapath, 'w') as handle:
            handle.write('FAIL')
        self.assertRaisesRegex(
            Exception, "Payload-Oxum validation failed", validator.validate, skip_unzip=True)

    def test_fail_validation_extra_tagfile(self):
        """A file added at the bag root makes validation raise a tag-file error."""
        extrapath = join(self.bagdir, 'NOT-ALLOWED')
        with open(extrapath, 'w') as handle:
            handle.write('FAIL')
        validator = OcrdZipValidator(self.resolver, self.bagdir)
        self.assertRaisesRegex(
            Exception,
            "Existing tag file 'NOT-ALLOWED' is not listed in Tag-Files-Allowed.",
            validator.validate,
            skip_unzip=True,
        )
class TestWorkspaceBagger(TestCase):
    """Tests for ``WorkspaceBagger.bag`` and ``WorkspaceBagger.spill``."""

    def setUp(self):
        """Remove any existing ``BACKUPDIR`` and copy the sample bag to ``<tempdir>/bag``."""
        if exists(BACKUPDIR):
            rmtree(BACKUPDIR)
        self.resolver = Resolver()
        self.bagger = WorkspaceBagger(self.resolver)
        self.tempdir = mkdtemp()
        self.bagdir = join(self.tempdir, 'bag')
        copytree(assets.path_to('kant_aufklaerung_1784'), self.bagdir)
        self.workspace_dir = join(self.bagdir, 'data')
        self.workspace = Workspace(self.resolver, directory=self.workspace_dir)

    def tearDown(self):
        """Remove the directory tree created in ``setUp``."""
        rmtree(self.tempdir)

    def test_bad_manifestation_depth(self):
        """An ``ocrd_manifestation_depth`` of 'foo' is rejected."""
        with self.assertRaisesRegex(Exception, "manifestation_depth must be 'full' or 'partial'"):
            self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='foo')

    def test_bad_inplace_and_dest(self):
        """Passing both ``in_place=True`` and ``dest`` is rejected."""
        with self.assertRaisesRegex(Exception, "Setting 'dest' and 'in_place' is a contradiction"):
            self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=True, dest='/x/y/z')

    def test_bad_skip_zip_and_dest(self):
        """Passing ``in_place=True`` with ``skip_zip=False`` is rejected."""
        with self.assertRaisesRegex(Exception, "Setting 'skip_zip' and not 'in_place' is a contradiction"):
            self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', in_place=True, skip_zip=False)

    def test_bag_inplace(self):
        """Bagging in place without zipping, with a checksum and a tag file, does not raise."""
        self.bagger.bag(
            self.workspace,
            'kant_aufklaerung_1784',
            ocrd_manifestation_depth='partial',
            skip_zip=True,
            in_place=True,
            ocrd_base_version_checksum='123',
            tag_files=[
                README_FILE
            ],
        )

    def test_bag_zip_and_spill(self):
        """A 'full' bag with two rewritten file URLs can be zipped and spilled to ``<tempdir>/out``."""
        self.workspace.mets.find_all_files(ID='INPUT_0017')[0].url = 'bad-scheme://foo'
        self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://google.com'
        self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='full', skip_zip=False, dest=join(self.tempdir, 'out.ocrd.zip'))
        self.bagger.spill(join(self.tempdir, 'out.ocrd.zip'), join(self.tempdir, 'out'))

    def test_bag_zip_and_spill_wo_dest(self):
        """A 'partial' zip can be spilled when only the parent directory is given as target."""
        self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=False, skip_zip=False, dest=join(self.tempdir, 'out.ocrd.zip'))
        self.bagger.spill(join(self.tempdir, 'out.ocrd.zip'), self.tempdir)

    def test_bag_wo_dest(self):
        """Bagging in place without ``dest`` does not raise when ``BACKUPDIR`` exists."""
        makedirs(BACKUPDIR)
        self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=True, skip_zip=True)

    def test_bag_wo_dest_zip(self):
        """Bagging with ``in_place=False`` and ``skip_zip=True`` and no ``dest`` does not raise."""
        makedirs(BACKUPDIR)
        self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=False, skip_zip=True)

    def test_bag_partial_http_nostrict(self):
        """With ``strict=False`` a 'partial' bag containing an http URL does not raise."""
        self.bagger.strict = False
        makedirs(BACKUPDIR)
        self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://google.com'
        self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=False)

    def test_bag_partial_http_strict(self):
        """With ``strict=True`` a 'partial' bag containing an http URL raises."""
        self.bagger.strict = True
        makedirs(BACKUPDIR)
        self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://google.com'
        with self.assertRaisesRegex(Exception, "Not fetching non-local files"):
            self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=False)

    def test_bag_full(self):
        """With ``strict=True`` a 'full' bag raises for a file whose URL has a bad scheme."""
        self.bagger.strict = True
        f = self.workspace.mets.find_all_files(ID='INPUT_0017')[0]
        f.url = 'bad-scheme://foo'
        f.local_filename = None
        with self.assertRaisesRegex(Exception, "Not an http URL"):
            self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='full', skip_zip=False)

    def test_spill_dest_not_dir(self):
        """Spilling to a target that is not a directory raises."""
        with self.assertRaisesRegex(Exception, "Not a directory: /dev/stdout"):
            self.bagger.spill('x', '/dev/stdout')

    def test_spill_derived_dest_exists(self):
        """Spilling raises when the directory derived from the zip name already exists."""
        # Imported locally so this method does not depend on the file's import block.
        import re

        dest = join(self.bagdir, 'foo')
        makedirs(dest)
        # assertRaisesRegex treats its second argument as a regular expression,
        # so the temporary path is escaped to be matched literally.
        with self.assertRaisesRegex(Exception, "Directory exists: %s" % re.escape(dest)):
            self.bagger.spill('/path/to/foo.ocrd.zip', self.bagdir)

    def test_spill_derived_dest(self):
        """Spilling into a parent directory creates a directory named after the zip."""
        bag_dest = join(self.bagdir, 'foo.ocrd.zip')
        spill_dest = join(self.bagdir, 'foo')
        self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', ocrd_manifestation_depth='partial', in_place=False, skip_zip=False, dest=bag_dest)
        self.bagger.spill(bag_dest, self.bagdir)
        self.assertTrue(exists(spill_dest))