Пример #1
0
 def setUp(self):
     """Prepare a resolver, a bagger and a workspace on a copied asset.

     The ``kant_aufklaerung_1784`` asset is copied into a directory
     created by ``mkdtemp()``; the workspace is opened on the ``data``
     subdirectory of that copy.
     """
     self.resolver = Resolver()
     self.bagger = WorkspaceBagger(self.resolver)
     self.tempdir = mkdtemp()
     self.bagdir = join(self.tempdir, 'kant_aufklaerung_1784')
     copytree(assets.path_to('kant_aufklaerung_1784'), self.bagdir)
     self.workspace_dir = join(self.bagdir, 'data')
     # workspace_dir is already a complete path, so it is passed as-is
     # (a single-argument join() would return it unchanged).
     self.workspace = Workspace(self.resolver, directory=self.workspace_dir)
Пример #2
0
def kant_ocrdzip(ocrd_identifier):
    """Bag the ``kant_aufklaerung_1784`` asset workspace and yield the bag path.

    This is a generator: it asks a strict ``WorkspaceBagger`` to bag the
    asset's workspace to a file in the system temp directory, yields that
    destination path, and unlinks the file when the generator is resumed,
    closed, or has an exception thrown into it.

    Args:
        ocrd_identifier: Identifier handed through to ``bagger.bag``.

    Yields:
        The destination path that was passed to ``bagger.bag``.
    """
    resolver = Resolver()
    bagger = WorkspaceBagger(resolver, strict=True)
    # The file name embeds the current time in milliseconds.
    dest = join(gettempdir(),
                'olahd-test-bag-%d.ocrd.zip' % int(round((time() * 1000))))
    ws = resolver.workspace_from_url(
        assets.path_to('kant_aufklaerung_1784/data/mets.xml'))
    bagger.bag(ws, ocrd_identifier, dest=dest)
    try:
        yield dest
    finally:
        # Runs on normal resumption as well as on close()/throw(), so the
        # bag does not linger in the temp directory when the consumer fails.
        unlink(dest)
Пример #3
0
 def setUp(self):
     """Reset ``BACKUPDIR`` and open a workspace on a copied asset.

     After the parent ``setUp`` has run, any existing ``BACKUPDIR`` is
     removed. The ``kant_aufklaerung_1784`` asset is then copied to
     ``<tempdir>/bag`` and a workspace is opened on its ``data``
     subdirectory.
     """
     super().setUp()
     # Start every test without a leftover backup directory.
     if exists(BACKUPDIR):
         rmtree(BACKUPDIR)
     self.resolver = Resolver()
     self.bagger = WorkspaceBagger(self.resolver)
     self.tempdir = mkdtemp()
     self.bagdir = join(self.tempdir, 'bag')
     copytree(assets.path_to('kant_aufklaerung_1784'), self.bagdir)
     self.workspace_dir = join(self.bagdir, 'data')
     self.workspace = Workspace(self.resolver, directory=self.workspace_dir)
Пример #4
0
 def process(self):
     """Bag the current workspace and post the bag through ``OlaHdClient``.

     The client is configured from the ``endpoint``, ``username`` and
     ``password`` entries of ``self.parameter``. The workspace is bagged
     to a file in the system temp directory, the client logs in, and the
     file is posted with the METS unique identifier as ``prev_pid``.
     The temporary file is removed afterwards, whether or not the upload
     succeeded.
     """
     import os
     from contextlib import suppress

     client = OlaHdClient(self.parameter['endpoint'],
                          self.parameter['username'],
                          self.parameter['password'])
     bagger = WorkspaceBagger(Resolver(), strict=True)
     # TODO: left unspecified by the original author; presumably about how
     # the temporary bag path is chosen -- confirm.
     dest = join(gettempdir(),
                 'bag-%d.ocrd.zip' % int(round((time() * 1000))))
     # TODO: left unspecified by the original author; presumably about
     # which identifier to use -- confirm.
     ocrd_identifier = self.workspace.mets.unique_identifier
     try:
         bagger.bag(self.workspace, ocrd_identifier, dest=dest)
         client.login()
         # NOTE(review): the same identifier is sent as prev_pid -- confirm
         # that this is what the endpoint expects.
         client.post(dest, prev_pid=ocrd_identifier)
     finally:
         # The bag is only an upload artifact; do not leave it behind in the
         # temp directory. It may not exist if bagging itself failed.
         # NOTE(review): assumes client.post() has finished reading the file
         # when it returns -- confirm.
         with suppress(FileNotFoundError):
             os.unlink(dest)
Пример #5
0
class TestOcrdZipValidator(TestCase):
    """Tests for ``OcrdZipValidator`` run on a copy of ``kant_aufklaerung_1784``."""

    def setUp(self):
        """Copy the asset into a fresh temp dir and open a workspace on ``data``."""
        self.resolver = Resolver()
        self.bagger = WorkspaceBagger(self.resolver)
        self.tempdir = mkdtemp()
        self.bagdir = join(self.tempdir, 'kant_aufklaerung_1784')
        copytree(assets.path_to('kant_aufklaerung_1784'), self.bagdir)
        self.workspace_dir = join(self.bagdir, 'data')
        # workspace_dir is already a complete path, so it is passed as-is
        # (a single-argument join() would return it unchanged).
        self.workspace = Workspace(self.resolver, directory=self.workspace_dir)

    def tearDown(self):
        """Delete the temp dir created in ``setUp``, including the copied asset."""
        rmtree(self.tempdir)

    def test_validation(self):
        """A bag produced with partial manifestation depth validates."""
        ocrdzip = self.bagger.bag(self.workspace,
                                  'SBB0000F29300010000',
                                  ocrd_manifestation_depth='partial')
        report = OcrdZipValidator(self.resolver, ocrdzip).validate()
        self.assertEqual(report.is_valid, True)

    def test_validation_unzipped(self):
        """The copied bag directory validates when unzipping is skipped."""
        validator = OcrdZipValidator(self.resolver, self.bagdir)
        report = validator.validate(skip_unzip=True)
        self.assertEqual(report.is_valid, True)
        # NOTE(review): printing presumably exercises the report's string
        # conversion -- confirm before removing.
        print(report)

    def test_validation_unzipped_skip_bag(self):
        """The copied bag directory validates with both unzip and bag checks skipped."""
        validator = OcrdZipValidator(self.resolver, self.bagdir)
        report = validator.validate(skip_unzip=True, skip_bag=True)
        self.assertEqual(report.is_valid, True)
        # NOTE(review): printing presumably exercises the report's string
        # conversion -- confirm before removing.
        print(report)

    def test_fail_validation_no_such_file(self):
        """Validating a path that does not exist raises ``IOError``."""
        validator = OcrdZipValidator(self.resolver, '/does/not/exist.ocrd.zip')
        with self.assertRaisesRegex(IOError, "Can't find file"):
            validator.validate(skip_unzip=False)

    def test_fail_validation_unzipped_extra_files(self):
        """An extra file under ``data`` makes validation raise a Payload-Oxum error."""
        validator = OcrdZipValidator(self.resolver, self.bagdir)
        extrapath = join(self.bagdir, 'data', 'EXTRA')
        with open(extrapath, 'w') as f:
            f.write('FAIL')
        with self.assertRaisesRegex(Exception,
                                    "Payload-Oxum validation failed"):
            validator.validate(skip_unzip=True)

    def test_fail_validation_extra_tagfile(self):
        """An extra file in the bag root is rejected as an unlisted tag file."""
        extrapath = join(self.bagdir, 'NOT-ALLOWED')
        with open(extrapath, 'w') as f:
            f.write('FAIL')
        validator = OcrdZipValidator(self.resolver, self.bagdir)
        with self.assertRaisesRegex(
                Exception,
                "Existing tag file 'NOT-ALLOWED' is not listed in Tag-Files-Allowed."
        ):
            validator.validate(skip_unzip=True)
Пример #6
0
class TestWorkspaceBagger(TestCase):
    """Tests for ``WorkspaceBagger.bag`` and ``.spill`` on a copied asset."""

    def setUp(self):
        """Reset ``BACKUPDIR`` and open a workspace on a copy of the asset."""
        if exists(BACKUPDIR):
            rmtree(BACKUPDIR)
        self.resolver = Resolver()
        self.bagger = WorkspaceBagger(self.resolver)
        self.tempdir = mkdtemp()
        self.bagdir = join(self.tempdir, 'bag')
        copytree(assets.path_to('kant_aufklaerung_1784'), self.bagdir)
        self.workspace_dir = join(self.bagdir, 'data')
        self.workspace = Workspace(self.resolver, directory=self.workspace_dir)

    def tearDown(self):
        """Delete the temp dir and any ``BACKUPDIR`` a test left behind."""
        rmtree(self.tempdir)
        # Several tests create BACKUPDIR; without this it would outlive the
        # final test of the run.
        if exists(BACKUPDIR):
            rmtree(BACKUPDIR)

    def test_bad_manifestation_depth(self):
        """An unknown ``ocrd_manifestation_depth`` value is rejected."""
        with self.assertRaisesRegex(
                Exception, "manifestation_depth must be 'full' or 'partial'"):
            self.bagger.bag(self.workspace, 'kant_aufklaerung_1784',
                            ocrd_manifestation_depth='foo')

    def test_bad_inplace_and_dest(self):
        """Passing both ``in_place=True`` and a ``dest`` is rejected."""
        with self.assertRaisesRegex(
                Exception, "Setting 'dest' and 'in_place' is a contradiction"):
            self.bagger.bag(self.workspace, 'kant_aufklaerung_1784',
                            in_place=True, dest='/x/y/z')

    def test_bad_skip_zip_and_dest(self):
        """Passing ``in_place=True`` together with ``skip_zip=False`` is rejected."""
        with self.assertRaisesRegex(
                Exception,
                "Setting 'skip_zip' and not 'in_place' is a contradiction"):
            self.bagger.bag(self.workspace, 'kant_aufklaerung_1784',
                            in_place=True, skip_zip=False)

    def test_bag_inplace(self):
        """Bag in place without zipping, with a base-version checksum and a tag file."""
        self.bagger.bag(
            self.workspace,
            'kant_aufklaerung_1784',
            ocrd_manifestation_depth='partial',
            skip_zip=True,
            in_place=True,
            ocrd_base_version_checksum='123',
            tag_files=[
                README_FILE
            ],
        )

    def test_bag_zip_and_spill(self):
        """Bag with full depth after rewriting two file URLs, then spill the zip."""
        self.workspace.mets.find_all_files(ID='INPUT_0017')[0].url = 'bad-scheme://foo'
        self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://google.com'
        self.bagger.bag(self.workspace, 'kant_aufklaerung_1784',
                        ocrd_manifestation_depth='full', skip_zip=False,
                        dest=join(self.tempdir, 'out.ocrd.zip'))
        self.bagger.spill(join(self.tempdir, 'out.ocrd.zip'),
                          join(self.tempdir, 'out'))

    def test_bag_zip_and_spill_wo_dest(self):
        """Bag to a zip, then spill it into the temp dir itself."""
        self.bagger.bag(self.workspace, 'kant_aufklaerung_1784',
                        ocrd_manifestation_depth='partial', in_place=False,
                        skip_zip=False, dest=join(self.tempdir, 'out.ocrd.zip'))
        self.bagger.spill(join(self.tempdir, 'out.ocrd.zip'), self.tempdir)

    def test_bag_wo_dest(self):
        """Bag in place without zipping while ``BACKUPDIR`` already exists."""
        makedirs(BACKUPDIR)
        self.bagger.bag(self.workspace, 'kant_aufklaerung_1784',
                        ocrd_manifestation_depth='partial', in_place=True,
                        skip_zip=True)

    def test_bag_wo_dest_zip(self):
        """Bag without ``dest``, not in place and unzipped, while ``BACKUPDIR`` exists."""
        makedirs(BACKUPDIR)
        self.bagger.bag(self.workspace, 'kant_aufklaerung_1784',
                        ocrd_manifestation_depth='partial', in_place=False,
                        skip_zip=True)

    def test_bag_partial_http_nostrict(self):
        """With ``strict`` off, partial bagging with an http file URL does not raise."""
        self.bagger.strict = False
        makedirs(BACKUPDIR)
        self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://google.com'
        self.bagger.bag(self.workspace, 'kant_aufklaerung_1784',
                        ocrd_manifestation_depth='partial', in_place=False)

    def test_bag_partial_http_strict(self):
        """With ``strict`` on, partial bagging with an http file URL raises."""
        self.bagger.strict = True
        makedirs(BACKUPDIR)
        self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://google.com'
        with self.assertRaisesRegex(Exception, "Not fetching non-local files"):
            self.bagger.bag(self.workspace, 'kant_aufklaerung_1784',
                            ocrd_manifestation_depth='partial', in_place=False)

    def test_bag_full(self):
        """Full bagging raises for a non-http URL with no local filename."""
        self.bagger.strict = True
        f = self.workspace.mets.find_all_files(ID='INPUT_0017')[0]
        f.url = 'bad-scheme://foo'
        f.local_filename = None
        with self.assertRaisesRegex(Exception, "Not an http URL"):
            self.bagger.bag(self.workspace, 'kant_aufklaerung_1784',
                            ocrd_manifestation_depth='full', skip_zip=False)

    def test_spill_dest_not_dir(self):
        """Spilling to a destination that is not a directory raises."""
        with self.assertRaisesRegex(Exception, "Not a directory: /dev/stdout"):
            self.bagger.spill('x', '/dev/stdout')

    def test_spill_derived_dest_exists(self):
        """Spilling raises when the directory derived from the zip name exists."""
        import re

        dest = join(self.bagdir, 'foo')
        makedirs(dest)
        # dest is a temp path and may contain regex metacharacters; match it
        # literally rather than as a pattern.
        expected = re.escape("Directory exists: %s" % dest)
        with self.assertRaisesRegex(Exception, expected):
            self.bagger.spill('/path/to/foo.ocrd.zip', self.bagdir)

    def test_spill_derived_dest(self):
        """Spilling ``foo.ocrd.zip`` into a directory creates ``foo`` inside it."""
        bag_dest = join(self.bagdir, 'foo.ocrd.zip')
        spill_dest = join(self.bagdir, 'foo')
        self.bagger.bag(self.workspace, 'kant_aufklaerung_1784',
                        ocrd_manifestation_depth='partial', in_place=False,
                        skip_zip=False, dest=bag_dest)
        self.bagger.spill(bag_dest, self.bagdir)
        self.assertTrue(exists(spill_dest))