示例#1
0
    def test_validate_optional_tagfile_in_directory(self):
        """Validate a bag whose tag manifest lists a tag file in a subdirectory.

        Three states are exercised in turn: ``tagmanifest-md5.txt`` holds a
        wrong checksum (validation must raise), it holds the real MD5 of the
        tag file (validation must pass), and the tag file is deleted from
        disk (validation must raise again).
        """
        bagit.make_bag(self.tmpdir, checksums=['md5'])
        tagdir = tempfile.mkdtemp(dir=self.tmpdir)
        tagfolder = j(tagdir, "tagfolder")
        tagfile_path = j(tagfolder, "tagfile")
        tagmanifest_path = j(self.tmpdir, "tagmanifest-md5.txt")

        if not os.path.exists(tagfolder):
            os.makedirs(tagfolder)

        with open(tagfile_path, "w") as tagfile:
            tagfile.write("test")

        # Path of the tag file relative to the bag root. str.replace returns
        # a new string, so the result has to be re-assigned for the
        # backslash-to-slash normalisation to take effect.
        relpath = tagfile_path.replace(self.tmpdir + os.sep, "")
        relpath = relpath.replace("\\", "/")

        with open(tagmanifest_path, "w") as tagman:
            # Incorrect checksum.
            tagman.write("8e2af7a0143c7b8f4de0b3fc90f27354 " + relpath + "\n")
        bag = bagit.BDBag(self.tmpdir)
        self.assertRaises(bagit.BagValidationError, self.validate, bag)

        # Replace the bogus entry with the real MD5 of the tag file.
        hasher = hashlib.new("md5")
        with open(tagfile_path, "r") as tf:
            contents = tf.read().encode('utf-8')
        hasher.update(contents)
        with open(tagmanifest_path, "w") as tagman:
            tagman.write(hasher.hexdigest() + " " + relpath + "\n")
        bag = bagit.BDBag(self.tmpdir)
        self.assertTrue(self.validate(bag))

        # Missing tagfile.
        os.remove(tagfile_path)
        bag = bagit.BDBag(self.tmpdir)
        self.assertRaises(bagit.BagValidationError, self.validate, bag)
示例#2
0
    def test_validate_optional_tagfile(self):
        """Validate a bag whose tag manifest lists an extra tag file.

        Three states are exercised in turn: ``tagmanifest-md5.txt`` holds a
        wrong checksum (validation must raise), it holds the real MD5 of the
        tag file (validation must pass), and the tag file is deleted from
        disk (validation must raise again).
        """
        logger.info(self.getTestHeader(sys._getframe().f_code.co_name))
        bagit.make_bag(self.tmpdir, checksums=['md5'])
        tagdir = tempfile.mkdtemp(dir=self.tmpdir)
        tagfile_path = j(tagdir, "tagfile")
        tagmanifest_path = j(self.tmpdir, "tagmanifest-md5.txt")

        with open(tagfile_path, "w") as tagfile:
            tagfile.write("test")

        # Path of the tag file relative to the bag root. str.replace returns
        # a new string, so the result has to be re-assigned for the
        # backslash-to-slash normalisation to take effect.
        relpath = tagfile_path.replace(self.tmpdir + os.sep, "")
        relpath = relpath.replace("\\", "/")

        with open(tagmanifest_path, "w") as tagman:
            # Incorrect checksum.
            tagman.write("8e2af7a0143c7b8f4de0b3fc90f27354 " + relpath + "\n")
        bag = bagit.BDBag(self.tmpdir)
        self.assertRaises(bagit.BagValidationError, self.validate, bag)

        # Replace the bogus entry with the real MD5 of the tag file.
        hasher = hashlib.new("md5")
        contents = slurp_text_file(tagfile_path).encode('utf-8')
        hasher.update(contents)
        with open(tagmanifest_path, "w") as tagman:
            tagman.write(hasher.hexdigest() + " " + relpath + "\n")
        bag = bagit.BDBag(self.tmpdir)
        self.assertTrue(self.validate(bag))

        # Missing tagfile.
        os.remove(tagfile_path)
        bag = bagit.BDBag(self.tmpdir)
        self.assertRaises(bagit.BagValidationError, self.validate, bag)
示例#3
0
    def test_save_baginfo(self):
        """Check that values stored in ``bag.info`` are written out by ``save()``.

        A scalar value and then a list value are saved, and each is read
        back through a newly opened ``BDBag``.
        """
        bag = bagit.make_bag(self.tmpdir)

        # Scalar tag value: save, reopen, read back.
        bag.info["foo"] = "bar"
        bag.save()
        bag = bagit.BDBag(self.tmpdir)
        self.assertEqual(bag.info["foo"], "bar")
        self.assertTrue(bag.is_valid())

        # List tag value: save, then read back through a second handle.
        bag.info['x'] = ["a", "b", "c"]
        bag.save()
        reopened = bagit.BDBag(self.tmpdir)
        self.assertEqual(reopened.info["x"], ["a", "b", "c"])
        self.assertTrue(bag.is_valid())
示例#4
0
    def test_save_baginfo(self):
        """Check that values stored in ``bag.info`` are written out by ``save()``.

        A scalar value and then a list value are saved, and each is read
        back through a newly opened ``BDBag``.
        """
        logger.info(self.getTestHeader(sys._getframe().f_code.co_name))
        bag = bagit.make_bag(self.tmpdir)

        # Scalar tag value: save, reopen, read back.
        bag.info["foo"] = "bar"
        bag.save()
        bag = bagit.BDBag(self.tmpdir)
        self.assertEqual(bag.info["foo"], "bar")
        self.assertTrue(bag.is_valid())

        # List tag value: save, then read back through a second handle.
        bag.info['x'] = ["a", "b", "c"]
        bag.save()
        reopened = bagit.BDBag(self.tmpdir)
        self.assertEqual(reopened.info["x"], ["a", "b", "c"])
        self.assertTrue(bag.is_valid())
示例#5
0
def is_bag(bag_path):
    """Report whether ``bag_path`` can be opened as a ``BDBag``.

    Args:
        bag_path: Path handed to ``bdbagit.BDBag``.

    Returns:
        ``False`` when constructing the bag raises ``BagError`` or
        ``BagValidationError``; otherwise the truthiness of the bag object.
        Any other exception propagates to the caller.
    """
    try:
        candidate = bdbagit.BDBag(bag_path)
    except (bdbagit.BagError, bdbagit.BagValidationError):
        return False
    return bool(candidate)
示例#6
0
    def test_mixed_case_checksums(self):
        """Check that an upper-cased checksum in ``manifest-md5.txt`` still validates.

        One payload checksum in the manifest is rewritten in upper case, the
        tag manifest entry for ``manifest-md5.txt`` is updated to match the
        rewritten file, and the reopened bag is expected to validate.
        """
        bag = bagit.make_bag(self.tmpdir, checksums=['md5'])
        manifest_path = j(self.tmpdir, "manifest-md5.txt")
        tagmanifest_path = j(self.tmpdir, "tagmanifest-md5.txt")

        # Only payload entries are of interest here; entries coming from the
        # tagmanifest file are skipped. The last payload entry seen wins.
        payload_prefix = 'data' + os.sep
        digests = {}
        for key in bag.entries.keys():
            if key.startswith(payload_prefix):
                digests = bag.entries[key]
        hashstr = next(iter(digests.values()))

        manifest = slurp_text_file(manifest_path)
        manifest = manifest.replace(hashstr, hashstr.upper())
        with open(manifest_path, "wb") as m:
            m.write(manifest.encode('utf-8'))

        # manifest-md5.txt changed on disk, so its own md5 recorded in
        # tagmanifest-md5.txt has to be recomputed and swapped in.
        hasher = hashlib.new('md5')
        hasher.update(slurp_text_file(manifest_path).encode('utf-8'))
        with open(tagmanifest_path, "r") as tagmanifest:
            previous_digest = bag.entries['manifest-md5.txt']['md5']
            tagman_contents = tagmanifest.read().replace(
                previous_digest, hasher.hexdigest())
        with open(tagmanifest_path, "w") as tagmanifest:
            tagmanifest.write(tagman_contents)

        bag = bagit.BDBag(self.tmpdir)
        self.assertTrue(self.validate(bag))
示例#7
0
 def test_multiple_oxum_values(self):
     """Append a ``Payload-Oxum: 7.7`` line to bag-info.txt and fast-validate.

     Fast validation of the reopened bag is expected to succeed.
     """
     logger.info(self.getTestHeader(sys._getframe().f_code.co_name))
     bagit.make_bag(self.tmpdir)
     baginfo_path = j(self.tmpdir, "bag-info.txt")
     with open(baginfo_path, "a") as baginfo:
         baginfo.write('Payload-Oxum: 7.7\n')
     reopened = bagit.BDBag(self.tmpdir)
     self.assertTrue(self.validate(reopened, fast=True))
示例#8
0
def validate_bag(bag_path,
                 fast=False,
                 callback=None,
                 config_file=DEFAULT_CONFIG_FILE):
    """Open the bag at ``bag_path`` and run its ``validate`` method.

    Args:
        bag_path: Location of the bag, passed to ``bdbagit.BDBag``.
        fast: Forwarded to ``BDBag.validate`` as ``fast``.
        callback: Forwarded to ``BDBag.validate``. When truthy, validation
            is invoked with a process count of 1 instead of the configured
            ``bag_processes`` value.
        config_file: Path given to ``read_config``; the ``bag_config``
            section of the result supplies ``bag_processes`` (default 1).

    Raises:
        bdbagit.BagValidationError: Re-raised after a warning is logged
            explaining that the failure may be transient when the bag has
            unresolved fetch.txt references.
        bdbagit.BaggingInterruptedError: Re-raised after a warning is logged.
        RuntimeError: Raised in place of any other exception that occurs
            while opening or validating the bag.
    """
    bag_settings = read_config(config_file)['bag_config']
    configured_processes = bag_settings.get('bag_processes', 1)

    try:
        logger.info("Validating bag: %s" % bag_path)
        target = bdbagit.BDBag(bag_path)
        # A supplied callback switches validation to a single process.
        process_count = 1 if callback else configured_processes
        target.validate(process_count, fast=fast, callback=callback)
        logger.info("Bag %s is valid" % bag_path)
    except bdbagit.BagValidationError as e:
        logger.warning(
            "BagValidationError: A BagValidationError may be transient if the bag contains unresolved "
            "remote file references from a fetch.txt file. In this case the bag is incomplete but not "
            "necessarily invalid. Resolve remote file references (if any) and re-validate."
        )
        raise e
    except bdbagit.BaggingInterruptedError as e:
        logger.warning(get_typed_exception(e))
        raise e
    except Exception as e:
        raise RuntimeError("Unhandled exception while validating bag: %s" % e)
示例#9
0
 def test_is_valid(self):
     """Check ``is_valid()`` before and after an extra payload file appears.

     The same bag object reports valid right after creation and invalid
     once ``data/extra_file`` has been written.
     """
     bagit.make_bag(self.tmpdir)
     bag = bagit.BDBag(self.tmpdir)
     self.assertTrue(bag.is_valid())

     extra_path = j(self.tmpdir, "data", "extra_file")
     with open(extra_path, "w") as ef:
         ef.write("bar")
     self.assertFalse(bag.is_valid())
示例#10
0
 def test_validate_fast_without_oxum(self):
     """Expect fast validation to raise once bag-info.txt has been removed."""
     bagit.make_bag(self.tmpdir)
     os.remove(j(self.tmpdir, "bag-info.txt"))
     reopened = bagit.BDBag(self.tmpdir)
     with self.assertRaises(bagit.BagValidationError):
         self.validate(reopened, fast=True)
示例#11
0
 def test_is_valid(self):
     """Check ``is_valid()`` before and after an extra payload file appears.

     The same bag object reports valid right after creation and invalid
     once ``data/extra_file`` has been written.
     """
     logger.info(self.getTestHeader(sys._getframe().f_code.co_name))
     bagit.make_bag(self.tmpdir)
     bag = bagit.BDBag(self.tmpdir)
     self.assertTrue(bag.is_valid())

     extra_path = j(self.tmpdir, "data", "extra_file")
     with open(extra_path, "w") as ef:
         ef.write("bar")
     self.assertFalse(bag.is_valid())
示例#12
0
 def test_validate_fast_without_oxum(self):
     """Expect fast validation to raise once bag-info.txt has been removed."""
     logger.info(self.getTestHeader(sys._getframe().f_code.co_name))
     bagit.make_bag(self.tmpdir)
     os.remove(j(self.tmpdir, "bag-info.txt"))
     reopened = bagit.BDBag(self.tmpdir)
     with self.assertRaises(bagit.BagValidationError):
         self.validate(reopened, fast=True)
示例#13
0
    def test_save_only_baginfo(self):
        """Save a ``bag.info`` change after a new payload file was added.

        ``save()`` is called without arguments. The reopened bag must carry
        the new info value and is expected to report invalid (presumably
        because the added payload file is not covered by the manifests).
        """
        bag = bagit.make_bag(self.tmpdir)
        new_payload = j(self.tmpdir, 'data', 'newfile')
        with open(new_payload, 'w') as nf:
            nf.write('newfile')
        bag.info["foo"] = "bar"
        bag.save()

        reopened = bagit.BDBag(self.tmpdir)
        self.assertEqual(reopened.info["foo"], "bar")
        self.assertFalse(reopened.is_valid())
示例#14
0
 def test_validate_slow_without_oxum_extra_file(self):
     """Expect full validation to raise for an extra payload file.

     bag-info.txt is removed and ``data/extra_file`` is added before the
     bag is reopened and validated with ``fast=False``.
     """
     bagit.make_bag(self.tmpdir)
     os.remove(j(self.tmpdir, "bag-info.txt"))
     extra_path = j(self.tmpdir, "data", "extra_file")
     with open(extra_path, "w") as ef:
         ef.write("foo")
     reopened = bagit.BDBag(self.tmpdir)
     with self.assertRaises(bagit.BagValidationError):
         self.validate(reopened, fast=False)
示例#15
0
    def test_save_baginfo_with_sha1(self):
        """Save ``bag.info`` changes on a bag created with sha1 and md5 checksums.

        The bag must be valid right after creation and again after being
        saved twice and reopened.
        """
        algorithms = ["sha1", "md5"]
        bag = bagit.make_bag(self.tmpdir, checksums=algorithms)
        self.assertTrue(bag.is_valid())
        bag.save()

        bag.info['foo'] = "bar"
        bag.save()

        reopened = bagit.BDBag(self.tmpdir)
        self.assertTrue(reopened.is_valid())
示例#16
0
    def test_save_only_baginfo(self):
        """Save a ``bag.info`` change after a new payload file was added.

        ``save()`` is called without arguments. The reopened bag must carry
        the new info value and is expected to report invalid (presumably
        because the added payload file is not covered by the manifests).
        """
        logger.info(self.getTestHeader(sys._getframe().f_code.co_name))
        bag = bagit.make_bag(self.tmpdir)
        new_payload = j(self.tmpdir, 'data', 'newfile')
        with open(new_payload, 'w') as nf:
            nf.write('newfile')
        bag.info["foo"] = "bar"
        bag.save()

        reopened = bagit.BDBag(self.tmpdir)
        self.assertEqual(reopened.info["foo"], "bar")
        self.assertFalse(reopened.is_valid())
示例#17
0
 def test_validate_slow_without_oxum_extra_file(self):
     """Expect full validation to raise for an extra payload file.

     bag-info.txt is removed and ``data/extra_file`` is added before the
     bag is reopened and validated with ``fast=False``.
     """
     logger.info(self.getTestHeader(sys._getframe().f_code.co_name))
     bagit.make_bag(self.tmpdir)
     os.remove(j(self.tmpdir, "bag-info.txt"))
     extra_path = j(self.tmpdir, "data", "extra_file")
     with open(extra_path, "w") as ef:
         ef.write("foo")
     reopened = bagit.BDBag(self.tmpdir)
     with self.assertRaises(bagit.BagValidationError):
         self.validate(reopened, fast=False)
示例#18
0
 def test_bom_in_bagit_txt(self):
     """Expect validation to raise when bagit.txt starts with a UTF-8 BOM."""
     bagit.make_bag(self.tmpdir)
     bagit_txt = j(self.tmpdir, "bagit.txt")

     # On Python 3 the file is handled as text, so the BOM bytes are
     # decoded to a str first; on Python 2 the raw bytes are used as-is.
     if sys.version_info[0] >= 3:
         bom = codecs.BOM_UTF8.decode('utf-8')
     else:
         bom = codecs.BOM_UTF8

     with open(bagit_txt, "r") as bf:
         prefixed = bom + bf.read()
     with open(bagit_txt, "w") as bf:
         bf.write(prefixed)

     reopened = bagit.BDBag(self.tmpdir)
     with self.assertRaises(bagit.BagValidationError):
         self.validate(reopened)
示例#19
0
    def test_save_baginfo_with_sha1(self):
        """Save ``bag.info`` changes on a bag created with sha1 and md5 checksums.

        The bag must be valid right after creation and again after being
        saved twice and reopened.
        """
        logger.info(self.getTestHeader(sys._getframe().f_code.co_name))
        algorithms = ["sha1", "md5"]
        bag = bagit.make_bag(self.tmpdir, checksums=algorithms)
        self.assertTrue(bag.is_valid())
        bag.save()

        bag.info['foo'] = "bar"
        bag.save()

        reopened = bagit.BDBag(self.tmpdir)
        self.assertTrue(reopened.is_valid())
示例#20
0
    def test_open_bag_with_missing_bagit_txt(self):
        """Expect ``BagError`` with a specific message when bagit.txt is missing."""
        bagit.make_bag(self.tmpdir)

        bagit_txt = j(self.tmpdir, 'bagit.txt')
        os.unlink(bagit_txt)

        with self.assertRaises(bagit.BagError) as error_catcher:
            bagit.BDBag(self.tmpdir)

        expected_message = 'Expected bagit.txt does not exist: %s' % bagit_txt
        self.assertEqual(expected_message, str(error_catcher.exception))
示例#21
0
    def test_open_bag_with_unsupported_version(self):
        """Expect ``BagError`` when bagit.txt declares ``BagIt-Version: 2.0``."""
        bagit.make_bag(self.tmpdir)

        declaration = 'BagIt-Version: 2.0\nTag-File-Character-Encoding: UTF-8\n'
        with open(j(self.tmpdir, 'bagit.txt'), 'w') as f:
            f.write(declaration)

        with self.assertRaises(bagit.BagError) as error_catcher:
            bagit.BDBag(self.tmpdir)

        raised_message = str(error_catcher.exception)
        self.assertEqual('Unsupported bag version: 2.0', raised_message)
示例#22
0
def validate_bag_structure(bag_path, skip_remote=True):
    """Open the bag at ``bag_path`` and check its payload consistency.

    Args:
        bag_path: Location of the bag, passed to ``bdbagit.BDBag``.
        skip_remote: Forwarded to ``check_payload_consistency``.

    Raises:
        bdbagit.BagValidationError: When ``check_payload_consistency``
            returns a falsy result.
        Exception: Any exception raised while opening or checking the bag
            is logged at error level and re-raised unchanged (this includes
            the ``BagValidationError`` above).
    """
    try:
        logger.info("Validating bag structure: %s" % bag_path)
        target = bdbagit.BDBag(bag_path)
        consistent = check_payload_consistency(target, skip_remote=skip_remote)
        if not consistent:
            raise bdbagit.BagValidationError(
                "Inconsistent payload state. See log warnings for additional information."
            )
        logger.info("The directory %s is a valid bag structure" % bag_path)
    except Exception as e:
        logger.error("Error while validating bag structure: %s", e)
        raise e
示例#23
0
    def test_open_bag_with_missing_bagit_txt(self):
        """Expect ``BagError`` with a specific message when bagit.txt is missing."""
        logger.info(self.getTestHeader(sys._getframe().f_code.co_name))
        bagit.make_bag(self.tmpdir)

        bagit_txt = j(self.tmpdir, 'bagit.txt')
        os.unlink(bagit_txt)

        with self.assertRaises(bagit.BagError) as error_catcher:
            bagit.BDBag(self.tmpdir)

        expected_message = 'Expected bagit.txt does not exist: %s' % bagit_txt
        self.assertEqual(expected_message, str(error_catcher.exception))
示例#24
0
    def test_open_bag_with_malformed_bagit_txt(self):
        """Expect ``BagError`` naming the missing tags when bagit.txt is empty."""
        bagit.make_bag(self.tmpdir)

        # Opening in 'w' mode already truncates the file to zero length, so
        # no separate os.ftruncate call is needed (that call is also not
        # available on every platform / Python version).
        with open(j(self.tmpdir, 'bagit.txt'), 'w'):
            pass

        with self.assertRaises(bagit.BagError) as error_catcher:
            bagit.BDBag(self.tmpdir)

        self.assertEqual(
            'Missing required tag in bagit.txt: BagIt-Version, Tag-File-Character-Encoding',
            str(error_catcher.exception))
示例#25
0
 def test_validate_flipped_bit(self):
     """Overwrite the first character of data/README and validate three ways.

     Full validation must raise, while ``fast=True`` and
     ``completeness_only=True`` validation must both still succeed.
     """
     bagit.make_bag(self.tmpdir)
     readme = j(self.tmpdir, "data", "README")

     # Same length as before; only the first character is replaced.
     altered = 'A' + slurp_text_file(readme)[1:]
     with io.open(readme, "w", newline="\n") as r:
         r.write(altered)

     bag = bagit.BDBag(self.tmpdir)
     with self.assertRaises(bagit.BagValidationError):
         self.validate(bag)
     # fast doesn't catch the flipped bit, since oxsum is the same
     self.assertTrue(self.validate(bag, fast=True))
     self.assertTrue(self.validate(bag, completeness_only=True))
示例#26
0
    def test_validate_missing_directory(self):
        """Expect a specific ``BagValidationError`` when ``data/`` is removed."""
        bagit.make_bag(self.tmpdir)

        tmp_data_dir = os.path.join(self.tmpdir, 'data')
        shutil.rmtree(tmp_data_dir)

        reopened = bagit.BDBag(self.tmpdir)
        with self.assertRaises(bagit.BagValidationError) as error_catcher:
            reopened.validate()

        expected_message = (
            'Expected data directory %s does not exist' % tmp_data_dir)
        self.assertEqual(expected_message, str(error_catcher.exception))
示例#27
0
 def test_validate_completeness(self):
     """Rename data/README and compare fast vs. completeness-only validation.

     Fast validation must still succeed, while ``completeness_only=True``
     must raise without ever calling the bag's ``_validate_entries``.
     """
     bagit.make_bag(self.tmpdir)
     payload_dir = j(self.tmpdir, "data")
     os.rename(j(payload_dir, "README"), j(payload_dir, "extra_file"))

     bag = bagit.BDBag(self.tmpdir)
     self.assertTrue(self.validate(bag, fast=True))
     with mock.patch.object(bag, '_validate_entries') as entries_mock:
         with self.assertRaises(bagit.BagValidationError):
             self.validate(bag, completeness_only=True)
         self.assertEqual(entries_mock.call_count, 0)
示例#28
0
    def test_open_bag_with_unknown_encoding(self):
        """Expect ``BagError`` when bagit.txt declares the encoding ``WTF-8``."""
        logger.info(self.getTestHeader(sys._getframe().f_code.co_name))
        bagit.make_bag(self.tmpdir)

        declaration = (
            'BagIt-Version: 0.97\nTag-File-Character-Encoding: WTF-8\n')
        with open(j(self.tmpdir, 'bagit.txt'), 'w') as f:
            f.write(declaration)

        with self.assertRaises(bagit.BagError) as error_catcher:
            bagit.BDBag(self.tmpdir)

        raised_message = str(error_catcher.exception)
        self.assertEqual('Unsupported encoding: WTF-8', raised_message)
示例#29
0
    def test_validate_missing_directory(self):
        """Expect a specific ``BagValidationError`` when ``data/`` is removed."""
        logger.info(self.getTestHeader(sys._getframe().f_code.co_name))
        bagit.make_bag(self.tmpdir)

        tmp_data_dir = os.path.join(self.tmpdir, 'data')
        shutil.rmtree(tmp_data_dir)

        reopened = bagit.BDBag(self.tmpdir)
        with self.assertRaises(bagit.BagValidationError) as error_catcher:
            reopened.validate()

        expected_message = (
            'Expected data directory %s does not exist' % tmp_data_dir)
        self.assertEqual(expected_message, str(error_catcher.exception))
示例#30
0
    def test_open_bag_with_invalid_versions(self):
        """Expect ``BagError`` for each malformed ``BagIt-Version`` value.

        bagit.txt is rewritten once per candidate version string, and the
        error message must quote that same string.
        """
        bagit.make_bag(self.tmpdir)

        bagit_txt = j(self.tmpdir, 'bagit.txt')
        declaration = 'BagIt-Version: %s\nTag-File-Character-Encoding: UTF-8\n'
        bad_versions = ('a.b', '2.', '0.1.2', '1.2.3')

        for v in bad_versions:
            with open(bagit_txt, 'w') as f:
                f.write(declaration % v)

            with self.assertRaises(bagit.BagError) as error_catcher:
                bagit.BDBag(self.tmpdir)

            expected_message = (
                'Bag version numbers must be MAJOR.MINOR numbers, not %s' % v)
            self.assertEqual(expected_message, str(error_catcher.exception))