def is_valid(path, completeness_only=False, printfn=print):
    """Report whether the BagIt package located at ``path`` validates.

    The package is opened with ``Bag`` and validated using one worker
    process per CPU reported by ``multiprocessing.cpu_count()``.

    :param path: Location of the BagIt package to check.
    :param completeness_only: Forwarded unchanged to ``Bag.validate``.
    :param printfn: ``print``-compatible callable used to report a
        validation failure; it is invoked with ``file=sys.stderr``.
    :returns: ``True`` when no ``BagError`` is raised while opening or
        validating the package, ``False`` otherwise.
    """
    try:
        package = Bag(path)
        package.validate(
            completeness_only=completeness_only,
            processes=multiprocessing.cpu_count(),
        )
    except BagError as problem:
        # Opening and validating both report failure through BagError.
        printfn("Error validating BagIt package:", problem, file=sys.stderr)
        return False
    else:
        return True
def main():
    """Validate the bags selected on the command line and log the results.

    Candidate bags are every immediate sub-directory of ``args.directory``
    (when given) plus ``args.bagpath`` (when given). Each candidate is
    opened with ``Bag`` and validated; the outcome is logged per bag and
    one failing bag never prevents the remaining ones from being checked.
    """
    parser = _make_parser()
    args = parser.parse_args()
    bags = []
    _configure_logging(args)

    checks = "Performing the following validations: Checking 0xums, Checking bag completeness"
    if not args.slow:
        checks += ", Recalculating hashes"
    LOGGER.info(checks)

    if args.directory:
        directory_path = os.path.abspath(args.directory)
        candidates = (
            os.path.join(directory_path, name) for name in os.listdir(directory_path)
        )
        # Only directories can be bags; plain files are skipped.
        bags.extend(path for path in candidates if os.path.isdir(path))
    if args.bagpath:
        bags.append(os.path.abspath(args.bagpath))

    LOGGER.info("Checking {} folder(s).".format(len(bags)))
    for bagpath in tqdm(bags):
        LOGGER.info("Checking: {}".format(bagpath))
        try:
            bag = Bag(bagpath)
        except Exception:
            # Deliberately broad (anything that prevents opening the folder
            # as a bag), but unlike a bare ``except`` this lets
            # KeyboardInterrupt / SystemExit stop the run.
            LOGGER.error("{}: Not a bag".format(bagpath))
            continue

        # NOTE(review): ``args.slow`` is forwarded as ``fast``; the flag's
        # polarity looks inverted relative to its name -- confirm against
        # ``_make_parser``.
        try:
            valid = bag.validate(fast=args.slow)
        except BagError as err:
            # Validation failures are raised rather than returned; treat
            # them as an invalid bag and carry on with the next one instead
            # of aborting the whole batch.
            LOGGER.debug("Validation error for %s: %s", bagpath, err)
            valid = False

        if valid:
            LOGGER.info("{}: valid".format(bagpath))
        else:
            LOGGER.error("{}: invalid".format(bagpath))
def verify_aip(job):
    """Verify the AIP was bagged correctly by extracting it and running
    verification on its contents.

    This is also where we verify the checksums now that the
    verifyPREMISChecksums_v0.0 ("Verify checksums generated on ingest")
    micro-service has been removed. It was removed because verifying
    checksums by calculating them in that MS and then having bagit calculate
    them here was redundant.

    job.args[1] = UUID
      UUID of the SIP, which will become the UUID of the AIP
    job.args[2] = current location
      Full absolute path to the AIP's current location on the local filesystem

    Returns 0 when the bag is complete and checksum verification passes,
    and 1 when extraction, bag validation or checksum verification fails.
    When the AIP had to be extracted, the extraction directory is removed
    before returning, including when an unexpected exception propagates.
    """
    sip_uuid = job.args[1]  # %sip_uuid%
    aip_path = job.args[2]  # SIPDirectory%%sip_name%-%sip_uuid%.7z

    temp_dir = mcpclient_settings.TEMP_DIRECTORY

    # A directory is treated as an uncompressed AIP and validated in place;
    # anything else is extracted into a per-SIP temporary directory first.
    is_uncompressed_aip = os.path.isdir(aip_path)

    if is_uncompressed_aip:
        bag_path = aip_path
    else:
        try:
            extract_dir = os.path.join(temp_dir, sip_uuid)
            bag_path = extract_aip(job, aip_path, extract_dir)
        except Exception as err:
            job.print_error(repr(err))
            job.pyprint('Error extracting AIP at "{}"'.format(aip_path), file=sys.stderr)
            return 1

    return_code = 0
    try:
        try:
            # Only validate completeness since we're going to verify checksums
            # later against what we have in the database via `verify_checksums`.
            bag = Bag(bag_path)
            bag.validate(completeness_only=True)
        except BagError as err:
            job.print_error("Error validating BagIt package: {}".format(err))
            return_code = 1

        if return_code == 0:
            try:
                verify_checksums(job, bag, sip_uuid)
            except VerifyChecksumsError:
                return_code = 1
        else:
            job.pyprint(
                "Not verifying checksums because other tests have already"
                " failed."
            )
    finally:
        # Cleanup runs in ``finally`` so the extracted copy is not left
        # behind when an exception other than the ones handled above
        # escapes from validation or checksum verification.
        if not is_uncompressed_aip:
            try:
                shutil.rmtree(extract_dir)
            except OSError as err:
                job.pyprint(
                    "Failed to remove temporary directory at {extract_dir} which"
                    " contains the AIP extracted for verification."
                    " Error:\n{err}".format(extract_dir=extract_dir, err=err),
                    file=sys.stderr,
                )

    return return_code