def verify_bag(self):
    """Verifies the structure of the bag"""
    console = self.loggers.console
    console.info("Verifying bag...")
    bag = Bag(self.config.dir)
    if bag.is_valid():
        console.info("bag is valid :)")
    else:
        console.info("bag is invalid :(")
def is_valid(path, completeness_only=False, printfn=print):
    """Return whether a BagIt package is valid given its ``path``."""
    try:
        bag = Bag(path)
        bag.validate(processes=multiprocessing.cpu_count(),
                     completeness_only=completeness_only)
    except BagError as err:
        printfn("Error validating BagIt package:", err, file=sys.stderr)
        return False
    return True
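# Hypothetical usage sketch (an assumption, not part of the original module):
# turn the boolean returned by is_valid() into a process exit code.
if __name__ == '__main__':
    sys.exit(0 if is_valid(sys.argv[1]) else 1)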
def test_existing_not_allowed(self):
    self.profile_dict["Tag-Files-Allowed"] = []
    with open(join(self.bagdir, 'tag-foo'), 'w'):
        pass
    profile = Profile('TEST', self.profile_dict)
    result = profile.validate(Bag(self.bagdir))
    self.assertFalse(result)
    self.assertEqual(len(profile.report.errors), 1)
    self.assertTrue("Existing tag file" in profile.report.errors[0].value)
def is_bag(path, printfn=print):
    """Determine whether the directory contains a BagIt package.

    The constructor of ``Bag`` is fast enough but we may prefer to
    optimize later.
    """
    try:
        Bag(path)
    except BagError as err:
        printfn("Error opening BagIt package:", err, file=sys.stderr)
        return False
    return True
def is_bag(path):
    """Determine whether the directory contains a BagIt package.

    The constructor of ``Bag`` is fast enough but we may prefer to
    optimize later.
    """
    if isinstance(path, Path):
        path = str(path)
    try:
        Bag(path)
    except BagError:
        return False
    return True
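# Hypothetical usage sketch (an assumption, not from either project above):
# run the cheap structural check before paying for full checksum validation.
candidate = '/tmp/maybe-a-bag'  # placeholder path
if is_bag(candidate):
    print('Looks like a bag; checksums still need to be validated.')
else:
    print('Not a BagIt package.')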
def main():
    parser = _make_parser()
    args = parser.parse_args()
    bags = []

    _configure_logging(args)

    checks = ("Performing the following validations: Checking Oxums, "
              "Checking bag completeness")
    if not args.slow:
        checks += ", Recalculating hashes"
    LOGGER.info(checks)

    if args.directory:
        directory_path = os.path.abspath(args.directory)
        for path in os.listdir(directory_path):
            path = os.path.join(directory_path, path)
            if os.path.isdir(path):
                bags.append(path)

    if args.bagpath:
        bags.append(os.path.abspath(args.bagpath))

    LOGGER.info("Checking {} folder(s).".format(len(bags)))

    for bagpath in tqdm(bags):
        LOGGER.info("Checking: {}".format(bagpath))
        try:
            bag = Bag(bagpath)
        except BagError:
            # bagit raises BagError when the directory is not a valid bag layout.
            LOGGER.error("{}: Not a bag".format(bagpath))
        else:
            if bag.validate(fast=args.slow):
                LOGGER.info("{}: valid".format(bagpath))
            else:
                LOGGER.error("{}: invalid".format(bagpath))
def test():
    # Note: this ``Bag`` appears to be a JSON-backed data logger with named
    # channels (push()/close()), not bagit.Bag.
    bag = Bag('./test.json', ['imu', 'camera'])
    # bag = Bag('test.json')
    cap = cv2.VideoCapture(0)
    time.sleep(0.1)

    for i in range(2):
        ret, frame = cap.read()  # np.random.randint(0, 255, size=(5, 5))
        if ret:
            bag.push('camera', frame, True)
        else:
            print('bad image')
        bag.push('imu', (1, 2, 3))
        print(i)

    cap.release()
    bag.close()
def validate(self, skip_checksums=False, skip_bag=False, skip_unzip=False,
             skip_delete=False, processes=2):
    """
    Validate an OCRD-ZIP file for profile, bag and workspace conformance

    Arguments:
        skip_bag (boolean): Whether to skip all checks of manifests and files
        skip_checksums (boolean): Whether to omit checksum checks but still check basic BagIt conformance
        skip_unzip (boolean): Whether the OCRD-ZIP is unzipped, i.e. a directory
        skip_delete (boolean): Whether to skip deleting the unpacked OCRD-ZIP dir after validation
        processes (integer): Number of processes used for checksum validation
    """
    if skip_unzip:
        bagdir = self.path_to_zip
        skip_delete = True
    else:
        # try:
        self.profile_validator.validate_serialization(self.path_to_zip)
        # except IOError as err:
        #     raise err
        # except ProfileValidationError as err:
        #     self.report.add_error(err.value)
        bagdir = mkdtemp(prefix=TMP_BAGIT_PREFIX)
        unzip_file_to_dir(self.path_to_zip, bagdir)
    try:
        bag = Bag(bagdir)
        self._validate_profile(bag)
        if not skip_bag:
            self._validate_bag(bag, fast=skip_checksums, processes=processes)
    finally:
        if not skip_delete:
            # remove tempdir
            rmtree(bagdir)
    return self.report
def setUp(self):
    self.bag = Bag('fixtures/test-bar')
    self.profile = Profile(PROFILE_URL)
    self.retrieved_profile = self.profile.get_profile()
def test_not_given(self):
    profile = Profile('TEST', self.profile_dict)
    bag = Bag(self.bagdir)
    result = profile.validate(bag)
    self.assertTrue(result)
def verify_aip(job):
    """Verify the AIP was bagged correctly by extracting it and running
    verification on its contents.

    This is also where we verify the checksums now that the
    verifyPREMISChecksums_v0.0 ("Verify checksums generated on ingest")
    micro-service has been removed. It was removed because verifying checksums
    by calculating them in that MS and then having bagit calculate them here
    was redundant.

    job.args[1] = UUID
        UUID of the SIP, which will become the UUID of the AIP
    job.args[2] = current location
        Full absolute path to the AIP's current location on the local
        filesystem
    """
    sip_uuid = job.args[1]  # %sip_uuid%
    aip_path = job.args[2]  # SIPDirectory%%sip_name%-%sip_uuid%.7z

    temp_dir = mcpclient_settings.TEMP_DIRECTORY

    is_uncompressed_aip = os.path.isdir(aip_path)
    if is_uncompressed_aip:
        bag_path = aip_path
    else:
        try:
            extract_dir = os.path.join(temp_dir, sip_uuid)
            bag_path = extract_aip(job, aip_path, extract_dir)
        except Exception as err:
            job.print_error(repr(err))
            job.pyprint('Error extracting AIP at "{}"'.format(aip_path), file=sys.stderr)
            return 1

    return_code = 0
    try:
        # Only validate completeness since we're going to verify checksums
        # later against what we have in the database via `verify_checksums`.
        bag = Bag(bag_path)
        bag.validate(completeness_only=True)
    except BagError as err:
        job.print_error("Error validating BagIt package: {}".format(err))
        return_code = 1

    if return_code == 0:
        try:
            verify_checksums(job, bag, sip_uuid)
        except VerifyChecksumsError:
            return_code = 1
    else:
        job.pyprint("Not verifying checksums because other tests have already"
                    " failed.")

    # cleanup
    if not is_uncompressed_aip:
        try:
            shutil.rmtree(extract_dir)
        except OSError as err:
            job.pyprint(
                "Failed to remove temporary directory at {extract_dir} which"
                " contains the AIP extracted for verification."
                " Error:\n{err}".format(extract_dir=extract_dir, err=err),
                file=sys.stderr,
            )

    return return_code
def bag(self, workspace, ocrd_identifier, dest=None, ocrd_mets='mets.xml',
        ocrd_manifestation_depth='full', ocrd_base_version_checksum=None,
        processes=1, skip_zip=False, in_place=False, tag_files=None):
    """
    Bag a workspace

    See https://ocr-d.github.com/ocrd_zip#packing-a-workspace-as-ocrd-zip

    Arguments:
        workspace (ocrd.Workspace): workspace to bag
        ocrd_identifier (string): Ocrd-Identifier in bag-info.txt
        dest (string): Path of the generated OCRD-ZIP.
        ocrd_mets (string): Ocrd-Mets in bag-info.txt
        ocrd_manifestation_depth (string): Ocrd-Manifestation-Depth in bag-info.txt
        ocrd_base_version_checksum (string): Ocrd-Base-Version-Checksum in bag-info.txt
        processes (integer): Number of parallel processes used for checksumming
        skip_zip (boolean): Whether to leave the directory unzipped
        in_place (boolean): Whether to **replace** the workspace with its BagIt variant
        tag_files (list<string>): Path names of additional tag files to be bagged at the root of the bag
    """
    if ocrd_manifestation_depth not in ('full', 'partial'):
        raise Exception("manifestation_depth must be 'full' or 'partial'")
    if in_place and (dest is not None):
        raise Exception("Setting 'dest' and 'in_place' is a contradiction")
    if in_place and not skip_zip:
        raise Exception("Setting 'in_place' but not 'skip_zip' is a contradiction")
    if tag_files is None:
        tag_files = []

    # create bagdir
    bagdir = mkdtemp(prefix=TMP_BAGIT_PREFIX)

    if dest is None:
        if in_place:
            dest = workspace.directory
        elif not skip_zip:
            dest = '%s.ocrd.zip' % workspace.directory
        else:
            dest = '%s.ocrd' % workspace.directory

    log.info("Bagging %s to %s (temp dir %s)",
             workspace.directory, '(in-place)' if in_place else dest, bagdir)

    # create data dir
    makedirs(join(bagdir, 'data'))

    # create bagit.txt
    with open(join(bagdir, 'bagit.txt'), 'wb') as f:
        f.write(BAGIT_TXT.encode('utf-8'))

    # create manifests
    total_bytes, total_files = self._bag_mets_files(
        workspace, bagdir, ocrd_manifestation_depth, ocrd_mets, processes)

    # create bag-info.txt
    bag = Bag(bagdir)
    self._set_bag_info(bag, total_bytes, total_files, ocrd_identifier,
                       ocrd_manifestation_depth, ocrd_base_version_checksum)

    for tag_file in tag_files:
        copyfile(tag_file, join(bagdir, basename(tag_file)))

    # save bag
    bag.save()

    # ZIP it
    self._serialize_bag(workspace, bagdir, dest, in_place, skip_zip)

    log.info('Created bag at %s', dest)
    return dest
def find_bag_metadata(bag_logs_path):
    try:
        return Bag(bag_logs_path).info
    except BagError:
        print("Unable to locate or parse bag metadata at: {}".format(bag_logs_path),
              file=sys.stderr)
        return {}
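# Hypothetical usage sketch (an assumption; the path and key are illustrative):
# Bag.info maps bag-info.txt labels to values, so a missing or unparsable bag
# falls back to the empty dict returned above.
metadata = find_bag_metadata('/path/to/bag/logs')
print(metadata.get('Bagging-Date', 'unknown'))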
class ImageBag:

    def __init__(self, path: str, auto_make: bool = False) -> None:
        self.path = realpath(expanduser(expandvars(normpath(path))))
        if auto_make:
            try:
                makedirs(self.path, exist_ok=False)
            except OSError:
                raise OSError(
                    '{} already exists, but auto_make = True'.format(self.path))
            else:
                self.bag = make_bag(self.path)
        else:
            try:
                self.bag = Bag(self.path)
            except BagError as e:
                raise OSError(
                    '{} does not seem to be a valid Moondog Image bag: {}'
                    ''.format(self.path, str(e)))
        self.components = {}

    def accession(self, path: str):
        self._import_original(path)
        self._generate_master()

    def _import_original(self, path: str):
        d = self.components['original'] = {}
        d['accession_path'] = realpath(expanduser(expandvars(normpath(path))))
        d['filename'] = basename(d['accession_path'])
        fn, ext = splitext(d['filename'])
        target_path = join(self.path, 'data', d['filename'])
        shutil.copy2(d['accession_path'], target_path)
        with ExifTool() as et:
            meta = et.get_metadata(target_path)
        pprint(meta)
        xmp = XMPFiles(file_path=target_path).get_xmp()
        pprint(xmp)
        self._update(manifests=True)

    def _generate_master(self):
        infn = self.components['original']['filename']
        d = self.components['master'] = {}
        d['filename'] = 'master.tif'
        Image.open(join(self.path, 'data', infn)).save(
            join(self.path, 'data', d['filename']))
        self._update(manifests=True)

    def _update(self, manifests=False):
        """Update the bag."""
        for fn, fmeta in self.components.items():
            for term, value in fmeta.items():
                bag_term = '{}-{}'.format(
                    fn.title(),
                    term.replace('_', ' ').title().replace(' ', '-'))
                try:
                    prior_value = self.bag.info[bag_term]
                except KeyError:
                    self.bag.info[bag_term] = value
                else:
                    if prior_value != value:
                        self.bag.info[bag_term] = value
        self.bag.save(manifests=manifests)
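# Hypothetical usage sketch (an assumption; the paths are placeholders): create a
# new Moondog image bag on disk, then accession a photograph into it. accession()
# copies the original into data/, derives master.tif and re-saves the manifests.
image_bag = ImageBag('~/bags/example-0001', auto_make=True)
image_bag.accession('~/incoming/scan-0001.jpg')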