def spill(self, src, dest):
    """
    Spill an OCRD-ZIP, i.e. unpack it and turn it into a workspace.

    See https://ocr-d.github.com/ocrd_zip#unpacking-ocrd-zip-to-a-workspace

    Arguments:
        src (string): Path to OCRD-ZIP
        dest (string): Path to directory to unpack data folder to. If it is
            an existing directory, the data is unpacked into a new
            subdirectory named after ``src`` (without the ``.ocrd.zip`` or
            ``.zip`` suffix).

    Returns:
        Workspace: the workspace created on the directory the data folder
        was copied to.

    Raises:
        Exception: if ``dest`` exists but is not a directory, or if the
            subdirectory derived from ``src`` already exists in ``dest``.
    """
    if exists(dest) and not isdir(dest):
        raise Exception("Not a directory: %s" % dest)

    # If dest is an existing directory, derive the workspace directory name from src
    if isdir(dest):
        workspace_name = re.sub(r'(\.ocrd)?\.zip$', '', basename(src))
        new_dest = join(dest, workspace_name)
        if exists(new_dest):
            raise Exception("Directory exists: %s" % new_dest)
        dest = new_dest

    log.info("Spilling %s to %s", src, dest)

    bagdir = mkdtemp(prefix=TMP_BAGIT_PREFIX)
    try:
        unzip_file_to_dir(src, bagdir)

        # Only the payload below 'data' is copied into the workspace directory
        datadir = join(bagdir, 'data')
        for root, _, files in walk(datadir):
            destdir = join(dest, relpath(root, datadir))
            # Directories are only created when they actually receive files
            if files and not exists(destdir):
                makedirs(destdir)
            for f in files:
                srcfile = join(root, f)
                destfile = join(destdir, f)
                log.debug("Copy %s -> %s", srcfile, destfile)
                copyfile(srcfile, destfile)

        # TODO copy allowed tag files if present

        # TODO validate bagit
    finally:
        # Drop tempdir, also when unpacking or copying failed
        rmtree(bagdir)

    # Create workspace
    workspace = Workspace(self.resolver, directory=dest)

    # TODO validate workspace

    return workspace
def validate(self, skip_checksums=False, skip_bag=False, skip_unzip=False, skip_delete=False, processes=2):
    """
    Validate an OCRD-ZIP file for profile, bag and workspace conformance

    Arguments:
        skip_bag (boolean): Whether to skip all checks of manifests and files
        skip_checksums (boolean): Whether to omit checksum checks but still check basic BagIt conformance
        skip_unzip (boolean): Whether the OCRD-ZIP is unzipped, i.e. a directory
        skip_delete (boolean): Whether to skip deleting the unpacked OCRD-ZIP dir after validation
        processes (integer): Number of processes used for checksum validation

    Returns:
        The validator's report (``self.report``).
    """
    if skip_unzip:
        # The "zip" is already a directory; it is validated in place and
        # must never be removed afterwards.
        bagdir = self.path_to_zip
        skip_delete = True
    else:
        self.profile_validator.validate_serialization(self.path_to_zip)
        bagdir = mkdtemp(prefix=TMP_BAGIT_PREFIX)

    try:
        if not skip_unzip:
            # Unpack inside the guarded region so that a failed extraction
            # does not leave the temporary directory behind.
            unzip_file_to_dir(self.path_to_zip, bagdir)
        bag = Bag(bagdir)
        self._validate_profile(bag)
        if not skip_bag:
            self._validate_bag(bag, fast=skip_checksums, processes=processes)
    finally:
        if not skip_delete:
            # remove tempdir
            rmtree(bagdir)
    return self.report