def verify_bag(self):
    """Check the bag at ``self.config.dir`` and log the outcome.

    Progress and the result are both written at INFO level to
    ``self.loggers.console``. Nothing is returned.
    """
    log = self.loggers.console
    log.info("Verifying bag...")
    if not Bag(self.config.dir).is_valid():
        log.info("bag is invalid :(")
        return
    log.info("bag is valid :)")
示例#2
0
def is_valid(path, completeness_only=False, printfn=print):
    """Tell whether the BagIt package at ``path`` passes validation.

    The bag is validated with one worker process per CPU reported by
    ``multiprocessing.cpu_count()``; ``completeness_only`` is forwarded to
    ``Bag.validate`` unchanged.

    Returns ``True`` when no ``BagError`` is raised. Otherwise the error is
    reported through ``printfn`` (called with ``file=sys.stderr``) and
    ``False`` is returned.
    """
    try:
        Bag(path).validate(
            processes=multiprocessing.cpu_count(),
            completeness_only=completeness_only,
        )
    except BagError as exc:
        printfn("Error validating BagIt package:", exc, file=sys.stderr)
        outcome = False
    else:
        outcome = True
    return outcome
示例#3
0
 def test_existing_not_allowed(self):
     """A tag file already in the bag is rejected when none are allowed."""
     self.profile_dict["Tag-Files-Allowed"] = []
     # Create an empty tag file so the bag contains something the profile
     # does not permit.
     with open(join(self.bagdir, 'tag-foo'), 'w'):
         pass
     profile = Profile('TEST', self.profile_dict)
     result = profile.validate(Bag(self.bagdir))
     self.assertFalse(result)
     self.assertEqual(len(profile.report.errors), 1)
     # assertIn shows both operands on failure, unlike assertTrue(x in y).
     self.assertIn("Existing tag file", profile.report.errors[0].value)
示例#4
0
 def __init__(self, path: str, auto_make: bool = False) -> None:
     """Open the bag at ``path``, or create it when ``auto_make`` is true.

     ``path`` is normalised, has environment variables and ``~`` expanded,
     and is resolved to a real path before use.

     Args:
         path: Location of the bag directory.
         auto_make: When true, the directory must not exist yet; it is
             created and turned into a bag with ``make_bag``. When false,
             an existing bag is opened with ``Bag``.

     Raises:
         OSError: If ``auto_make`` is true and the directory already
             exists, if the directory cannot be created, or if
             ``auto_make`` is false and ``Bag`` raises ``BagError``.
     """
     self.path = realpath(expanduser(expandvars(normpath(path))))
     if auto_make:
         try:
             makedirs(self.path, exist_ok=False)
         except FileExistsError as err:
             # Only a pre-existing path gets this message; other OSErrors
             # (e.g. permission denied) propagate with their own details.
             raise OSError('{} already exists, but auto_make = True'
                           ''.format(self.path)) from err
         self.bag = make_bag(self.path)
     else:
         try:
             self.bag = Bag(self.path)
         except BagError as err:
             raise OSError(
                 '{} does not seem to be a valid Moondog Image bag: {}'
                 ''.format(self.path, str(err))) from err
     self.components = {}
示例#5
0
def is_bag(path, printfn=print):
    """Report whether ``path`` can be opened as a BagIt package.

    Detection relies on constructing ``Bag``; this is considered fast enough
    for now but may be optimised later.

    Returns ``True`` when ``Bag(path)`` succeeds. When it raises ``BagError``
    the error is passed to ``printfn`` (with ``file=sys.stderr``) and
    ``False`` is returned.
    """
    try:
        Bag(path)
    except BagError as exc:
        printfn("Error opening BagIt package:", exc, file=sys.stderr)
        opened = False
    else:
        opened = True
    return opened
示例#6
0
def is_bag(path):
    """Report whether ``path`` can be opened as a BagIt package.

    ``path`` may be a ``Path`` instance, in which case it is converted to
    ``str`` before being handed to ``Bag``. Detection relies on constructing
    ``Bag``; this is considered fast enough for now but may be optimised
    later.

    Returns ``True`` when ``Bag`` can be constructed and ``False`` when it
    raises ``BagError``.
    """
    location = str(path) if isinstance(path, Path) else path
    try:
        Bag(location)
    except BagError:
        opened = False
    else:
        opened = True
    return opened
示例#7
0
def main():
    """Validate the bags selected on the command line and log each result.

    Bags are collected from two optional arguments: every sub-directory of
    ``args.directory`` and the single path ``args.bagpath``. Each candidate
    is opened with ``Bag`` and validated; the outcome ("valid", "invalid" or
    "Not a bag") is written to ``LOGGER``. A failure on one bag never stops
    the remaining bags from being checked.
    """
    parser = _make_parser()
    args = parser.parse_args()

    bags = []

    _configure_logging(args)

    checks = "Performing the following validations: Checking 0xums, Checking bag completeness"
    if not args.slow:
        checks += ", Recalculating hashes"
    LOGGER.info(checks)

    if args.directory:
        directory_path = os.path.abspath(args.directory)
        for entry in os.listdir(directory_path):
            candidate = os.path.join(directory_path, entry)
            if os.path.isdir(candidate):
                bags.append(candidate)

    if args.bagpath:
        bags.append(os.path.abspath(args.bagpath))

    LOGGER.info("Checking {} folder(s).".format(len(bags)))

    for bagpath in tqdm(bags):
        LOGGER.info("Checking: {}".format(bagpath))
        try:
            bag = Bag(bagpath)
        except Exception:
            # Not a bare ``except:`` so that Ctrl-C and SystemExit still
            # interrupt a long batch run.
            LOGGER.error("{}: Not a bag".format(bagpath))
            continue

        try:
            # NOTE(review): ``fast`` is fed from ``args.slow``; the flag name
            # looks inverted — confirm against _make_parser.
            is_ok = bag.validate(fast=args.slow)
        except Exception as err:
            # NOTE(review): if Bag is bagit's, validate() raises on failure
            # rather than returning False; treat that as an invalid bag and
            # keep going instead of aborting the whole run.
            LOGGER.error("{}: invalid ({})".format(bagpath, err))
            continue

        if is_ok:
            LOGGER.info("{}: valid".format(bagpath))
        else:
            LOGGER.error("{}: invalid".format(bagpath))
示例#8
0
def test():
    """Record two capture frames and two IMU samples into ``./test.json``.

    Opens a bag with the ``imu`` and ``camera`` keys and the capture device
    at index 0, then pushes one frame (when the read succeeds) and one fixed
    IMU tuple per iteration. The capture device and the bag are always
    released, even if an iteration raises.
    """
    bag = Bag('./test.json', ['imu', 'camera'])
    try:
        cap = cv2.VideoCapture(0)
        try:
            # presumably gives the camera time to start up — TODO confirm
            time.sleep(0.1)

            for i in range(2):
                ret, frame = cap.read()
                if ret:
                    bag.push('camera', frame, True)
                else:
                    print('bad image')
                bag.push('imu', (1, 2, 3))
                print(i)
        finally:
            cap.release()
    finally:
        bag.close()
    def validate(self,
                 skip_checksums=False,
                 skip_bag=False,
                 skip_unzip=False,
                 skip_delete=False,
                 processes=2):
        """
        Validate an OCRD-ZIP file for profile, bag and workspace conformance

        Arguments:
            skip_checksums (boolean): Whether to omit checksum checks but still check basic BagIt conformance
            skip_bag (boolean): Whether to skip all checks of manifests and files
            skip_unzip (boolean): Whether the OCRD-ZIP is unzipped, i.e. a directory
            skip_delete (boolean): Whether to skip deleting the unpacked OCRD-ZIP dir after validation
            processes (integer): Number of processes used for checksum validation

        Returns:
            ``self.report``
        """
        if skip_unzip:
            # Already a directory: validate it in place and never remove it.
            bagdir = self.path_to_zip
            skip_delete = True
        else:
            self.profile_validator.validate_serialization(self.path_to_zip)
            bagdir = mkdtemp(prefix=TMP_BAGIT_PREFIX)

        try:
            if not skip_unzip:
                # Unpack inside the try block so the temporary directory is
                # cleaned up even when extraction itself fails.
                unzip_file_to_dir(self.path_to_zip, bagdir)

            bag = Bag(bagdir)
            self._validate_profile(bag)

            if not skip_bag:
                self._validate_bag(bag,
                                   fast=skip_checksums,
                                   processes=processes)

        finally:
            if not skip_delete:
                # remove tempdir
                rmtree(bagdir)
        return self.report
示例#10
0
 def setUp(self):
     """Prepare the fixture bag, the profile and its ``get_profile()`` result."""
     self.bag = Bag('fixtures/test-bar')
     profile = Profile(PROFILE_URL)
     self.profile = profile
     self.retrieved_profile = profile.get_profile()
示例#11
0
 def test_not_given(self):
     """The bag validates against the unmodified ``self.profile_dict``."""
     outcome = Profile('TEST', self.profile_dict).validate(Bag(self.bagdir))
     self.assertTrue(outcome)
示例#12
0
def verify_aip(job):
    """Check that an AIP was bagged correctly; return 0 on success, 1 on failure.

    A compressed AIP is first extracted into ``TEMP_DIRECTORY/<sip uuid>``;
    an AIP that is already a directory is used in place. The bag is then
    validated for completeness only, and checksums are verified afterwards
    via ``verify_checksums``. Checksum verification lives here because the
    verifyPREMISChecksums_v0.0 ("Verify checksums generated on ingest")
    micro-service was removed: computing checksums there and again through
    bagit was redundant.

    job.args[1] = UUID
      UUID of the SIP, which will become the UUID of the AIP
    job.args[2] = current location
      Full absolute path to the AIP's current location on the local filesystem
    """
    sip_uuid = job.args[1]  # %sip_uuid%
    aip_path = job.args[2]  # SIPDirectory%%sip_name%-%sip_uuid%.7z

    temp_dir = mcpclient_settings.TEMP_DIRECTORY

    needs_extraction = not os.path.isdir(aip_path)

    if needs_extraction:
        try:
            extract_dir = os.path.join(temp_dir, sip_uuid)
            bag_path = extract_aip(job, aip_path, extract_dir)
        except Exception as err:
            job.print_error(repr(err))
            job.pyprint('Error extracting AIP at "{}"'.format(aip_path),
                        file=sys.stderr)
            return 1
    else:
        bag_path = aip_path

    # Completeness only: checksums are compared against the database
    # afterwards by `verify_checksums`.
    bag_ok = True
    try:
        bag = Bag(bag_path)
        bag.validate(completeness_only=True)
    except BagError as err:
        job.print_error("Error validating BagIt package: {}".format(err))
        bag_ok = False

    exit_code = 0 if bag_ok else 1
    if bag_ok:
        try:
            verify_checksums(job, bag, sip_uuid)
        except VerifyChecksumsError:
            exit_code = 1
    else:
        job.pyprint("Not verifying checksums because other tests have already"
                    " failed.")

    # Remove the extraction directory; a failure here is reported but does
    # not change the exit code.
    if needs_extraction:
        try:
            shutil.rmtree(extract_dir)
        except OSError as err:
            job.pyprint(
                "Failed to remove temporary directory at {extract_dir} which"
                " contains the AIP extracted for verification."
                " Error:\n{err}".format(extract_dir=extract_dir, err=err),
                file=sys.stderr,
            )

    return exit_code
示例#13
0
    def bag(self,
            workspace,
            ocrd_identifier,
            dest=None,
            ocrd_mets='mets.xml',
            ocrd_manifestation_depth='full',
            ocrd_base_version_checksum=None,
            processes=1,
            skip_zip=False,
            in_place=False,
            tag_files=None):
        """
        Bag a workspace

        See https://ocr-d.github.com/ocrd_zip#packing-a-workspace-as-ocrd-zip

        Arguments:
            workspace (ocrd.Workspace): workspace to bag
            ocrd_identifier (string): Ocrd-Identifier in bag-info.txt
            dest (string): Path of the generated OCRD-ZIP.
            ocrd_mets (string): Ocrd-Mets in bag-info.txt
            ocrd_manifestation_depth (string): Ocrd-Manifestation-Depth in bag-info.txt
            ocrd_base_version_checksum (string): Ocrd-Base-Version-Checksum in bag-info.txt
            processes (integer): Number of parallel processes checksumming
            skip_zip (boolean): Whether to leave directory unzipped
            in_place (boolean): Whether to **replace** the workspace with its BagIt variant
            tag_files (list<string>): Path names of additional tag files to be bagged at the root of the bag

        Returns:
            The destination path ``dest`` (derived from the workspace
            directory when not given).

        Raises:
            Exception: If ``ocrd_manifestation_depth`` is neither 'full' nor
                'partial', if both ``dest`` and ``in_place`` are set, or if
                ``in_place`` is set without ``skip_zip``.
        """
        # Validate all arguments before anything is created on disk.
        if ocrd_manifestation_depth not in ('full', 'partial'):
            raise Exception("manifestation_depth must be 'full' or 'partial'")
        if in_place and (dest is not None):
            raise Exception("Setting 'dest' and 'in_place' is a contradiction")
        if in_place and not skip_zip:
            # The message names the flags in the order the condition tests them.
            raise Exception(
                "Setting 'in_place' and not 'skip_zip' is a contradiction")

        if tag_files is None:
            tag_files = []

        # create bagdir
        bagdir = mkdtemp(prefix=TMP_BAGIT_PREFIX)

        if dest is None:
            if in_place:
                dest = workspace.directory
            elif not skip_zip:
                dest = '%s.ocrd.zip' % workspace.directory
            else:
                dest = '%s.ocrd' % workspace.directory

        log.info("Bagging %s to %s (temp dir %s)", workspace.directory,
                 '(in-place)' if in_place else dest, bagdir)

        # create data dir
        makedirs(join(bagdir, 'data'))

        # create bagit.txt
        with open(join(bagdir, 'bagit.txt'), 'wb') as f:
            f.write(BAGIT_TXT.encode('utf-8'))

        # create manifests
        total_bytes, total_files = self._bag_mets_files(
            workspace, bagdir, ocrd_manifestation_depth, ocrd_mets, processes)

        # create bag-info.txt
        bag = Bag(bagdir)
        self._set_bag_info(bag, total_bytes, total_files, ocrd_identifier,
                           ocrd_manifestation_depth,
                           ocrd_base_version_checksum)

        # Copy extra tag files to the bag root before saving the bag.
        for tag_file in tag_files:
            copyfile(tag_file, join(bagdir, basename(tag_file)))

        # save bag
        bag.save()

        # ZIP it
        self._serialize_bag(workspace, bagdir, dest, in_place, skip_zip)

        log.info('Created bag at %s', dest)
        return dest
示例#14
0
def find_bag_metadata(bag_logs_path):
    """Return the ``info`` metadata of the bag at ``bag_logs_path``.

    When ``BagError`` is raised, a message is printed to stderr and an empty
    dict is returned instead.
    """
    try:
        metadata = Bag(bag_logs_path).info
    except BagError:
        print("Unable to locate or parse bag metadata at: {}".format(bag_logs_path), file=sys.stderr)
        metadata = {}
    return metadata
示例#15
0
class ImageBag:
    """A bag directory holding an original image and files derived from it.

    ``self.components`` maps a component name (``'original'``, ``'master'``)
    to a dict of metadata terms; ``_update`` mirrors those terms into the
    bag's ``info`` mapping.
    """

    def __init__(self, path: str, auto_make: bool = False) -> None:
        """Open the bag at ``path``, or create it when ``auto_make`` is true.

        ``path`` is normalised, has environment variables and ``~`` expanded,
        and is resolved to a real path before use.

        Args:
            path: Location of the bag directory.
            auto_make: When true, the directory must not exist yet; it is
                created and turned into a bag with ``make_bag``. When false,
                an existing bag is opened with ``Bag``.

        Raises:
            OSError: If ``auto_make`` is true and the directory already
                exists, if the directory cannot be created, or if
                ``auto_make`` is false and ``Bag`` raises ``BagError``.
        """
        self.path = realpath(expanduser(expandvars(normpath(path))))
        if auto_make:
            try:
                makedirs(self.path, exist_ok=False)
            except FileExistsError as err:
                # Only a pre-existing path gets this message; other OSErrors
                # (e.g. permission denied) propagate with their own details.
                raise OSError('{} already exists, but auto_make = True'
                              ''.format(self.path)) from err
            self.bag = make_bag(self.path)
        else:
            try:
                self.bag = Bag(self.path)
            except BagError as err:
                raise OSError(
                    '{} does not seem to be a valid Moondog Image bag: {}'
                    ''.format(self.path, str(err))) from err
        self.components = {}

    def accession(self, path: str):
        """Import the file at ``path`` as the original and derive a master."""
        self._import_original(path)
        self._generate_master()

    def _import_original(self, path: str):
        """Copy the file at ``path`` into ``data/`` and record it as 'original'.

        The copied file's ExifTool metadata and XMP packet are pretty-printed
        to stdout, then the bag (including manifests) is updated.
        """
        d = self.components['original'] = {}
        d['accession_path'] = realpath(expanduser(expandvars(normpath(path))))
        d['filename'] = basename(d['accession_path'])
        target_path = join(self.path, 'data', d['filename'])
        shutil.copy2(d['accession_path'], target_path)
        with ExifTool() as et:
            meta = et.get_metadata(target_path)
        pprint(meta)
        xmp = XMPFiles(file_path=target_path).get_xmp()
        pprint(xmp)
        self._update(manifests=True)

    def _generate_master(self):
        """Save the original image as ``data/master.tif`` and update the bag.

        Requires ``_import_original`` to have populated
        ``self.components['original']`` first.
        """
        infn = self.components['original']['filename']
        d = self.components['master'] = {}
        d['filename'] = 'master.tif'
        # Use the image as a context manager so the source file is closed
        # as soon as the master has been written.
        with Image.open(join(self.path, 'data', infn)) as original:
            original.save(join(self.path, 'data', d['filename']))
        self._update(manifests=True)

    def _update(self, manifests=False):
        """Copy component metadata into ``self.bag.info`` and save the bag.

        Each ``(component, term)`` pair becomes an info key of the form
        ``Component-Term-Words`` (e.g. ``'original'`` / ``'accession_path'``
        -> ``'Original-Accession-Path'``). A key is written only when it is
        missing or its stored value differs. ``manifests`` is forwarded to
        ``self.bag.save``.
        """
        for fn, fmeta in self.components.items():
            for term, value in fmeta.items():
                bag_term = '{}-{}'.format(
                    fn.title(),
                    term.replace('_', ' ').title().replace(' ', '-'))
                try:
                    prior_value = self.bag.info[bag_term]
                except KeyError:
                    self.bag.info[bag_term] = value
                else:
                    if prior_value != value:
                        self.bag.info[bag_term] = value
        self.bag.save(manifests=manifests)