def work(self) -> bool:
        """Run the OCR file validator on the package and store the findings.

        Runs ``validator.ValidateOCRFiles`` against ``self.package_path``
        through ``validate_process.run_validation``, logs the outcome and
        passes the collected findings to ``self.set_results``.

        Returns:
            True when the validation ran to completion, with or without
            findings. False when a PermissionError interrupted it; in that
            case the results are whatever the summary builder constructs
            from the "Permission issues" error.

        Raises:
            Exception: any other error raised during validation is printed
                and re-raised unchanged.
        """
        errors: typing.List[hathi_result.Result] = []
        my_logger = logging.getLogger(hathi_validate.__name__)
        my_logger.setLevel(logging.INFO)

        with self.log_config(my_logger):
            print("Running ocr Validation")
            try:
                ocr_errors = validate_process.run_validation(
                    validator.ValidateOCRFiles(path=self.package_path))

            except PermissionError as e:
                report_builder = hathi_result.SummaryDirector(
                    source=self.package_path)
                report_builder.add_error("Permission issues. \"{}\"".format(e))
                self.set_results(report_builder.construct())
                return False

            except Exception as e:
                print(e)
                raise

            if not ocr_errors:
                # Announce success only when nothing was reported, and name
                # the package that was checked.
                self.log("No validation errors found in {}".format(
                    self.package_path))
            else:
                for error in ocr_errors:
                    self.log(error.message)
                    errors.append(error)
            self.set_results(errors)
        return True
    def work(self) -> bool:
        """Run the UTF-8 validator on the package's OCR XML files.

        Every regular file directly inside ``self.package_path`` whose
        extension is ``.xml`` (compared case-insensitively) and whose base
        name is not ``marc`` is passed to ``validator.ValidateUTF8Files``.
        All findings are handed to ``self.set_results``.

        Returns:
            Always True.
        """
        def filter_ocr_only(entry: os.DirEntry) -> bool:
            """Accept only ``.xml`` files other than the marc record."""
            if not entry.is_file():
                return False

            name, ext = os.path.splitext(entry.name)
            return ext.lower() == ".xml" and name.lower() != "marc"

        my_logger = logging.getLogger(hathi_validate.__name__)
        my_logger.setLevel(logging.INFO)

        with self.log_config(my_logger):
            errors: typing.List[hathi_result.Result] = []

            # os.scandir keeps a directory handle open; the context manager
            # releases it even when a validation raises part-way through.
            with os.scandir(self.package_path) as package_entries:
                ocr_file: os.DirEntry
                for ocr_file in filter(filter_ocr_only, package_entries):
                    self.log("Looking for invalid characters in {}".format(
                        ocr_file.path))

                    invalid_ocr_character = validate_process.run_validation(
                        validator.ValidateUTF8Files(ocr_file.path))

                    if invalid_ocr_character:
                        errors += invalid_ocr_character

            self.set_results(errors)
        return True
示例#3
0
    def work(self) -> bool:
        """Run the meta.yml validator for the package and store the findings.

        When ``meta.yml`` exists in ``self.package_path`` it is checked with
        ``validator.ValidateMetaYML`` (page data required) and every finding
        is logged; otherwise the check is skipped with a log message. A
        FileNotFoundError raised along the way is recorded in the summary
        builder instead of propagating. Everything collected is passed to
        ``self.set_results``.

        Returns:
            Always True.
        """
        yml_path = os.path.join(self.package_path, "meta.yml")
        findings: List[hathi_result.Result] = []
        validation_logger = logging.getLogger(hathi_validate.__name__)
        validation_logger.setLevel(logging.INFO)

        with self.log_config(validation_logger):
            summary = hathi_result.SummaryDirector(source=yml_path)

            try:
                if os.path.exists(yml_path):
                    self.log(f"Validating meta.yml in {self.package_path}")

                    reported = validate_process.run_validation(
                        validator.ValidateMetaYML(yaml_file=yml_path,
                                                  path=self.package_path,
                                                  required_page_data=True))

                    if reported:
                        for finding in reported:
                            self.log(finding.message)
                            findings.append(finding)
                    else:
                        self.log(f"{yml_path} successfully validated")
                else:
                    self.log(f"Skipping '{yml_path}' due to file not found")
            except FileNotFoundError as missing_file:
                summary.add_error(
                    f"Unable to validate YAML. Reason: {missing_file}")

            # Summary entries go after the validator's own findings.
            findings.extend(summary.construct())
            self.set_results(findings)
        return True
    def work(self) -> bool:
        """Run the extra-subdirectories validator and store the findings.

        Runs ``validator.ValidateExtraSubdirectories`` on
        ``self.package_path``, logs either a "none found" message or each
        reported message, and passes the findings to ``self.set_results``.

        Returns:
            True when the validation completed; False when a PermissionError
            was raised, in which case the results are built from a
            "Permission issues" summary error.
        """
        findings: typing.List[hathi_result.Result] = []
        validation_logger = logging.getLogger(hathi_validate.__name__)
        validation_logger.setLevel(logging.INFO)

        with self.log_config(validation_logger):
            try:
                reported = validate_process.run_validation(
                    validator.ValidateExtraSubdirectories(
                        path=self.package_path))
            except PermissionError as permission_error:
                denied = hathi_result.SummaryDirector(
                    source=self.package_path)
                denied.add_error(
                    "Permission issues. \"{}\"".format(permission_error))
                self.set_results(denied.construct())
                return False

            if reported:
                for finding in reported:
                    self.log(finding.message)
                    findings.append(finding)
            else:
                self.log("No extra subdirectories found in {}".format(
                    self.package_path))

            self.set_results(findings)
        return True
    def work(self) -> bool:
        """Run the checksum report validator and store the findings.

        Reads the entries of ``checksum.md5`` in ``self.package_path`` to log
        how many files it lists, then runs
        ``validator.ValidateChecksumReport``. A FileNotFoundError is recorded
        in the summary builder; the summary entries are appended after the
        validator's findings and everything goes to ``self.set_results``.

        Returns:
            True when the check completed (including the file-not-found
            case); False when a PermissionError was raised, in which case the
            results are built from a "Permission issues" summary error.
        """
        findings: typing.List[hathi_result.Result] = []

        report_path = os.path.join(self.package_path, "checksum.md5")
        validation_logger = logging.getLogger(hathi_validate.__name__)
        validation_logger.setLevel(logging.INFO)

        with self.log_config(validation_logger):
            summary = hathi_result.SummaryDirector(source=report_path)

            try:
                # Only the number of listed files is used, for the log line.
                listed_files = [
                    file_name for _, file_name
                    in validate_process.extracts_checksums(report_path)
                ]

                self.log(
                    "Validating checksums of the {} files "
                    "included in {}".format(len(listed_files), report_path)
                )

                reported = validate_process.run_validation(
                    validator.ValidateChecksumReport(self.package_path,
                                                     report_path)
                )
                if reported:
                    findings.extend(reported)
                else:
                    self.log(
                        "All checksums in {} successfully validated".format(
                            report_path))
            except FileNotFoundError as missing_file:
                summary.add_error(
                    "Unable to validate checksums. Reason: {}".format(
                        missing_file))
            except PermissionError as permission_error:
                denied = hathi_result.SummaryDirector(
                    source=self.package_path)
                denied.add_error(
                    "Permission issues. \"{}\"".format(permission_error))
                self.set_results(denied.construct())
                return False

            findings.extend(summary.construct())
            self.set_results(findings)
        return True
    def work(self) -> bool:
        """Run the missing-files validator and store the findings.

        Runs ``validator.ValidateMissingFiles`` on ``self.package_path``,
        logs the message of every finding and passes the findings to
        ``self.set_results``.

        Returns:
            Always True.
        """
        findings: typing.List[hathi_result.Result] = []
        validation_logger = logging.getLogger(hathi_validate.__name__)
        validation_logger.setLevel(logging.INFO)

        with self.log_config(validation_logger):
            reported = validate_process.run_validation(
                validator.ValidateMissingFiles(path=self.package_path))
            # A falsy result means there is nothing to log or collect.
            for finding in reported or ():
                self.log(finding.message)
                findings.append(finding)
            self.set_results(findings)
        return True
示例#7
0
    def work(self) -> bool:
        """Run the checksum report validator and store the findings.

        Reads the entries of ``checksum.md5`` in ``self.package_path`` to log
        how many files it lists, then runs
        ``validator.ValidateChecksumReport``. A FileNotFoundError is recorded
        in the summary builder; the summary entries are appended after the
        validator's findings and everything goes to ``self.set_results``.

        Returns:
            True when the check completed (including the file-not-found
            case); False when a PermissionError was raised, in which case the
            results are built from a "Permission issues" summary error.
        """
        findings: List[hathi_result.Result] = []

        report_path = os.path.join(self.package_path, "checksum.md5")
        validation_logger = logging.getLogger(hathi_validate.__name__)
        validation_logger.setLevel(logging.INFO)

        with self.log_config(validation_logger):
            summary = hathi_result.SummaryDirector(source=report_path)

            try:
                # Only the number of listed files is used, for the log line.
                listed_files = [
                    listed_name for _, listed_name in
                    validate_process.extracts_checksums(report_path)
                ]
                listed_count = len(listed_files)

                self.log(
                    f"Validating checksums of the {listed_count} files "
                    f"included in {report_path}")

                reported: List[hathi_result.Result] = \
                    validate_process.run_validation(
                        validator.ValidateChecksumReport(
                            self.package_path,
                            report_path
                        )
                )
                if reported:
                    findings.extend(reported)
                else:
                    self.log(
                        f"All checksums in {report_path} successfully "
                        f"validated")
            except FileNotFoundError as missing_file:
                summary.add_error("Unable to validate checksums. "
                                  f"Reason: {missing_file}")
            except PermissionError as access_denied:
                denied = hathi_result.SummaryDirector(
                    source=self.package_path)
                denied.add_error(f'Permission issues. "{access_denied}"')
                self.set_results(denied.construct())
                return False

            findings.extend(summary.construct())
            self.set_results(findings)
        return True
    def work(self) -> bool:
        """Run the MARC validator for the package and store the findings.

        When ``marc.xml`` exists in ``self.package_path`` it is checked with
        ``validator.ValidateMarc`` and every finding is logged; otherwise the
        check is skipped with a log message. A FileNotFoundError is recorded
        in the summary builder; the summary entries are appended after the
        validator's findings and everything goes to ``self.set_results``.

        Returns:
            True when the check completed or was skipped; False when a
            PermissionError was raised, in which case the results are built
            from a "Permission issues" summary error.
        """
        marc_path = os.path.join(self.package_path, "marc.xml")
        summary = hathi_result.SummaryDirector(source=marc_path)
        findings: typing.List[hathi_result.Result] = []

        validation_logger = logging.getLogger(hathi_validate.__name__)
        validation_logger.setLevel(logging.INFO)

        with self.log_config(validation_logger):
            try:
                if os.path.exists(marc_path):
                    self.log(
                        "Validating marc.xml in {}".format(self.package_path))

                    reported = validate_process.run_validation(
                        validator.ValidateMarc(marc_path))

                    if reported:
                        for finding in reported:
                            self.log(finding.message)
                            findings.append(finding)
                    else:
                        self.log(
                            "{} successfully validated".format(marc_path))
                else:
                    self.log(
                        "Skipping '{}' due to file not found".format(
                            marc_path))
            except FileNotFoundError as missing_file:
                summary.add_error(
                    "Unable to Validate Marc. Reason: {}".format(
                        missing_file))
            except PermissionError as permission_error:
                denied = hathi_result.SummaryDirector(
                    source=self.package_path)
                denied.add_error(
                    "Permission issues. \"{}\"".format(permission_error))
                self.set_results(denied.construct())
                return False

            findings.extend(summary.construct())
            self.set_results(findings)
        return True
    def work(self) -> bool:
        """Run the component-files validator and store the findings.

        Runs ``validator.ValidateComponents`` on ``self.package_path`` with
        the name pattern ``^[0-9]{8}$`` and the extensions ``.txt`` and
        ``.jp2``, plus ``.xml`` when ``self.check_ocr`` is truthy. Findings
        are logged and passed to ``self.set_results``.

        Returns:
            True when the validation completed; False when a
            FileNotFoundError or PermissionError was raised, in which case
            the results are built from a single summary error describing the
            problem.
        """
        findings: typing.List[hathi_result.Result] = []
        required_extensions = [".txt", ".jp2"]
        validation_logger = logging.getLogger(hathi_validate.__name__)
        validation_logger.setLevel(logging.INFO)

        with self.log_config(validation_logger):
            if self.check_ocr:
                required_extensions.append(".xml")
            try:
                component_check = validator.ValidateComponents(
                    self.package_path,
                    "^[0-9]{8}$",
                    *required_extensions
                )
                reported = validate_process.run_validation(component_check)
            except FileNotFoundError:
                failure = hathi_result.SummaryDirector(
                    source=self.package_path)
                failure.add_error(
                    "No files located with expected file naming scheme in path"
                )
                self.set_results(failure.construct())
                return False
            except PermissionError as permission_error:
                failure = hathi_result.SummaryDirector(
                    source=self.package_path)
                failure.add_error(
                    "Permission issues. \"{}\"".format(permission_error))
                self.set_results(failure.construct())
                return False

            if reported:
                for finding in reported:
                    self.log(finding.message)
                    findings.append(finding)
            else:
                self.log(
                    "Found no missing component files in {}".format(
                        self.package_path))
            self.set_results(findings)
        return True
示例#10
0
def main():
    """Command line entry point: validate packages and report the results.

    Parses the command line arguments and configures logging. For every
    directory returned by ``package.get_dirs(args.path)`` it records the
    directory's file names in a batch manifest and runs the missing-files,
    components, extra-subdirectories, checksum, MARC and meta.yml validators,
    plus the OCR validator when ``args.check_ocr`` is set. The manifest
    report and the validation report are sent to the console reporter; the
    validation report is also written to ``args.report_name`` when given.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    parser = get_parser()
    args = parser.parse_args()

    configure_logging.configure_logger(debug_mode=args.debug,
                                       log_file=args.log_debug)
    errors = []
    batch_manifest_builder = manifest.PackageManifestDirector()
    for pkg in package.get_dirs(args.path):
        logger.info("Creating a manifest for {}".format(pkg))
        package_builder = batch_manifest_builder.add_package(pkg)

        # Only bare file names are recorded, not the directory they sit in.
        for _root, _dirs, files in os.walk(pkg):
            for file_name in files:
                package_builder.add_file(file_name)

        logger.info("Checking {}".format(pkg))

        # Validate missing files
        logger.debug("Looking for missing package files in {}".format(pkg))
        missing_files_errors = process.run_validation(
            validator.ValidateMissingFiles(path=pkg))
        if not missing_files_errors:
            logger.info("Found no missing package files in {}".format(pkg))
        else:
            for error in missing_files_errors:
                logger.info(error.message)
                errors.append(error)

        # Look for missing components
        extensions = [".txt", ".jp2"]
        if args.check_ocr:
            extensions.append(".xml")
        logger.debug("Looking for missing component files in {}".format(pkg))
        # Raw string: "\d" is not a valid escape in a normal string literal.
        missing_files_errors = process.run_validation(
            validator.ValidateComponents(pkg, r"^\d{8}$", *extensions))
        if not missing_files_errors:
            logger.info("Found no missing component files in {}".format(pkg))
        else:
            for error in missing_files_errors:
                logger.info(error.message)
                errors.append(error)

        # Validate extra subdirectories
        logger.debug("Looking for extra subdirectories in {}".format(pkg))
        extra_subdirectories_errors = process.run_validation(
            validator.ValidateExtraSubdirectories(path=pkg))
        if extra_subdirectories_errors:
            errors.extend(extra_subdirectories_errors)

        # Validate Checksums
        checksum_report = os.path.join(pkg, "checksum.md5")
        checksum_report_errors = process.run_validation(
            validator.ValidateChecksumReport(pkg, checksum_report))
        if not checksum_report_errors:
            logger.info("All checksums in {} successfully validated".format(
                checksum_report))
        else:
            errors.extend(checksum_report_errors)

        # Validate Marc
        marc_file = os.path.join(pkg, "marc.xml")
        marc_errors = process.run_validation(validator.ValidateMarc(marc_file))
        if not marc_errors:
            logger.info("{} successfully validated".format(marc_file))
        else:
            errors.extend(marc_errors)

        # Validate YML
        yml_file = os.path.join(pkg, "meta.yml")
        meta_yml_errors = process.run_validation(
            validator.ValidateMetaYML(yaml_file=yml_file,
                                      path=pkg,
                                      required_page_data=True))
        if not meta_yml_errors:
            logger.info("{} successfully validated".format(yml_file))
        else:
            errors.extend(meta_yml_errors)

        # Validate ocr files
        if args.check_ocr:
            ocr_errors = process.run_validation(
                validator.ValidateOCRFiles(path=pkg))
            if not ocr_errors:
                # The placeholder makes the message name the package.
                logger.info("No validation errors found in {}".format(pkg))
            else:
                errors.extend(ocr_errors)

    batch_manifest = batch_manifest_builder.build_manifest()
    manifest_report = manifest.get_report_as_str(batch_manifest, width=80)
    console_reporter2 = report.Reporter(report.ConsoleReporter())
    validation_report = report.get_report_as_str(errors)
    console_reporter2.report(manifest_report)
    console_reporter2.report(validation_report)
    if args.report_name:
        file_reporter = report.Reporter(
            report.FileOutputReporter(args.report_name))
        file_reporter.report(validation_report)