def check(self, path):
    """Verify that the access and preservation folders match each other.

    NOTE: this uses the package because of the way hathi packages are
    formatted, so ``path`` is a package object rather than a plain path.

    Args:
        path: Package to check. It must provide a ``directories`` mapping
            with "access" and "preservation" entries and an ``identifier``
            that is used as the group of every reported error.

    Returns:
        checkers.Results that is valid only when neither helper check
        reports any missing file.
    """
    access_folder = path.directories["access"]
    preservation_folder = path.directories["preservation"]
    errors = []

    # Check if everything in access folder is found same in the
    # preservation folder
    missing_pres_files = self.check_for_missing_matching_preservation(
        access_folder=access_folder,
        preservation_folder=preservation_folder)
    if missing_pres_files:
        message = (
            "The files [{}] were found in the access but not in the "
            "preservation folder"
        ).format(
            ", ".join(os.path.basename(f) for f in missing_pres_files))
        new_error = error_message.ValidationError(
            message, group=path.identifier)
        new_error.source = access_folder
        errors.append(new_error)

    # ...and the other way around
    missing_access_files = self.check_for_missing_matching_access(
        access_folder=access_folder,
        preservation_folder=preservation_folder)
    if missing_access_files:
        message = (
            "The files [{}] were found in the preservation folder but not "
            "in the access folder"
        ).format(
            ", ".join(os.path.basename(f) for f in missing_access_files))
        new_error = error_message.ValidationError(
            message, group=path.identifier)
        new_error.source = preservation_folder
        errors.append(new_error)

    # Deriving validity from the error list guarantees that a reported
    # error can never be paired with valid=True (the access branch used to
    # forget to clear the flag).
    return checkers.Results(
        self.checker_name(), valid=not errors, errors=errors)
def check(self, path):
    """Validate the extension and the name of one preservation file.

    Files whose extension is listed in ``self.ignore_extension`` are not
    checked at all. Every other file must have an extension from
    ``self.valid_extensions``; in addition, ".tif" files whose name does
    not contain "target" must match
    ``PresNamingChecker.valid_naming_scheme``.

    Args:
        path: Path to the file to check.

    Returns:
        checkers.Results holding one error per failed rule. All errors are
        grouped under the name of the folder that contains the file.
    """
    errors = []
    # One group for every error of this file, so that an extension error
    # and a naming error of the same file are reported together.
    group_name = os.path.dirname(path).split(os.sep)[-1]
    basename, extension = os.path.splitext(os.path.basename(path))

    if extension not in self.ignore_extension:
        if extension not in self.valid_extensions:
            new_error = error_message.ValidationError(
                "Invalid preservation file extension: \"{}\"".format(
                    extension),
                group=group_name)
            new_error.source = path
            errors.append(new_error)

        # Check the image files have the full 8 digits. Target images are
        # exempt from the naming pattern.
        is_page_image = extension == ".tif" and "target" not in basename
        if is_page_image and \
                PresNamingChecker.valid_naming_scheme.match(basename) is None:
            new_error = error_message.ValidationError(
                "Does not match the valid preservation file naming pattern",
                group=group_name)
            new_error.source = path
            errors.append(new_error)

    return checkers.Results(
        self.checker_name(), valid=not errors, errors=errors)
def check(self, path: str):
    """Check that an access folder contains every expected file.

    Two things are verified:

    * ``self.find_missing_by_number`` reports no missing files, and
    * "checksum.md5", "marc.xml" and "meta.yml" each exist somewhere
      below ``path`` (the folder is walked recursively).

    Args:
        path: Path to the folder to check

    Returns:
        checkers.Results holding the list of errors. Every error is
        grouped under the last component of ``path``.
    """
    required_files = {"checksum.md5", "marc.xml", "meta.yml"}
    error_group = path.split(os.sep)[-1]
    errors = []

    try:
        missing = list(self.find_missing_by_number(path))
    except ValueError as e:
        new_error = error_message.ValidationError(
            "Error trying to find missing files. Reason: {}".format(e),
            group=error_group)
        new_error.source = path
        errors.append(new_error)
    else:
        if missing:
            new_error = error_message.ValidationError(
                "Expected files [{}] not found in access folder".format(
                    ", ".join(missing)),
                group=error_group)
            new_error.source = path
            errors.append(new_error)

    # Collect every file name found below the folder
    found_files = set()
    for _root, _dirs, files in os.walk(path):
        found_files.update(files)

    # Any required name that was never seen is missing. Sorting keeps the
    # message stable, since set iteration order is not.
    missing_required = sorted(required_files - found_files)
    if missing_required:
        new_error = error_message.ValidationError(
            "Missing expected file(s), [{}]".format(
                ", ".join(missing_required)),
            group=error_group)
        new_error.source = path
        errors.append(new_error)

    return checkers.Results(
        self.checker_name(), valid=not errors, errors=errors)
def check(self, path):
    """Collect structural errors for ``path``.

    Root directory errors are gathered first. The subdirectories are only
    inspected when the root directory produced no error at all.

    Args:
        path: Location handed on to ``find_root_directory_errors`` and
            ``find_subdirectory_errors``.

    Returns:
        checkers.Results that is valid when no error was found.
    """
    problems = list(self.find_root_directory_errors(path))

    # A broken root makes the subdirectory inspection pointless
    if not problems:
        problems.extend(self.find_subdirectory_errors(path))

    return checkers.Results(
        self.checker_name(), valid=not problems, errors=problems)
def check(self, path):
    """Check that a preservation folder contains every expected file.

    Two things are verified:

    * ``self.find_missing_by_number`` reports no missing files, and
    * ``self.find_missing_required_files`` reports none of the four
      target images as missing.

    Args:
        path: Path to the folder to check.

    Returns:
        checkers.Results holding one error per failed check. Every error
        is grouped under the last component of ``path``.
    """
    required_files = (
        "target_l_001.tif",
        "target_l_002.tif",
        "target_r_001.tif",
        "target_r_002.tif",
    )
    error_group = path.split(os.sep)[-1]
    errors = []

    def report(message):
        # Every error of this check shares the same group and source
        new_error = error_message.ValidationError(message, group=error_group)
        new_error.source = path
        errors.append(new_error)

    try:
        missing = list(self.find_missing_by_number(path))
        if missing:
            report(
                "Expected files [{}] not found in preservation folder".format(
                    ", ".join(missing)))
    except ValueError as e:
        report("Error trying to find missing files. Reason: {}".format(e))
    except FileNotFoundError as e:
        # Pass the text of the exception, not the exception object, so the
        # message is a str like in every other error reported here.
        report(str(e))

    # Find missing required_files
    missing = list(
        self.find_missing_required_files(
            path=path, expected_files=required_files))
    if missing:
        report("Missing expected file(s), [{}]".format(", ".join(missing)))

    return checkers.Results(
        self.checker_name(), valid=not errors, errors=errors)
def check(self, path):
    """Validate the name of one file against ``self.valid_naming_scheme``.

    Only files whose extension is listed in ``self.extensions_to_check``
    are inspected; every other file is reported as valid.

    Args:
        path: Path to the file to check.

    Returns:
        checkers.Results with at most one error, grouped under the name of
        the folder that contains the file.
    """
    folder, file_name = os.path.split(path)
    folder_name = folder.split(os.sep)[-1]
    stem, suffix = os.path.splitext(file_name)

    problems = []
    must_inspect = suffix in self.extensions_to_check
    if must_inspect and self.valid_naming_scheme.match(stem) is None:
        issue = error_message.ValidationError(
            "Does not match the valid file pattern for preservation files",
            group=folder_name)
        issue.source = path
        problems.append(issue)

    return checkers.Results(
        self.checker_name(), valid=not problems, errors=problems)
def check(self, path):
    """Validate the extension and the name of one file.

    Files whose extension is listed in ``self.ignore_extension`` are not
    checked at all. Every other file must have an extension from
    ``self.valid_extensions`` and a name that matches
    ``self.valid_naming_scheme``.

    Args:
        path: Path to the file to check.

    Returns:
        checkers.Results holding one error per failed rule. All errors are
        grouped under the name of the folder that contains the file.
    """
    errors = []
    # One group for every error of this file. Using the last component of
    # ``path`` here would yield the file's own name instead of its folder.
    group_name = os.path.dirname(path).split(os.sep)[-1]
    basename, extension = os.path.splitext(os.path.basename(path))

    if extension not in self.ignore_extension:
        if extension not in self.valid_extensions:
            new_error = error_message.ValidationError(
                "Invalid file type", group=group_name)
            new_error.source = path
            errors.append(new_error)

        # Check the image files have the full 8 digits
        if self.valid_naming_scheme.match(basename) is None:
            new_error = error_message.ValidationError(
                "Does not match the valid file pattern for preservation files",
                group=group_name)
            new_error.source = path
            errors.append(new_error)

    return checkers.Results(
        self.checker_name(), valid=not errors, errors=errors)
def check(self, path):
    """Placeholder check: always report ``path`` as valid.

    Args:
        path: Location to check; it is not inspected.

    Returns:
        checkers.Results flagged valid with an empty error list.
    """
    no_errors = []
    return checkers.Results(
        self.checker_name(), valid=True, errors=no_errors)
def check(self, path):
    """Placeholder check: always report ``path`` as valid.

    NOTE: this uses the package because of the way hathi packages are
    formatted.

    Args:
        path: Package to check; it is not inspected.

    Returns:
        checkers.Results flagged valid with an empty error list.
    """
    no_errors = []
    return checkers.Results(
        self.checker_name(), valid=True, errors=no_errors)