def getErrors(output):
    """Parse preflight output into a "validationErrors" element object.

    Every line of the form "<code>:<description>" is turned into an "error"
    child element that holds a "code" and a "description" sub-element. The
    first line is skipped because it doesn't contain any useful info, and
    lines without a colon are ignored.

    output: preflight output as one text string (lines separated by '\\n')
    returns: "validationErrors" element (without children if nothing matched)
    """
    errorsElt = ET.Element("validationErrors")

    # Loop through output, skip 1st line which doesn't contain any useful info
    for thisLine in output.split('\n')[1:]:
        # Split on the FIRST colon only: error descriptions may contain
        # colons themselves, and these must end up in the description
        thisErrorCode, separator, thisErrorDescription = thisLine.partition(":")

        if not separator:
            # No colon at all, so this line doesn't describe an error
            continue

        # str.strip() returns a new string (strings are immutable), so the
        # stripped values must be used explicitly; calling strip() and
        # discarding the result leaves the surrounding whitespace in place
        errorElt = ET.Element("error")
        errorElt.appendChildTagWithText("code", thisErrorCode.strip())
        errorElt.appendChildTagWithText(
            "description", thisErrorDescription.strip())
        errorsElt.append(errorElt)

    return errorsElt
def checkOneFile(file):
    """Analyse one file and return the result as an element object.

    The returned 'jpylyzer' element contains, in this order: tool info,
    file info, the validation outcome ("isValidJP2"), the test results and
    the characteristics.
    """
    # Read the file and validate its contents as JP2
    fileData = readFileBytes(file)
    validator = BoxValidator("JP2", fileData)
    isValidJP2, tests, characteristics = validator.validate()

    # Property values remap table, needed for the printable characteristics
    remapTable = generatePropertiesRemapTable()

    # Turn tests and characteristics trees into their printable versions
    tests.makeHumanReadable()
    characteristics.makeHumanReadable(remapTable)

    # Output tree, plus containers for tool and file meta info
    root = ET.Element('jpylyzer')
    toolInfo = ET.Element('toolInfo')
    fileInfo = ET.Element('fileInfo')

    # File name and path may contain non-ASCII characters. Decoding them as
    # Latin should (hopefully) prevent Unicode decode errors; Elementtree
    # then replaces non-ASCII characters by numeric entity references.
    fileName = os.path.basename(file)
    filePath = os.path.abspath(file)
    try:
        # Python 2.7: byte strings can be decoded
        fileName, filePath = (fileName.decode("iso-8859-15", "strict"),
                              filePath.decode("iso-8859-15", "strict"))
    except AttributeError:
        # Python 3.x: str has no decode attribute, so keep the plain values
        # (this is also what goes wrong with non-ASCII characters in 2.7)
        pass

    # General tool meta info
    for tag, text in (("toolName", scriptName),
                      ("toolVersion", __version__)):
        toolInfo.appendChildTagWithText(tag, text)

    # General file meta info
    fileInfo.appendChildTagWithText("fileName", fileName)
    fileInfo.appendChildTagWithText("filePath", filePath)
    sizeAsText = str(os.path.getsize(file))
    fileInfo.appendChildTagWithText("fileSizeInBytes", sizeAsText)
    lastModified = time.ctime(os.path.getmtime(file))
    fileInfo.appendChildTagWithText("fileLastModified", lastModified)

    # Assemble: meta info first, then validation outcome, then the details
    root.append(toolInfo)
    root.append(fileInfo)
    root.appendChildTagWithText("isValidJP2", str(isValidJP2))
    root.append(tests)
    root.append(characteristics)

    return root
def checkFiles(images):
    """Analyse each existing file in images and write its result to stdout.

    A warning is issued if images is empty. Entries that are not existing
    files are skipped without any message. For every remaining entry a
    'jpylyzer' element is built and its XML representation is written to
    stdout. The "fileName" value is the entry exactly as it was passed in.
    """
    if len(images) == 0:
        warnings.warn("no images to check!")

    for thisFile in images:
        if not os.path.isfile(thisFile):
            # Not an existing file: nothing to analyse
            continue

        # Read and validate the contents of this file as JP2
        validator = BoxValidator("JP2", readFileBytes(thisFile))
        isValidJP2, tests, characteristics = validator.validate()

        # Property values remap table
        remapTable = generatePropertiesRemapTable()

        # Printable versions of tests and characteristics trees
        tests.makeHumanReadable()
        characteristics.makeHumanReadable(remapTable)

        # Output tree and containers for tool and file meta info
        root = ET.Element('jpylyzer')
        toolInfo = ET.Element('toolInfo')
        fileInfo = ET.Element('fileInfo')

        # General tool meta info
        toolInfo.appendChildTagWithText("toolName", scriptName)
        toolInfo.appendChildTagWithText("toolVersion", __version__)

        # General file meta info
        fileInfo.appendChildTagWithText("fileName", thisFile)
        fullPath = os.path.abspath(thisFile)
        fileInfo.appendChildTagWithText("filePath", fullPath)
        sizeAsText = str(os.path.getsize(thisFile))
        fileInfo.appendChildTagWithText("fileSizeInBytes", sizeAsText)
        lastModified = time.ctime(os.path.getmtime(thisFile))
        fileInfo.appendChildTagWithText("fileLastModified", lastModified)

        # Assemble the tree: meta info, validation outcome, details
        for child in (toolInfo, fileInfo):
            root.append(child)
        root.appendChildTagWithText("isValidJP2", str(isValidJP2))
        for child in (tests, characteristics):
            root.append(child)

        # Write output
        sys.stdout.write(root.toxml())
def checkOneFile(file):
    """Analyse one file and return the result as a text string.

    The string is the XML representation of a 'jpylyzer' element that
    contains, in this order: tool info, file info, the validation outcome
    ("isValidJP2"), the test results and the characteristics.
    """
    # Read the file and validate its contents as JP2
    validator = BoxValidator("JP2", readFileBytes(file))
    isValidJP2, tests, characteristics = validator.validate()

    # Property values remap table
    remapTable = generatePropertiesRemapTable()

    # Printable versions of tests and characteristics trees
    tests.makeHumanReadable()
    characteristics.makeHumanReadable(remapTable)

    # Output tree and containers for tool and file meta info
    root = ET.Element('jpylyzer')
    toolInfo = ET.Element('toolInfo')
    fileInfo = ET.Element('fileInfo')

    # General tool meta info
    toolInfo.appendChildTagWithText("toolName", scriptName)
    toolInfo.appendChildTagWithText("toolVersion", __version__)

    # General file meta info
    baseName = os.path.basename(file)
    fileInfo.appendChildTagWithText("fileName", baseName)
    fullPath = os.path.abspath(file)
    fileInfo.appendChildTagWithText("filePath", fullPath)
    sizeAsText = str(os.path.getsize(file))
    fileInfo.appendChildTagWithText("fileSizeInBytes", sizeAsText)
    lastModified = time.ctime(os.path.getmtime(file))
    fileInfo.appendChildTagWithText("fileLastModified", lastModified)

    # Assemble the tree: meta info, validation outcome, details
    for child in (toolInfo, fileInfo):
        root.append(child)
    root.appendChildTagWithText("isValidJP2", str(isValidJP2))
    for child in (tests, characteristics):
        root.append(child)

    # Serialise to XML and decode the outcome as ASCII text
    return root.toxml().decode("ascii")
def checkOneFileData(fileName, filePath, fileSizeInBytes,
                     fileLastModifiedDate, fileData):
    """Process the data from one file and return the result as element object.

    fileName, filePath: written to "fileInfo" after surrogate pairs have
        been stripped from them
    fileSizeInBytes: written to "fileInfo" after conversion with str()
    fileLastModifiedDate: written to "fileInfo" as is
    fileData: file contents, handed to BoxValidator for validation as JP2

    returns: 'jpylyzer' element containing tool info, file info, status
        info, the validation outcome ("isValidJP2"), the test results and
        the properties. If validation raises an exception, the outcome is
        False, "statusInfo" gets a "failureMessage", and the "tests" and
        "properties" elements are empty.
    """
    # Create output elementtree object
    if config.inputRecursiveFlag or config.inputWrapperFlag:
        # Name space already declared in results element, so no need to do
        # it here
        root = ET.Element('jpylyzer')
    else:
        root = ET.Element(
            'jpylyzer',
            {'xmlns': 'http://openpreservation.org/ns/jpylyzer/',
             'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
             'xsi:schemaLocation': 'http://openpreservation.org/ns/jpylyzer/ http://jpylyzer.openpreservation.org/jpylyzer-v-1-1.xsd'})

    # Create elements for storing tool, file and status meta info
    toolInfo = ET.Element('toolInfo')
    fileInfo = ET.Element('fileInfo')
    statusInfo = ET.Element('statusInfo')

    # If file name / path contain any surrogate pairs, remove them to
    # avoid problems when writing to XML
    fileNameCleaned = stripSurrogatePairs(fileName)
    filePathCleaned = stripSurrogatePairs(filePath)

    # Produce some general tool and file meta info
    toolInfo.appendChildTagWithText("toolName", scriptName)
    toolInfo.appendChildTagWithText("toolVersion", __version__)
    fileInfo.appendChildTagWithText("fileName", fileNameCleaned)
    fileInfo.appendChildTagWithText("filePath", filePathCleaned)
    fileInfo.appendChildTagWithText("fileSizeInBytes", str(fileSizeInBytes))
    fileInfo.appendChildTagWithText("fileLastModified", fileLastModifiedDate)

    # Initialise success flag
    success = True

    try:
        # Validate the file data as JP2
        isValidJP2, tests, characteristics = BoxValidator(
            "JP2", fileData).validate()

        # Generate property values remap table
        remapTable = generatePropertiesRemapTable()

        # Create printable version of tests and characteristics tree
        tests.makeHumanReadable()
        characteristics.makeHumanReadable(remapTable)
    except Exception as ex:
        # Deliberately broad: any failure is reported in the output for
        # this file instead of aborting the run
        isValidJP2 = False
        success = False

        # isinstance() instead of an exact type comparison, so subclasses
        # (e.g. FileNotFoundError for IOError in Python 3) are reported
        # under their parent category rather than as "unknown error"
        if isinstance(ex, MemoryError):
            failureMessage = "memory error (file size too large)"
        elif isinstance(ex, IOError):
            failureMessage = "I/O error (cannot open file)"
        elif isinstance(ex, RuntimeError):
            failureMessage = "runtime error (please report to developers)"
        else:
            failureMessage = "unknown error (please report to developers)"

        printWarning(failureMessage)

        # Empty placeholders keep the structure of the output intact
        tests = ET.Element("tests")
        characteristics = ET.Element('properties')

    # Add status info
    statusInfo.appendChildTagWithText("success", str(success))
    if not success:
        statusInfo.appendChildTagWithText("failureMessage", failureMessage)

    # Append all results to root
    root.append(toolInfo)
    root.append(fileInfo)
    root.append(statusInfo)
    root.appendChildTagWithText("isValidJP2", str(isValidJP2))
    root.append(tests)
    root.append(characteristics)

    return root
def main():
    """Run Preflight on the PDF input and write an XML-formatted log to stdout.

    The command line input ("dirIn") is either one file or a directory; in
    the latter case the list of files comes from getFilesFromTree. Files
    whose name doesn't end with ".pdf" (case-insensitive) are skipped. For
    each PDF a "file" element is added to the 'preflight' root element, with
    the file name, the errors reported by Preflight (or the system errors if
    Preflight exited with a positive exit code), the exit status and the
    validation outcome ("isValidPDFA1b").
    """
    # From where is this script executed?
    applicationPath = os.path.abspath(get_main_dir())

    # Path to Preflight app
    preflightApp = os.path.normpath(
        applicationPath +
        "/preflight/preflight-1.8.0-20121114.230701-58-jar-with-dependencies.jar")

    # Command to launch preflight (ideally move this all to a separate
    # config file instead of hard-coding it). This is an argument list
    # rather than one command string: without a shell, POSIX platforms
    # treat a string as the name of the program to run, and a list also
    # keeps paths that contain spaces intact without any manual quoting.
    preflightCommand = ["java", "-jar", preflightApp]

    # Get input from command line
    args = parseCommandLine()
    dirIn = args.dirIn

    # Check if dirIn is directory or file, and create list of all input files
    if os.path.isfile(dirIn):
        myFilesIn = [dirIn]
    elif os.path.isdir(dirIn):
        myFilesIn = getFilesFromTree(dirIn)
    else:
        # NOTE(review): errorExit presumably terminates the script; if it
        # returns, myFilesIn is undefined below - confirm
        msg = dirIn + " is not a directory or file!"
        errorExit(msg)

    # Create output elementtree object
    root = ET.Element('preflight')

    for myFileIn in myFilesIn:
        fileElt = ET.Element("file")
        fileElt.appendChildTagWithText("fileName", myFileIn)

        if not myFileIn.lower().endswith(".pdf"):
            # Only PDFs are analysed and reported
            continue

        p = sub.Popen(preflightCommand + [myFileIn],
                      stdout=sub.PIPE, stderr=sub.PIPE)
        output, errors = p.communicate()

        # Exit status
        preflightExitCode = p.returncode

        if preflightExitCode <= 0:
            # Value can be -1 in case of warnings
            preflightExitStatus = "Success"
        else:
            preflightExitStatus = "Failure"

        # Extract error codes and descriptions from output
        errorCodesDescs = getErrors(output)

        if preflightExitCode > 0:
            # Preflight raised an exception (we're ignoring warnings here!)
            isValidPDFA1b = "False"
            # Append system errors to output
            fileElt.appendChildTagWithText("sysErrors", errors)
        elif len(errorCodesDescs) != 0:
            # Errors were found, so not valid
            isValidPDFA1b = "False"
            # Add errors codes + descriptions to output
            fileElt.append(errorCodesDescs)
        else:
            # No errors found, so valid
            isValidPDFA1b = "True"

        # Append validation outcome and preflightExitStatus to output
        fileElt.appendChildTagWithText(
            "preflightExitStatus", preflightExitStatus)
        fileElt.appendChildTagWithText("isValidPDFA1b", isValidPDFA1b)

        # Add output to root element
        root.append(fileElt)

    # Write xml-formatted log to stdout
    sys.stdout.write(root.toxml().decode('UTF-8'))