示例#1
0
def getErrors(output):
    """Parse Preflight output into a "validationErrors" element.

    The first line of the output is ignored. Every following line that
    contains at least one colon is read as "<code>:<description>"; only the
    first colon separates the two parts, so a description may itself contain
    colons. Lines without a colon are skipped.

    Args:
        output: Captured Preflight output, either as text or as the undecoded
            bytes that a subprocess pipe delivers on Python 3.

    Returns:
        A "validationErrors" element with one "error" child (holding a "code"
        and a "description" child) for each parsed line.
    """
    # Splitting bytes on a str separator raises TypeError on Python 3, so
    # decode first. Undecodable bytes are replaced instead of aborting the
    # whole report.
    if isinstance(output, bytes):
        output = output.decode("utf-8", "replace")

    errorsElt = ET.Element("validationErrors")

    # Skip 1st line, which doesn't contain any useful info
    for thisLine in output.split('\n')[1:]:
        thisErrorCode, separator, thisErrorDescription = thisLine.partition(":")

        if not separator:
            # No colon on this line, so it is not an error record
            continue

        errorElt = ET.Element("error")
        # str.strip() returns a new string; the stripped values must be the
        # ones that are stored (this also drops a trailing '\r')
        errorElt.appendChildTagWithText("code", thisErrorCode.strip())
        errorElt.appendChildTagWithText("description",
                                        thisErrorDescription.strip())
        errorsElt.append(errorElt)

    return errorsElt
def checkOneFile(file):
    """Analyse one file and return the outcome as an element object.

    Args:
        file: Path of the file to analyse.

    Returns:
        The 'jpylyzer' root element, with children appended in this order:
        toolInfo, fileInfo, isValidJP2, the tests tree and the
        characteristics tree.
    """
    # Read the file and validate its contents as JP2
    rawBytes = readFileBytes(file)
    validator = BoxValidator("JP2", rawBytes)
    isValidJP2, tests, characteristics = validator.validate()

    # Turn both result trees into their printable form; the characteristics
    # tree is given the property values remap table
    propertyRemap = generatePropertiesRemapTable()
    tests.makeHumanReadable()
    characteristics.makeHumanReadable(propertyRemap)

    # File name and path may contain non-ASCII characters. On Python 2.7 they
    # are decoded as Latin-9 to (hopefully) avoid Unicode decode errors; on
    # Python 3.x str has no decode attribute, so the values are used as-is.
    baseName = os.path.basename(file)
    fullPath = os.path.abspath(file)
    try:
        fileName = baseName.decode("iso-8859-15", "strict")
        filePath = fullPath.decode("iso-8859-15", "strict")
    except AttributeError:
        fileName, filePath = baseName, fullPath

    # General tool meta info
    toolInfo = ET.Element('toolInfo')
    toolInfo.appendChildTagWithText("toolName", scriptName)
    toolInfo.appendChildTagWithText("toolVersion", __version__)

    # General file meta info
    fileInfo = ET.Element('fileInfo')
    fileEntries = (
        ("fileName", fileName),
        ("filePath", filePath),
        ("fileSizeInBytes", str(os.path.getsize(file))),
        ("fileLastModified", time.ctime(os.path.getmtime(file))),
    )
    for tag, text in fileEntries:
        fileInfo.appendChildTagWithText(tag, text)

    # Assemble the output tree: meta info, validation outcome, then the
    # test results and characteristics
    root = ET.Element('jpylyzer')
    for child in (toolInfo, fileInfo):
        root.append(child)
    root.appendChildTagWithText("isValidJP2", str(isValidJP2))
    for child in (tests, characteristics):
        root.append(child)

    return root
示例#3
0
def checkFiles(images):
    """Analyse each existing file in *images* and write its report to stdout.

    A warning is issued when *images* is empty. Entries that are not regular
    files are skipped without any message.

    Args:
        images: Sequence of file paths to analyse.
    """
    if not len(images):
        warnings.warn("no images to check!")

    for thisFile in images:
        if not os.path.isfile(thisFile):
            continue

        # Read the file and validate its contents as JP2
        rawBytes = readFileBytes(thisFile)
        validator = BoxValidator("JP2", rawBytes)
        isValidJP2, tests, characteristics = validator.validate()

        # Turn both result trees into their printable form; the
        # characteristics tree is given the property values remap table
        propertyRemap = generatePropertiesRemapTable()
        tests.makeHumanReadable()
        characteristics.makeHumanReadable(propertyRemap)

        # General tool meta info
        toolInfo = ET.Element('toolInfo')
        toolInfo.appendChildTagWithText("toolName", scriptName)
        toolInfo.appendChildTagWithText("toolVersion", __version__)

        # General file meta info (fileName is the path exactly as passed in)
        fileInfo = ET.Element('fileInfo')
        fileEntries = (
            ("fileName", thisFile),
            ("filePath", os.path.abspath(thisFile)),
            ("fileSizeInBytes", str(os.path.getsize(thisFile))),
            ("fileLastModified", time.ctime(os.path.getmtime(thisFile))),
        )
        for tag, text in fileEntries:
            fileInfo.appendChildTagWithText(tag, text)

        # Assemble the output tree: meta info, validation outcome, then the
        # test results and characteristics
        root = ET.Element('jpylyzer')
        for child in (toolInfo, fileInfo):
            root.append(child)
        root.appendChildTagWithText("isValidJP2", str(isValidJP2))
        for child in (tests, characteristics):
            root.append(child)

        # Write output
        sys.stdout.write(root.toxml())
示例#4
0
def checkOneFile(file):
    """Analyse one file and return the outcome as formatted XML text.

    Args:
        file: Path of the file to analyse.

    Returns:
        The serialised 'jpylyzer' tree (toolInfo, fileInfo, isValidJP2, tests
        and characteristics, in that order), decoded from ASCII.
    """
    # Read the file and validate its contents as JP2
    rawBytes = readFileBytes(file)
    validator = BoxValidator("JP2", rawBytes)
    isValidJP2, tests, characteristics = validator.validate()

    # Turn both result trees into their printable form; the characteristics
    # tree is given the property values remap table
    propertyRemap = generatePropertiesRemapTable()
    tests.makeHumanReadable()
    characteristics.makeHumanReadable(propertyRemap)

    # General tool meta info
    toolInfo = ET.Element('toolInfo')
    toolInfo.appendChildTagWithText("toolName", scriptName)
    toolInfo.appendChildTagWithText("toolVersion", __version__)

    # General file meta info
    fileInfo = ET.Element('fileInfo')
    fileEntries = (
        ("fileName", os.path.basename(file)),
        ("filePath", os.path.abspath(file)),
        ("fileSizeInBytes", str(os.path.getsize(file))),
        ("fileLastModified", time.ctime(os.path.getmtime(file))),
    )
    for tag, text in fileEntries:
        fileInfo.appendChildTagWithText(tag, text)

    # Assemble the output tree: meta info, validation outcome, then the
    # test results and characteristics
    root = ET.Element('jpylyzer')
    for child in (toolInfo, fileInfo):
        root.append(child)
    root.appendChildTagWithText("isValidJP2", str(isValidJP2))
    for child in (tests, characteristics):
        root.append(child)

    # Result as XML
    return root.toxml().decode("ascii")
def checkOneFileData(fileName, filePath, fileSizeInBytes, fileLastModifiedDate, fileData):
    """Analyse the data of one file and return the result as an element object.

    Any exception raised during validation is caught: the file is then
    reported as invalid, a warning is printed, and a failure message is
    stored in the statusInfo element instead of the exception propagating.

    Args:
        fileName: File name to report (cleaned with stripSurrogatePairs).
        filePath: File path to report (cleaned with stripSurrogatePairs).
        fileSizeInBytes: File size; written to the output as str().
        fileLastModifiedDate: Last-modified value, written to the output
            unchanged.
        fileData: File contents, handed to BoxValidator for validation.

    Returns:
        The 'jpylyzer' root element, with children appended in this order:
        toolInfo, fileInfo, statusInfo, isValidJP2, tests, properties.
    """
    # Create output elementtree object
    if config.inputRecursiveFlag or config.inputWrapperFlag:
        # Name space already declared in results element, so no need to do it
        # here
        root = ET.Element('jpylyzer')
    else:
        root = ET.Element(
            'jpylyzer', {'xmlns': 'http://openpreservation.org/ns/jpylyzer/',
                         'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
                         'xsi:schemaLocation': 'http://openpreservation.org/ns/jpylyzer/ http://jpylyzer.openpreservation.org/jpylyzer-v-1-1.xsd'})

    # Create elements for storing tool, file and status meta info
    toolInfo = ET.Element('toolInfo')
    fileInfo = ET.Element('fileInfo')
    statusInfo = ET.Element('statusInfo')

    # If file name / path contain any surrogate pairs, remove them to
    # avoid problems when writing to XML
    fileNameCleaned = stripSurrogatePairs(fileName)
    filePathCleaned = stripSurrogatePairs(filePath)

    # Produce some general tool and file meta info
    toolInfo.appendChildTagWithText("toolName", scriptName)
    toolInfo.appendChildTagWithText("toolVersion", __version__)
    fileInfo.appendChildTagWithText("fileName", fileNameCleaned)
    fileInfo.appendChildTagWithText("filePath", filePathCleaned)
    fileInfo.appendChildTagWithText(
        "fileSizeInBytes", str(fileSizeInBytes))
    fileInfo.appendChildTagWithText(
        "fileLastModified", fileLastModifiedDate)

    # Initialise success flag
    success = True

    try:
        # Validate the file data as JP2
        isValidJP2, tests, characteristics = BoxValidator("JP2", fileData).validate()

        # Generate property values remap table
        remapTable = generatePropertiesRemapTable()

        # Create printable version of tests and characteristics tree
        tests.makeHumanReadable()
        characteristics.makeHumanReadable(remapTable)
    except Exception as ex:
        isValidJP2 = False
        success = False

        # Classify with isinstance() so that subclasses are recognised too.
        # On Python 3, IOError is an alias of OSError and real I/O failures
        # are raised as subclasses (FileNotFoundError, PermissionError, ...),
        # which an exact type comparison would report as "unknown error".
        if isinstance(ex, MemoryError):
            failureMessage = "memory error (file size too large)"
        elif isinstance(ex, IOError):
            failureMessage = "I/O error (cannot open file)"
        elif isinstance(ex, RuntimeError):
            failureMessage = "runtime error (please report to developers)"
        else:
            failureMessage = "unknown error (please report to developers)"

        printWarning(failureMessage)

        # Empty placeholders so the output keeps its usual structure
        tests = ET.Element("tests")
        characteristics = ET.Element('properties')

    # Add status info
    statusInfo.appendChildTagWithText("success", str(success))
    if not success:
        statusInfo.appendChildTagWithText("failureMessage", failureMessage)

    # Append all results to root
    root.append(toolInfo)
    root.append(fileInfo)
    root.append(statusInfo)
    root.appendChildTagWithText("isValidJP2", str(isValidJP2))
    root.append(tests)
    root.append(characteristics)

    return root
示例#6
0
def main():
    """Run Preflight on the PDF input and write an XML report to stdout.

    The input given on the command line is either a single file or a
    directory; in the latter case the file list comes from getFilesFromTree.
    Only files whose name ends in ".pdf" (case-insensitive) are processed and
    reported. For each of them a "file" element is added to the "preflight"
    root, containing the file name, any system errors or validation errors,
    the Preflight exit status and the validation outcome.
    """
    # From where is this script executed?
    applicationPath = os.path.abspath(get_main_dir())

    # Path to Preflight app
    preflightApp = os.path.normpath(applicationPath + "/preflight/preflight-1.8.0-20121114.230701-58-jar-with-dependencies.jar")

    # Command to launch preflight (ideally move this all to separate config
    # file instead of hard-coding it). Kept as an argument list: a single
    # command string without shell=True is treated as the program name on
    # POSIX, and hand-built quoting breaks on paths with spaces or quotes.
    preflightCommand = ["java", "-jar", preflightApp]

    # Get input from command line
    args = parseCommandLine()
    dirIn = args.dirIn

    # Check if dirIn is directory or file, and create list of all input files
    if os.path.isfile(dirIn):
        myFilesIn = [dirIn]
    elif os.path.isdir(dirIn):
        myFilesIn = getFilesFromTree(dirIn)
    else:
        # NOTE(review): errorExit presumably terminates the program; if it
        # returned, myFilesIn would be undefined below - confirm
        msg = dirIn + " is not a directory or file!"
        errorExit(msg)

    # Create output elementtree object
    root = ET.Element('preflight')

    for myFileIn in myFilesIn:

        if not myFileIn.lower().endswith(".pdf"):
            # Not a PDF: nothing is run and nothing is reported for this file
            continue

        fileElt = ET.Element("file")
        fileElt.appendChildTagWithText("fileName", myFileIn)

        p = sub.Popen(preflightCommand + [myFileIn],
                      stdout=sub.PIPE, stderr=sub.PIPE)
        output, errors = p.communicate()

        # Exit status
        preflightExitCode = p.returncode

        # Extract error codes and descriptions from output
        errorCodesDescs = getErrors(output)

        if preflightExitCode > 0:
            # Preflight raised an exception (we're ignoring warnings here!)
            preflightExitStatus = "Failure"
            isValidPDFA1b = "False"

            # Append system errors to output
            fileElt.appendChildTagWithText("sysErrors", errors)
        else:
            # Value can be -1 in case of warnings
            preflightExitStatus = "Success"

            if len(errorCodesDescs) != 0:
                # Errors were found, so not valid
                isValidPDFA1b = "False"

                # Add errors codes + descriptions to output
                fileElt.append(errorCodesDescs)
            else:
                # No errors found, so valid
                isValidPDFA1b = "True"

        # Append validation outcome and preflightExitStatus to output
        fileElt.appendChildTagWithText("preflightExitStatus", preflightExitStatus)
        fileElt.appendChildTagWithText("isValidPDFA1b", isValidPDFA1b)

        # Add output to root element
        root.append(fileElt)

    # Write xml-formatted log to stdout
    sys.stdout.write(root.toxml().decode('UTF-8'))