Exemplo n.º 1
0
def extractAWAMIndicators(pdf, password=''):
    """ Check whether the given PDF document is accessible """

    # Takes an optional password which can be used to
    # unlock the document for encrypted documents.
    try:
        pdfobj = PdfFileReader(pdf, password)
        print 'Producer=>',pdfobj.producer
        print 'Creator=>',pdfobj.creator
    except DecryptionFailedException:
        # We are unable to decrypt document.
        # We have got no parsed pdfobj, and cannot do much more,
        # unfortunately... 
        # Tell that the document was not accessible due to encryption, at least
        print "Decryption failed"
        return {'EIAO.A.10.8.1.4.PDF.1.1':{(0, 1): 0}}
    except NotImplementedError:
        # pyPdf only supports algorithm version 1 and 2. 
        # Version 3 and 4 are not yet supported.
        print "Unsupported decryption algorithm."
        return {'EIAO.A.10.8.1.4.PDF.1.1':{(0, 1): 0}}


    try:
        # Fix indirect object references
        pdfobj.fixIndirectObjectXref()

        structTreeRoot=pdfobj.root['/StructTreeRoot'].getObject()
        roleMap=structTreeRoot['/RoleMap'].getObject()
    except (KeyError, ValueError), e:
        roleMap=None