def test_wrong_password(legacy):
    """Check that a wrong password leaves encrypted objects unreadable.

    A document holding a single text string is encrypted with a standard
    security handler, built through ``build_from_pw_legacy`` when ``legacy``
    is truthy and through ``build_from_pw`` otherwise.  Fetching the object
    must raise both before any authentication attempt and after a failed
    ``decrypt`` call.
    """
    pdf_writer = writer.PdfFileWriter()
    text_ref = pdf_writer.add_object(generic.TextStringObject("Blah blah"))

    if not legacy:
        handler = StandardSecurityHandler.build_from_pw(
            "ownersecret", "usersecret"
        )
    else:
        handler = StandardSecurityHandler.build_from_pw_legacy(
            StandardSecuritySettingsRevision.RC4_OR_AES128,
            pdf_writer._document_id[0].original_bytes,
            "ownersecret",
            "usersecret",
            keylen_bytes=16,
            use_aes128=True,
        )
    pdf_writer.security_handler = handler
    pdf_writer._encrypt = pdf_writer.add_object(handler.as_pdf_object())

    buf = BytesIO()
    pdf_writer.write(buf)
    reader = PdfFileReader(buf)

    # nothing has been authenticated yet
    with pytest.raises(misc.PdfReadError):
        reader.get_object(text_ref.reference)

    auth_outcome = reader.decrypt("thispasswordiswrong")
    assert auth_outcome == AuthResult.FAILED
    assert reader.security_handler._auth_failed
    assert reader.security_handler.get_string_filter()._auth_failed

    # still unreadable after the failed attempt
    with pytest.raises(misc.PdfReadError):
        reader.get_object(text_ref.reference)
def test_identity_crypt_filter(use_alias, with_never_decrypt):
    """Check that streams using the /Identity crypt filter stay in the clear.

    When ``use_alias`` is truthy, the identity filter is additionally
    registered under ``/IdentityAlias`` and assigning the handler to the
    writer is expected to raise ``misc.PdfError``.  Otherwise a stream with
    an explicit ``/Crypt`` filter naming ``/Identity`` is written and read
    back, with ``with_never_decrypt`` forwarded to ``get_object``.
    """
    pdf_writer = writer.PdfFileWriter()
    handler = StandardSecurityHandler.build_from_pw("secret")
    pdf_writer.security_handler = handler

    identity = IdentityCryptFilter()
    assert handler.crypt_filter_config[pdf_name("/Identity")] is identity

    if use_alias:
        alias = pdf_name("/IdentityAlias")
        handler.crypt_filter_config._crypt_filters[alias] = identity
        assert handler.crypt_filter_config[pdf_name("/IdentityAlias")] is identity
        # identity filter can't be serialised, so this should throw an error
        with pytest.raises(misc.PdfError):
            pdf_writer._assign_security_handler(handler)
        return

    pdf_writer._assign_security_handler(handler)

    payload = b'This is some test data that should remain unencrypted.'
    plain_stream = generic.StreamObject(stream_data=payload, handler=handler)
    plain_stream.apply_filter(
        "/Crypt", params={pdf_name("/Name"): pdf_name("/Identity")}
    )
    stream_ref = pdf_writer.add_object(plain_stream).reference

    buf = BytesIO()
    pdf_writer.write(buf)

    reader = PdfFileReader(buf)
    reader.decrypt("secret")
    recovered = reader.get_object(stream_ref, never_decrypt=with_never_decrypt)
    assert recovered.encoded_data == payload
    assert recovered.data == payload
def test_custom_crypt_filter_errors():
    """Exercise the error paths of custom crypt filters on streams.

    Two failures are expected: adding a crypt filter under a name
    (``/Idontexist``) that is not in the configured filter set, and writing
    a document containing a stream whose ``_handler`` has been reset to
    ``None`` after a crypt filter was attached.
    """
    pdf_writer = writer.PdfFileWriter()
    custom_name = pdf_name('/Custom')
    filter_map = {
        custom_name: StandardRC4CryptFilter(keylen=16),
        STD_CF: StandardAESCryptFilter(keylen=16),
    }
    config = CryptFilterConfiguration(
        crypt_filters=filter_map,
        default_string_filter=STD_CF,
        default_stream_filter=STD_CF,
    )
    handler = StandardSecurityHandler.build_from_pw_legacy(
        rev=StandardSecuritySettingsRevision.RC4_OR_AES128,
        id1=pdf_writer.document_id[0],
        desired_user_pass="******",
        desired_owner_pass="******",
        keylen_bytes=16,
        crypt_filter_config=config,
    )
    pdf_writer._assign_security_handler(handler)

    payload = b'This is test data!'
    stream = generic.StreamObject(stream_data=payload)
    with pytest.raises(misc.PdfStreamError):
        stream.add_crypt_filter(name='/Idontexist', handler=handler)

    # no handler
    stream.add_crypt_filter(name=custom_name)
    stream._handler = None
    pdf_writer.add_object(stream)

    buf = BytesIO()
    with pytest.raises(misc.PdfStreamError):
        pdf_writer.write(buf)
def test_create_fresh(zip1, zip2):
    """Write a fresh two-page document and verify it after reading it back.

    ``zip1`` and ``zip2`` are passed as the ``compress`` argument of
    ``simple_page`` for the first and second page respectively.  Besides the
    page count and content streams, the test checks that
    ``find_page_for_modification`` resolves positive and negative indices to
    the inserted pages and raises ``ValueError`` for out-of-range ones.
    """
    fresh = writer.PdfFileWriter()
    first_page = simple_page(fresh, 'Hello world', compress=zip1)
    second_page = simple_page(fresh, 'Hello Page 2', compress=zip2)
    first_ref = fresh.insert_page(first_page)
    second_ref = fresh.insert_page(second_page)

    buf = BytesIO()
    fresh.write(buf)
    buf.seek(0)

    reader = PdfFileReader(buf)
    page_tree = reader.root['/Pages']
    assert page_tree['/Count'] == 2

    kids = page_tree['/Kids']
    assert b'world' in kids[0].get_object()['/Contents'].data
    assert b'Page 2' in kids[1].get_object()['/Contents'].data

    # index -> reference that the lookup is expected to return
    expected_lookups = (
        (0, first_ref),
        (1, second_ref),
        (-1, second_ref),
        (-2, first_ref),
    )
    for page_ix, expected_ref in expected_lookups:
        found = reader.find_page_for_modification(page_ix)[0]
        assert found.idnum == expected_ref.idnum

    for bad_ix in (2, -3):
        with pytest.raises(ValueError):
            reader.find_page_for_modification(bad_ix)
def test_key_encipherment_requirement_override(version, keylen, use_aes,
                                               use_crypt_filters):
    """Round-trip public-key encryption built with ``ignore_key_usage=True``.

    The handler is built for ``PUBKEY_SELFSIGNED_DECRYPTER``'s certificate
    with the given ``version``, ``keylen``, ``use_aes`` and
    ``use_crypt_filters`` settings and ``perms=-44``.  After decryption the
    test checks the auth status, the permission flags and the page content
    imported from ``VECTOR_IMAGE_PDF``.
    """
    source = PdfFileReader(BytesIO(VECTOR_IMAGE_PDF))
    pdf_writer = writer.PdfFileWriter()

    handler = PubKeySecurityHandler.build_from_certs(
        [PUBKEY_SELFSIGNED_DECRYPTER.cert],
        keylen_bytes=keylen,
        version=version,
        use_aes=use_aes,
        use_crypt_filters=use_crypt_filters,
        perms=-44,
        ignore_key_usage=True,
    )
    pdf_writer.security_handler = handler
    pdf_writer._encrypt = pdf_writer.add_object(handler.as_pdf_object())

    imported_tree = pdf_writer.import_object(source.root.raw_get('/Pages'))
    pdf_writer.root['/Pages'] = imported_tree

    buf = BytesIO()
    pdf_writer.write(buf)

    reader = PdfFileReader(buf)
    auth = reader.decrypt_pubkey(PUBKEY_SELFSIGNED_DECRYPTER)
    assert auth.status == AuthStatus.USER
    assert auth.permission_flags == -44

    first_page = reader.root['/Pages']['/Kids'][0].get_object()
    assert '/ExtGState' in first_page['/Resources']
    # just a piece of data I know occurs in the decoded content stream
    # of the (only) page in VECTOR_IMAGE_PDF
    assert b'0 1 0 rg /a0 gs' in first_page['/Contents'].data
def test_legacy_encryption(use_owner_pass, rev, keylen_bytes, use_aes):
    """Round-trip a document through a legacy password-based handler.

    The handler is built with ``build_from_pw_legacy`` using ``rev``,
    ``keylen_bytes`` and ``use_aes`` (passed as ``use_aes128``) and
    ``perms=-44``.  Depending on ``use_owner_pass``, the document is opened
    with the owner or the user password, and the resulting auth status and
    permission flags are checked along with the imported page content.
    """
    source = PdfFileReader(BytesIO(VECTOR_IMAGE_PDF))
    pdf_writer = writer.PdfFileWriter()

    handler = StandardSecurityHandler.build_from_pw_legacy(
        rev,
        pdf_writer._document_id[0].original_bytes,
        "ownersecret",
        "usersecret",
        keylen_bytes=keylen_bytes,
        use_aes128=use_aes,
        perms=-44,
    )
    pdf_writer.security_handler = handler
    pdf_writer._encrypt = pdf_writer.add_object(handler.as_pdf_object())

    imported_tree = pdf_writer.import_object(source.root.raw_get('/Pages'))
    pdf_writer.root['/Pages'] = imported_tree

    buf = BytesIO()
    pdf_writer.write(buf)
    reader = PdfFileReader(buf)

    if use_owner_pass:
        password = "ownersecret"
    else:
        password = "usersecret"
    auth = reader.decrypt(password)

    if not use_owner_pass:
        assert auth.status == AuthStatus.USER
        assert auth.permission_flags == -44
    else:
        assert auth.status == AuthStatus.OWNER
        assert auth.permission_flags is None

    first_page = reader.root['/Pages']['/Kids'][0].get_object()
    assert reader.trailer['/Encrypt']['/P'] == -44
    assert '/ExtGState' in first_page['/Resources']
    # just a piece of data I know occurs in the decoded content stream
    # of the (only) page in VECTOR_IMAGE_PDF
    assert b'0 1 0 rg /a0 gs' in first_page['/Contents'].data
def test_add_stream_to_direct_arr():
    """Append a content stream to a page in an incremental update.

    A page created with ``simple_page(..., extra_stream=True)`` is written
    out, reopened with ``IncrementalPdfFileWriter``, and extended with one
    more content stream.  After re-reading, the page must list three content
    streams (including the new one) and still expose the ``/F1`` font.
    """
    base_writer = writer.PdfFileWriter()
    base_writer.insert_page(
        simple_page(base_writer, 'Test Test', extra_stream=True)
    )
    base_buf = BytesIO()
    base_writer.write(base_buf)
    base_buf.seek(0)

    incremental = IncrementalPdfFileWriter(base_buf)
    extra_content = 'BT /F1 18 Tf 0 50 Td (Test2 Test2) Tj ET'.encode('ascii')
    extra_ref = incremental.add_object(
        generic.StreamObject(stream_data=extra_content)
    )
    incremental.add_stream_to_page(0, extra_ref)

    updated_buf = BytesIO()
    incremental.write(updated_buf)
    updated_buf.seek(0)
    reader = PdfFileReader(updated_buf)

    # check if the content stream was added
    raw_page_ref = reader.root['/Pages']['/Kids'].raw_get(0)
    assert isinstance(raw_page_ref, generic.IndirectObject)
    page = raw_page_ref.get_object()
    contents = page['/Contents']
    assert len(contents) == 3
    assert extra_ref.idnum in (entry.idnum for entry in contents)

    # check if resource dictionary is still OK
    assert '/F1' in page['/Resources']['/Font']
def empty_page(stream_xrefs=False):
    """Return a writer holding one page with an empty content stream.

    :param stream_xrefs:
        Forwarded to ``writer.PdfFileWriter`` as its ``stream_xrefs``
        argument.
    :return:
        The ``PdfFileWriter`` with a single page whose media box is
        ``[0, 0, 595, 842]`` already inserted.
    """
    pdf_writer = writer.PdfFileWriter(stream_xrefs=stream_xrefs)
    blank_contents = pdf_writer.add_object(
        generic.StreamObject(stream_data=b'')
    )
    blank_page = writer.PageObject(
        contents=blank_contents,
        media_box=generic.ArrayObject([0, 0, 595, 842]),
    )
    pdf_writer.insert_page(blank_page)
    return pdf_writer
def test_no_refs_in_obj_stm():
    """Adding an indirect reference to an object stream must raise TypeError."""
    pdf_writer = writer.PdfFileWriter(stream_xrefs=True)
    target_stream = pdf_writer.prepare_object_stream()
    dangling_ref = generic.IndirectObject(2, 0, pdf_writer)
    with pytest.raises(TypeError, match='Stream obj.*references'):
        pdf_writer.add_object(dangling_ref, obj_stream=target_stream)
def test_no_stms_in_obj_stm():
    """Adding a stream object to an object stream must raise TypeError."""
    pdf_writer = writer.PdfFileWriter(stream_xrefs=True)
    target_stream = pdf_writer.prepare_object_stream()
    nested = generic.StreamObject(stream_data=b'Hello world!')
    with pytest.raises(TypeError, match='Stream obj.*references'):
        pdf_writer.add_object(nested, obj_stream=target_stream)
def test_preallocate():
    """Check explicit object IDs are only accepted for allocated placeholders.

    Passing an ``idnum`` that was never allocated must raise
    ``misc.PdfWriteError``; a placeholder obtained from
    ``allocate_placeholder`` initially resolves to a ``NullObject`` and can
    then be filled in through ``add_object(..., idnum=...)``.
    """
    pdf_writer = writer.PdfFileWriter()

    with pytest.raises(misc.PdfWriteError):
        pdf_writer.add_object(generic.NullObject(), idnum=20)

    placeholder = pdf_writer.allocate_placeholder()
    assert isinstance(placeholder.get_object(), generic.NullObject)

    replacement = generic.TextStringObject("Test Test")
    pdf_writer.add_object(replacement, idnum=placeholder.idnum)
    assert placeholder.get_object() == "Test Test"
def test_pubkey_alternative_filter():
    """Read a pubkey-encrypted file whose ``/Filter`` entry was replaced.

    The encryption dictionary's ``/Filter`` is overwritten with ``/FooBar``
    before writing; the reader is still expected to end up with a
    ``PubKeySecurityHandler``.
    """
    pdf_writer = writer.PdfFileWriter()
    pdf_writer.encrypt_pubkey([PUBKEY_TEST_DECRYPTER.cert])

    # subfilter should be picked up
    encrypt_dict = pdf_writer._encrypt.get_object()
    encrypt_dict['/Filter'] = pdf_name('/FooBar')

    buf = BytesIO()
    pdf_writer.write(buf)

    reader = PdfFileReader(buf)
    assert isinstance(reader.security_handler, PubKeySecurityHandler)
def test_pubkey_encryption_block_cfs_s4():
    """Reading must fail when ``/SubFilter`` is forced to ``/adbe.pkcs7.s4``.

    The document is encrypted through ``encrypt_pubkey`` and the subfilter in
    the resulting encryption dictionary is then overwritten before writing.
    """
    pdf_writer = writer.PdfFileWriter()
    pdf_writer.encrypt_pubkey([PUBKEY_TEST_DECRYPTER.cert])

    encrypt_dict = pdf_writer._encrypt.get_object()
    encrypt_dict['/SubFilter'] = pdf_name('/adbe.pkcs7.s4')

    buf = BytesIO()
    pdf_writer.write(buf)

    with pytest.raises(misc.PdfReadError):
        PdfFileReader(buf)
def test_page_import_with_fonts(inherit_filters):
    """Import a page with an embedded font as a form XObject.

    ``inherit_filters`` is forwarded to ``import_page_as_xobject``.  The
    imported XObject must carry the ``/FEmb`` font resource, and the embedded
    font program reached through it must be 1424 bytes long.
    """
    source = PdfFileReader(BytesIO(FILE_WITH_EMBEDDED_FONT))
    pdf_writer = writer.PdfFileWriter()

    xobject = pdf_writer.import_page_as_xobject(
        source, inherit_filters=inherit_filters
    ).get_object()

    font_resources = xobject['/Resources']['/Font']
    assert '/FEmb' in font_resources

    descendant = font_resources['/FEmb']['/DescendantFonts'][0].get_object()
    embedded_program = descendant['/FontDescriptor']['/FontFile3']
    assert len(embedded_program.data) == 1424
def test_page_import(file_no, inherit_filters):
    """Import a page as a form XObject and inspect its decoded content.

    ``file_no`` indexes into ``(VECTOR_IMAGE_PDF, VECTOR_IMAGE_PDF_DECOMP)``
    to pick the input bytes; ``inherit_filters`` is forwarded to
    ``import_page_as_xobject``.
    """
    candidates = (VECTOR_IMAGE_PDF, VECTOR_IMAGE_PDF_DECOMP)
    source = PdfFileReader(BytesIO(candidates[file_no]))
    pdf_writer = writer.PdfFileWriter()

    xobject = pdf_writer.import_page_as_xobject(
        source, inherit_filters=inherit_filters
    ).get_object()

    assert '/ExtGState' in xobject['/Resources']
    # just a piece of data I know occurs in the decoded content stream
    # of the (only) page in VECTOR_IMAGE_PDF
    assert b'0 1 0 rg /a0 gs' in xobject.data
def test_pubkey_encryption_s5_requires_cfs():
    """Reading must fail when ``/CF`` is removed from the encryption dict.

    The handler comes from ``PubKeySecurityHandler.build_from_certs`` with
    default settings; its ``/CF`` entry is deleted before the file is
    written.
    """
    pdf_writer = writer.PdfFileWriter()
    handler = PubKeySecurityHandler.build_from_certs(
        [PUBKEY_TEST_DECRYPTER.cert]
    )
    pdf_writer._assign_security_handler(handler)

    encrypt_dict = pdf_writer._encrypt.get_object()
    del encrypt_dict['/CF']

    buf = BytesIO()
    pdf_writer.write(buf)

    with pytest.raises(misc.PdfReadError):
        PdfFileReader(buf)
def test_custom_crypt_filter(with_hex_filter, main_unencrypted):
    """Round-trip a stream protected by a non-default crypt filter.

    A ``/Custom`` RC4 crypt filter is registered next to (or, when
    ``main_unencrypted`` is truthy, instead of) the standard AES filter.  One
    stream explicitly uses the custom filter; a second stream relies on the
    document defaults.  With ``with_hex_filter`` an ``/AHx`` filter is
    applied on top of the first stream, so its ``/DecodeParms`` becomes an
    array in which the crypt filter parameters sit at index 1.
    """
    w = writer.PdfFileWriter()
    custom = pdf_name('/Custom')
    crypt_filters = {
        custom: StandardRC4CryptFilter(keylen=16),
    }
    if main_unencrypted:
        # streams/strings are unencrypted by default
        cfc = CryptFilterConfiguration(crypt_filters=crypt_filters)
    else:
        crypt_filters[STD_CF] = StandardAESCryptFilter(keylen=16)
        cfc = CryptFilterConfiguration(
            crypt_filters=crypt_filters,
            default_string_filter=STD_CF,
            default_stream_filter=STD_CF,
        )

    # The passwords must match the one handed to r.decrypt() further down;
    # with any other value authentication fails and the stream checks below
    # cannot succeed.
    sh = StandardSecurityHandler.build_from_pw_legacy(
        rev=StandardSecuritySettingsRevision.RC4_OR_AES128,
        id1=w.document_id[0],
        desired_user_pass="usersecret",
        desired_owner_pass="ownersecret",
        keylen_bytes=16,
        crypt_filter_config=cfc,
    )
    w._assign_security_handler(sh)

    test_data = b'This is test data!'
    # stream that explicitly selects the custom crypt filter
    dummy_stream = generic.StreamObject(stream_data=test_data)
    dummy_stream.add_crypt_filter(name=custom, handler=sh)
    ref = w.add_object(dummy_stream)
    # stream that falls back on the default stream filter (if any)
    dummy_stream2 = generic.StreamObject(stream_data=test_data)
    ref2 = w.add_object(dummy_stream2)

    if with_hex_filter:
        dummy_stream.apply_filter(pdf_name('/AHx'))

    out = BytesIO()
    w.write(out)

    r = PdfFileReader(out)
    r.decrypt("ownersecret")

    obj: generic.StreamObject = r.get_object(ref.reference)
    assert obj.data == test_data
    if with_hex_filter:
        cf_dict = obj['/DecodeParms'][1]
    else:
        cf_dict = obj['/DecodeParms']
    assert cf_dict['/Name'] == pdf_name('/Custom')

    # inspect the second stream without transparent decryption to see
    # whether its stored bytes were actually encrypted
    obj2: generic.DecryptedObjectProxy = r.get_object(
        ref2.reference, transparent_decrypt=False
    )
    raw = obj2.raw_object
    assert isinstance(raw, generic.StreamObject)
    if main_unencrypted:
        assert raw.encoded_data == test_data
    else:
        assert raw.encoded_data != test_data
def test_pubkey_unsupported_filter(delete_subfilter):
    """Reading must fail for an unknown ``/Filter`` without a usable subfilter.

    ``/Filter`` is replaced with ``/FooBar``; ``/SubFilter`` is either
    deleted (``delete_subfilter`` truthy) or set to ``/baz.quux``.
    """
    pdf_writer = writer.PdfFileWriter()
    pdf_writer.encrypt_pubkey([PUBKEY_TEST_DECRYPTER.cert])

    encrypt_dict = pdf_writer._encrypt.get_object()
    encrypt_dict['/Filter'] = pdf_name('/FooBar')
    if not delete_subfilter:
        encrypt_dict['/SubFilter'] = pdf_name('/baz.quux')
    else:
        del encrypt_dict['/SubFilter']

    buf = BytesIO()
    pdf_writer.write(buf)

    with pytest.raises(misc.PdfReadError):
        PdfFileReader(buf)
def test_create_fresh(zip1, zip2):
    """Write a fresh two-page document and check its page tree on re-read.

    ``zip1`` and ``zip2`` are passed as the ``compress`` argument of
    ``simple_page`` for the first and second page respectively.
    """
    fresh = writer.PdfFileWriter()
    page_specs = (('Hello world', zip1), ('Hello Page 2', zip2))
    # build both pages first, then insert them, in that order
    built_pages = [
        simple_page(fresh, text, compress=compress)
        for text, compress in page_specs
    ]
    for built in built_pages:
        fresh.insert_page(built)

    buf = BytesIO()
    fresh.write(buf)
    buf.seek(0)

    reader = PdfFileReader(buf)
    page_tree = reader.root['/Pages']
    assert page_tree['/Count'] == 2

    kids = page_tree['/Kids']
    assert b'world' in kids[0].get_object()['/Contents'].data
    assert b'Page 2' in kids[1].get_object()['/Contents'].data
def test_pubkey_wrong_cert():
    """Decrypting with credentials that are not a recipient must fail.

    The document is encrypted for the certificate loaded from
    ``signer2.cert.pem``; ``decrypt_pubkey`` is then called with
    ``PUBKEY_TEST_DECRYPTER`` and is expected to report
    ``AuthStatus.FAILED``, after which fetching the encrypted stream raises
    ``misc.PdfError``.
    """
    # NOTE: the original opened VECTOR_IMAGE_PDF in a reader here that was
    # never used before being rebound; that dead setup has been dropped.
    w = writer.PdfFileWriter()

    recpt_cert = load_cert_from_pemder(
        TESTING_CA_DIR + '/intermediate/newcerts/signer2.cert.pem'
    )
    test_data = b'This is test data!'
    dummy_stream = generic.StreamObject(stream_data=test_data)
    ref = w.add_object(dummy_stream)
    w.encrypt_pubkey([recpt_cert])

    out = BytesIO()
    w.write(out)

    r = PdfFileReader(out)
    result = r.decrypt_pubkey(PUBKEY_TEST_DECRYPTER)
    assert result.status == AuthStatus.FAILED

    with pytest.raises(misc.PdfError):
        r.get_object(ref.reference)
def test_page_tree_import(stream_xrefs, with_objstreams):
    """Import a page tree into a fresh writer and verify it after writing.

    ``stream_xrefs`` is forwarded to the writer.  When ``with_objstreams`` is
    truthy, the imported objects are placed in an object stream obtained
    from ``prepare_object_stream``, which is then added to the writer.
    """
    source = PdfFileReader(BytesIO(VECTOR_IMAGE_PDF))
    pdf_writer = writer.PdfFileWriter(stream_xrefs=stream_xrefs)

    obj_stream = (
        pdf_writer.prepare_object_stream() if with_objstreams else None
    )
    imported_tree = pdf_writer.import_object(
        source.root.raw_get('/Pages'), obj_stream=obj_stream
    )
    if obj_stream is not None:
        pdf_writer.add_object(obj_stream.as_pdf_object())
    pdf_writer.root['/Pages'] = imported_tree

    buf = BytesIO()
    pdf_writer.write(buf)

    reader = PdfFileReader(buf)
    first_page = reader.root['/Pages']['/Kids'][0].get_object()
    assert '/ExtGState' in first_page['/Resources']
    # just a piece of data I know occurs in the decoded content stream
    # of the (only) page in VECTOR_IMAGE_PDF
    assert b'0 1 0 rg /a0 gs' in first_page['/Contents'].data
def test_pubkey_encryption(version, keylen, use_aes, use_crypt_filters):
    """Round-trip a page tree through public-key encryption.

    The handler is built for ``PUBKEY_TEST_DECRYPTER``'s certificate with
    the given ``version``, ``keylen``, ``use_aes`` and ``use_crypt_filters``
    settings; after ``decrypt_pubkey`` the imported page must be readable.
    """
    source = PdfFileReader(BytesIO(VECTOR_IMAGE_PDF))
    pdf_writer = writer.PdfFileWriter()

    handler = PubKeySecurityHandler.build_from_certs(
        [PUBKEY_TEST_DECRYPTER.cert],
        keylen_bytes=keylen,
        version=version,
        use_aes=use_aes,
        use_crypt_filters=use_crypt_filters,
    )
    pdf_writer.security_handler = handler
    pdf_writer._encrypt = pdf_writer.add_object(handler.as_pdf_object())

    imported_tree = pdf_writer.import_object(source.root.raw_get('/Pages'))
    pdf_writer.root['/Pages'] = imported_tree

    buf = BytesIO()
    pdf_writer.write(buf)

    reader = PdfFileReader(buf)
    reader.decrypt_pubkey(PUBKEY_TEST_DECRYPTER)

    first_page = reader.root['/Pages']['/Kids'][0].get_object()
    assert '/ExtGState' in first_page['/Resources']
    # just a piece of data I know occurs in the decoded content stream
    # of the (only) page in VECTOR_IMAGE_PDF
    assert b'0 1 0 rg /a0 gs' in first_page['/Contents'].data
def test_no_objstms_without_xref_stm():
    """``prepare_object_stream`` must raise when ``stream_xrefs`` is False."""
    plain_xref_writer = writer.PdfFileWriter(stream_xrefs=False)
    with pytest.raises(misc.PdfWriteError, match='Obj'):
        plain_xref_writer.prepare_object_stream()
def test_custom_pubkey_crypt_filter(with_hex_filter, main_unencrypted):
    """Round-trip a stream protected by a custom public-key crypt filter.

    A ``/Custom`` RC4 public-key crypt filter is registered next to (or,
    when ``main_unencrypted`` is truthy, instead of) a default AES one.  The
    test checks how recipients end up in the encryption dictionary, that the
    custom filter accepts only one ``add_recipients`` call, and that the
    custom-filtered stream only decodes after that filter has been
    authenticated separately.  With ``with_hex_filter`` an ``/AHx`` filter is
    applied on top, so the crypt filter parameters sit at index 1 of
    ``/DecodeParms``.
    """
    pdf_writer = writer.PdfFileWriter()
    custom_name = pdf_name('/Custom')
    custom_filter = PubKeyRC4CryptFilter(keylen=16)
    filter_map = {custom_name: custom_filter}

    if not main_unencrypted:
        filter_map[DEFAULT_CRYPT_FILTER] = PubKeyAESCryptFilter(
            keylen=16, acts_as_default=True
        )
        config = CryptFilterConfiguration(
            crypt_filters=filter_map,
            default_string_filter=DEFAULT_CRYPT_FILTER,
            default_stream_filter=DEFAULT_CRYPT_FILTER,
        )
    else:
        # streams/strings are unencrypted by default
        config = CryptFilterConfiguration(crypt_filters=filter_map)

    handler = PubKeySecurityHandler(
        version=SecurityHandlerVersion.RC4_OR_AES128,
        pubkey_handler_subfilter=PubKeyAdbeSubFilter.S5,
        legacy_keylen=16,
        crypt_filter_config=config,
    )

    # if main_unencrypted, these should be no-ops
    handler.add_recipients([PUBKEY_TEST_DECRYPTER.cert])
    # (this is always pointless, but it should be allowed)
    handler.add_recipients([PUBKEY_TEST_DECRYPTER.cert])
    custom_filter.add_recipients([PUBKEY_TEST_DECRYPTER.cert])
    pdf_writer._assign_security_handler(handler)

    encrypt_dict = pdf_writer._encrypt.get_object()
    cf_entries = encrypt_dict['/CF']
    # no /Recipients in S5 mode
    assert '/Recipients' not in encrypt_dict
    assert isinstance(
        cf_entries[custom_name]['/Recipients'], generic.ByteStringObject
    )
    if not main_unencrypted:
        default_recipients = cf_entries[DEFAULT_CRYPT_FILTER]['/Recipients']
        assert isinstance(default_recipients, generic.ArrayObject)
        assert len(default_recipients) == 2
    else:
        assert DEFAULT_CRYPT_FILTER not in cf_entries

    # custom crypt filters can only have one set of recipients
    with pytest.raises(misc.PdfError):
        custom_filter.add_recipients([PUBKEY_TEST_DECRYPTER.cert])

    payload = b'This is test data!'
    custom_stream = generic.StreamObject(stream_data=payload)
    custom_stream.add_crypt_filter(name=custom_name, handler=handler)
    custom_ref = pdf_writer.add_object(custom_stream)
    default_stream = generic.StreamObject(stream_data=payload)
    default_ref = pdf_writer.add_object(default_stream)

    if with_hex_filter:
        custom_stream.apply_filter(pdf_name('/AHx'))

    buf = BytesIO()
    pdf_writer.write(buf)
    reader = PdfFileReader(buf)
    reader.decrypt_pubkey(PUBKEY_TEST_DECRYPTER)

    # the custom test filter shouldn't have been decrypted yet
    # so attempting to decode the stream should cause the crypt filter
    # to throw an error
    read_back = reader.get_object(custom_ref.reference)
    with pytest.raises(misc.PdfError):
        # noinspection PyStatementEffect
        read_back.data

    reader.security_handler.crypt_filter_config[custom_name].authenticate(
        PUBKEY_TEST_DECRYPTER
    )
    assert read_back.data == payload

    decode_parms = read_back['/DecodeParms']
    cf_dict = decode_parms[1] if with_hex_filter else decode_parms
    assert cf_dict['/Name'] == pdf_name('/Custom')

    proxy = reader.get_object(
        default_ref.reference, transparent_decrypt=False
    )
    raw_stream = proxy.raw_object
    assert isinstance(raw_stream, generic.StreamObject)
    if main_unencrypted:
        assert raw_stream.encoded_data == payload
    else:
        assert raw_stream.encoded_data != payload