Exemplo n.º 1
0
def simple_page(pdf_out, ascii_text, compress=False, extra_stream=False):
    # based on the minimal pdf file of
    # https://brendanzagaeski.appspot.com/0004.html
    from pyhanko.pdf_utils import generic, writer
    from pyhanko.pdf_utils.generic import pdf_name
    from pyhanko.pdf_utils.misc import get_courier

    resources = generic.DictionaryObject({
        pdf_name('/Font'):
        generic.DictionaryObject({pdf_name('/F1'): get_courier()})
    })
    media_box = generic.ArrayObject(map(generic.NumberObject,
                                        (0, 0, 300, 144)))

    def stream_data(txt, y):
        return f'BT /F1 18 Tf 0 {y} Td ({txt}) Tj ET'.encode('ascii')

    stream = generic.StreamObject(stream_data=stream_data(ascii_text, 0))
    if compress:
        stream.compress()

    if extra_stream:
        stream2 = generic.StreamObject(
            stream_data=stream_data(ascii_text, 100))
        if compress:
            stream2.compress()
        contents = generic.ArrayObject(
            [pdf_out.add_object(stream),
             pdf_out.add_object(stream2)])
    else:
        contents = pdf_out.add_object(stream)
    return writer.PageObject(contents=contents,
                             media_box=media_box,
                             resources=resources)
Exemplo n.º 2
0
def test_custom_crypt_filter(with_hex_filter, main_unencrypted):
    w = writer.PdfFileWriter()
    custom = pdf_name('/Custom')
    crypt_filters = {
        custom: StandardRC4CryptFilter(keylen=16),
    }
    if main_unencrypted:
        # streams/strings are unencrypted by default
        cfc = CryptFilterConfiguration(crypt_filters=crypt_filters)
    else:
        crypt_filters[STD_CF] = StandardAESCryptFilter(keylen=16)
        cfc = CryptFilterConfiguration(crypt_filters=crypt_filters,
                                       default_string_filter=STD_CF,
                                       default_stream_filter=STD_CF)
    sh = StandardSecurityHandler.build_from_pw_legacy(
        rev=StandardSecuritySettingsRevision.RC4_OR_AES128,
        id1=w.document_id[0],
        desired_user_pass="******",
        desired_owner_pass="******",
        keylen_bytes=16,
        crypt_filter_config=cfc)
    w._assign_security_handler(sh)
    test_data = b'This is test data!'
    dummy_stream = generic.StreamObject(stream_data=test_data)
    dummy_stream.add_crypt_filter(name=custom, handler=sh)
    ref = w.add_object(dummy_stream)
    dummy_stream2 = generic.StreamObject(stream_data=test_data)
    ref2 = w.add_object(dummy_stream2)

    if with_hex_filter:
        dummy_stream.apply_filter(pdf_name('/AHx'))
    out = BytesIO()
    w.write(out)
    r = PdfFileReader(out)
    r.decrypt("ownersecret")
    obj: generic.StreamObject = r.get_object(ref.reference)
    assert obj.data == test_data
    if with_hex_filter:
        cf_dict = obj['/DecodeParms'][1]
    else:
        cf_dict = obj['/DecodeParms']

    assert cf_dict['/Name'] == pdf_name('/Custom')

    obj2: generic.DecryptedObjectProxy = r.get_object(
        ref2.reference, transparent_decrypt=False)
    raw = obj2.raw_object
    assert isinstance(raw, generic.StreamObject)
    if main_unencrypted:
        assert raw.encoded_data == test_data
    else:
        assert raw.encoded_data != test_data
Exemplo n.º 3
0
def test_custom_crypt_filter_errors():
    w = writer.PdfFileWriter()
    custom = pdf_name('/Custom')
    crypt_filters = {
        custom: StandardRC4CryptFilter(keylen=16),
        STD_CF: StandardAESCryptFilter(keylen=16)
    }
    cfc = CryptFilterConfiguration(crypt_filters=crypt_filters,
                                   default_string_filter=STD_CF,
                                   default_stream_filter=STD_CF)
    sh = StandardSecurityHandler.build_from_pw_legacy(
        rev=StandardSecuritySettingsRevision.RC4_OR_AES128,
        id1=w.document_id[0],
        desired_user_pass="******",
        desired_owner_pass="******",
        keylen_bytes=16,
        crypt_filter_config=cfc)
    w._assign_security_handler(sh)
    test_data = b'This is test data!'
    dummy_stream = generic.StreamObject(stream_data=test_data)
    with pytest.raises(misc.PdfStreamError):
        dummy_stream.add_crypt_filter(name='/Idontexist', handler=sh)

    # no handler
    dummy_stream.add_crypt_filter(name=custom)
    dummy_stream._handler = None
    w.add_object(dummy_stream)

    out = BytesIO()
    with pytest.raises(misc.PdfStreamError):
        w.write(out)
Exemplo n.º 4
0
def test_identity_crypt_filter(use_alias, with_never_decrypt):
    w = writer.PdfFileWriter()
    sh = StandardSecurityHandler.build_from_pw("secret")
    w.security_handler = sh
    idf: IdentityCryptFilter = IdentityCryptFilter()
    assert sh.crypt_filter_config[pdf_name("/Identity")] is idf
    if use_alias:
        sh.crypt_filter_config._crypt_filters[pdf_name("/IdentityAlias")] = idf
        assert sh.crypt_filter_config[pdf_name("/IdentityAlias")] is idf
    if use_alias:
        # identity filter can't be serialised, so this should throw an error
        with pytest.raises(misc.PdfError):
            w._assign_security_handler(sh)
        return
    else:
        w._assign_security_handler(sh)
    test_bytes = b'This is some test data that should remain unencrypted.'
    test_stream = generic.StreamObject(stream_data=test_bytes, handler=sh)
    test_stream.apply_filter("/Crypt",
                             params={pdf_name("/Name"): pdf_name("/Identity")})
    ref = w.add_object(test_stream).reference
    out = BytesIO()
    w.write(out)

    r = PdfFileReader(out)
    r.decrypt("secret")
    the_stream = r.get_object(ref, never_decrypt=with_never_decrypt)
    assert the_stream.encoded_data == test_bytes
    assert the_stream.data == test_bytes
Exemplo n.º 5
0
def test_add_stream_to_direct_arr():
    w = writer.PdfFileWriter()
    w.insert_page(simple_page(w, 'Test Test', extra_stream=True))
    out = BytesIO()
    w.write(out)
    out.seek(0)
    w = IncrementalPdfFileWriter(out)

    new_stream = 'BT /F1 18 Tf 0 50 Td (Test2 Test2) Tj ET'.encode('ascii')
    stream = generic.StreamObject(stream_data=new_stream)
    stream_ref = w.add_object(stream)
    w.add_stream_to_page(0, stream_ref)

    out = BytesIO()
    w.write(out)
    out.seek(0)
    r = PdfFileReader(out)
    # check if the content stream was added
    page_obj_ref = r.root['/Pages']['/Kids'].raw_get(0)
    assert isinstance(page_obj_ref, generic.IndirectObject)
    page_obj = page_obj_ref.get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 3
    assert stream_ref.idnum in (c.idnum for c in conts)
    # check if resource dictionary is still OK
    assert '/F1' in page_obj['/Resources']['/Font']
Exemplo n.º 6
0
def test_xref_stream_parse_width_value_default_ix2():
    # no tail part
    encoded_entries = [
        "0000000000",
        "0100000011",
    ]
    xref_data = b''.join(binascii.unhexlify(entr) for entr in encoded_entries)
    stream_obj = generic.StreamObject(
        dict_data={
            generic.pdf_name('/W'): generic.ArrayObject(list(
                map(generic.NumberObject, [1, 4, 0])
            )),
            generic.pdf_name('/Size'): 2
        },
        stream_data=xref_data
    )

    expected_out = [
        XRefEntry(
            xref_type=XRefType.FREE, location=None, idnum=0, generation=0
        ),
        XRefEntry(xref_type=XRefType.STANDARD, location=0x11, idnum=1),
    ]

    actual_out = list(parse_xref_stream(stream_obj))
    assert actual_out == expected_out
Exemplo n.º 7
0
def test_write_embedded_string_objstream():
    ffile = ttLib.TTFont(NOTO_SERIF_JP)
    ga = GlyphAccumulator(ffile)
    cid_hx, _ = ga.feed_string('テスト')
    assert cid_hx == '0637062a0639'
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_XREF))
    obj_stream = w.prepare_object_stream()
    font_ref = ga.embed_subset(w, obj_stream=obj_stream)
    stream = generic.StreamObject(
        stream_data=f'BT /FEmb 18 Tf 0 100 Td <{cid_hx}> Tj ET'.encode(
            'ascii'))
    stream_ref = w.add_object(stream)
    w.add_stream_to_page(0,
                         stream_ref,
                         resources=generic.DictionaryObject({
                             pdf_name('/Font'):
                             generic.DictionaryObject(
                                 {pdf_name('/FEmb'): font_ref})
                         }))
    out = BytesIO()
    w.write(out)
    out.seek(0)
    r = PdfFileReader(out)
    page_obj = r.root['/Pages']['/Kids'][0].get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 2
    assert stream_ref.idnum in (c.idnum for c in conts)
    assert font_ref.idnum in r.xrefs.in_obj_stream
    out.seek(0)

    # attempt to grab the font from the object stream
    font_ref.pdf = r
    font = font_ref.get_object()
    assert font['/Type'] == pdf_name('/Font')
Exemplo n.º 8
0
def test_write_embedded_string():
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    with open(NOTO_SERIF_JP, 'rb') as ffile:
        ga = GlyphAccumulator(w, ffile, font_size=10)
    # shape the string, just to register the glyphs as used
    ga.shape('テスト')
    # ... but we're not going to use the result

    # hardcoded CIDs
    cid_hx = '0637062a0639'
    stream = generic.StreamObject(
        stream_data=f'BT /FEmb 18 Tf 0 100 Td <{cid_hx}> Tj ET'.encode('ascii')
    )
    stream_ref = w.add_object(stream)
    w.add_stream_to_page(
        0, stream_ref, resources=generic.DictionaryObject({
            pdf_name('/Font'): generic.DictionaryObject({
                pdf_name('/FEmb'): ga.as_resource()
            })
        })
    )
    out = BytesIO()
    w.write(out)
    out.seek(0)
    r = PdfFileReader(out)
    page_obj = r.root['/Pages']['/Kids'][0].get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 2
    assert stream_ref.idnum in (c.idnum for c in conts)
Exemplo n.º 9
0
    def _format_tounicode_cmap(self, registry, ordering, supplement):
        def _pairs():
            for ch, (cid, _, _) in self._glyphs.items():
                yield cid, ch

        by_cid = iter(sorted(_pairs(), key=lambda t: t[0]))
        header = ('/CIDInit /ProcSet findresource begin\n'
                  '12 dict begin\n'
                  'begincmap\n'
                  '/CIDSystemInfo 3 dict dup begin\n'
                  f'/Registry ({registry}) def\n'
                  f'/Ordering ({ordering}) def\n'
                  f'/Supplement {supplement}\n def'
                  'end def\n'
                  f'/CMapName {registry}-{ordering}-{supplement:03} def\n'
                  '/CMapType 2 def\n'
                  '1 begincodespacerange\n'
                  '<0000> <FFFF>\n'
                  'endcodespacerange\n')
        # TODO make an effort to use ranges when appropriate, and at least
        #  group the glyphs
        body = '\n'.join(
            f'1 beginbfchar\n<{cid:04x}> <{ord(ch):04x}>\nendbfchar\n'
            for cid, ch in by_cid)

        footer = ('endcmap\n'
                  'CMapName currentdict /CMap\n'
                  'defineresource pop\n'
                  'end\nend')
        stream = generic.StreamObject(stream_data=(header + body +
                                                   footer).encode('ascii'))
        return stream
Exemplo n.º 10
0
    def apply(self, dest_page: int, x: int, y: int):
        """
        Apply a stamp to a particular page in the PDF writer attached to this
        :class:`.TextStamp` instance.

        :param dest_page:
            Index of the page to which the stamp is to be applied
            (starting at `0`).
        :param x:
            Horizontal position of the stamp's lower left corner on the page.
        :param y:
            Vertical position of the stamp's lower left corner on the page.
        :return:
            A reference to the affected page object, together with
            a ``(width, height)`` tuple describing the dimensions of the stamp.
        """
        stamp_ref = self.register()
        resource_name = b'/Stamp' + hexlify(uuid.uuid4().bytes)
        stamp_paint = b'q 1 0 0 1 %g %g cm %s Do Q' % (rd(x), rd(y),
                                                       resource_name)
        stamp_wrapper_stream = generic.StreamObject(stream_data=stamp_paint)
        resources = generic.DictionaryObject({
            pdf_name('/XObject'):
            generic.DictionaryObject(
                {pdf_name(resource_name.decode('ascii')): stamp_ref})
        })
        wr = self.writer
        page_ref = wr.add_stream_to_page(dest_page,
                                         wr.add_object(stamp_wrapper_stream),
                                         resources)
        dims = (self.box.width, self.box.height)
        return page_ref, dims
Exemplo n.º 11
0
def init_xobject_dictionary(command_stream: bytes, box_width, box_height,
                            resources: Optional[generic.DictionaryObject]
                            = None) -> generic.StreamObject:
    """
    Helper function to initialise form XObject dictionaries.

    .. note::
        For utilities to handle image XObjects, see :mod:`.images`.

    :param command_stream:
        The XObject's raw appearance stream.
    :param box_width:
        The width of the XObject's bounding box.
    :param box_height:
        The height of the XObject's bounding box.
    :param resources:
        A resource dictionary to include with the form object.
    :return:
        A :class:`~.generic.StreamObject` representation of the form XObject.
    """
    resources = resources or generic.DictionaryObject()
    return generic.StreamObject({
        pdf_name('/BBox'): generic.ArrayObject(list(
            map(generic.FloatObject, (0.0, box_height, box_width, 0.0))
        )),
        pdf_name('/Resources'): resources,
        pdf_name('/Type'): pdf_name('/XObject'),
        pdf_name('/Subtype'): pdf_name('/Form')
    }, stream_data=command_stream)
Exemplo n.º 12
0
def empty_page(stream_xrefs=False):
    w = writer.PdfFileWriter(stream_xrefs=stream_xrefs)
    page = writer.PageObject(contents=w.add_object(
        generic.StreamObject(stream_data=b'')),
                             media_box=generic.ArrayObject([0, 0, 595, 842]))
    w.insert_page(page)
    return w
Exemplo n.º 13
0
    def as_pdf_object(self) -> generic.StreamObject:
        """
        Render the object stream to a PDF stream object

        :return: An instance of :class:`~.generic.StreamObject`.
        """
        stream_header = BytesIO()
        main_body = BytesIO()
        for idnum, obj in self._obj_refs.items():
            offset = main_body.tell()
            obj.write_to_stream(main_body, None)
            stream_header.write(b'%d %d ' % (idnum, offset))

        first_obj_offset = stream_header.tell()
        stream_header.seek(0)
        sh_bytes = stream_header.read(first_obj_offset)
        stream_data = sh_bytes + main_body.getvalue()
        stream_object = generic.StreamObject({
            pdf_name('/Type'): pdf_name('/ObjStm'),
            pdf_name('/N'): generic.NumberObject(len(self._obj_refs)),
            pdf_name('/First'): generic.NumberObject(first_obj_offset)
        }, stream_data=stream_data)
        if self.compress:
            stream_object.compress()
        return stream_object
Exemplo n.º 14
0
def test_add_stream():
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL))

    def stream_data(y):
        return f'BT /F1 18 Tf 0 {y} Td (Test Test) Tj ET'.encode('ascii')

    stream = generic.StreamObject(stream_data=stream_data(50))

    stream_ref = w.add_object(stream)
    w.add_stream_to_page(0, stream_ref)

    out = BytesIO()
    w.write(out)
    out.seek(0)
    r = PdfFileReader(out)
    # check if the content stream was added
    page_obj_ref = r.root['/Pages']['/Kids'].raw_get(0)
    assert isinstance(page_obj_ref, generic.IndirectObject)
    page_obj = page_obj_ref.get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 2
    assert stream_ref.idnum in (c.idnum for c in conts)
    # check if resource dictionary is still OK
    assert '/F1' in page_obj['/Resources']['/Font']

    # let's try adding a third
    out.seek(0)
    w = IncrementalPdfFileWriter(out)

    stream = generic.StreamObject(stream_data=stream_data(100))
    new_stream_ref = w.add_object(stream)
    w.add_stream_to_page(0, new_stream_ref)

    out = BytesIO()
    w.write(out)
    out.seek(0)
    r = PdfFileReader(out)
    # check if the content stream was added
    page_obj_ref = r.root['/Pages']['/Kids'].raw_get(0)
    assert isinstance(page_obj_ref, generic.IndirectObject)
    page_obj = page_obj_ref.get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 3
    ids = [c.idnum for c in conts]
    assert stream_ref.idnum in ids and new_stream_ref.idnum in ids
Exemplo n.º 15
0
def test_no_stms_in_obj_stm():
    w = writer.PdfFileWriter(stream_xrefs=True)
    obj_stm = w.prepare_object_stream()

    with pytest.raises(TypeError, match='Stream obj.*references'):
        w.add_object(
            generic.StreamObject(stream_data=b'Hello world!'),
            obj_stream=obj_stm
        )
Exemplo n.º 16
0
    def set_font_file(self, writer: BasePdfFileWriter):
        stream_buf = BytesIO()
        self.tt_font.save(stream_buf)
        stream_buf.seek(0)

        font_stream = generic.StreamObject(stream_data=stream_buf.read())
        font_stream.compress()
        font_stream_ref = writer.add_object(font_stream)
        self._font_descriptor[pdf_name('/FontFile2')] = font_stream_ref
        return font_stream_ref
Exemplo n.º 17
0
    def _import_object(self, obj: generic.PdfObject, reference_map: dict,
                       obj_stream) -> generic.PdfObject:

        # TODO check the spec for guidance on fonts. Do font identifiers have
        #  to be globally unique?

        # TODO deal with container_ref

        if isinstance(obj, generic.DecryptedObjectProxy):
            obj = obj.decrypted
        if isinstance(obj, generic.IndirectObject):
            try:
                return reference_map[obj.reference]
            except KeyError:
                refd = obj.get_object()
                # Add a placeholder to reserve the reference value.
                # This ensures correct behaviour in recursive calls
                # with self-references.
                new_ido = self.allocate_placeholder()
                reference_map[obj.reference] = new_ido
                imported = self._import_object(refd, reference_map, obj_stream)

                # if the imported object is a bare reference and/or a stream
                # object, we can't put it into an object stream.
                if isinstance(imported, OBJSTREAM_FORBIDDEN):
                    obj_stream = None

                # fill in the placeholder
                self.add_object(
                    imported, obj_stream=obj_stream, idnum=new_ido.idnum
                )
                return new_ido
        elif isinstance(obj, generic.DictionaryObject):
            raw_dict = {
                k: self._import_object(v, reference_map, obj_stream)
                for k, v in obj.items()
            }
            if isinstance(obj, generic.StreamObject):
                # In the vast majority of use cases, I'd expect the content
                # to be available in encoded form by default.
                # By initialising the stream object in this way, we avoid
                # a potentially costly decoding operation.
                return generic.StreamObject(
                    raw_dict, encoded_data=obj.encoded_data
                )
            else:
                return generic.DictionaryObject(raw_dict)
        elif isinstance(obj, generic.ArrayObject):
            return generic.ArrayObject(
                self._import_object(v, reference_map, obj_stream) for v in obj
            )
        else:
            return obj
Exemplo n.º 18
0
    def set_font_file(self, writer: BasePdfFileWriter):
        stream_buf = BytesIO()
        self.tt_font.save(stream_buf)
        stream_buf.seek(0)

        font_stream = generic.StreamObject({
            # this is a Type2 TTF font program
            pdf_name('/Subtype'): pdf_name('/CIDFontType2'),
        }, stream_data=stream_buf.read())
        font_stream.compress()
        font_stream_ref = writer.add_object(font_stream)
        self._font_descriptor[pdf_name('/FontFile2')] = font_stream_ref
        return font_stream_ref
Exemplo n.º 19
0
def test_code128_render():
    writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    bb = barcodes.BarcodeBox("code128", "this is a test")
    xobj_ref = writer.add_object(bb.as_form_xobject())

    stamp_wrapper_stream = generic.StreamObject(
        stream_data=b'q 1 0 0 1 50 50 cm /Barcode Do Q')
    resources = generic.DictionaryObject({
        pdf_name('/XObject'):
        generic.DictionaryObject({pdf_name('/Barcode'): xobj_ref})
    })
    writer.add_stream_to_page(0, writer.add_object(stamp_wrapper_stream),
                              resources)
Exemplo n.º 20
0
 def _cms_objects_to_streams(self, objs, seen, dest):
     for obj in objs:
         obj_bytes = obj.dump()
         try:
             yield seen[obj_bytes]
         except KeyError:
             ref = self.writer.add_object(
                 generic.StreamObject(stream_data=obj_bytes)
             )
             self._mark_modified()
             seen[obj_bytes] = ref
             dest.append(ref)
             yield ref
Exemplo n.º 21
0
 def set_font_file(self, writer: BasePdfFileWriter):
     stream_buf = BytesIO()
     # write the CFF table to the stream
     self.cff.compile(stream_buf, self.tt_font)
     stream_buf.seek(0)
     font_stream = generic.StreamObject({
         # this is a Type0 CFF font program (see Table 126 in ISO 32000)
         pdf_name('/Subtype'): pdf_name('/CIDFontType0C'),
     }, stream_data=stream_buf.read())
     font_stream.compress()
     font_stream_ref = writer.add_object(font_stream)
     self._font_descriptor[pdf_name('/FontFile3')] = font_stream_ref
     return font_stream_ref
Exemplo n.º 22
0
    def _embed_cert(self, cert):
        if self.writer is None:
            raise TypeError('This DSS does not support updates.')

        try:
            return self.certs[cert.issuer_serial]
        except KeyError:
            pass

        ref = self.writer.add_object(
            generic.StreamObject(stream_data=cert.dump())
        )
        self._mark_modified()
        self.certs[cert.issuer_serial] = ref
        return ref
Exemplo n.º 23
0
def test_xref_stream_parse_entry_types():
    encoded_entries = [
        "0000000000ffff",  # free
        "01000000110000",  # regular objects
        "01000000840000",
        "01000000bc0005",
        "01000001b40000",
        "01000002990000",
        "02000000030001",  # object in stream
        "03deadbeef1337",  # undefined (should be ignored)
        "02000000030002",  # object in stream
        "ffcafebabe0007",  # another undefined one
    ]
    xref_data = b''.join(binascii.unhexlify(entr) for entr in encoded_entries)
    stream_obj = generic.StreamObject(
        dict_data={
            generic.pdf_name('/W'): generic.ArrayObject(list(
                map(generic.NumberObject, [1, 4, 2])
            )),
            generic.pdf_name('/Size'): 10
        },
        stream_data=xref_data
    )

    expected_out = [
        XRefEntry(
            xref_type=XRefType.FREE, location=None, idnum=0, generation=0xffff
        ),
        XRefEntry(xref_type=XRefType.STANDARD, location=0x11, idnum=1),
        XRefEntry(xref_type=XRefType.STANDARD, location=0x84, idnum=2),
        XRefEntry(
            xref_type=XRefType.STANDARD, location=0xbc, idnum=3, generation=5
        ),
        XRefEntry(xref_type=XRefType.STANDARD, location=0x1b4, idnum=4),
        XRefEntry(xref_type=XRefType.STANDARD, location=0x299, idnum=5),
        XRefEntry(
            xref_type=XRefType.IN_OBJ_STREAM,
            location=ObjStreamRef(3, 1), idnum=6
        ),
        XRefEntry(
            xref_type=XRefType.IN_OBJ_STREAM,
            location=ObjStreamRef(3, 2),
            idnum=8   # idnum jump because of undefined entry
        ),
    ]

    actual_out = list(parse_xref_stream(stream_obj))
    assert actual_out == expected_out
Exemplo n.º 24
0
def test_premature_xref_stream_end():
    encoded_entries = ["000000ffff", "0100110000"]

    xref_data = b''.join(binascii.unhexlify(entr) for entr in encoded_entries)
    stream_obj = generic.StreamObject(
        dict_data={
            generic.pdf_name('/W'): generic.ArrayObject(list(
                map(generic.NumberObject, [1, 2, 2])
            )),
            generic.pdf_name('/Size'): 3  # one too many
        },
        stream_data=xref_data
    )

    with pytest.raises(misc.PdfReadError, match='incomplete entry'):
        list(parse_xref_stream(stream_obj))
Exemplo n.º 25
0
def test_bogus_metadata_manipulation():
    # test using a double signature created using Adobe Reader
    # (uses object streams, XMP metadata updates and all the fun stuff)

    infile = BytesIO(
        read_all(PDF_DATA_DIR + '/minimal-two-fields-signed-twice.pdf'))

    bogus = b'This is bogus data, yay!'

    def do_check():
        r = PdfFileReader(out)
        print(r.get_object(generic.Reference(2, 0, r), revision=3).data)
        s = r.embedded_signatures[0]
        status = validate_pdf_signature(s)
        assert status.modification_level == ModificationLevel.OTHER

    w = IncrementalPdfFileWriter(infile)
    w.root['/Metadata'] = w.add_object(generic.StreamObject(stream_data=bogus))
    w.update_root()
    out = BytesIO()
    w.write(out)
    do_check()

    w = IncrementalPdfFileWriter(infile)
    metadata_ref = w.root.raw_get('/Metadata')
    metadata_stream: generic.StreamObject = metadata_ref.get_object()
    metadata_stream.strip_filters()
    metadata_stream._data = bogus
    metadata_stream._encoded_data = None
    w.mark_update(metadata_ref)
    out = BytesIO()
    w.write(out)
    do_check()

    w = IncrementalPdfFileWriter(infile)
    w.root['/Metadata'] = generic.NullObject()
    w.update_root()
    out = BytesIO()
    w.write(out)
    do_check()

    w = IncrementalPdfFileWriter(infile)
    w.root['/Metadata'] = w.add_object(generic.NullObject())
    w.update_root()
    out = BytesIO()
    w.write(out)
    do_check()
Exemplo n.º 26
0
def test_code128_render():
    writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    bb = barcodes.BarcodeBox("code128", "this is a test")
    xobj_ref = writer.add_object(bb.as_form_xobject())

    stamp_wrapper_stream = generic.StreamObject(
        stream_data=b'q 1 0 0 1 50 50 cm /Barcode Do Q')
    resources = generic.DictionaryObject({
        pdf_name('/XObject'):
        generic.DictionaryObject({pdf_name('/Barcode'): xobj_ref})
    })
    writer.add_stream_to_page(0, writer.add_object(stamp_wrapper_stream),
                              resources)

    # TODO try to read back the code using some kind of barcode scanning
    #  library, perhaps.

    compare_output(writer, f'{EXPECTED_OUTPUT_DIR}/code128-test.pdf')
Exemplo n.º 27
0
def test_pubkey_wrong_cert():
    r = PdfFileReader(BytesIO(VECTOR_IMAGE_PDF))
    w = writer.PdfFileWriter()

    recpt_cert = load_cert_from_pemder(
        TESTING_CA_DIR + '/intermediate/newcerts/signer2.cert.pem')
    test_data = b'This is test data!'
    dummy_stream = generic.StreamObject(stream_data=test_data)
    ref = w.add_object(dummy_stream)
    w.encrypt_pubkey([recpt_cert])
    out = BytesIO()
    w.write(out)
    r = PdfFileReader(out)
    result = r.decrypt_pubkey(PUBKEY_TEST_DECRYPTER)
    assert result.status == AuthStatus.FAILED

    with pytest.raises(misc.PdfError):
        r.get_object(ref.reference)
Exemplo n.º 28
0
def test_xref_stream_trailing_data():
    encoded_entries = [
        "0000000000ffff",  # free
        "01000000110000",
        "deadbeef"
    ]
    xref_data = b''.join(binascii.unhexlify(entr) for entr in encoded_entries)
    stream_obj = generic.StreamObject(
        dict_data={
            generic.pdf_name('/W'): generic.ArrayObject(list(
                map(generic.NumberObject, [1, 4, 2])
            )),
            generic.pdf_name('/Size'): 2
        },
        stream_data=xref_data
    )

    with pytest.raises(misc.PdfReadError, match='Trailing'):
        list(parse_xref_stream(stream_obj))
Exemplo n.º 29
0
def test_write_embedded_string_objstream():
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_XREF))
    obj_stream = w.prepare_object_stream()
    with open(NOTO_SERIF_JP, 'rb') as ffile:
        ga = GlyphAccumulator(w, ffile, font_size=10, obj_stream=obj_stream)
    # shape the string, just to register the glyphs as used
    ga.shape('テスト')
    # ... but we're not going to use the result

    # hardcoded CIDs
    cid_hx = '0637062a0639'
    font_ref = ga.as_resource()
    stream = generic.StreamObject(
        stream_data=f'BT /FEmb 18 Tf 0 100 Td <{cid_hx}> Tj ET'.encode(
            'ascii'))
    stream_ref = w.add_object(stream)
    w.add_stream_to_page(0,
                         stream_ref,
                         resources=generic.DictionaryObject({
                             pdf_name('/Font'):
                             generic.DictionaryObject(
                                 {pdf_name('/FEmb'): font_ref})
                         }))
    out = BytesIO()
    w.write(out)
    out.seek(0)
    r = PdfFileReader(out)
    page_obj = r.root['/Pages']['/Kids'][0].get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 2
    assert stream_ref.idnum in (c.idnum for c in conts)
    xref_sections = r.xrefs._xref_sections
    last = xref_sections[len(xref_sections) - 1]
    assert font_ref.idnum in last.xref_data.xrefs_in_objstm
    out.seek(0)

    # attempt to grab the font from the object stream
    font_ref.pdf = r
    font = font_ref.get_object()
    assert font['/Type'] == pdf_name('/Font')
Exemplo n.º 30
0
    def _format_tounicode_cmap(self):
        # ToUnicode is always Adobe-UCS2-0 in our case,
        # since we use the fixed-with 2-byte UCS2 encoding for the BMP
        header = (
            '/CIDInit /ProcSet findresource begin\n'
            '12 dict begin\n'
            'begincmap\n'
            '/CIDSystemInfo <<\n'
            '/Registry (Adobe)\n'
            '/Ordering (UCS2)\n'
            '/Supplement 0\n'
            '>> def\n'
            '/CMapName /Adobe-Identity-UCS2 def\n'
            '/CMapType 2 def\n'
            '1 begincodespacerange\n'
            '<0000> <FFFF>\n'
            'endcodespacerange\n'
        )
        to_segment = (
            (cid, codepoints.encode('utf-16be'))
            for cid, codepoints in self._cid_to_unicode.items()
        )
        body = '\n'.join(_segment_cmap(to_segment))

        footer = (
            '\nendcmap\n'
            'CMapName\n'
            'currentdict\n'
            '/CMap defineresource\n'
            'pop\nend\nend'
        )
        stream = generic.StreamObject(
            stream_data=(header + body + footer).encode('ascii')
        )
        stream.compress()
        return stream