def _encode_image(cls, image_filename, lossless):
    """Read an image file and wrap it in a PDFImage.

    When ``lossless`` is truthy the file is parsed with PnmPicture and its
    pixel data is stored through EncodedObject.create(). Otherwise the raw
    file content is embedded as-is with the DCTDecode filter and the
    geometry comes from cls._get_image_geometry().

    Raises KeyError when the picture format or the reported colorspace name
    is not present in the respective lookup table.
    """
    if not lossless:
        # Lossy path: embed the file bytes untouched, only tagging them
        # with the DCTDecode filter.
        with open(image_filename, "rb") as f:
            imgdata = EncodedObject(encoded_data=f.read(), filtering=Filter.DCTDecode)
        (width, height, colorspace_name, bits_per_component) = cls._get_image_geometry(image_filename)
        known_colorspaces = {
            "Gray": PDFImageColorSpace.DeviceGray,
            "sRGB": PDFImageColorSpace.DeviceRGB,
        }
        colorspace = known_colorspaces[colorspace_name]
    else:
        picture = PnmPicture.read_file(image_filename)
        if picture.img_format == PnmPictureFormat.Bitmap:
            # PDFs use exactly inverted syntax for 1-bit images
            picture.invert()
        imgdata = EncodedObject.create(picture.data)
        format_properties = {
            PnmPictureFormat.Bitmap: (PDFImageColorSpace.DeviceGray, 1),
            PnmPictureFormat.Graymap: (PDFImageColorSpace.DeviceGray, 8),
            PnmPictureFormat.Pixmap: (PDFImageColorSpace.DeviceRGB, 8),
        }
        (colorspace, bits_per_component) = format_properties[picture.img_format]
        (width, height) = (picture.width, picture.height)

    return PDFImage(
        width=width,
        height=height,
        colorspace=colorspace,
        bits_per_component=bits_per_component,
        imgdata=imgdata,
        inverted=False,
    )
def _test_png_predictors(self, columns, encoded_data, pixel_data):
    """Assert that PNG-predicted ``encoded_data`` decodes to ``pixel_data``.

    The data is wrapped in an uncompressed EncodedObject that uses the
    PNGPredictionOptimum predictor with the given ``columns``. Besides the
    overall comparison, the result is compared in ``columns``-sized slices
    so a failure message names the first mismatching row.
    """
    encoded = EncodedObject(encoded_data, Filter.Uncompressed, columns = columns, predictor = Predictor.PNGPredictionOptimum)
    decoded = encoded.decode()

    self.assertEqual(len(decoded), len(pixel_data))
    for (row, start) in enumerate(range(0, len(pixel_data), columns)):
        end = start + columns
        self.assertEqual(decoded[start : end], pixel_data[start : end], "Row %d" % (row))
    self.assertEqual(decoded, pixel_data)
def _generate_form(self):
    """Merge the seal and sign-form templates into the PDF and fill the form.

    The seal template is merged first; its "SealObject" reference and the
    font reference are handed to the sign-form template. After merging, the
    form's /BBox is set and the ${WIDTH}, ${HEIGHT} and ${TEXT} placeholders
    in its decoded stream are replaced before the stream is written back
    compressed.

    Returns the reference of the merged "SignFormObject".
    """
    font_xref = self._get_font_reference()

    seal_template = PDFTemplate(pkgutil.get_data("llpdf.resources", "seal.pdft"))
    seal_xref = seal_template.merge_into_pdf(self._pdf)["SealObject"]

    sign_template = PDFTemplate(pkgutil.get_data("llpdf.resources", "sign_form.pdft"))
    sign_template["FontXRef"] = font_xref
    sign_template["SealFormXRef"] = seal_xref
    signform_xref = sign_template.merge_into_pdf(self._pdf)["SignFormObject"]

    signform = self._pdf.lookup(signform_xref)
    signform.content[PDFName("/BBox")] = self._get_signature_bbox()
    form_stream = signform.stream.decode()

    # Only the extents of the bounding box are needed for the placeholders.
    (_, _, width, height) = self._get_signature_bbox()
    substitutions = (
        ("WIDTH", b"%.0f" % (width - 1)),
        ("HEIGHT", b"%.0f" % (height - 1)),
        ("TEXT", self._get_signing_text()),
    )
    for (varname, replacement) in substitutions:
        placeholder = ("${" + varname + "}").encode("ascii")
        form_stream = form_stream.replace(placeholder, replacement)

    signform.set_stream(EncodedObject.create(form_stream, compress=True))
    return signform_xref
def _add_xmp_metadata(self):
    """Create an XMP /Metadata stream object from the document info.

    The values are substituted into the packaged ``xmp_metadata.xml``
    template with the ``%`` operator. Modification and creation dates are
    taken from the /Info dictionary when present and otherwise fall back to
    the current local timestamp.

    Returns the xref of the newly stored object.
    """
    info_node = self._pdf.lookup(self._pdf.trailer[PDFName("/Info")])
    metadata_date = Timestamp.localnow()

    def info_timestamp(key):
        # Parse the named date entry of the info node, or fall back to "now".
        if PDFName(key) in info_node.content:
            return Timestamp.frompdf(info_node.content[PDFName(key)].decode("ascii"))
        return metadata_date

    modify_date = info_timestamp("/ModDate")
    create_date = info_timestamp("/CreationDate")

    xmp_metadata = {
        "creator_tool": self._pdf.get_info("Creator"),
        "producer": self._pdf.get_info("Producer"),
        "modify_date": modify_date.format_xml(),
        "create_date": create_date.format_xml(),
        "metadata_date": metadata_date.format_xml(),
        "description": self._pdf.get_info("Subject"),
        "title": self._pdf.get_info("Title"),
        "creator": self._pdf.get_info("Author"),
        "keywords": self._pdf.get_info("Keywords"),
        "document_uuid": str(uuid.uuid4()),
        "instance_uuid": str(uuid.uuid4()),
        "llpdf_version": "llpdf " + llpdf.VERSION,
    }
    template_text = pkgutil.get_data("llpdf.resources", "xmp_metadata.xml").decode("utf-8")
    rendered = (template_text % xmp_metadata).encode("utf-8")

    content = {
        PDFName("/Type"): PDFName("/Metadata"),
        PDFName("/Subtype"): PDFName("/XML"),
    }
    # The metadata stream is stored without compression.
    metadata_stream = EncodedObject.create(rendered, compress = False)
    objid = self._pdf.get_free_objid()
    pdf_object = PDFObject.create(objid, gennum = 0, content = content, stream = metadata_stream)
    self._pdf.replace_object(pdf_object)
    return pdf_object.xref
def _create_object(self, content, raw_stream=None):
    """Store a new generation-0 object in the PDF and return its PDFXRef.

    ``content`` becomes the object's content; when ``raw_stream`` is not
    None it is wrapped with EncodedObject.create() and attached as the
    object's stream.
    """
    objid = self._pdf.get_free_objid()
    new_object = PDFObject.create(objid=objid, gennum=0, content=content)
    wants_stream = raw_stream is not None
    if wants_stream:
        new_object.set_stream(EncodedObject.create(raw_stream))
    self._pdf.replace_object(new_object)
    return PDFXRef(objid, 0)
def get_fontfile_object(self, objid):
    """Build a PDF object holding the font's three data segments.

    /Length1, /Length2 and /Length3 record the lengths of the clear,
    cipher and trailer data; the stream is their concatenation, created
    with compression enabled. The object uses ``objid`` and generation 0.
    """
    (clear, cipher, trailer) = (self._cleardata, self._cipherdata, self._trailerdata)
    segment_lengths = {
        PDFName("/Length1"): len(clear),
        PDFName("/Length2"): len(cipher),
        PDFName("/Length3"): len(trailer),
    }
    font_stream = EncodedObject.create(clear + cipher + trailer, compress = True)
    return PDFObject.create(objid, 0, segment_lengths, font_stream)
def serialize_xref_object(self, trailer_dict, objid):
    """Build a /XRef stream object with the given ``objid``.

    The object's content is a copy of ``trailer_dict`` extended with the
    /Type, /Index, /Size and /W entries (these win over same-named trailer
    keys); the stream is whatever self._serialize_xref_data() returns for
    the chosen offset width.
    """
    offset_width = self._get_offset_width()

    content = dict(trailer_dict)
    entry_count = self._max_objid + 1
    content[PDFName("/Type")] = PDFName("/XRef")
    content[PDFName("/Index")] = [ 0, entry_count ]
    content[PDFName("/Size")] = entry_count
    content[PDFName("/W")] = [ 1, offset_width, 1 ]

    xref_data = self._serialize_xref_data(offset_width)
    xref_stream = EncodedObject.create(xref_data)
    return PDFObject.create(objid = objid, gennum = 0, content = content, stream = xref_stream)
def _add_color_profile(self):
    """Store a color profile as a new stream object and return its xref.

    The profile is read from the file named by the ``color_profile``
    argument; when that argument is None the sRGB profile packaged in
    ``llpdf.resources`` is used instead.
    """
    profile_filename = self._args.color_profile
    if profile_filename is not None:
        with open(profile_filename, "rb") as f:
            profile_data = f.read()
    else:
        profile_data = pkgutil.get_data("llpdf.resources", "sRGB_IEC61966-2-1_black_scaled.icc")

    content = {
        PDFName("/N"): 3,
        PDFName("/Range"): [ 0, 1, 0, 1, 0, 1 ],
    }
    objid = self._pdf.get_free_objid()
    profile_stream = EncodedObject.create(profile_data)
    pdf_object = PDFObject.create(objid, gennum = 0, content = content, stream = profile_stream)
    self._pdf.replace_object(pdf_object)
    return pdf_object.xref
def run(self):
    """Embed the payload file into the PDF as an uncompressed stream object.

    The file named by the ``embed_payload`` argument is read completely and
    stored in a freshly allocated object. The object's dictionary records
    the payload's base filename, its modification time (UTC, formatted as
    ``YYYY-MM-DDTHH:MM:SSZ``) and the llpdf version.
    """
    payload_filename = self._args.embed_payload
    with open(payload_filename, "rb") as f:
        payload = f.read()

    objid = self._pdf.get_free_objid()
    self._log.debug("Embedding %d bytes payload from file \"%s\" into PDF file as objid %d", len(payload), payload_filename, objid)

    mtime = os.stat(payload_filename).st_mtime
    # datetime.utcfromtimestamp() is deprecated since Python 3.12 and yields
    # a naive datetime; build an aware UTC datetime instead. The formatted
    # string is the same because the format contains no offset directive.
    mtime_utc = datetime.datetime.fromtimestamp(mtime, tz=datetime.timezone.utc)
    mtime_str = mtime_utc.strftime("%Y-%m-%dT%H:%M:%SZ")

    content = {
        PDFName("/PDFMinify.OriginalFilename"): os.path.basename(payload_filename).encode(),
        PDFName("/PDFMinify.MTime"): mtime_str.encode(),
        PDFName("/PDFMinify.Version"): llpdf.VERSION.encode(),
    }
    obj = PDFObject.create(objid=objid, gennum=0, content=content)
    # The payload is stored verbatim, without compression.
    obj.set_stream(EncodedObject.create(payload, compress=False))
    self._pdf.replace_object(obj)
def stream(self):
    """Return this object's stream as an EncodedObject, or None without one."""
    if self.has_stream:
        return EncodedObject.from_object(self)
    return None
def test_flate_decompress(self):
    # A FlateDecode-filtered object must decode to the original plaintext.
    deflated = bytes.fromhex("78 9c 73 cb cf 4f 4a 2c 02 00 07 eb 02 5a")
    decoded = EncodedObject(deflated, Filter.FlateDecode).decode()
    self.assertEqual(decoded, b"Foobar")
def test_no_decompress(self):
    # With the Uncompressed filter, decoding must return the data unchanged.
    plain = b"Foobar"
    decoded = EncodedObject(plain, Filter.Uncompressed).decode()
    self.assertEqual(decoded, b"Foobar")
def append_stream(self, text):
    """Append ``text`` (encoded as UTF-8) to the contents object's stream.

    If the contents object already has a stream, the decoded previous data
    is kept and the new text follows after a newline; otherwise the text
    becomes the whole stream.
    """
    appended = text.encode("utf-8")
    if self.contents_obj.stream is None:
        combined = appended
    else:
        combined = self.contents_obj.stream.decode() + b"\n" + appended
    self.contents_obj.set_stream(EncodedObject.create(combined))
def serialize(self, serializer):
    """Serialize the contained objects into a single /ObjStm object.

    The stream starts with a header line of space-separated
    "objid offset" pairs, where each offset is the position of the object's
    data relative to the end of the header. The serialized content of each
    object follows, each terminated by a newline. /First holds the header
    length including its trailing newline.
    """
    index_fields = [ ]
    body = bytearray()
    for contained in self._contained_objects:
        serialized = serializer.serialize(contained.content)
        # The offset is the body length before this object is appended.
        index_fields.extend((contained.objid, len(body)))
        body.extend(serialized + b"\n")

    header = " ".join(map(str, index_fields)).encode("utf-8") + b"\n"
    content = {
        PDFName("/Type"): PDFName("/ObjStm"),
        PDFName("/N"): self.objects_inside_count,
        PDFName("/First"): len(header),
    }
    objstm_stream = EncodedObject.create(header + body)
    return PDFObject.create(objid = self.objid, gennum = 0, content = content, stream = objstm_stream)
def run(self):
    """Replace every compressed, decompressible stream by its decoded form.

    Stream objects that are not compressed, or that cannot be decompressed,
    are left untouched.
    """
    for pdf_obj in self._pdf.stream_objects:
        if not (pdf_obj.stream.compressed and pdf_obj.stream.decompressible):
            continue
        plain_stream = EncodedObject.create(pdf_obj.stream.decode(), compress=False)
        pdf_obj.set_stream(plain_stream)
def run(self):
    """Apply a fixed sequence of document fix-ups to self._pdf.

    In order: set a random trailer /ID, disable image interpolation, drop
    /Group entries from pages and form XObjects, add a color profile, a
    color intent and XMP metadata and reference them from every catalog,
    mark all annotations printable, and finally amend fonts and their font
    descriptors (/CIDToGIDMap, /CharSet, /CIDSet).
    NOTE(review): this looks like PDF/A preparation -- confirm with caller.
    """
    type_key = PDFName("/Type")
    subtype_key = PDFName("/Subtype")
    group_key = PDFName("/Group")
    descriptor_key = PDFName("/FontDescriptor")

    # Put an ID into the PDF
    self._pdf.trailer[PDFName("/ID")] = [os.urandom(16), os.urandom(16)]

    # Do not interpolate any image objects
    for image_obj in self._pdf.image_objects:
        image_obj.content[PDFName("/Interpolate")] = False

    # No pages may be transparency groups
    for page in self._pdf.pages:
        if group_key in page.content:
            del page.content[group_key]

    # No transparency groups in Form XObjects
    for obj in self._pdf:
        is_form_xobject = (obj.getattr(type_key) == PDFName("/XObject")) and (obj.getattr(subtype_key) == PDFName("/Form"))
        if is_form_xobject and (obj.getattr(group_key) is not None):
            del obj.content[group_key]

    # Add color profile data, the color intent object and the XMP metadata
    color_profile_xref = self._add_color_profile()
    color_intent_xref = self._add_color_intent(color_profile_xref)
    metadata_xref = self._add_xmp_metadata()

    # Set output intent and metadata reference for all catalogs
    for obj in self._pdf:
        if obj.getattr(type_key) == PDFName("/Catalog"):
            obj.content[PDFName("/OutputIntents")] = color_intent_xref
            obj.content[PDFName("/Metadata")] = metadata_xref

    # Set all annotations with annotation flag "printable" (4)
    for obj in self._pdf:
        if obj.getattr(type_key) == PDFName("/Annot"):
            obj.content[PDFName("/F")] = 4

    # Font descriptors may be shared between fonts; handle each only once.
    fixed_descriptors = set()
    # Iterate over a snapshot because CIDSet objects are added on the way.
    for font_obj in list(self._pdf):
        if not (font_obj.getattr(type_key) == PDFName("/Font")):
            continue

        subtype = font_obj.getattr(subtype_key)
        is_cid_type2 = subtype == PDFName("/CIDFontType2")
        if is_cid_type2:
            # Type2 fonts need to have a CIDtoGIDMap
            font_obj.content[PDFName("/CIDToGIDMap")] = PDFName("/Identity")

        if descriptor_key not in font_obj.content:
            continue
        font_descriptor_xref = font_obj.content[descriptor_key]
        if font_descriptor_xref in fixed_descriptors:
            continue
        fixed_descriptors.add(font_descriptor_xref)
        font_descriptor_obj = self._pdf.lookup(font_descriptor_xref)

        if subtype == PDFName("/Type1"):
            # Update Type1 font descriptors with missing CharSet entries
            font_file_obj = self._pdf.lookup(font_descriptor_obj.content[PDFName("/FontFile")])
            t1_font = T1Font.from_fontfile_obj(font_file_obj)
            font_descriptor_obj.content[PDFName("/CharSet")] = t1_font.charset_string
        elif is_cid_type2:
            # Type2 font descriptors need to have a CIDSet
            glyph_count = self.type2_font_glyph_count(font_obj.content[PDFName("/W")])
            (full_bytes, set_bits) = divmod(glyph_count, 8)
            # The remaining glyphs occupy the most significant bits of the
            # final byte.
            last_byte = ((1 << set_bits) - 1) << (8 - set_bits)
            self._log.debug("Assuming CIDSet for %d glyphs of %d full 0xff bytes and a final value of 0x%x.", glyph_count, full_bytes, last_byte)

            cidset_objid = self._pdf.get_free_objid()
            cidset_data = (b"\xff" * full_bytes) + bytes([last_byte])
            pdf_object = PDFObject.create(cidset_objid, gennum=0, content={}, stream=EncodedObject.create(cidset_data))
            self._pdf.replace_object(pdf_object)
            font_descriptor_obj.content[PDFName("/CIDSet")] = pdf_object.xref
def put_image(self, pdfimage):
    """Add ``pdfimage`` as an image XObject and draw it centered on the page.

    The image is registered as a new PDF object (JPEG data is embedded with
    the DCTDecode filter, RGB/GRAY data is stored compressed), scaled to fit
    the printable area without ever being enlarged, and referenced from the
    page's resources under the name /Img.

    Raises UnsupportedError for image formats other than JPEG, RGB or GRAY
    and NotImplementedError for unknown pixel formats.
    """
    image_format = pdfimage.image_format
    if image_format not in [ "JPEG", "RGB", "GRAY" ]:
        raise UnsupportedError("PDF can only handle JPEG, RGB or GRAY image formats, but %s was supplied." % (pdfimage.image_format))

    custom_metadata = {
        "resolution_dpi": list(pdfimage.resolution_dpi),
        "comment": pdfimage.comment,
    }
    image = self.pdf.new_object({
        PDFName("/Type"): PDFName("/XObject"),
        PDFName("/Subtype"): PDFName("/Image"),
        PDFName("/Interpolate"): True,
        PDFName("/Width"): pdfimage.dimensions.width,
        PDFName("/Height"): pdfimage.dimensions.height,
        PDFName("/CustomMetadata"): PDFString(json.dumps(custom_metadata)),
    })

    if pdfimage.image_format == "JPEG":
        image_stream = EncodedObject(encoded_data = pdfimage.data, filtering = Filter.DCTDecode)
    elif pdfimage.image_format in [ "RGB", "GRAY" ]:
        image_stream = EncodedObject.create(pdfimage.data, compress = True)
    else:
        raise NotImplementedError(pdfimage.image_format)
    image.set_stream(image_stream)

    # Pixel format -> (color space name, bits per component)
    pixel_format_properties = (
        (PixelFormat.RGB, "/DeviceRGB", 8),
        (PixelFormat.Grayscale, "/DeviceGray", 8),
        (PixelFormat.BlackWhite, "/DeviceGray", 1),
    )
    for (pixel_format, colorspace_name, bits_per_component) in pixel_format_properties:
        if pdfimage.pixel_format == pixel_format:
            image.content[PDFName("/ColorSpace")] = PDFName(colorspace_name)
            image.content[PDFName("/BitsPerComponent")] = bits_per_component
            break
    else:
        raise NotImplementedError(pdfimage.pixel_format)

    image_extents_mm = pdfimage.extents_mm
    image_scale_x = self.printable_area_mm.width / image_extents_mm.width
    image_scale_y = self.printable_area_mm.height / image_extents_mm.height
    # Never enlarge: the scalar is capped at 1.
    image_scalar = min(image_scale_x, image_scale_y, 1)

    printed_size_mm = image_extents_mm * image_scalar
    page_dimensions_mm = self._hl_page.extents_mm
    # Center the image by splitting the unused space evenly.
    offset_mm = (page_dimensions_mm - printed_size_mm) / 2
    # Convert millimeters to 1/72 inch units.
    offset_dots = offset_mm * 72 / 25.4
    printed_size_dots = printed_size_mm * 72 / 25.4

    params = {
        "xoffset": offset_dots.width,
        "yoffset": offset_dots.height,
        "xscalar": printed_size_dots.width,
        "yscalar": printed_size_dots.height,
    }
    drawing_ops = textwrap.dedent("""\
    %(xscalar)f 0 0 %(yscalar)f %(xoffset)f %(yoffset)f cm
    /Img Do
    """ % (params))
    self._hl_page.append_stream(drawing_ops)

    page_content = self._hl_page.page_obj.content
    if PDFName("/Resources") not in page_content:
        page_content[PDFName("/Resources")] = { }
    resources = page_content[PDFName("/Resources")]
    if PDFName("/XObject") not in resources:
        resources[PDFName("/XObject")] = { }
    resources[PDFName("/XObject")][PDFName("/Img")] = image.xref