示例#1
0
	def _replace_image(self, img_xref, resampled_image):
		"""Replace the image object referenced by img_xref with resampled_image.

		The /SMask reference found in the existing image dictionary (if any)
		is handed on to the replacement image object. When resampled_image
		carries an alpha channel, the object behind that /SMask reference is
		replaced with the alpha channel as well.

		NOTE(review): if resampled_image has an alpha channel but the existing
		image has no /SMask entry, the attribute access on the missing
		reference below would fail -- confirm callers never produce that
		combination.
		"""
		smask_xref = self._pdf.lookup(img_xref).content.get(PDFName("/SMask"))

		replacement = PDFObject.create_image(img_xref.objid, img_xref.gennum, resampled_image, alpha_xref = smask_xref)
		self._pdf.replace_object(replacement)

		alpha_channel = resampled_image.alpha
		if not alpha_channel:
			# Nothing else to do when the resampled image has no alpha channel
			return

		# The alpha channel reuses the object/generation number of the existing soft mask
		replacement_mask = PDFObject.create_image(smask_xref.objid, smask_xref.gennum, alpha_channel)
		self._pdf.replace_object(replacement_mask)
示例#2
0
 def _create_object(self, content, raw_stream=None):
     """Store a new generation-0 object in the document and return its reference.

     content becomes the content of the new object. When raw_stream is not
     None it is wrapped via EncodedObject.create() and attached as the
     object's stream. The object number is taken from get_free_objid().
     """
     new_objid = self._pdf.get_free_objid()
     new_obj = PDFObject.create(objid=new_objid, gennum=0, content=content)

     has_stream = raw_stream is not None
     if has_stream:
         encoded = EncodedObject.create(raw_stream)
         new_obj.set_stream(encoded)

     self._pdf.replace_object(new_obj)
     reference = PDFXRef(new_objid, 0)
     return reference
示例#3
0
	def _add_xmp_metadata(self):
		"""Create an uncompressed XMP /Metadata stream object and return its xref.

		The values are collected from the document info (via get_info() and
		the /Info dictionary referenced by the trailer) and substituted into
		the "xmp_metadata.xml" template of the llpdf.resources package using
		%-formatting with a mapping. Modification and creation dates fall back
		to the current local time when the info dictionary lacks them.
		"""
		info_dict = self._pdf.lookup(self._pdf.trailer[PDFName("/Info")]).content
		now = Timestamp.localnow()

		def info_timestamp(key):
			# Missing entries are replaced by the metadata timestamp itself
			if key not in info_dict:
				return now
			return Timestamp.frompdf(info_dict[key].decode("ascii"))

		modified = info_timestamp(PDFName("/ModDate"))
		created = info_timestamp(PDFName("/CreationDate"))

		substitutions = {
			"creator_tool": self._pdf.get_info("Creator"),
			"producer": self._pdf.get_info("Producer"),
			"modify_date": modified.format_xml(),
			"create_date": created.format_xml(),
			"metadata_date": now.format_xml(),
			"description": self._pdf.get_info("Subject"),
			"title": self._pdf.get_info("Title"),
			"creator": self._pdf.get_info("Author"),
			"keywords": self._pdf.get_info("Keywords"),
			"document_uuid": str(uuid.uuid4()),
			"instance_uuid": str(uuid.uuid4()),
			"llpdf_version": "llpdf " + llpdf.VERSION,
		}

		template = pkgutil.get_data("llpdf.resources", "xmp_metadata.xml").decode("utf-8")
		xmp_bytes = (template % substitutions).encode("utf-8")

		metadata_dict = {
			PDFName("/Type"): PDFName("/Metadata"),
			PDFName("/Subtype"): PDFName("/XML"),
		}
		metadata_obj = PDFObject.create(
			self._pdf.get_free_objid(),
			gennum = 0,
			content = metadata_dict,
			stream = EncodedObject.create(xmp_bytes, compress = False),
		)
		self._pdf.replace_object(metadata_obj)
		return metadata_obj.xref
示例#4
0
	def get_fontfile_object(self, objid):
		"""Build the font file stream object with the given object number.

		The stream is the compressed concatenation of the clear, cipher and
		trailer data; /Length1, /Length2 and /Length3 hold the respective
		lengths of these three parts. The generation number is 0.
		"""
		(clear, cipher, trailer) = (self._cleardata, self._cipherdata, self._trailerdata)
		part_lengths = (
			("/Length1", len(clear)),
			("/Length2", len(cipher)),
			("/Length3", len(trailer)),
		)
		content = { PDFName(key): length for (key, length) in part_lengths }
		stream = EncodedObject.create(clear + cipher + trailer, compress = True)
		return PDFObject.create(objid, 0, content, stream)
	def run(self):
		"""Flatten every image that has an /SMask entry onto the configured background color.

		Each such image is fetched from the document, flattened by a lossless
		ImageReformatter with scale factor 1 and written back under its
		original object and generation number. The replacement image object is
		created without an alpha reference.
		"""
		reformatter = ImageReformatter(lossless = True, scale_factor = 1)
		for image_obj in self._pdf.image_objects:
			if PDFName("/SMask") not in image_obj.content:
				# Images without a soft mask are left untouched
				continue

			original = self._pdf.get_image(image_obj.xref)
			flattened = reformatter.flatten(original, background_color = self._args.background_color)

			target = image_obj.xref
			self._pdf.replace_object(PDFObject.create_image(target.objid, target.gennum, flattened))
示例#6
0
	def serialize_xref_object(self, trailer_dict, objid):
		"""Build the /XRef stream object for this table.

		The object's dictionary is a copy of trailer_dict extended (or
		overridden) with /Type, /Index, /Size and /W; trailer_dict itself is
		not modified. The stream holds the data produced by
		_serialize_xref_data() for the offset width reported by
		_get_offset_width(). The generation number is 0.
		"""
		offset_width = self._get_offset_width()
		entry_count = self._max_objid + 1

		xref_dict = dict(trailer_dict)
		xref_dict[PDFName("/Type")] = PDFName("/XRef")
		xref_dict[PDFName("/Index")] = [ 0, entry_count ]
		xref_dict[PDFName("/Size")] = entry_count
		# Field widths: 1 byte, offset_width bytes, 1 byte
		xref_dict[PDFName("/W")] = [ 1, offset_width, 1 ]

		xref_stream = EncodedObject.create(self._serialize_xref_data(offset_width))
		return PDFObject.create(objid = objid, gennum = 0, content = xref_dict, stream = xref_stream)
示例#7
0
 def _add_color_intent(self, color_profile_xref):
     """Add an output intent array object to the document and return its xref.

     The new object's content is a one-element list holding a single
     /OutputIntent dictionary whose /DestOutputProfile points at
     color_profile_xref. The object gets a free object number and
     generation number 0.
     """
     output_intent = {
         PDFName("/Type"): PDFName("/OutputIntent"),
         PDFName("/DestOutputProfile"): color_profile_xref,
         PDFName("/Info"): b"sRGB IEC61966-2.1",
         PDFName("/OutputCondition"): b"sRGB",
         PDFName("/OutputConditionIdentifier"): b"Custom",
         PDFName("/RegistryName"): b"",
         PDFName("/S"): PDFName("/GTS_PDFA1"),
     }
     intent_array_obj = PDFObject.create(
         self._pdf.get_free_objid(), gennum=0, content=[output_intent])
     self._pdf.replace_object(intent_array_obj)
     return intent_array_obj.xref
示例#8
0
	def _add_color_profile(self):
		"""Add a color profile stream object to the document and return its xref.

		The profile data is read from the file named by the color_profile
		argument; when that argument is None, the
		"sRGB_IEC61966-2-1_black_scaled.icc" resource of the llpdf.resources
		package is used instead. The object's dictionary has /N set to 3 and
		/Range set to [0, 1, 0, 1, 0, 1].
		"""
		profile_path = self._args.color_profile
		if profile_path is not None:
			with open(profile_path, "rb") as f:
				icc_data = f.read()
		else:
			# No profile given: use the one bundled with the package
			icc_data = pkgutil.get_data("llpdf.resources", "sRGB_IEC61966-2-1_black_scaled.icc")

		profile_dict = {
			PDFName("/N"): 3,
			PDFName("/Range"): [ 0, 1, 0, 1, 0, 1 ],
		}
		profile_obj = PDFObject.create(
			self._pdf.get_free_objid(),
			gennum = 0,
			content = profile_dict,
			stream = EncodedObject.create(icc_data),
		)
		self._pdf.replace_object(profile_obj)
		return profile_obj.xref
	def run(self):
		"""Renumber the document's objects according to the old-to-new xref mapping.

		Works in three phases: first a relinked copy of every object is built
		(content passed through _relink(), xref translated through the
		mapping, stream kept), then every object whose xref is a key of the
		mapping is deleted, and finally the relinked copies are inserted.
		"""
		def relinked_copy(obj):
			new_content = self._relink(obj.content)
			# Objects that are not part of the mapping keep their xref
			new_xref = self._old_to_new.get(obj.xref, obj.xref)
			return PDFObject.create(new_xref.objid, new_xref.gennum, new_content, obj.stream)

		# All copies are built before the document is modified
		pending = [ relinked_copy(obj) for obj in self._pdf ]

		# Remove the objects stored under their old references
		for stale_xref in self._old_to_new:
			self._pdf.delete_object(stale_xref.objid, stale_xref.gennum)

		# Insert the relinked copies
		for new_obj in pending:
			self._pdf.replace_object(new_obj)
示例#10
0
	def get_font_descriptor_object(self, objid, fontfile_xref):
		"""Build the /FontDescriptor object for this font.

		objid is the object number of the new object (generation 0) and
		fontfile_xref the reference stored as /FontFile. /Ascent and
		/CapHeight are both taken from element 3 of the font bounding box,
		/Descent from element 1. /Flags is the integer value of
		FontDescriptorFlag.Symbolic.
		"""
		bbox = self.get_font_bbox()
		(lower_bound, upper_bound) = (bbox[1], bbox[3])
		descriptor = {
			PDFName("/Type"): PDFName("/FontDescriptor"),
			PDFName("/ItalicAngle"): 0,
			PDFName("/FontFile"): fontfile_xref,
			PDFName("/FontName"): self.get_font_name(),
			PDFName("/Flags"): int(FontDescriptorFlag.Symbolic),
			PDFName("/FontBBox"): bbox,
			PDFName("/Ascent"): upper_bound,
			PDFName("/CapHeight"): upper_bound,
			PDFName("/Descent"): lower_bound,
			PDFName("/CharSet"): self.charset_string,
			PDFName("/MissingWidth"): self.get_missing_width(),
			# TODO: Do we need a /StemV entry (e.g. 30) here?
		}
		return PDFObject.create(objid, 0, descriptor)
	def serialize(self, serializer):
		"""Serialize the contained objects into a single /ObjStm stream object.

		The stream starts with a header line of space-separated
		"objid offset" pairs (offsets relative to the start of the object
		data), followed by the serialized content of each contained object,
		each terminated by a newline. /First is the length of the header
		including its trailing newline.
		"""
		index_fields = [ ]
		body = bytearray()
		for contained in self._contained_objects:
			serialized = serializer.serialize(contained.content)
			# The offset is the amount of object data written so far
			index_fields += [ contained.objid, len(body) ]
			body += serialized + b"\n"

		header_text = " ".join(map(str, index_fields))
		header_bytes = header_text.encode("utf-8") + b"\n"

		objstm_dict = {
			PDFName("/Type"): PDFName("/ObjStm"),
			PDFName("/N"): self.objects_inside_count,
			PDFName("/First"): len(header_bytes),
		}
		objstm_stream = EncodedObject.create(header_bytes + body)
		return PDFObject.create(objid = self.objid, gennum = 0, content = objstm_dict, stream = objstm_stream)
示例#12
0
	def get_font_object(self, objid, fontdescriptor_xref):
		"""Build the /Type1 /Font object for this font.

		objid is the object number of the new object (generation 0) and
		fontdescriptor_xref the reference stored as /FontDescriptor. /Widths
		covers every code from the smallest to the largest key of the widths
		dictionary; codes without an entry get the missing width.
		"""
		widths = self.get_widths_dict()
		(first_char, last_char) = (min(widths.keys()), max(widths.keys()))
		fallback_width = self.get_missing_width()

		widths_array = [ ]
		for char_code in range(first_char, last_char + 1):
			widths_array.append(widths.get(char_code, fallback_width))

		encoding = {
			PDFName("/Type"): PDFName("/Encoding"),
			PDFName("/Differences"): build_encoding_array("latin1"),
		}
		font_dict = {
			PDFName("/Type"): PDFName("/Font"),
			PDFName("/Subtype"): PDFName("/Type1"),
			PDFName("/FirstChar"): first_char,
			PDFName("/LastChar"): last_char,
			PDFName("/Widths"): widths_array,
			PDFName("/Encoding"): encoding,
			PDFName("/BaseFont"): self.get_font_name(),
			PDFName("/FontDescriptor"): fontdescriptor_xref,
		}
		return PDFObject.create(objid, 0, font_dict)
示例#13
0
    def run(self):
        """Embed the payload file as an uncompressed stream object in the PDF.

        The file named by the embed_payload argument is read completely and
        stored as the stream of a new generation-0 object. The object's
        dictionary records the original file name (base name only), the
        file's modification time as a UTC timestamp of the form
        YYYY-mm-ddTHH:MM:SSZ, and the llpdf version.

        Raises:
            OSError: If the payload file cannot be opened or read.
        """
        payload_path = self._args.embed_payload
        with open(payload_path, "rb") as f:
            payload = f.read()
            # Stat the open descriptor instead of the path, so the timestamp
            # belongs to the very file whose bytes were just read.
            mtime = os.fstat(f.fileno()).st_mtime

        objid = self._pdf.get_free_objid()
        self._log.debug(
            "Embedding %d bytes payload from file \"%s\" into PDF file as objid %d",
            len(payload), payload_path, objid)

        # datetime.utcfromtimestamp() is deprecated since Python 3.12; the
        # timezone-aware conversion yields the same formatted string.
        mtime_utc = datetime.datetime.fromtimestamp(
            mtime, tz=datetime.timezone.utc)
        mtime_str = mtime_utc.strftime("%Y-%m-%dT%H:%M:%SZ")

        content = {
            PDFName("/PDFMinify.OriginalFilename"):
            os.path.basename(payload_path).encode(),
            PDFName("/PDFMinify.MTime"):
            mtime_str.encode(),
            PDFName("/PDFMinify.Version"):
            llpdf.VERSION.encode(),
        }
        obj = PDFObject.create(objid=objid, gennum=0, content=content)
        obj.set_stream(EncodedObject.create(payload, compress=False))
        self._pdf.replace_object(obj)
示例#14
0
    def run(self):
        """Apply a fixed sequence of in-place fixups to the document.

        In order: set a random trailer /ID, disable image interpolation,
        remove /Group entries from pages and Form XObjects, add a color
        profile, an output intent and XMP metadata and reference the latter
        two from every catalog, set the flag value 4 on every annotation,
        and finally patch fonts and their font descriptors (/CIDToGIDMap,
        /CharSet, /CIDSet).
        """
        # Put an ID into the PDF
        self._pdf.trailer[PDFName("/ID")] = [os.urandom(16), os.urandom(16)]

        # Do not interpolate any image objects
        for image_obj in self._pdf.image_objects:
            image_obj.content[PDFName("/Interpolate")] = False

        # No pages may be transparency groups
        for page in self._pdf.pages:
            if PDFName("/Group") in page.content:
                del page.content[PDFName("/Group")]

        # No transparency groups in Form XObjects
        for obj in self._pdf:
            if (obj.getattr(PDFName("/Type"))
                    == PDFName("/XObject")) and (obj.getattr(
                        PDFName("/Subtype")) == PDFName("/Form")) and (
                            obj.getattr(PDFName("/Group")) is not None):
                del obj.content[PDFName("/Group")]

        # Add color profile data
        color_profile_xref = self._add_color_profile()

        # Add color intent object
        color_intent_xref = self._add_color_intent(color_profile_xref)

        # Add XMP metadata
        metadata_xref = self._add_xmp_metadata()

        # Set output intent and metadata reference for all catalogs
        for obj in self._pdf:
            if obj.getattr(PDFName("/Type")) == PDFName("/Catalog"):
                obj.content[PDFName("/OutputIntents")] = color_intent_xref
                obj.content[PDFName("/Metadata")] = metadata_xref

        # Set all annotations with annotation flag "printable" (4)
        for obj in self._pdf:
            if obj.getattr(PDFName("/Type")) == PDFName("/Annot"):
                obj.content[PDFName("/F")] = 4

        # References of font descriptors that were already handled; a
        # descriptor referenced by several fonts is only processed for the
        # first font that refers to it.
        fixed_descriptors = set()
        # Iterate over a snapshot of the objects -- presumably because CIDSet
        # objects are added to the document inside the loop (TODO confirm).
        for obj in list(self._pdf):
            if obj.getattr(PDFName("/Type")) == PDFName("/Font"):
                font_obj = obj

                if font_obj.getattr(
                        PDFName("/Subtype")) == PDFName("/CIDFontType2"):
                    # Type2 fonts need to have a CIDtoGIDMap
                    font_obj.content[PDFName("/CIDToGIDMap")] = PDFName(
                        "/Identity")

                if PDFName("/FontDescriptor") in font_obj.content:
                    font_descriptor_xref = font_obj.content[PDFName(
                        "/FontDescriptor")]
                    if font_descriptor_xref in fixed_descriptors:
                        continue
                    fixed_descriptors.add(font_descriptor_xref)

                    font_descriptor_obj = self._pdf.lookup(
                        font_descriptor_xref)
                    if font_obj.getattr(
                            PDFName("/Subtype")) == PDFName("/Type1"):
                        # Update Type1 font descriptors with missing CharSet entries
                        # (an existing /CharSet entry is overwritten as well)
                        font_file_obj = self._pdf.lookup(
                            font_descriptor_obj.content[PDFName("/FontFile")])
                        t1_font = T1Font.from_fontfile_obj(font_file_obj)
                        font_descriptor_obj.content[PDFName(
                            "/CharSet")] = t1_font.charset_string
                    elif font_obj.getattr(
                            PDFName("/Subtype")) == PDFName("/CIDFontType2"):
                        # Type2 font descriptors need to have a CIDSet
                        glyph_count = self.type2_font_glyph_count(
                            font_obj.content[PDFName("/W")])

                        # The CIDSet stream is a bitmap with one bit per
                        # glyph: full_bytes bytes of 0xff followed by one byte
                        # whose set_bits most significant bits are set. When
                        # glyph_count is a multiple of 8, that final byte is
                        # 0x00 and is still appended.
                        full_bytes = glyph_count // 8
                        set_bits = glyph_count % 8
                        last_byte = ((1 << set_bits) - 1) << (8 - set_bits)
                        self._log.debug(
                            "Assuming CIDSet for %d glyphs of %d full 0xff bytes and a final value of 0x%x.",
                            glyph_count, full_bytes, last_byte)

                        cidset_objid = self._pdf.get_free_objid()
                        stream = (bytes([0xff]) * full_bytes) + bytes(
                            [last_byte])
                        pdf_object = PDFObject.create(
                            cidset_objid,
                            gennum=0,
                            content={},
                            stream=EncodedObject.create(stream))
                        self._pdf.replace_object(pdf_object)

                        font_descriptor_obj.content[PDFName(
                            "/CIDSet")] = pdf_object.xref