def _cache_xobj(contents, resources, mbox, bbox, rotation):
    ''' Look up (or build and remember) the Form XObject for a page view.

        The cache lives on ``contents`` as the private ``xobj_cachedict``
        attribute and is keyed by ``(mbox, bbox, rotation)``.  A freshly
        built XObject gets the private members x, y, w, h describing
        its (possibly rotated) rectangle.
    '''
    cache = contents.xobj_cachedict
    if cache is None:
        # First request for this content object: attach an empty cache.
        cache = contents.private.xobj_cachedict = {}

    key = mbox, bbox, rotation
    xobj = cache.get(key)
    if xobj is not None:
        return xobj

    # A view box equal to the media box means the whole page is used.
    builder = _get_subpage if mbox != bbox else _get_fullpage
    xobj = PdfDict(
        builder(contents, resources, mbox, bbox, rotation),
        Type=PdfName.XObject,
        Subtype=PdfName.Form,
        FormType=1,
        BBox=PdfArray(bbox),
    )

    box = bbox
    if rotation:
        # Rotated unit vectors form the first four matrix entries;
        # the translation part is left at (0, 0).
        x_axis = rotate_point((1, 0), rotation)
        y_axis = rotate_point((0, 1), rotation)
        xobj.Matrix = PdfArray(x_axis + y_axis + (0, 0))
        box = rotate_rect(box, rotation)

    private = xobj.private
    private.x = box[0]
    private.y = box[1]
    private.w = box[2] - box[0]
    private.h = box[3] - box[1]

    cache[key] = xobj
    return xobj
def wrap_object(obj, width, margin):
    ''' Build a standalone page dictionary around a Form or Image XObject.

        A Form XObject is reused directly: its stream, filters, resources
        and BBox become the page's contents, resources and MediaBox.
        An Image XObject is scaled to ``width`` minus the first and third
        ``margin`` entries, keeping its aspect ratio; the page height
        follows from the scaled image plus the second and fourth entries.

        Raises TypeError for any other subtype.
    '''
    def compact(number):
        # Fixed notation with trailing zeros (and a bare dot) removed.
        return ('%.9f' % number).rstrip('0').rstrip('.')

    page_contents = PdfDict(indirect=True)
    kind = obj.Subtype

    if kind == PdfName.Form:
        page_contents._stream = obj.stream
        page_contents.Length = obj.Length
        page_contents.Filter = obj.Filter
        page_contents.DecodeParms = obj.DecodeParms
        page_resources = obj.Resources
        media_box = obj.BBox
    elif kind == PdfName.Image:
        x_off = margin[0]
        y_off = margin[1]
        draw_w = width - margin[0] - margin[2]
        img_w, img_h = float(obj.Width), float(obj.Height)
        draw_h = 1.0 * draw_w / img_w * img_h
        page_h = draw_h + margin[1] + margin[3]
        operands = tuple(
            compact(value) for value in (draw_w, draw_h, x_off, y_off))
        page_contents.stream = 'q %s 0 0 %s %s %s cm /MyImage Do Q' % operands
        page_resources = PdfDict(XObject=PdfDict(MyImage=obj))
        media_box = PdfArray((0, 0, width, page_h))
    else:
        raise TypeError("Expected Form or Image XObject")

    return PdfDict(
        indirect=True,
        Type=PdfName.Page,
        MediaBox=media_box,
        Resources=page_resources,
        Contents=page_contents,
    )
def parse_xref_stream(self, source, int=int, range=range,
                      enumerate=enumerate, islice=itertools.islice,
                      defaultdict=collections.defaultdict,
                      hexlify=binascii.hexlify):
    ''' Parse one cross-reference section stored as an xref stream.

        Records the offsets of uncompressed objects in
        ``source.obj_offsets``, stores the map of compressed objects
        (container number -> list of (object number, index)) on
        ``obj.private.object_streams`` and returns the stream dictionary.
    '''

    def field_values(data, widths):
        # Endless generator: walk the widths round-robin, yielding each
        # field as a big-endian integer, or None for a zero-width field.
        start = 0
        for width in itertools.cycle(widths):
            stop = start + width
            yield int(hexlify(data[start:stop]), 16) if width else None
            start = stop

    record_offset = source.obj_offsets.setdefault
    next_token = source.next

    # The stream object must open with "<num> obj <<".
    header = source.multiple(3)
    valid = (len(header) == 3 and header[0].isdigit() and
             header[1] == 'obj' and header[2] == '<<')
    if not valid:
        source.exception('Expected xref stream start')

    obj = self.readdict(source)
    if obj.Type != PdfName.XRef:
        source.exception('Expected dict type of /XRef')

    tok = next_token()
    self.readstream(obj, self.findstream(obj, tok, source), source, True)
    raw_stream = obj.stream
    if not uncompress([obj], True):
        source.exception('Could not decompress Xref stream')

    stream = obj.stream
    # Fix for issue #76 -- goofy compressed xref stream
    # that is NOT ACTUALLY COMPRESSED
    if stream is raw_stream:
        stream = convert_store(raw_stream)

    index = [int(x) for x in (obj.Index or PdfArray(['0', obj.Size]))]
    subsections = zip(index[0::2], index[1::2])

    widths = [int(x) for x in obj.W]
    if len(widths) != 3:
        source.exception('Invalid entry size')

    object_streams = defaultdict(list)
    fields = field_values(stream, widths)
    for objnum, count in subsections:
        for _ in range(count):
            xtype, first, second = islice(fields, 3)
            if xtype == 2:
                # Compressed object: container number and index within it.
                object_streams[first].append((objnum, second))
            elif xtype in (1, None) and first:
                # In-use object (type 1, or type field omitted) with a
                # non-zero offset.
                record_offset((objnum, second or 0), first)
            objnum += 1

    obj.private.object_streams = object_streams
    return obj
def readarray(self, source, PdfArray=PdfArray):
    ''' Read tokens following a [ until the matching ] and return them
        as a PdfArray.

        An R token replaces the two values before it (object number,
        then generation) with the result of ``self.findindirect``.
        Tokens found in ``self.special`` are handed to their handler.
    '''
    lookup_special = self.special.get
    items = []

    for token in source:
        if token == ']':
            break
        if token in ']R':
            # Indirect reference: the last two items are its operands.
            gen = items.pop()
            token = self.findindirect(items.pop(), gen)
        else:
            handler = lookup_special(token)
            if handler is not None:
                token = handler(source)
        items.append(token)

    return PdfArray(items)
def parsexref(self, source, int=int, range=range):
    ''' Parse (one of) the cross-reference file section(s)

        Two layouts are handled:

        * an xref stream object ("<num> <gen> obj << ... >> stream");
          the stream dictionary is returned.
        * a plain "xref" table followed by "trailer"; the trailer
          dictionary is returned.  If the table cannot be tokenized
          normally, a line-based fallback is attempted.

        Offsets of in-use objects are recorded in ``source.obj_offsets``
        (first occurrence wins) and appended to ``source.all_offsets``.
        Problems are reported through ``source.exception``.
    '''

    def _pairs(array):
        # Yield consecutive (first object number, count) integer pairs.
        i = 0
        while 1:
            yield int(array[i]), int(array[i + 1])
            i += 2
            if (i + 1) >= len(array):
                break

    def convert_to_int(d, size):
        # Left-pad the field to 8 bytes and decode it as big-endian.
        if size > 8:
            source.exception('Invalid size in convert_to_int')
        d = '\x00\x00\x00\x00\x00\x00\x00\x00' + d
        d = d[-8:]
        return struct.unpack('>q', d)[0]

    def read_trailer():
        tok = next()
        if tok != '<<':
            source.exception('Expected "<<" starting catalog')
        return self.readdict(source)

    setdefault = source.obj_offsets.setdefault
    add_offset = source.all_offsets.append
    next = source.next
    tok = next()
    if tok.isdigit():
        # check for xref stream object
        objid = source.multiple(2)
        ok = len(objid) == 2
        ok = ok and objid[0].isdigit()
        ok = ok and objid[1] == 'obj'
        if ok:
            next()  # start of dict
            obj = self.readdict(source)
            assert obj.Type == '/XRef'
            tok = next()
            end = source.floc + int(obj.Length)
            self.readstream(obj, self.findstream(obj, tok, source), source)
            uncompress([obj])
            num_pairs = obj.Index or PdfArray(['0', obj.Size])
            entry_sizes = [int(x) for x in obj.W]
            object_streams = {}
            # The entries of all subsections are stored back to back in
            # the stream, so the read position must carry over from one
            # /Index pair to the next rather than restart at zero.
            stream_offset = 0
            for num, size in _pairs(num_pairs):
                for _ in range(size):
                    for i, width in enumerate(entry_sizes):
                        d = obj.stream[stream_offset:stream_offset + width]
                        stream_offset += width
                        di = convert_to_int(d, width)
                        if i == 0:
                            xref_type = di
                            # An omitted type field means "in use".
                            if xref_type == 0 and entry_sizes[0] == 0:
                                xref_type = 1
                        elif i == 1:
                            if xref_type == 1:
                                offset = di
                            elif xref_type == 2:
                                objnum = di
                        elif i == 2:
                            if xref_type == 1:
                                generation = di
                            elif xref_type == 2:
                                obstr_idx = di
                    if xref_type == 1 and offset != 0:
                        setdefault((num, generation), offset)
                        add_offset(offset)
                    elif xref_type == 2:
                        # Group compressed objects by their container.
                        object_streams.setdefault(objnum, []).append(
                            obstr_idx)
                    num += 1
            self.load_stream_objects(object_streams)
            source.floc = end
            endit = source.multiple(2)
            if endit != ['endstream', 'endobj']:
                source.exception('Expected endstream endobj')
            return obj
        else:
            source.exception('Expected xref stream')

    elif tok == 'xref':
        # plain xref table
        start = source.floc
        try:
            while 1:
                tok = next()
                if tok == 'trailer':
                    return read_trailer()
                startobj = int(tok)
                for objnum in range(startobj, startobj + int(next())):
                    offset = int(next())
                    generation = int(next())
                    inuse = next()
                    if inuse == 'n':
                        if offset != 0:
                            setdefault((objnum, generation), offset)
                            add_offset(offset)
                    elif inuse != 'f':
                        raise ValueError
        except Exception:
            # Deliberate best effort: any parse failure falls through to
            # the line-based recovery below.  KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            pass
        try:
            # Table formatted incorrectly.
            # See if we can figure it out anyway.
            end = source.fdata.rindex('trailer', start)
            table = source.fdata[start:end].splitlines()
            for line in table:
                tokens = line.split()
                if len(tokens) == 2:
                    objnum = int(tokens[0])
                elif len(tokens) == 3:
                    offset, generation, inuse = \
                        int(tokens[0]), int(tokens[1]), tokens[2]
                    if offset != 0 and inuse == 'n':
                        setdefault((objnum, generation), offset)
                        add_offset(offset)
                    objnum += 1
                elif tokens:
                    log.error('Invalid line in xref table: %s' % repr(line))
                    raise ValueError
            log.warning('Badly formatted xref table')
            source.floc = end
            next()
        except Exception:
            source.floc = start
            source.exception('Invalid table format')
        return read_trailer()
    else:
        source.exception('Expected "xref" keyword or xref stream object')
def __init__(self, version='1.3', compress=False):
    ''' Start with an empty page array and an empty kill map, and keep
        the requested PDF version string and compression flag.
    '''
    self.pagearray = PdfArray()
    self.killobj = dict()
    self.version, self.compress = version, compress
class PdfWriter(object):
    ''' Collect pages and write them out as a PDF file.

        Pages are added with addpage()/addpages(); the trailer (with the
        catalog and page tree) is built lazily on first access of the
        ``trailer`` property and discarded whenever a page is added.
    '''

    _trailer = None

    def __init__(self, version='1.3', compress=False):
        ''' Start with no pages; remember the version string and the
            compression flag that are passed on when writing.
        '''
        self.pagearray = PdfArray()
        self.compress = compress
        self.version = version
        self.killobj = {}

    def addpage(self, page):
        ''' Append a copy of ``page`` to the output and return self.

            Raises PdfOutputError if ``page.Type`` is not /Page.
        '''
        self._trailer = None
        if page.Type != PdfName.Page:
            raise PdfOutputError('Bad /Type:  Expected %s, found %s'
                                 % (PdfName.Page, page.Type))
        inheritable = page.inheritable  # searches for resources
        self.pagearray.append(
            IndirectPdfDict(
                page,
                Resources=inheritable.Resources,
                MediaBox=inheritable.MediaBox,
                CropBox=inheritable.CropBox,
                Rotate=inheritable.Rotate,
            )
        )

        # Add parents in the hierarchy to objects we
        # don't want to output
        killobj = self.killobj
        obj = page.Parent
        while obj is not None:
            objid = id(obj)
            if objid in killobj:
                break
            killobj[objid] = obj
            obj = obj.Parent
        return self

    addPage = addpage  # for compatibility with pyPdf

    def addpages(self, pagelist):
        ''' Add every page of ``pagelist`` in order and return self. '''
        for page in pagelist:
            self.addpage(page)
        return self

    def _get_trailer(self):
        trailer = self._trailer
        if trailer is not None:
            return trailer

        # Create the basic object structure of the PDF file
        trailer = PdfDict(
            Root=IndirectPdfDict(
                Type=PdfName.Catalog,
                Pages=IndirectPdfDict(
                    Type=PdfName.Pages,
                    Count=PdfObject(len(self.pagearray)),
                    Kids=self.pagearray
                )
            )
        )
        # Make all the pages point back to the page dictionary and
        # ensure they are indirect references
        pagedict = trailer.Root.Pages
        for page in pagedict.Kids:
            page.Parent = pagedict
        self._trailer = trailer
        return trailer

    def _set_trailer(self, trailer):
        self._trailer = trailer

    trailer = property(_get_trailer, _set_trailer)

    def write(self, fname, trailer=None):
        ''' Write the document to ``fname``.

            ``fname`` is either a path or an object with a ``write``
            method.  A file opened here is always closed again, even
            when formatting fails; a caller-supplied file object is
            left open.  ``trailer`` overrides the writer's own trailer
            when it is truthy.

            If the trailer carries an ``active_trace``, it is pickled to
            ``fname + '.trace'``.  That is only possible when a path was
            given, so the trace is skipped for file objects.
        '''
        trailer = trailer or self.trailer

        # Dump the data.  We either have a filename or a preexisting
        # file object.
        preexisting = hasattr(fname, 'write')
        # A conditional expression, so a falsy file object is still used.
        f = fname if preexisting else open(fname, 'wb')
        try:
            FormatObjects(f, trailer, self.version, self.compress,
                          self.killobj)
        finally:
            if not preexisting:
                f.close()

        # Dump the trace.
        trace = trailer.active_trace
        if trace is not None and not preexisting:
            with open(fname + '.trace', 'wb') as tracefile:
                pickle.dump(trace, tracefile)
class PdfWriter(object):
    ''' Accumulate page dictionaries and serialize them to a PDF file.

        The ``trailer`` property builds the catalog and page tree on
        demand; adding a page invalidates a previously built trailer.
    '''

    _trailer = None

    def __init__(self, version='1.3', compress=False):
        ''' Create a writer with no pages.  ``version`` and ``compress``
            are handed to the formatter when the file is written.
        '''
        self.pagearray = PdfArray()
        self.compress = compress
        self.version = version
        self.killobj = {}

    def addpage(self, page):
        ''' Add a copy of ``page`` (with its inheritable attributes made
            explicit) and return self.

            Raises PdfOutputError if ``page.Type`` is not /Page.
        '''
        self._trailer = None
        if page.Type != PdfName.Page:
            raise PdfOutputError('Bad /Type:  Expected %s, found %s'
                                 % (PdfName.Page, page.Type))
        inheritable = page.inheritable  # searches for resources
        self.pagearray.append(
            IndirectPdfDict(
                page,
                Resources=inheritable.Resources,
                MediaBox=inheritable.MediaBox,
                CropBox=inheritable.CropBox,
                Rotate=inheritable.Rotate,
            ))

        # Add parents in the hierarchy to objects we
        # don't want to output
        killobj = self.killobj
        obj = page.Parent
        while obj is not None:
            objid = id(obj)
            if objid in killobj:
                break
            killobj[objid] = obj
            obj = obj.Parent
        return self

    addPage = addpage  # for compatibility with pyPdf

    def addpages(self, pagelist):
        ''' Call addpage() for each item of ``pagelist``; return self. '''
        for page in pagelist:
            self.addpage(page)
        return self

    def _get_trailer(self):
        trailer = self._trailer
        if trailer is not None:
            return trailer

        # Create the basic object structure of the PDF file
        trailer = PdfDict(Root=IndirectPdfDict(
            Type=PdfName.Catalog,
            Pages=IndirectPdfDict(Type=PdfName.Pages,
                                  Count=PdfObject(len(self.pagearray)),
                                  Kids=self.pagearray)))
        # Make all the pages point back to the page dictionary and
        # ensure they are indirect references
        pagedict = trailer.Root.Pages
        for page in pagedict.Kids:
            page.Parent = pagedict
        self._trailer = trailer
        return trailer

    def _set_trailer(self, trailer):
        self._trailer = trailer

    trailer = property(_get_trailer, _set_trailer)

    def write(self, fname, trailer=None):
        ''' Serialize the document to ``fname`` (a path, or any object
            that has a ``write`` method).

            A truthy ``trailer`` argument replaces the writer's own
            trailer.  Files opened by this method are closed even if
            formatting raises; file objects passed in stay open.

            When the trailer has an ``active_trace`` it is pickled next
            to the output as ``fname + '.trace'``.  No trace is written
            for a file object, because there is no path to derive the
            trace file name from.
        '''
        trailer = trailer or self.trailer

        # Dump the data.  We either have a filename or a preexisting
        # file object.
        preexisting = hasattr(fname, 'write')
        if preexisting:
            f = fname
        else:
            f = open(fname, 'wb')
        try:
            FormatObjects(f, trailer, self.version, self.compress,
                          self.killobj)
        finally:
            if not preexisting:
                f.close()

        # Dump the trace.
        if preexisting:
            return
        trace = trailer.active_trace
        if trace is not None:
            with open(fname + '.trace', 'wb') as tracefile:
                pickle.dump(trace, tracefile)
def make_page(page, pdf_page, psem):
    """Fill ``pdf_page`` with the media box, resources, content stream and
    link annotations derived from ``page``.

    Generator-based coroutine (it uses ``yield from``).  ``psem`` is
    passed through unchanged to every image-producing coroutine.

    NOTE(review): ``self``, ``pdf_group``, ``default_rgb_colorspace``,
    ``pdf_font_mapping``, ``pdf_pages``, ``progress_cb`` and the
    ``nonlocal`` counter ``finished_pages`` are not defined in this
    function; they are presumably supplied by an enclosing function --
    confirm against the surrounding file.
    """
    # Prepare everything in parallel
    @asyncio.coroutine
    def get_pdf_thumbnail(psem):
        # A page without a thumbnail contributes None.
        if page.thumbnail is None:
            return None
        return (yield from page.thumbnail.pdf_thumbnail(psem))

    @asyncio.coroutine
    def get_pdf_background(psem):
        # A page without a background contributes None.
        if page.background is None:
            return None
        return (yield from page.background.pdf_image(psem))

    @asyncio.coroutine
    def get_pdf_mask(foreground, psem):
        # A mask is only requested for foregrounds that have no color.
        if foreground.color is not None:
            return None
        return (yield from foreground.pdf_mask(psem))

    # One gather for the four groups; the two inner gathers keep the
    # foreground images and masks in the order of page.foreground.
    pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = (
        yield from asyncio.gather(
            get_pdf_thumbnail(psem),
            get_pdf_background(psem),
            asyncio.gather(
                *[fg.pdf_image(psem) for fg in page.foreground]),
            asyncio.gather(
                *[get_pdf_mask(fg, psem) for fg in page.foreground])))
    pdf_page.MediaBox = PdfArray(
        [0, 0, PdfNumber(page.width), PdfNumber(page.height)])
    pdf_page.Group = pdf_group
    pdf_resources = PdfDict()
    pdf_colorspace = PdfDict()
    pdf_colorspace.DefaultRGB = default_rgb_colorspace
    pdf_resources.ColorSpace = pdf_colorspace
    pdf_xobject = PdfDict()
    if pdf_thumbnail is not None:
        pdf_page.Thumb = pdf_thumbnail
    # Running counter used to name image XObjects Im0, Im1, ...
    im_index = 0
    # Save graphics state and scale unity rectangle to page size
    matrix = TransformationMatrix()
    matrix.scale(page.width, page.height)
    before_graphics = ("q\n" +
                       "%s cm\n" % matrix.to_pdf())
    after_graphics = "\nQ\n"
    contents = ""
    graphics = ""
    # Last color written with "rg"; lets consecutive items that share a
    # color skip the operator.
    current_color = None
    if page.color != self._factory.WHITE:
        # Non-white page: fill the unit rectangle with the page color.
        if current_color != page.color:
            current_color = page.color
            graphics += page.color.to_pdf() + " rg "
        graphics += ("0 0 1 1 re " +
                     "f\n")
    if pdf_background is not None:
        pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background
        graphics += "/Im%d Do\n" % im_index
        im_index += 1
    for foreground, pdf_foreground, pdf_mask in zip(
            page.foreground, pdf_foregrounds, pdf_masks):
        if pdf_mask is not None:
            # The mask is registered under its own name but no "Do" is
            # emitted for it; only the foreground is painted.
            pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask
            im_index += 1
        pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground
        if (foreground.color is not None and current_color
                != foreground.color):
            current_color = foreground.color
            graphics += foreground.color.to_pdf() + " rg "
        graphics += "/Im%d Do\n" % im_index
        im_index += 1
    if graphics:
        contents += (before_graphics +
                     graphics.rstrip(" \n") +
                     after_graphics)
    current_color = None
    # Text is emitted with font /F1 at size 1 and "3 Tr" (text rendering
    # mode 3, which the PDF specification defines as invisible).
    before_text = ("BT\n" +
                   "/F1 1 Tf 3 Tr\n")
    after_text = "\nET\n"
    text = ""
    pdf_annots = []
    for t in page.text:
        if t.text:
            matrix = TransformationMatrix()
            # Glyph size is 0.5 x 1
            matrix.scale(2 / len(t.text), 1)
            matrix.translate(-0.5, -0.5)
            if t.direction == "ltr":
                pass
            elif t.direction == "rtl":
                matrix.translate(0, -1)
            elif t.direction == "ttb":
                matrix.rotate(90)
            matrix.rotate(-t.rotation)
            matrix.translate(0.5, 0.5)
            matrix.scale(t.width, t.height)
            matrix.translate(t.x, t.y)
            # The string is written as hex-encoded UTF-16BE.
            text += "%s Tm %s Tj\n" % (
                matrix.to_pdf(),
                PdfString().from_bytes(
                    t.text.encode("utf-16-be"), bytes_encoding="hex"))
        if t.external_link is not None or t.internal_link is not None:
            # Link annotation covering the rectangle of the text item.
            pdf_annot = PdfDict()
            pdf_annots.append(pdf_annot)
            pdf_annot.Type = PdfName.Annot
            pdf_annot.Subtype = PdfName.Link
            pdf_annot.Border = [0, 0, 0]
            pdf_annot.Rect = [
                PdfNumber(t.x), PdfNumber(t.y),
                PdfNumber(t.x + t.width), PdfNumber(t.y + t.height)
            ]
            if t.external_link is not None:
                # external_link is decoded from bytes as latin-1.
                pdf_a = PdfDict()
                pdf_annot.A = pdf_a
                pdf_a.Type = PdfName.Action
                pdf_a.S = PdfName.URI
                pdf_a.URI = t.external_link.decode("latin-1")
            if t.internal_link is not None:
                # internal_link is (page index, (x, y)).
                pdf_target_page = pdf_pages[t.internal_link[0]]
                target_x, target_y = t.internal_link[1]
                pdf_annot.Dest = [
                    pdf_target_page, PdfName.XYZ,
                    PdfNumber(target_x), PdfNumber(target_y), 0
                ]
    text = text.rstrip(" \n")
    if text:
        # The font resource is only attached when text was emitted.
        pdf_resources.Font = pdf_font_mapping
        contents += (before_text +
                     text +
                     after_text)
    contents = contents.rstrip(" \n")
    if contents:
        pdf_contents = PdfDict()
        pdf_contents.indirect = True
        pdf_page.Contents = pdf_contents
        if COMPRESS_PAGE_CONTENTS:
            # Compressed bytes are stored as a latin-1 decoded str.
            pdf_contents.Filter = [PdfName.FlateDecode]
            pdf_contents.stream = zlib.compress(
                contents.encode("latin-1"), 9).decode("latin-1")
        else:
            pdf_contents.stream = contents
    if pdf_annots:
        pdf_page.Annots = pdf_annots
    if pdf_xobject:
        pdf_resources.XObject = pdf_xobject
    if pdf_resources:
        pdf_page.Resources = pdf_resources
    # Report progress
    nonlocal finished_pages
    finished_pages += 1
    if progress_cb:
        # The callback receives the fraction of pages finished so far.
        progress_cb(finished_pages / len(self._pages))
def write_async(self, outfile, process_semaphore, progress_cb=None):
    """Build the PDF for ``self._pages`` and write it to ``outfile``.

    Generator-based coroutine (it uses ``yield from``).  All pages are
    built concurrently, the document is written to a temporary file with
    ``PdfWriter``, and the command named by ``QPDF_CMD`` then produces
    ``outfile`` from it.

    ``process_semaphore`` is passed through to the page image coroutines
    and to ``run_command_async``.  ``progress_cb``, when truthy, is
    called after each finished page with the fraction of pages done.
    """
    pdf_writer = PdfWriter(version="1.5")
    # Transparency group dictionary shared by all pages.
    pdf_group = PdfDict()
    pdf_group.indirect = True
    pdf_group.CS = PdfName.DeviceRGB
    pdf_group.I = PdfBool(True)
    pdf_group.S = PdfName.Transparency
    # Font resource dictionary shared by all pages; /F1 is the only font.
    pdf_font_mapping = PdfDict()
    pdf_font_mapping.indirect = True
    pdf_font_mapping.F1 = self._build_font()
    # Register one empty page dictionary per input page.
    for _ in self._pages:
        pdf_page = PdfDict()
        pdf_page.Type = PdfName.Page
        pdf_writer.addpage(pdf_page)
    # pdfrw makes an internal copy of the pages
    # use the copy so that references to pages in links are correct
    pdf_pages = list(pdf_writer.pagearray)
    # ICC profile stream read from SRGB_ICC_FILENAME, used below as the
    # DefaultRGB color space of every page.
    srgb_colorspace = PdfDict()
    srgb_colorspace.indirect = True
    srgb_colorspace.N = 3  # Number of components (red, green, blue)
    with open(SRGB_ICC_FILENAME, "rb") as f:
        srgb_colorspace_stream = f.read()
    srgb_colorspace.Filter = [PdfName.FlateDecode]
    srgb_colorspace.stream = zlib.compress(srgb_colorspace_stream, 9).decode("latin-1")
    srgb_colorspace.Length1 = len(srgb_colorspace_stream)
    default_rgb_colorspace = PdfArray([PdfName.ICCBased, srgb_colorspace])
    default_rgb_colorspace.indirect = True

    # Handle all pages in parallel
    @asyncio.coroutine
    def make_page(page, pdf_page, psem):
        # Fills pdf_page in place from page; psem is forwarded to the
        # image-producing coroutines.
        # Prepare everything in parallel
        @asyncio.coroutine
        def get_pdf_thumbnail(psem):
            # A page without a thumbnail contributes None.
            if page.thumbnail is None:
                return None
            return (yield from page.thumbnail.pdf_thumbnail(psem))

        @asyncio.coroutine
        def get_pdf_background(psem):
            # A page without a background contributes None.
            if page.background is None:
                return None
            return (yield from page.background.pdf_image(psem))

        @asyncio.coroutine
        def get_pdf_mask(foreground, psem):
            # A mask is only requested for foregrounds without a color.
            if foreground.color is not None:
                return None
            return (yield from foreground.pdf_mask(psem))

        # The two inner gathers keep the foreground images and masks in
        # the order of page.foreground.
        pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = (
            yield from asyncio.gather(
                get_pdf_thumbnail(psem),
                get_pdf_background(psem),
                asyncio.gather(
                    *[fg.pdf_image(psem) for fg in page.foreground]),
                asyncio.gather(
                    *[get_pdf_mask(fg, psem) for fg in page.foreground])))
        pdf_page.MediaBox = PdfArray(
            [0, 0, PdfNumber(page.width),
             PdfNumber(page.height)])
        pdf_page.Group = pdf_group
        pdf_resources = PdfDict()
        pdf_colorspace = PdfDict()
        pdf_colorspace.DefaultRGB = default_rgb_colorspace
        pdf_resources.ColorSpace = pdf_colorspace
        pdf_xobject = PdfDict()
        if pdf_thumbnail is not None:
            pdf_page.Thumb = pdf_thumbnail
        # Running counter used to name image XObjects Im0, Im1, ...
        im_index = 0
        # Save graphics state and scale unity rectangle to page size
        matrix = TransformationMatrix()
        matrix.scale(page.width, page.height)
        before_graphics = ("q\n" +
                           "%s cm\n" % matrix.to_pdf())
        after_graphics = "\nQ\n"
        contents = ""
        graphics = ""
        # Last color written with "rg"; lets consecutive items that
        # share a color skip the operator.
        current_color = None
        if page.color != self._factory.WHITE:
            # Non-white page: fill the unit rectangle with the page color.
            if current_color != page.color:
                current_color = page.color
                graphics += page.color.to_pdf() + " rg "
            graphics += ("0 0 1 1 re " +
                         "f\n")
        if pdf_background is not None:
            pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background
            graphics += "/Im%d Do\n" % im_index
            im_index += 1
        for foreground, pdf_foreground, pdf_mask in zip(
                page.foreground, pdf_foregrounds, pdf_masks):
            if pdf_mask is not None:
                # The mask is registered under its own name but no "Do"
                # is emitted for it; only the foreground is painted.
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask
                im_index += 1
            pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground
            if (foreground.color is not None and current_color != foreground.color):
                current_color = foreground.color
                graphics += foreground.color.to_pdf() + " rg "
            graphics += "/Im%d Do\n" % im_index
            im_index += 1
        if graphics:
            contents += (before_graphics +
                         graphics.rstrip(" \n") +
                         after_graphics)
        current_color = None
        # Text is emitted with font /F1 at size 1 and "3 Tr" (text
        # rendering mode 3, which the PDF specification defines as
        # invisible).
        before_text = ("BT\n" +
                       "/F1 1 Tf 3 Tr\n")
        after_text = "\nET\n"
        text = ""
        pdf_annots = []
        for t in page.text:
            if t.text:
                matrix = TransformationMatrix()
                # Glyph size is 0.5 x 1
                matrix.scale(2 / len(t.text), 1)
                matrix.translate(-0.5, -0.5)
                if t.direction == "ltr":
                    pass
                elif t.direction == "rtl":
                    matrix.translate(0, -1)
                elif t.direction == "ttb":
                    matrix.rotate(90)
                matrix.rotate(-t.rotation)
                matrix.translate(0.5, 0.5)
                matrix.scale(t.width, t.height)
                matrix.translate(t.x, t.y)
                # The string is written as hex-encoded UTF-16BE.
                text += "%s Tm %s Tj\n" % (
                    matrix.to_pdf(),
                    PdfString().from_bytes(
                        t.text.encode("utf-16-be"),
                        bytes_encoding="hex"))
            if t.external_link is not None or t.internal_link is not None:
                # Link annotation covering the rectangle of the text item.
                pdf_annot = PdfDict()
                pdf_annots.append(pdf_annot)
                pdf_annot.Type = PdfName.Annot
                pdf_annot.Subtype = PdfName.Link
                pdf_annot.Border = [0, 0, 0]
                pdf_annot.Rect = [
                    PdfNumber(t.x), PdfNumber(t.y),
                    PdfNumber(t.x + t.width), PdfNumber(t.y + t.height)
                ]
                if t.external_link is not None:
                    # external_link is decoded from bytes as latin-1.
                    pdf_a = PdfDict()
                    pdf_annot.A = pdf_a
                    pdf_a.Type = PdfName.Action
                    pdf_a.S = PdfName.URI
                    pdf_a.URI = t.external_link.decode("latin-1")
                if t.internal_link is not None:
                    # internal_link is (page index, (x, y)); the index
                    # selects from the writer's page copies.
                    pdf_target_page = pdf_pages[t.internal_link[0]]
                    target_x, target_y = t.internal_link[1]
                    pdf_annot.Dest = [
                        pdf_target_page, PdfName.XYZ,
                        PdfNumber(target_x), PdfNumber(target_y), 0
                    ]
        text = text.rstrip(" \n")
        if text:
            # The font resource is only attached when text was emitted.
            pdf_resources.Font = pdf_font_mapping
            contents += (before_text +
                         text +
                         after_text)
        contents = contents.rstrip(" \n")
        if contents:
            pdf_contents = PdfDict()
            pdf_contents.indirect = True
            pdf_page.Contents = pdf_contents
            if COMPRESS_PAGE_CONTENTS:
                # Compressed bytes are stored as a latin-1 decoded str.
                pdf_contents.Filter = [PdfName.FlateDecode]
                pdf_contents.stream = zlib.compress(
                    contents.encode("latin-1"), 9).decode("latin-1")
            else:
                pdf_contents.stream = contents
        if pdf_annots:
            pdf_page.Annots = pdf_annots
        if pdf_xobject:
            pdf_resources.XObject = pdf_xobject
        if pdf_resources:
            pdf_page.Resources = pdf_resources
        # Report progress
        nonlocal finished_pages
        finished_pages += 1
        if progress_cb:
            progress_cb(finished_pages / len(self._pages))

    # Counter shared with make_page through ``nonlocal``; it is bound
    # here, before any make_page coroutine is created.
    finished_pages = 0
    yield from asyncio.gather(*[
        make_page(page, pdf_page, process_semaphore)
        for page, pdf_page in zip(self._pages, pdf_pages)
    ])
    trailer = pdf_writer.trailer
    # Both entries of /ID are the same 16 random bytes.
    document_id = PdfString().from_bytes(os.urandom(16))
    trailer.ID = [document_id, document_id]
    mark_info = PdfDict()
    mark_info.Marked = PdfBool(True)
    trailer.Root.MarkInfo = mark_info
    struct_tree_root = PdfDict()
    struct_tree_root.Type = PdfName.StructTreeRoot
    trailer.Root.StructTreeRoot = struct_tree_root
    # XMP metadata stream declaring PDF/A part "2", conformance "A".
    metadata = PdfDict()
    metadata.indirect = True
    metadata.Type = PdfName.Metadata
    metadata.Subtype = PdfName.XML
    xmp = XMPMeta()
    xmp.set_property(XMP_NS_PDFA_ID, "part", "2")
    xmp.set_property(XMP_NS_PDFA_ID, "conformance", "A")
    metadata_stream = xmp.serialize_to_str().encode("utf-8")
    metadata.Filter = [PdfName.FlateDecode]
    metadata.stream = zlib.compress(metadata_stream, 9).decode("latin-1")
    metadata.Length1 = len(metadata_stream)
    trailer.Root.Metadata = metadata
    with TemporaryDirectory(prefix="djpdf-") as temp_dir:
        # Write an intermediate file, then let the external command
        # produce the final output from it.
        pdf_writer.write(path.join(temp_dir, "temp.pdf"))
        cmd = [
            QPDF_CMD,
            "--stream-data=preserve",
            "--object-streams=preserve",
            "--normalize-content=n",
            "--newline-before-endstream"
        ]
        if LINEARIZE_PDF:
            cmd.extend(["--linearize"])
        cmd.extend([
            path.abspath(path.join(temp_dir, "temp.pdf")),
            path.abspath(outfile)
        ])
        yield from run_command_async(cmd, process_semaphore)
def _build_font():
    """Assemble and return the Type0 font dictionary "GlyphLessFont".

    The font program is read from FONT_FILENAME and the ToUnicode CMap
    from UNICODE_CMAP_FILENAME; both are embedded as Flate-compressed
    streams.  Every CID is mapped to glyph 1.
    """
    def flate_stream(raw, with_length1=True):
        # Indirect stream dictionary holding ``raw`` Flate-compressed;
        # the compressed bytes are stored as a latin-1 decoded str.
        holder = PdfDict()
        holder.indirect = True
        holder.Filter = [PdfName.FlateDecode]
        holder.stream = zlib.compress(raw, 9).decode("latin-1")
        if with_length1:
            holder.Length1 = len(raw)
        return holder

    with open(FONT_FILENAME, "rb") as font_file:
        font_bytes = font_file.read()
    font_program = flate_stream(font_bytes)

    descriptor = PdfDict()
    descriptor.indirect = True
    descriptor.Ascent = 1000
    descriptor.CapHeight = 1000
    descriptor.Descent = -1
    descriptor.Flags = 5  # FixedPitch + Symbolic
    descriptor.FontBBox = PdfArray([0, 0, 1000, 500])
    descriptor.FontFile2 = font_program
    descriptor.FontName = PdfName.GlyphLessFont
    descriptor.ItalicAngle = 0
    descriptor.StemV = 80
    descriptor.Type = PdfName.FontDescriptor

    # Map everything to glyph 1
    gid_map = flate_stream(b"\0\1" * (1 << 16))

    system_info = PdfDict()
    system_info.Ordering = PdfString.from_unicode("Identity")
    system_info.Registry = PdfString.from_unicode("Adobe")
    system_info.Supplement = 0

    descendant = PdfDict()
    descendant.indirect = True
    descendant.CIDToGIDMap = gid_map
    descendant.BaseFont = PdfName.GlyphLessFont
    descendant.CIDSystemInfo = system_info
    descendant.FontDescriptor = descriptor
    descendant.Subtype = PdfName.CIDFontType2
    descendant.Type = PdfName.Font
    descendant.DW = 500

    with open(UNICODE_CMAP_FILENAME, "rb") as cmap_file:
        cmap_bytes = cmap_file.read()
    # The CMap stream carries no Length1 entry.
    to_unicode = flate_stream(cmap_bytes, with_length1=False)

    type0 = PdfDict()
    type0.indirect = True
    type0.BaseFont = PdfName.GlyphLessFont
    type0.DescendantFonts = PdfArray([descendant])
    type0.Encoding = PdfName("Identity-H")
    type0.Subtype = PdfName.Type0
    type0.ToUnicode = to_unicode
    type0.Type = PdfName.Font
    return type0