def get_ascent(self) -> pDecimal:
    """
    Return the ascent of this font, i.e. the maximum height above the
    baseline reached by its glyphs (accented characters excluded).

    The value is taken from the "Ascender" entry of the AFM attributes.
    When that entry is absent, 0 is returned.
    """
    afm_attributes = self._afm._attrs
    if "Ascender" not in afm_attributes:
        return pDecimal(0)
    return pDecimal(afm_attributes["Ascender"])
def get_descent(self) -> pDecimal:
    """
    Return the descent of this font, i.e. the maximum depth below the
    baseline reached by its glyphs (expected to be a negative number).

    The value is taken from the "Descender" entry of the AFM attributes.
    When that entry is absent, 0 is returned.
    """
    afm_attributes = self._afm._attrs
    if "Descender" not in afm_attributes:
        return pDecimal(0)
    return pDecimal(afm_attributes["Descender"])
def get_width(self, character_identifier: int) -> typing.Optional[pDecimal]:
    """
    Return the width (in text space) of the given character identifier.

    Lookup order:
    1. the per-font width cache,
    2. the "W" array, which is parsed into the cache on a cache miss,
    3. the default width: the "DW" entry, or 1000 when "DW" is absent.

    The "W" array is read as a sequence of entries in one of two forms:
    - ``<start_code> [<width> <width> ...]``: consecutive codes beginning
      at ``start_code`` receive the listed widths,
    - ``<start_code> <end_code> <width>``: every code in the inclusive
      range receives the same width.
    Elements that fit neither form are skipped one at a time, so that a
    malformed array cannot stall the parser.

    :param character_identifier: the character identifier to look up
    :return: the width of the glyph, or the default width if the
             identifier is not covered by "W"
    """
    # check cache
    if character_identifier in self._width_cache:
        return self._width_cache[character_identifier]

    # default width, used for every glyph that "W" does not mention
    dw: pDecimal = self["DW"] if "DW" in self else pDecimal(1000)
    if "W" not in self:
        return dw

    widths = self["W"]
    assert isinstance(widths, List)

    i: int = 0
    number_of_items: int = len(widths)
    while i < number_of_items:
        first_item = widths[i]

        # <char_start_code> [<width>+]
        if (
            isinstance(first_item, pDecimal)
            and i + 1 < number_of_items
            and isinstance(widths[i + 1], List)
        ):
            start_code: int = int(first_item)
            for offset, glyph_width in enumerate(widths[i + 1]):
                # widths are truncated to integers before being cached
                self._width_cache[start_code + offset] = pDecimal(int(glyph_width))
            i += 2
            continue

        # <char_start_code> <char_end_code> <width>
        if (
            isinstance(first_item, pDecimal)
            and i + 2 < number_of_items
            and isinstance(widths[i + 1], pDecimal)
            and isinstance(widths[i + 2], pDecimal)
        ):
            range_width: int = int(widths[i + 2])
            for cid in range(int(first_item), int(widths[i + 1]) + 1):
                self._width_cache[cid] = pDecimal(range_width)
            i += 3
            continue

        # Neither form matched (malformed or truncated array). Step over
        # this element; without this the loop would never terminate.
        i += 1

    # check cache again, now that "W" has been parsed
    if character_identifier in self._width_cache:
        return self._width_cache[character_identifier]

    # default
    return dw
def get_width(self, character_identifier: int) -> typing.Optional[pDecimal]:
    """
    Return the width (in text space) of the given character identifier,
    as recorded in the AFM character table.

    Each AFM character entry is indexed positionally: element 0 is
    compared against the character identifier, element 1 is used as the
    width. The width is returned only when exactly one entry matches;
    with no match, or more than one, 0 is returned.
    """
    matching_widths: typing.List[pDecimal] = [
        pDecimal(char_metrics[1])
        for char_metrics in self._afm._chars.values()
        if char_metrics[0] == character_identifier
    ]
    return matching_widths[0] if len(matching_widths) == 1 else pDecimal(0)
def _append_to_content_stream(self, page: Page, instructions: str):
    """
    Append content-stream instructions to the given page.

    The page's "Contents" stream is created first if needed. The
    instructions are encoded as latin1 and appended to the stream's
    "DecodedBytes"; "Bytes" (zlib, level 9) and "Length" are then
    recomputed from the updated decoded bytes.
    """
    self._initialize_page_content_stream(page)
    stream = page["Contents"]

    # extend the uncompressed payload
    stream[Name("DecodedBytes")] += instructions.encode("latin1")

    # keep the compressed payload and its length in sync
    compressed_bytes = zlib.compress(stream["DecodedBytes"], 9)
    stream[Name("Bytes")] = compressed_bytes
    stream[Name("Length")] = pDecimal(len(stream["Bytes"]))
def invoke(self, canvas: "Canvas", operands: List[AnyPDFType] = []):  # type: ignore [name-defined]
    """
    Invoke the T* operator.

    T* is carried out by delegating to the "Td" operator with a
    horizontal offset of 0 and a vertical offset equal to the negated
    text leading of the current graphics state. The operands argument is
    not read.
    """
    td_operator: typing.Optional[CanvasOperator] = canvas.get_operator("Td")
    assert td_operator
    next_line_offset = [pDecimal(0), -canvas.graphics_state.leading]
    td_operator.invoke(canvas, next_line_offset)
def get_descent(self) -> pDecimal:
    """
    Return the descent of this font, i.e. the maximum depth below the
    baseline reached by its glyphs (expected to be a negative number).

    The value is read from the "Descent" entry of the "FontDescriptor"
    dictionary. When the descriptor or the entry is missing, this is
    logged at debug level and 0 is returned.
    """
    # The membership test must be made against the descriptor dictionary
    # itself; testing against the literal string "FontDescriptor" is a
    # substring check that can never succeed.
    if "FontDescriptor" in self and "Descent" in self["FontDescriptor"]:
        return self["FontDescriptor"]["Descent"]
    logger.debug(
        "Type3Font does not have an `Descent` entry in its `FontDescriptor` dictionary."
    )
    return pDecimal(0)  # TODO
def get_ascent(self) -> pDecimal:
    """
    Return the ascent of this font, i.e. the maximum height above the
    baseline reached by its glyphs (accented characters excluded).

    The value is read from the "Ascent" entry of the "FontDescriptor"
    dictionary. When the descriptor or the entry is missing, this is
    logged at debug level and 0 is returned.
    """
    # The membership test must be made against the descriptor dictionary
    # itself; testing against the literal string "FontDescriptor" is a
    # substring check that can never succeed.
    if "FontDescriptor" in self and "Ascent" in self["FontDescriptor"]:
        return self["FontDescriptor"]["Ascent"]
    logger.debug(
        "Type3Font does not have an `Ascent` entry in its `FontDescriptor` dictionary."
    )
    return pDecimal(0)  # TODO
def __init__(
    self,
    text_bytes: bytes,
    font: Font,
    font_size: Decimal,
    character_spacing: Decimal = Decimal(0),
    word_spacing: Decimal = Decimal(0),
    horizontal_scaling: Decimal = Decimal(100),
):
    """
    Decode raw text bytes into a list of glyphs and store the text
    parameters.

    At every position a two-byte character code (big-endian) is tried
    first; if the font cannot map it to unicode, the single byte at that
    position is tried. A byte the font cannot map at all becomes a glyph
    with the replacement character and a width of 250. A width the font
    reports as missing (or zero) is stored as 0.
    """
    assert isinstance(font, Font)

    self._glyphs: typing.List[Glyph] = []
    position: int = 0
    byte_count: int = len(text_bytes)
    while position < byte_count:
        decoded: typing.Optional[str] = None

        # sometimes, 2 bytes make up 1 unicode char
        if position + 1 < byte_count:
            wide_code: int = text_bytes[position] * 256 + text_bytes[position + 1]
            decoded = font.character_identifier_to_unicode(wide_code)
            if decoded is not None:
                self._glyphs.append(
                    Glyph(
                        wide_code,
                        decoded,
                        font.get_width(wide_code) or pDecimal(0),
                    )
                )
                position += 2
                continue

        # usually it's 1 byte though
        narrow_code: int = text_bytes[position]
        decoded = font.character_identifier_to_unicode(narrow_code)
        if decoded is not None:
            self._glyphs.append(
                Glyph(
                    narrow_code,
                    decoded,
                    font.get_width(narrow_code) or Decimal(0),
                )
            )
        else:
            # no mapping found
            self._glyphs.append(Glyph(narrow_code, "�", Decimal(250)))
        position += 1

    self._font = font
    self._font_size = font_size
    self._character_spacing = character_spacing
    self._word_spacing = word_spacing
    self._horizontal_scaling = horizontal_scaling
def _initialize_page_content_stream(self, page: Page):
    """
    Give the page an empty "Contents" stream, unless it already has one.

    The new stream carries empty "DecodedBytes", their zlib-compressed
    form (level 9) as "Bytes", a "FlateDecode" filter and the matching
    "Length".
    """
    if "Contents" in page:
        return

    # build content stream object
    empty_payload = b""
    new_stream = Stream()
    new_stream[Name("DecodedBytes")] = empty_payload
    new_stream[Name("Bytes")] = zlib.compress(new_stream["DecodedBytes"], 9)
    new_stream[Name("Filter")] = Name("FlateDecode")
    new_stream[Name("Length")] = pDecimal(len(new_stream["Bytes"]))

    # set content of page
    page[Name("Contents")] = new_stream
def add(self, layout_element: LayoutElement) -> "PageLayout":
    """
    Add a `LayoutElement` to the current `Page`.

    The element is laid out in the space remaining in the current
    column. If no space remains, or the laid-out element ends below the
    vertical margin, the page content is rolled back (in the latter
    case), the layout moves to the next column and the element is added
    again. Nothing happens once the column index has reached the number
    of columns.

    :return: this layout, to allow call chaining
    """
    if self.column_index >= self.number_of_columns:
        return self

    # vertical space still free in the current column
    remaining_height: Decimal = (
        self.previous_y - self.vertical_margin - self.previous_leading
    )
    assert self.page_height
    if remaining_height < 0:
        self.switch_to_next_column()
        return self.add(layout_element)

    column_x = self.horizontal_margin + self.column_index * (
        self.column_width + self.inter_column_margin
    )
    target_rect: Rectangle = Rectangle(
        column_x,
        self.vertical_margin,
        self.column_width,
        remaining_height,
    )

    # remember the page content so a failed attempt can be undone
    if "Contents" not in self.page:
        layout_element._initialize_page_content_stream(self.page)
    content_snapshot = self.page["Contents"]["DecodedBytes"]

    # attempt layout
    placed_rect = layout_element.layout(self.page, bounding_box=target_rect)
    if placed_rect.y < self.vertical_margin:
        # the element ran past the bottom margin: restore the previous
        # content and retry in the next column
        stream = self.page["Contents"]
        stream[Name("DecodedBytes")] = content_snapshot
        stream[Name("Bytes")] = zlib.compress(stream["DecodedBytes"], 9)
        stream[Name("Length")] = pDecimal(len(stream["Bytes"]))
        self.switch_to_next_column()
        return self.add(layout_element)

    # the element fits: record where the next one starts
    self.previous_y = placed_rect.y
    self.previous_leading = self._calculate_leading(layout_element)
    return self
def invoke(self, canvas: "Canvas", operands: List[AnyPDFType] = []):  # type: ignore [name-defined]
    """
    Invoke the TD operator.

    TD is carried out in two delegated steps: the "TL" operator is
    invoked with the negated second operand (setting the leading), then
    the "Td" operator is invoked with the unchanged operands (moving the
    text position). Both operands must be pDecimal instances.
    """
    offset_x, offset_y = operands[0], operands[1]
    assert isinstance(offset_x, pDecimal)
    assert isinstance(offset_y, pDecimal)

    # step 1: leading = -ty
    tl_operator: typing.Optional[CanvasOperator] = canvas.get_operator("TL")
    assert tl_operator
    tl_operator.invoke(canvas, [pDecimal(-offset_y)])

    # step 2: move the text position
    td_operator: typing.Optional[CanvasOperator] = canvas.get_operator("Td")
    assert td_operator
    td_operator.invoke(canvas, operands)
def transform(
    self,
    object_to_transform: AnyPDFType,
    context: Optional[WriteTransformerContext] = None,
):
    """
    Write an ET.Element (representing XMP meta information) to a byte
    stream.

    The element is serialized with ET.tostring and wrapped in a Stream
    of type "Metadata" / subtype "XML" whose "Bytes" and "DecodedBytes"
    both hold the serialized XML. The stream takes over the element's
    reference and is handed to the root transformer for writing.
    """
    assert isinstance(object_to_transform, ET.Element)
    assert context is not None
    assert context.destination is not None
    assert context.destination

    # build stream
    xml_bytes = ET.tostring(object_to_transform)
    out_value = Stream()
    out_value[Name("Type")] = Name("Metadata")
    out_value[Name("Subtype")] = Name("XML")
    out_value[Name("DecodedBytes")] = xml_bytes
    out_value[Name("Bytes")] = xml_bytes
    out_value[Name("Length")] = pDecimal(len(xml_bytes))

    # copy reference
    out_value.set_reference(object_to_transform.get_reference())  # type: ignore [attr-defined]

    # start object if needed: the reference has an object number but no
    # byte offset yet
    ref = out_value.get_reference()  # type: ignore [attr-defined]
    started_object = False
    if ref is not None:
        assert isinstance(ref, Reference)
        started_object = ref.object_number is not None and ref.byte_offset is None
        if started_object:
            self.start_object(out_value, context)

    # pass stream along to other transformer
    self.get_root_transformer().transform(out_value, context)

    # end object if needed
    if started_object:
        self.end_object(out_value, context)
def _calculate_layout_box_without_padding(
    self, page: Page, bounding_box: Rectangle
) -> Rectangle:
    """
    Determine the rectangle this element would occupy (without padding)
    inside the given bounding box, leaving the page content unchanged.

    The layout is actually performed, after which the page's content
    stream is restored to the bytes it held beforehand.
    """
    # snapshot the current content
    if "Contents" not in page:
        self._initialize_page_content_stream(page)
    content_snapshot = page["Contents"]["DecodedBytes"]

    # trial layout
    measured_rect = self._do_layout_without_padding(page, bounding_box)
    assert measured_rect is not None

    # undo whatever the trial layout wrote
    stream = page["Contents"]
    stream[Name("DecodedBytes")] = content_snapshot
    stream[Name("Bytes")] = zlib.compress(stream["DecodedBytes"], 9)
    stream[Name("Length")] = pDecimal(len(stream["Bytes"]))

    return measured_rect
def get_descent(self) -> pDecimal:
    """
    Return the descent of this font, i.e. the maximum depth below the
    baseline reached by its glyphs.

    This implementation has no descent information and always reports 0.
    """
    no_descent = pDecimal(0)
    return no_descent
def get_space_character_width_estimate(self) -> Decimal:
    """
    Estimate the width of the space character (unicode 32) in this Font.

    The first strategy that yields a value wins:
    1. the width the Font itself reports for the space character,
    2. the MissingWidth entry of the FontDescriptor,
    3. the AvgWidth entry of the FontDescriptor,
    4. the DW entry of the single DescendantFont (composite fonts),
    5. the width of another character the Font does define (a-z, 0-9,
       A-Z, tried in that order) multiplied by a per-character ratio,
    6. the fixed value 278.
    """
    # 1. if space is defined, and the width of space is defined, return that
    space_id: typing.Optional[int] = self.unicode_to_character_identifier(" ")
    if space_id is not None:
        space_width: typing.Optional[Decimal] = self.get_width(space_id)
        if space_width is not None:
            return space_width

    # 2. MissingWidth, then 3. AvgWidth
    if "FontDescriptor" in self:
        for descriptor_key in ("MissingWidth", "AvgWidth"):
            if descriptor_key in self["FontDescriptor"]:
                return self["FontDescriptor"][descriptor_key]

    # 4. default width of the descendant font
    if (
        "DescendantFonts" in self
        and isinstance(self["DescendantFonts"], List)
        and len(self["DescendantFonts"]) == 1
        and "DW" in self["DescendantFonts"][0]
    ):
        return self["DescendantFonts"][0]["DW"]

    # 5. other characters may be defined, which give us a clue
    # (factor to multiply a character's width by to obtain the estimate;
    # insertion order is the order in which characters are tried)
    # fmt: off
    char_to_space_width_ratio: typing.Dict[str, Decimal] = {
        "a": Decimal("0.500"), "b": Decimal("0.500"), "c": Decimal("0.556"),
        "d": Decimal("0.500"), "e": Decimal("0.500"), "f": Decimal("1.000"),
        "g": Decimal("0.500"), "h": Decimal("0.500"), "i": Decimal("1.252"),
        "j": Decimal("1.252"), "k": Decimal("0.556"), "l": Decimal("1.252"),
        "m": Decimal("0.334"), "n": Decimal("0.500"), "o": Decimal("0.500"),
        "p": Decimal("0.500"), "q": Decimal("0.500"), "r": Decimal("0.835"),
        "s": Decimal("0.556"), "t": Decimal("1.000"), "u": Decimal("0.500"),
        "v": Decimal("0.556"), "w": Decimal("0.385"), "x": Decimal("0.556"),
        "y": Decimal("0.556"), "z": Decimal("0.556"),
        "0": Decimal("0.500"), "1": Decimal("0.500"), "2": Decimal("0.500"),
        "3": Decimal("0.500"), "4": Decimal("0.500"), "5": Decimal("0.500"),
        "6": Decimal("0.500"), "7": Decimal("0.500"), "8": Decimal("0.500"),
        "9": Decimal("0.500"),
        "A": Decimal("0.417"), "B": Decimal("0.417"), "C": Decimal("0.385"),
        "D": Decimal("0.385"), "E": Decimal("0.417"), "F": Decimal("0.455"),
        "G": Decimal("0.357"), "H": Decimal("0.385"), "I": Decimal("1.000"),
        "J": Decimal("0.556"), "K": Decimal("0.417"), "L": Decimal("0.500"),
        "M": Decimal("0.334"), "N": Decimal("0.385"), "O": Decimal("0.357"),
        "P": Decimal("0.417"), "Q": Decimal("0.357"), "R": Decimal("0.385"),
        "S": Decimal("0.417"), "T": Decimal("0.455"), "U": Decimal("0.385"),
        "V": Decimal("0.417"), "W": Decimal("0.294"), "X": Decimal("0.417"),
        "Y": Decimal("0.417"), "Z": Decimal("0.455"),
    }
    # fmt: on
    for reference_char, ratio in char_to_space_width_ratio.items():
        reference_id = self.unicode_to_character_identifier(reference_char)
        if reference_id is None:
            continue
        reference_width = self.get_width(reference_id)
        if reference_width is not None:
            return pDecimal(reference_width * ratio)

    # 6. helvetica
    return Decimal(278)
def true_type_font_from_file(path_to_font_file: Path) -> "TrueTypeFont":
    """
    Build the PDF TrueTypeFont object for a given TTF file.

    The resulting font dictionary contains:
    - "Name" / "BaseFont": the font family name (platform 3, name ID 1),
      reduced to its ASCII letters,
    - "FirstChar" / "LastChar" / "Widths": one width per mapped glyph,
      scaled to 1000 units per em and rounded to 2 decimals,
    - "FontDescriptor": ascent, descent and italic angle taken from the
      file; several other entries are placeholders (marked TODO),
    - "Encoding": WinAnsiEncoding plus a "Differences" array that assigns
      consecutive codes, starting at 0, to the mapped glyphs,
    - "FontFile2": the embedded font file.

    A font without a Unicode cmap yields no glyph mapping: the width
    entries are omitted and "Differences" stays empty.

    :param path_to_font_file: path to an existing file whose name ends in .ttf
    :return: the populated TrueTypeFont
    """
    assert path_to_font_file.exists()
    assert path_to_font_file.name.endswith(".ttf")

    font_file_bytes: typing.Optional[bytes] = None
    with open(path_to_font_file, "rb") as ffh:
        font_file_bytes = ffh.read()
    assert font_file_bytes

    # read file
    ttf_font_file = TTFont(path_to_font_file)

    # build font
    font: TrueTypeFont = TrueTypeFont()
    font_name: str = str(
        [
            x
            for x in ttf_font_file["name"].names
            if x.platformID == 3 and x.nameID == 1
        ][0].string,
        "latin1",
    )
    # keep ASCII letters only; this also drops the bytes that decoding
    # the raw name record as latin1 leaves between the letters
    font_name = "".join(
        [x for x in font_name if x.lower() in "abcdefghijklmnopqrstuvwxyz"]
    )
    font[Name("Name")] = Name(font_name)
    font[Name("BaseFont")] = Name(font_name)

    # getBestCmap() maps code points to glyph names and may return None;
    # guard before iterating so such a font does not raise here.
    cmap: typing.Optional[typing.Dict[int, str]] = ttf_font_file.getBestCmap()
    cmap_reverse: typing.Dict[str, int] = {}
    if cmap is not None:
        for k, v in cmap.items():
            # several code points may share a glyph: keep the lowest
            if v in cmap_reverse:
                cmap_reverse[v] = min(cmap_reverse[v], k)
            else:
                cmap_reverse[v] = k
    glyph_order: typing.List[str] = [
        x for x in ttf_font_file.glyphOrder if x in cmap_reverse
    ]

    # build widths
    units_per_em: pDecimal = pDecimal(ttf_font_file["head"].unitsPerEm)
    if cmap is not None:
        font[Name("FirstChar")] = pDecimal(0)
        # NOTE(review): "Widths" receives len(glyph_order) entries starting
        # at FirstChar 0, which suggests LastChar should be one less than
        # this value - confirm against the PDF specification and readers.
        font[Name("LastChar")] = pDecimal(len(glyph_order))
        font[Name("Widths")] = List()
        # the glyph set does not change while iterating: fetch it once
        glyph_set = ttf_font_file.getGlyphSet()
        for glyph_name in glyph_order:
            w: pDecimal = (
                pDecimal(glyph_set[glyph_name].width) / units_per_em
            ) * pDecimal(1000)
            w = pDecimal(round(w, 2))
            font["Widths"].append(w)

    font[Name("FontDescriptor")] = Dictionary()
    font["FontDescriptor"][Name("Type")] = Name("FontDescriptor")
    font["FontDescriptor"][Name("FontName")] = String(font_name)
    font["FontDescriptor"][Name("FontStretch")] = Name("Normal")  # TODO
    font["FontDescriptor"][Name("FontWeight")] = pDecimal(400)  # TODO
    font["FontDescriptor"][Name("Flags")] = pDecimal(4)  # TODO
    font["FontDescriptor"][Name("FontBBox")] = List().set_can_be_referenced(  # type: ignore [attr-defined]
        False
    )  # TODO
    for _ in range(0, 4):
        font["FontDescriptor"]["FontBBox"].append(pDecimal(0))

    # fmt: off
    font["FontDescriptor"][Name("ItalicAngle")] = pDecimal(ttf_font_file["post"].italicAngle)
    font["FontDescriptor"][Name("Ascent")] = pDecimal(pDecimal(ttf_font_file["hhea"].ascent) / units_per_em * Decimal(1000))
    font["FontDescriptor"][Name("Descent")] = pDecimal(pDecimal(ttf_font_file["hhea"].descent) / units_per_em * Decimal(1000))
    font["FontDescriptor"][Name("CapHeight")] = pDecimal(0)  # TODO
    font["FontDescriptor"][Name("StemV")] = pDecimal(0)  # TODO
    # fmt: on

    font[Name("Encoding")] = Dictionary()
    font["Encoding"][Name("BaseEncoding")] = Name("WinAnsiEncoding")
    font["Encoding"][Name("Differences")] = List()
    for i, glyph_name in enumerate(glyph_order):
        font["Encoding"]["Differences"].append(pDecimal(i))
        font["Encoding"]["Differences"].append(Name(glyph_name))

    # embed font file
    font_stream: Stream = Stream()
    font_stream[Name("Type")] = Name("Font")
    font_stream[Name("Subtype")] = Name("TrueType")
    font_stream[Name("Length")] = pDecimal(len(font_file_bytes))
    font_stream[Name("Length1")] = pDecimal(len(font_file_bytes))
    font_stream[Name("Filter")] = Name("FlateDecode")
    font_stream[Name("DecodedBytes")] = font_file_bytes
    font_stream[Name("Bytes")] = zlib.compress(font_file_bytes, 9)
    font["FontDescriptor"][Name("FontFile2")] = font_stream

    # return
    return font
def transform(
    self,
    object_to_transform: AnyPDFType,
    context: Optional[WriteTransformerContext] = None,
):
    """
    Write a Stream to a byte stream.

    A stream whose reference was already resolved is skipped. Otherwise
    the stream dictionary is written (nested dictionaries, lists and
    streams that can be referenced are replaced by references and
    written afterwards), followed by the payload between the "stream"
    and "endstream" keywords.

    The payload is derived from "DecodedBytes" when present: unchanged
    at compression level 0, zlib-compressed at any other level, with
    "Length" updated to match. Without "DecodedBytes", "Bytes" is
    written as stored. At compression level 0 the "Filter" entry is
    dropped from the written dictionary.
    """
    assert context is not None
    assert context.destination is not None
    assert isinstance(object_to_transform, Stream)

    # avoid resolving objects twice
    stream_ref: typing.Optional[Reference] = object_to_transform.get_reference()  # type: ignore [attr-defined]
    if stream_ref is not None and stream_ref in context.resolved_references:
        assert stream_ref is not None
        assert stream_ref.object_number is not None
        logger.debug(
            "skip writing object %d %d R (already resolved)"
            % (stream_ref.object_number, stream_ref.generation_number or 0)
        )
        return

    # start object if needed
    started_object = False
    if stream_ref is not None:
        assert stream_ref.object_number is not None
        if stream_ref.object_number is not None and stream_ref.byte_offset is None:
            started_object = True
            self.start_object(object_to_transform, context)
        context.resolved_references.append(stream_ref)

    # build stream dictionary; referenced values are written afterwards
    stream_dictionary = Dictionary()
    written_afterwards: typing.List[AnyPDFType] = []
    for key, value in object_to_transform.items():
        if key in ["Bytes", "DecodedBytes"]:
            continue
        if (
            isinstance(value, (Dictionary, List, Stream))
            and value.can_be_referenced()  # type: ignore [union-attr]
        ):
            stream_dictionary[key] = self.get_reference(value, context)
            written_afterwards.append(value)
        else:
            stream_dictionary[key] = value

    # if compression_level == 0, remove \Filter
    uncompressed_output = context.compression_level == 0
    if uncompressed_output and Name("Filter") in stream_dictionary:
        stream_dictionary.pop(Name("Filter"))

    # handle compression
    if "DecodedBytes" in object_to_transform:
        if context.compression_level == 0:
            payload = object_to_transform["DecodedBytes"]
        else:
            payload = zlib.compress(
                object_to_transform["DecodedBytes"], context.compression_level
            )
        stream_dictionary[Name("Length")] = pDecimal(len(payload))
    else:
        assert "Bytes" in object_to_transform
        payload = object_to_transform["Bytes"]

    # write stream dictionary
    self.get_root_transformer().transform(stream_dictionary, context)

    # write "stream", the payload, and "endstream"
    destination = context.destination
    destination.write(bytes("stream\n", "latin1"))
    destination.write(payload)
    destination.write(bytes("\nendstream\n", "latin1"))

    # end object if needed
    if started_object:
        self.end_object(object_to_transform, context)

    # write the objects that were replaced by references
    for referenced_object in written_afterwards:
        self.get_root_transformer().transform(referenced_object, context)
def transform(
    self,
    object_to_transform: AnyPDFType,
    context: Optional[WriteTransformerContext] = None,
):
    """
    Write an Image to a byte stream.

    The image is encoded as JPEG. If direct encoding fails, the image is
    first passed through `_convert_png_to_jpg` and encoding is retried.
    The encoded bytes are wrapped in an image XObject stream (DCTDecode,
    8 bits per component, DeviceRGB) that takes over the image's
    reference, and that stream is handed to the root transformer.

    While the stream is written, `context.compression_level` is
    temporarily set to 9; the previous level is restored afterwards, even
    if writing raises.

    :raises AssertionError: if the image could not be encoded at all
    """
    assert context is not None
    assert context.destination is not None
    assert isinstance(object_to_transform, PILImage.Image)

    # get image bytes
    contents = None
    filter_name: Optional[Name] = None
    encoding_error: Optional[Exception] = None
    try:
        with io.BytesIO() as output:
            object_to_transform.save(output, format="JPEG")
            contents = output.getvalue()
            filter_name = Name("DCTDecode")
    except Exception as ex:
        # best effort: fall through to the conversion path below
        encoding_error = ex

    if contents is None:
        try:
            # TODO : properly store PNG (instead of converting it)
            with io.BytesIO() as output:
                object_to_transform = self._convert_png_to_jpg(object_to_transform)
                assert isinstance(object_to_transform, PILImage.Image)
                object_to_transform.save(output, format="JPEG")
                contents = output.getvalue()
                filter_name = Name("DCTDecode")
        except Exception as ex:
            # keep the cause so the failure below is diagnosable
            encoding_error = ex

    assert contents is not None, "unable to encode image as JPEG: %r" % (
        encoding_error,
    )

    # build corresponding Stream (XObject)
    out_value = Stream()
    out_value[Name("Type")] = Name("XObject")
    out_value[Name("Subtype")] = Name("Image")
    out_value[Name("Width")] = pDecimal(object_to_transform.width)
    out_value[Name("Height")] = pDecimal(object_to_transform.height)
    out_value[Name("Length")] = pDecimal(len(contents))
    out_value[Name("Filter")] = filter_name
    out_value[Name("BitsPerComponent")] = pDecimal(8)
    out_value[Name("ColorSpace")] = Name("DeviceRGB")
    out_value[Name("Bytes")] = contents

    # copy reference
    out_value.set_reference(object_to_transform.get_reference())  # type: ignore [attr-defined]

    # start object if needed
    started_object = False
    ref = out_value.get_reference()  # type: ignore [attr-defined]
    if ref is not None:
        assert isinstance(ref, Reference)
        if ref.object_number is not None and ref.byte_offset is None:
            started_object = True
            self._start_object(out_value, context)

    # write stream
    # NOTE(review): the level is presumably forced to non-zero so that the
    # downstream stream writer keeps the /Filter entry for the already
    # encoded bytes - confirm against the stream transformer.
    previous_compression_level = context.compression_level
    context.compression_level = 9
    try:
        self.get_root_transformer().transform(out_value, context)
    finally:
        # never leave the shared context at the forced level
        context.compression_level = previous_compression_level

    # end object if needed
    if started_object:
        self._end_object(out_value, context)