def transform(
    self,
    object_to_transform: Any,
    context: Optional[WriteTransformerContext] = None,
):
    """
    Serialize a Document object to the byte stream held by the context.

    The PDF header is written first, then the trailer is refreshed
    (/Info, /ID, CreationDate, ModDate, Producer) and finally the XRef
    is handed to the root transformer.
    """
    assert context is not None
    assert context.destination is not None

    # header: version line, then a comment line made of four bytes >= 128
    out = context.destination
    out.write(b"%PDF-1.7\n")
    out.write(b"%")
    out.write(bytes([226, 227, 207, 211]))
    out.write(b"\n")

    # every reference is invalidated before anything is (re)written
    WritePDFTransformer._invalidate_all_references(object_to_transform)

    trailer = object_to_transform["XRef"]["Trailer"]

    # /Info is created on demand
    if "Info" not in trailer:
        trailer[Name("Info")] = Dictionary()

    # /ID: a fresh document gets the same random value twice,
    # an existing /ID only has its second element replaced
    random_id = HexadecimalString("%032x" % random.randrange(16**32))
    if "ID" in trailer:
        trailer["ID"][1] = random_id
    else:
        trailer[Name("ID")] = List().set_can_be_referenced(  # type: ignore [attr-defined]
            False
        )
        trailer["ID"].append(random_id)
        trailer["ID"].append(random_id)
    trailer["ID"].set_can_be_referenced(False)

    # CreationDate is only filled in when absent, ModDate is always updated
    modification_date = WritePDFTransformer._timestamp_to_str()
    info = trailer[Name("Info")]
    if "CreationDate" not in info:
        info[Name("CreationDate")] = String(modification_date)
    info[Name("ModDate")] = String(modification_date)

    # Producer
    info[Name("Producer")] = String("pText")

    # delegate the XRef (and everything reachable from it)
    self.get_root_transformer().transform(object_to_transform["XRef"], context)
def _test_document(self, file) -> bool:
    """
    Round-trip a single PDF: load it, set its /Producer and write it out.

    The output is written to ``self.output_dir`` as ``<stem>_out.pdf``.

    :param file: path of the input PDF (must expose ``stem``)
    :return: False when the loaded document has no "XRef" or no "Trailer",
             True once the output file has been written
    """
    # create output directory if it does not exist yet
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    # load the document; the file is opened exactly once
    with open(file, "rb") as pdf_file_handle:
        doc = PDF.loads(pdf_file_handle)

    # documents without a trailer cannot be stamped
    if "XRef" not in doc:
        return False
    if "Trailer" not in doc["XRef"]:
        return False
    trailer = doc["XRef"]["Trailer"]
    if "Info" not in trailer:
        trailer[Name("Info")] = Dictionary()

    # change producer
    trailer["Info"][Name("Producer")] = String("pText")

    # determine output location and write
    out_file = self.output_dir / (file.stem + "_out.pdf")
    with open(out_file, "wb") as pdf_file_handle:
        PDF.dumps(pdf_file_handle, doc)

    return True
def split_on_glyphs(self) -> typing.List["ChunkOfTextRenderEvent"]:
    """
    Break this ChunkOfTextRenderEvent apart into one event per Glyph
    """
    state = self._graphics_state
    events: typing.List[ChunkOfTextRenderEvent] = []
    cursor_x: Decimal = Decimal(0)
    base_y: Decimal = state.text_rise
    font: typing.Optional[Font] = state.font
    assert font is not None

    for glyph in self._glyph_line.split():
        # start from a placeholder event and overwrite its text properties
        event = ChunkOfTextRenderEvent(self._graphics_state, String(" "))
        event.font_size = self.font_size
        event.font_color = self.font_color
        event.font = self.font
        event.text = glyph.get_text()
        event._space_character_width_estimate = self._space_character_width_estimate
        event._graphics_state = self._graphics_state
        event._glyph_line = glyph

        # corners shared by the baseline box and the descent box
        matrix = state.text_matrix.mul(state.ctm)
        origin = matrix.cross(cursor_x, base_y, Decimal(1))
        top_right = matrix.cross(
            cursor_x + glyph.get_width_in_text_space(),
            base_y + font.get_ascent() * Decimal(0.001),
            Decimal(1),
        )

        # baseline bounding box (also the default bounding box)
        event.baseline_bounding_box = Rectangle(
            origin[0],
            origin[1],
            top_right[0] - origin[0],
            top_right[1] - origin[1],
        )
        event.bounding_box = event.baseline_bounding_box

        # glyphs that use the descent get a box starting below the baseline
        if glyph.uses_descent():
            bottom_left = matrix.cross(
                cursor_x,
                base_y + font.get_descent() * Decimal(0.001),
                Decimal(1),
            )
            event.bounding_box = Rectangle(
                min(bottom_left[0], top_right[0]),
                min(bottom_left[1], top_right[1]),
                abs(top_right[0] - bottom_left[0]),
                abs(top_right[1] - bottom_left[1]),
            )

        # advance to the next glyph
        cursor_x += glyph.get_width_in_text_space()
        events.append(event)

    return events
def _do_layout_without_padding(self, page: Page, bounding_box: Rectangle):
    """
    Append the operators that draw this text to the page content stream.

    The text is anchored one font size below the top edge of bounding_box.
    The rectangle that was laid out is stored via set_bounding_box and
    returned.
    """
    assert self.font

    # fill color, scaled from 0..255 down to 0..1
    rgb_color = self.font_color.to_rgb()
    COLOR_MAX = Decimal(255.0)
    red = Decimal(rgb_color.red / COLOR_MAX)
    green = Decimal(rgb_color.green / COLOR_MAX)
    blue = Decimal(rgb_color.blue / COLOR_MAX)

    # Tf gets size 1; the actual scaling happens in the Tm matrix
    font_resource_name = self._get_font_resource_name(self.font, page)
    scale = float(self.font_size)
    origin_x = float(bounding_box.x)
    origin_y = float(bounding_box.y + bounding_box.height - self.font_size)
    encoded_text = self._write_bytes_in_simple_font()

    content = """ q BT %f %f %f rg /%s %f Tf %f 0 0 %f %f %f Tm (%s) Tj ET Q """ % (
        red,  # rg
        green,  # rg
        blue,  # rg
        font_resource_name,  # Tf
        Decimal(1),  # Tf
        scale,  # Tm
        scale,  # Tm
        origin_x,  # Tm
        origin_y,  # Tm
        encoded_text,  # Tj
    )
    self._append_to_content_stream(page, content)

    # the occupied rectangle: glyph-line width by one font size
    text_width = self.font.build_glyph_line(
        String(self.text)
    ).get_width_in_text_space(self.font_size)
    layout_rect = Rectangle(
        bounding_box.x,
        bounding_box.y + bounding_box.height - self.font_size,
        text_width,
        self.font_size,
    )

    self.set_bounding_box(layout_rect)
    return layout_rect
def _do_layout_without_padding_text_alignment_justified(
    self, lines_of_text: typing.List[str], page: Page, bounding_box: Rectangle
) -> Rectangle:
    """
    Lay out every line word by word, spreading the unused width of the
    bounding box evenly over the space characters of that line.

    Returns (and stores via set_bounding_box) the rectangle enclosing all
    laid-out words.
    """
    # running extremes of everything laid out so far
    left: Decimal = Decimal(2048)
    bottom: Decimal = Decimal(2048)
    right: Decimal = Decimal(0)
    top: Decimal = Decimal(0)
    leading: Decimal = self.font_size * Decimal(1.3)

    for line_index, line in enumerate(lines_of_text):
        line_width: Decimal = self.font.build_glyph_line(
            String(line)
        ).get_width_in_text_space(self.font_size)
        slack: Decimal = bounding_box.width - line_width

        # the slack is shared by the space characters (if there are any)
        space_count: Decimal = Decimal(line.count(" "))
        extra_per_space: Decimal = (
            slack / space_count if space_count > 0 else Decimal(0)
        )

        cursor_x: Decimal = bounding_box.x
        for word in line.split(" "):
            piece = word + " "
            chunk = ChunkOfText(
                piece,
                font=self.font,
                font_size=self.font_size,
                font_color=self.font_color,
                parent=self,
            )
            placed: Rectangle = chunk.layout(
                page,
                bounding_box=Rectangle(
                    cursor_x,
                    bounding_box.y
                    + bounding_box.height
                    - leading * line_index
                    - self.font_size,
                    bounding_box.width,
                    self.font_size,
                ),
            )

            left = min(placed.x, left)
            bottom = min(placed.y, bottom)
            right = max(placed.x + placed.width, right)
            top = max(placed.y + placed.height, top)

            # move past the word, then add this line's share of the slack
            cursor_x += self.font.build_glyph_line(
                String(piece)
            ).get_width_in_text_space(self.font_size)
            cursor_x += extra_per_space

    layout_rect = Rectangle(left, bottom, right - left, top - bottom)
    self.set_bounding_box(layout_rect)
    return layout_rect
def _split_text(self, bounding_box: Rectangle) -> typing.List[str]:
    """
    Cut self.text into lines that fit the width of bounding_box.

    The text is first tokenized on space, tab and newline (tokens are kept
    as separate words when the matching respect_* flag is set), then words
    are greedily appended to the current line while it still fits.
    """
    split_tokens = (" ", "\t", "\n")
    kept_tokens: typing.List[str] = []
    if self.respect_newlines_in_text:
        kept_tokens.append("\n")
    if self.respect_spaces_in_text:
        kept_tokens.extend([" ", "\t"])

    # tokenize
    words: typing.List[str] = [""]
    for ch in self.text:
        if ch not in split_tokens:
            # ordinary character: extend the word under construction
            words[-1] += ch
        elif ch in kept_tokens:
            # preserved separator: it becomes a word of its own
            words.extend([ch, ""])
        elif words[-1] != "":
            # dropped separator: merely closes the current word
            words.append("")

    # assemble lines
    lines: typing.List[str] = []
    for index, word in enumerate(words):
        # a preserved newline opens a new, empty line
        if word == "\n" and self.respect_newlines_in_text:
            lines.append("")
            continue

        # when spaces were dropped a single one is re-inserted between words
        joiner = " " if (index != 0 and not self.respect_spaces_in_text) else ""
        candidate = (lines[-1] if lines else "") + joiner + word
        candidate_width = self.font.build_glyph_line(
            String(candidate)
        ).get_width_in_text_space(self.font_size)

        # compared against -1 rather than 0 to tolerate rounding errors
        leftover: Decimal = bounding_box.width - candidate_width
        if leftover > Decimal(-1) and lines:
            lines[-1] += joiner + word
        else:
            # either the very first word, or the word no longer fits
            lines.append(word)

    return lines
def add_outline(
    self,
    text: str,
    level: int,
    destination_type: DestinationType,
    page_nr: int,
    top: typing.Optional[Decimal] = None,
    right: typing.Optional[Decimal] = None,
    bottom: typing.Optional[Decimal] = None,
    left: typing.Optional[Decimal] = None,
    zoom: typing.Optional[Decimal] = None,
) -> "Document":
    """
    Add an outline item titled `text` that points at page `page_nr`.

    Each destination type expects an exact set of coordinates; this is
    asserted. The very first item is attached directly to /Outlines; later
    items are attached to the last item found at depth `level - 1`.
    """
    # coordinates every destination type requires, in the order in which
    # they are appended to the destination array
    expected_arguments = [
        (DestinationType.X_Y_Z, ("left", "top", "zoom")),
        (DestinationType.FIT, ()),
        (DestinationType.FIT_H, ("top",)),
        (DestinationType.FIT_V, ("left",)),
        (DestinationType.FIT_R, ("left", "bottom", "right", "top")),
        (DestinationType.FIT_B_H, ("top",)),
        (DestinationType.FIT_B_V, ("left",)),
    ]
    supplied = {
        "left": left,
        "bottom": bottom,
        "right": right,
        "top": top,
        "zoom": zoom,
    }

    destination = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
    destination.append(Decimal(page_nr))
    destination.append(destination_type.value)
    for kind, required in expected_arguments:
        if destination_type == kind:
            # required coordinates must be given, all others must be None
            assert all(
                (supplied[key] is not None) == (key in required)
                for key in supplied
            )
            for key in required:
                destination.append(Decimal(supplied[key]))

    # add \Outlines entry in \Root
    root = self["XRef"]["Trailer"]["Root"]
    if "Outlines" not in root:
        outline_dictionary: Dictionary = Dictionary()
        root[Name("Outlines")] = outline_dictionary
        outline_dictionary.set_parent(  # type: ignore [attr-defined]
            root[Name("Outlines")]
        )
        outline_dictionary[Name("Type")] = Name("Outlines")
        outline_dictionary[Name("Count")] = Decimal(0)

    # the new item
    outline = Dictionary()
    outline[Name("Dest")] = destination
    outline[Name("Parent")] = None
    outline[Name("Title")] = String(text)

    # an /Outlines without First/Last is empty: the item becomes its only child
    outline_dictionary = root["Outlines"]
    if not ("First" in outline_dictionary and "Last" in outline_dictionary):
        outline_dictionary[Name("First")] = outline
        outline_dictionary[Name("Last")] = outline
        outline_dictionary[Name("Count")] = Decimal(1)
        outline[Name("Parent")] = outline_dictionary
        return self

    def _child_items(node: Dictionary):
        # follow the First -> Next chain until Last is reached
        found = []
        if "First" in node:
            found.append(node["First"])
            while found[-1] != node["Last"]:
                found.append(found[-1]["Next"])
        return found

    # walk the outline tree, recording every item with its depth
    # (/Outlines itself sits at depth -1)
    visited: typing.List[typing.Tuple[int, Dictionary]] = []
    pending: typing.List[typing.Tuple[int, Dictionary]] = [(-1, outline_dictionary)]
    while len(pending) > 0:
        depth, node = pending.pop(0)
        visited.append((depth, node))
        for child in _child_items(node):
            pending.append((depth + 1, child))

    # the parent is the last item seen one level above the requested one
    candidates = [node for depth, node in visited if depth == level - 1]
    parent = candidates[-1]

    # the former last child now points at the new item
    if "Last" in parent:
        parent["Last"][Name("Next")] = outline

    # hook the new item up to its parent
    outline[Name("Parent")] = parent
    if "First" not in parent:
        parent[Name("First")] = outline
    if "Count" not in parent:
        parent[Name("Count")] = Decimal(0)
    parent[Name("Last")] = outline

    # every ancestor counts one more item
    ancestor = parent
    while ancestor:
        ancestor[Name("Count")] = Decimal(ancestor["Count"] + Decimal(1))
        if "Parent" not in ancestor:
            break
        ancestor = ancestor["Parent"]

    return self
def true_type_font_from_file(path_to_font_file: Path) -> "TrueTypeFont":
    """
    This function returns the PDF TrueTypeFont object for a given TTF file.

    The font program is embedded (Flate-compressed) as /FontFile2 of the
    font descriptor. Glyphs reachable through the font's best cmap are
    numbered 0..n-1 in glyph order; /Widths and /Encoding /Differences
    both use that numbering.

    :param path_to_font_file: path to an existing file whose name ends in ".ttf"
    :return: the populated TrueTypeFont dictionary
    """
    assert path_to_font_file.exists()
    assert path_to_font_file.name.endswith(".ttf")

    font_file_bytes: typing.Optional[bytes] = None
    with open(path_to_font_file, "rb") as ffh:
        font_file_bytes = ffh.read()
    assert font_file_bytes

    # read file
    ttf_font_file = TTFont(path_to_font_file)

    # build font
    font: TrueTypeFont = TrueTypeFont()
    name_record = [
        x
        for x in ttf_font_file["name"].names
        if x.platformID == 3 and x.nameID == 1
    ][0]
    font_name: str = str(name_record.string, "latin1")
    # keep letters only
    font_name = "".join(
        [x for x in font_name if x.lower() in "abcdefghijklmnopqrstuvwxyz"]
    )
    font[Name("Name")] = Name(font_name)
    font[Name("BaseFont")] = Name(font_name)

    # getBestCmap() may return None; it must not be iterated in that case
    cmap: typing.Optional[typing.Dict[int, str]] = ttf_font_file.getBestCmap()
    cmap_reverse: typing.Dict[str, int] = {}
    if cmap is not None:
        for k, v in cmap.items():
            # remember the lowest code point that maps to each glyph name
            if v in cmap_reverse:
                cmap_reverse[v] = min(cmap_reverse[v], k)
            else:
                cmap_reverse[v] = k
    glyph_order: typing.List[str] = [
        x for x in ttf_font_file.glyphOrder if x in cmap_reverse
    ]

    # build widths
    units_per_em: pDecimal = pDecimal(ttf_font_file["head"].unitsPerEm)
    if cmap is not None:
        font[Name("FirstChar")] = pDecimal(0)
        # Widths holds LastChar - FirstChar + 1 entries, so with FirstChar 0
        # the last code is len - 1 (clamped for an empty glyph list)
        font[Name("LastChar")] = pDecimal(max(len(glyph_order) - 1, 0))
        font[Name("Widths")] = List()
        glyph_set = ttf_font_file.getGlyphSet()  # built once, not per glyph
        for glyph_name in glyph_order:
            w: pDecimal = (
                pDecimal(glyph_set[glyph_name].width) / units_per_em
            ) * pDecimal(1000)
            w = pDecimal(round(w, 2))
            font["Widths"].append(w)

    font[Name("FontDescriptor")] = Dictionary()
    descriptor = font["FontDescriptor"]
    descriptor[Name("Type")] = Name("FontDescriptor")
    descriptor[Name("FontName")] = String(font_name)
    descriptor[Name("FontStretch")] = Name("Normal")  # TODO
    descriptor[Name("FontWeight")] = pDecimal(400)  # TODO
    descriptor[Name("Flags")] = pDecimal(4)  # TODO
    descriptor[Name("FontBBox")] = List().set_can_be_referenced(  # type: ignore [attr-defined]
        False
    )  # TODO
    for _ in range(0, 4):
        descriptor["FontBBox"].append(pDecimal(0))

    # fmt: off
    descriptor[Name("ItalicAngle")] = pDecimal(ttf_font_file["post"].italicAngle)
    descriptor[Name("Ascent")] = pDecimal(pDecimal(ttf_font_file["hhea"].ascent) / units_per_em * Decimal(1000))
    descriptor[Name("Descent")] = pDecimal(pDecimal(ttf_font_file["hhea"].descent) / units_per_em * Decimal(1000))
    descriptor[Name("CapHeight")] = pDecimal(0)  # TODO
    descriptor[Name("StemV")] = pDecimal(0)  # TODO
    # fmt: on

    # encoding: glyph i of glyph_order is addressed by character code i
    font[Name("Encoding")] = Dictionary()
    font["Encoding"][Name("BaseEncoding")] = Name("WinAnsiEncoding")
    font["Encoding"][Name("Differences")] = List()
    for i, glyph_name in enumerate(glyph_order):
        font["Encoding"]["Differences"].append(pDecimal(i))
        font["Encoding"]["Differences"].append(Name(glyph_name))

    # embed font file
    font_stream: Stream = Stream()
    font_stream[Name("Type")] = Name("Font")
    font_stream[Name("Subtype")] = Name("TrueType")
    font_stream[Name("Length")] = pDecimal(len(font_file_bytes))
    font_stream[Name("Length1")] = pDecimal(len(font_file_bytes))
    font_stream[Name("Filter")] = Name("FlateDecode")
    font_stream[Name("DecodedBytes")] = font_file_bytes
    font_stream[Name("Bytes")] = zlib.compress(font_file_bytes, 9)

    descriptor[Name("FontFile2")] = font_stream

    # return
    return font
def __init__(self, graphics_state: CanvasGraphicsState, raw_bytes: String):
    """
    Build the render event for `raw_bytes` drawn under `graphics_state`.

    Besides the text properties passed to the parent constructor, this
    computes the baseline bounding box, the bounding box (extended by the
    font descent when the text contains y, p, q, f, g or j) and the space
    character width estimate.
    """
    assert graphics_state.font is not None
    font = graphics_state.font
    rise = graphics_state.text_rise

    self._glyph_line: GlyphLine = GlyphLine(
        raw_bytes.get_value_bytes(),
        graphics_state.font,
        graphics_state.font_size,
        graphics_state.character_spacing,
        graphics_state.word_spacing,
        graphics_state.horizontal_scaling,
    )
    super(ChunkOfTextRenderEvent, self).__init__(
        font=graphics_state.font,
        font_size=graphics_state.font_size,
        font_color=graphics_state.non_stroke_color,
        text=self._glyph_line.get_text(),
    )

    def _spanning(a, b) -> Rectangle:
        # normalized rectangle having a and b as opposite corners
        return Rectangle(
            min(a[0], b[0]),
            min(a[1], b[1]),
            abs(b[0] - a[0]),
            abs(b[1] - a[1]),
        )

    matrix = graphics_state.text_matrix.mul(graphics_state.ctm)

    # baseline box: from the text rise up to the ascent
    start = matrix.cross(Decimal(0), rise, Decimal(1))
    end = matrix.cross(
        self._glyph_line.get_width_in_text_space(),
        rise + font.get_ascent() * Decimal(0.001),
        Decimal(1),
    )
    self.baseline_bounding_box = _spanning(start, end)

    # bounding box: starts at the descent when a descending letter occurs
    lowered = self.text.lower()
    if any(letter in lowered for letter in ("y", "p", "q", "f", "g", "j")):
        start = matrix.cross(
            Decimal(0),
            rise + font.get_descent() * Decimal(0.001),
            Decimal(1),
        )
        end = matrix.cross(
            self._glyph_line.get_width_in_text_space(),
            rise + font.get_ascent() * Decimal(0.001),
            Decimal(1),
        )
        self.set_bounding_box(_spanning(start, end))
    else:
        self.set_bounding_box(self.baseline_bounding_box)

    # space character width estimate, scaled by the font size
    current_font: Font = graphics_state.font
    self._space_character_width_estimate = (
        current_font.get_space_character_width_estimate()
        * graphics_state.font_size
    )
    self._font_size = graphics_state.font_size

    # keep the graphics state around (split_on_glyphs needs it)
    self._graphics_state = graphics_state
def read_object(
    self, xref: Optional["XREF"] = None
) -> Optional[AnyPDFType]:  # type: ignore [name-defined]
    """
    Consume the upcoming token(s) and return the object they represent.

    None is returned when there is no (non-empty) token left, or when the
    token does not start any of the object kinds handled below.
    """
    token = self.next_non_comment_token()
    if token is None or len(token.text) == 0:
        return None
    kind = token.token_type

    # containers are re-read from their first byte
    if kind == TokenType.START_DICT:
        self.seek(token.byte_offset)
        return self.read_dictionary()
    if kind == TokenType.START_ARRAY:
        self.seek(token.byte_offset)
        return self.read_array()

    if kind == TokenType.NUMBER:
        # a number can open several constructs; each attempt restarts
        # from the first byte of the token

        # <number> <number> "R"
        self.seek(token.byte_offset)
        candidate = self.read_indirect_reference()
        if candidate is not None:
            return candidate

        # <number> <number> "obj" <<dictionary>> "stream" <bytes> "endstream"
        self.seek(token.byte_offset)
        candidate = self.read_stream(xref)
        if candidate is not None:
            return candidate

        # <number> <number> "obj"
        self.seek(token.byte_offset)
        candidate = self.read_indirect_object()
        if candidate is not None:
            return candidate

        # a plain number: step over its text
        self.seek(self.tell() + len(token.text))
        return Decimal(Decimal(token.text))

    if kind == TokenType.OTHER:
        # boolean
        if token.text in ["true", "false"]:
            return Boolean(token.text == "true")
        # canvas operators
        if token.text in CanvasOperatorName.VALID_NAMES:
            return CanvasOperatorName(token.text)

    # names (without the leading character)
    if kind == TokenType.NAME:
        return Name(token.text[1:])

    # literal strings and hex strings (without their delimiters)
    if kind == TokenType.STRING:
        return String(token.text[1:-1])
    if kind == TokenType.HEX_STRING:
        return HexadecimalString(token.text[1:-1])

    # default
    return None
def append_embedded_file(
    self, file_name: str, file_bytes: bytes, apply_compression: bool = True
) -> "Document":
    """
    Embed a file, given by its name and its bytes, into this Document.

    Embedded file streams (PDF 1.3) let the content of a referenced file
    travel inside the body of the PDF itself, so the PDF can be stored or
    transmitted as one self-contained unit instead of depending on
    external files that have to accompany it. (Embedded files are included
    purely for convenience; a conforming reader need not process them.)

    When a kid with this file name already exists nothing is changed.
    """
    assert "XRef" in self
    assert "Trailer" in self["XRef"]
    assert "Root" in self["XRef"]["Trailer"]
    root = self["XRef"]["Trailer"]["Root"]

    # /Names and /Names/EmbeddedFiles are created on demand
    if "Names" not in root:
        root[Name("Names")] = Dictionary()
    names = root["Names"]
    if "EmbeddedFiles" not in names:
        names[Name("EmbeddedFiles")] = Dictionary()
        names["EmbeddedFiles"][Name("Kids")] = List()

    # find parent: descend (one level at most) into the first kid whose
    # limits strictly enclose the file name
    parent = names["EmbeddedFiles"]
    if "Kids" in parent:
        for candidate in parent["Kids"]:
            low = str(candidate["Limits"][0])
            high = str(candidate["Limits"][1])
            if low != high and low < file_name < high:
                parent = candidate
                break

    same_name = [
        x for x in parent["Kids"] if x["Limits"][0] == x["Limits"][1] == file_name
    ]
    if not same_name:
        # add new child
        kid = Dictionary()
        kid[Name("F")] = String(file_name)
        kid[Name("Type")] = Name("Filespec")
        kid[Name("Limits")] = List()
        for _ in range(0, 2):
            kid["Limits"].append(String(file_name))

        # leaf \Names array: [ name, filespec ]
        leaf_names = List()
        leaf_names.append(String(file_name))
        kid[Name("Names")] = leaf_names

        # the actual file stream
        stream = Stream()
        stream[Name("Type")] = Name("EmbeddedFile")
        stream[Name("DecodedBytes")] = file_bytes
        if apply_compression:
            stream[Name("Bytes")] = zlib.compress(stream[Name("DecodedBytes")], 9)
            stream[Name("Filter")] = Name("FlateDecode")
        else:
            stream[Name("Bytes")] = file_bytes
        stream[Name("Length")] = Decimal(len(stream[Name("Bytes")]))

        # leaf \Filespec dictionary
        file_spec = Dictionary()
        file_spec[Name("EF")] = Dictionary()
        file_spec["EF"][Name("F")] = stream
        file_spec[Name("F")] = String(file_name)
        file_spec[Name("Type")] = Name("Filespec")
        leaf_names.append(file_spec)

        # append
        parent["Kids"].append(kid)
    else:
        # change existing child
        kid = same_name[0]
        # TODO

    return self
def add_outline(
    self,
    text: str,
    level: int,
    destination_type: DestinationType,
    page_nr: int,
    top: typing.Optional[Decimal] = None,
    right: typing.Optional[Decimal] = None,
    bottom: typing.Optional[Decimal] = None,
    left: typing.Optional[Decimal] = None,
    zoom: typing.Optional[Decimal] = None,
) -> "Document":
    """
    A PDF document may contain a document outline that the conforming reader
    may display on the screen, allowing the user to navigate interactively
    from one part of the document to another. The outline consists of a
    tree-structured hierarchy of outline items (sometimes called bookmarks),
    which serve as a visual table of contents to display the document's
    structure to the user.

    This function adds an outline to this Document.

    :param text:             title of the outline item
    :param level:            depth of the new item; it is attached to the
                             last existing item at depth ``level - 1``
                             (\\Outlines itself is depth -1). Ignored for the
                             very first item, which always becomes the only
                             child of \\Outlines
    :param destination_type: kind of destination; determines which of
                             top/right/bottom/left/zoom must be given
                             (asserted) and which must stay None
    :param page_nr:          page number stored in the destination
    :return:                 self
    :raises IndexError:      when no existing item sits at depth ``level - 1``
    """
    # local import: the traversal queue below needs O(1) pops from the left
    from collections import deque

    destination = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
    destination.append(Decimal(page_nr))
    destination.append(destination_type.value)
    if destination_type == DestinationType.X_Y_Z:
        assert (
            left is not None
            and bottom is None
            and right is None
            and top is not None
            and zoom is not None
        )
        destination.append(Decimal(left))
        destination.append(Decimal(top))
        destination.append(Decimal(zoom))
    if destination_type == DestinationType.FIT:
        assert (
            left is None
            and bottom is None
            and right is None
            and top is None
            and zoom is None
        )
    if destination_type == DestinationType.FIT_H:
        assert (
            left is None
            and bottom is None
            and right is None
            and top is not None
            and zoom is None
        )
        destination.append(Decimal(top))
    if destination_type == DestinationType.FIT_V:
        assert (
            left is not None
            and bottom is None
            and right is None
            and top is None
            and zoom is None
        )
        destination.append(Decimal(left))
    if destination_type == DestinationType.FIT_R:
        assert (
            left is not None
            and bottom is not None
            and right is not None
            and top is not None
            and zoom is None
        )
        destination.append(Decimal(left))
        destination.append(Decimal(bottom))
        destination.append(Decimal(right))
        destination.append(Decimal(top))
    if destination_type == DestinationType.FIT_B_H:
        assert (
            left is None
            and bottom is None
            and right is None
            and top is not None
            and zoom is None
        )
        destination.append(Decimal(top))
    if destination_type == DestinationType.FIT_B_V:
        assert (
            left is not None
            and bottom is None
            and right is None
            and top is None
            and zoom is None
        )
        destination.append(Decimal(left))

    # add \Outlines entry in \Root
    if "Outlines" not in self["XRef"]["Trailer"]["Root"]:
        outline_dictionary: Dictionary = Dictionary()
        self["XRef"]["Trailer"]["Root"][Name("Outlines")] = outline_dictionary
        outline_dictionary.set_parent(  # type: ignore [attr-defined]
            self["XRef"]["Trailer"]["Root"][Name("Outlines")]
        )
        outline_dictionary[Name("Type")] = Name("Outlines")
        outline_dictionary[Name("Count")] = Decimal(0)

    # create entry
    outline = Dictionary()
    outline[Name("Dest")] = destination
    outline[Name("Parent")] = None
    outline[Name("Title")] = String(text)

    # get \Outlines
    outline_dictionary = self["XRef"]["Trailer"]["Root"]["Outlines"]

    # if everything is empty, add the new entry as the only entry
    if "First" not in outline_dictionary or "Last" not in outline_dictionary:
        outline_dictionary[Name("First")] = outline
        outline_dictionary[Name("Last")] = outline
        outline_dictionary[Name("Count")] = Decimal(1)
        outline[Name("Parent")] = outline_dictionary
        return self

    # helper: the children of x, found by following First -> Next up to Last
    def _children(x: Dictionary):
        if "First" not in x:
            return []
        children = [x["First"]]
        while children[-1] != x["Last"]:
            children.append(children[-1]["Next"])
        return children

    # traverse the outline tree level by level, recording (depth, item)
    outlines_done: typing.List[typing.Tuple[int, Dictionary]] = []
    outlines_todo = deque([(-1, outline_dictionary)])
    while outlines_todo:
        t = outlines_todo.popleft()
        outlines_done.append(t)
        for c in _children(t[1]):
            outlines_todo.append((t[0] + 1, c))

    # find parent: the last item visited at the level above the new one
    parent = [x[1] for x in outlines_done if x[0] == level - 1][-1]

    # update sibling-linking
    if "Last" in parent:
        sibling = parent["Last"]
        sibling[Name("Next")] = outline

    # update parent-linking
    outline[Name("Parent")] = parent
    if "First" not in parent:
        parent[Name("First")] = outline
    if "Count" not in parent:
        parent[Name("Count")] = Decimal(0)
    parent[Name("Last")] = outline

    # update count: the parent and each of its ancestors gain one item
    outline_to_update_count = parent
    while outline_to_update_count:
        outline_to_update_count[Name("Count")] = Decimal(
            outline_to_update_count["Count"] + Decimal(1)
        )
        if "Parent" in outline_to_update_count:
            outline_to_update_count = outline_to_update_count["Parent"]
        else:
            break

    return self
def split_on_glyphs(self) -> typing.List["ChunkOfTextRenderEvent"]:
    """
    Split this ChunkOfTextRenderEvent into one event per glyph.

    Every returned event copies the text properties of this event, carries
    a GlyphLine holding just its glyph, and gets its own baseline bounding
    box and bounding box. The bounding box starts at the font descent for
    the letters y, p, q, f, g and j (case-insensitive).

    :return: the per-glyph events, in glyph order
    """
    state = self._graphics_state
    chunks_of_text: typing.List[ChunkOfTextRenderEvent] = []
    x: Decimal = Decimal(0)
    y: Decimal = state.text_rise

    # neither the matrix nor the ascent line depends on the glyph
    m = state.text_matrix.mul(state.ctm)
    y_ascent = y + state.font.get_ascent() * Decimal(0.001)

    for g in self._glyph_line.glyphs:
        glyph_text = g.to_unicode_string()

        # start from a placeholder event and overwrite its text properties
        e = ChunkOfTextRenderEvent(self._graphics_state, String(" "))
        e.font_size = self.font_size
        e.font_color = self.font_color
        e.font = self.font
        e.text = glyph_text
        e.space_character_width_estimate = self.space_character_width_estimate
        e._graphics_state = self._graphics_state
        e._glyph_line = GlyphLine([g])

        # calculate width: scaled glyph width, plus word spacing for a
        # space, plus character spacing
        width: Decimal = (
            g.width
            * Decimal(0.001)
            * self.font_size
            * state.horizontal_scaling
            * Decimal(0.01)
            + (state.word_spacing if glyph_text == " " else Decimal(0))
            + state.character_spacing
        )

        # set baseline bounding box
        p0 = m.cross(x, y, Decimal(1))
        p1 = m.cross(x + width, y_ascent, Decimal(1))
        e.baseline_bounding_box = Rectangle(
            p0[0], p0[1], p1[0] - p0[0], p1[1] - p0[1]
        )
        e.bounding_box = e.baseline_bounding_box

        # change bounding box (descent)
        if glyph_text.lower() in ["y", "p", "q", "f", "g", "j"]:
            p0 = m.cross(
                x,
                y + state.font.get_descent() * Decimal(0.001),
                Decimal(1),
            )
            e.bounding_box = Rectangle(
                min(p0[0], p1[0]),
                min(p0[1], p1[1]),
                abs(p1[0] - p0[0]),
                abs(p1[1] - p0[1]),
            )

        # update x
        x += width

        # append
        chunks_of_text.append(e)

    return chunks_of_text
def _read_file(input: typing.TextIO) -> Optional[Font]:
    """
    Parse an Adobe Font Metrics (AFM) text stream into a Type1 Font.

    Lines starting with "Comment" are ignored. The font descriptor is
    filled from the global metrics; FirstChar, LastChar and Widths come
    from the lines between StartCharMetrics and EndCharMetrics.

    :param input: text stream holding the AFM data
    :return: the Font, or None when the input is empty or is not wrapped
             in StartFontMetrics / EndFontMetrics
    """
    lines: typing.List[str] = [
        x for x in input.readlines() if not x.startswith("Comment")
    ]
    lines = [x[:-1] if x.endswith("\n") else x for x in lines]

    # check first/last line (an empty input has neither)
    if (
        not lines
        or not lines[0].startswith("StartFontMetrics")
        or not lines[-1].startswith("EndFontMetrics")
    ):
        return None

    out_font = Font()

    # FontDescriptor
    out_font_descriptor = FontDescriptor().set_parent(out_font)  # type: ignore [attr-defined]

    font_name = AdobeFontMetrics._find_and_parse_as_string(lines, "FontName")
    if font_name:
        out_font_descriptor[Name("FontName")] = Name(font_name)

    font_family = AdobeFontMetrics._find_and_parse_as_string(lines, "FamilyName")
    if font_family:
        out_font_descriptor[Name("FontFamily")] = String(font_family)

    # FontBBox
    fontbbox_str = AdobeFontMetrics._find_and_parse_as_string(lines, "FontBBox")
    if fontbbox_str:
        fontbbox = [Decimal(x) for x in fontbbox_str.split(" ")]
        out_font_descriptor[Name("FontBBox")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
        for x in fontbbox:
            out_font_descriptor[Name("FontBBox")].append(x)

    # values as parsed, keyed by AFM keyword; AvgWidth/MaxWidth are
    # consulted again once the character metrics have been read
    parsed: typing.Dict[str, typing.Optional[float]] = {}

    def _copy_metric(afm_key: str, pdf_key: str, default_to_zero: bool) -> None:
        # copy one numeric AFM entry into the descriptor; a missing or zero
        # value becomes Decimal(0) when default_to_zero, else it is skipped
        value = AdobeFontMetrics._find_and_parse_as_float(lines, afm_key)
        parsed[afm_key] = value
        if value:
            out_font_descriptor[Name(pdf_key)] = Decimal(value)
        elif default_to_zero:
            out_font_descriptor[Name(pdf_key)] = Decimal(0)

    _copy_metric("ItalicAngle", "ItalicAngle", True)
    _copy_metric("Ascender", "Ascent", True)
    _copy_metric("Descender", "Descent", True)

    # Flags
    out_font_descriptor[Name("Flags")] = Decimal(131104)

    _copy_metric("CapHeight", "CapHeight", True)
    _copy_metric("XHeight", "XHeight", False)
    _copy_metric("StemV", "StemV", True)
    _copy_metric("StemH", "StemH", True)
    _copy_metric("AvgWidth", "AvgWidth", False)
    _copy_metric("MaxWidth", "MaxWidth", False)
    _copy_metric("MissingWidth", "MissingWidth", False)
    _copy_metric("CharSet", "CharSet", False)

    # Font
    out_font[Name("Type")] = Name("Font")
    out_font[Name("Subtype")] = Name("Type1")
    out_font[Name("Name")] = out_font_descriptor["FontName"]
    out_font[Name("BaseFont")] = out_font_descriptor["FontName"]

    widths = List().set_parent(out_font)  # type: ignore [attr-defined]
    avg_char_width: float = 0
    avg_char_width_norm: float = 0
    first_char = None
    last_char = None

    # the lines strictly between StartCharMetrics and EndCharMetrics
    start = lines.index([x for x in lines if x.startswith("StartCharMetrics")][0])
    end = lines.index("EndCharMetrics")
    char_metrics_lines = lines[start + 1 : end]

    for cml in char_metrics_lines:
        # "C 32 ; WX 278 ; N space ; ..." -> {"C": "32", "WX": "278", ...}
        tmp = {}
        for field in [x.strip() for x in cml.split(";")]:
            if " " in field:
                parts = field.split(" ")
                tmp[parts[0]] = parts[1]

        # determine char: decimal after C, hexadecimal (delimited) after CH
        ch = -1
        if "C" in tmp:
            ch = int(tmp["C"])
        if "CH" in tmp:
            ch = int(tmp["CH"][1:-1], 16)

        if ch != -1:
            if first_char is None or ch < first_char:
                first_char = ch
            if last_char is None or ch > last_char:
                last_char = ch

        # only encoded glyphs with a non-zero width feed the average
        w = float(tmp["WX"])
        if ch != -1 and w != 0:
            avg_char_width += w
            avg_char_width_norm += 1

        widths.append(Decimal(w))

    assert first_char is not None
    assert last_char is not None

    out_font[Name("FirstChar")] = Decimal(first_char)
    out_font[Name("LastChar")] = Decimal(last_char)
    out_font[Name("Widths")] = widths

    # derive AvgWidth / MaxWidth when the file did not state them; the
    # average is skipped when no glyph contributed (avoids dividing by zero)
    if parsed["AvgWidth"] is None and avg_char_width_norm > 0:
        out_font_descriptor[Name("AvgWidth")] = Decimal(
            round(Decimal(avg_char_width / avg_char_width_norm), 2)
        )
    if parsed["MaxWidth"] is None:
        out_font_descriptor[Name("MaxWidth")] = Decimal(max(widths))
    out_font[Name("FontDescriptor")] = out_font_descriptor

    # return
    return out_font