def transform(
        self,
        object_to_transform: Any,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        Write a Document object to the byte stream held by the context.

        The PDF header is written first, then the trailer metadata (/Info and
        /ID) is refreshed, and finally the XRef is handed to the root
        transformer.

        :param object_to_transform: the Document; it is indexed as
                                    object_to_transform["XRef"]["Trailer"]
        :param context:             the write context; neither it nor its
                                    destination may be None
        """
        assert context is not None
        assert context.destination is not None
        out = context.destination

        # header: the version line, followed by a comment line made of four
        # bytes that all have a value above 127
        out.write(b"%PDF-1.7\n")
        out.write(b"%")
        out.write(bytes([226, 227, 207, 211]))
        out.write(b"\n")

        # invalidate all references
        WritePDFTransformer._invalidate_all_references(object_to_transform)

        trailer = object_to_transform["XRef"]["Trailer"]

        # the trailer needs an /Info dictionary to hold the dates and producer
        if "Info" not in trailer:
            trailer[Name("Info")] = Dictionary()

        # /ID: a new array gets the fresh identifier in both slots, an
        # existing array only has its second slot replaced
        fresh_id = HexadecimalString("%032x" % random.randrange(16**32))
        if "ID" in trailer:
            trailer["ID"][1] = fresh_id
        else:
            trailer[Name("ID")] = List().set_can_be_referenced(  # type: ignore [attr-defined]
                False)
            trailer["ID"].append(fresh_id)
            trailer["ID"].append(fresh_id)
        trailer["ID"].set_can_be_referenced(False)

        # /CreationDate is only filled in when absent, /ModDate and
        # /Producer are always overwritten
        timestamp = WritePDFTransformer._timestamp_to_str()
        info = trailer["Info"]
        if "CreationDate" not in info:
            info[Name("CreationDate")] = String(timestamp)
        info[Name("ModDate")] = String(timestamp)
        info[Name("Producer")] = String("pText")

        # transform XREF
        self.get_root_transformer().transform(object_to_transform["XRef"],
                                              context)
示例#2
0
    def _test_document(self, file) -> bool:
        """
        Round-trip one PDF: load it, set the /Producer entry of its /Info
        dictionary to "pText" and write the result to the output directory.

        :param file: path of the PDF to process; it is opened for reading and
                     its `stem` is used to name the output file
        :return:     False when the loaded document has no "XRef" entry or the
                     XRef has no "Trailer"; True once the copy has been written
        """
        # create the output directory if it does not exist yet; exist_ok
        # avoids failing when it already exists and parents=True also creates
        # missing ancestors (assumes self.output_dir is a pathlib.Path, which
        # the use of `/` below suggests)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # read the document; the file only needs to be opened once
        with open(file, "rb") as pdf_file_handle:
            doc = PDF.loads(pdf_file_handle)

        if "XRef" not in doc or "Trailer" not in doc["XRef"]:
            return False
        trailer = doc["XRef"]["Trailer"]

        # create the Info dictionary if needed
        if "Info" not in trailer:
            trailer[Name("Info")] = Dictionary()

        # change producer
        trailer["Info"][Name("Producer")] = String("pText")

        # determine output location and write the document
        out_file = self.output_dir / (file.stem + "_out.pdf")
        with open(out_file, "wb") as pdf_file_handle:
            PDF.dumps(pdf_file_handle, doc)

        return True
示例#3
0
    def split_on_glyphs(self) -> typing.List["ChunkOfTextRenderEvent"]:
        """
        Break this ChunkOfTextRenderEvent up into one event per element
        returned by self._glyph_line.split().

        Every new event copies the font settings of this event and gets its
        own baseline bounding box and bounding box, positioned by advancing
        along x by the width of each element.

        :return: the list of newly created events, in split order
        """
        state = self._graphics_state
        font: typing.Optional[Font] = state.font
        assert font is not None

        events: typing.List[ChunkOfTextRenderEvent] = []
        cursor_x: Decimal = Decimal(0)
        base_y: Decimal = state.text_rise
        for glyph in self._glyph_line.split():
            # start from a placeholder event, then overwrite its fields
            event = ChunkOfTextRenderEvent(state, String(" "))
            event.font_size = self.font_size
            event.font_color = self.font_color
            event.font = self.font
            event.text = glyph.get_text()
            event._space_character_width_estimate = (
                self._space_character_width_estimate
            )
            event._graphics_state = state
            event._glyph_line = glyph

            # corners of the box that sits on the baseline
            matrix = state.text_matrix.mul(state.ctm)
            advance = glyph.get_width_in_text_space()
            ascent_y = base_y + font.get_ascent() * Decimal(0.001)
            origin = matrix.cross(cursor_x, base_y, Decimal(1))
            top_right = matrix.cross(cursor_x + advance, ascent_y, Decimal(1))
            event.baseline_bounding_box = Rectangle(
                origin[0],
                origin[1],
                top_right[0] - origin[0],
                top_right[1] - origin[1],
            )
            event.bounding_box = event.baseline_bounding_box

            # glyphs that use the descent get a box that starts lower
            if glyph.uses_descent():
                bottom_left = matrix.cross(
                    cursor_x,
                    base_y + font.get_descent() * Decimal(0.001),
                    Decimal(1),
                )
                event.bounding_box = Rectangle(
                    min(bottom_left[0], top_right[0]),
                    min(bottom_left[1], top_right[1]),
                    abs(top_right[0] - bottom_left[0]),
                    abs(top_right[1] - bottom_left[1]),
                )

            # move on to where the next glyph starts
            cursor_x += advance
            events.append(event)

        return events
示例#4
0
    def _do_layout_without_padding(self, page: Page, bounding_box: Rectangle):
        """
        Append the operators that draw this piece of text to the content
        stream of the page, and record the rectangle the text occupies.

        :param page:         the page whose content stream is appended to
        :param bounding_box: the available area; the text is placed against
                             its left edge, one font_size below its top edge
        :return:             the rectangle that was set as bounding box
        """
        assert self.font
        color = self.font_color.to_rgb()
        full_scale = Decimal(255.0)

        # lower edge of the text: one font_size below the top of the box
        text_y = bounding_box.y + bounding_box.height - self.font_size

        # the font is selected at size 1, the actual size goes in the Tm matrix
        template = """
            q
            BT
            %f %f %f rg
            /%s %f Tf            
            %f 0 0 %f %f %f Tm            
            (%s) Tj
            ET            
            Q
        """
        operands = (
            Decimal(color.red / full_scale),  # rg
            Decimal(color.green / full_scale),  # rg
            Decimal(color.blue / full_scale),  # rg
            self._get_font_resource_name(self.font, page),  # Tf
            Decimal(1),  # Tf
            float(self.font_size),  # Tm
            float(self.font_size),  # Tm
            float(bounding_box.x),  # Tm
            float(text_y),  # Tm
            self._write_bytes_in_simple_font(),  # Tj
        )
        self._append_to_content_stream(page, template % operands)

        # the occupied rectangle is as wide as the glyph line of the text
        text_width = self.font.build_glyph_line(
            String(self.text)
        ).get_width_in_text_space(self.font_size)
        layout_rect = Rectangle(
            bounding_box.x, text_y, text_width, self.font_size
        )

        # set bounding box
        self.set_bounding_box(layout_rect)
        return layout_rect
示例#5
0
    def _do_layout_without_padding_text_alignment_justified(
        self, lines_of_text: typing.List[str], page: Page, bounding_box: Rectangle
    ) -> Rectangle:
        """
        Lay out the given lines word by word, spreading the horizontal space
        that a line leaves unused evenly over its space characters.

        :param lines_of_text: the lines to lay out, top to bottom
        :param page:          the page on which each word is laid out
        :param bounding_box:  the area available to the text
        :return:              the rectangle enclosing every word that was
                              laid out; it is also set as bounding box
        """
        # distance between the tops of two consecutive lines
        leading: Decimal = self.font_size * Decimal(1.3)

        # running extremes of everything laid out so far
        left: Decimal = Decimal(2048)
        bottom: Decimal = Decimal(2048)
        right: Decimal = Decimal(0)
        top: Decimal = Decimal(0)

        for line_nr, line in enumerate(lines_of_text):
            line_width: Decimal = self.font.build_glyph_line(
                String(line)
            ).get_width_in_text_space(self.font_size)
            slack: Decimal = bounding_box.width - line_width

            # share the slack among the space characters of the line
            space_count: Decimal = Decimal(line.count(" "))
            if space_count > 0:
                extra_per_space: Decimal = slack / space_count
            else:
                extra_per_space = Decimal(0)

            # every word on this line shares the same vertical position
            line_y: Decimal = (
                bounding_box.y
                + bounding_box.height
                - leading * line_nr
                - self.font_size
            )

            cursor: Decimal = bounding_box.x
            for word in line.split(" "):
                padded_word = word + " "
                chunk = ChunkOfText(
                    padded_word,
                    font=self.font,
                    font_size=self.font_size,
                    font_color=self.font_color,
                    parent=self,
                )
                placed: Rectangle = chunk.layout(
                    page,
                    bounding_box=Rectangle(
                        cursor, line_y, bounding_box.width, self.font_size
                    ),
                )
                left = min(placed.x, left)
                bottom = min(placed.y, bottom)
                right = max(placed.x + placed.width, right)
                top = max(placed.y + placed.height, top)

                # advance past the word, then past its share of the slack
                cursor += self.font.build_glyph_line(
                    String(padded_word)
                ).get_width_in_text_space(self.font_size)
                cursor += extra_per_space

        # set bounding box
        layout_rect = Rectangle(left, bottom, right - left, top - bottom)
        self.set_bounding_box(layout_rect)
        return layout_rect
示例#6
0
    def _split_text(self, bounding_box: Rectangle) -> typing.List[str]:
        """
        Break self.text into lines that fit the width of the bounding box.

        The text is first cut into words on space, tab and newline. Newlines
        are kept as words of their own when respect_newlines_in_text is set,
        spaces and tabs when respect_spaces_in_text is set. The words are then
        accumulated into lines.

        :param bounding_box: only its width is used, as the maximum line width
        :return:             the lines of text, top to bottom
        """
        separators = (" ", "\t", "\n")
        kept_separators: typing.List[str] = []
        if self.respect_newlines_in_text:
            kept_separators.append("\n")
        if self.respect_spaces_in_text:
            kept_separators.extend([" ", "\t"])

        # cut the text into words; the last element is the word being built
        words: typing.List[str] = [""]
        for char in self.text:
            if char not in separators:
                words[-1] += char
            elif char in kept_separators:
                # a separator we keep becomes a word of its own
                words.append(char)
                words.append("")
            elif words[-1] != "":
                # a separator we drop merely ends the current word
                words.append("")

        # accumulate the words into lines
        lines: typing.List[str] = []
        for index, word in enumerate(words):

            # a kept newline starts a fresh, empty line
            if word == "\n" and self.respect_newlines_in_text:
                lines.append("")
                continue

            # when spaces are not kept as words, words are re-joined by one space
            glue = " " if index != 0 and not self.respect_spaces_in_text else ""
            candidate = (lines[-1] if lines else "") + glue + word
            candidate_width = self.font.build_glyph_line(
                String(candidate)
            ).get_width_in_text_space(self.font_size)

            # compare against -1 rather than 0 to absorb rounding errors
            fits = bounding_box.width - candidate_width > Decimal(-1)
            if fits and lines:
                lines[-1] += glue + word
            else:
                # either the very first word, or the word overflows the line
                lines.append(word)

        return lines
示例#7
0
    def add_outline(
        self,
        text: str,
        level: int,
        destination_type: DestinationType,
        page_nr: int,
        top: typing.Optional[Decimal] = None,
        right: typing.Optional[Decimal] = None,
        bottom: typing.Optional[Decimal] = None,
        left: typing.Optional[Decimal] = None,
        zoom: typing.Optional[Decimal] = None,
    ) -> "Document":
        """
        Add an outline item with the given title to the /Outlines tree of
        this Document, creating the /Outlines dictionary when it is missing.

        :param text:             title of the new item
        :param level:            depth of the new item; it is attached to the
                                 last existing item found at depth level - 1
                                 (the /Outlines dictionary itself is depth -1).
                                 Ignored when the tree is still empty
        :param destination_type: kind of destination, which determines the
                                 coordinates that must (and must not) be given
        :param page_nr:          page number stored as first destination element
        :param top:              coordinate, see destination_type
        :param right:            coordinate, see destination_type
        :param bottom:           coordinate, see destination_type
        :param left:             coordinate, see destination_type
        :param zoom:             zoom factor, see destination_type
        :return:                 this Document
        :raises AssertionError:  when the given coordinates do not match the
                                 destination type
        :raises IndexError:      when no item exists at depth level - 1
        """
        # coordinates taken by each destination type, in the order in which
        # they are appended; any coordinate that is not listed must be None
        coordinates = {
            "left": left,
            "bottom": bottom,
            "right": right,
            "top": top,
            "zoom": zoom,
        }
        operands_by_type = [
            (DestinationType.X_Y_Z, ("left", "top", "zoom")),
            (DestinationType.FIT, ()),
            (DestinationType.FIT_H, ("top",)),
            (DestinationType.FIT_V, ("left",)),
            (DestinationType.FIT_R, ("left", "bottom", "right", "top")),
            (DestinationType.FIT_B_H, ("top",)),
            (DestinationType.FIT_B_V, ("left",)),
        ]

        destination = List().set_can_be_referenced(
            False)  # type: ignore [attr-defined]
        destination.append(Decimal(page_nr))
        destination.append(destination_type.value)
        for candidate_type, operand_names in operands_by_type:
            if destination_type == candidate_type:
                assert all(
                    (value is not None) == (name in operand_names)
                    for name, value in coordinates.items()
                )
                for name in operand_names:
                    destination.append(Decimal(coordinates[name]))

        # add \Outlines entry in \Root
        catalog = self["XRef"]["Trailer"]["Root"]
        if "Outlines" not in catalog:
            new_root: Dictionary = Dictionary()
            catalog[Name("Outlines")] = new_root
            # NOTE(review): the argument is the entry that was just stored,
            # which looks like the new dictionary itself rather than the
            # catalog - confirm this is intended
            new_root.set_parent(  # type: ignore [attr-defined]
                catalog[Name("Outlines")])
            new_root[Name("Type")] = Name("Outlines")
            new_root[Name("Count")] = Decimal(0)

        # create entry; its parent is filled in once it is known
        entry = Dictionary()
        entry[Name("Dest")] = destination
        entry[Name("Parent")] = None
        entry[Name("Title")] = String(text)

        # an empty tree simply gets the new entry as its only item
        outlines_root = catalog["Outlines"]
        if "First" not in outlines_root or "Last" not in outlines_root:
            outlines_root[Name("First")] = entry
            outlines_root[Name("Last")] = entry
            outlines_root[Name("Count")] = Decimal(1)
            entry[Name("Parent")] = outlines_root
            return self

        # helper: follow the Next links from First until Last is reached
        def _children(node: Dictionary):
            if "First" not in node:
                return []
            chain = [node["First"]]
            while chain[-1] != node["Last"]:
                chain.append(chain[-1]["Next"])
            return chain

        # walk the tree level by level, recording the depth of every item;
        # `visited` doubles as the queue, `cursor` marks the next item to expand
        visited: typing.List[typing.Tuple[int, Dictionary]] = [
            (-1, outlines_root)
        ]
        cursor = 0
        while cursor < len(visited):
            depth, node = visited[cursor]
            cursor += 1
            visited.extend((depth + 1, child) for child in _children(node))

        # the parent is the last item seen one level above the requested one
        parent = [node for depth, node in visited if depth == level - 1][-1]

        # chain the entry behind the current last child, if there is one
        if "Last" in parent:
            parent["Last"][Name("Next")] = entry

        # hook the entry into its parent
        entry[Name("Parent")] = parent
        if "First" not in parent:
            parent[Name("First")] = entry
        if "Count" not in parent:
            parent[Name("Count")] = Decimal(0)
        parent[Name("Last")] = entry

        # bump the count of the parent and of each of its ancestors
        ancestor = parent
        while ancestor:
            ancestor[Name("Count")] = Decimal(ancestor["Count"] + Decimal(1))
            if "Parent" not in ancestor:
                break
            ancestor = ancestor["Parent"]

        return self
示例#8
0
    def true_type_font_from_file(path_to_font_file: Path) -> "TrueTypeFont":
        """
        This function returns the PDF TrueTypeFont object for a given TTF file.

        The font dictionary gets a name, widths and an encoding derived from
        the glyphs that the best cmap of the file maps to, a FontDescriptor,
        and the Flate-compressed font file embedded as FontFile2.

        :param path_to_font_file: path of an existing file whose name ends in ".ttf"
        :return:                  the populated TrueTypeFont
        :raises AssertionError:   when the file does not exist, is not named
                                  *.ttf, or is empty
        """
        assert path_to_font_file.exists()
        assert path_to_font_file.name.endswith(".ttf")

        with open(path_to_font_file, "rb") as ffh:
            font_file_bytes: bytes = ffh.read()
        assert font_file_bytes

        # read file
        ttf_font_file = TTFont(path_to_font_file)

        # build font; the name is reduced to its ASCII letters
        font: TrueTypeFont = TrueTypeFont()
        font_name: str = str(
            [
                x for x in ttf_font_file["name"].names
                if x.platformID == 3 and x.nameID == 1
            ][0].string,
            "latin1",
        )
        font_name = "".join([
            x for x in font_name if x.lower() in "abcdefghijklmnopqrstuvwxyz"
        ])

        font[Name("Name")] = Name(font_name)
        font[Name("BaseFont")] = Name(font_name)

        # map every glyph name to the lowest code that refers to it; a font
        # without a usable cmap is treated as having no mapped glyphs rather
        # than crashing on None
        cmap: typing.Optional[typing.Dict[int,
                                          str]] = ttf_font_file.getBestCmap()
        cmap_reverse: typing.Dict[str, int] = {}
        for k, v in (cmap or {}).items():
            cmap_reverse[v] = min(cmap_reverse.get(v, k), k)
        glyph_order: typing.List[str] = [
            x for x in ttf_font_file.glyphOrder if x in cmap_reverse
        ]

        # build widths
        units_per_em: pDecimal = pDecimal(ttf_font_file["head"].unitsPerEm)
        if cmap is not None:
            font[Name("FirstChar")] = pDecimal(0)
            # Widths holds one entry per code in FirstChar..LastChar, so with
            # FirstChar 0 the last code is one less than the number of widths
            font[Name("LastChar")] = pDecimal(max(len(glyph_order) - 1, 0))
            font[Name("Widths")] = List()
            glyph_set = ttf_font_file.getGlyphSet()
            for glyph_name in glyph_order:
                w: pDecimal = (pDecimal(glyph_set[glyph_name].width) /
                               units_per_em) * pDecimal(1000)
                w = pDecimal(round(w, 2))
                font["Widths"].append(w)

        font[Name("FontDescriptor")] = Dictionary()
        font["FontDescriptor"][Name("Type")] = Name("FontDescriptor")
        font["FontDescriptor"][Name("FontName")] = String(font_name)
        font["FontDescriptor"][Name("FontStretch")] = Name("Normal")  # TODO
        font["FontDescriptor"][Name("FontWeight")] = pDecimal(400)  # TODO
        font["FontDescriptor"][Name("Flags")] = pDecimal(4)  # TODO
        font["FontDescriptor"][Name("FontBBox")] = List(
        ).set_can_be_referenced(  # type: ignore [attr-defined]
            False)  # TODO
        for _ in range(0, 4):
            font["FontDescriptor"]["FontBBox"].append(pDecimal(0))

        # fmt: off
        font["FontDescriptor"][Name("ItalicAngle")] = pDecimal(
            ttf_font_file["post"].italicAngle)
        font["FontDescriptor"][Name("Ascent")] = pDecimal(
            pDecimal(ttf_font_file["hhea"].ascent) / units_per_em *
            Decimal(1000))
        font["FontDescriptor"][Name("Descent")] = pDecimal(
            pDecimal(ttf_font_file["hhea"].descent) / units_per_em *
            Decimal(1000))
        font["FontDescriptor"][Name("CapHeight")] = pDecimal(0)  # TODO
        font["FontDescriptor"][Name("StemV")] = pDecimal(0)  # TODO
        # fmt: on

        # the encoding assigns code i to the i-th mapped glyph
        font[Name("Encoding")] = Dictionary()
        font["Encoding"][Name("BaseEncoding")] = Name("WinAnsiEncoding")
        font["Encoding"][Name("Differences")] = List()
        for i, glyph_name in enumerate(glyph_order):
            font["Encoding"]["Differences"].append(pDecimal(i))
            font["Encoding"]["Differences"].append(Name(glyph_name))

        # embed font file; Length describes the compressed bytes that are
        # stored, Length1 the size of the font file before compression
        compressed_font_bytes: bytes = zlib.compress(font_file_bytes, 9)
        font_stream: Stream = Stream()
        font_stream[Name("Type")] = Name("Font")
        font_stream[Name("Subtype")] = Name("TrueType")
        font_stream[Name("Length")] = pDecimal(len(compressed_font_bytes))
        font_stream[Name("Length1")] = pDecimal(len(font_file_bytes))
        font_stream[Name("Filter")] = Name("FlateDecode")
        font_stream[Name("DecodedBytes")] = font_file_bytes
        font_stream[Name("Bytes")] = compressed_font_bytes

        font["FontDescriptor"][Name("FontFile2")] = font_stream

        # return
        return font
示例#9
0
    def __init__(self, graphics_state: CanvasGraphicsState, raw_bytes: String):
        """
        Build the event for a run of text rendered under the given graphics
        state, and compute its baseline bounding box and bounding box.

        :param graphics_state: the graphics state in effect; its font must be set
        :param raw_bytes:      the string operand holding the bytes of the text
        """
        font = graphics_state.font
        assert font is not None
        self._glyph_line: GlyphLine = GlyphLine(
            raw_bytes.get_value_bytes(),
            font,
            graphics_state.font_size,
            graphics_state.character_spacing,
            graphics_state.word_spacing,
            graphics_state.horizontal_scaling,
        )
        super(ChunkOfTextRenderEvent, self).__init__(
            font=font,
            font_size=graphics_state.font_size,
            font_color=graphics_state.non_stroke_color,
            text=self._glyph_line.get_text(),
        )

        def _spanning_box(corner_a, corner_b) -> Rectangle:
            # rectangle spanned by two opposite corners, in either order
            return Rectangle(
                min(corner_a[0], corner_b[0]),
                min(corner_a[1], corner_b[1]),
                abs(corner_b[0] - corner_a[0]),
                abs(corner_b[1] - corner_a[1]),
            )

        matrix = graphics_state.text_matrix.mul(graphics_state.ctm)
        rise = graphics_state.text_rise

        # baseline box: from the start of the line at the text rise up to
        # the end of the line at the ascent
        line_start = matrix.cross(Decimal(0), rise, Decimal(1))
        line_end_top = matrix.cross(
            self._glyph_line.get_width_in_text_space(),
            rise + font.get_ascent() * Decimal(0.001),
            Decimal(1),
        )
        self.baseline_bounding_box = _spanning_box(line_start, line_end_top)

        # text containing one of these letters gets a box that starts at the
        # descent instead of the text rise
        if any(letter in self.text.lower() for letter in "ypqfgj"):
            line_start_bottom = matrix.cross(
                Decimal(0),
                rise + font.get_descent() * Decimal(0.001),
                Decimal(1),
            )
            self.set_bounding_box(
                _spanning_box(line_start_bottom, line_end_top)
            )
        else:
            self.set_bounding_box(self.baseline_bounding_box)

        # calculate space character width estimate
        self._space_character_width_estimate = (
            font.get_space_character_width_estimate() * graphics_state.font_size
        )
        self._font_size = graphics_state.font_size

        # store graphics state
        self._graphics_state = graphics_state
    def read_object(
        self,
        xref: Optional["XREF"] = None
    ) -> Optional[AnyPDFType]:  # type: ignore [name-defined]
        """
        Read the next non-comment token and turn it, together with any
        tokens that belong to it, into a PDF object.

        :param xref: passed on to read_stream when a stream is attempted
        :return:     the object that was read, or None when there is no
                     (non-empty) token or the token starts none of the
                     supported objects
        """
        token = self.next_non_comment_token()
        if token is None or len(token.text) == 0:
            return None
        kind = token.token_type

        # containers are re-read from their opening token
        if kind == TokenType.START_DICT:
            self.seek(token.byte_offset)
            return self.read_dictionary()
        if kind == TokenType.START_ARRAY:
            self.seek(token.byte_offset)
            return self.read_array()

        if kind == TokenType.NUMBER:
            # a number may open a larger construct; try those first, going
            # back to the start of the number before every attempt
            attempts = (
                # <number> <number> "R"
                self.read_indirect_reference,
                # <number> <number> "obj" <<dictionary>> "stream" <bytes> "endstream"
                lambda: self.read_stream(xref),
                # <number> <number> "obj"
                self.read_indirect_object,
            )
            for attempt in attempts:
                self.seek(token.byte_offset)
                parsed = attempt()
                if parsed is not None:
                    return parsed

            # just a number: step over its text
            self.seek(self.tell() + len(token.text))
            return Decimal(Decimal(token.text))

        if kind == TokenType.OTHER:
            # boolean
            if token.text in ["true", "false"]:
                return Boolean(token.text == "true")
            # canvas operators
            if token.text in CanvasOperatorName.VALID_NAMES:
                return CanvasOperatorName(token.text)

        # names, without their first character
        if kind == TokenType.NAME:
            return Name(token.text[1:])

        # literal strings and hex strings, without their delimiters
        if kind == TokenType.STRING:
            return String(token.text[1:-1])
        if kind == TokenType.HEX_STRING:
            return HexadecimalString(token.text[1:-1])

        # default
        return None
示例#11
0
    def append_embedded_file(self,
                             file_name: str,
                             file_bytes: bytes,
                             apply_compression: bool = True) -> "Document":
        """
        Embed a file, given by its name and bytes, into this Document.

        The file is stored as an EmbeddedFile stream inside a Filespec
        dictionary, which is added under /Root /Names /EmbeddedFiles. When an
        entry whose limits both equal file_name is already present, the
        Document is left untouched.

        :param file_name:         name under which the file is embedded
        :param file_bytes:        content of the file
        :param apply_compression: when True (default) the stored bytes are
                                  zlib-compressed and /Filter is FlateDecode
        :return:                  this Document
        """
        assert "XRef" in self
        assert "Trailer" in self["XRef"]
        assert "Root" in self["XRef"]["Trailer"]
        catalog = self["XRef"]["Trailer"]["Root"]

        # set up /Names dictionary
        if "Names" not in catalog:
            catalog[Name("Names")] = Dictionary()
        name_dictionary = catalog["Names"]

        # set up /EmbeddedFiles
        if "EmbeddedFiles" not in name_dictionary:
            name_dictionary[Name("EmbeddedFiles")] = Dictionary()
            name_dictionary["EmbeddedFiles"][Name("Kids")] = List()

        # pick the node that receives the entry: the first kid whose limits
        # differ and strictly enclose file_name, otherwise /EmbeddedFiles.
        # NOTE(review): only one level of kids is inspected - confirm that
        # deeper trees do not need to be descended
        node = name_dictionary["EmbeddedFiles"]
        if "Kids" in node:
            for candidate in node["Kids"]:
                lower_limit = str(candidate["Limits"][0])
                upper_limit = str(candidate["Limits"][1])
                if lower_limit == upper_limit:
                    continue
                if lower_limit < file_name < upper_limit:
                    node = candidate
                    break

        # an entry for this name already exists
        already_present = [
            x for x in node["Kids"]
            if x["Limits"][0] == x["Limits"][1] == file_name
        ]
        if already_present:
            # TODO: change the existing child
            return self

        # build the kid, with both limits equal to the file name
        leaf = Dictionary()
        leaf[Name("F")] = String(file_name)
        leaf[Name("Type")] = Name("Filespec")
        leaf[Name("Limits")] = List()
        for _ in range(0, 2):
            leaf["Limits"].append(String(file_name))

        # build leaf \Names array; the Filespec is appended to it below
        leaf_names = List()
        leaf_names.append(String(file_name))
        leaf[Name("Names")] = leaf_names

        # build actual file stream
        stream = Stream()
        stream[Name("Type")] = Name("EmbeddedFile")
        stream[Name("DecodedBytes")] = file_bytes
        if apply_compression:
            stream[Name("Bytes")] = zlib.compress(
                stream[Name("DecodedBytes")], 9)
            stream[Name("Filter")] = Name("FlateDecode")
        else:
            stream[Name("Bytes")] = file_bytes
        stream[Name("Length")] = Decimal(len(stream[Name("Bytes")]))

        # build leaf \Filespec dictionary
        file_spec = Dictionary()
        file_spec[Name("EF")] = Dictionary()
        file_spec["EF"][Name("F")] = stream
        file_spec[Name("F")] = String(file_name)
        file_spec[Name("Type")] = Name("Filespec")
        leaf_names.append(file_spec)

        # append
        node["Kids"].append(leaf)
        return self
示例#12
0
    def add_outline(
        self,
        text: str,
        level: int,
        destination_type: DestinationType,
        page_nr: int,
        top: typing.Optional[Decimal] = None,
        right: typing.Optional[Decimal] = None,
        bottom: typing.Optional[Decimal] = None,
        left: typing.Optional[Decimal] = None,
        zoom: typing.Optional[Decimal] = None,
    ) -> "Document":
        """
        A PDF document may contain a document outline that the conforming reader may display on the screen,
        allowing the user to navigate interactively from one part of the document to another. The outline consists of a
        tree-structured hierarchy of outline items (sometimes called bookmarks), which serve as a visual table of
        contents to display the document’s structure to the user.
        This function adds an outline to this Document.

        The new item is appended as the last child of the last existing item
        found at depth ``level - 1`` (the outline root counts as depth -1).

        :param text:             title of the new outline item
        :param level:            depth of the new item; items directly below the outline root are level 0.
                                 Ignored when the outline has no items yet (the item becomes the only entry).
        :param destination_type: kind of destination; it decides which of top/right/bottom/left/zoom
                                 must be given, all others must stay None
        :param page_nr:          page number stored as first element of the destination array
        :param top:              top coordinate (X_Y_Z, FIT_H, FIT_R, FIT_B_H)
        :param right:            right coordinate (FIT_R)
        :param bottom:           bottom coordinate (FIT_R)
        :param left:             left coordinate (X_Y_Z, FIT_V, FIT_R, FIT_B_V)
        :param zoom:             zoom factor (X_Y_Z)
        :return:                 self
        :raises AssertionError:  when the coordinates given do not match destination_type
        :raises IndexError:      when no outline item exists at depth ``level - 1``
        """
        from collections import deque

        # destination array: [page, type, coordinates...]
        destination = List().set_can_be_referenced(
            False)  # type: ignore [attr-defined]
        destination.append(Decimal(page_nr))
        destination.append(destination_type.value)

        # for every destination type: the coordinates that must be given, in
        # the order in which they are written to the destination array
        coordinates = {
            "left": left,
            "bottom": bottom,
            "right": right,
            "top": top,
            "zoom": zoom,
        }
        required_coordinates = (
            (DestinationType.X_Y_Z, ("left", "top", "zoom")),
            (DestinationType.FIT, ()),
            (DestinationType.FIT_H, ("top",)),
            (DestinationType.FIT_V, ("left",)),
            (DestinationType.FIT_R, ("left", "bottom", "right", "top")),
            (DestinationType.FIT_B_H, ("top",)),
            (DestinationType.FIT_B_V, ("left",)),
        )
        for candidate_type, required in required_coordinates:
            if destination_type == candidate_type:
                # exactly the required coordinates may be set
                assert all(
                    (coordinates[name] is not None) == (name in required)
                    for name in coordinates
                ), "coordinates do not match the destination type"
                for name in required:
                    destination.append(Decimal(coordinates[name]))

        # add \Outlines entry in \Root
        root = self["XRef"]["Trailer"]["Root"]
        if "Outlines" not in root:
            new_outlines: Dictionary = Dictionary()
            root[Name("Outlines")] = new_outlines
            # the parent of \Outlines is the \Root dictionary that holds it
            # (it used to be registered as its own parent)
            new_outlines.set_parent(root)  # type: ignore [attr-defined]
            new_outlines[Name("Type")] = Name("Outlines")
            new_outlines[Name("Count")] = Decimal(0)

        # create entry
        outline = Dictionary()
        outline[Name("Dest")] = destination
        # placeholder, keeps the key order Dest/Parent/Title; the real parent
        # is filled in below
        outline[Name("Parent")] = None
        outline[Name("Title")] = String(text)

        # get \Outlines
        outline_dictionary: Dictionary = root["Outlines"]

        # if everything is empty, add the new entry as the only entry
        if "First" not in outline_dictionary or "Last" not in outline_dictionary:
            outline_dictionary[Name("First")] = outline
            outline_dictionary[Name("Last")] = outline
            outline_dictionary[Name("Count")] = Decimal(1)
            outline[Name("Parent")] = outline_dictionary
            return self

        # helper function: children of x, following First -> Next -> ... -> Last
        def _children(x: Dictionary):
            if "First" not in x:
                return []
            children = [x["First"]]
            while children[-1] != x["Last"]:
                children.append(children[-1]["Next"])
            return children

        # breadth-first walk over the outline tree, recording the depth of
        # every item (the root is at depth -1)
        outlines_done: typing.List[typing.Tuple[int, Dictionary]] = []
        outlines_todo: typing.Deque[typing.Tuple[int, Dictionary]] = deque(
            [(-1, outline_dictionary)]
        )
        while outlines_todo:
            depth, node = outlines_todo.popleft()
            outlines_done.append((depth, node))
            for child in _children(node):
                outlines_todo.append((depth + 1, child))

        # find parent: the last item one level above the requested level
        candidates = [node for depth, node in outlines_done if depth == level - 1]
        if not candidates:
            raise IndexError(
                "cannot add an outline at level %d: there is no outline at level %d"
                % (level, level - 1)
            )
        parent = candidates[-1]

        # update sibling-linking
        # NOTE(review): only Next is written here, no Prev entry is set on the
        # new item; ISO 32000 lists Prev for all but the first item of a level
        # - confirm whether it should be added.
        if "Last" in parent:
            sibling = parent["Last"]
            sibling[Name("Next")] = outline

        # update parent-linking
        outline[Name("Parent")] = parent
        if "First" not in parent:
            parent[Name("First")] = outline
        if "Count" not in parent:
            parent[Name("Count")] = Decimal(0)
        parent[Name("Last")] = outline

        # update count of the parent and of every ancestor up to the root
        ancestor = parent
        while ancestor:
            ancestor[Name("Count")] = Decimal(ancestor["Count"] + Decimal(1))
            if "Parent" not in ancestor:
                break
            ancestor = ancestor["Parent"]

        return self
示例#13
0
    def split_on_glyphs(self) -> typing.List["ChunkOfTextRenderEvent"]:
        """
        Split this event into one ChunkOfTextRenderEvent per glyph of its glyph line.

        Every returned event copies the font, font size, font color,
        space-width estimate and graphics state of this event, holds a
        GlyphLine with exactly one glyph, and gets bounding boxes that start
        at the horizontal advance accumulated over the preceding glyphs.

        :return: the per-glyph events, in glyph-line order
        """
        chunks_of_text: typing.List[ChunkOfTextRenderEvent] = []
        x: Decimal = Decimal(0)
        y: Decimal = self._graphics_state.text_rise
        for g in self._glyph_line.glyphs:
            glyph_text = g.to_unicode_string()

            e = ChunkOfTextRenderEvent(self._graphics_state, String(" "))
            e.font_size = self.font_size
            e.font_color = self.font_color
            e.font = self.font
            e.text = glyph_text
            e.space_character_width_estimate = self.space_character_width_estimate
            e._graphics_state = self._graphics_state
            e._glyph_line = GlyphLine([g])

            # calculate width: scaled glyph width, plus word spacing for a
            # space, plus character spacing
            # (the 0.001 / 0.01 factors presumably convert thousandths of a
            # unit and a percentage - TODO confirm)
            width: Decimal = (
                g.width
                * Decimal(0.001)
                * self.font_size
                * self._graphics_state.horizontal_scaling
                * Decimal(0.01)
                + (
                    self._graphics_state.word_spacing
                    if glyph_text == " "
                    else Decimal(0)
                )
                + self._graphics_state.character_spacing
            )

            # set baseline bounding box: from (x, y) to the ascent line at x + width
            m = self._graphics_state.text_matrix.mul(self._graphics_state.ctm)
            p0 = m.cross(x, y, Decimal(1))
            p1 = m.cross(
                x + width,
                y + self._graphics_state.font.get_ascent() * Decimal(0.001),
                Decimal(1),
            )
            e.baseline_bounding_box = Rectangle(
                p0[0], p0[1], p1[0] - p0[0], p1[1] - p0[1]
            )
            e.bounding_box = e.baseline_bounding_box

            # change bounding box (descent): for these letters the box starts
            # at the descent line instead of the baseline; the upper corner p1
            # is the same point as above
            if glyph_text.lower() in ("y", "p", "q", "f", "g", "j"):
                p0 = m.cross(
                    x,
                    y + self._graphics_state.font.get_descent() * Decimal(0.001),
                    Decimal(1),
                )
                e.bounding_box = Rectangle(
                    min(p0[0], p1[0]),
                    min(p0[1], p1[1]),
                    abs(p1[0] - p0[0]),
                    abs(p1[1] - p0[1]),
                )

            # update x
            x += width

            # append
            chunks_of_text.append(e)

        return chunks_of_text
示例#14
0
    def _read_file(input: typing.TextIO) -> Optional[Font]:
        """
        Parse Adobe Font Metrics (AFM) text into a Type1 Font with a FontDescriptor.

        :param input: text stream holding the AFM data
        :return:      the populated Font, or None when the stream is empty
                      (after dropping Comment lines) or is not delimited by a
                      StartFontMetrics first line and an EndFontMetrics last line
        """
        # drop Comment lines, strip the trailing newline of every other line
        lines: typing.List[str] = [
            x[:-1] if x.endswith("\n") else x
            for x in input.readlines()
            if not x.startswith("Comment")
        ]

        # nothing to parse: treat like any other non-AFM input
        if not lines:
            return None

        # check first/last line
        if not (
            lines[0].startswith("StartFontMetrics")
            and lines[-1].startswith("EndFontMetrics")
        ):
            return None

        out_font = Font()

        # FontDescriptor
        out_font_descriptor = FontDescriptor().set_parent(out_font)  # type: ignore [attr-defined]

        def _copy_number(
            afm_key: str, descriptor_key: str, zero_if_missing: bool = False
        ) -> Optional[float]:
            # Look up afm_key and store it under descriptor_key.
            # A missing or zero value is stored as Decimal(0) when
            # zero_if_missing is set, and is not stored at all otherwise.
            # Returns whatever the lookup produced.
            value = AdobeFontMetrics._find_and_parse_as_float(lines, afm_key)
            if value:
                out_font_descriptor[Name(descriptor_key)] = Decimal(value)
            elif zero_if_missing:
                out_font_descriptor[Name(descriptor_key)] = Decimal(0)
            return value

        font_name = AdobeFontMetrics._find_and_parse_as_string(lines, "FontName")
        if font_name:
            out_font_descriptor[Name("FontName")] = Name(font_name)
        font_family = AdobeFontMetrics._find_and_parse_as_string(lines, "FamilyName")
        if font_family:
            out_font_descriptor[Name("FontFamily")] = String(font_family)

        # FontStretch, FontWeight: not set

        # FontBBox
        fontbbox_str = AdobeFontMetrics._find_and_parse_as_string(lines, "FontBBox")
        if fontbbox_str:
            fontbbox = [Decimal(x) for x in fontbbox_str.split(" ")]
            out_font_descriptor[Name("FontBBox")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
            for x in fontbbox:
                out_font_descriptor[Name("FontBBox")].append(x)

        # ItalicAngle, Ascent, Descent
        _copy_number("ItalicAngle", "ItalicAngle", zero_if_missing=True)
        _copy_number("Ascender", "Ascent", zero_if_missing=True)
        _copy_number("Descender", "Descent", zero_if_missing=True)

        # Flags (hard-coded)
        out_font_descriptor[Name("Flags")] = Decimal(131104)

        # Leading: not set

        # CapHeight, XHeight, StemV, StemH
        _copy_number("CapHeight", "CapHeight", zero_if_missing=True)
        _copy_number("XHeight", "XHeight")
        _copy_number("StemV", "StemV", zero_if_missing=True)
        _copy_number("StemH", "StemH", zero_if_missing=True)

        # AvgWidth, MaxWidth: remembered, they get a computed fallback below
        avgwidth = _copy_number("AvgWidth", "AvgWidth")
        maxwidth = _copy_number("MaxWidth", "MaxWidth")

        # MissingWidth, CharSet
        _copy_number("MissingWidth", "MissingWidth")
        _copy_number("CharSet", "CharSet")

        # Font
        out_font[Name("Type")] = Name("Font")
        out_font[Name("Subtype")] = Name("Type1")
        out_font[Name("Name")] = out_font_descriptor["FontName"]
        out_font[Name("BaseFont")] = out_font_descriptor["FontName"]

        widths = List().set_parent(out_font)  # type: ignore [attr-defined]
        avg_char_width: float = 0
        avg_char_width_norm: float = 0
        first_char = None
        last_char = None

        # the lines strictly between StartCharMetrics and EndCharMetrics
        start_line = [x for x in lines if x.startswith("StartCharMetrics")][0]
        char_metrics_lines = lines[
            lines.index(start_line) + 1 : lines.index("EndCharMetrics")
        ]
        for cml in char_metrics_lines:
            # "C 32 ; WX 278 ; N space ; ..." -> {"C": "32", "WX": "278", ...}
            tmp = {}
            for field in cml.split(";"):
                field = field.strip()
                if " " in field:
                    parts = field.split(" ")
                    tmp[parts[0]] = parts[1]

            # determine char: decimal code (C) or hexadecimal code in
            # brackets (CH); -1 means no code
            ch = -1
            if "C" in tmp:
                ch = int(tmp["C"])
            if "CH" in tmp:
                ch = int(tmp["CH"][1:-1], 16)

            if ch != -1:
                if first_char is None or ch < first_char:
                    first_char = ch
                if last_char is None or ch > last_char:
                    last_char = ch

            w = float(tmp["WX"])
            if ch != -1 and w != 0:
                avg_char_width += w
                avg_char_width_norm += 1

            # NOTE(review): every metrics line contributes a width, also those
            # with code -1, so Widths can hold more entries than
            # LastChar - FirstChar + 1 - confirm this is intended.
            widths.append(Decimal(w))

        assert first_char is not None
        assert last_char is not None

        out_font[Name("FirstChar")] = Decimal(first_char)
        out_font[Name("LastChar")] = Decimal(last_char)
        out_font[Name("Widths")] = widths

        if avgwidth is None:
            if avg_char_width_norm:
                out_font_descriptor[Name("AvgWidth")] = Decimal(
                    round(Decimal(avg_char_width / avg_char_width_norm), 2)
                )
            else:
                # no coded glyph with a non-zero width: avoid dividing by zero
                out_font_descriptor[Name("AvgWidth")] = Decimal(0)
        if maxwidth is None:
            out_font_descriptor[Name("MaxWidth")] = Decimal(max(widths))
        out_font[Name("FontDescriptor")] = out_font_descriptor

        # return
        return out_font