def read_array(self) -> List:
        """
        Parse one array from the token stream and hand it back as a List.

        The first non-comment token must be START_ARRAY. After that, every
        element is parsed by seeking back to the byte offset of its first
        token and delegating to read_object, until END_ARRAY is met.
        An AssertionError is raised when the token stream runs out, when the
        first token is not START_ARRAY, or when END_DICT shows up where an
        element or END_ARRAY was expected.
        """
        opening = self.next_non_comment_token()
        assert opening is not None
        assert opening.token_type == TokenType.START_ARRAY

        elements = List()
        while True:
            upcoming = self.next_non_comment_token()
            assert upcoming is not None

            # the closing bracket terminates the array
            if upcoming.token_type == TokenType.END_ARRAY:
                return elements
            assert upcoming.token_type != TokenType.END_DICT

            # the token just read belongs to the next element: seek back so
            # that read_object sees it again, then store whatever it parses
            self.seek(upcoming.byte_offset)
            elements.append(self.read_object())
    def transform(
        self,
        object_to_transform: Any,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        Write a Document to the byte stream held by context.destination.

        The method emits the "%PDF-1.7" header followed by a comment line of
        four bytes, invalidates all references of the document, completes the
        trailer (/Info, /ID, CreationDate, ModDate, Producer) and finally hands
        the XRef of the document to the root transformer.
        An AssertionError is raised when context or its destination is None.
        """
        assert context is not None
        assert context.destination is not None

        # header
        sink = context.destination
        sink.write(b"%PDF-1.7\n")
        sink.write(b"%")
        sink.write(bytes([226, 227, 207, 211]))
        sink.write(b"\n")

        # every object gets a fresh reference while writing
        WritePDFTransformer._invalidate_all_references(object_to_transform)

        trailer = object_to_transform["XRef"]["Trailer"]

        # /Info must exist before the dates and the producer can be stored
        if "Info" not in trailer:
            trailer[Name("Info")] = Dictionary()

        # /ID: a missing array is created with the new id in both slots,
        # an existing array only has its second entry replaced
        random_id = HexadecimalString("%032x" % random.randrange(16**32))
        if "ID" in trailer:
            trailer["ID"][1] = random_id
        else:
            trailer[Name("ID")] = List().set_can_be_referenced(  # type: ignore [attr-defined]
                False)
            for _ in range(2):
                trailer["ID"].append(random_id)
        trailer["ID"].set_can_be_referenced(False)

        # CreationDate is only set when absent, ModDate and Producer always
        timestamp = WritePDFTransformer._timestamp_to_str()
        info = trailer[Name("Info")]
        if "CreationDate" not in info:
            info[Name("CreationDate")] = String(timestamp)
        info[Name("ModDate")] = String(timestamp)
        info[Name("Producer")] = String("pText")

        # the XRef transformer takes it from here
        self.get_root_transformer().transform(object_to_transform["XRef"],
                                              context)
Exemplo n.º 3
0
    def test_hash_types(self):
        """
        Build a Dictionary and a List out of PDF primitives and print the hash of the List.
        """
        # a Dictionary holding a Reference and a Boolean
        sample_dictionary = Dictionary()
        sample_dictionary[Name("Root")] = Reference(object_number=10)
        sample_dictionary[Name("Marked")] = Boolean(True)

        # a List holding a Name and a Decimal
        sample_list = List()
        for element in (Name("Red"), Decimal(0.5)):
            sample_list.append(element)

        # only the List is hashed
        print(hash(sample_list))
Exemplo n.º 4
0
 def insert_page(
     self,
     page: Page,
     index: typing.Optional[int] = None
 ) -> "Document":  # type: ignore [name-defined]
     """
     Insert a page into the /Kids of this Document's /Pages dictionary at the given index.

     Missing parts of the document skeleton (XRef, Trailer, Root, Pages) are created first.
     When index is None the page is placed behind the current last kid.
     The /Count of /Pages is incremented and the page gets /Pages as its /Parent.
     Returns this Document.
     """
     # XRef
     if "XRef" not in self:
         self[Name("XRef")] = PlainTextXREF()
         self[Name("XRef")].set_parent(self)
     xref = self["XRef"]

     # Trailer
     if "Trailer" not in xref:
         xref[Name("Trailer")] = Dictionary()
         xref[Name("Size")] = Decimal(0)
         xref["Trailer"].set_parent(xref)
     trailer = xref["Trailer"]

     # Root
     if "Root" not in trailer:
         trailer[Name("Root")] = Dictionary()
         trailer["Root"].set_parent(trailer)
     root = trailer["Root"]

     # Pages (with an empty /Kids and a /Count of zero)
     if "Pages" not in root:
         root[Name("Pages")] = Dictionary()
         fresh_pages = root[Name("Pages")]
         fresh_pages[Name("Count")] = Decimal(0)
         fresh_pages[Name("Kids")] = List()
         fresh_pages[Name("Type")] = Name("Pages")
         fresh_pages.set_parent(root)
         fresh_pages["Kids"].set_parent(fresh_pages)
     pages = root["Pages"]

     # put the page into /Kids
     kids = pages["Kids"]
     assert kids is not None
     assert isinstance(kids, List)
     kids.insert(len(kids) if index is None else index, page)

     # one more page
     pages[Name("Count")] = Decimal(pages["Count"] + 1)

     # link the page back to the page tree
     page[Name("Parent")] = pages
     page.set_parent(kids)  # type: ignore [attr-defined]
     return self
Exemplo n.º 5
0
 def _copy_font_descriptor(self, font_descriptor_to_copy: Dictionary) -> Dictionary:
     """
     Copy the entries of font_descriptor_to_copy into self["FontDescriptor"] and return the latter.

     Type, FontName, Flags and ItalicAngle must be present in the source (KeyError otherwise);
     the remaining well-known entries are taken over when present. These values are assigned as-is.
     Every other entry of the source (this includes FontBBox, FontFile, FontFile2, FontFile3 and
     CharSet, which have no dedicated handling yet - TODO) is deep-copied if the target lacks it.
     """
     source: Dictionary = font_descriptor_to_copy
     target: Dictionary = self["FontDescriptor"]

     # (key, mandatory) pairs, processed in exactly this order
     well_known_entries = (
         ("Type", True),
         ("FontName", True),
         ("FontFamily", False),
         ("FontStretch", False),
         ("FontWeight", False),
         ("Flags", True),
         ("ItalicAngle", True),
         ("Ascent", False),
         ("Descent", False),
         ("Leading", False),
         ("CapHeight", False),
         ("XHeight", False),
         ("StemV", False),
         ("StemH", False),
         ("AvgWidth", False),
         ("MaxWidth", False),
         ("MissingWidth", False),
     )
     for key, mandatory in well_known_entries:
         if mandatory or key in source:
             target[Name(key)] = source[key]

     # fallback: whatever has not been handled above is deep-copied
     for key, value in source.items():
         if key not in target:
             target[key] = copy.deepcopy(value)
     return target
Exemplo n.º 6
0
    def add_outline(
        self,
        text: str,
        level: int,
        destination_type: DestinationType,
        page_nr: int,
        top: typing.Optional[Decimal] = None,
        right: typing.Optional[Decimal] = None,
        bottom: typing.Optional[Decimal] = None,
        left: typing.Optional[Decimal] = None,
        zoom: typing.Optional[Decimal] = None,
    ) -> "Document":
        """
        Add an outline item (bookmark) titled `text` to this Document.

        The item points at a destination array made of page_nr, the value of
        destination_type and the coordinates that destination type requires:
        X_Y_Z takes left, top and zoom; FIT takes nothing; FIT_H and FIT_B_H
        take top; FIT_V and FIT_B_V take left; FIT_R takes left, bottom, right
        and top. Every other coordinate must be left at None.

        When the outline tree is still empty the item becomes its only entry
        and level is not consulted. Otherwise the item is appended as the last
        child of the last item found at depth level - 1 in a breadth-first walk
        (the /Outlines dictionary itself sits at depth -1), and the Count of
        that parent and of each of its ancestors is incremented.

        Raises AssertionError when the coordinates do not match the
        destination type, and IndexError when no item exists at depth
        level - 1. Returns this Document.
        """
        # build the destination array
        destination = List().set_can_be_referenced(
            False)  # type: ignore [attr-defined]
        destination.append(Decimal(page_nr))
        destination.append(destination_type.value)
        if destination_type == DestinationType.X_Y_Z:
            assert (left is not None and bottom is None and right is None
                    and top is not None and zoom is not None)
            destination.append(Decimal(left))
            destination.append(Decimal(top))
            destination.append(Decimal(zoom))
        if destination_type == DestinationType.FIT:
            assert (left is None and bottom is None and right is None
                    and top is None and zoom is None)
        if destination_type == DestinationType.FIT_H:
            assert (left is None and bottom is None and right is None
                    and top is not None and zoom is None)
            destination.append(Decimal(top))
        if destination_type == DestinationType.FIT_V:
            assert (left is not None and bottom is None and right is None
                    and top is None and zoom is None)
            destination.append(Decimal(left))
        if destination_type == DestinationType.FIT_R:
            assert (left is not None and bottom is not None
                    and right is not None and top is not None and zoom is None)
            destination.append(Decimal(left))
            destination.append(Decimal(bottom))
            destination.append(Decimal(right))
            destination.append(Decimal(top))
        if destination_type == DestinationType.FIT_B_H:
            assert (left is None and bottom is None and right is None
                    and top is not None and zoom is None)
            destination.append(Decimal(top))
        if destination_type == DestinationType.FIT_B_V:
            assert (left is not None and bottom is None and right is None
                    and top is None and zoom is None)
            destination.append(Decimal(left))

        # add \Outlines entry in \Root
        root = self["XRef"]["Trailer"]["Root"]
        if "Outlines" not in root:
            outline_dictionary: Dictionary = Dictionary()
            root[Name("Outlines")] = outline_dictionary
            # the parent of \Outlines is \Root, not the dictionary itself
            outline_dictionary.set_parent(root)  # type: ignore [attr-defined]
            outline_dictionary[Name("Type")] = Name("Outlines")
            outline_dictionary[Name("Count")] = Decimal(0)

        # create entry
        outline = Dictionary()
        outline[Name("Dest")] = destination
        outline[Name("Parent")] = None
        outline[Name("Title")] = String(text)

        # get \Outlines
        outline_dictionary = root["Outlines"]

        # if everything is empty, add the new entry as the only entry
        if "First" not in outline_dictionary or "Last" not in outline_dictionary:
            outline_dictionary[Name("First")] = outline
            outline_dictionary[Name("Last")] = outline
            outline_dictionary[Name("Count")] = Decimal(1)
            outline[Name("Parent")] = outline_dictionary
            return self

        # helper function: the children of x, from First to Last via Next
        def _children(x: Dictionary):
            if "First" not in x:
                return []
            children = [x["First"]]
            while children[-1] != x["Last"]:
                children.append(children[-1]["Next"])
            return children

        # breadth-first walk over all outlines, remembering the depth of each;
        # the list doubles as the queue (cursor marks the next item to expand)
        outlines_done: typing.List[typing.Tuple[int, Dictionary]] = [
            (-1, outline_dictionary)
        ]
        cursor = 0
        while cursor < len(outlines_done):
            depth, node = outlines_done[cursor]
            cursor += 1
            for c in _children(node):
                outlines_done.append((depth + 1, c))

        # find parent: the last outline one level above the requested level
        candidates = [
            node for depth, node in outlines_done if depth == level - 1
        ]
        if not candidates:
            raise IndexError(
                "no outline at level %d to attach a level %d outline to" %
                (level - 1, level))
        parent = candidates[-1]

        # update sibling-linking
        if "Last" in parent:
            sibling = parent["Last"]
            sibling[Name("Next")] = outline

        # update parent-linking
        outline[Name("Parent")] = parent
        if "First" not in parent:
            parent[Name("First")] = outline
        if "Count" not in parent:
            parent[Name("Count")] = Decimal(0)
        parent[Name("Last")] = outline

        # update count of the parent and of all its ancestors
        outline_to_update_count = parent
        while outline_to_update_count:
            outline_to_update_count[Name("Count")] = Decimal(
                outline_to_update_count["Count"] + Decimal(1))
            if "Parent" in outline_to_update_count:
                outline_to_update_count = outline_to_update_count["Parent"]
            else:
                break

        return self
Exemplo n.º 7
0
    def true_type_font_from_file(path_to_font_file: Path) -> "TrueTypeFont":
        """
        Build the PDF TrueTypeFont object for a given TTF file.

        The font name is taken from the name table (platformID 3, nameID 1)
        and reduced to its ASCII letters. Character codes 0..n-1 are assigned
        to the glyphs that the best cmap of the font maps to, in glyph order;
        /Widths, /Encoding /Differences, a /FontDescriptor and the
        Flate-compressed font file (/FontFile2) are filled in accordingly.

        Raises AssertionError when the file does not exist, is not named
        *.ttf or is empty, and IndexError when the name table has no matching
        record. Returns the new TrueTypeFont.
        """
        assert path_to_font_file.exists()
        assert path_to_font_file.name.endswith(".ttf")

        with open(path_to_font_file, "rb") as ffh:
            font_file_bytes: bytes = ffh.read()
        assert font_file_bytes

        # read file
        ttf_font_file = TTFont(path_to_font_file)

        # build font
        font: TrueTypeFont = TrueTypeFont()
        font_name: str = str(
            [
                x for x in ttf_font_file["name"].names
                if x.platformID == 3 and x.nameID == 1
            ][0].string,
            "latin1",
        )
        font_name = "".join([
            x for x in font_name if x.lower() in "abcdefghijklmnopqrstuvwxyz"
        ])

        font[Name("Name")] = Name(font_name)
        font[Name("BaseFont")] = Name(font_name)

        # glyph name -> lowest code point mapping to it;
        # getBestCmap may return None, in which case no glyph is selected
        cmap: typing.Optional[typing.Dict[int,
                                          str]] = ttf_font_file.getBestCmap()
        cmap_reverse: typing.Dict[str, int] = {}
        for k, v in (cmap or {}).items():
            cmap_reverse[v] = min(cmap_reverse.get(v, k), k)
        glyph_order: typing.List[str] = [
            x for x in ttf_font_file.glyphOrder if x in cmap_reverse
        ]

        # build widths
        units_per_em: pDecimal = pDecimal(ttf_font_file["head"].unitsPerEm)
        if cmap is not None:
            # the glyph set is the same for every glyph: fetch it once
            glyph_set = ttf_font_file.getGlyphSet()
            font[Name("FirstChar")] = pDecimal(0)
            # codes run from 0 to n-1, so that /Widths holds
            # LastChar - FirstChar + 1 entries
            font[Name("LastChar")] = pDecimal(max(len(glyph_order) - 1, 0))
            font[Name("Widths")] = List()
            for glyph_name in glyph_order:
                w: pDecimal = (pDecimal(glyph_set[glyph_name].width) /
                               units_per_em) * pDecimal(1000)
                w = pDecimal(round(w, 2))
                font["Widths"].append(w)

        font[Name("FontDescriptor")] = Dictionary()
        font["FontDescriptor"][Name("Type")] = Name("FontDescriptor")
        # /FontName is a name object, the same value as /BaseFont
        font["FontDescriptor"][Name("FontName")] = Name(font_name)
        font["FontDescriptor"][Name("FontStretch")] = Name("Normal")  # TODO
        font["FontDescriptor"][Name("FontWeight")] = pDecimal(400)  # TODO
        font["FontDescriptor"][Name("Flags")] = pDecimal(4)  # TODO
        font["FontDescriptor"][Name("FontBBox")] = List(
        ).set_can_be_referenced(  # type: ignore [attr-defined]
            False)  # TODO
        for _ in range(0, 4):
            font["FontDescriptor"]["FontBBox"].append(pDecimal(0))

        # fmt: off
        font["FontDescriptor"][Name("ItalicAngle")] = pDecimal(
            ttf_font_file["post"].italicAngle)
        font["FontDescriptor"][Name("Ascent")] = pDecimal(
            pDecimal(ttf_font_file["hhea"].ascent) / units_per_em *
            Decimal(1000))
        font["FontDescriptor"][Name("Descent")] = pDecimal(
            pDecimal(ttf_font_file["hhea"].descent) / units_per_em *
            Decimal(1000))
        font["FontDescriptor"][Name("CapHeight")] = pDecimal(0)  # TODO
        font["FontDescriptor"][Name("StemV")] = pDecimal(0)  # TODO
        # fmt: on

        # code i is mapped to the i-th selected glyph
        font[Name("Encoding")] = Dictionary()
        font["Encoding"][Name("BaseEncoding")] = Name("WinAnsiEncoding")
        font["Encoding"][Name("Differences")] = List()
        for i, glyph_name in enumerate(glyph_order):
            font["Encoding"]["Differences"].append(pDecimal(i))
            font["Encoding"]["Differences"].append(Name(glyph_name))

        # embed font file
        compressed_bytes: bytes = zlib.compress(font_file_bytes, 9)
        font_stream: Stream = Stream()
        font_stream[Name("Type")] = Name("Font")
        font_stream[Name("Subtype")] = Name("TrueType")
        # /Length counts the encoded (compressed) bytes,
        # /Length1 the length of the decoded font file
        font_stream[Name("Length")] = pDecimal(len(compressed_bytes))
        font_stream[Name("Length1")] = pDecimal(len(font_file_bytes))
        font_stream[Name("Filter")] = Name("FlateDecode")
        font_stream[Name("DecodedBytes")] = font_file_bytes
        font_stream[Name("Bytes")] = compressed_bytes

        font["FontDescriptor"][Name("FontFile2")] = font_stream

        # return
        return font
Exemplo n.º 8
0
    def transform(
        self,
        object_to_transform: AnyPDFType,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        Write a List as "[item item ...]" followed by a newline to context.destination.

        Items that are a Dictionary, List, Stream or Image and that can be
        referenced are replaced by their reference inside the brackets and
        are written themselves once the List is done. A List whose own
        reference is already in context.resolved_references is skipped
        (the method then returns None); otherwise the List of written
        values is returned.
        """
        assert isinstance(object_to_transform, List)
        assert context is not None
        assert context.destination is not None
        assert context.destination

        # avoid resolving objects twice
        own_ref: typing.Optional[
            Reference] = object_to_transform.get_reference(
            )  # type: ignore [attr-defined]
        if own_ref is not None and own_ref in context.resolved_references:
            assert own_ref.object_number is not None
            logger.debug(
                "skip writing object %d %d R (already resolved)" %
                (own_ref.object_number, own_ref.generation_number or 0))
            return

        # split the items: referable containers are swapped for a reference
        # and remembered for later, everything else is written in place
        referable_types = (Dictionary, List, Stream, Image)
        out_value = List()
        deferred: typing.List[AnyPDFType] = []
        for item in object_to_transform:
            if isinstance(item, referable_types) and item.can_be_referenced(
            ):  # type: ignore [union-attr]
                out_value.append(self.get_reference(item, context))
                deferred.append(item)
                continue
            out_value.append(item)

        # start object if needed
        started_object = False
        if own_ref is not None:
            assert own_ref.object_number is not None
            if own_ref.object_number is not None and own_ref.byte_offset is None:
                started_object = True
                self.start_object(object_to_transform, context)
            context.resolved_references.append(own_ref)

        # write the list at the current location, items separated by a space
        context.destination.write(b"[")
        final_position = len(out_value) - 1
        for position, item in enumerate(out_value):
            self.get_root_transformer().transform(item, context)
            if position != final_position:
                context.destination.write(b" ")
        context.destination.write(b"]\n")

        # end object if needed
        if started_object:
            self.end_object(object_to_transform, context)

        # now write the items that were replaced by a reference
        for item in deferred:
            self.get_root_transformer().transform(item, context)

        # return
        return out_value
Exemplo n.º 9
0
 def __deepcopy__(self, memodict=None) -> "Font":
     """
     Return a copy of this Font.

     The copy starts from self._empty_copy(). The well-known font entries are copied one by one
     (array-valued entries get a new List holding the same items); every entry not handled
     explicitly is deep-copied. memodict is the memo dictionary of copy.deepcopy; a fresh one
     is created when the method is called without it, so that no state is shared between calls.
     """
     if memodict is None:
         memodict = {}

     def _shallow_list(items) -> List:
         # a new List holding the very same items
         duplicate = List()
         for item in items:
             duplicate.append(item)
         return duplicate

     out: Font = self._empty_copy()
     # Type
     out[Name("Type")] = Name("Font")
     # BaseFont
     out[Name("BaseFont")] = Name(str(self["BaseFont"]))
     # FirstChar, LastChar
     for key in ("FirstChar", "LastChar"):
         if key in self:
             out[Name(key)] = self[key]
     # Widths
     if "Widths" in self:
         out[Name("Widths")] = _shallow_list(self["Widths"])
     # FontDescriptor
     # NOTE(review): the helper is invoked on self with self's own descriptor - confirm
     # whether the copy is meant to end up with a descriptor of its own
     if "FontDescriptor" in self:
         out[Name("FontDescriptor")] = self._copy_font_descriptor(self["FontDescriptor"])
     # Encoding (either a Name or a Dictionary)
     if "Encoding" in self:
         encoding = self["Encoding"]
         if isinstance(encoding, Name):
             out[Name("Encoding")] = Name(str(encoding))
         if isinstance(encoding, Dictionary):
             out[Name("Encoding")] = Dictionary()
             out["Encoding"][Name("Type")] = Name("Encoding")
             if "BaseEncoding" in encoding:
                 out["Encoding"][Name("BaseEncoding")] = Name(
                     str(encoding["BaseEncoding"])
                 )
             if "Differences" in encoding:
                 out["Encoding"][Name("Differences")] = _shallow_list(
                     encoding["Differences"]
                 )
     # ToUnicode
     if "ToUnicode" in self:
         out[Name("ToUnicode")] = copy.deepcopy(self["ToUnicode"], memodict)
     # FontBBox, FontMatrix
     for key in ("FontBBox", "FontMatrix"):
         if key in self:
             out[Name(key)] = _shallow_list(self[key])
     # CharProcs
     # Resources
     # CIDSystemInfo
     if "CIDSystemInfo" in self:
         out[Name("CIDSystemInfo")] = Dictionary()
         for key in ("Registry", "Ordering", "Supplement"):
             out["CIDSystemInfo"][Name(key)] = self["CIDSystemInfo"][key]
     # DW
     if "DW" in self:
         out[Name("DW")] = self["DW"]
     # W (numbers are kept, nested Lists are duplicated, anything else is left out)
     if "W" in self:
         out[Name("W")] = List()
         for x in self["W"]:
             if isinstance(x, pDecimal):
                 out["W"].append(x)
             if isinstance(x, List):
                 out["W"].append(_shallow_list(x))
     # DescendantFonts (only the first descendant is copied)
     if "DescendantFonts" in self:
         out[Name("DescendantFonts")] = List()
         out["DescendantFonts"].append(
             self["DescendantFonts"][0].__deepcopy__(memodict)
         )
     # DW2
     if "DW2" in self:
         out[Name("DW2")] = _shallow_list(self["DW2"])
     # W2
     # CIDToGIDMap
     # default
     for k, v in self.items():
         if k not in out:
             out[k] = copy.deepcopy(v, memodict)
     return out
Exemplo n.º 10
0
    def append_embedded_file(self,
                             file_name: str,
                             file_bytes: bytes,
                             apply_compression: bool = True) -> "Document":
        """
        Embed a file (given by its name and its bytes) into this Document.

        Embedded file streams (PDF 1.3) allow the contents of referenced
        files to be stored directly within the body of the PDF file, which
        makes the PDF a self-contained unit that can be stored or transmitted
        as a single entity.

        The file is registered under /Names /EmbeddedFiles of the /Root
        (both are created when missing). With apply_compression the bytes are
        stored Flate-compressed, otherwise as they are. When a kid with this
        exact file name already exists, nothing is changed (TODO).
        The Document must already have XRef, Trailer and Root
        (AssertionError otherwise). Returns this Document.
        """
        assert "XRef" in self
        assert "Trailer" in self["XRef"]
        assert "Root" in self["XRef"]["Trailer"]
        catalog = self["XRef"]["Trailer"]["Root"]

        # set up /Names dictionary
        if "Names" not in catalog:
            catalog[Name("Names")] = Dictionary()
        name_dictionary = catalog["Names"]

        # set up /EmbeddedFiles
        if "EmbeddedFiles" not in name_dictionary:
            name_dictionary[Name("EmbeddedFiles")] = Dictionary()
            name_dictionary["EmbeddedFiles"][Name("Kids")] = List()

        # find parent: the first kid whose (non-degenerate) limits enclose
        # the file name, or /EmbeddedFiles itself when there is none
        parent = name_dictionary["EmbeddedFiles"]
        if "Kids" in parent:
            for candidate in parent["Kids"]:
                lower_limit = str(candidate["Limits"][0])
                upper_limit = str(candidate["Limits"][1])
                if lower_limit == upper_limit:
                    continue
                if lower_limit < file_name < upper_limit:
                    parent = candidate
                    break

        # an existing child is left as it is
        existing = [
            x for x in parent["Kids"]
            if x["Limits"][0] == x["Limits"][1] == file_name
        ]
        if existing:
            # TODO: change existing child
            return self

        # new child
        kid = Dictionary()
        kid[Name("F")] = String(file_name)
        kid[Name("Type")] = Name("Filespec")
        kid[Name("Limits")] = List()
        for _ in range(0, 2):
            kid["Limits"].append(String(file_name))

        # build leaf \Names array
        leaf_names = List()
        leaf_names.append(String(file_name))
        kid[Name("Names")] = leaf_names

        # build actual file stream
        stream = Stream()
        stream[Name("Type")] = Name("EmbeddedFile")
        stream[Name("DecodedBytes")] = file_bytes
        if apply_compression:
            stream[Name("Bytes")] = zlib.compress(
                stream[Name("DecodedBytes")], 9)
            stream[Name("Filter")] = Name("FlateDecode")
        else:
            stream[Name("Bytes")] = file_bytes
        stream[Name("Length")] = Decimal(len(stream[Name("Bytes")]))

        # build leaf \Filespec dictionary
        file_spec = Dictionary()
        file_spec[Name("EF")] = Dictionary()
        file_spec["EF"][Name("F")] = stream
        file_spec[Name("F")] = String(file_name)
        file_spec[Name("Type")] = Name("Filespec")
        leaf_names.append(file_spec)

        # append
        parent["Kids"].append(kid)

        # return
        return self
Exemplo n.º 11
0
    def add_outline(
        self,
        text: str,
        level: int,
        destination_type: DestinationType,
        page_nr: int,
        top: typing.Optional[Decimal] = None,
        right: typing.Optional[Decimal] = None,
        bottom: typing.Optional[Decimal] = None,
        left: typing.Optional[Decimal] = None,
        zoom: typing.Optional[Decimal] = None,
    ) -> "Document":
        """
        A PDF document may contain a document outline that the conforming reader may display on the screen,
        allowing the user to navigate interactively from one part of the document to another. The outline consists of a
        tree-structured hierarchy of outline items (sometimes called bookmarks), which serve as a visual table of
        contents to display the document's structure to the user.
        This function adds an outline item titled `text` to this Document.

        The item points at a destination array made of page_nr, the value of
        destination_type and the coordinates that destination type requires:
        X_Y_Z takes left, top and zoom; FIT takes nothing; FIT_H and FIT_B_H
        take top; FIT_V and FIT_B_V take left; FIT_R takes left, bottom, right
        and top. Every other coordinate must be left at None.

        When the outline tree is still empty the item becomes its only entry
        and level is not consulted. Otherwise the item is appended as the last
        child of the last item found at depth level - 1 in a breadth-first walk
        (the /Outlines dictionary itself sits at depth -1), and the Count of
        that parent and of each of its ancestors is incremented.

        Raises AssertionError when the coordinates do not match the
        destination type, and IndexError when no item exists at depth
        level - 1. Returns this Document.
        """
        # build the destination array
        destination = List().set_can_be_referenced(
            False)  # type: ignore [attr-defined]
        destination.append(Decimal(page_nr))
        destination.append(destination_type.value)
        if destination_type == DestinationType.X_Y_Z:
            assert (left is not None and bottom is None and right is None
                    and top is not None and zoom is not None)
            destination.append(Decimal(left))
            destination.append(Decimal(top))
            destination.append(Decimal(zoom))
        if destination_type == DestinationType.FIT:
            assert (left is None and bottom is None and right is None
                    and top is None and zoom is None)
        if destination_type == DestinationType.FIT_H:
            assert (left is None and bottom is None and right is None
                    and top is not None and zoom is None)
            destination.append(Decimal(top))
        if destination_type == DestinationType.FIT_V:
            assert (left is not None and bottom is None and right is None
                    and top is None and zoom is None)
            destination.append(Decimal(left))
        if destination_type == DestinationType.FIT_R:
            assert (left is not None and bottom is not None
                    and right is not None and top is not None and zoom is None)
            destination.append(Decimal(left))
            destination.append(Decimal(bottom))
            destination.append(Decimal(right))
            destination.append(Decimal(top))
        if destination_type == DestinationType.FIT_B_H:
            assert (left is None and bottom is None and right is None
                    and top is not None and zoom is None)
            destination.append(Decimal(top))
        if destination_type == DestinationType.FIT_B_V:
            assert (left is not None and bottom is None and right is None
                    and top is None and zoom is None)
            destination.append(Decimal(left))

        # add \Outlines entry in \Root
        root = self["XRef"]["Trailer"]["Root"]
        if "Outlines" not in root:
            outline_dictionary: Dictionary = Dictionary()
            root[Name("Outlines")] = outline_dictionary
            # the parent of \Outlines is \Root, not the dictionary itself
            outline_dictionary.set_parent(root)  # type: ignore [attr-defined]
            outline_dictionary[Name("Type")] = Name("Outlines")
            outline_dictionary[Name("Count")] = Decimal(0)

        # create entry
        outline = Dictionary()
        outline[Name("Dest")] = destination
        outline[Name("Parent")] = None
        outline[Name("Title")] = String(text)

        # get \Outlines
        outline_dictionary = root["Outlines"]

        # if everything is empty, add the new entry as the only entry
        if "First" not in outline_dictionary or "Last" not in outline_dictionary:
            outline_dictionary[Name("First")] = outline
            outline_dictionary[Name("Last")] = outline
            outline_dictionary[Name("Count")] = Decimal(1)
            outline[Name("Parent")] = outline_dictionary
            return self

        # helper function: the children of x, from First to Last via Next
        def _children(x: Dictionary):
            if "First" not in x:
                return []
            children = [x["First"]]
            while children[-1] != x["Last"]:
                children.append(children[-1]["Next"])
            return children

        # breadth-first walk over all outlines, remembering the depth of each;
        # the list doubles as the queue (cursor marks the next item to expand)
        outlines_done: typing.List[typing.Tuple[int, Dictionary]] = [
            (-1, outline_dictionary)
        ]
        cursor = 0
        while cursor < len(outlines_done):
            depth, node = outlines_done[cursor]
            cursor += 1
            for c in _children(node):
                outlines_done.append((depth + 1, c))

        # find parent: the last outline one level above the requested level
        candidates = [
            node for depth, node in outlines_done if depth == level - 1
        ]
        if not candidates:
            raise IndexError(
                "no outline at level %d to attach a level %d outline to" %
                (level - 1, level))
        parent = candidates[-1]

        # update sibling-linking
        if "Last" in parent:
            sibling = parent["Last"]
            sibling[Name("Next")] = outline

        # update parent-linking
        outline[Name("Parent")] = parent
        if "First" not in parent:
            parent[Name("First")] = outline
        if "Count" not in parent:
            parent[Name("Count")] = Decimal(0)
        parent[Name("Last")] = outline

        # update count of the parent and of all its ancestors
        outline_to_update_count = parent
        while outline_to_update_count:
            outline_to_update_count[Name("Count")] = Decimal(
                outline_to_update_count["Count"] + Decimal(1))
            if "Parent" in outline_to_update_count:
                outline_to_update_count = outline_to_update_count["Parent"]
            else:
                break

        return self
# Example no. 12
# 0
    def _read_file(input: typing.TextIO) -> Optional[Font]:
        """
        This function reads an Adobe Font Metrics (AFM) text stream and builds a Type1 Font
        (including its FontDescriptor and Widths) from the entries found in it.

        :param input:   the text stream to read the font metrics from
        :return:        the Font that was built, or None if the input is empty, is not framed by
                        a StartFontMetrics / EndFontMetrics line, or has no
                        StartCharMetrics / EndCharMetrics section
        """
        # drop comment lines, strip the trailing newline of every remaining line
        lines: typing.List[str] = [
            x[:-1] if x.endswith("\n") else x
            for x in input.readlines()
            if not x.startswith("Comment")
        ]

        # an empty input can not be a font metrics file
        if not lines:
            return None

        # check first/last line
        if not (
            lines[0].startswith("StartFontMetrics")
            and lines[-1].startswith("EndFontMetrics")
        ):
            return None

        out_font = Font()

        # FontDescriptor
        out_font_descriptor = FontDescriptor().set_parent(out_font)  # type: ignore [attr-defined]

        # helper function to copy a numeric entry into the FontDescriptor
        def _copy_number(
            key: str, afm_key: str, default: Optional[Decimal] = None
        ) -> Optional[float]:
            # a missing (or zero) value falls back to the default, when there is one
            value = AdobeFontMetrics._find_and_parse_as_float(lines, afm_key)
            if value:
                out_font_descriptor[Name(key)] = Decimal(value)
            elif default is not None:
                out_font_descriptor[Name(key)] = default
            return value

        # FontName
        font_name = AdobeFontMetrics._find_and_parse_as_string(lines, "FontName")
        if font_name:
            out_font_descriptor[Name("FontName")] = Name(font_name)

        # FontFamily
        font_family = AdobeFontMetrics._find_and_parse_as_string(lines, "FamilyName")
        if font_family:
            out_font_descriptor[Name("FontFamily")] = String(font_family)

        # FontStretch, FontWeight are not derived from the metrics file

        # FontBBox
        fontbbox_str = AdobeFontMetrics._find_and_parse_as_string(lines, "FontBBox")
        if fontbbox_str:
            # split() (rather than split(" ")) tolerates repeated whitespace between the numbers
            out_font_descriptor[Name("FontBBox")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
            for number in fontbbox_str.split():
                out_font_descriptor[Name("FontBBox")].append(Decimal(number))

        # ItalicAngle
        _copy_number("ItalicAngle", "ItalicAngle", Decimal(0))

        # Ascent
        _copy_number("Ascent", "Ascender", Decimal(0))

        # Descent
        _copy_number("Descent", "Descender", Decimal(0))

        # Flags (hard-coded, not derived from the metrics file)
        out_font_descriptor[Name("Flags")] = Decimal(131104)

        # Leading is not derived from the metrics file

        # CapHeight
        _copy_number("CapHeight", "CapHeight", Decimal(0))

        # XHeight
        _copy_number("XHeight", "XHeight")

        # StemV
        _copy_number("StemV", "StemV", Decimal(0))

        # StemH
        _copy_number("StemH", "StemH", Decimal(0))

        # AvgWidth, MaxWidth (kept, they decide whether a fallback is computed below)
        avgwidth = _copy_number("AvgWidth", "AvgWidth")
        maxwidth = _copy_number("MaxWidth", "MaxWidth")

        # MissingWidth
        _copy_number("MissingWidth", "MissingWidth")

        # CharSet
        _copy_number("CharSet", "CharSet")

        # Font
        out_font[Name("Type")] = Name("Font")
        out_font[Name("Subtype")] = Name("Type1")
        # NOTE(review): presumably fails with a KeyError when the file has no FontName entry - confirm
        out_font[Name("Name")] = out_font_descriptor["FontName"]
        out_font[Name("BaseFont")] = out_font_descriptor["FontName"]

        # locate the character metrics section,
        # both delimiters are matched by prefix so trailing text does not break the lookup
        start_index = next(
            (i for i, x in enumerate(lines) if x.startswith("StartCharMetrics")), None
        )
        end_index = next(
            (i for i, x in enumerate(lines) if x.startswith("EndCharMetrics")), None
        )
        if start_index is None or end_index is None:
            return None
        char_metrics_lines = lines[start_index + 1 : end_index]

        widths = List().set_parent(out_font)  # type: ignore [attr-defined]
        avg_char_width: float = 0
        avg_char_width_norm: float = 0
        first_char = None
        last_char = None

        for cml in char_metrics_lines:
            # every line is a ";" separated sequence of "<key> <value> ..." entries,
            # only the first value of each entry is kept
            fields: typing.Dict[str, str] = {}
            for entry in cml.split(";"):
                parts = entry.strip().split(" ")
                if len(parts) > 1:
                    fields[parts[0]] = parts[1]

            # determine char (decimal "C" entry, or hexadecimal "CH <..>" entry)
            ch = -1
            if "C" in fields:
                ch = int(fields["C"])
            if "CH" in fields:
                ch = int(fields["CH"][1:-1], 16)

            # track the range of character codes, -1 is skipped
            if ch != -1:
                if first_char is None or ch < first_char:
                    first_char = ch
                if last_char is None or ch > last_char:
                    last_char = ch

            # only characters with a code and a non-zero width count towards the average
            w = float(fields["WX"])
            if ch != -1 and w != 0:
                avg_char_width += w
                avg_char_width_norm += 1

            # NOTE(review): the width is appended for every line, including those with code -1,
            # so Widths may hold more entries than LastChar - FirstChar + 1 - confirm this is intended
            widths.append(Decimal(w))

        assert first_char is not None
        assert last_char is not None

        out_font[Name("FirstChar")] = Decimal(first_char)
        out_font[Name("LastChar")] = Decimal(last_char)
        out_font[Name("Widths")] = widths

        # fall back to values derived from the character metrics
        if avgwidth is None:
            # no character contributed a width: use 0 rather than dividing by zero
            average = (
                avg_char_width / avg_char_width_norm if avg_char_width_norm else 0
            )
            out_font_descriptor[Name("AvgWidth")] = Decimal(
                round(Decimal(average), 2)
            )
        if maxwidth is None:
            out_font_descriptor[Name("MaxWidth")] = Decimal(max(widths))
        out_font[Name("FontDescriptor")] = out_font_descriptor

        # return
        return out_font