Пример #1
0
 def get_ascent(self) -> pDecimal:
     """
     Return the maximum height above the baseline reached by glyphs in this font
     (the height of glyphs for accented characters is excluded).

     The value is read from the "Ascender" attribute of the AFM data attached to this font.
     When that attribute is absent, 0 is returned.
     """
     afm_attributes = self._afm._attrs
     if "Ascender" not in afm_attributes:
         return pDecimal(0)
     return pDecimal(afm_attributes["Ascender"])
Пример #2
0
 def get_descent(self) -> pDecimal:
     """
     Return the maximum depth below the baseline reached by glyphs in this font.
     The value is expected to be a negative number.

     The value is read from the "Descender" attribute of the AFM data attached to this font.
     When that attribute is absent, 0 is returned.
     """
     afm_attributes = self._afm._attrs
     if "Descender" not in afm_attributes:
         return pDecimal(0)
     return pDecimal(afm_attributes["Descender"])
Пример #3
0
    def get_width(self, character_identifier: int) -> typing.Optional[pDecimal]:
        """
        Return the width (in text space) of the glyph for a given character identifier.

        Lookup order:
        1. the per-font width cache,
        2. the "W" array of this font, which is parsed into the cache on a cache miss,
        3. the "DW" entry of this font, or 1000 when "DW" is absent.

        The "W" array is read as a sequence of groups in one of two layouts:
        `<char_start_code> [<width>+]` or `<char_start_code> <char_end_code> <width>`.
        Entries that match neither layout are skipped.

        :param character_identifier: the character identifier (CID) to look up
        :return: the width found for the character identifier, or the default width
        """

        # check cache
        if character_identifier in self._width_cache:
            return self._width_cache[character_identifier]

        # Default value: none (the DW value shall be used for all glyphs).
        dw: pDecimal = self["DW"] if "DW" in self else pDecimal(1000)
        if "W" not in self:
            return dw

        widths = self["W"]
        assert isinstance(widths, List)
        number_of_entries: int = len(widths)
        i: int = 0
        while i < number_of_entries:
            start_code = widths[i]
            if isinstance(start_code, pDecimal):
                # <char_start_code> [<width>+]
                if i + 1 < number_of_entries and isinstance(widths[i + 1], List):
                    first_cid: int = int(start_code)
                    for offset, cid_width in enumerate(widths[i + 1]):
                        self._width_cache[first_cid + offset] = pDecimal(int(cid_width))
                    i += 2
                    continue
                # <char_start_code> <char_end_code> <width>
                if (
                    i + 2 < number_of_entries
                    and isinstance(widths[i + 1], pDecimal)
                    and isinstance(widths[i + 2], pDecimal)
                ):
                    range_width: int = int(widths[i + 2])
                    for cid in range(int(start_code), int(widths[i + 1]) + 1):
                        self._width_cache[cid] = pDecimal(range_width)
                    i += 3
                    continue
            # The entry matches neither layout (malformed or truncated array).
            # Step over it; without this the loop would never advance and never terminate.
            i += 1

        # check cache
        if character_identifier in self._width_cache:
            return self._width_cache[character_identifier]

        # default
        return dw
Пример #4
0
 def get_width(self, character_identifier: int) -> typing.Optional[pDecimal]:
     """
     Return the width (in text space) of the glyph for a given character identifier.

     The AFM character table of this font is scanned for entries whose first field equals
     the character identifier; the second field of such an entry is its width.
     The width is returned only when exactly one entry matches, otherwise 0 is returned.
     """
     matching_widths: typing.List[pDecimal] = []
     for _glyph_name, char_metrics in self._afm._chars.items():
         if char_metrics[0] == character_identifier:
             matching_widths.append(pDecimal(char_metrics[1]))
     if len(matching_widths) != 1:
         return pDecimal(0)
     return matching_widths[0]
Пример #5
0
 def _append_to_content_stream(self, page: Page, instructions: str):
     """
     Append content-stream instructions to the "Contents" stream of a page.

     The stream is created first when the page has none. The instructions are encoded as latin1
     and appended to "DecodedBytes"; "Bytes" (zlib, level 9) and "Length" are then refreshed.
     """
     self._initialize_page_content_stream(page)
     stream = page["Contents"]

     # extend the decoded payload
     decoded = stream[Name("DecodedBytes")]
     decoded += instructions.encode("latin1")
     stream[Name("DecodedBytes")] = decoded

     # keep the compressed payload and its length in sync
     compressed = zlib.compress(stream["DecodedBytes"], 9)
     stream[Name("Bytes")] = compressed
     stream[Name("Length")] = pDecimal(len(stream["Bytes"]))
Пример #6
0
 def invoke(self,
            canvas: "Canvas",
            operands: List[AnyPDFType] = []):  # type: ignore [name-defined]
     """
     Invoke the T* operator.

     The operator is delegated to the canvas' "Td" operator, called with a horizontal
     offset of 0 and a vertical offset equal to the negated text leading of the
     current graphics state. The `operands` argument is not used.
     """
     td_operator: typing.Optional[CanvasOperator] = canvas.get_operator("Td")
     assert td_operator
     next_line_offset = [pDecimal(0), -canvas.graphics_state.leading]
     td_operator.invoke(canvas, next_line_offset)
Пример #7
0
 def get_descent(self) -> pDecimal:
     """
     Return the maximum depth below the baseline reached by glyphs in this font.
     The value shall be a negative number.

     The value is taken from the `Descent` entry of this font's `FontDescriptor` dictionary.
     When the descriptor or the entry is missing, a debug message is logged and 0 is returned.
     """
     # The membership test must be done on the FontDescriptor dictionary itself.
     # Testing against the string literal "FontDescriptor" is a substring check that never succeeds.
     if "FontDescriptor" in self and "Descent" in self["FontDescriptor"]:
         return self["FontDescriptor"]["Descent"]
     logger.debug(
         "Type3Font does not have an `Descent` entry in its `FontDescriptor` dictionary."
     )
     return pDecimal(0)  # TODO
Пример #8
0
 def get_ascent(self) -> pDecimal:
     """
     Return the maximum height above the baseline reached by glyphs in this font.
     The height of glyphs for accented characters shall be excluded.

     The value is taken from the `Ascent` entry of this font's `FontDescriptor` dictionary.
     When the descriptor or the entry is missing, a debug message is logged and 0 is returned.
     """
     # The membership test must be done on the FontDescriptor dictionary itself.
     # Testing against the string literal "FontDescriptor" is a substring check that never succeeds.
     if "FontDescriptor" in self and "Ascent" in self["FontDescriptor"]:
         return self["FontDescriptor"]["Ascent"]
     logger.debug(
         "Type3Font does not have an `Ascent` entry in its `FontDescriptor` dictionary."
     )
     return pDecimal(0)  # TODO
Пример #9
0
    def __init__(
            self,
            text_bytes: bytes,
            font: Font,
            font_size: Decimal,
            character_spacing: Decimal = Decimal(0),
            word_spacing: Decimal = Decimal(0),
            horizontal_scaling: Decimal = Decimal(100),
    ):
        """
        Decode raw text bytes into a list of glyphs, using the given font.

        At every position a two-byte character code is tried first; when the font has no
        unicode mapping for it, the single byte at that position is tried. A byte without
        any mapping becomes a replacement-character glyph with a width of 250.
        The font and the text-state parameters are stored on the instance afterwards.
        """
        assert isinstance(font, Font)
        self._glyphs: typing.List[Glyph] = []
        byte_count: int = len(text_bytes)
        position: int = 0
        while position < byte_count:
            mapped_text: typing.Optional[str] = None

            # sometimes, 2 bytes make up 1 unicode char
            if position + 1 < byte_count:
                two_byte_code: int = (
                    text_bytes[position] * 256 + text_bytes[position + 1]
                )
                mapped_text = font.character_identifier_to_unicode(two_byte_code)
                if mapped_text is not None:
                    two_byte_width = font.get_width(two_byte_code) or pDecimal(0)
                    self._glyphs.append(
                        Glyph(two_byte_code, mapped_text, two_byte_width))
                    position += 2
                    continue

            # usually it's 1 byte though
            one_byte_code: int = text_bytes[position]
            mapped_text = font.character_identifier_to_unicode(one_byte_code)
            if mapped_text is not None:
                one_byte_width = font.get_width(one_byte_code) or Decimal(0)
                self._glyphs.append(
                    Glyph(one_byte_code, mapped_text, one_byte_width))
                position += 1
                continue

            # no mapping found
            self._glyphs.append(Glyph(one_byte_code, "�", Decimal(250)))
            position += 1

        self._font = font
        self._font_size = font_size
        self._character_spacing = character_spacing
        self._word_spacing = word_spacing
        self._horizontal_scaling = horizontal_scaling
Пример #10
0
    def _initialize_page_content_stream(self, page: Page):
        """
        Give a page an empty "Contents" stream when it does not have one yet.

        The new stream holds empty "DecodedBytes", their zlib-compressed (level 9) form in
        "Bytes", the "FlateDecode" filter and the matching "Length".
        A page that already has "Contents" is left untouched.
        """
        if "Contents" not in page:
            # build content stream object
            empty_stream = Stream()
            empty_stream[Name("DecodedBytes")] = b""
            empty_stream[Name("Bytes")] = zlib.compress(
                empty_stream["DecodedBytes"], 9
            )
            empty_stream[Name("Filter")] = Name("FlateDecode")
            empty_stream[Name("Length")] = pDecimal(len(empty_stream["Bytes"]))

            # set content of page
            page[Name("Contents")] = empty_stream
Пример #11
0
    def add(self, layout_element: LayoutElement) -> "PageLayout":
        """
        Lay out a `LayoutElement` on the current `Page`, below the previously added element.

        When the element does not fit in the remaining space of the current column, the
        content written by the attempt is rolled back, the layout switches to the next
        column and the element is added again. Nothing happens when all columns are used.
        The layout itself is returned, so that calls can be chained.
        """
        # every column has been used: there is no room left
        if self.column_index >= self.number_of_columns:
            return self

        # vertical room left below the previously added element
        remaining_height: Decimal = (
            self.previous_y - self.vertical_margin - self.previous_leading
        )
        assert self.page_height
        if remaining_height < 0:
            self.switch_to_next_column()
            return self.add(layout_element)

        # rectangle in the current column that is still available
        column_x = self.horizontal_margin + self.column_index * (
            self.column_width + self.inter_column_margin
        )
        candidate_rect: Rectangle = Rectangle(
            column_x,
            self.vertical_margin,
            self.column_width,
            self.previous_y - self.vertical_margin - self.previous_leading,
        )

        # remember the page content as it was before the layout attempt
        if "Contents" not in self.page:
            layout_element._initialize_page_content_stream(self.page)
        content_before_attempt = self.page["Contents"]["DecodedBytes"]

        # attempt layout
        placed_rect = layout_element.layout(self.page, bounding_box=candidate_rect)
        if placed_rect.y < self.vertical_margin:
            # the element ran below the bottom margin: undo the attempt and retry in the next column
            stream = self.page["Contents"]
            stream[Name("DecodedBytes")] = content_before_attempt
            stream[Name("Bytes")] = zlib.compress(stream["DecodedBytes"], 9)
            stream[Name("Length")] = pDecimal(len(stream["Bytes"]))
            self.switch_to_next_column()
            return self.add(layout_element)

        # the element fits: remember where the next element has to start
        self.previous_y = placed_rect.y
        self.previous_leading = self._calculate_leading(layout_element)
        return self
Пример #12
0
    def invoke(self,
               canvas: "Canvas",
               operands: List[AnyPDFType] = []):  # type: ignore [name-defined]
        """
        Invoke the TD operator.

        The operator is carried out in two steps: the canvas' "TL" operator is invoked with
        the negated second operand, then the canvas' "Td" operator is invoked with the
        original operands. Both operands must be `pDecimal` instances.
        """
        horizontal_offset, vertical_offset = operands[0], operands[1]
        assert isinstance(horizontal_offset, pDecimal)
        assert isinstance(vertical_offset, pDecimal)

        # step 1: set the text leading
        tl_operator: typing.Optional[CanvasOperator] = canvas.get_operator("TL")
        assert tl_operator
        tl_operator.invoke(canvas, [pDecimal(-operands[1])])

        # step 2: move the text position
        td_operator: typing.Optional[CanvasOperator] = canvas.get_operator("Td")
        assert td_operator
        td_operator.invoke(canvas, operands)
Пример #13
0
    def transform(
        self,
        object_to_transform: AnyPDFType,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        Write an ET.Element (representing XMP meta information) to a byte stream.

        The element is serialized and wrapped in a "Metadata" / "XML" Stream that takes over
        the reference of the element. The stream is handed to the root transformer, wrapped
        in an object start/end when its reference has an object number but no byte offset.
        """
        assert isinstance(object_to_transform, ET.Element)
        assert context is not None
        assert context.destination is not None
        assert context.destination

        # build stream
        metadata_stream = Stream()
        metadata_stream[Name("Type")] = Name("Metadata")
        metadata_stream[Name("Subtype")] = Name("XML")

        # the serialized XML is stored as is, both decoded and encoded
        xml_bytes = ET.tostring(object_to_transform)
        metadata_stream[Name("DecodedBytes")] = xml_bytes
        metadata_stream[Name("Bytes")] = xml_bytes
        metadata_stream[Name("Length")] = pDecimal(len(xml_bytes))

        # copy reference
        element_reference = object_to_transform.get_reference()  # type: ignore [attr-defined]
        metadata_stream.set_reference(element_reference)

        # decide whether an object has to be started
        stream_reference = metadata_stream.get_reference()  # type: ignore [attr-defined]
        needs_object_wrapper = False
        if stream_reference is not None:
            assert isinstance(stream_reference, Reference)
            needs_object_wrapper = (
                stream_reference.object_number is not None
                and stream_reference.byte_offset is None
            )
        if needs_object_wrapper:
            self.start_object(metadata_stream, context)

        # pass stream along to other transformer
        self.get_root_transformer().transform(metadata_stream, context)

        # end object if needed
        if needs_object_wrapper:
            self.end_object(metadata_stream, context)
Пример #14
0
    def _calculate_layout_box_without_padding(
            self, page: Page, bounding_box: Rectangle) -> Rectangle:
        """
        Determine the rectangle this element occupies when laid out without padding.

        The layout is actually performed on the page, after which the content stream of the
        page is put back in the state it had before, so only the resulting rectangle remains.
        """
        # snapshot of the page content before the trial layout
        if "Contents" not in page:
            self._initialize_page_content_stream(page)
        bytes_before_layout = page["Contents"]["DecodedBytes"]

        # trial layout
        measured_rect = self._do_layout_without_padding(page, bounding_box)
        assert measured_rect is not None

        # put the snapshot back, and keep the compressed bytes and the length in sync
        stream = page["Contents"]
        stream[Name("DecodedBytes")] = bytes_before_layout
        compressed = zlib.compress(stream["DecodedBytes"], 9)
        stream[Name("Bytes")] = compressed
        stream[Name("Length")] = pDecimal(len(stream["Bytes"]))

        return measured_rect
Пример #15
0
 def get_descent(self) -> pDecimal:
     """
     Return the maximum depth below the baseline reached by glyphs in this font
     (by contract a negative number).

     This implementation does not look at any font data: it always returns 0.
     """
     no_descent: pDecimal = pDecimal(0)
     return no_descent
Пример #16
0
    def get_space_character_width_estimate(self) -> Decimal:
        """
        Estimate the width of the space character (unicode 32) in this Font.

        The first of the following sources that yields a value is used:
        1. the width of the space character itself, when the Font defines it,
        2. the MissingWidth entry of the FontDescriptor,
        3. the AvgWidth entry of the FontDescriptor,
        4. the DW entry of the single DescendantFont (composite Fonts),
        5. the width of another character present in the Font, scaled by a fixed ratio,
        6. the constant 278.
        """
        # 1. if space is defined, and the width of space is defined, return that
        space_identifier: typing.Optional[int] = self.unicode_to_character_identifier(" ")
        if space_identifier is not None:
            space_width: typing.Optional[Decimal] = self.get_width(space_identifier)
            if space_width is not None:
                return space_width

        # 2. MissingWidth, 3. AvgWidth
        for descriptor_key in ("MissingWidth", "AvgWidth"):
            if "FontDescriptor" in self and descriptor_key in self["FontDescriptor"]:
                return self["FontDescriptor"][descriptor_key]

        # 4. default width of the descendant font
        has_single_descendant_with_dw = (
            "DescendantFonts" in self
            and isinstance(self["DescendantFonts"], List)
            and len(self["DescendantFonts"]) == 1
            and "DW" in self["DescendantFonts"][0]
        )
        if has_single_descendant_with_dw:
            return self["DescendantFonts"][0]["DW"]

        # 5. other characters may be defined, which give us a clue
        # (the factor is applied to the width of the character to obtain the width of a space)
        # fmt: off
        char_to_space_width_ratio: typing.Dict[str, Decimal] = {
            "a": Decimal("0.500"), "b": Decimal("0.500"), "c": Decimal("0.556"),
            "d": Decimal("0.500"), "e": Decimal("0.500"), "f": Decimal("1.000"),
            "g": Decimal("0.500"), "h": Decimal("0.500"), "i": Decimal("1.252"),
            "j": Decimal("1.252"), "k": Decimal("0.556"), "l": Decimal("1.252"),
            "m": Decimal("0.334"), "n": Decimal("0.500"), "o": Decimal("0.500"),
            "p": Decimal("0.500"), "q": Decimal("0.500"), "r": Decimal("0.835"),
            "s": Decimal("0.556"), "t": Decimal("1.000"), "u": Decimal("0.500"),
            "v": Decimal("0.556"), "w": Decimal("0.385"), "x": Decimal("0.556"),
            "y": Decimal("0.556"), "z": Decimal("0.556"), "0": Decimal("0.500"),
            "1": Decimal("0.500"), "2": Decimal("0.500"), "3": Decimal("0.500"),
            "4": Decimal("0.500"), "5": Decimal("0.500"), "6": Decimal("0.500"),
            "7": Decimal("0.500"), "8": Decimal("0.500"), "9": Decimal("0.500"),
            "A": Decimal("0.417"), "B": Decimal("0.417"), "C": Decimal("0.385"),
            "D": Decimal("0.385"), "E": Decimal("0.417"), "F": Decimal("0.455"),
            "G": Decimal("0.357"), "H": Decimal("0.385"), "I": Decimal("1.000"),
            "J": Decimal("0.556"), "K": Decimal("0.417"), "L": Decimal("0.500"),
            "M": Decimal("0.334"), "N": Decimal("0.385"), "O": Decimal("0.357"),
            "P": Decimal("0.417"), "Q": Decimal("0.357"), "R": Decimal("0.385"),
            "S": Decimal("0.417"), "T": Decimal("0.455"), "U": Decimal("0.385"),
            "V": Decimal("0.417"), "W": Decimal("0.294"), "X": Decimal("0.417"),
            "Y": Decimal("0.417"), "Z": Decimal("0.455"),
        }
        # fmt: on
        for reference_char, ratio in char_to_space_width_ratio.items():
            reference_identifier = self.unicode_to_character_identifier(reference_char)
            if reference_identifier is None:
                continue
            reference_width = self.get_width(reference_identifier)
            if reference_width is not None:
                return pDecimal(reference_width * ratio)

        # 6. helvetica
        return Decimal(278)
Пример #17
0
    def true_type_font_from_file(path_to_font_file: Path) -> "TrueTypeFont":
        """
        Build the PDF TrueTypeFont object for a given TTF file.

        The resulting font dictionary holds the font name (restricted to ASCII letters),
        the glyph widths (scaled to 1000 units per em), a FontDescriptor, an Encoding with a
        Differences array and the embedded (compressed) font file as FontFile2.

        Only glyphs that are reachable through the best unicode cmap of the font are used.
        When the font offers no such cmap, no FirstChar/LastChar/Widths entries are written
        and the Differences array stays empty.

        :param path_to_font_file: path to an existing file whose name ends in ".ttf"
        :return: the TrueTypeFont object
        """
        assert path_to_font_file.exists()
        assert path_to_font_file.name.endswith(".ttf")

        font_file_bytes: typing.Optional[bytes] = None
        with open(path_to_font_file, "rb") as ffh:
            font_file_bytes = ffh.read()
        assert font_file_bytes

        # read file
        ttf_font_file = TTFont(path_to_font_file)

        # build font
        font: TrueTypeFont = TrueTypeFont()
        font_name: str = str(
            [
                x for x in ttf_font_file["name"].names
                if x.platformID == 3 and x.nameID == 1
            ][0].string,
            "latin1",
        )
        # only keep the ASCII letters of the name
        font_name = "".join([
            x for x in font_name if x.lower() in "abcdefghijklmnopqrstuvwxyz"
        ])

        font[Name("Name")] = Name(font_name)
        font[Name("BaseFont")] = Name(font_name)

        # getBestCmap() may return None; that case is treated as "no mapped glyphs"
        # instead of failing on `None.items()`
        cmap: typing.Optional[typing.Dict[int,
                                          str]] = ttf_font_file.getBestCmap()
        # glyph name -> lowest code point that maps to it
        cmap_reverse: typing.Dict[str, int] = {}
        for k, v in (cmap or {}).items():
            cmap_reverse[v] = min(cmap_reverse.get(v, k), k)
        glyph_order: typing.List[str] = [
            x for x in ttf_font_file.glyphOrder if x in cmap_reverse
        ]

        # build widths
        units_per_em: pDecimal = pDecimal(ttf_font_file["head"].unitsPerEm)
        if cmap is not None:
            font[Name("FirstChar")] = pDecimal(0)
            # NOTE(review): Widths gets len(glyph_order) entries starting at FirstChar 0,
            # so LastChar looks one too high - confirm against the PDF specification
            font[Name("LastChar")] = pDecimal(len(glyph_order))
            font[Name("Widths")] = List()
            # the glyph set is fetched once, rather than once per glyph
            glyph_set = ttf_font_file.getGlyphSet()
            for glyph_name in glyph_order:
                w: pDecimal = (
                    pDecimal(glyph_set[glyph_name].width) /
                    units_per_em) * pDecimal(1000)
                w = pDecimal(round(w, 2))
                font["Widths"].append(w)

        font[Name("FontDescriptor")] = Dictionary()
        font["FontDescriptor"][Name("Type")] = Name("FontDescriptor")
        font["FontDescriptor"][Name("FontName")] = String(font_name)
        font["FontDescriptor"][Name("FontStretch")] = Name("Normal")  # TODO
        font["FontDescriptor"][Name("FontWeight")] = pDecimal(400)  # TODO
        font["FontDescriptor"][Name("Flags")] = pDecimal(4)  # TODO
        font["FontDescriptor"][Name("FontBBox")] = List(
        ).set_can_be_referenced(  # type: ignore [attr-defined]
            False)  # TODO
        for _ in range(0, 4):
            font["FontDescriptor"]["FontBBox"].append(pDecimal(0))

        # fmt: off
        font["FontDescriptor"][Name("ItalicAngle")] = pDecimal(
            ttf_font_file["post"].italicAngle)
        font["FontDescriptor"][Name("Ascent")] = pDecimal(
            pDecimal(ttf_font_file["hhea"].ascent) / units_per_em *
            Decimal(1000))
        font["FontDescriptor"][Name("Descent")] = pDecimal(
            pDecimal(ttf_font_file["hhea"].descent) / units_per_em *
            Decimal(1000))
        font["FontDescriptor"][Name("CapHeight")] = pDecimal(0)  # TODO
        font["FontDescriptor"][Name("StemV")] = pDecimal(0)  # TODO
        # fmt: on

        # every mapped glyph gets its position in glyph_order as character code
        font[Name("Encoding")] = Dictionary()
        font["Encoding"][Name("BaseEncoding")] = Name("WinAnsiEncoding")
        font["Encoding"][Name("Differences")] = List()
        for i, glyph_name in enumerate(glyph_order):
            font["Encoding"]["Differences"].append(pDecimal(i))
            font["Encoding"]["Differences"].append(Name(glyph_name))

        # embed font file
        font_stream: Stream = Stream()
        font_stream[Name("Type")] = Name("Font")
        font_stream[Name("Subtype")] = Name("TrueType")
        font_stream[Name("Length")] = pDecimal(len(font_file_bytes))
        font_stream[Name("Length1")] = pDecimal(len(font_file_bytes))
        font_stream[Name("Filter")] = Name("FlateDecode")
        font_stream[Name("DecodedBytes")] = font_file_bytes
        font_stream[Name("Bytes")] = zlib.compress(font_file_bytes, 9)

        font["FontDescriptor"][Name("FontFile2")] = font_stream

        # return
        return font
    def transform(
        self,
        object_to_transform: AnyPDFType,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        Write a Stream to a byte stream.

        A stream whose reference is already listed in the resolved references of the context
        is skipped. Otherwise the stream dictionary is written (with referenceable dictionaries,
        lists and streams replaced by references), followed by the payload between the
        "stream" and "endstream" keywords. The objects that were replaced by references are
        written after the stream itself.
        """
        assert context is not None
        assert context.destination is not None
        assert isinstance(object_to_transform, Stream)

        # avoid resolving objects twice
        stream_ref: typing.Optional[
            Reference] = object_to_transform.get_reference(
            )  # type: ignore [attr-defined]
        if stream_ref is not None and stream_ref in context.resolved_references:
            assert stream_ref is not None
            assert stream_ref.object_number is not None
            logger.debug(
                "skip writing object %d %d R (already resolved)" %
                (stream_ref.object_number, stream_ref.generation_number or 0))
            return

        # start object if needed
        wrapped_in_object = False
        if stream_ref is not None:
            assert stream_ref.object_number is not None
            if stream_ref.object_number is not None and stream_ref.byte_offset is None:
                wrapped_in_object = True
                self.start_object(object_to_transform, context)
            context.resolved_references.append(stream_ref)

        # build stream dictionary; nested containers are replaced by references
        # and remembered, so they can be written after this stream
        header = Dictionary()
        deferred_objects: typing.List[AnyPDFType] = []
        for key, value in object_to_transform.items():
            if key in ("Bytes", "DecodedBytes"):
                continue
            is_referenceable = isinstance(
                value, (Dictionary, List, Stream)
            ) and value.can_be_referenced()  # type: ignore [union-attr]
            if is_referenceable:
                header[key] = self.get_reference(value, context)
                deferred_objects.append(value)
            else:
                header[key] = value

        # without compression, the \Filter entry has to go
        if context.compression_level == 0 and Name("Filter") in header:
            header.pop(Name("Filter"))

        # determine the payload
        if "DecodedBytes" not in object_to_transform:
            # only encoded bytes are available: they are written as they are
            assert "Bytes" in object_to_transform
            payload = object_to_transform["Bytes"]
        else:
            if context.compression_level == 0:
                payload = object_to_transform["DecodedBytes"]
            else:
                payload = zlib.compress(object_to_transform["DecodedBytes"],
                                        context.compression_level)
            header[Name("Length")] = pDecimal(len(payload))

        # write stream dictionary
        self.get_root_transformer().transform(header, context)

        # write "stream", the bytes and "endstream"
        destination = context.destination
        destination.write(bytes("stream\n", "latin1"))
        destination.write(payload)
        destination.write(bytes("\nendstream\n", "latin1"))

        # end object if needed
        if wrapped_in_object:
            self.end_object(object_to_transform, context)

        # write the objects that were replaced by references
        for deferred in deferred_objects:
            self.get_root_transformer().transform(deferred, context)
    def transform(
        self,
        object_to_transform: AnyPDFType,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        Write an Image to a byte stream.

        The image is saved as JPEG. When that fails, it is converted with
        `_convert_png_to_jpg` and saved as JPEG again. The JPEG bytes are wrapped in an
        Image XObject stream (DCTDecode, 8 bits per component, DeviceRGB) which is handed
        to the root transformer.

        While the stream is written, the compression level of the context is forced to 9.
        The previous level is restored afterwards, also when writing the stream fails.
        """
        assert context is not None
        assert context.destination is not None
        assert isinstance(object_to_transform, PILImage.Image)

        # get image bytes
        contents = None
        filter_name: Optional[Name] = None
        try:
            with io.BytesIO() as output:
                assert isinstance(object_to_transform, PILImage.Image)
                object_to_transform.save(output, format="JPEG")
                contents = output.getvalue()
            filter_name = Name("DCTDecode")
        except Exception:
            # best effort: a failure here is handled by the conversion attempt below
            pass

        if contents is None:
            try:
                # TODO : properly store PNG (instead of converting it)
                with io.BytesIO() as output:
                    object_to_transform = self._convert_png_to_jpg(
                        object_to_transform)
                    assert isinstance(object_to_transform, PILImage.Image)
                    object_to_transform.save(output, format="JPEG")
                    contents = output.getvalue()
                filter_name = Name("DCTDecode")
            except Exception:
                # best effort: a failure here is reported by the assert below
                pass
        assert contents is not None

        # build corresponding Stream (XObject)
        out_value = Stream()
        out_value[Name("Type")] = Name("XObject")
        out_value[Name("Subtype")] = Name("Image")
        out_value[Name("Width")] = pDecimal(object_to_transform.width)
        out_value[Name("Height")] = pDecimal(object_to_transform.height)
        out_value[Name("Length")] = pDecimal(len(contents))
        out_value[Name("Filter")] = filter_name
        out_value[Name("BitsPerComponent")] = pDecimal(8)
        out_value[Name("ColorSpace")] = Name("DeviceRGB")
        out_value[Name("Bytes")] = contents

        # copy reference
        out_value.set_reference(
            object_to_transform.get_reference())  # type: ignore [attr-defined]

        # start object if needed
        started_object = False
        ref = out_value.get_reference()  # type: ignore [attr-defined]
        if ref is not None:
            assert isinstance(ref, Reference)
            if ref.object_number is not None and ref.byte_offset is None:
                started_object = True
                self._start_object(out_value, context)

        # write stream
        # try/finally makes sure the caller's compression level is restored,
        # even when the downstream transformer raises
        cl = context.compression_level
        context.compression_level = 9
        try:
            self.get_root_transformer().transform(out_value, context)
        finally:
            context.compression_level = cl

        # end object if needed
        if started_object:
            self._end_object(out_value, context)