Python Name示例，ptext.io.read.types.Name Python示例

示例#1

0

显示文件

文件： font_type_1.py 项目： tieugene/ptext-release

 def __deepcopy__(self, memodict={}):
     """
     Copy this font via the parent class, then give the copy its own lookup tables.

     The parent's ``__deepcopy__`` builds the copy; /Subtype is then set to
     /Type1 and both unicode lookup tables are rebuilt as new dict objects
     holding the same keys and values.
     """
     # fmt: off
     duplicate: Font = super(Type1Font, self).__deepcopy__(memodict)
     duplicate[Name("Subtype")] = Name("Type1")
     # new dict objects, so later edits to one font's tables do not reach the other
     duplicate._character_identifier_to_unicode_lookup: typing.Dict[int, str] = dict(self._character_identifier_to_unicode_lookup.items())
     duplicate._unicode_lookup_to_character_identifier: typing.Dict[str, int] = dict(self._unicode_lookup_to_character_identifier.items())
     return duplicate

示例#2

0

显示文件

文件： font_descriptor.py 项目： lzg440/ptext-release

 def __deepcopy__(self, memodict={}):
     """
     Build a new FontDescriptor carrying over the entries of this one.

     Four entries are read with plain indexing (so they must be present),
     the scalar entries are carried over as-is when present, and /FontBBox
     plus the /FontFile* entries are deep-copied when present.
     """
     always_copied = ("Type", "FontName", "Flags", "ItalicAngle")
     copied_if_present = (
         "FontFamily",
         "FontStretch",
         "FontWeight",
         "Ascent",
         "Descent",
         "Leading",
         "CapHeight",
         "XHeight",
         "StemV",
         "StemH",
         "AvgWidth",
         "MaxWidth",
         "MissingWidth",
         "CharSet",
     )
     deep_copied_if_present = ("FontBBox", "FontFile", "FontFile2", "FontFile3")

     duplicate = FontDescriptor()
     for entry in always_copied:
         duplicate[Name(entry)] = self[entry]
     for entry in copied_if_present:
         if entry in self:
             duplicate[Name(entry)] = self[entry]
     for entry in deep_copied_if_present:
         if entry in self:
             duplicate[Name(entry)] = copy.deepcopy(self[entry], memodict)
     return duplicate

示例#3

0

显示文件

    def _test_document(self, file) -> bool:
        """
        Load a PDF, set its /Producer to "pText" and write it to the output directory.

        `file` is the path of the PDF to read; its `stem` is used to name the
        output file ("<stem>_out.pdf").
        Returns False when the loaded document has no "XRef" entry or that entry
        has no "Trailer"; returns True once the modified copy has been written.
        """

        # create output directory if it does not exist yet
        if not self.output_dir.exists():
            self.output_dir.mkdir()

        # read the document; the file only needs to be opened once
        with open(file, "rb") as pdf_file_handle:
            doc = PDF.loads(pdf_file_handle)

        if "XRef" not in doc:
            return False
        if "Trailer" not in doc["XRef"]:
            return False

        # make sure there is an Info dictionary to hold the producer
        if "Info" not in doc["XRef"]["Trailer"]:
            doc["XRef"]["Trailer"][Name("Info")] = Dictionary()

        # change producer
        doc["XRef"]["Trailer"]["Info"][Name("Producer")] = String("pText")

        # determine output location
        out_file = self.output_dir / (file.stem + "_out.pdf")
        with open(out_file, "wb") as pdf_file_handle:
            PDF.dumps(pdf_file_handle, doc)

        return True

示例#4

0

显示文件

 def _append_to_content_stream(self, page: Page, instructions: str):
     """
     Append latin1-encoded instructions to the page's content stream, then
     recompute the compressed bytes and the /Length entry from the result.
     """
     self._initialize_page_content_stream(page)
     stream = page["Contents"]
     stream[Name("DecodedBytes")] += instructions.encode("latin1")
     # recompress the full decoded payload at the highest zlib level
     deflated = zlib.compress(stream["DecodedBytes"], 9)
     stream[Name("Bytes")] = deflated
     # /Length is the size of the compressed payload
     stream[Name("Length")] = pDecimal(len(stream["Bytes"]))

示例#5

0

显示文件

    def test_name_behaves_like_str(self):
        """A dict populated with Name keys must be readable through the equal plain str key."""

        d = {}
        for value, letter in enumerate("ABC", start=1):
            d[Name(letter)] = value

        assert d["A"] == 1

示例#6

0

显示文件

文件： read_root_dictionary_transformer.py 项目： lzg440/ptext-release

    def transform(
        self,
        object_to_transform: Union[io.BufferedIOBase, io.RawIOBase,
                                   AnyPDFType],
        parent_object: Any,
        context: Optional[ReadTransformerContext] = None,
        event_listeners: typing.List[EventListener] = [],
    ) -> Any:
        """
        Transform the root Dictionary and flatten its page tree.

        The Dictionary is converted by the ReadDictionaryTransformer found among
        the root transformer's children. Afterwards /Pages /Kids is walked
        depth-first and replaced by a flat list of all Page objects in document
        order, and /Pages /Count is set to the number of pages found.

        `object_to_transform` must be a Dictionary. `event_listeners` is only
        iterated, never mutated, so the shared default list is safe here.
        Returns the transformed root Dictionary.
        """

        assert isinstance(object_to_transform, Dictionary)

        # add listener(s)
        for listener in event_listeners:
            object_to_transform.add_event_listener(
                listener)  # type: ignore [attr-defined]

        # convert using Dictionary transformer
        transformed_root_dictionary: Optional[Dictionary] = None
        for t in self.get_root_transformer().children:
            if isinstance(t, ReadDictionaryTransformer):
                transformed_root_dictionary = t.transform(
                    object_to_transform, parent_object, context, [])
                break

        assert transformed_root_dictionary is not None
        assert isinstance(transformed_root_dictionary, Dictionary)

        #
        # rebuild /Pages if needed
        #

        # list to hold Page objects (in order)
        pages_in_order: typing.List[Page] = []

        # stack to explore Page(s) DFS.
        # Children are pushed in reverse so that popping from the end of the
        # list visits them first-to-last; pushing them in forward order at the
        # front (as was done before) reversed the kids of every nested /Pages.
        stack_to_handle: typing.List[AnyPDFType] = list(
            reversed(list(transformed_root_dictionary["Pages"]["Kids"])))

        # DFS
        while stack_to_handle:
            obj = stack_to_handle.pop()
            if isinstance(obj, Page):
                pages_in_order.append(obj)
            if (isinstance(obj, Dictionary) and "Type" in obj
                    and obj["Type"] == "Pages" and "Kids" in obj
                    and isinstance(obj["Kids"], List)):
                stack_to_handle.extend(reversed(list(obj["Kids"])))

        # change
        transformed_root_dictionary["Pages"][Name("Kids")] = pList()
        for p in pages_in_order:
            transformed_root_dictionary["Pages"]["Kids"].append(p)
        transformed_root_dictionary["Pages"][Name("Count")] = Decimal(
            len(pages_in_order))

        # return
        return transformed_root_dictionary

示例#7

0

显示文件

 def __init__(self):
     """Initialise the dictionary as /Type /Font, /Subtype /Type0 with empty lookup tables."""
     super().__init__()
     self[Name("Type")] = Name("Font")
     self[Name("Subtype")] = Name("Type0")
     # all three lookup tables start out empty
     self._character_identifier_to_unicode_lookup: typing.Dict[int, str] = {}
     self._unicode_lookup_to_character_identifier: typing.Dict[str, int] = {}
     self._byte_to_char_identifier: typing.Dict[int, int] = {}

示例#8

0

显示文件

 def __deepcopy__(self, memodict={}):
     """
     Copy this font via the parent class, set /Subtype to /Type0 on the copy
     and give it its own width cache holding the same entries.
     """
     # fmt: off
     duplicate: CIDType2Font = super(CIDType2Font, self).__deepcopy__(memodict)
     # NOTE(review): the copy of a CIDType2Font is labelled /Type0 - confirm this is intended
     duplicate[Name("Subtype")] = Name("Type0")
     duplicate._width_cache: typing.Dict[int, pDecimal] = dict(self._width_cache.items())
     return duplicate

示例#9

0

显示文件

文件： test_hash_types.py 项目： tieugene/ptext-release

    def test_hash_types(self):
        """Build a Dictionary and a List of PDF objects and print the hash of the List."""

        root_dict = Dictionary()
        root_dict[Name("Root")] = Reference(object_number=10)
        root_dict[Name("Marked")] = Boolean(True)

        mixed_list = List()
        for item in (Name("Red"), Decimal(0.5)):
            mixed_list.append(item)

        # hashing must not raise
        print(hash(mixed_list))

示例#10

0

显示文件

 def __deepcopy__(self, memodict={}):
     """
     Build a new CIDFontType0 from this one.

     /Type, /Subtype and /BaseFont are carried over as-is; /CIDSystemInfo and
     /FontDescriptor are deep-copied (all five are read with plain indexing,
     so they must be present); the width-related entries and /CIDToGIDMap are
     deep-copied only when present.
     """
     shared = ("Type", "Subtype", "BaseFont")
     cloned = ("CIDSystemInfo", "FontDescriptor")
     cloned_if_present = ("DW", "W", "DW2", "W2", "CIDToGIDMap")

     duplicate = CIDFontType0()
     for entry in shared:
         duplicate[Name(entry)] = self[entry]
     for entry in cloned:
         duplicate[Name(entry)] = copy.deepcopy(self[entry], memodict)
     for entry in cloned_if_present:
         if entry not in self:
             continue
         duplicate[Name(entry)] = copy.deepcopy(self.get(entry), memodict)
     return duplicate

示例#11

0

显示文件

文件： cid_font_type_2.py 项目： lzg440/ptext-release

 def __deepcopy__(self, memodict={}):
     """
     Build a new CIDFontType2 from this one.

     /Type, /Subtype and /BaseFont are carried over as-is (they must be
     present); /Name, /FirstChar and /LastChar are carried over when present;
     /Widths, /FontDescriptor, /Encoding and /ToUnicode are deep-copied when
     present.
     """
     shared = ("Type", "Subtype", "BaseFont")
     shared_if_present = ("Name", "FirstChar", "LastChar")
     cloned_if_present = ("Widths", "FontDescriptor", "Encoding", "ToUnicode")

     duplicate = CIDFontType2()
     for entry in shared:
         duplicate[Name(entry)] = self[entry]
     for entry in shared_if_present:
         if entry not in self:
             continue
         duplicate[Name(entry)] = self.get(entry)
     for entry in cloned_if_present:
         if entry not in self:
             continue
         duplicate[Name(entry)] = copy.deepcopy(self.get(entry), memodict)
     return duplicate

示例#12

0

显示文件

文件： paragraph.py 项目： gbtami/ptext-release

 def __init__(
     self,
     text: str,
     font: Union[Font, str] = "Helvetica",
     font_size: Decimal = Decimal(12),
     font_color: Color = X11Color("Black"),
     border_top: bool = False,
     border_right: bool = False,
     border_bottom: bool = False,
     border_left: bool = False,
     border_color: Color = X11Color("Black"),
     border_width: Decimal = Decimal(1),
     padding_top: Decimal = Decimal(0),
     padding_right: Decimal = Decimal(0),
     padding_bottom: Decimal = Decimal(0),
     padding_left: Decimal = Decimal(0),
     vertical_alignment: Alignment = Alignment.TOP,
     horizontal_alignment: Alignment = Alignment.LEFT,
     background_color: typing.Optional[Color] = None,
     parent: typing.Optional["LayoutElement"] = None,
 ):
     """
     Create a paragraph of `text`.

     Border, padding, alignment, background and parent arguments are handed
     to the parent class unchanged. `font` is either a Font object, stored
     as-is, or a font name: in that case a new FontType1 is created, its
     /Encoding is set to /WinAnsiEncoding and every entry of the font returned
     by `AdobeFontMetrics.get(font)` is copied into it.
     """
     super().__init__(
         border_top=border_top,
         border_right=border_right,
         border_bottom=border_bottom,
         border_left=border_left,
         border_color=border_color,
         border_width=border_width,
         padding_top=padding_top,
         padding_right=padding_right,
         padding_bottom=padding_bottom,
         padding_left=padding_left,
         vertical_alignment=vertical_alignment,
         horizontal_alignment=horizontal_alignment,
         background_color=background_color,
         parent=parent,
     )
     self.text = text
     if not isinstance(font, str):
         # a ready-made Font object is used directly
         self.font = font
     else:
         # build a Type 1 font from the metrics registered under this name
         self.font: Font = FontType1()
         template: typing.Optional[Font] = AdobeFontMetrics.get(font)
         self.font[Name("Encoding")] = Name("WinAnsiEncoding")
         assert template
         for entry_key, entry_value in template.items():
             self.font[entry_key] = entry_value
         assert self.font
     self.font_color = font_color
     self.font_size = font_size
示例#13

0

显示文件

文件： font_type_1.py 项目： tieugene/ptext-release

    def unicode_to_character_identifier(self, unicode: str) -> typing.Optional[int]:
        """
        This function maps a unicode str to its character identifier.
        If no such mapping exists, this function returns None.

        Lookup order:
        1. if /ToUnicode is present, the table built by `_read_to_unicode` is used;
        2. if /Encoding is one of the predefined encoding names, the character is
           encoded with the matching codec and the resulting single byte is the
           character identifier;
        3. if /Encoding is a Dictionary with a predefined /BaseEncoding, the table
           built by `_read_encoding_with_differences` is used.

        Side effect: when /Encoding is absent it is set to /StandardEncoding.
        """
        if Name("ToUnicode") in self:
            self._read_to_unicode()
            return self._unicode_lookup_to_character_identifier.get(unicode)

        # if "Encoding" is not present, the implied encoding is StandardEncoding
        if "Encoding" not in self:
            self[Name("Encoding")] = Name("StandardEncoding")

        encoding = self["Encoding"]
        predefined_encodings = [
            "MacRomanEncoding",
            "MacExpertEncoding",
            "WinAnsiEncoding",
            "StandardEncoding",
        ]

        if isinstance(encoding, Name) and encoding in predefined_encodings:
            try:
                if encoding == "WinAnsiEncoding":
                    encoded = unicode.encode("cp1252")
                elif encoding == "MacRomanEncoding":
                    encoded = unicode.encode("mac-roman")
                elif encoding == "MacExpertEncoding":
                    # TODO replace by actual MacExpertEncoding
                    encoded = unicode.encode("mac-roman")
                else:
                    # NOTE(review): the return type of adobe_standard_encode is not
                    # visible here; bytes are handled below, anything else is
                    # converted with int() as before - confirm against its definition
                    encoded = adobe_standard_encode(unicode)
            except Exception:
                # best effort: a character the encoding cannot represent has no identifier
                return None

            if isinstance(encoded, (bytes, bytearray)):
                # int(b"A") parses the bytes as a decimal literal and fails;
                # the character identifier is the value of the single encoded byte
                return encoded[0] if len(encoded) == 1 else None
            try:
                return int(encoded)
            except (TypeError, ValueError):
                return None

        if (
            isinstance(encoding, Dictionary)
            and "BaseEncoding" in encoding
            and encoding["BaseEncoding"] in predefined_encodings
        ):
            self._read_encoding_with_differences()
            return self._unicode_lookup_to_character_identifier.get(unicode, None)

        # default
        return None

示例#14

0

显示文件

文件： cid_font_type_2.py 项目： lzg440/ptext-release

 def get_single_character_width(self,
                                character_code: int) -> Optional[Decimal]:
     """
     Get the width (in text space) of a given character code.
     Returns None if the character code can not be represented in this Font.

     Without a /W entry the /DW value is returned (and set to 1000 first if
     absent). Otherwise /W is parsed once into `self._cached_widths` and the
     lookup is served from that cache. /W is read as a sequence of either
     `c_first [w1 w2 ...]` or `c_first c_last w` groups.
     """
     if "W" not in self:
         if "DW" not in self:
             self[Name("DW")] = Decimal(1000)
         return self["DW"]
     if self._cached_widths is None:
         widths = self["W"]
         cache = {}
         i = 0
         while i < len(widths):
             c_first = int(widths[i])
             entry = widths[i + 1]
             if isinstance(entry, list):
                 # c_first [w1 w2 ...]: consecutive codes starting at c_first
                 for offset, width in enumerate(entry):
                     cache[c_first + offset] = width
                 i += 2
             elif isinstance(entry, Decimal):
                 # c_first c_last w: one width for the whole inclusive range
                 c_last = int(entry)
                 # NOTE(review): int() truncates a fractional width, unlike the
                 # list form above which stores the element as-is - confirm intent
                 w = int(widths[i + 2])
                 for code in range(c_first, c_last + 1):
                     cache[code] = w
                 i += 3
             else:
                 # malformed entry: stop parsing instead of looping forever
                 # on an index that never advances
                 break
         # publish the cache only once parsing finished without an exception
         self._cached_widths = cache
     # use cache; None when the code is not covered by /W
     return self._cached_widths.get(character_code)

示例#15

0

显示文件

文件： plaintext_xref.py 项目： lzg440/ptext-release

    def read(
        self,
        src: Union[io.BufferedIOBase, io.RawIOBase, io.BytesIO],
        tok: HighLevelTokenizer,
        initial_offset: Optional[int] = None,
    ) -> "XREF":
        """
        Read a plaintext XREF from `src` into this object and return it.

        When `initial_offset` is given the source is positioned there, otherwise
        `_seek_to_xref_token` positions it. The next non-comment token must be
        "xref" (an AssertionError is raised otherwise). Sections are then read
        until an empty one is returned, and finally the trailer is stored under
        "Trailer".
        """

        # position the source at the start of the XREF
        if initial_offset is None:
            self._seek_to_xref_token(src, tok)
        else:
            src.seek(initial_offset)

        # the table must start with the "xref" keyword
        keyword = tok.next_non_comment_token()
        assert keyword is not None
        assert keyword.text == "xref"

        # read sections until an empty one signals the end of the table
        section = self._read_section(src, tok)
        while len(section) != 0:
            for entry in section:
                self.append(entry)
            section = self._read_section(src, tok)

        # process trailer
        self[Name("Trailer")] = self._read_trailer(src, tok)

        return self

示例#16

0

显示文件

文件： write_pages_transformer.py 项目： tieugene/ptext-release

    def transform(
        self,
        object_to_transform: AnyPDFType,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        This method writes a /Pages Dictionary to a byte stream

        Every entry of /Kids is temporarily replaced by a reference so that the
        /Pages Dictionary itself can be written by the parent class; the queued
        Page objects are written afterwards. The original /Kids entries are put
        back when done, also when writing raises.
        """
        assert isinstance(object_to_transform, Dictionary)
        assert context is not None

        # /Kids can be written immediately
        object_to_transform[Name("Kids")].set_can_be_referenced(False)

        queue: typing.List[AnyPDFType] = []
        try:
            # queue writing of /Page objects
            for i, k in enumerate(object_to_transform["Kids"]):
                queue.append(k)
                ref: Reference = self.get_reference(k, context)
                object_to_transform["Kids"][i] = ref

            # delegate to super
            super(WritePagesTransformer, self).transform(object_to_transform,
                                                         context)

            # write /Page objects
            for p in queue:
                self.get_root_transformer().transform(p, context)
        finally:
            # restore /Kids, so a failed write does not leave references
            # in place of the Page objects
            for i, k in enumerate(queue):
                object_to_transform["Kids"][i] = k

示例#17

0

显示文件

文件： high_level_tokenizer.py 项目： tieugene/ptext-release

    def read_dictionary(self) -> Dictionary:
        """
        This function processes the next tokens and returns a Dictionary.
        It fails and throws various errors if the next tokens do not represent a Dictionary.

        The first token must be START_DICT. After that, name/value pairs are
        read until END_DICT; each name token is stored without its first
        character as a Name key.
        """
        token = self.next_non_comment_token()
        assert token is not None
        assert token.token_type == TokenType.START_DICT

        out_dict = Dictionary()
        while True:

            # attempt to read name token
            token = self.next_non_comment_token()
            assert token is not None
            if token.token_type == TokenType.END_DICT:
                break
            assert token.token_type == TokenType.NAME

            # attempt to read value
            value = self.read_object()
            assert value is not None

            # store in dict object, dropping the first character of the name token
            out_dict[Name(token.text[1:])] = value

        return out_dict

示例#18

0

显示文件

    def transform(
        self,
        object_to_transform: AnyPDFType,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        This method writes a /Page Dictionary to a byte stream

        Before delegating to the parent class, /Parent is set to a reference to
        the document's /Pages Dictionary and the page boxes that are present are
        marked as not referencable.
        """
        assert isinstance(object_to_transform, Dictionary)
        assert context is not None
        assert context.root_object is not None

        assert isinstance(context.root_object, Document)
        pages_dict = context.root_object["XRef"]["Trailer"]["Root"]["Pages"]

        # add /Parent reference to /Pages
        object_to_transform[Name("Parent")] = self.get_reference(pages_dict, context)

        # mark some keys as non-referencable
        for k in ["ArtBox", "BleedBox", "CropBox", "MediaBox", "TrimBox"]:
            if k in object_to_transform:
                object_to_transform[k].set_can_be_referenced(False)

        # delegate to super
        super(WritePageTransformer, self).transform(object_to_transform, context)

示例#19

0

显示文件

    def transform(
        self,
        object_to_transform: Union[io.BufferedIOBase, io.RawIOBase,
                                   AnyPDFType],
        parent_object: Any,
        context: Optional[ReadTransformerContext] = None,
        event_listeners: typing.List[EventListener] = [],
    ) -> Any:
        """
        This function reads a /FontDescriptor Dictionary

        When the parent is already a Font, the Dictionary is converted by the
        ReadDictionaryTransformer found among the root transformer's children
        and that result is returned. Otherwise a temporary Font is created with
        `parent_object` as its parent, the listeners are attached to it, the
        transformed Dictionary is stored under /FontDescriptor and the
        temporary Font is returned.

        `event_listeners` is only iterated, never mutated.
        """

        assert isinstance(object_to_transform, Dictionary)

        # convert like regular dictionary
        if isinstance(parent_object, Font):
            for t in self.get_root_transformer().children:
                if isinstance(t, ReadDictionaryTransformer):
                    return t.transform(object_to_transform, parent_object,
                                       context, event_listeners)

        # build intermittent Font object
        tmp = Font().set_parent(parent_object)  # type: ignore [attr-defined]

        # add listener(s)
        for listener in event_listeners:
            tmp.add_event_listener(listener)

        # transform again, this time with the temporary Font as parent
        tmp[Name("FontDescriptor")] = self.get_root_transformer().transform(
            object_to_transform, tmp, context, [])

        # return
        return tmp

示例#20

0

显示文件

    def add(self, layout_element: LayoutElement) -> "PageLayout":
        """
        This method adds a `LayoutElement` to the current `Page`.

        The element is laid out in the space left in the current column. When
        there is no vertical room, or the laid-out element ends below the
        vertical margin, the content stream is rolled back (in the latter case),
        the layout switches to the next column and the element is added again.
        Once the column index has reached the number of columns nothing is laid
        out and `self` is returned unchanged.
        """
        # every column has been used: nothing more can be placed
        if self.column_index >= self.number_of_columns:
            return self

        # vertical room left in the current column
        available_height: Decimal = (
            self.previous_y - self.vertical_margin - self.previous_leading
        )
        assert self.page_height
        if available_height < 0:
            self.switch_to_next_column()
            return self.add(layout_element)

        # rectangle the element may occupy
        column_stride = self.column_width + self.inter_column_margin
        target_box: Rectangle = Rectangle(
            self.horizontal_margin + self.column_index * column_stride,
            self.vertical_margin,
            self.column_width,
            self.previous_y - self.vertical_margin - self.previous_leading,
        )

        # remember the stream contents so a failed attempt can be undone
        if "Contents" not in self.page:
            layout_element._initialize_page_content_stream(self.page)
        snapshot = self.page["Contents"]["DecodedBytes"]

        # attempt layout
        placed = layout_element.layout(self.page, bounding_box=target_box)
        if placed.y < self.vertical_margin:
            # overflow: roll the stream back and retry in the next column
            stream = self.page["Contents"]
            stream[Name("DecodedBytes")] = snapshot
            deflated = zlib.compress(stream["DecodedBytes"], 9)
            stream[Name("Bytes")] = deflated
            stream[Name("Length")] = pDecimal(len(stream["Bytes"]))
            self.switch_to_next_column()
            return self.add(layout_element)

        # the next element starts below this one
        self.previous_y = placed.y
        self.previous_leading = self._calculate_leading(layout_element)

        return self

示例#21

0

显示文件

文件： paragraph.py 项目： gbtami/ptext-release

    def _get_font_resource_name(self, font: Font, page: Page):
        """
        Return the resource name under which `font` is registered on `page`.

        /Resources and /Resources /Font are created when missing. If an equal
        font is already registered its existing name is returned; otherwise the
        font is registered as "F<n>", n being the number of registered fonts
        plus one, and that name is returned.
        """
        # create resources if needed
        if "Resources" not in page:
            page[Name("Resources")] = Dictionary().set_parent(page)  # type: ignore [attr-defined]
        if "Font" not in page["Resources"]:
            page["Resources"][Name("Font")] = Dictionary()

        # reuse the name of an equal font that is already registered
        for registered_name, registered_font in page["Resources"]["Font"].items():
            if registered_font == font:
                return registered_name

        # otherwise register the font under the next free index
        font_index = len(page["Resources"]["Font"]) + 1
        page["Resources"]["Font"][Name("F%d" % font_index)] = font
        return Name("F%d" % font_index)

示例#22

0

显示文件

文件： image.py 项目： tieugene/ptext-release

    def _get_image_resource_name(self, image: PILImage, page: Page):
        """
        Return the resource name under which `image` is registered on `page`.

        /Resources and /Resources /XObject are created when missing. If an equal
        image is already registered its existing name is returned; otherwise the
        image is registered as "Im<n>", n being the number of registered
        XObjects plus one, and that name is returned.
        """
        # create resources if needed
        if "Resources" not in page:
            page[Name("Resources")] = Dictionary().set_parent(
                page)  # type: ignore [attr-defined]
        if "XObject" not in page["Resources"]:
            page["Resources"][Name("XObject")] = Dictionary()

        # reuse the name of an equal image that is already registered
        for registered_name, registered_image in page["Resources"]["XObject"].items():
            if registered_image == image:
                return registered_name

        # otherwise register the image under the next free index
        image_index = len(page["Resources"]["XObject"]) + 1
        page["Resources"]["XObject"][Name("Im%d" % image_index)] = image
        return Name("Im%d" % image_index)

示例#23

0

显示文件

 def unicode_to_character_identifier(self,
                                     unicode: str) -> typing.Optional[int]:
     """
     Map a unicode str to its character identifier.

     Only a font with a /ToUnicode entry can answer: the table built by
     `_read_to_unicode` is consulted. Returns None when there is no /ToUnicode
     entry or the table has no mapping for `unicode`.
     """
     # without /ToUnicode there is nothing to look the character up in
     if Name("ToUnicode") not in self:
         return None
     self._read_to_unicode()
     return self._unicode_lookup_to_character_identifier.get(unicode)

示例#24

0

显示文件

    def _calculate_layout_box_without_padding(
            self, page: Page, bounding_box: Rectangle) -> Rectangle:
        """
        Return the rectangle this element would occupy, leaving the page's
        content stream as it was.

        The layout is performed for real and the content stream is then reset
        to the bytes it held beforehand.
        """

        # snapshot of the stream before the trial layout
        if "Contents" not in page:
            self._initialize_page_content_stream(page)
        snapshot = page["Contents"]["DecodedBytes"]

        # trial layout (without padding)
        measured = self._do_layout_without_padding(page, bounding_box)
        assert measured is not None

        # roll the stream back to the snapshot and re-derive Bytes / Length
        stream = page["Contents"]
        stream[Name("DecodedBytes")] = snapshot
        deflated = zlib.compress(stream["DecodedBytes"], 9)
        stream[Name("Bytes")] = deflated
        stream[Name("Length")] = pDecimal(len(stream["Bytes"]))

        return measured

示例#25

0

显示文件

文件： write_pdf_transformer.py 项目： tieugene/ptext-release

    def transform(
        self,
        object_to_transform: Any,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        This method writes a Document object to a byte stream

        It writes the "%PDF-1.7" header followed by a comment line of four
        bytes, invalidates all references, makes sure the trailer has /Info and
        /ID, fills in /CreationDate (only when absent), /ModDate and /Producer,
        and finally hands the "XRef" entry to the root transformer.
        """
        assert context is not None
        assert context.destination is not None

        # write header
        out = context.destination
        out.write(b"%PDF-1.7\n")
        out.write(b"%")
        out.write(bytes([226, 227, 207, 211]))
        out.write(b"\n")

        # invalidate all references
        WritePDFTransformer._invalidate_all_references(object_to_transform)

        trailer = object_to_transform["XRef"]["Trailer"]

        # create Info dictionary if needed
        if "Info" not in trailer:
            trailer[Name("Info")] = Dictionary()

        # set /ID: both entries for a new array, only the second for an existing one
        random_id = HexadecimalString("%032x" % random.randrange(16**32))
        if "ID" in trailer:
            trailer["ID"][1] = random_id
        else:
            trailer[Name("ID")] = List().set_can_be_referenced(  # type: ignore [attr-defined]
                False)
            trailer["ID"].append(random_id)
            trailer["ID"].append(random_id)
        trailer["ID"].set_can_be_referenced(False)

        # set CreationDate (kept when already present)
        modification_date = WritePDFTransformer._timestamp_to_str()
        if "CreationDate" not in trailer[Name("Info")]:
            trailer[Name("Info")][Name("CreationDate")] = String(
                modification_date)

        # set ModDate
        trailer["Info"][Name("ModDate")] = String(modification_date)

        # set Producer
        trailer["Info"][Name("Producer")] = String("pText")

        # transform XREF
        self.get_root_transformer().transform(object_to_transform["XRef"],
                                              context)

示例#26

0

显示文件

文件： layout_element.py 项目： lzg440/ptext-release

    def _initialize_page_content_stream(self, page: Page):
        """
        Give `page` an empty, FlateDecode-compressed content stream under
        "Contents" unless it already has a "Contents" entry.
        """
        if "Contents" not in page:
            # build content stream object
            stream = Stream()
            stream[Name("DecodedBytes")] = b""
            deflated = zlib.compress(stream["DecodedBytes"], 9)
            stream[Name("Bytes")] = deflated
            stream[Name("Filter")] = Name("FlateDecode")
            stream[Name("Length")] = pDecimal(len(stream["Bytes"]))

            # set content of page
            page[Name("Contents")] = stream

示例#27

0

显示文件

    def transform(
        self,
        object_to_transform: AnyPDFType,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        Write a Page Dictionary: set its /Parent to a reference to the
        document's /Pages Dictionary, then let the parent class write it.
        """
        assert isinstance(object_to_transform, Dictionary)
        assert context is not None
        assert context.root_object is not None

        document = context.root_object
        assert isinstance(document, Document)
        page_tree_root = document["XRef"]["Trailer"]["Root"]["Pages"]

        # point /Parent at the page tree root
        parent_reference = self.get_reference(page_tree_root, context)
        object_to_transform[Name("Parent")] = parent_reference

        # delegate to super
        super(WritePageTransformer, self).transform(object_to_transform,
                                                    context)

示例#28

0

显示文件

文件： font_type_0.py 项目： lzg440/ptext-release

    def __deepcopy__(self, memodict={}):
        """
        Build a new FontType0 from this one.

        /Type, /Subtype and /BaseFont are carried over as-is; /Encoding and
        /DescendantFonts are deep-copied (all five are read with plain
        indexing, so they must be present); /ToUnicode is deep-copied when
        present.
        """
        duplicate = FontType0()
        for entry in ("Type", "Subtype", "BaseFont"):
            duplicate[Name(entry)] = self[entry]
        for entry in ("Encoding", "DescendantFonts"):
            duplicate[Name(entry)] = copy.deepcopy(self[entry], memodict)
        if "ToUnicode" in self:
            to_unicode = self.get("ToUnicode")
            duplicate[Name("ToUnicode")] = copy.deepcopy(to_unicode, memodict)

        return duplicate

示例#29

0

显示文件

文件： write_xmp_transformer.py 项目： lzg440/ptext-release

    def transform(
        self,
        object_to_transform: AnyPDFType,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        This method writes an ET.Element (representing XMP meta information) to a byte stream

        The element is serialised with `ET.tostring` into a /Metadata /XML
        Stream whose "Bytes" are the same as its "DecodedBytes". The Stream
        takes over the element's reference and is passed to the root
        transformer, wrapped in start_object / end_object when that reference
        has an object number but no byte offset.
        """
        assert isinstance(object_to_transform, ET.Element)
        assert context is not None
        assert context.destination is not None
        assert context.destination

        # build stream
        xmp_stream = Stream()
        xmp_stream[Name("Type")] = Name("Metadata")
        xmp_stream[Name("Subtype")] = Name("XML")

        payload = ET.tostring(object_to_transform)
        xmp_stream[Name("DecodedBytes")] = payload
        xmp_stream[Name("Bytes")] = payload
        xmp_stream[Name("Length")] = pDecimal(len(payload))

        # copy reference
        xmp_stream.set_reference(
            object_to_transform.get_reference())  # type: ignore [attr-defined]

        # decide whether the stream has to be wrapped in an object
        ref = xmp_stream.get_reference()  # type: ignore [attr-defined]
        must_wrap = False
        if ref is not None:
            assert isinstance(ref, Reference)
            must_wrap = ref.object_number is not None and ref.byte_offset is None

        if must_wrap:
            self.start_object(xmp_stream, context)

        # pass stream along to other transformer
        self.get_root_transformer().transform(xmp_stream, context)

        if must_wrap:
            self.end_object(xmp_stream, context)

示例#30

0

显示文件

文件： font_type_1.py 项目： tieugene/ptext-release

    def __init__(self, font_name: typing.Optional[str] = None):
        """
        Create a standard Type 1 font.

        When `font_name` is given it is canonicalised, the matching
        "<name>.afm" file in the "afm" directory next to this module is loaded,
        /Type, /Subtype and /BaseFont are set, and the lookup tables between
        character identifiers (0-255) and unicode strings are built: through
        `symbol_decode` for "Symbol", `zapfdingbats_decode` for "ZapfDingbats"
        and the cp1252 codec for every other font. When `font_name` is None
        none of this happens.
        """
        super(StandardType1Font, self).__init__()
        if font_name is not None:

            font_name = StandardType1Font._canonical_name(font_name)
            assert font_name is not None

            # assert whether AFM directory exists
            afm_directory: Path = Path(__file__).parent / "afm"
            assert afm_directory.exists()

            # assert whether AFM file exists
            afm_file: Path = afm_directory / (font_name.lower() + ".afm")
            assert afm_file.exists()

            # build AFM datastructure
            self._afm: AFM = AFM(afm_file)

            self[Name("Type")] = Name("Font")
            self[Name("Subtype")] = Name("Type1")
            self[Name("BaseFont")] = Name(self._afm._attrs["FontName"])

            self._character_identifier_to_unicode_lookup: typing.Dict[int, str] = {}
            self._unicode_lookup_to_character_identifier: typing.Dict[str, int] = {}

            if font_name == "Symbol":
                self._character_identifier_to_unicode_lookup = {c: symbol_decode([c]) for c in range(0, 256)}

            elif font_name == "ZapfDingbats":
                self._character_identifier_to_unicode_lookup = {c: zapfdingbats_decode([c]) for c in range(0, 256)}

            else:
                for c in range(0, 256):
                    try:
                        self._character_identifier_to_unicode_lookup[c] = bytes([c]).decode("cp1252")
                    except UnicodeDecodeError:
                        # byte value that cp1252 leaves undefined
                        self._character_identifier_to_unicode_lookup[c] = ""

            # reverse table; when several identifiers share a string the last one wins
            self._unicode_lookup_to_character_identifier = {v: k for k, v in self._character_identifier_to_unicode_lookup.items()}