Python FileUnicodeMap示例，pypdf.cmapdb.FileUnicodeMap Python示例

示例#1

0

显示文件

文件： pdffont.py 项目： harshavardhana/bungee-plugins

class PDFSimpleFont(PDFFont):
    """Font whose character codes are translated through an encoding table.

    Two lookup sources are kept on the instance:

    * ``cid2unicode`` -- the table returned by ``EncodingDB.get_encoding``
      for the font's ``Encoding`` entry.
    * ``unicode_map`` -- a ``FileUnicodeMap`` parsed from the ``ToUnicode``
      stream, or ``None`` when the font dictionary has no such entry.
    """

    def __init__(self, descriptor, widths, spec):
        """Set up the encoding tables, then initialise the ``PDFFont`` base.

        Args:
            descriptor: passed through unchanged to ``PDFFont.__init__``.
            widths: passed through unchanged to ``PDFFont.__init__``.
            spec: font dictionary; its ``Encoding`` and ``ToUnicode``
                entries are read here.
        """
        # The encoding is either the name of a built-in encoding or a
        # dictionary that describes differences from a base encoding.
        # Fonts without an ``Encoding`` entry use the standard encoding.
        encoding = resolve1(spec["Encoding"]) if "Encoding" in spec else LITERAL_STANDARD_ENCODING

        if not isinstance(encoding, dict):
            self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding))
        else:
            base_name = literal_name(encoding.get("BaseEncoding", LITERAL_STANDARD_ENCODING))
            differences = list_value(encoding.get("Differences"))
            self.cid2unicode = EncodingDB.get_encoding(base_name, differences)

        self.unicode_map = None
        if "ToUnicode" in spec:
            to_unicode = stream_value(spec["ToUnicode"])
            self.unicode_map = FileUnicodeMap()
            parser = CMapParser(self.unicode_map, StringIO(to_unicode.get_data()))
            parser.run()

        PDFFont.__init__(self, descriptor, widths)

    def to_unichr(self, cid):
        """Return the Unicode text for character code *cid*.

        The ``ToUnicode`` map is consulted first when one is present; if it
        has no entry for *cid*, the encoding table is used instead.

        Raises:
            PDFUnicodeNotDefined: if neither source has an entry for *cid*.
        """
        to_unicode = self.unicode_map
        if to_unicode:
            try:
                return to_unicode.get_unichr(cid)
            except KeyError:
                # Not covered by ToUnicode; fall through to the encoding table.
                pass
        try:
            text = self.cid2unicode[cid]
        except KeyError:
            raise PDFUnicodeNotDefined(None, cid)
        return text

示例#2

0

显示文件

文件： pdffont.py 项目： harshavardhana/bungee-plugins

 def __init__(self, descriptor, widths, spec):
     """Set up the encoding tables, then initialise the ``PDFFont`` base.

     Sets ``self.cid2unicode`` from the font's ``Encoding`` entry and
     ``self.unicode_map`` from its ``ToUnicode`` stream (``None`` when the
     font dictionary has no ``ToUnicode`` entry).

     Args:
         descriptor: passed through unchanged to ``PDFFont.__init__``.
         widths: passed through unchanged to ``PDFFont.__init__``.
         spec: font dictionary; its ``Encoding`` and ``ToUnicode`` entries
             are read here.
     """
     # The encoding is given either as the name of a built-in encoding or
     # as a dictionary describing differences from a base encoding; the
     # standard encoding is used when the entry is absent.
     enc = resolve1(spec["Encoding"]) if "Encoding" in spec else LITERAL_STANDARD_ENCODING

     if not isinstance(enc, dict):
         self.cid2unicode = EncodingDB.get_encoding(literal_name(enc))
     else:
         base = literal_name(enc.get("BaseEncoding", LITERAL_STANDARD_ENCODING))
         overrides = list_value(enc.get("Differences"))
         self.cid2unicode = EncodingDB.get_encoding(base, overrides)

     self.unicode_map = None
     if "ToUnicode" in spec:
         cmap_stream = stream_value(spec["ToUnicode"])
         self.unicode_map = FileUnicodeMap()
         cmap_parser = CMapParser(self.unicode_map, StringIO(cmap_stream.get_data()))
         cmap_parser.run()

     PDFFont.__init__(self, descriptor, widths)

示例#3

0

显示文件

文件： pdffont.py 项目： harshavardhana/bungee-plugins

    def __init__(self, rsrcmgr, spec):
        """Initialise a CID-keyed font from its font dictionary.

        Populates ``basefont``, ``cidsysteminfo``, ``cidcoding``, ``cmap``,
        ``unicode_map``, ``vertical``, ``disps`` and ``default_disp`` and
        then initialises the ``PDFFont`` base with the descriptor, the
        per-CID widths and the default width.

        Args:
            rsrcmgr: accepted for interface compatibility; not used here.
            spec: font dictionary.

        Raises:
            PDFFontError: only when ``STRICT`` is truthy -- if ``BaseFont``,
                ``Encoding`` or ``FontDescriptor`` is missing, or if the
                named CMap cannot be found.
        """
        try:
            self.basefont = literal_name(spec["BaseFont"])
        except KeyError:
            if STRICT:
                raise PDFFontError("BaseFont is missing")
            self.basefont = "unknown"

        self.cidsysteminfo = dict_value(spec.get("CIDSystemInfo", {}))
        registry = self.cidsysteminfo.get("Registry", "unknown")
        ordering = self.cidsysteminfo.get("Ordering", "unknown")
        self.cidcoding = "%s-%s" % (registry, ordering)

        try:
            cmap_name = literal_name(spec["Encoding"])
        except KeyError:
            if STRICT:
                raise PDFFontError("Encoding is unspecified")
            cmap_name = "unknown"

        try:
            self.cmap = CMapDB.get_cmap(cmap_name)
        except CMapDB.CMapNotFound as e:
            if STRICT:
                raise PDFFontError(e)
            # Non-strict mode: continue with an empty CMap.
            self.cmap = CMap()

        try:
            descriptor = dict_value(spec["FontDescriptor"])
        except KeyError:
            if STRICT:
                raise PDFFontError("FontDescriptor is missing")
            descriptor = {}

        # An embedded TrueType program (FontFile2) can supply a Unicode map
        # for Adobe-Identity fonts below.
        ttf = None
        if "FontFile2" in descriptor:
            self.fontfile = stream_value(descriptor.get("FontFile2"))
            ttf = TrueTypeFont(self.basefont, StringIO(self.fontfile.get_data()))

        # Unicode map, in order of preference: an explicit ToUnicode stream,
        # then (for non-identity collections) the predefined map for the
        # collection, otherwise the embedded TrueType cmap if there is one.
        self.unicode_map = None
        if "ToUnicode" in spec:
            to_unicode = stream_value(spec["ToUnicode"])
            self.unicode_map = FileUnicodeMap()
            parser = CMapParser(self.unicode_map, StringIO(to_unicode.get_data()))
            parser.run()
        elif self.cidcoding != "Adobe-Identity":
            try:
                self.unicode_map = CMapDB.get_unicode_map(self.cidcoding, self.cmap.is_vertical())
            except CMapDB.CMapNotFound:
                pass
        elif ttf:
            try:
                self.unicode_map = ttf.create_unicode_map()
            except TrueTypeFont.CMapNotFound:
                pass

        self.vertical = self.cmap.is_vertical()
        if not self.vertical:
            # writing mode: horizontal
            self.disps = {}
            self.default_disp = 0
            widths = get_widths(list_value(spec.get("W", [])))
            default_width = spec.get("DW", 1000)
        else:
            # writing mode: vertical
            # Each W2 entry is unpacked as (advance, (vx, vy)).
            metrics = get_widths2(list_value(spec.get("W2", [])))
            self.disps = {cid: (vx, vy) for (cid, (_, (vx, vy))) in metrics.items()}
            (default_vy, default_width) = spec.get("DW2", [880, -1000])
            self.default_disp = (None, default_vy)
            widths = {cid: advance for (cid, (advance, _)) in metrics.items()}

        PDFFont.__init__(self, descriptor, widths, default_width=default_width)

示例#4

0

显示文件

文件： pdffont.py 项目： harshavardhana/bungee-plugins

 def create_unicode_map(self):
     """Build a glyph-id -> character-code map from the font's ``cmap`` table.

     The ``cmap`` table is located through ``self.tables["cmap"]`` and read
     from ``self.fp``.  Every encoding subtable is decoded into a single
     character-code -> glyph-id dictionary (later subtables overwrite
     earlier entries for the same code), which is then loaded into a new
     ``FileUnicodeMap`` via ``add_cid2unichr(gid, char)``.

     Only subtable formats 0, 2 and 4 are supported.

     Returns:
         The populated ``FileUnicodeMap``.

     Raises:
         TrueTypeFont.CMapNotFound: if the font has no ``cmap`` table.
         AssertionError: if a subtable uses any other format.
     """
     if "cmap" not in self.tables:
         raise TrueTypeFont.CMapNotFound
     (base_offset, _length) = self.tables["cmap"]
     fp = self.fp
     fp.seek(base_offset)
     (_version, nsubtables) = struct.unpack(">HH", fp.read(4))
     # Each 8-byte record ends with the subtable's offset, which is
     # relative to the start of the cmap table.
     subtables = [struct.unpack(">HHL", fp.read(8)) for _ in range(nsubtables)]
     char2gid = {}
     # Only supports subtable type 0, 2 and 4.
     for (_1, _2, st_offset) in subtables:
         fp.seek(base_offset + st_offset)
         (fmttype, _fmtlen, _fmtlang) = struct.unpack(">HHH", fp.read(6))
         if fmttype == 0:
             # Byte encoding table: 256 one-byte glyph ids indexed by code.
             char2gid.update(enumerate(struct.unpack(">256B", fp.read(256))))
         elif fmttype == 2:
             subheaderkeys = struct.unpack(">256H", fp.read(512))
             # Keys are subheader index * 8, so integer division recovers
             # the index ("/" would yield a float, which cannot be used as
             # a list index or a range() bound).
             firstbytes = [0] * 8192
             for (i, k) in enumerate(subheaderkeys):
                 firstbytes[k // 8] = i
             nhdrs = max(subheaderkeys) // 8 + 1
             hdrs = []
             for i in range(nhdrs):
                 (firstcode, entcount, delta, offset) = struct.unpack(">HHhH", fp.read(8))
                 # The offset field is relative to its own position, which
                 # is the last two bytes just read.
                 hdrs.append((i, firstcode, entcount, delta, fp.tell() - 2 + offset))
             for (i, firstcode, entcount, delta, pos) in hdrs:
                 if not entcount:
                     continue
                 first = firstcode + (firstbytes[i] << 8)
                 fp.seek(pos)
                 for c in range(entcount):
                     # struct.unpack always returns a tuple; take the value.
                     (gid,) = struct.unpack(">H", fp.read(2))
                     if gid:
                         gid += delta
                     char2gid[first + c] = gid
         elif fmttype == 4:
             # The first header field holds twice the segment count.
             (segcount, _1, _2, _3) = struct.unpack(">HHHH", fp.read(8))
             segcount //= 2
             ecs = struct.unpack(">%dH" % segcount, fp.read(2 * segcount))
             fp.read(2)  # skip the two bytes between the end-code and start-code arrays
             scs = struct.unpack(">%dH" % segcount, fp.read(2 * segcount))
             idds = struct.unpack(">%dh" % segcount, fp.read(2 * segcount))
             pos = fp.tell()
             idrs = struct.unpack(">%dH" % segcount, fp.read(2 * segcount))
             for (i, (ec, sc, idd, idr)) in enumerate(zip(ecs, scs, idds, idrs)):
                 if idr:
                     # idRangeOffset[i] is relative to the position of that
                     # entry itself (pos + 2 * i), not to the start of the
                     # idRangeOffset array.
                     fp.seek(pos + 2 * i + idr)
                     for c in range(sc, ec + 1):
                         (gid,) = struct.unpack(">H", fp.read(2))
                         # A stored glyph id of 0 means "missing glyph" and
                         # must not be shifted by the segment delta.
                         if gid:
                             gid = (gid + idd) & 0xFFFF
                         char2gid[c] = gid
                 else:
                     for c in range(sc, ec + 1):
                         char2gid[c] = (c + idd) & 0xFFFF
         else:
             assert False, "unsupported cmap subtable format: %d" % fmttype
     # create unicode map
     unicode_map = FileUnicodeMap()
     for (char, gid) in char2gid.items():
         unicode_map.add_cid2unichr(gid, char)
     return unicode_map