class PDFSimpleFont(PDFFont):
    """Font whose character ids are translated through an encoding table.

    Two lookup tables are kept on the instance:

    * ``cid2unicode`` -- obtained from ``EncodingDB.get_encoding`` using the
      font dictionary's ``Encoding`` entry (or the standard encoding when
      that entry is absent).
    * ``unicode_map`` -- a ``FileUnicodeMap`` filled from the ``ToUnicode``
      stream when the font dictionary has one, otherwise ``None``.
    """

    def __init__(self, descriptor, widths, spec):
        """Set up the encoding tables, then delegate to ``PDFFont.__init__``.

        :param descriptor: passed through unchanged to ``PDFFont.__init__``.
        :param widths: passed through unchanged to ``PDFFont.__init__``.
        :param spec: font dictionary; its ``Encoding`` and ``ToUnicode``
            entries are read here.
        """
        # The encoding is given either as the name of a built-in encoding
        # or as a dictionary describing differences from a base encoding.
        encoding = (
            resolve1(spec["Encoding"])
            if "Encoding" in spec
            else LITERAL_STANDARD_ENCODING
        )
        if not isinstance(encoding, dict):
            # Plain name: look the built-in encoding up directly.
            self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding))
        else:
            # Dictionary form: base encoding plus a list of differences.
            base_name = literal_name(
                encoding.get("BaseEncoding", LITERAL_STANDARD_ENCODING)
            )
            differences = list_value(encoding.get("Differences", None))
            self.cid2unicode = EncodingDB.get_encoding(base_name, differences)

        self.unicode_map = None
        if "ToUnicode" in spec:
            tounicode_stream = stream_value(spec["ToUnicode"])
            self.unicode_map = FileUnicodeMap()
            cmap_source = StringIO(tounicode_stream.get_data())
            CMapParser(self.unicode_map, cmap_source).run()

        PDFFont.__init__(self, descriptor, widths)

    def to_unichr(self, cid):
        """Return the character(s) mapped to ``cid``.

        The ``ToUnicode`` map is consulted first when one is present and
        truthy; a ``KeyError`` from it falls back to ``cid2unicode``.

        :raises PDFUnicodeNotDefined: when ``cid2unicode`` has no entry for
            ``cid`` either.
        """
        tounicode = self.unicode_map
        if tounicode:
            try:
                return tounicode.get_unichr(cid)
            except KeyError:
                # Not covered by ToUnicode; try the encoding table below.
                pass
        try:
            mapped = self.cid2unicode[cid]
        except KeyError:
            raise PDFUnicodeNotDefined(None, cid)
        return mapped
def __init__(self, descriptor, widths, spec):
    """Set up the encoding tables, then delegate to ``PDFFont.__init__``.

    ``self.cid2unicode`` comes from ``EncodingDB.get_encoding`` using the
    ``Encoding`` entry of ``spec`` (the standard encoding when absent).
    ``self.unicode_map`` is a ``FileUnicodeMap`` parsed from the
    ``ToUnicode`` stream when ``spec`` has one, otherwise ``None``.

    :param descriptor: passed through unchanged to ``PDFFont.__init__``.
    :param widths: passed through unchanged to ``PDFFont.__init__``.
    :param spec: font dictionary providing ``Encoding`` / ``ToUnicode``.
    """
    # The encoding is given either as the name of a built-in encoding
    # or as a dictionary describing differences from a base encoding.
    encoding = (
        resolve1(spec["Encoding"])
        if "Encoding" in spec
        else LITERAL_STANDARD_ENCODING
    )
    if not isinstance(encoding, dict):
        # Plain name: look the built-in encoding up directly.
        self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding))
    else:
        # Dictionary form: base encoding plus a list of differences.
        base_name = literal_name(
            encoding.get("BaseEncoding", LITERAL_STANDARD_ENCODING)
        )
        differences = list_value(encoding.get("Differences", None))
        self.cid2unicode = EncodingDB.get_encoding(base_name, differences)

    self.unicode_map = None
    if "ToUnicode" in spec:
        tounicode_stream = stream_value(spec["ToUnicode"])
        self.unicode_map = FileUnicodeMap()
        cmap_source = StringIO(tounicode_stream.get_data())
        CMapParser(self.unicode_map, cmap_source).run()

    PDFFont.__init__(self, descriptor, widths)
def __init__(self, rsrcmgr, spec):
    """Initialise a CID-keyed font from its font dictionary ``spec``.

    Reads ``BaseFont``, ``CIDSystemInfo``, ``Encoding``, ``FontDescriptor``,
    ``ToUnicode`` and the width entries (``W``/``DW`` or ``W2``/``DW2``)
    from ``spec``.  When the module-level ``STRICT`` flag is set, a missing
    ``BaseFont``, ``Encoding`` or ``FontDescriptor`` entry, or an unknown
    CMap name, raises ``PDFFontError``; otherwise a fallback value is used.

    :param rsrcmgr: not referenced in this method.
    :param spec: the font dictionary.
    """
    try:
        self.basefont = literal_name(spec["BaseFont"])
    except KeyError:
        if STRICT:
            raise PDFFontError("BaseFont is missing")
        self.basefont = "unknown"

    # "<Registry>-<Ordering>" identifies the character collection.
    self.cidsysteminfo = dict_value(spec.get("CIDSystemInfo", {}))
    registry = self.cidsysteminfo.get("Registry", "unknown")
    ordering = self.cidsysteminfo.get("Ordering", "unknown")
    self.cidcoding = "%s-%s" % (registry, ordering)

    try:
        encoding_name = literal_name(spec["Encoding"])
    except KeyError:
        if STRICT:
            raise PDFFontError("Encoding is unspecified")
        encoding_name = "unknown"

    try:
        self.cmap = CMapDB.get_cmap(encoding_name)
    except CMapDB.CMapNotFound as e:
        if STRICT:
            raise PDFFontError(e)
        # Lenient mode: continue with an empty CMap.
        self.cmap = CMap()

    try:
        descriptor = dict_value(spec["FontDescriptor"])
    except KeyError:
        if STRICT:
            raise PDFFontError("FontDescriptor is missing")
        descriptor = {}

    # An embedded FontFile2 stream is wrapped in TrueTypeFont so its own
    # cmap table can serve as a Unicode map further down.
    ttf = None
    if "FontFile2" in descriptor:
        self.fontfile = stream_value(descriptor.get("FontFile2"))
        ttf = TrueTypeFont(self.basefont, StringIO(self.fontfile.get_data()))

    # Unicode map source, in order of preference: the ToUnicode stream,
    # the embedded font (Adobe-Identity only), then the CMap database.
    self.unicode_map = None
    if "ToUnicode" in spec:
        tounicode_stream = stream_value(spec["ToUnicode"])
        self.unicode_map = FileUnicodeMap()
        cmap_source = StringIO(tounicode_stream.get_data())
        CMapParser(self.unicode_map, cmap_source).run()
    elif self.cidcoding == "Adobe-Identity":
        if ttf:
            try:
                self.unicode_map = ttf.create_unicode_map()
            except TrueTypeFont.CMapNotFound:
                pass
    else:
        try:
            self.unicode_map = CMapDB.get_unicode_map(
                self.cidcoding, self.cmap.is_vertical()
            )
        except CMapDB.CMapNotFound:
            pass

    self.vertical = self.cmap.is_vertical()
    if not self.vertical:
        # writing mode: horizontal
        self.disps = {}
        self.default_disp = 0
        widths = get_widths(list_value(spec.get("W", [])))
        default_width = spec.get("DW", 1000)
    else:
        # writing mode: vertical
        # Each W2-derived entry unpacks as (w, (vx, vy)) per cid.
        vertical_metrics = get_widths2(list_value(spec.get("W2", [])))
        self.disps = {
            cid: (vx, vy)
            for (cid, (_, (vx, vy))) in vertical_metrics.items()
        }
        (default_vy, default_width) = spec.get("DW2", [880, -1000])
        self.default_disp = (None, default_vy)
        widths = {cid: w for (cid, (w, _)) in vertical_metrics.items()}

    PDFFont.__init__(self, descriptor, widths, default_width=default_width)
def create_unicode_map(self):
    """Build a glyph-id to character-code map from the font's ``cmap`` table.

    Every subtable listed in the ``cmap`` table is read in order.  Subtable
    formats 0, 2 and 4 are decoded into one character-code -> glyph-id
    dictionary (a later subtable overwrites earlier entries for the same
    code), which is then inverted into a ``FileUnicodeMap`` through
    ``add_cid2unichr(gid, char)``.

    :returns: the populated ``FileUnicodeMap``.
    :raises TrueTypeFont.CMapNotFound: if ``self.tables`` has no ``"cmap"``
        entry.
    :raises AssertionError: if a subtable uses a format other than 0, 2
        or 4 (not raised when assertions are disabled with ``-O``).
    """
    if "cmap" not in self.tables:
        raise TrueTypeFont.CMapNotFound
    (base_offset, length) = self.tables["cmap"]
    fp = self.fp
    fp.seek(base_offset)
    (version, nsubtables) = struct.unpack(">HH", fp.read(4))
    # One 8-byte record per subtable; only the trailing offset is used.
    subtables = [struct.unpack(">HHL", fp.read(8)) for _ in range(nsubtables)]
    char2gid = {}
    # Only supports subtable type 0, 2 and 4.
    for (_1, _2, st_offset) in subtables:
        fp.seek(base_offset + st_offset)
        (fmttype, fmtlen, fmtlang) = struct.unpack(">HHH", fp.read(6))
        if fmttype == 0:
            # Byte encoding table: 256 one-byte glyph ids indexed by code.
            char2gid.update(enumerate(struct.unpack(">256B", fp.read(256))))
        elif fmttype == 2:
            subheaderkeys = struct.unpack(">256H", fp.read(512))
            firstbytes = [0] * 8192
            # Each key is a byte offset (8 bytes per sub-header), so integer
            # division is required: "/" yields a float on Python 3, which is
            # neither a valid list index nor a valid range() argument.
            for (i, k) in enumerate(subheaderkeys):
                firstbytes[k // 8] = i
            nhdrs = max(subheaderkeys) // 8 + 1
            hdrs = []
            for i in range(nhdrs):
                (firstcode, entcount, delta, offset) = struct.unpack(
                    ">HHhH", fp.read(8)
                )
                # ``offset`` is measured from the offset field itself,
                # i.e. two bytes before the current file position.
                hdrs.append(
                    (i, firstcode, entcount, delta, fp.tell() - 2 + offset)
                )
            for (i, firstcode, entcount, delta, pos) in hdrs:
                if not entcount:
                    continue
                first = firstcode + (firstbytes[i] << 8)
                fp.seek(pos)
                for c in range(entcount):
                    # struct.unpack always returns a tuple; take its only
                    # element so the zero test and the addition operate on
                    # the integer glyph id.
                    (gid,) = struct.unpack(">H", fp.read(2))
                    if gid:
                        # The delta is signed; wrap to 16 bits as the
                        # format 4 branch below does.
                        gid = (gid + delta) & 0xFFFF
                    char2gid[first + c] = gid
        elif fmttype == 4:
            (segcount, _1, _2, _3) = struct.unpack(">HHHH", fp.read(8))
            # The header stores segCount * 2; keep it an int so it can be
            # used in struct format strings and read sizes.
            segcount //= 2
            ecs = struct.unpack(">%dH" % segcount, fp.read(2 * segcount))
            fp.read(2)  # skip the two bytes between the two arrays
            scs = struct.unpack(">%dH" % segcount, fp.read(2 * segcount))
            idds = struct.unpack(">%dh" % segcount, fp.read(2 * segcount))
            pos = fp.tell()
            idrs = struct.unpack(">%dH" % segcount, fp.read(2 * segcount))
            # NOTE(review): the OpenType spec measures each idRangeOffset
            # from that entry's own position, while this code measures from
            # the start of the array (``pos``) -- confirm before changing.
            for (ec, sc, idd, idr) in zip(ecs, scs, idds, idrs):
                if idr:
                    fp.seek(pos + idr)
                    for c in range(sc, ec + 1):
                        (raw_gid,) = struct.unpack(">H", fp.read(2))
                        char2gid[c] = (raw_gid + idd) & 0xFFFF
                else:
                    for c in range(sc, ec + 1):
                        char2gid[c] = (c + idd) & 0xFFFF
        else:
            # Unsupported subtable format; kept as an assertion so callers
            # see the same exception type as before.
            assert 0
    # create unicode map
    unicode_map = FileUnicodeMap()
    for (char, gid) in char2gid.items():
        unicode_map.add_cid2unichr(gid, char)
    return unicode_map