Python FileUnicodeMap示例，cmapdb.FileUnicodeMap Python示例

示例#1

0

显示文件

 def create_unicode_map(self):
     """Build a FileUnicodeMap from the font's 'cmap' table.

     The table is located through self.tables['cmap'], an
     (offset, length) pair, and read from self.fp.  Subtable formats
     0, 2 and 4 are decoded into a character-code -> glyph-id
     dictionary; every pair is then registered with
     FileUnicodeMap.add_cid2unichr(gid, char).

     Returns:
         The populated FileUnicodeMap.

     Raises:
         TrueTypeFont.CMapNotFound: the font has no 'cmap' table.
         AssertionError: a subtable uses any other format (only while
             assertions are enabled).
     """
     if 'cmap' not in self.tables:
         raise TrueTypeFont.CMapNotFound
     (base_offset, length) = self.tables['cmap']
     fp = self.fp
     fp.seek(base_offset)
     (version, nsubtables) = struct.unpack('>HH', fp.read(4))
     # Each record is two 16-bit fields (unused here) followed by a
     # 32-bit subtable offset relative to the start of the table.
     subtables = []
     for i in xrange(nsubtables):
         subtables.append(struct.unpack('>HHL', fp.read(8)))
     char2gid = {}
     # Only supports subtable type 0, 2 and 4.
     for (_1, _2, st_offset) in subtables:
         fp.seek(base_offset+st_offset)
         (fmttype, fmtlen, fmtlang) = struct.unpack('>HHH', fp.read(6))
         if fmttype == 0:
             # Byte encoding table: glyph id for each of the 256 codes.
             char2gid.update(enumerate(struct.unpack('>256B', fp.read(256))))
         elif fmttype == 2:
             subheaderkeys = struct.unpack('>256H', fp.read(512))
             # Map a subheader index back to the first byte selecting it.
             # Floor division keeps the index an int whatever division
             # mode the module runs under.
             firstbytes = [0]*8192
             for (i, k) in enumerate(subheaderkeys):
                 firstbytes[k//8] = i
             nhdrs = max(subheaderkeys)//8 + 1
             hdrs = []
             for i in xrange(nhdrs):
                 (firstcode, entcount, delta, offset) = struct.unpack('>HHhH', fp.read(8))
                 # The range offset is relative to its own field, which
                 # is the last two bytes just read.
                 hdrs.append((i, firstcode, entcount, delta, fp.tell()-2+offset))
             for (i, firstcode, entcount, delta, pos) in hdrs:
                 if not entcount:
                     continue
                 first = firstcode + (firstbytes[i] << 8)
                 fp.seek(pos)
                 for c in xrange(entcount):
                     # unpack() returns a 1-tuple; the previous code kept
                     # the tuple, so the zero test never failed and adding
                     # the delta raised TypeError.
                     gid = struct.unpack('>H', fp.read(2))[0]
                     if gid:
                         # idDelta arithmetic is modulo 65536.
                         gid = (gid + delta) & 0xffff
                     char2gid[first+c] = gid
         elif fmttype == 4:
             (segcount, _1, _2, _3) = struct.unpack('>HHHH', fp.read(8))
             segcount //= 2
             ecs = struct.unpack('>%dH' % segcount, fp.read(2*segcount))
             fp.read(2)  # skip the 16-bit pad between the two arrays
             scs = struct.unpack('>%dH' % segcount, fp.read(2*segcount))
             idds = struct.unpack('>%dh' % segcount, fp.read(2*segcount))
             pos = fp.tell()
             idrs = struct.unpack('>%dH' % segcount, fp.read(2*segcount))
             for (seg, (ec, sc, idd, idr)) in enumerate(zip(ecs, scs, idds, idrs)):
                 if idr:
                     # idRangeOffset counts from the location of this
                     # segment's own entry (pos + 2*seg), not from the
                     # start of the idRangeOffset array.
                     fp.seek(pos + 2*seg + idr)
                     for c in xrange(sc, ec+1):
                         gid = struct.unpack('>H', fp.read(2))[0]
                         if gid:
                             # A stored 0 is the missing glyph and must
                             # not be shifted by idDelta.
                             gid = (gid + idd) & 0xffff
                         char2gid[c] = gid
                 else:
                     for c in xrange(sc, ec+1):
                         char2gid[c] = (c + idd) & 0xffff
         else:
             # Unsupported subtable format.
             assert 0
     # create unicode map
     unicode_map = FileUnicodeMap()
     for (char, gid) in char2gid.iteritems():
         unicode_map.add_cid2unichr(gid, char)
     return unicode_map

示例#2

0

显示文件

class PDFSimpleFont(PDFFont):
    """Font whose character codes are translated through an encoding
    table, with an optional ToUnicode CMap taking precedence."""

    def __init__(self, descriptor, widths, spec):
        """Resolve the encoding and the optional ToUnicode map from spec.

        descriptor and widths are passed on unchanged to
        PDFFont.__init__; spec is the font dictionary that is probed for
        the 'Encoding' and 'ToUnicode' entries.
        """
        # The encoding is either the name of a built-in encoding or a
        # dictionary describing the differences from a base encoding.
        encoding = (resolve1(spec['Encoding']) if 'Encoding' in spec
                    else LITERAL_STANDARD_ENCODING)
        if not isinstance(encoding, dict):
            self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding))
        else:
            base = encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING)
            differences = encoding.get('Differences', None)
            self.cid2unicode = EncodingDB.get_encoding(
                literal_name(base), list_value(differences))

        self.unicode_map = None
        if 'ToUnicode' in spec:
            tounicode = stream_value(spec['ToUnicode'])
            self.unicode_map = FileUnicodeMap()
            parser = CMapParser(self.unicode_map,
                                StringIO(tounicode.get_data()))
            parser.run()
        PDFFont.__init__(self, descriptor, widths)

    def to_unichr(self, cid):
        """Return the Unicode value for cid.

        The ToUnicode map, when present, is consulted first; on a
        KeyError the lookup falls back to the encoding table.  Raises
        PDFUnicodeNotDefined when the encoding table has no entry.
        """
        umap = self.unicode_map
        if umap:
            try:
                return umap.get_unichr(cid)
            except KeyError:
                # Not covered by the ToUnicode map; try the encoding.
                pass
        try:
            char = self.cid2unicode[cid]
        except KeyError:
            raise PDFUnicodeNotDefined(None, cid)
        return char

示例#3

0

显示文件

 def __init__(self, descriptor, widths, spec):
     """Initialise the font's code-to-Unicode tables from spec.

     Sets self.cid2unicode from the 'Encoding' entry (or the standard
     encoding when absent) and self.unicode_map from the 'ToUnicode'
     stream when one is present, then delegates to PDFFont.__init__
     with descriptor and widths.
     """
     # 'Encoding' holds either a built-in encoding name or a dictionary
     # listing differences against a base encoding.
     encoding = LITERAL_STANDARD_ENCODING
     if 'Encoding' in spec:
         encoding = resolve1(spec['Encoding'])

     if isinstance(encoding, dict):
         base_name = literal_name(
             encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING))
         differences = list_value(encoding.get('Differences', None))
         table = EncodingDB.get_encoding(base_name, differences)
     else:
         table = EncodingDB.get_encoding(literal_name(encoding))
     self.cid2unicode = table

     self.unicode_map = None
     if 'ToUnicode' in spec:
         cmap_stream = stream_value(spec['ToUnicode'])
         self.unicode_map = FileUnicodeMap()
         cmap_source = StringIO(cmap_stream.get_data())
         CMapParser(self.unicode_map, cmap_source).run()

     PDFFont.__init__(self, descriptor, widths)

示例#4

0

显示文件

文件： pdffont.py 项目： 171230839/pdfminer

class PDFCIDFont(PDFFont):
    """CID-keyed font described by a PDF font dictionary."""

    def __init__(self, rsrcmgr, spec):
        """Read the font's name, CMap and Unicode map from spec.

        Missing 'BaseFont', 'Encoding' or 'FontDescriptor' entries, and
        an unknown CMap name, raise PDFFontError when STRICT is set;
        otherwise placeholder values are used.  rsrcmgr is not used in
        the code shown here.

        NOTE(review): this excerpt ends without calling
        PDFFont.__init__ or computing widths; it looks truncated.
        Confirm against the full pdffont.py.
        """
        try:
            self.basefont = literal_name(spec['BaseFont'])
        except KeyError:
            if STRICT:
                raise PDFFontError('BaseFont is missing')
            self.basefont = 'unknown'
        self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
        # "<Registry>-<Ordering>", with 'unknown' for missing parts.
        self.cidcoding = '%s-%s' % (self.cidsysteminfo.get(
            'Registry',
            'unknown'), self.cidsysteminfo.get('Ordering', 'unknown'))
        try:
            name = literal_name(spec['Encoding'])
        except KeyError:
            if STRICT:
                raise PDFFontError('Encoding is unspecified')
            name = 'unknown'
        try:
            self.cmap = CMapDB.get_cmap(name)
        except CMapDB.CMapNotFound as e:
            if STRICT:
                raise PDFFontError(e)
            self.cmap = CMap()
        try:
            descriptor = dict_value(spec['FontDescriptor'])
        except KeyError:
            if STRICT:
                raise PDFFontError('FontDescriptor is missing')
            descriptor = {}
        ttf = None
        if 'FontFile2' in descriptor:
            self.fontfile = stream_value(descriptor.get('FontFile2'))
            ttf = TrueTypeFont(self.basefont,
                               StringIO(self.fontfile.get_data()))
        # Unicode map precedence: explicit ToUnicode stream, then the
        # embedded TrueType cmap for Adobe-Identity / Adobe-UCS fonts,
        # then the map CMapDB provides for this CID coding.
        self.unicode_map = None
        if 'ToUnicode' in spec:
            strm = stream_value(spec['ToUnicode'])
            self.unicode_map = FileUnicodeMap()
            CMapParser(self.unicode_map, StringIO(strm.get_data())).run()
        elif self.cidcoding in ('Adobe-Identity', 'Adobe-UCS'):
            if ttf:
                try:
                    self.unicode_map = ttf.create_unicode_map()
                except TrueTypeFont.CMapNotFound:
                    # Best effort: leave unicode_map as None.
                    pass
        else:
            try:
                self.unicode_map = CMapDB.get_unicode_map(
                    self.cidcoding, self.cmap.is_vertical())
            except CMapDB.CMapNotFound:
                # Best effort: leave unicode_map as None.
                pass

示例#5

0

显示文件

文件： pdffont.py 项目： mcs07/pdfminer

 def __init__(self, descriptor, widths, spec):
     """Prepare the encoding table and optional ToUnicode map.

     spec is checked for 'Encoding' (a built-in encoding name or a
     dictionary of differences) and for 'ToUnicode'; descriptor and
     widths are handed to PDFFont.__init__ once both are processed.
     """
     if 'Encoding' not in spec:
         encoding = LITERAL_STANDARD_ENCODING
     else:
         encoding = resolve1(spec['Encoding'])

     if not isinstance(encoding, dict):
         # Plain name of a built-in encoding.
         encoding_args = (literal_name(encoding),)
     else:
         # Dictionary: base encoding plus a list of differences.
         base = literal_name(
             encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING))
         changes = list_value(encoding.get('Differences', None))
         encoding_args = (base, changes)
     self.cid2unicode = EncodingDB.get_encoding(*encoding_args)

     self.unicode_map = None
     if 'ToUnicode' in spec:
         stream = stream_value(spec['ToUnicode'])
         self.unicode_map = FileUnicodeMap()
         reader = CMapParser(self.unicode_map, StringIO(stream.get_data()))
         reader.run()
     PDFFont.__init__(self, descriptor, widths)

示例#6

0

显示文件

文件： pdffont.py 项目： AbhiAgarwal/pattern

    def __init__(self, rsrcmgr, spec):
        """Set up a CID-keyed font from its font dictionary spec.

        Populates basefont, cidsysteminfo, cidcoding, cmap,
        unicode_map, vertical, disps and default_disp (plus fontfile
        when the descriptor carries 'FontFile2'), then calls
        PDFFont.__init__ with the descriptor, the width table and the
        default width.  With STRICT set, a missing 'BaseFont',
        'Encoding' or 'FontDescriptor' entry or an unknown CMap raises
        PDFFontError; otherwise fallbacks are used.  rsrcmgr is unused.
        """
        try:
            basefont = literal_name(spec['BaseFont'])
        except KeyError:
            if STRICT:
                raise PDFFontError('BaseFont is missing')
            basefont = 'unknown'
        self.basefont = basefont

        sysinfo = dict_value(spec.get('CIDSystemInfo', {}))
        self.cidsysteminfo = sysinfo
        registry = sysinfo.get('Registry', 'unknown')
        ordering = sysinfo.get('Ordering', 'unknown')
        self.cidcoding = '%s-%s' % (registry, ordering)

        try:
            cmap_name = literal_name(spec['Encoding'])
        except KeyError:
            if STRICT:
                raise PDFFontError('Encoding is unspecified')
            cmap_name = 'unknown'
        try:
            cmap = CMapDB.get_cmap(cmap_name)
        except CMapDB.CMapNotFound as e:
            if STRICT:
                raise PDFFontError(e)
            cmap = CMap()
        self.cmap = cmap

        try:
            descriptor = dict_value(spec['FontDescriptor'])
        except KeyError:
            if STRICT:
                raise PDFFontError('FontDescriptor is missing')
            descriptor = {}

        ttf = None
        if 'FontFile2' in descriptor:
            self.fontfile = stream_value(descriptor.get('FontFile2'))
            ttf = TrueTypeFont(self.basefont,
                               StringIO(self.fontfile.get_data()))

        # Unicode map: ToUnicode stream first, then the embedded
        # TrueType cmap for Adobe-Identity, else whatever CMapDB offers.
        self.unicode_map = None
        if 'ToUnicode' in spec:
            tounicode = stream_value(spec['ToUnicode'])
            self.unicode_map = FileUnicodeMap()
            CMapParser(self.unicode_map, StringIO(tounicode.get_data())).run()
        elif self.cidcoding == 'Adobe-Identity':
            if ttf:
                try:
                    self.unicode_map = ttf.create_unicode_map()
                except TrueTypeFont.CMapNotFound:
                    pass
        else:
            try:
                self.unicode_map = CMapDB.get_unicode_map(self.cidcoding, self.cmap.is_vertical())
            except CMapDB.CMapNotFound:
                pass

        self.vertical = self.cmap.is_vertical()
        if self.vertical:
            # writing mode: vertical
            widths2 = get_widths2(list_value(spec.get('W2', [])))
            disps = {}
            widths = {}
            for (cid, (advance, (vx, vy))) in widths2.iteritems():
                disps[cid] = (vx, vy)
                widths[cid] = advance
            self.disps = disps
            (default_vy, default_width) = spec.get('DW2', [880, -1000])
            self.default_disp = (None, default_vy)
        else:
            # writing mode: horizontal
            self.disps = {}
            self.default_disp = 0
            widths = get_widths(list_value(spec.get('W', [])))
            default_width = spec.get('DW', 1000)
        PDFFont.__init__(self, descriptor, widths, default_width=default_width)

示例#7

0

显示文件

    def __init__(self, rsrcmgr, spec):
        """Initialise a CID font from the PDF font dictionary spec.

        Reads the base font name, CID system info, CMap, font
        descriptor, an optional embedded TrueType program and the
        Unicode map, then derives width/displacement tables for the
        CMap's writing mode and forwards them to PDFFont.__init__.
        Missing entries raise PDFFontError only when STRICT is set.
        The rsrcmgr argument is not used.
        """
        def strict_fail(message):
            # In strict mode a missing entry is fatal; otherwise the
            # caller continues with a fallback value.
            if STRICT:
                raise PDFFontError(message)

        try:
            self.basefont = literal_name(spec['BaseFont'])
        except KeyError:
            strict_fail('BaseFont is missing')
            self.basefont = 'unknown'

        self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
        coding_parts = (self.cidsysteminfo.get('Registry', 'unknown'),
                        self.cidsysteminfo.get('Ordering', 'unknown'))
        self.cidcoding = '%s-%s' % coding_parts

        try:
            name = literal_name(spec['Encoding'])
        except KeyError:
            strict_fail('Encoding is unspecified')
            name = 'unknown'

        try:
            self.cmap = CMapDB.get_cmap(name)
        except CMapDB.CMapNotFound as e:
            strict_fail(e)
            self.cmap = CMap()

        try:
            descriptor = dict_value(spec['FontDescriptor'])
        except KeyError:
            strict_fail('FontDescriptor is missing')
            descriptor = {}

        ttf = None
        if 'FontFile2' in descriptor:
            self.fontfile = stream_value(descriptor.get('FontFile2'))
            ttf = TrueTypeFont(self.basefont,
                               StringIO(self.fontfile.get_data()))

        self.unicode_map = None
        if 'ToUnicode' in spec:
            strm = stream_value(spec['ToUnicode'])
            self.unicode_map = FileUnicodeMap()
            cmap_parser = CMapParser(self.unicode_map,
                                     StringIO(strm.get_data()))
            cmap_parser.run()
        elif self.cidcoding == 'Adobe-Identity':
            # Fall back to the cmap table of the embedded TrueType font.
            if ttf:
                try:
                    self.unicode_map = ttf.create_unicode_map()
                except TrueTypeFont.CMapNotFound:
                    pass
        else:
            try:
                self.unicode_map = CMapDB.get_unicode_map(
                    self.cidcoding, self.cmap.is_vertical())
            except CMapDB.CMapNotFound:
                pass

        self.vertical = self.cmap.is_vertical()
        if not self.vertical:
            # writing mode: horizontal
            self.disps = {}
            self.default_disp = 0
            widths = get_widths(list_value(spec.get('W', [])))
            default_width = spec.get('DW', 1000)
        else:
            # writing mode: vertical
            metrics = get_widths2(list_value(spec.get('W2', [])))
            self.disps = dict(
                (cid, (dx, dy))
                for (cid, (_, (dx, dy))) in metrics.iteritems())
            (default_vy, default_width) = spec.get('DW2', [880, -1000])
            self.default_disp = (None, default_vy)
            widths = dict(
                (cid, advance)
                for (cid, (advance, _)) in metrics.iteritems())
        PDFFont.__init__(self, descriptor, widths, default_width=default_width)

示例#8

0

显示文件

文件： pdffont.py 项目： kaoruAngel/pdfminer

    def __init__(self, rsrcmgr, spec):
        """Construct a CID font from the font dictionary spec.

        Steps, in order: base font name, CID system info and coding
        string, CMap lookup, font descriptor, optional embedded
        TrueType font, Unicode map selection, and finally the
        width/displacement tables handed to PDFFont.__init__.  When
        STRICT is set, absent "BaseFont", "Encoding" or
        "FontDescriptor" entries and unknown CMaps raise PDFFontError.
        rsrcmgr is accepted but not used.
        """
        # --- names and CMap -------------------------------------------
        try:
            font_name = literal_name(spec["BaseFont"])
        except KeyError:
            if STRICT:
                raise PDFFontError("BaseFont is missing")
            font_name = "unknown"
        self.basefont = font_name

        self.cidsysteminfo = info = dict_value(spec.get("CIDSystemInfo", {}))
        self.cidcoding = "%s-%s" % (
            info.get("Registry", "unknown"),
            info.get("Ordering", "unknown"),
        )

        try:
            encoding_name = literal_name(spec["Encoding"])
        except KeyError:
            if STRICT:
                raise PDFFontError("Encoding is unspecified")
            encoding_name = "unknown"
        try:
            self.cmap = CMapDB.get_cmap(encoding_name)
        except CMapDB.CMapNotFound as e:
            if STRICT:
                raise PDFFontError(e)
            self.cmap = CMap()

        # --- descriptor and embedded font -----------------------------
        try:
            descriptor = dict_value(spec["FontDescriptor"])
        except KeyError:
            if STRICT:
                raise PDFFontError("FontDescriptor is missing")
            descriptor = {}
        ttf = None
        if "FontFile2" in descriptor:
            self.fontfile = stream_value(descriptor.get("FontFile2"))
            ttf = TrueTypeFont(self.basefont, StringIO(self.fontfile.get_data()))

        # --- unicode map ----------------------------------------------
        self.unicode_map = None
        if "ToUnicode" in spec:
            tounicode_stream = stream_value(spec["ToUnicode"])
            self.unicode_map = FileUnicodeMap()
            CMapParser(self.unicode_map, StringIO(tounicode_stream.get_data())).run()
        elif self.cidcoding in ("Adobe-Identity", "Adobe-UCS"):
            if ttf:
                try:
                    self.unicode_map = ttf.create_unicode_map()
                except TrueTypeFont.CMapNotFound:
                    pass
        else:
            try:
                self.unicode_map = CMapDB.get_unicode_map(self.cidcoding, self.cmap.is_vertical())
            except CMapDB.CMapNotFound:
                pass

        # --- widths and displacements ---------------------------------
        vertical = self.cmap.is_vertical()
        self.vertical = vertical
        if vertical:
            # writing mode: vertical
            raw = get_widths2(list_value(spec.get("W2", [])))
            displacements = {}
            for (cid, (_, (vx, vy))) in raw.iteritems():
                displacements[cid] = (vx, vy)
            self.disps = displacements
            (dw2_vy, dw2_w) = spec.get("DW2", [880, -1000])
            self.default_disp = (None, dw2_vy)
            widths = {}
            for (cid, (advance, _)) in raw.iteritems():
                widths[cid] = advance
            default_width = dw2_w
        else:
            # writing mode: horizontal
            self.disps = {}
            self.default_disp = 0
            widths = get_widths(list_value(spec.get("W", [])))
            default_width = spec.get("DW", 1000)
        PDFFont.__init__(self, descriptor, widths, default_width=default_width)