def calcChecksum(data):
    """Return the 32-bit table checksum of ``data`` as ``(hi, lo)`` words.

    ``data`` is treated as a sequence of big-endian 32-bit words (zero-padded
    to a multiple of four bytes); the words are summed modulo 2**32 and the
    result is returned as two 16-bit halves.

    Args:
        data: ``bytes``/``bytearray``, or a text string whose characters
            each stand for one byte value.  The argument is never modified.

    Returns:
        Tuple ``(hi, lo)`` with the high and low 16 bits of the sum.
    """
    # Work on integer byte values so both text and binary input are handled
    # (indexing bytes on Python 3 already yields ints, so ord() would fail).
    if isinstance(data, (bytes, bytearray)):
        values = list(bytearray(data))
    else:
        values = [ord(ch) for ch in data]

    # Zero-pad to a whole number of 32-bit words.
    values.extend([0] * (-len(values) % 4))

    hi = 0x0000
    lo = 0x0000
    for i in range(0, len(values), 4):
        hi += (values[i] << 8) + values[i + 1]
        lo += (values[i + 2] << 8) + values[i + 3]
        hi += lo >> 16  # propagate the carry out of the low word
        lo &= 0xFFFF
        hi &= 0xFFFF
    return (hi, lo)
def calcChecksum(data):
    """Return the 32-bit table checksum of ``data`` as ``(hi, lo)`` words.

    ``data`` is treated as a sequence of big-endian 32-bit words (zero-padded
    to a multiple of four bytes); the words are summed modulo 2**32 and the
    result is returned as two 16-bit halves.

    Args:
        data: ``bytes``/``bytearray``, or a text string whose characters
            each stand for one byte value.  The argument is never modified.

    Returns:
        Tuple ``(hi, lo)`` with the high and low 16 bits of the sum.
    """
    # Work on integer byte values so both text and binary input are handled
    # (indexing bytes on Python 3 already yields ints, so ord() would fail).
    if isinstance(data, (bytes, bytearray)):
        values = list(bytearray(data))
    else:
        values = [ord(ch) for ch in data]

    # Zero-pad to a whole number of 32-bit words.
    values.extend([0] * (-len(values) % 4))

    hi = 0x0000
    lo = 0x0000
    for i in range(0, len(values), 4):
        hi += (values[i] << 8) + values[i + 1]
        lo += (values[i + 2] << 8) + values[i + 3]
        hi += lo >> 16  # propagate the carry out of the low word
        lo &= 0xFFFF
        hi &= 0xFFFF
    return (hi, lo)
def makeSubset(self, file, subset):
    """Build a subset of a TrueType font and return the assembled result.

    The font at ``file`` is opened and its ``head``, ``hhea``, ``maxp`` and
    ``cmap`` tables are read to map character codes to glyphs.  Glyphs for
    the codes in ``subset`` (plus glyph 0, which is always kept) are
    renumbered in ascending order of their original glyph id, and new
    ``cmap``, ``glyf``, ``hmtx``, ``loca``, ``head``, ``hhea`` and ``maxp``
    tables are registered through ``self.add``.  ``name``, ``post`` and
    ``OS/2`` are carried over (``post`` with a rewritten header), as are
    ``cvt ``, ``fpgm``, ``prep`` and ``gasp`` when present in ``self.tables``.

    Args:
        file: Path of the font file; it is opened in binary mode.
        subset: Iterable of integer character codes to keep.

    Returns:
        Whatever ``self.endTTFile('')`` returns for the tables added here.

    Note:
        ``die`` is called when the font has no usable Unicode cmap subtable.
        The file handle is closed even when parsing fails part-way.
    """
    self.filename = file
    self.fh = open(file, 'rb')
    try:
        self._pos = 0
        self.charWidths = []
        self.glyphPos = {}
        self.charToGlyph = {}
        self.tables = {}
        self.otables = {}
        self.ascent = 0
        self.descent = 0
        self.skip(4)
        self.maxUni = 0
        self.readTableDirectory()

        #################/
        # head - Font header table
        #################/
        self.seek_table("head")
        self.skip(50)
        indexToLocFormat = self.read_ushort()
        self.read_ushort()  # glyphDataFormat: consumed, not needed here

        #################/
        # hhea - Horizontal header table
        #################/
        self.seek_table("hhea")
        self.skip(32)
        self.read_ushort()  # metricDataFormat: consumed, not needed here
        orignHmetrics = numberOfHMetrics = self.read_ushort()

        #################/
        # maxp - Maximum profile table
        #################/
        self.seek_table("maxp")
        self.skip(4)
        numGlyphs = self.read_ushort()

        #################/
        # cmap - Character to glyph index mapping table
        #################/
        cmap_offset = self.seek_table("cmap")
        self.skip(2)
        cmapTableCount = self.read_ushort()
        unicode_cmap_offset = 0
        unicode_cmap_offset12 = 0
        for _ in range(cmapTableCount):
            platformID = self.read_ushort()
            encodingID = self.read_ushort()
            offset = self.read_ulong()
            save_pos = self._pos
            if platformID == 3 and encodingID == 10:  # Microsoft, UCS-4
                if self.get_ushort(cmap_offset + offset) == 12:
                    unicode_cmap_offset12 = cmap_offset + offset
                    break
            if (platformID == 3 and encodingID == 1) or platformID == 0:
                # Microsoft, Unicode
                if self.get_ushort(cmap_offset + offset) == 4:
                    unicode_cmap_offset = cmap_offset + offset
                    break
            # get_ushort() may have moved the read position; go back to the
            # next encoding record.
            self.seek(save_pos)

        if not unicode_cmap_offset and not unicode_cmap_offset12:
            die('Font (' + self.filename + ') does not have cmap for Unicode (platform 3, encoding 1, format 4, or platform 3, encoding 10, format 12, or platform 0, any encoding, format 4)')

        glyphToChar = {}
        charToGlyph = {}
        if unicode_cmap_offset12:
            self.getCMAP12(unicode_cmap_offset12, glyphToChar, charToGlyph)
        else:
            self.getCMAP4(unicode_cmap_offset, glyphToChar, charToGlyph)
        self.charToGlyph = charToGlyph

        #################/
        # hmtx - Horizontal metrics table
        #################/
        scale = 1  # not used
        self.getHMTX(numberOfHMetrics, numGlyphs, glyphToChar, scale)

        #################/
        # loca - Index to location
        #################/
        self.getLOCA(indexToLocFormat, numGlyphs)

        # List of (old glyph id, unicode) pairs, kept sorted like a "sorted
        # dict"; glyph 0 is always included.
        subsetglyphs = [(0, 0)]
        seenGlyphs = set(subsetglyphs)  # O(1) duplicate check
        subsetCharToGlyph = {}
        for code in subset:
            if code in self.charToGlyph:
                entry = (self.charToGlyph[code], code)  # old glyph id => unicode
                if entry not in seenGlyphs:
                    seenGlyphs.add(entry)
                    subsetglyphs.append(entry)
                subsetCharToGlyph[code] = self.charToGlyph[code]  # unicode => old glyph id
            self.maxUni = max(self.maxUni, code)
        (start, dummy) = self.get_table_pos('glyf')

        subsetglyphs.sort()
        glyphSet = {}  # old glyph id => new glyph id
        for newGlyphIdx, (originalGlyphIdx, uni) in enumerate(subsetglyphs):
            glyphSet[originalGlyphIdx] = newGlyphIdx

        codeToGlyph = {}
        for uni, originalGlyphIdx in sorted(subsetCharToGlyph.items()):
            codeToGlyph[uni] = glyphSet[originalGlyphIdx]
        self.codeToGlyph = codeToGlyph

        # getGlyphs() receives the shared containers through `nonlocals`;
        # the list is iterated live on purpose, exactly as before.
        for originalGlyphIdx, uni in subsetglyphs:
            nonlocals = {'start': start, 'glyphSet': glyphSet,
                         'subsetglyphs': subsetglyphs}
            self.getGlyphs(originalGlyphIdx, nonlocals)

        numGlyphs = numberOfHMetrics = len(subsetglyphs)

        # tables copied from the original
        self.add('name', self.get_table('name'))
        for tag in ['cvt ', 'fpgm', 'prep', 'gasp']:
            if tag in self.tables:
                self.add(tag, self.get_table(tag))

        # post - PostScript
        opost = self.get_table('post')
        post = "\x00\x03\x00\x00" + substr(opost, 4, 12) + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
        self.add('post', post)

        # Sort CID2GID map into segments of contiguous codes
        if 0 in codeToGlyph:
            del codeToGlyph[0]
        rangeid = 0
        range_ = {}
        prevcid = -2
        prevglidx = -1
        for cid, glidx in sorted(codeToGlyph.items()):
            if cid == (prevcid + 1) and glidx == (prevglidx + 1):
                range_[rangeid].append(glidx)
            else:
                # new range
                rangeid = cid
                range_[rangeid] = [glidx]
            prevcid = cid
            prevglidx = glidx

        # cmap - Character to glyph mapping - Format 4 (MS / )
        segCount = len(range_) + 1  # + 1 Last segment has missing character 0xFFFF
        searchRange = 1
        entrySelector = 0
        while searchRange * 2 <= segCount:
            searchRange = searchRange * 2
            entrySelector = entrySelector + 1
        searchRange = searchRange * 2
        rangeShift = segCount * 2 - searchRange
        length = 16 + (8 * segCount) + (numGlyphs + 1)
        cmap = [
            0, 1,           # Index : version, number of encoding subtables
            3, 1,           # Encoding Subtable : platform (MS=3), encoding (Unicode)
            0, 12,          # Encoding Subtable : offset (hi,lo)
            4, length, 0,   # Format 4 Mapping subtable: format, length, language
            segCount * 2,
            searchRange,
            entrySelector,
            rangeShift,
        ]

        range_ = sorted(range_.items())

        # endCode(s)
        for startCode, subrange in range_:
            cmap.append(startCode + (len(subrange) - 1))
        cmap.append(0xFFFF)  # endCode of last Segment
        cmap.append(0)       # reservedPad

        # startCode(s)
        for startCode, subrange in range_:
            cmap.append(startCode)
        cmap.append(0xFFFF)  # startCode of last Segment

        # idDelta(s)
        for startCode, subrange in range_:
            cmap.append(-(startCode - subrange[0]))
        cmap.append(1)       # idDelta of last Segment

        # idRangeOffset(s): always 0, glyph ids come from idDelta
        for _ in range_:
            cmap.append(0)
        cmap.append(0)       # idRangeOffset of last Segment

        for startCode, subrange in range_:
            cmap.extend(subrange)
        cmap.append(0)       # Mapping for last character

        cmapstr = ''
        for cm in cmap:
            # idDelta is the only entry expected to go negative.  Its
            # arithmetic is modulo 65536, so encode the two's-complement
            # 16-bit value; this matches ">h" for -32768..-1 and stays
            # correct for deltas below that.
            if cm < 0:
                cm &= 0xFFFF
            cmapstr += pack(">H", cm)
        self.add('cmap', cmapstr)

        # glyf - Glyph data
        (glyfOffset, glyfLength) = self.get_table_pos('glyf')
        if glyfLength < self.maxStrLenRead:
            # Small enough to hold in memory; otherwise glyphs are read
            # chunk by chunk below.
            glyphData = self.get_table('glyf')

        offsets = []
        glyf = ''
        pos = 0
        hmtxstr = ''
        self.glyphdata = {}

        for originalGlyphIdx, uni in subsetglyphs:
            # hmtx - Horizontal Metrics
            hmtxstr += self.getHMetric(orignHmetrics, originalGlyphIdx)

            offsets.append(pos)
            try:
                glyphPos = self.glyphPos[originalGlyphIdx]
                glyphLen = self.glyphPos[originalGlyphIdx + 1] - glyphPos
            except (IndexError, KeyError):
                warnings.warn("missing glyph %s" % (originalGlyphIdx))
                # Emit an empty glyph; give glyphPos a defined value so the
                # zero-length read below cannot hit an unbound/stale name.
                glyphPos = 0
                glyphLen = 0

            if glyfLength < self.maxStrLenRead:
                data = substr(glyphData, glyphPos, glyphLen)
            elif glyphLen > 0:
                data = self.get_chunk(glyfOffset + glyphPos, glyphLen)
            else:
                data = ''

            if glyphLen > 0:
                up = unpack(">H", substr(data, 0, 2))[0]
            # High bit set => numberOfContours is negative => composite glyph
            if glyphLen > 2 and (up & (1 << 15)):
                pos_in_glyph = 10
                flags = GF_MORE
                while flags & GF_MORE:
                    flags = unpack(">H", substr(data, pos_in_glyph, 2))[0]
                    glyphIdx = unpack(">H", substr(data, pos_in_glyph + 2, 2))[0]
                    self.glyphdata.setdefault(originalGlyphIdx, {}).setdefault('compGlyphs', []).append(glyphIdx)
                    try:
                        newGlyphIdx = glyphSet[glyphIdx]
                    except KeyError:
                        warnings.warn("missing glyph data %s" % glyphIdx)
                        # Point the component at glyph 0 (always in the
                        # subset) so the glyph record stays valid binary
                        # data instead of being replaced by a non-string.
                        newGlyphIdx = 0
                    data = self._set_ushort(data, pos_in_glyph + 2, newGlyphIdx)
                    pos_in_glyph += 4
                    if flags & GF_WORDS:
                        pos_in_glyph += 4
                    else:
                        pos_in_glyph += 2
                    if flags & GF_SCALE:
                        pos_in_glyph += 2
                    elif flags & GF_XYSCALE:
                        pos_in_glyph += 4
                    elif flags & GF_TWOBYTWO:
                        pos_in_glyph += 8

            glyf += data
            pos += glyphLen
            if pos % 4 != 0:
                # keep every glyph aligned on a 4-byte boundary
                padding = 4 - (pos % 4)
                glyf += str_repeat("\0", padding)
                pos += padding

        offsets.append(pos)
        self.add('glyf', glyf)

        # hmtx - Horizontal Metrics
        self.add('hmtx', hmtxstr)

        # loca - Index to location
        locastr = ''
        if ((pos + 1) >> 1) > 0xFFFF:
            indexToLocFormat = 1  # long format
            for offset in offsets:
                locastr += pack(">L", offset)
        else:
            indexToLocFormat = 0  # short format stores offset / 2
            for offset in offsets:
                # integer division: struct rejects floats on Python 3
                locastr += pack(">H", offset // 2)
        self.add('loca', locastr)

        # head - Font header
        head = self.get_table('head')
        head = self._set_ushort(head, 50, indexToLocFormat)
        self.add('head', head)

        # hhea - Horizontal Header
        hhea = self.get_table('hhea')
        hhea = self._set_ushort(hhea, 34, numberOfHMetrics)
        self.add('hhea', hhea)

        # maxp - Maximum Profile
        maxp = self.get_table('maxp')
        maxp = self._set_ushort(maxp, 4, numGlyphs)
        self.add('maxp', maxp)

        # OS/2 - OS/2
        os2 = self.get_table('OS/2')
        self.add('OS/2', os2)
    finally:
        # Release the font file whether or not subsetting succeeded.
        self.fh.close()

    # Put the TTF file together
    stm = self.endTTFile('')
    return stm
def makeSubset(self, file, subset):
    """Build a subset of a TrueType font and return the assembled result.

    The font at ``file`` is opened and its ``head``, ``hhea``, ``maxp`` and
    ``cmap`` tables are read to map character codes to glyphs.  Glyphs for
    the codes in ``subset`` (plus glyph 0, which is always kept) are
    renumbered in ascending order of their original glyph id, and new
    ``cmap``, ``glyf``, ``hmtx``, ``loca``, ``head``, ``hhea`` and ``maxp``
    tables are registered through ``self.add``.  ``name``, ``post`` and
    ``OS/2`` are carried over (``post`` with a rewritten header), as are
    ``cvt ``, ``fpgm``, ``prep`` and ``gasp`` when present in ``self.tables``.

    Args:
        file: Path of the font file; it is opened in binary mode.
        subset: Iterable of integer character codes to keep.

    Returns:
        Whatever ``self.endTTFile('')`` returns for the tables added here.

    Note:
        ``die`` is called when the font has no usable Unicode cmap subtable.
        The file handle is closed even when parsing fails part-way.
    """
    self.filename = file
    self.fh = open(file, 'rb')
    try:
        self._pos = 0
        self.charWidths = []
        self.glyphPos = {}
        self.charToGlyph = {}
        self.tables = {}
        self.otables = {}
        self.ascent = 0
        self.descent = 0
        self.skip(4)
        self.maxUni = 0
        self.readTableDirectory()

        #################/
        # head - Font header table
        #################/
        self.seek_table("head")
        self.skip(50)
        indexToLocFormat = self.read_ushort()
        self.read_ushort()  # glyphDataFormat: consumed, not needed here

        #################/
        # hhea - Horizontal header table
        #################/
        self.seek_table("hhea")
        self.skip(32)
        self.read_ushort()  # metricDataFormat: consumed, not needed here
        orignHmetrics = numberOfHMetrics = self.read_ushort()

        #################/
        # maxp - Maximum profile table
        #################/
        self.seek_table("maxp")
        self.skip(4)
        numGlyphs = self.read_ushort()

        #################/
        # cmap - Character to glyph index mapping table
        #################/
        cmap_offset = self.seek_table("cmap")
        self.skip(2)
        cmapTableCount = self.read_ushort()
        unicode_cmap_offset = 0
        unicode_cmap_offset12 = 0
        for _ in range(cmapTableCount):
            platformID = self.read_ushort()
            encodingID = self.read_ushort()
            offset = self.read_ulong()
            save_pos = self._pos
            if platformID == 3 and encodingID == 10:  # Microsoft, UCS-4
                if self.get_ushort(cmap_offset + offset) == 12:
                    unicode_cmap_offset12 = cmap_offset + offset
                    break
            if (platformID == 3 and encodingID == 1) or platformID == 0:
                # Microsoft, Unicode
                if self.get_ushort(cmap_offset + offset) == 4:
                    unicode_cmap_offset = cmap_offset + offset
                    break
            # get_ushort() may have moved the read position; go back to the
            # next encoding record.
            self.seek(save_pos)

        if not unicode_cmap_offset and not unicode_cmap_offset12:
            die('Font (' + self.filename + ') does not have cmap for Unicode (platform 3, encoding 1, format 4, or platform 3, encoding 10, format 12, or platform 0, any encoding, format 4)')

        glyphToChar = {}
        charToGlyph = {}
        if unicode_cmap_offset12:
            self.getCMAP12(unicode_cmap_offset12, glyphToChar, charToGlyph)
        else:
            self.getCMAP4(unicode_cmap_offset, glyphToChar, charToGlyph)
        self.charToGlyph = charToGlyph

        #################/
        # hmtx - Horizontal metrics table
        #################/
        scale = 1  # not used
        self.getHMTX(numberOfHMetrics, numGlyphs, glyphToChar, scale)

        #################/
        # loca - Index to location
        #################/
        self.getLOCA(indexToLocFormat, numGlyphs)

        # List of (old glyph id, unicode) pairs, kept sorted like a "sorted
        # dict"; glyph 0 is always included.
        subsetglyphs = [(0, 0)]
        seenGlyphs = set(subsetglyphs)  # O(1) duplicate check
        subsetCharToGlyph = {}
        for code in subset:
            if code in self.charToGlyph:
                entry = (self.charToGlyph[code], code)  # old glyph id => unicode
                if entry not in seenGlyphs:
                    seenGlyphs.add(entry)
                    subsetglyphs.append(entry)
                subsetCharToGlyph[code] = self.charToGlyph[code]  # unicode => old glyph id
            self.maxUni = max(self.maxUni, code)
        (start, dummy) = self.get_table_pos('glyf')

        subsetglyphs.sort()
        glyphSet = {}  # old glyph id => new glyph id
        for newGlyphIdx, (originalGlyphIdx, uni) in enumerate(subsetglyphs):
            glyphSet[originalGlyphIdx] = newGlyphIdx

        codeToGlyph = {}
        for uni, originalGlyphIdx in sorted(subsetCharToGlyph.items()):
            codeToGlyph[uni] = glyphSet[originalGlyphIdx]
        self.codeToGlyph = codeToGlyph

        # getGlyphs() receives the shared containers through `nonlocals`;
        # the list is iterated live on purpose, exactly as before.
        for originalGlyphIdx, uni in subsetglyphs:
            nonlocals = {'start': start, 'glyphSet': glyphSet,
                         'subsetglyphs': subsetglyphs}
            self.getGlyphs(originalGlyphIdx, nonlocals)

        numGlyphs = numberOfHMetrics = len(subsetglyphs)

        # tables copied from the original
        self.add('name', self.get_table('name'))
        for tag in ['cvt ', 'fpgm', 'prep', 'gasp']:
            if tag in self.tables:
                self.add(tag, self.get_table(tag))

        # post - PostScript
        opost = self.get_table('post')
        post = "\x00\x03\x00\x00" + substr(opost, 4, 12) + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
        self.add('post', post)

        # Sort CID2GID map into segments of contiguous codes
        if 0 in codeToGlyph:
            del codeToGlyph[0]
        rangeid = 0
        range_ = {}
        prevcid = -2
        prevglidx = -1
        for cid, glidx in sorted(codeToGlyph.items()):
            if cid == (prevcid + 1) and glidx == (prevglidx + 1):
                range_[rangeid].append(glidx)
            else:
                # new range
                rangeid = cid
                range_[rangeid] = [glidx]
            prevcid = cid
            prevglidx = glidx

        # cmap - Character to glyph mapping - Format 4 (MS / )
        segCount = len(range_) + 1  # + 1 Last segment has missing character 0xFFFF
        searchRange = 1
        entrySelector = 0
        while searchRange * 2 <= segCount:
            searchRange = searchRange * 2
            entrySelector = entrySelector + 1
        searchRange = searchRange * 2
        rangeShift = segCount * 2 - searchRange
        length = 16 + (8 * segCount) + (numGlyphs + 1)
        cmap = [
            0, 1,           # Index : version, number of encoding subtables
            3, 1,           # Encoding Subtable : platform (MS=3), encoding (Unicode)
            0, 12,          # Encoding Subtable : offset (hi,lo)
            4, length, 0,   # Format 4 Mapping subtable: format, length, language
            segCount * 2,
            searchRange,
            entrySelector,
            rangeShift,
        ]

        range_ = sorted(range_.items())

        # endCode(s)
        for startCode, subrange in range_:
            cmap.append(startCode + (len(subrange) - 1))
        cmap.append(0xFFFF)  # endCode of last Segment
        cmap.append(0)       # reservedPad

        # startCode(s)
        for startCode, subrange in range_:
            cmap.append(startCode)
        cmap.append(0xFFFF)  # startCode of last Segment

        # idDelta(s)
        for startCode, subrange in range_:
            cmap.append(-(startCode - subrange[0]))
        cmap.append(1)       # idDelta of last Segment

        # idRangeOffset(s): always 0, glyph ids come from idDelta
        for _ in range_:
            cmap.append(0)
        cmap.append(0)       # idRangeOffset of last Segment

        for startCode, subrange in range_:
            cmap.extend(subrange)
        cmap.append(0)       # Mapping for last character

        cmapstr = ''
        for cm in cmap:
            # idDelta is the only entry expected to go negative.  Its
            # arithmetic is modulo 65536, so encode the two's-complement
            # 16-bit value; this matches ">h" for -32768..-1 and stays
            # correct for deltas below that.
            if cm < 0:
                cm &= 0xFFFF
            cmapstr += pack(">H", cm)
        self.add('cmap', cmapstr)

        # glyf - Glyph data
        (glyfOffset, glyfLength) = self.get_table_pos('glyf')
        if glyfLength < self.maxStrLenRead:
            # Small enough to hold in memory; otherwise glyphs are read
            # chunk by chunk below.
            glyphData = self.get_table('glyf')

        offsets = []
        glyf = ''
        pos = 0
        hmtxstr = ''
        self.glyphdata = {}

        for originalGlyphIdx, uni in subsetglyphs:
            # hmtx - Horizontal Metrics
            hmtxstr += self.getHMetric(orignHmetrics, originalGlyphIdx)

            offsets.append(pos)
            try:
                glyphPos = self.glyphPos[originalGlyphIdx]
                glyphLen = self.glyphPos[originalGlyphIdx + 1] - glyphPos
            except (IndexError, KeyError):
                warnings.warn("missing glyph %s" % (originalGlyphIdx))
                # Emit an empty glyph; give glyphPos a defined value so the
                # zero-length read below cannot hit an unbound/stale name.
                glyphPos = 0
                glyphLen = 0

            if glyfLength < self.maxStrLenRead:
                data = substr(glyphData, glyphPos, glyphLen)
            elif glyphLen > 0:
                data = self.get_chunk(glyfOffset + glyphPos, glyphLen)
            else:
                data = ''

            if glyphLen > 0:
                up = unpack(">H", substr(data, 0, 2))[0]
            # High bit set => numberOfContours is negative => composite glyph
            if glyphLen > 2 and (up & (1 << 15)):
                pos_in_glyph = 10
                flags = GF_MORE
                while flags & GF_MORE:
                    flags = unpack(">H", substr(data, pos_in_glyph, 2))[0]
                    glyphIdx = unpack(">H", substr(data, pos_in_glyph + 2, 2))[0]
                    self.glyphdata.setdefault(originalGlyphIdx, {}).setdefault('compGlyphs', []).append(glyphIdx)
                    try:
                        newGlyphIdx = glyphSet[glyphIdx]
                    except KeyError:
                        warnings.warn("missing glyph data %s" % glyphIdx)
                        # Point the component at glyph 0 (always in the
                        # subset) so the glyph record stays valid binary
                        # data instead of being replaced by a non-string.
                        newGlyphIdx = 0
                    data = self._set_ushort(data, pos_in_glyph + 2, newGlyphIdx)
                    pos_in_glyph += 4
                    if flags & GF_WORDS:
                        pos_in_glyph += 4
                    else:
                        pos_in_glyph += 2
                    if flags & GF_SCALE:
                        pos_in_glyph += 2
                    elif flags & GF_XYSCALE:
                        pos_in_glyph += 4
                    elif flags & GF_TWOBYTWO:
                        pos_in_glyph += 8

            glyf += data
            pos += glyphLen
            if pos % 4 != 0:
                # keep every glyph aligned on a 4-byte boundary
                padding = 4 - (pos % 4)
                glyf += str_repeat("\0", padding)
                pos += padding

        offsets.append(pos)
        self.add('glyf', glyf)

        # hmtx - Horizontal Metrics
        self.add('hmtx', hmtxstr)

        # loca - Index to location
        locastr = ''
        if ((pos + 1) >> 1) > 0xFFFF:
            indexToLocFormat = 1  # long format
            for offset in offsets:
                locastr += pack(">L", offset)
        else:
            indexToLocFormat = 0  # short format stores offset / 2
            for offset in offsets:
                # integer division: struct rejects floats on Python 3
                locastr += pack(">H", offset // 2)
        self.add('loca', locastr)

        # head - Font header
        head = self.get_table('head')
        head = self._set_ushort(head, 50, indexToLocFormat)
        self.add('head', head)

        # hhea - Horizontal Header
        hhea = self.get_table('hhea')
        hhea = self._set_ushort(hhea, 34, numberOfHMetrics)
        self.add('hhea', hhea)

        # maxp - Maximum Profile
        maxp = self.get_table('maxp')
        maxp = self._set_ushort(maxp, 4, numGlyphs)
        self.add('maxp', maxp)

        # OS/2 - OS/2
        os2 = self.get_table('OS/2')
        self.add('OS/2', os2)
    finally:
        # Release the font file whether or not subsetting succeeded.
        self.fh.close()

    # Put the TTF file together
    stm = self.endTTFile('')
    return stm