def parseXML(xmlSnippet):
    """Parses a snippet of XML.

    Input can be either a single string (unicode or UTF-8 bytes), or a
    sequence of strings.

    The result is in the same format that would be returned by XMLReader,
    but the parser imposes no constraints on the root element so it can
    be called on small snippets of TTX files.

    Raises TypeError if xmlSnippet is neither bytes, str, nor an Iterable.
    """
    # To support snippets with multiple elements, we add a fake root.
    reader = TestXMLReader_()
    xml = b"<root>"
    if isinstance(xmlSnippet, bytes):
        xml += xmlSnippet
    elif isinstance(xmlSnippet, str):
        xml += tobytes(xmlSnippet, 'utf-8')
    elif isinstance(xmlSnippet, Iterable):
        xml += b"".join(tobytes(s, 'utf-8') for s in xmlSnippet)
    else:
        raise TypeError("expected string or sequence of strings; found %r"
                        % type(xmlSnippet).__name__)
    xml += b"</root>"
    # The complete document is handed over in this single call, so it must
    # be flagged as final. With isfinal false the parser is allowed to hold
    # back callbacks (Expat >= 2.6.0 defers re-parsing of unfinished input),
    # which could leave reader.root unpopulated when we read it below.
    reader.parser.Parse(xml, True)
    # Index 2 of the fake root is returned, i.e. everything that was parsed
    # inside <root>...</root> (presumably its content list; see TestXMLReader_).
    return reader.root[2]
def test_newlinestr(self):
    """Check that XMLWriter ends every line with the requested newline string."""
    xml_declaration = b'<?xml version="1.0" encoding="UTF-8"?>'
    for newline in (None, '\n', '\r\n', '\r', ''):
        writer = XMLWriter(BytesIO(), newlinestr=newline)
        for word in ("hello", "world"):
            writer.write(word)
            writer.newline()
        # When no newline string is given, os.linesep is the expected separator.
        if newline is None:
            separator = tobytes(os.linesep)
        else:
            separator = tobytes(newline)
        expected = (
            xml_declaration + separator
            + b"hello" + separator
            + b"world" + separator
        )
        self.assertEqual(expected, writer.file.getvalue())
def __init__(self, buf=b'', encoding="ascii"):
    """Initialise the object over ``buf``, positioned at offset 0 and not closed.

    ``buf`` is passed through tobytes() before being stored; ``encoding``
    is stored unchanged on the instance.
    """
    # Force the stored buffer to be a byte string
    raw = tobytes(buf)
    self.encoding = encoding
    self.closed = False
    self.pos = 0
    self.buf = raw
    self.len = len(raw)
def test_decompile_magic_length_last_extra(empty_font):
    """Decompile when the last extra index entry carries the length 0x8000.

    The data is one byte longer than 0x8000; the whole of it is expected
    to end up in the 'fpgm' extra program.
    """
    program = "0" * (0x8000 + 1)
    empty_font['TSI0'].extra_indices[-1] = (0xFFFD, 0x8000, 0)

    table = table_T_S_I__1()
    table.decompile(tobytes(program), empty_font)

    assert table.extraPrograms['fpgm'] == program
def compile(self, ttFont):
    """Return the binary form of this table.

    Layout produced: the packed GMAPFormat header, the font name, zero
    padding up to ``recordsOffset``, then each record's compiled data in
    order. ``recordsCount``, ``fontNameLength`` and ``recordsOffset`` are
    refreshed on ``self`` before the header is packed.
    """
    self.recordsCount = len(self.gmapRecords)
    self.fontNameLength = len(self.psFontName)
    # (name length + 12) rounded up to the next multiple of four.
    self.recordsOffset = 4 * ((self.fontNameLength + 12 + 3) // 4)

    head = sstruct.pack(GMAPFormat, self) + tobytes(self.psFontName)
    chunks = [head, b"\0" * (self.recordsOffset - len(head))]
    chunks.extend(record.compile(ttFont) for record in self.gmapRecords)
    return b"".join(chunks)
def test_from_svg_file(self):
    """Draw an SVGPath built from a file name and compare the recorded commands."""
    recorder = RecordingPen()
    # delete=False: the file must survive the `with` so it can be reopened by name.
    with NamedTemporaryFile(delete=False) as handle:
        handle.write(tobytes(SVG_DATA))
    svg_path = handle.name
    try:
        SVGPath(svg_path).draw(recorder)
    finally:
        os.remove(svg_path)
    assert recorder.value == EXPECTED_PEN_COMMANDS
def toBytes(self, errors='strict'):
    """Return self.string as a bytes object.

    A value that is already bytes is returned as is. A Unicode string is
    encoded with the encoding reported by self.getEncoding(); note that
    self.getEncoding() answers 'ascii' when the library does not know
    the encoding.

    ``errors`` is forwarded to the underlying encode() call and decides
    what happens when the string cannot be represented in that encoding.
    With the default, 'strict', a UnicodeEncodeError is raised.
    """
    value = self.string
    encoding = self.getEncoding()
    return tobytes(value, encoding=encoding, errors=errors)
def compile(self, ttFont):
    """Concatenate all programs into one byte string and update the index table.

    Glyph programs are written in glyph order, followed by the extra
    programs in sorted order of ``self.extras`` items. Every program
    starts on an even offset. For each one an (id, length, offset) triple
    is collected and handed to the index table (``ttFont[self.indextable]``)
    through its ``set()`` method. Lengths of 0x8000 or more are recorded
    as 0x8000.
    """
    if not hasattr(self, "glyphPrograms"):
        self.glyphPrograms = {}
        self.extraPrograms = {}
    data = b''
    indextable = ttFont[self.indextable]

    indices = []
    for glyphID, glyphName in enumerate(ttFont.getGlyphOrder()):
        if len(data) % 2:
            # align on 2-byte boundaries, fill with return chars. Yum.
            data += b"\015"
        if glyphName in self.glyphPrograms:
            program = tobytes(self.glyphPrograms[glyphName], encoding="utf-8")
        else:
            program = b""
        indices.append((glyphID, min(len(program), 0x8000), len(data)))
        data += program

    extra_indices = []
    for code, extraName in sorted(self.extras.items()):
        if len(data) % 2:
            # align on 2-byte boundaries, fill with return chars.
            data += b"\015"
        if extraName in self.extraPrograms:
            program = tobytes(self.extraPrograms[extraName], encoding="utf-8")
        else:
            program = b""
        extra_indices.append((code, min(len(program), 0x8000), len(data)))
        data += program

    indextable.set(indices, extra_indices)
    return data
def test_decompile_offset_past_end(empty_font):
    """An index entry whose offset lies beyond the data is skipped with a warning."""
    program = 'baz'
    data = tobytes(program)
    size = len(data)

    empty_font.glyphOrder = ['foo', 'bar']
    empty_font['TSI0'].indices = [(0, size, 0), (1, 1, size + 1)]

    table = table_T_S_I__1()
    with CapturingLogHandler(table.log, "WARNING") as captor:
        table.decompile(data, empty_font)

    # the 'bar' program is skipped because its offset > len(data)
    assert table.glyphPrograms == {'foo': 'baz'}
    assert any(
        "textOffset > totalLength" in record.msg for record in captor.records
    )
def compile(self, ttFont):
    """Return the binary form of this table.

    A copy of the instance dict is adjusted (nameLength, uniqueName and
    METAMD5 are replaced by their packed forms), packed with SINGFormat,
    and followed by the base glyph name.
    """
    fields = self.__dict__.copy()
    fields["nameLength"] = bytechr(len(self.baseGlyphName))
    fields["uniqueName"] = self.compilecompileUniqueName(self.uniqueName, 28)

    # NOTE(review): eval() executes whatever expression self.METAMD5 holds.
    # If that text can come from an untrusted file this is a code-execution
    # risk; confirm the source of the value and consider a literal parser.
    md5Values = eval(self.METAMD5)
    fields["METAMD5"] = b"".join(bytechr(value) for value in md5Values)
    assert len(fields["METAMD5"]) == 16, (
        "Failed to pack 16 byte MD5 hash in SING table")

    return sstruct.pack(SINGFormat, fields) + tobytes(self.baseGlyphName)
def compile(self, ttFont):
    """Return the binary form of this table.

    Output is a ">LLL" header (version, flags, tag count), one ">HH"
    (offset, length) pair per tag, and finally the shared string pool.
    A tag already present in the pool as a substring is not appended
    again; its existing position is reused.
    """
    tagCount = len(self.tags)
    chunks = [struct.pack(">LLL", self.version, self.flags, tagCount)]
    # The pool follows the 12-byte header and the 4-byte-per-tag records.
    poolStart = 12 + tagCount * 4

    pool = ""
    for tag in self.tags:
        position = pool.find(tag)
        if position < 0:
            position = len(pool)
            pool += tag
        chunks.append(struct.pack(">HH", position + poolStart, len(tag)))

    chunks.append(tobytes(pool))
    return bytesjoin(chunks)
def compile(self, ttFont):
    """Return the binary form of this table.

    The result is a ">HLL" header, the document index (entry count plus
    one ">HHLL" record per document) and then the document payloads.
    Identical payloads are stored once and share an offset. Entries of
    ``self.docList`` given as list/tuple are converted to SVGDocument in
    place. A document is gzipped when the table-level ``compressed``
    attribute or the document's own flag asks for it, unless it already
    starts with the gzip magic bytes or gzip would not make it smaller.
    """
    version = 0
    reserved = 0
    # I start the SVGDocIndex right after the header.
    offsetToSVGDocIndex = SVG_format_0Size

    numEntries = len(self.docList)
    countField = struct.pack(">H", numEntries)
    indexRecords = [countField]
    payloads = []
    offsetByPayload = {}
    nextOffset = len(countField) + doc_index_entry_format_0Size * numEntries
    compressAll = getattr(self, "compressed", False)

    for position, doc in enumerate(self.docList):
        if isinstance(doc, (list, tuple)):
            doc = SVGDocument(*doc)
            self.docList[position] = doc

        payload = tobytes(doc.data, encoding="utf_8")
        if (compressAll or doc.compressed) and not payload.startswith(b"\x1f\x8b"):
            import gzip

            buffer = BytesIO()
            # mtime=0 strips the useless timestamp and makes gzip output
            # reproducible; equivalent to `gzip -n`
            with gzip.GzipFile(None, "w", fileobj=buffer, mtime=0) as gz:
                gz.write(payload)
            squeezed = buffer.getvalue()
            if len(squeezed) < len(payload):
                payload = squeezed

        docOffset = offsetByPayload.get(payload)
        if docOffset is None:
            # First time this payload is seen: place it after the previous ones.
            docOffset = nextOffset
            offsetByPayload[payload] = docOffset
            payloads.append(payload)
            nextOffset += len(payload)

        indexRecords.append(
            struct.pack(">HHLL", doc.startGlyphID, doc.endGlyphID,
                        docOffset, len(payload))
        )

    svgDocData = bytesjoin(indexRecords + payloads)
    header = struct.pack(">HLL", version, offsetToSVGDocIndex, reserved)
    return bytesjoin([header, svgDocData])
def pack(fmt, obj):
    """Pack the fields named by ``fmt`` from ``obj`` into a byte string.

    ``obj`` may be a dict or any object with a ``__dict__``. Fields listed
    as fixed-point in the format are converted with fl2fi(); str values
    are converted with tobytes() before being handed to struct.pack().
    """
    formatstring, names, fixes = getformat(fmt, keep_pad_byte=True)
    source = obj if isinstance(obj, dict) else obj.__dict__

    values = []
    for name in names:
        item = source[name]
        if name in fixes:
            # fixed point conversion
            item = fl2fi(item, fixes[name])
        elif isinstance(item, str):
            item = tobytes(item)
        values.append(item)

    return struct.pack(formatstring, *values)
def _makeMacName(name, nameID, language, font=None):
    """Create a NameRecord for Apple platforms

    'language' is an arbitrary IETF BCP 47 language identifier such
    as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'.

    Preference is given to a Macintosh NameRecord (platform ID 1 with an
    old-style Macintosh language enum), because old applications
    understand it. When that is not possible, a Unicode NameRecord
    (platform ID 0) is created whose language points into the font's
    'ltag' table. The Unicode form can hold any string in any language,
    but legacy applications might not recognize the format and would then
    ignore those names.

    'font' should be the TTFont the name is created for. With 'font' set
    to None only legacy Macintosh NameRecords can be produced; names that
    would need an 'ltag' table yield None.

    See the section “The language identifier” in Apple’s specification:
    https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
    """
    macLang = _MAC_LANGUAGE_CODES.get(language.lower())
    macScript = _MAC_LANGUAGE_TO_SCRIPT.get(macLang)
    if macLang is not None and macScript is not None:
        encoding = getEncoding(1, macScript, macLang, default="ascii")
        # Trial-encode the name. A failure (no support for the legacy
        # encoding, or characters it cannot represent) sends us on to
        # the Unicode + ltag route below.
        try:
            tobytes(name, encoding, errors="strict")
            return makeName(name, nameID, 1, macScript, macLang)
        except UnicodeEncodeError:
            pass

    if font is None:
        log.warning("cannot store language %s into 'ltag' table "
                    "without having access to the TTFont object" %
                    language)
        return None

    ltag = font.tables.get("ltag")
    if ltag is None:
        ltag = newTable("ltag")
        font["ltag"] = ltag
    # 0 = Unicode; 4 = “Unicode 2.0 or later semantics (non-BMP characters allowed)”
    # “The preferred platform-specific code for Unicode would be 3 or 4.”
    # https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
    return makeName(name, nameID, 0, 4, ltag.addTag(language))
def test_include_absolute_path(self):
    """An include() statement naming an absolute path pulls that file into the lexer."""
    feature_source = """ feature kern { pos A B -40; } kern; """
    # delete=False: the file has to outlive the `with` so the lexer can open it.
    with tempfile.NamedTemporaryFile(delete=False) as included:
        included.write(tobytes(feature_source, encoding="utf-8"))
    included_path = included.name
    including = StringIO("include(%s);" % included_path)
    try:
        lexer = IncludingLexer(including)
        seen_files = {loc.file for _, _, loc in lexer}
        self.assertIn(included_path, seen_files)
    finally:
        os.remove(included_path)
def compile(self, ttFont):
    """Return the binary form of this table.

    The result is a ">HLL" header, the document index (entry count plus
    one ">HHLL" record per document) and then the document payloads.
    ``self.docList`` holds (doc, startGlyphID, endGlyphID) triples.
    Identical payloads are stored once and share an offset. When the
    table has a truthy ``compressed`` attribute, documents that do not
    already start with the gzip magic bytes are gzipped, and the gzipped
    form is kept only if it is smaller.
    """
    version = 0
    offsetToSVGDocIndex = SVG_format_0Size  # I start the SVGDocIndex right after the header.
    # get SVGDoc info.
    docList = []
    entryList = []
    numEntries = len(self.docList)
    datum = struct.pack(">H", numEntries)
    entryList.append(datum)
    curOffset = len(datum) + doc_index_entry_format_0Size * numEntries
    seenDocs = {}
    for doc, startGlyphID, endGlyphID in self.docList:
        docBytes = tobytes(doc, encoding="utf_8")
        if getattr(self, "compressed", False) and not docBytes.startswith(b"\x1f\x8b"):
            import gzip
            bytesIO = BytesIO()
            # mtime=0 keeps the current time out of the gzip header, so
            # compiling the same table twice yields identical bytes
            # (equivalent to `gzip -n`).
            with gzip.GzipFile(None, "w", fileobj=bytesIO, mtime=0) as gzipper:
                gzipper.write(docBytes)
            gzipped = bytesIO.getvalue()
            if len(gzipped) < len(docBytes):
                docBytes = gzipped
            del gzipped, bytesIO
        docLength = len(docBytes)
        if docBytes in seenDocs:
            # Duplicate payload: point at the copy already scheduled for output.
            docOffset = seenDocs[docBytes]
        else:
            docOffset = curOffset
            curOffset += docLength
            seenDocs[docBytes] = docOffset
            docList.append(docBytes)
        entry = struct.pack(">HHLL", startGlyphID, endGlyphID, docOffset, docLength)
        entryList.append(entry)
    entryList.extend(docList)
    svgDocData = bytesjoin(entryList)

    reserved = 0
    header = struct.pack(">HLL", version, offsetToSVGDocIndex, reserved)
    return bytesjoin([header, svgDocData])
def test_decompile_magic_length_last_glyph(empty_font):
    """Decompile when the last glyph's index entry carries the length 0x8000."""
    foo_content = "0" * 3
    bar_content = "1" * (0x8000 + 1)

    empty_font.glyphOrder = ['foo', 'bar']
    indextable = empty_font['TSI0']
    indextable.indices = [(0, 3, 0), (1, 0x8000, 3)]
    # the actual length of the 'bar' program is the difference between the
    # first extra's offset and the 'bar' offset: 0x8004 - 3
    indextable.extra_indices = [
        (code, 0, 0x8004) for code in (0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD)
    ]

    table = table_T_S_I__1()
    table.decompile(tobytes(foo_content + bar_content), empty_font)

    assert table.glyphPrograms['foo'] == foo_content
    assert table.glyphPrograms['bar'] == bar_content
def unpack(fmt, data, obj=None):
    """Unpack ``data`` according to ``fmt`` and store the fields on ``obj``.

    ``obj`` may be a dict or an object with a ``__dict__``; a new dict is
    used when it is None. Fixed-point fields are converted with fi2fl().
    bytes values are converted with tostr() and left as bytes when that
    raises UnicodeDecodeError. Returns ``obj``.
    """
    if obj is None:
        obj = {}
    data = tobytes(data)
    formatstring, names, fixes = getformat(fmt)
    target = obj if isinstance(obj, dict) else obj.__dict__

    elements = struct.unpack(formatstring, data)
    for index, name in enumerate(names):
        value = elements[index]
        if name in fixes:
            # fixed point conversion
            value = fi2fl(value, fixes[name])
        elif isinstance(value, bytes):
            try:
                value = tostr(value)
            except UnicodeDecodeError:
                pass
        target[name] = value
    return obj
def test_decompile_magic_length_non_last(empty_font):
    """Decompile when a non-last extra entry carries the length 0x8000.

    Also checks the truncated case: with the final byte missing a warning
    is logged and the 'cvt' program is one character shorter.
    """
    ppgm_content = "0" * 3
    cvt_content = "1" * (0x8000 + 1)
    empty_font['TSI0'].extra_indices = [
        (0xFFFA, 3, 0),
        # the actual length of the 'cvt' program is
        # nextTextOffset - textOffset: 0x8004 - 3
        (0xFFFB, 0x8000, 3),
        (0xFFFC, 0, 0x8004),
        (0xFFFD, 0, 0x8004),
    ]
    data = tobytes(ppgm_content + cvt_content)

    complete = table_T_S_I__1()
    complete.decompile(data, empty_font)
    assert complete.extraPrograms['ppgm'] == ppgm_content
    assert complete.extraPrograms['cvt'] == cvt_content

    truncated = table_T_S_I__1()
    with CapturingLogHandler(truncated.log, "WARNING") as captor:
        # last entry is truncated
        truncated.decompile(data[:-1], empty_font)
    captor.assertRegex("nextTextOffset > totalLength")
    assert truncated.extraPrograms['cvt'] == cvt_content[:-1]
def fromXML(self, name, attrs, content, ttFont):
    """Populate this object from XML attributes and base64 content.

    "format" is required; "reserved1" and "reserved2" default to "0".
    The content items accepted by pem_spam are joined, base64-decoded
    and stored in ``self.pkcs7``.
    """
    self.ulFormat = safeEval(attrs["format"])
    self.usReserved1 = safeEval(attrs.get("reserved1", "0"))
    self.usReserved2 = safeEval(attrs.get("reserved2", "0"))
    encoded = strjoin(filter(pem_spam, content))
    self.pkcs7 = base64.b64decode(tobytes(encoded))
def readPlistFromString(data):
    """Parse a plist held in ``data`` by passing it, as bytes, to loads().

    ``data`` goes through tobytes() with UTF-8 first; loads() is called
    with ``use_builtin_types=False``.
    """
    payload = tobytes(data, encoding="utf-8")
    return loads(payload, use_builtin_types=False)
def packPStrings(strings):
    """Pack ``strings`` into one byte string of length-prefixed items.

    Each item contributes bytechr(len(s)) followed by the string encoded
    as latin1. An empty input gives b"".
    """
    chunks = []
    for s in strings:
        chunks.append(bytechr(len(s)))
        chunks.append(tobytes(s, encoding="latin1"))
    return b"".join(chunks)
def _tobytes(self, s, errors="strict"):
    """Convert ``s`` with tobytes() using this object's ``encoding`` attribute."""
    encoding = self.encoding
    return tobytes(s, encoding, errors)
def fromXML(self, name, attrs, content, ttFont):
    """Rebuild ``self.data`` from the element's text content.

    The content is joined and split on "\\n"; the first and last pieces
    are dropped and the rest is rejoined with "\\r" and converted to bytes.
    """
    text = strjoin(content)
    inner_lines = text.split("\n")[1:-1]
    self.data = tobytes("\r".join(inner_lines))