示例#1
0
def disassemble(aCode):
    """Disassemble a bytecode string into a list of readable instructions.

    Every opcode byte is looked up in ``aCode_info``; each entry is read as
    ``(name, operands)`` where ``operands`` is ``0`` (no operands), ``-1``
    (one count byte followed by that many unsigned bytes) or a ``struct``
    format string.  Opcodes beyond the end of the table fall back to
    entry 0.

    Returns:
        A list of strings: the bare instruction name when there are no
        operands, otherwise ``name(arg1, arg2, ...)``.  If an instruction
        that takes operands is the very last byte of ``aCode``, decoding
        stops and the instructions decoded so far are returned.

    Raises:
        struct.error: if the operands of an instruction are truncated.
    """
    codelen = len(aCode)
    pc = 0
    res = []
    while pc < codelen:
        opcode = byteord(aCode[pc:pc + 1])
        # Valid table indices are 0 .. len - 1, so an opcode equal to the
        # table length must take the fallback entry as well.
        if opcode >= len(aCode_info):
            instr = aCode_info[0]
        else:
            instr = aCode_info[opcode]
        pc += 1
        # An instruction expecting operands with nothing left to read:
        # give up and return what has been decoded.
        if instr[1] != 0 and pc >= codelen:
            return res
        if instr[1] == -1:
            # Variable-length operand list: a count byte, then `count` bytes.
            count = byteord(aCode[pc])
            fmt = "%dB" % count
            pc += 1
        elif instr[1] == 0:
            fmt = ""
        else:
            fmt = instr[1]
        if fmt == "":
            res.append(instr[0])
            continue
        # Unpack in place at offset `pc` instead of copying the tail.
        parms = struct.unpack_from(fmt, aCode, pc)
        res.append(instr[0] + "(" + ", ".join(map(str, parms)) + ")")
        pc += struct.calcsize(fmt)
    return res
示例#2
0
def readLWFN(path, onlyHeader=False):
    """Read an LWFN font file and return its raw Type 1 data.

    The 'POST' resources of the file at ``path`` are walked in order.  The
    first byte of each resource is a chunk code and the second byte must
    be zero.  Codes 1 and 2 contribute their payload (everything after the
    two header bytes), code 4 contributes the full contents of the file at
    ``path``, code 0 is a comment and is skipped, and codes 3 and 5 end the
    scan.  With ``onlyHeader`` set, the scan also ends at the first code-2
    chunk.

    Raises T1Error for a non-zero second byte or an unknown chunk code.
    The joined data is passed to assertType1() before being returned.
    """
    from fontTools.misc.macRes import ResourceReader
    reader = ResourceReader(path)
    chunks = []
    try:
        for res in reader.get('POST', []):
            code = byteord(res.data[0])
            if byteord(res.data[1]) != 0:
                raise T1Error('corrupt LWFN file')
            if code == 0:
                # comment, ignore
                continue
            if code in (3, 5):
                break
            if code == 4:
                with open(path, "rb") as f:
                    chunks.append(f.read())
                continue
            if code not in (1, 2):
                raise T1Error('bad chunk code: ' + repr(code))
            if onlyHeader and code == 2:
                break
            chunks.append(res.data[2:])
    finally:
        reader.close()
    data = bytesjoin(chunks)
    assertType1(data)
    return data
示例#3
0
    def decompile(self, data, ttFont):
        """Unpack the table from ``data`` and normalise the decoded fields.

        The fixed header is unpacked onto ``self`` using SINGFormat; the
        bytes that follow must be exactly ``nameLength`` long and become
        ``baseGlyphName``.  ``uniqueName`` is run through
        decompileUniqueName() and ``METAMD5`` is rewritten as a bracketed,
        comma-separated list of hex values.
        """
        _, remainder = sstruct.unpack2(SINGFormat, data, self)
        self.uniqueName = self.decompileUniqueName(self.uniqueName)
        self.nameLength = byteord(self.nameLength)
        assert len(remainder) == self.nameLength
        self.baseGlyphName = tostr(remainder)

        # Render every digest byte in hex, e.g. "[0x1, 0xff, ...]".
        digest = self.METAMD5
        hexValues = [hex(byteord(digest[0]))]
        for item in digest[1:]:
            hexValues.append(hex(byteord(item)))
        self.METAMD5 = "[" + ", ".join(hexValues) + "]"
示例#4
0
 def decompile(self, data, ttFont):
     """Unpack the header and every device record into ``self.hdmx``.

     ``self.hdmx`` maps the ppem value of each record to a
     _GlyphnamedList holding one unsigned-byte width per glyph.  Records
     are ``self.recordSize`` bytes apart; once ``self.numRecords`` records
     have been consumed no data may remain.
     """
     glyphCount = ttFont['maxp'].numGlyphs
     _glyphOrder = ttFont.getGlyphOrder()  # value unused; the call is kept
     _, remaining = sstruct.unpack2(hdmxHeaderFormat, data, self)
     self.hdmx = {}
     for _ in range(self.numRecords):
         # Record layout: ppem byte, max-size byte, then the widths.
         ppem = byteord(remaining[0])
         _maxSize = byteord(remaining[1])  # read but not stored
         rawWidths = array.array("B", remaining[2:2 + glyphCount])
         self.hdmx[ppem] = _GlyphnamedList(ttFont.getReverseGlyphMap(),
                                           rawWidths)
         remaining = remaining[self.recordSize:]
     assert len(remaining) == 0, "too much hdmx data"
示例#5
0
    def getRow(self, row, bitDepth=1, metrics=None, reverseBytes=False):
        """Return the packed data for one row of the bitmap.

        The bit span of ``row`` inside ``self.imageData`` is obtained from
        ``self._getBitRange(row, bitDepth, metrics)`` and copied out eight
        bits at a time, so the returned row always starts on a byte
        boundary and is zero-padded in its last byte.

        ``metrics`` defaults to ``self.metrics``.  ``row`` must satisfy
        ``0 <= row < metrics.height``.  Every source byte is passed through
        _reverseBytes() before it is shifted; unless ``reverseBytes`` is
        true, the assembled row is passed through _reverseBytes() once
        more before it is returned.
        """
        if metrics is None:
            metrics = self.metrics
        assert 0 <= row and row < metrics.height, "Illegal row access in bitmap"

        # Loop through each byte. This can cover two bytes in the original data or
        # a single byte if things happen to be aligned. The very last entry might
        # not be aligned so take care to trim the binary data to size and pad with
        # zeros in the row data. Bit aligned data is somewhat tricky.
        #
        # Example of data cut. Data cut represented in x's.
        # '|' represents byte boundary.
        # data = ...0XX|XXXXXX00|000... => XXXXXXXX
        #   or
        # data = ...0XX|XXXX0000|000... => XXXXXX00
        #   or
        # data = ...000|XXXXXXXX|000... => XXXXXXXX
        #   or
        # data = ...000|00XXXX00|000... => XXXX0000
        #
        dataList = []
        bitRange = self._getBitRange(row, bitDepth, metrics)
        # (start, end, 8): walk the row's bit span in steps of one output byte.
        stepRange = bitRange + (8, )
        for curBit in range(*stepRange):
            endBit = min(curBit + 8, bitRange[1])
            numBits = endBit - curBit
            cutPoint = curBit % 8
            firstByteLoc = curBit // 8
            secondByteLoc = endBit // 8
            # Number of bits of this output byte that come from the first
            # source byte.
            if firstByteLoc < secondByteLoc:
                numBitsCut = 8 - cutPoint
            else:
                numBitsCut = endBit - curBit
            curByte = _reverseBytes(self.imageData[firstByteLoc])
            firstHalf = byteord(curByte) >> cutPoint
            firstHalf = ((1 << numBitsCut) - 1) & firstHalf
            newByte = firstHalf
            # The remaining bits, if any, come from the following source
            # byte (when there is one).
            if firstByteLoc < secondByteLoc and secondByteLoc < len(
                    self.imageData):
                curByte = _reverseBytes(self.imageData[secondByteLoc])
                secondHalf = byteord(curByte) << numBitsCut
                newByte = (firstHalf | secondHalf) & ((1 << numBits) - 1)
            dataList.append(bytechr(newByte))

        # The way the data is kept is opposite the algorithm used.
        data = bytesjoin(dataList)
        if not reverseBytes:
            data = _reverseBytes(data)
        return data
示例#6
0
    def setRows(self, dataRows, bitDepth=1, metrics=None, reverseBytes=False):
        """Pack ``dataRows`` into ``self.imageData``.

        Each entry of ``dataRows`` holds the byte-aligned data of one row.
        The rows are written back to back at the bit positions given by
        ``self._getBitRange(row, bitDepth, metrics)``.

        ``metrics`` defaults to ``self.metrics``.  Unless ``reverseBytes``
        is true, every row is first passed through _reverseBytes(); the
        packed result is passed through _reverseBytes() before it is
        stored.
        """
        if metrics is None:
            metrics = self.metrics
        if not reverseBytes:
            dataRows = list(map(_reverseBytes, dataRows))

        # Keep track of a list of ordinal values as they are easier to modify
        # than a list of strings. Map to actual strings later.
        # The start bit of the row just past the last one is the total bit
        # count; round it up to whole bytes.
        numBytes = (self._getBitRange(len(dataRows), bitDepth, metrics)[0] +
                    7) // 8
        ordDataList = [0] * numBytes
        for row, data in enumerate(dataRows):
            bitRange = self._getBitRange(row, bitDepth, metrics)
            # (start, end, 8): one step per byte of row data.
            stepRange = bitRange + (8, )
            for curBit, curByte in zip(range(*stepRange), data):
                endBit = min(curBit + 8, bitRange[1])
                cutPoint = curBit % 8
                firstByteLoc = curBit // 8
                secondByteLoc = endBit // 8
                # Number of bits of this row byte that land in the first
                # destination byte.
                if firstByteLoc < secondByteLoc:
                    numBitsCut = 8 - cutPoint
                else:
                    numBitsCut = endBit - curBit
                curByte = byteord(curByte)
                firstByte = curByte & ((1 << numBitsCut) - 1)
                ordDataList[firstByteLoc] |= (firstByte << cutPoint)
                # Whatever is left over spills into the next destination
                # byte, if it exists.
                if firstByteLoc < secondByteLoc and secondByteLoc < numBytes:
                    secondByte = (curByte >> numBitsCut) & (
                        (1 << 8 - numBitsCut) - 1)
                    ordDataList[secondByteLoc] |= secondByte

        # Save the image data with the bits going the correct way.
        self.imageData = _reverseBytes(bytesjoin(map(bytechr, ordDataList)))
示例#7
0
def hintOn(i, hintMaskBytes):
    """Report whether hint number ``i`` is set in ``hintMaskBytes``.

    Used to add the active hints to the bez string when a T2 hintmask
    operator is encountered.  Hint 0 is the most significant bit of the
    first mask byte.
    """
    maskByte = byteord(hintMaskBytes[i // 8])
    bit = 1 << (7 - (i % 8))
    return (bit & maskByte) > 0
示例#8
0
def hexStr(s):
    """Return ``s`` rendered as hex text, two lowercase digits per item."""
    digits = string.hexdigits
    pairs = []
    for item in s:
        value = byteord(item)
        # High nibble first, then low nibble.
        pairs.append(digits[(value >> 4) & 0xF] + digits[value & 0xF])
    return ''.join(pairs)
示例#9
0
def stringToLong(s):
    """Decode a 4-item byte string as a little-endian integer.

    ``s[0]`` is the least significant byte.  Raises ValueError when ``s``
    is not exactly four items long.
    """
    if len(s) != 4:
        raise ValueError('string must be 4 bytes long')
    return sum(byteord(s[pos]) << (pos * 8) for pos in range(4))
示例#10
0
    def doMask(self, index, bezCommand):
        """Read a hint mask at ``index`` and emit the active hints.

        On the first mask seen (``self.hintMaskBytes`` still unset), any
        pending arguments are popped and recorded as vertical hints, and
        the mask length in bytes is derived from ``self.hintCount``.  The
        mask bytes are then read from the top of the calling stack.

        When ``self.read_hints`` is set, the hints currently selected by
        the mask are appended to ``self.bezProgram`` between
        "beginsubr snc" and "endsubr enc"/"newcolors", two values per
        "rb" (horizontal) or "ry" (vertical) line.

        Returns ``(mask bytes, index after the mask)``.
        """
        args = []
        if not self.hintMaskBytes:
            args = self.popallWidth()
            if args:
                self.vhints = []
                self.updateHints(args, self.vhints, "ry")
            self.hintMaskBytes = int((self.hintCount + 7) / 8)

        self.hintMaskString, index = self.callingStack[-1].getBytes(
            index, self.hintMaskBytes)

        if self.read_hints:
            curhhints, curvhints = self.getCurHints(self.hintMaskString)
            mask = [hex(byteord(ch)) for ch in self.hintMaskString]
            log.debug("%s %s %s %s %s", bezCommand, mask, curhhints, curvhints,
                      args)

            program = self.bezProgram
            program.append("beginsubr snc\n")
            for hints, terminator in ((curhhints, "rb\n"),
                                      (curvhints, "ry\n")):
                for pos, hint in enumerate(hints):
                    program.append("%s " % hint)
                    # Close the line after every second value.
                    if pos % 2:
                        program.append(terminator)
            program.extend(["endsubr enc\n", "newcolors\n"])
        return self.hintMaskString, index
示例#11
0
	def doMask(self, index, bezCommand):
		"""Read a hint mask at ``index`` and emit the active hints.

		On the first mask seen (``self.hintMaskBytes`` still unset), any
		pending arguments are popped and recorded as vertical hints, and
		the mask length in bytes is derived from ``self.hintCount``.  The
		mask bytes are then read from the top of the calling stack.

		Unless ``self.removeHints`` is set, the hints currently selected by
		the mask are appended to ``self.bezProgram`` between
		"beginsubr snc" and "endsubr enc"/"newcolors", with an "rb"
		(horizontal) or "ry" (vertical) terminator after every second value.

		Returns ``(mask bytes, index after the mask)``.
		"""
		args = []
		if not self.hintMaskBytes:
			args = self.popallWidth()
			if args:
				self.vhints = []
				self.updateHints(args, self.vhints, "ry")
			self.hintMaskBytes = (self.hintCount + 7) // 8

		self.hintMaskString, index = self.callingStack[-1].getBytes(index, self.hintMaskBytes)

		if not self.removeHints:
			curhhints, curvhints = self.getCurHints(self.hintMaskString)
			mask = [hex(byteord(ch)) for ch in self.hintMaskString]
			debugMsg(bezCommand, mask, curhhints, curvhints, args)

			program = self.bezProgram
			program.append("beginsubr snc\n")
			for hints, terminator in ((curhhints, "rb\n"), (curvhints, "ry\n")):
				for pos, hint in enumerate(hints):
					program.append(str(hint))
					# Close the line after every second value.
					if pos % 2:
						program.append(terminator)
			program.extend(["endsubr enc\n", "newcolors\n"])
		return self.hintMaskString, index
示例#12
0
def unpackPStrings(data, n):
    """Extract ``n`` Pascal strings (length byte + payload) from ``data``.

    A string whose length byte or payload runs past the end of ``data``
    is returned as "".  A warning is logged when bytes are left over, or
    when the strings needed more bytes than ``data`` holds.
    """
    total = len(data)
    pos = 0
    names = []

    for _ in range(n):
        size = byteord(data[pos]) if pos < total else 0
        pos += 1
        end = pos + size

        # Only decode when the whole payload is present.
        if end <= total:
            names.append(tostr(data[pos:end], encoding="latin1"))
        else:
            names.append("")
        pos = end

    if pos < total:
        log.warning("%d extra bytes in post.stringData array", total - pos)
    elif total < pos:
        log.warning("not enough data in post.stringData array")

    return names
示例#13
0
def hintOn(i, hintMaskBytes):
    """Report whether hint number ``i`` is set in ``hintMaskBytes``.

    Used to add the active hints to the bez string when a T2 hintmask
    operator is encountered.  Hint 0 is the most significant bit of the
    first mask byte.
    """
    byteIndex, bitIndex = divmod(i, 8)
    selector = 1 << (7 - bitIndex)
    return (selector & byteord(hintMaskBytes[byteIndex])) > 0
示例#14
0
def hexStr(data):
	"""Convert binary data to a hex string, two lowercase digits per item."""
	digits = string.hexdigits
	pairs = []
	for item in data:
		value = byteord(item)
		# High nibble first, then low nibble.
		pairs.append(digits[(value >> 4) & 0xF] + digits[value & 0xF])
	return ''.join(pairs)
	def decompilePoints_(numPoints, data, offset, tableTag):
		"""(numPoints, data, offset, tableTag) --> ([point1, point2, ...], newOffset)

		Decode the packed point numbers that start at ``data[offset]``.
		A count of zero yields ``range(numPoints)``.  Otherwise the data is
		read as runs of one- or two-byte deltas (two-byte values are
		big-endian) which are accumulated into absolute point numbers.
		Points outside ``0 .. numPoints - 1`` are reported with a warning
		but still returned.
		"""
		assert tableTag in ('cvar', 'gvar')
		pos = offset
		declaredCount = byteord(data[pos])
		pos += 1
		if declaredCount & POINTS_ARE_WORDS:
			# Two-byte count: low bits of the first byte are the high byte.
			declaredCount = ((declaredCount & POINT_RUN_COUNT_MASK) << 8) | byteord(data[pos])
			pos += 1
		if declaredCount == 0:
			return (range(numPoints), pos)

		deltas = []
		while len(deltas) < declaredCount:
			runHeader = byteord(data[pos])
			pos += 1
			runLength = (runHeader & POINT_RUN_COUNT_MASK) + 1
			wide = (runHeader & POINTS_ARE_WORDS) != 0
			run = array.array("H" if wide else "B")
			runSize = runLength * 2 if wide else runLength
			run.frombytes(data[pos:pos + runSize])
			if sys.byteorder != "big":
				run.byteswap()

			assert len(run) == runLength
			pos += runSize

			deltas.extend(run)

		# Convert relative to absolute
		points = []
		running = 0
		for delta in deltas:
			running += delta
			points.append(running)

		outOfRange = {str(p) for p in points if p < 0 or p >= numPoints}
		if outOfRange:
			log.warning("point %s out of range in '%s' table" %
			            (",".join(sorted(outOfRange)), tableTag))
		return (points, pos)
示例#16
0
def _escapechar(c):
    """Helper function for tagToIdentifier()"""
    import re
    if re.match("[a-z0-9]", c):
        return "_" + c
    elif re.match("[A-Z]", c):
        return c + "_"
    else:
        return hex(byteord(c))[2:]
示例#17
0
def unpackBase128(data):
	r""" Decode one UIntBase128 value (one to five bytes) from the start of
	'data' and return a tuple of the decoded integer and the unread data.

	>>> unpackBase128(b'\x3f\x00\x00') == (63, b"\x00\x00")
	True
	>>> unpackBase128(b'\x8f\xff\xff\xff\x7f')[0] == 4294967295
	True
	>>> unpackBase128(b'\x80\x80\x3f')  # doctest: +IGNORE_EXCEPTION_DETAIL
	Traceback (most recent call last):
	  File "<stdin>", line 1, in ?
	TTLibError: UIntBase128 value must not start with leading zeros
	>>> unpackBase128(b'\x8f\xff\xff\xff\xff\x7f')[0]  # doctest: +IGNORE_EXCEPTION_DETAIL
	Traceback (most recent call last):
	  File "<stdin>", line 1, in ?
	TTLibError: UIntBase128-encoded sequence is longer than 5 bytes
	>>> unpackBase128(b'\x90\x80\x80\x80\x00')[0]  # doctest: +IGNORE_EXCEPTION_DETAIL
	Traceback (most recent call last):
	  File "<stdin>", line 1, in ?
	TTLibError: UIntBase128 value exceeds 2**32-1
	"""
	if len(data) == 0:
		raise TTLibError('not enough data to unpack UIntBase128')
	if byteord(data[0]) == 0x80:
		# a leading 0x80 byte would encode leading zeros: reject the font
		raise TTLibError('UIntBase128 value must not start with leading zeros')
	value = 0
	remaining = data
	for _ in range(woff2Base128MaxSize):
		if len(remaining) == 0:
			raise TTLibError('not enough data to unpack UIntBase128')
		byte = byteord(remaining[0])
		remaining = remaining[1:]
		# with any of the top seven bits set, the next shift would overflow
		if value & 0xFE000000:
			raise TTLibError('UIntBase128 value exceeds 2**32-1')
		# append the seven payload bits of this byte
		value = (value << 7) | (byte & 0x7f)
		# a clear high bit marks the final byte
		if not (byte & 0x80):
			return value, remaining
	# ran out of allowed bytes without seeing a final byte
	raise TTLibError('UIntBase128-encoded sequence is longer than 5 bytes')
示例#18
0
def _AsciiHexEncode(input):
    """This is a verbose encoding used for binary data within
    a PDF file.  One byte binary becomes two bytes of ASCII."""
    "Helper function used by images"
    output = StringIO()
    for char in input:
        output.write('%02x' % byteord(char))
    output.write('>')
    output.seek(0)
    return output.read()
示例#19
0
def unpackPStrings(data):
    """Split ``data`` into its consecutive Pascal strings.

    Each string is one length byte followed by that many payload bytes,
    decoded as latin1.  Reading continues until ``data`` is exhausted.
    """
    names = []
    pos, total = 0, len(data)
    while pos < total:
        start = pos + 1
        end = start + byteord(data[pos])
        names.append(tostr(data[start:end], encoding="latin1"))
        pos = end
    return names
示例#20
0
def _AsciiHexEncode(input):
    """This is a verbose encoding used for binary data within
    a PDF file.  One byte binary becomes two bytes of ASCII."""
    "Helper function used by images"
    output = StringIO()
    for char in input:
        output.write('%02x' % byteord(char))
    output.write('>')
    output.seek(0)
    return output.read()
示例#21
0
def _reverseBytes(data):
    """Return ``data`` with the bit order inside every byte reversed.

    ``data`` may be a byte string of any length or a single byte given
    as an int (which is what indexing or iterating a Python 3 ``bytes``
    object yields).  Multi-byte input is processed byte by byte and the
    byte order itself is left unchanged.

    Returns the result as produced by bytechr() for a single byte, or by
    bytesjoin() for longer (or empty) input.
    """
    is_int = isinstance(data, int)
    # An int has no len(); it is always exactly one byte.
    if not is_int and len(data) != 1:
        return bytesjoin(map(_reverseBytes, data))
    byte = data if is_int else byteord(data)
    result = 0
    for _ in range(8):
        # Move the lowest remaining bit of `byte` onto the low end of
        # `result`, so the first bit taken ends up most significant.
        result = (result << 1) | (byte & 1)
        byte >>= 1
    return bytechr(result)
示例#22
0
def bezDecrypt(bezDataBuffer):
	"""Decrypt hex-encoded bez data and return the plain text.

	The buffer is read as pairs of hex digits (whitespace between digits
	is skipped).  Each pair is XORed with the high byte of a rolling
	16-bit key, and the first LEN_IV decoded bytes are dropped.  Decoding
	stops when the buffer is exhausted, including in the middle of a pair.
	"""
	key = 11586
	pos = 0  # read position in the input buffer
	total = len(bezDataBuffer)
	produced = 0  # number of bytes decoded so far
	output = ""
	while True:
		cipher = 0

		# Combine the next two hex digits, skipping whitespace.
		for _ in range(2):
			try:
				while bezDataBuffer[pos].isspace():
					pos += 1
				ch = bezDataBuffer[pos]
			except IndexError:
				return output

			if not ch.islower():
				ch = ch.lower()
			if ch.isdigit():
				nibble = byteord(ch) - byteord('0')
			else:
				nibble = byteord(ch) - byteord('a') + 10
			cipher = ((cipher << 4) & 0xFFFF) | nibble
			pos += 1

		plain = cipher ^ (key >> 8)
		key = (cipher + key) * 902381661 + 341529579
		if key > 0xFFFF:
			key &= 0xFFFF
		produced += 1
		if produced > LEN_IV:
			output += bytechr(plain)
		if pos >= total:
			return output
示例#23
0
def unpack255UShort(data):
	""" Read one to three bytes from 255UInt16-encoded input string, and return a
	tuple containing the decoded integer plus any leftover data.

	Raises TTLibError when 'data' is empty or ends before the value is
	complete.

	>>> unpack255UShort(bytechr(252))[0]
	252

	Note that some numbers (e.g. 506) can have multiple encodings:
	>>> unpack255UShort(struct.pack("BB", 254, 0))[0]
	506
	>>> unpack255UShort(struct.pack("BB", 255, 253))[0]
	506
	>>> unpack255UShort(struct.pack("BBB", 253, 1, 250))[0]
	506
	"""
	# Report empty input with the same error as every other truncation,
	# rather than letting byteord() fail on an empty slice.
	if len(data) == 0:
		raise TTLibError('not enough data to unpack 255UInt16')
	code = byteord(data[:1])
	data = data[1:]
	if code == 253:
		# read two more bytes as an unsigned short
		if len(data) < 2:
			raise TTLibError('not enough data to unpack 255UInt16')
		result, = struct.unpack(">H", data[:2])
		data = data[2:]
	elif code == 254:
		# read another byte, plus 253 * 2
		if len(data) == 0:
			raise TTLibError('not enough data to unpack 255UInt16')
		result = byteord(data[:1])
		result += 506
		data = data[1:]
	elif code == 255:
		# read another byte, plus 253
		if len(data) == 0:
			raise TTLibError('not enough data to unpack 255UInt16')
		result = byteord(data[:1])
		result += 253
		data = data[1:]
	else:
		# leave as is if lower than 253
		result = code
	# return result plus left over data
	return result, data
示例#24
0
    def getnexttoken(
            self,
            # localize some stuff, for performance
            len=len,
            ps_special=ps_special,
            stringmatch=stringRE.match,
            hexstringmatch=hexstringRE.match,
            commentmatch=commentRE.match,
            endmatch=endofthingRE.match):
        """Return the next ``(tokentype, token)`` pair from the buffer.

        Leading whitespace is skipped first; ``(None, None)`` is returned
        at the end of the buffer.  ``tokentype`` is 'do_special',
        'do_comment', 'do_string', 'do_hexstring', 'do_literal' or ''
        depending on the first character of the token.  ``self.pos`` is
        advanced past the token, and the token is decoded with
        ``self.encoding`` before it is returned.

        Raises PSTokenError when no token can be matched at the current
        position.
        """
        self.skipwhite()
        if self.pos >= self.len:
            return None, None
        start = self.pos
        buf = self.buf
        char = bytechr(byteord(buf[start]))

        if char not in ps_special:
            # Ordinary token; a leading '/' marks a literal name.
            if char == b'/':
                tokentype = 'do_literal'
                match = endmatch(buf, start + 1)
            else:
                tokentype = ''
                match = endmatch(buf, start)
            if match is None:
                raise PSTokenError('bad token at character %d' % start)
            token = buf[start:match.span()[1]]
        elif char in b'{}[]':
            tokentype = 'do_special'
            token = char
        elif char == b'%':
            tokentype = 'do_comment'
            token = buf[start:commentmatch(buf, start).span()[1]]
        elif char == b'(':
            tokentype = 'do_string'
            match = stringmatch(buf, start)
            if match is None:
                raise PSTokenError('bad string at character %d' % start)
            token = buf[start:match.span()[1]]
        elif char == b'<':
            tokentype = 'do_hexstring'
            match = hexstringmatch(buf, start)
            if match is None:
                raise PSTokenError('bad hexstring at character %d' % start)
            token = buf[start:match.span()[1]]
        else:
            raise PSTokenError('bad token at character %d' % start)

        self.pos = start + len(token)
        return tokentype, tostr(token, encoding=self.encoding)
示例#25
0
 def decompile(self, data):
     """Decode ``data`` byte by byte, pushing operands onto ``self.stack``.

     The handler for each leading byte is looked up in
     ``self.operandEncoding`` and returns ``(value, next index)``; values
     that are not None are pushed onto the stack.
     """
     push = self.stack.append
     pos = 0
     end = len(data)
     while pos < end:
         b0 = byteord(data[pos])
         handler = self.operandEncoding[b0]
         value, pos = handler(self, b0, data, pos + 1)
         if value is not None:
             push(value)
示例#26
0
def read_operator(self, b0, data, index):
    """Resolve the operator for ``b0`` and hand it to handle_operator().

    A ``b0`` of 12 introduces a two-byte operator: the next byte is read
    and the lookup key becomes ``(12, next byte)``.  Operators missing
    from ``self.operators`` are ignored.

    Returns ``(value, index)`` where ``value`` is the result of
    ``self.handle_operator`` (None for an unknown operator) and ``index``
    points past any extra byte consumed.
    """
    key = b0
    if b0 == 12:
        key = (b0, byteord(data[index]))
        index += 1
    try:
        operator = self.operators[key]
    except KeyError:
        return None, index
    else:
        return self.handle_operator(operator), index
示例#27
0
def _data2binary(data, numBits):
    """Render the first ``numBits`` bits of ``data`` as a '0'/'1' string.

    Bits are emitted least-significant first within each byte, at most
    eight per byte.
    """
    bits = []
    remaining = numBits
    for item in data:
        value = byteord(item)
        take = min(8, remaining)
        for _ in range(take):
            bits.append('1' if value & 0x1 else '0')
            value >>= 1
        remaining -= take
    return strjoin(bits)
示例#28
0
    def toUnicode(self, errors='strict'):
        """Return self.string as a Unicode string.

        Bytes are decoded with the encoding reported by
        self.getEncoding(); ``errors`` is passed through to the decoder,
        so by default ill-formed data that cannot be recovered raises
        UnicodeDecodeError.

        Two repair heuristics are applied:

        - UTF-16BE byte strings with an odd number of bytes are patched
          (trailing zero dropped, leading zero added, or zeros
          interleaved) before decoding.
        - A decoded string that still looks like UTF-16BE-encoded ASCII
          (alternating NUL and ASCII characters) is treated as
          double-encoded and reduced to its ASCII characters.

        Note: because of these heuristics, round-tripping a font through
        XML may recover misencoded data that a plain load and save would
        leave untouched.
        """
        def isascii(b):
            return 0x20 <= b <= 0x7E or b in (0x09, 0x0A, 0x0D)

        encoding = self.getEncoding()
        text = self.string

        oddUtf16 = (isinstance(text, bytes) and encoding == 'utf_16_be'
                    and len(text) % 2 == 1)
        if oddUtf16:
            # Recover badly encoded UTF-16 strings with an odd byte count:
            # - If the last byte is zero, drop it.  Otherwise,
            # - If all the odd bytes are zero and all the even bytes are
            #   ASCII, prepend one zero byte.  Otherwise,
            # - If the first byte is zero and all other bytes are ASCII,
            #   insert zero bytes between consecutive ASCII bytes.
            if byteord(text[-1]) == 0:
                text = text[:-1]
            elif all(byteord(b) == 0 if i % 2 else isascii(byteord(b))
                     for i, b in enumerate(text)):
                text = b'\0' + text
            elif byteord(text[0]) == 0 and all(
                    isascii(byteord(b)) for b in text[1:]):
                text = bytesjoin(b'\0' + bytechr(byteord(b))
                                 for b in text[1:])

        text = tostr(text, encoding=encoding, errors=errors)

        # A decoded string that still alternates NUL / ASCII suggests a
        # double encoding: keep only the ASCII characters.
        looksDoubleEncoded = all(
            ord(c) == 0 if i % 2 == 0 else isascii(ord(c))
            for i, c in enumerate(text))
        if looksDoubleEncoded:
            text = ''.join(text[1::2])

        return text
示例#29
0
def test_calcCodePageRanges(emptyufo, unicodes, expected):
    """Check the OS/2 code page range fields computed for ``unicodes``.

    One glyph is added per entry of ``unicodes``; after compiling, both
    ulCodePageRange fields must match the bits listed in ``expected``.
    """
    font = emptyufo
    for index, char in enumerate(unicodes):
        glyph = font.newGlyph("glyph%d" % index)
        glyph.unicode = byteord(char)

    compiler = OutlineOTFCompiler(font)
    compiler.compile()

    # Bits 0-31 live in the first field, bits 32-63 in the second.
    wantLow = intListToNum(expected, start=0, length=32)
    assert compiler.otf["OS/2"].ulCodePageRange1 == wantLow
    wantHigh = intListToNum(expected, start=32, length=32)
    assert compiler.otf["OS/2"].ulCodePageRange2 == wantHigh
示例#30
0
def read_realNumber(self, b0, data, index):
    """Decode a nibble-packed real number starting at ``data[index]``.

    Each byte holds two nibbles, high nibble first; every nibble is
    translated through ``realNibbles`` and a nibble of 0xf ends the
    number.  Returns ``(float value, index after the last byte read)``.
    """
    pieces = []
    while True:
        byte = byteord(data[index])
        index += 1
        for nibble in ((byte & 0xf0) >> 4, byte & 0x0f):
            if nibble == 0xf:
                return float(''.join(pieces)), index
            pieces.append(realNibbles[nibble])
示例#31
0
def block(char):
    """ Look up the Unicode block of the character 'char' and return its
    name as a string.

    >>> block("a")
    'Basic Latin'
    >>> block(chr(0x060C))
    'Arabic'
    >>> block(chr(0xEFFFF))
    'No_Block'
    """
    codepoint = byteord(char)
    # The entry before the insertion point is the range containing it.
    position = bisect_right(Blocks.RANGES, codepoint)
    return Blocks.VALUES[position - 1]
示例#32
0
 def getToken(self, index, len=len, byteord=byteord, isinstance=isinstance):
     """Fetch the token at ``index`` from the bytecode or the program.

     When ``self.bytecode`` is set, the byte at ``index`` selects a
     handler from ``self.operandEncoding`` which decodes the token;
     otherwise the token is taken directly from ``self.program``.

     Returns ``(token, isOperator, next index)`` where ``isOperator`` is
     true for ``str`` tokens, or ``(None, 0, 0)`` when ``index`` is past
     the end.
     """
     bytecode = self.bytecode
     if bytecode is None:
         if index >= len(self.program):
             return None, 0, 0
         token = self.program[index]
         index += 1
     else:
         if index >= len(bytecode):
             return None, 0, 0
         b0 = byteord(bytecode[index])
         index += 1
         decode = self.operandEncoding[b0]
         token, index = decode(self, b0, self.bytecode, index)
     return token, isinstance(token, str), index
示例#33
0
 def decompileUniqueName(self, data):
     """Convert the raw unique-name bytes into a printable string.

     Reading stops at the first zero byte.  Byte values 32 to 127 are
     kept as characters; every other byte is written as a backslash
     followed by its three-digit octal value.

     Returns the resulting string.
     """
     pieces = []
     for char in data:
         val = byteord(char)
         if val == 0:
             break
         # Both bounds must hold; with "or" the test was always true and
         # the escape branch could never run.
         if 31 < val < 128:
             pieces.append(chr(val))
         else:
             # Zero-padded three-digit octal, independent of how oct()
             # formats its result.
             pieces.append("\\%03o" % val)
     return "".join(pieces)
示例#34
0
def script_extension(char):
    """ Look up the script extension property of the Unicode character
    'char' and return it as a set of strings.

    >>> script_extension("a") == {'Latn'}
    True
    >>> script_extension(chr(0x060C)) == {'Rohg', 'Syrc', 'Yezi', 'Arab', 'Thaa'}
    True
    >>> script_extension(chr(0x10FFFF)) == {'Zzzz'}
    True
    """
    codepoint = byteord(char)
    position = bisect_right(ScriptExtensions.RANGES, codepoint)
    extensions = ScriptExtensions.VALUES[position - 1]
    if extensions is not None:
        return extensions
    # code points not explicitly listed for Script Extensions
    # have as their value the corresponding Script property value
    return {script(char)}
示例#35
0
def _AsciiBase85Encode(input):
    """This is a compact encoding used for binary data within
    a PDF file.  Four bytes of binary data become five bytes of
    ASCII.  This is the default method used for encoding images."""
    outstream = StringIO()
    # special rules apply if not a multiple of four bytes.
    whole_word_count, remainder_size = divmod(len(input), 4)
    cut = 4 * whole_word_count
    body, lastbit = input[0:cut], input[cut:]

    for i in range(whole_word_count):
        offset = i*4
        b1 = byteord(body[offset])
        b2 = byteord(body[offset+1])
        b3 = byteord(body[offset+2])
        b4 = byteord(body[offset+3])

        num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4

        if num == 0:
            #special case
            outstream.write('z')
        else:
            #solve for five base-85 numbers
            temp, c5 = divmod(num, 85)
            temp, c4 = divmod(temp, 85)
            temp, c3 = divmod(temp, 85)
            c1, c2 = divmod(temp, 85)
            assert ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85*c4) + c5 == num, 'dodgy code!'
            outstream.write(chr(c1+33))
            outstream.write(chr(c2+33))
            outstream.write(chr(c3+33))
            outstream.write(chr(c4+33))
            outstream.write(chr(c5+33))

    # now we do the final bit at the end.  I repeated this separately as
    # the loop above is the time-critical part of a script, whereas this
    # happens only once at the end.

    #encode however many bytes we have as usual
    if remainder_size > 0:
        while len(lastbit) < 4:
            lastbit = lastbit + b'\000'
        b1 = byteord(lastbit[0])
        b2 = byteord(lastbit[1])
        b3 = byteord(lastbit[2])
        b4 = byteord(lastbit[3])

        num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4

        #solve for c1..c5
        temp, c5 = divmod(num, 85)
        temp, c4 = divmod(temp, 85)
        temp, c3 = divmod(temp, 85)
        c1, c2 = divmod(temp, 85)

        #print 'encoding: %d %d %d %d -> %d -> %d %d %d %d %d' % (
        #    b1,b2,b3,b4,num,c1,c2,c3,c4,c5)
        lastword = chr(c1+33) + chr(c2+33) + chr(c3+33) + chr(c4+33) + chr(c5+33)
        #write out most of the bytes.
        outstream.write(lastword[0:remainder_size + 1])

    #terminator code for ascii 85
    outstream.write('~>')
    outstream.seek(0)
    return outstream.read()
示例#36
0
def _AsciiBase85Decode(input):
    """This is not used - Acrobat Reader decodes for you - but a round
    trip is essential for testing."""
    outstream = StringIO()
    #strip all whitespace
    stripped = ''.join(input.split())
    #check end
    assert stripped[-2:] == '~>', 'Invalid terminator for Ascii Base 85 Stream'
    stripped = stripped[:-2]  #chop off terminator

    #may have 'z' in it which complicates matters - expand them
    stripped = stripped.replace('z', '!!!!!')
    # special rules apply if not a multiple of five bytes.
    whole_word_count, remainder_size = divmod(len(stripped), 5)
    #print '%d words, %d leftover' % (whole_word_count, remainder_size)
    assert remainder_size != 1, 'invalid Ascii 85 stream!'
    cut = 5 * whole_word_count
    body, lastbit = stripped[0:cut], stripped[cut:]

    for i in range(whole_word_count):
        offset = i*5
        c1 = byteord(body[offset]) - 33
        c2 = byteord(body[offset+1]) - 33
        c3 = byteord(body[offset+2]) - 33
        c4 = byteord(body[offset+3]) - 33
        c5 = byteord(body[offset+4]) - 33

        num = ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85*c4) + c5

        temp, b4 = divmod(num,256)
        temp, b3 = divmod(temp,256)
        b1, b2 = divmod(temp, 256)

        assert  num == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!'
        outstream.write(chr(b1))
        outstream.write(chr(b2))
        outstream.write(chr(b3))
        outstream.write(chr(b4))

    #decode however many bytes we have as usual
    if remainder_size > 0:
        while len(lastbit) < 5:
            lastbit = lastbit + '!'
        c1 = byteord(lastbit[0]) - 33
        c2 = byteord(lastbit[1]) - 33
        c3 = byteord(lastbit[2]) - 33
        c4 = byteord(lastbit[3]) - 33
        c5 = byteord(lastbit[4]) - 33
        num = ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85*c4) + c5
        temp, b4 = divmod(num,256)
        temp, b3 = divmod(temp,256)
        b1, b2 = divmod(temp, 256)
        assert  num == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!'
        #print 'decoding: %d %d %d %d %d -> %d -> %d %d %d %d' % (
        #    c1,c2,c3,c4,c5,num,b1,b2,b3,b4)

        #the last character needs 1 adding; the encoding loses
        #data by rounding the number to x bytes, and when
        #divided repeatedly we get one less
        if remainder_size == 2:
            lastword = chr(b1+1)
        elif remainder_size == 3:
            lastword = chr(b1) + chr(b2+1)
        elif remainder_size == 4:
            lastword = chr(b1) + chr(b2) + chr(b3+1)
        outstream.write(lastword)

    #terminator code for ascii 85
    outstream.seek(0)
    return outstream.read()