def test_decode_utf8_1():
    """
    Decoding counterpart of test_encode_utf8_1.

    Checks two special cases of the modified UTF-8 decoder:

    1. the byte pair c080 must decode to the NUL character;
    2. the six bytes eda081edb080 must decode to the surrogate pair
       U+D801 U+DC00 (representing U+10400).
    """
    # (hex-encoded input, expected decoded text)
    cases = (
        ('31c08032', u'1\x002'),
        ('eda081edb080', u'\uD801\uDC00'),
    )
    for hex_input, expected in cases:
        decoded = decode_modified_utf8(binascii.unhexlify(hex_input))
        assert expected == decoded
def _from_io(self, fio):
    """
    Read the ConstantPool (constant_pool in the JVM Spec) from `fio`.

    Every constant read is added with `self.append()` as a tuple whose
    first element is the constant's tag.

    :param fio: Any file-like object providing `read()`
    """
    # Bound once so the loop does not repeat the attribute lookup.
    read = fio.read

    # The stored count is one more than the number of slots to fill.
    remaining = unpack('>H', read(2))[0] - 1

    while remaining > 0:
        remaining -= 1

        # A 1-byte prefix identifies the type of constant.
        (tag,) = unpack('>B', read(1))

        if tag != 1:
            # Everything except CONSTANT_Utf8_info has a fixed layout
            # that is looked up by tag.
            fmt, size = _constant_fmts[tag]
            self.append((tag,) + unpack(fmt, read(size)))
            if tag == 5 or tag == 6:
                # LONG (5) and DOUBLE (6) occupy two entries in the pool,
                # so pad with a placeholder and consume the extra slot.
                self.append(None)
                remaining -= 1
            continue

        # CONSTANT_Utf8_info: a length-prefixed UTF-8-ish string.
        (length,) = unpack('>H', read(2))
        self.append((tag, decode_modified_utf8(read(length))))
def unpack(self, fio):
    """
    Load the ConstantPool from the file-like object `fio`.

    .. note::

        Advanced usage only. There is normally no need to call this
        method yourself, as it is called for you when a ClassFile is
        loaded.

    :param fio: Any file-like object providing `read()`
    """
    # NOTE(review): the bare `unpack(...)` calls below presumably resolve
    # to a module-level struct-style `unpack`, not to this method - confirm
    # against the file's imports.

    # Kept in a local so CPython skips the attribute lookup per read.
    read = fio.read

    # The pool (constant_pool in the JVM Spec) opens with a 2-byte count;
    # the loop below fills one slot fewer than that count.
    (slots_left,) = unpack('>H', read(2))
    slots_left -= 1

    while slots_left > 0:
        slots_left -= 1

        # Each constant starts with a 1-byte tag naming its type.
        (tag,) = unpack('>B', read(1))

        if tag == 1:
            # CONSTANT_Utf8_info: 2-byte length, then a UTF-8-ish string.
            (length,) = unpack('>H', read(2))
            entry = (tag, decode_modified_utf8(read(length)))
        else:
            # All remaining constant types are fixed-size records.
            fmt, size = _constant_fmts[tag]
            entry = (tag,) + unpack(fmt, read(size))
        self.append(entry)

        if tag != 1 and tag in (5, 6):
            # LONG (5) and DOUBLE (6) count as two entries in the pool.
            self.append(None)
            slots_left -= 1
def test_decode_utf8_1():
    """
    Decoding counterpart of test_encode_utf8_1.

    Exercises two special cases of the modified UTF-8 decoder:

    1. c080 must be decoded as the NUL character;
    2. eda081edb080 must be decoded as the surrogate pair
       U+D801 U+DC00 (representing U+10400).
    """
    # Case 1: c0 80 embedded between the characters '1' and '2'.
    assert decode_modified_utf8(b'\x31\xc0\x80\x32') == '1\x002'

    # Case 2: six bytes that decode to two surrogate code units.
    assert decode_modified_utf8(b'\xed\xa0\x81\xed\xb0\x80') == '\uD801\uDC00'
def test_decode_utf8_1():
    """
    Decoding counterpart of test_encode_utf8_1.

    Covers two special cases of modified UTF-8 decoding:

    1. c080 has to come back as the NUL character;
    2. eda081edb080 has to come back as the surrogate pair
       U+D801 U+DC00 (representing U+10400).
    """
    # Inputs and expectations are kept in parallel, matched by position.
    encoded_inputs = (b'\x31\xc0\x80\x32', b'\xed\xa0\x81\xed\xb0\x80')
    expected_outputs = ('1\x002', '\uD801\uDC00')

    for raw, expected in zip(encoded_inputs, expected_outputs):
        assert decode_modified_utf8(raw) == expected
def unpack(self, fio):
    """
    Populate the ConstantPool from the file-like object `fio`.

    .. note::

        Advanced usage only. You should rarely have to call this method
        directly, because it is called for you when loading a ClassFile.

    :param fio: Any file-like object providing `read()`
    """
    # NOTE(review): the bare `unpack(...)` calls below presumably resolve
    # to a module-level struct-style `unpack`, not to this method - confirm
    # against the file's imports.

    # Local alias so CPython doesn't repeat the lookup on every read.
    read = fio.read

    # The ConstantPool (constant_pool in the JVM Spec) begins with a
    # 2-byte count that is one more than the number of slots to fill.
    pending = unpack('>H', read(2))[0] - 1

    while pending > 0:
        pending -= 1

        # The 1-byte prefix identifies the type of constant.
        tag = ord(read(1))

        if tag != 1:
            # Every non-string constant is a fixed-size record.
            fmt, size = _constant_fmts[tag]
            self.append((tag, *unpack(fmt, read(size))))
            if tag in (5, 6):
                # LONG (5) and DOUBLE (6) count as two entries in the
                # pool.
                self.append(None)
                pending -= 1
            continue

        # CONSTANT_Utf8_info, a length-prefixed UTF-8-ish string.
        raw = read(unpack('>H', read(2))[0])
        try:
            # Plain UTF-8 decoding is tried first; skipping the MUTF8
            # decoder when it succeeds offers huge time savings over
            # large JARs.
            text = raw.decode('utf8')
        except UnicodeDecodeError:
            # Only data that is not valid regular UTF-8 gets the proper
            # modified UTF-8 treatment.
            text = decode_modified_utf8(raw)
        self.append((tag, text))