def test_encode_utf8_1():
    """
    Check two special cases of the modified UTF-8 encoder.

    1 - byte 00 must be encoded as the two bytes 'c080'
    2 - supplementary characters are given as the two surrogate code units
        of their UTF-16 representation, and each surrogate must be encoded
        by three bytes. A supplementary character therefore takes six
        bytes: U+10400 (surrogates D801 DC00) must be encoded as
        'eda081edb080'.
    """
    # binascii.hexlify() returns bytes on Python 3, so the expected values
    # must be bytes literals; comparing against str would never be equal.

    # String containing byte 00.
    encoded = encode_modified_utf8(u"1\x002")
    hex_form = binascii.hexlify(encoded)
    assert hex_form == b"31c08032", hex_form

    # Unicode supplementary character U+10400 as a surrogate pair.
    encoded = encode_modified_utf8(u"\uD801\uDC00")
    hex_form = binascii.hexlify(encoded)
    assert hex_form == b"eda081edb080", hex_form
def test_encode_utf8_1():
    """
    Check two special cases of the modified UTF-8 encoder.

    1 - byte 00 must be encoded as the two bytes 'c080'
    2 - supplementary characters are given as the two surrogate code units
        of their UTF-16 representation, and each surrogate must be encoded
        by three bytes. A supplementary character therefore takes six
        bytes: U+10400 (surrogates D801 DC00) must be encoded as
        'eda081edb080'.
    """
    # Embedded NUL: expected as the two-byte sequence c0 80.
    assert encode_modified_utf8(u'1\x002') == b'\x31\xc0\x80\x32'

    # Surrogate pair for U+10400: three bytes per surrogate.
    assert encode_modified_utf8(u'\uD801\uDC00') == b'\xed\xa0\x81\xed\xb0\x80'
def test_encode_utf8_1():
    """
    Check two special cases of the modified UTF-8 encoder.

    1 - byte 00 must be encoded as the two bytes 'c080'
    2 - supplementary characters are given as the two surrogate code units
        of their UTF-16 representation, and each surrogate must be encoded
        by three bytes. A supplementary character therefore takes six
        bytes: U+10400 (surrogates D801 DC00) must be encoded as
        'eda081edb080'.
    """
    # Inputs and their expected encodings, kept in matching order.
    texts = (u'1\x002', u'\uD801\uDC00')
    expected_encodings = (
        b'\x31\xc0\x80\x32',
        b'\xed\xa0\x81\xed\xb0\x80',
    )

    for text, expected in zip(texts, expected_encodings):
        actual = encode_modified_utf8(text)
        assert actual == expected
def pack(self):
    """
    Return the serialized form of this constant.

    The result is a header built from `self.TAG` and the length of the
    encoded value, followed by the value as produced by
    `encode_modified_utf8`.

    NOTE(review): the inner `pack` call is presumably `struct.pack`
    imported at module level (format '>BH') - confirm against the imports.
    """
    payload = encode_modified_utf8(self.value)
    # The header records the length of the encoded bytes, not of the text.
    header = pack('>BH', self.TAG, len(payload))
    return header + payload
def _to_io(self, fout):
    """
    Write this pool and every constant it iterates over to `fout`.

    The output starts with `self.raw_count` packed with format '>H'.
    Each constant then contributes one record made of its `TAG` followed
    by the type-specific fields selected below. Entries that match none
    of the handled constant types produce no output.

    NOTE(review): `pack` is presumably `struct.pack` - confirm against
    the module imports.

    :param fout: Any file-like object providing `write()`
    """
    emit = fout.write
    emit(pack('>H', self.raw_count))

    for entry in self:
        if isinstance(entry, ConstantUTF8):
            # Variable-length record: the header carries the byte length
            # of the encoded text, then the encoded bytes follow.
            payload = encode_modified_utf8(entry.value)
            emit(pack('>BH', entry.TAG, len(payload)))
            emit(payload)
            continue

        # Fixed-size records: choose the pack format and the attributes
        # that follow the tag. The isinstance order is significant and is
        # kept exactly as in the type chain below.
        if isinstance(entry, ConstantInteger):
            layout, fields = '>Bi', ('value',)
        elif isinstance(entry, ConstantFloat):
            layout, fields = '>Bf', ('value',)
        elif isinstance(entry, ConstantLong):
            layout, fields = '>Bq', ('value',)
        elif isinstance(entry, ConstantDouble):
            layout, fields = '>Bd', ('value',)
        elif isinstance(entry, ConstantClass):
            layout, fields = '>BH', ('_name_index',)
        elif isinstance(entry, ConstantString):
            layout, fields = '>BH', ('_string_index',)
        elif isinstance(entry, ConstantRef):
            layout, fields = '>BHH', ('_class_index', '_name_and_type_index')
        elif isinstance(entry, ConstantNameAndType):
            layout, fields = '>BHH', ('_name_index', '_descriptor_index')
        elif isinstance(entry, ConstantMethodHandle):
            layout, fields = '>BBH', ('_reference_kind', '_reference_index')
        elif isinstance(entry, ConstantMethodType):
            layout, fields = '>BH', ('_descriptor_index',)
        elif isinstance(entry, ConstantInvokeDynamic):
            layout, fields = '>BHH', (
                '_bootstrap_method_attr_index',
                '_name_and_type_index',
            )
        else:
            # Unhandled entry type: nothing is written for it.
            continue

        emit(pack(
            layout,
            entry.TAG,
            *[getattr(entry, name) for name in fields]
        ))
def pack(self, fout):
    """
    Write the ConstantPool to the file-like object `fout`.

    The output starts with `self.raw_count` packed with format '>H'.
    Each constant then contributes one record made of its `TAG` followed
    by the type-specific fields selected below. Entries that match none
    of the handled constant types produce no output.

    .. note::

        Advanced usage only. You will typically never need to call this
        method as it will be called for you when saving a ClassFile.

    NOTE(review): the inner `pack` calls are presumably `struct.pack`
    imported at module level - confirm against the module imports.

    :param fout: Any file-like object providing `write()`
    """
    emit = fout.write
    emit(pack('>H', self.raw_count))

    for entry in self:
        if isinstance(entry, ConstantUTF8):
            # Variable-length record: the header carries the byte length
            # of the encoded text, then the encoded bytes follow.
            payload = encode_modified_utf8(entry.value)
            emit(pack('>BH', entry.TAG, len(payload)))
            emit(payload)
            continue

        # Fixed-size records: choose the pack format and the attributes
        # that follow the tag. The isinstance order is significant and is
        # kept exactly as in the type chain below.
        if isinstance(entry, ConstantInteger):
            layout, fields = '>Bi', ('value',)
        elif isinstance(entry, ConstantFloat):
            layout, fields = '>Bf', ('value',)
        elif isinstance(entry, ConstantLong):
            layout, fields = '>Bq', ('value',)
        elif isinstance(entry, ConstantDouble):
            layout, fields = '>Bd', ('value',)
        elif isinstance(entry, ConstantClass):
            layout, fields = '>BH', ('_name_index',)
        elif isinstance(entry, ConstantString):
            layout, fields = '>BH', ('_string_index',)
        elif isinstance(entry, ConstantRef):
            layout, fields = '>BHH', ('_class_index', '_name_and_type_index')
        elif isinstance(entry, ConstantNameAndType):
            layout, fields = '>BHH', ('_name_index', '_descriptor_index')
        else:
            # Unhandled entry type: nothing is written for it.
            continue

        emit(pack(
            layout,
            entry.TAG,
            *[getattr(entry, name) for name in fields]
        ))