Пример #1
0
def test_decode_utf8_1():
    """
    Decoding counterpart of test_encode_utf8_1.

    Exercises two special cases of the modified UTF-8 decoder:
        1 - the byte pair c080 must decode to the NUL character
        2 - the six bytes eda081edb080 must decode to the surrogate pair
            U+D801 U+DC00 (together representing U+10400)
    """
    # Each case is (hex-encoded input, expected decoded text).
    cases = (
        ('31c08032', u'1\x002'),
        ('eda081edb080', u'\uD801\uDC00'),
    )

    for hex_input, expected in cases:
        raw = binascii.unhexlify(hex_input)
        assert expected == decode_modified_utf8(raw)
Пример #2
0
    def _from_io(self, fio):
        # Reads in the ConstantPool (constant_pool in the JVM Spec)
        constant_pool_count = unpack('>H', fio.read(2))[0]

        # Pull this locally so CPython doesn't do a lookup each time.
        read = fio.read

        while constant_pool_count > 1:
            constant_pool_count -= 1
            # The 1-byte prefix identifies the type of constant.
            tag = unpack('>B', read(1))[0]

            if tag == 1:
                # CONSTANT_Utf8_info, a length prefixed UTF-8-ish string.
                length = unpack('>H', read(2))[0]
                self.append((tag, decode_modified_utf8(read(length))))
            else:
                # Every other constant type is trivial.
                fmt, size = _constant_fmts[tag]
                self.append((tag,) + unpack(fmt, read(size)))
                if tag in (5, 6):
                    # LONG (5) and DOUBLE (6) count as two entries in the
                    # pool.
                    self.append(None)
                    constant_pool_count -= 1
Пример #3
0
    def unpack(self, fio):
        """
        Load the ConstantPool entries from the file-like object `fio`.

        .. note::

            Advanced usage only. Loading a ClassFile calls this method for
            you, so you will typically never need to call it yourself.

        :param fio: Any file-like object providing `read()`
        """
        # constant_pool_count from the JVM Spec: a big-endian 16-bit value.
        (slots,) = unpack('>H', fio.read(2))

        # Bind once so CPython avoids an attribute lookup per read.
        read = fio.read

        while slots > 1:
            slots -= 1
            # A single leading byte tells us which kind of constant follows.
            (tag,) = unpack('>B', read(1))

            if tag != 1:
                # Fixed-size constant: its struct format and byte size are
                # looked up by tag.
                fmt, size = _constant_fmts[tag]
                self.append((tag,) + unpack(fmt, read(size)))
                if tag in (5, 6):
                    # LONG (5) and DOUBLE (6) take up two pool entries, so
                    # insert a None placeholder and consume an extra slot.
                    self.append(None)
                    slots -= 1
                continue

            # CONSTANT_Utf8_info: a length-prefixed UTF-8-ish string.
            (length,) = unpack('>H', read(2))
            self.append((tag, decode_modified_utf8(read(length))))
Пример #4
0
def test_decode_utf8_1():
    """
    Decoding counterpart of test_encode_utf8_1.

    Exercises two special cases of the modified UTF-8 decoder:
        1 - the byte pair c080 must decode to the NUL character
        2 - the six bytes eda081edb080 must decode to the surrogate pair
            U+D801 U+DC00 (together representing U+10400)
    """
    # Case 1: an embedded NUL, encoded as the two bytes c0 80.
    assert decode_modified_utf8(b'\x31\xc0\x80\x32') == '1\x002'

    # Case 2: a supplementary character stored as two 3-byte surrogates.
    assert decode_modified_utf8(b'\xed\xa0\x81\xed\xb0\x80') == '\uD801\uDC00'
Пример #5
0
def test_decode_utf8_1():
    """
    Decoding counterpart of test_encode_utf8_1.

    Checks that the decoder handles two special encodings:
        1 - c080, which must come back as the NUL character
        2 - eda081edb080, which must come back as the surrogate pair
            U+D801 U+DC00 (the representation of U+10400)
    """
    # Inputs and expectations are kept in parallel, index for index.
    encoded_inputs = (
        b'\x31\xc0\x80\x32',
        b'\xed\xa0\x81\xed\xb0\x80',
    )
    expected_texts = (
        '1\x002',
        '\uD801\uDC00',
    )

    for encoded, expected in zip(encoded_inputs, expected_texts):
        assert decode_modified_utf8(encoded) == expected
Пример #6
0
    def unpack(self, fio):
        """
        Load the ConstantPool entries from the file-like object `fio`.

        .. note::

            Advanced usage only. Loading a ClassFile calls this method for
            you, so you will typically never need to call it yourself.

        :param fio: Any file-like object providing `read()`
        """
        # constant_pool_count from the JVM Spec: a big-endian 16-bit value.
        (entries_left,) = unpack('>H', fio.read(2))

        # Bind once so CPython avoids an attribute lookup per read.
        read = fio.read

        while entries_left > 1:
            entries_left -= 1
            # A single leading byte tells us which kind of constant follows.
            tag = ord(read(1))

            if tag == 1:
                # CONSTANT_Utf8_info: a length-prefixed UTF-8-ish string.
                (length,) = unpack('>H', read(2))
                raw = read(length)
                # Try the plain UTF-8 decoder first and fall back to the
                # modified UTF-8 decoder only when it rejects the bytes;
                # this offers huge time savings over large JARs.
                try:
                    text = raw.decode('utf8')
                except UnicodeDecodeError:
                    text = decode_modified_utf8(raw)
                self.append((tag, text))
                continue

            # Fixed-size constant: its struct format and byte size are
            # looked up by tag.
            fmt, size = _constant_fmts[tag]
            self.append((tag,) + unpack(fmt, read(size)))
            if tag in (5, 6):
                # LONG (5) and DOUBLE (6) take up two pool entries, so
                # insert a None placeholder and consume an extra slot.
                self.append(None)
                entries_left -= 1