예제 #1
0
def _parse_sections(stream: IO[bytes]) -> Iterable[SECTION_TYPES]:
    """
    Helper function implementing the core logic for parsing sections.

    Lazily yields one section object per section, reading from the current
    stream position up to the end of the stream.  Custom sections (id 0x00)
    are yielded wherever they occur.  Every other section is yielded at most
    once; sections that were omitted from the stream are yielded as their
    empty equivalent.

    Among other things, this ensures that sections are correctly ordered and
    not duplicated (other than custom sections).

    Raises a ParseError if a section id is unknown, if a non-custom section
    appears more than once, or if a section appears after its slot has
    already been filled in with an empty placeholder (i.e. out of order).
    """
    # Determine where the stream ends without disturbing the read position.
    start_pos = stream.tell()
    end_pos = stream.seek(0, 2)
    stream.seek(start_pos)

    # During section parsing sections may be omitted.  The WASM spec says that
    # omitted sections are equivalent to them being present but empty.  As we
    # parse the bytecode, we need to fill in any missing sections with their
    # empty equivalent.  This iterator allows us to lazily step through the
    # sections in order.
    empty_section_iter = iter(EMPTY_SECTIONS_BY_ID)

    # A data structure to allow detection of duplicate sections.
    seen_section_ids: Set[int] = set()
    # We track missing sections separately.
    missing_section_ids: Set[int] = set()

    while stream.tell() < end_pos:
        section_id = parse_single_byte(stream)

        if section_id == numpy.uint8(0x00):
            # Custom sections are exempt from ordering and duplicate checks.
            yield parse_custom_section(stream)
            continue
        elif section_id not in PARSERS_BY_SECTION_ID:
            raise ParseError(f"Invalid section id: {hex(section_id)}")
        elif section_id in seen_section_ids:
            raise ParseError(
                f"Encountered multiple sections with the section id: "
                f"{hex(section_id)}")
        elif section_id in missing_section_ids:
            all_seen = tuple(
                sorted(seen_section_ids.union(missing_section_ids)))
            raise ParseError(
                f"Encountered section id out of order. section_id={section_id} "
                f"already encountered sections {all_seen}")

        seen_section_ids.add(section_id)

        # Back-fill every section that was skipped before this one.
        # NOTE(review): the first element of each yielded pair is assumed to
        # be the id of the omitted section (inferred from the name
        # `EMPTY_SECTIONS_BY_ID`) -- confirm against `_next_empty_section`.
        for empty_id, empty_section in _next_empty_section(section_id,
                                                           empty_section_iter):
            # Record the id of the *omitted* section (not the section being
            # parsed) so that a later appearance of that id is reported as
            # out of order.
            missing_section_ids.add(empty_id)
            yield empty_section

        section_parser_fn = PARSERS_BY_SECTION_ID[section_id]
        section = section_parser_fn(stream)
        yield section

    # get empty sections for any that were omitted.
    for _, empty_section in empty_section_iter:
        yield empty_section
예제 #2
0
파일: null.py 프로젝트: metazool/py-wasm
def parse_null_byte(stream: IO[bytes]) -> None:
    """
    Read one byte from the stream and require it to be 0x00.

    Raise a ParseError when the stream has no more data, or when the byte
    that was read is anything other than 0x00.
    """
    consumed = stream.read(1)

    # An empty read means there was nothing left to consume.
    if not consumed:
        raise ParseError("Unexpected end of stream")

    if consumed != b'\x00':
        raise ParseError(f"Expected 0x00 but got {hex(consumed[0])}")
예제 #3
0
파일: byte.py 프로젝트: stjordanis/py-wasm
def parse_single_byte(stream: IO[bytes]) -> UInt8:
    """
    Read one byte from the stream and return its value wrapped in ``UInt8``.

    Raise a ParseError when the stream has no more data.
    """
    raw = stream.read(1)

    # An empty read means there was nothing left to consume.
    if not raw:
        raise ParseError("Unexpected end of stream")

    return UInt8(raw[0])
예제 #4
0
파일: null.py 프로젝트: stjordanis/py-wasm
def parse_null_byte(stream: IO[bytes]) -> None:
    """
    Read one byte from the stream and require it to be 0x00.

    Raise a ParseError when the stream has no more data, and a
    MalformedModule when the byte that was read is not 0x00.
    """
    consumed = stream.read(1)

    # An empty read means there was nothing left to consume.
    if not consumed:
        raise ParseError("Unexpected end of stream")

    if consumed != b'\x00':
        raise MalformedModule(f"TODO: expected 0x00 but got {hex(consumed[0])}")
예제 #5
0
파일: byte.py 프로젝트: stjordanis/py-wasm
def parse_bytes(stream: IO[bytes]) -> bytes:
    """
    Read a length-prefixed run of raw bytes from the stream.

    The length is obtained from ``parse_u32`` and that many bytes are then
    read.  Raise a ParseError when the stream yields a different number of
    bytes than the declared length.
    """
    expected_length = parse_u32(stream)
    payload = stream.read(expected_length)
    actual_length = len(payload)

    if actual_length == expected_length:
        return payload

    raise ParseError(
        f"Error parsing raw bytes.  Expected bytestream of size {expected_length}. "
        f"Parsed stream is of size {actual_length}"
    )
예제 #6
0
파일: byte.py 프로젝트: metazool/py-wasm
def parse_single_byte(stream: IO[bytes]) -> numpy.uint8:
    """
    Read one byte from the stream and return its value as a ``numpy.uint8``.

    Raise a ParseError when the stream has no more data.
    """
    raw = stream.read(1)

    # An empty read means there was nothing left to consume.
    if not raw:
        raise ParseError("Unexpected end of stream")

    return numpy.uint8(raw[0])
예제 #7
0
def parse_module(stream: IO[bytes]) -> Module:
    """
    Parse a binary encoded WebAssembly module from ``stream``.

    The magic preamble and version are read first, followed by all of the
    sections.  The function section and code section are then paired up
    entry by entry to build the module's functions.

    Return the assembled Module object.

    Raise a ParseError if an error is encountered, including when the
    function section and the code section hold different numbers of entries.
    """
    # `parse_magic` both parses and validates the 4-byte *magic* preamble.
    # Currently its return value is simply discarded.
    parse_magic(stream)
    version = parse_version(stream)

    (
        custom_sections,
        type_section,
        import_section,
        function_section,
        table_section,
        memory_section,
        global_section,
        export_section,
        start_section,
        element_segment_section,
        code_section,
        data_segment_section,
    ) = parse_sections(stream)

    type_indices = function_section.types
    codes = code_section.codes

    # Each function body must have exactly one declared type index.
    if len(type_indices) != len(codes):
        raise ParseError(
            "Mismatched lengths of function section and code section. "
            f"function-types[{len(type_indices)}] != "
            f"codes[{len(codes)}]"
        )

    functions = tuple(
        Function(type_idx, body.locals, body.expr)
        for type_idx, body in zip(type_indices, codes)
    )

    return Module(
        version=version,
        types=type_section.function_types,
        funcs=functions,
        tables=table_section.tables,
        mems=memory_section.mems,
        globals=global_section.globals,
        elem=element_segment_section.element_segments,
        data=data_segment_section.data_segments,
        start=start_section.start,
        imports=import_section.imports,
        exports=export_section.exports,
    )
예제 #8
0
파일: vector.py 프로젝트: metazool/py-wasm
def parse_vector(
    sub_parser: Callable[[IO[bytes]], TItem],
    stream: IO[bytes],
) -> Tuple[TItem, ...]:
    """
    Parse a length-prefixed vector of encoded values into a tuple.

    The element count is read with ``parse_u32``; the elements themselves are
    produced by ``_parse_vector`` using ``sub_parser``.  Any exception raised
    while the elements are being collected is re-raised as a ParseError
    chained to the original exception.
    """
    item_count = parse_u32(stream)

    try:
        items = tuple(_parse_vector(sub_parser, item_count, stream))
    except Exception as err:
        raise ParseError(f"Error parsing vector: {err}") from err

    return items
예제 #9
0
def parse_blocktype(stream: IO[bytes]) -> Tuple[ValType, ...]:
    """
    Parse the result type of a block.

    A single byte is read from the stream.  The byte 0x40 produces an empty
    tuple; any other byte is converted with ``ValType.from_byte`` and
    returned as a 1-tuple.

    Raise a ParseError (chained to the underlying ValueError) if
    ``ValType.from_byte`` rejects the byte.
    """
    byte = parse_single_byte(stream)
    if byte == 0x40:
        return tuple()

    try:
        valtype = ValType.from_byte(byte)
    except ValueError as err:
        # Name the construct actually being parsed and keep the original
        # error as the explicit cause.
        raise ParseError(
            f"Invalid byte while parsing blocktype.  Got '{hex(byte)}': "
            f"{str(err)}"
        ) from err

    return (valtype, )
예제 #10
0
    def parse_and_validate_length_fn(stream: IO[bytes]) -> TReturn:
        """
        Parse one size-prefixed section and verify it was fully consumed.

        The declared size is read with ``parse_size``, exactly that many bytes
        are sliced off ``stream`` into a private buffer, and ``parser_fn`` is
        run on that buffer.

        Raise a ParseError if the stream holds fewer bytes than the declared
        size, or if ``parser_fn`` leaves bytes of the section unread.
        """
        # Note: Section parsers all operate under the assumption that their `stream`
        # contains **only** the bytes for the given section.  It follows that
        # successful parsing for any section **must** consume the full stream.
        declared_size = parse_size(stream)
        raw_section = stream.read(declared_size)

        if len(raw_section) != declared_size:
            raise ParseError(
                "Section declared size larger than stream. "
                f"declared={declared_size}  actual={len(raw_section)}")

        section_stream = io.BytesIO(raw_section)
        section = parser_fn(section_stream)

        # Compare where the parser stopped with the end of the section buffer.
        current_pos = section_stream.tell()
        end_pos = section_stream.seek(0, 2)

        if current_pos != end_pos:
            raise ParseError(
                f"Section parser did not fully consume section stream, leaving "
                f"{end_pos - current_pos} unconsumed bytes")
        return section
예제 #11
0
파일: text.py 프로젝트: stjordanis/py-wasm
def parse_text(stream: IO[bytes]) -> str:
    """
    Parse a length-prefixed UTF-8 string from the stream.

    The encoded length is read with ``parse_u32`` and that many bytes are
    then read and decoded as UTF-8.

    Raise a ParseError if the stream yields fewer bytes than the declared
    length, and a MalformedModule (chained to the UnicodeDecodeError) if the
    bytes are not valid UTF-8.
    """
    encoded_name_length = parse_u32(stream)
    encoded_name = stream.read(encoded_name_length)

    if len(encoded_name) != encoded_name_length:
        # `!r` renders the bytes with balanced quotes and avoids the implicit
        # str(bytes) conversion.
        raise ParseError(
            "Unexpected end of stream while parsing name. Expected length "
            f"{encoded_name_length}.  Got {encoded_name!r} with length "
            f"{len(encoded_name)}")

    try:
        name = encoded_name.decode('utf8')
    except UnicodeDecodeError as err:
        raise MalformedModule from err

    return name
예제 #12
0
def parse_version(stream: IO[bytes]) -> Tuple[UInt8, UInt8, UInt8, UInt8]:
    """
    Read the four version bytes from the stream and return them as a tuple.

    Raise a ParseError if the four bytes are not one of KNOWN_VERSIONS.

    https://webassembly.github.io/spec/core/bikeshed/index.html#binary-version
    """
    # The bytes are consumed one at a time, in stream order.
    version = tuple(parse_single_byte(stream) for _ in range(4))

    if version in KNOWN_VERSIONS:
        return version

    rendered = tuple(hex(byte) for byte in version)
    raise ParseError(f"Unknown version. Got: {rendered}")
예제 #13
0
def parse_version(stream: IO[bytes]) -> Tuple[numpy.uint8, numpy.uint8, numpy.uint8, numpy.uint8]:
    """
    Parse the version portion of a binary encoded Web Assembly module.

    Four single bytes are read in stream order and returned as a tuple.
    Raise a ParseError if that tuple is not one of KNOWN_VERSIONS.

    https://webassembly.github.io/spec/core/bikeshed/index.html#binary-version
    """
    first = parse_single_byte(stream)
    second = parse_single_byte(stream)
    third = parse_single_byte(stream)
    fourth = parse_single_byte(stream)
    version_bytes = (first, second, third, fourth)

    if version_bytes not in KNOWN_VERSIONS:
        found = tuple(map(hex, version_bytes))
        raise ParseError(f"Unknown version. Got: {found}")

    return version_bytes
예제 #14
0
def _parse_unsigned_leb128(stream: IO[bytes]) -> Iterable[int]:
    """
    Yield the shifted payload of each byte of a LEB128 encoded integer.

    Bytes are read one at a time.  For each byte, the bits selected by
    LOW_MASK are shifted left by 7 bits per preceding byte and yielded.
    Iteration stops after the first byte that has no HIGH_MASK bits set.

    Raise a ParseError if the stream ends before that terminating byte, and a
    generic Exception once the shift amount exceeds SHIFT_64_BIT_MAX.
    """
    shift = 0
    while True:
        if shift > SHIFT_64_BIT_MAX:
            raise Exception("TODO: better exception msg: Integer is too large...")

        chunk = stream.read(1)

        # Indexing an empty read raises IndexError, signalling end of stream.
        try:
            octet = chunk[0]
        except IndexError:
            raise ParseError(
                "Unexpected end of stream while parsing LEB128 encoded integer"
            )

        yield (octet & LOW_MASK) << shift

        # No HIGH_MASK bits set: this was the final byte of the integer.
        if not (octet & HIGH_MASK):
            return

        shift += 7