Пример #1
0
 def test_uncompress_exact_len_err(self):
     """Asking for more bytes than the payload holds must raise EOFError."""
     data = six.binary_type(bytearray(range(100)))
     zipped = zlib.compress(data)
     reader = ExactSizeReader(ZipCompressedReader(six.BytesIO(zipped)))
     # The over-read is sized from the uncompressed payload rather than from
     # the compressed blob: the compressed length is unrelated to how many
     # bytes can be read back and would make the test depend on how well
     # this particular payload happens to compress.
     with self.assertRaises(EOFError):
         reader.read(size=len(data) + 1)
Пример #2
0
    def __init__(self, wxf_input):
        """Set up the parser state and consume the WXF header.

        `wxf_input` is converted to a readable buffer by
        `wxf_input_to_buffer`. The header is parsed from that buffer right
        away, and the reader used for the rest of the input is then chosen
        from the compression flag it reports: `ZipCompressedReader` when the
        flag is set, `ExactSizeReader` otherwise.
        """
        self.context = SerializationContext()
        self.reader = wxf_input_to_buffer(wxf_input)

        # The version is not needed here; only the compression flag decides
        # how the remaining bytes are read.
        _version, compress = self.parse_header()
        if compress:
            self.reader = ZipCompressedReader(self.reader)
        else:
            self.reader = ExactSizeReader(self.reader)
Пример #3
0
    def test_uncompress_exact_len(self):
        """Read a compressed payload back in fixed-size chunks.

        After each chunk the accumulated output must equal the matching
        prefix of the original data, and once the remainder is read the
        whole payload must have been recovered.
        """
        data = six.binary_type(
            bytearray(random.randint(0, 255) for _ in range(10000)))
        zipped = zlib.compress(data)
        num_of_chunk = 20
        # Chunks are compared against slices of the uncompressed payload, so
        # their size is derived from it; the compressed length is unrelated
        # and could push the last chunk past the end of the data.
        chunk_size = len(data) // num_of_chunk
        reader = ExactSizeReader(ZipCompressedReader(six.BytesIO(zipped)))
        buff = six.BytesIO()
        for i in range(num_of_chunk):
            buff.write(reader.read(chunk_size))
            self.assertEqual(buff.getvalue(), data[:(i + 1) * chunk_size])

        # Drain whatever is left after the fixed-size chunks.
        buff.write(reader.read())
        self.assertEqual(buff.getvalue(), data)
Пример #4
0
 def __init__(self, wxf_input):
     """Set up the parser state and consume the WXF header.

     `wxf_input` is either a binary/buffer value, which is wrapped in an
     in-memory byte stream, or any object exposing a `read` attribute, which
     is used as is. The header is parsed immediately, and the reader used
     for the rest of the input is chosen from the compression flag it
     reports: `ZipCompressedReader` when set, `ExactSizeReader` otherwise.

     Raises:
         TypeError: if `wxf_input` is neither a binary type nor an object
             with a `read` attribute.
     """
     self.context = SerializationContext()
     if isinstance(wxf_input, (six.binary_type, six.buffer_types)):
         self.reader = six.BytesIO(wxf_input)
     elif hasattr(wxf_input, 'read'):
         self.reader = wxf_input
     else:
         raise TypeError(
             'Class %s neither implements a read method nor is a binary type.'
             % wxf_input.__class__.__name__)
     # The version is not needed here; only the compression flag decides
     # how the remaining bytes are read.
     _version, compress = self.parse_header()
     if compress:
         self.reader = ZipCompressedReader(self.reader)
     else:
         self.reader = ExactSizeReader(self.reader)
Пример #5
0
class WXFParser(object):
    """Parse a WXF input.

    This class is initialized with a WXF input, and exposes a generator of
    :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken`.
    The input `wxf_input` can be a string of bytes holding the serialized
    expression, a unicode string, in which case it is considered to be a
    filename, or an object implementing a `read` method.

    The generator outputs WXF tokens one by one::

        with open('/tmp/data.wxf', 'rb') as fp:
            parser = WXFParser(fp)
            gen = parser.tokens()
            print(next(gen))

    This low level class provides intermediary objects to ease the parsing of
    WXF. Most of the time one should directly use a high level interface such
    as :func:`~wolframclient.deserializers.wxf.wxfparser.binary_deserialize`.

    The token generator is generally consumed by an instance of
    :class:`~wolframclient.deserializers.wxf.wxfconsumer.WXFConsumer`.
    """

    # Maps the one-byte token type read from the input to the name of the
    # method that finishes parsing that token (resolved in `next_token`).
    _mapping = {
        constants.WXF_CONSTANTS.Symbol: "token_for_string",
        constants.WXF_CONSTANTS.String: "token_for_string",
        constants.WXF_CONSTANTS.BigInteger: "token_for_string",
        constants.WXF_CONSTANTS.BigReal: "token_for_string",
        constants.WXF_CONSTANTS.Function: "token_for_function",
        constants.WXF_CONSTANTS.BinaryString: "token_for_binary_string",
        constants.WXF_CONSTANTS.Integer8: "token_for_integer8",
        constants.WXF_CONSTANTS.Integer16: "token_for_integer16",
        constants.WXF_CONSTANTS.Integer32: "token_for_integer32",
        constants.WXF_CONSTANTS.Integer64: "token_for_integer64",
        constants.WXF_CONSTANTS.Real64: "token_for_real64",
        constants.WXF_CONSTANTS.PackedArray: "token_for_packed_array",
        constants.WXF_CONSTANTS.NumericArray: "token_for_numeric_array",
        constants.WXF_CONSTANTS.Association: "token_for_association",
        constants.WXF_CONSTANTS.Rule: "token_for_rule",
        constants.WXF_CONSTANTS.RuleDelayed: "token_for_rule",
    }

    def __init__(self, wxf_input):
        """Set up the parser state and consume the WXF header.

        `wxf_input` is converted to a readable buffer by
        `wxf_input_to_buffer`. The header is parsed from that buffer right
        away, and the reader used for the rest of the input is then chosen
        from the compression flag: `ZipCompressedReader` when the header
        flags compression, `ExactSizeReader` otherwise.
        """
        self.context = SerializationContext()
        self.reader = wxf_input_to_buffer(wxf_input)

        # The version is not needed here; only the compression flag decides
        # how the remaining bytes are read.
        _version, compress = self.parse_header()
        if compress:
            self.reader = ZipCompressedReader(self.reader)
        else:
            self.reader = ExactSizeReader(self.reader)

    def tokens(self):
        """Generate instances :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken` from a WXF input."""
        # At least one token is always produced; further tokens are read
        # until the context reports a valid final state.
        yield self.next_token()
        while not self.context.is_valid_final_state():
            yield self.next_token()

    def parse_header(self):
        """Read the WXF header and return it as `(version, compress)`.

        The header is a version byte, an optional compression marker and a
        separator byte. `version` is the version byte converted to an int
        and `compress` is True when the compression marker is present.

        Raises:
            WolframParserException: if the version byte is not
                `WXF_VERSION`, or if the separator is missing.
        """
        next_byte = self.reader.read(1)
        if next_byte != WXF_VERSION:
            raise WolframParserException("Invalid version %s." % next_byte)
        version = int(next_byte)

        next_byte = self.reader.read(1)
        compress = next_byte == WXF_HEADER_COMPRESS
        if compress:
            # The compression marker is optional; when present, the
            # separator is the byte that follows it.
            next_byte = self.reader.read(1)
        if next_byte != WXF_HEADER_SEPARATOR:
            raise WolframParserException(
                "Invalid header. Failed to find header separator ':'.")
        return (version, compress)

    def parse_array(self, token):
        """Read the rank, dimensions and raw data of an array token.

        Expects `token.array_type` to be set already. Fills in
        `token.dimensions` (a list of ints) and `token.data` (the raw bytes
        of the array values).

        Raises:
            WolframParserException: if the rank or any dimension is zero.
        """
        # Parsing array rank and dimensions
        rank = parse_varint(self.reader)
        if rank == 0:
            raise WolframParserException("Array rank cannot be zero.")
        token.dimensions = []
        for _ in range(rank):
            dim = parse_varint(self.reader)
            if dim == 0:
                raise WolframParserException(
                    "Array dimensions cannot be zero.")
            token.dimensions.append(dim)
        # reading values
        # NOTE(review): `token.element_count` is provided by WXFToken,
        # presumably computed from `token.dimensions` -- confirm.
        bytecount = constants.ARRAY_TYPES_ELEM_SIZE[
            token.array_type] * token.element_count
        token.data = self.reader.read(bytecount)

    def token_for_string(self, token):
        """Read a varint length followed by that many UTF-8 encoded bytes.

        Sets `token.length` and `token.data` (the decoded text) and returns
        the token. Used for every token type mapped to this handler in
        `_mapping`.
        """
        self.context.add_part()
        token.length = parse_varint(self.reader)
        if token.length == 0:
            # Nothing to read: the reader is left untouched.
            token.data = ""
        else:
            token.data = self.reader.read(token.length).decode("utf8")

        return token

    def token_for_integer8(self, token):
        """Read 1 byte, unpack it with `constants.StructInt8LE`, return the token."""
        self.context.add_part()
        token.data = constants.StructInt8LE.unpack(self.reader.read(1))[0]
        return token

    def token_for_integer16(self, token):
        """Read 2 bytes, unpack them with `constants.StructInt16LE`, return the token."""
        self.context.add_part()
        token.data = constants.StructInt16LE.unpack(self.reader.read(2))[0]
        return token

    def token_for_integer32(self, token):
        """Read 4 bytes, unpack them with `constants.StructInt32LE`, return the token."""
        self.context.add_part()
        token.data = constants.StructInt32LE.unpack(self.reader.read(4))[0]
        return token

    def token_for_integer64(self, token):
        """Read 8 bytes, unpack them with `constants.StructInt64LE`, return the token."""
        self.context.add_part()
        token.data = constants.StructInt64LE.unpack(self.reader.read(8))[0]
        return token

    def token_for_real64(self, token):
        """Read 8 bytes, unpack them with `constants.StructDouble`, return the token."""
        self.context.add_part()
        token.data = constants.StructDouble.unpack(self.reader.read(8))[0]
        return token

    def token_for_function(self, token):
        """Read the varint length of a function and register it with the context."""
        token.length = parse_varint(self.reader)
        self.context.step_into_new_function(token.length)
        return token

    def token_for_association(self, token):
        """Read the varint length of an association and register it with the context."""
        token.length = parse_varint(self.reader)
        self.context.step_into_new_assoc(token.length)
        return token

    def token_for_rule(self, token):
        """Register a rule with the context and return the token.

        Raises:
            WolframParserException: if the context reports that a rule is
                not valid at this position.
        """
        if not self.context.is_rule_valid():
            raise WolframParserException(
                "Rule and RuleDelayed must be parts of an Association.")
        self.context.step_into_new_rule()
        return token

    def token_for_packed_array(self, token):
        """Read a packed array: one value-type byte, then the array body.

        Raises:
            WolframParserException: if the value type is not listed in
                `constants.VALID_PACKED_ARRAY_TYPES`.
        """
        self.context.add_part()
        token.array_type = self.reader.read(1)
        if token.array_type not in constants.VALID_PACKED_ARRAY_TYPES:
            raise WolframParserException("Invalid PackedArray value type: %s" %
                                         token.array_type)
        self.parse_array(token)
        return token

    def token_for_numeric_array(self, token):
        """Read a numeric array: one value-type byte, then the array body.

        Raises:
            WolframParserException: if the value type is not a key of
                `constants.ARRAY_TYPES_ELEM_SIZE`.
        """
        self.context.add_part()
        token.array_type = self.reader.read(1)
        if token.array_type not in constants.ARRAY_TYPES_ELEM_SIZE:
            raise WolframParserException(
                "Invalid NumericArray value type: %s" % token.array_type)
        self.parse_array(token)
        return token

    def token_for_binary_string(self, token):
        """Read a varint length followed by that many raw bytes.

        Sets `token.length` and `token.data` (bytes, not decoded) and
        returns the token.
        """
        self.context.add_part()
        token.length = parse_varint(self.reader)
        if token.length == 0:
            # Nothing to read: the reader is left untouched.
            token.data = b""
        else:
            token.data = self.reader.read(token.length)
        return token

    def next_token(self):
        """Read one token type byte and dispatch to the matching handler.

        Returns the :class:`WXFToken` filled in by the handler.

        Raises:
            WolframParserException: if the byte read is not a key of
                `_mapping`.
        """
        next_byte = self.reader.read(1)

        try:
            handler = self._mapping[next_byte]
        except KeyError:
            raise WolframParserException("Unexpected token %s" % next_byte)

        return getattr(self, handler)(WXFToken(next_byte))