def __init__(self, wxf_input):
    """Set up a WXF parser over `wxf_input` and consume the WXF header.

    The input is turned into a readable buffer with `wxf_input_to_buffer`,
    the header is parsed from it, and the reader is then wrapped in a
    :class:`ZipCompressedReader` when the header carries the compression
    flag, or in an :class:`ExactSizeReader` otherwise.

    :param wxf_input: WXF encoded input, in any form accepted by
        `wxf_input_to_buffer`.
    """
    self.context = SerializationContext()
    self.reader = wxf_input_to_buffer(wxf_input)
    # parse_header() returns (version, compress); only the compression flag
    # is needed to pick the reader wrapper.
    _version, compress = self.parse_header()
    if compress:
        self.reader = ZipCompressedReader(self.reader)
    else:
        self.reader = ExactSizeReader(self.reader)
def __init__(self, wxf_input):
    """Set up a WXF parser over `wxf_input` and consume the WXF header.

    Binary inputs are wrapped in a `six.BytesIO` so they can be read like a
    stream; objects exposing a `read` attribute are used as they are. After
    the header is parsed, the reader is wrapped in a
    :class:`ZipCompressedReader` when the header carries the compression
    flag, or in an :class:`ExactSizeReader` otherwise.

    :param wxf_input: a binary value or an object implementing `read`.
    :raises TypeError: if `wxf_input` is neither a binary type nor an object
        with a `read` attribute.
    """
    self.context = SerializationContext()
    if isinstance(wxf_input, (six.binary_type, six.buffer_types)):
        self.reader = six.BytesIO(wxf_input)
    elif hasattr(wxf_input, 'read'):
        self.reader = wxf_input
    else:
        raise TypeError(
            'Class %s neither implements a read method nor is a binary type.'
            % wxf_input.__class__.__name__)
    # parse_header() returns (version, compress); only the compression flag
    # is needed to pick the reader wrapper.
    _version, compress = self.parse_header()
    if compress:
        self.reader = ZipCompressedReader(self.reader)
    else:
        self.reader = ExactSizeReader(self.reader)
class WXFParser(object):
    """Parse a WXF input.

    This class is initialized with a WXF input, and exposes a generator of
    :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken`.
    The input `wxf_input` can be a string of bytes with the serialized
    expression, a unicode string in which case it is considered as a
    filename, or an object implementing a `read` method.

    The generator outputs WXF tokens one by one::

        with open('/tmp/data.wxf', 'rb') as fp:
            parser = WXFParser(fp)
            gen = parser.tokens()
            print(next(gen))

    This low level class provides intermediary objects to ease the parsing of
    WXF. Most of the time one should directly use a high level interface such
    as :func:`~wolframclient.deserializers.wxf.wxfparser.binary_deserialize`.

    The token generator is generally consumed by an instance of
    :class:`~wolframclient.deserializers.wxf.wxfconsumer.WXFConsumer`.
    """

    # Maps each WXF token-type marker to the *name* of the method that parses
    # it. Names are resolved with getattr() on the instance in next_token().
    _mapping = {
        constants.WXF_CONSTANTS.Symbol: "token_for_string",
        constants.WXF_CONSTANTS.String: "token_for_string",
        constants.WXF_CONSTANTS.BigInteger: "token_for_string",
        constants.WXF_CONSTANTS.BigReal: "token_for_string",
        constants.WXF_CONSTANTS.Function: "token_for_function",
        constants.WXF_CONSTANTS.BinaryString: "token_for_binary_string",
        constants.WXF_CONSTANTS.Integer8: "token_for_integer8",
        constants.WXF_CONSTANTS.Integer16: "token_for_integer16",
        constants.WXF_CONSTANTS.Integer32: "token_for_integer32",
        constants.WXF_CONSTANTS.Integer64: "token_for_integer64",
        constants.WXF_CONSTANTS.Real64: "token_for_real64",
        constants.WXF_CONSTANTS.PackedArray: "token_for_packed_array",
        constants.WXF_CONSTANTS.NumericArray: "token_for_numeric_array",
        constants.WXF_CONSTANTS.Association: "token_for_association",
        constants.WXF_CONSTANTS.Rule: "token_for_rule",
        constants.WXF_CONSTANTS.RuleDelayed: "token_for_rule",
    }

    def __init__(self, wxf_input):
        """Set up the parser over `wxf_input` and consume the WXF header.

        The input is turned into a readable buffer with
        `wxf_input_to_buffer`, the header is parsed from it, and the reader
        is then wrapped in a :class:`ZipCompressedReader` when the header
        carries the compression flag, or in an :class:`ExactSizeReader`
        otherwise.

        :param wxf_input: WXF encoded input, in any form accepted by
            `wxf_input_to_buffer`.
        :raises WolframParserException: if the header is invalid.
        """
        self.context = SerializationContext()
        self.reader = wxf_input_to_buffer(wxf_input)
        # Only the compression flag is needed here; the version has already
        # been validated by parse_header().
        _version, compress = self.parse_header()
        if compress:
            self.reader = ZipCompressedReader(self.reader)
        else:
            self.reader = ExactSizeReader(self.reader)

    def tokens(self):
        """Generate instances :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken` from a WXF input."""
        # At least one token is always read; further tokens are read until
        # the context reports a valid final state.
        yield self.next_token()
        while not self.context.is_valid_final_state():
            yield self.next_token()

    def parse_header(self):
        """Read and validate the WXF header from the reader.

        The header is read one byte at a time: a version byte, an optional
        compression flag, and the header separator.

        :return: a tuple `(version, compress)` where `version` is the integer
            version and `compress` is a boolean.
        :raises WolframParserException: if the version byte does not match
            `WXF_VERSION` or the header separator is missing.
        """
        next_byte = self.reader.read(1)
        if next_byte != WXF_VERSION:
            raise WolframParserException("Invalid version %s." % next_byte)
        version = int(next_byte)

        next_byte = self.reader.read(1)
        compress = next_byte == WXF_HEADER_COMPRESS
        if compress:
            # The compression flag is optional; when present, the separator
            # is the byte that follows it.
            next_byte = self.reader.read(1)

        if next_byte != WXF_HEADER_SEPARATOR:
            raise WolframParserException(
                "Invalid header. Failed to find header separator ':'.")
        return (version, compress)

    def parse_array(self, token):
        """Read the rank, dimensions and raw data of an array into `token`.

        `token.array_type` must already be set. On success `token.dimensions`
        holds the list of dimensions and `token.data` the raw bytes read for
        the array values.

        :param token: the array token being populated.
        :raises WolframParserException: if the rank or any dimension is zero.
        """
        # Parsing array rank and dimensions
        rank = parse_varint(self.reader)
        if rank == 0:
            raise WolframParserException("Array rank cannot be zero.")
        token.dimensions = []
        for _ in range(rank):
            dim = parse_varint(self.reader)
            if dim == 0:
                raise WolframParserException(
                    "Array dimensions cannot be zero.")
            token.dimensions.append(dim)
        # reading values
        element_size = constants.ARRAY_TYPES_ELEM_SIZE[token.array_type]
        bytecount = element_size * token.element_count
        token.data = self.reader.read(bytecount)

    def token_for_string(self, token):
        """Populate `token` with a length-prefixed, UTF-8 decoded string.

        Used for every token type mapped to this method in `_mapping`
        (symbols, strings, big integers and big reals).
        """
        self.context.add_part()
        token.length = parse_varint(self.reader)
        if token.length == 0:
            token.data = ""
        else:
            token.data = self.reader.read(token.length).decode("utf8")
        return token

    def token_for_integer8(self, token):
        """Populate `token` with a value unpacked from 1 byte using `StructInt8LE`."""
        self.context.add_part()
        token.data = constants.StructInt8LE.unpack(self.reader.read(1))[0]
        return token

    def token_for_integer16(self, token):
        """Populate `token` with a value unpacked from 2 bytes using `StructInt16LE`."""
        self.context.add_part()
        token.data = constants.StructInt16LE.unpack(self.reader.read(2))[0]
        return token

    def token_for_integer32(self, token):
        """Populate `token` with a value unpacked from 4 bytes using `StructInt32LE`."""
        self.context.add_part()
        token.data = constants.StructInt32LE.unpack(self.reader.read(4))[0]
        return token

    def token_for_integer64(self, token):
        """Populate `token` with a value unpacked from 8 bytes using `StructInt64LE`."""
        self.context.add_part()
        token.data = constants.StructInt64LE.unpack(self.reader.read(8))[0]
        return token

    def token_for_real64(self, token):
        """Populate `token` with a value unpacked from 8 bytes using `StructDouble`."""
        self.context.add_part()
        token.data = constants.StructDouble.unpack(self.reader.read(8))[0]
        return token

    def token_for_function(self, token):
        """Read the function length and step the context into a new function."""
        token.length = parse_varint(self.reader)
        self.context.step_into_new_function(token.length)
        return token

    def token_for_association(self, token):
        """Read the association length and step the context into a new association."""
        token.length = parse_varint(self.reader)
        self.context.step_into_new_assoc(token.length)
        return token

    def token_for_rule(self, token):
        """Step the context into a new rule (Rule or RuleDelayed).

        :raises WolframParserException: if the context reports that a rule is
            not valid at this position.
        """
        if not self.context.is_rule_valid():
            raise WolframParserException(
                "Rule and RuleDelayed must be parts of an Association.")
        self.context.step_into_new_rule()
        return token

    def token_for_packed_array(self, token):
        """Populate `token` with a packed array (type, dimensions and raw data).

        :raises WolframParserException: if the value type is not one of
            `constants.VALID_PACKED_ARRAY_TYPES`.
        """
        self.context.add_part()
        token.array_type = self.reader.read(1)
        if token.array_type not in constants.VALID_PACKED_ARRAY_TYPES:
            raise WolframParserException(
                "Invalid PackedArray value type: %s" % token.array_type)
        self.parse_array(token)
        return token

    def token_for_numeric_array(self, token):
        """Populate `token` with a numeric array (type, dimensions and raw data).

        :raises WolframParserException: if the value type is not a key of
            `constants.ARRAY_TYPES_ELEM_SIZE`.
        """
        self.context.add_part()
        token.array_type = self.reader.read(1)
        if token.array_type not in constants.ARRAY_TYPES_ELEM_SIZE:
            raise WolframParserException(
                "Invalid NumericArray value type: %s" % token.array_type)
        self.parse_array(token)
        return token

    def token_for_binary_string(self, token):
        """Populate `token` with a length-prefixed sequence of raw bytes."""
        self.context.add_part()
        token.length = parse_varint(self.reader)
        if token.length == 0:
            token.data = b""
        else:
            token.data = self.reader.read(token.length)
        return token

    def next_token(self):
        """Read the next token-type byte and dispatch to its handler.

        :return: the :class:`WXFToken` populated by the matching
            `token_for_*` method.
        :raises WolframParserException: if the byte read is not a key of
            `_mapping`.
        """
        next_byte = self.reader.read(1)
        try:
            handler = self._mapping[next_byte]
        except KeyError:
            raise WolframParserException("Unexpected token %s" % next_byte)
        return getattr(self, handler)(WXFToken(next_byte))