Пример #1
0
 def __init__(self, raw_buff):
     """Prepare the parser state for the binary XML held in raw_buff.

     raw_buff is wrapped in a BuffHandle, which is then handed to a
     StringBlock once the leading bytes have been consumed.
     """
     self.reset()

     handle = BuffHandle(raw_buff)
     self.buff = handle
     # The first 12 bytes are consumed and discarded before the string
     # block is built (presumably a file header - TODO confirm).
     handle.read(12)
     self.string_block = StringBlock(handle)

     # Bookkeeping containers all start out empty.
     self._resource_ids = list()
     self._prefixuri = dict()
     self._uriprefix = dict()
     self._prefixuril = list()
Пример #2
0
 def __init__(self, raw_buff):
     """Build the initial parser state around the raw_buff binary XML.

     The buffer is wrapped in a BuffHandle; after the leading bytes are
     skipped, the same handle is used to construct the StringBlock.
     """
     self.reset()

     wrapped = BuffHandle(raw_buff)
     self.buff = wrapped
     # Discard the first 12 bytes ahead of the string block
     # (presumably a file header - TODO confirm).
     wrapped.read(12)
     self.string_block = StringBlock(wrapped)

     # Nothing has been collected yet: every container begins empty.
     self._prefixuril = list()
     self._uriprefix = dict()
     self._prefixuri = dict()
     self._resource_ids = list()
Пример #3
0
class AXMLParser:
    """Pull-style reader over a binary XML buffer.

    Every call to next() walks the chunk stream until one event can be
    reported (START_DOCUMENT, START_TAG, END_TAG, TEXT or END_DOCUMENT).
    Names, namespaces and attribute fields are kept as the integers read
    from the buffer; the getters turn them into text through
    self.string_block.get_raw().
    """

    def __init__(self, raw_buff):
        """Set up the parser.

        raw_buff is the raw buffer of a binary XML file.
        """
        self.reset()
        self.buff = BuffHandle(raw_buff)
        # The first 12 bytes are skipped before the string block is built
        # (presumably a file header - TODO confirm against the format).
        self.buff.read(12)
        self.string_block = StringBlock(self.buff)
        self._resource_ids = []
        # prefix -> uri and uri -> prefix lookups, plus the stack of the
        # (prefix, uri) pairs of the namespaces opened so far.
        self._prefixuri = {}
        self._uriprefix = {}
        self._prefixuril = []

    def reset(self):
        """Clear the per-event state; -1 marks every scalar as unset."""
        self._event = -1
        self._line_number = -1
        self._name = -1
        self._namespace_uri = -1
        self._attributes = []
        self._id_attribute = -1
        self._class_attribute = -1
        self._style_attribute = -1

    def next(self):
        """Advance to the next event and return its code."""
        self.do_next()
        return self._event

    def _read_word(self):
        """Consume 4 bytes from the buffer and return the value unpacked
        with UNPACK_FORMAT_LITTLEENDIAN_LONG."""
        return unpack(UNPACK_FORMAT_LITTLEENDIAN_LONG, self.buff.read(4))[0]

    def do_next(self):
        """Consume chunks until an event is produced; store it in _event.

        Does nothing once END_DOCUMENT has been reached.

        Raises ValueError when a resource-ID chunk has an invalid size or
        when a chunk type lies outside CHUNK_XML_FIRST..CHUNK_XML_LAST.
        """
        if self._event == END_DOCUMENT:
            return

        previous_event = self._event
        self.reset()

        while True:
            if previous_event == START_DOCUMENT:
                # The call that reported START_DOCUMENT already consumed
                # the type word of the first start tag, so resume with
                # that chunk instead of reading a new type.
                chunk_type = CHUNK_XML_START_TAG
            else:
                if self.buff.end():
                    self._event = END_DOCUMENT
                    break
                chunk_type = self._read_word()

            if chunk_type == CHUNK_RESOURCEIDS:
                chunk_size = self._read_word()
                if chunk_size < 8 or chunk_size % 4 != 0:
                    raise ValueError(
                        "Invalid resource ID chunk size: %d" % chunk_size)

                # chunk_size is in bytes; two of its 4-byte words are not
                # IDs (presumably the type and size words already read).
                for _ in range(chunk_size // 4 - 2):
                    self._resource_ids.append(self._read_word())

                continue

            if chunk_type < CHUNK_XML_FIRST or chunk_type > CHUNK_XML_LAST:
                raise ValueError("Unexpected chunk type: 0x%x" % chunk_type)

            # Fake START_DOCUMENT event: reported in place of the first
            # start tag when no event has been produced before.
            if chunk_type == CHUNK_XML_START_TAG and previous_event == -1:
                self._event = START_DOCUMENT
                break

            self.buff.read(4)  # chunk size, not used
            line_number = self._read_word()
            self.buff.read(4)  # skipped word (0xFFFFFFFF per original note)

            if chunk_type == CHUNK_XML_START_NAMESPACE:
                prefix = self._read_word()
                uri = self._read_word()

                self._prefixuri[prefix] = uri
                self._uriprefix[uri] = prefix
                self._prefixuril.append((prefix, uri))
                continue

            if chunk_type == CHUNK_XML_END_NAMESPACE:
                # The two words of the chunk are skipped unread.
                self.buff.read(4)
                self.buff.read(4)
                # Only the stack entry is dropped; the prefix/uri lookup
                # dictionaries keep their entries.
                self._prefixuril.pop()
                continue

            self._line_number = line_number

            if chunk_type == CHUNK_XML_START_TAG:
                self._namespace_uri = self._read_word()
                self._name = self._read_word()

                self.buff.read(4)  # flags, not interpreted

                # One word packs two fields: the upper 16 bits minus one
                # give the id attribute, the lower 16 bits the number of
                # attributes.
                packed = self._read_word()
                self._id_attribute = (packed >> 16) - 1
                attribute_count = packed & 0xFFFF

                # Same packing: upper half -> style, lower half -> class,
                # both stored minus one.
                packed = self._read_word()
                self._style_attribute = (packed >> 16) - 1
                self._class_attribute = (packed & 0xFFFF) - 1

                for _ in range(attribute_count * ATTRIBUTE_LENGTH):
                    self._attributes.append(self._read_word())

                # Keep only the top byte of every value-type word.
                for i in range(ATTRIBUTE_IX_VALUE_TYPE,
                               len(self._attributes), ATTRIBUTE_LENGTH):
                    self._attributes[i] = self._attributes[i] >> 24

                self._event = START_TAG
                break

            if chunk_type == CHUNK_XML_END_TAG:
                self._namespace_uri = self._read_word()
                self._name = self._read_word()
                self._event = END_TAG
                break

            if chunk_type == CHUNK_XML_TEXT:
                self._name = self._read_word()

                # Two more words are skipped; their meaning is not
                # established by this code.
                self.buff.read(4)
                self.buff.read(4)

                self._event = TEXT
                break

            # Any other chunk type inside the XML range produces no event:
            # the loop simply moves on to the next chunk.

    def get_prefix_by_uri(self, uri):
        """Return the prefix registered for uri, or -1 when unknown."""
        return self._uriprefix.get(uri, -1)

    def get_prefix(self):
        """Return the raw string mapped from the current namespace value.

        The lookup goes through _prefixuri keyed by _namespace_uri; a
        KeyError anywhere in the lookup yields "".
        """
        try:
            return self.string_block.get_raw(
                self._prefixuri[self._namespace_uri])
        except KeyError:
            return ""

    def get_name(self):
        """Return the tag name for START_TAG/END_TAG events, else ""."""
        if self._name == -1 or self._event not in (START_TAG, END_TAG):
            return ""

        return self.string_block.get_raw(self._name)

    def get_text(self):
        """Return the text of a TEXT event, else ""."""
        if self._name == -1 or self._event != TEXT:
            return ""

        return self.string_block.get_raw(self._name)

    def get_namespace_prefix(self, pos):
        """Return the prefix string of the open namespace at pos."""
        prefix = self._prefixuril[pos][0]
        return self.string_block.get_raw(prefix)

    def get_namespace_uri(self, pos):
        """Return the URI string of the open namespace at pos."""
        uri = self._prefixuril[pos][1]
        return self.string_block.get_raw(uri)

    def get_attribute_offset(self, index):
        """Return the position in _attributes where attribute index starts.

        Raises RuntimeError when the current event is not START_TAG and
        IndexError when index lies past the last attribute.
        """
        if self._event != START_TAG:
            raise RuntimeError("Current event is not START_TAG.")

        # Same stride as the one used to fill and count _attributes.
        offset = index * ATTRIBUTE_LENGTH
        if offset >= len(self._attributes):
            raise IndexError("Invalid attribute index")

        return offset

    def get_attribute_count(self):
        """Return the number of attributes of the current START_TAG,
        or -1 for any other event."""
        if self._event != START_TAG:
            return -1

        return len(self._attributes) // ATTRIBUTE_LENGTH

    def get_attribute_prefix(self, index):
        """Return the namespace prefix string of attribute index, or ""
        when its namespace URI has no registered prefix."""
        offset = self.get_attribute_offset(index)
        uri = self._attributes[offset + ATTRIBUTE_IX_NAMESPACE_URI]

        prefix = self.get_prefix_by_uri(uri)
        if prefix == -1:
            return ""

        return self.string_block.get_raw(prefix)

    def get_attribute_name(self, index):
        """Return the name string of attribute index, or "" when unset."""
        offset = self.get_attribute_offset(index)
        name = self._attributes[offset + ATTRIBUTE_IX_NAME]

        if name == -1:
            return ""

        return self.string_block.get_raw(name)

    def get_attribute_value_type(self, index):
        """Return the stored value-type field of attribute index."""
        offset = self.get_attribute_offset(index)
        return self._attributes[offset + ATTRIBUTE_IX_VALUE_TYPE]

    def get_attribute_value_data(self, index):
        """Return the stored value-data field of attribute index."""
        offset = self.get_attribute_offset(index)
        return self._attributes[offset + ATTRIBUTE_IX_VALUE_DATA]

    def get_attribute_value(self, index):
        """Return the string value of attribute index.

        Only TYPE_STRING values are resolved; every other value type
        currently yields "" (work in progress).
        """
        offset = self.get_attribute_offset(index)
        value_type = self._attributes[offset + ATTRIBUTE_IX_VALUE_TYPE]
        if value_type == TYPE_STRING:
            value_string = self._attributes[offset + ATTRIBUTE_IX_VALUE_STRING]
            return self.string_block.get_raw(value_string)
        # WIP
        return ""
Пример #4
0
class AXMLParser:
    """Event-based reader for the chunk stream of a binary XML buffer.

    next() reports one event per call (START_DOCUMENT, START_TAG,
    END_TAG, TEXT or END_DOCUMENT). The values read for names, namespaces
    and attributes stay as integers; the getters resolve them to text
    with self.string_block.get_raw().
    """

    def __init__(self, raw_buff):
        """Set up the parser.

        raw_buff is the raw buffer of a binary XML file.
        """
        self.reset()
        self.buff = BuffHandle(raw_buff)
        # Skip the first 12 bytes before the string block is parsed
        # (presumably a file header - TODO confirm against the format).
        self.buff.read(12)
        self.string_block = StringBlock(self.buff)
        self._resource_ids = []
        # Two-way prefix/uri lookups and the stack of (prefix, uri) pairs
        # for the namespaces that are currently open.
        self._prefixuri = {}
        self._uriprefix = {}
        self._prefixuril = []

    def reset(self):
        """Reset the per-event fields; -1 stands for "not set"."""
        self._event = -1
        self._line_number = -1
        self._name = -1
        self._namespace_uri = -1
        self._attributes = []
        self._id_attribute = -1
        self._class_attribute = -1
        self._style_attribute = -1

    def next(self):
        """Move to the next event and return its code."""
        self.do_next()
        return self._event

    def _read_word(self):
        """Read 4 bytes from the buffer and return them unpacked with
        UNPACK_FORMAT_LITTLEENDIAN_LONG."""
        return unpack(UNPACK_FORMAT_LITTLEENDIAN_LONG, self.buff.read(4))[0]

    def do_next(self):
        """Read chunks until one yields an event, stored in _event.

        Returns immediately once END_DOCUMENT has been reported.

        Raises ValueError for a resource-ID chunk with an invalid size
        and for a chunk type outside CHUNK_XML_FIRST..CHUNK_XML_LAST.
        """
        if self._event == END_DOCUMENT:
            return

        previous_event = self._event
        self.reset()

        while True:
            if previous_event == START_DOCUMENT:
                # The type word of the first start tag was consumed by
                # the call that returned START_DOCUMENT; continue with
                # that chunk rather than reading another type word.
                chunk_type = CHUNK_XML_START_TAG
            else:
                if self.buff.end():
                    self._event = END_DOCUMENT
                    break
                chunk_type = self._read_word()

            if chunk_type == CHUNK_RESOURCEIDS:
                chunk_size = self._read_word()
                if chunk_size < 8 or chunk_size % 4 != 0:
                    raise ValueError(
                        "Invalid resource ID chunk size: %d" % chunk_size)

                # The size is in bytes and covers two 4-byte words that
                # are not IDs (presumably the type and size just read).
                for _ in range(chunk_size // 4 - 2):
                    self._resource_ids.append(self._read_word())

                continue

            if chunk_type < CHUNK_XML_FIRST or chunk_type > CHUNK_XML_LAST:
                raise ValueError("Unexpected chunk type: 0x%x" % chunk_type)

            # Fake START_DOCUMENT event, emitted instead of the first
            # start tag when there was no earlier event.
            if chunk_type == CHUNK_XML_START_TAG and previous_event == -1:
                self._event = START_DOCUMENT
                break

            self.buff.read(4)  # chunk size, not used
            line_number = self._read_word()
            self.buff.read(4)  # skipped word (0xFFFFFFFF per original note)

            if chunk_type == CHUNK_XML_START_NAMESPACE:
                prefix = self._read_word()
                uri = self._read_word()

                self._prefixuri[prefix] = uri
                self._uriprefix[uri] = prefix
                self._prefixuril.append((prefix, uri))
                continue

            if chunk_type == CHUNK_XML_END_NAMESPACE:
                # Both words of the chunk are skipped without decoding.
                self.buff.read(4)
                self.buff.read(4)
                # The lookup dictionaries are left untouched; only the
                # namespace stack shrinks.
                self._prefixuril.pop()
                continue

            self._line_number = line_number

            if chunk_type == CHUNK_XML_START_TAG:
                self._namespace_uri = self._read_word()
                self._name = self._read_word()

                self.buff.read(4)  # flags, not interpreted

                # Upper 16 bits minus one -> id attribute; lower 16 bits
                # -> number of attributes that follow.
                packed = self._read_word()
                self._id_attribute = (packed >> 16) - 1
                attribute_count = packed & 0xFFFF

                # Upper 16 bits minus one -> style attribute; lower 16
                # bits minus one -> class attribute.
                packed = self._read_word()
                self._style_attribute = (packed >> 16) - 1
                self._class_attribute = (packed & 0xFFFF) - 1

                for _ in range(attribute_count * ATTRIBUTE_LENGTH):
                    self._attributes.append(self._read_word())

                # Reduce each value-type word to its top byte.
                for i in range(ATTRIBUTE_IX_VALUE_TYPE,
                               len(self._attributes), ATTRIBUTE_LENGTH):
                    self._attributes[i] = self._attributes[i] >> 24

                self._event = START_TAG
                break

            if chunk_type == CHUNK_XML_END_TAG:
                self._namespace_uri = self._read_word()
                self._name = self._read_word()
                self._event = END_TAG
                break

            if chunk_type == CHUNK_XML_TEXT:
                self._name = self._read_word()

                # Two trailing words are skipped; this code does not
                # establish what they hold.
                self.buff.read(4)
                self.buff.read(4)

                self._event = TEXT
                break

            # Chunk types inside the XML range that match no branch above
            # yield no event; the loop carries on with the next chunk.

    def get_prefix_by_uri(self, uri):
        """Return the prefix recorded for uri, or -1 if there is none."""
        return self._uriprefix.get(uri, -1)

    def get_prefix(self):
        """Return the raw string found via _prefixuri[_namespace_uri].

        A KeyError raised anywhere during the lookup results in "".
        """
        try:
            return self.string_block.get_raw(
                self._prefixuri[self._namespace_uri])
        except KeyError:
            return ""

    def get_name(self):
        """Return the tag name on START_TAG/END_TAG events, else ""."""
        if self._name == -1 or self._event not in (START_TAG, END_TAG):
            return ""

        return self.string_block.get_raw(self._name)

    def get_text(self):
        """Return the text carried by a TEXT event, else ""."""
        if self._name == -1 or self._event != TEXT:
            return ""

        return self.string_block.get_raw(self._name)

    def get_namespace_prefix(self, pos):
        """Return the prefix string of the open namespace at pos."""
        prefix = self._prefixuril[pos][0]
        return self.string_block.get_raw(prefix)

    def get_namespace_uri(self, pos):
        """Return the URI string of the open namespace at pos."""
        uri = self._prefixuril[pos][1]
        return self.string_block.get_raw(uri)

    def get_attribute_offset(self, index):
        """Return where attribute index begins inside _attributes.

        Raises RuntimeError if the current event is not START_TAG and
        IndexError if index points past the last attribute.
        """
        if self._event != START_TAG:
            raise RuntimeError("Current event is not START_TAG.")

        # Use the same stride that fills and counts _attributes.
        offset = index * ATTRIBUTE_LENGTH
        if offset >= len(self._attributes):
            raise IndexError("Invalid attribute index")

        return offset

    def get_attribute_count(self):
        """Return how many attributes the current START_TAG has, or -1
        when the current event is something else."""
        if self._event != START_TAG:
            return -1

        return len(self._attributes) // ATTRIBUTE_LENGTH

    def get_attribute_prefix(self, index):
        """Return the namespace prefix string of attribute index, or ""
        if no prefix is registered for its namespace URI."""
        offset = self.get_attribute_offset(index)
        uri = self._attributes[offset + ATTRIBUTE_IX_NAMESPACE_URI]

        prefix = self.get_prefix_by_uri(uri)
        if prefix == -1:
            return ""

        return self.string_block.get_raw(prefix)

    def get_attribute_name(self, index):
        """Return the name string of attribute index, or "" if unset."""
        offset = self.get_attribute_offset(index)
        name = self._attributes[offset + ATTRIBUTE_IX_NAME]

        if name == -1:
            return ""

        return self.string_block.get_raw(name)

    def get_attribute_value_type(self, index):
        """Return the value-type field stored for attribute index."""
        offset = self.get_attribute_offset(index)
        return self._attributes[offset + ATTRIBUTE_IX_VALUE_TYPE]

    def get_attribute_value_data(self, index):
        """Return the value-data field stored for attribute index."""
        offset = self.get_attribute_offset(index)
        return self._attributes[offset + ATTRIBUTE_IX_VALUE_DATA]

    def get_attribute_value(self, index):
        """Return the string value of attribute index.

        Only TYPE_STRING is handled so far; any other value type gives
        "" (work in progress).
        """
        offset = self.get_attribute_offset(index)
        value_type = self._attributes[offset + ATTRIBUTE_IX_VALUE_TYPE]
        if value_type == TYPE_STRING:
            value_string = self._attributes[offset + ATTRIBUTE_IX_VALUE_STRING]
            return self.string_block.get_raw(value_string)
        # WIP
        return ""