示例#1
0
    def __init__(self, raw_buff):
        self.reset()

        self.buff = bytecode.BuffHandle(raw_buff)

        _type, _header_size, _size = unpack('<HHL', self.buff.read(8))

        if _header_size == 28024:
            # Can be a common error: the file is not an AXML but a plain XML
            # The file will then usually start with '<?xm' / '3C 3F 78 6D'
            print(
                "Header size is 28024! Are you trying to parse a plain XML file?"
            )
            exit(1)
        if _type != 0x0003:
            print("header-type error")
            exit(1)

        # self.buff.read(4)
        # self.buff.read(4)

        self.sb = StringBlock(self.buff)

        self.m_resourceIDs = []
        self.m_prefixuri = {}
        self.m_uriprefix = {}
        self.m_prefixuriL = []
示例#2
0
    def __init__(self, raw_buff):
        self.reset()
        self.valid_axml = True
        self.axml_tampered = False
        self.packerwarning = False
        self.buff = bytecode.BuffHandle(raw_buff)

        axml_file, = unpack('<L', self.buff.read(4))
        if axml_file != CHUNK_AXML_FILE:
            if axml_file >> 16 == 0x0008:
                self.axml_tampered = True
                print "AXML file has an unusual header", format(self.buff)
            else:
                self.valid_axml = False
                print "Not a valid AXML file. Header 0x{:08x}".format(
                    self.buff)
                return

        self.filesize, = unpack('<L', self.buff.read(4))
        header = ARSCHeader(self.buff)
        if header.type == 0x0001:
            print "Expected String Pool header, got %x" % header.type

        self.sb = StringBlock(self.buff, header)

        self.m_resourceIDs = []
        self.m_prefixuri = {}
        self.m_uriprefix = {}
        self.m_prefixuriL = []
示例#3
0
    def __init__(self, raw_buff):
        self.reset()

        self.buff = bytecode.BuffHandle(raw_buff)

        self.buff.read(4)
        self.buff.read(4)

        self.sb = StringBlock(self.buff)

        self.m_resourceIDs = []
        self.m_prefixuri = {}
        self.m_uriprefix = {}
        self.m_prefixuriL = []
    def __init__(self, raw_buff):
        self.reset()
        self.valid_axml = True
        self.axml_tampered = False
        self.packerwarning = False
        self.buff = bytecode.BuffHandle(raw_buff)

        axml_file, = unpack('<L', self.buff.read(4))
        if axml_file != CHUNK_AXML_FILE:
            if axml_file >> 16 == 0x0008:
                self.axml_tampered = True
                print "AXML file has an unusual header", format(self.buff)
            else:
                self.valid_axml = False
                print "Not a valid AXML file. Header 0x{:08x}".format(self.buff)
                return

        self.filesize, = unpack('<L', self.buff.read(4))
        header = ARSCHeader(self.buff)
        if header.type == 0x0001:
            print  "Expected String Pool header, got %x" % header.type

        self.sb = StringBlock(self.buff, header)

        self.m_resourceIDs = []
        self.m_prefixuri = {}
        self.m_uriprefix = {}
        self.m_prefixuriL = []
示例#5
0
    def __init__(self, raw_buff) :
        self.reset()

        self.buff = bytecode.BuffHandle( raw_buff )

        self.buff.read(4)
        self.buff.read(4)

        self.sb = StringBlock( self.buff )

        self.m_resourceIDs = []
        self.m_prefixuri = {}
        self.m_uriprefix = {}
        self.m_prefixuriL = []
示例#6
0
    def __init__(self, raw_buff):
        self.reset()

        self.valid_axml = True
        self.buff = BuffHandle(raw_buff)

        axml_file = unpack('<L', self.buff.read(4))[0]

        if axml_file == self.CHUNK_AXML_FILE:
            self.buff.read(4)

            self.sb = StringBlock(self.buff)

            self.m_resourceIDs = []
            self.m_prefixuri = {}
            self.m_uriprefix = {}
            self.m_prefixuriL = []

            self.visited_ns = []
        else:
            self.valid_axml = False
            logging.warning("Not a valid xml file")
示例#7
0
    def __init__(self, raw_buff, debug=False):
        self.log = logging.getLogger("pyaxmlparser.arscparser")
        self.log.setLevel(logging.DEBUG if debug else logging.CRITICAL)
        self.analyzed = False
        self._resolved_strings = None
        self.buff = bytecode.BuffHandle(raw_buff)

        self.header = ARSCHeader(self.buff)
        # TODO: assert header type
        self.packageCount = unpack("<i", self.buff.read(4))[0]

        self.packages = {}
        self.values = {}
        self.resource_values = collections.defaultdict(collections.defaultdict)
        self.resource_configs = collections.defaultdict(
            lambda: collections.defaultdict(set))
        self.resource_keys = collections.defaultdict(
            lambda: collections.defaultdict(collections.defaultdict))
        self.string_pool_main = None

        # skip to the start of the first chunk data, skipping trailing header bytes
        self.buff.set_idx(self.header.start + self.header.header_size)

        # Gives the offset inside the file of the end of this chunk
        data_end = self.header.start + self.header.size

        while self.buff.get_idx() <= data_end - ARSCHeader.SIZE:
            res_header = ARSCHeader(self.buff)

            if res_header.start + res_header.size > data_end:
                # this inner chunk crosses the boundary of the table chunk
                break

            if (res_header.type == const.RES_STRING_POOL_TYPE
                    and not self.string_pool_main):
                self.string_pool_main = StringBlock(self.buff, res_header)

            elif res_header.type == const.RES_TABLE_PACKAGE_TYPE:
                assert (len(self.packages) <
                        self.packageCount), "Got more packages than expected"

                current_package = ARSCResTablePackage(self.buff, res_header)
                package_name = current_package.get_name()
                package_data_end = res_header.start + res_header.size

                self.packages[package_name] = []

                # After the Header, we have the resource type symbol table
                self.buff.set_idx(current_package.header.start +
                                  current_package.typeStrings)
                type_sp_header = ARSCHeader(self.buff)
                assert type_sp_header.type == const.RES_STRING_POOL_TYPE, (
                    "Expected String Pool header, got %x" %
                    type_sp_header.type)
                table_strings = StringBlock(self.buff, type_sp_header)

                # Next, we should have the resource key symbol table
                self.buff.set_idx(current_package.header.start +
                                  current_package.keyStrings)
                key_sp_header = ARSCHeader(self.buff)
                assert key_sp_header.type == const.RES_STRING_POOL_TYPE, (
                    "Expected String Pool header, got %x" % key_sp_header.type)
                key_strings = StringBlock(self.buff, key_sp_header)

                # Add them to the dict of read packages
                self.packages[package_name].append(current_package)
                self.packages[package_name].append(table_strings)
                self.packages[package_name].append(key_strings)

                pc = PackageContext(current_package, self.string_pool_main,
                                    table_strings, key_strings)

                # skip to the first header in this table package chunk
                # FIXME is this correct? We have already read the first two sections!
                # self.buff.set_idx(res_header.start + res_header.header_size)
                # this looks more like we want: (???)
                self.buff.set_idx(res_header.start + res_header.header_size +
                                  type_sp_header.size + key_sp_header.size)

                # Read all other headers
                while self.buff.get_idx(
                ) <= package_data_end - ARSCHeader.SIZE:
                    pkg_chunk_header = ARSCHeader(self.buff)
                    self.log.debug(
                        "Found a header: {}".format(pkg_chunk_header))
                    if (pkg_chunk_header.start + pkg_chunk_header.size >
                            package_data_end):
                        # we are way off the package chunk; bail out
                        break

                    self.packages[package_name].append(pkg_chunk_header)

                    if pkg_chunk_header.type == const.RES_TABLE_TYPE_SPEC_TYPE:
                        self.packages[package_name].append(
                            ARSCResTypeSpec(self.buff, pc))

                    elif pkg_chunk_header.type == const.RES_TABLE_TYPE_TYPE:
                        a_res_type = ARSCResType(self.buff, pc)
                        self.packages[package_name].append(a_res_type)
                        self.resource_configs[package_name][a_res_type].add(
                            a_res_type.config)

                        self.log.debug("Config: {}".format(a_res_type.config))

                        entries = []
                        for i in range(0, a_res_type.entryCount):
                            current_package.resource_id = (
                                current_package.resource_id & 0xFFFF0000 | i)
                            entries.append((
                                unpack("<i", self.buff.read(4))[0],
                                current_package.resource_id,
                            ))

                        self.packages[package_name].append(entries)

                        for entry, res_id in entries:
                            if self.buff.end():
                                break

                            if entry != -1:
                                ate = ARSCResTableEntry(self.buff, res_id, pc)
                                self.packages[package_name].append(ate)
                                if ate.is_weak():
                                    # FIXME we are not sure how to implement the FLAG_WEAk!
                                    # We saw the following: There is just a single Res_value after the ARSCResTableEntry
                                    # and then comes the next ARSCHeader.
                                    # Therefore we think this means all entries are somehow replicated?
                                    # So we do some kind of hack here. We set the idx to the entry again...
                                    # Now we will read all entries!
                                    # Not sure if this is a good solution though
                                    self.buff.set_idx(ate.start)
                    elif pkg_chunk_header.type == const.RES_TABLE_LIBRARY_TYPE:
                        self.log.warning(
                            "RES_TABLE_LIBRARY_TYPE chunk is not supported")
                    else:
                        # FIXME: silently skip other chunk types
                        pass

                    # skip to the next chunk
                    self.buff.set_idx(pkg_chunk_header.start +
                                      pkg_chunk_header.size)

            # move to the next resource chunk
            self.buff.set_idx(res_header.start + res_header.size)
示例#8
0
class AXMLParser:
    def __init__(self, raw_buff):
        self.reset()

        self.buff = bytecode.BuffHandle(raw_buff)

        self.buff.read(4)
        self.buff.read(4)

        self.sb = StringBlock(self.buff)

        self.m_resourceIDs = []
        self.m_prefixuri = {}
        self.m_uriprefix = {}
        self.m_prefixuriL = []

    def reset(self):
        self.m_event = -1
        self.m_lineNumber = -1
        self.m_name = -1
        self.m_namespaceUri = -1
        self.m_attributes = []
        self.m_idAttribute = -1
        self.m_classAttribute = -1
        self.m_styleAttribute = -1

    def next(self):
        self.doNext()
        return self.m_event

    def doNext(self):
        if self.m_event == tc.END_DOCUMENT:
            return

        event = self.m_event

        self.reset()

        while 1:
            chunkType = -1

            # Fake END_DOCUMENT event.
            if event == tc.END_TAG:
                pass

            # START_DOCUMENT
            if event == tc.START_DOCUMENT:
                chunkType = tc.CHUNK_XML_START_TAG
            else:
                if self.buff.end() == True:
                    self.m_event = tc.END_DOCUMENT
                    break
                chunkType = SV('<L', self.buff.read(4)).get_value()

            if chunkType == tc.CHUNK_RESOURCEIDS:
                chunkSize = SV('<L', self.buff.read(4)).get_value()
                # FIXME
                if chunkSize < 8 or chunkSize % 4 != 0:
                    raise ("ooo")

                for i in range(0, chunkSize / 4 - 2):
                    self.m_resourceIDs.append(SV('<L', self.buff.read(4)))

                continue

            # FIXME
            if chunkType < tc.CHUNK_XML_FIRST or chunkType > tc.CHUNK_XML_LAST:
                raise ("ooo")

            # Fake START_DOCUMENT event.
            if chunkType == tc.CHUNK_XML_START_TAG and event == -1:
                self.m_event = tc.START_DOCUMENT
                break

            self.buff.read(4)  #/*chunkSize*/
            lineNumber = SV('<L', self.buff.read(4)).get_value()
            self.buff.read(4)  #0xFFFFFFFF

            if chunkType == tc.CHUNK_XML_START_NAMESPACE or chunkType == tc.CHUNK_XML_END_NAMESPACE:
                if chunkType == tc.CHUNK_XML_START_NAMESPACE:
                    prefix = SV('<L', self.buff.read(4)).get_value()
                    uri = SV('<L', self.buff.read(4)).get_value()

                    self.m_prefixuri[prefix] = uri
                    self.m_uriprefix[uri] = prefix
                    self.m_prefixuriL.append((prefix, uri))
                else:
                    self.buff.read(4)
                    self.buff.read(4)
                    (prefix, uri) = self.m_prefixuriL.pop()
                    #del self.m_prefixuri[ prefix ]
                    #del self.m_uriprefix[ uri ]

                continue

            self.m_lineNumber = lineNumber

            if chunkType == tc.CHUNK_XML_START_TAG:
                self.m_namespaceUri = SV('<L', self.buff.read(4)).get_value()
                self.m_name = SV('<L', self.buff.read(4)).get_value()

                # FIXME
                self.buff.read(4)  #flags

                attributeCount = SV('<L', self.buff.read(4)).get_value()
                self.m_idAttribute = (attributeCount >> 16) - 1
                attributeCount = attributeCount & 0xFFFF
                self.m_classAttribute = SV('<L', self.buff.read(4)).get_value()
                self.m_styleAttribute = (self.m_classAttribute >> 16) - 1

                self.m_classAttribute = (self.m_classAttribute & 0xFFFF) - 1

                for i in range(0, attributeCount * tc.ATTRIBUTE_LENGTH):
                    self.m_attributes.append(
                        SV('<L', self.buff.read(4)).get_value())

                for i in range(tc.ATTRIBUTE_IX_VALUE_TYPE,
                               len(self.m_attributes), tc.ATTRIBUTE_LENGTH):
                    self.m_attributes[i] = (self.m_attributes[i] >> 24)

                self.m_event = tc.START_TAG
                break

            if chunkType == tc.CHUNK_XML_END_TAG:
                self.m_namespaceUri = SV('<L', self.buff.read(4)).get_value()
                self.m_name = SV('<L', self.buff.read(4)).get_value()
                self.m_event = tc.END_TAG
                break

            if chunkType == tc.CHUNK_XML_TEXT:
                self.m_name = SV('<L', self.buff.read(4)).get_value()

                # FIXME
                self.buff.read(4)  #?
                self.buff.read(4)  #?

                self.m_event = tc.TEXT
                break

    def getPrefixByUri(self, uri):
        try:
            return self.m_uriprefix[uri]
        except KeyError:
            return -1

    def getPrefix(self):
        try:
            return self.sb.getRaw(self.m_prefixuri[self.m_namespaceUri])
        except KeyError:
            return ""

    def getName(self):
        if self.m_name == -1 or (self.m_event != tc.START_TAG
                                 and self.m_event != tc.END_TAG):
            return ""

        return self.sb.getRaw(self.m_name)

    def getText(self):
        if self.m_name == -1 or self.m_event != tc.TEXT:
            return ""

        return self.sb.getRaw(self.m_name)

    def getNamespacePrefix(self, pos):
        prefix = self.m_prefixuriL[pos][0]
        return self.sb.getRaw(prefix)

    def getNamespaceUri(self, pos):
        uri = self.m_prefixuriL[pos][1]
        return self.sb.getRaw(uri)

    def getNamespaceCount(self, pos):
        pass

    def getAttributeOffset(self, index):
        # FIXME
        if self.m_event != tc.START_TAG:
            raise ("Current event is not START_TAG.")

        offset = index * 5
        # FIXME
        if offset >= len(self.m_attributes):
            raise ("Invalid attribute index")

        return offset

    def getAttributeCount(self):
        if self.m_event != tc.START_TAG:
            return -1

        return len(self.m_attributes) / tc.ATTRIBUTE_LENGTH

    def getAttributePrefix(self, index):
        offset = self.getAttributeOffset(index)
        uri = self.m_attributes[offset + tc.ATTRIBUTE_IX_NAMESPACE_URI]

        prefix = self.getPrefixByUri(uri)
        if prefix == -1:
            return ""

        return self.sb.getRaw(prefix)

    def getAttributeName(self, index):
        offset = self.getAttributeOffset(index)
        name = self.m_attributes[offset + tc.ATTRIBUTE_IX_NAME]

        if name == -1:
            return ""

        return self.sb.getRaw(name)

    def getAttributeValueType(self, index):
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + tc.ATTRIBUTE_IX_VALUE_TYPE]

    def getAttributeValueData(self, index):
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + tc.ATTRIBUTE_IX_VALUE_DATA]

    def getAttributeValue(self, index):
        offset = self.getAttributeOffset(index)
        valueType = self.m_attributes[offset + tc.ATTRIBUTE_IX_VALUE_TYPE]
        if valueType == tc.TYPE_STRING:
            valueString = self.m_attributes[offset +
                                            tc.ATTRIBUTE_IX_VALUE_STRING]
            return self.sb.getRaw(valueString)
        # WIP
        return ""
示例#9
0
class AXMLParser :

    def __init__(self, raw_buff) :
        self.reset()

        self.buff = bytecode.BuffHandle( raw_buff )

        self.buff.read(4)
        self.buff.read(4)

        self.sb = StringBlock( self.buff )

        self.m_resourceIDs = []
        self.m_prefixuri = {}
        self.m_uriprefix = {}
        self.m_prefixuriL = []

    def reset(self) :
        self.m_event = -1
        self.m_lineNumber = -1
        self.m_name = -1
        self.m_namespaceUri = -1
        self.m_attributes = []
        self.m_idAttribute = -1
        self.m_classAttribute = -1
        self.m_styleAttribute = -1

    def next(self) :
        self.doNext()
        return self.m_event

    def doNext(self) :
        if self.m_event == StringBlock.END_DOCUMENT :
            return

        event = self.m_event

        self.reset()
        while 1 :
            chunkType = -1

            # Fake END_DOCUMENT event.
            if event == StringBlock.END_TAG :
                pass

            # START_DOCUMENT
            if event == StringBlock.START_DOCUMENT :
                chunkType = StringBlock.CHUNK_XML_START_TAG
            else :
                if self.buff.end() == True :
                    self.m_event = StringBlock.END_DOCUMENT
                    break
                chunkType = SV( '<L', self.buff.read( 4 ) ).get_value()


            if chunkType == StringBlock.CHUNK_RESOURCEIDS :
                chunkSize = SV( '<L', self.buff.read( 4 ) ).get_value()
                # FIXME
                if chunkSize < 8 or chunkSize%4 != 0 :
                    raise("ooo")

                for i in range(0, chunkSize/4-2) :
                    self.m_resourceIDs.append( SV( '<L', self.buff.read( 4 ) ) )

                continue

            # FIXME
            if chunkType < StringBlock.CHUNK_XML_FIRST or chunkType > StringBlock.CHUNK_XML_LAST :
                raise("ooo")

            # Fake START_DOCUMENT event.
            if chunkType == StringBlock.CHUNK_XML_START_TAG and event == -1 :
                self.m_event = StringBlock.START_DOCUMENT
                break

            self.buff.read( 4 ) #/*chunkSize*/
            lineNumber = SV( '<L', self.buff.read( 4 ) ).get_value()
            self.buff.read( 4 ) #0xFFFFFFFF

            if chunkType == StringBlock.CHUNK_XML_START_NAMESPACE or chunkType == StringBlock.CHUNK_XML_END_NAMESPACE :
                if chunkType == StringBlock.CHUNK_XML_START_NAMESPACE :
                    prefix = SV( '<L', self.buff.read( 4 ) ).get_value()
                    uri = SV( '<L', self.buff.read( 4 ) ).get_value()

                    self.m_prefixuri[ prefix ] = uri
                    self.m_uriprefix[ uri ] = prefix
                    self.m_prefixuriL.append( (prefix, uri) )
                else :
                    self.buff.read( 4 )
                    self.buff.read( 4 )
                    (prefix, uri) = self.m_prefixuriL.pop()
                    #del self.m_prefixuri[ prefix ]
                    #del self.m_uriprefix[ uri ]

                continue


            self.m_lineNumber = lineNumber

            if chunkType == StringBlock.CHUNK_XML_START_TAG :
                self.m_namespaceUri = SV( '<L', self.buff.read( 4 ) ).get_value()
                self.m_name = SV( '<L', self.buff.read( 4 ) ).get_value()

                # FIXME
                self.buff.read( 4 ) #flags
                
                attributeCount = SV( '<L', self.buff.read( 4 ) ).get_value()
                self.m_idAttribute = (attributeCount>>16) - 1
                attributeCount = attributeCount & 0xFFFF
                self.m_classAttribute = SV( '<L', self.buff.read( 4 ) ).get_value()
                self.m_styleAttribute = (self.m_classAttribute>>16) - 1

                self.m_classAttribute = (self.m_classAttribute & 0xFFFF) - 1

                for i in range(0, attributeCount*StringBlock.ATTRIBUTE_LENGTH) :
                    self.m_attributes.append( SV( '<L', self.buff.read( 4 ) ).get_value() )

                for i in range(StringBlock.ATTRIBUTE_IX_VALUE_TYPE, len(self.m_attributes), StringBlock.ATTRIBUTE_LENGTH) :
                    self.m_attributes[i] = (self.m_attributes[i]>>24)

                self.m_event = StringBlock.START_TAG
                break

            if chunkType == StringBlock.CHUNK_XML_END_TAG :
                self.m_namespaceUri = SV( '<L', self.buff.read( 4 ) ).get_value()
                self.m_name = SV( '<L', self.buff.read( 4 ) ).get_value()
                self.m_event = StringBlock.END_TAG
                break

            if chunkType == StringBlock.CHUNK_XML_TEXT :
                self.m_name = SV( '<L', self.buff.read( 4 ) ).get_value()
                
                # FIXME
                self.buff.read( 4 ) #?
                self.buff.read( 4 ) #?

                self.m_event = StringBlock.TEXT
                break

    def getPrefixByUri(self, uri) :
        try :
            return self.m_uriprefix[ uri ]
        except KeyError :
            return -1

    def getPrefix(self) :
        try :
            return self.sb.getRaw(self.m_prefixuri[ self.m_namespaceUri ])
        except KeyError :
            return ""

    def getName(self) :
        if self.m_name == -1 or (self.m_event != StringBlock.START_TAG and self.m_event != StringBlock.END_TAG) :
            return ""

        return self.sb.getRaw(self.m_name)

    def getText(self) :
        if self.m_name == -1 or self.m_event != StringBlock.TEXT :
            return ""

        return self.sb.getRaw(self.m_name)

    def getNamespacePrefix(self, pos) :
        prefix = self.m_prefixuriL[ pos ][0]
        return self.sb.getRaw( prefix )

    def getNamespaceUri(self, pos) :
        uri = self.m_prefixuriL[ pos ][1]
        return self.sb.getRaw( uri )

    def getNamespaceCount(self, pos) :
        pass

    def getAttributeOffset(self, index) :
        # FIXME
        if self.m_event != StringBlock.START_TAG :
            raise("Current event is not START_TAG.")

        offset = index * 5
        # FIXME
        if offset >= len(self.m_attributes) :
            raise("Invalid attribute index")

        return offset

    def getAttributeCount(self) :
        if self.m_event != StringBlock.START_TAG :
            return -1

        return len(self.m_attributes) / StringBlock.ATTRIBUTE_LENGTH

    def getAttributePrefix(self, index) :
        offset = self.getAttributeOffset(index)
        uri = self.m_attributes[offset+StringBlock.ATTRIBUTE_IX_NAMESPACE_URI]

        prefix = self.getPrefixByUri( uri )
        if prefix == -1 :
            return ""

        return self.sb.getRaw( prefix )

    def getAttributeName(self, index) :
        offset = self.getAttributeOffset(index)
        name = self.m_attributes[offset+StringBlock.ATTRIBUTE_IX_NAME]

        if name == -1 :
            return ""

        return self.sb.getRaw( name )

    def getAttributeValueType(self, index) :
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset+StringBlock.ATTRIBUTE_IX_VALUE_TYPE]

    def getAttributeValueData(self, index) :
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset+StringBlock.ATTRIBUTE_IX_VALUE_DATA]

    def getAttributeValue(self, index) :
        offset = self.getAttributeOffset(index)
        valueType = self.m_attributes[offset+StringBlock.ATTRIBUTE_IX_VALUE_TYPE]
        if valueType == tc.TYPE_STRING :
            valueString = self.m_attributes[offset+StringBlock.ATTRIBUTE_IX_VALUE_STRING]
            return self.sb.getRaw( valueString )
        # WIP
        return ""
示例#10
0
    def __init__(self, raw_buff, debug=False):
        self.log = logging.getLogger("pyaxmlparser.axmlparser")
        self.log.setLevel(logging.DEBUG if debug else logging.CRITICAL)
        self._reset()

        self._valid = True
        self.axml_tampered = False
        self.buff = bytecode.BuffHandle(raw_buff)

        # Minimum is a single ARSCHeader, which would be a strange edge case...
        if self.buff.size() < 8:
            self.log.error(
                "Filesize is too small to be a valid AXML file! Filesize: {}".
                format(self.buff.size()))
            self._valid = False
            return

        # This would be even stranger, if an AXML file is larger than 4GB...
        # But this is not possible as the maximum chunk size is a unsigned 4 byte int.
        if self.buff.size() > 0xFFFFFFFF:
            self.log.error(
                "Filesize is too large to be a valid AXML file! Filesize: {}".
                format(self.buff.size()))
            self._valid = False
            return

        try:
            axml_header = ARSCHeader(self.buff)
        except AssertionError as e:
            self.log.error("Error parsing first resource header: %s", e)
            self._valid = False
            return

        self.filesize = axml_header.size

        if axml_header.header_size == 28024:
            # Can be a common error: the file is not an AXML but a plain XML
            # The file will then usually start with '<?xm' / '3C 3F 78 6D'
            self.log.warning(
                "Header size is 28024! Are you trying to parse a plain XML file?"
            )

        if axml_header.header_size != 8:
            self.log.error(
                "This does not look like an AXML file. "
                "header size does not equal 8! header size = {}".format(
                    axml_header.header_size))
            self._valid = False
            return

        if self.filesize > self.buff.size():
            self.log.error(
                "This does not look like an AXML file. "
                "Declared filesize does not match real size: {} vs {}".format(
                    self.filesize, self.buff.size()))
            self._valid = False
            return

        if self.filesize < self.buff.size():
            # The file can still be parsed up to the point where the chunk should end.
            self.axml_tampered = True
            self.log.warning(
                "Declared filesize ({}) is smaller than total file size ({}). "
                "Was something appended to the file? Trying to parse it anyways."
                .format(self.filesize, self.buff.size()))

        # Not that severe of an error, we have plenty files where this is not
        # set correctly
        if axml_header.type != const.RES_XML_TYPE:
            self.axml_tampered = True
            self.log.warning(
                "AXML file has an unusual resource type! "
                "Malware likes to to such stuff to anti androguard! "
                "But we try to parse it anyways. "
                "Resource Type: 0x{:04x}".format(axml_header.type))

        # Now we parse the STRING POOL
        try:
            header = ARSCHeader(self.buff)
        except AssertionError as e:
            self.log.error("Error parsing resource header of string pool: %s",
                           e)
            self._valid = False
            return

        if header.header_size != 0x1C:
            self.log.error(
                "This does not look like an AXML file. String chunk header "
                "size does not equal 28! header size = {}".format(
                    header.header_size))
            self._valid = False
            return

        if header.type != const.RES_STRING_POOL_TYPE:
            self.log.error(
                "Expected String Pool header, got resource type 0x{:04x} "
                "instead".format(header.type))
            self._valid = False
            return

        self.sb = StringBlock(self.buff, header)

        # Stores resource ID mappings, if any
        self.resource_ids = []

        # Store a list of prefix/uri mappings encountered
        self.namespaces = []
示例#11
0
    def __init__(self, raw_buff):
        self.analyzed = False
        self._resolved_strings = None
        self.buff = bytecode.BuffHandle(raw_buff)

        self.header = ARSCHeader(self.buff)
        self.packageCount = unpack('<i', self.buff.read(4))[0]

        self.packages = {}
        self.values = {}
        self.resource_values = collections.defaultdict(collections.defaultdict)
        self.resource_configs = collections.defaultdict(
            lambda: collections.defaultdict(set))
        self.resource_keys = collections.defaultdict(
            lambda: collections.defaultdict(collections.defaultdict))
        self.stringpool_main = None

        self.buff.set_idx(self.header.start + self.header.header_size)

        data_end = self.header.start + self.header.size

        while self.buff.get_idx() <= data_end - ARSCHeader.SIZE:
            res_header = ARSCHeader(self.buff)

            if res_header.start + res_header.size > data_end:
                # this inner chunk crosses the boundary of the table chunk
                break

            if res_header.type == 0x0001 and not self.stringpool_main:
                self.stringpool_main = StringBlock(self.buff, res_header)

            elif res_header.type == 0x0200:
                print len(
                    self.packages
                ) < self.packageCount, "Got more packages than expected"

                current_package = ARSCResTablePackage(self.buff, res_header)
                package_name = current_package.get_name()
                package_data_end = res_header.start + res_header.size

                self.packages[package_name] = []

                self.buff.set_idx(current_package.header.start +
                                  current_package.typeStrings)
                type_sp_header = ARSCHeader(self.buff)
                assert type_sp_header.type == 0x0001, \
                    "Expected String Pool header, got %x" % type_sp_header.type
                mTableStrings = StringBlock(self.buff, type_sp_header)

                self.buff.set_idx(current_package.header.start +
                                  current_package.keyStrings)
                key_sp_header = ARSCHeader(self.buff)
                assert key_sp_header.type == 0x0001, \
                    "Expected String Pool header, got %x" % key_sp_header.type
                mKeyStrings = StringBlock(self.buff, key_sp_header)

                self.packages[package_name].append(current_package)
                self.packages[package_name].append(mTableStrings)
                self.packages[package_name].append(mKeyStrings)

                pc = PackageContext(current_package, self.stringpool_main,
                                    mTableStrings, mKeyStrings)

                # skip to the first header in this table package chunk
                self.buff.set_idx(res_header.start + res_header.header_size)

                while self.buff.get_idx(
                ) <= package_data_end - ARSCHeader.SIZE:

                    pkg_chunk_header = ARSCHeader(self.buff)
                    if pkg_chunk_header.start + pkg_chunk_header.size > package_data_end:
                        # we are way off the package chunk; bail out
                        break

                    self.packages[package_name].append(pkg_chunk_header)

                    if pkg_chunk_header.type == RES_TABLE_TYPE_SPEC_TYPE:
                        self.packages[package_name].append(
                            ARSCResTypeSpec(self.buff, pc))

                    elif pkg_chunk_header.type == RES_TABLE_TYPE_TYPE:
                        a_res_type = ARSCResType(self.buff, pc)
                        self.packages[package_name].append(a_res_type)
                        self.resource_configs[package_name][a_res_type].add(
                            a_res_type.config)

                        entries = []
                        for i in range(0, a_res_type.entryCount):
                            current_package.mResId = current_package.mResId & 0xffff0000 | i
                            entries.append((unpack('<i', self.buff.read(4))[0],
                                            current_package.mResId))

                        self.packages[package_name].append(entries)

                        for entry, res_id in entries:
                            if self.buff.end():
                                break

                            if entry != -1:
                                ate = ARSCResTableEntry(self.buff, res_id, pc)
                                self.packages[package_name].append(ate)
                    elif pkg_chunk_header.type == RES_TABLE_LIBRARY_TYPE:
                        print "RES_TABLE_LIBRARY_TYPE chunk is not supported"
                    else:
                        # silently skip other chunk types
                        pass

                    # skip to the next chunk
                    self.buff.set_idx(pkg_chunk_header.start +
                                      pkg_chunk_header.size)

            # move to the next resource chunk
            self.buff.set_idx(res_header.start + res_header.size)
class AXMLParser:

    def __init__(self, raw_buff):
        self.reset()
        self.valid_axml = True
        self.axml_tampered = False
        self.packerwarning = False
        self.buff = bytecode.BuffHandle(raw_buff)

        axml_file, = unpack('<L', self.buff.read(4))
        if axml_file != CHUNK_AXML_FILE:
            if axml_file >> 16 == 0x0008:
                self.axml_tampered = True
                print "AXML file has an unusual header", format(self.buff)
            else:
                self.valid_axml = False
                print "Not a valid AXML file. Header 0x{:08x}".format(self.buff)
                return

        self.filesize, = unpack('<L', self.buff.read(4))
        header = ARSCHeader(self.buff)
        if header.type == 0x0001:
            print  "Expected String Pool header, got %x" % header.type

        self.sb = StringBlock(self.buff, header)

        self.m_resourceIDs = []
        self.m_prefixuri = {}
        self.m_uriprefix = {}
        self.m_prefixuriL = []

    def reset(self):
        self.m_event = -1
        self.m_lineNumber = -1
        self.m_name = -1
        self.m_namespaceUri = -1
        self.m_attributes = []
        self.m_idAttribute = -1
        self.m_classAttribute = -1
        self.m_styleAttribute = -1

    def next(self):
        self.doNext()
        return self.m_event

    def doNext(self):
        if self.m_event == tc.END_DOCUMENT:
            return

        event = self.m_event

        self.reset()

        while 1:
            chunkType = -1

            # Fake END_DOCUMENT event.
            if event == tc.END_TAG:
                pass

            # START_DOCUMENT
            if event == tc.START_DOCUMENT:
                chunkType = tc.CHUNK_XML_START_TAG
            else:
                if self.buff.end() == True:
                    self.m_event = tc.END_DOCUMENT
                    break
                chunkType = SV('<L', self.buff.read(4)).get_value()

            if chunkType == tc.CHUNK_RESOURCEIDS:
                chunkSize = SV('<L', self.buff.read(4)).get_value()
                # FIXME
                if chunkSize < 8 or chunkSize%4 != 0:
                    raise("ooo")

                for i in range(0, int(chunkSize/4-2)):
                    self.m_resourceIDs.append(SV('<L', self.buff.read(4)))

                continue

            # FIXME
            if chunkType < tc.CHUNK_XML_FIRST or chunkType > tc.CHUNK_XML_LAST:
                raise("ooo")

            # Fake START_DOCUMENT event.
            if chunkType == tc.CHUNK_XML_START_TAG and event == -1:
                self.m_event = tc.START_DOCUMENT
                break

            self.buff.read(4) #/*chunkSize*/
            lineNumber = SV('<L', self.buff.read(4)).get_value()
            self.buff.read(4) #0xFFFFFFFF

            if chunkType == tc.CHUNK_XML_START_NAMESPACE or chunkType == tc.CHUNK_XML_END_NAMESPACE:
                if chunkType == tc.CHUNK_XML_START_NAMESPACE:
                    prefix = SV('<L', self.buff.read(4)).get_value()
                    uri = SV('<L', self.buff.read(4)).get_value()

                    self.m_prefixuri[ prefix ] = uri
                    self.m_uriprefix[ uri ] = prefix
                    self.m_prefixuriL.append((prefix, uri))
                else:
                    self.buff.read(4)
                    self.buff.read(4)
                    (prefix, uri) = self.m_prefixuriL.pop()
                    #del self.m_prefixuri[ prefix ]
                    #del self.m_uriprefix[ uri ]

                continue

            self.m_lineNumber = lineNumber

            if chunkType == tc.CHUNK_XML_START_TAG:
                self.m_namespaceUri = SV('<L', self.buff.read(4)).get_value()
                self.m_name = SV('<L', self.buff.read(4)).get_value()

                # FIXME
                self.buff.read(4) #flags

                attributeCount = SV('<L', self.buff.read(4)).get_value()
                self.m_idAttribute = (attributeCount>>16) - 1
                attributeCount = attributeCount & 0xFFFF
                self.m_classAttribute = SV('<L', self.buff.read(4)).get_value()
                self.m_styleAttribute = (self.m_classAttribute>>16) - 1

                self.m_classAttribute = (self.m_classAttribute & 0xFFFF) - 1

                for i in range(0, attributeCount * tc.ATTRIBUTE_LENGTH):
                    self.m_attributes.append(SV('<L', self.buff.read(4)).get_value())

                for i in range(tc.ATTRIBUTE_IX_VALUE_TYPE, len(self.m_attributes), tc.ATTRIBUTE_LENGTH):
                    self.m_attributes[i] = (self.m_attributes[i]>>24)

                self.m_event = tc.START_TAG
                break

            if chunkType == tc.CHUNK_XML_END_TAG:
                self.m_namespaceUri = SV('<L', self.buff.read(4)).get_value()
                self.m_name = SV('<L', self.buff.read(4)).get_value()
                self.m_event = tc.END_TAG
                break

            if chunkType == tc.CHUNK_XML_TEXT:
                self.m_name = SV('<L', self.buff.read(4)).get_value()

                # FIXME
                self.buff.read(4) #?
                self.buff.read(4) #?

                self.m_event = tc.TEXT
                break

    def getPrefixByUri(self, uri):
        try:
            return self.m_uriprefix[ uri ]
        except KeyError:
            return -1

    def getPrefix(self):
        try:
            return self.sb.getRaw(self.m_prefixuri[ self.m_namespaceUri ])
        except KeyError:
            return ""

    def getName(self):
        if self.m_name == -1 or (self.m_event != tc.START_TAG and self.m_event != tc.END_TAG):
            return ""

        return self.sb.getRaw(self.m_name)

    def getText(self):
        if self.m_name == -1 or self.m_event != tc.TEXT:
            return ""

        return self.sb.getRaw(self.m_name)

    def getNamespacePrefix(self, pos):
        prefix = self.m_prefixuriL[ pos ][0]
        return self.sb.getRaw(prefix)

    def getNamespaceUri(self, pos):
        uri = self.m_prefixuriL[ pos ][1]
        return self.sb.getRaw(uri)

    def getNamespaceCount(self, pos):
        pass

    def getAttributeOffset(self, index):
        # FIXME
        if self.m_event != tc.START_TAG:
            raise("Current event is not START_TAG.")

        offset = index * 5
        # FIXME
        if offset >= len(self.m_attributes):
            raise("Invalid attribute index")

        return offset

    def getAttributeCount(self):
        if self.m_event != tc.START_TAG:
            return -1

        return int(len(self.m_attributes) / tc.ATTRIBUTE_LENGTH)

    def getAttributePrefix(self, index):
        offset = self.getAttributeOffset(index)
        uri = self.m_attributes[offset + tc.ATTRIBUTE_IX_NAMESPACE_URI]

        prefix = self.getPrefixByUri(uri)
        if prefix == -1:
            return ""

        return self.sb.getRaw(prefix)

    def getAttributeName(self, index):
        offset = self.getAttributeOffset(index)
        name = self.m_attributes[offset + tc.ATTRIBUTE_IX_NAME]

        if name == -1:
            return ""

        return self.sb.getRaw(name)

    def getAttributeValueType(self, index):
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + tc.ATTRIBUTE_IX_VALUE_TYPE]

    def getAttributeValueData(self, index):
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + tc.ATTRIBUTE_IX_VALUE_DATA]

    def getAttributeValue(self, index):
        offset = self.getAttributeOffset(index)
        valueType = self.m_attributes[offset + tc.ATTRIBUTE_IX_VALUE_TYPE]
        if valueType == tc.TYPE_STRING:
            valueString = self.m_attributes[offset + tc.ATTRIBUTE_IX_VALUE_STRING]
            return self.sb.getRaw(valueString)
        # WIP
        return ""
示例#13
0
class AXMLParser:

    CHUNK_AXML_FILE = 0x00080003
    CHUNK_RESOURCEIDS = 0x00080180
    CHUNK_XML_FIRST = 0x00100100
    CHUNK_XML_START_NAMESPACE = 0x00100100
    CHUNK_XML_END_NAMESPACE = 0x00100101
    CHUNK_XML_START_TAG = 0x00100102
    CHUNK_XML_END_TAG = 0x00100103
    CHUNK_XML_TEXT = 0x00100104
    CHUNK_XML_LAST = 0x00100104

    START_DOCUMENT = 0
    END_DOCUMENT = 1
    START_TAG = 2
    END_TAG = 3
    TEXT = 4

    ATTRIBUTE_IX_NAMESPACE_URI = 0
    ATTRIBUTE_IX_NAME = 1
    ATTRIBUTE_IX_VALUE_STRING = 2
    ATTRIBUTE_IX_VALUE_TYPE = 3
    ATTRIBUTE_IX_VALUE_DATA = 4
    ATTRIBUTE_LENGHT = 5

    TYPE_ATTRIBUTE = 2
    TYPE_DIMENSION = 5
    TYPE_FIRST_COLOR_INT = 28
    TYPE_FIRST_INT = 16
    TYPE_FLOAT = 4
    TYPE_FRACTION = 6
    TYPE_INT_BOOLEAN = 18
    TYPE_INT_COLOR_ARGB4 = 30
    TYPE_INT_COLOR_ARGB8 = 28
    TYPE_INT_COLOR_RGB4 = 31
    TYPE_INT_COLOR_RGB8 = 29
    TYPE_INT_DEC = 16
    TYPE_INT_HEX = 17
    TYPE_LAST_COLOR_INT = 31
    TYPE_LAST_INT = 31
    TYPE_NULL = 0
    TYPE_REFERENCE = 1
    TYPE_STRING = 3

    def __init__(self, raw_buff):
        self.reset()

        self.valid_axml = True
        self.buff = BuffHandle(raw_buff)

        axml_file = unpack('<L', self.buff.read(4))[0]

        if axml_file == self.CHUNK_AXML_FILE:
            self.buff.read(4)

            self.sb = StringBlock(self.buff)

            self.m_resourceIDs = []
            self.m_prefixuri = {}
            self.m_uriprefix = {}
            self.m_prefixuriL = []

            self.visited_ns = []
        else:
            self.valid_axml = False
            logging.warning("Not a valid xml file")

    def is_valid(self):
        return self.valid_axml

    def reset(self):
        self.m_event = -1
        self.m_lineNumber = -1
        self.m_name = -1
        self.m_namespaceUri = -1
        self.m_attributes = []
        self.m_idAttribute = -1
        self.m_classAttribute = -1
        self.m_styleAttribute = -1

    def next(self):
        self.doNext()
        return self.m_event

    def doNext(self):
        if self.m_event == self.END_DOCUMENT:
            return

        event = self.m_event

        self.reset()
        while True:
            chunkType = -1

            # Fake END_DOCUMENT event.
            if event == self.END_TAG:
                pass

            # START_DOCUMENT
            if event == self.START_DOCUMENT:
                chunkType = self.CHUNK_XML_START_TAG
            else:
                if self.buff.end():
                    self.m_event = self.END_DOCUMENT
                    break
                chunkType = unpack('<L', self.buff.read(4))[0]

            if chunkType == self.CHUNK_RESOURCEIDS:
                chunkSize = unpack('<L', self.buff.read(4))[0]
                # FIXME
                if chunkSize < 8 or chunkSize % 4 != 0:
                    logging.warning("Invalid chunk size")

                for i in range(0, chunkSize / 4 - 2):
                    self.m_resourceIDs.append(unpack('<L', self.buff.read(4))[0])

                continue

            # FIXME
            if chunkType < self.CHUNK_XML_FIRST or chunkType > self.CHUNK_XML_LAST:
                logging.warning("invalid chunk type")

            # Fake START_DOCUMENT event.
            if chunkType == self.CHUNK_XML_START_TAG and event == -1:
                self.m_event = self.START_DOCUMENT
                break

            self.buff.read(4)  # /*chunkSize*/
            lineNumber = unpack('<L', self.buff.read(4))[0]
            self.buff.read(4)  # 0xFFFFFFFF

            if chunkType == self.CHUNK_XML_START_NAMESPACE or chunkType == self.CHUNK_XML_END_NAMESPACE:
                if chunkType == self.CHUNK_XML_START_NAMESPACE:
                    prefix = unpack('<L', self.buff.read(4))[0]
                    uri = unpack('<L', self.buff.read(4))[0]

                    self.m_prefixuri[prefix] = uri
                    self.m_uriprefix[uri] = prefix
                    self.m_prefixuriL.append((prefix, uri))
                    self.ns = uri
                else:
                    self.ns = -1
                    self.buff.read(4)
                    self.buff.read(4)
                    (prefix, uri) = self.m_prefixuriL.pop()
                continue

            self.m_lineNumber = lineNumber

            if chunkType == self.CHUNK_XML_START_TAG:
                self.m_namespaceUri = unpack('<L', self.buff.read(4))[0]
                self.m_name = unpack('<L', self.buff.read(4))[0]

                # FIXME
                self.buff.read(4)  # flags

                attributeCount = unpack('<L', self.buff.read(4))[0]
                self.m_idAttribute = (attributeCount >> 16) - 1
                attributeCount = attributeCount & 0xFFFF
                self.m_classAttribute = unpack('<L', self.buff.read(4))[0]
                self.m_styleAttribute = (self.m_classAttribute >> 16) - 1

                self.m_classAttribute = (self.m_classAttribute & 0xFFFF) - 1

                for i in range(0, attributeCount * self.ATTRIBUTE_LENGHT):
                    self.m_attributes.append(unpack('<L', self.buff.read(4))[0])

                for i in range(self.ATTRIBUTE_IX_VALUE_TYPE, len(self.m_attributes), self.ATTRIBUTE_LENGHT):
                    self.m_attributes[i] = self.m_attributes[i] >> 24

                self.m_event = self.START_TAG
                break

            if chunkType == self.CHUNK_XML_END_TAG:
                self.m_namespaceUri = unpack('<L', self.buff.read(4))[0]
                self.m_name = unpack('<L', self.buff.read(4))[0]
                self.m_event = self.END_TAG
                break

            if chunkType == self.CHUNK_XML_TEXT:
                self.m_name = unpack('<L', self.buff.read(4))[0]

                # FIXME
                self.buff.read(4)
                self.buff.read(4)

                self.m_event = self.TEXT
                break

    def getPrefixByUri(self, uri):
        try:
            return self.m_uriprefix[uri]
        except KeyError:
            return -1

    def getPrefix(self):
        try:
            return self.sb.getString(self.m_uriprefix[self.m_namespaceUri])
        except KeyError:
            return u''

    def getName(self):
        if self.m_name == -1 or (self.m_event != self.START_TAG and self.m_event != self.END_TAG):
            return u''
        return self.sb.getString(self.m_name)

    def getText(self):
        if self.m_name == -1 or self.m_event != self.TEXT:
            return u''
        return self.sb.getString(self.m_name)

    def getNamespacePrefix(self, pos):
        prefix = self.m_prefixuriL[pos][0]
        return self.sb.getString(prefix)

    def getNamespaceUri(self, pos):
        uri = self.m_prefixuriL[pos][1]
        return self.sb.getString(uri)

    def getXMLNS(self):
        buff = ""
        for i in self.m_uriprefix:
            if i not in self.visited_ns:
                buff += "xmlns:%s=\"%s\"\n" % (
                    self.sb.getString(self.m_uriprefix[i]), self.sb.getString(self.m_prefixuri[self.m_uriprefix[i]]))
                self.visited_ns.append(i)
        return buff

    def getNamespaceCount(self, pos):
        pass

    def getAttributeOffset(self, index):
        # FIXME
        if self.m_event != self.START_TAG:
            logging.warning("Current event is not START_TAG.")

        offset = index * 5
        # FIXME
        if offset >= len(self.m_attributes):
            logging.warning("Invalid attribute index")

        return offset

    def getAttributeCount(self):
        if self.m_event != self.START_TAG:
            return -1

        return len(self.m_attributes) / self.ATTRIBUTE_LENGHT

    def getAttributePrefix(self, index):
        offset = self.getAttributeOffset(index)
        uri = self.m_attributes[offset + self.ATTRIBUTE_IX_NAMESPACE_URI]
        prefix = self.getPrefixByUri(uri)
        if prefix == -1:
            return ""
        return self.sb.getString(prefix)

    def getAttributeName(self, index):
        offset = self.getAttributeOffset(index)
        name = self.m_attributes[offset + self.ATTRIBUTE_IX_NAME]
        if name == -1:
            return ""
        return self.sb.getString(name)

    def getAttributeValueType(self, index):
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + self.ATTRIBUTE_IX_VALUE_TYPE]

    def getAttributeValueData(self, index):
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + self.ATTRIBUTE_IX_VALUE_DATA]

    def getAttributeValue(self, index):
        offset = self.getAttributeOffset(index)
        valueType = self.m_attributes[offset + self.ATTRIBUTE_IX_VALUE_TYPE]
        if valueType == self.TYPE_STRING:
            valueString = self.m_attributes[offset + self.ATTRIBUTE_IX_VALUE_STRING]
            return self.sb.getString(valueString)
        return ""