def __init__(self, raw_buff):
    """
    Parse the header of an Android binary XML (AXML) blob and read the
    string pool.

    Sets `self.valid_axml` to False (and returns early) if the magic is
    unrecognized; sets `self.axml_tampered` if the magic looks like the
    known 0x0008NNNN anti-analysis variant but parsing is attempted anyway.

    :param raw_buff: raw bytes of the AXML file
    :raises AssertionError: if the declared filesize exceeds the real
        buffer size, or the chunk following the header is not a string pool
    """
    self.reset()
    self.valid_axml = True
    self.axml_tampered = False
    self.packerwarning = False
    self.buff = bytecode.BuffHandle(raw_buff)

    # First 4 bytes: chunk magic identifying an AXML file.
    axml_file, = unpack('<L', self.buff.read(4))

    if axml_file != const.CHUNK_AXML_FILE:
        # It looks like the header is wrong.
        # need some other checks.
        # We noted, that a some of files start with 0x0008NNNN,
        # where NNNN is some random number
        if axml_file >> 16 == 0x0008:
            self.axml_tampered = True
            warn("AXML file has an unusual header, most malwares like "
                 "doing such stuff to anti androguard! But we try to parse "
                 "it anyways. Header: 0x{:08x}".format(axml_file))
        else:
            self.valid_axml = False
            warn("Not a valid AXML file. Header 0x{:08x}".format(axml_file))
            return

    # Next is the filesize
    self.filesize, = unpack('<L', self.buff.read(4))
    # Explicit raise instead of `assert`: asserts are stripped under
    # `python -O`, which would silently disable this validation of
    # untrusted file data. AssertionError is kept so existing callers
    # catching it still work.
    if self.filesize > self.buff.size():
        raise AssertionError(
            "Declared filesize does not match real size: {} vs {}".format(
                self.filesize, self.buff.size()))

    # Now we parse the STRING POOL
    header = arscutil.ARSCHeader(
        self.buff)  # read 8 byte=header+chunk_size
    if header.type != const.RES_STRING_POOL_TYPE:
        raise AssertionError(
            "Expected String Pool header, got %x" % header.type)

    self.sb = StringBlock(self.buff, header)

    # Resource ID table read later from a RES_XML_RESOURCE_MAP chunk, if any
    self.m_resourceIDs = []
    # prefix -> uri and uri -> prefix namespace mappings
    self.m_prefixuri = {}
    self.m_uriprefix = defaultdict(list)
    # Contains a list of current prefix/uri pairs
    self.m_prefixuriL = []
    # Store which namespaces are already printed
    self.visited_ns = []
def __init__(self, raw_buff):
    """
    Set up the parser state from a raw AXML byte buffer.

    Marks the instance invalid (``self.valid_axml = False``) when the
    leading magic is not the expected AXML chunk identifier.

    :param raw_buff: raw bytes of the AXML file
    """
    self.reset()
    self.valid_axml = True
    self.buff = bytecode.BuffHandle(raw_buff)

    # Leading 4-byte magic must identify an AXML file; bail out otherwise.
    magic, = unpack('<L', self.buff.read(4))
    if magic != const.CHUNK_AXML_FILE:
        self.valid_axml = False
        warn("Not a valid xml file")
        return

    # Skip the 4-byte declared filesize, then read the string pool.
    self.buff.read(4)
    self.sb = StringBlock(self.buff)

    # Resource IDs and namespace bookkeeping, filled during parsing.
    self.m_resourceIDs = []
    self.m_prefixuri = {}
    self.m_uriprefix = {}
    self.m_prefixuriL = []
    self.visited_ns = []
def __init__(self, raw_buff):
    """
    Parse an ARSC (resources.arsc) table: read the global string pool,
    then iterate over each package and collect its chunks
    (type specs, types and table entries) into ``self.packages``.

    :param raw_buff: raw bytes of the resources.arsc file
    """
    self.analyzed = False
    self.buff = bytecode.BuffHandle(raw_buff)
    # print "SIZE", hex(self.buff.size())

    # Table header, followed by the number of packages in the table.
    self.header = ARSCHeader(self.buff)
    self.packageCount = unpack('<i', self.buff.read(4))[0]
    # print hex(self.packageCount)

    # Global string pool shared by all packages.
    self.stringpool_main = StringBlock(self.buff)
    self.next_header = ARSCHeader(self.buff)

    # package name -> flat list of parsed chunks/objects, in read order
    self.packages = {}
    self.values = {}

    for i in _range(0, self.packageCount):
        current_package = ARSCResTablePackage(self.buff)
        package_name = current_package.get_name()

        self.packages[package_name] = []

        # Per-package string pools: type names, then key (entry) names.
        mTableStrings = StringBlock(self.buff)
        mKeyStrings = StringBlock(self.buff)

        # self.stringpool_main.show()
        # self.mTableStrings.show()
        # self.mKeyStrings.show()

        self.packages[package_name].append(current_package)
        self.packages[package_name].append(mTableStrings)
        self.packages[package_name].append(mKeyStrings)

        pc = PackageContext(current_package, self.stringpool_main,
                            mTableStrings, mKeyStrings)

        # Walk the remaining chunks; `current` tracks the absolute offset
        # of the chunk being read so we can jump to the next one by size.
        current = self.buff.get_idx()
        while not self.buff.end():
            header = ARSCHeader(self.buff)
            self.packages[package_name].append(header)

            if header.type == const.RES_TABLE_TYPE_SPEC_TYPE:
                self.packages[package_name].append(
                    ARSCResTypeSpec(self.buff, pc))
            elif header.type == const.RES_TABLE_TYPE_TYPE:
                a_res_type = ARSCResType(self.buff, pc)
                self.packages[package_name].append(a_res_type)

                # Entry offset table: one int per entry; the low 16 bits
                # of mResId are replaced by the entry index.
                entries = []
                for i in _range(0, a_res_type.entryCount):
                    current_package.mResId = \
                        current_package.mResId & 0xffff0000 | i
                    entries.append((unpack('<i', self.buff.read(4))[0],
                                    current_package.mResId))

                self.packages[package_name].append(entries)

                # -1 (NO_ENTRY) marks an absent entry for this config.
                for entry, res_id in entries:
                    if self.buff.end():
                        break

                    if entry != -1:
                        ate = ARSCResTableEntry(self.buff, res_id, pc)
                        self.packages[package_name].append(ate)
            elif header.type == const.RES_TABLE_PACKAGE_TYPE:
                # Next package starts; handled by the outer loop.
                break
            else:
                warn("unknown type")
                break

            # Advance to the next chunk using the declared chunk size.
            current += header.size
            self.buff.set_idx(current)
def __init__(self, raw_buff):
    """
    Parse an ARSC (resources.arsc) resource table.

    Reads the table header, locates the global string pool, then parses
    each RES_TABLE_PACKAGE chunk: its type/key string pools, type specs,
    types and table entries. Parsed objects are appended in read order to
    ``self.packages[package_name]``; per-type configs are recorded in
    ``self.resource_configs``.

    :param raw_buff: raw bytes of the resources.arsc file
    :raises AssertionError: on more packages than declared, or a missing
        string-pool header where one is expected
    """
    self.analyzed = False
    self._resolved_strings = None
    self.buff = bytecode.BuffHandle(raw_buff)

    # Table header followed by the declared number of packages.
    self.header = ARSCHeader(self.buff)
    self.packageCount = unpack('<i', self.buff.read(4))[0]

    self.packages = {}
    self.values = {}
    # Caches filled lazily/elsewhere: locale -> id -> values, etc.
    self.resource_values = collections.defaultdict(collections.defaultdict)
    self.resource_configs = collections.defaultdict(
        lambda: collections.defaultdict(set))
    self.resource_keys = collections.defaultdict(
        lambda: collections.defaultdict(collections.defaultdict))
    self.stringpool_main = None

    # skip to the start of the first chunk
    self.buff.set_idx(self.header.start + self.header.header_size)

    data_end = self.header.start + self.header.size

    # Iterate top-level chunks while a full ARSCHeader still fits.
    while self.buff.get_idx() <= data_end - ARSCHeader.SIZE:
        res_header = ARSCHeader(self.buff)

        if res_header.start + res_header.size > data_end:
            # this inner chunk crosses the boundary of the table chunk
            break

        # First string pool found is the global (value) string pool.
        if res_header.type == const.RES_STRING_POOL_TYPE and not self.stringpool_main:
            self.stringpool_main = StringBlock(self.buff, res_header)
        elif res_header.type == const.RES_TABLE_PACKAGE_TYPE:
            assert len(
                self.packages
            ) < self.packageCount, "Got more packages than expected"

            current_package = ARSCResTablePackage(self.buff, res_header)
            package_name = current_package.get_name()

            # After the Header, we have the resource type symbol table
            package_data_end = res_header.start + res_header.size

            self.packages[package_name] = []

            # typeStrings/keyStrings are offsets relative to the package
            # header start — jump there to read each pool.
            self.buff.set_idx(current_package.header.start +
                              current_package.typeStrings)
            type_sp_header = ARSCHeader(self.buff)
            assert type_sp_header.type == const.RES_STRING_POOL_TYPE, \
                "Expected String Pool header, got %x" % type_sp_header.type
            mTableStrings = StringBlock(self.buff, type_sp_header)

            # Next, we should have the resource key symbol table
            self.buff.set_idx(current_package.header.start +
                              current_package.keyStrings)
            key_sp_header = ARSCHeader(self.buff)
            assert key_sp_header.type == const.RES_STRING_POOL_TYPE, \
                "Expected String Pool header, got %x" % key_sp_header.type
            mKeyStrings = StringBlock(self.buff, key_sp_header)

            # Add them to the dict of read packages
            self.packages[package_name].append(current_package)
            self.packages[package_name].append(mTableStrings)
            self.packages[package_name].append(mKeyStrings)

            pc = PackageContext(current_package, self.stringpool_main,
                                mTableStrings, mKeyStrings)

            # skip to the first header in this table package chunk
            # FIXME is this correct? We have already read the first two sections!
            # self.buff.set_idx(res_header.start + res_header.header_size)
            # this looks more like we want: (???)
            self.buff.set_idx(res_header.start + res_header.header_size +
                              type_sp_header.size + key_sp_header.size)

            # Read all other headers
            while self.buff.get_idx(
            ) <= package_data_end - ARSCHeader.SIZE:
                pkg_chunk_header = ARSCHeader(self.buff)
                log.debug("Found a header: {}".format(pkg_chunk_header))
                if pkg_chunk_header.start + pkg_chunk_header.size > package_data_end:
                    # we are way off the package chunk; bail out
                    break

                self.packages[package_name].append(pkg_chunk_header)

                if pkg_chunk_header.type == const.RES_TABLE_TYPE_SPEC_TYPE:
                    self.packages[package_name].append(
                        ARSCResTypeSpec(self.buff, pc))
                elif pkg_chunk_header.type == const.RES_TABLE_TYPE_TYPE:
                    a_res_type = ARSCResType(self.buff, pc)
                    self.packages[package_name].append(a_res_type)
                    self.resource_configs[package_name][a_res_type].add(
                        a_res_type.config)

                    log.debug("Config: {}".format(a_res_type.config))

                    # Entry offset table: one int per entry; the low 16
                    # bits of mResId carry the entry index.
                    entries = []
                    for i in range(0, a_res_type.entryCount):
                        current_package.mResId = current_package.mResId & 0xffff0000 | i
                        entries.append((unpack('<i', self.buff.read(4))[0],
                                        current_package.mResId))

                    self.packages[package_name].append(entries)

                    # -1 (NO_ENTRY) marks an entry absent for this config.
                    for entry, res_id in entries:
                        if self.buff.end():
                            break

                        if entry != -1:
                            ate = ARSCResTableEntry(self.buff, res_id, pc)
                            self.packages[package_name].append(ate)
                            if ate.is_weak():
                                # FIXME we are not sure how to implement the FLAG_WEAk!
                                # We saw the following: There is just a single Res_value after the ARSCResTableEntry
                                # and then comes the next ARSCHeader.
                                # Therefore we think this means all entries are somehow replicated?
                                # So we do some kind of hack here. We set the idx to the entry again...
                                # Now we will read all entries!
                                # Not sure if this is a good solution though
                                self.buff.set_idx(ate.start)
                elif pkg_chunk_header.type == const.RES_TABLE_LIBRARY_TYPE:
                    log.warning(
                        "RES_TABLE_LIBRARY_TYPE chunk is not supported")
                else:
                    # silently skip other chunk types
                    pass

                # skip to the next chunk
                self.buff.set_idx(pkg_chunk_header.start +
                                  pkg_chunk_header.size)

        # move to the next resource chunk
        self.buff.set_idx(res_header.start + res_header.size)
def __init__(self, raw_buff):
    """
    Validate and begin parsing an Android binary XML (AXML) blob.

    Performs a chain of sanity checks on the outer resource header and
    the string-pool chunk. On any hard failure, ``self._valid`` is set to
    False and parsing is abandoned; softer anomalies (unexpected resource
    type, trailing data after the declared filesize) only set
    ``self.axml_tampered`` and emit a warning.

    :param raw_buff: raw bytes of the AXML file
    """
    self._reset()
    self._valid = True
    self.axml_tampered = False
    self.buff = bytecode.BuffHandle(raw_buff)

    # Minimum is a single ARSCHeader, which would be a strange edge case...
    if self.buff.size() < 8:
        log.error(
            "Filesize is too small to be a valid AXML file! Filesize: {}".
            format(self.buff.size()))
        self._valid = False
        return

    # This would be even stranger, if an AXML file is larger than 4GB...
    # But this is not possible as the maximum chunk size is a unsigned 4 byte int.
    if self.buff.size() > 0xFFFFFFFF:
        log.error(
            "Filesize is too large to be a valid AXML file! Filesize: {}".
            format(self.buff.size()))
        self._valid = False
        return

    # ARSCHeader validates its fields via assert; treat failures as an
    # unparseable file rather than letting the exception propagate.
    try:
        axml_header = ARSCHeader(self.buff)
    except AssertionError as e:
        log.error("Error parsing first resource header: %s", e)
        self._valid = False
        return

    self.filesize = axml_header.size

    if axml_header.header_size == 28024:
        # Can be a common error: the file is not an AXML but a plain XML
        # The file will then usually start with '<?xm' / '3C 3F 78 6D'
        log.warning(
            "Header size is 28024! Are you trying to parse a plain XML file?"
        )

    # The outer RES_XML chunk header is always exactly 8 bytes.
    if axml_header.header_size != 8:
        log.error("This does not look like an AXML file. "
                  "header size does not equal 8! header size = {}".format(
                      axml_header.header_size))
        self._valid = False
        return

    if self.filesize > self.buff.size():
        log.error(
            "This does not look like an AXML file. "
            "Declared filesize does not match real size: {} vs {}".format(
                self.filesize, self.buff.size()))
        self._valid = False
        return

    if self.filesize < self.buff.size():
        # The file can still be parsed up to the point where the chunk should end.
        self.axml_tampered = True
        log.warning(
            "Declared filesize ({}) is smaller than total file size ({}). "
            "Was something appended to the file? Trying to parse it anyways."
            .format(self.filesize, self.buff.size()))

    # Not that severe of an error, we have plenty files where this is not
    # set correctly
    if axml_header.type != const.RES_XML_TYPE:
        self.axml_tampered = True
        log.warning("AXML file has an unusual resource type! "
                    "Malware likes to to such stuff to anti androguard! "
                    "But we try to parse it anyways. "
                    "Resource Type: 0x{:04x}".format(axml_header.type))

    # Now we parse the STRING POOL
    try:
        header = ARSCHeader(self.buff)
    except AssertionError as e:
        log.error("Error parsing resource header of string pool: %s", e)
        self._valid = False
        return

    # String-pool chunk headers are always 0x1C (28) bytes.
    if header.header_size != 0x1C:
        log.error(
            "This does not look like an AXML file. String chunk header "
            "size does not equal 28! header size = {}".format(
                header.header_size))
        self._valid = False
        return

    if header.type != const.RES_STRING_POOL_TYPE:
        log.error(
            "Expected String Pool header, got resource type 0x{:04x} "
            "instead".format(header.type))
        self._valid = False
        return

    self.sb = StringBlock(self.buff, header)

    # Stores resource ID mappings, if any
    self.m_resourceIDs = []

    # Store a list of prefix/uri mappings encountered
    self.namespaces = []