def __init__(self, raw_buff):
    """Open a binary AXML buffer, validate its first header and load strings.

    The first 8 bytes are unpacked little-endian as ``<HHL`` into a chunk
    type, a header size and a size.  When the header looks wrong a short
    diagnostic is printed and the process is terminated with exit status 1.

    :param raw_buff: raw data handed to ``bytecode.BuffHandle``
    :raises SystemExit: if the header size is 28024 or the type is not 0x0003
    """
    self.reset()

    self.buff = bytecode.BuffHandle(raw_buff)

    _type, _header_size, _size = unpack('<HHL', self.buff.read(8))

    if _header_size == 28024:
        # Can be a common error: the file is not an AXML but a plain XML.
        # The file will then usually start with '<?xm' / '3C 3F 78 6D'.
        print("Header size is 28024! Are you trying to parse a plain XML file?")
        # ``exit`` is injected by the ``site`` module for interactive use and
        # may be missing; raising SystemExit directly has the same effect.
        raise SystemExit(1)

    if _type != 0x0003:
        print("header-type error")
        raise SystemExit(1)

    self.sb = StringBlock(self.buff)

    self.m_resourceIDs = []
    self.m_prefixuri = {}
    self.m_uriprefix = {}
    self.m_prefixuriL = []
def __init__(self, raw_buff):
    """Open a binary AXML buffer, tolerating some tampered headers.

    Flags set on the instance:

    * ``valid_axml``    -- False when the first word is not an AXML header
      at all; parsing stops and no string block is loaded.
    * ``axml_tampered`` -- True when the first word differs from
      ``CHUNK_AXML_FILE`` but its upper 16 bits are still 0x0008; parsing
      continues.
    * ``packerwarning`` -- initialised to False here.

    :param raw_buff: raw data handed to ``bytecode.BuffHandle``
    """
    self.reset()

    self.valid_axml = True
    self.axml_tampered = False
    self.packerwarning = False
    self.buff = bytecode.BuffHandle(raw_buff)

    axml_file, = unpack('<L', self.buff.read(4))

    if axml_file != CHUNK_AXML_FILE:
        if axml_file >> 16 == 0x0008:
            # Upper half still matches: treat as tampered but parseable.
            self.axml_tampered = True
            # Report the header word itself (the buffer object was being
            # formatted here before, which carried no information).
            print("AXML file has an unusual header 0x{:08x}".format(axml_file))
        else:
            self.valid_axml = False
            # Formatting the buffer object with ``08x`` raised instead of
            # reporting the offending header word.
            print("Not a valid AXML file. Header 0x{:08x}".format(axml_file))
            return

    self.filesize, = unpack('<L', self.buff.read(4))

    header = ARSCHeader(self.buff)
    # 0x0001 is the type the message below calls a String Pool header; warn
    # when it is NOT what we found (the comparison used to be inverted).
    if header.type != 0x0001:
        print("Expected String Pool header, got %x" % header.type)

    self.sb = StringBlock(self.buff, header)

    self.m_resourceIDs = []
    self.m_prefixuri = {}
    self.m_uriprefix = {}
    self.m_prefixuriL = []
def __init__(self, raw_buff):
    """Wrap *raw_buff* and load the string block.

    Two ``read(4)`` calls are issued and their results discarded before the
    string block is parsed (presumably the file magic and the file size --
    neither is checked here).

    :param raw_buff: raw data handed to ``bytecode.BuffHandle``
    """
    self.reset()

    self.buff = bytecode.BuffHandle(raw_buff)

    # Skip the two leading words without inspecting them.
    for _ in range(2):
        self.buff.read(4)

    self.sb = StringBlock(self.buff)

    # Bookkeeping filled in later while chunks are walked.
    self.m_resourceIDs, self.m_prefixuri = [], {}
    self.m_uriprefix, self.m_prefixuriL = {}, []
def __init__(self, raw_buff):
    """Open a binary AXML buffer, tolerating some tampered headers.

    ``valid_axml`` is cleared (and parsing stops before any string block is
    loaded) when the first word is neither ``CHUNK_AXML_FILE`` nor a word
    whose upper 16 bits are 0x0008.  In the latter case ``axml_tampered``
    is set and parsing continues.  ``packerwarning`` starts out False.

    :param raw_buff: raw data handed to ``bytecode.BuffHandle``
    """
    self.reset()

    self.valid_axml = True
    self.axml_tampered = False
    self.packerwarning = False
    self.buff = bytecode.BuffHandle(raw_buff)

    axml_file, = unpack('<L', self.buff.read(4))

    if axml_file != CHUNK_AXML_FILE:
        if axml_file >> 16 != 0x0008:
            self.valid_axml = False
            # The header word is what must be formatted; passing the buffer
            # object to ``{:08x}`` raised instead of printing a diagnostic.
            print("Not a valid AXML file. Header 0x{:08x}".format(axml_file))
            return

        # Upper half still matches: suspicious, but keep going.
        self.axml_tampered = True
        print("AXML file has an unusual header 0x{:08x}".format(axml_file))

    self.filesize, = unpack('<L', self.buff.read(4))

    header = ARSCHeader(self.buff)
    # Warn only when the chunk is NOT of type 0x0001, which is what the
    # message describes (the test used to be inverted).
    if header.type != 0x0001:
        print("Expected String Pool header, got %x" % header.type)

    self.sb = StringBlock(self.buff, header)

    self.m_resourceIDs = []
    self.m_prefixuri = {}
    self.m_uriprefix = {}
    self.m_prefixuriL = []
def __init__(self, raw_buff):
    """Create the parser state for *raw_buff* and read its string block.

    The results of the first two ``read(4)`` calls are thrown away; nothing
    in this constructor validates them.

    :param raw_buff: raw data handed to ``bytecode.BuffHandle``
    """
    self.reset()

    handle = bytecode.BuffHandle(raw_buff)
    self.buff = handle

    # Two leading words are consumed but ignored.
    handle.read(4)
    handle.read(4)

    self.sb = StringBlock(handle)

    self.m_resourceIDs = []
    self.m_prefixuri = {}
    self.m_uriprefix = {}
    self.m_prefixuriL = []
def __init__(self, raw_buff):
    """Wrap *raw_buff*; load the string block if the magic word matches.

    When the first little-endian 32-bit word is not ``CHUNK_AXML_FILE`` the
    instance is flagged with ``valid_axml = False``, a warning is logged and
    none of the parsing attributes (``sb``, ``m_*``, ``visited_ns``) are
    created.

    :param raw_buff: raw data handed to ``BuffHandle``
    """
    self.reset()
    self.valid_axml = True

    self.buff = BuffHandle(raw_buff)

    (magic,) = unpack('<L', self.buff.read(4))
    if magic != self.CHUNK_AXML_FILE:
        self.valid_axml = False
        logging.warning("Not a valid xml file")
        return

    # One more word is consumed and ignored before the string block.
    self.buff.read(4)

    self.sb = StringBlock(self.buff)

    self.m_resourceIDs = []
    self.m_prefixuri = {}
    self.m_uriprefix = {}
    self.m_prefixuriL = []
    self.visited_ns = []
def __init__(self, raw_buff, debug=False):
    """Parse a resource table from *raw_buff* and index its packages.

    The constructor walks every chunk inside the outer table chunk.  The
    first string pool chunk found becomes ``self.string_pool_main``; every
    package chunk is parsed and recorded in ``self.packages`` under the
    package's name as a flat list, in read order: the package object, its
    type string block, its key string block, and then each inner chunk
    header followed by whatever was parsed from that chunk.

    :param raw_buff: raw data handed to ``bytecode.BuffHandle``
    :param debug: when true the logger level is DEBUG, otherwise CRITICAL
    :raises AssertionError: if a package chunk is met once ``packageCount``
        packages were already read, or if the type/key symbol tables do not
        start with a string pool header
    """
    self.log = logging.getLogger("pyaxmlparser.arscparser")
    self.log.setLevel(logging.DEBUG if debug else logging.CRITICAL)
    self.analyzed = False
    self._resolved_strings = None
    self.buff = bytecode.BuffHandle(raw_buff)

    self.header = ARSCHeader(self.buff)
    # TODO: assert header type
    self.packageCount = unpack("<i", self.buff.read(4))[0]

    # package name -> flat list of everything parsed for that package
    self.packages = {}
    self.values = {}
    # The nested defaultdicts below are only created here; they are not
    # filled by this constructor except for resource_configs.
    self.resource_values = collections.defaultdict(collections.defaultdict)
    self.resource_configs = collections.defaultdict(
        lambda: collections.defaultdict(set))
    self.resource_keys = collections.defaultdict(
        lambda: collections.defaultdict(collections.defaultdict))
    self.string_pool_main = None

    # skip to the start of the first chunk data, skipping trailing header bytes
    self.buff.set_idx(self.header.start + self.header.header_size)

    # Gives the offset inside the file of the end of this chunk
    data_end = self.header.start + self.header.size

    # Stop as soon as there is no room left for another chunk header.
    while self.buff.get_idx() <= data_end - ARSCHeader.SIZE:
        res_header = ARSCHeader(self.buff)

        if res_header.start + res_header.size > data_end:
            # this inner chunk crosses the boundary of the table chunk
            break

        # Only the first string pool is kept as the main pool.
        if (res_header.type == const.RES_STRING_POOL_TYPE and not self.string_pool_main):
            self.string_pool_main = StringBlock(self.buff, res_header)

        elif res_header.type == const.RES_TABLE_PACKAGE_TYPE:
            assert (len(self.packages) < self.packageCount), "Got more packages than expected"

            current_package = ARSCResTablePackage(self.buff, res_header)
            package_name = current_package.get_name()
            package_data_end = res_header.start + res_header.size

            self.packages[package_name] = []

            # After the Header, we have the resource type symbol table
            self.buff.set_idx(current_package.header.start + current_package.typeStrings)
            type_sp_header = ARSCHeader(self.buff)
            assert type_sp_header.type == const.RES_STRING_POOL_TYPE, (
                "Expected String Pool header, got %x" % type_sp_header.type)
            table_strings = StringBlock(self.buff, type_sp_header)

            # Next, we should have the resource key symbol table
            self.buff.set_idx(current_package.header.start + current_package.keyStrings)
            key_sp_header = ARSCHeader(self.buff)
            assert key_sp_header.type == const.RES_STRING_POOL_TYPE, (
                "Expected String Pool header, got %x" % key_sp_header.type)
            key_strings = StringBlock(self.buff, key_sp_header)

            # Add them to the dict of read packages
            self.packages[package_name].append(current_package)
            self.packages[package_name].append(table_strings)
            self.packages[package_name].append(key_strings)

            pc = PackageContext(current_package, self.string_pool_main, table_strings, key_strings)

            # skip to the first header in this table package chunk
            # FIXME is this correct? We have already read the first two sections!
            # self.buff.set_idx(res_header.start + res_header.header_size)
            # this looks more like we want: (???)
            self.buff.set_idx(res_header.start + res_header.header_size + type_sp_header.size + key_sp_header.size)

            # Read all other headers
            while self.buff.get_idx() <= package_data_end - ARSCHeader.SIZE:
                pkg_chunk_header = ARSCHeader(self.buff)
                self.log.debug(
                    "Found a header: {}".format(pkg_chunk_header))
                if (pkg_chunk_header.start + pkg_chunk_header.size > package_data_end):
                    # we are way off the package chunk; bail out
                    break

                self.packages[package_name].append(pkg_chunk_header)

                if pkg_chunk_header.type == const.RES_TABLE_TYPE_SPEC_TYPE:
                    self.packages[package_name].append(
                        ARSCResTypeSpec(self.buff, pc))

                elif pkg_chunk_header.type == const.RES_TABLE_TYPE_TYPE:
                    a_res_type = ARSCResType(self.buff, pc)
                    self.packages[package_name].append(a_res_type)
                    self.resource_configs[package_name][a_res_type].add(
                        a_res_type.config)

                    self.log.debug("Config: {}".format(a_res_type.config))

                    # One 32-bit value is read per entry and paired with a
                    # resource id whose low 16 bits are the entry index.
                    entries = []
                    for i in range(0, a_res_type.entryCount):
                        current_package.resource_id = (
                            current_package.resource_id & 0xFFFF0000 | i)
                        entries.append((
                            unpack("<i", self.buff.read(4))[0],
                            current_package.resource_id,
                        ))

                    self.packages[package_name].append(entries)

                    for entry, res_id in entries:
                        if self.buff.end():
                            break

                        # Entries whose value is -1 are skipped: no table
                        # entry is read for them.
                        if entry != -1:
                            ate = ARSCResTableEntry(self.buff, res_id, pc)
                            self.packages[package_name].append(ate)
                            if ate.is_weak():
                                # FIXME we are not sure how to implement the FLAG_WEAk!
                                # We saw the following: There is just a single Res_value after the ARSCResTableEntry
                                # and then comes the next ARSCHeader.
                                # Therefore we think this means all entries are somehow replicated?
                                # So we do some kind of hack here. We set the idx to the entry again...
                                # Now we will read all entries!
                                # Not sure if this is a good solution though
                                self.buff.set_idx(ate.start)
                elif pkg_chunk_header.type == const.RES_TABLE_LIBRARY_TYPE:
                    self.log.warning(
                        "RES_TABLE_LIBRARY_TYPE chunk is not supported")
                else:
                    # FIXME: silently skip other chunk types
                    pass

                # skip to the next chunk
                self.buff.set_idx(pkg_chunk_header.start + pkg_chunk_header.size)

        # move to the next resource chunk
        self.buff.set_idx(res_header.start + res_header.size)
class AXMLParser:
    """Pull-style reader over a binary Android XML buffer.

    Call :meth:`next` repeatedly; every call advances to the next event and
    returns its code (one of the ``tc`` event constants).  The ``get*``
    accessors describe the event the parser currently sits on.  Names,
    prefixes and URIs are stored as indices and resolved through the string
    block (``self.sb.getRaw``).
    """

    def __init__(self, raw_buff):
        """Wrap *raw_buff* and load the string block.

        Two ``read(4)`` calls are issued and ignored before the string block
        is parsed.

        :param raw_buff: raw data handed to ``bytecode.BuffHandle``
        """
        self.reset()

        self.buff = bytecode.BuffHandle(raw_buff)

        self.buff.read(4)
        self.buff.read(4)

        self.sb = StringBlock(self.buff)

        self.m_resourceIDs = []
        self.m_prefixuri = {}    # prefix index -> uri index
        self.m_uriprefix = {}    # uri index -> prefix index
        self.m_prefixuriL = []   # stack of (prefix, uri) index pairs

    def reset(self):
        """Clear all per-event state (event code, name, attributes, ...)."""
        self.m_event = -1
        self.m_lineNumber = -1
        self.m_name = -1
        self.m_namespaceUri = -1
        self.m_attributes = []
        self.m_idAttribute = -1
        self.m_classAttribute = -1
        self.m_styleAttribute = -1

    def next(self):
        """Advance to the next event and return its code."""
        self.doNext()
        return self.m_event

    def doNext(self):
        """Read chunks until one produces an event, then store it on self.

        Does nothing once END_DOCUMENT was reached.  Resource-id and
        namespace chunks are consumed without producing an event.

        :raises ValueError: on a malformed resource-id chunk size or a chunk
            type outside the XML chunk range (previously these sites tried
            to raise a plain string, which itself failed with TypeError)
        """
        if self.m_event == tc.END_DOCUMENT:
            return

        event = self.m_event

        self.reset()
        while True:
            chunkType = -1

            if event == tc.START_DOCUMENT:
                # The START_TAG chunk type was already consumed by the call
                # that reported the fake START_DOCUMENT event.
                chunkType = tc.CHUNK_XML_START_TAG
            else:
                if self.buff.end():
                    self.m_event = tc.END_DOCUMENT
                    break
                chunkType = SV('<L', self.buff.read(4)).get_value()

            if chunkType == tc.CHUNK_RESOURCEIDS:
                chunkSize = SV('<L', self.buff.read(4)).get_value()
                if chunkSize < 8 or chunkSize % 4 != 0:
                    raise ValueError(
                        "Invalid resource IDs chunk size: %d" % chunkSize)
                # Floor division keeps this an int on Python 3 as well; the
                # two words subtracted are the chunk type and size.
                for _ in range(0, chunkSize // 4 - 2):
                    self.m_resourceIDs.append(SV('<L', self.buff.read(4)))
                continue

            if chunkType < tc.CHUNK_XML_FIRST or chunkType > tc.CHUNK_XML_LAST:
                raise ValueError("Unexpected chunk type: 0x%08x" % chunkType)

            # Fake START_DOCUMENT event.
            if chunkType == tc.CHUNK_XML_START_TAG and event == -1:
                self.m_event = tc.START_DOCUMENT
                break

            self.buff.read(4)  # chunkSize
            lineNumber = SV('<L', self.buff.read(4)).get_value()
            self.buff.read(4)  # 0xFFFFFFFF

            if chunkType == tc.CHUNK_XML_START_NAMESPACE or chunkType == tc.CHUNK_XML_END_NAMESPACE:
                if chunkType == tc.CHUNK_XML_START_NAMESPACE:
                    prefix = SV('<L', self.buff.read(4)).get_value()
                    uri = SV('<L', self.buff.read(4)).get_value()

                    self.m_prefixuri[prefix] = uri
                    self.m_uriprefix[uri] = prefix
                    self.m_prefixuriL.append((prefix, uri))
                else:
                    self.buff.read(4)
                    self.buff.read(4)
                    # Only the stack is unwound; the two lookup dicts keep
                    # their entries.
                    self.m_prefixuriL.pop()
                continue

            self.m_lineNumber = lineNumber

            if chunkType == tc.CHUNK_XML_START_TAG:
                self.m_namespaceUri = SV('<L', self.buff.read(4)).get_value()
                self.m_name = SV('<L', self.buff.read(4)).get_value()

                # FIXME
                self.buff.read(4)  # flags

                # High half: id attribute index + 1; low half: the count.
                attributeCount = SV('<L', self.buff.read(4)).get_value()
                self.m_idAttribute = (attributeCount >> 16) - 1
                attributeCount = attributeCount & 0xFFFF
                # Same packing: style index + 1 high, class index + 1 low.
                self.m_classAttribute = SV('<L', self.buff.read(4)).get_value()
                self.m_styleAttribute = (self.m_classAttribute >> 16) - 1

                self.m_classAttribute = (self.m_classAttribute & 0xFFFF) - 1

                for _ in range(0, attributeCount * tc.ATTRIBUTE_LENGTH):
                    self.m_attributes.append(
                        SV('<L', self.buff.read(4)).get_value())

                # Keep only the top byte of every value-type word.
                for i in range(tc.ATTRIBUTE_IX_VALUE_TYPE,
                               len(self.m_attributes), tc.ATTRIBUTE_LENGTH):
                    self.m_attributes[i] = (self.m_attributes[i] >> 24)

                self.m_event = tc.START_TAG
                break

            if chunkType == tc.CHUNK_XML_END_TAG:
                self.m_namespaceUri = SV('<L', self.buff.read(4)).get_value()
                self.m_name = SV('<L', self.buff.read(4)).get_value()
                self.m_event = tc.END_TAG
                break

            if chunkType == tc.CHUNK_XML_TEXT:
                self.m_name = SV('<L', self.buff.read(4)).get_value()

                # FIXME
                self.buff.read(4)  # ?
                self.buff.read(4)  # ?

                self.m_event = tc.TEXT
                break

    def getPrefixByUri(self, uri):
        """Return the prefix index registered for *uri*, or -1 if unknown."""
        try:
            return self.m_uriprefix[uri]
        except KeyError:
            return -1

    def getPrefix(self):
        """Return the prefix string of the current namespace, or ""."""
        try:
            return self.sb.getRaw(self.m_prefixuri[self.m_namespaceUri])
        except KeyError:
            return ""

    def getName(self):
        """Return the tag name for START_TAG/END_TAG events, else ""."""
        if self.m_name == -1 or (self.m_event != tc.START_TAG and self.m_event != tc.END_TAG):
            return ""

        return self.sb.getRaw(self.m_name)

    def getText(self):
        """Return the text for TEXT events, else ""."""
        if self.m_name == -1 or self.m_event != tc.TEXT:
            return ""

        return self.sb.getRaw(self.m_name)

    def getNamespacePrefix(self, pos):
        """Return the prefix string of the namespace at stack position *pos*."""
        prefix = self.m_prefixuriL[pos][0]
        return self.sb.getRaw(prefix)

    def getNamespaceUri(self, pos):
        """Return the URI string of the namespace at stack position *pos*."""
        uri = self.m_prefixuriL[pos][1]
        return self.sb.getRaw(uri)

    def getNamespaceCount(self, pos):
        """Not implemented; always returns None."""
        pass

    def getAttributeOffset(self, index):
        """Return the offset of attribute *index* inside ``m_attributes``.

        :raises ValueError: if the current event is not START_TAG
        :raises IndexError: if *index* is past the last attribute
        """
        if self.m_event != tc.START_TAG:
            raise ValueError("Current event is not START_TAG.")

        offset = index * tc.ATTRIBUTE_LENGTH
        if offset >= len(self.m_attributes):
            raise IndexError("Invalid attribute index")

        return offset

    def getAttributeCount(self):
        """Return the attribute count of the current tag, -1 if not on one."""
        if self.m_event != tc.START_TAG:
            return -1

        # Floor division so the count is an int on Python 3 too.
        return len(self.m_attributes) // tc.ATTRIBUTE_LENGTH

    def getAttributePrefix(self, index):
        """Return the namespace prefix of attribute *index*, or ""."""
        offset = self.getAttributeOffset(index)
        uri = self.m_attributes[offset + tc.ATTRIBUTE_IX_NAMESPACE_URI]

        prefix = self.getPrefixByUri(uri)
        if prefix == -1:
            return ""

        return self.sb.getRaw(prefix)

    def getAttributeName(self, index):
        """Return the name of attribute *index*, or "" if it has none."""
        offset = self.getAttributeOffset(index)
        name = self.m_attributes[offset + tc.ATTRIBUTE_IX_NAME]

        if name == -1:
            return ""

        return self.sb.getRaw(name)

    def getAttributeValueType(self, index):
        """Return the stored value-type code of attribute *index*."""
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + tc.ATTRIBUTE_IX_VALUE_TYPE]

    def getAttributeValueData(self, index):
        """Return the raw value-data word of attribute *index*."""
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + tc.ATTRIBUTE_IX_VALUE_DATA]

    def getAttributeValue(self, index):
        """Return the string value of attribute *index*.

        Only string-typed attributes are resolved; every other type yields
        "" (work in progress).
        """
        offset = self.getAttributeOffset(index)
        valueType = self.m_attributes[offset + tc.ATTRIBUTE_IX_VALUE_TYPE]
        if valueType == tc.TYPE_STRING:
            valueString = self.m_attributes[offset + tc.ATTRIBUTE_IX_VALUE_STRING]
            return self.sb.getRaw(valueString)
        # WIP
        return ""
class AXMLParser:
    """Event-based reader over a binary Android XML buffer.

    :meth:`next` moves to the following event and returns its code; event
    and chunk constants are looked up on ``StringBlock``.  Names, prefixes
    and URIs are kept as indices and resolved with ``self.sb.getRaw``.
    """

    def __init__(self, raw_buff):
        """Wrap *raw_buff* and load the string block.

        The results of the first two ``read(4)`` calls are ignored.

        :param raw_buff: raw data handed to ``bytecode.BuffHandle``
        """
        self.reset()

        self.buff = bytecode.BuffHandle(raw_buff)

        self.buff.read(4)
        self.buff.read(4)

        self.sb = StringBlock(self.buff)

        self.m_resourceIDs = []
        self.m_prefixuri = {}    # prefix index -> uri index
        self.m_uriprefix = {}    # uri index -> prefix index
        self.m_prefixuriL = []   # stack of (prefix, uri) index pairs

    def reset(self):
        """Reset every per-event field to its "nothing parsed" value."""
        self.m_event = -1
        self.m_lineNumber = -1
        self.m_name = -1
        self.m_namespaceUri = -1
        self.m_attributes = []
        self.m_idAttribute = -1
        self.m_classAttribute = -1
        self.m_styleAttribute = -1

    def next(self):
        """Advance to the next event and return its code."""
        self.doNext()
        return self.m_event

    def doNext(self):
        """Consume chunks until one yields an event and record it on self.

        A no-op once END_DOCUMENT was reported.  Resource-id and namespace
        chunks update bookkeeping only and never surface as events.

        :raises ValueError: on a malformed resource-id chunk size or a chunk
            type outside the XML chunk range (these sites used to raise a
            plain string, which is itself a TypeError)
        """
        if self.m_event == StringBlock.END_DOCUMENT:
            return

        event = self.m_event

        self.reset()
        while True:
            chunkType = -1

            if event == StringBlock.START_DOCUMENT:
                # The START_TAG type word was read by the previous call,
                # which stopped early to report START_DOCUMENT.
                chunkType = StringBlock.CHUNK_XML_START_TAG
            else:
                if self.buff.end():
                    self.m_event = StringBlock.END_DOCUMENT
                    break
                chunkType = SV('<L', self.buff.read(4)).get_value()

            if chunkType == StringBlock.CHUNK_RESOURCEIDS:
                chunkSize = SV('<L', self.buff.read(4)).get_value()
                if chunkSize < 8 or chunkSize % 4 != 0:
                    raise ValueError(
                        "Invalid resource IDs chunk size: %d" % chunkSize)
                # ``//`` so range() gets an int under Python 3 as well.
                for _ in range(0, chunkSize // 4 - 2):
                    self.m_resourceIDs.append(SV('<L', self.buff.read(4)))
                continue

            if chunkType < StringBlock.CHUNK_XML_FIRST or chunkType > StringBlock.CHUNK_XML_LAST:
                raise ValueError("Unexpected chunk type: 0x%08x" % chunkType)

            # Fake START_DOCUMENT event.
            if chunkType == StringBlock.CHUNK_XML_START_TAG and event == -1:
                self.m_event = StringBlock.START_DOCUMENT
                break

            self.buff.read(4)  # chunkSize
            lineNumber = SV('<L', self.buff.read(4)).get_value()
            self.buff.read(4)  # 0xFFFFFFFF

            if chunkType == StringBlock.CHUNK_XML_START_NAMESPACE or chunkType == StringBlock.CHUNK_XML_END_NAMESPACE:
                if chunkType == StringBlock.CHUNK_XML_START_NAMESPACE:
                    prefix = SV('<L', self.buff.read(4)).get_value()
                    uri = SV('<L', self.buff.read(4)).get_value()

                    self.m_prefixuri[prefix] = uri
                    self.m_uriprefix[uri] = prefix
                    self.m_prefixuriL.append((prefix, uri))
                else:
                    self.buff.read(4)
                    self.buff.read(4)
                    # Unwind the stack only; the dict entries are kept.
                    self.m_prefixuriL.pop()
                continue

            self.m_lineNumber = lineNumber

            if chunkType == StringBlock.CHUNK_XML_START_TAG:
                self.m_namespaceUri = SV('<L', self.buff.read(4)).get_value()
                self.m_name = SV('<L', self.buff.read(4)).get_value()

                # FIXME
                self.buff.read(4)  # flags

                # Packed word: (id attribute index + 1) << 16 | count.
                attributeCount = SV('<L', self.buff.read(4)).get_value()
                self.m_idAttribute = (attributeCount >> 16) - 1
                attributeCount = attributeCount & 0xFFFF
                # Packed word: (style index + 1) << 16 | (class index + 1).
                self.m_classAttribute = SV('<L', self.buff.read(4)).get_value()
                self.m_styleAttribute = (self.m_classAttribute >> 16) - 1

                self.m_classAttribute = (self.m_classAttribute & 0xFFFF) - 1

                for _ in range(0, attributeCount * StringBlock.ATTRIBUTE_LENGTH):
                    self.m_attributes.append(
                        SV('<L', self.buff.read(4)).get_value())

                # Reduce every value-type word to its top byte.
                for i in range(StringBlock.ATTRIBUTE_IX_VALUE_TYPE,
                               len(self.m_attributes),
                               StringBlock.ATTRIBUTE_LENGTH):
                    self.m_attributes[i] = (self.m_attributes[i] >> 24)

                self.m_event = StringBlock.START_TAG
                break

            if chunkType == StringBlock.CHUNK_XML_END_TAG:
                self.m_namespaceUri = SV('<L', self.buff.read(4)).get_value()
                self.m_name = SV('<L', self.buff.read(4)).get_value()
                self.m_event = StringBlock.END_TAG
                break

            if chunkType == StringBlock.CHUNK_XML_TEXT:
                self.m_name = SV('<L', self.buff.read(4)).get_value()

                # FIXME
                self.buff.read(4)  # ?
                self.buff.read(4)  # ?

                self.m_event = StringBlock.TEXT
                break

    def getPrefixByUri(self, uri):
        """Return the prefix index registered for *uri*, or -1 if unknown."""
        try:
            return self.m_uriprefix[uri]
        except KeyError:
            return -1

    def getPrefix(self):
        """Return the prefix string of the current namespace, or ""."""
        try:
            return self.sb.getRaw(self.m_prefixuri[self.m_namespaceUri])
        except KeyError:
            return ""

    def getName(self):
        """Return the tag name for START_TAG/END_TAG events, else ""."""
        if self.m_name == -1 or (self.m_event != StringBlock.START_TAG and self.m_event != StringBlock.END_TAG):
            return ""

        return self.sb.getRaw(self.m_name)

    def getText(self):
        """Return the text for TEXT events, else ""."""
        if self.m_name == -1 or self.m_event != StringBlock.TEXT:
            return ""

        return self.sb.getRaw(self.m_name)

    def getNamespacePrefix(self, pos):
        """Return the prefix string of the namespace at stack position *pos*."""
        prefix = self.m_prefixuriL[pos][0]
        return self.sb.getRaw(prefix)

    def getNamespaceUri(self, pos):
        """Return the URI string of the namespace at stack position *pos*."""
        uri = self.m_prefixuriL[pos][1]
        return self.sb.getRaw(uri)

    def getNamespaceCount(self, pos):
        """Not implemented; always returns None."""
        pass

    def getAttributeOffset(self, index):
        """Return the offset of attribute *index* inside ``m_attributes``.

        :raises ValueError: if the current event is not START_TAG
        :raises IndexError: if *index* is past the last attribute
        """
        if self.m_event != StringBlock.START_TAG:
            raise ValueError("Current event is not START_TAG.")

        offset = index * StringBlock.ATTRIBUTE_LENGTH
        if offset >= len(self.m_attributes):
            raise IndexError("Invalid attribute index")

        return offset

    def getAttributeCount(self):
        """Return the attribute count of the current tag, -1 if not on one."""
        if self.m_event != StringBlock.START_TAG:
            return -1

        # Floor division keeps the count an int on Python 3.
        return len(self.m_attributes) // StringBlock.ATTRIBUTE_LENGTH

    def getAttributePrefix(self, index):
        """Return the namespace prefix of attribute *index*, or ""."""
        offset = self.getAttributeOffset(index)
        uri = self.m_attributes[offset + StringBlock.ATTRIBUTE_IX_NAMESPACE_URI]

        prefix = self.getPrefixByUri(uri)
        if prefix == -1:
            return ""

        return self.sb.getRaw(prefix)

    def getAttributeName(self, index):
        """Return the name of attribute *index*, or "" if it has none."""
        offset = self.getAttributeOffset(index)
        name = self.m_attributes[offset + StringBlock.ATTRIBUTE_IX_NAME]

        if name == -1:
            return ""

        return self.sb.getRaw(name)

    def getAttributeValueType(self, index):
        """Return the stored value-type code of attribute *index*."""
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + StringBlock.ATTRIBUTE_IX_VALUE_TYPE]

    def getAttributeValueData(self, index):
        """Return the raw value-data word of attribute *index*."""
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + StringBlock.ATTRIBUTE_IX_VALUE_DATA]

    def getAttributeValue(self, index):
        """Return the string value of attribute *index*.

        Only string-typed attributes are resolved; any other type yields ""
        (work in progress).
        """
        offset = self.getAttributeOffset(index)
        valueType = self.m_attributes[offset + StringBlock.ATTRIBUTE_IX_VALUE_TYPE]
        if valueType == tc.TYPE_STRING:
            valueString = self.m_attributes[offset + StringBlock.ATTRIBUTE_IX_VALUE_STRING]
            return self.sb.getRaw(valueString)
        # WIP
        return ""
def __init__(self, raw_buff, debug=False):
    """Validate the headers of an AXML buffer and load its string pool.

    On any fatal problem an error is logged, ``self._valid`` is cleared and
    the constructor returns early, leaving ``sb``, ``resource_ids`` and
    ``namespaces`` unset.  Non-fatal oddities (trailing data, unusual
    resource type) only set ``self.axml_tampered`` and log a warning.

    :param raw_buff: raw data handed to ``bytecode.BuffHandle``
    :param debug: when true the logger level is DEBUG, otherwise CRITICAL
    """
    self.log = logging.getLogger("pyaxmlparser.axmlparser")
    self.log.setLevel(logging.DEBUG if debug else logging.CRITICAL)
    self._reset()

    self._valid = True
    self.axml_tampered = False
    self.buff = bytecode.BuffHandle(raw_buff)

    def reject(message, *log_args):
        # Shared tail of every fatal check: report, then mark invalid.
        self.log.error(message, *log_args)
        self._valid = False

    # Minimum is a single ARSCHeader, which would be a strange edge case...
    if self.buff.size() < 8:
        reject("Filesize is too small to be a valid AXML file! Filesize: {}"
               .format(self.buff.size()))
        return

    # This would be even stranger, if an AXML file is larger than 4GB...
    # But this is not possible as the maximum chunk size is a unsigned 4 byte int.
    if self.buff.size() > 0xFFFFFFFF:
        reject("Filesize is too large to be a valid AXML file! Filesize: {}"
               .format(self.buff.size()))
        return

    try:
        axml_header = ARSCHeader(self.buff)
    except AssertionError as e:
        reject("Error parsing first resource header: %s", e)
        return

    self.filesize = axml_header.size

    if axml_header.header_size == 28024:
        # Can be a common error: the file is not an AXML but a plain XML
        # The file will then usually start with '<?xm' / '3C 3F 78 6D'
        self.log.warning(
            "Header size is 28024! Are you trying to parse a plain XML file?")

    if axml_header.header_size != 8:
        reject("This does not look like an AXML file. "
               "header size does not equal 8! header size = {}"
               .format(axml_header.header_size))
        return

    if self.filesize > self.buff.size():
        reject("This does not look like an AXML file. "
               "Declared filesize does not match real size: {} vs {}"
               .format(self.filesize, self.buff.size()))
        return

    if self.filesize < self.buff.size():
        # The file can still be parsed up to the point where the chunk should end.
        self.axml_tampered = True
        self.log.warning(
            "Declared filesize ({}) is smaller than total file size ({}). "
            "Was something appended to the file? Trying to parse it anyways."
            .format(self.filesize, self.buff.size()))

    # Not that severe of an error, we have plenty files where this is not
    # set correctly
    if axml_header.type != const.RES_XML_TYPE:
        self.axml_tampered = True
        self.log.warning(
            "AXML file has an unusual resource type! "
            "Malware likes to to such stuff to anti androguard! "
            "But we try to parse it anyways. "
            "Resource Type: 0x{:04x}".format(axml_header.type))

    # Now we parse the STRING POOL
    try:
        header = ARSCHeader(self.buff)
    except AssertionError as e:
        reject("Error parsing resource header of string pool: %s", e)
        return

    if header.header_size != 0x1C:
        reject("This does not look like an AXML file. String chunk header "
               "size does not equal 28! header size = {}"
               .format(header.header_size))
        return

    if header.type != const.RES_STRING_POOL_TYPE:
        reject("Expected String Pool header, got resource type 0x{:04x} "
               "instead".format(header.type))
        return

    self.sb = StringBlock(self.buff, header)

    # Stores resource ID mappings, if any
    self.resource_ids = []

    # Store a list of prefix/uri mappings encountered
    self.namespaces = []
def __init__(self, raw_buff):
    """Parse a resource table from *raw_buff* and index its packages.

    Every chunk inside the outer table chunk is visited.  The first chunk
    of type 0x0001 becomes ``self.stringpool_main``; each chunk of type
    0x0200 is parsed as a package and stored in ``self.packages`` under its
    name as a flat list, in read order: the package object, its type string
    block, its key string block, then every inner chunk header followed by
    whatever was parsed from it.

    :param raw_buff: raw data handed to ``bytecode.BuffHandle``
    :raises AssertionError: if the type or key symbol table of a package
        does not start with a header of type 0x0001
    """
    self.analyzed = False
    self._resolved_strings = None
    self.buff = bytecode.BuffHandle(raw_buff)

    self.header = ARSCHeader(self.buff)
    self.packageCount = unpack('<i', self.buff.read(4))[0]

    self.packages = {}
    self.values = {}
    self.resource_values = collections.defaultdict(collections.defaultdict)
    self.resource_configs = collections.defaultdict(
        lambda: collections.defaultdict(set))
    self.resource_keys = collections.defaultdict(
        lambda: collections.defaultdict(collections.defaultdict))
    self.stringpool_main = None

    # Jump past the table header to the first inner chunk.
    self.buff.set_idx(self.header.start + self.header.header_size)

    data_end = self.header.start + self.header.size

    # Stop as soon as there is no room left for another chunk header.
    while self.buff.get_idx() <= data_end - ARSCHeader.SIZE:
        res_header = ARSCHeader(self.buff)

        if res_header.start + res_header.size > data_end:
            # this inner chunk crosses the boundary of the table chunk
            break

        if res_header.type == 0x0001 and not self.stringpool_main:
            self.stringpool_main = StringBlock(self.buff, res_header)

        elif res_header.type == 0x0200:
            # Warn (non-fatally) only when the declared package count is
            # exceeded; previously a boolean and the message were printed
            # for every package.
            if len(self.packages) >= self.packageCount:
                print("Got more packages than expected")

            current_package = ARSCResTablePackage(self.buff, res_header)
            package_name = current_package.get_name()
            package_data_end = res_header.start + res_header.size

            self.packages[package_name] = []

            # Resource type symbol table.
            self.buff.set_idx(current_package.header.start +
                              current_package.typeStrings)
            type_sp_header = ARSCHeader(self.buff)
            assert type_sp_header.type == 0x0001, \
                "Expected String Pool header, got %x" % type_sp_header.type
            mTableStrings = StringBlock(self.buff, type_sp_header)

            # Resource key symbol table.
            self.buff.set_idx(current_package.header.start +
                              current_package.keyStrings)
            key_sp_header = ARSCHeader(self.buff)
            assert key_sp_header.type == 0x0001, \
                "Expected String Pool header, got %x" % key_sp_header.type
            mKeyStrings = StringBlock(self.buff, key_sp_header)

            self.packages[package_name].append(current_package)
            self.packages[package_name].append(mTableStrings)
            self.packages[package_name].append(mKeyStrings)

            pc = PackageContext(current_package, self.stringpool_main,
                                mTableStrings, mKeyStrings)

            # skip to the first header in this table package chunk
            self.buff.set_idx(res_header.start + res_header.header_size)

            while self.buff.get_idx() <= package_data_end - ARSCHeader.SIZE:
                pkg_chunk_header = ARSCHeader(self.buff)

                if pkg_chunk_header.start + pkg_chunk_header.size > package_data_end:
                    # we are way off the package chunk; bail out
                    break

                self.packages[package_name].append(pkg_chunk_header)

                if pkg_chunk_header.type == RES_TABLE_TYPE_SPEC_TYPE:
                    self.packages[package_name].append(
                        ARSCResTypeSpec(self.buff, pc))

                elif pkg_chunk_header.type == RES_TABLE_TYPE_TYPE:
                    a_res_type = ARSCResType(self.buff, pc)
                    self.packages[package_name].append(a_res_type)
                    self.resource_configs[package_name][a_res_type].add(
                        a_res_type.config)

                    # One 32-bit value per entry, paired with a resource id
                    # whose low 16 bits are the entry index.
                    entries = []
                    for i in range(0, a_res_type.entryCount):
                        current_package.mResId = current_package.mResId & 0xffff0000 | i
                        entries.append((unpack('<i', self.buff.read(4))[0],
                                        current_package.mResId))

                    self.packages[package_name].append(entries)

                    for entry, res_id in entries:
                        if self.buff.end():
                            break

                        # Entries whose value is -1 are skipped.
                        if entry != -1:
                            ate = ARSCResTableEntry(self.buff, res_id, pc)
                            self.packages[package_name].append(ate)

                elif pkg_chunk_header.type == RES_TABLE_LIBRARY_TYPE:
                    print("RES_TABLE_LIBRARY_TYPE chunk is not supported")
                else:
                    # silently skip other chunk types
                    pass

                # skip to the next chunk
                self.buff.set_idx(pkg_chunk_header.start +
                                  pkg_chunk_header.size)

        # move to the next resource chunk
        self.buff.set_idx(res_header.start + res_header.size)
class AXMLParser:
    """Pull-style reader over a binary Android XML buffer.

    :meth:`next` advances to the following event and returns its code (one
    of the ``tc`` event constants); the ``get*`` accessors describe the
    current event.  Names, prefixes and URIs are stored as indices and
    resolved through ``self.sb.getRaw``.
    """

    def __init__(self, raw_buff):
        """Open a binary AXML buffer, tolerating some tampered headers.

        ``valid_axml`` is cleared (and parsing stops before any string block
        is loaded) when the first word is neither ``CHUNK_AXML_FILE`` nor a
        word whose upper 16 bits are 0x0008; in the latter case
        ``axml_tampered`` is set and parsing continues.

        :param raw_buff: raw data handed to ``bytecode.BuffHandle``
        """
        self.reset()

        self.valid_axml = True
        self.axml_tampered = False
        self.packerwarning = False
        self.buff = bytecode.BuffHandle(raw_buff)

        axml_file, = unpack('<L', self.buff.read(4))

        if axml_file != CHUNK_AXML_FILE:
            if axml_file >> 16 == 0x0008:
                self.axml_tampered = True
                # Report the header word, not the buffer object.
                print("AXML file has an unusual header 0x{:08x}".format(axml_file))
            else:
                self.valid_axml = False
                # ``{:08x}`` applied to the buffer object raised instead of
                # printing the diagnostic.
                print("Not a valid AXML file. Header 0x{:08x}".format(axml_file))
                return

        self.filesize, = unpack('<L', self.buff.read(4))

        header = ARSCHeader(self.buff)
        # Warn when the chunk is NOT the type the message expects (the
        # comparison used to be inverted).
        if header.type != 0x0001:
            print("Expected String Pool header, got %x" % header.type)

        self.sb = StringBlock(self.buff, header)

        self.m_resourceIDs = []
        self.m_prefixuri = {}    # prefix index -> uri index
        self.m_uriprefix = {}    # uri index -> prefix index
        self.m_prefixuriL = []   # stack of (prefix, uri) index pairs

    def reset(self):
        """Clear all per-event state (event code, name, attributes, ...)."""
        self.m_event = -1
        self.m_lineNumber = -1
        self.m_name = -1
        self.m_namespaceUri = -1
        self.m_attributes = []
        self.m_idAttribute = -1
        self.m_classAttribute = -1
        self.m_styleAttribute = -1

    def next(self):
        """Advance to the next event and return its code."""
        self.doNext()
        return self.m_event

    def doNext(self):
        """Read chunks until one produces an event, then store it on self.

        Does nothing once END_DOCUMENT was reached.  Resource-id and
        namespace chunks are consumed without producing an event.

        :raises ValueError: on a malformed resource-id chunk size or a chunk
            type outside the XML chunk range (these sites used to raise a
            plain string, which is itself a TypeError)
        """
        if self.m_event == tc.END_DOCUMENT:
            return

        event = self.m_event

        self.reset()
        while True:
            chunkType = -1

            if event == tc.START_DOCUMENT:
                # The START_TAG type word was already consumed by the call
                # that reported the fake START_DOCUMENT event.
                chunkType = tc.CHUNK_XML_START_TAG
            else:
                if self.buff.end():
                    self.m_event = tc.END_DOCUMENT
                    break
                chunkType = SV('<L', self.buff.read(4)).get_value()

            if chunkType == tc.CHUNK_RESOURCEIDS:
                chunkSize = SV('<L', self.buff.read(4)).get_value()
                if chunkSize < 8 or chunkSize % 4 != 0:
                    raise ValueError(
                        "Invalid resource IDs chunk size: %d" % chunkSize)
                # chunkSize is a multiple of 4 here, so floor division is
                # exact; two words (type, size) were already consumed.
                for _ in range(0, chunkSize // 4 - 2):
                    self.m_resourceIDs.append(SV('<L', self.buff.read(4)))
                continue

            if chunkType < tc.CHUNK_XML_FIRST or chunkType > tc.CHUNK_XML_LAST:
                raise ValueError("Unexpected chunk type: 0x%08x" % chunkType)

            # Fake START_DOCUMENT event.
            if chunkType == tc.CHUNK_XML_START_TAG and event == -1:
                self.m_event = tc.START_DOCUMENT
                break

            self.buff.read(4)  # chunkSize
            lineNumber = SV('<L', self.buff.read(4)).get_value()
            self.buff.read(4)  # 0xFFFFFFFF

            if chunkType == tc.CHUNK_XML_START_NAMESPACE or chunkType == tc.CHUNK_XML_END_NAMESPACE:
                if chunkType == tc.CHUNK_XML_START_NAMESPACE:
                    prefix = SV('<L', self.buff.read(4)).get_value()
                    uri = SV('<L', self.buff.read(4)).get_value()

                    self.m_prefixuri[prefix] = uri
                    self.m_uriprefix[uri] = prefix
                    self.m_prefixuriL.append((prefix, uri))
                else:
                    self.buff.read(4)
                    self.buff.read(4)
                    # Only the stack is unwound; the lookup dicts keep
                    # their entries.
                    self.m_prefixuriL.pop()
                continue

            self.m_lineNumber = lineNumber

            if chunkType == tc.CHUNK_XML_START_TAG:
                self.m_namespaceUri = SV('<L', self.buff.read(4)).get_value()
                self.m_name = SV('<L', self.buff.read(4)).get_value()

                # FIXME
                self.buff.read(4)  # flags

                # High half: id attribute index + 1; low half: the count.
                attributeCount = SV('<L', self.buff.read(4)).get_value()
                self.m_idAttribute = (attributeCount >> 16) - 1
                attributeCount = attributeCount & 0xFFFF
                # Same packing: style index + 1 high, class index + 1 low.
                self.m_classAttribute = SV('<L', self.buff.read(4)).get_value()
                self.m_styleAttribute = (self.m_classAttribute >> 16) - 1

                self.m_classAttribute = (self.m_classAttribute & 0xFFFF) - 1

                for _ in range(0, attributeCount * tc.ATTRIBUTE_LENGTH):
                    self.m_attributes.append(
                        SV('<L', self.buff.read(4)).get_value())

                # Keep only the top byte of every value-type word.
                for i in range(tc.ATTRIBUTE_IX_VALUE_TYPE,
                               len(self.m_attributes), tc.ATTRIBUTE_LENGTH):
                    self.m_attributes[i] = (self.m_attributes[i] >> 24)

                self.m_event = tc.START_TAG
                break

            if chunkType == tc.CHUNK_XML_END_TAG:
                self.m_namespaceUri = SV('<L', self.buff.read(4)).get_value()
                self.m_name = SV('<L', self.buff.read(4)).get_value()
                self.m_event = tc.END_TAG
                break

            if chunkType == tc.CHUNK_XML_TEXT:
                self.m_name = SV('<L', self.buff.read(4)).get_value()

                # FIXME
                self.buff.read(4)  # ?
                self.buff.read(4)  # ?

                self.m_event = tc.TEXT
                break

    def getPrefixByUri(self, uri):
        """Return the prefix index registered for *uri*, or -1 if unknown."""
        try:
            return self.m_uriprefix[uri]
        except KeyError:
            return -1

    def getPrefix(self):
        """Return the prefix string of the current namespace, or ""."""
        try:
            return self.sb.getRaw(self.m_prefixuri[self.m_namespaceUri])
        except KeyError:
            return ""

    def getName(self):
        """Return the tag name for START_TAG/END_TAG events, else ""."""
        if self.m_name == -1 or (self.m_event != tc.START_TAG and self.m_event != tc.END_TAG):
            return ""

        return self.sb.getRaw(self.m_name)

    def getText(self):
        """Return the text for TEXT events, else ""."""
        if self.m_name == -1 or self.m_event != tc.TEXT:
            return ""

        return self.sb.getRaw(self.m_name)

    def getNamespacePrefix(self, pos):
        """Return the prefix string of the namespace at stack position *pos*."""
        prefix = self.m_prefixuriL[pos][0]
        return self.sb.getRaw(prefix)

    def getNamespaceUri(self, pos):
        """Return the URI string of the namespace at stack position *pos*."""
        uri = self.m_prefixuriL[pos][1]
        return self.sb.getRaw(uri)

    def getNamespaceCount(self, pos):
        """Not implemented; always returns None."""
        pass

    def getAttributeOffset(self, index):
        """Return the offset of attribute *index* inside ``m_attributes``.

        :raises ValueError: if the current event is not START_TAG
        :raises IndexError: if *index* is past the last attribute
        """
        if self.m_event != tc.START_TAG:
            raise ValueError("Current event is not START_TAG.")

        offset = index * tc.ATTRIBUTE_LENGTH
        if offset >= len(self.m_attributes):
            raise IndexError("Invalid attribute index")

        return offset

    def getAttributeCount(self):
        """Return the attribute count of the current tag, -1 if not on one."""
        if self.m_event != tc.START_TAG:
            return -1

        return len(self.m_attributes) // tc.ATTRIBUTE_LENGTH

    def getAttributePrefix(self, index):
        """Return the namespace prefix of attribute *index*, or ""."""
        offset = self.getAttributeOffset(index)
        uri = self.m_attributes[offset + tc.ATTRIBUTE_IX_NAMESPACE_URI]

        prefix = self.getPrefixByUri(uri)
        if prefix == -1:
            return ""

        return self.sb.getRaw(prefix)

    def getAttributeName(self, index):
        """Return the name of attribute *index*, or "" if it has none."""
        offset = self.getAttributeOffset(index)
        name = self.m_attributes[offset + tc.ATTRIBUTE_IX_NAME]

        if name == -1:
            return ""

        return self.sb.getRaw(name)

    def getAttributeValueType(self, index):
        """Return the stored value-type code of attribute *index*."""
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + tc.ATTRIBUTE_IX_VALUE_TYPE]

    def getAttributeValueData(self, index):
        """Return the raw value-data word of attribute *index*."""
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + tc.ATTRIBUTE_IX_VALUE_DATA]

    def getAttributeValue(self, index):
        """Return the string value of attribute *index*.

        Only string-typed attributes are resolved; every other type yields
        "" (work in progress).
        """
        offset = self.getAttributeOffset(index)
        valueType = self.m_attributes[offset + tc.ATTRIBUTE_IX_VALUE_TYPE]
        if valueType == tc.TYPE_STRING:
            valueString = self.m_attributes[offset + tc.ATTRIBUTE_IX_VALUE_STRING]
            return self.sb.getRaw(valueString)
        # WIP
        return ""
class AXMLParser:
    """Event-based reader for a binary XML buffer.

    Each call to :meth:`next` advances to the next event (START_DOCUMENT,
    START_TAG, END_TAG, TEXT or END_DOCUMENT) and returns its code; the
    ``get*`` accessors then describe the current event. All values are read
    from the buffer as little-endian unsigned 32-bit words.
    """

    # Chunk type identifiers compared against the words read from the buffer.
    CHUNK_AXML_FILE = 0x00080003
    CHUNK_RESOURCEIDS = 0x00080180
    CHUNK_XML_FIRST = 0x00100100
    CHUNK_XML_START_NAMESPACE = 0x00100100
    CHUNK_XML_END_NAMESPACE = 0x00100101
    CHUNK_XML_START_TAG = 0x00100102
    CHUNK_XML_END_TAG = 0x00100103
    CHUNK_XML_TEXT = 0x00100104
    CHUNK_XML_LAST = 0x00100104

    # Event codes stored in m_event and returned by next().
    START_DOCUMENT = 0
    END_DOCUMENT = 1
    START_TAG = 2
    END_TAG = 3
    TEXT = 4

    # Word offsets inside one attribute record of m_attributes.
    ATTRIBUTE_IX_NAMESPACE_URI = 0
    ATTRIBUTE_IX_NAME = 1
    ATTRIBUTE_IX_VALUE_STRING = 2
    ATTRIBUTE_IX_VALUE_TYPE = 3
    ATTRIBUTE_IX_VALUE_DATA = 4
    # Number of words per attribute record.
    ATTRIBUTE_LENGTH = 5
    # Misspelled historical name, kept so existing callers keep working.
    ATTRIBUTE_LENGHT = ATTRIBUTE_LENGTH

    # Attribute value type codes.
    TYPE_ATTRIBUTE = 2
    TYPE_DIMENSION = 5
    TYPE_FIRST_COLOR_INT = 28
    TYPE_FIRST_INT = 16
    TYPE_FLOAT = 4
    TYPE_FRACTION = 6
    TYPE_INT_BOOLEAN = 18
    TYPE_INT_COLOR_ARGB4 = 30
    TYPE_INT_COLOR_ARGB8 = 28
    TYPE_INT_COLOR_RGB4 = 31
    TYPE_INT_COLOR_RGB8 = 29
    TYPE_INT_DEC = 16
    TYPE_INT_HEX = 17
    TYPE_LAST_COLOR_INT = 31
    TYPE_LAST_INT = 31
    TYPE_NULL = 0
    TYPE_REFERENCE = 1
    TYPE_STRING = 3

    def __init__(self, raw_buff):
        """Wrap ``raw_buff`` and read the file header and string block.

        If the first word is not CHUNK_AXML_FILE a warning is logged and
        :meth:`is_valid` returns False; no string block is read then.
        """
        self.reset()
        self.valid_axml = True

        # Create the lookup tables unconditionally so that an object built
        # from an invalid buffer still has every attribute the other
        # methods touch.
        self.sb = None
        self.m_resourceIDs = []
        self.m_prefixuri = {}
        self.m_uriprefix = {}
        self.m_prefixuriL = []
        self.visited_ns = []

        self.buff = BuffHandle(raw_buff)
        axml_file = unpack('<L', self.buff.read(4))[0]
        if axml_file != self.CHUNK_AXML_FILE:
            self.valid_axml = False
            logging.warning("Not a valid xml file")
            return

        self.buff.read(4)  # second header word is skipped, its value is unused
        self.sb = StringBlock(self.buff)

    def is_valid(self):
        """Return True when the buffer started with CHUNK_AXML_FILE."""
        return self.valid_axml

    def reset(self):
        """Clear all per-event state back to its "unset" values."""
        self.m_event = -1
        self.m_lineNumber = -1
        self.m_name = -1
        self.m_namespaceUri = -1
        self.m_attributes = []
        self.m_idAttribute = -1
        self.m_classAttribute = -1
        self.m_styleAttribute = -1

    def next(self):
        """Advance to the next event and return its code."""
        self.doNext()
        return self.m_event

    def _read_u32(self):
        """Read one little-endian unsigned 32-bit word from the buffer."""
        return unpack('<L', self.buff.read(4))[0]

    def doNext(self):
        """Consume chunks until one produces an event; update the m_* state.

        Resource-id and namespace chunks are absorbed without producing an
        event. Does nothing once END_DOCUMENT has been reached.
        """
        if self.m_event == self.END_DOCUMENT:
            return

        event = self.m_event
        self.reset()

        while True:
            chunkType = -1

            if event == self.START_DOCUMENT:
                # The START_TAG chunk type was already consumed when the
                # fake START_DOCUMENT event was emitted (see below).
                chunkType = self.CHUNK_XML_START_TAG
            else:
                if self.buff.end():
                    self.m_event = self.END_DOCUMENT
                    break
                chunkType = self._read_u32()

            if chunkType == self.CHUNK_RESOURCEIDS:
                chunkSize = self._read_u32()
                # FIXME
                if chunkSize < 8 or chunkSize % 4 != 0:
                    logging.warning("Invalid chunk size")

                # chunkSize counts bytes including the two header words.
                # Floor division keeps the count an int on Python 3 too.
                for _ in range(chunkSize // 4 - 2):
                    self.m_resourceIDs.append(self._read_u32())
                continue

            # FIXME
            if chunkType < self.CHUNK_XML_FIRST or chunkType > self.CHUNK_XML_LAST:
                logging.warning("invalid chunk type")

            # Fake START_DOCUMENT event: emitted once, in front of the first
            # START_TAG chunk, before that chunk's body is read.
            if chunkType == self.CHUNK_XML_START_TAG and event == -1:
                self.m_event = self.START_DOCUMENT
                break

            self.buff.read(4)  # chunkSize
            lineNumber = self._read_u32()
            self.buff.read(4)  # 0xFFFFFFFF

            if chunkType in (self.CHUNK_XML_START_NAMESPACE,
                             self.CHUNK_XML_END_NAMESPACE):
                if chunkType == self.CHUNK_XML_START_NAMESPACE:
                    prefix = self._read_u32()
                    uri = self._read_u32()
                    self.m_prefixuri[prefix] = uri
                    self.m_uriprefix[uri] = prefix
                    self.m_prefixuriL.append((prefix, uri))
                    self.ns = uri
                else:
                    self.ns = -1
                    self.buff.read(4)
                    self.buff.read(4)
                    # A malformed file may close a namespace that was never
                    # opened; do not crash on the empty stack.
                    if self.m_prefixuriL:
                        self.m_prefixuriL.pop()
                    else:
                        logging.warning("END_NAMESPACE without START_NAMESPACE")
                continue

            self.m_lineNumber = lineNumber

            if chunkType == self.CHUNK_XML_START_TAG:
                self.m_namespaceUri = self._read_u32()
                self.m_name = self._read_u32()

                # FIXME
                self.buff.read(4)  # flags

                # High half-word: id attribute index + 1; low: the count.
                attributeCount = self._read_u32()
                self.m_idAttribute = (attributeCount >> 16) - 1
                attributeCount = attributeCount & 0xFFFF

                # High half-word: style index + 1; low: class index + 1.
                self.m_classAttribute = self._read_u32()
                self.m_styleAttribute = (self.m_classAttribute >> 16) - 1
                self.m_classAttribute = (self.m_classAttribute & 0xFFFF) - 1

                for _ in range(attributeCount * self.ATTRIBUTE_LENGTH):
                    self.m_attributes.append(self._read_u32())

                # Keep only the top byte of every value-type word.
                for i in range(self.ATTRIBUTE_IX_VALUE_TYPE,
                               len(self.m_attributes),
                               self.ATTRIBUTE_LENGTH):
                    self.m_attributes[i] = self.m_attributes[i] >> 24

                self.m_event = self.START_TAG
                break

            if chunkType == self.CHUNK_XML_END_TAG:
                self.m_namespaceUri = self._read_u32()
                self.m_name = self._read_u32()
                self.m_event = self.END_TAG
                break

            if chunkType == self.CHUNK_XML_TEXT:
                self.m_name = self._read_u32()

                # FIXME
                self.buff.read(4)
                self.buff.read(4)

                self.m_event = self.TEXT
                break

    def getPrefixByUri(self, uri):
        """Return the prefix index registered for ``uri``, or -1 if unknown."""
        try:
            return self.m_uriprefix[uri]
        except KeyError:
            return -1

    def getPrefix(self):
        """Return the prefix string of the current namespace URI, or u''."""
        try:
            return self.sb.getString(self.m_uriprefix[self.m_namespaceUri])
        except KeyError:
            return u''

    def getName(self):
        """Return the tag name for START_TAG/END_TAG events, else u''."""
        if self.m_name == -1 or (self.m_event != self.START_TAG and
                                 self.m_event != self.END_TAG):
            return u''
        return self.sb.getString(self.m_name)

    def getText(self):
        """Return the text for TEXT events, else u''."""
        if self.m_name == -1 or self.m_event != self.TEXT:
            return u''
        return self.sb.getString(self.m_name)

    def getNamespacePrefix(self, pos):
        """Return the prefix string of the ``pos``-th open namespace."""
        prefix = self.m_prefixuriL[pos][0]
        return self.sb.getString(prefix)

    def getNamespaceUri(self, pos):
        """Return the URI string of the ``pos``-th open namespace."""
        uri = self.m_prefixuriL[pos][1]
        return self.sb.getString(uri)

    def getXMLNS(self):
        """Return ``xmlns:prefix="uri"`` lines for namespaces not yet emitted.

        Every URI reported is recorded in ``visited_ns`` so that it is
        returned only once across calls.
        """
        declarations = []
        for uri in self.m_uriprefix:
            if uri not in self.visited_ns:
                prefix = self.m_uriprefix[uri]
                declarations.append("xmlns:%s=\"%s\"\n" % (
                    self.sb.getString(prefix),
                    self.sb.getString(self.m_prefixuri[prefix])))
                self.visited_ns.append(uri)
        return "".join(declarations)

    def getNamespaceCount(self, pos):
        """Placeholder: not implemented, always returns None."""
        pass

    def getAttributeOffset(self, index):
        """Return the offset in m_attributes of attribute number ``index``.

        Problems (wrong event, index out of range) are only logged; the
        computed offset is returned regardless.
        """
        # FIXME
        if self.m_event != self.START_TAG:
            logging.warning("Current event is not START_TAG.")

        offset = index * self.ATTRIBUTE_LENGTH
        # FIXME
        if offset >= len(self.m_attributes):
            logging.warning("Invalid attribute index")

        return offset

    def getAttributeCount(self):
        """Return the number of attributes of the current tag, or -1."""
        if self.m_event != self.START_TAG:
            return -1
        # Floor division so the count is an int on Python 3 as well.
        return len(self.m_attributes) // self.ATTRIBUTE_LENGTH

    def getAttributePrefix(self, index):
        """Return the namespace prefix string of an attribute, or ""."""
        offset = self.getAttributeOffset(index)
        uri = self.m_attributes[offset + self.ATTRIBUTE_IX_NAMESPACE_URI]

        prefix = self.getPrefixByUri(uri)
        if prefix == -1:
            return ""
        return self.sb.getString(prefix)

    def getAttributeName(self, index):
        """Return the name string of an attribute, or "" if it has none."""
        offset = self.getAttributeOffset(index)
        name = self.m_attributes[offset + self.ATTRIBUTE_IX_NAME]

        if name == -1:
            return ""
        return self.sb.getString(name)

    def getAttributeValueType(self, index):
        """Return the value type of an attribute (top byte of its type word)."""
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + self.ATTRIBUTE_IX_VALUE_TYPE]

    def getAttributeValueData(self, index):
        """Return the raw value-data word of an attribute."""
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + self.ATTRIBUTE_IX_VALUE_DATA]

    def getAttributeValue(self, index):
        """Return the string value of a TYPE_STRING attribute, else ""."""
        offset = self.getAttributeOffset(index)
        valueType = self.m_attributes[offset + self.ATTRIBUTE_IX_VALUE_TYPE]
        if valueType == self.TYPE_STRING:
            valueString = self.m_attributes[offset + self.ATTRIBUTE_IX_VALUE_STRING]
            return self.sb.getString(valueString)
        return ""