def setUp(self):
    """Load the fixture archive and parse its resource table.

    The archive lives at ``../data/test`` relative to this file.  The parsed
    table is stored on ``self.arscobj`` and the first package name it reports
    on ``self.package``.
    """
    base_dir = os.path.dirname(__file__)
    file_path = os.path.abspath(os.path.join(base_dir, "..", 'data', 'test'))
    with zipfile.ZipFile(file_path, mode="r") as archive:
        raw = archive.read('resources.arsc')
    self.arscobj = ARSCParser(raw)
    package_names = self.arscobj.get_packages_names()
    self.package = package_names[0]
def _init_arsc(self):
    """Parse ``resources.arsc`` from the APK into ``self.arsc``.

    ``self.arsc`` is left untouched when the archive has no
    ``resources.arsc`` member.  Errors raised while opening or reading the
    archive, or while parsing, propagate to the caller unchanged (the former
    ``except Exception as e: raise e`` wrapper added nothing and was removed).
    """
    ARSC_NAME = 'resources.arsc'
    with apkfile.ZipFile(self.apk_path, mode="r") as zf:
        if ARSC_NAME not in zf.namelist():
            return
        data = zf.read(ARSC_NAME)
    self.arsc = ARSCParser(data)
def _init_app_icon(self):
    """Resolve the application icon's file name and store it in ``self.app_icon``.

    The icon resource id is taken from the ``:icon="@..."`` attribute of the
    original manifest and matched against the public resources listed in
    ``resources.arsc``.  A file of the archive is selected when its name
    contains both the resource type and the resource name; when several
    files qualify the last one wins.

    ``self.app_icon`` is left unchanged when the manifest is unavailable,
    declares no icon, or no file matches.  Errors from reading or parsing
    the archive propagate to the caller.
    """
    manifest = self.get_org_manifest()
    if not manifest:
        # No manifest text to search in.
        return
    result = re.search(r':icon="@(.*?)"', manifest)
    if result is None:
        # The manifest does not declare an icon.
        return
    ids = '0x' + result.group(1).lower()
    files = self.get_files()

    with apkfile.ZipFile(self.apk_path, 'r') as z:
        data = z.read('resources.arsc')
    self.arscobj = ARSCParser(data)
    self.package = self.arscobj.get_packages_names()[0]
    datas = xmltodict.parse(self.arscobj.get_public_resources(self.package))

    # xmltodict yields None for an empty element and a single mapping (not a
    # list) when there is exactly one <public> child; normalise both cases.
    resources = datas.get('resources') or {}
    publics = resources.get('public') or []
    if isinstance(publics, dict):
        publics = [publics]

    for item in publics:
        if ids != item['@id']:
            continue
        for f in files:
            name = f['name']
            if item['@type'] in name and item['@name'] in name:
                self.app_icon = name
class TestAPK(unittest.TestCase):
    """Checks the XML produced by ``ARSCParser`` for the ``data/test`` archive."""

    def setUp(self):
        """Parse ``resources.arsc`` from the fixture and keep its first package."""
        base_dir = os.path.dirname(__file__)
        file_path = os.path.abspath(
            os.path.join(base_dir, "..", 'data', 'test'))
        with zipfile.ZipFile(file_path, mode="r") as archive:
            raw = archive.read('resources.arsc')
        self.arscobj = ARSCParser(raw)
        self.package = self.arscobj.get_packages_names()[0]

    def test_get_packages_names(self):
        """The first package name is the fixture's package."""
        self.assertEqual(self.package, 'com.example.hellojni')

    def test_get_strings_resources(self):
        """The string resources contain the three expected entries."""
        parsed = xmltodict.parse(self.arscobj.get_strings_resources())
        datas = parsed['packages']['package']
        self.assertEqual(datas['@name'], 'com.example.hellojni')

        strs = datas['locale']['resources']['string']
        expected = (
            ('app_name', 'hellojni'),
            ('hello_world', 'Hello world!'),
            ('action_settings', 'Settings'),
        )
        for res_name, res_text in expected:
            entry = OrderedDict([('@name', res_name), ('#text', res_text)])
            self.assertIn(entry, strs)

    def test_get_id_resources(self):
        """The only id resource is ``action_settings``."""
        datas = xmltodict.parse(self.arscobj.get_id_resources(self.package))
        expected = OrderedDict([
            ('@type', 'id'),
            ('@name', 'action_settings'),
            ('#text', 'false'),
        ])
        self.assertEqual(expected, datas['resources']['item'])

    def test_get_public_resources(self):
        """The public resources list ``app_name`` and the ``main`` menu."""
        datas = xmltodict.parse(
            self.arscobj.get_public_resources(self.package))
        publics = datas['resources']['public']

        app_name = OrderedDict([
            ('@type', 'string'),
            ('@name', 'app_name'),
            ('@id', '0x7f050000'),
        ])
        self.assertIn(app_name, publics)

        main = OrderedDict([
            ('@type', 'menu'),
            ('@name', 'main'),
            ('@id', '0x7f070000'),
        ])
        self.assertIn(main, publics)

    # NOTE: get_bool_resources, get_integer_resources and get_color_resources
    # have no assertions yet; earlier drafts only pretty-printed their output.

    def test_get_dimen_resources(self):
        """The dimen resources contain ``activity_horizontal_margin``."""
        datas = xmltodict.parse(
            self.arscobj.get_dimen_resources(self.package))
        ahm = OrderedDict([
            ('@name', 'activity_horizontal_margin'),
            ('#text', '16.0dip'),
        ])
        self.assertIn(ahm, datas['resources']['dimen'])
class APK:
    """Lazy accessor for the contents of an APK archive.

    Every ``get_*`` method computes its value on first use through the
    matching ``_init_*`` method and caches it on the instance.

    Args:
        apk_path: Path of the APK file; it is only opened when a getter
            needs data from it.
    """

    def __init__(self, apk_path):
        self.apk_path = apk_path
        self.dex_files = None
        self.children = None
        self.manifest = None
        self.org_manifest = None
        self.strings = None  # hex-encoded strings
        self.org_strings = None  # raw strings
        self.opcodes = None
        self.certs = {}
        self.arsc = None
        self.strings_refx = None
        self.app_icon = None
        self.methods = None
        self.trees = None  # dict of code-structure sequences
        self.application = None
        self.main_activity = None
        self.mini_mani = None
        self.classes = None
        self.methods_refx = None

    @staticmethod
    def serialize_xml(org_xml):
        """Return ``org_xml`` re-serialised with all text between tags removed.

        The document is prettified with BeautifulSoup and every run of
        characters between ``>`` and ``<`` is dropped.  On any failure the
        input and the error are printed and an empty string is returned.
        """
        _xml = ''
        try:
            soup = BeautifulSoup(org_xml, features='lxml-xml')
            _xml = re.sub(r'>[^<]+<', '><', soup.prettify())
        except Exception as e:
            # A separate ``except ExpatError:`` branch used to print an
            # unbound ``e``; ExpatError is an Exception, so this single
            # handler covers it with the same output.
            print(org_xml, e)
        return _xml

    def get_mini_mani(self):
        """Return the minified manifest (see :meth:`serialize_xml`)."""
        if not self.mini_mani:
            self.mini_mani = self.serialize_xml(self.get_org_manifest())
        return self.mini_mani

    def get_main_activity(self):
        """Return the name of the activity handling ``action.MAIN``, if found."""
        if not self.main_activity:
            self._init_main_activity()
        return self.main_activity

    def _init_main_activity(self):
        """Search the minified manifest for the MAIN activity."""
        mani = self.get_mini_mani()
        if not mani:
            return
        ptn = r'<activity(.*?)android:name="([^"]*?)"[^<>]*?><intent-filter.*?<action android:name="android.intent.action.MAIN">.*?</activity>'
        result = re.search(ptn, mani)
        if result:
            self.main_activity = result.groups()[1]

    def get_application(self):
        """Return the ``:name`` attribute of the ``<application>`` tag, if any."""
        if not self.application:
            self._init_application()
        return self.application

    def _init_application(self):
        """Search the minified manifest for the application class name."""
        mani = self.get_mini_mani()
        if not mani:
            return
        ptn = r'<application[^<>]*?:name="([^<>"]*?)"[^<>]*?>'
        result = re.search(ptn, mani)
        if result:
            self.application = result.groups()[0]

    def get_app_icon(self):
        """Return the archive member used as application icon, or ``None``."""
        if self.app_icon:
            return self.app_icon
        self._init_app_icon()
        return self.app_icon

    def _init_app_icon(self):
        """Resolve the icon resource id from the manifest to a file name.

        The id found in ``:icon="@..."`` is matched against the public
        resources of ``resources.arsc``; a file is selected when its name
        contains both the resource type and name (the last match wins).
        ``self.app_icon`` is left unchanged when the manifest is unavailable
        or declares no icon.  Archive and parser errors propagate.
        """
        manifest = self.get_org_manifest()
        if not manifest:
            return
        result = re.search(r':icon="@(.*?)"', manifest)
        if result is None:
            return
        ids = '0x' + result.group(1).lower()
        files = self.get_files()

        with apkfile.ZipFile(self.apk_path, 'r') as z:
            data = z.read('resources.arsc')
        self.arscobj = ARSCParser(data)
        self.package = self.arscobj.get_packages_names()[0]
        datas = xmltodict.parse(
            self.arscobj.get_public_resources(self.package))

        # xmltodict yields None for an empty element and a single mapping
        # (not a list) for exactly one <public> child; normalise both.
        resources = datas.get('resources') or {}
        publics = resources.get('public') or []
        if isinstance(publics, dict):
            publics = [publics]

        for item in publics:
            if ids != item['@id']:
                continue
            for f in files:
                name = f['name']
                if item['@type'] in name and item['@name'] in name:
                    self.app_icon = name

    def get_trees(self, height=2, limit=5000):
        """Return the dict of sub-trees built from the method paths.

        Args:
            height: Only nodes whose height exceeds this value are recorded.
            limit: Forwarded to :meth:`_init_methods` when methods are not
                loaded yet.
        """
        if self.trees is None:
            self._init_trees(height, limit)
        return self.trees

    @staticmethod
    def pretty_print(node):
        """Pretty-print a node and its descendants.

        Args:
            node: Root of the (sub)tree to render.
        """
        for pre, _, child in RenderTree(node):
            print('{}{}'.format(pre, child.name))

    def _init_trees(self, height, limit):
        """Build a tree from all method paths and group sub-trees by shape.

        Each node higher than ``height`` is keyed by the md5 of the
        concatenated heights of the nodes in its rendered sub-tree.
        ``self.trees`` stays ``None`` when no methods are available.
        """
        if self.methods is None:
            self._init_methods(limit)

        if not self.methods:
            return

        root = Node('root')
        r = Resolver(pathattr='name')

        def find_node(path):
            """Return the first node matching ``path`` or ``None``."""
            try:
                return r.glob(root, path)[0]
            except Exception:
                return None

        def to_nodes(mtd):
            """Insert one ``a/b/c`` method path into the tree under ``root``."""
            current = root
            node_path = '/root'
            for item in mtd.split('/'):
                node_path = node_path + '/' + item
                tnode = find_node(node_path)
                if tnode:
                    current = tnode
                else:
                    current = Node(item, parent=current)

        # TODO: insertion order determines the traversal order and therefore
        # the computed result.  Two identical structures whose names sort
        # differently are inserted in a different order and may produce
        # different results.
        for mtd in self.methods:
            to_nodes(mtd)

        def serialize_node(root_node):
            """Concatenate the heights of all nodes rendered from ``root_node``."""
            return ''.join(
                str(node.height) for _, _, node in RenderTree(root_node))

        self.trees = {}
        for _, _, node in RenderTree(root):
            if node.height > height:
                key = hash.hash(serialize_node(node), 'md5')
                self.trees.setdefault(key, []).append(node)

    def get_classes(self):
        """Return the sorted names of all classes in the dex files."""
        if self.classes is None:
            self._init_classes()
        return self.classes

    def _init_classes(self):
        """Collect the class names of every dex file into ``self.classes``."""
        if not self.dex_files:
            self._init_dex_files()

        classes = set()
        for dex_file in self.dex_files:
            for dexClass in dex_file.classes:
                classes.add(dexClass.name)
        self.classes = sorted(classes)

    def get_methods(self, limit=10000):
        """Return all method paths in the form ``com/a/b/mtd_name``.

        Args:
            limit: Forwarded to :meth:`_init_methods`.

        Returns:
            A sorted list of paths, or ``None`` when the limit was exceeded.
        """
        if self.methods is None:
            self._init_methods(limit)
        return self.methods

    def _init_methods(self, limit=10000):
        """Collect the method paths of every dex file into ``self.methods``.

        Some APKs contain a very large number of methods, which can be
        time-consuming, hence the limit.

        Args:
            limit: Maximum total number of method ids; when the dex files
                declare more, nothing is collected and ``self.methods`` is
                left unchanged.
        """
        if not self.dex_files:
            self._init_dex_files()

        count = 0
        for dex_file in self.dex_files:
            count += dex_file.method_ids.size
        if limit < count:
            return

        methods = set()
        for dex_file in self.dex_files:
            for dexClass in dex_file.classes:
                try:
                    dexClass.parseData()
                except IndexError:
                    continue

                for method in dexClass.data.methods:
                    clsname = method.id.cname.decode()
                    mtdname = method.id.name.decode()
                    methods.add(clsname + '/' + mtdname)
        self.methods = sorted(methods)

    def _init_strings_refx(self):
        """Build ``{class: {method: {string, ...}}}`` from const-string opcodes."""
        if not self.dex_files:
            self._init_dex_files()

        self.strings_refx = {}
        for dex_file in self.dex_files:
            for dexClass in dex_file.classes:
                try:
                    dexClass.parseData()
                except IndexError:
                    continue

                for method in dexClass.data.methods:
                    if not method.code:
                        continue

                    for bc in method.code.bytecode:
                        # 1A const-string
                        # 1B const-string-jumbo
                        if bc.opcode not in {26, 27}:
                            continue
                        if method.id.cname is None:
                            continue

                        clsname = method.id.cname.decode()
                        mtdname = method.id.name.decode()
                        dexstr = dex_file.string(bc.args[1])
                        by_method = self.strings_refx.setdefault(clsname, {})
                        by_method.setdefault(mtdname, set()).add(dexstr)

    def get_strings_refx(self):
        """Return the string cross-references.

        That is, which classes and methods use which strings.

        :return: string cross-reference index
        :rtype: [dict]
        """
        if self.strings_refx is None:
            self._init_strings_refx()
        return self.strings_refx

    def get_methods_refx(self):
        """Return the method cross-references.

        That is, which classes and methods invoke which methods.

        :return: method cross-reference index
        :rtype: [dict]
        """
        if self.methods_refx is None:
            self._init_methods_refx()
        return self.methods_refx

    def _init_methods_refx(self):
        """Build ``{class: {method: {b'cls->name', ...}}}`` from invoke opcodes."""
        if not self.dex_files:
            self._init_dex_files()

        self.methods_refx = {}
        for dex_file in self.dex_files:
            for dexClass in dex_file.classes:
                try:
                    dexClass.parseData()
                except IndexError:
                    continue

                for method in dexClass.data.methods:
                    if not method.code:
                        continue

                    for bc in method.code.bytecode:
                        if bc.opcode not in INVOKE_OPCODES:
                            continue

                        clsname = method.id.cname.decode()
                        mtdname = method.id.name.decode()
                        method_id = dex_file.method_id(bc.args[0])
                        dexstr = method_id.cname + b'->' + method_id.name
                        by_method = self.methods_refx.setdefault(clsname, {})
                        by_method.setdefault(mtdname, set()).add(dexstr)

    def get_dex_files(self):
        """Return the parsed ``classes*.dex`` members of the archive."""
        if not self.dex_files:
            self._init_dex_files()
        return self.dex_files

    def _init_dex_files(self):
        """Parse every ``classes*.dex`` member whose content is detected as dex."""
        self.dex_files = []
        with apkfile.ZipFile(self.apk_path, 'r') as z:
            for name in z.namelist():
                # Filter on the name first so unrelated members are never
                # read into memory.
                if not (name.startswith('classes') and name.endswith('.dex')):
                    continue
                data = z.read(name)
                if Magic(data).get_type() == 'dex':
                    self.dex_files.append(DexFile(data))

    def get_strings(self):
        """Return the dex strings, hex-encoded."""
        if not self.strings:
            self._init_strings()
        return self.strings

    def get_org_strings(self):
        """Return the dex strings as stored in the dex files."""
        if not self.org_strings:
            self._init_strings()
        return self.org_strings

    def _init_strings(self):
        """Collect the distinct strings of every dex file (raw and hex)."""
        if not self.dex_files:
            self._init_dex_files()

        str_set = set()
        org_str_set = set()
        for dex_file in self.dex_files:
            for i in range(dex_file.string_ids.size):
                ostr = dex_file.string(i)
                org_str_set.add(ostr)
                str_set.add(binascii.hexlify(ostr).decode())

        self.strings = list(str_set)
        self.org_strings = list(org_str_set)

    def get_files(self):
        """Return one dict (name, type, time, crc) per readable archive member."""
        if not self.children:
            self._init_children()
        return self.children

    def _init_children(self):
        """Describe every archive member; unreadable members are printed and skipped."""
        self.children = []
        with apkfile.ZipFile(self.apk_path, mode="r") as zf:
            for name in zf.namelist():
                try:
                    data = zf.read(name)
                    mine = Magic(data).get_type()
                    info = zf.getinfo(name)
                except Exception as ex:
                    print(name, ex)
                    continue

                self.children.append({
                    "name": name,
                    "type": mine,
                    "time": "%d%02d%02d%02d%02d%02d" % info.date_time,
                    # Upper-case hex, zero-padded to 8 digits.
                    "crc": "%08X" % info.CRC,
                })

    def get_org_manifest(self):
        """Return the decoded manifest XML text, or ``None`` if unavailable."""
        if not self.org_manifest:
            self._init_manifest()
        return self.org_manifest

    def _init_org_manifest(self):
        """Decode ``AndroidManifest.xml`` into ``self.org_manifest``.

        The attribute stays ``None`` when the member is missing or the
        binary XML is not valid.  Archive and decoder errors propagate.
        """
        ANDROID_MANIFEST = "AndroidManifest.xml"
        with apkfile.ZipFile(self.apk_path, mode="r") as zf:
            if ANDROID_MANIFEST in zf.namelist():
                data = zf.read(ANDROID_MANIFEST)
                axml = AXML(data)
                if axml.is_valid:
                    self.org_manifest = axml.get_xml()

        if not self.org_manifest:
            # Nothing was decoded; re.sub() would raise TypeError on None.
            return

        # fix manifest: give a nameless ``:="a.b.C"`` attribute the name
        # ``android:name``.
        self.org_manifest = re.sub(
            r'\s:(="[\w]*?\.[\.\w]*")', r' android:name\1', self.org_manifest)

    def get_manifest(self):
        """Return the manifest parsed by xmltodict, or ``None`` if unavailable."""
        if not self.manifest:
            self._init_manifest()
        return self.manifest

    def _init_manifest(self):
        """Parse the manifest text into ``self.manifest``.

        XML syntax errors are ignored on purpose and leave ``self.manifest``
        unset; any other error propagates.
        """
        if not self.org_manifest:
            self._init_org_manifest()

        if self.org_manifest:
            try:
                self.manifest = xmltodict.parse(
                    self.org_manifest, False)['manifest']
            except xml.parsers.expat.ExpatError:
                # Best effort: a malformed manifest simply has no parsed form.
                pass

    def get_manifest_tag_numbers(self):
        """Count selected manifest tags.

        Returns:
            A dict with the keys ``uses-permission``, ``activity``,
            ``receiver``, ``service``, ``provider`` and ``version_code``, or
            ``None`` (after printing a message) when the manifest could not
            be decoded.
        """
        if not self.org_manifest:
            self._init_org_manifest()

        if self.org_manifest is None:
            print(self.apk_path, '无法解析清单')
            return

        tag_ptn = re.compile(r'<([\w\-\:]+)\s[^>]*?:name="([^"]*?)"')

        result = {
            'uses-permission': 0,
            'activity': 0,
            'receiver': 0,
            'service': 0,
            'provider': 0,
            'version_code': 0,
        }

        perms = set()
        for item in tag_ptn.finditer(self.org_manifest):
            name, value = item.groups()
            if name == 'uses-permission':
                # Only distinct android.permission.* entries are counted.
                if value.startswith('android.permission'):
                    perms.add(value)
            elif 'activity' in name and name != 'activity-alias':
                result['activity'] += 1
            elif 'receiver' in name:
                result['receiver'] += 1
            elif 'service' in name:
                result['service'] += 1
            elif 'provider' in name:
                result['provider'] += 1
        result['uses-permission'] = len(perms)

        ptn = re.compile(r'android:versionCode="(\d+?)"')
        for item in ptn.finditer(self.org_manifest):
            value = item.groups()[0]
            if value.isdigit():
                result['version_code'] = int(value)

        # API level used when neither SDK attribute is present.
        api = 4
        match = re.search(
            r'android:targetSdkVersion="(\d+?)"', self.org_manifest)
        if match is None:
            match = re.search(
                r'android:minSdkVersion="(\d+?)"', self.org_manifest)
        if match:
            api = int(match.groups()[0])

        if api <= 3:
            # If both your minSdkVersion and targetSdkVersion values are set
            # to 3 or lower, the system implicitly grants your app these
            # permissions
            if 'android.permission.READ_PHONE_STATE' in self.org_manifest:
                result['uses-permission'] += 1
            if 'android.permission.WRITE_EXTERNAL_STORAGE' in self.org_manifest:
                result['uses-permission'] += 1

        return result

    def _init_arsc(self):
        """Parse ``resources.arsc`` into ``self.arsc`` when the member exists."""
        ARSC_NAME = 'resources.arsc'
        with apkfile.ZipFile(self.apk_path, mode="r") as zf:
            if ARSC_NAME not in zf.namelist():
                return
            data = zf.read(ARSC_NAME)
        self.arsc = ARSCParser(data)

    def get_arsc(self):
        """Return the parsed resource table, or ``None`` if there is none."""
        if not self.arsc:
            self._init_arsc()
        return self.arsc

    def get_certs(self, digestalgo='md5'):
        """Return the certificate data computed with ``digestalgo``.

        Args:
            digestalgo: Digest name forwarded to ``Certificate``.

        Returns:
            Whatever ``Certificate.get()`` produced, or an empty list when
            the archive holds no usable ``.DSA``/``.RSA`` member (this used
            to raise ``KeyError``).
        """
        if digestalgo not in self.certs:
            self._init_certs(digestalgo)
        return self.certs.get(digestalgo, [])

    def _init_certs(self, digestalgo):
        """Load the signature block from ``META-INF`` for ``digestalgo``."""
        with apkfile.ZipFile(self.apk_path, mode="r") as zf:
            for name in zf.namelist():
                if not (name.startswith('META-INF/')
                        and name.endswith(('.DSA', '.RSA'))):
                    continue

                data = zf.read(name)
                mine = Magic(data).get_type()
                if mine != 'txt':
                    from apkutils.cert import Certificate
                    cert = Certificate(data, digestalgo=digestalgo)
                    self.certs[digestalgo] = cert.get()

    def get_opcodes(self):
        """Return one dict per method with its hex opcode string.

        The cache check is on ``self.opcodes``; checking ``self.dex_files``
        (as before) returned ``None`` whenever another getter had already
        loaded the dex files.
        """
        if self.opcodes is None:
            self._init_opcodes()
        return self.opcodes

    def _init_opcodes(self):
        """Collect class, method, prototype and opcode data for every method."""
        if not self.dex_files:
            self._init_dex_files()

        self.opcodes = []
        for dex_file in self.dex_files:
            for dexClass in dex_file.classes:
                try:
                    dexClass.parseData()
                except IndexError:
                    continue

                for method in dexClass.data.methods:
                    opcodes = ""
                    if method.code:
                        # Two upper-case hex digits per opcode.
                        opcodes = "".join(
                            "%02X" % bc.opcode
                            for bc in method.code.bytecode)

                    proto = self.get_proto_string(
                        method.id.return_type, method.id.param_types)

                    self.opcodes.append({
                        'super_class': dexClass.super.decode(),
                        'class_name': method.id.cname.decode(),
                        'method_name': method.id.name.decode(),
                        'method_desc': method.id.desc.decode(),
                        'proto': proto,
                        'opcodes': opcodes,
                    })

    @staticmethod
    def get_proto_string(return_type, param_types):
        """Return a compact prototype string.

        Every type descriptor longer than one character is abbreviated to
        ``L``; single-character descriptors are kept.  The return type comes
        first, followed by the parameters in order.

        Args:
            return_type: Return type descriptor as ``bytes``.
            param_types: Iterable of parameter descriptors as ``bytes``.
        """
        proto = return_type.decode()
        if len(proto) > 1:
            proto = 'L'

        for item in param_types:
            param_type = item.decode()
            proto += 'L' if len(param_type) > 1 else param_type

        return proto
def resources(self):
    """Return an ``ARSCParser`` built from the APK's ``resources.arsc`` member."""
    with zipfile.ZipFile(self.apk_path, mode="r") as archive:
        raw = archive.read('resources.arsc')
    parser = ARSCParser(raw)
    return parser
class APK:
    """Lazy accessor for the contents of an APK archive.

    Every ``get_*`` method computes its value on first use through the
    matching ``_init_*`` method and caches it on the instance.

    Args:
        apk_path: Path of the APK file; it is only opened when a getter
            needs data from it.
    """

    def __init__(self, apk_path):
        self.apk_path = apk_path
        self.dex_files = None
        self.children = None
        self.manifest = None
        self.org_manifest = None
        self.strings = None
        self.org_strings = None
        self.opcodes = None
        self.certs = []
        self.arsc = None
        self.strings_refx = None
        self.app_icon = None

    def get_app_icon(self):
        """Return the archive member used as application icon, or ``None``."""
        if self.app_icon:
            return self.app_icon
        self._init_app_icon()
        return self.app_icon

    def _init_app_icon(self):
        """Resolve the icon resource id from the manifest to a file name.

        The id found in ``:icon="@..."`` is matched against the public
        resources of ``resources.arsc``; a file is selected when its name
        contains both the resource type and name (the last match wins).
        ``self.app_icon`` is left unchanged when the manifest is unavailable
        or declares no icon.  Archive and parser errors propagate.
        """
        manifest = self.get_org_manifest()
        if not manifest:
            return
        result = re.search(r':icon="@(.*?)"', manifest)
        if result is None:
            return
        ids = '0x' + result.group(1).lower()
        files = self.get_files()

        with apkfile.ZipFile(self.apk_path, 'r') as z:
            data = z.read('resources.arsc')
        self.arscobj = ARSCParser(data)
        self.package = self.arscobj.get_packages_names()[0]
        datas = xmltodict.parse(
            self.arscobj.get_public_resources(self.package))

        # xmltodict yields None for an empty element and a single mapping
        # (not a list) for exactly one <public> child; normalise both.
        resources = datas.get('resources') or {}
        publics = resources.get('public') or []
        if isinstance(publics, dict):
            publics = [publics]

        for item in publics:
            if ids != item['@id']:
                continue
            for f in files:
                name = f['name']
                if item['@type'] in name and item['@name'] in name:
                    self.app_icon = name

    def _init_strings_refx(self):
        """Build ``{class: {method: {string, ...}}}`` from const-string opcodes."""
        if not self.dex_files:
            self._init_dex_files()

        self.strings_refx = {}
        for dex_file in self.dex_files:
            for dexClass in dex_file.classes:
                try:
                    dexClass.parseData()
                except IndexError:
                    continue

                for method in dexClass.data.methods:
                    if not method.code:
                        continue

                    for bc in method.code.bytecode:
                        # 1A const-string
                        # 1B const-string-jumbo
                        if bc.opcode not in {26, 27}:
                            continue

                        clsname = method.id.cname.decode()
                        mtdname = method.id.name.decode()
                        dexstr = dex_file.string(bc.args[1])
                        by_method = self.strings_refx.setdefault(clsname, {})
                        by_method.setdefault(mtdname, set()).add(dexstr)

    def get_strings_refx(self):
        """Return the string cross-references.

        That is, which classes and methods use which strings.

        :return: string cross-reference index
        :rtype: [dict]
        """
        if self.strings_refx is None:
            self._init_strings_refx()
        return self.strings_refx

    def get_dex_files(self):
        """Return the parsed ``classes*.dex`` members of the archive."""
        if not self.dex_files:
            self._init_dex_files()
        return self.dex_files

    def _init_dex_files(self):
        """Parse every ``classes*.dex`` member whose content is detected as dex."""
        self.dex_files = []
        with apkfile.ZipFile(self.apk_path, 'r') as z:
            for name in z.namelist():
                # Filter on the name first so unrelated members are never
                # read into memory.
                if not (name.startswith('classes') and name.endswith('.dex')):
                    continue
                data = z.read(name)
                if Magic(data).get_type() == 'dex':
                    self.dex_files.append(DexFile(data))

    def get_strings(self):
        """Return the dex strings, hex-encoded."""
        if not self.strings:
            self._init_strings()
        return self.strings

    def get_org_strings(self):
        """Return the dex strings as stored in the dex files."""
        if not self.org_strings:
            self._init_strings()
        return self.org_strings

    def _init_strings(self):
        """Collect the distinct strings of every dex file (raw and hex)."""
        if not self.dex_files:
            self._init_dex_files()

        str_set = set()
        org_str_set = set()
        for dex_file in self.dex_files:
            for i in range(dex_file.string_ids.size):
                ostr = dex_file.string(i)
                org_str_set.add(ostr)
                str_set.add(binascii.hexlify(ostr).decode())

        self.strings = list(str_set)
        self.org_strings = list(org_str_set)

    def get_files(self):
        """Return one dict (name, type, time, crc) per readable archive member."""
        if not self.children:
            self._init_children()
        return self.children

    def _init_children(self):
        """Describe every archive member; unreadable members are printed and skipped."""
        self.children = []
        with apkfile.ZipFile(self.apk_path, mode="r") as zf:
            for name in zf.namelist():
                try:
                    data = zf.read(name)
                    mine = Magic(data).get_type()
                    info = zf.getinfo(name)
                except Exception as ex:
                    print(name, ex)
                    continue

                self.children.append({
                    "name": name,
                    "type": mine,
                    "time": "%d%02d%02d%02d%02d%02d" % info.date_time,
                    # Upper-case hex, zero-padded to 8 digits.
                    "crc": "%08X" % info.CRC,
                })

    def get_org_manifest(self):
        """Return the decoded manifest XML text, or ``None`` if unavailable."""
        if not self.org_manifest:
            self._init_manifest()
        return self.org_manifest

    def _init_org_manifest(self):
        """Decode ``AndroidManifest.xml`` into ``self.org_manifest``.

        The attribute stays ``None`` when the member is missing or the
        binary XML is not valid.  Archive and decoder errors propagate.
        """
        ANDROID_MANIFEST = "AndroidManifest.xml"
        with apkfile.ZipFile(self.apk_path, mode="r") as zf:
            if ANDROID_MANIFEST not in zf.namelist():
                return
            data = zf.read(ANDROID_MANIFEST)
            axml = AXML(data)
            if axml.is_valid:
                self.org_manifest = axml.get_xml()

    def get_manifest(self):
        """Return the manifest parsed by xmltodict, or ``None`` if unavailable."""
        if not self.manifest:
            self._init_manifest()
        return self.manifest

    def _init_manifest(self):
        """Parse the manifest text into ``self.manifest``.

        XML syntax errors are ignored on purpose and leave ``self.manifest``
        unset; any other error propagates.
        """
        if not self.org_manifest:
            self._init_org_manifest()

        if self.org_manifest:
            try:
                self.manifest = xmltodict.parse(
                    self.org_manifest, False)['manifest']
            except xml.parsers.expat.ExpatError:
                # Best effort: a malformed manifest simply has no parsed form.
                pass

    def _init_arsc(self):
        """Parse ``resources.arsc`` into ``self.arsc`` when the member exists."""
        ARSC_NAME = 'resources.arsc'
        with apkfile.ZipFile(self.apk_path, mode="r") as zf:
            if ARSC_NAME not in zf.namelist():
                return
            data = zf.read(ARSC_NAME)
        self.arsc = ARSCParser(data)

    def get_arsc(self):
        """Return the parsed resource table, or ``None`` if there is none."""
        if not self.arsc:
            self._init_arsc()
        return self.arsc

    def get_certs(self):
        """Return the certificate data; an empty list when none was found."""
        if not self.certs:
            self._init_certs()
        return self.certs

    def _init_certs(self):
        """Load certificate data from the non-text members under ``META-INF``."""
        with apkfile.ZipFile(self.apk_path, mode="r") as zf:
            for name in zf.namelist():
                if 'META-INF' not in name:
                    continue

                data = zf.read(name)
                mine = Magic(data).get_type()
                if mine != 'txt':
                    from apkutils.cert import Certificate
                    cert = Certificate(data)
                    self.certs = cert.get()

    def get_opcodes(self):
        """Return one dict per method with its hex opcode string.

        The cache check is on ``self.opcodes``; checking ``self.dex_files``
        (as before) returned ``None`` whenever another getter had already
        loaded the dex files.
        """
        if self.opcodes is None:
            self._init_opcodes()
        return self.opcodes

    def _init_opcodes(self):
        """Collect class, method, prototype and opcode data for every method."""
        if not self.dex_files:
            self._init_dex_files()

        self.opcodes = []
        for dex_file in self.dex_files:
            for dexClass in dex_file.classes:
                try:
                    dexClass.parseData()
                except IndexError:
                    continue

                for method in dexClass.data.methods:
                    opcodes = ""
                    if method.code:
                        # Two upper-case hex digits per opcode.
                        opcodes = "".join(
                            "%02X" % bc.opcode
                            for bc in method.code.bytecode)

                    proto = self.get_proto_string(
                        method.id.return_type, method.id.param_types)

                    self.opcodes.append({
                        'super_class': dexClass.super.decode(),
                        'class_name': method.id.cname.decode(),
                        'method_name': method.id.name.decode(),
                        'method_desc': method.id.desc.decode(),
                        'proto': proto,
                        'opcodes': opcodes,
                    })

    @staticmethod
    def get_proto_string(return_type, param_types):
        """Return a compact prototype string.

        Every type descriptor longer than one character is abbreviated to
        ``L``; single-character descriptors are kept.  The return type comes
        first, followed by the parameters in order.

        Args:
            return_type: Return type descriptor as ``bytes``.
            param_types: Iterable of parameter descriptors as ``bytes``.
        """
        proto = return_type.decode()
        if len(proto) > 1:
            proto = 'L'

        for item in param_types:
            param_type = item.decode()
            proto += 'L' if len(param_type) > 1 else param_type

        return proto
class APK:
    """Lazy accessor for the contents of an APK archive.

    Every ``get_*`` method computes its value on first use through the
    matching ``_init_*`` method and caches it on the instance.

    Args:
        apk_path: Path of the APK file; it is only opened when a getter
            needs data from it.
    """

    def __init__(self, apk_path):
        self.apk_path = apk_path
        self.dex_files = None
        self.children = None
        self.manifest = None
        self.org_manifest = None
        self.strings = None
        self.org_strings = None
        self.opcodes = None
        self.certs = []
        self.arsc = None
        self.strings_refx = None
        self.app_icon = None
        self.methods = None
        self.trees = None  # dict of code-structure sequences
        self.application = None
        self.main_activity = None
        self.mini_mani = None
        self.classes = None

    @staticmethod
    def serialize_xml(org_xml):
        """Return ``org_xml`` on one line without whitespace between tags.

        Newlines become spaces, whitespace between a closing quote and ``>``
        is removed, and whitespace between ``>`` and ``<`` is removed.

        Returns:
            The compacted text, or ``None`` when ``org_xml`` is empty or
            ``None``.
        """
        if not org_xml:
            return None
        _xml = re.sub(r'\n', ' ', org_xml)
        _xml = re.sub(r'"\s+?>', '">', _xml)
        _xml = re.sub(r'>\s+?<', '><', _xml)
        return _xml

    def get_mini_mani(self):
        """Return the minified manifest (see :meth:`serialize_xml`).

        The manifest is obtained through :meth:`get_org_manifest` so it is
        loaded on demand; reading ``self.org_manifest`` directly yielded
        ``None`` unless another getter had been called first.
        """
        if not self.mini_mani:
            self.mini_mani = self.serialize_xml(self.get_org_manifest())
        return self.mini_mani

    def get_main_activity(self):
        """Return the name of the activity handling ``action.MAIN``, if found."""
        if not self.main_activity:
            self._init_main_activity()
        return self.main_activity

    def _init_main_activity(self):
        """Search the minified manifest for the MAIN activity."""
        mani = self.get_mini_mani()
        if not mani:
            # No manifest text; re.search() would raise TypeError on None.
            return
        ptn = r'<activity android:name="([^<>"]*?)">.*?<action android:name="android.intent.action.MAIN">.*?</activity>'
        result = re.search(ptn, mani)
        if result:
            self.main_activity = result.groups()[0]

    def get_application(self):
        """Return the ``:name`` attribute of the ``<application>`` tag, if any."""
        if not self.application:
            self._init_application()
        return self.application

    def _init_application(self):
        """Search the minified manifest for the application class name."""
        mani = self.get_mini_mani()
        if not mani:
            return
        ptn = r'<application[^<>]*?:name="([^<>"]*?)"[^<>]*?>'
        result = re.search(ptn, mani)
        if result:
            self.application = result.groups()[0]

    def get_app_icon(self):
        """Return the archive member used as application icon, or ``None``."""
        if self.app_icon:
            return self.app_icon
        self._init_app_icon()
        return self.app_icon

    def _init_app_icon(self):
        """Resolve the icon resource id from the manifest to a file name.

        The id found in ``:icon="@..."`` is matched against the public
        resources of ``resources.arsc``; a file is selected when its name
        contains both the resource type and name (the last match wins).
        ``self.app_icon`` is left unchanged when the manifest is unavailable
        or declares no icon.  Archive and parser errors propagate.
        """
        manifest = self.get_org_manifest()
        if not manifest:
            return
        result = re.search(r':icon="@(.*?)"', manifest)
        if result is None:
            return
        ids = '0x' + result.group(1).lower()
        files = self.get_files()

        with apkfile.ZipFile(self.apk_path, 'r') as z:
            data = z.read('resources.arsc')
        self.arscobj = ARSCParser(data)
        self.package = self.arscobj.get_packages_names()[0]
        datas = xmltodict.parse(
            self.arscobj.get_public_resources(self.package))

        # xmltodict yields None for an empty element and a single mapping
        # (not a list) for exactly one <public> child; normalise both.
        resources = datas.get('resources') or {}
        publics = resources.get('public') or []
        if isinstance(publics, dict):
            publics = [publics]

        for item in publics:
            if ids != item['@id']:
                continue
            for f in files:
                name = f['name']
                if item['@type'] in name and item['@name'] in name:
                    self.app_icon = name

    def get_trees(self, height=2, limit=5000):
        """Return the dict of sub-trees built from the method paths.

        Args:
            height: Only nodes whose height exceeds this value are recorded.
            limit: Forwarded to :meth:`_init_methods` when methods are not
                loaded yet.
        """
        if self.trees is None:
            self._init_trees(height, limit)
        return self.trees

    @staticmethod
    def pretty_print(node):
        """Pretty-print a node and its descendants.

        Args:
            node: Root of the (sub)tree to render.
        """
        for pre, _, child in RenderTree(node):
            print('{}{}'.format(pre, child.name))

    def _init_trees(self, height, limit):
        """Build a tree from all method paths and group sub-trees by shape.

        Each node higher than ``height`` is keyed by the md5 of the
        concatenated heights of the nodes in its rendered sub-tree.
        ``self.trees`` stays ``None`` when no methods are available.
        """
        if self.methods is None:
            self._init_methods(limit)

        if not self.methods:
            return

        root = Node('root')
        r = Resolver(pathattr='name')

        def find_node(path):
            """Return the first node matching ``path`` or ``None``."""
            try:
                return r.glob(root, path)[0]
            except Exception:
                return None

        def to_nodes(mtd):
            """Insert one ``a/b/c`` method path into the tree under ``root``."""
            current = root
            node_path = '/root'
            for item in mtd.split('/'):
                node_path = node_path + '/' + item
                tnode = find_node(node_path)
                if tnode:
                    current = tnode
                else:
                    current = Node(item, parent=current)

        # TODO: insertion order determines the traversal order and therefore
        # the computed result.  Two identical structures whose names sort
        # differently are inserted in a different order and may produce
        # different results.
        for mtd in self.methods:
            to_nodes(mtd)

        def serialize_node(root_node):
            """Concatenate the heights of all nodes rendered from ``root_node``."""
            return ''.join(
                str(node.height) for _, _, node in RenderTree(root_node))

        self.trees = {}
        for _, _, node in RenderTree(root):
            if node.height > height:
                key = hash.hash(serialize_node(node), 'md5')
                self.trees.setdefault(key, []).append(node)

    def get_classes(self):
        """Return the sorted names of all classes in the dex files."""
        if self.classes is None:
            self._init_classes()
        return self.classes

    def _init_classes(self):
        """Collect the class names of every dex file into ``self.classes``."""
        if not self.dex_files:
            self._init_dex_files()

        classes = set()
        for dex_file in self.dex_files:
            for dexClass in dex_file.classes:
                classes.add(dexClass.name)
        self.classes = sorted(classes)

    def get_methods(self, limit=10000):
        """Return all method paths in the form ``com/a/b/mtd_name``.

        Args:
            limit: Forwarded to :meth:`_init_methods`.

        Returns:
            A sorted list of paths, or ``None`` when the limit was exceeded.
        """
        if self.methods is None:
            self._init_methods(limit)
        return self.methods

    def _init_methods(self, limit=10000):
        """Collect the method paths of every dex file into ``self.methods``.

        Some APKs contain a very large number of methods, which can be
        time-consuming, hence the limit.

        Args:
            limit: Maximum total number of method ids; when the dex files
                declare more, nothing is collected and ``self.methods`` is
                left unchanged.
        """
        if not self.dex_files:
            self._init_dex_files()

        count = 0
        for dex_file in self.dex_files:
            count += dex_file.method_ids.size
        if limit < count:
            return

        methods = set()
        for dex_file in self.dex_files:
            for dexClass in dex_file.classes:
                try:
                    dexClass.parseData()
                except IndexError:
                    continue

                for method in dexClass.data.methods:
                    clsname = method.id.cname.decode()
                    mtdname = method.id.name.decode()
                    methods.add(clsname + '/' + mtdname)
        self.methods = sorted(methods)

    def _init_strings_refx(self):
        """Build ``{class: {method: {string, ...}}}`` from const-string opcodes."""
        if not self.dex_files:
            self._init_dex_files()

        self.strings_refx = {}
        for dex_file in self.dex_files:
            for dexClass in dex_file.classes:
                try:
                    dexClass.parseData()
                except IndexError:
                    continue

                for method in dexClass.data.methods:
                    if not method.code:
                        continue

                    for bc in method.code.bytecode:
                        # 1A const-string
                        # 1B const-string-jumbo
                        if bc.opcode not in {26, 27}:
                            continue

                        clsname = method.id.cname.decode()
                        mtdname = method.id.name.decode()
                        dexstr = dex_file.string(bc.args[1])
                        by_method = self.strings_refx.setdefault(clsname, {})
                        by_method.setdefault(mtdname, set()).add(dexstr)

    def get_strings_refx(self):
        """Return the string cross-references.

        That is, which classes and methods use which strings.

        :return: string cross-reference index
        :rtype: [dict]
        """
        if self.strings_refx is None:
            self._init_strings_refx()
        return self.strings_refx

    def get_dex_files(self):
        """Return the parsed ``classes*.dex`` members of the archive."""
        if not self.dex_files:
            self._init_dex_files()
        return self.dex_files

    def _init_dex_files(self):
        """Parse every ``classes*.dex`` member whose content is detected as dex."""
        self.dex_files = []
        with apkfile.ZipFile(self.apk_path, 'r') as z:
            for name in z.namelist():
                # Filter on the name first so unrelated members are never
                # read into memory.
                if not (name.startswith('classes') and name.endswith('.dex')):
                    continue
                data = z.read(name)
                if Magic(data).get_type() == 'dex':
                    self.dex_files.append(DexFile(data))

    def get_strings(self):
        """Return the dex strings, hex-encoded."""
        if not self.strings:
            self._init_strings()
        return self.strings

    def get_org_strings(self):
        """Return the dex strings as stored in the dex files."""
        if not self.org_strings:
            self._init_strings()
        return self.org_strings

    def _init_strings(self):
        """Collect the distinct strings of every dex file (raw and hex)."""
        if not self.dex_files:
            self._init_dex_files()

        str_set = set()
        org_str_set = set()
        for dex_file in self.dex_files:
            for i in range(dex_file.string_ids.size):
                ostr = dex_file.string(i)
                org_str_set.add(ostr)
                str_set.add(binascii.hexlify(ostr).decode())

        self.strings = list(str_set)
        self.org_strings = list(org_str_set)

    def get_files(self):
        """Return one dict (name, type, time, crc) per readable archive member."""
        if not self.children:
            self._init_children()
        return self.children

    def _init_children(self):
        """Describe every archive member; unreadable members are printed and skipped."""
        self.children = []
        with apkfile.ZipFile(self.apk_path, mode="r") as zf:
            for name in zf.namelist():
                try:
                    data = zf.read(name)
                    mine = Magic(data).get_type()
                    info = zf.getinfo(name)
                except Exception as ex:
                    print(name, ex)
                    continue

                self.children.append({
                    "name": name,
                    "type": mine,
                    "time": "%d%02d%02d%02d%02d%02d" % info.date_time,
                    # Upper-case hex, zero-padded to 8 digits.
                    "crc": "%08X" % info.CRC,
                })

    def get_org_manifest(self):
        """Return the decoded manifest XML text, or ``None`` if unavailable."""
        if not self.org_manifest:
            self._init_manifest()
        return self.org_manifest

    def _init_org_manifest(self):
        """Decode ``AndroidManifest.xml`` into ``self.org_manifest``.

        The attribute stays ``None`` when the member is missing or the
        binary XML is not valid.  Archive and decoder errors propagate.
        """
        ANDROID_MANIFEST = "AndroidManifest.xml"
        with apkfile.ZipFile(self.apk_path, mode="r") as zf:
            if ANDROID_MANIFEST not in zf.namelist():
                return
            data = zf.read(ANDROID_MANIFEST)
            axml = AXML(data)
            if axml.is_valid:
                self.org_manifest = axml.get_xml()

    def get_manifest(self):
        """Return the manifest parsed by xmltodict, or ``None`` if unavailable."""
        if not self.manifest:
            self._init_manifest()
        return self.manifest

    def _init_manifest(self):
        """Parse the manifest text into ``self.manifest``.

        XML syntax errors are ignored on purpose and leave ``self.manifest``
        unset; any other error propagates.
        """
        if not self.org_manifest:
            self._init_org_manifest()

        if self.org_manifest:
            try:
                self.manifest = xmltodict.parse(
                    self.org_manifest, False)['manifest']
            except xml.parsers.expat.ExpatError:
                # Best effort: a malformed manifest simply has no parsed form.
                pass

    def _init_arsc(self):
        """Parse ``resources.arsc`` into ``self.arsc`` when the member exists."""
        ARSC_NAME = 'resources.arsc'
        with apkfile.ZipFile(self.apk_path, mode="r") as zf:
            if ARSC_NAME not in zf.namelist():
                return
            data = zf.read(ARSC_NAME)
        self.arsc = ARSCParser(data)

    def get_arsc(self):
        """Return the parsed resource table, or ``None`` if there is none."""
        if not self.arsc:
            self._init_arsc()
        return self.arsc

    def get_certs(self):
        """Return the certificate data; an empty list when none was found."""
        if not self.certs:
            self._init_certs()
        return self.certs

    def _init_certs(self):
        """Load certificate data from the non-text members under ``META-INF``."""
        with apkfile.ZipFile(self.apk_path, mode="r") as zf:
            for name in zf.namelist():
                if 'META-INF' not in name:
                    continue

                data = zf.read(name)
                mine = Magic(data).get_type()
                if mine != 'txt':
                    from apkutils.cert import Certificate
                    cert = Certificate(data)
                    self.certs = cert.get()

    def get_opcodes(self):
        """Return one dict per method with its hex opcode string.

        The cache check is on ``self.opcodes``; checking ``self.dex_files``
        (as before) returned ``None`` whenever another getter had already
        loaded the dex files.
        """
        if self.opcodes is None:
            self._init_opcodes()
        return self.opcodes

    def _init_opcodes(self):
        """Collect class, method, prototype and opcode data for every method."""
        if not self.dex_files:
            self._init_dex_files()

        self.opcodes = []
        for dex_file in self.dex_files:
            for dexClass in dex_file.classes:
                try:
                    dexClass.parseData()
                except IndexError:
                    continue

                for method in dexClass.data.methods:
                    opcodes = ""
                    if method.code:
                        # Two upper-case hex digits per opcode.
                        opcodes = "".join(
                            "%02X" % bc.opcode
                            for bc in method.code.bytecode)

                    proto = self.get_proto_string(
                        method.id.return_type, method.id.param_types)

                    self.opcodes.append({
                        'super_class': dexClass.super.decode(),
                        'class_name': method.id.cname.decode(),
                        'method_name': method.id.name.decode(),
                        'method_desc': method.id.desc.decode(),
                        'proto': proto,
                        'opcodes': opcodes,
                    })

    @staticmethod
    def get_proto_string(return_type, param_types):
        """Return a compact prototype string.

        Every type descriptor longer than one character is abbreviated to
        ``L``; single-character descriptors are kept.  The return type comes
        first, followed by the parameters in order.

        Args:
            return_type: Return type descriptor as ``bytes``.
            param_types: Iterable of parameter descriptors as ``bytes``.
        """
        proto = return_type.decode()
        if len(proto) > 1:
            proto = 'L'

        for item in param_types:
            param_type = item.decode()
            proto += 'L' if len(param_type) > 1 else param_type

        return proto