def find_method_used(self):
    """
    Collect, for the APK under analysis, where each method is used (slow).

    One backend is chosen, in this order: the JADX path (switched off by a
    local flag), ``AnalyzeAPK`` when ``self.use_analyze`` is set, the smali
    parser when ``self.use_smaliparser`` is set, otherwise a plain
    ``DalvikVMFormat`` walk. All backends except the smali parser fill
    ``self.method`` with ``method name -> list(get_xref_from())``; the smali
    parser fills ``self.method_2_value`` and ``self.all_url`` instead.
    """

    def record_xrefs(method_analyses):
        # Keyed by bare method name, so a later method with the same name
        # replaces the earlier entry.
        for entry in method_analyses:
            name = entry.get_method().get_name()
            # for this method name, store the list of places it is called from
            self.method[name] = list(entry.get_xref_from())

    used_jadx = False  # local switch; the JADX branch is never taken
    if used_jadx:
        self.dalvik_format = DalvikVMFormat(self.apk)
        self.analysis_object = Analysis(self.dalvik_format)
        # The jadx executable must be found in $PATH, or be passed with
        # jadx="/path/to/jadx".
        decompiler = DecompilerJADX(self.dalvik_format, self.analysis_object)
        # propagate decompiler and analysis back to DalvikVMFormat
        self.dalvik_format.set_decompiler(decompiler)
        self.dalvik_format.set_vmanalysis(self.analysis_object)
        record_xrefs(self.analysis_object.get_methods())
    elif self.use_analyze:
        # AnalyzeAPK returns the apk, the list of dex objects and the analysis
        _apk, self.dalvik_format, self.analysis_object = AnalyzeAPK(
            self.name_apk)
        record_xrefs(self.analysis_object.get_methods())
    elif self.use_smaliparser:
        # smali parser, apktool and grep instead of Androguard
        self.method_2_value, self.all_url = smaliparser.start(
            "temp_dir_" + self.name_only_apk + "/",
            self.conf["method_smali_parser"])
    else:
        # TODO: meant to be a faster analysis, but it does not work well
        self.dalvik_format = DalvikVMFormat(self.apk)
        record_xrefs(
            MethodClassAnalysis(encoded_method)
            for encoded_method in self.dalvik_format.get_methods())
def __analyze_dex(self, dex_file, raw=False):
    """
    Parse a DEX and return its ``VMAnalysis``.

    :param dex_file: path of the DEX file, or the DEX buffer when ``raw``
        is true
    :param raw: true when ``dex_file`` already is the buffer (optional)
    :return: the ``VMAnalysis`` built for the parsed ``DalvikVMFormat``
        (the analysis is also attached to that format object)
    """
    if raw:
        dex_buffer = dex_file
    else:
        # Read through a context manager so the handle is closed even when
        # reading fails.
        with open(dex_file, "rb") as handle:
            dex_buffer = handle.read()

    # DalvikVMFormat
    dalvik_vm_format = DalvikVMFormat(dex_buffer)

    # VMAnalysis
    vm_analysis = VMAnalysis(dalvik_vm_format)
    dalvik_vm_format.set_vmanalysis(vm_analysis)
    return vm_analysis
def run(self):
    """
    Extract the API sequence of every APK in ``self.apks_list``.

    For each APK a dict ``{'apk': <path>, 'apis': <list>}`` is put on
    ``self.queue``; ``'apis'`` stays empty when the archive cannot be read
    or processing fails. Progress is printed per APK and the queue is
    closed at the end.
    """
    # enumerate gives the real position: list.index() is a linear scan and
    # returns the first match, so duplicate paths reported stale progress.
    for position, apk in enumerate(self.apks_list, start=1):
        api_sequence = []
        ret = {'apk': apk, 'apis': []}
        try:
            with ZipFile(apk) as archive:
                # find .dex files inside apk
                dexes = [
                    name for name in archive.namelist()
                    if name.endswith('.dex')
                ]
                for dex in dexes:
                    # for every dex extract api sequence
                    with archive.open(dex) as dexfile:
                        d = DalvikVMFormat(dexfile.read())
                        api_sequence += self.get_api_sequence(d)
            # send apk's api sequence to the main process
            ret['apis'] = api_sequence
            self.queue.put(ret)
            print('Process %d: %.1f%%' %
                  (self.process_id, (position / self.total_apks) * 100))
        except BadZipfile:
            self.queue.put(ret)
            print('Bad zip file =========> %s' % apk)
        except Exception as e:
            # Best effort: report the failure and still hand back the
            # (empty) result so the consumer sees one entry per APK.
            self.queue.put(ret)
            print('\n%s\n%s\n' % (apk, e))
    self.queue.close()
    print('----------------> Process %d is done!' % self.process_id)
def testClassManager(self):
    """Check the class manager's strings against the reference DEX parser."""
    from androguard.core.bytecodes.mutf8 import decode, patch_string

    dex_path = "examples/android/TestsAndroguard/bin/classes.dex"
    reference = parse_dex.read_dex(dex_path)
    with open(dex_path, "rb") as handle:
        vm = DalvikVMFormat(handle.read())

    manager = vm.get_class_manager()
    self.assertFalse(manager.get_odex_format())

    ERR_STR = 'AG:IS: invalid string'
    string_count = reference.string_ids_size

    # Every valid index must resolve, and the raw form must match the
    # reference parser's decoded string.
    for idx in range(string_count):
        self.assertNotEqual(manager.get_string(idx), ERR_STR)
        self.assertNotEqual(manager.get_raw_string(idx), ERR_STR)
        self.assertEqual(manager.get_raw_string(idx),
                         patch_string(decode(reference.str_raw[idx])))

    # Indices past the end must yield the error marker.
    for bad_idx in (string_count, string_count + 100):
        self.assertEqual(manager.get_string(bad_idx), ERR_STR)
        self.assertEqual(manager.get_raw_string(bad_idx), ERR_STR)
def testcode(self):
    """Compare each method's raw code (hex) with the reference parser's."""
    skipped_methods = []
    dex_path = "examples/android/TestsAndroguard/bin/classes.dex"
    reference = parse_dex.read_dex(dex_path)
    with open(dex_path, "rb") as handle:
        vm = DalvikVMFormat(handle.read())

    differ = Differ()
    for method in vm.get_methods():
        code_item = method.get_code()
        if not code_item:
            continue
        method_idx = method.get_method_idx()
        if method_idx in skipped_methods:
            continue

        actual = hexlify(code_item.get_raw())
        expected = reference.methods[method_idx]
        # The failure message is built up front, as before, so it always
        # carries the diff and the sizes of the offending code item.
        message = ("incorrect code for "
                   "[{}]: {} --> {}:\n"
                   "{}\ntries_size: {}, insns_size: {}\nSHOULD BE {}\n{}\n{}"
                   ).format(method_idx,
                            method.get_class_name(),
                            method.get_name(),
                            "".join(differ.compare(expected, actual)),
                            code_item.tries_size,
                            code_item.insns_size,
                            actual,
                            expected,
                            hexlify(code_item.code.get_raw()))
        self.assertEqual(expected, actual, message)
def test(app_path):
    """
    Tell whether the APK declares a component whose class is not in its DEX.

    :param app_path: path of the APK file
    :return: ``True`` as soon as an activity, receiver or service (checked
        in that order) fails ``check_class_in_dex``; ``False`` otherwise,
        including when the path is empty or does not exist
    """
    if not app_path or not os.path.exists(app_path):
        return False

    app_apk = APK(app_path)
    dvm = DalvikVMFormat(app_apk.get_dex())
    receivers = app_apk.get_receivers()
    activities = app_apk.get_activities()
    services = app_apk.get_services()

    for declared in (activities, receivers, services):
        for component in declared:
            if not check_class_in_dex(dvm, component):
                return True
    return False
def From_apk_to_Data_Section(path_of_apk):
    """
    Return the part of the DEX buffer that starts at the header's data offset.

    :param path_of_apk: path of the APK file
    :return: ``get_buff()`` sliced from ``header.data_off`` to the end
    """
    dex = DalvikVMFormat(APK(path_of_apk))
    whole_buffer = dex.get_buff()
    # data_off is the offset from the start of the file to the data section
    return whole_buffer[dex.header.data_off:]
def run(self):
    """
    Collect the unique API calls found across ``self.apks_list``.

    Every ``.dex`` inside an APK is added to one ``Analysis``, cross
    references are created, and the APIs returned by
    ``self.get_api_calls`` are merged into a de-duplicated list. Unreadable
    archives and failing APKs are reported and skipped. The final list is
    put on ``self.queue`` once, after all APKs were visited.
    """
    unique_apis = []
    # enumerate gives the real position: list.index() is a linear scan and
    # returns the first match, so duplicate paths reported stale progress.
    for position, apk in enumerate(self.apks_list, start=1):
        try:
            with ZipFile(apk) as archive:
                # find .dex files inside apk
                dexes = [
                    name for name in archive.namelist()
                    if name.endswith('.dex')
                ]
                dx = Analysis()
                # analyze every .dex
                for dex in dexes:
                    with archive.open(dex) as dexfile:
                        d = DalvikVMFormat(dexfile.read())
                        dx.add(d)
            # creates cross references between classes, methods, etc.
            # for all the .dex
            dx.create_xref()
            # extracting android apis
            apis = self.get_api_calls(dx)
            unique_apis = list(np.unique(unique_apis + apis))
            print('Process %d: %.1f%%' %
                  (self.process_id, (position / self.total_apks) * 100))
        except BadZipfile:
            print('Bad zip file =========> %s' % apk)
        except Exception as e:
            # Best effort: one broken APK must not stop the whole batch.
            print('\n%s\n%s\n' % (apk, e))
    self.queue.put(unique_apis)
    print('----------------> Process %d is done!' % self.process_id)
def get_apis(path):
    """
    List the method invocations in an APK that target foreign classes.

    Each instruction's output is matched against a
    ``Lclass;->name(args)...`` pattern; a match is kept when its class is
    not one of the classes defined in the APK's own DEX.

    :param path: path of the APK file
    :return: list of the distinct matched invocation strings (unordered)
    """
    application = APK(path)
    application_dex = DalvikVMFormat(application.get_dex())
    application_x = Analysis(application_dex)

    # Compiled once and kept as a set: both are consulted per instruction.
    invocation = re.compile(r'(L[^;]*;)->[^\(]*\([^\)]*\).*')
    own_classes = {cc.get_name() for cc in application_dex.get_classes()}

    methods = set()
    for method in application_dex.get_methods():
        # Methods without code have nothing to scan, so skip them before
        # looking up their analysis.
        if method.get_code() is None:
            continue
        g = application_x.get_method(method)
        for block in g.get_basic_blocks().get():
            for ins in block.get_instructions():
                match = invocation.search(ins.get_output())
                if match and match.group(1) not in own_classes:
                    methods.add(match.group())
    return list(methods)
def on_complete(self):
    """
    Tell whether the manifest declares a component missing from the DEX.

    Receivers, activities and services are read from the ``apkinfo``
    results, converted with ``self.convert_class`` and looked up among the
    class names of the target APK's DEX.

    :return: ``True`` as soon as one declared component has no class in the
        DEX; ``False`` when all are present, or when the target path is
        missing or does not exist
    """
    manifest = self.get_results("apkinfo", {}).get("manifest", {})
    receivers = manifest.get("receivers", {})
    activities = manifest.get("activities", {})
    services = manifest.get("services", {})

    app_path = self.get_results("target", {}).get("file", {}).get("path", None)
    if not app_path or not os.path.exists(app_path):
        return False

    app_apk = APK(app_path)
    dvm = DalvikVMFormat(app_apk.get_dex())
    classes = {cls.name for cls in dvm.get_classes()}

    for declared in (receivers, activities, services):
        for component in declared:
            if self.convert_class(component) not in classes:
                return True
    # Explicit negative result: the function used to fall off the end and
    # return None here while every other path returned a bool.
    return False
def run(self):
    """Run androguard to extract static android information
    @return: dict with the APK's files, manifest data and static method
        call flags; empty when the target is not a zip/apk; ``None`` when
        the task is not a file task or androguard is unavailable
    """
    self.key = "apkinfo"
    apkinfo = {}

    if "file" not in self.task["category"] or not HAVE_ANDROGUARD:
        return

    f = File(self.task["target"])
    looks_like_archive = (f.get_name().endswith((".zip", ".apk"))
                          or "zip" in f.get_type())
    if not looks_like_archive:
        return apkinfo

    if not os.path.exists(self.file_path):
        raise CuckooProcessingError(
            "Sample file doesn't exist: \"%s\"" % self.file_path)

    try:
        a = APK(self.file_path)
        if a.is_valid_APK():
            apkinfo["files"] = self._apk_files(a)
            # Detailed permissions, extended receiver actions and the
            # certificate are not collected here.
            apkinfo["manifest"] = {
                "package": a.get_package(),
                "main_activity": a.get_main_activity(),
                "activities": a.get_activities(),
                "services": a.get_services(),
                "receivers": a.get_receivers(),
                "providers": a.get_providers(),
                "libraries": a.get_libraries(),
            }

            if self.check_size(apkinfo["files"]):
                vm = DalvikVMFormat(a.get_dex())
                vmx = uVMAnalysis(vm)
                # Only the summary flags are gathered; the per-category
                # method call listings are not.
                static_calls = {
                    "all_methods": self.get_methods(vmx),
                    "is_native_code": analysis.is_native_code(vmx),
                    "is_dynamic_code": analysis.is_dyn_code(vmx),
                    "is_reflection_code": analysis.is_reflection_code(vmx),
                }
            else:
                static_calls = {}
                log.warning("Dex size bigger than: %s",
                            self.options.decompilation_threshold)
            apkinfo["static_method_calls"] = static_calls
    except (IOError, OSError, BadZipfile) as e:
        raise CuckooProcessingError("Error opening file %s" % e)

    return apkinfo
def extract_calls_from_dex(self, dex):
    """
    Extract method calls from Dalvik bytecode with androguard.

    Methods whose class name (minus its first character) starts with
    "android" are skipped. Returns a generator of
    ``parse_method_signature`` results for the instructions accepted by
    ``is_interesting_instruction``.
    """
    per_method_instructions = [
        method.get_instructions()
        for method in DalvikVMFormat(dex).get_methods()
        if not method.get_class_name()[1:].startswith("android")
    ]
    instructions = chain(*per_method_instructions)
    return (self.parse_method_signature(instruction.get_translated_kind())
            for instruction in instructions
            if self.is_interesting_instruction(instruction))
def From_apk_to_code_item_str(path_of_apk):
    """
    Return what ``show()`` yields for the code items of the APK's DEX.

    :param path_of_apk: path of the APK file
    :return: the value returned by ``get_codes_item().show()``
    """
    vm = DalvikVMFormat(APK(path_of_apk))
    codes = vm.get_codes_item()
    # Original note: this requires a manual modification of the package's
    # .show() (presumably so that it returns the text — confirm).
    return codes.show()
def __native_from_androguard(self, dex_file_path):
    """
    List the methods of a DEX file whose access flags are "public native".

    :param dex_file_path: path of the DEX file
    :return: list of the matching method objects
    """
    with open(dex_file_path, 'rb') as dex_file:
        dalvik = DalvikVMFormat(dex_file.read())
    return [
        method for method in dalvik.get_methods()
        if method.get_access_flags_string() == "public native"
    ]
def extract_permissions(file):
    """
    Return the permissions reported by the APK.

    The result comes from ``APK.get_permissions()`` alone. The DEX parsing,
    analysis and decompiler objects that used to be built here never fed
    into the return value, so they are no longer created.

    :param file: path of the APK file
    :return: whatever ``APK.get_permissions()`` returns
    """
    return APK(file).get_permissions()
def parse_apk(path):
    """
    Parse an apk file to the custom bytecode output.

    :param path: the path to the APK file
    :return: the result of ``parse_dalvik`` for the APK's DalvikVMFormat
    """
    dalvik = DalvikVMFormat(APK(path))
    return parse_dalvik(dalvik)
def extract_features(file_path):
    """
    Return the permissions reported by the APK, or ``None`` if it cannot load.

    Only the APK is loaded: the DEX analysis and decompiler objects that
    used to be created inside the ``try`` never contributed to the result,
    and any failure in them turned the result into ``None``.

    :param file_path: path of the APK file
    :return: ``APK.get_permissions()``, or ``None`` when the APK cannot be
        loaded
    """
    try:
        a = APK(file_path)
    except Exception:
        # Keep the "None on failure" contract, but no longer swallow
        # KeyboardInterrupt/SystemExit like the bare except did.
        return None
    return a.get_permissions()
def From_Apk_to_Dex_file_bytes(path_of_apk, mode=1):
    """
    Return the DEX buffer of an APK as ``bytes``.

    :param path_of_apk: path of the APK file
    :param mode: ``1`` (default) goes through ``AnalyzeAPK`` and takes the
        first DEX it returns; ``0`` builds a ``DalvikVMFormat`` from the
        ``APK`` object directly
    :return: ``bytes`` of the selected DEX buffer
    :raises ValueError: when ``mode`` is neither ``0`` nor ``1``
    """
    if mode == 1:
        _apk, dex_list, _analysis = AnalyzeAPK(path_of_apk)
        dex_buffer = dex_list[0].get_buff()
    elif mode == 0:
        dex_buffer = DalvikVMFormat(APK(path_of_apk)).get_buff()
    else:
        # Previously this fell through to the return statement with the
        # result unbound.
        raise ValueError("mode must be 0 or 1, got %r" % (mode,))
    return bytes(dex_buffer)
def analyze_dex(filename, raw=False, decompiler=None):
    """
    Analyze an android dex file and set everything up for quicker analysis.

    :param filename: the filename of the android dex file or a buffer which
        represents the dex file
    :type filename: string
    :param raw: True if you would like to use a buffer (optional)
    :type raw: boolean
    :param decompiler: the type of decompiler to use ("dad", "dex2jad",
        "ded")
    :type decompiler: string

    :rtype: return the :class:`DalvikVMFormat` and the ``analysis.Analysis``
        object built for it
    """
    if raw:
        dex_buffer = filename
    else:
        # Read through a context manager so the handle is closed even when
        # reading fails.
        with open(filename, "rb") as handle:
            dex_buffer = handle.read()

    d = DalvikVMFormat(dex_buffer)
    dx = analysis.Analysis(d)
    d.set_vmanalysis(dx)
    run_decompiler(d, dx, decompiler)
    dx.create_xref()
    return d, dx
def __init__(self, filename):
    """
    Load ``filename`` with androguard and store the result of ``build_fcg``.

    The file is first opened as an APK; when that raises
    ``zipfile.BadZipfile`` it is handed to ``AnalyzeDex`` instead. Sets
    ``self.a`` (APK path only), ``self.d``, ``self.dx`` and ``self.fcg``.
    """
    self.filename = filename
    try:
        self.a = APK(filename)
        self.d = DalvikVMFormat(self.a.get_dex())
        self.d.create_python_export()
        self.dx = Analysis(self.d)
    except zipfile.BadZipfile:
        # if file is not an APK, may be a dex object
        _, self.d, self.dx = AnalyzeDex(self.filename)
    # NOTE(review): the indentation of this copy was lost; the calls below
    # are assumed to run for both the APK and the DEX path — confirm.
    self.d.set_vmanalysis(self.dx)
    self.dx.create_xref()
    self.fcg = self.build_fcg()
def decrypt_dex(data: bytes) -> Optional[DalvikVMFormat]:
    """
    Decrypt ``data`` with ``KEY`` and parse the ``classes.dex`` it contains.

    The decrypted bytes are treated as a zip archive and unpacked with
    ``extract_zip``.

    :param data: the encrypted bytes
    :return: a ``DalvikVMFormat`` for the archive's ``classes.dex``, or
        ``None`` when that entry is missing or any step fails (failures are
        logged)
    """
    try:
        cipher = AES.new(KEY)
        archive = BytesIO(cipher.decrypt(data))
        entries = extract_zip(archive)
        dex_bytes = entries.get("classes.dex")
        return None if dex_bytes is None else DalvikVMFormat(dex_bytes)
    except Exception as e:
        logger.exception(e)
        logger.error("Failed to decrypt!")
        return None
def __init__(self, args):
    """
    Analyze ``args.apk`` with androguard and prepare layout/id lookups.

    Reads ``apk``, ``verbosity`` and ``output_location`` from ``args``,
    builds the DEX/VM/graph analysis objects with cross and data
    references, runs ``Predict_Input.predict`` and then tries to collect
    the class dict, the R$layout and R$id classes and the R$id fields.
    Note: this block uses Python 2 syntax (print statement,
    ``except Exception, e``).
    """
    self.apk = args.apk
    self.verbosity = args.verbosity
    self.output_location = args.output_location
    # Identifier: the text before the first '.' of the apk argument,
    # truncated to its last 24 characters.
    self.file_identifier = args.apk.split('.')[0]
    self.file_identifier = self.file_identifier[-24:]
    # analyze the dex file
    print "From LOCATION = ", self.apk
    self.a = APK(self.apk)
    # get the vm analysis
    self.d = DalvikVMFormat(self.a.get_dex())
    self.dx = VMAnalysis(self.d)
    self.gx = GVMAnalysis(self.dx, None)
    self.d.set_vmanalysis(self.dx)
    self.d.set_gvmanalysis(self.gx)
    # create the cross reference
    self.d.create_xref()
    self.d.create_dref()
    print 'CWD: ', os.getcwd()
    predictor = Predict_Input(self.output_location, self.file_identifier)
    # self.apk[:-4] drops the last four characters (presumably the ".apk"
    # suffix — confirm).
    self.predictions = predictor.predict(self.apk, self.apk[:-4],
                                         self.output_location,
                                         self.file_identifier)
    try:
        # get the classes for this apk
        # store them in a dict
        self.classes = self.get_class_dict()
        # Find the R$layout class
        self.Rlayout = self.get_RLayout(self.d.get_classes())
        # Find the R$id class
        self.Rid = self.get_Rid(self.d.get_classes())
        # Store all fields referenced in R$id
        self.fields, self.field_refs = self.get_fields(self.Rid)
    except Exception, e:
        # Any failure above is only printed; attributes assigned before the
        # failure stay set, later ones are left unset.
        print e
def AnalyzeAPK(_file, session=None, raw=False):
    """
    Analyze an android application and set everything up for quicker analysis.

    If session is None, no session is used at all. This is the default
    behaviour. If you like to continue your work later, it might be a good
    idea to use a session. A default session can be created by using
    :meth:`~get_default_session`.

    :param _file: the filename of the android application or a buffer which
        represents the application
    :type _file: string (for filename) or bytes (for raw)
    :param session: A session (default: None)
    :param raw: boolean if raw bytes are supplied instead of a filename
    :rtype: return the :class:`~androguard.core.bytecodes.apk.APK`, list of
        :class:`~androguard.core.bytecodes.dvm.DalvikVMFormat`, and
        :class:`~androguard.core.analysis.analysis.Analysis` objects
    """
    log.debug("AnalyzeAPK")

    if session:
        log.debug("Using existing session {}".format(session))
        if raw:
            data = _file
            # Raw buffers have no name, so their MD5 digest is used instead.
            filename = hashlib.md5(_file).hexdigest()
        else:
            with open(_file, "rb") as fd:
                data = fd.read()
            filename = _file
        digest = session.add(filename, data)
        return session.get_objects_apk(filename, digest)

    log.debug("Analysing without session")
    a = APK(_file, raw=raw)
    # FIXME: probably it is not necessary to keep all DalvikVMFormats, as
    # they are already part of Analysis. But when using sessions, it works
    # this way...
    d = []
    dx = Analysis()
    for dex in a.get_all_dex():
        df = DalvikVMFormat(dex, using_api=a.get_target_sdk_version())
        dx.add(df)
        d.append(df)
        # The decompiler belongs to this one DEX, so it gets ``df``; it was
        # previously handed the whole list ``d``.
        df.set_decompiler(decompiler.DecompilerDAD(df, dx))

    dx.create_xref()
    return a, d, dx
def parse_dex_proc(dex: DexFile):
    """
    Parse one DEX and gather the per-class method dictionaries.

    :param dex: object whose ``data`` attribute holds the DEX content
    :return: ``(dex, class_count, result)`` where ``result`` maps the
        ``jclass`` of a class's first parsed method to the merged
        ``as_dict`` of all its methods; when the DEX cannot be parsed,
        ``(dex, 0, exception)`` is returned instead
    """
    try:
        vm = DalvikVMFormat(dex.data)
    except Exception as error:
        return dex, 0, error

    by_class = {}
    seen = 0
    for class_def in vm.get_classes():
        seen += 1
        methods = parse_class_def(class_def)
        if not methods:
            continue
        class_name = methods[0].jclass
        merged = {}
        for method in methods:
            merged.update(method.as_dict)
        by_class[class_name] = merged
    return dex, seen, by_class
def make_img(dirname, img_path):
    """
    Walk ``dirname`` recursively and save a grayscale image per ``.apk``.

    For each APK the hex text returned by ``get_codes_item().show()`` is
    converted to bytes and loaded as a square 'L' image whose side is
    ``int(sqrt(len(bytes) // 8))``; it is saved as
    ``img_path + <apk name without extension> + '.jpg'``. An APK that fails
    is reported and not counted. Each directory level prints how many
    entries it processed.

    :param dirname: directory to scan
    :param img_path: prefix prepended to every output file name
    """
    processed = 0
    for entry in os.listdir(dirname):
        processed += 1
        entry_path = os.path.join(dirname, entry)
        if os.path.isdir(entry_path):
            make_img(entry_path, img_path)
            continue
        if os.path.splitext(entry_path)[1] != '.apk':
            continue
        try:
            hex_dump = DalvikVMFormat(APK(entry_path)).get_codes_item().show()
            # Two hex digits per byte.
            raw = int(hex_dump, 16).to_bytes(int(len(hex_dump) / 2), 'big')
            side = int(math.sqrt(int(len(raw) // 8)))
            image = PIL.Image.frombytes('L', (side, side), raw)
            target = img_path + os.path.splitext(entry)[0] + '.jpg'
            print(target)
            image.save(target)
        except Exception as ex:
            processed -= 1
            print(ex)
    print("processed :", processed)
def __init__(self, filename): self.filename = filename # print(os.path.exists(filename)) # a,d,dx = AnalyzeAPK(filename) # print(dx.get_call_graph()) try: self.a = APK(filename) self.d = DalvikVMFormat(self.a.get_dex()) self.d.create_python_export() self.dx = Analysis(self.d) except zipfile.BadZipfile: # if file is not an APK, may be a dex object _, self.d, self.dx = AnalyzeDex(self.filename) self.d.set_vmanalysis(self.dx) self.dx.create_xref() self.fcg = self.build_fcg()
def __init__(self, apk_name):
    """
    Load ``INPUT_APK_DIR + apk_name + ".apk"`` and build its analyses.

    Sets ``self.a`` (APK), ``self.d`` (DalvikVMFormat), ``self.dx``
    (VMAnalysis) and ``self.gx`` (GVMAnalysis), attaches both analyses to
    the DEX object and creates its cross and data references.

    :param apk_name: APK file name without directory and without ".apk"
    """
    self.apk_name = apk_name
    self.apk = INPUT_APK_DIR + self.apk_name + ".apk"

    # Parse the APK and its DEX, then build the VM and graph analyses.
    self.a = APK(self.apk)
    vm = DalvikVMFormat(self.a.get_dex())
    vm_analysis = VMAnalysis(vm)
    graph_analysis = GVMAnalysis(vm_analysis, None)
    self.d, self.dx, self.gx = vm, vm_analysis, graph_analysis

    # Attach the analyses before creating the references.
    vm.set_vmanalysis(vm_analysis)
    vm.set_gvmanalysis(graph_analysis)
    vm.create_xref()
    vm.create_dref()
def get_properties_SE(path):
    """
    Get the custom String Encryption properties of an APK's DEX strings.

    :param path: the path to the APK file
    :return: float array of 8 values, in this order: mean entropy, mean
        word size, mean string length, then the per-string averages of
        '=', '-' (dashes), '/' (slashes), '+' and of
        ``count_repetitive_characters``. All zeros when the DEX has no
        strings.
    """
    a = APK(path)
    d = DalvikVMFormat(a)

    total = 0
    entropy = 0
    words = {'total': 0, 'size': 0}
    length = 0
    symbols = {'equals': 0, 'dashes': 0, 'slashes': 0, 'pluses': 0}
    rep_chars = 0

    for s in d.get_strings():
        total += 1
        ent = stats.entropy([ord(char) for char in s])
        if math.isnan(ent):
            # A NaN entropy is reported and counted as 0.
            print("-" * 100)
            print(s)
            print("-" * 100)
            ent = 0
        entropy += ent
        for word in s.split(" "):
            words['total'] += 1
            words['size'] += len(word)
        length += len(s)
        symbols['equals'] += s.count('=')
        # Dashes and slashes were counted into each other's slot before.
        symbols['dashes'] += s.count('-')
        symbols['slashes'] += s.count('/')
        symbols['pluses'] += s.count('+')
        rep_chars += count_repetitive_characters(s)

    if total == 0:
        # No strings at all: return neutral features instead of dividing
        # by zero.
        return np.zeros(8)

    return np.array([
        entropy / total,
        words['size'] / words['total'],
        length / total,
        symbols['equals'] / total,
        symbols['dashes'] / total,
        symbols['slashes'] / total,
        symbols['pluses'] / total,
        rep_chars / total,
    ], dtype=float)
def addDEX(self, filename, data, dx=None, postpone_xref=False):
    """
    Add a DEX file to the Session and run analysis.

    :param filename: the (file)name of the DEX file
    :param data: binary data of the dex file
    :param dx: an existing Analysis Object (optional)
    :param postpone_xref: True if no xref shall be created, and will be
        called manually
    :return: A tuple of SHA256 Hash, DalvikVMFormat Object and Analysis
        object
    """
    digest = hashlib.sha256(data).hexdigest()
    log.debug("add DEX:%s", digest)

    log.debug("Parsing format ...")
    d = DalvikVMFormat(data)
    log.debug("added DEX:%s", digest)

    self.analyzed_files[filename].append(digest)
    self.analyzed_digest[digest] = filename
    self.analyzed_dex[digest] = d

    if dx is None:
        dx = Analysis()

    dx.add(d)
    if not postpone_xref:
        dx.create_xref()

    # TODO: If multidex: this will called many times per dex, even if
    # already set
    # A separate loop name keeps ``d`` bound to the DEX parsed above, so
    # the export and the returned tuple do not depend on the order of
    # ``dx.vms``.
    for vm in dx.vms:
        # TODO: allow different decompiler here!
        vm.set_decompiler(DecompilerDAD(vm, dx))
        vm.set_vmanalysis(dx)
    self.analyzed_vms[digest] = dx

    if self.export_ipython:
        log.debug("Exporting in ipython")
        d.create_python_export()

    return digest, d, dx
def process_vm(self): """ Process the application's classes.dex Args: None Results: None """ # Make sure classes.dex exists if self.find_dex(): self.dex = self.apk.get_dex() # Analyze classes.dex # TODO Throw in a progress bar, this can take awhile if self.dex: self.logger.log("info", "Loading classes.dex ...") from androguard.core.bytecodes.dvm import DalvikVMFormat from androguard.core.analysis.analysis import VMAnalysis from androguard.core.analysis.ganalysis import GVMAnalysis # Create a new virtual machine instance self.vm = DalvikVMFormat(self.dex) if self.vm: print(self.t.yellow("\n\t--> Loaded classes.dex (!)\n")) self.logger.log("info", "Analyzing classes.dex ...") # Analyze the virtual machine instance self.vmx = VMAnalysis(self.vm) self.gmx = GVMAnalysis(self.vmx, None) if self.vmx and self.gmx: print(self.t.yellow("\n\t--> Analyzed classes.dex (!)\n")) self.vm.set_vmanalysis(self.vmx) self.vm.set_gvmanalysis(self.gmx) # Generate xref(s) self.vm.create_xref() self.vm.create_dref() else: CommandError("Cannot analyze VM instance (!)") else: CommandError("Cannot load VM instance (!)") else: CommandError("classes.dex not found (!)")