def do_build_apk_report(self, a):
    """Build a plain-text report describing an APK and its Dalvik code.

    The report lists the archive files, permissions, main activity,
    activities/services/receivers with their intent filters, providers,
    a few code indicators and every analysed method that carries tags.

    Args:
        a: APK object exposing the ``get_*`` accessors used below.

    Returns:
        The ``StringIO`` buffer the report was written to.
    """
    report = StringIO()
    emit = report.write

    # Result is discarded; presumably this call populates ``a.files``
    # -- TODO confirm against the APK class.
    a.get_files_types()

    emit("[FILES] \n")
    for name in a.get_files():
        try:
            line = "\t%s %s %x\n" % (name, a.files[name], a.files_crc32[name])
        except KeyError:
            # Fall back to the CRC-only form when an entry is missing.
            line = "\t%s %x\n" % (name, a.files_crc32[name])
        emit(line)

    emit("\n[PERMISSIONS] \n")
    details_permissions = a.get_details_permissions()
    for permission, details in details_permissions.items():
        emit("\t%s %s\n" % (permission, details))

    emit("\n[MAIN ACTIVITY]\n\t%s\n" % (a.get_main_activity(),))

    # (section header, component getter, intent-filter kind)
    component_sections = (
        ("\n[ACTIVITIES] \n", a.get_activities, "activity"),
        ("\n[SERVICES] \n", a.get_services, "service"),
        ("\n[RECEIVERS] \n", a.get_receivers, "receiver"),
    )
    for header, list_components, kind in component_sections:
        emit(header)
        for component in list_components():
            filters = a.get_intent_filters(kind, component)
            emit("\t%s %s\n" % (component, filters or ""))

    emit("\n[PROVIDERS]\n\t%s\n\n" % (a.get_providers(),))

    vm = dvm.DalvikVMFormat(a.get_dex())
    vmx = analysis.uVMAnalysis(vm)

    # (line template, predicate, object the predicate inspects)
    indicators = (
        ("Native code : %s\n", analysis.is_native_code, vmx),
        ("Dynamic code : %s\n", analysis.is_dyn_code, vmx),
        ("Reflection code : %s\n", analysis.is_reflection_code, vmx),
        ("ASCII Obfuscation: %s\n\n", analysis.is_ascii_obfuscation, vm),
    )
    for template, predicate, target in indicators:
        emit(template % (predicate(target),))

    for method_analysis in vmx.get_methods():
        method_analysis.create_tags()
        if method_analysis.tags.empty():
            continue
        emit("%s %s %s\n" % (
            method_analysis.method.get_class_name(),
            method_analysis.method.get_name(),
            method_analysis.tags,
        ))

    return report
def display_dvm_info(apk):
    """Print code indicators and tagged methods for the DEX of *apk*.

    Args:
        apk: APK object whose ``get_dex()`` result is parsed and analysed.
    """
    vm = dvm.DalvikVMFormat(apk.get_dex())
    vmx = analysis.uVMAnalysis(vm)

    # (label, predicate, object the predicate inspects)
    indicators = (
        ("Native code:", analysis.is_native_code, vmx),
        ("Dynamic code:", analysis.is_dyn_code, vmx),
        ("Reflection code:", analysis.is_reflection_code, vmx),
        ("Ascii Obfuscation:", analysis.is_ascii_obfuscation, vm),
    )
    for label, predicate, target in indicators:
        # A single pre-formatted argument prints the same text under both
        # the Python 2 print statement and the Python 3 print function.
        print("%s %s" % (label, predicate(target)))

    for method_analysis in vmx.get_methods():
        method_analysis.create_tags()
        if method_analysis.tags.empty():
            continue
        print("%s %s %s" % (
            method_analysis.method.get_class_name(),
            method_analysis.method.get_name(),
            method_analysis.tags,
        ))
def main(options, args):
    """Statically analyse one APK and write the extracted features as JSON.

    The JSON document is written to a file named after
    ``hashfile(options.input)`` (with a ``.json`` suffix) inside the
    ``options.output`` folder.

    Args:
        options: Parsed command-line options; ``input`` is the APK path and
            ``output`` is the destination folder.
        args: Positional command-line arguments (unused).

    Exits with status 2, after printing the usage line, when either option
    is missing.  Inputs that ``androconf.is_android`` does not classify as
    ``"APK"`` are ignored.  Any exception raised during the analysis is
    printed together with its traceback instead of being propagated.
    """
    print(options.input)
    print(options.output)

    if options.input is None or options.output is None:
        print("static_analysis.py -i <inputfile> -o <outputfolder>")
        sys.exit(2)

    if androconf.is_android(options.input) != "APK":
        return

    try:
        a = apk.APK(options.input, zipmodule=2)
        if not a.is_valid_APK():
            print("INVALID APK")
            return

        vm = dvm.DalvikVMFormat(a.get_dex())
        vmx = analysis.uVMAnalysis(vm)
        tainted_packages = vmx.get_tainted_packages()

        # NOTE: 'internalMethodCalls' was commented out in the original
        # code; only external method calls are exported here.
        data = {
            'mainActivity': a.get_main_activity(),
            'activities': a.get_activities(),
            'providers': a.get_providers(),
            'receivers': a.get_receivers(),
            'services': a.get_services(),
            'androidVersion': a.get_androidversion_code(),
            'maxSdkVersion': a.get_max_sdk_version(),
            'minSdkVersion': a.get_min_sdk_version(),
            'targetSdkVersion': a.get_target_sdk_version(),
            'package': a.get_package(),
            'libraries': a.get_libraries(),
            'isCryptoCode': analysis.is_crypto_code(vmx),
            'isDynamicCode': analysis.is_dyn_code(vmx),
            'isNativeCode': analysis.is_native_code(vmx),
            'nativeMethodCount': native_method_count(vm),
            'isReflectionCode': analysis.is_reflection_code(vmx),
            'reflectionCount': len(tainted_packages.search_methods(
                "Ljava/lang/reflect/Method;", ".", ".")),
            'isAsciiObfuscation': analysis.is_ascii_obfuscation(vm),
            'permissions': a.get_permissions(),
            'actualPermissions': actual_permissions(vm, vmx),
            'externalMethodCalls': get_methods(
                vm.get_class_manager(),
                tainted_packages.get_external_packages(),
                {}),
        }

        report_path = options.output + "/" + hashfile(options.input) + ".json"
        with io.open(report_path, 'w', encoding='utf-8') as f:
            # io.open in text mode needs unicode on Python 2, hence the
            # explicit conversion of the json.dumps result.
            f.write(unicode(json.dumps(data, sort_keys=False, indent=2,
                                       separators=(',', ': '),
                                       ensure_ascii=False)))
    except Exception as e:
        # Top-level boundary: report the failure and keep the traceback.
        print("ERROR %s" % (e,))
        import traceback
        traceback.print_exc()
def extract_features(file_path):
    """Extract static features from the APK at *file_path*.

    Args:
        file_path: Path of the APK to analyse.

    Returns:
        A dict holding manifest metadata, hashes of the raw APK, names and
        strings found in the DEX, an entropy rate over the collected names,
        and ``feature_vectors`` with 0/1 lists for ``API_CALLS``,
        ``PERMISSIONS``, ``INTENTS`` and ``SPECIAL_STRINGS``.  ``None`` is
        returned when the APK cannot be loaded or analysed.
    """
    result = {}
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        # The original parsed the DEX a second time into an unused local;
        # that redundant parse has been dropped.
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception:
        # Best effort: an unparseable APK yields no features.  Unlike a
        # bare ``except:``, KeyboardInterrupt/SystemExit still propagate.
        return None

    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()

    raw = a.get_raw()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()

    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0
    result['intents'] = list(a.get_elements("action", "name"))

    names = []
    names.extend(result['class_names'])
    names.extend(result['method_names'])
    names.extend(result['field_names'])
    result['entropy_rate'] = entropy_rate(names)

    vectors = {}
    result['feature_vectors'] = vectors

    # Presence of each known API call.
    # NOTE(review): verify that Analysis.get_method accepts the values stored
    # in API_CALLS; if it expects a method object, plain names never match.
    vectors['api_calls'] = [1 if dx.get_method(call) else 0
                            for call in API_CALLS]

    # Presence of each known permission.
    vectors['permissions'] = [1 if permission in result['permissions'] else 0
                              for permission in PERMISSIONS]

    # A known intent counts as present when it is a substring of any
    # declared action name.
    vectors['intents'] = [
        1 if any(known in declared for declared in result['intents']) else 0
        for known in INTENTS
    ]

    # Presence of special strings (regex search over the DEX strings).
    vectors['special_strings'] = [1 if d.get_regex_strings(word) else 0
                                  for word in SPECIAL_STRINGS]

    return result
def _write_csv_row(path, mode, fieldnames, row, header=False):
    """Write one ``;``-delimited row to *path*, optionally after a header."""
    with open(path, mode, encoding='utf8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames,
                                delimiter=";", lineterminator="\n")
        if header:
            writer.writeheader()
        writer.writerow(row)


def api_check(folder, APKname):
    """Analyse one APK and record its feature dictionaries as CSV files.

    One row per feature table is appended to the shared CSV files directly
    under ``result/``; then a per-application directory
    ``result/<folder><APKname>data`` is created and the same tables are
    written there with headers.

    Args:
        folder: Directory prefix of the APK; it is concatenated with
            *APKname* without adding a separator.
        APKname: File name of the APK.

    Returns:
        None.  The function returns early when the per-application directory
        already exists or when ``AnalyzeAPK`` fails.
    """
    data_dir = "result/" + folder + APKname + 'data'
    if os.path.exists(data_dir + '/'):
        print(APKname + " Already scanned")
        return

    print("Starting apk:" + APKname)
    apk_start_time = time.time()

    RESULTdict = dict.fromkeys(RESULT_PARAMS, 0)
    # Separate dictionaries, one per feature family.
    OtherDict = dict.fromkeys(('obfuscation', 'database'), 0)
    APIdict = dict.fromkeys((API_CALLS + API_ClASS), 0)
    permission_dict = dict.fromkeys(PERMISSIONS, 0)
    strings_dict = dict.fromkeys(API_SYSTEM_COMMANDS, 0)
    groupAPI_dict = dict.fromkeys(APIGROUPS, 0)

    # apk_obj: APK, d[0]: DalvikVMFormat, dx: Analysis
    try:
        apk_obj, d, dx = AnalyzeAPK(folder + APKname)
    except Exception:
        print(" ERROR: Androguard parse error, skipping file")
        return

    # TODO: read up on permissions that are used without being requested.
    RESULTdict["APP_Name"] = APKname
    RESULTdict['folder'] = folder

    # Suspicious strings.
    RESULTdict["warn_strings"] = []
    strings = dx.get_strings_analysis()
    for command in read_system_commands(strings, API_SYSTEM_COMMANDS):
        RESULTdict["warn_strings"].append(command)
        strings_dict[command] += 1

    # General information.
    RESULTdict['permissions'] = apk_obj.get_permissions()
    RESULTdict['activities'] = apk_obj.get_activities()
    RESULTdict['providers'] = apk_obj.get_providers()
    RESULTdict['services'] = apk_obj.get_services()
    RESULTdict['libraries'] = apk_obj.get_libraries()
    RESULTdict['is_obfuscation'] = 1 if is_ascii_obfuscation(d[0]) else 0
    RESULTdict['is_database'] = 1 if d[0].get_regex_strings(DB_REGEX) else 0
    # TODO: intents_analysis from new.py
    OtherDict['obfuscation'] = RESULTdict['is_obfuscation']
    OtherDict['database'] = RESULTdict['is_database']

    # Permissions.
    RESULTdict['warn_permissions'] = []
    for permission in PERMISSIONS:
        if permission in RESULTdict['permissions']:
            RESULTdict['warn_permissions'].append(permission)
            permission_dict[permission] = 1

    # TODO: count API groups and system commands for the feature vector.

    # API usage, collected from the methods of external classes.
    RESULTdict['API_groups'] = []
    for external_class in dx.get_external_classes():
        for method in external_class.get_vm_class().get_methods():
            # Distinct local names: the original reused ``a`` here and
            # thereby shadowed the APK object.
            class_name = str(method.get_class_name().replace(';', ''))
            method_name = str(method.get_name())
            # TODO: permission_api_name, see
            # https://androguard.readthedocs.io/en/latest/api/androguard.core.analysis.html?highlight=permission#androguard.core.analysis.analysis.ExternalMethod.permission_api_name
            if method_name in API_CALLS:
                APIdict[method_name] += 1
            # TODO: data normalization is needed here.
            if class_name in API_ClASS:
                APIdict[class_name] += 1
            group = GroupAPI_Checker.checkAPIGroup(
                class_name.replace('/', '.')[1:], method_name)
            if group is not None:
                groupAPI_dict[group] += 1
                RESULTdict['API_groups'].append(group)

    # Write the shared tables: one appended row per application.
    id_fields = ('APP_Name', 'folder')
    shared_tables = (
        ('API_CALLS.csv', API_CALLS + API_ClASS, APIdict),
        ('OtherDict.csv', ('obfuscation', 'database'), OtherDict),
        ('permission_dict.csv', PERMISSIONS, permission_dict),
        ('strings_dict.csv', API_SYSTEM_COMMANDS, strings_dict),
        ('groupAPI_dict.csv', APIGROUPS, groupAPI_dict),
    )
    for filename, fields, features in shared_tables:
        row = features.copy()
        row['APP_Name'] = APKname
        row['folder'] = folder
        _write_csv_row("result/" + filename, 'a',
                       id_fields + tuple(fields), row)
    _write_csv_row("result/" + 'RESULTdict.csv', 'a',
                   RESULT_PARAMS, RESULTdict)

    # Write this application's own tables.
    try:
        parent_dir = 'result/' + folder
        if not os.path.exists(parent_dir):
            os.mkdir(parent_dir)
        os.mkdir(data_dir)
    except OSError:
        print("Создать директорию %s не удалось" % data_dir)
    else:
        app_tables = (
            ('RESULT.csv', RESULT_PARAMS, RESULTdict),
            ('OtherDict.csv', ('obfuscation', 'database'), OtherDict),
            ('APIdict.csv', API_CALLS + API_ClASS, APIdict),
            ('permission_dict.csv', PERMISSIONS, permission_dict),
            ('strings_dict.csv', API_SYSTEM_COMMANDS, strings_dict),
            ('groupAPI_dict.csv', APIGROUPS, groupAPI_dict),
        )
        for filename, fields, features in app_tables:
            _write_csv_row(data_dir + '/' + filename, 'w',
                           fields, features, header=True)

    print("APK done:{} ".format(time.time() - apk_start_time))
def extract_features(file_path):
    """Extract static features from the APK at *file_path*.

    Args:
        file_path: Path of the APK to analyse.

    Returns:
        A dict holding manifest metadata, hashes of the raw APK, names and
        strings found in the DEX, an entropy rate over the collected names,
        and ``feature_vectors`` with 0/1 lists for ``API_CALLS``,
        ``PERMISSIONS``, ``SPECIAL_STRINGS`` and the ``others`` flags.
        ``None`` is returned (after printing the error) when the APK cannot
        be loaded or analysed.
    """
    result = {}
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        # The original also built a second DalvikVMFormat and a second
        # Analysis that were never used; both have been dropped.
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception as e:
        # Single-argument form prints the same on Python 2 and Python 3.
        print(e)
        return None

    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()

    raw = a.get_raw()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()

    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0

    names = []
    names.extend(result['class_names'])
    names.extend(result['method_names'])
    names.extend(result['field_names'])
    result['entropy_rate'] = entropy_rate(names)

    vectors = {}
    result['feature_vectors'] = vectors

    # NOTE(review): confirm that get_method_by_name treats "." as a wildcard;
    # if it matches class name and descriptor exactly, this vector is all 0.
    vectors['api_calls'] = [
        1 if dx.get_method_by_name(".", call, ".") else 0
        for call in API_CALLS
    ]
    vectors['permissions'] = [
        1 if permission in result['permissions'] else 0
        for permission in PERMISSIONS
    ]
    vectors['special_strings'] = [
        1 if d.get_regex_strings(word) else 0
        for word in SPECIAL_STRINGS
    ]
    # Only the indicators that are actually computed above are exported.
    vectors['others'] = [
        result['is_obfuscation'],
        result['is_database'],
    ]

    return result
def extract_features(file_path):
    """Extract static features, including opcode n-grams, from an APK.

    Args:
        file_path: Path of the APK to analyse.

    Returns:
        A dict holding manifest metadata, hashes of the raw APK, code
        indicator flags, an entropy rate over class/method/field names, and
        ``feature_vectors`` with 0/1 lists for ``API_CALLS``,
        ``PERMISSIONS`` and ``SPECIAL_STRINGS`` plus ``opt_codes``, a dict
        mapping the string form of each of the ``NGRAM_THRE`` most frequent
        opcode ``NGRAM``-grams to 1.  ``None`` is returned when the APK
        cannot be loaded or analysed.
    """
    result = {}
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = VMAnalysis(d)
        vm = dvm.DalvikVMFormat(a.get_dex())
        vmx = analysis.uVMAnalysis(vm)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception:
        # Best effort: an unparseable APK yields no features.  Unlike a
        # bare ``except:``, KeyboardInterrupt/SystemExit still propagate.
        return None

    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()

    raw = a.get_raw()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()

    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()

    # The names only feed the entropy rate; they are not stored in the result.
    class_names = [c.get_name() for c in d.get_classes()]
    method_names = [m.get_name() for m in d.get_methods()]
    field_names = [f.get_name() for f in d.get_fields()]

    result['is_native_code'] = 1 if analysis.is_native_code(dx) else 0
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_crypto_code'] = 1 if analysis.is_crypto_code(dx) else 0
    result['is_dyn_code'] = 1 if analysis.is_dyn_code(dx) else 0
    result['is_reflection_code'] = 1 if analysis.is_reflection_code(vmx) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0

    # Fix: the original added method_names twice and never added field_names.
    names = []
    names.extend(class_names)
    names.extend(method_names)
    names.extend(field_names)
    result['entropy_rate'] = entropy_rate(names)

    vectors = {}
    result['feature_vectors'] = vectors

    # Presence of each known API call.
    vectors['api_calls'] = [
        1 if dx.tainted_packages.search_methods(".", call, ".") else 0
        for call in API_CALLS
    ]
    # Presence of each known permission.
    vectors['permissions'] = [
        1 if permission in result['permissions'] else 0
        for permission in PERMISSIONS
    ]
    # Presence of special strings (regex search over the DEX strings).
    vectors['special_strings'] = [
        1 if d.get_regex_strings(word) else 0
        for word in SPECIAL_STRINGS
    ]

    # Opcode mnemonics of every instruction, in method order.
    opt_seq = [
        instruction.get_name()
        for method in d.get_methods()
        for instruction in method.get_instructions()
    ]
    # Fix: a sequence of length L has L - NGRAM + 1 windows; the original
    # stopped one short and dropped the final n-gram.
    optngram = Counter(
        tuple(opt_seq[i:i + NGRAM])
        for i in xrange(len(opt_seq) - NGRAM + 1)
    )
    # Keep only the NGRAM_THRE most frequent n-grams, flagged as present.
    vectors['opt_codes'] = dict(
        (str(ngram), 1) for ngram, _count in optngram.most_common(NGRAM_THRE)
    )

    return result
def main(options, args):
    """Statically analyse one APK and store the features in ``db``.

    The document is inserted into ``db.static_features`` with the file hash
    as ``_id``.  An invalid APK is recorded as ``{'validApk': False}``.

    Args:
        options: Parsed command-line options; ``input`` is the APK path and
            ``output`` must be set as well.
        args: Positional command-line arguments (unused).

    Exits with status 2, after printing the usage line, when an option is
    missing.  Exits with status 0 when the file was already analysed, when
    no VirusTotal document exists for its hash, or when that document's
    ``positives`` value is neither 0 nor at least 35.  Inputs that
    ``androconf.is_android`` does not classify as ``"APK"`` are ignored.
    Any exception raised during the analysis is printed together with its
    traceback instead of being propagated.
    """
    print(options.input)

    if options.input is None or options.output is None:
        print("static_analysis.py -i <inputfile> -o <outputfolder>")
        sys.exit(2)

    # Hash the file once; the original recomputed it for every query and
    # again for each stored document.
    file_hash = hashfile(options.input)

    if db.static_features.find({"_id": file_hash}, limit=1).count() == 1:
        print("static analysis found.. skipping..")
        sys.exit(0)
    if db.virustotal_features.find({"sha1": file_hash}).count() == 0:
        print("virus total metadata not found.. skipping..")
        sys.exit(0)
    clear_verdict = {
        "$or": [{"positives": 0}, {"positives": {"$gte": 35}}],
        "sha1": file_hash,
    }
    if db.virustotal_features.find(clear_verdict).count() == 0:
        print("not clear enough benign or malicious.. skipping..")
        sys.exit(0)

    t_beginning = time.time()

    if androconf.is_android(options.input) != "APK":
        return

    try:
        a = apk.APK(options.input, zipmodule=2)
        if not a.is_valid_APK():
            print("INVALID APK")
            db.static_features.insert({'_id': file_hash, 'validApk': False})
            return

        vm = dvm.DalvikVMFormat(a.get_dex())
        vmx = analysis.uVMAnalysis(vm)
        tainted_packages = vmx.get_tainted_packages()

        data = {
            '_id': file_hash,
            'validApk': True,
            'mainActivity': a.get_main_activity(),
            'activities': a.get_activities(),
            'providers': a.get_providers(),
            'receivers': a.get_receivers(),
            'services': a.get_services(),
            'androidVersion': a.get_androidversion_code(),
            'maxSdkVersion': a.get_max_sdk_version(),
            'minSdkVersion': a.get_min_sdk_version(),
            'targetSdkVersion': a.get_target_sdk_version(),
            'package': a.get_package(),
            'libraries': a.get_libraries(),
            'isCryptoCode': analysis.is_crypto_code(vmx),
            'isDynamicCode': analysis.is_dyn_code(vmx),
            'isNativeCode': analysis.is_native_code(vmx),
            'nativeMethodCount': native_method_count(vm),
            'isReflectionCode': analysis.is_reflection_code(vmx),
            'reflectionCount': len(tainted_packages.search_methods(
                "Ljava/lang/reflect/Method;", ".", ".")),
            'isAsciiObfuscation': analysis.is_ascii_obfuscation(vm),
            'permissions': a.get_permissions(),
            'actualPermissions': actual_permissions(vm, vmx),
            'internalMethodCalls': get_methods(
                vm.get_class_manager(),
                tainted_packages.get_internal_packages(),
                {}),
            'externalMethodCalls': get_methods(
                vm.get_class_manager(),
                tainted_packages.get_external_packages(),
                {}),
        }
        data['duration'] = time.time() - t_beginning
        db.static_features.insert(data)
    except Exception as e:
        # Top-level boundary: report the failure and keep the traceback.
        print("ERROR %s" % (e,))
        import traceback
        traceback.print_exc()