def execute_step_seven(class_index, exportCSV, export_mongodb, package_index,
                       perform_nocleanup, save_single_analysis, source_folder,
                       system_commands_index, with_color):
    """
    STEP 7 - Features extraction

    Prints the step banner and hands every option over to
    ``features_extractor``. All working folders are resolved relative to
    ``source_folder``; ``label`` is always passed as None and ``avclass`` as
    True.
    """
    print_message(
        "\n\n>>>> AndroPyTool -- STEP 7: Execute features extraction\n",
        with_color, "green")

    def under_source(folder_name):
        # Every folder used by this step lives directly below source_folder
        return join_dir(source_folder, folder_name)

    extractor_options = dict(
        apks_directory=under_source(APKS_DIRECTORY),
        single_analysis=save_single_analysis,
        dynamic_analysis_folder=under_source(DYNAMIC_ANALYSIS_FOLDER),
        virus_total_reports_folder=under_source(VIRUSTOTAL_FOLDER),
        flowdroid_folder=under_source(FLOWDROID_PROCESSED_FOLDER),
        output_folder=under_source(FEATURES_FILES),
        noclean_up=perform_nocleanup,
        package_index_file=package_index,
        classes_index_file=class_index,
        system_commands_file=system_commands_index,
        label=None,
        avclass=True,
        export_mongodb=export_mongodb,
        export_csv=exportCSV,
    )
    features_extractor(**extractor_options)
def run_flowdroid(source_directory, output_folder, with_color=True):
    """
    Executes flowdroid over a set of samples

    Every ``.apk`` file found under ``source_directory`` (searched
    recursively) is analysed. The captured output of the FlowDroid process
    (stderr is merged into stdout) is written to
    ``<output_folder>/<apk file name with .apk replaced by .json>``.
    Samples that already have such an output file, or whose file name
    contains "assets", are skipped.

    Parameters
    ----------
    :param source_directory: Folder containing apk files
    :param output_folder: Folder where files generated by FlowDroid are saved
    :param with_color: If colors are used to print messages
    """
    if not os.path.exists(source_directory):
        # Nothing can be analysed: report the problem and stop here
        print_message("Folder not found!", with_color, 'red')
        return

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    list_apks = [
        os.path.join(path, name)
        for path, _subdirs, files in os.walk(source_directory)
        for name in files
        if name.endswith(".apk")
    ]

    # FlowDroid is launched from its own folder. Passing it as ``cwd`` keeps
    # the working directory of this process untouched, even if Popen raises.
    flowdroid_wd = os.path.join(os.path.abspath(sys.path[0]), FLOWDROID_FOLDER)

    for apk_path in tqdm(list_apks):
        apk_id = os.path.basename(apk_path)
        output_path = join_dir(output_folder, apk_id.replace(".apk", ".json"))
        print("RUNNING FLOWDROID FOR: " + str(apk_id))

        if os.path.isfile(output_path) or "assets" in apk_id:
            continue
        print("TRUE TRUE TRUE")

        flowdroid_call = get_call_flowdroid(apk_path)
        process = subprocess.Popen(flowdroid_call,
                                   shell=True,
                                   cwd=flowdroid_wd,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT)

        # Kill the analysis when it exceeds the allowed time.
        # NOTE(review): MAX_TIME_ANALYSIS is presumably expressed in minutes,
        # hence the factor 60 -- confirm against its definition.
        timer = Timer(MAX_TIME_ANALYSIS * 60, kill, [process])
        try:
            timer.start()
            stdout, _unused_stderr = process.communicate()
        finally:
            timer.cancel()

        with open(output_path, "w") as output:
            output.write(str(stdout))
def execute_step_four(source_folder, step_run_flowdroid, with_color):
    """
    STEP 4 - Launch FlowDroid

    Does nothing unless ``step_run_flowdroid`` is truthy. Otherwise prints
    the step banner, runs ``run_flowdroid`` on the samples folder below
    ``source_folder`` and then sleeps for one second.
    """
    if not step_run_flowdroid:
        return

    print_message("\n\n>>>> AndroPyTool -- STEP 4: Launching FlowDroid\n",
                  with_color, "green")

    samples_dir = join_dir(source_folder, APKS_DIRECTORY)
    results_dir = join_dir(source_folder, FLOWDROID_RESULTS_FOLDER)
    run_flowdroid(source_directory=samples_dir,
                  output_folder=results_dir,
                  with_color=with_color)
    sleep(1)
def save_file(req, directory, file_name=False):
    """
    Saving incoming file HTTP request to provided directory under original or provided filename

    The name sent by the client is reduced to its last path component before
    it is used to build the destination path, so a crafted name such as
    ``../../x`` cannot place the file outside ``directory``.

    :param req: Incoming file HTTP request
    :param directory: Directory where to save the file from incoming request
    :param file_name: Default is False. By default the file is saved under
        name extracted from file http request. When a name without extension
        is given, the extension of the incoming file is appended
        (``.unknown`` when the incoming file has none).
    :return: Dictionary with the keys ``saved_name`` (name used on disk) and
        ``incoming_name`` (name exactly as sent by the client)
    :raises ValueError: when the request carries no file, the file has an
        empty name, or the name contains no usable file component
    """
    if "file" not in req.files:
        raise ValueError("File expected!")

    upload = req.files["file"]
    if upload.filename == '':
        raise ValueError("No selected file")

    debug("File name from incoming request: %s" % upload.filename)

    # The client controls upload.filename: keep only the final component
    # (both separator styles) so it cannot point outside `directory`.
    incoming_name = upload.filename.replace("\\", "/").rsplit("/", 1)[-1]
    if incoming_name in ("", ".", ".."):
        raise ValueError("Invalid file name: %s" % upload.filename)

    if not file_name:
        file_name = incoming_name
    elif "." not in file_name:
        if "." in incoming_name:
            ext = incoming_name.rsplit('.', 1)[1].lower()
            debug("Deducted file extensions: %s" % ext)
            file_name = "%s.%s" % (file_name, ext)
        else:
            file_name = "%s.unknown" % file_name

    name = join_dir(directory, file_name)
    debug("Saving file from incoming request to: %s" % name)
    upload.save(name)

    return {"saved_name": file_name, "incoming_name": upload.filename}
def analyze_apks(analyze_apk, source_directory):
    """
    Run the opcode analysis on a single apk.

    :param analyze_apk: Path of the apk file to analyse
    :param source_directory: Folder that contains all apks (possibly in
        sub-folders); only used to derive the apk name
    :return: OrderedDict holding ``APK_name``, the entries returned by
        ``opcodes_analysis`` and ``STR_Opcodes``; None when the apk could not
        be opened
    """
    # Getting the name of the folder that contains all apks and folders with apks
    base_folder = source_directory.split("/")[-1]
    apk_filename = join_dir(base_folder,
                            analyze_apk.replace(source_directory, ''))
    apk_filename = apk_filename.replace("//", "/")
    # File name without its last extension (inner dots are dropped as well)
    apk_name_no_extensions = "".join(
        apk_filename.split("/")[-1].split(".")[:-1])

    try:
        androguard_apk_object = apk.APK(analyze_apk)
    except Exception:
        print("ERROR in APK: " + apk_name_no_extensions)
        # Without a parsed apk there is nothing to analyse; previously the
        # code went on and failed on the unbound `androguard_apk_object`.
        return None

    opcodes_analysis_dict = collections.OrderedDict()

    # Opcodes
    opcodes_analysis_dict['APK_name'] = apk_name_no_extensions
    opcodes_analysis_dict.update(opcodes_analysis(androguard_apk_object))
    opcodes_analysis_dict['STR_Opcodes'] = get_str_opcodes(
        androguard_apk_object)

    return opcodes_analysis_dict
def analyse_virustotal(source_directory,
                       vt_analysis_output_folder=None,
                       output_samples_folder=None,
                       with_color=True):
    """
    Analyses a set of APK files with the VirusTotal service

    Files directly inside ``source_directory`` whose name does not end in
    ``.txt`` or ``.tar`` are handed to ``analyse_one``, unless a
    ``<file name>.json`` report already exists next to the sample or in the
    report folder.

    Parameters
    ----------
    :param source_directory: Folder containing apk files
    :param vt_analysis_output_folder: Folder where VirusTotal reports are
        saved. Defaults to VT_ANALYSIS_DIRECTORY_NAME below the source folder
    :param output_samples_folder: Folder where apk files are saved after analysed with VirusTotal
    :param with_color: If colors are used to print messages
    :return:
    """
    global key_manager
    key_manager = VT_KEY_manager()

    if vt_analysis_output_folder is None:
        vt_analysis_output_folder = join_dir(source_directory,
                                             VT_ANALYSIS_DIRECTORY_NAME)

    reports_not_received = 0

    # TODO It is necessary to control when the directory could not be created
    # TODO (for instance if the folder is going to be created in a non
    # TODO existing directory)
    if not os.path.exists(vt_analysis_output_folder):
        os.makedirs(vt_analysis_output_folder)

    if output_samples_folder is not None and not os.path.exists(
            output_samples_folder):
        os.makedirs(output_samples_folder)

    apks_found = [
        f for f in listdir(source_directory)
        if isfile(join(source_directory, f))
        and not f.endswith((".txt", ".tar"))
    ]

    for apk_name in tqdm(apks_found):
        report_name = apk_name + ".json"
        if isfile(join(source_directory, report_name)) or isfile(
                join(vt_analysis_output_folder, report_name)):
            print_message("APK WITH JSON. CONTINUE...", with_color, 'green')
            continue

        # Join with a separator: plain concatenation only produced a valid
        # path when source_directory happened to end with a slash.
        apk_path = join(source_directory, apk_name)
        reports_not_received += analyse_one(apk_name, apk_path,
                                            vt_analysis_output_folder,
                                            output_samples_folder, with_color)

    if reports_not_received > 0:
        print("WARNING! " + str(reports_not_received) +
              " apks does not have yet a VT analysis. Please"
              ", execute again this script after a while")
    else:
        print_message("SUCCESS!!", with_color, 'green')
        print(
            " All reports have been saved in the VT_ANALYSIS folder. APKS are in SAMPLES folder."
        )
def execute_step_three(source_folder, step_filter_bw_mw, vt_threshold,
                       with_color):
    """
    STEP 3 - Filtering BW & MW

    Does nothing unless ``step_filter_bw_mw`` is truthy. Otherwise prints the
    step banner, calls ``filter_apks`` with folders resolved below
    ``source_folder`` and ``vt_threshold`` as threshold, then sleeps for one
    second.
    """
    if not step_filter_bw_mw:
        return

    print_message("\n\n>>>> AndroPyTool -- STEP 3: Filtering BW and MW\n",
                  with_color, "green")

    folders = dict(
        source_directory=join_dir(source_folder, APKS_DIRECTORY),
        vt_analysis_directory=join_dir(source_folder, VIRUSTOTAL_FOLDER),
        bw_directory_name=join_dir(source_folder, BW_DIRECTORY),
        mw_directory_name=join_dir(source_folder, MW_DIRECTORY),
    )
    filter_apks(threshold=vt_threshold, **folders)
    sleep(1)
def upload_state(text):
    """
    Write ``text`` to ``site_states/last_state.json`` and print the path.

    The ``site_states`` folder (relative to the current working directory) is
    created when it does not exist yet. An existing file is overwritten.

    :param text: Content to write to the state file
    """
    from os import makedirs

    # open() does not create missing folders, so make sure the target exists
    makedirs("site_states", exist_ok=True)

    filename = join_dir("site_states", "last_state.json")
    with open(filename, "w") as state_file:
        state_file.write(text)

    color = '\x1b[47m' + '\x1b[31m'  # white background, red foreground
    reset = '\x1b[0m'  # without it the colours leak into later output
    print(f"Saved -> {color}{filename}{reset}")
def execute_step_two(source_folder, virus_total_api_key, with_color):
    """
    STEP 2 - Analyse with VirusTotal

    Skipped entirely when ``virus_total_api_key`` is None. Otherwise prints
    the step banner, calls ``analyse_virustotal`` (the samples folder below
    ``source_folder`` is used both as input and as samples output folder) and
    then sleeps for one second.
    """
    if virus_total_api_key is None:
        return

    print_message(
        "\n\n>>>> AndroPyTool -- STEP 2: Analysing with VirusTotal\n",
        with_color, "green")

    samples_dir = join_dir(source_folder, APKS_DIRECTORY)
    reports_dir = join_dir(source_folder, VIRUSTOTAL_FOLDER)
    analyse_virustotal(source_directory=samples_dir,
                       vt_analysis_output_folder=reports_dir,
                       output_samples_folder=samples_dir,
                       with_color=with_color,
                       vt_api_key=virus_total_api_key)
    sleep(1)
def run_DroidBox(input_dir, output_dir, with_color=True,
                 droidbox_dir="/home/android/droid/DroidBox_4.1.1"):
    """
    Executes DroidBox over a set of samples

    Every ``.apk`` file found under ``input_dir`` (searched recursively) is
    analysed. The captured output of the process (stderr is merged into
    stdout) is printed and written to
    ``<output_dir>/<apk file name with .apk replaced by .json>``.
    No time limit is applied to the analysis.

    :param input_dir: Folder containing apk files
    :param output_dir: Folder where the captured outputs are saved
    :param with_color: If colors are used to print messages
    :param droidbox_dir: Folder the DroidBox command is launched from
    """
    if not os.path.exists(input_dir):
        # Nothing can be analysed: report the problem and stop here
        print_message("Folder not found!", with_color, 'red')
        return

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    list_apks = [
        os.path.join(path, name)
        for path, _subdirs, files in os.walk(input_dir)
        for name in files
        if name.endswith(".apk")
    ]

    for apk_path in list_apks:
        apk_id = os.path.basename(apk_path)
        print("RUNNING DROIDBOX FOR: " + str(apk_id))

        droidbox_call = get_call_DroidBox(apk_path)
        print(droidbox_call)

        # Launch from the DroidBox folder through ``cwd`` so the working
        # directory of this process is never changed.
        print(droidbox_dir)
        process = subprocess.Popen(droidbox_call,
                                   shell=True,
                                   cwd=droidbox_dir,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT)
        # stderr is redirected into stdout, so the second element is always
        # None and must not be written to the output file.
        stdout, _unused_stderr = process.communicate()

        output_path = join_dir(output_dir, apk_id.replace(".apk", ".json"))
        with open(output_path, "w") as output:
            output.write(str(stdout))
        print(str(stdout))
def execute_step_five(source_folder, step_run_flowdroid, with_color):
    """
    STEP 5 - Process FlowDroid outputs

    Does nothing unless ``step_run_flowdroid`` is truthy. Otherwise prints
    the step banner, calls ``process_flowdroid_outputs`` with folders
    resolved below ``source_folder`` and then sleeps for one second.
    """
    if not step_run_flowdroid:
        return

    print_message(
        "\n\n>>>> AndroPyTool -- STEP 5: Processing FlowDroid outputs\n",
        with_color, "green")

    raw_results_dir = join_dir(source_folder, FLOWDROID_RESULTS_FOLDER)
    processed_dir = join_dir(source_folder, FLOWDROID_PROCESSED_FOLDER)
    global_csv = join_dir(source_folder, FLOWDROID_PROCESSED_FOLDER,
                          OUTPUT_GLOBAL_FILE_FLOWDROID)

    process_flowdroid_outputs(flowdroid_analyses_folder=raw_results_dir,
                              output_folder_individual_csv=processed_dir,
                              output_csv_file=global_csv,
                              with_color=with_color)
    sleep(1)
def file_as_string(name):
    """
    Encoding a file to Base64 format

    The file is looked up below ``app.instance_path``.

    :param name: Name of a file to encode
    :return: String. Content of the file in Base64 format
    :raises ValueError: when the file does not exist
    """
    img_path = join_dir(app.instance_path, name)

    if exists(img_path):
        with open(img_path, "rb") as img_file:
            raw_bytes = img_file.read()
        return b64encode(raw_bytes).decode("ascii")

    raise ValueError("File %s doesn't exists" % img_path)
def execute_step_six(droidbox_time, source_folder, step_run_droidbox,
                     with_color):
    """
    STEP 6 - Execute DroidBox

    Does nothing unless ``step_run_droidbox`` is truthy. Otherwise prints the
    step banner, runs ``analyze_with_droidbox`` for ``droidbox_time``, sorts
    the produced files with ``parse_droidbox_outputs`` and finally changes
    the working directory back to ``CURRENT_DIRECTORY``.
    """
    if not step_run_droidbox:
        return

    print_message("\n\n>>>> AndroPyTool -- STEP 6: Execute DroidBox\n",
                  with_color, "green")

    analyze_with_droidbox(
        apks_folders=join_dir(source_folder, APKS_DIRECTORY),
        duration=droidbox_time,
        output_directory=join_dir(source_folder, DROIDBOX_RESULTS_FOLDER),
        gui=DROIDBOX_GUI_MODE)

    droidbox_results = join_dir(source_folder, DROIDBOX_RESULTS_FOLDER)
    parsed_droidbox = join_dir(source_folder, DYNAMIC_ANALYSIS_FOLDER,
                               DYNAMIC_DROIDBOX_ANALYSIS)
    parsed_strace = join_dir(source_folder, DYNAMIC_ANALYSIS_FOLDER,
                             DYNAMIC_STRACE_ANALYSIS)
    leftovers = join_dir(source_folder, DROIDBOX_RESULTS_FOLDER)
    parse_droidbox_outputs(source_folder=droidbox_results,
                           output_droidbox=parsed_droidbox,
                           output_strace=parsed_strace,
                           output_other=leftovers)

    # DroidBox changes the working directory, so restore the original one
    os.chdir(CURRENT_DIRECTORY)
def execute_step_one(source_folder, step_filter_apks, with_color):
    """
    STEP 1 - Filter valid apks

    When ``step_filter_apks`` is truthy, prints the step banner, runs
    ``filter_valid_apks`` on ``source_folder`` and sleeps for one second.
    Otherwise every ``.apk`` entry directly inside ``source_folder`` is moved
    into the samples folder (created when missing).
    """
    if step_filter_apks:
        print_message("\n\n>>>> AndroPyTool -- STEP 1: Filtering apks\n",
                      with_color, "green")
        valid_dir = join_dir(source_folder, APKS_DIRECTORY)
        invalid_dir = join_dir(source_folder, INVALID_APKS_DIRECTORY)
        filter_valid_apks(source_directory=source_folder,
                          valid_apks_directory=valid_dir,
                          invalid_apks_directory=invalid_dir,
                          with_color=with_color)
        sleep(1)
        return

    # If this step is not executed, all samples must be moved to the
    # samples directory
    samples_dir = join_dir(source_folder, APKS_DIRECTORY)
    if not os.path.exists(samples_dir):
        os.makedirs(samples_dir)

    apk_names = [entry for entry in listdir(source_folder)
                 if entry.endswith(".apk")]
    for apk_name in apk_names:
        origin = join_dir(source_folder, apk_name)
        destination = join_dir(source_folder, APKS_DIRECTORY, apk_name)
        shutil.move(origin, destination)
def filter_apks(source_directory,
                vt_analysis_directory,
                bw_directory_name=None,
                mw_directory_name=None,
                threshold=1):
    """
    Filter apks between malware and benignware based on the report received from VirusTotal

    Samples without a report, with a report that is not valid JSON, or with a
    report that has no "positives" entry are left where they are.

    Parameters
    ----------
    :param source_directory: Folder containing apk files
    :param vt_analysis_directory: Folder containing reports received from VirusTotal
    :param bw_directory_name: Folder where benignware applications are moved to
    :param mw_directory_name: Folder where malware applications are moved to
    :param threshold: Minimum number of antivirus testing for positive to consider a sample as malicious. Default: 1
    :return:
    """
    files_apks = [
        f for f in listdir(source_directory)
        if isfile(join(source_directory, f)) and f.endswith(".apk")
    ]

    if bw_directory_name is None:
        bw_directory_name = join_dir(source_directory, BW_DIRECTORY_NAME)

    if mw_directory_name is None:
        mw_directory_name = join_dir(source_directory, MW_DIRECTORY_NAME)

    for directory in (bw_directory_name, mw_directory_name):
        if not os.path.exists(directory):
            os.makedirs(directory)

    for apk in tqdm(files_apks):
        app_id = apk.replace(".apk", "")
        json_id = join_dir(vt_analysis_directory, app_id + ".json")
        apk_path = join_dir(source_directory, apk)

        if not os.path.isfile(json_id):
            print('ERROR! - NO VT ANALYSIS FOUND FOR APK: ' + app_id)
            continue

        # The context manager closes the report even when parsing fails
        with open(json_id) as data_file:
            try:
                data = json.load(data_file)
            except ValueError:
                continue

        if "positives" not in data:
            # A report without a verdict cannot be classified; skip the
            # sample instead of aborting the whole batch with a KeyError.
            print('ERROR! - NO POSITIVES FIELD IN VT ANALYSIS FOR APK: ' +
                  app_id)
            continue

        if data["positives"] < threshold:
            destination = bw_directory_name
        else:
            destination = mw_directory_name
        shutil.move(apk_path, join_dir(destination, apk))
def __init__(self, **kwargs):
    """
    Set up the instance from keyword arguments, the command line and the
    environment.

    The keyword arguments become the instance attributes. On top of them the
    script path, script name and joined command-line arguments are stored,
    followed by ``dir``, ``image``, ``docker_exe``, ``docker_sock``,
    ``docker_options`` and ``version``. The process then changes into the
    script's folder, ``mount_dir`` is derived (honouring the
    ``BLAZ_CHDIR_REL`` environment variable when set) and
    ``self._create_lock()`` is called.
    """
    self.__dict__ = kwargs

    script_path = abspath(argv[0])
    self.file = script_path
    self.script = basename(script_path)
    self.argv = ' '.join(argv[1:])

    self.__dict__.update(
        dir=dirname(self.file),
        image=getenv('BLAZ_IMAGE', 'amiorin/alpine-blaz'),
        docker_exe=self._find_docker_exe(),
        docker_sock=getenv('DOCKER_SOCK', '/var/run/docker.sock'),
        docker_options=getenv('DOCKER_OPTIONS',
                              '--rm --privileged --net=host'),
        version=__version__,
    )

    chdir(self.dir)

    # The mount folder is the script folder unless a relative override is set
    self.mount_dir = (
        abspath(join_dir(self.dir, environ['BLAZ_CHDIR_REL']))
        if 'BLAZ_CHDIR_REL' in environ else self.dir
    )

    self._create_lock()
def project_config():
    """
    Parsing file defined in PROJECT_CONFIG option of main application config.
    Otherwise trying to find project.cfg file

    The file is looked up below ``app.instance_path``. When it does not exist
    a warning is logged and an empty dictionary is returned.

    :return: Dict. Maps each lower-cased section name of the config file to
        the options returned by project_parse_cfg_options for that section
    """
    cfg_file = app.config.get("PROJECT_CONFIG", "project.cfg")
    cfg_path = join_dir(app.instance_path, cfg_file)

    if not exists(cfg_path):
        warning("Projects configuration file doesn't exists. Using defaults")
        return {}

    parser = ConfigParser()
    parser.read(cfg_path)

    result = {
        section.lower(): project_parse_cfg_options(parser, section)
        for section in parser.sections()
    }

    debug("Project configuration: %s" % result)
    return result
from django.core.files.storage import FileSystemStorage from django.db import models from os.path import join as join_dir from django.conf import settings from hashlib import sha3_512 # Create your models here. paper_storage = FileSystemStorage(location=join_dir(settings.BASE_DIR, 'storage/')) def user_directory_path(instance, file_name): return f'{instance.group}/{file_name}' class Document(models.Model): group = models.ForeignKey('learners.Group', on_delete=models.CASCADE, related_name='document') title = models.CharField(max_length=250) paper = models.FileField(storage=paper_storage, upload_to=user_directory_path) information = models.CharField(max_length=250, null=True) date_uploaded = models.DateTimeField(auto_now_add=True, editable=False) date_updated = models.DateTimeField(auto_now=True) def delete(self, using=None, keep_parents=False): self.paper.storage.delete(self.paper.name) super().delete(using=using, keep_parents=keep_parents) def get_string(self): return f'{self.group}_{self.paper.name}' def verify(self, uploaded_file):
def filter_valid_apks(source_directory,
                      valid_apks_directory=None,
                      invalid_apks_directory=None,
                      with_color=True):
    """
    Analyses a set of Android apks with Androguard to filter valid and invalid samples
    If a JSON file with the same name that the app is found in the source directory, it is also moved

    An apk whose analysis raises an exception is treated as invalid.

    Parameters
    ----------
    :param source_directory: Folder containing apk files
    :param valid_apks_directory: Folder where valid apks are saved
    :param invalid_apks_directory: Folder where invalid apks are saved
    :param with_color: If colors are used to print messages
    """
    if not isdir(source_directory):
        print("Folder not found!")
        # NOTE(review): this error path exits with status 0 -- confirm
        # whether callers rely on that before switching to a non-zero code.
        sys.exit(0)

    if valid_apks_directory is None:
        valid_apks_directory = join_dir(source_directory, VALID_APKS_DIRECTORY)

    if invalid_apks_directory is None:
        invalid_apks_directory = join_dir(source_directory,
                                          INVALID_APKS_DIRECTORY)

    num_valid_apk = 0
    num_invalid_apk = 0

    files_apks = [
        f for f in listdir(source_directory)
        if isfile(join(source_directory, f)) and f.endswith(".apk")
    ]

    print(str(len(files_apks)) + " apks found. Processing...")

    for directory in (valid_apks_directory, invalid_apks_directory):
        if not os.path.exists(directory):
            os.makedirs(directory)

    for apk in tqdm(files_apks):
        json_file = apk.replace(".apk", ".json")

        try:
            valid_apk = APK(join_dir(source_directory, apk)).valid_apk
        except Exception:
            # Any parsing failure marks the sample as invalid; unlike a bare
            # ``except`` this still lets Ctrl-C / SystemExit through.
            valid_apk = False

        if valid_apk:
            destination = valid_apks_directory
            num_valid_apk += 1
        else:
            destination = invalid_apks_directory
            num_invalid_apk += 1

        # A report stored next to the sample travels with it
        if isfile(join(source_directory, json_file)):
            shutil.move(join_dir(source_directory, json_file),
                        join_dir(destination, json_file))
        shutil.move(join_dir(source_directory, apk),
                    join_dir(destination, apk))

    print_message("TOTAL VALID APKS: " + str(num_valid_apk), with_color,
                  "green")
    # Goes through print_message as well so ``with_color`` is honoured
    print_message("TOTAL INVALID APKS: " + str(num_invalid_apk), with_color,
                  "red")
def _hash_apk(apk_path):
    """Return the (md5, sha256, sha1) hex digests of the file at ``apk_path``."""
    with open(apk_path, 'rb') as afile:
        buf = afile.read()
    return (hashlib.md5(buf).hexdigest(),
            hashlib.sha256(buf).hexdigest(),
            hashlib.sha1(buf).hexdigest())


def _safe_component_list(getter):
    """Call ``getter`` and return its result, or [] on UnicodeEncodeError."""
    try:
        return getter()
    except UnicodeEncodeError:
        return []


def _collapse_api_calls(api_calls):
    """Drop the last dotted component of every API-call name and merge the values of names that become equal."""
    collapsed = collections.OrderedDict()
    for api_call, value in api_calls.items():
        shortened = '.'.join(api_call.split(".")[:-1])
        if shortened in collapsed:
            collapsed[shortened] = collapsed[shortened] + value
        else:
            collapsed[shortened] = value
    return collapsed


def _group_by_api_package(api_calls, api_packages):
    """Accumulate API-call values under the known package that is the deepest matching prefix."""
    package_depths = [len(package.split(".")) for package in api_packages]
    grouped = collections.OrderedDict()
    for api_call, value in api_calls.items():
        score = 0
        package_chosen = None
        for package, depth in zip(api_packages, package_depths):
            if api_call.startswith(package) and depth > score:
                score = depth
                package_chosen = package
        if package_chosen is None:
            continue
        if package_chosen in grouped:
            grouped[package_chosen] += value
        else:
            grouped[package_chosen] = value
    return grouped


def _component_intents(components, manifest_path, kind):
    """Map every component name to the result of ``check_for_intents`` for it."""
    intents = collections.OrderedDict()
    for component in components:
        intents[component] = check_for_intents(manifest_path, component, kind)
    return intents


def getFeatures(source_directory):
    """
    Extract static features of every apk below ``source_directory`` and write
    one labelled row per recognised sample to a new CSV file in ``DataCSV/``.

    A sample is written only when its md5 or sha256 is found in the names
    loaded by ``load_NameandLabels`` and its label is present in the label
    table read from ``LabelsNum_file``. The row is the numeric label followed
    by the output of ``standardData``. Afterwards the label table is written
    back and the ``partData`` counter in ``config_file`` is incremented.

    NOTE(review): ``LabelsNum_file``, ``config_file``, ``labels``,
    ``package_index_file``, ``classes_index_file`` and
    ``system_commands_file`` are not defined in this function; presumably
    module-level names -- confirm.

    :param source_directory: Folder containing the apk files
    """
    # Load Android API packages and classes
    global API_PACKAGES_LIST, API_CLASSES_LIST, API_SYSTEM_COMMANDS

    # Aggregated label table: label text -> label number
    with open(LabelsNum_file, "r+") as file_LabeslNum:
        LABELSNUMANDTEXT = json.load(file_LabeslNum)

    # Read the config file
    with open(config_file, "r+") as f:
        dataConfig = json.load(f)

    maxLabelsNum = dataConfig['maxLabelsNum']
    # Number of the data part written by this run
    partData = dataConfig['partData']
    timestamp = datetime.datetime.now()
    partDataFile = str(partData) + '_' + str(timestamp).strip() + '.csv'

    source_directory = str(source_directory)

    # Sample names (hashes) and their labels
    ARRNAME, ARRLABELS = load_NameandLabels(labels)

    # READING PACKAGES, CLASSES AND SYSTEM COMMANDS
    package_file = load_file(str(package_index_file))
    API_PACKAGES_LIST = [x.strip() for x in package_file]

    class_file = load_file(str(classes_index_file))
    API_CLASSES_LIST = [x.strip() for x in class_file]

    commands_file = load_file(str(system_commands_file))
    API_SYSTEM_COMMANDS = [x.strip() for x in commands_file]

    apk_list = list_files(source_directory, '*.apk')

    # The context manager closes the CSV file even if an apk makes the
    # extraction fail half-way
    with open(r'DataCSV/' + partDataFile, 'w+', newline='') as csvFile:
        writer = csv.writer(csvFile, delimiter=',')

        for analyze_apk in tqdm(apk_list):
            # Getting the name of the folder that contains all apks and
            # folders with apks
            base_folder = source_directory.split("/")[-1]
            apk_filename = join_dir(base_folder,
                                    analyze_apk.replace(source_directory, ''))
            apk_filename = apk_filename.replace("//", "/")
            apk_name_no_extensions = "".join(
                apk_filename.split("/")[-1].split(".")[:-1])

            pre_static_dict = collections.OrderedDict()
            pre_static_dict['Filename'] = apk_filename

            md5, sha256, sha1 = _hash_apk(analyze_apk)
            pre_static_dict["md5"] = md5
            pre_static_dict["sha256"] = sha256
            pre_static_dict["sha1"] = sha1
            pre_static_dict["VT_positives"] = None

            try:
                androguard_apk_object = APK(analyze_apk)
            except Exception:
                print("ERROR in APK: " + apk_name_no_extensions)
                continue

            static_analysis_dict = collections.OrderedDict()
            # Package name
            static_analysis_dict[
                'Package name'] = androguard_apk_object.get_package()
            # Permissions
            static_analysis_dict[
                'Permissions'] = androguard_apk_object.get_permissions()
            # Activities
            list_activities = _safe_component_list(
                androguard_apk_object.get_activities)
            # Main activity
            static_analysis_dict[
                'Main activity'] = androguard_apk_object.get_main_activity()
            # Receivers
            list_receivers = _safe_component_list(
                androguard_apk_object.get_receivers)
            # Services
            list_services = _safe_component_list(
                androguard_apk_object.get_services)

            # API calls and Strings
            list_smali_api_calls, list_smali_strings = read_strings_and_apicalls(
                analyze_apk, API_PACKAGES_LIST, API_CLASSES_LIST)

            # Built as a new mapping: the previous in-place rewrite added and
            # deleted keys of the dict it was iterating over.
            list_smali_api_calls = _collapse_api_calls(list_smali_api_calls)

            static_analysis_dict['API calls'] = list_smali_api_calls
            static_analysis_dict['Strings'] = Counter(
                filter(None, list_smali_strings))

            # API packages
            static_analysis_dict['API packages'] = _group_by_api_package(
                list_smali_api_calls, API_PACKAGES_LIST)

            # The manifest is read from the folder named like the apk
            # without its ".apk" part
            manifest_path = join_dir(analyze_apk.replace('.apk', ''),
                                     'AndroidManifest.xml')

            # Intents
            try:
                static_analysis_dict['Intents'] = intents_analysis(
                    manifest_path)
            except Exception:
                static_analysis_dict['Intents'] = {
                    'Failed to extract intents': 0
                }

            # Intents of activities, services and receivers
            static_analysis_dict['Activities'] = _component_intents(
                list_activities, manifest_path, 'activity')
            static_analysis_dict['Services'] = _component_intents(
                list_services, manifest_path, 'service')
            static_analysis_dict['Receivers'] = _component_intents(
                list_receivers, manifest_path, 'receiver')

            row = standardData(pre_static_dict, static_analysis_dict)

            # Look the sample up by md5 first; a sha256 match takes
            # precedence. ``index`` always has a value, so an unknown sample
            # is skipped instead of raising.
            index = -1
            if md5 in ARRNAME:
                index = ARRNAME.index(md5)
            if sha256 in ARRNAME:
                index = ARRNAME.index(sha256)
            if index == -1:
                continue

            label = ARRLABELS[index]
            try:
                label_is_known = label in LABELSNUMANDTEXT
            except TypeError:
                # e.g. an unhashable label: treat it as unknown
                label_is_known = False
            if not label_is_known:
                # Unknown labels (including SINGLETON ones) are not exported
                continue

            labelNum = [LABELSNUMANDTEXT[label]]
            labelNum.extend(row)
            writer.writerow(labelNum)

    # Save the label table (in case new labels were added)
    with open(str(LabelsNum_file), 'w+') as fp:
        json.dump(LABELSNUMANDTEXT, fp, indent=4)

    # Save data config
    partData += 1
    dataConfig['partData'] = partData
    dataConfig['maxLabelsNum'] = maxLabelsNum
    with open(str(config_file), 'w+') as fp:
        json.dump(dataConfig, fp, indent=4)
def cd(self, subdir="."):
    """Change the working directory to ``subdir`` resolved against ``self.mount_dir``."""
    target_dir = join_dir(self.mount_dir, subdir)
    chdir(target_dir)
def _binary_row(columns, present):
    """Return a 0/1 list marking which of ``columns`` occur in ``present``."""
    return [1 if column in present else 0 for column in columns]


def _count_row(columns, counts):
    """Return, for each of ``columns``, its value in ``counts`` or 0 when absent."""
    return [counts[column] if column in counts else 0 for column in columns]


def _collect_intents(components, into):
    """Add the intents of every component that declares at least one to the set ``into``."""
    for component in components:
        intents = components[component]
        if intents is not None and len(intents) > 0:
            into.update(intents)


def export_csv_file(database, export_csv, flowdroid_folder, output_folder):
    """
    Export the analyses in ``database`` as one CSV file with a column per
    feature seen in any sample.

    The first column is the label: the part of the database key before the
    first "/" (empty when the key has no "/"). It is followed by permission,
    opcode, API-call, system-command, activity/service/receiver intent and
    API-package columns and, when ``flowdroid_folder`` is set, one column per
    (source, sink) pair of FlowDroid fields.

    :param database: Dict mapping an apk key to its analysis dictionary
        (read through its "Static_analysis" entry)
    :param export_csv: Name of the CSV file; nothing is done when it is None
    :param flowdroid_folder: Folder holding one ``<hash>.csv`` FlowDroid
        matrix per sample; falsy to leave the FlowDroid columns out
    :param output_folder: Folder where the CSV file is written
    """
    if export_csv is None:
        return

    set_permissions = set()
    set_opcodes = set()
    set_apicalls = set()
    set_systemcommands = set()
    set_intents_activities = set()
    set_intents_services = set()
    set_intents_receivers = set()
    set_api_packages = set()

    # First pass: collect every feature name that appears in any sample
    for apk_key in tqdm(database.keys()):
        static = database[apk_key]["Static_analysis"]
        set_permissions.update(static["Permissions"])
        set_opcodes.update(static["Opcodes"])
        set_apicalls.update(static["API calls"])
        set_systemcommands.update(static["System commands"])
        _collect_intents(static["Activities"], set_intents_activities)
        _collect_intents(static["Services"], set_intents_services)
        _collect_intents(static["Receivers"], set_intents_receivers)
        set_api_packages.update(static["API packages"])

    list_permissions = [x.replace(" ", "") for x in set_permissions]
    list_opcodes = list(set_opcodes)
    list_systemcommands = list(set_systemcommands)
    list_intents_activities = list(set_intents_activities)
    list_intents_services = list(set_intents_services)
    list_intents_receivers = list(set_intents_receivers)
    list_api_packages = list(set_api_packages)

    # API calls are reduced to everything before their last dotted component
    list_apicalls = list(set(
        ".".join(apicall.encode('ascii', 'ignore').split(".")[:-1])
        for apicall in set_apicalls))

    flowdroid_fields = []
    if flowdroid_folder and database:
        apk_dict_example = database[next(iter(database))]
        flowdroid_fields = [
            field
            for field in apk_dict_example["Static_analysis"]["FlowDroid"].keys()
            if field != "Sources\\Sinks"
        ]

    flowdroid_fields_matrix = [(x, y) for x in flowdroid_fields
                               for y in flowdroid_fields]

    # Second pass: one row per sample
    list_rows = []
    for apk_key in tqdm(database.keys()):
        static = database[apk_key]["Static_analysis"]

        key_parts = apk_key.split("/")
        if len(key_parts) > 1:
            label = key_parts[0]
            hash_app = key_parts[1]
        else:
            label = ""
            hash_app = apk_key

        list_permissions_filled = _binary_row(list_permissions,
                                              static["Permissions"])
        list_opcodes_filled = _count_row(list_opcodes, static["Opcodes"])
        list_apicalls_filled = _count_row(list_apicalls, static["API calls"])
        list_systemcommands_filled = _count_row(list_systemcommands,
                                                static["System commands"])
        # NOTE(review): the columns are intent names while these containers
        # are iterated by component name above -- confirm these membership
        # tests can actually match.
        list_intents_activities_filled = _binary_row(list_intents_activities,
                                                     static["Activities"])
        list_intents_services_filled = _binary_row(list_intents_services,
                                                   static["Services"])
        list_intents_receivers_filled = _binary_row(list_intents_receivers,
                                                    static["Receivers"])
        # Fills its own list; this used to overwrite the receivers row
        list_api_packages_filled = _binary_row(list_api_packages,
                                               static["API packages"])

        flowdroid_fields_matrix_filled = [0] * len(flowdroid_fields_matrix)
        if flowdroid_folder:
            flow_df = pd.read_csv(join_dir(flowdroid_folder,
                                           hash_app + ".csv"))
            flow_df = flow_df.set_index("Sources\\Sinks")
            flowdroid_fields_matrix_filled = [
                flow_df[source][sink]
                for source, sink in flowdroid_fields_matrix
            ]

        list_rows.append(
            [label] + list_permissions_filled + list_opcodes_filled +
            list_apicalls_filled + list_systemcommands_filled +
            list_intents_activities_filled + list_intents_services_filled +
            list_intents_receivers_filled + list_api_packages_filled +
            flowdroid_fields_matrix_filled)

    # Header: every feature name prefixed with its feature family
    complete_list_fields = (
        ["label"] +
        ["PERMISSION-" + x for x in list_permissions] +
        ["OPCODE-" + x for x in list_opcodes] +
        ["APICALL-" + x for x in list_apicalls] +
        ["SYSTEMCOMMAND-" + x for x in list_systemcommands] +
        ["ACTIVITY-" + x for x in list_intents_activities] +
        ["SERVICE-" + x for x in list_intents_services] +
        ["RECEIVER-" + x for x in list_intents_receivers] +
        ["APIPACKAGE-" + x for x in list_api_packages] +
        ["FLOWDROID-" + x[0] + "-" + x[1] for x in flowdroid_fields_matrix])

    with open(output_folder + "/" + export_csv, 'wb') as csv_file:
        csvwriter = csv.writer(csv_file, delimiter=",")
        csvwriter.writerow(complete_list_fields)
        print("WRITING CSV FILE...")
        for row in tqdm(list_rows):
            csvwriter.writerow(row)
def parse_droidbox_outputs(source_folder, output_droidbox, output_strace,
                           output_other):
    """
    Sorts the raw dynamic analysis files found below a folder

    Every file found while walking ``source_folder`` is classified by the
    prefix of its base name:

    * ``strace*``: converted into a CSV file written to ``output_strace``;
      the raw file is then moved to ``output_other``
    * ``analysis*``: moved to ``output_droidbox`` with the ``analysis_``
      prefix removed from its name
    * ``logcat*``: moved to ``output_other`` keeping its name

    The three output folders are created when they do not exist.

    Parameters
    ----------
    :param source_folder: Folder walked recursively looking for files
    :param output_droidbox: Folder where ``analysis*`` files are moved
    :param output_strace: Folder where the strace CSV files are written
    :param output_other: Folder where raw strace and logcat files are moved
    """
    source_folder = source_folder + "/"

    list_files = []
    for path, subdirs, files in os.walk(source_folder):
        for name in files:
            list_files.append(os.path.join(path, name))

    list_droidbox_files = [
        f for f in list_files if ntpath.basename(f).startswith("analysis")
    ]
    list_strace_files = [
        f for f in list_files if ntpath.basename(f).startswith("strace")
    ]
    list_logcat_files = [
        f for f in list_files if ntpath.basename(f).startswith("logcat")
    ]

    for folder in (output_droidbox, output_strace, output_other):
        if not os.path.exists(folder):
            os.makedirs(folder)

    # STRACE
    print("Strace...")
    for strace_path in tqdm(list_strace_files):
        strace_name = ntpath.basename(strace_path)
        output_file = strace_name.replace(".txt", ".csv").replace(
            "strace_", "").replace(".apk", "")

        # A CSV with this name already exists: the raw file is left untouched
        if os.path.isfile(output_strace + "/" + output_file):
            continue

        with open(strace_path, "rb") as f:
            lines = f.readlines()

        with open(join_dir(output_strace, output_file), 'wb') as fp:
            for line in lines:
                # Runs of spaces are collapsed and the line is cut into three
                # fields; the first two are swapped in the CSV output
                fields = re.sub(" +", " ", line).split(" ", 2)
                if len(fields) < 3:
                    # Blank or truncated line: it cannot fill the three
                    # columns, so it is skipped instead of raising IndexError
                    continue
                fp.write(fields[1] + "," + fields[0] + "," + fields[2])

        # The destination is built from the base name, as done below for the
        # DroidBox and logcat files (the full source path was used before)
        shutil.move(strace_path, join_dir(output_other, strace_name))

    # Droidbox
    print("DroidBox...")
    for droidbox_path in list_droidbox_files:
        output_file = ntpath.basename(droidbox_path).replace("analysis_", "")
        shutil.move(droidbox_path, join_dir(output_droidbox, output_file))

    # Logcat
    print("Logcat...")
    for logcat_path in list_logcat_files:
        output_file = ntpath.basename(logcat_path)
        shutil.move(logcat_path, join_dir(output_other, output_file))
def image_string(name):
    """
    Returns the content of an image file encoded as a base64 ASCII string

    :param name: Path of the image, joined to ``current_app.instance_path``
    :return: base64 representation of the bytes of the file
    :raises ValueError: If the resulting path does not exist
    """
    location = join_dir(current_app.instance_path, name)

    if exists(location):
        with open(location, "rb") as handle:
            raw_bytes = handle.read()
        encoded = b64encode(raw_bytes)
        return encoded.decode("ascii")

    raise ValueError("Image %s doesn't exists" % location)
def analyse_virustotal(source_directory,
                       vt_api_key,
                       vt_analysis_output_folder=None,
                       with_color=True):
    """
    Analyses a set of APK files with the VirusTotal service

    Every entry of ``source_directory`` is listed as a folder of samples.
    For each sample the report of its hash is requested; available reports
    are saved as ``<vt_analysis_output_folder>/<folder>/<sample>.json`` and
    samples without a report are uploaded to be scanned.

    Parameters
    ----------
    :param source_directory: Folder containing one sub-folder per class of apk files
    :param vt_api_key: VirusTotal API key. Must be 64 characters long,
        otherwise an error is printed and the process exits
    :param vt_analysis_output_folder: Folder where VirusTotal reports are saved.
        Defaults to ``VT_ANALYSIS_DIRECTORY_NAME`` inside ``source_directory``
    :param with_color: If colors are used to print messages
    :return:
    """
    global VT_KEY

    if len(vt_api_key) != 64:
        print(
            'ERROR! - invalid vt_key file. Please, provide a virustotal key!')
        sys.exit(0)

    VT_KEY = vt_api_key

    if vt_analysis_output_folder is None:
        vt_analysis_output_folder = join_dir(source_directory,
                                             VT_ANALYSIS_DIRECTORY_NAME)

    reports_not_received = 0

    if not os.path.exists(vt_analysis_output_folder):
        os.makedirs(vt_analysis_output_folder)

    for classname in os.listdir(source_directory):
        print(classname)
        folderpath = os.path.join(source_directory, classname)
        for apkfilepath in os.listdir(folderpath):
            classapkpath = os.path.join(classname, apkfilepath)
            apk_path = os.path.join(source_directory, classapkpath)
            hash_sha = sha256(apk_path)

            # An empty answer is retried until something is received
            report = ""
            while report == "":
                report = get_report_hash(hash_sha)
                if report == "":
                    print_message("No report received. Waiting...",
                                  with_color, 'red')
                    time.sleep(1)

            response_dict = simplejson.loads(report)
            response_code = response_dict.get("response_code")

            if response_code == 1:  # Report generated
                report_path = os.path.join(
                    vt_analysis_output_folder,
                    classapkpath.replace(".apk", "") + ".json")
                # The report goes into a per-class sub-folder that is not
                # created anywhere else in this function
                report_folder = os.path.dirname(report_path)
                if not os.path.exists(report_folder):
                    os.makedirs(report_folder)
                with open(report_path, "w") as file_json:
                    file_json.write(report)

                print_message(
                    str(apkfilepath) + "APK WITH JSON. CONTINUE...",
                    with_color, 'green')
                time.sleep(1)

            if response_code == 0:
                reports_not_received += 1
                params = {'apikey': VT_KEY}
                print("Uploading APK: " + apk_path)
                print("File not analysed yet. Uploading file...")
                try:
                    # The sample is closed as soon as the upload finishes
                    with open(apk_path, 'rb') as apk_file:
                        response = requests.post(
                            'https://www.virustotal.com/vtapi/v2/file/scan',
                            files={'file': ("apk", apk_file)},
                            params=params)
                except requests.exceptions.ConnectionError:
                    print_message("Connection error", with_color, 'red')
                    continue

                print(str(response))
                try:
                    # Only checks that the answer is valid JSON
                    response.json()
                except JSONDecodeError:
                    print_message("JSONDecodeError", with_color, 'red')
                    continue
                print_message("SENT TO VIRUS-TOTAL", with_color, 'blue')

    if reports_not_received > 0:
        print("WARNING! " + str(reports_not_received) +
              " apks does not have yet a VT analysis. Please"
              ", execute again this script after a while")
    else:
        print_message("SUCCESS!!", with_color, 'green')
        print(
            " All reports have been saved in the VT_ANALYSIS folder. APKS are in SAMPLES folder."
        )
def reverse(nameApk):
    """
    Runs the static analysis of one APK and saves its row of features

    The apk is read from the ``TEMP`` folder. Its hashes, package name,
    permissions, components, API calls, strings, API packages and intents
    are collected; the row built by ``standardData`` is written to
    ``DataCSVClient + <md5> + '.csv'`` and the apk is then passed to
    ``delAPk``. ``mergeCSV`` is called when ``checkMerge`` says so.

    As a side effect the globals ``API_PACKAGES_LIST``, ``API_CLASSES_LIST``
    and ``API_SYSTEM_COMMANDS`` are reloaded from their index files.

    :param nameApk: Name of the apk file inside the ``TEMP`` folder
    :return: ``(md5, apk_total_analysis)`` on success, or
        ``('Error', 'No features')`` when any exception is raised while
        analysing the apk
    """
    # Read the configuration file
    with open(config_file, "r") as f:
        dataConfig = json.load(f)

    # Load Android API packages and classes
    global API_PACKAGES_LIST, API_CLASSES_LIST, API_SYSTEM_COMMANDS

    ############################################################
    # READING PACKAGES, CLASSES AND SYSTEM COMMANDS
    ############################################################
    package_file = load_file(str(package_index_file))
    API_PACKAGES_LIST = [x.strip() for x in package_file]

    class_file = load_file(str(classes_index_file))
    API_CLASSES_LIST = [x.strip() for x in class_file]

    commands_file = load_file(str(system_commands_file))
    API_SYSTEM_COMMANDS = [x.strip() for x in commands_file]

    static_analysis_dict = collections.OrderedDict()
    try:
        analyze_apk = os.path.join(TEMP, nameApk)

        # Getting the name of the folder that contains all apks and folders with apks
        base_folder = TEMP.split("/")[-1]
        apk_filename = join_dir(base_folder, analyze_apk.replace(TEMP, ''))
        apk_filename = apk_filename.replace("//", "/")

        pre_static_dict = collections.OrderedDict()
        pre_static_dict['Filename'] = apk_filename

        hasher_md5 = hashlib.md5()
        hasher_sha256 = hashlib.sha256()
        hasher_sha1 = hashlib.sha1()
        with open(analyze_apk, 'rb') as afile:
            buf = afile.read()
            hasher_md5.update(buf)
            hasher_sha256.update(buf)
            hasher_sha1.update(buf)

        md5 = hasher_md5.hexdigest()

        pre_static_dict["md5"] = md5
        pre_static_dict["sha256"] = hasher_sha256.hexdigest()
        pre_static_dict["sha1"] = hasher_sha1.hexdigest()
        pre_static_dict["VT_positives"] = None

        apk_Oject = APK(analyze_apk)

        # Package name
        static_analysis_dict['Package_name'] = apk_Oject.get_package()

        # Permissions
        static_analysis_dict['Permissions'] = apk_Oject.get_permissions()

        # Activities
        try:
            list_activities = apk_Oject.get_activities()
        except UnicodeEncodeError:
            list_activities = []

        # Main activity
        static_analysis_dict['Main_activity'] = apk_Oject.get_main_activity()

        # Receivers
        try:
            list_receivers = apk_Oject.get_receivers()
        except UnicodeEncodeError:
            list_receivers = []

        # Services
        try:
            list_services = apk_Oject.get_services()
        except UnicodeEncodeError:
            list_services = []

        # API calls and Strings
        list_smali_api_calls, list_smali_strings = read_strings_and_apicalls(
            analyze_apk, API_PACKAGES_LIST, API_CLASSES_LIST)

        # The last component of every API call name is dropped and the counts
        # of the calls sharing the shortened name are added up. A snapshot of
        # the keys is iterated because the dict is modified inside the loop
        # (iterating the live view raises RuntimeError on Python 3)
        for api_call in list(list_smali_api_calls.keys()):
            new_api_call = '.'.join(api_call.split(".")[:-1])
            if new_api_call in list_smali_api_calls:
                list_smali_api_calls[new_api_call] = (
                    list_smali_api_calls[new_api_call] +
                    list_smali_api_calls[api_call])
            else:
                list_smali_api_calls[new_api_call] = list_smali_api_calls[
                    api_call]
            del list_smali_api_calls[api_call]

        static_analysis_dict['API_calls'] = list_smali_api_calls
        static_analysis_dict['Strings'] = Counter(
            filter(None, list_smali_strings))

        # API packages: every call is assigned to the known package with the
        # most components among those its name starts with
        API_packages_dict = collections.OrderedDict()
        android_list_packages_lenghts = [
            len(x.split(".")) for x in API_PACKAGES_LIST
        ]

        for api_call in list(list_smali_api_calls.keys()):
            score = 0
            package_chosen = None
            for i, package in enumerate(API_PACKAGES_LIST):
                len_package = android_list_packages_lenghts[i]
                if api_call.startswith(package) and len_package > score:
                    score = len_package
                    package_chosen = package
            if package_chosen is not None:
                if package_chosen not in API_packages_dict:
                    API_packages_dict[package_chosen] = list_smali_api_calls[
                        api_call]
                else:
                    API_packages_dict[package_chosen] += list_smali_api_calls[
                        api_call]

        static_analysis_dict['API_packages'] = API_packages_dict

        # Intents
        try:
            static_analysis_dict['Intents'] = intents_analysis(
                join_dir(analyze_apk.replace('.apk', ''),
                         'AndroidManifest.xml'))
        except Exception:
            static_analysis_dict['Intents'] = {'Failed to extract intents': 0}

        # Intents of activities
        intents_activities = collections.OrderedDict()
        for activity in list_activities:
            intents_activities[activity] = check_for_intents(
                join_dir(analyze_apk.replace('.apk', ''),
                         'AndroidManifest.xml'), activity, 'activity')
        static_analysis_dict['Activities'] = intents_activities

        # Intents of services
        intents_services = collections.OrderedDict()
        for service in list_services:
            intents_services[service] = check_for_intents(
                join_dir(analyze_apk.replace('.apk', ''),
                         'AndroidManifest.xml'), service, 'service')
        static_analysis_dict['Services'] = intents_services

        # Intents of receivers
        intents_receivers = collections.OrderedDict()
        for intent in list_receivers:
            intents_receivers[intent] = check_for_intents(
                join_dir(analyze_apk.replace('.apk', '/'),
                         'AndroidManifest.xml'), intent, 'receiver')
        static_analysis_dict['Receivers'] = intents_receivers

        apk_total_analysis = collections.OrderedDict([
            ("Pre_static_analysis", pre_static_dict),
            ("Static_analysis", static_analysis_dict)
        ])

        row = standardData(pre_static_dict, static_analysis_dict)
        # The file is closed even when writing the row fails
        with open(DataCSVClient + md5 + '.csv', 'w+',
                  newline='') as csvFileClient:
            writer = csv.writer(csvFileClient, delimiter=',')
            writer.writerow(row)

        delAPk(analyze_apk)
        if checkMerge(DataCSVClient, dataConfig['mergeCSV']):
            mergeCSV()
        return md5, apk_total_analysis
    except Exception as e:
        print('Exception: ', e)
        return 'Error', 'No features'
def features_extractor(apks_directory, single_analysis,
                       dynamic_analysis_folder, virus_total_reports_folder,
                       flowdroid_folder, output_folder, noclean_up,
                       package_index_file, classes_index_file,
                       system_commands_file, label, avclass, export_mongodb,
                       export_csv):
    """
    Extracts features from a set of samples

    Every ``*.apk`` found in ``apks_directory`` is analysed (samples whose
    ``-analysis.json`` file already exists in ``output_folder`` are loaded
    from it instead). The results of all samples are saved together as
    ``OUTPUT_FILE_GLOBAL_JSON`` in ``output_folder`` and, when requested,
    exported to MongoDB and to a CSV file.

    As a side effect the globals ``API_PACKAGES_LIST``, ``API_CLASSES_LIST``
    and ``API_SYSTEM_COMMANDS`` are reloaded from their index files.

    Parameters
    ----------
    :param apks_directory: Folder containing apk files
    :param single_analysis: If an individual features file is generated for each sample
    :param dynamic_analysis_folder: Folder containing dynamic analysis reports
    :param virus_total_reports_folder: Folder containing VirusTotal reports
    :param flowdroid_folder: Folder containing flowdroid reports
    :param output_folder: Folder where features files are saved
    :param noclean_up: If false, ``cleanup`` is called on every sample once analysed
    :param package_index_file: File describing Android API packages
    :param classes_index_file: File describing Android API classes
    :param system_commands_file: File describing Android system commands
    :param label: If provided, all samples are labelled according to this argument
    :param avclass: If avclass is executed to obtain a consensual label for each sample
    :param export_mongodb: Mongodb address to write features to a database
        (``None`` disables the export)
    :param export_csv: Path of the csv file where features are saved
        (``None`` disables the export)
    """
    source_directory = str(apks_directory)

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Load Android API packages and classes
    global API_PACKAGES_LIST, API_CLASSES_LIST, API_SYSTEM_COMMANDS

    ############################################################
    # READING PACKAGES, CLASSES AND SYSTEM COMMANDS
    ############################################################
    package_file = load_file(str(package_index_file))
    API_PACKAGES_LIST = [x.strip() for x in package_file]

    class_file = load_file(str(classes_index_file))
    API_CLASSES_LIST = [x.strip() for x in class_file]

    system_commands_file = load_file(str(system_commands_file))
    API_SYSTEM_COMMANDS = [x.strip() for x in system_commands_file]

    ############################################################
    # BUILDING LIST OF APKS
    ############################################################
    apk_list = list_files(source_directory, '*.apk')
    print('[*] Number of APKs: ' + str(len(apk_list)))

    ############################################################
    # ANALYSING APKS
    ############################################################
    database = collections.OrderedDict()
    print("ANALYSING APKS...")
    for analyze_apk in tqdm(apk_list):

        # Getting the name of the folder that contains all apks and folders with apks
        base_folder = source_directory.split("/")[-1]

        apk_filename = join_dir(base_folder,
                                analyze_apk.replace(source_directory, ''))
        apk_filename = apk_filename.replace("//", "/")

        apk_name_no_extensions = "".join(
            apk_filename.split("/")[-1].split(".")[:-1])

        # Samples analysed in a previous execution are not analysed again
        previous_analysis = join_dir(
            output_folder,
            apk_filename.split("/")[-1].replace('.apk', '-analysis.json'))
        if os.path.isfile(previous_analysis):
            with open(previous_analysis) as previous_file:
                database[apk_filename.replace('.apk', '')] = json.load(
                    previous_file)
            continue

        pre_static_dict = collections.OrderedDict()
        pre_static_dict['Filename'] = apk_filename

        hasher_md5 = hashlib.md5()
        hasher_sha256 = hashlib.sha256()
        hasher_sha1 = hashlib.sha1()
        with open(analyze_apk, 'rb') as afile:
            buf = afile.read()
            hasher_md5.update(buf)
            hasher_sha256.update(buf)
            hasher_sha1.update(buf)

        pre_static_dict["md5"] = hasher_md5.hexdigest()
        pre_static_dict["sha256"] = hasher_sha256.hexdigest()
        pre_static_dict["sha1"] = hasher_sha1.hexdigest()

        if label is not None:
            pre_static_dict["Label"] = label
        else:
            pre_static_dict["Label"] = "/".join(apk_filename.split("/")[:-1])

        try:
            androguard_apk_object = apk.APK(analyze_apk)
        except Exception:
            print("ERROR in APK: " + apk_name_no_extensions)
            continue

        static_analysis_dict = collections.OrderedDict()

        # Package name
        static_analysis_dict[
            'Package name'] = androguard_apk_object.get_package()

        # Permissions
        static_analysis_dict[
            'Permissions'] = androguard_apk_object.get_permissions()

        # Opcodes
        static_analysis_dict['Opcodes'] = opcodes_analysis(
            androguard_apk_object)

        # Activities
        try:
            list_activities = androguard_apk_object.get_activities()
        except UnicodeEncodeError:
            list_activities = []

        # Main activity
        static_analysis_dict[
            'Main activity'] = androguard_apk_object.get_main_activity()

        # Receivers
        try:
            list_receivers = androguard_apk_object.get_receivers()
        except UnicodeEncodeError:
            list_receivers = []

        # Services
        try:
            list_services = androguard_apk_object.get_services()
        except UnicodeEncodeError:
            list_services = []

        # API calls and Strings
        list_smali_api_calls, list_smali_strings = read_strings_and_apicalls(
            analyze_apk, API_PACKAGES_LIST, API_CLASSES_LIST)

        # The last component of every API call name is dropped and the counts
        # of the calls sharing the shortened name are added up. A snapshot of
        # the keys is iterated because the dict is modified inside the loop
        for api_call in list(list_smali_api_calls.keys()):
            new_api_call = '.'.join(api_call.split(".")[:-1])
            if new_api_call in list_smali_api_calls:
                list_smali_api_calls[new_api_call] = (
                    list_smali_api_calls[new_api_call] +
                    list_smali_api_calls[api_call])
            else:
                list_smali_api_calls[new_api_call] = list_smali_api_calls[
                    api_call]
            del list_smali_api_calls[api_call]

        static_analysis_dict['API calls'] = list_smali_api_calls
        static_analysis_dict['Strings'] = Counter(
            filter(None, list_smali_strings))

        # API packages: every call is assigned to the known package with the
        # most components among those its name starts with
        API_packages_dict = collections.OrderedDict()
        android_list_packages_lenghts = [
            len(x.split(".")) for x in API_PACKAGES_LIST
        ]

        for api_call in list(list_smali_api_calls.keys()):
            score = 0
            package_chosen = None
            for i, package in enumerate(API_PACKAGES_LIST):
                len_package = android_list_packages_lenghts[i]
                if api_call.startswith(package) and len_package > score:
                    score = len_package
                    package_chosen = package
            if package_chosen is not None:
                if package_chosen not in API_packages_dict:
                    API_packages_dict[package_chosen] = list_smali_api_calls[
                        api_call]
                else:
                    API_packages_dict[package_chosen] += list_smali_api_calls[
                        api_call]

        static_analysis_dict['API packages'] = API_packages_dict

        # System commands
        list_system_commands = read_system_commands(list_smali_strings,
                                                    API_SYSTEM_COMMANDS)
        static_analysis_dict['System commands'] = Counter(
            list_system_commands)

        # Intents
        try:
            static_analysis_dict['Intents'] = intents_analysis(
                join_dir(analyze_apk.replace('.apk', ''),
                         'AndroidManifest.xml'))
        except Exception:
            static_analysis_dict['Intents'] = {'Failed to extract intents': 0}

        # Intents of activities
        intents_activities = collections.OrderedDict()
        for activity in list_activities:
            intents_activities[activity] = check_for_intents(
                join_dir(analyze_apk.replace('.apk', ''),
                         'AndroidManifest.xml'), activity, 'activity')
        static_analysis_dict['Activities'] = intents_activities

        # Intents of services
        intents_services = collections.OrderedDict()
        for service in list_services:
            intents_services[service] = check_for_intents(
                join_dir(analyze_apk.replace('.apk', ''),
                         'AndroidManifest.xml'), service, 'service')
        static_analysis_dict['Services'] = intents_services

        # Intents of receivers
        intents_receivers = collections.OrderedDict()
        for intent in list_receivers:
            intents_receivers[intent] = check_for_intents(
                join_dir(analyze_apk.replace('.apk', '/'),
                         'AndroidManifest.xml'), intent, 'receiver')
        static_analysis_dict['Receivers'] = intents_receivers

        if not noclean_up:
            cleanup(analyze_apk)

        ############################################################
        # READING DYNAMIC ANALYSIS FILES TO INCLUDE IN JSON
        # ONLY THE NAME OF THE FILE IS INCLUDED
        # TODO EACH FILE MUST BE STORED IN A FOLDER NAMED AS THE TOOL USED
        ############################################################
        dynamic_analysis_dict = collections.OrderedDict()

        if dynamic_analysis_folder and isdir(dynamic_analysis_folder):
            dynamic_analysis_folders = [
                join_dir(dynamic_analysis_folder, x)
                for x in listdir(str(dynamic_analysis_folder))
                if isdir(join_dir(dynamic_analysis_folder, x))
            ]

            for dynamic_analysis_tool_folder in dynamic_analysis_folders:
                path_to_folder = dynamic_analysis_tool_folder.split("/")
                # Last non-empty component of the path of the folder
                dynamic_tool_name = [
                    part for part in path_to_folder if part
                ][-1]

                for extension in POSSIBLE_DYNAMIC_FILES_EXTENSIONS:
                    dynamic_file_name = join_dir(
                        dynamic_analysis_tool_folder,
                        apk_name_no_extensions + extension)
                    if os.path.isfile(dynamic_file_name):
                        # If the file has .json extension, it is added to the global json
                        # If not, the field is filled with the path to the dynamic analysis file
                        if extension == ".json":
                            with open(dynamic_file_name) as dynamic_file:
                                dynamic_analysis_dict[
                                    dynamic_tool_name] = json.load(
                                        dynamic_file)
                        else:
                            dynamic_analysis_dict[
                                dynamic_tool_name] = dynamic_file_name
                        # NOTE(review): only the first extension found is
                        # used for each tool - confirm the placement of this
                        # break against the original layout of the file
                        break

        ############################################################
        # READING FLOWDROID ANALYSIS FILES TO INCLUDE IN JSON
        ############################################################
        if flowdroid_folder:
            flowdroid_file = join_dir(flowdroid_folder,
                                      apk_name_no_extensions + ".csv")
            if isfile(flowdroid_file):
                data_flowdroid_csv = pd.read_csv(flowdroid_file)
                # Setting row names with the first column
                data_flowdroid_csv.index = data_flowdroid_csv[
                    "Sources\\Sinks"]
                if "Sources\\Sinks" in data_flowdroid_csv.columns:
                    del data_flowdroid_csv["Sources\\Sinks"]
                static_analysis_dict[
                    'FlowDroid'] = data_flowdroid_csv.to_dict()

        ############################################################
        # READING VIRUSTOTAL FILE TO INCLUDE IN JSON
        ############################################################
        virus_total_dict = collections.OrderedDict()
        if virus_total_reports_folder:
            vt_file_name = join_dir(virus_total_reports_folder,
                                    apk_name_no_extensions + ".json")
            if isfile(vt_file_name):
                virus_total_dict = load_from_json(vt_file_name)
            else:
                # NOTE(review): this else is assumed to belong to the isfile
                # check (missing report); confirm against the original
                # layout of the file
                virus_total_dict = ""

        ############################################################
        # GETTING AVCLASS LABEL IF VIRUSTOTAL ANALYSIS IS AVAILABLE
        ############################################################
        if virus_total_reports_folder and avclass:
            vt_file_name = join_dir(virus_total_reports_folder,
                                    apk_name_no_extensions + ".json")
            if isfile(vt_file_name):
                pre_static_dict["avclass"] = get_avclass_label(vt_file_name)

        ############################################################
        # FILLING APK JSON FIELD
        ############################################################
        apk_total_analysis = collections.OrderedDict([
            ("Pre_static_analysis", pre_static_dict),
            ("Static_analysis", static_analysis_dict),
            ("Dynamic_analysis", dynamic_analysis_dict),
            ("VirusTotal", virus_total_dict)
        ])

        database[apk_filename.replace('.apk', '')] = apk_total_analysis

        ############################################################
        # SAVING ANALYSIS FOR INDIVIDUAL APK WHEN SELECTED
        ############################################################
        if single_analysis:
            save_as_json(apk_total_analysis,
                         output_name=join_dir(
                             output_folder,
                             apk_name_no_extensions + "-analysis.json"))

    save_as_json(database,
                 output_name=join_dir(output_folder, OUTPUT_FILE_GLOBAL_JSON))

    ############################################################
    # EXPORTING TO MONGODB
    ############################################################
    if export_mongodb is not None:
        # NOTE(review): "Services" is not rewritten although "Activities" and
        # "Receivers" are - confirm whether that is intended. The rewrite is
        # done in place, so the CSV export below sees the renamed keys.
        for apk_key in database.keys():
            static_fields = database[apk_key]["Static_analysis"]
            for field in ("API calls", "Strings", "Activities", "Receivers",
                          "Intents", "API packages"):
                _replace_dots_in_keys(static_fields[field])

        client = MongoClient('mongodb://' + export_mongodb)
        # Creating database
        db = client['AndroPyTool_database']
        coll = db['report_' + TIME_EXECUTION]
        coll.insert_one(database)

    ############################################################
    # EXPORTING TO CSV
    ############################################################
    if export_csv is not None:
        _export_features_csv(database, flowdroid_folder, export_csv)


def _replace_dots_in_keys(mapping):
    """Replaces "." by "-" in the keys of ``mapping``, modifying it in place."""
    # Presumably needed because MongoDB restricts dots in field names.
    for key in list(mapping.keys()):
        new_key = key.replace(".", "-")
        # Keys without dots are kept as they are (they used to be deleted)
        if new_key != key:
            mapping[new_key] = mapping.pop(key)


def _flag_columns(names, present):
    """Returns a 0/1 list telling which of ``names`` are found in ``present``."""
    return [1 if name in present else 0 for name in names]


def _count_columns(names, counts):
    """Returns the value stored in ``counts`` for each name (0 when absent)."""
    return [counts[name] if name in counts else 0 for name in names]


def _export_features_csv(database, flowdroid_folder, export_csv):
    """Writes a header plus one row of features per sample to ``export_csv``."""
    set_permissions = set()
    set_opcodes = set()
    set_apicalls = set()
    set_systemcommands = set()
    set_intents_activities = set()
    set_intents_services = set()
    set_intents_receivers = set()
    set_api_packages = set()

    # First pass: collecting every feature name seen in any sample
    for apk_key in tqdm(database.keys()):
        static_fields = database[apk_key]["Static_analysis"]

        set_permissions.update(static_fields["Permissions"])
        set_opcodes.update(static_fields["Opcodes"])
        set_apicalls.update(static_fields["API calls"])
        set_systemcommands.update(static_fields["System commands"])

        for component, set_intents in (("Activities",
                                        set_intents_activities),
                                       ("Services", set_intents_services),
                                       ("Receivers", set_intents_receivers)):
            for name in static_fields[component]:
                intents = static_fields[component][name]
                if intents is not None and len(intents) > 0:
                    set_intents.update(intents)

        set_api_packages.update(static_fields["API packages"])

    list_permissions = [x.replace(" ", "") for x in set_permissions]
    list_opcodes = list(set_opcodes)
    list_systemcommands = list(set_systemcommands)
    list_intents_activities = list(set_intents_activities)
    list_intents_services = list(set_intents_services)
    list_intents_receivers = list(set_intents_receivers)
    list_api_packages = list(set_api_packages)

    # The last component of each API call name is removed before
    # deduplicating the names
    list_apicalls = list(
        set([
            ".".join(apicall.encode('ascii', 'ignore').split(".")[:-1])
            for apicall in set_apicalls
        ]))

    # FlowDroid columns are taken from the report of the first sample
    flowdroid_fields = []
    if flowdroid_folder and database:
        apk_dict_example = next(iter(database.values()))
        flowdroid_fields = [
            field for field in apk_dict_example["Static_analysis"].get(
                "FlowDroid", {}).keys() if field != "Sources\\Sinks"
        ]

    flowdroid_fields_matrix = [(x, y) for x in flowdroid_fields
                               for y in flowdroid_fields]

    # Second pass: one row per sample
    list_rows = []
    for apk_key in tqdm(database.keys()):
        static_fields = database[apk_key]["Static_analysis"]
        row_label = apk_key.split("/")[0]
        hash_app = apk_key.split("/")[1]

        flowdroid_fields_matrix_filled = [0] * len(flowdroid_fields_matrix)
        if flowdroid_fields_matrix:
            # The report is read from the folder received as argument;
            # samples without a report keep their columns set to 0
            flowdroid_report = join_dir(flowdroid_folder, hash_app + ".csv")
            if isfile(flowdroid_report):
                flow_df = pd.read_csv(flowdroid_report)
                flow_df = flow_df.set_index("Sources\\Sinks")
                flowdroid_fields_matrix_filled = [
                    flow_df[source][sink]
                    for source, sink in flowdroid_fields_matrix
                ]

        complete_row = (
            [row_label] +
            _flag_columns(list_permissions, static_fields["Permissions"]) +
            _count_columns(list_opcodes, static_fields["Opcodes"]) +
            _count_columns(list_apicalls, static_fields["API calls"]) +
            _count_columns(list_systemcommands,
                           static_fields["System commands"]) +
            _flag_columns(list_intents_activities,
                          static_fields["Activities"]) +
            _flag_columns(list_intents_services, static_fields["Services"]) +
            _flag_columns(list_intents_receivers,
                          static_fields["Receivers"]) +
            # These flags used to be written into the receivers columns
            _flag_columns(list_api_packages, static_fields["API packages"]) +
            flowdroid_fields_matrix_filled)

        list_rows.append(complete_row)

    complete_list_fields = (
        ["label"] + ["PERMISSION-" + x for x in list_permissions] +
        ["OPCODE-" + x for x in list_opcodes] +
        ["APICALL-" + x for x in list_apicalls] +
        ["SYSTEMCOMMAND-" + x for x in list_systemcommands] +
        ["ACTIVITY-" + x for x in list_intents_activities] +
        ["SERVICE-" + x for x in list_intents_services] +
        ["RECEIVER-" + x for x in list_intents_receivers] +
        ["APIPACKAGE-" + x for x in list_api_packages] +
        ["FLOWDROID-" + x[0] + "-" + x[1] for x in flowdroid_fields_matrix])

    with open(export_csv, 'wb') as csv_file:
        csvwriter = csv.writer(csv_file, delimiter=",")
        csvwriter.writerow(complete_list_fields)
        print("WRITING CSV FILE...")
        for row in tqdm(list_rows):
            csvwriter.writerow(row)
def cd(self, subdir="."):
    """
    Changes the working directory to a folder of the project

    :param subdir: Path joined to ``self.project_dir`` (the project folder
        itself by default)
    """
    destination = join_dir(self.project_dir, subdir)
    chdir(destination)
def features_extractor(apks_directory, output_folder, export_csv):
    """
    Runs the static analysis over every apk found in a folder

    Only the package name and the opcodes are extracted in this version. The
    remaining analyses (permissions, activities, receivers, services, API
    calls, strings, API packages, system commands and intents) are disabled.
    The results are kept in memory only: nothing is returned.

    Parameters
    ----------
    :param apks_directory: Folder containing apk files
    :param output_folder: Folder where features files are saved. It is created
        if it does not exist. A sample that already has a
        "<apk name>-analysis.json" file in this folder is loaded from that
        file instead of being analysed again
    :param export_csv: Not used by this version of the function
    """
    source_directory = str(apks_directory)

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    ############################################################
    # BUILDING LIST OF APKS
    ############################################################
    apk_list = list_files(source_directory, '*.apk')
    print('[*] Number of APKs: ' + str(len(apk_list)))

    ############################################################
    # ANALYSING APKS
    ############################################################
    database = collections.OrderedDict()
    apk_analysis_dic = collections.OrderedDict()

    # Name of the folder that contains all apks and folders with apks
    # (it is the same for every sample, so it is computed only once)
    base_folder = source_directory.split("/")[-1]

    # The counter lives outside the loop so that every analysed sample gets
    # its own key in apk_analysis_dic instead of overwriting entry '0'
    index = 0

    print("ANALYSING APKS...")
    for analyze_apk in tqdm(apk_list):
        apk_filename = join_dir(base_folder,
                                analyze_apk.replace(source_directory, ''))
        apk_filename = apk_filename.replace("//", "/")
        apk_basename = apk_filename.split("/")[-1]
        apk_name_no_extensions = "".join(apk_basename.split(".")[:-1])

        # Samples with a previous analysis are loaded and not analysed again
        previous_analysis = join_dir(
            output_folder, apk_basename.replace('.apk', '-analysis.json'))
        if os.path.isfile(previous_analysis):
            # "with" closes the report file as soon as it has been parsed
            with open(previous_analysis) as analysis_file:
                database[apk_filename.replace('.apk', '')] = json.load(
                    analysis_file)
            continue

        try:
            androguard_apk_object = apk.APK(analyze_apk)
        except Exception:
            print("ERROR in APK: " + apk_name_no_extensions)
            continue

        static_analysis_dict = collections.OrderedDict()

        # Package name
        static_analysis_dict[
            'Package name'] = androguard_apk_object.get_package()

        # Opcodes
        static_analysis_dict['Opcodes'] = opcodes_analysis(
            androguard_apk_object)

        apk_analysis_dic[str(index)] = static_analysis_dict
        index += 1

        # Debug output: element 1 of the opcodes result of the current sample
        print(static_analysis_dict['Opcodes'][1])
def features_extractor(apks_directory, output_folder, export_csv):
    """
    Extracts the opcode features of a set of samples and optionally saves them
    into a csv file

    One row is built per apk that androguard manages to open. Each row holds
    the row number ("ROW"), the apk name without extension ("APK_name"), the
    fields produced by opcodes_analysis and the value of get_str_opcodes
    ("STR_Opcodes"). Apks that cannot be opened are reported and skipped.

    Parameters
    ----------
    :param apks_directory: Folder containing apk files
    :param output_folder: Folder where the csv file is saved. It is created if
        it does not exist
    :param export_csv: Name of the csv file, written inside output_folder. If
        None, nothing is exported
    """
    source_directory = str(apks_directory)

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    ############################################################
    # BUILDING LIST OF APKS
    ############################################################
    apk_list = list_files(source_directory, '*.apk')
    print('[*] Number of APKs: ' + str(len(apk_list)))

    ############################################################
    # ANALYSING APKS
    ############################################################
    apk_analysis_list = []

    # Name of the folder that contains all apks and folders with apks
    # (it is the same for every sample, so it is computed only once)
    base_folder = source_directory.split("/")[-1]

    print("ANALYSING APKS...")
    for analyze_apk in tqdm(apk_list):
        apk_filename = join_dir(base_folder,
                                analyze_apk.replace(source_directory, ''))
        apk_filename = apk_filename.replace("//", "/")
        apk_name_no_extensions = "".join(
            apk_filename.split("/")[-1].split(".")[:-1])

        try:
            androguard_apk_object = apk.APK(analyze_apk)
        except Exception:
            print("ERROR in APK: " + apk_name_no_extensions)
            # Without this skip the code below would either fail because the
            # androguard object does not exist, or silently store the data of
            # the previous sample under the name of this one
            continue

        opcodes_analysis_dict = collections.OrderedDict()

        # Rows are numbered in the order they are appended, starting at 0
        opcodes_analysis_dict['ROW'] = len(apk_analysis_list)
        opcodes_analysis_dict['APK_name'] = apk_name_no_extensions

        # Opcodes
        opcodes_analysis_dict.update(opcodes_analysis(androguard_apk_object))
        opcodes_analysis_dict['STR_Opcodes'] = get_str_opcodes(
            androguard_apk_object)

        apk_analysis_list.append(opcodes_analysis_dict)

    ############################################################
    # EXPORT TO CSV
    ############################################################
    if export_csv is not None:
        export_csv = output_folder + "/" + export_csv
        print("EXPORTING TO CSV:")

        # The csv header is the sorted union of the fields of every row, as
        # not all the samples necessarily produce the same fields
        print("RESOLVING FIELDS...")
        set_fields = set()
        for apk_dict in tqdm(apk_analysis_list):
            set_fields.update(apk_dict.keys())
        fieldnames = sorted(set_fields)

        with open(export_csv, 'w') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()

            print("WRITING CSV ROWS...")
            for row in tqdm(apk_analysis_list):
                writer.writerow(row)
def analyse_virustotal(nameApk, vt_api_key, vt_analysis_output_folder=None, output_samples_folder=None,
                       with_color=True):
    """
    Analyses one APK file, located in the TEMP folder, with the VirusTotal service

    The report of the sample is requested by its sha256 hash. When the report
    is available (response_code 1) it is saved as a json file in
    vt_analysis_output_folder and, if requested, the apk is moved to
    output_samples_folder. When VirusTotal does not know the sample
    (response_code 0) the apk is uploaded so that a report can be retrieved in
    a later execution.

    The process is terminated with sys.exit(0) if the API key does not have 64
    characters.

    Parameters
    ----------
    :param nameApk: File name of the apk inside the TEMP folder
    :param vt_api_key: VirusTotal API key. It is also stored in the VT_KEY global
    :param vt_analysis_output_folder: Folder where VirusTotal reports are saved. If None, the
        VT_ANALYSIS_DIRECTORY_NAME folder inside TEMP is used
    :param output_samples_folder: Folder where apk files are saved after analysed with VirusTotal. If
        None, the apk is not moved
    :param with_color: If colors are used to print messages
    :return: None
    """
    global VT_KEY

    # The key always comes from the caller: no credential is kept in the code
    if vt_api_key is None or len(vt_api_key) != 64:
        print('ERROR! - invalid vt_key file. Please, provide a virustotal key!')
        sys.exit(0)

    VT_KEY = vt_api_key

    if vt_analysis_output_folder is None:
        vt_analysis_output_folder = join_dir(TEMP, VT_ANALYSIS_DIRECTORY_NAME)

    reports_not_received = 0

    # TODO It is necessary to control when the directory could not be created (for instance if the folder is going to be
    # TODO created in a non existing directory
    if not os.path.exists(vt_analysis_output_folder):
        os.makedirs(vt_analysis_output_folder)

    if output_samples_folder is not None and not os.path.exists(output_samples_folder):
        os.makedirs(output_samples_folder)

    # A json file next to the apk means that this sample was already processed
    if isfile(join(TEMP, nameApk.replace(".apk", ".json"))):
        print_message("APK WITH JSON. CONTINUE...", with_color, 'green')
        return

    apk_path = os.path.join(TEMP, nameApk)
    hash_sha = sha256(apk_path)

    # An empty answer is retried once per second until something is received
    report = ""
    while report == "":
        report = get_report_hash(hash_sha)
        if report == "":
            print_message("No report received. Waiting...", with_color, 'red')
            time.sleep(1)

    response_dict = simplejson.loads(report)
    response_code = response_dict.get("response_code")

    if response_code == 1:  # Report generated
        report_path = apk_path.replace(".apk", "") + ".json"

        # The file must be closed (and so completely written) before moving it
        with open(report_path, "w") as file_json:
            file_json.write(report)

        shutil.move(report_path,
                    join_dir(vt_analysis_output_folder, nameApk.replace(".apk", "") + ".json"))

        if output_samples_folder is not None:
            shutil.move(join_dir(TEMP, nameApk), join_dir(output_samples_folder, nameApk))

    elif response_code == 0:  # Sample unknown to VirusTotal: upload it
        reports_not_received += 1

        params = {'apikey': VT_KEY}

        print("Uploading APK: " + nameApk)
        print("File not analysed yet. Uploading file...")

        response = None
        with open(apk_path, 'rb') as apk_file:
            try:
                response = requests.post('https://www.virustotal.com/vtapi/v2/file/scan',
                                         files={'file': ("apk", apk_file)}, params=params)
            except requests.exceptions.ConnectionError:
                print_message("Connection error", with_color, 'red')

        # There is no response to inspect when the connection failed
        if response is not None:
            print(str(response))

            try:
                response.json()
            except JSONDecodeError:
                print_message("JSONDecodeError", with_color, 'red')

            print_message("SENT TO VIRUS-TOTAL", with_color, 'blue')

    if reports_not_received > 0:
        print("WARNING! " + str(reports_not_received) + " apks does not have yet a VT analysis. Please"
              ", execute again this script after a while")
    else:
        print_message("SUCCESS!!", with_color, 'green')
        print(" All reports have been saved in the VT_ANALYSIS folder. APKS are in SAMPLES folder.")
def execute_andro_py_tool_steps(source_folder, step_filter_apks, step_filter_bw_mw, step_run_flowdroid,
                                step_run_droidbox, save_single_analysis, perform_nocleanup, package_index,
                                class_index, system_commands_index, export_mongodb, exportCSV, with_color,
                                vt_threshold, virus_total_api_key=None):
    """
    Launches, one after another, the modules implemented in AndroPyTool. Every
    report and features file is generated in a folder tree created inside the
    source folder

    Parameters
    ----------
    :param source_folder: Source directory containing apks to extract features and perform analysis
    :param step_filter_apks: If apks are filtered between valid or invalid apks using Androguard. When
        False, the apks found in the source folder are just moved to the samples directory
    :param step_filter_bw_mw: If apks are filtered between benignware and malware according to the
        VirusTotal report
    :param step_run_flowdroid: If flowdroid is executed with all the samples (its outputs are processed too)
    :param step_run_droidbox: If droidbox is executed with all the samples
    :param save_single_analysis: If an individual features report is generated for each sample
    :param perform_nocleanup: If unnecesary files generated are removed
    :param package_index: File describing Android API packages
    :param class_index: File describing Android API classes
    :param system_commands_index: File describing Android system commands
    :param export_mongodb: Value given to the features extractor as export_mongodb
    :param exportCSV: Value given to the features extractor as export_csv
    :param with_color: If colors are used to print messages
    :param vt_threshold: Value given as threshold to the benignware/malware filter
    :param virus_total_api_key: VirusTotal service API key. If None, the VirusTotal step is not executed
    """

    def in_source(*parts):
        # All the folders used by the steps are located inside the source folder
        return join_dir(source_folder, *parts)

    def announce(banner):
        # Every step is announced with a green banner
        print_message(banner, with_color, "green")

    ################################################
    # STEP 1 - Filter valid apks
    ################################################
    if not step_filter_apks:
        # If this step is not executed, all samples must be moved to the /samples/ directory
        samples_directory = in_source(APKS_DIRECTORY)
        if not os.path.exists(samples_directory):
            os.makedirs(samples_directory)

        for file_name in listdir(source_folder):
            if file_name.endswith(".apk"):
                shutil.move(in_source(file_name), in_source(APKS_DIRECTORY, file_name))
    else:
        announce("\n\n>>>> AndroPyTool -- STEP 1: Filtering apks\n")
        filter_valid_apks(
            source_directory=source_folder,
            valid_apks_directory=in_source(APKS_DIRECTORY),
            invalid_apks_directory=in_source(INVALID_APKS_DIRECTORY),
            with_color=with_color)
        sleep(1)

    ################################################
    # STEP 2 - Analyse with VirusTotal
    ################################################
    if virus_total_api_key is not None:
        announce("\n\n>>>> AndroPyTool -- STEP 2: Analysing with VirusTotal\n")
        analyse_virustotal(
            source_directory=in_source(APKS_DIRECTORY),
            vt_analysis_output_folder=in_source(VIRUSTOTAL_FOLDER),
            output_samples_folder=in_source(APKS_DIRECTORY),
            with_color=with_color,
            vt_api_key=virus_total_api_key)
        sleep(1)

    ################################################
    # STEP 3 - Filtering BW & MW
    ################################################
    if step_filter_bw_mw:
        announce("\n\n>>>> AndroPyTool -- STEP 3: Filtering BW and MW\n")
        filter_apks(
            source_directory=in_source(APKS_DIRECTORY),
            vt_analysis_directory=in_source(VIRUSTOTAL_FOLDER),
            bw_directory_name=in_source(BW_DIRECTORY),
            mw_directory_name=in_source(MW_DIRECTORY),
            threshold=vt_threshold)
        sleep(1)

    # NOW APKS ARE CONTAINED IN DIFFERENT SUBFOLDERS

    if step_run_flowdroid:
        ################################################
        # STEP 4 - Launch FlowDroid
        ################################################
        announce("\n\n>>>> AndroPyTool -- STEP 4: Launching FlowDroid\n")
        run_flowdroid(
            source_directory=in_source(APKS_DIRECTORY),
            output_folder=in_source(FLOWDROID_RESULTS_FOLDER),
            with_color=with_color)
        sleep(1)

        ################################################
        # STEP 5 - Process FlowDroid outputs
        ################################################
        announce("\n\n>>>> AndroPyTool -- STEP 5: Processing FlowDroid outputs\n")
        process_flowdroid_outputs(
            flowdroid_analyses_folder=in_source(FLOWDROID_RESULTS_FOLDER),
            output_folder_individual_csv=in_source(FLOWDROID_PROCESSED_FOLDER),
            output_csv_file=in_source(FLOWDROID_PROCESSED_FOLDER, OUTPUT_GLOBAL_FILE_FLOWDROID),
            with_color=with_color)
        sleep(1)

    ################################################
    # STEP 6 - Execute DroidBox
    ################################################
    if step_run_droidbox:
        announce("\n\n>>>> AndroPyTool -- STEP 6: Execute DroidBox\n")
        analyze_with_droidbox(
            apks_folders=in_source(APKS_DIRECTORY),
            duration=DROIDBOX_ANALYSIS_DURATION,
            output_directory=in_source(DROIDBOX_RESULTS_FOLDER),
            gui=DROIDBOX_GUI_MODE)

        parse_droidbox_outputs(
            source_folder=in_source(DROIDBOX_RESULTS_FOLDER),
            output_droidbox=in_source(DYNAMIC_ANALYSIS_FOLDER, DYNAMIC_DROIDBOX_ANALYSIS),
            output_strace=in_source(DYNAMIC_ANALYSIS_FOLDER, DYNAMIC_STRACE_ANALYSIS),
            output_other=in_source(DROIDBOX_RESULTS_FOLDER))

        # The working directory is modified by DroidBox, so the original one is restored here
        os.chdir(CURRENT_DIRECTORY)

    ################################################
    # STEP 7 - Features extraction
    ################################################
    announce("\n\n>>>> AndroPyTool -- STEP 7: Execute features extraction\n")
    features_extractor(
        apks_directory=in_source(APKS_DIRECTORY),
        single_analysis=save_single_analysis,
        dynamic_analysis_folder=in_source(DYNAMIC_ANALYSIS_FOLDER),
        virus_total_reports_folder=in_source(VIRUSTOTAL_FOLDER),
        flowdroid_folder=in_source(FLOWDROID_PROCESSED_FOLDER),
        output_folder=in_source(FEATURES_FILES),
        noclean_up=perform_nocleanup,
        package_index_file=package_index,
        classes_index_file=class_index,
        system_commands_file=system_commands_index,
        label=None,
        avclass=True,
        export_mongodb=export_mongodb,
        export_csv=exportCSV)

    # The static features are selected only if some of the analysis tools has been executed
    if step_run_flowdroid or step_run_droidbox:
        selectStatic(
            source_folder=source_folder,
            features_file=FEATURES_FILES,
            flowdroid_resuilts_folder=FLOWDROID_RESULTS_FOLDER,
            flowdroid_processed_folder=FLOWDROID_PROCESSED_FOLDER,
            select_features_static=SELECT_FEATURES_STATIC)
# 'muctr(adm)', # 'rea(adm)', # 'misis(adm)', # 'vavt(adm)', # 'mgppu(adm)', # 'msu(adm)', # 'rggru(adm)', # 'rgung(adm)', # 'miigaik(adm)', ] for uni in allowed_uni: # os.listdir(uni_dir): print(uni) pth = join_dir(uni_dir, uni) if os.path.isdir(pth) and not uni.startswith(".") and uni != "tools": TO_RENDER_ITEM = {} info = json.loads(open(join_dir(pth, "info.json"), "r").read()) print(info) TO_RENDER_ITEM = {**info} try: disciplines = json.loads( open(join_dir(pth, "disciplines.json"), "r").read()) print("Файл дисциплин найден") except FileNotFoundError: print("Файл дисциплин не найден") disciplines = {}