def write_analyze_task_results_to_fs(storage, group_result, chunked=False):
    ''' Get successful task results and write them to disk if enabled.

    Parameters
    ----------
    storage : RedundantStorage
    group_result : GroupResult
    chunked : bool, optional (default is False)
        If work has been divided into chunks.

    Returns
    -------
    int
        Number of successful tasks
    '''
    if group_result is not None:
        results = get_successful_analyze_task_results(group_result, chunked=chunked)

        # skip if no result writing to disk is wanted
        if not storage.fs_storage_disabled():
            clilog.info("Fetching all analysis results for storage ...")
            if results:
                try:
                    storage.fetch_results_from_mongodb(results)
                except DatabaseLoadException as e:
                    log.exception(e)
        return len(results)
    return 0
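# Hedged usage sketch (not from the original source): persist the successful
# results of a finished distributed run. Assumes `storage` is an opened
# RedundantStorage and `analyzer` is a DistributedAnalyzer whose analysis has
# completed, so `analyzer.group_result` is set.
def _example_persist_results(storage, analyzer):
    n_ok = write_analyze_task_results_to_fs(storage, analyzer.group_result)
    clilog.info("%d analysis task(s) succeeded", n_ok)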
def action_delete(self, parser, hashes, package_names, tags, yes):
    ''' Delete from the database specified by `parser` args '''
    args = self.args
    whole_db = args.all
    db = args.delete

    if whole_db:
        cli_check_n_exec(prompt_prefix="Do you really want to delete the whole database?",
                         func=lambda: True,
                         circumvent_check=not whole_db)

    # place after run check!
    circumvent_check = yes or whole_db

    # import db
    if db == SUBCOMMAND_DELETE_IMPORT:
        cli_check_n_exec(androlyze.action_delete_apks_import,
                         circumvent_check=circumvent_check,
                         args=(self.storage, args.delete_apk, hashes, package_names, tags, whole_db))

    # result db
    elif db == SUBCOMMAND_DELETE_RESULT:
        kwargs = CLIUtil.get_result_db_filter_args_from_argparser(args)
        if not kwargs and not whole_db:
            raise CLIError('You did not supply any filter argument!\nIf you want to delete the whole db, use the --all switch!', parser)
        kwargs["whole_db"] = whole_db

        # delete from result db
        n = cli_check_n_exec(androlyze.action_delete_apks_res,
                             circumvent_check=circumvent_check,
                             args=(self.storage,),
                             kwargs=kwargs)

        if not whole_db:
            clilog.info("Deleted %s file/document(s)!" % n)
def action_query(self, hashes, package_names, tags, yes):
    ''' Query the database '''
    args = self.args
    parser = self.parser

    # check which database to query (get it from argparser)
    query_dst = args.query_dst
    if query_dst == SUBCOMMAND_QUERY_IMPORT:
        clilog.info('\n'.join(androlyze.action_query_import_db(self.storage, args.query_import_cmd, hashes, package_names, tags)))
    elif query_dst == SUBCOMMAND_QUERY_RESULT:
        kwargs = CLIUtil.get_result_db_filter_args_from_argparser(args)
        if args.show_id:
            kwargs["remove_id_field"] = not args.show_id

        distinct_key = None
        if args.distinct is not None:
            distinct_key = args.distinct
        # get distinct values for script name
        elif args.list_ran_scripts:
            distinct_key = MongoUtil.get_attr_str(RESOBJ_SCRIPT_META, RESOBJ_SCRIPT_META_NAME, args.non_document)

        no_args_supplied = len(kwargs) == 0 and not args.latest and not args.count and distinct_key is None
        whole_db = args.all
        raw = args.raw

        # update with basic result query options
        kwargs.update(CLIUtil.get_basic_result_query_options(args))
        kwargs.update(dict(include_fields=args.include_fields,
                           exclude_fields=args.exclude_fields,
                           non_document_raw=raw,
                           distinct_key=distinct_key))

        if no_args_supplied and not whole_db:
            raise CLIError('Not enough arguments supplied!\nIf you want to dump the whole db, use the --all switch!', parser)

        res = cli_check_n_exec(androlyze.action_query_result_db,
                               prompt_prefix='Will print whole results db!',
                               circumvent_check=not no_args_supplied or yes,
                               args=(self.storage, CLIUtil.get_checks_from_cli(args)),
                               kwargs=kwargs)

        # log results
        print_query_result_db(res,
                              distict_generator=distinct_key is not None,
                              count=args.count,
                              raw=raw,
                              interactive=not args.not_interactive)
def __init__(self, *args, **kwargs):
    ''' See :py:method:`.BaseAnalyzer.__init__` for details.

    Parameters
    ----------
    serialize_apks : bool, optional (default is True)
        If true, serialize the .apk data. Otherwise the id (hash) of the apk
        will be sent and the apk fetched by the worker from the result db.
        Be sure to import the apks to the result db first!
    '''
    serialize_apks = kwargs.get("serialize_apks", True)

    super(DistributedAnalyzer, self).__init__(*args, **kwargs)

    # list<tuple<apk_path, _apk, is_apk>>
    self.__apks = list(AnalyzeUtil.apk_gen(self.apks_or_paths))

    # result group
    self.group_result = None

    # serialize .apk data
    self.__serialize_apks = serialize_apks
    if serialize_apks:
        clilog.info("Will serialize .apk data!")
    else:
        clilog.info("Will send id of apks!")

    # stats view for cli
    self.analyze_stats_view = AnalysisStatsView(self._cnt_apks)
    self.analyze_stats_view.daemon = True

    # the `TaskCollection` for the analysis tasks
    self.task_collection = TaskCollection(self._cnt_apks)

    # register celery signals
    self.register_signals()

    self.lock = Lock()
def action_delete_apks_import(storage, delete_apk=False, hashes=None,
                              package_names=None, tags=None,
                              select_whole_db=False):
    ''' Delete from the import storage (database and/or file system).

    Parameters
    ----------
    storage : RedundantStorage
        The store to use.
    delete_apk : boolean, optional (default is False)
    hashes : iterable<str>, optional (default is None)
    package_names : iterable<str>, optional (default is None)
    tags : iterable<str>, optional (default is None)
    select_whole_db : boolean, optional (default is False)
        If true, select the whole import database! Be careful!
        This means we do not take `hashes`, `package_names` and `tags` into account!

    Raises
    ------
    ValueError
    '''
    try:
        apks = None
        if select_whole_db:
            apks = action_query_import_db(storage, COMMAND_QUERY_APKS, hashes, package_names, tags)
        # don't delete the whole database unless at least one filter is given!
        elif len(Util.filter_not_none((hashes, package_names, tags))) > 0:
            apks = action_query_import_db(storage, COMMAND_QUERY_APKS, hashes, package_names, tags)
        else:
            raise ValueError('''Neither hashes nor package names nor tags specified!
If you want to use the whole database, set `select_whole_db` to true.''')

        # use a list, otherwise we get duplicates due to the generator
        for apk in list(apks):
            if delete_apk:
                clilog.info("Will delete from database and file system:\n%s", apk.detailed_description())
            else:
                clilog.info("Will delete %s from database: %s", apk.short_description(), storage.import_db_storage)
            storage.delete_entry_for_apk(apk, delete_apk)
    except StorageException as e:
        log.warn(e)
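# Hedged usage sketch (illustrative only): remove every imported apk tagged
# "experiment-1" from the import database while keeping the .apk files on
# disk. `storage` is assumed to be an opened RedundantStorage; the tag value
# is made up.
def _example_delete_tagged(storage):
    action_delete_apks_import(storage, delete_apk=False, tags=["experiment-1"])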
def rewrite_configs():
    clilog.info("using environment variables for service discovery (kubernetes): %s",
                ', '.join(envs_kubernetes))
    clilog.info("using environment variables for service discovery (docker): %s",
                ', '.join(envs_docker))
    clilog.info("rabbitmq: host: %s, port: %s", *get_rabbitmq_conn_info())
    clilog.info("mongodb: host: %s, port: %s", *get_mongodb_conn_info())

    def rewrite_config_key(key, value, config_path=CONFIG_PATH):
        key = key.strip()
        value = value.strip()
        return run(r"""
sed -i.bak "s/\(%s[ ]*=[ ]*\).*/\1%s/g" %s
""" % (key, value, config_path))

    def rewrite_amqp(user=r"\2", pw=r"\3", host=r"\4", port=r"\5", vhost=r"\6",
                     config_path=CONFIG_PATH):
        # `any` consumes the generator; note it short-circuits on the first
        # non-zero exit status
        any(
            run(r"""
sed -i.bak "s/\(%s[ ]*=[ ]*amqp:[/][/]\)\(.*\)[:]\(.*\)[@]\(.*\)[:]\(.*\)[/]\(.*\)/\1%s:%s@%s:%s\/%s/g" %s
""" % (key, user, pw, host, port, vhost, config_path))
            for key in [KEY_BROKER_URL, KEY_BROKER_BACKEND_URL])

    mongodb_ip, mongodb_port = get_mongodb_conn_info()
    if mongodb_ip is not None:
        rewrite_config_key(KEY_RESULT_DB_IP, mongodb_ip)
    if mongodb_port is not None:
        rewrite_config_key(KEY_RESULT_DB_PORT, mongodb_port)

    rabbitmq_ip, rabbitmq_port = get_rabbitmq_conn_info()
    rewrite_amqp(host=rabbitmq_ip, port=rabbitmq_port)
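# Illustration of the sed substitution used above, expressed with Python's
# `re` module so it can be run stand-alone. The key name "ResultDbIP" is an
# assumption; the real value of KEY_RESULT_DB_IP may differ. Group \1 keeps
# the "key = " prefix and only the value part is replaced.
import re

def _example_rewrite_line():
    line = "ResultDbIP = 127.0.0.1"
    print(re.sub(r"(ResultDbIP[ ]*=[ ]*).*", r"\g<1>10.0.0.5", line))
    # -> ResultDbIP = 10.0.0.5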
def action_import_apks(storage, apk_paths,
                       copy_apk=False, copy_to_mongodb=False,
                       update=False, tag=None,
                       # shared memory
                       cnt_imported_apks=None, total_apk_count=None, import_finished=None,
                       # concurrent settings
                       concurrency=None):
    ''' Import the apks from the `apk_paths` and create the file system structure
    where the results will be kept, specified by `storage`.

    Parameters
    ----------
    storage : RedundantStorage
        The store to use.
    apk_paths : iterable<str>
        The apk files and/or directories.
    copy_apk : bool
        Import the apk file to the `import_dir` (copy it).
    copy_to_mongodb : bool, optional (default is False)
        Also import into MongoDB. Useful for the distributed analysis.
    update : bool
        Update apks that have already been imported.
    tag : str, optional (default is None)
        Some tag.
    cnt_imported_apks : multiprocessing.Value<int>, optional (default is None)
        If given, use for progress updating.
    total_apk_count : multiprocessing.Value<int>, optional (default is None)
        If given, use for the total count of apks.
    import_finished : multiprocessing.Value<byte>, optional (default is None)
        If given, use to signal that the import has been completed.
    concurrency : int, optional (default is number of cpus)
        Number of processes to use for the import.
    '''
    from androlyze.loader.ApkImporter import ApkImporter

    # get single paths to apks so we get the correct total count of apks
    clilog.info("looking for apks in given paths ... ")
    apk_paths = ApkImporter.get_apks_from_list_or_dir(apk_paths)

    if total_apk_count is not None:
        # may be time consuming for recursive lookup
        apk_paths, total_apk_count.value = Util.count_iterable_n_clone(apk_paths)

    # create count if not given
    if cnt_imported_apks is None:
        cnt_imported_apks = Value('i', 0, lock=RLock())

    # set concurrency
    if concurrency is None:
        concurrency = cpu_count()
    log.warn("Using %d processes", concurrency)

    clilog.info("Storage dir is %s" % storage.fs_storage.store_root_dir)
    if copy_apk:
        clilog.info("Copying APKs to %s ..." % storage.fs_storage.store_root_dir)

    def import_apks(apk_paths):
        apk_importer = ApkImporter(apk_paths, storage)
        for apk in apk_importer.import_apks(copy_apk=copy_apk,
                                            copy_to_mongodb=copy_to_mongodb,
                                            update=update, tag=tag):
            clilog.info("imported %s", apk.short_description())

            # use shared memory counter if given
            if cnt_imported_apks is not None:
                with cnt_imported_apks.get_lock():
                    cnt_imported_apks.value += 1

    pool = []

    # don't convert the generator to a list if only 1 process is wanted
    apk_paths = [apk_paths] if concurrency == 1 else Util.split_n_uniform_distri(list(apk_paths), concurrency)

    # start parallel import
    # multiprocessing's Pool causes pickle errors
    for i in range(concurrency):
        p = Process(target=import_apks, args=(apk_paths[i],))
        log.debug("starting process %s", p)
        pool.append(p)
        p.start()

    for p in pool:
        p.join()
        log.debug("joined process %s", p)

    apks_imported = cnt_imported_apks.value != 0
    # show some message if no APK has been imported
    if not apks_imported:
        log.warn("No .apk file has been imported! This means no .apk file has been found or they have already been imported.")
    else:
        clilog.info("done")

    # because not every apk may be importable, we cannot use the count to
    # signal that the import is done
    if import_finished is not None:
        import_finished.value = 1

    clilog.info("Imported %d apks", cnt_imported_apks.value)
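# Hedged usage sketch: run the import in a background process and read the
# shared-memory counters afterwards. Everything except `action_import_apks`
# and its documented parameters is illustrative.
def _example_import_with_progress(storage, paths):
    from multiprocessing import Process, Value
    cnt = Value('i', 0)    # imported so far
    total = Value('i', 0)  # total count, filled in by the importer
    done = Value('b', 0)   # byte flag, set to 1 once the import finished
    p = Process(target=action_import_apks, args=(storage, paths),
                kwargs=dict(cnt_imported_apks=cnt, total_apk_count=total,
                            import_finished=done))
    p.start()
    p.join()
    clilog.info("imported %d/%d apks", cnt.value, total.value)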
def create_analyzer(storage, script_list, apks_or_paths=None,
                    mode=ANALYZE_MODE_PARALLEL, concurrency=None,
                    serialize_apks=True):
    ''' Create the analyzer only.

    Parameters
    ----------
    storage : RedundantStorage
        The store to use.
    script_list : list<str>
        List of paths to scripts (complete filename with extension).
    apks_or_paths : list<str> or list<Apk>, optional (default is None)
        List of `Apk` or paths to the apks which shall be analyzed with the
        given scripts.
        If you analyze from paths the `import_date` is not set!
    mode : str, optional (default is `ANALYZE_MODE_PARALLEL`)
        Do a parallel analysis by default. Choose between
        `ANALYZE_MODE_NON_PARALLEL`, `ANALYZE_MODE_PARALLEL` and
        `ANALYZE_MODE_DISTRIBUTED`.
    concurrency : int, optional (default is number of cpu cores)
        Number of workers to spawn.
    serialize_apks : bool, optional (default is True)
        If true, serialize the .apk data. Otherwise the id (hash) of the apk
        will be sent and the apk fetched by the worker from the result db.
        Be sure to import the apks to the result db first!
    '''
    from androlyze.model.script import ScriptUtil
    from androlyze.analyze.exception import AndroScriptError

    try:
        # list<type<AndroScript>>
        androscript_list = ScriptUtil.import_scripts(script_list)
        instantiated_scripts = sorted(ScriptUtil.instantiate_scripts(androscript_list, script_paths=script_list))

        if len(instantiated_scripts) == 0:
            log.warn("No scripts supplied!")
            return

        # get hashes for `AndroScript`s so that we can set the hash directly
        # next time we instantiate the script
        script_hashes = [s.hash for s in instantiated_scripts]
        min_script_needs = ScriptUtil.get_minimum_script_options(instantiated_scripts)

        # log infos about scripts
        clilog.info('Loaded scripts:\n%s', '\n'.join((str(s) for s in instantiated_scripts)))
        log.info(ScriptUtil.androscript_options_descr(instantiated_scripts))

        if apks_or_paths:

            def create_analyzer():
                analyzer = None
                # arguments for BaseAnalyzer
                args = storage, androscript_list, script_hashes, min_script_needs, apks_or_paths
                log.info("Mode: %s", mode)

                # normal analyzer
                if mode == ANALYZE_MODE_NON_PARALLEL:
                    from androlyze.analyze.Analyzer import Analyzer
                    analyzer = Analyzer(*args)
                # use parallel analyzer
                elif mode == ANALYZE_MODE_PARALLEL:
                    from androlyze.analyze.parallel.ParallelAnalyzer import ParallelAnalyzer
                    analyzer = ParallelAnalyzer(*args, concurrency=concurrency)
                # use distributed one
                elif mode == ANALYZE_MODE_DISTRIBUTED:
                    from androlyze.analyze.distributed.DistributedAnalyzer import DistributedAnalyzer
                    analyzer = DistributedAnalyzer(*args, concurrency=concurrency, serialize_apks=serialize_apks)

                return analyzer

            return create_analyzer()

    except ApkImportError as e:
        log.warn(e)
    except IOError as e:
        log.warn(AndroScriptError(e.filename, caused_by=e))
        sys.exit(1)
    except ImportError as e:
        log.exception(e)
    except Exception as e:
        log.exception(e)
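# Hedged usage sketch: build and run a distributed analyzer that only sends
# apk ids (the apks must have been imported into the result db first, see the
# docstring above). The script path is made up for illustration.
def _example_distributed_analysis(storage, apks):
    analyzer = create_analyzer(storage, ["scripts/my_script.py"],
                               apks_or_paths=apks,
                               mode=ANALYZE_MODE_DISTRIBUTED,
                               serialize_apks=False)
    if analyzer is not None:
        return analyzer.analyze()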
def print_query_result_db(res, distict_generator=False, count=False, raw=False,
                          interactive=True):
    ''' Print the results from the result db (mongodb).

    Parameters
    ----------
    count : bool, optional (default is False)
        Only print the count, not the results.
    distict_generator : bool, optional (default is False)
        Res is a generator<object> created from the distinct(...) method of mongodb.
        If generator<dict>, convert each dict to json. Otherwise just print.
    raw : bool, optional (default is False)
        Print the raw data from gridfs. Otherwise print json.
    res : gridfs.grid_file.GridOutCursor or generator<object> or pymongo.cursor.Cursor
        The results to print.
        The first if non_document and non_document_raw, the second if distinct
        values are wanted, the third otherwise.
    interactive : bool, optional (default is True)
        Iterate interactively through the result cursor.
    '''
    from pymongo.errors import PyMongoError

    try:
        # print count
        if count:
            cnt = 0
            # res is a list
            if distict_generator:
                cnt = len(res)
            # res is a cursor
            else:
                cnt = res.count()
            clilog.info(cnt)
        else:
            if distict_generator:
                for r in sorted(res):
                    if isinstance(r, dict):
                        r = dict2json(r)
                    clilog.info(r)
            else:
                for i, res in enumerate(res, 1):
                    # interactive result view
                    if i != 1 and interactive and raw_input('Press any key to view the next result or abort with "no"!').lower() == 'no':
                        break
                    sys.stderr.write('/* {} */\n'.format(i))
                    # print raw data
                    if raw:
                        # gridfs.grid_file.GridOut
                        for gridout_obj in res:
                            clilog.info(gridout_obj)
                    # print json
                    else:
                        clilog.info(dict2json(res))
    except PyMongoError as e:
        log.exception(e)
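# Hedged usage sketch: print the distinct script names stored in the result
# db, mirroring the `--list-ran-scripts` branch of `action_query` above.
# `res` is assumed to be the generator returned by a mongodb distinct(...) query.
def _example_print_distinct_scripts(res):
    print_query_result_db(res, distict_generator=True, interactive=False)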
def _analyze(self):
    ''' See doc of :py:method:`.BaseAnalyzer.analyze`. '''
    # try to get registered workers
    # if the network fails at this point -> stop analysis
    try:
        clilog.info(CeleryUtil.get_workers_and_check_network())
    except NetworkError as e:
        log.critical(e)
        return 0

    # storage objects
    storage = self.storage

    clilog.info("Number of apks to analyze: %d", self._cnt_apks)

    try:
        # get analyze task
        analyze_task = tasks[CeleryConstants.get_analyze_task_name()]

        # create storage
        storage.create_or_open_sub_storages()

        # send tasks
        start = time()

        # apk generator over .apk or apk hashes
        apk_gen = AnalyzeUtil.apk_id_or_raw_data_gen(self.apks, force_raw_data=self.serialize_apks)

        clilog.info("Task publishing progress:")

        # send and serialize .apks
        # if analysis via path, serialize them!
        if self.serialize_apks:
            log.info("sending .apks to message broker")
            self.group_result = group_result = GroupResult(results=[])
            for args in self.send_apk_args_generator(apk_gen):
                task = analyze_task.delay(*args)
                group_result.add(task)
        # send only the apk id and let the worker fetch it via mongodb
        else:
            log.info("sending ids of apks")
            task_group = group((analyze_task.s(*args) for args in self.send_id_args_generator(apk_gen)))
            # publish tasks
            self.group_result = task_group()

        log.info("sending took %ss", (time() - start))
        sys.stderr.write("\nAnalysis progress:\n")

        # start showing analysis progress
        self.analyze_stats_view.start()

        # wait for results
        log.debug("joining on ResultGroup ... ")

        # setup callback
        callback_func = self.get_callback_func(self.success_handler, self.error_handler)
        CeleryUtil.join_native(self.group_result, propagate=False, callback=callback_func)

        clilog.info("\nanalysis done ... ")
        log.info("distributed analysis took %ss", (time() - start))

        return self.stop_analysis_view()
    except DatabaseOpenError as e:
        log.critical(e)
        return 0
    except (KeyboardInterrupt, Exception) as e:
        if not isinstance(e, KeyboardInterrupt):
            log.exception(e)
        log.warn("Interrupting distributed analysis ... Please wait a moment!")
        log.warn("revoking tasks on all workers ...")

        if celerysettings.CELERY_TASK_REVOCATION_ENABLED:
            # revoke tasks
            if self.group_result is None:
                # revoke via task ids
                log.debug("revoking while publishing tasks ...")
                self.task_collection.revoke_all(terminate=True, signal='SIGKILL')
            else:
                # revoke via GroupResult if already available/created
                # only available after all tasks have been sent
                self.group_result.revoke(terminate=True, signal='SIGKILL')
            log.warn("revoked tasks and killed workers ...")

        # return the number of analyzed apks
        return self.stop_analysis_view()
def run(command):
    clilog.info(">>> %s", command)
    return os.system(command)
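# Hedged example: `run` logs the command and returns the exit status of
# os.system (0 on success), so callers can branch on failure.
def _example_run():
    if run("echo hello") != 0:
        log.warn("command failed")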
def evaluate(self, storage, *args, **kwargs):
    self.storage = storage
    clilog.info("evaluating '%s' version: %s", self.script_name, self.version)
    return self._evaluate(storage, *args, **kwargs)
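# Hedged sketch of a minimal evaluation script. Assumption: this `evaluate`
# method lives on the `Eval` base class that `run_action` loads via
# `clazz_name="Eval"` below; the subclass and its attribute values are purely
# illustrative.
class ExampleEval(Eval):
    script_name = "ExampleEval"
    version = "0.1"

    def _evaluate(self, storage, *args, **kwargs):
        # a real script would query `storage` here; the return value is illustrative
        return "nothing evaluated"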
def _analyze(self, test=False):
    ''' Start the analysis and store the results in the predefined place.

    Parameters
    ----------
    test : bool, optional (default is False)
        Use for testing. Will not store any result!

    Returns
    -------
    int
        Number of analyzed apks
    list<ResultObject>
        List of the results (only if `test`)
    '''
    androscripts = self.script_list

    # collect results for test mode
    test_results = []

    # get minimum options for all scripts -> boost performance
    # use only as many options as needed!

    # run over apks
    for apk_path, _apk, _ in apk_gen(self.apks_or_paths):
        eandro_apk = open_apk(apk_path, apk=_apk)

        # if None, an error happened and has been logged
        # otherwise proceed with the analysis
        if eandro_apk is not None:
            # tuple<FastApk, AndroScript>
            res = AnalyzeUtil.analyze_apk(eandro_apk, androscripts, self.min_script_needs, reset_scripts=True)

            if res:
                # unpack results
                fastapk, script_results = res

                # store results if not in test mode
                if not test:
                    for script in script_results:
                        try:
                            storage_result = AnalyzeUtil.store_script_res(self.storage, script, fastapk)
                            # keep storage results
                            self.add_storage_result(storage_result)
                        except StorageException as e:
                            log.warn(e)
                else:
                    # deliver result objects in testing mode
                    test_results += [s.res for s in script_results]

                clilog.info("analyzed %s", fastapk.short_description())

            # increment counter; no lock needed, nobody else is writing to this value
            self.cnt_analyzed_apks.value += 1

    if test:
        return test_results
    return self.cnt_analyzed_apks.value
def run_action(self, cmd):
    ''' Run an action specified by `cmd` (see COMMAND_-prefixed variables). '''
    parser = self.parser
    args = self.args

    # check which command has been used
    if cmd is None:
        # no command specified through the program name -> get it from argparser
        cmd = args.command

    if cmd in COMMANDS_ALL:
        hashes, package_names, tags = CLIUtil.get_filter_options_from_cli(args)
        yes = args.yes

        if cmd == COMMAND_QUERY:
            self.action_query(hashes, package_names, tags, yes)

        # dblyze -> do the analysis results evaluation
        elif cmd == COMMAND_EVAL:
            dblyze_scripts = ScriptUtil.import_scripts(args.scripts, clazz_name="Eval")
            for dblyze_script in dblyze_scripts:
                dblyze_script().evaluate(self.storage)

        # sync from result db to file system
        elif cmd == COMMAND_SYNC:
            total_entries = androlyze.action_sync_fs(self.storage, lambda _: False)
            CLIUtil.cli_check_n_exec(androlyze.action_sync_fs,
                                     prompt_prefix="Will download %d entries from result database!" % total_entries,
                                     circumvent_check=args.yes,
                                     args=(self.storage, lambda _: True))

        else:
            # print welcome message
            clilog.info("Welcome to %s!\n" % PROJECT_NAME)

            # import command
            if cmd == COMMAND_IMPORT:
                apks_or_paths, _ = self.get_apks_or_paths_from_cli()
                tag = args.tag
                copy2disk, copy2db, update, concurrency = args.copy_disk, args.copy_db, args.update, args.concurrency
                if not update:
                    log.warn('--update not supplied. No update of already present apks in database will be done!')
                androlyze.action_import_apks(self.storage, apks_or_paths, copy2disk, copy2db, update, tag, concurrency=concurrency)

            # analyze command
            elif cmd == COMMAND_ANALYZE:
                # androguard path has to be set before
                from androlyze import action_analyze

                # sort apks?
                get_apks_kwargs = {}
                no_sort_by_code_size = args.no_sort_code_size
                if not no_sort_by_code_size:
                    # sort apks by app code size for better scheduling
                    get_apks_kwargs = dict(order_by=TABLE_APK_IMPORT_KEY_SIZE_APP_CODE, ascending=False)
                apks_or_paths, _ = self.get_apks_or_paths_from_cli(**get_apks_kwargs)

                # debug infos
                if not no_sort_by_code_size and not args.apks:
                    apks_or_paths, _it = itertools.tee(apks_or_paths)
                    clilog.info('Using Code Size Scheduling for faster analysis!')
                    log.debug('\n'.join(('%s: %s' % (x.package_name, x.size_app_code) for x in _it)))

                scripts = args.scripts

                parallel_mode, concurrency, send_id = self.__load_parallel_settings()

                # get analysis mode
                if parallel_mode == PARALLELIZATION_MODE_DISTRIBUTED:
                    analyze_mode = ANALYZE_MODE_DISTRIBUTED
                elif parallel_mode == PARALLELIZATION_MODE_NON_PARALLEL:
                    analyze_mode = ANALYZE_MODE_NON_PARALLEL
                else:
                    analyze_mode = ANALYZE_MODE_PARALLEL

                action_analyze(self.storage, scripts, apks_or_paths,
                               mode=analyze_mode, concurrency=concurrency,
                               serialize_apks=not send_id)

            # delete command
            elif cmd == COMMAND_DELETE:
                self.action_delete(parser, hashes, package_names, tags, yes)

            clilog.info("done")