def write_analyze_task_results_to_fs(storage, group_result, chunked = False):
    ''' Get successful task results and write them to disk if enabled.

    Parameters
    ----------
    storage : RedundantStorage
    group_result : GroupResult
    chunked : bool, optional (default is False)
        If work has been divided into chunks.

    Returns
    -------
    int
        Number of successful tasks
    '''
    if group_result is not None:
        results = get_successful_analyze_task_results(group_result, chunked = chunked)

        # no result writing to disk wanted
        if not storage.fs_storage_disabled():
            clilog.info("Fetching all analysis results for storage ...")
            if results:
                try:
                    storage.fetch_results_from_mongodb(results)
                except DatabaseLoadException as e:
                    log.exception(e)

        return len(results)
    return 0
def _analyze(self, apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args, **kwargs):
    # CFG
    for encoded_method in dalvik_vm_format.get_methods():
        try:
            method_analysis = vm_analysis.get_method(encoded_method)

            if encoded_method.get_code() is None:
                continue

            classname = encoded_method.get_class_name()

            # skip android classes due to mongo db document limit
            if classname.find("Landroid") != -1:
                continue

            ast = None
            if method_analysis is not None:
                ast = AnaUtil.ast_for_method_analysis(method_analysis)

            if ast is not None:
                self.cres += '%s\n\n' % pformat(ast)
        except Exception as e:
            log.exception(e)
def delete_results(self, where = None, non_document = False, **kwargs):
    ''' See doc of :py:meth:`.ResultStorageInterface.delete_results` '''
    coll = self.__get_collection(gridfs_obj = non_document)

    if where is None:
        where = {}
    where.update(self.create_where_clause(kwargs, from_gridfs = non_document))

    n = 0
    try:
        # do the query
        log.debug("mongodb remove(%s)", where)

        # gridfs
        if non_document:
            # get ids and delete
            for _id in self.get_ids(where = where, non_document = non_document):
                coll.delete(_id)
                log.debug("Deleted element with id: %s from mongodb gridfs!", _id)
                n += 1
        # normal collection
        else:
            write_result = coll.remove(where, getLastError=True)
            if write_result is not None:
                n = write_result["n"]

        return n
    except PyMongoError as e:
        log.exception(DatabaseDeleteException(self, where, e))
        return n
def print_query_result_db(res, distict_generator = False, count = False, raw = False, interactive = True):
    ''' Print the results from the result db (mongodb).

    Parameters
    ----------
    count : bool, optional (default is False)
        Only print count, not results.
    distict_generator : bool, optional (default is False)
        Res is generator<object> created from the distinct(...) method of mongodb.
        If generator<dict>, convert each dict to json.
        Otherwise just print.
    raw : bool, optional (default is False)
        Print raw data from gridfs.
        Otherwise print json.
    res : gridfs.grid_file.GridOutCursor or generator<object> or pymongo.cursor.Cursor
        First if non_document and non_document_raw.
        Second if distinct values wanted.
        Third otherwise.
        The results to print.
    interactive : bool, optional (default is True)
        Iterate interactively through the result cursor.
    '''
    from pymongo.errors import PyMongoError

    try:
        # print count
        if count:
            cnt = 0
            # res is list
            if distict_generator:
                cnt = len(res)
            # res is cursor
            else:
                cnt = res.count()
            clilog.info(cnt)
        else:
            if distict_generator:
                for r in sorted(res):
                    if isinstance(r, dict):
                        r = dict2json(r)
                    clilog.info(r)
            else:
                for i, res in enumerate(res, 1):
                    # interactive result view
                    if i != 1 and interactive and raw_input('Press any key to view next result or abort with "no" !)').lower() == 'no':
                        break
                    sys.stderr.write('/* {} */\n'.format(i))

                    # print raw data
                    if raw:
                        # gridfs.grid_file.GridOut
                        for gridout_obj in res:
                            clilog.info(gridout_obj)
                    # print json
                    else:
                        clilog.info(dict2json(res))
    except PyMongoError as e:
        log.exception(e)
def open_apk(apk_or_path = None, apk = None, raw = False, path = None):
    ''' Open apk and set meta information from `apk`.

    Parameters
    ----------
    apk_or_path : str, optional (default is None)
        Path to apk.
    apk : Apk, optional (default is None)
        If given, take the meta infos from `apk`,
        so we don't need to recompute the hash (at least if `apk_or_path` is given).
    raw : bool, optional (default is False)
        If specified, treat `apk_or_path` as raw .apk data.
    path : str, optional (default is None)
        Can be used for `raw` to set the path of the `EAndroApk`.
        If not given, won't be set.

    Returns
    -------
    EAndroApk
    None
        If apk could not be opened.
    '''
    apk_descr = str(apk_or_path)
    if raw:
        apk_descr = "raw data"

    try:
        eandro_apk = None
        if not raw:
            eandro_apk = EAndroApk(apk_or_path)
        else:
            eandro_apk = EAndroApk(apk_or_path, raw = True)
            eandro_apk.path = path

        if apk is not None:
            # we don't want to lose meta infos
            # use the hash from db so we don't need to recompute
            eandro_apk.set_meta(apk)

        return eandro_apk
    except BadZipfile as e:
        log.warn("Apk %s is not a valid zip file!" % apk_descr)
    except (struct.error, IOError) as e:
        log.warn(CouldNotOpenApk(apk_descr, e))
    except Exception as e:
        log.exception(e)
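# Hedged usage sketch (not part of the original module): opening an apk from a
# path, as the worker `run` method further below does via AnalyzeUtil.open_apk.
# The path is a placeholder; `open_apk` returns None if the apk could not be opened.
eandro_apk = open_apk("/path/to/some.apk")  # hypothetical path
if eandro_apk is not None:
    print eandro_apk.short_description()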
def log_will_retry(secs, exc = None, what = ''):
    '''
    Parameters
    ----------
    secs : int
        Retry in `secs` seconds.
    exc : Exception, optional (default is None)
        Exception to log.
    what : str, optional (default is '')
        What to try again.
    '''
    if exc is not None:
        log.exception(exc)
    log.warn("Trying %s again in %ss", what, secs)
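# Hedged usage sketch (not part of the original module): `log_will_retry` paired
# with a retry loop, mirroring the pattern used in `fetch_results_from_mongodb`
# further below. `fetch_with_retry`, `do_db_fetch` and DATABASE_RETRY_TIME are
# illustrative names only; the function is called directly here rather than via Util.
from time import sleep
from pymongo.errors import PyMongoError

DATABASE_RETRY_TIME = 5

def fetch_with_retry(do_db_fetch):
    while True:
        try:
            return do_db_fetch()
        except PyMongoError as e:
            # announce the retry and wait before trying again
            log_will_retry(DATABASE_RETRY_TIME, exc = e, what = 'database fetch')
            sleep(DATABASE_RETRY_TIME)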
def test(script, apk_paths):
    ''' Use this function to develop and test your script.
    E.g. find unregistered keys and other errors.

    Parameters
    ----------
    script : type
        The reference to the script which shall be tested (not instantiated!)
    apk_paths : iterable<str>
        Paths to apks.

    Examples
    --------
    >>> for res in AndroScript.test(ClassDetails, ["../../../testenv/apks/a2dp.Vol.apk"]):
    ...     # get result object
    ...     print res
    ...     # get json
    ...     print res.write_to_json()

    Returns
    -------
    list<ResultObject>
        The `ResultObject` for every analyzed apk.
    '''
    # no circular import
    from androlyze.analyze.Analyzer import Analyzer

    res = []
    try:
        # init scripts to get options
        inst_script_list = ScriptUtil.instantiate_scripts([script])
        script_options = ScriptUtil.get_minimum_script_options(inst_script_list)
        script_list = [script]

        # options: storage, script_list, script_hashes, min_script_needs, apks_or_paths
        # but the analyzer needs the scripts uninitialized!
        ana = Analyzer(None, script_list, None, script_options, apk_paths)
        res = ana.analyze(test=True)
    except AndroScriptError as e:
        log.exception(e)

    return res
def prefetch_apk(self, task_id, task, *args, **kwargs):
    ''' Prefetch the `APK`s if mongodb is used as distributed apk storage.
    If the prefetch fails, the task will be retried.
    '''
    try:
        # open db if not already opened
        self.__setup_db()

        args = kwargs["args"]
        _, _, _, apk_zipfile_or_hash, is_hash, fast_apk = args

        # prefetch apk via hash if given
        if is_hash:
            # get apk from the apk storage
            eandro_apk = self.__get_apk_from_storage(apk_zipfile_or_hash, apk = fast_apk)
            if eandro_apk is not None:
                # store in prefetch pool
                apk_prefetch_pool[apk_zipfile_or_hash] = eandro_apk
                log.info("prefetched: %s, size apk cache: %d",
                         eandro_apk.short_description(), len(apk_prefetch_pool))
    # abort if file not in db!
    except (NoFile, DatabaseOpenError, DatabaseLoadException) as e:
        log.exception(e)
def run(self):
    work_queue = self.work_queue

    try:
        for work in iter(work_queue.get, STOP_SENTINEL):
            try:
                apk_path, _apk, _ = work

                eandro_apk = AnalyzeUtil.open_apk(apk_path, apk=_apk)

                # do the analysis
                res = self.analyze_apk(eandro_apk)

                # remember yet analyzed APKs
                if eandro_apk:
                    self.analyzed_apks.put(FastApk.load_from_eandroapk(eandro_apk))

                # collect results
                if res is not None:
                    self.__store_results([res])
                else:
                    # increment analyzed apks counter
                    self.add_analyzed_apks_sm(1)
            except KeyboardInterrupt as e:
                raise e
            except Exception as e:
                log.exception(e)
            finally:
                # signal one task done
                work_queue.task_done()

        # signal sentinel read
        work_queue.task_done()
        work_queue.close()
    # be silent
    except KeyboardInterrupt:
        pass
def _analyze(self, apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args, **kwargs):
    """ This sample code is taken from `androguard` and has been modified!

    See Also
    --------
    http://code.google.com/p/androguard/wiki/RE#Source_Code
    """
    res = self.res

    # androguard.core.bytecodes.dvm.ClassDefItem
    for clazz in dalvik_vm_format.get_classes():
        try:
            key = clazz.get_name()

            # skip android classes due to mongo db document limit
            if key.find("Landroid") != -1:
                continue

            # allows querying for package name
            res.register_keys([key], CAT_DECOMPILE)
            res.log(key, clazz.get_source().split("\n"), CAT_DECOMPILE)
        except Exception as e:
            log.exception(e)
def _analyze(self, apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args, **kwargs):
    ''' This sample code is taken from `androguard` and has been modified!

    See Also
    --------
    http://code.google.com/p/androguard/wiki/RE#Source_Code
    '''
    res = self.res
    res.register_keys([CAT_DECOMPILE])

    # androguard.core.bytecodes.dvm.ClassDefItem
    for clazz in dalvik_vm_format.get_classes():
        try:
            key = clazz.get_name()

            # skip android classes
            if key.find("Landroid") != -1:
                continue

            self.cres += clazz.get_source()
        except Exception as e:
            log.exception(e)
def run_analysis(analyzer):
    ''' Run the analysis with the `analyzer`.

    Parameters
    ----------
    analyzer : BaseAnalyzer

    Returns
    -------
    int
        Number of analyzed apks.
    '''
    from androlyze.analyze.exception import AndroScriptError

    try:
        cnt_analyzed_apks = analyzer.analyze()

        if cnt_analyzed_apks == 0:
            log.warn("No apk file has been analyzed !")
        else:
            log.warn("Analyzed %s apks", cnt_analyzed_apks)

        return cnt_analyzed_apks
    except AndroScriptError as e:
        log.exception(e)
def store_custom_data(self, package_name, version_name, _hash, file_name, data):
    ''' Store custom data to the file system (also with the result directory as root).

    Parameters
    ----------
    package_name : str
        Package name of the apk.
        Unique apk identifier (at least in the store).
    version_name : str
        Version name.
    _hash : str
        The hash of the apk.
    file_name : str
        File name.
    data : object
        The data that shall be written to disk.
        Will write str(data) to disk.

    Raises
    ------
    FileSysStoreException
    '''
    try:
        # create basic fs structure
        base_path = self.get_apk_res_path_all_args(package_name, version_name, _hash)
        self.create_filesys_structure(base_path)

        file_path = join(base_path, file_name)
        try:
            with open(file_path, "w") as f:
                f.write(str(data))
        except IOError as e:
            raise FileSysStoreException(file_path, "custom data", self, e)
    except FileSysCreateStorageStructureException as e:
        log.exception(e)
def analyze_apk_ana_objs(ana_objs, time_s, eandro_apk, scripts, propagate_error=False, reset_scripts=True):
    ''' Analyze the `eandro_apk` with the given `scripts`,
    assuming each `AndroScript` needs at least `min_script_needs`.
    Be sure that you have reset the `scripts`!

    Parameters
    ----------
    eandro_apk : EAndroApk
        The apk.
    scripts : iterable<AndroScript>
        The scripts to use for the analysis.
    propagate_error : bool, optional (default is False)
        If true, propagate errors.
    reset_scripts : bool, optional (default is True)
        If given, reset the `AndroScript` before analyzing.

    Returns
    -------
    list<FastApk, list<AndroScript>>
        Uses `FastApk` to only store the meta information, not the apk data!
    None
        If an error happened.
    '''
    from androlyze.analyze.exception import AndroScriptError

    try:
        # reset scripts
        if reset_scripts:
            for s in scripts:
                s.reset()

        if eandro_apk is not None:
            fastapk = None
            script_results = []
            for s in scripts:
                try:
                    result_obj = s.analyze(eandro_apk, *ana_objs)

                    # we only need the meta infos of the apk
                    if eandro_apk is not None:
                        fastapk = FastApk.load_from_eandroapk(eandro_apk)

                    # set androguard analysis time if script wants stats
                    s.add_apk_androguard_analyze_time(time_s)

                    # link to apk
                    if isinstance(result_obj, ResultObject):
                        result_obj.set_apk(fastapk)

                    script_results.append(s)
                except Exception as e:
                    if propagate_error:
                        raise
                    else:
                        log.exception(AndroScriptError(s, e))

            if fastapk is not None:
                # use fastapk to only store the meta information, not the apk data!
                return [fastapk, script_results]

    # interrupt analysis if analysis objects could not be created!
    except DexError as e:
        log.exception(e)
def create_analyzer(storage, script_list, apks_or_paths = None,
                    mode = ANALYZE_MODE_PARALLEL, concurrency = None,
                    serialize_apks = True):
    ''' Create the analyzer only.

    Parameters
    ----------
    storage : RedundantStorage
        The store to use.
    script_list : list<str>
        List of paths to scripts (complete filename with extension).
    apks_or_paths : list<str> or list<Apk>, optional (default is None)
        List of `Apk` or paths to the apks which shall be analyzed with the given scripts.
        If you analyze from paths the `import_date` is not set!
    mode : str, optional (default is `ANALYZE_MODE_PARALLEL`)
        Do a parallel analysis by default.
        Choose between `ANALYZE_MODE_PARALLEL`, `ANALYZE_MODE_NON_PARALLEL` and `ANALYZE_MODE_DISTRIBUTED`.
    concurrency : int, optional (default is number of cpu cores)
        Number of workers to spawn.
    serialize_apks : bool, optional (default is True)
        If true, serialize the .apk data.
        Otherwise the id (hash) of the apk will be sent and fetched by the worker from the result db.
        Be sure to import the apks to the result db first!
    '''
    from androlyze.model.script import ScriptUtil
    from androlyze.analyze.exception import AndroScriptError

    try:
        # list<type<AndroScript>>
        androscript_list = ScriptUtil.import_scripts(script_list)
        instantiated_scripts = sorted(ScriptUtil.instantiate_scripts(androscript_list, script_paths = script_list))

        if len(instantiated_scripts) == 0:
            log.warn("No scripts supplied!")
            return

        # get hashes for `AndroScript`s so that we can set the hash directly next time we instantiate the script
        script_hashes = [s.hash for s in instantiated_scripts]
        min_script_needs = ScriptUtil.get_minimum_script_options(instantiated_scripts)

        # log infos about scripts
        clilog.info('Loaded scripts:\n%s', '\n'.join((str(s) for s in instantiated_scripts)))
        log.info(ScriptUtil.androscript_options_descr(instantiated_scripts))

        if apks_or_paths:

            def create_analyzer():
                analyzer = None
                # argument for BaseAnalyzer
                args = storage, androscript_list, script_hashes, min_script_needs, apks_or_paths
                log.info("Mode: %s", mode)

                # normal analyzer
                if mode == ANALYZE_MODE_NON_PARALLEL:
                    from androlyze.analyze.Analyzer import Analyzer
                    analyzer = Analyzer(*args)
                # use parallel analyzer
                elif mode == ANALYZE_MODE_PARALLEL:
                    from androlyze.analyze.parallel.ParallelAnalyzer import ParallelAnalyzer
                    analyzer = ParallelAnalyzer(*args, concurrency = concurrency)
                # use distributed one
                elif mode == ANALYZE_MODE_DISTRIBUTED:
                    from androlyze.analyze.distributed.DistributedAnalyzer import DistributedAnalyzer
                    analyzer = DistributedAnalyzer(*args, concurrency = concurrency, serialize_apks = serialize_apks)

                return analyzer

            return create_analyzer()
    except ApkImportError as e:
        log.warn(e)
    except IOError as e:
        log.warn(AndroScriptError(e.filename, caused_by = e))
        sys.exit(1)
    except ImportError as e:
        log.exception(e)
    except Exception as e:
        log.exception(e)
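# Hedged usage sketch (not part of the original sources): wiring `create_analyzer`
# together with `run_analysis` shown above. Storage construction as well as the
# script and apk paths below are placeholders, not real project defaults.
storage = ...  # a configured RedundantStorage instance, set up as in your environment
analyzer = create_analyzer(storage,
                           ["scripts/ClassDetails.py"],       # hypothetical script path
                           apks_or_paths = ["a2dp.Vol.apk"],  # hypothetical apk path
                           mode = ANALYZE_MODE_NON_PARALLEL)
if analyzer is not None:
    cnt_analyzed = run_analysis(analyzer)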
def _analyze(self):
    ''' See doc of :py:method:`.BaseAnalyzer.analyze`. '''
    # try to get registered workers
    # if network fails at this point -> stop analysis
    try:
        clilog.info(CeleryUtil.get_workers_and_check_network())
    except NetworkError as e:
        log.critical(e)
        return 0

    # storage objects
    storage = self.storage

    clilog.info("Number of apks to analyze: %d", self._cnt_apks)

    try:
        # get analyze task
        analyze_task = tasks[CeleryConstants.get_analyze_task_name()]

        # create storage
        storage.create_or_open_sub_storages()

        # send tasks
        start = time()

        # apk generator over .apk or apk hashes
        apk_gen = AnalyzeUtil.apk_id_or_raw_data_gen(self.apks, force_raw_data=self.serialize_apks)

        clilog.info("Task publishing progress:")

        # send and serialize .apks
        # if analysis via path serialize them!
        if self.serialize_apks:
            log.info("sending .apks to message broker")
            self.group_result = group_result = GroupResult(results=[])

            for args in self.send_apk_args_generator(apk_gen):
                task = analyze_task.delay(*args)
                group_result.add(task)
        # send only apk id and let fetch via mongodb
        else:
            log.info("sending ids of apks")
            task_group = group((analyze_task.s(*args) for args in self.send_id_args_generator(apk_gen)))

            # publish tasks
            self.group_result = task_group()

        log.info("sending took %ss", (time() - start))
        sys.stderr.write("\nAnalysis progress:\n")

        # start showing analysis progress
        self.analyze_stats_view.start()

        # wait for results
        log.debug("joining on ResultGroup ... ")

        # setup callback
        callback_func = self.get_callback_func(self.success_handler, self.error_handler)
        CeleryUtil.join_native(self.group_result, propagate=False, callback=callback_func)

        clilog.info("\nanalysis done ... ")
        log.info("distributed analysis took %ss", (time() - start))

        return self.stop_analysis_view()
    except DatabaseOpenError as e:
        log.critical(e)
        return 0
    except (KeyboardInterrupt, Exception) as e:
        if not isinstance(e, KeyboardInterrupt):
            log.exception(e)
        log.warn("Interrupting distributed analysis ... Please wait a moment!")
        log.warn("revoking tasks on all workers ...")

        if celerysettings.CELERY_TASK_REVOCATION_ENABLED:
            # revoke tasks
            if self.group_result is None:
                # revoke via task ids
                log.debug("revoking while publishing tasks ...")
                self.task_collection.revoke_all(terminate=True, signal='SIGKILL')
            else:
                # revoke via GroupResult if yet available/created
                # first available after all tasks have been sent
                self.group_result.revoke(terminate=True, signal='SIGKILL')
            log.warn("revoked tasks and killed workers ...")

        # return number of analyzed apks
        return self.stop_analysis_view()
def format_query_result_db(res_cursor, distict_generator = False, count = False, raw = False, html = False):
    ''' Format the results from the result db (mongodb).

    Parameters
    ----------
    res_cursor : gridfs.grid_file.GridOutCursor or generator<object> or pymongo.cursor.Cursor
        First if non_document and non_document_raw.
        Second if distinct values wanted.
        Third otherwise.
    distict_generator : bool, optional (default is False)
        Res is generator<object> created from the distinct(...) method of mongodb.
        If generator<dict>, convert each dict to json.
        Otherwise just print.
    count : bool, optional (default is False)
        Only print count, not results.
    raw : bool, optional (default is False)
        Print raw data from gridfs.
        Otherwise print json.
        If `raw`, the data will not be converted to html!
    html : bool, optional (default is False)
        Format as html.

    Returns
    -------
    str
    '''
    from pymongo.errors import PyMongoError
    from androlyze.ui.util import HtmlUtil

    # if html enabled convert to table view if `json2html` is present
    # otherwise use pygmentize
    json_convert = lambda json: json
    if html:
        try:
            from json2html import json2html
            json_convert = lambda j: json2html.convert(json = j)
        except ImportError:
            from pygments import highlight
            from pygments.formatters import HtmlFormatter
            from pygments.lexers import get_lexer_by_name
            json_convert = lambda json: highlight(json, get_lexer_by_name('json'), HtmlFormatter())

    # collect results as list<str>
    resl = []

    def anl(text):
        ''' Append a newline '''
        # dont format raw data as html
        return '%s\n' % text if not html or raw else HtmlUtil.newline(HtmlUtil.prefy(text))

    try:
        # return count
        if count:
            cnt = 0
            if is_pymongo_cursor(res_cursor):
                cnt = res_cursor.count()
            elif distict_generator:
                cnt = len(list(res_cursor))
            return '%d' % cnt
        else:
            if distict_generator:
                for r in sorted(res_cursor):
                    if isinstance(r, dict):
                        r = dict2json(r)
                        resl.append(r)
                    elif isinstance(r, (str, unicode)):
                        resl.append(r)
            else:
                for i, res in enumerate(res_cursor, 1):
                    delimiter = '/* %d */' % i
                    text = HtmlUtil.newline(delimiter) if html else delimiter
                    if html:
                        text = HtmlUtil.redify(text)
                    resl.append(text)

                    # return raw data
                    if raw:
                        # gridfs.grid_file.GridOut
                        for gridout_obj in res:
                            resl.append(gridout_obj)
                    # return json
                    else:
                        j = dict2json(res)
                        # convert json (if enabled)
                        j = json_convert(j)
                        resl.append(j)

        # return result by joining single strings
        return ''.join([anl(res_str) for res_str in resl])
    except PyMongoError as e:
        log.exception(e)
def fetch_results_from_mongodb(self, rds, results, wait_for_db = True,
                               # progress
                               nice_progess = False, synced_entries = None, total_sync_entries = None):
    ''' Fetch some results from the result database and write them to disk.

    If data cannot be loaded from db, try until it can be.

    Parameters
    ----------
    rds : ResultDatabaseStorage
        The database to query for the results.
    results : list< tuple<id, gridfs (bool)> >
        Define which results shall be fetched.
    wait_for_db : bool, optional (default is True)
        Wait until data could be fetched from db.
    nice_progess : bool, optional (default is False)
        If enabled, show a nice progress bar on the cli.
    synced_entries : multiprocessing.Value<int>, optional (default is None)
        If supplied, store the number of already synced entries.
    total_sync_entries : multiprocessing.Value<int>, optional (default is None)
        If supplied, store the number of total entries to sync.

    Raises
    ------
    DatabaseLoadException
        If `wait_for_db` is False and an error occurred.
    '''
    # retry in ... seconds
    DATABASE_RETRY_TIME = 5

    # if true assume both counts are shared memory (Value)
    use_shared_memory = synced_entries is not None and total_sync_entries is not None

    if results is not None:
        results_stored = False
        while not results_stored:
            try:
                # get ids
                non_gridfs_ids, gridfs_ids = MongoUtil.split_result_ids(results)

                # counts
                cnt_non_gridfs_ids = len(non_gridfs_ids)
                cnt_gridfs_ids = len(gridfs_ids)

                if use_shared_memory:
                    total_sync_entries.value = cnt_gridfs_ids + cnt_non_gridfs_ids

                # gridfs raw data as well as metadata
                gridfs_entries_raw = []
                if gridfs_ids:
                    gridfs_entries_raw = rds.get_results_for_ids(gridfs_ids, non_document = True, non_document_raw = True)

                # regular documents (non gridfs)
                non_gridfs_entries = []
                if non_gridfs_ids:
                    non_gridfs_entries = rds.get_results_for_ids(non_gridfs_ids, non_document = False, non_document_raw = True)

                if not nice_progess:
                    log.debug("fetching %d non-documents (gridfs) ... ", cnt_gridfs_ids)

                for i, gridfs_entry_raw in enumerate(gridfs_entries_raw, 1):

                    # get our stored metadata (for script and apk)
                    gridfs_entry_meta = gridfs_entry_raw.metadata

                    if not nice_progess:
                        log.debug("getting results for %s", gridfs_entry_meta[RESOBJ_APK_META][RESOBJ_APK_META_PACKAGE_NAME])
                    else:
                        Util.print_dyn_progress(Util.format_progress(i, cnt_gridfs_ids))

                    # use apk to extract data from dict
                    fastapk = FastApk.load_from_result_dict(gridfs_entry_meta)

                    # get filename
                    file_name = gridfs_entry_raw.filename

                    # write results to disk
                    try:
                        self.store_custom_data(fastapk.package_name, fastapk.version_name, fastapk.hash, file_name, gridfs_entry_raw.read())
                    except FileSysStoreException as e:
                        log.exception(e)

                    # update shared memory progress indicator
                    if use_shared_memory:
                        with synced_entries.get_lock():
                            synced_entries.value += 1

                if not nice_progess:
                    log.debug("fetching %d documents (non-gridfs) ... ", cnt_non_gridfs_ids)

                for i, non_gridfs_entry in enumerate(non_gridfs_entries, 1):
                    if not nice_progess:
                        clilog.debug("getting results for %s" % non_gridfs_entry[RESOBJ_APK_META][RESOBJ_APK_META_PACKAGE_NAME])
                    else:
                        Util.print_dyn_progress(Util.format_progress(i, cnt_non_gridfs_ids))

                    # write results to disk
                    self.store_result_dict(non_gridfs_entry)

                    # update shared memory progress indicator
                    if use_shared_memory:
                        with synced_entries.get_lock():
                            synced_entries.value += 1

                # if not wait for db wanted stop here
                results_stored = True or not wait_for_db

            except (DatabaseLoadException, PyMongoError) as e:
                if not wait_for_db:
                    raise
                log.warn(e)
                Util.log_will_retry(DATABASE_RETRY_TIME, exc = e)
                sleep(DATABASE_RETRY_TIME)
    import_date = res_dict[RESOBJ_APK_META][RESOBJ_APK_META_IMPORT_DATE]
    tag = res_dict[RESOBJ_APK_META][RESOBJ_APK_META_TAG]
    build_date = res_dict[RESOBJ_APK_META][RESOBJ_APK_META_BUILD_DATE]

    return FastApk(package_name, version_name, path, _hash, import_date, tag, build_date = build_date)

if __name__ == '__main__':
    import json
    from androlyze.log.Log import log
    #from androlyze.model.script.AndroScript import AndroScript

    #APK_NAME = "/mnt/stuff/android/apks/com.parkdroid.apk"
    APK_NAME = "/mnt/stuff/androlyze/import/apk/com.whatsapp/2.7.3581/071435b4c72d45593ba64d411463ad18e02cbd3d90296d38f5b42d7e9d96ea9b/com.whatsapp_2.7.3581.apk"

    try:
        with open(APK_NAME, "rb") as f:
            print FastApk.fast_load_from_io(file_like_object = f)

        apk = FastApk.fast_load_from_io(file_like_object = None, apk_file_path = APK_NAME)
        apk.tag = "exploitable"
        print apk

        print json.dumps(apk.meta_dict(), indent = 4)
        print apk

        #res.log_true("check1", "checks")
        #res.log_true("check2", "checks")
        #storage.store_result_for_apk(apk, AndroScript("script1"), res)

    # androguard cannot load some apk files which the fast loader can
    except Exception as e:
        log.exception(e)

    print "done"