Пример #1
0
def write_analyze_task_results_to_fs(storage, group_result, chunked = False):
    '''
    Collect the successful task results and, if file-system storage is
    enabled, fetch them from mongodb so they get written to disk.

    Parameters
    ----------
    storage: RedundantStorage
    group_result : GroupResult
    chunked : bool, optional (default is False)
        If work has been divided into chunks.

    Returns
    -------
    int
        Number of successful tasks
    '''
    # nothing to report without a group result
    if group_result is None:
        return 0

    results = get_successful_analyze_task_results(group_result, chunked = chunked)

    # result writing to disk not wanted
    if storage.fs_storage_disabled():
        return 0

    clilog.info("Fetching all analysis results for storage ...")
    if results:
        try:
            storage.fetch_results_from_mongodb(results)
        except DatabaseLoadException as e:
            log.exception(e)
    return len(results)
Пример #2
0
    def _analyze(self, apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args,
                 **kwargs):

        # CFG
        for encoded_method in dalvik_vm_format.get_methods():
            try:
                method_analysis = vm_analysis.get_method(encoded_method)

                if encoded_method.get_code() == None:
                    continue

                classname = encoded_method.get_class_name()

                # skip android classes due to mongo db document limit
                if classname.find("Landroid") != -1:
                    continue

                ast = None
                if method_analysis is not None:
                    ast = AnaUtil.ast_for_method_analysis(method_analysis)

                if ast is not None:
                    self.cres += '%s\n\n' % pformat(ast)
            except Exception as e:
                log.exception(e)
Пример #3
0
    def delete_results(self,
                       where = None, non_document = False, **kwargs):
        ''' See doc of :py:meth:`.ResultStorageInterface.delete_results` '''
        coll = self.__get_collection(gridfs_obj = non_document)

        where = {} if where is None else where
        # merge the kwargs-derived clause into the caller-supplied filter
        # (note: intentionally mutates the caller's `where` dict in place)
        where.update(self.create_where_clause(kwargs, from_gridfs = non_document))

        deleted = 0
        try:
            log.debug("mongodb remove(%s)", where)

            if non_document:
                # gridfs: resolve the matching ids, then remove them one by one
                for _id in self.get_ids(where = where, non_document = non_document):
                    coll.delete(_id)
                    log.debug("Deleted element with id: %s from mongodb gridfs!", _id)
                    deleted += 1
            else:
                # normal collection: one bulk remove, read count from the result
                write_result = coll.remove(where, getLastError=True)
                if write_result is not None:
                    deleted = write_result["n"]

            return deleted

        except PyMongoError as e:
            # report how many deletions succeeded before the error
            log.exception(DatabaseDeleteException(self, where, e))
            return deleted
Пример #4
0
def print_query_result_db(res, distict_generator = False, count = False, raw = False, interactive = True):
    '''
    Print the results from the result db (mongodb).

    Parameters
    ----------
    res : gridfs.grid_file.GridOutCursor or generator<object> or pymongo.cursor.Cursor
        First if non_document and non_document_raw.
        Second if distinct values wanted.
        Third otherwise.
        The results to print
    distict_generator : bool, optional (default is False)
        Res is generator<object> created from the distinct(...) method of mongodb.
        If generator<dict>, convert each dict to json.
        Otherwise just print.
    count : bool, optional (default is False)
        Only print count, not results
    raw : bool, optional (default is False)
        Print raw data from gridfs
        Otherwise print json.
    interactive : bool, optional (default is True)
        Iterate interactively through the result cursor
    '''
    from pymongo.errors import PyMongoError

    try:
        # print count
        if count:
            cnt = 0
            # res is list
            if distict_generator:
                cnt = len(res)
            # res is cursor
            else:
                cnt = res.count()
            clilog.info(cnt)
        else:
            if distict_generator:
                for r in sorted(res):
                    if isinstance(r, dict):
                        # bugfix: convert the single item `r`, not the whole
                        # result sequence `res`
                        r = dict2json(r)
                    clilog.info(r)
            else:
                for i, res in enumerate(res, 1):
                    # interactive result view
                    if i != 1 and interactive and raw_input('Press any key to view next result or abort with "no" !)').lower() == 'no':
                        break
                    sys.stderr.write('/* {} */\n'.format(i))
                    # print raw data
                    if raw:
                        # gridfs.grid_file.GridOut
                        for gridout_obj in res:
                            clilog.info(gridout_obj)
                    # print json
                    else:
                        clilog.info(dict2json(res))

    except PyMongoError as e:
        log.exception(e)
Пример #5
0
def open_apk(apk_or_path = None, apk = None, raw = False, path = None):
    '''
    Open an apk and take over the meta information from `apk`.

    Parameters
    ----------
    apk_or_path : str, optional (default is None).
        Path to apk (or the raw data if `raw`).
    apk : Apk, optional (default is None)
        If given, take the meta infos from `apk`,
        so the hash does not have to be recomputed.
    raw : bool, optional (default is False)
        If specified, use `apk_or_path` as raw .apk data.
    path : str, optional (default is None)
        Can be used for `raw` to set the path of the `EAndroApk`.
        If not given, won't be set.

    Returns
    -------
    EAndroApk
    None
        If apk could not be opened.
    '''

    # description used in warnings below
    apk_descr = str(apk_or_path)
    if raw:
        apk_descr = "raw data"

    try:
        if raw:
            eandro_apk = EAndroApk(apk_or_path, raw = True)
            eandro_apk.path = path
        else:
            eandro_apk = EAndroApk(apk_or_path)

        if apk is not None:
            # keep meta infos and reuse the hash from the db
            eandro_apk.set_meta(apk)

        return eandro_apk
    except BadZipfile:
        log.warn("Apk %s is not a valid zip file!" % apk_descr)
    except (struct.error, IOError) as e:
        log.warn(CouldNotOpenApk(apk_descr, e))
    except Exception as e:
        log.exception(e)
Пример #6
0
def open_apk(apk_or_path=None, apk=None, raw=False, path=None):
    '''
    Open apk and set meta information from `apk`.

    Parameters
    ----------
    apk_or_path : str, optional (default is None).
        Path to apk.
    apk : Apk, optional (default is None)
        If given, take the meta infos from `apk`
        so we don't need to recompute the hash.
    raw : bool, optional (default is False)
        If specified, use `apk_or_path` as raw .apk data.
    path : str, optional (default is None)
        Can be used for `raw` to set the path of the `EAndroApk`.
        If not given, won't be set.

    Returns
    -------
    EAndroApk
    None
        If apk could not be opened.
    '''

    # how the apk is referred to in warnings
    apk_descr = str(apk_or_path)
    if raw:
        apk_descr = "raw data"

    try:
        eandro_apk = EAndroApk(apk_or_path, raw=True) if raw else EAndroApk(apk_or_path)
        if raw:
            # remember where the raw data came from
            eandro_apk.path = path

        if apk is not None:
            # we don't want to lose meta infos;
            # use the hash from db so we don't need to recompute
            eandro_apk.set_meta(apk)

        return eandro_apk
    except BadZipfile:
        log.warn("Apk %s is not a valid zip file!" % apk_descr)
    except (struct.error, IOError) as err:
        log.warn(CouldNotOpenApk(apk_descr, err))
    except Exception as err:
        log.exception(err)
Пример #7
0
def log_will_retry(secs, exc = None, what = ''):
    '''
    Log that `what` will be retried in `secs` seconds, optionally logging
    the exception that caused the retry first.

    Parameters
    ----------
    secs : int
        Retry in `secs` seconds.
    exc : Exception, optional (default is None)
        Exception to log.
    what : str, optional (default is '')
        What to try again.
    '''
    # log the cause before announcing the retry
    if exc is not None:
        log.exception(exc)
    log.warn("Trying %s again in %ss", what, secs)
Пример #8
0
    def test(script, apk_paths):
        '''
        Use this function to develop and test your script.

        E.g. find unregistered keys and other errors.

        Parameters
        ----------
        script : type
            The reference to the script which shall be tested (not instantiated!)
        apk_paths : iterable<str>
            Paths to apks

        Examples
        --------
        >>> for res in AndroScript.test(ClassDetails, ["../../../testenv/apks/a2dp.Vol.apk"]):
        ...     # get result object
        ...     print res
        ...     # get json
        ...     print res.write_to_json()

        Returns
        -------
        list<ResultObject>
            The `ResultObject` for every analyzed apk
        '''
        # local import avoids a circular import
        from androlyze.analyze.Analyzer import Analyzer

        results = []
        try:
            # instantiate once only to collect the minimum script options
            script_options = ScriptUtil.get_minimum_script_options(
                ScriptUtil.instantiate_scripts([script]))

            # options: storage, script_list, script_hashes, min_script_needs, apks_or_paths
            # the analyzer needs the scripts uninitialized!
            analyzer = Analyzer(None, [script], None, script_options, apk_paths)
            results = analyzer.analyze(test=True)
        except AndroScriptError as e:
            log.exception(e)

        return results
Пример #9
0
    def test(script, apk_paths):
        '''
        Develop and test your script with this helper.

        E.g. find unregistered keys and other errors.

        Parameters
        ----------
        script : type
            The reference to the script which shall be tested (not instantiated!)
        apk_paths : iterable<str>
            Paths to apks

        Examples
        --------
        >>> for res in AndroScript.test(ClassDetails, ["../../../testenv/apks/a2dp.Vol.apk"]):
        ...     # get result object
        ...     print res
        ...     # get json
        ...     print res.write_to_json()

        Returns
        -------
        list<ResultObject>
            The `ResultObject` for every analyzed apk
        '''
        # no circular import
        from androlyze.analyze.Analyzer import Analyzer

        res = []
        try:
            # initialized copies are only needed to read the options
            instantiated = ScriptUtil.instantiate_scripts([script])
            options = ScriptUtil.get_minimum_script_options(instantiated)

            # the Analyzer wants the uninstantiated script classes
            # (storage, script_list, script_hashes, min_script_needs, apks_or_paths)
            ana = Analyzer(None, [script], None, options, apk_paths)
            res = ana.analyze(test = True)
        except AndroScriptError as err:
            log.exception(err)

        return res
Пример #10
0
    def prefetch_apk(self, task_id, task, *args, **kwargs):
        ''' Prefetch the `APK`s if mongodb is used as distributed apk storage.
        If the prefetch fails, the task will be retried.
        '''
        try:
            # make sure the db connection is available
            self.__setup_db()

            _, _, _, apk_zipfile_or_hash, is_hash, fast_apk = kwargs["args"]
            # only hash references can be prefetched from the apk storage
            if not is_hash:
                return

            eandro_apk = self.__get_apk_from_storage(apk_zipfile_or_hash, apk = fast_apk)
            # eandro_apk is None if the file is not in the db!
            if eandro_apk is not None:
                # cache it for the upcoming analysis task
                apk_prefetch_pool[apk_zipfile_or_hash] = eandro_apk
                log.info("prefetched: %s, size apk cache: %d", eandro_apk.short_description(), len(apk_prefetch_pool))
        except (NoFile, DatabaseOpenError, DatabaseLoadException) as e:
            log.exception(e)
Пример #11
0
    def run(self):
        ''' Worker main loop: consume work items from `self.work_queue` until
        the STOP_SENTINEL is read, analyze each apk and collect the results.

        Every dequeued item is acknowledged with `task_done()` (even on error,
        via the `finally`), and the sentinel itself is acknowledged once the
        loop exits. A `KeyboardInterrupt` terminates the worker silently.
        '''
        work_queue = self.work_queue

        try:
            # iter() with a sentinel: blocks on get() and stops when the
            # returned value equals STOP_SENTINEL
            for work in iter(work_queue.get, STOP_SENTINEL):
                try:
                    # work is a 3-tuple; the third element is unused here
                    apk_path, _apk, _ = work

                    eandro_apk = AnalyzeUtil.open_apk(apk_path, apk=_apk)

                    # do the analysis
                    res = self.analyze_apk(eandro_apk)

                    # remember yet analyzed APKs
                    if eandro_apk:
                        self.analyzed_apks.put(
                            FastApk.load_from_eandroapk(eandro_apk))

                    # collect results
                    if res is not None:
                        self.__store_results([res])
                    else:
                        # increment analyzed apks counter
                        self.add_analyzed_apks_sm(1)

                except KeyboardInterrupt as e:
                    # re-raise so the outer handler can end the worker
                    raise e
                except Exception as e:
                    log.exception(e)
                finally:
                    # signal one task done
                    work_queue.task_done()

            # signal sentinel read
            work_queue.task_done()

            work_queue.close()
        # be silent
        except KeyboardInterrupt:
            pass
Пример #12
0
    def _analyze(self, apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args, **kwargs):
        """ This sample code is taken from `androguard` and has been modified!

        See Also
        --------
        http://code.google.com/p/androguard/wiki/RE#Source_Code
        """

        res = self.res

        # androguard.core.bytecodes.dvm.ClassDefItem
        for clazz in dalvik_vm_format.get_classes():
            try:
                key = clazz.get_name()
                # skip android classes due to mongo db document limit
                if key.find("Landroid") != -1:
                    continue
                # allows querying for package name
                res.register_keys([key], CAT_DECOMPILE)
                res.log(key, clazz.get_source().split("\n"), CAT_DECOMPILE)
            except Exception as e:
                log.exception(e)
Пример #13
0
    def _analyze(self, apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args, **kwargs):
        ''' This sample code is taken from `androguard` and has been modified!

        See Also
        --------
        http://code.google.com/p/androguard/wiki/RE#Source_Code
        '''

        res = self.res

        # androguard.core.bytecodes.dvm.ClassDefItem
        for clazz in dalvik_vm_format.get_classes():
            try:
                key = clazz.get_name() 
                # skip android classes due to mongo db document limit
                if key.find("Landroid") != -1:
                    continue
                # allows querying for package name
                res.register_keys([key], CAT_DECOMPILE)
                res.log(key, clazz.get_source().split("\n"), CAT_DECOMPILE)
            except Exception as e:
                log.exception(e)
Пример #14
0
    def _analyze(self, apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args, **kwargs):
        ''' This sample code is taken from `androguard` and has been modified!

        Concatenate the decompiled source of all non-android classes
        into `self.cres`.

        See Also
        --------
        http://code.google.com/p/androguard/wiki/RE#Source_Code
        '''

        res = self.res

        res.register_keys([CAT_DECOMPILE])

        # androguard.core.bytecodes.dvm.ClassDefItem
        for class_item in dalvik_vm_format.get_classes():
            try:
                name = class_item.get_name()
                # skip android classes
                if name.find("Landroid") != -1:
                    continue
                self.cres += class_item.get_source()
            except Exception as err:
                log.exception(err)
Пример #15
0
    def run(self):
        ''' Worker loop: read work items from the queue until the stop
        sentinel arrives, analyze each apk and store the results.
        A `KeyboardInterrupt` ends the worker silently. '''
        queue = self.work_queue

        try:
            while True:
                work = queue.get()
                # stop once the sentinel is read
                if work == STOP_SENTINEL:
                    break
                try:
                    apk_path, _apk, _ = work

                    eandro_apk = AnalyzeUtil.open_apk(apk_path, apk=_apk)

                    # do the analysis
                    res = self.analyze_apk(eandro_apk)

                    # remember yet analyzed APKs
                    if eandro_apk:
                        self.analyzed_apks.put(FastApk.load_from_eandroapk(eandro_apk))

                    if res is not None:
                        # collect results
                        self.__store_results([res])
                    else:
                        # increment analyzed apks counter
                        self.add_analyzed_apks_sm(1)

                except KeyboardInterrupt as e:
                    raise e
                except Exception as e:
                    log.exception(e)
                finally:
                    # signal one task done
                    queue.task_done()

            # acknowledge the sentinel itself
            queue.task_done()

            queue.close()
        # be silent
        except KeyboardInterrupt:
            pass
Пример #16
0
    def _analyze(self, apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args,
                 **kwargs):
        ''' This sample code is taken from `androguard` and has been modified!

        Appends the decompiled source of every non-android class
        to `self.cres`.

        See Also
        --------
        http://code.google.com/p/androguard/wiki/RE#Source_Code
        '''

        result = self.res

        result.register_keys([CAT_DECOMPILE])

        # decompile everything that is not an android framework class
        # (androguard.core.bytecodes.dvm.ClassDefItem)
        for clazz in dalvik_vm_format.get_classes():
            try:
                if "Landroid" in clazz.get_name():
                    # skip android classes
                    continue
                self.cres += clazz.get_source()
            except Exception as e:
                log.exception(e)
Пример #17
0
 def _analyze(self, apk, dalvik_vm_format, vm_analysis, gvm_analysis, *args, **kwargs):    
     
     # CFG
     for encoded_method in dalvik_vm_format.get_methods():
         try:
             method_analysis = vm_analysis.get_method(encoded_method)
         
             if encoded_method.get_code() == None:
                 continue
             
             classname = encoded_method.get_class_name()
             
             # skip android classes due to mongo db document limit
             if classname.find("Landroid") != -1:
                 continue
             
             ast = None
             if method_analysis is not None:
                 ast = AnaUtil.ast_for_method_analysis(method_analysis)
             
             if ast is not None:    
                 self.cres += '%s\n\n' % pformat(ast)
         except Exception as e:
             log.exception(e)
Пример #18
0
def run_analysis(analyzer):
    ''' Run the analysis with the `analyzer`.

    Parameters
    ----------
    analyzer : BaseAnalyzer

    Returns
    -------
    int
        Number of analyzed apks.
    '''
    from androlyze.analyze.exception import AndroScriptError

    try:
        analyzed_cnt = analyzer.analyze()
        # explicit == 0 check: a non-int return still reports the count line
        if analyzed_cnt == 0:
            log.warn("No apk file has been analyzed !")
        else:
            log.warn("Analyzed %s apks", analyzed_cnt)

        return analyzed_cnt
    except AndroScriptError as e:
        log.exception(e)
Пример #19
0
    def prefetch_apk(self, task_id, task, *args, **kwargs):
        ''' Prefetch the `APK`s if mongodb is used as distributed apk storage.
        If the prefetch fails, the task will be retried.
        '''
        try:
            # open db if not already opened
            self.__setup_db()

            task_args = kwargs["args"]
            _, _, _, apk_hash, is_hash, fast_apk = task_args
            # prefetch apk via hash if given
            if is_hash:
                # fetch the apk from the apk storage
                eandro_apk = self.__get_apk_from_storage(apk_hash,
                                                         apk=fast_apk)
                # None means the file is not in the db -> abort
                if eandro_apk is not None:
                    # keep it in the prefetch pool for the analysis task
                    apk_prefetch_pool[apk_hash] = eandro_apk
                    log.info("prefetched: %s, size apk cache: %d",
                             eandro_apk.short_description(),
                             len(apk_prefetch_pool))
        except (NoFile, DatabaseOpenError, DatabaseLoadException) as e:
            log.exception(e)
Пример #20
0
    def store_custom_data(self, package_name, version_name, _hash, file_name, data):
        '''
        Store custom data to the file system (with the result directory as root).

        Parameters
        ----------
        package_name : str
            Package name of the apk.
            Unique apk identifier (at least in the store)
        version_name : str
            Version name
        _hash : str
            The hash of the apk.
        file_name : str
            File name.
        data : object
            The data that shall be written to disk (written as str(data)).

        Raises
        ------
        FileSysStoreException
        '''
        try:
            # build and create the basic fs structure for this apk
            dest_dir = self.get_apk_res_path_all_args(package_name, version_name, _hash)
            self.create_filesys_structure(dest_dir)

            dest_file = join(dest_dir, file_name)
            try:
                with open(dest_file, "w") as fh:
                    fh.write(str(data))
            except IOError as err:
                # wrap the low-level error for the caller
                raise FileSysStoreException(dest_file, "custom data", self, err)
        except FileSysCreateStorageStructureException as err:
            # structure creation failed: log and give up silently
            log.exception(err)
Пример #21
0
                       tag,
                       build_date=build_date)


# Manual smoke test for FastApk loading (Python 2 script).
if __name__ == '__main__':
    import json
    from androlyze.log.Log import log

    #from androlyze.model.script.AndroScript import AndroScript

    #APK_NAME = "/mnt/stuff/android/apks/com.parkdroid.apk"
    # hard-coded local test apk path
    APK_NAME = "/mnt/stuff/androlyze/import/apk/com.whatsapp/2.7.3581/071435b4c72d45593ba64d411463ad18e02cbd3d90296d38f5b42d7e9d96ea9b/com.whatsapp_2.7.3581.apk"
    try:
        # load from an already opened file object ...
        with open(APK_NAME, "rb") as f:
            print FastApk.fast_load_from_io(file_like_object=f)

        # ... and load from a file path
        apk = FastApk.fast_load_from_io(file_like_object=None,
                                        apk_file_path=APK_NAME)
        apk.tag = "exploitable"
        print apk
        # dump the meta information as json
        print json.dumps(apk.meta_dict(), indent=4)

        print apk
        #res.log_true("check1", "checks")
        #res.log_true("check2", "checks")
        #storage.store_result_for_apk(apk, AndroScript("script1"), res)
        # androguard cannot load some apk files which the fast loader can
    except Exception as e:
        # best-effort demo: just log any failure
        log.exception(e)
    print "done"
Пример #22
0
def print_query_result_db(res,
                          distict_generator=False,
                          count=False,
                          raw=False,
                          interactive=True):
    '''
    Print the results from the result db (mongodb).

    Parameters
    ----------
    res : gridfs.grid_file.GridOutCursor or generator<object> or pymongo.cursor.Cursor
        First if non_document and non_document_raw.
        Second if distinct values wanted.
        Third otherwise.
        The results to print
    distict_generator : bool, optional (default is False)
        Res is generator<object> created from the distinct(...) method of mongodb.
        If generator<dict>, convert each dict to json.
        Otherwise just print.
    count : bool, optional (default is False)
        Only print count, not results
    raw : bool, optional (default is False)
        Print raw data from gridfs
        Otherwise print json.
    interactive : bool, optional (default is True)
        Iterate interactively through the result cursor
    '''
    from pymongo.errors import PyMongoError

    try:
        # print count
        if count:
            cnt = 0
            # res is list
            if distict_generator:
                cnt = len(res)
            # res is cursor
            else:
                cnt = res.count()
            clilog.info(cnt)
        else:
            if distict_generator:
                for r in sorted(res):
                    if isinstance(r, dict):
                        # bugfix: convert the single item `r`,
                        # not the whole result sequence `res`
                        r = dict2json(r)
                    clilog.info(r)
            else:
                for i, res in enumerate(res, 1):
                    # interactive result view
                    if i != 1 and interactive and raw_input(
                            'Press any key to view next result or abort with "no" !)'
                    ).lower() == 'no':
                        break
                    sys.stderr.write('/* {} */\n'.format(i))
                    # print raw data
                    if raw:
                        # gridfs.grid_file.GridOut
                        for gridout_obj in res:
                            clilog.info(gridout_obj)
                    # print json
                    else:
                        clilog.info(dict2json(res))

    except PyMongoError as e:
        log.exception(e)
Пример #23
0
def analyze_apk_ana_objs(ana_objs,
                         time_s,
                         eandro_apk,
                         scripts,
                         propagate_error=False,
                         reset_scripts=True):
    ''' Analyze the `eandro_apk` with the given `scripts` assuming each `AndroScript`
    needs at least `min_script_needs`.

    Be sure that you reset the `scripts`!

    Parameters
    ----------
    ana_objs : iterable
        Analysis objects unpacked into each script's `analyze` call
        (presumably the androguard objects -- TODO confirm against caller).
    time_s : float
        Androguard analysis time, recorded on scripts that collect stats.
    eandro_apk : EAndroApk
        The apk.
    scripts : iterable<AndroScript>
        The scripts to use for the analysis.
    propagate_error : bool, optional (default is False)
        If true propagate errors.
    reset_scripts : bool, optional (default is True)
        If given, reset the `AndroScript` before analyzing.

    Returns
    -------
    list<FastApk, list<AndroScript>>
        Uses `FastApk` to only store the meta information, not the apk data!
    None
        If error happened.
    '''
    from androlyze.analyze.exception import AndroScriptError

    try:
        # reset scripts
        if reset_scripts:
            for s in scripts:
                s.reset()

        if eandro_apk is not None:
            fastapk = None

            # scripts that completed on this apk
            script_results = []
            for s in scripts:
                try:
                    result_obj = s.analyze(eandro_apk, *ana_objs)

                    # we only need the meta infos of the apk
                    if eandro_apk is not None:
                        fastapk = FastApk.load_from_eandroapk(eandro_apk)

                    # set androguard analysis time if script wants stats
                    s.add_apk_androguard_analyze_time(time_s)

                    # link to apk
                    if isinstance(result_obj, ResultObject):
                        result_obj.set_apk(fastapk)

                    script_results.append(s)
                except Exception as e:
                    if propagate_error:
                        raise
                    else:
                        # wrap and log, but keep running the remaining scripts
                        log.exception(AndroScriptError(s, e))

            if fastapk is not None:
                # use fastapk to only store the meta information, not the apk data!
                return [fastapk, script_results]

    # interrupt analysis if analysis objects could not be created!
    except DexError as e:
        log.exception(e)
Пример #24
0
def create_analyzer(storage, script_list, apks_or_paths = None,
                   mode = ANALYZE_MODE_PARALLEL, concurrency = None,
                   serialize_apks = True
                   ):
    '''
    Create the analyzer only.

    Parameters
    ----------
    storage : RedundantStorage
        The store to use.
    script_list : list<str>
        List of paths to scripts (complete filename with extension).
    apks_or_paths: list<str> or list<Apk>, optional (default is None)
        List of `Apk` or paths to the apks which shall be analyzed with the given scripts
        If you analyze from paths the `import_date` is not set!
    mode : str, optional (default is `ANALYZE_MODE_PARALLEL`)
        Do an parallel analysis by default. Choose between:
        `ANALYZE_MODE_NON_PARALLEL`, `ANALYZE_MODE_PARALLEL`,
        `ANALYZE_MODE_DISTRIBUTED`.
    concurrency : int, optional (default is number of cpu cores)
        Number of workers to spawn.
    serialize_apks : bool, optional (default is True)
        If true, serialize .apk.
        Otherwise id (hash) of the apk will be send and fetched by the worker from the result db.
        Be sure to import the apks to the result db first!

    Returns
    -------
    BaseAnalyzer subclass
        The created analyzer, matching `mode`.
    None
        If no scripts could be loaded, `apks_or_paths` is empty,
        or a non-fatal error occurred (it is logged).
    '''
    from androlyze.model.script import ScriptUtil
    from androlyze.analyze.exception import AndroScriptError

    try:
        # list<type<AndroScript>>
        androscript_list = ScriptUtil.import_scripts(script_list)
        instantiated_scripts = sorted(ScriptUtil.instantiate_scripts(androscript_list, script_paths = script_list))

        if len(instantiated_scripts) == 0:
            log.warn("No scripts supplied!")
            return

        # get hashes for `AndroScript`s so that we can set the hash directly next time we instantiate the script
        script_hashes = [s.hash for s in instantiated_scripts]
        min_script_needs = ScriptUtil.get_minimum_script_options(instantiated_scripts)

        # log infos about scripts
        clilog.info('Loaded scripts:\n%s', '\n'.join((str(s) for s in instantiated_scripts)))
        log.info(ScriptUtil.androscript_options_descr(instantiated_scripts))

        if apks_or_paths:

            def _instantiate_analyzer():
                ''' Build the analyzer matching `mode`.
                (Renamed from `create_analyzer` to stop shadowing the enclosing function.) '''
                analyzer = None
                # arguments shared by every BaseAnalyzer subclass
                args = storage, androscript_list, script_hashes, min_script_needs, apks_or_paths
                log.info("Mode: %s", mode)

                # normal analyzer
                if mode == ANALYZE_MODE_NON_PARALLEL:
                    from androlyze.analyze.Analyzer import Analyzer
                    analyzer = Analyzer(*args)
                # use parallel analyzer
                elif mode == ANALYZE_MODE_PARALLEL:
                    from androlyze.analyze.parallel.ParallelAnalyzer import ParallelAnalyzer
                    analyzer = ParallelAnalyzer(*args, concurrency = concurrency)
                # use distributed one
                elif mode == ANALYZE_MODE_DISTRIBUTED:
                    from androlyze.analyze.distributed.DistributedAnalyzer import DistributedAnalyzer
                    analyzer = DistributedAnalyzer(*args, concurrency = concurrency, serialize_apks = serialize_apks)

                return analyzer

            return _instantiate_analyzer()

    except ApkImportError as e:
        log.warn(e)
    except IOError as e:
        log.warn(AndroScriptError(e.filename, caused_by = e))
        sys.exit(1)
    except ImportError as e:
        log.exception(e)
    except Exception as e:
        log.exception(e)
Пример #25
0
    def _analyze(self):
        ''' See doc of :py:method:`.BaseAnalyzer.analyze`.

        Publishes one celery analysis task per apk (serialized .apk data or
        only the apk id, depending on `self.serialize_apks`), then blocks
        until all results arrived or the user interrupts.

        Returns
        -------
        int
            Number of analyzed apks (0 if the network or the result
            database is unavailable).
        '''

        # try to get registered workers
        # if the network fails at this point -> stop analysis
        try:
            clilog.info(CeleryUtil.get_workers_and_check_network())
        except NetworkError as e:
            log.critical(e)
            return 0

        # storage objects
        storage = self.storage

        clilog.info("Number of apks to analyze: %d", self._cnt_apks)

        try:
            # get analyze task
            analyze_task = tasks[CeleryConstants.get_analyze_task_name()]

            # create storage
            storage.create_or_open_sub_storages()

            # send tasks
            start = time()

            # apk generator over .apk or apk hashes
            apk_gen = AnalyzeUtil.apk_id_or_raw_data_gen(
                self.apks, force_raw_data=self.serialize_apks)

            clilog.info("Task publishing progress:")

            # send and serialize .apks
            # if analysis via path serialize them!
            if self.serialize_apks:
                log.info("sending .apks to message broker")
                # build the GroupResult incrementally while publishing
                self.group_result = group_result = GroupResult(results=[])

                for args in self.send_apk_args_generator(apk_gen):
                    task = analyze_task.delay(*args)
                    group_result.add(task)

            # send only apk id and let fetch via mongodb
            else:
                log.info("sending ids of apks")

                task_group = group(
                    (analyze_task.s(*args)
                     for args in self.send_id_args_generator(apk_gen)))

                # publish tasks
                self.group_result = task_group()

            log.info("sending took %ss", (time() - start))
            sys.stderr.write("\nAnalysis progress:\n")

            # start showing analysis progress
            self.analyze_stats_view.start()

            # wait for results
            log.debug("joining on ResultGroup ... ")

            # setup callback
            callback_func = self.get_callback_func(self.success_handler,
                                                   self.error_handler)
            CeleryUtil.join_native(self.group_result,
                                   propagate=False,
                                   callback=callback_func)

            clilog.info("\nanalysis done ... ")
            log.info("distributed analysis took %ss", (time() - start))

            return self.stop_analysis_view()
        except DatabaseOpenError as e:
            log.critical(e)
            return 0

        except (KeyboardInterrupt, Exception) as e:
            if not isinstance(e, KeyboardInterrupt):
                log.exception(e)
            log.warn(
                "Interrupting distributed analysis ... Please wait a moment!")
            log.warn("revoking tasks on all workers ...")

            if celerysettings.CELERY_TASK_REVOCATION_ENABLED:
                # revoke tasks
                if self.group_result is None:
                    # revoke via task ids
                    log.debug("revoking while publishing tasks ...")

                    self.task_collection.revoke_all(terminate=True,
                                                    signal='SIGKILL')
                else:
                    # revoke via GroupResult if yet available/created
                    # first available after all tasks have been sent
                    self.group_result.revoke(terminate=True, signal='SIGKILL')
                log.warn("revoked tasks and killed workers ...")

            # return number of analyzed apks
            return self.stop_analysis_view()
Пример #26
0
def format_query_result_db(res_cursor, distict_generator = False, count = False, raw = False, html = False):
    '''
    Format the results from the result db (mongodb).

    Parameters
    ----------
    res_cursor : gridfs.grid_file.GridOutCursor or generator<object> or pymongo.cursor.Cursor
        First if non_document and non_document_raw.
        Second if distinct values wanted.
        Third otherwise.
    distict_generator : bool, optional (default is False)
        Res is generator<object> created from the distinct(...) method of mongodb.
        If generator<dict>, convert each dict to json.
        Otherwise just print.
    count : bool, optional (default is False)
        Only print count, not results.
    raw : bool, optional (default is False)
        Print raw data from gridfs.
        Otherwise print json.
        If `raw`, will not be converted to html!
    html : bool, optional (default is False)
        Format as html.

    Returns
    -------
    str
        The formatted results (or the count as a string).
        None if a database error occurred (it is logged).
    '''
    from pymongo.errors import PyMongoError
    from androlyze.ui.util import HtmlUtil

    # if html enabled convert to table view if `json2html` is present
    # otherwise use pygmentize
    # (lambda param renamed from `json` to avoid shadowing the json module)
    json_convert = lambda j: j
    if html:
        try:
            from json2html import json2html
            json_convert = lambda j: json2html.convert(json = j)
        except ImportError:
            from pygments import highlight
            from pygments.formatters import HtmlFormatter
            from pygments.lexers import get_lexer_by_name

            json_convert = lambda j: highlight(j, get_lexer_by_name('json'), HtmlFormatter())

    # collect results as list<str>
    resl = []

    def anl(text):
        ''' Append a newline (html-aware). '''
        # dont format raw data as html
        return '%s\n' % text if not html or raw else HtmlUtil.newline(HtmlUtil.prefy(text))

    try:
        # return count only
        if count:
            cnt = 0

            if is_pymongo_cursor(res_cursor):
                cnt = res_cursor.count()
            elif distict_generator:
                cnt = len(list(res_cursor))

            return '%d' % cnt

        else:
            if distict_generator:
                for r in sorted(res_cursor):
                    if isinstance(r, dict):
                        # bugfix: convert the single distinct value `r`,
                        # not the whole cursor `res_cursor`
                        r = dict2json(r)
                        resl.append(r)
                    elif isinstance(r, (str, unicode)):
                        resl.append(r)
            else:
                for i, res in enumerate(res_cursor, 1):
                    # numbered delimiter between result entries
                    delimiter = '/* %d */' % i
                    text = HtmlUtil.newline(delimiter) if html else delimiter
                    if html: text = HtmlUtil.redify(text)
                    resl.append(text)
                    # return raw data
                    if raw:
                        # gridfs.grid_file.GridOut
                        for gridout_obj in res:
                            resl.append(gridout_obj)
                    # return json
                    else:
                        j = dict2json(res)
                        # convert json (if enabled)
                        j = json_convert(j)
                        resl.append(j)
        # return result by joining single strings
        return ''.join([anl(res_str) for res_str in resl])
    except PyMongoError as e:
        log.exception(e)
Пример #27
0
    def fetch_results_from_mongodb(self, rds, results, wait_for_db = True,
                                   # progress
                                   nice_progess = False, synced_entries = None, total_sync_entries = None):
        '''
        Fetch some results from the result database and write them to disk.

        If data cannot be loaded from db, try until it can be
        (retrying every `DATABASE_RETRY_TIME` seconds).

        Parameters
        ----------
        rds : ResultDatabaseStorage
            The database to query for the results.
        results : list< tuple<id, gridfs (bool)> >
            Define which results shall be fetched.
        wait_for_db : bool, optional (default is True)
            Wait until data could be fetched from db.
        nice_progess : bool, optional (default is False)
            If enabled show some nice progress bar on the cli.
        synced_entries : multiprocessing.Value<int>, optional (default is None)
            If supplied store number of already synced entries.
        total_sync_entries : multiprocessing.Value<int>, optional (default is None)
            If supplied store number of total entries to sync.

        Raises
        ------
        DatabaseLoadException
            If `wait_for_db` is False and an error occurred.
        '''
        # retry in ... seconds
        DATABASE_RETRY_TIME = 5

        # if true assume both counts are shared memory (Value)
        use_shared_memory = synced_entries is not None and total_sync_entries is not None

        if results is not None:
            results_stored = False
            while not results_stored:
                try:
                    # get ids
                    non_gridfs_ids, gridfs_ids = MongoUtil.split_result_ids(results)

                    # counts
                    cnt_non_gridfs_ids = len(non_gridfs_ids)
                    cnt_gridfs_ids = len(gridfs_ids)

                    if use_shared_memory:
                        total_sync_entries.value = cnt_gridfs_ids + cnt_non_gridfs_ids

                    # gridfs raw data as well as metadata
                    gridfs_entries_raw = []
                    if gridfs_ids:
                        gridfs_entries_raw = rds.get_results_for_ids(gridfs_ids, non_document = True, non_document_raw = True)

                    # regular documents (non gridfs)
                    non_gridfs_entries = []
                    if non_gridfs_ids:
                        non_gridfs_entries = rds.get_results_for_ids(non_gridfs_ids, non_document = False, non_document_raw = True)

                    if not nice_progess:
                        log.debug("fetching %d non-documents (gridfs) ... ", cnt_gridfs_ids)

                    for i, gridfs_entry_raw in enumerate(gridfs_entries_raw, 1):

                        # get our stored metadata (for script and apk)
                        gridfs_entry_meta = gridfs_entry_raw.metadata

                        if not nice_progess:
                            log.debug("getting results for %s", gridfs_entry_meta[RESOBJ_APK_META][RESOBJ_APK_META_PACKAGE_NAME])
                        else:
                            Util.print_dyn_progress(Util.format_progress(i, cnt_gridfs_ids))

                        # use apk to extract data from dict
                        fastapk = FastApk.load_from_result_dict(gridfs_entry_meta)
                        # get filename
                        file_name = gridfs_entry_raw.filename

                        # write results to disk
                        try:
                            self.store_custom_data(fastapk.package_name, fastapk.version_name, fastapk.hash, file_name, gridfs_entry_raw.read())
                        except FileSysStoreException as e:
                            log.exception(e)

                        # update shared memory progress indicator
                        if use_shared_memory:
                            with synced_entries.get_lock():
                                synced_entries.value += 1

                    if not nice_progess:
                        log.debug("fetching %d documents (non-gridfs) ... ", cnt_non_gridfs_ids)

                    for i, non_gridfs_entry in enumerate(non_gridfs_entries, 1):
                        if not nice_progess:
                            clilog.debug("getting results for %s" % non_gridfs_entry[RESOBJ_APK_META][RESOBJ_APK_META_PACKAGE_NAME])
                        else:
                            Util.print_dyn_progress(Util.format_progress(i, cnt_non_gridfs_ids))

                        # write results to disk
                        self.store_result_dict(non_gridfs_entry)

                        # update shared memory progress indicator
                        if use_shared_memory:
                            with synced_entries.get_lock():
                                synced_entries.value += 1

                    # everything stored -> leave the retry loop
                    # (was `True or not wait_for_db`, which is always True anyway)
                    results_stored = True

                except (DatabaseLoadException, PyMongoError) as e:
                    if not wait_for_db:
                        raise
                    log.warn(e)
                    Util.log_will_retry(DATABASE_RETRY_TIME, exc = e)
                    sleep(DATABASE_RETRY_TIME)
Пример #28
0
        import_date = res_dict[RESOBJ_APK_META][RESOBJ_APK_META_IMPORT_DATE]
        tag = res_dict[RESOBJ_APK_META][RESOBJ_APK_META_TAG]
        build_date = res_dict[RESOBJ_APK_META][RESOBJ_APK_META_BUILD_DATE]
        return FastApk(package_name, version_name, path, _hash, import_date, tag, build_date = build_date)

if __name__ == '__main__':
    # Manual smoke test: load an apk with the fast loader and dump its meta data.
    # NOTE(review): Python 2 print statements; depends on a hard-coded local
    # apk path -- adjust `APK_NAME` before running.
    import json
    from androlyze.log.Log import log

    #from androlyze.model.script.AndroScript import AndroScript

    #APK_NAME = "/mnt/stuff/android/apks/com.parkdroid.apk"
    APK_NAME = "/mnt/stuff/androlyze/import/apk/com.whatsapp/2.7.3581/071435b4c72d45593ba64d411463ad18e02cbd3d90296d38f5b42d7e9d96ea9b/com.whatsapp_2.7.3581.apk"
    try:
        # load from an already-opened file object
        with open(APK_NAME, "rb") as f:
            print FastApk.fast_load_from_io(file_like_object = f)

        # load from a path only
        apk = FastApk.fast_load_from_io(file_like_object = None, apk_file_path = APK_NAME)
        apk.tag = "exploitable"
        print apk
        print json.dumps(apk.meta_dict(), indent = 4)

        print apk
        #res.log_true("check1", "checks")
        #res.log_true("check2", "checks")
        #storage.store_result_for_apk(apk, AndroScript("script1"), res)
        # androguard cannot load some apk files which the fast loader can
    except Exception as e:
        log.exception(e)
    print "done"
Пример #29
0
    def _analyze(self):
        ''' See doc of :py:method:`.BaseAnalyzer.analyze`.

        Publishes one celery analysis task per apk (serialized .apk data or
        only the apk id, depending on `self.serialize_apks`), then blocks
        until all results arrived or the user interrupts.

        Returns
        -------
        int
            Number of analyzed apks (0 if the network or the result
            database is unavailable).
        '''

        # try to get registered workers
        # if the network fails at this point -> stop analysis
        try:
            clilog.info(CeleryUtil.get_workers_and_check_network())
        except NetworkError as e:
            log.critical(e)
            return 0

        # storage objects
        storage = self.storage

        clilog.info("Number of apks to analyze: %d", self._cnt_apks)

        try:
            # get analyze task
            analyze_task = tasks[CeleryConstants.get_analyze_task_name()]

            # create storage
            storage.create_or_open_sub_storages()

            # send tasks
            start = time()

            # apk generator over .apk or apk hashes
            apk_gen = AnalyzeUtil.apk_id_or_raw_data_gen(self.apks, force_raw_data = self.serialize_apks)

            clilog.info("Task publishing progress:")

            # send and serialize .apks
            # if analysis via path serialize them!
            if self.serialize_apks:
                log.info("sending .apks to message broker")
                # build the GroupResult incrementally while publishing
                self.group_result = group_result = GroupResult(results = [])

                for args in self.send_apk_args_generator(apk_gen):
                    task = analyze_task.delay(*args)
                    group_result.add(task)

            # send only apk id and let fetch via mongodb
            else:
                log.info("sending ids of apks")

                task_group = group((analyze_task.s(*args) for args in self.send_id_args_generator(apk_gen)))

                # publish tasks
                self.group_result = task_group()

            log.info("sending took %ss", (time() - start))
            sys.stderr.write("\nAnalysis progress:\n")

            # start showing analysis progress
            self.analyze_stats_view.start()

            # wait for results
            log.debug("joining on ResultGroup ... ")

            # setup callback
            callback_func = self.get_callback_func(self.success_handler, self.error_handler)
            CeleryUtil.join_native(self.group_result, propagate = False, callback = callback_func)

            clilog.info("\nanalysis done ... ")
            log.info("distributed analysis took %ss", (time() - start))

            return self.stop_analysis_view()
        except DatabaseOpenError as e:
            log.critical(e)
            return 0

        except (KeyboardInterrupt, Exception) as e:
            if not isinstance(e, KeyboardInterrupt):
                log.exception(e)
            log.warn("Interrupting distributed analysis ... Please wait a moment!")
            log.warn("revoking tasks on all workers ...")

            if celerysettings.CELERY_TASK_REVOCATION_ENABLED:
                # revoke tasks
                if self.group_result is None:
                    # revoke via task ids
                    log.debug("revoking while publishing tasks ...")

                    self.task_collection.revoke_all(terminate = True, signal = 'SIGKILL')
                else:
                    # revoke via GroupResult if yet available/created
                    # first available after all tasks have been sent
                    self.group_result.revoke(terminate = True, signal = 'SIGKILL')
                log.warn("revoked tasks and killed workers ...")

            # return number of analyzed apks
            return self.stop_analysis_view()
Пример #30
0
def analyze_apk_ana_objs(ana_objs, time_s, eandro_apk, scripts, propagate_error = False, reset_scripts = True):
    ''' Analyze the `eandro_apk` with the given `scripts` assuming each `AndroScript`
    needs at least `min_script_needs`.

    Be sure that you reset the `scripts`!

    Parameters
    ----------
    ana_objs : iterable
        Androguard analysis objects, passed through to each script's `analyze`.
    time_s : float
        Androguard analysis time (seconds); recorded on scripts that collect stats.
    eandro_apk : EAndroApk
        The apk.
    scripts : iterable<AndroScript>
        The scripts to use for the analysis.
    propagate_error : bool, optional (default is False)
        If true propagate errors.
    reset_scripts : bool, optional (default is True)
        If given, reset the `AndroScript` before analyzing.

    Returns
    -------
    list<FastApk, list<AndroScript>>
        Uses `FastApk` to only store the meta information, not the apk data!
    None
        If error happened.
    '''
    from androlyze.analyze.exception import AndroScriptError

    try:
        # reset scripts
        if reset_scripts:
            for s in scripts:
                s.reset()

        if eandro_apk is not None:
            fastapk = None

            script_results = []
            for s in scripts:
                try:
                    result_obj = s.analyze(eandro_apk, *ana_objs)

                    # we only need the meta infos of the apk -- load them once,
                    # after the first successful script run (was re-loaded per script)
                    if fastapk is None:
                        fastapk = FastApk.load_from_eandroapk(eandro_apk)

                    # set androguard analysis time if script wants stats
                    s.add_apk_androguard_analyze_time(time_s)

                    # link to apk
                    if isinstance(result_obj, ResultObject):
                        result_obj.set_apk(fastapk)

                    script_results.append(s)
                except Exception as e:
                    if propagate_error:
                        raise
                    else:
                        log.exception(AndroScriptError(s, e))

            # fastapk stays None if every script failed -> return None then
            if fastapk is not None:
                # use fastapk to only store the meta information, not the apk data!
                return [fastapk, script_results]

    # interrupt analysis if analysis objects could not be created!
    except DexError as e:
        log.exception(e)