Example #1
def _start_single_worker_process(self, process_index):
    process = ExceptionSafeProcess(
        target=self.worker,
        name='Analysis-Worker-{}'.format(process_index),
        args=(process_index, ))
    process.start()
    self.workers.append(process)
Example #2
def test_new_worker_was_started():
    def target():
        pass

    old, new = ExceptionSafeProcess(target=target), ExceptionSafeProcess(target=target)

    assert new_worker_was_started(old, new)
    assert not new_worker_was_started(old, old)
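
The helper new_worker_was_started itself is not part of this listing. Judging only from the assertions above, a minimal sketch that satisfies the test could simply compare process identity (an assumption, not necessarily the project's actual implementation):

def new_worker_was_started(old_process, new_process):
    # Hypothetical sketch: a restart creates a fresh process object,
    # so it is enough to check that the two references differ.
    return new_process is not old_process
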
Example #3
def __init__(self, config=None, db_interface=None, testing=False, callback=None):
    self.config = config
    self.db_interface = db_interface if db_interface else CompareDbInterface(config=config)
    self.stop_condition = Value('i', 1)
    self.in_queue = Queue()
    self.callback = callback
    self.compare_module = Compare(config=self.config, db_interface=self.db_interface)
    self.worker = ExceptionSafeProcess(target=self._compare_scheduler_main)
    if not testing:
        self.start()
Example #4
class TaggingDaemon:
    def __init__(self, analysis_scheduler=None, db_interface=None):
        self.parent = analysis_scheduler
        self.config = self.parent.config
        self.db_interface = db_interface if db_interface else self.parent.db_backend_service
        self.stop_condition = Value('i', 0)

        self.start_tagging_process()
        logging.info('Tagging daemon online')

    def shutdown(self):
        self.stop_condition.value = 1
        self.tagging_process.join()
        logging.info('Tagging daemon offline')

    def start_tagging_process(self):
        self.tagging_process = ExceptionSafeProcess(
            target=self._analysis_tag_scheduler_main)
        self.tagging_process.start()

    def _analysis_tag_scheduler_main(self):
        while self.stop_condition.value == 0:
            self._fetch_next_tag()

    def _fetch_next_tag(self):
        try:
            tags = self.parent.tag_queue.get(
                timeout=float(self.config['ExpertSettings']['block_delay']))
        except Empty:
            return

        if not tags['notags']:
            if self.db_interface.existence_quick_check(tags['uid']):
                self._process_tags(tags)
            else:
                self.parent.tag_queue.put(tags)

    def _process_tags(self, tags):
        uid = tags['uid']
        plugin_name = tags['plugin']
        for tag_name, tag in tags['tags'].items():
            if tag['propagate']:
                # Tags should be deleted as well, how ?
                self.db_interface.update_analysis_tags(uid=uid,
                                                       plugin_name=plugin_name,
                                                       tag_name=tag_name,
                                                       tag=tag)
                logging.debug('Tag {} set for plugin {} and uid {}'.format(
                    tag_name, plugin_name, uid))
Example #5
def test_check_worker_restart(caplog):
    config = get_config_for_testing()
    config.set('ExpertSettings', 'throw_exceptions', 'false')

    worker = ExceptionSafeProcess(target=breaking_process, args=(True, ))
    process_list = [worker]
    worker.start()

    sleep(1)
    with caplog.at_level(logging.INFO):
        result = check_worker_exceptions(process_list, 'foo', config, worker_function=lambda _: None)
        assert not result
        assert len(process_list) == 1
        assert process_list[0] != worker
        assert 'Exception in foo' in caplog.messages[0]
        assert 'restarting foo' in caplog.messages[-1]
        process_list[0].join()
Example #6
def test_check_worker_exceptions():
    config = get_config_for_testing()
    config.set('ExpertSettings', 'throw_exceptions', 'true')

    process_list = [ExceptionSafeProcess(target=breaking_process, args=(True, ))]
    process_list[0].start()

    result = check_worker_exceptions(process_list, 'foo', config=config)
    assert not result
    assert len(process_list) == 1
    sleep(1)
    result = check_worker_exceptions(process_list, 'foo', config=config)
    assert result
    assert len(process_list) == 0
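
Examples #5 and #6 test check_worker_exceptions without showing it. The sketch below illustrates the restart-or-shutdown behaviour these tests imply; the signature, the log wording, and the use of ExceptionSafeProcess and terminate_process_and_childs (both of which appear elsewhere in this listing) are assumptions, not the project's actual code.

import logging


def check_worker_exceptions(process_list, worker_label, config=None, worker_function=None):
    # Hypothetical sketch of the behaviour implied by the tests above.
    shutdown = False
    for worker_index, worker in list(enumerate(process_list)):
        if worker.exception:
            logging.error('Exception in {} process'.format(worker_label))
            terminate_process_and_childs(worker)
            process_list.remove(worker)
            if config is not None and config.getboolean('ExpertSettings', 'throw_exceptions'):
                shutdown = True  # the caller is expected to stop scheduling
            else:
                logging.warning('restarting {} process'.format(worker_label))
                new_worker = ExceptionSafeProcess(target=worker_function, args=(worker_index,))
                new_worker.start()
                process_list.append(new_worker)
    return shutdown
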
Example #7
def test_exception_safe_process():
    with pytest.raises(RuntimeError):
        breaking_process()

    process = ExceptionSafeProcess(target=breaking_process)
    process.start()
    process.join()
    assert process.exception
    assert str(process.exception[0]) == 'now that\'s annoying'
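
Example #7 exercises the exception attribute directly, and several other examples log process.exception[1] after a failure. A wrapper consistent with that interface could forward the raised exception together with its formatted traceback over a multiprocessing pipe. The following is an illustrative sketch under that assumption, not the project's actual class:

import traceback
from multiprocessing import Pipe, Process


class ExceptionSafeProcess(Process):
    '''A Process that stores an exception raised in run() instead of losing it.'''

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._receiver, self._sender = Pipe(duplex=False)
        self._exception = None

    def run(self):
        try:
            super().run()
            self._sender.send(None)
        except Exception as exception:  # pylint: disable=broad-except
            self._sender.send((exception, traceback.format_exc()))

    @property
    def exception(self):
        # Poll the pipe once and cache the result, so repeated access keeps working.
        if self._receiver.poll():
            self._exception = self._receiver.recv()
        return self._exception
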
Example #8
def worker_processing_with_timeout(self, worker_id, next_task):
    manager = Manager()
    result = manager.list()
    process = ExceptionSafeProcess(target=self.process_next_object, args=(next_task, result))
    process.start()
    process.join(timeout=self.timeout)
    if self.timeout_happened(process):
        self._handle_failed_analysis(next_task, process, worker_id, 'Timeout')
    elif process.exception:
        self._handle_failed_analysis(next_task, process, worker_id, 'Exception')
    else:
        self.out_queue.put(result.pop())
        logging.debug('Worker {}: Finished {} analysis on {}'.format(worker_id, self.NAME, next_task.uid))
Example #9
def worker_processing_with_timeout(self, worker_id, next_task):
    manager = Manager()
    result = manager.list()
    process = ExceptionSafeProcess(target=self.process_next_object, args=(next_task, result))
    process.start()
    process.join(timeout=self.timeout)
    if self.timeout_happened(process):
        terminate_process_and_childs(process)
        self.out_queue.put(next_task)
        logging.warning('Worker {}: Timeout {} analysis on {}'.format(worker_id, self.NAME, next_task.uid))
    elif process.exception:
        terminate_process_and_childs(process)
        raise process.exception[0]
    else:
        self.out_queue.put(result.pop())
        logging.debug('Worker {}: Finished {} analysis on {}'.format(worker_id, self.NAME, next_task.uid))
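
Examples #9, #11 and #16 call terminate_process_and_childs when a worker times out or raises, but the helper itself is not shown. A plausible sketch, assuming psutil is available for walking the process tree, could look like this:

import psutil


def terminate_process_and_childs(process):
    # Hypothetical sketch: stop all child processes first so nothing is orphaned,
    # then terminate the worker itself.
    try:
        for child in psutil.Process(process.pid).children(recursive=True):
            child.terminate()
    except psutil.NoSuchProcess:
        pass  # the worker already exited on its own
    process.terminate()
    process.join()
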
Example #10
def _start_runner_process(self):
    logging.debug('Starting scheduler...')
    self.schedule_process = ExceptionSafeProcess(target=self._task_runner)
    self.schedule_process.start()
Example #11
class CompareScheduler:
    '''
    This module handles all requests regarding compares
    '''
    def __init__(self,
                 config=None,
                 db_interface=None,
                 testing=False,
                 callback=None):
        self.config = config
        self.db_interface = db_interface if db_interface else CompareDbInterface(
            config=config)
        self.stop_condition = Value('i', 1)
        self.in_queue = Queue()
        self.callback = callback
        self.compare_module = Compare(config=self.config,
                                      db_interface=self.db_interface)
        self.worker = ExceptionSafeProcess(target=self._compare_scheduler_main)
        if not testing:
            self.start()

    def start(self):
        self.stop_condition.value = 0
        self.worker.start()
        logging.info('Compare Scheduler online...')

    def shutdown(self):
        '''
        shutdown the scheduler
        '''
        logging.debug('Shutting down...')
        if getattr(self.db_interface, 'shutdown', False):
            self.db_interface.shutdown()
        if self.stop_condition.value == 0:
            self.stop_condition.value = 1
            self.worker.join()
        self.in_queue.close()
        logging.info('Compare Scheduler offline')

    def add_task(self, compare_task):
        compare_id, redo = compare_task
        try:
            self.db_interface.check_objects_exist(compare_id)
        except FactCompareException as exception:
            return exception.get_message(
            )  # FIXME: return value gets ignored by backend intercom
        logging.debug('Schedule for compare: {}'.format(compare_id))
        self.in_queue.put((compare_id, redo))
        return None

    def _compare_scheduler_main(self):
        compares_done = set()
        while self.stop_condition.value == 0:
            self._compare_single_run(compares_done)
        logging.debug('Compare Thread terminated')

    def _compare_single_run(self, compares_done):
        try:
            compare_id, redo = self.in_queue.get(
                timeout=float(self.config['ExpertSettings']['block_delay']))
        except Empty:
            pass
        else:
            if self._decide_whether_to_process(compare_id, redo,
                                               compares_done):
                if redo:
                    self.db_interface.delete_old_compare_result(compare_id)
                compares_done.add(compare_id)
                self._process_compare(compare_id)
                if self.callback:
                    self.callback()

    def _process_compare(self, compare_id):
        result = self.compare_module.compare(
            convert_compare_id_to_list(compare_id))
        if isinstance(result, dict):
            self.db_interface.add_compare_result(result)
        else:
            logging.error(result)

    @staticmethod
    def _decide_whether_to_process(uid, redo, compares_done):
        return redo or uid not in compares_done

    def check_exceptions(self):
        return_value = False
        if self.worker.exception:
            logging.error("{}Worker Exception Found!!{}".format(
                bcolors.FAIL, bcolors.ENDC))
            logging.error(self.worker.exception[1])
            if self.config.getboolean('ExpertSettings', 'throw_exceptions'):
                return_value = True
                terminate_process_and_childs(self.worker)
        return return_value
Example #12
def start_result_collector(self):
    logging.debug('Starting result collector')
    self.result_collector_process = ExceptionSafeProcess(
        target=self.result_collector)
    self.result_collector_process.start()
Example #13
class AnalysisScheduler:  # pylint: disable=too-many-instance-attributes
    '''
    This Scheduler performs analysis of firmware objects
    '''
    def __init__(self,
                 config: Optional[ConfigParser] = None,
                 pre_analysis=None,
                 post_analysis=None,
                 db_interface=None):
        self.config = config
        self.analysis_plugins = {}
        self.load_plugins()
        self.stop_condition = Value('i', 0)
        self.process_queue = Queue()
        self.manager = Manager()
        self.currently_running = self.manager.dict()
        self.recently_finished = self.manager.dict()
        self.currently_running_lock = self.manager.Lock()  # pylint: disable=no-member

        self.db_backend_service = db_interface if db_interface else BackEndDbInterface(
            config=config)
        self.pre_analysis = pre_analysis if pre_analysis else self.db_backend_service.add_object
        self.post_analysis = post_analysis if post_analysis else self.db_backend_service.add_analysis
        self.start_scheduling_process()
        self.start_result_collector()
        logging.info('Analysis System online...')
        logging.info('Plugins available: {}'.format(
            self.get_list_of_available_plugins()))

    def shutdown(self):
        '''
        shutdown the scheduler and all loaded plugins
        '''
        logging.debug('Shutting down...')
        self.stop_condition.value = 1
        with ThreadPoolExecutor() as executor:
            executor.submit(self.schedule_process.join)
            executor.submit(self.result_collector_process.join)
            for plugin in self.analysis_plugins:
                executor.submit(self.analysis_plugins[plugin].shutdown)
        if getattr(self.db_backend_service, 'shutdown', False):
            self.db_backend_service.shutdown()
        self.process_queue.close()
        logging.info('Analysis System offline')

    def update_analysis_of_object_and_children(self, fo: FileObject):
        '''
        This function is used to recursively analyze an object without the need for the unpacker
        '''
        for included_file in self.db_backend_service.get_list_of_all_included_files(
                fo):
            child = self.db_backend_service.get_object(included_file)
            self._schedule_analysis_tasks(child, fo.scheduled_analysis)
        self.check_further_process_or_complete(fo)

    def start_analysis_of_object(self, fo: FileObject):
        '''
        This function should be used to add a new firmware object to the scheduler
        '''
        self._add_to_current_analyses(fo)
        self._schedule_analysis_tasks(fo,
                                      fo.scheduled_analysis,
                                      mandatory=True)

    def update_analysis_of_single_object(self, fo: FileObject):
        '''
        This function is used to add analysis tasks for a single file
        '''
        self._schedule_analysis_tasks(fo, fo.scheduled_analysis)

    def _schedule_analysis_tasks(self,
                                 fo,
                                 scheduled_analysis,
                                 mandatory=False):
        scheduled_analysis = self._add_dependencies_recursively(
            copy(scheduled_analysis) or [])
        fo.scheduled_analysis = self._smart_shuffle(
            scheduled_analysis +
            MANDATORY_PLUGINS if mandatory else scheduled_analysis)
        self.check_further_process_or_complete(fo)

    def _smart_shuffle(self, plugin_list: List[str]) -> List[str]:
        scheduled_plugins = []
        remaining_plugins = set(plugin_list)

        while remaining_plugins:
            next_plugins = self._get_plugins_with_met_dependencies(
                remaining_plugins, scheduled_plugins)
            if not next_plugins:
                logging.error(
                    'Error: Could not schedule plugins because dependencies cannot be fulfilled: {}'
                    .format(remaining_plugins))
                break
            scheduled_plugins[:0] = shuffled(next_plugins)
            remaining_plugins.difference_update(next_plugins)

        # assure file type is first for blacklist functionality
        if 'file_type' in scheduled_plugins and scheduled_plugins[
                -1] != 'file_type':
            scheduled_plugins.remove('file_type')
            scheduled_plugins.append('file_type')
        return scheduled_plugins

    def _get_plugins_with_met_dependencies(
            self, remaining_plugins: Set[str],
            scheduled_plugins: List[str]) -> List[str]:
        met_dependencies = scheduled_plugins
        return [
            plugin for plugin in remaining_plugins if all(
                dependency in met_dependencies
                for dependency in self.analysis_plugins[plugin].DEPENDENCIES)
        ]

    def get_list_of_available_plugins(self):
        '''
        returns a list of all loaded plugins
        '''
        plugin_list = list(self.analysis_plugins.keys())
        plugin_list.sort(key=str.lower)
        return plugin_list

# ---- internal functions ----

    def get_default_plugins_from_config(self):
        try:
            result = {}
            for plugin_set in self.config['default_plugins']:
                result[plugin_set] = read_list_from_config(
                    self.config, 'default_plugins', plugin_set)
            return result
        except (TypeError, KeyError, AttributeError):
            logging.warning('default plug-ins not set in config')
            return {}

    def get_plugin_dict(self):
        '''
        returns a dictionary of plugins with the following form: names as keys and the respective description value
        {NAME: (DESCRIPTION, mandatory, default, VERSION, DEPENDENCIES, MIME_BLACKLIST, MIME_WHITELIST, config.threads)}
        - mandatory plug-ins shall not be shown in the analysis selection but always executed
        - default plug-ins shall be pre-selected in the analysis selection
        '''
        plugin_list = self.get_list_of_available_plugins()
        plugin_list = self._remove_unwanted_plugins(plugin_list)
        default_plugins = self.get_default_plugins_from_config()
        default_flag_dict = {}
        result = {}
        for plugin in plugin_list:
            mandatory_flag = plugin in MANDATORY_PLUGINS
            for key in default_plugins:
                default_flag_dict[key] = plugin in default_plugins[key]
            blacklist, whitelist = self._get_blacklist_and_whitelist_from_plugin(
                plugin)
            result[plugin] = (self.analysis_plugins[plugin].DESCRIPTION,
                              mandatory_flag, dict(default_flag_dict),
                              self.analysis_plugins[plugin].VERSION,
                              self.analysis_plugins[plugin].DEPENDENCIES,
                              blacklist, whitelist,
                              self.config[plugin].get('threads', 0))
        result['unpacker'] = (
            'Additional information provided by the unpacker', True, False)
        return result

# ---- scheduling functions ----

    def get_scheduled_workload(self):
        self._clear_recently_finished()
        workload = {
            'analysis_main_scheduler': self.process_queue.qsize(),
            'plugins': {},
            'current_analyses': self._get_current_analyses_stats(),
            'recently_finished_analyses': dict(self.recently_finished),
        }
        for plugin_name in self.analysis_plugins:
            plugin = self.analysis_plugins[plugin_name]
            workload['plugins'][plugin_name] = {
                'queue':
                plugin.in_queue.qsize(),
                'active': (sum(plugin.active[i].value
                               for i in range(plugin.thread_count))),
            }
        return workload

    def _get_current_analyses_stats(self):
        return {
            uid: {
                'unpacked_count': stats_dict['unpacked_files_count'],
                'analyzed_count': stats_dict['analyzed_files_count'],
                'start_time': stats_dict['start_time'],
                'total_count': stats_dict['total_files_count'],
            }
            for uid, stats_dict in self.currently_running.items()
        }

    def register_plugin(self, name, plugin_instance):
        '''
        This function is called upon plugin init to announce its presence
        '''
        self.analysis_plugins[name] = plugin_instance

    def load_plugins(self):
        source = import_plugins('analysis.plugins', 'plugins/analysis')
        for plugin_name in source.list_plugins():
            plugin = source.load_plugin(plugin_name)
            plugin.AnalysisPlugin(self, config=self.config)

    def start_scheduling_process(self):
        logging.debug('Starting scheduler...')
        self.schedule_process = ExceptionSafeProcess(target=self.scheduler)
        self.schedule_process.start()

    def scheduler(self):
        while self.stop_condition.value == 0:
            try:
                task = self.process_queue.get(timeout=float(
                    self.config['ExpertSettings']['block_delay']))
            except Empty:
                pass
            else:
                self.process_next_analysis(task)

    def _reschedule_failed_analysis_task(self, fw_object: Union[Firmware,
                                                                FileObject]):
        failed_plugin, cause = fw_object.analysis_exception
        fw_object.processed_analysis[failed_plugin] = {'failed': cause}
        for plugin in fw_object.scheduled_analysis[:]:
            if failed_plugin in self.analysis_plugins[plugin].DEPENDENCIES:
                fw_object.scheduled_analysis.remove(plugin)
                logging.warning(
                    'Unscheduled analysis {} for {} because dependency {} failed'
                    .format(plugin, fw_object.uid, failed_plugin))
                fw_object.processed_analysis[plugin] = {
                    'failed':
                    'Analysis of dependency {} failed'.format(failed_plugin)
                }
        fw_object.analysis_exception = None

    # ---- analysis skipping ----

    def process_next_analysis(self, fw_object: FileObject):
        self.pre_analysis(fw_object)
        analysis_to_do = fw_object.scheduled_analysis.pop()
        if analysis_to_do not in self.analysis_plugins:
            logging.error('Plugin \'{}\' not available'.format(analysis_to_do))
            self.check_further_process_or_complete(fw_object)
        else:
            self._start_or_skip_analysis(analysis_to_do, fw_object)

    def _start_or_skip_analysis(self, analysis_to_do: str,
                                file_object: FileObject):
        if self._analysis_is_already_in_db_and_up_to_date(
                analysis_to_do, file_object.uid):
            logging.debug(
                'skipping analysis "{}" for {} (analysis already in DB)'.
                format(analysis_to_do, file_object.uid))
            if analysis_to_do in self._get_cumulative_remaining_dependencies(
                    file_object.scheduled_analysis):
                self._add_completed_analysis_results_to_file_object(
                    analysis_to_do, file_object)
            self.check_further_process_or_complete(file_object)
        elif analysis_to_do not in MANDATORY_PLUGINS and self._next_analysis_is_blacklisted(
                analysis_to_do, file_object):
            logging.debug(
                'skipping analysis "{}" for {} (blacklisted file type)'.format(
                    analysis_to_do, file_object.uid))
            file_object.processed_analysis[
                analysis_to_do] = self._get_skipped_analysis_result(
                    analysis_to_do)
            self.post_analysis(file_object)
            self.check_further_process_or_complete(file_object)
        else:
            self.analysis_plugins[analysis_to_do].add_job(file_object)

    def _add_completed_analysis_results_to_file_object(self,
                                                       analysis_to_do: str,
                                                       fw_object: FileObject):
        db_entry = self.db_backend_service.get_specific_fields_of_db_entry(
            fw_object.uid, {'processed_analysis.{}'.format(analysis_to_do): 1})
        desanitized_analysis = self.db_backend_service.retrieve_analysis(
            db_entry['processed_analysis'])
        fw_object.processed_analysis[analysis_to_do] = desanitized_analysis[
            analysis_to_do]

    def _analysis_is_already_in_db_and_up_to_date(self, analysis_to_do: str,
                                                  uid: str):
        db_entry = self.db_backend_service.get_specific_fields_of_db_entry(
            uid, {
                'processed_analysis.{plugin}.{key}'.format(
                    plugin=analysis_to_do, key=key): 1
                for key in [
                    'failed', 'file_system_flag', 'plugin_version',
                    'system_version'
                ]
            })
        if not db_entry or analysis_to_do not in db_entry[
                'processed_analysis'] or 'failed' in db_entry[
                    'processed_analysis'][analysis_to_do]:
            return False
        if 'plugin_version' not in db_entry['processed_analysis'][
                analysis_to_do]:
            logging.error('Plugin Version missing: UID: {}, Plugin: {}'.format(
                uid, analysis_to_do))
            return False

        if db_entry['processed_analysis'][analysis_to_do]['file_system_flag']:
            db_entry[
                'processed_analysis'] = self.db_backend_service.retrieve_analysis(
                    db_entry['processed_analysis'],
                    analysis_filter=[analysis_to_do])
            if 'file_system_flag' in db_entry['processed_analysis'][
                    analysis_to_do]:
                logging.warning('Desanitization of version string failed')
                return False

        return self._analysis_is_up_to_date(
            db_entry['processed_analysis'][analysis_to_do],
            self.analysis_plugins[analysis_to_do])

    @staticmethod
    def _analysis_is_up_to_date(analysis_db_entry: dict,
                                analysis_plugin: AnalysisBasePlugin):
        old_plugin_version = analysis_db_entry['plugin_version']
        old_system_version = analysis_db_entry.get('system_version', None)
        current_plugin_version = analysis_plugin.VERSION
        current_system_version = getattr(analysis_plugin, 'SYSTEM_VERSION',
                                         None)
        try:
            if LooseVersion(old_plugin_version) < LooseVersion(current_plugin_version) or \
                    LooseVersion(old_system_version or '0') < LooseVersion(current_system_version or '0'):
                return False
        except TypeError:
            logging.error(
                'plug-in or system version of "{}" plug-in is or was invalid!'.
                format(analysis_plugin.NAME))
            return False
        return True

# ---- blacklist and whitelist ----

    def _get_skipped_analysis_result(self, analysis_to_do):
        return {
            'skipped': 'blacklisted file type',
            'summary': [],
            'analysis_date': time(),
            'plugin_version': self.analysis_plugins[analysis_to_do].VERSION
        }

    def _next_analysis_is_blacklisted(self, next_analysis: str,
                                      fw_object: FileObject):
        blacklist, whitelist = self._get_blacklist_and_whitelist(next_analysis)
        if not (blacklist or whitelist):
            return False
        if blacklist and whitelist:
            message = color_string(
                'Configuration of plugin "{}" erroneous'.format(next_analysis),
                TerminalColors.FAIL)
            logging.error(
                '{}: found blacklist and whitelist. Ignoring blacklist.'.
                format(message))

        file_type = self._get_file_type_from_object_or_db(fw_object)

        if whitelist:
            return not substring_is_in_list(file_type, whitelist)
        return substring_is_in_list(file_type, blacklist)

    def _get_file_type_from_object_or_db(
            self, fw_object: FileObject) -> Optional[str]:
        if 'file_type' not in fw_object.processed_analysis:
            self._add_completed_analysis_results_to_file_object(
                'file_type', fw_object)

        return fw_object.processed_analysis['file_type']['mime'].lower()

    def _get_blacklist_and_whitelist(self,
                                     next_analysis: str) -> Tuple[List, List]:
        blacklist, whitelist = self._get_blacklist_and_whitelist_from_config(
            next_analysis)
        if not (blacklist or whitelist):
            blacklist, whitelist = self._get_blacklist_and_whitelist_from_plugin(
                next_analysis)
        return blacklist, whitelist

    def _get_blacklist_and_whitelist_from_config(
            self, analysis_plugin: str) -> Tuple[List, List]:
        blacklist = read_list_from_config(self.config, analysis_plugin,
                                          'mime_blacklist')
        whitelist = read_list_from_config(self.config, analysis_plugin,
                                          'mime_whitelist')
        return blacklist, whitelist

# ---- result collector functions ----

    def _get_blacklist_and_whitelist_from_plugin(
            self, analysis_plugin: str) -> Tuple[List, List]:
        blacklist = getattr(self.analysis_plugins[analysis_plugin],
                            'MIME_BLACKLIST', [])
        whitelist = getattr(self.analysis_plugins[analysis_plugin],
                            'MIME_WHITELIST', [])
        return blacklist, whitelist

    def start_result_collector(self):
        logging.debug('Starting result collector')
        self.result_collector_process = ExceptionSafeProcess(
            target=self.result_collector)
        self.result_collector_process.start()

# ---- miscellaneous functions ----

    def result_collector(self):  # pylint: disable=too-complex
        while self.stop_condition.value == 0:
            nop = True
            for plugin in self.analysis_plugins:
                try:
                    fw = self.analysis_plugins[plugin].out_queue.get_nowait()
                except Empty:
                    pass
                else:
                    nop = False
                    if plugin in fw.processed_analysis:
                        if fw.analysis_exception:
                            self._reschedule_failed_analysis_task(fw)

                        self.post_analysis(fw)
                    self.check_further_process_or_complete(fw)
            if nop:
                sleep(float(self.config['ExpertSettings']['block_delay']))

    def check_further_process_or_complete(self, fw_object):
        if not fw_object.scheduled_analysis:
            logging.info('Analysis Completed:\n{}'.format(fw_object))
            self._remove_from_current_analyses(fw_object)
        else:
            self.process_queue.put(fw_object)

    @staticmethod
    def _remove_unwanted_plugins(list_of_plugins):
        defaults = ['dummy_plugin_for_testing_only']
        for plugin in defaults:
            list_of_plugins.remove(plugin)
        return list_of_plugins

    def check_exceptions(self):
        for _, plugin in self.analysis_plugins.items():
            if plugin.check_exceptions():
                return True
        return check_worker_exceptions(
            [self.schedule_process, self.result_collector_process],
            'Scheduler')

    def _add_dependencies_recursively(
            self, scheduled_analyses: List[str]) -> List[str]:
        scheduled_analyses_set = set(scheduled_analyses)
        while True:
            new_dependencies = self._get_cumulative_remaining_dependencies(
                scheduled_analyses_set)
            if not new_dependencies:
                break
            scheduled_analyses_set.update(new_dependencies)
        return list(scheduled_analyses_set)

    def _get_cumulative_remaining_dependencies(
            self, scheduled_analyses: Set[str]) -> Set[str]:
        return {
            dependency
            for plugin in scheduled_analyses
            for dependency in self.analysis_plugins[plugin].DEPENDENCIES
        }.difference(scheduled_analyses)

    # currently running analyses

    def _add_to_current_analyses(self, fw_object: Union[Firmware, FileObject]):
        self.currently_running_lock.acquire()
        try:
            if isinstance(fw_object, Firmware):
                self.currently_running[
                    fw_object.uid] = self._init_current_analysis(fw_object)
            else:
                self._update_current_analysis(fw_object)
        finally:
            self.currently_running_lock.release()

    def _update_current_analysis(self, fw_object):
        '''
        new file comes from unpacking:
        - file moved from files_to_unpack to files_to_analyze (could be duplicate!)
        - included files added to files_to_unpack (could also include duplicates!)
        '''
        for parent in self._find_currently_analyzed_parents(fw_object):
            updated_dict = self.currently_running[parent]
            new_files = set(fw_object.files_included) - set(
                updated_dict['files_to_unpack']).union(
                    set(updated_dict['files_to_analyze']))
            updated_dict['total_files_count'] += len(new_files)
            updated_dict['files_to_unpack'] = list(
                set(updated_dict['files_to_unpack']).union(new_files))
            if fw_object.uid in updated_dict['files_to_unpack']:
                updated_dict['files_to_unpack'].remove(fw_object.uid)
                updated_dict['files_to_analyze'].append(fw_object.uid)
                updated_dict['unpacked_files_count'] += 1
            self.currently_running[parent] = updated_dict

    @staticmethod
    def _init_current_analysis(fw_object):
        return {
            'files_to_unpack': list(fw_object.files_included),
            'files_to_analyze': [fw_object.uid],
            'start_time': time(),
            'unpacked_files_count': 1,
            'analyzed_files_count': 0,
            'total_files_count': 1 + len(fw_object.files_included),
        }

    def _remove_from_current_analyses(self, fw_object: Union[Firmware,
                                                             FileObject]):
        try:
            self.currently_running_lock.acquire()
            for parent in self._find_currently_analyzed_parents(fw_object):
                updated_dict = self.currently_running[parent]
                if fw_object.uid not in updated_dict['files_to_analyze']:
                    logging.warning(
                        'Trying to remove {} from current analysis of {} but it is not included'
                        .format(fw_object.uid, parent))
                    continue
                updated_dict['files_to_analyze'] = list(
                    set(updated_dict['files_to_analyze']) - {fw_object.uid})
                updated_dict['analyzed_files_count'] += 1
                if len(updated_dict['files_to_unpack']) == len(
                        updated_dict['files_to_analyze']) == 0:
                    self.recently_finished[
                        parent] = self._init_recently_finished(updated_dict)
                    self.currently_running.pop(parent)
                    logging.info(
                        'Analysis of firmware {} completed'.format(parent))
                else:
                    self.currently_running[parent] = updated_dict
        finally:
            self.currently_running_lock.release()

    @staticmethod
    def _init_recently_finished(analysis_data: dict) -> dict:
        return {
            'duration': time() - analysis_data['start_time'],
            'total_files_count': analysis_data['total_files_count'],
            'time_finished': time(),
        }

    def _find_currently_analyzed_parents(
            self, fw_object: Union[Firmware, FileObject]) -> Set[str]:
        parent_uids = {fw_object.uid} if isinstance(
            fw_object, Firmware) else fw_object.parent_firmware_uids
        return set(self.currently_running.keys()).intersection(parent_uids)

    def _clear_recently_finished(self):
        for uid, stats in list(self.recently_finished.items()):
            if time(
            ) - stats['time_finished'] > RECENTLY_FINISHED_DISPLAY_TIME_IN_SEC:
                self.recently_finished.pop(uid)
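
_smart_shuffle in Example #13 assigns shuffled(next_plugins) into a list slice, so shuffled apparently returns a new list rather than shuffling in place the way random.shuffle does. A short sketch under that assumption:

import random


def shuffled(collection):
    # Assumed behaviour: return a shuffled copy instead of mutating the argument.
    result = list(collection)
    random.shuffle(result)
    return result
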
Example #14
def start_scheduling_process(self):
    logging.debug('Starting scheduler...')
    self.schedule_process = ExceptionSafeProcess(target=self.scheduler)
    self.schedule_process.start()
Example #15
def start_tagging_process(self):
    self.tagging_process = ExceptionSafeProcess(
        target=self._analysis_tag_scheduler_main)
    self.tagging_process.start()
Example #16
class AnalysisScheduler:
    '''
    This Scheduler performs analysis of firmware objects
    '''
    def __init__(self,
                 config: Optional[ConfigParser] = None,
                 pre_analysis=None,
                 post_analysis=None,
                 db_interface=None):
        self.config = config
        self.analysis_plugins = {}
        self.load_plugins()
        self.stop_condition = Value('i', 0)
        self.process_queue = Queue()
        self.tag_queue = Queue()
        self.db_backend_service = db_interface if db_interface else BackEndDbInterface(
            config=config)
        self.pre_analysis = pre_analysis if pre_analysis else self.db_backend_service.add_object
        self.post_analysis = post_analysis if post_analysis else self.db_backend_service.add_analysis
        self.start_scheduling_process()
        self.start_result_collector()
        logging.info('Analysis System online...')
        logging.info('Plugins available: {}'.format(
            self.get_list_of_available_plugins()))

    def shutdown(self):
        '''
        shutdown the scheduler and all loaded plugins
        '''
        logging.debug('Shutting down...')
        self.stop_condition.value = 1
        with ThreadPoolExecutor() as e:
            e.submit(self.schedule_process.join)
            e.submit(self.result_collector_process.join)
            for plugin in self.analysis_plugins:
                e.submit(self.analysis_plugins[plugin].shutdown)
        if getattr(self.db_backend_service, 'shutdown', False):
            self.db_backend_service.shutdown()
        self.tag_queue.close()
        self.process_queue.close()
        logging.info('Analysis System offline')

    def add_update_task(self, fo: FileObject):
        for included_file in self.db_backend_service.get_list_of_all_included_files(
                fo):
            child = self.db_backend_service.get_object(included_file)
            child.scheduled_analysis = self._add_dependencies_recursively(
                fo.scheduled_analysis or [])
            child.scheduled_analysis = self._smart_shuffle(
                child.scheduled_analysis)
            self.check_further_process_or_complete(child)
        self.check_further_process_or_complete(fo)

    def add_task(self, fo: FileObject):
        '''
        This function should be used to add a new firmware object to the scheduler
        '''
        scheduled_plugins = self._add_dependencies_recursively(
            fo.scheduled_analysis or [])
        fo.scheduled_analysis = self._smart_shuffle(scheduled_plugins +
                                                    MANDATORY_PLUGINS)
        self.check_further_process_or_complete(fo)

    def _smart_shuffle(self, plugin_list: List[str]) -> List[str]:
        scheduled_plugins = []
        remaining_plugins = set(plugin_list)

        while len(remaining_plugins) > 0:
            next_plugins = self._get_plugins_with_met_dependencies(
                remaining_plugins, scheduled_plugins)
            if not next_plugins:
                logging.error(
                    'Error: Could not schedule plugins because dependencies cannot be fulfilled: {}'
                    .format(remaining_plugins))
                break
            scheduled_plugins[:0] = shuffled(next_plugins)
            remaining_plugins.difference_update(next_plugins)

        # assure file type is first for blacklist functionality
        if 'file_type' in scheduled_plugins and scheduled_plugins[
                -1] != 'file_type':
            scheduled_plugins.remove('file_type')
            scheduled_plugins.append('file_type')
        return scheduled_plugins

    def _get_plugins_with_met_dependencies(
            self, remaining_plugins: Set[str],
            scheduled_plugins: List[str]) -> List[str]:
        met_dependencies = scheduled_plugins
        return [
            plugin for plugin in remaining_plugins if all(
                dependency in met_dependencies
                for dependency in self.analysis_plugins[plugin].DEPENDENCIES)
        ]

    def get_list_of_available_plugins(self):
        '''
        returns a list of all loaded plugins
        '''
        plugin_list = list(self.analysis_plugins.keys())
        plugin_list.sort(key=str.lower)
        return plugin_list

# ---- internal functions ----

    def get_default_plugins_from_config(self):
        try:
            result = {}
            for plugin_set in self.config['default_plugins']:
                result[plugin_set] = read_list_from_config(
                    self.config, 'default_plugins', plugin_set)
            return result
        except (TypeError, KeyError, AttributeError):
            logging.warning('default plug-ins not set in config')
            return {}  # a dict, since callers iterate over its keys

    def get_plugin_dict(self):
        '''
        returns a dictionary of plugins with the following form: names as keys and the respective description value
        {NAME: (DESCRIPTION, MANDATORY_FLAG, DEFAULT_FLAG, VERSION)}
        - mandatory plug-ins shall not be shown in the analysis selection but always executed
        - default plug-ins shall be pre-selected in the analysis selection
        '''
        plugin_list = self.get_list_of_available_plugins()
        plugin_list = self._remove_unwanted_plugins(plugin_list)
        default_plugins = self.get_default_plugins_from_config()
        default_flag_dict = {}
        result = {}
        for plugin in plugin_list:
            mandatory_flag = plugin in MANDATORY_PLUGINS
            for key in default_plugins.keys():
                default_flag_dict[key] = plugin in default_plugins[key]
            result[plugin] = (self.analysis_plugins[plugin].DESCRIPTION,
                              mandatory_flag, dict(default_flag_dict),
                              self.analysis_plugins[plugin].VERSION)
        result['unpacker'] = (
            'Additional information provided by the unpacker', True, False)
        return result

# ---- scheduling functions ----

    def get_scheduled_workload(self):
        workload = {'analysis_main_scheduler': self.process_queue.qsize()}
        for plugin in self.analysis_plugins:
            workload[plugin] = self.analysis_plugins[plugin].in_queue.qsize()
        return workload

    def register_plugin(self, name, plugin_instance):
        '''
        This function is called upon plugin init to announce its presence
        '''
        self.analysis_plugins[name] = plugin_instance

    def load_plugins(self):
        source = import_plugins('analysis.plugins', 'plugins/analysis')
        for plugin_name in source.list_plugins():
            plugin = source.load_plugin(plugin_name)
            plugin.AnalysisPlugin(self, config=self.config)

    def start_scheduling_process(self):
        logging.debug('Starting scheduler...')
        self.schedule_process = ExceptionSafeProcess(target=self.scheduler)
        self.schedule_process.start()

    def scheduler(self):
        while self.stop_condition.value == 0:
            try:
                task = self.process_queue.get(timeout=float(
                    self.config['ExpertSettings']['block_delay']))
            except Empty:
                pass
            else:
                self.process_next_analysis(task)

    # ---- analysis skipping ----

    def process_next_analysis(self, fw_object: FileObject):
        self.pre_analysis(fw_object)
        analysis_to_do = fw_object.scheduled_analysis.pop()
        if analysis_to_do not in self.analysis_plugins:
            logging.error('Plugin \'{}\' not available'.format(analysis_to_do))
            self.check_further_process_or_complete(fw_object)
        else:
            self._start_or_skip_analysis(analysis_to_do, fw_object)

    def _start_or_skip_analysis(self, analysis_to_do: str,
                                fw_object: FileObject):
        if self._analysis_is_already_in_db_and_up_to_date(
                analysis_to_do, fw_object.get_uid()):
            logging.debug(
                'skipping analysis "{}" for {} (analysis already in DB)'.
                format(analysis_to_do, fw_object.get_uid()))
            if analysis_to_do in self._get_cumulative_remaining_dependencies(
                    fw_object.scheduled_analysis):
                self._add_completed_analysis_results_to_file_object(
                    analysis_to_do, fw_object)
            self.check_further_process_or_complete(fw_object)
        elif analysis_to_do not in MANDATORY_PLUGINS and self._next_analysis_is_blacklisted(
                analysis_to_do, fw_object):
            logging.debug(
                'skipping analysis "{}" for {} (blacklisted file type)'.format(
                    analysis_to_do, fw_object.get_uid()))
            fw_object.processed_analysis[
                analysis_to_do] = self._get_skipped_analysis_result(
                    analysis_to_do)
            self.check_further_process_or_complete(fw_object)
        else:
            self.analysis_plugins[analysis_to_do].add_job(fw_object)

    def _add_completed_analysis_results_to_file_object(self,
                                                       analysis_to_do: str,
                                                       fw_object: FileObject):
        db_entry = self.db_backend_service.get_specific_fields_of_db_entry(
            fw_object.get_uid(),
            {'processed_analysis.{}'.format(analysis_to_do): 1})
        desanitized_analysis = self.db_backend_service.retrieve_analysis(
            db_entry['processed_analysis'])
        fw_object.processed_analysis[analysis_to_do] = desanitized_analysis[
            analysis_to_do]

    def _analysis_is_already_in_db_and_up_to_date(self, analysis_to_do: str,
                                                  uid: str):
        db_entry = self.db_backend_service.get_specific_fields_of_db_entry(
            uid, {
                'processed_analysis.{}.file_system_flag'.format(analysis_to_do):
                1,
                'processed_analysis.{}.plugin_version'.format(analysis_to_do):
                1,
                'processed_analysis.{}.system_version'.format(analysis_to_do):
                1
            })
        if not db_entry or analysis_to_do not in db_entry['processed_analysis']:
            return False
        elif 'plugin_version' not in db_entry['processed_analysis'][
                analysis_to_do]:
            logging.error('Plugin Version missing: UID: {}, Plugin: {}'.format(
                uid, analysis_to_do))
            return False

        if db_entry['processed_analysis'][analysis_to_do]['file_system_flag']:
            db_entry[
                'processed_analysis'] = self.db_backend_service.retrieve_analysis(
                    db_entry['processed_analysis'],
                    analysis_filter=[
                        analysis_to_do,
                    ])
            if 'file_system_flag' in db_entry['processed_analysis'][
                    analysis_to_do]:
                logging.warning('Desanitization of version string failed')
                return False

        analysis_plugin_version = db_entry['processed_analysis'][
            analysis_to_do]['plugin_version']
        analysis_system_version = db_entry['processed_analysis'][analysis_to_do]['system_version'] \
            if 'system_version' in db_entry['processed_analysis'][analysis_to_do] else None
        plugin_version = self.analysis_plugins[analysis_to_do].VERSION
        system_version = self.analysis_plugins[analysis_to_do].SYSTEM_VERSION \
            if hasattr(self.analysis_plugins[analysis_to_do], 'SYSTEM_VERSION') else None

        if LooseVersion(analysis_plugin_version) < LooseVersion(plugin_version) or \
                LooseVersion(analysis_system_version or '0') < LooseVersion(system_version or '0'):
            return False

        return True

# ---- blacklist and whitelist ----

    def _get_skipped_analysis_result(self, analysis_to_do):
        return {
            'skipped': 'blacklisted file type',
            'summary': [],
            'analysis_date': time(),
            'plugin_version': self.analysis_plugins[analysis_to_do].VERSION
        }

    def _next_analysis_is_blacklisted(self, next_analysis: str,
                                      fw_object: FileObject):
        blacklist, whitelist = self._get_blacklist_and_whitelist(next_analysis)
        if not (blacklist or whitelist):
            return False
        if blacklist and whitelist:
            logging.error(
                '{}Configuration of plugin "{}" erroneous{}: found blacklist and whitelist. Ignoring blacklist.'
                .format(bcolors.FAIL, next_analysis, bcolors.ENDC))

        file_type = self._get_file_type_from_object_or_db(fw_object)

        if whitelist:
            return not substring_is_in_list(file_type, whitelist)
        return substring_is_in_list(file_type, blacklist)

    def _get_file_type_from_object_or_db(
            self, fw_object: FileObject) -> Optional[str]:
        if 'file_type' not in fw_object.processed_analysis:
            self._add_completed_analysis_results_to_file_object(
                'file_type', fw_object)

        return fw_object.processed_analysis['file_type']['mime'].lower()

    def _get_blacklist_and_whitelist(self,
                                     next_analysis: str) -> Tuple[List, List]:
        blacklist, whitelist = self._get_blacklist_and_whitelist_from_config(
            next_analysis)
        if not (blacklist or whitelist):
            blacklist, whitelist = self._get_blacklist_and_whitelist_from_plugin(
                next_analysis)
        return blacklist, whitelist

    def _get_blacklist_and_whitelist_from_config(
            self, analysis_plugin: str) -> Tuple[List, List]:
        blacklist = read_list_from_config(self.config, analysis_plugin,
                                          'mime_blacklist')
        whitelist = read_list_from_config(self.config, analysis_plugin,
                                          'mime_whitelist')
        return blacklist, whitelist

# ---- result collector functions ----

    def _get_blacklist_and_whitelist_from_plugin(
            self, analysis_plugin: str) -> Tuple[List, List]:
        blacklist = self.analysis_plugins[
            analysis_plugin].MIME_BLACKLIST if hasattr(
                self.analysis_plugins[analysis_plugin],
                'MIME_BLACKLIST') else []
        whitelist = self.analysis_plugins[
            analysis_plugin].MIME_WHITELIST if hasattr(
                self.analysis_plugins[analysis_plugin],
                'MIME_WHITELIST') else []
        return blacklist, whitelist

    def start_result_collector(self):
        logging.debug('Starting result collector')
        self.result_collector_process = ExceptionSafeProcess(
            target=self.result_collector)
        self.result_collector_process.start()

# ---- miscellaneous functions ----

    def result_collector(self):
        while self.stop_condition.value == 0:
            nop = True
            for plugin in self.analysis_plugins:
                try:
                    fw = self.analysis_plugins[plugin].out_queue.get_nowait()
                    fw = self._handle_analysis_tags(fw, plugin)
                except Empty:
                    pass
                else:
                    nop = False
                    if plugin in fw.processed_analysis:
                        self.post_analysis(fw)
                    self.check_further_process_or_complete(fw)
            if nop:
                sleep(float(self.config['ExpertSettings']['block_delay']))

    def _handle_analysis_tags(self, fw, plugin):
        self.tag_queue.put(check_tags(fw, plugin))
        return add_tags_to_object(fw, plugin)

    def check_further_process_or_complete(self, fw_object):
        if not fw_object.scheduled_analysis:
            logging.info('Analysis Completed:\n{}'.format(fw_object))
        else:
            self.process_queue.put(fw_object)

    @staticmethod
    def _remove_unwanted_plugins(list_of_plugins):
        defaults = ['dummy_plugin_for_testing_only']
        for plugin in defaults:
            list_of_plugins.remove(plugin)
        return list_of_plugins

    def check_exceptions(self):
        for _, plugin in self.analysis_plugins.items():
            if plugin.check_exceptions():
                return True
        for process in [self.schedule_process, self.result_collector_process]:
            if process.exception:
                logging.error('{}Exception in scheduler process {}{}'.format(
                    bcolors.FAIL, bcolors.ENDC, process.name))
                logging.error(process.exception[1])
                terminate_process_and_childs(process)
                return True  # Error here means nothing will ever get scheduled again. Thing should just break !
        return False

    def _add_dependencies_recursively(
            self, scheduled_analyses: List[str]) -> List[str]:
        scheduled_analyses_set = set(scheduled_analyses)
        while True:
            new_dependencies = self._get_cumulative_remaining_dependencies(
                scheduled_analyses_set)
            if not new_dependencies:
                break
            scheduled_analyses_set.update(new_dependencies)
        return list(scheduled_analyses_set)

    def _get_cumulative_remaining_dependencies(
            self, scheduled_analyses: Set[str]) -> Set[str]:
        return {
            dependency
            for plugin in scheduled_analyses
            for dependency in self.analysis_plugins[plugin].DEPENDENCIES
        }.difference(scheduled_analyses)
Example #17
class AnalysisScheduler(object):
    '''
    This Scheduler performs analysis of firmware objects
    '''

    analysis_plugins = {}

    def __init__(self, config=None, pre_analysis=None, post_analysis=None, db_interface=None):
        self.config = config
        self.load_plugins()
        self.stop_condition = Value('i', 0)
        self.process_queue = Queue()
        self.tag_queue = Queue()
        self.db_backend_service = db_interface if db_interface else BackEndDbInterface(config=config)
        self.pre_analysis = pre_analysis if pre_analysis else self.db_backend_service.add_object
        self.post_analysis = post_analysis if post_analysis else self.db_backend_service.add_analysis
        self.start_scheduling_process()
        self.start_result_collector()
        logging.info('Analysis System online...')
        logging.info('Plugins available: {}'.format(self.get_list_of_available_plugins()))

    def shutdown(self):
        '''
        shutdown the scheduler and all loaded plugins
        '''
        logging.debug('Shutting down...')
        self.stop_condition.value = 1
        with ThreadPoolExecutor() as e:
            e.submit(self.schedule_process.join)
            e.submit(self.result_collector_process.join)
            for plugin in self.analysis_plugins:
                e.submit(self.analysis_plugins[plugin].shutdown)
        if getattr(self.db_backend_service, 'shutdown', False):
            self.db_backend_service.shutdown()
        self.tag_queue.close()
        self.process_queue.close()
        logging.info('Analysis System offline')

    def add_update_task(self, fo):
        for included_file in self.db_backend_service.get_list_of_all_included_files(fo):
            child = self.db_backend_service.get_object(included_file)
            child.scheduled_analysis = fo.scheduled_analysis
            shuffle(child.scheduled_analysis)
            self.check_further_process_or_complete(child)
        self.check_further_process_or_complete(fo)

    def add_task(self, fo):
        '''
        This function should be used to add a new firmware object to the scheduler
        '''
        if fo.scheduled_analysis is None:
            fo.scheduled_analysis = MANDATORY_PLUGINS
        else:
            shuffle(fo.scheduled_analysis)
            fo.scheduled_analysis = MANDATORY_PLUGINS + fo.scheduled_analysis
        self.check_further_process_or_complete(fo)

    def get_list_of_available_plugins(self):
        '''
        returns a list of all loaded plugins
        '''
        plugin_list = list(self.analysis_plugins.keys())
        plugin_list.sort(key=str.lower)
        return plugin_list

    def get_default_plugins_from_config(self):
        try:
            result = {}
            for plugin_set in self.config['default_plugins']:
                result[plugin_set] = self.config['default_plugins'][plugin_set].split(', ')
            return result
        except (TypeError, KeyError, AttributeError):
            logging.warning('default plug-ins not set in config')
            return {}  # a dict, since callers iterate over its keys

    def get_plugin_dict(self):
        '''
        returns a dictionary of plugins with the following form: names as keys and the respective description value
        {NAME: (DESCRIPTION, MANDATORY_FLAG, DEFAULT_FLAG, VERSION)}
        - mandatory plug-ins shall not be shown in the analysis selection but always executed
        - default plug-ins shall be pre-selected in the analysis selection
        '''
        plugin_list = self.get_list_of_available_plugins()
        plugin_list = self._remove_unwanted_plugins(plugin_list)
        default_plugins = self.get_default_plugins_from_config()
        default_flag_dict = {}
        result = {}
        for plugin in plugin_list:
            mandatory_flag = plugin in MANDATORY_PLUGINS
            for key in default_plugins.keys():
                default_flag_dict[key] = plugin in default_plugins[key]
            result[plugin] = (self.analysis_plugins[plugin].DESCRIPTION, mandatory_flag, dict(default_flag_dict), self.analysis_plugins[plugin].VERSION)
        result['unpacker'] = ('Additional information provided by the unpacker', True, False)
        return result
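    # Example of one resulting entry (plugin name, description and the 'default' set name are illustrative):
    #   {'file_type': ('determines the file type', True, {'default': True}, '1.0')}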

    def get_scheduled_workload(self):
        workload = {'analysis_main_scheduler': self.process_queue.qsize()}
        for plugin in self.analysis_plugins:
            workload[plugin] = self.analysis_plugins[plugin].in_queue.qsize()
        return workload

# ---- internal functions ----

    def register_plugin(self, name, plugin_instance):
        '''
        This function is called upon plugin init to announce its presence
        '''
        self.analysis_plugins[name] = plugin_instance

    def load_plugins(self):
        source = import_plugins('analysis.plugins', 'plugins/analysis')
        for plugin_name in source.list_plugins():
            plugin = source.load_plugin(plugin_name)
            plugin.AnalysisPlugin(self, config=self.config)

# ---- scheduling functions ----

    def start_scheduling_process(self):
        logging.debug('Starting scheduler...')
        self.schedule_process = ExceptionSafeProcess(target=self.scheduler)
        self.schedule_process.start()

    def scheduler(self):
        while self.stop_condition.value == 0:
            try:
                task = self.process_queue.get(timeout=int(self.config['ExpertSettings']['block_delay']))
            except Empty:
                pass
            else:
                self.process_next_analysis(task)

    def process_next_analysis(self, fw_object):
        self.pre_analysis(fw_object)
        analysis_to_do = fw_object.scheduled_analysis.pop()
        if analysis_to_do not in self.analysis_plugins:
            logging.error('Plugin \'{}\' not available'.format(analysis_to_do))
        else:
            self.analysis_plugins[analysis_to_do].add_job(fw_object)

# ---- result collector functions ----

    def start_result_collector(self):
        logging.debug('Starting result collector')
        self.result_collector_process = ExceptionSafeProcess(target=self.result_collector)
        self.result_collector_process.start()

    def result_collector(self):
        while self.stop_condition.value == 0:
            nop = True
            for plugin in self.analysis_plugins:
                try:
                    fw = self.analysis_plugins[plugin].out_queue.get_nowait()
                    fw = self._handle_analysis_tags(fw, plugin)
                except Empty:
                    pass
                else:
                    nop = False
                    self.post_analysis(fw)
                    self.check_further_process_or_complete(fw)
            if nop:
                sleep(int(self.config['ExpertSettings']['block_delay']))

    def _handle_analysis_tags(self, fw, plugin):
        self.tag_queue.put(check_tags(fw, plugin))
        return add_tags_to_object(fw, plugin)
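    # Note: check_tags() extracts the plugin's tags and queues them for the tagging daemon (see tag_queue),
    # while add_tags_to_object() merges them into the file object before it is stored by post_analysis().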

    def check_further_process_or_complete(self, fw_object):
        if not fw_object.scheduled_analysis:
            logging.info('Analysis Completed:\n{}'.format(fw_object))
        else:
            self.process_queue.put(fw_object)

# ---- miscellaneous functions ----

    @staticmethod
    def _remove_unwanted_plugins(list_of_plugins):
        defaults = ['dummy_plugin_for_testing_only']
        for plugin in defaults:
            list_of_plugins.remove(plugin)
        return list_of_plugins

    def check_exceptions(self):
        for _, plugin in self.analysis_plugins.items():
            if plugin.check_exceptions():
                return True
        for process in [self.schedule_process, self.result_collector_process]:
            if process.exception:
                logging.error('{}Exception in scheduler process {}{}'.format(bcolors.FAIL, process.name, bcolors.ENDC))
                logging.error(process.exception[1])
                terminate_process_and_childs(process)
                return True  # an error here means nothing will ever get scheduled again, so shut down
        return False
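Both scheduler examples above rely on ExceptionSafeProcess exposing an exception attribute that the parent process can inspect (note process.exception[1] in check_exceptions). A minimal sketch of what such a wrapper might look like, assuming a Pipe-based hand-over of an (exception, traceback) tuple from child to parent; the actual helper used by these examples may differ in its details:

import traceback
from multiprocessing import Pipe, Process


class ExceptionSafeProcessSketch(Process):
    '''Runs its target like a normal Process but keeps any exception retrievable by the parent.'''

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._receiver, self._sender = Pipe(duplex=False)
        self._exception = None

    def run(self):
        try:
            super().run()
            self._sender.send(None)
        except Exception as exception:  # pylint: disable=broad-except
            self._sender.send((exception, traceback.format_exc()))
            raise

    @property
    def exception(self):
        # poll() avoids blocking when the child has not reported anything (yet)
        if self._exception is None and self._receiver.poll():
            self._exception = self._receiver.recv()
        return self._exception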
Example #18
0
class CompareScheduler:
    '''
    This module handles all request regarding compares
    '''
    def __init__(self,
                 config=None,
                 db_interface=None,
                 testing=False,
                 callback=None):
        self.config = config
        self.db_interface = db_interface if db_interface else CompareDbInterface(
            config=config)
        self.stop_condition = Value('i', 1)
        self.in_queue = Queue()
        self.callback = callback
        self.compare_module = Compare(config=self.config,
                                      db_interface=self.db_interface)
        self.worker = ExceptionSafeProcess(target=self._compare_scheduler_main)
        if not testing:
            self.start()

    def start(self):
        self.stop_condition.value = 0
        self.worker.start()
        logging.info('Compare Scheduler online...')

    def shutdown(self):
        '''
        shutdown the scheduler
        '''
        logging.debug('Shutting down...')
        if getattr(self.db_interface, 'shutdown', False):
            self.db_interface.shutdown()
        if self.stop_condition.value == 0:
            self.stop_condition.value = 1
            self.worker.join()
        self.in_queue.close()
        logging.info('Compare Scheduler offline')

    def add_task(self, compare_task):
        compare_id, redo = compare_task
        try:
            self.db_interface.check_objects_exist(compare_id)
        except FactCompareException as exception:
            return exception.get_message()  # FIXME: return value gets ignored by backend intercom
        logging.debug(f'Schedule for compare: {compare_id}')
        self.in_queue.put((compare_id, redo))
        return None

    def _compare_scheduler_main(self):
        compares_done = set()
        while self.stop_condition.value == 0:
            self._compare_single_run(compares_done)
        logging.debug('Compare Thread terminated')

    def _compare_single_run(self, compares_done):
        try:
            compare_id, redo = self.in_queue.get(timeout=float(self.config['ExpertSettings']['block_delay']))
        except Empty:
            pass
        else:
            if self._decide_whether_to_process(compare_id, redo, compares_done):
                if redo:
                    self.db_interface.delete_old_compare_result(compare_id)
                compares_done.add(compare_id)
                self._process_compare(compare_id)
                if self.callback:
                    self.callback()

    def _process_compare(self, compare_id):
        try:
            self.db_interface.add_compare_result(self.compare_module.compare(convert_compare_id_to_list(compare_id)))
        except Exception:  # pylint: disable=broad-except
            logging.error(f'Fatal error in compare process for {compare_id}', exc_info=True)

    @staticmethod
    def _decide_whether_to_process(uid, redo, compares_done):
        return redo or uid not in compares_done

    def check_exceptions(self):
        processes_to_check = [self.worker]
        shutdown = check_worker_exceptions(processes_to_check, 'Compare', self.config, self._compare_scheduler_main)
        if not shutdown and new_worker_was_started(new_process=processes_to_check[0], old_process=self.worker):
            self.worker = processes_to_check.pop()
        return shutdown
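A brief usage sketch for the CompareScheduler above. This is hypothetical driver code: the compare ID format (UIDs joined so that convert_compare_id_to_list can split them again), the config contents and the callback are all illustrative assumptions:

import logging
from configparser import ConfigParser

config = ConfigParser()
config['ExpertSettings'] = {'block_delay': '2'}  # minimal assumption; real configs contain much more

scheduler = CompareScheduler(config=config, callback=lambda: logging.info('compare finished'))
error_message = scheduler.add_task(('uid_of_fw_a;uid_of_fw_b', False))  # (compare_id, redo)
if error_message:
    logging.error(error_message)  # at least one of the compared objects is missing from the database
scheduler.shutdown()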
Example #19
0
class AnalysisScheduler:  # pylint: disable=too-many-instance-attributes
    '''
    The analysis scheduler is responsible for

    * initializing analysis plugins
    * scheduling tasks based on user decision and built-in dependencies
    * deciding if tasks should run or may be skipped
    * running the tasks
    * and storing the new results of analysis tasks in the database

    Plugin initialization is mostly handled by the plugins themselves; the scheduler only provides an attachment point and offers
    a single point of reference for introspection and runtime information.

    The scheduler offers three entry points:

    #. Start the analysis of a file object (start_analysis_of_object)
    #. Start the analysis of a file object without context (update_analysis_of_single_object)
    #. Start an update of a firmware file and all its children (update_analysis_of_object_and_children)

    Entry point 1. is used by the unpacking scheduler and is triggered for each file object after the unpacking has been
    processed. Entry points 2. and 3. are independent of the unpacking process and can be triggered by the user using
    the Web-UI or REST-API. 2. is used to update analyses for a single file. 3. is used to update analyses for all files
    contained inside a given firmware. The difference between 1. and 2. is that the single file update (2.) will not be
    considered in the current analysis introspection.

    Scheduling of tasks is done with the following considerations in mind:

    * New objects need a set of mandatory plugins (e.g. file type and hashes), as these results are used in further
      processing stages
    * Plugins can have dependencies, these have to be present before the depending plugin can be run
    * The order of execution is shuffled (dependency preserving) to balance execution of the plugins

    After scheduling, for each task a set of checks is run to decide if a task might be skipped::

        ┌─┬──────────────┐ No                                   ┌────────┐
        │0│Plugin exists?├──────────────────────────────────────►        │
        └─┴───┬──────────┘                                      │  Skip  │
              │ Yes                                     ┌───────►        ◄───┐
        ┌─┬───▼─────────────┐ Yes                       │       └────────┘   │
        │1│Is forced update?├───────────────────────────┼─────┐              │
        └─┴───┬─────────────┘                           │     │              │
              │ No                                      │     │              │
        ┌─┬───▼────────────────────────────────┐ Yes    │     │              │
        │2│Analysis present, version unchanged?├────────┘     │              │
        └─┴───┬────────────────────────────────┘              │ ┌─────────┐  │
              │ No                                            └─►         │  │
        ┌─┬───▼────────────────────────────┐ No                 │  Start  │  │
        │3│Analysis is black / whitelisted?├────────────────────►         │  │
        └─┴───┬────────────────────────────┘                    └─────────┘  │
              │ Yes                                                          │
              └──────────────────────────────────────────────────────────────┘

    Running the analysis tasks is achieved through multiprocessing.Queue instances. Each plugin has an in-queue, triggered
    by the scheduler using the `add_job` function, and an out-queue that is processed by the result collector. The
    actual analysis process is out of scope. Database interaction happens before (pre_analysis) and after
    (post_analysis) the running of a task, to store intermediate results for live updates, and final results.

    :param config: The ConfigParser object shared by all backend entities.
    :param pre_analysis: A database callback to execute before running an analysis task.
    :param post_analysis: A database callback to execute after running an analysis task.
    :param db_interface: An object reference to an instance of BackEndDbInterface.
    '''
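    # Usage sketch for the three entry points (hypothetical driver code; `firmware_object`
    # and `file_object` are illustrative):
    #
    #     scheduler = AnalysisScheduler(config=config)
    #     scheduler.start_analysis_of_object(firmware_object)                 # 1. after unpacking
    #     scheduler.update_analysis_of_single_object(file_object)             # 2. single file update
    #     scheduler.update_analysis_of_object_and_children(firmware_object)   # 3. firmware-wide update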

    def __init__(self, config: Optional[ConfigParser] = None, pre_analysis=None, post_analysis=None, db_interface=None):
        self.config = config
        self.analysis_plugins = {}
        self._load_plugins()
        self.stop_condition = Value('i', 0)
        self.process_queue = Queue()

        self.status = AnalysisStatus()
        self.task_scheduler = AnalysisTaskScheduler(self.analysis_plugins)

        self.db_backend_service = db_interface if db_interface else BackEndDbInterface(config=config)
        self.pre_analysis = pre_analysis if pre_analysis else self.db_backend_service.add_object
        self.post_analysis = post_analysis if post_analysis else self.db_backend_service.add_analysis
        self._start_runner_process()
        self._start_result_collector()
        logging.info('Analysis System online...')
        logging.info(f'Plugins available: {self._get_list_of_available_plugins()}')

    def shutdown(self):
        '''
        Shut down the runner process, the result collector and all plugin processes. A multiprocessing.Value is set to
        notify all attached processes of the impending shutdown. Afterwards, the queues are closed once it is safe to do so.
        '''
        logging.debug('Shutting down...')
        self.stop_condition.value = 1
        with ThreadPoolExecutor() as executor:
            executor.submit(self.schedule_process.join)
            executor.submit(self.result_collector_process.join)
            for plugin in self.analysis_plugins.values():
                executor.submit(plugin.shutdown)
        if getattr(self.db_backend_service, 'shutdown', False):
            self.db_backend_service.shutdown()
        self.process_queue.close()
        logging.info('Analysis System offline')

    def update_analysis_of_object_and_children(self, fo: FileObject):
        '''
        This function is used to analyze an object and all its recursively included objects without repeating the
        extraction process. Scheduled analyses are propagated to the included objects.

        :param fo: The root file that is to be analyzed
        '''
        included_files = self.db_backend_service.get_list_of_all_included_files(fo)
        self.pre_analysis(fo)
        self.status.add_update_to_current_analyses(fo, included_files)
        for child_uid in included_files:
            child_fo = self.db_backend_service.get_object(child_uid)
            child_fo.force_update = getattr(fo, 'force_update', False)  # propagate forced update to children
            self.task_scheduler.schedule_analysis_tasks(child_fo, fo.scheduled_analysis)
            self._check_further_process_or_complete(child_fo)
        self._check_further_process_or_complete(fo)

    def start_analysis_of_object(self, fo: FileObject):
        '''
        This function is used to start analysis of a firmware object. The function registers the firmware with the
        status module such that the progress of the firmware and its included files is tracked.

        :param fo: The firmware that is to be analyzed
        '''
        self.status.add_to_current_analyses(fo)
        self.task_scheduler.schedule_analysis_tasks(fo, fo.scheduled_analysis, mandatory=True)
        self._check_further_process_or_complete(fo)

    def update_analysis_of_single_object(self, fo: FileObject):
        '''
        This function is used to add analysis tasks for a single file. This function has no side effects, so the object
        is simply iterated until all scheduled analyses are processed or skipped.

        :param fo: The file that is to be analyzed
        '''
        self.task_scheduler.schedule_analysis_tasks(fo, fo.scheduled_analysis)
        self._check_further_process_or_complete(fo)

    def _get_list_of_available_plugins(self) -> List[str]:
        plugin_list = list(self.analysis_plugins.keys())
        plugin_list.sort(key=str.lower)
        return plugin_list

    # ---- plugin initialization ----

    def _load_plugins(self):
        source = import_plugins('analysis.plugins', 'plugins/analysis')
        for plugin_name in source.list_plugins():
            try:
                plugin = source.load_plugin(plugin_name)
            except Exception:  # pylint: disable=broad-except
                # This exception could be caused by upgrading dependencies to incompatible versions. Another cause could
                # be missing dependencies. So if anything goes wrong we want to inform the user about it
                logging.error(f'Could not import plugin {plugin_name} due to exception', exc_info=True)
            else:
                plugin.AnalysisPlugin(self, config=self.config)

    def register_plugin(self, name: str, plugin_instance: AnalysisBasePlugin):
        '''
        This function is used by analysis plugins to register themselves with this scheduler. During initialization the
        plugins call this function, passing their name and a reference to their instance so that the scheduler can
        address them for running analyses.

        :param name: The plugin name for addressing in runner and collector
        :param plugin_instance: A reference to the plugin object
        '''
        self.analysis_plugins[name] = plugin_instance

    def _get_default_plugins_from_config(self):
        try:
            return {
                plugin_set: read_list_from_config(
                    self.config, 'default_plugins', plugin_set
                )
                for plugin_set in self.config['default_plugins']
            }
        except (TypeError, KeyError, AttributeError):
            logging.warning('default plug-ins not set in config')
            return {}

    def get_plugin_dict(self) -> dict:
        '''
        Get information regarding all loaded plugins in form of a dictionary with the following form:

        .. code-block:: python

            {
                NAME: (
                    str: DESCRIPTION,
                    bool: mandatory,
                    bool: default,
                    str: VERSION,
                    list: DEPENDENCIES,
                    list: MIME_BLACKLIST,
                    list: MIME_WHITELIST,
                    str: config.threads
                )
            }

        Mandatory plugins are not shown in the analysis selection but always executed. Default plugins are pre-selected
        in the analysis selection.

        :return: dict with information regarding all loaded plugins
        '''
        plugin_list = self._get_list_of_available_plugins()
        plugin_list = self._remove_unwanted_plugins(plugin_list)
        default_plugins = self._get_default_plugins_from_config()
        default_flag_dict = {}
        result = {}
        for plugin in plugin_list:
            mandatory_flag = plugin in MANDATORY_PLUGINS
            for key in default_plugins:
                default_flag_dict[key] = plugin in default_plugins[key]
            blacklist, whitelist = self._get_blacklist_and_whitelist_from_plugin(plugin)
            result[plugin] = (
                self.analysis_plugins[plugin].DESCRIPTION,
                mandatory_flag,
                dict(default_flag_dict),
                self.analysis_plugins[plugin].VERSION,
                self.analysis_plugins[plugin].DEPENDENCIES,
                blacklist,
                whitelist,
                self.config[plugin].get('threads', '0')
            )
        result['unpacker'] = ('Additional information provided by the unpacker', True, False)
        return result
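    # Example of one resulting entry (all values are illustrative):
    #   {'crypto_material': ('detects crypto material', False, {'default': True}, '0.5.2',
    #                        ['file_type'], [], [], '4')}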

    # ---- task runner functions ----

    def _start_runner_process(self):
        logging.debug('Starting scheduler...')
        self.schedule_process = ExceptionSafeProcess(target=self._task_runner)
        self.schedule_process.start()

    def _task_runner(self):
        while self.stop_condition.value == 0:
            try:
                task = self.process_queue.get(timeout=float(self.config['ExpertSettings']['block_delay']))
            except Empty:
                pass
            else:
                self._process_next_analysis_task(task)

    def _process_next_analysis_task(self, fw_object: FileObject):
        self.pre_analysis(fw_object)
        analysis_to_do = fw_object.scheduled_analysis.pop()
        if analysis_to_do not in self.analysis_plugins:
            logging.error(f'Plugin \'{analysis_to_do}\' not available')
            self._check_further_process_or_complete(fw_object)
        else:
            self._start_or_skip_analysis(analysis_to_do, fw_object)

    def _start_or_skip_analysis(self, analysis_to_do: str, file_object: FileObject):
        if not self._is_forced_update(file_object) and self._analysis_is_already_in_db_and_up_to_date(analysis_to_do, file_object.uid):
            logging.debug(f'skipping analysis "{analysis_to_do}" for {file_object.uid} (analysis already in DB)')
            if analysis_to_do in self.task_scheduler.get_cumulative_remaining_dependencies(file_object.scheduled_analysis):
                self._add_completed_analysis_results_to_file_object(analysis_to_do, file_object)
            self._check_further_process_or_complete(file_object)
        elif analysis_to_do not in MANDATORY_PLUGINS and self._next_analysis_is_blacklisted(analysis_to_do, file_object):
            logging.debug(f'skipping analysis "{analysis_to_do}" for {file_object.uid} (blacklisted file type)')
            file_object.processed_analysis[analysis_to_do] = self._get_skipped_analysis_result(analysis_to_do)
            self.post_analysis(file_object)
            self._check_further_process_or_complete(file_object)
        else:
            self.analysis_plugins[analysis_to_do].add_job(file_object)

    # ---- 1. Is forced update ----

    @staticmethod
    def _is_forced_update(file_object: FileObject) -> bool:
        try:
            return bool(getattr(file_object, 'force_update', False))
        except AttributeError:
            return False

    # ---- 2. Analysis present and plugin version unchanged ----

    def _analysis_is_already_in_db_and_up_to_date(self, analysis_to_do: str, uid: str):
        db_entry = self.db_backend_service.get_specific_fields_of_db_entry(
            uid,
            {
                f'processed_analysis.{analysis_to_do}.{key}': 1
                for key in ['failed', 'file_system_flag', 'plugin_version', 'system_version']
            }
        )
        if not db_entry or analysis_to_do not in db_entry['processed_analysis'] or 'failed' in db_entry['processed_analysis'][analysis_to_do]:
            return False
        if 'plugin_version' not in db_entry['processed_analysis'][analysis_to_do]:
            logging.error(f'Plugin Version missing: UID: {uid}, Plugin: {analysis_to_do}')
            return False

        if db_entry['processed_analysis'][analysis_to_do]['file_system_flag']:
            db_entry['processed_analysis'] = self.db_backend_service.retrieve_analysis(db_entry['processed_analysis'], analysis_filter=[analysis_to_do])
            if 'file_system_flag' in db_entry['processed_analysis'][analysis_to_do]:
                logging.warning('Desanitization of version string failed')
                return False

        return self._analysis_is_up_to_date(db_entry['processed_analysis'][analysis_to_do], self.analysis_plugins[analysis_to_do], uid)

    def _analysis_is_up_to_date(self, analysis_db_entry: dict, analysis_plugin: AnalysisBasePlugin, uid):
        old_plugin_version = analysis_db_entry['plugin_version']
        old_system_version = analysis_db_entry.get('system_version', None)
        current_plugin_version = analysis_plugin.VERSION
        current_system_version = getattr(analysis_plugin, 'SYSTEM_VERSION', None)
        try:
            if LooseVersion(old_plugin_version) < LooseVersion(current_plugin_version) or \
                    LooseVersion(old_system_version or '0') < LooseVersion(current_system_version or '0'):
                return False
        except TypeError:
            logging.error(f'plug-in or system version of "{analysis_plugin.NAME}" plug-in is or was invalid!')
            return False

        return self._dependencies_are_up_to_date(analysis_plugin, uid)
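    # Note: LooseVersion compares component-wise, so e.g. LooseVersion('0.4.2') < LooseVersion('0.10')
    # is True and a plugin bump from 0.4.2 to 0.10 correctly invalidates the stored result.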

    def _dependencies_are_up_to_date(self, analysis_plugin: AnalysisBasePlugin, uid):
        for dependency in analysis_plugin.DEPENDENCIES:
            self_date = _get_analysis_date(analysis_plugin.NAME, uid, self.db_backend_service)
            dependency_date = _get_analysis_date(dependency, uid, self.db_backend_service)
            if self_date < dependency_date:
                return False

        return True

    def _add_completed_analysis_results_to_file_object(self, analysis_to_do: str, fw_object: FileObject):
        db_entry = self.db_backend_service.get_specific_fields_of_db_entry(
            fw_object.uid, {f'processed_analysis.{analysis_to_do}': 1}
        )
        desanitized_analysis = self.db_backend_service.retrieve_analysis(db_entry['processed_analysis'])
        fw_object.processed_analysis[analysis_to_do] = desanitized_analysis[analysis_to_do]

    # ---- 3. blacklist and whitelist ----

    def _get_skipped_analysis_result(self, analysis_to_do):
        return {
            'skipped': 'blacklisted file type',
            'summary': [],
            'analysis_date': time(),
            'plugin_version': self.analysis_plugins[analysis_to_do].VERSION
        }

    def _next_analysis_is_blacklisted(self, next_analysis: str, fw_object: FileObject):
        blacklist, whitelist = self._get_blacklist_and_whitelist(next_analysis)
        if not (blacklist or whitelist):
            return False
        if blacklist and whitelist:
            message = color_string(f'Configuration of plugin "{next_analysis}" erroneous', TerminalColors.FAIL)
            logging.error(f'{message}: found blacklist and whitelist. Ignoring blacklist.')

        file_type = self._get_file_type_from_object_or_db(fw_object)

        if whitelist:
            return not substring_is_in_list(file_type, whitelist)
        return substring_is_in_list(file_type, blacklist)
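    # Example (assuming substring_is_in_list() checks whether any list entry occurs in the MIME string):
    # with MIME_WHITELIST = ['application/x-executable'], a 'text/plain' file is skipped;
    # with MIME_BLACKLIST = ['image/'], an 'image/png' file is skipped.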

    def _get_file_type_from_object_or_db(self, fw_object: FileObject) -> Optional[str]:
        if 'file_type' not in fw_object.processed_analysis:
            self._add_completed_analysis_results_to_file_object('file_type', fw_object)

        return fw_object.processed_analysis['file_type']['mime'].lower()

    def _get_blacklist_and_whitelist(self, next_analysis: str) -> Tuple[List, List]:
        blacklist, whitelist = self._get_blacklist_and_whitelist_from_config(next_analysis)
        if not (blacklist or whitelist):
            blacklist, whitelist = self._get_blacklist_and_whitelist_from_plugin(next_analysis)
        return blacklist, whitelist

    def _get_blacklist_and_whitelist_from_config(self, analysis_plugin: str) -> Tuple[List, List]:
        blacklist = read_list_from_config(self.config, analysis_plugin, 'mime_blacklist')
        whitelist = read_list_from_config(self.config, analysis_plugin, 'mime_whitelist')
        return blacklist, whitelist

    def _get_blacklist_and_whitelist_from_plugin(self, analysis_plugin: str) -> Tuple[List, List]:
        blacklist = getattr(self.analysis_plugins[analysis_plugin], 'MIME_BLACKLIST', [])
        whitelist = getattr(self.analysis_plugins[analysis_plugin], 'MIME_WHITELIST', [])
        return blacklist, whitelist

    # ---- result collector functions ----

    def _start_result_collector(self):
        logging.debug('Starting result collector')
        self.result_collector_process = ExceptionSafeProcess(target=self._result_collector)
        self.result_collector_process.start()

    def _result_collector(self):  # pylint: disable=too-complex
        while self.stop_condition.value == 0:
            nop = True
            for plugin_name, plugin in self.analysis_plugins.items():
                try:
                    fw = plugin.out_queue.get_nowait()
                except Empty:
                    pass
                else:
                    nop = False
                    if plugin_name in fw.processed_analysis:
                        if fw.analysis_exception:
                            self.task_scheduler.reschedule_failed_analysis_task(fw)

                        self.post_analysis(fw)
                    self._check_further_process_or_complete(fw)
            if nop:
                sleep(float(self.config['ExpertSettings']['block_delay']))

    def _check_further_process_or_complete(self, fw_object):
        if not fw_object.scheduled_analysis:
            logging.info(f'Analysis Completed:\n{fw_object}')
            self.status.remove_from_current_analyses(fw_object)
        else:
            self.process_queue.put(fw_object)

    # ---- miscellaneous functions ----

    def get_combined_analysis_workload(self):
        return self.process_queue.qsize() + sum(plugin.in_queue.qsize() for plugin in self.analysis_plugins.values())

    def get_scheduled_workload(self) -> dict:
        '''
        Get the current workload of this scheduler. The workload is represented through
        - the general in-queue,
        - the currently running analyses in each plugin and the plugin in-queues,
        - the progress for each currently analyzed firmware and
        - recently finished analyses.

        The result has the form:

        .. code-block:: python

            {
                'analysis_main_scheduler': int(),
                'plugins': dict(),
                'current_analyses': dict(),
                'recently_finished_analyses': dict(),
            }

        :return: Dictionary containing current workload statistics
        '''
        self.status.clear_recently_finished()
        workload = {
            'analysis_main_scheduler': self.process_queue.qsize(),
            'plugins': {},
            'current_analyses': self.status.get_current_analyses_stats(),
            'recently_finished_analyses': dict(self.status.recently_finished),
        }
        for plugin_name, plugin in self.analysis_plugins.items():
            workload['plugins'][plugin_name] = {
                'queue': plugin.in_queue.qsize(),
                'active': (sum(plugin.active[i].value for i in range(plugin.thread_count))),
            }
        return workload

    @staticmethod
    def _remove_unwanted_plugins(list_of_plugins):
        defaults = ['dummy_plugin_for_testing_only']
        for plugin in defaults:
            list_of_plugins.remove(plugin)
        return list_of_plugins

    def check_exceptions(self) -> bool:
        '''
        Iterate all attached processes and see if an exception occurred in any of them. Depending on the configuration,
        plugin exceptions may not be reported here, as plugin workers are restarted after an exception occurs.

        :return: Boolean value stating if any attached process ran into an exception
        '''
        for _, plugin in self.analysis_plugins.items():
            if plugin.check_exceptions():
                return True
        return check_worker_exceptions([self.schedule_process, self.result_collector_process], 'Scheduler')
Example #20
0
 def start_work_load_monitor(self):
     logging.debug('Start work load monitor...')
     process = ExceptionSafeProcess(target=self._work_load_monitor)
     process.start()
     self.workers.append(process)
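Like the schedulers above, this worker is meant to be supervised through a check_exceptions-style polling loop. A minimal sketch of such a backend main loop, assuming scheduler instances named analysis_scheduler and compare_scheduler (illustrative names, not taken from the examples):

from time import sleep


def run_until_worker_exception(analysis_scheduler, compare_scheduler, interval=1.0):
    # Poll the schedulers until one of them reports an unrecoverable worker exception,
    # then shut them down in reverse start-up order.
    while True:
        if analysis_scheduler.check_exceptions() or compare_scheduler.check_exceptions():
            break
        sleep(interval)
    compare_scheduler.shutdown()
    analysis_scheduler.shutdown()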