class TestRestStatistics(RestTestBase):

    def setup(self):
        super().setup()
        self.stats_updater = StatisticDbUpdater(config=self.config)
        self.stats_updater.update_statistic('file_type', {
            'file_types': [['application/gzip', 3454]],
            'firmware_container': [['application/zip', 3], ['firmware/foo', 1]]
        })
        self.stats_updater.update_statistic('known_vulnerabilities', {
            'known_vulnerabilities': [['BackDoor_String', 1]]
        })

    def teardown(self):
        self.stats_updater.shutdown()
        super().teardown()

    def test_rest_request_all_statistics(self):
        st = self.test_client.get('/rest/statistics', follow_redirects=True)
        st_dict = json.loads(st.data)

        assert b'file_type' in st.data
        assert bool(st_dict['file_type'])
        assert 'file_types' in st_dict['file_type']
        assert 'firmware_container' in st_dict['file_type']

        assert b'known_vulnerabilities' in st.data
        assert bool(st_dict['known_vulnerabilities'])
        assert 'known_vulnerabilities' in st_dict['known_vulnerabilities']

        assert b'malware' in st.data
        assert not st_dict['malware']

        assert b'exploit_mitigations' in st.data
        assert not st_dict['exploit_mitigations']

    def test_rest_request_single_statistic(self):
        st = self.test_client.get('/rest/statistics/file_type', follow_redirects=True)
        st_dict = json.loads(st.data)

        assert b'file_type' in st.data
        assert 'file_types' in st_dict['file_type']
        assert 'firmware_container' in st_dict['file_type']
        assert b'known_vulnerabilities' not in st.data

    def test_rest_request_non_existent_statistic(self):
        st = self.test_client.get('/rest/statistics/non_existent_stat', follow_redirects=True)
        assert b'A statistic with the ID non_existent_stat does not exist' in st.data

    def test_rest_request_invalid_data(self):
        st = self.test_client.get('/rest/statistics/', follow_redirects=True)
        assert b'404 Not Found' in st.data
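# For reference, the assertions above imply a /rest/statistics response of
# roughly the following shape. This is a sketch reconstructed from the test
# fixtures, not a verbatim capture of the API; statistics without data are
# returned as falsy values (here shown as empty dicts).
#
# EXAMPLE_STATISTICS_RESPONSE = {
#     'file_type': {
#         'file_types': [['application/gzip', 3454]],
#         'firmware_container': [['application/zip', 3], ['firmware/foo', 1]],
#     },
#     'known_vulnerabilities': {
#         'known_vulnerabilities': [['BackDoor_String', 1]],
#     },
#     'malware': {},                # no data inserted in setup() -> falsy
#     'exploit_mitigations': {},    # no data inserted in setup() -> falsy
# }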
class StatisticUpdater:
    '''
    This class handles statistic generation
    '''

    def __init__(self, config=None):
        self._config = config
        self.db = StatisticDbUpdater(config=self._config)
        self.start_time = None
        self.match = {}

    def shutdown(self):
        self.db.shutdown()

    def set_match(self, match):
        self.match = match if match else {}

    def update_all_stats(self):
        self.start_time = time()

        self.db.update_statistic('firmware_meta', self._get_firmware_meta_stats())
        self.db.update_statistic('file_type', self._get_file_type_stats())
        self.db.update_statistic('malware', self._get_malware_stats())
        self.db.update_statistic('crypto_material', self._get_crypto_material_stats())
        self.db.update_statistic('unpacking', self._get_unpacking_stats())
        self.db.update_statistic('architecture', self._get_architecture_stats())
        self.db.update_statistic('ips_and_uris', self._get_ip_stats())
        self.db.update_statistic('release_date', self._get_time_stats())
        self.db.update_statistic('exploit_mitigations', self._get_exploit_mitigations_stats())
        self.db.update_statistic('known_vulnerabilities', self._get_known_vulnerabilities_stats())
        self.db.update_statistic('software_components', self._get_software_components_stats())
        # should always be the last, because of the benchmark
        self.db.update_statistic('general', self.get_general_stats())

    # ---- get statistic functions

    def get_general_stats(self):
        if self.start_time is None:
            self.start_time = time()
        stats = {}
        stats['number_of_firmwares'] = self.db.firmwares.count_documents(self.match)
        stats['total_firmware_size'] = get_field_sum(self.db.firmwares, '$size', match=self.match)
        stats['average_firmware_size'] = get_field_average(self.db.firmwares, '$size', match=self.match)
        if not self.match:
            stats['number_of_unique_files'] = self.db.file_objects.count_documents({})
            stats['total_file_size'] = get_field_sum(self.db.file_objects, '$size')
            stats['average_file_size'] = get_field_average(self.db.file_objects, '$size')
        else:
            aggregation_pipeline = self._get_file_object_filter_aggregation_pipeline(
                pipeline_group={'_id': '$_id', 'size': {'$push': '$size'}},
                additional_projection={'size': 1}
            )
            query_result = [item['size'][0] for item in self.db.file_objects.aggregate(aggregation_pipeline)]
            stats['number_of_unique_files'] = len(query_result)
            stats['total_file_size'] = sum(query_result)
            stats['average_file_size'] = avg(query_result)
        stats['creation_time'] = time()

        benchmark = stats['creation_time'] - self.start_time
        stats['benchmark'] = benchmark
        logging.info('time to create stats: {}'.format(time_format(benchmark)))
        return stats

    def _get_malware_stats(self):
        stats = {}
        result = self._get_objects_and_count_of_occurrence_firmware_and_file_db(
            '$processed_analysis.malware_scanner.scans.ClamAV.result', unwind=False, match=self.match
        )
        stats['malware'] = self._clean_malware_list(result)
        return stats

    def _get_exploit_mitigations_stats(self):
        stats = {'exploit_mitigations': []}
        aggregation_pipeline = self._get_file_object_filter_aggregation_pipeline(
            pipeline_group={
                '_id': '$parent_firmware_uids',
                'exploit_mitigations': {'$push': '$processed_analysis.exploit_mitigations.summary'}
            },
            pipeline_match={
                'processed_analysis.exploit_mitigations.summary': {'$exists': True, '$not': {'$size': 0}}
            },
            additional_projection={'processed_analysis.exploit_mitigations.summary': 1}
        )
        result_list_of_lists = [
            list(itertools.chain.from_iterable(d['exploit_mitigations']))
            for d in self.db.file_objects.aggregate(aggregation_pipeline)
        ]
        result_flattened = list(itertools.chain.from_iterable(result_list_of_lists))
        result = self._count_occurrences(result_flattened)
        self.get_stats_nx(result, stats)
        self.get_stats_canary(result, stats)
        self.get_stats_relro(result, stats)
        self.get_stats_pie(result, stats)
        self.get_stats_fortify(result, stats)
        return stats

    def get_stats_fortify(self, result, stats):
        fortify_off, fortify_on = self.extract_fortify_data_from_analysis(result)
        total_amount_of_files = calculate_total_files([fortify_off, fortify_on])
        # the NX append helper is reused here because it is generic: it simply
        # appends the 'enabled' and 'disabled' entries to the result dict
        self.append_nx_stats_to_result_dict(fortify_off, fortify_on, stats, total_amount_of_files)

    def extract_fortify_data_from_analysis(self, result):
        fortify_on = self.extract_mitigation_from_list('FORTIFY_SOURCE enabled', result)
        fortify_off = self.extract_mitigation_from_list('FORTIFY_SOURCE disabled', result)
        return fortify_off, fortify_on

    def get_stats_nx(self, result, stats):
        nx_off, nx_on = self.extract_nx_data_from_analysis(result)
        total_amount_of_files = calculate_total_files([nx_off, nx_on])
        self.append_nx_stats_to_result_dict(nx_off, nx_on, stats, total_amount_of_files)

    def extract_nx_data_from_analysis(self, result):
        nx_on = self.extract_mitigation_from_list('NX enabled', result)
        nx_off = self.extract_mitigation_from_list('NX disabled', result)
        return nx_off, nx_on

    def append_nx_stats_to_result_dict(self, nx_off, nx_on, stats, total_amount_of_files):
        self.update_result_dict(nx_on, stats, total_amount_of_files)
        self.update_result_dict(nx_off, stats, total_amount_of_files)

    def get_stats_canary(self, result, stats):
        canary_off, canary_on = self.extract_canary_data_from_analysis(result)
        total_amount_of_files = calculate_total_files([canary_off, canary_on])
        self.append_canary_stats_to_result_dict(canary_off, canary_on, stats, total_amount_of_files)

    def extract_canary_data_from_analysis(self, result):
        canary_on = self.extract_mitigation_from_list('Canary enabled', result)
        canary_off = self.extract_mitigation_from_list('Canary disabled', result)
        return canary_off, canary_on

    def append_canary_stats_to_result_dict(self, canary_off, canary_on, stats, total_amount_of_files):
        self.update_result_dict(canary_on, stats, total_amount_of_files)
        self.update_result_dict(canary_off, stats, total_amount_of_files)

    def get_stats_relro(self, result, stats):
        relro_off, relro_on, relro_partial = self.extract_relro_data_from_analysis(result)
        total_amount_of_files = calculate_total_files([relro_off, relro_on, relro_partial])
        self.append_relro_stats_to_result_dict(relro_off, relro_on, relro_partial, stats, total_amount_of_files)

    def extract_relro_data_from_analysis(self, result):
        relro_on = self.extract_mitigation_from_list('RELRO fully enabled', result)
        relro_partial = self.extract_mitigation_from_list('RELRO partially enabled', result)
        relro_off = self.extract_mitigation_from_list('RELRO disabled', result)
        return relro_off, relro_on, relro_partial

    def append_relro_stats_to_result_dict(self, relro_off, relro_on, relro_partial, stats, total_amount_of_files):
        self.update_result_dict(relro_on, stats, total_amount_of_files)
        self.update_result_dict(relro_partial, stats, total_amount_of_files)
        self.update_result_dict(relro_off, stats, total_amount_of_files)

    def get_stats_pie(self, result, stats):
        pie_invalid, pie_off, pie_on, pie_partial = self.extract_pie_data_from_analysis(result)
        total_amount_of_files = calculate_total_files([pie_off, pie_on, pie_partial, pie_invalid])
        self.append_pie_stats_to_result_dict(pie_invalid, pie_off, pie_on, pie_partial, stats, total_amount_of_files)
    def extract_pie_data_from_analysis(self, result):
        pie_on = self.extract_mitigation_from_list('PIE enabled', result)
        pie_partial = self.extract_mitigation_from_list('PIE/DSO present', result)
        pie_off = self.extract_mitigation_from_list('PIE disabled', result)
        pie_invalid = self.extract_mitigation_from_list('PIE - invalid ELF file', result)
        return pie_invalid, pie_off, pie_on, pie_partial

    def append_pie_stats_to_result_dict(self, pie_invalid, pie_off, pie_on, pie_partial, stats, total_amount_of_files):
        self.update_result_dict(pie_on, stats, total_amount_of_files)
        self.update_result_dict(pie_partial, stats, total_amount_of_files)
        self.update_result_dict(pie_off, stats, total_amount_of_files)
        self.update_result_dict(pie_invalid, stats, total_amount_of_files)

    @staticmethod
    def extract_mitigation_from_list(string, result):
        return list(filter(lambda x: x.count(string) > 0, result))

    def update_result_dict(self, exploit_mitigation, stats, total_amount_of_files):
        if len(exploit_mitigation) > 0 and total_amount_of_files > 0:
            percentage_value = self._round(exploit_mitigation, total_amount_of_files)
            stats['exploit_mitigations'].append(
                (exploit_mitigation[0][0], exploit_mitigation[0][1], percentage_value)
            )

    @staticmethod
    def _round(exploit_mitigation_stat, total_amount_of_files):
        return round(exploit_mitigation_stat[0][1] / total_amount_of_files, 5)

    def _get_known_vulnerabilities_stats(self):
        stats = {}
        result = self._get_objects_and_count_of_occurrence_firmware_and_file_db(
            '$processed_analysis.known_vulnerabilities.summary', unwind=True, match=self.match
        )
        stats['known_vulnerabilities'] = self._clean_malware_list(result)
        return stats

    def _get_crypto_material_stats(self):
        stats = {}
        result = self._get_objects_and_count_of_occurrence_firmware_and_file_db(
            '$processed_analysis.crypto_material.summary', unwind=True, match=self.match
        )
        stats['crypto_material'] = result
        return stats

    @staticmethod
    def _clean_malware_list(input_list):
        # filter out pseudo results that denote the absence of findings
        return [item for item in input_list if item[0] not in ('not available', 'clean')]

    def _get_firmware_meta_stats(self):
        stats = {}
        stats['vendor'] = self._get_objects_and_count_of_occurrence_single_db(
            self.db.firmwares, '$vendor', match=self.match
        )
        stats['device_class'] = self._get_objects_and_count_of_occurrence_single_db(
            self.db.firmwares, '$device_class', match=self.match
        )
        return stats

    def _get_file_type_stats(self):
        stats = {}
        if not self.match:
            stats['file_types'] = self._get_objects_and_count_of_occurrence_single_db(
                self.db.file_objects, '$processed_analysis.file_type.mime'
            )
        stats['firmware_container'] = self._get_objects_and_count_of_occurrence_single_db(
            self.db.firmwares, '$processed_analysis.file_type.mime', match=self.match
        )
        return stats

    def _get_unpacking_stats(self):
        stats = {}
        stats['used_unpackers'] = self._get_objects_and_count_of_occurrence_firmware_and_file_db(
            '$processed_analysis.unpacker.plugin_used'
        )
        stats['packed_file_types'] = self._get_objects_and_count_of_occurrence_single_db(
            self.db.file_objects, '$processed_analysis.file_type.mime',
            match={'processed_analysis.unpacker.summary': 'packed'}
        )
        stats['data_loss_file_types'] = self._get_objects_and_count_of_occurrence_firmware_and_file_db(
            '$processed_analysis.file_type.mime',
            match={'processed_analysis.unpacker.summary': 'data lost'}
        )
        fo_packing_stats = dict(self._get_objects_and_count_of_occurrence_single_db(
            self.db.file_objects, '$processed_analysis.unpacker.summary', unwind=True
        ))
        firmware_packing_stats = dict(self._get_objects_and_count_of_occurrence_single_db(
            self.db.firmwares, '$processed_analysis.unpacker.summary', unwind=True
        ))
        stats['overall_unpack_ratio'] = self._get_ratio(
            fo_packing_stats, firmware_packing_stats, ['unpacked', 'packed']
        )
        stats['overall_data_loss_ratio'] = self._get_ratio(
            fo_packing_stats, firmware_packing_stats, ['data lost', 'no data lost']
        )
        stats['average_packed_entropy'] = avg(dict(self._get_objects_and_count_of_occurrence_single_db(
            self.db.file_objects, '$processed_analysis.unpacker.entropy', unwind=True,
            match={'processed_analysis.unpacker.summary': 'packed'}
        )).keys())
        stats['average_unpacked_entropy'] = avg(dict(self._get_objects_and_count_of_occurrence_single_db(
            self.db.file_objects, '$processed_analysis.unpacker.entropy', unwind=True,
            match={'processed_analysis.unpacker.summary': 'unpacked'}
        )).keys())
        return stats

    def _get_file_object_filter_aggregation_pipeline(self, pipeline_group, pipeline_match=None,
                                                     additional_projection=None, sort=False, unwind=None):
        aggregation_pipeline = [
            {'$unwind': '$parent_firmware_uids'},
            {'$lookup': {
                'from': 'firmwares',
                'localField': 'parent_firmware_uids',
                'foreignField': '_id',
                'as': 'firmware'
            }},
            {'$unwind': '$firmware'},
            {'$project': {
                '_id': 1,
                'parent_firmware_uids': 1,
                'device_class': '$firmware.device_class',
                'vendor': '$firmware.vendor'
            }},
            {'$group': pipeline_group}
        ]
        if additional_projection:
            aggregation_pipeline[3]['$project'].update(additional_projection)
        if self.match:
            aggregation_pipeline.insert(4, {'$match': self.match})
        if pipeline_match:
            aggregation_pipeline.insert(0, {'$match': pipeline_match})
        if unwind:
            aggregation_pipeline.insert(-1, {'$unwind': unwind})
        if sort:
            aggregation_pipeline.append({'$sort': SON([('_id', 1)])})
        return aggregation_pipeline

    def _get_architecture_stats(self):
        stats = {}
        aggregation_pipeline = self._get_file_object_filter_aggregation_pipeline(
            pipeline_group={
                '_id': '$parent_firmware_uids',
                'architecture': {'$push': '$processed_analysis.cpu_architecture.summary'}
            },
            pipeline_match={
                'processed_analysis.cpu_architecture.summary': {'$exists': True, '$not': {'$size': 0}}
            },
            additional_projection={'processed_analysis.cpu_architecture.summary': 1}
        )
        result = [
            self._shorten_architecture_string(
                self._find_most_frequent_architecture(
                    list(itertools.chain.from_iterable(item['architecture']))
                )
            )
            for item in self.db.file_objects.aggregate(aggregation_pipeline)
        ]
        stats['cpu_architecture'] = self._count_occurrences(result)
        return stats

    def _find_most_frequent_architecture(self, arch_list):
        try:
            arch_frequency = sorted(self._count_occurrences(arch_list), key=lambda x: x[1], reverse=True)
            return arch_frequency[0][0]
        except Exception as exception:
            logging.error('Could not get arch frequency: {} {}'.format(type(exception).__name__, exception))
            return None

    @staticmethod
    def _count_occurrences(input_list):
        return [(item, input_list.count(item)) for item in set(input_list)]

    @staticmethod
    def _shorten_architecture_string(string):
        if string is None:
            return None
        logging.debug(string)
        string_parts = string.split(',')[:2]
        if len(string_parts) > 1:
            # long string with bitness and endianness and ' (M)' at the end
            return ','.join(string_parts)
        # short string (without bitness and endianness but with ' (M)' at the end)
        return string[:-4]

    def _get_ip_stats(self):
        stats = {}
        stats['ips_v4'] = self._get_objects_and_count_of_occurrence_firmware_and_file_db(
            '$processed_analysis.ip_and_uri_finder.ips_v4', unwind=True, sumup_function=sum_up_nested_lists
        )
        stats['ips_v6'] = self._get_objects_and_count_of_occurrence_firmware_and_file_db(
            '$processed_analysis.ip_and_uri_finder.ips_v6', unwind=True, sumup_function=sum_up_nested_lists
        )
        stats['uris'] = self._get_objects_and_count_of_occurrence_firmware_and_file_db(
            '$processed_analysis.ip_and_uri_finder.uris', unwind=True
        )
        return stats

    @staticmethod
    def _get_ratio(fo_stats, firmware_stats, values):
        for stats in [fo_stats, firmware_stats]:
            for value in values:
                if value not in stats:
                    stats[value] = 0
        try:
            return (fo_stats[values[0]] + firmware_stats[values[0]]) / \
                   (fo_stats[values[0]] + fo_stats[values[1]] + firmware_stats[values[0]] + firmware_stats[values[1]])
        except ZeroDivisionError:
            return 0

    def _get_time_stats(self):
        projection = {'month': {'$month': '$release_date'}, 'year': {'$year': '$release_date'}}
        query = get_objects_and_count_of_occurrence(self.db.firmwares, projection, match=self.match)
        histogram_data = self._build_stats_entry_from_date_query(query)
        return {'date_histogram_data': histogram_data}

    @staticmethod
    def _get_month_name(month_int):
        return datetime(1900, month_int, 1).strftime('%B')

    def _get_software_components_stats(self):
        query_result = self.db.file_objects.aggregate([
            {'$project': {'sc': {'$objectToArray': '$processed_analysis.software_components'}}},
            # match only analyses with actual results (more keys than the 4 standard keys)
            {'$match': {'sc.4': {'$exists': True}}},
            {'$unwind': '$sc'},
            {'$group': {'_id': '$sc.k', 'count': {'$sum': 1}}}
        ])
        return {
            'software_components': [
                (entry['_id'], int(entry['count']))
                for entry in query_result
                if entry['_id'] not in [
                    'summary', 'analysis_date', 'file_system_flag', 'plugin_version',
                    'tags', 'skipped', 'system_version'
                ]
            ]
        }

    # ---- internal stuff

    def _build_stats_entry_from_date_query(self, date_query):
        time_dict = build_time_dict(date_query)
        result = []
        for year in sorted(time_dict.keys()):
            for month in sorted(time_dict[year].keys()):
                result.append(('{} {}'.format(self._get_month_name(month), year), time_dict[year][month]))
        return result

    @staticmethod
    def _convert_dict_list_to_list(input_list):
        result = []
        for item in input_list:
            if item['_id'] is None:
                item['_id'] = 'not available'
            result.append([item['_id'], item['count']])
        return result

    def _get_objects_and_count_of_occurrence_single_db(self, database, object_path, unwind=False, match=None):
        if self.match and database == self.db.file_objects:  # filtered live query on file objects
            aggregation_pipeline = self._get_file_object_filter_aggregation_pipeline(
                pipeline_group={'_id': object_path, 'count': {'$sum': 1}},
                pipeline_match=match,
                sort=True,
                additional_projection={object_path.replace('$', ''): 1},
                unwind=object_path if unwind else None
            )
            tmp = database.aggregate(aggregation_pipeline)
        else:
            tmp = get_objects_and_count_of_occurrence(
                database, object_path, unwind=unwind, match=merge_dict(match, self.match)
            )
        chart_list = self._convert_dict_list_to_list(tmp)
        return self._filter_sanitized_objects(chart_list)

    def _get_objects_and_count_of_occurrence_firmware_and_file_db(self, object_path, unwind=False, match=None,
                                                                  sumup_function=sum_up_lists):
        result_firmwares = self._get_objects_and_count_of_occurrence_single_db(
            self.db.firmwares, object_path, unwind=unwind, match=match
        )
        result_files = self._get_objects_and_count_of_occurrence_single_db(
            self.db.file_objects, object_path, unwind=unwind, match=match
        )
        return sumup_function(result_firmwares, result_files)

    @staticmethod
    def _filter_sanitized_objects(input_list):
        # drop entries whose values were sanitized (swapped out of the database) during storage
        return [item for item in input_list if not is_sanitized_entry(item[0])]
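# A minimal usage sketch for StatisticUpdater, assuming a loaded FACT config
# object (`load_config` is a hypothetical stand-in for however the surrounding
# application obtains its configuration); the method calls themselves come
# from the class above:
#
#     config = load_config('main.cfg')          # hypothetical helper
#     updater = StatisticUpdater(config=config)
#     try:
#         updater.set_match({'vendor': 'AVM'})   # optional filter; {} means "all firmwares"
#         updater.update_all_stats()             # writes all statistics to the database
#     finally:
#         updater.shutdown()                     # closes the underlying DB connection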
class WorkLoadStatistic:

    def __init__(self, config, component):
        self.config = config
        self.component = component
        self.db = StatisticDbUpdater(config=self.config)
        self.platform_information = self._get_platform_information()
        logging.debug('{}: Online'.format(self.component))

    def shutdown(self):
        logging.debug('{}: shutting down -> set offline message'.format(self.component))
        self.db.update_statistic(self.component, {'status': 'offline', 'last_update': time()})
        self.db.shutdown()

    def update(self, unpacking_workload=None, analysis_workload=None, compare_workload=None):
        stats = {
            'name': self.component,
            'status': 'online',
            'last_update': time(),
            'system': self._get_system_information(),
            'platform': self.platform_information,
        }
        if unpacking_workload:
            stats['unpacking'] = unpacking_workload
        if analysis_workload:
            stats['analysis'] = analysis_workload
        if compare_workload:
            stats['compare'] = compare_workload
        self.db.update_statistic(self.component, stats)

    def _get_system_information(self):
        memory_usage = psutil.virtual_memory()
        try:
            disk_usage = psutil.disk_usage(self.config['data_storage']['firmware_file_storage_directory'])
        except Exception:  # fall back to the root partition if the storage directory is unavailable
            disk_usage = psutil.disk_usage('/')
        try:
            cpu_percentage = psutil.cpu_percent()
        except Exception:
            cpu_percentage = 'unknown'
        return {
            'cpu_cores': psutil.cpu_count(logical=False),
            'virtual_cpu_cores': psutil.cpu_count(),
            'cpu_percentage': cpu_percentage,
            'load_average': ', '.join(str(x) for x in os.getloadavg()),
            'memory_total': memory_usage.total,
            'memory_used': memory_usage.used,
            'memory_percent': memory_usage.percent,
            'disk_total': disk_usage.total,
            'disk_used': disk_usage.used,
            'disk_percent': disk_usage.percent
        }

    @staticmethod
    def _get_platform_information():
        operating_system = ' '.join(distro.linux_distribution()[0:2])
        python_version = '.'.join(str(x) for x in sys.version_info[0:3])
        return {'os': operating_system, 'python': python_version, 'fact_version': __VERSION__}
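# A hedged sketch of how a component might report its workload periodically.
# The scheduling loop, the stop condition, and the workload dict layout are
# illustrative assumptions; only the WorkLoadStatistic calls themselves come
# from the class above:
#
#     workload_stat = WorkLoadStatistic(config=config, component='backend')
#     while not shutdown_requested:                   # hypothetical stop condition
#         workload_stat.update(
#             analysis_workload={'queue_length': 12}  # illustrative payload
#         )
#         sleep(5)                                    # e.g. from time import sleep
#     workload_stat.shutdown()                        # writes the 'offline' status entry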