def _append_section(self, section: ResultSection) -> None:
    """Store a plain-dict snapshot of ``section`` in ``self._flattened_sections``.

    The heuristic is converted with ``get_heuristic_primitives`` and the
    section tags are expanded with ``unflatten``; every other field is
    copied from the section unchanged.
    """
    snapshot = {
        "body": section.body,
        "classification": section.classification,
        "body_format": section.body_format,
        "depth": section.depth,
        "heuristic": get_heuristic_primitives(section.heuristic),
        "tags": unflatten(section.tags),
        "title_text": section.title_text,
        "zeroize_on_tag_safe": section.zeroize_on_tag_safe,
        "auto_collapse": section.auto_collapse,
    }
    self._flattened_sections.append(snapshot)
def _append_section(self, section: ResultSection) -> None:
    """Store a plain-dict snapshot of ``section`` in ``self._flattened_sections``.

    The section tags are expanded with ``unflatten``; the heuristic and all
    other fields are copied from the section as-is.
    """
    snapshot = {
        "body": section.body,
        "classification": section.classification,
        "body_format": section.body_format,
        "depth": section.depth,
        "heuristic": section.heuristic,
        "tags": unflatten(section.tags),
        "title_text": section.title_text,
    }
    self._flattened_sections.append(snapshot)
def test_result_for_service(datastore, login_session):
    """Fetch one randomly chosen result through the API and check its key.

    The result key is picked from ``file_res_list``; its first 64 characters
    are used as the file hash and the second dot-separated field as the
    service name.
    """
    _, session, host = login_session

    picked_key = random.choice(file_res_list)
    rand_hash = picked_key[:64]
    service_name = picked_key.split('.')[1]

    api_reply = get_api_data(
        session, f"{host}/api/v4/file/result/{rand_hash}/{service_name}/")
    first_result = api_reply['results'][0]

    # The API returns tags as a list; rebuild the nested mapping before
    # handing the payload to the Result model.
    for section in first_result['result']['sections']:
        section['tags'] = unflatten(tag_list_to_dict(section['tags']))

    rebuilt = Result(first_result)
    assert rebuilt.build_key() in file_res_list
def test_dict_flatten():
    """Check that ``flatten`` and ``unflatten`` round-trip a nested dict."""
    # Note: the leaf under "b.d" is a set literal ({2}), not a dict.
    nested = {"a": {"b": {"c": 1}}, "b": {"d": {2}}}

    flattened = flatten(nested)

    assert nested == unflatten(flattened)
    assert list(flattened.keys()) == ["a.b.c", "b.d"]
def validate_tags(tag_map):
    """Run ``tag_map`` through the ``Tagging`` model and return it flattened.

    The map is unflattened, built with ``construct_safe`` (the second element
    of its return value is discarded), dumped to primitives with null values
    stripped, and flattened again.
    """
    validated, _ = construct_safe(Tagging, unflatten(tag_map))
    primitives = validated.as_primitives(strip_null=True)
    return flatten(primitives)
def _handle_task_result(self, exec_time: int, task: ServiceTask, result: Dict[str, Any], client_id, service_name, freshen: bool, metric_factory):
    """Post-process a service result and hand it to the dispatcher.

    Steps, in order:
      1. Optionally (``freshen``) verify/freshen every extracted and
         supplementary file; bail out early if any is missing.
      2. Resolve section heuristics, merge the tags they produce and
         compute the total score.
      3. Stamp creation/archive/expiry timestamps on the result.
      4. Filter the temporary submission data (unchanged or oversized
         values are dropped).
      5. Safelist and validate section tags, zeroing a section's score when
         requested and all of its tags were safelisted.
      6. Build the ``Result`` object, notify the dispatch client, bump
         metrics, log, and mark the client idle in the status table.

    Args:
        exec_time: Execution time in milliseconds; only used in the final
            log message (omitted from it when falsy).
        task: The task being completed; ``ttl``, ``sid`` and
            ``temporary_submission_data`` are read from it.
        result: Raw result dictionary from the service. It is mutated in
            place before being converted to a ``Result``.
        client_id: Identifier of the service client, used for logging and
            as the status-table key.
        service_name: Name of the service; upper-cased to prefix heuristic
            IDs.
        freshen: When true, check the extracted/supplementary files first.
        metric_factory: Object whose ``increment`` is called with
            ``'scored'`` or ``'not_scored'``.

    Returns:
        A list of sha256 hashes of missing files when ``freshen`` is true
        and at least one file is missing; otherwise ``None`` (implicit).
    """
    def freshen_file(file_info_list, item):
        # Returns True when the file is "missing": either no file record was
        # fetched for it or the filestore does not hold it. Otherwise the
        # record is refreshed with the new timestamps/classification and
        # False is returned.
        file_info = file_info_list.get(item['sha256'], None)
        if file_info is None or not self.filestore.exists(item['sha256']):
            return True
        else:
            # archive_ts / expiry_ts are closed over from the enclosing
            # scope; they are assigned below, before this helper is called.
            file_info['archive_ts'] = archive_ts
            file_info['expiry_ts'] = expiry_ts
            file_info['classification'] = item['classification']
            self.datastore.save_or_freshen_file(
                item['sha256'], file_info, file_info['expiry_ts'], file_info['classification'],
                is_section_image=item.get('is_section_image', False))
            return False

    # Day counts are converted to seconds (days * 24 * 60 * 60) before being
    # passed to now_as_iso — presumably an offset from the current time;
    # confirm against now_as_iso.
    archive_ts = now_as_iso(self.config.datastore.ilm.days_until_archive * 24 * 60 * 60)
    if task.ttl:
        expiry_ts = now_as_iso(task.ttl * 24 * 60 * 60)
    else:
        # A falsy ttl means no expiry timestamp is set.
        expiry_ts = None

    # Check if all files are in the filestore
    if freshen:
        missing_files = []
        # De-duplicated hashes of every extracted and supplementary file.
        hashes = list(
            set([
                f['sha256'] for f in result['response']['extracted'] + result['response']['supplementary']
            ]))
        file_infos = self.datastore.file.multiget(hashes, as_obj=False, error_on_missing=False)

        with elasticapm.capture_span(
                name="handle_task_result.freshen_files", span_type="tasking_client"):
            # Freshen files concurrently; futures are keyed by sha256, so a
            # hash appearing more than once keeps only its last future.
            with concurrent.futures.ThreadPoolExecutor(
                    max_workers=5) as executor:
                res = {
                    f['sha256']: executor.submit(freshen_file, file_infos, f)
                    for f in result['response']['extracted'] + result['response']['supplementary']
                }
            for k, v in res.items():
                if v.result():
                    missing_files.append(k)

        # Early exit: report the missing hashes to the caller without
        # processing the result any further.
        if missing_files:
            return missing_files

    # Add scores to the heuristics, if any section set a heuristic
    with elasticapm.capture_span(
            name="handle_task_result.process_heuristics", span_type="tasking_client"):
        total_score = 0
        for section in result['result']['sections']:
            # Removed from the section here; defaults to True when absent.
            zeroize_on_sig_safe = section.pop('zeroize_on_sig_safe', True)
            # Tags stay flattened until the tag-processing phase below.
            section['tags'] = flatten(section['tags'])
            if section.get('heuristic'):
                # Heuristic IDs are namespaced as "<SERVICE_NAME>.<heur_id>".
                heur_id = f"{service_name.upper()}.{str(section['heuristic']['heur_id'])}"
                section['heuristic']['heur_id'] = heur_id
                try:
                    section['heuristic'], new_tags = self.heuristic_handler.service_heuristic_to_result_heuristic(
                        section['heuristic'], self.heuristics, zeroize_on_sig_safe)
                    # Merge the (tag_type, value) pairs produced by the
                    # heuristic, skipping values already present.
                    for tag in new_tags:
                        section['tags'].setdefault(tag[0], [])
                        if tag[1] not in section['tags'][tag[0]]:
                            section['tags'][tag[0]].append(tag[1])
                    total_score += section['heuristic']['score']
                except InvalidHeuristicException:
                    # An invalid heuristic is dropped and adds no score.
                    section['heuristic'] = None

    # Update the total score of the result
    result['result']['score'] = total_score

    # Add timestamps for creation, archive and expiry
    result['created'] = now_as_iso()
    result['archive_ts'] = archive_ts
    result['expiry_ts'] = expiry_ts

    # Pop the temporary submission data
    temp_submission_data = result.pop('temp_submission_data', None)
    if temp_submission_data:
        old_submission_data = {
            row.name: row.value for row in task.temporary_submission_data
        }
        # Keep only keys that are new or whose value changed compared to
        # what the task already carried.
        temp_submission_data = {
            k: v for k, v in temp_submission_data.items()
            if k not in old_submission_data or v != old_submission_data[k]
        }
        # Map of oversized keys to the length of their string form.
        big_temp_data = {
            k: len(str(v)) for k, v in temp_submission_data.items()
            if len(str(v)) > self.config.submission.max_temp_data_length
        }
        if big_temp_data:
            big_data_sizes = [f"{k}={v}" for k, v in big_temp_data.items()]
            self.log.warning(
                f"[{task.sid}] The following temporary submission keys where ignored because they are "
                "bigger then the maximum data size allowed "
                f"[{self.config.submission.max_temp_data_length}]: {' | '.join(big_data_sizes)}"
            )
            # Drop the oversized entries.
            temp_submission_data = {
                k: v for k, v in temp_submission_data.items() if k not in big_temp_data
            }

    # Process the tag values
    with elasticapm.capture_span(name="handle_task_result.process_tags", span_type="tasking_client"):
        for section in result['result']['sections']:
            # Perform tag safelisting
            tags, safelisted_tags = self.tag_safelister.get_validated_tag_map(
                section['tags'])
            section['tags'] = unflatten(tags)
            section['safelisted_tags'] = safelisted_tags

            # Build the tags through the Tagging model; the second return
            # value is logged below as invalid tag data when truthy.
            section['tags'], dropped = construct_safe(
                Tagging, section.get('tags', {}))

            # Set section score to zero and lower total score if service is set to zeroize score
            # and all tags were safelisted
            if section.pop('zeroize_on_tag_safe', False) and \
                    section.get('heuristic') and \
                    len(tags) == 0 and \
                    len(safelisted_tags) != 0:
                result['result']['score'] -= section['heuristic']['score']
                section['heuristic']['score'] = 0

            if dropped:
                self.log.warning(
                    f"[{task.sid}] Invalid tag data from {service_name}: {dropped}"
                )

    # From here on ``result`` is a Result object rather than a dict.
    result = Result(result)
    result_key = result.build_key(
        service_tool_version=result.response.service_tool_version, task=task)
    self.dispatch_client.service_finished(task.sid, result_key, result, temp_submission_data)

    # Metrics
    if result.result.score > 0:
        metric_factory.increment('scored')
    else:
        metric_factory.increment('not_scored')

    self.log.info(
        f"[{task.sid}] {client_id} - {service_name} "
        f"successfully completed task {f' in {exec_time}ms' if exec_time else ''}"
    )

    # Mark the client as idle; the third tuple element is "now + 5 seconds"
    # — presumably a timeout for the status entry, confirm against
    # status_table.
    self.status_table.set(
        client_id, (service_name, ServiceStatus.Idle, time.time() + 5))
def download_file(sha256, **kwargs):
    """
    Download the file using the default encoding method. This api
    will force the browser in download mode.

    Variables:
    sha256       => A resource locator for the file (sha256)

    Arguments (optional):
    encoding    => Type of encoding use for the resulting file
    name        => Name of the file to download
    password    => Password used to protect the file when encoding is zip
    sid         => Submission ID where the file is from

    Data Block:
    None

    API call example:
    /api/v4/file/download/123456...654321/

    Result example:
    <THE FILE BINARY ENCODED IN SPECIFIED FORMAT>
    """
    user = kwargs['user']
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    if not file_obj:
        return make_api_response({}, "The file was not found in the system.", 404)

    if not (user and Classification.is_accessible(user['classification'], file_obj['classification'])):
        return make_api_response({}, "You are not allowed to download this file.", 403)

    params = load_user_settings(user)

    # Only the final path component of the requested name is kept.
    name = request.args.get('name', sha256) or sha256
    name = os.path.basename(name)
    name = safe_str(name)

    sid = request.args.get('sid', None) or None
    submission = {}
    file_metadata = {}
    if sid is not None:
        # An unknown sid yields an empty submission: no metadata is attached
        # and the classification falls back to the file's own (see below).
        submission = STORAGE.submission.get(sid, as_obj=False) or {}

        # Guard on ``submission`` so a missing submission no longer raises
        # KeyError on submission['classification'].
        if submission and Classification.is_accessible(user['classification'],
                                                       submission['classification']):
            file_metadata.update(unflatten(submission['metadata']))

    if Classification.enforce:
        submission_classification = submission.get('classification', file_obj['classification'])
        file_metadata['classification'] = Classification.max_classification(
            submission_classification, file_obj['classification'])

    encoding = request.args.get('encoding', params['download_encoding'])
    password = request.args.get('password', params['default_zip_password'])

    if encoding not in FILE_DOWNLOAD_ENCODINGS:
        return make_api_response(
            {}, f"{encoding.upper()} is not in the valid encoding types: {FILE_DOWNLOAD_ENCODINGS}", 403)

    if encoding == "raw" and not ALLOW_RAW_DOWNLOADS:
        return make_api_response({}, "RAW file download has been disabled by administrators.", 403)

    if encoding == "zip":
        if not ALLOW_ZIP_DOWNLOADS:
            return make_api_response({}, "PROTECTED file download has been disabled by administrators.", 403)
        elif not password:
            return make_api_response({}, "No password given or retrieved from user's settings.", 403)

    download_dir = None
    target_path = None

    # Create a temporary download location
    if encoding == 'zip':
        # The zip archive stores the file under ``name``, so the download
        # goes to a file with that name inside a private directory.
        download_dir = tempfile.mkdtemp()
        download_path = os.path.join(download_dir, name)
    else:
        # mkstemp returns an open OS-level descriptor; close it right away
        # so it is not leaked (only the path is used from here on).
        fd, download_path = tempfile.mkstemp()
        os.close(fd)

    try:
        downloaded_from = FILESTORE.download(sha256, download_path)

        if not downloaded_from:
            return make_api_response({}, "The file was not found in the system.", 404)

        # Encode file
        if encoding == 'raw':
            target_path = download_path
        elif encoding == 'zip':
            name += '.zip'
            target_path = os.path.join(download_dir, name)
            # NOTE(review): the exit status of zip is not checked; if zip
            # fails, os.path.getsize() below raises because target_path
            # does not exist.
            subprocess.run(['zip', '-j', '--password', password, target_path, download_path],
                           capture_output=True)
        else:
            target_path, name = encode_file(download_path, name, file_metadata)

        return stream_file_response(open(target_path, 'rb'), name, os.path.getsize(target_path))

    finally:
        # Cleanup: files first, then the (now empty) temporary directory.
        if target_path and os.path.exists(target_path):
            os.unlink(target_path)
        if download_path and os.path.exists(download_path):
            os.unlink(download_path)
        if download_dir and os.path.exists(download_dir):
            os.rmdir(download_dir)
def download_file(sha256, **kwargs):
    """
    Download the file using the default encoding method. This api
    will force the browser in download mode.

    Variables:
    sha256       => A resource locator for the file (sha256)

    Arguments (optional):
    encoding    => Type of encoding use for the resulting file
    name        => Name of the file to download
    sid         => Submission ID where the file is from

    Data Block:
    None

    API call example:
    /api/v4/file/download/123456...654321/

    Result example:
    <THE FILE BINARY ENCODED IN SPECIFIED FORMAT>
    """
    user = kwargs['user']
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    if not file_obj:
        return make_api_response({}, "The file was not found in the system.", 404)

    if not (user and Classification.is_accessible(user['classification'], file_obj['classification'])):
        return make_api_response({}, "You are not allowed to download this file.", 403)

    params = load_user_settings(user)

    # Only the final path component of the requested name is kept.
    name = request.args.get('name', sha256) or sha256
    name = os.path.basename(name)
    name = safe_str(name)

    sid = request.args.get('sid', None) or None
    submission = {}
    submission_meta = {}
    if sid is not None:
        # An unknown sid yields an empty submission: the association check
        # and metadata lookup are skipped instead of raising
        # IndexError/KeyError on the empty dict.
        submission = STORAGE.submission.get(sid, as_obj=False) or {}

        if submission:
            # The file must be the submission's first file or appear as the
            # 64-character prefix of one of its error/result keys.
            hash_list = [f.get('sha256', None) for f in submission.get('files', [])[:1]]
            hash_list.extend([x[:64] for x in submission.get('errors', [])])
            hash_list.extend([x[:64] for x in submission.get('results', [])])

            if sha256 not in hash_list:
                return make_api_response({}, f"File {sha256} is not associated to submission {sid}.", 403)

            if Classification.is_accessible(user['classification'], submission['classification']):
                submission_meta.update(unflatten(submission['metadata']))

    if Classification.enforce:
        submission_classification = submission.get('classification', file_obj['classification'])
        submission_meta['classification'] = Classification.max_classification(submission_classification,
                                                                              file_obj['classification'])

    encoding = request.args.get('encoding', params['download_encoding'])
    if encoding not in ['raw', 'cart']:
        return make_api_response({}, f"{encoding.upper()} is not in the valid encoding types: [raw, cart]", 403)

    if encoding == "raw" and not ALLOW_RAW_DOWNLOADS:
        return make_api_response({}, "RAW file download has been disabled by administrators.", 403)

    # mkstemp returns an open OS-level descriptor; close it right away so it
    # is not leaked (only the path is used from here on).
    fd, download_path = tempfile.mkstemp()
    os.close(fd)
    try:
        with forge.get_filestore() as f_transport:
            downloaded_from = f_transport.download(sha256, download_path)

        if not downloaded_from:
            return make_api_response({}, "The file was not found in the system.", 404)

        # For raw downloads the target is the downloaded file itself;
        # otherwise encode_file produces a separate encoded file.
        if encoding == 'raw':
            target_path = download_path
        else:
            target_path, name = encode_file(download_path, name, submission_meta)

        try:
            return stream_file_response(open(target_path, 'rb'), name, os.path.getsize(target_path))
        finally:
            if target_path and os.path.exists(target_path):
                os.unlink(target_path)
    finally:
        if download_path and os.path.exists(download_path):
            os.unlink(download_path)