def handle_artefacts(artefact_list: list, request: ServiceRequest) -> ResultSection:
    """
    Goes through each artefact in artefact_list, uploading them and adding result sections accordingly

    Positional arguments:
    artefact_list -- list of dictionaries that each represent an artefact
    """
    validated_artefacts = SandboxOntology._validate_artefacts(artefact_list)
    artefacts_result_section = ResultSection("Sandbox Artefacts")

    for artefact in validated_artefacts:
        SandboxOntology._handle_artefact(artefact, artefacts_result_section)

        if artefact.to_be_extracted:
            try:
                request.add_extracted(artefact.path, artefact.name, artefact.description)
            except MaxExtractedExceeded:
                # To avoid errors from being raised when too many files have been extracted
                pass
        else:
            request.add_supplementary(artefact.path, artefact.name, artefact.description)

    return artefacts_result_section if artefacts_result_section.subsections else None
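# A minimal usage sketch for handle_artefacts() above. The artefact dict shape
# (name/path/description/to_be_extracted) is inferred from the fields the function
# reads after validation; treat it as an assumption, not the canonical schema.
example_artefacts = [{
    "name": "memdump.bin",            # hypothetical file name
    "path": "/tmp/memdump.bin",       # hypothetical local path
    "description": "Sample process memory dump",
    "to_be_extracted": True,          # False would route it to add_supplementary()
}]
# Inside a service's execute(request):
#     section = handle_artefacts(example_artefacts, request)
#     if section:  # None is returned when no subsections were produced
#         request.result.add_section(section)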
def execute(self, request: ServiceRequest) -> None:
    # Check that the current node has a version map
    while True:
        if self.nodes[self.current_node]['engine_count'] == 0:
            self._get_version_map(self.current_node)
            self.log.info("Getting version map from execute() function")
            if self.nodes[self.current_node]['engine_count'] == 0:
                self.new_node(force=True)
        else:
            break

    filename = request.file_path
    try:
        response = self.scan_file(filename)
    except RecoverableError:
        response = self.scan_file(filename)
    result = self.parse_results(response)
    request.result = result
    request.set_service_context(
        f"Definition Time Range: {self.nodes[self.current_node]['oldest_dat']} - "
        f"{self.nodes[self.current_node]['newest_dat']}")

    # Compare queue time of current node with new random node after a minimum run time on current node
    elapsed_time = time.time() - self.start_time
    if elapsed_time >= self.config.get("max_node_time"):
        self.new_node(force=True)
    elif elapsed_time >= self.config.get("min_node_time"):
        self.new_node(force=False)
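# A sketch of the two rotation thresholds read from the service config above; the
# numbers are hypothetical, only their ordering relative to elapsed_time matters.
example_config = {
    "min_node_time": 60,   # seconds; past this, new_node(force=False) may rotate to a better node
    "max_node_time": 300,  # seconds; past this, new_node(force=True) always rotates
}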
def execute(self, request: ServiceRequest) -> None:
    sha256 = request.sha256
    result = Result()

    # First, let's get the analysis metadata, if it exists on the system
    main_api_result = self._get_analysis_metadata(request.get_param('analysis_id'), sha256)

    if not main_api_result:
        self.log.debug(f"SHA256 {sha256} is not on the system.")
        request.result = result
        return

    if main_api_result.get("verdict") in Verdicts.NOT_SUPPORTED_VERDICTS.value:
        self.log.debug(f"Unsupported file type: {request.file_type}")
        request.result = result
        return
    elif main_api_result.get("verdict") == AnalysisStatusCode.FAILED.value:
        self.log.warning("The Intezer server is not feeling well :(")
        request.result = result
        return

    analysis_id = main_api_result["analysis_id"]

    # Setup the main result section
    main_kv_section = ResultKeyValueSection("IntezerStatic analysis report")
    processed_main_api_result = self._process_details(main_api_result.copy(), UNINTERESTING_ANALYSIS_KEYS)
    main_kv_section.update_items(processed_main_api_result)
    if "family_name" in main_api_result:
        main_kv_section.add_tag("attribution.family", main_api_result["family_name"])

    # This file-verdict map will be used later on to assign heuristics to sub-analyses
    file_verdict_map = {}
    self._process_iocs(analysis_id, file_verdict_map, main_kv_section)
    if not self.config["is_on_premise"]:
        self._process_ttps(analysis_id, main_kv_section)
    self._handle_subanalyses(request, sha256, analysis_id, file_verdict_map, main_kv_section)

    # Setting heuristic here to avoid FPs
    if main_kv_section.subsections:
        self._set_heuristic_by_verdict(main_kv_section, main_api_result["verdict"])

    if main_kv_section.subsections or main_kv_section.heuristic:
        result.add_section(main_kv_section)
    request.result = result
def gen_results(self, api_response):
    procr = self.upmal.process_results(api_response, self.upm)
    result = Result()
    service_task = ServiceTask(sample1)
    task = Task(service_task)
    request = ServiceRequest(task)
    self.upmal.generate_results(procr, result, api_response, request)
def test_execute(sample, metadefender_class_instance, mocker):
    from assemblyline_v4_service.common.task import Task
    from assemblyline_v4_service.common.result import Result
    from assemblyline.odm.messages.task import Task as ServiceTask
    from assemblyline_v4_service.common.request import ServiceRequest
    import json

    metadefender_class_instance.nodes["blah"] = {"engine_count": 1, "oldest_dat": 1, "newest_dat": 1}
    mocker.patch.object(metadefender_class_instance, "_get_version_map")
    metadefender_class_instance.start()

    service_task = ServiceTask(sample)
    task = Task(service_task)
    metadefender_class_instance._task = task
    service_request = ServiceRequest(task)

    mocker.patch.object(metadefender_class_instance, "scan_file")
    mocker.patch.object(metadefender_class_instance, "new_node")
    mocker.patch.object(metadefender_class_instance, "parse_results", return_value=Result())

    # Actually executing the sample
    metadefender_class_instance.execute(service_request)

    # For coverage
    metadefender_class_instance.config["max_node_time"] = 0
    metadefender_class_instance.execute(service_request)

    metadefender_class_instance.config["max_node_time"] = 1000
    metadefender_class_instance.config["min_node_time"] = 0
    metadefender_class_instance.execute(service_request)
def test_service(sample):
    overwrite_results = False  # Used temporarily to mass-correct tests

    cls = ViperMonkey()
    cls.start()

    task = Task(create_service_task(sample=sample))
    service_request = ServiceRequest(task)
    cls.execute(service_request)

    # Get the result of execute() from the test method
    test_result = task.get_service_result()

    # Get the assumed "correct" result of the sample
    correct_path = os.path.join(SELF_LOCATION, "tests", "results", f"{sample}.json")
    with open(correct_path, "r") as f:
        correct_result = json.load(f)

    test_result = generalize_result(test_result)
    if overwrite_results:
        if test_result != correct_result:
            with open(correct_path, "w") as f:
                json.dump(test_result, f)
    else:
        assert test_result == correct_result
def test_execute(class_instance, sample):
    # Imports required to execute the sample
    from assemblyline_v4_service.common.task import Task
    from assemblyline.odm.messages.task import Task as ServiceTask
    from assemblyline_v4_service.common.request import ServiceRequest

    # Creating the required objects for execution
    service_task = ServiceTask(sample)
    task = Task(service_task)
    class_instance._task = task
    service_request = ServiceRequest(task)

    # Actually executing the sample
    class_instance.execute(service_request)

    # Get the result of execute() from the test method
    test_result = task.get_service_result()

    # Get the assumed "correct" result of the sample
    correct_result_path = os.path.join(TEST_DIR, "results", task.file_name + ".json")
    with open(correct_result_path, "r") as f:
        correct_result = json.load(f)

    # Assert that the appropriate sections of the dict are equal, avoiding the date in the response
    test_result_response = test_result.pop("response")
    correct_result_response = correct_result.pop("response")
    assert test_result == correct_result

    # Comparing everything in the response except for the date
    test_result_response.pop("milestones")
    correct_result_response.pop("milestones")
    assert test_result_response == correct_result_response
def handle_task(self, task: ServiceTask) -> None: try: self._task = Task(task) self.log.info( f"[{self._task.sid}] Starting task for file: {self._task.sha256} ({self._task.type})" ) self._task.start( self.service_attributes.default_result_classification, self.service_attributes.version, self.get_tool_version()) self.ontologies = defaultdict(list) request = ServiceRequest(self._task) self.execute(request) self._attach_service_meta_ontology(request) self._success() except RuntimeError as re: if is_recoverable_runtime_error(re): new_ex = exceptions.RecoverableError( "Service trying to use a threadpool during shutdown") self._handle_execute_failure( new_ex, exceptions.get_stacktrace_info(re)) else: self._handle_execute_failure( re, exceptions.get_stacktrace_info(re)) except Exception as ex: self._handle_execute_failure(ex, exceptions.get_stacktrace_info(ex)) finally: self._cleanup()
def execute(self, request: ServiceRequest):
    try:
        self.client = Client(
            apikey=self.config.get("api_key", request.get_param("api_key")),
            proxy=self.config.get('proxy') or None)
    except Exception as e:
        self.log.error("No API key found for VirusTotal")
        raise e

    if request.task.metadata.get('submitted_url', None) and request.task.depth == 0:
        response = self.scan_url(request)
    else:
        response = self.scan_file(request)

    if response:
        result = self.parse_results(response)
        request.result = result
    else:
        request.result = Result()
def execute(self, request: ServiceRequest) -> None:
    """Main module. See README for details."""
    request.result = Result()
    patterns = PatternMatch()
    self.sample_type = request.file_type
    self.excess_extracted = 0

    # Filters for submission modes. Listed in order of use.
    if request.deep_scan:
        # Maximum size of submitted file to run this service:
        max_size = 8000000
        # String length maximum, used in basic ASCII and UNICODE modules:
        max_length = 1000000
        # String list maximum size
        # List produced by basic ASCII and UNICODE module results; determines
        # whether patterns.py will only evaluate network IOC patterns:
        st_max_size = 1000000
        # BBcrack maximum size of submitted file to run module:
        bb_max_size = 200000
    else:
        max_size = self.config.get('max_size', 3000000)
        max_length = self.config.get('max_length', 5000)
        st_max_size = self.config.get('st_max_size', 0)
        bb_max_size = self.config.get('bb_max_size', 85000)

    # Begin analysis
    if len(request.file_contents) >= max_size or self.sample_type.startswith("archive/"):
        # No analysis is done if the file is an archive or too large
        return

    self.ascii_results(request, patterns, max_length, st_max_size)
    self.embedded_pe_results(request)

    # Possible encoded strings -- all sample types except code/* (code is handled by the DeobfuScripter service)
    if not self.sample_type.startswith('code'):
        self.base64_results(request, patterns)
        if len(request.file_contents) < bb_max_size:
            self.bbcrack_results(request)

    # Other possible encoded strings -- all sample types except code and executables
    if self.sample_type.split('/', 1)[0] not in ['executable', 'code']:
        self.unicode_results(request, patterns)

    # Go over the file again, looking for long ASCII-HEX character strings
    if not self.sample_type.startswith('document/office'):
        self.hex_results(request, patterns)

    if self.excess_extracted:
        self.log.warning(
            f"Too many files extracted from {request.sha256}, "
            f"{self.excess_extracted} files were not extracted")
        request.result.add_section(
            ResultSection(
                f"Over extraction limit: "
                f"{self.excess_extracted} files were not extracted"))
def test_service(sample):
    config = helper.get_service_attributes().config
    cls = emlparser.emlparser.EmlParser(config=config)
    cls.start()

    task = Task(create_service_task(sample=sample))
    service_request = ServiceRequest(task)
    cls.execute(service_request)

    # Get the result of execute() from the test method
    test_result = task.get_service_result()

    assert "0766" in test_result["temp_submission_data"]["email_body"]
def extract_powershell(self, parameter: str, section: ResultSection, request: ServiceRequest) -> None:
    """Searches parameter for PowerShell, adds as extracted if found

    Args:
        parameter: String to be searched
        section: Section to be modified if PowerShell found
        request: Request object to add extracted files to
    """
    matches = find_powershell_strings(parameter.encode())
    if not matches:
        return

    self.found_powershell = True
    for match in matches:
        powershell_command = get_powershell_command(match.value)
        sha256hash = hashlib.sha256(powershell_command).hexdigest()
        # Add PowerShell code as extracted, account for duplicates
        if sha256hash not in self.file_hashes:
            powershell_filename = f"{sha256hash[0:10]}.ps1"
            ResultSection(
                "Discovered PowerShell code in parameter.",
                parent=section,
                body=powershell_command[:100].decode() + f"... see [{powershell_filename}]",
            )
            powershell_file_path = os.path.join(self.working_directory, powershell_filename)
            with open(powershell_file_path, "wb") as f:
                f.write(powershell_command)
            request.add_extracted(
                powershell_file_path, powershell_filename,
                "Discovered PowerShell code in parameter")
            self.file_hashes.append(sha256hash)
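# A small sketch of the naming convention used above for extracted PowerShell: the
# first 10 hex characters of the SHA-256 digest plus a .ps1 extension, while the full
# digest is what gets recorded for de-duplication. The command bytes are hypothetical.
import hashlib

command = b"powershell -nop -w hidden -enc ..."  # hypothetical extracted command
digest = hashlib.sha256(command).hexdigest()
filename = f"{digest[0:10]}.ps1"  # e.g. 'a1b2c3d4e5.ps1'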
def handle_task(self, task: ServiceTask) -> None: try: self._task = Task(task) self.log.info(f"Starting task: {self._task.sid}/{self._task.sha256} ({self._task.type})") self._task.start(self.service_attributes.default_result_classification, self.service_attributes.version, self.get_tool_version()) request = ServiceRequest(self._task) self.execute(request) self._success() except Exception as ex: self._handle_execute_failure(ex, exceptions.get_stacktrace_info(ex)) finally: self._cleanup()
def execute(self, request: ServiceRequest) -> None:
    result = Result()
    request.result = result

    # Get AV labels from previous services
    av_labels = request.task.tags.get('av.virus_name')
    if not av_labels:
        return

    # Extract AVclass tags
    av_tags = self._get_avclass_tags(request.md5, request.sha1, request.sha256, av_labels)
    if av_tags is None:
        return

    # Build results
    section = self._get_result_section(av_tags.family, av_tags.is_pup)
    for tag_section in self._get_category_sections(av_tags.tags):
        section.add_subsection(tag_section)

    result.add_section(section)
def execute(self, request: ServiceRequest) -> None:
    result = Result()
    self.hits = {}  # clear the hits dict
    path = request.file_path
    file_name = request.file_name
    self.log.info(f" Executing {file_name}")

    self.log.info(f"Number of rules {len(self.sigma_parser.rules)}")
    self.sigma_parser.register_callback(self.sigma_hit)
    self.sigma_parser.check_logfile(path)
    if len(self.hits) > 0:
        hit_section = ResultSection('Events detected as suspicious')
        # group alerts together
        for id, events in self.hits.items():
            title = self.sigma_parser.rules[id].title
            section = SigmaHitSection(title, events)
            tags = self.sigma_parser.rules[id].tags
            # Initialize attack_id so rules without ATT&CK-style tags don't raise NameError
            attack_id = None
            if tags:
                for tag in tags:
                    name = tag[7:]
                    if name.startswith(('t', 'g', 's')):
                        attack_id = name.upper()

            source = events[0]['signature_source']
            if attack_id:
                section.set_heuristic(get_heur_id(events[0]['score']),
                                      attack_id=attack_id,
                                      signature=f"{source}.{title}")
            else:
                section.set_heuristic(get_heur_id(events[0]['score']),
                                      signature=f"{source}.{title}")
            section.add_tag(f"file.rule.{source}", f"{source}.{title}")

            for event in events:
                # add the event data as a subsection
                section.add_subsection(EventDataSection(event))
            hit_section.add_subsection(section)
        result.add_section(hit_section)
    request.result = result
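# A sketch of the tag parsing above. The tag[7:] slice suggests Sigma tags of the
# form "attack.<id>" ("attack." is exactly 7 characters); that prefix is an inference
# from the slice length, not confirmed by the source, and the tag value is made up.
tag = "attack.t1059"  # hypothetical rule tag
name = tag[7:]        # -> "t1059"
if name.startswith(('t', 'g', 's')):  # technique/group/software-style IDs
    attack_id = name.upper()          # -> "T1059"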
def test_execute(sample, target):
    # TODO: Break down the execute method to make it easily testable
    from assemblyline_v4_service.common.task import Task
    from assemblyline.odm.messages.task import Task as ServiceTask
    from assemblyline_v4_service.common.request import ServiceRequest

    service_task = ServiceTask(sample)
    task = Task(service_task)
    target._task = task
    service_request = ServiceRequest(task)

    # Actually executing the sample
    target.execute(service_request)

    # Get the result of execute() from the test method
    test_result = task.get_service_result()

    # Get the assumed "correct" result of the sample
    correct_result_path = os.path.join(TEST_DIR, "results", task.file_name + ".json")
    with open(correct_result_path, "r") as f:
        correct_result = json.load(f)

    # Assert that the appropriate sections of the dict are equal, avoiding unique items in the response
    test_result_response = test_result.pop("response")
    correct_result_response = correct_result.pop("response")
    assert test_result == correct_result

    # Comparing everything in the response except for service_completed and the output.json supplementary
    test_result_response["milestones"].pop("service_completed")
    correct_result_response["milestones"].pop("service_completed")
    correct_result_response.pop("supplementary")
    test_result_response.pop("supplementary")
    assert test_result_response == correct_result_response
def dexray(self, request: ServiceRequest, local: str):
    """Iterate through quarantine decrypt methods.

    Args:
        request: AL request object.
        local: File path of AL sample.

    Returns:
        Metadata dictionary produced by the successful extraction method.
    """
    encoding = request.file_type.replace("quarantine/", "")
    extracted = []
    metadata = {}

    # Try all extracting methods
    for extract_method in self.extract_methods:
        # noinspection PyArgumentList
        extracted, metadata = extract_method(local, self.sha, self.working_directory, encoding)
        if extracted or metadata:
            break

    extracted_count = len(extracted)
    # safe_str the file name (fn)
    extracted = [[fp, safe_str(fn), e] for fp, fn, e in extracted]
    for child in extracted:
        try:
            # If the file is not successfully added as extracted, then decrease the extracted file counter
            if not request.add_extracted(*child):
                extracted_count -= 1
        except MaxExtractedExceeded:
            raise MaxExtractedExceeded(
                f"This file contains {extracted_count} extracted files, exceeding the "
                f"maximum of {request.max_extracted} extracted files allowed. "
                "None of the files were extracted.")

    return metadata
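# The shared signature each entry of self.extract_methods appears to have, inferred
# from the call and unpacking above. A sketch only, not one of the actual decrypters.
def example_extract_method(local_path, sha, working_directory, encoding):
    extracted = []   # list of (file_path, file_name, description) triples
    metadata = {}    # quarantine metadata recovered during decryption
    # ... decrypt the quarantine file and populate both values ...
    return extracted, metadata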
def execute(self, request: ServiceRequest) -> None:
    request.result = Result()

    # 1. Calculate entropy map
    with open(request.file_path, "rb") as fin:
        (entropy, part_entropies) = calculate_partition_entropy(fin)

    entropy_graph_data = {"type": "colormap", "data": {"domain": [0, 8], "values": part_entropies}}
    ResultSection(
        f"File entropy: {round(entropy, 3)}",
        parent=request.result,
        body_format=BODY_FORMAT.GRAPH_DATA,
        body=json.dumps(entropy_graph_data, allow_nan=False),
    )

    if request.file_type != "shortcut/windows":
        # 2. Get hachoir metadata
        parser = createParser(request.file_path)
        if parser is not None:
            with parser:
                parser_tags = parser.getParserTags()
                parser_id = parser_tags.get("id", "unknown")

                # Do basic metadata extraction
                metadata = extractMetadata(parser, 1)

                if metadata:
                    kv_body: Dict[str, Union[str, List[str]]] = {}
                    tags: List[Tuple[str, str]] = []
                    for m in metadata:
                        if m.key == "comment":
                            for v in m.values:
                                key, val = get_type_val(v.text, "comment")
                                if not val:
                                    continue
                                kv_body[key] = val
                                tag_type = TAG_MAP.get(parser_id, {}).get(key, None) or TAG_MAP.get(None, {}).get(
                                    key, None
                                )
                                if tag_type is not None:
                                    tags.append((tag_type, val))
                        elif m.key in ["mime_type"]:
                            pass
                        else:
                            values = [v.text for v in m.values]
                            if len(values) == 1 and values[0]:
                                kv_body[m.key] = values[0]
                            elif values:
                                kv_body[m.key] = values
                            for v in values:
                                tag_type = TAG_MAP.get(parser_id, {}).get(m.key, None) or TAG_MAP.get(None, {}).get(
                                    m.key, None
                                )
                                if tag_type is not None:
                                    tags.append((tag_type, v))

                    if kv_body:
                        res = ResultSection(
                            f"Metadata extracted by hachoir-metadata [Parser: {parser_id}]",
                            body=json.dumps(kv_body, allow_nan=False),
                            body_format=BODY_FORMAT.KEY_VALUE,
                            parent=request.result,
                        )
                        for t_type, t_val in tags:
                            res.add_tag(t_type, t_val)

        # 3. Get ExifTool metadata
        exif = subprocess.run(["exiftool", "-j", request.file_path], capture_output=True, check=False)
        if exif.stdout:
            exif_data = json.loads(exif.stdout.decode("utf-8", errors="ignore"))
            res_data = exif_data[0]
            if "Error" not in res_data:
                exif_body = {}
                for k, v in res_data.items():
                    if v and k not in [
                        "SourceFile",
                        "ExifToolVersion",
                        "FileName",
                        "Directory",
                        "FileSize",
                        "FileModifyDate",
                        "FileAccessDate",
                        "FileInodeChangeDate",
                        "FilePermissions",
                        "FileType",
                        "FileTypeExtension",
                        "MIMEType",
                        "Warning",
                    ]:
                        if v in [float("inf"), -float("inf"), float("nan")]:
                            # Re-query the raw value when exiftool emits a non-finite number
                            exif = subprocess.run(
                                ["exiftool", f"-{k}", "-T", request.file_path], capture_output=True, check=False
                            )
                            v = exif.stdout.decode("utf-8", errors="ignore").strip()
                        exif_body[build_key(k)] = v

                if exif_body:
                    e_res = ResultSection(
                        "Metadata extracted by ExifTool",
                        body=json.dumps(exif_body, allow_nan=False),
                        body_format=BODY_FORMAT.KEY_VALUE,
                        parent=request.result,
                    )
                    for k, v in exif_body.items():
                        tag_type = TAG_MAP.get(res_data.get("FileTypeExtension", "UNK").upper(), {}).get(
                            k, None
                        ) or TAG_MAP.get(None, {}).get(k, None)
                        if tag_type:
                            e_res.add_tag(tag_type, v)

    # 4. LNK file management
    if request.file_type == "shortcut/windows":
        with open(request.file_path, "rb") as indata:
            lnk = LnkParse3.lnk_file(indata)
        features = lnk.get_json(get_all=True)
        lnk_result_section = ResultSection(
            "Extra metadata extracted by LnkParse3",
            parent=request.result,
        )

        heur_1_items = {}
        risky_executable = ["rundll32.exe", "powershell.exe", "cmd.exe", "mshta.exe"]
        if "command_line_arguments" in features["data"]:
            if any(x in features["data"]["command_line_arguments"].lower() for x in risky_executable):
                heur_1_items["command_line_arguments"] = features["data"]["command_line_arguments"]
            elif " && " in features["data"]["command_line_arguments"]:
                heur_1_items["command_line_arguments"] = features["data"]["command_line_arguments"]

        lbp = ""
        if "local_base_path" in features["link_info"]:
            lbp = features["link_info"]["local_base_path"]
            if "common_path_suffix" in features["link_info"]:
                lbp = f"{lbp}{features['link_info']['common_path_suffix']}"
            if any(x in lbp.lower() for x in risky_executable):
                heur_1_items["local_base_path"] = features["link_info"]["local_base_path"]

        if "relative_path" in features["data"]:
            if any(x in features["data"]["relative_path"].lower() for x in risky_executable):
                heur_1_items["relative_path"] = features["data"]["relative_path"]

        target = ""
        if "target" in features:
            import ntpath

            if "items" in features["target"]:
                last_item = None
                for item in features["target"]["items"]:
                    if "primary_name" in item:
                        last_item = item
                        target = ntpath.join(target, item["primary_name"])
                if last_item and last_item["flags"] == "Is directory":
                    target = ""
            if any(x in target.lower() for x in risky_executable):
                heur_1_items["target_file_dosname"] = target

        if "icon_location" in features["data"]:
            deceptive_icons = ["wordpad.exe", "shell32.dll"]
            lnk_result_section.add_tag(
                tag_type="file.shortcut.icon_location", value=features["data"]["icon_location"]
            )
            if any(
                features["data"]["icon_location"].lower().strip('"').strip("'").endswith(x)
                for x in deceptive_icons
            ):
                heur = Heuristic(4)
                heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                heur_section.set_item("icon_location", features["data"]["icon_location"])

        timestamps = []
        if features["header"]["creation_time"]:
            timestamps.append(("creation_time", features["header"]["creation_time"]))
        if features["header"]["modified_time"]:
            timestamps.append(("modified_time", features["header"]["modified_time"]))

        if request.task.depth != 0:
            heur2_earliest_ts = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
                days=self.config.get("heur2_flag_more_recent_than_days", 3)
            )
            heur2_latest_ts = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=2)
            recent_timestamps = []
            future_timestamps = []
            for k, timestamp in timestamps:
                if timestamp < heur2_earliest_ts:
                    continue
                if timestamp > heur2_latest_ts:
                    future_timestamps.append((k, timestamp))
                    continue
                recent_timestamps.append((k, timestamp))
            if recent_timestamps:
                heur = Heuristic(2)
                heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                for k, timestamp in recent_timestamps:
                    heur_section.set_item(k, timestamp.isoformat())
            if future_timestamps:
                heur = Heuristic(3)
                heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                for k, timestamp in future_timestamps:
                    heur_section.set_item(k, timestamp.isoformat())

        if "DISTRIBUTED_LINK_TRACKER_BLOCK" in features["extra"]:
            if "machine_identifier" in features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]:
                machine_id = features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]["machine_identifier"]
                lnk_result_section.add_tag("file.shortcut.machine_id", machine_id)
                if machine_id.lower().startswith("desktop-"):
                    heur = Heuristic(5)
                    heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                    heur_section.set_item("machine_identifier", machine_id)
            if "droid_file_identifier" in features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]:
                mac = features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]["droid_file_identifier"][-12:]
                mac = ":".join(a + b for a, b in zip(mac[::2], mac[1::2]))
                lnk_result_section.add_tag("file.shortcut.tracker_mac", mac)
            elif "birth_droid_file_identifier" in features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]:
                mac = features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]["birth_droid_file_identifier"][-12:]
                mac = ":".join(a + b for a, b in zip(mac[::2], mac[1::2]))
                lnk_result_section.add_tag("file.shortcut.tracker_mac", mac)

        # Adapted code from previous logic. May be best replaced by new heuristics and logic.
        bp = str(lbp).strip()
        rp = str(features["data"].get("relative_path", "")).strip()
        nn = str(features["data"].get("net_name", "")).strip()
        t = str(target).strip().rsplit("\\")[-1].strip()
        cla = str(features["data"].get("command_line_arguments", "")).strip()

        filename_extracted = (bp or rp or t or nn).rsplit("\\")[-1].strip()
        if filename_extracted:
            lnk_result_section.add_tag(
                tag_type="file.name.extracted", value=(bp or rp or t or nn).rsplit("\\")[-1]
            )

        process_cmdline = f"{(rp or bp or t or nn)} {cla}".strip()
        if process_cmdline:
            lnk_result_section.add_tag(tag_type="file.shortcut.command_line", value=process_cmdline)

        cmd_code = None
        if filename_extracted in ["cmd", "cmd.exe"]:
            cmd_code = (get_cmd_command(f"{filename_extracted} {cla}".encode()), "bat")
            if "rundll32 " in cla:
                # We are already checking for rundll32.exe as part of risky_executable
                heur_1_items["command_line_arguments"] = features["data"]["command_line_arguments"]
        elif filename_extracted in ["powershell", "powershell.exe"]:
            cmd_code = (get_powershell_command(f"{filename_extracted} {cla}".encode()), "ps1")

        if heur_1_items:
            heur = Heuristic(1)
            heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
            heur_section.update_items(heur_1_items)

        if cmd_code:
            sha256hash = hashlib.sha256(cmd_code[0]).hexdigest()
            cmd_filename = f"{sha256hash[0:10]}.{cmd_code[1]}"
            cmd_file_path = os.path.join(self.working_directory, cmd_filename)
            with open(cmd_file_path, "wb") as cmd_f:
                cmd_f.write(cmd_code[0])
            request.add_extracted(
                cmd_file_path,
                cmd_filename,
                "Extracted LNK execution code",
            )

        def _datetime_to_str(obj):
            if isinstance(obj, datetime.datetime):
                return obj.isoformat()
            return obj

        temp_path = os.path.join(self.working_directory, "features.json")
        with open(temp_path, "w") as f:
            json.dump(features, f, default=_datetime_to_str)
        request.add_supplementary(temp_path, "features.json", "Features extracted from the LNK file")

        if lnk.appended_data:
            sha256hash = hashlib.sha256(lnk.appended_data).hexdigest()
            appended_data_path = os.path.join(self.working_directory, sha256hash)
            with open(appended_data_path, "wb") as appended_data_f:
                appended_data_f.write(lnk.appended_data)
            request.add_extracted(
                appended_data_path,
                sha256hash,
                "Additional data at the end of the LNK file",
            )
            heur = Heuristic(6)
            heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
            heur_section.set_item("Length", len(lnk.appended_data))

    # 5. URL file management
    if request.file_type == "shortcut/web":
        config = ConfigParser()
        config.read(request.file_path)
        res = ResultKeyValueSection("Metadata extracted by Ini Reader", parent=request.result)
        for k, v in config["InternetShortcut"].items():
            res.set_item(k, v)
            if k == "url":
                if v.startswith("http://") or v.startswith("https://"):
                    res.add_tag("network.static.uri", v)
                elif v.startswith("file:"):
                    heur = Heuristic(1)
                    heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=res)
                    heur_section.set_item("url", v)
        config.pop("InternetShortcut", None)
        if config.sections():
            extra_res = ResultKeyValueSection("Extra sections", parent=res)
            extra_res.set_item("Names", ", ".join(config.sections()))
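# A hypothetical .url file illustrating the InternetShortcut parsing above. ConfigParser
# lowercases option names, so "URL=" is seen as "url"; an http(s) value becomes a
# network.static.uri tag, while a file: value raises Heuristic 1. The address is a
# documentation (TEST-NET) IP, not a real indicator.
example_url_file = """\
[InternetShortcut]
URL=file://203.0.113.7/share/payload.exe
IconIndex=0
"""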
def _attach_service_meta_ontology(self, request: ServiceRequest) -> None:
    heuristics = helper.get_heuristics()

    def preprocess_result_for_dump(sections, current_max, heur_tag_map, tag_map):
        for section in sections:
            # Determine max classification of the overall result
            current_max = forge.get_classification().max_classification(section.classification, current_max)

            # Cleanup invalid tagging from service results
            def validate_tags(tag_map):
                tag_map, _ = construct_safe(Tagging, unflatten(tag_map))
                tag_map = flatten(tag_map.as_primitives(strip_null=True))
                return tag_map

            # Merge tags
            def merge_tags(tag_a, tag_b):
                if not tag_a:
                    return tag_b
                elif not tag_b:
                    return tag_a
                all_keys = list(tag_a.keys()) + list(tag_b.keys())
                return {key: list(set(tag_a.get(key, []) + tag_b.get(key, []))) for key in all_keys}

            # Append tags raised by the service, if any
            section_tags = validate_tags(section.tags)
            if section_tags:
                tag_map.update(section_tags)

            # Append tags associated to heuristics raised by the service, if any
            if section.heuristic:
                heur = heuristics[section.heuristic.heur_id]
                key = f'{self.name.upper()}_{heur.heur_id}'
                update_value = {"name": heur.name, "tags": {}}
                if section_tags:
                    update_value = {
                        "name": heur.name,
                        "tags": merge_tags(heur_tag_map[key]["tags"], section_tags)
                    }
                heur_tag_map[key].update(update_value)

            # Recurse through subsections
            if section.subsections:
                current_max, heur_tag_map, tag_map = preprocess_result_for_dump(
                    section.subsections, current_max, heur_tag_map, tag_map)

        return current_max, heur_tag_map, tag_map

    if not request.result or not request.result.sections:
        # No service results, therefore no ontological output
        return

    max_result_classification, heur_tag_map, tag_map = preprocess_result_for_dump(
        request.result.sections,
        request.task.service_default_result_classification,
        defaultdict(lambda: {"tags": dict()}),
        defaultdict(list))

    if not tag_map and not self.ontologies:
        # No tagging or ontologies found, therefore informational results
        return

    ontology = {
        'header': {
            'md5': request.md5,
            'sha1': request.sha1,
            'sha256': request.sha256,
            'type': request.file_type,
            'size': request.file_size,
            'classification': max_result_classification,
            'service_name': request.task.service_name,
            'service_version': request.task.service_version,
            'service_tool_version': request.task.service_tool_version,
            'tags': tag_map,
            'heuristics': heur_tag_map
        }
    }

    # Include ontological data
    ontology.update({type.lower(): data for type, data in self.ontologies.items()})

    ontology_suffix = f"{request.sha256}.ontology"
    ontology_path = os.path.join(self.working_directory, ontology_suffix)
    try:
        with open(ontology_path, 'w') as ontology_file:
            ontology_file.write(json.dumps(ResultOntology(ontology).as_primitives(strip_null=True)))
        attachment_name = f'{request.task.service_name}_{ontology_suffix}'.lower()
        request.add_supplementary(
            path=ontology_path,
            name=attachment_name,
            description=f"Result Ontology from {request.task.service_name}",
            classification=max_result_classification)
    except ValueError as e:
        self.log.error(f"Problem with generating ontology: {e}")
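# A worked example of the merge_tags() helper defined above. Values are union-ed
# through set(), so duplicates collapse and list order is not guaranteed. The tag
# values here are illustrative only.
tag_a = {"network.static.uri": ["http://a.example"]}
tag_b = {"network.static.uri": ["http://a.example", "http://b.example"], "av.virus_name": ["X"]}
# merge_tags(tag_a, tag_b) ->
#     {"network.static.uri": ["http://a.example", "http://b.example"], "av.virus_name": ["X"]}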
def test_service(sample):
    overwrite_results = False  # Used temporarily to mass-correct tests

    cls = Characterize()
    cls.start()

    task = Task(create_service_task(sample=sample))
    service_request = ServiceRequest(task)
    cls.execute(service_request)

    result_dir_files = [
        os.path.basename(x)
        for x in glob.glob(os.path.join(SELF_LOCATION, "tests", "results", sample, "*"))
    ]

    correct_path = os.path.join(SELF_LOCATION, "tests", "results", sample, "features.json")
    if os.path.exists(correct_path):
        result_dir_files.remove("features.json")
        with open(correct_path, "r") as f:
            correct_result = json.load(f)
        test_path = os.path.join(cls.working_directory, "features.json")
        with open(test_path, "r") as f:
            test_result = json.load(f)
        if overwrite_results:
            if test_result != correct_result:
                with open(correct_path, "w") as f:
                    json.dump(test_result, f)
        else:
            assert test_result == correct_result

    # Get the result of execute() from the test method
    test_result = task.get_service_result()
    result_dir_files.remove("result.json")

    # Get the assumed "correct" result of the sample
    correct_path = os.path.join(SELF_LOCATION, "tests", "results", sample, "result.json")
    with open(correct_path, "r") as f:
        correct_result = json.load(f)

    test_result = generalize_result(test_result)
    if overwrite_results:
        if test_result != correct_result:
            with open(correct_path, "w") as f:
                json.dump(test_result, f)
    else:
        assert test_result == correct_result

    for extracted_file in test_result["response"]["extracted"]:
        if not overwrite_results or extracted_file["name"] in result_dir_files:
            result_dir_files.remove(extracted_file["name"])
            correct_path = os.path.join(SELF_LOCATION, "tests", "results", sample, extracted_file["name"])
            with open(correct_path, "rb") as f:
                correct_result = f.read()
            test_path = os.path.join(cls.working_directory, extracted_file["name"])
            with open(test_path, "rb") as f:
                test_result = f.read()
            if overwrite_results:
                if test_result != correct_result:
                    with open(correct_path, "wb") as f:
                        f.write(test_result)
            else:
                assert test_result == correct_result

    assert not result_dir_files
def test_execute(sample, intezer_static_class_instance, dummy_api_interface_class, dummy_get_response_class, mocker):
    from assemblyline_v4_service.common.task import Task
    from assemblyline.odm.messages.task import Task as ServiceTask
    from assemblyline_v4_service.common.request import ServiceRequest
    from json import loads
    from intezer_static import ALIntezerApi

    mocker.patch.object(intezer_static_class_instance, "get_api_interface", return_value=dummy_api_interface_class)
    intezer_static_class_instance.start()

    service_task = ServiceTask(sample)
    task = Task(service_task)
    task.service_config = {
        "analysis_id": "",
    }
    intezer_static_class_instance._task = task
    service_request = ServiceRequest(task)
    intezer_static_class_instance.config["private_only"] = False

    mocker.patch.object(ALIntezerApi, "get_latest_analysis", return_value={"analysis_id": "blah"})
    mocker.patch.object(ALIntezerApi, "analyze_by_file", return_value="blah")
    mocker.patch.object(ALIntezerApi, "get_iocs", return_value={"files": [], "network": []})
    mocker.patch.object(ALIntezerApi, "get_dynamic_ttps", return_value=[])
    mocker.patch.object(ALIntezerApi, "get_sub_analyses_by_id", return_value=[])

    # Actually executing the sample
    intezer_static_class_instance.execute(service_request)

    # Get the result of execute() from the test method
    test_result = task.get_service_result()

    # Get the assumed "correct" result of the sample
    correct_result_path = os.path.join(TEST_DIR, "results", task.file_name + ".json")
    with open(correct_result_path, "r") as f:
        correct_result = loads(f.read())

    # Assert that the appropriate sections of the dict are equal, avoiding unique items in the response
    test_result_response = test_result.pop("response")
    correct_result_response = correct_result.pop("response")
    assert test_result == correct_result

    # Comparing everything in the response except for service_completed and the output.json supplementary
    test_result_response["milestones"].pop("service_completed")
    correct_result_response["milestones"].pop("service_completed")
    correct_result_response.pop("supplementary")
    test_result_response.pop("supplementary")
    correct_result_response.pop("service_context")
    test_result_response.pop("service_context")
    assert test_result_response == correct_result_response

    # Code coverage
    task.service_config = {
        "analysis_id": "blah",
    }
    intezer_static_class_instance._task = task
    service_request = ServiceRequest(task)
    intezer_static_class_instance.execute(service_request)

    task.service_config = {"analysis_id": ""}
    intezer_static_class_instance.config["is_on_premise"] = False
    mocker.patch.object(ALIntezerApi, "get_latest_analysis", return_value={"verdict": "not_supported"})
    mocker.patch.object(ALIntezerApi, "get_dynamic_ttps", return_value=[])
    intezer_static_class_instance.execute(service_request)

    mocker.patch.object(ALIntezerApi, "get_latest_analysis", return_value={"verdict": "failed"})
    intezer_static_class_instance.execute(service_request)
def execute(self, request: ServiceRequest):
    filename = posixpath.basename(request.file_name)
    request.result = self.check_file_name_anomalies(filename)
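# posixpath.basename() strips any directory components from the submitted name
# before the anomaly checks run; the path here is hypothetical.
import posixpath

assert posixpath.basename("uploads/invoice.pdf.exe") == "invoice.pdf.exe"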
def _handle_subanalyses(self, request: ServiceRequest, sha256: str, analysis_id: str,
                        file_verdict_map: Dict[str, str], parent_section: ResultSection) -> None:
    """
    This method handles the subanalyses for a given analysis ID

    :param request: The service request object
    :param sha256: The hash of the given file
    :param analysis_id: The ID for the analysis which we will be retrieving
    :param file_verdict_map: A map of sha256s representing a file's contents, and the verdict for that file
    :param parent_section: The result section that the network result section will be added to, if applicable
    :return: None
    """
    so = SandboxOntology()

    # This boolean is used to determine if we should try to download another file
    can_we_download_files = True

    # These sets will be used as we work through the process trees
    process_path_set = set()
    command_line_set = set()

    # Now let's get into the subanalyses for this sample
    sub_analyses = self.client.get_sub_analyses_by_id(analysis_id)

    for sub in sub_analyses:
        sub_analysis_id = sub["sub_analysis_id"]

        # Get the extraction info, which is basically the details of how the subanalysis object came to be
        extraction_info = sub.pop("extraction_info", None)

        # "processes" is only present when the sample has undergone dynamic execution
        if extraction_info and "processes" not in extraction_info:
            extraction_info = None

        code_reuse = self.client.get_sub_analysis_code_reuse_by_id(analysis_id, sub_analysis_id)
        if code_reuse:
            families = code_reuse.pop("families", [])
        else:
            families = []

        if not families and not extraction_info:
            # Otherwise, boring!
            continue

        if families and not any(family["reused_gene_count"] > 1 for family in families):
            # Most likely a false positive
            continue

        ###
        # If we have gotten to this point, then the sub analysis is worth reporting
        ###
        extraction_method = sub["source"].replace("_", " ")
        if extraction_method != "root":
            sub_kv_section = ResultKeyValueSection(
                f"Subanalysis report for {sub['sha256']}, extracted via {extraction_method}")
        else:
            sub_kv_section = ResultKeyValueSection(f"Subanalysis report for {sub['sha256']}")

        metadata = self.client.get_sub_analysis_metadata_by_id(analysis_id, sub_analysis_id)
        processed_subanalysis = self._process_details(metadata.copy(), UNINTERESTING_SUBANALYSIS_KEYS)
        sub_kv_section.update_items(processed_subanalysis)
        parent_section.add_subsection(sub_kv_section)

        if code_reuse:
            code_reuse_kv_section = ResultKeyValueSection("Code reuse detected")
            code_reuse_kv_section.update_items(code_reuse)
            sub_kv_section.add_subsection(code_reuse_kv_section)

        sub_sha256 = sub["sha256"]
        if families:
            self._process_families(families, sub_sha256, file_verdict_map, sub_kv_section)

        if extraction_info:
            self._process_extraction_info(extraction_info["processes"], process_path_set, command_line_set, so)

        # Setting a heuristic here or downloading the file would be redundant if the hash matched the original file
        if sub_sha256 != sha256:
            self._set_heuristic_by_verdict(sub_kv_section, file_verdict_map.get(sub_sha256))

            if can_we_download_files:
                file_was_downloaded = self.client.download_file_by_sha256(sub_sha256, self.working_directory)
                if file_was_downloaded:
                    path = f"{self.working_directory}/{sub_sha256}.sample"
                    request.add_extracted(
                        path,
                        f"{sub_sha256}.sample",
                        f"Extracted via {extraction_method}",
                    )
                    self.log.debug(f"Added {sub_sha256}.sample as an extracted file.")
                else:
                    can_we_download_files = False

    process_tree_section = so.get_process_tree_result_section()
    for process_path in process_path_set:
        process_tree_section.add_tag("dynamic.process.file_name", process_path)
    for command_line in command_line_set:
        process_tree_section.add_tag("dynamic.process.command_line", command_line)
    if process_tree_section.body:
        parent_section.add_subsection(process_tree_section)
def check_for_b64(self, data: str, section: ResultSection, request: ServiceRequest,
                  file_contents: bytes) -> bool:
    """Search and decode base64 strings in sample data.

    Args:
        data: Data to be parsed
        section: base64 subsection, must have heuristic set
        request: Request object to add extracted files to
        file_contents: Raw contents of the original file

    Returns:
        decoded: Boolean which is true if base64 found
    """
    assert section.heuristic
    decoded_param = data
    decoded = False

    encoded_data = data.encode()
    for content, start, end in find_base64(encoded_data):
        if encoded_data[start:end] in file_contents:
            # Present in original file, not an intermediate IoC
            continue
        try:
            # PowerShell base64 will be UTF-16
            content = content.decode("utf-16").encode()
        except UnicodeDecodeError:
            pass
        try:
            if len(content) < FILE_PARAMETER_SIZE:
                decoded_param = decoded_param[:start] + " " + content.decode(errors="ignore") + decoded_param[end:]
            else:
                b64hash = ""
                pe_files = find_pe_files(content)
                for pe_file in pe_files:
                    b64hash = hashlib.sha256(pe_file).hexdigest()
                    pe_path = os.path.join(self.working_directory, b64hash)
                    with open(pe_path, "wb") as f:
                        f.write(pe_file)
                    request.add_extracted(pe_path, b64hash, "PE file found in base64 encoded parameter")
                    section.heuristic.add_signature_id("pe_file")
                if not pe_files:
                    b64hash = hashlib.sha256(content).hexdigest()
                    content_path = os.path.join(self.working_directory, b64hash)
                    with open(content_path, "wb") as f:
                        f.write(content)
                    request.add_extracted(content_path, b64hash, "Large base64 encoded parameter")
                    section.heuristic.add_signature_id("possible_file")
                decoded_param = decoded_param[:start] + f"[See extracted file {b64hash}]" + decoded_param[end:]
            decoded = True
        except Exception:
            pass

    if decoded:
        section.heuristic.increment_frequency()
        section.add_line(f"Possible Base64 {truncate(data)} decoded: {decoded_param}")
        self.find_ip(decoded_param)

    return decoded
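# A sketch of why the UTF-16 decode/re-encode step above matters. PowerShell's
# -EncodedCommand takes base64 of UTF-16LE text, so decoded bytes are NUL-interleaved
# until converted back to a narrow encoding. The payload below is illustrative.
import base64

raw = base64.b64decode("cABvAHcAZQByAHMAaABlAGwAbAA=")  # b'p\x00o\x00w\x00...'
assert raw.decode("utf-16-le") == "powershell"
# Content that is not valid UTF-16 raises UnicodeDecodeError, which the code above
# swallows so the original bytes are kept as-is.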
def execute(self, request: ServiceRequest) -> None:
    self.result = Result()
    request.result = self.result
    self.ip_list = []
    self.url_list = []
    self.found_powershell = False
    self.file_hashes = []

    vmonkey_err = False
    actions: List[str] = []
    external_functions: List[str] = []
    tmp_iocs: List[str] = []
    output_results: Dict[str, Any] = {}
    potential_base64: Set[str] = set()

    # Running ViperMonkey
    try:
        file_contents = request.file_contents
        input_file: str = request.file_path
        input_file_obj: Optional[IO] = None

        # Typical start to XML files
        if not file_contents.startswith(b"<?") and request.file_type == "code/xml":
            # Default encoding/decoding if BOM not found
            encoding: Optional[str] = None
            decoding: Optional[str] = None
            # Remove potential BOMs from contents
            if file_contents.startswith(BOM_UTF8):
                encoding = "utf-8"
                decoding = "utf-8-sig"
            elif file_contents.startswith(BOM_UTF16):
                encoding = "utf-16"
                decoding = "utf-16"
            if encoding and decoding:
                input_file_obj = tempfile.NamedTemporaryFile("w+", encoding=encoding)
                input_file_obj.write(file_contents.decode(decoding, errors="ignore"))
                input_file = input_file_obj.name
            else:
                # If the file_type was detected as XML, it's probably buried within but not actually an XML file
                # Give no response as ViperMonkey can't process this kind of file
                return

        cmd = " ".join([
            PYTHON2_INTERPRETER,
            os.path.join(os.path.dirname(__file__), "vipermonkey_compat.py2"),
            input_file,
            self.working_directory,
        ])
        p = subprocess.run(cmd, capture_output=True, shell=True)
        stdout = p.stdout

        # Close file
        if input_file_obj and os.path.exists(input_file_obj.name):
            input_file_obj.close()

        # Add artifacts
        artifact_dir = os.path.join(self.working_directory, os.path.basename(input_file) + "_artifacts")
        if os.path.exists(artifact_dir):
            for file in os.listdir(artifact_dir):
                try:
                    file_path = os.path.join(artifact_dir, file)
                    if os.path.isfile(file_path) and os.path.getsize(file_path):
                        request.add_extracted(file_path, file, "File extracted by ViperMonkey during analysis")
                except os.error as e:
                    self.log.warning(e)

        # Read output
        if stdout:
            for line in stdout.splitlines():
                if line.startswith(b"{") and line.endswith(b"}"):
                    try:
                        output_results = json.loads(line)
                    except UnicodeDecodeError:
                        output_results = json.loads(line.decode("utf-8", "replace"))
                    break

        # Checking for tuple in case vmonkey return is None
        # If no macros found, return is [][][]; if error, return is None
        # vmonkey_err can still happen if return is [][][], log as warning instead of error
        if isinstance(output_results.get("vmonkey_values"), dict):
            # Structure of variable "actions" is as follows:
            # [action, parameters, description]
            #   action: 'Found Entry Point', 'Execute Command', etc...
            #   parameters: Parameters for function
            #   description: 'Shell Function', etc...
            # external_functions is a list of built-in VBA functions that were called
            actions = output_results["vmonkey_values"]["actions"]
            external_functions = output_results["vmonkey_values"]["external_funcs"]
            tmp_iocs = output_results["vmonkey_values"]["tmp_iocs"]
            if output_results["vmonkey_err"]:
                vmonkey_err = True
                self.log.warning(output_results["vmonkey_err"])
        else:
            vmonkey_err = True
    except Exception:
        self.log.exception(f"ViperMonkey failed to analyze file {request.sha256}")

    if actions:
        # Creating action section
        action_section = ResultSection("Recorded Actions:", parent=self.result)
        action_section.add_tag("technique.macro", "Contains VBA Macro(s)")
        sub_action_sections: Dict[str, ResultSection] = {}
        for action, parameters, description in actions:
            # Creating action sub-sections for each action
            if not description:
                # For actions with no description, just use the type of action
                description = action

            if description not in sub_action_sections:
                # Action's description will be the sub-section name
                sub_action_section = ResultSection(description, parent=action_section)
                sub_action_sections[description] = sub_action_section
                if description == "Shell function":
                    sub_action_section.set_heuristic(2)
            else:
                # Reuse existing section
                sub_action_section = sub_action_sections[description]
                if sub_action_section.heuristic:
                    sub_action_section.heuristic.increment_frequency()

            # Parameters are sometimes stored as a list, account for this
            if isinstance(parameters, list):
                for item in parameters:
                    # Parameters include more than strings (booleans, for example)
                    if isinstance(item, str):
                        # Check for PowerShell
                        self.extract_powershell(item, sub_action_section, request)
                # Join list items into a single string
                param = ", ".join(str(p) for p in parameters)
            else:
                param = parameters
                # Parameters include more than strings (booleans, for example)
                if isinstance(param, str):
                    self.extract_powershell(param, sub_action_section, request)

            # If the description field was empty, re-organize the result section for this case
            if description == action:
                sub_action_section.add_line(param)
            else:
                sub_action_section.add_line(f"Action: {action}, Parameters: {param}")

            # Check later for base64
            potential_base64.add(param)

            # Add urls/ips found in parameter to respective lists
            self.find_ip(param)

    # Check tmp_iocs
    res_temp_iocs = ResultSection("Runtime temporary IOCs")
    for ioc in tmp_iocs:
        self.extract_powershell(ioc, res_temp_iocs, request)
        potential_base64.add(ioc)
        self.find_ip(ioc)

    if len(res_temp_iocs.subsections) != 0 or res_temp_iocs.body:
        self.result.add_section(res_temp_iocs)

    # Add PowerShell score/tag if found
    if self.found_powershell:
        ResultSection("Discovered PowerShell code in file", parent=self.result, heuristic=Heuristic(3))

    # Check parameters and temp_iocs for base64
    base64_section = ResultSection("Possible Base64 found", heuristic=Heuristic(5, frequency=0))
    for param in potential_base64:
        self.check_for_b64(param, base64_section, request, request.file_contents)
    if base64_section.body:
        self.result.add_section(base64_section)

    # Add url/ip tags
    self.add_ip_tags()

    # Create section for built-in VBA functions called
    if len(external_functions) > 0:
        external_func_section = ResultSection(
            "VBA functions called",
            body_format=BODY_FORMAT.MEMORY_DUMP,
            parent=self.result)
        for func in external_functions:
            if func in vba_builtins:
                external_func_section.add_line(func + ": " + vba_builtins[func])
            else:
                external_func_section.add_line(func)

    # Add vmonkey log as a supplemental file if we have results
    if "stdout" in output_results and (vmonkey_err or request.result.sections):
        temp_log_copy = os.path.join(tempfile.gettempdir(), f"{request.sid}_vipermonkey_output.log")
        with open(temp_log_copy, "w") as temp_log_file:
            temp_log_file.write(output_results["stdout"])
        request.add_supplementary(temp_log_copy, "vipermonkey_output.log", "ViperMonkey log output")
        if vmonkey_err is True:
            ResultSection(
                'ViperMonkey has encountered an error, please check "vipermonkey_output.log"',
                parent=self.result,
                heuristic=Heuristic(1),
            )
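# A hypothetical action triple in the [action, parameters, description] shape
# documented above; the values are illustrative only (TEST-NET address).
example_action = ("Execute Command", ["cmd.exe /c ping 203.0.113.7"], "Shell function")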
def execute(self, request: ServiceRequest) -> None:
    """Main module. See README for details."""
    start = time.time()
    result = Result()
    request.result = result
    file_path = request.file_path

    if request.deep_scan:
        # Maximum size of submitted file to run this service:
        max_size = 200000
        # String length maximum, used in basic ASCII and UNICODE modules:
        max_length = 1000000
        # String list maximum size
        # List produced by basic ASCII and UNICODE module results and will determine
        # if patterns.py will only evaluate network IOC patterns:
        st_max_size = 100000
        # Minimum string size for encoded/stacked string modules:
        enc_min_length = 7
        stack_min_length = 7
    else:
        max_size = self.config.get('max_size', 85000)
        max_length = self.config.get('max_length', 5000)
        st_max_size = self.config.get('st_max_size', 0)
        enc_min_length = self.config.get('enc_min_length', 7)
        stack_min_length = self.config.get('stack_min_length', 7)

    timeout = self.service_attributes.timeout - 50

    if len(request.file_contents) > max_size:
        return

    stack_args = [FLOSS, f'-n {stack_min_length}', '--no-decoded-strings', file_path]
    decode_args = [FLOSS, f'-n {enc_min_length}', '-x', '--no-static-strings', '--no-stack-strings', file_path]

    with Popen(stack_args, stdout=PIPE, stderr=PIPE) as stack, \
            Popen(decode_args, stdout=PIPE, stderr=PIPE) as decode:
        stack_out, _, timed_out = self.handle_process(stack, timeout + start - time.time(), ' '.join(stack_args))
        if timed_out:
            result.add_section(ResultSection('FLARE FLOSS stacked strings timed out'))
            self.log.warning(f'floss stacked strings timed out for sample {request.sha256}')

        dec_out, dec_err, timed_out = self.handle_process(decode, timeout + start - time.time(), ' '.join(decode_args))
        if timed_out:
            result.add_section(ResultSection('FLARE FLOSS decoded strings timed out'))
            self.log.warning(f'floss decoded strings timed out for sample {request.sha256}')

    if stack_out:
        sections = [[y for y in x.splitlines() if y] for x in stack_out.split(b'\n\n')]
        for section in sections:
            if not section:
                # skip empty
                continue
            match = re.match(rb'FLOSS static\s+.*\s+strings', section[0])
            if match:
                result_section = static_result(section, max_length, st_max_size)
                if result_section:
                    result.add_section(result_section)
                continue
            match = re.match(rb'.*\d+ stackstring.*', section[0])
            if match:
                result_section = stack_result(section)
                if result_section:
                    result.add_section(result_section)
                continue

    # Process decoded strings results
    if dec_out:
        result_section = decoded_result(dec_out)
        if result_section:
            if dec_err:
                result_section.add_line("Flare Floss generated error messages while analyzing:")
                result_section.add_line(safe_str(dec_err))
            result.add_section(result_section)
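# The two FLOSS invocations above, rendered as the approximate shell commands they
# correspond to (assuming FLOSS points at the floss binary and -n defaults of 7;
# the sample path is hypothetical):
#     floss -n 7 --no-decoded-strings /path/to/sample                       # stack strings
#     floss -n 7 -x --no-static-strings --no-stack-strings /path/to/sample  # decoded strings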
def execute(self, request: ServiceRequest) -> None:
    # --- Setup ----------------------------------------------------------------------------------------------
    request.result = Result()
    patterns = PatternMatch()

    if request.deep_scan:
        max_attempts = 100
    else:
        max_attempts = 10

    self.files_extracted = set()
    self.hashes = set()

    # --- Pre-Processing --------------------------------------------------------------------------------------
    # Get all IOCs prior to de-obfuscation
    pat_values = patterns.ioc_match(request.file_contents, bogon_ip=True, just_network=False)
    if pat_values and request.get_param('extract_original_iocs'):
        ioc_res = ResultSection(
            "The following IOCs were found in the original file",
            parent=request.result,
            body_format=BODY_FORMAT.MEMORY_DUMP)
        for k, val in pat_values.items():
            for v in val:
                if ioc_res:
                    ioc_res.add_line(f"Found {k.upper().replace('.', ' ')}: {safe_str(v)}")
                    ioc_res.add_tag(k, v)

    # --- Prepare Techniques ----------------------------------------------------------------------------------
    techniques = [
        ('MSOffice Embedded script', self.msoffice_embedded_script_string),
        ('CHR and CHRB decode', self.chr_decode),
        ('String replace', self.string_replace),
        ('Powershell carets', self.powershell_carets),
        ('Array of strings', self.array_of_strings),
        ('Fake array vars', self.vars_of_fake_arrays),
        ('Reverse strings', self.str_reverse),
        ('B64 Decode', self.b64decode_str),
        ('Simple XOR function', self.simple_xor_function),
    ]
    second_pass = [
        ('Concat strings', self.concat_strings),
        ('MSWord macro vars', self.mswordmacro_vars),
        ('Powershell vars', self.powershell_vars),
        ('Charcode hex', self.charcode_hex),
    ]
    final_pass = [
        ('Charcode', self.charcode),
    ]

    code_extracts = [
        ('.*html.*', "HTML scripts extraction", self.extract_htmlscript),
    ]

    layers_list: List[Tuple[str, bytes]] = []
    layer = request.file_contents

    # --- Stage 1: Script Extraction --------------------------------------------------------------------------
    for pattern, name, func in code_extracts:
        if regex.match(regex.compile(pattern), request.task.file_type):
            extracted_parts = func(request.file_contents)
            layer = b"\n".join(extracted_parts).strip()
            layers_list.append((name, layer))
            break

    # --- Stage 2: Deobfuscation ------------------------------------------------------------------------------
    idx = 0
    first_pass_len = len(techniques)
    layers_count = len(layers_list)
    while True:
        if idx > max_attempts:
            final_pass.extend(techniques)
            for name, technique in final_pass:
                res = technique(layer)
                if res:
                    layers_list.append((name, res))
            break

        with ThreadPoolExecutor() as executor:
            threads = [executor.submit(technique, layer) for name, technique in techniques]
            results = [thread.result() for thread in threads]
            for i in range(len(results)):
                result = results[i]
                if result:
                    layers_list.append((techniques[i][0], result))
                    # Looks like it worked, restart with the new layer
                    layer = result

        # If the layers haven't changed in a pass, break
        if layers_count == len(layers_list):
            if len(techniques) != first_pass_len:
                final_pass.extend(techniques)
            with ThreadPoolExecutor() as executor:
                threads = [executor.submit(technique, layer) for name, technique in final_pass]
                results = [thread.result() for thread in threads]
                for i in range(len(results)):
                    result = results[i]
                    if result:
                        # Index into final_pass here (not techniques), since that is the list submitted above
                        layers_list.append((final_pass[i][0], result))
            break
        for x in second_pass:
            techniques.insert(0, x)
        layers_count = len(layers_list)
        idx += 1

    # --- Compiling results -----------------------------------------------------------------------------------
    if len(layers_list) > 0:
        extract_file = False
        num_layers = len(layers_list)

        # Compute heuristic
        if num_layers < 5:
            heur_id = 1
        elif num_layers < 10:
            heur_id = 2
        elif num_layers < 50:
            heur_id = 3
        elif num_layers < 100:
            heur_id = 4
        else:  # num_layers >= 100
            heur_id = 5

        # Cleanup final layer
        clean = self.clean_up_final_layer(layers_list[-1][1])
        if clean != request.file_contents:
            # Check for new IOCs
            pat_values = patterns.ioc_match(clean, bogon_ip=True, just_network=False)
            diff_tags: Dict[str, List[bytes]] = {}
            for uri in pat_values.get('network.static.uri', []):
                # Compare URIs without the query string
                uri = uri.split(b'?', 1)[0]
                if uri not in request.file_contents:
                    diff_tags.setdefault('network.static.uri', [])
                    diff_tags['network.static.uri'].append(uri)

            if request.deep_scan or (len(clean) > 1000 and heur_id >= 4) or diff_tags:
                extract_file = True

            # Display obfuscation steps
            mres = ResultSection(
                "De-obfuscation steps taken by DeobfuScripter",
                parent=request.result)
            if heur_id:
                mres.set_heuristic(heur_id)

            lcount = Counter([x[0] for x in layers_list])
            for l, c in lcount.items():
                mres.add_line(f"{l}, {c} time(s).")

            # Display final layer
            byte_count = 5000
            if extract_file:
                # Save extracted file
                byte_count = 500
                file_name = f"{os.path.basename(request.file_name)}_decoded_final"
                file_path = os.path.join(self.working_directory, file_name)
                # Ensure the directory exists before writing
                os.makedirs(os.path.dirname(file_path), exist_ok=True)
                with open(file_path, 'wb+') as f:
                    f.write(clean)
                self.log.debug(f"Submitted dropped file for analysis: {file_path}")
                request.add_extracted(file_path, file_name, "Final deobfuscation layer")

            ResultSection(
                f"First {byte_count} bytes of the final layer:",
                body=safe_str(clean[:byte_count]),
                body_format=BODY_FORMAT.MEMORY_DUMP,
                parent=request.result)

            # Display new IOCs from the final layer
            if len(diff_tags) > 0:
                ioc_new = ResultSection(
                    "New IOCs found after de-obfuscation",
                    parent=request.result,
                    body_format=BODY_FORMAT.MEMORY_DUMP)
                has_network_heur = False
                for ty, val in diff_tags.items():
                    for v in val:
                        if "network" in ty:
                            has_network_heur = True
                        ioc_new.add_line(f"Found {ty.upper().replace('.', ' ')}: {safe_str(v)}")
                        ioc_new.add_tag(ty, v)

                if has_network_heur:
                    ioc_new.set_heuristic(7)
                else:
                    ioc_new.set_heuristic(6)

            if len(self.files_extracted) > 0:
                ext_file_res = ResultSection(
                    "The following files were extracted during the deobfuscation",
                    heuristic=Heuristic(8),
                    parent=request.result)
                for extracted in self.files_extracted:
                    file_name = os.path.basename(extracted)
                    ext_file_res.add_line(file_name)
                    request.add_extracted(extracted, file_name, "File of interest deobfuscated from sample")
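# A worked example of the per-technique layer tally above: Counter groups the layer
# names, and each pair becomes one "<name>, <count> time(s)." line in the section.
# The layer contents are placeholders.
from collections import Counter

layers_list = [("B64 Decode", b"..."), ("String replace", b"..."), ("B64 Decode", b"...")]
lcount = Counter(x[0] for x in layers_list)  # Counter({'B64 Decode': 2, 'String replace': 1})
# -> "B64 Decode, 2 time(s)." and "String replace, 1 time(s)."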