def process_file(self, file_path, append_file, metadata=None):
    """Process one CAPE output file: classify it by its metadata type code,
    run family-specific config extraction, and record payloads/configs.

    @param file_path: path to the dumped file on disk.
    @param append_file: initial decision on whether to add the file to
        self.cape["payloads"]; individual handlers below may override it.
    @param metadata: optional dict from the monitor with "pids" and a
        ";?"-separated "metadata" string.
    @return: None; results accumulate in self.results and self.cape.
    """
    # BUGFIX: a mutable default argument ({}) was shared across calls.
    if metadata is None:
        metadata = {}
    config = {}
    cape_name = ""

    if not os.path.exists(file_path):
        return

    file_info, pefile_object = File(file_path, metadata.get("metadata", "")).get_all()
    if pefile_object:
        self.results.setdefault("pefiles", {})
        self.results["pefiles"].setdefault(file_info["sha256"], pefile_object)

    # Get the file data. Binary-mode reads cannot raise UnicodeDecodeError,
    # so the previous try/except retry around this read was dead code.
    with open(file_info["path"], "rb") as file_open:
        file_data = file_open.read()

    if metadata.get("pids", False):
        if len(metadata["pids"]) == 1:
            file_info["pid"] = metadata["pids"][0]
        else:
            file_info["pid"] = ",".join(metadata["pids"])

    # Monitor metadata format: "<type_code>;?<process_path>;?<module_path>;?..."
    metastrings = metadata.get("metadata", "").split(";?")
    if len(metastrings) > 2:
        file_info["process_path"] = metastrings[1]
        file_info["process_name"] = metastrings[1].split("\\")[-1]
    if len(metastrings) > 3:
        file_info["module_path"] = metastrings[2]

    file_info["cape_type_code"] = 0
    file_info["cape_type"] = ""
    # BUGFIX: `metastrings` is a list, so the old `metastrings != ""` guard was
    # always true and relied on a silent `except: pass`; validate explicitly.
    if metastrings and metastrings[0].isdigit():
        file_info["cape_type_code"] = int(metastrings[0])

    def _append_pe_type(info):
        # Append "32-bit "/"64-bit " plus DLL/executable, derived from the
        # libmagic type string (assumes PE types yield >= 3 tokens — the
        # original code made the same assumption).
        type_strings = info["type"].split()
        if type_strings[0] in ("PE32+", "PE32"):
            info["cape_type"] += pe_map[type_strings[0]]
            if type_strings[2] == ("(DLL)"):
                info["cape_type"] += "DLL"
            else:
                info["cape_type"] += "executable"

    if file_info["cape_type_code"] == COMPRESSION:
        file_info["cape_type"] = "Decompressed PE Image"

    if file_info["cape_type_code"] in inject_map:
        file_info["cape_type"] = inject_map[file_info["cape_type_code"]]
        if len(metastrings) > 4:
            file_info["target_path"] = metastrings[3]
            file_info["target_process"] = metastrings[3].split("\\")[-1]
            file_info["target_pid"] = metastrings[4]

    if file_info["cape_type_code"] == INJECTION_SECTION:
        file_info["cape_type"] = "Injected Section"
        if len(metastrings) > 4:
            file_info["section_handle"] = metastrings[4]

    simple_cape_type_map = {
        UNPACKED_PE: "Unpacked PE Image",
        UNPACKED_SHELLCODE: "Unpacked Shellcode",
    }
    if file_info["cape_type_code"] in simple_cape_type_map:
        file_info["cape_type"] = simple_cape_type_map[file_info["cape_type_code"]]
        if len(metastrings) > 4:
            file_info["virtual_address"] = metastrings[3]
        _append_pe_type(file_info)

    # PlugX
    if file_info["cape_type_code"] == PLUGX_CONFIG:
        file_info["cape_type"] = "PlugX Config"
        if plugx_parser:
            plugx_config = plugx_parser.parse_config(file_data, len(file_data))
            if plugx_config:
                cape_name = "PlugX"
                config[cape_name] = {}
                for key, value in plugx_config.items():
                    config[cape_name].update({key: [value]})
            else:
                log.error("CAPE: PlugX config parsing failure - size many not be handled.")
            append_file = False

    if file_info["cape_type_code"] in code_mapping:
        file_info["cape_type"] = code_mapping[file_info["cape_type_code"]]
    if file_info["cape_type_code"] in config_mapping:
        # BUGFIX: this previously indexed code_mapping, raising KeyError for
        # any code present only in config_mapping.
        file_info["cape_type"] = config_mapping[file_info["cape_type_code"]]
    _append_pe_type(file_info)

    if file_info["cape_type_code"] in name_mapping:
        cape_name = name_mapping[file_info["cape_type_code"]]
        append_file = True

    if file_info["cape_type_code"] == EVILGRAB_DATA:
        cape_name = "EvilGrab"
        file_info["cape_type"] = "EvilGrab Data"
        if file_info["size"] == 256 or file_info["size"] == 260:
            # BUGFIX: config[cape_name] was never initialised before .update(),
            # which raised KeyError on every 256/260-byte EvilGrab dump.
            config.setdefault(cape_name, {}).update({"filepath": [format(file_data)]})
        if file_info["size"] > 0x1000:
            append_file = True
        else:
            append_file = False

    if file_info["cape_type_code"] == SEDRECO_DATA:
        cape_name = "Sedreco"
        config[cape_name] = {}
        config[cape_name]["cape_type"] = "Sedreco Config"
        if len(metastrings) > 4:
            SedrecoConfigIndex = metastrings[4]
            if SedrecoConfigIndex in sedreco_map:
                ConfigItem = sedreco_map[SedrecoConfigIndex]
            else:
                ConfigItem = "Unknown"
            # BUGFIX: ConfigData was previously built outside this guard and
            # could reference ConfigItem unbound when no index was supplied.
            ConfigData = format(file_data)
            if ConfigData:
                config[cape_name].update({ConfigItem: [ConfigData]})
        append_file = False

    if file_info["cape_type_code"] == CERBER_CONFIG:
        file_info["cape_type"] = "Cerber Config"
        cape_name = "Cerber"
        config[cape_name] = {}
        # BUGFIX: was `config["cape_type"] = ...`, leaking a stray top-level
        # key instead of tagging the Cerber sub-dict.
        config[cape_name]["cape_type"] = "Cerber Config"
        parsed = json.loads(file_data.rstrip(b"\0"))
        config[cape_name].update({"JSON Data": [json.dumps(parsed, indent=4, sort_keys=True)]})
        append_file = True

    if file_info["cape_type_code"] == URSNIF_PAYLOAD:
        cape_name = "Ursnif"
        config[cape_name] = {}
        config[cape_name]["cape_type"] = "Ursnif Payload"
        file_info["cape_type"] = "Ursnif Payload"

    if file_info["cape_type_code"] == URSNIF_CONFIG:
        file_info["cape_type"] = "Ursnif Config"
        cape_name = "Ursnif"
        malwareconfig_loaded = False
        try:
            # NOTE(review): imp is deprecated in favour of importlib; kept here
            # to avoid a behavior change. `file` also shadows the builtin.
            malwareconfig_parsers = os.path.join(CUCKOO_ROOT, "modules", "processing", "parsers", "CAPE")
            file, pathname, description = imp.find_module(cape_name, [malwareconfig_parsers])
            module = imp.load_module(cape_name, file, pathname, description)
            malwareconfig_loaded = True
            log.debug("CAPE: Imported malwareconfig.com parser %s", cape_name)
        except ImportError:
            log.debug("CAPE: malwareconfig.com parser: No module named %s", cape_name)
        if malwareconfig_loaded:
            try:
                malwareconfig_config = module.config(file_data)
                if malwareconfig_config:
                    config[cape_name] = {}
                    config[cape_name]["cape_type"] = "Ursnif Config"
                    # Parsers may return either a list of dicts or a dict.
                    if isinstance(malwareconfig_config, list):
                        for key, value in malwareconfig_config[0].items():
                            config[cape_name].update({key: [value]})
                    elif isinstance(malwareconfig_config, dict):
                        for key, value in malwareconfig_config.items():
                            config[cape_name].update({key: [value]})
            except Exception as e:
                log.error("CAPE: malwareconfig parsing error with %s: %s", cape_name, e)
        append_file = False

    # Hancitor
    if file_info["cape_type_code"] == HANCITOR_PAYLOAD:
        cape_name = "Hancitor"
        config[cape_name] = {}
        config[cape_name]["cape_type"] = "Hancitor Payload"
        file_info["cape_type"] = "Hancitor Payload"

    if file_info["cape_type_code"] == HANCITOR_CONFIG:
        cape_name = "Hancitor"
        file_info["cape_type"] = "Hancitor Config"
        # Config layout: NUL-separated strings — campaign code first, then a
        # "|"-separated list of gate URLs.
        ConfigStrings = file_data.split(b"\0")
        ConfigStrings = [_f for _f in ConfigStrings if _f]
        ConfigItem = "Campaign Code"
        config[cape_name] = {}
        config[cape_name]["cape_type"] = "Hancitor Config"
        # NOTE(review): assumes at least two non-empty strings; a truncated
        # dump would raise IndexError here — confirm upstream guarantees this.
        config[cape_name].update({ConfigItem: [ConfigStrings[0]]})
        GateURLs = ConfigStrings[1].split(b"|")
        for index, value in enumerate(GateURLs):
            ConfigItem = "Gate URL " + str(index + 1)
            config[cape_name].update({ConfigItem: [value]})
        append_file = False

    # QakBot
    if file_info["cape_type_code"] == QAKBOT_CONFIG:
        file_info["cape_type"] = "QakBot Config"
        cape_name = "QakBot"
        config[cape_name] = {}
        config[cape_name]["cape_type"] = "QakBot Config"
        config_tmp = static_config_parsers(cape_name, file_data)
        if config_tmp and config_tmp[cape_name]:
            config.update(config_tmp)
        append_file = False

    # Attempt to decrypt script dump
    if file_info["cape_type_code"] == SCRIPT_DUMP:
        data = file_data.decode("utf-16").replace("\x00", "")
        cape_name = "ScriptDump"
        malwareconfig_loaded = False
        try:
            malwareconfig_parsers = os.path.join(CUCKOO_ROOT, "modules", "processing", "parsers", "CAPE")
            file, pathname, description = imp.find_module(cape_name, [malwareconfig_parsers])
            module = imp.load_module(cape_name, file, pathname, description)
            malwareconfig_loaded = True
            log.debug("CAPE: Imported malwareconfig.com parser %s", cape_name)
        except ImportError:
            log.debug("CAPE: malwareconfig.com parser: No module named %s", cape_name)
        if malwareconfig_loaded:
            try:
                script_data = module.config(self, data)
                if script_data and "more_eggs" in script_data["type"]:
                    bindata = script_data["data"]
                    sha256 = hashlib.sha256(bindata).hexdigest()
                    filepath = os.path.join(self.CAPE_path, sha256)
                    tmpstr = file_info["pid"]
                    tmpstr += "," + file_info["process_path"]
                    tmpstr += "," + file_info["module_path"]
                    if "text" in script_data["datatype"]:
                        file_info["cape_type"] = "MoreEggsJS"
                        outstr = str(MOREEGGSJS_PAYLOAD) + "," + tmpstr + "\n"
                        with open(filepath + "_info.txt", "w") as infofd:
                            infofd.write(outstr)
                        with open(filepath, "w") as cfile:
                            cfile.write(bindata)
                    elif "binary" in script_data["datatype"]:
                        file_info["cape_type"] = "MoreEggsBin"
                        outstr = str(MOREEGGSBIN_PAYLOAD) + "," + tmpstr + "\n"
                        with open(filepath + "_info.txt", "w") as infofd:
                            infofd.write(outstr)
                        with open(filepath, "wb") as cfile:
                            cfile.write(bindata)
                    if os.path.exists(filepath):
                        self.script_dump_files.append(filepath)
                else:
                    file_info["cape_type"] = "Script Dump"
                    log.info("CAPE: Script Dump does not contain known encrypted payload.")
            except Exception as e:
                log.error("CAPE: malwareconfig parsing error with %s: %s", cape_name, e)
        append_file = True

    # More_Eggs
    if file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD:
        file_info["cape_type"] = "More Eggs JS Payload"
        cape_name = "MoreEggs"
        append_file = True

    # Process CAPE Yara hits
    for hit in file_info["cape_yara"]:
        # Check to see if file is packed with UPX
        if hit["name"] == "UPX":
            log.info("CAPE: Found UPX Packed sample - attempting to unpack")
            self.upx_unpack(file_data)
        # Check for a payload or config hit
        extraction_types = ("payload", "config", "loader")
        try:
            if any(file_type in hit["meta"].get("cape_type", "").lower() for file_type in extraction_types):
                file_info["cape_type"] = hit["meta"]["cape_type"]
                cape_name = hit["name"].replace("_", " ")
        except Exception as e:
            print("Cape type error: {}".format(e))
        if "-bit" not in file_info["cape_type"]:
            _append_pe_type(file_info)
        suppress_parsing_list = ["Cerber", "Ursnif"]
        if hit["name"] == "GuLoader":
            self.detect2pid(file_info["pid"], "GuLoader")
        if hit["name"] in suppress_parsing_list:
            continue
        tmp_config = static_config_parsers(hit["name"].replace("_", " "), file_data)
        if tmp_config and tmp_config[hit["name"].replace("_", " ")]:
            config.update(tmp_config)

    if cape_name:
        if "detections" not in self.results:
            if cape_name != "UPX":
                # ToDo list of keys
                self.results["detections"] = cape_name
        if file_info.get("pid"):
            self.detect2pid(file_info["pid"], cape_name)

    # Remove duplicate payloads from web ui
    for cape_file in self.cape["payloads"] or []:
        if file_info["size"] == cape_file["size"]:
            if HAVE_PYDEEP:
                ssdeep_grade = pydeep.compare(file_info["ssdeep"].encode("utf-8"), cape_file["ssdeep"].encode("utf-8"))
                if ssdeep_grade >= ssdeep_threshold:
                    append_file = False
            if file_info.get("entrypoint") and file_info.get("ep_bytes") and cape_file.get("entrypoint"):
                if (
                    file_info["entrypoint"] == cape_file["entrypoint"]
                    and file_info["cape_type_code"] == cape_file["cape_type_code"]
                    and file_info["ep_bytes"] == cape_file["ep_bytes"]
                ):
                    log.debug("CAPE duplicate output file skipped")
                    append_file = False

    if append_file is True:
        if HAVE_FLARE_CAPA:
            pretime = datetime.now()
            capa_details = flare_capa_details(file_path, "cape")
            if capa_details:
                file_info["flare_capa"] = capa_details
            self.add_statistic_tmp("flare_capa", "time", pretime=pretime)
        self.cape["payloads"].append(file_info)

    if config and config not in self.cape["configs"]:
        self.cape["configs"].append(config)
def static_file_info(
    data_dictionary: dict, file_path: str, task_id: str, package: str, options: str, destination_folder: str, results: dict
) -> None:
    """Run the static-analysis pipeline for one file.

    Dispatches on the libmagic type/package to the matching analyzer
    (PE, .NET, Office, PDF, WSF, LNK, Java), then runs the generic stages
    (trid, DIE, floss, strings, VirusTotal, extractors). All output is
    written into data_dictionary in place; nothing is returned.

    @param data_dictionary: per-file results dict; must contain "type" and "sha256".
    @param file_path: path of the file to analyze.
    @param task_id: analysis task id (string) passed to sub-analyzers.
    @param package: analysis package name, used for dispatch.
    @param options: raw options string; parsed below with get_options().
    @param destination_folder: where generic_file_extractors drops artifacts.
    @param results: global results dict (used by the VirusTotal lookup).
    """
    # Skip oversized files (configured limit is in MB).
    if int(os.path.getsize(file_path) / (1024 * 1024)) > int(processing_conf.static.max_file_size):
        return
    # Office 2007+ documents are zip containers; without oletools we cannot
    # analyze them, so warn about the missing dependency.
    if (
        not HAVE_OLETOOLS
        and "Zip archive data, at least v2.0" in data_dictionary["type"]
        and package in {"doc", "ppt", "xls", "pub"}
    ):
        log.info("Missed dependencies: pip3 install oletools")

    options_dict = get_options(options)

    if HAVE_PEFILE and ("PE32" in data_dictionary["type"] or "MS-DOS executable" in data_dictionary["type"]):
        data_dictionary["pe"] = PortableExecutable(file_path).run(task_id)
        if HAVE_FLARE_CAPA:
            capa_details = flare_capa_details(file_path, "static")
            if capa_details:
                data_dictionary["flare_capa"] = capa_details
        if HAVE_FLOSS:
            # NOTE(review): this "floss" result may be overwritten by the
            # second Floss pass further below — confirm that is intended.
            floss_strings = Floss(file_path, "static", "pe").run()
            if floss_strings:
                data_dictionary["floss"] = floss_strings
        # Mono/.NET assemblies are PE files, hence nested in this branch.
        if "Mono" in data_dictionary["type"]:
            data_dictionary["dotnet"] = DotNETExecutable(file_path).run()
    elif HAVE_OLETOOLS and package in {"doc", "ppt", "xls", "pub"}:
        # options is dict where we need to get pass get_options
        data_dictionary["office"] = Office(file_path, task_id, data_dictionary["sha256"], options_dict).run()
    elif "PDF" in data_dictionary["type"] or file_path.endswith(".pdf"):
        data_dictionary["pdf"] = PDF(file_path).run()
    elif package in {"wsf", "hta"} or data_dictionary["type"] == "XML document text" or file_path.endswith(".wsf"):
        data_dictionary["wsf"] = WindowsScriptFile(file_path).run()
    # elif package in {"js", "vbs"}:
    #     data_dictionary["js"] = EncodedScriptFile(file_path).run()
    elif package == "lnk" or "MS Windows shortcut" in data_dictionary["type"]:
        data_dictionary["lnk"] = LnkShortcut(file_path).run()
    elif "Java Jar" in data_dictionary["type"] or file_path.endswith(".jar"):
        if selfextract_conf.procyon.binary and not os.path.exists(selfextract_conf.procyon.binary):
            log.error("procyon_path specified in processing.conf but the file does not exist")
        else:
            data_dictionary["java"] = Java(file_path, selfextract_conf.procyon.binary).run()

    # It's possible to fool libmagic into thinking our 2007+ file is a zip.
    # So until we have static analysis for zip files, we can use oleid to fail us out silently,
    # yielding no static analysis results for actual zip files.
    # elif "ELF" in data_dictionary["type"] or file_path.endswith(".elf"):
    #     data_dictionary["elf"] = ELF(file_path).run()
    #     data_dictionary["keys"] = f.get_keys()
    # elif HAVE_OLETOOLS and package == "hwp":
    #     data_dictionary["hwp"] = HwpDocument(file_path).run()

    with open(file_path, "rb") as f:
        is_text_file(data_dictionary, file_path, 8192, f.read())

    if processing_conf.trid.enabled:
        trid_info(file_path, data_dictionary)

    if processing_conf.die.enabled:
        detect_it_easy_info(file_path, data_dictionary)

    if HAVE_FLOSS and processing_conf.floss.enabled:
        floss_strings = Floss(file_path, package).run()
        if floss_strings:
            data_dictionary["floss"] = floss_strings

    if HAVE_STRINGS:
        strings = extract_strings(file_path)
        if strings:
            data_dictionary["strings"] = strings

    # ToDo we need url support
    if HAVE_VIRUSTOTAL and processing_conf.virustotal.enabled:
        vt_details = vt_lookup("file", file_path, results)
        if vt_details:
            data_dictionary["virustotal"] = vt_details

    generic_file_extractors(file_path, destination_folder, data_dictionary["type"], data_dictionary, options_dict, results)
def run(self):
    """Run analysis on process-dump files produced by the monitor.

    Builds a metadata index from files.json, then for every dump in the
    procdump directory fills in process/module info, a human-readable
    cape_type, optional text preview, yara naming, capa, and static info.
    @return: list of process dumps with related information, or None when
        the procdump directory does not exist.
    """
    self.key = "procdump"
    procdump_files = []
    buf = self.options.get("buffer", 8192)
    if not os.path.exists(self.procdump_path):
        return None
    meta = {}
    if os.path.exists(self.files_metadata):
        # BUGFIX: the metadata file handle was previously opened in the for
        # statement and never closed; use a context manager.
        with open(self.files_metadata, "rb") as metadata_fd:
            for line in metadata_fd:
                entry = json.loads(line)
                filepath = os.path.join(self.analysis_path, entry["path"])
                meta[filepath] = {
                    "pids": entry["pids"],
                    "filepath": entry["filepath"],
                    "metadata": entry["metadata"],
                }

    file_names = os.listdir(self.procdump_path)
    for file_name in file_names:
        file_path = os.path.join(self.procdump_path, file_name)
        # Skip dumps the monitor did not record metadata for.
        if not meta.get(file_path):
            continue
        file_info, pefile_object = File(
            file_path=file_path, guest_paths=meta[file_path]["metadata"], file_name=file_name
        ).get_all()
        if pefile_object:
            self.results.setdefault("pefiles", {})
            self.results["pefiles"].setdefault(file_info["sha256"], pefile_object)
        # Metadata format: "<type_code>;?<process_path>;?<module_path>;?..."
        metastrings = meta[file_path].get("metadata", "").split(";?")
        if len(metastrings) < 3:
            continue
        file_info["process_path"] = metastrings[1]
        file_info["module_path"] = metastrings[2]
        file_info["process_name"] = file_info["process_path"].rsplit("\\", 1)[-1]
        file_info["pid"] = meta[file_path]["pids"][0]
        type_strings = file_info["type"].split()
        if len(type_strings) < 3:
            continue
        if type_strings[0] == "MS-DOS":
            file_info["cape_type"] = "DOS MZ image: executable"
        else:
            file_info["cape_type"] = "PE image"
            if type_strings[0] == ("PE32+"):
                file_info["cape_type"] += ": 64-bit "
            elif type_strings[0] == ("PE32"):
                file_info["cape_type"] += ": 32-bit "
            file_info["cape_type"] += "DLL" if type_strings[2] == ("(DLL)") else "executable"
        texttypes = [
            "ASCII",
            "Windows Registry text",
            "XML document text",
            "Unicode text",
        ]
        # Attach a printable, truncated preview for text-like dumps.
        if any(texttype in file_info["type"] for texttype in texttypes):
            with open(file_info["path"], "r") as drop_open:
                filedata = drop_open.read(buf + 1)
            file_info["data"] = convert_to_printable_and_truncate(filedata, buf)
        if file_info["pid"]:
            _ = cape_name_from_yara(file_info, file_info["pid"], self.results)
        if HAVE_FLARE_CAPA:
            pretime = timeit.default_timer()
            capa_details = flare_capa_details(file_path, "procdump")
            if capa_details:
                file_info["flare_capa"] = capa_details
            self.add_statistic_tmp("flare_capa", "time", pretime)
        # should we use dropped path here?
        static_file_info(
            file_info,
            file_path,
            str(self.task["id"]),
            self.task.get("package", ""),
            self.task.get("options", ""),
            self.self_extracted,
            self.results,
        )
        procdump_files.append(file_info)
    return procdump_files
def process_file(self, file_path, append_file, metadata={}):
    """Process file.

    Classifies one CAPE output file by its metadata type code, runs the
    family handlers (PlugX, script dumps, More_Eggs), processes yara hits
    through static_config_parsers, and records payloads/configs on
    self.cape.
    @param file_path: path to the dumped file.
    @param append_file: initial decision on adding to self.cape["payloads"].
    @param metadata: monitor metadata dict ("pids" + ";?"-separated string).
        NOTE(review): mutable default argument — shared across calls.
    @return: file_info
    """
    config = {}
    cape_name = ""
    type_string = ""

    if not os.path.exists(file_path):
        return

    # NOTE(review): `buf` appears unused in this version.
    buf = self.options.get("buffer", BUFSIZE)
    file_info, pefile_object = File(file_path, metadata.get("metadata", "")).get_all()
    if pefile_object:
        self.results.setdefault("pefiles", {})
        self.results["pefiles"].setdefault(file_info["sha256"], pefile_object)

    # Get the file data
    with open(file_info["path"], "rb") as file_open:
        file_data = file_open.read()

    if metadata.get("pids", False):
        if len(metadata["pids"]) == 1:
            file_info["pid"] = metadata["pids"][0]
        else:
            file_info["pid"] = ",".join(metadata["pids"])

    # Monitor metadata format: "<type_code>;?<process_path>;?<module_path>;?..."
    metastrings = metadata.get("metadata", "").split(";?")
    if len(metastrings) > 2:
        file_info["process_path"] = metastrings[1]
        file_info["process_name"] = metastrings[1].split("\\")[-1]
    if len(metastrings) > 3:
        file_info["module_path"] = metastrings[2]

    file_info["cape_type_code"] = 0
    file_info["cape_type"] = ""
    if metastrings and metastrings[0] and metastrings[0].isdigit():
        file_info["cape_type_code"] = int(metastrings[0])

    if file_info["cape_type_code"] == TYPE_STRING:
        if len(metastrings) > 4:
            type_string = metastrings[3]

    if file_info["cape_type_code"] == COMPRESSION:
        file_info["cape_type"] = "Decompressed PE Image"

    if file_info["cape_type_code"] in inject_map:
        file_info["cape_type"] = inject_map[file_info["cape_type_code"]]
        if len(metastrings) > 4:
            file_info["target_path"] = metastrings[3]
            file_info["target_process"] = metastrings[3].split("\\")[-1]
            file_info["target_pid"] = metastrings[4]

    if file_info["cape_type_code"] in unpack_map:
        file_info["cape_type"] = unpack_map[file_info["cape_type_code"]]
        if len(metastrings) > 4:
            file_info["virtual_address"] = metastrings[3]
        # Qualify unpacked PE images with bitness and DLL/executable suffix.
        type_strings = file_info["type"].split()
        if type_strings[0] in ("PE32+", "PE32"):
            file_info["cape_type"] += pe_map[type_strings[0]]
            if type_strings[2] == ("(DLL)"):
                file_info["cape_type"] += "DLL"
            else:
                file_info["cape_type"] += "executable"

    if file_info["cape_type_code"] in code_mapping:
        file_info["cape_type"] = code_mapping[file_info["cape_type_code"]]
        type_strings = file_info["type"].split()
        if type_strings[0] in ("PE32+", "PE32"):
            file_info["cape_type"] += pe_map[type_strings[0]]
            if type_strings[2] == ("(DLL)"):
                file_info["cape_type"] += "DLL"
            else:
                file_info["cape_type"] += "executable"
        if file_info["cape_type_code"] in name_mapping:
            cape_name = name_mapping[file_info["cape_type_code"]]
        append_file = True
    """
    ConfigData = format(file_data)
    if ConfigData:
        config[cape_name].update({ConfigItem: [ConfigData]})
    """
    # NOTE(review): this unconditional reset looks like leftover from the
    # commented-out config block above; it clears any earlier append_file=True.
    # Confirm intended before relying on it.
    append_file = False

    # PlugX
    if file_info["cape_type_code"] == PLUGX_CONFIG:
        file_info["cape_type"] = "PlugX Config"
        if plugx_parser:
            plugx_config = plugx_parser.parse_config(file_data, len(file_data))
            if plugx_config:
                cape_name = "PlugX"
                config[cape_name] = dict()
                for key, value in plugx_config.items():
                    config[cape_name].update({key: [value]})
            else:
                log.error("CAPE: PlugX config parsing failure - size many not be handled.")
            append_file = False

    # Attempt to decrypt script dump
    if file_info["cape_type_code"] == SCRIPT_DUMP:
        # Script dumps are written as UTF-16 by the monitor.
        data = file_data.decode("utf-16").replace("\x00", "")
        cape_name = "ScriptDump"
        malwareconfig_loaded = False
        try:
            malwareconfig_parsers = os.path.join(CUCKOO_ROOT, "modules", "processing", "parsers", "CAPE")
            file, pathname, description = imp.find_module(cape_name, [malwareconfig_parsers])
            module = imp.load_module(cape_name, file, pathname, description)
            malwareconfig_loaded = True
            log.debug("CAPE: Imported malwareconfig.com parser %s", cape_name)
        except ImportError:
            log.debug("CAPE: malwareconfig.com parser: No module named %s", cape_name)
        if malwareconfig_loaded:
            try:
                script_data = module.config(self, data)
                if script_data and "more_eggs" in script_data["type"]:
                    bindata = script_data["data"]
                    sha256 = hashlib.sha256(bindata).hexdigest()
                    filepath = os.path.join(self.CAPE_path, sha256)
                    tmpstr = file_info["pid"]
                    tmpstr += "," + file_info["process_path"]
                    tmpstr += "," + file_info["module_path"]
                    if "text" in script_data["datatype"]:
                        file_info["cape_type"] = "MoreEggsJS"
                        outstr = str(MOREEGGSJS_PAYLOAD) + "," + tmpstr + "\n"
                        with open(filepath + "_info.txt", "w") as infofd:
                            infofd.write(outstr)
                        with open(filepath, "w") as cfile:
                            cfile.write(bindata)
                    elif "binary" in script_data["datatype"]:
                        file_info["cape_type"] = "MoreEggsBin"
                        outstr = str(MOREEGGSBIN_PAYLOAD) + "," + tmpstr + "\n"
                        with open(filepath + "_info.txt", "w") as infofd:
                            infofd.write(outstr)
                        with open(filepath, "wb") as cfile:
                            cfile.write(bindata)
                    if os.path.exists(filepath):
                        self.script_dump_files.append(filepath)
                else:
                    file_info["cape_type"] = "Script Dump"
                    log.info("CAPE: Script Dump does not contain known encrypted payload.")
            except Exception as e:
                log.error("CAPE: malwareconfig parsing error with %s: %s", cape_name, e)
        append_file = True

    # More_Eggs
    if file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD:
        file_info["cape_type"] = "More Eggs JS Payload"
        cape_name = "MoreEggs"
        append_file = True

    # Process CAPE Yara hits
    for hit in file_info["cape_yara"]:
        # Check to see if file is packed with UPX
        if hit["name"] == "UPX":
            log.info("CAPE: Found UPX Packed sample - attempting to unpack")
            self.upx_unpack(file_data)
        # Check for a payload or config hit
        extraction_types = ("payload", "config", "loader")
        try:
            if any([file_type in hit["meta"].get("cape_type", "").lower() for file_type in extraction_types]):
                file_info["cape_type"] = hit["meta"]["cape_type"]
                cape_name = hit["name"].replace("_", " ")
        except Exception as e:
            print("Cape type error: {}".format(e))
        type_strings = file_info["type"].split()
        if "-bit" not in file_info["cape_type"]:
            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
        if hit["name"] == "GuLoader":
            self.detect2pid(file_info["pid"], "GuLoader")
        # NOTE(review): the parser is invoked with hit["name"] but the result
        # is checked under hit["name"].replace("_", " ") — key mismatch for
        # names containing "_"; confirm which form static_config_parsers uses.
        tmp_config = static_config_parsers(hit["name"], file_data)
        if tmp_config and tmp_config[hit["name"].replace("_", " ")]:
            config.update(tmp_config)

    if type_string:
        # TYPE_STRING metadata carries the family name directly; run its
        # static config parser against the raw file data.
        log.info("CAPE: type_string: %s", type_string)
        tmp_config = static_config_parsers(type_string.split(" ")[0], file_data)
        if tmp_config:
            cape_name = type_string.split(" ")[0]
            log.info("CAPE: config returned for: %s", cape_name)
            config.update(tmp_config)

    if cape_name:
        if "detections" not in self.results:
            if cape_name != "UPX":
                # ToDo list of keys
                self.results["detections"] = cape_name
        if file_info.get("pid"):
            self.detect2pid(file_info["pid"], cape_name)

    # Remove duplicate payloads from web ui
    for cape_file in self.cape["payloads"] or []:
        if file_info["size"] == cape_file["size"]:
            if HAVE_PYDEEP:
                ssdeep_grade = pydeep.compare(file_info["ssdeep"].encode("utf-8"), cape_file["ssdeep"].encode("utf-8"))
                if ssdeep_grade >= ssdeep_threshold:
                    append_file = False
            if file_info.get("entrypoint") and file_info.get("ep_bytes") and cape_file.get("entrypoint"):
                if (
                    file_info.get("entrypoint")
                    and file_info["entrypoint"] == cape_file["entrypoint"]
                    and file_info["cape_type_code"] == cape_file["cape_type_code"]
                    and file_info["ep_bytes"] == cape_file["ep_bytes"]
                ):
                    log.debug("CAPE duplicate output file skipped")
                    append_file = False

    if append_file is True:
        if HAVE_FLARE_CAPA:
            pretime = datetime.now()
            capa_details = flare_capa_details(file_path, "cape")
            if capa_details:
                file_info["flare_capa"] = capa_details
            self.add_statistic_tmp("flare_capa", "time", pretime=pretime)
        self.cape["payloads"].append(file_info)

    if config and config not in self.cape["configs"]:
        self.cape["configs"].append(config)
def process_file(self, file_path, append_file, metadata=None):
    """Process file.

    Modern variant: classifies the dump by its metadata type code, runs
    family handlers, then iterates yara hits over the file and its
    extracted sub-files, dispatching static config parsers at most once
    per (path, family) pair.
    @param file_path: path to the dumped file.
    @param append_file: initial decision on adding to self.cape["payloads"].
    @param metadata: monitor metadata dict ("pids" + ";?"-separated string).
    @return: file_info
    """
    if metadata is None:
        metadata = {}
    cape_name = ""
    type_string = ""

    if not os.path.exists(file_path):
        return

    file_info, pefile_object = File(file_path, metadata.get("metadata", "")).get_all()
    # Families detected for this file; fed to add_family_detections at the end.
    cape_names = set()
    if pefile_object:
        self.results.setdefault("pefiles", {}).setdefault(file_info["sha256"], pefile_object)

    if file_info.get("clamav") and processing_conf.detections.clamav:
        clamav_detection = get_clamav_consensus(file_info["clamav"])
        if clamav_detection:
            add_family_detection(self.results, clamav_detection, "ClamAV", file_info["sha256"])

    # should we use dropped path here?
    static_file_info(
        file_info,
        file_path,
        str(self.task["id"]),
        self.task.get("package", ""),
        self.task.get("options", ""),
        self.self_extracted,
        self.results,
    )

    # Get the file data
    with open(file_info["path"], "rb") as file_open:
        file_data = file_open.read()

    if metadata.get("pids", False):
        file_info["pid"] = metadata["pids"][0] if len(metadata["pids"]) == 1 else ",".join(metadata["pids"])

    # Monitor metadata format: "<type_code>;?<process_path>;?<module_path>;?..."
    metastrings = metadata.get("metadata", "").split(";?")
    if len(metastrings) > 2:
        file_info["process_path"] = metastrings[1]
        file_info["process_name"] = metastrings[1].rsplit("\\", 1)[-1]
    if len(metastrings) > 3:
        file_info["module_path"] = metastrings[2]

    file_info["cape_type_code"] = 0
    file_info["cape_type"] = ""
    if metastrings and metastrings[0] and metastrings[0].isdigit():
        file_info["cape_type_code"] = int(metastrings[0])

        if file_info["cape_type_code"] == TYPE_STRING:
            if len(metastrings) > 4:
                type_string = metastrings[3]

        elif file_info["cape_type_code"] == COMPRESSION:
            file_info["cape_type"] = "Decompressed PE Image"

        elif file_info["cape_type_code"] in inject_map:
            file_info["cape_type"] = inject_map[file_info["cape_type_code"]]
            if len(metastrings) > 4:
                file_info["target_path"] = metastrings[3]
                file_info["target_process"] = metastrings[3].rsplit("\\", 1)[-1]
                file_info["target_pid"] = metastrings[4]

        elif file_info["cape_type_code"] in unpack_map:
            file_info["cape_type"] = unpack_map[file_info["cape_type_code"]]
            if len(metastrings) > 4:
                file_info["virtual_address"] = metastrings[3]
            # Qualify unpacked PE images with bitness and DLL/executable suffix.
            type_strings = file_info["type"].split()
            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"

        if file_info["cape_type_code"] in code_mapping:
            file_info["cape_type"] = code_mapping[file_info["cape_type_code"]]
            type_strings = file_info["type"].split()
            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"
            if file_info["cape_type_code"] in name_mapping:
                cape_name = name_mapping[file_info["cape_type_code"]]
            append_file = True

        # PlugX
        elif file_info["cape_type_code"] == PLUGX_CONFIG:
            file_info["cape_type"] = "PlugX Config"
            if plugx_parser:
                plugx_config = plugx_parser.parse_config(file_data, len(file_data))
                if plugx_config:
                    cape_name = "PlugX"
                    self.update_cape_configs(cape_name, plugx_config)
                    cape_names.add(cape_name)
                else:
                    log.error("CAPE: PlugX config parsing failure - size many not be handled")
                append_file = False

        # Attempt to decrypt script dump
        elif file_info["cape_type_code"] == SCRIPT_DUMP:
            # Script dumps are written as UTF-16 by the monitor.
            data = file_data.decode("utf-16").replace("\x00", "")
            cape_name = "ScriptDump"
            malwareconfig_loaded = False
            try:
                malwareconfig_parsers = os.path.join(CUCKOO_ROOT, "modules", "processing", "parsers", "CAPE")
                file, pathname, description = imp.find_module(cape_name, [malwareconfig_parsers])
                module = imp.load_module(cape_name, file, pathname, description)
                malwareconfig_loaded = True
                log.debug("CAPE: Imported parser %s", cape_name)
            except ImportError:
                log.debug("CAPE: parser: No module named %s", cape_name)
            if malwareconfig_loaded:
                try:
                    script_data = module.config(self, data)
                    if script_data and "more_eggs" in script_data["type"]:
                        bindata = script_data["data"]
                        sha256 = hashlib.sha256(bindata).hexdigest()
                        filepath = os.path.join(self.CAPE_path, sha256)
                        if "text" in script_data["datatype"]:
                            file_info["cape_type"] = "MoreEggsJS"
                        elif "binary" in script_data["datatype"]:
                            file_info["cape_type"] = "MoreEggsBin"
                        # NOTE(review): text mode ("w") is used for both the
                        # text and binary branches; if bindata is bytes this
                        # write raises TypeError — confirm parser output type.
                        with open(filepath, "w") as cfile:
                            cfile.write(bindata)
                        self.script_dump_files.append(filepath)
                    else:
                        file_info["cape_type"] = "Script Dump"
                        log.info("CAPE: Script Dump does not contain known encrypted payload")
                except Exception as e:
                    log.error("CAPE: malwareconfig parsing error with %s: %s", cape_name, e)
            append_file = True

        # More_Eggs
        elif file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD:
            file_info["cape_type"] = "More Eggs JS Payload"
            cape_name = "MoreEggs"
            append_file = True

    # Process CAPE Yara hits
    # Prefilter extracted data + beauty is better than oneliner:
    all_files = []
    for extracted_file in file_info.get("extracted_files", []):
        yara_hits = extracted_file["cape_yara"]
        if not yara_hits:
            continue
        if extracted_file.get("data", b""):
            extracted_file_data = make_bytes(extracted_file["data"])
        else:
            with open(extracted_file["path"], "rb") as fil:
                extracted_file_data = fil.read()
        for yara in yara_hits:
            all_files.append(
                (
                    f"[{extracted_file.get('sha256', '')}]{file_info['path']}",
                    extracted_file_data,
                    yara,
                )
            )
    for yara in file_info["cape_yara"]:
        all_files.append((file_info["path"], file_data, yara))

    # Track which (path -> {family}) parsers already ran to avoid duplicates.
    executed_config_parsers = collections.defaultdict(set)
    for tmp_path, tmp_data, hit in all_files:
        # Check for a payload or config hit
        try:
            if File.yara_hit_provides_detection(hit):
                file_info["cape_type"] = hit["meta"]["cape_type"]
                cape_name = File.get_cape_name_from_yara_hit(hit)
                cape_names.add(cape_name)
        except Exception as e:
            print(f"Cape type error: {e}")
        type_strings = file_info["type"].split()
        if "-bit" not in file_info["cape_type"]:
            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                file_info["cape_type"] += "DLL" if type_strings[2] == ("(DLL)") else "executable"

        if cape_name and cape_name not in executed_config_parsers[tmp_path]:
            tmp_config = static_config_parsers(cape_name, tmp_path, tmp_data)
            self.update_cape_configs(cape_name, tmp_config)
            executed_config_parsers[tmp_path].add(cape_name)

    if type_string:
        log.info("CAPE: type_string: %s", type_string)
        tmp_cape_name = File.get_cape_name_from_cape_type(type_string)
        # NOTE(review): membership here tests executed_config_parsers KEYS,
        # which are file paths, not family names — this condition looks like
        # it is always true; confirm whether the values' sets were intended.
        if tmp_cape_name and tmp_cape_name not in executed_config_parsers:
            tmp_config = static_config_parsers(tmp_cape_name, file_info["path"], file_data)
            if tmp_config:
                cape_name = tmp_cape_name
                cape_names.add(cape_name)
                log.info("CAPE: config returned for: %s", cape_name)
                self.update_cape_configs(cape_name, tmp_config)

    self.add_family_detections(file_info, cape_names)

    # Remove duplicate payloads from web ui
    for cape_file in self.cape["payloads"] or []:
        if file_info["size"] == cape_file["size"]:
            if HAVE_PYDEEP:
                ssdeep_grade = pydeep.compare(file_info["ssdeep"].encode(), cape_file["ssdeep"].encode())
                if ssdeep_grade >= ssdeep_threshold:
                    log.debug(
                        "CAPE duplicate output file skipped: ssdeep grade %d, threshold %d", ssdeep_grade, ssdeep_threshold
                    )
                    append_file = False
            if file_info.get("entrypoint") and file_info.get("ep_bytes") and cape_file.get("entrypoint"):
                if (
                    file_info["entrypoint"] == cape_file["entrypoint"]
                    and file_info["cape_type_code"] == cape_file["cape_type_code"]
                    and file_info["ep_bytes"] == cape_file["ep_bytes"]
                ):
                    log.debug("CAPE duplicate output file skipped: matching entrypoint")
                    append_file = False

    if append_file:
        if HAVE_FLARE_CAPA:
            pretime = timeit.default_timer()
            capa_details = flare_capa_details(file_path, "cape")
            if capa_details:
                file_info["flare_capa"] = capa_details
            self.add_statistic_tmp("flare_capa", "time", pretime=pretime)
        self.cape["payloads"].append(file_info)
def run(self):
    """Run analysis on process-dump files produced by the monitor.

    Builds a metadata index from files.json, then for every dump in the
    procdump directory fills in process/module info, a human-readable
    cape_type, an optional printable text preview, and capa details.
    @return: list of process dumps with related information, or None when
        the procdump directory does not exist.
    """
    self.key = "procdump"
    procdump_files = []
    buf = self.options.get("buffer", 8192)
    if not os.path.exists(self.procdump_path):
        return None
    meta = {}
    if os.path.exists(self.files_metadata):
        # BUGFIX: the metadata file handle was previously opened in the for
        # statement and never closed; use a context manager.
        with open(self.files_metadata, "rb") as metadata_fd:
            for line in metadata_fd:
                entry = json.loads(line)
                filepath = os.path.join(self.analysis_path, entry["path"])
                meta[filepath] = {
                    "pids": entry["pids"],
                    "filepath": entry["filepath"],
                    "metadata": entry["metadata"],
                }

    file_names = os.listdir(self.procdump_path)
    for file_name in file_names:
        file_path = os.path.join(self.procdump_path, file_name)
        # Skip dumps the monitor did not record metadata for.
        if not meta.get(file_path):
            continue
        file_info, pefile_object = File(
            file_path=file_path, guest_paths=meta[file_path]["metadata"], file_name=file_name
        ).get_all()
        if pefile_object:
            self.results.setdefault("pefiles", {})
            self.results["pefiles"].setdefault(file_info["sha256"], pefile_object)
        # Metadata format: "<type_code>;?<process_path>;?<module_path>;?..."
        metastrings = meta[file_path].get("metadata", "").split(";?")
        if len(metastrings) < 3:
            continue
        file_info["process_path"] = metastrings[1]
        file_info["module_path"] = metastrings[2]
        file_info["process_name"] = file_info["process_path"].split("\\")[-1]
        file_info["pid"] = meta[file_path]["pids"][0]
        type_strings = file_info["type"].split()
        if len(type_strings) < 3:
            continue
        if type_strings[0] == "MS-DOS":
            file_info["cape_type"] = "DOS MZ image: executable"
        else:
            file_info["cape_type"] = "PE image"
            if type_strings[0] == ("PE32+"):
                file_info["cape_type"] += ": 64-bit "
            elif type_strings[0] == ("PE32"):
                file_info["cape_type"] += ": 32-bit "
            if type_strings[2] == ("(DLL)"):
                file_info["cape_type"] += "DLL"
            else:
                file_info["cape_type"] += "executable"
        texttypes = [
            "ASCII",
            "Windows Registry text",
            "XML document text",
            "Unicode text",
        ]
        # Attach a printable, truncated preview for text-like dumps.
        # (Replaces the manual `readit` flag loop with any().)
        if any(texttype in file_info["type"] for texttype in texttypes):
            with open(file_info["path"], "r") as drop_open:
                filedata = drop_open.read(buf + 1)
            if len(filedata) > buf:
                file_info["data"] = convert_to_printable(filedata[:buf] + " <truncated>")
            else:
                file_info["data"] = convert_to_printable(filedata)
        if HAVE_FLARE_CAPA:
            pretime = datetime.now()
            capa_details = flare_capa_details(file_path, "procdump")
            if capa_details:
                file_info["flare_capa"] = capa_details
            self.add_statistic_tmp("flare_capa", "time", pretime)
        procdump_files.append(file_info)
    return procdump_files