示例#1
0
    def process_file(self, file_path, append_file, metadata=None):
        """Process a single CAPE output file.

        Builds the file's info dict, derives its CAPE type from the dump
        metadata and from CAPE Yara hits, extracts a malware configuration
        where a handler exists, and stores the outcome in
        self.cape["payloads"] and self.cape["configs"].

        @param file_path: path of the file to process.
        @param append_file: whether to add the file to self.cape["payloads"];
            the type handlers and the duplicate check may override it.
        @param metadata: optional dict; the keys read here are "metadata"
            (a ";?"-separated string) and "pids" (a list of process ids).
        @return: None; returns early when file_path does not exist.
        """
        # A shared mutable default ({}) would leak state between calls.
        if metadata is None:
            metadata = {}

        config = {}
        cape_name = ""

        if not os.path.exists(file_path):
            return

        def _pe_suffix(type_description):
            """Return the "<bitness>DLL|executable" suffix for PE32/PE32+ types, else ""."""
            tokens = type_description.split()
            if not tokens or tokens[0] not in ("PE32+", "PE32"):
                return ""
            # Guard the third token: short type strings must not raise.
            is_dll = len(tokens) > 2 and tokens[2] == "(DLL)"
            return pe_map[tokens[0]] + ("DLL" if is_dll else "executable")

        def _load_cape_parser(parser_name):
            """Import a parser from modules/processing/parsers/CAPE, or return None."""
            parsers_dir = os.path.join(CUCKOO_ROOT, "modules", "processing", "parsers", "CAPE")
            module_file = None
            try:
                module_file, pathname, description = imp.find_module(parser_name, [parsers_dir])
                parser = imp.load_module(parser_name, module_file, pathname, description)
            except ImportError:
                log.debug("CAPE: malwareconfig.com parser: No module named %s", parser_name)
                return None
            finally:
                # imp.find_module() leaves closing the returned file to the caller.
                if module_file:
                    module_file.close()
            log.debug("CAPE: Imported malwareconfig.com parser %s", parser_name)
            return parser

        file_info, pefile_object = File(file_path, metadata.get("metadata", "")).get_all()
        if pefile_object:
            self.results.setdefault("pefiles", {}).setdefault(file_info["sha256"], pefile_object)

        # Get the file data
        with open(file_info["path"], "rb") as file_open:
            file_data = file_open.read()

        pids = metadata.get("pids")
        if pids:
            if len(pids) == 1:
                file_info["pid"] = pids[0]
            else:
                # str() so that numeric pids can be joined as well.
                file_info["pid"] = ",".join(str(pid) for pid in pids)

        # Field layout: [0] type code, [1] process path, [2] module path,
        # [3] and [4] depend on the type code.
        # NOTE(review): each bound below is one larger than the index it
        # protects; presumably the string carries a trailing separator - confirm.
        metastrings = metadata.get("metadata", "").split(";?")
        if len(metastrings) > 2:
            file_info["process_path"] = metastrings[1]
            file_info["process_name"] = metastrings[1].split("\\")[-1]
        if len(metastrings) > 3:
            file_info["module_path"] = metastrings[2]

        file_info["cape_type_code"] = 0
        file_info["cape_type"] = ""
        # The handlers below always run: the previous guard compared the
        # metastrings list against "" and therefore was always true.
        try:
            file_info["cape_type_code"] = int(metastrings[0])
        except ValueError:
            # No (or non-numeric) type code: keep the default of 0.
            pass
        type_code = file_info["cape_type_code"]

        if type_code == COMPRESSION:
            file_info["cape_type"] = "Decompressed PE Image"

        if type_code in inject_map:
            file_info["cape_type"] = inject_map[type_code]
            if len(metastrings) > 4:
                file_info["target_path"] = metastrings[3]
                file_info["target_process"] = metastrings[3].split("\\")[-1]
                file_info["target_pid"] = metastrings[4]

        if type_code == INJECTION_SECTION:
            file_info["cape_type"] = "Injected Section"
            if len(metastrings) > 4:
                file_info["section_handle"] = metastrings[4]

        simple_cape_type_map = {
            UNPACKED_PE: "Unpacked PE Image",
            UNPACKED_SHELLCODE: "Unpacked Shellcode",
        }
        if type_code in simple_cape_type_map:
            file_info["cape_type"] = simple_cape_type_map[type_code]
            if len(metastrings) > 4:
                file_info["virtual_address"] = metastrings[3]

        file_info["cape_type"] += _pe_suffix(file_info["type"])

        # PlugX
        if type_code == PLUGX_CONFIG:
            file_info["cape_type"] = "PlugX Config"
            if plugx_parser:
                plugx_config = plugx_parser.parse_config(file_data, len(file_data))
                if plugx_config:
                    cape_name = "PlugX"
                    config[cape_name] = {key: [value] for key, value in plugx_config.items()}
                else:
                    log.error("CAPE: PlugX config parsing failure - size many not be handled.")
                append_file = False

        if type_code in code_mapping:
            # Replaces any type set above, then re-applies the PE suffix.
            file_info["cape_type"] = code_mapping[type_code] + _pe_suffix(file_info["type"])
            if type_code in name_mapping:
                cape_name = name_mapping[type_code]
            append_file = True

        if type_code == EVILGRAB_DATA:
            cape_name = "EvilGrab"
            file_info["cape_type"] = "EvilGrab Data"
            if file_info["size"] in (256, 260):
                # The EvilGrab entry may not exist yet; create it on demand.
                config.setdefault(cape_name, {}).update({"filepath": [format(file_data)]})
            append_file = file_info["size"] > 0x1000

        if type_code == SEDRECO_DATA:
            cape_name = "Sedreco"
            config[cape_name] = {"cape_type": "Sedreco Config"}
            # Default also covers metadata that carries no config index.
            config_item = "Unknown"
            if len(metastrings) > 4 and metastrings[4] in sedreco_map:
                config_item = sedreco_map[metastrings[4]]
            config_data = format(file_data)
            if config_data:
                config[cape_name].update({config_item: [config_data]})
            append_file = False

        if type_code == CERBER_CONFIG:
            file_info["cape_type"] = "Cerber Config"
            cape_name = "Cerber"
            # cape_type belongs under the family entry, like every other family.
            config[cape_name] = {"cape_type": "Cerber Config"}
            try:
                parsed = json.loads(file_data.rstrip(b"\0"))
            except ValueError as e:
                # Malformed dump: keep the payload, skip the JSON view.
                log.error("CAPE: Cerber config is not valid JSON: %s", e)
            else:
                config[cape_name].update({"JSON Data": [json.dumps(parsed, indent=4, sort_keys=True)]})
            append_file = True

        if type_code == URSNIF_PAYLOAD:
            cape_name = "Ursnif"
            config[cape_name] = {"cape_type": "Ursnif Payload"}
            file_info["cape_type"] = "Ursnif Payload"

        if type_code == URSNIF_CONFIG:
            file_info["cape_type"] = "Ursnif Config"
            cape_name = "Ursnif"
            parser = _load_cape_parser(cape_name)
            if parser is not None:
                try:
                    malwareconfig_config = parser.config(file_data)
                    if malwareconfig_config:
                        config[cape_name] = {"cape_type": "Ursnif Config"}
                        if isinstance(malwareconfig_config, list):
                            entries = malwareconfig_config[0].items()
                        elif isinstance(malwareconfig_config, dict):
                            entries = malwareconfig_config.items()
                        else:
                            entries = ()
                        for key, value in entries:
                            config[cape_name].update({key: [value]})
                except Exception as e:
                    # Parser code is external; a failure must not abort processing.
                    log.error("CAPE: malwareconfig parsing error with %s: %s", cape_name, e)
            append_file = False

        # Hancitor
        if type_code == HANCITOR_PAYLOAD:
            cape_name = "Hancitor"
            config[cape_name] = {"cape_type": "Hancitor Payload"}
            file_info["cape_type"] = "Hancitor Payload"

        if type_code == HANCITOR_CONFIG:
            cape_name = "Hancitor"
            file_info["cape_type"] = "Hancitor Config"
            # NUL-separated strings: first the campaign code, then "|"-joined gate URLs.
            config_strings = [chunk for chunk in file_data.split(b"\0") if chunk]
            config[cape_name] = {"cape_type": "Hancitor Config"}
            if config_strings:
                config[cape_name].update({"Campaign Code": [config_strings[0]]})
            if len(config_strings) > 1:
                for index, value in enumerate(config_strings[1].split(b"|"), start=1):
                    config[cape_name].update({"Gate URL " + str(index): [value]})
            else:
                log.warning("CAPE: Hancitor config is incomplete - gate URLs missing.")
            append_file = False

        # QakBot
        if type_code == QAKBOT_CONFIG:
            file_info["cape_type"] = "QakBot Config"
            cape_name = "QakBot"
            config[cape_name] = {"cape_type": "QakBot Config"}
            config_tmp = static_config_parsers(cape_name, file_data)
            if config_tmp and config_tmp[cape_name]:
                config.update(config_tmp)
            append_file = False

        # Attempt to decrypt script dump
        if type_code == SCRIPT_DUMP:
            data = file_data.decode("utf-16").replace("\x00", "")
            cape_name = "ScriptDump"
            parser = _load_cape_parser(cape_name)
            if parser is not None:
                try:
                    script_data = parser.config(self, data)
                    if script_data and "more_eggs" in script_data["type"]:
                        bindata = script_data["data"]
                        sha256 = hashlib.sha256(bindata).hexdigest()
                        filepath = os.path.join(self.CAPE_path, sha256)
                        tmpstr = ",".join((str(file_info["pid"]), file_info["process_path"], file_info["module_path"]))
                        payload_code = None
                        if "text" in script_data["datatype"]:
                            file_info["cape_type"] = "MoreEggsJS"
                            payload_code = MOREEGGSJS_PAYLOAD
                        elif "binary" in script_data["datatype"]:
                            file_info["cape_type"] = "MoreEggsBin"
                            payload_code = MOREEGGSBIN_PAYLOAD
                        if payload_code is not None:
                            with open(filepath + "_info.txt", "w") as infofd:
                                infofd.write(str(payload_code) + "," + tmpstr + "\n")
                            # bindata was just hashed, so it is bytes-like and
                            # must be written in binary mode for both datatypes.
                            with open(filepath, "wb") as cfile:
                                cfile.write(bindata)
                        if os.path.exists(filepath):
                            self.script_dump_files.append(filepath)
                    else:
                        file_info["cape_type"] = "Script Dump"
                        log.info("CAPE: Script Dump does not contain known encrypted payload.")
                except Exception as e:
                    # Parser code is external; a failure must not abort processing.
                    log.error("CAPE: malwareconfig parsing error with %s: %s", cape_name, e)
            append_file = True

        # More_Eggs
        if type_code == MOREEGGSJS_PAYLOAD:
            file_info["cape_type"] = "More Eggs JS Payload"
            cape_name = "MoreEggs"
            append_file = True

        # Process CAPE Yara hits
        extraction_types = ("payload", "config", "loader")
        # Families whose configs are handled by the type-code branches above.
        suppress_parsing_list = ("Cerber", "Ursnif")
        for hit in file_info["cape_yara"]:
            # Check to see if file is packed with UPX
            if hit["name"] == "UPX":
                log.info("CAPE: Found UPX Packed sample - attempting to unpack")
                self.upx_unpack(file_data)

            # Check for a payload or config hit
            try:
                hit_type = hit["meta"].get("cape_type", "").lower()
                if any(file_type in hit_type for file_type in extraction_types):
                    file_info["cape_type"] = hit["meta"]["cape_type"]
                    cape_name = hit["name"].replace("_", " ")
            except Exception as e:
                # Best effort: a malformed hit must not stop the remaining hits.
                log.warning("Cape type error: %s", e)

            if "-bit" not in file_info["cape_type"]:
                file_info["cape_type"] += _pe_suffix(file_info["type"])

            # The pid is only present when the metadata carried one.
            if hit["name"] == "GuLoader" and file_info.get("pid"):
                self.detect2pid(file_info["pid"], "GuLoader")

            if hit["name"] in suppress_parsing_list:
                continue

            parser_name = hit["name"].replace("_", " ")
            tmp_config = static_config_parsers(parser_name, file_data)
            if tmp_config and tmp_config[parser_name]:
                config.update(tmp_config)

        if cape_name:
            if "detections" not in self.results and cape_name != "UPX":
                # ToDo list of keys
                self.results["detections"] = cape_name
            if file_info.get("pid"):
                self.detect2pid(file_info["pid"], cape_name)

        # Remove duplicate payloads from web ui
        for cape_file in self.cape["payloads"] or []:
            if file_info["size"] != cape_file["size"]:
                continue
            if HAVE_PYDEEP and file_info.get("ssdeep") and cape_file.get("ssdeep"):
                ssdeep_grade = pydeep.compare(file_info["ssdeep"].encode("utf-8"), cape_file["ssdeep"].encode("utf-8"))
                if ssdeep_grade >= ssdeep_threshold:
                    append_file = False
            if (
                file_info.get("entrypoint")
                and file_info.get("ep_bytes")
                and cape_file.get("entrypoint")
                and file_info["entrypoint"] == cape_file["entrypoint"]
                and file_info["cape_type_code"] == cape_file["cape_type_code"]
                and file_info["ep_bytes"] == cape_file["ep_bytes"]
            ):
                log.debug("CAPE duplicate output file skipped")
                append_file = False

        if append_file is True:
            if HAVE_FLARE_CAPA:
                pretime = datetime.now()
                capa_details = flare_capa_details(file_path, "cape")
                if capa_details:
                    file_info["flare_capa"] = capa_details
                self.add_statistic_tmp("flare_capa", "time", pretime=pretime)
            self.cape["payloads"].append(file_info)

        if config and config not in self.cape["configs"]:
            self.cape["configs"].append(config)
示例#2
0
def static_file_info(
    data_dictionary: dict, file_path: str, task_id: str, package: str, options: str, destination_folder: str, results: dict
):
    """Run the static analyzers that apply to a file and store their output.

    Results are written into data_dictionary under one key per analyzer
    ("pe", "office", "pdf", ...). Nothing is done when the file is larger
    than processing_conf.static.max_file_size (compared in whole megabytes).
    """
    office_packages = {"doc", "ppt", "xls", "pub"}

    def _record(key, value):
        # Only store analyzer output that is non-empty.
        if value:
            data_dictionary[key] = value

    size_in_mb = int(os.path.getsize(file_path) / (1024 * 1024))
    if size_in_mb > int(processing_conf.static.max_file_size):
        return

    if not HAVE_OLETOOLS and "Zip archive data, at least v2.0" in data_dictionary["type"] and package in office_packages:
        log.info("Missed dependencies: pip3 install oletools")

    options_dict = get_options(options)

    # At most one format-specific analyzer family runs per file.
    if HAVE_PEFILE and any(tag in data_dictionary["type"] for tag in ("PE32", "MS-DOS executable")):
        data_dictionary["pe"] = PortableExecutable(file_path).run(task_id)

        if HAVE_FLARE_CAPA:
            _record("flare_capa", flare_capa_details(file_path, "static"))

        if HAVE_FLOSS:
            _record("floss", Floss(file_path, "static", "pe").run())

        if "Mono" in data_dictionary["type"]:
            data_dictionary["dotnet"] = DotNETExecutable(file_path).run()
    elif HAVE_OLETOOLS and package in office_packages:
        # Office expects the parsed options dict, not the raw options string.
        data_dictionary["office"] = Office(file_path, task_id, data_dictionary["sha256"], options_dict).run()
    elif "PDF" in data_dictionary["type"] or file_path.endswith(".pdf"):
        data_dictionary["pdf"] = PDF(file_path).run()
    elif package in {"wsf", "hta"} or data_dictionary["type"] == "XML document text" or file_path.endswith(".wsf"):
        data_dictionary["wsf"] = WindowsScriptFile(file_path).run()
    # Disabled: EncodedScriptFile for the "js"/"vbs" packages.
    elif package == "lnk" or "MS Windows shortcut" in data_dictionary["type"]:
        data_dictionary["lnk"] = LnkShortcut(file_path).run()
    elif "Java Jar" in data_dictionary["type"] or file_path.endswith(".jar"):
        procyon_binary = selfextract_conf.procyon.binary
        if not procyon_binary or os.path.exists(procyon_binary):
            data_dictionary["java"] = Java(file_path, procyon_binary).run()
        else:
            log.error("procyon_path specified in processing.conf but the file does not exist")

    # It's possible to fool libmagic into thinking a 2007+ Office file is a zip.
    # Until there is static analysis for zip files, oleid is used to fail out
    # silently, yielding no static analysis results for actual zip files.
    # Disabled: ELF analysis and HwpDocument (package "hwp").

    with open(file_path, "rb") as file_handle:
        content = file_handle.read()
        is_text_file(data_dictionary, file_path, 8192, content)

    if processing_conf.trid.enabled:
        trid_info(file_path, data_dictionary)

    if processing_conf.die.enabled:
        detect_it_easy_info(file_path, data_dictionary)

    if HAVE_FLOSS and processing_conf.floss.enabled:
        _record("floss", Floss(file_path, package).run())

    if HAVE_STRINGS:
        _record("strings", extract_strings(file_path))

    # ToDo we need url support
    if HAVE_VIRUSTOTAL and processing_conf.virustotal.enabled:
        _record("virustotal", vt_lookup("file", file_path, results))

    generic_file_extractors(file_path, destination_folder, data_dictionary["type"], data_dictionary, options_dict, results)
示例#3
0
    def run(self):
        """Collect process dumps and annotate each with process and type details.

        Only dumps that have an entry in self.files_metadata are processed;
        dumps with fewer than three metadata fields or fewer than three
        tokens in their type string are skipped.

        @return: list of process dumps with related information, or None when
                 self.procdump_path does not exist.
        """
        self.key = "procdump"
        procdump_files = []
        buf = self.options.get("buffer", 8192)
        if not os.path.exists(self.procdump_path):
            return None

        # One JSON document per line, keyed here by absolute dump path.
        meta = {}
        if os.path.exists(self.files_metadata):
            # Context manager so the handle is closed even if a line fails to parse.
            with open(self.files_metadata, "rb") as metadata_file:
                for line in metadata_file:
                    if not line.strip():
                        # Tolerate blank lines instead of failing in json.loads.
                        continue
                    entry = json.loads(line)
                    filepath = os.path.join(self.analysis_path, entry["path"])
                    meta[filepath] = {
                        "pids": entry["pids"],
                        "filepath": entry["filepath"],
                        "metadata": entry["metadata"],
                    }

        texttypes = (
            "ASCII",
            "Windows Registry text",
            "XML document text",
            "Unicode text",
        )

        for file_name in os.listdir(self.procdump_path):
            file_path = os.path.join(self.procdump_path, file_name)
            file_meta = meta.get(file_path)
            if not file_meta:
                continue
            file_info, pefile_object = File(
                file_path=file_path,
                guest_paths=file_meta["metadata"],
                file_name=file_name).get_all()
            if pefile_object:
                self.results.setdefault("pefiles", {}).setdefault(
                    file_info["sha256"], pefile_object)
            # Field layout used here: [1] process path, [2] module path.
            metastrings = file_meta.get("metadata", "").split(";?")
            if len(metastrings) < 3:
                continue
            file_info["process_path"] = metastrings[1]
            file_info["module_path"] = metastrings[2]
            file_info["process_name"] = file_info["process_path"].rsplit(
                "\\", 1)[-1]
            # An empty pid list yields None; the truthiness check below copes.
            pids = file_meta["pids"]
            file_info["pid"] = pids[0] if pids else None
            type_strings = file_info["type"].split()
            if len(type_strings) < 3:
                continue
            if type_strings[0] == "MS-DOS":
                file_info["cape_type"] = "DOS MZ image: executable"
            else:
                file_info["cape_type"] = "PE image"
                if type_strings[0] == "PE32+":
                    file_info["cape_type"] += ": 64-bit "
                elif type_strings[0] == "PE32":
                    file_info["cape_type"] += ": 32-bit "
                file_info["cape_type"] += (
                    "DLL" if type_strings[2] == "(DLL)" else "executable")

            if any(texttype in file_info["type"] for texttype in texttypes):
                # errors="replace": dumps flagged as text are not guaranteed
                # to decode with the default encoding (e.g. UTF-16 content).
                with open(file_info["path"], "r", errors="replace") as drop_open:
                    filedata = drop_open.read(buf + 1)
                file_info["data"] = convert_to_printable_and_truncate(
                    filedata, buf)

            if file_info["pid"]:
                cape_name_from_yara(file_info, file_info["pid"], self.results)

            if HAVE_FLARE_CAPA:
                pretime = timeit.default_timer()
                capa_details = flare_capa_details(file_path, "procdump")
                if capa_details:
                    file_info["flare_capa"] = capa_details
                self.add_statistic_tmp("flare_capa", "time", pretime)

            # should we use dropped path here?
            static_file_info(
                file_info,
                file_path,
                str(self.task["id"]),
                self.task.get("package", ""),
                self.task.get("options", ""),
                self.self_extracted,
                self.results,
            )

            procdump_files.append(file_info)

        return procdump_files
示例#4
0
    def process_file(self, file_path, append_file, metadata={}):
        """Process file.
        @return: file_info
        """

        config = {}
        cape_name = ""
        type_string = ""

        if not os.path.exists(file_path):
            return

        buf = self.options.get("buffer", BUFSIZE)
        file_info, pefile_object = File(file_path,
                                        metadata.get("metadata",
                                                     "")).get_all()
        if pefile_object:
            self.results.setdefault("pefiles", {})
            self.results["pefiles"].setdefault(file_info["sha256"],
                                               pefile_object)

        # Get the file data
        with open(file_info["path"], "rb") as file_open:
            file_data = file_open.read()

        if metadata.get("pids", False):
            if len(metadata["pids"]) == 1:
                file_info["pid"] = metadata["pids"][0]
            else:
                file_info["pid"] = ",".join(metadata["pids"])

        metastrings = metadata.get("metadata", "").split(";?")
        if len(metastrings) > 2:
            file_info["process_path"] = metastrings[1]
            file_info["process_name"] = metastrings[1].split("\\")[-1]
        if len(metastrings) > 3:
            file_info["module_path"] = metastrings[2]

        file_info["cape_type_code"] = 0
        file_info["cape_type"] = ""
        if metastrings and metastrings[0] and metastrings[0].isdigit():
            file_info["cape_type_code"] = int(metastrings[0])

            if file_info["cape_type_code"] == TYPE_STRING:
                if len(metastrings) > 4:
                    type_string = metastrings[3]

            if file_info["cape_type_code"] == COMPRESSION:
                file_info["cape_type"] = "Decompressed PE Image"

            if file_info["cape_type_code"] in inject_map:
                file_info["cape_type"] = inject_map[
                    file_info["cape_type_code"]]
                if len(metastrings) > 4:
                    file_info["target_path"] = metastrings[3]
                    file_info["target_process"] = metastrings[3].split(
                        "\\")[-1]
                    file_info["target_pid"] = metastrings[4]

            if file_info["cape_type_code"] in unpack_map:
                file_info["cape_type"] = unpack_map[
                    file_info["cape_type_code"]]
                if len(metastrings) > 4:
                    file_info["virtual_address"] = metastrings[3]

            type_strings = file_info["type"].split()

            if type_strings[0] in ("PE32+", "PE32"):
                file_info["cape_type"] += pe_map[type_strings[0]]
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"

            if file_info["cape_type_code"] in code_mapping:
                file_info["cape_type"] = code_mapping[
                    file_info["cape_type_code"]]
                type_strings = file_info["type"].split()
                if type_strings[0] in ("PE32+", "PE32"):
                    file_info["cape_type"] += pe_map[type_strings[0]]
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"
                if file_info["cape_type_code"] in name_mapping:
                    cape_name = name_mapping[file_info["cape_type_code"]]
                append_file = True
                """
                ConfigData = format(file_data)
                if ConfigData:
                    config[cape_name].update({ConfigItem: [ConfigData]})
                """
                append_file = False

            # PlugX
            if file_info["cape_type_code"] == PLUGX_CONFIG:
                file_info["cape_type"] = "PlugX Config"
                if plugx_parser:
                    plugx_config = plugx_parser.parse_config(
                        file_data, len(file_data))
                    if plugx_config:
                        cape_name = "PlugX"
                        config[cape_name] = dict()
                        for key, value in plugx_config.items():
                            config[cape_name].update({key: [value]})
                    else:
                        log.error(
                            "CAPE: PlugX config parsing failure - size many not be handled."
                        )
                    append_file = False

            # Attempt to decrypt script dump
            if file_info["cape_type_code"] == SCRIPT_DUMP:
                data = file_data.decode("utf-16").replace("\x00", "")
                cape_name = "ScriptDump"
                malwareconfig_loaded = False
                try:
                    malwareconfig_parsers = os.path.join(
                        CUCKOO_ROOT, "modules", "processing", "parsers",
                        "CAPE")
                    file, pathname, description = imp.find_module(
                        cape_name, [malwareconfig_parsers])
                    module = imp.load_module(cape_name, file, pathname,
                                             description)
                    malwareconfig_loaded = True
                    log.debug("CAPE: Imported malwareconfig.com parser %s",
                              cape_name)
                except ImportError:
                    log.debug(
                        "CAPE: malwareconfig.com parser: No module named %s",
                        cape_name)
                if malwareconfig_loaded:
                    try:
                        script_data = module.config(self, data)
                        if script_data and "more_eggs" in script_data["type"]:
                            bindata = script_data["data"]
                            sha256 = hashlib.sha256(bindata).hexdigest()
                            filepath = os.path.join(self.CAPE_path, sha256)
                            tmpstr = file_info["pid"]
                            tmpstr += "," + file_info["process_path"]
                            tmpstr += "," + file_info["module_path"]
                            if "text" in script_data["datatype"]:
                                file_info["cape_type"] = "MoreEggsJS"
                                outstr = str(
                                    MOREEGGSJS_PAYLOAD) + "," + tmpstr + "\n"
                                with open(filepath + "_info.txt",
                                          "w") as infofd:
                                    infofd.write(outstr)
                                with open(filepath, "w") as cfile:
                                    cfile.write(bindata)
                            elif "binary" in script_data["datatype"]:
                                file_info["cape_type"] = "MoreEggsBin"
                                outstr = str(
                                    MOREEGGSBIN_PAYLOAD) + "," + tmpstr + "\n"
                                with open(filepath + "_info.txt",
                                          "w") as infofd:
                                    infofd.write(outstr)
                                with open(filepath, "wb") as cfile:
                                    cfile.write(bindata)
                            if os.path.exists(filepath):
                                self.script_dump_files.append(filepath)
                        else:
                            file_info["cape_type"] = "Script Dump"
                            log.info(
                                "CAPE: Script Dump does not contain known encrypted payload."
                            )
                    except Exception as e:
                        log.error(
                            "CAPE: malwareconfig parsing error with %s: %s",
                            cape_name, e)
                append_file = True

            # More_Eggs
            if file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD:
                file_info["cape_type"] = "More Eggs JS Payload"
                cape_name = "MoreEggs"
                append_file = True

        # Process CAPE Yara hits
        for hit in file_info["cape_yara"]:
            # Check to see if file is packed with UPX
            if hit["name"] == "UPX":
                log.info(
                    "CAPE: Found UPX Packed sample - attempting to unpack")
                self.upx_unpack(file_data)

            # Check for a payload or config hit
            extraction_types = ("payload", "config", "loader")

            try:
                if any([
                        file_type in hit["meta"].get("cape_type", "").lower()
                        for file_type in extraction_types
                ]):
                    file_info["cape_type"] = hit["meta"]["cape_type"]
                    cape_name = hit["name"].replace("_", " ")
            except Exception as e:
                print("Cape type error: {}".format(e))
            type_strings = file_info["type"].split()
            if "-bit" not in file_info["cape_type"]:
                if type_strings[0] in ("PE32+", "PE32"):
                    file_info["cape_type"] += pe_map[type_strings[0]]
                    if type_strings[2] == ("(DLL)"):
                        file_info["cape_type"] += "DLL"
                    else:
                        file_info["cape_type"] += "executable"

            if hit["name"] == "GuLoader":
                self.detect2pid(file_info["pid"], "GuLoader")

            tmp_config = static_config_parsers(hit["name"], file_data)
            if tmp_config and tmp_config[hit["name"].replace("_", " ")]:
                config.update(tmp_config)

        if type_string:
            log.info("CAPE: type_string: %s", type_string)
            tmp_config = static_config_parsers(
                type_string.split(" ")[0], file_data)
            if tmp_config:
                cape_name = type_string.split(" ")[0]
                log.info("CAPE: config returned for: %s", cape_name)
                config.update(tmp_config)

        if cape_name:
            if "detections" not in self.results:
                if cape_name != "UPX":
                    # ToDo list of keys
                    self.results["detections"] = cape_name
            if file_info.get("pid"):
                self.detect2pid(file_info["pid"], cape_name)

        # Remove duplicate payloads from web ui
        for cape_file in self.cape["payloads"] or []:
            if file_info["size"] == cape_file["size"]:
                if HAVE_PYDEEP:
                    ssdeep_grade = pydeep.compare(
                        file_info["ssdeep"].encode("utf-8"),
                        cape_file["ssdeep"].encode("utf-8"))
                    if ssdeep_grade >= ssdeep_threshold:
                        append_file = False
                if file_info.get("entrypoint") and file_info.get(
                        "ep_bytes") and cape_file.get("entrypoint"):
                    if (file_info.get("entrypoint") and file_info["entrypoint"]
                            == cape_file["entrypoint"]
                            and file_info["cape_type_code"]
                            == cape_file["cape_type_code"] and
                            file_info["ep_bytes"] == cape_file["ep_bytes"]):
                        log.debug("CAPE duplicate output file skipped")
                        append_file = False

        if append_file is True:
            if HAVE_FLARE_CAPA:
                pretime = datetime.now()
                capa_details = flare_capa_details(file_path, "cape")
                if capa_details:
                    file_info["flare_capa"] = capa_details
                self.add_statistic_tmp("flare_capa", "time", pretime=pretime)
            self.cape["payloads"].append(file_info)

        if config and config not in self.cape["configs"]:
            self.cape["configs"].append(config)
示例#5
0
    def process_file(self, file_path, append_file, metadata=None):
        """Process a single CAPE output file.

        Collects static information about the file, derives its CAPE type and
        family name from the dump metadata and from CAPE Yara hits, runs the
        matching static config parsers and, unless the file duplicates a
        payload that is already recorded, appends its info dict to
        self.cape["payloads"].

        @param file_path: path of the file to process.
        @param append_file: initial decision on whether to record the file as
            a payload; the type handling and the duplicate check may override it.
        @param metadata: optional dict; its "metadata" (";?"-separated string)
            and "pids" entries are read.
        @return: None. Results are stored in self.results and self.cape.
        """

        if metadata is None:
            metadata = {}
        cape_name = ""
        type_string = ""

        if not os.path.exists(file_path):
            return

        file_info, pefile_object = File(file_path,
                                        metadata.get("metadata",
                                                     "")).get_all()
        cape_names = set()

        def pe_type_suffix():
            """Return the pe_map prefix plus "DLL"/"executable" for PE files.

            Returns "" for non-PE types. The type string is re-read on every
            call, and short or empty type strings are tolerated instead of
            raising IndexError.
            """
            type_strings = file_info["type"].split()
            if not type_strings or type_strings[0] not in ("PE32+", "PE32"):
                return ""
            is_dll = len(type_strings) > 2 and type_strings[2] == "(DLL)"
            return pe_map[type_strings[0]] + ("DLL" if is_dll else "executable")

        if pefile_object:
            self.results.setdefault("pefiles",
                                    {}).setdefault(file_info["sha256"],
                                                   pefile_object)

        if file_info.get("clamav") and processing_conf.detections.clamav:
            clamav_detection = get_clamav_consensus(file_info["clamav"])
            if clamav_detection:
                add_family_detection(self.results, clamav_detection, "ClamAV",
                                     file_info["sha256"])

        # should we use dropped path here?
        static_file_info(
            file_info,
            file_path,
            str(self.task["id"]),
            self.task.get("package", ""),
            self.task.get("options", ""),
            self.self_extracted,
            self.results,
        )

        # Get the file data
        with open(file_info["path"], "rb") as file_open:
            file_data = file_open.read()

        if metadata.get("pids", False):
            # A single pid is stored as-is, several are joined with commas.
            file_info["pid"] = metadata["pids"][0] if len(
                metadata["pids"]) == 1 else ",".join(metadata["pids"])

        # Metadata layout: type code, process path, module path, then
        # type-specific fields.
        metastrings = metadata.get("metadata", "").split(";?")
        if len(metastrings) > 2:
            file_info["process_path"] = metastrings[1]
            file_info["process_name"] = metastrings[1].rsplit("\\", 1)[-1]
        if len(metastrings) > 3:
            file_info["module_path"] = metastrings[2]

        file_info["cape_type_code"] = 0
        file_info["cape_type"] = ""
        if metastrings and metastrings[0] and metastrings[0].isdigit():
            file_info["cape_type_code"] = int(metastrings[0])

            if file_info["cape_type_code"] == TYPE_STRING:
                if len(metastrings) > 4:
                    type_string = metastrings[3]

            elif file_info["cape_type_code"] == COMPRESSION:
                file_info["cape_type"] = "Decompressed PE Image"

            elif file_info["cape_type_code"] in inject_map:
                file_info["cape_type"] = inject_map[
                    file_info["cape_type_code"]]
                if len(metastrings) > 4:
                    file_info["target_path"] = metastrings[3]
                    file_info["target_process"] = metastrings[3].rsplit(
                        "\\", 1)[-1]
                    file_info["target_pid"] = metastrings[4]

            elif file_info["cape_type_code"] in unpack_map:
                file_info["cape_type"] = unpack_map[
                    file_info["cape_type_code"]]
                if len(metastrings) > 4:
                    file_info["virtual_address"] = metastrings[3]

            file_info["cape_type"] += pe_type_suffix()

            if file_info["cape_type_code"] in code_mapping:
                # A mapped type code replaces whatever was derived above.
                file_info["cape_type"] = code_mapping[
                    file_info["cape_type_code"]]
                file_info["cape_type"] += pe_type_suffix()
                if file_info["cape_type_code"] in name_mapping:
                    cape_name = name_mapping[file_info["cape_type_code"]]
                append_file = True

            # PlugX
            elif file_info["cape_type_code"] == PLUGX_CONFIG:
                file_info["cape_type"] = "PlugX Config"
                if plugx_parser:
                    plugx_config = plugx_parser.parse_config(
                        file_data, len(file_data))
                    if plugx_config:
                        cape_name = "PlugX"
                        self.update_cape_configs(cape_name, plugx_config)
                        cape_names.add(cape_name)
                    else:
                        log.error(
                            "CAPE: PlugX config parsing failure - size many not be handled"
                        )
                    append_file = False

            # Attempt to decrypt script dump
            elif file_info["cape_type_code"] == SCRIPT_DUMP:
                # NOTE(review): decode() raises UnicodeDecodeError on data that
                # is not valid UTF-16; that error is not handled here.
                data = file_data.decode("utf-16").replace("\x00", "")
                cape_name = "ScriptDump"
                malwareconfig_loaded = False
                try:
                    malwareconfig_parsers = os.path.join(
                        CUCKOO_ROOT, "modules", "processing", "parsers",
                        "CAPE")
                    module_file, pathname, description = imp.find_module(
                        cape_name, [malwareconfig_parsers])
                    try:
                        module = imp.load_module(cape_name, module_file,
                                                 pathname, description)
                    finally:
                        # find_module leaves closing the handle to the caller.
                        if module_file:
                            module_file.close()
                    malwareconfig_loaded = True
                    log.debug("CAPE: Imported parser %s", cape_name)
                except ImportError:
                    log.debug("CAPE: parser: No module named %s", cape_name)
                if malwareconfig_loaded:
                    try:
                        script_data = module.config(self, data)
                        if script_data and "more_eggs" in script_data["type"]:
                            bindata = script_data["data"]
                            sha256 = hashlib.sha256(bindata).hexdigest()
                            filepath = os.path.join(self.CAPE_path, sha256)
                            if "text" in script_data["datatype"]:
                                file_info["cape_type"] = "MoreEggsJS"
                            elif "binary" in script_data["datatype"]:
                                file_info["cape_type"] = "MoreEggsBin"
                            # The payload was just hashed, so it is bytes-like
                            # and has to be written in binary mode.
                            with open(filepath, "wb") as cfile:
                                cfile.write(bindata)
                            self.script_dump_files.append(filepath)
                        else:
                            file_info["cape_type"] = "Script Dump"
                            log.info(
                                "CAPE: Script Dump does not contain known encrypted payload"
                            )
                    except Exception as e:
                        log.error(
                            "CAPE: malwareconfig parsing error with %s: %s",
                            cape_name, e)
                append_file = True

            # More_Eggs
            elif file_info["cape_type_code"] == MOREEGGSJS_PAYLOAD:
                file_info["cape_type"] = "More Eggs JS Payload"
                cape_name = "MoreEggs"
                append_file = True

        # Process CAPE Yara hits

        # Collect (path label, data, yara hit) triples: first for extracted
        # files that have hits, then for the file itself.
        all_files = []
        for extracted_file in file_info.get("extracted_files", []):
            yara_hits = extracted_file["cape_yara"]
            if not yara_hits:
                continue
            if extracted_file.get("data", b""):
                extracted_file_data = make_bytes(extracted_file["data"])
            else:
                with open(extracted_file["path"], "rb") as fil:
                    extracted_file_data = fil.read()
            for yara_hit in yara_hits:
                all_files.append((
                    f"[{extracted_file.get('sha256', '')}]{file_info['path']}",
                    extracted_file_data,
                    yara_hit,
                ))

        for yara_hit in file_info["cape_yara"]:
            all_files.append((file_info["path"], file_data, yara_hit))

        # Maps a path label to the parser names already run against it.
        executed_config_parsers = collections.defaultdict(set)
        for tmp_path, tmp_data, hit in all_files:
            # Check for a payload or config hit
            try:
                if File.yara_hit_provides_detection(hit):
                    file_info["cape_type"] = hit["meta"]["cape_type"]
                    cape_name = File.get_cape_name_from_yara_hit(hit)
                    cape_names.add(cape_name)
            except Exception as e:
                log.error("Cape type error: %s", e)
            if "-bit" not in file_info["cape_type"]:
                file_info["cape_type"] += pe_type_suffix()

            if cape_name and cape_name not in executed_config_parsers[tmp_path]:
                tmp_config = static_config_parsers(cape_name, tmp_path,
                                                   tmp_data)
                self.update_cape_configs(cape_name, tmp_config)
                executed_config_parsers[tmp_path].add(cape_name)

        if type_string:
            log.info("CAPE: type_string: %s", type_string)
            tmp_cape_name = File.get_cape_name_from_cape_type(type_string)
            # The lookup is keyed by path, so compare against the set of
            # parsers already run for this file rather than the dict keys.
            already_run = executed_config_parsers.get(file_info["path"], ())
            if tmp_cape_name and tmp_cape_name not in already_run:
                tmp_config = static_config_parsers(tmp_cape_name,
                                                   file_info["path"],
                                                   file_data)
                if tmp_config:
                    cape_name = tmp_cape_name
                    cape_names.add(cape_name)
                    log.info("CAPE: config returned for: %s", cape_name)
                    self.update_cape_configs(cape_name, tmp_config)

        self.add_family_detections(file_info, cape_names)

        # Remove duplicate payloads from web ui
        for cape_file in self.cape["payloads"] or []:
            if file_info["size"] == cape_file["size"]:
                if HAVE_PYDEEP:
                    ssdeep_grade = pydeep.compare(file_info["ssdeep"].encode(),
                                                  cape_file["ssdeep"].encode())
                    if ssdeep_grade >= ssdeep_threshold:
                        log.debug(
                            "CAPE duplicate output file skipped: ssdeep grade %d, threshold %d",
                            ssdeep_grade, ssdeep_threshold)
                        append_file = False
                if file_info.get("entrypoint") and file_info.get(
                        "ep_bytes") and cape_file.get("entrypoint"):
                    if (file_info["entrypoint"] == cape_file["entrypoint"]
                            and file_info["cape_type_code"]
                            == cape_file["cape_type_code"] and
                            file_info["ep_bytes"] == cape_file["ep_bytes"]):
                        log.debug(
                            "CAPE duplicate output file skipped: matching entrypoint"
                        )
                        append_file = False

        if append_file:
            if HAVE_FLARE_CAPA:
                pretime = timeit.default_timer()
                capa_details = flare_capa_details(file_path, "cape")
                if capa_details:
                    file_info["flare_capa"] = capa_details
                self.add_statistic_tmp("flare_capa", "time", pretime=pretime)
            self.cape["payloads"].append(file_info)
示例#6
0
文件: procdump.py 项目: gle2/CAPEv2
    def run(self):
        """Run analysis.

        Loads the per-file metadata listed in self.files_metadata (one JSON
        object per line), then builds an info dict for every file in
        self.procdump_path that has a metadata entry. Files with fewer than
        three metadata fields or fewer than three tokens in their type string
        are skipped.

        @return: list of process dumps with related information, or None when
            the procdump directory does not exist.
        """
        self.key = "procdump"
        procdump_files = []
        buf = self.options.get("buffer", 8192)
        if not os.path.exists(self.procdump_path):
            return None

        meta = {}
        if os.path.exists(self.files_metadata):
            # The context manager closes the handle once all lines are read.
            with open(self.files_metadata, "rb") as metadata_file:
                for line in metadata_file:
                    entry = json.loads(line)
                    filepath = os.path.join(self.analysis_path, entry["path"])
                    meta[filepath] = {
                        "pids": entry["pids"],
                        "filepath": entry["filepath"],
                        "metadata": entry["metadata"],
                    }

        # File types whose content is read back as text into file_info["data"].
        texttypes = (
            "ASCII",
            "Windows Registry text",
            "XML document text",
            "Unicode text",
        )

        file_names = os.listdir(self.procdump_path)
        for file_name in file_names:
            file_path = os.path.join(self.procdump_path, file_name)
            if not meta.get(file_path):
                continue
            file_info, pefile_object = File(
                file_path=file_path,
                guest_paths=meta[file_path]["metadata"],
                file_name=file_name).get_all()
            if pefile_object:
                self.results.setdefault("pefiles", {})
                self.results["pefiles"].setdefault(file_info["sha256"],
                                                   pefile_object)
            # Metadata layout: type code, process path, module path, ...
            metastrings = meta[file_path].get("metadata", "").split(";?")
            if len(metastrings) < 3:
                continue
            file_info["process_path"] = metastrings[1]
            file_info["module_path"] = metastrings[2]
            file_info["process_name"] = file_info["process_path"].split(
                "\\")[-1]
            file_info["pid"] = meta[file_path]["pids"][0]
            type_strings = file_info["type"].split()
            if len(type_strings) < 3:
                continue
            if type_strings[0] == "MS-DOS":
                file_info["cape_type"] = "DOS MZ image: executable"
            else:
                file_info["cape_type"] = "PE image"
                if type_strings[0] == ("PE32+"):
                    file_info["cape_type"] += ": 64-bit "
                elif type_strings[0] == ("PE32"):
                    file_info["cape_type"] += ": 32-bit "
                if type_strings[2] == ("(DLL)"):
                    file_info["cape_type"] += "DLL"
                else:
                    file_info["cape_type"] += "executable"

            readit = any(texttype in file_info["type"]
                         for texttype in texttypes)
            if readit:
                # Read one character past the limit to detect truncation.
                with open(file_info["path"], "r") as drop_open:
                    filedata = drop_open.read(buf + 1)
                if len(filedata) > buf:
                    file_info["data"] = convert_to_printable(filedata[:buf] +
                                                             " <truncated>")
                else:
                    file_info["data"] = convert_to_printable(filedata)

            if HAVE_FLARE_CAPA:
                pretime = datetime.now()
                capa_details = flare_capa_details(file_path, "procdump")
                if capa_details:
                    file_info["flare_capa"] = capa_details
                self.add_statistic_tmp("flare_capa", "time", pretime)

            procdump_files.append(file_info)

        return procdump_files