Exemplo n.º 1
0
    def handle_artefacts(artefact_list: list,
                         request: ServiceRequest) -> ResultSection:
        """
        Validate the given artefacts, attach each one to the request and
        collect their result sections under a single parent section.

        Positional arguments:
        artefact_list -- list of dictionaries that each represent an artefact
        request -- service request that receives the extracted / supplementary files

        Returns the "Sandbox Artefacts" section, or None when that section
        ended up without any subsections.
        """
        checked_artefacts = SandboxOntology._validate_artefacts(artefact_list)
        parent_section = ResultSection("Sandbox Artefacts")

        for item in checked_artefacts:
            SandboxOntology._handle_artefact(item, parent_section)

            # Anything not flagged for extraction is attached as supplementary
            if not item.to_be_extracted:
                request.add_supplementary(item.path, item.name,
                                          item.description)
                continue

            try:
                request.add_extracted(item.path, item.name, item.description)
            except MaxExtractedExceeded:
                # Too many files have already been extracted; this is
                # deliberately not treated as an error
                continue

        if parent_section.subsections:
            return parent_section
        return None
Exemplo n.º 2
0
    def _attach_service_meta_ontology(self, request: ServiceRequest) -> None:
        """
        Build the result ontology for this request and attach it as a
        supplementary file.

        Every result section is visited (subsections recursively) to collect
        the highest classification, the validated tags and the tags grouped
        per raised heuristic. The resulting document is written to
        "<sha256>.ontology" in the working directory and added to the request.
        Nothing is attached when the request has no result sections, or when
        neither tags nor ontologies were collected.

        Positional arguments:
        request -- service request whose result is summarised
        """

        heuristics = helper.get_heuristics()

        # Cleanup invalid tagging from service results
        def validate_tags(tag_map):
            tag_map, _ = construct_safe(Tagging, unflatten(tag_map))
            tag_map = flatten(tag_map.as_primitives(strip_null=True))
            return tag_map

        # Merge two tag maps, de-duplicating the values of each tag type
        def merge_tags(tag_a, tag_b):
            if not tag_a:
                return tag_b

            elif not tag_b:
                return tag_a

            # {**a, **b} yields every key once: tag_a's keys first, then the new ones from tag_b
            return {
                key: list(set(tag_a.get(key, []) + tag_b.get(key, [])))
                for key in {**tag_a, **tag_b}
            }

        def preprocess_result_for_dump(sections, current_max, heur_tag_map,
                                       tag_map):
            for section in sections:
                # Determine max classification of the overall result
                current_max = forge.get_classification().max_classification(
                    section.classification, current_max)

                # Append tags raised by the service, if any
                # NOTE(review): dict.update replaces the value list of any tag type already
                # collected from an earlier section instead of merging it -- confirm intended
                section_tags = validate_tags(section.tags)
                if section_tags:
                    tag_map.update(section_tags)

                # Append tags associated to heuristics raised by the service, if any
                if section.heuristic:
                    heur = heuristics[section.heuristic.heur_id]
                    key = f'{self.name.upper()}_{heur.heur_id}'
                    heur_entry = heur_tag_map[key]
                    heur_entry["name"] = heur.name
                    # Only touch "tags" when this section brings some, so that a tag-less
                    # section does not wipe the tags merged from earlier sections
                    if section_tags:
                        heur_entry["tags"] = merge_tags(heur_entry["tags"],
                                                        section_tags)

                # Recurse through subsections
                if section.subsections:
                    current_max, heur_tag_map, tag_map = preprocess_result_for_dump(
                        section.subsections, current_max, heur_tag_map,
                        tag_map)

            return current_max, heur_tag_map, tag_map

        if not request.result or not request.result.sections:
            # No service results, therefore no ontological output
            return

        max_result_classification, heur_tag_map, tag_map = preprocess_result_for_dump(
            request.result.sections,
            request.task.service_default_result_classification,
            defaultdict(lambda: {"tags": dict()}), defaultdict(list))

        if not tag_map and not self.ontologies:
            # No tagging or ontologies found, therefore informational results
            return

        ontology = {
            'header': {
                'md5': request.md5,
                'sha1': request.sha1,
                'sha256': request.sha256,
                'type': request.file_type,
                'size': request.file_size,
                'classification': max_result_classification,
                'service_name': request.task.service_name,
                'service_version': request.task.service_version,
                'service_tool_version': request.task.service_tool_version,
                'tags': tag_map,
                'heuristics': heur_tag_map
            }
        }
        # Include Ontological data
        ontology.update({
            ontology_type.lower(): data
            for ontology_type, data in self.ontologies.items()
        })

        ontology_suffix = f"{request.sha256}.ontology"
        ontology_path = os.path.join(self.working_directory, ontology_suffix)
        try:
            # Serialise first so that a validation failure leaves no empty file behind
            serialized = json.dumps(
                ResultOntology(ontology).as_primitives(strip_null=True))
            # The context manager guarantees the content is flushed and the handle
            # closed before the file is handed over to the request
            with open(ontology_path, 'w') as ontology_file:
                ontology_file.write(serialized)
            attachment_name = f'{request.task.service_name}_{ontology_suffix}'.lower()
            request.add_supplementary(
                path=ontology_path,
                name=attachment_name,
                description=f"Result Ontology from {request.task.service_name}",
                classification=max_result_classification)
        except ValueError as e:
            self.log.error(f"Problem with generating ontology: {e}")
    def execute(self, request: ServiceRequest) -> None:
        """
        Run ViperMonkey on the submitted file and turn its output into result sections.

        The file (or a BOM-stripped temporary copy for "code/xml" files that do not
        start with "<?") is handed to the Python 2 compatibility script. Files found
        in the "<input>_artifacts" directory are added as extracted files, and the
        first JSON line on stdout is parsed for the recorded actions, the called
        external functions and the temporary IOCs. These feed the action,
        PowerShell, base64, IP/URL and VBA-function sections; the ViperMonkey log is
        attached as a supplementary file when there is something to report.

        Positional arguments:
        request -- service request being processed; its result is replaced by a new Result
        """
        import shlex

        self.result = Result()
        request.result = self.result

        self.ip_list = []
        self.url_list = []
        self.found_powershell = False
        self.file_hashes = []

        vmonkey_err = False
        actions: List[str] = []
        external_functions: List[str] = []
        tmp_iocs: List[str] = []
        output_results: Dict[str, Any] = {}
        potential_base64: Set[str] = set()
        # Declared outside the try so the finally clause can always release it
        input_file_obj: Optional[IO] = None

        # Running ViperMonkey
        try:
            file_contents = request.file_contents
            input_file: str = request.file_path
            # Typical start to XML files
            if not file_contents.startswith(
                    b"<?") and request.file_type == "code/xml":
                # Default encoding/decoding if BOM not found
                encoding: Optional[str] = None
                decoding: Optional[str] = None
                # Remove potential BOMs from contents
                if file_contents.startswith(BOM_UTF8):
                    encoding = "utf-8"
                    decoding = "utf-8-sig"
                elif file_contents.startswith(BOM_UTF16):
                    encoding = "utf-16"
                    decoding = "utf-16"
                if encoding and decoding:
                    input_file_obj = tempfile.NamedTemporaryFile(
                        "w+", encoding=encoding)
                    input_file_obj.write(
                        file_contents.decode(decoding, errors="ignore"))
                    # The subprocess opens the file by name, so buffered content must
                    # reach the disk before it starts
                    input_file_obj.flush()
                    input_file = input_file_obj.name
                else:
                    # If the file_type was detected as XML, it's probably buried within but not actually an XML file
                    # Give no response as ViperMonkey can't process this kind of file
                    return
            # Argument list (no shell) so paths containing spaces or shell
            # metacharacters are passed through untouched
            cmd = [
                *shlex.split(PYTHON2_INTERPRETER),
                os.path.join(os.path.dirname(__file__),
                             "vipermonkey_compat.py2"),
                input_file,
                self.working_directory,
            ]
            p = subprocess.run(cmd, capture_output=True)
            stdout = p.stdout

            # Add artifacts
            artifact_dir = os.path.join(
                self.working_directory,
                os.path.basename(input_file) + "_artifacts")
            if os.path.exists(artifact_dir):
                for file in os.listdir(artifact_dir):
                    try:
                        file_path = os.path.join(artifact_dir, file)
                        if os.path.isfile(file_path) and os.path.getsize(
                                file_path):
                            request.add_extracted(
                                file_path, file,
                                "File extracted by ViperMonkey during analysis"
                            )
                    except os.error as e:
                        self.log.warning(e)

            # Read output
            if stdout:
                # Only the first line that looks like a JSON object is used
                for line in stdout.splitlines():
                    if line.startswith(b"{") and line.endswith(b"}"):
                        try:
                            output_results = json.loads(line)
                        except UnicodeDecodeError:
                            output_results = json.loads(
                                line.decode("utf-8", "replace"))
                        break

                # Checking for tuple in case vmonkey return is None
                # If no macros found, return is [][][], if error, return is None
                # vmonkey_err can still happen if return is [][][], log as warning instead of error
                if isinstance(output_results.get("vmonkey_values"), dict):
                    # Structure of variable "actions" is as follows:
                    # [action, parameters, description]
                    # action: 'Found Entry Point', 'Execute Command', etc...
                    # parameters: Parameters for function
                    # description: 'Shell Function', etc...
                    #
                    # external_functions is a list of built-in VBA functions
                    # that were called
                    actions = output_results["vmonkey_values"]["actions"]
                    external_functions = output_results["vmonkey_values"][
                        "external_funcs"]
                    tmp_iocs = output_results["vmonkey_values"]["tmp_iocs"]
                    if output_results["vmonkey_err"]:
                        vmonkey_err = True
                        self.log.warning(output_results["vmonkey_err"])
                else:
                    vmonkey_err = True
            else:
                vmonkey_err = True

        except Exception:
            self.log.exception(
                f"Vipermonkey failed to analyze file {request.sha256}")
        finally:
            # Close (and thereby delete) the temporary copy on every exit path
            if input_file_obj is not None:
                try:
                    input_file_obj.close()
                except OSError as e:
                    self.log.warning(e)

        if actions:
            # Creating action section
            action_section = ResultSection("Recorded Actions:",
                                           parent=self.result)
            action_section.add_tag("technique.macro", "Contains VBA Macro(s)")
            sub_action_sections: Dict[str, ResultSection] = {}
            for action, parameters, description in actions:  # Creating action sub-sections for each action
                if not description:  # For actions with no description, just use the type of action
                    description = action

                if description not in sub_action_sections:
                    # Action's description will be the sub-section name
                    sub_action_section = ResultSection(description,
                                                       parent=action_section)
                    sub_action_sections[description] = sub_action_section
                    if description == "Shell function":
                        sub_action_section.set_heuristic(2)
                else:
                    # Reuse existing section
                    sub_action_section = sub_action_sections[description]
                    if sub_action_section.heuristic:
                        sub_action_section.heuristic.increment_frequency()

                # Parameters are sometimes stored as a list, account for this
                if isinstance(parameters, list):
                    for item in parameters:
                        # Parameters includes more than strings (booleans for example)
                        if isinstance(item, str):
                            # Check for PowerShell
                            self.extract_powershell(item, sub_action_section,
                                                    request)
                    # Join list items into single string
                    param = ", ".join(str(p) for p in parameters)

                else:
                    param = parameters
                    # Parameters includes more than strings (booleans for example)
                    if isinstance(param, str):
                        self.extract_powershell(param, sub_action_section,
                                                request)

                # If the description field was empty, re-organize result section for this case
                if description == action:
                    sub_action_section.add_line(param)
                else:
                    sub_action_section.add_line(
                        f"Action: {action}, Parameters: {param}")

                # Check later for base64
                potential_base64.add(param)

                # Add urls/ips found in parameter to respective lists
                self.find_ip(param)
        # Check tmp_iocs
        res_temp_iocs = ResultSection("Runtime temporary IOCs")
        for ioc in tmp_iocs:
            self.extract_powershell(ioc, res_temp_iocs, request)
            potential_base64.add(ioc)
            self.find_ip(ioc)

        # Only attach the section if one of the checks above put something in it
        if res_temp_iocs.subsections or res_temp_iocs.body:
            self.result.add_section(res_temp_iocs)

        # Add PowerShell score/tag if found
        if self.found_powershell:
            ResultSection("Discovered PowerShell code in file",
                          parent=self.result,
                          heuristic=Heuristic(3))

        # Check parameters and temp_iocs for base64
        base64_section = ResultSection("Possible Base64 found",
                                       heuristic=Heuristic(5, frequency=0))
        for param in potential_base64:
            self.check_for_b64(param, base64_section, request,
                               request.file_contents)
        if base64_section.body:
            self.result.add_section(base64_section)

        # Add url/ip tags
        self.add_ip_tags()

        # Create section for built-in VBA functions called
        if external_functions:
            external_func_section = ResultSection(
                "VBA functions called",
                body_format=BODY_FORMAT.MEMORY_DUMP,
                parent=self.result)
            for func in external_functions:
                if func in vba_builtins:
                    external_func_section.add_line(func + ": " +
                                                   vba_builtins[func])
                else:
                    external_func_section.add_line(func)

        # Add vmonkey log as a supplemental file if we have results
        if "stdout" in output_results and (vmonkey_err
                                           or request.result.sections):
            temp_log_copy = os.path.join(
                tempfile.gettempdir(), f"{request.sid}_vipermonkey_output.log")
            with open(temp_log_copy, "w") as temp_log_file:
                temp_log_file.write(output_results["stdout"])

            request.add_supplementary(temp_log_copy, "vipermonkey_output.log",
                                      "ViperMonkey log output")
            if vmonkey_err is True:
                ResultSection(
                    'ViperMonkey has encountered an error, please check "vipermonkey_output.log"',
                    parent=self.result,
                    heuristic=Heuristic(1),
                )
Exemplo n.º 4
0
    def execute(self, request: ServiceRequest) -> None:
        """
        Extract metadata from the submitted file and report it as result sections.

        Steps, in order:
        1. Partition entropy of the file, reported as a colormap graph.
        2. hachoir metadata (skipped for "shortcut/windows" files).
        3. ExifTool metadata, minus a fixed list of file-system level keys.
        4. For "shortcut/windows" files: LnkParse3 features, the related heuristics
           and tags, the reconstructed command as an extracted file, the features
           as a supplementary JSON file and any data appended to the LNK.
        5. For "shortcut/web" files: the content of the "InternetShortcut" section.

        Positional arguments:
        request -- service request being processed; its result is replaced by a new Result
        """
        import math
        import ntpath

        request.result = Result()

        # 1. Calculate entropy map
        with open(request.file_path, "rb") as fin:
            (entropy, part_entropies) = calculate_partition_entropy(fin)

        entropy_graph_data = {"type": "colormap", "data": {"domain": [0, 8], "values": part_entropies}}

        ResultSection(
            f"File entropy: {round(entropy, 3)}",
            parent=request.result,
            body_format=BODY_FORMAT.GRAPH_DATA,
            body=json.dumps(entropy_graph_data, allow_nan=False),
        )

        if request.file_type != "shortcut/windows":
            # 2. Get hachoir metadata
            parser = createParser(request.file_path)
            if parser is not None:
                with parser:
                    parser_tags = parser.getParserTags()
                    parser_id = parser_tags.get("id", "unknown")

                    # Do basic metadata extraction
                    metadata = extractMetadata(parser, 1)

                    if metadata:
                        kv_body: Dict[str, Union[str, List[str]]] = {}
                        tags: List[Tuple[str, str]] = []
                        for m in metadata:
                            if m.key == "comment":
                                for v in m.values:
                                    key, val = get_type_val(v.text, "comment")
                                    if not val:
                                        continue

                                    kv_body[key] = val

                                    # Parser-specific mapping first, generic (None) mapping as fallback
                                    tag_type = TAG_MAP.get(parser_id, {}).get(key, None) or TAG_MAP.get(None, {}).get(
                                        key, None
                                    )
                                    if tag_type is not None:
                                        tags.append((tag_type, val))
                            elif m.key in ["mime_type"]:
                                pass
                            else:
                                values = [v.text for v in m.values]
                                if len(values) == 1 and values[0]:
                                    kv_body[m.key] = values[0]
                                elif values:
                                    kv_body[m.key] = values

                                # The tag type only depends on the key, so resolve it once
                                tag_type = TAG_MAP.get(parser_id, {}).get(m.key, None) or TAG_MAP.get(None, {}).get(
                                    m.key, None
                                )
                                if tag_type is not None:
                                    tags.extend((tag_type, v) for v in values)

                        if kv_body:
                            res = ResultSection(
                                f"Metadata extracted by hachoir-metadata [Parser: {parser_id}]",
                                body=json.dumps(kv_body, allow_nan=False),
                                body_format=BODY_FORMAT.KEY_VALUE,
                                parent=request.result,
                            )

                            for t_type, t_val in tags:
                                res.add_tag(t_type, t_val)

        # 3. Get Exiftool Metadata
        exif = subprocess.run(["exiftool", "-j", request.file_path], capture_output=True, check=False)
        if exif.stdout:
            exif_data = json.loads(exif.stdout.decode("utf-8", errors="ignore"))
            res_data = exif_data[0]
            if "Error" not in res_data:
                # Keys that are never reported in the ExifTool section
                ignored_exif_keys = (
                    "SourceFile",
                    "ExifToolVersion",
                    "FileName",
                    "Directory",
                    "FileSize",
                    "FileModifyDate",
                    "FileAccessDate",
                    "FileInodeChangeDate",
                    "FilePermissions",
                    "FileType",
                    "FileTypeExtension",
                    "MIMEType",
                    "Warning",
                )
                exif_body = {}
                for k, v in res_data.items():
                    if not v or k in ignored_exif_keys:
                        continue
                    # inf/-inf/NaN cannot be dumped with allow_nan=False, so fetch the
                    # textual value instead. NaN never compares equal to itself, hence
                    # math.isfinite rather than a membership test.
                    if isinstance(v, float) and not math.isfinite(v):
                        single_tag = subprocess.run(
                            ["exiftool", f"-{k}", "-T", request.file_path], capture_output=True, check=False
                        )
                        v = single_tag.stdout.decode("utf-8", errors="ignore").strip()
                    exif_body[build_key(k)] = v
                if exif_body:
                    e_res = ResultSection(
                        "Metadata extracted by ExifTool",
                        body=json.dumps(exif_body, allow_nan=False),
                        body_format=BODY_FORMAT.KEY_VALUE,
                        parent=request.result,
                    )
                    for k, v in exif_body.items():
                        tag_type = TAG_MAP.get(res_data.get("FileTypeExtension", "UNK").upper(), {}).get(
                            k, None
                        ) or TAG_MAP.get(None, {}).get(k, None)
                        if tag_type:
                            e_res.add_tag(tag_type, v)

        # 4. Lnk management.
        if request.file_type == "shortcut/windows":
            with open(request.file_path, "rb") as indata:
                lnk = LnkParse3.lnk_file(indata)

            features = lnk.get_json(get_all=True)

            lnk_result_section = ResultSection(
                "Extra metadata extracted by LnkParse3",
                parent=request.result,
            )

            # Items collected here are reported together under heuristic 1
            heur_1_items = {}
            risky_executable = ["rundll32.exe", "powershell.exe", "cmd.exe", "mshta.exe"]

            if "command_line_arguments" in features["data"]:
                if any(x in features["data"]["command_line_arguments"].lower() for x in risky_executable):
                    heur_1_items["command_line_arguments"] = features["data"]["command_line_arguments"]
                elif " && " in features["data"]["command_line_arguments"]:
                    heur_1_items["command_line_arguments"] = features["data"]["command_line_arguments"]

            lbp = ""
            if "local_base_path" in features["link_info"]:
                lbp = features["link_info"]["local_base_path"]
                if "common_path_suffix" in features["link_info"]:
                    lbp = f"{lbp}{features['link_info']['common_path_suffix']}"
                if any(x in lbp.lower() for x in risky_executable):
                    heur_1_items["local_base_path"] = features["link_info"]["local_base_path"]

            if "relative_path" in features["data"]:
                if any(x in features["data"]["relative_path"].lower() for x in risky_executable):
                    heur_1_items["relative_path"] = features["data"]["relative_path"]

            target = ""
            if "target" in features:
                if "items" in features["target"]:
                    # Rebuild the target path from the named items
                    last_item = None
                    for item in features["target"]["items"]:
                        if "primary_name" in item:
                            last_item = item
                            target = ntpath.join(target, item["primary_name"])

                    # A target whose last named item is a directory is discarded
                    if last_item and last_item["flags"] == "Is directory":
                        target = ""

                    if any(x in target.lower() for x in risky_executable):
                        heur_1_items["target_file_dosname"] = target

            if "icon_location" in features["data"]:
                deceptive_icons = ["wordpad.exe", "shell32.dll"]

                lnk_result_section.add_tag(
                    tag_type="file.shortcut.icon_location", value=features["data"]["icon_location"]
                )
                if any(
                    features["data"]["icon_location"].lower().strip('"').strip("'").endswith(x) for x in deceptive_icons
                ):
                    heur = Heuristic(4)
                    heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                    heur_section.set_item("icon_location", features["data"]["icon_location"])

            timestamps = []
            if features["header"]["creation_time"]:
                timestamps.append(("creation_time", features["header"]["creation_time"]))
            if features["header"]["modified_time"]:
                timestamps.append(("modified_time", features["header"]["modified_time"]))

            # Timestamp heuristics are only evaluated when the task depth is not 0
            if request.task.depth != 0:
                heur2_earliest_ts = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
                    days=self.config.get("heur2_flag_more_recent_than_days", 3)
                )
                heur2_latest_ts = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=2)
                recent_timestamps = []
                future_timestamps = []
                for k, timestamp in timestamps:
                    if timestamp < heur2_earliest_ts:
                        continue
                    if timestamp > heur2_latest_ts:
                        future_timestamps.append((k, timestamp))
                        continue
                    recent_timestamps.append((k, timestamp))

                if recent_timestamps:
                    heur = Heuristic(2)
                    heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                    for k, timestamp in recent_timestamps:
                        heur_section.set_item(k, timestamp.isoformat())
                if future_timestamps:
                    heur = Heuristic(3)
                    heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                    for k, timestamp in future_timestamps:
                        heur_section.set_item(k, timestamp.isoformat())

            if "DISTRIBUTED_LINK_TRACKER_BLOCK" in features["extra"]:
                if "machine_identifier" in features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]:
                    machine_id = features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]["machine_identifier"]
                    lnk_result_section.add_tag("file.shortcut.machine_id", machine_id)
                    if machine_id.lower().startswith("desktop-"):
                        heur = Heuristic(5)
                        heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                        heur_section.set_item("machine_identifier", machine_id)
                # The last 12 characters of the identifier are formatted as colon-separated pairs
                if "droid_file_identifier" in features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]:
                    mac = features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]["droid_file_identifier"][-12:]
                    mac = ":".join(a + b for a, b in zip(mac[::2], mac[1::2]))
                    lnk_result_section.add_tag("file.shortcut.tracker_mac", mac)
                elif "birth_droid_file_identifier" in features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]:
                    mac = features["extra"]["DISTRIBUTED_LINK_TRACKER_BLOCK"]["birth_droid_file_identifier"][-12:]
                    mac = ":".join(a + b for a, b in zip(mac[::2], mac[1::2]))
                    lnk_result_section.add_tag("file.shortcut.tracker_mac", mac)

            # Adapted code from previous logic. May be best replaced by new heuristics and logic.
            bp = str(lbp).strip()
            rp = str(features["data"].get("relative_path", "")).strip()
            nn = str(features["data"].get("net_name", "")).strip()
            t = str(target).strip().rsplit("\\")[-1].strip()
            cla = str(features["data"].get("command_line_arguments", "")).strip()

            filename_extracted = (bp or rp or t or nn).rsplit("\\")[-1].strip()
            if filename_extracted:
                lnk_result_section.add_tag(tag_type="file.name.extracted", value=(bp or rp or t or nn).rsplit("\\")[-1])

            process_cmdline = f"{(rp or bp or t or nn)} {cla}".strip()
            if process_cmdline:
                lnk_result_section.add_tag(tag_type="file.shortcut.command_line", value=process_cmdline)

            # (script bytes, file extension) of the command run by the shortcut, when recognised
            cmd_code = None
            if filename_extracted in ["cmd", "cmd.exe"]:
                cmd_code = (get_cmd_command(f"{filename_extracted} {cla}".encode()), "bat")
                if "rundll32 " in cla:  # We are already checking for rundll32.exe as part of risky_executable
                    heur_1_items["command_line_arguments"] = features["data"]["command_line_arguments"]
            elif filename_extracted in ["powershell", "powershell.exe"]:
                cmd_code = (get_powershell_command(f"{filename_extracted} {cla}".encode()), "ps1")

            if heur_1_items:
                heur = Heuristic(1)
                heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                heur_section.update_items(heur_1_items)

            if cmd_code:
                sha256hash = hashlib.sha256(cmd_code[0]).hexdigest()
                cmd_filename = f"{sha256hash[0:10]}.{cmd_code[1]}"
                cmd_file_path = os.path.join(self.working_directory, cmd_filename)
                with open(cmd_file_path, "wb") as cmd_f:
                    cmd_f.write(cmd_code[0])
                request.add_extracted(
                    cmd_file_path,
                    cmd_filename,
                    "Extracted LNK execution code",
                )

            # json.dump fallback: datetimes become ISO strings, anything else is returned as-is
            def _datetime_to_str(obj):
                if isinstance(obj, datetime.datetime):
                    return obj.isoformat()
                return obj

            temp_path = os.path.join(self.working_directory, "features.json")
            with open(temp_path, "w") as f:
                json.dump(features, f, default=_datetime_to_str)
            request.add_supplementary(temp_path, "features.json", "Features extracted from the LNK file")

            if lnk.appended_data:
                sha256hash = hashlib.sha256(lnk.appended_data).hexdigest()
                appended_data_path = os.path.join(self.working_directory, sha256hash)
                with open(appended_data_path, "wb") as appended_data_f:
                    appended_data_f.write(lnk.appended_data)
                request.add_extracted(
                    appended_data_path,
                    sha256hash,
                    "Additional data at the end of the LNK file",
                )
                heur = Heuristic(6)
                heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=lnk_result_section)
                heur_section.set_item("Length", len(lnk.appended_data))

        # 5. URL file management
        if request.file_type == "shortcut/web":
            # Interpolation is disabled because values are reported verbatim and
            # percent-encoded URLs would otherwise raise an interpolation error on read
            config = ConfigParser(interpolation=None)
            config.read(request.file_path)

            res = ResultKeyValueSection("Metadata extracted by Ini Reader", parent=request.result)
            for k, v in config["InternetShortcut"].items():
                res.set_item(k, v)

                if k == "url":
                    if v.startswith("http://") or v.startswith("https://"):
                        res.add_tag("network.static.uri", v)
                    elif v.startswith("file:"):
                        heur = Heuristic(1)
                        heur_section = ResultKeyValueSection(heur.name, heuristic=heur, parent=res)
                        heur_section.set_item("url", v)

            # Whatever sections remain besides "InternetShortcut" are only listed by name
            config.pop("InternetShortcut", None)
            if config.sections():
                extra_res = ResultKeyValueSection("Extra sections", parent=res)
                extra_res.set_item("Names", ", ".join(config.sections()))