Example #1
    def run(self):
        # construct a valid filename into which box-js will save the result
        fname = str(self.filename).replace("/", "_").replace(" ", "_")
        # get the file to send
        binary = get_binary(self.job_id)
        # construct arguments. For example, this corresponds to:
        # box-js sample.js --output-dir=/tmp/boxjs --no-kill ...
        args = [
            f"@{fname}",
            "--output-dir=/tmp/boxjs",
            "--no-kill",
            "--no-shell-error",
            "--no-echo",
        ]
        # box-js has a default timeout of 10 seconds,
        # but subprocess cannot detect when that fires,
        # so the same 10-second timeout is passed
        # explicitly to the subprocess itself.
        req_data = {
            "args": args,
            "timeout": 10,
            "callback_context": {"read_result_from": fname},
        }
        req_files = {fname: binary}

        return self._docker_run(req_data, req_files)
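
Many of these snippets delegate to a _docker_run helper that is not shown. Judging from the raw HTTP flow visible in Example #2 and Example #13 (POST the args and the file, poll with the returned key, read the report), it presumably talks to a companion container. A minimal sketch under that assumption; the polling route and the parameters are illustrative, only the "key" and "report" field names come from the snippets below:

import time

import requests

def docker_run(url, req_data, req_files=None, poll_interval=1.0, max_polls=60):
    # send the arguments (and the sample, when given) to the container service
    r = requests.post(url, data=req_data, files=req_files)
    r.raise_for_status()
    # the service answers with a key used to poll for the finished report
    key = r.json()["key"]
    for _ in range(max_polls):
        resp = requests.get(url, params={"key": key})  # illustrative route
        resp.raise_for_status()
        report = resp.json().get("report")
        if report is not None:
            return report
        time.sleep(poll_interval)
    raise TimeoutError(f"no report for key {key} after {max_polls} polls")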
Example #2
    def run(self):
        # construct a valid filename for the upload
        fname = str(self.filename).replace("/", "_").replace(" ", "_")
        self.args.extend(["-n", "/tmp/thug/" + fname, "-l", f"@{fname}"])
        logger.debug(
            f"Making request with arguments: {self.args}"
            f" for analyzer: {self.analyzer_name}, job_id: #{self.job_id}."
        )
        # request new analysis
        r = requests.post(
            self.url,
            files={fname: get_binary(self.job_id)},
            data={"args": self.args},
        )
        # handle any HTTP errors
        if self._check_status_code(self.name, r):
            # no error, so continue
            errors = []
            # poll until the analysis completes
            resp = self._poll_for_result(r.json()["key"])
            err = resp.get("error", None)
            if err:
                errors.append(err)
            logger.info(
                f"Fetching final report ({self.analyzer_name}, job_id: #{self.job_id})"
            )
            # fetch the final report
            result_resp = requests.get(f"{self.base_url}/get-result?name={fname}")
            if result_resp.status_code != 200:
                e = result_resp.json()["error"]
                errors.append(e)
                raise AnalyzerRunException(", ".join(errors))

            return result_resp.json()
Example #3
    def run(self):
        mwdb = mwdblib.MWDB(api_key=self.__api_key)
        binary = get_binary(self.job_id)
        query = hashlib.sha256(binary).hexdigest()

        if self.upload_file:
            logger.info(f"mwdb_scan uploading sample: {self.md5}")
            file_object = mwdb.upload_file(query, binary)
            file_object.flush()
            for _try in range(self.max_tries):
                logger.info(
                    f"mwdb_scan sample: {self.md5} polling for result try #{_try + 1}"
                )
                time.sleep(self.poll_distance)
                file_info = mwdb.query_file(file_object.data["id"])
                if self.file_analysis(file_info):
                    break
            else:
                # for/else: runs only if the loop never hit the break
                raise AnalyzerRunException("max retry attempts exceeded")
        else:
            try:
                file_info = mwdb.query_file(query)
            except Exception:
                raise AnalyzerRunException(
                    "File not found in the MWDB. Set 'upload_file=true' "
                    "if you want to upload and poll results."
                )
        result = {"data": file_info.data, "metakeys": file_info.metakeys}
        result["permalink"] = f"https://mwdb.cert.pl/file/{query}"
        return result
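
The polling loop above relies on a file_analysis predicate that is not part of the snippet. A purely hypothetical implementation (the readiness criterion is an assumption, not taken from the source) could consider the sample analyzed once MWDB has enriched it:

    def file_analysis(self, file_info):
        # hypothetical readiness check: treat the analysis as finished once
        # the backend has attached any tags or attributes to the object
        return bool(file_info.data.get("tags")) or bool(file_info.metakeys)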
Example #4
    def run(self):
        results = {}
        results["magic"] = magic.from_file(self.filepath)
        results["mimetype"] = magic.from_file(self.filepath, mime=True)

        binary = get_binary(self.job_id)
        results["md5"] = hashlib.md5(binary).hexdigest()
        results["sha1"] = hashlib.sha1(binary).hexdigest()
        results["sha256"] = hashlib.sha256(binary).hexdigest()
        results["ssdeep"] = pydeep.hash_file(self.filepath).decode()

        try:
            with ExifTool(self.exiftool_path) as et:
                exif_report = et.execute_json(self.filepath)
                if exif_report:
                    exif_single_report = exif_report[0]
                    exif_report_cleaned = {
                        key: value
                        for key, value in exif_single_report.items()
                        if not (key.startswith("File") or key.startswith("SourceFile"))
                    }
                    # compatibility with the previous version of this analyzer
                    results["filetype"] = exif_single_report.get("File:FileType", "")
                    results["exiftool"] = exif_report_cleaned
        except Exception as e:
            logger.exception(e)

        return results
Example #5
    def run(self):
        # get binary
        binary = get_binary(self.job_id)
        # make request data
        fname = str(self.filename).replace("/", "_").replace(" ", "_")
        args = ["flarestrings", f"@{fname}"]
        req_data = {
            "args": args,
            "timeout": self.timeout,
        }
        req_files = {fname: binary}
        result = self._docker_run(req_data, req_files)
        exceed_max_strings = len(result) > self.max_no_of_strings
        if exceed_max_strings:
            # truncate to the configured maximum
            result = result[: self.max_no_of_strings]
        if self.rank_strings:
            args = [
                "rank_strings",
                "--limit",
                str(self.max_no_of_strings),
                "--strings",
                json_dumps(result),
            ]
            req_data = {"args": args, "timeout": self.timeout}
            result = self._docker_run(req_data)
        result = {
            "data": [row[: self.max_chars_for_string] for row in result],
            "exceeded_max_number_of_strings": exceed_max_strings,
        }
        return result
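
For reference, the two stages above (extraction, then ranking) can be reproduced outside the container by piping the stringsifter console scripts together. A rough local equivalent; the helper name and the limit are placeholders:

import subprocess

def rank_top_strings(sample_path, limit=500):
    # extract printable strings from the sample with flarestrings ...
    extracted = subprocess.run(
        ["flarestrings", sample_path],
        capture_output=True, text=True, check=True,
    ).stdout
    # ... then rank them by malware-relevance with rank_strings
    ranked = subprocess.run(
        ["rank_strings", "--limit", str(limit)],
        input=extracted, capture_output=True, text=True, check=True,
    ).stdout
    return ranked.splitlines()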
Example #6
    def __intezer_scan_file(self, intezer_token):
        session = requests.Session()
        session.headers["Authorization"] = f"Bearer {intezer_token}"

        name_to_send = self.filename if self.filename else self.md5
        binary = get_binary(self.job_id)
        files = {"file": (name_to_send, binary)}
        logger.info(f"intezer md5 {self.md5} sending sample for analysis")
        response = session.post(self.base_url + "/analyze", files=files)
        if response.status_code != 201:
            raise AnalyzerRunException(
                f"failed analyze request, status code {response.status_code}"
            )

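        # poll the result_url until the service answers 200 (analysis ready)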
        for chance in range(self.max_tries):
            if response.status_code != 200:
                time.sleep(self.poll_distance)
                logger.info(
                    f"intezer md5 {self.md5} polling for result try #{chance + 1}"
                )
                result_url = response.json().get("result_url", "")
                response = session.get(self.base_url + result_url)
                response.raise_for_status()

        if response.status_code != 200 and not self.is_test:
            raise AnalyzerRunException("max polling attempts exceeded")

        return response.json()
Example #7
    def run(self):
        # get the file to send
        fname = str(self.filename).replace("/", "_").replace(" ", "_")
        binary = get_binary(self.job_id)
        # make request parameters
        req_data = {"args": [f"@{fname}"] + self.args}
        req_files = {fname: binary}
        return self._docker_run(req_data, req_files)
Example #8
    def run(self):
        # get binary
        binary = get_binary(self.job_id)
        # make request data
        fname = str(self.filename).replace("/", "_").replace(" ", "_")
        args = [f"@{fname}", "-j"]
        req_data = {"args": args, "timeout": self.timeout}
        req_files = {fname: binary}

        return self._docker_run(req_data, req_files)
Example #9
    def run(self):
        final_report = {}
        if not self.__api_key:
            raise AnalyzerRunException(
                f"No API key retrieved with name: {self.api_key_name}"
            )

        self.headers = {"Authorization": f"Bearer {self.__api_key}"}

        name_to_send = self.filename if self.filename else self.md5
        binary = get_binary(self.job_id)
        files = {
            "file": (name_to_send, binary),
            "_json": (None, b'{"kind": "file", "interactive": false}'),
        }

        logger.info(f"triage md5 {self.md5} sending sample for analysis")
        for _try in range(self.max_tries):
            logger.info(
                f"triage md5 {self.md5} polling for result try #{_try + 1}")
            response = requests.post(self.base_url + "samples",
                                     headers=self.headers,
                                     files=files)
            if response.status_code == 200:
                break
            time.sleep(self.poll_distance)

        if response.status_code != 200:
            raise AnalyzerRunException("max retry attempts exceeded")

        sample_id = response.json().get("id", None)
        if sample_id is None:
            raise AnalyzerRunException("error sending sample")

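        # hit the events endpoint before fetching reports; it appears to block
        # until the analysis has finished (an assumption about the tria.ge API)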
        requests.get(self.base_url + f"samples/{sample_id}/events",
                     headers=self.headers)

        final_report["overview"] = self.get_overview_report(sample_id)

        if self.report_type == "complete":
            final_report["static_report"] = self.get_static_report(sample_id)

            final_report["task_report"] = {}
            for task in final_report["overview"]["tasks"].keys():
                status_code, task_report_json = self.get_task_report(
                    sample_id, task)
                if status_code == 200:
                    final_report["task_report"][f"{task}"] = task_report_json

        analysis_id = final_report["overview"].get("sample", {}).get("id", "")
        if analysis_id:
            final_report["permalink"] = f"{self.report_url}{analysis_id}"

        return final_report
Example #10
    def run(self):
        if not self.cuckoo_url:
            raise AnalyzerConfigurationException("cuckoo URL missing")

        binary = get_binary(self.job_id)
        if not binary:
            raise AnalyzerRunException("is the binary empty?!")

        self.__cuckoo_request_scan(binary)
        self.__cuckoo_poll_result()
        result = self.__cuckoo_retrieve_and_create_report()

        return result
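
The three private helpers are not shown here. Against the classic Cuckoo REST API they would map onto the tasks/create/file, tasks/view/<id> and tasks/report/<id> endpoints; a compressed sketch under that assumption (auth, error handling and the polling budget are simplified):

import time

import requests

def cuckoo_scan(cuckoo_url, binary, poll_distance=5, max_polls=60):
    # submit the sample: Cuckoo answers with a numeric task id
    r = requests.post(f"{cuckoo_url}/tasks/create/file", files={"file": binary})
    r.raise_for_status()
    task_id = r.json()["task_id"]
    # poll the task status until the run has been reported
    for _ in range(max_polls):
        status = requests.get(f"{cuckoo_url}/tasks/view/{task_id}").json()
        if status["task"]["status"] == "reported":
            break
        time.sleep(poll_distance)
    # fetch the JSON report of the finished task
    report = requests.get(f"{cuckoo_url}/tasks/report/{task_id}")
    report.raise_for_status()
    return report.json()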
Example #11
    def _scan_binary(self):
        """scan a binary against all YARA rules in Malpedia"""

        url = self.base_url + "scan/binary"
        headers = {"Authorization": f"APIToken {self.__api_key}"}
        binary = get_binary(self.job_id)
        files = {"file": binary}

        try:
            response = requests.post(url, headers=headers, files=files)
            response.raise_for_status()
        except requests.RequestException as e:
            raise AnalyzerRunException(e)

        result = response.json()
        return result
Example #12
    def run(self):
        # get binary
        binary = get_binary(self.job_id)
        # make request data
        fname = str(self.filename).replace("/", "_").replace(" ", "_")
        req_data = {"args": ["-j", f"@{fname}"]}
        req_files = {fname: binary}

        result = self._docker_run(req_data, req_files)

        if result:
            # limit strings dump to first 100
            if "strings" in result and "dump" in result["strings"]:
                result["strings"]["dump"] = result["strings"]["dump"][:100]

        return result
Example #13
    def run(self):
        # get binary
        binary = get_binary(self.job_id)
        # request new analysis
        req_data = {"args": ["-j", "@filetoscan"]}
        req_files = {"filetoscan": binary}
        r = requests.post(self.url, files=req_files, data=req_data)
        # handle any HTTP errors
        if self._check_status_code(self.name, r):
            # no error, so continue
            resp = self._poll_for_result(r.json()["key"])
            result = resp.get("report", None)
            if result:
                result = json.loads(result)
                # limit the strings dump to the first 100 entries
                if "strings" in result and "dump" in result["strings"]:
                    result["strings"]["dump"] = result["strings"]["dump"][:100]

            return result
Example #14
    def __vt_request_scan(self, notify_url):
        binary = get_binary(self.job_id)
        params = {"apikey": self.__api_key}
        if notify_url:
            params["notify_url"] = notify_url
        files = {"file": binary}

        try:
            resp = requests.post(self.base_url + "file/scan",
                                 files=files,
                                 params=params)
            resp.raise_for_status()
        except requests.RequestException as e:
            raise AnalyzerRunException(e)
        json_resp = resp.json()
        response_code = json_resp.get("response_code", 1)
        if response_code == -1:
            raise AnalyzerRunException("response code -1")
        return json_resp
Example #15
    def run(self):
        # construct a valid filename for the sample to scan
        fname = str(self.filename).replace("/", "_").replace(" ", "_")
        # get the file to send
        binary = get_binary(self.job_id)
        # construct arguments. For example, this corresponds to:
        # apkid -t 20 -j file.apk
        args = [
            "-t",
            "20",
            "-j",
            f"@{fname}",
        ]
        req_data = {
            "args": args,
        }
        req_files = {fname: binary}

        return self._docker_run(req_data, req_files)
Example #16
    def run(self):
        # construct a valid dir name into which thug will save the result
        fname = str(self.filename).replace("/", "_").replace(" ", "_")
        tmp_dir = f"{fname}_{secrets.token_hex(4)}"
        # get the file to send
        binary = get_binary(self.job_id)
        # append final arguments,
        # -n -> output directory
        # -l -> the local file to analyze
        self.args.extend(["-n", "/home/thug/" + tmp_dir, "-l", f"@{fname}"])
        # make request parameters
        req_data = {
            "args": self.args,
            "callback_context": {"read_result_from": tmp_dir},
        }
        req_files = {fname: binary}

        return self._docker_run(req_data, req_files)
Example #17
    def run(self):
        results = {}
        results["magic"] = magic.from_file(self.filepath)
        results["mimetype"] = magic.from_file(self.filepath, mime=True)
        results["filetype"] = pyexifinfo.fileType(self.filepath)

        exif_report = pyexifinfo.get_json(self.filepath)
        if exif_report:
            exif_report_cleaned = {
                key: value
                for key, value in exif_report[0].items()
                if not (key.startswith("File") or key.startswith("SourceFile"))
            }
            results["exiftool"] = exif_report_cleaned

        binary = get_binary(self.job_id)
        results["md5"] = hashlib.md5(binary).hexdigest()
        results["sha1"] = hashlib.sha1(binary).hexdigest()
        results["sha256"] = hashlib.sha256(binary).hexdigest()
        results["ssdeep"] = pydeep.hash_file(self.filepath).decode()

        return results
Example #18
    def run(self):
        results = {}

        rtfobj_results = {}
        binary = get_binary(self.job_id)
        rtfp = RtfObjParser(binary)
        rtfp.parse()
        rtfobj_results["ole_objects"] = []
        for rtfobj in rtfp.objects:
            if rtfobj.is_ole:
                class_name = rtfobj.class_name.decode()
                ole_dict = {
                    "format_id": rtfobj.format_id,
                    "class_name": class_name,
                    "ole_datasize": rtfobj.oledata_size,
                }
                if rtfobj.is_package:
                    ole_dict["is_package"] = True
                    ole_dict["filename"] = rtfobj.filename
                    ole_dict["src_path"] = rtfobj.src_path
                    ole_dict["temp_path"] = rtfobj.temp_path
                    ole_dict["olepkgdata_md5"] = rtfobj.olepkgdata_md5
                else:
                    ole_dict["ole_md5"] = rtfobj.oledata_md5
                if rtfobj.clsid:
                    ole_dict["clsid_desc"] = rtfobj.clsid_desc
                    ole_dict["clsid_id"] = rtfobj.clsid
                rtfobj_results["ole_objects"].append(ole_dict)
                # http://www.kb.cert.org/vuls/id/921560
                if class_name == "OLE2Link":
                    rtfobj_results["exploit_ole2link_vuln"] = True
                # https://www.kb.cert.org/vuls/id/421280/
                elif class_name.lower() == "equation.3":
                    rtfobj_results["exploit_equation_editor"] = True

        results["rtfobj"] = rtfobj_results

        return results
Example #19
    def run(self):
        # get binary
        binary = get_binary(self.job_id)
        # make request data
        fname = str(self.filename).replace("/", "_").replace(" ", "_")
        args = [f"@{fname}", f"--output-json=/tmp/{fname}.json"]
        req_data = {
            "args": args,
            "timeout": self.timeout,
            "callback_context": {"read_result_from": fname},
        }
        req_files = {fname: binary}
        result = self._docker_run(req_data, req_files)
        result["exceeded_max_number_of_strings"] = {}

        self.url = self.ranking_url
        for key in self.max_no_of_strings:
            if self.rank_strings[key]:
                args = [
                    "rank_strings",
                    "--limit",
                    str(self.max_no_of_strings[key]),
                    "--strings",
                    json_dumps(result["strings"][key]),
                ]
                req_data = {"args": args, "timeout": self.timeout}
                result["strings"][key] = self._docker_run(req_data)
            else:
                if (len(result.get("strings", {}).get(key, []))
                        > self.max_no_of_strings[key]):
                    # actually truncate the list, then flag the truncation
                    result["strings"][key] = result["strings"][key][
                        : self.max_no_of_strings[key]
                    ]
                    result["exceeded_max_number_of_strings"][key] = True
        return result
Example #20
def vt_scan_file(
    api_key,
    md5,
    job_id,
    rescan_instead=False,
    max_tries=100,
    poll_distance=5,
):
    try:
        binary = get_binary(job_id)
    except Exception:
        raise AnalyzerRunException("couldn't retrieve the binary to perform a scan")

    headers = {"x-apikey": api_key}
    if rescan_instead:
        logger.info(f"md5 {md5} job {job_id} VT analyzer requested rescan")
        files = {}
        uri = f"files/{md5}/analyse"
    else:
        logger.info(f"md5 {md5} job {job_id} VT analyzer requested scan")
        files = {"file": binary}
        uri = "files"

    try:
        response = requests.post(vt_base + uri, files=files, headers=headers)
        response.raise_for_status()
    except requests.RequestException as e:
        raise AnalyzerRunException(e)
    result = response.json()

    result_data = result.get("data", {})
    scan_id = result_data.get("id", "")
    if not scan_id:
        raise AnalyzerRunException(
            "no scan_id given by VirusTotal to retrieve the results"
        )
    # wait at most max_tries * poll_distance seconds (~8 minutes with the defaults)
    got_result = False
    uri = f"analyses/{scan_id}"
    for chance in range(max_tries):
        time.sleep(poll_distance)
        logger.info(
            f"vt polling, try n.{chance + 1}. job_id {job_id}. starting the query"
        )
        try:
            response = requests.get(vt_base + uri, headers=headers)
            response.raise_for_status()
        except requests.RequestException as e:
            raise AnalyzerRunException(e)
        json_response = response.json()
        analysis_status = (
            json_response.get("data", {}).get("attributes", {}).get("status", "")
        )
        if analysis_status == "completed":
            got_result = True
            break
        else:
            logger.info(
                f"vt polling: try #{chance + 1}. job_id: #{job_id}."
                f" status:{analysis_status}"
            )

    if not got_result and not rescan_instead:
        raise AnalyzerRunException(
            f"max VT polls tried without getting any result. job_id {job_id}"
        )

    # retrieve the FULL report, not only scans results.
    # If it's a new sample, it's free of charge.
    return vt3_get.vt_get_report(api_key, md5, "hash", {}, job_id)
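
The closing call delegates to vt3_get.vt_get_report, which is not shown. Against the public VirusTotal v3 API, fetching the full report is a single GET on the files endpoint; a minimal sketch (the function name and the stripped-down signature are illustrative):

import requests

def vt_get_report(api_key, file_hash):
    # VT v3: the full report for a sample lives under /files/<hash>
    headers = {"x-apikey": api_key}
    response = requests.get(
        f"https://www.virustotal.com/api/v3/files/{file_hash}",
        headers=headers,
    )
    response.raise_for_status()
    return response.json()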