def paranoid_rule_check(self, rulefile):
        # Run the rules separately on the command line to ensure there are no errors
        print_val = "--==Rules_validated++__"
        external_file = os.path.join(tempfile.gettempdir(), "externals.json")
        try:
            with open(external_file, "wb") as out_json:
                out_json.write(json.dumps(self.externals).encode("utf-8"))

            p = subprocess.Popen(
                f"python3 paranoid_check.py {rulefile} {external_file}",
                cwd=os.path.dirname(os.path.realpath(__file__)),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                shell=True)
            stdout, stderr = p.communicate()

        finally:
            os.unlink(external_file)

        stdout = safe_str(stdout)
        stderr = safe_str(stderr)

        if print_val not in stdout:
            if stdout.strip().startswith('yara.SyntaxError'):
                raise Exception(stdout.strip())
            else:
                raise Exception("YaraValidator has failed!--+--" +
                                str(stderr) + "--:--" + str(stdout))
    def ioc_to_tag(self,
                   data: bytes,
                   patterns: PatternMatch,
                   res: Optional[ResultSection] = None,
                   taglist: bool = False,
                   check_length: bool = False,
                   strs_max_size: int = 0,
                   st_max_length: int = 300) -> Dict[str, Set[str]]:
        """Searches data for patterns and adds as AL tag to result output.

        Args:
            data: Data to be searched.
            patterns: FrankenStrings PatternMatch object.
            res: AL result.
            taglist: True if tag list should be returned.
            check_length: True if the length of a string should be compared to st_max_length.
            strs_max_size: Maximum number of extracted strings. If exceeded, only network IOCs will be searched.
            st_max_length: Maximum length of a string from data that can be searched.

        Returns: Tag list as a dictionary (always empty if taglist is False).
        """

        tags: Dict[str, Set[str]] = {}

        min_length = self.st_min_length if check_length else 4

        strs: Set[bytes] = set()
        just_network = False

        # Flare-FLOSS ascii string extract
        for ast in strings.extract_ascii_strings(data, n=min_length):
            if not check_length or len(ast.s) < st_max_length:
                strs.add(ast.s)
        # Flare-FLOSS unicode string extract
        for ust in strings.extract_unicode_strings(data, n=min_length):
            if not check_length or len(ust.s) < st_max_length:
                strs.add(ust.s)

        if check_length and len(strs) > strs_max_size:
            just_network = True

        for s in strs:
            st_value: Dict[str, Iterable[bytes]] = patterns.ioc_match(
                s, bogon_ip=True, just_network=just_network)
            for ty, val in st_value.items():
                if taglist and ty not in tags:
                    tags[ty] = set()
                for v in val:
                    if ty == 'network.static.domain' and not is_valid_domain(
                            v.decode('utf-8')):
                        continue
                    if ty == 'network.email.address' and not is_valid_email(
                            v.decode('utf-8')):
                        continue
                    if len(v) < 1001:
                        if res:
                            res.add_tag(ty, safe_str(v))
                        if taglist:
                            tags[ty].add(safe_str(v))
        return tags
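A minimal usage sketch (the data, section title, and variable names below are illustrative only):

# Hypothetical call to ioc_to_tag; PatternMatch is the FrankenStrings
# patterns object referenced in the docstring.
patterns = PatternMatch()
ioc_section = ResultSection("Extracted IOCs")
found = self.ioc_to_tag(b"beacon to https://example.com every 60s",
                        patterns, res=ioc_section, taglist=True)
for tag_type, values in found.items():
    ioc_section.add_line(f"{tag_type}: {', '.join(sorted(values))}")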
Example #3
def test_safe_str_emoji():
    test_str = 'Smile! \ud83d\ude00'
    test_bytes = b'Smile! \xf0\x9f\x98\x80'
    expected_result = 'Smile! 😀'

    assert str_utils.safe_str(test_bytes) == expected_result
    assert str_utils.safe_str(test_str) == expected_result
Example #4
def test_safe_str():
    test_str = 'helloÌ\x02Í\udcf9'
    test_bytes = b'hello\xc3\x8c\x02\xc3\x8d\udcf9'
    expected_result = 'hello\xcc\\x02\xcd\\udcf9'

    assert str_utils.safe_str(test_bytes) == expected_result
    assert str_utils.safe_str(test_str) == expected_result
def _validate_tag(
    result_section: ResultSection,
    tag: str,
    value: Any,
    safelist: Optional[Dict[str, Dict[str, List[str]]]] = None
) -> bool:
    """
    This method validates the value relative to the tag type before adding the value as a tag to the ResultSection.
    :param result_section: The ResultSection that the tag will be added to
    :param tag: The tag type that the value will be tagged under
    :param value: The item that will be tagged under the tag type
    :param safelist: The safelist containing matches and regexes. The product of a service using self.get_api_interface().get_safelist().
    :return: Tag was successfully added
    """
    if safelist is None:
        safelist = {}

    regex = _get_regex_for_tag(tag)
    if regex and not match(regex, value):
        return False

    if "ip" in tag and not is_valid_ip(value):
        return False

    if "domain" in tag:
        if not is_valid_domain(value):
            return False
        elif value in FALSE_POSITIVE_DOMAINS_FOUND_IN_PATHS:
            return False
        elif isinstance(value, str) and value.split(".")[-1] in COMMON_FILE_EXTENSIONS:
            return False

    if is_tag_safelisted(value, [tag], safelist):
        return False

    # if "uri" is in the tag, let's try to extract its domain/ip and tag it.
    if "uri_path" not in tag and "uri" in tag:
        # First try to get the domain
        valid_domain = False
        domain = search(DOMAIN_REGEX, value)
        if domain:
            domain = domain.group()
            valid_domain = _validate_tag(result_section, "network.dynamic.domain", domain, safelist)
        # Then try to get the IP
        valid_ip = False
        ip = search(IP_REGEX, value)
        if ip:
            ip = ip.group()
            valid_ip = _validate_tag(result_section, "network.dynamic.ip", ip, safelist)

        if value not in [domain, ip] and (valid_domain or valid_ip):
            result_section.add_tag(tag, safe_str(value))
        else:
            return False
    else:
        result_section.add_tag(tag, safe_str(value))

    return True
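Illustrative usage (the URI is made up): for a uri tag, the helper first extracts and validates the embedded domain or IP, and only tags the URI when one of them checks out.

section = ResultSection("Network indicators")
ok = _validate_tag(section, "network.dynamic.uri",
                   "http://malicious.example.net/stage2.bin")
# If ok is True, the URI and its extracted domain were tagged on section;
# values that are safelisted or fail validation return False instead.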
Example #6
    def parse_plist(self, pdict):
        """Attempts to extract and identify all known and unknown keys of a plist file.

        Args:
            pdict: Plist dictionary item.

        Returns:
            A list of known keys and a list of unknown keys.
        """

        idenkey_sec = ResultSection("Identified Keys")
        unkkey_sec = ResultSection("Unidentified Keys:")

        # Sometimes a plist is a list of dictionaries, or just a list. Merge/convert it to a dict for now
        if isinstance(pdict, list):
            pdict = self.transform_dicts(pdict)

        for k, i in list(pdict.items()):
            # Prepare Keys
            k = str(safe_str(k))
            k_noipad = k.replace("~ipad", "")

            # Prepare values
            if i is None:
                i = [""]
            elif not isinstance(i, list):
                i = [i]

            # Many plist files are duplicates of info.plist; do not report on keys already identified
            if k_noipad in self.reported_keys:
                if i in self.reported_keys[k_noipad]:
                    continue
                self.reported_keys[k_noipad].append(i)
            else:
                self.reported_keys[k_noipad] = [i]

            # Process known keys
            if k_noipad in self.known_keys:
                desc, create_tag = self.known_keys[k_noipad]

                idenkey_sec.add_line(f"{k} ({desc}): {', '.join([safe_str(x, force_str=True) for x in i])}")
                if create_tag:
                    for val in i:
                        idenkey_sec.add_tag(TAG_MAP[k_noipad.upper()], safe_str(val, force_str=True))

            else:
                unkkey_sec.add_line(f"{k}: {', '.join([safe_str(x, force_str=True) for x in i])}")

        if idenkey_sec.body is None:
            idenkey_sec = None

        if unkkey_sec.body is None:
            unkkey_sec = None

        return idenkey_sec, unkkey_sec
Example #7
def stream_file_response(reader, name, size, status_code=200):
    quota_user = flsk_session.pop("quota_user", None)
    quota_id = flsk_session.pop("quota_id", None)
    quota_set = flsk_session.pop("quota_set", False)
    if quota_user and quota_set:
        RATE_LIMITER.dec(quota_user, track_id=quota_id)
        RATE_LIMITER.dec("__global__", track_id=quota_id)

    chunk_size = 65535

    def generate():
        reader.seek(0)
        while True:
            data = reader.read(chunk_size)
            if not data:
                break
            yield data
        reader.close()

    headers = {
        "Content-Type": 'application/octet-stream',
        "Content-Length": size,
        "Content-Disposition": 'attachment; filename="%s"' % safe_str(name)
    }
    return Response(generate(), status=status_code, headers=headers)
Example #8
    def __init__(
        self,
        title_text: Union[str, List],
        body: Optional[Union[str, Dict]] = None,
        classification: Optional[Classification] = None,
        body_format: BODY_FORMAT = BODY_FORMAT.TEXT,
        heuristic: Optional[Heuristic] = None,
        tags: Optional[Dict[str, List[str]]] = None,
        parent: Optional[Union[ResultSection, Result]] = None,
    ):
        self._finalized: bool = False
        self.parent = parent
        self._section = None
        self.subsections: List[ResultSection] = []
        self.body: str = body
        self.classification: Classification = classification or SERVICE_ATTRIBUTES.default_result_classification
        self.body_format: BODY_FORMAT = body_format
        self.depth: int = 0
        self.tags = tags or {}
        self.heuristic = None

        if isinstance(title_text, list):
            title_text = ''.join(title_text)
        self.title_text = safe_str(title_text)

        if heuristic:
            self.set_heuristic(heuristic.heur_id,
                               attack_id=heuristic.attack_id,
                               signature=heuristic.signature)

        if parent is not None:
            if isinstance(parent, ResultSection):
                parent.add_subsection(self)
            elif isinstance(parent, Result):
                parent.add_section(self)
Example #9
def add_access_control(user):
    user.update(
        Classification.get_access_control_parts(user.get(
            "classification", Classification.UNRESTRICTED),
                                                user_classification=True))

    gl2_query = " OR ".join(
        ['__access_grp2__:__EMPTY__'] +
        ['__access_grp2__:"%s"' % x for x in user["__access_grp2__"]])
    gl2_query = "(%s) AND " % gl2_query

    gl1_query = " OR ".join(
        ['__access_grp1__:__EMPTY__'] +
        ['__access_grp1__:"%s"' % x for x in user["__access_grp1__"]])
    gl1_query = "(%s) AND " % gl1_query

    req = list(
        set(Classification.get_access_control_req()).difference(
            set(user["__access_req__"])))
    req_query = " OR ".join(['__access_req__:"%s"' % r for r in req])
    if req_query:
        req_query = "-(%s) AND " % req_query

    lvl_query = "__access_lvl__:[0 TO %s]" % user["__access_lvl__"]

    query = "".join([gl2_query, gl1_query, req_query, lvl_query])
    user['access_control'] = safe_str(query)
    def run_strings_analysis(self, apk_file, result: Result):
        string_args = ['d', 'strings', apk_file]
        strings, _ = self.run_appt(string_args)
        if not strings or strings == "String pool is unitialized.\n":
            ResultSection("No strings found in APK", body="This is highly unlikely and most likely malicious.",
                          parent=result, heuristic=Heuristic(6))
        else:
            res_strings = ResultSection("Strings Analysis", parent=result)

            config_args = ['d', 'configurations', apk_file]
            configs, _ = self.run_appt(config_args)
            languages = []
            for line in configs.splitlines():
                config = line.upper()
                if config in ISO_LOCALES:
                    languages.append(config)
                    res_strings.add_tag('file.apk.locale', config)

            data_line = strings.split("\n", 1)[0]
            count = int(data_line.split(" entries")[0].rsplit(" ", 1)[1])
            styles = int(data_line.split(" styles")[0].rsplit(" ", 1)[1])
            if count < 50:
                ResultSection("Low volume of strings, this is suspicious.", parent=res_strings,
                              body_format=BODY_FORMAT.MEMORY_DUMP, body=safe_str(strings), heuristic=Heuristic(7))

            if len(languages) < 2:
                ResultSection("This app is not built for multiple languages. This is unlikely.",
                              parent=res_strings, heuristic=Heuristic(8))

            res_strings.add_line(f"Total string count: {count}")
            res_strings.add_line(f"Total styles: {styles}")
            if languages:
                res_strings.add_line(f"Languages: {', '.join(languages)}")
Example #11
def get_tag_safelist(**_):
    """
    Get the current tag_safelist

    Variables:
    None

    Arguments:
    default    =>  Load the default values that came with the system

    Data Block:
    None

    Result example:
    <current tag_safelist.yml file>
    """
    default = request.args.get('default', 'false').lower() in ['true', '']

    with forge.get_cachestore('system', config=config,
                              datastore=STORAGE) as cache:
        tag_safelist_yml = cache.get('tag_safelist_yml')
        if not tag_safelist_yml or default:
            yml_data = forge.get_tag_safelist_data()
            if yml_data:
                return make_api_response(yaml.safe_dump(yml_data))

            return make_api_response(
                None, "Could not find the tag_safelist.yml file", 404)

        return make_api_response(safe_str(tag_safelist_yml))
 def add_lines(self, line_list: List[str]) -> str:
     segment = safe_str('\n'.join(line_list))
     if self._data is None:
         self._data = segment
     else:
         self._data = f"{self._data}\n{segment}"
     return self._data
    def parse_meta(self, signature):
        meta = {}
        try:
            meta_parts = signature.split("(", 1)[1].strip(" );").split("; ")
            for part in meta_parts:
                if ":" in part:
                    key, val = part.split(":", 1)
                    if key == "metadata":
                        for metadata in val.split(","):
                            meta_key, meta_val = metadata.strip().split(' ')
                            meta[meta_key] = safe_str(meta_val)
                    else:
                        meta[key] = safe_str(val.strip('"'))
        except ValueError:
            return meta

        return meta
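A sketch of the input this expects, a Suricata-style signature (the rule below is illustrative):

sig = ('alert http any any -> any any (msg:"Test rule"; '
       'metadata: created_at 2020_01_01; sid:1000001;)')
meta = self.parse_meta(sig)
# meta == {'msg': 'Test rule', 'created_at': '2020_01_01', 'sid': '1000001'}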
Example #14
    def fileinfo(self, path: str) -> Dict:
        path = safe_str(path)
        data = get_digests_for_file(path, on_first_block=self.ident)
        data["ssdeep"] = ssdeep.hash_from_file(path)

        # Check if file empty
        if not int(data.get("size", -1)):
            data["type"] = "empty"

        # Further identify zip files based on their content
        elif data["type"] in [
                "archive/zip", "java/jar", "document/office/unknown"
        ]:
            data["type"] = zip_ident(path, data["type"])

        # Further check CaRT files, they may have an explicit type set
        elif data["type"] == "archive/cart":
            data["type"] = cart_ident(path)

        # Further identify dos executables, as this may be a PE that has been misidentified
        elif data["type"] == "executable/windows/dos":
            data["type"] = dos_ident(path)

        # If we've so far failed to identify the file, let's run the yara rules
        elif "unknown" in data["type"] or data["type"] == "text/plain":
            data["type"] = self.yara_ident(path, data, fallback=data["type"])

        # Extra checks for office documents
        #  - Check for encryption
        if data["type"] in [
                "document/office/word",
                "document/office/excel",
                "document/office/powerpoint",
                "document/office/unknown",
        ]:
            try:
                msoffcrypto_obj = msoffcrypto.OfficeFile(open(path, "rb"))
                if msoffcrypto_obj and msoffcrypto_obj.is_encrypted():
                    data["type"] = "document/office/passwordprotected"
            except Exception:
                # If msoffcrypto can't handle the file to confirm that it is/isn't password protected,
                # then it's not meant to be. Moving on!
                pass

        # Extra checks for PDF documents
        #  - Check for encryption
        #  - Check for PDF collection (portfolio)
        if data["type"] == "document/pdf":
            # Password protected documents typically contain '/Encrypt'
            pdf_content = open(path, "rb").read()
            if re.search(b"/Encrypt", pdf_content):
                data["type"] = "document/pdf/passwordprotected"
            # Portfolios typically contain '/Type/Catalog/Collection'
            elif re.search(b"/Type/Catalog/Collection", pdf_content):
                data["type"] = "document/pdf/portfolio"

        return data
    def bbcrack_results(self,
                        request: ServiceRequest) -> Optional[ResultSection]:
        """
        Use Balbuzard's bbcrack on XOR'd strings to find embedded patterns/PE files of interest

        Args:
            request: AL request object with result section

        Returns:
            The result section (with request.result as its parent) if one is created
        """
        x_res = (ResultSection("BBCrack XOR'd Strings:",
                               body_format=BODY_FORMAT.MEMORY_DUMP,
                               heuristic=Heuristic(2)))
        if request.deep_scan:
            xresult = bbcrack(request.file_contents, level=2)
        else:
            xresult = bbcrack(request.file_contents, level=1)
        xformat_string = '%-20s %-7s %-7s %-50s'
        xor_al_results = []
        xindex = 0
        for transform, regex, offset, score, smatch in xresult:
            if regex == 'EXE_HEAD':
                xindex += 1
                xtemp_file = os.path.join(
                    self.working_directory,
                    f"EXE_HEAD_{xindex}_{offset}_{score}.unXORD")
                with open(xtemp_file, 'wb') as xdata:
                    xdata.write(smatch)
                pe_extracted = self.pe_dump(
                    request,
                    xtemp_file,
                    offset,
                    file_string="xorpe_decoded",
                    msg="Extracted xor file during FrankenStrings analysis.")
                if pe_extracted:
                    xor_al_results.append(
                        xformat_string %
                        (str(transform), offset, score, "[PE Header Detected. "
                         "See Extracted files]"))
            else:
                if not regex.startswith("EXE_"):
                    x_res.add_tag(self.BBCRACK_TO_TAG.get(regex, regex),
                                  smatch)
                xor_al_results.append(
                    xformat_string %
                    (str(transform), offset, score, safe_str(smatch)))
        # Result table:
        if len(xor_al_results) > 0:
            xcolumn_names = ('Transform', 'Offset', 'Score', 'Decoded String')
            x_res.add_line(xformat_string % xcolumn_names)
            x_res.add_line(xformat_string % tuple('-' * len(s)
                                                  for s in xcolumn_names))
            x_res.add_lines(xor_al_results)
            request.result.add_section(x_res)
            return x_res
        return None
    def _do_respmod(self, filename, data):
        encoded = self.chunk_encode(data)

        # ICAP RESPMOD req-hdr is the start of the original HTTP request.
        respmod_req_hdr = "GET /{FILENAME} HTTP/1.1\r\n\r\n".format(
            FILENAME=safe_str(filename))

        # ICAP RESPMOD res-hdr is the start of the HTTP response for above request.
        respmod_res_hdr = ("HTTP/1.1 200 OK\r\n"
                           "Transfer-Encoding: chunked\r\n\r\n")

        res_hdr_offset = len(respmod_req_hdr)
        res_bdy_offset = len(respmod_res_hdr) + res_hdr_offset

        # The ICAP RESPMOD header. Note:
        # res-hdr offset should match the start of the GET request above.
        # res-body offset should match the start of the response above.

        respmod_icap_hdr = (
            f"RESPMOD icap://{self.host}:{self.port}/{self.service}{self.action} ICAP/1.0\r\n"
            f"Host: {self.host}:{self.port}\r\n"
            "Allow: 204\r\n"
            f"Encapsulated: req-hdr=0, res-hdr={res_hdr_offset}, res-body={res_bdy_offset}\r\n\r\n"
        )

        serialized_request = b"%s%s%s%s" % (respmod_icap_hdr.encode(),
                                            respmod_req_hdr.encode(),
                                            respmod_res_hdr.encode(), encoded)

        for i in range(self.number_of_retries):
            if self.kill:
                self.kill = False
                return
            try:
                if not self.socket:
                    self.socket = socket.create_connection(
                        (self.host, self.port), timeout=self.timeout)
                    self.successful_connection = True
                self.socket.sendall(serialized_request)
                response = temp_resp = self.socket.recv(self.RESP_CHUNK_SIZE)
                while len(temp_resp) == self.RESP_CHUNK_SIZE:
                    temp_resp = self.socket.recv(self.RESP_CHUNK_SIZE)
                    response += temp_resp

                return response.decode()
            except Exception:
                self.successful_connection = False
                try:
                    self.socket.close()
                except Exception:
                    pass
                self.socket = None
                if i == (self.number_of_retries - 1):
                    raise

        raise Exception("Icap server refused to respond.")
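chunk_encode itself is not shown in these examples. A plausible sketch (hypothetical) is plain HTTP chunked transfer encoding, which is what the Transfer-Encoding header above advertises:

def chunk_encode(data: bytes) -> bytes:
    # One chunk with its size in hex, followed by the zero-length
    # terminating chunk required by chunked encoding.
    return b"%X\r\n%s\r\n0\r\n\r\n" % (len(data), data)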
Example #17
    def _do_respmod(self, filename, data):
        encoded = self.chunk_encode(data)

        # ICAP RESPMOD req-hdr is the start of the original HTTP request.
        respmod_req_hdr = "GET /{FILENAME} HTTP/1.1\r\n\r\n".format(FILENAME=safe_str(filename))

        # ICAP RESPMOD res-hdr is the start of the HTTP response for above request.
        respmod_res_hdr = (
            "HTTP/1.1 200 OK\r\n"
            "Transfer-Encoding: chunked\r\n\r\n")

        res_hdr_offset = len(respmod_req_hdr)
        res_bdy_offset = len(respmod_res_hdr) + res_hdr_offset

        # The ICAP RESPMOD header. Note:
        # res-hdr offset should match the start of the GET request above.
        # res-body offset should match the start of the response above.

        respmod_icap_hdr = (
            f"RESPMOD icap://{self.host}:{self.port}/{self.service}{self.action} ICAP/1.0\r\n"
            f"Host:{self.host}:{self.port}\r\n"
            "Allow:204\r\n"
            f"Encapsulated: req-hdr=0, res-hdr={res_hdr_offset}, res-body={res_bdy_offset}\r\n\r\n"
        )

        sio = StringIO()
        sio.write(respmod_icap_hdr)
        sio.write(respmod_req_hdr)
        sio.write(respmod_res_hdr)
        sio.write(encoded)
        serialized_request = sio.getvalue()

        for i in range(self.MAX_RETRY):
            s = None
            try:
                s = socket.create_connection((self.host, self.port), timeout=10)
                s.sendall(serialized_request.encode())
                response = temp_resp = s.recv(self.RESP_CHUNK_SIZE)
                while len(temp_resp) == self.RESP_CHUNK_SIZE:
                    temp_resp = s.recv(self.RESP_CHUNK_SIZE)
                    response += temp_resp

                return response
            except Exception:
                if i == (self.MAX_RETRY-1):
                    raise
            finally:
                if s is not None:
                    try:
                        # try to close the connection anyways
                        s.close()
                    except Exception:
                        pass

        raise Exception("Icap server refused to respond.")
 def add_line(self, text: Union[str, List]) -> str:
     # add_line with a list should join without a newline separator.
     # use add_lines if the list should be split one element per line.
     if isinstance(text, list):
         text = ''.join(text)
     textstr = safe_str(text)
     if self._data:
         self._data = f"{self._data}\n{textstr}"
     else:
         self._data = textstr
     return self._data
Example #19
def get_attribute(ldap_login_info, key, safe=True):
    details = ldap_login_info.get('details')
    if details:
        value = details.get(key, [])
        if len(value) >= 1:
            if safe:
                return safe_str(value[0])
            else:
                return value[0]

    return None
 def add_line(self, text: Union[str, List]) -> None:
     # add_line with a list should join without a newline separator.
     # use add_lines if the list should be split one element per line.
     if isinstance(text, list):
         text = ''.join(text)
     textstr = safe_str(text)
     if self._body:
         textstr = '\n' + textstr
         self._body = self._body + textstr
     else:
         self._body = textstr
Example #21
def guess_language(path: str) -> Tuple[str, Union[str, int]]:
    file_length = os.path.getsize(path)
    with open(path, 'rb') as fh:
        if file_length > 131070:
            buf = fh.read(65535)
            fh.seek(file_length - 65535)
            buf += fh.read(65535)
        else:
            buf = fh.read()

    scores = defaultdict(int)
    shebang_lang = re.match(SHEBANG, buf)
    if shebang_lang:
        lang = shebang_lang.group(1)
        lang = 'code/' + EXECUTABLES.get(safe_str(lang), safe_str(lang))
        scores[lang] = STRONG_SCORE * 3

    for lang, patterns in STRONG_INDICATORS.items():
        for pattern in patterns:
            for _ in re.findall(pattern, buf):
                scores[lang] += STRONG_SCORE

    for lang, pattern in WEAK_INDICATORS.items():
        for _ in re.findall(pattern, buf):
            scores[lang] += WEAK_SCORE

    for lang in list(scores.keys()):
        if scores[lang] < MINIMUM_GUESS_SCORE:
            scores.pop(lang)

    max_v = 0
    if len(scores) > 0:
        max_v = max(list(scores.values()))
    high_scores = [(k, v) for k, v in scores.items() if v == max_v]
    high_scores = [(_differentiate(k, scores), v) for k, v in high_scores]

    if len(high_scores) != 1:
        return 'unknown', 0
    else:
        confidences = [(k, _confidence(v)) for k, v in high_scores]
        return confidences[0]
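Illustrative call (the path and scores are made up):

lang, confidence = guess_language("/tmp/sample_script")
# e.g. ('code/python', 80), or ('unknown', 0) when no single language
# wins the indicator scoring above.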
Example #22
    @staticmethod
    def transform_dicts(orig_dict):
        """Transforms a plist object that is type LIST to type DICT.

        Args:
            orig_dict: Plist item of type LIST.

        Returns:
            Transformed plist item.
        """
        dfli = defaultdict(list)
        for x in orig_dict:
            # If item is a dictionary, expand and add values
            if isinstance(x, dict):
                for k, v in list(x.items()):
                    dfli[str(safe_str(k))].append(str(safe_str(v)))
            else:
                dfli.setdefault(str(safe_str(x)))

        merged = dict(dfli)

        return merged
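Illustrative input and output (a hypothetical plist fragment):

merged = transform_dicts([{'CFBundleName': 'App'},
                          {'CFBundleName': 'App2'},
                          'standalone'])
# merged == {'CFBundleName': ['App', 'App2'], 'standalone': None}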
Example #23
def fileinfo(path: str) -> Dict:
    path = safe_str(path)

    data = get_digests_for_file(path, on_first_block=ident)

    # This is a special case, we know if the mime is set to one of these values
    # then the input file is almost certainly an office file, but based on only the first
    # block magic can't figure out any more than that. To handle that case we will read the
    # entire file, and identify again.
    if data['mime'] is not None and data['mime'].lower() in [
            'application/cdfv2-corrupt', 'application/cdfv2-unknown'
    ]:
        with open(path, 'rb') as fh:
            buf = fh.read()
            buflen = len(buf)
            data.update(ident(buf, buflen))
    data['ssdeep'] = ssdeep_from_file(path) if ssdeep_from_file else ''

    # When data is parsed from a cart file we trust its metadata and can skip the recognition test later
    cart_metadata_set = False

    if not int(data.get('size', -1)):
        data['type'] = 'empty'
    elif data['type'] in ['archive/zip', 'java/jar']:
        # In addition to explicit zip files, we also want to run zip_ident when
        # a file is a jar, as there is a high rate of false positives (magic
        # matches eclipse and other java-related files as jars)
        data['type'] = zip_ident(path)
    elif data['type'] == 'document/office/unknown':
        # For unknown document files try identifying them by unzipping,
        # but don't commit to it being a zip if it can't be extracted
        data['type'] = zip_ident(path, data['type'])
    elif data['type'] == 'unknown':
        data['type'], _ = guess_language(path)
    elif data['type'] == 'archive/cart':
        data['type'] = cart_ident(path)
        cart_metadata_set = True
    elif data['type'] == 'executable/windows/dos':
        # The default magic file misidentifies PE files with a munged DOS header
        data['type'] = dos_ident(path)
    elif data['type'] == 'code/html':
        # Magic detects .hta files as .html, guess_language detects .hta files as .js/.vbs
        # If both conditions are met, it's fair to say that the file is an .hta
        lang, _ = guess_language(path)
        if lang in ["code/javascript", "code/vbs"]:
            data['type'] = 'code/hta'

    if not recognized.get(data['type'], False) and not cart_metadata_set:
        data['type'] = 'unknown'

    return data
def RC4(data, key):
    '''
        RC4 implementation

        @param data: Bytes to be encrypted/decrypted
        @param key: Key used for the algorithm
        @return: The encrypted/decrypted bytes
    '''
    y = 0
    hash = {}
    box = {}
    ret = ''
    keyLength = len(key)
    dataLength = len(data)

    # Initialization
    for x in range(256):
        key = safe_str(key)
        hash[x] = ord(key[x % keyLength])
        box[x] = x
    for x in range(256):
        y = (y + int(box[x]) + int(hash[x])) % 256
        tmp = box[x]
        box[x] = box[y]
        box[y] = tmp

    z = y = 0
    for x in range(0, dataLength):
        z = (z + 1) % 256
        y = (y + box[z]) % 256
        tmp = box[z]
        box[z] = box[y]
        box[y] = tmp
        k = box[((box[z] + box[y]) % 256)]
        if isinstance(data, bytes):
            data = safe_str(data)
        ret += chr(ord(data[x]) ^ k)
    return ret
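Because RC4 is a symmetric stream cipher, applying it twice with the same key restores the input. A quick sanity check (illustrative values):

ciphertext = RC4('attack at dawn', 'rc4key')
assert RC4(ciphertext, 'rc4key') == 'attack at dawn'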
def isOwnerPass(password, dictO, dictU, computedUserPass, keyLength, revision):
    '''
        Checks if the given password is the owner password of the file

        @param password: The given password or the empty password
        @param dictO: The /O element of the /Encrypt dictionary
        @param dictU: The /U element of the /Encrypt dictionary
        @param computedUserPass: The computed user password of the file
        @param keyLength: The length of the key
        @param revision: The algorithm revision
        @return: Whether the given password is the owner password
    '''
    if revision == 5:
        vSalt = dictO[32:40]
        inputHash = hashlib.sha256(password + vSalt + dictU).digest()
        if inputHash == dictO[:32]:
            return True
        else:
            return False
    else:
        keyLength = int(keyLength / 8)
        lenPass = len(password)
        if lenPass > 32:
            password = password[:32]
        elif lenPass < 32:
            password += paddingString[:32 - lenPass]
        rc4Key = hashlib.md5(password).digest()
        if revision > 2:
            counter = 0
            while counter < 50:
                rc4Key = hashlib.md5(rc4Key).digest()
                counter += 1
        rc4Key = rc4Key[:keyLength]
        if revision == 2:
            userPass = RC4(dictO, rc4Key)
        elif revision > 2:
            counter = 19
            while counter >= 0:
                newKey = ''
                if isinstance(rc4Key, bytes):
                    rc4Key = safe_str(rc4Key)
                for i in range(len(rc4Key)):
                    newKey += chr(ord(rc4Key[i]) ^ counter)
                dictO = RC4(dictO, newKey)
                counter -= 1
            userPass = dictO
        else:
            # Is it possible??
            userPass = ''
        return isUserPass(userPass, computedUserPass, dictU, revision)
Example #26
    def is_whitelisted(self, task: IngestTask):
        reason, hit = self.get_whitelist_verdict(self.whitelist, task)
        hit = {x: dotdump(safe_str(y)) for x, y in hit.items()}
        sha256 = task.submission.files[0].sha256

        if not reason:
            with self.whitelisted_lock:
                reason = self.whitelisted.get(sha256, None)
                if reason:
                    hit = 'cached'

        if reason:
            if hit != 'cached':
                with self.whitelisted_lock:
                    self.whitelisted[sha256] = reason

            task.failure = "Whitelisting due to reason %s (%s)" % (dotdump(
                safe_str(reason)), hit)
            self._notify_drop(task)

            self.counter.increment('whitelisted')

        return reason
def keytool_printcert(cert_path: str) -> Optional[str]:
    """
    This function runs the 'keytool -printcert' command against a provided file

    :param cert_path: A path to a certificate
    :return: the string output of 'keytool -printcert' or None
    """
    stdout, _ = Popen(["keytool", "-printcert", "-file", cert_path],
                      stderr=PIPE, stdout=PIPE).communicate()
    stdout = safe_str(stdout)

    if stdout and "keytool error" not in stdout:
        return stdout

    return None
Example #28
def make_file_response(data,
                       name,
                       size,
                       status_code=200,
                       content_type="application/octet-stream"):
    quota_user = flsk_session.pop("quota_user", None)
    quota_set = flsk_session.pop("quota_set", False)
    if quota_user and quota_set:
        QUOTA_TRACKER.end(quota_user)

    response = make_response(data, status_code)
    response.headers["Content-Type"] = content_type
    response.headers["Content-Length"] = size
    response.headers[
        "Content-Disposition"] = 'attachment; filename="%s"' % safe_str(name)
    return response
    def __init__(
        self,
        title_text: Union[str, List],
        body: Optional[Union[str, SectionBody]] = None,
        classification: Optional[Classification] = None,
        body_format: BODY_FORMAT = BODY_FORMAT.TEXT,
        heuristic: Optional[Heuristic] = None,
        tags: Optional[Dict[str, List[str]]] = None,
        parent: Optional[Union[ResultSection, Result]] = None,
        zeroize_on_tag_safe: bool = False,
        auto_collapse: bool = False,
        zeroize_on_sig_safe: bool = True,
    ):
        self._finalized: bool = False
        self.parent = parent
        self._section = None
        self._subsections: List[ResultSection] = []
        if isinstance(body, SectionBody):
            self._body_format = body.format
            self._body = body.body
        else:
            self._body_format: BODY_FORMAT = body_format
            self._body: str = body
        self.classification: Classification = classification or SERVICE_ATTRIBUTES.default_result_classification
        self.depth: int = 0
        self._tags = tags or {}
        self._heuristic = None
        self.zeroize_on_tag_safe = zeroize_on_tag_safe
        self.auto_collapse = auto_collapse
        self.zeroize_on_sig_safe = zeroize_on_sig_safe

        if isinstance(title_text, list):
            title_text = ''.join(title_text)
        self.title_text = safe_str(title_text)

        if heuristic:
            if not isinstance(heuristic, Heuristic):
                log.warning(
                    f"This is not a valid Heuristic object: {str(heuristic)}")
            else:
                self._heuristic = heuristic

        if parent is not None:
            if isinstance(parent, ResultSection):
                parent.add_subsection(self)
            elif isinstance(parent, Result):
                parent.add_section(self)
Example #30
def make_file_response(data,
                       name,
                       size,
                       status_code=200,
                       content_type="application/octet-stream"):
    quota_user = flsk_session.pop("quota_user", None)
    quota_id = flsk_session.pop("quota_id", None)
    quota_set = flsk_session.pop("quota_set", False)
    if quota_user and quota_set:
        RATE_LIMITER.dec(quota_user, track_id=quota_id)
        RATE_LIMITER.dec("__global__", track_id=quota_id)

    response = make_response(data, status_code)
    response.headers["Content-Type"] = content_type
    response.headers["Content-Length"] = size
    response.headers[
        "Content-Disposition"] = 'attachment; filename="%s"' % safe_str(name)
    return response