Example #1
    def _unpack(self, buf):
        """Extract into a list irc messages of a tcp streams.
        @buf: tcp stream data
        """
        try:
            f = cStringIO.StringIO(buf)
            lines = f.readlines()
        except Exception:
            log.error("Failed reading tcp stream buffer")
            return False

        logirc = False
        for element in lines:
            if not re.match("^:", element) is None:
                command = "([a-zA-Z]+|[0-9]{3})"
                params = "(\x20.+)"
                irc_server_msg = re.findall("(^:[\w+.{}!@|()]+\x20)" + command + params, element)
                if irc_server_msg:
                    self._sc["prefix"] = convert_to_printable(irc_server_msg[0][0].strip())
                    self._sc["command"] = convert_to_printable(irc_server_msg[0][1].strip())
                    self._sc["params"] = convert_to_printable(irc_server_msg[0][2].strip())
                    self._sc["type"] = "server"
                    if logirc:
                        self._messages.append(dict(self._sc))
            else:
                irc_client_msg = re.findall("([a-zA-Z]+\x20)(.+[\x0a\0x0d])", element)
                if irc_client_msg and irc_client_msg[0][0].strip() in self.__methods_client:
                    self._cc["command"] = convert_to_printable(irc_client_msg[0][0].strip())
                    if self._cc["command"] in ["NICK", "USER"]:
                        logirc = True
                    self._cc["params"] = convert_to_printable(irc_client_msg[0][1].strip())
                    self._cc["type"] = "client"
                    if logirc:
                        self._messages.append(dict(self._cc))
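
For illustration, a minimal standalone run of the server-message pattern used above, against an invented IRC welcome line (the sample text and variable names are ours, not from the source):

import re

line = ":irc.example.net 001 nick :Welcome"  # invented server line
pattern = r"(^:[\w+.{}!@|()]+\x20)([a-zA-Z]+|[0-9]{3})(\x20.+)"
prefix, command, params = re.findall(pattern, line)[0]
# prefix == ":irc.example.net ", command == "001", params == " nick :Welcome"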
Example #2
def extract_strings(path, nulltermonly, minchars):
    strings = []

    try:
        data = open(path, "rb").read()
    except (IOError, OSError) as e:
        raise CuckooProcessingError(f"Error opening file {e}")

    endlimit = b""
    if not HAVE_RE2:
        endlimit = b"8192"

    if nulltermonly:
        apat = b"([\x20-\x7e]{" + str(minchars).encode() + b"," + endlimit + b"})\x00"
        upat = b"((?:[\x20-\x7e][\x00]){" + str(minchars).encode() + b"," + endlimit + b"})\x00\x00"
    else:
        apat = b"[\x20-\x7e]{" + str(minchars).encode() + b"," + endlimit + b"}"
        upat = b"(?:[\x20-\x7e][\x00]){" + str(minchars).encode() + b"," + endlimit + b"}"

    strings = [bytes2str(string) for string in re.findall(apat, data)]
    for ws in re.findall(upat, data):
        strings.append(str(ws.decode("utf-16le")))

    return strings
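
The same two patterns recur in several examples below: apat grabs null-terminated ASCII runs, upat grabs double-null-terminated UTF-16LE runs. A self-contained sketch on an invented buffer:

import re

data = b"\x01\x02hello\x00\x01w\x00i\x00d\x00e\x00s\x00\x00\x00\x01"  # invented buffer
apat = rb"([\x20-\x7e]{4,})\x00"                 # 4+ printable bytes, null-terminated
upat = rb"((?:[\x20-\x7e][\x00]){4,})\x00\x00"   # UTF-16LE pairs, double-null-terminated
ascii_strings = [s.decode() for s in re.findall(apat, data)]           # ['hello']
wide_strings = [w.decode("utf-16le") for w in re.findall(upat, data)]  # ['wides']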
Example #3
 def test_re_findall(self):
     self.assertEqual(re.findall(":+", "abc"), [])
     self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
     self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
     self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
                                                            (":", ":"),
                                                            (":", "::")])
Example #4
    def do_strings(self):
        if not self.voptions.basic.dostrings:
            return None
        try:
            with open(self.memfile, "rb") as f:
                data = f.read()
        except (IOError, OSError, MemoryError) as e:
            raise CuckooProcessingError(f"Error opening file {e}") from e

        nulltermonly = self.voptions.basic.get("strings_nullterminated_only",
                                               True)
        minchars = str(self.voptions.basic.get("strings_minchars", 5)).encode()

        if nulltermonly:
            apat = b"([\x20-\x7e]{" + minchars + b",})\x00"
            upat = b"((?:[\x20-\x7e][\x00]){" + minchars + b",})\x00\x00"
        else:
            apat = b"[\x20-\x7e]{" + minchars + b",}"
            upat = b"(?:[\x20-\x7e][\x00]){" + minchars + b",}"

        strings = re.findall(apat, data) + [
            ws.decode("utf-16le").encode() for ws in re.findall(upat, data)
        ]
        with open(f"{self.memfile}.strings", "wb") as f:
            f.write(b"\n".join(strings))
        return f"{self.memfile}.strings"
Example #5
    def run(self):
        """Run extract of printable strings.
        @return: list of printable strings.
        """
        self.key = "strings"
        strings = []

        if self.task["category"] == "file":
            if not os.path.exists(self.file_path):
                raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path)

            try:
                data = open(self.file_path, "rb").read()
            except (IOError, OSError) as e:
                raise CuckooProcessingError("Error opening file %s" % e)

            nulltermonly = self.options.get("nullterminated_only", True)
            minchars = self.options.get("minchars", 5)

            if nulltermonly:
                apat = "([\x20-\x7e]{" + str(minchars) + ",})\x00"
                upat = "((?:[\x20-\x7e][\x00]){" + str(minchars) + ",})\x00\x00"
            else:
                apat = "[\x20-\x7e]{" + str(minchars) + ",}"
                upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + ",}"

            strings = re.findall(apat, data)
            for ws in re.findall(upat, data):
                strings.append(str(ws.decode("utf-16le")))

        return strings
Example #6
    def handleEvent(self, event):
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        if eventData in self.results:
            return None
        else:
            self.results[eventData] = True

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        # Retrieve profile
        try:
            network = eventData.split(": ")[0]
            url = eventData.split(": ")[1]
        except BaseException as e:
            self.sf.error(
                "Unable to parse SOCIAL_MEDIA: " + eventData + " (" + str(e) +
                ")", False)
            return None

        if not network == "Twitter":
            self.sf.debug("Skipping social network profile, " + url +
                          ", as not a Twitter profile")
            return None

        res = self.sf.fetchUrl(url,
                               timeout=self.opts['_fetchtimeout'],
                               useragent="SpiderFoot")

        if res['content'] is None:
            return None

        if not res['code'] == "200":
            self.sf.debug(url + " is not a valid Twitter profile")
            return None

        # Retrieve name
        human_name = re.findall(r'<div class="fullname">([^<]+)\s*</div>',
                                res['content'], re.MULTILINE)

        if human_name:
            e = SpiderFootEvent("RAW_RIR_DATA",
                                "Possible full name: " + human_name[0],
                                self.__name__, event)
            self.notifyListeners(e)

        # Retrieve location
        location = re.findall(r'<div class="location">([^<]+)</div>',
                              res['content'])

        if location:
            if len(location[0]) < 3 or len(location[0]) > 100:
                self.sf.debug("Skipping likely invalid location.")
            else:
                e = SpiderFootEvent("GEOINFO", location[0], self.__name__,
                                    event)
                self.notifyListeners(e)
Example #7
 def __findTagAttributes(tag):
     att_double = re.findall(r'<\w*[ ]| *(.*?)[ ]*=[ ]*"(.*?)"[ +|>]', tag)
     att_single = re.findall(r"<\w*[ ]| *(.*?)[ ]*=[ ]*'(.*?)'[ +|>]", tag)
     att_none = re.findall(r'<\w*[ ]| *(.*?)[ ]*=[ ]*["|\']?(.*?)["|\']?[ +|>]', tag)
     att_none.extend(att_single)
     att_none.extend(att_double)
     return att_none
Example #8
    def portScanUDP(self, ip):
        res = self.sf.fetchUrl(
            "https://hackertarget.com/udp-port-scan/",
            timeout=self.opts['_fetchtimeout'],
            useragent=self.opts['_useragent'],
            postData="theinput=" + ip +
            "&thetest=udpscan&name_of_nonce_field=&_wp_http_referer=%2Fudp-port-scan%2F"
        )

        if res['content'] is None:
            return None

        html_data = re.findall(r'<pre id="formResponse">(.*?)</pre>',
                               res['content'], re.MULTILINE | re.DOTALL)

        if not html_data:
            self.sf.debug("Found no open UDP ports on " + ip)
            return None

        open_ports = re.findall(r'(\d+)/udp\s+open\s+', html_data[0])

        if not open_ports:
            self.sf.debug("Found no open UDP ports on " + ip)
            return None

        self.sf.debug("Found " + str(len(open_ports)) + " open UDP ports on " +
                      ip)

        return open_ports
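
To make the two-stage parse concrete, here is the same extraction on a fabricated response body (the HTML wrapper and port lines are invented):

import re

html = '<pre id="formResponse">\n53/udp   open  domain\n123/udp  open  ntp\n</pre>'
body = re.findall(r'<pre id="formResponse">(.*?)</pre>', html, re.MULTILINE | re.DOTALL)
open_ports = re.findall(r'(\d+)/udp\s+open\s+', body[0])  # ['53', '123']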
Example #9
    def do_strings(self):
        strings_path = None
        if self.voptions.basic.dostrings:
            try:
                data = open(self.memfile, "rb").read()
            except (IOError, OSError) as e:
                raise CuckooProcessingError("Error opening file %s" % e)

            nulltermonly = self.voptions.basic.get(
                "strings_nullterminated_only", True)
            minchars = self.voptions.basic.get("strings_minchars", 5)

            if nulltermonly:
                apat = "([\x20-\x7e]{" + str(minchars) + ",})\x00"
                upat = "((?:[\x20-\x7e][\x00]){" + str(
                    minchars) + ",})\x00\x00"
            else:
                apat = "[\x20-\x7e]{" + str(minchars) + ",}"
                upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + ",}"

            strings = re.findall(apat, data)
            for ws in re.findall(upat, data):
                strings.append(str(ws.decode("utf-16le")))
            data = None
            f = open(self.memfile + ".strings", "w")
            f.write("\n".join(strings))
            f.close()
Example #10
    def do_strings(self):
        strings_path = None
        if self.voptions.basic.dostrings:
            try:
                data = open(self.memfile, "rb").read()
            except (IOError, OSError) as e:
                raise CuckooProcessingError("Error opening file %s" % e)

            nulltermonly = self.voptions.basic.get("strings_nullterminated_only", True)
            minchars = self.voptions.basic.get("strings_minchars", 5)

            if nulltermonly:
                apat = "([\x20-\x7e]{" + str(minchars) + ",})\x00"
                upat = "((?:[\x20-\x7e][\x00]){" + str(minchars) + ",})\x00\x00"
            else:
                apat = "[\x20-\x7e]{" + str(minchars) + ",}"
                upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + ",}"

            strings = re.findall(apat, data)
            for ws in re.findall(upat, data):
                strings.append(str(ws.decode("utf-16le")))
            data = None
            f = open(self.memfile + ".strings", "w")
            f.write("\n".join(strings))
            f.close()
Example #11
    def _unpack(self, buf):
        """Extract into a list irc messages of a tcp streams.
        @buf: tcp stream data
        """
        try:
            f = BytesIO(buf)
            lines = f.readlines()
        except Exception:
            log.error("Failed reading tcp stream buffer")
            return False

        logirc = False
        for element in lines:
            if not re.match(b"^:", element) is None:
                command = "([a-zA-Z]+|[0-9]{3})"
                params = "(\x20.+)"
                irc_server_msg = re.findall("(^:[\w+.{}!@|()]+\x20)" + command + params, element)
                if irc_server_msg:
                    self._sc["prefix"] = convert_to_printable(irc_server_msg[0][0].strip())
                    self._sc["command"] = convert_to_printable(irc_server_msg[0][1].strip())
                    self._sc["params"] = convert_to_printable(irc_server_msg[0][2].strip())
                    self._sc["type"] = "server"
                    if logirc:
                        self._messages.append(dict(self._sc))
            else:
                irc_client_msg = re.findall(b"([a-zA-Z]+\x20)(.+[\x0a\0x0d])", element)
                if irc_client_msg and irc_client_msg[0][0].strip() in self.__methods_client:
                    self._cc["command"] = convert_to_printable(irc_client_msg[0][0].strip())
                    if self._cc["command"] in ["NICK", "USER"]:
                        logirc = True
                    self._cc["params"] = convert_to_printable(irc_client_msg[0][1].strip())
                    self._cc["type"] = "client"
                    if logirc:
                        self._messages.append(dict(self._cc))
Example #13
    def handleEvent(self, event):
        # We are only interested in the raw data from the spidering module
        # because the spidering module will always provide events with the
        # event.sourceEvent.data set to the URL of the source.
        if "sfp_spider" not in event.module:
            self.sf.debug("Ignoring web content from " + event.module)
            return None

        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data
        eventSource = event.actualSource

        self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

        # We aren't interested in describing pages that are not hosted on
        # our base domain.
        if not self.getTarget().matches(self.sf.urlFQDN(eventSource)):
            self.sf.debug("Not gathering page info for external site " + eventSource)
            return None

        if eventSource not in self.results:
            self.results[eventSource] = list()
        else:
            self.sf.debug("Already checked this page for a page type, skipping.")
            return None

        # Check the configured regexps to determine the page type
        for regexpGrp in regexps:
            if regexpGrp in self.results[eventSource]:
                continue

            for regex in regexps[regexpGrp]:
                rx = re.compile(regex, re.IGNORECASE)
                matches = re.findall(rx, eventData)
                if len(matches) > 0 and regexpGrp not in self.results[eventSource]:
                    self.sf.info("Matched " + regexpGrp + " in content from " + eventSource)
                    self.results[eventSource] = self.results[eventSource] + [regexpGrp]
                    evt = SpiderFootEvent(regexpGrp, eventSource, self.__name__, event)
                    self.notifyListeners(evt)

        # If no regexps were matched, consider this a static page
        if len(self.results[eventSource]) == 0:
            self.sf.info("Treating " + eventSource + " as URL_STATIC")
            evt = SpiderFootEvent("URL_STATIC", eventSource, self.__name__, event)
            self.notifyListeners(evt)

        # Check for externally referenced Javascript pages
        pat = re.compile("<script.*src=[\'\"]?([^\'\">]*)", re.IGNORECASE)
        matches = re.findall(pat, eventData)
        if len(matches) > 0:
            for match in matches:
                if '://' in match and not self.getTarget().matches(self.sf.urlFQDN(match)):
                    self.sf.debug("Externally hosted Javascript found at: " + match)
                    evt = SpiderFootEvent("PROVIDER_JAVASCRIPT", match,
                                          self.__name__, event)
                    self.notifyListeners(evt)

        return None
Example #14
 def __findTagAttributes(tag):
     att_double = re.findall(r'<\w*[ ]| *(.*?)[ ]*=[ ]*"(.*?)"[ +|>]', tag)
     att_single = re.findall(r"<\w*[ ]| *(.*?)[ ]*=[ ]*'(.*?)'[ +|>]", tag)
     att_none = re.findall(
         r'<\w*[ ]| *(.*?)[ ]*=[ ]*["|\']?(.*?)["|\']?[ +|>]', tag)
     att_none.extend(att_single)
     att_none.extend(att_double)
     return att_none
Example #15
 def handle_data(self, data):
     if self.inscript:
         allowed_ext = [".php", ".asp", ".xml", ".js", ".json", ".jsp"]
         self.liens.extend(lamejs.lamejs(data).getLinks())
         candidates = re.findall(r'"([A-Za-z0-9_=#&%\.\+\?/-]*)"', data)
         candidates += re.findall(r"'([A-Za-z0-9_=#&%\.\+\?/-]*)'", data)
         for jstr in candidates:
             if jstr not in self.common_js_strings:
                 for ext in allowed_ext:
                     if ext in jstr:
                         self.liens.append(jstr)
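
The two findall calls above collect every double- and single-quoted literal made of URL-ish characters; a standalone sketch on a fabricated script body:

import re

data = 'var a = "login.php?next=1"; var b = \'logo\'; var c = "ajax/feed.json";'  # fabricated
candidates = re.findall(r'"([A-Za-z0-9_=#&%\.\+\?/-]*)"', data)
candidates += re.findall(r"'([A-Za-z0-9_=#&%\.\+\?/-]*)'", data)
# candidates == ['login.php?next=1', 'ajax/feed.json', 'logo']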
Example #16
    def do_strings(self):
        strings_path = None
        if self.voptions.basic.dostrings:
            try:
                data = open(self.memfile, "r").read()
            except (IOError, OSError) as e:
                raise CuckooProcessingError("Error opening file %s" % e)

            nulltermonly = self.voptions.basic.get(
                "strings_nullterminated_only", True)
            minchars = self.voptions.basic.get("strings_minchars", 5)

            if nulltermonly:
                apat = "([\x20-\x7e]{" + str(minchars) + ",})\x00"
                strings = re.findall(apat, data)
                upat = "((?:[\x20-\x7e][\x00]){" + str(
                    minchars) + ",})\x00\x00"
                strings += [
                    str(ws.decode("utf-16le"))
                    for ws in re.findall(upat, data)
                ]
                data = None
                f = open(self.memfile + ".strings", "w")
                f.write("\n".join(strings))
                f.close()
                strings_path = self.memfile + ".strings"
            else:
                apat = "([\x20-\x7e]{" + str(minchars) + ",})\x00"
                strings = re.findall(apat, data)
                upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + ",}"
                strings += [
                    str(ws.decode("utf-16le"))
                    for ws in re.findall(upat, data)
                ]
                data = None
                f = open(self.memfile + ".strings", "w")
                f.write("\n".join(strings))
                f.close()
                strings_path = self.memfile + ".strings"

            if self.voptions.basic.zipstrings:
                try:
                    f = zipfile.ZipFile("%s.zip" % (strings_path),
                                        "w",
                                        allowZip64=True)
                    f.write(strings_path, os.path.basename(strings_path),
                            zipfile.ZIP_DEFLATED)
                    f.close()
                    os.remove(strings_path)
                    strings_path = "%s.zip" % (strings_path)
                except Exception as e:
                    raise CuckooProcessingError(
                        "Error creating Process Memory Strings Zip File %s" %
                        e)
Example #18
def extract_urls(msg, html=False):
    if html:
        msg = msg.replace("=3D", '=')
        for x in REPLACE:
            msg = msg.replace(x, '')

        urls = re.findall(RE_URL_HTML, msg)
    else:
        urls = re.findall(RE_URL_PLAIN, msg)
    pprint(urls)
    links = set()
    for u in urls:
        u = str(u.decode()).rstrip("/")
        links.add(u)
    return links
Example #19
def get_schedule_line_groups(classified_event):
    text = classified_event.processed_text.get_tokenized_text()

    # (?!20[01][05])
    time = r'\b[012]?\d[:.,h]?(?:[0-5][05])?(?:am|pm)?\b'
    time_with_minutes = r'\b[012]?\d[:.,h]?(?:[0-5][05])(?:am|pm)?\b'
    time_to_time = r'%s ?(?:to|do|до|til|till|alle|a|-|–|[^\w,.]) ?%s' % (time, time)

    # We try to grab all lines in schedule up until schedule ends,
    # so we need a "non-schedule line at the end", aka ['']
    lines = text.split('\n') + ['']
    idx = 0
    schedule_lines = []
    while idx < len(lines):
        first_idx = idx
        while idx < len(lines):
            line = lines[idx]
            # if it has
            # grab time one and time two, store diff
            # store delimiters
            # maybe store description as well?
            # compare delimiters, times, time diffs, styles, etc
            times = re.findall(time_to_time, line)
            if not times or len(line) > 80:
                if idx - first_idx >= 1:
                    schedule_lines.append(lines[first_idx:idx])
                break
            idx += 1
        first_idx = idx
        while idx < len(lines):
            line = lines[idx]
            times = re.findall(time, line)
            # TODO(lambert): Somehow track "1)" that might show up here? :(
            times = [x for x in times if x not in ['1.', '2.']]
            if not times or len(line) > 80:
                if idx - first_idx >= 3:
                    schedule_lines.append(lines[first_idx:idx])
                break
            idx += 1
        idx += 1

    schedule_groups = []
    for sub_lines in schedule_lines:
        if not [x for x in sub_lines if re.search(time_with_minutes, x)]:
            continue
        schedule_groups.append(sub_lines)

    return schedule_groups
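
A condensed check of the time-range pattern assembled above (the schedule line is invented):

import re

time = r'\b[012]?\d[:.,h]?(?:[0-5][05])?(?:am|pm)?\b'
time_to_time = r'%s ?(?:to|do|до|til|till|alle|a|-|–|[^\w,.]) ?%s' % (time, time)
re.findall(time_to_time, '19:30 to 21:00 house workshop')  # ['19:30 to 21:00']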
Example #20
    def handleEvent(self, event):
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        pat = re.compile("([A-Za-z0-9+\/]+\=\=|[A-Za-z0-9+\/]+\=)")
        m = re.findall(pat, eventData)
        for match in m:
            if self.checkForStop():
                return None

            minlen = int(self.opts['minlength'])
            if len(match) >= minlen:
                caps = sum(1 for c in match if c.isupper())
                # Base64-encoded strings don't look like normal strings
                if caps < (minlen / 4):
                    continue
                self.sf.info("Found Base64 string: " + match)
                if type(match) == str:
                    string = unicode(match, 'utf-8', errors='replace')
                else:
                    string = match

                try:
                    string += " (" + base64.b64decode(match) + ")"
                    evt = SpiderFootEvent("BASE64_DATA", string, self.__name__,
                                          event)
                    self.notifyListeners(evt)
                except BaseException as e:
                    self.sf.debug("Unable to base64-decode a string.")

        return None
Example #21
def find_competitor_list(search_text):
    processed_text = grammar_matcher.StringProcessor(search_text)
    results_match = re.search(r'\n0*1[^\d].+\n^0*2[^\d].+\n(?:^\d+.+\n){2,}', processed_text.text, re.MULTILINE)
    if results_match:
        numbered_list = results_match.group(0)
        num_lines = numbered_list.count('\n')
        if len(re.findall(r'\d ?[.:h] ?\d\d|\bam\b|\bpm\b', numbered_list)) > num_lines / 4:
            return None  # good list of times! workshops, etc! performance/shows/club-set times!
        processed_numbered_list = grammar_matcher.StringProcessor(numbered_list, processed_text.match_on_word_boundaries)
        event_keywords = processed_numbered_list.get_tokens(rules.EVENT)
        if len(event_keywords) > num_lines / 8:
            return None
        if processed_text.has_token(keywords.WRONG_NUMBERED_LIST):
            return None
        if num_lines > 10:
            return numbered_list
        else:
            lines = numbered_list.split('\n')
            qualified_lines = len([x for x in lines if re.search(r'[^\d\W].*[-(]', x)])
            if qualified_lines > num_lines / 2:
                return numbered_list
            for type in ['crew', 'pop|boog', 'lock', 'b\W?(?:boy|girl)']:
                qualified_lines = len([x for x in lines if re.search(type, x)])
                if qualified_lines > num_lines / 8:
                    return numbered_list
            if processed_text.match_on_word_boundaries == regex_keywords.WORD_BOUNDARIES:  # maybe separate on kana vs kanji?
                avg_words = 1.0 * sum([len([y for y in x.split(' ')]) for x in lines]) / num_lines
                if avg_words < 3:
                    return numbered_list
    return None
Example #22
 def on_call(self, call, process):
     if process["process_name"].lower() not in self.whitelistprocs:
         buff = call["arguments"]["buffer"].lower()
         if len(buff) >= 128 and (call["arguments"]["filepath"].endswith(".txt") or call["arguments"]["filepath"].endswith(".htm") or call["arguments"]["filepath"].endswith(".html")):
             patterns = "|".join(indicators)
             if len(re.findall(patterns, buff)) > 1:
                 self.mark_call()
Example #23
    def handleEvent(self, event):
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        # We only want web content from the target
        if srcModuleName != "sfp_spider":
            return None

        eventSource = event.sourceEvent.data
        self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

        if eventSource not in self.results.keys():
            self.results[eventSource] = list()

        # We only want web content for pages on the target site
        if not self.getTarget().matches(self.sf.urlFQDN(eventSource)):
            self.sf.debug("Not collecting web content information for external sites.")
            return None

        for regexpGrp in regexps.keys():
            if regexpGrp in self.results[eventSource]:
                continue

            for regex in regexps[regexpGrp]:
                pat = re.compile(regex, re.IGNORECASE)
                matches = re.findall(pat, eventData)
                if len(matches) > 0 and regexpGrp not in self.results[eventSource]:
                    self.sf.info("Matched " + regexpGrp + " in content from " + eventSource)
                    self.results[eventSource].append(regexpGrp)
                    evt = SpiderFootEvent("ERROR_MESSAGE", regexpGrp,
                                          self.__name__, event.sourceEvent)
                    self.notifyListeners(evt)

        return None
Example #24
 def on_call(self, call, process):
     if process["process_name"].lower() not in self.whitelistprocs:
         buff = call["arguments"]["buffer"].lower()
         if len(buff) >= 128:
             patterns = "|".join(self.indicators)
             if len(re.findall(patterns, buff)) > 1:
                 self.mark_call()
Example #25
def get_vt_consensus(namelist: list):

    finaltoks = defaultdict(int)
    for name in namelist:
        toks = re.findall(r"[A-Za-z0-9]+", name)
        for tok in toks:
            finaltoks[tok.title()] += 1
    for tok in list(finaltoks):
        lowertok = tok.lower()
        accepted = True
        numlist = [x for x in tok if x.isdigit()]
        if len(numlist) > 2 or len(tok) < 4:
            accepted = False
        if accepted:
            for black in banlist:
                if black == lowertok:
                    accepted = False
                    break
        if not accepted:
            del finaltoks[tok]

    sorted_finaltoks = sorted(list(finaltoks.items()), key=operator.itemgetter(1), reverse=True)
    if len(sorted_finaltoks) == 1 and sorted_finaltoks[0][1] >= 2:
        return sorted_finaltoks[0][0]
    elif len(sorted_finaltoks) > 1 and (sorted_finaltoks[0][1] >= sorted_finaltoks[1][1] * 2 or sorted_finaltoks[0][1] > 8):
        return sorted_finaltoks[0][0]
    elif len(sorted_finaltoks) > 1 and sorted_finaltoks[0][1] == sorted_finaltoks[1][1] and sorted_finaltoks[0][1] > 2:
        return sorted_finaltoks[0][0]
    return ""
Example #26
    def query(self, qry):
        url = "https://en.wikipedia.org/w/api.php?action=feedcontributions&user="******"0":
            dt = datetime.datetime.now() - datetime.timedelta(
                days=int(self.opts['days_limit']))
            y = dt.strftime("%Y")
            m = dt.strftime("%m")
            url += "&year=" + y + "&month=" + m
        res = self.sf.fetchUrl(url,
                               timeout=self.opts['_fetchtimeout'],
                               useragent="SpiderFoot")
        if res['code'] in ["404", "403", "500"]:
            return None

        links = list()
        try:
            parser = HTMLParser()
            for line in res['content'].split("\n"):
                matches = re.findall("<link>(.*?)</link>", line, re.IGNORECASE)
                for m in matches:
                    if "Special:Contributions" in m:
                        continue
                    d = parser.unescape(m)
                    links.append(d)
            return links
        except Exception as e:
            self.sf.error(
                "Error processing response from Wikipedia: " + str(e), False)
            return None
Example #27
def get_suricata_family(signature):
    """
    Args:
        signature: suricata alert string
    Return
        family: family name or False
    """
    # ToDo Trojan-Proxy
    family = False
    words = re.findall(r"[A-Za-z0-9/\-]+", signature)
    famcheck = words[2]
    if "/" in famcheck:
        famcheck_list = famcheck.split("/")  # [-1]
        for fam_name in famcheck_list:
            if not any(
                [block in fam_name.lower() for block in suricata_blocklist]):
                famcheck = fam_name
                break
    famchecklower = famcheck.lower()
    if famchecklower.startswith("win.") and famchecklower.count(".") == 1:
        famchecklower = famchecklower.split(".")[-1]
        famcheck = famcheck.split(".")[-1]
    if famchecklower in ("win32", "w32", "ransomware"):
        famcheck = words[3]
        famchecklower = famcheck.lower()
    if famchecklower == "ptsecurity":
        famcheck = words[3]
        famchecklower = famcheck.lower()
    isbad = any([block in famchecklower for block in suricata_blocklist])
    if not isbad and len(famcheck) >= 4:
        family = famcheck.title()
    isgood = any([allow in famchecklower for allow in suricata_passlist])
    if isgood and len(famcheck) >= 4:
        family = famcheck.title()
    return family
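
A quick look at the word-split on one of the signature shapes quoted in the comments of Example #33:

import re

sig = "ETPRO TROJAN Win32/Predator The Thief Initial CnC Checkin"
words = re.findall(r"[A-Za-z0-9/\-]+", sig)
# words == ['ETPRO', 'TROJAN', 'Win32/Predator', 'The', 'Thief', 'Initial', 'CnC', 'Checkin']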
Example #28
    def on_complete(self):
        matches = [
            r'(https?:\/\/)?([\da-z\.-]+)\.([0-9a-z\.]{2,6})(:\d{1,5})?([\/\w\.-]*)\/?',
        ]
        dedup = list()
        extracted_config = False
        for potential_ioc in self.iocs:
            for entry in matches:
                all_matches = re.findall(entry, potential_ioc)
                if all_matches:
                    extracted_config = True
                    for buf in all_matches:
                        ioc = ""
                        idx = 0
                        for tmp in buf:
                            idx += 1
                            if tmp == '':
                                pass
                            # Account for match groups and the second
                            # (or third depending on match) period as a
                            # delimiter. We need to add it in manually.
                            if idx == 2:
                                ioc += tmp + "."
                            else:
                                ioc += tmp
                        if ioc not in dedup:
                            dedup.append(ioc)
        if dedup:
            for ioc in dedup:
                self.data.append({"ioc": ioc})

        return extracted_config
Example #29
def get_clamav_consensus(namelist: list):
    for detection in namelist:
        if detection.startswith("Win.Trojan."):
            words = re.findall(r"[A-Za-z0-9]+", detection)
            family = words[2]
            if family:
                return family
Example #30
    def getLatestIndexes(self):
        url = "https://commoncrawl.s3.amazonaws.com/cc-index/collections/index.html"
        res = self.sf.fetchUrl(url, timeout=60,
                               useragent="SpiderFoot")

        if res['code'] in ["400", "401", "402", "403", "404"]:
            self.sf.error("CommonCrawl index collection doesn't seem to be available.", False)
            self.errorState = True
            return list()

        if not res['content']:
            self.sf.error("CommonCrawl index collection doesn't seem to be available.", False)
            self.errorState = True
            return list()

        indexes = re.findall(".*(CC-MAIN-\d+-\d+).*", res['content'])
        highest = 0
        indexlist = dict()
        for m in indexes:
            ms = m.replace("CC-MAIN-", "").replace("-", "")
            indexlist[ms] = True

        topindexes = sorted(indexlist.keys(), reverse=True)[0:self.opts['indexes']]

        if len(topindexes) < self.opts['indexes']:
            self.sf.error("Not able to find latest CommonCrawl indexes.", False)
            self.errorState = True
            return list()

        retindex = list()
        for i in topindexes:
            retindex.append("CC-MAIN-" + str(i)[0:4] + "-" + str(i)[4:6])
        self.sf.debug("CommonCrawl indexes: " + str(retindex))
        return retindex
Example #31
    def run(self) -> List[str]:
        ret = []
        with open(self.filepath, "r") as f:
            source = f.read()

        # Get rid of superfluous comments.
        source = re.sub("/\\*.*?\\*/", "", source, flags=re.S)

        for script in re.findall(self.script_re, source, re.I | re.S):
            try:
                x = bs4.BeautifulSoup(script, "html.parser")
                language = x.script.attrs.get("language", "").lower()
            except Exception:
                language = None

            # We can't rely on bs4 or any other HTML/XML parser to provide us
            # with the raw content of the xml tag as they decode html entities
            # and all that, leaving us with a corrupted string.
            source = re.match("<.*>(.*)</.*>$", script, re.S).group(0)

            # Decode JScript.Encode encoding.
            if language in {"jscript.encode", "vbscript.encode"}:
                source = EncodedScriptFile(self.filepath).decode(source.encode())

            if len(source) > 65536:
                source = f"{source[:65536]}\r\n<truncated>"

            ret.append(source)

        return ret
Example #32
 def search(self, regex, flags=0, all=False):
     if all:
         result = dict()
         result["detail"] = []
         matches = []
         for map in self.address_space:
             for chunk in map["chunks"]:
                 self.dumpfile.seek(chunk["offset"])
                 match = re.findall(
                     regex,
                     self.dumpfile.read(chunk["end"] - chunk["start"]),
                     flags)
                 if match:
                     matches.extend(match)
                     result["detail"].append({
                         "match": match,
                         "chunk": chunk
                     })
         result["matches"] = matches
         return result
     else:
         for map in self.address_space:
             for chunk in map["chunks"]:
                 self.dumpfile.seek(chunk["offset"])
                 match = re.search(
                     regex,
                     self.dumpfile.read(chunk["end"] - chunk["start"]),
                     flags)
                 if match:
                     result = dict()
                     result["match"] = match
                     result["chunk"] = chunk
                     return result
Example #33
def get_suricata_family(signature):
    """
    Args:
        signature: suricata alert string
    Return
        family: family name or False
    """

    family = False
    #alert["signature"].startswith(("ET JA3 HASH")):
    words = re.findall(r"[A-Za-z0-9/\-]+", signature)
    famcheck = words[2]
    if "/" in famcheck:
        famcheck = famcheck.split("/")[-1]
    famchecklower = famcheck.lower()
    #ET MALWARE Sharik/Smoke CnC Beacon 11
    #ETPRO TROJAN MSIL/Revenge-RAT CnC Checkin
    #ETPRO TROJAN Win32/Predator The Thief Initial CnC Checkin
    if famchecklower in ("win32", "w32", "ransomware"):
        famcheck = words[3]
        famchecklower = famcheck.lower()
    isbad = any(True for black in suricata_blacklist if black in famchecklower)
    if not isbad and len(famcheck) >= 4:
        family = famcheck.title()

    return family
Example #34
    def handleEvent(self, event):
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data
        sourceData = self.sf.hashstring(eventData)

        if sourceData in self.results:
            return None
        else:
            self.results.append(sourceData)

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        # thanks to https://stackoverflow.com/questions/21683680/regex-to-match-bitcoin-addresses
        matches = re.findall("[\s:=\>]([13][a-km-zA-HJ-NP-Z1-9]{25,34})",
                             eventData)
        for m in matches:
            self.sf.debug("Bitcoin potential match: " + m)
            if self.check_bc(m):
                evt = SpiderFootEvent("BITCOIN_ADDRESS", m, self.__name__,
                                      event)
                self.notifyListeners(evt)

        return None
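
For reference, the Bitcoin pattern in action on a fabricated line (the address is a commonly cited example address, not real data from the source):

import re

text = "send to: 1BvBMSEYstWetqTFn5Au4m4GFg7xJaNVN2 now"
re.findall(r"[\s:=\>]([13][a-km-zA-HJ-NP-Z1-9]{25,34})", text)
# ['1BvBMSEYstWetqTFn5Au4m4GFg7xJaNVN2']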
Example #35
    def on_complete(self):
        ret = False
        networkret = False
        campaign = set()
        mutexs = [
            "^(Global|Local)\\\\pen3j3832h$",
            "^(Global|Local)\\\\u1nyj3rt20",
        ]
        for mutex in mutexs:
            if self.check_mutex(pattern=mutex, regex=True):
                self.syncapis = True
                break

        # Check if there are any winners
        if self.cryptoapis or self.syncapis or networkret:
            ret = True
            if (self.cryptoapis or self.syncapis) and networkret:
                self.confidence = 100
                self.description = "Exhibits behaviorial and network characteristics of Upatre+Dyre/Mini-Dyre malware"
                #for camp in campaign:
                #    self.data.append({"Campaign": camp})

            elif networkret:
                self.description = "Exhibits network behavior characteristic of Upatre+Dyre/Mini-Dyre malware"
                #for camp in campaign:
                #    self.data.append({"Campaign": camp})

            if self.extract_c2s:
                dump_pid = 0
                for proc in self.results["behavior"]["processtree"]:
                    for child in proc["children"]:
                        # Look for lowest PID svchost.exe
                        if not dump_pid or child["pid"] < dump_pid:
                            if child["name"] == "svchost.exe":
                                dump_pid = child["pid"]
                if dump_pid:
                    dump_path = ""
                    if len(self.results["procmemory"]):
                        for memdump in self.results["procmemory"]:
                            if dump_pid == memdump["pid"]:
                                dump_path = memdump["file"]
                    if dump_path:
                        whitelist = [
                            "1.2.3.4",
                            "0.0.0.0",
                        ]
                        with open(dump_path, "rb") as dump_file:
                            dump_data = dump_file.read()
                        ippat = "\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{2,5}"
                        ips = re.findall(ippat, dump_data)
                        for ip in set(ips):
                            addit = True
                            for item in whitelist:
                                if ip.startswith(item):
                                    addit = False
                            #if addit:
                            #    self.data.append({"C2": ip})

        return ret
Example #36
    def lookupItem(self, target, content):
        grps = re.findall("<title><\!\[CDATA\[(.[^\]]*)\]\]></title>\s+<link><\!\[CDATA\[(.[^\]]*)\]\]></link>", content)
        for m in grps:
            if target in m[0]:
                self.sf.info("Found zoneh site: " + m[0])
                return m[0] + "\n<SFURL>" + m[1] + "</SFURL>"

        return False
Example #37
def charReplace(inputString, MODFLAG):
    # OLD: [char]101
    # NEW: e
    for value in re.findall("\[[Cc][Hh][Aa][Rr]\][0-9]{1,3}", inputString):
        inputString = inputString.replace(value, '"%s"' % chr(int(value.split("]")[1])))
    if MODFLAG == 0:
        MODFLAG = 1
    return inputString, MODFLAG
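
A worked call of the replacement loop above on an invented PowerShell-style fragment:

import re

s = "[ChAr]101 + [CHAR]120"
for value in re.findall(r"\[[Cc][Hh][Aa][Rr]\][0-9]{1,3}", s):
    s = s.replace(value, '"%s"' % chr(int(value.split("]")[1])))
# s == '"e" + "x"'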
Example #38
def parsenamedacts(pattern, intext):
    namedacts = re.findall(pattern, intext)
    namedacts = list(set(namedacts))
    outtext = intext
    for namedact in namedacts:
        # outtext = outtext.replace(namedact + r'@/', encode_act(namedact) + r'@/')
        outtext = outtext.replace(r'ref-namedact-' + namedact, r'ref-namedact-' + encode_act(namedact))
    return outtext
Example #39
    def on_call(self, call, process):
        if self.checkEvent and self.lastapi == "CryptHashData":
            if call["api"] == "NtOpenEvent":
                event = self.get_argument(call, "EventName")
                event = event.split("\\")
                if len(event) == 2:
                    if event[1] in self.hashes and event[0] in ["Global", "Local"]:
                        self.found = True

        if call["api"] == "GetVolumeNameForVolumeMountPointW":
            if call["status"]:
                name = self.get_argument(call, "VolumeName")
                if name and len(name) > 10:
                    name = name[10:-1]
                    if name not in self.volumes:
                        self.volumes.add(name)
                        md5 = hashlib.md5(name).hexdigest()[:16].upper()
                        self.hashes.add(md5)

        elif call["api"] == "CryptHashData":
            if self.hashes:
                buf = self.get_argument(call, "Buffer")
                if buf and all(word in buf for word in self.keywords):
                    # Try/Except handles when this behavior changes in the future
                    try:
                        args = parse_qs(urlparse("/?" + buf).query,
                                        keep_blank_values=True)
                    except:
                        self.sigchanged = True
                        self.severity = 1
                        self.description = "Potential Locky ransomware behavioral characteristics observed. (See Note)"
                        self.data.append({"Note": "Unexpected behavior observed for Locky. Please " \
                                                  "report this sample to https://github.com/spende" \
                                                  "rsandbox/community-modified/issues"})

                    if args and "id" in args.keys():
                        if args["id"][0] in self.hashes:
                            self.found = process["process_id"]
                        if "affid" in args:
                            tmp = {"Affid": args["affid"][0]}
                            if tmp not in self.data:
                                self.data.append(tmp)

                elif buf in self.volumes and self.lastapi == "GetVolumeNameForVolumeMountPointW":
                    self.checkEvent = True

                else:
                    check = re.findall(r"\s((?:https?://)?\w+(?:\.onion|\.tor2web)[/.](?:\w+\/)?)",
                                       buf, re.I)
                    if check:
                        for payment in check:
                            self.payment.add(payment)

        elif call["api"] == "InternetCrackUrlA":
            if self.found and process["process_id"] == self.found:
                url = self.get_argument(call, "Url")
                if url and url.endswith(".php"):
                    self.c2s.add(url)
Example #40
    def on_complete(self):
        for screenshot in self.get_results("screenshots", []):
            if "ocr" in screenshot:
                ocr = screenshot["ocr"].lower()
                patterns = "|".join(indicators)
                if len(re.findall(patterns, ocr)) > 1:
                    self.mark_ioc("message", ocr)

        return self.has_marks()
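
These OCR checks hinge on joining a list of marker phrases into one alternation; a minimal sketch with hypothetical indicators:

import re

indicators = ["your files", "bitcoin", "decrypt"]  # hypothetical ransom-note markers
ocr = "all your files were encrypted. pay bitcoin to decrypt them."
patterns = "|".join(indicators)
len(re.findall(patterns, ocr)) > 1  # True: three markers hit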
Example #41
 def on_call(self, call, process):
     if call["api"] == "NtWriteFile":
         filescore = 0
         buff = self.get_raw_argument(call, "Buffer").lower()
         filepath = self.get_raw_argument(call, "HandleName")
         patterns = "|".join(self.indicators)
         if (filepath.lower() == "\\??\\physicaldrive0" or filepath.lower().startswith("\\device\\harddisk")) and len(buff) >= 128:
             if len(re.findall(patterns, buff)) > 1:   
                 if filepath not in self.ransomfile:
                     self.ransomfile.append(filepath)
Example #42
    def do_strings(self):
        strings_path = None
        if self.voptions.basic.dostrings:
            try:
                data = open(self.memfile, "r").read()
            except (IOError, OSError) as e:
                raise CuckooProcessingError("Error opening file %s" % e)

            nulltermonly = self.voptions.basic.get("strings_nullterminated_only", True)
            minchars = self.voptions.basic.get("strings_minchars", 5)

            if nulltermonly:
                apat = "([\x20-\x7e]{" + str(minchars) + ",})\x00"
                strings = re.findall(apat, data)
                upat = "((?:[\x20-\x7e][\x00]){" + str(minchars) + ",})\x00\x00"
                strings += [str(ws.decode("utf-16le")) for ws in re.findall(upat, data)]
                data = None
                f = open(self.memfile + ".strings", "w")
                f.write("\n".join(strings))
                f.close()
                strings_path = self.memfile + ".strings"
            else:
                apat = "([\x20-\x7e]{" + str(minchars) + ",})\x00"
                strings = re.findall(apat, data)
                upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + ",}"
                strings += [str(ws.decode("utf-16le")) for ws in re.findall(upat, data)]
                data = None
                f = open(self.memfile + ".strings", "w")
                f.write("\n".join(strings))
                f.close()
                strings_path = self.memfile + ".strings"

            if self.voptions.basic.zipstrings:
                try:
                    f = zipfile.ZipFile("%s.zip" % strings_path, "w", allowZip64=True)
                    f.write(strings_path, os.path.basename(strings_path), zipfile.ZIP_DEFLATED)
                    f.close()
                    os.remove(strings_path)
                    strings_path = "%s.zip" % (strings_path)
                except Exception as e:
                    raise CuckooProcessingError("Error creating Process Memory Strings Zip File %s" % e)
Example #43
def is_workshop(classified_event):
    trimmed_title = classified_event.processed_title.delete_with_rule(rules.WRONG_CLASS)
    if classified_event.processed_text.get_tokens(dance_keywords.ROMANCE):
        has_class_title = trimmed_title.get_tokens(rules.ROMANCE_EXTENDED_CLASS_ONLY)
    else:
        has_class_title = trimmed_title.get_tokens(dance_keywords.CLASS_ONLY)
    has_good_dance_class_title = trimmed_title.has_token(rules.GOOD_DANCE_CLASS)

    has_non_dance_event_title = classified_event.processed_title.has_token(keywords.BAD_COMPETITION_TITLE_ONLY)
    has_good_dance_title = trimmed_title.has_token(rules.GOOD_DANCE)
    has_extended_good_crew_title = trimmed_title.has_token(rules.MANUAL_DANCER[grammar.STRONG_WEAK])

    has_wrong_style_title = classified_event.processed_title.has_token(all_styles.DANCE_WRONG_STYLE_TITLE)

    final_title = classified_event.processed_title.get_tokenized_text()
    lee_lee_hiphop = 'lee lee' in final_title and re.findall('hip\W?hop', final_title)

    trimmed_text = classified_event.processed_text.delete_with_rule(rules.WRONG_CLASS)
    has_good_dance_class = trimmed_text.has_token(rules.GOOD_DANCE_CLASS)
    has_good_dance = classified_event.processed_text.has_token(rules.GOOD_DANCE)
    has_wrong_style = classified_event.processed_text.has_token(all_styles.DANCE_WRONG_STYLE_TITLE)

    has_good_crew = classified_event.processed_text.has_token(rules.MANUAL_DANCER[grammar.STRONG])

    # print has_class_title
    # print has_good_dance_title
    # print has_extended_good_crew_title
    # print has_wrong_style_title

    # print classified_event.processed_text.get_tokenized_text()
    # print ''
    # print has_class_title
    # print has_wrong_style
    # print has_good_dance
    # print has_good_crew
    if has_class_title and (has_good_dance_title or has_extended_good_crew_title) and not has_wrong_style_title:
        return (
            True, 'has class with strong class-title: %s %s' % (has_class_title, (has_good_dance_title or has_extended_good_crew_title))
        )
    elif classified_event.is_dance_event(
    ) and has_good_dance_title and has_extended_good_crew_title and not has_wrong_style_title and not has_non_dance_event_title:
        return (True, 'has class with strong style-title: %s %s' % (has_good_dance_title, has_extended_good_crew_title))
    elif classified_event.is_dance_event() and lee_lee_hiphop and not has_wrong_style_title and not has_non_dance_event_title:
        return (True, 'has class with strong style-title: %s %s' % (has_good_dance_title, has_extended_good_crew_title))
    elif has_class_title and not has_wrong_style and (has_good_dance or has_good_crew):
        return (True, 'has class title: %s, that contains strong description %s, %s' % (has_class_title, has_good_dance, has_good_crew))
    elif has_good_dance_class_title:
        return (True, 'has good dance class title: %s' % has_good_dance_class_title)
    elif has_good_dance_class and not has_wrong_style_title:
        return (True, 'has good dance class: %s' % has_good_dance_class)
    return (False, 'nothing')
Example #44
    def handleEvent(self, event):
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

        pat = re.compile("([\%a-zA-Z\.0-9_\-\+]+@[a-zA-Z\.0-9\-]+\.[a-zA-Z\.0-9\-]+)")
        matches = re.findall(pat, eventData)
        myres = list()
        for match in matches:
            evttype = "EMAILADDR"
            if len(match) < 4:
                self.sf.debug("Likely invalid address: " + match)
                continue

            # Handle messed up encodings
            if "%" in match:
                self.sf.debug("Skipped address: " + match)
                continue

            # Get the domain and strip potential ending .
            mailDom = match.lower().split('@')[1].strip('.')
            if not self.getTarget().matches(mailDom) and not self.getTarget().matches(match):
                self.sf.debug("External domain, so possible affiliate e-mail")
                evttype = "AFFILIATE_EMAILADDR"

            if eventName.startswith("AFFILIATE_"):
                evttype = "AFFILIATE_EMAILADDR"

            self.sf.info("Found e-mail address: " + match)
            if type(match) == str:
                mail = unicode(match.strip('.'), 'utf-8', errors='replace')
            else:
                mail = match.strip('.')

            if mail in myres:
                self.sf.debug("Already found from this source.")
                continue
            else:
                myres.append(mail)

            evt = SpiderFootEvent(evttype, mail, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)

        return None
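
The e-mail pattern above, applied to a fabricated page snippet:

import re

pat = re.compile(r"([\%a-zA-Z\.0-9_\-\+]+@[a-zA-Z\.0-9\-]+\.[a-zA-Z\.0-9\-]+)")
re.findall(pat, "contact alice@example.com or bob%40broken")
# ['alice@example.com']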
Example #45
def get_tags(src, tags='page,title,revision,text'):
    # find namespace (eg: http://www.mediawiki.org/xml/export-0.3/)
    try:
        root = src.readline() + src.readline()
        ns = unicode(re.findall(r'xmlns="([^"]*)', root)[0])

        tag_prefix = u'{%s}' % (ns,)

        tag = {}
        for t in tags.split(','):
            tag[t] = tag_prefix + unicode(t)
    finally:
        src.seek(0)

    return tag
Example #46
    def on_complete(self):
        matches = [r"(https?:\/\/)?([\da-z\.-]+)\.([0-9a-z\.]{2,6})(:\d{1,5})?([\/\w\.-]*)\/?"]
        whitelist = [
            "http://crl.microsoft.com",
            "http://www.microsoft.com",
            "asm.v1",
            "asm.v3",
            "verisign.com",
            "symantec.com",
            "thawte.com",
        ]
        dedup = list()
        extracted_data = False
        for potential_ioc in self.iocs:
            for entry in matches:
                all_matches = re.findall(entry, potential_ioc)
                if all_matches:
                    for buf in all_matches:
                        ioc = ""
                        idx = 0
                        for tmp in buf:
                            idx += 1
                            if tmp == "":
                                pass
                            # Account for match groups and the second
                            # (or third depending on match) period as a
                            # delimiter. We need to add it in manually.
                            if idx == 2:
                                ioc += tmp + "."
                            else:
                                ioc += tmp

                        addit = True
                        for item in whitelist:
                            if item in ioc:
                                addit = False
                        if addit and ioc not in dedup:
                            dedup.append(ioc)
        if dedup:
            extracted_data = True
            for ioc in dedup:
                self.data.append({"ioc": ioc})

        return extracted_data
Example #47
    def on_complete(self):
        if "dropped" in self.results:
            for dropped in self.results["dropped"]:
                mimetype = dropped["type"]
                if "ASCII text" in mimetype:
                    filename = dropped["name"]
                    data = dropped["data"]
                    patterns = "|".join(self.indicators)
                    if len(data) >= 128:
                        if len(re.findall(patterns, data)) > 1:
                            if filename not in self.ransomfile:
                                self.ransomfile.append(filename)

        if len(self.ransomfile) > 0:
            for filename in self.ransomfile:
                self.data.append({"ransom_file" : "%s" % (filename)})
            return True

        return False
Example #48
def get_namespaces(src):
    try:
        counter = 0
        namespaces = []

        while 1:
            line = src.readline()
            if not line:
                break
            keys = re.findall(
                r'<namespace key="(-?\d+)"[^>]*>([^<]*)</namespace>', line)
            for key, ns in keys:
                namespaces.append((key, ns))

            counter += 1
            if counter > 40:
                break
    finally:
        src.seek(0)

    return namespaces
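
One line of MediaWiki export XML, run through the namespace pattern above (the sample line is fabricated to match the format):

import re

line = '<namespace key="-2" case="first-letter">Media</namespace>'
re.findall(r'<namespace key="(-?\d+)"[^>]*>([^<]*)</namespace>', line)
# [('-2', 'Media')]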
Example #49
    def handleEvent(self, event):
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data
        sourceData = self.sf.hashstring(eventData)

        if sourceData in self.results:
            return None
        else:
            self.results.append(sourceData)

        self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

        # thanks to https://stackoverflow.com/questions/21683680/regex-to-match-bitcoin-addresses
        matches = re.findall("[\s:=\>]([13][a-km-zA-HJ-NP-Z1-9]{25,34})", eventData)
        for m in matches:
            self.sf.debug("Bitcoin potential match: " + m)
            if self.check_bc(m):
                evt = SpiderFootEvent("BITCOIN_ADDRESS", m, self.__name__, event)
                self.notifyListeners(evt)

        return None
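
A quick standalone check of the pattern with made-up event data. Note the leading [\s:=>] class: an address at the very start of the data would not match, since the pattern requires a preceding delimiter character.

import re

pattern = r"[\s:=>]([13][a-km-zA-HJ-NP-Z1-9]{25,34})"
data = "payment sent to 1BvBMSEYstWetqTFn5Au4m4GFg7xJaNVN2 today"
print(re.findall(pattern, data))
# ['1BvBMSEYstWetqTFn5Au4m4GFg7xJaNVN2']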
Exemplo n.º 50
    def search(self, regex, flags=0, all=False):
        # Note: the "all" parameter shadows the builtin; the name is kept for
        # API compatibility. If the regex contains capture groups, re.findall
        # returns the groups rather than the full match text.
        if all:
            result = {"detail": []}
            matches = []
            for region in self.address_space:
                for chunk in region["chunks"]:
                    self.dumpfile.seek(chunk["offset"])
                    match = re.findall(regex, self.dumpfile.read(chunk["end"] - chunk["start"]), flags)
                    if match:
                        matches.extend(match)
                        result["detail"].append({"match": match, "chunk": chunk})
            result["matches"] = matches
            return result
        for region in self.address_space:
            for chunk in region["chunks"]:
                self.dumpfile.seek(chunk["offset"])
                match = re.search(regex, self.dumpfile.read(chunk["end"] - chunk["start"]), flags)
                if match:
                    return {"match": match, "chunk": chunk}
        return None
Exemplo n.º 51
    def run(self):
        """Run analysis.
        @return: structured results.
        """
        self.key = "procmemory"
        results = []
        zipdump = self.options.get("zipdump", False)
        zipstrings = self.options.get("zipstrings", False)
        do_strings = self.options.get("strings", False)
        nulltermonly = self.options.get("nullterminated_only", True)
        minchars = self.options.get("minchars", 5)

        if os.path.exists(self.pmemory_path):
            for dmp in os.listdir(self.pmemory_path):
                # If we're re-processing this task and zips are enabled, the
                # process dumps won't be re-processed (for now this only
                # matters for Yara).
                if not dmp.endswith(".dmp"):
                    continue

                dmp_path = os.path.join(self.pmemory_path, dmp)
                dmp_file = File(dmp_path)
                process_name = ""
                process_path = ""
                process_id = int(os.path.splitext(os.path.basename(dmp_path))[0])
                if "behavior" in self.results and "processes" in self.results["behavior"]:
                    for process in self.results["behavior"]["processes"]:
                        if process_id == process["process_id"]:
                            process_name = process["process_name"]
                            process_path = process["module_path"]
                proc = dict(
                    file=dmp_path,
                    pid=process_id,
                    name=process_name,
                    path=process_path,
                    yara=dmp_file.get_yara(os.path.join(CUCKOO_ROOT, "data", "yara", "index_memory.yar")),
                    address_space=self.parse_dump(dmp_path),
                    zipdump=zipdump,
                    zipstrings=zipstrings,
                )

                if do_strings:
                    try:
                        data = open(dmp_path, "r").read()
                    except (IOError, OSError) as e:
                        raise CuckooProcessingError("Error opening file %s" % e)

                    if nulltermonly:
                        apat = "([\x20-\x7e]{" + str(minchars) + ",})\x00"
                        upat = "((?:[\x20-\x7e][\x00]){" + str(minchars) + ",})\x00\x00"
                    else:
                        apat = "[\x20-\x7e]{" + str(minchars) + ",}"
                        upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + ",}"
                    # ASCII strings first, then UTF-16LE (wide) strings.
                    strings = re.findall(apat, data)
                    strings += [str(ws.decode("utf-16le")) for ws in re.findall(upat, data)]
                    f = open(dmp_path + ".strings", "w")
                    f.write("\n".join(strings))
                    f.close()
                    proc["strings_path"] = dmp_path + ".strings"
                    if zipstrings:
                        try:
                            f = zipfile.ZipFile("%s.zip" % (proc["strings_path"]), "w")
                            f.write(proc["strings_path"], os.path.basename(proc["strings_path"]), zipfile.ZIP_DEFLATED)
                            f.close()
                            os.remove(proc["strings_path"])
                            proc["strings_path"] = "%s.zip" % (proc["strings_path"])
                        except Exception as e:
                            raise CuckooProcessingError("Error creating Process Memory Strings Zip File %s" % e)

                # Deduplicate configs
                if proc["yara"]:
                    for match in proc["yara"]:
                        # Dyre
                        if match["name"] == "DyreCfgInjectsList":
                            output = list()
                            buf = ""
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("<litem>"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("</litem>"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]
                            match["meta"]["description"] += " (Observed %d unique inject elements)" % len(output)

                        elif match["name"] == "DyreCfgRedirectList":
                            output = list()
                            buf = ""
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("<rpcgroup>"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("</rpcgroup>"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]
                            match["meta"]["description"] += " (Observed %d unique redirect elements)" % len(output)

                        # DarkComet
                        elif match["name"] == "DarkCometConfig":
                            output = list()
                            buf = ""
                            recline = False
                            for ystring in match["strings"]:
                                for line in ystring.splitlines():
                                    if line.startswith("#BEGIN DARKCOMET"):
                                        buf = ""
                                        recline = True
                                    if recline:
                                        buf += line.strip() + "\n"
                                    if line.startswith("#EOF DARKCOMET"):
                                        recline = False
                                        if buf not in output:
                                            output.append(buf)

                            match["strings"] = ["".join(output)]

                if zipdump:
                    try:
                        f = zipfile.ZipFile("%s.zip" % (dmp_path), "w")
                        f.write(dmp_path, os.path.basename(dmp_path), zipfile.ZIP_DEFLATED)
                        f.close()
                        os.remove(dmp_path)
                        proc["file"] = "%s.zip" % (dmp_path)
                    except Exception as e:
                        raise CuckooProcessingError("Error creating Process Memory Zip File %s" % e)

                results.append(proc)
        return results
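
A bytes-based (Python 3 style) sketch, over a made-up buffer, of what the two null-terminated patterns extract: ASCII runs and UTF-16LE ("wide") runs.

import re

minchars = 5
data = b"plain-ascii\x00\xfe\xff" + "wide-text".encode("utf-16le") + b"\x00\x00"
apat = b"([\x20-\x7e]{" + str(minchars).encode() + b",})\x00"
upat = b"((?:[\x20-\x7e][\x00]){" + str(minchars).encode() + b",})\x00\x00"
print(re.findall(apat, data))                                  # [b'plain-ascii']
print([w.decode("utf-16le") for w in re.findall(upat, data)])  # ['wide-text']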
Exemplo n.º 52
    def run(self):
        """Run evented signatures."""
        # This will contain all the matched signatures.
        matched = []

        stats = {}

        complete_list = list_plugins(group="signatures")
        evented_list = [
            sig(self.results)
            for sig in complete_list
            if sig.enabled and sig.evented and self._check_signature_version(sig)
            and (not sig.filter_analysistypes or self.results["target"]["category"] in sig.filter_analysistypes)
        ]

        overlay = self._load_overlay()
        log.debug("Applying signature overlays for signatures: %s", ", ".join(overlay.keys()))
        for signature in complete_list + evented_list:
            self._apply_overlay(signature, overlay)

        if evented_list and "behavior" in self.results:
            log.debug("Running %u evented signatures", len(evented_list))
            for sig in evented_list:
                stats[sig.name] = timedelta()
                if sig == evented_list[-1]:
                    log.debug("\t `-- %s", sig.name)
                else:
                    log.debug("\t |-- %s", sig.name)

            # Iterate calls and tell interested signatures about them.
            for proc in self.results["behavior"]["processes"]:
                for call in proc["calls"]:
                    # Loop over a copy of the list: signatures that return
                    # True or False are removed below, and removing from a
                    # list while iterating it directly would skip entries.
                    for sig in evented_list[:]:
                        # Skip current call if it doesn't match the filters (if any).
                        if sig.filter_processnames and not proc["process_name"] in sig.filter_processnames:
                            continue
                        if sig.filter_apinames and not call["api"] in sig.filter_apinames:
                            continue
                        if sig.filter_categories and not call["category"] in sig.filter_categories:
                            continue

                        result = None
                        try:
                            pretime = datetime.now()
                            result = sig.on_call(call, proc)
                            posttime = datetime.now()
                            timediff = posttime - pretime
                            stats[sig.name] += timediff
                        except NotImplementedError:
                            result = False
                        except Exception:
                            log.exception("Failed to run signature \"%s\":", sig.name)
                            result = False

                        # If the signature returns None we can carry on, the
                        # condition was not matched.
                        if result is None:
                            continue

                        # On True, the signature is matched.
                        if result is True:
                            log.debug("Analysis matched signature \"%s\"", sig.name)
                            matched.append(sig.as_result())
                            if sig in complete_list:
                                complete_list.remove(sig)

                        # Either True or False, we don't need to check this sig anymore.
                        evented_list.remove(sig)

            # Call the stop method on all remaining instances.
            for sig in evented_list:
                try:
                    pretime = datetime.now()
                    result = sig.on_complete()
                    posttime = datetime.now()
                    timediff = posttime - pretime
                    stats[sig.name] += timediff
                except NotImplementedError:
                    continue
                except Exception:
                    log.exception("Failed to run on_complete() method for signature \"%s\":", sig.name)
                    continue
                else:
                    if result is True:
                        log.debug("Analysis matched signature \"%s\"", sig.name)
                        matched.append(sig.as_result())
                        if sig in complete_list:
                            complete_list.remove(sig)

        # Link this into the results already at this point, so non-evented signatures can use it
        self.results["signatures"] = matched

        # Add in statistics for evented signatures that took at least some time
        for key, value in stats.iteritems():
            if value:
                self.results["statistics"]["signatures"].append({
                    "name": key,
                    "time": float("%d.%03d" % (value.seconds,
                                             value.microseconds / 1000)),
                    })

        # Compat loop for old-style (non evented) signatures.
        if complete_list:
            complete_list.sort(key=lambda sig: sig.order)
            log.debug("Running non-evented signatures")

            for signature in complete_list:
                if not signature.filter_analysistypes or self.results["target"]["category"] in signature.filter_analysistypes:
                    match = self.process(signature)
                    # If the signature is matched, add it to the list.
                    if match:
                        matched.append(match)

                    # Reset the ParseProcessLog instances after each signature
                    if "behavior" in self.results:
                        for process in self.results["behavior"]["processes"]:
                            process["calls"].reset()

        # Sort the matched signatures by their severity level.
        matched.sort(key=lambda key: key["severity"])

        # Tweak later as needed
        malscore = 0.0
        for match in matched:
            if match["severity"] == 1:
                malscore += match["weight"] * 0.5 * (match["confidence"] / 100.0)
            else:
                malscore += match["weight"] * (match["severity"] - 1) * (match["confidence"] / 100.0)
        if malscore > 10.0:
            malscore = 10.0
        if malscore < 0.0:
            malscore = 0.0
        self.results["malscore"] = malscore

        family = ""
        # Make a best effort detection of malware family name (can be updated later by re-processing the analysis)
        for match in matched:
            if "families" in match and match["families"]:
                family = match["families"][0].title()
                break
        if not family and self.results["info"]["category"] == "file" and "virustotal" in self.results and "results" in self.results["virustotal"] and self.results["virustotal"]["results"]:
            detectnames = []
            for res in self.results["virustotal"]["results"]:
                if res["sig"]:
                    # weight Microsoft's detection, they seem to be more accurate than the rest
                    if res["vendor"] == "Microsoft":
                        detectnames.append(res["sig"])
                    detectnames.append(res["sig"])
            family = get_vt_consensus(detectnames)
        
        # Add detection based on Suricata alerts here.
        if (not family and "suricata" in self.results and "alerts" in self.results["suricata"]
                and self.results["suricata"]["alerts"]):
            for alert in self.results["suricata"]["alerts"]:
                if "signature" in alert and alert["signature"]:
                    if alert["signature"].startswith("ET TROJAN") or alert["signature"].startswith("ETPRO TROJAN"):
                        words = re.findall(r"[A-Za-z0-9]+", alert["signature"])
                        famcheck = words[2]
                        famchecklower = famcheck.lower()
                        if famchecklower == "win32" or famchecklower == "w32":
                            famcheck = words[3]
                            famchecklower = famcheck.lower()

                        blacklist = [
                            "upx",
                            "executable",
                            "potential",
                            "likely",
                            "rogue",
                            "supicious",
                            "generic",
                            "possible",
                            "known",
                            "common",
                            "troj",
                            "trojan",
                            "team",
                            "probably",
                            "w2km",
                            "http",
                            "abuse.ch",
                            "win32",
                            "unknown",
                            "single",
                            "exe",
                            "filename",
                            "js",
                        ]
                        isgood = famchecklower not in blacklist
                        if isgood:
                            famcheck = famcheck.split(".")[0]
                            family = famcheck.title()

        # Fall back to ClamAV detection.
        if (not family and self.results["info"]["category"] == "file"
                and "clamav" in self.results["target"]["file"]
                and self.results["target"]["file"]["clamav"]
                and self.results["target"]["file"]["clamav"].startswith("Win.Trojan.")):
            words = re.findall(r"[A-Za-z0-9]+", self.results["target"]["file"]["clamav"])
            family = words[2]

        self.results["malfamily"] = family
Exemplo n.º 53
    def feed(self, html_source):
        html_source = html_source.replace("\n", "")
        html_source = html_source.replace("\r", "")
        html_source = html_source.replace("\t", "")

        links = re.findall('<a.*?>', html_source)
        link_attributes = []
        for link in links:
            link_attributes.append(self.__findTagAttributes(link))

        # Find all the forms: grab the text from "<form..." to "...</form>".
        # The list "forms" will contain every form on the page.
        forms = re.findall('<form.*?>.*?</form>', html_source)
        forms_attributes = []
        for form in forms:
            forms_attributes.append(self.__findTagAttributes(form))

        # Process each form, collecting its method and all of its inputs,
        # textareas and selects.
        inputs_in_forms = []
        text_areas_in_forms = []
        selects_in_forms = []
        for form in forms:
            inputs_in_forms.append(re.findall('<input.*?>', form))
            text_areas_in_forms.append(re.findall('<textarea.*?>', form))
            selects_in_forms.append(re.findall('<select.*?>', form))

        # Extract the attributes of each <input> tag, as an XML parser would.
        inputs_attributes = []
        for i in xrange(len(inputs_in_forms)):
            inputs_attributes.append([])
            for inputt in inputs_in_forms[i]:
                inputs_attributes[i].append(self.__findTagAttributes(inputt))

        selects_attributes = []
        for i in xrange(len(selects_in_forms)):
            selects_attributes.append([])
            for select in selects_in_forms[i]:
                selects_attributes[i].append(self.__findTagAttributes(select))

        textareas_attributes = []
        for i in xrange(len(text_areas_in_forms)):
            textareas_attributes.append([])
            for textArea in text_areas_in_forms[i]:
                textareas_attributes[i].append(self.__findTagAttributes(textArea))

        if self.verbose == 3:
            print('')
            print('')
            print(_("Forms"))
            print("=====")
            for i in xrange(len(forms)):
                print(_("Form {0}").format(str(i)))
                tmpdict = {}
                for k, v in dict(forms_attributes[i]).items():
                    tmpdict[k.lower()] = v
                print(_(" * Method:  {0}").format(self.__decode_htmlentities(tmpdict['action'])))
                print(_(" * Intputs:"))
                for j in xrange(len(inputs_in_forms[i])):
                    print(u"    + " + inputs_in_forms[i][j])
                    for att in inputs_attributes[i][j]:
                        print(u"       - " + str(att))
                print(_(" * Selects:"))
                for j in xrange(len(selects_in_forms[i])):
                    print(u"    + " + selects_in_forms[i][j])
                    for att in selects_attributes[i][j]:
                        print(u"       - " + str(att))
                print(_(" * TextAreas:"))
                for j in xrange(len(text_areas_in_forms[i])):
                    print(u"    + " + text_areas_in_forms[i][j])
                    for att in textareas_attributes[i][j]:
                        print(u"       - " + str(att))
            print('')
            print(_("URLS"))
            print("====")

        for i in xrange(len(links)):
            tmpdict = {}
            for k, v in dict(link_attributes[i]).items():
                tmpdict[k.lower()] = v
            if "href" in tmpdict:
                self.liens.append(self.__decode_htmlentities(tmpdict['href']))
                if self.verbose == 3:
                    print(self.__decode_htmlentities(tmpdict['href']))

        for i in xrange(len(forms)):
            tmpdict = {}
            for k, v in dict(forms_attributes[i]).items():
                tmpdict[k.lower()] = v
            self.form_values = []
            if "action" in tmpdict:
                self.liens.append(self.__decode_htmlentities(tmpdict['action']))
                self.current_form_url = self.__decode_htmlentities(tmpdict['action'])

            # Forms use GET method by default
            self.current_form_method = "get"
            if "method" in tmpdict:
                if tmpdict["method"].lower() == "post":
                    self.current_form_method = "post"

            for j in xrange(len(inputs_attributes[i])):
                tmpdict = {}
                for k, v in dict(inputs_attributes[i][j]).items():
                    tmpdict[k.lower()] = v
                # These checks must run once per input, after all of its
                # attributes have been collected, not once per attribute.
                if "type" not in tmpdict:
                    tmpdict["type"] = "text"
                if "name" in tmpdict:
                    if tmpdict['type'].lower() in \
                        ['text', 'password', 'radio', 'checkbox', 'hidden',
                         'submit', 'search']:
                        # Use the default value if present, else "on".
                        val = tmpdict.get("value") or u"on"
                        self.form_values.append([tmpdict['name'], val])
                    if tmpdict['type'].lower() == "file":
                        self.uploads.append(self.current_form_url)

            for j in xrange(len(textareas_attributes[i])):
                tmpdict = {}
                for k, v in dict(textareas_attributes[i][j]).items():
                    tmpdict[k.lower()] = v
                if "name" in tmpdict:
                    self.form_values.append([tmpdict['name'], u'on'])

            for j in xrange(len(selects_attributes[i])):
                tmpdict = {}
                for k, v in dict(selects_attributes[i][j]).items():
                    tmpdict[k.lower()] = v
                if "name" in tmpdict:
                    self.form_values.append([tmpdict['name'], u'on'])

            if self.current_form_method == "post":
                self.forms.append((self.current_form_url, self.form_values))
            else:
                l = ["=".join([k, v]) for k, v in self.form_values]
                l.sort()
                self.liens.append(self.current_form_url.split("?")[0] + "?" + "&".join(l))
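
Why the newline stripping at the top of feed() matters: "." does not match "\n" by default, so a form spanning several lines is only found once the newlines are removed (made-up snippet):

import re

html = "<form action='/s'>\n<input name='q'>\n</form>"
print(re.findall('<form.*?>.*?</form>', html))                   # []
print(re.findall('<form.*?>.*?</form>', html.replace("\n", "")))
# ["<form action='/s'><input name='q'></form>"]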
Exemplo n.º 54
def getWrittenUrls(data):
    # re.findall already returns [] when there is no match.
    return re.findall(r"(?P<url>https?://[^|]+)\|", data)
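
Usage sketch, with a made-up pipe-delimited C2 list:

print(getWrittenUrls("http://c2-a.example/|http://c2-b.example/|"))
# ['http://c2-a.example/', 'http://c2-b.example/']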
Exemplo n.º 55
    def on_call(self, call, process):
        if call["api"] == "CreateProcessInternalW":
            flags = int(self.get_argument(call, "CreationFlags"), 16)
            if flags & 0x4:
                handle = self.get_argument(call, "ProcessHandle")
                self.suspended[handle] = self.get_argument(call, "ProcessId")

        elif call["api"] == "WriteProcessMemory":
            buf = self.get_argument(call, "Buffer")
            if any(string in buf for string in self.bufContents):
                handle = self.get_argument(call, "ProcessHandle")
                if handle in self.suspended:
                    self.badPid = self.suspended[handle]
                    check = getWrittenUrls(buf)
                    if len(check) >= 2:
                        self.c2s = check

        elif call["api"] == "NtClose":
            if call["status"]:
                handle = self.get_argument(call, "Handle")
                if handle in self.suspended:
                    del self.suspended[handle]

        elif call["api"] == "RtlDecompressBuffer":
            buf = self.get_argument(call, "UncompressedBuffer")
            if "Cookie:disclaimer_accepted=true" in buf:
                self.badPid = str(process["process_id"])
                check = getWrittenUrls(buf)
                if len(check) >= 2:
                    self.c2s = check

        elif call["api"] == "InternetCrackUrlA":
            if process["process_id"] == self.badPid and self.netSequence == 0:
                if call["status"]:
                    self.currentUrl = self.get_argument(call, "Url")
                    self.netSequence += 1

        elif call["api"] == "HttpOpenRequestA":
            if process["process_id"] == self.badPid and self.netSequence == 1:
                if call["status"]:
                    method = self.get_argument(call, "Verb")
                    if method and method == "POST":
                        self.netSequence += 1

        elif call["api"] == "HttpSendRequestA":
            if process["process_id"] == self.badPid and self.netSequence == 2:
                pData = self.get_argument(call, "PostData")
                if pData and all(word in pData for word in self.keywords):
                    self.found = True
                    c2 = {"C2": self.currentUrl}
                    if c2 not in self.data:
                        self.data.append(c2)
                self.netSequence = 0

        elif call["api"] == "InternetReadFile":
            if call["status"] and str(process["process_id"]) == self.badPid:
                buf = self.get_argument(call, "Buffer")
                if buf and buf.startswith("{") and buf.strip().endswith("}"):
                    check = re.findall(":(?P<url>https?://[^\}]+)\}", buf)
                    if check:
                        self.c2s += check

        return None
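
A last sketch (made-up JSON-ish buffer) of the C2 extraction in the InternetReadFile branch above:

import re

buf = '{"node":http://c2-a.example/gate}'
print(re.findall(r":(?P<url>https?://[^}]+)\}", buf))
# ['http://c2-a.example/gate']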