Example #1
    def _search(self, searchTerms, logPath, files, searchForNick, includeToday, reverse):
        if searchForNick:
            pattern = re2.compile(fr"^\[[^]]+\]\s+<(.?{searchTerms})>\s+.*", re2.IGNORECASE)
        else:
            pattern = re2.compile(fr'.*<.*> .*({searchTerms}).*', re2.IGNORECASE)
        found = None

        if not includeToday:
            today = f"{strftime('%Y-%m-%d')}.log"
            if today in files:
                files.remove(today)

        if reverse:
            files.reverse()
        for filename in files:
            with open(os.path.join(logPath, filename), 'r', errors='ignore') as logfile:
                if reverse:
                    lines = reversed(logfile.readlines())
                else:
                    lines = logfile.readlines()
            if reverse and includeToday:
                lines = list(lines)[1:]
            for line in lines:
                if pattern.match(line.rstrip()):
                    found = line.rstrip()
                    break
            if found:
                return f'[{filename[:10]}] {found}'
        return 'Nothing that matches your search terms has been found in the log.'
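A quick illustration of the nick pattern above, using the stdlib re module as a stand-in for re2 and a made-up log line; the leading .? allows one optional status prefix (such as @ or +) before the nick:

import re

nick_pattern = re.compile(r"^\[[^]]+\]\s+<(.?Alice)>\s+.*", re.IGNORECASE)
assert nick_pattern.match("[12:34:56] <@Alice> hello there")    # @-prefixed nick
assert nick_pattern.match("[12:34:56] <alice> case-insensitive match")
assert not nick_pattern.match("[12:34:56] <Bob> from someone else")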
Example #2
    def run(self):
        if self.results["target"]["category"] == "file":
            return False
 
        self.ie_paths_re = re.compile(r"^c:\\program files(?:\s\(x86\))?\\internet explorer\\iexplore\.exe$", re.I)
        #run through re.escape()
        self.white_list_re = ["^C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Adobe\\\\Reader\\ \\d+\\.\\d+\\\\Reader\\\\AcroRd32\\.exe$",
                         "^C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Java\\\\jre\\d+\\\\bin\\\\j(?:avaw?|p2launcher)\\.exe$",
                         "^C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Microsoft SilverLight\\\\(?:\\d+\\.)+\\d\\\\agcp.exe$",
                         "^C\\:\\\\Windows\\\\System32\\\\ntvdm\\.exe$",
                         "^C\\:\\\\Windows\\\\system32\\\\rundll32\\.exe$",
                         "^C\\:\\\\Windows\\\\syswow64\\\\rundll32\\.exe$",
                         "^C\\:\\\\Windows\\\\system32\\\\drwtsn32\\.exe$",
                         "^C\\:\\\\Windows\\\\syswow64\\\\drwtsn32\\.exe$",
                         "^C\\:\\\\Windows\\\\system32\\\\dwwin\\.exe$"
                        ]
        #means we can be evaded but also means we can have relatively tight paths between 32-bit and 64-bit
        self.white_list_re_compiled = []
        for entry in self.white_list_re:
            self.white_list_re_compiled.append(re.compile(entry,re.I))
        self.white_list_re_compiled.append(self.ie_paths_re)

        # Sometimes when a service gets loaded, the process tree is out of order; iterate over the IE processes to find the path of the initial monitored executable.
        self.initialpath = None
        processes = self.results["behavior"]["processtree"]
        if len(processes):
            for p in processes:
                initialpath = p["module_path"].lower()
                if initialpath and self.ie_paths_re.match(initialpath) and p.has_key("children"):
                    self.martians = self.find_martians(p,self.white_list_re_compiled)
                    if len(self.martians) > 0:
                        for martian in self.martians:
                            self.data.append({"ie_martian": martian})
                        return True 
        return False
Example #3
    def substitute(self, search, replace, flags, text, inputMessage, channel):
        # Apparently re.sub understands escape sequences in the replacement string;
        #  strip all but the backreferences
        replace = replace.replace('\\', '\\\\')
        replace = re.sub(r'\\([1-9][0-9]?([^0-9]|$))', r'\1', replace)

        if channel not in self.messages:
            self.messages[channel] = []
            self.unmodifiedMessages[channel] = []

        messages = self.unmodifiedMessages[channel] if 'o' in flags else self.messages[channel]

        if 'g' in flags:
            count = 0
        else:
            count = 1

        subFlags = 0
        if 'i' in flags:
            subFlags |= re.IGNORECASE
        if 'v' in flags:
            subFlags |= re.VERBOSE

        if 'c' in flags:
            newMessage = copy.copy(inputMessage)

            try:
                searchC = re2.compile(search, subFlags)
                new = searchC.sub(replace, text, count)
            except sre_constants.error as e:
                newMessage.messageString = "[Regex Error in Sed pattern: {}]".format(e.message)
                return newMessage

            if new != text:
                newMessage.messageString = new
                self.storeMessage(newMessage, False)
            else:
                newMessage.messageString = text
                self.storeMessage(newMessage, False)

            return newMessage

        for message in reversed(messages):
            try:
                searchC = re2.compile(search, subFlags)
                new = searchC.sub(replace, message.messageString, count)
            except sre_constants.error as e:
                newMessage = copy.copy(inputMessage)
                newMessage.messageString = "[Regex Error in Sed pattern: {}]".format(e.message)
                return newMessage

            new = new[:300]

            if searchC.search(message.messageString):
                newMessage = copy.copy(message)
                newMessage.messageString = new
                self.storeMessage(newMessage, False)
                return newMessage

        return None
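A note on the backslash handling at the top of substitute(): re.sub interprets escape sequences in the replacement string, so every backslash is doubled first and then single backslashes are restored only in front of backreferences. A minimal standalone sketch (the function name is illustrative):

import re

def protect_replacement(replace):
    # Double every backslash so re.sub treats it as a literal...
    replace = replace.replace('\\', '\\\\')
    # ...then restore the backreferences \1 through \99.
    return re.sub(r'\\([1-9][0-9]?([^0-9]|$))', r'\1', replace)

# '\1' still expands to the captured group; '\n' stays a literal backslash-n.
print(re.sub(r'(cat)', protect_replacement(r'\1\n'), 'cat'))  # -> cat\n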
Example #4
    def _search(self, searchTerms, logPath, files, searchForNick, includeToday, reverse):
        if searchForNick:
            pattern = re2.compile(r"^\[[^]]+\]\s+<(.?{})>\s+.*".format(searchTerms), re.IGNORECASE)
        else:
            pattern = re2.compile(r".*<.*> .*({}).*".format(searchTerms), re.IGNORECASE)
        found = None

        if not includeToday:
            today = "{}.log".format(strftime("%Y-%m-%d"))
            if today in files:
                files.remove(today)

        if reverse:
            files.reverse()
        for filename in files:
            with open(os.path.join(logPath, filename), "r") as logfile:
                if reverse:
                    lines = reversed(logfile.readlines())
                else:
                    lines = logfile.readlines()
            if reverse and includeToday:
                lines = list(lines)[1:]
            for line in lines:
                if pattern.match(line.rstrip()):
                    found = line.rstrip()
                    break
            if found:
                return "[{}] {}".format(filename[:10], found)
        return "Nothing that matches your search terms has been found in the log."
Example #5
 def test_empty_array(self):
      # SF bug 1647541
     import array
     for typecode in 'cbBuhHiIlLfd':
         a = array.array(typecode)
         self.assertEqual(re.compile("bla").match(a), None)
         self.assertEqual(re.compile("").match(a).groups(), ())
Example #6
 def test_bug_926075(self):
     try:
         unicode
     except NameError:
         return # no problem if we have no unicode
     self.assert_(re.compile('bug_926075') is not
                  re.compile(eval("u'bug_926075'")))
Example #7
    def __init__(self,regular_expressions):
        """ Initialize the object 

            regular_expressions: an iterable set of regular expressions to
                be applied for extracting mutations. These are in the 
                default python syntax (i.e., perl regular expressions), with 
                the single exception being that regular expressions which
                should be performed in a case sensitive manner should be 
                followed by the string '[CASE_SENSITIVE]', with no spaces 
                between it and the regular expression. 
                This can be a list, a file, or any other object which supports
                iteration. For an example, you should refer to the regex.txt
                file in the MutationFinder directory.

        """
        MutationExtractor.__init__(self)
        self._regular_expressions = []

        for regular_expression in regular_expressions:
            if regular_expression.endswith('[CASE_SENSITIVE]'):
                self._regular_expressions.append(\
                 compile(regular_expression[:regular_expression.rindex('[')]))
            else:
                self._regular_expressions.append(\
                 compile(regular_expression,IGNORECASE))
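A sketch of the '[CASE_SENSITIVE]' convention the docstring describes, with made-up patterns standing in for the contents of regex.txt:

from re import compile, IGNORECASE

lines = [r'[A-Z]\d+[A-Z]',                       # applied case-insensitively
         r'p\.[A-Z]\d+[A-Z][CASE_SENSITIVE]']    # suffix stripped, case-sensitive
compiled = []
for rx in lines:
    if rx.endswith('[CASE_SENSITIVE]'):
        compiled.append(compile(rx[:rx.rindex('[')]))
    else:
        compiled.append(compile(rx, IGNORECASE))
assert compiled[0].search('a64t')        # matches despite lowercase
assert compiled[1].search('p.A64T')
assert not compiled[1].search('P.A64T')  # case matters in this branch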
Example #8
    def run(self):
        self.ie_paths_re = re.compile(r"^c:\\program files(?:\s\(x86\))?\\internet explorer\\iexplore\.exe$", re.I)
        #run through re.escape()
        self.white_list_re = ["^C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Adobe\\\\Reader\\ \\d+\\.\\d+\\\\Reader\\\\AcroRd32\\.exe$",
                         "^C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Java\\\\jre\\d+\\\\bin\\\\j(?:avaw?|p2launcher)\\.exe$",
                         "^C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Microsoft SilverLight\\\\(?:\\d+\\.)+\\d\\\\agcp.exe$",
                         "^C\\:\\\\Windows\\\\System32\\\\ntvdm.exe$",
                        ]
        #means we can be evaded but also means we can have relatively tight paths between 32-bit and 64-bit
        self.white_list_re_compiled = []
        for entry in self.white_list_re:
            self.white_list_re_compiled.append(re.compile(entry,re.I))
        self.white_list_re_compiled.append(self.ie_paths_re)

        # get the path of the initial monitored executable
        self.initialpath = None
        processes = self.results["behavior"]["processtree"]
        if len(processes):
            self.initialpath = processes[0]["module_path"].lower()
        if self.initialpath and self.ie_paths_re.match(self.initialpath) and processes[0].has_key("children"):
            self.martians = self.find_martians(processes, self.white_list_re_compiled)
            if len(self.martians) > 0:
                for martian in self.martians:
                    self.data.append({"ie_martian": martian})
                return True
        return False
Example #9
    def test_re_match(self):
        self.assertEqual(re.match('a', 'a').groups(), ())
        self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
        self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
        self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
        self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))

        pat = re.compile('((a)|(b))(c)?')
        self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
        self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
        self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
        self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
        self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))

        # A single group
        m = re.match('(a)', 'a')
        self.assertEqual(m.group(0), 'a')
        self.assertEqual(m.group(0), 'a')
        self.assertEqual(m.group(1), 'a')
        self.assertEqual(m.group(1, 1), ('a', 'a'))

        pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
        self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
        self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
                         (None, 'b', None))
        self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Example #10
 def __init__(self, *args, **kwargs):
     Signature.__init__(self, *args, **kwargs)
     # Named group to extract the URL of the cloned website.
     self.rex = {
         "saved from url": re.compile(r"\<!--\ssaved\sfrom\surl=\(\d+\)(?P<url>[^\s]+)", re.I),
         "mirrored from": re.compile(r"<!--\smirrored\sfrom\s(?P<url>[^\s]+)\sby\sHTTrack", re.I),
     }
     self.hits = set()
Example #11
 def _prepare_pattern(self, pattern):
     """
     Strip out key:value pairs from the pattern and compile the regular expression.
     """
     regex, _, rest = pattern.partition('\\;')
     try:
         return re.compile(regex, re.I)
     except re.error as e:
         warnings.warn("Caught '{error}' compiling regex: {regex}".format(error=e, regex=regex))
         return re.compile(r'(?!x)x') # regex that never matches: http://stackoverflow.com/a/1845097/413622
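The fallback deserves a note: (?!x)x demands an 'x' at a position where the lookahead has just forbidden 'x', so it can never match anything. A quick check:

import re

never = re.compile(r'(?!x)x')
assert never.search('') is None
assert never.search('x') is None
assert never.search('xyzxyz') is None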
Example #12
def _compile_regex(regex_string, flags=0):
    try:
        if re2:
            # default max_mem is 8<<20 = 8*1000*1000
            return re.compile(regex_string, max_mem=60*1000*1000, flags=flags)
        else:
            return re.compile(regex_string, flags=flags)
    except:
        logging.exception("Error compiling with flags %s for string: %s", flags, regex_string)
        raise
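The function above presupposes a module-level import dance roughly like the following; this is a guess, not confirmed by the source (pyre2 exposes a re-compatible API whose compile() accepts a max_mem option):

try:
    import re2 as re   # prefer pyre2's re-compatible API when available
    import re2         # keep a truthy module reference for the `if re2:` check
except ImportError:
    re2 = None
    import re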
Example #13
def test_regex(regex_array):
    """Ensures the regex strings are validated for proper syntax.

    """
    for regex_entry in regex_array:
        try:
            re.compile(regex_entry, re.MULTILINE | re.UNICODE)
        except re.error:
            logging.error('Invalid Regex Found: %s', regex_entry)
            sys.exit(1)
Example #14
    def test_dollar_matches_twice(self):
        "$ matches the end of string, and just before the terminating \n"
        pattern = re.compile('$')
        self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
        self.assertEqual(pattern.sub('#', '\n'), '#\n#')

        pattern = re.compile('$', re.MULTILINE)
        self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
        self.assertEqual(pattern.sub('#', '\n'), '#\n#')
Example #15
    def run(self):
        if self.results["target"]["category"] == "url":
            return False

        office_pkgs = ["ppt","doc","xls","eml"]
        if not any(e in self.results["info"]["package"] for e in office_pkgs):
            return False

        self.office_paths_re = re.compile(r"^[A-Z]\:\\Program Files(?:\s\(x86\))?\\Microsoft Office\\(?:Office1[1-5]\\)?(?:WINWORD|OUTLOOK|POWERPNT|EXCEL|WORDVIEW)\.EXE$",re.I)
        #run through re.escape()
        #############################################
        #YOU MAY HAVE TO CUSTOMIZE THIS FOR YOUR ENV#
        #############################################
        self.white_list_re = ["C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Adobe\\\\Reader\\ \\d+\\.\\d+\\\\Reader\\\\AcroRd32\\.exe$",
                         "C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Java\\\\jre\\d+\\\\bin\\\\j(?:avaw?|p2launcher)\\.exe$",
                         "C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Microsoft SilverLight\\\\(?:\\d+\\.)+\\d\\\\agcp\\.exe$",
                         "C\\:\\\\Windows\\\\System32\\\\ntvdm\\.exe$",
                         "C\\:\\\\Windows\\\\System32\\\\svchost\\.exe$",
                         "C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\internet explorer\\\\iexplore\.exe$",
                         # remove this one at some point
                         "C\\:\\\\Windows\\\\System32\\\\rundll32\\.exe$",
                         "C\\:\\\\Windows\\\\System32\\\\drwtsn32\\.exe$",
                         "C\\:\\\\Windows\\\\splwow64\\.exe$",
                         "C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Common Files\\\\Microsoft Shared\\\\office1[1-6]\\\\off(?:lb|diag)\\.exe$",
                         "C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Common Files\\\\Microsoft Shared\\\\dw\\\\dw(?:20)?\\.exe$",
                         "C\\:\\\\Windows\\\\system32\\\\dwwin\\.exe$",
                         "C\\:\\\\Windows\\\\system32\\\\WerFault\\.exe$",
                         "C\\:\\\\Windows\\\\syswow64\\\\WerFault\\.exe$"
                        ]
        #means we can be evaded but also means we can have relatively tight paths between 32-bit and 64-bit
        self.white_list_re_compiled = []
        for entry in self.white_list_re:
            try:
                self.white_list_re_compiled.append(re.compile(entry,re.I))
            except Exception as e:
                print "failed to compile expression %s error:%s" % (entry,e)
        self.white_list_re_compiled.append(self.office_paths_re)

        # Sometimes when a service gets loaded, the process tree is out of order; iterate over the Office processes to find the path of the initial monitored executable.
        self.initialpath = None
        processes = self.results["behavior"]["processtree"]
        if len(processes):
            for p in processes:
                initialpath = p["module_path"].lower()
                if initialpath and self.office_paths_re.match(initialpath) and p.has_key("children"):
                    self.martians = self.find_martians(p,self.white_list_re_compiled)
                    if len(self.martians) > 0:
                        for martian in self.martians:
                            self.data.append({"office_martian": martian})
                        return True
        return False
Example #16
    def __init__(self):
        """ Initialize the object """
        MutationExtractor.__init__(self)
        word_regex_patterns = self._build_word_regex_patterns()
        string_regex_patterns = self._build_string_regex_patterns()

        self._word_regexs = []
        self._string_regexs = []
        self._replace_regex = compile(r'[^a-zA-Z0-9\s]')

        # Compile the regular expressions
        for regex_pattern in word_regex_patterns:
            self._word_regexs.append(compile(regex_pattern))
        for regex_pattern in string_regex_patterns:
            self._string_regexs.append(compile(regex_pattern))
Example #17
 def on_call(self, call, process):
     if call["api"] == "RegSetValueExA" and call["status"]:
         key = re.compile(".*\\\\SOFTWARE\\\\(Wow6432Node\\\\)?Microsoft\\\\Windows\\\\CurrentVersion\\\\Run\\\\cryptedinfo$")
         buff = self.get_argument(call, "Buffer").lower()
         fullname = self.get_argument(call, "FullName")
         if buff == "notepad c:\programdata\cryptinfo.txt" and key.match(fullname): 
             return True
Example #18
    def execute(self, message: IRCMessage):
        comicLimit = 8
        params = list(message.parameterList)
        if len(params) > 0 and string.isNumber(params[0]):
            comicLimit = int(params.pop(0))

        messages = self.getMessages(message.replyTo)
        if len(params) > 0:
            regex = re2.compile(" ".join(params), re2.IGNORECASE)
            matches = list(filter(regex.search, [msg[1] for msg in messages]))
            if len(matches) == 0:
                return IRCResponse(ResponseType.Say,
                                   "Sorry, that didn't match anything in my message buffer.",
                                   message.replyTo)
            elif len(matches) > 1:
                return IRCResponse(ResponseType.Say,
                                   "Sorry, that matches too many lines in my message buffer.",
                                   message.replyTo)

            index = [msg[1] for msg in messages].index(matches[0])
            lastIndex = index + comicLimit
            if lastIndex > len(messages):
                lastIndex = len(messages)
            messages = messages[index:lastIndex]
        else:
            messages = messages[comicLimit * -1:]
        if messages:
            comicBytes = self.makeComic(messages)
            return IRCResponse(ResponseType.Say, self.postComic(comicBytes), message.replyTo)
        else:
            return IRCResponse(ResponseType.Say,
                               "There are no messages in the buffer to create a comic with.",
                               message.replyTo)
Example #19
    def start(self, path):
        root = os.environ["TEMP"]
        password = self.options.get("password")
        exe_regex = re.compile(r'(\.exe|\.scr|\.msi|\.bat|\.lnk)$', flags=re.IGNORECASE)
        zipinfos = self.get_infos(path)
        self.extract_zip(path, root, password, 0)

        file_name = self.options.get("file")
        # If no file name is provided via option, take the first file.
        if not file_name:
            # No name provided try to find a better name.
            if len(zipinfos):
                # Attempt to find a valid exe extension in the archive
                for f in zipinfos:
                    if exe_regex.search(f.filename):
                        file_name = f.filename
                        break
                # Default to the first one if none found
                file_name = file_name if file_name else zipinfos[0].filename
                log.debug("Missing file option, auto executing: {0}".format(file_name))
            else:
                raise CuckooPackageError("Empty ZIP archive")

        file_path = os.path.join(root, file_name)
        return self.execute(file_path, self.options.get("arguments"), file_path)
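A quick sanity check of the extension regex used above, with synthetic file names:

import re

exe_regex = re.compile(r'(\.exe|\.scr|\.msi|\.bat|\.lnk)$', flags=re.IGNORECASE)
assert exe_regex.search('dropper.EXE')     # suffix match is case-insensitive
assert exe_regex.search('shortcut.lnk')
assert not exe_regex.search('readme.txt')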
Example #20
def score_pattern(pattern, pos, neg):
	"""Calculates a one-tailed p-value for a pattern using Fisher's exact test
	
	:param pattern: regular expression search pattern
	:type pattern: string
	:param pos: list of strings where the outcome was positive
	:type pos: list of strings
	:param neg: list of strings where the outcome was negative
	:type neg: list of strings
	:returns: one-tailed p-value
	:rtype: float
	"""
	p=re.compile(pattern)
	pp=0
	pn=0
	for l in pos:
		if p.search(l):
			pp+=1
	for l in neg:
		if p.search(l):
			pn+=1
	s1=getLogPvalue(pp, len(pos), pn, len(neg))
	s2=getLogPvalue(pn, len(neg),pp, len(pos))

	return min(s1,s2)
Example #21
    def when(self, context):
        if not self.regex:
            regex = ')('.join(e.re_value() for e in self.expressions)
            self.regex = re.compile('(' + regex + ')', self.modifiers)

        end = context.start+SEARCH_WINDOW
        return self.regex.match(context.input[context.start:end])
Example #22
def render_pony(name, text, balloonstyle, width=80, center=False, centertext=False):
	pony = load_pony(name)
	balloon = link_l = link_r = ''
	if text:
		[link_l, link_r] = balloonstyle[-2:]
	for i,line in enumerate(pony):
		match = re.search('\$balloon([0-9]*)\$', line)
		if match:
			minwidth = int(match.group(1) or '0')
			pony[i:i+1] = render_balloon(text, balloonstyle, minwidth=minwidth, maxwidth=int(width/2), pad=str.center if centertext else str.ljust)
			break
	try:
		first = pony.index('$$$')
		second = pony[first+1:].index('$$$')
		pony[first:] = pony[first+1+second+1:]
	except ValueError:
		pass
	pony = [ line.replace('$\\$', link_l).replace('$/$', link_r) for line in pony ]
	indent = ''
	if center:
		ponywidth = max([ len(re.sub(r'\x1B\[[0-9;]+m|\$.*\$', '', line)) for line in pony ])
		indent = ' '*int((width-ponywidth)/2)
	wre = re.compile('((\x1B\[[0-9;]+m)*.){0,%s}' % width)
	reset = '\n'
	return indent+(reset+indent).join([ wre.search(line).group() for line in pony ])+reset
Example #23
def AWGetStripDialogue(yyyy=None, mm=None, dd=None, urlstring=None):
	"""
	Get a strip's dialogue from ohnorobot.com for a given date.
	
	This works by taking a URL like this:
	"http://www.ohnorobot.com/index.pl?s=%s+%s+%s&Search=Search&comic=636&e=0&n=0&b=0&m=0&d=0&t=0" % (
		monthnames[mm], dd, yyyy
	)
	... and looking up the specific AWAchewoodDate in the mess of return data to find the dialogue.
	
	"""
	if urlstring:
		bar = AWGetStripAssetbarData(urlstring=urlstring)
		yyyy, mm, dd = bar['year'], bar['month'], bar['day']
	
	dsurl = "http://www.ohnorobot.com/index.pl?s=%s+%s+%s&Search=Search&comic=636&e=0&n=0&b=0&m=0&d=0&t=0" % (
		monthnames[int(mm)], dd, yyyy
	)
	dsearch = soup(dsurl)
	dlg = filter(lambda li: li.find('a', {'class':"searchlink", 'href':re.compile("%s$" % AWAchewoodDate(yyyy, mm, dd))}), dsearch.findAll('li'))
	
	if len(dlg) == 1:
		#return strip_entities(strip_tags(dlg.pop()))
		return strip_tags(dlg.pop())
	return u""
Example #24
    def run(self):
        indicators = [
            "UPDATE__",
            "MAIN_.*",
            "BACKUP_.*"
        ]

        count = 0
        for indicator in indicators:
            if self.check_mutex(pattern=indicator, regex=True):
                count += 1

        if count == len(indicators):
            return True

        athena_http_re = re.compile("a=(%[A-Fa-f0-9]{2})+&b=[-A-Za-z0-9+/]+(%3[dD])*&c=(%[A-Fa-f0-9]{2})+")

        if "network" in self.results:
            httpitems = self.results["network"].get("http")
            if not httpitems:
                return False
            for http in httpitems:
                if http["method"] == "POST" and athena_http_re.search(http["body"]):
                    self.data.append({"url" : http["uri"], "data" : http["body"]})
                    return True

        return False
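For reference, here is a synthetic POST body in the shape the Athena pattern expects, with percent-encoded a/c values around a base64-style b value (made-up data):

import re

athena_http_re = re.compile("a=(%[A-Fa-f0-9]{2})+&b=[-A-Za-z0-9+/]+(%3[dD])*&c=(%[A-Fa-f0-9]{2})+")
assert athena_http_re.search("a=%74%65%73%74&b=QUJDRA%3d&c=%6f%6b")
assert not athena_http_re.search("a=plain&b=QUJDRA&c=%6f")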
Example #25
 def _gen_keyword(self, content):
     """
     Generator for self.keywords (dictionary made of regexps
     as keys and their categories as values)
     """
     for line in content[2].split("\n")[1:-1]:
         # Comments start with //
         if line and not line.startswith("//"):
             line = line.split("\t")
             # If not using a dictionary made of regexps,
             # fix the keyword up for regexping:
             # "\b" is added at the beginning of every keyword;
             # if the keyword doesn't end with "*", a trailing "\b" is added
             # bad  -> \bbad\b matches "bad" but not "badass"
             # bad* -> \bbad   matches "bad" and "badass"
             if not self.dic_regex:
                 line[0] = "".join(["\\b", line[0]])
                 try:
                     if (line[0][-1] == "*"):
                         line[0] = line[0][:-1]
                     else:
                         line[0] = "".join([line[0], "\\b"])
                 except IndexError:
                     continue
             yield (re.compile(line[0], re.IGNORECASE), line[1:])
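A standalone check of the keyword-to-regex conversion described in the comments, using synthetic keywords:

import re

exact = re.compile(r'\bbad\b', re.IGNORECASE)   # built from the keyword "bad"
prefix = re.compile(r'\bbad', re.IGNORECASE)    # built from the keyword "bad*"
assert exact.search('a bad day') and not exact.search('badass')
assert prefix.search('a bad day') and prefix.search('badass')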
Example #26
    def handleEvent(self, event):
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        # We only want web content from the target
        if srcModuleName != "sfp_spider":
            return None

        eventSource = event.sourceEvent.data
        self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

        if eventSource not in self.results.keys():
            self.results[eventSource] = list()

        # We only want web content for pages on the target site
        if not self.getTarget().matches(self.sf.urlFQDN(eventSource)):
            self.sf.debug("Not collecting web content information for external sites.")
            return None

        for regexpGrp in regexps.keys():
            if regexpGrp in self.results[eventSource]:
                continue

            for regex in regexps[regexpGrp]:
                pat = re.compile(regex, re.IGNORECASE)
                matches = re.findall(pat, eventData)
                if len(matches) > 0 and regexpGrp not in self.results[eventSource]:
                    self.sf.info("Matched " + regexpGrp + " in content from " + eventSource)
                    self.results[eventSource].append(regexpGrp)
                    evt = SpiderFootEvent("ERROR_MESSAGE", regexpGrp,
                                          self.__name__, event.sourceEvent)
                    self.notifyListeners(evt)

        return None
Example #27
    def when(self, context):
        if not self.regex:
            regex      = ')('.join(e.re_value() for e in self.expressions)
            unire      = unicode(regex, 'latin1')
            self.regex = re.compile('(' + unire + ')', self.modifiers)

        return self.regex.match(context.input, context.start)
Example #28
def build_smile_re(dsmile):
    out = {}
    for name, lsmile in dsmile.items():
        out[name] = re.compile(r'(?: %s)' % (r'| '.join(lsmile)))
        #print name, r'(?:\s%s)' % (r'|\s'.join(lsmile))

    return out
Example #29
    def run(self):
        # Check zeus synchronization-mutex.
        # Regexp pattern for zeus synchronization-mutex such as for example:
        # 2CCB0BFE-ECAB-89CD-0261-B06D1C10937F
        exp = re.compile(".*[A-Z0-9]{8}-([A-Z0-9]{4}-){3}[A-Z0-9]{12}", re.IGNORECASE)
        mutexes = self.results["behavior"]["summary"]["mutexes"]
        mutexset = set()
        count = 0
        for mutex in mutexes:
            if exp.match(mutex):
                mutexset.add(mutex)
                count += 1 

        # Check that at least 5 mutexes matching the pattern were opened.
        if count < 5:
            return False
        
        # Check for UDP Traffic on remote port greater than 1024.
        # TODO: this might be faulty without checking whether the destination
        # IP is really valid.
        count = 0
        if "network" in self.results:
            for udp in self.results["network"]["udp"]:
                if udp["dport"] > 1024:
                    count += 1
            
        if count < 4:
            return False

        for mutex in mutexset:
            self.data.append({"mutex": mutex})

        return True
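The GUID-style mutex pattern can be verified against the example from the comment above:

import re

exp = re.compile(".*[A-Z0-9]{8}-([A-Z0-9]{4}-){3}[A-Z0-9]{12}", re.IGNORECASE)
assert exp.match("2CCB0BFE-ECAB-89CD-0261-B06D1C10937F")
assert not exp.match("not-a-guid")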
Example #30
 def test_bug_1661(self):
     # Verify that flags do not get silently ignored with compiled patterns
     pattern = re.compile('.')
     self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
     self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
     self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
     self.assertRaises(ValueError, re.compile, pattern, re.I)
Example #31
    def _add_domain(self, domain):
        """Add a domain to unique list.
        @param domain: domain name.
        """
        filters = [
            ".*\\.windows\\.com$", ".*\\.in\\-addr\\.arpa$", ".*\\.ip6\\.arpa$"
        ]

        regexps = [re.compile(filter) for filter in filters]
        for regexp in regexps:
            if regexp.match(domain):
                return

        for entry in self.unique_domains:
            if entry["domain"] == domain:
                return

        self.unique_domains.append({
            "domain": domain,
            "ip": self._dns_gethostbyname(domain)
        })
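A quick check of the noise filters above against synthetic domains:

import re

filters = [".*\\.windows\\.com$", ".*\\.in\\-addr\\.arpa$", ".*\\.ip6\\.arpa$"]
regexps = [re.compile(f) for f in filters]
assert any(r.match("time.windows.com") for r in regexps)          # filtered out
assert any(r.match("1.0.168.192.in-addr.arpa") for r in regexps)  # filtered out
assert not any(r.match("example.org") for r in regexps)           # kept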
Example #32
    def handleEvent(self, event):
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        # We only want web content from the target
        if srcModuleName != "sfp_spider":
            return None

        eventSource = event.actualSource

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        if eventSource not in self.results.keys():
            self.results[eventSource] = list()

        # We only want web content for pages on the target site
        if not self.getTarget().matches(self.sf.urlFQDN(eventSource)):
            self.sf.debug(
                "Not collecting web content information for external sites.")
            return None

        for regexpGrp in regexps.keys():
            if regexpGrp in self.results[eventSource]:
                continue

            for regex in regexps[regexpGrp]:
                pat = re.compile(regex, re.IGNORECASE)
                matches = re.findall(pat, eventData)
                if len(matches
                       ) > 0 and regexpGrp not in self.results[eventSource]:
                    self.sf.info("Matched " + regexpGrp + " in content from " +
                                 eventSource)
                    self.results[eventSource].append(regexpGrp)
                    evt = SpiderFootEvent("ERROR_MESSAGE", regexpGrp,
                                          self.__name__, event)
                    self.notifyListeners(evt)

        return None
Example #33
    def start(self, path):
        root = os.environ["TEMP"]
        password = self.options.get("password")
        exe_regex = re.compile(r'(\.exe|\.scr|\.msi|\.bat|\.lnk|\.js|\.jse|\.vbs|\.vbe|\.wsf)$', flags=re.IGNORECASE)
        zipinfos = self.get_infos(path)
        self.extract_zip(path, root, password, 0)

        file_name = self.options.get("file")
        # If no file name is provided via option, take the first file.
        if not file_name:
            # No name provided try to find a better name.
            if len(zipinfos):
                # Attempt to find a valid exe extension in the archive
                for f in zipinfos:
                    if exe_regex.search(f.filename):
                        file_name = f.filename
                        break
                # Default to the first one if none found
                file_name = file_name if file_name else zipinfos[0].filename
                log.debug("Missing file option, auto executing: {0}".format(file_name))
            else:
                raise CuckooPackageError("Empty ZIP archive")


        file_path = os.path.join(root, file_name)
        log.debug("file_name: \"%s\"" % (file_name))
        if file_name.lower().endswith(".lnk"):
            cmd_path = self.get_path("cmd.exe")
            cmd_args = "/c start /wait \"\" \"{0}\"".format(file_path)
            return self.execute(cmd_path, cmd_args, file_path)
        elif file_name.lower().endswith(".msi"):
            msi_path = self.get_path("msiexec.exe")
            msi_args = "/I \"{0}\"".format(file_path)
            return self.execute(msi_path, msi_args, file_path)
        elif file_name.lower().endswith((".js", ".jse", ".vbs", ".vbe", ".wsf")):
            wscript = self.get_path_app_in_path("wscript.exe")
            wscript_args = "\"{0}\"".format(file_path)
            return self.execute(wscript, wscript_args, file_path)
        else:
            return self.execute(file_path, self.options.get("arguments"), file_path)
Example #34
def getLinks(html):
    soup = BeautifulSoup(html)
    links = []
    # Collect links in the original tag/attribute/scheme order.
    for tag, attr, scheme in [('a', 'href', "^http://"),
                              ('a', 'href', "^https://"),
                              ('iframe', 'src', "^https://"),
                              ('iframe', 'src', "^http://"),
                              ('script', 'src', "^https://"),
                              ('script', 'src', "^http://")]:
        for link in soup.findAll(tag, attrs={attr: re.compile(scheme)}):
            links.append(link.get(attr))
    return links
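Hypothetical usage, assuming BeautifulSoup (bs4 or BeautifulSoup 3) and re are imported; the output order follows the tag/scheme ordering above:

html = '<a href="http://example.com">x</a><script src="https://example.com/a.js"></script>'
print(getLinks(html))
# -> ['http://example.com', 'https://example.com/a.js']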
Example #35
 def _remove(self, message: IRCMessage):
     """remove <regex> - remove a quote from the OutOfContext log."""
     if len(message.parameterList) < 2:
         return IRCResponse(ResponseType.Say, "Remove what?",
                            message.replyTo)
     if len(self.storage) == 0 or message.replyTo not in self.storage:
         return IRCResponse(ResponseType.Say,
                            "There are no quotes in the log.",
                            message.replyTo)
     regex = re2.compile(" ".join(message.parameterList[1:]),
                         re2.IGNORECASE)
     matches = list(filter(regex.search, self.storage[message.replyTo]))
     if len(matches) == 0:
         return IRCResponse(ResponseType.Say,
                            "That message is not in the log.",
                            message.replyTo)
     if len(matches) > 1:
         return IRCResponse(
             ResponseType.Say,
             f"Unable to remove quote, {len(matches)} matches found.",
             message.replyTo)
     return self._removeQuote(message.replyTo, matches[0])
Example #36
def test_float():
    print re('[#ss #float #es]')
    f = compile('[#ss #float #es]')
    assert f.match('0.0')
    assert f.match('-0.0')
    assert f.match('0.0e1')
    assert f.match('-0.0e1')
    assert f.match('0.0e-1')
    assert f.match('0.0E1')
    assert f.match('0.')
    assert f.match('0.e1')
    assert f.match('.0')
    assert f.match('.0e1')
    assert f.match('0e1')
    assert not f.match('0')
    assert not f.match('.')
    assert not f.match('.e1')
    assert not f.match('0.0e')
    assert f.match('1024.12e3')
    assert f.match('-1024.12e-3')
    assert f.match('-.12e3')
    assert f.match('-1024.12E-3')
Example #37
    def test_re_groupref_exists(self):
        self.assertEqual(
            re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(), ('(', 'a'))
        self.assertEqual(
            re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(), (None, 'a'))
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
        self.assertEqual(
            re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(), ('a', 'b'))
        self.assertEqual(
            re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(), (None, 'd'))
        self.assertEqual(
            re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(), (None, 'd'))
        self.assertEqual(
            re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(), ('a', ''))

        # Tests for bug #1177831: exercise groups other than the first group
        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
        self.assertEqual(p.match('abc').groups(), ('a', 'b', 'c'))
        self.assertEqual(p.match('ad').groups(), ('a', None, 'd'))
        self.assertEqual(p.match('abd'), None)
        self.assertEqual(p.match('ac'), None)
Example #38
async def ss(text, channel, reply, event, bot, storage):
    """<regex replacement> - replace regex with replacement"""

    delete_if_needed(storage, event)

    text = text.split(maxsplit=1)
    if len(text) == 0 or len(text) > 2:
        msg = "Invalid format"
        msg += ": " + "\n`" + "<regex replacement> - replace regex with replacement`"
        msg += (
            "\n`" +
            "if only the regex is specified, it will be replaced with a blankspace`"
        )
        reply(msg, timeout=15)
        return

    if len(text) == 1:
        text.append("")

    replied_to = await event.msg.reference()
    if replied_to:
        messages = [replied_to]
    else:
        messages = await channel.async_get_latest_messages(MAX_LEN)

    try:
        regex = re2.compile(text[0])
    except:
        reply("You don't have a valid regex")
        return

    for msg in messages:
        if (msg.id == event.msg.id or msg.author.id == bot.get_own_id()
                or msg.text.startswith(".s")):
            continue
        if regex.search(msg.text) is not None:
            msg = "<%s> %s" % (msg.author.name, regex.sub(text[1], msg.text))
            reply(msg)
            return
Example #39
def search_behavior(request, task_id):
    if request.method == 'POST':
        query = request.POST.get('search')
        # Compile the search expression once, outside the per-call loops.
        query = re.compile(query)
        results = []

        # Fetch analysis report
        record = results_db.analysis.find_one(
            {"info.id": int(task_id)}
        )

        # Loop through every process
        for process in record["behavior"]["processes"]:
            process_results = []

            chunks = results_db.calls.find({
                "_id": { "$in": process["calls"] }
            })
            for chunk in chunks:
                for call in chunk["calls"]:
                    if query.search(call['api']):
                        process_results.append(call)
                    else:
                        for argument in call['arguments']:
                            if query.search(argument['name']) or query.search(argument['value']):
                                process_results.append(call)
                                break

            if len(process_results) > 0:
                results.append({
                    'process': process,
                    'signs': process_results
                })

        return render_to_response("analysis/behavior/_search_results.html",
                                  {"results": results},
                                  context_instance=RequestContext(request))
    else:
        raise PermissionDenied
Example #40
    def execute(self, message: IRCMessage):
        comicLimit = 8
        params = list(message.parameterList)
        if len(params) > 0 and string.isNumber(params[0]):
            comicLimit = int(params.pop(0))

        messages = self.getMessages(message.replyTo)
        if len(params) > 0:
            regex = re2.compile(" ".join(params), re2.IGNORECASE)
            matches = list(filter(regex.search, [msg[1] for msg in messages]))
            if len(matches) == 0:
                return IRCResponse(
                    ResponseType.Say,
                    "Sorry, that didn't match anything in my message buffer.",
                    message.replyTo)
            elif len(matches) > 1:
                return IRCResponse(
                    ResponseType.Say,
                    "Sorry, that matches too many lines in my message buffer.",
                    message.replyTo)

            index = [msg[1] for msg in messages].index(matches[0])
            lastIndex = index + comicLimit
            if lastIndex > len(messages):
                lastIndex = len(messages)
            messages = messages[index:lastIndex]
        else:
            messages = messages[comicLimit * -1:]
        if messages:
            comicBytes = self.makeComic(messages)
            return IRCResponse(ResponseType.Say, self.postComic(comicBytes),
                               message.replyTo)
        else:
            return IRCResponse(
                ResponseType.Say,
                "There are no messages in the buffer to create a comic with.",
                message.replyTo)
Example #41
def apply_transformation(transform_tool, regex_hit_pattern, input_f, output_f):
    nul = open(os.devnull, 'w')
    # print("tool: {}\ninput: {}\nouput: {}".format(' '.join(transform_tool), input_f, output_f))
    # print(' '.join(transform_tool))
    counter = 0

    p = Popen(transform_tool, stdout=PIPE, stderr=None)

    # get the number of hits for the transformation
    regex_c = re2.compile(regex_hit_pattern)

    for line in p.stdout:
        # print(line)
        match_p = regex_c.search(line)
        if match_p is not None:
            counter += 1

    p.wait()

    print("Transformation return code: {}".format(p.returncode))
    print("Number of successful transformations: {}".format(counter))
    nul.close()

    return counter if p.returncode == 0 else -1
Example #42
    def execute(self, irc_c, msg, cmd):
        # Set the return mode of the output
        selection = {
            'ignorepromoted': self['ignorepromoted'],
            'order': self['order'],
            'limit': self['limit'],
            'offset': self['offset'],
        }
        if self['random']:
            selection['order'] = 'random'
            selection['limit'] = 1
        # if self['recommend']:
        #     selection['order'] = 'recommend'
        #     selection['limit'] = 1
        if self['newest']:
            selection['order'] = 'recent'
            selection['limit'] = 1
        # What are we searching for?
        searches = []
        strings = []
        if len(self['title']) > 0:
            strings = self['title']
            searches.extend([{'term': s, 'type': None} for s in strings])
        # Add any regexes
        regexes = []
        for regex in self['regex']:
            try:
                re.compile(regex)
            except re.error as e:
                raise CommandError(
                    "'{}' isn't a valid regular expression: {}.".format(
                        regex, e
                    )
                ) from e
            regexes.append(regex)
            # don't append compiled regex - SQL doesn't like that
        searches.extend([{'term': r, 'type': 'regex'} for r in regexes])
        # Set the tags
        tags = {'include': [], 'exclude': []}
        for tag in self['tags']:
            if tag[0] == "-":
                tags['exclude'].append(tag[1:])
                continue
            if tag[0] == "+":
                tags['include'].append(tag[1:])
                continue
            tags['include'].append(tag)
        searches.append({'term': tags, 'type': 'tags'})
        # Set the author
        authors = {'include': [], 'exclude': []}
        for author in self['author']:
            if author[0] == "-":
                authors['exclude'].append(author[1:])
                continue
            if author[0] == "+":
                authors['include'].append(author[1:])
                continue
            authors['include'].append(author)
        searches.append({'term': authors, 'type': 'author'})
        # Set the rating
        # Cases to account for: modifiers, range, combination
        ratings = MinMax()
        for rating in self['rating']:
            if ".." in rating:
                rating = rating.split("..")
                if len(rating) > 2:
                    raise CommandError("Too many ratings in range.")
                try:
                    rating = [int(x) for x in rating]
                except ValueError as e:
                    raise CommandError(
                        "Ratings in a range must be integers."
                    ) from e
                try:
                    ratings >= min(rating)
                    ratings <= max(rating)
                except MinMaxError as e:
                    raise CommandError(str(e).format("rating")) from e
            elif rating[0] in [">", "<", "="]:
                pattern = r"^(?P<comp>[<>=]{1,2})(?P<value>[0-9]+)"
                match = re.search(pattern, rating)
                if match:
                    try:
                        rating = int(match.group('value'))
                    except ValueError as e:
                        raise CommandError("Invalid rating comparison.") from e
                    comp = match.group('comp')
                    try:
                        if comp == ">=":
                            ratings >= rating
                        elif comp == "<=":
                            ratings <= rating
                        elif comp == "<":
                            ratings < rating
                        elif comp == ">":
                            ratings > rating
                        elif comp == "=":
                            ratings >= rating
                            ratings <= rating
                        else:
                            raise CommandError("Unknown rating comparison.")
                    except MinMaxError as e:
                        raise CommandError(str(e).format("rating")) from e
                elif rating[0] in [">", "<", "="]:
                    pattern = r"^(?P<comp>[<>=]{1,2})(?P<value>-?[0-9]+)"
                    match = re.search(pattern, rating)
                    if match:
                        try:
                            rating = int(match.group('value'))
                        except ValueError:
                            raise CommandError("Invalid rating comparison.")
                        comp = match.group('comp')
                        try:
                            if comp == ">=":
                                ratings >= rating
                            elif comp == "<=":
                                ratings <= rating
                            elif comp == "<":
                                ratings < rating
                            elif comp == ">":
                                ratings > rating
                            elif comp == "=":
                                ratings >= rating
                                ratings <= rating
                            else:
                                raise CommandError(
                                    "Unknown operator in rating comparison."
                                )
                        except MinMaxError as e:
                            raise CommandError(str(e).format("rating"))
                    else:
                        raise CommandError("Invalid rating comparison.")
                else:
                    raise CommandError("Invalid rating comparison.")
            else:
                try:
                    rating = int(rating)
                except ValueError as e:
                    raise CommandError(
                        "Rating must be a range, comparison, or number."
                    ) from e
                # Assume =, assign both
                try:
                    ratings >= rating
                    ratings <= rating
                except MinMaxError as e:
                    raise CommandError(str(e).format("rating"))
        searches.append({'term': ratings, 'type': 'rating'})
        # Set created date
        # Cases to handle: absolute, relative, range (which can be both)
        createds = MinMax()
        created = self['created']
        # created is a list of date selectors - ranges, abs and rels
        # but ALL dates are ranges!
        created = [DateRange(c) for c in created]
        # created is now a list of DateRanges with min and max
        try:
            for selector in created:
                if selector.max is not None:
                    createds <= selector.max
                if selector.min is not None:
                    createds >= selector.min
        except MinMaxError as e:
            raise CommandError(str(e).format("date"))
        searches.append({'term': createds, 'type': 'date'})
        # Set category
        categories = {'include': [], 'exclude': []}
        for category in self['category']:
            if category[0] == "-":
                categories['exclude'].append(category[1:])
                continue
            if category[0] == "+":
                categories['include'].append(category[1:])
                continue
            categories['include'].append(category)
        searches.append({'term': categories, 'type': 'category'})
        # Set parent page
        parents = self['parent']
        if parents is not None:
            searches.append({'term': parents, 'type': 'parent'})
        # FINAL BIT - summarise commands
        if self['verbose']:
            verbose = "Searching for articles "
            if len(strings) > 0:
                verbose += "containing \"{}\"; ".format("\", \"".join(strings))
            if len(regexes) > 0:
                verbose += "matching the regex /{}/; ".format(
                    "/ & /".join(regexes)
                )
            if parents is not None:
                verbose += "whose parent page is '{}'; ".format(parents)
            if len(categories['include']) == 1:
                verbose += (
                    "in the category '" + categories['include'][0] + "'; "
                )
            elif len(categories['include']) > 1:
                verbose += (
                    "in the categories '" + "', '".join(categories) + "; "
                )
            if len(categories['exclude']) == 1:
                verbose += (
                    "not in the category '" + categories['exclude'][0] + "'; "
                )
            elif len(categories['exclude']) > 1:
                verbose += (
                    "not in the categories '" + "', '".join(categories) + "; "
                )
            if len(tags['include']) > 0:
                verbose += (
                    "with the tags '" + "', '".join(tags['include']) + "'; "
                )
            if len(tags['exclude']) > 0:
                verbose += (
                    "without the tags '" + "', '".join(tags['exclude']) + "'; "
                )
            if len(authors['include']) > 0:
                verbose += "by " + " & ".join(authors['include']) + "; "
            if len(authors['exclude']) > 0:
                verbose += "not by " + " or ".join(authors['exclude']) + "; "
            if ratings['max'] is not None and ratings['min'] is not None:
                if ratings['max'] == ratings['min']:
                    verbose += "with a rating of " + str(ratings['max']) + "; "
                else:
                    verbose += (
                        "with a rating between "
                        + str(ratings['min'])
                        + " and "
                        + str(ratings['max'])
                        + "; "
                    )
            elif ratings['max'] is not None:
                verbose += (
                    "with a rating less than " + str(ratings['max'] + 1) + "; "
                )
            elif ratings['min'] is not None:
                verbose += (
                    "with a rating greater than "
                    + str(ratings['min'] - 1)
                    + "; "
                )
            if createds['min'] is not None and createds['max'] is not None:
                verbose += (
                    "created between "
                    + createds['min'].to_datetime_string()
                    + " and "
                    + createds['max'].to_datetime_string()
                    + "; "
                )
            elif createds['max'] is not None:
                verbose += (
                    "created before "
                    + createds['max'].to_datetime_string()
                    + "; "
                )
            elif createds['min'] is not None:
                verbose += (
                    "created after "
                    + createds['min'].to_datetime_string()
                    + "; "
                )
            if verbose.endswith("; "):
                verbose = verbose[:-2]
            msg.reply(verbose)

        page_ids = DB.get_articles(searches)
        pages = [DB.get_article_info(p_id) for p_id in page_ids]
        pages = Search.order(pages, search_term=strings, **selection)

        if len(pages) >= 50:
            msg.reply(
                "{} results found - you're going to have to be more "
                "specific!".format(len(pages))
            )
            return
        if len(pages) > 3:
            msg.reply(
                "{} results (use ..sm to choose): {}".format(
                    len(pages), Showmore.parse_multiple_titles(pages)
                )
            )
            DB.set_showmore_list(msg.raw_channel, [p['id'] for p in pages])
            return
        if len(pages) == 0:
            # check if there's no args other than --verbose
            if len(self['title']) > 0:
                # google only takes 10 args
                url = google_search(
                    '"' + '" "'.join(self['title'][:10]) + '"', num=1
                )[0]
                if url is None:
                    msg.reply("No matches found.")
                    return
                if url['title'].endswith(" - SCP Foundation"):
                    url['title'] = url['title'][:-17]
                msg.reply(
                    "No matches found. Did you mean \x02{}\x0F? {}".format(
                        url['title'], url['link']
                    )
                )
            else:
                msg.reply("No matches found.")
            return
        for page in pages:
            msg.reply(
                Gib.obfuscate(
                    Showmore.parse_title(page),
                    DB.get_channel_members(msg.raw_channel),
                )
            )
Example #43
    def handleEvent(self, event):
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        # Don't re-parse e-mail addresses
        if "EMAILADDR" in eventName:
            return None

        # Ignore any web content that isn't from the target. This avoids noise from
        # pastebin and other content where unrelated e-mails are likely to be found.
        if "_CONTENT" in eventName and eventName != "TARGET_WEB_CONTENT":
            return None

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        if type(eventData) not in [str, unicode]:
            try:
                if type(eventData) in [list, dict]:
                    eventData = str(eventData)
                else:
                    self.sf.debug("Unhandled type to find e-mails: " +
                                  str(type(eventData)))
                    return None
            except BaseException as e:
                self.sf.debug("Unable to convert list/dict to string: " +
                              str(e))
                return None

        pat = re.compile(
            "([\%a-zA-Z\.0-9_\-]+@[a-zA-Z\.0-9\-]+\.[a-zA-Z\.0-9\-]+)")
        matches = re.findall(pat, eventData)
        myres = list()
        for match in matches:
            evttype = "EMAILADDR"
            if len(match) < 4:
                self.sf.debug("Likely invalid address: " + match)
                continue

            # Handle messed up encodings
            if "%" in match:
                self.sf.debug("Skipped address: " + match)
                continue

            # Get the domain and strip potential ending .
            mailDom = match.lower().split('@')[1].strip('.')
            if not self.getTarget().matches(
                    mailDom) and not self.getTarget().matches(match):
                self.sf.debug("External domain, so possible affiliate e-mail")
                # Raw RIR data returning external e-mails generates way
                # too much noise.
                if eventName == "RAW_RIR_DATA":
                    return None
                evttype = "AFFILIATE_EMAILADDR"

            self.sf.info("Found e-mail address: " + match)
            if type(match) == str:
                mail = unicode(match.strip('.'), 'utf-8', errors='replace')
            else:
                mail = match.strip('.')

            if mail in myres:
                self.sf.debug("Already found from this source.")
                continue
            else:
                myres.append(mail)

            evt = SpiderFootEvent(evttype, mail, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)

        return None
Example #44
    def resourceList(self, id, target, targetType):
        targetDom = ''
        # Get the base domain if we're supplied a domain
        if targetType == "domain":
            targetDom = self.sf.hostDomain(target, self.opts['_internettlds'])

        for check in malchecks.keys():
            cid = malchecks[check]['id']
            if id == cid and malchecks[check]['type'] == "list":
                data = dict()
                url = malchecks[check]['url']
                data['content'] = self.sf.cacheGet(
                    "sfmal_" + cid, self.opts.get('cacheperiod', 0))
                if data['content'] is None:
                    data = self.sf.fetchUrl(url,
                                            timeout=self.opts['_fetchtimeout'],
                                            useragent=self.opts['_useragent'])
                    if data['content'] is None:
                        self.sf.error("Unable to fetch " + url, False)
                        return None
                    else:
                        self.sf.cachePut("sfmal_" + cid, data['content'])

                # If we're looking at netblocks
                if targetType == "netblock":
                    iplist = list()
                    # Get the regex, replace {0} with an IP address matcher to
                    # build a list of IP.
                    # Cycle through each IP and check if it's in the netblock.
                    if 'regex' in malchecks[check]:
                        rx = malchecks[check]['regex'].replace(
                            "{0}", r"(\d+\.\d+\.\d+\.\d+)")
                        pat = re.compile(rx, re.IGNORECASE)
                        self.sf.debug("New regex for " + check + ": " + rx)
                        for line in data['content'].split('\n'):
                            grp = re.findall(pat, line)
                            if len(grp) > 0:
                                #self.sf.debug("Adding " + grp[0] + " to list.")
                                iplist.append(grp[0])
                    else:
                        iplist = data['content'].split('\n')

                    for ip in iplist:
                        if len(ip) < 8 or ip.startswith("#"):
                            continue
                        ip = ip.strip()

                        try:
                            if IPAddress(ip) in IPNetwork(target):
                                self.sf.debug(
                                    ip + " found within netblock/subnet " +
                                    target + " in " + check)
                                return url
                        except Exception as e:
                            self.sf.debug("Error encountered parsing: " +
                                          str(e))
                            continue

                    return None

                # If we're looking at hostnames/domains/IPs
                if 'regex' not in malchecks[check]:
                    for line in data['content'].split('\n'):
                        if line == target or (targetType == "domain"
                                              and line == targetDom):
                            self.sf.debug(target + "/" + targetDom +
                                          " found in " + check + " list.")
                            return url
                else:
                    # Check for the domain and the hostname
                    try:
                        rxDom = unicode(
                            malchecks[check]['regex']).format(targetDom)
                        rxTgt = unicode(
                            malchecks[check]['regex']).format(target)
                        for line in data['content'].split('\n'):
                            if (targetType == "domain" and re.match(rxDom, line, re.IGNORECASE)) or \
                                    re.match(rxTgt, line, re.IGNORECASE):
                                self.sf.debug(target + "/" + targetDom +
                                              " found in " + check + " list.")
                                return url
                    except BaseException as e:
                        self.sf.debug("Error encountered parsing 2: " + str(e))
                        continue

        return None
Example #45
def index(request, resubmit_hash=False):
    if request.method == "POST":
        package = request.POST.get("package", "")
        timeout = min(force_int(request.POST.get("timeout")), 60 * 60 * 24)
        options = request.POST.get("options", "")
        priority = force_int(request.POST.get("priority"))
        machine = request.POST.get("machine", "")
        gateway = request.POST.get("gateway", None)
        clock = request.POST.get("clock", None)
        custom = request.POST.get("custom", "")
        memory = bool(request.POST.get("memory", False))
        enforce_timeout = bool(request.POST.get("enforce_timeout", False))
        referrer = validate_referrer(request.POST.get("referrer", None))
        tags = request.POST.get("tags", None)
        opt_filename = ""
        for option in options.split(","):
            if option.startswith("filename="):
                opt_filename = option.split("filename=")[1]
                break
        task_gateways = []
        ipaddy_re = re.compile(
            r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"
        )
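        # e.g. ipaddy_re.match("192.168.1.1") succeeds, while "256.1.1.1"
        # fails because each octet is capped at 255.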

        if referrer:
            if options:
                options += ","
            options += "referrer=%s" % (referrer)

        if request.POST.get("free"):
            if options:
                options += ","
            options += "free=yes"

        if request.POST.get("nohuman"):
            if options:
                options += ","
            options += "nohuman=yes"

        if request.POST.get("tor"):
            if options:
                options += ","
            options += "tor=yes"

        if request.POST.get("route", None):
            if options:
                options += ","
            options += "route={0}".format(request.POST.get("route", None))

        if request.POST.get("process_dump"):
            if options:
                options += ","
            options += "procdump=0"
        else:
            if options:
                options += ","
            options += "procdump=1"

        if request.POST.get("process_memory"):
            if options:
                options += ","
            options += "procmemdump=1"

        if request.POST.get("import_reconstruction"):
            if options:
                options += ","
            options += "import_reconstruction=1"

        if request.POST.get("disable_cape"):
            if options:
                options += ","
            options += "disable_cape=1"

        if request.POST.get("kernel_analysis"):
            if options:
                options += ","
            options += "kernel_analysis=yes"

        if request.POST.get("norefer"):
            if options:
                options += ","
            options += "norefer=1"

        orig_options = options

        if gateway and gateway.lower() == "all":
            for e in settings.GATEWAYS:
                if ipaddy_re.match(settings.GATEWAYS[e]):
                    task_gateways.append(settings.GATEWAYS[e])
        elif gateway and gateway in settings.GATEWAYS:
            if "," in settings.GATEWAYS[gateway]:
                if request.POST.get("all_gw_in_group"):
                    tgateway = settings.GATEWAYS[gateway].split(",")
                    for e in tgateway:
                        task_gateways.append(settings.GATEWAYS[e])
                else:
                    tgateway = random.choice(
                        settings.GATEWAYS[gateway].split(","))
                    task_gateways.append(settings.GATEWAYS[tgateway])
            else:
                task_gateways.append(settings.GATEWAYS[gateway])

        if not task_gateways:
            # To reduce to the default case
            task_gateways = [None]

        db = Database()
        task_ids = []
        task_machines = []

        if machine.lower() == "all":
            for entry in db.list_machines():
                task_machines.append(entry.label)
        else:
            task_machines.append(machine)

        status = "ok"
        if "hash" in request.POST and request.POST.get(
                "hash", False) and request.POST.get("hash")[0] != '':
            resubmission_hash = request.POST.get("hash").strip()
            paths = db.sample_path_by_hash(resubmission_hash)
            paths = filter(
                None,
                [path if os.path.exists(path) else False for path in paths])
            if not paths and FULL_DB:
                tasks = results_db.analysis.find(
                    {"dropped.sha256": resubmission_hash})
                if tasks:
                    for task in tasks:
                        # grab task id and replace in path aka distributed cuckoo hack
                        path = os.path.join(settings.CUCKOO_PATH,
                                            "storage", "analyses",
                                            str(task["info"]["id"]), "files",
                                            resubmission_hash)
                        if os.path.exists(path):
                            paths = [path]
                            break
            if paths:
                content = submit_utils.get_file_content(paths)
                if content is False:
                    return render(request, "error.html", {
                        "error":
                        "Can't find {} on disk".format(resubmission_hash)
                    })
                base_dir = tempfile.mkdtemp(prefix='resubmit_',
                                            dir=settings.TEMP_PATH)
                if opt_filename:
                    filename = base_dir + "/" + opt_filename
                else:
                    filename = base_dir + "/" + resubmission_hash
                path = store_temp_file(content, filename)
                headers = {}
                url = 'local'
                params = {}

                status, task_ids = download_file(
                    content, request, db, task_ids, url, params, headers,
                    "Local", path, package, timeout, options, priority,
                    machine, gateway, clock, custom, memory, enforce_timeout,
                    referrer, tags, orig_options, task_gateways, task_machines)

        elif "sample" in request.FILES:
            samples = request.FILES.getlist("sample")
            for sample in samples:
                # Error if there was only one submitted sample and it's empty.
                # But if there are multiple and one was empty, just ignore it.
                if not sample.size:
                    if len(samples) != 1:
                        continue

                    return render(request, "error.html",
                                  {"error": "You uploaded an empty file."})
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    return render(
                        request, "error.html", {
                            "error":
                            "You uploaded a file that exceeds the maximum allowed upload size specified in web/web/local_settings.py."
                        })

                # Moving sample from django temporary file to Cuckoo temporary storage to
                # let it persist between reboots (if the user has configured it that way).
                path = store_temp_file(sample.read(), sample.name)

                for gw in task_gateways:
                    options = update_options(gw, orig_options)

                    for entry in task_machines:
                        try:
                            task_ids_new = db.demux_sample_and_add_to_db(
                                file_path=path,
                                package=package,
                                timeout=timeout,
                                options=options,
                                priority=priority,
                                machine=entry,
                                custom=custom,
                                memory=memory,
                                enforce_timeout=enforce_timeout,
                                tags=tags,
                                clock=clock)
                            task_ids.extend(task_ids_new)
                        except CuckooDemuxError as err:
                            return render(request, "error.html",
                                          {"error": err})

        elif "quarantine" in request.FILES:
            samples = request.FILES.getlist("quarantine")
            for sample in samples:
                # Error if there was only one submitted sample and it's empty.
                # But if there are multiple and one was empty, just ignore it.
                if not sample.size:
                    if len(samples) != 1:
                        continue

                    return render(
                        request, "error.html",
                        {"error": "You uploaded an empty quarantine file."})
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    return render(
                        request, "error.html", {
                            "error":
                            "You uploaded a quarantine file that exceeds the maximum allowed upload size specified in web/web/local_settings.py."
                        })

                # Moving sample from django temporary file to Cuckoo temporary storage to
                # let it persist between reboots (if the user has configured it that way).
                tmp_path = store_temp_file(sample.read(), sample.name)

                path = unquarantine(tmp_path)
                try:
                    os.remove(tmp_path)
                except:
                    pass

                if not path:
                    return render(request, "error.html", {
                        "error":
                        "You uploaded an unsupported quarantine file."
                    })

                for gw in task_gateways:
                    options = update_options(gw, orig_options)

                    for entry in task_machines:
                        task_ids_new = db.demux_sample_and_add_to_db(
                            file_path=path,
                            package=package,
                            timeout=timeout,
                            options=options,
                            priority=priority,
                            machine=entry,
                            custom=custom,
                            memory=memory,
                            enforce_timeout=enforce_timeout,
                            tags=tags,
                            clock=clock)
                        task_ids.extend(task_ids_new)
        elif "pcap" in request.FILES:
            samples = request.FILES.getlist("pcap")
            for sample in samples:
                if not sample.size:
                    if len(samples) != 1:
                        continue

                    return render(
                        request, "error.html",
                        {"error": "You uploaded an empty PCAP file."})
                elif sample.size > settings.MAX_UPLOAD_SIZE:
                    return render(
                        request, "error.html", {
                            "error":
                            "You uploaded a PCAP file that exceeds the maximum allowed upload size specified in web/web/local_settings.py."
                        })

                # Moving sample from django temporary file to Cuckoo temporary storage to
                # let it persist between reboots (if the user has configured it that way).
                path = store_temp_file(sample.read(), sample.name)

                if sample.name.lower().endswith(".saz"):
                    saz = saz_to_pcap(path)
                    if saz:
                        try:
                            os.remove(path)
                        except:
                            pass
                        path = saz
                    else:
                        return render(
                            request, "error.html",
                            {"error": "Conversion from SAZ to PCAP failed."})

                task_id = db.add_pcap(file_path=path, priority=priority)
                task_ids.append(task_id)

        elif "url" in request.POST and request.POST.get("url").strip():
            url = request.POST.get("url").strip()
            if not url:
                return render(request, "error.html",
                              {"error": "You specified an invalid URL!"})

            url = url.replace("hxxps://", "https://").replace(
                "hxxp://", "http://").replace("[.]", ".")
            for gw in task_gateways:
                options = update_options(gw, orig_options)

                for entry in task_machines:
                    task_id = db.add_url(url=url,
                                         package=package,
                                         timeout=timeout,
                                         options=options,
                                         priority=priority,
                                         machine=entry,
                                         custom=custom,
                                         memory=memory,
                                         enforce_timeout=enforce_timeout,
                                         tags=tags,
                                         clock=clock)
                    if task_id:
                        task_ids.append(task_id)
        elif settings.VTDL_ENABLED and "vtdl" in request.POST and request.POST.get(
                "vtdl", False) and request.POST.get("vtdl")[0] != '':
            vtdl = request.POST.get("vtdl")
            if (not settings.VTDL_PRIV_KEY
                    and not settings.VTDL_INTEL_KEY) or not settings.VTDL_PATH:
                return render(
                    request, "error.html", {
                        "error":
                        "You specified VirusTotal but must edit the file and specify your VTDL_PRIV_KEY or VTDL_INTEL_KEY variable and VTDL_PATH base directory"
                    })
            else:
                base_dir = tempfile.mkdtemp(prefix='cuckoovtdl',
                                            dir=settings.VTDL_PATH)
                hashlist = []
                if "," in vtdl:
                    hashlist = vtdl.replace(" ", "").strip().split(",")
                else:
                    hashlist = vtdl.split()

                for h in hashlist:
                    if opt_filename:
                        filename = base_dir + "/" + opt_filename
                    else:
                        filename = base_dir + "/" + h

                    paths = db.sample_path_by_hash(h)
                    content = ""
                    if paths is not None:
                        content = submit_utils.get_file_content(paths)

                    headers = {}
                    url = 'https://www.virustotal.com/intelligence/download/'
                    params = {'apikey': settings.VTDL_INTEL_KEY, 'hash': h}

                    if content is False:
                        if settings.VTDL_PRIV_KEY:
                            url = 'https://www.virustotal.com/vtapi/v2/file/download'
                            params = {
                                'apikey': settings.VTDL_PRIV_KEY,
                                'hash': h
                            }

                        status, task_ids = download_file(
                            content, request, db, task_ids, url, params,
                            headers, "VirusTotal", filename, package, timeout,
                            options, priority, machine, gateway, clock, custom,
                            memory, enforce_timeout, referrer, tags,
                            orig_options, task_gateways, task_machines)
                    else:

                        status, task_ids = download_file(
                            content, request, db, task_ids, url, params,
                            headers, "Local", filename, package, timeout,
                            options, priority, machine, gateway, clock, custom,
                            memory, enforce_timeout, referrer, tags,
                            orig_options, task_gateways, task_machines)
        if status == "error":
            # is render msg
            return task_ids
        tasks_count = len(task_ids)
        if tasks_count > 0:
            return render(request, "submission/complete.html", {
                "tasks": task_ids,
                "tasks_count": tasks_count
            })
        else:
            return render(request, "error.html",
                          {"error": "Error adding task to Cuckoo's database."})
    else:
        cfg = Config("cuckoo")
        enabledconf = dict()
        enabledconf["vt"] = settings.VTDL_ENABLED
        enabledconf["kernel"] = settings.OPT_ZER0M0N
        enabledconf["memory"] = Config("processing").memory.get("enabled")
        enabledconf["procmemory"] = Config("processing").procmemory.get(
            "enabled")
        enabledconf["tor"] = Config("auxiliary").tor.get("enabled")
        if Config("auxiliary").gateways:
            enabledconf["gateways"] = True
        else:
            enabledconf["gateways"] = False
        enabledconf["tags"] = False
        # Get enabled machinery
        machinery = Config("cuckoo").cuckoo.get("machinery")
        # Get VM names for machinery config elements
        vms = [
            x.strip() for x in getattr(Config(machinery), machinery).get(
                "machines").split(",")
        ]
        # Check each VM config element for tags
        for vmtag in vms:
            if "tags" in getattr(Config(machinery), vmtag).keys():
                enabledconf["tags"] = True

        files = os.listdir(
            os.path.join(settings.CUCKOO_PATH, "analyzer", "windows",
                         "modules", "packages"))

        packages = []
        for name in files:
            name = os.path.splitext(name)[0]
            if name == "__init__":
                continue

            packages.append(name)

        # Prepare a list of VM names, description label based on tags.
        machines = []
        for machine in Database().list_machines():
            tags = []
            for tag in machine.tags:
                tags.append(tag.name)

            if tags:
                label = machine.label + ": " + ", ".join(tags)
            else:
                label = machine.label

            machines.append((machine.label, label))

        # Prepend ALL/ANY options.
        machines.insert(0, ("", "First available"))
        machines.insert(1, ("all", "All"))

        return render(
            request, "submission/index.html", {
                "packages": sorted(packages),
                "machines": machines,
                "vpns": vpns.values(),
                "route": cfg.routing.route,
                "internet": cfg.routing.internet,
                "inetsim": cfg.routing.inetsim,
                "tor": cfg.routing.tor,
                "gateways": settings.GATEWAYS,
                "config": enabledconf,
                "resubmit": resubmit_hash,
            })
Example #46
def prepare_route(route):
    for r in route:
        r['resource'] = re.compile(r['resource'], re.I | re.S)
        for rr in r['route']:
            rr[0] = re.compile(rr[0], re.I | re.S)
    return route
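
# Usage sketch (hypothetical route table): prepare_route() compiles the patterns
# in place, so call it once at startup, before dispatching any requests.
routes = prepare_route([
    {'resource': r'^/api/', 'route': [[r'^/api/v(\d+)/ping$', 'ping_handler']]},
])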
Example #47
    def start(self, path):
        password = self.options.get("password")
        if password is None:
            password = b""
        appdata = self.options.get("appdata")
        if appdata:
            root = os.environ["APPDATA"]
        else:
            root = os.environ["TEMP"]
        exe_regex = re.compile(
            r'(\.exe|\.dll|\.scr|\.msi|\.bat|\.lnk|\.js|\.jse|\.vbs|\.vbe|\.wsf)$',
            flags=re.IGNORECASE)
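        # e.g. exe_regex.search("invoice.pdf.exe") hits on the trailing ".exe",
        # so double-extension lures are still picked as the file to run.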
        zipinfos = self.get_infos(path)
        self.extract_zip(path, root, password, 0)

        file_name = self.options.get("file")
        # If no file name is provided via option, take the first file.
        if not file_name:
            # No name provided; try to find a better name.
            if len(zipinfos):
                # Attempt to find a valid exe extension in the archive
                for f in zipinfos:
                    if exe_regex.search(f.filename):
                        file_name = f.filename
                        break
                # Default to the first one if none found
                file_name = file_name if file_name else zipinfos[0].filename
                log.debug("Missing file option, auto executing: {0}".format(
                    file_name))
            else:
                raise CuckooPackageError("Empty ZIP archive")

        file_path = os.path.join(root, file_name)
        log.debug("file_name: \"%s\"" % (file_name))
        if file_name.lower().endswith(".lnk"):
            cmd_path = self.get_path("cmd.exe")
            cmd_args = "/c start /wait \"\" \"{0}\"".format(file_path)
            return self.execute(cmd_path, cmd_args, file_path)
        elif file_name.lower().endswith(".msi"):
            msi_path = self.get_path("msiexec.exe")
            msi_args = "/I \"{0}\"".format(file_path)
            return self.execute(msi_path, msi_args, file_path)
        elif file_name.lower().endswith(
            (".js", ".jse", ".vbs", ".vbe", ".wsf")):
            wscript = self.get_path_app_in_path("wscript.exe")
            wscript_args = "\"{0}\"".format(file_path)
            return self.execute(wscript, wscript_args, file_path)
        elif file_name.lower().endswith(".dll"):
            rundll32 = self.get_path_app_in_path("rundll32.exe")
            function = self.options.get("function", "#1")
            arguments = self.options.get("arguments")
            dllloader = self.options.get("dllloader")
            dll_args = "\"{0}\",{1}".format(file_path, function)
            if arguments:
                dll_args += " {0}".format(arguments)
            if dllloader:
                newname = os.path.join(os.path.dirname(rundll32), dllloader)
                shutil.copy(rundll32, newname)
                rundll32 = newname
            return self.execute(rundll32, dll_args, file_path)
        elif file_name.lower().endswith(".ps1"):
            powershell = self.get_path_app_in_path("powershell.exe")
            args = "-NoProfile -ExecutionPolicy bypass -File \"{0}\"".format(
                file_path)
            return self.execute(powershell, args, file_path)
        else:
            if "." not in os.path.basename(file_path):
                new_path = file_path + ".exe"
                os.rename(file_path, new_path)
                file_path = new_path
            return self.execute(file_path, self.options.get("arguments"),
                                file_path)
Example #48
try:
    import re2 as re
except ImportError:
    logging.warning(
        "Failed to load 're2'.  Falling back to 're' for regular expression parsing. See https://github.com/blockspeiser/Sefaria-Project/wiki/Regular-Expression-Engines"
    )
    import re

letter_scope = "\u05b0\u05b4\u05b5\u05b6\u05b7\u05b8\u05b9\u05bc\u05c1\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5\u05e6\u05e7\u05e8\u05e9\u05ea\u05f3\u05f4\u200e\u200f\u2013\u201d\ufeffabcdefghijklmnopqrstuvwxyz1234567890[]`:;.-,*()'& \""


def normalizer(lang):
    if lang == "he":
        return hebrew.normalize_final_letters_in_str
    return str.lower


splitter = re.compile(r"[\s,]+")


class AutoCompleter(object):
    """
    An AutoCompleter object provides completion services - it is the object in this module designed to be used by the Library.
    It instantiates objects that provide string completion according to different algorithms.
    """
    def __init__(self,
                 lang,
                 lib,
                 include_titles=True,
                 include_people=False,
                 include_categories=False,
                 include_parasha=False,
                 include_lexicons=False,
Example #49
    def handleEvent(self, event):
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data
        eventDataHash = self.sf.hashstring(eventData)
        addrs = None
        parentEvent = event

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        if eventDataHash in self.events:
            self.sf.debug("Skipping duplicate event for " + eventData)
            return None

        self.events[eventDataHash] = True

        self.sf.debug("Gathering DNS records for " + eventData)
        # Process the raw data alone
        recdata = dict()
        recs = {
            'MX':
            [r'\S+\s+(?:\d+)?\s+IN\s+MX\s+\d+\s+(\S+)\.', 'PROVIDER_MAIL'],
            'NS': [r'\S+\s+(?:\d+)?\s+IN\s+NS\s+(\S+)\.', 'PROVIDER_DNS'],
            'TXT': [r'\S+\s+TXT\s+"(.[^"]*)"', 'DNS_TEXT']
        }
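        # e.g. a raw answer "example.com. 3600 IN MX 10 mail.example.com."
        # yields "mail.example.com" via the MX pattern above.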

        for rec in recs.keys():
            if self.checkForStop():
                return None

            try:
                req = dns.message.make_query(eventData,
                                             dns.rdatatype.from_text(rec))

                if self.opts.get('_dnsserver', "") != "":
                    n = self.opts['_dnsserver']
                else:
                    ns = dns.resolver.get_default_resolver()
                    n = ns.nameservers[0]

                res = dns.query.udp(req, n, timeout=30)
                for x in res.answer:
                    if str(x) in self.checked:
                        continue
                    self.checked[str(x)] = True
                    for rx in recs.keys():
                        self.sf.debug("Checking " + str(x) + " + against " +
                                      recs[rx][0])
                        pat = re.compile(recs[rx][0],
                                         re.IGNORECASE | re.DOTALL)
                        grps = re.findall(pat, str(x))

                        if len(grps) == 0:
                            continue

                        for m in grps:
                            self.sf.debug("Matched: " + m)
                            strdata = unicode(m, 'utf-8', errors='replace')
                            evt = SpiderFootEvent(recs[rx][1], strdata,
                                                  self.__name__, parentEvent)
                            self.notifyListeners(evt)
                            if rec != "TXT" and not strdata.endswith(
                                    eventData):
                                evt = SpiderFootEvent(
                                    "AFFILIATE_INTERNET_NAME", strdata,
                                    self.__name__, parentEvent)
                                self.notifyListeners(evt)

                            if rec == "TXT" and "v=spf" in strdata:
                                evt = SpiderFootEvent("DNS_SPF", strdata,
                                                      self.__name__,
                                                      parentEvent)
                                self.notifyListeners(evt)

                                matches = re.findall(r'include:(.+?) ',
                                                     strdata,
                                                     re.IGNORECASE | re.DOTALL)
                                if matches:
                                    for domain in matches:
                                        if '_' in domain:
                                            continue
                                        if self.getTarget().matches(
                                                domain,
                                                includeChildren=True,
                                                includeParents=True):
                                            evt_type = 'INTERNET_NAME'
                                        else:
                                            evt_type = 'AFFILIATE_DOMAIN'

                                        if self.opts[
                                                'verify'] and not self.sf.resolveHost(
                                                    domain):
                                            self.sf.debug(
                                                "Host " + domain +
                                                " could not be resolved")
                                            evt_type += '_UNRESOLVED'

                                        evt = SpiderFootEvent(
                                            evt_type, domain, self.__name__,
                                            parentEvent)
                                        self.notifyListeners(evt)

                    strdata = unicode(str(x), 'utf-8', errors='replace')
                    evt = SpiderFootEvent("RAW_DNS_RECORDS", strdata,
                                          self.__name__, parentEvent)
                    self.notifyListeners(evt)
            except BaseException as e:
                self.sf.error(
                    "Failed to obtain DNS response for " + eventData + " (" +
                    rec + "): " + str(e), False)
Example #50
"""Frytherer Module.

This module contains all the helper functions for the
Frytherer command line interface and the Slackbot
"""
import string, sys, ast
from itertools import product
from operator import and_, or_
try:
    import re2 as re
except ImportError:
    import re
import logging
logging.basicConfig(level=logging.DEBUG)

mana_regexp = re.compile('([0-9]*)(b*)(g*)(r*)(u*)(w*)')
section_regexp = re.compile(
    r'(aipg|amtr) (?:(appendix [a-f])|(\d+)(?:(?:\.)(\d)){0,1})')
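# e.g. the mana cost "2bb" (i.e. {2}{B}{B}) splits into generic and per-colour
# runs: mana_regexp.match("2bb").groups() -> ('2', 'bb', '', '', '', '')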


def gathererCapitalise(y):
    """
    Capitalise card names as Gatherer does.

    INPUT: Regular card name in whatever case
    OUTPUT: Magic style capitalised card name
    """
    words = y.split(" ")
    ret_string = []
    for x in words:
        x = x.replace(u'\u2019', '\'').replace(u'\u2018',
Example #51
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

try:
    import re2 as re
except ImportError:
    import re

from lib.cuckoo.common.abstracts import Signature

struct_pat = re.compile(
    r"\\x11\"3D\d{2}\.\d{2}(?:[A-Za-z]|\\x00)(?:\\x00){2}(?:\d{4}|(?:\\x00){4})(?:\\x00){12}http"
)
url_pat = re.compile(r"(https?://[^\|]+)(?:\||\\x00)")
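# e.g. url_pat.findall("http://a.example/1|http://b.example/2|")
# -> ['http://a.example/1', 'http://b.example/2']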


class Hancitor_APIs(Signature):
    name = "hancitor_behavior"
    description = "Exhibits behavior characteristic of Hancitor downloader"
    weight = 3
    severity = 3
    categories = ["downloader"]
    families = ["hancitor", "chanitor", "tordal"]
    authors = ["KillerInstinct"]
    minimum = "1.2"
    evented = True
Example #52
#headers = ["pmid", "wordCounts"]

# keep mapping word -> list of pmids
wordsToPmids = {}

# this plugin produces marshal output files
outTypes = ["marshal"]

# we want to run on fulltext files
runOn = "files"
# we only want main files
onlyMain = True
# only give us one single main article file
preferXml = True

sentSplitter = re.compile(r'[.!?;][ ]')
wordSplitter = re.compile(r'[;:",. !?=\[\]()\t\n\r\f\v]')
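# e.g. wordSplitter.split("p53, apoptosis pathway")
# -> ['p53', '', 'apoptosis', 'pathway'] (note the empty token between delimiters)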

addTwoGrams = False
addMeta = False
pmids = None
outFhs = None

# run before outTypes is read or any files are opened
# can be used to change outTypes depending on paramDict
def setup(paramDict):
    global addTwoGrams
    global addMeta
    global pmids
    addTwoGrams = bool(paramDict["addTwoGrams"])
    addMeta = bool(paramDict["addMeta"])
Example #53
from wwwutil.md import markdown
from datetime import datetime
import calendar

try:
    import re2 as re
except ImportError:
    import re

from geweb.exceptions import GewebError
from point.util.env import env

import settings

_urlre = re.compile(
    r'^(?P<proto>\w+)://(?:[\w\.\-%\:]*\@)?(?P<host>[\w\.\-%]+)(?::(?P<port>\d+))?(?P<path>(?:/[\w\.\-%]*)*)(?:\?(?P<query>[^#]*))?'
)


def parse_url(url):
    if not url:
        return None
    m = re.search(_urlre, url)
    if m:
        return m.groupdict()


def check_referer(fn):
    def _fn(*args, **kwargs):
        referer = parse_url(env.request.referer)
        if not referer or not referer['host'].endswith(settings.domain):
Example #54
    def handleEvent(self, event):
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.sf.debug("Received event, " + eventName + ", from " +
                      srcModuleName)

        # If the source event is web content, check if the source URL was javascript
        # or CSS, in which case optionally ignore it.
        if eventName == "TARGET_WEB_CONTENT":
            url = event.actualSource
            if self.opts['filterjscss'] and (".js" in url or ".css" in url):
                self.sf.debug("Ignoring web content from CSS/JS.")
                return None

        if eventName == "EMAILADDR" and self.opts['emailtoname']:
            if "." in eventData.split("@")[0]:
                if type(eventData) == unicode:
                    name = " ".join(
                        map(unicode.capitalize,
                            eventData.split("@")[0].split(".")))
                else:
                    name = " ".join(
                        map(str.capitalize,
                            eventData.split("@")[0].split(".")))
                    name = unicode(name, 'utf-8', errors='replace')
                # Notify other modules of what you've found
                evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
                if event.moduleDataSource:
                    evt.moduleDataSource = event.moduleDataSource
                else:
                    evt.moduleDataSource = "Unknown"
                self.notifyListeners(evt)
                return None

        # Stage 1: Find things that look (very vaguely) like names
        rx = re.compile(
            "([A-Z][a-zàáâãäåèéêëìíîïòóôõöùúûüýÿ]+)\s+.?.?\s?([A-Z][àáâãäåèéêëìíîïòóôõöùúûüýÿa-zA-Z\'\-]+)"
        )
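        # e.g. "John Smith" and "John Q. Smith" both match; the middle
        # ".?.?\s?" tolerates an optional initial between the two words.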
        m = re.findall(rx, eventData)
        for r in m:
            # Start off each match as 0 points.
            p = 0
            notindict = False

            # Shouldn't encounter "Firstname's Secondname"
            first = r[0].lower()
            if first[len(first) - 2] == "'" or first[len(first) - 1] == "'":
                continue

            # Strip off trailing ' or 's
            secondOrig = r[1].replace("'s", "")
            secondOrig = secondOrig.rstrip("'")
            second = r[1].lower().replace("'s", "")
            second = second.rstrip("'")

            # If both words are not in the dictionary, add 75 points.
            if first not in self.d and second not in self.d:
                self.sf.debug(
                    "Both first and second names are not in the dictionary, so high chance of name: ("
                    + first + ":" + second + ").")
                p += 75
                notindict = True
            else:
                self.sf.debug(first + " was found or " + second +
                              " was found in dictionary.")

            # If the first word is a known popular first name, award 50 points.
            if first in self.n:
                p += 50

            # If either word is 2 characters, subtract 50 points.
            if len(first) == 2 or len(second) == 2:
                p -= 50

            # If the first word is in the dictionary but the second isn't,
            # subtract 20 points.
            if not notindict:
                if first in self.d and second not in self.d:
                    p -= 20

                # If the second word is in the dictionary but the first isn't,
                # subtract 40 points.
                if first not in self.d and second in self.d:
                    p -= 40

            name = r[0] + " " + secondOrig

            self.sf.debug("Name of " + name + " has score: " + str(p))
            if p > self.opts['algolimit']:
                # Notify other modules of what you've found
                evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
                if event.moduleDataSource:
                    evt.moduleDataSource = event.moduleDataSource
                else:
                    evt.moduleDataSource = "Unknown"
                self.notifyListeners(evt)
Example #55
    def run(self):
        clam_no_score_re = re.compile(
            r'^(SaneSecurity\.FoxHole|MiscreantPunch\.(?:Susp|INFO))', re.I)
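        # Detections matching this (FoxHole / MiscreantPunch informational hits)
        # are reported but deliberately never raise the signature weight below.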
        clam_ignore = ['PhishTank.Phishing.6117523.UNOFFICIAL']
        self.data = []
        if self.results["target"]["category"] == "file":
            if "clamav" in self.results["target"]["file"].keys(
            ) and self.results["target"]["file"][
                    "clamav"] and "sha256" in self.results["target"][
                        "file"].keys():
                for detection in self.results["target"]["file"]["clamav"]:
                    entry = "%s, target" % (detection)
                    if detection in clam_ignore:
                        continue
                    if not clam_no_score_re.search(detection):
                        self.weight = 3
                    if "type" in self.results["target"]["file"]:
                        entry = "%s, type:%s" % (entry, self.results["target"]
                                                 ["file"].get("type", ""))
                    self.data.append(
                        {self.results["target"]["file"]["sha256"]: entry})

        if "suricata" in self.results and self.results["suricata"]:
            if "files" in self.results["suricata"]:
                for entry in self.results["suricata"]["files"]:
                    proto = entry["protocol"]
                    if "clamav" in entry["file_info"].keys(
                    ) and entry["file_info"]["clamav"] and "sha256" in entry[
                            "file_info"].keys():
                        for detection in entry["file_info"]["clamav"]:
                            if detection in clam_ignore:
                                continue
                            if not clam_no_score_re.search(detection):
                                self.weight = 3
                            lentry = "{}, suricata_extracted_files, src:{}, sp:{}, dst:{}, dp:{}".format(
                                detection, entry.get('srcip', ''),
                                entry.get('sp', ''), entry.get('dstip', ''),
                                entry.get('dp', ''))
                            if "http_user_agent" in entry.keys():
                                lentry = "%s, ua:%s" % (
                                    lentry, entry['http_user_agent'])
                            if "http_uri" in entry.keys():
                                lentry = "%s, uri:%s" % (lentry,
                                                         entry['http_uri'])
                            if "http_referer" in entry.keys():
                                lentry = "%s, referer:%s" % (
                                    lentry, entry['http_referer'])
                            if entry["file_info"]["type"]:
                                lentry = "%s, type:%s" % (
                                    lentry, entry["file_info"]["type"])
                            self.data.append(
                                {entry["file_info"]["sha256"]: lentry})

        if "dropped" in self.results:
            for entry in self.results["dropped"]:
                if "clamav" in entry.keys(
                ) and entry["clamav"] and "sha256" in entry.keys():
                    for detection in entry["clamav"]:
                        if detection in clam_ignore:
                            continue
                        if not clam_no_score_re.search(detection):
                            self.weight = 3
                        lentry = "%s, dropped" % (detection)
                        if "guest_paths" in entry.keys():
                            lentry = "%s, guest_paths:%s" % (lentry, "*".join(
                                entry["guest_paths"]))
                        if "type" in entry.keys():
                            lentry = "%s, type:%s" % (lentry, entry["type"])
                        self.data.append({entry["sha256"]: lentry})

        if len(self.data) > 0:
            return True

        return False
Example #56
        'o',
        'u',
        'y',
    ]),  #all consonants go away
    ('.', [
        r'\.+',
    ]),
)

#_GROUPS1 = [(k, '|'.join(ls)) for k, ls in GROUPS1]
#_GROUPS2 = [(k, '|'.join(ls)) for k, ls in GROUPS2]
#GROUPS1_SINGLEREGEXP = re.compile('|'.join(["(%s)" % v for k, v in _GROUPS1]))
#GROUPS2_SINGLEREGEXP = re.compile('|'.join(["(%s)" % v for k, v in _GROUPS2]))
#GROUPS1_LOOKUP = dict((i+1, k) for (i, (k,v)) in enumerate(GROUPS1))
#GROUPS2_LOOKUP = dict((i+1, k) for (i, (k,v)) in enumerate(GROUPS2))
# Compile each group's alternatives in place (the _GROUPS* variants above are
# disabled, so iterate the raw pattern lists directly).
GROUPS1 = [(k, re.compile('|'.join(ls))) for k, ls in GROUPS1]
GROUPS2 = [(k, re.compile('|'.join(ls))) for k, ls in GROUPS2]


def dict_sub(d, text):
    """ Replace in 'text' non-overlapping occurences of REs whose patterns are keys
    in dictionary 'd' by corresponding values (which must be constant strings: may
    have named backreferences but not numeric ones). The keys must not contain
    anonymous matching-groups.
    Returns the new string."""

    # Create a regular expression  from the dictionary keys
    regex = re.compile("|".join("(%s)" % k for k in d))
    # Facilitate lookup from group number to value
    lookup = dict((i + 1, v) for i, v in enumerate(d.values()))
    # Substitute each match with the value mapped to the group that fired;
    # expand() resolves any named backreferences in the replacement string.
    return regex.sub(lambda mo: mo.expand(lookup[mo.lastindex]), text)
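
# Hypothetical check (relies on insertion-ordered dicts, Python 3.7+):
# dict_sub({r'\s+': ' ', r'\d': '#'}, "a  1\t22") -> 'a # ##'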
Example #57
    def run(self):
        """Run Suricata.
        @return: hash with alerts
        """
        self.key = "suricata"
        # General
        SURICATA_CONF = self.options.get("conf", None)
        SURICATA_EVE_LOG = self.options.get("evelog", None)
        SURICATA_ALERT_LOG = self.options.get("alertlog", None)
        SURICATA_TLS_LOG = self.options.get("tlslog", None)
        SURICATA_HTTP_LOG = self.options.get("httplog", None)
        SURICATA_SSH_LOG = self.options.get("sshlog", None)
        SURICATA_DNS_LOG = self.options.get("dnslog", None)
        SURICATA_FILE_LOG = self.options.get("fileslog", None)
        SURICATA_FILES_DIR = self.options.get("filesdir", None)
        SURICATA_RUNMODE = self.options.get("runmode", None)
        SURICATA_FILE_BUFFER = self.options.get("buffer", 8192)
        Z7_PATH = self.options.get("7zbin", None)
        FILES_ZIP_PASS = self.options.get("zippass", None)
        SURICATA_FILE_COPY_DST_DIR = self.options.get("file_copy_dest_dir",
                                                      None)
        SURICATA_FILE_COPY_MAGIC_RE = self.options.get("file_magic_re", None)
        if SURICATA_FILE_COPY_MAGIC_RE:
            try:
                SURICATA_FILE_COPY_MAGIC_RE = re.compile(
                    SURICATA_FILE_COPY_MAGIC_RE)
            except:
                log.warning("Failed to compile suricata copy magic RE" %
                            (SURICATA_FILE_COPY_MAGIC_RE))
                SURICATA_FILE_COPY_MAGIC_RE = None
        # Socket
        SURICATA_SOCKET_PATH = self.options.get("socket_file", None)
        SURICATA_SOCKET_PYLIB = self.options.get("pylib_dir", None)

        # Command Line
        SURICATA_BIN = self.options.get("bin", None)

        suricata = {}
        suricata["alerts"] = []
        suricata["tls"] = []
        suricata["perf"] = []
        suricata["files"] = []
        suricata["http"] = []
        suricata["dns"] = []
        suricata["ssh"] = []
        suricata["file_info"] = []

        suricata["eve_log_full_path"] = None
        suricata["alert_log_full_path"] = None
        suricata["tls_log_full_path"] = None
        suricata["http_log_full_path"] = None
        suricata["file_log_full_path"] = None
        suricata["ssh_log_full_path"] = None
        suricata["dns_log_full_path"] = None

        SURICATA_ALERT_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                  SURICATA_ALERT_LOG)
        SURICATA_TLS_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_TLS_LOG)
        SURICATA_HTTP_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                 SURICATA_HTTP_LOG)
        SURICATA_SSH_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_SSH_LOG)
        SURICATA_DNS_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_DNS_LOG)
        SURICATA_EVE_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                SURICATA_EVE_LOG)
        SURICATA_FILE_LOG_FULL_PATH = "%s/%s" % (self.logs_path,
                                                 SURICATA_FILE_LOG)
        SURICATA_FILES_DIR_FULL_PATH = "%s/%s" % (self.logs_path,
                                                  SURICATA_FILES_DIR)

        separate_log_paths = [
            ("alert_log_full_path", SURICATA_ALERT_LOG_FULL_PATH),
            ("tls_log_full_path", SURICATA_TLS_LOG_FULL_PATH),
            ("http_log_full_path", SURICATA_HTTP_LOG_FULL_PATH),
            ("ssh_log_full_path", SURICATA_SSH_LOG_FULL_PATH),
            ("dns_log_full_path", SURICATA_DNS_LOG_FULL_PATH)
        ]

        # handle reprocessing
        all_log_paths = [x[1] for x in separate_log_paths] + \
            [SURICATA_EVE_LOG_FULL_PATH, SURICATA_FILE_LOG_FULL_PATH]
        for log_path in all_log_paths:
            if os.path.exists(log_path):
                try:
                    os.unlink(log_path)
                except:
                    pass
        if os.path.isdir(SURICATA_FILES_DIR_FULL_PATH):
            try:
                shutil.rmtree(SURICATA_FILES_DIR_FULL_PATH, ignore_errors=True)
            except:
                pass

        if not os.path.exists(SURICATA_CONF):
            log.warning("Unable to Run Suricata: Conf File %s Does Not Exist" %
                        (SURICATA_CONF))
            return suricata["alerts"]
        if not os.path.exists(self.pcap_path):
            log.warning("Unable to Run Suricata: Pcap file %s Does Not Exist" %
                        (self.pcap_path))
            return suricata["alerts"]

        # Add to this if you wish to ignore any SIDs for the suricata alert logs
        # Useful for ignoring SIDs without disabling them. Ex: suppress an alert for
        # a SID which depends on another. (Bad TCP data for HTTP(S) alert)
        sid_blacklist = [
            # SURICATA FRAG IPv6 Fragmentation overlap
            2200074,
            # ET INFO InetSim Response from External Source Possible SinkHole
            2017363,
            # SURICATA UDPv4 invalid checksum
            2200075,
            # ET POLICY SSLv3 outbound connection from client vulnerable to POODLE attack
            2019416,
        ]

        if SURICATA_RUNMODE == "socket":
            if SURICATA_SOCKET_PYLIB is not None:
                sys.path.append(SURICATA_SOCKET_PYLIB)
            try:
                from suricatasc import SuricataSC
            except Exception as e:
                log.warning("Failed to import suricatasc lib %s" % (e))
                return suricata

            loopcnt = 0
            maxloops = 24
            loopsleep = 5

            args = {}
            args["filename"] = self.pcap_path
            args["output-dir"] = self.logs_path

            suris = SuricataSC(SURICATA_SOCKET_PATH)
            try:
                suris.connect()
                suris.send_command("pcap-file", args)
            except Exception as e:
                log.warning(
                    "Failed to connect to socket and send command %s: %s" %
                    (SURICATA_SOCKET_PATH, e))
                return suricata
            while loopcnt < maxloops:
                try:
                    pcap_flist = suris.send_command("pcap-file-list")
                    current_pcap = suris.send_command("pcap-current")
                    log.debug("pcapfile list: %s current pcap: %s" %
                              (pcap_flist, current_pcap))

                    if self.pcap_path not in pcap_flist["message"][
                            "files"] and current_pcap[
                                "message"] != self.pcap_path:
                        log.debug(
                            "Pcap not in list and not the current pcap; assuming it has been processed"
                        )
                        break
                    else:
                        loopcnt = loopcnt + 1
                        time.sleep(loopsleep)
                except Exception as e:
                    log.warning(
                        "Failed to get pcap status breaking out of loop %s" %
                        (e))
                    break

            if loopcnt == maxloops:
                log.warning(
                    "Loop timeout of %ssec occured waiting for file %s to finish processing"
                    % (maxloops * loopsleep, pcapfile))
                return suricata
        elif SURICATA_RUNMODE == "cli":
            if not os.path.exists(SURICATA_BIN):
                log.warning(
                    "Unable to Run Suricata: Bin File %s Does Not Exist" %
                    (SURICATA_CONF))
                return suricata["alerts"]
            cmd = "%s -c %s -k none -l %s -r %s" % (
                SURICATA_BIN, SURICATA_CONF, self.logs_path, self.pcap_path)
            ret, stdout, stderr = self.cmd_wrapper(cmd)
            if ret != 0:
                log.warning(
                    "Suricata returned a Exit Value Other than Zero %s" %
                    (stderr))
                return suricata

        else:
            log.warning("Unknown Suricata Runmode")
            return suricata

        datalist = []
        if os.path.exists(SURICATA_EVE_LOG_FULL_PATH):
            suricata["eve_log_full_path"] = SURICATA_EVE_LOG_FULL_PATH
            with open(SURICATA_EVE_LOG_FULL_PATH, "rb") as eve_log:
                datalist.append(eve_log.read())
        else:
            for path in separate_log_paths:
                if os.path.exists(path[1]):
                    suricata[path[0]] = path[1]
                    with open(path[1], "rb") as the_log:
                        datalist.append(the_log.read())

        if not datalist:
            log.warning("Suricata: Failed to find usable Suricata log file")

        for data in datalist:
            for line in data.splitlines():
                try:
                    parsed = json.loads(line)
                except Exception:
                    log.warning("Suricata: Failed to parse line as json: %s" %
                                (line))
                    continue

                # Route each EVE record to its bucket according to event_type.
                if 'event_type' in parsed:
                    if parsed["event_type"] == "alert":
                        if (parsed["alert"]["signature_id"]
                                not in sid_blacklist and not parsed["alert"]
                            ["signature"].startswith("SURICATA STREAM")):
                            alog = dict()
                            # empty-string values are normalized to "None"
                            for key in ("gid", "rev", "severity"):
                                if parsed["alert"][key] == '':
                                    alog[key] = "None"
                                else:
                                    alog[key] = parsed["alert"][key]
                            alog["sid"] = parsed["alert"]["signature_id"]
                            alog["srcport"] = parsed.get("src_port", "None")
                            alog["srcip"] = parsed["src_ip"]
                            alog["dstport"] = parsed.get("dest_port", "None")
                            alog["dstip"] = parsed["dest_ip"]
                            alog["protocol"] = parsed["proto"]
                            alog["timestamp"] = parsed["timestamp"].replace(
                                "T", " ")
                            if parsed["alert"]["category"] == '':
                                alog["category"] = "None"
                            else:
                                alog["category"] = parsed["alert"]["category"]
                            alog["signature"] = parsed["alert"]["signature"]
                            suricata["alerts"].append(alog)

                    elif parsed["event_type"] == "http":
                        hlog = dict()
                        hlog["srcport"] = parsed["src_port"]
                        hlog["srcip"] = parsed["src_ip"]
                        hlog["dstport"] = parsed["dest_port"]
                        hlog["dstip"] = parsed["dest_ip"]
                        hlog["timestamp"] = parsed["timestamp"].replace(
                            "T", " ")
                        hlog["uri"] = parsed["http"].get("url", "None")
                        hlog["length"] = parsed["http"]["length"]
                        hlog["hostname"] = parsed["http"].get(
                            "hostname", "None")
                        hlog["status"] = str(
                            parsed["http"].get("status", "None"))
                        hlog["method"] = parsed["http"].get(
                            "http_method", "None")
                        hlog["contenttype"] = parsed["http"].get(
                            "http_content_type", "None")
                        hlog["ua"] = parsed["http"].get(
                            "http_user_agent", "None")
                        hlog["referrer"] = parsed["http"].get(
                            "http_refer", "None")
                        suricata["http"].append(hlog)

                    elif parsed["event_type"] == "tls":
                        tlog = dict()
                        tlog["srcport"] = parsed["src_port"]
                        tlog["srcip"] = parsed["src_ip"]
                        tlog["dstport"] = parsed["dest_port"]
                        tlog["dstip"] = parsed["dest_ip"]
                        tlog["timestamp"] = parsed["timestamp"].replace(
                            "T", " ")
                        tlog["fingerprint"] = parsed["tls"]["fingerprint"]
                        tlog["issuer"] = parsed["tls"]["issuerdn"]
                        tlog["version"] = parsed["tls"]["version"]
                        tlog["subject"] = parsed["tls"]["subject"]
                        suricata["tls"].append(tlog)

                    elif parsed["event_type"] == "ssh":
                        suricata["ssh"].append(parsed)
                    elif parsed["event_type"] == "dns":
                        suricata["dns"].append(parsed)

        if os.path.exists(SURICATA_FILE_LOG_FULL_PATH):
            suricata["file_log_full_path"] = SURICATA_FILE_LOG_FULL_PATH
            with open(SURICATA_FILE_LOG_FULL_PATH, "rb") as file_log:
                lines = file_log.readlines()
            for l in lines:
                try:
                    d = json.loads(l)
                except Exception:
                    log.warning("Suricata: failed to load JSON from file log")
                    continue
                # Some log entries do not have an id
                if "id" not in d:
                    continue
                src_file = "%s/file.%s" % (SURICATA_FILES_DIR_FULL_PATH,
                                           d["id"])
                if os.path.exists(src_file):
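                    # Optionally copy interesting extracted files (selected by
                    # their magic string) into a shared directory, named by md5.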
                    if (SURICATA_FILE_COPY_MAGIC_RE and SURICATA_FILE_COPY_DST_DIR
                            and os.path.exists(SURICATA_FILE_COPY_DST_DIR)):
                        try:
                            m = re.search(SURICATA_FILE_COPY_MAGIC_RE,
                                          d["magic"])
                            if m:
                                dst_file = "%s/%s" % (
                                    SURICATA_FILE_COPY_DST_DIR, d["md5"])
                                shutil.copy2(src_file, dst_file)
                                log.debug("Copied %s to %s" %
                                          (src_file, dst_file))
                        except Exception as e:
                            log.warning("Unable to copy suricata file: %s" % e)
                    file_info = File(file_path=src_file).get_all()
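                    # Inline the contents of small text-like files so they are
                    # visible directly in the report.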
                    texttypes = [
                        "ASCII",
                        "Windows Registry text",
                        "XML document text",
                        "Unicode text",
                    ]
                    readit = False
                    for texttype in texttypes:
                        if texttype in file_info["type"]:
                            readit = True
                            break
                    if readit:
                        with open(file_info["path"], "rb") as drop_open:
                            filedata = drop_open.read(SURICATA_FILE_BUFFER + 1)
                        if len(filedata) > SURICATA_FILE_BUFFER:
                            file_info["data"] = convert_to_printable(
                                filedata[:SURICATA_FILE_BUFFER] +
                                " <truncated>")
                        else:
                            file_info["data"] = convert_to_printable(filedata)
                    d["file_info"] = file_info
                if "/" in d["filename"]:
                    d["filename"] = d["filename"].split("/")[-1]
                suricata["files"].append(d)
Example #58
0
try:
    import re2 as re
except ImportError:
    import re

# Byte-level pattern: matches http/https URLs directly in raw, undecoded data.
url_regex = re.compile(
    br"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
)


def config(data):
    urls_dict = {}

    try:
        urls_dict["URLs"] = [
            url.lower().decode() for url in url_regex.findall(data)
        ]
    except Exception as e:
        print(e)

    if "URLs" in urls_dict and len(urls_dict["URLs"]) > 0:
        return urls_dict

    return None
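
A quick usage sketch for the extractor above (the sample buffer is made up):

if __name__ == "__main__":
    sample = b"second stage hosted at http://example.com/payload.bin today"
    print(config(sample))
    # -> {'URLs': ['http://example.com/payload.bin']}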
Example #59
0
 # second pass: go through each concept/term, find it in the subresources,
 # and record the occurrences for the matrix
 tc = 0
 j = 0
 row_sentence = []
 row_paragraph = []
 col_sentence = []
 col_paragraph = []
 data_sentence = []
 data_paragraph = []
 # initialize list of empty lists for storing concepts contained in each paragraph
 para_conceptIDs_contained = [[] for i in range(tot_para)]
 for i, con_ID in enumerate(concept_IDs):
     term_list = term_lists[i]
     wordcount_in_paragraphs = [0] * tot_para
     terms_regex = [r"\b"+re2.escape(term.lower())+r"\b" for term in term_list]
     search_pattern = re2.compile("|".join(terms_regex))
     for sent_num, sentence in enumerate(sentences):
         wordcount = len(search_pattern.findall(sentence.lower()))
         if wordcount > 0: #only go ahead if search_pattern is in the sentence
             row_sentence.append(sent_num)
             col_sentence.append(tc)
             data_sentence.append(1)
             wordcount_in_paragraphs[sentences_indexofparagraph[sent_num]] += wordcount
     for para_num in range(tot_para):
         wordcount_in_p = wordcount_in_paragraphs[para_num]
         if wordcount_in_p > 0:
             row_paragraph.append(para_num)
             col_paragraph.append(tc)
             data_paragraph.append(1)
             para_conceptIDs_contained[para_num].append(con_ID)
     if tc*10/tot_concepts > j:
         # the original listing is truncated here; j presumably tracks
         # progress in 10% increments
         j += 1
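
The row/col/data triplets collected above follow the COO sparse-matrix
layout; presumably they are later assembled along these lines (a sketch,
assuming scipy and the surrounding variables):

 from scipy.sparse import coo_matrix

 # one row per sentence/paragraph, one column per concept
 sentence_matrix = coo_matrix(
     (data_sentence, (row_sentence, col_sentence)),
     shape=(len(sentences), tot_concepts))
 paragraph_matrix = coo_matrix(
     (data_paragraph, (row_paragraph, col_paragraph)),
     shape=(tot_para, tot_concepts))
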
vtdl_cfg = Config("auxiliary").virustotaldl

MOLOCH_BASE = moloch_cfg.get("base", None)
MOLOCH_NODE = moloch_cfg.get("node", None)
MOLOCH_ENABLED = moloch_cfg.get("enabled", False)

GATEWAYS = aux_cfg.get("gateways")
VTDL_ENABLED = vtdl_cfg.get("enabled", False)
VTDL_PRIV_KEY = vtdl_cfg.get("dlprivkey", None)
VTDL_INTEL_KEY = vtdl_cfg.get("dlintelkey", None)
VTDL_PATH = vtdl_cfg.get("dlpath", None)

TEMP_PATH = Config().cuckoo.get("tmppath", "/tmp")

ipaddy_re = re.compile(
    r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"
)
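# Quick illustrative check of the dotted-quad pattern (not part of the
# original):
#   ipaddy_re.match("192.168.1.254") -> match
#   ipaddy_re.match("999.10.1.1")    -> None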

# Build a reverse map from gateway IP to gateway name, skipping
# multi-IP (comma-separated) entries.
if GATEWAYS:
    GATEWAYS_IP_MAP = {}
    for e in GATEWAYS:
        if "," in e:
            continue
        elif ipaddy_re.match(GATEWAYS[e]):
            GATEWAYS_IP_MAP[GATEWAYS[e]] = e

# Enable/disable the Zer0m0n tickbox on the submission page
OPT_ZER0M0N = True

# To disable comment support, change the below to False
COMMENTS = True