def _search(self, searchTerms, logPath, files, searchForNick, includeToday, reverse):
    if searchForNick:
        pattern = re2.compile(fr"^\[[^]]+\]\s+<(.?{searchTerms})>\s+.*", re2.IGNORECASE)
    else:
        pattern = re2.compile(fr'.*<.*> .*({searchTerms}).*', re2.IGNORECASE)
    found = None
    if not includeToday:
        today = f"{strftime('%Y-%m-%d')}.log"
        if today in files:
            files.remove(today)
    if reverse:
        files.reverse()
    for filename in files:
        with open(os.path.join(logPath, filename), 'r', errors='ignore') as logfile:
            if reverse:
                lines = reversed(logfile.readlines())
            else:
                lines = logfile.readlines()
            if reverse and includeToday:
                lines = list(lines)[1:]
            for line in lines:
                if pattern.match(line.rstrip()):
                    found = line.rstrip()
                    break
        if found:
            return f'[{filename[:10]}] {found}'
    return 'Nothing that matches your search terms has been found in the log.'
def run(self):
    if self.results["target"]["category"] == "file":
        return False

    self.ie_paths_re = re.compile(r"^c:\\program files(?:\s\(x86\))?\\internet explorer\\iexplore.exe$", re.I)
    # run through re.escape()
    self.white_list_re = [
        "^C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Adobe\\\\Reader\\ \\d+\\.\\d+\\\\Reader\\\\AcroRd32\\.exe$",
        "^C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Java\\\\jre\\d+\\\\bin\\\\j(?:avaw?|p2launcher)\\.exe$",
        "^C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Microsoft SilverLight\\\\(?:\\d+\\.)+\\d\\\\agcp.exe$",
        "^C\\:\\\\Windows\\\\System32\\\\ntvdm\\.exe$",
        "^C\\:\\\\Windows\\\\system32\\\\rundll32\\.exe$",
        "^C\\:\\\\Windows\\\\syswow64\\\\rundll32\\.exe$",
        "^C\\:\\\\Windows\\\\system32\\\\drwtsn32\\.exe$",
        "^C\\:\\\\Windows\\\\syswow64\\\\drwtsn32\\.exe$",
        "^C\\:\\\\Windows\\\\system32\\\\dwwin\\.exe$"
    ]
    # means we can be evaded but also means we can have relatively tight paths between 32-bit and 64-bit
    self.white_list_re_compiled = []
    for entry in self.white_list_re:
        self.white_list_re_compiled.append(re.compile(entry, re.I))
    self.white_list_re_compiled.append(self.ie_paths_re)

    # Sometimes if a service gets loaded we get out-of-order processes in the tree,
    # so iterate over the IE processes to get the path of the initial monitored executable.
    self.initialpath = None
    processes = self.results["behavior"]["processtree"]
    if len(processes):
        for p in processes:
            initialpath = p["module_path"].lower()
            if initialpath and self.ie_paths_re.match(initialpath) and p.has_key("children"):
                self.martians = self.find_martians(p, self.white_list_re_compiled)
                if len(self.martians) > 0:
                    for martian in self.martians:
                        self.data.append({"ie_martian": martian})
                    return True
    return False
def substitute(self, search, replace, flags, text, inputMessage, channel):
    # Apparently re.sub understands escape sequences in the replacement string;
    # strip all but the backreferences
    replace = replace.replace('\\', '\\\\')
    replace = re.sub(r'\\([1-9][0-9]?([^0-9]|$))', r'\1', replace)

    if channel not in self.messages:
        self.messages[channel] = []
        self.unmodifiedMessages[channel] = []
    messages = self.unmodifiedMessages[channel] if 'o' in flags else self.messages[channel]

    if 'g' in flags:
        count = 0
    else:
        count = 1

    subFlags = 0
    if 'i' in flags:
        subFlags |= re.IGNORECASE
    if 'v' in flags:
        subFlags |= re.VERBOSE

    if 'c' in flags:
        newMessage = copy.copy(inputMessage)
        try:
            searchC = re2.compile(search, subFlags)
            new = searchC.sub(replace, text, count)
        except sre_constants.error as e:
            newMessage.messageString = "[Regex Error in Sed pattern: {}]".format(e.message)
            return newMessage
        if new != text:
            newMessage.messageString = new
            self.storeMessage(newMessage, False)
        else:
            newMessage.messageString = text
            self.storeMessage(newMessage, False)
        return newMessage

    for message in reversed(messages):
        try:
            searchC = re2.compile(search, subFlags)
            new = searchC.sub(replace, message.messageString, count)
        except sre_constants.error as e:
            newMessage = copy.copy(inputMessage)
            newMessage.messageString = "[Regex Error in Sed pattern: {}]".format(e.message)
            return newMessage
        new = new[:300]
        if searchC.search(message.messageString):
            newMessage = copy.copy(message)
            newMessage.messageString = new
            self.storeMessage(newMessage, False)
            return newMessage
    return None
def _search(self, searchTerms, logPath, files, searchForNick, includeToday, reverse):
    if searchForNick:
        pattern = re2.compile(r"^\[[^]]+\]\s+<(.?{})>\s+.*".format(searchTerms), re.IGNORECASE)
    else:
        pattern = re2.compile(r".*<.*> .*({}).*".format(searchTerms), re.IGNORECASE)
    found = None
    if not includeToday:
        today = "{}.log".format(strftime("%Y-%m-%d"))
        if today in files:
            files.remove(today)
    if reverse:
        files.reverse()
    for filename in files:
        with open(os.path.join(logPath, filename), "r") as logfile:
            if reverse:
                lines = reversed(logfile.readlines())
            else:
                lines = logfile.readlines()
            if reverse and includeToday:
                lines = list(lines)[1:]
            for line in lines:
                if pattern.match(line.rstrip()):
                    found = line.rstrip()
                    break
        if found:
            return "[{}] {}".format(filename[:10], found)
    return "Nothing that matches your search terms has been found in the log."
def test_empty_array(self):
    # SF bug 1647541
    import array
    for typecode in 'cbBuhHiIlLfd':
        a = array.array(typecode)
        self.assertEqual(re.compile("bla").match(a), None)
        self.assertEqual(re.compile("").match(a).groups(), ())
def test_bug_926075(self):
    try:
        unicode
    except NameError:
        return  # no problem if we have no unicode
    self.assert_(re.compile('bug_926075') is not
                 re.compile(eval("u'bug_926075'")))
def __init__(self, regular_expressions):
    """ Initialize the object

        regular_expressions: an iterable set of regular expressions to be applied
         for extracting mutations. These are in the default python syntax (i.e.,
         perl regular expressions), with the single exception being that regular
         expressions which should be applied in a case-sensitive manner should be
         followed by the string '[CASE_SENSITIVE]', with no spaces between it and
         the regular expression.

         This can be a list, a file, or any other object which supports iteration.
         For an example, you should refer to the regex.txt file in the
         MutationFinder directory.
    """
    MutationExtractor.__init__(self)
    self._regular_expressions = []
    for regular_expression in regular_expressions:
        if regular_expression.endswith('[CASE_SENSITIVE]'):
            self._regular_expressions.append(
                compile(regular_expression[:regular_expression.rindex('[')]))
        else:
            self._regular_expressions.append(
                compile(regular_expression, IGNORECASE))
def run(self):
    self.ie_paths_re = re.compile(r"^c:\\program files(?:\s\(x86\))?\\internet explorer\\iexplore.exe$", re.I)
    # run through re.escape()
    self.white_list_re = [
        "^C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Adobe\\\\Reader\\ \\d+\\.\\d+\\\\Reader\\\\AcroRd32\\.exe$",
        "^C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Java\\\\jre\\d+\\\\bin\\\\j(?:avaw?|p2launcher)\\.exe$",
        "^C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Microsoft SilverLight\\\\(?:\\d+\\.)+\\d\\\\agcp.exe$",
        "^C\\:\\\\Windows\\\\System32\\\\ntvdm.exe$",
    ]
    # means we can be evaded but also means we can have relatively tight paths between 32-bit and 64-bit
    self.white_list_re_compiled = []
    for entry in self.white_list_re:
        self.white_list_re_compiled.append(re.compile(entry, re.I))
    self.white_list_re_compiled.append(self.ie_paths_re)

    # get the path of the initial monitored executable
    self.initialpath = None
    processes = self.results["behavior"]["processtree"]
    if len(processes):
        self.initialpath = processes[0]["module_path"].lower()
        if self.initialpath and self.ie_paths_re.match(self.initialpath) and processes[0].has_key("children"):
            self.martians = self.find_martians(processes, self.white_list_re_compiled)
            if len(self.martians) > 0:
                for martian in self.martians:
                    self.data.append({"ie_martian": martian})
                return True
    return False
def test_re_match(self):
    self.assertEqual(re.match('a', 'a').groups(), ())
    self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
    self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
    self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
    self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))

    pat = re.compile('((a)|(b))(c)?')
    self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
    self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
    self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
    self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
    self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))

    # A single group
    m = re.match('(a)', 'a')
    self.assertEqual(m.group(0), 'a')
    self.assertEqual(m.group(0), 'a')
    self.assertEqual(m.group(1), 'a')
    self.assertEqual(m.group(1, 1), ('a', 'a'))

    pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
    self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
    self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'), (None, 'b', None))
    self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
def __init__(self, *args, **kwargs):
    Signature.__init__(self, *args, **kwargs)
    # Named group to extract the URL of the cloned website.
    self.rex = {
        "saved from url": re.compile(r"\<!--\ssaved\sfrom\surl=\(\d+\)(?P<url>[^\s]+)", re.I),
        "mirrored from": re.compile(r"<!--\smirrored\sfrom\s(?P<url>[^\s]+)\sby\sHTTrack", re.I),
    }
    self.hits = set()
def _prepare_pattern(self, pattern):
    """
    Strip out key:value pairs from the pattern and compile the regular expression.
    """
    regex, _, rest = pattern.partition('\\;')
    try:
        return re.compile(regex, re.I)
    except re.error as e:
        warnings.warn("Caught '{error}' compiling regex: {regex}".format(error=e, regex=regex))
        # regex that never matches: http://stackoverflow.com/a/1845097/413622
        return re.compile(r'(?!x)x')
def _compile_regex(regex_string, flags=0):
    try:
        if re2:
            # default max_mem is 8<<20 bytes (8 MiB)
            return re.compile(regex_string, max_mem=60*1000*1000, flags=flags)
        else:
            return re.compile(regex_string, flags=flags)
    except:
        logging.exception("Error compiling with flags %s for string: %s", flags, regex_string)
        raise
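# A minimal sketch (not taken from the original module) of the import setup that
# _compile_regex above appears to assume: `re` is bound to pyre2 when it is
# installed, and the plain `re2` name doubles as a feature flag. The exact names
# and fallback behaviour here are assumptions for illustration only.
try:
    import re2
    import re2 as re  # pyre2's compile() accepts the max_mem keyword used above
except ImportError:
    re2 = None
    import re  # standard library fallback; no max_mem support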
def test_regex(regex_array):
    """Ensures the regex strings are validated for proper syntax.
    """
    for regex_entry in regex_array:
        try:
            re.compile(regex_entry, re.MULTILINE | re.UNICODE)
        except re.error:
            logging.error('Invalid Regex Found: %s', regex_entry)
            sys.exit(1)
def test_dollar_matches_twice(self):
    "$ matches the end of string, and just before the terminating \n"
    pattern = re.compile('$')
    self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
    self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
    self.assertEqual(pattern.sub('#', '\n'), '#\n#')

    pattern = re.compile('$', re.MULTILINE)
    self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a#\nb#\n#')
    self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
    self.assertEqual(pattern.sub('#', '\n'), '#\n#')
def run(self):
    if self.results["target"]["category"] == "url":
        return False

    office_pkgs = ["ppt", "doc", "xls", "eml"]
    if not any(e in self.results["info"]["package"] for e in office_pkgs):
        return False

    self.office_paths_re = re.compile(r"^[A-Z]\:\\Program Files(?:\s\(x86\))?\\Microsoft Office\\(?:Office1[1-5]\\)?(?:WINWORD|OUTLOOK|POWERPNT|EXCEL|WORDVIEW)\.EXE$", re.I)
    # run through re.escape()
    #############################################
    #YOU MAY HAVE TO CUSTOMIZE THIS FOR YOUR ENV#
    #############################################
    self.white_list_re = [
        "C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Adobe\\\\Reader\\ \\d+\\.\\d+\\\\Reader\\\\AcroRd32\\.exe$",
        "C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Java\\\\jre\\d+\\\\bin\\\\j(?:avaw?|p2launcher)\\.exe$",
        "C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Microsoft SilverLight\\\\(?:\\d+\\.)+\\d\\\\agcp\\.exe$",
        "C\\:\\\\Windows\\\\System32\\\\ntvdm\\.exe$",
        "C\\:\\\\Windows\\\\System32\\\\svchost\\.exe$",
        "C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\internet explorer\\\\iexplore\.exe$",  # remove this one at some point
        "C\\:\\\\Windows\\\\System32\\\\rundll32\\.exe$",
        "C\\:\\\\Windows\\\\System32\\\\drwtsn32\\.exe$",
        "C\\:\\\\Windows\\\\splwow64\\.exe$",
        "C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Common Files\\\\Microsoft Shared\\\\office1[1-6]\\\\off(?:lb|diag)\\.exe$",
        "C\\:\\\\Program Files(?:\s\\(x86\\))?\\\\Common Files\\\\Microsoft Shared\\\\dw\\\\dw(?:20)?\\.exe$",
        "C\\:\\\\Windows\\\\system32\\\\dwwin\\.exe$",
        "C\\:\\\\Windows\\\\system32\\\\WerFault\\.exe$",
        "C\\:\\\\Windows\\\\syswow64\\\\WerFault\\.exe$"
    ]
    # means we can be evaded but also means we can have relatively tight paths between 32-bit and 64-bit
    self.white_list_re_compiled = []
    for entry in self.white_list_re:
        try:
            self.white_list_re_compiled.append(re.compile(entry, re.I))
        except Exception as e:
            print "failed to compile expression %s error:%s" % (entry, e)
    self.white_list_re_compiled.append(self.office_paths_re)

    # Sometimes if a service gets loaded we get out-of-order processes in the tree,
    # so iterate over the Office processes to get the path of the initial monitored executable.
    self.initialpath = None
    processes = self.results["behavior"]["processtree"]
    if len(processes):
        for p in processes:
            initialpath = p["module_path"].lower()
            if initialpath and self.office_paths_re.match(initialpath) and p.has_key("children"):
                self.martians = self.find_martians(p, self.white_list_re_compiled)
                if len(self.martians) > 0:
                    for martian in self.martians:
                        self.data.append({"office_martian": martian})
                    return True
    return False
def __init__(self): """ Initialize the object """ MutationExtractor.__init__(self) word_regex_patterns = self._build_word_regex_patterns() string_regex_patterns = self._build_string_regex_patterns() self._word_regexs = [] self._string_regexs = [] self._replace_regex = compile('[^a-zA-Z0-9\s]') # Compile the regular expressions for regex_pattern in word_regex_patterns: self._word_regexs.append(compile(regex_pattern)) for regex_pattern in string_regex_patterns: self._string_regexs.append(compile(regex_pattern))
def on_call(self, call, process):
    if call["api"] == "RegSetValueExA" and call["status"]:
        key = re.compile(".*\\\\SOFTWARE\\\\(Wow6432Node\\\\)?Microsoft\\\\Windows\\\\CurrentVersion\\\\Run\\\\cryptedinfo$")
        buff = self.get_argument(call, "Buffer").lower()
        fullname = self.get_argument(call, "FullName")
        if buff == "notepad c:\programdata\cryptinfo.txt" and key.match(fullname):
            return True
def execute(self, message: IRCMessage):
    comicLimit = 8
    params = list(message.parameterList)
    if len(params) > 0 and string.isNumber(params[0]):
        comicLimit = int(params.pop(0))

    messages = self.getMessages(message.replyTo)
    if len(params) > 0:
        regex = re2.compile(" ".join(params), re2.IGNORECASE)
        matches = list(filter(regex.search, [msg[1] for msg in messages]))
        if len(matches) == 0:
            return IRCResponse(ResponseType.Say,
                               "Sorry, that didn't match anything in my message buffer.",
                               message.replyTo)
        elif len(matches) > 1:
            return IRCResponse(ResponseType.Say,
                               "Sorry, that matches too many lines in my message buffer.",
                               message.replyTo)

        index = [msg[1] for msg in messages].index(matches[0])
        lastIndex = index + comicLimit
        if lastIndex > len(messages):
            lastIndex = len(messages)
        messages = messages[index:lastIndex]
    else:
        messages = messages[comicLimit * -1:]

    if messages:
        comicBytes = self.makeComic(messages)
        return IRCResponse(ResponseType.Say, self.postComic(comicBytes), message.replyTo)
    else:
        return IRCResponse(ResponseType.Say,
                           "There are no messages in the buffer to create a comic with.",
                           message.replyTo)
def start(self, path):
    root = os.environ["TEMP"]
    password = self.options.get("password")
    exe_regex = re.compile('(\.exe|\.scr|\.msi|\.bat|\.lnk)$', flags=re.IGNORECASE)
    zipinfos = self.get_infos(path)
    self.extract_zip(path, root, password, 0)

    file_name = self.options.get("file")
    # If no file name is provided via option, take the first file.
    if not file_name:
        # No name provided try to find a better name.
        if len(zipinfos):
            # Attempt to find a valid exe extension in the archive
            for f in zipinfos:
                if exe_regex.search(f.filename):
                    file_name = f.filename
                    break
            # Default to the first one if none found
            file_name = file_name if file_name else zipinfos[0].filename
            log.debug("Missing file option, auto executing: {0}".format(file_name))
        else:
            raise CuckooPackageError("Empty ZIP archive")

    file_path = os.path.join(root, file_name)
    return self.execute(file_path, self.options.get("arguments"), file_path)
def score_pattern(pattern, pos, neg):
    """Calculates a one tailed p-value for a pattern using a Fisher's exact test

    :param pattern: regular expression search pattern
    :type pattern: string
    :param pos: list of strings where the outcome was positive
    :type pos: list of strings
    :param neg: list of strings where the outcome was negative
    :type neg: list of strings
    :returns: one tailed p-value
    :rtype: float
    """
    p = re.compile(pattern)
    pp = 0
    pn = 0
    for l in pos:
        if p.search(l):
            pp += 1
    for l in neg:
        if p.search(l):
            pn += 1
    s1 = getLogPvalue(pp, len(pos), pn, len(neg))
    s2 = getLogPvalue(pn, len(neg), pp, len(pos))
    return min(s1, s2)
def when(self, context):
    if not self.regex:
        regex = ')('.join(e.re_value() for e in self.expressions)
        self.regex = re.compile('(' + regex + ')', self.modifiers)
    end = context.start + SEARCH_WINDOW
    return self.regex.match(context.input[context.start:end])
def render_pony(name, text, balloonstyle, width=80, center=False, centertext=False):
    pony = load_pony(name)
    balloon = link_l = link_r = ''
    if text:
        [link_l, link_r] = balloonstyle[-2:]
    for i, line in enumerate(pony):
        match = re.search('\$balloon([0-9]*)\$', line)
        if match:
            minwidth = int(match.group(1) or '0')
            pony[i:i+1] = render_balloon(text, balloonstyle, minwidth=minwidth, maxwidth=int(width/2),
                                         pad=str.center if centertext else str.ljust)
            break
    try:
        first = pony.index('$$$')
        second = pony[first+1:].index('$$$')
        pony[first:] = pony[first+1+second+1:]
    except:
        pass
    pony = [line.replace('$\\$', link_l).replace('$/$', link_r) for line in pony]
    indent = ''
    if center:
        ponywidth = max([len(re.sub(r'\x1B\[[0-9;]+m|\$.*\$', '', line)) for line in pony])
        indent = ' ' * int((width - ponywidth) / 2)
    wre = re.compile('((\x1B\[[0-9;]+m)*.){0,%s}' % width)
    reset = '\x1B[39;49m\n'
    return indent + (reset + indent).join([wre.search(line).group() for line in pony]) + reset
def AWGetStripDialogue(yyyy=None, mm=None, dd=None, urlstring=None):
    """
    Get a strip's dialogue from ohnorobot.com for a given date.

    This works by taking a URL like this:

        "http://www.ohnorobot.com/index.pl?s=%s+%s+%s&Search=Search&comic=636&e=0&n=0&b=0&m=0&d=0&t=0" % (
            monthnames[mm], dd, yyyy )

    ... and looking up the specific AWAchewoodDate in the mess of return data to find the dialogue.
    """
    if urlstring:
        bar = AWGetStripAssetbarData(urlstring=urlstring)
        yyyy, mm, dd = bar['year'], bar['month'], bar['day']

    dsurl = "http://www.ohnorobot.com/index.pl?s=%s+%s+%s&Search=Search&comic=636&e=0&n=0&b=0&m=0&d=0&t=0" % (
        monthnames[int(mm)], dd, yyyy)
    dsearch = soup(dsurl)
    dlg = filter(lambda li: li.find('a', {'class': "searchlink",
                                          'href': re.compile("%s$" % AWAchewoodDate(yyyy, mm, dd))}),
                 dsearch.findAll('li'))
    if len(dlg) == 1:
        #return strip_entities(strip_tags(dlg.pop()))
        return strip_tags(dlg.pop())
    return u""
def run(self):
    indicators = [
        "UPDATE__",
        "MAIN_.*",
        "BACKUP_.*"
    ]
    count = 0
    for indicator in indicators:
        if self.check_mutex(pattern=indicator, regex=True):
            count += 1
    if count == len(indicators):
        return True

    athena_http_re = re.compile("a=(%[A-Fa-f0-9]{2})+&b=[-A-Za-z0-9+/]+(%3[dD])*&c=(%[A-Fa-f0-9]{2})+")
    if "network" in self.results:
        httpitems = self.results["network"].get("http")
        if not httpitems:
            return False
        for http in httpitems:
            if http["method"] == "POST" and athena_http_re.search(http["body"]):
                self.data.append({"url": http["uri"], "data": http["body"]})
                return True

    return False
def _gen_keyword(self, content):
    """
    Generator for self.keywords (dictionary made of regexps as keys
    and their categories as values)
    """
    for line in content[2].split("\n")[1:-1]:
        # Comments start with //
        if line and not line.startswith("//"):
            line = line.split("\t")
            # If not using a dictionary already made of regexps,
            # fix the keyword up for regexping:
            # "\b" is added at the beginning of every keyword;
            # if the keyword doesn't end with "*", a trailing "\b" is added.
            #   bad  -> \bbad\b  matches "bad" but not "badass"
            #   bad* -> \bbad    matches "bad" and "badass"
            if not self.dic_regex:
                line[0] = "".join(["\\b", line[0]])
                try:
                    if (line[0][-1] == "*"):
                        line[0] = line[0][:-1]
                    else:
                        line[0] = "".join([line[0], "\\b"])
                except IndexError:
                    continue
            yield (re.compile(line[0], re.IGNORECASE), line[1:])
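# Hypothetical standalone demo (not part of the original module) of the
# keyword-to-regex conversion described in the comments above; the helper name
# and sample keywords are assumptions for illustration only.
import re

def _keyword_to_regex(keyword):
    pattern = "\\b" + keyword
    if pattern.endswith("*"):
        pattern = pattern[:-1]       # "bad*" -> r"\bbad"   (prefix match)
    else:
        pattern = pattern + "\\b"    # "bad"  -> r"\bbad\b" (whole-word match)
    return re.compile(pattern, re.IGNORECASE)

assert _keyword_to_regex("bad*").search("badass")       # prefix form matches "badass"
assert not _keyword_to_regex("bad").search("badass")    # whole-word form does not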
def handleEvent(self, event):
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    # We only want web content from the target
    if srcModuleName != "sfp_spider":
        return None

    eventSource = event.sourceEvent.data

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    if eventSource not in self.results.keys():
        self.results[eventSource] = list()

    # We only want web content for pages on the target site
    if not self.getTarget().matches(self.sf.urlFQDN(eventSource)):
        self.sf.debug("Not collecting web content information for external sites.")
        return None

    for regexpGrp in regexps.keys():
        if regexpGrp in self.results[eventSource]:
            continue
        for regex in regexps[regexpGrp]:
            pat = re.compile(regex, re.IGNORECASE)
            matches = re.findall(pat, eventData)
            if len(matches) > 0 and regexpGrp not in self.results[eventSource]:
                self.sf.info("Matched " + regexpGrp + " in content from " + eventSource)
                self.results[eventSource].append(regexpGrp)
                evt = SpiderFootEvent("ERROR_MESSAGE", regexpGrp, self.__name__, event.sourceEvent)
                self.notifyListeners(evt)

    return None
def when(self, context):
    if not self.regex:
        regex = ')('.join(e.re_value() for e in self.expressions)
        unire = unicode(regex, 'latin1')
        self.regex = re.compile('(' + unire + ')', self.modifiers)
    return self.regex.match(context.input, context.start)
def build_smile_re(dsmile):
    out = {}
    for name, lsmile in dsmile.items():
        out[name] = re.compile(r'(?: %s)' % (r'| '.join(lsmile)))
        #print name, r'(?:\s%s)' % (r'|\s'.join(lsmile))
    return out
def run(self):
    # Check zeus synchronization-mutex.
    # Regexp pattern for zeus synchronization-mutex such as for example:
    # 2CCB0BFE-ECAB-89CD-0261-B06D1C10937F
    exp = re.compile(".*[A-Z0-9]{8}-([A-Z0-9]{4}-){3}[A-Z0-9]{12}", re.IGNORECASE)
    mutexes = self.results["behavior"]["summary"]["mutexes"]
    mutexset = set()
    count = 0
    for mutex in mutexes:
        if exp.match(mutex):
            mutexset.add(mutex)
            count += 1

    # Check if there are at least 5 mutexes opened matching the pattern?
    if count < 5:
        return False

    # Check for UDP Traffic on remote port greater than 1024.
    # TODO: this might be faulty without checking whether the destination
    # IP is really valid.
    count = 0
    if "network" in self.results:
        for udp in self.results["network"]["udp"]:
            if udp["dport"] > 1024:
                count += 1
    if count < 4:
        return False

    for mutex in mutexset:
        self.data.append({"mutex": mutex})

    return True
def test_bug_1661(self):
    # Verify that flags do not get silently ignored with compiled patterns
    pattern = re.compile('.')
    self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
    self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
    self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
    self.assertRaises(ValueError, re.compile, pattern, re.I)
def _add_domain(self, domain):
    """Add a domain to unique list.
    @param domain: domain name.
    """
    filters = [
        ".*\\.windows\\.com$",
        ".*\\.in\\-addr\\.arpa$",
        ".*\\.ip6\\.arpa$"
    ]

    regexps = [re.compile(filter) for filter in filters]
    for regexp in regexps:
        if regexp.match(domain):
            return

    for entry in self.unique_domains:
        if entry["domain"] == domain:
            return

    self.unique_domains.append({
        "domain": domain,
        "ip": self._dns_gethostbyname(domain)
    })
def handleEvent(self, event):
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    # We only want web content from the target
    if srcModuleName != "sfp_spider":
        return None

    eventSource = event.actualSource

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    if eventSource not in self.results.keys():
        self.results[eventSource] = list()

    # We only want web content for pages on the target site
    if not self.getTarget().matches(self.sf.urlFQDN(eventSource)):
        self.sf.debug("Not collecting web content information for external sites.")
        return None

    for regexpGrp in regexps.keys():
        if regexpGrp in self.results[eventSource]:
            continue
        for regex in regexps[regexpGrp]:
            pat = re.compile(regex, re.IGNORECASE)
            matches = re.findall(pat, eventData)
            if len(matches) > 0 and regexpGrp not in self.results[eventSource]:
                self.sf.info("Matched " + regexpGrp + " in content from " + eventSource)
                self.results[eventSource].append(regexpGrp)
                evt = SpiderFootEvent("ERROR_MESSAGE", regexpGrp, self.__name__, event)
                self.notifyListeners(evt)

    return None
def start(self, path):
    root = os.environ["TEMP"]
    password = self.options.get("password")
    exe_regex = re.compile('(\.exe|\.scr|\.msi|\.bat|\.lnk|\.js|\.jse|\.vbs|\.vbe|\.wsf)$', flags=re.IGNORECASE)
    zipinfos = self.get_infos(path)
    self.extract_zip(path, root, password, 0)

    file_name = self.options.get("file")
    # If no file name is provided via option, take the first file.
    if not file_name:
        # No name provided try to find a better name.
        if len(zipinfos):
            # Attempt to find a valid exe extension in the archive
            for f in zipinfos:
                if exe_regex.search(f.filename):
                    file_name = f.filename
                    break
            # Default to the first one if none found
            file_name = file_name if file_name else zipinfos[0].filename
            log.debug("Missing file option, auto executing: {0}".format(file_name))
        else:
            raise CuckooPackageError("Empty ZIP archive")

    file_path = os.path.join(root, file_name)
    log.debug("file_name: \"%s\"" % (file_name))
    if file_name.lower().endswith(".lnk"):
        cmd_path = self.get_path("cmd.exe")
        cmd_args = "/c start /wait \"\" \"{0}\"".format(file_path)
        return self.execute(cmd_path, cmd_args, file_path)
    elif file_name.lower().endswith(".msi"):
        msi_path = self.get_path("msiexec.exe")
        msi_args = "/I \"{0}\"".format(file_path)
        return self.execute(msi_path, msi_args, file_path)
    elif file_name.lower().endswith((".js", ".jse", ".vbs", ".vbe", ".wsf")):
        wscript = self.get_path_app_in_path("wscript.exe")
        wscript_args = "\"{0}\"".format(file_path)
        return self.execute(wscript, wscript_args, file_path)
    else:
        return self.execute(file_path, self.options.get("arguments"), file_path)
def getLinks(html):
    soup = BeautifulSoup(html)
    links = []
    for link in soup.findAll('a', attrs={'href': re.compile("^http://")}):
        links.append(link.get('href'))
    for link in soup.findAll('a', attrs={'href': re.compile("^https://")}):
        links.append(link.get('href'))
    for link in soup.findAll('iframe', attrs={'src': re.compile("^https://")}):
        links.append(link.get('src'))
    for link in soup.findAll('iframe', attrs={'src': re.compile("^http://")}):
        links.append(link.get('src'))
    for link in soup.findAll('script', attrs={'src': re.compile("^https://")}):
        links.append(link.get('src'))
    for link in soup.findAll('script', attrs={'src': re.compile("^http://")}):
        links.append(link.get('src'))
    return links
def _remove(self, message: IRCMessage):
    """remove <regex> - remove a quote from the OutOfContext log."""
    if len(message.parameterList) < 2:
        return IRCResponse(ResponseType.Say, "Remove what?", message.replyTo)
    if len(self.storage) == 0 or message.replyTo not in self.storage:
        return IRCResponse(ResponseType.Say, "There are no quotes in the log.", message.replyTo)

    regex = re2.compile(" ".join(message.parameterList[1:]), re2.IGNORECASE)
    matches = list(filter(regex.search, self.storage[message.replyTo]))
    if len(matches) == 0:
        return IRCResponse(ResponseType.Say, "That message is not in the log.", message.replyTo)
    if len(matches) > 1:
        return IRCResponse(ResponseType.Say,
                           f"Unable to remove quote, {len(matches)} matches found.",
                           message.replyTo)
    return self._removeQuote(message.replyTo, matches[0])
def test_float():
    print re('[#ss #float #es]')
    f = compile('[#ss #float #es]')
    assert f.match('0.0')
    assert f.match('-0.0')
    assert f.match('0.0e1')
    assert f.match('-0.0e1')
    assert f.match('0.0e-1')
    assert f.match('0.0E1')
    assert f.match('0.')
    assert f.match('0.e1')
    assert f.match('.0')
    assert f.match('.0e1')
    assert f.match('0e1')
    assert not f.match('0')
    assert not f.match('.')
    assert not f.match('.e1')
    assert not f.match('0.0e')
    assert f.match('1024.12e3')
    assert f.match('-1024.12e-3')
    assert f.match('-.12e3')
    assert f.match('-1024.12E-3')
def test_re_groupref_exists(self):
    self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
                     ('(', 'a'))
    self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
                     (None, 'a'))
    self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
    self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
    self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
                     ('a', 'b'))
    self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
                     (None, 'd'))
    self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
                     (None, 'd'))
    self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
                     ('a', ''))

    # Tests for bug #1177831: exercise groups other than the first group
    p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
    self.assertEqual(p.match('abc').groups(), ('a', 'b', 'c'))
    self.assertEqual(p.match('ad').groups(), ('a', None, 'd'))
    self.assertEqual(p.match('abd'), None)
    self.assertEqual(p.match('ac'), None)
async def ss(text, channel, reply, event, bot, storage):
    """<regex replacement> - replace regex with replacement"""
    delete_if_needed(storage, event)

    text = text.split(maxsplit=1)
    if len(text) == 0 or len(text) > 2:
        msg = "Invalid format"
        msg += ": " + "\n`" + "<regex replacement> - replace regex with replacement`"
        msg += (
            "\n`"
            + "if only the regex is specified, it will be replaced with a blankspace`"
        )
        reply(msg, timeout=15)
        return
    if len(text) == 1:
        text.append("")

    replied_to = await event.msg.reference()
    if replied_to:
        messages = [replied_to]
    else:
        messages = await channel.async_get_latest_messages(MAX_LEN)

    try:
        regex = re2.compile(text[0])
    except:
        reply("You don't have a valid regex")
        return

    for msg in messages:
        if (msg.id == event.msg.id
                or msg.author.id == bot.get_own_id()
                or msg.text.startswith(".s")):
            continue
        if regex.search(msg.text) != None:
            msg = "<%s> %s" % (msg.author.name, regex.sub(text[1], msg.text))
            reply(msg)
            return
def search_behavior(request, task_id):
    if request.method == 'POST':
        query = request.POST.get('search')
        results = []

        # Fetch analysis report
        record = results_db.analysis.find_one(
            {"info.id": int(task_id)}
        )

        # Loop through every process
        for process in record["behavior"]["processes"]:
            process_results = []
            chunks = results_db.calls.find({
                "_id": {"$in": process["calls"]}
            })
            for chunk in chunks:
                for call in chunk["calls"]:
                    query = re.compile(query)
                    if query.search(call['api']):
                        process_results.append(call)
                    else:
                        for argument in call['arguments']:
                            if query.search(argument['name']) or query.search(argument['value']):
                                process_results.append(call)
                                break

            if len(process_results) > 0:
                results.append({
                    'process': process,
                    'signs': process_results
                })

        return render_to_response("analysis/behavior/_search_results.html",
                                  {"results": results},
                                  context_instance=RequestContext(request))
    else:
        raise PermissionDenied
def apply_transformation(transform_tool, regex_hit_pattern, input_f, output_f):
    nul = open(os.devnull, 'w')
    # print("tool: {}\ninput: {}\nouput: {}".format(' '.join(transform_tool), input_f, output_f))
    # print(' '.join(transform_tool))
    counter = 0
    p = Popen(transform_tool, stdout=PIPE, stderr=None)
    # get number of hits for the transformation
    regex_c = re2.compile(regex_hit_pattern)
    for line in p.stdout:
        # print(line)
        match_p = regex_c.search(line)
        if match_p is not None:
            counter += 1
    p.wait()
    print("Transformation return code: {}".format(p.returncode))
    print("Number of successful transformations: {}".format(counter))
    nul.close()
    return counter if p.returncode == 0 else -1
def execute(self, irc_c, msg, cmd): # Set the return mode of the output selection = { 'ignorepromoted': self['ignorepromoted'], 'order': self['order'], 'limit': self['limit'], 'offset': self['offset'], } if self['random']: selection['order'] = 'random' selection['limit'] = 1 # if self['recommend']: # selection['order'] = 'recommend' # selection['limit'] = 1 if self['newest']: selection['order'] = 'recent' selection['limit'] = 1 # What are we searching for? searches = [] strings = [] if len(self['title']) > 0: strings = self['title'] searches.extend([{'term': s, 'type': None} for s in strings]) # Add any regexes regexes = [] for regex in self['regex']: try: re.compile(regex) except re.error as e: raise CommandError( "'{}' isn't a valid regular expression: {}.".format( regex, e ) ) from e regexes.append(regex) # don't append compiled regex - SQL doesn't like that searches.extend([{'term': r, 'type': 'regex'} for r in regexes]) # Set the tags tags = {'include': [], 'exclude': []} for tag in self['tags']: if tag[0] == "-": tags['exclude'].append(tag[1:]) continue if tag[0] == "+": tags['include'].append(tag[1:]) continue tags['include'].append(tag) searches.append({'term': tags, 'type': 'tags'}) # Set the author authors = {'include': [], 'exclude': []} for author in self['author']: if author[0] == "-": authors['exclude'].append(author[1:]) continue if author[0] == "+": authors['include'].append(author[1:]) continue authors['include'].append(author) searches.append({'term': authors, 'type': 'author'}) # Set the rating # Cases to account for: modifiers, range, combination ratings = MinMax() for rating in self['rating']: if ".." in rating: rating = rating.split("..") if len(rating) > 2: raise CommandError("Too many ratings in range.") try: rating = [int(x) for x in rating] except ValueError as e: raise CommandError( "Ratings in a range must be integers." ) from e try: ratings >= min(rating) ratings <= max(rating) except MinMaxError as e: raise CommandError(str(e).format("rating")) from e elif rating[0] in [">", "<", "="]: pattern = r"^(?P<comp>[<>=]{1,2})(?P<value>[0-9]+)" match = re.search(pattern, rating) if match: try: rating = int(match.group('value')) except ValueError as e: raise CommandError("Invalid rating comparison.") from e comp = match.group('comp') try: if comp == ">=": ratings >= rating elif comp == "<=": ratings <= rating elif comp == "<": ratings < rating elif comp == ">": ratings > rating elif comp == "=": ratings >= rating ratings <= rating else: raise CommandError("Unknown rating comparison.") except MinMaxError as e: raise CommandError(str(e).format("rating")) from e elif rating[0] in [">", "<", "="]: pattern = r"^(?P<comp>[<>=]{1,2})(?P<value>-?[0-9]+)" match = re.search(pattern, rating) if match: try: rating = int(match.group('value')) except ValueError: raise CommandError("Invalid rating comparison.") comp = match.group('comp') try: if comp == ">=": ratings >= rating elif comp == "<=": ratings <= rating elif comp == "<": ratings < rating elif comp == ">": ratings > rating elif comp == "=": ratings >= rating ratings <= rating else: raise CommandError( "Unknown operator in rating comparison." ) except MinMaxError as e: raise CommandError(str(e).format("rating")) else: raise CommandError("Invalid rating comparison.") else: raise CommandError("Invalid rating comparison.") else: try: rating = int(rating) except ValueError as e: raise CommandError( "Rating must be a range, comparison, or number." 
) from e # Assume =, assign both try: ratings >= rating ratings <= rating except MinMaxError as e: raise CommandError(str(e).format("rating")) searches.append({'term': ratings, 'type': 'rating'}) # Set created date # Cases to handle: absolute, relative, range (which can be both) createds = MinMax() created = self['created'] # created is a list of date selectors - ranges, abs and rels # but ALL dates are ranges! created = [DateRange(c) for c in created] # created is now a list of DateRanges with min and max try: for selector in created: if selector.max is not None: createds <= selector.max if selector.min is not None: createds >= selector.min except MinMaxError as e: raise CommandError(str(e).format("date")) searches.append({'term': createds, 'type': 'date'}) # Set category categories = {'include': [], 'exclude': []} for category in self['category']: if category[0] == "-": categories['exclude'].append(category[1:]) continue else: if category[0] == "+": categories['include'].append(category[1:]) continue categories['include'].append(category) continue categories['include'].append(category) searches.append({'term': categories, 'type': 'category'}) # Set parent page parents = self['parent'] if parents is not None: searches.append({'term': parents, 'type': 'parent'}) # FINAL BIT - summarise commands if self['verbose']: verbose = "Searching for articles " if len(strings) > 0: verbose += "containing \"{}\"; ".format("\", \"".join(strings)) if len(regexes) > 0: verbose += "matching the regex /{}/; ".format( "/ & /".join(regexes) ) if parents is not None: verbose += "whose parent page is '{}'; ".format(parents) if len(categories['include']) == 1: verbose += ( "in the category '" + categories['include'][0] + "'; " ) elif len(categories['include']) > 1: verbose += ( "in the categories '" + "', '".join(categories) + "; " ) if len(categories['exclude']) == 1: verbose += ( "not in the category '" + categories['exclude'][0] + "'; " ) elif len(categories['exclude']) > 1: verbose += ( "not in the categories '" + "', '".join(categories) + "; " ) if len(tags['include']) > 0: verbose += ( "with the tags '" + "', '".join(tags['include']) + "'; " ) if len(tags['exclude']) > 0: verbose += ( "without the tags '" + "', '".join(tags['exclude']) + "'; " ) if len(authors['include']) > 0: verbose += "by " + " & ".join(authors['include']) + "; " if len(authors['exclude']) > 0: verbose += "not by " + " or ".join(authors['exclude']) + "; " if ratings['max'] is not None and ratings['min'] is not None: if ratings['max'] == ratings['min']: verbose += "with a rating of " + str(ratings['max']) + "; " else: verbose += ( "with a rating between " + str(ratings['min']) + " and " + str(ratings['max']) + "; " ) elif ratings['max'] is not None: verbose += ( "with a rating less than " + str(ratings['max'] + 1) + "; " ) elif ratings['min'] is not None: verbose += ( "with a rating greater than " + str(ratings['min'] - 1) + "; " ) if createds['min'] is not None and createds['max'] is not None: verbose += ( "created between " + createds['min'].to_datetime_string() + " and " + createds['max'].to_datetime_string() + "; " ) elif createds['max'] is not None: verbose += ( "created before " + createds['max'].to_datetime_string() + "; " ) elif createds['min'] is not None: verbose += ( "created after " + createds['min'].to_datetime_string() + "; " ) if verbose.endswith("; "): verbose = verbose[:-2] msg.reply(verbose) page_ids = DB.get_articles(searches) pages = [DB.get_article_info(p_id) for p_id in page_ids] pages = Search.order(pages, 
search_term=strings, **selection) if len(pages) >= 50: msg.reply( "{} results found - you're going to have to be more " "specific!".format(len(pages)) ) return if len(pages) > 3: msg.reply( "{} results (use ..sm to choose): {}".format( len(pages), Showmore.parse_multiple_titles(pages) ) ) DB.set_showmore_list(msg.raw_channel, [p['id'] for p in pages]) return if len(pages) == 0: # check if there's no args other than --verbose if len(self['title']) > 0: # google only takes 10 args url = google_search( '"' + '" "'.join(self['title'][:10]) + '"', num=1 )[0] if url is None: msg.reply("No matches found.") return if url['title'].endswith(" - SCP Foundation"): url['title'] = url['title'][:-17] msg.reply( "No matches found. Did you mean \x02{}\x0F? {}".format( url['title'], url['link'] ) ) else: msg.reply("No matches found.") return for page in pages: msg.reply( Gib.obfuscate( Showmore.parse_title(page), DB.get_channel_members(msg.raw_channel), ) )
def handleEvent(self, event):
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    # Don't re-parse e-mail addresses
    if "EMAILADDR" in eventName:
        return None

    # Ignore any web content that isn't from the target. This avoids noise from
    # pastebin and other content where unrelated e-mails are likely to be found.
    if "_CONTENT" in eventName and eventName != "TARGET_WEB_CONTENT":
        return None

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    if type(eventData) not in [str, unicode]:
        try:
            if type(eventData) in [list, dict]:
                eventData = str(eventData)
            else:
                self.sf.debug("Unhandled type to find e-mails: " + str(type(eventData)))
                return None
        except BaseException as e:
            self.sf.debug("Unable to convert list/dict to string: " + str(e))
            return None

    pat = re.compile("([\%a-zA-Z\.0-9_\-]+@[a-zA-Z\.0-9\-]+\.[a-zA-Z\.0-9\-]+)")
    matches = re.findall(pat, eventData)
    myres = list()

    for match in matches:
        evttype = "EMAILADDR"

        if len(match) < 4:
            self.sf.debug("Likely invalid address: " + match)
            continue

        # Handle messed up encodings
        if "%" in match:
            self.sf.debug("Skipped address: " + match)
            continue

        # Get the domain and strip potential ending .
        mailDom = match.lower().split('@')[1].strip('.')
        if not self.getTarget().matches(mailDom) and not self.getTarget().matches(match):
            self.sf.debug("External domain, so possible affiliate e-mail")
            # Raw RIR data returning external e-mails generates way
            # too much noise.
            if eventName == "RAW_RIR_DATA":
                return None
            evttype = "AFFILIATE_EMAILADDR"

        self.sf.info("Found e-mail address: " + match)
        if type(match) == str:
            mail = unicode(match.strip('.'), 'utf-8', errors='replace')
        else:
            mail = match.strip('.')

        if mail in myres:
            self.sf.debug("Already found from this source.")
            continue
        else:
            myres.append(mail)

        evt = SpiderFootEvent(evttype, mail, self.__name__, event)
        if event.moduleDataSource:
            evt.moduleDataSource = event.moduleDataSource
        else:
            evt.moduleDataSource = "Unknown"
        self.notifyListeners(evt)

    return None
def resourceList(self, id, target, targetType):
    targetDom = ''
    # Get the base domain if we're supplied a domain
    if targetType == "domain":
        targetDom = self.sf.hostDomain(target, self.opts['_internettlds'])

    for check in malchecks.keys():
        cid = malchecks[check]['id']
        if id == cid and malchecks[check]['type'] == "list":
            data = dict()
            url = malchecks[check]['url']
            data['content'] = self.sf.cacheGet("sfmal_" + cid, self.opts.get('cacheperiod', 0))
            if data['content'] is None:
                data = self.sf.fetchUrl(url, timeout=self.opts['_fetchtimeout'],
                                        useragent=self.opts['_useragent'])
                if data['content'] is None:
                    self.sf.error("Unable to fetch " + url, False)
                    return None
                else:
                    self.sf.cachePut("sfmal_" + cid, data['content'])

            # If we're looking at netblocks
            if targetType == "netblock":
                iplist = list()
                # Get the regex, replace {0} with an IP address matcher to
                # build a list of IP.
                # Cycle through each IP and check if it's in the netblock.
                if 'regex' in malchecks[check]:
                    rx = malchecks[check]['regex'].replace("{0}", "(\d+\.\d+\.\d+\.\d+)")
                    pat = re.compile(rx, re.IGNORECASE)
                    self.sf.debug("New regex for " + check + ": " + rx)
                    for line in data['content'].split('\n'):
                        grp = re.findall(pat, line)
                        if len(grp) > 0:
                            #self.sf.debug("Adding " + grp[0] + " to list.")
                            iplist.append(grp[0])
                else:
                    iplist = data['content'].split('\n')

                for ip in iplist:
                    if len(ip) < 8 or ip.startswith("#"):
                        continue
                    ip = ip.strip()
                    try:
                        if IPAddress(ip) in IPNetwork(target):
                            self.sf.debug(ip + " found within netblock/subnet " +
                                          target + " in " + check)
                            return url
                    except Exception as e:
                        self.sf.debug("Error encountered parsing: " + str(e))
                        continue

                return None

            # If we're looking at hostnames/domains/IPs
            if 'regex' not in malchecks[check]:
                for line in data['content'].split('\n'):
                    if line == target or (targetType == "domain" and line == targetDom):
                        self.sf.debug(target + "/" + targetDom + " found in " + check + " list.")
                        return url
            else:
                # Check for the domain and the hostname
                try:
                    rxDom = unicode(malchecks[check]['regex']).format(targetDom)
                    rxTgt = unicode(malchecks[check]['regex']).format(target)
                    for line in data['content'].split('\n'):
                        if (targetType == "domain" and re.match(rxDom, line, re.IGNORECASE)) or \
                                re.match(rxTgt, line, re.IGNORECASE):
                            self.sf.debug(target + "/" + targetDom + " found in " + check + " list.")
                            return url
                except BaseException as e:
                    self.sf.debug("Error encountered parsing 2: " + str(e))
                    continue

    return None
def index(request, resubmit_hash=False): if request.method == "POST": package = request.POST.get("package", "") timeout = min(force_int(request.POST.get("timeout")), 60 * 60 * 24) options = request.POST.get("options", "") priority = force_int(request.POST.get("priority")) machine = request.POST.get("machine", "") gateway = request.POST.get("gateway", None) clock = request.POST.get("clock", None) custom = request.POST.get("custom", "") memory = bool(request.POST.get("memory", False)) enforce_timeout = bool(request.POST.get("enforce_timeout", False)) referrer = validate_referrer(request.POST.get("referrer", None)) tags = request.POST.get("tags", None) opt_filename = "" for option in options.split(","): if option.startswith("filename="): opt_filename = option.split("filename=")[1] break task_gateways = [] ipaddy_re = re.compile( r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$" ) if referrer: if options: options += "," options += "referrer=%s" % (referrer) if request.POST.get("free"): if options: options += "," options += "free=yes" if request.POST.get("nohuman"): if options: options += "," options += "nohuman=yes" if request.POST.get("tor"): if options: options += "," options += "tor=yes" if request.POST.get("route", None): if options: options += "," options += "route={0}".format(request.POST.get("route", None)) if request.POST.get("process_dump"): if options: options += "," options += "procdump=0" else: if options: options += "," options += "procdump=1" if request.POST.get("process_memory"): if options: options += "," options += "procmemdump=1" if request.POST.get("import_reconstruction"): if options: options += "," options += "import_reconstruction=1" if request.POST.get("disable_cape"): if options: options += "," options += "disable_cape=1" if request.POST.get("kernel_analysis"): if options: options += "," options += "kernel_analysis=yes" if request.POST.get("norefer"): if options: options += "," options += "norefer=1" orig_options = options if gateway and gateway.lower() == "all": for e in settings.GATEWAYS: if ipaddy_re.match(settings.GATEWAYS[e]): task_gateways.append(settings.GATEWAYS[e]) elif gateway and gateway in settings.GATEWAYS: if "," in settings.GATEWAYS[gateway]: if request.POST.get("all_gw_in_group"): tgateway = settings.GATEWAYS[gateway].split(",") for e in tgateway: task_gateways.append(settings.GATEWAYS[e]) else: tgateway = random.choice( settings.GATEWAYS[gateway].split(",")) task_gateways.append(settings.GATEWAYS[tgateway]) else: task_gateways.append(settings.GATEWAYS[gateway]) if not task_gateways: # To reduce to the default case task_gateways = [None] db = Database() task_ids = [] task_machines = [] if machine.lower() == "all": for entry in db.list_machines(): task_machines.append(entry.label) else: task_machines.append(machine) status = "ok" if "hash" in request.POST and request.POST.get( "hash", False) and request.POST.get("hash")[0] != '': resubmission_hash = request.POST.get("hash").strip() paths = db.sample_path_by_hash(resubmission_hash) paths = filter( None, [path if os.path.exists(path) else False for path in paths]) if not paths and FULL_DB: tasks = results_db.analysis.find( {"dropped.sha256": resubmission_hash}) if tasks: for task in tasks: # grab task id and replace in path aka distributed cuckoo hack path = os.path.join(settings.CUCKOO_PATH, "storage", "analyses", str(task["info"]["id"]), "files", resubmission_hash) if os.path.exists(path): paths = [path] break if paths: content = "" content = 
submit_utils.get_file_content(paths) if content is False: return render(request, "error.html", { "error": "Can't find {} on disk".format(resubmission_hash) }) base_dir = tempfile.mkdtemp(prefix='resubmit_', dir=settings.TEMP_PATH) if opt_filename: filename = base_dir + "/" + opt_filename else: filename = base_dir + "/" + resubmission_hash path = store_temp_file(content, filename) headers = {} url = 'local' params = {} status, task_ids = download_file( content, request, db, task_ids, url, params, headers, "Local", path, package, timeout, options, priority, machine, gateway, clock, custom, memory, enforce_timeout, referrer, tags, orig_options, task_gateways, task_machines) elif "sample" in request.FILES: samples = request.FILES.getlist("sample") for sample in samples: # Error if there was only one submitted sample and it's empty. # But if there are multiple and one was empty, just ignore it. if not sample.size: if len(samples) != 1: continue return render(request, "error.html", {"error": "You uploaded an empty file."}) elif sample.size > settings.MAX_UPLOAD_SIZE: return render( request, "error.html", { "error": "You uploaded a file that exceeds the maximum allowed upload size specified in web/web/local_settings.py." }) # Moving sample from django temporary file to Cuckoo temporary storage to # let it persist between reboot (if user like to configure it in that way). path = store_temp_file(sample.read(), sample.name) for gw in task_gateways: options = update_options(gw, orig_options) for entry in task_machines: try: task_ids_new = db.demux_sample_and_add_to_db( file_path=path, package=package, timeout=timeout, options=options, priority=priority, machine=entry, custom=custom, memory=memory, enforce_timeout=enforce_timeout, tags=tags, clock=clock) task_ids.extend(task_ids_new) except CuckooDemuxError as err: return render(request, "error.html", {"error": err}) elif "quarantine" in request.FILES: samples = request.FILES.getlist("quarantine") for sample in samples: # Error if there was only one submitted sample and it's empty. # But if there are multiple and one was empty, just ignore it. if not sample.size: if len(samples) != 1: continue return render( request, "error.html", {"error": "You uploaded an empty quarantine file."}) elif sample.size > settings.MAX_UPLOAD_SIZE: return render( request, "error.html", { "error": "You uploaded a quarantine file that exceeds the maximum allowed upload size specified in web/web/local_settings.py." }) # Moving sample from django temporary file to Cuckoo temporary storage to # let it persist between reboot (if user like to configure it in that way). tmp_path = store_temp_file(sample.read(), sample.name) path = unquarantine(tmp_path) try: os.remove(tmp_path) except: pass if not path: return render(request, "error.html", { "error": "You uploaded an unsupported quarantine file." 
}) for gw in task_gateways: options = update_options(gw, orig_options) for entry in task_machines: task_ids_new = db.demux_sample_and_add_to_db( file_path=path, package=package, timeout=timeout, options=options, priority=priority, machine=entry, custom=custom, memory=memory, enforce_timeout=enforce_timeout, tags=tags, clock=clock) task_ids.extend(task_ids_new) elif "pcap" in request.FILES: samples = request.FILES.getlist("pcap") for sample in samples: if not sample.size: if len(samples) != 1: continue return render( request, "error.html", {"error": "You uploaded an empty PCAP file."}) elif sample.size > settings.MAX_UPLOAD_SIZE: return render( request, "error.html", { "error": "You uploaded a PCAP file that exceeds the maximum allowed upload size specified in web/web/local_settings.py." }) # Moving sample from django temporary file to Cuckoo temporary storage to # let it persist between reboot (if user like to configure it in that way). path = store_temp_file(sample.read(), sample.name) if sample.name.lower().endswith(".saz"): saz = saz_to_pcap(path) if saz: try: os.remove(path) except: pass path = saz else: return render( request, "error.html", {"error": "Conversion from SAZ to PCAP failed."}) task_id = db.add_pcap(file_path=path, priority=priority) task_ids.append(task_id) elif "url" in request.POST and request.POST.get("url").strip(): url = request.POST.get("url").strip() if not url: return render(request, "error.html", {"error": "You specified an invalid URL!"}) url = url.replace("hxxps://", "https://").replace( "hxxp://", "http://").replace("[.]", ".") for gw in task_gateways: options = update_options(gw, orig_options) for entry in task_machines: task_id = db.add_url(url=url, package=package, timeout=timeout, options=options, priority=priority, machine=entry, custom=custom, memory=memory, enforce_timeout=enforce_timeout, tags=tags, clock=clock) if task_id: task_ids.append(task_id) elif settings.VTDL_ENABLED and "vtdl" in request.POST and request.POST.get( "vtdl", False) and request.POST.get("vtdl")[0] != '': vtdl = request.POST.get("vtdl") if (not settings.VTDL_PRIV_KEY and not settings.VTDL_INTEL_KEY) or not settings.VTDL_PATH: return render( request, "error.html", { "error": "You specified VirusTotal but must edit the file and specify your VTDL_PRIV_KEY or VTDL_INTEL_KEY variable and VTDL_PATH base directory" }) else: base_dir = tempfile.mkdtemp(prefix='cuckoovtdl', dir=settings.VTDL_PATH) hashlist = [] if "," in vtdl: hashlist = vtdl.replace(" ", "").strip().split(",") else: hashlist = vtdl.split() for h in hashlist: if opt_filename: filename = base_dir + "/" + opt_filename else: filename = base_dir + "/" + h paths = db.sample_path_by_hash(h) content = "" if paths is not None: content = submit_utils.get_file_content(paths) headers = {} url = 'https://www.virustotal.com/intelligence/download/' params = {'apikey': settings.VTDL_INTEL_KEY, 'hash': h} if content is False: if settings.VTDL_PRIV_KEY: url = 'https://www.virustotal.com/vtapi/v2/file/download' params = { 'apikey': settings.VTDL_PRIV_KEY, 'hash': h } status, task_ids = download_file( content, request, db, task_ids, url, params, headers, "VirusTotal", filename, package, timeout, options, priority, machine, gateway, clock, custom, memory, enforce_timeout, referrer, tags, orig_options, task_gateways, task_machines) else: status, task_ids = download_file( content, request, db, task_ids, url, params, headers, "Local", filename, package, timeout, options, priority, machine, gateway, clock, custom, memory, enforce_timeout, 
referrer, tags, orig_options, task_gateways, task_machines) if status == "error": # is render msg return task_ids tasks_count = len(task_ids) if tasks_count > 0: return render(request, "submission/complete.html", { "tasks": task_ids, "tasks_count": tasks_count }) else: return render(request, "error.html", {"error": "Error adding task to Cuckoo's database."}) else: cfg = Config("cuckoo") enabledconf = dict() enabledconf["vt"] = settings.VTDL_ENABLED enabledconf["kernel"] = settings.OPT_ZER0M0N enabledconf["memory"] = Config("processing").memory.get("enabled") enabledconf["procmemory"] = Config("processing").procmemory.get( "enabled") enabledconf["tor"] = Config("auxiliary").tor.get("enabled") if Config("auxiliary").gateways: enabledconf["gateways"] = True else: enabledconf["gateways"] = False enabledconf["tags"] = False # Get enabled machinery machinery = Config("cuckoo").cuckoo.get("machinery") # Get VM names for machinery config elements vms = [ x.strip() for x in getattr(Config(machinery), machinery).get( "machines").split(",") ] # Check each VM config element for tags for vmtag in vms: if "tags" in getattr(Config(machinery), vmtag).keys(): enabledconf["tags"] = True files = os.listdir( os.path.join(settings.CUCKOO_PATH, "analyzer", "windows", "modules", "packages")) packages = [] for name in files: name = os.path.splitext(name)[0] if name == "__init__": continue packages.append(name) # Prepare a list of VM names, description label based on tags. machines = [] for machine in Database().list_machines(): tags = [] for tag in machine.tags: tags.append(tag.name) if tags: label = machine.label + ": " + ", ".join(tags) else: label = machine.label machines.append((machine.label, label)) # Prepend ALL/ANY options. machines.insert(0, ("", "First available")) machines.insert(1, ("all", "All")) return render( request, "submission/index.html", { "packages": sorted(packages), "machines": machines, "vpns": vpns.values(), "route": cfg.routing.route, "internet": cfg.routing.internet, "inetsim": cfg.routing.inetsim, "tor": cfg.routing.tor, "gateways": settings.GATEWAYS, "config": enabledconf, "resubmit": resubmit_hash, })
def prepare_route(route):
    for r in route:
        r['resource'] = re.compile(r['resource'], re.I | re.S)
        for rr in r['route']:
            rr[0] = re.compile(rr[0], re.I | re.S)
    return route
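# Hedged usage sketch (the route table below is invented for illustration):
# prepare_route() is assumed to take a list of dicts, each with a 'resource'
# pattern and a list of [pattern, handler] pairs under 'route', and to compile
# those patterns in place so later dispatch can reuse them.
def _show_user(match):
    return "user page for " + match.group('login')

_routes = prepare_route([
    {
        'resource': r'^/users',
        'route': [
            [r'^/users/(?P<login>\w+)$', _show_user],
        ],
    },
])

for _r in _routes:
    if _r['resource'].match('/users/alice'):
        for _pattern, _handler in _r['route']:
            _m = _pattern.match('/users/alice')
            if _m:
                print(_handler(_m))  # -> "user page for alice"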
def start(self, path): password = self.options.get("password") if password is None: password = b"" appdata = self.options.get("appdata") if appdata: root = os.environ["APPDATA"] else: root = os.environ["TEMP"] exe_regex = re.compile( '(\.exe|\.dll|\.scr|\.msi|\.bat|\.lnk|\.js|\.jse|\.vbs|\.vbe|\.wsf)$', flags=re.IGNORECASE) zipinfos = self.get_infos(path) self.extract_zip(path, root, password, 0) file_name = self.options.get("file") # If no file name is provided via option, take the first file. if not file_name: # No name provided try to find a better name. if len(zipinfos): # Attempt to find a valid exe extension in the archive for f in zipinfos: if exe_regex.search(f.filename): file_name = f.filename break # Default to the first one if none found file_name = file_name if file_name else zipinfos[0].filename log.debug("Missing file option, auto executing: {0}".format( file_name)) else: raise CuckooPackageError("Empty ZIP archive") file_path = os.path.join(root, file_name) log.debug("file_name: \"%s\"" % (file_name)) if file_name.lower().endswith(".lnk"): cmd_path = self.get_path("cmd.exe") cmd_args = "/c start /wait \"\" \"{0}\"".format(file_path) return self.execute(cmd_path, cmd_args, file_path) elif file_name.lower().endswith(".msi"): msi_path = self.get_path("msiexec.exe") msi_args = "/I \"{0}\"".format(file_path) return self.execute(msi_path, msi_args, file_path) elif file_name.lower().endswith( (".js", ".jse", ".vbs", ".vbe", ".wsf")): wscript = self.get_path_app_in_path("wscript.exe") wscript_args = "\"{0}\"".format(file_path) return self.execute(wscript, wscript_args, file_path) elif file_name.lower().endswith(".dll"): rundll32 = self.get_path_app_in_path("rundll32.exe") function = self.options.get("function", "#1") arguments = self.options.get("arguments") dllloader = self.options.get("dllloader") dll_args = "\"{0}\",{1}".format(file_path, function) if arguments: dll_args += " {0}".format(arguments) if dllloader: newname = os.path.join(os.path.dirname(rundll32), dllloader) shutil.copy(rundll32, newname) rundll32 = newname return self.execute(rundll32, dll_args, file_path) elif file_name.lower().endswith(".ps1"): powershell = self.get_path_app_in_path("powershell.exe") args = "-NoProfile -ExecutionPolicy bypass -File \"{0}\"".format( path) return self.execute(powershell, args, file_path) else: if "." not in os.path.basename(file_path): new_path = file_path + ".exe" os.rename(file_path, new_path) file_path = new_path return self.execute(file_path, self.options.get("arguments"), file_path)
except ImportError:
    logging.warning(
        "Failed to load 're2'. Falling back to 're' for regular expression parsing. See https://github.com/blockspeiser/Sefaria-Project/wiki/Regular-Expression-Engines"
    )
    import re

letter_scope = "\u05b0\u05b4\u05b5\u05b6\u05b7\u05b8\u05b9\u05bc\u05c1\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5\u05e6\u05e7\u05e8\u05e9\u05ea\u05f3\u05f4\u200e\u200f\u2013\u201d\ufeffabcdefghijklmnopqrstuvwxyz1234567890[]`:;.-,*()'& \""


def normalizer(lang):
    if lang == "he":
        return hebrew.normalize_final_letters_in_str
    return str.lower


splitter = re.compile(r"[\s,]+")


class AutoCompleter(object):
    """
    An AutoCompleter object provides completion services - it is the object
    in this module designed to be used by the Library.
    It instantiates objects that provide string completion according to
    different algorithms.
    """
    def __init__(self, lang, lib,
                 include_titles=True,
                 include_people=False,
                 include_categories=False,
                 include_parasha=False,
                 include_lexicons=False,
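# Hedged sketch (not part of the Sefaria module above): how the helpers above
# are typically combined -- normalize the raw query for the given language,
# then break it into tokens on whitespace/commas with `splitter`. The sample
# string and the helper name are illustrative only.
def _tokenize_query(raw_query, lang="en"):
    normalized = normalizer(lang)(raw_query)
    return [t for t in splitter.split(normalized) if t]

# _tokenize_query("Genesis 1,2") -> ["genesis", "1", "2"]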
def handleEvent(self, event): eventName = event.eventType srcModuleName = event.module eventData = event.data eventDataHash = self.sf.hashstring(eventData) addrs = None parentEvent = event self.sf.debug("Received event, " + eventName + ", from " + srcModuleName) if eventDataHash in self.events: self.sf.debug("Skipping duplicate event for " + eventData) return None self.events[eventDataHash] = True self.sf.debug("Gathering DNS records for " + eventData) # Process the raw data alone recdata = dict() recs = { 'MX': ['\S+\s+(?:\d+)?\s+IN\s+MX\s+\d+\s+(\S+)\.', 'PROVIDER_MAIL'], 'NS': ['\S+\s+(?:\d+)?\s+IN\s+NS\s+(\S+)\.', 'PROVIDER_DNS'], 'TXT': ['\S+\s+TXT\s+\"(.[^\"]*)"', 'DNS_TEXT'] } for rec in recs.keys(): if self.checkForStop(): return None try: req = dns.message.make_query(eventData, dns.rdatatype.from_text(rec)) if self.opts.get('_dnsserver', "") != "": n = self.opts['_dnsserver'] else: ns = dns.resolver.get_default_resolver() n = ns.nameservers[0] res = dns.query.udp(req, n, timeout=30) for x in res.answer: if str(x) in self.checked: continue self.checked[str(x)] = True for rx in recs.keys(): self.sf.debug("Checking " + str(x) + " + against " + recs[rx][0]) pat = re.compile(recs[rx][0], re.IGNORECASE | re.DOTALL) grps = re.findall(pat, str(x)) if len(grps) == 0: continue for m in grps: self.sf.debug("Matched: " + m) strdata = unicode(m, 'utf-8', errors='replace') evt = SpiderFootEvent(recs[rx][1], strdata, self.__name__, parentEvent) self.notifyListeners(evt) if rec != "TXT" and not strdata.endswith( eventData): evt = SpiderFootEvent( "AFFILIATE_INTERNET_NAME", strdata, self.__name__, parentEvent) self.notifyListeners(evt) if rec == "TXT" and "v=spf" in strdata: evt = SpiderFootEvent("DNS_SPF", strdata, self.__name__, parentEvent) self.notifyListeners(evt) matches = re.findall(r'include:(.+?) ', strdata, re.IGNORECASE | re.DOTALL) if matches: for domain in matches: if '_' in domain: continue if self.getTarget().matches( domain, includeChildren=True, includeParents=True): evt_type = 'INTERNET_NAME' else: evt_type = 'AFFILIATE_DOMAIN' if self.opts[ 'verify'] and not self.sf.resolveHost( domain): self.sf.debug( "Host " + domain + " could not be resolved") evt_type += '_UNRESOLVED' evt = SpiderFootEvent( evt_type, domain, self.__name__, parentEvent) self.notifyListeners(evt) strdata = unicode(str(x), 'utf-8', errors='replace') evt = SpiderFootEvent("RAW_DNS_RECORDS", strdata, self.__name__, parentEvent) self.notifyListeners(evt) except BaseException as e: self.sf.error( "Failed to obtain DNS response for " + eventData + "(" + rec + "): " + str(e), False)
"""Frytherer Module. This module contains all the helper functions for the Frytherer command line interface and the Slackbot """ import string, sys, ast from itertools import product from operator import and_, or_ try: import re2 as re except ImportError: import re import logging logging.basicConfig(level=logging.DEBUG) mana_regexp = re.compile('([0-9]*)(b*)(g*)(r*)(u*)(w*)') section_regexp = re.compile( '(aipg|amtr) (?:(appendix [a-f])|(\d+)(?:(?:\.)(\d)){0,1})') def gathererCapitalise(y): """ Capitalise card names as Gatherer does. INPUT: Regular card name in whatever case OUTPUT: Magic style capitalised card name """ words = y.split(" ") ret_string = [] for x in words: x = x.replace(u'\u2019', '\'').replace(u'\u2018',
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

try:
    import re2 as re
except ImportError:
    import re

from lib.cuckoo.common.abstracts import Signature

struct_pat = re.compile(
    r"\\x11\"3D\d{2}\.\d{2}(?:[A-Za-z]|\\x00)(?:\\x00){2}(?:\d{4}|(?:\\x00){4})(?:\\x00){12}http"
)
url_pat = re.compile(r"(https?://[^\|]+)(?:\||\\x00)")


class Hancitor_APIs(Signature):
    name = "hancitor_behavior"
    description = "Exhibits behavior characteristic of Hancitor downloader"
    weight = 3
    severity = 3
    categories = ["downloader"]
    families = ["hancitor", "chanitor", "tordal"]
    authors = ["KillerInstinct"]
    minimum = "1.2"
    evented = True
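# Hedged illustration (the buffer below is invented): both patterns above
# appear to be written against "printable" argument strings in which raw
# bytes show up as literal \x00 escape sequences, which is why the patterns
# use \\x00. Against such a buffer, url_pat pulls out the pipe-separated C2
# URLs embedded after the config structure.
sample_buffer = r"http://example1.test/ls5/gate.php|http://example2.test/ls5/gate.php\x00"
urls = url_pat.findall(sample_buffer)
# urls -> ['http://example1.test/ls5/gate.php', 'http://example2.test/ls5/gate.php']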
#headers = ["pmid", "wordCounts"] # keep mapping word -> list of pmids wordsToPmids = {} # this plugin produces marshal output files outTypes = ["marshal"] # we want to run on fulltext files runOn = "files" # we only want main files onlyMain = True # only give us one single main article file preferXml = True sentSplitter = re.compile(r'[.!?;][ ]') wordSplitter = re.compile(r'[;:",. !?=\[\]()\t\n\r\f\v]') addTwoGrams = False addMeta = False pmids = None outFhs = None # run before outTypes is read or any files are opened # can be used to change outTypes depending on paramDict def setup(paramDict): global addTwoGrams global addTwoGrams global pmids addTwoGrams = bool(paramDict["addTwoGrams"]) addMeta = bool(paramDict["addMeta"])
from wwwutil.md import markdown
from datetime import datetime
import calendar
try:
    import re2 as re
except ImportError:
    import re

from geweb.exceptions import GewebError
from point.util.env import env

import settings

_urlre = re.compile(
    '^(?P<proto>\w+)://(?:[\w\.\-%\:]*\@)?(?P<host>[\w\.\-%]+)(?P<port>::(\d+))?(?P<path>(?:/[\w\.\-%]*)*)(?:\?(?P<query>[^#]*))?'
)


def parse_url(url):
    if not url:
        return None
    m = re.search(_urlre, url)
    if m:
        return m.groupdict()


def check_referer(fn):
    def _fn(*args, **kwargs):
        referer = parse_url(env.request.referer)
        if not referer or not referer['host'].endswith(settings.domain):
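# Hedged usage sketch (the URL below is made up): parse_url() returns the
# named groups of _urlre as a dict, or None for an empty or unmatched value.
parts = parse_url("https://point.example/u/alice/posts?page=2#top")
# parts -> {'proto': 'https', 'host': 'point.example', 'port': None,
#           'path': '/u/alice/posts', 'query': 'page=2'}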
def handleEvent(self, event):
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    # If the source event is web content, check if the source URL was javascript
    # or CSS, in which case optionally ignore it.
    if eventName == "TARGET_WEB_CONTENT":
        url = event.actualSource
        if self.opts['filterjscss'] and (".js" in url or ".css" in url):
            self.sf.debug("Ignoring web content from CSS/JS.")
            return None

    if eventName == "EMAILADDR" and self.opts['emailtoname']:
        if "." in eventData.split("@")[0]:
            if type(eventData) == unicode:
                name = " ".join(
                    map(unicode.capitalize,
                        eventData.split("@")[0].split(".")))
            else:
                name = " ".join(
                    map(str.capitalize,
                        eventData.split("@")[0].split(".")))
                name = unicode(name, 'utf-8', errors='replace')

            # Notify other modules of what you've found
            evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)
        return None

    # Stage 1: Find things that look (very vaguely) like names.
    # Accented lowercase Latin letters are included so that names such as
    # "José" still match.
    rx = re.compile(
        "([A-Z][a-zàáâãäåèéêëìíîïòóôõöùúûüýÿ]+)\s+.?.?\s?([A-Z][àáâãäåèéêëìíîïòóôõöùúûüýÿa-zA-Z\'\-]+)"
    )
    m = re.findall(rx, eventData)
    for r in m:
        # Start off each match as 0 points.
        p = 0
        notindict = False

        # Shouldn't encounter "Firstname's Secondname"
        first = r[0].lower()
        if first[len(first) - 2] == "'" or first[len(first) - 1] == "'":
            continue

        # Strip off trailing ' or 's
        secondOrig = r[1].replace("'s", "")
        secondOrig = secondOrig.rstrip("'")
        second = r[1].lower().replace("'s", "")
        second = second.rstrip("'")

        # If both words are not in the dictionary, add 75 points.
        if first not in self.d and second not in self.d:
            self.sf.debug(
                "Both first and second names are not in the dictionary, so high chance of name: ("
                + first + ":" + second + ").")
            p += 75
            notindict = True
        else:
            self.sf.debug(first + " was found or " + second +
                          " was found in dictionary.")

        # If the first word is a known popular first name, award 50 points.
        if first in self.n:
            p += 50

        # If either word is 2 characters, subtract 50 points.
        if len(first) == 2 or len(second) == 2:
            p -= 50

        if not notindict:
            # If the first word is in the dictionary but the second isn't,
            # subtract 20 points.
            if first in self.d and second not in self.d:
                p -= 20

            # If the second word is in the dictionary but the first isn't,
            # subtract 40 points.
            if first not in self.d and second in self.d:
                p -= 40

        name = r[0] + " " + secondOrig

        self.sf.debug("Name of " + name + " has score: " + str(p))
        if p > self.opts['algolimit']:
            # Notify other modules of what you've found
            evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)
def run(self): clam_no_score_re = re.compile( r'^(SaneSecurity\.FoxHole|MiscreantPunch\.(?:Susp|INFO))', re.I) clam_ignore = ['PhishTank.Phishing.6117523.UNOFFICIAL'] self.data = [] if self.results["target"]["category"] == "file": if "clamav" in self.results["target"]["file"].keys( ) and self.results["target"]["file"][ "clamav"] and "sha256" in self.results["target"][ "file"].keys(): for detection in self.results["target"]["file"]["clamav"]: entry = "%s, target" % (detection) if detection in clam_ignore: continue if not clam_no_score_re.search(detection): self.weight = 3 if "type" in self.results["target"]["file"]: entry = "%s, type:%s" % (entry, self.results["target"] ["file"].get("type", "")) self.data.append( {self.results["target"]["file"]["sha256"]: entry}) if "suricata" in self.results and self.results["suricata"]: if "files" in self.results["suricata"]: for entry in self.results["suricata"]["files"]: proto = entry["protocol"] if "clamav" in entry["file_info"].keys( ) and entry["file_info"]["clamav"] and "sha256" in entry[ "file_info"].keys(): for detection in entry["file_info"]["clamav"]: if detection in clam_ignore: continue if not clam_no_score_re.search(detection): self.weight = 3 lentry = "{}, suricata_extracted_files, src:{}, sp:{}, dst:{}, dp:{}".format( detection, entry.get('srcip', ''), entry.get('sp', ''), entry.get('dstip', ''), entry.get('dp', '')) if "http_user_agent" in entry.keys(): lentry = "%s, ua:%s" % ( lentry, entry['http_user_agent']) if "http_uri" in entry.keys(): lentry = "%s, uri:%s" % (lentry, entry['http_uri']) if "http_referer" in entry.keys(): lentry = "%s, referer:%s" % ( lentry, entry['http_referer']) if entry["file_info"]["type"]: lentry = "%s, type:%s" % ( lentry, entry["file_info"]["type"]) self.data.append( {entry["file_info"]["sha256"]: lentry}) if "dropped" in self.results: for entry in self.results["dropped"]: if "clamav" in entry.keys( ) and entry["clamav"] and "sha256" in entry.keys(): for detection in entry["clamav"]: if detection in clam_ignore: continue if not clam_no_score_re.search(detection): self.weight = 3 lentry = "%s, dropped" % (detection) if "guest_paths" in entry.keys(): lentry = "%s, guest_paths:%s" % (lentry, "*".join( entry["guest_paths"])) if "type" in entry.keys(): lentry = "%s, type:%s" % (lentry, entry["type"]) self.data.append({entry["sha256"]: lentry}) if len(self.data) > 0: return True return False
        'o', 'u', 'y',
    ]),
    #all consonants go away
    ('.', [
        r'\.+',
    ]),
)

#_GROUPS1 = [(k, '|'.join(ls)) for k, ls in GROUPS1]
#_GROUPS2 = [(k, '|'.join(ls)) for k, ls in GROUPS2]
#GROUPS1_SINGLEREGEXP = re.compile('|'.join(["(%s)" % v for k, v in _GROUPS1]))
#GROUPS2_SINGLEREGEXP = re.compile('|'.join(["(%s)" % v for k, v in _GROUPS2]))
#GROUPS1_LOOKUP = dict((i+1, k) for (i, (k,v)) in enumerate(GROUPS1))
#GROUPS2_LOOKUP = dict((i+1, k) for (i, (k,v)) in enumerate(GROUPS2))

GROUPS1 = [(k, re.compile('|'.join(ls))) for k, ls in _GROUPS1]
GROUPS2 = [(k, re.compile('|'.join(ls))) for k, ls in _GROUPS2]


def dict_sub(d, text):
    """ Replace in 'text' non-overlapping occurences of REs whose patterns are
    keys in dictionary 'd' by corresponding values (which must be constant
    strings: may have named backreferences but not numeric ones). The keys
    must not contain anonymous matching-groups.
    Returns the new string."""

    # Create a regular expression from the dictionary keys
    regex = re.compile("|".join("(%s)" % k for k in d))
    # Facilitate lookup from group number to value
    lookup = dict((i + 1, v) for i, v in enumerate(d.values()))
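# Hedged sketch (not the module's own code, and the replacement table is
# invented): one way the group-number lookup built above is typically used to
# finish the substitution -- every key becomes its own capturing group, so the
# index of the group that matched selects the replacement value.
def _dict_sub_sketch(d, text):
    regex = re.compile("|".join("(%s)" % k for k in d))
    lookup = dict((i + 1, v) for i, v in enumerate(d.values()))
    return regex.sub(lambda m: lookup[m.lastindex], text)

# _dict_sub_sketch({'colour': 'color', 'grey': 'gray'}, "grey colour") -> "gray color"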
def run(self): """Run Suricata. @return: hash with alerts """ self.key = "suricata" # General SURICATA_CONF = self.options.get("conf", None) SURICATA_EVE_LOG = self.options.get("evelog", None) SURICATA_ALERT_LOG = self.options.get("alertlog", None) SURICATA_TLS_LOG = self.options.get("tlslog", None) SURICATA_HTTP_LOG = self.options.get("httplog", None) SURICATA_SSH_LOG = self.options.get("sshlog", None) SURICATA_DNS_LOG = self.options.get("dnslog", None) SURICATA_FILE_LOG = self.options.get("fileslog", None) SURICATA_FILES_DIR = self.options.get("filesdir", None) SURICATA_RUNMODE = self.options.get("runmode", None) SURICATA_FILE_BUFFER = self.options.get("buffer", 8192) Z7_PATH = self.options.get("7zbin", None) FILES_ZIP_PASS = self.options.get("zippass", None) SURICATA_FILE_COPY_DST_DIR = self.options.get("file_copy_dest_dir", None) SURICATA_FILE_COPY_MAGIC_RE = self.options.get("file_magic_re", None) if SURICATA_FILE_COPY_MAGIC_RE: try: SURICATA_FILE_COPY_MAGIC_RE = re.compile( SURICATA_FILE_COPY_MAGIC_RE) except: log.warning("Failed to compile suricata copy magic RE" % (SURICATA_FILE_COPY_MAGIC_RE)) SURICATA_FILE_COPY_MAGIC_RE = None # Socket SURICATA_SOCKET_PATH = self.options.get("socket_file", None) SURICATA_SOCKET_PYLIB = self.options.get("pylib_dir", None) # Command Line SURICATA_BIN = self.options.get("bin", None) suricata = {} suricata["alerts"] = [] suricata["tls"] = [] suricata["perf"] = [] suricata["files"] = [] suricata["http"] = [] suricata["dns"] = [] suricata["ssh"] = [] suricata["file_info"] = [] suricata["eve_log_full_path"] = None suricata["alert_log_full_path"] = None suricata["tls_log_full_path"] = None suricata["http_log_full_path"] = None suricata["file_log_full_path"] = None suricata["ssh_log_full_path"] = None suricata["dns_log_full_path"] = None SURICATA_ALERT_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_ALERT_LOG) SURICATA_TLS_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_TLS_LOG) SURICATA_HTTP_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_HTTP_LOG) SURICATA_SSH_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_SSH_LOG) SURICATA_DNS_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_DNS_LOG) SURICATA_EVE_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_EVE_LOG) SURICATA_FILE_LOG_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_FILE_LOG) SURICATA_FILES_DIR_FULL_PATH = "%s/%s" % (self.logs_path, SURICATA_FILES_DIR) separate_log_paths = [ ("alert_log_full_path", SURICATA_ALERT_LOG_FULL_PATH), ("tls_log_full_path", SURICATA_TLS_LOG_FULL_PATH), ("http_log_full_path", SURICATA_HTTP_LOG_FULL_PATH), ("ssh_log_full_path", SURICATA_SSH_LOG_FULL_PATH), ("dns_log_full_path", SURICATA_DNS_LOG_FULL_PATH) ] # handle reprocessing all_log_paths = [x[1] for x in separate_log_paths] + \ [SURICATA_EVE_LOG_FULL_PATH, SURICATA_FILE_LOG_FULL_PATH] for log_path in all_log_paths: if os.path.exists(log_path): try: os.unlink(log_path) except: pass if os.path.isdir(SURICATA_FILES_DIR_FULL_PATH): try: shutil.rmtree(SURICATA_FILES_DIR_FULL_PATH, ignore_errors=True) except: pass if not os.path.exists(SURICATA_CONF): log.warning("Unable to Run Suricata: Conf File %s Does Not Exist" % (SURICATA_CONF)) return suricata["alerts"] if not os.path.exists(self.pcap_path): log.warning("Unable to Run Suricata: Pcap file %s Does Not Exist" % (self.pcap_path)) return suricata["alerts"] # Add to this if you wish to ignore any SIDs for the suricata alert logs # Useful for ignoring SIDs without disabling them. Ex: surpress an alert for # a SID which is a dependent of another. 
(Bad TCP data for HTTP(S) alert) sid_blacklist = [ # SURICATA FRAG IPv6 Fragmentation overlap 2200074, # ET INFO InetSim Response from External Source Possible SinkHole 2017363, # SURICATA UDPv4 invalid checksum 2200075, # ET POLICY SSLv3 outbound connection from client vulnerable to POODLE attack 2019416, ] if SURICATA_RUNMODE == "socket": if SURICATA_SOCKET_PYLIB != None: sys.path.append(SURICATA_SOCKET_PYLIB) try: from suricatasc import SuricataSC except Exception as e: log.warning("Failed to import suricatasc lib %s" % (e)) return suricata loopcnt = 0 maxloops = 24 loopsleep = 5 args = {} args["filename"] = self.pcap_path args["output-dir"] = self.logs_path suris = SuricataSC(SURICATA_SOCKET_PATH) try: suris.connect() suris.send_command("pcap-file", args) except Exception as e: log.warning( "Failed to connect to socket and send command %s: %s" % (SURICATA_SOCKET_PATH, e)) return suricata while loopcnt < maxloops: try: pcap_flist = suris.send_command("pcap-file-list") current_pcap = suris.send_command("pcap-current") log.debug("pcapfile list: %s current pcap: %s" % (pcap_flist, current_pcap)) if self.pcap_path not in pcap_flist["message"][ "files"] and current_pcap[ "message"] != self.pcap_path: log.debug( "Pcap not in list and not current pcap lets assume it's processed" ) break else: loopcnt = loopcnt + 1 time.sleep(loopsleep) except Exception as e: log.warning( "Failed to get pcap status breaking out of loop %s" % (e)) break if loopcnt == maxloops: log.warning( "Loop timeout of %ssec occured waiting for file %s to finish processing" % (maxloops * loopsleep, pcapfile)) return suricata elif SURICATA_RUNMODE == "cli": if not os.path.exists(SURICATA_BIN): log.warning( "Unable to Run Suricata: Bin File %s Does Not Exist" % (SURICATA_CONF)) return suricata["alerts"] cmd = "%s -c %s -k none -l %s -r %s" % ( SURICATA_BIN, SURICATA_CONF, self.logs_path, self.pcap_path) ret, stdout, stderr = self.cmd_wrapper(cmd) if ret != 0: log.warning( "Suricata returned a Exit Value Other than Zero %s" % (stderr)) return suricata else: log.warning("Unknown Suricata Runmode") return suricata datalist = [] if os.path.exists(SURICATA_EVE_LOG_FULL_PATH): suricata["eve_log_full_path"] = SURICATA_EVE_LOG_FULL_PATH with open(SURICATA_EVE_LOG_FULL_PATH, "rb") as eve_log: datalist.append(eve_log.read()) else: for path in separate_log_paths: if os.path.exists(path[1]): suricata[path[0]] = path[1] with open(path[1], "rb") as the_log: datalist.append(the_log.read()) if not datalist: log.warning("Suricata: Failed to find usable Suricata log file") for data in datalist: for line in data.splitlines(): try: parsed = json.loads(line) except: log.warning("Suricata: Failed to parse line as json" % (line)) continue if 'event_type' in parsed: if parsed["event_type"] == "alert": if (parsed["alert"]["signature_id"] not in sid_blacklist and not parsed["alert"] ["signature"].startswith("SURICATA STREAM")): alog = dict() if parsed["alert"]["gid"] == '': alog["gid"] = "None" else: alog["gid"] = parsed["alert"]["gid"] if parsed["alert"]["rev"] == '': alog["rev"] = "None" else: alog["rev"] = parsed["alert"]["rev"] if parsed["alert"]["severity"] == '': alog["severity"] = "None" else: alog["severity"] = parsed["alert"]["severity"] alog["sid"] = parsed["alert"]["signature_id"] try: alog["srcport"] = parsed["src_port"] except: alog["srcport"] = "None" alog["srcip"] = parsed["src_ip"] try: alog["dstport"] = parsed["dest_port"] except: alog["dstport"] = "None" alog["dstip"] = parsed["dest_ip"] alog["protocol"] = parsed["proto"] 
alog["timestamp"] = parsed["timestamp"].replace( "T", " ") if parsed["alert"]["category"] == '': alog["category"] = "None" else: alog["category"] = parsed["alert"]["category"] alog["signature"] = parsed["alert"]["signature"] suricata["alerts"].append(alog) elif parsed["event_type"] == "http": hlog = dict() hlog["srcport"] = parsed["src_port"] hlog["srcip"] = parsed["src_ip"] hlog["dstport"] = parsed["dest_port"] hlog["dstip"] = parsed["dest_ip"] hlog["timestamp"] = parsed["timestamp"].replace( "T", " ") try: hlog["uri"] = parsed["http"]["url"] except: hlog["uri"] = "None" hlog["length"] = parsed["http"]["length"] try: hlog["hostname"] = parsed["http"]["hostname"] except: hlog["hostname"] = "None" try: hlog["status"] = str(parsed["http"]["status"]) except: hlog["status"] = "None" try: hlog["method"] = parsed["http"]["http_method"] except: hlog["method"] = "None" try: hlog["contenttype"] = parsed["http"][ "http_content_type"] except: hlog["contenttype"] = "None" try: hlog["ua"] = parsed["http"]["http_user_agent"] except: hlog["ua"] = "None" try: hlog["referrer"] = parsed["http"]["http_refer"] except: hlog["referrer"] = "None" suricata["http"].append(hlog) elif parsed["event_type"] == "tls": tlog = dict() tlog["srcport"] = parsed["src_port"] tlog["srcip"] = parsed["src_ip"] tlog["dstport"] = parsed["dest_port"] tlog["dstip"] = parsed["dest_ip"] tlog["timestamp"] = parsed["timestamp"].replace( "T", " ") tlog["fingerprint"] = parsed["tls"]["fingerprint"] tlog["issuer"] = parsed["tls"]["issuerdn"] tlog["version"] = parsed["tls"]["version"] tlog["subject"] = parsed["tls"]["subject"] suricata["tls"].append(tlog) elif parsed["event_type"] == "ssh": suricata["ssh"].append(parsed) elif parsed["event_type"] == "dns": suricata["dns"].append(parsed) if os.path.exists(SURICATA_FILE_LOG_FULL_PATH): suricata["file_log_full_path"] = SURICATA_FILE_LOG_FULL_PATH f = open(SURICATA_FILE_LOG_FULL_PATH, "rb").readlines() for l in f: try: d = json.loads(l) except: log.warning("failed to load JSON from file log") continue # Some log entries do not have an id if "id" not in d: continue src_file = "%s/file.%s" % (SURICATA_FILES_DIR_FULL_PATH, d["id"]) if os.path.exists(src_file): if SURICATA_FILE_COPY_MAGIC_RE and SURICATA_FILE_COPY_DST_DIR and os.path.exists( SURICATA_FILE_COPY_DST_DIR): try: m = re.search(SURICATA_FILE_COPY_MAGIC_RE, d["magic"]) if m: dst_file = "%s/%s" % ( SURICATA_FILE_COPY_DST_DIR, d["md5"]) shutil.copy2(src_file, dst_file) log.warning("copied %s to %s" % (src_file, dst_file)) except Exception, e: log.warning("Unable to copy suricata file: %s" % e) file_info = File(file_path=src_file).get_all() texttypes = [ "ASCII", "Windows Registry text", "XML document text", "Unicode text", ] readit = False for texttype in texttypes: if texttype in file_info["type"]: readit = True break if readit: with open(file_info["path"], "rb") as drop_open: filedata = drop_open.read(SURICATA_FILE_BUFFER + 1) if len(filedata) > SURICATA_FILE_BUFFER: file_info["data"] = convert_to_printable( filedata[:SURICATA_FILE_BUFFER] + " <truncated>") else: file_info["data"] = convert_to_printable(filedata) d["file_info"] = file_info if "/" in d["filename"]: d["filename"] = d["filename"].split("/")[-1] suricata["files"].append(d)
try:
    import re2 as re
except ImportError:
    import re

url_regex = re.compile(
    br"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
)


def config(data):
    urls_dict = {}
    try:
        urls_dict["URLs"] = [
            url.lower().decode() for url in url_regex.findall(data)
        ]
    except Exception as e:
        print(e)
    if "URLs" in urls_dict and len(urls_dict["URLs"]) > 0:
        return urls_dict
    return None
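# Hedged usage sketch (the byte string is invented): config() expects raw
# bytes and returns a dict of lowercased URL strings, or None when nothing
# matches.
sample = b"GET http://Example.test/payload.bin HTTP/1.1"
# config(sample) -> {'URLs': ['http://example.test/payload.bin']}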
#second go through each concept/term, find them in subresources, and process into matrix
tc = 0
j = 0
row_sentence = []
row_paragraph = []
col_sentence = []
col_paragraph = []
data_sentence = []
data_paragraph = []

# initialize list of empty lists for storing concepts contained in each paragraph
para_conceptIDs_contained = [[] for i in range(tot_para)]

for i, con_ID in enumerate(concept_IDs):
    term_list = term_lists[i]
    wordcount_in_paragraphs = [0] * tot_para
    terms_regex = [r"\b"+re2.escape(term.lower())+r"\b" for term in term_list]
    search_pattern = re2.compile("|".join(terms_regex))

    for sent_num, sentence in enumerate(sentences):
        wordcount = len(search_pattern.findall(sentence.lower()))
        if wordcount > 0:  #only go ahead if search_pattern is in the sentence
            row_sentence.append(sent_num)
            col_sentence.append(tc)
            data_sentence.append(1)
            wordcount_in_paragraphs[sentences_indexofparagraph[sent_num]] += wordcount

    for para_num in range(tot_para):
        wordcount_in_p = wordcount_in_paragraphs[para_num]
        if wordcount_in_p > 0:
            row_paragraph.append(para_num)
            col_paragraph.append(tc)
            data_paragraph.append(1)
            para_conceptIDs_contained[para_num].append(con_ID)

    if tc*10/tot_concepts > j:
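# Hedged sketch (assumes SciPy is available in this pipeline; the original
# code for this step is not shown above): the (row, col, data) triplets
# collected in the loop are the standard COO ingredients for
# sentence-by-concept and paragraph-by-concept occurrence matrices.
from scipy.sparse import coo_matrix

sent_concept = coo_matrix((data_sentence, (row_sentence, col_sentence)),
                          shape=(len(sentences), tot_concepts))
para_concept = coo_matrix((data_paragraph, (row_paragraph, col_paragraph)),
                          shape=(tot_para, tot_concepts))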
vtdl_cfg = Config("auxiliary").virustotaldl

MOLOCH_BASE = moloch_cfg.get("base", None)
MOLOCH_NODE = moloch_cfg.get("node", None)
MOLOCH_ENABLED = moloch_cfg.get("enabled", False)

GATEWAYS = aux_cfg.get("gateways")

VTDL_ENABLED = vtdl_cfg.get("enabled", False)
VTDL_PRIV_KEY = vtdl_cfg.get("dlprivkey", None)
VTDL_INTEL_KEY = vtdl_cfg.get("dlintelkey", None)
VTDL_PATH = vtdl_cfg.get("dlpath", None)

TEMP_PATH = Config().cuckoo.get("tmppath", "/tmp")

ipaddy_re = re.compile(
    r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"
)

if GATEWAYS:
    GATEWAYS_IP_MAP = {}
    for e in GATEWAYS:
        if "," in e:
            continue
        elif ipaddy_re.match(GATEWAYS[e]):
            GATEWAYS_IP_MAP[GATEWAYS[e]] = e

# Enable/Disable Zer0m0n tickbox on the submission page
OPT_ZER0M0N = True

# To disable comment support, change the below to False
COMMENTS = True