def _process_packet(packet, sec, usec, ip_offset): """ Processes single (raw) IP layer data """ global _connect_sec global _last_syn global _last_logged_syn global _last_udp global _last_logged_udp try: if len(_result_cache) > MAX_RESULT_CACHE_ENTRIES: _result_cache.clear() if config.USE_HEURISTICS: if _locks.connect_sec: _locks.connect_sec.acquire() connect_sec = _connect_sec _connect_sec = sec if _locks.connect_sec: _locks.connect_sec.release() if sec > connect_sec: for key in _connect_src_dst: if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD: _src_ip, _dst_ip = key.split('~') if _src_ip not in WHITELIST: _src_ports = set(str(_[2]) for _ in _connect_src_details[key]) _dst_ports = set(str(_[3]) for _ in _connect_src_details[key]) log_event((sec, usec, _src_ip, ','.join(_src_ports), _dst_ip, ','.join(_dst_ports), PROTO.TCP, TRAIL.IP, "-", "potential port scanning", "(heuristic)"), packet) _connect_src_dst.clear() _connect_src_details.clear() ip_data = packet[ip_offset:] ip_version = ord(ip_data[0]) >> 4 localhost_ip = LOCALHOST_IP[ip_version] if ip_version == 0x04: # IPv4 ip_header = struct.unpack("!BBHHHBBH4s4s", ip_data[:20]) iph_length = (ip_header[0] & 0xf) << 2 protocol = ip_header[6] src_ip = socket.inet_ntoa(ip_header[8]) dst_ip = socket.inet_ntoa(ip_header[9]) elif ip_version == 0x06: # IPv6 # Reference: http://chrisgrundemann.com/index.php/2012/introducing-ipv6-understanding-ipv6-addresses/ ip_header = struct.unpack("!BBHHBB16s16s", ip_data[:40]) iph_length = 40 protocol = ip_header[4] src_ip = inet_ntoa6(ip_header[6]) dst_ip = inet_ntoa6(ip_header[7]) else: return if protocol == socket.IPPROTO_TCP: # TCP src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", ip_data[iph_length:iph_length+14]) if flags != 2 and config.plugin_functions: if dst_ip in trails: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet, skip_write=True) elif src_ip in trails and dst_ip != localhost_ip: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet, skip_write=True) if flags == 2: # SYN set (only) _ = _last_syn _last_syn = (sec, src_ip, src_port, dst_ip, dst_port) if _ == _last_syn: # skip bursts return if dst_ip in trails: _ = _last_logged_syn _last_logged_syn = _last_syn if _ != _last_logged_syn: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet) elif src_ip in trails and dst_ip != localhost_ip: _ = _last_logged_syn _last_logged_syn = _last_syn if _ != _last_logged_syn: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet) if config.USE_HEURISTICS: if dst_ip != localhost_ip: key = "%s~%s" % (src_ip, dst_ip) if key not in _connect_src_dst: _connect_src_dst[key] = set() _connect_src_details[key] = set() _connect_src_dst[key].add(dst_port) _connect_src_details[key].add((sec, usec, src_port, dst_port)) else: tcph_length = doff_reserved >> 4 h_size = iph_length + (tcph_length << 2) tcp_data = ip_data[h_size:] if config.USE_DEEP_HEURISTICS: if tcp_data.startswith("HTTP/"): if any(_ in tcp_data[:tcp_data.find("\r\n\r\n")] for _ in ("X-Sinkhole:", "X-Malware-Sinkhole:", "Server: You got served", "Server: Apache 1.0/SinkSoft", "sinkdns.org")) or "\r\n\r\nsinkhole" in tcp_data: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, "sinkhole response (malware)", "(heuristic)"), packet) else: index = tcp_data.find("<title>") if index >= 0: title = tcp_data[index + len("<title>"):tcp_data.find("</title>", index)] if all(_ in title.lower() for _ in ("this domain", "has been seized")): log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, title, "seized domain (suspicious)", "(heuristic)"), packet) method, path = None, None index = tcp_data.find("\r\n") if index >= 0: line = tcp_data[:index] if line.count(' ') == 2 and " HTTP/" in line: method, path, _ = line.split(' ') if method and path: post_data = None host = dst_ip first_index = tcp_data.find("\r\nHost:") if first_index >= 0: first_index = first_index + len("\r\nHost:") last_index = tcp_data.find("\r\n", first_index) if last_index >= 0: host = tcp_data[first_index:last_index] host = host.strip() if host.endswith(":80"): host = host[:-3] if host and host[0].isalpha() and dst_ip in trails: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, "%s (%s)" % (dst_ip, host.split(':')[0]), trails[dst_ip][0], trails[dst_ip][1]), packet) elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, "%s%s" % (host, path), "missing host header (suspicious)", "(heuristic)"), packet) index = tcp_data.find("\r\n\r\n") if index >= 0: post_data = tcp_data[index + 4:] if "://" in path: url = path.split("://", 1)[1] if '/' not in url: url = "%s/" % url host, path = url.split('/', 1) if host.endswith(":80"): host = host[:-3] path = "/%s" % path proxy_domain = host.split(':')[0] _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet) elif method == "CONNECT": if '/' in path: host, path = path.split('/', 1) path = "/%s" % path else: host, path = path, '/' if host.endswith(":80"): host = host[:-3] url = "%s%s" % (host, path) proxy_domain = host.split(':')[0] _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet) else: url = "%s%s" % (host, path) if config.USE_HEURISTICS: user_agent, result = None, None first_index = tcp_data.find("\r\nUser-Agent:") if first_index >= 0: first_index = first_index + len("\r\nUser-Agent:") last_index = tcp_data.find("\r\n", first_index) if last_index >= 0: user_agent = tcp_data[first_index:last_index] user_agent = urllib.unquote(user_agent).strip() if user_agent: result = _result_cache.get(user_agent) if result is None: if not any(_ in user_agent for _ in WHITELIST_UA_KEYWORDS): match = re.search(SUSPICIOUS_UA_REGEX, user_agent) if match: def _(value): return value.replace('(', "\\(").replace(')', "\\)") parts = user_agent.split(match.group(0), 1) if len(parts) > 1 and parts[0] and parts[-1]: result = _result_cache[user_agent] = "%s (%s)" % (_(match.group(0)), _(user_agent)) else: result = _result_cache[user_agent] = _(match.group(0)).join(("(%s)" if part else "%s") % _(part) for part in parts) if not result: _result_cache[user_agent] = False if result: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.UA, result, "user agent (suspicious)", "(heuristic)"), packet) checks = [path.rstrip('/')] if '?' in path: checks.append(path.split('?')[0].rstrip('/')) _ = os.path.splitext(checks[-1]) if _[1]: checks.append(_[0]) if checks[-1].count('/') > 1: checks.append(checks[-1][:checks[-1].rfind('/')]) for check in filter(None, checks): for _ in ("", host): check = "%s%s" % (_, check) if check in trails: parts = url.split(check) other = ("(%s)" % _ if _ else _ for _ in parts) trail = check.join(other) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, trails[check][0], trails[check][1])) return if config.USE_HEURISTICS: unquoted_path = urllib.unquote(path) unquoted_post_data = urllib.unquote(post_data or "") for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS: replacement = SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS[char] path = path.replace(char, replacement) if post_data: post_data = post_data.replace(char, replacement) if not _check_domain_whitelisted(host): if not any(_ in unquoted_path.lower() for _ in WHITELIST_HTTP_REQUEST_PATHS): if any(_ in unquoted_path for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION): found = _result_cache.get(unquoted_path) if found is None: for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES: if re.search(regex, unquoted_path, re.I): found = desc break _result_cache[unquoted_path] = found or "" if found: trail = "%s(%s)" % (host, path) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "potential %s (suspicious)" % found, "(heuristic)"), packet) return if any(_ in unquoted_post_data for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION): found = _result_cache.get(unquoted_post_data) if found is None: for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES: if re.search(regex, unquoted_post_data, re.I): found = desc break _result_cache[unquoted_post_data] = found or "" if found: trail = "%s(%s \(%s %s\))" % (host, path, method, post_data.strip()) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "potential %s (suspicious)" % found, "(heuristic)"), packet) return if '.' in path: _ = urlparse.urlparse("http://%s" % url) # dummy scheme filename = _.path.split('/')[-1] name, extension = os.path.splitext(filename) if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and not any(_ in path for _ in WHITELIST_DIRECT_DOWNLOAD_KEYWORDS) and not _.query and len(name) < 10: trail = "%s(%s)" % (host, path) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"), packet) elif filename == "suspendedpage.cgi": log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "suspended page (suspicious)", "(heuristic)"), packet) elif protocol == socket.IPPROTO_UDP: # UDP _ = ip_data[iph_length:iph_length + 4] if len(_) < 4: return src_port, dst_port = struct.unpack("!HH", _) _ = _last_udp _last_udp = (sec, src_ip, src_port, dst_ip, dst_port) if _ == _last_udp: # skip bursts return if src_port != 53 and dst_port != 53: # not DNS if dst_ip in trails: trail = dst_ip elif src_ip in trails: trail = src_ip else: trail = None if trail: _ = _last_logged_udp _last_logged_udp = _last_udp if _ != _last_logged_udp: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, trail, trails[trail][0], trails[trail][1]), packet) else: dns_data = ip_data[iph_length + 8:] # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf if len(dns_data) > 6: qdcount = struct.unpack("!H", dns_data[4:6])[0] if qdcount > 0: offset = 12 query = "" while len(dns_data) > offset: length = ord(dns_data[offset]) if not length: query = query[:-1] break query += dns_data[offset + 1:offset + length + 1] + '.' offset += length + 1 query = query.lower() if not query or '.' not in query or not all(_ in VALID_DNS_CHARS for _ in query) or any(_ in query for _ in (".intranet.",)) or any(query.endswith(_) for _ in IGNORE_DNS_QUERY_SUFFIXES): return if ord(dns_data[2]) == 0x01: # standard query type_, class_ = struct.unpack("!HH", dns_data[offset + 1:offset + 5]) # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types if type_ not in (12, 28) and class_ == 1: # Type not in (PTR, AAAA), Class IN if dst_ip in trails: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, "%s (%s)" % (dst_ip, query), trails[dst_ip][0], trails[dst_ip][1]), packet) elif src_ip in trails: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet) _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, packet) elif config.USE_HEURISTICS: if (ord(dns_data[2]) & 0x80) and (ord(dns_data[3]) == 0x83): # standard response, recursion available, no such name if not _check_domain_whitelisted(query): parts = query.split('.') if parts[-1].isdigit(): return if not (len(parts) > 4 and all(_.isdigit() and int(_) < 256 for _ in parts[:4])): # generic check for DNSBL IP lookups for _ in filter(None, (query, "*.%s" % '.'.join(parts[-2:]) if query.count('.') > 1 else None)): if _ not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[_][0] != sec / 3600: NO_SUCH_NAME_COUNTERS[_] = [sec / 3600, 1, set()] else: NO_SUCH_NAME_COUNTERS[_][1] += 1 NO_SUCH_NAME_COUNTERS[_][2].add(query) if NO_SUCH_NAME_COUNTERS[_][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD: if _.startswith("*."): log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, "%s%s" % ("(%s)" % ','.join(item.replace(_[1:], "") for item in NO_SUCH_NAME_COUNTERS[_][2]), _[1:]), "excessive no such domain (suspicious)", "(heuristic)"), packet) for item in NO_SUCH_NAME_COUNTERS[_][2]: try: del NO_SUCH_NAME_COUNTERS[item] except KeyError: pass else: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, _, "excessive no such domain (suspicious)", "(heuristic)"), packet) try: del NO_SUCH_NAME_COUNTERS[_] except KeyError: pass break if len(parts) > 2: part = parts[0] if parts[0] != "www" else parts[1] trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:])) elif len(parts) == 2: part = parts[0] trail = "(%s).%s" % (parts[0], parts[1]) else: part = query trail = query result = _result_cache.get(part) if part: if result is None: # Reference: https://github.com/exp0se/dga_detector probabilities = (float(part.count(c)) / len(part) for c in set(_ for _ in part)) entropy = -sum(p * math.log(p) / math.log(2.0) for p in probabilities) if entropy > SUSPICIOUS_DOMAIN_ENTROPY_THRESHOLD: result = "entropy threshold no such domain (suspicious)" if not result: if sum(_ in CONSONANTS for _ in part) > SUSPICIOUS_DOMAIN_CONSONANT_THRESHOLD: result = "consonant threshold no such domain (suspicious)" _result_cache[part] = result or False if result: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, result, "(heuristic)"), packet) elif protocol in IPPROTO_LUT: # non-TCP/UDP (e.g. ICMP) if protocol == socket.IPPROTO_ICMP: if ord(ip_data[iph_length]) != 0x08: # Non-echo request return elif protocol == socket.IPPROTO_ICMPV6: if ord(ip_data[iph_length]) != 0x80: # Non-echo request return if dst_ip in trails: log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet) elif src_ip in trails: log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet) except struct.error: pass except Exception: if config.SHOW_DEBUG: traceback.print_exc()
def _process_packet(packet, sec, usec, ip_offset): """ Processes single (raw) IP layer data """ global _connect_sec global _last_syn global _last_logged_syn global _last_udp global _last_logged_udp global _last_dns_exhaustion global _subdomains_sec try: if len(_result_cache) > MAX_RESULT_CACHE_ENTRIES: _result_cache.clear() if config.USE_HEURISTICS: if _locks.connect_sec: _locks.connect_sec.acquire() connect_sec = _connect_sec _connect_sec = sec if _locks.connect_sec: _locks.connect_sec.release() if sec > connect_sec: for key in _connect_src_dst: if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD: _src_ip, _dst_ip = key.split('~') if _src_ip not in WHITELIST: _src_ports = set(str(_[2]) for _ in _connect_src_details[key]) _dst_ports = set(str(_[3]) for _ in _connect_src_details[key]) log_event((sec, usec, _src_ip, ','.join(_src_ports), _dst_ip, ','.join(_dst_ports), PROTO.TCP, TRAIL.IP, "-", "potential port scanning", "(heuristic)"), packet) _connect_src_dst.clear() _connect_src_details.clear() ip_data = packet[ip_offset:] ip_version = ord(ip_data[0]) >> 4 localhost_ip = LOCALHOST_IP[ip_version] if ip_version == 0x04: # IPv4 ip_header = struct.unpack("!BBHHHBBH4s4s", ip_data[:20]) iph_length = (ip_header[0] & 0xf) << 2 protocol = ip_header[6] src_ip = socket.inet_ntoa(ip_header[8]) dst_ip = socket.inet_ntoa(ip_header[9]) elif ip_version == 0x06: # IPv6 # Reference: http://chrisgrundemann.com/index.php/2012/introducing-ipv6-understanding-ipv6-addresses/ ip_header = struct.unpack("!BBHHBB16s16s", ip_data[:40]) iph_length = 40 protocol = ip_header[4] src_ip = inet_ntoa6(ip_header[6]) dst_ip = inet_ntoa6(ip_header[7]) else: return if protocol == socket.IPPROTO_TCP: # TCP src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", ip_data[iph_length:iph_length+14]) if flags != 2 and config.plugin_functions: if dst_ip in trails: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet, skip_write=True) elif src_ip in trails and dst_ip != localhost_ip: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet, skip_write=True) if flags == 2: # SYN set (only) _ = _last_syn _last_syn = (sec, src_ip, src_port, dst_ip, dst_port) if _ == _last_syn: # skip bursts return if dst_ip in trails: _ = _last_logged_syn _last_logged_syn = _last_syn if _ != _last_logged_syn: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet) elif src_ip in trails and dst_ip != localhost_ip: _ = _last_logged_syn _last_logged_syn = _last_syn if _ != _last_logged_syn: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet) if config.USE_HEURISTICS: if dst_ip != localhost_ip: key = "%s~%s" % (src_ip, dst_ip) if key not in _connect_src_dst: _connect_src_dst[key] = set() _connect_src_details[key] = set() _connect_src_dst[key].add(dst_port) _connect_src_details[key].add((sec, usec, src_port, dst_port)) else: tcph_length = doff_reserved >> 4 h_size = iph_length + (tcph_length << 2) tcp_data = ip_data[h_size:] if config.USE_DEEP_HEURISTICS: if tcp_data.startswith("HTTP/"): if any(_ in tcp_data[:tcp_data.find("\r\n\r\n")] for _ in ("X-Sinkhole:", "X-Malware-Sinkhole:", "Server: You got served", "Server: Apache 1.0/SinkSoft", "sinkdns.org")) or "\r\n\r\nsinkhole" in tcp_data: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, "sinkhole response (malware)", "(heuristic)"), packet) else: index = tcp_data.find("<title>") if index >= 0: title = tcp_data[index + len("<title>"):tcp_data.find("</title>", index)] if all(_ in title.lower() for _ in ("this domain", "has been seized")): log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, title, "seized domain (suspicious)", "(heuristic)"), packet) method, path = None, None index = tcp_data.find("\r\n") if index >= 0: line = tcp_data[:index] if line.count(' ') == 2 and " HTTP/" in line: method, path, _ = line.split(' ') if method and path: post_data = None host = dst_ip first_index = tcp_data.find("\r\nHost:") if first_index >= 0: first_index = first_index + len("\r\nHost:") last_index = tcp_data.find("\r\n", first_index) if last_index >= 0: host = tcp_data[first_index:last_index] host = host.strip() if host.endswith(":80"): host = host[:-3] if host and host[0].isalpha() and dst_ip in trails: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, "%s (%s)" % (dst_ip, host.split(':')[0]), trails[dst_ip][0], trails[dst_ip][1]), packet) elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, "%s%s" % (host, path), "missing host header (suspicious)", "(heuristic)"), packet) index = tcp_data.find("\r\n\r\n") if index >= 0: post_data = tcp_data[index + 4:] if "://" in path: url = path.split("://", 1)[1] if '/' not in url: url = "%s/" % url host, path = url.split('/', 1) if host.endswith(":80"): host = host[:-3] path = "/%s" % path proxy_domain = host.split(':')[0] _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet) elif method == "CONNECT": if '/' in path: host, path = path.split('/', 1) path = "/%s" % path else: host, path = path, '/' if host.endswith(":80"): host = host[:-3] url = "%s%s" % (host, path) proxy_domain = host.split(':')[0] _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet) else: url = "%s%s" % (host, path) if config.USE_HEURISTICS: user_agent, result = None, None first_index = tcp_data.find("\r\nUser-Agent:") if first_index >= 0: first_index = first_index + len("\r\nUser-Agent:") last_index = tcp_data.find("\r\n", first_index) if last_index >= 0: user_agent = tcp_data[first_index:last_index] user_agent = urllib.unquote(user_agent).strip() if user_agent: result = _result_cache.get(user_agent) if result is None: if not any(_ in user_agent for _ in WHITELIST_UA_KEYWORDS): match = re.search(SUSPICIOUS_UA_REGEX, user_agent) if match: def _(value): return value.replace('(', "\\(").replace(')', "\\)") parts = user_agent.split(match.group(0), 1) if len(parts) > 1 and parts[0] and parts[-1]: result = _result_cache[user_agent] = "%s (%s)" % (_(match.group(0)), _(user_agent)) else: result = _result_cache[user_agent] = _(match.group(0)).join(("(%s)" if part else "%s") % _(part) for part in parts) if not result: _result_cache[user_agent] = False if result: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.UA, result, "user agent (suspicious)", "(heuristic)"), packet) checks = [path.rstrip('/')] if '?' in path: checks.append(path.split('?')[0].rstrip('/')) _ = os.path.splitext(checks[-1]) if _[1]: checks.append(_[0]) if checks[-1].count('/') > 1: checks.append(checks[-1][:checks[-1].rfind('/')]) for check in filter(None, checks): for _ in ("", host): check = "%s%s" % (_, check) if check in trails: parts = url.split(check) other = ("(%s)" % _ if _ else _ for _ in parts) trail = check.join(other) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, trails[check][0], trails[check][1])) return if config.USE_HEURISTICS: unquoted_path = urllib.unquote(path) unquoted_post_data = urllib.unquote(post_data or "") for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS: replacement = SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS[char] path = path.replace(char, replacement) if post_data: post_data = post_data.replace(char, replacement) if not _check_domain_whitelisted(host): if not any(_ in unquoted_path.lower() for _ in WHITELIST_HTTP_REQUEST_PATHS): if any(_ in unquoted_path for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION): found = _result_cache.get(unquoted_path) if found is None: for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES: if re.search(regex, unquoted_path, re.I | re.DOTALL): found = desc break _result_cache[unquoted_path] = found or "" if found: trail = "%s(%s)" % (host, path) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "potential %s (suspicious)" % found, "(heuristic)"), packet) return if any(_ in unquoted_post_data for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION): found = _result_cache.get(unquoted_post_data) if found is None: for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES: if re.search(regex, unquoted_post_data, re.I | re.DOTALL): found = desc break _result_cache[unquoted_post_data] = found or "" if found: trail = "%s(%s \(%s %s\))" % (host, path, method, post_data.strip()) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, trail, "potential %s (suspicious)" % found, "(heuristic)"), packet) return if '.' in path: _ = urlparse.urlparse("http://%s" % url) # dummy scheme filename = _.path.split('/')[-1] name, extension = os.path.splitext(filename) if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and not any(_ in path for _ in WHITELIST_DIRECT_DOWNLOAD_KEYWORDS) and not _.query and len(name) < 10: trail = "%s(%s)" % (host, path) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"), packet) elif filename == "suspendedpage.cgi": log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "suspended page (suspicious)", "(heuristic)"), packet) elif filename == "defaultwebpage.cgi": log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "non-existent page (suspicious)", "(heuristic)"), packet) elif protocol == socket.IPPROTO_UDP: # UDP _ = ip_data[iph_length:iph_length + 4] if len(_) < 4: return src_port, dst_port = struct.unpack("!HH", _) _ = _last_udp _last_udp = (sec, src_ip, src_port, dst_ip, dst_port) if _ == _last_udp: # skip bursts return if src_port != 53 and dst_port != 53: # not DNS if dst_ip in trails: trail = dst_ip elif src_ip in trails: trail = src_ip else: trail = None if trail: _ = _last_logged_udp _last_logged_udp = _last_udp if _ != _last_logged_udp: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, trail, trails[trail][0], trails[trail][1]), packet) else: dns_data = ip_data[iph_length + 8:] # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf if len(dns_data) > 6: qdcount = struct.unpack("!H", dns_data[4:6])[0] if qdcount > 0: offset = 12 query = "" while len(dns_data) > offset: length = ord(dns_data[offset]) if not length: query = query[:-1] break query += dns_data[offset + 1:offset + length + 1] + '.' offset += length + 1 query = query.lower() if not query or '.' not in query or not all(_ in VALID_DNS_CHARS for _ in query) or any(_ in query for _ in (".intranet.",)) or any(query.endswith(_) for _ in IGNORE_DNS_QUERY_SUFFIXES): return parts = query.split('.') if ord(dns_data[2]) == 0x01: # standard query type_, class_ = struct.unpack("!HH", dns_data[offset + 1:offset + 5]) if len(parts) > 2: domain = '.'.join(parts[-2:]) if not _check_domain_whitelisted(domain): # e.g. <hash>.hashserver.cs.trendmicro.com if (sec - (_subdomains_sec or 0)) > DAILY_SECS: _subdomains.clear() _dns_exhausted_domains.clear() _subdomains_sec = sec subdomains = _subdomains.get(domain) if not subdomains: subdomains = _subdomains[domain] = set() if len(subdomains) < DNS_EXHAUSTION_THRESHOLD: subdomains.add('.'.join(parts[:-2])) else: if (sec - (_last_dns_exhaustion or 0)) > 60: trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:])) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "potential dns exhaustion (suspicious)", "(heuristic)"), packet) _dns_exhausted_domains.add(domain) _last_dns_exhaustion = sec return # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types if type_ not in (12, 28) and class_ == 1: # Type not in (PTR, AAAA), Class IN if dst_ip in trails: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, "%s (%s)" % (dst_ip, query), trails[dst_ip][0], trails[dst_ip][1]), packet) elif src_ip in trails: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet) _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, packet) elif config.USE_HEURISTICS: if ord(dns_data[2]) & 0x80: # standard response if ord(dns_data[3]) == 0x80: # recursion available, no error if (ord(dns_data[offset + 5]) & 0xc0) and (dns_data[offset + 15] == "\x00") and (dns_data[offset + 16] == "\x04"): # QNAME compression, IPv4 result address answer = socket.inet_ntoa(dns_data[offset + 17:offset + 21]) if answer in trails and "sinkhole" in trails[answer][0]: trail = "(%s).%s" % ('.'.join(parts[:-1]), '.'.join(parts[-1:])) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "sinkholed by %s (malware)" % trails[answer][0].split(" ")[1], "(heuristic)"), packet) # (e.g. kitro.pl, devomchart.com, jebena.ananikolic.su, vuvet.cn) elif ord(dns_data[3]) == 0x83: # recursion available, no such name if '.'.join(parts[-2:]) not in _dns_exhausted_domains and not _check_domain_whitelisted(query) and not _check_domain_member(query, trails): if parts[-1].isdigit(): return if not (len(parts) > 4 and all(_.isdigit() and int(_) < 256 for _ in parts[:4])): # generic check for DNSBL IP lookups for _ in filter(None, (query, "*.%s" % '.'.join(parts[-2:]) if query.count('.') > 1 else None)): if _ not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[_][0] != sec / 3600: NO_SUCH_NAME_COUNTERS[_] = [sec / 3600, 1, set()] else: NO_SUCH_NAME_COUNTERS[_][1] += 1 NO_SUCH_NAME_COUNTERS[_][2].add(query) if NO_SUCH_NAME_COUNTERS[_][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD: if _.startswith("*."): log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, "%s%s" % ("(%s)" % ','.join(item.replace(_[1:], "") for item in NO_SUCH_NAME_COUNTERS[_][2]), _[1:]), "excessive no such domain (suspicious)", "(heuristic)"), packet) for item in NO_SUCH_NAME_COUNTERS[_][2]: try: del NO_SUCH_NAME_COUNTERS[item] except KeyError: pass else: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, _, "excessive no such domain (suspicious)", "(heuristic)"), packet) try: del NO_SUCH_NAME_COUNTERS[_] except KeyError: pass break if len(parts) > 2: part = parts[0] if parts[0] != "www" else parts[1] trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:])) elif len(parts) == 2: part = parts[0] trail = "(%s).%s" % (parts[0], parts[1]) else: part = query trail = query if part and '-' not in part: result = _result_cache.get(part) if result is None: # Reference: https://github.com/exp0se/dga_detector probabilities = (float(part.count(c)) / len(part) for c in set(_ for _ in part)) entropy = -sum(p * math.log(p) / math.log(2.0) for p in probabilities) if entropy > SUSPICIOUS_DOMAIN_ENTROPY_THRESHOLD: result = "entropy threshold no such domain (suspicious)" if not result: if sum(_ in CONSONANTS for _ in part) > SUSPICIOUS_DOMAIN_CONSONANT_THRESHOLD: result = "consonant threshold no such domain (suspicious)" _result_cache[part] = result or False if result: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, result, "(heuristic)"), packet) elif protocol in IPPROTO_LUT: # non-TCP/UDP (e.g. ICMP) if protocol == socket.IPPROTO_ICMP: if ord(ip_data[iph_length]) != 0x08: # Non-echo request return elif protocol == socket.IPPROTO_ICMPV6: if ord(ip_data[iph_length]) != 0x80: # Non-echo request return if dst_ip in trails: log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet) elif src_ip in trails: log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet) except struct.error: pass except Exception: if config.SHOW_DEBUG: traceback.print_exc()