def scan(self, chunk):
    """Scan a chunk of text, character by character, for a configured phrase.

    The chunk is lower-cased and fed one character at a time to a set of
    in-flight ``ScanPhrase`` trackers.  A tracker is started whenever the
    current character is a key of ``self.phrase_list`` and no tracker for
    that phrase is already in flight.

    Returns a ``MatchResult``.  On the first tracker that reports
    ``matched()`` the result is flagged, ``category`` is set to
    ``self.category`` and ``criteria`` to the phrase's words joined with
    ``", "``.  Otherwise the untouched ``MatchResult()`` is returned.
    """
    body = chunk.lower()
    result = MatchResult()
    active_phrases = []

    for char in body:
        # First advance activated phrases.  Survivors are collected into a
        # new list instead of being removed from the list being iterated:
        # removing in place makes the iterator skip the element that follows
        # each removed one, so that tracker would miss this character.
        still_active = []
        for phrase in active_phrases:
            phrase.advance(char)
            if phrase.matched():
                result.matched = True
                result.category = self.category
                result.criteria = ", ".join(phrase.phrase.words)
                return result
            if phrase.active():
                still_active.append(phrase)
        active_phrases = still_active

        # Now activate any inactive phrases that match this character
        if char in self.phrase_list:
            for phrase in self.phrase_list[char]:
                already_active = any(
                    tracker.phrase == phrase for tracker in active_phrases
                )
                if not already_active:
                    # Start a tracker and feed it the character that
                    # triggered it.
                    scan_phrase = ScanPhrase(phrase)
                    scan_phrase.advance(char)
                    active_phrases.append(scan_phrase)

    # No match
    return result
def scan(self, r):
    """Flag the message when a Host header equals, or is a subdomain of, a banned host.

    Host values are taken from ``r.enc_req_headers['host']`` when present,
    otherwise from ``r.enc_res_headers['host']``.  Each value is compared
    against every entry of ``self.host_list`` on whole dot-separated labels,
    starting from the rightmost label.

    Returns a ``MatchResult``; on a match ``criteria`` is the offending host
    value and ``category`` is ``self.category``.
    """
    result = MatchResult()

    if 'host' in r.enc_req_headers:
        hosts = r.enc_req_headers['host']
    elif 'host' in r.enc_res_headers:
        hosts = r.enc_res_headers['host']
    else:
        hosts = None

    if hosts is None:
        # No host in headers
        return result

    for host in hosts:
        # Labels in right-to-left order, e.g. "a.b.com" -> ["com", "b", "a"].
        host_labels = host.split('.')[::-1]
        for banned_host in self.host_list:
            banned_labels = banned_host.split('.')[::-1]
            # The banned host must be a label-wise suffix of the request
            # host.  When the banned host has more labels the slice is
            # shorter than banned_labels and the comparison is False.
            if host_labels[:len(banned_labels)] == banned_labels:
                result.matched = True
                result.category = self.category
                result.criteria = host
                return result

    # No match
    return result
def scan(self, r):
    """Flag the request when its URL contains any entry of ``self.url_list``.

    The URL is read from ``r.enc_req[1]`` and tested with plain substring
    containment.  The first entry (in list order) found in the URL becomes
    ``criteria``; ``category`` is ``self.category``.

    Returns a ``MatchResult`` (unflagged when nothing matched).
    """
    result = MatchResult()
    url = r.enc_req[1]

    # A private sentinel distinguishes "nothing found" from any list entry.
    nothing = object()
    hit = next(
        (candidate for candidate in self.url_list if candidate in url),
        nothing,
    )
    if hit is not nothing:
        result.matched = True
        result.category = self.category
        result.criteria = hit

    return result
def scan(self, request):
    """Flag the request when its lower-cased URL matches a configured regex.

    The URL is read from ``request.enc_req[1]`` and lower-cased; each
    pattern in ``self.regex_list`` is tried in order with ``re.search``.
    The first pattern that matches becomes ``criteria``.

    Returns a ``MatchResult`` (unflagged when nothing matched).
    """
    result = MatchResult()
    url = request.enc_req[1].lower()

    for pattern in self.regex_list:
        if re.search(pattern, url) is None:
            continue
        result.matched = True
        result.category = self.category
        result.criteria = pattern
        return result

    # No match
    return result
def scan(self, request):
    """Flag the request when the extension of its URL path is listed.

    The URL from ``request.enc_req[1]`` is lower-cased, its path component
    is extracted with ``urlparse.urlparse`` and the extension is taken with
    ``os.path.splitext`` (so it includes the leading dot, or is empty).

    Returns a ``MatchResult``; on a match ``criteria`` is the extension.
    """
    result = MatchResult()

    lowered_url = request.enc_req[1].lower()
    url_path = urlparse.urlparse(lowered_url).path
    _root, ext = os.path.splitext(url_path)

    if ext not in self.extension_list:
        # No match
        return result

    result.matched = True
    result.category = self.category
    result.criteria = ext
    return result
def scan(self, response):
    """Flag the response when one of its Content-Type values is listed.

    Every value under ``response.enc_res_headers['content-type']`` is
    stripped of surrounding whitespace and looked up in
    ``self.mimetype_list``.  On a match ``criteria`` is the header value
    as received (not stripped).

    Returns a ``MatchResult`` (unflagged when the header is absent or no
    value is listed).
    """
    result = MatchResult()
    headers = response.enc_res_headers

    if 'content-type' not in headers:
        # No mimetype in response
        return result

    for mime_type in headers['content-type']:
        if mime_type.strip() not in self.mimetype_list:
            continue
        result.matched = True
        result.category = self.category
        result.criteria = mime_type
        return result

    # No match
    return result
def scan(self, request):
    """Flag a YouTube request whose video metadata matches a configured regex.

    Steps:
      1. Proceed only when a request Host header value contains
         ``'youtube'`` or ``'ytimg'``.
      2. Extract an 11-character video id from the URL
         (``request.enc_req[1]``).
      3. If the id is in ``self.clean_cache``, bump its hit counter and
         return an unflagged result without any network access.
      4. Otherwise download the video's metadata feed, lower-case it and
         try every pattern in ``self.regex_list`` against it.
      5. When no pattern matches, remember the id in ``self.clean_cache``.

    Returns a ``MatchResult``; on a match ``criteria`` is the regex that
    matched.  Errors raised by ``urllib2.urlopen`` propagate to the caller.
    """
    result = MatchResult()
    url = request.enc_req[1]

    if 'host' in request.enc_req_headers:
        hosts = request.enc_req_headers['host']
    else:
        hosts = []
    if not any('youtube' in host or 'ytimg' in host for host in hosts):
        return result

    # The patterns are tried in this order and a later hit overrides an
    # earlier one.  The id is the last 11 characters of the matched text.
    vid_id = ''
    for pattern in (r'watch\?v=(.){11}', r'v/(.){11}', r'vi/(.){11}'):
        m = re.search(pattern, url)
        if m is not None:
            vid_id = m.group(0)[-11:]

    if vid_id == '':
        # No youtube url present
        return result

    # Check cache
    if vid_id in self.clean_cache:
        self.clean_cache[vid_id] += 1
        return result

    # Get info on youtube video.  The response is closed explicitly so the
    # connection is released even if read() raises.
    # NOTE(review): the gdata v2 feed endpoint appears to be retired --
    # confirm this lookup still works.
    response = urllib2.urlopen(
        'http://gdata.youtube.com/feeds/api/videos/' + vid_id + '?v=2&alt=json'
    )
    try:
        data = response.read().lower()
    finally:
        response.close()

    for regex in self.regex_list:
        if re.search(regex, data) is not None:
            result.matched = True
            result.category = self.category
            result.criteria = regex
            return result

    # Clean video; add to cache.  When the cache is full, evict the entry
    # with the smallest hit counter first.  ">=" keeps the bound enforced
    # even if the cache was ever filled past the limit.
    if len(self.clean_cache) >= MAX_CACHE_SIZE:
        min_cache_key = min(self.clean_cache, key=self.clean_cache.get)
        del self.clean_cache[min_cache_key]
    self.clean_cache[vid_id] = 1

    # No match
    return result
def scan(self, request):
    """Flag the request when the text from the URL's last dot onward is listed.

    The "extension" is the substring of ``request.enc_req[1]`` starting at
    the last ``'.'`` (the dot is included) and running to the end of the
    URL.  It is looked up in ``self.extension_list``.

    Returns a ``MatchResult``; on a match ``criteria`` is that extension.
    A URL without any dot yields an unflagged result.
    """
    result = MatchResult()
    url = request.enc_req[1]

    # rfind goes straight to the final dot.  The previous hand-rolled loop
    # applied an offset measured in ``url`` to an already-sliced string on
    # its first pass, which usually left an empty extension
    # (e.g. "http://example.com/x.exe" produced '').
    last_dot = url.rfind('.')
    if last_dot == -1:
        # No extension
        return result

    extension = url[last_dot:]
    if extension in self.extension_list:
        result.matched = True
        result.category = self.category
        result.criteria = extension
        return result

    # No match
    return result
def scan(self, request):
    """Flag a YouTube request whose video metadata matches a configured regex.

    Steps:
      1. Proceed only when a request Host header value contains
         ``'youtube'`` or ``'ytimg'``.
      2. Extract an 11-character video id from the URL
         (``request.enc_req[1]``).
      3. If the id is in ``self.clean_cache``, bump its hit counter and
         return an unflagged result without any network access.
      4. Otherwise download the video's metadata feed, lower-case it and
         try every pattern in ``self.regex_list`` against it.
      5. When no pattern matches, remember the id in ``self.clean_cache``.

    Returns a ``MatchResult``; on a match ``criteria`` is the regex that
    matched.  Errors raised by ``urllib2.urlopen`` propagate to the caller.
    """
    result = MatchResult()
    url = request.enc_req[1]

    if 'host' in request.enc_req_headers:
        hosts = request.enc_req_headers['host']
    else:
        hosts = []
    if not any('youtube' in host or 'ytimg' in host for host in hosts):
        return result

    # The patterns are tried in this order and a later hit overrides an
    # earlier one.  The id is the last 11 characters of the matched text.
    vid_id = ''
    for pattern in (r'watch\?v=(.){11}', r'v/(.){11}', r'vi/(.){11}'):
        m = re.search(pattern, url)
        if m is not None:
            vid_id = m.group(0)[-11:]

    if vid_id == '':
        # No youtube url present
        return result

    # Check cache
    if vid_id in self.clean_cache:
        self.clean_cache[vid_id] += 1
        return result

    # Get info on youtube video.  The response is closed explicitly so the
    # connection is released even if read() raises.
    # NOTE(review): the gdata v2 feed endpoint appears to be retired --
    # confirm this lookup still works.
    response = urllib2.urlopen(
        'http://gdata.youtube.com/feeds/api/videos/' + vid_id + '?v=2&alt=json'
    )
    try:
        data = response.read().lower()
    finally:
        response.close()

    for regex in self.regex_list:
        if re.search(regex, data) is not None:
            result.matched = True
            result.category = self.category
            result.criteria = regex
            return result

    # Clean video; add to cache.  When the cache is full, evict the entry
    # with the smallest hit counter first.  ">=" keeps the bound enforced
    # even if the cache was ever filled past the limit.
    if len(self.clean_cache) >= MAX_CACHE_SIZE:
        min_cache_key = min(self.clean_cache, key=self.clean_cache.get)
        del self.clean_cache[min_cache_key]
    self.clean_cache[vid_id] = 1

    # No match
    return result
def scan(self, r):
    """Flag the message when a Host header value contains a listed host.

    Host values are taken from ``r.enc_req_headers['host']`` when present,
    otherwise from ``r.enc_res_headers['host']``.  A value matches when any
    entry of ``self.host_list`` (whitespace-stripped) occurs as a substring
    of the stripped value.

    Returns a ``MatchResult``; on a match ``criteria`` is the host value as
    received and ``category`` is ``self.category``.

    NOTE(review): because this is substring containment, an entry that is
    empty after stripping matches every host -- confirm the list loader
    filters blank entries.
    """
    result = MatchResult()

    if 'host' in r.enc_req_headers:
        hosts = r.enc_req_headers['host']
    elif 'host' in r.enc_res_headers:
        hosts = r.enc_res_headers['host']
    else:
        hosts = None

    if hosts is None:
        # No host in headers
        return result

    for host in hosts:
        listed = any(
            allowed_host.strip() in host.strip()
            for allowed_host in self.host_list
        )
        if listed:
            result.matched = True
            result.category = self.category
            result.criteria = host
            return result

    # No match
    return result
def scan(self, chunk):
    """Flag the chunk when every word of some phrase is found in it.

    The chunk is lower-cased.  Each entry of ``self.wordMap.sorted`` is
    used as a ``re.search`` pattern against it; for every entry that is
    found, each phrase in ``self.wordMap.words[entry]`` gets its hit
    counter incremented.  A phrase is complete when its counter equals
    ``len(phrase.words)``.

    The scan does not stop at the first complete phrase: all words are
    processed and the returned ``MatchResult`` carries the criteria of the
    last phrase that completed (its words joined with ``', '``).
    """
    body = chunk.lower()
    result = MatchResult()
    hits_per_phrase = {}

    for word in self.wordMap.sorted:
        if not re.search(word, body):
            continue
        for phrase in self.wordMap.words[word]:
            hits = hits_per_phrase.get(phrase, 0) + 1
            hits_per_phrase[phrase] = hits
            if hits != len(phrase.words):
                continue
            result.matched = True
            result.category = self.category
            result.criteria = ', '.join(phrase.words)

    return result
def scan(self, request):
    """Flag the message when one of its header lines matches a configured regex.

    The response headers (``request.enc_res_headers``) are scanned when
    there are any; otherwise the request headers
    (``request.enc_req_headers``) are scanned.  Every value of every header
    is rendered as ``"<name>: <value>"`` and tested with ``re.search``
    against each pattern in ``self.regex_list``.

    Returns a ``MatchResult``; on a match ``criteria`` is the regex that
    matched and ``category`` is ``self.category``.
    """
    result = MatchResult()

    if len(request.enc_res_headers) > 0:
        # This is a respmod
        headers = request.enc_res_headers
    else:
        # This is a reqmod
        headers = request.enc_req_headers

    for regex in self.regex_list:
        for name in headers:
            # Each header name maps to a sequence of values, so iterate the
            # values of this header rather than the header names again.
            for value in headers[name]:
                header_line = name + ': ' + value
                # Search the assembled header line; the URL is not what
                # this scanner inspects.
                if re.search(regex, header_line) is not None:
                    result.matched = True
                    result.category = self.category
                    result.criteria = regex
                    return result

    # No match
    return result