Пример #1
0
 def scan(self, chunk):
     """Scan a chunk of text for any configured phrase.

     The chunk is lower-cased and fed one character at a time to every
     currently active ``ScanPhrase``.  A phrase is activated when the
     current character is a key of ``self.phrase_list`` and no active
     scanner already tracks that phrase.

     Args:
         chunk: Text to scan; it is lower-cased before matching.

     Returns:
         A ``MatchResult``.  As soon as an active phrase reports
         ``matched()``, the result gets ``matched = True``, ``category``
         set to ``self.category`` and ``criteria`` set to the phrase's
         words joined by ", ".  Otherwise the freshly constructed result
         is returned untouched.
     """
     body = chunk.lower()
     result = MatchResult()
     active_phrases = []
     for char in body:
         # First advance activated phrases.  Survivors are collected into
         # a new list: calling remove() on the list being iterated made
         # the loop skip the element that followed each removed one.
         still_active = []
         for phrase in active_phrases:
             phrase.advance(char)
             if phrase.matched():
                 result.matched = True
                 result.category = self.category
                 result.criteria = ", ".join(phrase.phrase.words)
                 return result
             if phrase.active():
                 still_active.append(phrase)
         active_phrases = still_active
         # Now activate any inactive phrases that match this character
         if char in self.phrase_list:
             for phrase in self.phrase_list[char]:
                 is_active = any(scan_phrase.phrase == phrase
                                 for scan_phrase in active_phrases)
                 if not is_active:
                     # Add to active phrases
                     scan_phrase = ScanPhrase(phrase)
                     scan_phrase.advance(char)
                     active_phrases.append(scan_phrase)
     # No match
     return result
Пример #2
0
 def scan(self, chunk):
     """Scan a chunk of text for any configured phrase.

     The chunk is lower-cased and fed one character at a time to every
     currently active ``ScanPhrase``.  A phrase is activated when the
     current character is a key of ``self.phrase_list`` and no active
     scanner already tracks that phrase.

     Args:
         chunk: Text to scan; it is lower-cased before matching.

     Returns:
         A ``MatchResult``.  As soon as an active phrase reports
         ``matched()``, the result gets ``matched = True``, ``category``
         set to ``self.category`` and ``criteria`` set to the phrase's
         words joined by ", ".  Otherwise the freshly constructed result
         is returned untouched.
     """
     body = chunk.lower()
     result = MatchResult()
     active_phrases = []
     for char in body:
         # First advance activated phrases.  Survivors are collected into
         # a new list: calling remove() on the list being iterated made
         # the loop skip the element that followed each removed one.
         still_active = []
         for phrase in active_phrases:
             phrase.advance(char)
             if phrase.matched():
                 result.matched = True
                 result.category = self.category
                 result.criteria = ", ".join(phrase.phrase.words)
                 return result
             if phrase.active():
                 still_active.append(phrase)
         active_phrases = still_active
         # Now activate any inactive phrases that match this character
         if char in self.phrase_list:
             for phrase in self.phrase_list[char]:
                 is_active = any(scan_phrase.phrase == phrase
                                 for scan_phrase in active_phrases)
                 if not is_active:
                     # Add to active phrases
                     scan_phrase = ScanPhrase(phrase)
                     scan_phrase.advance(char)
                     active_phrases.append(scan_phrase)
     # No match
     return result
Пример #3
0
 def scan(self, r):
     """Check the 'host' header values against the banned host list.

     Hosts are read from ``r.enc_req_headers['host']`` when that key is
     present, otherwise from ``r.enc_res_headers['host']``.  A host
     matches a banned entry when its dot-separated labels, compared from
     the right, begin with all labels of the banned entry (i.e. it is the
     banned host itself or a subdomain of it).

     Args:
         r: Object exposing ``enc_req_headers`` and ``enc_res_headers``
             mappings.  Assumes the 'host' entry is an iterable of
             strings -- TODO confirm against the caller.

     Returns:
         A ``MatchResult``; on the first match ``matched`` is True,
         ``category`` is ``self.category`` and ``criteria`` is the
         matching host.  Returned untouched when there is no host header
         or nothing matches.
     """
     result = MatchResult()
     hosts = None
     if 'host' in r.enc_req_headers:
         hosts = r.enc_req_headers['host']
     elif 'host' in r.enc_res_headers:
         hosts = r.enc_res_headers['host']
     if hosts is None:
         # No host in headers
         return result
     # Split and reverse each banned host once, not once per request host.
     banned_parts_list = [banned_host.split('.')[::-1]
                          for banned_host in self.host_list]
     for host in hosts:
         req_host_parts = host.split('.')[::-1]
         for banned_host_parts in banned_parts_list:
             # Comparing against a slice also covers the case where the
             # request host has fewer labels than the banned host: the
             # slice is then shorter and can never be equal.
             prefix = req_host_parts[:len(banned_host_parts)]
             if prefix == banned_host_parts:
                 result.matched = True
                 result.category = self.category
                 result.criteria = host
                 return result
     # No match
     return result
Пример #4
0
 def scan(self, r):
     """Check the 'host' header values against the banned host list.

     Hosts are read from ``r.enc_req_headers['host']`` when that key is
     present, otherwise from ``r.enc_res_headers['host']``.  A host
     matches a banned entry when its dot-separated labels, compared from
     the right, begin with all labels of the banned entry (i.e. it is the
     banned host itself or a subdomain of it).

     Args:
         r: Object exposing ``enc_req_headers`` and ``enc_res_headers``
             mappings.  Assumes the 'host' entry is an iterable of
             strings -- TODO confirm against the caller.

     Returns:
         A ``MatchResult``; on the first match ``matched`` is True,
         ``category`` is ``self.category`` and ``criteria`` is the
         matching host.  Returned untouched when there is no host header
         or nothing matches.
     """
     result = MatchResult()
     hosts = None
     if 'host' in r.enc_req_headers:
         hosts = r.enc_req_headers['host']
     elif 'host' in r.enc_res_headers:
         hosts = r.enc_res_headers['host']
     if hosts is None:
         # No host in headers
         return result
     # Split and reverse each banned host once, not once per request host.
     banned_parts_list = [banned_host.split('.')[::-1]
                          for banned_host in self.host_list]
     for host in hosts:
         req_host_parts = host.split('.')[::-1]
         for banned_host_parts in banned_parts_list:
             # Comparing against a slice also covers the case where the
             # request host has fewer labels than the banned host: the
             # slice is then shorter and can never be equal.
             prefix = req_host_parts[:len(banned_host_parts)]
             if prefix == banned_host_parts:
                 result.matched = True
                 result.category = self.category
                 result.criteria = host
                 return result
     # No match
     return result
Пример #5
0
 def scan(self, r):
     """Flag the request when its URL contains a banned URL fragment.

     The URL is taken from ``r.enc_req[1]`` and tested, by plain
     containment, against each entry of ``self.url_list`` in order.

     Returns:
         A ``MatchResult``; for the first entry found in the URL,
         ``matched`` is True, ``category`` is ``self.category`` and
         ``criteria`` is that entry.  Otherwise the result is returned
         as constructed.
     """
     outcome = MatchResult()
     requested_url = r.enc_req[1]
     for fragment in self.url_list:
         if fragment not in requested_url:
             continue
         # First hit wins; stop looking.
         outcome.matched = True
         outcome.category = self.category
         outcome.criteria = fragment
         break
     return outcome
Пример #6
0
 def scan(self, r):
     """Flag the request when its URL contains a banned URL fragment.

     The URL is taken from ``r.enc_req[1]`` and tested, by plain
     containment, against each entry of ``self.url_list`` in order.

     Returns:
         A ``MatchResult``; for the first entry found in the URL,
         ``matched`` is True, ``category`` is ``self.category`` and
         ``criteria`` is that entry.  Otherwise the result is returned
         as constructed.
     """
     outcome = MatchResult()
     requested_url = r.enc_req[1]
     for fragment in self.url_list:
         if fragment not in requested_url:
             continue
         # First hit wins; stop looking.
         outcome.matched = True
         outcome.category = self.category
         outcome.criteria = fragment
         break
     return outcome
Пример #7
0
 def scan(self, request):
     """Flag the request when its lower-cased URL matches a regex.

     The URL is ``request.enc_req[1]`` lower-cased; each pattern of
     ``self.regex_list`` is tried in order with ``re.search``.

     Returns:
         A ``MatchResult``; for the first pattern that matches,
         ``matched`` is True, ``category`` is ``self.category`` and
         ``criteria`` is the pattern.  Otherwise the result is returned
         as constructed.
     """
     outcome = MatchResult()
     lowered_url = request.enc_req[1].lower()
     for pattern in self.regex_list:
         if re.search(pattern, lowered_url) is None:
             continue
         outcome.matched = True
         outcome.category = self.category
         outcome.criteria = pattern
         break
     return outcome
Пример #8
0
 def scan(self, request):
     """Flag the request when its URL path has a listed file extension.

     The URL (``request.enc_req[1]``) is lower-cased, its path component
     is extracted with ``urlparse.urlparse`` and the extension is the
     second element returned by ``os.path.splitext`` (leading dot
     included, or an empty string when there is none).

     Returns:
         A ``MatchResult``; when the extension is in
         ``self.extension_list``, ``matched`` is True, ``category`` is
         ``self.category`` and ``criteria`` is the extension.  Otherwise
         the result is returned as constructed.
     """
     outcome = MatchResult()
     lowered_url = request.enc_req[1].lower()
     url_path = urlparse.urlparse(lowered_url).path
     _, suffix = os.path.splitext(url_path)
     if suffix not in self.extension_list:
         # Extension not listed
         return outcome
     outcome.matched = True
     outcome.category = self.category
     outcome.criteria = suffix
     return outcome
Пример #9
0
 def scan(self, request):
     """Flag the request when its URL path has a listed file extension.

     The URL (``request.enc_req[1]``) is lower-cased, its path component
     is extracted with ``urlparse.urlparse`` and the extension is the
     second element returned by ``os.path.splitext`` (leading dot
     included, or an empty string when there is none).

     Returns:
         A ``MatchResult``; when the extension is in
         ``self.extension_list``, ``matched`` is True, ``category`` is
         ``self.category`` and ``criteria`` is the extension.  Otherwise
         the result is returned as constructed.
     """
     outcome = MatchResult()
     lowered_url = request.enc_req[1].lower()
     url_path = urlparse.urlparse(lowered_url).path
     _, suffix = os.path.splitext(url_path)
     if suffix not in self.extension_list:
         # Extension not listed
         return outcome
     outcome.matched = True
     outcome.category = self.category
     outcome.criteria = suffix
     return outcome
Пример #10
0
 def scan(self, response):
     """Flag the response when a 'content-type' value is a listed mimetype.

     Each value under ``response.enc_res_headers['content-type']`` is
     stripped of surrounding whitespace and looked up in
     ``self.mimetype_list``.

     Returns:
         A ``MatchResult``; for the first listed value, ``matched`` is
         True, ``category`` is ``self.category`` and ``criteria`` is the
         header value as received (not stripped).  Returned as
         constructed when the header is absent or nothing is listed.
     """
     outcome = MatchResult()
     if 'content-type' in response.enc_res_headers:
         for raw_type in response.enc_res_headers['content-type']:
             if raw_type.strip() not in self.mimetype_list:
                 continue
             outcome.matched = True
             outcome.category = self.category
             outcome.criteria = raw_type
             break
     # Either no content-type header, no listed value, or a match above
     return outcome
Пример #11
0
 def scan(self, request):
     """Check a requested YouTube video's metadata against the regex list.

     The request is only inspected when one of its 'host' header values
     contains 'youtube' or 'ytimg'.  An 11-character video id is taken
     from the URL (``request.enc_req[1]``), the video's feed is fetched
     over HTTP, lower-cased and searched with every pattern in
     ``self.regex_list``.  Ids whose feed matched nothing are remembered
     in ``self.clean_cache`` together with a hit counter.

     NOTE(review): the hard-coded gdata v2 feed URL appears to belong to
     a retired YouTube API -- confirm whether this lookup still works.

     Args:
         request: Object exposing ``enc_req`` and ``enc_req_headers``.

     Returns:
         A ``MatchResult``; for the first pattern that matches the feed,
         ``matched`` is True, ``category`` is ``self.category`` and
         ``criteria`` is the pattern.  Otherwise the result is returned
         as constructed.

     Raises:
         Whatever ``urllib2.urlopen`` / ``read`` raise on network errors;
         they are not caught here.
     """
     result = MatchResult()
     url = request.enc_req[1]
     is_youtube = False
     if 'host' in request.enc_req_headers:
         is_youtube = any('youtube' in host or 'ytimg' in host
                          for host in request.enc_req_headers['host'])
     if not is_youtube:
         return result
     # Try every known URL shape in order; a later pattern that matches
     # overrides an earlier one.  Raw strings keep '\?' a regex escape
     # rather than an (invalid) string escape.
     vid_id = ''
     for pattern in (r'watch\?v=(.){11}', r'v/(.){11}', r'vi/(.){11}'):
         m = re.search(pattern, url)
         if m is not None:
             vid_id = m.group(0)[-11:]
     if vid_id == '':
         # No youtube url present
         return result
     # Check cache
     if vid_id in self.clean_cache:
         self.clean_cache[vid_id] += 1
         return result
     # Get info on youtube video; always close the HTTP response.
     response = urllib2.urlopen('http://gdata.youtube.com/feeds/api/videos/' +
                                vid_id + '?v=2&alt=json')
     try:
         data = response.read().lower()
     finally:
         response.close()
     for regex in self.regex_list:
         if re.search(regex, data) is not None:
             result.matched = True
             result.category = self.category
             result.criteria = regex
             return result
     # Clean video; add to cache.  Evict the least-hit entry when full;
     # '>=' also recovers if the cache ever grew past the limit.
     if len(self.clean_cache) >= MAX_CACHE_SIZE:
         min_cache_key = min(self.clean_cache, key=self.clean_cache.get)
         del self.clean_cache[min_cache_key]
     if vid_id not in self.clean_cache:
         self.clean_cache[vid_id] = 1
     # No match
     return result
Пример #12
0
 def scan(self, request):
     """Flag the request when the URL ends in a listed extension.

     The extension is everything from the last '.' in
     ``request.enc_req[1]`` to the end of the URL, leading dot included.
     The URL is not parsed, so a trailing query string, if any, is part
     of the extension.

     The previous hand-written loop applied the URL-relative index of
     the first dot to an already sliced string, which produced a wrong
     extension (for example 'xe' for 'a.exe').

     Args:
         request: Object whose ``enc_req[1]`` is the URL string.

     Returns:
         A ``MatchResult``; when the extension is in
         ``self.extension_list``, ``matched`` is True, ``category`` is
         ``self.category`` and ``criteria`` is the extension.  Otherwise
         the result is returned as constructed.
     """
     result = MatchResult()
     url = request.enc_req[1]
     ext_start = url.rfind('.')
     if ext_start == -1:
         # No extension
         return result
     extension = url[ext_start:]
     if extension in self.extension_list:
         result.matched = True
         result.category = self.category
         result.criteria = extension
         return result
     # No match
     return result
Пример #13
0
 def scan(self, request):
     """Check a requested YouTube video's metadata against the regex list.

     The request is only inspected when one of its 'host' header values
     contains 'youtube' or 'ytimg'.  An 11-character video id is taken
     from the URL (``request.enc_req[1]``), the video's feed is fetched
     over HTTP, lower-cased and searched with every pattern in
     ``self.regex_list``.  Ids whose feed matched nothing are remembered
     in ``self.clean_cache`` together with a hit counter.

     NOTE(review): the hard-coded gdata v2 feed URL appears to belong to
     a retired YouTube API -- confirm whether this lookup still works.

     Args:
         request: Object exposing ``enc_req`` and ``enc_req_headers``.

     Returns:
         A ``MatchResult``; for the first pattern that matches the feed,
         ``matched`` is True, ``category`` is ``self.category`` and
         ``criteria`` is the pattern.  Otherwise the result is returned
         as constructed.

     Raises:
         Whatever ``urllib2.urlopen`` / ``read`` raise on network errors;
         they are not caught here.
     """
     result = MatchResult()
     url = request.enc_req[1]
     is_youtube = False
     if 'host' in request.enc_req_headers:
         is_youtube = any('youtube' in host or 'ytimg' in host
                          for host in request.enc_req_headers['host'])
     if not is_youtube:
         return result
     # Try every known URL shape in order; a later pattern that matches
     # overrides an earlier one.  Raw strings keep '\?' a regex escape
     # rather than an (invalid) string escape.
     vid_id = ''
     for pattern in (r'watch\?v=(.){11}', r'v/(.){11}', r'vi/(.){11}'):
         m = re.search(pattern, url)
         if m is not None:
             vid_id = m.group(0)[-11:]
     if vid_id == '':
         # No youtube url present
         return result
     # Check cache
     if vid_id in self.clean_cache:
         self.clean_cache[vid_id] += 1
         return result
     # Get info on youtube video; always close the HTTP response.
     response = urllib2.urlopen('http://gdata.youtube.com/feeds/api/videos/' +
                                vid_id + '?v=2&alt=json')
     try:
         data = response.read().lower()
     finally:
         response.close()
     for regex in self.regex_list:
         if re.search(regex, data) is not None:
             result.matched = True
             result.category = self.category
             result.criteria = regex
             return result
     # Clean video; add to cache.  Evict the least-hit entry when full;
     # '>=' also recovers if the cache ever grew past the limit.
     if len(self.clean_cache) >= MAX_CACHE_SIZE:
         min_cache_key = min(self.clean_cache, key=self.clean_cache.get)
         del self.clean_cache[min_cache_key]
     if vid_id not in self.clean_cache:
         self.clean_cache[vid_id] = 1
     # No match
     return result
Пример #14
0
 def scan(self, r):
     """Flag the message when a 'host' header value contains a listed host.

     Hosts are read from ``r.enc_req_headers['host']`` when that key is
     present, otherwise from ``r.enc_res_headers['host']``.  Both the
     header value and each entry of ``self.host_list`` are stripped of
     surrounding whitespace before the containment test.

     NOTE(review): the test is plain substring containment, so an entry
     'example.com' also matches a host such as 'example.com.other.org'
     -- confirm this is intended.

     Args:
         r: Object exposing ``enc_req_headers`` and ``enc_res_headers``
             mappings.

     Returns:
         A ``MatchResult``; on the first match ``matched`` is True,
         ``category`` is ``self.category`` and ``criteria`` is the header
         value as received (not stripped).  Returned as constructed when
         there is no host header or nothing matches.
     """
     result = MatchResult()
     hosts = None
     if 'host' in r.enc_req_headers:
         hosts = r.enc_req_headers['host']
     elif 'host' in r.enc_res_headers:
         hosts = r.enc_res_headers['host']
     if hosts is None:
         # No host in headers
         return result
     for host in hosts:
         # Strip once per header value instead of once per list entry.
         stripped_host = host.strip()
         for allowed_host in self.host_list:
             if allowed_host.strip() in stripped_host:
                 result.matched = True
                 result.category = self.category
                 result.criteria = host
                 return result
     # No match
     return result
Пример #15
0
 def scan(self, chunk):
     """Scan a chunk for phrases whose words all occur in it.

     The chunk is lower-cased.  Every entry of ``self.wordMap.sorted``
     is used as a regular expression and searched in the text; for each
     one found, the hit counter of every phrase listed under
     ``self.wordMap.words[word]`` is incremented.  A phrase is reported
     once its counter equals ``len(phrase.words)``.

     Scanning does not stop at the first completed phrase, so when
     several phrases complete, ``criteria`` describes the last one.

     Args:
         chunk: Text to scan; it is lower-cased before matching.

     Returns:
         A ``MatchResult``; when a phrase completes, ``matched`` is
         True, ``category`` is ``self.category`` and ``criteria`` is the
         phrase's words joined by ', '.  Otherwise the result is returned
         as constructed.
     """
     body = chunk.lower()
     result = MatchResult()
     phrase_counts = {}
     for word in self.wordMap.sorted:
         if not re.search(word, body):
             continue
         for phrase in self.wordMap.words[word]:
             phrase_counts[phrase] = phrase_counts.get(phrase, 0) + 1
             if phrase_counts[phrase] == len(phrase.words):
                 result.matched = True
                 result.category = self.category
                 # join() replaces a loop that reused the name 'word' and
                 # thereby shadowed the outer loop variable.
                 result.criteria = ', '.join(phrase.words)
     # Matched or not, the result is returned only after all words are seen
     return result
Пример #16
0
 def scan(self, chunk):
     """Scan a chunk for phrases whose words all occur in it.

     The chunk is lower-cased.  Every entry of ``self.wordMap.sorted``
     is used as a regular expression and searched in the text; for each
     one found, the hit counter of every phrase listed under
     ``self.wordMap.words[word]`` is incremented.  A phrase is reported
     once its counter equals ``len(phrase.words)``.

     Scanning does not stop at the first completed phrase, so when
     several phrases complete, ``criteria`` describes the last one.

     Args:
         chunk: Text to scan; it is lower-cased before matching.

     Returns:
         A ``MatchResult``; when a phrase completes, ``matched`` is
         True, ``category`` is ``self.category`` and ``criteria`` is the
         phrase's words joined by ', '.  Otherwise the result is returned
         as constructed.
     """
     body = chunk.lower()
     result = MatchResult()
     phrase_counts = {}
     for word in self.wordMap.sorted:
         if not re.search(word, body):
             continue
         for phrase in self.wordMap.words[word]:
             phrase_counts[phrase] = phrase_counts.get(phrase, 0) + 1
             if phrase_counts[phrase] == len(phrase.words):
                 result.matched = True
                 result.category = self.category
                 # join() replaces a loop that reused the name 'word' and
                 # thereby shadowed the outer loop variable.
                 result.criteria = ', '.join(phrase.words)
     # Matched or not, the result is returned only after all words are seen
     return result
Пример #17
0
 def scan(self, request):
     """Flag the message when any header line matches a regex.

     The response headers (``request.enc_res_headers``) are used when
     that mapping is non-empty, otherwise the request headers
     (``request.enc_req_headers``).  Each header is rendered as
     'name: value' and searched with every pattern of
     ``self.regex_list``.

     The previous implementation built the header line but then searched
     the request URL instead, and paired every header name with every
     header name rather than with its own values.

     Args:
         request: Object exposing ``enc_res_headers`` and
             ``enc_req_headers``.  Assumes each header name maps to an
             iterable of string values -- TODO confirm against the
             caller.

     Returns:
         A ``MatchResult``; for the first pattern that matches a header
         line, ``matched`` is True, ``category`` is ``self.category`` and
         ``criteria`` is the pattern.  Otherwise the result is returned
         as constructed.
     """
     result = MatchResult()
     if len(request.enc_res_headers) > 0:
         # This is a respmod
         headers = request.enc_res_headers
     else:
         # This is a reqmod
         headers = request.enc_req_headers
     # Render the header lines once; they do not depend on the regex.
     header_lines = [h + ': ' + v for h in headers for v in headers[h]]
     for regex in self.regex_list:
         for header_line in header_lines:
             if re.search(regex, header_line) is not None:
                 result.matched = True
                 result.category = self.category
                 result.criteria = regex
                 return result
     # No match
     return result
Пример #18
0
 def scan(self, request):
     """Flag the message when any header line matches a regex.

     The response headers (``request.enc_res_headers``) are used when
     that mapping is non-empty, otherwise the request headers
     (``request.enc_req_headers``).  Each header is rendered as
     'name: value' and searched with every pattern of
     ``self.regex_list``.

     The previous implementation built the header line but then searched
     the request URL instead, and paired every header name with every
     header name rather than with its own values.

     Args:
         request: Object exposing ``enc_res_headers`` and
             ``enc_req_headers``.  Assumes each header name maps to an
             iterable of string values -- TODO confirm against the
             caller.

     Returns:
         A ``MatchResult``; for the first pattern that matches a header
         line, ``matched`` is True, ``category`` is ``self.category`` and
         ``criteria`` is the pattern.  Otherwise the result is returned
         as constructed.
     """
     result = MatchResult()
     if len(request.enc_res_headers) > 0:
         # This is a respmod
         headers = request.enc_res_headers
     else:
         # This is a reqmod
         headers = request.enc_req_headers
     # Render the header lines once; they do not depend on the regex.
     header_lines = [h + ': ' + v for h in headers for v in headers[h]]
     for regex in self.regex_list:
         for header_line in header_lines:
             if re.search(regex, header_line) is not None:
                 result.matched = True
                 result.category = self.category
                 result.criteria = regex
                 return result
     # No match
     return result