def test_table_name_with_prefix(self):
    """
    A prefix passed to DiskSet must end up embedded in the generated
    table name, and cleanup() must drop that table from the temp DB.
    """
    prefix = 'unittest'
    prefixed_set = DiskSet(prefix)

    # The prefix has to appear somewhere inside the table name
    self.assertIn(prefix, prefixed_set.table_name)

    temp_db = get_default_temp_db_instance()
    self.assertTrue(temp_db.table_exists(prefixed_set.table_name))

    prefixed_set.cleanup()
    self.assertFalse(temp_db.table_exists(prefixed_set.table_name))
def test_remove_table(self):
    """
    cleanup() must remove the backing table of a DiskSet, even after
    items were added to it.
    """
    victim = DiskSet()
    for item in (1, 2):
        victim.add(item)

    # Remember the table name before cleanup() since the attribute may
    # not be usable afterwards
    backing_table = victim.table_name
    temp_db = get_default_temp_db_instance()

    self.assertTrue(temp_db.table_exists(backing_table))

    victim.cleanup()
    self.assertFalse(temp_db.table_exists(backing_table))
class DBKnowledgeBase(BasicKnowledgeBase):
    """
    This class saves the data that is sent to it by plugins. It is the only way
    in which plugins can exchange information.

    Data is stored in a DB.

    :author: Andres Riancho ([email protected])
    """

    # Schema for the knowledge base table: (location_a, location_b) address a
    # KB slot, uniq_id identifies one stored object, pickle holds the
    # serialized object itself
    COLUMNS = [('location_a', 'TEXT'),
               ('location_b', 'TEXT'),
               ('uniq_id', 'TEXT'),
               ('pickle', 'BLOB')]

    def __init__(self):
        super(DBKnowledgeBase, self).__init__()

        # Heavy initialization is deferred to setup()
        self.initialized = False

        # TODO: Why doesn't this work with a WeakValueDictionary?
        self.observers = {}  # WeakValueDictionary()
        self._observer_id = 0

    def setup(self):
        """
        Setup all the required backend stores. This was mostly created to avoid
        starting any threads during __init__() which is called during python's
        import phase and dead-locks in some cases.

        :return: None
        """
        with self._kb_lock:
            # Only initialize once
            if self.initialized:
                return

            self.urls = DiskSet(table_prefix='kb_urls')
            self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')

            self.db = get_default_persistent_db_instance()

            # Random suffix so concurrent/successive scans never collide
            self.table_name = 'knowledge_base_' + rand_alpha(30)
            self.db.create_table(self.table_name, self.COLUMNS)
            self.db.create_index(self.table_name, ['location_a', 'location_b'])
            self.db.create_index(self.table_name, ['uniq_id'])
            self.db.commit()

            self.initialized = True

    @requires_setup
    def clear(self, location_a, location_b):
        """
        Remove all objects stored at (location_a, location_b).
        """
        location_a = self._get_real_name(location_a)

        query = "DELETE FROM %s WHERE location_a = ? and location_b = ?"
        params = (location_a, location_b)
        self.db.execute(query % self.table_name, params)

    @requires_setup
    def raw_write(self, location_a, location_b, value):
        """
        This method saves value to (location_a,location_b) but previously
        clears any pre-existing values.
        """
        if isinstance(value, Info):
            raise TypeError('Use append or append_uniq to store vulnerabilities')

        location_a = self._get_real_name(location_a)

        self.clear(location_a, location_b)
        self.append(location_a, location_b, value, ignore_type=True)

    @requires_setup
    def raw_read(self, location_a, location_b):
        """
        This method reads the value from (location_a, location_b)
        """
        location_a = self._get_real_name(location_a)
        result = self.get(location_a, location_b, check_types=False)

        if len(result) > 1:
            msg = 'Incorrect use of raw_write/raw_read, found %s results.'
            raise RuntimeError(msg % len(result))
        elif len(result) == 0:
            return []
        else:
            return result[0]

    @requires_setup
    def get_one(self, location_a, location_b):
        """
        This method reads the value from (location_a, location_b), checking it's
        type and making sure only one is stored at that address.

        Similar to raw_read, but checking types.

        :see: https://github.com/andresriancho/w3af/issues/3955
        """
        location_a = self._get_real_name(location_a)
        result = self.get(location_a, location_b, check_types=True)

        if len(result) > 1:
            msg = 'Incorrect use of get_one(), found %s results.'
            # Bug fix: format the message with the result count, not the
            # result list itself (consistent with raw_read above)
            raise RuntimeError(msg % len(result))
        elif len(result) == 0:
            return []
        else:
            return result[0]

    def _get_uniq_id(self, obj):
        """
        :return: A string that uniquely identifies obj: the object's own
                 uniq_id for Info/InfoSet, a combined hash for iterables,
                 and a plain hash for everything else.
        """
        if isinstance(obj, (Info, InfoSet)):
            return obj.get_uniq_id()

        if isinstance(obj, collections.Iterable):
            concat_all = ''.join([str(hash(i)) for i in obj])
            return str(hash(concat_all))

        return str(hash(obj))

    @requires_setup
    def append(self, location_a, location_b, value, ignore_type=False):
        """
        This method appends the location_b value to a dict.
        """
        if not ignore_type and not isinstance(value, (Info, Shell, InfoSet)):
            msg = ('You MUST use raw_write/raw_read to store non-info objects'
                   ' to the KnowledgeBase.')
            raise TypeError(msg)

        location_a = self._get_real_name(location_a)
        uniq_id = self._get_uniq_id(value)
        pickled_obj = cpickle_dumps(value)
        t = (location_a, location_b, uniq_id, pickled_obj)

        query = "INSERT INTO %s VALUES (?, ?, ?, ?)" % self.table_name
        self.db.execute(query, t)
        self._notify_observers(self.APPEND, location_a, location_b, value,
                               ignore_type=ignore_type)

    @requires_setup
    def get(self, location_a, location_b, check_types=True):
        """
        :param location_a: The plugin that saved the data to the
                           kb.info Typically the name of the plugin,
                           but could also be the plugin instance.

        :param location_b: The name of the variables under which the vuln
                           objects were saved. Typically the same name of
                           the plugin, or something like "vulns", "errors",
                           etc. In most cases this is NOT None. When set
                           to None, a dict with all the vuln objects found
                           by the plugin_name is returned.

        :return: Returns the data that was saved by another plugin.
        """
        location_a = self._get_real_name(location_a)

        if location_b is None:
            query = 'SELECT pickle FROM %s WHERE location_a = ?'
            params = (location_a,)
        else:
            query = 'SELECT pickle FROM %s WHERE location_a = ?'\
                    ' and location_b = ?'
            params = (location_a, location_b)

        result_lst = []

        results = self.db.select(query % self.table_name, params)
        for r in results:
            obj = cPickle.loads(r[0])

            if check_types and not isinstance(obj, (Info, InfoSet, Shell)):
                raise TypeError('Use raw_write and raw_read to query the'
                                ' knowledge base for non-Info objects')

            result_lst.append(obj)

        return result_lst

    @requires_setup
    def get_by_uniq_id(self, uniq_id):
        """
        :return: The stored object with the given uniq_id, or None.
        """
        query = 'SELECT pickle FROM %s WHERE uniq_id = ?'
        params = (uniq_id,)

        result = self.db.select_one(query % self.table_name, params)

        if result is not None:
            result = cPickle.loads(result[0])

        return result

    @requires_setup
    def update(self, old_info, update_info):
        """
        :param old_info: The info/vuln instance to be updated in the kb.
        :param update_info: The info/vuln instance with new information
        :return: Nothing
        """
        old_not_info = not isinstance(old_info, (Info, InfoSet, Shell))
        update_not_info = not isinstance(update_info, (Info, InfoSet, Shell))

        if old_not_info or update_not_info:
            msg = ('You MUST use raw_write/raw_read to store non-info objects'
                   ' to the KnowledgeBase.')
            raise TypeError(msg)

        old_uniq_id = old_info.get_uniq_id()
        new_uniq_id = update_info.get_uniq_id()
        pickle = cpickle_dumps(update_info)

        # Update the pickle and unique_id after finding by original uniq_id
        query = "UPDATE %s SET pickle = ?, uniq_id = ? WHERE uniq_id = ?"

        params = (pickle, new_uniq_id, old_uniq_id)
        result = self.db.execute(query % self.table_name, params).result()

        if result.rowcount:
            self._notify_observers(self.UPDATE, old_info, update_info)
        else:
            ex = ('Failed to update() %s instance because'
                  ' the original unique_id (%s) does not exist in the DB,'
                  ' or the new unique_id (%s) is invalid.')
            raise DBException(ex % (old_info.__class__.__name__,
                                    old_uniq_id,
                                    new_uniq_id))

    def add_observer(self, observer):
        """
        Add the observer instance to the list.
        """
        observer_id = self.get_observer_id()
        self.observers[observer_id] = observer

    def get_observer_id(self):
        """
        :return: A new, monotonically increasing, observer identifier.
        """
        self._observer_id += 1
        return self._observer_id

    def _notify_observers(self, method, *args, **kwargs):
        """
        Call the observer if the location_a/location_b matches with the
        configured observers.

        :return: None
        """
        # Note that I copy the items list in order to iterate though it without
        # any issues like the size changing
        for _, observer in self.observers.items()[:]:
            functor = getattr(observer, method)
            functor(*args, **kwargs)

    @requires_setup
    def get_all_entries_of_class(self, klass):
        """
        :return: A list of all objects of class == klass that are saved in the
                 kb.
        """
        query = 'SELECT pickle FROM %s'
        results = self.db.select(query % self.table_name)

        result_lst = []

        for r in results:
            obj = cPickle.loads(r[0])
            if isinstance(obj, klass):
                result_lst.append(obj)

        return result_lst

    @requires_setup
    def get_all_vulns(self):
        """
        :return: A list of all info instances with severity in (LOW, MEDIUM,
                 HIGH)
        """
        query = 'SELECT pickle FROM %s'
        results = self.db.select(query % self.table_name)

        result_lst = []

        for r in results:
            obj = cPickle.loads(r[0])
            # Objects without get_severity (e.g. shells) are skipped
            if hasattr(obj, 'get_severity'):
                severity = obj.get_severity()
                if severity in (LOW, MEDIUM, HIGH):
                    result_lst.append(obj)

        return result_lst

    @requires_setup
    def get_all_infos(self):
        """
        :return: A list of all info instances with severity eq INFORMATION
        """
        query = 'SELECT pickle FROM %s'
        results = self.db.select(query % self.table_name)

        result_lst = []

        for r in results:
            obj = cPickle.loads(r[0])
            if hasattr(obj, 'get_severity'):
                severity = obj.get_severity()
                if severity in (INFORMATION,):
                    result_lst.append(obj)

        return result_lst

    @requires_setup
    def dump(self):
        """
        :return: A dict of dicts mapping location_a -> location_b -> [objects],
                 holding every object currently stored in the KB.
        """
        result_dict = {}

        query = 'SELECT location_a, location_b, pickle FROM %s'
        results = self.db.select(query % self.table_name)

        for location_a, location_b, pickle in results:
            obj = cPickle.loads(pickle)

            if location_a not in result_dict:
                result_dict[location_a] = {location_b: [obj, ]}
            elif location_b not in result_dict[location_a]:
                result_dict[location_a][location_b] = [obj, ]
            else:
                result_dict[location_a][location_b].append(obj)

        return result_dict

    @requires_setup
    def cleanup(self):
        """
        Cleanup internal data.
        """
        self.db.execute("DELETE FROM %s WHERE 1=1" % self.table_name)

        # Remove the old, create new.
        old_urls = self.urls
        self.urls = DiskSet(table_prefix='kb_urls')
        old_urls.cleanup()

        old_fuzzable_requests = self.fuzzable_requests
        self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')
        old_fuzzable_requests.cleanup()

        self.observers.clear()

    @requires_setup
    def remove(self):
        """
        Drop all backend storage for this knowledge base instance.
        """
        self.db.drop_table(self.table_name)
        self.urls.cleanup()
        self.fuzzable_requests.cleanup()
        self.observers.clear()

    @requires_setup
    def get_all_known_urls(self):
        """
        :return: A DiskSet with all the known URLs as URL objects.
        """
        return self.urls

    @requires_setup
    def add_url(self, url):
        """
        :return: True if the URL was previously unknown
        """
        if not isinstance(url, URL):
            msg = 'add_url requires a URL as parameter got %s instead.'
            raise TypeError(msg % type(url))

        self._notify_observers(self.ADD_URL, url)
        return self.urls.add(url)

    @requires_setup
    def get_all_known_fuzzable_requests(self):
        """
        :return: A DiskSet with all the known URLs as URL objects.
        """
        return self.fuzzable_requests

    @requires_setup
    def add_fuzzable_request(self, fuzzable_request):
        """
        :return: True if the FuzzableRequest was previously unknown
        """
        if not isinstance(fuzzable_request, FuzzableRequest):
            msg = ('add_fuzzable_request requires a FuzzableRequest as'
                   ' parameter, got "%s" instead.')
            raise TypeError(msg % type(fuzzable_request))

        self.add_url(fuzzable_request.get_url())
        return self.fuzzable_requests.add(fuzzable_request)
class find_captchas(CrawlPlugin): """ Identify captcha images on web pages. :author: Andres Riancho ([email protected]) """ def __init__(self): CrawlPlugin.__init__(self) self._captchas_found = DiskSet(table_prefix='find_captchas') def crawl(self, fuzzable_request): """ Find CAPTCHA images. :param fuzzable_request: A fuzzable_request instance that contains (among other things) the URL to test. """ result, captchas = self._identify_captchas(fuzzable_request) if not result: return for captcha in captchas: desc = 'Found a CAPTCHA image at: "%s".' % captcha.img_src response_ids = [response.id for response in captcha.http_responses] i = Info('Captcha image detected', desc, response_ids, self.get_name()) i.set_uri(captcha.img_src) kb.kb.append(self, 'CAPTCHA', i) om.out.information(i.get_desc()) def _identify_captchas(self, fuzzable_request): """ :return: A tuple with the following information: * True indicating that the page has CAPTCHAs * A list with tuples that contain: * The CAPTCHA image source * The http responses used to verify that the image was indeed a CAPTCHA """ found_captcha = False captchas = [] # GET the document, and fetch the images images_1 = self._get_images(fuzzable_request) # Re-GET the document, and fetch the images images_2 = self._get_images(fuzzable_request) # If the number of images in each response is different, don't even # bother to perform any analysis since our simplistic approach will fail # TODO: Add something more advanced. if len(images_1) == len(images_2): not_in_2 = [] for img_src_1, img_hash_1, http_response_1 in images_1: for _, img_hash_2, http_response_2 in images_2: if img_hash_1 == img_hash_2: # The image is in both lists, can't be a CAPTCHA break else: not_in_2.append((img_src_1, img_hash_1, [http_response_1, http_response_2])) # Results # # TODO: This allows for more than one CAPTCHA in the same page. Does # that make sense? When that's found, should I simply declare # defeat and don't report anything? 
for img_src, _, http_responses in not_in_2: CaptchaInfo = namedtuple('CaptchaInfo', ['img_src', 'http_responses']) img_src = img_src.uri2url() if img_src not in self._captchas_found: self._captchas_found.add(img_src) found_captcha = True captchas.append(CaptchaInfo(img_src, http_responses)) return found_captcha, captchas def _get_images(self, fuzzable_request): """ Get all img tags and retrieve the src. :param fuzzable_request: The request to modify :return: A list with tuples containing (img_src, image_hash, http_response) """ res = [] try: response = self._uri_opener.GET(fuzzable_request.get_uri(), cache=False) except: om.out.debug('Failed to retrieve the page for finding captchas.') else: # Do not use parser_cache here, it's not good since CAPTCHA implementations # *might* change the image name for each request of the HTML #dp = parser_cache.dpc.get_document_parser_for( response ) try: document_parser = DocumentParser.DocumentParser(response) except BaseFrameworkException: return [] image_path_list = document_parser.get_references_of_tag('img') GET = self._uri_opener.GET sha1 = hashlib.sha1 result_iter = self.worker_pool.imap_unordered(GET, image_path_list) for image_response in result_iter: if image_response.is_image(): img_src = image_response.get_uri() img_hash = sha1(image_response.get_body()).hexdigest() res.append((img_src, img_hash, response)) return res def end(self): self._captchas_found.cleanup() def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
class error_500(GrepPlugin): """ Grep every page for error 500 pages that haven't been identified as bugs by other plugins. :author: Andres Riancho ([email protected]) """ IGNORE_CODES = (404, 403, 401, 405, 400, 501) FALSE_POSITIVE_STRINGS = ('<h1>Bad Request (Invalid URL)</h1>', ) def __init__(self): GrepPlugin.__init__(self) self._error_500_responses = DiskSet(table_prefix='error_500') def grep(self, request, response): """ Plugin entry point, identify which requests generated a 500 error. :param request: The HTTP request object. :param response: The HTTP response object :return: None """ if response.is_text_or_html() \ and 400 < response.get_code() < 600 \ and response.get_code() not in self.IGNORE_CODES\ and not self._is_false_positive(response): self._error_500_responses.add((request, response.id)) def _is_false_positive(self, response): """ Filters out some false positives like this one: This false positive is generated by IIS when I send an URL that's "odd" Some examples of URLs that trigger this false positive: - http://127.0.0.2/ext.ini.%00.txt - http://127.0.0.2/%00/ - http://127.0.0.2/%0a%0a<script>alert(\Vulnerable\)</script>.jsp :return: True if the response is a false positive. """ for fps in self.FALSE_POSITIVE_STRINGS: if fps in response.get_body(): return True return False def end(self): """ This method is called when the plugin wont be used anymore. The real job of this plugin is done here, where I will try to see if one of the error_500 responses were not identified as a vuln by some of my audit plugins """ all_vuln_ids = set() for info in kb.kb.get_all_findings(): for _id in info.get_id(): all_vuln_ids.add(_id) for request, error_500_response_id in self._error_500_responses: if error_500_response_id not in all_vuln_ids: # Found a error 500 that wasn't identified ! desc = 'An unidentified web application error (HTTP response'\ ' code 500) was found at: "%s". 
Enable all plugins and'\ ' try again, if the vulnerability still is not'\ ' identified, please verify manually and report it to'\ ' the w3af developers.' desc = desc % request.get_url() v = Vuln('Unhandled error in web application', desc, severity.MEDIUM, error_500_response_id, self.get_name()) v.set_uri(request.get_uri()) self.kb_append_uniq(self, 'error_500', v, 'VAR') self._error_500_responses.cleanup() def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
class DBKnowledgeBase(BasicKnowledgeBase):
    """
    This class saves the data that is sent to it by plugins. It is the only way
    in which plugins can exchange information.

    Data is stored in a DB.

    :author: Andres Riancho ([email protected])
    """

    def __init__(self):
        super(DBKnowledgeBase, self).__init__()

        self.urls = DiskSet(table_prefix='kb_urls')
        self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')

        self.db = get_default_persistent_db_instance()

        # Schema: (location_a, location_b) address a KB slot, uniq_id
        # identifies one stored object, pickle holds the serialized object
        columns = [('location_a', 'TEXT'),
                   ('location_b', 'TEXT'),
                   ('uniq_id', 'TEXT'),
                   ('pickle', 'BLOB')]

        # Random suffix so concurrent/successive scans never collide
        self.table_name = 'knowledge_base_' + rand_alpha(30)
        self.db.create_table(self.table_name, columns)
        self.db.create_index(self.table_name, ['location_a', 'location_b'])
        self.db.create_index(self.table_name, ['uniq_id'])
        self.db.commit()

        # TODO: Why doesn't this work with a WeakValueDictionary?
        self.observers = {}  # WeakValueDictionary()
        self.type_observers = {}  # WeakValueDictionary()
        self.url_observers = []
        self._observer_id = 0

    def clear(self, location_a, location_b):
        """
        Remove all objects stored at (location_a, location_b).
        """
        location_a = self._get_real_name(location_a)

        query = "DELETE FROM %s WHERE location_a = ? and location_b = ?"
        params = (location_a, location_b)
        self.db.execute(query % self.table_name, params)

    def raw_write(self, location_a, location_b, value):
        """
        This method saves value to (location_a,location_b) but previously
        clears any pre-existing values.
        """
        if isinstance(value, Info):
            raise TypeError('Use append or append_uniq to store vulnerabilities')

        location_a = self._get_real_name(location_a)

        self.clear(location_a, location_b)
        self.append(location_a, location_b, value, ignore_type=True)

    def raw_read(self, location_a, location_b):
        """
        This method reads the value from (location_a,location_b)
        """
        location_a = self._get_real_name(location_a)
        result = self.get(location_a, location_b, check_types=False)

        if len(result) > 1:
            msg = 'Incorrect use of raw_write/raw_read, found %s rows.'
            # Bug fix: format the message with the row count, not the result
            # list itself
            raise RuntimeError(msg % len(result))
        elif len(result) == 0:
            return []
        else:
            return result[0]

    def _get_uniq_id(self, obj):
        """
        :return: A string that uniquely identifies obj: the object's own
                 uniq_id for Info instances, a combined hash for iterables,
                 and a plain hash for everything else.
        """
        if isinstance(obj, Info):
            return obj.get_uniq_id()

        if isinstance(obj, collections.Iterable):
            concat_all = ''.join([str(i) for i in obj])
            return str(hash(concat_all))

        return str(hash(obj))

    def append(self, location_a, location_b, value, ignore_type=False):
        """
        This method appends the location_b value to a dict.
        """
        if not ignore_type and not isinstance(value, (Info, Shell)):
            msg = 'You MUST use raw_write/raw_read to store non-info objects'\
                  ' to the KnowledgeBase.'
            raise TypeError(msg)

        location_a = self._get_real_name(location_a)
        uniq_id = self._get_uniq_id(value)
        pickled_obj = cpickle_dumps(value)
        t = (location_a, location_b, uniq_id, pickled_obj)

        query = "INSERT INTO %s VALUES (?, ?, ?, ?)" % self.table_name
        self.db.execute(query, t)
        self._notify(location_a, location_b, value)

    def get(self, location_a, location_b, check_types=True):
        """
        :param location_a: The plugin that saved the data to the
                           kb.info Typically the name of the plugin,
                           but could also be the plugin instance.

        :param location_b: The name of the variables under which the vuln
                           objects were saved. Typically the same name of
                           the plugin, or something like "vulns", "errors",
                           etc. In most cases this is NOT None. When set
                           to None, a dict with all the vuln objects found
                           by the plugin_name is returned.

        :return: Returns the data that was saved by another plugin.
        """
        location_a = self._get_real_name(location_a)

        if location_b is None:
            query = 'SELECT pickle FROM %s WHERE location_a = ?'
            params = (location_a,)
        else:
            query = 'SELECT pickle FROM %s WHERE location_a = ?'\
                    ' and location_b = ?'
            params = (location_a, location_b)

        result_lst = []

        results = self.db.select(query % self.table_name, params)
        for r in results:
            obj = cPickle.loads(r[0])

            if check_types and not isinstance(obj, (Info, Shell)):
                raise TypeError('Use raw_write and raw_read to query the'
                                ' knowledge base for non-Info objects')

            result_lst.append(obj)

        return result_lst

    def get_by_uniq_id(self, uniq_id):
        """
        :return: The stored object with the given uniq_id, or None.
        """
        query = 'SELECT pickle FROM %s WHERE uniq_id = ?'
        params = (uniq_id,)

        result = self.db.select_one(query % self.table_name, params)

        if result is not None:
            result = cPickle.loads(result[0])

        return result

    def add_observer(self, location_a, location_b, observer):
        """
        Add the observer function to the observer list. The function will be
        called when there is a change in (location_a, location_b).

        You can use None in location_a or location_b as wildcards.

        The observer function needs to be a function which takes three params:
            * location_a
            * location_b
            * value that's added to the kb location

        :return: None
        """
        # Bug fix: the second check used to re-validate location_a, leaving
        # location_b completely unvalidated
        if not isinstance(location_a, (basestring, types.NoneType)) or \
           not isinstance(location_b, (basestring, types.NoneType)):
            raise TypeError('Observer locations need to be strings or None.')

        observer_id = self.get_observer_id()
        self.observers[(location_a, location_b, observer_id)] = observer

    def add_types_observer(self, type_filter, observer):
        """
        Add the observer function to the list of functions to be called when a
        new object that is of type "type_filter" is added to the KB.

        The type_filter must be one of Info, Vuln or Shell.

        :return: None
        """
        if type_filter not in (Info, Vuln, Shell):
            msg = 'The type_filter needs to be one of Info, Vuln or Shell'
            raise TypeError(msg)

        observer_id = self.get_observer_id()
        self.type_observers[(type_filter, observer_id)] = observer

    def get_observer_id(self):
        """
        :return: A new, monotonically increasing, observer identifier.
        """
        self._observer_id += 1
        return self._observer_id

    def _notify(self, location_a, location_b, value):
        """
        Call the observer if the location_a/location_b matches with the
        configured observers.

        :return: None
        """
        # Note that I copy the items list in order to iterate though it without
        # any issues like the size changing
        for (obs_loc_a, obs_loc_b, _), observer in self.observers.items()[:]:

            if obs_loc_a is None and obs_loc_b is None:
                observer(location_a, location_b, value)
                continue

            if obs_loc_a == location_a and obs_loc_b is None:
                observer(location_a, location_b, value)
                continue

            if obs_loc_a == location_a and obs_loc_b == location_b:
                observer(location_a, location_b, value)
                continue

        for (type_filter, _), observer in self.type_observers.items()[:]:
            if isinstance(value, type_filter):
                observer(location_a, location_b, value)

    def get_all_entries_of_class(self, klass):
        """
        :return: A list of all objects of class == klass that are saved in the
                 kb.
        """
        query = 'SELECT pickle FROM %s'
        results = self.db.select(query % self.table_name)

        result_lst = []

        for r in results:
            obj = cPickle.loads(r[0])
            if isinstance(obj, klass):
                result_lst.append(obj)

        return result_lst

    def dump(self):
        """
        :return: A dict of dicts mapping location_a -> location_b -> [objects],
                 holding every object currently stored in the KB.
        """
        result_dict = {}

        query = 'SELECT location_a, location_b, pickle FROM %s'
        results = self.db.select(query % self.table_name)

        for location_a, location_b, pickle in results:
            obj = cPickle.loads(pickle)

            if location_a not in result_dict:
                result_dict[location_a] = {location_b: [obj, ]}
            elif location_b not in result_dict[location_a]:
                result_dict[location_a][location_b] = [obj, ]
            else:
                result_dict[location_a][location_b].append(obj)

        return result_dict

    def cleanup(self):
        """
        Cleanup internal data.
        """
        self.db.execute("DELETE FROM %s WHERE 1=1" % self.table_name)

        # Remove the old, create new.
        self.urls.cleanup()
        self.urls = DiskSet(table_prefix='kb_urls')

        self.fuzzable_requests.cleanup()
        self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')

        self.observers.clear()

    def remove(self):
        """
        Drop all backend storage for this knowledge base instance.
        """
        self.db.drop_table(self.table_name)
        self.urls.cleanup()
        self.fuzzable_requests.cleanup()
        self.observers.clear()

    def get_all_known_urls(self):
        """
        :return: A DiskSet with all the known URLs as URL objects.
        """
        return self.urls

    def add_url_observer(self, observer):
        """
        Register a callable to be invoked for every new URL added to the KB.
        """
        self.url_observers.append(observer)

    def _notify_url_observers(self, new_url):
        """
        Call the observer with new_url.

        :return: None
        """
        # Note that I copy the items list in order to iterate though it without
        # any issues like the size changing
        for observer in self.url_observers[:]:
            observer(new_url)

    def add_url(self, url):
        """
        :return: True if the URL was previously unknown
        """
        if not isinstance(url, URL):
            msg = 'add_url requires a URL as parameter got %s instead.'
            raise TypeError(msg % type(url))

        self._notify_url_observers(url)
        return self.urls.add(url)

    def get_all_known_fuzzable_requests(self):
        """
        :return: A DiskSet with all the known URLs as URL objects.
        """
        return self.fuzzable_requests

    def add_fuzzable_request(self, fuzzable_request):
        """
        :return: True if the FuzzableRequest was previously unknown
        """
        if not isinstance(fuzzable_request, FuzzableRequest):
            msg = 'add_fuzzable_request requires a FuzzableRequest as '\
                  'parameter, got "%s" instead.'
            raise TypeError(msg % type(fuzzable_request))

        self.add_url(fuzzable_request.get_url())
        return self.fuzzable_requests.add(fuzzable_request)
class click_jacking(GrepPlugin): """ Grep every page for missing click jacking protection headers. :author: Taras ([email protected]) :author: Andres ([email protected]) """ MAX_SAMPLES = 25 DO_NOT_FRAME = {301, 302, 303, 307, 400, 403, 404, 500} def __init__(self): GrepPlugin.__init__(self) self._total_http_request_count = 0 self._vuln_count = 0 self._vuln_urls = DiskSet(table_prefix='click_jacking') self._vuln_ids = DiskSet(table_prefix='click_jacking') def grep(self, request, response): """ Check x-frame-options header """ # Can not iframe a POST, PUT, etc. if request.get_method() != 'GET': return if response.get_code() in self.DO_NOT_FRAME: return if not response.is_text_or_html(): return # An attacker will never run a clickjacking attack on an empty response # Empty responses are common in redirects, 400 and 500 errors, etc. if not response.get_body(): return if not self._response_will_be_rendered(response): return if is_404(response): return self._total_http_request_count += 1 if self._is_protected_against_clickjacking(request, response): return self._add_response_to_findings(response) def _response_will_be_rendered(self, response): """ Browsers will never render responses with application/javascript content-type, so it doesn't make sense for an attacker to do a click-jacking attack on these. :param response: An HTTP response :return: True if the response has javascript content type """ if 'javascript' in response.content_type: return False if 'css' in response.content_type: return False if 'application/xml' in response.content_type: return False return True def _add_response_to_findings(self, response): self._vuln_count += 1 if len(self._vuln_urls) >= self.MAX_SAMPLES: return self._vuln_urls.add(response.get_uri()) self._vuln_ids.add(response.id) def _is_protected_against_clickjacking(self, request, response): """ There are many methods to protect a site against clickjacking, this method checks for all of them. 
:param request: HTTP request :param response: HTTP response :return: True if the response is protected """ methods = [ self._is_protected_with_x_frame_options, self._is_protected_with_csp ] for method in methods: if method(request, response): return True return False def _is_protected_with_x_frame_options(self, request, response): """ Check if the HTTP response has the x-frame-options header set to the secure value. :param request: HTTP request :param response: HTTP response :return: True if the response is protected """ headers = response.get_headers() x_frame_options, header_name = headers.iget('x-frame-options', '') if x_frame_options.lower() in ('deny', 'sameorigin'): return True return False def _is_protected_with_csp(self, request, response): """ Check if the HTTP response has a CSP header, parse it, extract the frame-ancestors attribute and check it is secure. :param request: HTTP request :param response: HTTP response :return: True if the response is protected """ # These are the policies that will be enforced by the browser non_report_only_policies = retrieve_csp_policies(response, False, True) frame_ancestors = non_report_only_policies.get('frame-ancestors', []) # # This is the strictest policy, nobody can frame me! 
# # Content-Security-Policy: frame-ancestors 'none'; # for policy in frame_ancestors: if policy.lower() == 'none': return True # # Fail when the frame-ancestors has insecure wildcards # # Content-Security-Policy: frame-ancestors '*'; # Content-Security-Policy: frame-ancestors 'https://*'; # insecure_ancestors = ('*', 'http', 'https', 'http://', 'https://', 'http://*', 'https://*') for policy in frame_ancestors: if policy.lower() in insecure_ancestors: return False # Content-Security-Policy: frame-ancestors 'self'; if 'self' in frame_ancestors: return True # Content-Security-Policy: frame-ancestors 'foo.com' '*.somesite.com'; if len(frame_ancestors): return True return False def end(self): # If all URLs implement protection, don't report anything. if not self._vuln_count: return response_ids = [_id for _id in self._vuln_ids] if self._total_http_request_count == self._vuln_count: # If none of the URLs implement protection, simply report # ONE vulnerability that says that desc = 'The application has no protection against Click-Jacking attacks.' if len(response_ids) >= self.MAX_SAMPLES: desc += (' All the received HTTP responses were found to be' ' vulnerable, only the first %s samples were captured' ' as proof.' % self.MAX_SAMPLES) else: # If most of the URLs implement the protection but some # don't, report ONE vulnerability saying: "Most are protected, # but x, y are not if len(response_ids) >= self.MAX_SAMPLES: desc = ('Multiple application URLs have no protection against' ' Click-Jacking attacks. Only the first %s samples were' ' captured as proof. The list of vulnerable URLs is:' '\n\n - ' % self.MAX_SAMPLES) else: desc = ('Multiple application URLs have no protection against' ' Click-Jacking attacks. 
The list of vulnerable URLs is:' '\n\n - ') desc += ' - '.join([str(url) + '\n' for url in self._vuln_urls]) v = Vuln('Click-Jacking vulnerability', desc, severity.MEDIUM, response_ids, self.get_name()) self.kb_append(self, 'click_jacking', v) self._vuln_urls.cleanup() self._vuln_ids.cleanup() def get_long_desc(self): return """
class DBKnowledgeBase(BasicKnowledgeBase):
    """
    This class saves the data that is sent to it by plugins. It is the only way
    in which plugins can exchange information.

    Data is stored in a DB.

    :author: Andres Riancho ([email protected])
    """

    # Schema for the knowledge base table: (location_a, location_b) address a
    # "slot", uniq_id identifies one stored object, pickle holds the serialized
    # object itself.
    COLUMNS = [('location_a', 'TEXT'),
               ('location_b', 'TEXT'),
               ('uniq_id', 'TEXT'),
               ('pickle', 'BLOB')]

    def __init__(self):
        super(DBKnowledgeBase, self).__init__()
        self.initialized = False

        # TODO: Why doesn't this work with a WeakValueDictionary?
        self.observers = {} #WeakValueDictionary()
        self._observer_id = 0

    def setup(self):
        """
        Setup all the required backend stores. This was mostly created to avoid
        starting any threads during __init__() which is called during python's
        import phase and dead-locks in some cases.

        :return: None
        """
        # The lock makes setup() idempotent and safe when called concurrently:
        # only the first caller creates the tables, everyone else returns early.
        with self._kb_lock:
            if self.initialized:
                return

            self.urls = DiskSet(table_prefix='kb_urls')
            self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')

            self.db = get_default_persistent_db_instance()

            # Random suffix avoids clashes with tables left over from other runs
            self.table_name = 'knowledge_base_' + rand_alpha(30)
            self.db.create_table(self.table_name, self.COLUMNS)
            self.db.create_index(self.table_name, ['location_a', 'location_b'])
            self.db.create_index(self.table_name, ['uniq_id'])
            self.db.commit()

            # Only initialize once
            self.initialized = True

    @requires_setup
    def clear(self, location_a, location_b):
        """
        Remove all objects stored at (location_a, location_b).
        """
        location_a = self._get_real_name(location_a)
        query = "DELETE FROM %s WHERE location_a = ? and location_b = ?"
        params = (location_a, location_b)
        self.db.execute(query % self.table_name, params)

    @requires_setup
    def raw_write(self, location_a, location_b, value):
        """
        This method saves value to (location_a,location_b) but previously
        clears any pre-existing values.
        """
        if isinstance(value, Info):
            raise TypeError('Use append or append_uniq to store vulnerabilities')

        location_a = self._get_real_name(location_a)

        self.clear(location_a, location_b)
        self.append(location_a, location_b, value, ignore_type=True)

    @requires_setup
    def raw_read(self, location_a, location_b):
        """
        This method reads the value from (location_a, location_b)
        """
        location_a = self._get_real_name(location_a)
        result = self.get(location_a, location_b, check_types=False)

        if len(result) > 1:
            msg = 'Incorrect use of raw_write/raw_read, found %s results.'
            raise RuntimeError(msg % len(result))
        elif len(result) == 0:
            return []
        else:
            return result[0]

    @requires_setup
    def get_one(self, location_a, location_b):
        """
        This method reads the value from (location_a, location_b), checking it's
        type and making sure only one is stored at that address.

        Similar to raw_read, but checking types.

        :see: https://github.com/andresriancho/w3af/issues/3955
        """
        location_a = self._get_real_name(location_a)
        result = self.get(location_a, location_b, check_types=True)

        if len(result) > 1:
            # FIX: the message promises a count ("found %s results") but the
            # original code interpolated the whole result list; use len() as
            # raw_read() does.
            msg = 'Incorrect use of get_one(), found %s results.'
            raise RuntimeError(msg % len(result))
        elif len(result) == 0:
            return []
        else:
            return result[0]

    def _get_uniq_id(self, obj):
        """
        Return a stable string identifier for obj: delegate to the object when
        it knows how to compute one (Info / InfoSet), otherwise hash iterables
        element by element, falling back to hash() for scalars.
        """
        if isinstance(obj, (Info, InfoSet)):
            return obj.get_uniq_id()

        if isinstance(obj, collections.Iterable):
            concat_all = ''.join([str(hash(i)) for i in obj])
            return str(hash(concat_all))

        return str(hash(obj))

    @requires_setup
    def append(self, location_a, location_b, value, ignore_type=False):
        """
        This method appends the location_b value to a dict.
        """
        if not ignore_type and not isinstance(value, (Info, Shell, InfoSet)):
            msg = ('You MUST use raw_write/raw_read to store non-info objects'
                   ' to the KnowledgeBase.')
            raise TypeError(msg)

        location_a = self._get_real_name(location_a)
        uniq_id = self._get_uniq_id(value)
        pickled_obj = cpickle_dumps(value)
        t = (location_a, location_b, uniq_id, pickled_obj)

        query = "INSERT INTO %s VALUES (?, ?, ?, ?)" % self.table_name
        self.db.execute(query, t)
        self._notify_observers(self.APPEND, location_a, location_b, value,
                               ignore_type=ignore_type)

    @requires_setup
    def get(self, location_a, location_b, check_types=True):
        """
        :param location_a: The plugin that saved the data to the
                           kb.info Typically the name of the plugin,
                           but could also be the plugin instance.

        :param location_b: The name of the variables under which the vuln
                           objects were saved. Typically the same name of
                           the plugin, or something like "vulns", "errors",
                           etc. In most cases this is NOT None. When set
                           to None, a dict with all the vuln objects found
                           by the plugin_name is returned.

        :return: Returns the data that was saved by another plugin.
        """
        result_lst = []

        for obj in self.get_iter(location_a, location_b, check_types=check_types):
            result_lst.append(obj)

        return result_lst

    @requires_setup
    def get_iter(self, location_a, location_b, check_types=True):
        """
        Same as get() but yields items one by one instead of returning
        a list with all the items.
        """
        location_a = self._get_real_name(location_a)

        if location_b is None:
            # None acts as a wildcard: return everything stored by location_a
            query = 'SELECT pickle FROM %s WHERE location_a = ?'
            params = (location_a,)
        else:
            query = 'SELECT pickle FROM %s WHERE location_a = ?' \
                    ' and location_b = ?'
            params = (location_a, location_b)

        for r in self.db.select(query % self.table_name, params):
            obj = cPickle.loads(r[0])

            if check_types and not isinstance(obj, (Info, InfoSet, Shell)):
                raise TypeError('Use raw_write and raw_read to query the'
                                ' knowledge base for non-Info objects')

            yield obj

    @requires_setup
    def get_by_uniq_id(self, uniq_id):
        """
        :return: The stored object with the given uniq_id, or None when it is
                 not present in the DB.
        """
        query = 'SELECT pickle FROM %s WHERE uniq_id = ?'
        params = (uniq_id,)

        result = self.db.select_one(query % self.table_name, params)

        if result is not None:
            result = cPickle.loads(result[0])

        return result

    @requires_setup
    def get_all_uniq_ids_iter(self, include_ids=()):
        """
        :param include_ids: If specified, only include these IDs.
        :yield: All uniq IDs from the KB
        """
        if include_ids:
            # Build one '?' placeholder per id to keep the query parameterized
            bindings = ['?'] * len(include_ids)
            bindings = ','.join(bindings)

            query = 'SELECT uniq_id FROM %s WHERE uniq_id IN (%s)'
            query %= (self.table_name, bindings)

            result = self.db.select(query, parameters=include_ids)
        else:
            query = 'SELECT uniq_id FROM %s'
            result = self.db.select(query % self.table_name)

        for uniq_id, in result:
            yield uniq_id

    @requires_setup
    def update(self, old_info, update_info):
        """
        :param old_info: The info/vuln instance to be updated in the kb.
        :param update_info: The info/vuln instance with new information
        :return: Nothing
        """
        old_not_info = not isinstance(old_info, (Info, InfoSet, Shell))
        update_not_info = not isinstance(update_info, (Info, InfoSet, Shell))

        if old_not_info or update_not_info:
            msg = ('You MUST use raw_write/raw_read to store non-info objects'
                   ' to the KnowledgeBase.')
            raise TypeError(msg)

        old_uniq_id = old_info.get_uniq_id()
        new_uniq_id = update_info.get_uniq_id()
        pickle = cpickle_dumps(update_info)

        # Update the pickle and unique_id after finding by original uniq_id
        query = "UPDATE %s SET pickle = ?, uniq_id = ? WHERE uniq_id = ?"

        params = (pickle, new_uniq_id, old_uniq_id)
        result = self.db.execute(query % self.table_name, params).result()

        if result.rowcount:
            self._notify_observers(self.UPDATE, old_info, update_info)
        else:
            # rowcount == 0 means the WHERE clause matched nothing: the
            # original object was never stored (or was removed)
            ex = ('Failed to update() %s instance because'
                  ' the original unique_id (%s) does not exist in the DB,'
                  ' or the new unique_id (%s) is invalid.')
            raise DBException(ex % (old_info.__class__.__name__,
                                    old_uniq_id,
                                    new_uniq_id))

    def add_observer(self, observer):
        """
        Add the observer instance to the list.
        """
        observer_id = self.get_observer_id()
        self.observers[observer_id] = observer

    def get_observer_id(self):
        """
        :return: A new, monotonically increasing observer identifier.
        """
        self._observer_id += 1
        return self._observer_id

    def _notify_observers(self, method, *args, **kwargs):
        """
        Call the observer if the location_a/location_b matches with the
        configured observers.

        :return: None
        """
        # Note that I copy the items list in order to iterate though it without
        # any issues like the size changing
        for _, observer in self.observers.items()[:]:
            functor = getattr(observer, method)
            functor(*args, **kwargs)

    @requires_setup
    def get_all_entries_of_class(self, klass, exclude_ids=()):
        """
        :return: A list of all objects where class in klass that are saved in
                 the kb.
        """
        result_lst = []

        for entry in self.get_all_entries_of_class_iter(klass, exclude_ids=exclude_ids):
            result_lst.append(entry)

        return result_lst

    @requires_setup
    def get_all_entries_of_class_iter(self, klass, exclude_ids=()):
        """
        :yield: All objects where class in klass that are saved in the kb.
        """
        # One '?' placeholder per excluded id; with no ids the NOT IN () clause
        # matches every row
        bindings = ['?'] * len(exclude_ids)
        bindings = ','.join(bindings)

        query = 'SELECT uniq_id, pickle FROM %s WHERE uniq_id NOT IN (%s)'
        query %= (self.table_name, bindings)

        results = self.db.select(query, parameters=exclude_ids)

        for uniq_id, serialized_obj, in results:
            obj = cPickle.loads(serialized_obj)

            # The class filter is applied in Python, after deserializing
            if isinstance(obj, klass):
                yield obj

    @requires_setup
    def get_all_vulns(self):
        """
        :return: A list of all info instances with severity in (LOW, MEDIUM,
                 HIGH)
        """
        query = 'SELECT pickle FROM %s'
        results = self.db.select(query % self.table_name)

        result_lst = []

        for r in results:
            obj = cPickle.loads(r[0])

            if hasattr(obj, 'get_severity'):
                severity = obj.get_severity()
                if severity in (LOW, MEDIUM, HIGH):
                    result_lst.append(obj)

        return result_lst

    @requires_setup
    def get_all_infos(self):
        """
        :return: A list of all info instances with severity eq INFORMATION
        """
        query = 'SELECT pickle FROM %s'
        results = self.db.select(query % self.table_name)

        result_lst = []

        for r in results:
            obj = cPickle.loads(r[0])

            if hasattr(obj, 'get_severity'):
                severity = obj.get_severity()
                if severity in (INFORMATION,):
                    result_lst.append(obj)

        return result_lst

    @requires_setup
    def dump(self):
        """
        :return: A dict of dicts mapping location_a -> location_b -> [objects],
                 with every stored object deserialized.
        """
        result_dict = {}

        query = 'SELECT location_a, location_b, pickle FROM %s'
        results = self.db.select(query % self.table_name)

        for location_a, location_b, pickle in results:
            obj = cPickle.loads(pickle)

            if location_a not in result_dict:
                result_dict[location_a] = {location_b: [obj,]}
            elif location_b not in result_dict[location_a]:
                result_dict[location_a][location_b] = [obj,]
            else:
                result_dict[location_a][location_b].append(obj)

        return result_dict

    @requires_setup
    def cleanup(self):
        """
        Cleanup internal data.
        """
        self.db.execute("DELETE FROM %s WHERE 1=1" % self.table_name)

        # Remove the old, create new.
        old_urls = self.urls
        self.urls = DiskSet(table_prefix='kb_urls')
        old_urls.cleanup()

        old_fuzzable_requests = self.fuzzable_requests
        self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')
        old_fuzzable_requests.cleanup()

        self.observers.clear()

    @requires_setup
    def remove(self):
        """
        Drop the KB table and release every backing store; the instance is not
        usable afterwards.
        """
        self.db.drop_table(self.table_name)
        self.urls.cleanup()
        self.fuzzable_requests.cleanup()
        self.observers.clear()

    @requires_setup
    def get_all_known_urls(self):
        """
        :return: A DiskSet with all the known URLs as URL objects.
        """
        return self.urls

    @requires_setup
    def add_url(self, url):
        """
        :return: True if the URL was previously unknown
        """
        if not isinstance(url, URL):
            msg = 'add_url requires a URL as parameter got %s instead.'
            raise TypeError(msg % type(url))

        self._notify_observers(self.ADD_URL, url)
        return self.urls.add(url)

    @requires_setup
    def get_all_known_fuzzable_requests(self):
        """
        :return: A DiskSet with all the known URLs as URL objects.
        """
        return self.fuzzable_requests

    @requires_setup
    def add_fuzzable_request(self, fuzzable_request):
        """
        :return: True if the FuzzableRequest was previously unknown
        """
        if not isinstance(fuzzable_request, FuzzableRequest):
            msg = ('add_fuzzable_request requires a FuzzableRequest as'
                   ' parameter, got "%s" instead.')
            raise TypeError(msg % type(fuzzable_request))

        self.add_url(fuzzable_request.get_url())
        return self.fuzzable_requests.add(fuzzable_request)
class web_spider(CrawlPlugin): """ Crawl the web application. :author: Andres Riancho ([email protected]) """ UNAUTH_FORBID = {http_constants.UNAUTHORIZED, http_constants.FORBIDDEN} def __init__(self): CrawlPlugin.__init__(self) # Internal variables self._compiled_ignore_re = None self._compiled_follow_re = None self._broken_links = DiskSet(table_prefix='web_spider') self._first_run = True self._target_urls = [] self._target_domain = None self._already_filled_form = ScalableBloomFilter() self._variant_db = VariantDB() # User configured variables self._ignore_regex = '' self._follow_regex = '.*' self._only_forward = False self._ignore_extensions = [] self._compile_re() def crawl(self, fuzzable_request, debugging_id): """ Searches for links on the html. :param debugging_id: A unique identifier for this call to discover() :param fuzzable_request: A fuzzable_req instance that contains (among other things) the URL to test. """ self._handle_first_run() # # If it is a form, then smart_fill the parameters to send something that # makes sense and will allow us to cover more code. # data_container = fuzzable_request.get_raw_data() if isinstance(data_container, Form): if fuzzable_request.get_url() in self._already_filled_form: return self._already_filled_form.add(fuzzable_request.get_url()) data_container.smart_fill() # Send the HTTP request resp = self._uri_opener.send_mutant(fuzzable_request) # Nothing to do here... if resp.get_code() == http_constants.UNAUTHORIZED: return # Nothing to do here... if resp.is_image(): return # And we don't trust what comes from the core, check if 404 if is_404(resp): return self._extract_html_forms(resp, fuzzable_request) self._extract_links_and_verify(resp, fuzzable_request) def _extract_html_forms(self, resp, fuzzable_req): """ Parses the HTTP response body and extract HTML forms, resulting forms are put() on the output queue. 
""" # Try to find forms in the document try: dp = parser_cache.dpc.get_document_parser_for(resp) except BaseFrameworkException: # Failed to find a suitable parser for the document return # Create one FuzzableRequest for each form variant mode = cf.cf.get('form_fuzzing_mode') for form_params in dp.get_forms(): # Form exclusion #15161 form_id_json = form_params.get_form_id().to_json() om.out.debug('A new form was found! Form-id is: "%s"' % form_id_json) if not self._should_analyze_url(form_params.get_action()): continue headers = fuzzable_req.get_headers() for form_params_variant in form_params.get_variants(mode): data_container = dc_from_form_params(form_params_variant) # Now data_container is one of Multipart of URLEncoded form # instances, which is a DataContainer. Much better than the # FormParameters instance we had before in form_params_variant r = FuzzableRequest.from_form(data_container, headers=headers) self.output_queue.put(r) def _handle_first_run(self): if not self._first_run: return # I have to set some variables, in order to be able to code # the "only_forward" feature self._first_run = False self._target_urls = [i.uri2url() for i in cf.cf.get('targets')] # The following line triggered lots of bugs when the "stop" button # was pressed and the core did this: "cf.cf.save('targets', [])" # #self._target_domain = cf.cf.get('targets')[0].get_domain() # # Changing it to something awful but bug-free. 
targets = cf.cf.get('targets') if not targets: return self._target_domain = targets[0].get_domain() def _urls_to_verify_generator(self, resp, fuzzable_req): """ Yields tuples containing: * Newly found URL * The FuzzableRequest instance passed as parameter * The HTTPResponse generated by the FuzzableRequest * Boolean indicating if we trust this reference or not :param resp: HTTP response object :param fuzzable_req: The HTTP request that generated the response """ gen = itertools.chain(self._url_path_url_generator(resp, fuzzable_req), self._body_url_generator(resp, fuzzable_req), headers_url_generator(resp, fuzzable_req)) for ref, fuzzable_req, original_resp, possibly_broken in gen: if self._should_verify_extracted_url(ref, original_resp): yield ref, fuzzable_req, original_resp, possibly_broken def _url_path_url_generator(self, resp, fuzzable_req): """ Yields tuples containing: * Newly found URL * The FuzzableRequest instance passed as parameter * The HTTPResponse generated by the FuzzableRequest * Boolean indicating if we trust this reference or not :param resp: HTTP response object :param fuzzable_req: The HTTP request that generated the response """ # Analyze all directories, if the URL w3af just found is: # # http://localhost/a/b/c/f00.php # # I want to GET: # # http://localhost/a/b/c/ # http://localhost/a/b/ # http://localhost/a/ # http://localhost/ # # And analyze the responses... dirs = resp.get_url().get_directories() for ref in unique_justseen(dirs): yield ref, fuzzable_req, resp, False def _body_url_generator(self, resp, fuzzable_req): """ Yields tuples containing: * Newly found URL * The FuzzableRequest instance passed as parameter * The HTTPResponse generated by the FuzzableRequest * Boolean indicating if we trust this reference or not The newly found URLs are extracted from the http response body using one of the framework's parsers. 
:param resp: HTTP response object :param fuzzable_req: The HTTP request that generated the response """ # # Note: I WANT to follow links that are in the 404 page. # try: doc_parser = parser_cache.dpc.get_document_parser_for(resp) except BaseFrameworkException as w3: om.out.debug('Failed to find a suitable document parser. ' 'Exception "%s"' % w3) else: # Note: # # - With parsed_refs I'm 100% that it's really # something in the HTML that the developer intended to add. # # - The re_refs are the result of regular expressions, # which in some cases are just false positives. parsed_refs, re_refs = doc_parser.get_references() dirs = resp.get_url().get_directories() only_re_refs = set(re_refs) - set(dirs + parsed_refs) all_refs = itertools.chain(parsed_refs, re_refs) resp_is_404 = is_404(resp) for ref in unique_justseen(sorted(all_refs)): possibly_broken = resp_is_404 or (ref in only_re_refs) yield ref, fuzzable_req, resp, possibly_broken def _should_analyze_url(self, ref): """ :param ref: A URL instance to match against the user configured filters :return: True if we should navigate to this URL """ # I don't want w3af sending requests to 3rd parties! 
if ref.get_domain() != self._target_domain: msg = 'web_spider will ignore %s (different domain name)' args = (ref.get_domain(),) om.out.debug(msg % args) return False # Filter the URL according to the configured regular expressions if not self._compiled_follow_re.match(ref.url_string): msg = 'web_spider will ignore %s (not match follow regex)' args = (ref.url_string,) om.out.debug(msg % args) return False if self._compiled_ignore_re.match(ref.url_string): msg = 'web_spider will ignore %s (match ignore regex)' args = (ref.url_string,) om.out.debug(msg % args) return False if self._has_ignored_extension(ref): msg = 'web_spider will ignore %s (match ignore extensions)' args = (ref.url_string,) om.out.debug(msg % args) return False # Implementing only forward if self._only_forward and not self._is_forward(ref): msg = 'web_spider will ignore %s (is not forward)' args = (ref.url_string,) om.out.debug(msg % args) return False return True def _has_ignored_extension(self, new_url): if not self._ignore_extensions: return False return new_url.get_extension().lower() in self._ignore_extensions def _should_verify_extracted_url(self, ref, resp): """ :param ref: A newly found URL :param resp: The HTTP response where the URL was found :return: Boolean indicating if I should send this new reference to the core. """ # Ignore myself if ref == resp.get_uri(): return False if not self._should_analyze_url(ref): return False # # I tried to have only one VariantDB in the framework instead of two, # but after some tests and architecture considerations it was better # to duplicate the data. # # In the future I'll run plugins in different processes than the core, # so it makes sense to have independent plugins. 
# # If I remove the web_spider VariantDB and just leave the one in the # core the framework keeps working but this method # (_should_verify_extracted_url) will return True much more often, which # leads to extra HTTP requests for URLs which we already checked and the # core will dismiss anyway # fuzzable_request = FuzzableRequest(ref) if self._variant_db.append(fuzzable_request): return True return False def _extract_links_and_verify(self, resp, fuzzable_req): """ This is a very basic method that will send the work to different threads. Work is generated by the _urls_to_verify_generator :param resp: HTTP response object :param fuzzable_req: The HTTP request that generated the response """ self.worker_pool.map_multi_args( self._verify_reference, self._urls_to_verify_generator(resp, fuzzable_req)) def _verify_reference(self, reference, original_request, original_response, possibly_broken, be_recursive=True): """ The parameters are: * Newly found URL * The FuzzableRequest instance which generated the response where the new URL was found * The HTTPResponse generated by the FuzzableRequest * Boolean indicating if we trust this reference or not This method GET's every new link and parses it in order to get new links and forms. """ # # Remember that this "breaks" the cache=True in most cases! # headers = { 'Referer': original_url } # # But this does not, and it is friendlier than simply ignoring the # referer # referer = original_response.get_url().base_url().url_string headers = Headers([('Referer', referer)]) # Note: We're not grep'ing this HTTP request/response now because it # has high probability of being a 404, and the grep plugins # already got enough 404 responses to analyze (from is_404 for # example). 
If it's not a 404 then we'll push it to the core # and it will come back to this plugin's crawl() where it will # be requested with grep=True resp = self._uri_opener.GET(reference, cache=True, headers=headers, grep=False) if not is_404(resp): msg = '[web_spider] Found new link "%s" at "%s"' args = (reference, original_response.get_url()) om.out.debug(msg % args) fuzz_req = FuzzableRequest(reference, headers=headers) # These next steps are simple, but actually allows me to set the # referer and cookie for the FuzzableRequest instances I'm sending # to the core, which will then allow the fuzzer to create # CookieMutant and HeadersMutant instances. # # Without setting the Cookie, the CookieMutant would never have any # data to modify; remember that cookies are actually set by the # urllib2 cookie handler when the request already exited the # framework. cookie = Cookie.from_http_response(original_response) fuzz_req.set_referer(referer) fuzz_req.set_cookie(cookie) self.output_queue.put(fuzz_req) return # Note: I WANT to follow links that are in the 404 page, but # DO NOT return the 404 itself to the core. # # This will parse the 404 response and add the 404-links in the # output queue, so that the core can get them # if be_recursive: # # Only follow one level of links in 404 pages, this limits the # potential issue when this is found: # # http://foo.com/abc/ => 404 # Body: <a href="def/">link</a> # # Which would lead to this function to perform requests to: # * http://foo.com/abc/ # * http://foo.com/abc/def/ # * http://foo.com/abc/def/def/ # * http://foo.com/abc/def/def/def/ # * ... # # Do not use threads here, it will dead-lock (for unknown # reasons). This is tested in TestDeadLock unittest. 
for args in self._urls_to_verify_generator(resp, original_request): self._verify_reference(*args, be_recursive=False) # Store the broken links if not possibly_broken and resp.get_code() not in self.UNAUTH_FORBID: t = (resp.get_url(), original_request.get_uri()) self._broken_links.add(t) def end(self): """ Called when the process ends, prints out the list of broken links. """ if len(self._broken_links): om.out.information('The following is a list of broken links that' ' were found by the web_spider plugin:') for broken, where in unique_justseen(self._broken_links.ordered_iter()): om.out.information('- %s [ referenced from: %s ]' % (broken, where)) self._broken_links.cleanup() def _is_forward(self, reference): """ Check if the reference is inside the target directories. :return: True if reference is an URL inside the directory structure of at least one of the target URLs. """ for domain_path in self._target_urls: if reference.url_string.startswith(domain_path.url_string): return True return False def get_options(self): """ :return: A list of option objects for this plugin. """ ol = OptionList() d = 'Only crawl links to paths inside the URL given as target.' o = opt_factory('only_forward', self._only_forward, d, BOOL) ol.add(o) d = ('Only crawl links that match this regular expression.' ' Note that ignore_regex has precedence over follow_regex.') o = opt_factory('follow_regex', self._follow_regex, d, REGEX) ol.add(o) d = ('DO NOT crawl links that match this regular expression.' ' Note that ignore_regex has precedence over follow_regex.') o = opt_factory('ignore_regex', self._ignore_regex, d, REGEX) ol.add(o) d = 'DO NOT crawl links that use these extensions.' h = ('This configuration parameter is commonly used to ignore' ' static files such as zip, pdf, jpeg, etc. 
It is possible to' ' ignore these files using `ignore_regex`, but configuring' ' this parameter is easier and performs case insensitive' ' matching.') o = opt_factory('ignore_extensions', self._ignore_extensions, d, LIST, help=h) ol.add(o) return ol def set_options(self, options_list): """ This method sets all the options that are configured using the user interface generated by the framework using the result of get_options(). :param options_list: A dictionary with the options for the plugin. :return: No value is returned. """ self._only_forward = options_list['only_forward'].get_value() self._ignore_regex = options_list['ignore_regex'].get_value() self._follow_regex = options_list['follow_regex'].get_value() self._compile_re() self._ignore_extensions = options_list['ignore_extensions'].get_value() self._ignore_extensions = [ext.lower() for ext in self._ignore_extensions] def _compile_re(self): """ Compile the regular expressions that are going to be used to ignore or follow links. """ if self._ignore_regex: # Compilation of this regex can't fail because it was already # verified as valid at regex_option.py: see REGEX in get_options() self._compiled_ignore_re = re.compile(self._ignore_regex) else: # If the self._ignore_regex is empty then I don't have to ignore # anything. To be able to do that, I simply compile an re with "abc" # as the pattern, which won't match any URL since they will all # start with http:// or https:// self._compiled_ignore_re = re.compile('abc') # Compilation of this regex can't fail because it was already # verified as valid at regex_option.py: see REGEX in get_options() self._compiled_follow_re = re.compile(self._follow_regex) def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
class DBKnowledgeBase(BasicKnowledgeBase):
    """
    This class saves the data that is sent to it by plugins. It is the only way
    in which plugins can exchange information.

    Data is stored in a DB.

    :author: Andres Riancho ([email protected])
    """

    def __init__(self):
        super(DBKnowledgeBase, self).__init__()

        self.urls = DiskSet(table_prefix='kb_urls')
        self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')

        self.db = get_default_persistent_db_instance()

        # (location_a, location_b) address a KB "slot", uniq_id identifies one
        # stored object, pickle holds the serialized object
        columns = [('location_a', 'TEXT'),
                   ('location_b', 'TEXT'),
                   ('uniq_id', 'TEXT'),
                   ('pickle', 'BLOB')]

        # Random suffix avoids clashes with tables left over from other runs
        self.table_name = 'knowledge_base_' + rand_alpha(30)
        self.db.create_table(self.table_name, columns)
        self.db.create_index(self.table_name, ['location_a', 'location_b'])
        self.db.create_index(self.table_name, ['uniq_id',])
        self.db.commit()

        # TODO: Why doesn't this work with a WeakValueDictionary?
        self.observers = {} #WeakValueDictionary()
        self.type_observers = {} #WeakValueDictionary()
        self.url_observers = []
        self._observer_id = 0

    def clear(self, location_a, location_b):
        """
        Remove all objects stored at (location_a, location_b).
        """
        location_a = self._get_real_name(location_a)
        query = "DELETE FROM %s WHERE location_a = ? and location_b = ?"
        params = (location_a, location_b)
        self.db.execute(query % self.table_name, params)

    def raw_write(self, location_a, location_b, value):
        """
        This method saves value to (location_a,location_b) but previously
        clears any pre-existing values.
        """
        if isinstance(value, Info):
            raise TypeError(
                'Use append or append_uniq to store vulnerabilities')

        location_a = self._get_real_name(location_a)

        self.clear(location_a, location_b)
        self.append(location_a, location_b, value, ignore_type=True)

    def raw_read(self, location_a, location_b):
        """
        This method reads the value from (location_a,location_b)
        """
        location_a = self._get_real_name(location_a)
        result = self.get(location_a, location_b, check_types=False)

        if len(result) > 1:
            # FIX: the message promises a count ("found %s rows") but the
            # original code interpolated the whole result list
            msg = 'Incorrect use of raw_write/raw_read, found %s rows.'
            raise RuntimeError(msg % len(result))
        elif len(result) == 0:
            return []
        else:
            return result[0]

    def _get_uniq_id(self, obj):
        """
        Return a stable string identifier for obj: delegate to Info instances,
        otherwise hash iterables element by element, falling back to hash()
        for scalars.
        """
        if isinstance(obj, Info):
            return obj.get_uniq_id()
        else:
            if isinstance(obj, collections.Iterable):
                concat_all = ''.join([str(i) for i in obj])
                return str(hash(concat_all))
            else:
                return str(hash(obj))

    def append(self, location_a, location_b, value, ignore_type=False):
        """
        This method appends the location_b value to a dict.
        """
        if not ignore_type and not isinstance(value, (Info, Shell)):
            msg = 'You MUST use raw_write/raw_read to store non-info objects'\
                  ' to the KnowledgeBase.'
            raise TypeError(msg)

        location_a = self._get_real_name(location_a)
        uniq_id = self._get_uniq_id(value)
        pickled_obj = cpickle_dumps(value)
        t = (location_a, location_b, uniq_id, pickled_obj)

        query = "INSERT INTO %s VALUES (?, ?, ?, ?)" % self.table_name
        self.db.execute(query, t)
        self._notify(location_a, location_b, value)

    def get(self, location_a, location_b, check_types=True):
        """
        :param location_a: The plugin that saved the data to the
                           kb.info Typically the name of the plugin,
                           but could also be the plugin instance.

        :param location_b: The name of the variables under which the vuln
                           objects were saved. Typically the same name of
                           the plugin, or something like "vulns", "errors",
                           etc. In most cases this is NOT None. When set
                           to None, a dict with all the vuln objects found
                           by the plugin_name is returned.

        :return: Returns the data that was saved by another plugin.
        """
        location_a = self._get_real_name(location_a)

        if location_b is None:
            # None acts as a wildcard: return everything stored by location_a
            query = 'SELECT pickle FROM %s WHERE location_a = ?'
            params = (location_a, )
        else:
            query = 'SELECT pickle FROM %s WHERE location_a = ?'\
                    ' and location_b = ?'
            params = (location_a, location_b)

        result_lst = []

        results = self.db.select(query % self.table_name, params)
        for r in results:
            obj = cPickle.loads(r[0])

            if check_types and not isinstance(obj, (Info, Shell)):
                raise TypeError('Use raw_write and raw_read to query the'
                                ' knowledge base for non-Info objects')

            result_lst.append(obj)

        return result_lst

    def get_by_uniq_id(self, uniq_id):
        """
        :return: The stored object with the given uniq_id, or None when it is
                 not present in the DB.
        """
        query = 'SELECT pickle FROM %s WHERE uniq_id = ?'
        params = (uniq_id, )

        result = self.db.select_one(query % self.table_name, params)

        if result is not None:
            result = cPickle.loads(result[0])

        return result

    def add_observer(self, location_a, location_b, observer):
        """
        Add the observer function to the observer list. The function will be
        called when there is a change in (location_a, location_b).

        You can use None in location_a or location_b as wildcards.

        The observer function needs to be a function which takes three params:
            * location_a
            * location_b
            * value that's added to the kb location

        :return: None
        """
        # FIX: the original checked location_a twice and never validated
        # location_b, letting non-string location_b values through the gate
        if not isinstance(location_a, (basestring, types.NoneType)) or \
        not isinstance(location_b, (basestring, types.NoneType)):
            raise TypeError('Observer locations need to be strings or None.')

        observer_id = self.get_observer_id()
        self.observers[(location_a, location_b, observer_id)] = observer

    def add_types_observer(self, type_filter, observer):
        """
        Add the observer function to the list of functions to be called when a
        new object that is of type "type_filter" is added to the KB.

        The type_filter must be one of Info, Vuln or Shell.

        :return: None
        """
        if type_filter not in (Info, Vuln, Shell):
            msg = 'The type_filter needs to be one of Info, Vuln or Shell'
            raise TypeError(msg)

        observer_id = self.get_observer_id()
        self.type_observers[(type_filter, observer_id)] = observer

    def get_observer_id(self):
        """
        :return: A new, monotonically increasing observer identifier.
        """
        self._observer_id += 1
        return self._observer_id

    def _notify(self, location_a, location_b, value):
        """
        Call the observer if the location_a/location_b matches with the
        configured observers.

        :return: None
        """
        # Note that I copy the items list in order to iterate though it without
        # any issues like the size changing
        for (obs_loc_a, obs_loc_b, _), observer in self.observers.items()[:]:

            # (None, None) observers receive every notification
            if obs_loc_a is None and obs_loc_b is None:
                observer(location_a, location_b, value)
                continue

            # (location_a, None) observers receive everything for location_a
            if obs_loc_a == location_a and obs_loc_b is None:
                observer(location_a, location_b, value)
                continue

            # Exact (location_a, location_b) match
            if obs_loc_a == location_a and obs_loc_b == location_b:
                observer(location_a, location_b, value)
                continue

        for (type_filter, _), observer in self.type_observers.items()[:]:
            if isinstance(value, type_filter):
                observer(location_a, location_b, value)

    def get_all_entries_of_class(self, klass):
        """
        :return: A list of all objects of class == klass that are saved in the
                 kb.
        """
        query = 'SELECT pickle FROM %s'
        results = self.db.select(query % self.table_name)

        result_lst = []

        for r in results:
            obj = cPickle.loads(r[0])
            # The class filter is applied in Python, after deserializing
            if isinstance(obj, klass):
                result_lst.append(obj)

        return result_lst

    def dump(self):
        """
        :return: A dict of dicts mapping location_a -> location_b -> [objects],
                 with every stored object deserialized.
        """
        result_dict = {}

        query = 'SELECT location_a, location_b, pickle FROM %s'
        results = self.db.select(query % self.table_name)

        for location_a, location_b, pickle in results:
            obj = cPickle.loads(pickle)

            if location_a not in result_dict:
                result_dict[location_a] = {location_b: [obj,]}
            elif location_b not in result_dict[location_a]:
                result_dict[location_a][location_b] = [obj,]
            else:
                result_dict[location_a][location_b].append(obj)

        return result_dict

    def cleanup(self):
        """
        Cleanup internal data.
        """
        self.db.execute("DELETE FROM %s WHERE 1=1" % self.table_name)

        # Remove the old, create new.
        self.urls.cleanup()
        self.urls = DiskSet(table_prefix='kb_urls')

        self.fuzzable_requests.cleanup()
        self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')

        self.observers.clear()

    def remove(self):
        """
        Drop the KB table and release every backing store; the instance is not
        usable afterwards.
        """
        self.db.drop_table(self.table_name)
        self.urls.cleanup()
        self.fuzzable_requests.cleanup()
        self.observers.clear()

    def get_all_known_urls(self):
        """
        :return: A DiskSet with all the known URLs as URL objects.
        """
        return self.urls

    def add_url_observer(self, observer):
        """
        Register a callable to be invoked with every newly added URL.
        """
        self.url_observers.append(observer)

    def _notify_url_observers(self, new_url):
        """
        Call the observer with new_url.

        :return: None
        """
        # Note that I copy the items list in order to iterate though it without
        # any issues like the size changing
        for observer in self.url_observers[:]:
            observer(new_url)

    def add_url(self, url):
        """
        :return: True if the URL was previously unknown
        """
        if not isinstance(url, URL):
            msg = 'add_url requires a URL as parameter got %s instead.'
            raise TypeError(msg % type(url))

        self._notify_url_observers(url)
        return self.urls.add(url)

    def get_all_known_fuzzable_requests(self):
        """
        :return: A DiskSet with all the known URLs as URL objects.
        """
        return self.fuzzable_requests

    def add_fuzzable_request(self, fuzzable_request):
        """
        :return: True if the FuzzableRequest was previously unknown
        """
        if not isinstance(fuzzable_request, FuzzableRequest):
            msg = 'add_fuzzable_request requires a FuzzableRequest as '\
                  'parameter, got "%s" instead.'
            raise TypeError(msg % type(fuzzable_request))

        self.add_url(fuzzable_request.get_url())
        return self.fuzzable_requests.add(fuzzable_request)
class find_captchas(CrawlPlugin): """ Identify captcha images on web pages. :author: Andres Riancho ([email protected]) """ def __init__(self): CrawlPlugin.__init__(self) self._captchas_found = DiskSet(table_prefix='find_captchas') def crawl(self, fuzzable_request): """ Find CAPTCHA images. :param fuzzable_request: A fuzzable_request instance that contains (among other things) the URL to test. """ result, captchas = self._identify_captchas(fuzzable_request) if not result: return for captcha in captchas: desc = 'Found a CAPTCHA image at: "%s".' % captcha.img_src response_ids = [response.id for response in captcha.http_responses] i = Info('Captcha image detected', desc, response_ids, self.get_name()) i.set_uri(captcha.img_src) kb.kb.append(self, 'CAPTCHA', i) om.out.information(i.get_desc()) def _identify_captchas(self, fuzzable_request): """ :return: A tuple with the following information: * True indicating that the page has CAPTCHAs * A list with tuples that contain: * The CAPTCHA image source * The http responses used to verify that the image was indeed a CAPTCHA """ found_captcha = False captchas = [] # GET the document, and fetch the images images_1 = self._get_images(fuzzable_request) # Re-GET the document, and fetch the images images_2 = self._get_images(fuzzable_request) # If the number of images in each response is different, don't even # bother to perform any analysis since our simplistic approach will fail # # TODO: Add something more advanced. if len(images_1) != len(images_2): return not_in_2 = [] for img_src_1, img_hash_1, http_response_1 in images_1: for _, img_hash_2, http_response_2 in images_2: if img_hash_1 == img_hash_2: # The image is in both lists, can't be a CAPTCHA break else: not_in_2.append( (img_src_1, img_hash_1, [http_response_1, http_response_2])) # Results # # TODO: This allows for more than one CAPTCHA in the same page. Does # that make sense? When that's found, should I simply declare # defeat and don't report anything? 
for img_src, _, http_responses in not_in_2: CaptchaInfo = namedtuple('CaptchaInfo', ['img_src', 'http_responses']) img_src = img_src.uri2url() if img_src not in self._captchas_found: self._captchas_found.add(img_src) found_captcha = True captchas.append(CaptchaInfo(img_src, http_responses)) return found_captcha, captchas def _get_images(self, fuzzable_request): """ Get all img tags and retrieve the src. :param fuzzable_request: The request to modify :return: A list with tuples containing (img_src, image_hash, http_response) """ res = [] try: response = self._uri_opener.GET(fuzzable_request.get_uri(), cache=False) except: om.out.debug('Failed to retrieve the page for finding captchas.') else: # Do not use parser_cache here, it's not good since CAPTCHA implementations # *might* change the image name for each request of the HTML # # dp = parser_cache.dpc.get_document_parser_for( response ) # try: document_parser = DocumentParser.DocumentParser(response) except BaseFrameworkException: return [] image_path_list = document_parser.get_references_of_tag('img') GET = self._uri_opener.GET sha1 = hashlib.sha1 result_iter = self.worker_pool.imap_unordered(GET, image_path_list) for image_response in result_iter: if image_response.is_image(): img_src = image_response.get_uri() img_hash = sha1(image_response.get_body()).hexdigest() res.append((img_src, img_hash, response)) return res def end(self): self._captchas_found.cleanup() def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """
class error_500(GrepPlugin): """ Grep every page for error 500 pages that haven't been identified as bugs by other plugins. :author: Andres Riancho ([email protected]) """ IGNORE_CODES = (404, 403, 401, 405, 400, 501) FALSE_POSITIVE_STRINGS = ('<h1>Bad Request (Invalid URL)</h1>', ) def __init__(self): GrepPlugin.__init__(self) self._error_500_responses = DiskSet(table_prefix='error_500') def grep(self, request, response): """ Plugin entry point, identify which requests generated a 500 error. :param request: The HTTP request object. :param response: The HTTP response object :return: None """ if not 400 < response.get_code() < 600: return if response.get_code() in self.IGNORE_CODES: return if not response.is_text_or_html(): return if self._is_false_positive(response): return self._error_500_responses.add((request, response.id)) def _is_false_positive(self, response): """ Filters out some false positives like this one: This false positive is generated by IIS when I send an URL that's "odd" Some examples of URLs that trigger this false positive: - http://127.0.0.2/ext.ini.%00.txt - http://127.0.0.2/%00/ - http://127.0.0.2/%0a%0a<script>alert(\Vulnerable\)</script>.jsp :return: True if the response is a false positive. """ for fps in self.FALSE_POSITIVE_STRINGS: if fps in response.get_body(): return True return False def end(self): """ This method is called when the plugin wont be used anymore. The real job of this plugin is done here, where I will try to see if one of the error_500 responses were not identified as a vuln by some of my audit plugins """ all_vuln_ids = set() for info in kb.kb.get_all_findings_iter(): for _id in info.get_id(): all_vuln_ids.add(_id) for request, error_500_response_id in self._error_500_responses: if error_500_response_id not in all_vuln_ids: # Found a error 500 that wasn't identified ! desc = ('An unidentified web application error (HTTP response' ' code 500) was found at: "%s". 
Enable all plugins and' ' try again, if the vulnerability still is not' ' identified, please verify manually and report it to' ' the w3af developers.') desc %= request.get_url() v = Vuln('Unhandled error in web application', desc, severity.MEDIUM, error_500_response_id, self.get_name()) v.set_uri(request.get_uri()) self.kb_append_uniq(self, 'error_500', v, 'VAR') self._error_500_responses.cleanup() def get_long_desc(self): """ :return: A DETAILED description of the plugin functions and features. """ return """