Exemplo n.º 1
0
    def test_table_name_with_prefix(self):
        """
        The prefix passed to DiskSet is part of the backing table name, and
        the table exists in the temp DB until cleanup() is called.
        """
        prefix = 'unittest'
        dset = DiskSet(prefix)

        self.assertIn(prefix, dset.table_name)

        temp_db = get_default_temp_db_instance()
        self.assertTrue(temp_db.table_exists(dset.table_name))

        dset.cleanup()
        self.assertFalse(temp_db.table_exists(dset.table_name))
Exemplo n.º 2
0
    def test_remove_table(self):
        """
        cleanup() removes the table which backs a DiskSet that holds items.
        """
        dset = DiskSet()
        for item in (1, 2):
            dset.add(item)

        backing_table = dset.table_name
        temp_db = get_default_temp_db_instance()
        self.assertTrue(temp_db.table_exists(backing_table))

        dset.cleanup()
        self.assertFalse(temp_db.table_exists(backing_table))
Exemplo n.º 3
0
class DBKnowledgeBase(BasicKnowledgeBase):
    """
    This class saves the data that is sent to it by plugins. It is the only way
    in which plugins can exchange information.

    Data is stored in a DB.

    :author: Andres Riancho ([email protected])
    """
    COLUMNS = [('location_a', 'TEXT'),
               ('location_b', 'TEXT'),
               ('uniq_id', 'TEXT'),
               ('pickle', 'BLOB')]

    def __init__(self):
        """
        Build a knowledge base without any backend store; creating those is
        deferred to setup().
        """
        super(DBKnowledgeBase, self).__init__()

        # Observers are keyed by the id handed out by get_observer_id()
        #
        # TODO: Why doesn't this work with a WeakValueDictionary?
        self._observer_id = 0
        self.observers = dict()

        # Set to True by setup() once the backend stores were created
        self.initialized = False

    def setup(self):
        """
        Create the backend stores: the URL and fuzzable request disk sets and
        the DB table (with its indexes) where the knowledge base rows live.

        This is kept out of __init__() to avoid starting any threads during
        python's import phase, which dead-locks in some cases.

        Only the first call does any work, later calls return immediately.

        :return: None
        """
        with self._kb_lock:
            if not self.initialized:
                self.urls = DiskSet(table_prefix='kb_urls')
                self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')

                self.db = get_default_persistent_db_instance()

                self.table_name = 'knowledge_base_' + rand_alpha(30)
                self.db.create_table(self.table_name, self.COLUMNS)

                for indexed_columns in (['location_a', 'location_b'], ['uniq_id']):
                    self.db.create_index(self.table_name, indexed_columns)

                self.db.commit()

                # Only initialize once
                self.initialized = True

    @requires_setup
    def clear(self, location_a, location_b):
        """
        Delete every row stored at (location_a, location_b).
        """
        real_name = self._get_real_name(location_a)

        delete_sql = "DELETE FROM %s WHERE location_a = ? and location_b = ?"
        delete_sql = delete_sql % self.table_name

        self.db.execute(delete_sql, (real_name, location_b))

    @requires_setup
    def raw_write(self, location_a, location_b, value):
        """
        Store value at (location_a, location_b), removing whatever was stored
        at that address before.

        :raises TypeError: When value is an Info instance.
        """
        if isinstance(value, Info):
            raise TypeError('Use append or append_uniq to store vulnerabilities')

        real_name = self._get_real_name(location_a)

        # Replace semantics: wipe the address first, then store the new value
        # without enforcing the Info type check
        self.clear(real_name, location_b)
        self.append(real_name, location_b, value, ignore_type=True)

    @requires_setup
    def raw_read(self, location_a, location_b):
        """
        Read the value stored at (location_a, location_b) without checking
        its type.

        :return: The stored object, or an empty list when nothing is stored
                 at that address.
        :raises RuntimeError: When more than one object is stored there.
        """
        real_name = self._get_real_name(location_a)
        stored = self.get(real_name, location_b, check_types=False)
        count = len(stored)

        if count == 0:
            return []

        if count == 1:
            return stored[0]

        msg = 'Incorrect use of raw_write/raw_read, found %s results.'
        raise RuntimeError(msg % count)

    @requires_setup
    def get_one(self, location_a, location_b):
        """
        Read the value stored at (location_a, location_b), checking its type
        and making sure only one is stored at that address.

        Similar to raw_read, but checking types.

        :return: The stored object, or an empty list when nothing is stored
                 at that address.
        :raises RuntimeError: When more than one object is stored at that
                              address.

        :see: https://github.com/andresriancho/w3af/issues/3955
        """
        location_a = self._get_real_name(location_a)
        result = self.get(location_a, location_b, check_types=True)

        if len(result) > 1:
            # Report how many objects were found (just like raw_read does),
            # not the representation of the whole result list
            msg = 'Incorrect use of get_one(), found %s results.'
            raise RuntimeError(msg % len(result))
        elif len(result) == 0:
            return []
        else:
            return result[0]

    def _get_uniq_id(self, obj):
        """
        :return: The id to store in the uniq_id column for obj: the object's
                 own get_uniq_id() for Info and InfoSet instances, otherwise
                 a string built from hash().
        """
        if isinstance(obj, (Info, InfoSet)):
            return obj.get_uniq_id()

        if not isinstance(obj, collections.Iterable):
            return str(hash(obj))

        # Iterables: hash each item, concatenate and hash the result
        item_hashes = [str(hash(item)) for item in obj]
        return str(hash(''.join(item_hashes)))

    @requires_setup
    def append(self, location_a, location_b, value, ignore_type=False):
        """
        Pickle value and insert it as a new row at (location_a, location_b),
        then notify the observers about the APPEND.

        :param ignore_type: When True, skip the check which requires value to
                            be an Info, Shell or InfoSet instance.
        :raises TypeError: When the type check is enabled and value fails it.
        """
        type_is_ok = ignore_type or isinstance(value, (Info, Shell, InfoSet))

        if not type_is_ok:
            msg = ('You MUST use raw_write/raw_read to store non-info objects'
                   ' to the KnowledgeBase.')
            raise TypeError(msg)

        real_name = self._get_real_name(location_a)

        row = (real_name,
               location_b,
               self._get_uniq_id(value),
               cpickle_dumps(value))

        insert_sql = "INSERT INTO %s VALUES (?, ?, ?, ?)" % self.table_name
        self.db.execute(insert_sql, row)

        self._notify_observers(self.APPEND, real_name, location_b, value,
                               ignore_type=ignore_type)

    @requires_setup
    def get(self, location_a, location_b, check_types=True):
        """
        :param location_a: The plugin that saved the data to the kb.
                           Typically the name of the plugin, but could also
                           be the plugin instance.

        :param location_b: The name of the variable under which the objects
                           were saved. Typically the same name of the plugin,
                           or something like "vulns", "errors", etc. When set
                           to None, all the objects saved under location_a
                           are returned.

        :param check_types: When True, fail if any of the stored objects is
                            not an Info, InfoSet or Shell instance.

        :return: A list with the objects that were saved at that address.
        :raises TypeError: When check_types is set and an object of another
                           type is found.
        """
        real_name = self._get_real_name(location_a)

        select_sql = 'SELECT pickle FROM %s WHERE location_a = ?'
        params = (real_name,)

        if location_b is not None:
            select_sql += ' and location_b = ?'
            params += (location_b,)

        found = []

        for row in self.db.select(select_sql % self.table_name, params):
            obj = cPickle.loads(row[0])

            if check_types and not isinstance(obj, (Info, InfoSet, Shell)):
                raise TypeError('Use raw_write and raw_read to query the'
                                ' knowledge base for non-Info objects')

            found.append(obj)

        return found

    @requires_setup
    def get_by_uniq_id(self, uniq_id):
        """
        :return: The unpickled object from the row returned by the DB for
                 uniq_id, or None when no row is returned.
        """
        select_sql = 'SELECT pickle FROM %s WHERE uniq_id = ?'
        row = self.db.select_one(select_sql % self.table_name, (uniq_id,))

        if row is None:
            return None

        return cPickle.loads(row[0])

    @requires_setup
    def update(self, old_info, update_info):
        """
        Replace the row identified by old_info's unique id with the pickled
        update_info, and notify the observers when a row was changed.

        :param old_info: The info/vuln instance to be updated in the kb.
        :param update_info: The info/vuln instance with new information
        :return: Nothing
        :raises TypeError: When any of the parameters is not an Info, InfoSet
                           or Shell instance.
        :raises DBException: When the UPDATE statement did not change any row.
        """
        info_types = (Info, InfoSet, Shell)
        both_are_info = (isinstance(old_info, info_types) and
                         isinstance(update_info, info_types))

        if not both_are_info:
            msg = ('You MUST use raw_write/raw_read to store non-info objects'
                   ' to the KnowledgeBase.')
            raise TypeError(msg)

        old_uniq_id = old_info.get_uniq_id()
        new_uniq_id = update_info.get_uniq_id()
        pickled_update = cpickle_dumps(update_info)

        # Update the pickle and unique_id after finding by original uniq_id
        update_sql = "UPDATE %s SET pickle = ?, uniq_id = ? WHERE uniq_id = ?"
        update_sql = update_sql % self.table_name

        params = (pickled_update, new_uniq_id, old_uniq_id)
        result = self.db.execute(update_sql, params).result()

        if not result.rowcount:
            ex = ('Failed to update() %s instance because'
                  ' the original unique_id (%s) does not exist in the DB,'
                  ' or the new unique_id (%s) is invalid.')
            raise DBException(ex % (old_info.__class__.__name__,
                                    old_uniq_id,
                                    new_uniq_id))

        self._notify_observers(self.UPDATE, old_info, update_info)

    def add_observer(self, observer):
        """
        Register the observer instance under a newly generated observer id.
        """
        self.observers[self.get_observer_id()] = observer

    def get_observer_id(self):
        """
        :return: The next observer id, the internal counter is incremented
                 on each call.
        """
        next_id = self._observer_id + 1
        self._observer_id = next_id
        return next_id

    def _notify_observers(self, method, *args, **kwargs):
        """
        Call the method named `method` on every registered observer,
        forwarding args and kwargs to it.

        :return: None
        """
        # Iterate over a copy of the observers in order to avoid any issues
        # like the dict size changing during the iteration
        for observer in list(self.observers.values()):
            getattr(observer, method)(*args, **kwargs)

    @requires_setup
    def get_all_entries_of_class(self, klass):
        """
        :return: A list of all the objects saved in the kb which are an
                 instance of klass.
        """
        rows = self.db.select('SELECT pickle FROM %s' % self.table_name)

        # Lazily unpickle each row, keep only the instances of klass
        unpickled = (cPickle.loads(row[0]) for row in rows)
        return [obj for obj in unpickled if isinstance(obj, klass)]

    @requires_setup
    def get_all_vulns(self):
        """
        :return: A list of all the stored objects which have a get_severity()
                 method that returns LOW, MEDIUM or HIGH.
        """
        vuln_severities = (LOW, MEDIUM, HIGH)
        vulns = []

        for row in self.db.select('SELECT pickle FROM %s' % self.table_name):
            obj = cPickle.loads(row[0])

            # Objects stored with raw_write might not have a severity
            if not hasattr(obj, 'get_severity'):
                continue

            if obj.get_severity() in vuln_severities:
                vulns.append(obj)

        return vulns

    @requires_setup
    def get_all_infos(self):
        """
        :return: A list of all the stored objects which have a get_severity()
                 method that returns INFORMATION.
        """
        info_severities = (INFORMATION,)
        infos = []

        for row in self.db.select('SELECT pickle FROM %s' % self.table_name):
            obj = cPickle.loads(row[0])

            # Objects stored with raw_write might not have a severity
            if not hasattr(obj, 'get_severity'):
                continue

            if obj.get_severity() in info_severities:
                infos.append(obj)

        return infos

    @requires_setup
    def dump(self):
        """
        :return: A dict with everything that is stored in the kb table,
                 shaped as {location_a: {location_b: [obj, ...]}}
        """
        select_sql = 'SELECT location_a, location_b, pickle FROM %s'
        rows = self.db.select(select_sql % self.table_name)

        kb_dump = {}

        for loc_a, loc_b, pickled_obj in rows:
            obj = cPickle.loads(pickled_obj)
            kb_dump.setdefault(loc_a, {}).setdefault(loc_b, []).append(obj)

        return kb_dump

    @requires_setup
    def cleanup(self):
        """
        Cleanup internal data: delete all the rows from the kb table, replace
        both disk sets with new ones and forget all the observers.
        """
        self.db.execute("DELETE FROM %s WHERE 1=1" % self.table_name)

        # The new DiskSet is assigned to the attribute before the previous
        # one is cleaned up
        stale_urls, self.urls = self.urls, DiskSet(table_prefix='kb_urls')
        stale_urls.cleanup()

        stale_requests = self.fuzzable_requests
        self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')
        stale_requests.cleanup()

        self.observers.clear()

    @requires_setup
    def remove(self):
        """
        Drop the kb table, cleanup both disk sets and forget all observers.
        """
        self.db.drop_table(self.table_name)

        for disk_set in (self.urls, self.fuzzable_requests):
            disk_set.cleanup()

        self.observers.clear()

    @requires_setup
    def get_all_known_urls(self):
        """
        :return: The DiskSet where add_url() stores all the known URLs, as
                 URL objects.
        """
        return self.urls

    @requires_setup
    def add_url(self, url):
        """
        Notify the observers about the URL and then add it to the set of
        known URLs. Note that the observers are notified before the URL is
        added to the set.

        :return: True if the URL was previously unknown
        :raises TypeError: When url is not a URL instance.
        """
        if isinstance(url, URL):
            self._notify_observers(self.ADD_URL, url)
            return self.urls.add(url)

        msg = 'add_url requires a URL as parameter got %s instead.'
        raise TypeError(msg % type(url))

    @requires_setup
    def get_all_known_fuzzable_requests(self):
        """
        :return: The DiskSet where add_fuzzable_request() stores all the
                 known FuzzableRequest objects.
        """
        return self.fuzzable_requests

    @requires_setup
    def add_fuzzable_request(self, fuzzable_request):
        """
        Add the URL of the fuzzable request to the known URLs, and then the
        fuzzable request itself to the known fuzzable requests.

        :return: True if the FuzzableRequest was previously unknown
        :raises TypeError: When the parameter is not a FuzzableRequest.
        """
        if isinstance(fuzzable_request, FuzzableRequest):
            self.add_url(fuzzable_request.get_url())
            return self.fuzzable_requests.add(fuzzable_request)

        msg = ('add_fuzzable_request requires a FuzzableRequest as'
               ' parameter, got "%s" instead.')
        raise TypeError(msg % type(fuzzable_request))
Exemplo n.º 4
0
class find_captchas(CrawlPlugin):
    """
    Identify captcha images on web pages.
    :author: Andres Riancho ([email protected])
    """

    def __init__(self):
        """
        Create the disk-backed set used to remember the CAPTCHA image URLs
        which were already reported.
        """
        CrawlPlugin.__init__(self)

        # Queried and populated by _identify_captchas(), cleaned up by end()
        self._captchas_found = DiskSet(table_prefix='find_captchas')

    def crawl(self, fuzzable_request):
        """
        Find CAPTCHA images, save an Info for each one to the kb and report
        it to the output manager.

        :param fuzzable_request: A fuzzable_request instance that contains
                                    (among other things) the URL to test.
        """
        found, captchas = self._identify_captchas(fuzzable_request)

        if not found:
            return

        for captcha in captchas:
            response_ids = [resp.id for resp in captcha.http_responses]
            desc = 'Found a CAPTCHA image at: "%s".' % captcha.img_src

            info = Info('Captcha image detected', desc, response_ids,
                        self.get_name())
            info.set_uri(captcha.img_src)

            kb.kb.append(self, 'CAPTCHA', info)
            om.out.information(info.get_desc())

    def _identify_captchas(self, fuzzable_request):
        """
        Request the page twice and flag as CAPTCHAs the images from the first
        response whose hash is not found in the second one.

        Image URLs which were already reported in a previous call are
        ignored.

        :param fuzzable_request: The request that is sent twice.
        :return: A tuple with the following information:
                    * True if at least one new CAPTCHA was found
                    * A list with CaptchaInfo named tuples that contain:
                        * img_src: The CAPTCHA image source
                        * http_responses: The http responses used to verify
                          that the image was indeed a CAPTCHA
        """
        captchas = []

        # GET the document, and fetch the images
        images_1 = self._get_images(fuzzable_request)

        # Re-GET the document, and fetch the images
        images_2 = self._get_images(fuzzable_request)

        # If the number of images in each response is different, don't even
        # bother to perform any analysis since our simplistic approach will fail
        # TODO: Add something more advanced.
        if len(images_1) != len(images_2) or not images_1:
            return False, captchas

        # Created once, instead of once for each image that is analyzed
        CaptchaInfo = namedtuple('CaptchaInfo', ['img_src',
                                                 'http_responses'])

        # A set makes the "is this image also in the second response?" check
        # O(1) instead of a scan over all the images of the second response
        hashes_2 = set(img_hash for _, img_hash, _ in images_2)

        # The response stored in the last tuple of the second image list,
        # which is the one the previous nested-loop implementation reported
        http_response_2 = images_2[-1][2]

        # TODO: This allows for more than one CAPTCHA in the same page. Does
        #       that make sense? When that's found, should I simply declare
        #       defeat and don't report anything?
        for img_src, img_hash_1, http_response_1 in images_1:
            if img_hash_1 in hashes_2:
                # The image is in both lists, can't be a CAPTCHA
                continue

            img_src = img_src.uri2url()

            if img_src in self._captchas_found:
                # Already reported
                continue

            self._captchas_found.add(img_src)

            http_responses = [http_response_1, http_response_2]
            captchas.append(CaptchaInfo(img_src, http_responses))

        return bool(captchas), captchas
        
    def _get_images(self, fuzzable_request):
        """
        GET the page, then GET all the references found in its img tags and
        hash the body of the ones which are images.

        :param fuzzable_request: The request to modify
        :return: A list with tuples containing (img_src, image_hash,
                 http_response), where http_response is the response for the
                 page (not for the image). The list is empty when the page
                 can not be retrieved or parsed.
        """
        res = []

        try:
            response = self._uri_opener.GET(fuzzable_request.get_uri(),
                                            cache=False)
        except Exception:
            # Best effort: any error while retrieving the page means no
            # images. Exception (instead of a bare except) is used in order
            # to let KeyboardInterrupt and SystemExit through.
            om.out.debug('Failed to retrieve the page for finding captchas.')
            return res

        # Do not use parser_cache here, it's not good since CAPTCHA
        # implementations *might* change the image name for each request of
        # the HTML
        try:
            document_parser = DocumentParser.DocumentParser(response)
        except BaseFrameworkException:
            return []

        image_path_list = document_parser.get_references_of_tag('img')

        GET = self._uri_opener.GET
        sha1 = hashlib.sha1

        result_iter = self.worker_pool.imap_unordered(GET, image_path_list)

        for image_response in result_iter:
            if image_response.is_image():
                img_src = image_response.get_uri()
                img_hash = sha1(image_response.get_body()).hexdigest()
                res.append((img_src, img_hash, response))

        return res

    def end(self):
        """
        Clean up the disk set which holds the already reported CAPTCHA URLs.
        """
        self._captchas_found.cleanup()

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """
Exemplo n.º 5
0
class error_500(GrepPlugin):
    """
    Grep every page for error 500 pages that haven't been identified as bugs by
    other plugins.

    :author: Andres Riancho ([email protected])
    """

    IGNORE_CODES = (404, 403, 401, 405, 400, 501)
    FALSE_POSITIVE_STRINGS = ('<h1>Bad Request (Invalid URL)</h1>',
                              )

    def __init__(self):
        """
        Create the disk-backed set where grep() stores the (request,
        response id) tuples which are analyzed by end().
        """
        GrepPlugin.__init__(self)

        self._error_500_responses = DiskSet(table_prefix='error_500')

    def grep(self, request, response):
        """
        Plugin entry point, save the (request, response id) of each text or
        HTML response with a code between 401 and 599 which is not in
        IGNORE_CODES and is not a known false positive.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        if not response.is_text_or_html():
            return

        code = response.get_code()

        if not 400 < code < 600:
            return

        if code in self.IGNORE_CODES:
            return

        if self._is_false_positive(response):
            return

        self._error_500_responses.add((request, response.id))

    def _is_false_positive(self, response):
        """
        Check if the response body contains any of FALSE_POSITIVE_STRINGS.

        The known false positive is generated by IIS when it receives an URL
        that's "odd". Some examples of URLs that trigger it:
            - http://127.0.0.2/ext.ini.%00.txt
            - http://127.0.0.2/%00/
            - http://127.0.0.2/%0a%0a<script>alert(\Vulnerable\)</script>.jsp

        :return: True if the response is a false positive.
        """
        return any(fps in response.get_body()
                   for fps in self.FALSE_POSITIVE_STRINGS)

    def end(self):
        """
        This method is called when the plugin wont be used anymore.

        The real job of this plugin is done here: each saved error response
        whose id is not referenced by any finding in the kb is reported as a
        new vulnerability. The saved responses are cleaned up afterwards.
        """
        identified_ids = set()

        for finding in kb.kb.get_all_findings():
            identified_ids.update(finding.get_id())

        desc_fmt = ('An unidentified web application error (HTTP response'
                    ' code 500) was found at: "%s". Enable all plugins and'
                    ' try again, if the vulnerability still is not'
                    ' identified, please verify manually and report it to'
                    ' the w3af developers.')

        for request, response_id in self._error_500_responses:

            if response_id in identified_ids:
                # Some other plugin already explained this error
                continue

            # Found a error 500 that wasn't identified !
            desc = desc_fmt % request.get_url()

            v = Vuln('Unhandled error in web application', desc,
                     severity.MEDIUM, response_id, self.get_name())
            v.set_uri(request.get_uri())

            self.kb_append_uniq(self, 'error_500', v, 'VAR')

        self._error_500_responses.cleanup()

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """
Exemplo n.º 6
0
class DBKnowledgeBase(BasicKnowledgeBase):
    """
    This class saves the data that is sent to it by plugins. It is the only way
    in which plugins can exchange information.

    Data is stored in a DB.

    :author: Andres Riancho ([email protected])
    """

    def __init__(self):
        """
        Create the knowledge base together with all its backend stores: the
        URL and fuzzable request disk sets and the DB table (plus indexes)
        where the pickled objects are saved.
        """
        super(DBKnowledgeBase, self).__init__()

        self.urls = DiskSet(table_prefix='kb_urls')
        self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')

        self.db = get_default_persistent_db_instance()

        self.table_name = 'knowledge_base_' + rand_alpha(30)
        self.db.create_table(self.table_name, [('location_a', 'TEXT'),
                                               ('location_b', 'TEXT'),
                                               ('uniq_id', 'TEXT'),
                                               ('pickle', 'BLOB')])

        for indexed_columns in (['location_a', 'location_b'], ['uniq_id']):
            self.db.create_index(self.table_name, indexed_columns)

        self.db.commit()

        # TODO: Why doesn't this work with a WeakValueDictionary?
        self.observers = dict()
        self.type_observers = dict()
        self.url_observers = list()
        self._observer_id = 0

    def clear(self, location_a, location_b):
        """
        Delete every row stored at (location_a, location_b).
        """
        real_name = self._get_real_name(location_a)

        delete_sql = "DELETE FROM %s WHERE location_a = ? and location_b = ?"
        delete_sql = delete_sql % self.table_name

        self.db.execute(delete_sql, (real_name, location_b))

    def raw_write(self, location_a, location_b, value):
        """
        Store value at (location_a, location_b), removing whatever was stored
        at that address before.

        :raises TypeError: When value is an Info instance.
        """
        if isinstance(value, Info):
            raise TypeError('Use append or append_uniq to store vulnerabilities')

        real_name = self._get_real_name(location_a)

        # Replace semantics: wipe the address first, then store the new value
        # without enforcing the Info type check
        self.clear(real_name, location_b)
        self.append(real_name, location_b, value, ignore_type=True)

    def raw_read(self, location_a, location_b):
        """
        Read the value stored at (location_a, location_b) without checking
        its type.

        :return: The stored object, or an empty list when nothing is stored
                 at that address.
        :raises RuntimeError: When more than one object is stored at that
                              address.
        """
        location_a = self._get_real_name(location_a)
        result = self.get(location_a, location_b, check_types=False)

        if len(result) > 1:
            # The message announces the number of rows, so format the count
            # and not the representation of the whole result list
            msg = 'Incorrect use of raw_write/raw_read, found %s rows.'
            raise RuntimeError(msg % len(result))
        elif len(result) == 0:
            return []
        else:
            return result[0]
    
    def _get_uniq_id(self, obj):
        """
        :return: The id to store in the uniq_id column for obj: the object's
                 own get_uniq_id() for Info instances, otherwise a string
                 built from hash().
        """
        if isinstance(obj, Info):
            return obj.get_uniq_id()

        if not isinstance(obj, collections.Iterable):
            return str(hash(obj))

        # Iterables: concatenate the str() of each item and hash the result
        items_as_str = [str(item) for item in obj]
        return str(hash(''.join(items_as_str)))

    def append(self, location_a, location_b, value, ignore_type=False):
        """
        Pickle value and insert it as a new row at (location_a, location_b),
        then notify the observers.

        :param ignore_type: When True, skip the check which requires value to
                            be an Info or Shell instance.
        :raises TypeError: When the type check is enabled and value fails it.
        """
        type_is_ok = ignore_type or isinstance(value, (Info, Shell))

        if not type_is_ok:
            msg = ('You MUST use raw_write/raw_read to store non-info objects'
                   ' to the KnowledgeBase.')
            raise TypeError(msg)

        real_name = self._get_real_name(location_a)

        row = (real_name,
               location_b,
               self._get_uniq_id(value),
               cpickle_dumps(value))

        insert_sql = "INSERT INTO %s VALUES (?, ?, ?, ?)" % self.table_name
        self.db.execute(insert_sql, row)

        self._notify(real_name, location_b, value)

    def get(self, location_a, location_b, check_types=True):
        """
        :param location_a: The plugin that saved the data to the kb.
                           Typically the name of the plugin, but could also
                           be the plugin instance.

        :param location_b: The name of the variable under which the objects
                           were saved. Typically the same name of the plugin,
                           or something like "vulns", "errors", etc. When set
                           to None, all the objects saved under location_a
                           are returned.

        :param check_types: When True, fail if any of the stored objects is
                            not an Info or Shell instance.

        :return: A list with the objects that were saved at that address.
        :raises TypeError: When check_types is set and an object of another
                           type is found.
        """
        real_name = self._get_real_name(location_a)

        select_sql = 'SELECT pickle FROM %s WHERE location_a = ?'
        params = (real_name,)

        if location_b is not None:
            select_sql += ' and location_b = ?'
            params += (location_b,)

        found = []

        for row in self.db.select(select_sql % self.table_name, params):
            obj = cPickle.loads(row[0])

            if check_types and not isinstance(obj, (Info, Shell)):
                raise TypeError('Use raw_write and raw_read to query the'
                                ' knowledge base for non-Info objects')

            found.append(obj)

        return found

    def get_by_uniq_id(self, uniq_id):
        """
        :return: The unpickled object from the row returned by the DB for
                 uniq_id, or None when no row is returned.
        """
        select_sql = 'SELECT pickle FROM %s WHERE uniq_id = ?'
        row = self.db.select_one(select_sql % self.table_name, (uniq_id,))

        if row is None:
            return None

        return cPickle.loads(row[0])

    def add_observer(self, location_a, location_b, observer):
        """
        Add the observer function to the observer list. The function will be
        called when there is a change in (location_a, location_b).

        You can use None in location_b, or in both location_a and location_b,
        as wildcards.

        The observer function needs to be a function which takes three params:
            * location_a
            * location_b
            * value that's added to the kb location

        :return: None
        :raises TypeError: When location_a or location_b is neither a string
                           nor None.
        """
        valid_types = (basestring, types.NoneType)

        # Validate *both* locations; checking location_a twice would let an
        # invalid location_b through
        for location in (location_a, location_b):
            if not isinstance(location, valid_types):
                raise TypeError('Observer locations need to be strings or None.')

        observer_id = self.get_observer_id()
        self.observers[(location_a, location_b, observer_id)] = observer
    
    def add_types_observer(self, type_filter, observer):
        """
        Register observer to be called each time an object which is an
        instance of type_filter is added to the KB.

        :param type_filter: One of Info, Vuln or Shell.
        :return: None
        :raises TypeError: When type_filter is not one of those classes.
        """
        allowed_filters = (Info, Vuln, Shell)

        if type_filter in allowed_filters:
            key = (type_filter, self.get_observer_id())
            self.type_observers[key] = observer
            return

        msg = 'The type_filter needs to be one of Info, Vuln or Shell'
        raise TypeError(msg)
        
    def get_observer_id(self):
        """
        :return: The next observer id, the internal counter is incremented
                 on each call.
        """
        next_id = self._observer_id + 1
        self._observer_id = next_id
        return next_id
    
    def _notify(self, location_a, location_b, value):
        """
        Call the location observers whose (location_a, location_b) filter
        matches, and then the type observers whose type filter matches value.

        :return: None
        """
        # Note that copies of the item lists are iterated, in order to avoid
        # any issues like the size changing
        for (obs_loc_a, obs_loc_b, _), observer in list(self.observers.items()):
            # (None, None) observes everything
            matches_all = obs_loc_a is None and obs_loc_b is None

            # Otherwise location_a needs to match, and location_b is either
            # a wildcard (None) or also needs to match
            same_a = obs_loc_a == location_a
            same_b = obs_loc_b is None or obs_loc_b == location_b

            if matches_all or (same_a and same_b):
                observer(location_a, location_b, value)

        for (type_filter, _), observer in list(self.type_observers.items()):
            if isinstance(value, type_filter):
                observer(location_a, location_b, value)

    def get_all_entries_of_class(self, klass):
        """
        :return: A list of all the objects saved in the kb which are an
                 instance of klass.
        """
        rows = self.db.select('SELECT pickle FROM %s' % self.table_name)

        # Lazily unpickle each row, keep only the instances of klass
        unpickled = (cPickle.loads(row[0]) for row in rows)
        return [obj for obj in unpickled if isinstance(obj, klass)]

    def dump(self):
        """
        :return: A dict with everything that is stored in the kb table,
                 shaped as {location_a: {location_b: [obj, ...]}}
        """
        select_sql = 'SELECT location_a, location_b, pickle FROM %s'
        rows = self.db.select(select_sql % self.table_name)

        kb_dump = {}

        for loc_a, loc_b, pickled_obj in rows:
            obj = cPickle.loads(pickled_obj)
            kb_dump.setdefault(loc_a, {}).setdefault(loc_b, []).append(obj)

        return kb_dump

    def cleanup(self):
        """
        Cleanup internal data: delete all the rows from the kb table, replace
        both disk sets with new ones and forget the location observers.
        """
        self.db.execute("DELETE FROM %s WHERE 1=1" % self.table_name)

        # Create the new disk set *before* cleaning up the old one, so that
        # the attribute never references a DiskSet which was already cleaned
        old_urls = self.urls
        self.urls = DiskSet(table_prefix='kb_urls')
        old_urls.cleanup()

        old_fuzzable_requests = self.fuzzable_requests
        self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')
        old_fuzzable_requests.cleanup()

        # NOTE(review): type_observers and url_observers are not cleared
        # here - confirm whether that is intended.
        self.observers.clear()
    
    def remove(self):
        """
        Drop the kb table, cleanup both disk sets and forget the location
        observers.
        """
        self.db.drop_table(self.table_name)

        for disk_set in (self.urls, self.fuzzable_requests):
            disk_set.cleanup()

        self.observers.clear()
    
    def get_all_known_urls(self):
        """
        :return: The DiskSet where add_url() stores all the known URLs, as
                 URL objects.
        """
        return self.urls

    def add_url_observer(self, observer):
        """
        Register observer, a callable which receives the URL as its only
        argument each time add_url() is called.
        """
        self.url_observers.append(observer)

    def _notify_url_observers(self, new_url):
        """
        Call each of the registered URL observers with new_url.

        :return: None
        """
        # Iterate over a copy of the list in order to avoid any issues like
        # the size changing during the iteration
        for url_observer in list(self.url_observers):
            url_observer(new_url)
    
    def add_url(self, url):
        """
        Notify the URL observers and then add the URL to the set of known
        URLs. Note that the observers are notified before the URL is added
        to the set.

        :return: True if the URL was previously unknown
        :raises TypeError: When url is not a URL instance.
        """
        if isinstance(url, URL):
            self._notify_url_observers(url)
            return self.urls.add(url)

        msg = 'add_url requires a URL as parameter got %s instead.'
        raise TypeError(msg % type(url))
    
    def get_all_known_fuzzable_requests(self):
        """
        :return: The DiskSet where add_fuzzable_request() stores all the
                 known FuzzableRequest objects.
        """
        return self.fuzzable_requests
    
    def add_fuzzable_request(self, fuzzable_request):
        """
        Add the URL of the fuzzable request to the known URLs, and then the
        fuzzable request itself to the known fuzzable requests.

        :return: True if the FuzzableRequest was previously unknown
        :raises TypeError: When the parameter is not a FuzzableRequest.
        """
        if isinstance(fuzzable_request, FuzzableRequest):
            self.add_url(fuzzable_request.get_url())
            return self.fuzzable_requests.add(fuzzable_request)

        msg = ('add_fuzzable_request requires a FuzzableRequest as '
               'parameter, got "%s" instead.')
        raise TypeError(msg % type(fuzzable_request))
Exemplo n.º 7
0
class click_jacking(GrepPlugin):
    """
    Grep every page for missing click jacking protection headers.

    :author: Taras ([email protected])
    :author: Andres ([email protected])
    """

    MAX_SAMPLES = 25
    DO_NOT_FRAME = {301, 302, 303, 307, 400, 403, 404, 500}

    def __init__(self):
        """
        Initialize the counters and the on-disk sets which hold the samples
        of the unprotected HTTP responses.
        """
        GrepPlugin.__init__(self)

        # Samples (URIs and HTTP response ids) kept as proof, they are filled
        # in _add_response_to_findings() and consumed in end()
        self._vuln_urls = DiskSet(table_prefix='click_jacking')
        self._vuln_ids = DiskSet(table_prefix='click_jacking')

        # Counters compared in end() to decide what needs to be reported
        self._vuln_count = 0
        self._total_http_request_count = 0

    def grep(self, request, response):
        """
        Check if the HTTP response is protected against click-jacking and
        store it as a finding when it is not.

        Responses which are not interesting for a click-jacking attack are
        ignored and not counted.

        :param request: The HTTP request
        :param response: The HTTP response
        :return: None
        """
        # Can not iframe a POST, PUT, etc.
        is_get = request.get_method() == 'GET'
        if not is_get:
            return

        # An attacker will never run a clickjacking attack on an empty
        # response. Empty responses are common in redirects, 400 and 500
        # errors, etc.
        #
        # The checks are evaluated in this order and stop at the first match,
        # is_404() is the last one
        not_interesting = (response.get_code() in self.DO_NOT_FRAME
                           or not response.is_text_or_html()
                           or not response.get_body()
                           or not self._response_will_be_rendered(response)
                           or is_404(response))
        if not_interesting:
            return

        self._total_http_request_count += 1

        if not self._is_protected_against_clickjacking(request, response):
            self._add_response_to_findings(response)

    def _response_will_be_rendered(self, response):
        """
        Browsers will never render responses with application/javascript
        content-type, so it doesn't make sense for an attacker to do a
        click-jacking attack on these.

        :param response: An HTTP response
        :return: True if the response has javascript content type
        """
        if 'javascript' in response.content_type:
            return False

        if 'css' in response.content_type:
            return False

        if 'application/xml' in response.content_type:
            return False

        return True

    def _add_response_to_findings(self, response):
        self._vuln_count += 1

        if len(self._vuln_urls) >= self.MAX_SAMPLES:
            return

        self._vuln_urls.add(response.get_uri())
        self._vuln_ids.add(response.id)

    def _is_protected_against_clickjacking(self, request, response):
        """
        There are many methods to protect a site against clickjacking, this
        method checks for all of them.

        :param request: HTTP request
        :param response: HTTP response
        :return: True if the response is protected
        """
        methods = [
            self._is_protected_with_x_frame_options,
            self._is_protected_with_csp
        ]

        for method in methods:
            if method(request, response):
                return True

        return False

    def _is_protected_with_x_frame_options(self, request, response):
        """
        Check if the HTTP response has the x-frame-options header set
        to the secure value.

        :param request: HTTP request
        :param response: HTTP response
        :return: True if the response is protected
        """
        headers = response.get_headers()
        x_frame_options, header_name = headers.iget('x-frame-options', '')

        if x_frame_options.lower() in ('deny', 'sameorigin'):
            return True

        return False

    def _is_protected_with_csp(self, request, response):
        """
        Extract the frame-ancestors values from the CSP policies of the HTTP
        response and check if they are secure.

        :param request: HTTP request (not used)
        :param response: HTTP response
        :return: True if the response is protected
        """
        # These are the policies that will be enforced by the browser
        enforced_policies = retrieve_csp_policies(response, False, True)
        ancestors = enforced_policies.get('frame-ancestors', [])

        #
        # This is the strictest policy, nobody can frame me!
        #
        # Content-Security-Policy: frame-ancestors 'none';
        #
        if any(source.lower() == 'none' for source in ancestors):
            return True

        #
        # Fail when the frame-ancestors has insecure wildcards
        #
        #   Content-Security-Policy: frame-ancestors '*';
        #   Content-Security-Policy: frame-ancestors 'https://*';
        #
        insecure_ancestors = ('*',
                              'http', 'https',
                              'http://', 'https://',
                              'http://*', 'https://*')

        if any(source.lower() in insecure_ancestors for source in ancestors):
            return False

        #
        # At this point any non-empty list of ancestors is accepted as secure
        #
        #   Content-Security-Policy: frame-ancestors 'self';
        #   Content-Security-Policy: frame-ancestors 'foo.com' '*.somesite.com';
        #
        return len(ancestors) > 0

    def end(self):
        """
        Report the click-jacking findings collected by grep() and release the
        on-disk sets used to store the samples.

        At most ONE vulnerability is written to the knowledge base:

            * Nothing is reported when no unprotected response was found

            * When all the analyzed responses were unprotected a generic
              description is used

            * Otherwise the description lists the sampled vulnerable URLs

        The DiskSet instances are cleaned up on every code path, including
        the one where there is nothing to report and the one where building
        the report raises an exception.

        :return: None
        """
        try:
            # If all URLs implement protection, don't report anything.
            if not self._vuln_count:
                return

            response_ids = list(self._vuln_ids)

            # Only the first MAX_SAMPLES responses are stored as proof
            samples_were_capped = len(response_ids) >= self.MAX_SAMPLES

            if self._total_http_request_count == self._vuln_count:
                # If none of the URLs implement protection, simply report
                # ONE vulnerability that says that
                desc = 'The application has no protection against Click-Jacking attacks.'

                if samples_were_capped:
                    desc += (' All the received HTTP responses were found to be'
                             ' vulnerable, only the first %s samples were captured'
                             ' as proof.' % self.MAX_SAMPLES)

            else:
                # If most of the URLs implement the protection but some
                # don't, report ONE vulnerability saying: "Most are protected,
                # but x, y are not
                if samples_were_capped:
                    desc = ('Multiple application URLs have no protection against'
                            ' Click-Jacking attacks. Only the first %s samples were'
                            ' captured as proof. The list of vulnerable URLs is:'
                            '\n\n - ' % self.MAX_SAMPLES)
                else:
                    desc = ('Multiple application URLs have no protection against'
                            ' Click-Jacking attacks. The list of vulnerable URLs is:'
                            '\n\n - ')

                desc += ' - '.join([str(url) + '\n' for url in self._vuln_urls])

            v = Vuln('Click-Jacking vulnerability',
                     desc,
                     severity.MEDIUM,
                     response_ids,
                     self.get_name())

            self.kb_append(self, 'click_jacking', v)
        finally:
            # Always release the on-disk tables, the early return above used
            # to leave them behind when there was nothing to report
            self._vuln_urls.cleanup()
            self._vuln_ids.cleanup()

    def get_long_desc(self):
        return """
Exemplo n.º 8
0
class DBKnowledgeBase(BasicKnowledgeBase):
    """
    This class saves the data that is sent to it by plugins. It is the only way
    in which plugins can exchange information.

    Data is stored in a DB.

    :author: Andres Riancho ([email protected])
    """
    COLUMNS = [('location_a', 'TEXT'),
               ('location_b', 'TEXT'),
               ('uniq_id', 'TEXT'),
               ('pickle', 'BLOB')]

    def __init__(self):
        """
        Create the instance without touching any backend store, those are
        created later in setup().
        """
        super(DBKnowledgeBase, self).__init__()

        # Observers are stored using the ids generated by get_observer_id()
        #
        # TODO: Why doesn't this work with a WeakValueDictionary?
        self._observer_id = 0
        self.observers = dict()

        # Set to True by setup() once the backend stores were created
        self.initialized = False

    def setup(self):
        """
        Create the backend stores: the URL and fuzzable request sets and the
        DB table (with its indexes) where the knowledge base data is saved.

        This lives outside of __init__() in order to avoid starting any
        threads there: __init__() is called during python's import phase and
        that dead-locks in some cases.

        The work is done while holding self._kb_lock and only once, further
        calls return without doing anything.

        :return: None
        """
        with self._kb_lock:
            if not self.initialized:
                self.urls = DiskSet(table_prefix='kb_urls')
                self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')

                self.db = get_default_persistent_db_instance()

                # The random suffix gives each knowledge base its own table
                self.table_name = 'knowledge_base_' + rand_alpha(30)
                self.db.create_table(self.table_name, self.COLUMNS)

                indexed_columns = (['location_a', 'location_b'], ['uniq_id'])
                for columns in indexed_columns:
                    self.db.create_index(self.table_name, columns)

                self.db.commit()

                # Only initialize once
                self.initialized = True

    @requires_setup
    def clear(self, location_a, location_b):
        """
        Delete all the values stored at (location_a, location_b).

        :param location_a: First part of the address, translated with
                           _get_real_name() before it is used
        :param location_b: Second part of the address
        :return: None
        """
        real_location_a = self._get_real_name(location_a)

        delete_sql = "DELETE FROM %s WHERE location_a = ? and location_b = ?"
        self.db.execute(delete_sql % self.table_name,
                        (real_location_a, location_b))

    @requires_setup
    def raw_write(self, location_a, location_b, value):
        """
        Save value to (location_a, location_b), any values which were
        previously stored at that address are removed first.

        :param location_a: First part of the address
        :param location_b: Second part of the address
        :param value: The object to store, Info instances are rejected
        :raises TypeError: When value is an Info instance
        :return: None
        """
        if isinstance(value, Info):
            raise TypeError('Use append or append_uniq to store vulnerabilities')

        real_location_a = self._get_real_name(location_a)

        # Remove the old values, then store the new one without type checks
        self.clear(real_location_a, location_b)
        self.append(real_location_a, location_b, value, ignore_type=True)

    @requires_setup
    def raw_read(self, location_a, location_b):
        """
        Read the value stored at (location_a, location_b) without checking
        its type.

        :param location_a: First part of the address
        :param location_b: Second part of the address
        :return: The stored value, or an empty list when nothing is stored
        :raises RuntimeError: When more than one value is stored there
        """
        real_location_a = self._get_real_name(location_a)
        found = self.get(real_location_a, location_b, check_types=False)
        count = len(found)

        if count == 0:
            return []

        if count == 1:
            return found[0]

        msg = 'Incorrect use of raw_write/raw_read, found %s results.'
        raise RuntimeError(msg % count)

    @requires_setup
    def get_one(self, location_a, location_b):
        """
        This method reads the value from (location_a, location_b), checking it's
        type and making sure only one is stored at that address.

        Similar to raw_read, but checking types.

        :param location_a: First part of the address
        :param location_b: Second part of the address
        :return: The stored value, or an empty list when nothing is stored
        :raises RuntimeError: When more than one value is stored there, the
                              message contains the number of values found
        :see: https://github.com/andresriancho/w3af/issues/3955
        """
        location_a = self._get_real_name(location_a)
        result = self.get(location_a, location_b, check_types=True)

        if len(result) > 1:
            # Report the number of results (just like raw_read does) instead
            # of formatting the whole list of objects into the message
            msg = 'Incorrect use of get_one(), found %s results.'
            raise RuntimeError(msg % len(result))
        elif len(result) == 0:
            return []
        else:
            return result[0]

    def _get_uniq_id(self, obj):
        """
        Calculate the id which is stored in the uniq_id column for obj.

        :param obj: The object which is going to be stored
        :return: obj.get_uniq_id() for Info and InfoSet instances. For any
                 other iterable the hash of the concatenated item hashes, and
                 for everything else the hash of the object, both as strings.
        """
        if isinstance(obj, (Info, InfoSet)):
            uniq_id = obj.get_uniq_id()
        elif isinstance(obj, collections.Iterable):
            item_hashes = [str(hash(item)) for item in obj]
            uniq_id = str(hash(''.join(item_hashes)))
        else:
            uniq_id = str(hash(obj))

        return uniq_id

    @requires_setup
    def append(self, location_a, location_b, value, ignore_type=False):
        """
        Store value at (location_a, location_b), keeping any values which
        were already saved there, and notify the observers.

        :param location_a: First part of the address
        :param location_b: Second part of the address
        :param value: The object to store
        :param ignore_type: When False only Info, Shell and InfoSet instances
                            are accepted
        :raises TypeError: When the type of value is not accepted
        :return: None
        """
        type_is_valid = ignore_type or isinstance(value, (Info, Shell, InfoSet))

        if not type_is_valid:
            raise TypeError('You MUST use raw_write/raw_read to store non-info'
                            ' objects to the KnowledgeBase.')

        location_a = self._get_real_name(location_a)

        # One row per value: (location_a, location_b, uniq_id, pickle)
        row = (location_a,
               location_b,
               self._get_uniq_id(value),
               cpickle_dumps(value))

        insert_sql = "INSERT INTO %s VALUES (?, ?, ?, ?)" % self.table_name
        self.db.execute(insert_sql, row)

        self._notify_observers(self.APPEND,
                               location_a,
                               location_b,
                               value,
                               ignore_type=ignore_type)

    @requires_setup
    def get(self, location_a, location_b, check_types=True):
        """
        Read all the objects stored at (location_a, location_b), see
        get_iter() which does the real work.

        :param location_a: The plugin that saved the data to the
                           kb.info Typically the name of the plugin,
                           but could also be the plugin instance.

        :param location_b: The name of the variables under which the vuln
                           objects were saved. Typically the same name of
                           the plugin, or something like "vulns", "errors",
                           etc. In most cases this is NOT None. When set
                           to None all the objects saved under location_a
                           are returned.

        :param check_types: Passed to get_iter()

        :return: A list with the data that was saved by another plugin.
        """
        return list(self.get_iter(location_a,
                                  location_b,
                                  check_types=check_types))

    @requires_setup
    def get_iter(self, location_a, location_b, check_types=True):
        """
        Same as get() but yields items one by one instead of returning
        a list with all the items.

        :param location_a: First part of the address
        :param location_b: Second part of the address, when None all the
                           objects stored under location_a are selected
        :param check_types: When True only Info, InfoSet and Shell instances
                            are accepted
        :raises TypeError: When check_types is True and one of the stored
                           objects has a different type
        """
        real_location_a = self._get_real_name(location_a)

        # Always filter by location_a, and by location_b only when specified
        query = 'SELECT pickle FROM %s WHERE location_a = ?'
        params = (real_location_a,)

        if location_b is not None:
            query += ' and location_b = ?'
            params += (location_b,)

        for row in self.db.select(query % self.table_name, params):
            obj = cPickle.loads(row[0])

            if not check_types or isinstance(obj, (Info, InfoSet, Shell)):
                yield obj
                continue

            raise TypeError('Use raw_write and raw_read to query the'
                            ' knowledge base for non-Info objects')

    @requires_setup
    def get_by_uniq_id(self, uniq_id):
        """
        Read one object from the knowledge base using its unique id.

        :param uniq_id: The value stored in the uniq_id column
        :return: The unpickled object, or None when select_one() returns None
        """
        select_sql = 'SELECT pickle FROM %s WHERE uniq_id = ?'
        row = self.db.select_one(select_sql % self.table_name, (uniq_id,))

        if row is None:
            return None

        return cPickle.loads(row[0])

    @requires_setup
    def get_all_uniq_ids_iter(self, include_ids=()):
        """
        :param include_ids: If specified, only include these IDs.
        :yield: All uniq IDs from the KB
        """
        if include_ids:
            # One "?" binding for each id: "?,?,?"
            placeholders = ','.join('?' * len(include_ids))

            query = 'SELECT uniq_id FROM %s WHERE uniq_id IN (%s)'
            query %= (self.table_name, placeholders)

            rows = self.db.select(query, parameters=include_ids)
        else:
            rows = self.db.select('SELECT uniq_id FROM %s' % self.table_name)

        for row in rows:
            # Each row has only one column
            (uniq_id,) = row
            yield uniq_id

    @requires_setup
    def update(self, old_info, update_info):
        """
        Replace the row which holds old_info with update_info and notify the
        observers.

        :param old_info: The info/vuln instance to be updated in the kb.
        :param update_info: The info/vuln instance with new information
        :raises TypeError: When any of the parameters is not an Info, InfoSet
                           or Shell instance
        :raises DBException: When no row was updated
        :return: Nothing
        """
        info_types = (Info, InfoSet, Shell)
        both_are_info = (isinstance(old_info, info_types) and
                         isinstance(update_info, info_types))

        if not both_are_info:
            raise TypeError('You MUST use raw_write/raw_read to store non-info'
                            ' objects to the KnowledgeBase.')

        old_uniq_id = old_info.get_uniq_id()
        new_uniq_id = update_info.get_uniq_id()
        serialized = cpickle_dumps(update_info)

        # The row is found using the original uniq_id, then both the pickle
        # and the uniq_id are replaced
        query = "UPDATE %s SET pickle = ?, uniq_id = ? WHERE uniq_id = ?"
        query %= self.table_name

        outcome = self.db.execute(query,
                                  (serialized, new_uniq_id, old_uniq_id)).result()

        if not outcome.rowcount:
            ex = ('Failed to update() %s instance because'
                  ' the original unique_id (%s) does not exist in the DB,'
                  ' or the new unique_id (%s) is invalid.')
            raise DBException(ex % (old_info.__class__.__name__,
                                    old_uniq_id,
                                    new_uniq_id))

        self._notify_observers(self.UPDATE, old_info, update_info)

    def add_observer(self, observer):
        """
        Register the observer instance in the observers dict, using a new
        id from get_observer_id() as key.

        :param observer: The instance to register
        :return: None
        """
        self.observers[self.get_observer_id()] = observer

    def get_observer_id(self):
        """
        Increment the observer id counter.

        :return: The new value of the counter
        """
        next_id = self._observer_id + 1
        self._observer_id = next_id
        return next_id

    def _notify_observers(self, method, *args, **kwargs):
        """
        Call the observer if the location_a/location_b matches with the
        configured observers.

        :return: None
        """
        # Note that I copy the items list in order to iterate though it without
        # any issues like the size changing
        for _, observer in self.observers.items()[:]:
            functor = getattr(observer, method)
            functor(*args, **kwargs)

    @requires_setup
    def get_all_entries_of_class(self, klass, exclude_ids=()):
        """
        List version of get_all_entries_of_class_iter().

        :param klass: The class (or tuple of classes) to filter by
        :param exclude_ids: Unique ids of the objects to ignore
        :return: A list of all objects where class in klass that are saved in
                 the kb.
        """
        entries = self.get_all_entries_of_class_iter(klass,
                                                     exclude_ids=exclude_ids)
        return list(entries)

    @requires_setup
    def get_all_entries_of_class_iter(self, klass, exclude_ids=()):
        """
        :param klass: The class (or tuple of classes) to filter by
        :param exclude_ids: Unique ids of the objects to ignore, the filter is
                            applied in the SQL query
        :yield: All objects where class in klass that are saved in the kb.
        """
        # One "?" binding for each excluded id: "?,?,?"
        placeholders = ','.join('?' * len(exclude_ids))

        query = 'SELECT uniq_id, pickle FROM %s WHERE uniq_id NOT IN (%s)'
        query %= (self.table_name, placeholders)

        for _, serialized_obj in self.db.select(query, parameters=exclude_ids):
            obj = cPickle.loads(serialized_obj)

            # The class is only known after unpickling the object
            if isinstance(obj, klass):
                yield obj

    @requires_setup
    def get_all_vulns(self):
        """
        :return: A list of all the stored objects which have a get_severity()
                 method that returns LOW, MEDIUM or HIGH
        """
        rows = self.db.select('SELECT pickle FROM %s' % self.table_name)

        vulns = []

        for row in rows:
            obj = cPickle.loads(row[0])

            # Objects saved with raw_write() might not have a severity
            if not hasattr(obj, 'get_severity'):
                continue

            if obj.get_severity() in (LOW, MEDIUM, HIGH):
                vulns.append(obj)

        return vulns

    @requires_setup
    def get_all_infos(self):
        """
        :return: A list of all the stored objects which have a get_severity()
                 method that returns INFORMATION
        """
        rows = self.db.select('SELECT pickle FROM %s' % self.table_name)

        infos = []

        for row in rows:
            obj = cPickle.loads(row[0])

            # Objects saved with raw_write() might not have a severity
            if not hasattr(obj, 'get_severity'):
                continue

            if obj.get_severity() in (INFORMATION,):
                infos.append(obj)

        return infos

    @requires_setup
    def dump(self):
        """
        Read the whole knowledge base table into memory.

        :return: A dict with this shape:
                    {location_a: {location_b: [obj, ...]}}
        """
        select_sql = 'SELECT location_a, location_b, pickle FROM %s'
        rows = self.db.select(select_sql % self.table_name)

        result_dict = {}

        for location_a, location_b, serialized_obj in rows:
            obj = cPickle.loads(serialized_obj)

            # Create the nested containers the first time each key is seen
            by_location_b = result_dict.setdefault(location_a, {})
            by_location_b.setdefault(location_b, []).append(obj)

        return result_dict

    @requires_setup
    def cleanup(self):
        """
        Cleanup internal data: all the rows of the knowledge base table are
        deleted, the URL and fuzzable request sets are replaced with new
        (empty) ones and the observers are forgotten.

        :return: None
        """
        self.db.execute("DELETE FROM %s WHERE 1=1" % self.table_name)

        # Each new set is created and assigned before the old one is cleaned
        old_urls, self.urls = self.urls, DiskSet(table_prefix='kb_urls')
        old_urls.cleanup()

        old_fuzzable_requests, self.fuzzable_requests = (
            self.fuzzable_requests,
            DiskSet(table_prefix='kb_fuzzable_requests'))
        old_fuzzable_requests.cleanup()

        self.observers.clear()

    @requires_setup
    def remove(self):
        """
        Drop the knowledge base table, clean up the URL and fuzzable request
        sets and forget all the registered observers.

        :return: None
        """
        self.db.drop_table(self.table_name)

        for disk_set in (self.urls, self.fuzzable_requests):
            disk_set.cleanup()

        self.observers.clear()

    @requires_setup
    def get_all_known_urls(self):
        """
        Accessor for the store which holds every URL added with add_url().

        :return: The object stored in self.urls, a DiskSet with all the known
                 URLs as URL objects.
        """
        known_urls = self.urls
        return known_urls

    @requires_setup
    def add_url(self, url):
        """
        Notify the observers (ADD_URL) about url and then store it in
        self.urls.

        Note that the observers are called before the URL is stored, and that
        they are called even when the URL was already known.

        :param url: The URL instance to store
        :return: The result of self.urls.add(url): True if the URL was
                 previously unknown
        :raises TypeError: When url is not a URL instance
        """
        if isinstance(url, URL):
            self._notify_observers(self.ADD_URL, url)
            return self.urls.add(url)

        raise TypeError('add_url requires a URL as parameter got %s instead.'
                        % type(url))

    @requires_setup
    def get_all_known_fuzzable_requests(self):
        """
        Accessor for the store which holds every request added with
        add_fuzzable_request().

        :return: The object stored in self.fuzzable_requests, a DiskSet with
                 all the known FuzzableRequest objects.
        """
        known_requests = self.fuzzable_requests
        return known_requests

    @requires_setup
    def add_fuzzable_request(self, fuzzable_request):
        """
        Store the request in self.fuzzable_requests, its URL is also
        registered using add_url().

        :param fuzzable_request: The FuzzableRequest instance to store
        :return: The result of self.fuzzable_requests.add(): True if the
                 FuzzableRequest was previously unknown
        :raises TypeError: When fuzzable_request is not a FuzzableRequest
        """
        if isinstance(fuzzable_request, FuzzableRequest):
            # The URL is added first, which also notifies the observers
            self.add_url(fuzzable_request.get_url())
            return self.fuzzable_requests.add(fuzzable_request)

        raise TypeError('add_fuzzable_request requires a FuzzableRequest'
                        ' as parameter, got "%s" instead.'
                        % type(fuzzable_request))
Exemplo n.º 9
0
class web_spider(CrawlPlugin):
    """
    Crawl the web application.

    :author: Andres Riancho ([email protected])
    """
    UNAUTH_FORBID = {http_constants.UNAUTHORIZED, http_constants.FORBIDDEN}

    def __init__(self):
        """
        Initialize the internal state and the default user configuration.
        """
        CrawlPlugin.__init__(self)

        # User configured variables
        self._only_forward = False
        self._ignore_extensions = []
        self._follow_regex = '.*'
        self._ignore_regex = ''

        # Internal variables
        self._first_run = True
        self._target_domain = None
        self._target_urls = []
        self._compiled_follow_re = None
        self._compiled_ignore_re = None
        self._broken_links = DiskSet(table_prefix='web_spider')
        self._already_filled_form = ScalableBloomFilter()
        self._variant_db = VariantDB()

        # Needs to run after the user configured variables were set
        # NOTE(review): presumably compiles the regular expressions
        # configured above, confirm in _compile_re()
        self._compile_re()

    def crawl(self, fuzzable_request, debugging_id):
        """
        Send the HTTP request and search the response for forms and links.

        :param debugging_id: A unique identifier for this call to crawl()
        :param fuzzable_request: A fuzzable_req instance that contains
                                 (among other things) the URL to test.
        :return: None
        """
        self._handle_first_run()

        #
        # Forms are smart_fill()'ed in order to send parameters that make
        # sense and will allow us to cover more code. This is done only once
        # for each form URL.
        #
        data_container = fuzzable_request.get_raw_data()
        if isinstance(data_container, Form):

            if fuzzable_request.get_url() in self._already_filled_form:
                return

            self._already_filled_form.add(fuzzable_request.get_url())
            data_container.smart_fill()

        # Send the HTTP request
        resp = self._uri_opener.send_mutant(fuzzable_request)

        # Nothing to do with unauthorized responses and images. We don't
        # trust what comes from the core, so the 404 check is done here too
        nothing_to_do = (resp.get_code() == http_constants.UNAUTHORIZED
                         or resp.is_image()
                         or is_404(resp))
        if nothing_to_do:
            return

        self._extract_html_forms(resp, fuzzable_request)
        self._extract_links_and_verify(resp, fuzzable_request)

    def _extract_html_forms(self, resp, fuzzable_req):
        """
        Parse the HTTP response body and extract the HTML forms, one
        FuzzableRequest for each form variant is put() on the output queue.

        :param resp: HTTP response object
        :param fuzzable_req: The HTTP request that generated the response,
                             its headers are used in the new requests
        :return: None
        """
        try:
            doc_parser = parser_cache.dpc.get_document_parser_for(resp)
        except BaseFrameworkException:
            # Failed to find a suitable parser for the document
            return

        # The variants of each form are generated according to this setting
        mode = cf.cf.get('form_fuzzing_mode')

        for form_params in doc_parser.get_forms():

            # Form exclusion #15161
            form_id_json = form_params.get_form_id().to_json()
            om.out.debug('A new form was found! Form-id is: "%s"' % form_id_json)

            if self._should_analyze_url(form_params.get_action()):
                headers = fuzzable_req.get_headers()

                for variant in form_params.get_variants(mode):
                    # The variant (a FormParameters instance) is converted to
                    # a Multipart or URLEncoded form, which is a DataContainer
                    data_container = dc_from_form_params(variant)

                    new_request = FuzzableRequest.from_form(data_container,
                                                            headers=headers)
                    self.output_queue.put(new_request)

    def _handle_first_run(self):
        """
        Save the target URLs and the target domain, this is only done the
        first time the method is called.

        :return: None
        """
        if self._first_run:
            # These variables are required to code the "only_forward" feature
            self._first_run = False
            self._target_urls = [target.uri2url()
                                 for target in cf.cf.get('targets')]

            # Reading the domain with:
            #
            #   cf.cf.get('targets')[0].get_domain()
            #
            # triggered lots of bugs when the "stop" button was pressed and
            # the core did this: "cf.cf.save('targets', [])", so the target
            # list is checked before it is used. Awful but bug-free.
            targets = cf.cf.get('targets')

            if targets:
                self._target_domain = targets[0].get_domain()
                
    def _urls_to_verify_generator(self, resp, fuzzable_req):
        """
        Chain the URLs found in the URL path, the response body and the
        response headers, yielding only the ones accepted by
        _should_verify_extracted_url().

        Yields tuples containing:
            * Newly found URL
            * The FuzzableRequest instance passed as parameter
            * The HTTPResponse generated by the FuzzableRequest
            * Boolean indicating if we trust this reference or not

        :param resp: HTTP response object
        :param fuzzable_req: The HTTP request that generated the response
        """
        candidates = itertools.chain(
            self._url_path_url_generator(resp, fuzzable_req),
            self._body_url_generator(resp, fuzzable_req),
            headers_url_generator(resp, fuzzable_req))

        for ref, original_req, original_resp, possibly_broken in candidates:
            if not self._should_verify_extracted_url(ref, original_resp):
                continue

            yield ref, original_req, original_resp, possibly_broken

    def _url_path_url_generator(self, resp, fuzzable_req):
        """
        Generate the URLs of all the directories in the response URL. If the
        URL w3af just found is:

            http://localhost/a/b/c/f00.php

        I want to GET (and analyze the responses for):

            http://localhost/a/b/c/
            http://localhost/a/b/
            http://localhost/a/
            http://localhost/

        Yields tuples containing:
            * Newly found URL
            * The FuzzableRequest instance passed as parameter
            * The HTTPResponse generated by the FuzzableRequest
            * Boolean indicating if we trust this reference or not, which is
              always False here

        :param resp: HTTP response object
        :param fuzzable_req: The HTTP request that generated the response
        """
        directories = resp.get_url().get_directories()

        for directory_url in unique_justseen(directories):
            yield directory_url, fuzzable_req, resp, False

    def _body_url_generator(self, resp, fuzzable_req):
        """
        Generate the URLs which are extracted from the HTTP response body
        using one of the framework's parsers. Nothing is generated when there
        is no suitable parser for the response.

        Yields tuples containing:
            * Newly found URL
            * The FuzzableRequest instance passed as parameter
            * The HTTPResponse generated by the FuzzableRequest
            * Boolean indicating if we trust this reference or not

        :param resp: HTTP response object
        :param fuzzable_req: The HTTP request that generated the response
        """
        #
        # Note: I WANT to follow links that are in the 404 page.
        #
        try:
            doc_parser = parser_cache.dpc.get_document_parser_for(resp)
        except BaseFrameworkException as w3:
            om.out.debug('Failed to find a suitable document parser. '
                         'Exception "%s"' % w3)
            return

        # Note:
        #
        # - parsed_refs are 100% something in the HTML that the developer
        #   intended to add.
        #
        # - re_refs are the result of regular expressions, which in some
        #   cases are just false positives.
        parsed_refs, re_refs = doc_parser.get_references()

        # References which were found only by the regular expressions
        dirs = resp.get_url().get_directories()
        only_re_refs = set(re_refs).difference(dirs + parsed_refs)

        all_refs = sorted(itertools.chain(parsed_refs, re_refs))
        resp_is_404 = is_404(resp)

        for ref in unique_justseen(all_refs):
            possibly_broken = resp_is_404 or (ref in only_re_refs)
            yield ref, fuzzable_req, resp, possibly_broken

    def _should_analyze_url(self, ref):
        """
        Match the URL against the user configured filters. The filters are
        applied in this order: target domain, follow regex, ignore regex,
        ignored extensions and "only forward". A debug message is written
        for the first filter which rejects the URL.

        :param ref: A URL instance to match against the user configured filters
        :return: True if we should navigate to this URL
        """
        if ref.get_domain() != self._target_domain:
            # I don't want w3af sending requests to 3rd parties!
            msg = 'web_spider will ignore %s (different domain name)'
            ignored = ref.get_domain()

        elif not self._compiled_follow_re.match(ref.url_string):
            msg = 'web_spider will ignore %s (not match follow regex)'
            ignored = ref.url_string

        elif self._compiled_ignore_re.match(ref.url_string):
            msg = 'web_spider will ignore %s (match ignore regex)'
            ignored = ref.url_string

        elif self._has_ignored_extension(ref):
            msg = 'web_spider will ignore %s (match ignore extensions)'
            ignored = ref.url_string

        elif self._only_forward and not self._is_forward(ref):
            # Implementing only forward
            msg = 'web_spider will ignore %s (is not forward)'
            ignored = ref.url_string

        else:
            # None of the filters rejected the URL
            return True

        om.out.debug(msg % (ignored,))
        return False

    def _has_ignored_extension(self, new_url):
        if not self._ignore_extensions:
            return False

        return new_url.get_extension().lower() in self._ignore_extensions

    def _should_verify_extracted_url(self, ref, resp):
        """
        :param ref: A newly found URL
        :param resp: The HTTP response where the URL was found

        :return: Boolean indicating if I should send this new reference to the
                 core: False for references to the response URI itself, for
                 URLs rejected by _should_analyze_url() and for requests
                 which the variant DB does not accept.
        """
        # Ignore myself
        is_self_reference = ref == resp.get_uri()

        if is_self_reference or not self._should_analyze_url(ref):
            return False

        #
        # About the VariantDB: I tried to have only one in the framework
        # instead of two, but after some tests and architecture
        # considerations it was better to duplicate the data.
        #
        # In the future I'll run plugins in different processes than the core,
        # so it makes sense to have independent plugins.
        #
        # The framework keeps working if the web_spider VariantDB is removed
        # and only the one in the core is left, but this method will return
        # True much more often, which leads to extra HTTP requests for URLs
        # which we already checked and the core will dismiss anyway
        #
        was_appended = self._variant_db.append(FuzzableRequest(ref))
        return bool(was_appended)

    def _extract_links_and_verify(self, resp, fuzzable_req):
        """
        This is a very basic method that will send the work to different
        threads. Work is generated by the _urls_to_verify_generator

        :param resp: HTTP response object
        :param fuzzable_req: The HTTP request that generated the response
        """
        self.worker_pool.map_multi_args(
            self._verify_reference,
            self._urls_to_verify_generator(resp, fuzzable_req))

    def _verify_reference(self, reference, original_request,
                          original_response, possibly_broken,
                          be_recursive=True):
        """
        GET a newly found link. When the response is not a 404 the link is
        sent to the core through the output queue; when it is a 404 the error
        page is parsed for more links and the link may be stored as broken.

        :param reference: Newly found URL
        :param original_request: The FuzzableRequest instance which generated
                                 the response where the new URL was found
        :param original_response: The HTTPResponse generated by the
                                  FuzzableRequest
        :param possibly_broken: Boolean indicating if we trust this reference
                                or not
        :param be_recursive: When True, the links found in a 404 response are
                             verified too (with be_recursive=False)
        :return: None
        """
        #
        # Using the exact original URL as referer "breaks" the cache=True in
        # most cases:
        #     headers = { 'Referer': original_url }
        #
        # The base URL does not, and it is friendlier than simply ignoring
        # the referer
        #
        referer = original_response.get_url().base_url().url_string
        headers = Headers([('Referer', referer)])

        # Note: grep=False because this HTTP request/response has a high
        #       probability of being a 404, and the grep plugins already got
        #       enough 404 responses to analyze (from is_404 for example). If
        #       it's not a 404 then it is pushed to the core and it will come
        #       back to this plugin's crawl() where it will be requested with
        #       grep=True
        resp = self._uri_opener.GET(reference, cache=True, headers=headers,
                                    grep=False)

        if is_404(resp):
            # Note: I WANT to follow links that are in the 404 page, but
            # DO NOT return the 404 itself to the core.
            #
            # This parses the 404 response and adds the 404-links to the
            # output queue, so that the core can get them
            if be_recursive:
                #
                # Only one level of links is followed in 404 pages, this
                # limits the potential issue when this is found:
                #
                #   http://foo.com/abc/ => 404
                #   Body: <a href="def/">link</a>
                #
                # Which would lead to requests to:
                #   * http://foo.com/abc/
                #   * http://foo.com/abc/def/
                #   * http://foo.com/abc/def/def/
                #   * http://foo.com/abc/def/def/def/
                #   * ...
                #
                # Do not use threads here, it will dead-lock (for unknown
                # reasons). This is tested in TestDeadLock unittest.
                links_in_404 = self._urls_to_verify_generator(resp,
                                                              original_request)
                for verify_args in links_in_404:
                    self._verify_reference(*verify_args, be_recursive=False)

            # Store the broken links
            if not (possibly_broken or
                    resp.get_code() in self.UNAUTH_FORBID):
                broken_link = (resp.get_url(), original_request.get_uri())
                self._broken_links.add(broken_link)

            return

        debug_fmt = '[web_spider] Found new link "%s" at "%s"'
        om.out.debug(debug_fmt % (reference, original_response.get_url()))

        fuzz_req = FuzzableRequest(reference, headers=headers)

        # Setting the referer and cookie in the FuzzableRequest sent to the
        # core is what later allows the fuzzer to create CookieMutant and
        # HeadersMutant instances.
        #
        # Without the Cookie, the CookieMutant would never have any data to
        # modify; remember that cookies are actually set by the urllib2
        # cookie handler when the request already exited the framework.
        cookie = Cookie.from_http_response(original_response)

        fuzz_req.set_referer(referer)
        fuzz_req.set_cookie(cookie)

        self.output_queue.put(fuzz_req)

    def end(self):
        """
        Called when the process ends: prints out the list of broken links
        and then calls cleanup() on the store that was holding them.
        """
        broken_links = self._broken_links

        if len(broken_links) > 0:
            om.out.information('The following is a list of broken links that'
                               ' were found by the web_spider plugin:')

            line_fmt = '- %s [ referenced from: %s ]'
            for broken, where in unique_justseen(broken_links.ordered_iter()):
                om.out.information(line_fmt % (broken, where))

        broken_links.cleanup()

    def _is_forward(self, reference):
        """
        Check if the reference is inside the target directories.

        :return: True if reference is an URL inside the directory structure of
                 at least one of the target URLs.
        """
        for domain_path in self._target_urls:
            if reference.url_string.startswith(domain_path.url_string):
                return True

        return False

    def get_options(self):
        """
        Build the list of user configurable options, using the current
        values of the plugin as defaults.

        :return: A list of option objects for this plugin.
        """
        precedence_note = (' Note that ignore_regex has precedence over'
                           ' follow_regex.')

        only_forward_desc = ('Only crawl links to paths inside the URL given'
                             ' as target.')
        follow_desc = ('Only crawl links that match this regular expression.'
                       + precedence_note)
        ignore_desc = ('DO NOT crawl links that match this regular'
                       ' expression.' + precedence_note)
        extensions_desc = 'DO NOT crawl links that use these extensions.'
        extensions_help = ('This configuration parameter is commonly used to'
                           ' ignore static files such as zip, pdf, jpeg, etc.'
                           ' It is possible to ignore these files using'
                           ' `ignore_regex`, but configuring this parameter'
                           ' is easier and performs case insensitive'
                           ' matching.')

        options = OptionList()

        options.add(opt_factory('only_forward', self._only_forward,
                                only_forward_desc, BOOL))
        options.add(opt_factory('follow_regex', self._follow_regex,
                                follow_desc, REGEX))
        options.add(opt_factory('ignore_regex', self._ignore_regex,
                                ignore_desc, REGEX))
        options.add(opt_factory('ignore_extensions', self._ignore_extensions,
                                extensions_desc, LIST, help=extensions_help))

        return options

    def set_options(self, options_list):
        """
        Store the values configured by the user through the interface that
        the framework generates from the result of get_options().

        The regular expressions are compiled right after being stored and the
        ignored extensions are kept in lower case.

        :param options_list: A dictionary with the options for the plugin.
        :return: No value is returned.
        """
        def configured(option_name):
            return options_list[option_name].get_value()

        self._only_forward = configured('only_forward')

        self._ignore_regex = configured('ignore_regex')
        self._follow_regex = configured('follow_regex')
        self._compile_re()

        raw_extensions = configured('ignore_extensions')
        self._ignore_extensions = [ext.lower() for ext in raw_extensions]

    def _compile_re(self):
        """
        Compile the regular expressions that are going to be used to ignore
        or follow links.
        """
        if self._ignore_regex:
            # Compilation of this regex can't fail because it was already
            # verified as valid at regex_option.py: see REGEX in get_options()
            self._compiled_ignore_re = re.compile(self._ignore_regex)
        else:
            # If the self._ignore_regex is empty then I don't have to ignore
            # anything. To be able to do that, I simply compile an re with "abc"
            # as the pattern, which won't match any URL since they will all
            # start with http:// or https://
            self._compiled_ignore_re = re.compile('abc')

        # Compilation of this regex can't fail because it was already
        # verified as valid at regex_option.py: see REGEX in get_options()
        self._compiled_follow_re = re.compile(self._follow_regex)

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """
Exemplo n.º 10
0
class DBKnowledgeBase(BasicKnowledgeBase):
    """
    This class saves the data that is sent to it by plugins. It is the only way
    in which plugins can exchange information.

    Data is stored in a DB.

    :author: Andres Riancho ([email protected])
    """
    def __init__(self):
        """
        Create the backend stores: one DiskSet for the known URLs, one for
        the known fuzzable requests and a randomly named table (indexed by
        location and by uniq_id) in the default persistent DB.
        """
        super(DBKnowledgeBase, self).__init__()

        self.urls = DiskSet(table_prefix='kb_urls')
        self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')

        self.db = get_default_persistent_db_instance()

        columns = [('location_a', 'TEXT'), ('location_b', 'TEXT'),
                   ('uniq_id', 'TEXT'), ('pickle', 'BLOB')]

        self.table_name = 'knowledge_base_' + rand_alpha(30)
        self.db.create_table(self.table_name, columns)
        self.db.create_index(self.table_name, ['location_a', 'location_b'])
        self.db.create_index(self.table_name, ['uniq_id'])
        self.db.commit()

        # TODO: Why doesn't this work with a WeakValueDictionary?
        # Keys are (location_a, location_b, observer_id) tuples
        self.observers = {}  #WeakValueDictionary()
        # Keys are (type_filter, observer_id) tuples
        self.type_observers = {}  #WeakValueDictionary()
        self.url_observers = []
        self._observer_id = 0

    def clear(self, location_a, location_b):
        """
        Delete all the rows stored at (location_a, location_b).
        """
        location_a = self._get_real_name(location_a)

        query = "DELETE FROM %s WHERE location_a = ? and location_b = ?"
        params = (location_a, location_b)
        self.db.execute(query % self.table_name, params)

    def raw_write(self, location_a, location_b, value):
        """
        This method saves value to (location_a,location_b) but previously
        clears any pre-existing values.

        :raises TypeError: When value is an Info instance, those need to be
                           stored using append or append_uniq.
        """
        if isinstance(value, Info):
            raise TypeError(
                'Use append or append_uniq to store vulnerabilities')

        location_a = self._get_real_name(location_a)

        self.clear(location_a, location_b)
        self.append(location_a, location_b, value, ignore_type=True)

    def raw_read(self, location_a, location_b):
        """
        This method reads the value from (location_a,location_b)

        :return: The stored value, or an empty list when nothing was stored
                 at that location.
        :raises RuntimeError: When more than one row is found, which means
                              that the location was not written using
                              raw_write.
        """
        location_a = self._get_real_name(location_a)
        result = self.get(location_a, location_b, check_types=False)

        if len(result) > 1:
            # Report the number of rows, not the rows themselves
            msg = 'Incorrect use of raw_write/raw_read, found %s rows.'
            raise RuntimeError(msg % len(result))
        elif len(result) == 0:
            return []
        else:
            return result[0]

    def _get_uniq_id(self, obj):
        """
        :return: The string stored in the uniq_id column for obj: the
                 object's own unique id for Info instances, the hash of the
                 concatenated str() of the items for other iterables and the
                 hash of the object itself for anything else.
        """
        if isinstance(obj, Info):
            return obj.get_uniq_id()

        if isinstance(obj, collections.Iterable):
            concat_all = ''.join([str(i) for i in obj])
            return str(hash(concat_all))

        return str(hash(obj))

    def append(self, location_a, location_b, value, ignore_type=False):
        """
        Store value as a new row at (location_a, location_b) and notify the
        observers. Pre-existing values at that location are kept.

        :param ignore_type: When False (the default) only Info and Shell
                            instances are accepted.
        :raises TypeError: When value is not an Info or Shell instance and
                           ignore_type is False.
        """
        if not ignore_type and not isinstance(value, (Info, Shell)):
            msg = 'You MUST use raw_write/raw_read to store non-info objects'\
                  ' to the KnowledgeBase.'
            raise TypeError(msg)

        location_a = self._get_real_name(location_a)
        uniq_id = self._get_uniq_id(value)

        pickled_obj = cpickle_dumps(value)
        t = (location_a, location_b, uniq_id, pickled_obj)

        query = "INSERT INTO %s VALUES (?, ?, ?, ?)" % self.table_name
        self.db.execute(query, t)
        self._notify(location_a, location_b, value)

    def get(self, location_a, location_b, check_types=True):
        """
        :param location_a: The plugin that saved the data to the
                           kb.info Typically the name of the plugin,
                           but could also be the plugin instance.

        :param location_b: The name of the variables under which the vuln
                           objects were saved. Typically the same name of
                           the plugin, or something like "vulns", "errors",
                           etc. In most cases this is NOT None. When set
                           to None, all the objects saved by location_a are
                           returned.

        :param check_types: When True (the default) a TypeError is raised if
                            one of the stored objects is not an Info or Shell
                            instance.

        :return: A list with the data that was saved by another plugin.
        """
        location_a = self._get_real_name(location_a)

        if location_b is None:
            query = 'SELECT pickle FROM %s WHERE location_a = ?'
            params = (location_a, )
        else:
            query = 'SELECT pickle FROM %s WHERE location_a = ?'\
                                           ' and location_b = ?'
            params = (location_a, location_b)

        result_lst = []

        results = self.db.select(query % self.table_name, params)
        for r in results:
            obj = cPickle.loads(r[0])

            if check_types and not isinstance(obj, (Info, Shell)):
                raise TypeError('Use raw_write and raw_read to query the'
                                ' knowledge base for non-Info objects')

            result_lst.append(obj)

        return result_lst

    def get_by_uniq_id(self, uniq_id):
        """
        :return: The object stored with the given uniq_id, or None when
                 there is no such row.
        """
        query = 'SELECT pickle FROM %s WHERE uniq_id = ?'
        params = (uniq_id, )

        result = self.db.select_one(query % self.table_name, params)

        if result is not None:
            result = cPickle.loads(result[0])

        return result

    def add_observer(self, location_a, location_b, observer):
        """
        Add the observer function to the observer list. The function will be
        called when there is a change in (location_a, location_b).

        You can use None in location_a or location_b as wildcards.

        The observer function needs to be a function which takes three params:
            * location_a
            * location_b
            * value that's added to the kb location

        :raises TypeError: When location_a or location_b is neither a string
                           nor None.
        :return: None
        """
        valid_types = (basestring, types.NoneType)

        # Both locations are validated
        if not isinstance(location_a, valid_types) or \
        not isinstance(location_b, valid_types):
            raise TypeError('Observer locations need to be strings or None.')

        observer_id = self.get_observer_id()
        self.observers[(location_a, location_b, observer_id)] = observer

    def add_types_observer(self, type_filter, observer):
        """
        Add the observer function to the list of functions to be called when a
        new object that is of type "type_filter" is added to the KB.

        The type_filter must be one of Info, Vuln or Shell.

        :raises TypeError: When type_filter is not one of those classes.
        :return: None
        """
        if type_filter not in (Info, Vuln, Shell):
            msg = 'The type_filter needs to be one of Info, Vuln or Shell'
            raise TypeError(msg)

        observer_id = self.get_observer_id()
        self.type_observers[(type_filter, observer_id)] = observer

    def get_observer_id(self):
        """
        :return: A new observer id, obtained by incrementing the internal
                 counter.
        """
        self._observer_id += 1
        return self._observer_id

    def _notify(self, location_a, location_b, value):
        """
        Call the observer if the location_a/location_b matches with the
        configured observers, and the type observers whose type_filter
        matches the value.

        :return: None
        """
        # Note that I copy the items list in order to iterate though it without
        # any issues like the size changing
        for (obs_loc_a, obs_loc_b, _), observer in list(self.observers.items()):

            if obs_loc_a is None and obs_loc_b is None:
                observer(location_a, location_b, value)
                continue

            if obs_loc_a == location_a and obs_loc_b is None:
                observer(location_a, location_b, value)
                continue

            if obs_loc_a == location_a and obs_loc_b == location_b:
                observer(location_a, location_b, value)
                continue

        for (type_filter, _), observer in list(self.type_observers.items()):
            if isinstance(value, type_filter):
                observer(location_a, location_b, value)

    def get_all_entries_of_class(self, klass):
        """
        :return: A list of all the objects saved in the kb which are
                 instances of klass.
        """
        query = 'SELECT pickle FROM %s'
        results = self.db.select(query % self.table_name)

        result_lst = []

        for r in results:
            obj = cPickle.loads(r[0])
            if isinstance(obj, klass):
                result_lst.append(obj)

        return result_lst

    def dump(self):
        """
        :return: A dict with all the stored objects, in the form
                 {location_a: {location_b: [obj, ...]}}
        """
        result_dict = {}

        query = 'SELECT location_a, location_b, pickle FROM %s'
        results = self.db.select(query % self.table_name)

        for location_a, location_b, pickle in results:
            obj = cPickle.loads(pickle)

            by_location_b = result_dict.setdefault(location_a, {})
            by_location_b.setdefault(location_b, []).append(obj)

        return result_dict

    def cleanup(self):
        """
        Cleanup internal data: all the rows are deleted from the table, the
        DiskSets are replaced with new ones and the location observers are
        removed.
        """
        self.db.execute("DELETE FROM %s WHERE 1=1" % self.table_name)

        # Remove the old, create new.
        self.urls.cleanup()
        self.urls = DiskSet(table_prefix='kb_urls')

        self.fuzzable_requests.cleanup()
        self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')

        # NOTE(review): type_observers and url_observers are not cleared
        # here, confirm if that is intended
        self.observers.clear()

    def remove(self):
        """
        Drop the table, cleanup the DiskSets and remove the location
        observers.
        """
        self.db.drop_table(self.table_name)
        self.urls.cleanup()
        self.fuzzable_requests.cleanup()
        # NOTE(review): type_observers and url_observers are not cleared
        # here, confirm if that is intended
        self.observers.clear()

    def get_all_known_urls(self):
        """
        :return: A DiskSet with all the known URLs as URL objects.
        """
        return self.urls

    def add_url_observer(self, observer):
        """
        Add the observer function to the list of functions which are called
        with the URL each time add_url() is invoked.
        """
        self.url_observers.append(observer)

    def _notify_url_observers(self, new_url):
        """
        Call the observer with new_url.

        :return: None
        """
        # Note that I copy the list in order to iterate though it without
        # any issues like the size changing
        for observer in self.url_observers[:]:
            observer(new_url)

    def add_url(self, url):
        """
        Notify the URL observers and store the URL. Note that the observers
        are called before the URL is stored, even for already known URLs.

        :raises TypeError: When url is not a URL instance.
        :return: True if the URL was previously unknown
        """
        if not isinstance(url, URL):
            msg = 'add_url requires a URL as parameter got %s instead.'
            raise TypeError(msg % type(url))

        self._notify_url_observers(url)
        return self.urls.add(url)

    def get_all_known_fuzzable_requests(self):
        """
        :return: A DiskSet with all the known fuzzable requests as
                 FuzzableRequest objects.
        """
        return self.fuzzable_requests

    def add_fuzzable_request(self, fuzzable_request):
        """
        Store the fuzzable request and also add its URL using add_url().

        :raises TypeError: When fuzzable_request is not a FuzzableRequest
                           instance.
        :return: True if the FuzzableRequest was previously unknown
        """
        if not isinstance(fuzzable_request, FuzzableRequest):
            msg = 'add_fuzzable_request requires a FuzzableRequest as '\
                  'parameter, got "%s" instead.'
            raise TypeError(msg % type(fuzzable_request))

        self.add_url(fuzzable_request.get_url())
        return self.fuzzable_requests.add(fuzzable_request)
Exemplo n.º 11
0
class find_captchas(CrawlPlugin):
    """
    Identify captcha images on web pages.
    :author: Andres Riancho ([email protected])
    """
    def __init__(self):
        """
        Initialize the plugin and create the DiskSet where the sources of
        the already identified CAPTCHA images are stored.
        """
        CrawlPlugin.__init__(self)

        # Filled and queried by _identify_captchas() in order to report each
        # CAPTCHA image only once
        self._captchas_found = DiskSet(table_prefix='find_captchas')

    def crawl(self, fuzzable_request):
        """
        Find CAPTCHA images. Each identified CAPTCHA is stored in the
        knowledge base as an Info object and reported to the user.

        :param fuzzable_request: A fuzzable_request instance that contains
                                    (among other things) the URL to test.
        """
        found, captchas = self._identify_captchas(fuzzable_request)

        if found:
            for captcha in captchas:
                desc = 'Found a CAPTCHA image at: "%s".' % captcha.img_src
                response_ids = [resp.id for resp in captcha.http_responses]

                info = Info('Captcha image detected', desc, response_ids,
                            self.get_name())
                info.set_uri(captcha.img_src)

                kb.kb.append(self, 'CAPTCHA', info)
                om.out.information(info.get_desc())

    def _identify_captchas(self, fuzzable_request):
        """
        :return: A tuple with the following information:
                    * True indicating that the page has CAPTCHAs
                    * A list with tuples that contain:
                        * The CAPTCHA image source
                        * The http responses used to verify that the image was
                          indeed a CAPTCHA
        """
        found_captcha = False
        captchas = []

        # GET the document, and fetch the images
        images_1 = self._get_images(fuzzable_request)

        # Re-GET the document, and fetch the images
        images_2 = self._get_images(fuzzable_request)

        # If the number of images in each response is different, don't even
        # bother to perform any analysis since our simplistic approach will fail
        #
        # TODO: Add something more advanced.
        if len(images_1) != len(images_2):
            return

        not_in_2 = []

        for img_src_1, img_hash_1, http_response_1 in images_1:
            for _, img_hash_2, http_response_2 in images_2:
                if img_hash_1 == img_hash_2:
                    # The image is in both lists, can't be a CAPTCHA
                    break
            else:
                not_in_2.append(
                    (img_src_1, img_hash_1, [http_response_1,
                                             http_response_2]))

        # Results
        #
        # TODO: This allows for more than one CAPTCHA in the same page. Does
        #       that make sense? When that's found, should I simply declare
        #       defeat and don't report anything?
        for img_src, _, http_responses in not_in_2:

            CaptchaInfo = namedtuple('CaptchaInfo',
                                     ['img_src', 'http_responses'])
            img_src = img_src.uri2url()

            if img_src not in self._captchas_found:
                self._captchas_found.add(img_src)
                found_captcha = True

                captchas.append(CaptchaInfo(img_src, http_responses))

        return found_captcha, captchas

    def _get_images(self, fuzzable_request):
        """
        Get all img tags and retrieve the src.

        :param fuzzable_request: The request to modify
        :return: A list with tuples containing (img_src, image_hash, http_response)
        """
        res = []

        try:
            response = self._uri_opener.GET(fuzzable_request.get_uri(),
                                            cache=False)
        except:
            om.out.debug('Failed to retrieve the page for finding captchas.')
        else:
            # Do not use parser_cache here, it's not good since CAPTCHA implementations
            # *might* change the image name for each request of the HTML
            #
            # dp = parser_cache.dpc.get_document_parser_for( response )
            #
            try:
                document_parser = DocumentParser.DocumentParser(response)
            except BaseFrameworkException:
                return []

            image_path_list = document_parser.get_references_of_tag('img')

            GET = self._uri_opener.GET
            sha1 = hashlib.sha1

            result_iter = self.worker_pool.imap_unordered(GET, image_path_list)

            for image_response in result_iter:
                if image_response.is_image():
                    img_src = image_response.get_uri()
                    img_hash = sha1(image_response.get_body()).hexdigest()
                    res.append((img_src, img_hash, response))

        return res

    def end(self):
        """
        Call cleanup() on the DiskSet that stores the sources of the
        identified CAPTCHA images.
        """
        self._captchas_found.cleanup()

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """
Exemplo n.º 12
0
class error_500(GrepPlugin):
    """
    Grep every page for error 500 pages that haven't been identified as bugs by
    other plugins.

    :author: Andres Riancho ([email protected])
    """

    IGNORE_CODES = (404, 403, 401, 405, 400, 501)
    FALSE_POSITIVE_STRINGS = ('<h1>Bad Request (Invalid URL)</h1>', )

    def __init__(self):
        """
        Initialize the plugin and create the DiskSet where the responses
        selected by grep() are stored until end() is called.
        """
        GrepPlugin.__init__(self)

        # Holds (request, response id) tuples, added by grep() and consumed
        # by end()
        self._error_500_responses = DiskSet(table_prefix='error_500')

    def grep(self, request, response):
        """
        Plugin entry point, identify which requests generated a 500 error.

        The (request, response id) tuple is stored only when the response
        code is between 400 and 600 (exclusive), it is not one of the
        IGNORE_CODES, the response is text or HTML and it is not a known
        false positive.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        is_unhandled_error = (
            400 < response.get_code() < 600 and
            response.get_code() not in self.IGNORE_CODES and
            response.is_text_or_html() and
            not self._is_false_positive(response)
        )

        if is_unhandled_error:
            self._error_500_responses.add((request, response.id))

    def _is_false_positive(self, response):
        """
        Filters out some false positives like this one:

        This false positive is generated by IIS when I send an URL that's "odd"
        Some examples of URLs that trigger this false positive:
            - http://127.0.0.2/ext.ini.%00.txt
            - http://127.0.0.2/%00/
            - http://127.0.0.2/%0a%0a<script>alert(\Vulnerable\)</script>.jsp

        :return: True if the response is a false positive.
        """
        for fps in self.FALSE_POSITIVE_STRINGS:
            if fps in response.get_body():
                return True
        return False

    def end(self):
        """
        This method is called when the plugin wont be used anymore.

        The real job of this plugin is done here: each stored error response
        whose id is not referenced by any of the findings in the knowledge
        base is reported as a vulnerability. The stored responses are
        cleaned up afterwards.
        """
        # Collect the ids of all the responses which are already associated
        # with a finding
        all_vuln_ids = set()
        for info in kb.kb.get_all_findings_iter():
            all_vuln_ids.update(info.get_id())

        desc_fmt = ('An unidentified web application error (HTTP response'
                    ' code 500) was found at: "%s". Enable all plugins and'
                    ' try again, if the vulnerability still is not'
                    ' identified, please verify manually and report it to'
                    ' the w3af developers.')

        for request, error_500_response_id in self._error_500_responses:

            if error_500_response_id in all_vuln_ids:
                # Some other plugin already identified this one
                continue

            # Found a error 500 that wasn't identified !
            desc = desc_fmt % request.get_url()

            vuln = Vuln('Unhandled error in web application',
                        desc, severity.MEDIUM, error_500_response_id,
                        self.get_name())
            vuln.set_uri(request.get_uri())

            self.kb_append_uniq(self, 'error_500', vuln, 'VAR')

        self._error_500_responses.cleanup()

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """