Example #1
    def __init__(self):
        GrepPlugin.__init__(self)

        self._total_http_request_count = 0
        self._vuln_count = 0
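        # The vulnerable URLs and ids are de-duplicated and persisted to
        # disk; table_prefix only names the backing database table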
        self._vuln_urls = DiskSet(table_prefix='click_jacking')
        self._vuln_ids = DiskSet(table_prefix='click_jacking')
Example #2
    def test_store_in_disk_set(self):
        boundary, post_data = multipart_encode([
            ('a', 'bcd'),
        ], [])
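        # MULTIPART_HEADER is a content-type template string; the boundary
        # generated above is interpolated into it below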
        multipart_boundary = MultipartContainer.MULTIPART_HEADER

        headers = Headers([('content-length', str(len(post_data))),
                           ('content-type', multipart_boundary % boundary)])

        dc = MultipartContainer.from_postdata(headers, post_data)

        dc.set_token(('a', 0))

        disk_set = DiskSet()
        disk_set.add(dc)

        dc_read = disk_set[0]

        # These are different objects
        self.assertIsNot(dc_read, dc)

        # But they hold the same data
        self.assertEqual(dc.get_token(), dc_read.get_token())
        self.assertIsNotNone(dc.get_token())
        self.assertIsNotNone(dc_read.get_token())
        self.assertEqual(dc_read.get_token().get_name(), 'a')
Example #3
    def test_add_HTTPPostDataRequest(self):
        ds = DiskSet()

        uri = URL("http://w3af.org/?id=2")
        hdr = Headers([("Referer", "http://w3af.org/")])

        pdr1 = HTTPPostDataRequest(uri, method="GET", headers=hdr)

        uri = URL("http://w3af.org/?id=3")
        pdr2 = HTTPPostDataRequest(uri, method="GET", headers=hdr)

        uri = URL("http://w3af.org/?id=7")
        pdr3 = HTTPPostDataRequest(uri, method="FOO", headers=hdr)

        ds.add(pdr1)
        ds.add(pdr2)
        ds.add(pdr2)
        ds.add(pdr1)

        self.assertEqual(ds[0], pdr1)
        self.assertEqual(ds[1], pdr2)
        self.assertFalse(pdr3 in ds)
        self.assertTrue(pdr2 in ds)
        self.assertEqual(len(ds), 2)

        # This forces an internal change in the URL object
        pdr2.get_url().url_string
        self.assertTrue(pdr2 in ds)
Example #4
    def setup(self):
        """
        Setup all the required backend stores. This was mostly created to avoid
        starting any threads during __init__() which is called during python's
        import phase and dead-locks in some cases.

        :return: None
        """
        with self._kb_lock:
            if self.initialized:
                return

            self.urls = DiskSet(table_prefix='kb_urls')
            self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')

            self.db = get_default_persistent_db_instance()

            self.table_name = 'knowledge_base_' + rand_alpha(30)
            self.db.create_table(self.table_name, self.COLUMNS)
            self.db.create_index(self.table_name, ['location_a', 'location_b'])
            self.db.create_index(self.table_name, ['uniq_id'])
            self.db.commit()

            # Only initialize once
            self.initialized = True
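
The docstring above explains the design: the backend stores are built in setup() instead of __init__() because __init__() may run during Python's import phase, where starting threads can dead-lock. A minimal sketch of the same lock-guarded lazy-initialization pattern, assuming only the standard library (the LazyStore name is hypothetical, not part of w3af):

import threading

class LazyStore(object):
    def __init__(self):
        # Cheap construction only: no threads or DB handles here
        self._lock = threading.RLock()
        self.initialized = False

    def setup(self):
        # Idempotent: safe to call from several threads, initializes once
        with self._lock:
            if self.initialized:
                return
            # ...create DiskSets / DB tables here, never in __init__()...
            self.initialized = True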
Example #5
    def __init__(self):
        GrepPlugin.__init__(self)

        self._analyzed_hashes = DiskSet(table_prefix='retirejs')
        self._retirejs_path = self._get_retirejs_path()
        self._retirejs_exit_code_result = None
        self._retirejs_exit_code_was_run = False
Example #6
    def test_add_QsRequest(self):
        ds = DiskSet()

        uri = URL('http://w3af.org/?id=2')
        hdr = Headers([('Referer', 'http://w3af.org/')])

        qsr1 = FuzzableRequest(uri, method='GET', headers=hdr)

        uri = URL('http://w3af.org/?id=3')
        qsr2 = FuzzableRequest(uri, method='GET', headers=hdr)

        uri = URL('http://w3af.org/?id=7')
        qsr3 = FuzzableRequest(uri, method='FOO', headers=hdr)

        ds.add(qsr1)
        ds.add(qsr2)
        ds.add(qsr2)
        ds.add(qsr1)

        self.assertEqual(ds[0], qsr1)
        self.assertEqual(ds[1], qsr2)
        self.assertFalse(qsr3 in ds)
        self.assertTrue(qsr2 in ds)
        self.assertEqual(len(ds), 2)

        # This forces an internal change in the URL object
        qsr2.get_url().url_string
        self.assertIn(qsr2, ds)
Example #8
class dot_ds_store(CrawlPlugin):
    """
    Search for the .DS_Store file and extract the file entries it contains.

    :author: Tomas Velazquez ( [email protected] )
    :author: Andres Riancho ( [email protected] )

    :credits: This code was based on the CPAN module Mac::Finder::DSStore by Wim Lewis ( [email protected] )
    """
    DS_STORE = '.DS_Store'

    def __init__(self):
        CrawlPlugin.__init__(self)

        # Internal variables
        self._analyzed_dirs = DiskSet()

    def crawl(self, fuzzable_request):
        """
        For every directory, fetch a list of files and analyze the response.

        :parameter fuzzable_request: A fuzzable_request instance that contains
                                    (among other things) the URL to test.
        """
        directories_to_check = []

        for domain_path in fuzzable_request.get_url().get_directories():
            if domain_path not in self._analyzed_dirs:
                self._analyzed_dirs.add(domain_path)
                directories_to_check.append(domain_path)

        # Send the requests using threads
        self.worker_pool.map(self._check_and_analyze, directories_to_check)

    def _check_and_analyze(self, domain_path):
        """
        Check if a .DS_Store filename exists in the domain_path.

        :return: None, everything is saved to the self.out_queue.
        """
        # Request the file
        url = domain_path.url_join(self.DS_STORE)

        try:
            response = self.http_get_and_parse(url, binary_response=True)
        except BaseFrameworkException, w3:
            msg = 'Failed to GET .DS_Store file: %s. Exception: %s.'
            om.out.debug(msg % (url, w3))
            return

        # Check if it's a .DS_Store file
        if is_404(response):
            return

        try:
            store = DsStore(response.get_raw_body())
            entries = store.get_file_entries()
        except Exception, e:
            om.out.debug('Unexpected error while parsing DS_Store file: "%s"' % e)
            return
Example #9
    def __init__(self):
        super(ParserCache, self).__init__()

        self._cache = SynchronizedLRUDict(self.CACHE_SIZE)
        self._can_parse_cache = SynchronizedLRUDict(self.CACHE_SIZE * 10)
        self._parser_finished_events = {}
        self._parser_blacklist = DiskSet()
Example #10
class dwsync_xml(CrawlPlugin):
    """
    Search for the Dreamweaver sync file (dwsync.xml) and extract the files it references.

    :author: Tomas Velazquez ([email protected])
    """

    DWSYNC = '_notes/dwsync.xml'

    def __init__(self):
        CrawlPlugin.__init__(self)
        
        # Internal variables
        self._analyzed_dirs = DiskSet()

    def crawl(self, fuzzable_request):
        """
        For every directory, fetch a list of files and analyze the response.
        
        :parameter fuzzable_request: A fuzzable_request instance that contains
                                    (among other things) the URL to test.
        """
        for domain_path in fuzzable_request.get_url().get_directories():
            if domain_path not in self._analyzed_dirs:
                self._analyzed_dirs.add(domain_path)
                self._find_dwsync(domain_path)

    def _find_dwsync(self, domain_path):
        dwsync_url = domain_path.url_join(self.DWSYNC)
        response = self.http_get_and_parse(dwsync_url)

        if is_404(response):
            return

        if '</dwsync>' not in response.get_body():
            return

        om.out.debug('Parsing dwsync.xml file at %s' % dwsync_url)

        try:
            dom = xml.dom.minidom.parseString(response.get_body())
        except Exception, e:
            msg = 'Exception while parsing dwsync.xml file at %s : "%s"'
            om.out.debug(msg % (dwsync_url, e))
            return

        parsed_url_list = set()

        for file_entry in dom.getElementsByTagName('file'):
            try:
                _file = file_entry.getAttribute('name')
                url = domain_path.url_join(_file)
                parsed_url_list.add(url)
            except ValueError, ve:
                msg = 'dwsync file had an invalid URL: "%s"'
                om.out.debug(msg % ve)
            except Exception, e:
                msg = 'Sitemap file had an invalid format: "%s"'
                om.out.debug(msg % e)
Example #12
    def test_disk_set(self):
        ds = DiskSet()

        for i in xrange(20000):
            data = (i, i)
            ds.add(data)

        for i in xrange(20000):
            data = (i, i)
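            # Membership lookup against the on-disk index; the boolean result
            # is intentionally discarded by this smoke test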
            data in ds
Example #13
    def test_add(self):
        ds = DiskSet()
        ds.add(1)
        ds.add(2)
        ds.add(3)
        ds.add(1)

        self.assertEqual(list(ds), [1, 2, 3])
        self.assertEqual(len(ds), 3)
        self.assertEqual(unicode(ds), u'<DiskSet [1, 2, 3]>')
Example #14
    def __init__(self):
        """
        CHANGELOG:
            Feb/17/2009- Added PHP Settings Audit Checks by Aung Khant
            (aungkhant[at]yehg.net)
        """
        CrawlPlugin.__init__(self)

        # Internal variables
        self._analyzed_dirs = DiskSet(table_prefix='phpinfo')
        self._has_audited = 0
Example #15
    def test_table_name_with_prefix(self):
        _unittest = 'unittest'
        disk_set = DiskSet(_unittest)

        self.assertIn(_unittest, disk_set.table_name)
        db = get_default_temp_db_instance()

        self.assertTrue(db.table_exists(disk_set.table_name))

        disk_set.cleanup()

        self.assertFalse(db.table_exists(disk_set.table_name))
Example #16
    def test_add_urlobject(self):
        ds = DiskSet()

        ds.add(URL('http://w3af.org/?id=2'))
        ds.add(URL('http://w3af.org/?id=3'))
        ds.add(URL('http://w3af.org/?id=3'))

        self.assertEqual(ds[0], URL('http://w3af.org/?id=2'))
        self.assertEqual(ds[1], URL('http://w3af.org/?id=3'))
        self.assertEqual(len(ds), 2)
        self.assertFalse(URL('http://w3af.org/?id=4') in ds)
        self.assertTrue(URL('http://w3af.org/?id=2') in ds)
Example #18
    def test_remove_table(self):
        disk_set = DiskSet()
        disk_set.add(1)
        disk_set.add(2)

        table_name = disk_set.table_name
        db = get_default_temp_db_instance()

        self.assertTrue(db.table_exists(table_name))

        disk_set.cleanup()

        self.assertFalse(db.table_exists(table_name))
Example #19
    def __init__(self):
        CrawlPlugin.__init__(self)

        # Internal variables
        self._first_run = True
        self._already_analyzed = DiskSet(table_prefix='open_api')

        # User configured variables
        self._query_string_auth = ''
        self._header_auth = ''
        self._no_spec_validation = False
        self._custom_spec_location = ''
        self._discover_fuzzable_headers = True
Example #20
    def __init__(self):
        CrawlPlugin.__init__(self)

        # User configured parameters
        self._dir_list = os.path.join(self.BASE_PATH, 'common_dirs_small.db')
        self._file_list = os.path.join(self.BASE_PATH, 'common_files_small.db')

        self._bf_directories = True
        self._bf_files = False
        self._be_recursive = False

        # Internal variables
        self._exec = True
        self._already_tested = DiskSet(table_prefix='dir_file_bruter')
Example #21
class phpinfo(CrawlPlugin):
    """
    Search for a phpinfo() file and, if one is found, determine the PHP version.
    :author: Viktor Gazdag ( [email protected] )
    """

    """
    CHANGELOG:
        Feb/17/2009- Added PHP Settings Audit Checks by Aung Khant (aungkhant[at]yehg.net)
    """

    def __init__(self):
        CrawlPlugin.__init__(self)

        # Internal variables
        self._analyzed_dirs = DiskSet()
        self._has_audited = 0

    def crawl(self, fuzzable_request):
        """
        For every directory, fetch a list of files and analyze the response.

        :param fuzzable_request: A fuzzable_request instance that contains
                                    (among other things) the URL to test.
        """
        for domain_path in fuzzable_request.get_url().get_directories():

            if domain_path in self._analyzed_dirs:
                continue
            
            self._analyzed_dirs.add(domain_path)

            url_repeater = repeat(domain_path)
            args = izip(url_repeater, self._get_potential_phpinfos())

            self.worker_pool.map_multi_args(self._check_and_analyze, args)

    def _check_and_analyze(self, domain_path, php_info_filename):
        """
        Check if a php_info_filename exists in the domain_path.
        :return: None, everything is put() into the self.output_queue.
        """
        # Request the file
        php_info_url = domain_path.url_join(php_info_filename)
        try:
            response = self._uri_opener.GET(php_info_url, cache=True)
        except BaseFrameworkException, w3:
            msg = 'Failed to GET phpinfo file: "%s". Exception: "%s".'
            om.out.debug(msg % (php_info_url, w3))
        else:
Example #22
    def cleanup(self):
        """
        Cleanup internal data.
        """
        self.db.execute("DELETE FROM %s WHERE 1=1" % self.table_name)

        # Remove the old, create new.
        self.urls.cleanup()
        self.urls = DiskSet()

        self.fuzzable_requests.cleanup()
        self.fuzzable_requests = DiskSet()

        self.observers.clear()
Example #23
    def cleanup(self):
        """
        Cleanup internal data.
        """
        self.db.execute("DELETE FROM %s WHERE 1=1" % self.table_name)

        # Remove the old, create new.
        self.urls.cleanup()
        self.urls = DiskSet(table_prefix='kb_urls')

        self.fuzzable_requests.cleanup()
        self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')

        self.observers.clear()
Example #25
    def test_store_fuzzable_request(self):
        form_params = FormParameters()
        form_params.add_input([("name", "username"), ("value", "abc")])
        form_params.add_input([("name", "address"), ("value", "")])
        form_params.set_action(URL('http://example.com/?id=1'))
        form_params.set_method('post')

        form = dc_from_form_params(form_params)

        fr = FuzzableRequest.from_form(form)

        ds = DiskSet()
        ds.add(fr)

        stored_fr = ds[0]

        self.assertEqual(stored_fr, fr)
        self.assertIsNot(stored_fr, fr)
Example #27
    def __init__(self):
        CrawlPlugin.__init__(self)

        # Internal variables
        self._compiled_ignore_re = None
        self._compiled_follow_re = None
        self._broken_links = DiskSet(table_prefix='web_spider')
        self._first_run = True
        self._target_urls = []
        self._target_domain = None
        self._already_filled_form = ScalableBloomFilter()
        self._variant_db = VariantDB()

        # User configured variables
        self._ignore_regex = ''
        self._follow_regex = '.*'
        self._only_forward = False
        self._ignore_extensions = []
        self._compile_re()
Example #28
    def __init__(self):
        CrawlPlugin.__init__(self)

        # Internal variables
        self._compiled_ignore_re = None
        self._compiled_follow_re = None
        self._broken_links = DiskSet(table_prefix='hidden_payment_gateway')
        self._first_run = True
        self._target_urls = []
        self._target_domain = None
Example #29
    def __init__(self):
        """
        CHANGELOG:
            Feb/17/2009- Added PHP Settings Audit Checks by Aung Khant
            (aungkhant[at]yehg.net)
        """
        CrawlPlugin.__init__(self)

        # Internal variables
        self._analyzed_dirs = DiskSet()
        self._has_audited = 0
Example #30
    def test_multipart_fuzzable_request_store(self):
        boundary, post_data = multipart_encode([('a', 'bcd'), ], [])
        multipart_boundary = MultipartContainer.MULTIPART_HEADER

        headers = Headers([('content-length', str(len(post_data))),
                           ('content-type', multipart_boundary % boundary)])

        dc = MultipartContainer.from_postdata(headers, post_data)
        post_data = str(dc)

        fr = FuzzableRequest.from_parts(URL('http://www.w3af.com/'),
                                        method='POST', post_data=post_data,
                                        headers=headers)
        
        disk_set = DiskSet()
        disk_set.add(fr)

        fr_read = disk_set[0]

        self.assertIsInstance(fr_read.get_raw_data(), MultipartContainer)
        self.assertIn('a', fr_read.get_raw_data())
Example #32
    def __init__(self):
        super(DBKnowledgeBase, self).__init__()

        self.urls = DiskSet()
        self.fuzzable_requests = DiskSet()

        self.db = get_default_persistent_db_instance()

        columns = [("location_a", "TEXT"), ("location_b", "TEXT"), ("uniq_id", "TEXT"), ("pickle", "BLOB")]

        self.table_name = rand_alpha(30)
        self.db.create_table(self.table_name, columns)
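        # Index by storage location and by unique id so that get() and
        # get_by_uniq_id() lookups avoid full-table scans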
        self.db.create_index(self.table_name, ["location_a", "location_b"])
        self.db.create_index(self.table_name, ["uniq_id"])
        self.db.commit()

        # TODO: Why doesn't this work with a WeakValueDictionary?
        self.observers = {}  # WeakValueDictionary()
        self.type_observers = {}  # WeakValueDictionary()
        self.url_observers = []
        self._observer_id = 0
Example #33
    def test_store_fuzzable_request_two(self):
        ds = DiskSet()

        # Add a simple fr, without post-data
        fr = FuzzableRequest(URL('http://example.com/?id=1'))
        ds.add(fr)

        # Add a fr with post-data
        form_params = FormParameters()
        form_params.add_field_by_attr_items([("name", "username"),
                                             ("value", "abc")])
        form_params.add_field_by_attr_items([("name", "address"),
                                             ("value", "")])
        form_params.set_action(URL('http://example.com/?id=1'))
        form_params.set_method('post')

        form = dc_from_form_params(form_params)

        fr = FuzzableRequest.from_form(form)
        ds.add(fr)

        # Compare
        stored_fr = ds[1]

        self.assertEqual(stored_fr, fr)
        self.assertIsNot(stored_fr, fr)
Example #34
    def test_add_HTTPPostDataRequest(self):
        ds = DiskSet()

        uri = URL('http://w3af.org/?id=2')
        hdr = Headers([('Referer', 'http://w3af.org/')])

        pdr1 = HTTPPostDataRequest(uri, method='GET', headers=hdr)

        uri = URL('http://w3af.org/?id=3')
        pdr2 = HTTPPostDataRequest(uri, method='GET', headers=hdr)

        uri = URL('http://w3af.org/?id=7')
        pdr3 = HTTPPostDataRequest(uri, method='FOO', headers=hdr)

        ds.add(pdr1)
        ds.add(pdr2)
        ds.add(pdr2)
        ds.add(pdr1)

        self.assertEqual(ds[0], pdr1)
        self.assertEqual(ds[1], pdr2)
        self.assertFalse(pdr3 in ds)
        self.assertTrue(pdr2 in ds)
        self.assertEqual(len(ds), 2)

        # This forces an internal change in the URL object
        pdr2.get_url().url_string
        self.assertTrue(pdr2 in ds)
Example #37
    def test_store_in_disk_set(self):
        boundary, post_data = multipart_encode([('a', 'bcd'), ], [])
        multipart_boundary = MultipartContainer.MULTIPART_HEADER

        headers = Headers([('content-length', str(len(post_data))),
                           ('content-type', multipart_boundary % boundary)])

        dc = MultipartContainer.from_postdata(headers, post_data)

        dc.set_token(('a', 0))

        disk_set = DiskSet()
        disk_set.add(dc)

        dc_read = disk_set[0]

        # These are different objects
        self.assertIsNot(dc_read, dc)

        # But they hold the same data
        self.assertEqual(dc.get_token(), dc_read.get_token())
        self.assertIsNotNone(dc.get_token())
        self.assertIsNotNone(dc_read.get_token())
        self.assertEqual(dc_read.get_token().get_name(), 'a')
Example #38
    def __init__(self):
        super(DBKnowledgeBase, self).__init__()
        
        self.urls = DiskSet(table_prefix='kb_urls')
        self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')
        
        self.db = get_default_persistent_db_instance()

        columns = [('location_a', 'TEXT'),
                   ('location_b', 'TEXT'),
                   ('uniq_id', 'TEXT'),
                   ('pickle', 'BLOB')]

        self.table_name = 'knowledge_base_' + rand_alpha(30)
        self.db.create_table(self.table_name, columns)
        self.db.create_index(self.table_name, ['location_a', 'location_b'])
        self.db.create_index(self.table_name, ['uniq_id',])
        self.db.commit()
        
        # TODO: Why doesn't this work with a WeakValueDictionary?
        self.observers = {} #WeakValueDictionary()
        self.type_observers = {} #WeakValueDictionary()
        self.url_observers = []
        self._observer_id = 0
Example #39
    def test_store_fuzzable_request_two(self):
        ds = DiskSet()

        # Add a simple fr, without post-data
        fr = FuzzableRequest(URL('http://example.com/?id=1'))
        ds.add(fr)

        # Add a fr with post-data
        form_params = FormParameters()
        form_params.add_field_by_attr_items([("name", "username"), ("value", "abc")])
        form_params.add_field_by_attr_items([("name", "address"), ("value", "")])
        form_params.set_action(URL('http://example.com/?id=1'))
        form_params.set_method('post')

        form = dc_from_form_params(form_params)

        fr = FuzzableRequest.from_form(form)
        ds.add(fr)

        # Compare
        stored_fr = ds[1]

        self.assertEqual(stored_fr, fr)
        self.assertIsNot(stored_fr, fr)
Example #40
    def __init__(self):
        super(DBKnowledgeBase, self).__init__()

        self.urls = DiskSet()
        self.fuzzable_requests = DiskSet()

        self.db = get_default_persistent_db_instance()

        columns = [('location_a', 'TEXT'), ('location_b', 'TEXT'),
                   ('uniq_id', 'TEXT'), ('pickle', 'BLOB')]

        self.table_name = rand_alpha(30)
        self.db.create_table(self.table_name, columns)
        self.db.create_index(self.table_name, ['location_a', 'location_b'])
        self.db.create_index(self.table_name, ['uniq_id'])
        self.db.commit()

        # TODO: Why doesn't this work with a WeakValueDictionary?
        self.observers = {}  #WeakValueDictionary()
        self.type_observers = {}  #WeakValueDictionary()
        self.url_observers = []
        self._observer_id = 0
Example #42
    def __init__(self):
        CrawlPlugin.__init__(self)

        # Internal variables
        self._compiled_ignore_re = None
        self._compiled_follow_re = None
        self._broken_links = DiskSet()
        self._first_run = True
        self._known_variants = VariantDB()
        self._already_filled_form = ScalableBloomFilter()

        # User configured variables
        self._ignore_regex = ''
        self._follow_regex = '.*'
        self._only_forward = False
        self._compile_re()
Example #45
    def test_thread_safe(self):
        ds = DiskSet()

        def worker(range_inst):
            for i in range_inst:
                ds.add(i)

        threads = []
        _min = 0
        add_dups = False
        for _max in xrange(0, 1100, 100):

            th = threading.Thread(target=worker, args=(xrange(_min, _max), ))
            threads.append(th)

            # For testing the uniqueness of DiskSets
            add_dups = not add_dups
            if add_dups:
                th = threading.Thread(target=worker,
                                      args=(xrange(_min, _max), ))
                threads.append(th)

            _min = _max

        for th in threads:
            th.start()

        for th in threads:
            th.join()

        for i in xrange(0, 1000):
            self.assertTrue(i in ds, i)

        ds_as_list = list(ds)
        self.assertEqual(len(ds_as_list), len(set(ds_as_list)))

        ds_as_list.sort()
        self.assertEqual(ds_as_list, range(1000))
Example #46
class find_captchas(CrawlPlugin):
    """
    Identify captcha images on web pages.
    :author: Andres Riancho ([email protected])
    """

    def __init__(self):
        CrawlPlugin.__init__(self)

        self._captchas_found = DiskSet(table_prefix='find_captchas')

    def crawl(self, fuzzable_request):
        """
        Find CAPTCHA images.

        :param fuzzable_request: A fuzzable_request instance that contains
                                    (among other things) the URL to test.
        """
        result, captchas = self._identify_captchas(fuzzable_request)
        
        if not result:
            return

        for captcha in captchas:

            desc = 'Found a CAPTCHA image at: "%s".' % captcha.img_src
            response_ids = [response.id for response in captcha.http_responses]

            i = Info('Captcha image detected', desc, response_ids, self.get_name())
            i.set_uri(captcha.img_src)

            kb.kb.append(self, 'CAPTCHA', i)
            om.out.information(i.get_desc())

    def _identify_captchas(self, fuzzable_request):
        """
        :return: A tuple with the following information:
                    * True indicating that the page has CAPTCHAs
                    * A list with tuples that contain:
                        * The CAPTCHA image source
                        * The http responses used to verify that the image was
                          indeed a CAPTCHA
        """
        found_captcha = False
        captchas = []
        
        # GET the document, and fetch the images
        images_1 = self._get_images(fuzzable_request)

        # Re-GET the document, and fetch the images
        images_2 = self._get_images(fuzzable_request)

        # If the number of images in each response is different, don't even
        # bother to perform any analysis since our simplistic approach will fail
        # TODO: Add something more advanced.
        if len(images_1) == len(images_2):

            not_in_2 = []

            for img_src_1, img_hash_1, http_response_1 in images_1:
                for _, img_hash_2, http_response_2 in images_2:
                    if img_hash_1 == img_hash_2:
                        # The image is in both lists, can't be a CAPTCHA
                        break
                else:
                    not_in_2.append((img_src_1, img_hash_1, [http_response_1, http_response_2]))

            # Results
            #
            # TODO: This allows for more than one CAPTCHA in the same page. Does
            #       that make sense? When that's found, should I simply declare
            #       defeat and don't report anything?
            for img_src, _, http_responses in not_in_2:

                CaptchaInfo = namedtuple('CaptchaInfo', ['img_src',
                                                         'http_responses'])
                img_src = img_src.uri2url()
                
                if img_src not in self._captchas_found:
                    self._captchas_found.add(img_src)
                    found_captcha = True
                    
                    captchas.append(CaptchaInfo(img_src, http_responses))
                    
        return found_captcha, captchas
        
    def _get_images(self, fuzzable_request):
        """
        Get all img tags and retrieve the src.

        :param fuzzable_request: The request to modify
        :return: A list with tuples containing (img_src, image_hash, http_response)
        """
        res = []

        try:
            response = self._uri_opener.GET(fuzzable_request.get_uri(),
                                            cache=False)
        except:
            om.out.debug('Failed to retrieve the page for finding captchas.')
        else:
            # Do not use parser_cache here, it's not good since CAPTCHA implementations
            # *might* change the image name for each request of the HTML
            #dp = parser_cache.dpc.get_document_parser_for( response )
            try:
                document_parser = DocumentParser.DocumentParser(response)
            except BaseFrameworkException:
                return []
            
            image_path_list = document_parser.get_references_of_tag('img')

            GET = self._uri_opener.GET
            sha1 = hashlib.sha1
            
            result_iter = self.worker_pool.imap_unordered(GET, image_path_list)
            
            for image_response in result_iter:
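                # Keep only real images, and fingerprint each body with SHA1
                # so the two GETs of the page can be compared by content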
                if image_response.is_image():
                    img_src = image_response.get_uri()
                    img_hash = sha1(image_response.get_body()).hexdigest()
                    res.append((img_src, img_hash, response))

        return res

    def end(self):
        self._captchas_found.cleanup()

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """
Example #47
    def __init__(self):
        CrawlPlugin.__init__(self)

        # Internal variables
        self._analyzed_dirs = DiskSet(table_prefix='phpinfo')
        self._has_audited = False
Example #48
class ParserCache(CacheStats):
    """
    This class is a document parser cache.

    :author: Andres Riancho ([email protected])
    """
    CACHE_SIZE = 10
    MAX_CACHEABLE_BODY_LEN = 1024 * 1024
    DEBUG = core_profiling_is_enabled()

    def __init__(self):
        super(ParserCache, self).__init__()
        
        self._cache = SynchronizedLRUDict(self.CACHE_SIZE)
        self._can_parse_cache = SynchronizedLRUDict(self.CACHE_SIZE * 10)
        self._parser_finished_events = {}
        self._parser_blacklist = DiskSet()

    def clear(self):
        """
        Clear all the internal variables
        :return: None
        """
        om.out.debug('Called clear() on ParserCache')

        # Stop any workers
        mp_doc_parser.stop_workers()

        # Make sure the parsers clear all resources
        for parser in self._cache.itervalues():
            if hasattr(parser, 'clear'):
                parser.clear()

        # We don't need the parsers anymore
        self._cache.clear()
        self._can_parse_cache.clear()

    def should_cache(self, http_response):
        """
        Defines if this http_response parser should be cached or not

        :param http_response: The http response instance
        :return: True if we should cache the parser for this response
        """
        return len(http_response.get_body()) < self.MAX_CACHEABLE_BODY_LEN

    def can_parse(self, http_response):
        """
        Check if we can parse an HTTP response

        :param http_response: The HTTP response to verify
        :return: True if we can parse this HTTP response
        """
        cached_can_parse = self._can_parse_cache.get(http_response.get_id(), default=None)

        if cached_can_parse is not None:
            return cached_can_parse

        #
        # We need to verify if we can parse this HTTP response
        #
        try:
            can_parse = DocumentParser.can_parse(http_response)
        except:
            # We catch all the exceptions here and just return False because
            # the real parsing procedure will (most likely) fail to parse
            # this response too.
            can_parse = False

        # Cache the verdict under the response id, matching the lookup above
        self._can_parse_cache[http_response.get_id()] = can_parse
        return can_parse

    def add_to_blacklist(self, hash_string):
        """
        Add a hash_string representing an HTTP response to the blacklist,
        indicating that we will never try to parse this response again.

        :return: None
        """
        self._parser_blacklist.add(hash_string)

    def get_document_parser_for(self, http_response, cache=True):
        """
        Get a document parser for http_response using the cache if possible

        :param http_response: The http response instance
        :param cache: True if the document parser should be saved to the cache
        :return: An instance of DocumentParser
        """
        #
        # Before doing anything too complex like caching, sending the HTTP
        # response to a different process for parsing, checking events, etc.
        # check if we can parse this HTTP response.
        #
        # This is a performance improvement that works *only if* the
        # DocumentParser.can_parse call is *fast*, which means that the
        # `can_parse` implementations of each parser needs to be fast
        #
        # It doesn't matter if we say "yes" here and then parsing exceptions
        # appear later, that should be a 1 / 10000 calls and we would still
        # be gaining a lot of performance
        #
        if not self.can_parse(http_response):
            msg = 'There is no parser for "%s".'
            raise BaseFrameworkException(msg % http_response.get_url())

        hash_string = get_response_unique_id(http_response)

        if hash_string in self._parser_blacklist:
            msg = 'Exceeded timeout while parsing "%s" in the past. Not trying again.'
            raise BaseFrameworkException(msg % http_response.get_url())

        #
        # We know that we can parse this document, lets work!
        #
        parser_finished = self._parser_finished_events.get(hash_string, None)
        if parser_finished is not None:
            # There is one subprocess already processing this http response
            # body, the best thing to do here is to make this thread wait
            # until that process has finished
            wait_result = parser_finished.wait(timeout=mp_doc_parser.PARSER_TIMEOUT)
            if not wait_result:
                # Act just like when there is no parser
                msg = 'There is no parser for "%s". Waited more than %s sec.'
                args = (http_response.get_url(), mp_doc_parser.PARSER_TIMEOUT)
                raise BaseFrameworkException(msg % args)

        # metric increase
        self.inc_query_count()

        parser = self._cache.get(hash_string, None)
        if parser is not None:
            self._handle_cache_hit(hash_string)
            return parser
        else:
            # Not in cache, have to work.
            self._handle_cache_miss(hash_string)

            # Create a new instance of DocumentParser, add it to the cache
            event = threading.Event()
            self._parser_finished_events[hash_string] = event

            try:
                parser = mp_doc_parser.get_document_parser_for(http_response)
            except TimeoutError:
                # We failed to get a parser for this HTTP response, we better
                # ban this HTTP response so we don't waste more CPU cycles trying
                # to parse it over and over.
                self.add_to_blacklist(hash_string)

                # Act just like when there is no parser
                msg = 'Reached timeout parsing "%s".' % http_response.get_url()
                raise BaseFrameworkException(msg)
            except MemoryError:
                # We failed to get a parser for this HTTP response, we better
                # ban this HTTP response so we don't waste more CPU cycles or
                # memory trying to parse it over and over.
                self.add_to_blacklist(hash_string)

                # Act just like when there is no parser
                msg = 'Reached memory usage limit parsing "%s".' % http_response.get_url()
                raise BaseFrameworkException(msg)
            except ScanMustStopException, e:
                msg = 'The document parser is in an invalid state! %s'
                raise ScanMustStopException(msg % e)
            except:
Example #49
class dir_file_bruter(CrawlPlugin):
    """
    Finds Web server directories and files by bruteforcing.

    :author: Jon Rose ( [email protected] )
    :author: Andres Riancho ( [email protected] )
    :author: Tomas Velazquez
    """

    BASE_PATH = os.path.join(ROOT_PATH, 'plugins', 'crawl', 'dir_file_bruter')

    def __init__(self):
        CrawlPlugin.__init__(self)

        # User configured parameters
        self._dir_list = os.path.join(self.BASE_PATH, 'common_dirs_small.db')
        self._file_list = os.path.join(self.BASE_PATH, 'common_files_small.db')

        self._bf_directories = True
        self._bf_files = False
        self._be_recursive = False

        # Internal variables
        self._exec = True
        self._already_tested = DiskSet(table_prefix='dir_file_bruter')

    def crawl(self, fuzzable_request):
        """
        Get the file and parse it.

        :param fuzzable_request: A fuzzable_request instance that contains
                               (among other things) the URL to test.
        """
        if not self._exec:
            raise RunOnce()
        else:
            domain_path = fuzzable_request.get_url().get_domain_path()

            # Should I run more than once?
            if not self._be_recursive:
                self._exec = False

            if domain_path not in self._already_tested:
                self._already_tested.add(domain_path)
                self._bruteforce_directories(domain_path)

    def _dir_name_generator(self, base_path):
        """
        Simple generator that returns the names of the directories and files to
        test. It extracts the information from the user configured wordlist
        parameter.

        @yields: (A string with the directory or file name,
                  a URL object with the dir or file name)
        """
        if self._bf_directories:
            for directory_name in file(self._dir_list):
                directory_name = directory_name.strip()

                # ignore comments and empty lines
                if directory_name and not directory_name.startswith('#'):
                    try:
                        dir_url = base_path.url_join(directory_name + '/')
                    except ValueError, ve:
                        msg = 'The "%s" line at "%s" generated an ' \
                              'invalid URL: %s'
                        om.out.debug(msg %
                                     (directory_name, self._dir_list, ve))
                    else:
                        yield directory_name, dir_url

        if self._bf_files:
            for file_name in file(self._file_list):
                file_name = file_name.strip()

                # ignore comments and empty lines
                if file_name and not file_name.startswith('#'):
                    try:
                        dir_url = base_path.url_join(file_name)
                    except ValueError, ve:
                        msg = 'The "%s" line at "%s" generated an ' \
                              'invalid URL: %s'
                        om.out.debug(msg % (file_name, self._file_list, ve))
                    else:
                        yield file_name, dir_url
Example #50
    def __init__(self):
        CrawlPlugin.__init__(self)

        # Internal variables
        self._analyzed_dirs = DiskSet()
        self._has_audited = 0
Example #51
class DBKnowledgeBase(BasicKnowledgeBase):
    """
    This class saves the data that is sent to it by plugins. It is the only way
    in which plugins can exchange information.

    Data is stored in a DB.

    :author: Andres Riancho ([email protected])
    """
    COLUMNS = [('location_a', 'TEXT'),
               ('location_b', 'TEXT'),
               ('uniq_id', 'TEXT'),
               ('pickle', 'BLOB')]

    def __init__(self):
        super(DBKnowledgeBase, self).__init__()
        self.initialized = False

        # TODO: Why doesn't this work with a WeakValueDictionary?
        self.observers = {} #WeakValueDictionary()
        self._observer_id = 0

    def setup(self):
        """
        Setup all the required backend stores. This was mostly created to avoid
        starting any threads during __init__() which is called during python's
        import phase and dead-locks in some cases.

        :return: None
        """
        with self._kb_lock:
            if self.initialized:
                return

            self.urls = DiskSet(table_prefix='kb_urls')
            self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')

            self.db = get_default_persistent_db_instance()

            self.table_name = 'knowledge_base_' + rand_alpha(30)
            self.db.create_table(self.table_name, self.COLUMNS)
            self.db.create_index(self.table_name, ['location_a', 'location_b'])
            self.db.create_index(self.table_name, ['uniq_id'])
            self.db.commit()

            # Only initialize once
            self.initialized = True

    @requires_setup
    def clear(self, location_a, location_b):
        location_a = self._get_real_name(location_a)

        query = "DELETE FROM %s WHERE location_a = ? and location_b = ?"
        params = (location_a, location_b)
        self.db.execute(query % self.table_name, params)

    @requires_setup
    def raw_write(self, location_a, location_b, value):
        """
        This method saves the value to (location_a, location_b), first
        clearing any pre-existing values stored at that address.
        """
        if isinstance(value, Info):
            raise TypeError('Use append or append_uniq to store vulnerabilities')

        location_a = self._get_real_name(location_a)

        self.clear(location_a, location_b)
        self.append(location_a, location_b, value, ignore_type=True)

    @requires_setup
    def raw_read(self, location_a, location_b):
        """
        This method reads the value from (location_a, location_b)
        """
        location_a = self._get_real_name(location_a)
        result = self.get(location_a, location_b, check_types=False)

        if len(result) > 1:
            msg = 'Incorrect use of raw_write/raw_read, found %s results.'
            raise RuntimeError(msg % len(result))
        elif len(result) == 0:
            return []
        else:
            return result[0]

    @requires_setup
    def get_one(self, location_a, location_b):
        """
        This method reads the value from (location_a, location_b), checking its
        type and making sure only one object is stored at that address.

        Similar to raw_read, but checking types.

        :see: https://github.com/andresriancho/w3af/issues/3955
        """
        location_a = self._get_real_name(location_a)
        result = self.get(location_a, location_b, check_types=True)

        if len(result) > 1:
            msg = 'Incorrect use of get_one(), found %s results.'
            raise RuntimeError(msg % result)
        elif len(result) == 0:
            return []
        else:
            return result[0]

    def _get_uniq_id(self, obj):
        if isinstance(obj, (Info, InfoSet)):
            return obj.get_uniq_id()
        else:
            if isinstance(obj, collections.Iterable):
                concat_all = ''.join([str(hash(i)) for i in obj])
                return str(hash(concat_all))
            else:
                return str(hash(obj))

    @requires_setup
    def append(self, location_a, location_b, value, ignore_type=False):
        """
        This method appends the value to the given (location_a, location_b)
        address.
        """
        if not ignore_type and not isinstance(value, (Info, Shell, InfoSet)):
            msg = ('You MUST use raw_write/raw_read to store non-info objects'
                   ' to the KnowledgeBase.')
            raise TypeError(msg)

        location_a = self._get_real_name(location_a)
        uniq_id = self._get_uniq_id(value)

        pickled_obj = cpickle_dumps(value)
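        # The tuple mirrors self.COLUMNS: the object travels pickled in the
        # BLOB column, keyed by location and unique id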
        t = (location_a, location_b, uniq_id, pickled_obj)

        query = "INSERT INTO %s VALUES (?, ?, ?, ?)" % self.table_name
        self.db.execute(query, t)
        self._notify_observers(self.APPEND, location_a, location_b, value,
                               ignore_type=ignore_type)

    @requires_setup
    def get(self, location_a, location_b, check_types=True):
        """
        :param location_a: The plugin that saved the data to the
                           kb. Typically the name of the plugin,
                           but could also be the plugin instance.

        :param location_b: The name of the variables under which the vuln
                           objects were saved. Typically the same name of
                           the plugin, or something like "vulns", "errors",
                           etc. In most cases this is NOT None. When set
                           to None, a dict with all the vuln objects found
                           by the plugin_name is returned.

        :return: Returns the data that was saved by another plugin.
        """
        location_a = self._get_real_name(location_a)

        if location_b is None:
            query = 'SELECT pickle FROM %s WHERE location_a = ?'
            params = (location_a,)
        else:
            query = 'SELECT pickle FROM %s WHERE location_a = ?'\
                                           ' and location_b = ?'
            params = (location_a, location_b)

        result_lst = []

        results = self.db.select(query % self.table_name, params)
        for r in results:
            obj = cPickle.loads(r[0])

            if check_types and not isinstance(obj, (Info, InfoSet, Shell)):
                raise TypeError('Use raw_write and raw_read to query the'
                                ' knowledge base for non-Info objects')

            result_lst.append(obj)

        return result_lst

    @requires_setup
    def get_by_uniq_id(self, uniq_id):
        query = 'SELECT pickle FROM %s WHERE uniq_id = ?'
        params = (uniq_id,)

        result = self.db.select_one(query % self.table_name, params)

        if result is not None:
            result = cPickle.loads(result[0])

        return result

    @requires_setup
    def update(self, old_info, update_info):
        """
        :param old_info: The info/vuln instance to be updated in the kb.
        :param update_info: The info/vuln instance with new information
        :return: Nothing
        """
        old_not_info = not isinstance(old_info, (Info, InfoSet, Shell))
        update_not_info = not isinstance(update_info, (Info, InfoSet, Shell))

        if old_not_info or update_not_info:
            msg = ('You MUST use raw_write/raw_read to store non-info objects'
                   ' to the KnowledgeBase.')
            raise TypeError(msg)

        old_uniq_id = old_info.get_uniq_id()
        new_uniq_id = update_info.get_uniq_id()
        pickle = cpickle_dumps(update_info)

        # Update the pickle and unique_id after finding by original uniq_id
        query = "UPDATE %s SET pickle = ?, uniq_id = ? WHERE uniq_id = ?"

        params = (pickle, new_uniq_id, old_uniq_id)
        result = self.db.execute(query % self.table_name, params).result()

        if result.rowcount:
            self._notify_observers(self.UPDATE, old_info, update_info)
        else:
            ex = ('Failed to update() %s instance because'
                  ' the original unique_id (%s) does not exist in the DB,'
                  ' or the new unique_id (%s) is invalid.')
            raise DBException(ex % (old_info.__class__.__name__,
                                    old_uniq_id,
                                    new_uniq_id))

    def add_observer(self, observer):
        """
        Add the observer instance to the list.
        """
        observer_id = self.get_observer_id()
        self.observers[observer_id] = observer

    def get_observer_id(self):
        self._observer_id += 1
        return self._observer_id

    def _notify_observers(self, method, *args, **kwargs):
        """
        Call the observer if the location_a/location_b matches with the
        configured observers.

        :return: None
        """
        # Note that I copy the items list in order to iterate through it
        # without any issues like the size changing during iteration
        for _, observer in self.observers.items()[:]:
            functor = getattr(observer, method)
            functor(*args, **kwargs)

    @requires_setup
    def get_all_entries_of_class(self, klass):
        """
        :return: A list of all objects of class == klass that are saved in the
                 kb.
        """
        query = 'SELECT pickle FROM %s'
        results = self.db.select(query % self.table_name)

        result_lst = []

        for r in results:
            obj = cPickle.loads(r[0])
            if isinstance(obj, klass):
                result_lst.append(obj)

        return result_lst

    @requires_setup
    def get_all_vulns(self):
        """
        :return: A list of all info instances with severity in (LOW, MEDIUM,
                 HIGH)
        """
        query = 'SELECT pickle FROM %s'
        results = self.db.select(query % self.table_name)

        result_lst = []

        for r in results:
            obj = cPickle.loads(r[0])
            if hasattr(obj, 'get_severity'):
                severity = obj.get_severity()
                if severity in (LOW, MEDIUM, HIGH):
                    result_lst.append(obj)

        return result_lst

    @requires_setup
    def get_all_infos(self):
        """
        :return: A list of all info instances with severity equal to INFORMATION
        """
        query = 'SELECT pickle FROM %s'
        results = self.db.select(query % self.table_name)

        result_lst = []

        for r in results:
            obj = cPickle.loads(r[0])
            if hasattr(obj, 'get_severity'):
                severity = obj.get_severity()
                if severity in (INFORMATION,):
                    result_lst.append(obj)

        return result_lst

    @requires_setup
    def dump(self):
        result_dict = {}

        query = 'SELECT location_a, location_b, pickle FROM %s'
        results = self.db.select(query % self.table_name)

        for location_a, location_b, pickle in results:
            obj = cPickle.loads(pickle)

            if location_a not in result_dict:
                result_dict[location_a] = {location_b: [obj,]}
            elif location_b not in result_dict[location_a]:
                result_dict[location_a][location_b] = [obj,]
            else:
                result_dict[location_a][location_b].append(obj)

        return result_dict

    @requires_setup
    def cleanup(self):
        """
        Cleanup internal data.
        """
        self.db.execute("DELETE FROM %s WHERE 1=1" % self.table_name)

        # Remove the old, create new.
        old_urls = self.urls
        self.urls = DiskSet(table_prefix='kb_urls')
        old_urls.cleanup()

        old_fuzzable_requests = self.fuzzable_requests
        self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')
        old_fuzzable_requests.cleanup()

        self.observers.clear()

    @requires_setup
    def remove(self):
        self.db.drop_table(self.table_name)
        self.urls.cleanup()
        self.fuzzable_requests.cleanup()
        self.observers.clear()

    @requires_setup
    def get_all_known_urls(self):
        """
        :return: A DiskSet with all the known URLs as URL objects.
        """
        return self.urls

    @requires_setup
    def add_url(self, url):
        """
        :return: True if the URL was previously unknown
        """
        if not isinstance(url, URL):
            msg = 'add_url requires a URL as parameter, got %s instead.'
            raise TypeError(msg % type(url))

        self._notify_observers(self.ADD_URL, url)
        return self.urls.add(url)

    @requires_setup
    def get_all_known_fuzzable_requests(self):
        """
        :return: A DiskSet with all the known FuzzableRequests.
        """
        return self.fuzzable_requests

    @requires_setup
    def add_fuzzable_request(self, fuzzable_request):
        """
        :return: True if the FuzzableRequest was previously unknown
        """
        if not isinstance(fuzzable_request, FuzzableRequest):
            msg = ('add_fuzzable_request requires a FuzzableRequest as'
                   ' parameter, got "%s" instead.')
            raise TypeError(msg % type(fuzzable_request))

        self.add_url(fuzzable_request.get_url())
        return self.fuzzable_requests.add(fuzzable_request)
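
A minimal usage sketch for the knowledge base methods above (not part of the
original source). It assumes an initialized knowledge base instance named kb,
the stock w3af URL and Vuln classes, and observer method names matching the
UPDATE/ADD_URL strings that _notify_observers() resolves with getattr(); the
import paths are assumptions and may vary across w3af versions.

# Minimal usage sketch, assuming an initialized knowledge base instance
# named kb. Import paths are assumptions and may vary across w3af versions.
import w3af.core.data.constants.severity as severity
from w3af.core.data.parsers.doc.url import URL
from w3af.core.data.kb.vuln import Vuln


class PrintingObserver(object):
    # Method names are assumed to match the strings that
    # _notify_observers() resolves via getattr(observer, method)
    def update(self, old_info, update_info):
        print('update: %s -> %s' % (old_info.get_uniq_id(),
                                    update_info.get_uniq_id()))

    def add_url(self, url):
        print('add_url: %s' % url)


kb.add_observer(PrintingObserver())

# add_url() returns True only the first time a URL is seen
url = URL('http://w3af.org/login')
assert kb.add_url(url)
assert not kb.add_url(url)

# Store a finding, read it back by its uniq_id, then update it in place
vuln = Vuln('Example', 'An example finding', severity.MEDIUM,
            1, 'example_plugin')
kb.append('example_plugin', 'example_plugin', vuln)

fetched = kb.get_by_uniq_id(vuln.get_uniq_id())
fetched.set_name('Example (updated)')
kb.update(vuln, fetched)

# get_all_vulns() filters by severity; dump() returns the nested
# {location_a: {location_b: [obj, ...]}} structure
assert kb.get_all_vulns()
assert 'example_plugin' in kb.dump()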
Exemplo n.º 52
    def test_update(self):
        ds = DiskSet()
        ds.add(1)
        ds.update([2, 3, 1])

        self.assertEqual(list(ds), [1, 2, 3])
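
        # A hedged extension of the same idea. Assumption (not verified in
        # this listing): update() keeps insertion order and silently skips
        # values that are already present in the set.
        ds.update([3, 4])
        self.assertEqual(list(ds), [1, 2, 3, 4])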
Exemplo n.º 53
    def __init__(self):
        CrawlPlugin.__init__(self)

        self._captchas_found = DiskSet(table_prefix='find_captchas')
Exemplo n.º 54
class error_500(GrepPlugin):
    """
    Grep every page for error 500 pages that haven't been identified as bugs by
    other plugins.

    :author: Andres Riancho ([email protected])
    """

    IGNORE_CODES = (404, 403, 401, 405, 400, 501)
    FALSE_POSITIVE_STRINGS = ('<h1>Bad Request (Invalid URL)</h1>',
                              )

    def __init__(self):
        GrepPlugin.__init__(self)

        self._error_500_responses = DiskSet(table_prefix='error_500')

    def grep(self, request, response):
        """
        Plugin entry point, identify which requests generated a 500 error.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        if response.is_text_or_html() \
        and 400 < response.get_code() < 600 \
        and response.get_code() not in self.IGNORE_CODES\
        and not self._is_false_positive(response):
            self._error_500_responses.add((request, response.id))

    def _is_false_positive(self, response):
        """
        Filters out some false positives like this one:

        This false positive is generated by IIS when I send a URL that's "odd".
        Some examples of URLs that trigger this false positive:
            - http://127.0.0.2/ext.ini.%00.txt
            - http://127.0.0.2/%00/
            - http://127.0.0.2/%0a%0a<script>alert("Vulnerable")</script>.jsp

        :return: True if the response is a false positive.
        """
        for fps in self.FALSE_POSITIVE_STRINGS:
            if fps in response.get_body():
                return True
        return False

    def end(self):
        """
        This method is called when the plugin won't be used anymore.

        The real job of this plugin is done here, where I will try to see if
        any of the error_500 responses was not identified as a vuln by one
        of the audit plugins
        """
        all_vuln_ids = set()

        for info in kb.kb.get_all_findings():
            for _id in info.get_id():
                all_vuln_ids.add(_id)

        for request, error_500_response_id in self._error_500_responses:

            if error_500_response_id not in all_vuln_ids:
                # Found an error 500 that wasn't identified!
                desc = 'An unidentified web application error (HTTP response'\
                       ' code 500) was found at: "%s". Enable all plugins and'\
                       ' try again; if the vulnerability is still not'\
                       ' identified, please verify manually and report it to'\
                       ' the w3af developers.'
                desc = desc % request.get_url()

                v = Vuln('Unhandled error in web application', desc,
                         severity.MEDIUM, error_500_response_id,
                         self.get_name())

                v.set_uri(request.get_uri())

                self.kb_append_uniq(self, 'error_500', v, 'VAR')

        self._error_500_responses.cleanup()

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """
Exemplo n.º 55
class dir_file_bruter(CrawlPlugin):
    """
    Finds Web server directories and files by bruteforcing.

    :author: Jon Rose ( [email protected] )
    :author: Andres Riancho ( [email protected] )
    :author: Tomas Velazquez
    """
    
    BASE_PATH = os.path.join(ROOT_PATH, 'plugins', 'crawl', 'dir_file_bruter')
    
    def __init__(self):
        CrawlPlugin.__init__(self)

        # User configured parameters
        self._dir_list = os.path.join(self.BASE_PATH, 'common_dirs_small.db')
        self._file_list = os.path.join(self.BASE_PATH, 'common_files_small.db')

        self._bf_directories = True
        self._bf_files = False
        self._be_recursive = False

        # Internal variables
        self._exec = True
        self._already_tested = DiskSet(table_prefix='dir_file_bruter')

    def crawl(self, fuzzable_request):
        """
        Get the file and parse it.

        :param fuzzable_request: A fuzzable_request instance that contains
                               (among other things) the URL to test.
        """
        if not self._exec:
            raise RunOnce()
        else:
            domain_path = fuzzable_request.get_url().get_domain_path()

            # Should I run more than once?
            if not self._be_recursive:
                self._exec = False

            if domain_path not in self._already_tested:
                self._already_tested.add(domain_path)
                self._bruteforce_directories(domain_path)

    def _dir_name_generator(self, base_path):
        """
        Simple generator that returns the names of the directories and files to
        test. It extracts the information from the user configured wordlist
        parameter.

        :yield: (A string with the directory or file name,
                 a URL object with the dir or file name)
        """
        if self._bf_directories:
            for directory_name in file(self._dir_list):
                directory_name = directory_name.strip()

                # ignore comments and empty lines
                if directory_name and not directory_name.startswith('#'):
                    try:
                        dir_url = base_path.url_join(directory_name + '/')
                    except ValueError, ve:
                        msg = 'The "%s" line at "%s" generated an ' \
                              'invalid URL: %s'
                        om.out.debug(msg % (directory_name, self._dir_list, ve))
                    else:
                        yield directory_name, dir_url

        if self._bf_files:
            for file_name in file(self._file_list):
                file_name = file_name.strip()

                # ignore comments and empty lines
                if file_name and not file_name.startswith('#'):
                    try:
                        file_url = base_path.url_join(file_name)
                    except ValueError, ve:
                        msg = 'The "%s" line at "%s" generated an ' \
                              'invalid URL: %s'
                        om.out.debug(msg % (file_name, self._file_list, ve))
                    else:
                        yield file_name, file_url
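
    # Hedged sketch, not part of the original plugin: a possible consumer
    # for _dir_name_generator(). It assumes w3af's self._uri_opener.GET()
    # helper and the is_404() fingerprinting check behave as in stock w3af;
    # the plugin's real _bruteforce_directories() (called from crawl()
    # above) may differ.
    def _bruteforce_directories(self, base_path):
        from w3af.core.controllers.core_helpers.fingerprint_404 import is_404

        for directory_name, candidate_url in self._dir_name_generator(base_path):
            response = self._uri_opener.GET(candidate_url, cache=False)

            if not is_404(response):
                om.out.information('Found: %s' % candidate_url)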
Exemplo n.º 56
class DBKnowledgeBase(BasicKnowledgeBase):
    """
    This class saves the data that is sent to it by plugins. It is the only way
    in which plugins can exchange information.

    Data is stored in a DB.

    :author: Andres Riancho ([email protected])
    """

    def __init__(self):
        super(DBKnowledgeBase, self).__init__()
        
        self.urls = DiskSet(table_prefix='kb_urls')
        self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')
        
        self.db = get_default_persistent_db_instance()

        columns = [('location_a', 'TEXT'),
                   ('location_b', 'TEXT'),
                   ('uniq_id', 'TEXT'),
                   ('pickle', 'BLOB')]

        self.table_name = 'knowledge_base_' + rand_alpha(30)
        self.db.create_table(self.table_name, columns)
        self.db.create_index(self.table_name, ['location_a', 'location_b'])
        self.db.create_index(self.table_name, ['uniq_id',])
        self.db.commit()
        
        # TODO: Why doesn't this work with a WeakValueDictionary?
        self.observers = {} #WeakValueDictionary()
        self.type_observers = {} #WeakValueDictionary()
        self.url_observers = []
        self._observer_id = 0

    def clear(self, location_a, location_b):
        location_a = self._get_real_name(location_a)
        
        query = "DELETE FROM %s WHERE location_a = ? and location_b = ?"
        params = (location_a, location_b)
        self.db.execute(query % self.table_name, params)

    def raw_write(self, location_a, location_b, value):
        """
        This method saves value to (location_a,location_b) but previously
        clears any pre-existing values.
        """
        if isinstance(value, Info):
            raise TypeError('Use append or append_uniq to store vulnerabilities')
        
        location_a = self._get_real_name(location_a)
        
        self.clear(location_a, location_b)
        self.append(location_a, location_b, value, ignore_type=True)

    def raw_read(self, location_a, location_b):
        """
        This method reads the value from (location_a,location_b)
        """
        location_a = self._get_real_name(location_a)
        result = self.get(location_a, location_b, check_types=False)
        
        if len(result) > 1:
            msg = 'Incorrect use of raw_write/raw_read, found %s rows.'
            raise RuntimeError(msg % len(result))
        elif len(result) == 0:
            return []
        else:
            return result[0]
    
    def _get_uniq_id(self, obj):
        if isinstance(obj, Info):
            return obj.get_uniq_id()
        else:
            if isinstance(obj, collections.Iterable):
                concat_all = ''.join([str(i) for i in obj])
                return str(hash(concat_all))
            else:
                return str(hash(obj))

    def append(self, location_a, location_b, value, ignore_type=False):
        """
        This method appends the location_b value to a dict.
        """
        if not ignore_type and not isinstance(value, (Info, Shell)):
            msg = 'You MUST use raw_write/raw_read to store non-info objects'\
                  ' to the KnowledgeBase.'
            raise TypeError(msg)
        
        location_a = self._get_real_name(location_a)
        uniq_id = self._get_uniq_id(value)
        
        pickled_obj = cpickle_dumps(value)
        t = (location_a, location_b, uniq_id, pickled_obj)
        
        query = "INSERT INTO %s VALUES (?, ?, ?, ?)" % self.table_name
        self.db.execute(query, t)
        self._notify(location_a, location_b, value)

    def get(self, location_a, location_b, check_types=True):
        """
        :param location_a: The plugin that saved the data to the kb.
                           Typically the name of the plugin, but could
                           also be the plugin instance.

        :param location_b: The name of the variable under which the vuln
                           objects were saved. Typically the same name as
                           the plugin, or something like "vulns", "errors",
                           etc. In most cases this is NOT None. When set
                           to None, all the vuln objects saved by
                           location_a are returned.

        :return: Returns the data that was saved by another plugin.
        """
        location_a = self._get_real_name(location_a)
        
        if location_b is None:
            query = 'SELECT pickle FROM %s WHERE location_a = ?'
            params = (location_a,)
        else:
            query = 'SELECT pickle FROM %s WHERE location_a = ?'\
                                           ' and location_b = ?'
            params = (location_a, location_b)
        
        result_lst = []
        
        results = self.db.select(query % self.table_name, params)
        for r in results:
            obj = cPickle.loads(r[0])
            
            if check_types and not isinstance(obj, (Info, Shell)):
                raise TypeError('Use raw_write and raw_read to query the'
                                ' knowledge base for non-Info objects')
            
            result_lst.append(obj)
        
        return result_lst

    def get_by_uniq_id(self, uniq_id):
        query = 'SELECT pickle FROM %s WHERE uniq_id = ?'
        params = (uniq_id,)
        
        result = self.db.select_one(query % self.table_name, params)
        
        if result is not None:
            result = cPickle.loads(result[0])
        
        return result

    def add_observer(self, location_a, location_b, observer):
        """
        Add the observer function to the observer list. The function will be
        called when there is a change in (location_a, location_b).
        
        You can use None in location_a or location_b as wildcards.
        
        The observer function needs to be a function which takes three params:
            * location_a
            * location_b
            * value that's added to the kb location
        
        :return: None
        """
        if not isinstance(location_a, (basestring, types.NoneType)) or \
        not isinstance(location_b, (basestring, types.NoneType)):
            raise TypeError('Observer locations need to be strings or None.')
        
        observer_id = self.get_observer_id()
        self.observers[(location_a, location_b, observer_id)] = observer
    
    def add_types_observer(self, type_filter, observer):
        """
        Add the observer function to the list of functions to be called when a
        new object that is of type "type_filter" is added to the KB.
        
        The type_filter must be one of Info, Vuln or Shell.
        
        :return: None
        """
        if type_filter not in (Info, Vuln, Shell):
            msg = 'The type_filter needs to be one of Info, Vuln or Shell'
            raise TypeError(msg)
        
        observer_id = self.get_observer_id()
        self.type_observers[(type_filter, observer_id)] = observer
        
    def get_observer_id(self):
        self._observer_id += 1
        return self._observer_id
    
    def _notify(self, location_a, location_b, value):
        """
        Call the observer if the location_a/location_b matches with the
        configured observers.
        
        :return: None
        """
        # Note that I copy the items list in order to iterate through it
        # without any issues like the size changing
        for (obs_loc_a, obs_loc_b, _), observer in self.observers.items()[:]:
            
            if obs_loc_a is None and obs_loc_b is None:
                observer(location_a, location_b, value)
                continue

            if obs_loc_a == location_a and obs_loc_b is None:
                observer(location_a, location_b, value)
                continue
            
            if obs_loc_a == location_a and obs_loc_b == location_b:
                observer(location_a, location_b, value)
                continue
        
        for (type_filter, _), observer in self.type_observers.items()[:]:
            if isinstance(value, type_filter):
                observer(location_a, location_b, value)

    def get_all_entries_of_class(self, klass):
        """
        :return: A list of all objects of class == klass that are saved in the
                 kb.
        """
        query = 'SELECT pickle FROM %s'
        results = self.db.select(query % self.table_name)
        
        result_lst = []

        for r in results:
            obj = cPickle.loads(r[0])
            if isinstance(obj, klass):
                result_lst.append(obj)
        
        return result_lst

    def dump(self):
        result_dict = {}
        
        query = 'SELECT location_a, location_b, pickle FROM %s'
        results = self.db.select(query % self.table_name)
        
        for location_a, location_b, pickle in results:
            obj = cPickle.loads(pickle)
            
            if location_a not in result_dict:
                result_dict[location_a] = {location_b: [obj,]}
            elif location_b not in result_dict[location_a]:
                result_dict[location_a][location_b] = [obj,]
            else:
                result_dict[location_a][location_b].append(obj)
                
        return result_dict

    def cleanup(self):
        """
        Cleanup internal data.
        """
        self.db.execute("DELETE FROM %s WHERE 1=1" % self.table_name)
        
        # Remove the old, create new.
        self.urls.cleanup()
        self.urls = DiskSet(table_prefix='kb_urls')
        
        self.fuzzable_requests.cleanup()
        self.fuzzable_requests = DiskSet(table_prefix='kb_fuzzable_requests')
        
        self.observers.clear()
    
    def remove(self):
        self.db.drop_table(self.table_name)
        self.urls.cleanup()
        self.fuzzable_requests.cleanup()
        self.observers.clear()
    
    def get_all_known_urls(self):
        """
        :return: A DiskSet with all the known URLs as URL objects.
        """
        return self.urls

    def add_url_observer(self, observer):
        self.url_observers.append(observer)

    def _notify_url_observers(self, new_url):
        """
        Call the observer with new_url.
        
        :return: None
        """
        # Note that I copy the items list in order to iterate through it
        # without any issues like the size changing
        for observer in self.url_observers[:]:            
            observer(new_url)
    
    def add_url(self, url):
        """
        :return: True if the URL was previously unknown 
        """
        if not isinstance(url, URL):
            msg = 'add_url requires a URL as parameter, got %s instead.'
            raise TypeError(msg % type(url))
        
        self._notify_url_observers(url)
        return self.urls.add(url)
    
    def get_all_known_fuzzable_requests(self):
        """
        :return: A DiskSet with all the known FuzzableRequests.
        """
        return self.fuzzable_requests
    
    def add_fuzzable_request(self, fuzzable_request):
        """
        :return: True if the FuzzableRequest was previously unknown 
        """
        if not isinstance(fuzzable_request, FuzzableRequest):
            msg = 'add_fuzzable_request requires a FuzzableRequest as '\
                  'parameter, got "%s" instead.'
            raise TypeError(msg % type(fuzzable_request))
        
        self.add_url(fuzzable_request.get_url())
        return self.fuzzable_requests.add(fuzzable_request)
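
A usage sketch for DBKnowledgeBase (not part of the original source); it
assumes the class can be instantiated directly and that the Vuln import path
matches the current w3af layout.

# Usage sketch for DBKnowledgeBase, with assumptions noted inline.
from w3af.core.data.kb.vuln import Vuln

kb = DBKnowledgeBase()

# raw_write()/raw_read() round-trip for a non-Info value
kb.raw_write('my_plugin', 'counter', 42)
assert kb.raw_read('my_plugin', 'counter') == 42


# Location observer: None acts as a wildcard for location_a / location_b
def on_change(location_a, location_b, value):
    print('%s.%s = %r' % (location_a, location_b, value))

kb.add_observer(None, None, on_change)


# Type observer: fires for every Vuln appended, regardless of location
def on_vuln(location_a, location_b, vuln):
    print('New vuln: %s' % vuln.get_name())

kb.add_types_observer(Vuln, on_vuln)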