示例#1
0
    def __init__(self, crawl_infrastructure_plugins, w3af_core,
                 max_discovery_time):
        """
        Set up the consumer that runs the crawl and infrastructure plugins.

        :param crawl_infrastructure_plugins: List holding the
                                             CrawlInfrastructure plugin
                                             instances
        :param w3af_core: The w3af core, used for status reporting
        :param max_discovery_time: Upper bound (in seconds) for the discovery
                                   phase; stored after conversion with int()
        """
        parent = super(CrawlInfrastructure, self)
        parent.__init__(crawl_infrastructure_plugins,
                        w3af_core,
                        thread_name=self.get_name(),
                        max_pool_queued_tasks=100)

        self._max_discovery_time = int(max_discovery_time)

        # Used to filter the fuzzable requests that plugins find
        self._variant_db = VariantDB()

        self._disabled_plugins = set()
        self._running = self._report_max_time = True
        self._reported_found_urls = ScalableBloomFilter()

        # The crawling process needs an ordered queue, so the in_queue
        # provided by BaseConsumer is replaced here.
        #
        # OrderedCachedQueue's documentation explains why ordering matters.
        queue_name = self.get_name() + 'In'
        self.in_queue = OrderedCachedQueue(maxsize=10, name=queue_name)
示例#2
0
    def __init__(self):
        """
        Initialise the crawl state and the default user options.
        """
        CrawlPlugin.__init__(self)

        # Crawl state. Both compiled regexes start as None; _compile_re() is
        # invoked at the end of this method, once the raw patterns are set.
        self._compiled_ignore_re = self._compiled_follow_re = None
        self._broken_links = DiskSet()
        self._first_run = True
        self._known_variants = VariantDB()
        self._already_filled_form = ScalableBloomFilter()

        # Defaults for the options the user can configure
        self._ignore_regex, self._follow_regex = '', '.*'
        self._only_forward = False

        self._compile_re()
示例#3
0
    def __init__(self):
        """
        Initialise the crawl state and the default user options.
        """
        CrawlPlugin.__init__(self)

        # Crawl state. Both compiled regexes start as None; _compile_re() is
        # invoked at the end of this method, once the raw patterns are set.
        self._compiled_ignore_re = self._compiled_follow_re = None
        self._broken_links = DiskSet(table_prefix='web_spider')
        self._first_run = True
        self._target_urls = []
        self._target_domain = None
        self._already_filled_form = ScalableBloomFilter()
        self._variant_db = VariantDB()

        # Defaults for the options the user can configure
        self._ignore_regex, self._follow_regex = '', '.*'
        self._only_forward = False
        self._ignore_extensions = []

        self._compile_re()
示例#4
0
    def __init__(self, crawl_infrastructure_plugins, w3af_core,
                 max_discovery_time):
        """
        Set up the consumer that runs the crawl_infrastructure plugins.

        :param crawl_infrastructure_plugins: List holding the
                                             crawl_infrastructure plugin
                                             instances
        :param w3af_core: The w3af core, used for status reporting
        :param max_discovery_time: Upper bound (in seconds) for the discovery
                                   phase; stored after conversion with int()
        """
        parent = super(crawl_infrastructure, self)
        parent.__init__(crawl_infrastructure_plugins,
                        w3af_core,
                        thread_name='CrawlInfra')

        self._max_discovery_time = int(max_discovery_time)

        # Used to filter the fuzzable requests that plugins find
        self._variant_db = VariantDB()
        self._already_seen_urls = ScalableBloomFilter()

        self._disabled_plugins = set()
        self._running = self._report_max_time = True
示例#5
0
 def setUp(self):
     """
     Per-test fixture: prepare the temp dir and a fresh VariantDB.
     """
     # NOTE(review): presumably VariantDB relies on the temp dir created
     # here -- confirm before reordering these two calls.
     create_temp_dir()
     self.vdb = VariantDB()
示例#6
0
class TestVariantDB(unittest.TestCase):
    """
    Tests for VariantDB.append() limits and for the strings produced by
    clean_fuzzable_request().
    """

    def setUp(self):
        create_temp_dir()
        self.vdb = VariantDB()

    #
    #   Helpers
    #
    def _append(self, url_string):
        """
        Wrap url_string in a fuzzable request and append it to the DB.

        :return: Whatever VariantDB.append() returned
        """
        return self.vdb.append(fr(URL(url_string)))

    def _assert_accepts(self, url_fmt, limit, fmt_args=None):
        """
        Assert that append() accepts `limit` URLs built from url_fmt.

        :param url_fmt: URL template with one or more %s placeholders
        :param limit: How many URLs to generate (indexes 0..limit-1)
        :param fmt_args: Optional callable mapping the index to the value(s)
                         interpolated into url_fmt; when omitted the index
                         itself is used
        """
        for idx in xrange(limit):
            values = idx if fmt_args is None else fmt_args(idx)
            self.assertTrue(self._append(url_fmt % values))

    def _assert_capped(self, url_fmt, limit, fmt_args=None):
        """
        Assert that `limit` URLs are accepted and that the URL generated for
        index `limit + 1` is then rejected.
        """
        self._assert_accepts(url_fmt, limit, fmt_args)

        overflow = limit + 1
        values = overflow if fmt_args is None else fmt_args(overflow)
        self.assertFalse(self._append(url_fmt % values))

    def _assert_clean(self, url_string, expected):
        """
        Assert that clean_fuzzable_request() maps url_string to expected.
        """
        cleaned = clean_fuzzable_request(fr(URL(url_string)))
        self.assertEqual(cleaned, expected)

    #
    #   Query string variants
    #
    def test_db_int(self):
        self._assert_capped('http://w3af.org/foo.htm?id=%s',
                            PARAMS_MAX_VARIANTS)

    def test_db_int_int(self):
        self._assert_capped('http://w3af.org/foo.htm?id=%s&bar=1',
                            PARAMS_MAX_VARIANTS)

    def test_db_int_int_var(self):
        self._assert_capped('http://w3af.org/foo.htm?id=%s&bar=%s',
                            PARAMS_MAX_VARIANTS,
                            lambda n: (n, n))

    def test_db_int_str(self):
        self._assert_capped('http://w3af.org/foo.htm?id=%s&bar=%s',
                            PARAMS_MAX_VARIANTS,
                            lambda n: (n, 'abc' * n))

    def test_db_int_str_then_int_int(self):
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = PARAMS_MAX_VARIANTS + 1

        # First the (int, str) combinations
        self._assert_accepts(template, PARAMS_MAX_VARIANTS,
                             lambda n: (n, 'abc' * n))

        # Then the (int, int) combinations
        self._assert_accepts(template, PARAMS_MAX_VARIANTS,
                             lambda n: (n, n))

        self.assertFalse(self._append(template % (overflow, overflow)))
        self.assertFalse(self._append(template % (overflow, 'spameggs')))

    #
    #   clean_fuzzable_request() output
    #
    def test_clean_fuzzable_request_simple(self):
        self._assert_clean('http://w3af.org/',
                           u'(GET)-http://w3af.org/')

    def test_clean_fuzzable_request_file(self):
        self._assert_clean('http://w3af.org/index.php',
                           u'(GET)-http://w3af.org/%s.php' % FILENAME_TOKEN)

    def test_clean_fuzzable_request_directory_file(self):
        self._assert_clean(
            'http://w3af.org/foo/index.php',
            u'(GET)-http://w3af.org/foo/%s.php' % FILENAME_TOKEN)

    def test_clean_fuzzable_request_directory_file_int(self):
        self._assert_clean('http://w3af.org/foo/index.php?id=2',
                           u'(GET)-http://w3af.org/foo/index.php?id=number')

    def test_clean_fuzzable_request_int(self):
        self._assert_clean('http://w3af.org/index.php?id=2',
                           u'(GET)-http://w3af.org/index.php?id=number')

    def test_clean_fuzzable_request_int_str(self):
        self._assert_clean(
            'http://w3af.org/index.php?id=2&foo=bar',
            u'(GET)-http://w3af.org/index.php?id=number&foo=string')

    def test_clean_fuzzable_request_int_str_empty(self):
        self._assert_clean(
            'http://w3af.org/index.php?id=2&foo=bar&spam=',
            u'(GET)-http://w3af.org/index.php?id=number&foo=string&spam=string')

    def test_clean_fuzzable_request_directory_file_no_params(self):
        self._assert_clean(
            'http://w3af.org/foo/index.php',
            u'(GET)-http://w3af.org/foo/%s.php' % FILENAME_TOKEN)

    def test_clean_fuzzable_request_directory(self):
        self._assert_clean('http://w3af.org/foo/',
                           u'(GET)-http://w3af.org/%s/' % PATH_TOKEN)

    def test_clean_fuzzable_request_directory_parent_path(self):
        self._assert_clean('http://w3af.org/spam/foo/',
                           u'(GET)-http://w3af.org/spam/%s/' % PATH_TOKEN)

    def test_clean_form_fuzzable_request(self):
        post_data = KeyValueContainer(init_val=[('data', ['23'])])
        request = FuzzableRequest(URL("http://www.w3af.com/"),
                                  headers=Headers([('Host', 'www.w3af.com')]),
                                  method='POST',
                                  post_data=post_data)

        self.assertEqual(clean_fuzzable_request(request),
                         u'(POST)-http://www.w3af.com/!data=number')

    def test_clean_form_fuzzable_request_form(self):
        params = FormParameters()
        params.add_field_by_attr_items([("name", "username"),
                                        ("value", "abc")])
        params.add_field_by_attr_items([("name", "address"),
                                        ("value", "")])
        params.set_action(URL('http://example.com/?id=1'))
        params.set_method('post')

        request = FuzzableRequest.from_form(dc_from_form_params(params))

        expected = u'(POST)-http://example.com/' \
                   u'?id=number!username=string&address=string'
        self.assertEqual(clean_fuzzable_request(request), expected)

    #
    #   Path variants
    #
    def test_db_many_files_in_root(self):
        self._assert_capped('http://w3af.org/foo%s.htm', PATH_MAX_VARIANTS)

    def test_db_many_files_in_root_without_extension(self):
        self._assert_capped('http://w3af.org/foo%s', PATH_MAX_VARIANTS)

    def test_db_many_files_different_extensions_in_root(self):
        self._assert_capped('http://w3af.org/foo%s.htm', PATH_MAX_VARIANTS)

        # Same file names, different extension
        self._assert_capped('http://w3af.org/foo%s.jpeg', PATH_MAX_VARIANTS)

    def test_db_many_paths_in_root(self):
        self._assert_capped('http://w3af.org/foo%s/', PATH_MAX_VARIANTS)

    def test_db_many_paths_in_other_directories(self):
        self._assert_capped('http://w3af.org/foo/bar%s/', PATH_MAX_VARIANTS)

        # Same directory names under a different parent directory
        self._assert_capped('http://w3af.org/spam/bar%s/', PATH_MAX_VARIANTS)

    def test_db_many_files_other_directories(self):
        self._assert_capped('http://w3af.org/spam/foo%s.htm',
                            PATH_MAX_VARIANTS)

        # Different parent path, same extension
        self._assert_capped('http://w3af.org/eggs/foo%s.htm',
                            PATH_MAX_VARIANTS)

    def test_db_many_files_different_path_length_directories(self):
        self._assert_capped('http://w3af.org/spam/foo%s.htm',
                            PATH_MAX_VARIANTS)

        # Different parent path, same extension. The extra /bar/ level is
        # what sets this test apart from the previous one.
        self._assert_capped('http://w3af.org/eggs/bar/foo%s.htm',
                            PATH_MAX_VARIANTS)

    #
    #   Exact duplicates
    #
    def test_db_same_without_qs(self):
        target = URL('http://w3af.org/spam/foo.htm')

        self.assertTrue(self.vdb.append(fr(target)))
        self.assertFalse(self.vdb.append(fr(target)))

    def test_db_same_with_qs(self):
        target = URL('http://w3af.org/spam/foo.htm?id=2&abc=333')

        self.assertTrue(self.vdb.append(fr(target)))
        self.assertFalse(self.vdb.append(fr(target)))
示例#7
0
class TestVariantDB(unittest.TestCase):
    """
    Tests for VariantDB.need_more_variants() limits and for the strings
    produced by VariantDB._clean_reference().
    """

    def setUp(self):
        create_temp_dir()
        self.vdb = VariantDB()

    #
    #   Helpers
    #
    def _fill(self, url_fmt, fmt_args=None):
        """
        Append DEFAULT_MAX_VARIANTS URLs built from url_fmt, asserting before
        each append that the DB still asks for more variants.

        :param url_fmt: URL template with one or more %s placeholders
        :param fmt_args: Optional callable mapping the index to the value(s)
                         interpolated into url_fmt; when omitted the index
                         itself is used
        """
        for idx in xrange(DEFAULT_MAX_VARIANTS):
            values = idx if fmt_args is None else fmt_args(idx)
            candidate = URL(url_fmt % values)
            self.assertTrue(self.vdb.need_more_variants(candidate))
            self.vdb.append(candidate)

    def _needs_more(self, url_string):
        """
        :return: need_more_variants() result for the URL in url_string
        """
        return self.vdb.need_more_variants(URL(url_string))

    def _assert_clean(self, url_string, expected):
        """
        Assert that _clean_reference() maps url_string to expected.
        """
        self.assertEqual(self.vdb._clean_reference(URL(url_string)),
                         expected)

    #
    #   Query string variants
    #
    def test_db_int(self):
        template = 'http://w3af.org/foo.htm?id=%s'

        self._fill(template)
        self.assertFalse(
            self._needs_more(template % (DEFAULT_MAX_VARIANTS + 1)))

    def test_db_int_int(self):
        template = 'http://w3af.org/foo.htm?id=%s&bar=1'

        self._fill(template)
        self.assertFalse(
            self._needs_more(template % (DEFAULT_MAX_VARIANTS + 1)))

    def test_db_int_int_var(self):
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = DEFAULT_MAX_VARIANTS + 1

        self._fill(template, lambda n: (n, n))
        self.assertFalse(self._needs_more(template % (overflow, overflow)))

    def test_db_int_str(self):
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = DEFAULT_MAX_VARIANTS + 1

        self._fill(template, lambda n: (n, 'abc' * n))
        self.assertFalse(
            self._needs_more(template % (overflow, 'abc' * overflow)))

    def test_db_int_str_then_int_int(self):
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = DEFAULT_MAX_VARIANTS + 1
        int_int_overflow = template % (overflow, overflow)

        # First the (int, str) combinations
        self._fill(template, lambda n: (n, 'abc' * n))

        # Only (int, str) was stored so far, so an (int, int) URL is still
        # wanted
        self.assertTrue(self._needs_more(int_int_overflow))

        # Then the (int, int) combinations
        self._fill(template, lambda n: (n, n))

        self.assertFalse(self._needs_more(int_int_overflow))

    #
    #   _clean_reference() output
    #
    def test_clean_reference_simple(self):
        self._assert_clean('http://w3af.org/',
                           u'http://w3af.org/')

    def test_clean_reference_file(self):
        self._assert_clean('http://w3af.org/index.php',
                           u'http://w3af.org/index.php')

    def test_clean_reference_directory_file(self):
        self._assert_clean('http://w3af.org/foo/index.php',
                           u'http://w3af.org/foo/index.php')

    def test_clean_reference_directory_file_int(self):
        self._assert_clean('http://w3af.org/foo/index.php?id=2',
                           u'http://w3af.org/foo/index.php?id=number')

    def test_clean_reference_int(self):
        self._assert_clean('http://w3af.org/index.php?id=2',
                           u'http://w3af.org/index.php?id=number')

    def test_clean_reference_int_str(self):
        self._assert_clean('http://w3af.org/index.php?id=2&foo=bar',
                           u'http://w3af.org/index.php?id=number&foo=string')

    def test_clean_reference_int_str_empty(self):
        self._assert_clean(
            'http://w3af.org/index.php?id=2&foo=bar&spam=',
            u'http://w3af.org/index.php?id=number&foo=string&spam=string')
示例#8
0
 def setUp(self):
     """
     Per-test fixture: reset MiscSettings to its default values, prepare
     the temp dir and create a fresh VariantDB.
     """
     # NOTE(review): presumably VariantDB depends on the settings and the
     # temp dir prepared here -- confirm before reordering these calls.
     MiscSettings().set_default_values()
     create_temp_dir()
     self.vdb = VariantDB()
示例#9
0
class TestVariantDB(unittest.TestCase):
    """
    Tests for VariantDB.need_more_variants() limits and for the strings
    produced by VariantDB._clean_reference() and
    VariantDB._clean_fuzzable_request().
    """

    def setUp(self):
        create_temp_dir()
        self.vdb = VariantDB()

    #
    #   Helpers
    #
    def _fill(self, url_fmt, fmt_args=None):
        """
        Append DEFAULT_MAX_VARIANTS URLs built from url_fmt, asserting before
        each append that the DB still asks for more variants.

        :param url_fmt: URL template with one or more %s placeholders
        :param fmt_args: Optional callable mapping the index to the value(s)
                         interpolated into url_fmt; when omitted the index
                         itself is used
        """
        for idx in xrange(DEFAULT_MAX_VARIANTS):
            values = idx if fmt_args is None else fmt_args(idx)
            candidate = URL(url_fmt % values)
            self.assertTrue(self.vdb.need_more_variants(candidate))
            self.vdb.append(candidate)

    def _needs_more(self, url_string):
        """
        :return: need_more_variants() result for the URL in url_string
        """
        return self.vdb.need_more_variants(URL(url_string))

    def _assert_clean(self, url_string, expected):
        """
        Assert that _clean_reference() maps url_string to expected.
        """
        self.assertEqual(self.vdb._clean_reference(URL(url_string)),
                         expected)

    #
    #   Query string variants
    #
    def test_db_int(self):
        template = 'http://w3af.org/foo.htm?id=%s'

        self._fill(template)
        self.assertFalse(
            self._needs_more(template % (DEFAULT_MAX_VARIANTS + 1)))

    def test_db_int_int(self):
        template = 'http://w3af.org/foo.htm?id=%s&bar=1'

        self._fill(template)
        self.assertFalse(
            self._needs_more(template % (DEFAULT_MAX_VARIANTS + 1)))

    def test_db_int_int_var(self):
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = DEFAULT_MAX_VARIANTS + 1

        self._fill(template, lambda n: (n, n))
        self.assertFalse(self._needs_more(template % (overflow, overflow)))

    def test_db_int_str(self):
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = DEFAULT_MAX_VARIANTS + 1

        self._fill(template, lambda n: (n, 'abc' * n))
        self.assertFalse(
            self._needs_more(template % (overflow, 'abc' * overflow)))

    def test_db_int_str_then_int_int(self):
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        overflow = DEFAULT_MAX_VARIANTS + 1
        int_int_overflow = template % (overflow, overflow)

        # First the (int, str) combinations
        self._fill(template, lambda n: (n, 'abc' * n))

        # Only (int, str) was stored so far, so an (int, int) URL is still
        # wanted
        self.assertTrue(self._needs_more(int_int_overflow))

        # Then the (int, int) combinations
        self._fill(template, lambda n: (n, n))

        self.assertFalse(self._needs_more(int_int_overflow))

    #
    #   _clean_reference() output
    #
    def test_clean_reference_simple(self):
        self._assert_clean('http://w3af.org/',
                           u'(GET)-http://w3af.org/')

    def test_clean_reference_file(self):
        self._assert_clean('http://w3af.org/index.php',
                           u'(GET)-http://w3af.org/index.php')

    def test_clean_reference_directory_file(self):
        self._assert_clean('http://w3af.org/foo/index.php',
                           u'(GET)-http://w3af.org/foo/index.php')

    def test_clean_reference_directory_file_int(self):
        self._assert_clean('http://w3af.org/foo/index.php?id=2',
                           u'(GET)-http://w3af.org/foo/index.php?id=number')

    def test_clean_reference_int(self):
        self._assert_clean('http://w3af.org/index.php?id=2',
                           u'(GET)-http://w3af.org/index.php?id=number')

    def test_clean_reference_int_str(self):
        self._assert_clean(
            'http://w3af.org/index.php?id=2&foo=bar',
            u'(GET)-http://w3af.org/index.php?id=number&foo=string')

    def test_clean_reference_int_str_empty(self):
        self._assert_clean(
            'http://w3af.org/index.php?id=2&foo=bar&spam=',
            u'(GET)-http://w3af.org/index.php?id=number&foo=string&spam=string'
        )

    #
    #   _clean_fuzzable_request() output
    #
    def test_clean_form_fuzzable_request(self):
        post_data = KeyValueContainer(init_val=[('data', ['23'])])
        request = FuzzableRequest(URL("http://www.w3af.com/"),
                                  headers=Headers([('Host', 'www.w3af.com')]),
                                  method='POST',
                                  post_data=post_data)

        self.assertEqual(self.vdb._clean_fuzzable_request(request),
                         u'(POST)-http://www.w3af.com/!data=number')

    def test_clean_form_fuzzable_request_form(self):
        params = FormParameters()
        params.add_input([("name", "username"), ("value", "abc")])
        params.add_input([("name", "address"), ("value", "")])
        params.set_action(URL('http://example.com/?id=1'))
        params.set_method('post')

        request = FuzzableRequest.from_form(dc_from_form_params(params))

        expected = u'(POST)-http://example.com/?id=number!username=string&address=string'
        self.assertEqual(self.vdb._clean_fuzzable_request(request), expected)
示例#10
0
class web_spider(CrawlPlugin):
    """
    Crawl the web application.

    :author: Andres Riancho ([email protected])
    """
    NOT_404 = set([http_constants.UNAUTHORIZED,
                   http_constants.FORBIDDEN])

    def __init__(self):
        """
        Initialise the crawl state and the default user options.
        """
        CrawlPlugin.__init__(self)

        # Internal variables
        self._compiled_ignore_re = None
        self._compiled_follow_re = None
        self._broken_links = DiskSet()
        self._first_run = True
        self._known_variants = VariantDB()
        self._already_filled_form = ScalableBloomFilter()

        # Defined up-front so that they exist even when _handle_first_run()
        # finds no configured targets; otherwise the domain comparison in
        # _urls_to_verify_generator() raises AttributeError.
        self._target_urls = []
        self._target_domain = None

        # User configured variables
        self._ignore_regex = ''
        self._follow_regex = '.*'
        self._only_forward = False
        self._compile_re()

    def crawl(self, fuzzable_req):
        """
        Searches for links on the html.

        The request is sent, the fuzzable requests created from the response
        are put in the output queue and the links found in the response are
        extracted and verified.

        :param fuzzable_req: A fuzzable_req instance that contains
                             (among other things) the URL to test.
        """
        self._handle_first_run()

        #
        # If it is a form, then smart_fill the parameters to send something that
        # makes sense and will allow us to cover more code.
        #
        if isinstance(fuzzable_req, HTTPPostDataRequest):

            # Forms whose URL is already in the filter are not sent again
            if fuzzable_req.get_url() in self._already_filled_form:
                return

            fuzzable_req = self._fill_form(fuzzable_req)

        # Send the HTTP request,
        resp = self._uri_opener.send_mutant(fuzzable_req)

        # Nothing to do here...
        if resp.get_code() == 401:
            return

        fuzz_req_list = self._create_fuzzable_requests(
            resp,
            request=fuzzable_req,
            add_self=False
        )

        for fr in fuzz_req_list:
            self.output_queue.put(fr)

        self._extract_links_and_verify(resp, fuzzable_req)

    def _handle_first_run(self):
        """
        On the first call only, store the target URLs and the target domain
        which are needed by the "only_forward" and third party filters.
        """
        if not self._first_run:
            return

        self._first_run = False

        # The targets are read only once: the core can run
        # "cf.cf.save('targets', [])" when the "stop" button is pressed, and
        # indexing that list blindly triggered lots of bugs in the past.
        targets = cf.cf.get('targets')
        self._target_urls = [target.get_domain_path() for target in targets]

        if targets:
            self._target_domain = targets[0].get_domain()

    def _urls_to_verify_generator(self, resp, fuzzable_req):
        """
        Yield the references found in the response which pass all filters.

        :param resp: HTTP response object
        :param fuzzable_req: The HTTP request that generated the response
        :yield: Tuples containing (reference, fuzzable_req, original_url,
                possibly_broken), where original_url is resp.get_redir_uri()
                and possibly_broken is True when the reference was found
                only by the regular expressions
        """
        #
        # Note: I WANT to follow links that are in the 404 page.
        #

        # Only text, HTML, PDF and SWF responses are parsed: I don't want
        # the SGML parser to analyze an image file, it's useless and
        # consumes CPU power.
        if not (resp.is_text_or_html() or resp.is_pdf() or resp.is_swf()):
            return

        original_url = resp.get_redir_uri()

        try:
            doc_parser = parser_cache.dpc.get_document_parser_for(resp)
        except BaseFrameworkException as w3:
            om.out.debug('Failed to find a suitable document parser. '
                         'Exception "%s"' % w3)
            return

        # Note:
        # - With parsed_refs I'm 100% that it's really
        # something in the HTML that the developer intended to add.
        #
        # - The re_refs are the result of regular expressions,
        # which in some cases are just false positives.
        parsed_refs, re_refs = doc_parser.get_references()

        # I also want to analyze all directories, if the URL I just
        # fetched is:
        # http://localhost/a/b/c/f00.php I want to GET:
        # http://localhost/a/b/c/
        # http://localhost/a/b/
        # http://localhost/a/
        # http://localhost/
        # And analyze the responses...
        dirs = resp.get_url().get_directories()
        only_re_refs = set(re_refs) - set(dirs + parsed_refs)

        all_refs = itertools.chain(dirs, parsed_refs, re_refs)

        for ref in unique_justseen(sorted(all_refs)):

            # Ignore myself
            if ref == resp.get_uri():
                continue

            # I don't want w3af sending requests to 3rd parties!
            if ref.get_domain() != self._target_domain:
                continue

            # Filter the URL's according to the configured regexs
            urlstr = ref.url_string
            if not self._compiled_follow_re.match(urlstr) or \
            self._compiled_ignore_re.match(urlstr):
                continue

            if self._only_forward:
                if not self._is_forward(ref):
                    continue

            # Work with the parsed references and report broken
            # links. Then work with the regex references and DO NOT
            # report broken links
            if self._need_more_variants(ref):
                self._known_variants.append(ref)
                possibly_broken = ref in only_re_refs
                yield ref, fuzzable_req, original_url, possibly_broken
示例#11
0
class TestVariantDB(unittest.TestCase):
    """
    Unit tests for VariantDB.

    The test_db_* methods exercise need_more_variants() / append() with URLs
    that only differ in their query string values, the test_clean_* methods
    pin the strings returned by _clean_reference() and
    _clean_fuzzable_request().
    """

    def setUp(self):
        # Each test gets its own VariantDB instance
        create_temp_dir()
        self.vdb = VariantDB()

    #
    # In all the test_db_* methods the DB is expected to ask for more
    # variants for the first DEFAULT_MAX_VARIANTS URLs and to answer False
    # for one extra URL with the same parameter types
    #
    def test_db_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s'

        for i in xrange(DEFAULT_MAX_VARIANTS):
            url = URL(url_fmt % i)
            self.assertTrue(self.vdb.need_more_variants(url))
            self.vdb.append(url)

        extra_url = URL(url_fmt % (DEFAULT_MAX_VARIANTS + 1,))
        self.assertFalse(self.vdb.need_more_variants(extra_url))

    def test_db_int_int(self):
        # Only the first parameter changes, the second one is always "1"
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=1'

        for i in xrange(DEFAULT_MAX_VARIANTS):
            url = URL(url_fmt % i)
            self.assertTrue(self.vdb.need_more_variants(url))
            self.vdb.append(url)

        self.assertFalse(
            self.vdb.need_more_variants(URL(url_fmt % (DEFAULT_MAX_VARIANTS + 1,))))

    def test_db_int_int_var(self):
        # Both numeric parameters change on each iteration
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'

        for i in xrange(DEFAULT_MAX_VARIANTS):
            url = URL(url_fmt % (i, i))
            self.assertTrue(self.vdb.need_more_variants(url))
            self.vdb.append(url)

        self.assertFalse(
            self.vdb.need_more_variants(URL(url_fmt % (DEFAULT_MAX_VARIANTS + 1, DEFAULT_MAX_VARIANTS + 1))))

    def test_db_int_str(self):
        # First parameter is a number, the second one is built from 'abc'
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'

        for i in xrange(DEFAULT_MAX_VARIANTS):
            url = URL(url_fmt % (i, 'abc' * i))
            self.assertTrue(self.vdb.need_more_variants(url))
            self.vdb.append(url)

        self.assertFalse(self.vdb.need_more_variants(
            URL(url_fmt % (DEFAULT_MAX_VARIANTS + 1, 'abc' * (DEFAULT_MAX_VARIANTS + 1)))))

    def test_db_int_str_then_int_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'

        # Add (int, str)
        for i in xrange(DEFAULT_MAX_VARIANTS):
            url = URL(url_fmt % (i, 'abc' * i))
            self.assertTrue(self.vdb.need_more_variants(url))
            self.vdb.append(url)

        # Please note that in this case I'm asking for (int, int) and I added
        # (int, str) before
        self.assertTrue(
            self.vdb.need_more_variants(URL(url_fmt % (DEFAULT_MAX_VARIANTS + 1, DEFAULT_MAX_VARIANTS + 1))))

        # Add (int, int)
        for i in xrange(DEFAULT_MAX_VARIANTS):
            url = URL(url_fmt % (i, i))
            self.assertTrue(self.vdb.need_more_variants(url))
            self.vdb.append(url)

        self.assertFalse(
            self.vdb.need_more_variants(URL(url_fmt % (DEFAULT_MAX_VARIANTS + 1, DEFAULT_MAX_VARIANTS + 1))))

    #
    # _clean_reference() is expected to return "(GET)-<url>" where the query
    # string values were replaced by the "number" or "string" tokens
    #
    def test_clean_reference_simple(self):
        self.assertEqual(self.vdb._clean_reference(URL('http://w3af.org/')),
                         u'(GET)-http://w3af.org/')

    def test_clean_reference_file(self):
        self.assertEqual(
            self.vdb._clean_reference(URL('http://w3af.org/index.php')),
            u'(GET)-http://w3af.org/index.php')

    def test_clean_reference_directory_file(self):
        self.assertEqual(
            self.vdb._clean_reference(URL('http://w3af.org/foo/index.php')),
                                      u'(GET)-http://w3af.org/foo/index.php')

    def test_clean_reference_directory_file_int(self):
        self.assertEqual(
            self.vdb._clean_reference(URL('http://w3af.org/foo/index.php?id=2')),
                                      u'(GET)-http://w3af.org/foo/index.php?id=number')

    def test_clean_reference_int(self):
        self.assertEqual(
            self.vdb._clean_reference(URL('http://w3af.org/index.php?id=2')),
            u'(GET)-http://w3af.org/index.php?id=number')

    def test_clean_reference_int_str(self):
        self.assertEqual(
            self.vdb._clean_reference(
                URL('http://w3af.org/index.php?id=2&foo=bar')),
            u'(GET)-http://w3af.org/index.php?id=number&foo=string')

    def test_clean_reference_int_str_empty(self):
        # The empty "spam" value is expected to be reported as "string"
        self.assertEqual(
            self.vdb._clean_reference(
                URL('http://w3af.org/index.php?id=2&foo=bar&spam=')),
            u'(GET)-http://w3af.org/index.php?id=number&foo=string&spam=string')

    #
    # _clean_fuzzable_request() is expected to include the HTTP method and,
    # after a "!" separator, the cleaned post-data parameters
    #
    def test_clean_form_fuzzable_request(self):
        fr = FuzzableRequest(URL("http://www.w3af.com/"),
                             headers=Headers([('Host', 'www.w3af.com')]),
                             method='POST',
                             post_data=KeyValueContainer(init_val=[('data', ['23'])]))

        expected = u'(POST)-http://www.w3af.com/!data=number'
        self.assertEqual(self.vdb._clean_fuzzable_request(fr), expected)

    def test_clean_form_fuzzable_request_form(self):
        # Form with two inputs and an action URL that has its own query string
        form_params = FormParameters()
        form_params.add_input([("name", "username"), ("value", "abc")])
        form_params.add_input([("name", "address"), ("value", "")])
        form_params.set_action(URL('http://example.com/?id=1'))
        form_params.set_method('post')

        form = dc_from_form_params(form_params)

        fr = FuzzableRequest.from_form(form)

        expected = u'(POST)-http://example.com/?id=number!username=string&address=string'
        self.assertEqual(self.vdb._clean_fuzzable_request(fr), expected)
示例#12
0
 def setUp(self):
     # Test fixture: reset MiscSettings to its default values, call
     # create_temp_dir() and then build a new VariantDB for the test
     MiscSettings().set_default_values()
     create_temp_dir()
     self.vdb = VariantDB()
示例#13
0
class TestVariantDB(unittest.TestCase):
    """
    Unit tests for VariantDB.append() and clean_fuzzable_request().

    In these tests append() is expected to return True while the DB still
    accepts a fuzzable request and False once the limit that applies to it
    (PARAMS_MAX_VARIANTS, PATH_MAX_VARIANTS or MAX_EQUAL_FORM_VARIANTS) was
    reached.
    """
    def setUp(self):
        # Reset the misc settings to defaults and start with an empty DB
        MiscSettings().set_default_values()
        create_temp_dir()
        self.vdb = VariantDB()

    #
    #   Query string variants, limited by PARAMS_MAX_VARIANTS
    #
    def test_db_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s'

        for i in xrange(PARAMS_MAX_VARIANTS):
            url = URL(url_fmt % i)
            self.assertTrue(self.vdb.append(fr(url)))

        extra_url = URL(url_fmt % (PARAMS_MAX_VARIANTS + 1, ))
        self.assertFalse(self.vdb.append(fr(extra_url)))

    def test_db_int_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=1'

        for i in xrange(PARAMS_MAX_VARIANTS):
            url = URL(url_fmt % i)
            self.assertTrue(self.vdb.append(fr(url)))

        extra_url = URL(url_fmt % (PARAMS_MAX_VARIANTS + 1, ))
        self.assertFalse(self.vdb.append(fr(extra_url)))

    def test_db_int_int_var(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'

        for i in xrange(PARAMS_MAX_VARIANTS):
            url = URL(url_fmt % (i, i))
            self.assertTrue(self.vdb.append(fr(url)))

        url = URL(url_fmt % (PARAMS_MAX_VARIANTS + 1, PARAMS_MAX_VARIANTS + 1))
        self.assertFalse(self.vdb.append(fr(url)))

    def test_db_int_str(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'

        for i in xrange(PARAMS_MAX_VARIANTS):
            url = URL(url_fmt % (i, 'abc' * i))
            self.assertTrue(self.vdb.append(fr(url)))

        url = URL(url_fmt % (PARAMS_MAX_VARIANTS + 1, 'abc' *
                             (PARAMS_MAX_VARIANTS + 1)))
        self.assertFalse(self.vdb.append(fr(url)))

    def test_db_int_str_then_int_int(self):
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'

        # Add (int, str)
        for i in xrange(PARAMS_MAX_VARIANTS):
            url = URL(url_fmt % (i, 'abc' * i))
            self.assertTrue(self.vdb.append(fr(url)))

        # Add (int, int)
        for i in xrange(PARAMS_MAX_VARIANTS):
            url = URL(url_fmt % (i, i))
            self.assertTrue(self.vdb.append(fr(url)))

        # Both (int, int) and (int, str) are expected to be full now
        url = URL(url_fmt % (PARAMS_MAX_VARIANTS + 1, PARAMS_MAX_VARIANTS + 1))
        self.assertFalse(self.vdb.append(fr(url)))

        url = URL(url_fmt % (PARAMS_MAX_VARIANTS + 1, 'spameggs'))
        self.assertFalse(self.vdb.append(fr(url)))

    #
    #   Expected output of clean_fuzzable_request(): without a query string
    #   the last path item is replaced by FILENAME_TOKEN / PATH_TOKEN, with
    #   a query string the values are replaced by "number" / "string"
    #
    def test_clean_fuzzable_request_simple(self):
        u = 'http://w3af.org/'
        s = clean_fuzzable_request(fr(URL(u)))
        e = u'(GET)-http://w3af.org/'
        self.assertEqual(s, e)

    def test_clean_fuzzable_request_file(self):
        u = 'http://w3af.org/index.php'
        s = clean_fuzzable_request(fr(URL(u)))
        e = u'(GET)-http://w3af.org/%s.php' % FILENAME_TOKEN
        self.assertEqual(s, e)

    def test_clean_fuzzable_request_directory_file(self):
        u = 'http://w3af.org/foo/index.php'
        s = clean_fuzzable_request(fr(URL(u)))
        e = u'(GET)-http://w3af.org/foo/%s.php' % FILENAME_TOKEN
        self.assertEqual(s, e)

    def test_clean_fuzzable_request_directory_file_int(self):
        u = 'http://w3af.org/foo/index.php?id=2'
        s = clean_fuzzable_request(fr(URL(u)))
        e = u'(GET)-http://w3af.org/foo/index.php?id=number'
        self.assertEqual(s, e)

    def test_clean_fuzzable_request_int(self):
        u = 'http://w3af.org/index.php?id=2'
        s = clean_fuzzable_request(fr(URL(u)))
        e = u'(GET)-http://w3af.org/index.php?id=number'
        self.assertEqual(s, e)

    def test_clean_fuzzable_request_int_str(self):
        u = 'http://w3af.org/index.php?id=2&foo=bar'
        s = clean_fuzzable_request(fr(URL(u)))
        e = u'(GET)-http://w3af.org/index.php?id=number&foo=string'
        self.assertEqual(s, e)

    def test_clean_fuzzable_request_int_str_empty(self):
        u = 'http://w3af.org/index.php?id=2&foo=bar&spam='
        s = clean_fuzzable_request(fr(URL(u)))
        e = u'(GET)-http://w3af.org/index.php?id=number&foo=string&spam=string'
        self.assertEqual(s, e)

    def test_clean_fuzzable_request_directory_file_no_params(self):
        u = 'http://w3af.org/foo/index.php'
        s = clean_fuzzable_request(fr(URL(u)))
        e = u'(GET)-http://w3af.org/foo/%s.php' % FILENAME_TOKEN
        self.assertEqual(s, e)

    def test_clean_fuzzable_request_directory(self):
        u = 'http://w3af.org/foo/'
        s = clean_fuzzable_request(fr(URL(u)))
        e = u'(GET)-http://w3af.org/%s/' % PATH_TOKEN
        self.assertEqual(s, e)

    def test_clean_fuzzable_request_directory_parent_path(self):
        u = 'http://w3af.org/spam/foo/'
        s = clean_fuzzable_request(fr(URL(u)))
        e = u'(GET)-http://w3af.org/spam/%s/' % PATH_TOKEN
        self.assertEqual(s, e)

    def test_clean_form_fuzzable_request(self):
        # Note: the local "fr" name shadows the fr() helper used by the other
        # tests, but only inside this method
        fr = FuzzableRequest(URL("http://www.w3af.com/"),
                             headers=Headers([('Host', 'www.w3af.com')]),
                             method='POST',
                             post_data=KeyValueContainer(init_val=[('data',
                                                                    ['23'])]))

        expected = u'(POST)-http://www.w3af.com/!data=number'
        self.assertEqual(clean_fuzzable_request(fr), expected)

    def test_clean_form_fuzzable_request_form(self):
        form_params = FormParameters()
        form_params.add_field_by_attr_items([("name", "username"),
                                             ("value", "abc")])
        form_params.add_field_by_attr_items([("name", "address"),
                                             ("value", "")])
        form_params.set_action(URL('http://example.com/?id=1'))
        form_params.set_method('post')

        form = dc_from_form_params(form_params)

        fr = FuzzableRequest.from_form(form)

        expected = u'(POST)-http://example.com/' \
                   u'?id=number!username=string&address=string'
        self.assertEqual(clean_fuzzable_request(fr), expected)

    #
    #   File name and path variants, limited by PATH_MAX_VARIANTS
    #
    def test_db_many_files_in_root(self):
        url_fmt = 'http://w3af.org/foo%s.htm'

        for i in xrange(PATH_MAX_VARIANTS):
            url = URL(url_fmt % i)
            self.assertTrue(self.vdb.append(fr(url)))

        extra_url = URL(url_fmt % (PATH_MAX_VARIANTS + 1, ))
        self.assertFalse(self.vdb.append(fr(extra_url)))

    def test_db_many_files_in_root_without_extension(self):
        url_fmt = 'http://w3af.org/foo%s'

        for i in xrange(PATH_MAX_VARIANTS):
            url = URL(url_fmt % i)
            self.assertTrue(self.vdb.append(fr(url)))

        extra_url = URL(url_fmt % (PATH_MAX_VARIANTS + 1, ))
        self.assertFalse(self.vdb.append(fr(extra_url)))

    def test_db_many_files_different_extensions_in_root(self):
        url_fmt = 'http://w3af.org/foo%s.htm'

        for i in xrange(PATH_MAX_VARIANTS):
            url = URL(url_fmt % i)
            self.assertTrue(self.vdb.append(fr(url)))

        extra_url = URL(url_fmt % (PATH_MAX_VARIANTS + 1, ))
        self.assertFalse(self.vdb.append(fr(extra_url)))

        #
        #   Now a different extension
        #
        url_fmt = 'http://w3af.org/foo%s.jpeg'

        for i in xrange(PATH_MAX_VARIANTS):
            url = URL(url_fmt % i)
            self.assertTrue(self.vdb.append(fr(url)))

        extra_url = URL(url_fmt % (PATH_MAX_VARIANTS + 1, ))
        self.assertFalse(self.vdb.append(fr(extra_url)))

    def test_db_many_paths_in_root(self):
        url_fmt = 'http://w3af.org/foo%s/'

        for i in xrange(PATH_MAX_VARIANTS):
            url = URL(url_fmt % i)
            self.assertTrue(self.vdb.append(fr(url)))

        extra_url = URL(url_fmt % (PATH_MAX_VARIANTS + 1, ))
        self.assertFalse(self.vdb.append(fr(extra_url)))

    def test_db_many_paths_in_other_directories(self):
        url_fmt = 'http://w3af.org/foo/bar%s/'

        for i in xrange(PATH_MAX_VARIANTS):
            url = URL(url_fmt % i)
            self.assertTrue(self.vdb.append(fr(url)))

        extra_url = URL(url_fmt % (PATH_MAX_VARIANTS + 1, ))
        self.assertFalse(self.vdb.append(fr(extra_url)))

        #
        #   Now a different parent directory
        #
        url_fmt = 'http://w3af.org/spam/bar%s/'

        for i in xrange(PATH_MAX_VARIANTS):
            url = URL(url_fmt % i)
            self.assertTrue(self.vdb.append(fr(url)))

        extra_url = URL(url_fmt % (PATH_MAX_VARIANTS + 1, ))
        self.assertFalse(self.vdb.append(fr(extra_url)))

    def test_db_many_files_other_directories(self):
        url_fmt = 'http://w3af.org/spam/foo%s.htm'

        for i in xrange(PATH_MAX_VARIANTS):
            url = URL(url_fmt % i)
            self.assertTrue(self.vdb.append(fr(url)))

        extra_url = URL(url_fmt % (PATH_MAX_VARIANTS + 1, ))
        self.assertFalse(self.vdb.append(fr(extra_url)))

        #
        #   Now a different parent path and the same extension
        #
        url_fmt = 'http://w3af.org/eggs/foo%s.htm'

        for i in xrange(PATH_MAX_VARIANTS):
            url = URL(url_fmt % i)
            self.assertTrue(self.vdb.append(fr(url)))

        extra_url = URL(url_fmt % (PATH_MAX_VARIANTS + 1, ))
        self.assertFalse(self.vdb.append(fr(extra_url)))

    def test_db_many_files_different_path_length_directories(self):
        url_fmt = 'http://w3af.org/spam/foo%s.htm'

        for i in xrange(PATH_MAX_VARIANTS):
            url = URL(url_fmt % i)
            self.assertTrue(self.vdb.append(fr(url)))

        extra_url = URL(url_fmt % (PATH_MAX_VARIANTS + 1, ))
        self.assertFalse(self.vdb.append(fr(extra_url)))

        #
        #   Now a different parent path and the same extension
        #
        #   Note the /bar/ here! This is what makes this test different
        url_fmt = 'http://w3af.org/eggs/bar/foo%s.htm'

        for i in xrange(PATH_MAX_VARIANTS):
            url = URL(url_fmt % i)
            self.assertTrue(self.vdb.append(fr(url)))

        extra_url = URL(url_fmt % (PATH_MAX_VARIANTS + 1, ))
        self.assertFalse(self.vdb.append(fr(extra_url)))

    #
    #   Appending exactly the same request twice is expected to be rejected
    #   the second time
    #
    def test_db_same_without_qs(self):
        url = URL('http://w3af.org/spam/foo.htm')

        self.assertTrue(self.vdb.append(fr(url)))
        self.assertFalse(self.vdb.append(fr(url)))

    def test_db_same_with_qs(self):
        url = URL('http://w3af.org/spam/foo.htm?id=2&abc=333')

        self.assertTrue(self.vdb.append(fr(url)))
        self.assertFalse(self.vdb.append(fr(url)))

    #
    #   URLs with non-ascii characters
    #
    #   NOTE(review): the first test compares against a u'' literal while the
    #   next three use plain string literals with non-ascii characters,
    #   confirm this difference is intended
    #
    def test_encoding_issues_se(self):
        u = u'http://w3af.org/vård.png'
        s = clean_fuzzable_request(fr(URL(u)))
        e = u'(GET)-http://w3af.org/file-5692fef3f5dcd97.png'
        self.assertEqual(s, e)

    def test_encoding_issues_se_with_qs(self):
        u = u'http://w3af.org/vård.png?id=1'
        s = clean_fuzzable_request(fr(URL(u)))
        e = '(GET)-http://w3af.org/vård.png?id=number'
        self.assertEqual(s, e)

    def test_encoding_issues_se_filename(self):
        u = u'http://w3af.org/x.vård'
        s = clean_fuzzable_request(fr(URL(u)))
        e = '(GET)-http://w3af.org/file-5692fef3f5dcd97.vård'
        self.assertEqual(s, e)

    def test_encoding_issues_se_path(self):
        u = u'http://w3af.org/vård/xyz.html'
        s = clean_fuzzable_request(fr(URL(u)))
        e = '(GET)-http://w3af.org/vård/file-5692fef3f5dcd97.html'
        self.assertEqual(s, e)

    #
    #   Forms, limited by MAX_EQUAL_FORM_VARIANTS
    #
    def test_same_form_different_url(self):
        # Same two form parameters, only the last path item of the action
        # URL changes
        def create_fuzzable_request(_id):
            url_fmt = 'http://example.com/product/%s'

            form_params = FormParameters()
            form_params.add_field_by_attr_items([("name", "username"),
                                                 ("value", "abc")])
            form_params.add_field_by_attr_items([("name", "address"),
                                                 ("value", "")])
            form_params.set_action(URL(url_fmt % _id))
            form_params.set_method('post')

            form = dc_from_form_params(form_params)

            return FuzzableRequest.from_form(form)

        # These two make sure we're returning false in the last call to
        # append because of MAX_EQUAL_FORM_VARIANTS and not any other limits
        self.assertGreater(PARAMS_MAX_VARIANTS, MAX_EQUAL_FORM_VARIANTS)
        self.assertGreater(PATH_MAX_VARIANTS, MAX_EQUAL_FORM_VARIANTS)

        for i in xrange(MAX_EQUAL_FORM_VARIANTS):
            fri = create_fuzzable_request(i)
            self.assertTrue(self.vdb.append(fri))

        # "i" still holds the last value from the loop above
        fri = create_fuzzable_request(i + 1)
        self.assertFalse(self.vdb.append(fri))

    def test_same_form_completely_different_url(self):
        # Same two form parameters, the action URL is built from _id * 5
        # random path items
        def create_fuzzable_request(_id):
            path_count = _id * 5
            paths = [rand_alnum(9) for _ in xrange(path_count)]
            url = 'http://example.com/%s' % '/'.join(paths)

            form_params = FormParameters()
            form_params.add_field_by_attr_items([("name", "username"),
                                                 ("value", "abc")])
            form_params.add_field_by_attr_items([("name", "address"),
                                                 ("value", "")])
            form_params.set_action(URL(url))
            form_params.set_method('post')

            form = dc_from_form_params(form_params)

            return FuzzableRequest.from_form(form)

        for i in xrange(MAX_EQUAL_FORM_VARIANTS):
            fri = create_fuzzable_request(i)
            self.assertTrue(self.vdb.append(fri))

        # "i" still holds the last value from the loop above
        fri = create_fuzzable_request(i + 1)
        self.assertFalse(self.vdb.append(fri))

    def test_different_form_different_url(self):
        # The first parameter name and the action URL change with _id, all
        # requests are expected to be accepted
        def create_fuzzable_request(_id):
            url_fmt = 'http://example.com/product/%s'

            form_params = FormParameters()
            form_params.add_field_by_attr_items([("name", "username%s" % _id),
                                                 ("value", "abc")])
            form_params.add_field_by_attr_items([("name", "address"),
                                                 ("value", "")])
            form_params.set_action(URL(url_fmt % _id))
            form_params.set_method('post')

            form = dc_from_form_params(form_params)

            return FuzzableRequest.from_form(form)

        for i in xrange(MAX_EQUAL_FORM_VARIANTS * 2):
            fri = create_fuzzable_request(i)
            self.assertTrue(self.vdb.append(fri))

    def test_different_form_same_url(self):
        # Only the first parameter name changes with _id, the action URL is
        # fixed; all requests are expected to be accepted
        def create_fuzzable_request(_id):
            url = 'http://example.com/product/1'

            form_params = FormParameters()
            form_params.add_field_by_attr_items([("name", "username%s" % _id),
                                                 ("value", "abc")])
            form_params.add_field_by_attr_items([("name", "address"),
                                                 ("value", "")])
            form_params.set_action(URL(url))
            form_params.set_method('post')

            form = dc_from_form_params(form_params)

            return FuzzableRequest.from_form(form)

        for i in xrange(MAX_EQUAL_FORM_VARIANTS * 2):
            fri = create_fuzzable_request(i)
            self.assertTrue(self.vdb.append(fri))

    def test_forms_with_one_parameter_always_more_variants(self):
        # Forms with a single parameter are expected to be accepted even
        # after MAX_EQUAL_FORM_VARIANTS requests were appended
        def create_fuzzable_request(_id):
            url_fmt = 'http://example.com/product/%s'

            form_params = FormParameters()
            form_params.add_field_by_attr_items([("name", "username"),
                                                 ("value", "abc")])
            form_params.set_action(URL(url_fmt % _id))
            form_params.set_method('post')

            form = dc_from_form_params(form_params)

            return FuzzableRequest.from_form(form)

        for i in xrange(MAX_EQUAL_FORM_VARIANTS * 2):
            fri = create_fuzzable_request(i)
            self.assertTrue(self.vdb.append(fri))
示例#14
0
class web_spider(CrawlPlugin):
    """
    Crawl the web application.

    :author: Andres Riancho ([email protected])
    """
    UNAUTH_FORBID = {http_constants.UNAUTHORIZED, http_constants.FORBIDDEN}

    def __init__(self):
        """
        Initialize the plugin's internal state and the default values for
        the user configured options.
        """
        CrawlPlugin.__init__(self)

        # Internal variables
        # Compiled versions of the ignore / follow regular expressions, they
        # are matched against URLs in _should_analyze_url()
        self._compiled_ignore_re = None
        self._compiled_follow_re = None
        self._broken_links = DiskSet(table_prefix='web_spider')
        # Set to False by _handle_first_run(), which also fills
        # _target_urls and _target_domain from the configured targets
        self._first_run = True
        self._target_urls = []
        self._target_domain = None
        # URLs of the forms crawl() already filled and sent
        self._already_filled_form = ScalableBloomFilter()
        # Used in _should_verify_extracted_url() to discard references for
        # which enough variants were already seen
        self._variant_db = VariantDB()

        # User configured variables
        self._ignore_regex = ''
        self._follow_regex = '.*'
        self._only_forward = False
        self._ignore_extensions = []
        # Presumably compiles the regular expressions configured above into
        # the _compiled_*_re attributes (not visible here, confirm)
        self._compile_re()

    def crawl(self, fuzzable_request, debugging_id):
        """
        Send the fuzzable request and search the HTTP response for new forms
        and links.

        :param fuzzable_request: A fuzzable_req instance that contains
                                 (among other things) the URL to test.
        :param debugging_id: A unique identifier for this call to discover()
        """
        self._handle_first_run()

        #
        # Forms get their parameters smart_fill()'ed in order to send
        # something that makes sense and allows us to cover more code. Each
        # form URL is only processed once.
        #
        raw_data = fuzzable_request.get_raw_data()

        if isinstance(raw_data, Form):
            if fuzzable_request.get_url() in self._already_filled_form:
                return

            self._already_filled_form.add(fuzzable_request.get_url())
            raw_data.smart_fill()

        # Send the HTTP request
        http_response = self._uri_opener.send_mutant(fuzzable_request)

        # Nothing to do here for unauthorized responses and images. Also, we
        # don't trust what comes from the core, so check if it is a 404.
        #
        # The checks short-circuit, thus they run in this exact order
        nothing_to_do = (
            http_response.get_code() == http_constants.UNAUTHORIZED
            or http_response.is_image()
            or is_404(http_response)
        )

        if nothing_to_do:
            return

        self._extract_html_forms(http_response, fuzzable_request)
        self._extract_links_and_verify(http_response, fuzzable_request)

    def _extract_html_forms(self, resp, fuzzable_req):
        """
        Find the HTML forms in the HTTP response body and put() one
        FuzzableRequest per form variant on the output queue.

        :param resp: HTTP response object
        :param fuzzable_req: The HTTP request that generated the response,
                             its headers are copied to the new requests
        """
        try:
            doc_parser = parser_cache.dpc.get_document_parser_for(resp)
        except BaseFrameworkException:
            # There is no suitable parser for this document, so no forms
            return

        fuzzing_mode = cf.cf.get('form_fuzzing_mode')

        for form in doc_parser.get_forms():

            # Form exclusion #15161
            form_id_json = form.get_form_id().to_json()
            om.out.debug('A new form was found! Form-id is: "%s"' % form_id_json)

            if self._should_analyze_url(form.get_action()):
                request_headers = fuzzable_req.get_headers()

                # One FuzzableRequest for each form variant
                for variant in form.get_variants(fuzzing_mode):
                    # dc_from_form_params returns a Multipart or URLEncoded
                    # form instance, which is a DataContainer. Much better
                    # than the FormParameters instance held in variant
                    container = dc_from_form_params(variant)

                    new_request = FuzzableRequest.from_form(
                        container, headers=request_headers)
                    self.output_queue.put(new_request)

    def _handle_first_run(self):
        """
        On the first call, store the target URLs and the target domain,
        which are required to implement the "only_forward" feature and the
        domain filter. Later calls do nothing.
        """
        if self._first_run:
            self._first_run = False
            self._target_urls = [target.uri2url()
                                 for target in cf.cf.get('targets')]

            # Reading the domain with:
            #
            #   cf.cf.get('targets')[0].get_domain()
            #
            # triggered lots of bugs when the "stop" button was pressed and
            # the core did this: "cf.cf.save('targets', [])"
            #
            # So the (awful but bug-free) alternative is to check if there
            # are any targets before reading the first one.
            configured_targets = cf.cf.get('targets')

            if configured_targets:
                self._target_domain = configured_targets[0].get_domain()
                
    def _urls_to_verify_generator(self, resp, fuzzable_req):
        """
        Chain the URL path, response body and response headers generators
        and yield only the references accepted by
        _should_verify_extracted_url(). The yielded tuples contain:
            * Newly found URL
            * The FuzzableRequest instance passed as parameter
            * The HTTPResponse generated by the FuzzableRequest
            * Boolean indicating if the reference is possibly broken

        :param resp: HTTP response object
        :param fuzzable_req: The HTTP request that generated the response
        """
        sources = (self._url_path_url_generator(resp, fuzzable_req),
                   self._body_url_generator(resp, fuzzable_req),
                   headers_url_generator(resp, fuzzable_req))

        for candidate in itertools.chain(*sources):
            ref, request, original_resp, possibly_broken = candidate

            if not self._should_verify_extracted_url(ref, original_resp):
                continue

            yield ref, request, original_resp, possibly_broken

    def _url_path_url_generator(self, resp, fuzzable_req):
        """
        Yield one tuple for each directory in the response URL. The tuples
        contain:
            * Newly found URL (the directory)
            * The FuzzableRequest instance passed as parameter
            * The HTTPResponse generated by the FuzzableRequest
            * False, since these references are never flagged as possibly
              broken

        For example, if the URL w3af just found is:

            http://localhost/a/b/c/f00.php

        I want to GET and analyze the responses for:

            http://localhost/a/b/c/
            http://localhost/a/b/
            http://localhost/a/
            http://localhost/

        :param resp: HTTP response object
        :param fuzzable_req: The HTTP request that generated the response
        """
        response_url = resp.get_url()
        directories = response_url.get_directories()

        for directory in unique_justseen(directories):
            yield directory, fuzzable_req, resp, False

    def _body_url_generator(self, resp, fuzzable_req):
        """
        Extract references from the HTTP response body using one of the
        framework's parsers and yield them as tuples containing:
            * Newly found URL
            * The FuzzableRequest instance passed as parameter
            * The HTTPResponse generated by the FuzzableRequest
            * Boolean indicating if the reference is possibly broken: True
              when the response is a 404 or when the reference was only
              found by the regular expressions

        Nothing is yielded when there is no parser for the response.

        :param resp: HTTP response object
        :param fuzzable_req: The HTTP request that generated the response
        """
        #
        # Note: I WANT to follow links that are in the 404 page.
        #
        try:
            parser = parser_cache.dpc.get_document_parser_for(resp)
        except BaseFrameworkException as bfe:
            om.out.debug('Failed to find a suitable document parser. '
                         'Exception "%s"' % bfe)
            return

        # Note:
        #
        # - parsed_refs are 100% something in the HTML that the developer
        #   intended to add.
        #
        # - re_refs are the result of regular expressions, which in some
        #   cases are just false positives.
        parsed_refs, re_refs = parser.get_references()

        directories = resp.get_url().get_directories()
        regex_only_refs = set(re_refs) - set(directories + parsed_refs)

        found_in_404 = is_404(resp)
        sorted_refs = sorted(itertools.chain(parsed_refs, re_refs))

        for ref in unique_justseen(sorted_refs):
            possibly_broken = found_in_404 or (ref in regex_only_refs)
            yield ref, fuzzable_req, resp, possibly_broken

    def _should_analyze_url(self, ref):
        """
        :param ref: A URL instance to match against the user configured filters
        :return: True if we should navigate to this URL
        """
        # I don't want w3af sending requests to 3rd parties!
        if ref.get_domain() != self._target_domain:
            msg = 'web_spider will ignore %s (different domain name)'
            args = (ref.get_domain(),)
            om.out.debug(msg % args)
            return False

        # Filter the URL according to the configured regular expressions
        if not self._compiled_follow_re.match(ref.url_string):
            msg = 'web_spider will ignore %s (not match follow regex)'
            args = (ref.url_string,)
            om.out.debug(msg % args)
            return False

        if self._compiled_ignore_re.match(ref.url_string):
            msg = 'web_spider will ignore %s (match ignore regex)'
            args = (ref.url_string,)
            om.out.debug(msg % args)
            return False

        if self._has_ignored_extension(ref):
            msg = 'web_spider will ignore %s (match ignore extensions)'
            args = (ref.url_string,)
            om.out.debug(msg % args)
            return False

        # Implementing only forward
        if self._only_forward and not self._is_forward(ref):
            msg = 'web_spider will ignore %s (is not forward)'
            args = (ref.url_string,)
            om.out.debug(msg % args)
            return False

        return True

    def _has_ignored_extension(self, new_url):
        if not self._ignore_extensions:
            return False

        return new_url.get_extension().lower() in self._ignore_extensions

    def _should_verify_extracted_url(self, ref, resp):
        """
        Decide if a reference found in an HTTP response is worth requesting.

        :param ref: A newly found URL
        :param resp: The HTTP response where the URL was found

        :return: Boolean indicating if I should send this new reference to the
                 core.
        """
        # Ignore myself and anything rejected by the user configured filters
        if ref == resp.get_uri() or not self._should_analyze_url(ref):
            return False

        #
        # There are two VariantDB instances in the framework: this one and
        # the one in the core. I tried to have only one, but after some
        # tests and architecture considerations it was better to duplicate
        # the data.
        #
        # In the future I'll run plugins in different processes than the core,
        # so it makes sense to have independent plugins.
        #
        # Without the web_spider VariantDB the framework keeps working, but
        # this method returns True much more often, which leads to extra
        # HTTP requests for URLs which we already checked and the core will
        # dismiss anyway
        #
        is_new_variant = self._variant_db.append(FuzzableRequest(ref))

        return True if is_new_variant else False

    def _extract_links_and_verify(self, resp, fuzzable_req):
        """
        This is a very basic method that will send the work to different
        threads. Work is generated by the _urls_to_verify_generator

        :param resp: HTTP response object
        :param fuzzable_req: The HTTP request that generated the response
        """
        self.worker_pool.map_multi_args(
            self._verify_reference,
            self._urls_to_verify_generator(resp, fuzzable_req))

    def _verify_reference(self, reference, original_request,
                          original_response, possibly_broken,
                          be_recursive=True):
        """
        GET a newly found link and act on the answer: links which are not a
        404 are sent to the output queue, while 404 responses are parsed for
        more links (one level only) and may be stored as broken links.

        :param reference: Newly found URL
        :param original_request: The FuzzableRequest instance which generated
                                 the response where the new URL was found
        :param original_response: The HTTPResponse generated by the
                                  FuzzableRequest
        :param possibly_broken: Boolean indicating if we trust this reference
                                or not; a 404 is only stored as a broken link
                                when this is False
        :param be_recursive: When True, the references extracted from a 404
                             response are verified too, using
                             be_recursive=False for those calls
        """
        #
        # Sending the complete original URL as referer "breaks" cache=True in
        # most cases:
        #     headers = { 'Referer': original_url }
        #
        # The base URL does not have that problem, and it is friendlier than
        # simply ignoring the referer
        #
        referer = original_response.get_url().base_url().url_string
        headers = Headers([('Referer', referer)])

        # Note: grep=False is used because this HTTP request/response has a
        #       high probability of being a 404, and the grep plugins already
        #       got enough 404 responses to analyze (from is_404 for
        #       example). If it's not a 404 then it is pushed to the core and
        #       it will come back to this plugin's crawl() where it will be
        #       requested with grep=True
        resp = self._uri_opener.GET(reference,
                                    cache=True,
                                    headers=headers,
                                    grep=False)

        if is_404(resp):
            # Note: The links which are in the 404 page ARE followed, but
            # the 404 itself is NOT returned to the core.
            if be_recursive:
                #
                # Only one level of links in 404 pages is followed
                # (be_recursive=False below), this limits the potential
                # issue when this is found:
                #
                #   http://foo.com/abc/ => 404
                #   Body: <a href="def/">link</a>
                #
                # Which would lead to requests to:
                #   * http://foo.com/abc/
                #   * http://foo.com/abc/def/
                #   * http://foo.com/abc/def/def/
                #   * http://foo.com/abc/def/def/def/
                #   * ...
                #
                # Threads are not used here, it will dead-lock (for unknown
                # reasons). This is tested in TestDeadLock unittest.
                nested_work = self._urls_to_verify_generator(resp,
                                                             original_request)
                for nested_args in nested_work:
                    self._verify_reference(*nested_args, be_recursive=False)

            # Store the broken links
            trusted = not possibly_broken
            if trusted and resp.get_code() not in self.UNAUTH_FORBID:
                broken_link = (resp.get_url(), original_request.get_uri())
                self._broken_links.add(broken_link)

            return

        found_fmt = '[web_spider] Found new link "%s" at "%s"'
        om.out.debug(found_fmt % (reference, original_response.get_url()))

        # Setting the referer and cookie in the FuzzableRequest sent to the
        # core allows the fuzzer to create CookieMutant and HeadersMutant
        # instances.
        #
        # Without setting the Cookie, the CookieMutant would never have any
        # data to modify; remember that cookies are actually set by the
        # urllib2 cookie handler when the request already exited the
        # framework.
        new_request = FuzzableRequest(reference, headers=headers)
        cookie = Cookie.from_http_response(original_response)

        new_request.set_referer(referer)
        new_request.set_cookie(cookie)

        self.output_queue.put(new_request)

    def end(self):
        """
        Called when the process ends: prints out the list of broken links
        (if there are any) and then calls cleanup() on the broken links
        storage.
        """
        broken_links = self._broken_links

        if len(broken_links):
            header = ('The following is a list of broken links that'
                      ' were found by the web_spider plugin:')
            om.out.information(header)

            line_fmt = '- %s [ referenced from: %s ]'
            for broken, where in unique_justseen(broken_links.ordered_iter()):
                om.out.information(line_fmt % (broken, where))

        # The cleanup is performed even when there was nothing to report
        broken_links.cleanup()

    def _is_forward(self, reference):
        """
        Check if the reference is inside the target directories.

        :return: True if reference is an URL inside the directory structure of
                 at least one of the target URLs.
        """
        for domain_path in self._target_urls:
            if reference.url_string.startswith(domain_path.url_string):
                return True

        return False

    def get_options(self):
        """
        :return: A list of option objects for this plugin: only_forward,
                 follow_regex, ignore_regex and ignore_extensions (added in
                 that order).
        """
        options = OptionList()

        # Shared by the descriptions of both regular expression options
        precedence_note = (' Note that ignore_regex has precedence over'
                           ' follow_regex.')

        forward_desc = ('Only crawl links to paths inside the URL given as'
                        ' target.')
        options.add(opt_factory('only_forward', self._only_forward,
                                forward_desc, BOOL))

        follow_desc = ('Only crawl links that match this regular expression.'
                       + precedence_note)
        options.add(opt_factory('follow_regex', self._follow_regex,
                                follow_desc, REGEX))

        ignore_desc = ('DO NOT crawl links that match this regular'
                       ' expression.' + precedence_note)
        options.add(opt_factory('ignore_regex', self._ignore_regex,
                                ignore_desc, REGEX))

        extensions_desc = 'DO NOT crawl links that use these extensions.'
        extensions_help = ('This configuration parameter is commonly used to'
                           ' ignore static files such as zip, pdf, jpeg, etc.'
                           ' It is possible to ignore these files using'
                           ' `ignore_regex`, but configuring this parameter'
                           ' is easier and performs case insensitive'
                           ' matching.')
        options.add(opt_factory('ignore_extensions', self._ignore_extensions,
                                extensions_desc, LIST, help=extensions_help))

        return options

    def set_options(self, options_list):
        """
        This method sets all the options that are configured using the user
        interface generated by the framework using the result of
        get_options().

        The regular expressions are re-compiled after the new values are
        stored, and the ignored extensions are saved in lower case.

        :param options_list: A dictionary with the options for the plugin.
        :return: No value is returned.
        """
        def value_of(option_name):
            return options_list[option_name].get_value()

        self._only_forward = value_of('only_forward')

        self._ignore_regex = value_of('ignore_regex')
        self._follow_regex = value_of('follow_regex')
        self._compile_re()

        configured_extensions = value_of('ignore_extensions')
        self._ignore_extensions = [extension.lower()
                                   for extension in configured_extensions]

    def _compile_re(self):
        """
        Compile the regular expressions that are going to be used to ignore
        or follow links.
        """
        if self._ignore_regex:
            # Compilation of this regex can't fail because it was already
            # verified as valid at regex_option.py: see REGEX in get_options()
            self._compiled_ignore_re = re.compile(self._ignore_regex)
        else:
            # If the self._ignore_regex is empty then I don't have to ignore
            # anything. To be able to do that, I simply compile an re with "abc"
            # as the pattern, which won't match any URL since they will all
            # start with http:// or https://
            self._compiled_ignore_re = re.compile('abc')

        # Compilation of this regex can't fail because it was already
        # verified as valid at regex_option.py: see REGEX in get_options()
        self._compiled_follow_re = re.compile(self._follow_regex)

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """
示例#15
0
class TestVariantDB(unittest.TestCase):
    """
    Tests for VariantDB: need_more_variants() is expected to turn False
    once DEFAULT_MAX_VARIANTS similar URLs were appended, and
    _clean_reference() is expected to replace the query string values with
    the name of their type.
    """

    def setUp(self):
        """Call create_temp_dir() and build the VariantDB under test."""
        create_temp_dir()
        self.vdb = VariantDB()

    def test_db_int(self):
        """One numeric parameter: no more variants after the maximum."""
        template = 'http://w3af.org/foo.htm?id=%s'

        for number in xrange(DEFAULT_MAX_VARIANTS):
            variant = URL(template % number)
            self.assertTrue(self.vdb.need_more_variants(variant))
            self.vdb.append(variant)

        one_too_many = URL(template % (DEFAULT_MAX_VARIANTS + 1,))
        self.assertFalse(self.vdb.need_more_variants(one_too_many))

    def test_db_int_int(self):
        """A changing numeric parameter next to a fixed numeric one."""
        template = 'http://w3af.org/foo.htm?id=%s&bar=1'

        for number in xrange(DEFAULT_MAX_VARIANTS):
            variant = URL(template % number)
            self.assertTrue(self.vdb.need_more_variants(variant))
            self.vdb.append(variant)

        one_too_many = URL(template % (DEFAULT_MAX_VARIANTS + 1,))
        self.assertFalse(self.vdb.need_more_variants(one_too_many))

    def test_db_int_int_var(self):
        """Two numeric parameters which change together."""
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'

        for number in xrange(DEFAULT_MAX_VARIANTS):
            variant = URL(template % (number, number))
            self.assertTrue(self.vdb.need_more_variants(variant))
            self.vdb.append(variant)

        beyond = DEFAULT_MAX_VARIANTS + 1
        one_too_many = URL(template % (beyond, beyond))
        self.assertFalse(self.vdb.need_more_variants(one_too_many))

    def test_db_int_str(self):
        """A numeric parameter together with a string parameter."""
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'

        for number in xrange(DEFAULT_MAX_VARIANTS):
            variant = URL(template % (number, 'abc' * number))
            self.assertTrue(self.vdb.need_more_variants(variant))
            self.vdb.append(variant)

        beyond = DEFAULT_MAX_VARIANTS + 1
        one_too_many = URL(template % (beyond, 'abc' * beyond))
        self.assertFalse(self.vdb.need_more_variants(one_too_many))

    def test_db_int_str_then_int_int(self):
        """(int, str) variants do not count against (int, int) variants."""
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        beyond = DEFAULT_MAX_VARIANTS + 1

        # Add (int, str)
        for number in xrange(DEFAULT_MAX_VARIANTS):
            variant = URL(template % (number, 'abc' * number))
            self.assertTrue(self.vdb.need_more_variants(variant))
            self.vdb.append(variant)

        # Please note that in this case (int, int) is being asked for, and
        # only (int, str) was added before
        int_int = URL(template % (beyond, beyond))
        self.assertTrue(self.vdb.need_more_variants(int_int))

        # Add (int, int)
        for number in xrange(DEFAULT_MAX_VARIANTS):
            variant = URL(template % (number, number))
            self.assertTrue(self.vdb.need_more_variants(variant))
            self.vdb.append(variant)

        int_int = URL(template % (beyond, beyond))
        self.assertFalse(self.vdb.need_more_variants(int_int))

    def test_clean_reference_simple(self):
        """A URL with only a domain is returned without changes."""
        cleaned = self.vdb._clean_reference(URL('http://w3af.org/'))
        self.assertEqual(cleaned, u'http://w3af.org/')

    def test_clean_reference_file(self):
        """A URL with a file name is returned without changes."""
        cleaned = self.vdb._clean_reference(URL('http://w3af.org/index.php'))
        self.assertEqual(cleaned, u'http://w3af.org/index.php')

    def test_clean_reference_directory_file(self):
        """A URL with a directory and file is returned without changes."""
        target = URL('http://w3af.org/foo/index.php')
        cleaned = self.vdb._clean_reference(target)
        self.assertEqual(cleaned, u'http://w3af.org/foo/index.php')

    def test_clean_reference_directory_file_int(self):
        """A numeric value in a URL with a directory becomes "number"."""
        target = URL('http://w3af.org/foo/index.php?id=2')
        cleaned = self.vdb._clean_reference(target)
        self.assertEqual(cleaned, u'http://w3af.org/foo/index.php?id=number')

    def test_clean_reference_int(self):
        """A numeric query string value becomes "number"."""
        target = URL('http://w3af.org/index.php?id=2')
        cleaned = self.vdb._clean_reference(target)
        self.assertEqual(cleaned, u'http://w3af.org/index.php?id=number')

    def test_clean_reference_int_str(self):
        """Numeric values become "number", text values become "string"."""
        target = URL('http://w3af.org/index.php?id=2&foo=bar')
        cleaned = self.vdb._clean_reference(target)
        self.assertEqual(cleaned,
                         u'http://w3af.org/index.php?id=number&foo=string')

    def test_clean_reference_int_str_empty(self):
        """An empty query string value becomes "string"."""
        target = URL('http://w3af.org/index.php?id=2&foo=bar&spam=')
        cleaned = self.vdb._clean_reference(target)
        expected = u'http://w3af.org/index.php?id=number&foo=string&spam=string'
        self.assertEqual(cleaned, expected)
示例#16
0
class TestVariantDB(unittest.TestCase):
    """
    Tests for the limits enforced by VariantDB.append() (query string
    parameters, paths, file names and forms) and for the normalization
    performed by clean_fuzzable_request().
    """

    def setUp(self):
        """
        Set the MiscSettings default values, call create_temp_dir() and
        build the VariantDB instance used by each test.
        """
        MiscSettings().set_default_values()
        create_temp_dir()
        self.vdb = VariantDB()

    #
    #   Helpers shared by the tests
    #
    def _assert_appended(self, url_strings):
        """Assert that append() is true for each URL string, in order."""
        for url_string in url_strings:
            self.assertTrue(self.vdb.append(fr(URL(url_string))))

    def _assert_rejected(self, url_string):
        """Assert that append() is false for the URL string."""
        self.assertFalse(self.vdb.append(fr(URL(url_string))))

    def _assert_limit(self, url_fmt, limit):
        """
        Assert that `limit` URLs built from url_fmt (one %s, filled with
        0..limit-1) are appended, and that the one built with limit + 1 is
        rejected.
        """
        self._assert_appended(url_fmt % i for i in xrange(limit))
        self._assert_rejected(url_fmt % (limit + 1))

    def _assert_cleaned(self, url_string, expected):
        """Assert the clean_fuzzable_request() output for a URL string."""
        cleaned = clean_fuzzable_request(fr(URL(url_string)))
        self.assertEqual(cleaned, expected)

    @staticmethod
    def _post_form_request(action, username_field='username',
                           with_address=True):
        """
        Build a FuzzableRequest from a POST form with a text field named
        `username_field` (value "abc") and, if `with_address` is True, an
        empty "address" field; `action` is the form action as a string.
        """
        form_params = FormParameters()
        form_params.add_field_by_attr_items([("name", username_field),
                                             ("value", "abc")])
        if with_address:
            form_params.add_field_by_attr_items([("name", "address"),
                                                 ("value", "")])
        form_params.set_action(URL(action))
        form_params.set_method('post')

        return FuzzableRequest.from_form(dc_from_form_params(form_params))

    #
    #   Query string parameter limits
    #
    def test_db_int(self):
        """One numeric parameter is limited by PARAMS_MAX_VARIANTS."""
        self._assert_limit('http://w3af.org/foo.htm?id=%s',
                           PARAMS_MAX_VARIANTS)

    def test_db_int_int(self):
        """A changing numeric parameter next to a fixed numeric one."""
        self._assert_limit('http://w3af.org/foo.htm?id=%s&bar=1',
                           PARAMS_MAX_VARIANTS)

    def test_db_int_int_var(self):
        """Two numeric parameters which change together."""
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        beyond = PARAMS_MAX_VARIANTS + 1

        self._assert_appended(url_fmt % (i, i)
                              for i in xrange(PARAMS_MAX_VARIANTS))
        self._assert_rejected(url_fmt % (beyond, beyond))

    def test_db_int_str(self):
        """A numeric parameter together with a string parameter."""
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        beyond = PARAMS_MAX_VARIANTS + 1

        self._assert_appended(url_fmt % (i, 'abc' * i)
                              for i in xrange(PARAMS_MAX_VARIANTS))
        self._assert_rejected(url_fmt % (beyond, 'abc' * beyond))

    def test_db_int_str_then_int_int(self):
        """(int, str) and (int, int) variants are limited independently."""
        url_fmt = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        beyond = PARAMS_MAX_VARIANTS + 1

        # Add (int, str)
        self._assert_appended(url_fmt % (i, 'abc' * i)
                              for i in xrange(PARAMS_MAX_VARIANTS))

        # Add (int, int)
        self._assert_appended(url_fmt % (i, i)
                              for i in xrange(PARAMS_MAX_VARIANTS))

        # Both types are full now
        self._assert_rejected(url_fmt % (beyond, beyond))
        self._assert_rejected(url_fmt % (beyond, 'spameggs'))

    #
    #   clean_fuzzable_request()
    #
    def test_clean_fuzzable_request_simple(self):
        """A URL with only a domain just gets the method prefix."""
        self._assert_cleaned('http://w3af.org/',
                             u'(GET)-http://w3af.org/')

    def test_clean_fuzzable_request_file(self):
        """The file name is replaced by FILENAME_TOKEN."""
        self._assert_cleaned('http://w3af.org/index.php',
                             u'(GET)-http://w3af.org/%s.php' % FILENAME_TOKEN)

    def test_clean_fuzzable_request_directory_file(self):
        """The file name inside a directory becomes FILENAME_TOKEN."""
        expected = u'(GET)-http://w3af.org/foo/%s.php' % FILENAME_TOKEN
        self._assert_cleaned('http://w3af.org/foo/index.php', expected)

    def test_clean_fuzzable_request_directory_file_int(self):
        """With a query string the file name is kept as is."""
        self._assert_cleaned('http://w3af.org/foo/index.php?id=2',
                             u'(GET)-http://w3af.org/foo/index.php?id=number')

    def test_clean_fuzzable_request_int(self):
        """A numeric query string value becomes "number"."""
        self._assert_cleaned('http://w3af.org/index.php?id=2',
                             u'(GET)-http://w3af.org/index.php?id=number')

    def test_clean_fuzzable_request_int_str(self):
        """Numeric values become "number", text values become "string"."""
        expected = u'(GET)-http://w3af.org/index.php?id=number&foo=string'
        self._assert_cleaned('http://w3af.org/index.php?id=2&foo=bar',
                             expected)

    def test_clean_fuzzable_request_int_str_empty(self):
        """An empty query string value becomes "string"."""
        expected = (u'(GET)-http://w3af.org/index.php'
                    u'?id=number&foo=string&spam=string')
        self._assert_cleaned('http://w3af.org/index.php?id=2&foo=bar&spam=',
                             expected)

    def test_clean_fuzzable_request_directory_file_no_params(self):
        """Without parameters the file name becomes FILENAME_TOKEN."""
        expected = u'(GET)-http://w3af.org/foo/%s.php' % FILENAME_TOKEN
        self._assert_cleaned('http://w3af.org/foo/index.php', expected)

    def test_clean_fuzzable_request_directory(self):
        """The last directory is replaced by PATH_TOKEN."""
        self._assert_cleaned('http://w3af.org/foo/',
                             u'(GET)-http://w3af.org/%s/' % PATH_TOKEN)

    def test_clean_fuzzable_request_directory_parent_path(self):
        """Only the last directory is replaced by PATH_TOKEN."""
        self._assert_cleaned('http://w3af.org/spam/foo/',
                             u'(GET)-http://w3af.org/spam/%s/' % PATH_TOKEN)

    def test_clean_form_fuzzable_request(self):
        """POST data is appended after a "!" with its values cleaned."""
        target = URL("http://www.w3af.com/")
        headers = Headers([('Host', 'www.w3af.com')])
        post_data = KeyValueContainer(init_val=[('data', ['23'])])

        request = FuzzableRequest(target,
                                  headers=headers,
                                  method='POST',
                                  post_data=post_data)

        self.assertEqual(clean_fuzzable_request(request),
                         u'(POST)-http://www.w3af.com/!data=number')

    def test_clean_form_fuzzable_request_form(self):
        """Query string and form fields are both cleaned for a form."""
        request = self._post_form_request('http://example.com/?id=1')

        expected = (u'(POST)-http://example.com/'
                    u'?id=number!username=string&address=string')
        self.assertEqual(clean_fuzzable_request(request), expected)

    #
    #   Path and file name limits
    #
    def test_db_many_files_in_root(self):
        """Files with the same extension in the root are limited."""
        self._assert_limit('http://w3af.org/foo%s.htm', PATH_MAX_VARIANTS)

    def test_db_many_files_in_root_without_extension(self):
        """Files without an extension in the root are limited."""
        self._assert_limit('http://w3af.org/foo%s', PATH_MAX_VARIANTS)

    def test_db_many_files_different_extensions_in_root(self):
        """Each file extension has its own limit."""
        self._assert_limit('http://w3af.org/foo%s.htm', PATH_MAX_VARIANTS)

        #
        #   Now a different extension
        #
        self._assert_limit('http://w3af.org/foo%s.jpeg', PATH_MAX_VARIANTS)

    def test_db_many_paths_in_root(self):
        """Directories in the root are limited."""
        self._assert_limit('http://w3af.org/foo%s/', PATH_MAX_VARIANTS)

    def test_db_many_paths_in_other_directories(self):
        """Each parent directory has its own limit for sub-directories."""
        self._assert_limit('http://w3af.org/foo/bar%s/', PATH_MAX_VARIANTS)

        #
        #   Now a different parent directory
        #
        self._assert_limit('http://w3af.org/spam/bar%s/', PATH_MAX_VARIANTS)

    def test_db_many_files_other_directories(self):
        """Each parent directory has its own limit for files."""
        self._assert_limit('http://w3af.org/spam/foo%s.htm',
                           PATH_MAX_VARIANTS)

        #
        #   Now a different parent path and the same extension
        #
        self._assert_limit('http://w3af.org/eggs/foo%s.htm',
                           PATH_MAX_VARIANTS)

    def test_db_many_files_different_path_length_directories(self):
        """The limit for files is also per parent with deeper paths."""
        self._assert_limit('http://w3af.org/spam/foo%s.htm',
                           PATH_MAX_VARIANTS)

        #
        #   Now a different parent path and the same extension
        #
        #   Note the /bar/ here! This is what makes this test different
        self._assert_limit('http://w3af.org/eggs/bar/foo%s.htm',
                           PATH_MAX_VARIANTS)

    #
    #   Exact duplicates
    #
    def test_db_same_without_qs(self):
        """The same URL without a query string is only accepted once."""
        url = URL('http://w3af.org/spam/foo.htm')

        first_result = self.vdb.append(fr(url))
        self.assertTrue(first_result)

        second_result = self.vdb.append(fr(url))
        self.assertFalse(second_result)

    def test_db_same_with_qs(self):
        """The same URL with a query string is only accepted once."""
        url = URL('http://w3af.org/spam/foo.htm?id=2&abc=333')

        first_result = self.vdb.append(fr(url))
        self.assertTrue(first_result)

        second_result = self.vdb.append(fr(url))
        self.assertFalse(second_result)

    #
    #   Encoding
    #
    def test_encoding_issues_se(self):
        """A non-ascii file name is cleaned without errors."""
        self._assert_cleaned(u'http://w3af.org/vård.png',
                             u'(GET)-http://w3af.org/file-5692fef3f5dcd97.png')

    def test_encoding_issues_se_with_qs(self):
        """A non-ascii file name followed by a query string."""
        self._assert_cleaned(u'http://w3af.org/vård.png?id=1',
                             '(GET)-http://w3af.org/vård.png?id=number')

    def test_encoding_issues_se_filename(self):
        """A non-ascii file extension."""
        expected = '(GET)-http://w3af.org/file-5692fef3f5dcd97.vård'
        self._assert_cleaned(u'http://w3af.org/x.vård', expected)

    def test_encoding_issues_se_path(self):
        """A non-ascii directory name."""
        expected = '(GET)-http://w3af.org/vård/file-5692fef3f5dcd97.html'
        self._assert_cleaned(u'http://w3af.org/vård/xyz.html', expected)

    #
    #   Forms
    #
    def test_same_form_different_url(self):
        """The same form in similar URLs: MAX_EQUAL_FORM_VARIANTS applies."""
        url_fmt = 'http://example.com/product/%s'

        # These two make sure we're returning false in the last call to
        # append because of MAX_EQUAL_FORM_VARIANTS and not any other limits
        self.assertGreater(PARAMS_MAX_VARIANTS, MAX_EQUAL_FORM_VARIANTS)
        self.assertGreater(PATH_MAX_VARIANTS, MAX_EQUAL_FORM_VARIANTS)

        for form_id in xrange(MAX_EQUAL_FORM_VARIANTS):
            request = self._post_form_request(url_fmt % form_id)
            self.assertTrue(self.vdb.append(request))

        request = self._post_form_request(url_fmt % (form_id + 1))
        self.assertFalse(self.vdb.append(request))

    def test_same_form_completely_different_url(self):
        """The same form in unrelated URLs is limited too."""

        def random_url(path_count):
            # URL with path_count random path components
            paths = [rand_alnum(9) for _ in xrange(path_count)]
            return 'http://example.com/%s' % '/'.join(paths)

        for form_id in xrange(MAX_EQUAL_FORM_VARIANTS):
            request = self._post_form_request(random_url(form_id * 5))
            self.assertTrue(self.vdb.append(request))

        request = self._post_form_request(random_url((form_id + 1) * 5))
        self.assertFalse(self.vdb.append(request))

    def test_different_form_different_url(self):
        """Forms with different field names are all accepted."""
        url_fmt = 'http://example.com/product/%s'

        for form_id in xrange(MAX_EQUAL_FORM_VARIANTS * 2):
            request = self._post_form_request(
                url_fmt % form_id,
                username_field="username%s" % form_id)
            self.assertTrue(self.vdb.append(request))

    def test_different_form_same_url(self):
        """Different forms with the same action are all accepted."""
        url = 'http://example.com/product/1'

        for form_id in xrange(MAX_EQUAL_FORM_VARIANTS * 2):
            request = self._post_form_request(
                url,
                username_field="username%s" % form_id)
            self.assertTrue(self.vdb.append(request))

    def test_forms_with_one_parameter_always_more_variants(self):
        """Forms with only one parameter are all accepted."""
        url_fmt = 'http://example.com/product/%s'

        for form_id in xrange(MAX_EQUAL_FORM_VARIANTS * 2):
            request = self._post_form_request(url_fmt % form_id,
                                              with_address=False)
            self.assertTrue(self.vdb.append(request))
示例#17
0
 def setUp(self):
     """Call create_temp_dir() and store a new VariantDB in self.vdb."""
     # NOTE(review): presumably VariantDB needs the temp dir to exist
     # before it is built, so the order is kept - confirm.
     create_temp_dir()
     self.vdb = VariantDB()
示例#18
0
class TestVariantDB(unittest.TestCase):

    def setUp(self):
        # Test fixture: calls create_temp_dir() and then stores a new
        # VariantDB instance on the test case as self.vdb, which is the
        # object the test methods call append() on.
        # NOTE(review): presumably VariantDB needs the temp dir to exist
        # before it is instantiated - confirm before reordering these lines.
        create_temp_dir()
        self.vdb = VariantDB()

    def test_db_int(self):
        # URLs that only differ in the numeric value of "id": the first
        # PARAMS_MAX_VARIANTS appends are expected to return a true value,
        # the following one a false value.
        template = 'http://w3af.org/foo.htm?id=%s'

        for value in xrange(PARAMS_MAX_VARIANTS):
            request = fr(URL(template % value))
            self.assertTrue(self.vdb.append(request))

        over_limit = fr(URL(template % (PARAMS_MAX_VARIANTS + 1,)))
        self.assertFalse(self.vdb.append(over_limit))

    def test_db_int_int(self):
        # Two numeric parameters, only "id" changes while "bar" stays at 1:
        # PARAMS_MAX_VARIANTS appends are expected to succeed, the next one
        # is expected to be refused.
        template = 'http://w3af.org/foo.htm?id=%s&bar=1'

        for value in xrange(PARAMS_MAX_VARIANTS):
            request = fr(URL(template % value))
            self.assertTrue(self.vdb.append(request))

        over_limit = fr(URL(template % (PARAMS_MAX_VARIANTS + 1,)))
        self.assertFalse(self.vdb.append(over_limit))

    def test_db_int_int_var(self):
        # Two numeric parameters that both change on every request:
        # PARAMS_MAX_VARIANTS appends are expected to succeed, the next one
        # is expected to be refused.
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'

        for value in xrange(PARAMS_MAX_VARIANTS):
            request = fr(URL(template % (value, value)))
            self.assertTrue(self.vdb.append(request))

        beyond = PARAMS_MAX_VARIANTS + 1
        over_limit = fr(URL(template % (beyond, beyond)))
        self.assertFalse(self.vdb.append(over_limit))

    def test_db_int_str(self):
        # A numeric "id" combined with a "bar" value built by repeating
        # 'abc': PARAMS_MAX_VARIANTS appends are expected to succeed, the
        # next one is expected to be refused.
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'

        for value in xrange(PARAMS_MAX_VARIANTS):
            request = fr(URL(template % (value, 'abc' * value)))
            self.assertTrue(self.vdb.append(request))

        beyond = PARAMS_MAX_VARIANTS + 1
        over_limit = fr(URL(template % (beyond, 'abc' * beyond)))
        self.assertFalse(self.vdb.append(over_limit))

    def test_db_int_str_then_int_int(self):
        # Fill the DB first with (int, str) requests and then with
        # (int, int) requests; both batches are expected to be accepted.
        # Afterwards one more request of each kind is expected to be refused.
        template = 'http://w3af.org/foo.htm?id=%s&bar=%s'
        beyond = PARAMS_MAX_VARIANTS + 1

        # (int, str) batch
        for value in xrange(PARAMS_MAX_VARIANTS):
            request = fr(URL(template % (value, 'abc' * value)))
            self.assertTrue(self.vdb.append(request))

        # (int, int) batch
        for value in xrange(PARAMS_MAX_VARIANTS):
            request = fr(URL(template % (value, value)))
            self.assertTrue(self.vdb.append(request))

        extra_int_int = fr(URL(template % (beyond, beyond)))
        self.assertFalse(self.vdb.append(extra_int_int))

        extra_int_str = fr(URL(template % (beyond, 'spameggs')))
        self.assertFalse(self.vdb.append(extra_int_str))

    def test_clean_fuzzable_request_simple(self):
        # Root URL without file or parameters: the expected result is the
        # URL itself prefixed with "(GET)-".
        raw_url = 'http://w3af.org/'
        expected = u'(GET)-http://w3af.org/'

        cleaned = clean_fuzzable_request(fr(URL(raw_url)))

        self.assertEqual(cleaned, expected)

    def test_clean_fuzzable_request_file(self):
        # File in the web root: the file name is expected to be replaced by
        # FILENAME_TOKEN while the ".php" extension is kept.
        raw_url = 'http://w3af.org/index.php'

        cleaned = clean_fuzzable_request(fr(URL(raw_url)))
        expected = u'(GET)-http://w3af.org/%s.php' % FILENAME_TOKEN

        self.assertEqual(cleaned, expected)

    def test_clean_fuzzable_request_directory_file(self):
        # File inside a directory: the directory is expected to stay as-is,
        # the file name is expected to become FILENAME_TOKEN.
        raw_url = 'http://w3af.org/foo/index.php'

        cleaned = clean_fuzzable_request(fr(URL(raw_url)))
        expected = u'(GET)-http://w3af.org/foo/%s.php' % FILENAME_TOKEN

        self.assertEqual(cleaned, expected)

    def test_clean_fuzzable_request_directory_file_int(self):
        # File inside a directory plus a numeric query string parameter:
        # the path is expected to be kept and the value to become "number".
        raw_url = 'http://w3af.org/foo/index.php?id=2'
        expected = u'(GET)-http://w3af.org/foo/index.php?id=number'

        cleaned = clean_fuzzable_request(fr(URL(raw_url)))

        self.assertEqual(cleaned, expected)

    def test_clean_fuzzable_request_int(self):
        # Numeric query string value: expected to be replaced by "number".
        raw_url = 'http://w3af.org/index.php?id=2'
        expected = u'(GET)-http://w3af.org/index.php?id=number'

        cleaned = clean_fuzzable_request(fr(URL(raw_url)))

        self.assertEqual(cleaned, expected)

    def test_clean_fuzzable_request_int_str(self):
        # One numeric and one text value: expected to be replaced by
        # "number" and "string" respectively.
        raw_url = 'http://w3af.org/index.php?id=2&foo=bar'
        expected = u'(GET)-http://w3af.org/index.php?id=number&foo=string'

        cleaned = clean_fuzzable_request(fr(URL(raw_url)))

        self.assertEqual(cleaned, expected)

    def test_clean_fuzzable_request_int_str_empty(self):
        # Numeric, text and empty values: the empty value is expected to be
        # reported as "string", just like the text one.
        raw_url = 'http://w3af.org/index.php?id=2&foo=bar&spam='
        expected = (u'(GET)-http://w3af.org/index.php'
                    u'?id=number&foo=string&spam=string')

        cleaned = clean_fuzzable_request(fr(URL(raw_url)))

        self.assertEqual(cleaned, expected)

    def test_clean_fuzzable_request_directory_file_no_params(self):
        # File inside a directory and no query string: only the file name
        # is expected to be replaced, by FILENAME_TOKEN.
        raw_url = 'http://w3af.org/foo/index.php'

        cleaned = clean_fuzzable_request(fr(URL(raw_url)))
        expected = u'(GET)-http://w3af.org/foo/%s.php' % FILENAME_TOKEN

        self.assertEqual(cleaned, expected)

    def test_clean_fuzzable_request_directory(self):
        # Single directory in the web root: the directory name is expected
        # to be replaced by PATH_TOKEN.
        raw_url = 'http://w3af.org/foo/'

        cleaned = clean_fuzzable_request(fr(URL(raw_url)))
        expected = u'(GET)-http://w3af.org/%s/' % PATH_TOKEN

        self.assertEqual(cleaned, expected)

    def test_clean_fuzzable_request_directory_parent_path(self):
        # Nested directories: the parent ("spam") is expected to be kept
        # and only the last directory replaced by PATH_TOKEN.
        raw_url = 'http://w3af.org/spam/foo/'

        cleaned = clean_fuzzable_request(fr(URL(raw_url)))
        expected = u'(GET)-http://w3af.org/spam/%s/' % PATH_TOKEN

        self.assertEqual(cleaned, expected)

    def test_clean_form_fuzzable_request(self):
        # POST request carrying one numeric post-data field: the expected
        # result appends the post-data after a "!" separator with the value
        # replaced by "number".
        target = URL("http://www.w3af.com/")
        request_headers = Headers([('Host', 'www.w3af.com')])
        body = KeyValueContainer(init_val=[('data', ['23'])])

        request = FuzzableRequest(target,
                                  headers=request_headers,
                                  method='POST',
                                  post_data=body)

        cleaned = clean_fuzzable_request(request)

        self.assertEqual(cleaned, u'(POST)-http://www.w3af.com/!data=number')

    def test_clean_form_fuzzable_request_form(self):
        # Form with a text field and an empty field, posted to an action URL
        # that has its own numeric query string parameter. The expected
        # result holds the cleaned query string, a "!" separator and then
        # the cleaned form fields.
        params = FormParameters()
        params.add_field_by_attr_items([("name", "username"),
                                        ("value", "abc")])
        params.add_field_by_attr_items([("name", "address"),
                                        ("value", "")])
        params.set_action(URL('http://example.com/?id=1'))
        params.set_method('post')

        request = FuzzableRequest.from_form(dc_from_form_params(params))
        cleaned = clean_fuzzable_request(request)

        self.assertEqual(cleaned,
                         u'(POST)-http://example.com/'
                         u'?id=number!username=string&address=string')

    def test_db_many_files_in_root(self):
        # Files in the web root that share the ".htm" extension and differ
        # in a numeric suffix: PATH_MAX_VARIANTS appends are expected to
        # succeed, the next one is expected to be refused.
        template = 'http://w3af.org/foo%s.htm'

        for number in xrange(PATH_MAX_VARIANTS):
            request = fr(URL(template % number))
            self.assertTrue(self.vdb.append(request))

        over_limit = fr(URL(template % (PATH_MAX_VARIANTS + 1,)))
        self.assertFalse(self.vdb.append(over_limit))

    def test_db_many_files_in_root_without_extension(self):
        # Same as the ".htm" case but the file names have no extension:
        # PATH_MAX_VARIANTS appends are expected to succeed, the next one
        # is expected to be refused.
        template = 'http://w3af.org/foo%s'

        for number in xrange(PATH_MAX_VARIANTS):
            request = fr(URL(template % number))
            self.assertTrue(self.vdb.append(request))

        over_limit = fr(URL(template % (PATH_MAX_VARIANTS + 1,)))
        self.assertFalse(self.vdb.append(over_limit))

    def test_db_many_files_different_extensions_in_root(self):
        # The same fill-then-overflow sequence is run twice against the same
        # DB: first for ".htm" files and then for a different extension
        # (".jpeg"). Each extension is expected to accept PATH_MAX_VARIANTS
        # requests and to refuse the following one.
        templates = ('http://w3af.org/foo%s.htm',
                     'http://w3af.org/foo%s.jpeg')

        for template in templates:
            for number in xrange(PATH_MAX_VARIANTS):
                request = fr(URL(template % number))
                self.assertTrue(self.vdb.append(request))

            over_limit = fr(URL(template % (PATH_MAX_VARIANTS + 1,)))
            self.assertFalse(self.vdb.append(over_limit))

    def test_db_many_paths_in_root(self):
        # Directories in the web root that differ in a numeric suffix:
        # PATH_MAX_VARIANTS appends are expected to succeed, the next one
        # is expected to be refused.
        template = 'http://w3af.org/foo%s/'

        for number in xrange(PATH_MAX_VARIANTS):
            request = fr(URL(template % number))
            self.assertTrue(self.vdb.append(request))

        over_limit = fr(URL(template % (PATH_MAX_VARIANTS + 1,)))
        self.assertFalse(self.vdb.append(over_limit))

    def test_db_many_paths_in_other_directories(self):
        # The same fill-then-overflow sequence is run twice against the same
        # DB: first for sub-directories of /foo/ and then for a different
        # parent directory (/spam/). Each parent is expected to accept
        # PATH_MAX_VARIANTS requests and to refuse the following one.
        templates = ('http://w3af.org/foo/bar%s/',
                     'http://w3af.org/spam/bar%s/')

        for template in templates:
            for number in xrange(PATH_MAX_VARIANTS):
                request = fr(URL(template % number))
                self.assertTrue(self.vdb.append(request))

            over_limit = fr(URL(template % (PATH_MAX_VARIANTS + 1,)))
            self.assertFalse(self.vdb.append(over_limit))

    def test_db_many_files_other_directories(self):
        # The same fill-then-overflow sequence is run twice against the same
        # DB: first for files under /spam/ and then for a different parent
        # path (/eggs/) with the same extension. Each parent is expected to
        # accept PATH_MAX_VARIANTS requests and to refuse the following one.
        templates = ('http://w3af.org/spam/foo%s.htm',
                     'http://w3af.org/eggs/foo%s.htm')

        for template in templates:
            for number in xrange(PATH_MAX_VARIANTS):
                request = fr(URL(template % number))
                self.assertTrue(self.vdb.append(request))

            over_limit = fr(URL(template % (PATH_MAX_VARIANTS + 1,)))
            self.assertFalse(self.vdb.append(over_limit))

    def test_db_many_files_different_path_length_directories(self):
        # The same fill-then-overflow sequence is run twice against the same
        # DB: first for files under /spam/ and then for files with the same
        # extension under /eggs/bar/. The extra /bar/ level in the second
        # template is what makes this test different from the other
        # "different parent path" test. Each location is expected to accept
        # PATH_MAX_VARIANTS requests and to refuse the following one.
        templates = ('http://w3af.org/spam/foo%s.htm',
                     'http://w3af.org/eggs/bar/foo%s.htm')

        for template in templates:
            for number in xrange(PATH_MAX_VARIANTS):
                request = fr(URL(template % number))
                self.assertTrue(self.vdb.append(request))

            over_limit = fr(URL(template % (PATH_MAX_VARIANTS + 1,)))
            self.assertFalse(self.vdb.append(over_limit))

    def test_db_same_without_qs(self):
        # Appending the very same URL (no query string) twice: the first
        # append() is expected to return a true value, the second a false
        # one.
        target = URL('http://w3af.org/spam/foo.htm')

        first_attempt = self.vdb.append(fr(target))
        self.assertTrue(first_attempt)

        second_attempt = self.vdb.append(fr(target))
        self.assertFalse(second_attempt)

    def test_db_same_with_qs(self):
        # Appending the very same URL (with a two-parameter query string)
        # twice: the first append() is expected to return a true value, the
        # second a false one.
        url = URL('http://w3af.org/spam/foo.htm?id=2&abc=333')

        first_attempt = self.vdb.append(fr(url))
        self.assertTrue(first_attempt)

        second_attempt = self.vdb.append(fr(url))
        self.assertFalse(second_attempt)