import re
import urllib2

from nose.plugins.attrib import attr
from nose.plugins.skip import SkipTest

from w3af.plugins.tests.helper import PluginTest, PluginConfig
from w3af.core.data.parsers.url import URL
from w3af.core.controllers.ci.moth import get_moth_http
from w3af.core.controllers.ci.wivet import get_wivet_http


def extract_all_stats():
    """
    :return: A list of (scan_id, stats_html) tuples, one for every scan
             recorded by this wivet instance.
    """
    stats_url = get_wivet_http('/offscanpages/statistics.php')
    response = urllib2.urlopen(stats_url)
    index_page = response.read()

    result = []

    SCAN_ID_RE = r'<a href="statistics\.php\?id=(.*?)">'
    SCAN_STATS = get_wivet_http('/offscanpages/statistics.php?id=')

    for scan_id in re.findall(SCAN_ID_RE, index_page):
        scan_stat_url = SCAN_STATS + scan_id
        response = urllib2.urlopen(scan_stat_url)
        result.append((scan_id, response.read()))

    return result
def get_coverage_for_scan_id(scan_id):
    """
    :return: The coverage percentage wivet reports for the given scan id, as
             an int, or None when the stats page contains no coverage span.
    """
    specific_stats_url = get_wivet_http('/offscanpages/statistics.php?id=%s')
    response = urllib2.urlopen(specific_stats_url % scan_id)
    html = response.read()

    match_obj = re.search('<span id="coverage">%(.*?)</span>', html)
    if match_obj is not None:
        return int(match_obj.group(1))

    return None
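# Illustrative note on the parsing above: wivet renders the percentage with a
# leading '%' sign inside the coverage span, so markup such as
#
#     <span id="coverage">%42</span>
#
# makes get_coverage_for_scan_id() return the integer 42. This example simply
# mirrors what the regex matches; it is not an independent specification of
# wivet's HTML.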
def clear_wivet():
    """
    Utility function that clears all the stats recorded by previous scans of
    this wivet instance; very helpful for analyzing the stats after a scan
    ends.
    """
    clear_url = get_wivet_http('/offscanpages/remove-all-stats.php?sure=yes')

    response = urllib2.urlopen(clear_url)
    html = response.read()

    assert 'Done!' in html, html
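# Minimal usage sketch for the three helpers above (not called from the test
# suite; `dump_wivet_coverage` is an illustrative name, not an existing w3af
# helper): after a manual scan against a local wivet instance, print the
# coverage that every recorded session achieved.
def dump_wivet_coverage():
    for scan_id, _stats_html in extract_all_stats():
        coverage = get_coverage_for_scan_id(scan_id)
        print('Scan %s achieved %s%% coverage' % (scan_id, coverage))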
class TestWebSpider(PluginTest):

    follow_links_url = get_moth_http('/crawl/web_spider/test_case_01/')
    dir_get_url = 'http://moth/w3af/crawl/web_spider/a/b/c/d/'
    encoding_url = get_moth_http('/core/encoding')
    relative_url = 'http://moth/w3af/crawl/web_spider/relativeRegex.html'

    wivet = get_wivet_http()

    _run_configs = {
        'basic': {
            'target': None,
            'plugins': {
                'crawl': (PluginConfig('web_spider',
                                       ('only_forward',
                                        True,
                                        PluginConfig.BOOL),
                                       ('ignore_regex',
                                        '.*logout.php*',
                                        PluginConfig.STR)),)
            }
        },
    }

    def generic_scan(self, config, base_directory, start_url, expected_files):
        self._scan(start_url, config['plugins'])

        # Add the webroot to the list of expected files
        expected_files.append('')

        expected_urls = set(URL(base_directory).url_join(end).url_string
                            for end in expected_files)

        # pylint: disable=E1101
        # Pylint fails to detect the object types that come out of the KB
        urls = self.kb.get_all_known_urls()
        found_urls = set(str(u).decode('utf-8') for u in urls)

        self.assertEqual(found_urls, expected_urls)

    @attr('smoke')
    def test_spider_found_urls(self):
        config = self._run_configs['basic']
        expected_files = ['1.html', '2.html', '3.html', '4.html',
                          'd%20f/index.html', 'a%20b.html', 'd%20f/']
        start_url = self.follow_links_url
        self.generic_scan(config, self.follow_links_url, start_url,
                          expected_files)

    def test_utf8_urls(self):
        config = self._run_configs['basic']
        expected_files = [u'vúlnerable.py', u'é.py', u'改.py', u'проверка.py']
        start_url = self.encoding_url + '_utf8/'
        self.generic_scan(config, start_url, start_url, expected_files)

    def test_euc_jp_urls(self):
        config = self._run_configs['basic']
        expected_files = [u'raw-qs-jp.py', u'qs-jp.py']
        start_url = self.encoding_url + '_euc-jp/'
        self.generic_scan(config, start_url, start_url, expected_files)

    def test_spider_relative_urls_found_with_regex(self):
        raise SkipTest('FIXME: Need to test this feature!')

    def test_spider_traverse_directories(self):
        raise SkipTest('FIXME: Need to test this feature!')

    def test_wivet(self):
        clear_wivet()

        cfg = self._run_configs['basic']
        self._scan(self.wivet, cfg['plugins'])

        #
        # First, check that w3af identified all the URLs we want:
        #
        ALL_WIVET_URLS = {'10_17d77.php', '11_1f2e4.php', '1_12c3b.php',
                          '11_2d3ff.php', '12_2a2cf.php', '12_3a2cf.php',
                          '1_25e2a.php', '13_10ad3.php', '13_25af3.php',
                          '14_1eeab.php', '15_1c95a.php', '16_1b14f.php',
                          '16_2f41a.php', '17_143ef.php', '17_2da76.php',
                          '18_1a2f3.php', '19_1f52a.php', '19_2e3a2.php',
                          '20_1e833.php', '21_1f822.php', '2_1f84b.php',
                          '2_2b7a3.php', '3_16e1a.php', '3_2cc42.php',
                          '3_3fadc.php', '3_45589.php', '3_5befd.php',
                          '3_6ff22.php', '3_7e215.php', '4_1c3f8.php',
                          '5_1e4d2.php', '6_14b3c.php', '7_16a9c.php',
                          '8_1b6e1.php', '8_2b6f1.php', '9_10ee31.php',
                          '9_11ee31.php', '9_12ee31.php', '9_13ee31.php',
                          '9_14ee31.php', '9_15ee31.php', '9_16ee31.php',
                          '9_17ee31.php', '9_18ee31.php', '9_19ee31.php',
                          '9_1a1b2.php', '9_20ee31.php', '9_21ee31.php',
                          '9_22ee31.php', '9_23ee31.php', '9_24ee31.php',
                          '9_25ee31.php', '9_26dd2e.php', '9_2ff21.php',
                          '9_3a2b7.php', '9_4b82d.php', '9_5ee31.php',
                          '9_6ee31.php', '9_7ee31.php', '9_8ee31.php',
                          '9_9ee31.php', '12_1a2cf.php'}

        #
        # FIXME: At some point this should be reduced to an empty set()
        #
        W3AF_FAILS = {'9_16ee31.php', '9_9ee31.php', '9_18ee31.php',
                      '9_11ee31.php', '9_20ee31.php', '9_25ee31.php',
                      '9_15ee31.php', '9_8ee31.php', '9_17ee31.php',
                      '9_13ee31.php', '9_19ee31.php', '9_14ee31.php',
                      '19_2e3a2.php', '17_143ef.php', '9_23ee31.php',
                      '9_12ee31.php', '9_5ee31.php', '9_6ee31.php',
                      '9_22ee31.php', '11_2d3ff.php', '17_2da76.php',
                      '18_1a2f3.php', '9_24ee31.php', '9_7ee31.php',
                      '9_10ee31.php', '9_21ee31.php',

                      # These were added to the fails group after #2104
                      '15_1c95a.php', '6_14b3c.php', '8_1b6e1.php',
                      '14_1eeab.php', '8_2b6f1.php'}

        EXPECTED_URLS = ALL_WIVET_URLS - W3AF_FAILS

        inner_pages = 'innerpages/'

        urls = self.kb.get_all_known_urls()
        found = set(str(u) for u in urls
                    if inner_pages in str(u) and str(u).endswith('.php'))

        expected = set((self.wivet + inner_pages + end)
                       for end in EXPECTED_URLS)

        self.assertEqual(found, expected)

        #
        # And now, verify that w3af used only one session to identify these
        # wivet links.
        #
        stats = extract_all_stats()
        self.assertEqual(len(stats), 1)

        coverage = get_coverage_for_scan_id(stats[0][0])
        # TODO: Sometimes coverage is 44 and sometimes it is 42!
        #       https://github.com/andresriancho/w3af/issues/2309
        self.assertEqual(coverage, 42)
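
# Debugging sketch (a hypothetical helper, not referenced by TestWebSpider):
# when the wivet URL assertion above fails, diffing the two sets is the
# quickest way to see which innerpages/ links the spider missed and which
# unexpected ones it reported.
def diff_wivet_urls(found, expected):
    missed = expected - found
    extra = found - expected
    return missed, extra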