Example #1
import re
import urllib2


def extract_all_stats():
    """
    :return: A list of (scan_id, stats_html) tuples with all the stats
             generated during this scan
    """
    # get_wivet_http() is w3af's test helper that builds URLs pointing
    # at the wivet instance used in the CI environment
    stats_url = get_wivet_http('/offscanpages/statistics.php')
    response = urllib2.urlopen(stats_url)

    index_page = response.read()

    result = []
    SCAN_ID_RE = r'<a href="statistics\.php\?id=(.*?)">'
    SCAN_STATS = get_wivet_http('/offscanpages/statistics.php?id=')

    # Follow each per-scan link found on the statistics index page
    for scan_id in re.findall(SCAN_ID_RE, index_page):
        scan_stat_url = SCAN_STATS + scan_id
        response = urllib2.urlopen(scan_stat_url)
        result.append((scan_id, response.read()))

    return result
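A minimal usage sketch (not part of the original listing) that exercises extract_all_stats(); it assumes a wivet instance is reachable through get_wivet_http():

# Hypothetical usage, assuming a running wivet instance:
for scan_id, stats_html in extract_all_stats():
    print('scan %s returned %d bytes of stats' % (scan_id, len(stats_html)))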
Example #2
import re
import urllib2


def get_coverage_for_scan_id(scan_id):
    """
    :return: The coverage percentage wivet reports for scan_id, or None
             if it can not be extracted from the stats page.
    """
    specific_stats_url = get_wivet_http('/offscanpages/statistics.php?id=%s')

    response = urllib2.urlopen(specific_stats_url % scan_id)
    html = response.read()

    # wivet renders the coverage as e.g. <span id="coverage">%42</span>
    match_obj = re.search(r'<span id="coverage">%(.*?)</span>', html)
    if match_obj is not None:
        return int(match_obj.group(1))

    return None
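The two helpers compose naturally; here is a hedged sketch (not from the original listing) that prints the coverage for every recorded scan:

# Hypothetical usage combining the two helpers above
for scan_id, _ in extract_all_stats():
    coverage = get_coverage_for_scan_id(scan_id)
    if coverage is None:
        print('scan %s: coverage not found' % scan_id)
    else:
        print('scan %s: %d%% coverage' % (scan_id, coverage))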
Example #3
import urllib2


def clear_wivet():
    """
    Utility function that clears all the previous stats from the wivet
    instance. Very helpful for analyzing the stats after the scan ends.
    """
    clear_url = get_wivet_http('/offscanpages/remove-all-stats.php?sure=yes')

    response = urllib2.urlopen(clear_url)
    html = response.read()

    # wivet answers 'Done!' on success; fail loudly (showing the HTML) otherwise
    assert 'Done!' in html, html
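Together these helpers support the clear-scan-inspect cycle the test below relies on. A rough sketch, where run_scan() is a hypothetical placeholder for whatever drives the crawler:

clear_wivet()                    # start from an empty statistics table
run_scan()                       # placeholder: run the crawler against wivet
stats = extract_all_stats()      # one entry per session wivet recorded
assert len(stats) == 1, 'crawler should have used a single session'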
Example #4
# PluginTest, PluginConfig, URL, get_moth_http() and get_wivet_http() are
# provided by w3af's test infrastructure (exact import paths vary by version).
class TestWebSpider(PluginTest):

    follow_links_url = get_moth_http('/crawl/web_spider/test_case_01/')
    dir_get_url = 'http://moth/w3af/crawl/web_spider/a/b/c/d/'
    encoding_url = get_moth_http('/core/encoding')
    relative_url = 'http://moth/w3af/crawl/web_spider/relativeRegex.html'

    wivet = get_wivet_http()

    _run_configs = {
        'basic': {
            'target': None,
            'plugins': {
                'crawl': (PluginConfig(
                    'web_spider', ('only_forward', True, PluginConfig.BOOL),
                    ('ignore_regex', '.*logout.php*', PluginConfig.STR)), )
            }
        },
    }

    def generic_scan(self, config, base_directory, start_url, expected_files):
        self._scan(start_url, config['plugins'])

        # Add the webroot to the list of expected files
        expected_files.append('')
        expected_urls = set(
            URL(base_directory).url_join(end).url_string
            for end in expected_files)

        # pylint: disable=E1101
        # Pylint fails to detect the object types that come out of the KB
        urls = self.kb.get_all_known_urls()
        found_urls = set(str(u).decode('utf-8') for u in urls)

        self.assertEqual(found_urls, expected_urls)

    @attr('smoke')
    def test_spider_found_urls(self):
        config = self._run_configs['basic']
        expected_files = [
            '1.html',
            '2.html',
            '3.html',
            '4.html',
            'd%20f/index.html',
            'a%20b.html',
            'd%20f/',
        ]
        start_url = self.follow_links_url

        self.generic_scan(config, self.follow_links_url, start_url,
                          expected_files)

    def test_utf8_urls(self):
        config = self._run_configs['basic']
        expected_files = [u'vúlnerable.py', u'é.py', u'改.py', u'проверка.py']
        start_url = self.encoding_url + '_utf8/'

        self.generic_scan(config, start_url, start_url, expected_files)

    def test_euc_jp_urls(self):
        config = self._run_configs['basic']
        expected_files = [u'raw-qs-jp.py', u'qs-jp.py']
        start_url = self.encoding_url + '_euc-jp/'

        self.generic_scan(config, start_url, start_url, expected_files)

    def test_spider_relative_urls_found_with_regex(self):
        raise SkipTest('FIXME: Need to test this feature!')

    def test_spider_traverse_directories(self):
        raise SkipTest('FIXME: Need to test this feature!')

    def test_wivet(self):
        clear_wivet()

        cfg = self._run_configs['basic']
        self._scan(self.wivet, cfg['plugins'])

        #
        #    First, check that w3af identified all the URLs we want:
        #
        ALL_WIVET_URLS = {
            '10_17d77.php', '11_1f2e4.php', '1_12c3b.php', '11_2d3ff.php',
            '12_2a2cf.php', '12_3a2cf.php', '1_25e2a.php', '13_10ad3.php',
            '13_25af3.php', '14_1eeab.php', '15_1c95a.php', '16_1b14f.php',
            '16_2f41a.php', '17_143ef.php', '17_2da76.php', '18_1a2f3.php',
            '19_1f52a.php', '19_2e3a2.php', '20_1e833.php', '21_1f822.php',
            '2_1f84b.php', '2_2b7a3.php', '3_16e1a.php', '3_2cc42.php',
            '3_3fadc.php', '3_45589.php', '3_5befd.php', '3_6ff22.php',
            '3_7e215.php', '4_1c3f8.php', '5_1e4d2.php', '6_14b3c.php',
            '7_16a9c.php', '8_1b6e1.php', '8_2b6f1.php', '9_10ee31.php',
            '9_11ee31.php', '9_12ee31.php', '9_13ee31.php', '9_14ee31.php',
            '9_15ee31.php', '9_16ee31.php', '9_17ee31.php', '9_18ee31.php',
            '9_19ee31.php', '9_1a1b2.php', '9_20ee31.php', '9_21ee31.php',
            '9_22ee31.php', '9_23ee31.php', '9_24ee31.php', '9_25ee31.php',
            '9_26dd2e.php', '9_2ff21.php', '9_3a2b7.php', '9_4b82d.php',
            '9_5ee31.php', '9_6ee31.php', '9_7ee31.php', '9_8ee31.php',
            '9_9ee31.php', '12_1a2cf.php'
        }

        #
        #    FIXME: At some point this should be reduced to an empty set()
        #
        W3AF_FAILS = {
            '9_16ee31.php',
            '9_9ee31.php',
            '9_18ee31.php',
            '9_11ee31.php',
            '9_20ee31.php',
            '9_25ee31.php',
            '9_15ee31.php',
            '9_8ee31.php',
            '9_17ee31.php',
            '9_13ee31.php',
            '9_19ee31.php',
            '9_14ee31.php',
            '19_2e3a2.php',
            '17_143ef.php',
            '9_23ee31.php',
            '9_12ee31.php',
            '9_5ee31.php',
            '9_6ee31.php',
            '9_22ee31.php',
            '11_2d3ff.php',
            '17_2da76.php',
            '18_1a2f3.php',
            '9_24ee31.php',
            '9_7ee31.php',
            '9_10ee31.php',
            '9_21ee31.php',

            # These were added to the fails group after #2104
            '15_1c95a.php',
            '6_14b3c.php',
            '8_1b6e1.php',
            '14_1eeab.php',
            '8_2b6f1.php'
        }

        EXPECTED_URLS = ALL_WIVET_URLS - W3AF_FAILS

        inner_pages = 'innerpages/'

        urls = self.kb.get_all_known_urls()

        found = set(
            str(u) for u in urls
            if inner_pages in str(u) and str(u).endswith('.php'))
        expected = set(
            (self.wivet + inner_pages + end) for end in EXPECTED_URLS)

        self.assertEqual(found, expected)

        #
        #    And now, verify that w3af used only one session to identify these
        #    wivet links.
        #
        stats = extract_all_stats()
        self.assertEqual(len(stats), 1)

        coverage = get_coverage_for_scan_id(stats[0][0])
        # TODO: Sometimes coverage is 44 and sometimes it is 42!
        # https://github.com/andresriancho/w3af/issues/2309
        self.assertEqual(coverage, 42)