Example #1
    def test_simplest(self):
        re_list = ['1234', '4567', '7890']
        mre = MultiRE(re_list)

        result = to_list(mre.query('4567'))
        self.assertEqual(1, len(result))
        self.assertEqual('4567', result[0][1])

        result = to_list(mre.query('7890'))
        self.assertEqual(1, len(result))
        self.assertEqual('7890', result[0][1])
Example #2
    def test_unicode_re(self):
        re_list = [u'ñandú', u'ýandex']
        mre = MultiRE(re_list)

        result = to_list(mre.query('abcn'))
        self.assertEqual(0, len(result))
        self.assertEqual([], result)

        result = to_list(mre.query('123 ñandú 345'))
        self.assertEqual(1, len(result))
        self.assertEqual('ñandú', result[0][1])
Example #3
    def test_unicode_query(self):
        re_list = [u'abc321', u'def123']
        mre = MultiRE(re_list)

        result = to_list(mre.query('abc321ñ'))
        self.assertEqual(1, len(result))
        self.assertEqual('abc321', result[0][1])

        result = to_list(mre.query('abc321\x00def123'))
        self.assertEqual(2, len(result))
        match_res = set(i[1] for i in result)
        self.assertEqual(set(re_list), match_res)
Example #4
    def test_re(self):
        re_list = ['1234.*56', 'ab.*cdef']
        mre = MultiRE(re_list)
        result = to_list(mre.query('456'))
        self.assertEqual(0, len(result))
        self.assertEqual([], result)

        result = to_list(mre.query('1234a56'))
        self.assertEqual(1, len(result))
        self.assertEqual('1234.*56', result[0][1])

        result = to_list(mre.query('abAAAcdef'))
        self.assertEqual(1, len(result))
        self.assertEqual('ab.*cdef', result[0][1])
Example #5
    def test_re_with_obj(self):
        re_list = [('1234.*56', None, None), ('ab.*cdef', 1, 2)]
        mre = MultiRE(re_list)

        result = to_list(mre.query('1234A56'))
        self.assertEqual(1, len(result))
        self.assertEqual('1234.*56', result[0][1])
        self.assertEqual(None, result[0][3])
        self.assertEqual(None, result[0][4])

        result = to_list(mre.query('abAAAcdef'))
        self.assertEqual(1, len(result))
        self.assertEqual('ab.*cdef', result[0][1])
        self.assertEqual(1, result[0][3])
        self.assertEqual(2, result[0][4])
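
Taken together, Examples #1 to #5 document the shape of the tuples yielded by MultiRE.query(): index 0 is the re match object, index 1 is the pattern string that matched, and any objects attached to a pattern appear from index 3 onwards (index 2 is never asserted in these tests). A minimal usage sketch follows; it is not part of the test suite, and the import path is an assumption.

# Minimal usage sketch inferred from the tests above; not part of the suite.
# The import path is an assumption, adjust it to your copy of the code.
from w3af.core.data.misc.multi_re import MultiRE

# Patterns may be plain strings or (pattern, obj1, obj2, ...) tuples
mre = MultiRE([('1234.*56', None, None), ('ab.*cdef', 1, 2)])

for item in mre.query('xyz abQQQcdef xyz'):
    match_obj = item[0]    # the re match object
    pattern_str = item[1]  # the pattern that matched, e.g. 'ab.*cdef'
    attached = item[3:]    # objects attached to the pattern, here (1, 2)
    print('%s %s %r' % (match_obj.group(0), pattern_str, attached))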
Example #6
    def test_re_flags(self):
        re_list = ['12.*3456', 'ab.*cdef']
        mre = MultiRE(re_list, re.IGNORECASE)

        result = to_list(mre.query('AB3Cdef'))
        self.assertEqual(1, len(result))
        self.assertEqual('ab.*cdef', result[0][1])
Example #7
    def test_special_char(self):
        re_list = [u'\x00\x01\x02\x03']
        mre = MultiRE(re_list)

        result = to_list(mre.query('abc\x00\x01\x02\x03def'))
        self.assertEqual(1, len(result))
        self.assertEqual('\x00\x01\x02\x03', result[0][1])
Example #8
class find_backdoors(CrawlPlugin):
    """
    Find web backdoors and web shells.

    :author: Andres Riancho ([email protected])
    """
    WEBSHELL_DB = os.path.join(CRAWL_PATH, 'find_backdoors', 'web_shells.txt')
    SIGNATURE_DB = os.path.join(CRAWL_PATH, 'find_backdoors', 'signatures.txt')

    def __init__(self):
        CrawlPlugin.__init__(self)

        # Internal variables
        self._analyzed_dirs = ScalableBloomFilter()
        self._signature_re = None

    def setup(self):
        with self._plugin_lock:
            if self._signature_re is not None:
                return

            signatures = self._read_signatures()
            self._signature_re = MultiRE(signatures, hint_len=2)

    def _read_signatures(self):
        for line in file(self.SIGNATURE_DB):
            line = line.strip()

            if not line:
                continue

            if line.startswith('#'):
                continue

            yield (line, 'Backdoor signature')

    def crawl(self, fuzzable_request):
        """
        For every directory, fetch a list of shell files and analyze the
        response.

        :param fuzzable_request: A fuzzable_request instance that contains
                                    (among other things) the URL to test.
        """
        domain_path = fuzzable_request.get_url().get_domain_path()

        if domain_path not in self._analyzed_dirs:
            self._analyzed_dirs.add(domain_path)

            self.setup()

            # Read the web shell database
            web_shells = self._iter_web_shells()

            # Send the requests using threads:
            args_iter = (domain_path.url_join(fname) for fname in web_shells)
            self.worker_pool.map(self._check_if_exists, args_iter)

    def _iter_web_shells(self):
        """
        :yield: lines from the web shell DB
        """
        for line in file(self.WEBSHELL_DB):
            line = line.strip()

            if line.startswith('#'):
                continue

            if not line:
                continue

            yield line

    def _check_if_exists(self, web_shell_url):
        """
        Check if the file exists.

        :param web_shell_url: The URL to check
        """
        try:
            response = self._uri_opener.GET(web_shell_url, cache=True)
        except BaseFrameworkException:
            om.out.debug('Failed to GET webshell: %s' % web_shell_url)
        else:
            signature = self._match_signature(response)
            if signature is None:
                return

            desc = (u'An HTTP response matching the web backdoor signature'
                    u' "%s" was found at: "%s"; this could indicate that the'
                    u' server has been compromised.')
            desc %= (signature, response.get_url())

            # The probability of a true positive is higher if we found a long signature
            _severity = severity.HIGH if len(signature) > 8 else severity.MEDIUM

            v = Vuln(u'Potential web backdoor', desc, _severity,
                     response.id, self.get_name())
            v.set_url(response.get_url())

            kb.kb.append(self, 'backdoors', v)
            om.out.vulnerability(v.get_desc(), severity=v.get_severity())

            fr = FuzzableRequest.from_http_response(response)
            self.output_queue.put(fr)

    def _match_signature(self, response):
        """
        Heuristic to infer if the content of <response> has the pattern of a
        backdoor response.

        :param response: HTTPResponse object
        :return: The matched signature string, or None if nothing matched
        """
        body_text = response.get_body()
        
        for match, _, _, _ in self._signature_re.query(body_text):
            match_string = match.group(0)
            return match_string

        return None

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """
Example #9
    def test_short(self):
        re_list = ['12.?34']
        mre = MultiRE(re_list)

        result = to_list(mre.query('12X34'))
        self.assertEqual(1, len(result))
Example #10
    def test_dup(self):
        re_list = ['1234', '4567']
        mre = MultiRE(re_list)

        result = to_list(mre.query('4567 4567'))
        self.assertEqual(1, len(result))
Example #11
class path_disclosure(GrepPlugin):
    """
    Grep every page for traces of path disclosure vulnerabilities.

    :author: Andres Riancho ([email protected])
    """
    def __init__(self):
        GrepPlugin.__init__(self)

        # Internal variables
        self._reported = DiskList(table_prefix='path_disclosure')
        self._signature_re = None

    def setup(self):
        """
        :return: None, the result is saved in self._signature_re
        """
        if self._signature_re is not None:
            return

        all_signatures = []

        for path_disclosure_string in get_common_directories():
            regex_string = '(%s.*?)[^A-Za-z0-9\._\-\\/\+~]'
            regex_string = regex_string % path_disclosure_string
            all_signatures.append(regex_string)

        self._signature_re = MultiRE(all_signatures, hint_len=1)

    def grep(self, request, response):
        """
        Identify the path disclosure vulnerabilities.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None, the result is saved in the kb.
        """
        if not response.is_text_or_html():
            return

        self.setup()

        if self.find_path_disclosure(request, response):
            self._update_kb_path_list()

    def find_path_disclosure(self, request, response):
        """
        Actually find the path disclosure vulnerabilities
        """
        match_list = []
        body_text = response.get_body()
        real_url = response.get_url().url_decode()

        for match, _, _ in self._signature_re.query(body_text):
            match_list.append(match.group(1))

        # Sort by the longest match; this is needed for filtering out
        # some false positives, please read the note below.
        match_list.sort(longest_cmp)

        for match in match_list:
            # Avoid duplicated reports
            if (real_url, match) in self._reported:
                continue

            # Remove false positives
            if self._is_false_positive(match, request, response):
                continue

            # Found!
            self._reported.append((real_url, match))

            desc = ('The URL: "%s" has a path disclosure vulnerability which'
                    ' discloses "%s".')
            desc %= (response.get_url(), match)

            v = Vuln('Path disclosure vulnerability', desc, severity.LOW,
                     response.id, self.get_name())
            v.add_to_highlight(match)
            v.set_url(real_url)
            v['path'] = match

            self.kb_append(self, 'path_disclosure', v)
            return v

    def _is_false_positive(self, match, request, response):
        """
        :return: True if the match is a false positive
        """
        # This if is to avoid false positives
        if request.sent(match):
            return True

        # https://github.com/andresriancho/w3af/issues/6640
        url_list = kb.kb.get_all_known_urls()

        for url in url_list:
            path_and_file = url.get_path()
            if match == path_and_file:
                return True

        # There is also a rare bug, which is triggered in cases like this one:
        #
        #   >>> import re
        #
        #   >>> re.findall('/var/www/.*','/var/www/foobar/htdocs/article.php')
        #   ['/var/www/foobar/htdocs/article.php']
        #
        #   >>> re.findall('/htdocs/.*','/var/www/foobar/htdocs/article.php')
        #   ['/htdocs/article.php']
        #
        # What I need to do here, is to keep the longest match.
        for real_url_reported, match_reported in self._reported:
            if match_reported.endswith(match):
                return True

        # Check if the match we got is part of a tag attribute value
        #
        # This part of the function is the one that consumes the most CPU,
        # thus we run it last, hoping that at least one of the methods we
        # implemented above tags this match as a false positive and we don't
        # have to run the expensive method
        if self._is_attr_value(match, response):
            return True

        return False

    def _is_attr_value(self, path_disclosure_string, response):
        """
        This method was created to remove some false positives.

        This method consumes 99% of the CPU usage of the plugin, but there
        are only a few improvements that come to mind:

            * Run the code that checks if the value is in the attributes
              in the subprocess. The performance of this plugin will be
              slightly improved.

            * Before calling the document parser, check that the
              path_disclosure_string at least looks like part of an attribute
              value, using a regular expression such as:

                </?\w+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[\^'">\s]+))?)+\s*|\s*)/?>

                (I just need to add the path_disclosure_string somewhere there)

              At some point I was using a similar approach [0] but it seems
              that it was slow? (I doubt that it will be slower than parsing
              the response with lxml).

              Something that could be done, and given that we know that this
              is an HTML string is:

                - Find all places in the response where path_disclosure_string
                  appears

                - Create 'HTTP response snippets' with the locations of
                  path_disclosure_string +/- 500 characters.

                - Apply the regular expression over those strings only, avoiding
                  the cost of applying the regex to the whole HTML response

        [0] https://github.com/andresriancho/w3af/commit/f1029328fcaf7e790cc317701b63954c55a3f4c8
        [1] https://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx/

        :return: True if path_disclosure_string is the value of an attribute
                 inside a tag.

        Examples:
            path_disclosure_string = '/home/image.png'
            response_body = '....<img src="/home/image.png">...'
            return: True

            path_disclosure_string = '/home/image.png'
            response_body = '...<b>Error while checking /home/image.png</b>...'
            return: False
        """
        for tag in mp_doc_parser.get_tags_by_filter(response, None):
            for value in tag.attrib.itervalues():
                if path_disclosure_string in value:
                    return True

        return False

    def _update_kb_path_list(self):
        """
        If a path disclosure was found, I can create a list of full paths to
        all URLs ever visited. This method updates that list.
        """
        path_disc_vulns = kb.kb.get('path_disclosure', 'path_disclosure')
        url_list = kb.kb.get_all_known_urls()

        # Now I find the longest match between one of the URLs that w3af has
        # discovered, and one of the path disclosure strings that this plugin
        # has found. I use the longest match because with small match_list I
        # have more probability of making a mistake.
        longest_match = ''
        longest_path_disc_vuln = None
        for path_disc_vuln in path_disc_vulns:
            for url in url_list:
                path_and_file = url.get_path()

                if path_disc_vuln['path'].endswith(path_and_file):
                    if len(longest_match) < len(path_and_file):
                        longest_match = path_and_file
                        longest_path_disc_vuln = path_disc_vuln

        # Now I recalculate the place where all the resources are in disk, all
        # this is done taking the longest_match as a reference, so... if we
        # don't have a longest_match, then nothing is actually done
        if not longest_match:
            return

        # Get the webroot
        webroot = longest_path_disc_vuln['path'].replace(longest_match, '')

        #
        # This if fixes a strange case reported by Olle
        #         if webroot[0] == '/':
        #         IndexError: string index out of range
        # That seems to be because the webroot == ''
        #
        if not webroot:
            return

        # Check what path separator we should use (linux / windows)
        path_sep = '/' if webroot.startswith('/') else '\\'

        # Create the remote locations
        remote_locations = []
        for url in url_list:
            remote_path = url.get_path().replace('/', path_sep)
            remote_locations.append(webroot + remote_path)
        remote_locations = list(set(remote_locations))

        kb.kb.raw_write(self, 'list_files', remote_locations)
        kb.kb.raw_write(self, 'webroot', webroot)

    def end(self):
        self._reported.cleanup()

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """
Example #12
class find_backdoors(CrawlPlugin):
    """
    Find web backdoors and web shells.

    :author: Andres Riancho ([email protected])
    """
    WEBSHELL_DB = os.path.join(CRAWL_PATH, 'find_backdoors', 'web_shells.txt')
    SIGNATURE_DB = os.path.join(CRAWL_PATH, 'find_backdoors', 'signatures.txt')

    def __init__(self):
        CrawlPlugin.__init__(self)

        # Internal variables
        self._analyzed_dirs = ScalableBloomFilter()
        self._signature_re = None

    def setup(self):
        with self._plugin_lock:
            if self._signature_re is not None:
                return

            signatures = self._read_signatures()
            self._signature_re = MultiRE(signatures, hint_len=2)

    def _read_signatures(self):
        for line in file(self.SIGNATURE_DB):
            line = line.strip()

            if not line:
                continue

            if line.startswith('#'):
                continue

            yield (line, 'Backdoor signature')

    def crawl(self, fuzzable_request, debugging_id):
        """
        For every directory, fetch a list of shell files and analyze the
        response.

        :param debugging_id: A unique identifier for this call to discover()
        :param fuzzable_request: A fuzzable_request instance that contains
                                    (among other things) the URL to test.
        """
        domain_path = fuzzable_request.get_url().get_domain_path()

        if domain_path in self._analyzed_dirs:
            return

        self._analyzed_dirs.add(domain_path)

        self.setup()

        # Read the web shell database
        web_shells = self._iter_web_shells()

        # Send the requests using threads:
        args_iter = (domain_path.url_join(fname) for fname in web_shells)
        self.worker_pool.map(self._check_if_exists, args_iter)

    def _iter_web_shells(self):
        """
        :yield: lines from the web shell DB
        """
        for line in file(self.WEBSHELL_DB):
            line = line.strip()

            if line.startswith('#'):
                continue

            if not line:
                continue

            yield line

    def _check_if_exists(self, web_shell_url):
        """
        Check if the file exists.

        :param web_shell_url: The URL to check
        """
        try:
            response = self._uri_opener.GET(web_shell_url, cache=True)
        except BaseFrameworkException:
            om.out.debug('Failed to GET webshell: %s' % web_shell_url)
            return

        signature = self._match_signature(response)
        if signature is None:
            return

        desc = (u'An HTTP response matching the web backdoor signature'
                u' "%s" was found at: "%s"; this could indicate that the'
                u' server has been compromised.')
        desc %= (signature, response.get_url())

        # The probability of a true positive is higher if we found a long signature
        _severity = severity.HIGH if len(signature) > 8 else severity.MEDIUM

        v = Vuln(u'Potential web backdoor', desc, _severity, response.id,
                 self.get_name())
        v.set_url(response.get_url())

        kb.kb.append(self, 'backdoors', v)
        om.out.vulnerability(v.get_desc(), severity=v.get_severity())

        fr = FuzzableRequest.from_http_response(response)
        self.output_queue.put(fr)

    def _match_signature(self, response):
        """
        Heuristic to infer if the content of <response> has the pattern of a
        backdoor response.

        :param response: HTTPResponse object
        :return: The matched signature string, or None if nothing matched
        """
        body_text = response.get_body()

        for match, _, _, _ in self._signature_re.query(body_text):
            match_string = match.group(0)
            return match_string

        return None

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """
Example #13
class path_disclosure(GrepPlugin):
    """
    Grep every page for traces of path disclosure vulnerabilities.

    :author: Andres Riancho ([email protected])
    """

    def __init__(self):
        GrepPlugin.__init__(self)

        # Internal variables
        self._reported = DiskList(table_prefix='path_disclosure')
        self._signature_re = None

    def setup(self):
        """
        :return: None, the result is saved in self._signature_re
        """
        if self._signature_re is not None:
            return

        all_signatures = []

        for path_disclosure_string in get_common_directories():
            regex_string = '(%s.*?)[^A-Za-z0-9\._\-\\/\+~]'
            regex_string = regex_string % path_disclosure_string
            all_signatures.append(regex_string)
            
        self._signature_re = MultiRE(all_signatures, hint_len=1)

    def grep(self, request, response):
        """
        Identify the path disclosure vulnerabilities.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None, the result is saved in the kb.
        """
        if not response.is_text_or_html():
            return

        self.setup()

        if self.find_path_disclosure(request, response):
            self._update_kb_path_list()
        
    def find_path_disclosure(self, request, response):
        """
        Actually find the path disclosure vulnerabilities
        """
        body_text = response.get_body()
        match_list = []

        for match, _, _ in self._signature_re.query(body_text):
            match_list.append(match.group(1))

        # Sort by the longest match; this is needed for filtering out
        # some false positives, please read the note below.
        match_list.sort(longest_cmp)
        real_url = response.get_url().url_decode()

        for match in match_list:
            # Avoid duplicated reports
            if (real_url, match) in self._reported:
                continue

            # Remove false positives
            if self._is_false_positive(match, request, response):
                continue

            # Found!
            self._reported.append((real_url, match))

            desc = ('The URL: "%s" has a path disclosure vulnerability which'
                    ' discloses "%s".')
            desc %= (response.get_url(), match)

            v = Vuln('Path disclosure vulnerability', desc, severity.LOW,
                     response.id, self.get_name())
            v.add_to_highlight(match)
            v.set_url(real_url)
            v['path'] = match

            self.kb_append(self, 'path_disclosure', v)
            return v

    def _is_false_positive(self, match, request, response):
        """
        :return: True if the match is a false positive
        """
        # This if is to avoid false positives
        if request.sent(match):
            return True

        if self._is_attr_value(match, response):
            return True

        # https://github.com/andresriancho/w3af/issues/6640
        url_list = kb.kb.get_all_known_urls()
        for url in url_list:
            path_and_file = url.get_path()
            if match == path_and_file:
                return True

        # There is also a rare bug, which is triggered in cases like this one:
        #
        #   >>> import re
        #   >>> re.findall('/var/www/.*','/var/www/foobar/htdocs/article.php')
        #   ['/var/www/foobar/htdocs/article.php']
        #   >>> re.findall('/htdocs/.*','/var/www/foobar/htdocs/article.php')
        #   ['/htdocs/article.php']
        #   >>>
        #
        #   What I need to do here, is to keep the longest match.
        for real_url_reported, match_reported in self._reported:
            if match_reported.endswith(match):
                break
        else:
            # Note to self: I get here when "break" is NOT executed.
            # It's a new one, report!
            return False

        return True

    def _is_attr_value(self, path_disclosure_string, response):
        """
        This method was created to remove some false positives.

        :return: True if path_disclosure_string is the value of an attribute
                 inside a tag.

        Examples:
            path_disclosure_string = '/home/image.png'
            response_body = '....<img src="/home/image.png">...'
            return: True

            path_disclosure_string = '/home/image.png'
            response_body = '...<b>Error while checking /home/image.png</b>...'
            return: False
        """
        for tag in mp_doc_parser.get_tags_by_filter(response, None):
            for value in tag.attrib.itervalues():
                if path_disclosure_string in value:
                    return True

        return False

    def _update_kb_path_list(self):
        """
        If a path disclosure was found, I can create a list of full paths to
        all URLs ever visited. This method updates that list.
        """
        path_disc_vulns = kb.kb.get('path_disclosure', 'path_disclosure')
        url_list = kb.kb.get_all_known_urls()
        
        # Now I find the longest match between one of the URLs that w3af has
        # discovered, and one of the path disclosure strings that this plugin
        # has found. I use the longest match because with small match_list I
        # have more probability of making a mistake.
        longest_match = ''
        longest_path_disc_vuln = None
        for path_disc_vuln in path_disc_vulns:
            for url in url_list:
                path_and_file = url.get_path()

                if path_disc_vuln['path'].endswith(path_and_file):
                    if len(longest_match) < len(path_and_file):
                        longest_match = path_and_file
                        longest_path_disc_vuln = path_disc_vuln

        # Now I recalculate the place where all the resources are in disk, all
        # this is done taking the longest_match as a reference, so... if we
        # don't have a longest_match, then nothing is actually done
        if not longest_match:
            return

        # Get the webroot
        webroot = longest_path_disc_vuln['path'].replace(longest_match, '')

        #
        # This if fixes a strange case reported by Olle
        #         if webroot[0] == '/':
        #         IndexError: string index out of range
        # That seems to be because the webroot == ''
        #
        if not webroot:
            return
        
        # Check what path separator we should use (linux / windows)
        path_sep = '/' if webroot.startswith('/') else '\\'

        # Create the remote locations
        remote_locations = []
        for url in url_list:
            remote_path = url.get_path().replace('/', path_sep)
            remote_locations.append(webroot + remote_path)
        remote_locations = list(set(remote_locations))

        kb.kb.raw_write(self, 'list_files', remote_locations)
        kb.kb.raw_write(self, 'webroot', webroot)

    def end(self):
        self._reported.cleanup()

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """