示例#1
0
    def test_get(self):
        """
        get() returns the stored value for a known key (the default, when
        given, is ignored) and the supplied default for an unknown key.
        """
        store = DiskDict()
        store[0] = 'abc'

        # Known key, with and without a default
        self.assertEqual(store.get(0), 'abc')
        self.assertEqual(store.get(0, 1), 'abc')

        # Unknown key, the default is returned
        self.assertEqual(store.get(1, 2), 2)
示例#2
0
    def test_get(self):
        """
        Exercise DiskDict.get() for a key which was stored and for a key
        which was never stored.
        """
        disk_dict = DiskDict()
        disk_dict[0] = 'abc'

        stored_without_default = disk_dict.get(0)
        stored_with_default = disk_dict.get(0, 1)
        missing_with_default = disk_dict.get(1, 2)

        # The default is only used when the key is missing
        self.assertEqual(stored_without_default, 'abc')
        self.assertEqual(stored_with_default, 'abc')
        self.assertEqual(missing_with_default, 2)
示例#3
0
class html_comments(GrepPlugin):
    """
    Extract and analyze HTML comments.

    :author: Andres Riancho ([email protected])
    """

    # Matches an opening tag followed (non-greedy) by a closing tag. The
    # closing tag name accepts one or more characters, otherwise only
    # single-letter tags such as "</a>" or "</p>" are detected and comments
    # hiding "<div>...</div>" or "<h1>...</h1>" go unnoticed.
    HTML_RE = re.compile(r'<[a-zA-Z]*.*?>.*?</[a-zA-Z][a-zA-Z0-9]*>')

    # Words which make a comment worth reporting, each one listed only once
    INTERESTING_WORDS = (
        # In English
        'user',
        'pass',
        'xxx',
        'fix',
        'bug',
        'broken',
        'oops',
        'hack',
        'caution',
        'todo',
        'note',
        'warning',
        '!!!',
        '???',
        'shit',
        'password',
        'passwd',
        'pwd',
        'secret',
        'stupid',

        # In Spanish
        'tonto',
        'porqueria',
        'cuidado',
        'usuario',
        u'contraseña',
        'puta',
        'email',
        'security',
        'captcha',
        'pinga',
        'cojones',

        # some in Portuguese
        'banco',
        'bradesco',
        'itau',
        'visa',
        'bancoreal',
        u'transfêrencia',
        u'depósito',
        u'cartão',
        u'crédito',
        'dados pessoais',
    )

    # Every word is wrapped in spaces before it is handed to the matcher
    _multi_in = multi_in([' %s ' % w for w in INTERESTING_WORDS])

    def __init__(self):
        """
        Initialize the parent plugin and create the containers used to
        remember which comments were seen and which were reported.
        """
        GrepPlugin.__init__(self)

        # Maps each comment to the list of (url, response id) where it was
        # found
        self._comments = DiskDict(table_prefix='html_comments')

        # Items for which a finding was already reported
        self._already_reported = ScalableBloomFilter()

    def grep(self, request, response):
        """
        Plugin entry point: run the comment checks on every comment of the
        response which was not seen before on this URL.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        if not response.is_text_or_html():
            return

        try:
            parser = parser_cache.dpc.get_document_parser_for(response)
        except BaseFrameworkException:
            return

        for html_comment in parser.get_comments():
            # Ignore comments which were sent in the request, this fixes:
            # audit.ssi + grep.html_comments + web app with XSS = false positive
            if request.sent(html_comment):
                continue

            if not self._is_new(html_comment, response):
                continue

            self._interesting_word(html_comment, request, response)
            self._html_in_comment(html_comment, request, response)

    def _interesting_word(self, comment, request, response):
        """
        Report each interesting word found in the (lower-cased) comment,
        once per (word, URL) pair.
        """
        lowered = comment.lower()

        for word in self._multi_in.query(lowered):
            report_key = (word, response.get_url())

            if report_key in self._already_reported:
                continue

            desc = ('A comment with the string "%s" was found in: "%s".'
                    ' This could be interesting.')
            desc %= report_key

            vuln = Vuln.from_fr('Interesting HTML comment',
                                desc, severity.INFORMATION, response.id,
                                self.get_name(), request)
            vuln.add_to_highlight(word)

            kb.kb.append(self, 'interesting_comments', vuln)

            # Remember the pair so that it is not reported again
            self._already_reported.add(report_key)

    def _html_in_comment(self, comment, request, response):
        """
        Find HTML code in HTML comments and report it once per
        (comment, URL) pair.

        :param comment: The raw HTML comment text
        :param request: The HTTP request object
        :param response: The HTTP response object
        :return: None, the finding is sent to the output manager and the kb
        """
        html_in_comment = self.HTML_RE.search(comment)

        if html_in_comment is None:
            return

        # The de-duplication key is built from the raw comment *before* it is
        # shortened for display, and the very same key is used for the lookup
        # and for the add() at the end. Looking up the raw comment but
        # storing the shortened one makes the two keys differ for most
        # comments, so duplicates are reported again.
        report_key = (comment, response.get_url())

        if report_key in self._already_reported:
            return

        # There is HTML code in the comment, shorten it for the description
        short_comment = comment.strip()
        short_comment = short_comment.replace('\n', '')
        short_comment = short_comment.replace('\r', '')
        short_comment = short_comment[:40]

        desc = ('A comment with the string "%s" was found in: "%s".'
                ' This could be interesting.')
        desc %= (short_comment, response.get_url())

        v = Vuln.from_fr('HTML comment contains HTML code',
                         desc, severity.INFORMATION, response.id,
                         self.get_name(), request)
        v.set_uri(response.get_uri())
        v.add_to_highlight(html_in_comment.group(0))

        om.out.vulnerability(v.get_desc(), severity=severity.INFORMATION)
        kb.kb.append(self, 'html_comment_hides_html', v)
        self._already_reported.add(report_key)

    def _is_new(self, comment, response):
        """
        Check, while holding the plugin lock, if the comment was already
        stored for the response URL. Unknown (comment, URL) combinations are
        stored before returning.

        :return: True if the comment was not stored for this URL yet
        """
        with self._plugin_lock:

            #pylint: disable=E1103
            known_locations = self._comments.get(comment, None)
            current_url = response.get_url()

            # First time this comment is found anywhere
            if known_locations is None:
                self._comments[comment] = [(current_url, response.id)]
                return True

            # Found before, was it on this same URL?
            if any(current_url == saved_url
                   for saved_url, _ in known_locations):
                return False

            # Found before, but on other URLs: store the new location
            known_locations.append((current_url, response.id))
            self._comments[comment] = known_locations
            return True
            #pylint: enable=E1103

    def end(self):
        """
        This method is called when the plugin wont be used anymore. It
        prints every stored comment followed by the sorted list of URLs
        (and request ids) where it was found, and then cleans up the
        comment storage.

        :return: None
        """
        try:
            for comment, url_request_id_lst in self._comments.iteritems():

                # Collapse any run of whitespace into a single space
                stick_comment = ' '.join(comment.split())

                if len(stick_comment) > 40:
                    msg = ('A comment with the string "%s..." (and %s more'
                           ' bytes) was found on these URL(s):')
                    args = (stick_comment[:40], str(len(stick_comment) - 40))
                    om.out.vulnerability(msg % args,
                                         severity=severity.INFORMATION)
                else:
                    msg = 'A comment containing "%s" was found on these URL(s):'
                    om.out.vulnerability(msg % stick_comment,
                                         severity=severity.INFORMATION)

                inform = ['- %s (request with id: %s)' % (url, request_id)
                          for url, request_id in url_request_id_lst]

                for line in sorted(inform):
                    om.out.vulnerability(line, severity=severity.INFORMATION)
        finally:
            # Release the storage even if reporting raised an exception
            self._comments.cleanup()

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """
示例#4
0
class VariantDB(object):
    """
    Count how many variants of each normalized reference were appended, in
    order to answer if more variants are still needed.
    """

    def __init__(self, max_variants=5):
        """
        :param max_variants: Number of appended variants after which
                             need_more_variants() answers False.
        """
        self.max_variants = max_variants
        self._db_lock = threading.RLock()
        self._disk_dict = DiskDict()

    def append(self, reference):
        """
        Called when a new reference is found and we proved that new
        variants are still needed.

        :param reference: The reference (as a URL object) to add. This method
                          will "normalize" it before adding it to the internal
                          shelve.
        """
        key = self._clean_reference(reference)

        # Read and write under the lock so that the increment is not lost
        with self._db_lock:
            self._disk_dict[key] = self._disk_dict.get(key, 0) + 1

    def _clean_reference(self, reference):
        """
        This method is VERY dependent on the are_variants method from
        core.data.request.variant_identification , make sure to remember that
        when changing stuff here or there.

        The reference is "normalized" into a string where each query string
        value is replaced by 'number' or 'string', so that references can be
        compared with a simple string match.
        """
        normalized = reference.get_domain_path() + reference.get_file_name()

        if not reference.has_query_string():
            return normalized

        qs = reference.querystring.copy()

        for key in qs:
            values = qs[key]
            for idx, value in enumerate(values):
                values[idx] = 'number' if value.isdigit() else 'string'

        return normalized + '?' + str(qs)

    def need_more_variants(self, reference):
        """
        :return: True if there are not enough variants associated with
        this reference in the DB.
        """
        key = self._clean_reference(reference)

        # The read is done without taking the lock (the original author
        # considered it "atomic enough")
        seen = self._disk_dict.get(key, 0)
        return seen < self.max_variants
示例#5
0
class VariantDB(object):
    """
    See the notes on PARAMS_MAX_VARIANTS and PATH_MAX_VARIANTS above. Also
    understand that we'll keep "dirty" versions of the references/fuzzable
    requests in order to be able to answer "False" to a call for
    need_more_variants in a situation like this:

        >> need_more_variants('http://foo.com/abc?id=32')
        True

        >> append('http://foo.com/abc?id=32')
        True

        >> need_more_variants('http://foo.com/abc?id=32')
        False

    """
    # Passed to get_request_hash() when hashing a fuzzable request
    HASH_IGNORE_HEADERS = ('referer', )
    TAG = '[variant_db]'

    def __init__(self):
        """
        Create the storages and read the variant limits from the
        configuration (cf.cf).
        """
        # Counters for "similar" requests, keyed by the cleaned request
        self._variants = DiskDict(table_prefix='variant_db')
        # Hashes of the requests which were already seen
        self._variants_eq = ScalableBloomFilter()
        # Counters for "similar" forms, keyed by the cleaned form
        self._variants_form = DiskDict(table_prefix='variant_db_form')

        self.params_max_variants = cf.cf.get('params_max_variants')
        self.path_max_variants = cf.cf.get('path_max_variants')
        self.max_equal_form_variants = cf.cf.get('max_equal_form_variants')

        self._db_lock = threading.RLock()

    def cleanup(self):
        """
        Clean up both DiskDict storages.
        """
        self._variants.cleanup()
        self._variants_form.cleanup()

    def append(self, fuzzable_request):
        """
        :return: True if we added a new fuzzable request variant to the DB,
                 False if NO more variants are required for this fuzzable
                 request.
        """
        with self._db_lock:
            if self._seen_exactly_the_same(fuzzable_request):
                return False

            if self._has_form(fuzzable_request):
                if not self._need_more_variants_for_form(fuzzable_request):
                    return False

            if not self._need_more_variants_for_uri(fuzzable_request):
                return False

            # Yes, please give me more variants of fuzzable_request
            return True

    def _log_return_false(self, fuzzable_request, reason):
        """
        Write a debug message explaining why a variant was rejected.

        :param fuzzable_request: The rejected fuzzable request
        :param reason: Short string naming the check which rejected it
        """
        args = (reason, fuzzable_request)
        msg = 'VariantDB is returning False because of "%s" for "%s"'
        om.out.debug(msg % args)

    def _need_more_variants_for_uri(self, fuzzable_request):
        """
        Do we need more variants for the fuzzable request? (similar match)
        Uses params_max_variants or path_max_variants as the limit and
        increments the stored counter when the answer is True.

        :return: True if the limit was not reached yet
        """
        clean_dict_key = clean_fuzzable_request(fuzzable_request)
        count = self._variants.get(clean_dict_key, None)

        if count is None:
            self._variants[clean_dict_key] = 1
            return True

        # We've seen at least one fuzzable request with this pattern...
        url = fuzzable_request.get_uri()
        has_params = url.has_query_string() or fuzzable_request.get_raw_data()

        # Choose which max_variants to use
        if has_params:
            max_variants = self.params_max_variants
            max_variants_type = 'params'
        else:
            max_variants = self.path_max_variants
            max_variants_type = 'path'

        if count >= max_variants:
            _type = 'need_more_variants_for_uri(%s)' % max_variants_type
            self._log_return_false(fuzzable_request, _type)
            return False

        self._variants[clean_dict_key] = count + 1
        return True

    def _seen_exactly_the_same(self, fuzzable_request):
        """
        Is the fuzzable request already known to us? (exactly the same)
        Unknown requests are stored before returning.

        :return: True if the request hash was already stored
        """
        request_hash = fuzzable_request.get_request_hash(
            self.HASH_IGNORE_HEADERS)

        if request_hash in self._variants_eq:
            # This is the branch which makes append() return False, so this
            # is where the rejection has to be logged
            self._log_return_false(fuzzable_request, 'seen_exactly_the_same')
            return True

        # Store it to avoid duplicated fuzzable requests in our framework
        self._variants_eq.add(request_hash)
        return False

    def _has_form(self, fuzzable_request):
        """
        :return: True if the request has raw data with two or more
                 parameter names
        """
        raw_data = fuzzable_request.get_raw_data()
        if raw_data and len(raw_data.get_param_names()) >= 2:
            return True

        return False

    def _need_more_variants_for_form(self, fuzzable_request):
        """
        Do we need more variants for this form? (similar match)
        Uses max_equal_form_variants as the limit and increments the stored
        counter when the answer is True.

        :return: True if the limit was not reached yet
        """
        clean_dict_key_form = clean_fuzzable_request_form(fuzzable_request)
        count = self._variants_form.get(clean_dict_key_form, None)

        if count is None:
            self._variants_form[clean_dict_key_form] = 1
            return True

        if count >= self.max_equal_form_variants:
            self._log_return_false(fuzzable_request,
                                   'need_more_variants_for_form')
            return False

        self._variants_form[clean_dict_key_form] = count + 1
        return True
示例#6
0
class VariantDB(object):
    """
    Count how many variants of each normalized reference were appended, in
    order to answer if more variants are still needed.
    """

    def __init__(self, max_variants=DEFAULT_MAX_VARIANTS):
        """
        :param max_variants: Number of appended variants after which
                             need_more_variants() answers False.
        """
        self._disk_dict = DiskDict(table_prefix='variant_db')
        self._db_lock = threading.RLock()
        self.max_variants = max_variants

    def append(self, reference):
        """
        Called when a new reference is found and we proved that new
        variants are still needed.

        :param reference: The reference (as a URL object) to add. This method
                          will "normalize" it before adding it to the internal
                          shelve.
        """
        key = self._clean_reference(reference)

        # Read and write under the lock so that the increment is not lost
        with self._db_lock:
            self._disk_dict[key] = self._disk_dict.get(key, 0) + 1

    def need_more_variants(self, reference):
        """
        :return: True if there are not enough variants associated with
        this reference in the DB.
        """
        key = self._clean_reference(reference)

        # The read is done without taking the lock (the original author
        # considered it "atomic enough")
        seen = self._disk_dict.get(key, 0)
        return seen < self.max_variants

    def _clean_reference(self, reference):
        """
        This method is VERY dependent on the are_variants method from
        core.data.request.variant_identification , make sure to remember that
        when changing stuff here or there.

        The reference is "normalized" into a string where each query string
        value is replaced by 'number' or 'string', so that references can be
        compared with a simple string match.
        """
        normalized = reference.get_domain_path() + reference.get_file_name()

        if not reference.has_query_string():
            return normalized

        # Work on a deep copy, the setters below modify the container
        qs = copy.deepcopy(reference.querystring)

        for _key, value, _path, setter in qs.iter_setters():
            setter('number' if value.isdigit() else 'string')

        return normalized + '?' + str(qs)
示例#7
0
class VariantDB(object):
    """
    See the notes on PARAMS_MAX_VARIANTS and PATH_MAX_VARIANTS above. Also
    understand that we'll keep "dirty" versions of the references/fuzzable
    requests in order to be able to answer "False" to a call for
    need_more_variants in a situation like this:

        need_more_variants('http://foo.com/abc?id=32')      --> True
        append('http://foo.com/abc?id=32')
        need_more_variants('http://foo.com/abc?id=32')      --> False

    """
    HASH_IGNORE_HEADERS = ('referer', )
    TAG = '[variant_db]'

    def __init__(self,
                 params_max_variants=PARAMS_MAX_VARIANTS,
                 path_max_variants=PATH_MAX_VARIANTS):
        """
        :param params_max_variants: Limit used when the request has a query
                                    string or raw data
        :param path_max_variants: Limit used for every other request
        """
        # Request hash -> True, for the requests which were already seen
        self._variants_eq = DiskDict(table_prefix='variant_db_eq')
        # Cleaned request -> number of variants which were accepted
        self._variants = DiskDict(table_prefix='variant_db')

        self.params_max_variants = params_max_variants
        self.path_max_variants = path_max_variants

        self._db_lock = threading.RLock()

    def cleanup(self):
        """
        Clean up both DiskDict storages.
        """
        self._variants_eq.cleanup()
        self._variants.cleanup()

    def append(self, fuzzable_request):
        """
        :return: True if we added a new fuzzable request variant to the DB,
                 False if no more variants are required for this fuzzable
                 request.
        """
        with self._db_lock:
            # Exact match: was this very same request seen before?
            request_hash = fuzzable_request.get_request_hash(
                self.HASH_IGNORE_HEADERS)

            if self._variants_eq.get(request_hash, False):
                return False

            # Store it to avoid duplicated fuzzable requests in our framework
            self._variants_eq[request_hash] = True

            # Similar match: how many requests with this pattern were
            # accepted?
            pattern = clean_fuzzable_request(fuzzable_request)
            accepted = self._variants.get(pattern, None)

            if accepted is None:
                self._variants[pattern] = 1
                return True

            # We've seen at least one fuzzable request with this pattern,
            # choose which limit applies to it
            uri = fuzzable_request.get_uri()

            if uri.has_query_string() or fuzzable_request.get_raw_data():
                limit = self.params_max_variants
            else:
                limit = self.path_max_variants

            if accepted >= limit:
                return False

            self._variants[pattern] = accepted + 1
            return True
示例#8
0
class html_comments(GrepPlugin):
    """
    Extract and analyze HTML comments.

    :author: Andres Riancho ([email protected])
    """

    # Opening tag with at least one space after its name, followed
    # (non-greedy) by a closing tag
    HTML_RE = re.compile(r'<[a-zA-Z]+ .*?>.*?</[a-zA-Z]+>')

    # Comments containing any of these strings are never reported as
    # "HTML in comment"
    HTML_FALSE_POSITIVES = {
        '[if IE]',
        '[if !IE]',
        '[if IE 7 ]',
        '[if IE 8 ]',
        '[if IE 9]',
        '[if lte IE 8]',
        '[if lte IE 9]',
    }

    # Words which make a comment worth reporting, each one listed only once
    INTERESTING_WORDS = (
        # In English
        'user',
        'pass',
        'xxx',
        'fix',
        'bug',
        'broken',
        'oops',
        'hack',
        'caution',
        'todo',
        'note',
        'warning',
        '!!!',
        '???',
        'shit',
        'password',
        'passwd',
        'pwd',
        'secret',
        'stupid',

        # In Spanish
        'tonto',
        'porqueria',
        'cuidado',
        'usuario',
        u'contraseña',
        'puta',
        'email',
        'security',
        'captcha',
        'pinga',
        'cojones',

        # In Portuguese
        'banco',
        'bradesco',
        'itau',
        'visa',
        'bancoreal',
        u'transfêrencia',
        u'depósito',
        u'cartão',
        u'crédito',
        'dados pessoais',
    )

    # Every word is wrapped in spaces before it is handed to the matcher
    _multi_in = MultiIn([' %s ' % w for w in INTERESTING_WORDS])

    def __init__(self):
        """
        Initialize the parent plugin and the containers used to remember
        which comments were seen and which were reported.
        """
        GrepPlugin.__init__(self)

        # Internal variables
        self._comments = DiskDict(table_prefix='html_comments')
        self._already_reported = ScalableBloomFilter()
        # Only read by _handle_no_such_table() for its debug message
        self._end_was_called = False

    def grep(self, request, response):
        """
        Plugin entry point, parse those comments!

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        if not response.is_text_or_html():
            return

        try:
            dp = parser_cache.dpc.get_document_parser_for(response)
        except BaseFrameworkException:
            return

        for comment in dp.get_comments():
            if self._is_new(comment, response):
                self._interesting_word(comment, request, response)
                self._html_in_comment(comment, request, response)

    def _interesting_word(self, comment, request, response):
        """
        Find interesting words in HTML comments and report each one once per
        (word, URL) pair.

        :param comment: The raw HTML comment text
        :param request: The HTTP request object
        :param response: The HTTP response object
        :return: None
        """
        lower_comment = comment.lower()

        for word in self._multi_in.query(lower_comment):
            if (word, response.get_url()) in self._already_reported:
                continue

            # These next two lines fix a false positive which appears when
            # audit.ssi sends a payload to a site which has XSS, and
            # grep.html_comments sees that comment and reports it.
            if request.sent(comment):
                continue

            self._already_reported.add((word, response.get_url()))

            desc = ('A comment with the string "%s" was found in: "%s".'
                    ' This could be interesting.')
            desc %= (word, response.get_url())

            info = Info.from_fr('Interesting HTML comment', desc, response.id,
                                self.get_name(), request)
            info.add_to_highlight(word)

            kb.kb.append(self, 'interesting_comments', info)
            om.out.information(info.get_desc())

    def _html_in_comment(self, comment, request, response):
        """
        Find HTML code in HTML comments and report it once per
        (comment, URL) pair.

        :param comment: The raw HTML comment text
        :param request: The HTTP request object
        :param response: The HTTP response object
        :return: None
        """
        html_in_comment = self.HTML_RE.search(comment)

        if html_in_comment is None:
            return

        for false_positive_string in self.HTML_FALSE_POSITIVES:
            if false_positive_string in comment:
                return

        # The key uses the raw comment, before it is shortened below
        comment_data = (comment, response.get_url())

        if comment_data in self._already_reported:
            return

        self._already_reported.add(comment_data)

        # There is HTML code in the comment.
        comment = comment.strip()
        comment = comment.replace('\n', '')
        comment = comment.replace('\r', '')
        comment = comment[:40]

        desc = ('A comment containing HTML code "%s" was found in: "%s".'
                ' This could be interesting.')
        desc %= (comment, response.get_url())

        info = Info.from_fr('HTML comment contains HTML code', desc,
                            response.id, self.get_name(), request)
        info.set_uri(response.get_uri())
        info.add_to_highlight(html_in_comment.group(0))

        kb.kb.append(self, 'html_comment_hides_html', info)
        om.out.information(info.get_desc())

    def _handle_no_such_table(self, comment, response, nste):
        """
        I had a lot of issues trying to reproduce [0], so this code is just
        a helper for me to identify the root cause.

        [0] https://github.com/andresriancho/w3af/issues/10849

        :param nste: The original exception
        :param comment: The comment we're analyzing
        :param response: The HTTP response
        :return: None, an exception with more information is re-raised
        :raises NoSuchTableException: Always
        """
        msg = ('A NoSuchTableException was raised by the DBMS. This issue is'
               ' related with #10849 , but since I was unable to reproduce'
               ' it, extra debug information is added to the exception:'
               '\n'
               '\n - Grep plugin end() was called: %s'
               '\n - Response ID is: %s'
               '\n - HTML comment is: "%s"'
               '\n - Original exception: "%s"'
               '\n\n'
               'https://github.com/andresriancho/w3af/issues/10849\n')
        args = (self._end_was_called, response.get_id(), comment, nste)

        raise NoSuchTableException(msg % args)

    def _is_new(self, comment, response):
        """
        Avoid duplicates by checking self._comments

        :param comment: The raw HTML comment text
        :param response: The HTTP response object
        :return: True if the comment was not stored for this URL yet
        :raises NoSuchTableException: With extra debug information, when the
                                      storage raises it
        """
        # pylint: disable=E1103
        try:
            comment_data = self._comments.get(comment, None)
        except NoSuchTableException as nste:
            # "except X as e" is valid both in Python 2.6+ and Python 3
            self._handle_no_such_table(comment, response, nste)
            # Not reached, the helper always raises. Kept as a safety net so
            # that comment_data is never used while unbound.
            return False

        response_url = response.get_url()

        # The comment was never seen before
        if comment_data is None:
            self._comments[comment] = [(response_url, response.id)]
            return True

        # The comment was seen before, maybe on a different URL
        for saved_url, response_id in comment_data:
            if response_url == saved_url:
                return False

        # The comment was never seen before on this URL, store this knowledge
        comment_data.append((response_url, response.id))
        self._comments[comment] = comment_data

        return True
示例#9
0
class VariantDB(object):
    """
    Count how many variants of each normalized reference / fuzzable request
    were appended, in order to answer if more variants are still needed.
    """

    def __init__(self, max_variants=DEFAULT_MAX_VARIANTS):
        """
        :param max_variants: Number of appended variants after which no more
                             variants are needed.
        """
        self._disk_dict = DiskDict(table_prefix='variant_db')
        self._db_lock = threading.RLock()
        self.max_variants = max_variants

    def _count_one_more(self, key):
        """
        Increment (under the lock) the counter stored for key, starting at 1
        when the key is not stored yet.
        """
        with self._db_lock:
            self._disk_dict[key] = self._disk_dict.get(key, 0) + 1

    def append(self, reference):
        """
        Called when a new reference is found and we proved that new
        variants are still needed.

        :param reference: The reference (as a URL object) to add. This method
                          will "normalize" it before adding it to the internal
                          shelve.
        """
        self._count_one_more(self._clean_reference(reference))

    def append_fr(self, fuzzable_request):
        """
        Same as append(), but for a fuzzable request.
        """
        self._count_one_more(self._clean_fuzzable_request(fuzzable_request))

    def need_more_variants(self, reference):
        """
        :return: True if there are not enough variants associated with
        this reference in the DB.
        """
        key = self._clean_reference(reference)

        # When we're analyzing a path (without QS), we just need 1
        if reference.has_query_string():
            limit = self.max_variants
        else:
            limit = 1

        # The read is done without taking the lock (the original author
        # considered it "atomic enough")
        seen = self._disk_dict.get(key, 0)
        return seen < limit

    def need_more_variants_for_fr(self, fuzzable_request):
        """
        :return: True if there are not enough variants associated with
        this fuzzable request in the DB.
        """
        key = self._clean_fuzzable_request(fuzzable_request)

        # Read without taking the lock, same as need_more_variants()
        seen = self._disk_dict.get(key, 0)
        return seen < self.max_variants

    def _clean_reference(self, reference):
        """
        This method is VERY dependent on the are_variants method from
        core.data.request.variant_identification , make sure to remember that
        when changing stuff here or there.

        The reference is "normalized" into a string which can be compared
        with a simple string match.

        Since this is a reference (link) '(GET)-' is prepended to the result,
        which will help us add support for forms/fuzzable requests with
        '(POST)-' in the future.
        """
        domain_path = reference.get_domain_path().url_string
        cleaned = '(GET)-' + domain_path.encode(DEFAULT_ENCODING)
        cleaned = cleaned + reference.get_file_name()

        if reference.has_query_string():
            cleaned += '?' + self._clean_data_container(reference.querystring)

        return cleaned

    def _clean_data_container(self, data_container):
        """
        A simplified/serialized version of the data container, where every
        value is replaced by 'number' or 'string'.
        """
        # Work on a deep copy, the setters below modify the container
        simplified = copy.deepcopy(data_container)

        for _key, value, _path, setter in simplified.iter_setters():
            setter('number' if value.isdigit() else 'string')

        return str(simplified)

    def _clean_fuzzable_request(self, fuzzable_request):
        """
        Very similar to _clean_reference but we receive a fuzzable request
        instead. The output includes the HTTP method and any parameters which
        might be sent over HTTP post-data in the request are appended to the
        result (after a '!').

        :param fuzzable_request: The fuzzable request instance to clean
        :return: See _clean_reference
        """
        cleaned = '(%s)-' % fuzzable_request.get_method().upper()

        uri = fuzzable_request.get_uri()
        cleaned += uri.get_domain_path() + uri.get_file_name()

        if uri.has_query_string():
            cleaned += '?' + self._clean_data_container(uri.querystring)

        raw_data = fuzzable_request.get_raw_data()
        if raw_data:
            cleaned += '!' + self._clean_data_container(raw_data)

        return cleaned
示例#10
0
class html_comments(GrepPlugin):
    """
    Extract and analyze HTML comments.

    Every comment found in a text/HTML response is analyzed once per URL,
    looking for "interesting" words and for HTML markup hidden inside the
    comment. Findings are appended to the knowledge base and sent to the
    output manager.

    :author: Andres Riancho ([email protected])
    """

    # NOTE(review): the closing tag part of this expression only accepts tag
    # names with exactly one letter (</a>, </b>, </p>, ...) - confirm if a
    # '+' quantifier was intended.
    HTML_RE = re.compile('<[a-zA-Z]*.*?>.*?</[a-zA-Z]>')

    INTERESTING_WORDS = (
        # In English
        'user', 'pass', 'xxx', 'fix', 'bug', 'broken', 'oops', 'hack',
        'caution', 'todo', 'note', 'warning', '!!!', '???', 'shit',
        'pass', 'password', 'passwd', 'pwd', 'secret', 'stupid',
        
        # In Spanish
        'tonto', 'porqueria', 'cuidado', 'usuario', u'contraseña',
        'puta', 'email', 'security', 'captcha', 'pinga', 'cojones',
        
        # some in Portuguese
        'banco', 'bradesco', 'itau', 'visa', 'bancoreal', u'transfêrencia',
        u'depósito', u'cartão', u'crédito', 'dados pessoais'
    )

    # Each word is wrapped in single spaces before being handed to MultiIn
    _multi_in = MultiIn([' %s ' % w for w in INTERESTING_WORDS])

    def __init__(self):
        GrepPlugin.__init__(self)

        # Internal variables

        # comment -> [(url, response id), ...] , see _is_new()
        self._comments = DiskDict(table_prefix='html_comments')

        # Holds the (word, url) and (comment, url) tuples which were already
        # reported to the knowledge base
        self._already_reported = ScalableBloomFilter()

        # Only read by _handle_no_such_table() to add debugging information
        self._end_was_called = False

    def grep(self, request, response):
        """
        Plugin entry point, parse those comments!

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        if not response.is_text_or_html():
            return
        
        try:
            dp = parser_cache.dpc.get_document_parser_for(response)
        except BaseFrameworkException:
            # No parser for this response, nothing to analyze
            return
        
        for comment in dp.get_comments():
            # These next two lines fix this issue:
            # audit.ssi + grep.html_comments + web app with XSS = false positive
            if request.sent(comment):
                continue

            if self._is_new(comment, response):

                self._interesting_word(comment, request, response)
                self._html_in_comment(comment, request, response)

    def _interesting_word(self, comment, request, response):
        """
        Find interesting words in HTML comments

        :param comment: The HTML comment to analyze
        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None, findings are stored in the kb as 'interesting_comments'
        """
        comment = comment.lower()

        for word in self._multi_in.query(comment):
            if (word, response.get_url()) in self._already_reported:
                continue

            desc = ('A comment with the string "%s" was found in: "%s".'
                    ' This could be interesting.')
            desc %= (word, response.get_url())

            i = Info.from_fr('Interesting HTML comment', desc, response.id,
                             self.get_name(), request)
            i.add_to_highlight(word)

            kb.kb.append(self, 'interesting_comments', i)
            om.out.information(i.get_desc())
                
            self._already_reported.add((word, response.get_url()))

    def _html_in_comment(self, comment, request, response):
        """
        Find HTML code in HTML comments

        :param comment: The HTML comment to analyze
        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None, findings are stored in the kb as
                 'html_comment_hides_html'
        """
        html_in_comment = self.HTML_RE.search(comment)

        if html_in_comment is None:
            return

        # The same key needs to be used for the "already reported" check and
        # for the add() call below. The key is built with the unmodified
        # comment: using the shortened version for add() makes the check
        # miss any comment which is modified by the strip / truncate steps.
        report_key = (comment, response.get_url())

        if report_key in self._already_reported:
            return

        # There is HTML code in the comment, shorten it for the description
        short_comment = comment.strip()
        short_comment = short_comment.replace('\n', '')
        short_comment = short_comment.replace('\r', '')
        short_comment = short_comment[:40]

        desc = ('A comment with the string "%s" was found in: "%s".'
                ' This could be interesting.')
        desc %= (short_comment, response.get_url())

        i = Info.from_fr('HTML comment contains HTML code', desc, response.id,
                         self.get_name(), request)
        i.set_uri(response.get_uri())
        i.add_to_highlight(html_in_comment.group(0))

        kb.kb.append(self, 'html_comment_hides_html', i)
        om.out.information(i.get_desc())
        self._already_reported.add(report_key)

    def _handle_no_such_table(self, comment, response, nste):
        """
        I had a lot of issues trying to reproduce [0], so this code is just
        a helper for me to identify the root cause.

        [0] https://github.com/andresriancho/w3af/issues/10849

        :param nste: The original exception
        :param comment: The comment we're analyzing
        :param response: The HTTP response
        :return: None, an exception with more information is re-raised
        """
        msg = ('A NoSuchTableException was raised by the DBMS. This issue is'
               ' related with #10849 , but since I was unable to reproduce'
               ' it, extra debug information is added to the exception:'
               '\n'
               '\n - Grep plugin end() was called: %s'
               '\n - Response ID is: %s'
               '\n - HTML comment is: "%s"'
               '\n - Original exception: "%s"'
               '\n\n'
               'https://github.com/andresriancho/w3af/issues/10849\n')
        args = (self._end_was_called,
                response.get_id(),
                comment,
                nste)
        raise NoSuchTableException(msg % args)

    def _is_new(self, comment, response):
        """
        Make sure that we perform a thread safe check on the self._comments
        dict, in order to avoid duplicates.

        :param comment: The HTML comment to check
        :param response: The HTTP response where the comment was found
        :return: True if this is the first time the comment is seen in the
                 response URL (the URL and response id are saved), False if
                 the comment was already seen in that URL.
        """
        with self._plugin_lock:
            
            #pylint: disable=E1103
            try:
                comment_data = self._comments.get(comment, None)
            except NoSuchTableException as nste:
                # The helper always raises a new NoSuchTableException
                self._handle_no_such_table(comment, response, nste)

            response_url = response.get_url()

            if comment_data is None:
                self._comments[comment] = [(response_url, response.id)]
                return True
            else:
                for saved_url, response_id in comment_data:
                    if response_url == saved_url:
                        return False
                else:
                    # The for loop has no break, the else branch is run
                    # when none of the saved URLs matched
                    comment_data.append((response_url, response.id))
                    self._comments[comment] = comment_data
                    return True
示例#11
0
class VariantDB(object):
    """
    Decide if a fuzzable request needs to be kept, based on how many
    requests with the same "shape" were already stored.

    Two disk backed dicts are used:

        * _variants_eq: request hash -> True, in order to identify fuzzable
          requests which are exactly the same as one we've already seen

        * _variants: cleaned fuzzable request -> number of stored variants

    The limits are configured with params_max_variants (requests with query
    string or raw data) and path_max_variants (all the others).
    """
    HASH_IGNORE_HEADERS = ('referer',)
    TAG = '[variant_db]'

    def __init__(self, params_max_variants=PARAMS_MAX_VARIANTS,
                 path_max_variants=PATH_MAX_VARIANTS):
        """
        :param params_max_variants: Max variants to store for requests with
                                    query string or raw data
        :param path_max_variants: Max variants to store for requests without
                                  parameters
        """
        self._variants_eq = DiskDict(table_prefix='variant_db_eq')
        self._variants = DiskDict(table_prefix='variant_db')

        self.params_max_variants = params_max_variants
        self.path_max_variants = path_max_variants

        self._db_lock = threading.RLock()

    def cleanup(self):
        """
        Call cleanup() on both internal DiskDict instances.
        """
        self._variants_eq.cleanup()
        self._variants.cleanup()

    def append(self, fuzzable_request):
        """
        :param fuzzable_request: The fuzzable request to (potentially) store
        :return: True if we added a new fuzzable request variant to the DB,
                 False if no more variants are required for this fuzzable
                 request.
        """
        with self._db_lock:
            # Exact match: did we see this very same request before?
            request_hash = fuzzable_request.get_request_hash(
                self.HASH_IGNORE_HEADERS)

            if self._variants_eq.get(request_hash, False):
                return False

            # Remember the hash, this avoids duplicates in the framework
            self._variants_eq[request_hash] = True

            # Similar match: how many requests with this pattern do we have?
            pattern = clean_fuzzable_request(fuzzable_request)
            stored = self._variants.get(pattern, None)

            if stored is None:
                # First request with this pattern
                self._variants[pattern] = 1
                return True

            # The limit depends on the request having parameters or not
            uri = fuzzable_request.get_uri()

            if uri.has_query_string() or fuzzable_request.get_raw_data():
                limit = self.params_max_variants
            else:
                limit = self.path_max_variants

            if stored >= limit:
                return False

            self._variants[pattern] = stored + 1
            return True
示例#12
0
class html_comments(GrepPlugin):
    """
    Extract and analyze HTML comments.

    :author: Andres Riancho ([email protected])
    """

    HTML_RE = re.compile('<[a-zA-Z]*.*?>.*?</[a-zA-Z]>')

    INTERESTING_WORDS = (
        # In English
        'user', 'pass', 'xxx', 'fix', 'bug', 'broken', 'oops', 'hack',
        'caution', 'todo', 'note', 'warning', '!!!', '???', 'shit',
        'pass', 'password', 'passwd', 'pwd', 'secret', 'stupid',
        
        # In Spanish
        'tonto', 'porqueria', 'cuidado', 'usuario', u'contraseña',
        'puta', 'email', 'security', 'captcha', 'pinga', 'cojones',
        
        # some in Portuguese
        'banco', 'bradesco', 'itau', 'visa', 'bancoreal', u'transfêrencia',
        u'depósito', u'cartão', u'crédito', 'dados pessoais'
    )

    _multi_in = multi_in([' %s ' % w for w in INTERESTING_WORDS])

    def __init__(self):
        """
        Initialize the parent plugin and the internal state used to avoid
        analyzing / reporting the same comment more than once.
        """
        GrepPlugin.__init__(self)

        # Internal variables
        # comment -> [(url, response id), ...] , filled in _is_new()
        self._comments = DiskDict()
        # (word, url) and (comment, url) tuples which were already reported
        self._already_reported_interesting = ScalableBloomFilter()

    def grep(self, request, response):
        """
        Plugin entry point: get the comments from the response and analyze
        the ones which were not seen before in the response URL.

        Responses which are not text or HTML, and responses for which no
        document parser can be retrieved, are ignored.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        if not response.is_text_or_html():
            return

        try:
            parser = parser_cache.dpc.get_document_parser_for(response)
        except BaseFrameworkException:
            return

        for html_comment in parser.get_comments():
            # Ignoring comments which were sent in the request fixes:
            # audit.ssi + grep.html_comments + web app with XSS = false positive
            was_sent = request.sent(html_comment)

            if was_sent or not self._is_new(html_comment, response):
                continue

            self._interesting_word(html_comment, request, response)
            self._html_in_comment(html_comment, request, response)

    def _interesting_word(self, comment, request, response):
        """
        Report the interesting words found in the (lower-cased) comment.

        Each (word, URL) pair is reported only once: an Info object is
        appended to the kb as 'interesting_comments' and its description is
        sent to the output manager.

        :param comment: The HTML comment to analyze
        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        lower_comment = comment.lower()

        for word in self._multi_in.query(lower_comment):
            if (word, response.get_url()) in self._already_reported_interesting:
                continue

            desc = ('A comment with the string "%s" was found in: "%s".'
                    ' This could be interesting.')
            desc %= (word, response.get_url())

            i = Info('Interesting HTML comment', desc,
                     response.id, self.get_name())
            i.set_dc(request.get_dc())
            i.set_uri(response.get_uri())
            i.add_to_highlight(word)

            kb.kb.append(self, 'interesting_comments', i)
            om.out.information(i.get_desc())

            reported = (word, response.get_url())
            self._already_reported_interesting.add(reported)

    def _html_in_comment(self, comment, request, response):
        """
        Find HTML code in HTML comments

        When HTML_RE matches the comment, and the (comment, URL) pair was not
        reported before, an Info object is appended to the kb as
        'html_comment_hides_html' and its description is sent to the output
        manager.

        :param comment: The HTML comment to analyze
        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        html_in_comment = self.HTML_RE.search(comment)

        if html_in_comment is None:
            return

        # The same key needs to be used for the "already reported" check and
        # for the add() call below. The key is built with the unmodified
        # comment: using the shortened version for add() makes the check
        # miss any comment which is modified by the strip / truncate steps.
        report_key = (comment, response.get_url())

        if report_key in self._already_reported_interesting:
            return

        # There is HTML code in the comment, shorten it for the description
        short_comment = comment.strip()
        short_comment = short_comment.replace('\n', '')
        short_comment = short_comment.replace('\r', '')
        short_comment = short_comment[:40]

        desc = 'A comment with the string "%s" was found in: "%s".'\
               ' This could be interesting.'
        desc = desc % (short_comment, response.get_url())

        i = Info('HTML comment contains HTML code', desc,
                 response.id, self.get_name())
        i.set_dc(request.get_dc())
        i.set_uri(response.get_uri())
        i.add_to_highlight(html_in_comment.group(0))

        kb.kb.append(self, 'html_comment_hides_html', i)
        om.out.information(i.get_desc())
        self._already_reported_interesting.add(report_key)

    def _is_new(self, comment, response):
        """
        Check, while holding the plugin lock, if the comment was already
        seen in the response URL and save the (URL, response id) when it
        wasn't.

        :param comment: The HTML comment to check
        :param response: The HTTP response where the comment was found
        :return: True if this is the first time the comment is found in the
                 response URL, False otherwise.
        """
        with self._plugin_lock:

            #pylint: disable=E1103
            seen_in = self._comments.get(comment, None)

            if seen_in is None:
                # First time we see this comment, in any URL
                self._comments[comment] = [(response.get_url(), response.id), ]
                return True

            known_urls = [entry[0] for entry in seen_in]

            if response.get_url() in known_urls:
                return False

            # Known comment, new URL
            seen_in.append((response.get_url(), response.id))
            self._comments[comment] = seen_in
            return True
            #pylint: enable=E1103

    def end(self):
        """
        This method is called when the plugin wont be used anymore.

        For each stored comment a header line is sent to the output manager
        (the comment is whitespace-normalized and shortened to 40 characters
        when longer), followed by the sorted list of URLs where that comment
        was found. After that the internal comment storage is cleaned up.

        :return: None
        """
        for comment in self._comments.iterkeys():
            urls_with_this_comment = self._comments[comment]
            stick_comment = ' '.join(comment.split())

            if len(stick_comment) > 40:
                msg = 'A comment with the string "%s..." (and %s more bytes)'\
                      ' was found on these URL(s):'
                om.out.information(
                    msg % (stick_comment[:40], str(len(stick_comment) - 40)))
            else:
                msg = 'A comment containing "%s" was found on these URL(s):'
                om.out.information(msg % (stick_comment,))

            # The list needs to be created for each comment, otherwise the
            # URLs from the previous comments are printed again below the
            # header of the current one.
            inform = []

            for url, request_id in urls_with_this_comment:
                inform.append('- ' + url +
                              ' (request with id: ' + str(request_id) + ')')

            inform.sort()
            for i in inform:
                om.out.information(i)
        
        self._comments.cleanup()

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """
示例#13
0
class VariantDB(object):
    """
    Count how many variants of each reference / fuzzable request were
    stored, in order to answer if more variants are needed.

    References and fuzzable requests are normalized to a string before being
    used as keys of the internal disk backed dict.
    """

    def __init__(self, max_variants=DEFAULT_MAX_VARIANTS):
        """
        :param max_variants: Max number of variants to store for each
                             normalized reference / fuzzable request
        """
        self._disk_dict = DiskDict(table_prefix='variant_db')
        self._db_lock = threading.RLock()
        self.max_variants = max_variants

    def append(self, reference):
        """
        Count one more variant for the reference. Called when a new
        reference is found and we proved that new variants are still needed.

        :param reference: The reference (as a URL object) to add. This method
                          will "normalize" it before adding it to the internal
                          shelve.
        """
        key = self._clean_reference(reference)

        with self._db_lock:
            # Missing keys start at zero, the first variant is stored as 1
            self._disk_dict[key] = self._disk_dict.get(key, 0) + 1

    def append_fr(self, fuzzable_request):
        """
        Same as append(), but for fuzzable requests.

        :param fuzzable_request: The fuzzable request to add
        """
        key = self._clean_fuzzable_request(fuzzable_request)

        with self._db_lock:
            self._disk_dict[key] = self._disk_dict.get(key, 0) + 1

    def need_more_variants(self, reference):
        """
        :param reference: The reference (as a URL object) to check
        :return: True if there are not enough variants associated with
                 this reference in the DB.
        """
        key = self._clean_reference(reference)
        has_qs = reference.has_query_string()

        # Read without holding the lock
        stored = self._disk_dict.get(key, 0)

        # One variant is enough for a path (no query string)
        limit = self.max_variants if has_qs else 1

        return stored < limit

    def need_more_variants_for_fr(self, fuzzable_request):
        """
        :param fuzzable_request: The fuzzable request to check
        :return: True if there are not enough variants associated with
                 this fuzzable request in the DB.
        """
        key = self._clean_fuzzable_request(fuzzable_request)

        # Read without holding the lock
        stored = self._disk_dict.get(key, 0)

        return stored < self.max_variants

    def _clean_reference(self, reference):
        """
        Normalize a reference into a string which can be compared using a
        simple string match.

        The docs of the original implementation state that this method is
        VERY dependent on the are_variants method from
        core.data.request.variant_identification , remember that when
        changing stuff here or there.

        The result always starts with '(GET)-', which leaves room for
        other methods such as '(POST)-' for forms / fuzzable requests.

        :param reference: The reference (as a URL object) to normalize
        :return: The normalized string
        """
        domain_path = reference.get_domain_path()
        encoded_path = domain_path.url_string.encode(DEFAULT_ENCODING)

        cleaned = '(GET)-' + encoded_path + reference.get_file_name()

        if reference.has_query_string():
            clean_qs = self._clean_data_container(reference.querystring)
            cleaned += '?' + clean_qs

        return cleaned

    def _clean_data_container(self, data_container):
        """
        Serialize a data container after replacing every value with a
        type placeholder. The container received as parameter is not
        modified, a deep copy is used.

        :param data_container: The container to normalize, it needs to
                               provide iter_setters()
        :return: str() of the copy, where all-digit values were replaced by
                 'number' and everything else by 'string'
        """
        normalized = copy.deepcopy(data_container)

        for _key, token_value, _path, set_value in normalized.iter_setters():
            set_value('number' if token_value.isdigit() else 'string')

        return str(normalized)

    def _clean_fuzzable_request(self, fuzzable_request):
        """
        Normalize a fuzzable request, very similar to _clean_reference. The
        result is built as:

            (<UPPERCASE METHOD>)-<domain path><file name>[?<qs>][!<raw data>]

        where <qs> and <raw data> are the output of _clean_data_container()
        and are only added when the request has a query string / raw data.

        :param fuzzable_request: The fuzzable request instance to clean
        :return: The normalized string
        """
        http_method = fuzzable_request.get_method().upper()
        request_uri = fuzzable_request.get_uri()

        cleaned = '(%s)-' % http_method
        cleaned += request_uri.get_domain_path() + request_uri.get_file_name()

        if request_uri.has_query_string():
            clean_qs = self._clean_data_container(request_uri.querystring)
            cleaned += '?' + clean_qs

        if fuzzable_request.get_raw_data():
            clean_raw_data = self._clean_data_container(
                fuzzable_request.get_raw_data())
            cleaned += '!' + clean_raw_data

        return cleaned
示例#14
0
class VariantDB(object):
    def __init__(self, max_variants=DEFAULT_MAX_VARIANTS):
        """
        :param max_variants: Max number of variants to store for references
                             which have a query string, see
                             need_more_variants()
        """
        # normalized reference -> number of variants added with append()
        self._disk_dict = DiskDict(table_prefix='variant_db')
        # Held by append() while the counter is read and then updated
        self._db_lock = threading.RLock()
        self.max_variants = max_variants

    def append(self, reference):
        """
        Count one more variant for the reference. Called when a new
        reference is found and we proved that new variants are still needed.

        :param reference: The reference (as a URL object) to add. This method
                          will "normalize" it before adding it to the internal
                          shelve.
        """
        key = self._clean_reference(reference)

        with self._db_lock:
            # Missing keys start at zero, the first variant is stored as 1
            self._disk_dict[key] = self._disk_dict.get(key, 0) + 1

    def need_more_variants(self, reference):
        """
        :param reference: The reference (as a URL object) to check
        :return: True if there are not enough variants associated with
                 this reference in the DB.
        """
        key = self._clean_reference(reference)
        has_qs = reference.has_query_string()

        # Read without holding the lock
        stored = self._disk_dict.get(key, 0)

        # One variant is enough for a path (no query string)
        limit = self.max_variants if has_qs else 1

        return stored < limit

    def _clean_reference(self, reference):
        """
        Normalize a reference into a string which can be compared using a
        simple string match.

        The docs of the original implementation state that this method is
        VERY dependent on the are_variants method from
        core.data.request.variant_identification , remember that when
        changing stuff here or there.

        The output is the domain path plus the file name and, when the
        reference has a query string, a '?' followed by a copy of the query
        string where all-digit values were replaced by 'number' and
        everything else by 'string'.

        :param reference: The reference (as a URL object) to normalize
        :return: The normalized reference
        """
        cleaned = reference.get_domain_path() + reference.get_file_name()

        if not reference.has_query_string():
            return cleaned

        # Work on a copy, the query string of the reference is not modified
        normalized_qs = copy.deepcopy(reference.querystring)

        for _key, token_value, _path, set_value in normalized_qs.iter_setters():
            set_value('number' if token_value.isdigit() else 'string')

        return cleaned + '?' + str(normalized_qs)