Python relative_distance示例，w3af.core.controllers.misc.fuzzy_string_cmp.relative_distance Python示例

示例#1

0

显示文件

    def _analyze_responses(self, orig_resp, limit_response, error_response,
                           mutant):
        """
        Compare the bodies of the three responses and, when the error
        response scores below the computed threshold against both the
        original and the limit responses, store the mutant as a potential
        vulnerability (after confirming the limit response is stable).

        NOTE(review): relative_distance is treated as a similarity score
        (1.0 meaning "identical") -- confirm against fuzzy_string_cmp.

        :param orig_resp: Response used as the "original" baseline.
        :param limit_response: Response used as the "limit" baseline.
        :param error_response: Response generated by the error string.
        :param mutant: Mutant stored with the finding; also used to request
                       a second limit response.
        :return: None
        """
        orig_vs_error = relative_distance(orig_resp.get_body(),
                                          error_response.get_body())
        limit_vs_error = relative_distance(limit_response.get_body(),
                                           error_response.get_body())
        limit_vs_orig = relative_distance(limit_response.get_body(),
                                          orig_resp.get_body())

        # The more the original and limit responses differ from each other,
        # the more tolerant the threshold becomes
        threshold = self._diff_ratio + (1 - limit_vs_orig)

        error_stands_out = (orig_vs_error < threshold and
                            limit_vs_error < threshold)
        if not error_stands_out:
            return

        # The limit we requested might have actually existed (and the error
        # page was the one that really hit the limit). Ask for another limit,
        # hopefully a non-existent one, to weed out false positives.
        second_limit = self._get_limit_response(mutant)

        response_ids = [orig_resp.id, limit_response.id, error_response.id]

        limits_similarity = relative_distance(second_limit.get_body(),
                                              limit_response.get_body())

        if limits_similarity > 1 - self._diff_ratio:
            # Both limit responses are "equal": assume we really found the
            # limit and that the error string did produce an error
            self._potential_vulns.append((mutant.get_url(),
                                          mutant.get_token_name(),
                                          mutant,
                                          response_ids))

示例#2

0

显示文件

文件： generic.py 项目： zcr214/w3af

    def _analyze_body(self, orig_resp, limit_response, error_response, mutant):
        """
        Compare the response bodies and register a potential vulnerability
        when the error response scores below the computed threshold against
        both the original and the limit responses, and a second limit
        response confirms that the limit response is stable.

        NOTE(review): relative_distance is treated as a similarity score
        (1.0 meaning "identical") -- confirm against fuzzy_string_cmp.

        :param orig_resp: Response used as the "original" baseline.
        :param limit_response: Response used as the "limit" baseline.
        :param error_response: Response generated by the error string.
        :param mutant: Mutant passed to _get_limit_response() and
                       _add_potential_vuln().
        :return: True if we found a bug by comparing the response bodies,
                 False otherwise.
        """
        original_to_error = relative_distance(orig_resp.get_body(),
                                              error_response.get_body())
        limit_to_error = relative_distance(limit_response.get_body(),
                                           error_response.get_body())
        original_to_limit = relative_distance(limit_response.get_body(),
                                              orig_resp.get_body())

        ratio = self._diff_ratio + (1 - original_to_limit)

        if not (original_to_error < ratio and limit_to_error < ratio):
            # The error response is too similar to one of the baselines
            return False

        # Maybe the limit I requested wasn't really a non-existent one
        # (and the error page really found the limit),
        # let's request a new limit (one that hopefully doesn't exist)
        # in order to remove some false positives
        limit_response_2 = self._get_limit_response(mutant)
        limit_to_limit = relative_distance(limit_response_2.get_body(),
                                           limit_response.get_body())

        if limit_to_limit > 1 - self._diff_ratio:
            # The two limits are "equal"; It's safe to suppose that we have
            # found the limit here and that the error string really produced
            # an error
            id_list = [orig_resp.id, limit_response.id, error_response.id]
            self._add_potential_vuln(mutant, id_list)
            # Honour the documented contract: tell the caller a bug was found
            return True

        return False

示例#3

0

显示文件

文件： generic.py 项目： foobarmonk/w3af

    def _analyze_body(self, orig_resp, limit_response, error_response, mutant):
        """
        Decide, by comparing response bodies, whether the error string
        triggered a distinct error page, and record the finding if so.

        The error response must score below the threshold against both the
        original and the limit responses; then a second limit response is
        requested and must be "equal" to the first one before the potential
        vulnerability is stored.

        NOTE(review): relative_distance is treated as a similarity score
        (1.0 meaning "identical") -- confirm against fuzzy_string_cmp.

        :param orig_resp: Response used as the "original" baseline.
        :param limit_response: Response used as the "limit" baseline.
        :param error_response: Response generated by the error string.
        :param mutant: Mutant passed to _get_limit_response() and
                       _add_potential_vuln().
        :return: True if we found a bug by comparing the response bodies,
                 False otherwise.
        """
        original_to_error = relative_distance(orig_resp.get_body(),
                                              error_response.get_body())
        limit_to_error = relative_distance(limit_response.get_body(),
                                           error_response.get_body())
        original_to_limit = relative_distance(limit_response.get_body(),
                                              orig_resp.get_body())

        ratio = self._diff_ratio + (1 - original_to_limit)

        if original_to_error < ratio and limit_to_error < ratio:
            # Maybe the limit I requested wasn't really a non-existent one
            # (and the error page really found the limit),
            # let's request a new limit (one that hopefully doesn't exist)
            # in order to remove some false positives
            limit_response_2 = self._get_limit_response(mutant)
            limit_to_limit = relative_distance(limit_response_2.get_body(),
                                               limit_response.get_body())

            if limit_to_limit > 1 - self._diff_ratio:
                # The two limits are "equal"; It's safe to suppose that we have
                # found the limit here and that the error string really produced
                # an error
                id_list = [orig_resp.id, limit_response.id, error_response.id]
                self._add_potential_vuln(mutant, id_list)
                # The docstring promises True on a finding; previously the
                # method fell through and returned None
                return True

        # No finding: return an explicit (falsy) boolean instead of None
        return False

示例#4

0

显示文件

文件： generic.py 项目： 0x554simon/w3af

    def _analyze_responses(self, orig_resp, limit_response, error_response, mutant):
        """
        Inspect the three responses: when error_response resembles neither
        orig_resp nor limit_response (and the limit response is confirmed to
        be stable), a potential vulnerability is appended to
        self._potential_vulns.

        NOTE(review): relative_distance is treated as a similarity score
        (1.0 meaning "identical") -- confirm against fuzzy_string_cmp.

        :param orig_resp: Response used as the "original" baseline.
        :param limit_response: Response used as the "limit" baseline.
        :param error_response: Response generated by the error string.
        :param mutant: Mutant stored with the finding; also used to request
                       a second limit response.
        :return: None
        """
        sim_orig_error = relative_distance(orig_resp.get_body(),
                                           error_response.get_body())
        sim_limit_error = relative_distance(limit_response.get_body(),
                                            error_response.get_body())
        sim_limit_orig = relative_distance(limit_response.get_body(),
                                           orig_resp.get_body())

        max_similarity = self._diff_ratio + (1 - sim_limit_orig)

        if not (sim_orig_error < max_similarity and
                sim_limit_error < max_similarity):
            # The error response looks like one of the baselines: nothing
            # to report
            return

        # The first limit may not have been a really non-existent one (the
        # error page might be the one that found the limit). Request a new
        # limit, hopefully non-existent, to remove some false positives.
        confirmation_limit = self._get_limit_response(mutant)

        id_list = [orig_resp.id, limit_response.id, error_response.id]

        sim_between_limits = relative_distance(confirmation_limit.get_body(),
                                               limit_response.get_body())
        limits_are_equal = sim_between_limits > 1 - self._diff_ratio

        if limits_are_equal:
            # Safe to suppose that we found the limit and that the error
            # string really produced an error
            self._potential_vulns.append(
                (mutant.get_url(), mutant.get_token_name(), mutant, id_list))

示例#5

0

显示文件

    def test_all(self):
        """
        For every (string, string, threshold) case, check that
        relative_distance_boolean() agrees with comparing the output of
        relative_distance() against the threshold.
        """
        acceptance_tests = [
            ('a', 'a', 1.0),
            ('a', 'a', 0.1),
            ('a', 'a', 0.0),

            ('a', 'b', 1.0),
            ('a', 'b', 0.1),
            ('a', 'b', 0.0),

            ('a', 'ab', 1.0),
            ('a', 'ab', 0.1),

            ('a', 'b', 0.0000000000000000001),
            ('a', 'b' * 100, 1.0),

            ('a', 'ab', 0.66666666666),
            ('a', 'aab', 0.5),
            ('a', 'aaab', 0.4),
            ('a', 'aaaab',
             0.33333333333333333333333333333333333333333333333333333333),

            ('a' * 25, 'a', 1.0),
            ('aaa', 'aa', 1.0),
            ('a', 'a', 1.0),

            ('a' * 25, 'a', 0.076923076923076927),
            ('aaa', 'aa', 0.8),

            ('a', 'a', 0.0),
        ]

        # The template is constant, so build it once outside the loop
        failure_template = (
            'relative_distance_boolean and relative_distance returned'
            ' different results for the same parameters:\n'
            '    - Parameter #1: %s\n'
            '    - Parameter #2: %s\n'
            '    - Threshold: %s\n'
            '    - Result relative_distance_boolean: %s\n'
            '    - Result relative_distance: %s\n')

        for first, second, threshold in acceptance_tests:
            boolean_result = relative_distance_boolean(first, second,
                                                       threshold)
            compared_result = relative_distance(first, second) >= threshold

            failure_args = (first, second, threshold, boolean_result,
                            relative_distance(first, second))

            self.assertEqual(boolean_result, compared_result,
                             failure_template % failure_args)

示例#6

0

显示文件

    def _relative_distance(self, a, b):
        """
        Compute the distance between the bodies of two responses as the
        complement (1 - x) of the value returned by relative_distance().

        NOTE(review): the original documentation says this is based on the
        levenshtein distance -- confirm against fuzzy_string_cmp.

        :param a: First response (must provide get_body()).
        :param b: Second response (must provide get_body()).
        :return: The distance
        """
        similarity = relative_distance(a.get_body(), b.get_body())
        return 1 - similarity

示例#7

0

显示文件

文件： clusterGraph.py 项目： 0x554simon/w3af

    def _relative_distance(self, a, b):
        """
        Return how far apart two responses are, computed as one minus the
        relative_distance() score of their bodies.

        NOTE(review): the original documentation says this is based on the
        levenshtein distance -- confirm against fuzzy_string_cmp.

        :param a: First response (must provide get_body()).
        :param b: Second response (must provide get_body()).
        :return: The distance
        """
        body_a = a.get_body()
        body_b = b.get_body()
        return 1 - relative_distance(body_a, body_b)

示例#8

0

显示文件

文件： test_fuzzy_string_cmp.py 项目： andresriancho/w3af

    def test_all(self):
        """
        For every (string, string, threshold) case, check that fuzzy_equal()
        agrees with comparing the output of relative_distance() against the
        threshold.
        """
        acceptance_tests = [
            ('a', 'a', 1.0),
            ('a', 'a', 0.1),
            ('a', 'a', 0.0),

            ('a', 'b', 1.0),
            ('a', 'b', 0.1),
            ('a', 'b', 0.0),

            ('a', 'ab', 1.0),
            ('a', 'ab', 0.1),

            ('a', 'b', 0.0000000000000000001),
            ('a', 'b' * 100, 1.0),

            ('a', 'ab', 0.66666666666),
            ('a', 'aab', 0.5),
            ('a', 'aaab', 0.4),
            ('a', 'aaaab',
             0.33333333333333333333333333333333333333333333333333333333),

            ('a' * 25, 'a', 1.0),
            ('aaa', 'aa', 1.0),
            ('a', 'a', 1.0),

            ('a' * 25, 'a', 0.076923076923076927),
            ('aaa', 'aa', 0.8),

            ('a', 'a', 0.0),
        ]

        # The template is constant, so build it once outside the loop
        failure_template = ('fuzzy_equal and relative_distance returned'
                            ' different results for the same parameters:\n'
                            '    - Parameter #1: %s\n'
                            '    - Parameter #2: %s\n'
                            '    - Threshold: %s\n'
                            '    - Result fuzzy_equal: %s\n'
                            '    - Result relative_distance: %s\n')

        for first, second, threshold in acceptance_tests:
            fuzzy_result = fuzzy_equal(first, second, threshold)
            compared_result = relative_distance(first, second) >= threshold

            failure_args = (first, second, threshold, fuzzy_result,
                            relative_distance(first, second))

            self.assertEqual(fuzzy_result, compared_result,
                             failure_template % failure_args)

示例#9

0

显示文件

    def test_relative_distance(self):
        """
        relative_distance() must return at least the expected lower bound
        for each pair of input strings.
        """
        acceptance_tests = [
            ('a', 'a', 1.0),
            ('ab ac ad', 'ab ae ad', 0.6),
            ('ab ac ae', 'ab af ad', 0.3),
            ('ab ac ad', 'aa ae af', 0.0),
            ('a', 'b', 0.0),
            ('a<a"a<a', 'a<a"a<b', 0.75),
            ('a' * 25, 'a', 0.00),
        ]

        for first, second, lower_bound in acceptance_tests:
            score = relative_distance(first, second)
            failure_msg = "return value: %f, expected value: %f" % (
                score, lower_bound)
            self.assertTrue(score >= lower_bound, failure_msg)

示例#10

0

显示文件

文件： test_fuzzy_string_cmp.py 项目： andresriancho/w3af

    def test_relative_distance(self):
        """
        relative_distance() must return at least the expected lower bound
        for each pair of (possibly multi-line) input strings.
        """
        acceptance_tests = [
            ('a', 'a', 1.0),
            ('ab\nac\nad', 'ab\nae\nad', 0.6),
            ('ab\nac\nae', 'ab\naf\nad', 0.3),
            ('ab\nac\nad', 'aa\nae\naf', 0.0),
            ('a', 'b', 0.0),
            ('a<a"a<a', 'a<a"a<b', 0.75),
            ('a' * 25, 'a', 0.00),
        ]

        for first, second, lower_bound in acceptance_tests:
            score = relative_distance(first, second)
            failure_msg = "return value: %f, expected value: %f" % (
                score, lower_bound)
            self.assertTrue(score >= lower_bound, failure_msg)

示例#11

0

显示文件

 def test_relative_distance(self):
     """
     relative_distance() must return at least the given lower bound for
     each pair of input strings.
     """
     acceptance_tests = [
         ('a', 'a', 1.0),
         ('ab ac ad', 'ab ae ad', 0.6),
         ('ab ac ae', 'ab af ad', 0.3),
         ('ab ac ad', 'aa ae af', 0.0),
         ('a', 'b', 0.0),
         ('aaaa', 'aaab', 0.75),
         ('a' * 25, 'a', 0.04),
     ]

     for first, second, lower_bound in acceptance_tests:
         score = relative_distance(first, second)
         failure_msg = "return value:%f, given value:%f" % (score,
                                                            lower_bound)
         self.assertTrue(score >= lower_bound, failure_msg)

示例#12

0

显示文件

文件： test_fuzzy_string_cmp.py 项目： 0x554simon/w3af

 def test_relative_distance(self):
     """
     Check that relative_distance() scores each pair of strings at or
     above the given value.
     """
     acceptance_tests = [
         ('a', 'a', 1.0),
         ('ab ac ad', 'ab ae ad', 0.6),
         ('ab ac ae', 'ab af ad', 0.3),
         ('ab ac ad', 'aa ae af', 0.0),
         ('a', 'b', 0.0),
         ('aaaa', 'aaab', 0.75),
         ('a' * 25, 'a', 0.04),
     ]

     for left, right, minimum in acceptance_tests:
         result = relative_distance(left, right)
         details = "return value:%f, given value:%f" % (result, minimum)
         self.assertTrue(result >= minimum, details)

示例#13

0

显示文件

文件： test_fuzzy_string_cmp.py 项目： 3rdDegree/w3af

    def test_all(self):
        """
        For every (string, string, threshold) case, check that
        relative_distance_boolean() agrees with comparing the output of
        relative_distance() against the threshold.
        """
        acceptance_tests = [
            ('a', 'a', 1.0),
            ('a', 'a', 0.1),
            ('a', 'a', 0.0),

            ('a', 'b', 1.0),
            ('a', 'b', 0.1),
            ('a', 'b', 0.0),

            ('a', 'ab', 1.0),
            ('a', 'ab', 0.1),

            ('a', 'b', 0.0000000000000000001),
            ('a', 'b' * 100, 1.0),

            ('a', 'ab', 0.66666666666),
            ('a', 'aab', 0.5),
            ('a', 'aaab', 0.4),
            ('a', 'aaaab',
             0.33333333333333333333333333333333333333333333333333333333),

            ('a' * 25, 'a', 1.0),
            ('aaa', 'aa', 1.0),
            ('a', 'a', 1.0),

            ('a' * 25, 'a', 0.076923076923076927),
            ('aaa', 'aa', 0.8),

            ('a', 'a', 0.0),
        ]

        # Parenthesised implicit concatenation instead of backslash
        # continuations; the resulting string is unchanged
        failure_template = (
            'relative_distance_boolean and relative_distance returned'
            ' different results for the same parameters:\n'
            '    - %s\n'
            '    - %s\n'
            '    - Threshold: %s\n')

        for first, second, threshold in acceptance_tests:
            boolean_result = relative_distance_boolean(first, second,
                                                       threshold)
            compared_result = relative_distance(first, second) >= threshold

            self.assertEqual(boolean_result, compared_result,
                             failure_template % (first, second, threshold))

示例#14

0

显示文件

文件： fingerprint_404.py 项目： 0x554simon/w3af

    def is_404(self, http_response):
        """
        Decide whether ``http_response`` should be treated as a 404 page.

        All of my previous versions of is_404 were very complex and tried to
        struggle with all possible cases. The truth is that in most "strange"
        cases I was failing miserably, so now I changed my 404 detection once
        again, but keeping it as simple as possible.

        Also, and because I was trying to cover ALL CASES, I was performing a
        lot of requests in order to cover them, which in most situations was
        unnecessary.

        So now I go for a much simpler approach:
            1- Cover the simplest case of all using only 1 HTTP request
            2- Give the users the power to configure the 404 detection by
               setting a string that identifies the 404 response (in case we
               are missing it for some reason in case #1)

        The checks are applied in this order, first match wins:
            * user configured always_404 / never_404 paths
            * user configured string_match_404
            * HTTP response code 404
            * directories known to use 404 codes
            * fuzzy comparison against the stored 404 responses
            * self._is_404_with_extra_request()

        :param http_response: The HTTP response which we want to know if it
                                  is a 404 or not.
        :return: True if the response is considered a 404, False otherwise.
        """
        #
        #   First we handle the user configured exceptions:
        #
        domain_path = http_response.get_url().get_domain_path()
        if domain_path in cf.cf.get('always_404'):
            return True
        elif domain_path in cf.cf.get('never_404'):
            return False

        #
        #    The user configured setting. "If this string is in the response,
        #    then it is a 404"
        #
        if cf.cf.get('string_match_404') and cf.cf.get('string_match_404') in http_response:
            return True

        #
        #   This is the most simple case, we don't even have to think about this
        #
        #   If there is some custom website that always returns 404 codes, then
        #   we are screwed, but this is open source, and the pentester working
        #   on that site can modify these lines.
        #
        if http_response.get_code() == 404:
            return True

        #
        #    Simple, if the file we requested is in a directory that's known to
        #    return 404 codes for files that do not exist, AND this is NOT a 404
        #    then we return False!
        #
        #    NOTE(review): responses with code 404 already returned True
        #    above, so the "!= 404" part of this condition looks redundant
        #    (assuming get_code() is stable between calls).
        #
        if domain_path in self._directory_uses_404_codes and \
        http_response.get_code() != 404:
            return False

        #
        #   Lets start with the rather complex code...
        #
        #   The lock makes sure the 404 knowledge is generated only once,
        #   by the first caller that gets here.
        #
        with self._lock:
            if not self._already_analyzed:
                self.generate_404_knowledge(http_response.get_url())
                self._already_analyzed = True

        # 404_body was already cleaned inside generate_404_knowledge
        # so we need to clean this one in order to have a fair comparison
        resp_body = get_clean_body(http_response)
        resp_content_type = http_response.doc_type
        resp_path = http_response.get_url().get_domain_path().url_string

        # See https://github.com/andresriancho/w3af/issues/6646
        #
        # max_similarity_with_404 holds the highest relative_distance() value
        # seen against the non-matching DB entries, resp_path_in_db tells if
        # any compared DB entry was generated for the same path.
        max_similarity_with_404 = 0.0
        resp_path_in_db = False

        # Everything below runs while holding self._lock, including the call
        # to self._is_404_with_extra_request()
        with self._lock:
            #
            #   Compare this response to all the 404's I have in my DB
            #
            for resp_404 in self._404_responses:

                # Since the fuzzy_equal function is CPU-intensive we want to
                # avoid calling it for cases where we know it won't match, for
                # example in comparing an image and an html
                #
                # Note that this also skips the path tracking done at the
                # end of the loop body for this DB entry
                if resp_content_type != resp_404.doc_type:
                    continue

                if fuzzy_equal(resp_404.body, resp_body, IS_EQUAL_RATIO):
                    msg = '"%s" (id:%s) is a 404 [similarity_index > %s]'
                    fmt = (http_response.get_url(),
                           http_response.id,
                           IS_EQUAL_RATIO)
                    om.out.debug(msg % fmt)
                    return True
                else:
                    # I could calculate this before and avoid the call to
                    # fuzzy_equal, but I believe it's going to be faster this
                    # way
                    current_ratio = relative_distance(resp_404.body, resp_body)
                    max_similarity_with_404 = max(max_similarity_with_404,
                                                  current_ratio)

                # Track if the response path is in the DB
                if not resp_path_in_db and resp_path == resp_404.path:
                    resp_path_in_db = True

            #
            # I get here when the for ends and no body_404_db matched with
            # the resp_body that was sent as a parameter by the user. This
            # means one of two things:
            #     * There is not enough knowledge in self._404_responses, or
            #     * The answer is NOT a 404.
            #
            # Because we want to reduce the amount of "false positives" that
            # this method returns, we'll perform some extra checks before
            # saying that this is NOT a 404.
            #
            # If we already have a 404 sample for this same path and the
            # response is very different from every sample, there is no need
            # for the extra verification.
            #
            if resp_path_in_db and max_similarity_with_404 < MUST_VERIFY_RATIO:
                msg = ('"%s" (id:%s) is NOT a 404 [similarity_index < %s'
                       ' with sample path in 404 DB].')
                args = (http_response.get_url(),
                        http_response.id,
                        MUST_VERIFY_RATIO)
                om.out.debug(msg % args)
                return False

            # NOTE(review): presumably sends one more HTTP request to verify
            # the response -- confirm in _is_404_with_extra_request
            if self._is_404_with_extra_request(http_response, resp_body):
                #
                #   Aha! It actually was a 404!
                #
                #   Store it, so future calls can match against this response
                #   in the loop above
                #
                four_oh_data = FourOhFourResponseFactory(http_response)
                self._404_responses.append(four_oh_data)

                msg = ('"%s" (id:%s) is a 404 (similarity_index > %s).'
                       ' Adding new knowledge to the 404_responses database'
                       ' (length=%s).')
                fmt = (http_response.get_url(), http_response.id,
                       IS_EQUAL_RATIO, len(self._404_responses))
                om.out.debug(msg % fmt)
                return True

            msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].'
            args = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
            om.out.debug(msg % args)

            return False

示例#15

0

显示文件

文件： fingerprint_404.py 项目： illintentions92/w3af-kali

    def is_404(self, http_response):
        """
        Decide whether ``http_response`` should be treated as a 404 page.

        All of my previous versions of is_404 were very complex and tried to
        struggle with all possible cases. The truth is that in most "strange"
        cases I was failing miserably, so now I changed my 404 detection once
        again, but keeping it as simple as possible.

        Also, and because I was trying to cover ALL CASES, I was performing a
        lot of requests in order to cover them, which in most situations was
        unnecessary.

        So now I go for a much simpler approach:
            1- Cover the simplest case of all using only 1 HTTP request
            2- Give the users the power to configure the 404 detection by
               setting a string that identifies the 404 response (in case we
               are missing it for some reason in case #1)

        The checks are applied in this order, first match wins:
            * user configured always_404 / never_404 paths
            * user configured string_match_404
            * HTTP response code 404
            * directories known to use 404 codes
            * fuzzy comparison against the stored 404 responses
            * self._is_404_with_extra_request()

        :param http_response: The HTTP response which we want to know if it
                                  is a 404 or not.
        :return: True if the response is considered a 404, False otherwise.
        """
        #
        #   First we handle the user configured exceptions:
        #
        domain_path = http_response.get_url().get_domain_path()
        if domain_path in cf.cf.get('always_404'):
            return True
        elif domain_path in cf.cf.get('never_404'):
            return False

        #
        #    The user configured setting. "If this string is in the response,
        #    then it is a 404"
        #
        if cf.cf.get('string_match_404') and cf.cf.get(
                'string_match_404') in http_response:
            return True

        #
        #   This is the most simple case, we don't even have to think about this
        #
        #   If there is some custom website that always returns 404 codes, then
        #   we are screwed, but this is open source, and the pentester working
        #   on that site can modify these lines.
        #
        if http_response.get_code() == 404:
            return True

        #
        #    Simple, if the file we requested is in a directory that's known to
        #    return 404 codes for files that do not exist, AND this is NOT a 404
        #    then we return False!
        #
        #    NOTE(review): responses with code 404 already returned True
        #    above, so the "!= 404" part of this condition looks redundant
        #    (assuming get_code() is stable between calls).
        #
        if domain_path in self._directory_uses_404_codes and \
        http_response.get_code() != 404:
            return False

        #
        #   Lets start with the rather complex code...
        #
        #   The lock makes sure the 404 knowledge is generated only once,
        #   by the first caller that gets here.
        #
        with self._lock:
            if not self._already_analyzed:
                self.generate_404_knowledge(http_response.get_url())
                self._already_analyzed = True

        # 404_body was already cleaned inside generate_404_knowledge
        # so we need to clean this one in order to have a fair comparison
        resp_body = get_clean_body(http_response)
        resp_content_type = http_response.doc_type
        resp_path = http_response.get_url().get_domain_path().url_string

        # See https://github.com/andresriancho/w3af/issues/6646
        #
        # max_similarity_with_404 holds the highest relative_distance() value
        # seen against the non-matching DB entries, resp_path_in_db tells if
        # any compared DB entry was generated for the same path.
        max_similarity_with_404 = 0.0
        resp_path_in_db = False

        # Everything below runs while holding self._lock, including the call
        # to self._is_404_with_extra_request()
        with self._lock:
            #
            #   Compare this response to all the 404's I have in my DB
            #
            for resp_404 in self._404_responses:

                # Since the fuzzy_equal function is CPU-intensive we want to
                # avoid calling it for cases where we know it won't match, for
                # example in comparing an image and an html
                #
                # Note that this also skips the path tracking done at the
                # end of the loop body for this DB entry
                if resp_content_type != resp_404.doc_type:
                    continue

                if fuzzy_equal(resp_404.body, resp_body, IS_EQUAL_RATIO):
                    msg = '"%s" (id:%s) is a 404 [similarity_index > %s]'
                    fmt = (http_response.get_url(), http_response.id,
                           IS_EQUAL_RATIO)
                    om.out.debug(msg % fmt)
                    return True
                else:
                    # I could calculate this before and avoid the call to
                    # fuzzy_equal, but I believe it's going to be faster this
                    # way
                    current_ratio = relative_distance(resp_404.body, resp_body)
                    max_similarity_with_404 = max(max_similarity_with_404,
                                                  current_ratio)

                # Track if the response path is in the DB
                if not resp_path_in_db and resp_path == resp_404.path:
                    resp_path_in_db = True

            #
            # I get here when the for ends and no body_404_db matched with
            # the resp_body that was sent as a parameter by the user. This
            # means one of two things:
            #     * There is not enough knowledge in self._404_responses, or
            #     * The answer is NOT a 404.
            #
            # Because we want to reduce the amount of "false positives" that
            # this method returns, we'll perform some extra checks before
            # saying that this is NOT a 404.
            #
            # If we already have a 404 sample for this same path and the
            # response is very different from every sample, there is no need
            # for the extra verification.
            #
            if resp_path_in_db and max_similarity_with_404 < MUST_VERIFY_RATIO:
                msg = ('"%s" (id:%s) is NOT a 404 [similarity_index < %s'
                       ' with sample path in 404 DB].')
                args = (http_response.get_url(), http_response.id,
                        MUST_VERIFY_RATIO)
                om.out.debug(msg % args)
                return False

            # NOTE(review): presumably sends one more HTTP request to verify
            # the response -- confirm in _is_404_with_extra_request
            if self._is_404_with_extra_request(http_response, resp_body):
                #
                #   Aha! It actually was a 404!
                #
                #   Store it, so future calls can match against this response
                #   in the loop above
                #
                four_oh_data = FourOhFourResponseFactory(http_response)
                self._404_responses.append(four_oh_data)

                msg = ('"%s" (id:%s) is a 404 (similarity_index > %s).'
                       ' Adding new knowledge to the 404_responses database'
                       ' (length=%s).')
                fmt = (http_response.get_url(), http_response.id,
                       IS_EQUAL_RATIO, len(self._404_responses))
                om.out.debug(msg % fmt)
                return True

            msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].'
            args = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
            om.out.debug(msg % args)

            return False

示例#16

0

显示文件

文件： fingerprint_404.py 项目： yvonneKim/w3af

    def is_404(self, http_response):
        """
        All of my previous versions of is_404 were very complex and tried to
        struggle with all possible cases. The truth is that in most "strange"
        cases I was failing miserably, so now I changed my 404 detection once
        again, but keeping it as simple as possible.

        Also, and because I was trying to cover ALL CASES, I was performing a
        lot of requests in order to cover them, which in most situations was
        unnecessary.

        So now I go for a much simple approach:
            1- Cover the simplest case of all using only 1 HTTP request
            2- Give the users the power to configure the 404 detection by
               setting a string that identifies the 404 response (in case we
               are missing it for some reason in case #1)

        :param http_response: The HTTP response which we want to know if it
                                  is a 404 or not.
        """
        domain_path = http_response.get_url().get_domain_path()
        extension = http_response.get_url().get_extension()

        #
        #   First we handle the user configured exceptions:
        #
        if domain_path in cf.cf.get('always_404'):
            return True

        if domain_path in cf.cf.get('never_404'):
            return False

        #
        #    The user configured setting. "If this string is in the response,
        #    then it is a 404"
        #
        if cf.cf.get('string_match_404') and cf.cf.get(
                'string_match_404') in http_response:
            return True

        #
        #   This is the most simple case, we don't even have to think about this
        #
        #   If there is some custom website that always returns 404 codes, then
        #   we are screwed, but this is open source, and the pentester working
        #   on that site can modify these lines.
        #
        if http_response.get_code() == 404:
            return True

        #
        #   This is an edge case. Let me explain...
        #
        #   Doing try/except in all plugins that send HTTP requests was hard (tm)
        #   so plugins don't use ExtendedUrllib directly, instead they use the
        #   UrlOpenerProxy (defined in plugin.py). This proxy catches any
        #   exceptions and returns a 204 response.
        #
        #   In most cases that works perfectly, because it will allow the plugin
        #   to keep working without caring much about the exceptions. In some
        #   edge cases someone will call is_404(204_response_generated_by_w3af)
        #   and that will most likely return False, because the 204 response we
        #   generate doesn't look like anything w3af has in the 404 DB.
        #
        #   The following iff fixes the race condition
        #
        if http_response.get_code() == 204:
            if http_response.get_msg() == NO_CONTENT_MSG:
                if http_response.get_headers() == Headers():
                    return True

        #
        #   Lets start with the rather complex code...
        #
        with self._lock:
            if not self._already_analyzed:
                self.generate_404_knowledge(http_response.get_url())
                self._already_analyzed = True

        #
        #    Simple, if the file we requested is in a directory that's known to
        #    return 404 codes for files that do not exist, AND this is NOT a 404
        #    then we're return False!
        #
        path_extension = (domain_path, extension)
        if path_extension in self._directory_uses_404_codes:
            if http_response.get_code() != 404:
                return False

        # 404_body stored in the DB was already cleaned inside
        # generate_404_knowledge
        #
        # We need to clean the body we receive as parameter in order to have
        # a fair comparison
        resp_body = get_clean_body(http_response)
        resp_content_type = http_response.doc_type
        resp_path = http_response.get_url().get_domain_path().url_string

        # See https://github.com/andresriancho/w3af/issues/6646
        max_similarity_with_404 = 0.0
        resp_path_in_db = False
        debugging_id = rand_alnum(8)

        #
        #   Compare this response to all the 404's I have in my DB
        #
        for resp_404 in self.get_404_responses():

            # Since the fuzzy_equal function is CPU-intensive we want to
            # avoid calling it for cases where we know it won't match, for
            # example in comparing an image and an html
            if resp_content_type != resp_404.doc_type:
                continue

            is_fuzzy_equal, distance = fuzzy_equal_return_distance(
                resp_404.body, resp_body, IS_EQUAL_RATIO)

            if is_fuzzy_equal:
                msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
                       ' [similarity_index > %s with 404 DB entry with ID %s]')
                args = (http_response.get_url(), http_response.id,
                        http_response.get_code(),
                        len(http_response.get_body()), debugging_id,
                        IS_EQUAL_RATIO, resp_404.id)
                om.out.debug(msg % args)
                return True

            if distance is None:
                distance = 0.0

                # In some cases the distance is None, because the
                # fuzzy_equal didn't have to calculate it to produce the result
                # (because of the optimizations)
                #
                # Also, we can calculate the upper_bound_similarity which
                # indicates how much (in the best case) two strings can look
                # alike based on their lengths
                #
                # This allows us to calculate the distance between two strings
                # only if we know that the distance could be large enough
                ups = upper_bound_similarity(len(resp_404.body),
                                             len(resp_body))

                if ups > max_similarity_with_404:
                    distance = relative_distance(resp_404.body, resp_body)

            max_similarity_with_404 = max(max_similarity_with_404, distance)

            # Track if the response path is in the DB
            if not resp_path_in_db and resp_path == resp_404.path:
                resp_path_in_db = True

        #
        # I get here when the for ends and no 404 body matched with
        # the resp_body that was sent as a parameter. This means one of two things:
        #
        #     * There is not enough knowledge in get_404_responses(), or
        #
        #     * The answer is NOT a 404.
        #
        # Because we want to reduce the amount of false positives that
        # this method returns, we'll perform some extra checks before
        # saying that this is NOT a 404.
        #
        if resp_path_in_db and max_similarity_with_404 < MUST_VERIFY_RATIO:
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
                   ' [similarity_index < %s with sample path in 404 DB]')
            args = (http_response.get_url(), http_response.id,
                    http_response.get_code(), len(http_response.get_body()),
                    debugging_id, MUST_VERIFY_RATIO)
            om.out.debug(msg % args)
            return False

        if self._is_404_with_extra_request(http_response, resp_body,
                                           debugging_id):
            #
            #   Aha! It is a 404!
            #
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
                   ' [similarity_index > %s with extra request]')
            args = (http_response.get_url(), http_response.id,
                    http_response.get_code(), len(http_response.get_body()),
                    debugging_id, IS_EQUAL_RATIO)
            om.out.debug(msg % args)
            return True

        msg = '"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404 [default to False]'
        args = (http_response.get_url(), http_response.id,
                http_response.get_code(), len(http_response.get_body()),
                debugging_id)
        om.out.debug(msg % args)

        return False