def _analyze_responses(self, orig_resp, limit_response, error_response,
                       mutant):
    """
    Compare the bodies of the three responses and, when the error response
    resembles neither the original nor the limit response, store a
    potential vulnerability in self._potential_vulns.

    The value returned by relative_distance() is used here as a similarity
    score: the higher it is, the more alike the two bodies are considered.

    :param orig_resp: Response whose body is the baseline of the comparison
    :param limit_response: Response obtained for the "limit" request
    :param error_response: Response obtained for the error string
    :param mutant: Mutant used to request a second limit response and to
                   describe the finding (URL and token name)
    :return: None
    """
    orig_body = orig_resp.get_body()
    error_body = error_response.get_body()
    limit_body = limit_response.get_body()

    original_to_error = relative_distance(orig_body, error_body)
    limit_to_error = relative_distance(limit_body, error_body)
    original_to_limit = relative_distance(limit_body, orig_body)

    # The less the original and limit responses look alike, the more
    # tolerant the threshold becomes
    threshold = self._diff_ratio + (1 - original_to_limit)

    if not (original_to_error < threshold and limit_to_error < threshold):
        # The error response looks like one of the other two responses
        return

    # Maybe the limit that was requested did exist (and the error page is
    # the real "limit" page). Request a second limit, one that hopefully
    # does not exist, to remove some false positives
    second_limit = self._get_limit_response(mutant)
    id_list = [orig_resp.id, limit_response.id, error_response.id]

    limits_similarity = relative_distance(second_limit.get_body(),
                                          limit_body)

    if limits_similarity > 1 - self._diff_ratio:
        # Both limit responses are "equal": it is safe to suppose that the
        # limit was found and that the error string really produced an
        # error
        finding = (mutant.get_url(),
                   mutant.get_token_name(),
                   mutant,
                   id_list)
        self._potential_vulns.append(finding)
def _analyze_body(self, orig_resp, limit_response, error_response, mutant):
    """
    Compare the response bodies to decide if the error string triggered a
    bug, reporting it through self._add_potential_vuln() when it did.

    The value returned by relative_distance() is used here as a similarity
    score: the higher it is, the more alike the two bodies are considered.

    :param orig_resp: Response whose body is the baseline of the comparison
    :param limit_response: Response obtained for the "limit" request
    :param error_response: Response obtained for the error string
    :param mutant: Mutant used to request a second limit response and
                   passed on to self._add_potential_vuln()
    :return: True if we found a bug by comparing the response bodies,
             False otherwise
    """
    original_to_error = relative_distance(orig_resp.get_body(),
                                          error_response.get_body())
    limit_to_error = relative_distance(limit_response.get_body(),
                                       error_response.get_body())
    original_to_limit = relative_distance(limit_response.get_body(),
                                          orig_resp.get_body())

    # The less the original and limit responses look alike, the more
    # tolerant the threshold becomes
    ratio = self._diff_ratio + (1 - original_to_limit)

    if original_to_error < ratio and limit_to_error < ratio:
        # Maybe the limit I requested wasn't really a non-existent one
        # (and the error page really found the limit),
        # let's request a new limit (one that hopefully doesn't exist)
        # in order to remove some false positives
        limit_response_2 = self._get_limit_response(mutant)

        limit_to_limit = relative_distance(limit_response_2.get_body(),
                                           limit_response.get_body())

        if limit_to_limit > 1 - self._diff_ratio:
            # The two limits are "equal"; It's safe to suppose that we have
            # found the limit here and that the error string really produced
            # an error
            id_list = [orig_resp.id, limit_response.id, error_response.id]
            self._add_potential_vuln(mutant, id_list)

            # Honour the documented contract: tell the caller a bug was
            # found instead of implicitly returning None
            return True

    return False
def _analyze_responses(self, orig_resp, limit_response, error_response,
                       mutant):
    """
    Compare the bodies of the three responses and, when the error response
    resembles neither the original nor the limit response, store a
    potential vulnerability in self._potential_vulns.

    The value returned by relative_distance() is used here as a similarity
    score: the higher it is, the more alike the two bodies are considered.

    :param orig_resp: Response whose body is the baseline of the comparison
    :param limit_response: Response obtained for the "limit" request
    :param error_response: Response obtained for the error string
    :param mutant: Mutant used to request a second limit response and to
                   describe the finding (URL and token name)
    :return: None
    """
    orig_body = orig_resp.get_body()
    error_body = error_response.get_body()
    limit_body = limit_response.get_body()

    original_to_error = relative_distance(orig_body, error_body)
    limit_to_error = relative_distance(limit_body, error_body)
    original_to_limit = relative_distance(limit_body, orig_body)

    # The less the original and limit responses look alike, the more
    # tolerant the threshold becomes
    threshold = self._diff_ratio + (1 - original_to_limit)

    if not (original_to_error < threshold and limit_to_error < threshold):
        # The error response looks like one of the other two responses
        return

    # Maybe the limit that was requested did exist (and the error page is
    # the real "limit" page). Request a second limit, one that hopefully
    # does not exist, to remove some false positives
    second_limit = self._get_limit_response(mutant)
    id_list = [orig_resp.id, limit_response.id, error_response.id]

    limits_similarity = relative_distance(second_limit.get_body(),
                                          limit_body)

    if limits_similarity > 1 - self._diff_ratio:
        # Both limit responses are "equal": it is safe to suppose that the
        # limit was found and that the error string really produced an
        # error
        finding = (mutant.get_url(),
                   mutant.get_token_name(),
                   mutant,
                   id_list)
        self._potential_vulns.append(finding)
def test_all(self):
    """
    Check that relative_distance_boolean(a, b, threshold) gives the same
    answer as ``relative_distance(a, b) >= threshold`` for every sample.

    Each sample is a tuple of (first string, second string, threshold).
    """
    acceptance_tests = [
        ('a', 'a', 1.0),
        ('a', 'a', 0.1),
        ('a', 'a', 0.0),

        ('a', 'b', 1.0),
        ('a', 'b', 0.1),
        ('a', 'b', 0.0),

        ('a', 'ab', 1.0),
        ('a', 'ab', 0.1),

        ('a', 'b', 0.0000000000000000001),
        ('a', 'b' * 100, 1.0),

        ('a', 'ab', 0.66666666666),
        ('a', 'aab', 0.5),
        ('a', 'aaab', 0.4),
        ('a', 'aaaab',
         0.33333333333333333333333333333333333333333333333333333333),

        ('a' * 25, 'a', 1.0),
        ('aaa', 'aa', 1.0),
        ('a', 'a', 1.0),

        ('a' * 25, 'a', 0.076923076923076927),
        ('aaa', 'aa', 0.8),

        ('a', 'a', 0.0),
    ]

    # The template never changes, so it is built once outside the loop
    msg = ('relative_distance_boolean and relative_distance returned'
           ' different results for the same parameters:\n'
           ' - Parameter #1: %s\n'
           ' - Parameter #2: %s\n'
           ' - Threshold: %s\n'
           ' - Result relative_distance_boolean: %s\n'
           ' - Result relative_distance: %s\n')

    for first, second, threshold in acceptance_tests:
        res1 = relative_distance_boolean(first, second, threshold)
        res2 = relative_distance(first, second) >= threshold

        details = (first, second, threshold, res1,
                   relative_distance(first, second))

        self.assertEqual(res1, res2, msg % details)
def _relative_distance(self, a, b):
    """
    Calculate how far apart the bodies of two responses are.

    :param a: First response, only its get_body() is used
    :param b: Second response, only its get_body() is used
    :return: 1 minus the relative_distance() of both bodies
    """
    first_body = a.get_body()
    second_body = b.get_body()

    similarity = relative_distance(first_body, second_body)
    return 1 - similarity
def test_all(self):
    """
    Check that fuzzy_equal(a, b, threshold) gives the same answer as
    ``relative_distance(a, b) >= threshold`` for every sample.

    Each sample is a tuple of (first string, second string, threshold).
    """
    acceptance_tests = [
        ('a', 'a', 1.0),
        ('a', 'a', 0.1),
        ('a', 'a', 0.0),

        ('a', 'b', 1.0),
        ('a', 'b', 0.1),
        ('a', 'b', 0.0),

        ('a', 'ab', 1.0),
        ('a', 'ab', 0.1),

        ('a', 'b', 0.0000000000000000001),
        ('a', 'b' * 100, 1.0),

        ('a', 'ab', 0.66666666666),
        ('a', 'aab', 0.5),
        ('a', 'aaab', 0.4),
        ('a', 'aaaab',
         0.33333333333333333333333333333333333333333333333333333333),

        ('a' * 25, 'a', 1.0),
        ('aaa', 'aa', 1.0),
        ('a', 'a', 1.0),

        ('a' * 25, 'a', 0.076923076923076927),
        ('aaa', 'aa', 0.8),

        ('a', 'a', 0.0),
    ]

    # The template never changes, so it is built once outside the loop
    msg = ('fuzzy_equal and relative_distance returned'
           ' different results for the same parameters:\n'
           ' - Parameter #1: %s\n'
           ' - Parameter #2: %s\n'
           ' - Threshold: %s\n'
           ' - Result fuzzy_equal: %s\n'
           ' - Result relative_distance: %s\n')

    for first, second, threshold in acceptance_tests:
        res1 = fuzzy_equal(first, second, threshold)
        res2 = relative_distance(first, second) >= threshold

        details = (first, second, threshold, res1,
                   relative_distance(first, second))

        self.assertEqual(res1, res2, msg % details)
def test_relative_distance(self):
    """
    Check that relative_distance() returns at least the expected minimum
    value for each pair of sample strings.

    Each sample is a tuple of (first string, second string, minimum).
    """
    acceptance_tests = (
        ('a', 'a', 1.0),
        ('ab ac ad', 'ab ae ad', 0.6),
        ('ab ac ae', 'ab af ad', 0.3),
        ('ab ac ad', 'aa ae af', 0.0),
        ('a', 'b', 0.0),
        ('a<a"a<a', 'a<a"a<b', 0.75),
        ('a' * 25, 'a', 0.00),
    )

    for first, second, minimum in acceptance_tests:
        res = relative_distance(first, second)
        msg = "return value: %f, expected value: %f" % (res, minimum)

        self.assertTrue(res >= minimum, msg)
def test_relative_distance(self):
    """
    Check that relative_distance() returns at least the expected minimum
    value for each pair of sample strings (some of them multi-line).

    Each sample is a tuple of (first string, second string, minimum).
    """
    acceptance_tests = (
        ('a', 'a', 1.0),
        ('ab\nac\nad', 'ab\nae\nad', 0.6),
        ('ab\nac\nae', 'ab\naf\nad', 0.3),
        ('ab\nac\nad', 'aa\nae\naf', 0.0),
        ('a', 'b', 0.0),
        ('a<a"a<a', 'a<a"a<b', 0.75),
        ('a' * 25, 'a', 0.00),
    )

    for first, second, minimum in acceptance_tests:
        res = relative_distance(first, second)
        msg = "return value: %f, expected value: %f" % (res, minimum)

        self.assertTrue(res >= minimum, msg)
def test_relative_distance(self):
    """
    Check that relative_distance() returns at least the given value for
    each pair of sample strings.

    Each sample is a tuple of (first string, second string, given value).
    """
    acceptance_tests = [
        ('a', 'a', 1.0),
        ('ab ac ad', 'ab ae ad', 0.6),
        ('ab ac ae', 'ab af ad', 0.3),
        ('ab ac ad', 'aa ae af', 0.0),
        ('a', 'b', 0.0),
        ('aaaa', 'aaab', 0.75),
        ('a' * 25, 'a', 0.04),
    ]

    for first, second, given in acceptance_tests:
        res = relative_distance(first, second)
        msg = "return value:%f, given value:%f" % (res, given)

        self.assertTrue(res >= given, msg)
def test_all(self):
    """
    Check that relative_distance_boolean(a, b, threshold) gives the same
    answer as ``relative_distance(a, b) >= threshold`` for every sample.

    Each sample is a tuple of (first string, second string, threshold).
    """
    acceptance_tests = [
        ('a', 'a', 1.0),
        ('a', 'a', 0.1),
        ('a', 'a', 0.0),

        ('a', 'b', 1.0),
        ('a', 'b', 0.1),
        ('a', 'b', 0.0),

        ('a', 'ab', 1.0),
        ('a', 'ab', 0.1),

        ('a', 'b', 0.0000000000000000001),
        ('a', 'b' * 100, 1.0),

        ('a', 'ab', 0.66666666666),
        ('a', 'aab', 0.5),
        ('a', 'aaab', 0.4),
        ('a', 'aaaab',
         0.33333333333333333333333333333333333333333333333333333333),

        ('a' * 25, 'a', 1.0),
        ('aaa', 'aa', 1.0),
        ('a', 'a', 1.0),

        ('a' * 25, 'a', 0.076923076923076927),
        ('aaa', 'aa', 0.8),

        ('a', 'a', 0.0),
    ]

    # Parenthesised implicit concatenation instead of backslash
    # continuations: the original ended with a dangling "\" which only
    # parsed as long as the following line stayed blank
    msg = ('relative_distance_boolean and relative_distance returned'
           ' different results for the same parameters:\n'
           ' - %s\n'
           ' - %s\n'
           ' - Threshold: %s\n')

    for e, d, f in acceptance_tests:
        res1 = relative_distance_boolean(e, d, f)
        res2 = relative_distance(e, d) >= f

        self.assertEqual(res1, res2, msg % (e, d, f))
def is_404(self, http_response): """ All of my previous versions of is_404 were very complex and tried to struggle with all possible cases. The truth is that in most "strange" cases I was failing miserably, so now I changed my 404 detection once again, but keeping it as simple as possible. Also, and because I was trying to cover ALL CASES, I was performing a lot of requests in order to cover them, which in most situations was unnecessary. So now I go for a much simple approach: 1- Cover the simplest case of all using only 1 HTTP request 2- Give the users the power to configure the 404 detection by setting a string that identifies the 404 response (in case we are missing it for some reason in case #1) :param http_response: The HTTP response which we want to know if it is a 404 or not. """ # # First we handle the user configured exceptions: # domain_path = http_response.get_url().get_domain_path() if domain_path in cf.cf.get('always_404'): return True elif domain_path in cf.cf.get('never_404'): return False # # The user configured setting. "If this string is in the response, # then it is a 404" # if cf.cf.get('string_match_404') and cf.cf.get('string_match_404') in http_response: return True # # This is the most simple case, we don't even have to think about this # # If there is some custom website that always returns 404 codes, then # we are screwed, but this is open source, and the pentester working # on that site can modify these lines. # if http_response.get_code() == 404: return True # # Simple, if the file we requested is in a directory that's known to # return 404 codes for files that do not exist, AND this is NOT a 404 # then we're return False! # if domain_path in self._directory_uses_404_codes and \ http_response.get_code() != 404: return False # # Lets start with the rather complex code... 
# with self._lock: if not self._already_analyzed: self.generate_404_knowledge(http_response.get_url()) self._already_analyzed = True # 404_body was already cleaned inside generate_404_knowledge # so we need to clean this one in order to have a fair comparison resp_body = get_clean_body(http_response) resp_content_type = http_response.doc_type resp_path = http_response.get_url().get_domain_path().url_string # See https://github.com/andresriancho/w3af/issues/6646 max_similarity_with_404 = 0.0 resp_path_in_db = False with self._lock: # # Compare this response to all the 404's I have in my DB # for resp_404 in self._404_responses: # Since the fuzzy_equal function is CPU-intensive we want to # avoid calling it for cases where we know it won't match, for # example in comparing an image and an html if resp_content_type != resp_404.doc_type: continue if fuzzy_equal(resp_404.body, resp_body, IS_EQUAL_RATIO): msg = '"%s" (id:%s) is a 404 [similarity_index > %s]' fmt = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO) om.out.debug(msg % fmt) return True else: # I could calculate this before and avoid the call to # fuzzy_equal, but I believe it's going to be faster this # way current_ratio = relative_distance(resp_404.body, resp_body) max_similarity_with_404 = max(max_similarity_with_404, current_ratio) # Track if the response path is in the DB if not resp_path_in_db and resp_path == resp_404.path: resp_path_in_db = True # # I get here when the for ends and no body_404_db matched with # the resp_body that was sent as a parameter by the user. This # means one of two things: # * There is not enough knowledge in self._404_responses, or # * The answer is NOT a 404. # # Because we want to reduce the amount of "false positives" that # this method returns, we'll perform some extra checks before # saying that this is NOT a 404. 
# if resp_path_in_db and max_similarity_with_404 < MUST_VERIFY_RATIO: msg = ('"%s" (id:%s) is NOT a 404 [similarity_index < %s' ' with sample path in 404 DB].') args = (http_response.get_url(), http_response.id, MUST_VERIFY_RATIO) om.out.debug(msg % args) return False if self._is_404_with_extra_request(http_response, resp_body): # # Aha! It actually was a 404! # four_oh_data = FourOhFourResponseFactory(http_response) self._404_responses.append(four_oh_data) msg = ('"%s" (id:%s) is a 404 (similarity_index > %s).' ' Adding new knowledge to the 404_responses database' ' (length=%s).') fmt = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO, len(self._404_responses)) om.out.debug(msg % fmt) return True msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].' args = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO) om.out.debug(msg % args) return False
def is_404(self, http_response): """ All of my previous versions of is_404 were very complex and tried to struggle with all possible cases. The truth is that in most "strange" cases I was failing miserably, so now I changed my 404 detection once again, but keeping it as simple as possible. Also, and because I was trying to cover ALL CASES, I was performing a lot of requests in order to cover them, which in most situations was unnecessary. So now I go for a much simple approach: 1- Cover the simplest case of all using only 1 HTTP request 2- Give the users the power to configure the 404 detection by setting a string that identifies the 404 response (in case we are missing it for some reason in case #1) :param http_response: The HTTP response which we want to know if it is a 404 or not. """ # # First we handle the user configured exceptions: # domain_path = http_response.get_url().get_domain_path() if domain_path in cf.cf.get('always_404'): return True elif domain_path in cf.cf.get('never_404'): return False # # The user configured setting. "If this string is in the response, # then it is a 404" # if cf.cf.get('string_match_404') and cf.cf.get( 'string_match_404') in http_response: return True # # This is the most simple case, we don't even have to think about this # # If there is some custom website that always returns 404 codes, then # we are screwed, but this is open source, and the pentester working # on that site can modify these lines. # if http_response.get_code() == 404: return True # # Simple, if the file we requested is in a directory that's known to # return 404 codes for files that do not exist, AND this is NOT a 404 # then we're return False! # if domain_path in self._directory_uses_404_codes and \ http_response.get_code() != 404: return False # # Lets start with the rather complex code... 
# with self._lock: if not self._already_analyzed: self.generate_404_knowledge(http_response.get_url()) self._already_analyzed = True # 404_body was already cleaned inside generate_404_knowledge # so we need to clean this one in order to have a fair comparison resp_body = get_clean_body(http_response) resp_content_type = http_response.doc_type resp_path = http_response.get_url().get_domain_path().url_string # See https://github.com/andresriancho/w3af/issues/6646 max_similarity_with_404 = 0.0 resp_path_in_db = False with self._lock: # # Compare this response to all the 404's I have in my DB # for resp_404 in self._404_responses: # Since the fuzzy_equal function is CPU-intensive we want to # avoid calling it for cases where we know it won't match, for # example in comparing an image and an html if resp_content_type != resp_404.doc_type: continue if fuzzy_equal(resp_404.body, resp_body, IS_EQUAL_RATIO): msg = '"%s" (id:%s) is a 404 [similarity_index > %s]' fmt = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO) om.out.debug(msg % fmt) return True else: # I could calculate this before and avoid the call to # fuzzy_equal, but I believe it's going to be faster this # way current_ratio = relative_distance(resp_404.body, resp_body) max_similarity_with_404 = max(max_similarity_with_404, current_ratio) # Track if the response path is in the DB if not resp_path_in_db and resp_path == resp_404.path: resp_path_in_db = True # # I get here when the for ends and no body_404_db matched with # the resp_body that was sent as a parameter by the user. This # means one of two things: # * There is not enough knowledge in self._404_responses, or # * The answer is NOT a 404. # # Because we want to reduce the amount of "false positives" that # this method returns, we'll perform some extra checks before # saying that this is NOT a 404. 
# if resp_path_in_db and max_similarity_with_404 < MUST_VERIFY_RATIO: msg = ('"%s" (id:%s) is NOT a 404 [similarity_index < %s' ' with sample path in 404 DB].') args = (http_response.get_url(), http_response.id, MUST_VERIFY_RATIO) om.out.debug(msg % args) return False if self._is_404_with_extra_request(http_response, resp_body): # # Aha! It actually was a 404! # four_oh_data = FourOhFourResponseFactory(http_response) self._404_responses.append(four_oh_data) msg = ('"%s" (id:%s) is a 404 (similarity_index > %s).' ' Adding new knowledge to the 404_responses database' ' (length=%s).') fmt = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO, len(self._404_responses)) om.out.debug(msg % fmt) return True msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].' args = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO) om.out.debug(msg % args) return False
def is_404(self, http_response):
    """
    Decide whether `http_response` has to be handled as a 404 (not found).

    All of my previous versions of is_404 were very complex and tried to
    struggle with all possible cases. The truth is that in most "strange"
    cases I was failing miserably, so now I changed my 404 detection once
    again, but keeping it as simple as possible.

    Also, and because I was trying to cover ALL CASES, I was performing a
    lot of requests in order to cover them, which in most situations was
    unnecessary.

    So now I go for a much simpler approach:
        1- Cover the simplest case of all using only 1 HTTP request
        2- Give the users the power to configure the 404 detection by
           setting a string that identifies the 404 response (in case we
           are missing it for some reason in case #1)

    :param http_response: The HTTP response which we want to know if it
                          is a 404 or not.
    :return: True if the response is considered a 404, False otherwise.
    """
    domain_path = http_response.get_url().get_domain_path()
    extension = http_response.get_url().get_extension()

    #
    #   First we handle the user configured exceptions:
    #
    if domain_path in cf.cf.get('always_404'):
        return True

    if domain_path in cf.cf.get('never_404'):
        return False

    #
    #   The user configured setting. "If this string is in the response,
    #   then it is a 404"
    #
    if cf.cf.get('string_match_404') and cf.cf.get(
            'string_match_404') in http_response:
        return True

    #
    #   This is the most simple case, we don't even have to think about this
    #
    #   If there is some custom website that always returns 404 codes, then
    #   we are screwed, but this is open source, and the pentester working
    #   on that site can modify these lines.
    #
    if http_response.get_code() == 404:
        return True

    #
    #   This is an edge case. Let me explain...
    #
    #   Doing try/except in all plugins that send HTTP requests was hard (tm)
    #   so plugins don't use ExtendedUrllib directly, instead they use the
    #   UrlOpenerProxy (defined in plugin.py). This proxy catches any
    #   exceptions and returns a 204 response.
    #
    #   In most cases that works perfectly, because it will allow the plugin
    #   to keep working without caring much about the exceptions. In some
    #   edge cases someone will call is_404(204_response_generated_by_w3af)
    #   and that will most likely return False, because the 204 response we
    #   generate doesn't look like anything w3af has in the 404 DB.
    #
    #   The following check handles that case: a 204 response with the
    #   NO_CONTENT_MSG message and empty headers is reported as a 404
    #
    if http_response.get_code() == 204:
        if http_response.get_msg() == NO_CONTENT_MSG:
            if http_response.get_headers() == Headers():
                return True

    #
    #   Lets start with the rather complex code...
    #
    # The 404 knowledge is generated only once, on the first call that
    # reaches this point; the lock protects the _already_analyzed flag
    with self._lock:
        if not self._already_analyzed:
            self.generate_404_knowledge(http_response.get_url())
            self._already_analyzed = True

    #
    #   Simple: if the file we requested is in a directory that's known to
    #   return 404 codes for files that do not exist, AND this is NOT a 404
    #   then we return False!
    #
    path_extension = (domain_path, extension)
    if path_extension in self._directory_uses_404_codes:
        if http_response.get_code() != 404:
            return False

    # 404_body stored in the DB was already cleaned inside
    # generate_404_knowledge
    #
    # We need to clean the body we receive as parameter in order to have
    # a fair comparison
    resp_body = get_clean_body(http_response)
    resp_content_type = http_response.doc_type
    resp_path = http_response.get_url().get_domain_path().url_string

    # See https://github.com/andresriancho/w3af/issues/6646
    # Highest similarity seen against the entries of the 404 DB
    max_similarity_with_404 = 0.0
    # Becomes True when a DB entry has the same path as this response
    resp_path_in_db = False

    # Random identifier included in every debug message written by this
    # call (the "did" field)
    debugging_id = rand_alnum(8)

    #
    #   Compare this response to all the 404's I have in my DB
    #
    for resp_404 in self.get_404_responses():

        # Since the fuzzy_equal function is CPU-intensive we want to
        # avoid calling it for cases where we know it won't match, for
        # example in comparing an image and an html
        if resp_content_type != resp_404.doc_type:
            continue

        is_fuzzy_equal, distance = fuzzy_equal_return_distance(
            resp_404.body, resp_body, IS_EQUAL_RATIO)

        if is_fuzzy_equal:
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
                   ' [similarity_index > %s with 404 DB entry with ID %s]')
            args = (http_response.get_url(),
                    http_response.id,
                    http_response.get_code(),
                    len(http_response.get_body()),
                    debugging_id,
                    IS_EQUAL_RATIO,
                    resp_404.id)
            om.out.debug(msg % args)
            return True

        # NOTE(review): the nesting of this branch was reconstructed from
        # flattened source; confirm that the upper bound check is meant
        # to run only when no distance was returned
        if distance is None:
            distance = 0.0

            # In some cases the distance is None, because the
            # fuzzy_equal didn't have to calculate it to produce the result
            # (because of the optimizations)
            #
            # Also, we can calculate the upper_bound_similarity which
            # indicates how much (in the best case) two strings can look
            # alike based on their lengths
            #
            # This allows us to calculate the distance between two strings
            # only if we know that the distance could be large enough
            ups = upper_bound_similarity(len(resp_404.body), len(resp_body))

            if ups > max_similarity_with_404:
                distance = relative_distance(resp_404.body, resp_body)

        max_similarity_with_404 = max(max_similarity_with_404, distance)

        # Track if the response path is in the DB
        if not resp_path_in_db and resp_path == resp_404.path:
            resp_path_in_db = True

    #
    #   I get here when the for ends and no 404 body matched with
    #   the resp_body that was sent as a parameter. This means one of two
    #   things:
    #
    #       * There is not enough knowledge in get_404_responses(), or
    #
    #       * The answer is NOT a 404.
    #
    #   Because we want to reduce the amount of false positives that
    #   this method returns, we'll perform some extra checks before
    #   saying that this is NOT a 404.
    #
    if resp_path_in_db and max_similarity_with_404 < MUST_VERIFY_RATIO:
        msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
               ' [similarity_index < %s with sample path in 404 DB]')
        args = (http_response.get_url(),
                http_response.id,
                http_response.get_code(),
                len(http_response.get_body()),
                debugging_id,
                MUST_VERIFY_RATIO)
        om.out.debug(msg % args)
        return False

    if self._is_404_with_extra_request(http_response, resp_body, debugging_id):
        #
        #   Aha! It is a 404!
        #
        msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
               ' [similarity_index > %s with extra request]')
        args = (http_response.get_url(),
                http_response.id,
                http_response.get_code(),
                len(http_response.get_body()),
                debugging_id,
                IS_EQUAL_RATIO)
        om.out.debug(msg % args)
        return True

    msg = '"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404 [default to False]'
    args = (http_response.get_url(),
            http_response.id,
            http_response.get_code(),
            len(http_response.get_body()),
            debugging_id)
    om.out.debug(msg % args)
    return False