def matches(self, query):
    """
    Check if the `query` HTTP response body matches the failed login page
    represented by this instance.

    The check goes from cheapest to most expensive: exact equality with any
    of the two stored bodies, a coarse fuzzy comparison with body_a, and
    finally a comparison between the (lazily calculated) diff of the two
    stored bodies and the diff of body_a with `query`.

    :param query: An HTTP response body
    :return: True if the `query` response body is considered equal to the
             failed login bodies stored in body_a / body_b, False otherwise.
    """
    # Exact match with any of the known failed login bodies
    if self.body_a == query or self.body_b == query:
        return True

    # When the bodies are really different there is no need to pay for the
    # diff() calculation
    roughly_similar = fuzzy_equal(self.body_a, query, 0.60)
    if not roughly_similar:
        return False

    # The diff between the two known bodies is calculated only once and
    # then kept in the instance
    if self.diff_a_b is None:
        self.diff_a_b, _ = chunked_diff(self.body_a, self.body_b)

    _, diff_query_a = chunked_diff(self.body_a, query)

    # Tiny diffs are accepted as a match. This prevents issues with CSRF
    # tokens, which might be part of the HTTP response body, are random
    # (not removed by clean_body) and would "break" the diff comparison
    if len(diff_query_a) < 64:
        return True

    return bool(fuzzy_equal(self.diff_a_b, diff_query_a, 0.9))
def test_xml(self):
    """
    Run chunked_diff() on the source.xml and target.xml files stored in
    self.DATA. No assertion is made on the result, the call only needs to
    complete.

    Comment the @SkipTest and then run:

    nosetests --with-timer -s -v -x w3af/core/controllers/misc/tests/test_diff.py
    """
    # Context managers close each fixture file as soon as its content has
    # been read, instead of leaving the handles open until garbage
    # collection. open() is used because it is available in every Python
    # version, unlike the file() builtin.
    with open(os.path.join(self.DATA, 'source.xml')) as source_fh:
        a = source_fh.read()

    with open(os.path.join(self.DATA, 'target.xml')) as target_fh:
        b = target_fh.read()

    # This takes ~0.07 seconds on my workstation
    chunked_diff(a, b)
def equal_with_limit(self, body1, body2, compare_diff=False):
    """
    Decide if two pages are equal using the self._eq_limit ratio.

    :param body1: First response body
    :param body2: Second response body
    :param compare_diff: When set, only the parts of the response bodies
                         which are different (as returned by
                         chunked_diff()) are compared.
    :return: The result of fuzzy_equal() for the compared strings
    """
    left, right = body1, body2

    if compare_diff:
        # Compare only what changed between the two bodies
        left, right = chunked_diff(body1, body2)

    return fuzzy_equal(left, right, self._eq_limit)
def equal_with_limit(self, body1, body2, compare_diff=False):
    """
    Decide if two pages are equal using the self._eq_limit ratio, and write
    the outcome to the debug log.

    :param body1: First response body
    :param body2: Second response body
    :param compare_diff: When set, the two strings returned by
                         chunked_diff(body1, body2) are compared instead of
                         the complete bodies.
    :return: The result of relative_distance_boolean() for the compared
             strings
    """
    if compare_diff:
        first, second = chunked_diff(body1, body2)
    else:
        first, second = body1, body2

    similar = relative_distance_boolean(first, second, self._eq_limit)

    message = 'Strings are similar enough with limit %s? %s' % (self._eq_limit,
                                                                similar)
    self.debug(message, None)

    return similar
def equal_with_limit(self, body1, body2, compare_diff=False):
    """
    Decide if two pages are equal using the self._eq_limit ratio. Both the
    outcome and the time spent in this method are written to the debug log.

    :param body1: First response body
    :param body2: Second response body
    :param compare_diff: When set, the two strings returned by
                         chunked_diff(body1, body2) are compared instead of
                         the complete bodies.
    :return: The result of fuzzy_equal() for the compared strings
    """
    started_at = time.time()

    if compare_diff:
        body1, body2 = chunked_diff(body1, body2)

    similar = fuzzy_equal(body1, body2, self._eq_limit)

    if similar:
        verdict = 'ARE'
    else:
        verdict = 'ARE NOT'

    self.debug('Strings %s similar enough (limit: %s)' % (verdict,
                                                          self._eq_limit))

    # The measured time includes the optional diff and the comparison
    elapsed = time.time() - started_at
    self.debug('Took %.2f seconds to run equal_with_limit' % elapsed)

    return similar
def test_all_no_sep(self):
    """
    Two different strings which contain no newline are expected to be
    returned complete by chunked_diff().
    """
    expected = ('ABC', 'AXC')
    self.assertEqual(chunked_diff('ABC', 'AXC'), expected)
def test_middle_0(self):
    """
    Single-line strings which differ in one character in the middle are
    expected to be returned complete by chunked_diff().
    """
    a = '123456'
    b = '123a56'
    self.assertEqual(chunked_diff(a, b), (a, b))
def _handle_large_http_responses(self, http_response, query, known_404, debugging_id):
    """
    Decide if `query` is a 404 by comparing diffs instead of complete
    response bodies.

    When HTTP response bodies are large the fuzzy_equal() will generate 404
    false positives. This is explained in a comment above, (search for
    "{header-4000bytes}").

    This method handles that case by using three HTTP responses instead of
    two (which is the most common case). The three HTTP responses used by
    this method are:

        * known_404: The forced 404 generated by this class
        * query: The HTTP response we want to know if it is a 404
        * Another forced 404 generated by this method

    The two 404 responses are diffed with each other, known_404 is diffed
    with the query response, and the two diffs are then compared using
    fuzzy_equal_for_diff() and IS_EQUAL_RATIO.

    :param http_response: The HTTP response under analysis, passed to
                          send_request_generate_404() and used to build the
                          debug message
    :param query: Object holding the `body` to compare and the
                  `normalized_path` used as key in self._404_responses
    :param known_404: The forced 404 to compare with. Its `diff` and
                      `diff_with_id` attributes are set by this method when
                      `diff` is None.
    :param debugging_id: ID passed to send_request_generate_404() and
                         included in the debug message
    :return: True if the query response is a 404!
    """
    reference_404 = known_404

    if reference_404.diff is None:
        # There is no previous knowledge that we can use: send the second
        # request and calculate the diff.
        #
        # exclude=[reference_404.url] prevents the function from sending
        # an HTTP request to the same forced 404 URL
        second_404 = send_request_generate_404(self._uri_opener,
                                               http_response,
                                               debugging_id,
                                               exclude=[reference_404.url])

        reference_404.diff, _ = chunked_diff(reference_404.body,
                                             second_404.body)
        reference_404.diff_with_id = second_404.id

        # The diff is saved as an attribute and stored, which prevents more
        # HTTP requests from being sent to the server and reduces CPU usage
        # the next time this forced 404 is used
        self._404_responses[query.normalized_path] = reference_404.dumps()

    # At this point the diff is available: it was either calculated at some
    # earlier point during the execution of this scan or just above
    diff_of_404s = reference_404.diff
    _, diff_with_query = chunked_diff(reference_404.body, query.body)

    is_fuzzy_equal = fuzzy_equal_for_diff(diff_of_404s,
                                          diff_with_query,
                                          IS_EQUAL_RATIO)

    # Both outcomes log exactly the same values, only the text changes
    args = (http_response.get_url(),
            http_response.id,
            http_response.get_code(),
            len(http_response.get_body()),
            debugging_id,
            IS_EQUAL_RATIO,
            ', '.join([str(http_response.id),
                       str(reference_404.id),
                       str(reference_404.diff_with_id)]))

    if is_fuzzy_equal:
        is_404 = True
        msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
               ' [similarity_ratio > %s with diff of 404]'
               ' [Request IDs: %s]')
    else:
        is_404 = False
        msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
               ' [similarity_ratio < %s with diff of 404]'
               ' [Request IDs: %s]')

    om.out.debug(msg % args)
    return is_404
def test_nono(self):
    """
    The '123abc' line is present in both inputs. The expected result is
    'yes' for the first input and 'nono' for the second one.
    """
    a = '123abc\nyes'
    b = 'no\n123abc\nno'
    expected = ('yes', 'nono')
    self.assertEqual(chunked_diff(a, b), expected)
def test_start_0(self):
    """
    Single-line strings which differ at the beginning are expected to be
    returned complete by chunked_diff().
    """
    a = 'yes 123abc'
    b = 'no 123abc'
    self.assertEqual(chunked_diff(a, b), (a, b))
def test_start(self):
    """
    Only the first line is different between the two inputs, the expected
    result contains just that line for each of them.
    """
    expected = ('X', 'A')
    self.assertEqual(chunked_diff('X\nB\nC', 'A\nB\nC'), expected)
def test_special_chars(self):
    """
    The inputs start with 'X<tab>B' and 'A<B', followed by the same second
    line. The expected result is ('X', 'A').
    """
    expected = ('X', 'A')
    self.assertEqual(chunked_diff('X\tB\nC', 'A<B\nC'), expected)
def test_empty(self):
    """
    Two empty strings are expected to yield two empty strings.
    """
    empty = ''
    self.assertEqual(chunked_diff(empty, empty), ('', ''))
def test_middle_not_aligned(self):
    """
    The middle line is different and has a different length in each input
    ('B' and 'XY'). The expected result contains just that line for each
    of them.
    """
    expected = ('B', 'XY')
    self.assertEqual(chunked_diff('A\nB\nC', 'A\nXY\nC'), expected)
def test_equal(self):
    """
    Two equal strings are expected to yield two empty strings.
    """
    same = '123456'
    self.assertEqual(chunked_diff(same, same), ('', ''))
def test_middle(self):
    """
    Only the middle line is different between the two inputs, the expected
    result contains just that line for each of them.
    """
    expected = ('B', 'X')
    self.assertEqual(chunked_diff('A\nB\nC', 'A\nX\nC'), expected)
def test_different_separator(self):
    """
    The first input has a tab after its first character and the second one
    has a '<' in the same position. The expected result is ('X', 'A').
    """
    first = 'X\tB\nC'
    second = 'A<B\nC'
    self.assertEqual(chunked_diff(first, second), ('X', 'A'))