def generic_fuzzy_string_diff_runner_against_200(self, fuzzy_func, ratio):
    """
    Generic runner for fuzzy string diff, choose the first five 200
    responses, match it against all the other. None should match.

    The first MAX_TESTS entries of self.not_exists_data are used as base
    responses and each of them is compared against every entry of
    self.not_exists_data. A pair for which fuzzy_func returns a true value
    is recorded as a failure.

    The fail rate, elapsed time and number of comparisons are printed, and
    the failed (base, test) domain pairs are written to
    /tmp/<fuzzy_func.__name__>.txt

    :param fuzzy_func: Callable invoked as fuzzy_func(body_a, body_b, ratio)
    :param ratio: Forwarded unchanged as the third argument of fuzzy_func
    :return: None
    """
    MAX_TESTS = 5

    failed_domains = set()
    total = 0
    run_tests = 0

    start = time.time()

    for domain_base, (base, _) in self.not_exists_data.iteritems():
        # Check the limit first, no need to gunzip a body we will discard
        if run_tests == MAX_TESTS:
            break

        run_tests += 1

        base = self._gunzip(base)

        # NOTE(review): the meaning of the third argument of
        # _create_http_response is not visible from here -- confirm
        base_resp = self._create_http_response(domain_base, base, False)
        clean_body_base = get_clean_body(base_resp)

        for domain_test, (ok, _) in self.not_exists_data.iteritems():
            # The comparison against "myself" is counted too, it is only
            # skipped after incrementing the total
            total += 1

            ok = self._gunzip(ok)

            # Skip myself (detected by equal body, not by domain name)
            if base == ok:
                continue

            ok_resp = self._create_http_response(domain_test, ok, False)
            clean_body_ok = get_clean_body(ok_resp)

            if fuzzy_func(clean_body_base, clean_body_ok, ratio):
                failed_domains.add((domain_base, domain_test))

    end = time.time()

    # float() keeps this a true division even with Python 2 integer
    # division, and the guard avoids ZeroDivisionError when there is no data
    perc_fail = float(len(failed_domains)) / total if total else 0.0
    func_name = fuzzy_func.__name__

    print('%s fail rate: %s' % (func_name, perc_fail))
    print('Total time: %ss' % (end - start))
    print('Analyzed samples: %s' % total)

    output = '/tmp/%s.txt' % func_name

    # The context manager makes sure the report is flushed and closed
    with open(output, 'w') as output_fh:
        for domain_a, domain_b in sorted(failed_domains):
            output_fh.write('%s - %s\n' % (domain_a, domain_b))

    print('Failed domains stored at %s' % output)
def generic_fuzzy_string_diff_runner_against_200(self, fuzzy_func, ratio):
    """
    Generic runner for fuzzy string diff, choose the first five 200
    responses, match it against all the other. None should match.

    The first MAX_TESTS entries of self.not_exists_data are used as base
    responses and each of them is compared against every entry of
    self.not_exists_data. A pair for which fuzzy_func returns a true value
    is recorded as a failure.

    The fail rate, elapsed time and number of comparisons are printed, and
    the failed (base, test) domain pairs are written to
    /tmp/<fuzzy_func.__name__>.txt

    :param fuzzy_func: Callable invoked as fuzzy_func(body_a, body_b, ratio)
    :param ratio: Forwarded unchanged as the third argument of fuzzy_func
    :return: None
    """
    MAX_TESTS = 5

    failed_domains = set()
    total = 0
    run_tests = 0

    start = time.time()

    for domain_base, (base, _) in self.not_exists_data.iteritems():
        # Check the limit first, no need to gunzip a body we will discard
        if run_tests == MAX_TESTS:
            break

        run_tests += 1

        base = self._gunzip(base)

        # NOTE(review): the meaning of the third argument of
        # _create_http_response is not visible from here -- confirm
        base_resp = self._create_http_response(domain_base, base, False)
        clean_body_base = get_clean_body(base_resp)

        for domain_test, (ok, _) in self.not_exists_data.iteritems():
            # The comparison against "myself" is counted too, it is only
            # skipped after incrementing the total
            total += 1

            ok = self._gunzip(ok)

            # Skip myself (detected by equal body, not by domain name)
            if base == ok:
                continue

            ok_resp = self._create_http_response(domain_test, ok, False)
            clean_body_ok = get_clean_body(ok_resp)

            if fuzzy_func(clean_body_base, clean_body_ok, ratio):
                failed_domains.add((domain_base, domain_test))

    end = time.time()

    # float() keeps this a true division even with Python 2 integer
    # division, and the guard avoids ZeroDivisionError when there is no data
    perc_fail = float(len(failed_domains)) / total if total else 0.0
    func_name = fuzzy_func.__name__

    print('%s fail rate: %s' % (func_name, perc_fail))
    print('Total time: %ss' % (end - start))
    print('Analyzed samples: %s' % total)

    output = '/tmp/%s.txt' % func_name

    # The context manager makes sure the report is flushed and closed
    with open(output, 'w') as output_fh:
        for domain_a, domain_b in sorted(failed_domains):
            output_fh.write('%s - %s\n' % (domain_a, domain_b))

    print('Failed domains stored at %s' % output)
def test_get_clean_body_14956(self):
    """
    Reproduce the response reported in issue 14956 and check that
    get_clean_body() strips the request URL from the link in the body.

    https://github.com/andresriancho/w3af/issues/14956
    """
    request_url = URL('http://w3af.org/install.php?mode=phpinfo')
    response_headers = Headers([('Content-Type', 'text/html')])

    # The link in the body uses https while the request URL uses http.
    # That protocol change is probably why get_clean_body wasn't removing
    # the URL from the body.
    html = ('<head><title>Document Moved</title></head>'
            '<body><h1>Object Moved</h1>This document may be found '
            '<a HREF="https://w3af.org/install.php?mode=phpinfo">here</a></body>')

    # Same document, with the whole URL removed from the HREF attribute
    expected = ('<head><title>Document Moved</title></head>'
                '<body><h1>Object Moved</h1>This document may be found '
                '<a HREF="">here</a></body>')

    response = HTTPResponse(200, html, response_headers,
                            request_url, request_url)

    self.assertEqual(get_clean_body(response), expected)
def generic_fuzzy_string_diff_runner_noise(self, fuzzy_func, ratio):
    """
    Generic runner for fuzzy string diff which adds noise to the cmp

    For every entry of self.not_exists_data the body is compared against a
    copy of itself modified by self._add_noise_to_str(). A domain for which
    fuzzy_func returns a false value is recorded as a failure.

    The fail rate, elapsed time and number of samples are printed, and the
    failed domains are written to /tmp/<fuzzy_func.__name__>.txt

    :param fuzzy_func: Callable invoked as fuzzy_func(body_a, body_b, ratio)
    :param ratio: Forwarded unchanged as the third argument of fuzzy_func
    :return: None
    """
    failed_domains = set()
    total = 0

    start = time.time()

    for domain, (ok, _) in self.not_exists_data.iteritems():
        total += 1

        ok = self._gunzip(ok)

        # NOTE(review): the meaning of the 10 and 12 arguments, and of the
        # boolean passed to _create_http_response, is not visible from
        # here -- confirm against the helpers
        ok_with_noise = self._add_noise_to_str(ok, 10, 12)

        ok_resp = self._create_http_response(domain, ok, False)
        ok_noise_resp = self._create_http_response(domain,
                                                   ok_with_noise,
                                                   True)

        clean_body_ok = get_clean_body(ok_resp)
        clean_body_noise = get_clean_body(ok_noise_resp)

        if not fuzzy_func(clean_body_noise, clean_body_ok, ratio):
            failed_domains.add(domain)

    end = time.time()

    # float() keeps this a true division even with Python 2 integer
    # division, and the guard avoids ZeroDivisionError when there is no data
    perc_fail = float(len(failed_domains)) / total if total else 0.0
    func_name = fuzzy_func.__name__

    print('%s fail rate: %s' % (func_name, perc_fail))
    print('Total time: %ss' % (end - start))
    print('Analyzed samples: %s' % total)

    output = '/tmp/%s.txt' % func_name

    # The context manager makes sure the report is flushed and closed
    with open(output, 'w') as output_fh:
        for domain in sorted(failed_domains):
            output_fh.write('%s\n' % domain)

    print('Failed domains stored at %s' % output)
def generic_fuzzy_string_diff_runner_noise(self, fuzzy_func, ratio):
    """
    Generic runner for fuzzy string diff which adds noise to the cmp

    For every entry of self.not_exists_data the body is compared against a
    copy of itself modified by self._add_noise_to_str(). A domain for which
    fuzzy_func returns a false value is recorded as a failure.

    The fail rate, elapsed time and number of samples are printed, and the
    failed domains are written to /tmp/<fuzzy_func.__name__>.txt

    :param fuzzy_func: Callable invoked as fuzzy_func(body_a, body_b, ratio)
    :param ratio: Forwarded unchanged as the third argument of fuzzy_func
    :return: None
    """
    failed_domains = set()
    total = 0

    start = time.time()

    for domain, (ok, _) in self.not_exists_data.iteritems():
        total += 1

        ok = self._gunzip(ok)

        # NOTE(review): the meaning of the 10 and 12 arguments, and of the
        # boolean passed to _create_http_response, is not visible from
        # here -- confirm against the helpers
        ok_with_noise = self._add_noise_to_str(ok, 10, 12)

        ok_resp = self._create_http_response(domain, ok, False)
        ok_noise_resp = self._create_http_response(domain,
                                                   ok_with_noise,
                                                   True)

        clean_body_ok = get_clean_body(ok_resp)
        clean_body_noise = get_clean_body(ok_noise_resp)

        if not fuzzy_func(clean_body_noise, clean_body_ok, ratio):
            failed_domains.add(domain)

    end = time.time()

    # float() keeps this a true division even with Python 2 integer
    # division, and the guard avoids ZeroDivisionError when there is no data
    perc_fail = float(len(failed_domains)) / total if total else 0.0
    func_name = fuzzy_func.__name__

    print('%s fail rate: %s' % (func_name, perc_fail))
    print('Total time: %ss' % (end - start))
    print('Analyzed samples: %s' % total)

    output = '/tmp/%s.txt' % func_name

    # The context manager makes sure the report is flushed and closed
    with open(output, 'w') as output_fh:
        for domain in sorted(failed_domains):
            output_fh.write('%s\n' % domain)

    print('Failed domains stored at %s' % output)
def test_get_clean_body_14955(self):
    """
    Reproduce the response reported in issue 14955 and check the output of
    get_clean_body() for it.

    https://github.com/andresriancho/w3af/issues/14955
    """
    request_url = URL('http://w3af.org/.git/.git/.git/index')
    response_headers = Headers([('Content-Type', 'text/html')])

    # The link in the body is the request URL plus a trailing slash
    html = ('<head><title>Document Moved</title></head>'
            '<body><h1>Object Moved</h1>This document may be found '
            '<a HREF="http://w3af.org/.git/.git/.git/index/">here</a></body>')

    # Only the trailing slash is expected to remain in the HREF attribute
    expected = ('<head><title>Document Moved</title></head>'
                '<body><h1>Object Moved</h1>This document may be found '
                '<a HREF="/">here</a></body>')

    response = HTTPResponse(200, html, response_headers,
                            request_url, request_url)

    self.assertEqual(get_clean_body(response), expected)
def test_get_clean_body_14955(self):
    """
    Reproduce the response reported in issue 14955 and check the output of
    get_clean_body() for it.

    https://github.com/andresriancho/w3af/issues/14955
    """
    request_url = URL('http://w3af.org/.git/.git/.git/index')
    response_headers = Headers([('Content-Type', 'text/html')])

    # The link in the body is the request URL plus a trailing slash
    html = ('<head><title>Document Moved</title></head>'
            '<body><h1>Object Moved</h1>This document may be found '
            '<a HREF="http://w3af.org/.git/.git/.git/index/">here</a></body>')

    # Only the trailing slash is expected to remain in the HREF attribute
    expected = ('<head><title>Document Moved</title></head>'
                '<body><h1>Object Moved</h1>This document may be found '
                '<a HREF="/">here</a></body>')

    response = HTTPResponse(200, html, response_headers,
                            request_url, request_url)

    self.assertEqual(get_clean_body(response), expected)
def test_get_clean_body_14956(self):
    """
    Reproduce the response reported in issue 14956 and check that
    get_clean_body() strips the request URL from the link in the body.

    https://github.com/andresriancho/w3af/issues/14956
    """
    request_url = URL('http://w3af.org/install.php?mode=phpinfo')
    response_headers = Headers([('Content-Type', 'text/html')])

    # The link in the body uses https while the request URL uses http.
    # That protocol change is probably why get_clean_body wasn't removing
    # the URL from the body.
    html = ('<head><title>Document Moved</title></head>'
            '<body><h1>Object Moved</h1>This document may be found '
            '<a HREF="https://w3af.org/install.php?mode=phpinfo">here</a></body>')

    # Same document, with the whole URL removed from the HREF attribute
    expected = ('<head><title>Document Moved</title></head>'
                '<body><h1>Object Moved</h1>This document may be found '
                '<a HREF="">here</a></body>')

    response = HTTPResponse(200, html, response_headers,
                            request_url, request_url)

    self.assertEqual(get_clean_body(response), expected)