def _verify_reference(self, reference, original_request,
                      original_response, possibly_broken,
                      be_recursive=True):
    """
    GET a newly found link, and either queue it for the w3af core or
    record it as a broken link.

    :param reference: Newly found URL
    :param original_request: The FuzzableRequest instance which generated
                             the response where the new URL was found
    :param original_response: The HTTPResponse generated by the
                              FuzzableRequest
    :param possibly_broken: Boolean indicating if we trust this reference
                            or not
    :param be_recursive: When True, also verify links found inside a 404
                         response body (one level deep only)

    This method GET's every new link and parses it in order to get
    new links and forms.
    """
    # Sending a Referer "breaks" cache=True in most cases, but doing so
    # is friendlier than simply ignoring the referer. Using the base URL
    # keeps the Referer value stable across requests.
    referer = original_response.get_url().base_url().url_string
    headers = Headers([('Referer', referer)])

    # Enforce the hard cap on the number of HTTP requests this plugin is
    # allowed to send; the counter is incremented for every attempt.
    self._requests_count += 1
    if self._requests_count > self._max_requests_count:
        return

    # Note: We're not grep'ing this HTTP request/response now because it
    #       has high probability of being a 404, and the grep plugins
    #       already got enough 404 responses to analyze (from is_404 for
    #       example). If it's not a 404 then we'll push it to the core
    #       and it will come back to this plugin's crawl() where it will
    #       be requested with grep=True
    resp = self._uri_opener.GET(reference, cache=True, headers=headers,
                                grep=False)

    if is_404(resp):
        # I WANT to follow links that are in the 404 page, but DO NOT
        # return the 404 itself to the core.
        #
        # This will parse the 404 response and add the 404-links in the
        # output queue, so that the core can get them.
        if be_recursive:
            # Only follow one level of links in 404 pages, this limits
            # the potential issue when this is found:
            #
            #   http://foo.com/abc/ => 404
            #   Body: <a href="def/">link</a>
            #
            # Which would otherwise lead this function to request
            # /abc/, /abc/def/, /abc/def/def/, ... without end.
            #
            # Do not use threads here, it will dead-lock (for unknown
            # reasons). This is tested in TestDeadLock unittest.
            for args in self._urls_to_verify_generator(resp,
                                                       original_request):
                self._verify_reference(*args, be_recursive=False)

        # Store the broken links, unless the reference was untrusted to
        # begin with or the code is an auth/forbidden response
        if not possibly_broken and \
                resp.get_code() not in self.UNAUTH_FORBID:
            t = (resp.get_url(), original_request.get_uri())
            self._broken_links.add(t)
    else:
        msg = '[web_spider] Sending link to w3af core: "%s"'
        om.out.debug(msg % reference)

        fuzz_req = FuzzableRequest(reference, headers=headers)

        # These next steps are simple, but actually allow me to set the
        # referer and cookie for the FuzzableRequest instances I'm
        # sending to the core, which will then allow the fuzzer to
        # create CookieMutant and HeadersMutant instances.
        #
        # Without setting the Cookie, the CookieMutant would never have
        # any data to modify; remember that cookies are actually set by
        # the urllib2 cookie handler when the request already exited the
        # framework.
        cookie = Cookie.from_http_response(original_response)

        fuzz_req.set_referer(referer)
        fuzz_req.set_cookie(cookie)

        self.output_queue.put(fuzz_req)
def _verify_reference(self, reference, original_request,
                      original_response, possibly_broken,
                      be_recursive=True):
    """
    Issue a GET for a newly discovered link and feed the result back
    into the crawl.

    :param reference: Newly found URL
    :param original_request: The FuzzableRequest instance which generated
                             the response where the new URL was found
    :param original_response: The HTTPResponse generated by the
                              FuzzableRequest
    :param possibly_broken: Boolean indicating if we trust this reference
                            or not
    :param be_recursive: When True, links found inside a 404 response are
                         verified too (one level deep only)

    This method GET's every new link and parses it in order to get
    new links and forms.
    """
    # Sending a Referer header "breaks" cache=True in most cases, but it
    # is friendlier than simply ignoring the referer altogether.
    referer = original_response.get_url().base_url().url_string
    headers = Headers([('Referer', referer)])

    resp = self._uri_opener.GET(reference, cache=True, headers=headers)

    if not is_404(resp):
        msg = 'Adding reference "%s" to the result.'
        om.out.debug(msg % reference)

        fuzz_req = FuzzableRequest(reference, headers=headers)

        # Attach the referer and cookie so the fuzzer can later build
        # CookieMutant and HeadersMutant instances; without the cookie,
        # CookieMutant would have no data to modify (cookies are normally
        # set by the urllib2 cookie handler only after the request has
        # already exited the framework).
        fuzz_req.set_referer(referer)
        fuzz_req.set_cookie(Cookie.from_http_response(original_response))

        self.output_queue.put(fuzz_req)
        return

    # The response is a 404: follow the links inside it, but never
    # return the 404 itself to the core.
    if be_recursive:
        # Only one level of recursion; otherwise a 404 page whose body
        # contains a relative link such as <a href="def/">link</a> would
        # make us request /abc/, /abc/def/, /abc/def/def/, ... forever.
        verify_no_recursion = partial(self._verify_reference,
                                      be_recursive=False)
        self.worker_pool.map_multi_args(
            verify_no_recursion,
            self._urls_to_verify_generator(resp, original_request))

    # Remember the broken link, skipping untrusted references and
    # auth/forbidden response codes
    if not possibly_broken and resp.get_code() not in self.UNAUTH_FORBID:
        self._broken_links.add((resp.get_url(),
                                original_request.get_uri()))
def _verify_reference(self, reference, original_request,
                      original_response, possibly_broken,
                      be_recursive=True):
    """
    Retrieve a newly found link and decide whether it goes to the w3af
    core or into the broken-links set.

    :param reference: Newly found URL
    :param original_request: The FuzzableRequest instance which generated
                             the response where the new URL was found
    :param original_response: The HTTPResponse generated by the
                              FuzzableRequest
    :param possibly_broken: Boolean indicating if we trust this reference
                            or not
    :param be_recursive: Follow links found inside 404 pages (only one
                         level deep)

    This method GET's every new link and parses it in order to get
    new links and forms.
    """
    # A Referer header "breaks" cache=True in most cases, but sending it
    # is friendlier than simply ignoring the referer.
    referer = original_response.get_url().base_url().url_string
    request_headers = Headers([('Referer', referer)])

    # grep=False because this response is very likely a 404, and the
    # grep plugins already received plenty of 404 responses to analyze
    # (from is_404 for example). If it turns out NOT to be a 404, it is
    # pushed to the core and comes back through this plugin's crawl(),
    # where it will be requested with grep=True.
    http_response = self._uri_opener.GET(reference,
                                         cache=True,
                                         headers=request_headers,
                                         grep=False)

    if is_404(http_response):
        # I WANT to follow links that live in the 404 page, but DO NOT
        # return the 404 itself to the core.
        if be_recursive:
            # One level of recursion only; a 404 page containing a
            # relative link like <a href="def/">link</a> would otherwise
            # trigger requests to /abc/, /abc/def/, /abc/def/def/, ...
            #
            # Do not use threads here, it will dead-lock (for unknown
            # reasons). This is tested in TestDeadLock unittest.
            pending = self._urls_to_verify_generator(http_response,
                                                     original_request)
            for verify_args in pending:
                self._verify_reference(*verify_args, be_recursive=False)

        # Record the broken link, unless the reference was untrusted or
        # the server answered with an auth/forbidden code
        if not possibly_broken and \
                http_response.get_code() not in self.UNAUTH_FORBID:
            broken = (http_response.get_url(),
                      original_request.get_uri())
            self._broken_links.add(broken)
    else:
        msg = '[web_spider] Sending link to w3af core: "%s"'
        om.out.debug(msg % reference)

        fuzz_req = FuzzableRequest(reference, headers=request_headers)

        # Setting the referer and cookie lets the fuzzer create
        # CookieMutant and HeadersMutant instances later on. Without the
        # Cookie, the CookieMutant would never have any data to modify;
        # cookies are actually set by the urllib2 cookie handler when
        # the request has already exited the framework.
        cookie = Cookie.from_http_response(original_response)

        fuzz_req.set_referer(referer)
        fuzz_req.set_cookie(cookie)

        self.output_queue.put(fuzz_req)
def _verify_reference(self, reference, original_request,
                      original_response, possibly_broken,
                      be_recursive=True):
    """
    Fetch a newly discovered URL, queue non-404 responses for the core
    and track broken links.

    :param reference: Newly found URL
    :param original_request: The FuzzableRequest instance which generated
                             the response where the new URL was found
    :param original_response: The HTTPResponse generated by the
                              FuzzableRequest
    :param possibly_broken: Boolean indicating if we trust this reference
                            or not
    :param be_recursive: Verify links found inside a 404 body as well
                         (limited to a single extra level)

    This method GET's every new link and parses it in order to get
    new links and forms.
    """
    # A Referer header hurts the cache=True hit-rate in most cases, but
    # sending it is friendlier than omitting it; the base URL keeps the
    # value stable across requests.
    referer = original_response.get_url().base_url().url_string
    http_headers = Headers([('Referer', referer)])

    http_response = self._uri_opener.GET(reference,
                                         cache=True,
                                         headers=http_headers)

    if is_404(http_response):
        # Follow the links inside the 404 body (without ever sending the
        # 404 itself to the core), but only one level deep to avoid the
        # /abc/, /abc/def/, /abc/def/def/, ... explosion caused by
        # relative links in 404 pages.
        if be_recursive:
            no_recursion = partial(self._verify_reference,
                                   be_recursive=False)
            todo = self._urls_to_verify_generator(http_response,
                                                  original_request)
            self.worker_pool.map_multi_args(no_recursion, todo)

        # Track broken links, skipping untrusted references and
        # authentication/forbidden response codes
        code_is_auth = http_response.get_code() in self.UNAUTH_FORBID
        if not possibly_broken and not code_is_auth:
            self._broken_links.add((http_response.get_url(),
                                    original_request.get_uri()))
        return

    om.out.debug('Adding reference "%s" to the result.' % reference)

    fuzz_req = FuzzableRequest(reference, headers=http_headers)

    # Set the referer and cookie on the FuzzableRequest so the fuzzer
    # can later create CookieMutant and HeadersMutant instances. Without
    # the Cookie, the CookieMutant would never have any data to modify;
    # cookies are actually set by the urllib2 cookie handler when the
    # request has already exited the framework.
    fuzz_req.set_referer(referer)
    fuzz_req.set_cookie(Cookie.from_http_response(original_response))

    self.output_queue.put(fuzz_req)