def _setup_404_detection(self):
    """
    Run the 404 detection once against every configured target URL.

    NOTE: This is performed here in order to avoid some weird thread
    locking that happens when the webspider calls is_404, and because the
    is_404 database should be initialized in a controlled try/except block.

    :raise ScanMustStopByUserRequest: Re-raised untouched.
    :raise ScanMustStopException: When the target can not be requested or
                                  the 404 detection fails on its response.
    """
    from w3af.core.controllers.core_helpers.fingerprint_404 import is_404

    request_failed = ('Failed to send HTTP request to the configured target'
                      ' URL "%s", the original exception was: "%s" (%s).')
    init_failed = ('Failed to initialize the 404 detection using HTTP'
                   ' response from "%s", the original exception was: "%s"'
                   ' (%s).')

    for url in cf.cf.get('targets'):
        # Step 1: fetch the target
        try:
            response = self._w3af_core.uri_opener.GET(url, cache=True)
        except ScanMustStopByUserRequest:
            raise
        except Exception as e:
            details = (url, e, e.__class__.__name__)
            raise ScanMustStopException(request_failed % details)

        # Step 2: feed the response to the 404 detection
        try:
            is_404(response)
        except ScanMustStopByUserRequest:
            raise
        except Exception as e:
            details = (url, e, e.__class__.__name__)
            raise ScanMustStopException(init_failed % details)
class dot_ds_store(CrawlPlugin):
    """
    Search for .DS_Store files and parse the file entries they contain.

    :author: Tomas Velazquez ( [email protected] )
    :author: Andres Riancho ( [email protected] )
    :credits: This code was based in cpan Mac::Finder::DSStore by
              Wim Lewis ( [email protected] )
    """
    DS_STORE = '.DS_Store'

    def __init__(self):
        CrawlPlugin.__init__(self)

        # Internal variables: directories already checked for a .DS_Store
        self._analyzed_dirs = DiskSet()

    def crawl(self, fuzzable_request):
        """
        For every directory, fetch a list of files and analyze the response.

        :param fuzzable_request: A fuzzable_request instance that contains
                                 (among other things) the URL to test.
        """
        directories_to_check = []

        for domain_path in fuzzable_request.get_url().get_directories():
            if domain_path not in self._analyzed_dirs:
                self._analyzed_dirs.add(domain_path)
                directories_to_check.append(domain_path)

        # Send the requests using threads
        self.worker_pool.map(self._check_and_analyze, directories_to_check)

    def _check_and_analyze(self, domain_path):
        """
        Check if a .DS_Store filename exists in the domain_path.

        :param domain_path: The directory URL to join with DS_STORE.
        :return: None, everything is saved to the self.out_queue.
        """
        # Request the file
        url = domain_path.url_join(self.DS_STORE)

        try:
            response = self.http_get_and_parse(url, binary_response=True)
        except BaseFrameworkException as w3:
            msg = 'Failed to GET .DS_Store file: %s. Exception: %s.'
            # Interpolate the message before logging it; the args tuple must
            # not be handed to debug() as a second positional argument.
            om.out.debug(msg % (url, w3))
            return

        # Check if it's a .DS_Store file
        if is_404(response):
            return

        try:
            store = DsStore(response.get_raw_body())
            # NOTE(review): `entries` is not consumed by the code visible
            # here - confirm where the parsed entries are handled.
            entries = store.get_file_entries()
        except Exception as e:
            om.out.debug('Unexpected error while parsing DS_Store file: "%s"'
                         % e)
            return
def _confirm_file_upload(self, path, mutant, http_response):
    """
    Confirm that the file really landed at `path` and report it.

    :param path: The URL where we suspect that a file was uploaded to.
    :param mutant: The mutant that originated the file on the remote end
    :param http_response: The HTTP response associated with sending mutant
    :return: None, the vulnerability (if any) is stored in the kb.
    """
    get_response = self._uri_opener.GET(path, cache=False)

    # Nothing to report when the file is missing or the mutant is buggy
    if is_404(get_response) or not self._has_no_bug(mutant):
        return

    # This is necessary: without it the session saver breaks because
    # REAL file objects can't be pickled
    mutant.set_mod_value('<file_object>')

    desc = ('A file upload to a directory inside the webroot'
            ' was found at: %s' % mutant.found_at())

    response_ids = [http_response.id, get_response.id]
    v = Vuln.from_mutant('Insecure file upload', desc, severity.HIGH,
                         response_ids, self.get_name(), mutant)

    v['file_dest'] = get_response.get_url()
    v['file_vars'] = mutant.get_file_vars()

    self.kb_append_uniq(self, 'file_upload', v)
    return
def _extract_urls(self, fuzzable_request, response):
    """
    Parse the server-status page and send the FuzzableRequest instances
    built from it to the core.

    :param fuzzable_request: Not used by this method.
    :param response: The HTTP response holding the server-status page.
    """
    self.output_queue.put(FuzzableRequest(response.get_url()))

    # Now really parse the file and create custom made fuzzable requests
    regex = '<td>.*?<td nowrap>(.*?)</td><td nowrap>.*? (.*?) HTTP/1'

    for domain, path in re.findall(regex, response.get_body()):

        if 'unavailable' in domain:
            domain = response.get_url().get_domain()

        # Check if the requested domain and the found one are equal.
        same_domain = domain == response.get_url().get_domain()

        if not same_domain:
            # This is a shared hosting server
            self._shared_hosting_hosts.append(domain)
            continue

        # They are equal, request the URL and create the fuzzable requests
        proto = response.get_url().get_protocol()
        found_url = URL(proto + '://' + domain + path)

        tmp_res = self._uri_opener.GET(found_url, cache=True)
        if is_404(tmp_res):
            continue

        self.output_queue.put(FuzzableRequest(found_url))
def grep(self, request, response):
    """
    Plugin entry point. Get responses, analyze words, create dictionary.

    :param request: The HTTP request object.
    :param response: The HTTP response object
    :return: None, the merged map is written to the kb.
    """
    if not self.got_lang():
        return

    # The status code is checked first to avoid doing some is_404 lookups
    if response.get_code() in self.BANNED_STATUS:
        return

    if is_404(response):
        return

    if request.get_method() not in self.ALLOWED_METHODS:
        return

    plugin_name = self.get_name()
    old_data = kb.kb.raw_read(plugin_name, self.get_name())

    if not isinstance(old_data, dict):
        return

    # Run the plugins
    data = self._run_plugins(response)

    with self._plugin_lock:
        merged = self.merge_maps(old_data, data, request,
                                 self.captured_lang)
        merged = self._trim_data(merged)

        # save the updated map
        kb.kb.raw_write(self, self.get_name(), merged)
def _force_disclosures(self, domain_path, potentially_vulnerable_paths):
    """
    Request each candidate path and report the first one whose body holds
    a PHP "Fatal error: " message.

    :param domain_path: The path to wordpress' root directory
    :param potentially_vulnerable_paths: A list with the paths I'll URL-join
                                         with @domain_path, GET and parse.
    """
    for pvuln_path in potentially_vulnerable_paths:

        pvuln_url = domain_path.url_join(pvuln_path)
        response = self._uri_opener.GET(pvuln_url, cache=True)

        if is_404(response):
            continue

        if 'Fatal error: ' not in response.get_body():
            continue

        desc = ('Analyze the HTTP response body to find the full path'
                ' where wordpress was installed.')
        i = Info('WordPress path disclosure', desc, response.id,
                 self.get_name())
        i.set_url(pvuln_url)

        kb.kb.append(self, 'info', i)
        om.out.information(i.get_desc())

        # One disclosure is enough, stop requesting the remaining paths
        return
def _extract_paths(self, domain_path):
    """
    Find the active theme in the wordpress root page and build the paths of
    its header and footer PHP files.

    :param domain_path: The URL object pointing to the current wordpress
                        installation
    :return: A list with the paths that might trigger full path disclosures.
             The list is empty when the root page is a 404 or no theme is
             referenced in it.

    TODO: Will fail if WordPress is running on a Windows server due to
          paths manipulation.
    """
    theme_paths = []

    wp_root_response = self._uri_opener.GET(domain_path, cache=True)

    if is_404(wp_root_response):
        # Always hand a list back: the result is documented as a list and
        # returning None here would break callers that iterate over it.
        return theme_paths

    response_body = wp_root_response.get_body()

    theme_regexp = '%swp-content/themes/(.*)/style.css' % domain_path
    theme = re.search(theme_regexp, response_body, re.IGNORECASE)

    if theme:
        theme_name = theme.group(1)
        for fname in ('header', 'footer'):
            path_fname = 'wp-content/themes/%s/%s.php' % (theme_name, fname)
            theme_paths.append(path_fname)

    return theme_paths
def discover(self, fuzzable_request):
    """
    Checks if JBoss Interesting Directories exist in the target server.
    Also verifies some vulnerabilities.

    :param fuzzable_request: The request whose base URL is tested against
                             every entry in JBOSS_VULNS.
    """
    base_url = fuzzable_request.get_url().base_url()

    otm_send_request = one_to_many(self.send_request)
    args_iter = izip(repeat(base_url), self.JBOSS_VULNS)

    response_pool = self.worker_pool.imap_unordered(otm_send_request,
                                                    args_iter)

    for vuln_db_instance, response in response_pool:

        if is_404(response):
            continue

        vuln_url = base_url.url_join(vuln_db_instance['url'])
        name = vuln_db_instance['name']
        desc = vuln_db_instance['desc']

        is_info = vuln_db_instance['type'] == 'info'

        if is_info:
            o = Info(name, desc, response.id, self.get_name())
        else:
            o = Vuln(name, desc, severity.LOW, response.id,
                     self.get_name())

        o.set_url(vuln_url)
        kb.kb.append(self, 'find_jboss', o)

        # Let the core know about the URL that was found
        fr = FuzzableRequest(response.get_uri())
        self.output_queue.put(fr)
def grep(self, request, response):
    """
    Plugin entry point. Get responses, analyze words, create dictionary.

    :param request: The HTTP request object.
    :param response: The HTTP response object
    :return: None, the merged map is written to the kb.
    """
    if not self.got_lang():
        return

    # The status code is checked first to avoid doing some is_404 lookups
    if response.get_code() in self.BANNED_STATUS:
        return

    if is_404(response):
        return

    if request.get_method() not in self.ALLOWED_METHODS:
        return

    # Run the plugins
    data = self._run_plugins(response)

    with self._plugin_lock:
        previous = kb.kb.raw_read(self.get_name(), self.get_name())

        merged = self.merge_maps(previous, data, request,
                                 self.captured_lang)
        merged = self._trim_data(merged)

        # save the updated map
        kb.kb.raw_write(self, self.get_name(), merged)
def _extract_urls(self, fuzzable_request, response):
    """
    Read the server-status page found in `response` and push the
    FuzzableRequest instances created from it to the core.

    :param fuzzable_request: Not used by this method.
    :param response: The HTTP response holding the server-status page.
    """
    self.output_queue.put(FuzzableRequest(response.get_url()))

    # Now really parse the file and create custom made fuzzable requests
    regex = "<td>.*?<td nowrap>(.*?)</td><td nowrap>.*? (.*?) HTTP/1"
    rows = re.findall(regex, response.get_body())

    for domain, path in rows:
        if "unavailable" in domain:
            domain = response.get_url().get_domain()

        # Check if the requested domain and the found one are equal.
        if domain == response.get_url().get_domain():
            proto = response.get_url().get_protocol()
            found_url = URL(proto + "://" + domain + path)

            # They are equal, request the URL and create the fuzzable
            # requests
            tmp_res = self._uri_opener.GET(found_url, cache=True)
            exists = not is_404(tmp_res)

            if exists:
                self.output_queue.put(FuzzableRequest(found_url))

            continue

        # This is a shared hosting server
        self._shared_hosting_hosts.append(domain)
def _confirm_file_upload(self, path, mutant, http_response):
    """
    Confirm that the file really landed at `path` and report it.

    :param path: The URL where we suspect that a file was uploaded to.
    :param mutant: The mutant that originated the file on the remote end
    :param http_response: The HTTP response associated with sending mutant
    :return: None, the vulnerability (if any) is stored in the kb.
    """
    get_response = self._uri_opener.GET(path, cache=False)

    # Nothing to report when the file is missing or the mutant is buggy
    if is_404(get_response) or not self._has_no_bug(mutant):
        return

    desc = ("A file upload to a directory inside the webroot"
            " was found at: %s" % mutant.found_at())

    response_ids = [http_response.id, get_response.id]
    v = Vuln.from_mutant("Insecure file upload", desc, severity.HIGH,
                         response_ids, self.get_name(), mutant)

    v["file_dest"] = get_response.get_url()
    v["file_vars"] = mutant.get_file_vars()

    self.kb_append_uniq(self, "file_upload", v)
def crawl(self, fuzzable_request, debugging_id):
    """
    Finds the version of a WordPress installation.

    :param debugging_id: A unique identifier for this call to discover()
    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    :raise RunOnce: When wp-login.php was already analyzed in a past call.
    """
    if not self._exec:
        # This will remove the plugin from the crawl plugins to be run.
        raise RunOnce()

    #
    # Check if the server is running wp
    #
    domain_path = fuzzable_request.get_url().get_domain_path()

    # Main scan URL passed from w3af + unique wp file
    wp_unique_url = domain_path.url_join('wp-login.php')
    response = self._uri_opener.GET(wp_unique_url, cache=True)

    if not is_404(response):
        # It was possible to analyze wp-login.php, don't run again
        self._exec = False

        # Analyze the identified wordpress installation
        self._fingerprint_wordpress(domain_path, wp_unique_url, response)
def discover(self, fuzzable_request):
    """
    For every directory, fetch a list of files and analyze the response.

    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    """
    for domain_path in fuzzable_request.get_url().get_directories():

        if domain_path in self._analyzed_dirs:
            continue

        # Save the domain_path so I know I'm not working in vain
        self._analyzed_dirs.add(domain_path)

        # Request the file
        frontpage_info_url = domain_path.url_join("_vti_inf.html")

        try:
            response = self._uri_opener.GET(frontpage_info_url, cache=True)
        except BaseFrameworkException as w3:
            detail = frontpage_info_url + \
                '". Exception: "' + str(w3) + '".'
            msg = 'Failed to GET Frontpage Server _vti_inf.html file: "' \
                  + detail
            om.out.debug(msg)
            continue

        # Check if it's a Frontpage Info file
        if is_404(response):
            continue

        for fr in self._create_fuzzable_requests(response):
            self.output_queue.put(fr)

        self._analyze_response(response)
def crawl(self, fuzzable_request):
    """
    Finds the version of a WordPress installation.

    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    :raise RunOnce: When wp-login.php was already analyzed in a past call.
    """
    if not self._exec:
        # This will remove the plugin from the crawl plugins to be run.
        raise RunOnce()

    #
    # Check if the server is running wp
    #
    domain_path = fuzzable_request.get_url().get_domain_path()

    # Main scan URL passed from w3af + unique wp file
    wp_unique_url = domain_path.url_join('wp-login.php')
    response = self._uri_opener.GET(wp_unique_url, cache=True)

    # If wp_unique_url is a 404 there is no wordpress here
    if is_404(response):
        return

    # It was possible to analyze wp-login.php, don't run again
    self._exec = False

    # Analyze the identified wordpress installation
    self._fingerprint_wordpress(domain_path, wp_unique_url, response)

    # Send link to core
    self.output_queue.put(FuzzableRequest(response.get_uri()))
def _classic_worker(self, gh, search_term):
    """
    Perform the searches and store the results in the kb.

    :param gh: The GHDB entry being searched; its `desc` attribute is used
               in the vulnerability description.
    :param search_term: The query sent to the search engine.
    :return: None, findings go to the kb and to self.output_queue.
    """
    google_list = self._google_se.get_n_results(search_term, 9)

    for result in google_list:
        # I found a vuln in the site!
        response = self._uri_opener.GET(result.URL, cache=True)

        if is_404(response):
            continue

        desc = ('ghdb plugin found a vulnerability at URL: "%s".'
                ' According to GHDB the vulnerability description'
                ' is "%s".')
        desc %= (response.get_url(), gh.desc)

        v = Vuln('Google hack database match', desc, severity.MEDIUM,
                 response.id, self.get_name())
        v.set_url(response.get_url())
        v.set_method('GET')

        kb.kb.append(self, 'vuln', v)

        # Report with the severity stored in the Vuln (MEDIUM) so the
        # output and the kb agree; it used to be logged as LOW.
        om.out.vulnerability(v.get_desc(), severity=v.get_severity())

        # Create the fuzzable requests
        fr = FuzzableRequest(response.get_url())
        self.output_queue.put(fr)
def grep(self, request, response):
    """
    Check x-frame-options header

    :param request: The HTTP request object.
    :param response: The HTTP response object
    :return: None, unprotected responses are added to the findings.
    """
    # The checks run in this order and stop at the first one that matches:
    #   - Can not iframe a POST, PUT, etc.
    #   - Status codes listed in DO_NOT_FRAME
    #   - Non text/html responses
    #   - Empty bodies: an attacker will never run a clickjacking attack on
    #     an empty response (common in redirects, 400 and 500 errors, etc.)
    #   - Responses which will not be rendered
    #   - 404 responses
    ignore = (request.get_method() != 'GET'
              or response.get_code() in self.DO_NOT_FRAME
              or not response.is_text_or_html()
              or not response.get_body()
              or not self._response_will_be_rendered(response)
              or is_404(response))

    if ignore:
        return

    self._total_http_request_count += 1

    if not self._is_protected_against_clickjacking(request, response):
        self._add_response_to_findings(response)
def _do_request(self, url, mutant):
    """
    Perform a simple GET to see if the result is an error or not, and then
    run the actual fuzzing.

    :param url: Not used by this method.
    :param mutant: The mutant to send.
    """
    response = self._uri_opener.GET(mutant, cache=True,
                                    headers=self._headers)

    # Errors, forbidden/unauthorized resources and mutants which must not
    # be evaluated are all ignored
    if (is_404(response)
            or response.get_code() in (403, 401)
            or self._return_without_eval(mutant)):
        return

    # Create the fuzzable request and send it to the core
    self.output_queue.put(FuzzableRequest.from_http_response(response))

    #
    # Save it to the kb (if new)!
    #
    if response.get_url() in self._seen:
        return

    if not response.get_url().get_file_name():
        return

    desc = 'A potentially interesting file was found at: "%s".'
    desc = desc % response.get_url()

    i = Info('Potentially interesting file', desc, response.id,
             self.get_name())
    i.set_url(response.get_url())

    kb.kb.append(self, 'files', i)
    om.out.information(i.get_desc())

    # Report only once
    self._seen.add(response.get_url())
def crawl(self, fuzzable_req):
    """
    Searches for links on the html.

    :param fuzzable_req: A fuzzable_req instance that contains
                         (among other things) the URL to test.
    """
    self._handle_first_run()

    #
    # If it is a form, then smart_fill the parameters to send something that
    # makes sense and will allow us to cover more code.
    #
    data_container = fuzzable_req.get_raw_data()

    if isinstance(data_container, Form):
        already_filled = fuzzable_req.get_url() in self._already_filled_form

        if already_filled:
            return
        else:
            self._already_filled_form.add(fuzzable_req.get_url())
            data_container.smart_fill()

    # Send the HTTP request
    resp = self._uri_opener.send_mutant(fuzzable_req)

    # Nothing to do with unauthorized responses. And we don't trust what
    # comes from the core, so check for 404 too.
    if resp.get_code() == http_constants.UNAUTHORIZED or is_404(resp):
        return

    self._extract_html_forms(resp, fuzzable_req)
    self._extract_links_and_verify(resp, fuzzable_req)
def _extract_paths(self, domain_path):
    """
    Find the active theme in the wordpress root page and build the paths of
    its header and footer PHP files.

    :param domain_path: The URL object pointing to the current wordpress
                        installation
    :return: A list with the paths that might trigger full path disclosures

    TODO: Will fail if WordPress is running on a Windows server due to
          paths manipulation.
    """
    wp_root_response = self._uri_opener.GET(domain_path, cache=True)

    if is_404(wp_root_response):
        return []

    body = wp_root_response.get_body()
    theme_regexp = '%swp-content/themes/(.*)/style.css' % domain_path
    theme = re.search(theme_regexp, body, re.IGNORECASE)

    if not theme:
        return []

    theme_name = theme.group(1)
    template = 'wp-content/themes/%s/%s.php'

    return [template % (theme_name, fname)
            for fname in ('header', 'footer')]
def crawl(self, fuzzable_request):
    """
    Get the sitemap.xml file and parse it.

    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    """
    base_url = fuzzable_request.get_url().base_url()
    sitemap_url = base_url.url_join('sitemap.xml')
    response = self._uri_opener.GET(sitemap_url, cache=True)

    # Only continue with real sitemap files
    if '</urlset>' not in response or is_404(response):
        return

    # Send response to core
    self.output_queue.put(FuzzableRequest.from_http_response(response))

    om.out.debug('Parsing xml file with xml.dom.minidom.')
    try:
        # NOTE(review): `dom` is not consumed by the code visible here
        dom = xml.dom.minidom.parseString(response.get_body())
    except Exception as e:
        msg = 'Exception while parsing sitemap.xml from %s: "%s"'
        om.out.debug(msg % (response.get_url(), e))
        return
def _extract_api_calls(self, spec_url, debugging_id):
    """
    HTTP GET the `spec_url` and try to parse it. Send all the newly found
    fuzzable requests to the core after adding any authentication data
    that might have been configured.

    :param spec_url: The URL of the API specification; its query string is
                     modified in place to include the auth parameters.
    :param debugging_id: Identifier forwarded to the HTTP client.
    :return: None
    """
    #
    # Merge the user-configured authentication query string (if any)
    # with the spec_url query string
    #
    qs = spec_url.get_querystring()

    for auth_key, auth_values in self._query_string_auth.iteritems():
        qs[auth_key] = auth_values

    spec_url.set_querystring(qs)

    #
    # Also add the authentication headers to the request (if any)
    #
    # Disable the cache because we're sending auth headers which might
    # confuse the cache implementation
    #
    get_kwargs = {'headers': self._header_auth,
                  'cache': False,
                  'debugging_id': debugging_id}
    http_response = self._uri_opener.GET(spec_url, **get_kwargs)

    if not is_404(http_response):
        self._extract_api_calls_from_response(spec_url, http_response)
def grep(self, request, response):
    """
    Plugin entry point. Get responses, analyze words, create dictionary.

    :param request: The HTTP request object.
    :param response: The HTTP response object
    :return: None, the merged map is written to the kb.
    """
    if not self.got_lang():
        return

    # The status code is checked first to avoid doing some is_404 lookups
    if response.get_code() in {500, 401, 403, 404}:
        return

    if is_404(response):
        return

    if request.get_method() not in {'POST', 'GET'}:
        return

    # Run the plugins
    data = self._run_plugins(response)

    with self._plugin_lock:
        previous = kb.kb.raw_read('password_profiling',
                                  'password_profiling')

        merged = self.merge_maps(previous, data, request,
                                 self.captured_lang)
        merged = self._trim_data(merged)

        # save the updated map
        kb.kb.raw_write(self, 'password_profiling', merged)
def _exists_in_target(self, url): """ Check if a resource still exists in the target web site. :param url: The resource to verify. :return: None, the result is stored in self.output_queue """ if url in self._already_verified: return self._already_verified.add(url) response = self._uri_opener.GET(url, cache=True) if not is_404(response): msg = 'The URL: "%s" was found at archive.org and is'\ ' STILL AVAILABLE in the target site.' om.out.debug(msg % url) fr = FuzzableRequest(response.get_uri()) self.output_queue.put(fr) else: msg = 'The URL: "%s" was found at archive.org and was'\ ' DELETED from the target site.' om.out.debug(msg % url)
def crawl(self, fuzzable_request):
    """
    Get the sitemap.xml file and parse it.

    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    :raise BaseFrameworkException: When the sitemap body is not valid XML.
    """
    base_url = fuzzable_request.get_url().base_url()
    sitemap_url = base_url.url_join('sitemap.xml')
    response = self._uri_opener.GET(sitemap_url, cache=True)

    if '</urlset>' in response and not is_404(response):
        # Send response to core
        fr = FuzzableRequest.from_http_response(response)
        self.output_queue.put(fr)

        om.out.debug('Parsing xml file with xml.dom.minidom.')
        try:
            dom = xml.dom.minidom.parseString(response.get_body())
        except Exception:
            # Catch Exception instead of using a bare except, so that
            # KeyboardInterrupt / SystemExit are not turned into a
            # parsing error.
            raise BaseFrameworkException('Error while parsing sitemap.xml')
        else:
            raw_url_list = dom.getElementsByTagName("loc")
            # NOTE(review): nothing is appended to `parsed_url_list` in the
            # code visible here - confirm where the URLs are collected.
            parsed_url_list = []
            for url in raw_url_list:
                try:
                    url = url.childNodes[0].data
                    url = URL(url)
                except ValueError as ve:
                    msg = 'Sitemap file had an invalid URL: "%s"'
                    om.out.debug(msg % ve)
                except Exception:
                    # Same reasoning as above: do not swallow user
                    # interrupts while walking the <loc> nodes.
                    om.out.debug('Sitemap file had an invalid format')
def _confirm_file_upload(self, path, mutant, http_response):
    """
    Confirm that the file really landed at `path` and report it.

    :param path: The URL where we suspect that a file was uploaded to.
    :param mutant: The mutant that originated the file on the remote end
    :param http_response: The HTTP response associated with sending mutant
    :return: None, the vulnerability (if any) is stored in the kb.
    """
    get_response = self._uri_opener.GET(path, cache=False)

    file_found = not is_404(get_response)
    if not (file_found and self._has_no_bug(mutant)):
        return

    # This is necessary: without it the session saver breaks because
    # REAL file objects can't be pickled
    mutant.set_mod_value('<file_object>')

    desc = 'A file upload to a directory inside the webroot was found at: %s'
    desc = desc % mutant.found_at()

    v = Vuln.from_mutant('Insecure file upload',
                         desc,
                         severity.HIGH,
                         [http_response.id, get_response.id],
                         self.get_name(),
                         mutant)

    v['file_dest'] = get_response.get_url()
    v['file_vars'] = mutant.get_file_vars()

    self.kb_append_uniq(self, 'file_upload', v)
def _find_dwsync(self, domain_path):
    """
    Request the dwsync file for `domain_path` and collect the URLs of the
    files listed in it.

    :param domain_path: The directory URL to join with self.DWSYNC.
    :return: None
    """
    dwsync_url = domain_path.url_join(self.DWSYNC)
    response = self.http_get_and_parse(dwsync_url)

    if is_404(response):
        return

    if '</dwsync>' not in response.get_body():
        return

    om.out.debug('Parsing dwsync.xml file at %s' % dwsync_url)

    try:
        dom = xml.dom.minidom.parseString(response.get_body())
    except Exception as e:
        msg = 'Exception while parsing dwsync.xml file at %s : "%s"'
        om.out.debug(msg % (dwsync_url, e))
        return

    # NOTE(review): `parsed_url_list` is not consumed by the code visible
    # here - confirm where the collected URLs are used.
    parsed_url_list = set()

    for file_entry in dom.getElementsByTagName('file'):
        try:
            _file = file_entry.getAttribute('name')
            url = domain_path.url_join(_file)
            parsed_url_list.add(url)
        except ValueError as ve:
            msg = 'dwsync file had an invalid URL: "%s"'
            om.out.debug(msg % ve)
        except Exception as e:
            # The message used to say "Sitemap file", which pointed to the
            # wrong parser when reading the debug log.
            msg = 'dwsync file had an invalid format: "%s"'
            om.out.debug(msg % e)
def discover(self, fuzzable_request, debugging_id):
    """
    For every directory, fetch a list of files and analyze the response.

    :param debugging_id: A unique identifier for this call to discover()
    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    """
    for domain_path in fuzzable_request.get_url().get_directories():

        if domain_path not in self._analyzed_dirs:
            # Save the domain_path so I know I'm not working in vain
            self._analyzed_dirs.add(domain_path)

            # Request the file
            frontpage_info_url = domain_path.url_join("_vti_inf.html")

            try:
                response = self._uri_opener.GET(frontpage_info_url,
                                                cache=True)
            except BaseFrameworkException as w3:
                fmt = ('Failed to GET Frontpage Server _vti_inf.html file:'
                       ' "%s". Exception: "%s".')
                om.out.debug(fmt % (frontpage_info_url, w3))
                continue

            # Check if it's a Frontpage Info file
            if is_404(response):
                continue

            self.output_queue.put(FuzzableRequest(response.get_uri()))
            self._analyze_response(response)
def _send_and_check(self, repo_url, repo_get_files, repo, domain_path):
    """
    Check if a repository index exists in the domain_path.

    :param repo_url: The URL of the repository index file to request.
    :param repo_get_files: Callable which receives the raw response body
                           and extracts the file names from it.
    :param repo: Not used by the code visible in this method.
    :param domain_path: Not used by the code visible in this method.
    :return: None, everything is saved to the self.out_queue.
    """
    get_kwargs = {'binary_response': True,
                  'respect_size_limit': False}
    http_response = self.http_get_and_parse(repo_url, **get_kwargs)

    if is_404(http_response):
        return

    raw_body = http_response.get_raw_body

    try:
        # NOTE(review): `filenames` is not consumed by the code visible here
        filenames = repo_get_files(raw_body())
    except Exception as e:
        # We get here when the HTTP response is NOT a 404, but the response
        # body couldn't be properly parsed. This is usually because of a
        # false positive in the is_404 function, OR a new version-format of
        # the file to be parsed.
        #
        # Log in order to be able to improve the framework.
        msg = 'Got a "%s" exception while running "%s" on "%s"'
        om.out.debug(msg % (e, repo_get_files.__name__, repo_url))
def _do_request(self, url, mutant):
    """
    Perform a simple GET to see if the result is an error or not, and then
    run the actual fuzzing.

    :param url: Not used by this method.
    :param mutant: The mutant to send.
    """
    response = self._uri_opener.GET(mutant, cache=True,
                                    headers=self._headers)

    if is_404(response):
        return

    if response.get_code() in (403, 401):
        return

    if self._return_without_eval(mutant):
        return

    # Create the fuzzable request and send it to the core
    fr = FuzzableRequest.from_http_response(response)
    self.output_queue.put(fr)

    #
    # Save it to the kb (if new)!
    #
    is_new = response.get_url() not in self._seen

    if is_new and response.get_url().get_file_name():
        desc = ('A potentially interesting file was found at: "%s".'
                % response.get_url())

        i = Info('Potentially interesting file', desc, response.id,
                 self.get_name())
        i.set_url(response.get_url())

        kb.kb.append(self, 'files', i)
        om.out.information(i.get_desc())

        # Report only once
        self._seen.add(response.get_url())
def audit(self, freq, orig_response):
    """
    Searches for file upload vulns using a POST to author.dll.

    :param freq: A FuzzableRequest
    :param orig_response: Not used by this method.
    """
    domain_path = freq.get_url().get_domain_path()

    if kb.kb.get(self, 'frontpage'):
        # Nothing to do, I have found vuln(s) and I should stop on first
        msg = ('Not verifying if I can upload files to: "%s" using'
               ' author.dll. Because I already found a vulnerability.')
        # The placeholder was never filled in, which left a literal "%s"
        # in the debug log.
        om.out.debug(msg % domain_path)
        return

    # I haven't found any vulns yet, OR i'm trying to find every
    # directory where I can write a file.
    if domain_path not in self._already_tested:
        self._already_tested.add(domain_path)

        # Find a file that doesn't exist and then try to upload it
        for _ in xrange(3):
            rand_file = rand_alpha(5) + '.html'
            rand_path_file = domain_path.url_join(rand_file)
            res = self._uri_opener.GET(rand_path_file)

            if is_404(res):
                upload_id = self._upload_file(domain_path, rand_file)
                self._verify_upload(domain_path, rand_file, upload_id)
                break
        else:
            # for/else: none of the three random names returned a 404
            msg = ('frontpage plugin failed to find a 404 page. This is'
                   ' mostly because of an error in 404 page detection.')
            om.out.error(msg)
def _send_and_check(self, repo_url, repo_get_files, repo, domain_path):
    """
    Check if a repository index exists in the domain_path.

    :param repo_url: The URL of the repository index file to request.
    :param repo_get_files: Callable which receives the response body and
                           extracts the file names from it.
    :param repo: The repository name, used in the vulnerability description
                 and as the kb location.
    :param domain_path: The directory URL each file name is joined with.
    :return: None, everything is saved to the self.out_queue.
    """
    http_response = self.http_get_and_parse(repo_url)

    if is_404(http_response):
        return

    filenames = repo_get_files(http_response.get_body())

    parsed_url_set = set()

    for filename in self._clean_filenames(filenames):
        test_url = domain_path.url_join(filename)

        if test_url not in self._analyzed_filenames:
            parsed_url_set.add(test_url)
            # Remember the same object the membership test above uses.
            # Storing the bare filename meant the check never matched and
            # the same URLs were requested again.
            self._analyzed_filenames.add(test_url)

    self.worker_pool.map(self.http_get_and_parse, parsed_url_set)

    if parsed_url_set:
        desc = ('A %s was found at: "%s"; this could indicate that'
                ' a %s is accessible. You might be able to download'
                ' the Web application source code.')
        desc = desc % (repo, http_response.get_url(), repo)

        v = Vuln('Source code repository', desc, severity.MEDIUM,
                 http_response.id, self.get_name())
        v.set_url(http_response.get_url())

        kb.kb.append(self, repo, v)
        om.out.vulnerability(v.get_desc(), severity=v.get_severity())
def discover(self, fuzzable_request):
    """
    For every directory, fetch a list of files and analyze the response.

    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    """
    for domain_path in fuzzable_request.get_url().get_directories():

        if domain_path not in self._analyzed_dirs:
            # Save the domain_path so I know I'm not working in vain
            self._analyzed_dirs.add(domain_path)

            # Request the file
            frontpage_info_url = domain_path.url_join("_vti_inf.html")

            try:
                response = self._uri_opener.GET(frontpage_info_url,
                                                cache=True)
            except BaseFrameworkException as w3:
                fmt = ('Failed to GET Frontpage Server _vti_inf.html file:'
                       ' "%s". Exception: "%s".')
                om.out.debug(fmt % (frontpage_info_url, w3))
                continue

            # Check if it's a Frontpage Info file
            if is_404(response):
                continue

            self.output_queue.put(FuzzableRequest(response.get_uri()))
            self._analyze_response(response)
def _classic_worker(self, gh, search_term):
    """
    Perform the searches and store the results in the kb.

    :param gh: The GHDB entry being searched; its `desc` attribute is used
               in the vulnerability description.
    :param search_term: The query sent to the search engine.
    :return: None, findings go to the kb and to self.output_queue.
    """
    google_list = self._google_se.get_n_results(search_term, 9)

    for result in google_list:
        # I found a vuln in the site!
        response = self._uri_opener.GET(result.URL, cache=True)

        if is_404(response):
            continue

        desc = ('ghdb plugin found a vulnerability at URL: "%s".'
                ' According to GHDB the vulnerability description'
                ' is "%s".')
        desc = desc % (response.get_url(), gh.desc)

        v = Vuln('Google hack database match', desc, severity.MEDIUM,
                 response.id, self.get_name())
        v.set_url(response.get_url())
        v.set_method('GET')

        kb.kb.append(self, 'vuln', v)

        # Report with the severity stored in the Vuln (MEDIUM) so the
        # output and the kb agree; it used to be logged as LOW.
        om.out.vulnerability(v.get_desc(), severity=v.get_severity())

        # Create the fuzzable requests
        for fr in self._create_fuzzable_requests(response):
            self.output_queue.put(fr)
def _is_possible_backdoor(self, response):
    """
    Heuristic to infer if the content of <response> has the pattern of a
    backdoor response.

    Two checks are applied to non-404 responses: first the DOM is searched
    for the element/attribute values listed in BACKDOOR_COLLECTION, then the
    body is searched for the KNOWN_OFFENSIVE_WORDS patterns.

    :param response: HTTPResponse object
    :return: A bool value
    """
    if is_404(response):
        return False

    body_text = response.get_body()
    dom = response.get_dom()

    if dom is not None:
        for ele, attrs in BACKDOOR_COLLECTION.iteritems():
            for attrname, attr_vals in attrs.iteritems():
                # Set of lowered attribute values
                nodes = dom.xpath('//%s[@%s]' % (ele, attrname))
                dom_attr_vals = set(n.get(attrname).lower() for n in nodes)

                # If at least one elem in intersection return True.
                # This has to be a set intersection (&): a boolean `and`
                # is true whenever both sets are non-empty, which flagged
                # any page using the attribute at all.
                if dom_attr_vals & set(attr_vals):
                    return True

    # If no regex matched then try with keywords. At least 2 should be
    # contained in 'body_text' to succeed.
    times = 0
    for back_kw in KNOWN_OFFENSIVE_WORDS:
        if re.search(back_kw, body_text, re.I):
            times += 1
            if times == 2:
                return True

    return False
def crawl(self, fuzzable_request):
    """
    Finds the version of a WordPress installation.

    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    :raise RunOnce: When wp-login.php was already analyzed in a past call.
    """
    if not self._exec:
        # This will remove the plugin from the crawl plugins to be run.
        raise RunOnce()

    #
    # Check if the server is running wp
    #
    domain_path = fuzzable_request.get_url().get_domain_path()

    # Main scan URL passed from w3af + unique wp file
    wp_unique_url = domain_path.url_join('wp-login.php')
    response = self._uri_opener.GET(wp_unique_url, cache=True)

    # If wp_unique_url is a 404 there is no wordpress here
    if is_404(response):
        return

    # It was possible to analyze wp-login.php, don't run again
    self._exec = False

    # Analyze the identified wordpress installation
    self._fingerprint_wordpress(domain_path, wp_unique_url, response)

    # Extract the links
    for fr in self._create_fuzzable_requests(response):
        self.output_queue.put(fr)
def discover(self, fuzzable_request):
    """
    Checks if JBoss Interesting Directories exist in the target server.
    Also verifies some vulnerabilities.

    :param fuzzable_request: The request whose base URL is tested against
                             every entry in JBOSS_VULNS.
    """
    base_url = fuzzable_request.get_url().base_url()

    otm_send_request = one_to_many(self.send_request)
    args_iter = izip(repeat(base_url), self.JBOSS_VULNS)

    response_pool = self.worker_pool.imap_unordered(otm_send_request,
                                                    args_iter)

    for vuln_db_instance, response in response_pool:

        if is_404(response):
            continue

        vuln_url = base_url.url_join(vuln_db_instance['url'])
        name = vuln_db_instance['name']
        desc = vuln_db_instance['desc']

        is_info = vuln_db_instance['type'] == 'info'

        if is_info:
            o = Info(name, desc, response.id, self.get_name())
        else:
            o = Vuln(name, desc, severity.LOW, response.id,
                     self.get_name())

        o.set_url(vuln_url)
        kb.kb.append(self, 'find_jboss', o)

        # Let the core know about everything found in the response
        for fr in self._create_fuzzable_requests(response):
            self.output_queue.put(fr)
def grep(self, request, response):
    """
    Plugin entry point, search for meta tags.

    :param request: The HTTP request object.
    :param response: The HTTP response object
    :return: None, findings are stored in the kb.
    """
    if not response.is_text_or_html() or is_404(response):
        return

    try:
        dp = parser_cache.dpc.get_document_parser_for(response)
    except BaseFrameworkException:
        return

    meta_tag_list = dp.get_meta_tags()

    for tag in meta_tag_list:
        for attr_name, attr_value in tag.items():

            if not attr_name or not attr_value:
                # Skip attributes with an empty name or value, the `in`
                # checks below can not handle them.
                # https://github.com/andresriancho/w3af/issues/2012
                continue

            for word in self.INTERESTING_WORDS:

                # Check if we have something interesting and WHERE that
                # thing actually is
                if word in attr_name:
                    where = ATTR_NAME
                    content = attr_name
                elif word in attr_value:
                    where = ATTR_VALUE
                    content = attr_value
                else:
                    # Go to the next one if nothing is found
                    continue

                # Now... if we found something, report it =)
                desc = ('The URI: "%s" sent a <meta> tag with the attribute'
                        ' %s set to "%s" which looks interesting.')
                desc %= (response.get_uri(), where, content)

                tag_name = self._find_tag_name(tag)
                usage = self.INTERESTING_WORDS.get(tag_name, None)
                if usage is not None:
                    desc += ' The tag is used for %s.' % usage

                i = Info('Interesting META tag', desc, response.id,
                         self.get_name())
                i.set_uri(response.get_uri())
                i.add_to_highlight(where, content)
                i[CONTENT] = content
                i[WHERE] = where

                self.kb_append_uniq_group(self, 'meta_tags', i,
                                          group_klass=MetaTagsInfoSet)
def grep(self, request, response):
    """
    Plugin entry point: look for <meta> tags with interesting contents.

    Only text/HTML responses which are not a 404 are analyzed. A finding
    is created for every (attribute, interesting word) match and stored in
    the knowledge base grouped with MetaTagsInfoSet.

    :param request: The HTTP request object.
    :param response: The HTTP response object
    :return: None
    """
    if not response.is_text_or_html() or is_404(response):
        return

    try:
        doc_parser = parser_cache.dpc.get_document_parser_for(response)
    except BaseFrameworkException:
        return

    for meta_tag in doc_parser.get_meta_tags():
        for attr_name, attr_value in meta_tag.items():

            if not (attr_name and attr_value):
                # Nothing to match against, see
                # https://github.com/andresriancho/w3af/issues/2012
                continue

            for word in self.INTERESTING_WORDS:

                # Find out WHERE the interesting word is, the attribute
                # name has precedence over the attribute value
                if word in attr_name:
                    where, content = ATTR_NAME, attr_name
                elif word in attr_value:
                    where, content = ATTR_VALUE, attr_value
                else:
                    continue

                template = ('The URI: "%s" sent a <meta> tag with the attribute'
                            ' %s set to "%s" which looks interesting.')
                desc = template % (response.get_uri(), where, content)

                # Add the known usage for this tag, if there is one
                usage = self.INTERESTING_WORDS.get(self._find_tag_name(meta_tag),
                                                   None)
                if usage is not None:
                    desc += ' The tag is used for %s.' % usage

                info = Info('Interesting META tag', desc, response.id,
                            self.get_name())
                info.set_uri(response.get_uri())
                info.add_to_highlight(where, content)
                info[CONTENT] = content
                info[WHERE] = where

                self.kb_append_uniq_group(self, 'meta_tags', info,
                                          group_klass=MetaTagsInfoSet)
def crawl(self, fuzzable_request):
    """
    Get the GeneXus execute.xml and DeveloperMenu.xml files and parse them.

    For each file that exists and contains "</ObjLink>" an Info object is
    saved to the kb, the response is sent to the output queue as fuzzable
    requests and the <ObjLink> elements are resolved against the base URL.

    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    :raise BaseFrameworkException: When the file can not be parsed as XML.
    """
    base_url = fuzzable_request.get_url().base_url()

    for file_name in ('execute.xml', 'DeveloperMenu.xml'):

        genexus_url = base_url.url_join(file_name)
        http_response = self._uri_opener.GET(genexus_url, cache=True)

        if '</ObjLink>' not in http_response or is_404(http_response):
            continue

        # Save it to the kb!
        desc = ('The "%s" file was found at: "%s", this file might'
                ' expose private URLs and requires a manual review. The'
                ' scanner will add all URLs listed in this file to the'
                ' crawl queue.')
        desc = desc % (file_name, genexus_url)

        title_info = 'GeneXus "%s" file' % file_name

        i = Info(title_info, desc, http_response.id, self.get_name())
        i.set_url(genexus_url)

        kb.kb.append(self, file_name, i)
        om.out.information(i.get_desc())

        # Extract the links
        om.out.debug('Analyzing "%s" file.' % file_name)
        for fr in self._create_fuzzable_requests(http_response):
            self.output_queue.put(fr)

        om.out.debug('Parsing xml file with xml.dot.minidom.')
        try:
            dom = xml.dom.minidom.parseString(http_response.get_body())
        except Exception:
            # Only real errors are translated, a bare except would also
            # trap KeyboardInterrupt and SystemExit
            raise BaseFrameworkException('Error while parsing "%s"'
                                         % file_name)

        # NOTE(review): parsed_url_list is filled but not consumed in the
        # code visible here - confirm if it should be sent to the worker
        # pool
        parsed_url_list = []

        for obj_link in dom.getElementsByTagName("ObjLink"):
            try:
                url = obj_link.childNodes[0].data
                url = base_url.url_join(url)
            except ValueError as ve:
                msg = '"%s" file had an invalid URL "%s"'
                om.out.debug(msg % (file_name, ve))
            except Exception:
                msg = '"%s" file had an invalid format'
                om.out.debug(msg % file_name)
            else:
                parsed_url_list.append(url)
def _setup_404_detection(self):
    """
    Send one request to each configured target and use the response to
    initialize the 404 detection.

    :raise ScanMustStopByUserRequest: Re-raised untouched.
    :raise ScanMustStopException: When the HTTP request to a target fails
                                  or when the 404 detection can not be
                                  initialized with the response.
    """
    #
    # NOTE: I need to perform this test here in order to avoid some weird
    # thread locking that happens when the webspider calls is_404, and
    # because I want to initialize the is_404 database in a controlled
    # try/except block.
    #
    from w3af.core.controllers.core_helpers.fingerprint_404 import is_404

    for url in cf.cf.get('targets'):
        # The HTTP request and the 404 initialization are handled in
        # different try blocks to let the user know which step failed and
        # for which target URL
        try:
            response = self._w3af_core.uri_opener.GET(url, cache=True)
        except ScanMustStopByUserRequest:
            raise
        except Exception as e:
            msg = ('Failed to send HTTP request to the configured target'
                   ' URL "%s", the original exception was: "%s" (%s).')
            args = (url, e, e.__class__.__name__)
            raise ScanMustStopException(msg % args)

        try:
            is_404(response)
        except ScanMustStopByUserRequest:
            raise
        except Exception as e:
            msg = ('Failed to initialize the 404 detection using HTTP'
                   ' response from "%s", the original exception was: "%s"'
                   ' (%s).')
            args = (url, e, e.__class__.__name__)
            raise ScanMustStopException(msg % args)
def grep(self, request, response):
    """
    Plugin entry point: report source code disclosed in HTTP responses.

    A match found in a 404 page is reported only once, this is controlled
    with self._report_404_match.

    Unit tests are available at plugins/grep/tests.

    :param request: The HTTP request object.
    :param response: The HTTP response object
    :return: None
    """
    if not response.is_text_or_html():
        return

    # Performance: once the 404 finding was reported, a match in another
    # 404 response would be discarded, so don't even look for it
    if is_404(response) and not self._report_404_match:
        return

    match, lang = contains_source_code(response)
    if not match:
        return

    if is_404(response) and self._report_404_match:
        # Only report 404 findings once
        self._report_404_match = False

        name = u'Code disclosure vulnerability in 404 page'
        template = (u'The URL: "%s" has a %s code disclosure'
                    u' vulnerability in the customized 404 script.')
    else:
        name = u'Code disclosure vulnerability'
        template = u'The URL: "%s" has a %s code disclosure vulnerability.'

    # Report the vulnerability
    target_url = response.get_url()
    languages = ' or '.join(lang)
    desc = template % (target_url, languages)

    vuln = Vuln(name, desc, severity.LOW, response.id, self.get_name())
    vuln.set_url(response.get_url())
    vuln.add_to_highlight(match.group())

    self.kb_append_uniq(self, 'code_disclosure', vuln, 'URL')
def crawl(self, fuzzable_request):
    """
    Get the GeneXus execute.xml and DeveloperMenu.xml files and parse them.

    Files which exist and contain "</ObjLink>" are reported to the kb, the
    response is queued as fuzzable requests and every <ObjLink> element is
    joined with the base URL.

    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    :raise BaseFrameworkException: When the file can not be parsed as XML.
    """
    base_url = fuzzable_request.get_url().base_url()

    for file_name in ('execute.xml', 'DeveloperMenu.xml'):

        genexus_url = base_url.url_join(file_name)
        http_response = self._uri_opener.GET(genexus_url, cache=True)

        found = '</ObjLink>' in http_response and not is_404(http_response)
        if not found:
            continue

        # Save it to the kb!
        desc = ('The "%s" file was found at: "%s", this file might'
                ' expose private URLs and requires a manual review. The'
                ' scanner will add all URLs listed in this file to the'
                ' crawl queue.')
        desc = desc % (file_name, genexus_url)

        title_info = 'GeneXus "%s" file' % file_name

        i = Info(title_info, desc, http_response.id, self.get_name())
        i.set_url(genexus_url)

        kb.kb.append(self, file_name, i)
        om.out.information(i.get_desc())

        # Extract the links
        om.out.debug('Analyzing "%s" file.' % file_name)
        for fr in self._create_fuzzable_requests(http_response):
            self.output_queue.put(fr)

        om.out.debug('Parsing xml file with xml.dot.minidom.')
        try:
            dom = xml.dom.minidom.parseString(http_response.get_body())
        except Exception:
            # Catch Exception instead of everything, the user must still
            # be able to interrupt the process with Ctrl+C
            raise BaseFrameworkException('Error while parsing "%s"'
                                         % file_name)

        # NOTE(review): the visible code never reads parsed_url_list after
        # filling it - verify if the URLs should be requested
        parsed_url_list = []

        for link_node in dom.getElementsByTagName("ObjLink"):
            try:
                url = link_node.childNodes[0].data
                url = base_url.url_join(url)
            except ValueError as ve:
                msg = '"%s" file had an invalid URL "%s"'
                om.out.debug(msg % (file_name, ve))
            except Exception:
                msg = '"%s" file had an invalid format'
                om.out.debug(msg % file_name)
            else:
                parsed_url_list.append(url)
def _verify_reference(self, reference, original_request, original_url,
                      possibly_broken):
    """
    GET a newly found link and send the fuzzable requests created from the
    response to the output queue.

    :param reference: The URL to request.
    :param original_request: The request where the reference was found, it
                             is passed to _create_fuzzable_requests.
    :param original_url: The URL used to build the Referer header.
    :param possibly_broken: When True a 404 response for the reference is
                            not saved as a broken link.
    :return: None, the results are sent to self.output_queue.
    """
    #
    # Using the complete original_url as referer "breaks" the cache=True
    # in most cases, using only the base URL does not, and it is
    # friendlier than simply ignoring the referer
    #
    referer = original_url.base_url().url_string
    headers = Headers([('Referer', referer)])

    try:
        resp = self._uri_opener.GET(reference, cache=True, headers=headers)
    except ScanMustStopOnUrlError:
        return

    if not is_404(resp):
        om.out.debug('Adding relative reference "%s" '
                     'to the result.' % reference)
        created = self._create_fuzzable_requests(resp,
                                                 request=original_request)
        fuzz_req_list = []
        fuzz_req_list.extend(created)
    else:
        # Note: I WANT to follow links that are in the 404 page, but
        # if the page I fetched is a 404 then it should be ignored.
        #
        # add_self will be True when the response code is 401 or 403
        # which is something needed for other plugins to keep poking
        # at that URL
        #
        # add_self will be False in all the other cases, for example
        # in the case where the response code is a 404, because we don't
        # want to return a 404 to the core.
        add_self = resp.get_code() in self.NOT_404

        fuzz_req_list = self._create_fuzzable_requests(
            resp, request=original_request, add_self=add_self)

        if not (possibly_broken or add_self):
            broken_link = (resp.get_url(), original_request.get_uri())
            self._broken_links.add(broken_link)

    # Process the list.
    for fuzz_req in fuzz_req_list:
        fuzz_req.set_referer(referer)
        self.output_queue.put(fuzz_req)
def grep(self, request, response):
    """
    Plugin entry point: search for <meta> tags with interesting contents.

    The decision to report an attribute is delegated to
    self._should_report, findings are stored in the kb once per URL.

    :param request: The HTTP request object.
    :param response: The HTTP response object
    :return: None
    """
    if not response.is_text_or_html() or is_404(response):
        return

    try:
        doc_parser = parser_cache.dpc.get_document_parser_for(response)
    except BaseFrameworkException:
        return

    for meta_tag in doc_parser.get_meta_tags():
        for attr_name, attr_value in meta_tag.items():
            for word in self.INTERESTING_WORDS:

                # Check if we have something interesting and WHERE that
                # thing actually is, both stay None when there is no match
                if word in attr_name:
                    where, content = self.ATTR_NAME, attr_name
                elif word in attr_value:
                    where, content = self.ATTR_VALUE, attr_value
                else:
                    where = content = None

                if not self._should_report(attr_name, attr_value, where):
                    continue

                # The attribute is interesting, report it =)
                template = ('The URI: "%s" sent a <meta> tag with attribute'
                            ' %s set to "%s" which looks interesting.')
                desc = template % (response.get_uri(), where, content)

                tag_name = self._find_name(meta_tag)
                usage = self.INTERESTING_WORDS.get(tag_name, None)
                if usage:
                    desc += ' The tag is used for %s.' % usage

                info = Info('Interesting META tag', desc, response.id,
                            self.get_name())
                info.set_uri(response.get_uri())
                info.add_to_highlight(where, content)

                self.kb_append_uniq(self, 'meta_tags', info, 'URL')
def grep(self, request, response):
    """
    Plugin entry point, search for interesting <meta> tags.

    Attribute names and values of each <meta> tag are compared with the
    keys of self.INTERESTING_WORDS, matches are stored in the kb once per
    URL.

    :param request: The HTTP request object.
    :param response: The HTTP response object
    :return: None
    """
    if not response.is_text_or_html() or is_404(response):
        return

    try:
        dp = parser_cache.dpc.get_document_parser_for(response)
    except BaseFrameworkException:
        return

    for tag in dp.get_meta_tags():
        tag_name = self._find_name(tag)

        for key, val in tag.items():

            if not key or not val:
                # An empty name or value can not contain an interesting
                # word, and a None value would make the "word in val"
                # test below raise a TypeError
                continue

            for word in self.INTERESTING_WORDS:

                # Check if we have something interesting
                # and WHERE that thing actually is
                if word in key:
                    where = "name"
                    content = key
                elif word in val:
                    where = "value"
                    content = val
                else:
                    continue

                # The attribute is interesting, report it =)
                fmt = (
                    'The URI: "%s" sent a <meta> tag with attribute'
                    ' %s set to "%s" which looks interesting.'
                )
                desc = fmt % (response.get_uri(), where, content)

                usage = self.INTERESTING_WORDS.get(tag_name, None)
                if usage:
                    desc += " The tag is used for %s." % usage

                i = Info("Interesting META tag", desc, response.id,
                         self.get_name())
                i.set_uri(response.get_uri())
                i.add_to_highlight(where, content)

                self.kb_append_uniq(self, "meta_tags", i, "URL")
def grep(self, request, response):
    """
    Guess the language of the site using the text of the HTTP response
    body, the result is written to the kb with the 'lang' key.

    The plugin stops analyzing responses when the language is found or
    when self._tries_left reaches zero.

    :param request: The HTTP request object.
    :param response: The HTTP response object
    """
    with self._plugin_lock:

        skip = (not self._exec
                or not response.is_text_or_html()
                or is_404(response))
        if skip:
            return

        text = response.get_clear_text_body().lower()

        try:
            language = guess_language.guessLanguage(text)
        except IndexError:
            # I don't care about exception handling of the external lib
            language = 'UNKNOWN'

        if language != 'UNKNOWN':
            # Only run until we find the page language
            self._exec = False

            om.out.information('The page is written in: "%s".' % language)
            kb.kb.raw_write(self, 'lang', language)
            return

        # None means "I'm still trying"
        kb.kb.raw_write(self, 'lang', None)

        # Keep running until self._tries_left is zero
        self._tries_left -= 1

        if self._tries_left == 0:
            om.out.debug('Could not determine the site language using the'
                         ' first 25 HTTP responses, not enough text to make'
                         ' a good analysis.')

            # unknown means I'll stop testing because I don't
            # have any idea about the target's language
            kb.kb.raw_write(self, 'lang', 'unknown')
            self._exec = False
def crawl(self, fuzzable_request):
    """
    Get the robots.txt file and parse it.

    When the file exists it is reported to the kb and the file itself,
    together with every URL found in its Allow / Disallow lines, is sent
    to self.http_get_and_parse using the worker pool.

    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    """
    dirs = []

    base_url = fuzzable_request.get_url().base_url()
    robots_url = base_url.url_join('robots.txt')
    http_response = self._uri_opener.GET(robots_url, cache=True)

    if is_404(http_response):
        return

    # Save it to the kb!
    desc = ('A robots.txt file was found at: "%s", this file might'
            ' expose private URLs and requires a manual review. The'
            ' scanner will add all URLs listed in this files to the'
            ' analysis queue.')
    desc %= robots_url

    i = Info('robots.txt file', desc, http_response.id, self.get_name())
    i.set_url(robots_url)

    kb.kb.append(self, 'robots.txt', i)
    om.out.information(i.get_desc())

    # Work with it...
    dirs.append(robots_url)

    for line in http_response.get_body().split('\n'):
        line = line.strip()

        # Empty lines and comments never start with these prefixes, no
        # need for an extra check
        if not line.upper().startswith(('ALLOW', 'DISALLOW')):
            continue

        url = line[line.find(':') + 1:]
        url = url.strip()

        try:
            url = base_url.url_join(url)
        except Exception:
            # Simply ignore the invalid URL, but let KeyboardInterrupt
            # and SystemExit reach the caller
            pass
        else:
            dirs.append(url)

    self.worker_pool.map(self.http_get_and_parse, dirs)
def _send_and_check(self, url):
    """
    GET the URL and, when the response is not a 404, run the gears
    manifest and the crossdomain / clientaccesspolicy analysis on it.

    :param url: The URL to request.
    :return: None
    """
    http_response = self._uri_opener.GET(url, cache=True)

    if not is_404(http_response):
        file_name = url.get_file_name()

        msg = 'Checking response for %s in ria_enumerator.'
        om.out.debug(msg % http_response)

        self._analyze_gears_manifest(url, http_response, file_name)
        self._analyze_crossdomain_clientaccesspolicy(url, http_response,
                                                     file_name)
def _extract_urls(self, fuzzable_request, response): """ Extract information from the server-status page and return fuzzable requests to the caller. """ res = self._create_fuzzable_requests(response) # Now really parse the file and create custom made fuzzable requests regex = "<td>.*?<td nowrap>(.*?)</td><td nowrap>.*? (.*?) HTTP/1" for domain, path in re.findall(regex, response.get_body()): if "unavailable" in domain: domain = response.get_url().get_domain() # Check if the requested domain and the found one are equal. if domain == response.get_url().get_domain(): proto = response.get_url().get_protocol() found_url = proto + "://" + domain + path found_url = URL(found_url) # They are equal, request the URL and create the fuzzable # requests tmp_res = self._uri_opener.GET(found_url, cache=True) if not is_404(tmp_res): res.extend(self._create_fuzzable_requests(tmp_res)) else: # This is a shared hosting server self._shared_hosting_hosts.append(domain) # Now that we are outsite the for loop, we can report the possible vulns if len(self._shared_hosting_hosts): desc = "The web application under test seems to be in a shared" " hosting." v = Vuln.from_fr("Shared hosting", desc, severity.MEDIUM, response.id, self.get_name(), fuzzable_request) self._shared_hosting_hosts = list(set(self._shared_hosting_hosts)) v["also_in_hosting"] = self._shared_hosting_hosts kb.kb.append(self, "shared_hosting", v) om.out.vulnerability(v.get_desc(), severity=v.get_severity()) msg = ( "This list of domains, and the domain of the web application" " under test, all point to the same server:" ) om.out.vulnerability(msg, severity=severity.MEDIUM) for url in self._shared_hosting_hosts: om.out.vulnerability("- " + url, severity=severity.MEDIUM) return res
def _compare_dir(self, arg, directory, flist): """ This function is the callback function called from os.path.walk, python's help says: walk(top, func, arg) Directory tree walk with callback function. For each directory in the directory tree rooted at top (including top itself, but excluding '.' and '..'), call func(arg, dirname, fnames). dirname is the name of the directory, and fnames a list of the names of the files and subdirectories in dirname (excluding '.' and '..'). func may modify the fnames list in-place (e.g. via del or slice assignment), and walk will only recurse into the subdirectories whose names remain in fnames; this can be used to implement a filter, or to impose a specific order of visiting. No semantics are defined for, or required of, arg, beyond that arg is always passed to func. It can be used, e.g., to pass a filename pattern, or a mutable object designed to accumulate statistics. Passing None for arg is common. """ if self._first: self._first = False self._start_path = directory relative_dir = directory.replace(self._start_path, '') if relative_dir and not relative_dir.endswith('/'): relative_dir += '/' remote_root = self._remote_url_path remote_root_with_local_path = remote_root.url_join(relative_dir) for fname in flist: if os.path.isfile(directory + os.path.sep + fname): url = remote_root_with_local_path.url_join(fname) response = self._uri_opener.GET(url, cache=True) if not is_404(response): if response.is_text_or_html(): fr = FuzzableRequest(response.get_url()) self.output_queue.put(fr) path = '%s%s%s' % (directory, os.path.sep, fname) self._check_content(response, path) self._exist_remote.append(url) else: self._not_exist_remote.append(url)