def sf_TARGET_WEB_CONTENT(self, sf_module, source, raw_data):
    """
    Handle a SpiderFoot TARGET_WEB_CONTENT event.

    :param sf_module: Name of the SpiderFoot module that fired the event.
    :type sf_module: str

    :param source: URL the content was downloaded from.
    :type source: str

    :param raw_data: Raw page content.
    :type raw_data: str

    :returns: The URL and HTML data objects, plus the reconstructed HTTP
        request/response objects once the status code and headers have
        also been collected for this URL.
    :rtype: tuple(Data)
    """
    url = URL(source)
    html = HTML(raw_data)
    url.add_information(html)
    # Stash the raw body so the HTTP response can be rebuilt later.
    self.reconstruct_http_data[source] = raw_data
    # If we already saw the status code and headers for this URL,
    # we now have everything needed to rebuild the HTTP objects.
    if source in self.reconstruct_http_code and \
            source in self.reconstruct_http_headers:
        return (url, html) + self.__reconstruct_http(source)
    return url, html
def test_auditdb_dump():
    """
    Test dumping an audit SQLite database to a SQL script file.

    Writes a batch of linked Data objects plus shared/mapped values into
    a fresh audit database, then dumps it to "test_auditdb.sql".
    """
    # Minimal orchestrator + audit configuration with the UI disabled.
    main_config = OrchestratorConfig()
    main_config.ui_mode = "disabled"
    audit_config = AuditConfig()
    audit_config.targets = ["www.example.com"]
    audit_config.audit_db = "test_auditdb.db"
    with PluginTester(main_config, audit_config) as t:
        disk = t.audit.database
        # The audit name is derived from the database filename.
        assert t.audit.name == "test_auditdb"
        assert type(disk) is AuditSQLiteDB
        assert disk.filename == "test_auditdb.db"
        print "Testing the audit database dump..."
        print " -> Writing..."
        # Populate the database with 30 linked triples of
        # resource + information + vulnerability.
        for x in xrange(30):
            d1 = URL("http://www.example.com/" + generate_random_string())
            d2 = Text(generate_random_string())
            d3 = UrlDisclosure(d1)
            d1.add_information(d2)
            disk.add_data(d1)
            disk.add_data(d2)
            disk.add_data(d3)
            disk.mark_plugin_finished(d1.identity, "some_plugin")
            disk.mark_plugin_finished(d2.identity, "some_plugin")
            disk.mark_plugin_finished(d3.identity, "some_plugin")
            disk.mark_stage_finished(d1.identity, 1)
            disk.mark_stage_finished(d2.identity, 2)
            disk.mark_stage_finished(d3.identity, 3)
        # Exercise every serializable value type in the shared containers.
        disk.add_shared_values("fake_set_id", (
            "string", u"unicode", 100, 200L, 5.0, True, False,
            complex(1, 1), None, frozenset({"string", 100, 1.0}),
            (None, True, False),
        ))
        disk.put_mapped_values("fake_map_id", (
            ("a_string", "string"),
            ("a_unicode_string", u"unicode"),
            ("an_integer", 100),
            ("a_long", 200L),
            ("a_float", 5.0),
            ("a_bool", True),
            ("another_bool", False),
            ("a_complex", complex(1, 1)),
            ("none", None),
            ("a_frozenset", frozenset({"string", 100, 1.0})),
            ("a_tuple", (None, True, False)),
        ))
        print " -> Dumping..."
        disk.dump("test_auditdb.sql")
def sf_TARGET_WEB_CONTENT(self, sf_module, source, raw_data):
    """
    Process downloaded page content reported by SpiderFoot.

    Links the HTML content to its URL, remembers the raw body for later
    HTTP reconstruction and, when the matching status code and headers
    are already known, returns the rebuilt HTTP objects as well.
    """
    page_url = URL(source)
    page_html = HTML(raw_data)
    page_url.add_information(page_html)
    self.reconstruct_http_data[source] = raw_data
    have_code = source in self.reconstruct_http_code
    have_headers = source in self.reconstruct_http_headers
    if have_code and have_headers:
        return (page_url, page_html) + self.__reconstruct_http(source)
    return page_url, page_html
def run(self, info):
    """
    Run the web spider against the given URL and convert the results
    to the GoLismero data model.

    :param info: Target URL to spider.
    :type info: URL

    :returns: Discovered URL resources.
    :rtype: list(URL)
    """
    m_return = []
    m_url = info.url

    # Build the cookie header string from the audit configuration.
    cookie_param = None
    cookie_dict = Config.audit_config.cookie
    # BUGFIX: was "!= None"; identity comparison is correct for None.
    if cookie_dict is not None:
        if hasattr(cookie_dict, "iteritems"):
            cookie_params = {
                to_utf8(k): to_utf8(v)
                for k, v in cookie_dict.iteritems()
            }
            cookie_param = ';'.join(
                '%s=%s' % (k, v)
                for (k, v) in sorted(cookie_params.iteritems()))

    # Launch the spider and parse its JSON output.
    __ = start_wvs_spider_dispatch(m_url, cookie_param, Logger)
    json_content = json.loads(__)
    for urls in json_content['info']:
        Logger.log_verbose("Web Spider:found url %s" % urls['fullurl'])
        m_resource = URL(url=urls['fullurl'])
        m_return.append(m_resource)
        for item_url in urls['content']:
            post_param = item_url['param_data']
            if "AcunetixBoundary_" in post_param:
                # multipart/form-data upload
                # BUGFIX: removed leftover debug "print method" statement.
                method = 'FILE_UPLOAD'
            else:
                method = item_url['method']
            if method == "POST":
                post_param_dict = argument_query(item_url['param_data'])
                m_resource = URL(url=item_url['url'],
                                 method="POST",
                                 post_params=post_param_dict,
                                 referer=urls['fullurl'])
            else:
                m_resource = URL(url=item_url['url'],
                                 method=method,
                                 referer=urls['fullurl'])
            Logger.log_verbose("Web Spider:found url %s" % item_url['url'])
            m_return.append(m_resource)

    # Send the results
    return m_return
def generate_results(unique_texts): """ Generates a list of results from a list of URLs as string format. :param unique_texts: list with a list of URL as string. :type unique_texts: list(URL) :return: a list of URL/UrlDiclosure. :type: list(URL|UrlDiclosure) """ # Analyze resutls m_results = [] m_results_append = m_results.append for l_match in unique_texts: # # Set disclosure vulnerability l_url = URL(l_match.url) l_vuln = UrlDisclosure(l_url) # Set impact l_vuln.risk = l_match.risk # Store m_results_append(l_url) m_results_append(l_vuln) return m_results
def parse_xsser_result(self, target, filename): """ Convert the result to GoLismero data model. :param target: Dectected URL. :type target: URL :param filename: Path to scan results file generated by XSSer. :type filename: str :return: Scan results. :rtype: list(XSSInjection) """ result = [] try: tree = ET.parse(filename) scan = tree.getroot() # Get the count of successful injections. # Abort if no injections were successful. node = scan.find('.//abstract/injections/successful') if node is None: return result successcount = int(node.text) if successcount <= 0: return result # Get the results. for node in scan.findall(".//results/attack"): _injection = self.get_subnode_text(node, "injection", None) if _injection is None: continue _browsers = self.get_subnode_text(node, "browsers", "IE") _method = self.get_subnode_text(node, "method", "GET") url = URL(url=target.url, method=_method, post_params=target.post_params if _method == "POST" else None, referer=target.referer) vul = XSS( url, vulnerable_params={"injection": _injection}, injection_point=XSS.INJECTION_POINT_URL, injection_type="XSS", ) vul.description += "\n\nBrowsers: %s\n" % _browsers result.append(vul) except Exception, e: tb = format_exc() Logger.log_error(str(e)) Logger.log_error_more_verbose(tb)
def run(self, info):
    """
    Test the target URL's GET parameters for XSS injection.

    :param info: Target URL to test.
    :type info: URL

    :returns: XSS vulnerabilities found, if any.
    :rtype: list(XSS)
    """
    #if not info.has_url_params and not info.has_post_params:
    #    return
    m_return = []
    #TODO 30X redirect
    #TODO Content-Type
    p = get_request(url=info, allow_redirects=False)
    # Skip redirects that carry no Location header.
    # NOTE(review): p.status is compared as a string — presumably the
    # response object stores the status code as str; confirm.
    if (p.status == '301' or p.status == '302') and not p.headers.get('Location'):
        return m_return
    # Skip non-HTML content types (JSON, JavaScript, XML, binary).
    if p.content_type is not None and re.search(
            '(application\/json)|(application\/javascript)|(text\/json)|(text\/javascript)|'
            '(application\/x-javascript)|(application\/octet-stream)|(text\/xml)|(application\/xml)',
            p.content_type) is not None:
        return m_return
    m_url = info
    if info.has_url_params:
        # Probe each GET parameter; stop at the first confirmed injection.
        for k, v in m_url.url_params.iteritems():
            key = to_utf8(k)
            value = to_utf8(v)
            if self.xss_detect(m_url, method='GET', k=key, v=value):
                url = URL(url=m_url.url, method='GET', post_params=None,
                          referer=m_url.referer)
                vul = XSS(url,
                          vulnerable_params={"injection": "xxxxxx"},
                          injection_point=XSS.INJECTION_POINT_URL,
                          injection_type="XSS")
                vul.description += "f**k"
                m_return.append(vul)
                break
    #return m_return
    # NOTE(review): POST parameter testing is not implemented yet;
    # this print is a debug placeholder.
    if info.has_post_params:
        print 'POST'
    # Send the results
    return m_return
def __reconstruct_http(self, raw_url):
    """
    Rebuild the HTTP request/response objects for a URL once the status
    code, headers and body have all been collected.

    :param raw_url: URL whose HTTP traffic is being reconstructed.
    :type raw_url: str

    :returns: The URL, request and response data objects.
    :rtype: tuple(URL, HTTP_Request, HTTP_Response)
    """
    url = URL(raw_url)
    req = HTTP_Request(
        method = "GET",
        url = raw_url,
    )
    req.add_resource(url)
    # SECURITY NOTE(review): the stored headers string is evaluated with
    # eval(), which executes arbitrary Python if the stored value can be
    # influenced by untrusted input. Consider ast.literal_eval() instead.
    resp = HTTP_Response(
        request = req,
        status = self.reconstruct_http_code[raw_url],
        headers = eval(self.reconstruct_http_headers[raw_url]),
        data = self.reconstruct_http_data[raw_url],
    )
    self.reconstructed_http[raw_url] = resp.identity
    # Drop the cached pieces now that the response has been rebuilt.
    del self.reconstruct_http_code[raw_url]
    del self.reconstruct_http_headers[raw_url]
    del self.reconstruct_http_data[raw_url]
    return url, req, resp
def parse_sqlmap_results(info, output_dir): """ Convert the output of a SQLMap scan to the GoLismero data model. :param info: Data object to link all results to (optional). :type info: URL :param output_filename: Path to the output filename. The format should always be XML. :type output_filename: :returns: Results from the SQLMap scan. :rtype: list(Data) """ # Example output file format: # # --- # Place: GET # Parameter: feria # Type: boolean-based blind # Title: AND boolean-based blind - WHERE or HAVING clause # Payload: feria=VG13' AND 8631=8631 AND 'VWDy'='VWDy&idioma=es&tipouso=I # --- # web application technology: Tomcat 5.0, JSP, Servlet 2.5 # back-end DBMS: Oracle # banner: 'Oracle Database 11g Release 11.2.0.3.0 - 64bit Production' results = [] # Get result file log_file = join(output_dir, info.parsed_url.host, "log") # Parse try: with open(log_file, "rU") as f: text = f.read() # Split injections m_banner = None m_backend = None m_technology = None tmp = [] for t in text.split("---"): # # Is ijection details? 
# l_injectable_place = re.search("(Place: )([a-zA-Z]+)", t) if l_injectable_place: # Common params l_inject_place = l_injectable_place.group(2) l_inject_param = re.search("(Parameter: )([\w\_\-]+)", t).group(2) l_inject_type = re.search("(Type: )([\w\- ]+)", t).group(2) l_inject_title = re.search("(Title: )([\w\- ]+)", t).group(2) l_inject_payload = re.search(r"""(Payload: )([\w\- =\'\"\%\&\$\)\(\?\¿\*\@\!\|\/\\\{\}\[\]\<\>\_\:,;\.]+)""", t).group(2) url = URL(info.url, method=l_inject_place, post_params=info.post_params, referer=info.referer) v = SQLInjection(url, title = "SQL Injection Vulnerability - " + l_inject_title, vulnerable_params = { l_inject_param : l_inject_payload }, injection_point = SQLInjection.str2injection_point(l_inject_place), injection_type = l_inject_type, ) tmp.append(v) # Get banner info if not m_banner: m_banner = re.search("(banner:[\s]*)(')([\w\- =\'\"\%\&\$\)\(\?\¿\*\@\!\|\/\\\{\}\[\]\<\>\_\.\:,;]*)(')", t) if m_banner: m_banner = m_banner.group(3) m_backend = re.search("(back-end DBMS:[\s]*)([\w\- =\'\"\%\&\$\)\(\?\¿\*\@\!\|\/\\\{\}\[\]\<\>\_\.\:,;]+)", t).group(2) m_technology = re.search("(web application technology:[\s]*)([\w\- =\'\"\%\&\$\)\(\?\¿\*\@\!\|\/\\\{\}\[\]\<\>\_\.\:,;]+)", t).group(2) # If banner was found, fill the vulns with these info for v in tmp: if m_banner: v.description = "Banner: %s\n\n%s\n%s" % (m_backend, m_backend, m_technology) results.append(v) # On error, log the exception. except Exception, e: Logger.log_error_verbose(str(e)) Logger.log_error_more_verbose(format_exc())
def run(self, info):
    """
    Test the target for the OpenSSL Heartbleed vulnerability.

    :param info: Target: either a base URL or an (IP, service
        fingerprint) relationship.
    :type info: BaseURL | Relationship(IP, ServiceFingerprint)

    :returns: A vulnerability object if the target is vulnerable,
        None otherwise.
    :rtype: VulnerableService | VulnerableWebApp | None
    """

    # If it's an URL...
    if info.is_instance(BaseURL):
        target = URL(info.url)

        # Get the hostname to test.
        hostname = info.hostname

        # If it's HTTPS, use the port number from the URL.
        if info.is_https:
            port = info.parsed_url.port

        # Otherwise, assume the port is 443.
        else:
            port = 443

        # Test this port.
        is_vulnerable = self.test(hostname, port)

    # If it's a service fingerprint...
    elif info.is_instance(Relationship(IP, ServiceFingerprint)):
        ip, fp = info.instances
        target = ip
        port = fp.port
        starttls = False

        # Ignore if the port does not respond directly to SSL...
        if fp.protocol != "SSL":

            # If it's SMTP, we need to issue a STARTTLS command first.
            if fp.name == "smtp":
                starttls = True

            # Ignore if the port does not support SSL.
            else:
                Logger.log_more_verbose(
                    "No SSL services found in fingerprint [%s] for IP %s,"
                    " aborting." % (fp, ip))
                return

        # Test this port.
        is_vulnerable = self.test(ip.address, port, starttls=starttls)

    # Internal error!
    else:
        assert False, "Unexpected data type received: %s" % type(info)

    # If it's vulnerable, report the vulnerability.
    if is_vulnerable:
        title = "OpenSSL Heartbleed Vulnerability"
        # BUGFIX: Heartbleed is CVE-2014-0160, not CVE-2014-0162.
        description = "An unpatched OpenSSL service was found that's" \
                      " vulnerable to the Heartbleed vulnerability" \
                      " (CVE-2014-0160). This vulnerability allows an" \
                      " attacker to dump the memory contents of the" \
                      " service running the flawed version of the" \
                      " OpenSSL library, potentially compromising" \
                      " usernames, passwords, private keys and other" \
                      " sensitive data."
        references = ["http://heartbleed.com/"]
        # BUGFIX: a trailing comma after the list made this a 1-tuple
        # containing a list instead of the list itself.
        cve = ["CVE-2014-0160"]
        if target.is_instance(IP):
            vuln = VulnerableService(
                target = target,
                port = port,
                protocol = "TCP",
                title = title,
                description = description,
                references = references,
                cve = cve,
            )
        elif target.is_instance(URL):
            vuln = VulnerableWebApp(
                target = target,
                title = title,
                description = description,
                references = references,
                cve = cve,
            )
        else:
            assert False, "Internal error!"
        return vuln
l_p = HTTP.get_url(l_url, callback=self.check_response) # FIXME handle exceptions! except: if l_p: discard_data(l_p) continue if l_p: if m_analyzer.analyze(l_p.data, url=l_url): match[l_url] = l_p else: discard_data(l_p) # Generate results for i in m_analyzer.unique_texts: l_url = i.url l_p = match.pop(l_url) m_result = URL(l_url, referer=m_url) m_result.add_information(l_p) m_return.append(m_result) m_return.append(l_p) # Check for posible suspicious URL if l_url in m_discovered_suspicious: v = SuspiciousURL(m_result, title = "Suspicious URL found un robots.txt", risk = 1, severity = 0, level = "low", description = "An URLs was found in Disallow tag of robots.txt. It can contain confidential content or some information leak.", tool_id = "robots") v.add_resource(info) m_return.append(v)
def helper_data_links():
    """
    Exercise the Data linking API: type checks, link minimums, the
    links/get_links accessors, linked-data traversal, the associated_*
    properties and LocalDataCache.on_finish().
    """

    # Create some dummy data: a URL resource, a Text information object
    # and a UrlDisclosure vulnerability, all cross-linked.
    from golismero.api.data.resource.url import URL
    from golismero.api.data.information.text import Text
    from golismero.api.data.vulnerability.information_disclosure.url_disclosure import UrlDisclosure
    d1 = URL("http://www.example.com/")
    d2 = Text("some text")
    d3 = UrlDisclosure(d1)
    d1.add_information(d2)

    # Test data_type, data_subtype, etc.
    print "Testing Data type checks..."
    assert d1.data_type == Data.TYPE_RESOURCE
    assert d1.data_subtype == URL.data_subtype
    assert d1.resource_type == d1.data_subtype
    assert d2.data_type == Data.TYPE_INFORMATION
    assert d2.data_subtype == Text.data_subtype
    assert d2.information_type == d2.data_subtype
    assert d3.data_type == Data.TYPE_VULNERABILITY
    assert d3.data_subtype == UrlDisclosure.data_subtype
    assert d3.vulnerability_type == d3.data_subtype

    # Test validate_link_minimums().
    print "Testing Data.validate_link_minimums()..."
    d1.validate_link_minimums()
    d2.validate_link_minimums()
    d3.validate_link_minimums()

    # Test the links property.
    print "Testing Data.links..."
    assert d1.links == {d2.identity, d3.identity}
    assert d2.links == {d1.identity}
    assert d3.links == {d1.identity}

    # Test the get_links method, filtered by type and subtype.
    print "Testing Data.get_links()..."
    assert d1.get_links(d1.data_type) == set()
    assert d1.get_links(d1.data_type, d1.resource_type) == set()
    assert d1.get_links(d2.data_type) == {d2.identity}
    assert d1.get_links(d2.data_type, d2.information_type) == {d2.identity}
    assert d1.get_links(d3.data_type) == {d3.identity}
    assert d1.get_links(d3.data_type, d3.vulnerability_type) == {d3.identity}
    assert d2.get_links(d2.data_type) == set()
    assert d2.get_links(d2.data_type, d2.information_type) == set()
    assert d2.get_links(d1.data_type) == {d1.identity}
    assert d2.get_links(d1.data_type, d1.resource_type) == {d1.identity}
    assert d2.get_links(d3.data_type) == set()
    assert d2.get_links(d3.data_type, d3.vulnerability_type) == set()
    assert d3.get_links(d3.data_type) == set()
    assert d3.get_links(d3.data_type, d3.vulnerability_type) == set()
    assert d3.get_links(d1.data_type) == {d1.identity}
    assert d3.get_links(d1.data_type, d1.resource_type) == {d1.identity}
    assert d3.get_links(d2.data_type) == set()
    assert d3.get_links(d2.data_type, d2.information_type) == set()

    # Test the linked_data property.
    # There should be no accesses to the database since all data is local.
    print "Testing Data.linked_data..."
    assert {x.identity for x in d1.linked_data} == {d2.identity, d3.identity}
    assert {x.identity for x in d2.linked_data} == {d1.identity}
    assert {x.identity for x in d3.linked_data} == {d1.identity}

    # Test the get_linked_data() method.
    # There should be no accesses to the database since all data is local.
    print "Testing Data.get_linked_data()..."
    assert {x.identity for x in d1.find_linked_data(d1.data_type)} == set()
    assert {
        x.identity
        for x in d1.find_linked_data(d1.data_type, d1.resource_type)
    } == set()
    assert {x.identity for x in d1.find_linked_data(d2.data_type)} == {d2.identity}
    assert {
        x.identity
        for x in d1.find_linked_data(d2.data_type, d2.information_type)
    } == {d2.identity}
    assert {x.identity for x in d1.find_linked_data(d3.data_type)} == {d3.identity}
    assert {
        x.identity
        for x in d1.find_linked_data(d3.data_type, d3.vulnerability_type)
    } == {d3.identity}
    assert {x.identity for x in d2.find_linked_data(d2.data_type)} == set()
    assert {
        x.identity
        for x in d2.find_linked_data(d2.data_type, d2.information_type)
    } == set()
    assert {x.identity for x in d2.find_linked_data(d1.data_type)} == {d1.identity}
    assert {
        x.identity
        for x in d2.find_linked_data(d1.data_type, d1.resource_type)
    } == {d1.identity}
    assert {x.identity for x in d2.find_linked_data(d3.data_type)} == set()
    assert {
        x.identity
        for x in d2.find_linked_data(d3.data_type, d3.vulnerability_type)
    } == set()
    assert {x.identity for x in d3.find_linked_data(d3.data_type)} == set()
    assert {
        x.identity
        for x in d3.find_linked_data(d3.data_type, d3.vulnerability_type)
    } == set()
    assert {x.identity for x in d3.find_linked_data(d1.data_type)} == {d1.identity}
    assert {
        x.identity
        for x in d3.find_linked_data(d1.data_type, d1.resource_type)
    } == {d1.identity}
    assert {x.identity for x in d3.find_linked_data(d2.data_type)} == set()
    assert {
        x.identity
        for x in d3.find_linked_data(d2.data_type, d2.information_type)
    } == set()

    # Test the associated_* properties.
    # There should be no accesses to the database since all data is local.
    print "Testing Data.associated_*..."
    assert {x.identity for x in d1.associated_resources} == set()
    assert {x.identity for x in d1.associated_informations} == {d2.identity}
    assert {x.identity for x in d1.associated_vulnerabilities} == {d3.identity}
    assert {x.identity for x in d2.associated_informations} == set()
    assert {x.identity for x in d2.associated_resources} == {d1.identity}
    assert {x.identity for x in d2.associated_vulnerabilities} == set()
    assert {x.identity for x in d3.associated_vulnerabilities} == set()
    assert {x.identity for x in d3.associated_resources} == {d1.identity}
    assert {x.identity for x in d3.associated_informations} == set()

    # Test the get_associated_*_by_category() methods.
    # There should be no accesses to the database since all data is local.
    print "Testing Data.get_associated_*_by_category()..."
    assert {
        x.identity
        for x in d1.get_associated_resources_by_category(d1.resource_type)
    } == set()
    assert {
        x.identity
        for x in d1.get_associated_informations_by_category(
            d2.information_type)
    } == {d2.identity}
    assert {
        x.identity
        for x in d1.get_associated_vulnerabilities_by_category(
            d3.vulnerability_type)
    } == {d3.identity}
    assert {
        x.identity
        for x in d2.get_associated_informations_by_category(
            d2.information_type)
    } == set()
    assert {
        x.identity
        for x in d2.get_associated_resources_by_category(d1.resource_type)
    } == {d1.identity}
    assert {
        x.identity
        for x in d2.get_associated_vulnerabilities_by_category(
            d3.vulnerability_type)
    } == set()
    assert {
        x.identity
        for x in d3.get_associated_vulnerabilities_by_category(
            d3.vulnerability_type)
    } == set()
    assert {
        x.identity
        for x in d3.get_associated_resources_by_category(d1.resource_type)
    } == {d1.identity}
    assert {
        x.identity
        for x in d3.get_associated_informations_by_category(
            d2.information_type)
    } == set()

    # Test TempDataStorage.on_finish().
    print "Testing LocalDataCache.on_finish() on ideal conditions..."
    result = LocalDataCache.on_finish([d2, d3], d1)
    assert set(result) == set([d1, d2, d3])
    d1.validate_link_minimums()
    d2.validate_link_minimums()
    d3.validate_link_minimums()
    # The links must be unchanged after on_finish().
    assert d1.links == {d2.identity, d3.identity}
    assert d2.links == {d1.identity}
    assert d3.links == {d1.identity}
    assert d1.get_links(d1.data_type) == set()
    assert d1.get_links(d1.data_type, d1.resource_type) == set()
    assert d1.get_links(d2.data_type) == {d2.identity}
    assert d1.get_links(d2.data_type, d2.information_type) == {d2.identity}
    assert d1.get_links(d3.data_type) == {d3.identity}
    assert d1.get_links(d3.data_type, d3.vulnerability_type) == {d3.identity}
    assert d2.get_links(d2.data_type) == set()
    assert d2.get_links(d2.data_type, d2.information_type) == set()
    assert d2.get_links(d1.data_type) == {d1.identity}
    assert d2.get_links(d1.data_type, d1.resource_type) == {d1.identity}
    assert d2.get_links(d3.data_type) == set()
    assert d2.get_links(d3.data_type, d3.vulnerability_type) == set()
    assert d3.get_links(d3.data_type) == set()
    assert d3.get_links(d3.data_type, d3.vulnerability_type) == set()
    assert d3.get_links(d1.data_type) == {d1.identity}
    assert d3.get_links(d1.data_type, d1.resource_type) == {d1.identity}
    assert d3.get_links(d2.data_type) == set()
    assert d3.get_links(d2.data_type, d2.information_type) == set()
def sf_DEFACED_COHOST(self, sf_module, source, raw_data):
    """
    Handle a SpiderFoot DEFACED_COHOST event.

    Only reported when external targets are allowed.
    """
    if not self.allow_external:
        return
    defaced_url = URL(source)
    defacement = DefacedUrl(defaced_url, tool_id=sf_module)
    return defaced_url, defacement
def sf_URL_FLASH(self, sf_module, source, raw_data):
    """
    Handle a SpiderFoot URL_FLASH event: report the linked URL,
    keeping the originating page as the referer.
    """
    flash_url = URL(raw_data, referer=source)
    return flash_url
def sf_LINKED_URL_EXTERNAL(self, sf_module, source, raw_data):
    """
    Handle a SpiderFoot LINKED_URL_EXTERNAL event.

    Only reported when external targets are allowed.
    """
    if not self.allow_external:
        return
    return URL(raw_data, referer=source)
def sf_URL_STATIC(self, sf_module, source, raw_data):
    """
    Handle a SpiderFoot URL_STATIC event.

    :returns: The reported static URL.
    :rtype: URL
    """
    return URL(raw_data)
def sf_URL_PASSWORD(self, sf_module, source, raw_data):
    """
    Handle a SpiderFoot URL_PASSWORD event: link the discovered
    password to the URL where it was found.
    """
    found_url = URL(source)
    found_password = Password(raw_data)
    found_url.add_information(found_password)
    return found_url, found_password
def run(self, info):
    """
    Test the target for the OpenSSL Heartbleed vulnerability.

    :param info: Target: either a base URL or an (IP, service
        fingerprint) relationship.
    :type info: BaseURL | Relationship(IP, ServiceFingerprint)

    :returns: A vulnerability object if the target is vulnerable,
        None otherwise.
    :rtype: VulnerableService | VulnerableWebApp | None
    """

    # If it's an URL...
    if info.is_instance(BaseURL):
        target = URL(info.url)
        discard_data(target)

        # Get the hostname to test.
        hostname = info.hostname

        # If it's HTTPS, use the port number from the URL.
        if info.is_https:
            port = info.parsed_url.port

        # Otherwise, assume the port is 443.
        else:
            port = 443

        # Test this port.
        is_vulnerable = self.test(hostname, port)

    # If it's a service fingerprint...
    elif info.is_instance(Relationship(IP, ServiceFingerprint)):
        ip, fp = info.instances
        target = ip
        port = fp.port
        starttls = False

        # Ignore if the port does not respond directly to SSL...
        if fp.protocol != "SSL":

            # If it's SMTP, we need to issue a STARTTLS command first.
            if fp.name == "smtp":
                starttls = True

            # Ignore if the port does not support SSL.
            else:
                Logger.log_more_verbose(
                    "No SSL services found in fingerprint [%s] for IP %s,"
                    " aborting." % (fp, ip))
                return

        # Test this port.
        is_vulnerable = self.test(ip.address, port, starttls=starttls)

    # Internal error!
    else:
        assert False, "Unexpected data type received: %s" % type(info)

    # If it's vulnerable, report the vulnerability.
    if is_vulnerable:
        title = "OpenSSL Heartbleed Vulnerability"
        # BUGFIX: Heartbleed is CVE-2014-0160, not CVE-2014-0162.
        description = "An unpatched OpenSSL service was found that's" \
                      " vulnerable to the Heartbleed vulnerability" \
                      " (CVE-2014-0160). This vulnerability allows an" \
                      " attacker to dump the memory contents of the" \
                      " service running the flawed version of the" \
                      " OpenSSL library, potentially compromising" \
                      " usernames, passwords, private keys and other" \
                      " sensitive data."
        references = ["http://heartbleed.com/"]
        # BUGFIX: a trailing comma after the list made this a 1-tuple
        # containing a list instead of the list itself.
        cve = ["CVE-2014-0160"]
        if target.is_instance(IP):
            vuln = VulnerableService(
                target=target,
                port=port,
                protocol="TCP",
                title=title,
                description=description,
                references=references,
                cve=cve,
            )
        elif target.is_instance(URL):
            vuln = VulnerableWebApp(
                target=target,
                title=title,
                description=description,
                references=references,
                cve=cve,
            )
        else:
            assert False, "Internal error!"
        return vuln
def parse_nikto_results(info, output_filename): """ Convert the output of a Nikto scan to the GoLismero data model. :param info: Data object to link all results to (optional). :type info: BaseURL :param output_filename: Path to the output filename. The format should always be CSV. :type output_filename: :returns: Results from the Nikto scan, and the vulnerability count. :rtype: list(Data), int """ # Parse the scan results. # On error log the exception and continue. results = [] vuln_count = 0 hosts_seen = set() urls_seen = {} try: with open(output_filename, "rU") as f: csv_reader = reader(f) for row in csv_reader: try: # Each row (except for the first) has always # the same 7 columns, but some may be empty. if len(row) < 7: continue host, ip, port, vuln_tag, method, path, text = row[:7] # Report domain names and IP addresses. if ((info is None or host != info.hostname) and host not in hosts_seen): hosts_seen.add(host) if host in Config.audit_scope: results.append(Domain(host)) if ip not in hosts_seen: hosts_seen.add(ip) if ip in Config.audit_scope: results.append(IP(ip)) # Skip rows not informing of vulnerabilities. if not vuln_tag: continue # Calculate the vulnerable URL. if info is not None: target = urljoin(info.url, path) else: if port == 443: target = urljoin("https://%s/" % host, path) else: target = urljoin("http://%s/" % host, path) # Skip if out of scope. if target not in Config.audit_scope: continue # Report the URLs. if (target, method) not in urls_seen: url = URL(target, method) urls_seen[(target, method)] = url results.append(url) else: url = urls_seen[(target, method)] # Get the reference URLs. refs = extract_from_text(text) refs.difference_update(urls_seen.itervalues()) # Convert the information to the GoLismero model. 
if vuln_tag == "OSVDB-0": kwargs = {"level": "informational"} else: kwargs = extract_vuln_ids("%s: %s" % (vuln_tag, text)) kwargs["custom_id"] = ";".join( (host, ip, port, vuln_tag, method, path, text)) kwargs["description"] = text if text else None kwargs["references"] = refs if "osvdb" in kwargs and "OSVDB-0" in kwargs["osvdb"]: tmp = list(kwargs["osvdb"]) tmp.remove("OSVDB-0") if tmp: kwargs["osvdb"] = tuple(tmp) else: del kwargs["osvdb"] # Instance the Vulnerability object. if vuln_tag == "OSVDB-0": vuln = UncategorizedVulnerability(url, **kwargs) else: vuln = VulnerableWebApp(url, **kwargs) # Add the vulnerability to the results. results.append(vuln) vuln_count += 1 # On error, log the exception and continue. except Exception, e: Logger.log_error_verbose(str(e)) Logger.log_error_more_verbose(format_exc()) # On error, log the exception. except Exception, e: Logger.log_error_verbose(str(e)) Logger.log_error_more_verbose(format_exc())
) # FIXME handle exceptions! except: if l_p: discard_data(l_p) continue if l_p: if m_analyzer.analyze(l_p.data, url=l_url): match[l_url] = l_p else: discard_data(l_p) # Generate results for i in m_analyzer.unique_texts: l_url = i.url l_p = match.pop(l_url) m_result = URL(l_url, referer=m_url) m_result.add_information(l_p) m_return.append(m_result) m_return.append(l_p) # Check for posible suspicious URL if l_url in m_discovered_suspicious: v = SuspiciousURL( m_result, title="Suspicious URL found un robots.txt", risk=1, severity=0, level="low", description= "An URLs was found in Disallow tag of robots.txt. It can contain confidential content or some information leak.", tool_id="robots")
def helper_auditdb_stress(n, dbname=":auto:"):
    """
    Stress-test the audit database: write, read back and delete
    n * 3 linked Data objects, timing each phase.

    :param n: Number of data triples to create.
    :type n: int

    :param dbname: Database connection string.
    :type dbname: str
    """
    main_config = OrchestratorConfig()
    main_config.ui_mode = "disabled"
    audit_config = AuditConfig()
    audit_config.targets = ["www.example.com"]
    audit_config.audit_db = dbname
    with PluginTester(main_config, audit_config) as t:
        disk = t.audit.database
        assert type(disk) is AuditSQLiteDB
        print "  Testing %d elements..." % (n * 3)
        t1 = time.time()
        print "  -> Writing..."
        # Each iteration writes a linked resource+information+vulnerability.
        for x in xrange(n):
            d1 = URL("http://www.example.com/" + generate_random_string())
            d2 = Text(generate_random_string())
            d3 = UrlDisclosure(d1)
            d1.add_information(d2)
            disk.add_data(d1)
            disk.add_data(d2)
            disk.add_data(d3)
        t2 = time.time()
        print "  -- Reading..."
        keys = disk.get_data_keys()
        assert len(keys) == (n * 3)
        for key in keys:
            assert disk.has_data_key(key)
            data = disk.get_data(key)
            assert data is not None
        keys = disk.get_data_keys(Data.TYPE_INFORMATION)
        assert len(keys) == n
        for key in keys:
            assert disk.has_data_key(key)
            data = disk.get_data(key)
            assert data is not None
            assert data.data_type == Data.TYPE_INFORMATION
            assert isinstance(data, Text)
        keys = disk.get_data_keys(Data.TYPE_RESOURCE)
        assert len(keys) == n
        for key in keys:
            assert disk.has_data_key(key)
            data = disk.get_data(key)
            assert data is not None
            assert data.data_type == Data.TYPE_RESOURCE
            assert isinstance(data, URL)
        keys = disk.get_data_keys(Data.TYPE_VULNERABILITY)
        assert len(keys) == n
        for key in keys:
            assert disk.has_data_key(key)
            data = disk.get_data(key)
            assert data is not None
            assert data.data_type == Data.TYPE_VULNERABILITY
            assert isinstance(data, UrlDisclosure)
        t3 = time.time()
        print "  <- Deleting..."
        # NOTE(review): at this point "keys" only holds the n vulnerability
        # keys from the last get_data_keys() call, yet the timing below
        # divides by n * 3 — confirm whether all keys should be deleted here.
        for key in keys:
            disk.remove_data(key)
        t4 = time.time()
        print "  Write time:  %d seconds (%f seconds per element)" % (
            t2 - t1, (t2 - t1) / (n * 3.0))
        print "  Read time:   %d seconds (%f seconds per element)" % (
            t3 - t2, (t3 - t2) / (n * 3.0))
        print "  Delete time: %d seconds (%f seconds per element)" % (
            t4 - t3, (t4 - t3) / (n * 3.0))
        print "  Total time:  %d seconds (%f seconds per element)" % (
            t4 - t1, (t4 - t1) / (n * 3.0))
def helper_test_auditdb_data_consistency(db, key, data):
    """
    Verify audit database consistency: audit times, data storage,
    retrieval, type queries, counts and removal.

    :param db: Audit database to test.
    :type db: BaseAuditDB

    :param key: Unique suffix for the test URL.
    :type key: str

    :param data: Text payload for the information object.
    :type data: str
    """
    assert isinstance(db, BaseAuditDB)

    # Test the database start and end times.
    db.set_audit_times(None, None)
    assert db.get_audit_times() == (None, None)
    db.set_audit_start_time(1)
    assert db.get_audit_times() == (1, None)
    db.set_audit_stop_time(2)
    assert db.get_audit_times() == (1, 2)
    db.set_audit_start_time(None)
    assert db.get_audit_times() == (None, 2)
    db.set_audit_stop_time(None)
    assert db.get_audit_times() == (None, None)

    # Create some fake data and add it to the database.
    d1 = URL("http://www.example.com/" + key)
    d2 = Text(data)
    d3 = UrlDisclosure(d1)
    d1.add_information(d2)
    assert d1.links == {d2.identity, d3.identity}
    assert d2.links == {d1.identity}
    assert d3.links == {d1.identity}
    db.add_data(d1)
    db.add_data(d2)
    db.add_data(d3)

    # Test has_data_key().
    assert db.has_data_key(d1.identity)
    assert db.has_data_key(d2.identity)
    assert db.has_data_key(d3.identity)

    # Test get_data(): identities and links must survive the round trip.
    d1p = db.get_data(d1.identity)
    d2p = db.get_data(d2.identity)
    d3p = db.get_data(d3.identity)
    assert d1p is not None
    assert d2p is not None
    assert d3p is not None
    assert d1p.identity == d1.identity
    assert d2p.identity == d2.identity
    assert d3p.identity == d3.identity
    assert d1p.links == d1.links, (d1p.links, d1.links)
    assert d2p.links == d2.links
    assert d3p.links == d3.links

    # Test get_data_types().
    assert db.get_data_types((d1.identity, d2.identity, d3.identity)) == {
        (d1.data_type, d1.resource_type),
        (d2.data_type, d2.information_type),
        (d3.data_type, d3.vulnerability_type)
    }, (db.get_data_types((d1.identity, d2.identity, d3.identity)),
        {(d1.data_type, d1.resource_type),
         (d2.data_type, d2.information_type),
         (d3.data_type, d3.vulnerability_type)})

    # Test get_data_count().
    assert db.get_data_count() == 3
    assert db.get_data_count(d1.data_type) == 1
    assert db.get_data_count(d2.data_type) == 1
    assert db.get_data_count(d3.data_type) == 1
    assert db.get_data_count(data_subtype=d1.resource_type) == 1
    assert db.get_data_count(data_subtype=d2.information_type) == 1
    assert db.get_data_count(data_subtype=d3.vulnerability_type) == 1

    # Test get_many_data().
    assert {
        x.identity
        for x in db.get_many_data((d1.identity, d2.identity, d3.identity))
    } == {d1.identity, d2.identity, d3.identity}

    # Test stage and plugin completion logic.
    # XXX TODO

    # Test remove_data(): everything must be gone afterwards.
    db.remove_data(d1.identity)
    db.remove_data(d2.identity)
    db.remove_data(d3.identity)
    assert not db.has_data_key(d1.identity)
    assert not db.has_data_key(d2.identity)
    assert not db.has_data_key(d3.identity)
    assert db.get_data_count() == 0
    assert db.get_data_count(d1.data_type) == 0
    assert db.get_data_count(d2.data_type) == 0
    assert db.get_data_count(d3.data_type) == 0
    assert db.get_data_count(d1.data_type, d1.resource_type) == 0
    assert db.get_data_count(d2.data_type, d2.information_type) == 0
    assert db.get_data_count(d3.data_type, d3.vulnerability_type) == 0
    assert db.get_data_count(data_subtype=d1.resource_type) == 0
    assert db.get_data_count(data_subtype=d2.information_type) == 0
    assert db.get_data_count(data_subtype=d3.vulnerability_type) == 0
    assert db.get_data_types((d1.identity, d2.identity, d3.identity)) == set()
    assert db.get_data(d1.identity) is None
    assert db.get_data(d2.identity) is None
    assert db.get_data(d3.identity) is None
def sf_URL_UPLOAD(self, sf_module, source, raw_data):
    """
    Handle a SpiderFoot URL_UPLOAD event: report the upload target
    as a POST URL with the originating page as referer.
    """
    upload_url = URL(raw_data, referer=source, method="POST")
    return upload_url
def helper_test_auditdb_data_consistency(db, key, data):
    """
    Exercise the data consistency guarantees of an audit database backend.

    NOTE(review): this definition is an almost byte-identical duplicate of
    another helper_test_auditdb_data_consistency defined earlier in this
    file and silently rebinds the name — probably a merge artifact; confirm
    and keep only one copy.

    :param db: Audit database instance under test.
    :param key: Random string used to build a unique URL.
    :param data: Random string stored as a Text information object.
    """
    assert isinstance(db, BaseAuditDB)

    # Test the database start and end times.
    db.set_audit_times(None, None)
    assert db.get_audit_times() == (None, None)
    db.set_audit_start_time(1)
    assert db.get_audit_times() == (1, None)
    db.set_audit_stop_time(2)
    assert db.get_audit_times() == (1, 2)
    db.set_audit_start_time(None)
    assert db.get_audit_times() == (None, 2)
    db.set_audit_stop_time(None)
    assert db.get_audit_times() == (None, None)

    # Create some fake data and add it to the database.
    d1 = URL("http://www.example.com/" + key)
    d2 = Text(data)
    d3 = UrlDisclosure(d1)
    d1.add_information(d2)
    assert d1.links == {d2.identity, d3.identity}
    assert d2.links == {d1.identity}
    assert d3.links == {d1.identity}
    db.add_data(d1)
    db.add_data(d2)
    db.add_data(d3)

    # Test has_data_key().
    assert db.has_data_key(d1.identity)
    assert db.has_data_key(d2.identity)
    assert db.has_data_key(d3.identity)

    # Test get_data().
    d1p = db.get_data(d1.identity)
    d2p = db.get_data(d2.identity)
    d3p = db.get_data(d3.identity)
    assert d1p is not None
    assert d2p is not None
    assert d3p is not None
    assert d1p.identity == d1.identity
    assert d2p.identity == d2.identity
    assert d3p.identity == d3.identity
    assert d1p.links == d1.links, (d1p.links, d1.links)
    assert d2p.links == d2.links
    assert d3p.links == d3.links

    # Test get_data_types().
    assert db.get_data_types((d1.identity, d2.identity, d3.identity)) == {
        (d1.data_type, d1.resource_type),
        (d2.data_type, d2.information_type),
        (d3.data_type, d3.vulnerability_type)
    }, (db.get_data_types((d1.identity, d2.identity, d3.identity)),
        {(d1.data_type, d1.resource_type),
         (d2.data_type, d2.information_type),
         (d3.data_type, d3.vulnerability_type)})

    # Test get_data_count().
    assert db.get_data_count() == 3
    assert db.get_data_count(d1.data_type) == 1
    assert db.get_data_count(d2.data_type) == 1
    assert db.get_data_count(d3.data_type) == 1
    assert db.get_data_count(data_subtype = d1.resource_type) == 1
    assert db.get_data_count(data_subtype = d2.information_type) == 1
    assert db.get_data_count(data_subtype = d3.vulnerability_type) == 1

    # Test get_many_data().
    assert {x.identity
            for x in db.get_many_data(
                (d1.identity, d2.identity, d3.identity))
    } == {d1.identity, d2.identity, d3.identity}

    # Test stage and plugin completion logic.
    # XXX TODO

    # Test remove_data().
    db.remove_data(d1.identity)
    db.remove_data(d2.identity)
    db.remove_data(d3.identity)
    assert not db.has_data_key(d1.identity)
    assert not db.has_data_key(d2.identity)
    assert not db.has_data_key(d3.identity)
    assert db.get_data_count() == 0
    assert db.get_data_count(d1.data_type) == 0
    assert db.get_data_count(d2.data_type) == 0
    assert db.get_data_count(d3.data_type) == 0
    assert db.get_data_count(d1.data_type, d1.resource_type) == 0
    assert db.get_data_count(d2.data_type, d2.information_type) == 0
    assert db.get_data_count(d3.data_type, d3.vulnerability_type) == 0
    assert db.get_data_count(data_subtype = d1.resource_type) == 0
    assert db.get_data_count(data_subtype = d2.information_type) == 0
    assert db.get_data_count(data_subtype = d3.vulnerability_type) == 0
    assert db.get_data_types((d1.identity, d2.identity, d3.identity)) == set()
    assert db.get_data(d1.identity) is None
    assert db.get_data(d2.identity) is None
    assert db.get_data(d3.identity) is None
def sf_URL_JAVA_APPLET(self, sf_module, source, raw_data): return URL(raw_data, referer=source)
def helper_auditdb_stress(n, dbname = ":auto:"): main_config = OrchestratorConfig() main_config.ui_mode = "disabled" audit_config = AuditConfig() audit_config.targets = ["www.example.com"] audit_config.audit_db = dbname with PluginTester(main_config, audit_config) as t: disk = t.audit.database assert type(disk) is AuditSQLiteDB print " Testing %d elements..." % (n * 3) t1 = time.time() print " -> Writing..." for x in xrange(n): d1 = URL("http://www.example.com/" + generate_random_string()) d2 = Text(generate_random_string()) d3 = UrlDisclosure(d1) d1.add_information(d2) disk.add_data(d1) disk.add_data(d2) disk.add_data(d3) t2 = time.time() print " -- Reading..." keys = disk.get_data_keys() assert len(keys) == (n * 3) for key in keys: assert disk.has_data_key(key) data = disk.get_data(key) assert data is not None keys = disk.get_data_keys(Data.TYPE_INFORMATION) assert len(keys) == n for key in keys: assert disk.has_data_key(key) data = disk.get_data(key) assert data is not None assert data.data_type == Data.TYPE_INFORMATION assert isinstance(data, Text) keys = disk.get_data_keys(Data.TYPE_RESOURCE) assert len(keys) == n for key in keys: assert disk.has_data_key(key) data = disk.get_data(key) assert data is not None assert data.data_type == Data.TYPE_RESOURCE assert isinstance(data, URL) keys = disk.get_data_keys(Data.TYPE_VULNERABILITY) assert len(keys) == n for key in keys: assert disk.has_data_key(key) data = disk.get_data(key) assert data is not None assert data.data_type == Data.TYPE_VULNERABILITY assert isinstance(data, UrlDisclosure) t3 = time.time() print " <- Deleting..." 
for key in keys: disk.remove_data(key) t4 = time.time() print " Write time: %d seconds (%f seconds per element)" % (t2 - t1, (t2 - t1) / (n * 3.0)) print " Read time: %d seconds (%f seconds per element)" % (t3 - t2, (t3 - t2) / (n * 3.0)) print " Delete time: %d seconds (%f seconds per element)" % (t4 - t3, (t4 - t3) / (n * 3.0)) print " Total time: %d seconds (%f seconds per element)" % (t4 - t1, (t4 - t1) / (n * 3.0))
def sf_LINKED_URL_INTERNAL(self, sf_module, source, raw_data): return URL(raw_data, referer=source)
def payload_muntants(url_info, payload={}, bmethod='GET', exclude_cgi_suffix=[ 'css', 'js', 'jpeg', 'jpg', 'png', 'gif', 'svg', 'txt' ], use_cache=None, timeout=10.0, bcheck_use_orig_body=True, req_header={}, resp_code='200', resp_header={}, **kwargs): ''' :param url_info: :param payload: {'k':'id', 'pos': 1, 'payload':str, 'type': 0} (pos:0 key, pos:1 value) (type:0 append, type:1 replace) :param exclude_cgi_suffix: :param depth: :param bcheck_use_orig_body: :param req_header: :param resp_code: :param resp_header: :param kwargs: :return: ''' if not isinstance(url_info, URL): raise TypeError("Expected url object, type:%s" % type(url_info)) if not isinstance(payload, dict): raise TypeError("Excepted payload object, type:%s" % type(payload)) if url_info.parsed_url.extension[1:] in exclude_cgi_suffix: Logger.log_verbose("Skipping URL: %s" % url_info.url) m_url_info = copy(url_info) if bmethod == "GET": param_dict = copy(m_url_info.url_params) elif bmethod == "POST": param_dict = copy(m_url_info.post_params) if len(param_dict) == None and len(param_dict) == 0: return None __ = parse_url(m_url_info.url) k = payload['k'] if payload['pos'] == 1: #value if payload['type'] == 0: #append param_dict[k] = param_dict[k] + payload['payload'] elif payload['type'] == 1: #replace param_dict[k] = payload['payload'] else: #key 先不考虑key值 if payload['type'] == 0: param_dict.update(k=param_dict.pop(k)) # TODO GET/POST param key need deal raise ValueError("GET/POST param key payload is not support!") retry_cnt = 0 while retry_cnt < 3: if bmethod == "GET": m_resource_url_payload = URL(url=__.request_cgi, method=m_url_info.method, referer=m_url_info.referer, url_params=param_dict) elif bmethod == "POST": m_resource_url_payload = URL(url=__.request_cgi, method=m_url_info.method, referer=m_url_info.referer, post_params=param_dict) try: p = get_request(url=m_resource_url_payload, allow_redirects=False, use_cache=use_cache, timeout=timeout) return p except NetworkException, e: retry_cnt += 1 
time.sleep(0.5) Logger.log_error_verbose("Error while processing %r: %s" % (m_resource_url_payload.url, str(e)))
def sf_PROVIDER_JAVASCRIPT(self, sf_module, source, raw_data): return URL(raw_data, referer=source)
def run(self, info): # Query PunkSPIDER. host_id = info.hostname host_id = parse_url(host_id).hostname host_id = ".".join(reversed(host_id.split("."))) d = self.query_punkspider(host_id) # Stop if we have no results. if not d: Logger.log("No results found for host: %s" % info.hostname) return # This is where we'll collect the data we'll return. results = [] # For each vulnerability... for v in d["data"]: try: # Future-proof checks. if v["protocol"] not in ("http", "https"): Logger.log_more_verbose( "Skipped non-web vulnerability: %s" % to_utf8(v["id"])) continue if v["bugType"] not in ("xss", "sqli", "bsqli"): Logger.log_more_verbose( "Skipped unknown vulnerability type: %s" % to_utf8(v["bugType"])) continue # Get the vulnerable URL, parameter and payload. url = to_utf8(v["vulnerabilityUrl"]) param = to_utf8(v["parameter"]) parsed = parse_url(url) payload = parsed.query_params[param] # Get the level. level = to_utf8(v["level"]) # Create the URL object. url_o = URL(url) results.append(url_o) # Get the vulnerability class. if v["bugType"] == "xss": clazz = XSS else: clazz = SQLInjection # Create the Vulnerability object. vuln = clazz( url_o, vulnerable_params={param: payload}, injection_point=clazz.INJECTION_POINT_URL, injection_type=to_utf8(v["bugType"]), # FIXME level=level, tool_id=to_utf8(v["id"]), ) print '------------' print vuln print type(vuln) print '------------' results.append(vuln) # Log errors. except Exception, e: tb = traceback.format_exc() Logger.log_error_verbose(str(e)) Logger.log_error_more_verbose(tb)
def sf_DEFACED(self, sf_module, source, raw_data): url = URL(source) vulnerability = DefacedUrl(url, tool_id = sf_module) return url, vulnerability
class SuspiciousURLPlugin(TestingPlugin):
    """
    Find suspicious words in URLs.
    """

    #--------------------------------------------------------------------------
    def get_accepted_types(self):
        return [URL, HTML, Text]

    #--------------------------------------------------------------------------
    def run(self, info):
        # Dispatch: URLs are checked for suspicious words/entropy,
        # everything else (HTML, Text) is scanned for malware links.
        if info.is_instance(URL):
            return self.analyze_url(info)
        return self.analyze_html(info)

    #--------------------------------------------------------------------------
    def analyze_url(self, info):
        # :param info: URL object to analyze.
        # :return: List of SuspiciousURLPath findings.
        m_parsed_url = info.parsed_url
        m_results = []

        Logger.log_more_verbose("Processing URL: %s" % m_parsed_url)

        #----------------------------------------------------------------------
        # Find suspicious URLs by matching against known substrings.

        # Load wordlists
        m_wordlist_middle = WordListLoader.get_wordlist_as_raw(
            Config.plugin_config['middle'])
        m_wordlist_extensions = WordListLoader.get_wordlist_as_raw(
            Config.plugin_config['extensions'])

        # Add matching keywords at any positions of URL.
        # Matches whole path segments, the file base name or the extension.
        m_results.extend([
            SuspiciousURLPath(info, x) for x in m_wordlist_middle
            if x in m_parsed_url.directory.split("/")
            or x == m_parsed_url.filebase or x == m_parsed_url.extension
        ])

        # Add matching keywords at any positions of URL.
        m_results.extend([
            SuspiciousURLPath(info, x) for x in m_wordlist_extensions
            if m_parsed_url.extension == x
        ])

        #----------------------------------------------------------------------
        # Find suspicious URLs by calculating the Shannon entropy of the hostname.
        # Idea from: https://github.com/stricaud/urlweirdos/blob/master/src/urlw/plugins/shannon/__init__.py
        # TODO: test with unicode enabled hostnames!

        # Check the Shannon entropy for the hostname.
        # The 4.0 threshold flags randomly-generated looking names.
        hostname = info.parsed_url.hostname
        entropy = calculate_shannon_entropy(hostname)
        if entropy > 4.0:
            m_results.append(SuspiciousURLPath(info, hostname))

        # Check the Shannon entropy for the subdomains.
        for subdomain in info.parsed_url.hostname.split('.'):
            if len(subdomain) > 3:
                entropy = calculate_shannon_entropy(subdomain)
                if entropy > 4.0:
                    m_results.append(SuspiciousURLPath(info, subdomain))

        return m_results

    #--------------------------------------------------------------------------
    def analyze_html(self, info):
        # :param info: HTML or Text object to scan for links to malware sites.
        # :return: List of MaliciousUrl findings and their URL objects.

        #----------------------------------------------------------------------
        # Get malware suspicious links.

        Logger.log_more_verbose("Processing HTML: %s" % info.identity)

        # Load the malware wordlist.
        wordlist_filename = Config.plugin_config["malware_sites"]
        try:
            wordlist = WordListLoader.get_wordlist_as_list(wordlist_filename)
        except WordlistNotFound:
            Logger.log_error("Wordlist '%s' not found.." % wordlist_filename)
            return
        except TypeError:
            Logger.log_error("Wordlist '%s' is not a file." %
                             wordlist_filename)
            return
        if not wordlist:
            # NOTE(review): an empty wordlist is only logged, not returned
            # from — the scan continues with an empty set; confirm intended.
            Logger.log_error("Wordlist '%s' is empty." % wordlist_filename)

        # NOTE(review): the numbered Logger.log("1").."6" calls below look
        # like leftover debug traces; confirm and remove.
        Logger.log("1")

        # Get links from every URL resource linked to this document.
        base_urls = set()
        for url in info.find_linked_data(Data.TYPE_RESOURCE,
                                         Resource.RESOURCE_URL):
            m_url = url.url
            base_urls.add(m_url)
            if info.information_type == Information.INFORMATION_HTML:
                m_links = extract_from_html(info.raw_data, m_url)
                m_links.update(extract_from_text(info.raw_data, m_url))
            elif info.information_type == Information.INFORMATION_PLAIN_TEXT:
                m_links = extract_from_text(info.raw_data, m_url)
            else:
                raise Exception("Internal error!")
        # Drop links pointing back at the pages themselves.
        # NOTE(review): if no linked URL resource exists, m_links is never
        # bound and this line raises NameError — confirm upstream guarantees
        # at least one linked URL.
        m_links.difference_update(base_urls)

        Logger.log("2")

        # If we have no links, abort now
        if not m_links:
            Logger.log_verbose("No output links found.")
            return

        # Do not follow URLs that contain certain keywords
        m_forbidden = WordListLoader.get_wordlist_as_raw(
            Config.plugin_config["wordlist_no_spider"])
        m_urls_allowed = {
            url for url in m_links
            if url and not any(x in url for x in m_forbidden)
        }

        Logger.log("3")

        # Get only output links (outside the audit scope).
        m_output_links = []
        for url in m_urls_allowed:
            try:
                if url not in Config.audit_scope:
                    m_output_links.append(url)
            except Exception, e:
                Logger.log_error_more_verbose(format_exc())

        Logger.log("4")

        # Strip comment ("#") and section ("[") lines from the wordlist.
        wordlist_filtered = {
            x for x in wordlist
            if x and not x.startswith("#") and not x.startswith("[")
        }

        Logger.log("5")

        m_results = []
        # Any allowed link that appears verbatim in the malware wordlist
        # is reported as malicious.
        l_malware_sites_found = m_urls_allowed.intersection(wordlist_filtered)
        Logger.log_more_verbose(
            "Found links to possible malware sites:\n%s" %
            "\n".join(" - " + x for x in sorted(l_malware_sites_found)))
        for l_malware_site in l_malware_sites_found:
            # Out url
            u = URL(url=l_malware_site, referer=info.url)
            v = MaliciousUrl(u)
            v.add_resource(info)
            m_results.append(v)
            m_results.append(u)

        Logger.log("6")

        if m_results:
            Logger.log_verbose("Discovered %s links to malware sites." %
                               len(m_results))
        else:
            Logger.log_verbose("No output links to malware sites found.")

        return m_results
def sf_MALICIOUS_COHOST(self, sf_module, source, raw_data): if self.allow_external: url = URL(source) vulnerability = MaliciousUrl(url, tool_id = sf_module) return url, vulnerability
class Robots(TestingPlugin):
    """
    This plugin analyzes robots.txt files looking for private pages.
    """

    #--------------------------------------------------------------------------
    def get_accepted_types(self):
        return [BaseURL]

    #--------------------------------------------------------------------------
    def check_download(self, url, name, content_length, content_type):
        # Returns True to continue or False to cancel.
        #
        # Some error page couldn't return non 200 code.
        # Some error page couldn't return content_type
        #
        # Only accept plain-text bodies (robots.txt is text/plain).
        return (content_type and
                content_type.strip().lower().split(";")[0] == "text/plain")

    #--------------------------------------------------------------------------
    def check_response(self, request, url, status_code, content_length,
                       content_type):
        # Returns True to continue or False to cancel.
        #
        # Some error page couldn't return non 200 code.
        # Some error page couldn't return content_type
        #
        # Looser than check_download: any text/* content is accepted.
        return (content_type and
                content_type.strip().lower().startswith("text/"))

    #--------------------------------------------------------------------------
    def run(self, info):
        # Download and parse <base_url>/robots.txt, collecting the URLs
        # named in allow/disallow/sitemap directives.
        m_return = []

        m_url = info.url
        m_hostname = info.hostname
        m_url_robots_txt = urljoin(m_url, 'robots.txt')

        p = None
        try:
            msg = "Looking for robots.txt in: %s" % m_hostname
            Logger.log_more_verbose(msg)
            p = download(m_url_robots_txt, self.check_download)
        except NetworkOutOfScope:
            Logger.log_more_verbose("URL out of scope: %s" %
                                    (m_url_robots_txt))
            return
        except Exception, e:
            Logger.log_more_verbose("Error while processing %r: %s" %
                                    (m_url_robots_txt, str(e)))
            return

        # Check for errors
        if not p:
            Logger.log_more_verbose("No robots.txt found.")
            return

        # Link the robots.txt URL to the downloaded content.
        u = URL(m_url_robots_txt, referer=m_url)
        p.add_resource(u)
        m_return.append(u)
        m_return.append(p)

        # Text with info
        m_robots_text = p.raw_data

        # Prepare for unicode
        # Strip/decode a UTF-8 or UTF-16 BOM if present.
        try:
            if m_robots_text.startswith(codecs.BOM_UTF8):
                m_robots_text = m_robots_text.decode('utf-8').lstrip(
                    unicode(codecs.BOM_UTF8, 'utf-8'))
            elif m_robots_text.startswith(codecs.BOM_UTF16):
                m_robots_text = m_robots_text.decode('utf-16')
        except UnicodeDecodeError:
            Logger.log_error_verbose(
                "Error while parsing robots.txt: Unicode format error.")
            return

        # Extract URLs
        m_discovered_suspicious = []
        m_discovered_urls = []
        m_discovered_urls_append = m_discovered_urls.append
        tmp_discovered = None
        m_lines = m_robots_text.splitlines()

        # Var used to update the status
        m_lines_count = len(m_lines)
        m_total = float(m_lines_count)

        for m_step, m_line in enumerate(m_lines):

            # Remove comments
            m_octothorpe = m_line.find('#')
            if m_octothorpe >= 0:
                m_line = m_line[:m_octothorpe]

            # Delete init spaces
            m_line = m_line.rstrip()

            # Ignore invalid lines
            if not m_line or ':' not in m_line:
                continue

            # Looking for URLs
            try:
                m_key, m_value = m_line.split(':', 1)
                m_key = m_key.strip().lower()
                m_value = m_value.strip()

                # Ignore wildcards
                if '*' in m_value:
                    continue

                if m_key in ('disallow', 'allow', 'sitemap') and m_value:
                    tmp_discovered = urljoin(m_url, m_value)
                    m_discovered_urls_append(tmp_discovered)

                    # If is a disallow URL, it must be suspicious
                    if m_key.lower() == "disallow":
                        m_discovered_suspicious.append(tmp_discovered)

            except Exception, e:
                continue

        # NOTE(review): run() ends here without returning m_return or using
        # m_discovered_urls / m_discovered_suspicious — this looks truncated
        # (the results are collected but discarded); confirm against the
        # upstream version of this plugin.
class Spider(TestingPlugin): """ This plugin is a web spider. """ #-------------------------------------------------------------------------- def get_accepted_types(self): return [URL] #-------------------------------------------------------------------------- def run(self, info): m_return = [] m_url = info.url Logger.log_verbose("Spidering URL: %s" % m_url) # Check if need follow first redirect, then follow the link. p = None try: allow_redirects = Config.audit_config.follow_redirects or \ (info.depth == 0 and Config.audit_config.follow_first_redirect) p = download(m_url, self.check_download, allow_redirects=allow_redirects) except NetworkException, e: Logger.log_error_verbose("Error while processing %r: %s" % (m_url, str(e))) if not p: return m_return # Send back the data m_return.append(p) # TODO: If it's a 301 response, get the Location header # Get links m_forms = None if p.information_type == HTML.data_subtype: m_links = extract_from_html(p.raw_data, m_url) m_forms = extract_forms_from_html(p.raw_data, m_url) #m_links.update( extract_from_text(p.raw_data, m_url) ) elif p.information_type == Text.data_subtype: m_links = extract_from_text(p.raw_data, m_url) else: return m_return try: m_links.remove(m_url) except Exception: pass # Do not follow URLs that contain certain keywords m_forbidden = [ x for x in WordListLoader.get_wordlist_as_list( Config.plugin_config["wordlist_no_spider"]) ] m_urls_allowed = [ url for url in m_links if not any(x in url for x in m_forbidden) ] m_urls_not_allowed = m_links.difference(m_urls_allowed) if m_urls_not_allowed: Logger.log_more_verbose("Skipped forbidden URLs:\n %s" % "\n ".join(sorted(m_urls_not_allowed))) # Do not follow URLs out of scope m_urls_in_scope = [] m_broken = [] for url in m_urls_allowed: try: if url in Config.audit_scope: m_urls_in_scope.append(url) except Exception: m_broken.append(url) if m_broken: if len(m_broken) == 1: Logger.log_more_verbose("Skipped uncrawlable URL: %s" % m_broken[0]) else: 
Logger.log_more_verbose("Skipped uncrawlable URLs:\n %s" % "\n ".join(sorted(m_broken))) m_out_of_scope_count = len(m_urls_allowed) - len( m_urls_in_scope) - len(m_broken) if m_out_of_scope_count: Logger.log_more_verbose("Skipped %d links out of scope." % m_out_of_scope_count) if m_urls_in_scope: Logger.log_verbose("Found %d links in URL: %s" % (len(m_urls_allowed), m_url)) else: Logger.log_more_verbose("No links found in URL: %s" % m_url) # Convert to URL data type for u in m_urls_in_scope: try: p = parse_url(u) if p.scheme == "mailto": m_resource = Email(p.netloc) elif p.scheme in ("http", "https"): m_resource = URL(url=u, referer=m_url) except Exception: warn(format_exc(), RuntimeWarning) print m_resource m_resource.add_resource(info) m_return.append(m_resource) # Get forms info if m_forms: m_forms_allowed = [ url for url in m_forms if not any(x in url[0] for x in m_forbidden) ] m_forms_not_allowed = {x[0] for x in m_forms }.difference(x[0] for x in m_forms_allowed) else: m_forms_allowed = [] m_forms_not_allowed = set() if m_forms_not_allowed: Logger.log_more_verbose("Skipped forbidden forms:\n %s" % "\n ".join(sorted(m_forms_not_allowed))) # Do not follow forms out of scope m_forms_in_scope = [] m_broken = [] for url in m_forms_allowed: try: if url[0] in Config.audit_scope: m_forms_in_scope.append(url) except Exception: m_broken.append(url[0]) if m_broken: if len(m_broken) == 1: Logger.log_more_verbose("Skipped uncrawlable forms: %s" % m_broken[0]) else: Logger.log_more_verbose("Skipped uncrawlable forms:\n %s" % "\n ".join(sorted(m_broken))) m_out_of_scope_count = len(m_forms_allowed) - len( m_forms_in_scope) - len(m_broken) if m_out_of_scope_count: Logger.log_more_verbose("Skipped %d forms out of scope." 
% m_out_of_scope_count) if m_forms_in_scope: Logger.log_verbose("Found %d forms in URL: %s" % (len(m_forms_in_scope), m_url)) else: Logger.log_more_verbose("No forms found in URL: %s" % m_url) # Convert to URL data type for u in m_forms_in_scope: try: url = u[0] method = u[1] params = {x["name"]: x["value"] for x in u[2]} m_resource = URL(url=url, referer=m_url, method=method, post_params=params) except Exception: warn(format_exc(), RuntimeWarning) m_resource.add_resource(info) m_return.append(m_resource) # Send the results return m_return
def helper_data_links():
    """
    Exercise the Data linking API in-memory: links, get_links,
    linked_data, find_linked_data, the associated_* views and
    LocalDataCache.on_finish(), using one URL/Text/UrlDisclosure triplet.
    """

    # Create some dummy data.
    from golismero.api.data.resource.url import URL
    from golismero.api.data.information.text import Text
    from golismero.api.data.vulnerability.information_disclosure.url_disclosure import UrlDisclosure
    d1 = URL("http://www.example.com/")
    d2 = Text("some text")
    d3 = UrlDisclosure(d1)
    d1.add_information(d2)

    # Test data_type, data_subtype, etc.
    print "Testing Data type checks..."
    assert d1.data_type == Data.TYPE_RESOURCE
    assert d1.data_subtype == URL.data_subtype
    assert d1.resource_type == d1.data_subtype
    assert d2.data_type == Data.TYPE_INFORMATION
    assert d2.data_subtype == Text.data_subtype
    assert d2.information_type == d2.data_subtype
    assert d3.data_type == Data.TYPE_VULNERABILITY
    assert d3.data_subtype == UrlDisclosure.data_subtype
    assert d3.vulnerability_type == d3.data_subtype

    # Test validate_link_minimums().
    print "Testing Data.validate_link_minimums()..."
    d1.validate_link_minimums()
    d2.validate_link_minimums()
    d3.validate_link_minimums()

    # Test the links property.
    print "Testing Data.links..."
    assert d1.links == {d2.identity, d3.identity}
    assert d2.links == {d1.identity}
    assert d3.links == {d1.identity}

    # Test the get_links method.
    # Filtering by own type yields nothing; by a linked type yields
    # exactly that object's identity.
    print "Testing Data.get_links()..."
    assert d1.get_links(d1.data_type) == set()
    assert d1.get_links(d1.data_type, d1.resource_type) == set()
    assert d1.get_links(d2.data_type) == {d2.identity}
    assert d1.get_links(d2.data_type, d2.information_type) == {d2.identity}
    assert d1.get_links(d3.data_type) == {d3.identity}
    assert d1.get_links(d3.data_type, d3.vulnerability_type) == {d3.identity}
    assert d2.get_links(d2.data_type) == set()
    assert d2.get_links(d2.data_type, d2.information_type) == set()
    assert d2.get_links(d1.data_type) == {d1.identity}
    assert d2.get_links(d1.data_type, d1.resource_type) == {d1.identity}
    assert d2.get_links(d3.data_type) == set()
    assert d2.get_links(d3.data_type, d3.vulnerability_type) == set()
    assert d3.get_links(d3.data_type) == set()
    assert d3.get_links(d3.data_type, d3.vulnerability_type) == set()
    assert d3.get_links(d1.data_type) == {d1.identity}
    assert d3.get_links(d1.data_type, d1.resource_type) == {d1.identity}
    assert d3.get_links(d2.data_type) == set()
    assert d3.get_links(d2.data_type, d2.information_type) == set()

    # Test the linked_data property.
    # There should be no accesses to the database since all data is local.
    print "Testing Data.linked_data..."
    assert {x.identity for x in d1.linked_data} == {d2.identity, d3.identity}
    assert {x.identity for x in d2.linked_data} == {d1.identity}
    assert {x.identity for x in d3.linked_data} == {d1.identity}

    # Test the get_linked_data() method.
    # There should be no accesses to the database since all data is local.
    print "Testing Data.get_linked_data()..."
    assert {x.identity for x in d1.find_linked_data(d1.data_type)} == set()
    assert {x.identity for x in d1.find_linked_data(d1.data_type, d1.resource_type)} == set()
    assert {x.identity for x in d1.find_linked_data(d2.data_type)} == {d2.identity}
    assert {x.identity for x in d1.find_linked_data(d2.data_type, d2.information_type)} == {d2.identity}
    assert {x.identity for x in d1.find_linked_data(d3.data_type)} == {d3.identity}
    assert {x.identity for x in d1.find_linked_data(d3.data_type, d3.vulnerability_type)} == {d3.identity}
    assert {x.identity for x in d2.find_linked_data(d2.data_type)} == set()
    assert {x.identity for x in d2.find_linked_data(d2.data_type, d2.information_type)} == set()
    assert {x.identity for x in d2.find_linked_data(d1.data_type)} == {d1.identity}
    assert {x.identity for x in d2.find_linked_data(d1.data_type, d1.resource_type)} == {d1.identity}
    assert {x.identity for x in d2.find_linked_data(d3.data_type)} == set()
    assert {x.identity for x in d2.find_linked_data(d3.data_type, d3.vulnerability_type)} == set()
    assert {x.identity for x in d3.find_linked_data(d3.data_type)} == set()
    assert {x.identity for x in d3.find_linked_data(d3.data_type, d3.vulnerability_type)} == set()
    assert {x.identity for x in d3.find_linked_data(d1.data_type)} == {d1.identity}
    assert {x.identity for x in d3.find_linked_data(d1.data_type, d1.resource_type)} == {d1.identity}
    assert {x.identity for x in d3.find_linked_data(d2.data_type)} == set()
    assert {x.identity for x in d3.find_linked_data(d2.data_type, d2.information_type)} == set()

    # Test the associated_* properties.
    # There should be no accesses to the database since all data is local.
    print "Testing Data.associated_*..."
    assert {x.identity for x in d1.associated_resources} == set()
    assert {x.identity for x in d1.associated_informations} == {d2.identity}
    assert {x.identity for x in d1.associated_vulnerabilities} == {d3.identity}
    assert {x.identity for x in d2.associated_informations} == set()
    assert {x.identity for x in d2.associated_resources} == {d1.identity}
    assert {x.identity for x in d2.associated_vulnerabilities} == set()
    assert {x.identity for x in d3.associated_vulnerabilities} == set()
    assert {x.identity for x in d3.associated_resources} == {d1.identity}
    assert {x.identity for x in d3.associated_informations} == set()

    # Test the get_associated_*_by_category() methods.
    # There should be no accesses to the database since all data is local.
    print "Testing Data.get_associated_*_by_category()..."
    assert {x.identity for x in d1.get_associated_resources_by_category(d1.resource_type)} == set()
    assert {x.identity for x in d1.get_associated_informations_by_category(d2.information_type)} == {d2.identity}
    assert {x.identity for x in d1.get_associated_vulnerabilities_by_category(d3.vulnerability_type)} == {d3.identity}
    assert {x.identity for x in d2.get_associated_informations_by_category(d2.information_type)} == set()
    assert {x.identity for x in d2.get_associated_resources_by_category(d1.resource_type)} == {d1.identity}
    assert {x.identity for x in d2.get_associated_vulnerabilities_by_category(d3.vulnerability_type)} == set()
    assert {x.identity for x in d3.get_associated_vulnerabilities_by_category(d3.vulnerability_type)} == set()
    assert {x.identity for x in d3.get_associated_resources_by_category(d1.resource_type)} == {d1.identity}
    assert {x.identity for x in d3.get_associated_informations_by_category(d2.information_type)} == set()

    # Test TempDataStorage.on_finish().
    # Links must be preserved after the cache flushes.
    print "Testing LocalDataCache.on_finish() on ideal conditions..."
    result = LocalDataCache.on_finish([d2, d3], d1)
    assert set(result) == set([d1, d2, d3])
    d1.validate_link_minimums()
    d2.validate_link_minimums()
    d3.validate_link_minimums()
    assert d1.links == {d2.identity, d3.identity}
    assert d2.links == {d1.identity}
    assert d3.links == {d1.identity}
    assert d1.get_links(d1.data_type) == set()
    assert d1.get_links(d1.data_type, d1.resource_type) == set()
    assert d1.get_links(d2.data_type) == {d2.identity}
    assert d1.get_links(d2.data_type, d2.information_type) == {d2.identity}
    assert d1.get_links(d3.data_type) == {d3.identity}
    assert d1.get_links(d3.data_type, d3.vulnerability_type) == {d3.identity}
    assert d2.get_links(d2.data_type) == set()
    assert d2.get_links(d2.data_type, d2.information_type) == set()
    assert d2.get_links(d1.data_type) == {d1.identity}
    assert d2.get_links(d1.data_type, d1.resource_type) == {d1.identity}
    assert d2.get_links(d3.data_type) == set()
    assert d2.get_links(d3.data_type, d3.vulnerability_type) == set()
    assert d3.get_links(d3.data_type) == set()
    assert d3.get_links(d3.data_type, d3.vulnerability_type) == set()
    assert d3.get_links(d1.data_type) == {d1.identity}
    assert d3.get_links(d1.data_type, d1.resource_type) == {d1.identity}
    assert d3.get_links(d2.data_type) == set()
    assert d3.get_links(d2.data_type, d2.information_type) == set()