def recv_info(self, info):
    """
    Analyze a URL resource for suspicious traits.

    Collects SuspiciousURLPath results by (1) matching wordlist keywords
    against the URL path components, (2) matching a wordlist of suspicious
    extensions, and (3) flagging hostnames/subdomains with high Shannon
    entropy.  Finally downloads the page to look for outgoing links to
    malware sites.

    :param info: URL resource to analyze (project type; assumed to expose
        ``parsed_url``, ``url`` and ``depth`` — confirm against caller).
    """
    m_parsed_url = info.parsed_url
    m_results = []

    #------------------------------------------------------------------
    # Find suspicious URLs by matching against known substrings.

    # Load wordlists.
    m_wordlist_middle     = WordListLoader.get_wordlist(Config.plugin_config['middle'])
    m_wordlist_extensions = WordListLoader.get_wordlist(Config.plugin_config['extensions'])

    # Add keywords matching any position of the URL path
    # (directory component, file base name or extension).
    m_results.extend(
        SuspiciousURLPath(info, x)
        for x in m_wordlist_middle
        if x in m_parsed_url.directory.split("/")
        or x == m_parsed_url.filebase
        or x == m_parsed_url.extension
    )

    # Add keywords matching the URL extension exactly.
    m_results.extend(
        SuspiciousURLPath(info, x)
        for x in m_wordlist_extensions
        if m_parsed_url.extension == x
    )

    #------------------------------------------------------------------
    # Find suspicious URLs by calculating the Shannon entropy of the hostname.
    # Idea from: https://github.com/stricaud/urlweirdos/blob/master/src/urlw/plugins/shannon/__init__.py
    # TODO: test with unicode enabled hostnames!

    # Check the Shannon entropy for the whole hostname.
    hostname = info.parsed_url.hostname
    entropy = calculate_shannon_entropy(hostname)
    if entropy > 4.0:
        m_results.append(SuspiciousURLPath(info, hostname))

    # Check the Shannon entropy for each subdomain (short labels skipped:
    # entropy of very short strings is not meaningful).
    for subdomain in info.parsed_url.hostname.split('.'):
        if len(subdomain) > 3:
            entropy = calculate_shannon_entropy(subdomain)
            if entropy > 4.0:
                m_results.append(SuspiciousURLPath(info, subdomain))

    #------------------------------------------------------------------
    #
    # Get malware suspicious links
    #
    #------------------------------------------------------------------
    p = None
    m_url = info.url
    Logger.log_more_verbose("Looking for output links to malware sites")
    try:
        # Follow redirects if configured, or only the first redirect
        # when this is the initial (depth 0) request.
        allow_redirects = Config.audit_config.follow_redirects or \
            (info.depth == 0 and Config.audit_config.follow_first_redirect)
        p = download(m_url, self.check_download, allow_redirects=allow_redirects)
    except NetworkException as e:  # "except X, e" is Python 2 only
        Logger.log_more_verbose("Error while processing %r: %s" % (m_url, str(e)))
def is_URL_in_windows(self, main_url):
    """
    Detect if platform is Windows or \*NIX. To do this, get the first
    link, in scope, and does two resquest. If are the same response,
    then, platform are Windows. Else are \*NIX.

    :returns: True, if the remote host is a Windows system. False is
        \*NIX or None if unknown.
    :rtype: bool
    """
    # Links containing any of these words are skipped, to avoid
    # accidentally signing out of the audited application.
    m_forbidden = (
        "logout",
        "logoff",
        "exit",
        "sigout",
        "signout",
    )

    # Get the main web page.
    m_r = download(main_url, callback=self.check_download)
    if not m_r or not m_r.raw_data:
        return None
    discard_data(m_r)

    # Get the first link.
    m_links = None
    try:
        if m_r.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(m_r.raw_data, main_url)
        else:
            m_links = extract_from_text(m_r.raw_data, main_url)
    except TypeError:
        # The bound exception name was unused; format_exc() captures
        # the full traceback for the log instead.
        Logger.log_error_more_verbose("Plugin error: %s" % format_exc())
        return None
def recv_info(self, info):
    """
    Spider the given URL resource, honoring the configured depth limit.

    :param info: URL resource to spider (project type; assumed to expose
        ``url`` and ``depth`` — confirm against caller).
    :returns: list of discovered results (empty when depth exceeded).
    """
    m_return = []

    m_url = info.url
    m_depth = info.depth

    # Check depth: stop descending once the configured limit is passed.
    if Config.audit_config.depth is not None and m_depth > Config.audit_config.depth:
        Logger.log_more_verbose("Spider depth level exceeded for URL: %s" % m_url)
        return m_return

    Logger.log_verbose("Spidering URL: %r" % m_url)

    # Check if we need to follow the first redirect.
    p = None
    try:
        allow_redirects = Config.audit_config.follow_redirects or \
            (m_depth == 0 and Config.audit_config.follow_first_redirect)
        p = download(m_url, self.check_download, allow_redirects=allow_redirects)
    except NetworkException as e:  # "except X, e" is Python 2 only
        Logger.log_more_verbose("Error while processing %r: %s" % (m_url, str(e)))
def is_URL_in_windows(self, main_url):
    """
    Detect if platform is Windows or \*NIX. To do this, get the first
    link, in scope, and does two resquest. If are the same response,
    then, platform are Windows. Else are \*NIX.

    :returns: True, if the remote host is a Windows system. False is
        \*NIX or None if unknown.
    :rtype: bool
    """
    # Links containing any of these words are skipped, to avoid
    # accidentally signing out of the audited application.
    m_forbidden = (
        "logout",
        "logoff",
        "exit",
        "sigout",
        "signout",
    )

    # Get the main web page.
    m_r = download(main_url, callback=self.check_download)
    if not m_r or not m_r.raw_data:
        return None
    discard_data(m_r)

    # Get the first link.
    m_links = None
    try:
        if m_r.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(m_r.raw_data, main_url)
        else:
            m_links = extract_from_text(m_r.raw_data, main_url)
    except TypeError:
        # The bound exception name was unused; format_exc() captures
        # the full traceback for the log instead.
        Logger.log_error_more_verbose("Plugin error: %s" % format_exc())
        return None
def find_htm_file(url):
    """
    Probe for a ``DeveloperMenu.htm`` file relative to *url* and, when it
    responds with HTTP 200, harvest every href target found in its body.

    :param url: base URL to probe against.
    :returns: list of href values extracted from the page (empty when the
        file is absent).
    """
    harvested = []
    for candidate in ['DeveloperMenu.htm']:
        # Drop a leading slash so the join treats the URL as a plain base.
        base = url[1:] if url.startswith("/") else url
        target = urljoin(base, candidate)
        probe = HTTP.get_url(target, use_cache=False, method="GET")
        if probe.status == "200":
            page = download(target)
            harvested = re.findall(r'href=[\'"]?([^\'" >]+)', page.raw_data)
    return harvested
def recv_info(self, info):
    """
    Spider the given URL resource.

    :param info: URL resource to spider (project type; assumed to expose
        ``url`` and ``depth`` — confirm against caller).
    :returns: list of discovered results.
    """
    m_return = []

    m_url = info.url
    Logger.log_verbose("Spidering URL: %r" % m_url)

    # Check if we need to follow the first redirect.
    p = None
    try:
        allow_redirects = Config.audit_config.follow_redirects or \
            (info.depth == 0 and Config.audit_config.follow_first_redirect)
        p = download(m_url, self.check_download, allow_redirects=allow_redirects)
    except NetworkException as e:  # "except X, e" is Python 2 only
        Logger.log_more_verbose("Error while processing %r: %s" % (m_url, str(e)))
def recv_info(self, info):
    """
    Look for a robots.txt file on the target host.

    :param info: resource to analyze (project type; assumed to expose
        ``url`` and ``hostname`` — confirm against caller).
    """
    m_return = []

    m_url = info.url
    m_hostname = info.hostname
    m_url_robots_txt = urljoin(m_url, 'robots.txt')

    p = None
    try:
        msg = "Looking for robots.txt in: %s" % m_hostname
        Logger.log_more_verbose(msg)
        p = download(m_url_robots_txt, self.check_download)
    except NetworkOutOfScope:
        Logger.log_more_verbose("URL out of scope: %s" % (m_url_robots_txt))
        return
    except Exception as e:  # deliberately broad: any fetch failure aborts quietly
        Logger.log_more_verbose("Error while processing %r: %s" % (m_url_robots_txt, str(e)))
        return
def run(self, info):
    """
    Spider the given URL resource.

    :param info: URL resource to spider (project type; assumed to expose
        ``url`` and ``depth`` — confirm against caller).
    :returns: list of discovered results.
    """
    m_return = []

    m_url = info.url
    Logger.log_verbose("Spidering URL: %s" % m_url)

    # Check if we need to follow the first redirect, then follow the link.
    p = None
    try:
        allow_redirects = Config.audit_config.follow_redirects or \
            (info.depth == 0 and Config.audit_config.follow_first_redirect)
        p = download(m_url, self.check_download, allow_redirects=allow_redirects)
    except NetworkException as e:  # "except X, e" is Python 2 only
        Logger.log_error_verbose("Error while processing %r: %s" % (m_url, str(e)))
def find_xml_files(url):
    """
    Probe for known developer XML files relative to *url* and collect the
    object links (``Object/ObjLink`` text) referenced by each one found.

    :param url: base URL to probe against.
    :returns: list of link strings gathered from the XML files.
    """
    collected = []
    for xml_name in ['execute.xml', 'DeveloperMenu.xml']:
        # Drop a leading slash so the join treats the URL as a plain base.
        base = url[1:] if url.startswith("/") else url
        target = urljoin(base, xml_name)
        probe = HTTP.get_url(target, use_cache=False, method="GET")
        if probe.status == "200":
            payload = download(target)
            tree = ET.fromstring(payload.raw_data)
            try:
                for obj in tree.findall('Object'):
                    Logger.log(obj.find('ObjLink').text)
                    collected.append(obj.find('ObjLink').text)
            except Exception:
                # Best-effort parse: malformed entries are skipped silently.
                ##raise # XXX DEBUG
                pass
    return collected
def run(self, info):
    """
    Look for a robots.txt file on the target host.

    :param info: resource to analyze (project type; assumed to expose
        ``url`` and ``hostname`` — confirm against caller).
    """
    m_return = []

    m_url = info.url
    m_hostname = info.hostname
    m_url_robots_txt = urljoin(m_url, 'robots.txt')

    p = None
    try:
        msg = "Looking for robots.txt in: %s" % m_hostname
        Logger.log_more_verbose(msg)
        p = download(m_url_robots_txt, self.check_download)
    except NetworkOutOfScope:
        Logger.log_more_verbose("URL out of scope: %s" % (m_url_robots_txt))
        return
    except Exception as e:  # deliberately broad: any fetch failure aborts quietly
        Logger.log_more_verbose("Error while processing %r: %s" % (m_url_robots_txt, str(e)))
        return
def is_URL_in_windows(self, main_url):
    """
    Detect if platform is Windows or \*NIX.

    Fetches the first in-scope link twice — once with the original URL
    and once uppercased. If both responses are nearly identical, the
    filesystem is case-insensitive, so the platform is assumed to be
    Windows; otherwise \*NIX.

    :returns: True, if the remote host is a Windows system. False is
        \*NIX or None if unknown.
    :rtype: bool
    """
    # Links containing any of these words are skipped, to avoid
    # accidentally signing out of the audited application.
    m_forbidden = (
        "logout",
        "logoff",
        "exit",
        "sigout",
        "signout",
    )

    # Get the main web page. Also bail out when there is no content to
    # parse (consistent with the other variants of this method).
    m_r = download(main_url, callback=self.check_download)
    if not m_r or not m_r.raw_data:
        return None
    discard_data(m_r)

    # Get the links from the page.
    if m_r.information_type == Information.INFORMATION_HTML:
        m_links = extract_from_html(m_r.raw_data, main_url)
    else:
        m_links = extract_from_text(m_r.raw_data, main_url)
    if not m_links:
        return None

    # Get the first link of the page that's in scope of the audit.
    m_first_link = None
    for u in m_links:
        if u in Config.audit_scope and not any(x in u for x in m_forbidden):
            m_first_link = u
            break
    if not m_first_link:
        return None

    # Now make two requests for the link: one with the original URL and
    # another with the uppercase version of it.

    # Original
    m_response_orig = HTTP.get_url(m_first_link, callback=self.check_response)  # FIXME handle exceptions!
    discard_data(m_response_orig)

    # Uppercase
    m_response_upper = HTTP.get_url(m_first_link.upper(), callback=self.check_response)  # FIXME handle exceptions!
    discard_data(m_response_upper)

    # Compare them.
    m_orig_data = m_response_orig.raw_response if m_response_orig else ""
    m_upper_data = m_response_upper.raw_response if m_response_upper else ""
    m_match_level = get_diff_ratio(m_orig_data, m_upper_data)

    # If the responses match by more than 95%, both URLs point to the
    # same resource => Windows; else => *NIX.
    return m_match_level > 0.95
def __get_wordpress_version(self, url):
    """
    This function gets the current version of WordPress and the last
    version available for download.

    :param url: URL of target.
    :type url: str.

    :return: a tuple with (CURRENT_VERSION, LAST_AVAILABLE_VERSION)
    :rtype: tuple(str, str)
    """
    # Fallback URLs and the regex that extracts the version from each.
    url_version = {
        # Generic
        "wp-login.php": r"(;ver=)([0-9\.]+)([\-a-z]*)",

        # For WordPress 3.8
        "wp-admin/css/wp-admin-rtl.css": r"(Version[\s]+)([0-9\.]+)",
        "wp-admin/css/wp-admin.css": r"(Version[\s]+)([0-9\.]+)"
    }

    #
    # Get current version
    #

    # Method 1: parse the version out of readme.html.
    url_current_version = urljoin(url, "readme.html")
    current_version_content_1 = download(url_current_version)

    current_version_method1 = None
    if isinstance(current_version_content_1, HTML):
        m1 = re.search(r"(<br/>[\s]*[vV]ersion[\s]*)([0-9\.]*)",
                       current_version_content_1.raw_data)
        if m1 is not None and len(m1.groups()) == 2:
            current_version_method1 = m1.group(2)

    # Method 2: look for the generator <meta> tag in the main page.
    current_version_content_2 = download(url)

    current_version_method2 = None
    if current_version_content_2:  # guard: download() may return nothing
        m2 = re.search(r"(<meta name=\"generator\" content=\"WordPress[\s]+)([0-9\.]+)",
                       current_version_content_2.raw_data)
        if m2 is not None and len(m2.groups()) == 2:
            current_version_method2 = m2.group(2)

    # Merge the results of both methods. When both found a version and
    # they disagree, the meta generator tag (method 2) wins.
    if current_version_method2 is not None:
        current_version = current_version_method2
    elif current_version_method1 is not None:
        current_version = current_version_method1
    else:
        current_version = "unknown"

    # If the current version was not found, try the fallback URLs.
    if current_version == "unknown":
        for url_pre, regex in url_version.items():
            # URL to find the WordPress version.
            url_current_version = urljoin(url, url_pre)
            current_version_content = download(url_current_version)
            if current_version_content is None:  # guard: nothing downloaded
                continue
            discard_data(current_version_content)

            # Find the version.
            tmp_version = re.search(regex, current_version_content.raw_data)

            if tmp_version is not None:
                current_version = tmp_version.group(2)
                break  # Found -> stop search

    #
    # Get last version
    #

    # URL to get the last version of WordPress available.
    url_last_version = "http://wordpress.org/download/"
    last_version_content = download(url_last_version, allow_out_of_scope=True)

    last_version = "unknown"
    if isinstance(last_version_content, HTML):
        m_last = re.search("(WordPress )([0-9\.]*)", last_version_content.raw_data)
        if m_last is not None and len(m_last.groups()) == 2:
            last_version = m_last.group(2)

    # Discard unused data.
    discard_data(current_version_content_2)
    discard_data(current_version_content_1)
    discard_data(last_version_content)

    return current_version, last_version
def is_URL_in_windows(self, main_url):
    """
    Detect if platform is Windows or \*NIX.

    Fetches the first in-scope link twice — once with the original URL
    and once uppercased. If both responses are nearly identical, the
    filesystem is case-insensitive, so the platform is assumed to be
    Windows; otherwise \*NIX.

    :returns: True, if the remote host is a Windows system. False is
        \*NIX or None if unknown.
    :rtype: bool
    """
    # Links containing any of these words are skipped, to avoid
    # accidentally signing out of the audited application.
    m_forbidden = (
        "logout",
        "logoff",
        "exit",
        "sigout",
        "signout",
    )

    # Get the main web page. Also bail out when there is no content to
    # parse (consistent with the other variants of this method).
    m_r = download(main_url, callback=self.check_download)
    if not m_r or not m_r.raw_data:
        return None
    discard_data(m_r)

    # Get the links from the page.
    if m_r.information_type == Information.INFORMATION_HTML:
        m_links = extract_from_html(m_r.raw_data, main_url)
    else:
        m_links = extract_from_text(m_r.raw_data, main_url)
    if not m_links:
        return None

    # Get the first link of the page that's in scope of the audit.
    m_first_link = None
    for u in m_links:
        if u in Config.audit_scope and not any(x in u for x in m_forbidden):
            m_first_link = u
            break
    if not m_first_link:
        return None

    # Now make two requests for the link: one with the original URL and
    # another with the uppercase version of it.

    # Original
    m_response_orig = HTTP.get_url(
        m_first_link, callback=self.check_response)  # FIXME handle exceptions!
    discard_data(m_response_orig)

    # Uppercase
    m_response_upper = HTTP.get_url(
        m_first_link.upper(), callback=self.check_response)  # FIXME handle exceptions!
    discard_data(m_response_upper)

    # Compare them.
    m_orig_data = m_response_orig.raw_response if m_response_orig else ""
    m_upper_data = m_response_upper.raw_response if m_response_upper else ""
    m_match_level = get_diff_ratio(m_orig_data, m_upper_data)

    # If the responses match by more than 95%, both URLs point to the
    # same resource => Windows; else => *NIX.
    return m_match_level > 0.95
def __get_wordpress_version(self, url):
    """
    This function gets the current version of WordPress and the last
    version available for download.

    :param url: URL of target.
    :type url: str.

    :return: a tuple with (CURRENT_VERSION, LAST_AVAILABLE_VERSION)
    :rtype: tuple(str, str)
    """
    # Fallback URLs and the regex that extracts the version from each.
    url_version = {
        # Generic
        "wp-login.php": r"(;ver=)([0-9\.]+)([\-a-z]*)",

        # For WordPress 3.8
        "wp-admin/css/wp-admin-rtl.css": r"(Version[\s]+)([0-9\.]+)",
        "wp-admin/css/wp-admin.css": r"(Version[\s]+)([0-9\.]+)"
    }

    #
    # Get current version
    #

    # Method 1: parse the version out of readme.html.
    url_current_version = urljoin(url, "readme.html")
    current_version_content_1 = download(url_current_version)

    current_version_method1 = None
    if isinstance(current_version_content_1, HTML):
        m1 = re.search(r"(<br/>[\s]*[vV]ersion[\s]*)([0-9\.]*)",
                       current_version_content_1.raw_data)
        if m1 is not None and len(m1.groups()) == 2:
            current_version_method1 = m1.group(2)

    # Method 2: look for the generator <meta> tag in the main page.
    current_version_content_2 = download(url)

    current_version_method2 = None
    if current_version_content_2:  # guard: download() may return nothing
        m2 = re.search(r"(<meta name=\"generator\" content=\"WordPress[\s]+)([0-9\.]+)",
                       current_version_content_2.raw_data)
        if m2 is not None and len(m2.groups()) == 2:
            current_version_method2 = m2.group(2)

    # Merge the results of both methods. When both found a version and
    # they disagree, the meta generator tag (method 2) wins.
    if current_version_method2 is not None:
        current_version = current_version_method2
    elif current_version_method1 is not None:
        current_version = current_version_method1
    else:
        current_version = "unknown"

    # If the current version was not found, try the fallback URLs.
    if current_version == "unknown":
        for url_pre, regex in url_version.items():
            # URL to find the WordPress version.
            url_current_version = urljoin(url, url_pre)
            current_version_content = download(url_current_version)
            if current_version_content is None:  # guard: nothing downloaded
                continue
            discard_data(current_version_content)

            # Find the version.
            tmp_version = re.search(regex, current_version_content.raw_data)

            if tmp_version is not None:
                current_version = tmp_version.group(2)
                break  # Found -> stop search

    #
    # Get last version
    #

    # URL to get the last version of WordPress available.
    url_last_version = "http://wordpress.org/download/"
    last_version_content = download(url_last_version, allow_out_of_scope=True)

    last_version = "unknown"
    if isinstance(last_version_content, HTML):
        m_last = re.search("(WordPress )([0-9\.]*)", last_version_content.raw_data)
        if m_last is not None and len(m_last.groups()) == 2:
            last_version = m_last.group(2)

    # Discard unused data.
    discard_data(current_version_content_2)
    discard_data(current_version_content_1)
    discard_data(last_version_content)

    return current_version, last_version