def _matchesFailedLogin(self, resp_body):
    '''
    Tell whether a response body looks like a failed login page.

    The body is compared with the first two entries of
    self._login_failed_result_list; the second entry is only read when the
    first one did not match.

    @parameter resp_body: The response body to analyze.
    @return: True if relative_distance_ge() reports a match (0.65
             threshold) against one of those two stored responses,
             False otherwise.
    '''
    known_failures = self._login_failed_result_list

    # 0.65 gives a good measure of similarity
    for index in (0, 1):
        if relative_distance_ge(resp_body, known_failures[index], 0.65):
            return True

    # I'm happy! The response_body *IS NOT* a failed login page.
    return False
def _single_404_check(self, http_response, html_body):
    '''
    Quick check to decide whether http_response is a 404.

    A sibling URL which should not exist is requested: "not-" is prepended
    to the file name of the original URL or, when the URL has no file
    name, the relative path '../<rand_alnum(8)>/' is joined to it. The
    cleaned body of that probe is then compared with html_body.

    Side effect: when the probe is answered with a 404 status code, its
    domain path is stored in self._directory_uses_404_codes.

    :param http_response: The original HTTP response
    :param html_body: The original HTML body after passing it by a cleaner
    :return: The value of relative_distance_ge() for the probe body and
             html_body using IS_EQUAL_RATIO; a true value means that the
             original response was a 404.
    '''
    original_url = http_response.get_url()
    file_name = original_url.get_file_name()

    if file_name:
        probe_path = 'not-%s' % file_name
    else:
        probe_path = '../%s/' % rand_alnum(8)

    url_404 = original_url.url_join(probe_path)
    probe_response = self._send_404(url_404, store=False)
    probe_body = get_clean_body(probe_response)

    # Remember the directories which answer with real 404 status codes
    got_404_code = probe_response.get_code() == 404
    if got_404_code and (url_404.get_domain_path()
                         not in self._directory_uses_404_codes):
        self._directory_uses_404_codes.add(url_404.get_domain_path())

    return relative_distance_ge(probe_body, html_body, IS_EQUAL_RATIO)
def _single_404_check(self, http_response, html_body):
    '''
    Quick check to decide whether http_response is a 404.

    A sibling URL which should not exist is requested: "not-" is prepended
    to the file name of the original URL or, when the URL has no file
    name, the relative path '../<createRandAlNum(8)>/' is joined to it.
    The cleaned body of that probe is then compared with html_body.

    @param http_response: The original HTTP response
    @param html_body: The original HTML body after passing it by a cleaner
    @return: The value of relative_distance_ge() for the probe body and
             html_body using IS_EQUAL_RATIO; a true value means that the
             original response was a 404.
    '''
    original_url = http_response.getURL()
    file_name = original_url.getFileName()

    if file_name:
        probe_path = 'not-%s' % file_name
    else:
        probe_path = '../%s/' % createRandAlNum(8)

    url_404 = original_url.urlJoin(probe_path)
    probe_response = self._send_404(url_404, store=False)
    probe_body = get_clean_body(probe_response)

    return relative_distance_ge(probe_body, html_body, IS_EQUAL_RATIO)
def _single_404_check(self, http_response, html_body):
    '''
    Quick check to decide whether http_response is a 404.

    A sibling URL which should not exist is requested: "not-" is prepended
    to the file name of the original URL or, when the URL has no file
    name, the relative path '../<rand_alnum(8)>/' is joined to it. The
    cleaned body of that probe is then compared with html_body.

    Side effect: when the probe is answered with a 404 status code, its
    domain path is stored in self._directory_uses_404_codes.

    :param http_response: The original HTTP response
    :param html_body: The original HTML body after passing it by a cleaner
    :return: The value of relative_distance_ge() for the probe body and
             html_body using IS_EQUAL_RATIO; a true value means that the
             original response was a 404.
    '''
    original_url = http_response.get_url()
    file_name = original_url.get_file_name()

    if file_name:
        probe_path = 'not-%s' % file_name
    else:
        probe_path = '../%s/' % rand_alnum(8)

    url_404 = original_url.url_join(probe_path)
    probe_response = self._send_404(url_404, store=False)
    probe_body = get_clean_body(probe_response)

    # Remember the directories which answer with real 404 status codes
    got_404_code = probe_response.get_code() == 404
    if got_404_code and (url_404.get_domain_path()
                         not in self._directory_uses_404_codes):
        self._directory_uses_404_codes.add(url_404.get_domain_path())

    return relative_distance_ge(probe_body, html_body, IS_EQUAL_RATIO)
def test_prox_req_ok(self):
    '''
    Fetch the same URL through the proxy and directly; both response
    bodies must be very similar.
    '''
    target = 'http://moth'

    # The proxied request goes first, then the direct one
    body_via_proxy = self.proxy_opener.open(target).read()
    body_direct = urllib2.urlopen(target).read()

    # They must be very similar
    bodies_match = relative_distance_ge(body_direct, body_via_proxy, 0.9)
    self.assertTrue(bodies_match)
def _generate_404_knowledge( self, url ):
    '''
    Based on a URL, request something that we know is going to be a 404.
    Afterwards analyze the 404's and summarise them.

    One random file name is requested in the domain path of url for each
    common handler extension, plus the extension of url itself. The
    requests are sent with self._send_404 using threads; the bodies are
    then read from self._response_body_list (presumably filled by
    self._send_404 -- verify against that method).

    @parameter url: The URL used to get the extension and the domain path.
    @return: A list with 404 bodies in which no body is similar
             (relative_distance_ge with IS_EQUAL_RATIO) to a body that
             appears earlier in the list. The list is empty when no body
             was collected.
    '''
    # Get the filename extension and create a 404 for it
    extension = urlParser.getExtension( url )
    domain_path = urlParser.getDomainPath( url )

    # the result
    self._response_body_list = []

    #
    #   This is a list of the most common handlers, in some configurations,
    #   the 404 depends on the handler, so I want to make sure that I catch
    #   the 404 for each one
    #
    handlers = set(['py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'gif',
                    'htm', 'pl', 'cgi', 'xhtml', 'htmls', extension])

    for handler in handlers:
        rand_alnum_file = createRandAlNum( 8 ) + '.' + handler
        url404 = urlParser.urlJoin( domain_path , rand_alnum_file )

        #   Send the requests using threads:
        targs = ( url404, )
        tm.startFunction( target=self._send_404, args=targs , ownerObj=self )

    # Wait for all threads to finish sending the requests.
    tm.join( self )

    #
    #   I have the bodies in self._response_body_list , but maybe they all
    #   look the same, so I'll filter the ones that look alike: a body is
    #   only kept when it is not similar to any body that was already kept.
    #   (The previous nested loop kept every body that was different from
    #   at least one other body, which filtered nothing as soon as two
    #   different 404 pages existed, and failed with an IndexError when the
    #   list was empty.)
    #
    result = []
    for body in self._response_body_list:
        if not any(relative_distance_ge(body, known, IS_EQUAL_RATIO)
                   for known in result):
            result.append(body)

    # I don't need these anymore
    self._response_body_list = None

    # And I return the ones I need
    om.out.debug('The 404 body result database has a length of ' + str(len(result)) +'.')

    return result
def _matches_failed_login(self, resp_body, login_failed_result_list): ''' :return: True if the resp_body matches the previously created responses that are stored in self._login_failed_result_list. ''' for login_failed_result in login_failed_result_list: if relative_distance_ge(resp_body, login_failed_result, 0.65): return True else: # I'm happy! The response_body *IS NOT* a failed login page. return False
def _find_OS(self, fuzzableRequest): ''' Analyze responses and determine if remote web server runs on windows or *nix @Return: None, the knowledge is saved in the knowledgeBase ''' found_os = False freq_url = fuzzableRequest.getURL() filename = freq_url.getFileName() dirs = freq_url.getDirectories()[:-1] # Skipping "domain level" dir. if dirs and filename: last_url = dirs[-1] last_url = last_url.url_string windows_url = url_object(last_url[0:-1] + '\\' + filename) windows_response = self._uri_opener.GET(windows_url) original_response = self._uri_opener.GET(freq_url) found_os = True if relative_distance_ge(original_response.getBody(), windows_response.getBody(), 0.98): i = info.info() i.setPluginName(self.getName()) i.setName('Operating system') i.setURL( windows_response.getURL() ) i.setMethod( 'GET' ) i.setDesc('Fingerprinted this host as a Microsoft Windows system.' ) i.setId( [windows_response.id, original_response.id] ) kb.kb.append( self, 'operating_system_str', 'windows' ) kb.kb.append( self, 'operating_system', i ) om.out.information( i.getDesc() ) else: i = info.info() i.setPluginName(self.getName()) i.setName('Operating system') i.setURL( original_response.getURL() ) i.setMethod( 'GET' ) msg = 'Fingerprinted this host as a *nix system. Detection for this operating' msg += ' system is weak, "if not windows: is linux".' i.setDesc( msg ) i.setId( [original_response.id, windows_response.id] ) kb.kb.append( self, 'operating_system_str', 'unix' ) kb.kb.append( self, 'operating_system', i ) om.out.information( i.getDesc() ) return found_os
def _find_OS(self, fuzzableRequest): """ Analyze responses and determine if remote web server runs on windows or *nix @Return: None, the knowledge is saved in the knowledgeBase """ dirs = fuzzableRequest.getURL().getDirectories() filename = fuzzableRequest.getURL().getFileName() if len(dirs) > 1 and filename: last_url = dirs[-1] last_url = last_url.url_string windows_url = url_object(last_url[0:-1] + "\\" + filename) windows_response = self._urlOpener.GET(windows_url) original_response = self._urlOpener.GET(fuzzableRequest.getURL()) self._found_OS = True if relative_distance_ge(original_response.getBody(), windows_response.getBody(), 0.98): i = info.info() i.setPluginName(self.getName()) i.setName("Operating system") i.setURL(windows_response.getURL()) i.setMethod("GET") i.setDesc("Fingerprinted this host as a Microsoft Windows system.") i.setId([windows_response.id, original_response.id]) kb.kb.append(self, "operating_system_str", "windows") kb.kb.append(self, "operating_system", i) om.out.information(i.getDesc()) else: i = info.info() i.setPluginName(self.getName()) i.setName("Operating system") i.setURL(original_response.getURL()) i.setMethod("GET") msg = "Fingerprinted this host as a *nix system. Detection for this operating" msg += ' system is weak, "if not windows: is linux".' i.setDesc(msg) i.setId([original_response.id, windows_response.id]) kb.kb.append(self, "operating_system_str", "unix") kb.kb.append(self, "operating_system", i) om.out.information(i.getDesc())
def _find_OS(self, fuzzable_request): ''' Analyze responses and determine if remote web server runs on windows or *nix. @Return: None, the knowledge is saved in the knowledgeBase ''' freq_url = fuzzable_request.get_url() filename = freq_url.get_file_name() dirs = freq_url.get_directories()[:-1] # Skipping "domain level" dir. if dirs and filename: last_url = dirs[-1] last_url = last_url.url_string windows_url = URL(last_url[0:-1] + '\\' + filename) windows_response = self._uri_opener.GET(windows_url) original_response = self._uri_opener.GET(freq_url) if relative_distance_ge(original_response.get_body(), windows_response.get_body(), 0.98): desc = 'Fingerprinted this host as a Microsoft Windows system.' os_str = 'windows' else: desc = 'Fingerprinted this host as a *nix system. Detection for'\ ' this operating system is weak, "if not windows then'\ ' linux".' os_str = 'unix' response_ids = [windows_response.id, original_response.id] i = Info('Operating system', desc, response_ids, self.get_name()) i.set_url(windows_response.get_url()) kb.kb.raw_write(self, 'operating_system_str', os_str) kb.kb.append(self, 'operating_system', i) om.out.information(i.get_desc()) return True return False
def _find_OS(self, fuzzable_request): ''' Analyze responses and determine if remote web server runs on windows or *nix. @Return: None, the knowledge is saved in the knowledgeBase ''' freq_url = fuzzable_request.get_url() filename = freq_url.get_file_name() dirs = freq_url.get_directories()[:-1] # Skipping "domain level" dir. if dirs and filename: last_url = dirs[-1] last_url = last_url.url_string windows_url = URL(last_url[0:-1] + '\\' + filename) windows_response = self._uri_opener.GET(windows_url) original_response = self._uri_opener.GET(freq_url) if relative_distance_ge(original_response.get_body(), windows_response.get_body(), 0.98): desc = 'Fingerprinted this host as a Microsoft Windows system.' os_str = 'windows' else: desc = 'Fingerprinted this host as a *nix system. Detection for'\ ' this operating system is weak, "if not windows then'\ ' linux".' os_str = 'unix' response_ids = [windows_response.id, original_response.id] i = Info('Operating system', desc, response_ids, self.get_name()) i.set_url(windows_response.get_url()) kb.kb.raw_write(self, 'operating_system_str', os_str) kb.kb.append(self, 'operating_system', i) om.out.information(i.get_desc()) return True return False
def _find_OS( self, fuzzableRequest ):
    '''
    Analyze responses and determine if remote web server runs on windows
    or *nix.

    The URL of fuzzableRequest is requested twice: as is, and with the
    last path separator replaced by a backslash. When both bodies match
    (relative_distance_ge, 0.98 threshold) the host is reported as
    windows, otherwise as unix. Nothing is done when the URL has no file
    name or urlParser.getDirectories() returns less than two entries.

    @Return: None, the knowledge is saved in the knowledgeBase and
             self._found_OS is set to True once both requests were sent.
    '''
    dirs = urlParser.getDirectories(fuzzableRequest.getURL())
    filename = urlParser.getFileName(fuzzableRequest.getURL())

    if len(dirs) <= 1 or not filename:
        return

    parent_dir = dirs[-1]
    windowsURL = parent_dir[0:-1] + '\\' + filename

    windows_response = self._urlOpener.GET(windowsURL)
    original_response = self._urlOpener.GET(fuzzableRequest.getURL())
    self._found_OS = True

    looks_like_windows = relative_distance_ge(original_response.getBody(),
                                              windows_response.getBody(),
                                              0.98)
    if looks_like_windows:
        os_str = 'windows'
        desc = 'Fingerprinted this host as a Microsoft Windows system.'
        primary, secondary = windows_response, original_response
    else:
        os_str = 'unix'
        desc = 'Fingerprinted this host as a *nix system. Detection for this operating'
        desc += ' system is weak, "if not windows: is linux".'
        primary, secondary = original_response, windows_response

    # The reported URL and the first id belong to the "primary" response
    i = info.info()
    i.setPluginName(self.getName())
    i.setName('Operating system')
    i.setURL(primary.getURL())
    i.setMethod('GET')
    i.setDesc(desc)
    i.setId([primary.id, secondary.id])

    kb.kb.append(self, 'operating_system_str', os_str)
    kb.kb.append(self, 'operating_system', i)
    om.out.information(i.getDesc())
def _filter_errors( self, result, filename ): ''' Filter out ugly php errors and print a simple "Permission denied" or "File not found" ''' filtered = '' if result.count('<b>Warning</b>'): if result.count( 'Permission denied' ): filtered = PERMISSION_DENIED elif result.count( 'No such file or directory in' ): filtered = NO_SUCH_FILE elif result.count( 'Not a directory in' ): filtered = READ_DIRECTORY elif result.count('</a>]: failed to open stream:'): filtered = FAILED_STREAM elif self._application_file_not_found_error is not None: # The application file not found error string that I have has the "not_exist0.txt" # string in it, so I'm going to remove that string from it. app_error = self._application_file_not_found_error.replace("not_exist0.txt", '') # The result string has the file I requested inside, so I'm going to remove it. trimmed_result = result.replace( filename, '') # Now I compare both strings, if they are VERY similar, then filename is a non # existing file. if relative_distance_ge(app_error, trimmed_result, 0.9): filtered = NO_SUCH_FILE # # I want this function to return an empty string on errors. Not the error itself. # if filtered != '': return '' return result
def _filter_errors(self, result, filename): ''' Filter out ugly php errors and print a simple "Permission denied" or "File not found" ''' #print filename error = None if result.count('Permission denied'): error = PERMISSION_DENIED elif result.count('No such file or directory in'): error = NO_SUCH_FILE elif result.count('Not a directory in'): error = READ_DIRECTORY elif result.count(': failed to open stream: '): error = FAILED_STREAM elif self._application_file_not_found_error is not None: # The result string has the file I requested inside, so I'm going # to remove it. clean_result = result.replace(filename, '') # Now I compare both strings, if they are VERY similar, then # filename is a non existing file. if relative_distance_ge(self._application_file_not_found_error, clean_result, 0.9): error = NO_SUCH_FILE # # I want this function to return an empty string on errors. # Not the error itself. # if error is not None: return '' return result
def _filter_errors(self, result, filename): ''' Filter out ugly php errors and print a simple "Permission denied" or "File not found" ''' #print filename error = None if result.count('Permission denied'): error = PERMISSION_DENIED elif result.count('No such file or directory in'): error = NO_SUCH_FILE elif result.count('Not a directory in'): error = READ_DIRECTORY elif result.count(': failed to open stream: '): error = FAILED_STREAM elif self._application_file_not_found_error is not None: # The result string has the file I requested inside, so I'm going # to remove it. clean_result = result.replace(filename, '') # Now I compare both strings, if they are VERY similar, then # filename is a non existing file. if relative_distance_ge(self._application_file_not_found_error, clean_result, 0.9): error = NO_SUCH_FILE # # I want this function to return an empty string on errors. # Not the error itself. # if error is not None: return '' return result
/es/ga.js/google-analytics.com/ga.js/google-analytics.com/ga.js/ /ga.js/google-analytics.com/ga.js/google-analytics.com/ga.js/ /ga.js/google-analytics.com/ga.js/google-analytics.com/ /ga.js/google-analytics.com/ga.js/google-analytics.com/google-analytics.com/ga.js """ filename = urlParser.getFileName(reference) if filename: rindex = reference.rindex(filename) # 'ar9k' is just a random string to get a 404 new_reference = reference[:rindex] + "ar9k" + reference[rindex:] check_response = self._urlOpener.GET(new_reference, useCache=True, headers=headers) resp_body = response.getBody() check_resp_body = check_response.getBody() if relative_distance_ge(resp_body, check_resp_body, IS_EQUAL_RATIO): # If they are equal, then they are both a 404 (or something invalid) # om.out.debug( reference + ' was broken!') return else: # The URL was possibly_broken, but after testing we found out that # it was not, so not we use it! om.out.debug('Adding relative reference "' + reference + '" to the response.') fuzzable_request_list.extend( self._createFuzzableRequests(response, request=original_request) ) else: # Not possibly_broken: fuzzable_request_list = self._createFuzzableRequests(response, request=original_request)
def is_404(self, http_response):
    '''
    Decide whether http_response should be treated as a 404.

    All of my previous versions of is_404 were very complex and tried to
    struggle with all possible cases. The truth is that in most "strange"
    cases I was failing miserably, so now I changed my 404 detection once
    again, but keeping it as simple as possible.

    Also, and because I was trying to cover ALL CASES, I was performing a
    lot of requests in order to cover them, which in most situations was
    unnecessary.

    So now I go for a much simple approach:
        1- Cover the simplest case of all using only 1 HTTP request
        2- Give the users the power to configure the 404 detection by
           setting a string that identifies the 404 response (in case we
           are missing it for some reason in case #1)

    The checks run in this order: test database, user configured
    always404/never404 domain paths, 404 status code, user configured
    404string, the is_404_LRU cache and finally the comparison of the
    cleaned body against self._404_bodies.

    @parameter http_response: The HTTP response which we want to know if
                              it is a 404 or not.
    @return: True if the response is considered a 404, False otherwise.
             When self._test_db is set, the next stored value is returned
             instead.
    @raise Exception: When self._test_db is set but has no entry for the
                      current self._test_db_index.
    '''
    # This is here for testing.
    if self._test_db:
        i = self._test_db_index
        try:
            result = self._test_db[ i ]
        except (IndexError, KeyError):
            # Only a missing entry means that the test_db is incomplete;
            # any other error is not hidden behind this message anymore.
            raise Exception('Your test_db is incomplete!')

        self._test_db_index = i + 1
        return result

    #
    #   First we handle the user configured exceptions:
    #
    domain_path = http_response.getURL().getDomainPath()
    if domain_path in cf.cf.getData('always404'):
        return True
    elif domain_path in cf.cf.getData('never404'):
        return False

    #
    #   This is the most simple case, we don't even have to think about
    #   this.
    #
    #   If there is some custom website that always returns 404 codes, then
    #   we are screwed, but this is open source, and the pentester working
    #   on that site can modify these lines.
    #
    if http_response.getCode() == 404:
        return True

    #
    #   The user configured setting. "If this string is in the response,
    #   then it is a 404"
    #
    string_404 = cf.cf.getData('404string')
    if string_404 and string_404 in http_response:
        return True

    #
    #   Before actually working, I'll check if this response is in the LRU,
    #   if it is I just return the value stored there.
    #
    if http_response.id in self.is_404_LRU:
        return self.is_404_LRU[ http_response.id ]

    if self.need_analysis():
        self.generate_404_knowledge( http_response.getURL() )

    # self._404_body was already cleaned inside generate_404_knowledge
    # so we need to clean this one.
    html_body = get_clean_body( http_response )

    #
    #   Compare this response to all the 404's I have in my DB. A body that
    #   does not match one entry can still match the next one, so the loop
    #   only stops on the first match.
    #
    for body_404_db in self._404_bodies:
        if relative_distance_ge(body_404_db, html_body, IS_EQUAL_RATIO):
            msg = '"%s" is a 404. [similarity_index > %s]' % \
                  (http_response.getURL(), IS_EQUAL_RATIO)
            om.out.debug(msg)
            self.is_404_LRU[ http_response.id ] = True
            return True

    #
    #   I get here when the for ends and no 404 is matched.
    #
    msg = '"%s" is NOT a 404. [similarity_index < %s]' % \
          (http_response.getURL(), IS_EQUAL_RATIO)
    om.out.debug(msg)
    self.is_404_LRU[ http_response.id ] = False
    return False
def generate_404_knowledge(self, url):
    '''
    Based on a URL, request something that we know is going to be a 404.
    Afterwards analyze the 404's and summarise them.

    One random file name is requested in the domain path of url for each
    common handler extension, plus the extension of url when it has one.
    The requests are sent with self._send_404 through self._worker_pool;
    the bodies are then read from self._response_body_list (presumably
    filled by self._send_404 -- verify against that method).

    :param url: The URL used to get the extension and the domain path.
    :return: None. The summarised 404 bodies are stored in
             self._404_bodies, self._already_analyzed is set to True and
             the domain path is added to self._fingerprinted_paths.
    :raise RuntimeError: When self._uri_opener is None.
    '''
    #
    #    This is the case when nobody has properly configured
    #    the object in order to use it.
    #
    if self._uri_opener is None:
        msg = '404 fingerprint database was incorrectly initialized.'
        raise RuntimeError(msg)

    # Get the filename extension and create a 404 for it
    extension = url.get_extension()
    domain_path = url.get_domain_path()

    # the result
    self._response_body_list = []

    #
    #   This is a list of the most common handlers, in some configurations,
    #   the 404 depends on the handler, so I want to make sure that I catch
    #   the 404 for each one
    #
    handlers = set(['py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'gif',
                    'htm', 'pl', 'cgi', 'xhtml', 'htmls', 'foobar'])
    if extension:
        handlers.add(extension)

    args_list = [domain_path.url_join(rand_alnum(8) + '.' + handler)
                 for handler in handlers]

    self._worker_pool.map(self._send_404, args_list)

    #
    #   I have the bodies in self._response_body_list , but maybe they
    #   all look the same, so I'll filter the ones that look alike: a body
    #   is only kept when it is not similar to any body that was already
    #   kept. (The previous nested loop kept every body that was different
    #   from at least one other body, which filtered nothing as soon as two
    #   different 404 pages existed, and failed with an IndexError when the
    #   list was empty.)
    #
    result = []
    for body in self._response_body_list:
        if not any(relative_distance_ge(body, known, IS_EQUAL_RATIO)
                   for known in result):
            result.append(body)

    # I don't need these anymore
    self._response_body_list = None

    om.out.debug('The 404 body result database has a length of '
                 + str(len(result)) + '.')

    # Note: result is empty when no body was collected
    self._404_bodies = result
    self._already_analyzed = True
    self._fingerprinted_paths.add(domain_path)
def is_404(self, http_response):
    '''
    Decide whether http_response should be treated as a 404.

    All of my previous versions of is_404 were very complex and tried to
    struggle with all possible cases. The truth is that in most "strange"
    cases I was failing miserably, so now I changed my 404 detection once
    again, but keeping it as simple as possible.

    Also, and because I was trying to cover ALL CASES, I was performing a
    lot of requests in order to cover them, which in most situations was
    unnecessary.

    So now I go for a much simple approach:
        1- Cover the simplest case of all using only 1 HTTP request
        2- Give the users the power to configure the 404 detection by
           setting a string that identifies the 404 response (in case we
           are missing it for some reason in case #1)

    :param http_response: The HTTP response which we want to know if it
                          is a 404 or not.
    :return: True/False for the cases which are decided by configuration,
             status code or directory knowledge; the value stored in
             self.is_404_LRU for known paths; otherwise whatever
             self._fingerprinted_as_404() / self._fingerprinted_as_200()
             return (presumably booleans -- verify against those methods).
    '''
    #
    #   First we handle the user configured exceptions:
    #
    domain_path = http_response.get_url().get_domain_path()
    if domain_path in cf.cf.get('always_404'):
        return True
    elif domain_path in cf.cf.get('never_404'):
        return False

    #
    #    The user configured setting. "If this string is in the response,
    #    then it is a 404"
    #
    if cf.cf.get('string_match_404') and cf.cf.get('string_match_404') in http_response:
        return True

    #
    #   This is the most simple case, we don't even have to think about
    #   this.
    #
    #   If there is some custom website that always returns 404 codes, then
    #   we are screwed, but this is open source, and the pentester working
    #   on that site can modify these lines.
    #
    if http_response.get_code() == 404:
        return True

    #
    #   Simple, if the file we requested is in a directory that's known to
    #   return 404 codes for files that do not exist, AND this is NOT a 404
    #   then we return False!
    #
    if domain_path in self._directory_uses_404_codes and \
    http_response.get_code() != 404:
        return False

    #
    #   Before actually working, I'll check if this response is in the LRU,
    #   if it is I just return the value stored there. The key is the path
    #   of the URL.
    #
    if http_response.get_url().get_path() in self.is_404_LRU:
        return self.is_404_LRU[http_response.get_url().get_path()]

    # NOTE(review): the lock is assumed to cover only the knowledge
    # generation below, as the note before the for loop states that the
    # loop runs without it -- confirm against the original indentation.
    with self._lock:
        if self.need_analysis():
            self.generate_404_knowledge(http_response.get_url())

    # self._404_body was already cleaned inside generate_404_knowledge
    # so we need to clean this one in order to have a fair comparison
    html_body = get_clean_body(http_response)

    #
    #   Compare this response to all the 404's I have in my DB
    #
    #   Note: while self._404_bodies is a list, we can perform this for
    #         loop without "with self._lock", read comments in
    #         stackoverflow:
    #         http://stackoverflow.com/questions/9515364/does-python-freeze-the-list-before-for-loop
    #
    for body_404_db in self._404_bodies:

        if relative_distance_ge(body_404_db, html_body, IS_EQUAL_RATIO):
            msg = '"%s" (id:%s) is a 404 [similarity_index > %s]'
            fmt = (
                http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
            om.out.debug(msg % fmt)
            return self._fingerprinted_as_404(http_response)

    else:
        #
        #   I get here when the for ends and no body_404_db matched with
        #   the html_body that was sent as a parameter by the user. This
        #   means one of two things:
        #       * There is not enough knowledge in self._404_bodies, or
        #       * The answer is NOT a 404.
        #
        #   Because we want to reduce the amount of "false positives" that
        #   this method returns, we'll perform one extra check before
        #   saying that this is NOT a 404. The extra request is only sent
        #   for domain paths which were not fingerprinted yet.
        if http_response.get_url().get_domain_path() not in self._fingerprinted_paths:

            if self._single_404_check(http_response, html_body):
                # New knowledge: remember the body and the domain path
                self._404_bodies.append(html_body)
                self._fingerprinted_paths.add(
                    http_response.get_url().get_domain_path())

                msg = '"%s" (id:%s) is a 404 (similarity_index > %s). Adding new'
                msg += ' knowledge to the 404_bodies database (length=%s).'
                fmt = (http_response.get_url(), http_response.id,
                       IS_EQUAL_RATIO, len(self._404_bodies))
                om.out.debug(msg % fmt)

                return self._fingerprinted_as_404(http_response)

        msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].'
        fmt = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
        om.out.debug(msg % fmt)

        return self._fingerprinted_as_200(http_response)
def generate_404_knowledge(self, url):
    '''
    Based on a URL, request something that we know is going to be a 404.
    Afterwards analyze the 404's and summarise them.

    One random file name is requested in the domain path of url for each
    common handler extension, plus the extension of url when it has one.
    The requests are sent with self._send_404 through self._worker_pool;
    the bodies are then read from self._response_body_list (presumably
    filled by self._send_404 -- verify against that method).

    :param url: The URL used to get the extension and the domain path.
    :return: None. The summarised 404 bodies are stored in
             self._404_bodies, self._already_analyzed is set to True and
             the domain path is added to self._fingerprinted_paths.
    :raise RuntimeError: When self._uri_opener is None.
    '''
    #
    #    This is the case when nobody has properly configured
    #    the object in order to use it.
    #
    if self._uri_opener is None:
        msg = '404 fingerprint database was incorrectly initialized.'
        raise RuntimeError(msg)

    # Get the filename extension and create a 404 for it
    extension = url.get_extension()
    domain_path = url.get_domain_path()

    # the result
    self._response_body_list = []

    #
    #   This is a list of the most common handlers, in some configurations,
    #   the 404 depends on the handler, so I want to make sure that I catch
    #   the 404 for each one
    #
    handlers = set(['py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'gif',
                    'htm', 'pl', 'cgi', 'xhtml', 'htmls', 'foobar'])
    if extension:
        handlers.add(extension)

    args_list = [domain_path.url_join(rand_alnum(8) + '.' + handler)
                 for handler in handlers]

    self._worker_pool.map(self._send_404, args_list)

    #
    #   I have the bodies in self._response_body_list , but maybe they
    #   all look the same, so I'll filter the ones that look alike: a body
    #   is only kept when it is not similar to any body that was already
    #   kept. (The previous nested loop kept every body that was different
    #   from at least one other body, which filtered nothing as soon as two
    #   different 404 pages existed, and failed with an IndexError when the
    #   list was empty.)
    #
    result = []
    for body in self._response_body_list:
        if not any(relative_distance_ge(body, known, IS_EQUAL_RATIO)
                   for known in result):
            result.append(body)

    # I don't need these anymore
    self._response_body_list = None

    om.out.debug('The 404 body result database has a length of '
                 + str(len(result)) + '.')

    # Note: result is empty when no body was collected
    self._404_bodies = result
    self._already_analyzed = True
    self._fingerprinted_paths.add(domain_path)
def is_404(self, http_response):
    '''
    Decide whether http_response should be treated as a 404.

    All of my previous versions of is_404 were very complex and tried to
    struggle with all possible cases. The truth is that in most "strange"
    cases I was failing miserably, so now I changed my 404 detection once
    again, but keeping it as simple as possible.

    Also, and because I was trying to cover ALL CASES, I was performing a
    lot of requests in order to cover them, which in most situations was
    unnecessary.

    So now I go for a much simple approach:
        1- Cover the simplest case of all using only 1 HTTP request
        2- Give the users the power to configure the 404 detection by
           setting a string that identifies the 404 response (in case we
           are missing it for some reason in case #1)

    :param http_response: The HTTP response which we want to know if it
                          is a 404 or not.
    :return: True/False for the cases which are decided by configuration,
             status code or directory knowledge; the value stored in
             self.is_404_LRU for known paths; otherwise whatever
             self._fingerprinted_as_404() / self._fingerprinted_as_200()
             return (presumably booleans -- verify against those methods).
    '''
    #
    #   First we handle the user configured exceptions:
    #
    domain_path = http_response.get_url().get_domain_path()
    if domain_path in cf.cf.get('always_404'):
        return True
    elif domain_path in cf.cf.get('never_404'):
        return False

    #
    #    The user configured setting. "If this string is in the response,
    #    then it is a 404"
    #
    if cf.cf.get('string_match_404') and cf.cf.get(
            'string_match_404') in http_response:
        return True

    #
    #   This is the most simple case, we don't even have to think about
    #   this.
    #
    #   If there is some custom website that always returns 404 codes, then
    #   we are screwed, but this is open source, and the pentester working
    #   on that site can modify these lines.
    #
    if http_response.get_code() == 404:
        return True

    #
    #   Simple, if the file we requested is in a directory that's known to
    #   return 404 codes for files that do not exist, AND this is NOT a 404
    #   then we return False!
    #
    if domain_path in self._directory_uses_404_codes and \
    http_response.get_code() != 404:
        return False

    #
    #   Before actually working, I'll check if this response is in the LRU,
    #   if it is I just return the value stored there. The key is the path
    #   of the URL.
    #
    if http_response.get_url().get_path() in self.is_404_LRU:
        return self.is_404_LRU[http_response.get_url().get_path()]

    # NOTE(review): the lock is assumed to cover only the knowledge
    # generation below, as the note before the for loop states that the
    # loop runs without it -- confirm against the original indentation.
    with self._lock:
        if self.need_analysis():
            self.generate_404_knowledge(http_response.get_url())

    # self._404_body was already cleaned inside generate_404_knowledge
    # so we need to clean this one in order to have a fair comparison
    html_body = get_clean_body(http_response)

    #
    #   Compare this response to all the 404's I have in my DB
    #
    #   Note: while self._404_bodies is a list, we can perform this for
    #         loop without "with self._lock", read comments in
    #         stackoverflow:
    #         http://stackoverflow.com/questions/9515364/does-python-freeze-the-list-before-for-loop
    #
    for body_404_db in self._404_bodies:

        if relative_distance_ge(body_404_db, html_body, IS_EQUAL_RATIO):
            msg = '"%s" (id:%s) is a 404 [similarity_index > %s]'
            fmt = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
            om.out.debug(msg % fmt)
            return self._fingerprinted_as_404(http_response)

    else:
        #
        #   I get here when the for ends and no body_404_db matched with
        #   the html_body that was sent as a parameter by the user. This
        #   means one of two things:
        #       * There is not enough knowledge in self._404_bodies, or
        #       * The answer is NOT a 404.
        #
        #   Because we want to reduce the amount of "false positives" that
        #   this method returns, we'll perform one extra check before
        #   saying that this is NOT a 404. The extra request is only sent
        #   for domain paths which were not fingerprinted yet.
        if http_response.get_url().get_domain_path(
                ) not in self._fingerprinted_paths:

            if self._single_404_check(http_response, html_body):
                # New knowledge: remember the body and the domain path
                self._404_bodies.append(html_body)
                self._fingerprinted_paths.add(
                    http_response.get_url().get_domain_path())

                msg = '"%s" (id:%s) is a 404 (similarity_index > %s). Adding new'
                msg += ' knowledge to the 404_bodies database (length=%s).'
                fmt = (http_response.get_url(), http_response.id,
                       IS_EQUAL_RATIO, len(self._404_bodies))
                om.out.debug(msg % fmt)

                return self._fingerprinted_as_404(http_response)

        msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].'
        fmt = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
        om.out.debug(msg % fmt)

        return self._fingerprinted_as_200(http_response)