def handle_timeout(queued, url, thread_id, output=True):
    """ Handle timeout operation for workers """
    if database.latest_successful_request_time > conf.max_timeout_secs:
        database.latest_successful_request_time = conf.max_timeout_secs
    else:
        database.latest_successful_request_time += 1

    textutils.output_debug("-Adjusted timeout to: " + str(database.latest_successful_request_time))

    if not queued.get('timeout_count'):
        queued['timeout_count'] = 0

    if queued.get('timeout_count') < conf.max_timeout_count:
        new_timeout_count = queued.get('timeout_count') + 1
        queued['timeout_count'] = new_timeout_count
        textutils.output_debug('Thread #' + str(thread_id) + ': re-queuing ' + str(queued))

        # Add back the timed-out item
        database.fetch_queue.put(queued)
    elif output and not database.kill_received:
        # We definitely timed out
        textutils.output_timeout(queued.get('description') + ' at ' + url)

    # update stats
    database.total_timeouts += 1
def run(self):
    while not self.kill_received:
        try:
            # Non-Blocking get since we use the queue as a ringbuffer
            queued = database.fetch_queue.get(False)
            url = conf.target_base_path + queued.get('url')
            description = queued.get('description')
            match_string = queued.get('match_string')

            textutils.output_debug("Testing: " + url + " " + str(queued))
            stats.update_stats(url)

            # Fetch the target url
            start_time = datetime.now()
            if match_string:
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                         database.latest_successful_request_time,
                                                                         limit_len=False)
            else:
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                         database.latest_successful_request_time)
            end_time = datetime.now()

            # handle timeout
            if response_code in conf.timeout_codes:
                handle_timeout(queued, url, self.thread_id, output=self.output)
            elif response_code == 500:
                textutils.output_found('ISE, ' + description + ' at: ' + conf.target_host + url)
            elif response_code in conf.expected_file_responses:
                # If the CRC mismatches and we have an expected code, we found a valid link
                if match_string and re.search(re.escape(match_string), content, re.I):
                    textutils.output_found("String-Matched " + description + ' at: ' + conf.target_host + url)
                elif test_valid_result(content):
                    textutils.output_found(description + ' at: ' + conf.target_host + url)
            elif response_code in conf.redirect_codes:
                location = headers.get('location')
                if location:
                    handle_redirects(queued, location)

            # Stats
            if response_code not in conf.timeout_codes:
                stats.update_processed_items()
                compute_request_time(start_time, end_time)

            # Mark item as processed
            database.fetch_queue.task_done()
        except Empty:
            continue
def add_generated_path(path):
    current_template = conf.path_template.copy()
    current_template['description'] = 'Computer generated path'
    current_template['is_file'] = False
    current_template['url'] = '/' + path
    database.paths.append(current_template)
    textutils.output_debug(' - PathGenerator Plugin Generated path: ' + str(current_template))
def decrease_throttle_delay():
    """ If we reach this code, a worker successfully completed a request, so we reduce throttling for all threads. """
    if database.throttle_delay > 0:
        database.throttle_delay -= conf.throttle_increment
    if conf.debug:
        textutils.output_debug('Decreasing throttle limit: ' + str(database.throttle_delay))
def handle_timeout(queued, url, thread_id, output=True):
    """ Handle timeout operation for workers """
    if database.latest_successful_request_time > conf.max_timeout_secs:
        database.latest_successful_request_time = conf.max_timeout_secs
    else:
        database.latest_successful_request_time += 1

    # Update pool timeout
    textutils.output_debug("-Adjusted timeout to: " + str(database.latest_successful_request_time))

    if not queued.get('timeout_count'):
        queued['timeout_count'] = 0

    if queued.get('timeout_count') < conf.max_timeout_count:
        new_timeout_count = queued.get('timeout_count') + 1
        queued['timeout_count'] = new_timeout_count
        textutils.output_debug('Thread #' + str(thread_id) + ': re-queuing ' + str(queued))

        # Add back the timed-out item
        database.fetch_queue.put(queued)
    elif output and not database.kill_received:
        # We definitely timed out
        textutils.output_timeout(queued.get('description') + ' at ' + url)

    # update stats
    database.total_timeouts += 1
def add_generated_file(file):
    """ Add file to database """
    current_template = conf.path_template.copy()
    current_template['description'] = 'Computer generated file'
    current_template['url'] = file
    database.files.append(current_template)
    textutils.output_debug(' - PathGenerator Plugin Generated file: ' + str(current_template))
def add_generated_path(path):
    current_template = dict(conf.path_template)
    current_template['description'] = 'Computer generated path'
    current_template['url'] = path
    if current_template not in database.files:
        textutils.output_debug(' - PathGenerator Plugin Generated: ' + str(current_template))
        database.files.append(current_template)
def wait_for_idle(self, workers, queue):
    """ Wait until fetch queue is empty and handle user interrupt """
    while not self.kill_received and not queue.empty():
        try:
            sleep(0.1)
        except KeyboardInterrupt:
            try:
                stats.output_stats()
                sleep(1)
            except KeyboardInterrupt:
                textutils.output_info('Keyboard Interrupt Received, cleaning up threads')
                self.kill_received = True

                # Kill remaining workers but don't join the queue (we want to abort:))
                for worker in workers:
                    worker.kill_received = True
                    if worker is not None and worker.isAlive():
                        worker.join(1)

                # Kill the software
                sys.exit()

    # Make sure everything is done before sending control back to application
    textutils.output_debug("Threads: joining queue of size: " + str(queue.qsize()))
    queue.join()
    textutils.output_debug("Threads: join done")

    for worker in workers:
        worker.kill_received = True
        worker.join()
def wait_for_idle(self, workers, queue):
    """ Wait until fetch queue is empty and handle user interrupt """
    while not self.kill_received and not queue.empty():
        try:
            sleep(0.1)
        except KeyboardInterrupt:
            try:
                stats.output_stats()
                sleep(1)
            except KeyboardInterrupt:
                textutils.output_info('Keyboard Interrupt Received, cleaning up threads')

                # Clean reference to sockets
                database.connection_pool = None

                self.kill_received = True

                # Kill remaining workers but don't join the queue (we want to abort:))
                for worker in workers:
                    worker.kill_received = True
                    if worker is not None and worker.isAlive():
                        worker.join(1)

                # Kill the software
                sys.exit()

    # Make sure everything is done before sending control back to application
    textutils.output_debug("Threads: joining queue of size: " + str(queue.qsize()))
    queue.join()
    textutils.output_debug("Threads: join done")

    for worker in workers:
        worker.kill_received = True
        worker.join()
def handle_timeout(queued, url, thread_id, output=True):
    """ Handle timeout operation for workers """
    if database.latest_successful_request_time > conf.max_timeout_secs:
        database.latest_successful_request_time = conf.max_timeout_secs
    else:
        database.latest_successful_request_time += 1

    textutils.output_debug("-Adjusted timeout to: " + str(database.latest_successful_request_time))

    if not queued.get("timeout_count"):
        queued["timeout_count"] = 0

    if queued.get("timeout_count") < conf.max_timeout_count:
        new_timeout_count = queued.get("timeout_count") + 1
        queued["timeout_count"] = new_timeout_count
        textutils.output_debug("Thread #" + str(thread_id) + ": re-queuing " + str(queued))

        # Add back the timed-out item
        database.fetch_queue.put(queued)
    elif output:
        # We definitely timed out
        textutils.output_timeout(queued.get("description") + " at " + url)

    # update stats
    database.total_timeouts += 1
def increase_throttle_delay():
    """ A worker encountered a timeout, so we need to increase the throttle time for all threads. """
    if database.throttle_delay < conf.max_throttle:
        database.throttle_delay += conf.throttle_increment
    if conf.debug:
        textutils.output_debug('Increasing throttle limit: ' + str(database.throttle_delay))
def wait_for_idle(self, workers, queue):
    """ Wait until fetch queue is empty and handle user interrupt """
    while not database.kill_received and not queue.empty():
        try:
            # Make sure everything is done before sending control back to application
            textutils.output_debug("Threads: joining queue of size: " + str(queue.qsize()))
            queue.join()
            textutils.output_debug("Threads: join done")
        except KeyboardInterrupt:
            try:
                stats.output_stats(workers)
                sleep(1)  # The time you have to re-press ctrl+c to kill the app.
            except KeyboardInterrupt:
                textutils.output_info('Keyboard Interrupt Received, waiting for blocking threads to exit')

                # Clean reference to sockets
                database.connection_pool = None

                database.kill_received = True
                self.kill_workers(workers)
                sys.exit(0)

    # Make sure we get all the worker's results before continuing the next step
    self.kill_workers(workers)
def add_generated_file(file):
    """ Add file to database """
    current_template = conf.path_template.copy()
    current_template['description'] = 'Computer generated file'
    current_template['url'] = file
    current_template['handle_redirect'] = "ignoreRedirect" not in plugin_settings
    database.files.append(current_template)
    textutils.output_debug(' - PathGenerator Plugin Generated file: ' + str(current_template))
def add_generated_path(path):
    current_template = conf.path_template.copy()
    current_template['description'] = 'Computer generated path'
    current_template['is_file'] = False
    current_template['url'] = '/' + path
    current_template['handle_redirect'] = "ignoreRedirect" not in plugin_settings
    database.paths.append(current_template)
    textutils.output_debug(' - PathGenerator Plugin Generated path: ' + str(current_template))
def get_host_ip(host, port):
    """ Fetch the resolved ip addresses from the cache and return a random address if load-balanced """
    resolved = database.dns_cache.get(host)
    if not resolved:
        textutils.output_debug("Host entry not found in cache for host:" + str(host) + ", resolving")
        resolved = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        database.dns_cache[host] = resolved

    return _get_random_ip_from_cache(resolved), port
def fetch_url(self, url, user_agent, timeout, limit_len=True, add_headers=dict()):
    """ Fetch a given url, with a given user_agent and timeout """
    try:
        if not add_headers.get('User-Agent'):
            add_headers['User-Agent'] = user_agent

        if not add_headers.get('Connection'):
            add_headers['Connection'] = 'Keep-Alive'

        if not add_headers.get('Host'):
            add_headers['Host'] = conf.target_host

        # Session cookie, priority to user-supplied.
        if conf.cookies:
            add_headers['Cookie'] = conf.cookies
        elif database.session_cookie:
            add_headers['Cookie'] = database.session_cookie

        # Limit request len on binary types
        if limit_len:
            content_range = 'bytes=0-' + str(conf.file_sample_len - 1)
            add_headers['Range'] = content_range
        else:
            if 'Range' in add_headers:
                del add_headers['Range']

        if conf.proxy_url:
            url = conf.scheme + '://' + conf.target_host + ':' + str(conf.target_port) + url
            textutils.output_debug(url)

        if conf.is_ssl:
            database.connection_pool.ConnectionCls = UnverifiedHTTPSConnection

        response = database.connection_pool.request('GET', url, headers=add_headers, retries=0,
                                                    redirect=False, release_conn=False,
                                                    assert_same_host=False, timeout=timeout)

        content = response.data
        code = response.status
        headers = response.headers
    except Exception as e:
        # raise
        code = 0
        content = ''
        headers = dict()

    return code, content, headers
def add_generated_file(file):
    """ Add file to database """
    current_template = conf.path_template.copy()
    current_template['description'] = 'Computer generated file'
    current_template['url'] = file
    current_template['handle_redirect'] = "ignoreRedirect" not in plugin_settings
    database.files.append(current_template)
    textutils.output_debug(' - PathGenerator Plugin Generated file: ' + str(current_template))
def add_generated_path(path):
    current_template = conf.path_template.copy()
    current_template['description'] = 'Computer generated path'
    current_template['is_file'] = False
    current_template['url'] = '/' + path
    current_template['handle_redirect'] = "ignoreRedirect" not in plugin_settings
    database.paths.append(current_template)
    textutils.output_debug(' - PathGenerator Plugin Generated path: ' + str(current_template))
def execute():
    """ Fetch sitemap.xml and add each entry as a target """
    current_template = dict(conf.path_template)
    current_template['description'] = 'sitemap.xml entry'

    target_url = urljoin(conf.target_base_path, "/sitemap.xml")
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent, conf.fetch_timeout_secs,
                                                        limit_len=False, add_headers={})

    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if response_code in (200, 302) and content:
        regexp = re.compile(r'(?im).*<url>\s*<loc>(.*)</loc>\s*</url>.*')
        matches = re.findall(regexp, content)

        textutils.output_debug("SitemapXML plugin")

        added = 0
        for match in matches:
            if not isinstance(match, str):
                match = match.decode('utf-8', 'ignore')

            parsed = urlparse(match)
            if parsed.path:
                new_path = parsed.path
            else:
                continue

            # Remove trailing /
            if new_path.endswith('/'):
                new_path = new_path[:-1]

            if add_path(new_path):
                added += 1
                textutils.output_debug(" - Added: %s from /sitemap.xml" % new_path)

        if added > 0:
            textutils.output_info(' - SitemapXML Plugin: added %d base paths using /sitemap.xml' % added)
        else:
            textutils.output_info(' - SitemapXML Plugin: no usable entries in /sitemap.xml')
    else:
        textutils.output_info(' - SitemapXML Plugin: /sitemap.xml not found on target site')
def test_paths_exists():
    """ Test for path existence using http codes and computed 404.
    Spawn workers and turn off output for now, it would be irrelevant at this point. """
    manager = ThreadManager()

    # Fill work queue with fetch list
    for path in database.paths:
        dbutils.add_path_to_fetch_queue(path)

    # Consider some file target as potential path
    for file in database.files:
        if not file.get('no_suffix'):
            file_as_path = file.copy()
            file_as_path['url'] = '/' + file_as_path['url']
            dbutils.add_path_to_fetch_queue(file_as_path)

    done_paths = []
    recursion_depth = 0

    textutils.output_debug('Cached: ' + str(database.path_cache))
    while database.fetch_queue.qsize() > 0:
        textutils.output_info('Probing ' + str(database.fetch_queue.qsize()) + ' paths')

        # Wait for initial valid path lookup
        workers = manager.spawn_workers(conf.thread_count, TestPathExistsWorker)
        manager.wait_for_idle(workers, database.fetch_queue)

        recursion_depth += 1

        if not conf.recursive:
            break

        if recursion_depth >= conf.recursive_depth_limit:
            break

        for validpath in database.valid_paths:
            if validpath['url'] == '/' or validpath['url'] in done_paths:
                continue

            done_paths.append(validpath['url'])

            for path in database.paths:
                if path['url'] in ('/', ''):
                    continue
                path = path.copy()
                path['url'] = validpath['url'] + path['url']
                dbutils.add_path_to_fetch_queue(path)

    textutils.output_info('Found ' + str(len(database.valid_paths)) + ' valid paths')
def parse_svn_entries(url):
    description_file = 'SVN entries file at'
    description_dir = "SVN entries Dir at"
    target_url = url + "/.svn/entries"
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent, conf.fetch_timeout_secs,
                                                        limit_len=False, add_headers=base_headers)

    if response_code in conf.expected_file_responses and content:
        tokens = content.decode().split('\n')
        if 'dir' in tokens:
            for pos, token in enumerate(tokens):
                if token == 'dir':
                    # Fetch more entries recursively
                    if tokens[pos - 1] != '':
                        textutils.output_debug(' - Svn Plugin: Found dir: ' + url + '/' + tokens[pos - 1])

                        if conf.allow_download:
                            textutils.output_info(' - Svn Plugin: Downloading: ' + url + '/' + tokens[pos - 1] + '\r')
                        else:
                            textutils.output_found(description_dir + ' at: ' + url + '/' + tokens[pos - 1])

                        # Parse next
                        parse_svn_entries(url + "/" + tokens[pos - 1])
                elif token == 'file':
                    textutils.output_debug(' - Svn Plugin: Found file: ' + url + '/' + tokens[pos - 1])
                    if conf.allow_download:
                        textutils.output_info(' - Svn Plugin: Downloading: ' + url + '/' + tokens[pos - 1] + '\r')
                        # Fetch text-base file
                        path = url + "/.svn/text-base" + '/' + tokens[pos - 1] + ".svn-base"
                        fetcher = Fetcher()
                        response_code, content, headers = fetcher.fetch_url(path, conf.user_agent,
                                                                            conf.fetch_timeout_secs, limit_len=False)
                        save_file(url + '/' + tokens[pos - 1], content)
                    else:
                        textutils.output_found(description_file + ' at: ' + url + '/' + tokens[pos - 1])
def execute():
    """ Fetch sitemap.xml and add each entry as a target """
    current_template = dict(conf.path_template)
    current_template['description'] = 'sitemap.xml entry'

    target_url = urljoin(conf.target_base_path, "/sitemap.xml")
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent, conf.fetch_timeout_secs,
                                                        limit_len=False, add_headers={})

    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if response_code in (200, 302) and content:
        regexp = re.compile(r'(?im).*<url>\s*<loc>(.*)</loc>\s*</url>.*')
        matches = re.findall(regexp, content)

        textutils.output_debug("SitemapXML plugin")

        added = 0
        for match in matches:
            if not isinstance(match, str):
                match = match.decode('utf-8', 'ignore')

            new_path = match.split(conf.target_host)[1]

            # Remove trailing /
            if new_path.endswith('/'):
                new_path = new_path[:-1]

            add_path(new_path)
            add_file(new_path)

            textutils.output_debug(" - Added: %s from /sitemap.xml" % new_path)
            added += 1

        if added > 0:
            textutils.output_info(' - SitemapXML Plugin: added %d base paths using /sitemap.xml' % added)
        else:
            textutils.output_info(' - SitemapXML Plugin: no usable entries in /sitemap.xml')
    else:
        textutils.output_info(' - SitemapXML Plugin: /sitemap.xml not found on target site')
def execute():
    """ Fetch /robots.txt and add the disallowed paths as target """
    current_template = dict(conf.path_template)
    current_template['description'] = 'Robots.txt entry'

    target_url = urljoin(conf.target_base_path, "/robots.txt")
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent, conf.fetch_timeout_secs,
                                                        limit_len=False)

    if response_code in (200, 302) and content:
        matches = re.findall(r'Disallow:\s*/[a-zA-Z0-9-/\r]+\n', content)

        added = 0
        for match in matches:
            # Filter out some characters
            match = ''.join(filter(lambda c: c not in ' *?.\n\r\t', match))

            if conf.debug:
                textutils.output_debug(match)

            # Split on ':'
            splitted = match.split(':')
            if splitted[1]:
                target_path = splitted[1]

                # Remove trailing /
                if target_path.endswith('/'):
                    target_path = target_path[:-1]

                current_template = dict(current_template)
                current_template['url'] = target_path
                if current_template not in database.paths:
                    database.paths.append(current_template)
                    textutils.output_debug(' - Robots Plugin Added: ' + str(target_path) + ' from robots.txt')
                    added += 1

        if added > 0:
            textutils.output_info(' - Robots Plugin: added ' + str(added) + ' base paths using /robots.txt')
        else:
            textutils.output_info(' - Robots Plugin: no usable entries in /robots.txt')
    else:
        textutils.output_info(' - Robots Plugin: /robots.txt not found on target site')
def run(self):
    while not self.kill_received:
        try:
            # Non-Blocking get since we use the queue as a ringbuffer
            queued = database.fetch_queue.get(False)
            url = conf.target_base_path + queued.get("url")
            description = queued.get("description")
            match_string = queued.get("match_string")

            textutils.output_debug("Testing: " + url + " " + str(queued))
            stats.update_stats(url)

            # Fetch the target url
            start_time = datetime.now()
            if match_string:
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                         database.latest_successful_request_time,
                                                                         limit_len=False)
            else:
                response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                         database.latest_successful_request_time)
            end_time = datetime.now()

            # handle timeout
            if response_code in conf.timeout_codes:
                handle_timeout(queued, url, self.thread_id, output=self.output)
            elif response_code == 500:
                textutils.output_found("ISE, " + description + " at: " + conf.target_host + url)
            elif response_code in conf.expected_file_responses:
                # If the CRC mismatches and we have an expected code, we found a valid link
                if match_string and re.search(re.escape(match_string), content, re.I):
                    textutils.output_found("String-Matched " + description + " at: " + conf.target_host + url)
                elif test_valid_result(content):
                    textutils.output_found(description + " at: " + conf.target_host + url)
            elif response_code in conf.redirect_codes:
                location = headers.get("location")
                if location:
                    handle_redirects(queued, location)

            # Stats
            if response_code not in conf.timeout_codes:
                stats.update_processed_items()
                compute_request_time(start_time, end_time)

            # Mark item as processed
            database.fetch_queue.task_done()
        except Empty:
            continue
def fetch_url(self, url, user_agent, timeout, limit_len=True, add_headers=dict()):
    """ Fetch a given url, with a given user_agent and timeout """
    response = None
    try:
        if not add_headers.get('User-Agent'):
            add_headers['User-Agent'] = user_agent

        if not add_headers.get('Connection'):
            add_headers['Connection'] = 'Keep-Alive'

        if not add_headers.get('Host'):
            add_headers['Host'] = conf.target_host

        # Session cookie, priority to user-supplied.
        if conf.cookies:
            add_headers['Cookie'] = conf.cookies
        elif database.session_cookie:
            add_headers['Cookie'] = database.session_cookie

        # Limit request len on binary types
        if limit_len:
            content_range = 'bytes=0-' + str(conf.file_sample_len - 1)
            add_headers['Range'] = content_range
        else:
            if 'Range' in add_headers:
                del add_headers['Range']

        if conf.proxy_url:
            url = conf.scheme + '://' + conf.target_host + ':' + str(conf.target_port) + url
            textutils.output_debug(url)

        if conf.is_ssl:
            database.connection_pool.ConnectionCls = UnverifiedHTTPSConnection

        # Dynamic timeout
        request_timeout = Timeout(connect=timeout, read=timeout)

        response = database.connection_pool.request('GET', url, headers=add_headers, retries=0,
                                                    redirect=False, release_conn=True,
                                                    assert_same_host=False, timeout=request_timeout,
                                                    preload_content=False)

        content = response.data
        code = response.status
        headers = response.headers
        response.release_conn()  # return the connection back to the pool
    except Exception as e:
        code = 0
        content = ''
        headers = dict()

    return code, content, headers
def sample_404_from_found_path():
    """ For all existing path, compute the 404 CRC so we don't get trapped in a tarpit """
    manager = ThreadManager()
    for path in database.valid_paths:
        textutils.output_debug("Path in valid path table: " + str(path))
        for ext in conf.crafted_404_extensions:
            path_clone = dict(path)
            random_file = str(uuid.uuid4())

            # We don't benchmark / since we do it first before path discovery
            if path_clone['url'] != '/':
                path_clone['url'] = path_clone['url'] + '/' + random_file + ext
                database.fetch_queue.put(path_clone)

    workers = manager.spawn_workers(conf.thread_count, FetchCrafted404Worker)
    manager.wait_for_idle(workers, database.fetch_queue)
def execute():
    """ Fetch /robots.txt and add the disallowed paths as target """
    current_template = dict(conf.path_template)
    current_template['description'] = 'Robots.txt entry'

    target_url = urljoin(conf.target_base_path, "/robots.txt")
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent, conf.fetch_timeout_secs,
                                                        limit_len=False)

    if response_code in (200, 302) and content:
        matches = re.findall(r'Disallow:\s*/[a-zA-Z0-9-/\r]+\n', content)

        added = 0
        for match in matches:
            # Filter out some characters
            match = ''.join(filter(lambda c: c not in ' *?.\n\r\t', match))

            if conf.debug:
                textutils.output_debug(match)

            # Split on ':'
            splitted = match.split(':')
            if splitted[1]:
                target_path = splitted[1]

                # Remove trailing /
                if target_path.endswith('/'):
                    target_path = target_path[:-1]

                current_template = dict(current_template)
                current_template['url'] = target_path
                if current_template not in database.paths:
                    database.paths.append(current_template)
                    textutils.output_debug(' - Robots Plugin Added: ' + str(target_path) + ' from robots.txt')
                    added += 1

        if added > 0:
            textutils.output_info(' - Robots Plugin: added ' + str(added) + ' base paths using /robots.txt')
        else:
            textutils.output_info(' - Robots Plugin: no usable entries in /robots.txt')
    else:
        textutils.output_info(' - Robots Plugin: /robots.txt not found on target site')
def handle_timeout(queued, url, thread_id, output=True):
    """ Handle timeout operation for workers """
    if not queued.get('timeout_count'):
        queued['timeout_count'] = 0

    if queued.get('timeout_count') < conf.max_timeout_count:
        new_timeout_count = queued.get('timeout_count') + 1
        queued['timeout_count'] = new_timeout_count
        textutils.output_debug('Thread #' + str(thread_id) + ': re-queuing ' + str(queued))

        # Add back the timed-out item
        database.fetch_queue.put(queued)
    elif output:
        # We definitely timed out
        textutils.output_timeout(queued.get('description') + ' at ' + url)

    # update timeout count
    stats.update_timeouts()
def compute_request_time(start_time, end_time):
    """ Compute the average request time and set pessimistically (math.ceil) the request timeout value based on it.
    This call will mostly decrease the timeout time. """
    # Adjust dynamic timeout level:
    completed_time = (end_time - start_time).seconds
    textutils.output_debug("Completed in: " + str(completed_time))

    database.latest_successful_request_time = completed_time + 1

    # We still need to have a max timeout in seconds
    if database.latest_successful_request_time > conf.max_timeout_secs:
        database.latest_successful_request_time = conf.max_timeout_secs
    elif database.latest_successful_request_time < 1:
        database.latest_successful_request_time = 1

    textutils.output_debug("+Adjusted timeout to: " + str(database.latest_successful_request_time))
def test_behavior(content):
    """ Test if a given valid hit has an improbable behavior. Mainly, no url should return the same content
    as the previous one if it's already deemed valid by the software (non error, unique content).
    Some identical content is expected during the runtime, but not the same content in X consecutive hits. """
    # Assume normal behavior
    normal = True

    textutils.output_debug('Testing behavior')

    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if len(database.behavioral_buffer) <= (conf.behavior_queue_size - 1):
        database.behavioral_buffer.append(content)

    # If the queue is full, start to test. If not, the system will give a "chance" to the entries.
    if len(database.behavioral_buffer) >= conf.behavior_queue_size:
        textutils.output_debug('Testing for sameness with bufsize:' + str(len(database.behavioral_buffer)))
        # Check if all results in the buffer are the same
        same = all(SequenceMatcher(isjunk=None, a=content, b=saved_content, autojunk=False).ratio() > 0.80
                   for saved_content in database.behavioral_buffer)
        if same:
            textutils.output_debug('Same!')
            normal = False

    # Kick out only the first item in the queue if the queue is full so we can detect if behavior restores
    if not normal and len(database.behavioral_buffer):
        database.behavioral_buffer.pop(0)

    return normal
def test_behavior(content):
    """ Test if a given valid hit has an improbable behavior. Mainly, no url should return the same content
    as the previous one if it's already deemed valid by the software (non error, unique content).
    Some identical content is expected during the runtime, but not the same content in X consecutive hits. """
    # Assume normal behavior
    normal = True

    textutils.output_debug('Testing behavior')

    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if len(database.behavioral_buffer) <= (conf.behavior_queue_size - 1):
        database.behavioral_buffer.append(content)

    # If the queue is full, start to test. If not, the system will give a "chance" to the entries.
    if len(database.behavioral_buffer) >= conf.behavior_queue_size:
        textutils.output_debug('Testing for sameness with bufsize:' + str(len(database.behavioral_buffer)))
        # Check if all results in the buffer are the same
        same = all(SequenceMatcher(isjunk=None, a=content, b=saved_content, autojunk=False).ratio() > 0.80
                   for saved_content in database.behavioral_buffer)
        if same:
            textutils.output_debug('Same!')
            normal = False

    # Kick out only the first item in the queue if the queue is full so we can detect if behavior restores
    if not normal and len(database.behavioral_buffer):
        database.behavioral_buffer.pop(0)

    return normal
def test_valid_result(content):
    is_valid_result = True

    # Tweak the content len
    if len(content) > conf.file_sample_len:
        content = content[0:conf.file_sample_len - 1]

    # False positive cleanup for some edge cases
    content = content.strip(b'\r\n ')

    # Test signatures
    for fingerprint in database.crafted_404s:
        textutils.output_debug("Testing [" + content.encode('hex') + "]" + " against Fingerprint: [" +
                               fingerprint.encode('hex') + "]")
        matcher = SequenceMatcher(isjunk=None, a=fingerprint, b=content, autojunk=False)
        textutils.output_debug("Ratio " + str(matcher.ratio()))

        # This content is almost similar to a generated 404, therefore it's a 404.
        if matcher.ratio() > 0.8:
            textutils.output_debug("False positive detected!")
            is_valid_result = False
            break

    return is_valid_result
def handle_redirects(queued, target):
    """ This call is used to determine if a suggested redirect is valid.
    If it happens to be, we change the url entry with the redirected location and add it back to the call stack. """
    retry_count = queued.get('retries')

    if retry_count and retry_count > 1:
        return
    elif not retry_count:
        queued['retries'] = 0

    parsed_target = urlparse(target)
    target_path = parsed_target.path
    source_path = conf.target_base_path + queued.get('url')

    textutils.output_debug("Handling redirect from: " + source_path + " to " + target_path)
    matcher = SequenceMatcher(isjunk=None, a=target_path, b=source_path, autojunk=False)
    if matcher.ratio() > 0.8:
        queued['url'] = target_path
        queued['retries'] += 1
        # Add back the timed-out item
        textutils.output_debug("Following redirect! " + str(matcher.ratio()))
        database.fetch_queue.put(queued)
    else:
        textutils.output_debug("Bad redirect! " + str(matcher.ratio()))
def wait_for_idle(self, workers, queue):
    """ Wait until fetch queue is empty and handle user interrupt """
    while not database.kill_received and not queue.empty():
        try:
            sleep(0.1)
        except KeyboardInterrupt:
            try:
                stats.output_stats()
                sleep(1)
            except KeyboardInterrupt:
                textutils.output_info('Keyboard Interrupt Received, cleaning up threads')

                # Clean reference to sockets
                database.connection_pool = None

                database.kill_received = True

                # Kill remaining workers but don't join the queue (we want to abort:))
                for worker in workers:
                    if worker is not None and worker.isAlive():
                        worker.kill_received = True
                        worker.join(0)

                # Set leftover done in case of a kill.
                while not queue.empty():
                    queue.get()
                    queue.task_done()

                break

    # Make sure everything is done before sending control back to application
    textutils.output_debug("Threads: joining queue of size: " + str(queue.qsize()))
    queue.join()
    textutils.output_debug("Threads: join done")

    # Make sure we get all the worker's results before continuing the next step
    for worker in workers:
        if worker is not None and worker.isAlive():
            worker.kill_received = True
            worker.join()
def test_valid_result(content):
    is_valid_result = True

    # Tweak the content len
    if len(content) > conf.file_sample_len:
        content = content[0:conf.file_sample_len - 1]

    # False positive cleanup for some edge cases
    content = content.strip("\r\n ")

    # Test signatures
    for fingerprint in database.crafted_404s:
        textutils.output_debug("Testing [" + content.encode("hex") + "]" + " against Fingerprint: [" +
                               fingerprint.encode("hex") + "]")
        matcher = SequenceMatcher(isjunk=None, a=fingerprint, b=content, autojunk=False)
        textutils.output_debug("Ratio " + str(matcher.ratio()))

        # This content is almost similar to a generated 404, therefore it's a 404.
        if matcher.ratio() > 0.8:
            textutils.output_debug("False positive detected!")
            is_valid_result = False
            break

    return is_valid_result
def handle_redirects(queued, target):
    """ This call is used to determine if a suggested redirect is valid.
    If it happens to be, we change the url entry with the redirected location and add it back to the call stack. """
    retry_count = queued.get("retries")

    if retry_count and retry_count > 1:
        return
    elif not retry_count:
        queued["retries"] = 0

    parsed_target = urlparse(target)
    target_path = parsed_target.path
    source_path = conf.target_base_path + queued.get("url")

    textutils.output_debug("Handling redirect from: " + source_path + " to " + target_path)
    matcher = SequenceMatcher(isjunk=None, a=target_path, b=source_path, autojunk=False)
    if matcher.ratio() > 0.8:
        queued["url"] = target_path
        queued["retries"] += 1
        # Add back the timed-out item
        textutils.output_debug("Following redirect! " + str(matcher.ratio()))
        database.fetch_queue.put(queued)
    else:
        textutils.output_debug("Bad redirect! " + str(matcher.ratio()))
def test_valid_result(content):
    is_valid_result = True

    # Encoding edge case
    # Must be a string to be compared to the 404 fingerprint
    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if not len(content):
        content = ""  # empty file, still a forged 404
    elif len(content) < conf.file_sample_len:
        content = content[0:len(content) - 1]
    else:
        content = content[0:conf.file_sample_len - 1]

    # False positive cleanup for some edge cases
    content = content.strip('\r\n ')

    # Test signatures
    for fingerprint in database.crafted_404s:
        textutils.output_debug("Testing [" + content + "]" + " against Fingerprint: [" + fingerprint + "]")
        matcher = SequenceMatcher(isjunk=None, a=fingerprint, b=content, autojunk=False)
        textutils.output_debug("Ratio " + str(matcher.ratio()))

        # This content is almost similar to a generated 404, therefore it's a 404.
        if matcher.ratio() > 0.8:
            textutils.output_debug("False positive detected!")
            is_valid_result = False
            break

    return is_valid_result
def add_files_to_paths():
    """ Combine all path, filenames and suffixes to build the target list """
    work_list = list()
    cache_test = dict()
    for path in database.valid_paths:
        # Combine current path with all files and suffixes if enabled
        for filename in database.files:
            if filename.get('no_suffix'):
                new_filename = dict(filename)
                new_filename['is_file'] = True

                if path['url'] == '/':
                    new_filename['url'] = path['url'] + filename['url']
                else:
                    new_filename['url'] = path['url'] + '/' + filename['url']

                if not cache_test.get(new_filename['url']):
                    work_list.append(new_filename)
                    cache_test[new_filename['url']] = True
                    textutils.output_debug("No Suffix file added: " + str(new_filename))
            else:
                for suffix in conf.file_suffixes:
                    new_filename = dict(filename)
                    new_filename['is_file'] = True

                    if path['url'] == '/':
                        new_filename['url'] = path['url'] + filename['url'] + suffix
                    else:
                        new_filename['url'] = path['url'] + '/' + filename['url'] + suffix

                    if not cache_test.get(new_filename['url']):
                        work_list.append(new_filename)
                        cache_test[new_filename['url']] = True
                        textutils.output_debug("File added: " + str(new_filename))

    # Since we have already output the found directories, replace the valid path list
    database.valid_paths = work_list
def parse_hostname(hostname):
    ssl = False

    if not re.search(r'http://', hostname, re.I) and not re.search(r'https://', hostname, re.I):
        hostname = 'http://' + hostname

    if re.search(r'https://', hostname, re.I):
        ssl = True

    parsed = urlparse(hostname)

    parsed_path = parsed.path
    if parsed_path.endswith('/'):
        parsed_path = parsed_path[0:-1]

    if not parsed.port:
        parsed_port = 80
    else:
        parsed_port = parsed.port

    textutils.output_debug("Starting scan on: " + parsed.hostname + " base: " + parsed_path + " ssl: " + str(ssl))

    return parsed.hostname, parsed_port, parsed_path, ssl
def wait_for_idle(self, workers, queue):
    """ Wait until fetch queue is empty and handle user interrupt """
    while not database.kill_received and not queue.empty():
        try:
            # Make sure everything is done before sending control back to application
            textutils.output_debug("Threads: joining queue of size: " + str(queue.qsize()))
            queue.join()
            textutils.output_debug("Threads: join done")
        except KeyboardInterrupt:
            try:
                stats.output_stats(workers)
                sleep(1)  # The time you have to re-press ctrl+c to kill the app.
            except KeyboardInterrupt:
                textutils.output_info('Keyboard Interrupt Received, waiting for blocking threads to exit')

                # Clean reference to sockets
                database.connection_pool = None

                database.kill_received = True
                self.kill_workers(workers)
                sys.exit(0)

    # Make sure we get all the worker's results before continuing the next step
    self.kill_workers(workers)
def parse_svn_entries(url):
    description_file = "SVN entries file at"
    description_dir = "SVN entries Dir at"
    target_url = url + "/.svn/entries"
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent, conf.fetch_timeout_secs,
                                                        limit_len=False)

    if response_code in (200, 302) and content:
        tokens = content.split("\n")
        if "dir" in tokens:
            for pos, token in enumerate(tokens):
                if token == "dir":
                    # Fetch more entries recursively
                    if tokens[pos - 1] != "":
                        textutils.output_debug(" - Svn Plugin: Found dir: " + url + "/" + tokens[pos - 1])

                        if conf.allow_download:
                            textutils.output_info(" - Svn Plugin: Downloading: " + url + "/" + tokens[pos - 1] + "\r")
                        else:
                            textutils.output_found(description_dir + " at: " + url + "/" + tokens[pos - 1])

                        # Parse next
                        parse_svn_entries(url + "/" + tokens[pos - 1])
                elif token == "file":
                    textutils.output_debug(" - Svn Plugin: Found file: " + url + "/" + tokens[pos - 1])
                    if conf.allow_download:
                        textutils.output_info(" - Svn Plugin: Downloading: " + url + "/" + tokens[pos - 1] + "\r")
                        # Fetch text-base file
                        path = url + "/.svn/text-base" + "/" + tokens[pos - 1] + ".svn-base"
                        fetcher = Fetcher()
                        response_code, content, headers = fetcher.fetch_url(path, conf.user_agent,
                                                                            conf.fetch_timeout_secs, limit_len=False)
                        save_file(url + "/" + tokens[pos - 1], content)
                    else:
                        textutils.output_found(description_file + " at: " + url + "/" + tokens[pos - 1])
def parse_svn_entries(url):
    description_file = 'SVN entries file at'
    description_dir = "SVN entries Dir at"
    target_url = url + "/.svn/entries"
    fetcher = Fetcher()
    response_code, content, headers = fetcher.fetch_url(target_url, conf.user_agent, conf.fetch_timeout_secs,
                                                        limit_len=False, add_headers=base_headers)

    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if response_code in conf.expected_file_responses and content:
        tokens = content.split('\n')
        if 'dir' in tokens:
            for pos, token in enumerate(tokens):
                if token == 'dir':
                    # Fetch more entries recursively
                    if tokens[pos-1] != '':
                        textutils.output_debug(' - Svn Plugin: Found dir: ' + url + '/' + tokens[pos-1])

                        if conf.allow_download:
                            textutils.output_info(' - Svn Plugin: Downloading: ' + url + '/' + tokens[pos-1] + '\r')
                        else:
                            textutils.output_found(description_dir + ' at: ' + url + '/' + tokens[pos-1])

                        # Parse next
                        parse_svn_entries(url + "/" + tokens[pos-1])
                elif token == 'file':
                    textutils.output_debug(' - Svn Plugin: Found file: ' + url + '/' + tokens[pos-1])
                    if conf.allow_download:
                        textutils.output_info(' - Svn Plugin: Downloading: ' + url + '/' + tokens[pos-1] + '\r')
                        # Fetch text-base file
                        path = url + "/.svn/text-base" + '/' + tokens[pos-1] + ".svn-base"
                        fetcher = Fetcher()
                        response_code, content, headers = fetcher.fetch_url(path, conf.user_agent,
                                                                            conf.fetch_timeout_secs, limit_len=False)
                        save_file(url + '/' + tokens[pos-1], content)
                    else:
                        textutils.output_found(description_file + ' at: ' + url + '/' + tokens[pos-1])
def run(self):
    while not self.kill_received:
        try:
            # Non-Blocking get since we use the queue as a ringbuffer
            queued = database.fetch_queue.get(False)
            url = conf.target_base_path + queued.get('url')

            textutils.output_debug("Fetching crafted 404: " + str(url))
            stats.update_stats(url)

            # Fetch the target url
            start_time = datetime.now()
            response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                     database.latest_successful_request_time)
            end_time = datetime.now()

            # Handle fetch timeouts by re-adding the url back to the global fetch queue
            # if timeout count is under max timeout count
            if response_code == 0 or response_code == 500:
                handle_timeout(queued, url, self.thread_id, output=self.output)
            elif response_code in conf.expected_file_responses:
                # The server responded with whatever code but 404 or invalid stuff (500). We take a sample
                if not len(content):
                    crafted_404 = ""  # empty file, still a forged 404
                elif len(content) < conf.file_sample_len:
                    crafted_404 = content[0:len(content) - 1]
                else:
                    crafted_404 = content[0:conf.file_sample_len - 1]

                # Edge case control
                crafted_404 = crafted_404.strip('\r\n ')
                database.crafted_404s.append(crafted_404)

                # Exception case for root 404, since it's used as a model for other directories
                textutils.output_debug("Computed and saved a sample 404 for: " + str(queued) + ": " + crafted_404)
            elif response_code in conf.redirect_codes:
                location = headers.get('location')
                if location:
                    handle_redirects(queued, location)

            # Stats
            if response_code not in conf.timeout_codes:
                stats.update_processed_items()
                compute_request_time(start_time, end_time)

            # Dequeue item
            database.fetch_queue.task_done()
        except Empty:
            continue

    textutils.output_debug("Thread #" + str(self.thread_id) + " killed.")
def wait_for_idle(self, workers, queue):
    """ Wait until fetch queue is empty and handle user interrupt """
    while not database.kill_received and not queue.empty():
        try:
            sleep(0.1)
        except KeyboardInterrupt:
            try:
                stats.output_stats()
                sleep(1)
            except KeyboardInterrupt:
                textutils.output_info('Keyboard Interrupt Received, cleaning up threads')

                # Clean reference to sockets
                database.connection_pool = None

                database.kill_received = True

                # Kill remaining workers but don't join the queue (we want to abort:))
                for worker in workers:
                    if worker is not None and worker.isAlive():
                        worker.kill_received = True
                        worker.join(0)

                # Set leftover done in case of a kill.
                while not queue.empty():
                    queue.get()
                    queue.task_done()

                break

    # Make sure everything is done before sending control back to application
    textutils.output_debug("Threads: joining queue of size: " + str(queue.qsize()))
    queue.join()
    textutils.output_debug("Threads: join done")

    # Make sure we get all the worker's results before continuing the next step
    for worker in workers:
        if worker is not None and worker.isAlive():
            worker.kill_received = True
            worker.join()
def parse_hostname(hostname):
    ssl = False

    if not re.search(r'http://', hostname, re.I) and not re.search(r'https://', hostname, re.I):
        hostname = 'http://' + hostname

    if re.search(r'https://', hostname, re.I):
        ssl = True

    parsed = urlparse(hostname)

    parsed_path = parsed.path
    if parsed_path.endswith('/'):
        parsed_path = parsed_path[0:-1]

    if not parsed.port:
        parsed_port = 80
    else:
        parsed_port = parsed.port

    textutils.output_debug("Starting scan on: " + parsed.hostname + " base: " + parsed_path + " ssl: " + str(ssl))

    return parsed.hostname, parsed_port, parsed_path, ssl
def run(self):
    while not self.kill_received:
        try:
            # Non-Blocking get since we use the queue as a ringbuffer
            queued = database.fetch_queue.get(False)
            url = conf.target_base_path + queued.get('url')

            textutils.output_debug("Fetching crafted 404: " + str(url))
            stats.update_stats(url)

            # Fetch the target url
            start_time = datetime.now()
            response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent,
                                                                     database.latest_successful_request_time)
            end_time = datetime.now()

            # Handle fetch timeouts by re-adding the url back to the global fetch queue
            # if timeout count is under max timeout count
            if response_code == 0 or response_code == 500:
                handle_timeout(queued, url, self.thread_id, output=self.output)
            elif response_code in conf.expected_file_responses:
                # Encoding edge case
                # Must be a string to be compared to the 404 fingerprint
                if not isinstance(content, str):
                    content = content.decode('utf-8', 'ignore')

                # The server responded with whatever code but 404 or invalid stuff (500). We take a sample
                if not len(content):
                    crafted_404 = ""  # empty file, still a forged 404
                elif len(content) < conf.file_sample_len:
                    crafted_404 = content[0:len(content) - 1]
                else:
                    crafted_404 = content[0:conf.file_sample_len - 1]

                crafted_404 = crafted_404.strip('\r\n ')
                database.crafted_404s.append(crafted_404)

                # Exception case for root 404, since it's used as a model for other directories
                textutils.output_debug("Computed and saved a sample 404 for: " + str(queued) + ": " + crafted_404)
            elif response_code in conf.redirect_codes:
                if queued.get('handle_redirect', True):
                    location = headers.get('location')
                    if location:
                        handle_redirects(queued, location)

            # Stats
            if response_code not in conf.timeout_codes:
                stats.update_processed_items()
                compute_request_time(start_time, end_time)

            # Dequeue item
            database.fetch_queue.task_done()
        except Empty:
            continue

    textutils.output_debug("Thread #" + str(self.thread_id) + " killed.")
def add_files_to_paths():
    """ Combine all path, filenames and suffixes to build the target list """
    work_list = list()
    for path in database.valid_paths:
        # Combine current path with all files and suffixes if enabled
        for filename in database.files:
            if filename.get('no_suffix'):
                new_filename = filename.copy()
                new_filename['is_file'] = True

                if path['url'] == '/':
                    new_filename['url'] = ''.join([path['url'], filename['url']])
                else:
                    new_filename['url'] = ''.join([path['url'], '/', filename['url']])

                work_list.append(new_filename)
                textutils.output_debug("No Suffix file added: " + str(new_filename))
            elif filename.get('executable'):
                for executable_suffix in conf.executables_suffixes:
                    new_filename = filename.copy()
                    new_filename['is_file'] = True

                    if path['url'] == '/':
                        new_filename['url'] = ''.join([path['url'], filename['url'], executable_suffix])
                    else:
                        new_filename['url'] = ''.join([path['url'], '/', filename['url'], executable_suffix])

                    work_list.append(new_filename)
                    textutils.output_debug("Executable File added: " + str(new_filename))
            else:
                for suffix in conf.file_suffixes:
                    new_filename = filename.copy()
                    new_filename['is_file'] = True

                    if path['url'] == '/':
                        new_filename['url'] = ''.join([path['url'], filename['url'], suffix])
                    else:
                        new_filename['url'] = ''.join([path['url'], '/', filename['url'], suffix])

                    work_list.append(new_filename)
                    textutils.output_debug("Regular File added: " + str(new_filename))

    # Since we have already output the found directories, replace the valid path list
    database.valid_paths = work_list
def run(self):
    while not self.kill_received:
        try:
            # Non-Blocking get since we use the queue as a ringbuffer
            queued = database.fetch_queue.get(False)
            url = conf.target_base_path + queued.get('url')

            textutils.output_debug("Fetching crafted 404: " + str(url))
            stats.update_stats(url)

            # Fetch the target url
            timeout = False
            response_code, content, headers = self.fetcher.fetch_url(url, conf.user_agent, conf.fetch_timeout_secs)

            # Handle fetch timeouts by re-adding the url back to the global fetch queue
            # if timeout count is under max timeout count
            if response_code == 0 or response_code == 500:
                handle_timeout(queued, url, self.thread_id, output=self.output)
                # increase throttle delay
                throttle.increase_throttle_delay()
                timeout = True
            elif response_code in conf.expected_file_responses:
                # The server responded with whatever code but 404 or invalid stuff (500). We take a sample
                if len(content) < conf.file_sample_len:
                    crafted_404 = content[0:len(content) - 1]
                else:
                    crafted_404 = content[0:conf.file_sample_len - 1]

                database.crafted_404s.append(crafted_404)

                # Exception case for root 404, since it's used as a model for other directories
                textutils.output_debug("Computed and saved a sample 404 for: " + str(queued) + ": " + crafted_404)
            elif response_code in conf.redirect_codes:
                location = headers.get('location')
                if location:
                    handle_redirects(queued, location)

            # Decrease throttle delay if needed
            if not timeout:
                throttle.decrease_throttle_delay()

            # Dequeue item
            stats.update_processed_items()
            database.fetch_queue.task_done()
        except Empty:
            continue

    textutils.output_debug("Thread #" + str(self.thread_id) + " killed.")
def add_files_to_paths():
    """ Combine all path, filenames and suffixes to build the target list """
    work_list = list()
    for path in database.valid_paths:
        # Combine current path with all files and suffixes if enabled
        for filename in database.files:
            if filename.get('no_suffix'):
                new_filename = filename.copy()
                new_filename['is_file'] = True

                if path['url'] == '/':
                    new_filename['url'] = ''.join([path['url'], filename['url']])
                else:
                    new_filename['url'] = ''.join([path['url'], '/', filename['url']])

                work_list.append(new_filename)
                textutils.output_debug("No Suffix file added: " + str(new_filename))
            elif filename.get('executable'):
                for executable_suffix in conf.executables_suffixes:
                    new_filename = filename.copy()
                    new_filename['is_file'] = True

                    if path['url'] == '/':
                        new_filename['url'] = ''.join([path['url'], filename['url'], executable_suffix])
                    else:
                        new_filename['url'] = ''.join([path['url'], '/', filename['url'], executable_suffix])

                    work_list.append(new_filename)
                    textutils.output_debug("Executable File added: " + str(new_filename))
            else:
                for suffix in conf.file_suffixes:
                    new_filename = filename.copy()
                    new_filename['is_file'] = True

                    if path['url'] == '/':
                        new_filename['url'] = ''.join([path['url'], filename['url'], suffix])
                    else:
                        new_filename['url'] = ''.join([path['url'], '/', filename['url'], suffix])

                    work_list.append(new_filename)
                    textutils.output_debug("Regular File added: " + str(new_filename))

    # Since we have already output the found directories, replace the valid path list
    database.valid_paths = work_list
def test_valid_result(content, is_file=False):
    is_valid_result = True

    # Encoding edge case
    # Must be a string to be compared to the 404 fingerprint
    if not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')

    if not len(content):
        content = ""  # empty file, still a forged 404
    elif len(content) < conf.file_sample_len:
        content = content[0:len(content) - 1]
    else:
        content = content[0:conf.file_sample_len - 1]

    # False positive cleanup for some edge cases
    content = content.strip('\r\n ')

    # Test signatures
    for fingerprint in database.crafted_404s:
        textutils.output_debug("Testing [" + content + "]" + " against Fingerprint: [" + fingerprint + "]")
        matcher = SequenceMatcher(isjunk=None, a=fingerprint, b=content, autojunk=False)
        textutils.output_debug("Ratio " + str(matcher.ratio()))

        # This content is almost similar to a generated 404, therefore it's a 404.
        if matcher.ratio() > 0.8:
            textutils.output_debug("False positive detected!")
            is_valid_result = False
            break

    # An empty file could be a proof of a hidden structure
    if is_file and content == "":
        is_valid_result = True

    return is_valid_result
if len(sys.argv) <= 1:
    parser.print_help()
    print('')
    sys.exit()

# Spawn synchronized print output worker
print_worker = PrintWorker()
print_worker.daemon = True
print_worker.start()

# Ensure the host is of the right format and set it in config
parsed_host, parsed_port, parsed_path, is_ssl = netutils.parse_hostname(args[1])
textutils.output_debug("Parsed: " + parsed_host + " port: " + str(parsed_port) + " " + parsed_path +
                       " SSL:" + str(is_ssl))

# Set conf values
conf.target_host = parsed_host
conf.target_base_path = parsed_path
conf.is_ssl = is_ssl

textutils.output_debug('Version: ' + str(conf.version))
textutils.output_debug('Max timeouts per url: ' + str(conf.max_timeout_count))
textutils.output_debug('Worker threads: ' + str(conf.thread_count))
textutils.output_debug('Target Host: ' + str(conf.target_host))
textutils.output_debug('Using Tor: ' + str(conf.use_tor))
textutils.output_debug('Eval-able output: ' + str(conf.eval_output))
textutils.output_debug('Using User-Agent: ' + str(conf.user_agent))