def find_redos(pattern: str, flags: int, output: TextOutput, parser):
    """Parse a single regex and lazily yield its significant ReDoS findings.

    ``parser`` is called as ``parser(pattern, flags)``. If it raises, the
    error is printed and the generator finishes without yielding anything.
    Otherwise ``output.next()`` is called once, and every result of
    ``find`` whose ``starriness`` is greater than 2 is first recorded via
    ``output.record`` and then yielded.
    """
    try:
        tree = parser(pattern, flags)
    except Exception as err:
        print(f"Error parsing: {pattern}", err)
        return

    output.next()
    significant = (hit for hit in find(tree) if hit.starriness > 2)
    for hit in significant:
        output.record(hit, pattern)
        yield hit
def handle_file(filename: str, output: TextOutput):
    """Scan every regex that ``find_regexes`` extracts from one file.

    The file is read as bytes. Each extracted regex is skipped when it is
    shorter than 5 characters or contains fewer than two ``*`` / ``+`` /
    ``,}`` tokens in total. The remaining patterns are parsed with
    ``SreOpParser``; if that fails, the pattern is rewritten with
    ``fix_js_regex`` and parsing is retried. Findings from ``find`` whose
    ``starriness`` exceeds 2 are recorded on ``output`` together with the
    stripped source line they came from (``None`` if that line is not
    valid UTF-8).

    Parse and analysis failures are printed and the scan moves on to the
    next regex. Only ``Exception`` subclasses are treated as such
    failures, so ``KeyboardInterrupt`` and ``SystemExit`` propagate to the
    caller instead of being reported as parse errors.
    """
    with open(filename, "rb") as f:
        code = f.read()

    # Split lazily and only once per file: most files yield no findings.
    source_lines = None

    for regex in find_regexes(code):
        pattern = regex.pattern
        if len(pattern) < 5:
            continue  # too short to matter; e.g. (.+)+ is already 5 chars
        if pattern.count("*") + pattern.count("+") + pattern.count(",}") < 2:
            continue  # no ReDoS possible

        try:
            logging.debug("%s#%s: %s", filename, regex.lineno, pattern)
            parsed = SreOpParser().parse_sre(pattern, regex.flags)
        except Exception:
            # Retry with the pattern rewritten by fix_js_regex; the rewrite
            # must at least compile with ``re`` before it is analysed.
            try:
                fixed = fix_js_regex(pattern)
                re.compile(fixed, regex.flags)
            except Exception:
                if regex.definitely_regex:
                    print(
                        f"Error parsing: {pattern} from {filename} line {regex.lineno}\n"
                    )
                continue
            try:
                parsed = SreOpParser().parse_sre(fixed, regex.flags)
            except Exception:
                print(f"Error in regexploit parsing: {pattern} from {filename}")
                print(traceback.format_exc())
                continue

        try:
            output.next()
            for redos in find(parsed):
                if redos.starriness > 2:
                    context = None
                    try:
                        if source_lines is None:
                            source_lines = code.splitlines()
                        context = source_lines[regex.lineno - 1].decode().strip()
                    except UnicodeDecodeError:
                        pass  # report the finding without source context
                    output.record(
                        redos,
                        pattern,
                        filename=filename,
                        lineno=regex.lineno,
                        context=context,
                    )
        except Exception:
            print(f"Error finding ReDoS: {pattern} from {filename} #{regex.lineno}")
            print(traceback.format_exc())
def main():
    """Command-line entry point: scan Python files for ReDoS-prone regexes.

    Parses the command line, optionally enables debug logging, then feeds
    every path produced by ``file_generator`` to ``handle_file`` and prints
    how many regexes were processed. ``FutureWarning`` and
    ``DeprecationWarning`` are silenced for the duration of the run.
    """
    with warnings.catch_warnings():
        # Some weird regexes emit warnings
        for noisy_category in (FutureWarning, DeprecationWarning):
            warnings.simplefilter("ignore", category=noisy_category)

        cli = argparse.ArgumentParser(
            description="Parse regexes out of python files and scan them for ReDoS"
        )
        cli.add_argument("files", nargs="+", help="Python files or directories")
        cli.add_argument(
            "--glob",
            action="store_true",
            help="Glob the input filenames (**/*)",
        )
        cli.add_argument(
            "--verbose",
            action="store_true",
            help="Verbose logging",
        )
        cli.add_argument(
            "--ignore",
            action="append",
            help="Paths containing this string are ignored",
        )
        options = cli.parse_args()

        if options.verbose:
            logging.basicConfig(level=logging.DEBUG)

        paths = file_generator(
            options.files, options.glob, ["*.py"], options.ignore
        )
        report = TextOutput()
        for path in paths:
            logging.debug(path)
            handle_file(path, report)

        print(f"Processed {report.regexes} regexes")
def handle_file(filename: str, output: TextOutput):
    """Scan the patterns collected from one Python source file for ReDoS.

    The file is read as bytes, parsed with ``ast.parse`` and walked with a
    ``PythonNodeVisitor``; each entry of the visitor's ``patterns`` is then
    parsed with ``SreOpParser``. Entries that raise ``re.error`` are
    skipped silently. Findings from ``find`` whose ``starriness`` exceeds 2
    are recorded on ``output`` together with the stripped source line they
    came from (``None`` if that line is not valid UTF-8).

    A file that cannot be parsed or visited is reported on stdout and
    skipped; the function then returns without touching ``output``.
    """
    with open(filename, "rb") as f:
        code = f.read()

    try:
        code_ast = ast.parse(code)
        pnv = PythonNodeVisitor()
        pnv.visit(code_ast)
    except RecursionError:
        print(f"RecursionError parsing AST for {filename}")
        return
    except (SyntaxError, ValueError) as e:
        # Before Python 3.12, source containing null bytes raises ValueError
        # rather than SyntaxError; either way the file is skipped instead of
        # aborting the whole scan.
        print(f"Bad Python3 syntax in {filename}: {e}")
        return

    # Split lazily and only once per file: most files yield no findings.
    source_lines = None

    for regex in pnv.patterns:
        try:
            parsed = SreOpParser().parse_sre(regex.pattern, regex.flags)
        except re.error:
            continue  # We will have many strings which aren't actually regexes

        try:
            output.next()
            for redos in find(parsed):
                if redos.starriness > 2:
                    context = None
                    try:
                        if source_lines is None:
                            source_lines = code.splitlines()
                        context = source_lines[regex.lineno - 1].decode().strip()
                    except UnicodeDecodeError:
                        pass  # report the finding without source context
                    output.record(
                        redos,
                        regex.pattern,
                        filename=filename,
                        lineno=regex.lineno,
                        context=context,
                    )
        except Exception:
            print(
                f"Error finding ReDoS: {regex.pattern} from {filename} #{regex.lineno}"
            )
            print(traceback.format_exc())
def main():
    """Command-line entry point: read regexes line by line and scan each one.

    Every input line (minus its trailing newline) is treated as one regex
    and passed to ``find_redos`` with the parser chosen by ``--flavour``.
    With ``--unescape`` the line is first run through the
    ``unicode_escape`` codec (so ``\\\\d`` becomes ``\\d``); a line whose
    escapes are malformed is reported and skipped. When stdin is a
    terminal, a welcome banner is printed and each line without findings is
    answered with "No ReDoS found.". Ctrl-C ends the session quietly.
    """
    parser = argparse.ArgumentParser(
        description="Parse regexes from stdin and scan them for ReDoS"
    )
    parser.add_argument(
        "-f",
        "--flavour",
        "--flavor",
        choices=["python", "js"],
        default="python",
        help="Regex language",
    )
    parser.add_argument(
        "-v", "--verbose", action="count", default=0, help="Verbose logging"
    )
    parser.add_argument(
        "-u",
        "--unescape",
        action="store_true",
        help="Unescape the regular expressions before parsing them (e.g. double backslashes)",
    )
    args = parser.parse_args()

    # fileinput.input() reads the files named in sys.argv[1:] and only falls
    # back to stdin when that list is empty, so drop our own options.
    sys.argv = sys.argv[:1]

    if args.verbose == 1:
        logging.basicConfig(level=logging.INFO)
    elif args.verbose > 1:
        logging.basicConfig(level=logging.DEBUG)

    isatty = sys.stdin.isatty()
    if isatty:
        print("Welcome to Regexploit. Enter your regexes:")

    output = TextOutput()
    # The flavour cannot change between lines, so pick the parser once.
    regex_parser = javascript if args.flavour == "js" else python

    try:
        for line in fileinput.input():
            line = line.rstrip("\n")
            if args.unescape:
                # \\d -> \d
                # unicode_escape decodes raw bytes as Latin-1, so encoding as
                # UTF-8 first would mangle non-ASCII characters. Latin-1 with
                # backslashreplace keeps them intact: characters outside
                # Latin-1 become \uXXXX escapes that the codec decodes back.
                try:
                    line = line.encode("latin-1", "backslashreplace").decode(
                        "unicode_escape"
                    )
                except UnicodeDecodeError as e:
                    # Malformed escape (e.g. a trailing backslash): report it
                    # and keep reading instead of crashing the session.
                    print(f"Error unescaping: {line}", e)
                    continue

            found = False
            # find_redos is a generator; it must be drained so that every
            # finding is recorded on ``output``.
            for _ in find_redos(line, 0, output, regex_parser):
                found = True
            if isatty and not found:
                print("No ReDoS found.")
    except KeyboardInterrupt:
        pass
def main():
    """Command-line entry point: scan JavaScript/TypeScript files for ReDoS.

    Exits with status 1 (after printing install instructions) when the
    ``javascript/node_modules`` directory next to this file is missing.
    Otherwise parses the command line, optionally enables debug logging,
    and hands the paths produced by ``file_generator`` to ``process_files``
    in batches of at most 50, finishing with a summary of how many regexes
    were processed. ``FutureWarning`` is silenced for the duration of the
    run.
    """
    js_dir = os.path.join(os.path.split(__file__)[0], "javascript")
    if not os.path.isdir(os.path.join(js_dir, "node_modules")):
        print("The JavaScript & TypeScript parsers require some node modules.\n")
        print(f"Run (cd {js_dir}; npm install)")
        sys.exit(1)

    with warnings.catch_warnings():
        warnings.simplefilter(
            "ignore", category=FutureWarning
        )  # Some js regexes are weird

        parser = argparse.ArgumentParser(
            description="Parse regexes out of javascript files and scan them for ReDoS"
        )
        parser.add_argument("files", nargs="+", help="Javascript or typescript files")
        parser.add_argument(
            "--node",
            help="Location of nodejs executable (rather than using node from PATH)",
        )
        parser.add_argument(
            "--glob", action="store_true", help="Glob the input filenames (**/*)"
        )
        parser.add_argument("--verbose", action="store_true", help="Verbose logging")
        parser.add_argument(
            "--ignore", action="append", help="Paths containing this string are ignored"
        )
        args = parser.parse_args()

        if args.verbose:
            logging.basicConfig(level=logging.DEBUG)

        output = TextOutput(js_flavour=True)
        files = file_generator(args.files, args.glob, ["*.js", "*.ts"], args.ignore)

        # Hand files over in fixed-size batches. The loop ends normally
        # (rather than returning from inside it) so that the summary line
        # below is actually reached.
        batch_size = 50
        batch = []
        for filename in files:
            batch.append(filename)
            if len(batch) == batch_size:
                process_files(batch, args.node, output)
                batch = []
        if batch:
            # Flush the final, partially filled batch.
            process_files(batch, args.node, output)

        print(f"Processed {output.regexes} regexes")