def __init__(self, ruleset):
    self.ruleset = ruleset
    util.set_flags(False, True, False)
    # Some semgrep versions do not accept the json_time keyword; if the first
    # constructor call rejects it, fall back to the signature that takes it.
    try:
        self.setting = OutputSettings(
            output_format=OutputFormat.JSON,
            output_destination=None,
            error_on_findings=False,
            verbose_errors=False,
            strict=False,
            timeout_threshold=3,
            json_stats=False,
            # json_time = False,
            output_per_finding_max_lines_limit=None,
        )
    except TypeError:
        self.setting = OutputSettings(
            output_format=OutputFormat.JSON,
            output_destination=None,
            error_on_findings=False,
            verbose_errors=False,
            strict=False,
            timeout_threshold=3,
            json_stats=False,
            json_time=False,
            output_per_finding_max_lines_limit=None,
        )
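An alternative way to bridge the signature differences, offered only as a sketch and not taken from the original: filter the keyword arguments against whatever this semgrep version's OutputSettings actually accepts. The helper name make_output_settings is hypothetical, and it assumes OutputSettings exposes an introspectable constructor signature.

import inspect

from semgrep.output import OutputSettings


def make_output_settings(**kwargs):
    # Keep only the keyword arguments that this semgrep version's
    # OutputSettings constructor accepts (assumption: the signature is
    # introspectable via inspect.signature).
    accepted = inspect.signature(OutputSettings).parameters
    return OutputSettings(**{k: v for k, v in kwargs.items() if k in accepted})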
def invoke_semgrep(config: Path, targets: List[Path], **kwargs: Any) -> Any:
    """
    Call semgrep with config on targets and return the result as a JSON object.

    Uses the default arguments of `main` unless overridden with a kwarg.
    """
    io_capture = StringIO()
    output_handler = OutputHandler(
        OutputSettings(
            output_format=OutputFormat.JSON,
            output_destination=None,
            error_on_findings=False,
            verbose_errors=False,
            strict=False,
            json_stats=False,
            output_per_finding_max_lines_limit=None,
        ),
        stdout=io_capture,
    )
    main(
        output_handler=output_handler,
        target=[str(t) for t in targets],
        pattern="",
        lang="",
        configs=[str(config)],
        **kwargs,
    )
    output_handler.close()
    return json.loads(io_capture.getvalue())
def test_raise_semgrep_error_from_json_unknown_error():
    test_rule_id = "test_rule_id"
    rule_yaml_text = io.StringIO(
        f"""
rules:
- id: {test_rule_id}
  pattern: $X == $X
  severity: INFO
  languages: [python]
  message: blah
"""
    )
    rule_dict = yaml.load(rule_yaml_text).get("rules")[0]
    rule: Rule = Rule.from_json(rule_dict)

    core_runner = CoreRunner(
        allow_exec=False,
        output_settings=OutputSettings(OutputFormat.TEXT),
        jobs=1,
        timeout=0,
        max_memory=0,
        timeout_threshold=0,
        report_time=False,
    )
    patterns: List[Pattern] = list(core_runner._flatten_rule_patterns([rule]))
    output_json: Dict[str, Any] = {
        "error": "unknown exception",
        "message": "End_of_file",
    }

    with pytest.raises(SemgrepError) as excinfo:
        core_runner._raise_semgrep_error_from_json(output_json, patterns, rule)
    assert test_rule_id in str(excinfo.value)
def invoke_semgrep(
    config: Path,
    targets: List[Path],
    output_settings: Optional[OutputSettings] = None,
    **kwargs: Any,
) -> Union[Dict[str, Any], str]:
    """
    Return Semgrep results of 'config' on 'targets' as a dict|str

    Uses default arguments of 'semgrep_main.main' unless overwritten with 'kwargs'
    """
    if output_settings is None:
        output_settings = OutputSettings(output_format=OutputFormat.JSON)

    io_capture = StringIO()
    output_handler = OutputHandler(output_settings, stdout=io_capture)
    main(
        output_handler=output_handler,
        target=[str(t) for t in targets],
        pattern="",
        lang="",
        configs=[str(config)],
        **kwargs,
    )
    output_handler.close()

    result: Union[Dict[str, Any], str] = (
        json.loads(io_capture.getvalue())
        if output_settings.output_format.is_json()
        else io_capture.getvalue()
    )
    return result
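A minimal usage sketch of the wrapper above. The rule file and target paths are placeholders, and the "results"/"check_id"/"path" keys assume semgrep's usual JSON output schema; they are not taken from the original snippet.

from pathlib import Path

# Hypothetical call site: 'rules.yaml' and 'src/' are placeholders.
findings = invoke_semgrep(Path("rules.yaml"), [Path("src/")])
if isinstance(findings, dict):
    # Assumes semgrep's JSON schema with a top-level "results" list.
    for result in findings.get("results", []):
        print(result["check_id"], result["path"])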
def test_ignore_git_dir(tmp_path, monkeypatch):
    """
    Ignores all files in .git directory when scanning generic
    """
    foo = tmp_path / ".git"
    foo.mkdir()
    (foo / "bar").touch()

    monkeypatch.chdir(tmp_path)
    language = Language("generic")
    output_settings = OutputSettings(
        output_format=OutputFormat.TEXT,
        output_destination=None,
        error_on_findings=False,
        verbose_errors=False,
        strict=False,
        json_stats=False,
        output_time=False,
        output_per_finding_max_lines_limit=None,
        output_per_line_max_chars_limit=None,
    )
    defaulthandler = OutputHandler(output_settings)
    assert [] == TargetManager([], [], 0, [foo], True, defaulthandler, False).get_files(
        language, [], []
    )
def invoke_semgrep(paths, scan_rules, **kwargs):
    """Call Semgrep."""
    if platform.system() == 'Windows':
        return None
    from semgrep import semgrep_main, util
    from semgrep.constants import OutputFormat
    from semgrep.output import OutputHandler, OutputSettings
    try:
        cpu_count = multiprocessing.cpu_count()
    except NotImplementedError:
        cpu_count = 1  # CPU count is not implemented on Windows
    util.set_flags(False, True, False)  # Verbose, Quiet, Force_color
    io_capture = StringIO()
    output_handler = OutputHandler(
        OutputSettings(
            output_format=OutputFormat.JSON,
            output_destination=None,
            error_on_findings=False,
            strict=False,
        ),
        stdout=io_capture,
    )
    semgrep_main.main(
        output_handler=output_handler,
        target=[pt.as_posix() for pt in paths],
        jobs=cpu_count,
        pattern=None,
        lang=None,
        config=scan_rules,
        **kwargs,
    )
    output_handler.close()
    return json.loads(io_capture.getvalue())
def invoke_semgrep(paths, scan_rules, **kwargs):
    """Call Semgrep."""
    if platform.system() == 'Windows':
        return None
    from semgrep import semgrep_main
    from semgrep.state import get_state
    from semgrep.constants import OutputFormat
    from semgrep.output import OutputHandler, OutputSettings
    try:
        cpu_count = multiprocessing.cpu_count()
    except NotImplementedError:
        cpu_count = 1  # CPU count is not implemented on Windows
    # Semgrep output formatting
    state = get_state()
    state.terminal.configure(
        verbose=False,
        debug=False,
        quiet=True,
        force_color=False,
    )
    logging.getLogger('semgrep').propagate = False
    output_settings = OutputSettings(
        output_format=OutputFormat.JSON,
        output_destination=None,
        output_per_finding_max_lines_limit=None,
        output_per_line_max_chars_limit=None,
        error_on_findings=False,
        verbose_errors=False,
        strict=False,
        timeout_threshold=3,
    )
    output_handler = OutputHandler(output_settings)
    (
        filtered_matches_by_rule,
        _,
        _,
        _,
        _,
        _,
        _,
        _,
    ) = semgrep_main.main(
        output_handler=output_handler,
        target=[pt.as_posix() for pt in paths],
        jobs=cpu_count,
        pattern=None,
        lang=None,
        configs=[scan_rules],
        timeout=5,
        timeout_threshold=3,
        **kwargs,
    )
    output_handler.rule_matches = [
        m for ms in filtered_matches_by_rule.values() for m in ms
    ]
    return json.loads(output_handler._build_output())
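A hedged usage sketch for this newer-API variant. The target path and rules file are placeholders, and reading "results"/"check_id"/"start.line" assumes semgrep's standard JSON output schema rather than anything shown above.

from pathlib import Path

# Hypothetical call site: the file path and 'rules.yaml' are placeholders.
output = invoke_semgrep([Path("app/views.py")], "rules.yaml")
if output is not None:  # None is returned on Windows, per the guard above
    # Assumes semgrep's JSON schema with a top-level "results" list.
    for match in output.get("results", []):
        print(match.get("check_id"), match.get("start", {}).get("line"))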
def test_explicit_path(tmp_path, monkeypatch):
    foo = tmp_path / "foo"
    foo.mkdir()
    (foo / "a.go").touch()
    (foo / "b.go").touch()
    foo_noext = foo / "noext"
    foo_noext.touch()
    foo_a = foo / "a.py"
    foo_a.touch()
    foo_b = foo / "b.py"
    foo_b.touch()

    monkeypatch.chdir(tmp_path)

    # Should include explicitly passed python file
    foo_a = foo_a.relative_to(tmp_path)
    output_settings = OutputSettings(
        output_format=OutputFormat.TEXT,
        output_destination=None,
        error_on_findings=False,
        strict=False,
    )
    defaulthandler = OutputHandler(output_settings)
    python_language = Language("python")

    assert foo_a in TargetManager(
        [], [], ["foo/a.py"], False, defaulthandler, False
    ).get_files(python_language, [], [])

    # Should include explicitly passed python file even if it is in excludes
    assert foo_a not in TargetManager(
        [], ["foo/a.py"], ["."], False, defaulthandler, False
    ).get_files(python_language, [], [])
    assert foo_a in TargetManager(
        [], ["foo/a.py"], [".", "foo/a.py"], False, defaulthandler, False
    ).get_files(python_language, [], [])

    # Should ignore explicitly passed .go file when requesting python
    assert (
        TargetManager([], [], ["foo/a.go"], False, defaulthandler, False).get_files(
            python_language, [], []
        )
        == []
    )

    # Should include explicitly passed file with unknown extension if
    # skip_unknown_extensions=False
    assert cmp_path_sets(
        set(
            TargetManager([], [], ["foo/noext"], False, defaulthandler, False).get_files(
                python_language, [], []
            )
        ),
        {foo_noext},
    )

    # Should not include explicitly passed file with unknown extension if
    # skip_unknown_extensions=True
    assert cmp_path_sets(
        set(
            TargetManager([], [], ["foo/noext"], False, defaulthandler, True).get_files(
                python_language, [], []
            )
        ),
        set(),
    )
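The test above relies on a cmp_path_sets helper that is not shown in this listing. Below is a minimal sketch of what such a helper plausibly does (compare two sets of paths after resolving them); it is an assumption for readability, not the original implementation.

from pathlib import Path
from typing import Set


def cmp_path_sets(a: Set[Path], b: Set[Path]) -> bool:
    """Return True if both sets contain the same resolved paths (assumed helper)."""
    return {p.resolve() for p in a} == {p.resolve() for p in b}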
def cli() -> None:
    parser = argparse.ArgumentParser(
        description=f"semgrep CLI. For more information about semgrep, go to {SEMGREP_URL}",
        prog="semgrep",
    )

    # input
    parser.add_argument(
        "target",
        nargs="*",
        default=[os.curdir],
        help=(
            "Search these files or directories. Defaults to entire current "
            "working directory. Implied argument if piping to semgrep."
        ),
    )

    # config options
    config = parser.add_argument_group("config")
    config_ex = config.add_mutually_exclusive_group()
    config_ex.add_argument(
        "-g",
        "--generate-config",
        action="store_true",
        help=f"Generate starter configuration file, {DEFAULT_CONFIG_FILE}",
    )
    config_ex.add_argument(
        "-f",
        "--config",
        help=(
            "YAML configuration file, directory of YAML files ending in "
            ".yml|.yaml, URL of a configuration file, or semgrep registry entry "
            "name. See README for information on configuration file format."
        ),
    )
    config_ex.add_argument(
        "-e",
        "--pattern",
        help="Code search pattern. See README for information on pattern features.",
    )
    config.add_argument(
        "-l",
        "--lang",
        help=(
            "Parse pattern and all files in specified language. Must be used "
            "with -e/--pattern."
        ),
    )
    config.add_argument(
        "--validate",
        action="store_true",
        help="Validate configuration file(s). No search is performed.",
    )
    config.add_argument(
        "--strict",
        action="store_true",
        help="Only invoke semgrep if configuration file(s) are valid.",
    )
    parser.add_argument(
        "--exclude",
        action="append",
        default=[],
        help=(
            "Skip any file or directory that matches this pattern; --exclude='*.py' will ignore"
            " the following: foo.py, src/foo.py, foo.py/bar.sh. --exclude='tests' will ignore tests/foo.py"
            " as well as a/b/tests/c/foo.py. Can add multiple times. Overrides includes."
        ),
    )
    parser.add_argument(
        "--include",
        action="append",
        default=[],
        help=(
            "Scan only files or directories that match this pattern; --include='*.jsx' will scan"
            " the following: foo.jsx, src/foo.jsx, foo.jsx/bar.sh. --include='src' will scan src/foo.py"
            " as well as a/b/src/c/foo.py. Can add multiple times."
        ),
    )
    parser.add_argument(
        "--no-git-ignore",
        action="store_true",
        help="Scan all files, even those ignored by a project's gitignore(s)",
    )
    config.add_argument(
        RCE_RULE_FLAG,
        action="store_true",
        help=(
            "WARNING: allow rules to run arbitrary code. ONLY ENABLE IF YOU "
            "TRUST THE SOURCE OF ALL RULES IN YOUR CONFIGURATION."
        ),
    )
    config.add_argument(
        "-j",
        "--jobs",
        action="store",
        type=int,
        default=CPU_COUNT,
        help=(
            "Number of subprocesses to use to run checks in parallel. Defaults "
            "to the number of CPUs on the system."
        ),
    )
    config.add_argument(
        "--timeout",
        type=int,
        default=0,
        help=(
            "Maximum time to spend running a rule on a single file in seconds. "
            "If set to 0 will not have time limit. Defaults to 0."
        ),
    )
    config.add_argument(
        "--max-memory",
        type=int,
        default=0,
        help=(
            "Maximum memory to use running a rule on a single file in MB. "
            "If set to 0 will not have memory limit. Defaults to 0."
        ),
    )
    config.add_argument(
        "--timeout-threshold",
        type=int,
        default=0,
        help=(
            "Maximum number of rules that can timeout on a file before the file is skipped. "
            "If set to 0 will not have limit. Defaults to 0."
        ),
    )

    # output options
    output = parser.add_argument_group("output")
    output.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help=(
            "Do not print anything to stdout. Search results can still be "
            "saved to an output file specified by -o/--output. Exit code "
            "provides success status."
        ),
    )
    output.add_argument(
        "--no-rewrite-rule-ids",
        action="store_true",
        help=(
            "Do not rewrite rule ids when they appear in nested sub-directories "
            "(by default, rule 'foo' in test/rules.yaml will be renamed "
            "'test.foo')."
        ),
    )
    output.add_argument(
        "-o",
        "--output",
        help=(
            "Save search results to a file or post to URL. "
            "Default is to print to stdout."
        ),
    )
    output.add_argument("--json", action="store_true", help="Output results in JSON format.")
    output.add_argument(
        "--debugging-json",
        action="store_true",
        help="Output JSON with extra debugging information.",
    )
    output.add_argument("--sarif", action="store_true", help="Output results in SARIF format.")
    output.add_argument("--test", action="store_true", help="Run test suite.")
    parser.add_argument(
        "--test-ignore-todo",
        action="store_true",
        help="Ignore rules marked as '#todoruleid:' in test files.",
    )
    output.add_argument(
        "--dump-ast",
        action="store_true",
        help=(
            "Show AST of the input file or passed expression and then exit "
            "(can use --json)."
        ),
    )
    output.add_argument("--synthesize-patterns", help=argparse.SUPPRESS)
    output.add_argument(
        "--error",
        action="store_true",
        help="Exit 1 if there are findings. Useful for CI and scripts.",
    )
    output.add_argument(
        "-a",
        "--autofix",
        action="store_true",
        help=(
            "Apply the autofix patches. WARNING: data loss can occur with this "
            "flag. Make sure your files are stored in a version control system."
        ),
    )
    output.add_argument(
        "--dryrun",
        action="store_true",
        help=(
            "Do autofixes, but don't write them to a file. "
            "This will print the changes to the console. "
            "This lets you see the changes before you commit to them. "
            "Only works with the --autofix flag. Otherwise does nothing."
        ),
    )
    output.add_argument(
        "--disable-nosem",
        action="store_true",
        help=(
            "Disable the effect of 'nosem'. This will report findings on lines "
            "containing a 'nosem' comment at the end."
        ),
    )

    # logging options
    logging_ = parser.add_argument_group("logging")
    logging_.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help=(
            "Set the logging level to verbose. E.g. statements about which "
            "files are being processed will be printed."
        ),
    )
    parser.add_argument("--version", action="store_true", help="Show the version and exit.")
    parser.add_argument(
        "--force-color",
        action="store_true",
        help="Always include ANSI color in the output, even if not writing to a TTY",
    )
    parser.add_argument(
        "--disable-version-check",
        action="store_true",
        help="Disable checking for latest version.",
    )

    ### Parse and validate
    args = parser.parse_args()
    if args.version:
        print(__VERSION__)
        return

    if args.pattern and not args.lang:
        parser.error("-e/--pattern and -l/--lang must both be specified")

    if args.dump_ast and not args.lang:
        parser.error("--dump-ast and -l/--lang must both be specified")

    # set the flags
    semgrep.util.set_flags(args.verbose, args.quiet, args.force_color)

    # change cwd if using docker
    try:
        semgrep.config_resolver.adjust_for_docker()
    except SemgrepError as e:
        logger.exception(str(e))
        raise e

    output_format = OutputFormat.TEXT
    if args.json:
        output_format = OutputFormat.JSON
    elif args.debugging_json:
        output_format = OutputFormat.JSON_DEBUG
    elif args.sarif:
        output_format = OutputFormat.SARIF

    output_settings = OutputSettings(
        output_format=output_format,
        output_destination=args.output,
        error_on_findings=args.error,
        strict=args.strict,
        timeout_threshold=args.timeout_threshold,
    )

    if not args.disable_version_check:
        if not is_running_latest():
            logger.warning(
                "A new version of Semgrep is available. Please see "
                "https://github.com/returntocorp/semgrep#upgrading for more information."
            )

    if args.test:
        # the test code (which isn't a "test" per se but is actually machinery to
        # evaluate semgrep performance) uses managed_output internally
        semgrep.test.test_main(args)

    with managed_output(output_settings) as output_handler:
        if args.dump_ast:
            dump_parsed_ast(args.json, args.lang, args.pattern, args.target)
        elif args.synthesize_patterns:
            synthesize_patterns(args.lang, args.synthesize_patterns, args.target)
        elif args.validate:
            configs, config_errors = semgrep.semgrep_main.get_config(
                args.pattern, args.lang, args.config
            )
            valid_str = "invalid" if config_errors else "valid"
            logger.info(
                f"Configuration is {valid_str} - found {len(configs)} valid configuration(s) and {len(config_errors)} configuration error(s)."
            )
            if config_errors:
                for error in config_errors:
                    output_handler.handle_semgrep_error(error)
                raise SemgrepError("Please fix the above errors and try again.")
        elif args.generate_config:
            semgrep.config_resolver.generate_config()
        else:
            semgrep.semgrep_main.main(
                output_handler=output_handler,
                target=args.target,
                pattern=args.pattern,
                lang=args.lang,
                config=args.config,
                no_rewrite_rule_ids=args.no_rewrite_rule_ids,
                jobs=args.jobs,
                include=args.include,
                exclude=args.exclude,
                strict=args.strict,
                autofix=args.autofix,
                dryrun=args.dryrun,
                disable_nosem=args.disable_nosem,
                dangerously_allow_arbitrary_code_execution_from_rules=args.dangerously_allow_arbitrary_code_execution_from_rules,
                no_git_ignore=args.no_git_ignore,
                timeout=args.timeout,
                max_memory=args.max_memory,
                timeout_threshold=args.timeout_threshold,
            )
def cli() -> None:
    parser = argparse.ArgumentParser(
        description=f"semgrep CLI. For more information about semgrep, go to {SEMGREP_URL}",
        prog="semgrep",
    )

    # input
    parser.add_argument(
        "target",
        nargs="*",
        default=[os.curdir],
        help=(
            "Search these files or directories. Defaults to entire current "
            "working directory. Implied argument if piping to semgrep."
        ),
    )

    # config options
    config = parser.add_argument_group("config")
    config_ex = config.add_mutually_exclusive_group()
    config_ex.add_argument(
        "-f",  # for backwards compatibility
        "-c",
        "--config",
        action="append",
        default=[],
        help=(
            "YAML configuration file, directory of YAML files ending in "
            ".yml|.yaml, URL of a configuration file, or semgrep registry entry "
            "name. See https://semgrep.dev/docs/writing-rules/rule-syntax for "
            "information on configuration file format."
        ),
    )
    config_ex.add_argument(
        "-e",
        "--pattern",
        help=(
            "Code search pattern. See https://semgrep.dev/docs/writing-rules/pattern-syntax "
            "for information on pattern features."
        ),
    )
    config.add_argument(
        "-g",
        "--generate-config",
        action="store",
        nargs="?",
        const=DEFAULT_CONFIG_FILE,
        type=argparse.FileType("x"),
        help=f"Generate starter configuration file. Defaults to {DEFAULT_CONFIG_FILE}.",
    )
    config.add_argument(
        "-l",
        "--lang",
        help=(
            "Parse pattern and all files in specified language. Must be used "
            "with -e/--pattern."
        ),
    )
    config.add_argument(
        "--validate",
        action="store_true",
        help="Validate configuration file(s). No search is performed.",
    )
    config.add_argument(
        "--strict",
        action="store_true",
        help="Only invoke semgrep if configuration file(s) are valid.",
    )
    parser.add_argument(
        "--exclude",
        action="append",
        default=[],
        help=(
            "Skip any file or directory that matches this pattern; --exclude='*.py' will ignore"
            " the following: foo.py, src/foo.py, foo.py/bar.sh. --exclude='tests' will ignore tests/foo.py"
            " as well as a/b/tests/c/foo.py. Can add multiple times. Overrides includes."
        ),
    )
    parser.add_argument(
        "--include",
        action="append",
        default=[],
        help=(
            "Filter files or directories by path. The argument is a"
            " glob-style pattern such as 'foo.*' that must match the path."
            " This is an extra filter in addition to other applicable filters."
            " For example, specifying the language with '-l javascript' might"
            " preselect files 'src/foo.jsx' and 'lib/bar.js'. Specifying one of"
            " '--include=src', '--include=*.jsx', or '--include=src/foo.*'"
            " will restrict the selection to the single file 'src/foo.jsx'."
            " A choice of multiple '--include' patterns can be specified."
            " For example, '--include=foo.* --include=bar.*' will select"
            " both 'src/foo.jsx' and 'lib/bar.js'."
            " Glob-style patterns follow the syntax supported by python,"
            " which is documented at https://docs.python.org/3/library/glob.html"
        ),
    )
    parser.add_argument(
        "--no-git-ignore",
        action="store_true",
        help=(
            "Don't skip files ignored by git."
            " Scanning starts from the root folder specified on the semgrep"
            " command line."
            " Normally, if the scanning root is within a git repository,"
            " only the tracked files and the new files"
            " would be scanned. Git submodules and git-ignored files would"
            " normally be skipped."
            " This option will disable git-aware filtering."
            " Setting this flag does nothing if the scanning root is not"
            " in a git repository."
        ),
    )
    parser.add_argument(
        "--skip-unknown-extensions",
        action="store_true",
        help="Scan only known file extensions, even if unrecognized ones are explicitly targeted.",
    )
    config.add_argument(
        RCE_RULE_FLAG,
        action="store_true",
        help=(
            "WARNING: allow rules to run arbitrary code. ONLY ENABLE IF YOU "
            "TRUST THE SOURCE OF ALL RULES IN YOUR CONFIGURATION."
        ),
    )
    config.add_argument(
        "-j",
        "--jobs",
        action="store",
        type=int,
        default=CPU_COUNT,
        help=(
            "Number of subprocesses to use to run checks in parallel. Defaults "
            "to the number of CPUs on the system."
        ),
    )
    config.add_argument(
        "--timeout",
        type=int,
        default=DEFAULT_TIMEOUT,
        help=(
            "Maximum time to spend running a rule on a single file in seconds. "
            "If set to 0 will not have time limit. Defaults to {} s.".format(DEFAULT_TIMEOUT)
        ),
    )
    config.add_argument(
        "--max-memory",
        type=int,
        default=0,
        help=(
            "Maximum memory to use running a rule on a single file in MB. "
            "If set to 0 will not have memory limit. Defaults to 0."
        ),
    )
    config.add_argument(
        "--timeout-threshold",
        type=int,
        default=0,
        help=(
            "Maximum number of rules that can timeout on a file before the file is skipped. "
            "If set to 0 will not have limit. Defaults to 0."
        ),
    )
    config.add_argument(
        "--severity",
        action="append",
        default=[],
        help=(
            "Report findings only from rules matching the supplied severity level. "
            "By default all applicable rules are run. "
            "Can add multiple times. Each should be one of INFO, WARNING, or ERROR."
        ),
    )

    # output options
    output = parser.add_argument_group("output")
    output.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help=(
            "Do not print any logging messages to stderr. Finding output will "
            "still be sent to stdout. Exit code provides success status."
        ),
    )
    output.add_argument(
        "--no-rewrite-rule-ids",
        action="store_true",
        help=(
            "Do not rewrite rule ids when they appear in nested sub-directories "
            "(by default, rule 'foo' in test/rules.yaml will be renamed "
            "'test.foo')."
        ),
    )
    output.add_argument(
        "-o",
        "--output",
        help=(
            "Save search results to a file or post to URL. "
            "Default is to print to stdout."
        ),
    )
    output.add_argument("--json", action="store_true", help="Output results in JSON format.")
    output.add_argument(
        "--save-test-output-tar",
        action="store_true",
        help="Store json output as a tarball that will be uploaded as a Github artifact.",
    )
    output.add_argument(
        "--json-stats",
        action="store_true",
        # this flag is experimental and users should not yet rely on the output being stable
        # help="Include statistical information about performance in JSON output (experimental).",
        help=argparse.SUPPRESS,
    )
    output.add_argument(
        "--debugging-json",
        action="store_true",
        help="Output JSON with extra debugging information (experimental).",
    )
    output.add_argument("--junit-xml", action="store_true", help="Output results in JUnit XML format.")
    output.add_argument("--sarif", action="store_true", help="Output results in SARIF format.")
    output.add_argument(
        "--emacs",
        action="store_true",
        help="Output results in Emacs single-line format.",
    )
    output.add_argument("--test", action="store_true", help="Run test suite.")
    parser.add_argument(
        "--test-ignore-todo",
        action="store_true",
        help="Ignore rules marked as '#todoruleid:' in test files.",
    )
    output.add_argument(
        "--dump-ast",
        action="store_true",
        help=(
            "Show AST of the input file or passed expression and then exit "
            "(can use --json)."
        ),
    )
    output.add_argument("--synthesize-patterns", help=argparse.SUPPRESS)
    output.add_argument(
        "--error",
        action="store_true",
        help="Exit 1 if there are findings. Useful for CI and scripts.",
    )
    output.add_argument(
        "-a",
        "--autofix",
        action="store_true",
        help=(
            "Apply the autofix patches. WARNING: data loss can occur with this "
            "flag. Make sure your files are stored in a version control system."
        ),
    )
    output.add_argument(
        "--dryrun",
        action="store_true",
        help=(
            "Do autofixes, but don't write them to a file. "
            "This will print the changes to the console. "
            "This lets you see the changes before you commit to them. "
            "Only works with the --autofix flag. Otherwise does nothing."
        ),
    )
    output.add_argument(
        "--disable-nosem",
        action="store_true",
        help=(
            "Disable the effect of 'nosem'. This will report findings on lines "
            "containing a 'nosem' comment at the end."
        ),
    )
    output.add_argument(
        MAX_LINES_FLAG_NAME,
        type=int,
        default=DEFAULT_MAX_LINES_PER_FINDING,
        help=(
            "Maximum number of lines of code that will be shown for each match "
            "before trimming (set to 0 for unlimited)."
        ),
    )

    # logging options
    logging_ = parser.add_argument_group("logging")
    logging_.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Show more details about what rules are running, which files failed to parse, etc.",
    )
    output.add_argument(
        "--debug",
        action="store_true",
        help="Set the logging level to DEBUG",
    )
    parser.add_argument("--version", action="store_true", help="Show the version and exit.")
    parser.add_argument(
        "--force-color",
        action="store_true",
        help="Always include ANSI color in the output, even if not writing to a TTY",
    )
    parser.add_argument(
        "--disable-version-check",
        action="store_true",
        help="Disable checking for latest version.",
    )

    ### Parse and validate
    args = parser.parse_args()
    if args.version:
        print(__VERSION__)
        return

    if args.pattern and not args.lang:
        parser.error("-e/--pattern and -l/--lang must both be specified")

    if args.dump_ast and not args.lang:
        parser.error("--dump-ast and -l/--lang must both be specified")

    # set the flags
    semgrep.util.set_flags(args.debug, args.quiet, args.force_color)

    # change cwd if using docker
    try:
        semgrep.config_resolver.adjust_for_docker()
    except SemgrepError as e:
        logger.exception(str(e))
        raise e

    output_format = OutputFormat.TEXT
    if args.json:
        output_format = OutputFormat.JSON
    elif args.debugging_json:
        output_format = OutputFormat.JSON_DEBUG
    elif args.junit_xml:
        output_format = OutputFormat.JUNIT_XML
    elif args.sarif:
        output_format = OutputFormat.SARIF
    elif args.emacs:
        output_format = OutputFormat.EMACS

    output_settings = OutputSettings(
        output_format=output_format,
        output_destination=args.output,
        error_on_findings=args.error,
        strict=args.strict,
        verbose_errors=args.verbose,
        timeout_threshold=args.timeout_threshold,
        json_stats=args.json_stats,
        output_per_finding_max_lines_limit=args.max_lines_per_finding,
    )

    if not args.disable_version_check:
        if not is_running_latest():
            logger.warning(
                "A new version of Semgrep is available. Please see "
                "https://github.com/returntocorp/semgrep#upgrading for more information."
            )

    if args.test:
        # the test code (which isn't a "test" per se but is actually machinery to
        # evaluate semgrep performance) uses managed_output internally
        semgrep.test.test_main(args)

    # The 'optional_stdin_target' context manager must remain before
    # 'managed_output'. Output depends on file contents so we cannot have
    # already deleted the temporary stdin file.
    with optional_stdin_target(args.target) as target, managed_output(
        output_settings
    ) as output_handler:
        if args.dump_ast:
            dump_parsed_ast(args.json, args.lang, args.pattern, target)
        elif args.synthesize_patterns:
            synthesize_patterns(args.lang, args.synthesize_patterns, target)
        elif args.validate:
            configs, config_errors = semgrep.semgrep_main.get_config(
                args.pattern, args.lang, args.config
            )
            valid_str = "invalid" if config_errors else "valid"
            rule_count = len(configs.get_rules(True))
            logger.info(
                f"Configuration is {valid_str} - found {len(configs.valid)} valid configuration(s), {len(config_errors)} configuration error(s), and {rule_count} rule(s)."
            )
            if config_errors:
                for error in config_errors:
                    output_handler.handle_semgrep_error(error)
                raise SemgrepError("Please fix the above errors and try again.")
        elif args.generate_config:
            semgrep.config_resolver.generate_config(
                args.generate_config, args.lang, args.pattern
            )
        else:
            semgrep.semgrep_main.main(
                output_handler=output_handler,
                target=target,
                pattern=args.pattern,
                lang=args.lang,
                configs=args.config,
                no_rewrite_rule_ids=args.no_rewrite_rule_ids,
                jobs=args.jobs,
                include=args.include,
                exclude=args.exclude,
                strict=args.strict,
                autofix=args.autofix,
                dryrun=args.dryrun,
                disable_nosem=args.disable_nosem,
                dangerously_allow_arbitrary_code_execution_from_rules=args.dangerously_allow_arbitrary_code_execution_from_rules,
                no_git_ignore=args.no_git_ignore,
                timeout=args.timeout,
                max_memory=args.max_memory,
                timeout_threshold=args.timeout_threshold,
                skip_unknown_extensions=args.skip_unknown_extensions,
                severity=args.severity,
            )
def cli(
    autofix: bool,
    config: Optional[Tuple[str, ...]],
    dangerously_allow_arbitrary_code_execution_from_rules: bool,
    debug: bool,
    debugging_json: bool,
    dryrun: bool,
    dump_ast: bool,
    emacs: bool,
    enable_metrics: bool,
    enable_nosem: bool,
    enable_version_check: bool,
    error_on_findings: bool,
    exclude: Optional[Tuple[str, ...]],
    force_color: bool,
    generate_config: bool,
    include: Optional[Tuple[str, ...]],
    jobs: int,
    json: bool,
    json_stats: bool,
    json_time: bool,
    junit_xml: bool,
    lang: Optional[str],
    max_chars_per_line: int,
    max_lines_per_finding: int,
    max_memory: int,
    max_target_bytes: int,
    optimizations: str,
    output: Optional[str],
    pattern: Optional[str],
    quiet: bool,
    replacement: Optional[str],
    rewrite_rule_ids: bool,
    sarif: bool,
    save_test_output_tar: bool,
    scan_unknown_extensions: bool,
    severity: Optional[Tuple[str, ...]],
    strict: bool,
    synthesize_patterns: str,
    target: Tuple[str, ...],
    test: bool,
    test_ignore_todo: bool,
    time: bool,
    timeout: int,
    timeout_threshold: int,
    use_git_ignore: bool,
    validate: bool,
    verbose: bool,
    version: bool,
    vim: bool,
) -> None:
    """
    Semgrep CLI. Searches TARGET paths for matches to rules or patterns.
    Defaults to searching entire current working directory.

    For more information about Semgrep, go to https://semgrep.dev.
    """
    if version:
        print(__VERSION__)
        if enable_version_check:
            from semgrep.version import version_check

            version_check()
        return

    # To keep version runtime fast, we defer non-version imports until here
    import semgrep.semgrep_main
    import semgrep.test
    import semgrep.config_resolver
    from semgrep.constants import OutputFormat
    from semgrep.constants import DEFAULT_CONFIG_FILE
    from semgrep.dump_ast import dump_parsed_ast
    from semgrep.error import SemgrepError
    from semgrep.metric_manager import metric_manager
    from semgrep.output import managed_output
    from semgrep.output import OutputSettings
    from semgrep.synthesize_patterns import synthesize
    from semgrep.target_manager import optional_stdin_target

    target_sequence: Sequence[str] = list(target) if target else [os.curdir]

    if enable_metrics:
        metric_manager.enable()
    else:
        metric_manager.disable()

    if include and exclude:
        logger.warning(
            with_color(
                "yellow",
                "Paths that match both --include and --exclude will be skipped by Semgrep.",
            )
        )

    if pattern is not None and lang is None:
        abort("-e/--pattern and -l/--lang must both be specified")

    if dangerously_allow_arbitrary_code_execution_from_rules:
        logger.warning(
            "The '--dangerously-allow-arbitrary-code-execution-from-rules' flag is now deprecated and does nothing. It will be removed in the future."
        )

    output_time = time or json_time

    # set the flags
    semgrep.util.set_flags(verbose=verbose, debug=debug, quiet=quiet, force_color=force_color)

    # change cwd if using docker
    try:
        semgrep.config_resolver.adjust_for_docker()
    except SemgrepError as e:
        logger.exception(str(e))
        raise e

    output_format = OutputFormat.TEXT
    if json or json_time or debugging_json:
        output_format = OutputFormat.JSON
    elif junit_xml:
        output_format = OutputFormat.JUNIT_XML
    elif sarif:
        output_format = OutputFormat.SARIF
    elif emacs:
        output_format = OutputFormat.EMACS
    elif vim:
        output_format = OutputFormat.VIM

    output_settings = OutputSettings(
        output_format=output_format,
        output_destination=output,
        error_on_findings=error_on_findings,
        strict=strict,
        debug=debugging_json,
        verbose_errors=verbose,
        timeout_threshold=timeout_threshold,
        json_stats=json_stats,
        output_time=output_time,
        output_per_finding_max_lines_limit=max_lines_per_finding,
        output_per_line_max_chars_limit=max_chars_per_line,
    )

    if test:
        # the test code (which isn't a "test" per se but is actually machinery to
        # evaluate semgrep performance) uses managed_output internally
        semgrep.test.test_main(
            target=target_sequence,
            config=config,
            test_ignore_todo=test_ignore_todo,
            strict=strict,
            json=json,
            save_test_output_tar=save_test_output_tar,
            optimizations=optimizations,
        )

    # The 'optional_stdin_target' context manager must remain before
    # 'managed_output'. Output depends on file contents so we cannot have
    # already deleted the temporary stdin file.
    with optional_stdin_target(target_sequence) as target_sequence, managed_output(
        output_settings
    ) as output_handler:
        if dump_ast:
            dump_parsed_ast(json, __validate_lang("--dump_ast", lang), pattern, target_sequence)
        elif synthesize_patterns:
            synthesize(
                __validate_lang("--synthesize-patterns", lang),
                synthesize_patterns,
                target_sequence,
            )
        elif validate:
            configs, config_errors = semgrep.config_resolver.get_config(
                pattern, lang, config or []
            )
            valid_str = "invalid" if config_errors else "valid"
            rule_count = len(configs.get_rules(True))
            logger.info(
                f"Configuration is {valid_str} - found {len(configs.valid)} valid configuration(s), {len(config_errors)} configuration error(s), and {rule_count} rule(s)."
            )
            if config_errors:
                for err in config_errors:
                    output_handler.handle_semgrep_error(err)
                raise SemgrepError("Please fix the above errors and try again.")
        elif generate_config:
            with open(DEFAULT_CONFIG_FILE, "w") as fd:
                semgrep.config_resolver.generate_config(fd, lang, pattern)
        else:
            semgrep.semgrep_main.main(
                output_handler=output_handler,
                target=target_sequence,
                pattern=pattern,
                lang=lang,
                configs=(config or []),
                no_rewrite_rule_ids=(not rewrite_rule_ids),
                jobs=jobs,
                include=include,
                exclude=exclude,
                max_target_bytes=max_target_bytes,
                replacement=replacement,
                strict=strict,
                autofix=autofix,
                dryrun=dryrun,
                disable_nosem=(not enable_nosem),
                no_git_ignore=(not use_git_ignore),
                timeout=timeout,
                max_memory=max_memory,
                timeout_threshold=timeout_threshold,
                skip_unknown_extensions=(not scan_unknown_extensions),
                severity=severity,
                optimizations=optimizations,
            )

    if enable_version_check:
        from semgrep.version import version_check

        version_check()