Пример #1
0
def invoke_semgrep(
    config: Path,
    targets: List[Path],
    output_settings: Optional[OutputSettings] = None,
    **kwargs: Any,
) -> Union[Dict[str, Any], str]:
    """
    Run Semgrep with 'config' against 'targets' and return the captured output

    The output is collected in an in-memory buffer instead of the real stdout.
    If the effective output format reports itself as JSON, the captured text is
    parsed with 'json.loads' and the parsed value is returned; otherwise the
    raw captured text is returned.

    'config' is passed to 'main' as a one-element 'configs' list of strings and
    every entry of 'targets' is converted with 'str'. When 'output_settings' is
    omitted, settings with the JSON output format are created. Any 'kwargs' are
    forwarded to 'main' unchanged, so they override its defaults.
    """
    settings = output_settings
    if settings is None:
        settings = OutputSettings(output_format=OutputFormat.JSON)

    captured = StringIO()
    handler = OutputHandler(settings, stdout=captured)
    main(
        output_handler=handler,
        target=list(map(str, targets)),
        pattern="",
        lang="",
        configs=[str(config)],
        **kwargs,
    )
    # The buffer is only read after close(); presumably that is when the
    # handler emits its output - confirm against OutputHandler.
    handler.close()

    if settings.output_format.is_json():
        return json.loads(captured.getvalue())
    return captured.getvalue()
Пример #2
0
def invoke_semgrep(config: Path, targets: List[Path], **kwargs: Any) -> Any:
    """
    Run semgrep with config against targets and return the parsed JSON output

    Output is captured in an in-memory buffer and decoded with json.loads.
    Any kwarg is forwarded to MAIN unchanged, overriding its default.
    """
    sink = StringIO()
    json_settings = OutputSettings(
        output_format=OutputFormat.JSON,
        output_destination=None,
        error_on_findings=False,
        verbose_errors=False,
        strict=False,
        json_stats=False,
        output_per_finding_max_lines_limit=None,
    )
    handler = OutputHandler(json_settings, stdout=sink)

    main(
        output_handler=handler,
        target=list(map(str, targets)),
        pattern="",
        lang="",
        configs=[str(config)],
        **kwargs,
    )
    # The buffer is read only once the handler has been closed.
    handler.close()

    raw_json = sink.getvalue()
    return json.loads(raw_json)
Пример #3
0
def test_ignore_git_dir(tmp_path, monkeypatch):
    """
    A file placed inside a .git directory yields no targets for "generic"
    """
    git_dir = tmp_path / ".git"
    git_dir.mkdir()
    (git_dir / "bar").touch()

    monkeypatch.chdir(tmp_path)
    generic = Language("generic")
    handler = OutputHandler(
        OutputSettings(
            output_format=OutputFormat.TEXT,
            output_destination=None,
            error_on_findings=False,
            verbose_errors=False,
            strict=False,
            json_stats=False,
            output_time=False,
            output_per_finding_max_lines_limit=None,
            output_per_line_max_chars_limit=None,
        )
    )

    # The .git directory itself is the only path handed to the manager.
    manager = TargetManager([], [], 0, [git_dir], True, handler, False)
    found = manager.get_files(generic, [], [])
    assert [] == found
Пример #4
0
def invoke_semgrep(paths, scan_rules, **kwargs):
    """Run Semgrep over paths with scan_rules and return its parsed JSON output.

    Returns None without running anything when the platform is Windows.
    Each entry of paths is converted with as_posix(); extra keyword
    arguments are forwarded to semgrep_main.main unchanged.
    """
    if platform.system() == 'Windows':
        return None
    from semgrep import semgrep_main, util
    from semgrep.constants import OutputFormat
    from semgrep.output import OutputHandler, OutputSettings

    try:
        jobs = multiprocessing.cpu_count()
    except NotImplementedError:
        # Fall back to a single job when the CPU count is unavailable.
        jobs = 1

    # Positional flags, in order: verbose, quiet, force_color.
    util.set_flags(False, True, False)

    captured = StringIO()
    json_settings = OutputSettings(
        output_format=OutputFormat.JSON,
        output_destination=None,
        error_on_findings=False,
        strict=False,
    )
    handler = OutputHandler(json_settings, stdout=captured)

    posix_targets = [pt.as_posix() for pt in paths]
    semgrep_main.main(
        output_handler=handler,
        target=posix_targets,
        jobs=jobs,
        pattern=None,
        lang=None,
        config=scan_rules,
        **kwargs,
    )
    # The buffer is read only once the handler has been closed.
    handler.close()

    raw_json = captured.getvalue()
    return json.loads(raw_json)
Пример #5
0
def invoke_semgrep(paths, scan_rules, **kwargs):
    """Run Semgrep over paths with scan_rules and return its parsed JSON output.

    Returns None without running anything when the platform is Windows.
    The first element of the 8-tuple returned by semgrep_main.main (matches
    grouped by rule) is flattened into the handler's rule_matches, and the
    handler's built output is then decoded with json.loads. Extra keyword
    arguments are forwarded to semgrep_main.main unchanged.
    """
    if platform.system() == 'Windows':
        return None
    from semgrep import semgrep_main
    from semgrep.state import get_state
    from semgrep.constants import OutputFormat
    from semgrep.output import OutputHandler, OutputSettings

    try:
        jobs = multiprocessing.cpu_count()
    except NotImplementedError:
        # Fall back to a single job when the CPU count is unavailable.
        jobs = 1

    # Quiet, colourless terminal output; keep semgrep's log records from
    # propagating to ancestor loggers.
    terminal = get_state().terminal
    terminal.configure(
        verbose=False,
        debug=False,
        quiet=True,
        force_color=False,
    )
    semgrep_logger = logging.getLogger('semgrep')
    semgrep_logger.propagate = False

    handler = OutputHandler(
        OutputSettings(
            output_format=OutputFormat.JSON,
            output_destination=None,
            output_per_finding_max_lines_limit=None,
            output_per_line_max_chars_limit=None,
            error_on_findings=False,
            verbose_errors=False,
            strict=False,
            timeout_threshold=3,
        )
    )

    posix_targets = [pt.as_posix() for pt in paths]
    # Exactly eight values are expected back; only the first one is used.
    matches_by_rule, _, _, _, _, _, _, _ = semgrep_main.main(
        output_handler=handler,
        target=posix_targets,
        jobs=jobs,
        pattern=None,
        lang=None,
        configs=[scan_rules],
        timeout=5,
        timeout_threshold=3,
        **kwargs,
    )

    # Flatten the per-rule groups into one list of matches.
    all_matches = []
    for rule_matches in matches_by_rule.values():
        all_matches.extend(rule_matches)
    handler.rule_matches = all_matches

    return json.loads(handler._build_output())
Пример #6
0
def test_explicit_path(tmp_path, monkeypatch):
    """
    Check which explicitly passed paths are returned when asking for python files
    """
    src_dir = tmp_path / "foo"
    src_dir.mkdir()
    for go_name in ("a.go", "b.go"):
        (src_dir / go_name).touch()
    foo_noext = src_dir / "noext"
    foo_noext.touch()
    py_a = src_dir / "a.py"
    py_a.touch()
    py_b = src_dir / "b.py"
    py_b.touch()

    monkeypatch.chdir(tmp_path)

    # Results are compared against the path relative to the working directory.
    py_a = py_a.relative_to(tmp_path)
    handler = OutputHandler(
        OutputSettings(
            output_format=OutputFormat.TEXT,
            output_destination=None,
            error_on_findings=False,
            strict=False,
        )
    )
    python_language = Language("python")

    def python_files(excludes, targets, skip_unknown_extensions):
        # Third and last positional arguments vary between the checks below;
        # the remaining ones are fixed.
        manager = TargetManager(
            [], excludes, targets, False, handler, skip_unknown_extensions
        )
        return manager.get_files(python_language, [], [])

    # An explicitly passed python file is included
    assert py_a in python_files([], ["foo/a.py"], False)

    # An excluded python file is dropped when only found via ".", but kept
    # when it is also passed explicitly
    assert py_a not in python_files(["foo/a.py"], ["."], False)
    assert py_a in python_files(["foo/a.py"], [".", "foo/a.py"], False)

    # An explicitly passed .go file is ignored when requesting python
    assert python_files([], ["foo/a.go"], False) == []

    # A file with an unknown extension is included if skip_unknown_extensions=False
    assert cmp_path_sets(
        set(python_files([], ["foo/noext"], False)),
        {foo_noext},
    )

    # A file with an unknown extension is not included if skip_unknown_extensions=True
    assert cmp_path_sets(
        set(python_files([], ["foo/noext"], True)),
        set(),
    )
Пример #7
0
    def Scan(self, filepath):
        """Run Semgrep on a single file and return the formatted result.

        A fresh in-memory buffer is stored on self.io_capture and a new
        output handler, built from self.setting and writing to that buffer,
        is stored on self.output. Semgrep runs with self.ruleset as the only
        config, one job, timeout=5 and timeout_threshold=3. Once the handler
        is closed, the value of self.format(filepath) is returned.
        """
        capture = StringIO()
        self.io_capture = capture

        handler = OutputHandler(self.setting, stdout=capture)
        self.output = handler

        run_options = dict(
            output_handler=handler,
            target=[filepath],
            jobs=1,
            pattern=None,
            lang=None,
            configs=[self.ruleset],
            timeout=5,
            timeout_threshold=3,
        )
        semgrep_main.main(**run_options)

        # Close the handler before formatting the result.
        handler.close()
        return self.format(filepath)