Пример #1
0
    def __init__(self, raw: YamlTree[YamlMap]) -> None:
        """
        Build a rule from its parsed YAML tree.

        Keeps both the tree and its unrolled dict form, checks that an
        optional `paths:` entry is a mapping, stores its `include` and
        `exclude` entries and the rule languages, and finally derives the
        rule expression and mode from the tree.

        Raises InvalidRuleSchemaError when `paths:` is present but its value
        is not a YamlMap.
        """
        self._yaml = raw
        self._raw: Dict[str, Any] = raw.unroll_dict()

        # For tracking errors from semgrep-core
        self._pattern_spans: Dict[PatternId, Span] = {}

        # Start from "no path targeting" and only override when `paths:` exists.
        path_dict = {}
        paths_tree: Optional[YamlTree] = self._yaml.value.get("paths")
        if paths_tree is not None:
            paths, paths_span = paths_tree.value, paths_tree.span
            if not isinstance(paths, YamlMap):
                key_span = self._yaml.value.key_tree("paths").span
                # A YAML list is the only wrong shape that gets a specific hint.
                hint: Optional[str] = (
                    "remove the `-` to convert the list into a mapping"
                    if isinstance(paths, list)
                    else None
                )
                raise InvalidRuleSchemaError(
                    short_msg="invalid paths",
                    long_msg=f"the `paths:` targeting rules must be an object with at least one of {ALLOWED_GLOB_TYPES}",
                    # Report from the `paths` key through the end of its value.
                    spans=[key_span.extend_to(paths_span)],
                    help=hint,
                )
            path_dict = paths_tree.unroll_dict()

        self._includes = path_dict.get("include", [])
        self._excludes = path_dict.get("exclude", [])
        self._languages = [Language(lang) for lang in self._raw["languages"]]

        # check taint/search mode
        self._expression, self._mode = self._build_search_patterns_for_mode(self._yaml)
Пример #2
0
def test_ignore_git_dir(tmp_path, monkeypatch):
    """
    Scanning a `.git` directory with the generic language returns no files.
    """
    git_dir = tmp_path / ".git"
    git_dir.mkdir()
    (git_dir / "bar").touch()

    monkeypatch.chdir(tmp_path)

    settings = OutputSettings(
        output_format=OutputFormat.TEXT,
        output_destination=None,
        error_on_findings=False,
        verbose_errors=False,
        strict=False,
        json_stats=False,
        output_time=False,
        output_per_finding_max_lines_limit=None,
        output_per_line_max_chars_limit=None,
    )
    handler = OutputHandler(settings)

    # The `.git` directory itself is the only target handed to the manager.
    manager = TargetManager([], [], 0, [git_dir], True, handler, False)
    found = manager.get_files(Language("generic"), [], [])
    assert [] == found
Пример #3
0
def test_explicit_path(tmp_path, monkeypatch):
    """
    Check how explicitly passed file targets interact with the requested
    language, the excludes and the skip_unknown_extensions flag.
    """
    foo = tmp_path / "foo"
    foo.mkdir()
    for go_name in ("a.go", "b.go"):
        (foo / go_name).touch()
    foo_noext = foo / "noext"
    foo_noext.touch()
    foo_a = foo / "a.py"
    foo_a.touch()
    (foo / "b.py").touch()

    monkeypatch.chdir(tmp_path)

    # Membership checks below use the path relative to the working directory.
    rel_a = foo_a.relative_to(tmp_path)

    handler = OutputHandler(
        OutputSettings(
            output_format=OutputFormat.TEXT,
            output_destination=None,
            error_on_findings=False,
            strict=False,
        )
    )
    python_language = Language("python")

    def scan(excludes, targets, skip_unknown_extensions=False):
        # Build a fresh manager per query; only the excludes, the targets
        # and the unknown-extension flag vary between the cases below.
        manager = TargetManager(
            [], excludes, targets, False, handler, skip_unknown_extensions
        )
        return manager.get_files(python_language, [], [])

    # Should include explicitly passed python file
    assert rel_a in scan([], ["foo/a.py"])

    # An excluded python file is dropped when only its directory is passed,
    # but is included when the file itself is passed explicitly
    assert rel_a not in scan(["foo/a.py"], ["."])
    assert rel_a in scan(["foo/a.py"], [".", "foo/a.py"])

    # Should ignore explicitly passed .go file when requesting python
    assert scan([], ["foo/a.go"]) == []

    # Should include explicitly passed file with unknown extension if skip_unknown_extensions=False
    assert cmp_path_sets(set(scan([], ["foo/noext"])), {foo_noext})

    # Should not include explicitly passed file with unknown extension if skip_unknown_extensions=True
    assert cmp_path_sets(
        set(scan([], ["foo/noext"], skip_unknown_extensions=True)),
        set(),
    )
Пример #4
0
def test_skip_symlink(tmp_path, monkeypatch):
    """
    Symlinked files are left out of expanded targets, both when found inside
    a directory target and when passed as the target itself.
    """
    foo = tmp_path / "foo"
    foo.mkdir()
    real_file = foo / "a.py"
    real_file.touch()
    link = foo / "link.py"
    link.symlink_to(real_file)

    monkeypatch.chdir(tmp_path)

    python_language = Language("python")

    # Expanding the directory yields the real file only.
    from_directory = TargetManager.expand_targets([foo], python_language, False)
    assert cmp_path_sets(from_directory, {real_file})

    # Expanding the symlink directly yields nothing.
    from_link = TargetManager.expand_targets([link], python_language, False)
    assert cmp_path_sets(from_link, set())
Пример #5
0
    def __init__(self, raw: YamlTree[YamlMap]) -> None:
        """
        Build a rule from its parsed YAML tree.

        Keeps both the tree and its unrolled dict form, validates the
        optional `paths:` entry, stores its `include` and `exclude` entries
        and the rule languages, and builds the rule's boolean expression.

        Raises InvalidRuleSchemaError when `paths:` is present and
          - its value is not a YamlMap, or
          - one of its keys is not in ALLOWED_GLOB_TYPES, or
          - one of its values is not a list.
        """
        self._yaml = raw
        self._raw: Dict[str, Any] = raw.unroll_dict()

        # For tracking errors from semgrep-core
        self._pattern_spans: Dict[PatternId, Span] = {}

        # Start from "no path targeting" and only override when `paths:` exists.
        path_dict = {}
        paths_tree: Optional[YamlTree] = self._yaml.value.get("paths")
        if paths_tree is not None:
            paths, paths_span = paths_tree.value, paths_tree.span
            if not isinstance(paths, YamlMap):
                key_span = self._yaml.value.key_tree("paths").span
                # A YAML list is the only wrong shape that gets a specific hint.
                hint: Optional[str] = (
                    "remove the `-` to convert the list into a mapping"
                    if isinstance(paths, list)
                    else None
                )
                raise InvalidRuleSchemaError(
                    short_msg="invalid paths",
                    long_msg=f"the `paths:` targeting rules must be an object with at least one of {ALLOWED_GLOB_TYPES}",
                    # Report from the `paths` key through the end of its value.
                    spans=[key_span.extend_to(paths_span)],
                    help=hint,
                )

            # Each entry must be an allowed glob type mapped to a list.
            for glob_type, globs in paths.items():
                if glob_type.value not in ALLOWED_GLOB_TYPES:
                    raise InvalidRuleSchemaError(
                        short_msg="invalid targeting rules",
                        long_msg=f"the `paths:` targeting rules must each be one of {ALLOWED_GLOB_TYPES}",
                        spans=[glob_type.span.with_context(before=1, after=1)],
                    )
                if isinstance(globs.value, list):
                    continue
                raise InvalidRuleSchemaError(
                    short_msg="invalid target value",
                    long_msg="the `paths:` targeting rule values must be lists",
                    spans=[globs.span],
                )
            path_dict = paths_tree.unroll_dict()

        self._includes = path_dict.get("include", [])
        self._excludes = path_dict.get("exclude", [])
        self._languages = [Language(lang) for lang in self._raw["languages"]]
        self._expression = self._build_boolean_expression(self._yaml)
Пример #6
0
def test_delete_git(tmp_path, monkeypatch):
    """
    A file that was added to git and then removed from disk must not show up
    in the expanded targets; the file that was never added must.
    """
    tracked = tmp_path / "foo.py"
    untracked = tmp_path / "bar.py"
    for path in (tracked, untracked):
        path.touch()

    monkeypatch.chdir(tmp_path)
    git_setup = (
        ["init"],
        ["add", tracked],
        ["commit", "-m", "first commit"],
    )
    for git_args in git_setup:
        subprocess.run(["git", *git_args])

    # Remove the tracked file from disk only; git still knows about it.
    tracked.unlink()
    subprocess.run(["git", "status"])

    expanded = TargetManager.expand_targets([Path(".")], Language("python"), True)
    assert cmp_path_sets(expanded, {untracked})
Пример #7
0
def test_expand_targets_not_git(tmp_path, monkeypatch):
    """
    Directory expansion outside of git must give the same python files for
    relative paths, absolute paths and paths containing `..`.
    """
    # Layout: foo/{a.go,b.go,py,a.py,b.py}, bar/{a.py,b.py}, foo/bar/{a.py,b.py}
    foo = tmp_path / "foo"
    foo.mkdir()
    for other_name in ("a.go", "b.go", "py"):
        (foo / other_name).touch()
    foo_a = foo / "a.py"
    foo_a.touch()
    foo_b = foo / "b.py"
    foo_b.touch()

    bar = tmp_path / "bar"
    bar.mkdir()
    bar_a = bar / "a.py"
    bar_a.touch()
    bar_b = bar / "b.py"
    bar_b.touch()

    foo_bar = foo / "bar"
    foo_bar.mkdir()
    foo_bar_a = foo_bar / "a.py"
    foo_bar_a.touch()
    foo_bar_b = foo_bar / "b.py"
    foo_bar_b.touch()

    # Expected python files per directory.
    in_foo_bar = {foo_bar_a, foo_bar_b}
    in_foo = {foo_a, foo_b} | in_foo_bar
    in_bar = {bar_a, bar_b}
    in_all = in_foo | in_bar

    python_language = Language("python")

    def expand(target):
        # Expand a single target without consulting git.
        return TargetManager.expand_targets([target], python_language, False)

    # From the repository root.
    monkeypatch.chdir(tmp_path)
    assert cmp_path_sets(expand(Path(".")), in_all)
    assert cmp_path_sets(expand(Path("bar")), in_bar)
    assert cmp_path_sets(expand(Path("foo")), in_foo)
    assert cmp_path_sets(expand(Path("foo").resolve()), in_foo)
    assert cmp_path_sets(expand(Path("foo/bar")), in_foo_bar)
    assert cmp_path_sets(expand(Path("foo/bar").resolve()), in_foo_bar)

    # From inside foo/, where "bar" means foo/bar and "./foo" does not exist.
    monkeypatch.chdir(foo)
    assert cmp_path_sets(expand(Path(".")), in_foo)
    assert cmp_path_sets(expand(Path("./foo")), set())
    assert cmp_path_sets(expand(Path("bar")), in_foo_bar)
    # NOTE(review): same case as the line above; kept as in the original.
    assert cmp_path_sets(expand(Path("bar")), in_foo_bar)
    assert cmp_path_sets(expand(Path("..")), in_all)
    assert cmp_path_sets(expand(Path("../bar")), in_bar)
    assert cmp_path_sets(expand(Path("../foo/bar")), in_foo_bar)
Пример #8
0
def test_expand_targets_git(tmp_path, monkeypatch):
    """
    Exercise target expansion with the git-aware flag switched on inside a
    git repository, first without any .gitignore and then with nested ones.
    """
    # Layout: foo/{a.go,b.go,py,a.py,b.py}, bar/{a.py,b.py}, foo/bar/{a.py,b.py}
    foo = tmp_path / "foo"
    foo.mkdir()
    foo_a_go = foo / "a.go"
    foo_a_go.touch()
    for other_name in ("b.go", "py"):
        (foo / other_name).touch()
    foo_a = foo / "a.py"
    foo_a.touch()
    foo_b = foo / "b.py"
    foo_b.touch()

    bar = tmp_path / "bar"
    bar.mkdir()
    bar_a = bar / "a.py"
    bar_a.touch()
    bar_b = bar / "b.py"
    bar_b.touch()

    foo_bar = foo / "bar"
    foo_bar.mkdir()
    foo_bar_a = foo_bar / "a.py"
    foo_bar_a.touch()
    foo_bar_b = foo_bar / "b.py"
    foo_bar_b.touch()

    # Only part of the tree is added to git; the rest stays untracked.
    monkeypatch.chdir(tmp_path)
    subprocess.run(["git", "init"])
    for tracked in (foo_a, foo_bar_a, foo_bar_b, foo_a_go):
        subprocess.run(["git", "add", tracked])
    subprocess.run(["git", "commit", "-m", "first"])

    python_language = Language("python")

    def expand(target):
        # Expand a single target with the git-aware flag on.
        return TargetManager.expand_targets([target], python_language, True)

    def check_expansions(in_all, in_bar, in_foo, in_foo_bar):
        # The same expectations are checked from the repository root ...
        monkeypatch.chdir(tmp_path)
        assert cmp_path_sets(expand(Path(".")), in_all)
        assert cmp_path_sets(expand(Path("bar")), in_bar)
        assert cmp_path_sets(expand(Path("foo")), in_foo)
        assert cmp_path_sets(expand(Path("foo").resolve()), in_foo)
        assert cmp_path_sets(expand(Path("foo/bar")), in_foo_bar)
        assert cmp_path_sets(expand(Path("foo/bar").resolve()), in_foo_bar)

        # ... and from inside foo/, where "bar" means foo/bar.
        monkeypatch.chdir(foo)
        assert cmp_path_sets(expand(Path(".")), in_foo)
        assert cmp_path_sets(expand(Path("./foo")), set())
        assert cmp_path_sets(expand(Path("bar")), in_foo_bar)
        # NOTE(review): same case as the line above; kept as in the original.
        assert cmp_path_sets(expand(Path("bar")), in_foo_bar)
        assert cmp_path_sets(expand(Path("..")), in_all)
        assert cmp_path_sets(expand(Path("../bar")), in_bar)
        assert cmp_path_sets(expand(Path("../foo/bar")), in_foo_bar)

    # Check that all files are visible without a .gitignore
    everything_in_foo_bar = {foo_bar_a, foo_bar_b}
    everything_in_foo = {foo_a, foo_b} | everything_in_foo_bar
    everything_in_bar = {bar_a, bar_b}
    check_expansions(
        in_all=everything_in_foo | everything_in_bar,
        in_bar=everything_in_bar,
        in_foo=everything_in_foo,
        in_foo_bar=everything_in_foo_bar,
    )

    # Add bar/, foo/bar/a.py, foo/b.py to gitignores
    monkeypatch.chdir(tmp_path)
    (tmp_path / ".gitignore").write_text("bar/\nfoo/bar/a.py")
    (tmp_path / "foo" / ".gitignore").write_text("b.py")

    # Reflect what should now be visible given gitignores
    # foo/bar/a.py is gitignored but is already tracked
    visible_in_foo_bar = {foo_bar_a, foo_bar_b}
    # foo/b.py is gitignored with a nested gitignore
    visible_in_foo = {foo_a} | visible_in_foo_bar
    # bar/ is gitignored
    visible_in_bar = set()
    check_expansions(
        in_all=visible_in_foo | visible_in_bar,
        in_bar=visible_in_bar,
        in_foo=visible_in_foo,
        in_foo_bar=visible_in_foo_bar,
    )
# Extensions regarded as "known", i.e. belonging to a language for which we
# have a proper parser.
ALL_EXTENSIONS = (
    PYTHON_EXTENSIONS
    + JAVASCRIPT_EXTENSIONS
    + TYPESCRIPT_EXTENSIONS
    + JAVA_EXTENSIONS
    + C_EXTENSIONS
    + GO_EXTENSIONS
    + RUBY_EXTENSIONS
    + ML_EXTENSIONS
    + JSON_EXTENSIONS
    + RUST_EXTENSIONS
    + KOTLIN_EXTENSIONS
    + YAML_EXTENSIONS
)

# Extensions of the files handed to spacegrep: every file qualifies, and
# spacegrep itself detects and ignores binary files.
GENERIC_EXTENSIONS = [FileExtension("")]

# Accepted spellings for each language, one list per language.
PYTHON_LANGUAGES = [
    Language(alias) for alias in ("python", "python2", "python3", "py")
]
JAVASCRIPT_LANGUAGES = [Language(alias) for alias in ("javascript", "js")]
TYPESCRIPT_LANGUAGES = [Language(alias) for alias in ("typescript", "ts")]
JAVA_LANGUAGES = [Language(alias) for alias in ("java",)]
C_LANGUAGES = [Language(alias) for alias in ("c",)]
GO_LANGUAGES = [Language(alias) for alias in ("golang", "go")]
RUBY_LANGUAGES = [Language(alias) for alias in ("ruby", "rb")]
PHP_LANGUAGES = [Language(alias) for alias in ("php",)]
LUA_LANGUAGES = [Language(alias) for alias in ("lua",)]
CSHARP_LANGUAGES = [Language(alias) for alias in ("csharp", "cs", "C#")]
RUST_LANGUAGES = [Language(alias) for alias in ("rust", "Rust", "rs")]
KOTLIN_LANGUAGES = [Language(alias) for alias in ("kotlin", "Kotlin", "kt")]