Example #1
# Commit, Config, Cache, my_vcr, _MULTIPLE_SECRETS, FOUND_SECRETS,
# compare_matches_ignore and ignore_last_found come from ggshield
# and its test suite.
def test_cache_catches_last_found_secrets(client, isolated_fs):
    """
    GIVEN an empty cache and an empty config matches-ignore section
    WHEN I run a scan with multiple secrets
    THEN cache last_found_secrets is updated with these secrets and saved
    """
    # Build a commit whose patch contains several known secrets.
    c = Commit()
    c._patch = _MULTIPLE_SECRETS
    config = Config()
    config.matches_ignore = []
    cache = Cache()
    cache.purge()
    assert cache.last_found_secrets == []

    with my_vcr.use_cassette("multiple_secrets"):
        c.scan(
            client=client,
            cache=cache,
            matches_ignore=config.matches_ignore,
            all_policies=True,
            verbose=False,
        )
    assert config.matches_ignore == []

    cache_found_secrets = sorted(cache.last_found_secrets, key=compare_matches_ignore)
    found_secrets = sorted(FOUND_SECRETS, key=compare_matches_ignore)

    assert [found_secret["match"] for found_secret in cache_found_secrets] == [
        found_secret["match"] for found_secret in found_secrets
    ]
    # ignore_last_found copies the cached secrets into config.matches_ignore.
    ignore_last_found(config, cache)
    for ignore in config.matches_ignore:
        assert "test.txt" in ignore["name"]
    cache.load_cache()
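
Each entry in last_found_secrets (and in matches_ignore after ignore_last_found) is a small dict with "name" and "match" keys, as Example #3 below also shows. A minimal sketch of the sort key used above, assuming it simply orders entries by their match string (the real helper lives in the ggshield test suite):

def compare_matches_ignore(entry):
    # Hypothetical stand-in: sort ignore entries such as
    # {"name": "test.txt - secret", "match": "XXX"} by their match value.
    return entry["match"]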
Example #2
    def scan(
        self,
        client: GGClient,
        cache: Cache,
        matches_ignore: Iterable[IgnoredMatch],
        all_policies: bool,
        verbose: bool,
        banlisted_detectors: Optional[Set[str]] = None,
        on_file_chunk_scanned: Callable[
            [List[Dict[str, Any]]], None
        ] = lambda chunk: None,
    ) -> List[Result]:
        cache.purge()
        scannable_list = self.scannable_list
        results = []
        # Batch documents into chunks of at most MULTI_DOCUMENT_LIMIT,
        # the per-request document limit of the scanning API.
        chunks = []
        for i in range(0, len(scannable_list), MULTI_DOCUMENT_LIMIT):
            chunks.append(scannable_list[i : i + MULTI_DOCUMENT_LIMIT])

        # Scan chunks concurrently with a small thread pool (at most 4 workers).
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=min(CPU_COUNT, 4), thread_name_prefix="content_scan"
        ) as executor:
            future_to_scan = {
                executor.submit(
                    client.multi_content_scan,
                    chunk,
                    self.extra_headers,
                ): chunk
                for chunk in chunks
            }

            for future in concurrent.futures.as_completed(future_to_scan):
                chunk = future_to_scan[future]
                on_file_chunk_scanned(chunk)

                scan = future.result()
                if not scan.success:
                    # Report the failed chunk and keep processing the others.
                    handle_scan_error(scan, chunk)
                    continue
                for index, scanned in enumerate(scan.scan_results):
                    # Drop user-ignored matches and results from banlisted detectors.
                    remove_ignored_from_result(scanned, all_policies, matches_ignore)
                    remove_results_from_banlisted_detectors(
                        scanned, banlisted_detectors
                    )
                    if scanned.has_policy_breaks:
                        # Remember each finding so "ignore last found" can use it later.
                        for policy_break in scanned.policy_breaks:
                            cache.add_found_policy_break(
                                policy_break, chunk[index]["filename"]
                            )
                        results.append(
                            Result(
                                content=chunk[index]["document"],
                                scan=scanned,
                                filemode=chunk[index]["filemode"],
                                filename=chunk[index]["filename"],
                            )
                        )
        # Persist the newly found secrets to disk.
        cache.save()
        return results
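
A minimal usage sketch of this scan method, showing how the on_file_chunk_scanned callback can drive progress reporting. The files, client, cache and config objects here are assumed to be set up as in the other examples:

progress = {"documents": 0}

def on_chunk(chunk):
    # Each chunk is a list of document dicts, matching the
    # chunk[index]["filename"] / chunk[index]["document"] access above.
    progress["documents"] += len(chunk)
    print(f"scanned {progress['documents']} documents")

results = files.scan(
    client=client,
    cache=cache,
    matches_ignore=config.matches_ignore,
    all_policies=True,
    verbose=False,
    on_file_chunk_scanned=on_chunk,
)
print(f"{len(results)} documents contained policy breaks")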
Example #3
    def test_load_cache_and_purge(self, cli_fs_runner):
        with open(".cache_ggshield", "w") as file:
            json.dump({"last_found_secrets": [{
                "name": "",
                "match": "XXX"
            }]}, file)
        cache = Cache()
        assert cache.last_found_secrets == [{"name": "", "match": "XXX"}]

        cache.purge()
        assert cache.last_found_secrets == []
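
The hand-written file above matches the on-disk format Cache reads back: a JSON object with a last_found_secrets list. A minimal standalone sketch of seeding and re-reading that file, assuming the default .cache_ggshield path used in the test:

import json

entry = {"name": "test.txt - secret", "match": "XXX"}
with open(".cache_ggshield", "w") as f:
    json.dump({"last_found_secrets": [entry]}, f)

with open(".cache_ggshield") as f:
    assert json.load(f)["last_found_secrets"] == [entry]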
Example #4
def cache() -> Cache:
    # Hand out a cache that starts empty.
    c = Cache()
    c.purge()
    return c
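
This looks like a pytest fixture with its decorator trimmed by the snippet. A minimal sketch of how it would be declared and consumed, assuming pytest:

import pytest

@pytest.fixture
def cache() -> Cache:
    c = Cache()
    c.purge()
    return c

def test_starts_empty(cache):
    # The fixture hands each test a freshly purged cache.
    assert cache.last_found_secrets == []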