def prepare_for_scan(user_path, scan_ignore):
    """
    Collect the name and text content of every file that should be scanned.

    The API (and the key read from ``GG_API_KEY``) is health-checked first;
    files are only read when that check succeeds.

    :param user_path: glob pattern, expanded recursively, selecting the files
    :param scan_ignore: collection of relative file names to leave out
    :return: list of ``{'filename': ..., 'document': ...}`` dictionaries, or
        ``None`` (after printing a message) when the health check fails
    """
    client = GGClient(api_key=os.getenv("GG_API_KEY"))
    if not client.health_check().success:
        print('Invalid API Key or API maintenance.')
        return None

    documents = []
    for path in glob.iglob(user_path, recursive=True):
        if os.path.isdir(path):
            continue
        # The same shortened name (relative path minus its first six
        # characters) serves for the ignore lookup and the reported filename.
        short_name = os.path.relpath(path, start=user_path)[6:]
        if short_name in scan_ignore:
            continue
        if os.path.basename(path) == 'gg_secret_scanner_results.txt':
            continue
        try:
            with open(path, mode='r', encoding='utf-8') as handle:
                documents.append({
                    'filename': short_name,
                    'document': handle.read(),
                })
        except Exception:
            # Files that cannot be opened or decoded (images, executables,
            # ...) are skipped silently.
            continue
    return documents
Exemplo n.º 2
0
def test_extra_headers(
    request_mock: Mock,
    client: GGClient,
    session_headers: Any,
    extra_headers: Optional[Dict[str, str]],
    expected_headers: Dict[str, str],
):
    """
    GIVEN a client whose session carries its own headers
    WHEN a scan method is called with additional headers
    THEN the request is sent with the expected merged headers
    (method-level headers taking priority over session headers)
    """
    client.session.headers = session_headers

    # Every mocked request is answered with a plain-text 400 response.
    error_response = Mock(spec=Response)
    error_response.headers = {"content-type": "text"}
    error_response.text = "some error"
    error_response.status_code = 400
    request_mock.return_value = error_response

    documents = [{"filename": FILENAME, "document": DOCUMENT}]
    client.multi_content_scan(documents, extra_headers=extra_headers)
    assert request_mock.called
    _args, sent_kwargs = request_mock.call_args
    assert expected_headers == sent_kwargs["headers"]

    client.content_scan("some_string", extra_headers=extra_headers)
    assert request_mock.called
    _args, sent_kwargs = request_mock.call_args
    assert expected_headers == sent_kwargs["headers"]
Exemplo n.º 3
0
def test_client_creation(
    api_key: str,
    uri: str,
    user_agent: str,
    timeout: float,
    exception: Type[Exception],
):
    """Create a GGClient and check either the raised error or its settings."""
    settings = dict(
        api_key=api_key,
        base_uri=uri,
        user_agent=user_agent,
        timeout=timeout,
    )

    if exception is not None:
        # Construction itself is expected to fail; nothing more to verify.
        with pytest.raises(exception):
            GGClient(**settings)
        return

    client = GGClient(**settings)

    # An empty URI falls back to the default base URI.
    expected_uri = uri if uri else DEFAULT_BASE_URI
    assert client.base_uri == expected_uri
    assert client.api_key == api_key
    assert client.timeout == timeout
    assert user_agent in client.session.headers["User-Agent"]
    assert client.session.headers["Authorization"] == f"Token {api_key}"
Exemplo n.º 4
0
def test_versions_from_headers(request_mock: Mock, client: GGClient, method):
    """
    GIVEN a response carrying the app and secrets-engine version headers
    WHEN the client performs a request
    THEN the client exposes those versions
    AND keeps them after a later response without the headers
    AND a newly created client reports the same versions

    Versions are compared by value (``==``): string identity is an
    implementation detail and must not decide the outcome of the test.
    """
    app_version_value = "1.0"
    secrets_engine_version_value = "2.0"

    mock_response = Mock(spec=Response)
    mock_response.headers = {
        "X-App-Version": app_version_value,
        "X-Secrets-Engine-Version": secrets_engine_version_value,
    }
    request_mock.return_value = mock_response

    client.request(method=method, endpoint="endpoint")
    assert request_mock.called

    assert client.app_version == app_version_value
    assert client.secrets_engine_version == secrets_engine_version_value

    # A response without version headers must not change the stored values.
    mock_response = Mock(spec=Response)
    mock_response.headers = {}
    request_mock.return_value = mock_response

    client.request(method=method, endpoint="endpoint")
    assert request_mock.called

    assert client.app_version == app_version_value
    assert client.secrets_engine_version == secrets_engine_version_value

    # The same versions are expected on a separately constructed client.
    other_client = GGClient(api_key="")
    assert other_client.app_version == app_version_value
    assert other_client.secrets_engine_version == secrets_engine_version_value
Exemplo n.º 5
0
def test_multi_content_not_ok():
    """A multi-document scan made with an invalid API key yields a 401 Detail."""
    documents = [{"document": "valid"}]
    bad_key_client = GGClient(base_uri=base_uri, api_key="invalid")

    response = bad_key_client.multi_content_scan(documents)

    assert response.status_code == 401
    assert isinstance(response, Detail)
    assert response.detail == "Invalid API key."
Exemplo n.º 6
0
def test_client__url_from_endpoint(base_uries, version, endpoints_and_urls):
    """
    Check that every base URI variant maps each endpoint to its expected URL.

    :param base_uries: base URIs to build clients with
    :param version: API version passed to ``_url_from_endpoint``
    :param endpoints_and_urls: ``(endpoint, expected_url)`` pairs
    """
    for curr_base_uri in base_uries:
        client = GGClient(api_key="validapi_keyforsure",
                          base_uri=curr_base_uri)
        for endpoint, expected_url in endpoints_and_urls:
            # Report the base URI actually under test in the failure message.
            assert (
                client._url_from_endpoint(endpoint, version) == expected_url
            ), "Could not get the expected URL for base_uri=`{}`".format(
                curr_base_uri)
Exemplo n.º 7
0
def test_content_not_ok():
    """A single-document scan made with an invalid API key yields a 401 Detail."""
    bad_key_client = GGClient(base_uri=base_uri, api_key="invalid")

    response = bad_key_client.content_scan(document="valid", filename="valid")

    assert response.status_code == 401
    assert isinstance(response, Detail)
    assert response.detail == "Invalid API key."
Exemplo n.º 8
0
def scan(
    ctx: click.Context,
    show_secrets: bool,
    exit_zero: bool,
    all_policies: bool,
    verbose: bool,
) -> int:
    """ Command to scan various contents. """
    # Stores the API client and path filter set on ctx.obj, then copies every
    # option that is not None onto the shared configuration. Always returns 0.
    config = ctx.obj["config"]

    api_key = os.getenv("GITGUARDIAN_API_KEY")
    base_uri = os.getenv("GITGUARDIAN_API_URL", config.api_url)
    if not api_key:
        raise click.ClickException("GitGuardian API Key is needed.")

    ctx.obj["client"] = GGClient(
        api_key=api_key,
        base_uri=base_uri,
        user_agent="ggshield",
        timeout=60,
    )
    ctx.obj["filter_set"] = path_filter_set(
        Path(os.getcwd()), config.paths_ignore
    )

    overrides = (
        ("show_secrets", show_secrets),
        ("all_policies", all_policies),
        ("verbose", verbose),
        ("exit_zero", exit_zero),
    )
    for option_name, option_value in overrides:
        if option_value is not None:
            setattr(config, option_name, option_value)

    return 0
Exemplo n.º 9
0
def retrieve_client(config: Config) -> GGClient:
    """
    Build a GGClient from the given configuration.

    When ``config.allow_self_signed`` is set, certificate verification is
    turned off on the session and urllib3 warnings are disabled.

    :param config: configuration providing ``allow_self_signed``, ``api_key``
        and ``api_url``
    :return: client using a dedicated session, the "ggshield" user agent and
        a timeout of 60
    :raises click.ClickException: when the default dashboard instance is
        unknown, or when the client cannot be created
    :raises UnknownInstanceError: re-raised unchanged for any other instance
    """
    session = Session()
    if config.allow_self_signed:
        urllib3.disable_warnings()
        session.verify = False

    try:
        api_key = config.api_key
        api_url = config.api_url
    except UnknownInstanceError as e:
        if e.instance == DEFAULT_DASHBOARD_URL:
            # This can happen when the user first tries the app and has not gone through
            # the authentication procedure yet. In this case, replace the error message
            # complaining about an unknown instance with a more user-friendly one.
            # The original error is kept as the explicit cause for debugging.
            raise click.ClickException("GitGuardian API key is needed.") from e
        else:
            raise

    try:
        return GGClient(
            api_key=api_key,
            base_uri=api_url,
            user_agent="ggshield",
            timeout=60,
            session=session,
        )
    except ValueError as e:
        # Can be raised by pygitguardian
        raise click.ClickException(f"Failed to create API client. {e}") from e
Exemplo n.º 10
0
    def scan(
        self,
        client: GGClient,
        matches_ignore: Iterable[str],
        all_policies: bool,
        verbose: bool,
    ) -> List[Result]:
        """
        Scan ``self.scannable_list`` in batches and collect policy breaks.

        Batches hold at most ``MULTI_DOCUMENT_LIMIT`` documents. A failed
        batch is passed to ``handle_scan_error`` and skipped. Ignored matches
        are removed from each scan result before it is inspected.

        :return: one ``Result`` per document that still has policy breaks
        """
        documents = self.scannable_list
        findings = []
        for start in range(0, len(documents), MULTI_DOCUMENT_LIMIT):
            batch = documents[start:start + MULTI_DOCUMENT_LIMIT]
            response = client.multi_content_scan(batch)
            if not response.success:
                handle_scan_error(response, batch)
                continue
            for position, scan_result in enumerate(response.scan_results):
                remove_ignored_from_result(scan_result, all_policies,
                                           matches_ignore)
                if not scan_result.has_policy_breaks:
                    continue
                # Scan results are matched to their document by position.
                source = batch[position]
                findings.append(
                    Result(
                        content=source["document"],
                        scan=scan_result,
                        filemode=source["filemode"],
                        filename=source["filename"],
                    ))

        return findings
Exemplo n.º 11
0
def test_multi_content_scan(
    client: GGClient,
    name: str,
    to_scan: List[Dict[str, str]],
    expected: str,
    has_secrets: bool,
    has_policy_breaks: bool,
):
    """Replay the cassette for ``name`` and check the multi-document scan."""
    with my_vcr.use_cassette(name + ".yaml"):
        multiscan = client.multi_content_scan(to_scan)

        assert multiscan.status_code == 200
        if not isinstance(multiscan, MultiScanResult):
            pytest.fail("multiscan is not a MultiScanResult")

        # Serialisation helpers must return plain built-in types.
        assert type(multiscan.to_dict()) is dict
        assert type(multiscan.to_json()) is str
        assert type(repr(multiscan)) is str
        assert type(str(multiscan)) is str
        assert multiscan.has_secrets == has_secrets
        assert multiscan.has_policy_breaks == has_policy_breaks

        for position, scan_result in enumerate(multiscan.scan_results):
            if not expected:
                continue
            # ``expected`` is a JSON list with one entry per scanned document.
            reference = json.loads(expected)[position]
            assert all(policy in reference["policies"]
                       for policy in scan_result.policies)
            assert (scan_result.policy_break_count ==
                    reference["policy_break_count"])
Exemplo n.º 12
0
def test_health_check(client: GGClient):
    """The health check reports a valid key and serialises to dict and str."""
    status = client.health_check()

    assert status.status_code == 200
    assert status.detail == "Valid API key."
    assert str(status) == "200:Valid API key."
    assert bool(status)
    assert status.success

    assert type(status.to_dict()) is dict
    assert type(status.to_json()) is str
Exemplo n.º 13
0
    async def content_scan(self, api_key, content, file_id):
        """
        Scan either ``file_id['data']`` or ``content`` and return the result as JSON.

        Exactly one of ``file_id`` and ``content`` may be given. Supplying
        both raises; supplying neither returns ``None``. Any error raised by
        the scan itself is returned as a message string instead of propagating.
        """
        client = GGClient(api_key=api_key)

        if file_id and content:
            raise Exception("Can not use file_id & content at once, Please use either one of them.")

        if file_id:
            document = file_id['data']
        elif content:
            document = content
        else:
            # Nothing was supplied to scan.
            return None

        try:
            return client.content_scan(document=document).to_json()
        except Exception as e:
            return f"Exception occured: {e}"
Exemplo n.º 14
0
def test_health_check(client: GGClient):
    """The health check reports a valid key along with both version strings."""
    status = client.health_check()
    expected_text = (
        "detail:Valid API key., status_code:200, "
        "app version:1.26.0-rc.4, secrets engine version:2.43.0")

    assert status.status_code == 200
    assert status.detail == "Valid API key."
    assert str(status) == expected_text
    assert bool(status)
    assert status.success

    assert type(status.to_dict()) is OrderedDict
    assert type(status.to_json()) is str
def scan(file_list):
    """
    Scan the given documents for secrets, one chunk at a time.
    :param file_list: list of documents to scan, or None
    :return: list of scan results, or None (after printing a message) when
        no list was supplied
    """
    if file_list is None:
        print('Scanner did not receive documents to scan.')
        return None

    client = GGClient(api_key=os.getenv("GG_API_KEY"))
    results = []
    # Each request carries at most MULTI_DOCUMENT_LIMIT documents.
    for start in range(0, len(file_list), MULTI_DOCUMENT_LIMIT):
        batch = file_list[start:start + MULTI_DOCUMENT_LIMIT]
        try:
            response = client.multi_content_scan(batch)
            if response.success:
                results.extend(response.scan_results)
        except Exception as exc:
            # A failing batch is reported and the remaining ones still run.
            print('Could not scan some files. ' + str(exc))
    return results
Exemplo n.º 16
0
def test_health_check_error(client: GGClient):
    """A 400 health check is falsy, unsuccessful and still serialisable."""
    status = client.health_check()
    expected_text = (
        "detail:Configuration error., status_code:400, "
        "app version:1.26.0-rc.4, secrets engine version:2.43.0")

    assert status.status_code == 400
    assert status.detail == "Configuration error."
    assert str(status) == expected_text
    assert bool(status) is False
    assert status.success is False

    assert type(status.to_dict()) is OrderedDict
    assert type(status.to_json()) is str
Exemplo n.º 17
0
def test_assert_content_type(client: GGClient):
    """
    GIVEN a 200 response whose content is not JSON
    WHEN is_ok is called
    THEN it should return False
    WHEN load_detail is called
    THEN it should return a Detail object
    """
    response = client.get(endpoint="/docs/static/logo.png", version=None)
    assert is_ok(response) is False

    detail = load_detail(response)
    detail.status_code = response.status_code
    assert detail.status_code == 200
    assert isinstance(detail, Detail)

    rendered = str(detail)
    assert rendered.startswith("200:"), rendered
Exemplo n.º 18
0
def scan(
    ctx: click.Context,
    mode: str,
    show_secrets: bool,
    exit_zero: bool,
    all_policies: bool,
    verbose: bool,
) -> int:
    """ Command to scan various contents. """
    # Stores the API client and path filter set on ctx.obj and copies every
    # option that is not None onto the configuration. Without a subcommand it
    # either dispatches on the deprecated --mode value or prints the help.
    config = ctx.obj["config"]

    api_key = os.getenv("GITGUARDIAN_API_KEY")
    base_uri = os.getenv("GITGUARDIAN_API_URL", config.api_url)
    if not api_key:
        raise click.ClickException("GitGuardian API Key is needed.")

    ctx.obj["client"] = GGClient(
        api_key=api_key,
        base_uri=base_uri,
        user_agent="ggshield",
        timeout=60,
    )
    ctx.obj["filter_set"] = path_filter_set(Path(os.getcwd()),
                                            config.paths_ignore)

    overrides = (
        ("show_secrets", show_secrets),
        ("all_policies", all_policies),
        ("verbose", verbose),
        ("exit_zero", exit_zero),
    )
    for option_name, option_value in overrides:
        if option_value is not None:
            setattr(config, option_name, option_value)

    if ctx.invoked_subcommand is not None:
        return 0

    if not mode:
        click.echo(ctx.get_help())
        return 0

    click.echo(
        "--mode has been deprecated and will be removed "
        "after ggshield version 1.2. prefer to use subcommands.")
    if mode == "pre-commit":
        return ctx.invoke(precommit_cmd)
    if mode == "ci":
        return ctx.invoke(ci_cmd)

    # Unrecognised mode value: fall back to the help text.
    click.echo(ctx.get_help())
    return 0
Exemplo n.º 19
0
def scan(
    ctx: click.Context,
    show_secrets: bool,
    exit_zero: bool,
    all_policies: bool,
    verbose: bool,
    json_output: bool,
    output: Optional[str],
) -> int:
    """ Command to scan various contents. """
    # Stores the API client, path filter set and output handler on ctx.obj
    # and copies every option that is not None onto the configuration.
    # Always returns 0.
    config: Config = ctx.obj["config"]

    api_key = os.getenv("GITGUARDIAN_API_KEY")
    base_uri = os.getenv("GITGUARDIAN_API_URL", config.api_url)
    if not api_key:
        raise click.ClickException("GitGuardian API Key is needed.")

    ctx.obj["client"] = GGClient(
        api_key=api_key,
        base_uri=base_uri,
        user_agent="ggshield",
        timeout=60,
    )
    ctx.obj["filter_set"] = path_filter_set(
        Path(os.getcwd()), config.paths_ignore
    )

    overrides = (
        ("show_secrets", show_secrets),
        ("all_policies", all_policies),
        ("verbose", verbose),
        ("exit_zero", exit_zero),
    )
    for option_name, option_value in overrides:
        if option_value is not None:
            setattr(config, option_name, option_value)

    # JSON output is opt-in; text is the default handler.
    handler_cls: Type[OutputHandler] = (
        JSONHandler if json_output else TextHandler
    )
    ctx.obj["output_handler"] = handler_cls(
        show_secrets=config.show_secrets,
        verbose=config.verbose,
        output=output,
    )

    return 0
Exemplo n.º 20
0
def test_quota_overview(client: GGClient):
    """Replay the quota cassette and check the parsed quota figures."""
    with my_vcr.use_cassette("quota.yaml"):
        quota = client.quota_overview()

        assert type(repr(quota)) is str
        assert type(str(quota)) is str
        assert quota.status_code == 200
        if not isinstance(quota, QuotaResponse):
            pytest.fail("returned should be a QuotaResponse")

        usage = quota.content
        assert usage.limit == 5000
        assert usage.count == 2
        assert usage.remaining == 4998
        assert usage.since == date(2021, 4, 18)

        assert type(quota.to_dict()) is OrderedDict
        serialized = quota.to_json()
        assert type(serialized) is str
        assert type(json.loads(serialized)) is dict
Exemplo n.º 21
0
def test_content_scan(
    client: GGClient,
    name: str,
    to_scan: Dict[str, str],
    has_secrets: bool,
    has_policy_breaks: bool,
    policy_break_count: int,
):
    """Replay the cassette for ``name`` and check the single-document scan."""
    with my_vcr.use_cassette(name + ".yaml"):
        outcome = client.content_scan(**to_scan)

        assert type(repr(outcome)) is str
        assert type(str(outcome)) is str
        assert outcome.status_code == 200
        if not isinstance(outcome, ScanResult):
            pytest.fail("returned should be a ScanResult")

        assert outcome.has_secrets == has_secrets
        assert outcome.has_policy_breaks == has_policy_breaks
        assert outcome.policy_break_count == policy_break_count

        assert type(outcome.to_dict()) is dict
        serialized = outcome.to_json()
        assert type(serialized) is str
        assert type(json.loads(serialized)) is dict
Exemplo n.º 22
0
def client() -> GGClient:
    """Create a GGClient from the environment, with fallback key and URL."""
    return GGClient(
        os.getenv("GITGUARDIAN_API_KEY", "1234567890"),
        os.getenv("GITGUARDIAN_API_URL", "https://api.gitguardian.com"),
    )
Exemplo n.º 23
0
def test_content_scan_exceptions(
    client: GGClient,
    to_scan: str,
    exception: Type[Exception],
    regex: str,
):
    """Scanning ``to_scan`` must raise ``exception`` with a message matching ``regex``."""
    expected_failure = pytest.raises(exception, match=regex)
    with expected_failure:
        client.content_scan(to_scan)
Exemplo n.º 24
0
def scan(
    ctx: click.Context,
    paths: Union[List, str],
    mode: str,
    recursive: bool,
    yes: bool,
    show_secrets: bool,
    exit_zero: bool,
    all_policies: bool,
    verbose: bool,
    repo: str,
) -> int:
    """ Command to scan various content. """
    # Picks one scan flavour (mode, repo or paths, in that order), falling
    # back to the help text, and always finishes through sys.exit.
    config = ctx.obj["config"]

    api_key = os.getenv("GITGUARDIAN_API_KEY")
    base_uri = os.getenv("GITGUARDIAN_API_URL", config.api_url)
    if not api_key:
        raise click.ClickException("GitGuardian API Key is needed.")

    client = GGClient(
        api_key=api_key,
        base_uri=base_uri,
        user_agent="ggshield",
        timeout=60,
    )

    matches_ignore = config.matches_ignore
    filter_set = path_filter_set(Path(os.getcwd()), config.paths_ignore)

    # Options left at None fall back to the configured values.
    show_secrets = config.show_secrets if show_secrets is None else show_secrets
    all_policies = config.all_policies if all_policies is None else all_policies
    verbose = config.verbose if verbose is None else verbose
    exit_zero = config.exit_zero if exit_zero is None else exit_zero

    # Keyword arguments shared by every scan flavour.
    common = dict(
        client=client,
        verbose=verbose,
        matches_ignore=matches_ignore,
        all_policies=all_policies,
        show_secrets=show_secrets,
    )

    return_code = 0
    try:
        if mode:
            check_git_dir()
            if mode == "pre-commit":
                return_code = scan_pre_commit(filter_set=filter_set, **common)
            elif mode == "ci":
                return_code = scan_ci(filter_set=filter_set, **common)
            else:
                click.echo(ctx.get_help())
        elif repo:
            return_code = scan_repo(repo=repo, **common)
        elif paths:
            return_code = scan_path(
                paths=paths,
                paths_ignore=config.paths_ignore,
                recursive=recursive,
                yes=yes,
                **common,
            )
        else:
            click.echo(ctx.get_help())
    except click.exceptions.Abort:
        # An abort is treated as a clean exit.
        return_code = 0
    except Exception as error:
        if verbose:
            traceback.print_exc()
        raise click.ClickException(str(error))

    sys.exit(0 if exit_zero else return_code)
Exemplo n.º 25
0
def client():
    """Create a GGClient using the live-server token or a sample key."""
    token = os.environ.get("TEST_LIVE_SERVER_TOKEN", "sample_api_key")
    return GGClient(api_key=token, base_uri=base_uri)
Exemplo n.º 26
0
import glob
import os
import sys
import traceback
from dotenv import load_dotenv
load_dotenv()
API_KEY = os.getenv("GG_API_KEY")

from pygitguardian import GGClient
from pygitguardian.config import MULTI_DOCUMENT_LIMIT

# Init GGClient for connecting to gitguardian, using the key read from the
# environment (the variable, not the literal string "API_KEY").
client = GGClient(api_key=API_KEY)

# Creating a list of dictionaries for scanning files
to_scan = []
for name in glob.glob("**/*", recursive=True):
    # The recursive pattern also yields directories, which cannot be opened.
    if not os.path.isfile(name):
        continue
    with open(name) as fn:
        to_scan.append({
            "document": fn.read(),
            "filename": os.path.basename(name)
        })

to_process = []
# Send the documents in chunks of at most MULTI_DOCUMENT_LIMIT per request.
for i in range(0, len(to_scan), MULTI_DOCUMENT_LIMIT):
    chunk = to_scan[i:i + MULTI_DOCUMENT_LIMIT]
    try:
        scan = client.multi_content_scan(chunk)
    except Exception:
        # Handle exceptions such as schema validation
        traceback.print_exc(2, file=sys.stderr)
Exemplo n.º 27
0
import traceback

from requests import codes

from pygitguardian import GGClient

API_KEY = os.getenv("GG_API_KEY")
FILENAME = ".env"
DOCUMENT = """
    import urllib.request
    url = 'http://*****:*****@cake.gitguardian.com/isreal.json'
    response = urllib.request.urlopen(url)
    consume(response.read())"
"""

client = GGClient(api_key=API_KEY)

# Check the health of the API and the API key used.
health_obj = client.health_check()

if health_obj.status_code == codes[r"\o/"]:  # this is 200 but cooler
    try:
        scan_result = client.content_scan(filename=FILENAME, document=DOCUMENT)
    except Exception as exc:
        # Handle exceptions such as schema validation
        traceback.print_exc(2, file=sys.stderr)
        print(str(exc))
    else:
        # Report only when the scan produced a result: after a failure
        # scan_result is never bound and reading it would raise NameError.
        print("Scan results:", scan_result.has_secrets, "-",
              scan_result.policy_break_count)
else:
Exemplo n.º 28
0
def test_multi_content_exceptions(
    client: GGClient,
    to_scan: List,
    exception: Type[Exception],
):
    """A multi-document scan of ``to_scan`` must raise ``exception``."""
    expected_failure = pytest.raises(exception)
    with expected_failure:
        client.multi_content_scan(to_scan)