Example #1
    def test_scan_with_baseline(self, mock_subprocess_obj,
                                mock_load_from_string, mock_apply):
        repo = mock_tracked_repo()

        # Setup secrets
        secretA = PotentialSecret('type', 'filenameA', 1, 'blah')
        secretB = PotentialSecret('type', 'filenameA', 2, 'curry')
        original_secrets = SecretsCollection()
        original_secrets.data['filenameA'] = {
            secretA: secretA,
            secretB: secretB,
        }
        baseline_secrets = SecretsCollection()
        baseline_secrets.data['filenameA'] = {
            secretA: secretA,
        }

        # Easier than mocking load_from_diff.
        mock_apply.side_effect = lambda orig, base: \
            get_secrets_not_in_baseline(original_secrets, baseline_secrets)

        mock_subprocess_obj.side_effect = mock_subprocess((SubprocessMock(
            expected_input='git show',
            mocked_output=b'will be mocked',
        ), ))
        secrets = repo.scan()

        assert len(secrets.data) == 1
        assert secrets.data['filenameA'][secretB] == secretB
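
The test above mocks out the baseline-filtering step. For reference, a minimal unmocked sketch of the behaviour being asserted is shown below; the import paths and the PotentialSecret('type', filename, lineno, secret) argument order are copied from the snippets on this page (an older detect_secrets layout) and may differ between detect-secrets versions, so treat them as assumptions.

# Sketch only: import paths and constructor arguments follow these examples and are
# assumptions, not a pinned API.
from detect_secrets.core.potential_secret import PotentialSecret
from detect_secrets.core.secrets_collection import SecretsCollection
from detect_secrets.core.baseline import get_secrets_not_in_baseline

secret_a = PotentialSecret('type', 'filenameA', 1, 'blah')
secret_b = PotentialSecret('type', 'filenameA', 2, 'curry')

scan_results = SecretsCollection()
scan_results.data['filenameA'] = {secret_a: secret_a, secret_b: secret_b}

baseline = SecretsCollection()
baseline.data['filenameA'] = {secret_a: secret_a}

# Only secret_b is new relative to the baseline, which is what the test asserts.
new_secrets = get_secrets_not_in_baseline(scan_results, baseline)
assert len(new_secrets.data['filenameA']) == 1
assert secret_b in new_secrets.data['filenameA']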
Example #2
def main(argv=None):
    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)

    try:
        # If baseline is provided, we first want to make sure
        # it's valid, before doing any further computation.
        baseline_collection = get_baseline(args.baseline[0])
    except (IOError, ValueError):
        # Error logs handled within logic.
        return 1

    plugins = initialize.from_parser_builder(args.plugins)
    results = find_secrets_in_files(args, plugins)
    if baseline_collection:
        original_results = results
        results = get_secrets_not_in_baseline(
            results,
            baseline_collection,
        )

    if len(results.data) > 0:
        pretty_print_diagnostics(results)
        return 1

    if not baseline_collection:
        return 0

    # Only attempt baseline modifications if we don't find any new secrets
    baseline_modified = trim_baseline_of_removed_secrets(
        original_results,
        baseline_collection,
        args.filenames,
    )

    if VERSION != baseline_collection.version:
        baseline_collection.plugins = plugins
        baseline_collection.version = VERSION
        baseline_modified = True

    if baseline_modified:
        write_baseline_to_file(
            filename=args.baseline[0],
            data=baseline_collection.format_for_baseline_output(),
        )

        log.error(
            'The baseline file was updated.\n'
            'Probably to keep line numbers of secrets up-to-date.\n'
            'Please `git add {}`, thank you.\n\n'.format(args.baseline[0]),
        )
        return 1

    return 0
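
A hedged usage sketch of this hook-style entry point: the '--baseline' flag and the positional filenames are inferred from how args.baseline[0] and args.filenames are used above, so the exact spelling is an assumption about parse_args rather than a documented interface.

# Illustrative invocation; flag names are inferred from the args attributes used in
# main() above and may not match every version of the CLI.
exit_code = main(['--baseline', '.secrets.baseline', 'changed_file.py'])

if exit_code == 0:
    print('No new secrets; baseline left as-is.')
else:
    print('Baseline invalid, new secrets found, or baseline rewritten; commit should be blocked.')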
Example #3
    def scan(self, exclude_files_regex=None, exclude_lines_regex=None, scan_head=False):
        """Fetches latest changes, and scans the git diff between last_commit_hash
        and HEAD.

        :raises: subprocess.CalledProcessError

        :type exclude_files_regex: str|None
        :param exclude_files_regex: A regex matching filenames to skip over.

        :type exclude_lines_regex: str|None
        :param exclude_lines_regex: A regex matching lines to skip over.

        :type scan_head: bool
        :param scan_head: if True, diff against the empty tree instead of
            last_commit_hash, so the entire tracked content is scanned rather
            than only the changes since the last scan.

        :rtype: SecretsCollection
        :returns: secrets found.
        """
        self.storage.fetch_new_changes()

        default_plugins = initialize_plugins.from_parser_builder(
            self.plugin_config,
            exclude_lines_regex=exclude_lines_regex,
        )
        # TODO Issue 17: Ignoring self.exclude_regex, using the server scan CLI arg
        secrets = SecretsCollection(
            plugins=default_plugins,
            exclude_files=exclude_files_regex,
            exclude_lines=exclude_lines_regex,
        )

        scan_from_this_commit = git.get_empty_tree_commit_hash() if scan_head else self.last_commit_hash
        try:
            diff_name_only = self.storage.get_diff_name_only(scan_from_this_commit)

            # Do a per-file diff + scan so we don't hit an OOM if the commit diff is too large
            for filename in diff_name_only:
                file_diff = self.storage.get_diff(scan_from_this_commit, filename)

                secrets.scan_diff(
                    file_diff,
                    baseline_filename=self.baseline_filename,
                    last_commit_hash=scan_from_this_commit,
                    repo_name=self.name,
                )
        except subprocess.CalledProcessError:
            self.update()
            return secrets

        if self.baseline_filename:
            baseline = self.storage.get_baseline_file(self.baseline_filename)
            if baseline:
                baseline_collection = SecretsCollection.load_baseline_from_string(baseline)
                secrets = get_secrets_not_in_baseline(secrets, baseline_collection)

        return secrets
Example #4
    def test_nothing_new(self):
        # We want a secret, but just a default secret (no overriding parameters)
        new_findings = secrets_collection_factory([{}])
        baseline = secrets_collection_factory([{}])

        results = get_secrets_not_in_baseline(new_findings, baseline)

        # No expected results, because everything filtered out by baseline
        assert len(results.data) == 0

        # Make sure that baseline didn't get modified either
        assert len(baseline.data) == 1
        assert next(iter(baseline.data['filename'])).lineno == 1
Example #6
    def test_new_file(self):
        new_findings = secrets_collection_factory([{
            'filename': 'filename1',
        }])
        baseline = secrets_collection_factory([{
            'filename': 'filename2',
        }])

        backup_baseline = baseline.data.copy()
        results = get_secrets_not_in_baseline(new_findings, baseline)

        assert len(results.data) == 1
        assert 'filename1' in results.data
        assert baseline.data == backup_baseline
Example #7
    def scan(self, exclude_files_regex=None, exclude_lines_regex=None):
        """Fetches latest changes, and scans the git diff between last_commit_hash
        and HEAD.

        :raises: subprocess.CalledProcessError

        :type exclude_files_regex: str|None
        :param exclude_files_regex: A regex matching filenames to skip over.

        :type exclude_lines: str|None
        :param exclude_lines: A regex matching lines to skip over.

        :rtype: SecretsCollection
        :returns: secrets found.
        """
        self.storage.fetch_new_changes()

        default_plugins = initialize_plugins.from_parser_builder(
            self.plugin_config,
            exclude_lines_regex=exclude_lines_regex,
        )
        # TODO Issue 17: Ignoring self.exclude_regex, using the server scan CLI arg
        secrets = SecretsCollection(
            plugins=default_plugins,
            exclude_files=exclude_files_regex,
            exclude_lines=exclude_lines_regex,
        )

        try:
            diff = self.storage.get_diff(self.last_commit_hash)
        except subprocess.CalledProcessError:
            self.update()
            return secrets

        secrets.scan_diff(
            diff,
            baseline_filename=self.baseline_filename,
            last_commit_hash=self.last_commit_hash,
            repo_name=self.name,
        )

        if self.baseline_filename:
            baseline = self.storage.get_baseline_file(self.baseline_filename)
            if baseline:
                baseline_collection = SecretsCollection.load_baseline_from_string(
                    baseline)
                secrets = get_secrets_not_in_baseline(secrets,
                                                      baseline_collection)

        return secrets
Example #8
def main(argv=None):
    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        CustomLog.enableDebug(args.verbose)

    try:
        # If baseline is provided, we first want to make sure
        # it's valid, before doing any further computation.
        baseline_collection = get_baseline(args.baseline[0])
    except (IOError, ValueError):
        # Error logs handled in load_baseline_from_file logic.
        return 1

    results = find_secrets_in_files(args)
    if baseline_collection:
        original_results = results
        results = get_secrets_not_in_baseline(
            results,
            baseline_collection,
        )

    if len(results.data) > 0:
        pretty_print_diagnostics(results)
        return 1

    if not baseline_collection:
        return 0

    # Only attempt baseline modifications if we don't find any new secrets
    successful_update = update_baseline_with_removed_secrets(
        original_results,
        baseline_collection,
        args.filenames,
    )
    if successful_update:
        with open(args.baseline[0], 'w') as f:
            f.write(
                json.dumps(
                    baseline_collection.format_for_baseline_output(),
                    indent=2,
                )
            )

        # The pre-commit framework should automatically detect a file change
        # and print a relevant error message.
        return 1

    return 0
Example #9
def main(argv=None):
    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)

    try:
        # If baseline is provided, we first want to make sure
        # it's valid, before doing any further computation.
        baseline_collection = get_baseline(args.baseline[0])
    except (IOError, ValueError):
        # Error logs handled within logic.
        return 1

    results = find_secrets_in_files(args)
    if baseline_collection:
        original_results = results
        results = get_secrets_not_in_baseline(
            results,
            baseline_collection,
        )

    if len(results.data) > 0:
        pretty_print_diagnostics(results)
        return 1

    if not baseline_collection:
        return 0

    # Only attempt baseline modifications if we don't find any new secrets
    successful_update = update_baseline_with_removed_secrets(
        original_results,
        baseline_collection,
        args.filenames,
    )
    if successful_update:
        _write_to_baseline_file(
            args.baseline[0],
            baseline_collection.format_for_baseline_output(),
        )

        # The pre-commit framework should automatically detect a file change
        # and print a relevant error message.
        return 1

    return 0
Example #10
    def test_rolled_creds(self):
        """Same line, different secret"""
        new_findings = secrets_collection_factory([{
            'secret': 'secret_new',
        }])
        baseline = secrets_collection_factory([{
            'secret': 'secret',
        }])

        backup_baseline = baseline.data.copy()
        results = get_secrets_not_in_baseline(new_findings, baseline)

        assert len(results.data['filename']) == 1

        secretA = PotentialSecret('type', 'filename', 1, 'secret_new')
        assert results.data['filename'][secretA].secret_hash == \
            PotentialSecret.hash_secret('secret_new')
        assert baseline.data == backup_baseline
Example #11
    def test_new_file(self):
        new_findings = secrets_collection_factory([
            {
                'filename': 'filename1',
            },
        ])
        baseline = secrets_collection_factory([
            {
                'filename': 'filename2',
            },
        ])

        backup_baseline = baseline.data.copy()
        results = get_secrets_not_in_baseline(new_findings, baseline)

        assert len(results.data) == 1
        assert 'filename1' in results.data
        assert baseline.data == backup_baseline
Example #12
    def test_new_secret_line_old_file(self):
        """Same file, new line with potential secret"""
        new_findings = secrets_collection_factory([{
            'secret': 'secret1',
            'lineno': 1,
        }])
        baseline = secrets_collection_factory([{
            'secret': 'secret2',
            'lineno': 2,
        }])

        backup_baseline = baseline.data.copy()
        results = get_secrets_not_in_baseline(new_findings, baseline)

        assert len(results.data['filename']) == 1
        secretA = PotentialSecret('type', 'filename', 1, 'secret1')
        assert results.data['filename'][
            secretA].secret_hash == PotentialSecret.hash_secret('secret1')
        assert baseline.data == backup_baseline
Example #13
    def test_rolled_creds(self):
        """Same line, different secret"""
        new_findings = secrets_collection_factory([
            {
                'secret': 'secret_new',
            },
        ])
        baseline = secrets_collection_factory([
            {
                'secret': 'secret',
            },
        ])

        backup_baseline = baseline.data.copy()
        results = get_secrets_not_in_baseline(new_findings, baseline)

        assert len(results.data['filename']) == 1

        secretA = PotentialSecret('type', 'filename', 1, 'secret_new')
        assert results.data['filename'][secretA].secret_hash == \
            PotentialSecret.hash_secret('secret_new')
        assert baseline.data == backup_baseline
Example #14
    def test_new_file_excluded(self):
        new_findings = secrets_collection_factory([
            {
                'filename': 'filename1',
            },
            {
                'filename': 'filename2',
            },
        ])
        baseline = secrets_collection_factory([
            {
                'filename': 'filename3',
            },
        ])

        backup_baseline = baseline.data.copy()
        baseline.exclude_files = 'filename1'
        results = get_secrets_not_in_baseline(new_findings, baseline)

        assert len(results.data) == 1
        assert 'filename1' not in results.data
        assert baseline.data == backup_baseline
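
The exclusion here is just a regex match against the filenames of the new findings. A standalone illustration of the behaviour the assertions rely on is below; the filtering code is a sketch, not the library's implementation.

import re

# Sketch of the behaviour asserted above: new findings whose filename matches the
# baseline's exclude_files regex are dropped from the results, while the baseline
# collection itself is left unmodified.
exclude_files = 'filename1'
new_finding_files = ['filename1', 'filename2']
kept = [f for f in new_finding_files if not re.search(exclude_files, f)]
assert kept == ['filename2']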
Example #15
    def test_new_secret_line_old_file(self):
        """Same file, new line with potential secret"""
        new_findings = secrets_collection_factory([
            {
                'secret': 'secret1',
                'lineno': 1,
            },
        ])
        baseline = secrets_collection_factory([
            {
                'secret': 'secret2',
                'lineno': 2,
            },
        ])

        backup_baseline = baseline.data.copy()
        results = get_secrets_not_in_baseline(new_findings, baseline)

        assert len(results.data['filename']) == 1
        secretA = PotentialSecret('type', 'filename', 1, 'secret1')
        assert results.data['filename'][secretA].secret_hash == \
            PotentialSecret.hash_secret('secret1')
        assert baseline.data == backup_baseline
Example #16
    def scan(self):
        """Clones the repo, and scans the git diff between last_commit_hash and HEAD.

        :raises: subprocess.CalledProcessError
        """
        self.clone_and_pull_repo()
        diff = self._get_latest_changes()
        baseline = self._get_baseline()

        default_plugins = initialize(self.plugin_config)

        secrets = SecretsCollection(default_plugins, self.exclude_regex)

        secrets.scan_diff(diff,
                          baseline_filename=baseline,
                          last_commit_hash=self.last_commit_hash,
                          repo_name=self.name)

        if baseline:
            baseline_collection = SecretsCollection.load_baseline_from_string(
                baseline)
            secrets = get_secrets_not_in_baseline(secrets, baseline_collection)

        return secrets
Example #17
def _perform_scan(args, plugins, automaton, word_list_hash):
    """
    :param args: output of `argparse.ArgumentParser.parse_args`
    :param plugins: tuple of initialized plugins

    :type automaton: ahocorasick.Automaton|None
    :param automaton: optional automaton for ignoring certain words.

    :type word_list_hash: str|None
    :param word_list_hash: optional iterated sha1 hash of the words in the word list.

    :rtype: dict
    """
    old_baseline = _get_existing_baseline(args.old_baseline)
    if old_baseline:
        plugins = initialize.merge_plugins_from_baseline(
            _get_plugins_from_baseline(old_baseline),
            args,
            automaton=automaton,
        )

    # Favors CLI arguments over existing baseline configuration
    if old_baseline:
        if not args.exclude_files:
            args.exclude_files = _get_exclude_files(old_baseline)

        if (
            not args.exclude_lines
            and old_baseline.get('exclude')
        ):
            args.exclude_lines = old_baseline['exclude']['lines']

        if (
            not args.word_list_file
            and old_baseline.get('word_list')
        ):
            args.word_list_file = old_baseline['word_list']['file']

        if (
            not args.custom_plugin_paths
            and old_baseline.get('custom_plugin_paths')
        ):
            args.custom_plugin_paths = old_baseline['custom_plugin_paths']

    # If we have knowledge of an existing baseline file, we should use
    # that knowledge and add it to our exclude_files regex.
    if args.old_baseline:
        _add_baseline_to_exclude_files(args)

    new_baseline = baseline.initialize(
        path=args.path,
        plugins=plugins,
        custom_plugin_paths=args.custom_plugin_paths,
        exclude_files_regex=args.exclude_files,
        exclude_lines_regex=args.exclude_lines,
        word_list_file=args.word_list_file,
        word_list_hash=word_list_hash,
        should_scan_all_files=args.all_files,
    )

    if old_baseline:
        new_baseline = baseline.get_secrets_not_in_baseline(
            new_baseline,
            get_baseline(args.old_baseline[0])
        )

    return new_baseline.format_for_baseline_output()
Example #18
def main(argv=None):
    version_check()
    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)

    try:
        # If baseline is provided, we first want to make sure
        # it's valid, before doing any further computation.
        baseline_collection = get_baseline(
            args.baseline[0],
            plugin_filenames=args.plugin_filenames,
        )
    except (IOError, TypeError, ValueError):
        # Error logs handled within logic.
        return 1

    automaton = None
    word_list_hash = None
    if args.word_list_file:
        automaton, word_list_hash = build_automaton(args.word_list_file)

    plugins = initialize.from_parser_builder(
        args.plugins,
        exclude_lines_regex=args.exclude_lines,
        automaton=automaton,
        should_verify_secrets=not args.no_verify,
        plugin_filenames=args.plugin_filenames,
    )

    # Merge plugins from baseline
    if baseline_collection:
        plugins = initialize.merge_plugins_from_baseline(
            baseline_collection.plugins,
            args,
            automaton,
        )
        baseline_collection.plugins = plugins

    results_collection = find_secrets_in_files(args, plugins)
    if baseline_collection:
        original_results_collection = results_collection
        results_collection = get_secrets_not_in_baseline(
            results_collection,
            baseline_collection,
        )

    if len(results_collection.data) > 0:
        pretty_print_diagnostics_for_new_secrets(results_collection)
        return 1

    # If no baseline has been supplied, there is nothing left to check
    if not baseline_collection:
        return 0

    # Only attempt baseline modifications if we don't find any new secrets
    baseline_modified = trim_baseline_of_removed_secrets(
        original_results_collection,
        baseline_collection,
        args.filenames,
    )

    if VERSION != baseline_collection.version:
        baseline_collection.version = VERSION
        baseline_modified = True

    if baseline_modified:
        write_baseline_to_file(
            filename=args.baseline[0],
            data=baseline_collection.format_for_baseline_output(),
        )

        log.error(
            'The baseline file was updated.\n'
            'Probably to keep line numbers of secrets up-to-date.\n'
            'Please `git add {}`, thank you.\n\n'.format(args.baseline[0]),
        )
        return 3

    # Check for secrets that are verified but have not yet been audited
    verified_non_audited = get_verified_non_audited_secrets_from_baseline(
        baseline_collection,
    )

    if len(verified_non_audited.data) > 0:
        pretty_print_diagnostics_for_verified_non_audited(verified_non_audited)
        return 2

    # check if there are non-audited secrets
    if args.fail_on_non_audited:
        non_audited = get_non_audited_secrets_from_baseline(
            baseline_collection,
        )

        if len(non_audited.data) > 0:
            pretty_print_diagnostics_for_non_audited(non_audited)
            return 4

    return 0
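
For quick reference, the exit codes returned by the main() above, taken directly from its return statements (the '--fail-on-non-audited' spelling is inferred from args.fail_on_non_audited and is an assumption about the parser):

# Summary of the return values used in the main() above.
EXIT_CODES = {
    0: 'no new secrets, and the baseline (if supplied) needed no changes',
    1: 'baseline could not be loaded, or new secrets were found',
    2: 'verified secrets exist that have not been audited',
    3: 'the baseline file was updated and needs to be `git add`-ed again',
    4: 'non-audited secrets exist and --fail-on-non-audited was set',
}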
Example #19
def main(argv=sys.argv[1:]):
    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)

    try:
        # If baseline is provided, we first want to make sure
        # it's valid, before doing any further computation.
        baseline_collection = get_baseline(args.baseline[0])
    except (IOError, TypeError, ValueError):
        # Error logs handled within logic.
        return 1

    automaton = None
    word_list_hash = None
    if args.word_list_file:
        automaton, word_list_hash = build_automaton(args.word_list_file)

    plugins = initialize.from_parser_builder(
        plugins_dict=args.plugins,
        custom_plugin_paths=args.custom_plugin_paths,
        exclude_lines_regex=args.exclude_lines,
        automaton=automaton,
        should_verify_secrets=not args.no_verify,
    )

    # Merge plugins from baseline
    if baseline_collection:
        plugins = initialize.merge_plugins_from_baseline(
            baseline_plugins=baseline_collection.plugins,
            args=args,
            automaton=automaton,
        )
        baseline_collection.plugins = plugins

    results = find_secrets_in_files(args, plugins)

    if baseline_collection:
        original_results = results
        results = get_secrets_not_in_baseline(
            results,
            baseline_collection,
        )

    if len(results.data) > 0:
        pretty_print_diagnostics(results)
        return 1

    if not baseline_collection:
        return 0

    # Only attempt baseline modifications if we don't find any new secrets
    baseline_modified = trim_baseline_of_removed_secrets(
        original_results,
        baseline_collection,
        args.filenames,
    )

    if VERSION != baseline_collection.version:
        baseline_collection.version = VERSION
        baseline_modified = True

    # Force this off: this variant intentionally never modifies the baseline file.
    baseline_modified = False

    if baseline_modified:
        write_baseline_to_file(
            filename=args.baseline[0],
            data=baseline_collection.format_for_baseline_output(),
        )

        log.error(
            'The baseline file was updated.\n'
            'Probably to keep line numbers of secrets up-to-date.\n'
            'Please `git add {}`, thank you.\n\n'.format(args.baseline[0]),
        )
        return 3

    return 0