Exemplo n.º 1
0
def test_build_automaton():
    """Verify ``util.build_automaton`` against a mocked one-word word list.

    The test checks two things about the returned pair: the hash equals
    the SHA-1 hex digest of the single word ``foam``, and the automaton
    reports a hit for a secret containing that word while reporting no hit
    for a secret that contains none of the listed words.
    """
    # Contents served by the mocked ``open``; the surrounding whitespace and
    # blank line are part of the fixture.
    mocked_contents = """
        foam\n
    """
    patched_open = mock_open(
        data=mocked_contents,
        namespace='detect_secrets.util.open',
    )

    with patched_open:
        # The file name is irrelevant: ``open`` is patched inside util.
        automaton, digest = util.build_automaton(
            word_list='will_be_mocked.txt',
        )

        expected_digest = hashlib.sha1('foam'.encode('utf-8')).hexdigest()
        assert digest == expected_digest

        contains_listed_word = filters.is_found_with_aho_corasick(
            secret='foam_roller',
            automaton=automaton,
        )
        assert contains_listed_word

        contains_unlisted_word = filters.is_found_with_aho_corasick(
            secret='no_words_in_word_list',
            automaton=automaton,
        )
        assert not contains_unlisted_word
Exemplo n.º 2
0
def get_project_secrets(project_results_dir: str, project_name: str) -> dict:
    """Run a ``scan`` over one project directory and return the result.

    :type project_results_dir: str
    :param project_results_dir: directory that contains the project.

    :type project_name: str
    :param project_name: name of the project inside ``project_results_dir``.

    :rtype: dict
    :returns: whatever ``_perform_scan`` produces for the parsed arguments.
    """
    scan_target = f"{project_results_dir}/{project_name}"
    args = parse_args(["scan", scan_target])

    # A word list is optional; without one no automaton is built.
    if args.word_list_file:
        automaton, word_list_hash = build_automaton(args.word_list_file)
    else:
        automaton = None
        word_list_hash = None

    # Plugins are built from the freshly parsed settings on every call so
    # that the latest configuration is always used.
    plugins = initialize.from_parser_builder(
        plugins_dict=args.plugins,
        custom_plugin_paths=args.custom_plugin_paths,
        exclude_lines_regex=args.exclude_lines,
        automaton=automaton,
        should_verify_secrets=not args.no_verify,
    )

    return _perform_scan(args, plugins, automaton, word_list_hash)
Exemplo n.º 3
0
def main(argv=None):
    """Scan the given files and compare the findings with the baseline.

    :type argv: list|None
    :param argv: command line arguments, handed to ``parse_args`` as-is.

    :rtype: int
    :returns:
        1 when the baseline cannot be loaded (``IOError``/``ValueError``)
        or when secrets are found that are not in the baseline;
        3 when the baseline file had to be rewritten;
        0 otherwise.
    """
    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)

    # Validate the baseline before doing any expensive work. Logging of the
    # failure is the responsibility of ``get_baseline``.
    try:
        baseline_collection = get_baseline(args.baseline[0])
    except (IOError, ValueError):
        return 1

    if args.word_list_file:
        automaton, word_list_hash = build_automaton(args.word_list_file)
    else:
        automaton, word_list_hash = None, None

    plugins = initialize.from_parser_builder(
        args.plugins,
        exclude_lines_regex=args.exclude_lines,
        automaton=automaton,
        should_verify_secrets=not args.no_verify,
    )

    # When a baseline exists, its plugin settings are merged in and the
    # merged set is stored back on the baseline.
    if baseline_collection:
        plugins = initialize.merge_plugins_from_baseline(
            baseline_collection.plugins,
            args,
            automaton,
        )
        baseline_collection.plugins = plugins

    all_found = find_secrets_in_files(args, plugins)
    if baseline_collection:
        new_secrets = get_secrets_not_in_baseline(
            all_found,
            baseline_collection,
        )
    else:
        new_secrets = all_found

    if len(new_secrets.data) > 0:
        pretty_print_diagnostics(new_secrets)
        return 1

    if not baseline_collection:
        return 0

    # The baseline is only touched once we know there are no new secrets.
    needs_rewrite = trim_baseline_of_removed_secrets(
        all_found,
        baseline_collection,
        args.filenames,
    )

    if VERSION != baseline_collection.version:
        baseline_collection.version = VERSION
        needs_rewrite = True

    if not needs_rewrite:
        return 0

    write_baseline_to_file(
        filename=args.baseline[0],
        data=baseline_collection.format_for_baseline_output(),
    )

    notice = (
        'The baseline file was updated.\n'
        'Probably to keep line numbers of secrets up-to-date.\n'
        'Please `git add {}`, thank you.\n\n'
    ).format(args.baseline[0])
    log.error(notice)
    return 3
Exemplo n.º 4
0
def main(argv=None):
    """Entry point for the ``scan`` and ``audit`` actions.

    :type argv: list|None
    :param argv: command line arguments without the program name. When
        omitted, ``sys.argv[1:]`` is read at call time.

    :rtype: int
    :returns: 1 when ``audit --diff`` is not given exactly two files,
        0 otherwise.
    """
    if len(sys.argv) == 1:  # pragma: no cover
        sys.argv.append('--help')

    # The default used to be ``argv=sys.argv[1:]``, which is evaluated once
    # when the function is defined. That snapshot never saw the ``--help``
    # appended above (nor any later change to ``sys.argv``), so resolve the
    # default here, at call time, instead.
    if argv is None:
        argv = sys.argv[1:]

    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)

    if args.action == 'scan':
        automaton = None
        word_list_hash = None
        if args.word_list_file:
            automaton, word_list_hash = build_automaton(args.word_list_file)

        # Plugins are *always* rescanned with fresh settings, because
        # we want to get the latest updates.
        plugins = initialize.from_parser_builder(
            plugins_dict=args.plugins,
            custom_plugin_paths=args.custom_plugin_paths,
            exclude_lines_regex=args.exclude_lines,
            automaton=automaton,
            should_verify_secrets=not args.no_verify,
        )
        if args.string:
            line = args.string

            # NOTE(review): a bool here presumably means the option was
            # given without a value; in that case the first line of stdin
            # is scanned instead. Confirm against the parser definition.
            if isinstance(args.string, bool):
                line = sys.stdin.read().splitlines()[0]

            _scan_string(line, plugins)

        else:
            baseline_dict = _perform_scan(
                args,
                plugins,
                automaton,
                word_list_hash,
            )

            # Either persist the result to the requested file or print it.
            if args.import_filename:
                write_baseline_to_file(
                    filename=args.import_filename[0],
                    data=baseline_dict,
                )
            else:
                print(baseline.format_baseline_for_output(baseline_dict))

    elif args.action == 'audit':
        # Plain audit: neither a diff nor a results display was requested.
        if not args.diff and not args.display_results:
            audit.audit_baseline(args.filename[0])
            return 0

        if args.display_results:
            audit.print_audit_results(args.filename[0])
            return 0

        # Only the diff mode remains, and it needs exactly two baselines.
        if len(args.filename) != 2:
            print(
                'Must specify two files to compare!',
                file=sys.stderr,
            )
            return 1

        try:
            audit.compare_baselines(args.filename[0], args.filename[1])
        except audit.RedundantComparisonError:
            print(
                'No difference, because it\'s the same file!',
                file=sys.stderr,
            )

    return 0
Exemplo n.º 5
0
    def load_baseline_from_dict(cls, data, plugin_filenames=None):
        """Initializes a SecretsCollection object from dictionary.

        The ``data`` dictionary is not modified.

        :type data: dict
        :param data: properly formatted dictionary to load SecretsCollection from.
            Must contain ``plugins_used`` and ``results``, plus either
            ``exclude`` or the legacy ``exclude_regex`` key.

        :type plugin_filenames: tuple
        :param plugin_filenames: the plugin filenames, forwarded to
            ``initialize.from_plugin_classname``.

        :rtype: SecretsCollection
        :raises: IOError if a required top-level key is missing
        :raises: KeyError if a plugin or result entry lacks a mandatory field
        """
        result = SecretsCollection()

        if not all(key in data for key in (
                'plugins_used',
                'results',
        )):
            raise IOError

        # In v0.12.0 `exclude_regex` got replaced by `exclude`
        if not any(key in data for key in (
                'exclude',
                'exclude_regex',
        )):
            raise IOError

        if 'exclude_regex' in data:
            # Legacy format: only a file exclusion is stored, so
            # `exclude_lines` keeps whatever SecretsCollection() set.
            result.exclude_files = data['exclude_regex']
        else:
            result.exclude_files = data['exclude']['files']
            result.exclude_lines = data['exclude']['lines']

        # In v0.12.7 the `--word-list` option got added
        automaton = None
        if 'word_list' in data:
            result.word_list_file = data['word_list']['file']
            result.word_list_hash = data['word_list']['hash']

            if result.word_list_file:
                # Always ignore the given `data['word_list']['hash']`
                # The difference will show whenever the word list changes
                automaton, result.word_list_hash = build_automaton(
                    result.word_list_file)

        plugins = []
        for plugin in data['plugins_used']:
            # Work on a copy: popping `name` from the caller's dict would
            # strip it from `data`, so a second load of the same dict would
            # fail with a KeyError.
            plugin_kwargs = dict(plugin)
            plugin_classname = plugin_kwargs.pop('name')
            initialized_plugin = initialize.from_plugin_classname(
                plugin_classname,
                exclude_lines_regex=result.exclude_lines,
                automaton=automaton,
                should_verify_secrets=True,
                plugin_filenames=plugin_filenames,
                **plugin_kwargs,
            )
            # Plugins that could not be initialized come back as None
            # and are left out.
            if initialized_plugin is not None:
                plugins.append(initialized_plugin)
        result.plugins = tuple(plugins)

        for filename in data['results']:
            result.data[filename] = {}

            for item in data['results'][filename]:
                # The plaintext secret is not taken from `data`; a
                # placeholder is passed and the stored hash is restored
                # right after construction.
                secret = PotentialSecret(
                    item['type'],
                    filename,
                    secret='will be replaced',
                    lineno=item['line_number'],
                    is_secret=item.get('is_secret'),
                    output_raw=result.output_raw,
                    is_verified=item['is_verified'],
                    verified_result=item.get('verified_result'),
                )
                secret.secret_hash = item['hashed_secret']
                result.data[filename][secret] = secret

        # Baselines without a `version` key are loaded as '0.0.0'.
        result.version = (data['version'] if 'version' in data else '0.0.0')

        return result
Exemplo n.º 6
0
    def load_baseline_from_dict(cls, data):
        """Initializes a SecretsCollection object from dictionary.

        The ``data`` dictionary is not modified.

        :type data: dict
        :param data: properly formatted dictionary to load SecretsCollection from.
            Must contain ``plugins_used`` and ``results``, plus either
            ``exclude`` or the legacy ``exclude_regex`` key.

        :rtype: SecretsCollection
        :raises: IOError if a required top-level key is missing
        :raises: KeyError if a plugin or result entry lacks a mandatory field
        """
        result = SecretsCollection()

        if not all(key in data for key in (
                'plugins_used',
                'results',
        )):
            raise IOError

        # In v0.12.0 `exclude_regex` got replaced by `exclude`
        if not any(key in data for key in (
                'exclude',
                'exclude_regex',
        )):
            raise IOError

        if 'exclude_regex' in data:
            # Legacy format: only a file exclusion is stored, so
            # `exclude_lines` keeps whatever SecretsCollection() set.
            result.exclude_files = data['exclude_regex']
        else:
            result.exclude_files = data['exclude']['files']
            result.exclude_lines = data['exclude']['lines']

        # In v0.12.7 the `--word-list` option got added
        automaton = None
        if 'word_list' in data:
            result.word_list_file = data['word_list']['file']
            result.word_list_hash = data['word_list']['hash']

            if result.word_list_file:
                # Always ignore the existing `data['word_list']['hash']`
                # The difference will show whenever the word list changes
                automaton, result.word_list_hash = build_automaton(
                    result.word_list_file)

        # In v0.14.0 the `--custom-plugins` option got added
        result.custom_plugin_paths = tuple(data.get('custom_plugin_paths', ()))

        plugins = []
        for plugin in data['plugins_used']:
            # Work on a copy: popping `name` from the caller's dict would
            # strip it from `data`, so a second load of the same dict would
            # fail with a KeyError.
            plugin_kwargs = dict(plugin)
            plugin_classname = plugin_kwargs.pop('name')
            plugins.append(
                initialize.from_plugin_classname(
                    plugin_classname=plugin_classname,
                    custom_plugin_paths=result.custom_plugin_paths,
                    exclude_lines_regex=result.exclude_lines,
                    automaton=automaton,
                    should_verify_secrets=False,
                    **plugin_kwargs
                ),
            )
        result.plugins = tuple(plugins)

        for filename in data['results']:
            result.data[filename] = {}

            for item in data['results'][filename]:
                # The plaintext secret is not taken from `data`; a
                # placeholder is passed and the stored hash is restored
                # right after construction.
                secret = PotentialSecret(
                    item['type'],
                    filename,
                    secret='will be replaced',
                    lineno=item['line_number'],
                    is_secret=item.get('is_secret'),
                )
                secret.secret_hash = item['hashed_secret']
                result.data[filename][secret] = secret

        # Baselines without a `version` key are loaded as '0.0.0'.
        result.version = (data['version'] if 'version' in data else '0.0.0')

        return result