def test_merge():
    """Labels set on an old collection should survive a merge into a fresh scan."""
    old_secrets = SecretsCollection()
    old_secrets.scan_file('test_data/each_secret.py')
    assert len(list(old_secrets)) >= 3    # otherwise, this test won't work.

    for index, (_, secret) in enumerate(old_secrets):
        if index == 0:
            secret.is_secret = False
        elif index == 1:
            secret.is_secret = True
        elif index == 2:
            secret.is_verified = True

    new_secrets = SecretsCollection()
    new_secrets.scan_file('test_data/each_secret.py')
    list(new_secrets)[-1][1].is_secret = True

    new_secrets.merge(old_secrets)

    for index, (_, secret) in enumerate(new_secrets):
        if index == 0:
            assert secret.is_secret is False
            assert secret.is_verified is False
        elif index == 1:
            assert secret.is_secret is True
            assert secret.is_verified is False
        elif index == 2:
            assert secret.is_secret is True
            assert secret.is_verified is True
def modified_baseline(self):
    """Yield a freshly-scanned collection with every line number shifted by one."""
    collection = SecretsCollection()
    collection.scan_file(self.FILENAME)

    # Simulate the file having been edited since the scan.
    for _, found_secret in collection:
        found_secret.line_number += 1

    yield collection
def test_baseline_filters_out_known_secrets():
    """A complete baseline lets the commit through; a partial one blocks it.

    Fix: removed the commented-out `import pdb; pdb.set_trace()` debugging
    leftover — dead code should not be checked in.
    """
    secrets = SecretsCollection()
    secrets.scan_file('test_data/each_secret.py')

    with tempfile.NamedTemporaryFile() as f:
        baseline.save_to_file(secrets, f.name)
        f.seek(0)

        # This succeeds, because all the secrets are known.
        assert_commit_succeeds([
            'test_data/each_secret.py',
            '--baseline',
            f.name,
        ])

    # Remove one arbitrary secret, so that it won't be the full set.
    secrets.data['test_data/each_secret.py'].pop()

    with tempfile.NamedTemporaryFile() as f:
        baseline.save_to_file(secrets, f.name)
        f.seek(0)

        # Test that it isn't the case that a baseline is provided, and
        # everything passes.
        assert_commit_blocked([
            'test_data/each_secret.py',
            '--baseline',
            f.name,
        ])
def initialize(plugins, exclude_regex=None, rootdir='.'):
    """Scans the entire codebase for high entropy strings, and returns a
    SecretsCollection object.

    :type plugins: tuple of detect_secrets.plugins.base.BasePlugin
    :param plugins: rules to initialize the SecretsCollection with.

    :type exclude_regex: str|None
    :type rootdir: str

    :rtype: SecretsCollection
    """
    output = SecretsCollection(plugins, exclude_regex)

    if os.path.isfile(rootdir):
        # This option allows for much easier adhoc usage.
        git_files = [rootdir]
    else:
        git_files = _get_git_tracked_files(rootdir)

    if not git_files:
        return output

    if exclude_regex:
        regex = re.compile(exclude_regex, re.IGNORECASE)
        git_files = [
            filename
            for filename in git_files
            if not regex.search(filename)
        ]

    for filename in git_files:
        output.scan_file(filename)

    return output
def test_disable_filter(parser):
    """`--disable-filter` turns off a filter that otherwise hides the secret."""
    with tempfile.NamedTemporaryFile() as f:
        f.write(f'secret = "{uuid.uuid4()}"'.encode())

        # First, make sure that we actually catch it.
        f.seek(0)
        with transient_settings({
            'plugins_used': [{
                'name': 'KeywordDetector',
            }],
        }):
            found = SecretsCollection()
            found.scan_file(f.name)
            assert not found

        f.seek(0)
        with default_settings():
            parser.parse_args([
                'scan',
                '--disable-filter',
                'detect_secrets.filters.heuristic.is_potential_uuid',

                # invalid filter
                '--disable-filter',
                'blah',
            ])

            found = SecretsCollection()
            found.scan_file(f.name)
            assert found
def test_basic(configure_plugins):
    """Subtracting a filtered baseline leaves only the filtered-out secrets."""
    with transient_settings({**configure_plugins, 'filters_used': []}):
        secrets = SecretsCollection()
        secrets.scan_file('test_data/each_secret.py')

    # This baseline will have less secrets, since it filtered out some.
    with transient_settings({
        **configure_plugins,
        'filters_used': [
            {
                'path': 'detect_secrets.filters.regex.should_exclude_line',
                'pattern': [
                    'EXAMPLE',
                ],
            },
        ],
    }):
        baseline = SecretsCollection()
        baseline.scan_file('test_data/each_secret.py')

    # This tests the != operator for same file, different number of secrets.
    # It's hidden in a different test, but I didn't want to set up the
    # boilerplate again.
    assert secrets != baseline

    result = secrets - baseline
    assert len(result['test_data/each_secret.py']) == 2
    assert len(secrets['test_data/each_secret.py']) == 4
def test_ensure_file_transformers_are_used(printer):
    """
    In this tests, we construct a situation where detect-secrets scan leverages
    special file transformers in order to find a secret, that wouldn't otherwise
    be found with normal line-by-line reading. In doing so, if audit is able to
    find this secret, it can be inferred that it too knows how to use file
    transformers.
    """
    with transient_settings({
        'plugins_used': [
            {'name': 'Base64HighEntropyString'},
        ],
    }):
        secrets = SecretsCollection()
        secrets.scan_file('test_data/config.env')
        assert bool(secrets)

    with open('test_data/config.env') as f:
        lines = [line.rstrip() for line in f]

    with mock.patch('detect_secrets.audit.io.print_secret_not_found') as m:
        run_logic(secrets, 'y')
        assert not m.called

    line_number = list(secrets['test_data/config.env'])[0].line_number
    assert lines[line_number - 1] in printer.message
def test_mismatch_files():
    """Collections built from different files must not compare equal."""
    collection_a = SecretsCollection()
    collection_a.scan_file('test_data/each_secret.py')

    collection_b = SecretsCollection()
    collection_b.scan_file('test_data/files/file_with_secrets.py')

    assert collection_a != collection_b
def test_basic(file_content):
    """A file containing one secret yields exactly one scan result."""
    with tempfile.NamedTemporaryFile() as f:
        f.write(file_content.encode())
        f.seek(0)

        collection = SecretsCollection()
        collection.scan_file(f.name)

        assert len(list(collection)) == 1
def test_no_overlapping_files(configure_plugins):
    """Difference of collections over disjoint files keeps each side's file."""
    left = SecretsCollection()
    right = SecretsCollection()

    with transient_settings({**configure_plugins, 'filters_used': []}):
        left.scan_file('test_data/each_secret.py')
        right.scan_file('test_data/config.env')

    assert (left - right).files == {'test_data/each_secret.py'}
    assert (right - left).files == {'test_data/config.env'}
def test_bool():
    """Truthiness tracks whether the collection currently holds any secrets."""
    collection = SecretsCollection()
    assert not collection

    collection.scan_file('test_data/each_secret.py')
    assert collection

    collection['test_data/each_secret.py'].clear()
    assert not collection
def initialize(
    path,
    plugins,
    exclude_files_regex=None,
    exclude_lines_regex=None,
    should_scan_all_files=False,
):
    """Scans the entire codebase for secrets, and returns a
    SecretsCollection object.

    :type plugins: tuple of detect_secrets.plugins.base.BasePlugin
    :param plugins: rules to initialize the SecretsCollection with.

    :type exclude_files_regex: str|None
    :type exclude_lines_regex: str|None
    :type path: list
    :type should_scan_all_files: bool

    :rtype: SecretsCollection
    """
    output = SecretsCollection(
        plugins,
        exclude_files=exclude_files_regex,
        exclude_lines=exclude_lines_regex,
    )

    files_to_scan = []
    for element in path:
        if os.path.isdir(element):
            if should_scan_all_files:
                files_to_scan.extend(_get_files_recursively(element))
            else:
                files = _get_git_tracked_files(element)
                if files:
                    files_to_scan.extend(files)
        elif os.path.isfile(element):
            files_to_scan.append(element)
        else:
            # Fix: lazy %-style logging args instead of eager string
            # concatenation, consistent with the other initialize() variant.
            log.error('detect-secrets: %s: No such file or directory', element)

    if not files_to_scan:
        return output

    if exclude_files_regex:
        exclude_files_regex = re.compile(exclude_files_regex, re.IGNORECASE)
        files_to_scan = filter(
            lambda file: (
                not exclude_files_regex.search(file)
            ),
            files_to_scan,
        )

    for file in files_to_scan:
        output.scan_file(file)

    return output
def test_remove_non_existent_files():
    """trim() drops results whose source file no longer exists on disk."""
    collection = SecretsCollection()
    collection.scan_file('test_data/each_secret.py')
    assert bool(collection)

    # Re-key the results under a path that does not exist.
    collection.data['does-not-exist'] = collection.data.pop(
        'test_data/each_secret.py',
    )

    collection.trim()
    assert not bool(collection)
def test_deleted_secret_file():
    """Trimming against an empty scan only removes results when the file is
    explicitly listed in `filelist`."""
    collection = SecretsCollection()
    collection.scan_file('test_data/each_secret.py')

    collection.trim(SecretsCollection())
    assert collection

    collection.trim(SecretsCollection(), filelist=['test_data/each_secret.py'])
    assert not collection
def find_secrets_in_files(args, plugins):
    """Scan every filename from the args, skipping the baseline file itself."""
    collection = SecretsCollection(plugins)

    for filename in args.filenames:
        # Don't scan the baseline file
        if filename == args.baseline[0]:
            continue

        collection.scan_file(filename)

    return collection
def find_secrets_in_files(args):
    """Build the plugin set from CLI args, then scan every named file."""
    plugins = initialize.from_parser_builder(args.plugins)
    collection = SecretsCollection(plugins)

    for filename in args.filenames:
        # Obviously, don't detect the baseline file
        if filename == args.baseline[0]:
            continue

        collection.scan_file(filename)

    return collection
def test_deleted_secret():
    """trim() against a baseline missing one secret shrinks the collection."""
    secrets = SecretsCollection()
    secrets.scan_file('test_data/each_secret.py')

    # Round-trip through the baseline format, then drop one secret from it.
    results = SecretsCollection.load_from_baseline({'results': secrets.json()})
    results.data['test_data/each_secret.py'].pop()

    original_size = len(secrets['test_data/each_secret.py'])
    secrets.trim(results)

    assert len(secrets['test_data/each_secret.py']) < original_size
def main(argv: Optional[List[str]] = None) -> int:
    """Pre-commit hook entry point.

    Exit codes: 0 = clean, 1 = bad args or new secrets found,
    3 = baseline file was rewritten (caller should `git add` it).
    """
    try:
        args = parse_args(argv)
    except ValueError:
        return 1

    if args.verbose:    # pragma: no cover
        log.set_debug_level(args.verbose)

    # Find all secrets in files to be committed
    secrets = SecretsCollection()
    for filename in args.filenames:
        secrets.scan_file(filename)

    new_secrets = (secrets - args.baseline) if args.baseline else secrets
    if new_secrets:
        pretty_print_diagnostics(new_secrets)
        return 1

    if not args.baseline:
        return 0

    # Only attempt baseline modifications if we don't find any new secrets.
    is_modified = should_update_baseline(
        args.baseline,
        scanned_results=secrets,
        filelist=args.filenames,
        baseline_version=args.baseline_version,
    )
    if not is_modified:
        return 0

    if args.baseline_version != VERSION:
        with open(args.baseline_filename) as f:
            old_baseline = json.loads(f.read())

        # Override the results, because this has been updated in
        # `should_update_baseline`.
        old_baseline['results'] = args.baseline.json()
        args.baseline = baseline.upgrade(old_baseline)

    baseline.save_to_file(args.baseline, filename=args.baseline_filename)
    print(
        'The baseline file was updated.\n'
        'Probably to keep line numbers of secrets up-to-date.\n'
        'Please `git add {}`, thank you.\n\n'.format(args.baseline_filename),
    )
    return 3
def get_baseline_file(self, formatter=baseline.format_for_output):
    """Yield a temp file holding a baseline serialized in the legacy 0.0.1 shape."""
    secrets = SecretsCollection()
    secrets.scan_file(self.FILENAME)

    with tempfile.NamedTemporaryFile() as f:
        with mock.patch('detect_secrets.core.baseline.VERSION', '0.0.1'):
            data = formatter(secrets)

        # Simulating old version
        data['plugins_used'][0]['base64_limit'] = (
            data['plugins_used'][0].pop('limit')
        )
        baseline.save_to_file(data, f.name)

        yield f
def test_maintains_labels():
    """Trimming against a fresh scan must not clobber existing audit labels.

    Fix: `any([...])` built a throwaway list; the generator form lets `any`
    short-circuit on the first labelled secret (ruff C419).
    """
    labelled_secrets = SecretsCollection()
    labelled_secrets.scan_file('test_data/each_secret.py')
    for _, secret in labelled_secrets:
        secret.is_secret = True
        break

    secrets = SecretsCollection()
    secrets.scan_file('test_data/each_secret.py')
    labelled_secrets.trim(scanned_results=secrets)

    assert any(secret.is_secret for _, secret in labelled_secrets)
def test_file_based_success_yaml():
    """YAML transformer finds the two expected secrets with their locations."""
    get_settings().configure_plugins([
        {
            'name': 'HexHighEntropyString',
            'limit': 3.0,
        },
    ])

    collection = SecretsCollection()
    collection.scan_file('test_data/config.yaml')

    locations = [str(secret).splitlines()[1] for _, secret in collection]
    assert locations == [
        'Location: test_data/config.yaml:3',
        'Location: test_data/config.yaml:5',
    ]
def test_local_file_success(scheme, parser):
    """A custom file-based filter registered via --filter suppresses the secret."""
    collection = SecretsCollection()
    with transient_settings({
        'plugins_used': [{
            'name': 'Base64HighEntropyString',
        }],
    }):
        parser.parse_args([
            'scan',
            '--filter',
            scheme + 'testing/custom_filters.py::is_invalid_secret',
        ])

        collection.scan_file('test_data/config.env')
        assert not collection
def initialize(
    plugins,
    exclude_files_regex=None,
    exclude_lines_regex=None,
    path='.',
    scan_all_files=False,
):
    """Scans the entire codebase for secrets, and returns a
    SecretsCollection object.

    :type plugins: tuple of detect_secrets.plugins.base.BasePlugin
    :param plugins: rules to initialize the SecretsCollection with.

    :type exclude_files_regex: str|None
    :type exclude_lines_regex: str|None
    :type path: str
    :type scan_all_files: bool

    :rtype: SecretsCollection
    """
    output = SecretsCollection(
        plugins,
        exclude_files=exclude_files_regex,
        exclude_lines=exclude_lines_regex,
    )

    if os.path.isfile(path):
        # This option allows for much easier adhoc usage.
        files_to_scan = [path]
    elif scan_all_files:
        files_to_scan = _get_files_recursively(path)
    else:
        files_to_scan = _get_git_tracked_files(path)

    if not files_to_scan:
        return output

    if exclude_files_regex:
        pattern = re.compile(exclude_files_regex, re.IGNORECASE)
        files_to_scan = [
            filename
            for filename in files_to_scan
            if not pattern.search(filename)
        ]

    for filename in files_to_scan:
        output.scan_file(filename)

    return output
def test_file_based_success_config():
    """INI transformer finds all five expected secrets with their locations."""
    get_settings().configure_plugins([
        {
            'name': 'Base64HighEntropyString',
            'limit': 3.0,
        },
    ])

    collection = SecretsCollection()
    collection.scan_file('test_data/config.ini')

    locations = [str(secret).splitlines()[1] for _, secret in collection]
    assert locations == [
        'Location: test_data/config.ini:2',
        'Location: test_data/config.ini:10',
        'Location: test_data/config.ini:21',
        'Location: test_data/config.ini:22',
        'Location: test_data/config.ini:32',
    ]
def test_nothing_to_audit(printer):
    """When every secret already carries a label, audit has nothing to do."""
    with transient_settings({
        'plugins_used': [
            {'name': 'BasicAuthDetector'},
        ],
    }):
        collection = SecretsCollection()
        collection.scan_file('test_data/each_secret.py')

        # Pre-label everything; the concrete label values don't matter.
        for _, secret in collection:
            secret.is_secret = random.choice([True, False])

        run_logic(collection)

    assert 'Nothing to audit' in printer.message
    assert 'Saving progress' not in printer.message
def test_fails_if_no_line_numbers_found(printer):
    """Audit bails out early when the baseline carries no line numbers."""
    with transient_settings({
        'plugins_used': [
            {'name': 'Base64HighEntropyString'},
        ],
    }):
        collection = SecretsCollection()
        collection.scan_file('test_data/config.env')

        # Remove line numbers
        collection = baseline.load(
            baseline.format_for_output(collection, is_slim_mode=True),
        )

        with mock.patch('detect_secrets.audit.io.clear_screen') as m:
            run_logic(collection)

        assert not m.called
        assert 'No line numbers found in baseline' in printer.message
def execute(self, finder):
    """Scan each discovered file; report scan failures and found secrets as issues."""
    issues = []

    # Honor the disable-list from the tool configuration.
    enabled_plugins = [
        plugin
        for plugin in PLUGINS
        if plugin_code(plugin) not in self.config['disabled']
    ]
    detector = SecretsCollection(enabled_plugins)

    for filepath in finder.files(self.config['filters']):
        try:
            detector.scan_file(filepath)
        except Exception as exc:    # pylint: disable=broad-except
            issues.append(self.make_issue(exc, filepath))

    for filepath, problems in iteritems(detector.data):
        for problem in problems:
            issues.append(self.make_issue(problem, filepath))

    return issues
def baseline_file():
    """Yield the path to a saved baseline covering two generated files."""
    # Create our own SecretsCollection manually, so that we have fine-tuned control.
    first_content = textwrap.dedent(f"""
        url = {url_format.format(first_secret)}
        example = {url_format.format(random_secret)}
        link = {url_format.format(first_secret)}
    """)[1:]
    second_content = textwrap.dedent(f"""
        url = {url_format.format(second_secret)}
        example = {url_format.format(random_secret)}
    """)[1:]

    with create_file_with_content(first_content) as first_file, \
            create_file_with_content(second_content) as second_file, \
            tempfile.NamedTemporaryFile() as baseline_file, \
            transient_settings({
                'plugins_used': [
                    {'name': 'BasicAuthDetector'},
                    {'name': 'JwtTokenDetector'},
                ],
            }):
        secrets = SecretsCollection()
        secrets.scan_file(first_file)
        secrets.scan_file(second_file)

        # Hand-assign audit labels, keyed by (filename, type, line number).
        labels = {
            (first_file, BasicAuthDetector.secret_type, 1): True,
            (first_file, BasicAuthDetector.secret_type, 2): None,
            (first_file, BasicAuthDetector.secret_type, 3): True,
            (second_file, JwtTokenDetector.secret_type, 1): True,
            (second_file, BasicAuthDetector.secret_type, 1): False,
            (second_file, BasicAuthDetector.secret_type, 2): False,
        }
        for _, secret in secrets:
            secret.is_secret = labels[
                (secret.filename, secret.type, secret.line_number)
            ]

        baseline.save_to_file(secrets, baseline_file.name)
        baseline_file.seek(0)

        yield baseline_file.name
def test_line_based_success():
    # Explicitly configure filters, so that additions to filters won't affect
    # this test.
    get_settings().configure_filters([
        # This will remove the `id` string
        {'path': 'detect_secrets.filters.heuristic.is_likely_id_string'},

        # This gets rid of the aws keys with `EXAMPLE` in them.
        {
            'path': 'detect_secrets.filters.regex.should_exclude_line',
            'pattern': [
                'EXAMPLE',
            ],
        },
    ])

    collection = SecretsCollection()
    collection.scan_file('test_data/each_secret.py')

    remaining = list(collection['test_data/each_secret.py'])
    assert len(remaining) == 1
    assert remaining[0].secret_value.startswith('c2VjcmV0IG1lc')
def initialize(
    path,
    plugins,
    exclude_files_regex=None,
    exclude_lines_regex=None,
    word_list_file=None,
    word_list_hash=None,
    should_scan_all_files=False,
    output_raw=False,
    output_verified_false=False,
):
    """Scans the entire codebase for secrets, and returns a
    SecretsCollection object.

    :type path: list
    :type plugins: tuple of detect_secrets.plugins.base.BasePlugin
    :param plugins: rules to initialize the SecretsCollection with.

    :type exclude_files_regex: str|None
    :type exclude_lines_regex: str|None

    :type word_list_file: str|None
    :param word_list_file: optional word list file for ignoring certain words.

    :type word_list_hash: str|None
    :param word_list_hash: optional iterated sha1 hash of the words in the word list.

    :type should_scan_all_files: bool
    :type output_raw: bool
    :type output_verified_false: bool

    :rtype: SecretsCollection
    """
    output = SecretsCollection(
        plugins,
        exclude_files=exclude_files_regex,
        exclude_lines=exclude_lines_regex,
        word_list_file=word_list_file,
        word_list_hash=word_list_hash,
        output_raw=output_raw,
        output_verified_false=output_verified_false,
    )

    files_to_scan = []
    for element in path:
        if os.path.isdir(element):
            if should_scan_all_files:
                files_to_scan.extend(_get_files_recursively(element))
            else:
                files_to_scan.extend(_get_git_tracked_files(element))
        elif os.path.isfile(element):
            files_to_scan.append(element)
        else:
            log.error('detect-secrets: %s: No such file or directory', element)

    if not files_to_scan:
        return output

    if exclude_files_regex:
        compiled = re.compile(exclude_files_regex, re.IGNORECASE)
        files_to_scan = [
            filename
            for filename in files_to_scan
            if not compiled.search(filename)
        ]

    # Sort for a deterministic scan order.
    for filename in sorted(files_to_scan):
        output.scan_file(filename)

    return output