def test_scan_with_baseline(self, mock_subprocess_obj, mock_load_from_string, mock_apply):
    """Secrets already present in the baseline are filtered out of scan results."""
    repo = mock_tracked_repo()

    # One secret shared with the baseline, one brand new.
    baselined = PotentialSecret('type', 'filenameA', 1, 'blah')
    novel = PotentialSecret('type', 'filenameA', 2, 'curry')

    found = SecretsCollection()
    found.data['filenameA'] = {baselined: baselined, novel: novel}

    known = SecretsCollection()
    known.data['filenameA'] = {baselined: baselined}

    # Easier than mocking load_from_diff.
    mock_apply.side_effect = (
        lambda orig, base: get_secrets_not_in_baseline(found, known)
    )

    mock_subprocess_obj.side_effect = mock_subprocess(
        (
            SubprocessMock(
                expected_input='git show',
                mocked_output=b'will be mocked',
            ),
        ),
    )

    result = repo.scan()

    # Only the novel secret survives baseline filtering.
    assert len(result.data) == 1
    assert result.data['filenameA'][novel] == novel
def main(argv=None):
    """Scan files for secrets, compare against the baseline, and update it.

    Returns 1 when new secrets are found, the baseline is invalid, or the
    baseline file was rewritten; 0 otherwise.
    """
    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)

    # Validate the baseline (if provided) up front, before doing real work.
    try:
        baseline_collection = get_baseline(args.baseline[0])
    except (IOError, ValueError):
        # Error logs handled within logic.
        return 1

    plugins = initialize.from_parser_builder(args.plugins)
    results = find_secrets_in_files(args, plugins)

    if baseline_collection:
        original_results = results
        results = get_secrets_not_in_baseline(results, baseline_collection)

    if results.data:
        pretty_print_diagnostics(results)
        return 1

    if not baseline_collection:
        return 0

    # No new secrets found: see whether the baseline itself needs refreshing.
    baseline_modified = trim_baseline_of_removed_secrets(
        original_results,
        baseline_collection,
        args.filenames,
    )

    if VERSION != baseline_collection.version:
        baseline_collection.plugins = plugins
        baseline_collection.version = VERSION
        baseline_modified = True

    if not baseline_modified:
        return 0

    write_baseline_to_file(
        filename=args.baseline[0],
        data=baseline_collection.format_for_baseline_output(),
    )
    log.error(
        'The baseline file was updated.\n'
        'Probably to keep line numbers of secrets up-to-date.\n'
        'Please `git add {}`, thank you.\n\n'.format(args.baseline[0]),
    )
    return 1
def scan(self, exclude_files_regex=None, exclude_lines_regex=None, scan_head=False):
    """Fetches latest changes, and scans the git diff between last_commit_hash
    and HEAD.

    :raises: subprocess.CalledProcessError

    :type exclude_files_regex: str|None
    :param exclude_files_regex: A regex matching filenames to skip over.

    :type exclude_lines_regex: str|None
    :param exclude_lines_regex: A regex matching lines to skip over.

    :type scan_head: bool
    :param scan_head: if True, diff against the empty tree commit instead of
        last_commit_hash, i.e. scan everything reachable from HEAD.

    :rtype: SecretsCollection
    :returns: secrets found.
    """
    self.storage.fetch_new_changes()

    default_plugins = initialize_plugins.from_parser_builder(
        self.plugin_config,
        exclude_lines_regex=exclude_lines_regex,
    )
    # TODO Issue 17: Ignoring self.exclude_regex, using the server scan CLI arg
    secrets = SecretsCollection(
        plugins=default_plugins,
        exclude_files=exclude_files_regex,
        exclude_lines=exclude_lines_regex,
    )

    scan_from_this_commit = git.get_empty_tree_commit_hash() if scan_head else self.last_commit_hash
    try:
        diff_name_only = self.storage.get_diff_name_only(scan_from_this_commit)
        # do a per-file diff + scan so we don't get an OOM if the commit-diff is too large
        for filename in diff_name_only:
            file_diff = self.storage.get_diff(scan_from_this_commit, filename)
            secrets.scan_diff(
                file_diff,
                baseline_filename=self.baseline_filename,
                last_commit_hash=scan_from_this_commit,
                repo_name=self.name,
            )
    except subprocess.CalledProcessError:
        # git failed (presumably the stored commit hash is no longer valid —
        # confirm); resync state and return the still-empty collection.
        self.update()
        return secrets

    if self.baseline_filename:
        baseline = self.storage.get_baseline_file(self.baseline_filename)
        if baseline:
            # Drop findings already tracked by the repo's own baseline file.
            baseline_collection = SecretsCollection.load_baseline_from_string(baseline)
            secrets = get_secrets_not_in_baseline(secrets, baseline_collection)

    return secrets
def test_nothing_new(self):
    """A finding identical to a baseline entry is filtered out entirely."""
    # We want a secret, but just a default secret (no overriding parameters)
    new_findings = secrets_collection_factory([{}])
    baseline = secrets_collection_factory([{}])

    results = get_secrets_not_in_baseline(new_findings, baseline)

    # Everything was filtered out by the baseline.
    assert not results.data

    # The baseline itself must be left untouched.
    assert len(baseline.data) == 1
    baseline_secret = next(iter(baseline.data['filename']))
    assert baseline_secret.lineno == 1
def test_nothing_new(self):
    """Filtering against an identical baseline yields no results."""
    # Default secrets only (no overriding parameters).
    findings = secrets_collection_factory([{}])
    baseline = secrets_collection_factory([{}])

    filtered = get_secrets_not_in_baseline(findings, baseline)

    # The baseline swallowed everything...
    assert len(filtered.data) == 0

    # ...without itself being modified.
    assert len(baseline.data) == 1
    assert next(iter(baseline.data['filename'])).lineno == 1
def test_new_file(self):
    """A secret in a file absent from the baseline is reported."""
    new_findings = secrets_collection_factory([{'filename': 'filename1'}])
    baseline = secrets_collection_factory([{'filename': 'filename2'}])
    backup_baseline = baseline.data.copy()

    results = get_secrets_not_in_baseline(new_findings, baseline)

    assert len(results.data) == 1
    assert 'filename1' in results.data
    # Filtering must not mutate the baseline.
    assert baseline.data == backup_baseline
def scan(self, exclude_files_regex=None, exclude_lines_regex=None):
    """Fetches latest changes, and scans the git diff between last_commit_hash
    and HEAD.

    :raises: subprocess.CalledProcessError

    :type exclude_files_regex: str|None
    :param exclude_files_regex: A regex matching filenames to skip over.

    :type exclude_lines_regex: str|None
    :param exclude_lines_regex: A regex matching lines to skip over.

    :rtype: SecretsCollection
    :returns: secrets found.
    """
    self.storage.fetch_new_changes()

    default_plugins = initialize_plugins.from_parser_builder(
        self.plugin_config,
        exclude_lines_regex=exclude_lines_regex,
    )
    # TODO Issue 17: Ignoring self.exclude_regex, using the server scan CLI arg
    secrets = SecretsCollection(
        plugins=default_plugins,
        exclude_files=exclude_files_regex,
        exclude_lines=exclude_lines_regex,
    )

    try:
        diff = self.storage.get_diff(self.last_commit_hash)
    except subprocess.CalledProcessError:
        # git failed to produce the diff (presumably last_commit_hash is no
        # longer valid — confirm); resync and return the empty collection.
        self.update()
        return secrets

    secrets.scan_diff(
        diff,
        baseline_filename=self.baseline_filename,
        last_commit_hash=self.last_commit_hash,
        repo_name=self.name,
    )

    if self.baseline_filename:
        baseline = self.storage.get_baseline_file(self.baseline_filename)
        if baseline:
            # Drop findings already tracked by the repo's own baseline file.
            baseline_collection = SecretsCollection.load_baseline_from_string(
                baseline)
            secrets = get_secrets_not_in_baseline(secrets, baseline_collection)

    return secrets
def main(argv=None):
    """Hook entry point: scan files for secrets and diff against a baseline.

    Returns 1 on new secrets, an invalid baseline, or a baseline rewrite;
    0 otherwise.
    """
    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        CustomLog.enableDebug(args.verbose)

    # If baseline is provided, we first want to make sure
    # it's valid, before doing any further computation.
    try:
        baseline_collection = get_baseline(args.baseline[0])
    except (IOError, ValueError):
        # Error logs handled in load_baseline_from_file logic.
        return 1

    results = find_secrets_in_files(args)
    if baseline_collection:
        original_results = results
        results = get_secrets_not_in_baseline(results, baseline_collection)

    if results.data:
        pretty_print_diagnostics(results)
        return 1

    if not baseline_collection:
        return 0

    # Only attempt baseline modifications if we don't find any new secrets
    successful_update = update_baseline_with_removed_secrets(
        original_results,
        baseline_collection,
        args.filenames,
    )
    if not successful_update:
        return 0

    with open(args.baseline[0], 'w') as f:
        f.write(
            json.dumps(
                baseline_collection.format_for_baseline_output(),
                indent=2,
            )
        )

    # The pre-commit framework should automatically detect a file change
    # and print a relevant error message.
    return 1
def main(argv=None):
    """Scan the given files and fail (exit 1) on any secret not in baseline."""
    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)

    try:
        # If baseline is provided, we first want to make sure
        # it's valid, before doing any further computation.
        baseline_collection = get_baseline(args.baseline[0])
    except (IOError, ValueError):
        # Error logs handled within logic.
        return 1

    results = find_secrets_in_files(args)
    if baseline_collection:
        original_results = results
        results = get_secrets_not_in_baseline(results, baseline_collection)

    if results.data:
        pretty_print_diagnostics(results)
        return 1

    if not baseline_collection:
        return 0

    # No new secrets: prune baseline entries whose secrets were removed.
    if update_baseline_with_removed_secrets(
        original_results,
        baseline_collection,
        args.filenames,
    ):
        _write_to_baseline_file(
            args.baseline[0],
            baseline_collection.format_for_baseline_output(),
        )
        # The pre-commit framework should automatically detect a file change
        # and print a relevant error message.
        return 1

    return 0
def test_rolled_creds(self):
    """Same line, different secret"""
    new_findings = secrets_collection_factory([{'secret': 'secret_new'}])
    baseline = secrets_collection_factory([{'secret': 'secret'}])
    backup_baseline = baseline.data.copy()

    results = get_secrets_not_in_baseline(new_findings, baseline)

    # The rotated credential counts as a brand-new finding.
    assert len(results.data['filename']) == 1

    expected = PotentialSecret('type', 'filename', 1, 'secret_new')
    assert (
        results.data['filename'][expected].secret_hash
        == PotentialSecret.hash_secret('secret_new')
    )

    # The baseline must be left unmodified.
    assert baseline.data == backup_baseline
def test_new_file(self):
    """Findings in a previously-unseen file survive baseline filtering."""
    new_findings = secrets_collection_factory(
        [{'filename': 'filename1'}],
    )
    baseline = secrets_collection_factory(
        [{'filename': 'filename2'}],
    )
    snapshot = baseline.data.copy()

    filtered = get_secrets_not_in_baseline(new_findings, baseline)

    assert len(filtered.data) == 1
    assert 'filename1' in filtered.data
    # The baseline is read-only to the filter.
    assert baseline.data == snapshot
def test_new_secret_line_old_file(self):
    """Same file, new line with potential secret"""
    new_findings = secrets_collection_factory([
        {'secret': 'secret1', 'lineno': 1},
    ])
    baseline = secrets_collection_factory([
        {'secret': 'secret2', 'lineno': 2},
    ])
    backup_baseline = baseline.data.copy()

    results = get_secrets_not_in_baseline(new_findings, baseline)

    assert len(results.data['filename']) == 1

    lookup_key = PotentialSecret('type', 'filename', 1, 'secret1')
    found = results.data['filename'][lookup_key]
    assert found.secret_hash == PotentialSecret.hash_secret('secret1')

    # Baseline contents untouched.
    assert baseline.data == backup_baseline
def test_rolled_creds(self):
    """Same line, different secret"""
    new_findings = secrets_collection_factory(
        [{'secret': 'secret_new'}],
    )
    baseline = secrets_collection_factory(
        [{'secret': 'secret'}],
    )
    before = baseline.data.copy()

    results = get_secrets_not_in_baseline(new_findings, baseline)

    assert len(results.data['filename']) == 1

    probe = PotentialSecret('type', 'filename', 1, 'secret_new')
    matched = results.data['filename'][probe]
    assert matched.secret_hash == PotentialSecret.hash_secret('secret_new')

    # Baseline untouched.
    assert baseline.data == before
def test_new_file_excluded(self):
    """Findings matching the baseline's exclude_files regex are dropped."""
    new_findings = secrets_collection_factory([
        {'filename': 'filename1'},
        {'filename': 'filename2'},
    ])
    baseline = secrets_collection_factory([
        {'filename': 'filename3'},
    ])
    backup_baseline = baseline.data.copy()

    # Excluding filename1 leaves only the filename2 finding.
    baseline.exclude_files = 'filename1'
    results = get_secrets_not_in_baseline(new_findings, baseline)

    assert len(results.data) == 1
    assert 'filename1' not in results.data
    assert baseline.data == backup_baseline
def test_new_secret_line_old_file(self):
    """Same file, new line with potential secret"""
    new_findings = secrets_collection_factory(
        [{'secret': 'secret1', 'lineno': 1}],
    )
    baseline = secrets_collection_factory(
        [{'secret': 'secret2', 'lineno': 2}],
    )
    original_baseline_data = baseline.data.copy()

    results = get_secrets_not_in_baseline(new_findings, baseline)

    # Only the line-1 secret should remain after filtering.
    assert len(results.data['filename']) == 1
    key = PotentialSecret('type', 'filename', 1, 'secret1')
    assert results.data['filename'][key].secret_hash == \
        PotentialSecret.hash_secret('secret1')

    assert baseline.data == original_baseline_data
def scan(self):
    """Clones the repo, and scans the git diff between last_commit_hash and HEAD.

    :raises: subprocess.CalledProcessError
    """
    self.clone_and_pull_repo()

    diff = self._get_latest_changes()
    baseline = self._get_baseline()

    plugins = initialize(self.plugin_config)
    secrets = SecretsCollection(plugins, self.exclude_regex)
    secrets.scan_diff(
        diff,
        baseline_filename=baseline,
        last_commit_hash=self.last_commit_hash,
        repo_name=self.name,
    )

    if not baseline:
        return secrets

    # Drop any findings already tracked by the baseline.
    known = SecretsCollection.load_baseline_from_string(baseline)
    return get_secrets_not_in_baseline(secrets, known)
def _perform_scan(args, plugins, automaton, word_list_hash):
    """Run a baseline-initializing scan, honoring any existing old baseline.

    :param args: output of `argparse.ArgumentParser.parse_args`
    :param plugins: tuple of initialized plugins

    :type automaton: ahocorasick.Automaton|None
    :param automaton: optional automaton for ignoring certain words.

    :type word_list_hash: str|None
    :param word_list_hash: optional iterated sha1 hash of the words in the word list.

    :rtype: dict
    """
    old_baseline = _get_existing_baseline(args.old_baseline)

    if old_baseline:
        plugins = initialize.merge_plugins_from_baseline(
            _get_plugins_from_baseline(old_baseline),
            args,
            automaton=automaton,
        )

        # Favors CLI arguments over existing baseline configuration
        if not args.exclude_files:
            args.exclude_files = _get_exclude_files(old_baseline)
        if not args.exclude_lines and old_baseline.get('exclude'):
            args.exclude_lines = old_baseline['exclude']['lines']
        if not args.word_list_file and old_baseline.get('word_list'):
            args.word_list_file = old_baseline['word_list']['file']
        if not args.custom_plugin_paths and old_baseline.get('custom_plugin_paths'):
            args.custom_plugin_paths = old_baseline['custom_plugin_paths']

    # If we have knowledge of an existing baseline file, we should use
    # that knowledge and add it to our exclude_files regex.
    if args.old_baseline:
        _add_baseline_to_exclude_files(args)

    new_baseline = baseline.initialize(
        path=args.path,
        plugins=plugins,
        custom_plugin_paths=args.custom_plugin_paths,
        exclude_files_regex=args.exclude_files,
        exclude_lines_regex=args.exclude_lines,
        word_list_file=args.word_list_file,
        word_list_hash=word_list_hash,
        should_scan_all_files=args.all_files,
    )

    if old_baseline:
        # Report only secrets that are new relative to the old baseline.
        new_baseline = baseline.get_secrets_not_in_baseline(
            new_baseline,
            get_baseline(args.old_baseline[0]),
        )

    return new_baseline.format_for_baseline_output()
def main(argv=None):
    """Scan for secrets and audit the baseline.

    Exit codes (as visible from the returns below):
      0 - no new secrets, no audit findings
      1 - new secrets found, or the baseline could not be loaded
      2 - baseline contains verified but non-audited secrets
      3 - the baseline file itself was updated
      4 - baseline contains non-audited secrets (with --fail-on-non-audited)
    """
    version_check()
    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)
    try:
        # If baseline is provided, we first want to make sure
        # it's valid, before doing any further computation.
        baseline_collection = get_baseline(
            args.baseline[0],
            plugin_filenames=args.plugin_filenames,
        )
    except (IOError, TypeError, ValueError):
        # Error logs handled within logic.
        return 1

    automaton = None
    # NOTE(review): word_list_hash is assigned but never read again in this
    # function.
    word_list_hash = None
    if args.word_list_file:
        automaton, word_list_hash = build_automaton(args.word_list_file)

    plugins = initialize.from_parser_builder(
        args.plugins,
        exclude_lines_regex=args.exclude_lines,
        automaton=automaton,
        should_verify_secrets=not args.no_verify,
        plugin_filenames=args.plugin_filenames,
    )

    # Merge plugins from baseline
    if baseline_collection:
        plugins = initialize.merge_plugins_from_baseline(
            baseline_collection.plugins,
            args,
            automaton,
        )
        baseline_collection.plugins = plugins

    results_collection = find_secrets_in_files(args, plugins)
    if baseline_collection:
        original_results_collection = results_collection
        results_collection = get_secrets_not_in_baseline(
            results_collection,
            baseline_collection,
        )

    if len(results_collection.data) > 0:
        pretty_print_diagnostics_for_new_secrets(results_collection)
        return 1

    # if no baseline been supplied
    if not baseline_collection:
        return 0

    # Only attempt baseline modifications if we don't find any new secrets
    baseline_modified = trim_baseline_of_removed_secrets(
        original_results_collection,
        baseline_collection,
        args.filenames,
    )

    if VERSION != baseline_collection.version:
        baseline_collection.version = VERSION
        baseline_modified = True

    if baseline_modified:
        write_baseline_to_file(
            filename=args.baseline[0],
            data=baseline_collection.format_for_baseline_output(),
        )
        log.error(
            'The baseline file was updated.\n'
            'Probably to keep line numbers of secrets up-to-date.\n'
            'Please `git add {}`, thank you.\n\n'.format(args.baseline[0]),
        )
        return 3

    # check if there are verified but haven't been audited secrets
    verified_non_audited = get_verified_non_audited_secrets_from_baseline(
        baseline_collection,
    )
    if len(verified_non_audited.data) > 0:
        pretty_print_diagnostics_for_verified_non_audited(verified_non_audited)
        return 2

    # check if there are non-audited secrets
    if args.fail_on_non_audited:
        non_audited = get_non_audited_secrets_from_baseline(
            baseline_collection,
        )
        if len(non_audited.data) > 0:
            pretty_print_diagnostics_for_non_audited(non_audited)
            return 4

    return 0
def main(argv=sys.argv[1:]):
    """Scan for secrets against a baseline; never rewrite the baseline file."""
    # NOTE(review): the default `sys.argv[1:]` is evaluated once at import
    # time; `argv=None` would be safer if the project's parse_args treats
    # None like argparse does — confirm before changing.
    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)
    try:
        # If baseline is provided, we first want to make sure
        # it's valid, before doing any further computation.
        baseline_collection = get_baseline(args.baseline[0])
    except (IOError, TypeError, ValueError):
        # Error logs handled within logic.
        return 1

    automaton = None
    # NOTE(review): word_list_hash is assigned but never read again in this
    # function.
    word_list_hash = None
    if args.word_list_file:
        automaton, word_list_hash = build_automaton(args.word_list_file)

    plugins = initialize.from_parser_builder(
        plugins_dict=args.plugins,
        custom_plugin_paths=args.custom_plugin_paths,
        exclude_lines_regex=args.exclude_lines,
        automaton=automaton,
        should_verify_secrets=not args.no_verify,
    )

    # Merge plugins from baseline
    if baseline_collection:
        plugins = initialize.merge_plugins_from_baseline(
            baseline_plugins=baseline_collection.plugins,
            args=args,
            automaton=automaton,
        )
        baseline_collection.plugins = plugins

    results = find_secrets_in_files(args, plugins)
    if baseline_collection:
        original_results = results
        results = get_secrets_not_in_baseline(
            results,
            baseline_collection,
        )

    if len(results.data) > 0:
        pretty_print_diagnostics(results)
        return 1

    if not baseline_collection:
        return 0

    # Only attempt baseline modifications if we don't find any new secrets
    baseline_modified = trim_baseline_of_removed_secrets(
        original_results,
        baseline_collection,
        args.filenames,
    )

    if VERSION != baseline_collection.version:
        baseline_collection.version = VERSION
        baseline_modified = True

    # adding this line as we don't want the modification of baseline file.
    # NOTE(review): this unconditional override makes the entire
    # `if baseline_modified:` branch below dead code, so this function can
    # only ever return 0 or 1 from here on — confirm this is intended.
    baseline_modified = False

    if baseline_modified:
        write_baseline_to_file(
            filename=args.baseline[0],
            data=baseline_collection.format_for_baseline_output(),
        )
        log.error(
            'The baseline file was updated.\n'
            'Probably to keep line numbers of secrets up-to-date.\n'
            'Please `git add {}`, thank you.\n\n'.format(args.baseline[0]),
        )
        return 3

    return 0