def scan(self, exclude_files_regex=None, exclude_lines_regex=None, scan_head=False):
    """Fetch the latest changes and scan the repository diff, one file at a time.

    The diff is taken from ``self.last_commit_hash`` unless ``scan_head`` is
    truthy, in which case it is taken from the commit hash returned by
    ``git.get_empty_tree_commit_hash()``.

    :raises: subprocess.CalledProcessError
        Only errors raised while listing / diffing / scanning files are
        handled here; calls outside that step are not guarded.

    :type exclude_files_regex: str|None
    :param exclude_files_regex: A regex matching filenames to skip over.

    :type exclude_lines_regex: str|None
    :param exclude_lines_regex: A regex matching lines to skip over.

    :type scan_head: bool
    :param scan_head: If truthy, diff against the empty-tree commit hash
        instead of ``self.last_commit_hash``.

    :rtype: SecretsCollection
    :returns: secrets found. When a baseline file is configured and
        non-empty, only the secrets not already in that baseline.
    """
    self.storage.fetch_new_changes()

    plugins = initialize_plugins.from_parser_builder(
        self.plugin_config,
        exclude_lines_regex=exclude_lines_regex,
    )

    # TODO Issue 17: Ignoring self.exclude_regex, using the server scan CLI arg
    found = SecretsCollection(
        plugins=plugins,
        exclude_files=exclude_files_regex,
        exclude_lines=exclude_lines_regex,
    )

    if scan_head:
        base_commit = git.get_empty_tree_commit_hash()
    else:
        base_commit = self.last_commit_hash

    try:
        changed_files = self.storage.get_diff_name_only(base_commit)

        # Diff and scan per file so we don't hit an OOM if the commit diff
        # is too large.
        for changed_file in changed_files:
            single_file_diff = self.storage.get_diff(base_commit, changed_file)
            found.scan_diff(
                single_file_diff,
                baseline_filename=self.baseline_filename,
                last_commit_hash=base_commit,
                repo_name=self.name,
            )
    except subprocess.CalledProcessError:
        # The diff step failed: call update() and hand back whatever has
        # been collected so far, without baseline filtering.
        self.update()
        return found

    if not self.baseline_filename:
        return found

    baseline_contents = self.storage.get_baseline_file(self.baseline_filename)
    if not baseline_contents:
        return found

    known_secrets = SecretsCollection.load_baseline_from_string(baseline_contents)
    return get_secrets_not_in_baseline(found, known_secrets)
def scan(self, exclude_files_regex=None, exclude_lines_regex=None):
    """Fetch the latest changes and scan the diff since ``self.last_commit_hash``.

    :raises: subprocess.CalledProcessError
        Only an error raised while obtaining the diff is handled here;
        the other calls are not guarded.

    :type exclude_files_regex: str|None
    :param exclude_files_regex: A regex matching filenames to skip over.

    :type exclude_lines_regex: str|None
    :param exclude_lines_regex: A regex matching lines to skip over.

    :rtype: SecretsCollection
    :returns: secrets found. When a baseline file is configured and
        non-empty, only the secrets not already in that baseline.
    """
    self.storage.fetch_new_changes()

    plugins = initialize_plugins.from_parser_builder(
        self.plugin_config,
        exclude_lines_regex=exclude_lines_regex,
    )

    # TODO Issue 17: Ignoring self.exclude_regex, using the server scan CLI arg
    found = SecretsCollection(
        plugins=plugins,
        exclude_files=exclude_files_regex,
        exclude_lines=exclude_lines_regex,
    )

    try:
        repo_diff = self.storage.get_diff(self.last_commit_hash)
    except subprocess.CalledProcessError:
        # Could not compute the diff: call update() and return the
        # (still empty) collection.
        self.update()
        return found

    found.scan_diff(
        repo_diff,
        baseline_filename=self.baseline_filename,
        last_commit_hash=self.last_commit_hash,
        repo_name=self.name,
    )

    if not self.baseline_filename:
        return found

    baseline_contents = self.storage.get_baseline_file(self.baseline_filename)
    if not baseline_contents:
        return found

    known_secrets = SecretsCollection.load_baseline_from_string(baseline_contents)
    return get_secrets_not_in_baseline(found, known_secrets)
def test_filename_filters_are_invoked_first():
    """Scanning the sample diff with a filename-exclusion filter records no files.

    A single ``should_exclude_file`` filter with the pattern
    ``test|baseline`` is configured before the diff is scanned.
    """
    filename_filter = {
        'path': 'detect_secrets.filters.regex.should_exclude_file',
        'pattern': [
            'test|baseline',
        ],
    }
    get_settings().configure_filters([filename_filter])

    collection = SecretsCollection()
    with open('test_data/sample.diff') as diff_file:
        diff_text = diff_file.read()
        collection.scan_diff(diff_text)

    # Presumably every file in the sample diff matches the pattern, so
    # nothing should be left to report.
    assert len(collection.files) == 0
def test_success():
    """Scanning the sample diff with one plugin and no filters flags three files."""
    settings = {
        'plugins_used': [
            {
                'name': 'HexHighEntropyString',
                'limit': 3,
            },
        ],
        # Explicitly run with no filters.
        'filters_used': [],
    }
    expected_files = {
        'detect_secrets/core/baseline.py',
        'tests/core/secrets_collection_test.py',
        '.secrets.baseline',
    }

    with transient_settings(settings):
        collection = SecretsCollection()
        with open('test_data/sample.diff') as diff_file:
            diff_text = diff_file.read()
            collection.scan_diff(diff_text)

    assert collection.files == expected_files
def scan(self):
    """Clone/pull the repo and scan its latest changes for secrets.

    Per the original description, the diff covers the changes between
    last_commit_hash and HEAD (as produced by ``_get_latest_changes``).

    :raises: subprocess.CalledProcessError

    :returns: the secrets found. When ``_get_baseline()`` returns a truthy
        value, only the secrets not already present in that baseline.
    """
    self.clone_and_pull_repo()
    latest_diff = self._get_latest_changes()
    baseline = self._get_baseline()

    found = SecretsCollection(initialize(self.plugin_config), self.exclude_regex)

    # NOTE(review): `baseline` is passed as `baseline_filename` here but is
    # parsed as baseline *contents* below -- confirm which one
    # `_get_baseline()` actually returns.
    found.scan_diff(
        latest_diff,
        baseline_filename=baseline,
        last_commit_hash=self.last_commit_hash,
        repo_name=self.name,
    )

    if not baseline:
        return found

    known_secrets = SecretsCollection.load_baseline_from_string(baseline)
    return get_secrets_not_in_baseline(found, known_secrets)