Example #1
def scan_repo(args):
    """Returns 0 on success"""
    try:
        repo = tracked_repo_factory(
            args.local,
            bool(getattr(args, 's3_config', None)),
        ).load_from_file(
            args.repo,
            args.root_dir,
            s3_config=getattr(args, 's3_config', None),
        )
    except FileNotFoundError:
        log.error('Unable to find repo: %s', args.repo)
        return 1

    secrets = repo.scan(
        exclude_files_regex=args.exclude_files,
        exclude_lines_regex=args.exclude_lines,
    )

    if (len(secrets.data) > 0) or args.always_run_output_hook:
        _alert_on_secrets_found(repo, secrets.json(), args.output_hook)

    if args.always_update_state or (
        len(secrets.data) == 0 and not args.dry_run
    ):
        _update_tracked_repo(repo)

    return 0
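A hedged usage sketch for the function above: the attribute names on the namespace mirror the reads inside scan_repo, while the concrete values (repo URL, root directory) are hypothetical.

from argparse import Namespace

args = Namespace(
    local=False,                    # remote repo, not a local checkout
    repo='git@github.com:example/repo.git',   # hypothetical
    root_dir='/tmp/detect-secrets-server',    # hypothetical
    exclude_files=None,
    exclude_lines=None,
    always_run_output_hook=False,
    always_update_state=False,
    dry_run=True,                   # skip _update_tracked_repo on a clean scan
    output_hook=None,               # a BaseHook instance in real usage
)
# s3_config is read via getattr, so it may be omitted entirely.
exit_code = scan_repo(args)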
Example #2
def _git(directory, *args, **kwargs):
    try:
        output = subprocess.check_output(
            ['git', '--git-dir', directory] + list(args),
            stderr=subprocess.STDOUT,
        ).decode('utf-8', errors='ignore')

        # This is to fix https://github.com/matiasb/python-unidiff/issues/54
        if not kwargs.get('should_strip_output', True):
            return output
        return output.strip()
    except subprocess.CalledProcessError as e:
        error_message = e.output.decode('utf-8')

        # This error happens during scanning and means the repo is empty;
        # bail out of the scan process and log an error.
        if re.match(r"fatal: couldn't find remote ref (None|HEAD)",
                    error_message):
            # The directory is the best identifier available here without
            # drastic rewrites; its hashed path correlates to the repo.
            log.error("Empty repository cannot be scanned: %s", directory)
            sys.exit(1)
            # TODO: This will not work if the scan loops over multiple repos,
            # but it is fine while each invocation scans a single repo.

        # This error happens during initialization and means the repo is
        # empty; returning None still allows the repo metadata to be written
        # to /tracked.
        elif re.match(
                r"fatal: ambiguous argument 'HEAD': unknown revision or path not in the working tree.",
                error_message):
            return None
        else:
            raise
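A short usage sketch: _git shells out to `git --git-dir <directory> ...`, so the first argument is the repository's git directory (the path here is hypothetical). Passing should_strip_output=False preserves the trailing newline, which matters for unidiff parsing per the comment above.

head_sha = _git('/tmp/repos/example/.git', 'rev-parse', 'HEAD')
diff = _git(
    '/tmp/repos/example/.git',
    'diff', 'HEAD~1', 'HEAD',
    should_strip_output=False,
)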
Example #3
    def scan_diff(
        self,
        diff,
        baseline_filename='',
        last_commit_hash='',
        repo_name='',
    ):
        """For optimization purposes, our scanning strategy focuses on looking
        at incremental differences, rather than re-scanning the codebase every time.
        This function supports this, and adds information to self.data.

        :type diff: str
        :param diff: diff string.
                     e.g. the output of `git diff <fileA> <fileB>`

        :type baseline_filename: str
        :param baseline_filename: if there are any baseline secrets, the baseline
                                  file will contain their hashes. By specifying it,
                                  we can skip the baseline file itself, so those
                                  hashes are not flagged as new secrets.

        :type last_commit_hash: str
        :param last_commit_hash: used for logging only -- the last commit hash we saved

        :type repo_name: str
        :param repo_name: used for logging only -- the name of the repo
        """
        try:
            patch_set = PatchSet.from_string(diff)
        except UnidiffParseError:  # pragma: no cover
            alert = {
                'alert': 'UnidiffParseError',
                'hash': last_commit_hash,
                'repo_name': repo_name,
            }
            log.error(alert)
            raise

        if self.exclude_regex:
            regex = re.compile(self.exclude_regex, re.IGNORECASE)

        for patch_file in patch_set:
            filename = patch_file.path
            # If the file matches the exclude_regex, we skip it
            if self.exclude_regex and regex.search(filename):
                continue

            if filename == baseline_filename:
                continue

            for results, plugin in self._results_accumulator(filename):
                results.update(
                    self._extract_secrets_from_patch(
                        patch_file,
                        plugin,
                        filename,
                    ),
                )
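A minimal usage sketch, assuming `collection` is an already-initialized SecretsCollection; the commit hash and repo name are hypothetical and only feed the error log.

import subprocess

diff = subprocess.check_output(
    ['git', 'diff', 'HEAD~1', 'HEAD'],
).decode('utf-8')
collection.scan_diff(
    diff,
    baseline_filename='.secrets.baseline',
    last_commit_hash='deadbeef',    # hypothetical, logging only
    repo_name='example/repo',       # hypothetical, logging only
)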
Example #4
    def scan_diff(
        self,
        diff,
        baseline_filename='',
        last_commit_hash='',
        repo_name='',
    ):
        """For optimization purposes, our scanning strategy focuses on looking
        at incremental differences, rather than re-scanning the codebase every time.
        This function supports this, and adds information to self.data.

        :type diff: str
        :param diff: diff string.
                     e.g. The output of `git diff <fileA> <fileB>`

        :type baseline_filename: str
        :param baseline_filename: if there are any baseline secrets, the baseline
                                  file will contain their hashes. By specifying it,
                                  we can skip the baseline file itself, so those
                                  hashes are not flagged as new secrets.

        :type last_commit_hash: str
        :param last_commit_hash: used for logging only -- the last commit hash we saved

        :type repo_name: str
        :param repo_name: used for logging only -- the name of the repo
        """
        try:
            patch_set = PatchSet.from_string(diff)
        except UnidiffParseError:  # pragma: no cover
            alert = {
                'alert': 'UnidiffParseError',
                'hash': last_commit_hash,
                'repo_name': repo_name,
            }
            log.error(alert)
            raise

        if self.exclude_regex:
            regex = re.compile(self.exclude_regex, re.IGNORECASE)

        for patch_file in patch_set:
            filename = patch_file.path
            # If the file matches the exclude_regex, we skip it
            if self.exclude_regex and regex.search(filename):
                continue

            if filename == baseline_filename:
                continue

            for results, plugin in self._results_accumulator(filename):
                results.update(
                    self._extract_secrets_from_patch(
                        patch_file,
                        plugin,
                        filename,
                    ),
                )
Example #5
def open_config_file(config_file):
    try:
        with codecs.open(config_file) as f:
            data = yaml.safe_load(f)

    except IOError:
        log.error('Unable to open config file: %s', config_file)

        raise

    return data
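A one-line sketch of the happy path; the filename follows the `repos.yaml` convention used elsewhere in this codebase, and an unreadable file logs an error and re-raises IOError.

config = open_config_file('repos.yaml')
tracked_entries = config.get('tracked', [])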
Example #6
def _should_discard_tracked_repo_in_config(tracked_repo):
    try:
        if tracked_repo.get('is_local_repo', False):
            is_valid_file(tracked_repo['repo'])
        else:
            is_git_url(tracked_repo['repo'])

        return False
    except argparse.ArgumentTypeError as e:
        # We log the error, rather than hard failing, because we don't want
        # to hard fail if one out of many repositories are bad.
        log.error(str(e))
        return True
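A hedged sketch pairing this validator with a parsed config (see the open_config_file example above): invalid entries are logged and dropped rather than failing the whole run.

config = open_config_file('repos.yaml')
valid_entries = [
    entry
    for entry in config.get('tracked', [])
    if not _should_discard_tracked_repo_in_config(entry)
]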
Example #7
    def load_baseline_from_string(cls, string):
        """Initializes a SecretsCollection object from string.

        :type string: str
        :param string: string to load SecretsCollection from.

        :rtype: SecretsCollection
        :raises: IOError
        """
        try:
            return cls._load_baseline_from_dict(json.loads(string))
        except (IOError, ValueError):
            log.error('Incorrectly formatted baseline!')
            raise
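A minimal sketch, assuming SecretsCollection is importable and a baseline produced by `detect-secrets scan` exists at the conventional path; a malformed file logs the error and re-raises.

with open('.secrets.baseline') as f:
    secrets = SecretsCollection.load_baseline_from_string(f.read())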
Example #8
def _get_baseline_string_from_file(filename):  # pragma: no cover
    """Breaking this function up for mockability."""
    try:
        with open(filename) as f:
            return f.read()

    except IOError:
        log.error(
            'Unable to open baseline file: {}\n'
            'Please create it via\n'
            '   `detect-secrets scan > {}`\n'
            .format(filename, filename),
        )
        raise
Example #9
    def load_baseline_from_string(cls, string):
        """Initializes a SecretsCollection object from string.

        :type string: str
        :param string: string to load SecretsCollection from.

        :rtype: SecretsCollection
        :raises: IOError
        """
        try:
            return cls.load_baseline_from_dict(json.loads(string))
        except (IOError, ValueError):
            log.error('Incorrectly formatted baseline!')
            raise
Example #10
def _alert_on_secrets_found(repo, secrets, output_hook):
    """
    :type repo: detect_secrets_server.repos.base_tracked_repo.BaseTrackedRepo

    :type secrets: dict
    :param secrets: output of
        detect_secrets.core.secrets_collection.SecretsCollection.json()

    :type output_hook: detect_secrets_server.hooks.base.BaseHook
    """
    log.error('Secrets found in %s', repo.name)

    _set_authors_for_found_secrets(repo, secrets)

    output_hook.alert(repo.name, secrets)
Example #11
def from_plugin_classname(plugin_classname,
                          custom_plugin_paths,
                          exclude_lines_regex=None,
                          automaton=None,
                          should_verify_secrets=False,
                          **kwargs):
    """Initializes a plugin class, given a classname and kwargs.

    :type plugin_classname: str
    :param plugin_classname: subclass of BasePlugin.

    :type custom_plugin_paths: Tuple[str]
    :param custom_plugin_paths: possibly empty tuple of paths that have custom plugins.

    :type exclude_lines_regex: str|None
    :param exclude_lines_regex: optional regex for ignored lines.

    :type automaton: ahocorasick.Automaton|None
    :param automaton: optional automaton for ignoring English-words.

    :type should_verify_secrets: bool
    """
    try:
        klass = import_plugins(custom_plugin_paths)[plugin_classname]
    except KeyError:
        log.error('Error: No such `{}` plugin to initialize.'.format(
            plugin_classname))
        log.error('Chances are you should run `pre-commit autoupdate`.')
        log.error(
            'This error can occur when using a baseline that was made by '
            'a newer detect-secrets version than the one running.',
        )
        log.error(
            'It can also occur if the baseline has custom plugin paths, '
            'but the `--custom-plugins` option was not passed.',
        )
        raise TypeError

    try:
        instance = klass(exclude_lines_regex=exclude_lines_regex,
                         automaton=automaton,
                         should_verify=should_verify_secrets,
                         **kwargs)
    except TypeError:
        log.error('Unable to initialize plugin!')
        raise

    return instance
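A hedged usage sketch: 'Base64HighEntropyString' is a stock detect-secrets plugin classname, and base64_limit is assumed to be one of its constructor kwargs; an unknown kwarg would trip the TypeError branch above.

plugin = from_plugin_classname(
    'Base64HighEntropyString',
    custom_plugin_paths=(),
    exclude_lines_regex=None,
    automaton=None,
    should_verify_secrets=False,
    base64_limit=4.5,    # assumed plugin-specific kwarg
)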
Example #12
    def get_diff(self, from_sha):
        try:
            return git.get_diff(self._repo_location, from_sha)
        except subprocess.CalledProcessError:
            # This sometimes fails if the hash does not exist.
            # There could be a variety of reasons for this, including:
            #    - some sort of rewrite of git history
            #    - this scanner being run on an out-of-date repo
            #
            # To prevent from any further alerting on this, we are going to
            # update the last_commit_hash, to prevent re-alerting on old
            # secrets.
            #
            # TODO: Fix this to be more robust.
            log.error(self._construct_debugging_output(from_sha))

            raise
Example #13
    def load_baseline_from_string(cls, string, plugin_filenames=None):
        """Initializes a SecretsCollection object from string.

        :type string: str
        :param string: string to load SecretsCollection from.

        :type plugin_filenames: tuple
        :param plugin_filenames: list of plugins to import

        :rtype: SecretsCollection
        :raises: IOError
        """
        try:
            return cls.load_baseline_from_dict(
                json.loads(string),
                plugin_filenames=plugin_filenames,
            )
        except (IOError, ValueError):
            log.error('Incorrectly formatted baseline!')
            raise
Example #14
def scan_repo(args):
    """Returns 0 on success"""
    try:
        repo = tracked_repo_factory(
            args.local,
            bool(getattr(args, 's3_config', None)),
        ).load_from_file(
            args.repo,
            args.root_dir,
            s3_config=getattr(args, 's3_config', None),
        )
    except FileNotFoundError:
        log.error('Unable to find repo: %s', args.repo)
        return 1

    # if last_commit_hash is empty, re-clone and see if there's an initial commit hash
    if repo.last_commit_hash is None:
        _clone_and_save_repo(repo)

    secrets = repo.scan(
        exclude_files_regex=args.exclude_files,
        exclude_lines_regex=args.exclude_lines,
        scan_head=args.scan_head,
    )

    if (len(secrets.data) > 0) or args.always_run_output_hook:
        _alert_on_secrets_found(repo, secrets.json(), args.output_hook)

    if args.always_update_state or (
        len(secrets.data) == 0
        and not args.dry_run
        and not args.scan_head
    ):
        _update_tracked_repo(repo)

    return 0
Example #15
def from_plugin_classname(plugin_classname,
                          exclude_lines_regex=None,
                          automaton=None,
                          should_verify_secrets=False,
                          **kwargs):
    """Initializes a plugin class, given a classname and kwargs.

    :type plugin_classname: str
    :param plugin_classname: subclass of BasePlugin.

    :type exclude_lines_regex: str|None
    :param exclude_lines_regex: optional regex for ignored lines.

    :type automaton: ahocorasick.Automaton|None
    :param automaton: optional automaton for ignoring English-words.

    :type should_verify_secrets: bool
    """
    try:
        klass = import_plugins()[plugin_classname]
    except KeyError:
        log.error('Error: No such `{}` plugin to initialize.'.format(
            plugin_classname))
        log.error('Chances are you should run `pre-commit autoupdate`.')
        log.error(
            'This error occurs when using a baseline that was made by '
            'a newer detect-secrets version than the one running.',
        )
        raise TypeError

    try:
        instance = klass(exclude_lines_regex=exclude_lines_regex,
                         automaton=automaton,
                         should_verify=should_verify_secrets,
                         **kwargs)
    except TypeError:
        log.warning('Unable to initialize plugin!')
        raise

    return instance
Example #16
def initialize_repos_from_repo_yaml(repo_yaml,
                                    plugin_sensitivity,
                                    repo_config,
                                    s3_config=None):
    """For expected yaml file format, see `repos.yaml.sample`

    :type repo_yaml: string
    :param repo_yaml: filename of config file to read and parse

    :type plugin_sensitivity: SensitivityValues

    :type repo_config: RepoConfig

    :type s3_config: S3Config

    :return: list of TrackedRepos
    :raises: IOError
    """
    data = open_config_file(repo_yaml)

    output = []
    if data.get('tracked') is None:
        return output

    for entry in data['tracked']:
        sensitivity = plugin_sensitivity
        if entry.get('plugins'):
            # Merge plugin sensitivities
            plugin_dict = plugin_sensitivity._asdict()

            # Use SensitivityValues constructor to convert values
            entry_sensitivity = SensitivityValues(**entry['plugins'])
            plugin_dict.update(entry_sensitivity._asdict())

            sensitivity = SensitivityValues(**plugin_dict)

        entry['plugin_sensitivity'] = sensitivity

        config = repo_config
        if 'baseline_file' in entry:
            config = RepoConfig(
                base_tmp_dir=repo_config.base_tmp_dir,
                exclude_regex=repo_config.exclude_regex,
                baseline=entry['baseline_file'],
            )

        entry['repo_config'] = config

        if entry.get('s3_backed') and s3_config is None:
            log.error(
                ('Unable to load s3 config for %s. Make sure to specify '
                 '--s3-config-file for s3_backed repos!'),
                entry.get('repo'),
            )
            continue
        entry['s3_config'] = s3_config

        # After setting up all arguments, create respective object.
        repo = tracked_repo_factory(
            entry.get('is_local_repo', False),
            entry.get('s3_backed', False),
        )
        output.append(repo(**entry))

    return output
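A hedged sketch of the parsed YAML structure this function consumes, with keys taken from the reads above ('tracked', 'repo', 'is_local_repo', 'plugins', 'baseline_file', 's3_backed') and illustrative values; see `repos.yaml.sample` for the authoritative format.

parsed_repo_yaml = {
    'tracked': [
        {
            'repo': 'git@github.com:example/repo.git',  # hypothetical
            'is_local_repo': False,
            's3_backed': False,
            'baseline_file': '.secrets.baseline',   # optional per-repo override
            'plugins': {},    # optional SensitivityValues overrides
        },
    ],
}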
Example #17
def main(argv=None):
    """
    Expected Usage:
      1. Initialize TrackedRepos from config.yaml, and save to crontab.
      2. Each cron command runs, scanning the git diff from the previously saved commit to now.
      3. If something is found, alert.

    :return: shell error code
    """
    if len(sys.argv) == 1:  # pragma: no cover
        sys.argv.append('-h')

    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)

    plugin_sensitivity = parse_sensitivity_values(args)
    repo_config = parse_repo_config(args)
    s3_config = parse_s3_config(args)

    if args.initialize:
        # initialize sets up the local file storage for tracking
        try:
            tracked_repos = initialize_repos_from_repo_yaml(
                args.initialize,
                plugin_sensitivity,
                repo_config,
                s3_config,
            )
        except IOError:
            # Error handled in initialize_repos_from_repo_yaml
            return 1

        cron_repos = [repo for repo in tracked_repos if repo.save()]
        if not cron_repos:
            return 0

        print('# detect-secrets scanner')
        for repo in cron_repos:
            print('{} {}'.format(
                repo.cron(),
                args.output_hook_command,
            ))

    elif args.add_repo:
        add_repo(
            args.add_repo[0],
            plugin_sensitivity,
            is_local_repo=args.local,
            s3_config=s3_config,
            repo_config=repo_config,
        )

    elif args.scan_repo:
        repo_name = args.scan_repo[0]
        repo = tracked_repo_factory(args.local, bool(s3_config)).load_from_file(
            repo_name, repo_config, s3_config,
        )
        if not repo:
            return 1

        secrets = repo.scan()

        if not secrets:
            return 1

        if len(secrets.data) > 0:
            log.error('SCAN COMPLETE - We found secrets in: %s', repo.name)

            secrets = secrets.json()
            set_authors_for_found_secrets(secrets, repo)

            alert = {
                'alert': 'Secrets found',
                'repo_name': repo.name,
                'secrets': secrets,
            }
            log.error(alert)
            args.output_hook.alert(repo.name, secrets)
        else:
            log.info('SCAN COMPLETE - STATUS: clean for %s', repo.name)

            # Save records, since the latest scan indicates that the most recent commit is clean
            repo.update()
            repo.save(OverrideLevel.ALWAYS)

    return 0
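A hedged invocation sketch; the flag name mirrors the args.scan_repo attribute read above, but the exact CLI spelling lives in parse_args, which is not shown here.

import sys

sys.exit(main(['--scan-repo', 'example/repo']))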
Example #18
def main(argv=None):
    if len(sys.argv) == 1:  # pragma: no cover
        sys.argv.append('-h')

    log.error('here it is')

    args = parse_args(argv)
    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)

    if args.action == 'scan':
        automaton = None
        word_list_hash = None
        if args.word_list_file:
            automaton, word_list_hash = build_automaton(args.word_list_file)

        # Plugins are *always* rescanned with fresh settings, because
        # we want to get the latest updates.
        plugins = initialize.from_parser_builder(
            args.plugins,
            exclude_lines_regex=args.exclude_lines,
            automaton=automaton,
            should_verify_secrets=not args.no_verify,
        )
        if args.string:
            line = args.string

            if isinstance(args.string, bool):
                line = sys.stdin.read().splitlines()[0]

            _scan_string(line, plugins)

        else:
            baseline_dict = _perform_scan(
                args,
                plugins,
                automaton,
                word_list_hash,
            )

            if args.import_filename:
                write_baseline_to_file(
                    filename=args.import_filename[0],
                    data=baseline_dict,
                )
            else:
                print(
                    baseline.format_baseline_for_output(
                        baseline_dict,
                    ),
                )

    elif args.action == 'audit':
        if not args.diff and not args.display_results:
            audit.audit_baseline(args.filename[0])
            return 0

        if args.display_results:
            audit.print_audit_results(args.filename[0])
            return 0

        if len(args.filename) != 2:
            print(
                'Must specify two files to compare!',
                file=sys.stderr,
            )
            return 1

        try:
            audit.compare_baselines(args.filename[0], args.filename[1])
        except audit.RedundantComparisonError:
            print(
                'No difference, because it\'s the same file!',
                file=sys.stderr,
            )

    return 0
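A minimal sketch of driving this entry point directly; 'scan' and '--string' correspond to the args.action and args.string branches above, though the precise flags are defined in parse_args, which is not shown here.

import sys

sys.exit(main(['scan', '--string', 'AKIAIOSFODNN7EXAMPLE']))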