Пример #1
0
    def test_return_correct_commit_hash(self):
        # Scan everything after commit 202564cf776b402800a4aab8bb14fa4624888475,
        # which is immediately followed by a secret-inserting commit:
        # https://github.com/dxa4481/truffleHog/commit/d15627104d07846ac2914a976e8e347a663bbd9b
        since_commit = '202564cf776b402800a4aab8bb14fa4624888475'
        commit_w_secret = 'd15627104d07846ac2914a976e8e347a663bbd9b'
        cross_validating_commit_w_secret_comment = 'Oh no a secret file'

        # Use the in-memory buffer type that matches the interpreter's
        # native string type (text on Python 3, bytes on Python 2).
        if sys.version_info >= (3,):
            tmp_stdout = io.StringIO()
        else:
            tmp_stdout = io.BytesIO()
        bak_stdout = sys.stdout

        # Redirect STDOUT, run the scan, and always restore STDOUT afterwards
        sys.stdout = tmp_stdout
        try:
            truffleHog.find_strings(
                "https://github.com/dxa4481/truffleHog.git",
                since_commit=since_commit,
                printJson=True,
                surpress_output=False)
        finally:
            sys.stdout = bak_stdout

        # Every non-blank captured line is expected to be one JSON document.
        results = [
            json.loads(line)
            for line in tmp_stdout.getvalue().split('\n')
            if line.strip()
        ]
        filtered_results = [
            r for r in results
            if r['commitHash'] == commit_w_secret
            and r['branch'] == 'origin/master'
        ]

        self.assertEqual(1, len(filtered_results))
        self.assertEqual(commit_w_secret, filtered_results[0]['commitHash'])
        # Additionally, cross-validate that the commit comment matches the
        # expected comment
        self.assertEqual(cross_validating_commit_w_secret_comment,
                         filtered_results[0]['commit'].strip())
Пример #2
0
    def test_return_correct_commit_hash(self):
        # The scan starts after d15627104d07846ac2914a976e8e347a663bbd9b; the
        # commit that directly follows it inserts a secret:
        # https://github.com/dxa4481/truffleHog/commit/9ed54617547cfca783e0f81f8dc5c927e3d1e345
        start_after = 'd15627104d07846ac2914a976e8e347a663bbd9b'  # nosec
        secret_commit = '9ed54617547cfca783e0f81f8dc5c927e3d1e345'  # nosec
        expected_message = 'OH no a secret'  # nosec

        # Text buffer on Python 3, byte buffer on Python 2.
        captured = io.StringIO() if sys.version_info >= (3, ) else io.BytesIO()
        real_stdout = sys.stdout

        # Swap STDOUT for the buffer while scanning; put it back no matter what.
        sys.stdout = captured
        try:
            truffleHog.find_strings(
                "https://github.com/dxa4481/truffleHog.git",
                since_commit=start_after,
                print_json=True,
                surpress_output=False)
        finally:
            sys.stdout = real_stdout

        # Decode each non-blank output line as JSON.
        findings = []
        for raw_line in captured.getvalue().split('\n'):
            if raw_line.strip():
                findings.append(json.loads(raw_line))
        hits = [entry for entry in findings
                if entry['commitHash'] == secret_commit]

        self.assertEqual(1, len(hits))
        only_hit = hits[0]
        self.assertEqual(secret_commit, only_hit['commitHash'])
        # The commit message serves as a second, independent check.
        self.assertEqual(expected_message, only_hit['commit'].strip())
Пример #3
0
def truffle(git_url, do_regex, custom_regex):
    """Scan ``git_url`` with truffleHog and return the parsed findings.

    :param git_url: repository URL handed to ``truffleHog.find_strings``.
    :param do_regex: when truthy, ``do_regex=True`` is passed to the scan;
        otherwise the argument is omitted and truffleHog's own default applies.
    :param custom_regex: accepted for interface compatibility; this function
        does not use it.
    :return: dict mapping ``'issue<N>'`` (N counting from 0) to the JSON
        object decoded from the first line of each file listed under
        ``'foundIssues'`` in the scan result.
    """
    # Both scan modes share every option except ``do_regex``, so build the
    # arguments once instead of duplicating the call.
    scan_options = {
        'git_url': git_url,
        'since_commit': None,
        'max_depth': 1000000,
        'printJson': True,
        'do_entropy': True,
        'surpress_output': True,
    }
    if do_regex:
        scan_options['do_regex'] = True
    strings_found = truffleHog.find_strings(**scan_options)

    found = {}
    for count, issue_path in enumerate(strings_found['foundIssues']):
        with open(issue_path, 'r', encoding='utf-8') as issue_file:
            # Only the first line is decoded; read just that line.
            first_line = issue_file.readline().rstrip()
        found['issue%s' % count] = json.loads(first_line, strict=False)
    return found
Пример #4
0
def get_org_repos(orgname, page):
    """Scan every non-private repository listed for ``orgname`` on GitHub.

    Starting at ``page``, repository listings are fetched page by page until
    an empty page is returned; each repository whose ``private`` field is
    falsy is scanned with truffleHog's regex checks (entropy checks off).

    :param orgname: GitHub account name inserted into the ``/users/`` URL.
    :param page: number of the first result page to fetch.
    :return: always ``None``.
    :raises requests.HTTPError: if GitHub answers with an error status.
    """
    # Iterate instead of recursing once per page, so the number of pages is
    # not bounded by the interpreter's recursion limit.
    while True:
        response = requests.get(url='https://api.github.com/users/' + orgname + '/repos?page={}'.format(page))
        # An error payload is a JSON object, not a list of repositories;
        # fail clearly here rather than with a TypeError in the loop below.
        response.raise_for_status()
        repos = response.json()
        if not repos:
            return None
        for item in repos:
            if not item['private']:
                print('searching ' + item["html_url"])
                truffleHog.find_strings(item["html_url"], do_regex=True, do_entropy=False, max_depth=100000)
        page += 1
Пример #5
0
    def test_remove_only_temp_repos(self):
        # Step 1: clone the remote repository. The clone is expected to live
        # under /tmp/ and to be flagged as newly created.
        remote_url = "https://github.com/dxa4481/truffleHog.git"
        clone_result = truffleHog.clone_git_repo(remote_url)
        project_path, created = clone_result
        self.assertTrue(re.search(r'^/tmp/', project_path))
        self.assertTrue(created)

        # Step 2: scan that existing clone through a file:// URL (no new
        # clone requested); the directory must still exist afterwards.
        local_url = 'file://' + project_path
        truffleHog.find_strings(local_url)
        self.assertTrue(os.path.exists(project_path))
Пример #6
0
def get_org_repos(orgname, page):
    """Run truffleHog over all non-private repositories of a GitHub account.

    Pages of the account's repository listing are requested in sequence,
    beginning with ``page``, until GitHub returns an empty page. Every entry
    whose ``private`` field is falsy is scanned with regex checks enabled and
    entropy checks disabled.

    :param orgname: GitHub account name used in the ``/users/`` endpoint.
    :param page: first listing page to request.
    :return: always ``None``.
    :raises requests.HTTPError: if the listing request returns an error
        status.
    """
    # A loop replaces the original tail recursion so that long listings
    # cannot exhaust the recursion limit.
    while True:
        response = requests.get(url='https://api.github.com/users/' + orgname +
                                '/repos?page={}'.format(page))
        # Error responses carry a JSON object rather than a repository list;
        # surface them as HTTP errors instead of a TypeError further down.
        response.raise_for_status()
        repos = response.json()
        if not repos:
            return None
        for item in repos:
            if not item['private']:
                print('searching ' + item["html_url"])
                truffleHog.find_strings(item["html_url"],
                                        do_regex=True,
                                        do_entropy=False,
                                        max_depth=100000)
        page += 1
Пример #7
0
def getMatches(conf, path):
    """Run truffleHog against ``path`` and return the de-duplicated findings.

    :param conf: configuration mapping. Keys read here: ``'flags'`` (extra
        keyword options for the scan), optional ``'regexRules'`` (name ->
        pattern string) and optional ``'sincecommit'``. It is also passed to
        ``printDebug``.
    :param path: repository location passed to truffleHog as ``git_url``.
    :return: list of the values produced by ``issueJson`` for each issue
        file, with duplicates removed and first-seen order kept.
    """
    # Build the options passed to truffleHog.
    flags = (conf['flags'] if 'flags' in conf else None) or {}
    custom_regexes = conf['regexRules'] if 'regexRules' in conf else None
    if custom_regexes:
        # Compile into a fresh dict so the caller's configuration keeps its
        # original pattern strings.
        compiled = {
            name: re.compile(pattern)
            for name, pattern in custom_regexes.items()
        }
        kwargs = buildkwargs(git_url=path, custom_regexes=compiled, **flags)
    else:
        # Also covers a missing 'regexRules' key: previously that case left
        # the options empty, so the scan was invoked without git_url.
        kwargs = buildkwargs(git_url=path, **flags)

    since_commit = conf['sincecommit'] if 'sincecommit' in conf else None
    if since_commit:
        kwargs = buildkwargs(since_commit=since_commit, **kwargs)

    printDebug(
        conf,
        "Calling TruffleHog with the following options\n {}".format(kwargs))
    output = TH.find_strings(**kwargs)
    issue_files = output['foundIssues']
    printDebug(conf, "found issues: {}".format(len(issue_files)))

    matches = []
    for issue_file in issue_files:
        # Close each issue file as soon as it has been parsed.
        with open(issue_file) as handle:
            issue = json.load(handle)
        issue_json = issueJson(issue)
        if issue_json not in matches:
            matches.append(issue_json)
    printDebug(conf, "Found {} Matches".format(len(matches)))
    return matches
Пример #8
0
def get_org_repos(orgname, page):
    """Scan the non-fork repositories of a GitHub account and print findings.

    Listing pages are fetched from ``page`` onwards until an empty page is
    returned. A repository is scanned only when its ``fork`` field is falsy
    and the module-level ``reached`` flag is truthy. Each finding is extended
    with GitHub links, has its diff fields cut to 200 characters, and is
    printed as indented JSON.

    :param orgname: GitHub account name used in the ``/users/`` endpoint.
    :param page: first listing page to request.
    :return: always ``None``.
    :raises requests.HTTPError: if the listing request returns an error
        status.
    """
    # Loop over pages rather than recursing once per page.
    while True:
        response = requests.get(url='https://api.github.com/users/' + orgname +
                                '/repos?page={}'.format(page))
        # Error payloads are JSON objects, not repository lists; report them
        # as HTTP errors instead of failing on item['fork'] below.
        response.raise_for_status()
        repos = response.json()
        if not repos:
            return None
        for item in repos:
            if item['fork'] or not reached:
                continue
            print('searching ' + item["html_url"])
            results = truffleHog.find_strings(item["html_url"],
                                              do_regex=True,
                                              custom_regexes=rules,
                                              do_entropy=False,
                                              max_depth=100000)
            for issue_path in results["foundIssues"]:
                # Read through a context manager so the handle is closed.
                with open(issue_path) as issue_file:
                    d = loads(issue_file.read())
                d['github_url'] = "{}/blob/{}/{}".format(
                    item["html_url"], d['commitHash'], d['path'])
                d['github_commit_url'] = "{}/commit/{}".format(
                    item["html_url"], d['commitHash'])
                # Keep the printed report compact.
                d['diff'] = d['diff'][0:200]
                d['printDiff'] = d['printDiff'][0:200]
                print(dumps(d, indent=4))
        page += 1
Пример #9
0
def search_bitbucket(count, git_urls, username, secret, args, json_repos, total_rep):
    """Clone and scan the Bitbucket repository at ``git_urls[count]``.

    Runs the truffleHog content scan and the sensitive-file scan on the
    clone and writes each non-empty result to its own JSON file. A
    ``GitCommandError`` is logged and swallowed; nothing is returned.

    :param count: index into ``git_urls``; also used in log lines and output
        file names. The caller's counter is not modified.
    :param git_urls: sequence of repository URLs.
    :param username: account name embedded in the authenticated clone URL.
    :param secret: password/token embedded in the authenticated clone URL.
    :param args: options object; ``do_entropy``, ``not_clone``,
        ``since_commit``, ``max_depth`` and ``do_regex`` are read from it.
    :param json_repos: passed through to the clone and scan helpers.
    :param total_rep: total number of repositories, used in log lines only.
    """
    git_url = git_urls[count]
    # Derive the slug before entering the try block so the error handler can
    # always refer to it. Strip the ".git" suffix only when it is present;
    # blindly cutting four characters mangled URLs without that suffix.
    git_slug = git_url.split('/')[-1]
    if git_slug.endswith('.git'):
        git_slug = git_slug[:-4]

    try:
        git_auth_url = git_url.replace('https://', 'https://' + username + ':' + secret + '@')
        do_entropy = truffleHog.str2bool(args.do_entropy)

        project_path = clone_git_repo(git_auth_url, git_url, json_repos, count, args.not_clone)
        logger.info('Rep #{0}. Starting to verify Bitbucket repo #{1} from {2} {3}'.format(count,
                                                                                           count,
                                                                                           total_rep,
                                                                                           git_slug))
        # Search sensitive data using regexChecks.regexes_txt in folder: truffleHog
        found_leaks = truffleHog.find_strings(project_path, git_url, json_repos, args.since_commit,
                                              args.max_depth, args.do_regex, do_entropy)
        found_leaks_file = str(count) + "-code-" + git_slug + ".json"
        if found_leaks:
            write_json_to_file(found_leaks_file, 'checks', found_leaks, False)

        # Search sensitive data using regexChecks.regexes_fs in folder: truffleHog
        found_fs_leaks = truffleHog.searchSensitiveFilesInRepo(project_path, git_url, json_repos)
        fs_file = str(count) + "-fs-" + git_slug + ".json"
        if found_fs_leaks:
            write_json_to_file(fs_file, 'checks', found_fs_leaks, False)
        logger.info('Rep #{0}. Successfully Verified Bitbucket repo #{1} from {2} {3}\n'.format(count,
                                                                                                count,
                                                                                                total_rep,
                                                                                                git_slug))
        print("Repo#", count, ". Slug. ", git_slug)
    except GitCommandError as exception:
        details = str(exception)
        # The failing git command line may contain the authenticated URL;
        # keep the credential out of the log.
        if secret:
            details = details.replace(secret, '***')
        logger.info('Rep #{0}. Exception in parsing repo {1}. Details are - {2}'.format(count,
                                                                                        git_slug,
                                                                                        details))
Пример #10
0
    def run(self, params={}):
        """Run a truffleHog scan described by ``params``.

        Keys read from ``params``: ``git_url``, ``do_entropy``, ``do_regex``,
        ``since_commit``, ``max_depth`` and ``custom_regexes`` (name ->
        pattern string). ``params`` is only read, so the shared default
        dict is never modified.

        :return: ``{"issues": [...]}`` where each entry is the JSON object
            decoded from the first line of an issue file, extended with a
            ``"url"`` key pointing at the commit. If the scan or the parsing
            raises, the error is logged and ``None`` is returned.
        :raises re.error: if a custom pattern does not compile (this happens
            before the guarded section).
        """
        git_url = params.get("git_url")
        do_entropy = params.get("do_entropy")
        do_regex = params.get("do_regex")
        since_commit = params.get("since_commit")
        max_depth = params.get("max_depth")
        raw_regexes = params.get("custom_regexes")
        # Compile into a new dict so the caller's input is left untouched.
        if raw_regexes is None:
            custom_regex = {}
        else:
            custom_regex = {
                key: re.compile(pattern)
                for key, pattern in raw_regexes.items()
            }
        try:
            scan = truffleHog.find_strings(
                git_url,
                printJson=True,
                do_entropy=do_entropy,
                do_regex=do_regex,
                since_commit=since_commit,
                custom_regexes=custom_regex,
                max_depth=max_depth,
                surpress_output=True,
            )
            # Drop only a trailing ".git"; removing it anywhere would corrupt
            # hosts and names such as "www.github.com" or "user.github.io".
            git_url = re.sub(r"\.git/?$", "", git_url)
            issues = []
            for issue_path in scan["foundIssues"]:
                with open(issue_path, "r") as issue_file:
                    # Only the first line of the file is decoded.
                    data = json.loads(issue_file.readline().rstrip(),
                                      strict=False)
                commit_url = git_url + "/commit/" + data["commitHash"]
                data.update({"url": str(commit_url)})
                issues.append(data)
            return {"issues": issues}

        except Exception as error:
            self.logger.error(
                "Please enter the correct variables for the input")
            # Record the underlying cause as well; the generic hint alone
            # makes failures impossible to diagnose.
            self.logger.error("Scan failed: {}".format(error))
Пример #11
0
    def run(self, params={}):
        """Scan a git repository with truffleHog using options from ``params``.

        Keys read from ``params``: ``git_url``, ``do_entropy``, ``do_regex``,
        ``since_commit``, ``max_depth`` and ``custom_regexes`` (name ->
        pattern string). ``params`` is never written to, so the shared
        default dict stays empty.

        :return: ``{'issues': [...]}``; every entry is the JSON object found
            on the first line of an issue file plus a ``'url'`` key with the
            commit link. When the scan or parsing raises, the problem is
            logged and ``None`` is returned.
        :raises re.error: if a custom pattern cannot be compiled (compilation
            happens outside the guarded section).
        """
        git_url = params.get('git_url')
        do_entropy = params.get('do_entropy')
        do_regex = params.get('do_regex')
        since_commit = params.get('since_commit')
        max_depth = params.get('max_depth')
        raw_regexes = params.get('custom_regexes')
        # Build a separate dict of compiled patterns instead of overwriting
        # the strings inside the caller's input.
        if raw_regexes is None:
            custom_regex = {}
        else:
            custom_regex = {
                key: re.compile(pattern)
                for key, pattern in raw_regexes.items()
            }
        try:
            scan = truffleHog.find_strings(git_url,
                                           printJson=True,
                                           do_entropy=do_entropy,
                                           do_regex=do_regex,
                                           since_commit=since_commit,
                                           custom_regexes=custom_regex,
                                           max_depth=max_depth,
                                           surpress_output=True)
            # Remove only a trailing '.git'. The previous unanchored pattern
            # also removed '.git' inside hosts or repository names.
            git_url = re.sub(r'\.git/?$', '', git_url)
            issues = []
            for issue_path in scan['foundIssues']:
                with open(issue_path, 'r') as issue_file:
                    # Only the first line of the file is decoded.
                    data = json.loads(issue_file.readline().rstrip(),
                                      strict=False)
                commit_url = git_url + '/commit/' + data['commitHash']
                data.update({'url': str(commit_url)})
                issues.append(data)
            return {'issues': issues}

        except Exception as error:
            self.logger.error(
                'Please enter the correct variables for the input')
            # Also log the real cause so a failure can be diagnosed.
            self.logger.error('Scan failed: {}'.format(error))
Пример #12
0
def checkRepositories(git_urls):
    """Scan each repository for RSA private keys and save what is found.

    For every URL the truffleHog regex scan is run. Each finding whose
    ``reason`` is ``'RSA private key'`` is appended to
    ``keys/key_paths.txt``, printed to the console and written to
    ``keys/<repository>_private_RSA_<n>.txt``. Errors while handling one
    repository are reported on the console and the loop moves on.

    :param git_urls: sequence of repository URLs to check.
    """
    for index, git_url in enumerate(git_urls):
        try:
            print(f'\n{110*"-"}\n\nChecking {git_url}  ({index + 1}/{len(git_urls)})\n')
            repoStrings = truffleHog.find_strings(git_url, do_regex=True)
            issues = repoStrings['foundIssues']

            keysFound = 0

            for issue in issues:
                # "finding" rather than "json" to avoid shadowing the module name.
                with open(issue) as json_data:
                    finding = load(json_data)

                if finding['reason'] != 'RSA private key':
                    continue
                keysFound += 1

                # Appending key path to key_paths.txt
                with open('keys/key_paths.txt', 'a') as file:
                    file.write(f'Commit link: {git_url}/commit/{finding["commitHash"]}\nPath: {finding["path"]}\n\n')

                print(f'\nRSA private key found in {git_url + ("/" if git_url[-1] != "/" else "") + finding["path"]} on branch {finding["branch"]}\nCommit hash: {finding["commitHash"]}\nDate:{finding["date"]}\n')
                # Take the text between the PEM markers, drop the first and
                # last split fragments, then re-wrap the cleaned lines.
                rsaKey = finding['diff'].split('-----BEGIN RSA PRIVATE KEY-----')[1].split('-----END RSA PRIVATE KEY-----')[0]
                rsaKey = cleanRSA(rsaKey.split('\n')[1:-1])
                rsaKey = ['-----BEGIN RSA PRIVATE KEY-----'] + rsaKey + ['-----END RSA PRIVATE KEY-----']

                # Printing the key to the console
                for line in rsaKey:
                    print(line)

                # Saving the key to a file
                repositoryName = git_url.split('/')[-1]
                with open(f'keys/{repositoryName}_private_RSA_{keysFound}.txt', 'a') as file:
                    file.write('\n'.join(rsaKey))

            if not keysFound:
                print('\nNo private RSA keys found.\n')

        # Catch ordinary errors only, so Ctrl-C and interpreter exit still
        # stop the run instead of being reported as a failed repository.
        except Exception:
            print(f'\n{110*"-"}\n\n\nCouldn\'t seatch {git_url}.\nMake sure the url is correct and that you have the required permissions.\n')
Пример #13
0
 def test_branch(self, rmtree_mock, repo_const_mock, clone_git_repo):
     # The three mocks are injected from outside this block (presumably by
     # patch decorators - confirm against the surrounding class).
     # Requesting a branch must fetch exactly that branch from origin, once.
     fake_repo = MagicMock()
     repo_const_mock.return_value = fake_repo
     truffleHog.find_strings("test_repo", branch="testbranch")
     origin_fetch = fake_repo.remotes.origin.fetch
     origin_fetch.assert_called_once_with("testbranch")
Пример #14
0
def execute_find_secrets_request(request: FindSecretsRequest) -> List[Secret]:
    """
    Executes the search for secrets with the given request

    A path containing a ``.git`` directory is scanned in place. Any other
    path is treated as a remote URL: it is cloned into a temporary directory
    that is removed again before this function returns or raises.

    :param FindSecretsRequest request:
        request object containing the path to the git repository and
        other configurations for the search

    :return: list of secret objects that represent the secrets found by the search

    :raises TrufflehogApiError: if cloning the repository or running the
        search fails; the original exception is chained as the cause
    """
    path = request.path
    repo_config = request.repo_config or RepoConfig()
    search_config = request.search_config or SearchConfig()

    token_key = repo_config.access_token_env_key
    token_exists = token_key and token_key in os.environ

    repo = None
    repo_path = path

    if is_git_dir(path + os.path.sep + ".git"):
        # If repo is local and env key for access token is present, display warning
        if token_exists:
            warnings.warn(
                "Warning: local repository path provided with an access token - "
                "Token will be ignored")
    else:
        # If repo is remote, append access token to path from its env key
        git_url = path
        if token_exists:
            git_url = _append_env_access_token_to_path(path, token_key)

        # We pre-clone the repo to fix a bug that causes truffleHog to crash
        # on Windows machines when run on remote repositories.
        temp_dir = None
        try:
            temp_dir = tempfile.mkdtemp()
            repo = Repo.clone_from(git_url, temp_dir)
        except Exception as e:
            # Only remove a directory created here. If mkdtemp itself failed
            # there is nothing to delete, and the caller's path must never be
            # handed to the delete helper.
            if temp_dir is not None:
                _delete_tempdir(temp_dir)
            raise TrufflehogApiError(e) from e
        repo_path = temp_dir

    # NOTE(review): regex checks are switched on exactly when custom regexes
    # are configured - confirm this coupling is intended.
    do_regex = search_config.regexes

    secrets = None
    try:
        output = truffleHog.find_strings(
            git_url=None,
            since_commit=repo_config.since_commit,
            max_depth=search_config.max_depth,
            do_regex=do_regex,
            do_entropy=search_config.entropy_checks_enabled,
            custom_regexes=search_config.regexes,
            branch=repo_config.branch,
            repo_path=repo_path,
            path_inclusions=search_config.include_search_paths,
            path_exclusions=search_config.exclude_search_paths)
        secrets = _convert_default_output_to_secrets(output)
        _clean_up(output)
    except Exception as e:
        raise TrufflehogApiError(e) from e
    finally:
        # Delete our clone of the remote repo (if it exists), also when the
        # search failed; otherwise the temporary clone would be left behind.
        if repo is not None:
            # truffleHog doesn't close the repo, which causes a bug on Windows
            repo.close()
            _delete_tempdir(repo_path)

    return secrets
Пример #15
0
 def test_unicode_expection(self):
     # The scan passes unless a UnicodeEncodeError escapes from it; any
     # other exception propagates unchanged.
     repo_url = "https://github.com/dxa4481/tst.git"
     try:
         truffleHog.find_strings(repo_url)
     except UnicodeEncodeError:
         self.fail("Unicode print error")
Пример #16
0
 def test_repo_path(self, rmtree_mock, repo_const_mock, clone_git_repo):
     # When an explicit repo_path is supplied, neither the directory-removal
     # mock nor the clone mock may be called.
     local_checkout = "test/path/"
     truffleHog.find_strings("test_repo", repo_path=local_checkout)
     for untouched_mock in (rmtree_mock, clone_git_repo):
         untouched_mock.assert_not_called()
Пример #17
0
 def test_unicode_expection(self):
     # Fail with a readable message if scanning the repository raises a
     # UnicodeEncodeError; other exceptions are not intercepted.
     target = "https://github.com/dxa4481/tst.git"
     try:
         truffleHog.find_strings(target)
     except UnicodeEncodeError:
         self.fail("Unicode print error")
Пример #18
0
 def test_find_strings(self):
     # NOTE(review): the file:// URL points at one developer's machine, so
     # this test can only work where that checkout exists.
     local_repo_url = "file:///Users/xavier/Development/Projects/truffleHog"
     try:
         # The second positional argument is passed through exactly as before.
         truffleHog.find_strings(local_repo_url, False)
     except UnicodeEncodeError:
         self.fail("Unicode print error")