예제 #1
0
    def find_secrets(self, diff):
        """Yield the secrets that detect_secrets reports for the added lines of a diff.

        The diff is scanned with every plugin class returned by
        ``get_mapping_from_secret_type_to_class()`` while the
        ``is_invalid_file`` filter is disabled. Findings shorter than six
        characters, and findings rejected by ``self._blacklist``, are skipped.

        :param diff: unified diff text.
        :return: generator of ``Secret`` objects, one per accepted finding.
        """
        plugins_used = [
            {'name': plugin_type.__name__}
            for plugin_type in get_mapping_from_secret_type_to_class().values()
        ]

        secrets_collection = SecretsCollection()
        with transient_settings({'plugins_used': plugins_used}) as settings:
            settings.disable_filters(
                'detect_secrets.filters.common.is_invalid_file',
            )
            secrets_collection.scan_diff(diff)

        # Added lines per file ({path: {target_line_no: stripped line}}),
        # built lazily the first time a usable secret shows up.
        changes = None

        for file_name, secret in secrets_collection:
            if len(secret.secret_value) < 6:
                continue  # Ignore small secrets to reduce false positives.

            # Only parse the diff if at least one secret was found. Compare
            # against None so an empty mapping is not mistaken for "not parsed".
            if changes is None:
                changes = {}
                for patch_file in PatchSet.from_string(diff):
                    changes[patch_file.path] = {
                        added.target_line_no: added.value.strip()
                        for chunk in patch_file
                        for added in chunk.target_lines()
                        if added.is_added
                    }

            line = changes[secret.filename][secret.line_number]
            if self._blacklist.is_blacklisted(line, file_name, secret.secret_value):
                continue

            # detect_secrets sometimes returns a lowercase version of the
            # secret. Find the real string on the line.
            secret_index = line.lower().find(secret.secret_value.lower())
            if secret_index >= 0:
                secret_value = line[secret_index:secret_index + len(secret.secret_value)]
            else:
                # The value does not appear on the line: slicing from -1 would
                # produce an empty/garbage string, so keep the reported value.
                secret_value = secret.secret_value

            yield Secret(secret.type, secret.filename, secret.line_number, secret_value, line, secret.is_verified)
예제 #2
0
def get_latest_patchset():
    """Return the PatchSet for the latest commit.

    Runs ``git diff -U0 @~..@`` and parses its output as UTF-8 after
    converting CRLF line endings to LF.
    """
    git_command = ['git', 'diff', '-U0', '@~..@']
    raw_diff = check_output(git_command)
    normalised_diff = raw_diff.replace(b'\r\n', b'\n')
    return PatchSet.from_string(normalised_diff, encoding='utf-8')
예제 #3
0
파일: client.py 프로젝트: sugusbs/sentry
 def get_commit_filechanges(self, repo, sha):
     """Request the repository diff for ``sha`` and return it transformed.

     The response text is parsed as a unified diff and the resulting patch
     set is passed to ``self.transform_patchset``.
     """
     diff_path = BitbucketAPIPath.repository_diff.format(repo=repo, spec=sha)
     response = self.get(diff_path, allow_text=True)
     patch_set = PatchSet.from_string(response.text)
     return self.transform_patchset(patch_set)
예제 #4
0
def diff_parser_from_str(diff_str):
    """Parse ``diff_str`` into a PatchSet, returning an empty list on failure.

    Any exception raised while parsing is logged (together with the diff
    text) and swallowed, so callers always get an iterable back.
    """
    try:
        return PatchSet.from_string(diff_str)
    except Exception as err:
        message = "diff_parser_from_str failed! diff str {}\nerror: {}".format(diff_str, err)
        logging.error(message)
    return []
예제 #5
0
def get_latest_patchset():
    """Return the PatchSet for the latest commit.

    The diff comes from ``git diff -U0 @~..@``; CRLF line endings are
    replaced with LF before the bytes are parsed as UTF-8.
    """
    diff_bytes = check_output(['git', 'diff', '-U0', '@~..@'])
    unix_diff_bytes = diff_bytes.replace(b'\r\n', b'\n')
    patch_set = PatchSet.from_string(unix_diff_bytes, encoding='utf-8')
    return patch_set
예제 #6
0
    def from_string(text):
        """Build a mapper from unified-diff text.

        The text is parsed into a ``PatchSet``, which is then handed to
        ``DiffMapper.from_patchset``; that call's result is returned.
        """
        return DiffMapper.from_patchset(PatchSet.from_string(text))
예제 #7
0
def get_patch(api, urn, pr_num, raw=False):
    """Fetch the ``.patch`` file of a pull request.

    ``api`` is called as ``api("get", url)``. When ``raw`` is true its
    result is returned untouched; otherwise it is parsed into a PatchSet.
    """
    url_template = "https://github.com/{urn}/pull/{pr}.patch"
    payload = api("get", url_template.format(urn=urn, pr=pr_num))
    return payload if raw else PatchSet.from_string(payload)
예제 #8
0
파일: client.py 프로젝트: sugusbs/sentry
    def get_commit_filechanges(self, repo, sha):
        """Return the transformed patch set for commit ``sha`` of ``repo``.

        The diff endpoint's response text is parsed as a unified diff and
        passed to ``self.transform_patchset``.
        """
        # The endpoint returns a unidiff file.
        endpoint = "/2.0/repositories/{}/diff/{}".format(repo, sha)
        diff_text = self.get(endpoint, allow_text=True).text
        return self.transform_patchset(PatchSet.from_string(diff_text))
예제 #9
0
    def scan_diff(
        self,
        diff,
        baseline_filename='',
        last_commit_hash='',
        repo_name='',
    ):
        """Scan the files touched by a diff instead of the whole codebase.

        The diff is parsed into a patch set. Every patched file that is
        neither matched by ``self.exclude_regex`` (case-insensitive) nor equal
        to ``baseline_filename`` is run through each accumulator returned by
        ``self._results_accumulator``, which is updated with the secrets
        extracted from that file's patch.

        :type diff: str
        :param diff: diff string, e.g. the output of `git diff <fileA> <fileB>`

        :type baseline_filename: str
        :param baseline_filename: name of the baseline file; a patched file
                                  with this name is skipped, since a baseline
                                  contains hashes of known secrets.

        :type last_commit_hash: str
        :param last_commit_hash: used for logging only -- the last commit hash we saved

        :type repo_name: str
        :param repo_name: used for logging only -- the name of the repo

        :raises UnidiffParseError: re-raised after being logged when the diff
                                   cannot be parsed.
        """
        try:
            parsed_patches = PatchSet.from_string(diff)
        except UnidiffParseError:  # pragma: no cover
            log.error({
                'alert': 'UnidiffParseError',
                'hash': last_commit_hash,
                'repo_name': repo_name,
            })
            raise

        # Compiled once up front; only consulted when an exclude regex is set.
        exclusion = (
            re.compile(self.exclude_regex, re.IGNORECASE)
            if self.exclude_regex
            else None
        )

        for patched in parsed_patches:
            path = patched.path
            is_excluded = self.exclude_regex and exclusion.search(path)
            if is_excluded or path == baseline_filename:
                continue

            for results, plugin in self._results_accumulator(path):
                found = self._extract_secrets_from_patch(patched, plugin, path)
                results.update(found)
예제 #10
0
    def test_patchset_from_bytes_string(self):
        """Parsing the sample diff from bytes equals parsing it from a text stream."""
        with codecs.open(self.sample_file, 'rb') as binary_handle:
            raw_bytes = binary_handle.read()
        from_bytes = PatchSet.from_string(raw_bytes, encoding='utf-8')

        with codecs.open(self.sample_file, 'r', encoding='utf-8') as text_handle:
            from_stream = PatchSet(text_handle)

        self.assertEqual(from_bytes, from_stream)
예제 #11
0
    def test_patchset_from_bytes_string(self):
        """A PatchSet built from the file's raw bytes matches one built from its decoded stream."""
        with codecs.open(self.sample_file, 'rb') as raw_file:
            sample_bytes = raw_file.read()
        parsed_from_bytes = PatchSet.from_string(sample_bytes, encoding='utf-8')

        with codecs.open(self.sample_file, 'r', encoding='utf-8') as decoded_file:
            parsed_from_file = PatchSet(decoded_file)

        self.assertEqual(parsed_from_bytes, parsed_from_file)
    def scan_diff(
        self,
        diff,
        baseline_filename='',
        last_commit_hash='',
        repo_name='',
    ):
        """Scan incremental changes (a diff) rather than the full codebase.

        After parsing ``diff`` into a patch set, each patched file is skipped
        when ``self.exclude_regex`` matches its path (case-insensitive) or when
        its path equals ``baseline_filename``. For the remaining files, every
        accumulator from ``self._results_accumulator`` is updated with the
        secrets extracted from the file's patch.

        :type diff: str
        :param diff: diff string, e.g. the output of `git diff <fileA> <fileB>`

        :type baseline_filename: str
        :param baseline_filename: name of the baseline file; a patched file
                                  with this name is skipped, since a baseline
                                  contains hashes of known secrets.

        :type last_commit_hash: str
        :param last_commit_hash: used for logging only -- the last commit hash we saved

        :type repo_name: str
        :param repo_name: used for logging only -- the name of the repo

        :raises UnidiffParseError: re-raised after being logged when the diff
                                   cannot be parsed.
        """
        try:
            patches = PatchSet.from_string(diff)
        except UnidiffParseError:  # pragma: no cover
            parse_failure = dict([
                ('alert', 'UnidiffParseError'),
                ('hash', last_commit_hash),
                ('repo_name', repo_name),
            ])
            CustomLogObj.getLogger().error(parse_failure)
            raise

        # Compile the exclusion pattern once; it is only used when one is set.
        exclude_pattern = None
        if self.exclude_regex:
            exclude_pattern = re.compile(self.exclude_regex, re.IGNORECASE)

        for file_patch in patches:
            target_path = file_patch.path

            if self.exclude_regex and exclude_pattern.search(target_path):
                # Path matches the exclude regex.
                continue
            if target_path == baseline_filename:
                continue

            for results, plugin in self._results_accumulator(target_path):
                extracted = self._extract_secrets_from_patch(
                    file_patch,
                    plugin,
                    target_path,
                )
                results.update(extracted)
예제 #13
0
파일: client.py 프로젝트: Kayle009/sentry
 def get_commit_filechanges(self, repo, sha):
     """Return the transformed file changes for commit ``sha`` in ``repo``.

     Requests the repository diff, parses the response text as a unified
     diff and passes the patch set to ``self.transform_patchset``.
     """
     request_path = BitbucketAPIPath.repository_diff.format(repo=repo, spec=sha)
     diff_text = self.get(request_path, allow_text=True).text
     return self.transform_patchset(PatchSet.from_string(diff_text))
예제 #14
0
    def get_commit_filechanges(self, repo, sha):
        """Return the transformed patch set for commit ``sha`` of ``repo``.

        Here ``self.get`` is expected to hand back the diff itself, which is
        parsed as a unified diff and passed to ``self.transform_patchset``.
        """
        # The endpoint returns a unidiff file.
        diff_url = '/2.0/repositories/{}/diff/{}'.format(repo, sha)
        unidiff_text = self.get(diff_url, allow_text=True)
        return self.transform_patchset(PatchSet.from_string(unidiff_text))
예제 #15
0
    def get_commit_filechanges(self, repo, sha):
        """Return the transformed patch set for commit ``sha`` of ``repo``.

        The response's ``text`` is parsed as a unified diff and the patch set
        is passed to ``self.transform_patchset``.
        """
        # The endpoint returns a unidiff file.
        diff_url = '/2.0/repositories/{}/diff/{}'.format(repo, sha)
        response = self.get(diff_url, allow_text=True)
        parsed = PatchSet.from_string(response.text)
        return self.transform_patchset(parsed)
예제 #16
0
    def get_commit_filechanges(self, repo, sha):
        """Return the transformed patch set for commit ``sha`` of ``repo``.

        Issues a ``GET`` through ``self.request`` with ``json=False``; the
        returned diff is parsed and passed to ``self.transform_patchset``.
        """
        # The endpoint returns a unidiff file.
        diff_path = '/repositories/{}/diff/{}'.format(repo, sha)
        request_options = {'data': None, 'params': None, 'json': False}
        unidiff_text = self.request('GET', '2.0', diff_path, **request_options)
        return self.transform_patchset(PatchSet.from_string(unidiff_text))
예제 #17
0
def _get_lines_from_diff(diff: str) -> Generator[Tuple[str, List[Tuple[int, str]]], None, None]:
    """
    Yield ``(filename, added_lines)`` for every file in ``diff`` that is not filtered out.

    ``added_lines`` is a list of ``(target_line_no, value)`` pairs covering
    only the lines the diff adds.

    :raises: ImportError
    """
    # Imported locally so that unidiff is not required by versions of
    # detect-secrets that never scan diffs.
    from unidiff import PatchSet

    for patch_file in PatchSet.from_string(diff):
        filename = patch_file.path
        if _is_filtered_out(required_filter_parameters=['filename'], filename=filename):
            continue

        added_lines = []
        for chunk in patch_file:
            # target_lines refers to incoming (new) changes
            for line in chunk.target_lines():
                if line.is_added:
                    added_lines.append((line.target_line_no, line.value))

        yield filename, added_lines
 def get_diff_info(self, commit):
     """Return diff statistics of ``commit`` against its first parent.

     The result is a tuple of three strings: the patch set's ``added``
     count, its ``removed`` count, and the number of modified files.
     """
     first_parent = commit.parents[0]
     diff_text = self.repo.git.diff(first_parent.hexsha, commit.hexsha)
     patch_set = PatchSet.from_string(diff_text)
     added = str(patch_set.added)
     removed = str(patch_set.removed)
     modified_count = str(len(patch_set.modified_files))
     return (added, removed, modified_count)
예제 #19
0
def parse(data, tree):
    """Parse ``data`` as a unified diff and pass each patched file to ``parse_patch`` with ``tree``."""
    for file_patch in PatchSet.from_string(data):
        parse_patch(file_patch, tree)