Exemplo n.º 1
0
    def parse_special_header(self, linenum, info):
        """Parse the CVS-specific header lines that follow an index entry.

        The parent parser runs first. If it did not record an ``index`` key
        in ``info``, nothing further is parsed. Otherwise the line at the
        resulting position must match one of the RCS patterns
        (``regex_full`` is tried before ``regex_small``), and its first group
        is stored as ``info['filename']``. Any ``retrieving ...`` lines that
        follow, and then at most one ``diff ...`` line, are skipped.

        Args:
            linenum (int):
                The index into ``self.lines`` at which to begin parsing.

            info (dict):
                The header information collected so far. Updated in place.

        Returns:
            int:
            The index of the next line to parse.

        Raises:
            DiffParserError:
                The expected RCS line could not be matched.
        """
        linenum = super(CVSDiffParser,
                        self).parse_special_header(linenum, info)

        # Without an index entry the remaining header is not trustworthy.
        if 'index' not in info:
            return linenum

        lines = self.lines
        rcs_line = lines[linenum]
        rcs_match = (self.regex_full.match(rcs_line) or
                     self.regex_small.match(rcs_line))

        if not rcs_match:
            raise DiffParserError('Unable to find RCS line', linenum)

        info['filename'] = rcs_match.group(1)
        linenum += 1

        # Skip over every "retrieving ..." line.
        while lines[linenum].startswith(b'retrieving '):
            linenum += 1

        # Skip over a single trailing "diff ..." line, if there is one.
        if lines[linenum].startswith(b'diff '):
            linenum += 1

        return linenum
Exemplo n.º 2
0
    def _parse_diff_git_line(self, diff_git_line, file_info, linenum):
        """Parses the "diff --git" line for filename information.

        Not all diffs have "---" and "+++" lines we can parse for the
        filenames. Git leaves these out if there aren't any changes made
        to the file.

        This function attempts to extract this information from the
        "diff --git" lines in the diff. It supports the following:

        * All filenames with quotes.
        * All filenames with a/ and b/ prefixes.
        * Filenames without quotes, prefixes, or spaces.
        * Filenames without quotes or prefixes, where the original and
          modified filenames are identical.
        """
        for regex in self.DIFF_GIT_LINE_RES:
            m = regex.match(diff_git_line)

            if m:
                file_info.orig_filename = m.group('orig_filename')
                file_info.modified_filename = m.group('new_filename')
                return

        raise DiffParserError(
            'Unable to parse the "diff --git" line for this file, due to '
            'the use of filenames with spaces or --no-prefix, --src-prefix, '
            'or --dst-prefix options.', linenum)
Exemplo n.º 3
0
    def parse(self):
        """Parse the diff into a list of per-file entries.

        Lines that do not belong to any file entry are collected as a
        preamble and prepended to the data of the next file entry found. An
        empty diff (one that yields no file entry) discards whatever
        preamble has been collected so far.

        Returns:
            list:
            The parsed file entries, which are also stored in ``self.files``.

        Raises:
            DiffParserError:
                No file entries were found and the leftover preamble
                contains something other than whitespace.
        """
        self.files = []
        pending = []
        pos = 0

        while pos < len(self.lines):
            next_pos, parsed, is_new_diff = self._parse_diff(pos)

            if parsed:
                self._ensure_file_has_required_fields(parsed)

                if pending:
                    # Attach the collected preamble to the front of this file.
                    parsed.data = ''.join(pending) + parsed.data
                    pending = []

                self.files.append(parsed)
            elif is_new_diff:
                # A diff was found, but it was empty and produced no entry,
                # so the collected preamble is dropped.
                pending = []
            else:
                pending.append(self.lines[pos] + '\n')

            pos = next_pos

        if not self.files and ''.join(pending).strip():
            # This is probably not an actual git diff file.
            raise DiffParserError('This does not appear to be a git diff', 0)

        return self.files
Exemplo n.º 4
0
    def parse_special_header(self, linenum, info):
        """Parse a Mercurial-specific header line.

        Four kinds of line are recognised at ``linenum``:

        * ``# Node ID <id>``: remembered as ``self.new_changeset_id``.
        * ``# Parent <id>``: remembered as ``self.orig_changeset_id``.
        * ``diff -r ...``: a plain hg diff header. File names and revisions
          are stored in ``info`` and the line is consumed.
        * ``diff --git ...``: a git-style header. Only handled once a
          parent changeset ID is known; file names are taken from the
          ``a/`` and ``b/`` paths and the line is consumed.

        Any other line is left alone.

        Args:
            linenum (int):
                The index into ``self.lines`` to inspect.

            info (dict):
                The header information for the current file. Updated in
                place.

        Returns:
            int:
            The index of the next line to parse. This equals ``linenum``
            unless a ``diff`` header line was consumed.

        Raises:
            DiffParserError:
                A ``diff -r`` line lacks its revision, or a ``diff --git``
                line does not have the expected ``a/... b/...`` form.
        """
        diff_line = self.lines[linenum]
        split_line = diff_line.split()

        # git style diffs are supported as long as the node ID and parent ID
        # are present in the patch header
        if diff_line.startswith(b"# Node ID") and len(split_line) == 4:
            self.new_changeset_id = split_line[3]
        elif diff_line.startswith(b"# Parent") and len(split_line) == 3:
            self.orig_changeset_id = split_line[2]
        elif diff_line.startswith(b"diff -r"):
            # diff between two revisions are in the following form:
            #  "diff -r abcdef123456 -r 123456abcdef filename"
            # diff between a revision and the working copy are like:
            #  "diff -r abcdef123456 filename"
            self.is_git_diff = False
            try:
                # ordinary hg diffs don't record renames, so
                # new file always == old file
                if len(split_line) > 4 and split_line[3] == b'-r':
                    # Committed revision
                    name_start_ix = 5
                    info['newInfo'] = split_line[4]
                else:
                    # Uncommitted revision
                    name_start_ix = 3
                    info['newInfo'] = "Uncommitted"
                info['newFile'] = info['origFile'] = \
                    b' '.join(split_line[name_start_ix:])
                info['origInfo'] = split_line[2]
                info['orig_changeset_id'] = split_line[2]
            except (IndexError, ValueError):
                # Indexing past the end of the token list raises IndexError,
                # which is what a truncated "diff -r" line produces.
                raise DiffParserError(
                    "The diff file is missing revision "
                    "information", linenum)
            linenum += 1

        elif (diff_line.startswith(b"diff --git") and self.orig_changeset_id):
            # diff is in the following form:
            #  "diff --git a/origfilename b/newfilename"
            # possibly followed by:
            #  "{copy|rename} from origfilename"
            #  "{copy|rename} to newfilename"
            #
            # The pattern is a bytes pattern because the lines are bytes;
            # a str pattern cannot be applied to bytes on Python 3.
            match = re.search(
                br' a/(.*?) b/(.*?)( (copy|rename) from .*)?$',
                diff_line)

            if not match:
                raise DiffParserError(
                    'Unable to parse the "diff --git" line', linenum)

            self.is_git_diff = True

            info['origInfo'] = self.orig_changeset_id
            info['orig_changeset_id'] = self.orig_changeset_id

            if not self.new_changeset_id:
                info['newInfo'] = "Uncommitted"
            else:
                info['newInfo'] = self.new_changeset_id

            info['origFile'] = match.group(1)
            info['newFile'] = match.group(2)
            linenum += 1

        return linenum
Exemplo n.º 5
0
    def parse_special_header(self, linenum, info):
        """Parse a Mercurial-specific header line.

        Four kinds of line are recognised at ``linenum``:

        * ``# Node ID <id>``: remembered as ``self.newChangesetId``.
        * ``# Parent <id>``: remembered as ``self.origChangesetId``.
        * ``diff -r ...``: a plain hg diff header. File names and revisions
          are stored in ``info`` and the line is consumed.
        * ``diff --git a/... b/...``: a git-style header. Only handled once
          a parent changeset ID is known and both paths carry their
          ``a/`` and ``b/`` prefixes; the line is then consumed.

        Any other line is left alone.

        Args:
            linenum (int):
                The index into ``self.lines`` to inspect.

            info (dict):
                The header information for the current file. Updated in
                place.

        Returns:
            int:
            The index of the next line to parse. This equals ``linenum``
            unless a ``diff`` header line was consumed.

        Raises:
            DiffParserError:
                A ``diff -r`` line lacks its revision.
        """
        line = self.lines[linenum]
        tokens = line.split()

        # git style diffs are supported as long as the node ID and parent ID
        # are present in the patch header
        if line.startswith("# Node ID") and len(tokens) == 4:
            self.newChangesetId = tokens[3]
        elif line.startswith("# Parent") and len(tokens) == 3:
            self.origChangesetId = tokens[2]
        elif line.startswith("diff -r"):
            # diff between two revisions are in the following form:
            #  "diff -r abcdef123456 -r 123456abcdef filename"
            # diff between a revision and the working copy are like:
            #  "diff -r abcdef123456 filename"
            self.isGitDiff = False
            try:
                # ordinary hg diffs don't record renames, so
                # new file always == old file
                isCommitted = len(tokens) > 4 and tokens[3] == '-r'
                if isCommitted:
                    nameStartIndex = 5
                    info['newInfo'] = tokens[4]
                else:
                    nameStartIndex = 3
                    info['newInfo'] = "Uncommitted"
                info['newFile'] = info['origFile'] = \
                    ' '.join(tokens[nameStartIndex:])
                info['origInfo'] = tokens[2]
                info['origChangesetId'] = tokens[2]
            except (IndexError, ValueError):
                # Indexing past the end of the token list raises IndexError,
                # which is what a truncated "diff -r" line produces.
                raise DiffParserError(
                    "The diff file is missing revision "
                    "information", linenum)
            linenum += 1

        elif (line.startswith("diff --git") and
              self.origChangesetId and
              # Guard the indexing below: a short "diff --git" line is
              # simply not handled here rather than raising IndexError.
              len(tokens) >= 4 and
              tokens[2].startswith("a/") and
              tokens[3].startswith("b/")):
            # diff is in the following form:
            #  "diff --git a/origfilename b/newfilename"
            # possibly followed by:
            #  "{copy|rename} from origfilename"
            #  "{copy|rename} to newfilename"
            self.isGitDiff = True
            info['origInfo'] = info['origChangesetId'] = self.origChangesetId
            if not self.newChangesetId:
                info['newInfo'] = "Uncommitted"
            else:
                info['newInfo'] = self.newChangesetId
            # Drop the two-character "a/" and "b/" prefixes.
            info['origFile'] = tokens[2][2:]
            info['newFile'] = tokens[3][2:]
            linenum += 1

        return linenum
Exemplo n.º 6
0
    def parse_special_header(self, linenum, parsed_file):
        """Parse the CVS-specific header marking the start of a file's info.

        The parent parser runs first. Further processing only happens when
        it left a truthy ``index_header_value`` on ``parsed_file``. In that
        case this looks for:

        * An ``RCS file:`` line (matched by ``self.rcs_file_re``), whose
          ``path`` group is stored as ``parsed_file.rcs_filename``.
        * Any ``retrieving ...`` lines, followed by at most one ``diff``
          line, all of which are skipped.

        Args:
            linenum (int):
                The line number to begin parsing.

            parsed_file (reviewboard.diffviewer.parser.ParsedDiffFile):
                The file currently being parsed.

        Returns:
            int:
            The next line number to parse.

        Raises:
            reviewboard.diffviewer.errors.DiffParserError:
                The RCS line could not be found where it was expected.
        """
        linenum = super(CVSDiffParser,
                        self).parse_special_header(linenum, parsed_file)

        if parsed_file.index_header_value:
            all_lines = self.lines
            rcs_match = self.rcs_file_re.match(all_lines[linenum])

            if not rcs_match:
                raise DiffParserError('Unable to find RCS line',
                                      linenum=linenum)

            parsed_file.rcs_filename = rcs_match.group('path')
            linenum += 1

            # Step past every "retrieving ..." line.
            while all_lines[linenum].startswith(b'retrieving '):
                linenum += 1

            # Step past one "diff ..." line, if present.
            if all_lines[linenum].startswith(b'diff '):
                linenum += 1

        # Without an index value, the rest is probably bogus too, so the
        # position returned by the parent parser is handed back unchanged.
        return linenum
Exemplo n.º 7
0
    def parse(self):
        """Parse the diff into a list of per-file entries.

        Lines that belong to no file entry are buffered. Content buffered
        before the first file entry is prepended to that entry. Content
        buffered after a file entry is appended to that preceding entry,
        which is then finalized. An empty diff (one that yields no file
        entry) discards the buffer.

        Returns:
            list:
            The parsed file entries, which are also stored in ``self.files``.

        Raises:
            DiffParserError:
                No file entries were found and the buffered content contains
                something other than whitespace.
        """
        self.files = []
        buf = io.BytesIO()
        pos = 0

        while pos < len(self.lines):
            next_pos, parsed, is_new_diff = self._parse_diff(pos)

            if parsed:
                if self.files:
                    # Trailing content belongs to the previous file, which
                    # is now complete.
                    previous = self.files[-1]
                    previous.append_data(buf.getvalue())
                    buf.close()
                    buf = io.BytesIO()
                    previous.finalize()

                self._ensure_file_has_required_fields(parsed)

                parsed.prepend_data(buf.getvalue())
                buf.close()
                buf = io.BytesIO()

                self.files.append(parsed)
            elif is_new_diff:
                # A diff was found, but it was empty and produced no entry,
                # so the buffered content is dropped.
                buf.close()
                buf = io.BytesIO()
            else:
                buf.write(self.lines[pos])
                buf.write(b'\n')

            pos = next_pos

        try:
            if self.files:
                last = self.files[-1]
                last.append_data(buf.getvalue())
                last.finalize()
            elif buf.getvalue().strip():
                # This is probably not an actual git diff file.
                raise DiffParserError('This does not appear to be a git diff',
                                      0)
        finally:
            buf.close()

        return self.files
Exemplo n.º 8
0
    def parse_special_header(self, linenum, info):
        """Parse a Mercurial-specific header line.

        Three kinds of line are recognised at ``linenum``:

        * ``# Node ID <id>``: remembered as ``self.new_changeset_id``.
        * ``# Parent <id>``: remembered as ``self.orig_changeset_id``.
        * ``diff -r ...``: a plain hg diff header. File names and revisions
          are stored in ``info``, the original revision is remembered as
          ``self.orig_changeset_id``, and the line is consumed.

        Any other line is left alone.

        Args:
            linenum (int):
                The index into ``self.lines`` to inspect.

            info (dict):
                The header information for the current file. Updated in
                place.

        Returns:
            int:
            The index of the next line to parse. This equals ``linenum``
            unless a ``diff -r`` line was consumed.

        Raises:
            DiffParserError:
                A ``diff -r`` line lacks its revision.
        """
        diff_line = self.lines[linenum]
        split_line = diff_line.split()

        if diff_line.startswith(b"# Node ID") and len(split_line) == 4:
            self.new_changeset_id = split_line[3]
        elif diff_line.startswith(b"# Parent") and len(split_line) == 3:
            self.orig_changeset_id = split_line[2]
        elif diff_line.startswith(b"diff -r"):
            # diff between two revisions are in the following form:
            #  "diff -r abcdef123456 -r 123456abcdef filename"
            # diff between a revision and the working copy are like:
            #  "diff -r abcdef123456 filename"
            try:
                # ordinary hg diffs don't record renames, so
                # new file always == old file
                if len(split_line) > 4 and split_line[3] == b'-r':
                    # Committed revision
                    name_start_ix = 5
                    info['newInfo'] = split_line[4]
                else:
                    # Uncommitted revision
                    name_start_ix = 3
                    info['newInfo'] = b'Uncommitted'

                info['newFile'] = info['origFile'] = b' '.join(
                    split_line[name_start_ix:])
                info['origInfo'] = split_line[2]
                info['origChangesetId'] = split_line[2]
                self.orig_changeset_id = split_line[2]
            except (IndexError, ValueError):
                # Indexing past the end of the token list raises IndexError,
                # which is what a truncated "diff -r" line produces.
                raise DiffParserError(
                    "The diff file is missing revision "
                    "information", linenum)
            linenum += 1

        return linenum
Exemplo n.º 9
0
    def _parse_git_diff(self, linenum):
        """Parse one file's section of a git diff, starting at ``linenum``.

        The line at ``linenum`` is treated as the section's first line. Its
        last two whitespace-separated tokens become the original and new
        file names, after ``GIT_DIFF_PREFIX`` has been substituted away.
        The following header lines and content lines are accumulated into
        the file entry's ``data`` until the next git diff header, a binary
        patch marker, or the end of the input is reached.

        Args:
            linenum (int):
                The index into ``self.lines`` at which the section starts.

        Returns:
            tuple:
            A 2-tuple of the next line index to parse and the resulting
            ``File`` entry. The entry is ``None`` when the section was
            identified as an empty change and skipped.

        Raises:
            DiffParserError:
                Raised from the ``ValueError`` handler around the file name
                extraction.
        """
        # First check if it is a new file with no content or
        # a file mode change with no content or
        # a deleted file with no content
        # then skip

        try:
            if self._is_empty_change(linenum):
                linenum += GIT_DIFF_EMPTY_CHANGESET_SIZE
                return linenum, None
        except IndexError:
            # This means this is the only bit left in the file
            linenum += GIT_DIFF_EMPTY_CHANGESET_SIZE
            return linenum, None

        # Now we have a diff we are going to use so get the filenames + commits
        file_info = File()
        file_info.data = self.lines[linenum] + "\n"
        file_info.binary = False
        diff_line = self.lines[linenum].split()

        try:
            # Need to remove the "a/" and "b/" prefix
            file_info.origFile = GIT_DIFF_PREFIX.sub("", diff_line[-2])
            file_info.newFile = GIT_DIFF_PREFIX.sub("", diff_line[-1])
        except ValueError:
            # NOTE(review): indexing a list raises IndexError rather than
            # ValueError, so this handler may never trigger -- confirm.
            raise DiffParserError('The diff file is missing revision '
                                  'information', linenum)
        linenum += 1

        # Save the new file, deleted file, mode change and index
        if self._is_new_file(linenum):
            file_info.data += self.lines[linenum] + "\n"
            linenum += 1
        elif self._is_deleted_file(linenum):
            file_info.data += self.lines[linenum] + "\n"
            linenum += 1
            file_info.deleted = True
        elif self._is_mode_change(linenum):
            # A mode change is recorded as two consecutive lines.
            file_info.data += self.lines[linenum] + "\n"
            file_info.data += self.lines[linenum + 1] + "\n"
            linenum += 2

        if self._is_index_range_line(linenum):
            # The second token of the line holds the revision range.
            index_range = self.lines[linenum].split(None, 2)[1]

            if '..' in index_range:
                file_info.origInfo, file_info.newInfo = index_range.split("..")

            # NOTE(review): when the range has no "..", origInfo is whatever
            # File() provided by default -- confirm that is always set.
            if self.pre_creation_regexp.match(file_info.origInfo):
                file_info.origInfo = PRE_CREATION

            file_info.data += self.lines[linenum] + "\n"
            linenum += 1

        # Get the changes
        while linenum < len(self.lines):
            if self._is_git_diff(linenum):
                # The next file's section starts here; leave it unconsumed.
                return linenum, file_info

            if self._is_binary_patch(linenum):
                # The marker line is consumed but not added to the data.
                file_info.binary = True
                return linenum + 1, file_info

            if self._is_diff_fromfile_line(linenum):
                # An original file of /dev/null means the file is new.
                if self.lines[linenum].split()[1] == "/dev/null":
                    file_info.origInfo = PRE_CREATION

            file_info.data += self.lines[linenum] + "\n"
            linenum += 1

        return linenum, file_info
Exemplo n.º 10
0
    def _parse_git_diff(self, linenum):
        """Parse one file's section of a git diff, starting at ``linenum``.

        The line at ``linenum`` is treated as the section's first line. Its
        last two whitespace-separated tokens become the original and new
        file names, after ``GIT_DIFF_PREFIX`` has been substituted away and
        the result decoded as UTF-8. The extended header lines (new file,
        deleted file, mode change, move, copy, index range) and the content
        lines that follow are accumulated into the file entry's ``data``
        until the next git diff header, a binary patch marker, or the end
        of the input is reached.

        A section with no content is dropped, unless it is a move or a
        copy.

        Args:
            linenum (int):
                The index into ``self.lines`` at which the section starts.

        Returns:
            tuple:
            A 2-tuple of the next line index to parse and the resulting
            ``File`` entry. The entry is ``None`` when the input ends right
            after the first line, or when the change turned out to be
            empty and is neither a move nor a copy.

        Raises:
            DiffParserError:
                Raised from the ``ValueError`` handler around the file name
                extraction.
        """
        # Now we have a diff we are going to use so get the filenames + commits
        file_info = File()
        file_info.data = self.lines[linenum] + b"\n"
        file_info.binary = False
        diff_line = self.lines[linenum].split()

        try:
            # Need to remove the "a/" and "b/" prefix
            file_info.origFile = GIT_DIFF_PREFIX.sub(b"", diff_line[-2])
            file_info.newFile = GIT_DIFF_PREFIX.sub(b"", diff_line[-1])

            if isinstance(file_info.origFile, six.binary_type):
                file_info.origFile = file_info.origFile.decode('utf-8')

            if isinstance(file_info.newFile, six.binary_type):
                file_info.newFile = file_info.newFile.decode('utf-8')
        except ValueError:
            raise DiffParserError(
                'The diff file is missing revision '
                'information', linenum)

        linenum += 1

        # Check to make sure we haven't reached the end of the diff.
        if linenum >= len(self.lines):
            return linenum, None

        # Parse the extended header to save the new file, deleted file,
        # mode change, file move, and index.
        if self._is_new_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            linenum += 1
        elif self._is_deleted_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            linenum += 1
            file_info.deleted = True
        elif self._is_mode_change(linenum):
            # A mode change is recorded as two consecutive lines.
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            linenum += 2
        elif self._is_moved_file(linenum):
            # A move is recorded as three consecutive lines.
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            file_info.data += self.lines[linenum + 2] + b"\n"
            linenum += 3
            file_info.moved = True
        elif self._is_copied_file(linenum):
            # A copy is recorded as three consecutive lines.
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            file_info.data += self.lines[linenum + 2] + b"\n"
            linenum += 3
            file_info.copied = True

        # Assume by default that the change is empty. If we find content
        # later, we'll clear this.
        empty_change = True

        if self._is_index_range_line(linenum):
            # The second token of the line holds the revision range. It is
            # a byte string like the rest of the line, so it has to be
            # tested and split with byte-string separators; str separators
            # raise TypeError on Python 3.
            index_range = self.lines[linenum].split(None, 2)[1]

            if b'..' in index_range:
                file_info.origInfo, file_info.newInfo = \
                    index_range.split(b"..")

            if self.pre_creation_regexp.match(file_info.origInfo):
                file_info.origInfo = PRE_CREATION

            file_info.data += self.lines[linenum] + b"\n"
            linenum += 1

        # Get the changes
        while linenum < len(self.lines):
            if self._is_git_diff(linenum):
                # The next file's section starts here; leave it unconsumed.
                break
            elif self._is_binary_patch(linenum):
                file_info.binary = True
                file_info.data += self.lines[linenum] + b"\n"
                empty_change = False
                linenum += 1
                break
            elif self._is_diff_fromfile_line(linenum):
                # An original file of /dev/null means the file is new.
                if self.lines[linenum].split()[1] == b"/dev/null":
                    file_info.origInfo = PRE_CREATION

                # The from-file line and the line after it are kept together.
                file_info.data += self.lines[linenum] + b'\n'
                file_info.data += self.lines[linenum + 1] + b'\n'
                linenum += 2
            else:
                empty_change = False
                linenum = self.parse_diff_line(linenum, file_info)

        if empty_change and not (file_info.moved or file_info.copied):
            # We didn't find any interesting content, so leave out this
            # file's info.
            #
            # Note that we may want to change this in the future to preserve
            # data like mode changes, but that will require filtering out
            # empty changes at the diff viewer level in a sane way.
            file_info = None

        return linenum, file_info
Exemplo n.º 11
0
    def _parse_git_diff(self, linenum):
        """Parse one file's section of a git diff, starting at ``linenum``.

        The line at ``linenum`` is treated as the section's first line. Its
        last two whitespace-separated tokens become the original and new
        file names, after ``GIT_DIFF_PREFIX`` has been substituted away.
        The extended header lines and the content lines that follow are
        accumulated into the file entry's ``data`` until the next git diff
        header, a binary patch marker, or the end of the input is reached.

        Args:
            linenum (int):
                The index into ``self.lines`` at which the section starts.

        Returns:
            tuple:
            A 2-tuple of the next line index to parse and the resulting
            ``File`` entry. The entry is ``None`` when the section was
            identified as an empty change that is not a deletion.

        Raises:
            DiffParserError:
                Raised from the ``ValueError`` handler around the file name
                extraction.
        """
        # First check if it is a new file with no content or
        # a file mode change with no content or
        # a deleted file with no content
        # then skip

        # The result is only acted upon after the extended header has been
        # read, since deletions are kept even when empty.
        empty_change = self._is_empty_change(linenum)
        empty_change_linenum = linenum + GIT_DIFF_EMPTY_CHANGESET_SIZE

        # Now we have a diff we are going to use so get the filenames + commits
        file_info = File()
        file_info.data = self.lines[linenum] + "\n"
        file_info.binary = False
        diff_line = self.lines[linenum].split()

        try:
            # Need to remove the "a/" and "b/" prefix
            file_info.origFile = GIT_DIFF_PREFIX.sub("", diff_line[-2])
            file_info.newFile = GIT_DIFF_PREFIX.sub("", diff_line[-1])
        except ValueError:
            # NOTE(review): indexing a list raises IndexError rather than
            # ValueError, so this handler may never trigger -- confirm.
            raise DiffParserError('The diff file is missing revision '
                                  'information', linenum)

        linenum += 1

        # Parse the extended header to save the new file, deleted file,
        # mode change, file move, and index.
        if self._is_new_file(linenum):
            file_info.data += self.lines[linenum] + "\n"
            linenum += 1
        elif self._is_deleted_file(linenum):
            file_info.data += self.lines[linenum] + "\n"
            linenum += 1
            file_info.deleted = True
        elif self._is_mode_change(linenum):
            # A mode change is recorded as two consecutive lines.
            file_info.data += self.lines[linenum] + "\n"
            file_info.data += self.lines[linenum + 1] + "\n"
            linenum += 2
        elif self._is_moved_file(linenum):
            # A move is recorded as three consecutive lines.
            file_info.data += self.lines[linenum] + "\n"
            file_info.data += self.lines[linenum + 1] + "\n"
            file_info.data += self.lines[linenum + 2] + "\n"
            linenum += 3
            file_info.moved = True

        # Only show interesting empty changes. Basically, deletions.
        # It's likely a binary file if we're at this point, and so we want
        # to process the rest of it.
        # NOTE(review): presumably File() defaults ``deleted`` to a falsy
        # value when the deletion branch above was not taken -- confirm.
        if empty_change and not file_info.deleted:
            return empty_change_linenum, None

        if self._is_index_range_line(linenum):
            # The second token of the line holds the revision range.
            index_range = self.lines[linenum].split(None, 2)[1]

            if '..' in index_range:
                file_info.origInfo, file_info.newInfo = index_range.split("..")

            if self.pre_creation_regexp.match(file_info.origInfo):
                file_info.origInfo = PRE_CREATION

            file_info.data += self.lines[linenum] + "\n"
            linenum += 1

        # Get the changes
        while linenum < len(self.lines):
            if self._is_git_diff(linenum):
                # The next file's section starts here; leave it unconsumed.
                return linenum, file_info
            elif self._is_binary_patch(linenum):
                # The marker line is kept in the data and consumed.
                file_info.binary = True
                file_info.data += self.lines[linenum] + "\n"
                return linenum + 1, file_info
            elif self._is_diff_fromfile_line(linenum):
                # An original file of /dev/null means the file is new.
                if self.lines[linenum].split()[1] == "/dev/null":
                    file_info.origInfo = PRE_CREATION

                # The from-file line and the line after it are kept together.
                file_info.data += self.lines[linenum] + '\n'
                file_info.data += self.lines[linenum + 1] + '\n'
                linenum += 2
            else:
                linenum = self.parse_diff_line(linenum, file_info)

        return linenum, file_info
Exemplo n.º 12
0
    def parse_special_header(self, linenum, parsed_file):
        """Parse a special diff header marking the start of a new file's info.

        This looks for some special markers found in Mercurial diffs, trying
        to find a ``Parent`` or a ``diff -r`` line.

        A ``Parent`` line specifies a changeset ID that will be used as the
        source revision for all files.

        A ``diff -r`` line contains information identifying the file's name
        and other details. When one is found, it is consumed and its
        original revision is also remembered as ``self.orig_changeset_id``.

        Args:
            linenum (int):
                The line number to begin parsing.

            parsed_file (reviewboard.diffviewer.parser.ParsedDiffFile):
                The file currently being parsed.

        Returns:
            int:
            The next line number to parse.

        Raises:
            reviewboard.diffviewer.errors.DiffParserError:
                There was an error parsing the special header. This may be
                a corrupted diff, or an error in the parsing implementation.
                Details are in the error message.
        """
        diff_line = self.lines[linenum]
        split_line = diff_line.split()

        if diff_line.startswith(b'# Parent') and len(split_line) == 3:
            self.orig_changeset_id = split_line[2]
        elif diff_line.startswith(b'diff -r'):
            # A diff between two revisions are in the following form:
            #
            #     diff -r abcdef123456 -r 123456abcdef filename
            #
            # A diff between a revision and the working copy:
            #
            #     diff -r abcdef123456 filename
            try:
                # Ordinary hg diffs don't record renames, so a new file
                # is always equivalent to an old file.
                if len(split_line) > 4 and split_line[3] == b'-r':
                    # Committed revision
                    name_start_ix = 5
                    parsed_file.modified_file_details = split_line[4]
                else:
                    # Uncommitted revision
                    name_start_ix = 3
                    parsed_file.modified_file_details = b'Uncommitted'

                filename = b' '.join(split_line[name_start_ix:])

                parsed_file.orig_filename = filename
                parsed_file.orig_file_details = split_line[2]
                parsed_file.modified_filename = filename

                self.orig_changeset_id = split_line[2]
            except (IndexError, ValueError):
                # Indexing past the end of the token list raises IndexError,
                # which is what a truncated "diff -r" line produces.
                raise DiffParserError(
                    'The diff file is missing revision '
                    'information',
                    linenum=linenum)

            linenum += 1

        return linenum