def _parse_git_diff(self, linenum):
    """Parse one file's section of a Git diff.

    The line at ``linenum`` is treated as the ``diff --git`` header for a
    file. The filenames are taken from that header, then the extended
    header lines, the index line and the change content that follow are
    accumulated into a new ``File`` object. Parsing stops at the next
    ``diff --git`` line, right after a binary patch marker line, or at
    the end of ``self.lines``.

    Args:
        linenum (int):
            Index into ``self.lines`` of the ``diff --git`` line.

    Returns:
        tuple:
        A 2-tuple of ``(linenum, file_info)``. ``linenum`` is the index
        of the first line that was not consumed. ``file_info`` is the
        populated ``File``, or ``None`` if the header was the last line
        of the input, or if no change content was found and the file was
        neither moved nor copied.

    Raises:
        DiffParserError:
            The filenames could not be extracted from the header line or
            could not be decoded as UTF-8.
    """
    # Now we have a diff we are going to use so get the filenames + commits
    file_info = File()
    file_info.data = self.lines[linenum] + b"\n"
    file_info.binary = False

    # NOTE: splitting on whitespace means a filename containing spaces is
    # not handled by this header parsing.
    diff_line = self.lines[linenum].split()

    try:
        # Need to remove the "a/" and "b/" prefix
        file_info.origFile = GIT_DIFF_PREFIX.sub(b"", diff_line[-2])
        file_info.newFile = GIT_DIFF_PREFIX.sub(b"", diff_line[-1])

        if isinstance(file_info.origFile, six.binary_type):
            file_info.origFile = file_info.origFile.decode('utf-8')

        if isinstance(file_info.newFile, six.binary_type):
            file_info.newFile = file_info.newFile.decode('utf-8')
    except (IndexError, ValueError):
        # UnicodeDecodeError is a ValueError subclass, so undecodable
        # filenames are reported here too. IndexError covers a header
        # line with too few fields.
        raise DiffParserError(
            'The diff file is missing revision '
            'information', linenum)

    linenum += 1

    # Check to make sure we haven't reached the end of the diff.
    if linenum >= len(self.lines):
        return linenum, None

    # Parse the extended header to save the new file, deleted file,
    # mode change, file move, and index.
    if self._is_new_file(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1
    elif self._is_deleted_file(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1
        file_info.deleted = True
    elif self._is_mode_change(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        file_info.data += self.lines[linenum + 1] + b"\n"
        linenum += 2

    # A mode change can be followed by a rename or copy header for the
    # same file, so these are checked separately from the chain above
    # rather than as further ``elif`` branches. Both headers span three
    # lines; with fewer lines left there is nothing to match.
    if linenum + 2 < len(self.lines):
        if self._is_moved_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            file_info.data += self.lines[linenum + 2] + b"\n"
            linenum += 3
            file_info.moved = True
        elif self._is_copied_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            file_info.data += self.lines[linenum + 2] + b"\n"
            linenum += 3
            file_info.copied = True

    # Assume by default that the change is empty. If we find content
    # later, we'll clear this.
    empty_change = True

    if self._is_index_range_line(linenum):
        index_range = self.lines[linenum].split(None, 2)[1]

        # The lines are byte strings, so the separator must be bytes as
        # well; a text literal raises TypeError on Python 3.
        if b'..' in index_range:
            file_info.origInfo, file_info.newInfo = \
                index_range.split(b'..')

        if self.pre_creation_regexp.match(file_info.origInfo):
            file_info.origInfo = PRE_CREATION

        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1

    # Get the changes
    while linenum < len(self.lines):
        if self._is_git_diff(linenum):
            # The next file's section starts here.
            break
        elif self._is_binary_patch(linenum):
            file_info.binary = True
            file_info.data += self.lines[linenum] + b"\n"
            empty_change = False
            linenum += 1
            break
        elif self._is_diff_fromfile_line(linenum):
            # "--- /dev/null" marks a newly created file.
            if self.lines[linenum].split()[1] == b"/dev/null":
                file_info.origInfo = PRE_CREATION

            # Keep both the "---" line and the line that follows it.
            file_info.data += self.lines[linenum] + b'\n'
            file_info.data += self.lines[linenum + 1] + b'\n'
            linenum += 2
        else:
            empty_change = False
            linenum = self.parse_diff_line(linenum, file_info)

    if empty_change and not (file_info.moved or file_info.copied):
        # We didn't find any interesting content, so leave out this
        # file's info.
        #
        # Note that we may want to change this in the future to preserve
        # data like mode changes, but that will require filtering out
        # empty changes at the diff viewer level in a sane way.
        file_info = None

    return linenum, file_info
def _parse_git_diff(self, linenum):
    """Parse one file's section of a Git diff.

    The line at ``linenum`` is treated as the ``diff --git`` header for a
    file. The extended headers are read through
    ``self._parse_extended_headers``, and the change content that follows
    is accumulated into a new ``File`` object. Parsing stops at the next
    ``diff --git`` line, right after a binary patch marker line, or at
    the end of ``self.lines``.

    Filenames come from the rename/copy headers or from the ``---`` /
    ``+++`` lines when present. Otherwise they are parsed out of the
    ``diff --git`` line by ``self._parse_diff_git_line``.

    Args:
        linenum (int):
            Index into ``self.lines`` of the ``diff --git`` line.

    Returns:
        tuple:
        A 2-tuple of ``(linenum, file_info)``. ``linenum`` is the index
        of the first line that was not consumed. ``file_info`` is the
        populated ``File``, or ``None`` if the header was the last line
        of the input, or if the change is empty and the file is not new,
        moved, copied or deleted.
    """
    # Now we have a diff we are going to use so get the filenames + commits
    diff_git_line = self.lines[linenum]

    file_info = File()
    file_info.data = diff_git_line + b'\n'
    file_info.binary = False

    linenum += 1

    # Check to make sure we haven't reached the end of the diff.
    if linenum >= len(self.lines):
        return linenum, None

    # Assume the blob / commit information is provided globally. If
    # we found an index header we'll override this.
    file_info.origInfo = self.base_commit_id
    file_info.newInfo = self.new_commit_id

    # NOTE(review): each headers entry is used below as a pair, with [0]
    # read as the header's value and [1] appended to the raw diff data.
    # Confirm against _parse_extended_headers.
    headers, linenum = self._parse_extended_headers(linenum)

    if self._is_new_file(headers):
        file_info.data += headers[b'new file mode'][1]
        file_info.origInfo = PRE_CREATION
    elif self._is_deleted_file(headers):
        file_info.data += headers[b'deleted file mode'][1]
        file_info.deleted = True
    elif self._is_mode_change(headers):
        file_info.data += headers[b'old mode'][1]
        file_info.data += headers[b'new mode'][1]

    # Checked separately from the chain above, so a rename or copy is
    # still recorded when the mode headers are present too.
    if self._is_moved_file(headers):
        file_info.origFile = headers[b'rename from'][0]
        file_info.newFile = headers[b'rename to'][0]
        file_info.moved = True

        if b'similarity index' in headers:
            file_info.data += headers[b'similarity index'][1]

        file_info.data += headers[b'rename from'][1]
        file_info.data += headers[b'rename to'][1]
    elif self._is_copied_file(headers):
        file_info.origFile = headers[b'copy from'][0]
        file_info.newFile = headers[b'copy to'][0]
        file_info.copied = True

        if b'similarity index' in headers:
            file_info.data += headers[b'similarity index'][1]

        file_info.data += headers[b'copy from'][1]
        file_info.data += headers[b'copy to'][1]

    # Assume by default that the change is empty. If we find content
    # later, we'll clear this.
    empty_change = True

    if b'index' in headers:
        index_range = headers[b'index'][0].split()[0]

        # The header value is a byte string, so the separator must be
        # bytes as well; a text literal raises TypeError on Python 3.
        if b'..' in index_range:
            file_info.origInfo, file_info.newInfo = \
                index_range.split(b'..')

        if self.pre_creation_regexp.match(file_info.origInfo):
            file_info.origInfo = PRE_CREATION

        file_info.data += headers[b'index'][1]

    # Get the changes
    while linenum < len(self.lines):
        if self._is_git_diff(linenum):
            # The next file's section starts here.
            break
        elif self._is_binary_patch(linenum):
            file_info.binary = True
            file_info.data += self.lines[linenum] + b"\n"
            empty_change = False
            linenum += 1
            break
        elif self._is_diff_fromfile_line(linenum):
            orig_line = self.lines[linenum]
            new_line = self.lines[linenum + 1]

            orig_filename = orig_line[len(b'--- '):]
            new_filename = new_line[len(b'+++ '):]

            # Some diffs may incorrectly contain filenames listed as:
            #
            # --- filename\t
            # +++ filename\t
            #
            # We need to strip those single trailing tabs.
            if orig_filename.endswith(b'\t'):
                orig_filename = orig_filename[:-1]

            if new_filename.endswith(b'\t'):
                new_filename = new_filename[:-1]

            # Strip the Git a/ and b/ prefixes, if set in the diff.
            if orig_filename.startswith(b'a/'):
                orig_filename = orig_filename[2:]

            if new_filename.startswith(b'b/'):
                new_filename = new_filename[2:]

            # /dev/null on either side is not a real path, so the name
            # from the other side is used for both.
            if orig_filename == b'/dev/null':
                file_info.origInfo = PRE_CREATION
                file_info.origFile = new_filename
            else:
                file_info.origFile = orig_filename

            if new_filename == b'/dev/null':
                file_info.newFile = orig_filename
            else:
                file_info.newFile = new_filename

            file_info.data += orig_line + b'\n'
            file_info.data += new_line + b'\n'
            linenum += 2
        else:
            empty_change = False
            linenum = self.parse_diff_line(linenum, file_info)

    if not file_info.origFile:
        # This file didn't have any --- or +++ lines. This usually means
        # the file was deleted or moved without changes. We'll need to
        # fall back to parsing the diff --git line, which is more
        # error-prone.
        assert not file_info.newFile

        self._parse_diff_git_line(diff_git_line, file_info, linenum)

    # Filenames gathered above are byte strings; expose them as text.
    if isinstance(file_info.origFile, six.binary_type):
        file_info.origFile = file_info.origFile.decode('utf-8')

    if isinstance(file_info.newFile, six.binary_type):
        file_info.newFile = file_info.newFile.decode('utf-8')

    # For an empty change, we keep the file's info only if it is a new
    # 0-length file, a moved file, a copied file, or a deleted 0-length
    # file.
    if (empty_change and
        file_info.origInfo != PRE_CREATION and
        not (file_info.moved or file_info.copied or file_info.deleted)):
        # We didn't find any interesting content, so leave out this
        # file's info.
        #
        # Note that we may want to change this in the future to preserve
        # data like mode changes, but that will require filtering out
        # empty changes at the diff viewer level in a sane way.
        file_info = None

    return linenum, file_info
def _parse_git_diff(self, linenum):
    """Parse one file's section of a Git diff.

    The line at ``linenum`` is treated as the ``diff --git`` header for a
    file. The extended header lines, the index line and the change
    content that follow are accumulated into a new ``File`` object.
    Parsing stops at the next ``diff --git`` line, right after a binary
    patch marker line, or at the end of ``self.lines``.

    Filenames come from the rename/copy header lines or from the ``---``
    / ``+++`` lines when present. Otherwise they are parsed out of the
    ``diff --git`` line by ``self._parse_diff_git_line``.

    Args:
        linenum (int):
            Index into ``self.lines`` of the ``diff --git`` line.

    Returns:
        tuple:
        A 2-tuple of ``(linenum, file_info)``. ``linenum`` is the index
        of the first line that was not consumed. ``file_info`` is the
        populated ``File``, or ``None`` if the header was the last line
        of the input, or if the change is empty and the file is not new,
        moved, copied or deleted.
    """
    # Now we have a diff we are going to use so get the filenames + commits
    diff_git_line = self.lines[linenum]

    file_info = File()
    file_info.data = diff_git_line + b'\n'
    file_info.binary = False

    linenum += 1

    # Check to make sure we haven't reached the end of the diff.
    if linenum >= len(self.lines):
        return linenum, None

    # Parse the extended header to save the new file, deleted file,
    # mode change, file move, and index.
    if self._is_new_file(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1
    elif self._is_deleted_file(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1
        file_info.deleted = True
    elif self._is_mode_change(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        file_info.data += self.lines[linenum + 1] + b"\n"
        linenum += 2

    # The move/copy checks run after the chain above may have advanced
    # linenum, so the first header line is read again here. Reusing a
    # line captured before the chain would append the wrong line (for
    # example "old mode ...") to the raw data after a mode change.
    if self._is_moved_file(linenum):
        similarity = self.lines[linenum]
        rename_from = self.lines[linenum + 1]
        rename_to = self.lines[linenum + 2]

        file_info.origFile = rename_from[len(b'rename from '):]
        file_info.newFile = rename_to[len(b'rename to '):]

        file_info.data += similarity + b"\n"
        file_info.data += rename_from + b"\n"
        file_info.data += rename_to + b"\n"
        linenum += 3
        file_info.moved = True
    elif self._is_copied_file(linenum):
        similarity = self.lines[linenum]
        copy_from = self.lines[linenum + 1]
        copy_to = self.lines[linenum + 2]

        file_info.origFile = copy_from[len(b'copy from '):]
        file_info.newFile = copy_to[len(b'copy to '):]

        file_info.data += similarity + b"\n"
        file_info.data += copy_from + b"\n"
        file_info.data += copy_to + b"\n"
        linenum += 3
        file_info.copied = True

    # Assume by default that the change is empty. If we find content
    # later, we'll clear this.
    empty_change = True

    if self._is_index_range_line(linenum):
        index_range = self.lines[linenum].split(None, 2)[1]

        # The lines are byte strings, so the separator must be bytes as
        # well; a text literal raises TypeError on Python 3.
        if b'..' in index_range:
            file_info.origInfo, file_info.newInfo = \
                index_range.split(b'..')

        if self.pre_creation_regexp.match(file_info.origInfo):
            file_info.origInfo = PRE_CREATION

        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1

    # Get the changes
    while linenum < len(self.lines):
        if self._is_git_diff(linenum):
            # The next file's section starts here.
            break
        elif self._is_binary_patch(linenum):
            file_info.binary = True
            file_info.data += self.lines[linenum] + b"\n"
            empty_change = False
            linenum += 1
            break
        elif self._is_diff_fromfile_line(linenum):
            orig_line = self.lines[linenum]
            new_line = self.lines[linenum + 1]

            orig_filename = orig_line[len(b'--- '):]
            new_filename = new_line[len(b'+++ '):]

            # Strip the Git a/ and b/ prefixes, if set in the diff.
            if orig_filename.startswith(b'a/'):
                orig_filename = orig_filename[2:]

            if new_filename.startswith(b'b/'):
                new_filename = new_filename[2:]

            # /dev/null on either side is not a real path, so the name
            # from the other side is used for both.
            if orig_filename == b'/dev/null':
                file_info.origInfo = PRE_CREATION
                file_info.origFile = new_filename
            else:
                file_info.origFile = orig_filename

            if new_filename == b'/dev/null':
                file_info.newFile = orig_filename
            else:
                file_info.newFile = new_filename

            file_info.data += orig_line + b'\n'
            file_info.data += new_line + b'\n'
            linenum += 2
        else:
            empty_change = False
            linenum = self.parse_diff_line(linenum, file_info)

    if not file_info.origFile:
        # This file didn't have any --- or +++ lines. This usually means
        # the file was deleted or moved without changes. We'll need to
        # fall back to parsing the diff --git line, which is more
        # error-prone.
        assert not file_info.newFile

        self._parse_diff_git_line(diff_git_line, file_info, linenum)

    # Filenames gathered above are byte strings; expose them as text.
    if isinstance(file_info.origFile, six.binary_type):
        file_info.origFile = file_info.origFile.decode('utf-8')

    if isinstance(file_info.newFile, six.binary_type):
        file_info.newFile = file_info.newFile.decode('utf-8')

    # For an empty change, we keep the file's info only if it is a new
    # 0-length file, a moved file, a copied file, or a deleted 0-length
    # file.
    if (empty_change and
        file_info.origInfo != PRE_CREATION and
        not (file_info.moved or file_info.copied or file_info.deleted)):
        # We didn't find any interesting content, so leave out this
        # file's info.
        #
        # Note that we may want to change this in the future to preserve
        # data like mode changes, but that will require filtering out
        # empty changes at the diff viewer level in a sane way.
        file_info = None

    return linenum, file_info
def _parse_git_diff(self, linenum):
    """Parse one file's section of a Git diff.

    The line at ``linenum`` is treated as the ``diff --git`` header for a
    file. The filenames are taken from that header by splitting it at
    ``" b/"``. The extended header lines, the index line and the change
    content that follow are then accumulated into a new ``File`` object.
    Parsing stops at the next ``diff --git`` line, right after a binary
    patch marker line, or at the end of ``self.lines``.

    Args:
        linenum (int):
            Index into ``self.lines`` of the ``diff --git`` line.

    Returns:
        tuple:
        A 2-tuple of ``(linenum, file_info)``. ``linenum`` is the index
        of the first line that was not consumed. ``file_info`` is the
        populated ``File``, or ``None`` if the header was the last line
        of the input, or if the change is empty and the file is not new,
        moved, copied or deleted.

    Raises:
        DiffParserError:
            The header line has no ``" b/"`` separator, or a filename
            could not be decoded as UTF-8.
    """
    # Now we have a diff we are going to use so get the filenames + commits
    file_info = File()
    file_info.data = self.lines[linenum] + b"\n"
    file_info.binary = False

    # We split at the b/ to deal with space in filenames, this is not
    # perfect, but it should solve most of the whitespace problems.
    #
    # The lines are byte strings, so the separator and the prefix removed
    # below must be bytes too; text literals raise TypeError on Python 3.
    diff_line = self.lines[linenum].split(b' b/')

    try:
        file_info.origFile = diff_line[-2].replace(b'diff --git a/', b'')
        file_info.newFile = diff_line[-1]

        if isinstance(file_info.origFile, six.binary_type):
            file_info.origFile = file_info.origFile.decode('utf-8')

        if isinstance(file_info.newFile, six.binary_type):
            file_info.newFile = file_info.newFile.decode('utf-8')
    except (IndexError, ValueError):
        # IndexError: the header had no " b/" separator, so the split
        # produced a single element. UnicodeDecodeError is a ValueError
        # subclass, so undecodable filenames are reported here too.
        raise DiffParserError('The diff file is missing revision '
                              'information', linenum)

    linenum += 1

    # Check to make sure we haven't reached the end of the diff.
    if linenum >= len(self.lines):
        return linenum, None

    # Parse the extended header to save the new file, deleted file,
    # mode change, file move, and index.
    if self._is_new_file(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1
    elif self._is_deleted_file(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1
        file_info.deleted = True
    elif self._is_mode_change(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        file_info.data += self.lines[linenum + 1] + b"\n"
        linenum += 2

    # A mode change can be followed by a rename or copy header for the
    # same file, so these are checked separately from the chain above
    # rather than as further ``elif`` branches. Both headers span three
    # lines; with fewer lines left there is nothing to match.
    if linenum + 2 < len(self.lines):
        if self._is_moved_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            file_info.data += self.lines[linenum + 2] + b"\n"
            linenum += 3
            file_info.moved = True
        elif self._is_copied_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            file_info.data += self.lines[linenum + 2] + b"\n"
            linenum += 3
            file_info.copied = True

    # Assume by default that the change is empty. If we find content
    # later, we'll clear this.
    empty_change = True

    if self._is_index_range_line(linenum):
        index_range = self.lines[linenum].split(None, 2)[1]

        # Bytes separator for the same reason as the header split above.
        if b'..' in index_range:
            file_info.origInfo, file_info.newInfo = \
                index_range.split(b'..')

        if self.pre_creation_regexp.match(file_info.origInfo):
            file_info.origInfo = PRE_CREATION

        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1

    # Get the changes
    while linenum < len(self.lines):
        if self._is_git_diff(linenum):
            # The next file's section starts here.
            break
        elif self._is_binary_patch(linenum):
            file_info.binary = True
            file_info.data += self.lines[linenum] + b"\n"
            empty_change = False
            linenum += 1
            break
        elif self._is_diff_fromfile_line(linenum):
            # "--- /dev/null" marks a newly created file.
            if self.lines[linenum].split()[1] == b"/dev/null":
                file_info.origInfo = PRE_CREATION

            # Keep both the "---" line and the line that follows it.
            file_info.data += self.lines[linenum] + b'\n'
            file_info.data += self.lines[linenum + 1] + b'\n'
            linenum += 2
        else:
            empty_change = False
            linenum = self.parse_diff_line(linenum, file_info)

    # For an empty change, we keep the file's info only if it is a new
    # 0-length file, a moved file, a copied file, or a deleted 0-length
    # file.
    if (empty_change and
        file_info.origInfo != PRE_CREATION and
        not (file_info.moved or file_info.copied or file_info.deleted)):
        # We didn't find any interesting content, so leave out this
        # file's info.
        #
        # Note that we may want to change this in the future to preserve
        # data like mode changes, but that will require filtering out
        # empty changes at the diff viewer level in a sane way.
        file_info = None

    return linenum, file_info
def _parse_git_diff(self, linenum):
    """Parse one file's section of a Git diff.

    The line at ``linenum`` is treated as the ``diff --git`` header for a
    file. The filenames are taken from the last two whitespace-separated
    fields of that header. The extended header lines, the index line and
    the change content that follow are then accumulated into a new
    ``File`` object. Parsing stops at the next ``diff --git`` line, right
    after a binary patch marker line, or at the end of ``self.lines``.

    Args:
        linenum (int):
            Index into ``self.lines`` of the ``diff --git`` line.

    Returns:
        tuple:
        A 2-tuple of ``(linenum, file_info)``. ``linenum`` is the index
        of the first line that was not consumed. ``file_info`` is the
        populated ``File``, or ``None`` if the header was the last line
        of the input, or if no change content was found and the file was
        neither moved nor copied.

    Raises:
        DiffParserError:
            The filenames could not be extracted from the header line or
            could not be decoded as UTF-8.
    """
    # Now we have a diff we are going to use so get the filenames + commits
    file_info = File()
    file_info.data = self.lines[linenum] + b"\n"
    file_info.binary = False

    # NOTE: the header is split on whitespace, so a filename containing
    # spaces is not handled here.
    header_fields = self.lines[linenum].split()

    try:
        # Need to remove the "a/" and "b/" prefix
        file_info.origFile = GIT_DIFF_PREFIX.sub(b"", header_fields[-2])
        file_info.newFile = GIT_DIFF_PREFIX.sub(b"", header_fields[-1])

        if isinstance(file_info.origFile, six.binary_type):
            file_info.origFile = file_info.origFile.decode("utf-8")

        if isinstance(file_info.newFile, six.binary_type):
            file_info.newFile = file_info.newFile.decode("utf-8")
    except (IndexError, ValueError):
        # UnicodeDecodeError is a ValueError subclass, so undecodable
        # filenames are reported here too. IndexError covers a header
        # line with too few fields.
        raise DiffParserError("The diff file is missing revision "
                              "information", linenum)

    linenum += 1

    # Check to make sure we haven't reached the end of the diff.
    if linenum >= len(self.lines):
        return linenum, None

    # Parse the extended header to save the new file, deleted file,
    # mode change, file move, and index.
    if self._is_new_file(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1
    elif self._is_deleted_file(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1
        file_info.deleted = True
    elif self._is_mode_change(linenum):
        file_info.data += self.lines[linenum] + b"\n"
        file_info.data += self.lines[linenum + 1] + b"\n"
        linenum += 2

    # A mode change can be followed by a rename or copy header for the
    # same file, so these are checked separately from the chain above
    # rather than as further ``elif`` branches. Both headers span three
    # lines; with fewer lines left there is nothing to match.
    if linenum + 2 < len(self.lines):
        if self._is_moved_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            file_info.data += self.lines[linenum + 2] + b"\n"
            linenum += 3
            file_info.moved = True
        elif self._is_copied_file(linenum):
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            file_info.data += self.lines[linenum + 2] + b"\n"
            linenum += 3
            file_info.copied = True

    # Assume by default that the change is empty. If we find content
    # later, we'll clear this.
    empty_change = True

    if self._is_index_range_line(linenum):
        index_range = self.lines[linenum].split(None, 2)[1]

        # The lines are byte strings, so the separator must be bytes as
        # well; a text literal raises TypeError on Python 3.
        if b".." in index_range:
            file_info.origInfo, file_info.newInfo = \
                index_range.split(b"..")

        if self.pre_creation_regexp.match(file_info.origInfo):
            file_info.origInfo = PRE_CREATION

        file_info.data += self.lines[linenum] + b"\n"
        linenum += 1

    # Get the changes
    while linenum < len(self.lines):
        if self._is_git_diff(linenum):
            # The next file's section starts here.
            break
        elif self._is_binary_patch(linenum):
            file_info.binary = True
            file_info.data += self.lines[linenum] + b"\n"
            empty_change = False
            linenum += 1
            break
        elif self._is_diff_fromfile_line(linenum):
            # "--- /dev/null" marks a newly created file.
            if self.lines[linenum].split()[1] == b"/dev/null":
                file_info.origInfo = PRE_CREATION

            # Keep both the "---" line and the line that follows it.
            file_info.data += self.lines[linenum] + b"\n"
            file_info.data += self.lines[linenum + 1] + b"\n"
            linenum += 2
        else:
            empty_change = False
            linenum = self.parse_diff_line(linenum, file_info)

    if empty_change and not (file_info.moved or file_info.copied):
        # We didn't find any interesting content, so leave out this
        # file's info.
        #
        # Note that we may want to change this in the future to preserve
        # data like mode changes, but that will require filtering out
        # empty changes at the diff viewer level in a sane way.
        file_info = None

    return linenum, file_info