def parse_diff_header(self, linenum, info):
    """Parse a standard unified or context diff header.

    A header is recognized when the line at ``linenum`` and the line
    after it form a ``---``/``+++`` pair (unified diff) or a
    ``***``/``---`` pair (context diff). A ``***`` line that ends with
    `` ****`` is not treated as a file header.

    When a header is found, everything after the 4-byte marker on each of
    the two lines is passed to :py:meth:`parse_filename_header`, and the
    results are stored in ``info`` under ``origFile``/``origInfo`` and
    ``newFile``/``newInfo``.

    Args:
        linenum (int):
            The line number to begin parsing.

        info (dict):
            The dictionary-like object receiving the parsed values.

    Returns:
        int:
        The line number following the header, or ``linenum`` unchanged
        if no header starts there.

    Raises:
        DiffParserError:
            A :py:exc:`ValueError` occurred while parsing or unpacking
            one of the header lines.
    """
    lines = self.lines

    # A header needs two lines. Without them there is nothing to parse.
    if linenum + 1 >= len(lines):
        return linenum

    first = lines[linenum]
    second = lines[linenum + 1]

    is_unified = first.startswith(b'--- ') and second.startswith(b'+++ ')
    is_context = (first.startswith(b'*** ') and
                  second.startswith(b'--- ') and
                  not first.endswith(b' ****'))

    if not (is_unified or is_context):
        return linenum

    # This is a unified or context diff header. Parse the file and extra
    # info from each of the two lines in turn. linenum advances per line
    # so that a failure reports the line that could not be parsed.
    try:
        for name_key, details_key in (('origFile', 'origInfo'),
                                      ('newFile', 'newInfo')):
            info[name_key], info[details_key] = \
                self.parse_filename_header(lines[linenum][4:], linenum)
            linenum += 1
    except ValueError:
        raise DiffParserError(
            'The diff file is missing revision information', linenum)

    return linenum
def recalculate_line_counts(self, tool):
    """Recalculate the insert_count and delete_count values.

    This re-parses the stored diff content through the parser supplied by
    ``tool`` and copies the line counts of the single parsed file onto
    this object. If the object has a primary key, it is then saved with
    ``update_fields=['extra_data']``.

    Parsing problems are logged rather than raised; in that case the
    counts are left untouched and nothing is saved.

    Args:
        tool (object):
            An object whose ``get_parser(content)`` returns a parser with
            a ``parse()`` method yielding the parsed files.
    """
    # Pass the argument lazily, matching the error call below, so the
    # message is only formatted when debug logging is actually enabled.
    logging.debug('Recalculating insert/delete line counts on '
                  'RawFileDiffData %s',
                  self.pk)

    try:
        files = tool.get_parser(self.content).parse()

        # The stored content is expected to describe exactly one file.
        if len(files) != 1:
            raise DiffParserError('Got wrong number of files (%d)'
                                  % len(files))
    except DiffParserError as e:
        logging.error(
            'Failed to correctly parse stored diff data in '
            'RawFileDiffData ID %s when trying to get '
            'insert/delete line counts: %s',
            self.pk, e)
        return

    file_info = files[0]
    self.insert_count = file_info.insert_count
    self.delete_count = file_info.delete_count

    # Only persist objects that already exist in the database.
    # NOTE(review): only 'extra_data' is written, so the counts are
    # presumably backed by that field -- confirm against the model.
    if self.pk:
        self.save(update_fields=['extra_data'])
def parse_filename_header(self, s, linenum):
    """Parse the filename found in a diff filename line.

    This parses the value after a ``---`` or ``+++`` indicator (or a
    special variant handled by a subclass), normalizing the filename and
    any following file details, and returning both for processing and
    storage.

    Often times, the file details will be a revision for the original
    file, but this is not guaranteed, and is up to the variation of the
    diff format.

    By default, this will assume that a filename and file details are
    separated by either a single tab, or two or more spaces. If neither
    are found, this will fail to parse.

    This must parse only the provided value, and cannot parse subsequent
    lines.

    Subclasses can override this behavior to parse these lines another
    way, or to normalize filenames (handling escaping or filenames with
    spaces as needed by that particular diff variation).

    Args:
        s (bytes):
            The value to parse.

        linenum (int):
            The line number containing the value to parse.

    Returns:
        list of bytes:
        A two-item list containing:

        1. The filename (as bytes)
        2. The additional file information (as bytes)

    Raises:
        reviewboard.diffviewer.errors.DiffParserError:
            There was an error parsing the diff header. This may be a
            corrupted diff, or an error in the parsing implementation.
            Details are in the error message.
    """
    if b'\t' in s:
        # There's a \t separating the filename and info. This is the
        # best case scenario, since it allows for filenames with spaces
        # without much work.
        return s.split(b'\t', 1)

    # There's spaces being used to separate the filename and info.
    # This is technically wrong, so all we can do is assume that
    # 1) the filename won't have multiple consecutive spaces, and
    # 2) there's at least 2 spaces separating the filename and info.
    #
    # The two-space requirement is spelled out with a multiplier and an
    # explicit {2,} quantifier so it cannot be silently reduced to a
    # single space by whitespace normalization of this file. A lone space
    # must not split, or filenames containing a space would be cut short.
    if b' ' * 2 in s:
        return re.split(br' {2,}', s, maxsplit=1)

    raise DiffParserError(
        'No valid separator after the filename was '
        'found in the diff header',
        linenum)
def parse_filename_header(self, s, linenum):
    """Split a diff filename line into the filename and its details.

    The value is split on the first tab if one is present. Otherwise it
    is split on the first run of two or more spaces. If neither separator
    is found, parsing fails.

    Args:
        s (bytes):
            The value following the ``---``/``+++`` style marker.

        linenum (int):
            The line number containing the value, used for error
            reporting.

    Returns:
        list of bytes:
        A two-item list holding the filename and the remaining file
        details.

    Raises:
        DiffParserError:
            No valid separator was found after the filename.
    """
    if b"\t" in s:
        # There's a \t separating the filename and info. This is the
        # best case scenario, since it allows for filenames with spaces
        # without much work.
        return s.split(b"\t", 1)

    # There's spaces being used to separate the filename and info.
    # This is technically wrong, so all we can do is assume that
    # 1) the filename won't have multiple consecutive spaces, and
    # 2) there's at least 2 spaces separating the filename and info.
    #
    # The pattern must be bytes, like the value being split: a str
    # pattern applied to bytes raises TypeError on Python 3. The
    # two-space requirement is written with a multiplier and an explicit
    # {2,} quantifier so that it survives whitespace normalization.
    if b" " * 2 in s:
        return re.split(br" {2,}", s, maxsplit=1)

    raise DiffParserError("No valid separator after the filename was " +
                          "found in the diff header", linenum)
def parse_special_header(self, linenum, info):
    """Parse a special ``Index:`` header preceding the standard header.

    If the line at ``linenum`` starts with ``Index: ``, the following
    lines are scanned for the ``====`` separator (``self.INDEX_SEP``).
    When the separator is found, the text after ``Index:`` is stored in
    ``info['index']`` and parsing resumes on the line after the
    separator. The scan gives up as soon as a line starting with ``---``
    or ``+++`` is reached, since that marks the start of a diff and the
    ``Index:`` line cannot be treated as special.

    Args:
        linenum (int):
            The line number to begin parsing.

        info (dict):
            The dictionary-like object receiving the ``index`` value.

    Returns:
        int:
        The line number after the special header, or ``linenum``
        unchanged if no complete special header was found.

    Raises:
        DiffParserError:
            The ``Index:`` line carried no value.
    """
    try:
        index_line = self.lines[linenum]
    except IndexError:
        return linenum

    if not index_line.startswith(b'Index: '):
        return linenum

    # Try to find the "====" line. The final line is never examined, so a
    # separator only counts when at least one more line follows it.
    for candidate in range(linenum + 1, len(self.lines) - 1):
        line = self.lines[candidate]

        if line == self.INDEX_SEP:
            # We found the line. This is looking like a valid diff
            # for CVS, Subversion, and other systems. Try to parse
            # the data from the line.
            try:
                info['index'] = index_line.split(None, 1)[1]
            except (IndexError, ValueError):
                # Indexing the split result raises IndexError (not
                # ValueError) when nothing follows "Index:".
                raise DiffParserError('Malformed Index line', linenum)

            return candidate + 1

        if line.startswith((b'---', b'+++')):
            # We never found that line, but we did hit the start of
            # a diff file. We can't treat the "Index:" line as special
            # in this case.
            break

    return linenum
def parse_special_header(self, linenum, info):
    """Parse a special ``Index:`` header preceding the standard header.

    The header is recognized only when the line at ``linenum`` starts
    with ``Index: `` and the very next line equals ``self.INDEX_SEP``.
    In that case the text after ``Index:`` is stored in
    ``info['index']`` and both lines are consumed.

    Args:
        linenum (int):
            The line number to begin parsing.

        info (dict):
            The dictionary-like object receiving the ``index`` value.

    Returns:
        int:
        The line number after the special header, or ``linenum``
        unchanged if no special header starts there.

    Raises:
        DiffParserError:
            The ``Index:`` line carried no value.
    """
    lines = self.lines

    # Both the "Index:" line and the separator line must be present.
    if linenum + 1 >= len(lines):
        return linenum

    index_line = lines[linenum]

    if (not index_line.startswith(b"Index: ") or
        lines[linenum + 1] != self.INDEX_SEP):
        return linenum

    # This is an Index: header, which is common in CVS and Subversion,
    # amongst other systems.
    try:
        info['index'] = index_line.split(None, 1)[1]
    except (IndexError, ValueError):
        # Indexing the split result raises IndexError (not ValueError)
        # when nothing follows "Index:".
        raise DiffParserError("Malformed Index line", linenum)

    return linenum + 2
def parse_diff_header(self, linenum, info):
    """Parse a standard unified or context diff header.

    The line at ``linenum`` and the one after it are inspected. A
    ``---``/``+++`` pair marks a unified diff header, and a
    ``***``/``---`` pair marks a context diff header unless the ``***``
    line ends with `` ****``. If either line is missing, nothing is
    parsed.

    For a recognized header, the text after the 4-byte marker on each
    line is passed to :py:meth:`parse_filename_header`, and the results
    are stored in ``info`` under ``origFile``/``origInfo`` and
    ``newFile``/``newInfo``.

    Args:
        linenum (int):
            The line number to begin parsing.

        info (dict):
            The dictionary-like object receiving the parsed values.

    Returns:
        int:
        The line number following the header, or ``linenum`` unchanged
        if no header starts there.

    Raises:
        DiffParserError:
            A :py:exc:`ValueError` occurred while parsing or unpacking
            one of the header lines.
    """
    try:
        first = self.lines[linenum]
        second = self.lines[linenum + 1]
    except IndexError:
        # There aren't two lines left, so this can't be a header.
        return linenum

    if first.startswith(b'--- '):
        # Unified diff header.
        found_header = second.startswith(b'+++ ')
    elif first.startswith(b'*** '):
        # Context diff header, unless the first line ends in " ****".
        found_header = (second.startswith(b'--- ') and
                        not first.endswith(b' ****'))
    else:
        found_header = False

    if not found_header:
        return linenum

    # Parse the file and extra info from both lines. linenum advances
    # after each one so a failure reports the offending line.
    try:
        for name_key, details_key in (('origFile', 'origInfo'),
                                      ('newFile', 'newInfo')):
            info[name_key], info[details_key] = \
                self.parse_filename_header(self.lines[linenum][4:],
                                           linenum)
            linenum += 1
    except ValueError:
        raise DiffParserError(
            'The diff file is missing revision information',
            linenum)

    return linenum
def parse_diff_header(self, linenum, parsed_file):
    """Parse a standard header before changes made to a file.

    This looks for the ``---`` (original) and ``+++`` (modified) file
    lines of a unified diff, or the ``***``/``---`` pair of a context
    diff, starting at ``linenum``. A ``***`` line ending in `` ****`` is
    not treated as a file header.

    When a header is found, the text following the 4-byte marker on each
    line is handed to :py:meth:`parse_filename_header`, and the results
    are stored on ``parsed_file`` as ``orig_filename``,
    ``orig_file_details``, ``modified_filename`` and
    ``modified_file_details``. The same values are mirrored into
    ``parsed_file._deprecated_info`` for backwards-compatibility.

    Subclasses can override this to parse these lines differently, or to
    process the results of these lines (such as converting special
    filenames to states like "deleted" or "new file").

    Args:
        linenum (int):
            The line number to begin parsing.

        parsed_file (ParsedDiffFile):
            The file currently being parsed.

    Returns:
        int:
        The next line number to parse. This is ``linenum`` unchanged if
        no header starts there.

    Raises:
        reviewboard.diffviewer.errors.DiffParserError:
            There was an error parsing the diff header. This may be a
            corrupted diff, or an error in the parsing implementation.
            Details are in the error message.
    """
    try:
        orig_line = self.lines[linenum]
        modified_line = self.lines[linenum + 1]
    except IndexError:
        # There aren't two lines left, so this can't be a header.
        return linenum

    # Unified diff headers
    is_unified = (orig_line.startswith(b'--- ') and
                  modified_line.startswith(b'+++ '))

    # Context diff headers
    is_context = (orig_line.startswith(b'*** ') and
                  modified_line.startswith(b'--- ') and
                  not orig_line.endswith(b' ****'))

    if not (is_unified or is_context):
        return linenum

    # This is a unified or context diff header. Parse the file and
    # extra info.
    try:
        filename, details = \
            self.parse_filename_header(orig_line[4:], linenum)
        parsed_file.orig_filename = filename
        parsed_file.orig_file_details = details
        linenum += 1

        filename, details = \
            self.parse_filename_header(modified_line[4:], linenum)
        parsed_file.modified_filename = filename
        parsed_file.modified_file_details = details

        # Set these for backwards-compatibility.
        #
        # This should be removed in Review Board 5.0.
        for legacy_key, attr_name in (
                ('origFile', 'orig_filename'),
                ('origInfo', 'orig_file_details'),
                ('newFile', 'modified_filename'),
                ('newInfo', 'modified_file_details')):
            parsed_file._deprecated_info[legacy_key] = \
                getattr(parsed_file, attr_name)

        linenum += 1
    except ValueError:
        raise DiffParserError(
            'The diff file is missing revision information',
            linenum)

    return linenum
def parse_special_header(self, linenum, parsed_file):
    """Parse a special diff header marking the start of a new file's info.

    This attempts to locate an ``Index:`` line at the specified line
    number, which usually indicates the beginning of a file's information
    in a diff (for Unified Diff variants that support it). By default,
    this method expects the line to be found at ``linenum``.

    If present, and if a ``====`` separator line (``self.INDEX_SEP``)
    follows before any ``---`` or ``+++`` line, the value found
    immediately after the ``Index:`` will be stored in
    :py:attr:`ParsedDiffFile.index_header_value`, allowing subclasses to
    make a determination based on its contents (which may vary between
    types of diffs, but should include at least a filename).

    If the ``Index:`` line is not present, this won't do anything by
    default.

    Subclasses can override this to parse additional information before
    the standard diff header. They may also set
    :py:attr:`ParsedDiffFile.skip` to skip the rest of this file and
    begin parsing a new entry at the returned line number.

    Args:
        linenum (int):
            The line number to begin parsing.

        parsed_file (ParsedDiffFile):
            The file currently being parsed.

    Returns:
        int:
        The next line number to parse.

    Raises:
        reviewboard.diffviewer.errors.DiffParserError:
            There was an error parsing the special header. This may be
            a corrupted diff, or an error in the parsing implementation.
            Details are in the error message.
    """
    try:
        index_line = self.lines[linenum]
    except IndexError:
        return linenum

    if not index_line.startswith(b'Index: '):
        return linenum

    # Try to find the "====" line. The final line is never examined, so a
    # separator only counts when at least one more line follows it.
    for candidate in range(linenum + 1, len(self.lines) - 1):
        line = self.lines[candidate]

        if line == self.INDEX_SEP:
            # We found the line. This is looking like a valid diff
            # for CVS, Subversion, and other systems. Try to parse
            # the data from the line.
            try:
                parsed_file.index_header_value = \
                    index_line.split(None, 1)[1]

                # Set these for backwards-compatibility.
                #
                # This should be removed in Review Board 5.0.
                parsed_file._deprecated_info['index'] = \
                    parsed_file.index_header_value
            except (IndexError, ValueError):
                # Indexing the split result raises IndexError (not
                # ValueError) when nothing follows "Index:".
                raise DiffParserError('Malformed Index line', linenum)

            return candidate + 1

        if line.startswith((b'---', b'+++')):
            # We never found that line, but we did hit the start of
            # a diff file. We can't treat the "Index:" line as special
            # in this case.
            break

    return linenum