def diff(self, revision1, revision2, path=None):
    """Return the diff between two revisions as a Unicode string.

    The diff contains the differences between the two revisions, and may
    optionally be limited to a specific path within the repository.
    """
    # Limit the diff to a normalized path when one is given; otherwise
    # diff the whole repository.
    target = self.normalize_path(path) if path else self.repopath

    # pysvn needs a scratch directory to work in; always clean it up.
    scratch_dir = mkdtemp(prefix='reviewboard-svn.')

    try:
        raw_diff = self.client.diff(
            scratch_dir,
            target,
            revision1=self._normalize_revision(revision1),
            revision2=self._normalize_revision(revision2),
            header_encoding='UTF-8',
            diff_options=['-u'])

        return convert_to_unicode(raw_diff, self.encoding_list)[1]
    except Exception as e:
        logging.error('Failed to generate diff using pysvn for revisions '
                      '%s:%s for path %s: %s',
                      revision1, revision2, target, e,
                      exc_info=1)
        raise SCMError(
            _('Unable to get diff revisions %s through %s: %s')
            % (revision1, revision2, e))
    finally:
        rmtree(scratch_dir)
def normalize_source_string(self, s):
    """Normalize a source string of text for use in the diff.

    The string's encoding and trailing newline are normalized, and the
    result is returned as a tuple of the normalized string and the list
    of lines split from it. The original and modified strings used for
    the diff are each normalized independently.

    This is used only when the caller passes a string rather than a list
    for the original or new values. Subclasses can override this to
    provide custom behavior.
    """
    text = s

    if self.encoding_list:
        text = convert_to_unicode(text, self.encoding_list)[1]

    # Guarantee a trailing newline before splitting, so the final line
    # is counted exactly once.
    if text and not text.endswith('\n'):
        text = text + '\n'

    split_lines = self.NEWLINES_RE.split(text or '')

    # Splitting on the guaranteed trailing newline leaves an empty
    # element at the end; drop it to avoid a duplicate final line number.
    split_lines.pop()

    return text, split_lines
def normalize_source_string(self, s):
    """Normalize a source string of text for use in the diff.

    The string's encoding and trailing newline are normalized, and the
    result is returned as a tuple of the normalized string and the list
    of lines split from it. The original and modified strings used for
    the diff are each normalized independently.

    This is used only when the caller passes a string rather than a list
    for the original or new values. Subclasses can override this to
    provide custom behavior.
    """
    normalized = convert_to_unicode(s, self.encoding_list)[1]

    # Guarantee a trailing newline before splitting, so the final line
    # is counted exactly once.
    if normalized and not normalized.endswith('\n'):
        normalized = normalized + '\n'

    result_lines = self.NEWLINES_RE.split(normalized or '')

    # Splitting on the guaranteed trailing newline leaves an empty
    # element at the end; drop it to avoid a duplicate final line number.
    result_lines.pop()

    return normalized, result_lines
def get_change(self, revision):
    """Get an individual change.

    This returns a Commit object containing the details of the commit:
    author, message, date, base revision, and the diff for the change.
    """
    rev_num = int(revision)

    # Fetch this revision plus, when available, its predecessor, which
    # acts as the base revision for the diff.
    log_entries = self.client.get_log('/', start=rev_num, limit=2)
    entry = log_entries[0]

    message = entry.get('message', b'').decode('utf-8', 'replace')
    author_name = entry.get('author', b'').decode('utf-8', 'replace')
    date = entry['date'].isoformat()

    if len(log_entries) > 1:
        base_revision = log_entries[1]['revision']
    else:
        base_revision = 0

    try:
        diff = convert_to_unicode(
            self.client.diff(base_revision, rev_num),
            self.repository.get_encoding_list())[1]
    except Exception as e:
        raise self.normalize_error(e)

    change = Commit(author_name, six.text_type(rev_num), date, message,
                    six.text_type(base_revision))
    change.diff = diff

    return change
def _download_analysis(analyze_function, request, review_request_id, revision,
                       filediff_id, local_site=None, modified=True):
    """Run ``analyze_function`` on one file from a diff.

    This downloads the file contents as a string (optionally with the
    diff applied), writes them to a temporary UTF-8 encoded file under
    ``HOMEFOLDER``, runs the analysis on that file, removes the
    temporary file, and returns the source filename and the analysis
    data.

    Args:
        analyze_function (callable):
            A function taking the path of a file on disk and returning
            analysis data for it.

        request (django.http.HttpRequest):
            The current HTTP request.

        review_request_id (int):
            The ID of the review request owning the diff.

        revision (int):
            The diff revision to analyze.

        filediff_id (int):
            The ID of the FileDiff to analyze.

        local_site (reviewboard.site.models.LocalSite, optional):
            The Local Site the review request is on, if any.

        modified (bool, optional):
            Whether to analyze the patched file (``True``) or the
            original file (``False``).

    Returns:
        tuple:
        A 2-tuple of the source filename and the analysis data, or the
        error response if the review request could not be found.
    """
    # Lazy %-style arguments: messages are only formatted when DEBUG
    # logging is actually enabled.
    logging.debug('Analyze_function: %s', analyze_function.__name__)

    review_request, response = \
        _find_review_request(request, review_request_id, local_site)

    if not review_request:
        return response

    draft = review_request.get_draft(request.user)
    diffset = _query_for_diff(review_request, request.user, revision, draft)
    filediff = get_object_or_404(diffset.files, pk=filediff_id)
    encoding_list = diffset.repository.get_encoding_list()

    # Get a file either from the cache or the SCM, applying the parent
    # diff if it exists. SCM exceptions are passed back to the caller.
    working_file = get_original_file(filediff, request, encoding_list)

    if modified:
        working_file = get_patched_file(working_file, filediff, request)

    working_file = convert_to_unicode(working_file, encoding_list)[1]

    logging.debug('Encoding List: %s', encoding_list)
    logging.debug('Source File: %s', filediff.source_file)

    # Flatten path separators so the temporary file is created directly
    # inside HOMEFOLDER rather than in (possibly missing) subdirectories.
    temp_file_name = 'cctempfile_' + filediff.source_file.replace('/', '_')
    logging.debug('temp_file_name: %s', temp_file_name)
    source_file = os.path.join(HOMEFOLDER, temp_file_name)

    # Write explicitly as UTF-8; the contents are Unicode text.
    temp_file = codecs.open(source_file, 'w', encoding='utf-8')

    try:
        temp_file.write(working_file)
    finally:
        temp_file.close()

    # Always remove the temporary file, even if the analysis raises.
    try:
        data_analysis = analyze_function(source_file)
    finally:
        os.remove(source_file)

    if not data_analysis:
        data_analysis = None

    return filediff.source_file, data_analysis
def normalize_source_string(self, s, encoding_list, **kwargs):
    """Normalize a source string for use in the diff.

    The string's encoding and trailing newline are normalized, and the
    result is split into individual lines. The original and modified
    strings used for the diff are each normalized independently.

    This is used only when the caller passes a string rather than a list
    for the original or new values. Subclasses can override this to
    provide custom behavior.

    Args:
        s (bytes):
            The string to normalize.

        encoding_list (list of unicode):
            The list of encodings to try when converting the string to
            Unicode.

        **kwargs (dict):
            Additional keyword arguments, for future expansion.

    Returns:
        tuple:
        A 2-tuple of the full normalized string and the list of lines
        split from it.

    Raises:
        UnicodeDecodeError:
            The string could not be converted to Unicode.
    """
    text = convert_to_unicode(s, encoding_list)[1]

    # Guarantee a trailing newline before splitting, so the final line
    # is counted exactly once.
    if text and not text.endswith('\n'):
        text = '%s\n' % text

    split_lines = self.NEWLINES_RE.split(text or '')

    # Splitting on the guaranteed trailing newline leaves an empty
    # element at the end; drop it to avoid a duplicate final line number.
    split_lines.pop()

    return text, split_lines
def normalize_source_list(self, l):
    """Normalize a list of source lines for use in the diff.

    This normalizes the encoding of each line. The original and modified
    lists of lines used for the diff are each normalized independently.

    This is used only when the caller passes a list rather than a string
    for the original or new values. Subclasses can override this to
    provide custom behavior.
    """
    if not self.encoding_list:
        # Nothing to convert; hand back the caller's list untouched.
        return l

    return [
        convert_to_unicode(line, self.encoding_list)[1]
        for line in l
    ]
def _download_analysis(analyze_function, request, review_request_id, revision,
                       filediff_id, local_site=None, modified=True):
    """Run ``analyze_function`` on one file from a diff.

    This downloads the file contents as a string (optionally with the
    diff applied), writes them to a temporary UTF-8 encoded file under
    ``HOMEFOLDER``, runs the analysis on that file, removes the
    temporary file, and returns the source filename and the analysis
    data.

    Args:
        analyze_function (callable):
            A function taking the path of a file on disk and returning
            analysis data for it.

        request (django.http.HttpRequest):
            The current HTTP request.

        review_request_id (int):
            The ID of the review request owning the diff.

        revision (int):
            The diff revision to analyze.

        filediff_id (int):
            The ID of the FileDiff to analyze.

        local_site (reviewboard.site.models.LocalSite, optional):
            The Local Site the review request is on, if any.

        modified (bool, optional):
            Whether to analyze the patched file (``True``) or the
            original file (``False``).

    Returns:
        tuple:
        A 2-tuple of the source filename and the analysis data, or the
        error response if the review request could not be found.
    """
    review_request, response = \
        _find_review_request(request, review_request_id, local_site)

    if not review_request:
        return response

    draft = review_request.get_draft(request.user)
    diffset = _query_for_diff(review_request, request.user, revision, draft)
    filediff = get_object_or_404(diffset.files, pk=filediff_id)
    encoding_list = diffset.repository.get_encoding_list()

    data = get_original_file(filediff, request, encoding_list)

    if modified:
        data = get_patched_file(data, filediff, request)

    data = convert_to_unicode(data, encoding_list)[1]

    # Flatten path separators so the temporary file is created directly
    # inside HOMEFOLDER. Without this, a source path containing "/"
    # would point into subdirectories that don't exist there.
    temp_file_name = 'cctempfile_' + filediff.source_file.replace('/', '_')
    source_file = os.path.join(HOMEFOLDER, temp_file_name)

    # Write explicitly as UTF-8; the contents are Unicode text.
    temp_file = codecs.open(source_file, 'w', encoding='utf-8')

    try:
        temp_file.write(data)
    finally:
        temp_file.close()

    # Always remove the temporary file, even if the analysis raises.
    try:
        data_analysis = analyze_function(source_file)
    finally:
        os.remove(source_file)

    if not data_analysis:
        data_analysis = None

    return filediff.source_file, data_analysis
def diff(self, revision1, revision2, path=None):
    """Return the diff between two revisions as a Unicode string.

    The diff contains the differences between the two revisions, and may
    optionally be limited to a specific path within the repository.
    """
    # Limit the diff to a normalized path when one is given; otherwise
    # diff the whole repository.
    target = self.normalize_path(path) if path else self.repopath

    out_stream = None
    err_stream = None

    try:
        out_stream, err_stream = self.client.diff(
            self._normalize_revision(revision1),
            self._normalize_revision(revision2),
            B(target),
            B(target),
            diffopts=DIFF_UNIFIED)

        raw_diff = out_stream.read()

        return convert_to_unicode(raw_diff, self.encoding_list)[1]
    except Exception as e:
        logging.error('Failed to generate diff using subvertpy for '
                      'revisions %s:%s for path %s: %s',
                      revision1, revision2, target, e,
                      exc_info=1)
        raise SCMError(
            _('Unable to get diff revisions %s through %s: %s')
            % (revision1, revision2, e))
    finally:
        # Close whichever streams the client actually handed back.
        if out_stream:
            out_stream.close()

        if err_stream:
            err_stream.close()
def normalize_source_list(self, l, encoding_list, **kwargs):
    """Normalize a list of source lines for use in the diff.

    This normalizes the encoding of each line. The original and modified
    lists of lines used for the diff are each normalized independently.

    This is used only when the caller passes a list rather than a string
    for the original or new values. Subclasses can override this to
    provide custom behavior.

    Args:
        l (list of bytes):
            The list of lines to normalize.

        encoding_list (list of unicode):
            The list of encodings to try when converting the lines to
            Unicode.

        **kwargs (dict):
            Additional keyword arguments, for future expansion.

    Returns:
        list of unicode:
        The resulting list of normalized lines.

    Raises:
        UnicodeDecodeError:
            One or more lines could not be converted to Unicode.
    """
    if not encoding_list:
        # Nothing to convert; hand back the caller's list untouched.
        return l

    return [
        convert_to_unicode(line, encoding_list)[1]
        for line in l
    ]
def create_from_data(self, repository, diff_file_name, diff_file_contents,
                     parent_diff_file_name=None,
                     parent_diff_file_contents=None,
                     diffset_history=None, basedir=None, request=None,
                     base_commit_id=None, check_existence=True,
                     validate_only=False, **kwargs):
    """Create a DiffSet from raw diff data.

    This parses a diff and optional parent diff covering one or more
    files, validates, and constructs :py:class:`DiffSets
    <reviewboard.diffviewer.models.DiffSet>` and :py:class:`FileDiffs
    <reviewboard.diffviewer.models.FileDiff>` representing the diff.

    This can optionally validate the diff without saving anything to the
    database. In this case, no value will be returned. Instead, callers
    should take any result as success.

    Args:
        repository (reviewboard.scmtools.models.Repository):
            The repository the diff applies to.

        diff_file_name (unicode):
            The filename of the main diff file.

        diff_file_contents (bytes):
            The contents of the main diff file.

        parent_diff_file_name (unicode, optional):
            The filename of the parent diff, if one is provided.

        parent_diff_file_contents (bytes, optional):
            The contents of the parent diff, if one is provided.

        diffset_history (reviewboard.diffviewer.models.DiffSetHistory,
                         optional):
            The history object to associate the DiffSet with. This is
            not required if using ``validate_only=True``.

        basedir (unicode, optional):
            The base directory to prepend to all file paths in the diff.

        request (django.http.HttpRequest, optional):
            The current HTTP request, if any. This will result in better
            logging.

        base_commit_id (unicode, optional):
            The ID of the commit that the diff is based upon. This is
            needed by some SCMs or hosting services to properly look up
            files, if the diffs represent blob IDs instead of commit IDs
            and the service doesn't support those lookups.

        check_existence (bool, optional):
            Whether to check for file existence as part of the
            validation process. This defaults to ``True``.

        validate_only (bool, optional):
            Whether to just validate and not save. If ``True``, then
            this won't populate the database at all and will return
            ``None`` upon success. This defaults to ``False``.

    Returns:
        reviewboard.diffviewer.models.DiffSet:
        The resulting DiffSet stored in the database, if processing
        succeeded and ``validate_only=False``.

    Raises:
        reviewboard.diffviewer.errors.DiffParserError:
            There was an error parsing the main diff or parent diff.

        reviewboard.diffviewer.errors.EmptyDiffError:
            The provided diff file did not contain any file changes.

        reviewboard.scmtools.core.FileNotFoundError:
            A file specified in the diff could not be found in the
            repository.

        reviewboard.scmtools.core.SCMError:
            There was an error talking to the repository when validating
            the existence of a file.

        reviewboard.scmtools.git.ShortSHA1Error:
            A SHA1 specified in the diff was in the short form, which
            could not be used to look up the file. This is applicable
            only to Git.
    """
    from reviewboard.diffviewer.diffutils import convert_to_unicode
    from reviewboard.diffviewer.models import FileDiff

    # Map the deprecated save= keyword argument onto validate_only=.
    if 'save' in kwargs:
        warnings.warn('The save parameter to '
                      'DiffSet.objects.create_from_data is deprecated. '
                      'Please set validate_only instead.',
                      DeprecationWarning)
        validate_only = not kwargs['save']

    tool = repository.get_scmtool()

    parser = tool.get_parser(diff_file_contents)

    # Existence checks are skipped here when a parent diff is supplied;
    # in that case they're performed while processing the parent diff
    # below instead.
    files = list(
        self._process_files(
            parser, basedir, repository, base_commit_id, request,
            check_existence=check_existence and
                            not parent_diff_file_contents))

    # Parse the diff
    if len(files) == 0:
        raise EmptyDiffError(_("The diff file is empty"))

    # Sort the files so that header files come before implementation.
    #
    # NOTE(review): list.sort(cmp=...) is Python 2-only syntax; on
    # Python 3 this would need functools.cmp_to_key — confirm the
    # supported runtime.
    files.sort(cmp=self._compare_files, key=lambda f: f.origFile)

    # Parse the parent diff
    parent_files = {}

    # This is used only for tools like Mercurial that use atomic changeset
    # IDs to identify all file versions but not individual file version
    # IDs.
    parent_commit_id = None

    if parent_diff_file_contents:
        diff_filenames = set([f.origFile for f in files])

        parent_parser = tool.get_parser(parent_diff_file_contents)

        # If the user supplied a base diff, we need to parse it and
        # later apply each of the files that are in the main diff
        for f in self._process_files(parent_parser, basedir, repository,
                                     base_commit_id, request,
                                     check_existence=check_existence,
                                     limit_to=diff_filenames):
            # Keyed by the parent's new filename, which is the name the
            # main diff references as its origFile.
            parent_files[f.newFile] = f

        # This will return a non-None value only for tools that use
        # commit IDs to identify file versions as opposed to file revision
        # IDs.
        parent_commit_id = parent_parser.get_orig_commit_id()

    diffset = self.model(name=diff_file_name,
                         revision=0,
                         basedir=basedir,
                         history=diffset_history,
                         repository=repository,
                         diffcompat=DiffCompatVersion.DEFAULT,
                         base_commit_id=base_commit_id)

    if not validate_only:
        diffset.save()

    encoding_list = repository.get_encoding_list()
    filediffs = []

    for f in files:
        parent_file = None
        orig_rev = None
        parent_content = b''

        if f.origFile in parent_files:
            parent_file = parent_files[f.origFile]
            parent_content = parent_file.data
            orig_rev = parent_file.origInfo

        # If there is a parent file there is not necessarily an original
        # revision for the parent file in the case of a renamed file in
        # git.
        if not orig_rev:
            if parent_commit_id and f.origInfo != PRE_CREATION:
                orig_rev = parent_commit_id
            else:
                orig_rev = f.origInfo

        enc, orig_file = convert_to_unicode(f.origFile, encoding_list)
        enc, dest_file = convert_to_unicode(f.newFile, encoding_list)

        if f.deleted:
            status = FileDiff.DELETED
        elif f.moved:
            status = FileDiff.MOVED
        elif f.copied:
            status = FileDiff.COPIED
        else:
            status = FileDiff.MODIFIED

        filediff = FileDiff(
            diffset=diffset,
            source_file=parser.normalize_diff_filename(orig_file),
            dest_file=parser.normalize_diff_filename(dest_file),
            source_revision=smart_unicode(orig_rev),
            dest_detail=f.newInfo,
            binary=f.binary,
            status=status)
        filediff.extra_data = {
            'is_symlink': f.is_symlink,
        }

        # Flag entries whose parent-diff counterpart is a pure
        # move/copy with no line changes.
        if (parent_file and
            (parent_file.moved or parent_file.copied) and
            parent_file.insert_count == 0 and
            parent_file.delete_count == 0):
            filediff.extra_data['parent_moved'] = True

        if not validate_only:
            # This state all requires making modifications to the database.
            # We only want to do this if we're saving.
            filediff.diff = f.data
            filediff.parent_diff = parent_content

            filediff.set_line_counts(raw_insert_count=f.insert_count,
                                     raw_delete_count=f.delete_count)

            filediffs.append(filediff)

    if validate_only:
        return None

    # Persist all FileDiffs in a single query.
    if filediffs:
        FileDiff.objects.bulk_create(filediffs)

    return diffset
def create_filediffs(diff_file_contents, parent_diff_file_contents,
                     repository, basedir, base_commit_id, diffset,
                     request=None, check_existence=True, get_file_exists=None,
                     diffcommit=None, validate_only=False):
    """Create FileDiffs from the given data.

    Args:
        diff_file_contents (bytes):
            The contents of the diff file.

        parent_diff_file_contents (bytes):
            The contents of the parent diff file.

        repository (reviewboard.scmtools.models.Repository):
            The repository the diff is being posted against.

        basedir (unicode):
            The base directory to prepend to all file paths in the diff.

        base_commit_id (unicode):
            The ID of the commit that the diff is based upon. This is
            needed by some SCMs or hosting services to properly look up
            files, if the diffs represent blob IDs instead of commit IDs
            and the service doesn't support those lookups.

        diffset (reviewboard.diffviewer.models.diffset.DiffSet):
            The DiffSet to attach the created FileDiffs to.

        request (django.http.HttpRequest, optional):
            The current HTTP request.

        check_existence (bool, optional):
            Whether or not existence checks should be performed against
            the upstream repository.

            This argument defaults to ``True``.

        get_file_exists (callable, optional):
            A callable that is used to determine if a file exists.

            This must be provided if ``check_existence`` is ``True``.

        diffcommit (reviewboard.diffviewer.models.diffcommit.DiffCommit,
                    optional):
            The DiffCommit to attach the created FileDiffs to.

        validate_only (bool, optional):
            Whether to just validate and not save. If ``True``, then this
            won't populate the database at all and will return ``None``
            upon success. This defaults to ``False``.

    Returns:
        list of reviewboard.diffviewer.models.filediff.FileDiff:
        The created FileDiffs.

        If ``validate_only`` is ``True``, the returned list will be
        empty.
    """
    from reviewboard.diffviewer.diffutils import convert_to_unicode
    from reviewboard.diffviewer.models import FileDiff

    files, parser, parent_commit_id, parent_files = _prepare_file_list(
        diff_file_contents=diff_file_contents,
        parent_diff_file_contents=parent_diff_file_contents,
        repository=repository,
        request=request,
        basedir=basedir,
        check_existence=check_existence,
        get_file_exists=get_file_exists,
        base_commit_id=base_commit_id)

    encoding_list = repository.get_encoding_list()
    filediffs = []

    for f in files:
        parent_file = None
        orig_rev = None
        parent_content = b''

        if f.origFile in parent_files:
            parent_file = parent_files[f.origFile]
            parent_content = parent_file.data
            orig_rev = parent_file.origInfo

        # If there is a parent file there is not necessarily an original
        # revision for the parent file in the case of a renamed file in
        # git.
        if not orig_rev:
            if parent_commit_id and f.origInfo != PRE_CREATION:
                orig_rev = parent_commit_id
            else:
                orig_rev = f.origInfo

        orig_file = convert_to_unicode(f.origFile, encoding_list)[1]
        dest_file = convert_to_unicode(f.newFile, encoding_list)[1]

        if f.deleted:
            status = FileDiff.DELETED
        elif f.moved:
            status = FileDiff.MOVED
        elif f.copied:
            status = FileDiff.COPIED
        else:
            status = FileDiff.MODIFIED

        filediff = FileDiff(
            diffset=diffset,
            commit=diffcommit,
            source_file=parser.normalize_diff_filename(orig_file),
            dest_file=parser.normalize_diff_filename(dest_file),
            source_revision=force_text(orig_rev),
            dest_detail=f.newInfo,
            binary=f.binary,
            status=status)
        filediff.extra_data = {
            'is_symlink': f.is_symlink,
        }

        if parent_file:
            if (parent_file.insert_count == 0 and
                parent_file.delete_count == 0):
                filediff.extra_data[FileDiff._IS_PARENT_EMPTY_KEY] = True

                if parent_file.moved or parent_file.copied:
                    filediff.extra_data['parent_moved'] = True
            else:
                filediff.extra_data[FileDiff._IS_PARENT_EMPTY_KEY] = False

        if not validate_only:
            # This state all requires making modifications to the database.
            # We only want to do this if we're saving.
            filediff.diff = f.data
            filediff.parent_diff = parent_content

            filediff.set_line_counts(raw_insert_count=f.insert_count,
                                     raw_delete_count=f.delete_count)

            filediffs.append(filediff)

    if not validate_only:
        # Persist all FileDiffs in a single query.
        FileDiff.objects.bulk_create(filediffs)

    return filediffs
def create_from_data(self, repository, diff_file_name, diff_file_contents,
                     parent_diff_file_name, parent_diff_file_contents,
                     diffset_history, basedir, request, base_commit_id=None,
                     save=True):
    """Create a DiffSet from raw diff data.

    The diff_file_contents and parent_diff_file_contents parameters are
    strings with the actual diff contents.
    """
    from reviewboard.diffviewer.diffutils import convert_to_unicode
    from reviewboard.diffviewer.models import FileDiff

    tool = repository.get_scmtool()

    parser = tool.get_parser(diff_file_contents)

    # Existence checks are skipped when a parent diff is supplied; they
    # are performed while processing the parent diff below instead.
    files = list(
        self._process_files(
            parser, basedir, repository, base_commit_id, request,
            check_existence=(not parent_diff_file_contents)))

    # Parse the diff
    if len(files) == 0:
        raise EmptyDiffError(_("The diff file is empty"))

    # Sort the files so that header files come before implementation.
    #
    # NOTE(review): list.sort(cmp=...) is Python 2-only syntax; on
    # Python 3 this would need functools.cmp_to_key — confirm the
    # supported runtime.
    files.sort(cmp=self._compare_files, key=lambda f: f.origFile)

    # Parse the parent diff
    parent_files = {}

    # This is used only for tools like Mercurial that use atomic changeset
    # IDs to identify all file versions but not individual file version
    # IDs.
    parent_commit_id = None

    if parent_diff_file_contents:
        diff_filenames = set([f.origFile for f in files])

        parent_parser = tool.get_parser(parent_diff_file_contents)

        # If the user supplied a base diff, we need to parse it and
        # later apply each of the files that are in the main diff
        for f in self._process_files(parent_parser, basedir, repository,
                                     base_commit_id, request,
                                     check_existence=True,
                                     limit_to=diff_filenames):
            # Keyed by the parent's new filename, which is the name the
            # main diff references as its origFile.
            parent_files[f.newFile] = f

        # This will return a non-None value only for tools that use
        # commit IDs to identify file versions as opposed to file revision
        # IDs.
        parent_commit_id = parent_parser.get_orig_commit_id()

    diffset = self.model(name=diff_file_name,
                         revision=0,
                         basedir=basedir,
                         history=diffset_history,
                         repository=repository,
                         diffcompat=DiffCompatVersion.DEFAULT,
                         base_commit_id=base_commit_id)

    if save:
        diffset.save()

    encoding_list = repository.get_encoding_list()

    for f in files:
        parent_file = None
        orig_rev = None
        parent_content = b''

        if f.origFile in parent_files:
            parent_file = parent_files[f.origFile]
            parent_content = parent_file.data
            orig_rev = parent_file.origInfo

        # If there is a parent file there is not necessarily an original
        # revision for the parent file in the case of a renamed file in
        # git.
        if not orig_rev:
            if parent_commit_id and f.origInfo != PRE_CREATION:
                orig_rev = parent_commit_id
            else:
                orig_rev = f.origInfo

        enc, orig_file = convert_to_unicode(f.origFile, encoding_list)
        enc, dest_file = convert_to_unicode(f.newFile, encoding_list)

        if f.deleted:
            status = FileDiff.DELETED
        elif f.moved:
            status = FileDiff.MOVED
        elif f.copied:
            status = FileDiff.COPIED
        else:
            status = FileDiff.MODIFIED

        filediff = FileDiff(
            diffset=diffset,
            source_file=parser.normalize_diff_filename(orig_file),
            dest_file=parser.normalize_diff_filename(dest_file),
            source_revision=smart_unicode(orig_rev),
            dest_detail=f.newInfo,
            diff=f.data,
            parent_diff=parent_content,
            binary=f.binary,
            status=status)

        # Flag entries whose parent-diff counterpart is a pure
        # move/copy with no line changes.
        if (parent_file and
            (parent_file.moved or parent_file.copied) and
            parent_file.insert_count == 0 and
            parent_file.delete_count == 0):
            filediff.extra_data = {'parent_moved': True}

        filediff.set_line_counts(raw_insert_count=f.insert_count,
                                 raw_delete_count=f.delete_count)

        if save:
            filediff.save()

    return diffset
def generate_chunks(self, old, new):
    """Generate chunks for the difference between two strings.

    The strings will be normalized, ensuring they're of the proper
    encoding and ensuring they have consistent newlines. They're then
    syntax-highlighted (if requested).

    Once the strings are ready, chunks are built from the strings and
    yielded to the caller. Each chunk represents information on an
    equal, inserted, deleted, or replaced set of lines.

    The number of lines of each chunk type are stored in the
    :py:attr:`counts` dictionary, which can then be accessed after
    yielding all chunks.
    """
    if self.encoding_list:
        old = convert_to_unicode(old, self.encoding_list)[1]
        new = convert_to_unicode(new, self.encoding_list)[1]

    # Normalize the input so that if there isn't a trailing newline, we add
    # it.
    if old and old[-1] != '\n':
        old += '\n'

    if new and new[-1] != '\n':
        new += '\n'

    a = self.NEWLINES_RE.split(old or '')
    b = self.NEWLINES_RE.split(new or '')

    # Remove the trailing newline, now that we've split this. This will
    # prevent a duplicate line number at the end of the diff.
    del a[-1]
    del b[-1]

    a_num_lines = len(a)
    b_num_lines = len(b)

    markup_a = markup_b = None

    if self._get_enable_syntax_highlighting(old, new, a, b):
        source_file = \
            self.normalize_path_for_display(self.orig_filename)
        dest_file = \
            self.normalize_path_for_display(self.modified_filename)

        try:
            # TODO: Try to figure out the right lexer for these files
            #       once instead of twice.
            markup_a = self._apply_pygments(old or '', source_file)
            markup_b = self._apply_pygments(new or '', dest_file)
        except:
            # Highlighting is best-effort; fall back to escaped text
            # below if Pygments fails for any reason.
            pass

    if not markup_a:
        markup_a = self.NEWLINES_RE.split(escape(old))

    if not markup_b:
        markup_b = self.NEWLINES_RE.split(escape(new))

    siteconfig = SiteConfiguration.objects.get_current()
    ignore_space = True

    # Whitespace is significant for any file matching one of the
    # configured include-space patterns.
    for pattern in siteconfig.get('diffviewer_include_space_patterns'):
        if fnmatch.fnmatch(self.orig_filename, pattern):
            ignore_space = False
            break

    self.differ = get_differ(a, b, ignore_space=ignore_space,
                             compat_version=self.diff_compat)
    self.differ.add_interesting_lines_for_headers(self.orig_filename)

    context_num_lines = siteconfig.get("diffviewer_context_num_lines")

    # An "equal" run longer than this gets split into collapsed and
    # visible-context sub-chunks.
    collapse_threshold = 2 * context_num_lines + 3

    line_num = 1
    opcodes_generator = self.get_opcode_generator()

    counts = {
        'equal': 0,
        'replace': 0,
        'insert': 0,
        'delete': 0,
    }

    for tag, i1, i2, j1, j2, meta in opcodes_generator:
        old_lines = markup_a[i1:i2]
        new_lines = markup_b[j1:j2]
        num_lines = max(len(old_lines), len(new_lines))

        # NOTE(review): `lines` is later sliced by _new_chunk; under
        # Python 3, map() returns a non-sliceable iterator, so this
        # relies on Python 2 list semantics — confirm the runtime.
        lines = map(functools.partial(self._diff_line, tag, meta),
                    range(line_num, line_num + num_lines),
                    range(i1 + 1, i2 + 1), range(j1 + 1, j2 + 1),
                    a[i1:i2], b[j1:j2], old_lines, new_lines)

        counts[tag] += num_lines

        if tag == 'equal' and num_lines > collapse_threshold:
            last_range_start = num_lines - context_num_lines

            if line_num == 1:
                # Leading run of the file: collapse everything except
                # the trailing context.
                yield self._new_chunk(lines, 0, last_range_start, True)
                yield self._new_chunk(lines, last_range_start, num_lines)
            else:
                yield self._new_chunk(lines, 0, context_num_lines)

                if i2 == a_num_lines and j2 == b_num_lines:
                    # Trailing run of the file: collapse to the end.
                    yield self._new_chunk(lines, context_num_lines,
                                          num_lines, True)
                else:
                    # Middle run: keep context on both sides of the
                    # collapsed region.
                    yield self._new_chunk(lines, context_num_lines,
                                          last_range_start, True)
                    yield self._new_chunk(lines, last_range_start,
                                          num_lines)
        else:
            yield self._new_chunk(lines, 0, num_lines, False, tag, meta)

        line_num += num_lines

    self.counts = counts
def create_from_data(self, repository, diff_file_name, diff_file_contents,
                     parent_diff_file_name, parent_diff_file_contents,
                     diffset_history, basedir, request, base_commit_id=None,
                     save=True):
    """Create a DiffSet from raw diff data.

    The diff_file_contents and parent_diff_file_contents parameters are
    strings with the actual diff contents.
    """
    from reviewboard.diffviewer.diffutils import convert_to_unicode
    from reviewboard.diffviewer.models import FileDiff

    tool = repository.get_scmtool()

    # Decode the diff up front; the detected encoding is reused for the
    # parent diff and for re-encoding file data below.
    encoding, diff_text = convert_to_unicode(
        diff_file_contents, repository.get_encoding_list())
    parser = tool.get_parser(diff_text)

    # Existence checks are skipped when a parent diff is supplied; they
    # are performed while processing the parent diff below instead.
    files = list(self._process_files(
        parser, basedir, repository, base_commit_id, request,
        check_existence=(not parent_diff_file_contents)))

    # Parse the diff
    if len(files) == 0:
        raise EmptyDiffError(_("The diff file is empty"))

    # Sort the files so that header files come before implementation.
    #
    # NOTE(review): list.sort(cmp=...) is Python 2-only syntax; on
    # Python 3 this would need functools.cmp_to_key — confirm the
    # supported runtime.
    files.sort(cmp=self._compare_files, key=lambda f: f.origFile)

    # Parse the parent diff
    parent_files = {}

    # This is used only for tools like Mercurial that use atomic changeset
    # IDs to identify all file versions but not individual file version
    # IDs.
    parent_commit_id = None

    if parent_diff_file_contents:
        diff_filenames = set([f.origFile for f in files])

        parent_parser = tool.get_parser(
            convert_to_unicode(parent_diff_file_contents, [encoding])[1])

        # If the user supplied a base diff, we need to parse it and
        # later apply each of the files that are in the main diff
        for f in self._process_files(parent_parser, basedir, repository,
                                     base_commit_id, request,
                                     check_existence=True,
                                     limit_to=diff_filenames):
            # Keyed by the parent's original filename, to match the
            # origFile lookups in the loop below.
            parent_files[f.origFile] = f

        # This will return a non-None value only for tools that use
        # commit IDs to identify file versions as opposed to file revision
        # IDs.
        parent_commit_id = parent_parser.get_orig_commit_id()

    # NOTE(review): create() already persists the row; the diffset.save()
    # below then writes it a second time when save=True — confirm this
    # double write is intended.
    diffset = super(DiffSetManager, self).create(
        name=diff_file_name,
        revision=0,
        basedir=basedir,
        history=diffset_history,
        repository=repository,
        diffcompat=DiffCompatVersion.DEFAULT,
        base_commit_id=base_commit_id)

    if save:
        diffset.save()

    for f in files:
        if f.origFile in parent_files:
            parent_file = parent_files[f.origFile]
            parent_content = parent_file.data.encode(encoding)
            source_rev = parent_file.origInfo
        else:
            parent_content = b""

            if parent_commit_id and f.origInfo != PRE_CREATION:
                source_rev = parent_commit_id
            else:
                source_rev = f.origInfo

        # Normalize to forward slashes so Windows-style paths join
        # cleanly.
        dest_file = os.path.join(basedir, f.newFile).replace("\\", "/")

        if f.deleted:
            status = FileDiff.DELETED
        elif f.moved:
            status = FileDiff.MOVED
        elif f.copied:
            status = FileDiff.COPIED
        else:
            status = FileDiff.MODIFIED

        filediff = FileDiff(
            diffset=diffset,
            source_file=parser.normalize_diff_filename(f.origFile),
            dest_file=parser.normalize_diff_filename(dest_file),
            source_revision=smart_unicode(source_rev),
            dest_detail=f.newInfo,
            diff=f.data.encode(encoding),
            parent_diff=parent_content,
            binary=f.binary,
            status=status)
        filediff.set_line_counts(raw_insert_count=f.insert_count,
                                 raw_delete_count=f.delete_count)

        if save:
            filediff.save()

    return diffset
def _get_chunks_uncached(self):
    """Returns the list of chunks, bypassing the cache.

    This is a generator. It fetches the original and patched file
    contents, optionally substitutes interdiff or reverted contents,
    normalizes and (optionally) syntax-highlights them, and yields one
    chunk per equal/insert/delete/replace opcode region.
    """
    encoding_list = self.diffset.repository.get_encoding_list()

    old = get_original_file(self.filediff, self.request, encoding_list)
    new = get_patched_file(old, self.filediff, self.request)

    if self.interfilediff:
        # For an interdiff, compare the patched file against the
        # interdiff's patched file.
        old = new
        interdiff_orig = get_original_file(self.interfilediff,
                                           self.request,
                                           encoding_list)
        new = get_patched_file(interdiff_orig, self.interfilediff,
                               self.request)
    elif self.force_interdiff:
        # Basically, revert the change.
        old, new = new, old

    old = convert_to_unicode(old, encoding_list)[1]
    new = convert_to_unicode(new, encoding_list)[1]

    # Normalize the input so that if there isn't a trailing newline, we add
    # it.
    if old and old[-1] != '\n':
        old += '\n'

    if new and new[-1] != '\n':
        new += '\n'

    a = self.NEWLINES_RE.split(old or '')
    b = self.NEWLINES_RE.split(new or '')

    # Remove the trailing newline, now that we've split this. This will
    # prevent a duplicate line number at the end of the diff.
    del a[-1]
    del b[-1]

    a_num_lines = len(a)
    b_num_lines = len(b)

    markup_a = markup_b = None

    if self._get_enable_syntax_highlighting(old, new, a, b):
        repository = self.filediff.diffset.repository
        tool = repository.get_scmtool()
        source_file = \
            tool.normalize_path_for_display(self.filediff.source_file)
        dest_file = \
            tool.normalize_path_for_display(self.filediff.dest_file)

        try:
            # TODO: Try to figure out the right lexer for these files
            #       once instead of twice.
            markup_a = self._apply_pygments(old or '', source_file)
            markup_b = self._apply_pygments(new or '', dest_file)
        except:
            # Highlighting is best-effort; fall back to escaped text
            # below if Pygments fails for any reason.
            pass

    if not markup_a:
        markup_a = self.NEWLINES_RE.split(escape(old))

    if not markup_b:
        markup_b = self.NEWLINES_RE.split(escape(new))

    siteconfig = SiteConfiguration.objects.get_current()
    ignore_space = True

    # Whitespace is significant for any file matching one of the
    # configured include-space patterns.
    for pattern in siteconfig.get('diffviewer_include_space_patterns'):
        if fnmatch.fnmatch(self.filename, pattern):
            ignore_space = False
            break

    self.differ = get_differ(a, b, ignore_space=ignore_space,
                             compat_version=self.diffset.diffcompat)
    self.differ.add_interesting_lines_for_headers(self.filename)

    context_num_lines = siteconfig.get("diffviewer_context_num_lines")

    # An "equal" run longer than this gets split into collapsed and
    # visible-context sub-chunks.
    collapse_threshold = 2 * context_num_lines + 3

    if self.interfilediff:
        log_timer = log_timed(
            "Generating diff chunks for interdiff ids %s-%s (%s)" %
            (self.filediff.id, self.interfilediff.id,
             self.filediff.source_file),
            request=self.request)
    else:
        log_timer = log_timed(
            "Generating diff chunks for self.filediff id %s (%s)" %
            (self.filediff.id, self.filediff.source_file),
            request=self.request)

    line_num = 1
    opcodes_generator = get_diff_opcode_generator(self.differ,
                                                  self.filediff,
                                                  self.interfilediff)

    counts = {
        'equal': 0,
        'replace': 0,
        'insert': 0,
        'delete': 0,
    }

    for tag, i1, i2, j1, j2, meta in opcodes_generator:
        old_lines = markup_a[i1:i2]
        new_lines = markup_b[j1:j2]
        num_lines = max(len(old_lines), len(new_lines))

        # NOTE(review): `lines` is later sliced by _new_chunk; under
        # Python 3, map() returns a non-sliceable iterator, so this
        # relies on Python 2 list semantics — confirm the runtime.
        lines = map(functools.partial(self._diff_line, tag, meta),
                    range(line_num, line_num + num_lines),
                    range(i1 + 1, i2 + 1), range(j1 + 1, j2 + 1),
                    a[i1:i2], b[j1:j2], old_lines, new_lines)

        counts[tag] += num_lines

        if tag == 'equal' and num_lines > collapse_threshold:
            last_range_start = num_lines - context_num_lines

            if line_num == 1:
                # Leading run of the file: collapse everything except
                # the trailing context.
                yield self._new_chunk(lines, 0, last_range_start, True)
                yield self._new_chunk(lines, last_range_start, num_lines)
            else:
                yield self._new_chunk(lines, 0, context_num_lines)

                if i2 == a_num_lines and j2 == b_num_lines:
                    # Trailing run of the file: collapse to the end.
                    yield self._new_chunk(lines, context_num_lines,
                                          num_lines, True)
                else:
                    # Middle run: keep context on both sides of the
                    # collapsed region.
                    yield self._new_chunk(lines, context_num_lines,
                                          last_range_start, True)
                    yield self._new_chunk(lines, last_range_start,
                                          num_lines)
        else:
            yield self._new_chunk(lines, 0, num_lines, False, tag, meta)

        line_num += num_lines

    log_timer.done()

    # Interdiff line counts aren't stored; only record counts for a
    # plain diff of this filediff.
    if not self.interfilediff:
        insert_count = counts['insert']
        delete_count = counts['delete']
        replace_count = counts['replace']
        equal_count = counts['equal']

        self.filediff.set_line_counts(
            insert_count=insert_count,
            delete_count=delete_count,
            replace_count=replace_count,
            equal_count=equal_count,
            total_line_count=(insert_count + delete_count +
                              replace_count + equal_count))
def create_filediffs(diff_file_contents,
                     parent_diff_file_contents,
                     repository,
                     basedir,
                     base_commit_id,
                     diffset,
                     request=None,
                     check_existence=True,
                     get_file_exists=None,
                     diffcommit=None,
                     validate_only=False):
    """Create FileDiffs from the given data.

    Args:
        diff_file_contents (bytes):
            The contents of the diff file.

        parent_diff_file_contents (bytes):
            The contents of the parent diff file.

        repository (reviewboard.scmtools.models.Repository):
            The repository the diff is being posted against.

        basedir (unicode):
            The base directory to prepend to all file paths in the diff.

        base_commit_id (unicode):
            The ID of the commit that the diff is based upon. This is
            needed by some SCMs or hosting services to properly look up
            files, if the diffs represent blob IDs instead of commit IDs
            and the service doesn't support those lookups.

        diffset (reviewboard.diffviewer.models.diffset.DiffSet):
            The DiffSet to attach the created FileDiffs to.

        request (django.http.HttpRequest, optional):
            The current HTTP request.

        check_existence (bool, optional):
            Whether or not existence checks should be performed against
            the upstream repository.

            This argument defaults to ``True``.

        get_file_exists (callable, optional):
            A callable that is used to determine if a file exists.

            This must be provided if ``check_existence`` is ``True``.

        diffcommit (reviewboard.diffviewer.models.diffcommit.DiffCommit,
                    optional):
            The DiffCommit to attach the created FileDiffs to.

        validate_only (bool, optional):
            Whether to just validate and not save. If ``True``, then this
            won't populate the database at all.

            This defaults to ``False``.

    Returns:
        list of reviewboard.diffviewer.models.filediff.FileDiff:
        The created FileDiffs. If ``validate_only`` is ``True``, the
        FileDiffs are constructed (without diff data or line counts) but
        are never saved to the database.
    """
    # Imported here (rather than at module level) — presumably to avoid a
    # circular import between this module and the models; confirm before
    # hoisting.
    from reviewboard.diffviewer.diffutils import convert_to_unicode
    from reviewboard.diffviewer.models import FileDiff

    # Parse the main diff and the parent diff. The exact contract of
    # _prepare_file_list isn't visible here; from usage, it yields the
    # parsed files, the parser, the parent diff's commit ID (if any), and a
    # mapping of original filename -> parsed parent file.
    files, parser, parent_commit_id, parent_files = _prepare_file_list(
        diff_file_contents=diff_file_contents,
        parent_diff_file_contents=parent_diff_file_contents,
        repository=repository,
        request=request,
        basedir=basedir,
        check_existence=check_existence,
        get_file_exists=get_file_exists,
        base_commit_id=base_commit_id)

    encoding_list = repository.get_encoding_list()

    filediffs = []

    for f in files:
        parent_file = None
        orig_rev = None
        parent_content = b''

        if f.origFile in parent_files:
            parent_file = parent_files[f.origFile]
            parent_content = parent_file.data
            orig_rev = parent_file.origInfo

        # If there is a parent file there is not necessarily an original
        # revision for the parent file in the case of a renamed file in
        # git.
        if not orig_rev:
            if parent_commit_id and f.origInfo != PRE_CREATION:
                orig_rev = parent_commit_id
            else:
                orig_rev = f.origInfo

        # Filenames from the parser may be byte strings; normalize them to
        # Unicode using the repository's configured encodings.
        orig_file = convert_to_unicode(f.origFile, encoding_list)[1]
        dest_file = convert_to_unicode(f.newFile, encoding_list)[1]

        if f.deleted:
            status = FileDiff.DELETED
        elif f.moved:
            status = FileDiff.MOVED
        elif f.copied:
            status = FileDiff.COPIED
        else:
            status = FileDiff.MODIFIED

        filediff = FileDiff(
            diffset=diffset,
            commit=diffcommit,
            source_file=parser.normalize_diff_filename(orig_file),
            dest_file=parser.normalize_diff_filename(dest_file),
            source_revision=smart_unicode(orig_rev),
            dest_detail=f.newInfo,
            binary=f.binary,
            status=status)
        filediff.extra_data = {
            'is_symlink': f.is_symlink,
        }

        if parent_file:
            # Record whether the parent diff made any changes to this file,
            # so later stages can skip applying an effectively-empty parent.
            if (parent_file.insert_count == 0 and
                parent_file.delete_count == 0):
                filediff.extra_data[FileDiff._IS_PARENT_EMPTY_KEY] = True

            if parent_file.moved or parent_file.copied:
                filediff.extra_data['parent_moved'] = True
        else:
            filediff.extra_data[FileDiff._IS_PARENT_EMPTY_KEY] = False

        if not validate_only:
            # This state all requires making modifications to the database.
            # We only want to do this if we're saving.
            filediff.diff = f.data
            filediff.parent_diff = parent_content

            filediff.set_line_counts(raw_insert_count=f.insert_count,
                                     raw_delete_count=f.delete_count)

        filediffs.append(filediff)

    if not validate_only:
        # Insert all the FileDiffs in one batch, then update the cached
        # file counts on the DiffSet/DiffCommit.
        FileDiff.objects.bulk_create(filediffs)

        num_filediffs = len(filediffs)

        if diffset.file_count is None:
            # NOTE(review): reinit_file_count() appears to recompute (and
            # presumably persist) the count itself — confirm against its
            # definition.
            diffset.reinit_file_count()
        else:
            diffset.file_count += num_filediffs
            diffset.save(update_fields=('file_count',))

        if diffcommit is not None:
            diffcommit.file_count = len(filediffs)
            diffcommit.save(update_fields=('file_count',))

    return filediffs
def create_from_data(self, repository, diff_file_name, diff_file_contents,
                     parent_diff_file_name, parent_diff_file_contents,
                     diffset_history, basedir, request, base_commit_id=None,
                     save=True):
    """Create a DiffSet from raw diff data.

    The diff_file_contents and parent_diff_file_contents parameters are
    strings with the actual diff contents.

    Args:
        repository (reviewboard.scmtools.models.Repository):
            The repository the diff is being posted against.

        diff_file_name (unicode):
            The name of the uploaded diff file, stored as the DiffSet name.

        diff_file_contents (bytes):
            The contents of the diff file.

        parent_diff_file_name (unicode):
            The name of the uploaded parent diff file. Note that this is
            currently unused by this method.

        parent_diff_file_contents (bytes):
            The contents of the parent diff file, if any.

        diffset_history (reviewboard.diffviewer.models.DiffSetHistory):
            The history the new DiffSet will be attached to.

        basedir (unicode):
            The base directory to prepend to all file paths in the diff.

        request (django.http.HttpRequest):
            The current HTTP request.

        base_commit_id (unicode, optional):
            The ID of the commit that the diff is based upon.

        save (bool, optional):
            Whether to save the created DiffSet and FileDiffs to the
            database.

    Returns:
        reviewboard.diffviewer.models.DiffSet:
        The newly-created DiffSet.

    Raises:
        EmptyDiffError:
            The diff contained no files.
    """
    from functools import cmp_to_key

    from reviewboard.diffviewer.diffutils import convert_to_unicode
    from reviewboard.diffviewer.models import FileDiff

    tool = repository.get_scmtool()

    # Decode the diff using the repository's configured encodings,
    # remembering which encoding won so the parent diff and the stored
    # per-file data are encoded consistently.
    encoding, diff_text = convert_to_unicode(
        diff_file_contents, repository.get_encoding_list())

    parser = tool.get_parser(diff_text)

    # Parse the diff. Existence checks are skipped when a parent diff was
    # supplied, since files may only exist once the parent is applied.
    files = list(
        self._process_files(
            parser, basedir, repository, base_commit_id, request,
            check_existence=(not parent_diff_file_contents)))

    if not files:
        raise EmptyDiffError(_("The diff file is empty"))

    # Sort the files so that header files come before implementation.
    #
    # list.sort() no longer accepts a cmp= argument on Python 3, so the
    # comparison function is adapted with functools.cmp_to_key. As with
    # the old cmp=/key= combination, _compare_files is applied to the
    # files' origFile values.
    compare_key = cmp_to_key(self._compare_files)
    files.sort(key=lambda f: compare_key(f.origFile))

    # Parse the parent diff, if one was provided.
    parent_files = {}

    # This is used only for tools like Mercurial that use atomic changeset
    # IDs to identify all file versions but not individual file version
    # IDs.
    parent_commit_id = None

    if parent_diff_file_contents:
        diff_filenames = {f.origFile for f in files}

        parent_parser = tool.get_parser(
            convert_to_unicode(parent_diff_file_contents, [encoding])[1])

        # If the user supplied a base diff, we need to parse it and
        # later apply each of the files that are in the main diff.
        for f in self._process_files(parent_parser, basedir, repository,
                                     base_commit_id, request,
                                     check_existence=True,
                                     limit_to=diff_filenames):
            parent_files[f.origFile] = f

        # This will return a non-None value only for tools that use
        # commit IDs to identify file versions as opposed to file revision
        # IDs.
        parent_commit_id = parent_parser.get_orig_commit_id()

    diffset = super(DiffSetManager, self).create(
        name=diff_file_name,
        revision=0,
        basedir=basedir,
        history=diffset_history,
        repository=repository,
        diffcompat=DiffCompatVersion.DEFAULT,
        base_commit_id=base_commit_id)

    if save:
        diffset.save()

    for f in files:
        if f.origFile in parent_files:
            parent_file = parent_files[f.origFile]
            parent_content = parent_file.data.encode(encoding)
            source_rev = parent_file.origInfo
        else:
            parent_content = b""

            if parent_commit_id and f.origInfo != PRE_CREATION:
                source_rev = parent_commit_id
            else:
                source_rev = f.origInfo

        # Normalize the destination path to use forward slashes, even if
        # the diff was generated on Windows.
        dest_file = os.path.join(basedir, f.newFile).replace("\\", "/")

        if f.deleted:
            status = FileDiff.DELETED
        elif f.moved:
            status = FileDiff.MOVED
        elif f.copied:
            status = FileDiff.COPIED
        else:
            status = FileDiff.MODIFIED

        filediff = FileDiff(diffset=diffset,
                            source_file=f.origFile,
                            dest_file=dest_file,
                            source_revision=smart_unicode(source_rev),
                            dest_detail=f.newInfo,
                            diff=f.data.encode(encoding),
                            parent_diff=parent_content,
                            binary=f.binary,
                            status=status)
        filediff.set_line_counts(raw_insert_count=f.insert_count,
                                 raw_delete_count=f.delete_count)

        if save:
            filediff.save()

    return diffset
def create_filediffs(diff_file_contents,
                     parent_diff_file_contents,
                     repository,
                     basedir,
                     base_commit_id,
                     diffset,
                     request=None,
                     check_existence=True,
                     get_file_exists=None,
                     diffcommit=None,
                     validate_only=False):
    """Create FileDiffs from the given data.

    Args:
        diff_file_contents (bytes):
            The contents of the diff file.

        parent_diff_file_contents (bytes):
            The contents of the parent diff file.

        repository (reviewboard.scmtools.models.Repository):
            The repository the diff is being posted against.

        basedir (unicode):
            The base directory to prepend to all file paths in the diff.

        base_commit_id (unicode):
            The ID of the commit that the diff is based upon. This is
            needed by some SCMs or hosting services to properly look up
            files, if the diffs represent blob IDs instead of commit IDs
            and the service doesn't support those lookups.

        diffset (reviewboard.diffviewer.models.diffset.DiffSet):
            The DiffSet to attach the created FileDiffs to.

        request (django.http.HttpRequest, optional):
            The current HTTP request.

        check_existence (bool, optional):
            Whether or not existence checks should be performed against
            the upstream repository.

            This argument defaults to ``True``.

        get_file_exists (callable, optional):
            A callable that is used to determine if a file exists.

            This must be provided if ``check_existence`` is ``True``.

        diffcommit (reviewboard.diffviewer.models.diffcommit.DiffCommit,
                    optional):
            The DiffCommit to attach the created FileDiffs to.

        validate_only (bool, optional):
            Whether to just validate and not save. If ``True``, then this
            won't populate the database at all.

            This defaults to ``False``.

    Returns:
        list of reviewboard.diffviewer.models.filediff.FileDiff:
        The created FileDiffs. If ``validate_only`` is ``True``, the
        FileDiffs are constructed (without diff data or line counts) but
        are never saved to the database.
    """
    # Imported here (rather than at module level) — presumably to avoid a
    # circular import between this module and the models; confirm before
    # hoisting.
    from reviewboard.diffviewer.diffutils import convert_to_unicode
    from reviewboard.diffviewer.models import FileDiff

    # Parse both the main diff and the parent diff. The exact contents of
    # diff_info aren't visible here; the keys used below are the parsed
    # file list, the parent-file mapping, the ParsedDiff objects for both
    # diffs, and the parser itself.
    diff_info = _prepare_diff_info(
        diff_file_contents=diff_file_contents,
        parent_diff_file_contents=parent_diff_file_contents,
        repository=repository,
        request=request,
        basedir=basedir,
        check_existence=check_existence,
        get_file_exists=get_file_exists,
        base_commit_id=base_commit_id)

    parent_files = diff_info['parent_files']
    parsed_diff = diff_info['parsed_diff']
    parsed_parent_diff = diff_info['parsed_parent_diff']
    parser = diff_info['parser']

    encoding_list = repository.get_encoding_list()

    # Copy over any extra_data for the DiffSet and DiffCommit, if any were
    # set by the parser.
    #
    # We'll do this even if we're validating, to ensure the data can be
    # copied over fine.
    main_extra_data = deepcopy(parsed_diff.extra_data)
    change_extra_data = deepcopy(parsed_diff.changes[0].extra_data)

    if change_extra_data:
        if diffcommit is not None:
            # We've already checked in _parse_diff that there's only a
            # single change in the diff, so we can assume that here.
            diffcommit.extra_data.update(change_extra_data)
        else:
            main_extra_data['change_extra_data'] = change_extra_data

    if main_extra_data:
        diffset.extra_data.update(main_extra_data)

    if parsed_parent_diff is not None:
        parent_extra_data = deepcopy(parsed_parent_diff.extra_data)
        parent_change_extra_data = deepcopy(
            parsed_parent_diff.changes[0].extra_data)

        if parent_change_extra_data:
            if diffcommit is not None:
                diffcommit.extra_data['parent_extra_data'] = \
                    parent_change_extra_data
            else:
                parent_extra_data['change_extra_data'] = \
                    parent_change_extra_data

        if parent_extra_data:
            diffset.extra_data['parent_extra_data'] = parent_extra_data

    # Convert the list of parsed files into FileDiffs.
    filediffs = []

    for f in diff_info['files']:
        parent_file = None
        parent_content = b''

        extra_data = f.extra_data.copy()

        if parsed_parent_diff is not None:
            parent_file = parent_files.get(f.orig_filename)

            if parent_file is not None:
                parent_content = parent_file.data

                # Store the information on the parent's filename and
                # revision. It's important we force these to text, since
                # they may be byte strings and the revision may be a
                # Revision instance.
                parent_source_filename = parent_file.orig_filename
                parent_source_revision = parent_file.orig_file_details

                # Track whether the parent diff actually changed this
                # file, so later stages can skip an effectively-empty
                # parent.
                parent_is_empty = (parent_file.insert_count == 0 and
                                   parent_file.delete_count == 0)

                if parent_file.moved or parent_file.copied:
                    extra_data['parent_moved'] = True

                if parent_file.extra_data:
                    extra_data['parent_extra_data'] = \
                        parent_file.extra_data.copy()
            else:
                # We don't have an entry, but we still want to record the
                # parent ID, so we have something in common for all the
                # files when looking up the source revision to fetch from
                # the repository.
                parent_is_empty = True
                parent_source_filename = f.orig_filename
                parent_source_revision = f.orig_file_details

                if (parent_source_revision != PRE_CREATION and
                    parsed_diff.uses_commit_ids_as_revisions):
                    # Since the file wasn't explicitly provided in the
                    # parent diff, but the ParsedDiff says that commit IDs
                    # are used as revisions, we can use its parent commit
                    # ID as the parent revision here.
                    parent_commit_id = \
                        parsed_parent_diff.changes[0].parent_commit_id
                    assert parent_commit_id

                    parent_source_revision = parent_commit_id

            # Store the information on the parent's filename and revision.
            # It's important we force these to text, since they may be
            # byte strings and the revision may be a Revision instance.
            #
            # Starting in Review Board 4.0.5, we store this any time
            # there's a parent diff, whether or not the file existed in
            # the parent diff.
            extra_data.update({
                FileDiff._IS_PARENT_EMPTY_KEY: parent_is_empty,
                'parent_source_filename':
                    convert_to_unicode(parent_source_filename,
                                       encoding_list)[1],
                'parent_source_revision':
                    convert_to_unicode(parent_source_revision,
                                       encoding_list)[1],
            })

        # Filenames from the parser may be byte strings; normalize them to
        # Unicode using the repository's configured encodings.
        orig_file = convert_to_unicode(f.orig_filename, encoding_list)[1]
        dest_file = convert_to_unicode(f.modified_filename,
                                       encoding_list)[1]

        if f.deleted:
            status = FileDiff.DELETED
        elif f.moved:
            status = FileDiff.MOVED
        elif f.copied:
            status = FileDiff.COPIED
        else:
            status = FileDiff.MODIFIED

        filediff = FileDiff(
            diffset=diffset,
            commit=diffcommit,
            source_file=parser.normalize_diff_filename(orig_file),
            dest_file=parser.normalize_diff_filename(dest_file),
            source_revision=force_text(f.orig_file_details),
            dest_detail=force_text(f.modified_file_details),
            binary=f.binary,
            status=status,
            extra_data=extra_data)

        # Set this unconditionally, for backwards-compatibility purposes.
        # Review Board 4.0.6 introduced attribute wrappers in FileDiff and
        # introduced symlink targets. We ideally would not set this unless
        # it's True, but we don't want to risk breaking any assumptions on
        # its presence at this time.
        filediff.is_symlink = f.is_symlink

        if f.is_symlink:
            if f.old_symlink_target:
                filediff.old_symlink_target = \
                    convert_to_unicode(f.old_symlink_target,
                                       encoding_list)[1]

            if f.new_symlink_target:
                filediff.new_symlink_target = \
                    convert_to_unicode(f.new_symlink_target,
                                       encoding_list)[1]

        filediff.old_unix_mode = f.old_unix_mode
        filediff.new_unix_mode = f.new_unix_mode

        if not validate_only:
            # This state all requires making modifications to the
            # database. We only want to do this if we're saving.
            filediff.diff = f.data
            filediff.parent_diff = parent_content

            filediff.set_line_counts(raw_insert_count=f.insert_count,
                                     raw_delete_count=f.delete_count)

        filediffs.append(filediff)

    if not validate_only:
        # Insert all the FileDiffs in one batch, then persist any
        # extra_data accumulated above on the DiffSet/DiffCommit.
        FileDiff.objects.bulk_create(filediffs)

        if diffset.extra_data:
            diffset.save(update_fields=('extra_data',))

        if diffcommit is not None and diffcommit.extra_data:
            diffcommit.save(update_fields=('extra_data',))

    return filediffs