Пример #1
0
    def get_chunks_uncached(self):
        """Generate the diff chunks directly, without consulting the cache.

        The original and patched file contents are fetched for the filediff
        (and for the interfilediff, when one is set). Checksums of that
        content are recorded in ``extra_data`` for any filediff whose
        ``orig_sha1`` is still ``None``. When ``force_interdiff`` is set
        without an interfilediff, the two sides are swapped so the change is
        shown reverted.

        Once every chunk has been yielded for a plain diff (neither an
        interdiff nor a forced interdiff), the line counts found in
        ``self.counts`` are stored on the filediff.

        Yields:
            Each chunk produced by ``self.generate_chunks()``.
        """
        filediff = self.filediff
        interfilediff = self.interfilediff
        request = self.request
        encodings = self.encoding_list

        old = get_original_file(filediff, request, encodings)
        new = get_patched_file(old, filediff, request)

        if filediff.orig_sha1 is None:
            checksums = {
                "orig_sha1": self._get_checksum(old),
                "patched_sha1": self._get_checksum(new),
            }
            filediff.extra_data.update(checksums)
            filediff.save(update_fields=["extra_data"])

        if interfilediff:
            # An interdiff compares this filediff's patched content against
            # the interfilediff's patched content.
            old = new
            interdiff_orig = get_original_file(interfilediff, request, encodings)
            new = get_patched_file(interdiff_orig, interfilediff, request)

            if interfilediff.orig_sha1 is None:
                interdiff_checksums = {
                    "orig_sha1": self._get_checksum(interdiff_orig),
                    "patched_sha1": self._get_checksum(new),
                }
                interfilediff.extra_data.update(interdiff_checksums)
                interfilediff.save(update_fields=["extra_data"])

            timer_message = "Generating diff chunks for interdiff ids %s-%s (%s)" % (
                filediff.id,
                interfilediff.id,
                filediff.source_file,
            )
        else:
            if self.force_interdiff:
                # Swap the sides so the change is displayed as reverted.
                old, new = new, old

            timer_message = "Generating diff chunks for self.filediff id %s (%s)" % (
                filediff.id,
                filediff.source_file,
            )

        log_timer = log_timed(timer_message, request=request)

        for chunk in self.generate_chunks(old, new):
            yield chunk

        log_timer.done()

        if interfilediff or self.force_interdiff:
            # Line counts are only stored for a plain diff of the filediff.
            return

        counts = self.counts
        insert_count = counts["insert"]
        delete_count = counts["delete"]
        replace_count = counts["replace"]
        equal_count = counts["equal"]
        total_line_count = insert_count + delete_count + replace_count + equal_count

        filediff.set_line_counts(
            insert_count=insert_count,
            delete_count=delete_count,
            replace_count=replace_count,
            equal_count=equal_count,
            total_line_count=total_line_count,
        )
Пример #2
0
 def fetch_file(file, revision):
     """Fetch a file at the given revision, timing the operation.

     ``repository`` and ``tool`` are not parameters; they are resolved from
     an enclosing scope. The fetched content is passed through
     ``convert_line_endings()`` before being returned.
     """
     timer_message = "Fetching file '%s' r%s from %s" % (file, revision,
                                                         repository)
     log_timer = log_timed(timer_message)
     data = convert_line_endings(tool.get_file(file, revision))
     log_timer.done()
     return data
Пример #3
0
    def _get_file_uncached(self, path, revision, request):
        """Fetch a file from the repository without consulting the cache.

        This is called by get_file if the file isn't already in the cache.

        The ``fetching_file`` signal is sent before the fetch and the
        ``fetched_file`` signal (carrying the fetched data) afterwards. The
        file is retrieved through the hosting service when one is set, and
        through the SCMTool otherwise.
        """
        signal_kwargs = {
            'path': path,
            'revision': revision,
            'request': request,
        }
        fetching_file.send(sender=self, **signal_kwargs)

        timer_message = "Fetching file '%s' r%s from %s" % (path, revision,
                                                            self)
        log_timer = log_timed(timer_message, request=request)

        hosting_service = self.hosting_service

        if not hosting_service:
            data = self.get_scmtool().get_file(path, revision)
        else:
            data = hosting_service.get_file(self, path, revision)

        log_timer.done()

        fetched_file.send(sender=self, data=data, **signal_kwargs)

        return data
Пример #4
0
def patch(diff, file, filename, request=None):
    """Apply a diff to a file's content using the external ``patch`` tool.

    The original content is written to a temporary file inside a fresh
    temporary directory, ``patch`` is run with the diff on its standard
    input, and the patched result is read back.

    Args:
        diff: The diff content to apply.
        file: The original file content to patch.
        filename: The file's name. It is used in log and error messages and
            to name the ``.diff`` file saved when patching fails.
        request: Optional request passed along to the timed logger.

    Returns:
        The patched content. If ``diff`` is blank, ``file`` is returned
        unchanged.

    Raises:
        Exception: ``patch`` exited with a non-zero status. The temporary
            files are left in place for debugging.
    """
    log_timer = log_timed("Patching file %s" % filename,
                          request=request)

    if diff.strip() == "":
        # Someone uploaded an unchanged file. Return the one we're patching.
        # Finish the timer first so this path doesn't leave it dangling.
        log_timer.done()
        return file

    # Prepare the temporary directory if none is available
    tempdir = tempfile.mkdtemp(prefix='reviewboard.')

    (fd, oldfile) = tempfile.mkstemp(dir=tempdir)

    # Context managers ensure the handles are closed even if a write or read
    # raises.
    with os.fdopen(fd, "w+b") as f:
        f.write(convert_line_endings(file))

    diff = convert_line_endings(diff)

    newfile = '%s-new' % oldfile

    process = subprocess.Popen(['patch', '-o', newfile, oldfile],
                               stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)

    with controlled_subprocess("patch", process) as p:
        p.stdin.write(diff)
        p.stdin.close()
        patch_output = p.stdout.read()
        failure = p.wait()

    if failure:
        # Keep a copy of the diff next to the other temporary files so the
        # failure can be investigated.
        diff_path = "%s.diff" % (os.path.join(tempdir,
                                              os.path.basename(filename)))

        with open(diff_path, "w") as f:
            f.write(diff)

        log_timer.done()

        # FIXME: This doesn't provide any useful error report on why the patch
        # failed to apply, which makes it hard to debug.  We might also want to
        # have it clean up if DEBUG=False
        raise Exception(_("The patch to '%s' didn't apply cleanly. The temporary " +
                          "files have been left in '%s' for debugging purposes.\n" +
                          "`patch` returned: %s") %
                        (filename, tempdir, patch_output))

    with open(newfile, "r") as f:
        data = f.read()

    os.unlink(oldfile)
    os.unlink(newfile)
    os.rmdir(tempdir)

    log_timer.done()

    return data
Пример #5
0
    def process_request(self, request):
        """Set up logging for an incoming request.

        When the ``LOGGING_PAGE_TIMES`` setting is enabled, a timed log entry
        describing the request is started and stored on the request as
        ``_page_timedloginfo``. When the request's query string contains
        ``profiling`` and the ``LOGGING_ALLOW_PROFILING`` setting is enabled,
        ``settings.DEBUG`` is switched on.
        """
        page_times_enabled = getattr(settings, 'LOGGING_PAGE_TIMES', False)

        if page_times_enabled:
            message = 'Page request: HTTP %s %s (by %s)' % (
                request.method, request.path, request.user)
            request._page_timedloginfo = log_timed(message)

        if 'profiling' in request.GET:
            if getattr(settings, "LOGGING_ALLOW_PROFILING", False):
                settings.DEBUG = True
Пример #6
0
    def _get_file_uncached(self, path, revision, base_commit_id, request):
        """Fetch a file from the repository without consulting the cache.

        This is called by get_file if the file isn't already in the cache.

        The ``fetching_file`` signal is sent before the fetch and the
        ``fetched_file`` signal (carrying the fetched data) afterwards. The
        file is retrieved through the hosting service when one is set.
        Otherwise the SCMTool is used; a tool whose ``get_file()`` does not
        accept keyword arguments triggers a deprecation warning and is
        called without ``base_commit_id``.
        """
        signal_kwargs = {
            'path': path,
            'revision': revision,
            'base_commit_id': base_commit_id,
            'request': request,
        }
        fetching_file.send(sender=self, **signal_kwargs)

        if not base_commit_id:
            timer_msg = "Fetching file '%s' r%s from %s" % (path, revision,
                                                            self)
        else:
            timer_msg = ("Fetching file '%s' r%s (base commit ID %s) from %s"
                         % (path, revision, base_commit_id, self))

        log_timer = log_timed(timer_msg, request=request)

        hosting_service = self.hosting_service

        if hosting_service:
            data = hosting_service.get_file(self, path, revision,
                                            base_commit_id=base_commit_id)
        else:
            tool = self.get_scmtool()
            accepts_kwargs = \
                inspect.getargspec(tool.get_file).keywords is not None

            if accepts_kwargs:
                data = tool.get_file(path, revision,
                                     base_commit_id=base_commit_id)
            else:
                # Old-style signature: warn, then call it the legacy way.
                warnings.warn(
                    'SCMTool.get_file() must take keyword arguments, '
                    'signature for %s is deprecated.' % tool.name,
                    RemovedInReviewBoard40Warning)
                data = tool.get_file(path, revision)

        log_timer.done()

        fetched_file.send(sender=self, data=data, **signal_kwargs)

        return data
Пример #7
0
    def apply_pygments(data, filename):
        """Syntax-highlight ``data`` with Pygments, giving up after 2 seconds.

        A ``SIGALRM`` handler raising ``TimeoutException`` is installed and a
        two second alarm is armed around lexer selection and highlighting.

        Args:
            data: The file content to highlight.
            filename: The file's name, used to pick a lexer and in log
                messages.

        Returns:
            The highlighted output split into lines, or ``None`` if
            highlighting timed out.
        """
        # Sometimes pygments spins forever trying to highlight something, give
        # it a couple seconds then give up.

        def timeout_handler(signum, frame):
            raise TimeoutException()

        result = None

        # Bound up front so the timeout handler below can report on it even
        # when the alarm fires while the lexer is still being guessed.
        lexer = None

        signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(2)

        try:
            # XXX Guessing is preferable but really slow, especially on XML
            #     files.

            log_timer = log_timed("Lexer guessing for '%s'" % filename)

            if filename.endswith(".xml"):
                lexer = get_lexer_for_filename(filename, stripnl=False, encoding='utf-8')
            else:
                lexer = guess_lexer_for_filename(filename, data, stripnl=False)

            log_timer.done()

            try:
                # This is only available in 0.7 and higher
                lexer.add_filter('codetagify')
            except AttributeError:
                pass

            log_timer = log_timed("Syntax highlighting file '%s' with lexer %s" % (filename, lexer.name))
            result = pygments.highlight(data, lexer, NoWrapperHtmlFormatter()).splitlines()
            log_timer.done()
        except TimeoutException:
            if lexer is not None:
                lexer_name = lexer.name
            else:
                lexer_name = '<undetermined>'

            logging.warning("Timed out trying to highlight data for file '%s' with lexer %s",
                            filename, lexer_name)
        finally:
            # Always disarm the alarm. Otherwise an unrelated error raised
            # above would leave it pending, and it would fire later in
            # whatever code happens to be running.
            signal.alarm(0)

        return result
Пример #8
0
    def _get_file_uncached(self, path, revision, base_commit_id, request):
        """Fetch a file from the repository without consulting the cache.

        This is called by get_file if the file isn't already in the cache.

        The ``fetching_file`` signal is sent before the fetch and the
        ``fetched_file`` signal (carrying the fetched data) afterwards. The
        file is retrieved through the hosting service when one is set.
        Otherwise the SCMTool is asked for the file at ``revision``; if that
        raises ``FileNotFoundError`` and a base commit ID is available, the
        lookup is retried using the base commit ID as the revision.
        """
        signal_kwargs = {
            'path': path,
            'revision': revision,
            'base_commit_id': base_commit_id,
            'request': request,
        }
        fetching_file.send(sender=self, **signal_kwargs)

        if not base_commit_id:
            timer_msg = "Fetching file '%s' r%s from %s" % (path, revision,
                                                            self)
        else:
            timer_msg = ("Fetching file '%s' r%s (base commit ID %s) from %s"
                         % (path, revision, base_commit_id, self))

        log_timer = log_timed(timer_msg, request=request)

        hosting_service = self.hosting_service

        if hosting_service:
            data = hosting_service.get_file(self, path, revision,
                                            base_commit_id=base_commit_id)
        else:
            try:
                data = self.get_scmtool().get_file(path, revision)
            except FileNotFoundError:
                if not base_commit_id:
                    raise

                # Some funky workflows with mq (mercurial) can cause issues
                # with parent diffs. If we didn't find it with the parsed
                # revision, and there's a base commit ID, try that.
                data = self.get_scmtool().get_file(path, base_commit_id)

        log_timer.done()

        fetched_file.send(sender=self, data=data, **signal_kwargs)

        return data
Пример #9
0
    def _get_file_uncached(self, path, revision, base_commit_id, request):
        """Fetch a file from the repository without consulting the cache.

        This is called by get_file if the file isn't already in the cache.

        The ``fetching_file`` signal is sent before the fetch and the
        ``fetched_file`` signal (carrying the fetched data) afterwards. The
        file is retrieved through the hosting service (which also receives
        ``base_commit_id``) when one is set, and through the SCMTool
        otherwise.
        """
        signal_kwargs = {
            'path': path,
            'revision': revision,
            'base_commit_id': base_commit_id,
            'request': request,
        }
        fetching_file.send(sender=self, **signal_kwargs)

        if not base_commit_id:
            timer_msg = "Fetching file '%s' r%s from %s" % (path, revision,
                                                            self)
        else:
            timer_msg = ("Fetching file '%s' r%s (base commit ID %s) from %s"
                         % (path, revision, base_commit_id, self))

        log_timer = log_timed(timer_msg, request=request)

        hosting_service = self.hosting_service

        if not hosting_service:
            data = self.get_scmtool().get_file(path, revision)
        else:
            data = hosting_service.get_file(self, path, revision,
                                            base_commit_id=base_commit_id)

        log_timer.done()

        fetched_file.send(sender=self, data=data, **signal_kwargs)

        return data
Пример #10
0
    def _get_chunks_uncached(self):
        """Yield the diff chunks, bypassing the cache.

        The original and patched contents are fetched, converted to unicode
        and split into lines. Syntax-highlighted markup is used when enabled
        and available; otherwise the escaped text is used. The differ's
        opcodes are then turned into chunks, with long runs of equal lines
        split into collapsible and visible parts.

        After all chunks have been yielded for a plain diff (neither an
        interdiff nor a forced interdiff), the accumulated line counts are
        stored on the filediff.

        Yields:
            The chunks built by ``self._new_chunk()``.
        """
        encoding_list = self.diffset.repository.get_encoding_list()

        old = get_original_file(self.filediff, self.request, encoding_list)
        new = get_patched_file(old, self.filediff, self.request)

        if self.interfilediff:
            old = new
            interdiff_orig = get_original_file(self.interfilediff,
                                               self.request, encoding_list)
            new = get_patched_file(interdiff_orig, self.interfilediff,
                                   self.request)
        elif self.force_interdiff:
            # Basically, revert the change.
            old, new = new, old

        old = convert_to_unicode(old, encoding_list)[1]
        new = convert_to_unicode(new, encoding_list)[1]

        # Normalize the input so that if there isn't a trailing newline, we add
        # it.
        if old and old[-1] != '\n':
            old += '\n'

        if new and new[-1] != '\n':
            new += '\n'

        a = self.NEWLINES_RE.split(old or '')
        b = self.NEWLINES_RE.split(new or '')

        # Remove the trailing newline, now that we've split this. This will
        # prevent a duplicate line number at the end of the diff.
        del a[-1]
        del b[-1]

        a_num_lines = len(a)
        b_num_lines = len(b)

        markup_a = markup_b = None

        if self._get_enable_syntax_highlighting(old, new, a, b):
            repository = self.filediff.diffset.repository
            tool = repository.get_scmtool()
            source_file = \
                tool.normalize_path_for_display(self.filediff.source_file)
            dest_file = \
                tool.normalize_path_for_display(self.filediff.dest_file)

            try:
                # TODO: Try to figure out the right lexer for these files
                #       once instead of twice.
                markup_a = self._apply_pygments(old or '', source_file)
                markup_b = self._apply_pygments(new or '', dest_file)
            except Exception:
                # Highlighting is best-effort: on failure, fall back to the
                # escaped plain text below. Catching Exception rather than
                # using a bare except lets KeyboardInterrupt and SystemExit
                # propagate.
                pass

        if not markup_a:
            markup_a = self.NEWLINES_RE.split(escape(old))

        if not markup_b:
            markup_b = self.NEWLINES_RE.split(escape(new))

        siteconfig = SiteConfiguration.objects.get_current()
        ignore_space = True

        # Whitespace is significant for files matching any of the configured
        # patterns.
        for pattern in siteconfig.get('diffviewer_include_space_patterns'):
            if fnmatch.fnmatch(self.filename, pattern):
                ignore_space = False
                break

        self.differ = get_differ(a,
                                 b,
                                 ignore_space=ignore_space,
                                 compat_version=self.diffset.diffcompat)
        self.differ.add_interesting_lines_for_headers(self.filename)

        context_num_lines = siteconfig.get("diffviewer_context_num_lines")
        collapse_threshold = 2 * context_num_lines + 3

        if self.interfilediff:
            log_timer = log_timed(
                "Generating diff chunks for interdiff ids %s-%s (%s)" %
                (self.filediff.id, self.interfilediff.id,
                 self.filediff.source_file),
                request=self.request)
        else:
            log_timer = log_timed(
                "Generating diff chunks for self.filediff id %s (%s)" %
                (self.filediff.id, self.filediff.source_file),
                request=self.request)

        line_num = 1
        opcodes_generator = get_diff_opcode_generator(self.differ,
                                                      self.filediff,
                                                      self.interfilediff)

        counts = {
            'equal': 0,
            'replace': 0,
            'insert': 0,
            'delete': 0,
        }

        for tag, i1, i2, j1, j2, meta in opcodes_generator:
            old_lines = markup_a[i1:i2]
            new_lines = markup_b[j1:j2]
            num_lines = max(len(old_lines), len(new_lines))

            # NOTE(review): the iterables passed here can differ in length;
            # presumably this relies on Python 2's map() padding the shorter
            # ones with None and returning a list. Confirm before porting.
            lines = map(functools.partial(self._diff_line, tag, meta),
                        range(line_num, line_num + num_lines),
                        range(i1 + 1, i2 + 1), range(j1 + 1, j2 + 1), a[i1:i2],
                        b[j1:j2], old_lines, new_lines)

            counts[tag] += num_lines

            if tag == 'equal' and num_lines > collapse_threshold:
                last_range_start = num_lines - context_num_lines

                if line_num == 1:
                    yield self._new_chunk(lines, 0, last_range_start, True)
                    yield self._new_chunk(lines, last_range_start, num_lines)
                else:
                    yield self._new_chunk(lines, 0, context_num_lines)

                    if i2 == a_num_lines and j2 == b_num_lines:
                        yield self._new_chunk(lines, context_num_lines,
                                              num_lines, True)
                    else:
                        yield self._new_chunk(lines, context_num_lines,
                                              last_range_start, True)
                        yield self._new_chunk(lines, last_range_start,
                                              num_lines)
            else:
                yield self._new_chunk(lines, 0, num_lines, False, tag, meta)

            line_num += num_lines

        log_timer.done()

        # Only store line counts for a plain diff. With force_interdiff the
        # old and new sides were swapped above, so the counts describe the
        # reverted change rather than the filediff itself.
        if not self.interfilediff and not self.force_interdiff:
            insert_count = counts['insert']
            delete_count = counts['delete']
            replace_count = counts['replace']
            equal_count = counts['equal']

            self.filediff.set_line_counts(
                insert_count=insert_count,
                delete_count=delete_count,
                replace_count=replace_count,
                equal_count=equal_count,
                total_line_count=(insert_count + delete_count + replace_count +
                                  equal_count))
Пример #11
0
def get_diff_files(diffset, filediff=None, interdiffset=None):
    """Generate a list of files that will be displayed in a diff.

    This will go through the given diffset/interdiffset, or a given filediff
    within that diffset, and generate the list of files that will be
    displayed. This file list will contain a bunch of metadata on the files,
    such as the index, original/modified names, revisions, associated
    filediffs/diffsets, and so on.

    This can be used along with populate_diff_chunks to build a full list
    containing all diff chunks used for rendering a side-by-side diff.

    Args:
        diffset: The diffset whose files are listed.
        filediff: Optional filediff. When given, only this filediff is
            considered rather than every file in ``diffset``.
        interdiffset: Optional second diffset to build an interdiff against.

    Returns:
        list of dict: One dictionary of metadata per file, sorted by base
        path, then file name, then extension in descending order.
    """
    if filediff:
        filediffs = [filediff]

        if interdiffset:
            log_timer = log_timed("Generating diff file info for "
                                  "interdiffset ids %s-%s, filediff %s" %
                                  (diffset.id, interdiffset.id, filediff.id))
        else:
            log_timer = log_timed("Generating diff file info for "
                                  "diffset id %s, filediff %s" %
                                  (diffset.id, filediff.id))
    else:
        filediffs = diffset.files.select_related().all()

        if interdiffset:
            log_timer = log_timed("Generating diff file info for "
                                  "interdiffset ids %s-%s" %
                                  (diffset.id, interdiffset.id))
        else:
            log_timer = log_timed("Generating diff file info for "
                                  "diffset id %s" % diffset.id)

    # A map used to quickly look up the equivalent interfilediff given a
    # source file.
    interdiff_map = {}

    if interdiffset:
        for interfilediff in interdiffset.files.all():
            if (not filediff or
                    filediff.source_file == interfilediff.source_file):
                interdiff_map[interfilediff.source_file] = interfilediff

    # In order to support interdiffs properly, we need to display diffs
    # on every file in the union of both diffsets. Iterating over one diffset
    # or the other doesn't suffice.
    #
    # We build a list of parts containing the source filediff, the interdiff
    # filediff (if specified), and whether to force showing an interdiff
    # (in the case where a file existed in the source filediff but was
    # reverted in the interdiff).
    filediff_parts = []

    for cur_filediff in filediffs:
        # Take the matching interfilediff out of the map (None if there is
        # no match), so that only unmatched interdiff files remain in it.
        interfilediff = interdiff_map.pop(cur_filediff.source_file, None)

        filediff_parts.append((cur_filediff, interfilediff,
                               interdiffset is not None))

    if interdiffset:
        # We've removed everything in the map that we've already found.
        # What's left are interdiff files that are new. They have no file
        # to diff against.
        #
        # The end result is going to be a view that's the same as when you're
        # viewing a standard diff. As such, we can pretend the interdiff is
        # the source filediff and not specify an interdiff. Keeps things
        # simple, code-wise, since we really have no need to special-case
        # this.
        filediff_parts += [(interdiff, None, False)
                           for interdiff in interdiff_map.values()]

    files = []

    for cur_filediff, interfilediff, force_interdiff in filediff_parts:
        newfile = (cur_filediff.source_revision == PRE_CREATION)

        if interdiffset:
            # First, find out if we want to even process this one.
            # We only process if there's a difference in files.

            if (cur_filediff and interfilediff and
                    cur_filediff.diff == interfilediff.diff):
                continue

            source_revision = "Diff Revision %s" % diffset.revision

            if not interfilediff and force_interdiff:
                dest_revision = "Diff Revision %s - File Reverted" % \
                                interdiffset.revision
            else:
                dest_revision = "Diff Revision %s" % interdiffset.revision
        else:
            source_revision = get_revision_str(cur_filediff.source_revision)

            if newfile:
                dest_revision = NEW_FILE_STR
            else:
                dest_revision = NEW_CHANGE_STR

        # Split on the last '/'. Without a '/', the base path is empty and
        # the whole source file name is the base name.
        basepath, sep, basename = cur_filediff.source_file.rpartition('/')

        tool = cur_filediff.diffset.repository.get_scmtool()
        depot_filename = \
            tool.normalize_path_for_display(cur_filediff.source_file)
        dest_filename = \
            tool.normalize_path_for_display(cur_filediff.dest_file)

        files.append({
            'depot_filename': depot_filename,
            'dest_filename': dest_filename or depot_filename,
            'basename': basename,
            'basepath': basepath,
            'revision': source_revision,
            'dest_revision': dest_revision,
            'filediff': cur_filediff,
            'interfilediff': interfilediff,
            'force_interdiff': force_interdiff,
            'binary': cur_filediff.binary,
            'deleted': cur_filediff.deleted,
            'moved': cur_filediff.moved,
            'newfile': newfile,
            'index': len(files),
            'chunks_loaded': False,
        })

    def cmp_file(x, y):
        # Sort based on basepath in asc order
        if x["basepath"] != y["basepath"]:
            return cmp(x["basepath"], y["basepath"])

        # Sort based on filename in asc order, then based on extension in desc
        # order, to make *.h be ahead of *.c/cpp
        x_file, x_ext = os.path.splitext(x["basename"])
        y_file, y_ext = os.path.splitext(y["basename"])
        if x_file != y_file:
            return cmp(x_file, y_file)
        else:
            return cmp(y_ext, x_ext)

    # NOTE(review): cmp() and a positional comparison function for
    # list.sort() only exist in Python 2.
    files.sort(cmp_file)

    log_timer.done()

    return files
Пример #12
0
def patch(diff, file, filename, request=None):
    """Apply a diff to a file's content using the external ``patch`` tool.

    The original content is written to a temporary file inside a fresh
    temporary directory, ``patch`` is run from that directory with the diff
    on its standard input, and the patched result is read back.

    Args:
        diff: The diff content to apply.
        file: The original file content to patch.
        filename: The file's name. It is used in log and error messages and
            to name the ``.diff`` file saved when patching fails.
        request: Optional request passed along to the timed logger.

    Returns:
        The patched content. If ``diff`` is blank, ``file`` is returned
        unchanged.

    Raises:
        Exception: ``patch`` exited with a non-zero status. The temporary
            files are left in place for debugging.
    """
    log_timer = log_timed("Patching file %s" % filename, request=request)

    if diff.strip() == "":
        # Someone uploaded an unchanged file. Return the one we're patching.
        # Finish the timer first so this path doesn't leave it dangling.
        log_timer.done()
        return file

    # Prepare the temporary directory if none is available
    tempdir = tempfile.mkdtemp(prefix='reviewboard.')

    (fd, oldfile) = tempfile.mkstemp(dir=tempdir)

    # Context managers ensure the handles are closed even if a write or read
    # raises.
    with os.fdopen(fd, "w+b") as f:
        f.write(convert_line_endings(file))

    diff = convert_line_endings(diff)

    newfile = '%s-new' % oldfile

    process = subprocess.Popen(['patch', '-o', newfile, oldfile],
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               cwd=tempdir)

    with controlled_subprocess("patch", process) as p:
        p.stdin.write(diff)
        p.stdin.close()
        patch_output = p.stdout.read()
        failure = p.wait()

    if failure:
        # Keep a copy of the diff next to the other temporary files so the
        # failure can be investigated.
        diff_path = "%s.diff" % (os.path.join(tempdir,
                                              os.path.basename(filename)))

        with open(diff_path, "w") as f:
            f.write(diff)

        log_timer.done()

        # FIXME: This doesn't provide any useful error report on why the patch
        # failed to apply, which makes it hard to debug.  We might also want to
        # have it clean up if DEBUG=False
        raise Exception(
            _("The patch to '%(filename)s' didn't apply cleanly. The "
              "temporary files have been left in '%(tempdir)s' for debugging "
              "purposes.\n"
              "`patch` returned: %(output)s") % {
                  'filename': filename,
                  'tempdir': tempdir,
                  'output': patch_output,
              })

    with open(newfile, "r") as f:
        data = f.read()

    os.unlink(oldfile)
    os.unlink(newfile)
    os.rmdir(tempdir)

    log_timer.done()

    return data
Пример #13
0
    def get_chunks_uncached(self):
        """Generate the diff chunks directly, without consulting the cache.

        The "new" side is the filediff's patched content. The "old" side is
        chosen as follows:

        * With a base filediff, it is the base filediff's patched content.
        * Otherwise, for a filediff with a commit ID that has ancestors, it
          is the first ancestor's original content.
        * Otherwise, it is the filediff's own original content.

        SHA1/SHA256 checksums are recorded in ``extra_data`` for any filediff
        that does not have a SHA256 checksum yet. For an interdiff, the
        filediff's patched content is compared against the interfilediff's
        patched content. With ``force_interdiff`` and no interfilediff, the
        two sides are swapped.

        Once every chunk has been yielded for a plain diff (no interfilediff,
        no base filediff and no forced interdiff), the line counts found in
        ``self.counts`` are stored on the filediff.

        Yields:
            Each chunk produced by ``self.generate_chunks()``.
        """
        base_filediff = self.base_filediff
        filediff = self.filediff
        interfilediff = self.interfilediff
        request = self.request

        old = get_original_file(filediff=filediff, request=request)
        new = get_patched_file(source_data=old,
                               filediff=filediff,
                               request=request)

        new_encoding_list = old_encoding_list = \
            get_filediff_encodings(filediff)

        if base_filediff is not None:
            # The diff is against a commit that:
            #
            # 1. Follows the first commit in a series (the first won't have
            #    a base_commit/base_filediff that can be looked up)
            #
            # 2. Follows a commit that modifies this file, or is the base
            #    commit that modifies this file.
            #
            # The "old" side becomes the patched version of that commit's
            # version of the file.
            base_orig = get_original_file(filediff=base_filediff,
                                          request=request)
            old = get_patched_file(source_data=base_orig,
                                   filediff=base_filediff,
                                   request=request)
            old_encoding_list = get_filediff_encodings(base_filediff)
        elif filediff.commit_id:
            # This diff is against a commit, but no previous FileDiff
            # modifying this file could be found. It could be the very first
            # commit in a series, or the file might not have been modified in
            # the base commit or any previous commit.
            #
            # Fetch the first ancestor of this file in the commit history, if
            # there is one, and base the "old" side on its original version.
            # That way this commit and all modifications since are shown as
            # "new": effectively the upstream file, before any commits.
            #
            # This should be safe because, without a base_filediff, there
            # should be no older commit containing modifications that we want
            # to diff against.
            ancestors = filediff.get_ancestors(minimal=True)

            if ancestors:
                first_ancestor = ancestors[0]
                old = get_original_file(filediff=first_ancestor,
                                        request=request)
                old_encoding_list = get_filediff_encodings(first_ancestor)

        # SHA256 checksums were introduced in Review Board 4.0, long after
        # SHA1 checksums. A filediff with a SHA256 checksum also has a SHA1
        # checksum, but the inverse is not true, so SHA256 is checked first.
        if filediff.orig_sha256 is None:
            checksums = {}

            if filediff.orig_sha1 is None:
                checksums['orig_sha1'] = self._get_sha1(old)
                checksums['patched_sha1'] = self._get_sha1(new)

            checksums['orig_sha256'] = self._get_sha256(old)
            checksums['patched_sha256'] = self._get_sha256(new)

            filediff.extra_data.update(checksums)
            filediff.save(update_fields=['extra_data'])

        if interfilediff:
            # The filediff's patched content is now the "old" side.
            old = new
            old_encoding_list = new_encoding_list

            interdiff_orig = get_original_file(filediff=interfilediff,
                                               request=request)
            new = get_patched_file(source_data=interdiff_orig,
                                   filediff=interfilediff,
                                   request=request)
            new_encoding_list = get_filediff_encodings(interfilediff)

            # Same SHA256-before-SHA1 check as above, for the interfilediff.
            if interfilediff.orig_sha256 is None:
                interdiff_checksums = {}

                if interfilediff.orig_sha1 is None:
                    interdiff_checksums['orig_sha1'] = \
                        self._get_sha1(interdiff_orig)
                    interdiff_checksums['patched_sha1'] = self._get_sha1(new)

                interdiff_checksums['orig_sha256'] = \
                    self._get_sha256(interdiff_orig)
                interdiff_checksums['patched_sha256'] = self._get_sha256(new)

                interfilediff.extra_data.update(interdiff_checksums)
                interfilediff.save(update_fields=['extra_data'])

            timer_message = (
                "Generating diff chunks for interdiff ids %s-%s (%s)" %
                (filediff.id, interfilediff.id, filediff.source_file))
        else:
            if self.force_interdiff:
                # Basically, revert the change.
                old, new = new, old
                old_encoding_list, new_encoding_list = \
                    new_encoding_list, old_encoding_list

            timer_message = (
                "Generating diff chunks for filediff id %s (%s)" %
                (filediff.id, filediff.source_file))

        log_timer = log_timed(timer_message, request=request)

        chunks = self.generate_chunks(old=old,
                                      new=new,
                                      old_encoding_list=old_encoding_list,
                                      new_encoding_list=new_encoding_list)

        for chunk in chunks:
            yield chunk

        log_timer.done()

        if interfilediff or self.base_filediff or self.force_interdiff:
            # Line counts are only stored for a plain diff of the filediff.
            return

        counts = self.counts
        insert_count = counts['insert']
        delete_count = counts['delete']
        replace_count = counts['replace']
        equal_count = counts['equal']
        total_line_count = (insert_count + delete_count + replace_count +
                            equal_count)

        filediff.set_line_counts(
            insert_count=insert_count,
            delete_count=delete_count,
            replace_count=replace_count,
            equal_count=equal_count,
            total_line_count=total_line_count)
Пример #14
0
    def _get_file_uncached(self, path, revision, context):
        """Fetch a file's contents from the repository, skipping the cache.

        :py:meth:`get_file` calls this internally when the file isn't
        already in the cache.

        The :py:data:`~reviewboard.scmtools.signals.fetching_file` signal is
        sent before the fetch begins, and the
        :py:data:`~reviewboard.scmtools.signals.fetched_file` signal is sent
        once the data has been retrieved.

        Args:
            path (unicode):
                The path to the file in the repository.

            revision (unicode):
                The revision of the file to retrieve.

            context (reviewboard.scmtools.core.FileLookupContext):
                Extra context used to help look up this file.

                Version Added:
                    4.0.5

        Returns:
            bytes:
            The resulting file contents.

        """
        request = context.request
        base_commit_id = context.base_commit_id

        # Both signals are sent with the same lookup arguments;
        # ``fetched_file`` additionally receives the fetched data.
        signal_kwargs = {
            'path': path,
            'revision': revision,
            'base_commit_id': base_commit_id,
            'request': request,
            'context': context,
        }

        fetching_file.send(sender=self, **signal_kwargs)

        if base_commit_id:
            timer_msg = (
                "Fetching file '%s' r%s (base commit ID %s) from %s"
                % (path, revision, base_commit_id, self))
        else:
            timer_msg = ("Fetching file '%s' r%s from %s"
                         % (path, revision, self))

        log_timer = log_timed(timer_msg, request=request)

        hosting_service = self.hosting_service

        # A hosting service takes priority over the SCMTool. Its get_file()
        # takes the repository itself as the first argument.
        if hosting_service:
            provider = hosting_service
            data = provider.get_file(self,
                                     path,
                                     revision,
                                     base_commit_id=base_commit_id,
                                     context=context)
        else:
            provider = self.get_scmtool()
            data = provider.get_file(path,
                                     revision,
                                     base_commit_id=base_commit_id,
                                     context=context)

        # Whichever backend served the request, it must hand back bytes.
        assert isinstance(data, bytes), (
            '%s.get_file() must return a byte string, not %s'
            % (type(provider).__name__, type(data)))

        log_timer.done()

        fetched_file.send(sender=self, data=data, **signal_kwargs)

        return data
Пример #15
0
def get_chunks(diffset, filediff, interfilediff, force_interdiff,
               enable_syntax_highlighting):
    """Yield the chunks making up a side-by-side diff of a file.

    The original and patched versions of the file are fetched, converted to
    UTF-8, split into lines, optionally syntax-highlighted, and then run
    through ``Differ``. Every opcode the differ produces is turned into one
    or more chunk dictionaries. Long runs of equal lines are split up so
    that the middle portion can be collapsed.

    Args:
        diffset:
            The diffset supplying the repository encoding and the diff
            compatibility version.

        filediff:
            The source filediff. This is required.

        interfilediff:
            The filediff from the interdiff revision, if any. When
            provided, the chunks show the changes between the two patched
            files.

        force_interdiff:
            When true and no ``interfilediff`` is given, the original and
            patched files are swapped, showing the change as reverted.

        enable_syntax_highlighting:
            Whether to attempt syntax highlighting. This is turned off
            automatically for files exceeding the configured line-count
            threshold, ``STYLED_MAX_LIMIT_BYTES`` or
            ``STYLED_MAX_LINE_LEN``.

    Yields:
        dict:
        A chunk with the keys ``index``, ``lines``, ``numlines``,
        ``change``, ``collapsable`` and ``meta``.
    """
    def diff_line(vlinenum, oldlinenum, newlinenum, oldline, newline,
                  oldmarkup, newmarkup):
        """Build the display row for one line of the side-by-side diff.

        The row holds the virtual line number, then the line number, markup
        and changed regions for each side, then a whitespace-only flag. If
        the line was moved, the move destination is appended.
        """
        # This function accesses the variable meta, defined in an outer context.
        if (oldline and newline and
            len(oldline) <= STYLED_MAX_LINE_LEN and
            len(newline) <= STYLED_MAX_LINE_LEN and
            oldline != newline):
            oldregion, newregion = get_line_changed_regions(oldline, newline)
        else:
            oldregion = newregion = []

        result = [vlinenum,
                  oldlinenum or '', mark_safe(oldmarkup or ''), oldregion,
                  newlinenum or '', mark_safe(newmarkup or ''), newregion,
                  (oldlinenum, newlinenum) in meta['whitespace_lines']]

        if oldlinenum and oldlinenum in meta.get('moved', {}):
            destination = meta["moved"][oldlinenum]
            result.append(destination)
        elif newlinenum and newlinenum in meta.get('moved', {}):
            destination = meta["moved"][newlinenum]
            result.append(destination)

        return result

    def new_chunk(chunk_index, all_lines, start, end, collapsable=False,
                  tag='equal', meta=None):
        """Build a chunk dictionary covering ``all_lines[start:end]``.

        The headers found within the range are recorded in ``meta`` for
        both sides of the diff.
        """
        if not meta:
            meta = {}

        left_headers = list(get_interesting_headers(differ, all_lines,
                                                    start, end - 1, False))
        right_headers = list(get_interesting_headers(differ, all_lines,
                                                     start, end - 1, True))

        meta['left_headers'] = left_headers
        meta['right_headers'] = right_headers

        lines = all_lines[start:end]
        numlines = len(lines)

        compute_chunk_last_header(lines, numlines, meta, last_header)

        if (collapsable and end < len(all_lines) and
            (last_header[0] or last_header[1])):
            meta['headers'] = list(last_header)

        return {
            'index': chunk_index,
            'lines': lines,
            'numlines': numlines,
            'change': tag,
            'collapsable': collapsable,
            'meta': meta,
        }

    def get_interesting_headers(differ, lines, start, end, is_modified_file):
        """Returns all headers for a region of a diff.

        This scans for all headers that fall within the specified range
        of the specified lines on both the original and modified files.

        This is a generator yielding ``(linenum, line)`` tuples. The index
        of the last header yielded is remembered in ``last_header_index``
        so that later calls resume scanning from there.
        """
        possible_functions = differ.get_interesting_lines('header',
                                                          is_modified_file)

        if not possible_functions:
            # End the generator with a plain return. Raising StopIteration
            # inside a generator becomes a RuntimeError under PEP 479.
            return

        try:
            if is_modified_file:
                last_index = last_header_index[1]
                i1 = lines[start][4]
                i2 = lines[end - 1][4]
            else:
                last_index = last_header_index[0]
                i1 = lines[start][1]
                i2 = lines[end - 1][1]
        except IndexError:
            return

        for i in xrange(last_index, len(possible_functions)):
            linenum, line = possible_functions[i]
            linenum += 1

            if linenum > i2:
                break
            elif linenum >= i1:
                last_index = i
                yield (linenum, line)

        if is_modified_file:
            last_header_index[1] = last_index
        else:
            last_header_index[0] = last_index

    def apply_pygments(data, filename):
        """Return ``data`` highlighted by Pygments, as a list of lines.

        The lexer is chosen from ``filename`` alone.
        """
        # XXX Guessing is preferable but really slow, especially on XML
        #     files.
        #if filename.endswith(".xml"):
        lexer = get_lexer_for_filename(filename, stripnl=False,
                                       encoding='utf-8')
        #else:
        #    lexer = guess_lexer_for_filename(filename, data, stripnl=False)

        try:
            # This is only available in 0.7 and higher
            lexer.add_filter('codetagify')
        except AttributeError:
            pass

        return pygments.highlight(data, lexer, NoWrapperHtmlFormatter()).splitlines()


    # There are three ways this function is called:
    #
    #     1) filediff, no interfilediff
    #        - Returns chunks for a single filediff. This is the usual way
    #          people look at diffs in the diff viewer.
    #
    #          In this mode, we get the original file based on the filediff
    #          and then patch it to get the resulting file.
    #
    #          This is also used for interdiffs where the source revision
    #          has no equivalent modified file but the interdiff revision
    #          does. It's no different than a standard diff.
    #
    #     2) filediff, interfilediff
    #        - Returns chunks showing the changes between a source filediff
    #          and the interdiff.
    #
    #          This is the typical mode used when showing the changes
    #          between two diffs. It requires that the file is included in
    #          both revisions of a diffset.
    #
    #     3) filediff, no interfilediff, force_interdiff
    #        - Returns chunks showing the changes between a source
    #          diff and an unmodified version of the diff.
    #
    #          This is used when the source revision in the diffset contains
    #          modifications to a file which have then been reverted in the
    #          interdiff revision. We don't actually have an interfilediff
    #          in this case, so we have to indicate that we are indeed in
    #          interdiff mode so that we can special-case this and not
    #          grab a patched file for the interdiff version.

    assert filediff

    source_filename = filediff.source_file

    old = get_original_file(filediff)
    new = get_patched_file(old, filediff)

    if interfilediff:
        old = new
        interdiff_orig = get_original_file(interfilediff)
        new = get_patched_file(interdiff_orig, interfilediff)
    elif force_interdiff:
        # Basically, revert the change.
        old, new = new, old

    encoding = diffset.repository.encoding or 'iso-8859-15'
    old = convert_to_utf8(old, encoding)
    new = convert_to_utf8(new, encoding)

    # Normalize the input so that if there isn't a trailing newline, we add
    # it.
    if old and old[-1] != '\n':
        old += '\n'

    if new and new[-1] != '\n':
        new += '\n'

    a = NEWLINES_RE.split(old or '')
    b = NEWLINES_RE.split(new or '')

    # Remove the trailing newline, now that we've split this. This will
    # prevent a duplicate line number at the end of the diff.
    del(a[-1])
    del(b[-1])

    a_num_lines = len(a)
    b_num_lines = len(b)

    markup_a = markup_b = None

    siteconfig = SiteConfiguration.objects.get_current()

    threshold = siteconfig.get('diffviewer_syntax_highlighting_threshold')

    if threshold and (a_num_lines > threshold or b_num_lines > threshold):
        enable_syntax_highlighting = False

    if enable_syntax_highlighting:
        # Very long files, especially XML files, can take a long time to
        # highlight. For files over a certain size, don't highlight them.
        if (len(old) > STYLED_MAX_LIMIT_BYTES or
            len(new) > STYLED_MAX_LIMIT_BYTES):
            enable_syntax_highlighting = False

    if enable_syntax_highlighting:
        # Don't style the file if we have any *really* long lines.
        # It's likely a minified file or data or something that doesn't
        # need styling, and it will just grind Review Board to a halt.
        for lines in (a, b):
            for line in lines:
                if len(line) > STYLED_MAX_LINE_LEN:
                    enable_syntax_highlighting = False
                    break

            if not enable_syntax_highlighting:
                break

    if enable_syntax_highlighting:
        repository = filediff.diffset.repository
        tool = repository.get_scmtool()
        source_file = tool.normalize_path_for_display(filediff.source_file)
        dest_file = tool.normalize_path_for_display(filediff.dest_file)
        try:
            # TODO: Try to figure out the right lexer for these files
            #       once instead of twice.
            markup_a = apply_pygments(old or '', source_file)
            markup_b = apply_pygments(new or '', dest_file)
        except Exception:
            # Highlighting is best-effort. On any failure we fall back to
            # the escaped plain text below. KeyboardInterrupt and
            # SystemExit are deliberately not swallowed.
            pass

    if not markup_a:
        markup_a = NEWLINES_RE.split(escape(old))

    if not markup_b:
        markup_b = NEWLINES_RE.split(escape(new))

    linenum = 1
    last_header = [None, None]
    last_header_index = [0, 0]

    ignore_space = True
    for pattern in siteconfig.get("diffviewer_include_space_patterns"):
        if fnmatch.fnmatch(source_filename, pattern):
            ignore_space = False
            break

    differ = Differ(a, b, ignore_space=ignore_space,
                    compat_version=diffset.diffcompat)

    # Register any regexes for interesting lines we may want to show.
    register_interesting_lines_for_filename(differ, source_filename)

    # TODO: Make this back into a preference if people really want it.
    context_num_lines = siteconfig.get("diffviewer_context_num_lines")
    collapse_threshold = 2 * context_num_lines + 3

    if interfilediff:
        log_timer = log_timed(
            "Generating diff chunks for interdiff ids %s-%s (%s)" %
            (filediff.id, interfilediff.id, filediff.source_file))
    else:
        log_timer = log_timed(
            "Generating diff chunks for filediff id %s (%s)" %
            (filediff.id, filediff.source_file))

    chunk_index = 0

    for tag, i1, i2, j1, j2, meta in opcodes_with_metadata(differ):
        oldlines = markup_a[i1:i2]
        newlines = markup_b[j1:j2]
        numlines = max(len(oldlines), len(newlines))

        # The two sides of an opcode may differ in length. This relies on
        # Python 2's map() padding the shorter sequences with None.
        lines = map(diff_line,
                    xrange(linenum, linenum + numlines),
                    xrange(i1 + 1, i2 + 1), xrange(j1 + 1, j2 + 1),
                    a[i1:i2], b[j1:j2], oldlines, newlines)

        if tag == 'equal' and numlines > collapse_threshold:
            last_range_start = numlines - context_num_lines

            if linenum == 1:
                # Start of the file: a collapsable run followed by trailing
                # context.
                yield new_chunk(chunk_index, lines, 0, last_range_start, True)
                chunk_index += 1

                yield new_chunk(chunk_index, lines, last_range_start, numlines)
                chunk_index += 1
            else:
                yield new_chunk(chunk_index, lines, 0, context_num_lines)
                chunk_index += 1

                if i2 == a_num_lines and j2 == b_num_lines:
                    # End of both files: everything after the leading
                    # context is collapsable.
                    yield new_chunk(chunk_index, lines, context_num_lines,
                                    numlines, True)
                    chunk_index += 1
                else:
                    yield new_chunk(chunk_index, lines, context_num_lines,
                                    last_range_start, True)
                    chunk_index += 1

                    yield new_chunk(chunk_index, lines, last_range_start,
                                    numlines)
                    chunk_index += 1
        else:
            yield new_chunk(chunk_index, lines, 0, numlines, False, tag, meta)
            chunk_index += 1

        linenum += numlines

    log_timer.done()
Пример #16
0
def get_diff_files(diffset, filediff=None, interdiffset=None,
                   enable_syntax_highlighting=True, load_chunks=True):
    """Return the list of files to display for a diff or interdiff.

    This goes through the given diffset (and interdiffset, if provided), or
    a single filediff within it, and builds one dictionary of display
    metadata per file.

    Args:
        diffset:
            The diffset whose files should be listed.

        filediff:
            If provided, only this filediff (and any interdiff file with a
            matching source file) is considered.

        interdiffset:
            If provided, the resulting entries describe the interdiff
            between ``diffset`` and this diffset. Files whose diffs are
            identical in both are skipped.

        enable_syntax_highlighting:
            Passed through to :py:func:`get_chunks`. It also selects a
            separate cache key prefix.

        load_chunks:
            Whether to load the diff chunks for each file and add the
            ``chunks``, ``changed_chunks``, ``whitespace_only`` and
            ``num_changes`` keys.

    Returns:
        list of dict:
        The file entries, sorted by base path, then filename, then
        extension in descending order. Each entry's ``index`` is assigned
        before sorting.
    """
    if filediff:
        filediffs = [filediff]

        if interdiffset:
            log_timer = log_timed(
                "Generating diff file info for "
                "interdiffset ids %s-%s, filediff %s"
                % (diffset.id, interdiffset.id, filediff.id)
            )
        else:
            log_timer = log_timed(
                "Generating diff file info for "
                "diffset id %s, filediff %s" % (diffset.id, filediff.id)
            )
    else:
        filediffs = diffset.files.all()

        if interdiffset:
            log_timer = log_timed(
                "Generating diff file info for "
                "interdiffset ids %s-%s" % (diffset.id, interdiffset.id)
            )
        else:
            log_timer = log_timed(
                "Generating diff file info for "
                "diffset id %s" % diffset.id
            )

    # A map used to quickly look up the equivalent interfilediff given a
    # source file.
    interdiff_map = {}
    if interdiffset:
        for interfilediff in interdiffset.files.all():
            if not filediff or filediff.source_file == interfilediff.source_file:
                interdiff_map[interfilediff.source_file] = interfilediff

    key_prefix = "diff-sidebyside-"

    if enable_syntax_highlighting:
        key_prefix += "hl-"

    # In order to support interdiffs properly, we need to display diffs
    # on every file in the union of both diffsets. Iterating over one diffset
    # or the other doesn't suffice.
    #
    # We build a list of parts containing the source filediff, the interdiff
    # filediff (if specified), and whether to force showing an interdiff
    # (in the case where a file existed in the source filediff but was
    # reverted in the interdiff).
    has_interdiffset = interdiffset is not None
    filediff_parts = []

    for source_filediff in filediffs:
        # Popping removes the matched entry, so only interdiff files with
        # no counterpart remain in the map afterwards.
        matching_interfilediff = interdiff_map.pop(
            source_filediff.source_file, None)

        filediff_parts.append(
            (source_filediff, matching_interfilediff, has_interdiffset))

    if interdiffset:
        # We've removed everything in the map that we've already found.
        # What's left are interdiff files that are new. They have no file
        # to diff against.
        #
        # The end result is going to be a view that's the same as when you're
        # viewing a standard diff. As such, we can pretend the interdiff is
        # the source filediff and not specify an interdiff. Keeps things
        # simple, code-wise, since we really have no need to special-case
        # this.
        for interdiff in interdiff_map.values():
            filediff_parts.append((interdiff, None, False))

    files = []

    for parts in filediff_parts:
        filediff, interfilediff, force_interdiff = parts

        newfile = filediff.source_revision == PRE_CREATION

        if interdiffset:
            # First, find out if we want to even process this one.
            # We only process if there's a difference in files.

            if filediff and interfilediff and filediff.diff == interfilediff.diff:
                continue

            source_revision = "Diff Revision %s" % diffset.revision

            if not interfilediff and force_interdiff:
                dest_revision = ("Diff Revision %s - File Reverted"
                                 % interdiffset.revision)
            else:
                dest_revision = "Diff Revision %s" % interdiffset.revision
        else:
            source_revision = get_revision_str(filediff.source_revision)

            if newfile:
                dest_revision = _("New File")
            else:
                dest_revision = _("New Change")

        i = filediff.source_file.rfind("/")

        if i != -1:
            basepath = filediff.source_file[:i]
            basename = filediff.source_file[i + 1:]
        else:
            basepath = ""
            basename = filediff.source_file

        file_info = {
            "depot_filename": filediff.source_file,
            "basename": basename,
            "basepath": basepath,
            "revision": source_revision,
            "dest_revision": dest_revision,
            "filediff": filediff,
            "interfilediff": interfilediff,
            "force_interdiff": force_interdiff,
            "binary": filediff.binary,
            "newfile": newfile,
            "index": len(files),
        }

        if load_chunks:
            chunks = []

            if not filediff.binary:
                # The cache key distinguishes plain diffs, interdiffs
                # between two filediffs, and interdiffs against a reverted
                # file.
                key = key_prefix

                if not force_interdiff:
                    key += str(filediff.id)
                elif interfilediff:
                    key += "interdiff-%s-%s" % (filediff.id, interfilediff.id)
                else:
                    key += "interdiff-%s-none" % filediff.id

                chunks = cache_memoize(
                    key,
                    lambda: get_chunks(
                        filediff.diffset,
                        filediff,
                        interfilediff,
                        force_interdiff,
                        enable_syntax_highlighting,
                    ),
                    large_data=True,
                )

            file_info["chunks"] = chunks
            file_info["changed_chunks"] = []
            file_info["whitespace_only"] = True

            for j, chunk in enumerate(file_info["chunks"]):
                chunk["index"] = j

                if chunk["change"] != "equal":
                    file_info["changed_chunks"].append(chunk)
                    meta = chunk.get("meta", {})

                    if not meta.get("whitespace_chunk", False):
                        file_info["whitespace_only"] = False

            file_info["num_changes"] = len(file_info["changed_chunks"])

        files.append(file_info)

    # Sort based on basepath in ascending order, then on filename in
    # ascending order, then on extension in descending order, to make *.h
    # be ahead of *.c/cpp.
    #
    # This is done as two stable key-based passes, lowest-priority key
    # first. A stable sort keeps the extension ordering from the first pass
    # among entries that tie on (basepath, filename) in the second pass.
    # Key-based sorting works without the Python 2-only cmp().
    files.sort(key=lambda f: os.path.splitext(f["basename"])[1],
               reverse=True)
    files.sort(key=lambda f: (f["basepath"],
                              os.path.splitext(f["basename"])[0]))

    log_timer.done()

    return files
Пример #17
0
def get_diff_files(diffset, filediff=None, interdiffset=None, request=None):
    """Build the list of files that will be shown in a diff.

    The given diffset/interdiffset (or a single filediff within that
    diffset) is walked, and an entry is generated for each file to display.
    Each entry carries metadata on the file, such as its index,
    original/modified names, revisions, and associated filediffs.

    The result can be used along with populate_diff_chunks to build a full
    list containing all diff chunks used for rendering a side-by-side diff.
    """
    # Work out which filediffs to process and how to describe the operation
    # in the timing log.
    if filediff:
        filediffs = [filediff]

        if interdiffset:
            timer_msg = ("Generating diff file info for "
                         "interdiffset ids %s-%s, filediff %s" %
                         (diffset.id, interdiffset.id, filediff.id))
        else:
            timer_msg = ("Generating diff file info for "
                         "diffset id %s, filediff %s" %
                         (diffset.id, filediff.id))
    else:
        filediffs = diffset.files.select_related().all()

        if interdiffset:
            timer_msg = ("Generating diff file info for "
                         "interdiffset ids %s-%s" %
                         (diffset.id, interdiffset.id))
        else:
            timer_msg = ("Generating diff file info for "
                         "diffset id %s" % diffset.id)

    log_timer = log_timed(timer_msg, request=request)

    # Maps a source filename to its interfilediff, for quick lookups of the
    # interdiff counterpart of a source file.
    interdiff_map = {}

    if interdiffset:
        for candidate in interdiffset.files.all():
            if (not filediff
                    or filediff.source_file == candidate.source_file):
                interdiff_map[candidate.source_file] = candidate

    # Interdiffs must cover every file in the union of both diffsets, so
    # walking just one of them isn't enough.
    #
    # Each entry collected here is (source filediff, interdiff filediff or
    # None, whether to force showing an interdiff). The last is needed for
    # a file changed in the source filediff but reverted in the interdiff.
    has_interdiffset = interdiffset is not None
    filediff_parts = []

    for temp_filediff in filediffs:
        matched = interdiff_map.pop(temp_filediff.source_file, None)
        filediff_parts.append((temp_filediff, matched, has_interdiffset))

    if interdiffset:
        # Every match was popped from the map above, so whatever remains
        # is new in the interdiff and has nothing to diff against.
        #
        # Such files look just like a standard diff, so each one is
        # treated as a source filediff with no interdiff. That avoids any
        # special-casing later on.
        filediff_parts.extend(
            (interdiff, None, False)
            for interdiff in interdiff_map.itervalues())

    files = []

    for filediff, interfilediff, force_interdiff in filediff_parts:
        newfile = (filediff.source_revision == PRE_CREATION)

        if not interdiffset:
            source_revision = get_revision_str(filediff.source_revision)

            if newfile:
                dest_revision = _("New File")
            else:
                dest_revision = _("New Change")
        else:
            # Only files that actually differ between the two revisions
            # are worth processing.
            if (filediff and interfilediff
                    and filediff.diff == interfilediff.diff):
                continue

            source_revision = _("Diff Revision %s") % diffset.revision

            if force_interdiff and not interfilediff:
                dest_revision = (_("Diff Revision %s - File Reverted")
                                 % interdiffset.revision)
            else:
                dest_revision = _("Diff Revision %s") % interdiffset.revision

        # Split on the last slash. With no slash present, rpartition()
        # gives an empty base path and the whole name as the base name.
        basepath, _sep, basename = filediff.source_file.rpartition('/')

        tool = filediff.diffset.repository.get_scmtool()
        depot_filename = tool.normalize_path_for_display(filediff.source_file)
        dest_filename = tool.normalize_path_for_display(filediff.dest_file)

        file_info = {
            'depot_filename': depot_filename,
            'dest_filename': dest_filename or depot_filename,
            'basename': basename,
            'basepath': basepath,
            'revision': source_revision,
            'dest_revision': dest_revision,
            'filediff': filediff,
            'interfilediff': interfilediff,
            'force_interdiff': force_interdiff,
            'binary': filediff.binary,
            'deleted': filediff.deleted,
            'moved': filediff.moved,
            'newfile': newfile,
            'index': len(files),
            'chunks_loaded': False,
            'is_new_file': (newfile and not interfilediff
                            and not filediff.parent_diff),
        }
        files.append(file_info)

    def cmp_file(x, y):
        # Base path first, in ascending order.
        by_path = cmp(x["basepath"], y["basepath"])

        if by_path:
            return by_path

        # Then the filename in ascending order, and finally the extension
        # in descending order, to make *.h be ahead of *.c/cpp.
        x_file, x_ext = os.path.splitext(x["basename"])
        y_file, y_ext = os.path.splitext(y["basename"])

        by_name = cmp(x_file, y_file)

        if by_name:
            return by_name

        return cmp(y_ext, x_ext)

    files.sort(cmp_file)

    log_timer.done()

    return files
Пример #18
0
def get_chunks(diffset, filediff, interfilediff, force_interdiff,
               enable_syntax_highlighting):
    """Yield the side-by-side diff chunks for a file.

    The original and patched contents are fetched, optionally
    syntax-highlighted, diffed, and then split into chunks. Each yielded
    chunk is a dictionary with the keys ``lines``, ``numlines``,
    ``change``, ``collapsable`` and ``meta``.

    Args:
        diffset:
            The diffset the file belongs to. Its repository encoding and
            ``diffcompat`` version are used when decoding and diffing.

        filediff:
            The file diff to generate chunks for. Required.

        interfilediff:
            The file diff from the interdiff revision, or ``None``.

        force_interdiff:
            Whether to show a reverted file as an interdiff when there is
            no ``interfilediff``.

        enable_syntax_highlighting:
            Whether to try highlighting the lines with Pygments. This is
            turned off when either side exceeds the configured line
            threshold.

    Yields:
        dict:
        One chunk of the diff at a time, in display order.
    """
    def diff_line(vlinenum, oldlinenum, newlinenum, oldline, newline,
                  oldmarkup, newmarkup):
        """Build the list describing one row of the side-by-side diff.

        The row holds the virtual line number, then the line number,
        markup and changed regions for each side, then whether the line
        pair is a whitespace-only change. If the line was moved, the
        destination recorded in the metadata is appended.
        """
        # This function accesses the variable meta, defined in an outer
        # context (the metadata of the opcode currently being processed).
        if oldline and newline and oldline != newline:
            oldregion, newregion = get_line_changed_regions(oldline, newline)
        else:
            oldregion = newregion = []

        result = [vlinenum,
                  oldlinenum or '', mark_safe(oldmarkup or ''), oldregion,
                  newlinenum or '', mark_safe(newmarkup or ''), newregion,
                  (oldlinenum, newlinenum) in meta['whitespace_lines']]

        if oldlinenum and oldlinenum in meta.get('moved', {}):
            destination = meta["moved"][oldlinenum]
            result.append(destination)
        elif newlinenum and newlinenum in meta.get('moved', {}):
            destination = meta["moved"][newlinenum]
            result.append(destination)

        return result

    def new_chunk(lines, start, end, collapsable=False,
                  tag='equal', meta=None):
        """Return the chunk dictionary for ``lines[start:end]``.

        The headers found within the range are stored in the chunk's
        metadata, and the most recently seen header for each side is
        remembered so that collapsed chunks can display it.
        """
        if not meta:
            meta = {}

        left_headers = list(get_interesting_headers(differ, lines,
                                                    start, end - 1, False))
        right_headers = list(get_interesting_headers(differ, lines,
                                                     start, end - 1, True))

        meta['left_headers'] = left_headers
        meta['right_headers'] = right_headers

        if left_headers:
            last_header[0] = left_headers[-1][1]

        if right_headers:
            last_header[1] = right_headers[-1][1]

        if (collapsable and end < len(lines) and
            (last_header[0] or last_header[1])):
            meta['headers'] = [
                (last_header[0] or "").strip(),
                (last_header[1] or "").strip(),
            ]

        return {
            'lines': lines[start:end],
            'numlines': end - start,
            'change': tag,
            'collapsable': collapsable,
            'meta': meta,
        }

    def get_interesting_headers(differ, lines, start, end, is_modified_file):
        """Yield all headers for a region of a diff.

        This scans for all headers that fall within the specified range
        of the specified lines on the original or modified file, and
        yields them as ``(linenum, line)`` tuples. The position reached in
        the header list is stored so the next call resumes from there.
        """
        possible_functions = differ.get_interesting_lines('header',
                                                          is_modified_file)

        if not possible_functions:
            # A plain return ends the generator. Raising StopIteration by
            # hand is converted into a RuntimeError under PEP 479.
            return

        try:
            if is_modified_file:
                last_index = last_header_index[1]
                i1 = lines[start][4]
                i2 = lines[end - 1][4]
            else:
                last_index = last_header_index[0]
                i1 = lines[start][1]
                i2 = lines[end - 1][1]
        except IndexError:
            return

        for i in xrange(last_index, len(possible_functions)):
            linenum, line = possible_functions[i]
            linenum += 1

            if linenum > i2:
                break
            elif linenum >= i1:
                last_index = i
                yield (linenum, line)

        if is_modified_file:
            last_header_index[1] = last_index
        else:
            last_header_index[0] = last_index

    def apply_pygments(data, filename):
        """Return ``data`` highlighted by Pygments, as a list of lines.

        The lexer is chosen from the filename alone.
        """
        # XXX Guessing is preferable but really slow, especially on XML
        #     files.
        #if filename.endswith(".xml"):
        lexer = get_lexer_for_filename(filename, stripnl=False,
                                       encoding='utf-8')
        #else:
        #    lexer = guess_lexer_for_filename(filename, data, stripnl=False)

        try:
            # This is only available in 0.7 and higher
            lexer.add_filter('codetagify')
        except AttributeError:
            pass

        return pygments.highlight(data, lexer, NoWrapperHtmlFormatter()).splitlines()


    # There are three ways this function is called:
    #
    #     1) filediff, no interfilediff
    #        - Returns chunks for a single filediff. This is the usual way
    #          people look at diffs in the diff viewer.
    #
    #          In this mode, we get the original file based on the filediff
    #          and then patch it to get the resulting file.
    #
    #          This is also used for interdiffs where the source revision
    #          has no equivalent modified file but the interdiff revision
    #          does. It's no different than a standard diff.
    #
    #     2) filediff, interfilediff
    #        - Returns chunks showing the changes between a source filediff
    #          and the interdiff.
    #
    #          This is the typical mode used when showing the changes
    #          between two diffs. It requires that the file is included in
    #          both revisions of a diffset.
    #
    #     3) filediff, no interfilediff, force_interdiff
    #        - Returns chunks showing the changes between a source
    #          diff and an unmodified version of the diff.
    #
    #          This is used when the source revision in the diffset contains
    #          modifications to a file which have then been reverted in the
    #          interdiff revision. We don't actually have an interfilediff
    #          in this case, so we have to indicate that we are indeed in
    #          interdiff mode so that we can special-case this and not
    #          grab a patched file for the interdiff version.

    assert filediff

    # Named so that the ``file`` builtin is not shadowed.
    source_filename = filediff.source_file

    old = get_original_file(filediff)
    new = get_patched_file(old, filediff)

    if interfilediff:
        old = new
        interdiff_orig = get_original_file(interfilediff)
        new = get_patched_file(interdiff_orig, interfilediff)
    elif force_interdiff:
        # Basically, revert the change.
        old, new = new, old

    encoding = diffset.repository.encoding or 'iso-8859-15'
    old = convert_to_utf8(old, encoding)
    new = convert_to_utf8(new, encoding)

    # Normalize the input so that if there isn't a trailing newline, we add
    # it.
    if old and old[-1] != '\n':
        old += '\n'

    if new and new[-1] != '\n':
        new += '\n'

    a = NEWLINES_RE.split(old or '')
    b = NEWLINES_RE.split(new or '')

    # Remove the trailing newline, now that we've split this. This will
    # prevent a duplicate line number at the end of the diff.
    del a[-1]
    del b[-1]

    a_num_lines = len(a)
    b_num_lines = len(b)

    markup_a = markup_b = None

    siteconfig = SiteConfiguration.objects.get_current()

    threshold = siteconfig.get('diffviewer_syntax_highlighting_threshold')

    if threshold and (a_num_lines > threshold or b_num_lines > threshold):
        enable_syntax_highlighting = False

    if enable_syntax_highlighting:
        repository = filediff.diffset.repository
        tool = repository.get_scmtool()
        source_file = tool.normalize_path_for_display(filediff.source_file)
        dest_file = tool.normalize_path_for_display(filediff.dest_file)
        try:
            # TODO: Try to figure out the right lexer for these files
            #       once instead of twice.
            markup_a = apply_pygments(old or '', source_file)
            markup_b = apply_pygments(new or '', dest_file)
        except Exception:
            # Highlighting is best-effort: on any failure we fall back to
            # the escaped plain text below. KeyboardInterrupt and
            # SystemExit are deliberately not swallowed.
            pass

    if not markup_a:
        markup_a = NEWLINES_RE.split(escape(old))

    if not markup_b:
        markup_b = NEWLINES_RE.split(escape(new))

    linenum = 1
    last_header = [None, None]
    last_header_index = [0, 0]

    # Whitespace changes are ignored unless the filename matches one of the
    # configured patterns.
    ignore_space = True
    for pattern in siteconfig.get("diffviewer_include_space_patterns"):
        if fnmatch.fnmatch(source_filename, pattern):
            ignore_space = False
            break

    differ = Differ(a, b, ignore_space=ignore_space,
                    compat_version=diffset.diffcompat)

    # Register any regexes for interesting lines we may want to show.
    register_interesting_lines_for_filename(differ, source_filename)

    # TODO: Make this back into a preference if people really want it.
    context_num_lines = siteconfig.get("diffviewer_context_num_lines")
    collapse_threshold = 2 * context_num_lines + 3

    if interfilediff:
        log_timer = log_timed(
            "Generating diff chunks for interdiff ids %s-%s (%s)" %
            (filediff.id, interfilediff.id, filediff.source_file))
    else:
        log_timer = log_timed(
            "Generating diff chunks for filediff id %s (%s)" %
            (filediff.id, filediff.source_file))

    for tag, i1, i2, j1, j2, meta in opcodes_with_metadata(differ):
        oldlines = markup_a[i1:i2]
        newlines = markup_b[j1:j2]
        numlines = max(len(oldlines), len(newlines))

        # NOTE: The two sides can differ in length. On Python 2, map() pads
        # the shorter sequences with None, which diff_line() turns into
        # empty values for the side that has no line.
        lines = map(diff_line,
                    xrange(linenum, linenum + numlines),
                    xrange(i1 + 1, i2 + 1), xrange(j1 + 1, j2 + 1),
                    a[i1:i2], b[j1:j2], oldlines, newlines)

        if tag == 'equal' and numlines > collapse_threshold:
            # A long run of unchanged lines is split so that only the
            # context next to a change stays expanded.
            last_range_start = numlines - context_num_lines

            if linenum == 1:
                yield new_chunk(lines, 0, last_range_start, True)
                yield new_chunk(lines, last_range_start, numlines)
            else:
                yield new_chunk(lines, 0, context_num_lines)

                if i2 == a_num_lines and j2 == b_num_lines:
                    yield new_chunk(lines, context_num_lines, numlines, True)
                else:
                    yield new_chunk(lines, context_num_lines,
                                    last_range_start, True)
                    yield new_chunk(lines, last_range_start, numlines)
        else:
            yield new_chunk(lines, 0, numlines, False, tag, meta)

        linenum += numlines

    log_timer.done()
Пример #19
0
def patch(diff, orig_file, filename, request=None):
    """Apply a diff to a file.

    This delegates out to ``patch`` because no one except Larry Wall knows
    how to patch.

    Args:
        diff (bytes):
            The contents of the diff to apply.

        orig_file (bytes):
            The contents of the original file.

        filename (unicode):
            The name of the file being patched.

        request (django.http.HttpRequest, optional):
            The HTTP request, for use in logging.

    Returns:
        bytes:
        The contents of the patched file. If the diff is empty (or only
        whitespace), ``orig_file`` is returned unchanged.

    Raises:
        reviewboard.diffutils.errors.PatchError:
            An error occurred when trying to apply the patch.
    """
    log_timer = log_timed('Patching file %s' % filename, request=request)

    if not diff.strip():
        # Someone uploaded an unchanged file. Return the one we're patching.
        # The timer is finished here too, so every exit path closes it.
        log_timer.done()
        return orig_file

    # Prepare the temporary directory if none is available
    tempdir = tempfile.mkdtemp(prefix='reviewboard.')

    try:
        orig_file = convert_line_endings(orig_file)
        diff = convert_line_endings(diff)

        (fd, oldfile) = tempfile.mkstemp(dir=tempdir)

        # The context manager closes the descriptor even if the write fails.
        with os.fdopen(fd, 'w+b') as f:
            f.write(orig_file)

        newfile = '%s-new' % oldfile

        process = subprocess.Popen(['patch', '-o', newfile, oldfile],
                                   stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE, cwd=tempdir)

        with controlled_subprocess('patch', process) as p:
            stdout, stderr = p.communicate(diff)
            failure = p.returncode

        try:
            # Read in binary mode: the original was written as bytes and the
            # result is documented as bytes, so no newline translation or
            # decoding should take place.
            with open(newfile, 'rb') as f:
                new_file = f.read()
        except Exception:
            # ``patch`` may not have produced an output file at all.
            new_file = None

        if failure:
            rejects_file = '%s.rej' % newfile

            try:
                with open(rejects_file, 'rb') as f:
                    rejects = f.read()
            except Exception:
                rejects = None

            error_output = stderr.strip() or stdout.strip()

            # Munge the output to show the filename instead of
            # randomly-generated tempdir locations.
            base_filename = os.path.basename(filename)
            error_output = (
                error_output
                .replace(rejects_file, '%s.rej' % base_filename)
                .replace(oldfile, base_filename)
            )

            raise PatchError(filename, error_output, orig_file, new_file,
                             diff, rejects)

        return new_file
    finally:
        shutil.rmtree(tempdir)
        log_timer.done()
Пример #20
0
    def get_chunks_uncached(self):
        """Generate and yield every diff chunk, without using the cache.

        The original and patched contents are resolved (taking the base
        file diff, commit ancestry, interdiff and forced-interdiff modes
        into account), missing checksums are recorded, and the chunks are
        then generated. For a plain diff, the line counts are saved on the
        file diff once all chunks have been yielded.
        """
        request = self.request
        encodings = self.encoding_list
        filediff = self.filediff
        base_filediff = self.base_filediff
        interfilediff = self.interfilediff

        old = get_original_file(filediff, request, encodings)
        new = get_patched_file(old, filediff, request)

        if base_filediff is not None:
            # This commit follows an earlier commit in the series that
            # modified the same file (the first commit in a series has no
            # base to look up). The "old" side is therefore the file as
            # that earlier commit left it, i.e. its patched version.
            base_orig = get_original_file(base_filediff, request, encodings)
            old = get_patched_file(base_orig, base_filediff, request)
        elif filediff.commit_id:
            # The diff belongs to a commit, but no earlier FileDiff touching
            # this file was found. It may be the first commit in the series,
            # or the file may be untouched by the base commit and every
            # previous one.
            #
            # In that case the "old" side is the upstream version of the
            # file, taken from the first ancestor in the commit history when
            # one exists. Everything from this commit onwards is then shown
            # as "new". Without a base_filediff there should be no older
            # commit with modifications we would want to diff against.
            ancestors = filediff.get_ancestors(minimal=True)

            if ancestors:
                old = get_original_file(ancestors[0], request, encodings)

        if filediff.orig_sha1 is None:
            # Record the checksums the first time they are needed.
            filediff.extra_data.update({
                'orig_sha1': self._get_checksum(old),
                'patched_sha1': self._get_checksum(new),
            })
            filediff.save(update_fields=['extra_data'])

        if interfilediff:
            # Interdiff: compare the patched file against the patched file
            # from the other revision.
            old = new
            interdiff_orig = get_original_file(interfilediff, request,
                                               encodings)
            new = get_patched_file(interdiff_orig, interfilediff, request)

            if interfilediff.orig_sha1 is None:
                interfilediff.extra_data.update({
                    'orig_sha1': self._get_checksum(interdiff_orig),
                    'patched_sha1': self._get_checksum(new),
                })
                interfilediff.save(update_fields=['extra_data'])

            timer_message = (
                "Generating diff chunks for interdiff ids %s-%s (%s)"
                % (filediff.id, interfilediff.id, filediff.source_file))
        else:
            if self.force_interdiff:
                # Basically, revert the change.
                old, new = new, old

            timer_message = (
                "Generating diff chunks for self.filediff id %s (%s)"
                % (filediff.id, filediff.source_file))

        log_timer = log_timed(timer_message, request=request)

        for chunk in self.generate_chunks(old, new):
            yield chunk

        log_timer.done()

        # Line counts are only stored for a plain diff of this file.
        if self.interfilediff or self.base_filediff or self.force_interdiff:
            return

        inserts, deletes, replaces, equals = [
            self.counts[tag]
            for tag in ('insert', 'delete', 'replace', 'equal')
        ]

        self.filediff.set_line_counts(
            insert_count=inserts,
            delete_count=deletes,
            replace_count=replaces,
            equal_count=equals,
            total_line_count=inserts + deletes + replaces + equals)
Пример #21
0
def get_diff_files(diffset, filediff=None, interdiffset=None,
                   enable_syntax_highlighting=True,
                   load_chunks=True):
    """Return the list of files to display for a diff or interdiff.

    This goes through the given diffset/interdiffset, or a single filediff
    within the diffset, and builds one dictionary of display metadata per
    file. When ``load_chunks`` is set, the diff chunks for each file are
    loaded (through the cache) and summarized as well.

    Args:
        diffset:
            The diffset containing the files to return.

        filediff:
            A specific file in the diffset to limit the result to, or
            ``None`` for all files.

        interdiffset:
            A second diffset forming an interdiff range, or ``None``.

        enable_syntax_highlighting:
            Whether chunks should be generated with syntax highlighting.
            This is also part of the cache key.

        load_chunks:
            Whether to load the chunks for each file.

    Returns:
        list of dict:
        The file information, sorted by base path, then file name, then
        extension in descending order.
    """
    if filediff:
        filediffs = [filediff]

        if interdiffset:
            log_timer = log_timed("Generating diff file info for "
                                  "interdiffset ids %s-%s, filediff %s" %
                                  (diffset.id, interdiffset.id, filediff.id))
        else:
            log_timer = log_timed("Generating diff file info for "
                                  "diffset id %s, filediff %s" %
                                  (diffset.id, filediff.id))
    else:
        filediffs = diffset.files.select_related().all()

        if interdiffset:
            log_timer = log_timed("Generating diff file info for "
                                  "interdiffset ids %s-%s" %
                                  (diffset.id, interdiffset.id))
        else:
            log_timer = log_timed("Generating diff file info for "
                                  "diffset id %s" % diffset.id)

    # A map used to quickly look up the equivalent interfilediff given a
    # source file.
    interdiff_map = {}

    if interdiffset:
        for interfilediff in interdiffset.files.all():
            if (not filediff or
                filediff.source_file == interfilediff.source_file):
                interdiff_map[interfilediff.source_file] = interfilediff

    key_prefix = "diff-sidebyside-"

    if enable_syntax_highlighting:
        key_prefix += "hl-"

    # In order to support interdiffs properly, we need to display diffs
    # on every file in the union of both diffsets. Iterating over one diffset
    # or the other doesn't suffice.
    #
    # We build a list of parts containing the source filediff, the interdiff
    # filediff (if specified), and whether to force showing an interdiff
    # (in the case where a file existed in the source filediff but was
    # reverted in the interdiff).
    has_interdiffset = interdiffset is not None

    filediff_parts = []

    for temp_filediff in filediffs:
        # pop() both finds and removes the match, so only unmatched
        # interdiff files remain in the map afterwards.
        filediff_parts.append(
            (temp_filediff,
             interdiff_map.pop(temp_filediff.source_file, None),
             has_interdiffset))

    if interdiffset:
        # We've removed everything in the map that we've already found.
        # What's left are interdiff files that are new. They have no file
        # to diff against.
        #
        # The end result is going to be a view that's the same as when you're
        # viewing a standard diff. As such, we can pretend the interdiff is
        # the source filediff and not specify an interdiff. Keeps things
        # simple, code-wise, since we really have no need to special-case
        # this.
        filediff_parts += [(interdiff, None, False)
                           for interdiff in interdiff_map.values()]

    files = []

    for filediff, interfilediff, force_interdiff in filediff_parts:
        newfile = (filediff.source_revision == PRE_CREATION)

        if interdiffset:
            # First, find out if we want to even process this one.
            # We only process if there's a difference in files.
            if (filediff and interfilediff and
                filediff.diff == interfilediff.diff):
                continue

            source_revision = "Diff Revision %s" % diffset.revision

            if not interfilediff and force_interdiff:
                dest_revision = "Diff Revision %s - File Reverted" % \
                                interdiffset.revision
            else:
                dest_revision = "Diff Revision %s" % interdiffset.revision
        else:
            source_revision = get_revision_str(filediff.source_revision)

            if newfile:
                dest_revision = NEW_FILE_STR
            else:
                dest_revision = NEW_CHANGE_STR

        # Split the source path into its directory and file name.
        i = filediff.source_file.rfind('/')

        if i != -1:
            basepath = filediff.source_file[:i]
            basename = filediff.source_file[i + 1:]
        else:
            basepath = ""
            basename = filediff.source_file

        tool = filediff.diffset.repository.get_scmtool()
        depot_filename = tool.normalize_path_for_display(filediff.source_file)
        dest_filename = tool.normalize_path_for_display(filediff.dest_file)

        # Named so that the ``file`` builtin is not shadowed.
        file_info = {
            'depot_filename': depot_filename,
            'dest_filename': dest_filename or depot_filename,
            'basename': basename,
            'basepath': basepath,
            'revision': source_revision,
            'dest_revision': dest_revision,
            'filediff': filediff,
            'interfilediff': interfilediff,
            'force_interdiff': force_interdiff,
            'binary': filediff.binary,
            'deleted': filediff.deleted,
            'newfile': newfile,
            'index': len(files),
        }

        if load_chunks:
            chunks = []

            if not filediff.binary and not filediff.deleted:
                # The cache key distinguishes plain diffs, interdiffs and
                # reverted files.
                key = key_prefix

                if not force_interdiff:
                    key += str(filediff.id)
                elif interfilediff:
                    key += "interdiff-%s-%s" % (filediff.id, interfilediff.id)
                else:
                    key += "interdiff-%s-none" % filediff.id

                chunks = cache_memoize(
                    key,
                    lambda: list(get_chunks(filediff.diffset,
                                            filediff, interfilediff,
                                            force_interdiff,
                                            enable_syntax_highlighting)),
                    large_data=True)

            file_info['chunks'] = chunks
            file_info['changed_chunk_indexes'] = []
            file_info['whitespace_only'] = True

            for j, chunk in enumerate(chunks):
                chunk['index'] = j

                if chunk['change'] != 'equal':
                    file_info['changed_chunk_indexes'].append(j)
                    meta = chunk.get('meta', {})

                    if not meta.get('whitespace_chunk', False):
                        file_info['whitespace_only'] = False

            file_info['num_changes'] = len(file_info['changed_chunk_indexes'])

        files.append(file_info)

    def cmp_file(x, y):
        """Compare two file entries for sorting (cmp-style comparator)."""
        # Sort based on basepath in asc order
        if x["basepath"] != y["basepath"]:
            return cmp(x["basepath"], y["basepath"])

        # Sort based on filename in asc order, then based on extension in desc
        # order, to make *.h be ahead of *.c/cpp
        x_file, x_ext = os.path.splitext(x["basename"])
        y_file, y_ext = os.path.splitext(y["basename"])
        if x_file != y_file:
            return cmp(x_file, y_file)
        else:
            return cmp(y_ext, x_ext)

    # NOTE: This relies on the ``cmp`` builtin and on passing a comparison
    # function positionally to list.sort(), as available on Python 2.
    files.sort(cmp_file)

    log_timer.done()

    return files
Пример #22
0
def patch(diff, file, filename, request=None):
    """Apply a diff to a file.

    Delegates out to `patch` because no one except Larry Wall knows how to
    patch.

    Args:
        diff:
            The contents of the diff to apply.

        file:
            The contents of the original file.

        filename:
            The name of the file being patched, used for logging and in the
            error message.

        request:
            The HTTP request, passed along to the timing log.

    Returns:
        The contents of the patched file. If the diff is empty (or only
        whitespace), ``file`` is returned unchanged.

    Raises:
        Exception:
            The patch did not apply cleanly. The temporary files are left in
            place for debugging.
    """
    log_timer = log_timed("Patching file %s" % filename, request=request)

    if not diff.strip():
        # Someone uploaded an unchanged file. Return the one we're patching.
        # Finish the timer here too, like the other exit paths do.
        log_timer.done()
        return file

    # Prepare the temporary directory if none is available
    tempdir = tempfile.mkdtemp(prefix="reviewboard.")

    (fd, oldfile) = tempfile.mkstemp(dir=tempdir)

    # The context manager closes the descriptor even if the write fails.
    with os.fdopen(fd, "w+b") as f:
        f.write(convert_line_endings(file))

    diff = convert_line_endings(diff)

    newfile = "%s-new" % oldfile

    process = subprocess.Popen(
        ["patch", "-o", newfile, oldfile],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=tempdir,
    )

    with controlled_subprocess("patch", process) as p:
        stdout, stderr = p.communicate(diff)
        failure = p.returncode

    if failure:
        # Keep a copy of the diff next to the other temporary files. It is
        # written in binary mode so it is stored exactly as it was fed to
        # ``patch``.
        absolute_path = os.path.join(tempdir, os.path.basename(filename))
        with open("%s.diff" % absolute_path, "wb") as f:
            f.write(diff)

        log_timer.done()

        # FIXME: This doesn't provide any useful error report on why the patch
        # failed to apply, which makes it hard to debug.  We might also want to
        # have it clean up if DEBUG=False
        raise Exception(
            _(
                "The patch to '%(filename)s' didn't apply cleanly. The "
                "temporary files have been left in '%(tempdir)s' for debugging "
                "purposes.\n"
                "`patch` returned: %(output)s"
            )
            % {"filename": filename, "tempdir": tempdir, "output": stderr}
        )

    # Read in binary mode to mirror the binary write of the original, so no
    # newline translation or decoding takes place.
    with open(newfile, "rb") as f:
        data = f.read()

    os.unlink(oldfile)
    os.unlink(newfile)
    os.rmdir(tempdir)

    log_timer.done()

    return data
Пример #23
0
def get_diff_files(diffset, filediff=None, interdiffset=None, request=None):
    """Build the list of files to be displayed in a diff.

    The files come from the given diffset/interdiffset pair, or from a
    single filediff within the diffset. Each entry is a dictionary of
    metadata about the file: its index, original/modified names,
    revisions, the associated filediffs, and so on.

    The result can be passed to populate_diff_chunks to obtain the full
    list of diff chunks used for rendering a side-by-side diff.
    """
    if filediff:
        filediffs = [filediff]

        if interdiffset:
            timer_message = ("Generating diff file info for "
                             "interdiffset ids %s-%s, filediff %s" %
                             (diffset.id, interdiffset.id, filediff.id))
        else:
            timer_message = ("Generating diff file info for "
                             "diffset id %s, filediff %s" %
                             (diffset.id, filediff.id))
    else:
        filediffs = diffset.files.select_related().all()

        if interdiffset:
            timer_message = ("Generating diff file info for "
                             "interdiffset ids %s-%s" %
                             (diffset.id, interdiffset.id))
        else:
            timer_message = ("Generating diff file info for "
                             "diffset id %s" % diffset.id)

    log_timer = log_timed(timer_message, request=request)

    # Lookup table from source file name to the matching file in the
    # interdiff revision.
    interdiff_map = {}

    if interdiffset:
        for candidate in interdiffset.files.all():
            # When a single filediff was requested, only its counterpart
            # is of interest.
            if filediff and filediff.source_file != candidate.source_file:
                continue

            interdiff_map[candidate.source_file] = candidate

    # Interdiffs must cover the union of the files in both diffsets, so
    # walking just one of them is not enough.
    #
    # Each entry pairs a source filediff with its interdiff filediff (if
    # any) and a flag saying whether an interdiff view is forced, which
    # covers files changed in the source revision but reverted in the
    # interdiff revision.
    has_interdiffset = interdiffset is not None
    filediff_parts = []

    for temp_filediff in filediffs:
        matching_interfilediff = interdiff_map.pop(temp_filediff.source_file,
                                                   None)
        filediff_parts.append(
            (temp_filediff, matching_interfilediff, has_interdiffset))

    if interdiffset:
        # Every matched entry has been popped from the map, so whatever is
        # left is new in the interdiff revision and has nothing to be
        # compared against.
        #
        # Such a file looks exactly like a file in a standard diff, so it
        # is treated as the source filediff with no interdiff. That avoids
        # any special-casing further down.
        filediff_parts.extend(
            (leftover, None, False)
            for leftover in six.itervalues(interdiff_map)
        )

    files = []

    for filediff, interfilediff, force_interdiff in filediff_parts:
        newfile = filediff.is_new

        if not interdiffset:
            source_revision = get_revision_str(filediff.source_revision)

            if newfile:
                dest_revision = _("New File")
            else:
                dest_revision = _("New Change")
        else:
            # Files whose diffs are identical in both revisions have
            # nothing to show, so they are left out.
            if (filediff and interfilediff and
                    filediff.diff == interfilediff.diff):
                continue

            source_revision = _("Diff Revision %s") % diffset.revision

            if force_interdiff and not interfilediff:
                dest_revision = (_("Diff Revision %s - File Reverted") %
                                 interdiffset.revision)
            else:
                dest_revision = _("Diff Revision %s") % interdiffset.revision

        tool = filediff.diffset.repository.get_scmtool()
        depot_filename = tool.normalize_path_for_display(filediff.source_file)
        dest_filename = tool.normalize_path_for_display(filediff.dest_file)

        file_info = {
            'depot_filename': depot_filename,
            'dest_filename': dest_filename or depot_filename,
            'revision': source_revision,
            'dest_revision': dest_revision,
            'filediff': filediff,
            'interfilediff': interfilediff,
            'force_interdiff': force_interdiff,
            'binary': filediff.binary,
            'deleted': filediff.deleted,
            'moved': filediff.moved,
            'copied': filediff.copied,
            'moved_or_copied': filediff.moved or filediff.copied,
            'newfile': newfile,
            'index': len(files),
            'chunks_loaded': False,
            'is_new_file': (newfile and not interfilediff and
                            not filediff.parent_diff),
        }

        if force_interdiff:
            file_info['force_interdiff_revision'] = interdiffset.revision

        files.append(file_info)

    log_timer.done()

    return get_sorted_filediffs(files, key=lambda info: info['filediff'])
Пример #24
0
def get_diff_files(diffset, filediff=None, interdiffset=None, interfilediff=None, request=None):
    """Build the list of file entries to display for a diff or interdiff.

    Each entry is a dictionary of metadata about one file: its position in
    the list, the displayed original/modified filenames, revision labels,
    and the filediff (plus interfilediff, if any) backing it.

    The result can be fed to :py:func:`populate_diff_chunks` to obtain the
    full set of chunks needed for rendering a side-by-side diff.

    Args:
        diffset (reviewboard.diffviewer.models.DiffSet):
            The diffset containing the files to return.

        filediff (reviewboard.diffviewer.models.FileDiff, optional):
            A specific file in the diff to return information for. When
            omitted, every file in ``diffset`` is considered.

        interdiffset (reviewboard.diffviewer.models.DiffSet, optional):
            A second diffset used for an interdiff range.

        interfilediff (reviewboard.diffviewer.models.FileDiff, optional):
            A second specific file in ``interdiffset`` used to return
            information for. This should be provided if ``filediff`` and
            ``interdiffset`` are both provided. If it's ``None`` in this
            case, then the diff will be shown as reverted for this file.

        request (object, optional):
            The request object forwarded to ``log_timed`` for the timing
            log entry.

    Returns:
        list of dict:
        A list of dictionaries containing information on the files to show
        in the diff, in the order in which they would be shown.

    Raises:
        ValueError:
            ``get_matched_interdiff_files`` produced a pair in which both
            the filediff and the interfilediff were empty.
    """
    # Decide which filediffs to walk, and describe the job for the timing log.
    if filediff:
        filediffs = [filediff]

        if interdiffset:
            timer_message = (
                "Generating diff file info for "
                "interdiffset ids %s-%s, filediff %s" % (diffset.id, interdiffset.id, filediff.id)
            )
        else:
            timer_message = (
                "Generating diff file info for "
                "diffset id %s, filediff %s" % (diffset.id, filediff.id)
            )
    else:
        filediffs = list(diffset.files.select_related().all())

        if interdiffset:
            timer_message = (
                "Generating diff file info for "
                "interdiffset ids %s-%s" % (diffset.id, interdiffset.id)
            )
        else:
            timer_message = "Generating diff file info for " "diffset id %s" % diffset.id

    log_timer = log_timed(timer_message, request=request)

    # The SCMTool is needed both for matching up interdiff files and for
    # normalizing the filenames shown to the user.
    tool = diffset.repository.get_scmtool()

    if not interdiffset:
        # Plain diff: every filediff stands alone, with nothing to compare
        # against and nothing to force.
        filediff_parts = [(lone_filediff, None, False) for lone_filediff in filediffs]
    else:
        if not filediff:
            interfilediffs = list(interdiffset.files.all())
        elif interfilediff:
            interfilediffs = [interfilediff]
        else:
            interfilediffs = []

        filediff_parts = []
        matched_pairs = get_matched_interdiff_files(tool=tool, filediffs=filediffs, interfilediffs=interfilediffs)

        for matched_filediff, matched_interfilediff in matched_pairs:
            if matched_filediff:
                # The file exists in the first diffset; force interdiff
                # handling whether or not a counterpart was found.
                filediff_parts.append((matched_filediff, matched_interfilediff, True))
                continue

            if matched_interfilediff:
                # The file only exists in the second diffset, so treat the
                # interfilediff as an ordinary filediff.
                filediff_parts.append((matched_interfilediff, None, False))
                continue

            logging.error(
                "get_matched_interdiff_files returned an entry with an "
                "empty filediff and interfilediff for diffset=%r, "
                "interdiffset=%r, filediffs=%r, interfilediffs=%r",
                diffset,
                interdiffset,
                filediffs,
                interfilediffs,
            )

            raise ValueError(
                "Internal error: get_matched_interdiff_files returned an "
                "entry with an empty filediff and interfilediff! Please "
                "report this along with information from the server "
                "error log."
            )

    # With the (filediff, interfilediff, force_interdiff) triples in hand,
    # build the per-file metadata dictionaries.
    files = []

    for cur_filediff, cur_interfilediff, force_interdiff in filediff_parts:
        newfile = cur_filediff.is_new

        if interdiffset:
            if cur_filediff and cur_interfilediff:
                # Identical diffs, a file deleted on both sides, or matching
                # patched checksums all mean there is nothing to show.
                nothing_changed = (
                    cur_filediff.diff == cur_interfilediff.diff
                    or (cur_filediff.deleted and cur_interfilediff.deleted)
                    or (
                        cur_filediff.patched_sha1 is not None
                        and cur_filediff.patched_sha1 == cur_interfilediff.patched_sha1
                    )
                )

                if nothing_changed:
                    continue

            source_revision = _("Diff Revision %s") % diffset.revision
        else:
            source_revision = get_revision_str(cur_filediff.source_revision)

        if cur_interfilediff:
            # Comparing two revisions of the diff: both sides are the
            # patched ("dest") filenames.
            dest_revision = _("Diff Revision %s") % interdiffset.revision
            raw_depot_filename = cur_filediff.dest_file
            raw_dest_filename = cur_interfilediff.dest_file
        else:
            if force_interdiff:
                dest_revision = _("Diff Revision %s - File Reverted") % interdiffset.revision
            elif newfile:
                dest_revision = _("New File")
            else:
                dest_revision = _("New Change")

            raw_depot_filename = cur_filediff.source_file
            raw_dest_filename = cur_filediff.dest_file

        depot_filename = tool.normalize_path_for_display(raw_depot_filename)
        dest_filename = tool.normalize_path_for_display(raw_dest_filename)

        entry = {
            "depot_filename": depot_filename,
            "dest_filename": dest_filename or depot_filename,
            "revision": source_revision,
            "dest_revision": dest_revision,
            "filediff": cur_filediff,
            "interfilediff": cur_interfilediff,
            "force_interdiff": force_interdiff,
            "binary": cur_filediff.binary,
            "deleted": cur_filediff.deleted,
            "moved": cur_filediff.moved,
            "copied": cur_filediff.copied,
            "moved_or_copied": cur_filediff.moved or cur_filediff.copied,
            "newfile": newfile,
            "index": len(files),
            "chunks_loaded": False,
            "is_new_file": (newfile and not cur_interfilediff and not cur_filediff.parent_diff),
        }

        if force_interdiff:
            entry["force_interdiff_revision"] = interdiffset.revision

        files.append(entry)

    log_timer.done()

    # A single entry needs no ordering; otherwise sort by the interfilediff
    # when there is one, falling back to the filediff.
    if len(files) != 1:
        files = get_sorted_filediffs(files, key=lambda info: info["interfilediff"] or info["filediff"])

    return files
Пример #25
0
    def _get_chunks_uncached(self):
        """Yield the diff chunks for this file, bypassing the cache.

        The old and new file contents are loaded, converted to UTF-8, split
        into lines, optionally syntax-highlighted, and then run through the
        differ. Each opcode from the differ is turned into one or more
        chunks; long runs of equal lines are split so that only a few
        context lines stay next to the surrounding changes.

        Yields:
            object:
            Each chunk produced by ``self._new_chunk``, in file order.
        """
        old = get_original_file(self.filediff, self.request)
        new = get_patched_file(old, self.filediff, self.request)

        if self.interfilediff:
            # For an interdiff, the left side is the patched result of the
            # first filediff and the right side is the patched result of the
            # interfilediff.
            old = new
            interdiff_orig = get_original_file(self.interfilediff,
                                               self.request)
            new = get_patched_file(interdiff_orig, self.interfilediff,
                                   self.request)
        elif self.force_interdiff:
            # Basically, revert the change.
            old, new = new, old

        # Fall back to iso-8859-15 when the repository has no encoding set.
        encoding = self.diffset.repository.encoding or 'iso-8859-15'
        old = self._convert_to_utf8(old, encoding)
        new = self._convert_to_utf8(new, encoding)

        # Normalize the input so that if there isn't a trailing newline, we add
        # it.
        if old and old[-1] != '\n':
            old += '\n'

        if new and new[-1] != '\n':
            new += '\n'

        a = self.NEWLINES_RE.split(old or '')
        b = self.NEWLINES_RE.split(new or '')

        # Remove the trailing newline, now that we've split this. This will
        # prevent a duplicate line number at the end of the diff.
        del a[-1]
        del b[-1]

        a_num_lines = len(a)
        b_num_lines = len(b)

        markup_a = markup_b = None

        if self._get_enable_syntax_highlighting(old, new, a, b):
            repository = self.filediff.diffset.repository
            tool = repository.get_scmtool()
            source_file = \
                tool.normalize_path_for_display(self.filediff.source_file)
            dest_file = \
                tool.normalize_path_for_display(self.filediff.dest_file)

            try:
                # TODO: Try to figure out the right lexer for these files
                #       once instead of twice.
                markup_a = self._apply_pygments(old or '', source_file)
                markup_b = self._apply_pygments(new or '', dest_file)
            except Exception:
                # Highlighting is best-effort: on any ordinary failure we
                # fall through to the escaped plain-text markup below.
                # Catching Exception (rather than using a bare except) lets
                # KeyboardInterrupt and SystemExit propagate.
                pass

        # Without highlighted markup, use the HTML-escaped raw lines instead.
        if not markup_a:
            markup_a = self.NEWLINES_RE.split(escape(old))

        if not markup_b:
            markup_b = self.NEWLINES_RE.split(escape(new))

        siteconfig = SiteConfiguration.objects.get_current()
        ignore_space = True

        # Whitespace is ignored unless the filename matches one of the
        # configured "include space" patterns.
        for pattern in siteconfig.get('diffviewer_include_space_patterns'):
            if fnmatch.fnmatch(self.filename, pattern):
                ignore_space = False
                break

        self.differ = get_differ(a, b, ignore_space=ignore_space,
                                 compat_version=self.diffset.diffcompat)
        self.differ.add_interesting_lines_for_headers(self.filename)

        # An equal run is only split up when it is longer than the context
        # kept on both sides plus a small margin.
        context_num_lines = siteconfig.get("diffviewer_context_num_lines")
        collapse_threshold = 2 * context_num_lines + 3

        if self.interfilediff:
            log_timer = log_timed(
                "Generating diff chunks for interdiff ids %s-%s (%s)" %
                (self.filediff.id, self.interfilediff.id,
                 self.filediff.source_file),
                request=self.request)
        else:
            log_timer = log_timed(
                "Generating diff chunks for self.filediff id %s (%s)" %
                (self.filediff.id, self.filediff.source_file),
                request=self.request)

        line_num = 1
        opcodes_generator = get_diff_opcode_generator(self.differ,
                                                      self.filediff,
                                                      self.interfilediff)

        for tag, i1, i2, j1, j2, meta in opcodes_generator:
            old_lines = markup_a[i1:i2]
            new_lines = markup_b[j1:j2]
            num_lines = max(len(old_lines), len(new_lines))

            # _cur_meta is set only for the duration of the map() call below,
            # presumably so that _diff_line can read it.
            #
            # NOTE(review): the sequences passed to map() differ in length
            # for non-equal opcodes, and the result is sliced by index in
            # _new_chunk. This appears to rely on Python 2's map(), which
            # returns a list and pads shorter inputs with None -- confirm
            # before running under Python 3.
            self._cur_meta = meta
            lines = map(self._diff_line,
                        range(line_num, line_num + num_lines),
                        range(i1 + 1, i2 + 1), range(j1 + 1, j2 + 1),
                        a[i1:i2], b[j1:j2], old_lines, new_lines)
            self._cur_meta = None

            if tag == 'equal' and num_lines > collapse_threshold:
                last_range_start = num_lines - context_num_lines

                # The fourth argument to _new_chunk is presumably a
                # "collapsible" flag -- TODO confirm against _new_chunk.
                if line_num == 1:
                    # Equal run at the very top of the file: everything but
                    # the trailing context goes into the flagged chunk.
                    yield self._new_chunk(lines, 0, last_range_start, True)
                    yield self._new_chunk(lines, last_range_start, num_lines)
                else:
                    # Leading context after the previous change.
                    yield self._new_chunk(lines, 0, context_num_lines)

                    if i2 == a_num_lines and j2 == b_num_lines:
                        # Equal run reaches the end of both files, so no
                        # trailing context is needed.
                        yield self._new_chunk(lines, context_num_lines,
                                              num_lines, True)
                    else:
                        yield self._new_chunk(lines, context_num_lines,
                                              last_range_start, True)
                        yield self._new_chunk(lines, last_range_start,
                                              num_lines)
            else:
                yield self._new_chunk(lines, 0, num_lines, False, tag, meta)

            line_num += num_lines

        log_timer.done()
Пример #26
0
def get_diff_files(diffset, filediff=None, interdiffset=None, request=None):
    """Build the list of file entries to display for a diff or interdiff.

    Walks the given diffset (or just ``filediff`` within it), pairs each
    file with its counterpart in ``interdiffset`` when one is supplied, and
    returns one metadata dictionary per file to show. Each dictionary holds
    the index, original/modified names, revision labels, and the associated
    filediff/interfilediff.

    Files are paired by their normalized source filename. Files that exist
    only in ``interdiffset`` are listed as ordinary, non-interdiff entries.

    This can be used along with populate_diff_chunks to build a full list
    containing all diff chunks used for rendering a side-by-side diff.

    Args:
        diffset:
            The diffset containing the files to return.

        filediff (optional):
            A single file in ``diffset`` to restrict the result to.

        interdiffset (optional):
            A second diffset used for an interdiff range.

        request (optional):
            The request object forwarded to ``log_timed``.

    Returns:
        The value returned by ``get_sorted_filediffs`` for the generated
        entries, keyed on each entry's filediff.
    """
    if filediff:
        filediffs = [filediff]

        if interdiffset:
            timer_message = ("Generating diff file info for "
                             "interdiffset ids %s-%s, filediff %s" %
                             (diffset.id, interdiffset.id, filediff.id))
        else:
            timer_message = ("Generating diff file info for "
                             "diffset id %s, filediff %s" %
                             (diffset.id, filediff.id))
    else:
        filediffs = diffset.files.select_related().all()

        if interdiffset:
            timer_message = ("Generating diff file info for "
                             "interdiffset ids %s-%s" %
                             (diffset.id, interdiffset.id))
        else:
            timer_message = ("Generating diff file info for "
                             "diffset id %s" % diffset.id)

    log_timer = log_timed(timer_message, request=request)

    # Filediffs that were created with leading slashes stripped won't match
    # those created with them present, so filenames are compared in their
    # normalized form.
    parser = diffset.repository.get_scmtool().get_parser('')
    normalize = parser.normalize_diff_filename

    # Normalized source filename -> interfilediff, for quick pairing below.
    interdiff_map = {}

    if interdiffset:
        for candidate in interdiffset.files.all():
            candidate_source = normalize(candidate.source_file)

            if (not filediff or
                    normalize(filediff.source_file) == candidate_source):
                interdiff_map[candidate_source] = candidate

    # Interdiffs must cover the union of both diffsets. Each part is
    # (source filediff, matching interfilediff or None, force-interdiff
    # flag). The flag is set for every source filediff whenever an
    # interdiffset was given, which covers files reverted in the interdiff.
    has_interdiffset = interdiffset is not None
    filediff_parts = []

    for source_filediff in filediffs:
        paired = interdiff_map.pop(normalize(source_filediff.source_file),
                                   None)
        filediff_parts.append((source_filediff, paired, has_interdiffset))

    if interdiffset:
        # Whatever is left in the map is new in the interdiffset and has
        # nothing to diff against. It displays just like a standard diff,
        # so the interfilediff is passed off as the source filediff.
        filediff_parts.extend(
            (leftover, None, False)
            for leftover in six.itervalues(interdiff_map))

    files = []

    for cur_filediff, cur_interfilediff, force_interdiff in filediff_parts:
        newfile = cur_filediff.is_new

        if interdiffset:
            if cur_filediff and cur_interfilediff:
                # Identical diffs, a file deleted on both sides, or matching
                # patched checksums all mean there is nothing to show.
                nothing_changed = (
                    cur_filediff.diff == cur_interfilediff.diff or
                    (cur_filediff.deleted and cur_interfilediff.deleted) or
                    (cur_filediff.patched_sha1 is not None and
                     cur_filediff.patched_sha1 ==
                     cur_interfilediff.patched_sha1))

                if nothing_changed:
                    continue

            source_revision = _("Diff Revision %s") % diffset.revision

            if cur_interfilediff or not force_interdiff:
                dest_revision = (_("Diff Revision %s") %
                                 interdiffset.revision)
            else:
                dest_revision = (_("Diff Revision %s - File Reverted") %
                                 interdiffset.revision)
        else:
            source_revision = get_revision_str(cur_filediff.source_revision)

            if newfile:
                dest_revision = _("New File")
            else:
                dest_revision = _("New Change")

        tool = cur_filediff.diffset.repository.get_scmtool()
        depot_filename = tool.normalize_path_for_display(
            cur_filediff.source_file)
        dest_filename = tool.normalize_path_for_display(
            cur_filediff.dest_file)

        entry = {
            'depot_filename': depot_filename,
            'dest_filename': dest_filename or depot_filename,
            'revision': source_revision,
            'dest_revision': dest_revision,
            'filediff': cur_filediff,
            'interfilediff': cur_interfilediff,
            'force_interdiff': force_interdiff,
            'binary': cur_filediff.binary,
            'deleted': cur_filediff.deleted,
            'moved': cur_filediff.moved,
            'copied': cur_filediff.copied,
            'moved_or_copied': cur_filediff.moved or cur_filediff.copied,
            'newfile': newfile,
            'index': len(files),
            'chunks_loaded': False,
            'is_new_file': (newfile and not cur_interfilediff and
                            not cur_filediff.parent_diff),
        }

        if force_interdiff:
            entry['force_interdiff_revision'] = interdiffset.revision

        files.append(entry)

    log_timer.done()

    return get_sorted_filediffs(files, key=lambda info: info['filediff'])