示例#1
0
    def process_new_commit(self, commit: ghstack.diff.Diff) -> None:
        """
        Process a diff that has never been pushed to GitHub before.
        """

        if '[ghstack-poisoned]' in commit.summary:
            raise RuntimeError('''\
This commit is poisoned: it is from a head or base branch--ghstack
cannot validly submit it.  The most common situation for this to
happen is if you checked out the head branch of a pull request that was
previously submitted with ghstack (e.g., by using hub checkout).
Making modifications on the head branch is not supported; instead,
you should fetch the original commits in question by running:

    ghstack checkout $PR_URL

Since we cannot proceed, ghstack will abort now.
''')

        title, pr_body = self._default_title_and_body(commit, None)

        # Determine the next available GhNumber.  We do this by
        # iterating through known branches and keeping track
        # of the max.  The next available GhNumber is the next number.
        # This is technically subject to a race, but we assume
        # end user is not running this script concurrently on
        # multiple machines (you bad bad)
        refs = self.sh.git(
            "for-each-ref",
            # Use OUR username here, since there's none attached to the
            # diff
            "refs/remotes/{}/gh/{}".format(self.remote_name, self.username),
            "--format=%(refname)").split()
        max_ref_num = max(int(ref.split('/')[-2]) for ref in refs) \
            if refs else 0
        ghnum = GhNumber(str(max_ref_num + 1))

        # Create the incremental pull request diff
        tree = commit.patch.apply(self.sh, self.base_tree)

        # Actually, if there's no change in the tree, stop processing
        if tree == self.base_tree:
            self.ignored_diffs.append((commit, None))
            logging.warn("Skipping {} {}, as the commit has no changes"
                         .format(commit.oid, title))
            self.stack_meta.append(None)
            return

        assert ghnum not in self.seen_ghnums
        self.seen_ghnums.add(ghnum)

        new_pull = GitCommitHash(
            self.sh.git("commit-tree", tree,
                        "-p", self.base_commit,
                        input=commit.summary + "\n\n[ghstack-poisoned]"))

        # Push the branches, so that we can create a PR for them
        new_branches = (
            push_spec(new_pull, branch_head(self.username, ghnum)),
            push_spec(self.base_commit, branch_base(self.username, ghnum))
        )
        self.sh.git(
            "push",
            self.remote_name,
            *new_branches,
        )
        self.github.push_hook(new_branches)

        # Time to open the PR
        # NB: GraphQL API does not support opening PRs
        r = self.github.post(
            "repos/{owner}/{repo}/pulls"
            .format(owner=self.repo_owner, repo=self.repo_name),
            title=title,
            head=branch_head(self.username, ghnum),
            base=branch_base(self.username, ghnum),
            body=pr_body,
            maintainer_can_modify=True,
            draft=self.draft,
        )
        number = r['number']

        logging.info("Opened PR #{}".format(number))

        # Update the commit message of the local diff with metadata
        # so we can correlate these later
        pull_request_resolved = ghstack.diff.PullRequestResolved(
            owner=self.repo_owner, repo=self.repo_name, number=number)
        commit_msg = ("{commit_msg}\n\n"
                      "ghstack-source-id: {sourceid}\n"
                      "Pull Request resolved: "
                      "https://{github_url}/{owner}/{repo}/pull/{number}"
                      .format(commit_msg=commit.summary.rstrip(),
                              owner=self.repo_owner,
                              repo=self.repo_name,
                              number=number,
                              sourceid=commit.source_id,
                              github_url=self.github_url))

        # TODO: Try harder to preserve the old author/commit
        # information (is it really necessary? Check what
        # --amend does...)
        new_orig = GitCommitHash(self.sh.git(
            "commit-tree",
            tree,
            "-p", self.base_orig,
            input=commit_msg))

        self.stack_meta.append(DiffMeta(
            title=title,
            number=number,
            body=pr_body,
            ghnum=ghnum,
            username=self.username,
            push_branches=((new_orig, 'orig'), ),
            head_branch=new_pull,
            what='Created',
            closed=False,
            pr_url=pull_request_resolved.url(self.github_url),
        ))

        self.base_commit = new_pull
        self.base_orig = new_orig
        self.base_tree = tree
示例#2
0
    def process_old_commit(self, elab_commit: DiffWithGitHubMetadata) -> None:
        """
        Process a diff that has an existing upload to GitHub.

        The local ``ghstack-source-id`` is compared with the one recorded
        on the remote; on a mismatch the update is refused unless
        ``self.force`` is set.  New base (when needed), head and orig
        commits are then created with ``git commit-tree`` and a
        ``DiffMeta`` listing them in ``push_branches`` is appended to
        ``self.stack_meta``.  This method does not run ``git push``
        itself.  Finally ``self.base_commit``, ``self.base_orig`` and
        ``self.base_tree`` are advanced to the commits/tree created here.

        If applying the patch leaves the tree unchanged, the diff is
        recorded in ``self.ignored_diffs`` and the method returns early.

        Raises RuntimeError if the same GhNumber was already processed
        in this run, or if the source IDs disagree and ``self.force`` is
        not set.
        """

        commit = elab_commit.diff
        username = elab_commit.username
        ghnum = elab_commit.ghnum
        number = elab_commit.number

        if ghnum in self.seen_ghnums:
            raise RuntimeError(
                "Something very strange has happened: a commit for "
                "the pull request #{} occurs twice in your local "
                "commit stack.  This is usually because of a botched "
                "rebase.  Please take a look at your git log and seek "
                "help from your local Git expert.".format(number))
        self.seen_ghnums.add(ghnum)

        logging.info("Pushing to #%s", number)

        # Compute the local and remote source IDs
        summary = commit.summary
        m_local_source_id = RE_GHSTACK_SOURCE_ID.search(summary)
        if m_local_source_id is None:
            # For BC, just slap on a source ID.  After BC is no longer
            # needed, we can just error in this case; however, this
            # situation is extremely likely to happen for preexisting
            # stacks.
            logging.warning(
                "Local commit has no ghstack-source-id; assuming that it is "
                "up-to-date with remote.")
            summary = "{}\nghstack-source-id: {}".format(summary, commit.source_id)
        else:
            local_source_id = m_local_source_id.group(1)
            if elab_commit.remote_source_id is None:
                # This should also be an error condition, but I suppose
                # it can happen in the wild if a user had an aborted
                # ghstack run, where they updated their head pointer to
                # a copy with source IDs, but then we failed to push to
                # orig.  We should just go ahead and push in that case.
                # Note that the summary is left untouched on this path.
                logging.warning(
                    "Remote commit has no ghstack-source-id; assuming that we are "
                    "up-to-date with remote.")
            else:
                if local_source_id != elab_commit.remote_source_id and not self.force:
                    logging.debug("elab_commit.remote_source_id = %s",
                                  elab_commit.remote_source_id)
                    raise RuntimeError(
                        "Cowardly refusing to push an update to GitHub, since it "
                        "looks another source has updated GitHub since you last "
                        "pushed.  If you want to push anyway, rerun this command "
                        "with --force.  Otherwise, diff your changes against "
                        "{} and reapply them on top of an up-to-date commit from "
                        "GitHub.".format(local_source_id))
                # Refresh the recorded source ID to the current one.
                summary = RE_GHSTACK_SOURCE_ID.sub(
                    'ghstack-source-id: {}\n'.format(commit.source_id),
                    summary)

        # We've got an update to do!  But what exactly should we
        # do?
        #
        # Here are a number of situations which may have
        # occurred.
        #
        #   1. None of the parent commits changed, and this is
        #      the first change we need to push an update to.
        #
        #   2. A parent commit changed, so we need to restack
        #      this commit too.  (You can't easily distinguish
        #      between rebase versus rebase+amend)
        #
        #   3. The parent is now master (any prior parent
        #      commits were absorbed into master.)
        #
        #   4. The parent is totally disconnected, the history
        #      is bogus but at least the merge-base on master
        #      is the same or later.  (This can occur if you
        #      cherry-picked a commit out of an old stack and
        #      want to make it independent.)
        #
        # In cases 1-3, we can maintain a clean merge history
        # if we do a little extra book-keeping, which is what
        # we do now.
        #
        # TODO: What we have here actually works pretty hard to
        # maintain a consistent merge history between all PRs;
        # so, e.g., you could merge with master and things
        # wouldn't break.  But we don't necessarily have to do
        # this; all we need is the delta between base and head
        # to make sense.  The benefit to doing this is you could
        # more easily update single revs only, without doing
        # the rest of the stack.  The downside is that you
        # get less accurate merge structure for your changes
        # (because each "diff" is completely disconnected.)
        #

        # First, check if the parent commit hasn't changed.
        # We do this by checking if our base_commit is the same
        # as the gh/ezyang/X/base commit.
        #
        # In this case, we don't need to include the base as a
        # parent at all; just construct our new diff as a plain,
        # non-merge commit.
        base_args: Tuple[str, ...]
        orig_base_hash = self.sh.git(
            "rev-parse",
            self.remote_name + "/" + branch_base(username, ghnum))

        # I vacillated between whether or not we should use the PR
        # body or the literal commit message here.  Right now we use
        # the PR body, because after initial commit the original
        # commit message is not supposed to "matter" anymore.  orig
        # still uses the original commit message, however, because
        # it's supposed to be the "original".
        non_orig_commit_msg = RE_STACK.sub('', elab_commit.body)

        if orig_base_hash == self.base_commit:

            new_base = self.base_commit
            base_args = ()

        else:
            # Second, check if our local base (self.base_commit)
            # added some new commits, but is still rooted on the
            # old base.
            #
            # If so, all we need to do is include the local base
            # as a parent when we do the merge.
            is_ancestor = self.sh.git(
                "merge-base",
                "--is-ancestor",
                self.remote_name + "/" + branch_base(username, ghnum),
                self.base_commit, exitcode=True)

            if is_ancestor:
                new_base = self.base_commit

            else:
                # If we've gotten here, it means that the new
                # base and the old base are completely
                # unrelated.  We'll make a fake commit that
                # "resets" the tree back to something that makes
                # sense and merge with that.  This doesn't fix
                # the fact that we still incorrectly report
                # the old base as an ancestor of our commit, but
                # it's better than nothing.
                new_base = GitCommitHash(self.sh.git(
                    "commit-tree", self.base_tree,
                    "-p",
                    self.remote_name + "/" + branch_base(username, ghnum),
                    "-p", self.base_commit,
                    input='Update base for {} on "{}"\n\n{}\n\n[ghstack-poisoned]'
                          .format(self.msg, elab_commit.title,
                                  non_orig_commit_msg)))

            # In both sub-cases the (possibly synthetic) base becomes an
            # extra parent of the new head commit.
            base_args = ("-p", new_base)

        # Blast our current tree as the newest commit, merging
        # against the previous pull entry, and the newest base.

        tree = commit.patch.apply(self.sh, self.base_tree)

        # Nothing to do, just ignore the diff
        # NOTE(review): this early return appends nothing to
        # self.stack_meta; confirm that consumers do not expect one
        # stack_meta entry per processed diff.
        if tree == self.base_tree:
            self.ignored_diffs.append((commit, number))
            logging.warning(
                "Skipping PR #%s %s, as the commit now has no changes",
                number, elab_commit.title)
            return

        new_pull = GitCommitHash(self.sh.git(
            "commit-tree", tree,
            "-p", self.remote_name + "/" + branch_head(username, ghnum),
            *base_args,
            input='{} on "{}"\n\n{}\n\n[ghstack-poisoned]'.format(self.msg, elab_commit.title, non_orig_commit_msg)))

        # Perform what is effectively an interactive rebase
        # on the orig branch.
        #
        # Hypothetically, there could be a case when this isn't
        # necessary, but it's INCREDIBLY unlikely (because we'd
        # have to look EXACTLY like the original orig, and since
        # we're in the branch that says "hey we changed
        # something" that's probably not what happened.)

        logging.info("Restacking commit on %s", self.base_orig)
        new_orig = GitCommitHash(self.sh.git(
            "commit-tree", tree,
            "-p", self.base_orig, input=summary))

        push_branches = (
            (new_base, "base"),
            (new_pull, "head"),
            (new_orig, "orig"),
        )

        if elab_commit.closed:
            what = 'Skipped closed'
        else:
            what = 'Updated'

        self.stack_meta.append(DiffMeta(
            title=elab_commit.title,
            number=number,
            # NB: Ignore the commit message, and just reuse the old commit
            # message.  This is consistent with 'jf submit' default
            # behavior.  The idea is that people may have edited the
            # PR description on GitHub and you don't want to clobber
            # it.
            body=elab_commit.body,
            ghnum=ghnum,
            username=username,
            push_branches=push_branches,
            head_branch=new_pull,
            what=what,
            closed=elab_commit.closed,
            pr_url=elab_commit.pull_request_resolved.url(self.github_url),
        ))

        # The next diff in the stack is built on top of this one.
        self.base_commit = new_pull
        self.base_orig = new_orig
        self.base_tree = tree
示例#3
0
    def process_new_commit(self, commit: ghstack.diff.Diff) -> None:
        """
        Process a diff that has never been pushed to GitHub before.

        Picks the next GhNumber from the remote-tracking refs under
        ``refs/remotes/origin/gh/<self.username>``, applies the diff's
        patch on top of ``self.base_tree``, pushes new head and base
        branches to ``origin``, opens a pull request with
        ``self.github.post`` and appends a ``DiffMeta`` for it to
        ``self.stack_meta``.  Afterwards ``self.base_commit``,
        ``self.base_orig`` and ``self.base_tree`` are advanced to the
        commits/tree created here.

        If applying the patch leaves the tree unchanged, the diff is
        recorded in ``self.ignored_diffs`` and nothing is pushed.
        """

        title, pr_body = self._default_title_and_body(commit, None)

        # Determine the next available GhNumber.  We do this by
        # iterating through known branches and keeping track
        # of the max.  The next available GhNumber is the next number.
        # This is technically subject to a race, but we assume
        # end user is not running this script concurrently on
        # multiple machines (you bad bad)
        #
        # NOTE(review): the remote name "origin" is hard-coded here and
        # in the push below.
        refs = self.sh.git("for-each-ref",
                           "refs/remotes/origin/gh/{}".format(self.username),
                           "--format=%(refname)").split()
        # The second-to-last path component of each ref is parsed as the
        # number; with no refs at all we start counting from 1.
        max_ref_num = max(
            (int(ref.split('/')[-2]) for ref in refs), default=0)
        ghnum = GhNumber(str(max_ref_num + 1))

        # Create the incremental pull request diff
        tree = commit.patch.apply(self.sh, self.base_tree)

        # Actually, if there's no change in the tree, stop processing
        if tree == self.base_tree:
            self.ignored_diffs.append((commit, None))
            logging.warning("Skipping %s %s, as the commit has no changes",
                            commit.oid, title)
            return

        assert ghnum not in self.seen_ghnums
        self.seen_ghnums.add(ghnum)

        new_pull = GitCommitHash(
            self.sh.git("commit-tree",
                        tree,
                        "-p",
                        self.base_commit,
                        input=commit.summary))

        # Push the branches, so that we can create a PR for them
        new_branches = (push_spec(new_pull, branch_head(self.username, ghnum)),
                        push_spec(self.base_commit,
                                  branch_base(self.username, ghnum)))
        self.sh.git(
            "push",
            "origin",
            *new_branches,
        )
        self.github.push_hook(new_branches)

        # Time to open the PR
        # NB: GraphQL API does not support opening PRs
        r = self.github.post(
            "repos/{owner}/{repo}/pulls".format(owner=self.repo_owner,
                                                repo=self.repo_name),
            title=title,
            head=branch_head(self.username, ghnum),
            base=branch_base(self.username, ghnum),
            body=pr_body,
            maintainer_can_modify=True,
        )
        number = r['number']

        logging.info("Opened PR #%s", number)

        # Update the commit message of the local diff with metadata
        # so we can correlate these later
        # NOTE(review): the host github.com is hard-coded in this URL.
        commit_msg = ("{commit_msg}\n\n"
                      "ghstack-source-id: {sourceid}\n"
                      "Pull Request resolved: "
                      "https://github.com/{owner}/{repo}/pull/{number}".format(
                          commit_msg=commit.summary.rstrip(),
                          owner=self.repo_owner,
                          repo=self.repo_name,
                          number=number,
                          sourceid=commit.source_id))

        # TODO: Try harder to preserve the old author/commit
        # information (is it really necessary? Check what
        # --amend does...)
        new_orig = GitCommitHash(
            self.sh.git("commit-tree",
                        tree,
                        "-p",
                        self.base_orig,
                        input=commit_msg))

        # Head and base were already pushed above; only orig is recorded
        # in push_branches here.
        self.stack_meta.append(
            DiffMeta(
                title=title,
                number=number,
                body=pr_body,
                ghnum=ghnum,
                push_branches=((new_orig, 'orig'), ),
                head_branch=new_pull,
                what='Created',
                closed=False,
            ))

        # The next diff in the stack is built on top of this one.
        self.base_commit = new_pull
        self.base_orig = new_orig
        self.base_tree = tree