def repo_files_changed(revish, include_uncommitted=False, include_new=False):
    # type: (Text, bool, bool) -> Set[Text]
    """Collect the paths git reports as changed for ``revish``.

    :param revish: revision or revision range handed to ``git diff``.
    :param include_uncommitted: also include paths listed by ``git status``.
    :param include_new: when ``include_uncommitted`` is set, also include
                        untracked (``??``) entries; untracked directories are
                        expanded to the files they contain.
    :returns: set of changed paths exactly as reported by git (directories
              from ``git status`` are replaced by the files found under them).
    :raises Exception: if no git command is available.
    :raises ValueError: if a ``git status`` entry cannot be parsed.
    """
    git = get_git_cmd(wpt_root)
    if git is None:
        raise Exception("git not found")

    # -z gives NUL-terminated names, so the final split item must be empty.
    files_list = git("diff", "--name-only", "-z", revish).split(u"\0")
    assert not files_list[-1]
    files = set(files_list[:-1])

    if include_uncommitted:
        entries = git("status", "-z").split("\0")
        assert not entries[-1]
        # Iterate explicitly: a rename/copy entry is followed by one extra
        # NUL-terminated field (the source path) that has no status prefix.
        pending = iter(entries[:-1])
        for item in pending:
            if len(item) > 3 and item[2] == " ":
                # Porcelain layout: two status columns (either may be a
                # blank), one separating blank, then the path. Slicing keeps
                # whitespace inside the path intact.
                status, path = item[:2], item[3:]
            else:
                # Tolerate an entry whose leading blank status column has
                # been trimmed (e.g. a wrapper that strips command output;
                # the wrapper is not visible here).
                status, sep, path = item.partition(" ")
                if not sep or not path:
                    raise ValueError("unexpected git status entry: %r" % (item,))

            paths = [path]
            if "R" in status or "C" in status:
                origin = next(pending, None)
                # A renamed file's old path no longer exists, so it counts as
                # changed too; a copy leaves its source untouched.
                if origin and "R" in status:
                    paths.append(origin)

            if status == "??" and not include_new:
                continue

            for changed in paths:
                if not os.path.isdir(changed):
                    files.add(changed)
                else:
                    # git status reports an untracked directory as a single
                    # entry; list the files underneath it instead.
                    for dirpath, _dirnames, filenames in os.walk(changed):
                        for filename in filenames:
                            files.add(os.path.join(dirpath, filename))

    return files
def repo_files_changed(revish, include_uncommitted=False, include_new=False):
    # type: (Text, bool, bool) -> Set[Text]
    """Collect the paths git reports as changed for ``revish``.

    A two-dot range ``A..B`` is rewritten to ``A...B`` before being passed to
    ``git diff`` so the comparison is made against the merge base; an explicit
    three-dot range is rejected.

    :param revish: revision or two-dot revision range to diff.
    :param include_uncommitted: also include paths listed by ``git status``.
    :param include_new: when ``include_uncommitted`` is set, also include
                        untracked (``??``) entries; untracked directories are
                        expanded to the files they contain.
    :returns: set of changed paths exactly as reported by git (directories
              from ``git status`` are replaced by the files found under them).
    :raises Exception: if no git command is available or ``revish`` contains
                       ``...``.
    :raises ValueError: if a ``git status`` entry cannot be parsed.
    """
    git = get_git_cmd(wpt_root)
    if git is None:
        raise Exception("git not found")

    if "..." in revish:
        raise Exception(
            f"... not supported when finding files changed (revish: {revish!r}"
        )

    if ".." in revish:
        # ".." isn't treated as a range for git-diff; what we want is
        # everything reachable from B but not A, and git diff A...B
        # gives us that (via the merge-base)
        revish = revish.replace("..", "...")

    # -z gives NUL-terminated names, so the final split item must be empty.
    files_list = git("diff", "--no-renames", "--name-only", "-z", revish).split("\0")
    assert not files_list[-1], f"final item should be empty, got: {files_list[-1]!r}"
    files = set(files_list[:-1])

    if include_uncommitted:
        entries = git("status", "-z").split("\0")
        assert not entries[-1]
        # Iterate explicitly: a rename/copy entry is followed by one extra
        # NUL-terminated field (the source path) that has no status prefix.
        pending = iter(entries[:-1])
        for item in pending:
            if len(item) > 3 and item[2] == " ":
                # Porcelain layout: two status columns (either may be a
                # blank), one separating blank, then the path. Splitting on
                # the first blank would misread " M path" as status "" and
                # path "M path", so slice by position instead.
                status, path = item[:2], item[3:]
            else:
                # Tolerate an entry whose leading blank status column has
                # been trimmed (e.g. a wrapper that strips command output;
                # the wrapper is not visible here).
                status, sep, path = item.partition(" ")
                if not sep or not path:
                    raise ValueError(f"unexpected git status entry: {item!r}")

            paths = [path]
            if "R" in status or "C" in status:
                origin = next(pending, None)
                # Mirror --no-renames above: a rename counts as a removal of
                # the old path plus an addition of the new one. A copy leaves
                # its source untouched.
                if origin and "R" in status:
                    paths.append(origin)

            if status == "??" and not include_new:
                continue

            for changed in paths:
                if not os.path.isdir(changed):
                    files.add(changed)
                else:
                    # git status reports an untracked directory as a single
                    # entry; list the files underneath it instead.
                    for dirpath, _dirnames, filenames in os.walk(changed):
                        for filename in filenames:
                            files.add(os.path.join(dirpath, filename))

    return files
def branch_point():
    # type: () -> Optional[Text]
    """Work out the commit the current checkout branched from.

    Three situations are distinguished using the ``GITHUB_PULL_REQUEST`` and
    ``GITHUB_BRANCH`` environment variables:

    * not a pull request and ``GITHUB_BRANCH`` is ``master``: the HEAD commit
      is returned directly;
    * a pull request: the merge base of HEAD and ``GITHUB_BRANCH``;
    * anything else: the first parent, in ``git rev-list --topo-order``
      order, of a commit that is only reachable from HEAD, falling back to
      the merge base with ``origin/master`` when that heuristic is unreliable.

    :returns: the branch point commit (stripped of surrounding whitespace
              except in the master case, which is returned as git printed it).
    :raises Exception: if no git command is available.
    :raises subprocess.CalledProcessError: if ``git rev-list`` fails.
    """
    git = get_git_cmd(wpt_root)
    if git is None:
        raise Exception("git not found")

    is_pull_request = os.environ.get("GITHUB_PULL_REQUEST", "false") != "false"

    if not is_pull_request and os.environ.get("GITHUB_BRANCH") == "master":
        # Building master itself: HEAD is the answer.
        return git("rev-parse", "HEAD")

    if is_pull_request:
        # For a PR the base branch is named by GITHUB_BRANCH.
        target = os.environ.get("GITHUB_BRANCH")
        assert target, "GITHUB_BRANCH environment variable is defined"
        fork_commit = git("merge-base", "HEAD", target)  # type: Optional[Text]
    else:
        # Not a PR: look for commits that exist only on the current branch, c.f.
        # http://stackoverflow.com/questions/13460152/find-first-ancestor-commit-in-another-branch

        # Resolve HEAD to an object ref.
        head_sha = git("rev-parse", "HEAD")

        # Every ref under refs/heads and refs/remotes, negated, minus the one
        # that is HEAD itself.
        head_exclusion = "^%s" % head_sha
        negated_refs = git("rev-parse", "--not", "--branches", "--remotes").split("\n")
        excluded = [ref for ref in negated_refs if ref and ref != head_exclusion]

        # List commits reachable from HEAD but from none of the excluded refs,
        # each followed by its parents.
        rev_list_cmd = ["git", "rev-list", "--topo-order", "--parents", "--stdin", "HEAD"]
        rev_list = subprocess.Popen(rev_list_cmd,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    cwd=wpt_root)
        raw_listing, _ = rev_list.communicate("\n".join(excluded).encode("ascii"))
        if rev_list.returncode != 0:
            raise subprocess.CalledProcessError(rev_list.returncode,
                                                rev_list_cmd,
                                                raw_listing)

        # Map each listed commit to its parents, preserving listing order.
        ancestry = OrderedDict()  # type: Dict[Text, List[Text]]
        listing = raw_listing.decode("ascii")
        for row in (listing.split("\n") if listing else ()):
            sha, *parent_shas = row.split(" ")
            ancestry[sha] = parent_shas

        # Take the first parent that is not itself one of the listed commits.
        fork_commit = None
        for parent_shas in ancestry.values():
            outside = [sha for sha in parent_shas if sha not in ancestry]
            if outside:
                fork_commit = outside[0]
            if fork_commit:
                break

        # If any commits were listed, a branch point must have been found.
        assert fork_commit or not ancestry

        # The heuristic above fails when:
        #
        # - the current branch has fallen behind the remote version;
        # - changes on the current branch were rebased and so exist on no
        #   other branch, which selects a commit earlier in history than
        #   wanted (detected by checking which of the branch point and the
        #   merge base is later).
        #
        # In both cases the merge base is used instead.
        merge_base = git("merge-base", "HEAD", "origin/master")
        if fork_commit is None:
            prefer_merge_base = True
        elif fork_commit == merge_base:
            prefer_merge_base = False
        else:
            # Nothing between the merge base and the candidate means the
            # candidate is not ahead of the merge base.
            ahead = git("log", "--oneline", "%s..%s" % (merge_base, fork_commit))
            prefer_merge_base = not ahead.strip()

        if prefer_merge_base:
            logger.debug("Using merge-base as the branch point")
            fork_commit = merge_base
        else:
            logger.debug("Using first commit on another branch as the branch point")

    logger.debug("Branch point from master: %s" % fork_commit)
    return fork_commit.strip() if fork_commit else fork_commit