def clean(repo_folder: str) -> None:
    r"""Clean all unversioned files in a Git repository.

    :param repo_folder: Path to the Git repository.
    """
    # Discard local modifications to tracked files.
    run_command(["git", "reset", "--hard"], cwd=repo_folder, ignore_errors=True)
    # `-ff` (force given twice) is required to also remove untracked nested repositories.
    run_command(["git", "clean", "-xffd"], cwd=repo_folder, ignore_errors=True)
    # Repeat both steps inside each submodule, if the repository declares any.
    if os.path.exists(os.path.join(repo_folder, ".gitmodules")):
        for git_cmd in (["git", "reset", "--hard"], ["git", "clean", "-xffd"]):
            run_command(["git", "submodule", "foreach", "--recursive"] + git_cmd,
                        cwd=repo_folder, ignore_errors=True)
def run_docker_command(command: Union[str, List[str]], cwd: Optional[str] = None,
                       user: Optional[Union[int, Tuple[int, int]]] = None,
                       directory_mapping: Optional[Dict[str, str]] = None,
                       timeout: Optional[float] = None, **kwargs) -> CommandResult:
    r"""Run a command inside a container based on the ``gcc-custom`` Docker image.

    :param command: The command to run, as a `str` or a list of `str`. Both forms are treated
        identically because a shell is always spawned in the entry point.
    :param cwd: The working directory of the command inside the container. If ``None``, the
        image default (probably user home) is used.
    :param user: The user ID to use inside the Docker container; a ``(uid, gid)`` tuple also
        sets the group ID. If not specified, the current user and group IDs are used. As a
        special case, pass ``0`` to run as root.
    :param directory_mapping: Mapping of host directories to container paths, realized as
        bind mounts.
    :param timeout: Maximum running time for the command. If running time exceeds the limit,
        ``subprocess.TimeoutExpired`` is thrown.
    :param kwargs: Additional keyword arguments forwarded to :meth:`ghcc.utils.run_command`.
    """
    # Normalize `command` to a single-quoted string for the container shell.
    if isinstance(command, list):
        command = ' '.join(command)
    command = f"'{command}'"

    # Assemble the `docker run` invocation piece by piece.
    args = ["docker", "run", "--rm"]
    for host_dir, container_dir in (directory_mapping or {}).items():
        args.extend(["-v", f"{os.path.abspath(host_dir)}:{container_dir}"])
    if cwd is not None:
        args.extend(["-w", cwd])

    # Resolve user/group IDs; `user == 0` (root) skips the LOCAL_*_ID variables entirely.
    if user != 0:
        uid: Union[str, int] = "`id -u $USER`"
        gid: Union[str, int] = "`id -g $USER`"
        if user is not None:
            if isinstance(user, tuple):
                uid, gid = user
            else:
                uid = user
        args.extend(["-e", f"LOCAL_USER_ID={uid}"])
        args.extend(["-e", f"LOCAL_GROUP_ID={gid}"])

    args.append("gcc-custom")
    if timeout is not None:
        # The limit is enforced by running `timeout` inside the Docker container.
        args.extend(["timeout", f"{timeout}s"])
    args.append(command)

    ret = run_command(' '.join(args), shell=True, **kwargs)
    # `timeout` exits with code 124 when the time limit was exceeded.
    if ret.return_code == 124:
        assert timeout is not None
        raise error_wrapper(subprocess.TimeoutExpired(ret.command, timeout, output=ret.captured_output))
    return ret
def try_clone():
    # On a genuine git failure, re-raise and let the outer code handle it.
    try:
        try_branch = default_branch or "master"
        # First try a shallow single-branch clone; the branch may not exist.
        run_command(
            ["git", "clone", "--depth=1", f"--branch={try_branch}", "--single-branch",
             url, clone_folder],
            env=env, timeout=timeout)
        return
    except subprocess.CalledProcessError as err:
        expected_msg = b"fatal: Remote branch master not found in upstream origin"
        branch_missing = err.output is not None and expected_msg in err.output
        # When `default_branch` is explicitly given, any failure is fatal; otherwise only
        # the recognizable missing-'master' error is recoverable.
        if default_branch is not None or not branch_missing:
            raise err
    # No 'master' branch exists: fall back to a shallow clone of all branches.
    run_command(["git", "clone", "--depth=1", url, clone_folder], env=env, timeout=timeout)
def _preprocess(input_path: str, output_path: str) -> str:
    r"""Preprocess a C file with ``gcc -E`` against the fake libc headers.

    :param input_path: Path of the C source file to preprocess.
    :param output_path: Path where the preprocessed output is written.
    :return: The preprocessed code, with line-control macros removed.
    :raises PreprocessError: If the preprocessor exits with a nonzero code.
    """
    ret = run_command(
        ["gcc", "-E", "-nostdlib", "-I" + FAKE_LIBC_PATH, "-o", output_path, input_path],
        ignore_errors=True)
    if ret.return_code != 0:
        # Surface the compiler diagnostics when they were captured.
        if ret.captured_output is not None:
            raise PreprocessError(ret.captured_output.decode("utf-8"))
        raise PreprocessError
    with open(output_path, "r") as f:
        code = f.read()
    # Strip line-control macros so errors can be located programmatically.
    return LINE_CONTROL_REGEX.sub("", code)
def verify_docker_image(verbose: bool = False, print_checked_paths: bool = False) -> bool:
    r"""Checks whether the Docker image is up-to-date. This is done by verifying that the
    modification dates of all library files are earlier than the Docker image build date.

    :param verbose: If ``True``, prints out an error message telling the user to rebuild the
        Docker image when it is out-of-date.
    :param print_checked_paths: If ``True``, prints out paths of all checked files.
    :return: ``True`` if the image is up-to-date, ``False`` otherwise.
    """
    # Query the image creation time via the Docker CLI.
    output = run_command(
        ["docker", "image", "ls", "gcc-custom", "--format", "{{.CreatedAt}}"],
        return_output=True).captured_output
    assert output is not None
    image_creation_time_string = output.decode("utf-8").strip()
    image_creation_timestamp = datetime.strptime(
        image_creation_time_string, "%Y-%m-%d %H:%M:%S %z %Z").timestamp()

    # Repository root is three levels up from this file.
    repo_root: Path = Path(__file__).parent.parent.parent
    paths_to_check = ["ghcc", "scripts", ".dockerignore", "Dockerfile", "requirements.txt"]
    # These paths don't affect the image contents and are excluded from the check.
    paths_to_ignore = ["ghcc/parse", "ghcc/database.py", "scripts/fake_libc_include"]
    prefixes_to_ignore = [str(repo_root / path) for path in paths_to_ignore]
    max_timestamp = 0.0
    for repo_path in paths_to_check:
        path = str(repo_root / repo_path)
        if os.path.isfile(path) and not any(path.startswith(prefix) for prefix in prefixes_to_ignore):
            if print_checked_paths:
                print(path)
            max_timestamp = max(max_timestamp, os.path.getmtime(path))
        else:
            for subdir, dirs, files in os.walk(path):
                if subdir.endswith("__pycache__"):
                    continue
                for f in files:
                    file_path = os.path.join(subdir, f)
                    if not any(file_path.startswith(prefix) for prefix in prefixes_to_ignore):
                        if print_checked_paths:
                            print(file_path)
                        max_timestamp = max(max_timestamp, os.path.getmtime(file_path))
    up_to_date = max_timestamp <= image_creation_timestamp
    if not up_to_date and verbose:
        # Reuse `repo_root` instead of re-deriving the root from `__file__` a second time.
        image_path = os.path.relpath(str(repo_root), os.getcwd())
        log("ERROR: Your Docker image is out-of-date. Please rebuild the image by: "
            f"`docker build -t gcc-custom {image_path}`", "error", force_console=True)
    return up_to_date
def _make_skeleton(
        directory: str, timeout: Optional[float] = None, env: Optional[Dict[str, str]] = None,
        verbose: bool = True, *,
        make_fn, check_file_fn: Callable[[str, str], bool] = _check_elf_fn) -> CompileResult:
    r"""A composable routine for different compilation methods. Different routines can be composed
    by specifying different ``make_fn``\ s and ``check_file_fn``\ s.

    :param directory: The directory containing the Makefile.
    :param timeout: Maximum compilation time.
    :param env: A dictionary of environment variables.
    :param verbose: If ``True``, print out executed commands and outputs.
    :param make_fn: The function to call for compilation. The function takes as input variables
        ``directory``, ``timeout``, and ``env``.
    :param check_file_fn: A function to determine whether a generated file should be collected,
        i.e., whether it is a binary file. The function takes as input variables ``directory``
        and ``file``, where ``file`` is the path of the file to check, relative to ``directory``.
        Defaults to :meth:`_check_elf_fn`, which checks whether the file is an ELF file.
    :return: A :class:`CompileResult`; even when compilation fails, ``elf_files`` may be
        partially populated with products found by the post-compilation scan below.
    """
    directory = os.path.abspath(directory)
    try:
        # Clean unversioned files by previous compilations.
        clean(directory)
        # Call the actual function for `make`.
        make_fn(directory, timeout=timeout, env=env, verbose=verbose)
        result = _create_result(True)
    except subprocess.TimeoutExpired as e:
        # Even if exceptions occur, we still check for ELF files, just in case.
        result = _create_result(error_type=CompileErrorType.Timeout, captured_output=e.output)
    except subprocess.CalledProcessError as e:
        result = _create_result(error_type=CompileErrorType.CompileFailed, captured_output=e.output)
    except OSError as e:
        result = _create_result(error_type=CompileErrorType.Unknown, captured_output=str(e))
    try:
        # Use Git to find all unversioned files -- these would be the products of compilation.
        output = run_command(["git", "ls-files", "--others"], cwd=directory,
                             timeout=timeout, return_output=True).captured_output
        assert output is not None
        diff_files = [
            # files containing escape characters are in quotes
            file if file[0] != '"' else file[1:-1]
            for file in output.decode('unicode_escape').split("\n") if file]  # file names could contain spaces
        # Inspect each file and find ELF files.
        for file in diff_files:
            if check_file_fn(directory, file):
                result.elf_files.append(file)
    except subprocess.TimeoutExpired as e:
        # The scan itself failed; report the failure but keep any ELF files found so far.
        return _create_result(elf_files=result.elf_files, error_type=CompileErrorType.Timeout,
                              captured_output=e.output)
    except subprocess.CalledProcessError as e:
        return _create_result(elf_files=result.elf_files, error_type=CompileErrorType.Unknown,
                              captured_output=e.output)
    except OSError as e:
        return _create_result(elf_files=result.elf_files, error_type=CompileErrorType.Unknown,
                              captured_output=str(e))
    return result
def _unsafe_make(directory: str, timeout: Optional[float] = None,
                 env: Optional[Dict[str, str]] = None, verbose: bool = False) -> None:
    r"""Run the build toolchain (autotools + configure + make) directly on the host.

    :param directory: The directory containing the build files.
    :param timeout: Overall time budget; it is decremented after each build phase below.
    :param env: Extra environment variables; ``PATH`` is prefixed with the mock binaries.
    :param verbose: If ``True``, print out executed commands and outputs.
    :raises subprocess.CalledProcessError: If the final `make`/`bmake` step fails.
    """
    # Put the mocked binaries first on PATH so they shadow the real ones.
    env = {"PATH": f"{MOCK_PATH}:{os.environ['PATH']}", **(env or {})}
    # Try GNU Automake first. Note that errors are ignored because it's possible that the
    # original files still work.
    if contains_files(directory, ["configure.ac", "configure.in"]):
        start_time = time.time()
        if os.path.isfile(os.path.join(directory, "autogen.sh")):
            # Some projects with non-trivial build instructions provide an "autogen.sh" script.
            run_command(["chmod", "+x", "./autogen.sh"], env=env, cwd=directory, verbose=verbose)
            run_command(["./autogen.sh"], env=env, cwd=directory, timeout=timeout,
                        verbose=verbose, ignore_errors=True)
        else:
            run_command(["autoreconf", "--force", "--install"], env=env, cwd=directory,
                        timeout=timeout, ignore_errors=True, verbose=verbose)
        end_time = time.time()
        # Charge the elapsed time against the remaining budget (keep at least 1 second).
        if timeout is not None:
            timeout = max(1.0, timeout - int(end_time - start_time))
    # Try running `./configure` if it exists.
    if os.path.isfile(os.path.join(directory, "configure")):
        start_time = time.time()
        run_command(["chmod", "+x", "./configure"], env=env, cwd=directory, verbose=verbose)
        ret = run_command(["./configure", "--disable-werror"], env=env, cwd=directory,
                          timeout=timeout, verbose=verbose, ignore_errors=True)
        end_time = time.time()
        if ret.return_code != 0 and end_time - start_time <= 2:
            # The configure file might not support `--disable-werror` and died instantly.
            # Try again without the flag.
            run_command(["./configure"], env=env, cwd=directory, timeout=timeout, verbose=verbose)
            end_time = time.time()
        if timeout is not None:
            timeout = max(1.0, timeout - int(end_time - start_time))
    # Make while ignoring errors.
    # `-B/--always-make` could give strange errors for certain Makefiles, e.g. ones
    # containing "%:"
    try:
        run_command(["make", "--keep-going", "-j1"], env=env, cwd=directory,
                    timeout=timeout, verbose=verbose)
    except subprocess.CalledProcessError as err:
        expected_msg = b"missing separator"
        if not (err.output is not None and expected_msg in err.output):
            raise err
        else:
            # Try again using BSD Make instead of GNU Make. Note BSD Make does not have a flag
            # equivalent to `-B/--always-make`.
            run_command(["bmake", "-k", "-j1"], env=env, cwd=directory,
                        timeout=timeout, verbose=verbose)
def clone(repo_owner: str, repo_name: str, clone_folder: str, folder_name: Optional[str] = None, *,
          default_branch: Optional[str] = None, timeout: Optional[float] = None,
          recursive: bool = False, skip_if_exists: bool = True) -> CloneResult:
    r"""Clone a repository on GitHub, for instance, ``torvalds/linux``.

    :param repo_owner: Name of the repository owner, e.g., ``torvalds``.
    :param repo_name: Name of the repository, e.g., ``linux``.
    :param clone_folder: Path to the folder where the repository will be stored.
    :param folder_name: Name of the folder of the cloned repository. If ``None``,
        ``repo_owner/repo_name`` is used.
    :param default_branch: Name of the default branch of the repository. Cloning behavior differs
        slightly depending on whether the argument is ``None``. If ``None``, then the following
        happens:

        1. Attempts a shallow clone on only the ``master`` branch.
        2. If error occurs, attempts a shallow clone for all branches.
        3. If error still occurs, raise the error.

        If not ``None``, then the following happens:

        1. Attempts a shallow clone on only the default branch.
        2. If error occurs, raise the error.
    :param timeout: Maximum time allowed for cloning, in seconds. Defaults to ``None``
        (unlimited time).
    :param recursive: If ``True``, passes the ``--recursive`` flag to Git, which recursively
        clones submodules.
    :param skip_if_exists: Whether to skip cloning if the destination folder already exists.
        If ``False``, the folder will be deleted.

    :return: An instance of :class:`CloneResult` indicating the result. Fields ``repo_owner``,
        ``repo_name``, and ``success`` are not ``None``.

        - If cloning succeeded, the field ``time`` is also not ``None``.
        - If cloning failed, the fields ``error_type`` and ``captured_output`` are also
          not ``None``.
    """
    start_time = time.time()
    url = f"https://github.com/{repo_owner}/{repo_name}.git"
    if folder_name is None:
        folder_name = f"{repo_owner}/{repo_name}"
    clone_folder = os.path.join(clone_folder, folder_name)
    if os.path.exists(clone_folder):
        if not skip_if_exists:
            # Remove the stale copy so the clone starts fresh.
            shutil.rmtree(clone_folder)
        else:
            return CloneResult(repo_owner, repo_name, error_type=CloneErrorType.FolderExists)

    # Certain repos might have turned private or been deleted, and git prompts for
    # username/password when it happens. Setting the environment variable `GIT_TERMINAL_PROMPT`
    # to 0 could disable such behavior and let git fail promptly.
    # Lucky that this is introduced in version 2.3; otherwise would have to poll waiting channel
    # of current process and see if it's waiting for IO.
    # See: https://askubuntu.com/questions/19442/what-is-the-waiting-channel-of-a-process
    env = {"GIT_TERMINAL_PROMPT": "0"}

    def try_clone():
        # If a true git error was thrown, re-raise it and let the outer code deal with it.
        try:
            try_branch = default_branch or "master"
            # Try cloning only 'master' branch, but it's possible there's no branch named
            # 'master'.
            run_command([
                "git", "clone", "--depth=1", f"--branch={try_branch}", "--single-branch",
                url, clone_folder], env=env, timeout=timeout)
            return
        except subprocess.CalledProcessError as err:
            expected_msg = b"fatal: Remote branch master not found in upstream origin"
            if default_branch is not None or not (err.output is not None and
                                                  expected_msg in err.output):
                # If `default_branch` is specified, always re-raise the exception.
                raise err
        # 'master' branch doesn't exist; do a shallow clone of all branches.
        run_command(["git", "clone", "--depth=1", url, clone_folder], env=env, timeout=timeout)

    try:
        try_clone()
        end_time = time.time()
        elapsed_time = end_time - start_time
    except subprocess.CalledProcessError as e:
        # Distinguish private/deleted repos from other failures by the git error message.
        no_ssh_expected_msg = b"fatal: could not read Username for 'https://github.com': terminal prompts disabled"
        ssh_expected_msg = b"remote: Repository not found."
        if e.output is not None and (no_ssh_expected_msg in e.output or ssh_expected_msg in e.output):
            return CloneResult(repo_owner, repo_name, error_type=CloneErrorType.PrivateOrNonexistent)
        else:
            return CloneResult(repo_owner, repo_name, error_type=CloneErrorType.Unknown,
                               captured_output=e.output)
    except subprocess.TimeoutExpired as e:
        return CloneResult(repo_owner, repo_name, error_type=CloneErrorType.Timeout,
                           captured_output=e.output)

    if recursive:
        # Spend whatever remains of the time budget on the submodule update.
        submodule_timeout = (timeout - elapsed_time) if timeout is not None else None
        try:
            # If this fails, still treat it as a success, but include a special error type.
            run_command(
                ["git", "submodule", "update", "--init", "--recursive"],
                env=env, cwd=clone_folder, timeout=submodule_timeout)
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
            return CloneResult(repo_owner, repo_name, success=True, time=elapsed_time,
                               error_type=CloneErrorType.SubmodulesFailed, captured_output=e.output)
        end_time = time.time()
        elapsed_time = end_time - start_time
    return CloneResult(repo_owner, repo_name, success=True, time=elapsed_time)