def setUp(self, *args, **kwargs):
    """Create the RunExecutor under test, or skip if namespaces are unavailable.

    First probes whether a trivial function can be executed in a new
    namespace; if that raises OSError the test is skipped with the
    corresponding error text.

    @param args: positional arguments forwarded to RunExecutor
    @param kwargs: keyword arguments forwarded to RunExecutor. "dir_modes" and
        "container_system_config" may be given to override the defaults used
        here; "use_namespaces" is always set to True by this method.
    """
    try:
        container.execute_in_namespace(lambda: 0)
    except OSError as e:
        self.skipTest("Namespaces not supported: {}".format(os.strerror(e.errno)))

    # Treat the test-specific settings as defaults only. Passing them as
    # explicit keywords next to **kwargs would raise a TypeError
    # ("got multiple values for keyword argument") as soon as a caller
    # supplies one of them itself.
    kwargs.setdefault(
        "dir_modes",
        {
            "/": containerexecutor.DIR_READ_ONLY,
            "/tmp": containerexecutor.DIR_HIDDEN,
        },
    )
    kwargs.setdefault("container_system_config", False)

    self.runexecutor = RunExecutor(*args, use_namespaces=True, **kwargs)
def setUp(self, *args, **kwargs):
    """Create the RunExecutor under test, or skip if namespaces are unavailable.

    A trivial function is executed in a new namespace as a probe; an OSError
    from that probe causes the test to be skipped. Otherwise a RunExecutor
    with namespaces enabled is stored in self.runexecutor.

    @param args: positional arguments forwarded to RunExecutor
    @param kwargs: keyword arguments forwarded to RunExecutor; an optional
        "dir_modes" entry replaces the default directory modes used here
    """
    try:
        container.execute_in_namespace(lambda: 0)
    except OSError as e:
        self.skipTest(f"Namespaces not supported: {os.strerror(e.errno)}")

    if "dir_modes" in kwargs:
        # Caller-provided modes win and must not be passed twice.
        dir_modes = kwargs.pop("dir_modes")
    else:
        dir_modes = {
            "/": containerexecutor.DIR_READ_ONLY,
            "/home": containerexecutor.DIR_HIDDEN,
            "/tmp": containerexecutor.DIR_HIDDEN,
        }

    self.runexecutor = RunExecutor(
        *args,
        use_namespaces=True,
        dir_modes=dir_modes,
        **kwargs,
    )
def _start_execution_in_container(
    self,
    args,
    stdin,
    stdout,
    stderr,
    env,
    root_dir,
    cwd,
    temp_dir,
    memlimit,
    memory_nodes,
    cgroups,
    output_dir,
    result_files_patterns,
    parent_setup_fn,
    child_setup_fn,
    parent_cleanup_fn,
):
    """Execute the given command and measure its resource usage similarly to
    super()._start_execution(), but inside a container implemented using Linux
    namespaces. Unless self._allow_network is set, the command runs in a separate
    network namespace in which only the loopback interface is activated.

    @param args: command line of the tool, passed to subprocess.Popen;
        args[0] is additionally used in log messages
    @param stdin: passed to subprocess.Popen as stdin of the tool (may be None)
    @param stdout: passed to subprocess.Popen as stdout of the tool (may be None)
    @param stderr: passed to subprocess.Popen as stderr of the tool (may be None)
    @param env: environment for the tool; if root_dir is None it is updated
        in place with self._env_override
    @param root_dir: if not None, the root filesystem is set up from this directory
        via self._setup_root_filesystem; otherwise
        self._setup_container_filesystem is used
    @param cwd: working directory for the tool (made absolute here); may be
        empty/None only if root_dir is None, in which case os.curdir is used
    @param temp_dir: forwarded to self._setup_container_filesystem; also used as
        location of the output files below the container root
    @param memlimit: forwarded to self._setup_container_filesystem
    @param memory_nodes: forwarded to self._setup_container_filesystem
    @param cgroups: object on which add_task(pid) is called with the PID of the
        process that exec()s the tool
    @param output_dir: target for result files, only used if
        result_files_patterns is non-empty
    @param result_files_patterns: if non-empty, result files are transferred out of
        the container with self._transfer_output_files after the run
    @param parent_setup_fn: called without arguments in the parent right before
        the tool is started; its result is handed to parent_cleanup_fn
    @param child_setup_fn: called without arguments in the process that will
        exec() the tool
    @param parent_cleanup_fn: called in the parent after the tool terminated with
        the result of parent_setup_fn, the ProcessExitCode of the tool, and the
        path "/proc/<child_pid>/root"
    @return: a tuple (grandchild_pid, wait_for_grandchild) where the latter is a
        function that blocks until the result is available and returns
        a triple (exitcode, ru_child, parent_cleanup)
    @raise BenchExecException: if the namespace cannot be created
        or the child process reports an error
    @raise OSError: if the child process terminates with another non-zero exit code
    """
    assert self._use_namespaces

    if root_dir is None:
        env.update(self._env_override)

    # We have three processes involved:
    # parent: the current Python process in which RunExecutor is executing
    # child: child process in new namespace (PID 1 in inner namespace),
    #        configures inner namespace, serves as dummy init,
    #        collects result of grandchild and passes it to parent
    # grandchild: child of child process (PID 2 in inner namespace), exec()s tool

    # We need the following communication steps between these proceses:
    # 1a) grandchild tells parent its PID (in outer namespace).
    # 1b) grandchild tells parent that it is ready and measurement should begin.
    # 2) parent tells grandchild that measurement has begun and tool should
    #    be exec()ed.
    # 3) child tells parent about return value and resource consumption of
    #    grandchild.
    # 1a and 1b are done together by sending the PID through the upstream pipe.
    # 2 is done by sending a marker byte through the downstream pipe.
    # 3 is done by sending a pickled object through the same pipe as #1.
    # We cannot use the same pipe for both directions, because otherwise a sender
    # might read the bytes it has sent itself.

    # Error codes from child to parent
    CHILD_OSERROR = 128  # noqa: N806 local constant
    CHILD_UNKNOWN_ERROR = 129  # noqa: N806 local constant

    # "downstream" pipe parent->grandchild
    from_parent, to_grandchild = os.pipe()
    # "upstream" pipe grandchild/child->parent
    from_grandchild, to_parent = os.pipe()

    # The protocol for these pipes is that first the parent sends the marker for
    # user mappings, then the grand child sends its outer PID back,
    # and finally the parent sends its completion marker.
    # After the run, the child sends the result of the grand child and then waits
    # for the post_run marker, before it terminates.
    MARKER_USER_MAPPING_COMPLETED = b"A"  # noqa: N806 local constant
    MARKER_PARENT_COMPLETED = b"B"  # noqa: N806 local constant
    MARKER_PARENT_POST_RUN_COMPLETED = b"C"  # noqa: N806 local constant

    # If the current directory is within one of the bind mounts we create,
    # we need to cd into this directory again, otherwise we would not see the
    # bind mount, but the directory behind it.
    # Thus we always set cwd to force a change of directory.
    if root_dir is None:
        cwd = os.path.abspath(cwd or os.curdir)
    else:
        root_dir = os.path.abspath(root_dir)
        cwd = os.path.abspath(cwd)

    def grandchild():
        """Setup everything inside the process that finally exec()s the tool."""
        try:
            # We know that this process has PID 2 in the inner namespace,
            # but we actually need to know its PID in the outer namespace
            # such that parent can put us into the correct cgroups. According to
            # http://man7.org/linux/man-pages/man7/pid_namespaces.7.html,
            # there are two ways to achieve this: sending a message with the PID
            # via a socket (but Python 2 lacks a convenient API for sendmsg),
            # and reading /proc/self in the outer procfs instance
            # (that's what we do).
            my_outer_pid = container.get_my_pid_from_procfs()

            container.mount_proc(self._container_system_config)
            container.drop_capabilities()
            container.reset_signal_handling()
            child_setup_fn()  # Do some other setup the caller wants.

            # Signal readiness to parent by sending our PID
            # and wait until parent is also ready
            os.write(to_parent, str(my_outer_pid).encode())
            received = os.read(from_parent, 1)
            assert received == MARKER_PARENT_COMPLETED, received
        finally:
            # close remaining ends of pipe
            os.close(from_parent)
            os.close(to_parent)
        # here Python will exec() the tool for us

    def child():
        """Setup everything inside the container,
        start the tool, and wait for result."""
        try:
            logging.debug(
                "Child: child process of RunExecutor with PID %d started",
                container.get_my_pid_from_procfs(),
            )

            # Put all received signals on hold until we handle them later.
            container.block_all_signals()

            # We want to avoid leaking file descriptors to the executed child.
            # It is also nice if the child has only the minimal necessary file
            # descriptors, to avoid keeping other pipes and files open, e.g.,
            # those that the parent uses to communicate with other containers
            # (if containers are started in parallel).
            # Thus we do not use the close_fds feature of subprocess.Popen,
            # but do the same here manually. We keep the relevant ends of our pipes,
            # and stdin/out/err of child and grandchild.
            necessary_fds = {
                sys.stdin,
                sys.stdout,
                sys.stderr,
                to_parent,
                from_parent,
                stdin,
                stdout,
                stderr,
            } - {None}
            container.close_open_fds(keep_files=necessary_fds)

            try:
                if self._container_system_config:
                    # A standard hostname increases reproducibility.
                    socket.sethostname(container.CONTAINER_HOSTNAME)

                if not self._allow_network:
                    container.activate_network_interface("lo")

                # Wait until user mapping is finished,
                # this is necessary for filesystem writes
                received = os.read(from_parent, len(MARKER_USER_MAPPING_COMPLETED))
                assert received == MARKER_USER_MAPPING_COMPLETED, received

                if root_dir is not None:
                    self._setup_root_filesystem(root_dir)
                else:
                    self._setup_container_filesystem(
                        temp_dir,
                        output_dir if result_files_patterns else None,
                        memlimit,
                        memory_nodes,
                    )

                # Marking this process as "non-dumpable" (no core dumps) also
                # forbids several other ways how other processes can access and
                # influence it:
                # ptrace is forbidden and much of /proc/<child>/ is inaccessible.
                # We set this to prevent the benchmarked tool from messing with this
                # process or using it to escape from the container. More info:
                # http://man7.org/linux/man-pages/man5/proc.5.html
                # It needs to be done after MARKER_USER_MAPPING_COMPLETED.
                libc.prctl(libc.PR_SET_DUMPABLE, libc.SUID_DUMP_DISABLE, 0, 0, 0)
            except OSError as e:
                logging.critical("Failed to configure container: %s", e)
                return CHILD_OSERROR

            try:
                os.chdir(cwd)
            except OSError as e:
                logging.critical(
                    "Cannot change into working directory inside container: %s", e)
                return CHILD_OSERROR

            container.setup_seccomp_filter()

            try:
                grandchild_proc = subprocess.Popen(
                    args,
                    stdin=stdin,
                    stdout=stdout,
                    stderr=stderr,
                    env=env,
                    close_fds=False,
                    preexec_fn=grandchild,
                )
            except (OSError, RuntimeError) as e:
                logging.critical("Cannot start process: %s", e)
                return CHILD_OSERROR

            # keep capability for unmount if necessary later
            necessary_capabilities = (
                [libc.CAP_SYS_ADMIN] if result_files_patterns else [])
            container.drop_capabilities(keep=necessary_capabilities)

            # Close other fds that were still necessary above.
            container.close_open_fds(
                keep_files={sys.stdout, sys.stderr, to_parent, from_parent})

            # Set up signal handlers to forward signals to grandchild
            # (because we are PID 1, there is a special signal handling otherwise).
            # cf. dumb-init project: https://github.com/Yelp/dumb-init
            # Also wait for grandchild and return its result.
            grandchild_result = container.wait_for_child_and_forward_signals(
                grandchild_proc.pid, args[0])

            logging.debug(
                "Child: process %s terminated with exit code %d.",
                args[0],
                grandchild_result[0],
            )

            if result_files_patterns:
                # Remove the bind mount that _setup_container_filesystem added
                # such that the parent can access the result files.
                libc.umount(temp_dir.encode())

            # Re-allow access to /proc/<child>/...,
            # this is used by the parent for accessing output files
            libc.prctl(libc.PR_SET_DUMPABLE, libc.SUID_DUMP_USER, 0, 0, 0)

            os.write(to_parent, pickle.dumps(grandchild_result))
            os.close(to_parent)

            # Now the parent copies the output files, we need to wait until this is
            # finished. If the child terminates, the container file system and its
            # tmpfs go away.
            # The blocking read must not be part of the assert expression:
            # with "python -O" asserts are removed and we would not wait at all.
            received = os.read(from_parent, 1)
            assert received == MARKER_PARENT_POST_RUN_COMPLETED, received
            os.close(from_parent)

            return 0
        except OSError:
            logging.exception("Error in child process of RunExecutor")
            return CHILD_OSERROR
        except BaseException:
            # Need to catch everything because this method always needs to return an
            # int (we are inside a C callback that requires returning int).
            logging.exception("Error in child process of RunExecutor")
            return CHILD_UNKNOWN_ERROR

    try:  # parent
        try:
            child_pid = container.execute_in_namespace(
                child, use_network_ns=not self._allow_network)
        except OSError as e:
            if (
                e.errno == errno.EPERM
                and util.try_read_file("/proc/sys/kernel/unprivileged_userns_clone")
                == "0"
            ):
                raise BenchExecException(
                    "Unprivileged user namespaces forbidden on this system, please "
                    "enable them with 'sysctl -w kernel.unprivileged_userns_clone=1' "
                    "or disable container mode")
            elif (
                e.errno in {errno.ENOSPC, errno.EINVAL}
                and util.try_read_file("/proc/sys/user/max_user_namespaces") == "0"
            ):
                # Ubuntu has ENOSPC, Centos seems to produce EINVAL in this case
                raise BenchExecException(
                    "Unprivileged user namespaces forbidden on this system, please "
                    "enable by using 'sysctl -w user.max_user_namespaces=10000' "
                    "(or another value) or disable container mode")
            else:
                raise BenchExecException(
                    "Creating namespace for container mode failed: "
                    + os.strerror(e.errno))
        logging.debug(
            "Parent: child process of RunExecutor with PID %d started.", child_pid)

        def check_child_exit_code():
            """Check if the child process terminated cleanly
            and raise an error otherwise."""
            child_exitcode, unused_child_rusage = self._wait_for_process(
                child_pid, args[0])
            child_exitcode = util.ProcessExitCode.from_raw(child_exitcode)
            logging.debug(
                "Parent: child process of RunExecutor with PID %d"
                " terminated with %s.",
                child_pid,
                child_exitcode,
            )

            if child_exitcode:
                if child_exitcode.value:
                    if child_exitcode.value == CHILD_OSERROR:
                        # This was an OSError in the child,
                        # details were already logged
                        raise BenchExecException(
                            "execution in container failed, check log for details"
                        )
                    elif child_exitcode.value == CHILD_UNKNOWN_ERROR:
                        raise BenchExecException("unexpected error in container")
                    raise OSError(
                        child_exitcode.value, os.strerror(child_exitcode.value))
                raise OSError(
                    0,
                    "Child process of RunExecutor terminated with "
                    + str(child_exitcode),
                )

        # Close unnecessary ends of pipes such that read() does not block forever
        # if all other processes have terminated.
        os.close(from_parent)
        os.close(to_parent)

        container.setup_user_mapping(child_pid, uid=self._uid, gid=self._gid)
        # signal child to continue
        os.write(to_grandchild, MARKER_USER_MAPPING_COMPLETED)

        try:
            # read at most 10 bytes because this is enough for 32bit int
            grandchild_pid = int(os.read(from_grandchild, 10))
        except ValueError:
            # probably empty read, i.e., pipe closed,
            # i.e., child or grandchild failed
            check_child_exit_code()
            # Raise explicitly: "assert False" would be removed by "python -O"
            # and execution would continue without a grandchild PID.
            raise AssertionError(
                "Child process of RunExecutor terminated cleanly"
                " but did not send expected data.")

        logging.debug(
            "Parent: executing %s in grand child with PID %d"
            " via child with PID %d.",
            args[0],
            grandchild_pid,
            child_pid,
        )

        # start measurements
        cgroups.add_task(grandchild_pid)
        parent_setup = parent_setup_fn()

        # Signal grandchild that setup is finished
        os.write(to_grandchild, MARKER_PARENT_COMPLETED)

        # Copy file descriptor, otherwise we could not close from_grandchild in
        # finally block and would leak a file descriptor in case of exception.
        from_grandchild_copy = os.dup(from_grandchild)
        to_grandchild_copy = os.dup(to_grandchild)
    finally:
        os.close(from_grandchild)
        os.close(to_grandchild)

    def wait_for_grandchild():
        """Wait for the result sent by the child, run the parent-side cleanup,
        and return (exitcode, ru_child, parent_cleanup)."""
        # 1024 bytes ought to be enough for everyone^Wour pickled result
        try:
            received = os.read(from_grandchild_copy, 1024)
        except OSError as e:
            if self.PROCESS_KILLED and e.errno == errno.EINTR:
                # Read was interrupted because of Ctrl+C, we just try again
                received = os.read(from_grandchild_copy, 1024)
            else:
                raise

        if not received:
            # Typically this means the child exited prematurely because an error
            # occurred, and check_child_exitcode() will handle this.
            # We close the pipe first, otherwise child could hang infinitely.
            os.close(from_grandchild_copy)
            os.close(to_grandchild_copy)
            check_child_exit_code()
            # Raise explicitly: "assert False" would be removed by "python -O"
            # and we would try to unpickle an empty result.
            raise AssertionError(
                "Child process terminated cleanly without sending result")

        exitcode, ru_child = pickle.loads(received)

        base_path = "/proc/{}/root".format(child_pid)
        parent_cleanup = parent_cleanup_fn(
            parent_setup, util.ProcessExitCode.from_raw(exitcode), base_path)

        if result_files_patterns:
            # As long as the child process exists
            # we can access the container file system here
            self._transfer_output_files(
                base_path + temp_dir, cwd, output_dir, result_files_patterns)

        os.close(from_grandchild_copy)
        os.write(to_grandchild_copy, MARKER_PARENT_POST_RUN_COMPLETED)
        os.close(to_grandchild_copy)  # signal child that it can terminate
        check_child_exit_code()

        return exitcode, ru_child, parent_cleanup

    return grandchild_pid, wait_for_grandchild
def _start_execution_in_container(
        self, args, stdin, stdout, stderr, env, cwd, temp_dir, cgroups,
        output_dir, result_files_patterns,
        parent_setup_fn, child_setup_fn, parent_cleanup_fn):
    """Execute the given command and measure its resource usage similarly to
    super()._start_execution(), but inside a container implemented using Linux
    namespaces. Unless self._allow_network is set, the command runs in a separate
    network namespace in which only the loopback interface is activated.

    @param args: command line of the tool; it is first transformed with
        self._build_cmdline and then passed to subprocess.Popen
    @param stdin: passed to subprocess.Popen as stdin of the tool (may be None)
    @param stdout: passed to subprocess.Popen as stdout of the tool (may be None)
    @param stderr: passed to subprocess.Popen as stderr of the tool (may be None)
    @param env: environment for the tool
    @param cwd: working directory for the tool (made absolute here,
        os.curdir is used if empty/None)
    @param temp_dir: forwarded to self._setup_container_filesystem
        and self._transfer_output_files
    @param cgroups: object on which add_task(pid) is called with the PID of the
        process that exec()s the tool
    @param output_dir: forwarded to self._transfer_output_files
    @param result_files_patterns: if non-empty, result files are transferred with
        self._transfer_output_files after the run
    @param parent_setup_fn: called without arguments in the parent right before
        the tool is started; its result is handed to parent_cleanup_fn
    @param child_setup_fn: called without arguments in the process that will
        exec() the tool
    @param parent_cleanup_fn: called in the parent with the result of
        parent_setup_fn after the result of the tool was received
    @return: a tuple (grandchild_pid, wait_for_grandchild) where the latter is a
        function that blocks until the result is available and returns
        a triple (exitcode, ru_child, parent_cleanup)
    @raise BenchExecException: if the namespace cannot be created
    @raise OSError: if the child process does not terminate cleanly
    """
    assert self._use_namespaces

    args = self._build_cmdline(args, env=env)

    # We have three processes involved:
    # parent: the current Python process in which RunExecutor is executing
    # child: child process in new namespace (PID 1 in inner namespace),
    #        configures inner namespace, serves as dummy init,
    #        collects result of grandchild and passes it to parent
    # grandchild: child of child process (PID 2 in inner namespace), exec()s tool

    # We need the following communication steps between these proceses:
    # 1a) grandchild tells parent its PID (in outer namespace).
    # 1b) grandchild tells parent that it is ready and measurement should begin.
    # 2) parent tells grandchild that measurement has begun and tool should
    #    be exec()ed.
    # 3) child tells parent about return value and resource consumption of grandchild.
    # 1a and 1b are done together by sending the PID through the upstream pipe.
    # 2 is done by sending a null byte through the downstream pipe.
    # 3 is done by sending a pickled object through the same pipe as #1.
    # We cannot use the same pipe for both directions, because otherwise a sender might
    # read the bytes it has sent itself.
    from_parent, to_grandchild = os.pipe()  # "downstream" pipe parent->grandchild
    from_grandchild, to_parent = os.pipe()  # "upstream" pipe grandchild/child->parent

    # If the current directory is within one of the bind mounts we create,
    # we need to cd into this directory again, otherwise we would not see the bind mount,
    # but the directory behind it. Thus we always set cwd to force a change of directory.
    cwd = os.path.abspath(cwd or os.curdir)

    def grandchild():
        """Setup everything inside the process that finally exec()s the tool."""
        try:
            # We know that this process has PID 2 in the inner namespace,
            # but we actually need to know its PID in the outer namespace
            # such that parent can put us into the correct cgroups.
            # According to http://man7.org/linux/man-pages/man7/pid_namespaces.7.html,
            # there are two ways to achieve this: sending a message with the PID
            # via a socket (but Python < 3.3 lacks a convenient API for sendmsg),
            # and reading /proc/self in the outer procfs instance (that's what we do).
            my_outer_pid = container.get_my_pid_from_procfs()

            container.mount_proc()
            container.drop_capabilities()
            child_setup_fn()  # Do some other setup the caller wants.

            # Signal readiness to parent by sending our PID and wait until parent is also ready
            os.write(to_parent, str(my_outer_pid).encode())
            received = os.read(from_parent, 1)
            assert received == b'\0', received
        finally:
            # close remaining ends of pipe
            os.close(from_parent)
            os.close(to_parent)
        # here Python will exec() the tool for us

    def child():
        """Setup everything inside the container, start the tool, and wait for result."""
        try:
            logging.debug("Child: child process of RunExecutor with PID %d started",
                          container.get_my_pid_from_procfs())

            # We want to avoid leaking file descriptors to the executed child.
            # It is also nice if the child has only the minimal necessary file descriptors,
            # to avoid keeping other pipes and files open, e.g., those that the parent
            # uses to communicate with other containers (if containers are started in parallel).
            # Thus we do not use the close_fds feature of subprocess.Popen,
            # but do the same here manually.
            # We keep the relevant ends of our pipes, and stdin/out/err of child and grandchild.
            necessary_fds = {sys.stdin, sys.stdout, sys.stderr,
                             to_parent, from_parent, stdin, stdout, stderr} - {None}
            container.close_open_fds(keep_files=necessary_fds)

            try:
                if not self._allow_network:
                    container.activate_network_interface("lo")
                self._setup_container_filesystem(temp_dir)
            except EnvironmentError as e:
                logging.critical("Failed to configure container: %s", e)
                return int(e.errno)

            try:
                os.chdir(cwd)
            except EnvironmentError as e:
                logging.critical(
                    "Cannot change into working directory inside container: %s", e)
                return int(e.errno)

            try:
                grandchild_proc = subprocess.Popen(args,
                                                   stdin=stdin,
                                                   stdout=stdout, stderr=stderr,
                                                   env=env,
                                                   close_fds=False,
                                                   preexec_fn=grandchild)
            except (EnvironmentError, RuntimeError) as e:
                logging.critical("Cannot start process: %s", e)
                try:
                    return int(e.errno)
                except BaseException:
                    # subprocess.Popen in Python 2.7 throws OSError with errno=None
                    # if the preexec_fn fails.
                    return -2

            container.drop_capabilities()

            # Set up signal handlers to forward signals to grandchild
            # (because we are PID 1, there is a special signal handling otherwise).
            # cf. dumb-init project: https://github.com/Yelp/dumb-init
            container.forward_all_signals(grandchild_proc.pid, args[0])

            # Close other fds that were still necessary above.
            container.close_open_fds(keep_files={sys.stdout, sys.stderr, to_parent})

            # wait for grandchild and return its result
            grandchild_result = self._wait_for_process(grandchild_proc.pid, args[0])
            logging.debug("Child: process %s terminated with exit code %d.",
                          args[0], grandchild_result[0])
            os.write(to_parent, pickle.dumps(grandchild_result))
            os.close(to_parent)

            return 0
        except EnvironmentError as e:
            logging.exception("Error in child process of RunExecutor")
            return int(e.errno)
        except BaseException:
            # Need to catch everything because this method always needs to return a int
            # (we are inside a C callback that requires returning int).
            logging.exception("Error in child process of RunExecutor")
            return -1

    try:  # parent
        try:
            child_pid = container.execute_in_namespace(
                child, use_network_ns=not self._allow_network)
        except OSError as e:
            raise BenchExecException(
                "Creating namespace for container mode failed: " + os.strerror(e.errno))
        logging.debug("Parent: child process of RunExecutor with PID %d started.", child_pid)

        def check_child_exit_code():
            """Check if the child process terminated cleanly and raise an error otherwise."""
            child_exitcode, unused_child_rusage = self._wait_for_process(child_pid, args[0])
            child_exitcode = util.ProcessExitCode.from_raw(child_exitcode)
            logging.debug("Parent: child process of RunExecutor with PID %d terminated with %s.",
                          child_pid, child_exitcode)

            if child_exitcode:
                if child_exitcode.value and child_exitcode.value <= 128:
                    # This was an OSError in the child, re-create it
                    raise OSError(child_exitcode.value, os.strerror(child_exitcode.value))
                raise OSError(0, "Child process of RunExecutor terminated with " + str(child_exitcode))

        # Close unnecessary ends of pipes such that read() does not block forever
        # if all other processes have terminated.
        os.close(from_parent)
        os.close(to_parent)

        container.setup_user_mapping(child_pid, uid=self._uid, gid=self._gid)

        try:
            grandchild_pid = int(os.read(from_grandchild, 10))  # 10 bytes is enough for 32bit int
        except ValueError:
            # probably empty read, i.e., pipe closed, i.e., child or grandchild failed
            check_child_exit_code()
            # Raise explicitly: "assert False" would be removed by "python -O"
            # and execution would continue without a grandchild PID.
            raise AssertionError(
                "Child process of RunExecutor terminated cleanly but did not send expected data.")

        logging.debug("Parent: executing %s in grand child with PID %d via child with PID %d.",
                      args[0], grandchild_pid, child_pid)

        # start measurements
        cgroups.add_task(grandchild_pid)
        parent_setup = parent_setup_fn()

        # Signal grandchild that setup is finished
        os.write(to_grandchild, b'\0')

        # Copy file descriptor, otherwise we could not close from_grandchild in finally block
        # and would leak a file descriptor in case of exception.
        from_grandchild_copy = os.dup(from_grandchild)
    finally:
        os.close(from_grandchild)
        os.close(to_grandchild)

    def wait_for_grandchild():
        """Wait for the result sent by the child, run the parent-side cleanup,
        and return (exitcode, ru_child, parent_cleanup)."""
        # 1024 bytes ought to be enough for everyone^Wour pickled result
        try:
            received = os.read(from_grandchild_copy, 1024)
        except OSError as e:
            if self.PROCESS_KILLED and e.errno == errno.EINTR:
                # Read was interrupted because of Ctrl+C, we just try again
                received = os.read(from_grandchild_copy, 1024)
            else:
                raise

        parent_cleanup = parent_cleanup_fn(parent_setup)

        os.close(from_grandchild_copy)
        check_child_exit_code()

        if result_files_patterns:
            self._transfer_output_files(temp_dir, cwd, output_dir, result_files_patterns)

        exitcode, ru_child = pickle.loads(received)
        return exitcode, ru_child, parent_cleanup

    return grandchild_pid, wait_for_grandchild
def _start_execution_in_container(self, args, stdin, stdout, stderr, env,
                                  root_dir, cwd, temp_dir, cgroups,
                                  output_dir, result_files_patterns,
                                  parent_setup_fn, child_setup_fn,
                                  parent_cleanup_fn):
    """Execute the given command and measure its resource usage similarly to
    super()._start_execution(), but inside a container implemented using Linux
    namespaces. Unless self._allow_network is set, the command runs in a separate
    network namespace in which only the loopback interface is activated.

    @param args: command line of the tool; it is first transformed with
        self._build_cmdline and then passed to subprocess.Popen
    @param stdin: passed to subprocess.Popen as stdin of the tool (may be None)
    @param stdout: passed to subprocess.Popen as stdout of the tool (may be None)
    @param stderr: passed to subprocess.Popen as stderr of the tool (may be None)
    @param env: environment for the tool;
        it is updated in place with self._env_override
    @param root_dir: if not None, the root filesystem is set up from this directory
        via self._setup_root_filesystem; otherwise
        self._setup_container_filesystem is used
    @param cwd: working directory for the tool (made absolute here); may be
        empty/None only if root_dir is None, in which case os.curdir is used
    @param temp_dir: forwarded to self._setup_container_filesystem
        and self._transfer_output_files
    @param cgroups: object on which add_task(pid) is called with the PID of the
        process that exec()s the tool
    @param output_dir: forwarded to self._transfer_output_files
    @param result_files_patterns: if non-empty, result files are transferred with
        self._transfer_output_files after the run
    @param parent_setup_fn: called without arguments in the parent right before
        the tool is started; its result is handed to parent_cleanup_fn
    @param child_setup_fn: called without arguments in the process that will
        exec() the tool
    @param parent_cleanup_fn: called in the parent with the result of
        parent_setup_fn after the result of the tool was received
    @return: a tuple (grandchild_pid, wait_for_grandchild) where the latter is a
        function that blocks until the result is available and returns
        a triple (exitcode, ru_child, parent_cleanup)
    @raise BenchExecException: if the namespace cannot be created
        or the child process reports an error
    @raise OSError: if the child process terminates with another non-zero exit code
    """
    assert self._use_namespaces

    env.update(self._env_override)

    args = self._build_cmdline(args, env=env)

    # We have three processes involved:
    # parent: the current Python process in which RunExecutor is executing
    # child: child process in new namespace (PID 1 in inner namespace),
    #        configures inner namespace, serves as dummy init,
    #        collects result of grandchild and passes it to parent
    # grandchild: child of child process (PID 2 in inner namespace), exec()s tool

    # We need the following communication steps between these proceses:
    # 1a) grandchild tells parent its PID (in outer namespace).
    # 1b) grandchild tells parent that it is ready and measurement should begin.
    # 2) parent tells grandchild that measurement has begun and tool should
    #    be exec()ed.
    # 3) child tells parent about return value and resource consumption of grandchild.
    # 1a and 1b are done together by sending the PID through the upstream pipe.
    # 2 is done by sending a null byte through the downstream pipe.
    # 3 is done by sending a pickled object through the same pipe as #1.
    # We cannot use the same pipe for both directions, because otherwise a sender might
    # read the bytes it has sent itself.

    # Error codes from child to parent
    CHILD_OSERROR = 128
    CHILD_UNKNOWN_ERROR = 129

    from_parent, to_grandchild = os.pipe()  # "downstream" pipe parent->grandchild
    from_grandchild, to_parent = os.pipe()  # "upstream" pipe grandchild/child->parent

    # If the current directory is within one of the bind mounts we create,
    # we need to cd into this directory again, otherwise we would not see the bind mount,
    # but the directory behind it. Thus we always set cwd to force a change of directory.
    if root_dir is None:
        cwd = os.path.abspath(cwd or os.curdir)
    else:
        root_dir = os.path.abspath(root_dir)
        cwd = os.path.abspath(cwd)

    def grandchild():
        """Setup everything inside the process that finally exec()s the tool."""
        try:
            # We know that this process has PID 2 in the inner namespace,
            # but we actually need to know its PID in the outer namespace
            # such that parent can put us into the correct cgroups.
            # According to http://man7.org/linux/man-pages/man7/pid_namespaces.7.html,
            # there are two ways to achieve this: sending a message with the PID
            # via a socket (but Python < 3.3 lacks a convenient API for sendmsg),
            # and reading /proc/self in the outer procfs instance (that's what we do).
            my_outer_pid = container.get_my_pid_from_procfs()

            container.mount_proc()
            container.drop_capabilities()
            container.reset_signal_handling()
            child_setup_fn()  # Do some other setup the caller wants.

            # Signal readiness to parent by sending our PID and wait until parent is also ready
            os.write(to_parent, str(my_outer_pid).encode())
            received = os.read(from_parent, 1)
            assert received == b'\0', received
        finally:
            # close remaining ends of pipe
            os.close(from_parent)
            os.close(to_parent)
        # here Python will exec() the tool for us

    def child():
        """Setup everything inside the container, start the tool, and wait for result."""
        try:
            logging.debug(
                "Child: child process of RunExecutor with PID %d started",
                container.get_my_pid_from_procfs())

            # Put all received signals on hold until we handle them later.
            container.block_all_signals()

            # We want to avoid leaking file descriptors to the executed child.
            # It is also nice if the child has only the minimal necessary file descriptors,
            # to avoid keeping other pipes and files open, e.g., those that the parent
            # uses to communicate with other containers (if containers are started in parallel).
            # Thus we do not use the close_fds feature of subprocess.Popen,
            # but do the same here manually.
            # We keep the relevant ends of our pipes, and stdin/out/err of child and grandchild.
            necessary_fds = {
                sys.stdin, sys.stdout, sys.stderr, to_parent, from_parent,
                stdin, stdout, stderr
            } - {None}
            container.close_open_fds(keep_files=necessary_fds)

            try:
                if not self._allow_network:
                    container.activate_network_interface("lo")
                if root_dir is not None:
                    self._setup_root_filesystem(root_dir)
                else:
                    self._setup_container_filesystem(temp_dir)
            except EnvironmentError as e:
                logging.critical("Failed to configure container: %s", e)
                return CHILD_OSERROR

            try:
                os.chdir(cwd)
            except EnvironmentError as e:
                logging.critical(
                    "Cannot change into working directory inside container: %s",
                    e)
                return CHILD_OSERROR

            try:
                grandchild_proc = subprocess.Popen(args,
                                                   stdin=stdin,
                                                   stdout=stdout,
                                                   stderr=stderr,
                                                   env=env,
                                                   close_fds=False,
                                                   preexec_fn=grandchild)
            except (EnvironmentError, RuntimeError) as e:
                logging.critical("Cannot start process: %s", e)
                return CHILD_OSERROR

            container.drop_capabilities()

            # Close other fds that were still necessary above.
            container.close_open_fds(
                keep_files={sys.stdout, sys.stderr, to_parent})

            # Set up signal handlers to forward signals to grandchild
            # (because we are PID 1, there is a special signal handling otherwise).
            # cf. dumb-init project: https://github.com/Yelp/dumb-init
            # Also wait for grandchild and return its result.
            if _HAS_SIGWAIT:
                grandchild_result = container.wait_for_child_and_forward_all_signals(
                    grandchild_proc.pid, args[0])
            else:
                container.forward_all_signals_async(
                    grandchild_proc.pid, args[0])
                grandchild_result = self._wait_for_process(
                    grandchild_proc.pid, args[0])

            logging.debug(
                "Child: process %s terminated with exit code %d.", args[0],
                grandchild_result[0])
            os.write(to_parent, pickle.dumps(grandchild_result))
            os.close(to_parent)

            return 0
        except EnvironmentError:
            logging.exception("Error in child process of RunExecutor")
            return CHILD_OSERROR
        except BaseException:
            # Need to catch everything because this method always needs to return a int
            # (we are inside a C callback that requires returning int).
            logging.exception("Error in child process of RunExecutor")
            return CHILD_UNKNOWN_ERROR

    try:  # parent
        try:
            child_pid = container.execute_in_namespace(
                child, use_network_ns=not self._allow_network)
        except OSError as e:
            raise BenchExecException(
                "Creating namespace for container mode failed: " +
                os.strerror(e.errno))
        logging.debug(
            "Parent: child process of RunExecutor with PID %d started.",
            child_pid)

        def check_child_exit_code():
            """Check if the child process terminated cleanly and raise an error otherwise."""
            child_exitcode, unused_child_rusage = self._wait_for_process(
                child_pid, args[0])
            child_exitcode = util.ProcessExitCode.from_raw(child_exitcode)
            logging.debug(
                "Parent: child process of RunExecutor with PID %d terminated with %s.",
                child_pid, child_exitcode)

            if child_exitcode:
                if child_exitcode.value:
                    if child_exitcode.value == CHILD_OSERROR:
                        # This was an OSError in the child, details were already logged
                        raise BenchExecException(
                            "execution in container failed, check log for details"
                        )
                    elif child_exitcode.value == CHILD_UNKNOWN_ERROR:
                        raise BenchExecException(
                            "unexpected error in container")
                    raise OSError(child_exitcode.value,
                                  os.strerror(child_exitcode.value))
                raise OSError(
                    0, "Child process of RunExecutor terminated with " +
                    str(child_exitcode))

        # Close unnecessary ends of pipes such that read() does not block forever
        # if all other processes have terminated.
        os.close(from_parent)
        os.close(to_parent)

        container.setup_user_mapping(child_pid, uid=self._uid, gid=self._gid)

        try:
            grandchild_pid = int(os.read(
                from_grandchild, 10))  # 10 bytes is enough for 32bit int
        except ValueError:
            # probably empty read, i.e., pipe closed, i.e., child or grandchild failed
            check_child_exit_code()
            # Raise explicitly: "assert False" would be removed by "python -O"
            # and execution would continue without a grandchild PID.
            raise AssertionError(
                "Child process of RunExecutor terminated cleanly but did not send expected data.")

        logging.debug(
            "Parent: executing %s in grand child with PID %d via child with PID %d.",
            args[0], grandchild_pid, child_pid)

        # start measurements
        cgroups.add_task(grandchild_pid)
        parent_setup = parent_setup_fn()

        # Signal grandchild that setup is finished
        os.write(to_grandchild, b'\0')

        # Copy file descriptor, otherwise we could not close from_grandchild in finally block
        # and would leak a file descriptor in case of exception.
        from_grandchild_copy = os.dup(from_grandchild)
    finally:
        os.close(from_grandchild)
        os.close(to_grandchild)

    def wait_for_grandchild():
        """Wait for the result sent by the child, run the parent-side cleanup,
        and return (exitcode, ru_child, parent_cleanup)."""
        # 1024 bytes ought to be enough for everyone^Wour pickled result
        try:
            received = os.read(from_grandchild_copy, 1024)
        except OSError as e:
            if self.PROCESS_KILLED and e.errno == errno.EINTR:
                # Read was interrupted because of Ctrl+C, we just try again
                received = os.read(from_grandchild_copy, 1024)
            else:
                raise

        parent_cleanup = parent_cleanup_fn(parent_setup)

        os.close(from_grandchild_copy)
        check_child_exit_code()

        if result_files_patterns:
            self._transfer_output_files(temp_dir, cwd, output_dir,
                                        result_files_patterns)

        exitcode, ru_child = pickle.loads(received)
        return exitcode, ru_child, parent_cleanup

    return grandchild_pid, wait_for_grandchild