def _setup_root_filesystem(self, root_dir):
        """Prepare a minimal root file system below root_dir and chroot into it.

        The host's /dev and /proc hierarchies are bind-mounted (recursively and
        as private mounts) onto the "dev" and "proc" directories inside
        root_dir, which are created if missing. Afterwards root_dir becomes the
        new root directory of the calling process.

        @param root_dir:
            The path of the root directory that is used to execute the process.
        """
        new_root = root_dir.encode()

        # Only empty mount targets are created here. We do not mount a fresh
        # /proc because the grandchild still needs a view of the old /proc.
        proc_target = os.path.join(new_root, b"proc")
        os.makedirs(proc_target, exist_ok=True)
        dev_target = os.path.join(new_root, b"dev")
        os.makedirs(dev_target, exist_ok=True)

        # Copy the host's dev- and proc-mountpoints into the new root.
        # The copies are private so that they are not changed
        # by existing mounts during run execution.
        host_mounts = (
            (b"/dev/", dev_target),
            (b"/proc/", proc_target),
        )
        for host_path, target in host_mounts:
            container.make_bind_mount(host_path,
                                      target,
                                      recursive=True,
                                      private=True)

        os.chroot(new_root)
示例#2
0
 def make_tmpfs_dir(path):
     """Bind-mount a fresh directory below temp_base over path, if path exists.

     Nothing happens if the user configured a mode for path explicitly or if
     path is not a directory inside the container mount hierarchy.
     """
     if path in dir_modes:
         # explicitly configured by user
         return
     target = mount_base + path
     if not os.path.isdir(target):
         return
     backing_dir = temp_base + path
     util.makedirs(backing_dir, exist_ok=True)
     container.make_bind_mount(backing_dir, target)
示例#3
0
 def make_tmpfs_dir(path):
     """Ensure that a tmpfs is mounted on path, if the path exists.

     The backing directory below temp_base is always created, unless the user
     configured a mode for path explicitly. The mount itself is only done if
     path is a directory inside the container mount hierarchy.
     """
     if path in self._dir_modes:
         # explicitly configured by user
         return
     backing_dir = temp_base + path
     util.makedirs(backing_dir, exist_ok=True)
     target = mount_base + path
     if not os.path.isdir(target):
         return
     if self._container_tmpfs:
         # We already have a tmpfs, so a bind mount of a directory in it suffices.
         container.make_bind_mount(backing_dir, target)
     else:
         # No tmpfs available yet, so mount a dedicated one.
         libc.mount(None, target, b"tmpfs", 0, tmpfs_opts)
    def _setup_container_filesystem(self, temp_dir, output_dir, memlimit,
                                    memory_nodes):
        """Build the file-system hierarchy of the container and chroot into it.

        First, all existing mountpoints are copied recursively into mount_base
        as "private" mounts (i.e., later changes to the existing mountpoints
        do not propagate to our copy), and each of them is adjusted according
        to the mode the user has specified (hidden, read-only, overlay, or
        full-access). Both is delegated to container.duplicate_mount_hierarchy.
        Afterwards some hard-coded special cases are configured and finally we
        chroot into the new mount hierarchy.

        The new layout still has a view of the host's /proc. We do not mount a
        fresh /proc here because the grandchild still needs the old /proc.

        Creating a new hierarchy and chrooting into it is easier than changing
        the existing mount points in place: we keep access to the original
        mountpoints while working, and we avoid race conditions if someone
        else changes the existing mountpoints.

        @param temp_dir:
            The base directory under which all our directories should be created.
        @param output_dir:
            If truthy, the directory with the files created by the tool is
            additionally bind-mounted read-only inside the new hierarchy
            (and hidden again) such that result files can be copied out later.
        @param memlimit:
            Size limit used for tmpfs mounts; "100%" is used if it is falsy.
        @param memory_nodes:
            If non-empty, tmpfs mounts are bound to these memory nodes
            via the "mpol=bind:" mount option.
        """
        # All paths here are bytes to avoid issues
        # if existing mountpoints are invalid UTF-8.

        # temp_base is the directory with files created by the tool.
        temp_base = self._get_result_files_base(temp_dir).encode()
        temp_dir = temp_dir.encode()

        # Assemble the comma-separated option string for all tmpfs mounts.
        size_option = "size=" + str(memlimit or "100%")
        if memory_nodes:
            node_list = ",".join(str(node) for node in memory_nodes)
            option_string = size_option + "," + "mpol=bind:" + node_list
        else:
            option_string = size_option
        tmpfs_opts = option_string.encode()

        if self._container_tmpfs:
            libc.mount(None, temp_dir, b"tmpfs", 0, tmpfs_opts)

        # mount_base is the base dir for container mounts.
        # Overlayfs needs its own additional temporary directory ("work" directory):
        # temp_base will be the "upper" layer, the host FS the "lower" layer,
        # and mount_base the mount target.
        mount_base = os.path.join(temp_dir, b"mount")
        work_base = os.path.join(temp_dir, b"overlayfs")
        for fresh_dir in (mount_base, temp_base, work_base):
            os.mkdir(fresh_dir)

        # Copy all mounts to mount_base and apply directory modes
        container.duplicate_mount_hierarchy(mount_base, temp_base, work_base,
                                            self._dir_modes)

        # Now configure some special hard-coded cases

        # The following directories should be writable RAM disks
        # for Posix shared memory. For example, the Python multiprocessing module
        # explicitly checks for a tmpfs instance.
        for shm_path in (b"/dev/shm", b"/run/shm"):
            if shm_path in self._dir_modes:
                continue  # explicitly configured by user
            shm_backing = temp_base + shm_path
            os.makedirs(shm_backing, exist_ok=True)
            shm_target = mount_base + shm_path
            if not os.path.isdir(shm_target):
                continue  # path does not exist in the container
            if self._container_tmpfs:
                # We already have a tmpfs, so we can just bind mount it.
                container.make_bind_mount(shm_backing, shm_target)
            else:
                # Otherwise we need a dedicated tmpfs.
                libc.mount(None, shm_target, b"tmpfs", 0, tmpfs_opts)

        if self._container_system_config:
            container.setup_container_system_config(temp_base, mount_base,
                                                    self._dir_modes)

        # Location of temp_dir as seen inside the new mount hierarchy.
        temp_dir_in_container = mount_base + temp_dir

        if output_dir:
            # We need a way to see temp_base in the container in order to be able to
            # copy result files out of it, so we need a directory that is guaranteed
            # to exist in order to use it as mountpoint for a bind mount to temp_base.
            # The tool inside the container should not have access to temp_base,
            # so another bind mount with an empty directory is put on top below
            # (equivalent to --hidden-dir). After the tool terminates we can unmount
            # the top-level bind mount and then access temp_base. This works only if
            # there is no other mount point below that directory, and the user can
            # force us to create mount points at arbitrary directories by specifying
            # a directory mode. temp_dir fulfills all requirements because we have
            # just created it as a fresh directory ourselves.
            # So we mount temp_base outside of the container to temp_dir inside.
            os.makedirs(temp_dir_in_container, exist_ok=True)
            container.make_bind_mount(temp_base,
                                      temp_dir_in_container,
                                      read_only=True)

        # If necessary (i.e., if /tmp is not already hidden),
        # hide the directory where we store our files from processes in the container
        # by mounting an empty directory over it.
        # This also hides the read-only bind mount created above.
        if os.path.exists(temp_dir_in_container):
            empty_cover = temp_base + temp_dir
            os.makedirs(empty_cover, exist_ok=True)
            container.make_bind_mount(empty_cover, temp_dir_in_container)

        # Now we make mount_base the new root directory.
        container.chroot(mount_base)
    def _setup_container_filesystem(self, temp_dir):
        """Setup the filesystem layout in the container.

        As first step, we create a copy of all existing mountpoints in mount_base,
        recursively, and as "private" mounts (i.e., changes to existing mountpoints
        afterwards won't propagate to our copy).
        Then we iterate over all mountpoints and change them
        according to the mode the user has specified
        (hidden, read-only, overlay, or full-access).
        This has to be done for each mountpoint because overlays are not recursive.
        Then we chroot into the new mount hierarchy.

        The new filesystem layout still has a view of the host's /proc.
        We do not mount a fresh /proc here because the grandchild still needs
        the old /proc.

        We do not simply iterate over all existing mount points and set them to
        read-only/overlay them, because it is easier to create a new hierarchy
        and chroot into it.
        First, we still have access to the original mountpoints while doing so,
        and second, we avoid race conditions if someone else changes
        the existing mountpoints.

        @param temp_dir: The base directory under which all our directories should be created.
        @raise OSError: If an overlay mount cannot be created (re-raised with a hint
            to use other directory modes) or if another directory or mount operation fails.
        """
        # All strings here are bytes to avoid issues if existing mountpoints are invalid UTF-8.
        temp_dir = temp_dir.encode()
        mount_base = os.path.join(temp_dir, b"mount")  # base dir for container mounts
        temp_base = os.path.join(temp_dir, b"temp")  # directory with files created by tool
        os.mkdir(mount_base)
        os.mkdir(temp_base)

        def _is_below(path, target_path):
            """Return whether path is equal to or located below target_path."""
            # compare with trailing slashes for cases like /foo and /foobar
            path = os.path.join(path, b"")
            target_path = os.path.join(target_path, b"")
            return path.startswith(target_path)

        def find_mode_for_dir(path, fstype):
            """Return the directory mode to apply to the mountpoint path.

            The result is one of the DIR_* constants, or None if nothing needs
            to be done for this mountpoint.
            """
            if path == b"/proc":
                # /proc is necessary for the grandchild to read PID, will be replaced later.
                return DIR_READ_ONLY
            if _is_below(path, b"/proc"):
                # Irrelevant.
                return None

            # The last matching entry of self._dir_modes wins;
            # parent_mode is only set by entries strictly above path.
            parent_mode = None
            result_mode = None
            for special_dir, mode in self._dir_modes.items():
                if _is_below(path, special_dir):
                    if path != special_dir:
                        parent_mode = mode
                    result_mode = mode
            assert result_mode is not None

            if result_mode == DIR_OVERLAY and (
                    _is_below(path, b"/dev") or
                    _is_below(path, b"/sys") or
                    fstype == b"autofs" or
                    fstype == b"cgroup"):
                # Import /dev, /sys, cgroup, and autofs from host into the container,
                # overlay does not work for them.
                return DIR_READ_ONLY

            if result_mode == DIR_HIDDEN and parent_mode == DIR_HIDDEN:
                # No need to recursively recreate mountpoints in hidden dirs.
                return None
            return result_mode

        # Overlayfs needs its own additional temporary directory ("work" directory).
        # temp_base will be the "upper" layer, the host FS the "lower" layer,
        # and mount_base the mount target.
        work_base = os.path.join(temp_dir, b"overlayfs")
        os.mkdir(work_base)

        if self._container_system_config:
            container.setup_container_system_config(temp_base)

        # Create a copy of host's mountpoints.
        container.make_bind_mount(b"/", mount_base, recursive=True, private=True)

        # Ensure each special dir is a mountpoint such that the next loop covers it.
        for special_dir in self._dir_modes:
            mount_path = mount_base + special_dir
            temp_path = temp_base + special_dir
            try:
                container.make_bind_mount(mount_path, mount_path)
            except OSError as e:
                # Best effort only: the failure is logged and setup continues.
                logging.debug("Failed to make %s a bind mount: %s", mount_path, e)
            # Keep the explicit existence check (instead of exist_ok=True):
            # the path may already exist as a regular file.
            if not os.path.exists(temp_path):
                os.makedirs(temp_path)

        # Set desired access mode for each mountpoint.
        # The mount points are collected into a list before the loop starts;
        # presumably because the loop body itself changes the mounts.
        for unused_source, full_mountpoint, fstype, options in list(container.get_mount_points()):
            if not _is_below(full_mountpoint, mount_base):
                continue
            mountpoint = full_mountpoint[len(mount_base):] or b"/"

            mount_path = mount_base + mountpoint
            temp_path = temp_base + mountpoint
            work_path = work_base + mountpoint

            mode = find_mode_for_dir(mountpoint, fstype)
            if mode == DIR_OVERLAY:
                if not os.path.exists(temp_path):
                    os.makedirs(temp_path)
                if not os.path.exists(work_path):
                    os.makedirs(work_path)
                try:
                    # Previous mount in this place not needed if replaced with overlay dir.
                    libc.umount(mount_path)
                except OSError as e:
                    logging.debug(e)
                try:
                    container.make_overlay_mount(mount_path, mountpoint, temp_path, work_path)
                except OSError as e:
                    # Keep errno, but give the user a hint, and chain the original error.
                    raise OSError(
                        e.errno,
                        "Creating overlay mount for '{}' failed: {}. "
                        "Please use other directory modes.".format(
                            mountpoint.decode(), os.strerror(e.errno)),
                    ) from e

            elif mode == DIR_HIDDEN:
                if not os.path.exists(temp_path):
                    os.makedirs(temp_path)
                try:
                    # Previous mount in this place not needed if replaced with hidden dir.
                    libc.umount(mount_path)
                except OSError as e:
                    logging.debug(e)
                container.make_bind_mount(temp_path, mount_path)

            elif mode == DIR_READ_ONLY:
                try:
                    container.remount_with_additional_flags(mount_path, options, libc.MS_RDONLY)
                except OSError as e:
                    if e.errno == errno.EACCES:
                        logging.warning(
                            "Cannot mount '%s', directory may be missing from container.",
                            mountpoint.decode())
                    else:
                        # If this mountpoint is below an overlay/hidden dir re-create mountpoint.
                        # Linux does not support making read-only bind mounts in one step:
                        # https://lwn.net/Articles/281157/ http://man7.org/linux/man-pages/man8/mount.8.html
                        container.make_bind_mount(
                            mountpoint, mount_path, recursive=True, private=True)
                        container.remount_with_additional_flags(mount_path, options, libc.MS_RDONLY)

            elif mode == DIR_FULL_ACCESS:
                try:
                    # Ensure directory is still a mountpoint by attempting to remount.
                    container.remount_with_additional_flags(mount_path, options, 0)
                except OSError as e:
                    if e.errno == errno.EACCES:
                        logging.warning(
                            "Cannot mount '%s', directory may be missing from container.",
                            mountpoint.decode())
                    else:
                        # If this mountpoint is below an overlay/hidden dir re-create mountpoint.
                        container.make_bind_mount(
                            mountpoint, mount_path, recursive=True, private=True)

            elif mode is None:
                pass

            else:
                # Explicit raise instead of "assert False" so that an unknown mode
                # is also detected when Python runs with -O.
                raise AssertionError("Unexpected directory mode {!r}".format(mode))

        # If necessary, (i.e., if /tmp is not already hidden),
        # hide the directory where we store our files from processes in the container
        # by mounting an empty directory over it.
        if os.path.exists(mount_base + temp_dir):
            # The directory may already exist (e.g., if a directory mode was applied
            # to a mountpoint at this path), which must not be an error.
            os.makedirs(temp_base + temp_dir, exist_ok=True)
            container.make_bind_mount(temp_base + temp_dir, mount_base + temp_dir)

        os.chroot(mount_base)
示例#6
0
    def _setup_container_filesystem(self, temp_dir):
        """Setup the filesystem layout in the container.

        As first step, we create a copy of all existing mountpoints in mount_base,
        recursively, and as "private" mounts (i.e., changes to existing mountpoints
        afterwards won't propagate to our copy).
        Then we iterate over all mountpoints and change them
        according to the mode the user has specified
        (hidden, read-only, overlay, or full-access).
        This has to be done for each mountpoint because overlays are not recursive.
        Then we chroot into the new mount hierarchy.

        The new filesystem layout still has a view of the host's /proc.
        We do not mount a fresh /proc here because the grandchild still needs
        the old /proc.

        We do not simply iterate over all existing mount points and set them to
        read-only/overlay them, because it is easier to create a new hierarchy
        and chroot into it.
        First, we still have access to the original mountpoints while doing so,
        and second, we avoid race conditions if someone else changes
        the existing mountpoints.

        @param temp_dir: The base directory under which all our directories should be created.
        @raise OSError: If an overlay mount cannot be created (re-raised with a hint
            to use other directory modes) or if another directory or mount operation fails.
        """
        # All strings here are bytes to avoid issues if existing mountpoints are invalid UTF-8.
        temp_base = self._get_result_files_base(
            temp_dir).encode()  # directory with files created by tool
        temp_dir = temp_dir.encode()
        mount_base = os.path.join(temp_dir,
                                  b"mount")  # base dir for container mounts
        os.mkdir(mount_base)
        os.mkdir(temp_base)

        def _is_below(path, target_path):
            """Return whether path is equal to or located below target_path."""
            # compare with trailing slashes for cases like /foo and /foobar
            path = os.path.join(path, b"")
            target_path = os.path.join(target_path, b"")
            return path.startswith(target_path)

        def find_mode_for_dir(path, fstype):
            """Return the directory mode to apply to the mountpoint path.

            The result is one of the DIR_* constants, or None if nothing needs
            to be done for this mountpoint.
            """
            if path == b"/proc":
                # /proc is necessary for the grandchild to read PID, will be replaced later.
                return DIR_READ_ONLY
            if _is_below(path, b"/proc"):
                # Irrelevant.
                return None

            # The last matching entry of self._dir_modes wins;
            # parent_mode is only set by entries strictly above path.
            parent_mode = None
            result_mode = None
            for special_dir, mode in self._dir_modes.items():
                if _is_below(path, special_dir):
                    if path != special_dir:
                        parent_mode = mode
                    result_mode = mode
            assert result_mode is not None

            if result_mode == DIR_OVERLAY and (_is_below(path, b"/dev")
                                               or _is_below(path, b"/sys")
                                               or fstype == b"cgroup"):
                # Overlay does not make sense for /dev, /sys, and all cgroups.
                return DIR_READ_ONLY

            if result_mode == DIR_OVERLAY and (fstype == b"autofs"
                                               or fstype == b"vfat"
                                               or fstype == b"ntfs"):
                # Overlayfs does not support these as underlying file systems.
                logging.debug(
                    "Cannot use overlay mode for %s because it has file system %s. "
                    "Using read-only mode instead.", path.decode(),
                    fstype.decode())
                return DIR_READ_ONLY

            if result_mode == DIR_HIDDEN and parent_mode == DIR_HIDDEN:
                # No need to recursively recreate mountpoints in hidden dirs.
                return None
            return result_mode

        # Overlayfs needs its own additional temporary directory ("work" directory).
        # temp_base will be the "upper" layer, the host FS the "lower" layer,
        # and mount_base the mount target.
        work_base = os.path.join(temp_dir, b"overlayfs")
        os.mkdir(work_base)

        if self._container_system_config:
            container.setup_container_system_config(temp_base)

        # Create a copy of host's mountpoints.
        # Setting MS_PRIVATE flag decouples our mount namespace from the host's,
        # i.e., mounts we do are not seen by the host, and any (un)mounts the host does
        # afterward are not seen by us. The latter is desired such that new mounts (e.g.,
        # USB sticks being plugged in) do not appear in the container.
        # Blocking host-side unmounts from being propagated has the disadvantage
        # that any unmounts done by the sysadmin won't really unmount the device
        # because it stays mounted in the container and thus keeps the device busy
        # (cf. https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=739593#85).
        # We could allow unmounts being propagated with MS_SLAVE instead of MS_PRIVATE,
        # but we prefer to have the mount namespace of the container being
        # unchanged during run execution.
        container.make_bind_mount(b"/",
                                  mount_base,
                                  recursive=True,
                                  private=True)

        # Ensure each special dir is a mountpoint such that the next loop covers it.
        for special_dir in self._dir_modes:
            mount_path = mount_base + special_dir
            temp_path = temp_base + special_dir
            try:
                container.make_bind_mount(mount_path, mount_path)
            except OSError as e:
                # on btrfs, non-recursive bind mounts fail
                if e.errno == errno.EINVAL:
                    try:
                        container.make_bind_mount(mount_path,
                                                  mount_path,
                                                  recursive=True)
                    except OSError as e2:
                        logging.debug(
                            "Failed to make %s a (recursive) bind mount: %s",
                            mount_path, e2)
                else:
                    logging.debug("Failed to make %s a bind mount: %s",
                                  mount_path, e)
            # Keep the explicit existence check (instead of exist_ok=True):
            # the path may already exist as a regular file.
            if not os.path.exists(temp_path):
                os.makedirs(temp_path)

        # Set desired access mode for each mountpoint.
        # The mount points are collected into a list before the loop starts;
        # presumably because the loop body itself changes the mounts.
        for unused_source, full_mountpoint, fstype, options in list(
                container.get_mount_points()):
            if not _is_below(full_mountpoint, mount_base):
                continue
            mountpoint = full_mountpoint[len(mount_base):] or b"/"

            mount_path = mount_base + mountpoint
            temp_path = temp_base + mountpoint
            work_path = work_base + mountpoint

            mode = find_mode_for_dir(mountpoint, fstype)
            if mode == DIR_OVERLAY:
                if not os.path.exists(temp_path):
                    os.makedirs(temp_path)
                if not os.path.exists(work_path):
                    os.makedirs(work_path)
                try:
                    # Previous mount in this place not needed if replaced with overlay dir.
                    libc.umount(mount_path)
                except OSError as e:
                    logging.debug(e)
                try:
                    container.make_overlay_mount(mount_path, mountpoint,
                                                 temp_path, work_path)
                except OSError as e:
                    # Keep errno, but give the user a hint, and chain the original error.
                    raise OSError(
                        e.errno,
                        "Creating overlay mount for '{}' failed: {}. "
                        "Please use other directory modes.".format(
                            mountpoint.decode(), os.strerror(e.errno)),
                    ) from e

            elif mode == DIR_HIDDEN:
                if not os.path.exists(temp_path):
                    os.makedirs(temp_path)
                try:
                    # Previous mount in this place not needed if replaced with hidden dir.
                    libc.umount(mount_path)
                except OSError as e:
                    logging.debug(e)
                container.make_bind_mount(temp_path, mount_path)

            elif mode == DIR_READ_ONLY:
                try:
                    container.remount_with_additional_flags(
                        mount_path, options, libc.MS_RDONLY)
                except OSError as e:
                    if e.errno == errno.EACCES:
                        logging.warning(
                            "Cannot mount '%s', directory may be missing from container.",
                            mountpoint.decode())
                    else:
                        # If this mountpoint is below an overlay/hidden dir re-create mountpoint.
                        # Linux does not support making read-only bind mounts in one step:
                        # https://lwn.net/Articles/281157/ http://man7.org/linux/man-pages/man8/mount.8.html
                        container.make_bind_mount(mountpoint,
                                                  mount_path,
                                                  recursive=True,
                                                  private=True)
                        container.remount_with_additional_flags(
                            mount_path, options, libc.MS_RDONLY)

            elif mode == DIR_FULL_ACCESS:
                try:
                    # Ensure directory is still a mountpoint by attempting to remount.
                    container.remount_with_additional_flags(
                        mount_path, options, 0)
                except OSError as e:
                    if e.errno == errno.EACCES:
                        logging.warning(
                            "Cannot mount '%s', directory may be missing from container.",
                            mountpoint.decode())
                    else:
                        # If this mountpoint is below an overlay/hidden dir re-create mountpoint.
                        container.make_bind_mount(mountpoint,
                                                  mount_path,
                                                  recursive=True,
                                                  private=True)

            elif mode is None:
                pass

            else:
                # Explicit raise instead of "assert False" so that an unknown mode
                # is also detected when Python runs with -O.
                raise AssertionError("Unexpected directory mode {!r}".format(mode))

        # If necessary, (i.e., if /tmp is not already hidden),
        # hide the directory where we store our files from processes in the container
        # by mounting an empty directory over it.
        if os.path.exists(mount_base + temp_dir):
            # The directory may already exist (e.g., if a directory mode was applied
            # to a mountpoint at this path), which must not be an error.
            os.makedirs(temp_base + temp_dir, exist_ok=True)
            container.make_bind_mount(temp_base + temp_dir,
                                      mount_base + temp_dir)

        os.chroot(mount_base)
示例#7
0
    def _setup_container_filesystem(self, temp_dir, output_dir, memlimit, memory_nodes):
        """Setup the filesystem layout in the container.
         As first step, we create a copy of all existing mountpoints in mount_base, recursively,
        and as "private" mounts (i.e., changes to existing mountpoints afterwards won't propagate
        to our copy).
        Then we iterate over all mountpoints and change them
        according to the mode the user has specified (hidden, read-only, overlay, or full-access).
        This has do be done for each mountpoint because overlays are not recursive.
        Then we chroot into the new mount hierarchy.

        The new filesystem layout still has a view of the host's /proc.
        We do not mount a fresh /proc here because the grandchild still needs the old /proc.

        We do simply iterate over all existing mount points and set them to read-only/overlay them,
        because it is easier to create a new hierarchy and chroot into it.
        First, we still have access to the original mountpoints while doing so,
        and second, we avoid race conditions if someone else changes the existing mountpoints.

        @param temp_dir: The base directory under which all our directories should be created.
        """
        # All strings here are bytes to avoid issues if existing mountpoints are invalid UTF-8.
        temp_base = self._get_result_files_base(temp_dir).encode() # directory with files created by tool
        temp_dir = temp_dir.encode()

        tmpfs_opts = ["size=" + str(memlimit or "100%")]
        if memory_nodes:
            tmpfs_opts.append("mpol=bind:" + ",".join(map(str, memory_nodes)))
        tmpfs_opts = (",".join(tmpfs_opts)).encode()
        if self._container_tmpfs:
            libc.mount(None, temp_dir, b"tmpfs", 0, tmpfs_opts)

        mount_base = os.path.join(temp_dir, b"mount") # base dir for container mounts
        os.mkdir(mount_base)
        os.mkdir(temp_base)

        def _is_below(path, target_path):
            # compare with trailing slashes for cases like /foo and /foobar
            path = os.path.join(path, b"")
            target_path = os.path.join(target_path, b"")
            return path.startswith(target_path)

        def find_mode_for_dir(path, fstype=None):
            if (path == b"/proc"):
                # /proc is necessary for the grandchild to read PID, will be replaced later.
                return DIR_READ_ONLY
            if _is_below(path, b"/proc"):
                # Irrelevant.
                return None

            parent_mode = None
            result_mode = None
            for special_dir, mode in self._dir_modes.items():
                if _is_below(path, special_dir):
                    if path != special_dir:
                        parent_mode = mode
                    result_mode = mode
            assert result_mode is not None

            if result_mode == DIR_OVERLAY and (
                    _is_below(path, b"/dev") or
                    _is_below(path, b"/sys") or
                    fstype == b"cgroup"):
                # Overlay does not make sense for /dev, /sys, and all cgroups.
                return DIR_READ_ONLY

            if result_mode == DIR_OVERLAY and (
                    fstype == b"autofs" or
                    fstype == b"vfat" or
                    fstype == b"ntfs"):
                # Overlayfs does not support these as underlying file systems.
                logging.debug("Cannot use overlay mode for %s because it has file system %s. "
                              "Using read-only mode instead.",
                              path.decode(), fstype.decode())
                return DIR_READ_ONLY

            if result_mode == DIR_HIDDEN and parent_mode == DIR_HIDDEN:
                # No need to recursively recreate mountpoints in hidden dirs.
                return None
            return result_mode

        # Overlayfs needs its own additional temporary directory ("work" directory).
        # temp_base will be the "upper" layer, the host FS the "lower" layer,
        # and mount_base the mount target.
        work_base = os.path.join(temp_dir, b"overlayfs")
        os.mkdir(work_base)

        # Create a copy of the host's mountpoints.
        # Setting the MS_PRIVATE flag decouples our mount namespace from the host's,
        # i.e., mounts we do are not seen by the host, and any (un)mounts the host does afterward
        # are not seen by us. The latter is desired such that new mounts (e.g.,
        # USB sticks being plugged in) do not appear in the container.
        # Blocking host-side unmounts from being propagated has the disadvantage
        # that any unmounts done by the sysadmin won't really unmount the device
        # because it stays mounted in the container and thus keeps the device busy
        # (cf. https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=739593#85).
        # We could allow unmounts being propagated with MS_SLAVE instead of MS_PRIVATE,
        # but we prefer to have the mount namespace of the container being
        # unchanged during run execution.
        container.make_bind_mount(b"/", mount_base, recursive=True, private=True)

        # Ensure each special dir is a mountpoint such that the next loop covers it.
        for special_dir in self._dir_modes.keys():
            mount_path = mount_base + special_dir
            temp_path = temp_base + special_dir
            try:
                container.make_bind_mount(mount_path, mount_path)
            except OSError as e:
                # On btrfs, non-recursive bind mounts fail.
                if e.errno == errno.EINVAL:
                    try:
                        container.make_bind_mount(mount_path, mount_path, recursive=True)
                    except OSError as e2:
                        logging.debug("Failed to make %s a (recursive) bind mount: %s", mount_path, e2)
                else:
                    logging.debug("Failed to make %s a bind mount: %s", mount_path, e)
            if not os.path.exists(temp_path):
                os.makedirs(temp_path)

        # Set desired access mode for each mountpoint.
        for unused_source, full_mountpoint, fstype, options in list(container.get_mount_points()):
            if not _is_below(full_mountpoint, mount_base):
                continue
            mountpoint = full_mountpoint[len(mount_base):] or b"/"
            mode = find_mode_for_dir(mountpoint, fstype)
            if not mode:
                continue

            if not os.access(os.path.dirname(mountpoint), os.X_OK):
                # If parent is not accessible we cannot mount something on mountpoint.
                # We mark the inaccessible directory as hidden because otherwise the mountpoint
                # could become accessible (directly!) if the permissions on the parent
                # are relaxed during container execution.
                original_mountpoint = mountpoint
                parent = os.path.dirname(mountpoint)
                while not os.access(parent, os.X_OK):
                    mountpoint = parent
                    parent = os.path.dirname(mountpoint)
                mode = DIR_HIDDEN
                logging.debug(
                    "Marking inaccessible directory '%s' as hidden "
                    "because it contains a mountpoint at '%s'",
                    mountpoint.decode(), original_mountpoint.decode())
            else:
                logging.debug("Mounting '%s' as %s", mountpoint.decode(), mode)

            mount_path = mount_base + mountpoint
            temp_path = temp_base + mountpoint
            work_path = work_base + mountpoint

            if mode == DIR_OVERLAY:
                if not os.path.exists(temp_path):
                    os.makedirs(temp_path)
                if not os.path.exists(work_path):
                    os.makedirs(work_path)
                try:
                    # Previous mount in this place not needed if replaced with overlay dir.
                    libc.umount(mount_path)
                except OSError as e:
                    logging.debug(e)
                try:
                    container.make_overlay_mount(mount_path, mountpoint, temp_path, work_path)
                except OSError as e:
                    raise OSError(e.errno,
                        "Creating overlay mount for '{}' failed: {}. "
                        "Please use other directory modes."
                            .format(mountpoint.decode(), os.strerror(e.errno)))

            elif mode == DIR_HIDDEN:
                if not os.path.exists(temp_path):
                    os.makedirs(temp_path)
                try:
                    # Previous mount in this place not needed if replaced with hidden dir.
                    libc.umount(mount_path)
                except OSError as e:
                    logging.debug(e)
                container.make_bind_mount(temp_path, mount_path)

            elif mode == DIR_READ_ONLY:
                try:
                    container.remount_with_additional_flags(mount_path, options, libc.MS_RDONLY)
                except OSError as e:
                    if e.errno == errno.EACCES:
                        logging.warning(
                            "Cannot mount '%s', directory may be missing from container.",
                            mountpoint.decode())
                    else:
                        # If this mountpoint is below an overlay/hidden dir re-create mountpoint.
                        # Linux does not support making read-only bind mounts in one step:
                        # https://lwn.net/Articles/281157/ http://man7.org/linux/man-pages/man8/mount.8.html
                        container.make_bind_mount(
                            mountpoint, mount_path, recursive=True, private=True)
                        container.remount_with_additional_flags(mount_path, options, libc.MS_RDONLY)

            elif mode == DIR_FULL_ACCESS:
                try:
                    # Ensure directory is still a mountpoint by attempting to remount.
                    container.remount_with_additional_flags(mount_path, options, 0)
                except OSError as e:
                    if e.errno == errno.EACCES:
                        logging.warning(
                            "Cannot mount '%s', directory may be missing from container.",
                            mountpoint.decode())
                    else:
                        # If this mountpoint is below an overlay/hidden dir re-create mountpoint.
                        container.make_bind_mount(
                            mountpoint, mount_path, recursive=True, private=True)

            else:
                assert False

        # Now configure some special hard-coded cases

        def make_tmpfs_dir(path):
            """Mount a tmpfs on path inside the container, provided that path exists.

            Paths for which the user configured a directory mode are left untouched.

            @param path:
                The absolute path (as bytes) relative to the container root.
            """
            if path in self._dir_modes:
                # The user configured this path explicitly, so respect that choice.
                return

            backing_dir = temp_base + path
            util.makedirs(backing_dir, exist_ok=True)

            target = mount_base + path
            if not os.path.isdir(target):
                # Nothing to mount onto if the directory is missing in the container.
                return

            if self._container_tmpfs:
                # A tmpfs already exists, so bind-mounting the backing directory suffices.
                container.make_bind_mount(backing_dir, target)
            else:
                # Without an existing tmpfs we have to mount a new one on the target.
                libc.mount(None, target, b"tmpfs", 0, tmpfs_opts)

        # The following directories should be writable RAM disks for Posix shared memory.
        # For example, the Python multiprocessing module explicitly checks for a tmpfs instance.
        make_tmpfs_dir(b"/dev/shm")
        make_tmpfs_dir(b"/run/shm")

        if self._container_system_config:
            # If overlayfs is not used for /etc, we need additional bind mounts
            # for files in /etc that we want to override, like /etc/passwd
            config_mount_base = mount_base if find_mode_for_dir(b"/etc") != DIR_OVERLAY else None
            container.setup_container_system_config(temp_base, config_mount_base )

        if output_dir:
            # We need a way to see temp_base in the container in order to be able to copy result
            # files out of it, so we need a directory that is guaranteed to exist in order to use
            # it as mountpoint for a bind mount to temp_base.
            # Of course, the tool inside the container should not have access to temp_base,
            # so we will add another bind mount with an empty directory on top
            # (equivalent to --hidden-dir). After the tool terminates we can unmount
            # the top-level bind mount and then access temp_base. However, this works only
            # if there is no other mount point below that directory, and the user can force us
            # to create mount points at arbitrary directories if a directory mode is specified.
            # So we need an existing directory with no mount points below, and luckily temp_dir
            # fulfills all requirements (because we have just created it as a fresh directory ourselves).
            # So we mount temp_base outside of the container to temp_dir inside.
            util.makedirs(mount_base + temp_dir, exist_ok=True)
            container.make_bind_mount(temp_base, mount_base + temp_dir, read_only=True)
            # And the following if branch will automatically hide the bind
            # mount below an empty directory.

        # If necessary, (i.e., if /tmp is not already hidden),
        # hide the directory where we store our files from processes in the container
        # by mounting an empty directory over it.
        if os.path.exists(mount_base + temp_dir):
            util.makedirs(temp_base + temp_dir, exist_ok=True)
            container.make_bind_mount(temp_base + temp_dir, mount_base + temp_dir)

        os.chroot(mount_base)