def mountpoints_from_subvol_meta(subvol: Subvol) -> Iterator[str]:
    '''
    Yields the image-relative path of every mountpoint recorded under
    `META_MOUNTS_DIR` of `subvol`.

    Directories get a trailing /, while files do not.  See the
    `_protected_path_set` docblock if this convention proves onerous.

    Yields nothing when the mounts metadata directory does not exist.
    '''
    mounts_path = subvol.path(META_MOUNTS_DIR)
    if not os.path.exists(mounts_path):
        return
    for path, _next_dirs, _files in os.walk(
        # We are not `chroot`ed, so following links could access outside the
        # image; `followlinks=False` is the default -- explicit for safety.
        mounts_path, onerror=_raise, followlinks=False,
    ):
        # `mounts_path` does not change during the walk, so reuse it rather
        # than asking `subvol.path()` for it again for every directory.
        relpath = os.path.relpath(path, mounts_path).decode()
        if os.path.basename(relpath) != MOUNT_MARKER:
            continue
        mountpoint = os.path.dirname(relpath)
        assert not mountpoint.endswith('/'), mountpoint
        # It would be more technically correct to use `subvol.path()`
        # here (since that prevents us from following links outside the
        # image), but this is much more legible and probably safe.
        with open(os.path.join(path, b'is_directory')) as f:
            is_directory = json.load(f)
        yield mountpoint + ('/' if is_directory else '')
def builder(subvol: Subvol):
    '''
    Runs `item.cmd` as `item.user` in a non-booted nspawn container whose
    layer is `subvol` itself (`snapshot=False`).  `/meta` is bind-mounted
    read-only, and so is `/__fs_image__` whenever it exists in `subvol`.
    '''
    fs_image_path = subvol.path('__fs_image__')
    # Use `.stat()`, not `.exists()`, to fail if `/` is not readable.
    try:
        os.stat(fs_image_path)
    except FileNotFoundError:
        maybe_protect_fs_image = ()
    else:
        maybe_protect_fs_image = ((fs_image_path, '/__fs_image__'),)

    opts = new_nspawn_opts(
        layer=subvol,
        snapshot=False,
        cmd=item.cmd,
        # The command cannot change `/meta` & `/__fs_image__`
        bindmount_ro=(
            (subvol.path('/meta'), '/meta'),
            *maybe_protect_fs_image,
        ),
        # Future: support the case where the in-container user DB
        # diverges from the out-of-container user DB.  And user NS.
        user=pwd.getpwnam(item.user),
    )

    # Repo servers are only injected when the item asks for snapshots.
    popen_wrappers = []
    if item.serve_rpm_snapshots:
        popen_wrappers.append(functools.partial(
            inject_repo_servers, item.serve_rpm_snapshots,
        ))

    # NB: stdout redirects to stderr by default
    run_non_booted_nspawn(opts, PopenArgs(), popen_wrappers=popen_wrappers)
def build(self, subvol: Subvol, layer_opts: LayerOpts):
    '''
    Creates `self.into_dir / self.path_to_make` (with all intermediate
    directories) inside `subvol`, then applies this item's stat options
    starting from the outermost newly requested directory.
    '''
    # Only the first component of `path_to_make` is handed to
    # `build_stat_options`.
    outermost = self.path_to_make.partition('/')[0]
    innermost_path = subvol.path(
        os.path.join(self.into_dir, self.path_to_make)
    )
    subvol.run_as_root(['mkdir', '-p', innermost_path])
    outermost_path = subvol.path(os.path.join(self.into_dir, outermost))
    build_stat_options(self, subvol, outermost_path)
def gen_subvolume_subtree_provides(subvol: Subvol, subtree: Path): 'Yields "Provides" instances for a path `subtree` in `subvol`.' # "Provides" classes use image-absolute paths that are `str` (for now). # Accept any string type to ease future migrations. subtree = os.path.join('/', Path(subtree).decode()) protected_paths = protected_path_set(subvol) for prot_path in protected_paths: rel_to_subtree = os.path.relpath(os.path.join('/', prot_path), subtree) if not has_leading_dot_dot(rel_to_subtree): yield ProvidesDoNotAccess(path=rel_to_subtree) subtree_full_path = subvol.path(subtree).decode() subtree_exists = False # Traverse the subvolume as root, so that we have permission to access # everything. for type_and_path in subvol.run_as_root([ # -P is the analog of --no-dereference in GNU tools # # Filter out the protected paths at traversal time. If one of the # paths has a very large or very slow mount, traversing it would # have a devastating effect on build times, so let's avoid looking # inside protected paths entirely. An alternative would be to # `send` and to parse the sendstream, but this is ok too. 'find', '-P', subtree_full_path, '(', *itertools.dropwhile( lambda x: x == '-o', # Drop the initial `-o` itertools.chain.from_iterable([ # `normpath` removes the trailing / for protected dirs '-o', '-path', subvol.path(os.path.normpath(p)) ] for p in protected_paths), ), ')', '-prune', '-o', '-printf', '%y %p\\0', ], stdout=subprocess.PIPE).stdout.split(b'\0'): if not type_and_path: # after the trailing \0 continue filetype, abspath = type_and_path.decode().split(' ', 1) relpath = os.path.relpath(abspath, subtree_full_path) assert not has_leading_dot_dot(relpath), (abspath, subtree_full_path) # We already "provided" this path above, and it should have been # filtered out by `find`. 
assert not is_path_protected(relpath, protected_paths), relpath # Future: This provides all symlinks as files, while we should # probably provide symlinks to valid directories inside the image as # directories to be consistent with SymlinkToDirItem. if filetype in ['b', 'c', 'p', 'f', 'l', 's']: yield ProvidesFile(path=relpath) elif filetype == 'd': yield ProvidesDirectory(path=relpath) else: # pragma: no cover raise AssertionError(f'Unknown {filetype} for {abspath}') if relpath == '.': subtree_exists = True # We should've gotten a CalledProcessError from `find`. assert subtree_exists, f'{subtree} does not exist in {subvol.path()}'
def builder(subvol: Subvol):
    '''
    Creates `subvol`, forces its root to mode 0755 owned by root:root, and
    makes sure the meta directory exists.
    '''
    subvol.create()
    # Guarantee standard / permissions.  This could be a setting,
    # but in practice, probably any other choice would be wrong.
    for tool, setting in (('chmod', '0755'), ('chown', 'root:root')):
        subvol.run_as_root([tool, setting, subvol.path()])
    ensure_meta_dir_exists(subvol, layer_opts)
def builder(subvol: Subvol) -> None:
    '''
    Runs one yum/dnf invocation in the build appliance per command derived
    from `action_to_names_or_rpms`, installing into `subvol`.
    '''
    # Convert porcelain RpmAction to plumbing YumDnfCommands.  This
    # is done in the builder because we need access to the subvol.
    commands_to_nors = _convert_actions_to_commands(
        subvol, action_to_names_or_rpms,
    )
    # Sort by command for determinism and (hopefully) better behavior.
    ordered_commands = sorted(
        commands_to_nors.items(),
        key=lambda cn: YUM_DNF_COMMAND_ORDER[cn[0]],
    )
    for cmd, nors in ordered_commands:
        rpms, bind_ros = _rpms_and_bind_ros(nors)
        # Sorting the RPMs ensures determinism even if `yum` or `dnf` is
        # order-dependent.
        yum_dnf_args = [cmd.value, '--assumeyes', *sorted(rpms)]
        _yum_dnf_using_build_appliance(
            build_appliance=layer_opts.build_appliance,
            bind_ros=bind_ros,
            install_root=subvol.path(),
            protected_paths=protected_path_set(subvol),
            yum_dnf_args=yum_dnf_args,
            layer_opts=layer_opts,
        )
def _prepare_versionlock_lists(
    subvol: Subvol, snapshot_dir: Path, list_path: Path
) -> Dict[str, Tuple[str, int]]:
    '''
    Yields a map of "in-snapshot path" -> ("tempfile with its contents",
    number of locked entries), with the intention that the tempfile in the
    value will be a read-only bind-mount over the path in the key.

    The tempfiles live in a temporary directory that is only valid while
    this generator is suspended at its `yield`.
    NOTE(review): presumably wrapped as a context manager by a decorator
    outside of this view -- confirm.
    '''
    # `dnf` and `yum` expect different formats, so we parse our own.
    with open(list_path) as rf:
        # Lines are split as-is, so the last field keeps its line ending.
        envras = [line.split('\t') for line in rf]

    templates = {'yum': '{e}:{n}-{v}-{r}.{a}', 'dnf': '{n}-{e}:{v}-{r}.{a}'}
    dest_to_src_and_size = {}
    with temp_dir() as d:
        # Only bind-mount lists for those binaries that exist in the snapshot.
        progs_in_snapshot = set(
            f'{p}' for p in (subvol.path(snapshot_dir)).listdir()
        )
        for prog in progs_in_snapshot & set(templates.keys()):
            template = templates[prog]
            src = d / (prog + '-versionlock.list')
            with create_ro(src, 'w') as wf:
                for e, n, v, r, a in envras:
                    wf.write(template.format(e=e, n=n, v=v, r=r, a=a))
            # This path convention must match how `write_yum_dnf_conf.py`
            # and `rpm_repo_snapshot.bzl` set up their output.
            dest = snapshot_dir / f'{prog}/etc/{prog}/plugins/versionlock.list'
            set_new_key(dest_to_src_and_size, dest, (src, len(envras)))
        yield dest_to_src_and_size
def ensure_meta_dir_exists(subvol: Subvol, layer_opts: LayerOpts):
    '''
    Creates `META_DIR` in `subvol` (mode 0755) and (re)writes the
    `META_ARTIFACTS_REQUIRE_REPO` marker from
    `layer_opts.artifacts_may_require_repo`.

    A marker that already exists is validated against `layer_opts` and
    removed before the new value is serialized.
    '''
    meta_dir = subvol.path(META_DIR)
    subvol.run_as_root(['mkdir', '--mode=0755', '--parents', meta_dir])

    # One might ask: why are we serializing this into the image instead
    # of just putting a condition on `built_artifacts_require_repo`
    # into our Buck macros?  Two reasons:
    #   - In the case of build appliance images, it is possible for a
    #     @mode/dev (in-place) build to use **either** a @mode/dev, or a
    #     @mode/opt (standalone) build appliance.  The only way to know
    #     if the appliance needs a repo mount is to have a marker in the
    #     image.
    #   - By marking the images, we avoid having to conditionally add
    #     `--bind-repo-ro` flags in a bunch of places in our codebase.  The
    #     in-image marker enables `nspawn_in_subvol` to decide.
    marker_path = subvol.path(META_ARTIFACTS_REQUIRE_REPO)
    if os.path.exists(marker_path):
        _validate_artifacts_require_repo(subvol, layer_opts, 'parent layer')
        # I looked into adding an `allow_overwrite` flag to `serialize`, but
        # it was too much hassle to do it right.
        subvol.run_as_root(['rm', marker_path])

    procfs_serde.serialize(
        layer_opts.artifacts_may_require_repo,
        subvol,
        META_ARTIFACTS_REQUIRE_REPO,
    )
def _image_source_path(
    layer_opts: LayerOpts, *,
    source: AnyStr = None, layer: Subvol = None, path: AnyStr = None,
) -> Path:
    '''
    Resolves `path` against exactly one of `source` (a filesystem path) or
    `layer` (a subvolume), and returns the resulting `Path`.

    When resolving against `layer`, a `META_ARTIFACTS_REQUIRE_REPO` marker
    present in the layer is validated against `layer_opts` first.

    Raises `AssertionError` unless exactly one of `source` / `layer` is set.
    '''
    assert (source is None) ^ (layer is None), (source, layer, path)
    source = Path.or_none(source)
    # Absolute `path` is still relative to `source` or `layer`
    relative = path.lstrip('/') if path else path
    path = Path(relative or '.')

    if not source:
        if os.path.exists(layer.path(META_ARTIFACTS_REQUIRE_REPO)):
            _validate_artifacts_require_repo(layer, layer_opts, 'image.source')
        return Path(layer.path(path))

    return (source / path).normpath()
def build(self, subvol: Subvol, layer_opts: LayerOpts):
    '''
    Creates `self.into_dir / self.path_to_make` (with all intermediate
    directories) inside `subvol`, then applies this item's stat options
    starting from the outermost newly requested directory.

    With a build appliance configured, `mkdir` runs as root inside the
    appliance with `subvol` bind-mounted read-write; otherwise it runs
    directly on the host via `subvol.run_as_root`.
    '''
    if not layer_opts.build_appliance:
        inner_dir = subvol.path(
            os.path.join(self.into_dir, self.path_to_make)
        )
        subvol.run_as_root(['mkdir', '-p', inner_dir])
    else:
        # Inside the container, the subvolume root is visible at `work_dir`.
        work_dir = generate_work_dir()
        full_path = Path(work_dir) / self.into_dir / self.path_to_make
        run_non_booted_nspawn(
            new_nspawn_opts(
                cmd=['mkdir', '-p', full_path],
                layer=layer_opts.build_appliance,
                bindmount_rw=[(subvol.path(), work_dir)],
                user=pwd.getpwnam('root'),
            ),
            PopenArgs(),
        )

    # Only the first component of `path_to_make` is handed to
    # `build_stat_options`.
    outer_dir = self.path_to_make.partition('/')[0]
    outer_path = subvol.path(os.path.join(self.into_dir, outer_dir))
    build_stat_options(self, subvol, outer_path)
def build(self, subvol: Subvol, layer_opts: LayerOpts):
    '''
    Records this mount under `META_MOUNTS_DIR` in `subvol`, creates the
    mountpoint inode, and read-only bind-mounts the build source onto it.

    Raises `AssertionError` if the build source's "is a directory" status
    disagrees with `self.is_directory`.
    '''
    mount_dir = os.path.join(META_MOUNTS_DIR, self.mountpoint, MOUNT_MARKER)
    # NB: Not exporting self.mountpoint since it's implicit in the path.
    mount_meta = {
        'is_directory': self.is_directory,
        'build_source': self.build_source._asdict(),
        'runtime_source': json.loads(self.runtime_source),
    }
    for name, data in mount_meta.items():
        procfs_serde.serialize(data, subvol, os.path.join(mount_dir, name))

    source_path = self.build_source.to_path(
        target_to_path=layer_opts.target_to_path,
        subvolumes_dir=layer_opts.subvolumes_dir,
    )
    # Support mounting directories and non-directories...  This check
    # follows symlinks for the mount source, which seems correct.
    is_dir = os.path.isdir(source_path)
    assert is_dir == self.is_directory, self

    # Non-directories are regular files, device nodes, FIFOs, you name it.
    # `touch` lacks a `--mode` argument, but the mode of this mountpoint
    # will be shadowed anyway, so let it be whatever.
    make_mountpoint = ['mkdir', '--mode=0755'] if is_dir else ['touch']
    subvol.run_as_root([*make_mountpoint, subvol.path(self.mountpoint)])

    ro_rbind_mount(source_path, subvol, self.mountpoint)
def ro_rbind_mount(src: AnyStr, subvol: Subvol, dest_in_subvol: AnyStr):
    '''
    Recursively bind-mounts `src` read-only at `dest_in_subvol` inside
    `subvol`, then marks the resulting mount tree as `rslave`.
    '''
    for mount_args in (
        # Even though `fs_image` currently does not support mount nesting,
        # the mount must be recursive so that host mounts propagate as
        # expected (we don't want to have to know if a source host
        # directory contains sub-mounts).
        ['-o', 'ro,rbind', src],
        # Performing mount/unmount operations inside the subvol must not be
        # able to affect the host system, so the tree must be marked at
        # least `rslave`.  It would be defensible to use `rprivate`, but IMO
        # this is more surprising than `rslave` in the case of host mounts
        # -- normal filesystem operations on the host are visible to the
        # container, which suggests that mount changes must be, also.
        #
        # IMPORTANT: Even on fairly recent versions of `util-linux`, merging
        # this into the first `mount` invocation above does NOT work.  Just
        # leave this ugly 2-call version as is.
        #
        # NB: We get slave (not private) propagation since
        # `set_up_volume.sh` sets propagation to shared on the parent mount
        # `buck-image-out/volume`.
        ['--make-rslave'],
    ):
        subvol.run_as_root([
            'mount', *mount_args, subvol.path(dest_in_subvol),
        ])
def build(self, subvol: Subvol, layer_opts: LayerOpts):
    '''
    Copies `self.source` to `self.dest` inside `subvol`, applies this
    item's stat options (except the mode) to the destination, and then
    sets the mode of every path in `self.paths`.
    '''
    dest = subvol.path(self.dest)
    # The compiler should have detected any collisions, so `--no-clobber`
    # is just a failsafe.  `--no-dereference` is also a failsafe since
    # we ban symlinks above.
    #
    # Opportunistic reflinking & mandatory sparsification are easy
    # efficiency wins.
    #
    # Don't bother preserving metadata since we explicitly set mode &
    # ownership ... and our build setup lets timestamp float (for now).
    copy_flags = [
        '--recursive', '--no-clobber', '--no-dereference',
        '--reflink=auto', '--sparse=always', '--no-preserve=all',
    ]
    subvol.run_as_root(['cp', *copy_flags, self.source, dest])
    build_stat_options(self, subvol, dest, do_not_set_mode=True)

    # Group by mode to make as few shell calls as possible.
    paths_by_mode = {}
    for entry in self.paths:
        paths_by_mode.setdefault(mode_to_str(entry.mode), []).append(
            entry.provides.path
        )
    for mode_str in sorted(paths_by_mode):
        # `chmod` follows symlinks, and there's no option to stop it.
        # However, `customize_fields` should have failed on symlinks.
        subvol.run_as_root([
            'chmod', mode_str,
            *(subvol.path(p) for p in sorted(paths_by_mode[mode_str])),
        ])
def builder(subvol: Subvol):
    '''
    Removes the path of every item in `items` from `subvol` with
    `rm -r --one-file-system`.

    Raises `AssertionError` if an item targets a protected path, or if its
    path is missing and its action is `RemovePathAction.assert_exists`.
    '''
    protected_paths = protected_path_set(subvol)
    # Reverse-lexicographic order deletes inner paths before
    # deleting the outer paths, thus minimizing conflicts between
    # `remove_paths` items.
    #
    # NB: `__sort_key` is subject to Python's private-name mangling when
    # this function is nested inside a class body, so keep the spelling.
    ordered_items = sorted(
        items, key=lambda i: i.__sort_key(), reverse=True,
    )
    for item in ordered_items:
        if is_path_protected(item.path, protected_paths):
            # For META_DIR, this is never reached because of
            # make_path_normal_relative's check, but for other
            # protected paths, this is required.
            raise AssertionError(
                f'Cannot remove protected {item}: {protected_paths}')

        # This ensures that there are no symlinks in item.path that
        # might take us outside of the subvolume.  Since recursive
        # `rm` does not follow symlinks, it is OK if the inode at
        # `item.path` is a symlink (or one of its sub-paths).
        path = subvol.path(item.path, no_dereference_leaf=True)

        if not os.path.lexists(path):
            if item.action == RemovePathAction.if_exists:
                continue
            if item.action == RemovePathAction.assert_exists:
                raise AssertionError(f'Path does not exist: {item}')
            raise AssertionError(  # pragma: no cover
                f'Unknown {item.action}')

        subvol.run_as_root([
            'rm', '-r',
            # This prevents us from making removes outside of the
            # per-repo loopback, which is an important safeguard.
            # It does not stop us from reaching into other subvols,
            # but since those have random IDs in the path, this is
            # nearly impossible to do by accident.
            '--one-file-system',
            path,
        ])
        # NOTE(review): no-op kept from the original; presumably a
        # coverage-tooling workaround -- confirm before removing.
        pass
def clone_mounts(from_sv: Subvol, to_sv: Subvol):
    '''
    Use this to transfer mountpoints into a parent from a fresh snapshot.
    This assumes the parent subvolume has mounted all of them.

    Both subvolumes must record exactly the same set of mountpoints in
    their metadata, or an `AssertionError` is raised.

    Future: once I land my mountinfo lib, we should actually confirm that
    the parent's mountpoints are mounted and are read-only.
    '''
    source_mountpoints = {*mountpoints_from_subvol_meta(from_sv)}
    target_mountpoints = {*mountpoints_from_subvol_meta(to_sv)}
    assert source_mountpoints == target_mountpoints, \
        (source_mountpoints, target_mountpoints)
    for mountpoint in target_mountpoints:
        ro_rbind_mount(from_sv.path(mountpoint), to_sv, mountpoint)
def build(self, subvol: Subvol, layer_opts: LayerOpts):
    '''
    Copies `self.source` into `self.dest` inside `subvol` with `cp`,
    preserving metadata, symlinks and reflinks.

    With `self.omit_outer_dir`, the immediate children of `self.source`
    are copied instead of the directory itself.  If the source directory
    is empty and the destination already exists, there is nothing to copy
    and no `cp` is run.
    '''
    # The compiler should have caught this, this is just paranoia.
    if self.pre_existing_dest:
        subvol.run_as_root(["test", "-d", subvol.path(self.dest)])

    if self.omit_outer_dir:
        # Like `ls`, but NUL-separated.  Needs `root` since the repo
        # user may not be able to access the source subvol.
        listing = subvol.run_as_root(
            [
                'find', self.source, '-mindepth', '1', '-maxdepth', '1',
                '-printf', '%f\\0',
            ],
            stdout=subprocess.PIPE,
        ).stdout
        # Splitting leaves an empty name after the trailing NUL, and a
        # single empty name when the directory has no children at all.
        # Passing that to `cp` as `source/` would copy the outer directory
        # itself, which is exactly what `omit_outer_dir` must not do.
        names = [name for name in listing.split(b'\0') if name]
        if not names and self.pre_existing_dest:
            return  # Empty source, existing destination: nothing to do.
        # Without a pre-existing destination, keep the previous behavior
        # for an empty source: `cp source/ dest` creates `dest` from it.
        sources = [self.source / name for name in (names or [b''])]
    else:
        sources = [self.source]

    # Option rationales:
    #   - The compiler should have detected any collisions on the
    #     destination, so `--no-clobber` is just a failsafe.
    #   - `--no-dereference` is needed since our contract is to copy
    #     each symlink's destination text verbatim.  Not doing this
    #     would also risk following absolute symlinks, reaching OUTSIDE
    #     of the source subvolume!
    #   - `--reflink=always` aids efficiency and, more importantly,
    #     preserves "cloned extent" relationships that existed within
    #     the source subtree.
    #   - `--sparse=auto` is implied by `--reflink=always`.  The two
    #     together ought to preserve the original sparseness layout,
    #   - `--preserve=all` keeps as much original metadata as possible,
    #     including hardlinks.
    subvol.run_as_root([
        'cp', '--recursive', '--no-clobber', '--no-dereference',
        '--reflink=always', '--sparse=auto', '--preserve=all',
        *sources, subvol.path(self.dest),
    ])
def build(self, subvol: Subvol, layer_opts: LayerOpts):
    '''
    Creates a relative symlink at `self.dest` inside `subvol` that points
    at `self.source` (also image-relative).

    With a build appliance configured, `ln` runs as root inside the
    appliance with `subvol` bind-mounted read-write; otherwise it runs
    directly on the host via `subvol.run_as_root`.

    Raises `AssertionError` if the link target, resolved lexically from
    the link's directory, would fall outside of the image.
    '''
    dest = subvol.path(self.dest)
    # Best-practice would tell us to do `subvol.path(self.source)`.
    # However, this will trigger the paranoid check in the `path()`
    # implementation if any component of `source` inside the image is an
    # absolute symlink.  We are not writing to `source`, so that
    # safeguard isn't useful here.
    #
    # We DO check below that the relative symlink we made does not point
    # outside the image.  However, a non-chrooted process resolving our
    # well-formed relative link might still traverse pre-existing
    # absolute symlinks on the filesystem, and go outside of the image
    # root.
    abs_source = subvol.path() / self.source
    # Make all symlinks relative because this makes it easy to inspect
    # the subvolumes from outside the container.  We can add an
    # `absolute` option if needed.
    dest_dir = dest.dirname()
    rel_source = os.path.relpath(abs_source, dest_dir)
    # A relative link is resolved from the directory that contains it, so
    # the check must start at `dest_dir`, not at the link path itself --
    # otherwise one level of `..` escape goes unnoticed.
    assert os.path.normpath(dest_dir / rel_source).startswith(
        subvol.path()
    ), f'{self}: A symlink to {rel_source!r} would point outside the image'

    if layer_opts.build_appliance:
        build_appliance = layer_opts.build_appliance
        # Inside the container, the subvolume root is visible at `work_dir`.
        work_dir = generate_work_dir()
        rel_dest = work_dir + '/' + self.dest
        opts = new_nspawn_opts(
            cmd=[
                'ln', '--symbolic', '--no-dereference',
                rel_source, rel_dest,
            ],
            layer=build_appliance,
            bindmount_rw=[(subvol.path(), work_dir)],
            user=pwd.getpwnam('root'),
        )
        run_non_booted_nspawn(opts, PopenArgs())
    else:
        subvol.run_as_root(
            ['ln', '--symbolic', '--no-dereference', rel_source, dest]
        )
def builder(subvol: Subvol):
    '''
    Populates `subvol` by feeding it the (decompressed) sendstream stored
    at `item.source`.
    '''
    with open_for_read_decompress(item.source) as sendstream:
        # All the work happens while entering / leaving the context.
        with subvol.receive(sendstream):
            pass
def builder(subvol: Subvol):
    '''
    Makes `subvol` a snapshot of the parent layer's subvolume, re-creates
    the parent's mounts in it, and makes sure the meta directory exists.
    '''
    parent_subvol = parent.subvol
    subvol.snapshot(parent_subvol)
    # This assumes that the parent has everything mounted already.
    clone_mounts(parent_subvol, subvol)
    ensure_meta_dir_exists(subvol, layer_opts)
def build(self, subvol: Subvol, layer_opts: LayerOpts):
    '''
    Unpacks the tarball at `self.source` into `self.into_dir` of `subvol`
    by running `tar` as root inside the build appliance, with `subvol`
    bind-mounted read-write.

    Raises `AssertionError` if `layer_opts.build_appliance` is not set.
    '''
    import shlex

    assert (layer_opts.build_appliance is not None), (
        f'`image_layer` {layer_opts.layer_target} must set '
        '`build_appliance`')
    build_appliance = layer_opts.build_appliance
    # Inside the container, the subvolume root is visible at `work_dir`.
    work_dir = generate_work_dir()
    tar_args = [
        'tar',
        # Future: Bug: `tar` unfortunately FOLLOWS existing symlinks
        # when unpacking.  This isn't dire because the compiler's
        # conflict prevention SHOULD prevent us from going out of
        # the subvolume since this TarballItem's provides would
        # collide with whatever is already present.  However, it's
        # hard to state that with complete confidence, especially if
        # we start adding support for following directory symlinks.
        '-C', work_dir + '/' + self.into_dir,
        '-x',
        # preserving xattrs need to be specified on both sides (packing
        # and unpacking)
        '--xattrs',
        # Block tar's weird handling of paths containing colons.
        '--force-local',
        # The uid:gid doing the extraction is root:root, so by default
        # tar would try to restore the file ownership from the archive.
        # In some cases, we just want all the files to be root-owned.
        *(['--no-same-owner'] if self.force_root_ownership else []),
        # The next option is an extra safeguard that is redundant
        # with the compiler's prevention of `provides` conflicts.
        # It has two consequences:
        #
        #  (1) If a file already exists, `tar` will fail with an error.
        #      It is **not** an error if a directory already exists --
        #      otherwise, one would never be able to safely untar
        #      something into e.g. `/usr/local/bin`.
        #
        #  (2) Less obviously, the option prevents `tar` from
        #      overwriting the permissions of `directory`, as it
        #      otherwise would.
        #
        # Thanks to the compiler's conflict detection, this should
        # not come up, but now you know.  Observe us clobber the
        # permissions without it:
        #
        #   $ mkdir IN OUT
        #   $ touch IN/file
        #   $ chmod og-rwx IN
        #   $ ls -ld IN OUT
        #   drwx------. 2 lesha users 17 Sep 11 21:50 IN
        #   drwxr-xr-x. 2 lesha users  6 Sep 11 21:50 OUT
        #   $ tar -C IN -czf file.tgz .
        #   $ tar -C OUT -xvf file.tgz
        #   ./
        #   ./file
        #   $ ls -ld IN OUT
        #   drwx------. 2 lesha users 17 Sep 11 21:50 IN
        #   drwx------. 2 lesha users 17 Sep 11 21:50 OUT
        #
        # Adding `--keep-old-files` preserves `OUT`'s metadata:
        #
        #   $ rm -rf OUT ; mkdir OUT ; ls -ld OUT
        #   drwxr-xr-x. 2 lesha users 6 Sep 11 21:53 OUT
        #   $ tar -C OUT --keep-old-files -xvf file.tgz
        #   ./
        #   ./file
        #   $ ls -ld IN OUT
        #   drwx------. 2 lesha users 17 Sep 11 21:50 IN
        #   drwxr-xr-x. 2 lesha users 17 Sep 11 21:54 OUT
        '--keep-old-files',
        '-f', '-',
    ]
    # The command is interpreted by `sh`, so quote every argument: a
    # target directory containing whitespace or shell metacharacters must
    # reach `tar` as one literal argument.  Arguments made only of safe
    # characters are left untouched by `shlex.quote`.
    tar_cmd = ' '.join(shlex.quote(arg) for arg in tar_args)

    with open_for_read_decompress(self.source) as tf:
        opts = new_nspawn_opts(
            # '0<&3' below redirects fd=3 to stdin, so 'tar ... -f -' will
            # read and unpack whatever we represent as fd=3.  We pass `tf`
            # as fd=3 into container by 'forward_fd=...' below.  See help
            # string in fs_image/nspawn_in_subvol/args.py where
            # _parser_add_nspawn_opts() calls
            # parser.add_argument('--forward-fd')
            cmd=['sh', '-uec', f'{tar_cmd} 0<&3'],
            layer=build_appliance,
            bindmount_rw=[(subvol.path(), work_dir)],
            user=pwd.getpwnam('root'),
            forward_fd=[tf.fileno()],
            allow_mknod=True,
        )
        run_non_booted_nspawn(opts, PopenArgs())
def _default_snapshot(build_appliance: Subvol, prog_name: str) -> Path:
    '''
    Returns the normalized absolute in-image path that the build
    appliance's default-snapshot symlink for `prog_name` points at.
    '''
    symlink_base = '/__fs_image__/rpm/default-snapshot-for-installer/'
    link_target = os.readlink(
        build_appliance.path(symlink_base + prog_name)
    )
    # The symlink is relative, but we need an absolute path.
    return (Path(symlink_base) / link_target).normpath()
def build_image(args):
    '''
    Builds the image layer described by the parsed command-line `args`
    into a subvolume under `args.subvolumes_dir`, marks it read-only, and
    returns its `SubvolumeOnDisk` description.

    Phase builders run first, in the order given by the dependency graph;
    the remaining items are then built in dependency order.

    Raises `AssertionError` if the process umask denies execute permission
    to "user", and `RuntimeError` if describing the finished subvolume
    fails.
    '''
    # We want check the umask since it can affect the result of the
    # `os.access` check for `image.install*` items.  That said, having a
    # umask that denies execute permission to "user" is likely to break this
    # code earlier, since new directories wouldn't be traversible.  At least
    # this check gives a nice error message.
    #
    # `os.umask()` can only be read by setting it, so set a throwaway
    # value and immediately restore the original.
    cur_umask = os.umask(0)
    os.umask(cur_umask)
    assert cur_umask & stat.S_IXUSR == 0, \
        f'Refusing to run with pathological umask 0o{cur_umask:o}'

    subvol = Subvol(os.path.join(args.subvolumes_dir, args.subvolume_rel_path))
    layer_opts = LayerOpts(
        layer_target=args.child_layer_target,
        # The build appliance is optional.
        build_appliance=get_subvolume(
            args.build_appliance_json, args.subvolumes_dir,
        ) if args.build_appliance_json else None,
        rpm_installer=args.rpm_installer,
        rpm_repo_snapshot=args.rpm_repo_snapshot,
        preserve_yum_dnf_cache=args.preserve_yum_dnf_cache,
        artifacts_may_require_repo=args.artifacts_may_require_repo,
        target_to_path=make_target_path_map(args.child_dependencies),
        subvolumes_dir=args.subvolumes_dir,
        debug=args.debug,
        allowed_host_mount_targets=frozenset(args.allowed_host_mount_target),
    )

    # This stack allows build items to hold temporary state on disk.
    with ExitStack() as exit_stack:
        dep_graph = DependencyGraph(gen_items_for_features(
            exit_stack=exit_stack,
            features_or_paths=args.child_feature_json,
            layer_opts=layer_opts,
        ), layer_target=args.child_layer_target)
        # Creating all the builders up-front lets phases validate their input
        for builder in [
            builder_maker(items, layer_opts)
            for builder_maker, items in dep_graph.ordered_phases()
        ]:
            builder(subvol)
        # We cannot validate or sort `ImageItem`s until the phases are
        # materialized since the items may depend on the output of the phases.
        for item in dep_graph.gen_dependency_order_items(PhasesProvideItem(
            from_target=args.child_layer_target,
            subvol=subvol,
        )):
            item.build(subvol, layer_opts)
        # Build artifacts should never change.  Run this BEFORE the exit_stack
        # cleanup to enforce that the cleanup does not touch the image.
        subvol.set_readonly(True)

    try:
        return SubvolumeOnDisk.from_subvolume_path(
            # Converting to a path here does not seem too risky since this
            # class shouldn't have a reason to follow symlinks in the subvol.
            subvol.path().decode(),
            args.subvolumes_dir,
        )
    # The complexity of covering this is high, but the only thing that can
    # go wrong is a typo in the f-string.
    except Exception as ex:  # pragma: no cover
        raise RuntimeError(f'Serializing subvolume {subvol.path()}') from ex