def _ensure_meta_dir_exists(subvol: Subvol):
    '''Create the image's metadata directory inside `subvol` (idempotent).'''
    # `--parents` makes this a no-op when the directory already exists.
    subvol.run_as_root(
        ['mkdir', '--mode=0755', '--parents', subvol.path(META_DIR)]
    )
def build_image(args):
    '''
    Builds the image subvolume at `args.subvolume_rel_path` by running every
    item in dependency order, then serializes the result.

    Returns a `SubvolumeOnDisk` describing the built subvolume.  Raises
    `RuntimeError` (chained to the cause) if serialization fails.
    '''
    subvol = Subvol(os.path.join(args.subvolumes_dir, args.subvolume_rel_path))
    # Parent-layer items are chained before child-feature items so the child
    # builds on top of its parent layer.
    for item in dependency_order_items(
        itertools.chain(
            gen_parent_layer_items(
                args.child_layer_target,
                args.parent_layer_json,
                args.subvolumes_dir,
            ),
            gen_items_for_features(
                [args.child_feature_json],
                make_target_filename_map(args.child_dependencies),
            ),
        )
    ):
        item.build(subvol)
    try:
        return SubvolumeOnDisk.from_subvolume_path(
            # `path()` returns bytes; the serializer wants `str`.
            subvol.path().decode(),
            args.subvolumes_dir,
            args.subvolume_rel_path,
        )
    except Exception as ex:
        raise RuntimeError(f'Serializing subvolume {subvol.path()}') from ex
def mountpoints_from_subvol_meta(subvol: Subvol) -> Iterator[str]:
    '''
    Returns image-relative paths to mountpoints.  Directories get a
    trailing /, while files do not.  See the `_protected_path_set`
    docblock if this convention proves onerous.
    '''
    mounts_path = subvol.path(META_MOUNTS_DIR)
    # An image with no mounts simply lacks the meta-directory.
    if not os.path.exists(mounts_path):
        return
    for path, _next_dirs, _files in os.walk(
        # We are not `chroot`ed, so following links could access outside the
        # image; `followlinks=False` is the default -- explicit for safety.
        mounts_path, onerror=_raise, followlinks=False,
    ):
        # `path` is bytes (we walked a bytes path), hence the `.decode()`.
        relpath = os.path.relpath(path, subvol.path(META_MOUNTS_DIR)).decode()
        # Only directories named MOUNT_MARKER denote an actual mountpoint;
        # their parent directory (image-relative) is the mountpoint itself.
        if os.path.basename(relpath) == MOUNT_MARKER:
            mountpoint = os.path.dirname(relpath)
            assert not mountpoint.endswith('/'), mountpoint
            # It would be more technically correct to use `subvol.path()`
            # here (since that prevents us from following links outside the
            # image), but this is much more legible and probably safe.
            with open(os.path.join(path, b'is_directory')) as f:
                is_directory = json.load(f)
            yield mountpoint + ('/' if is_directory else '')
def to_path(
    self, *, target_to_path: Mapping[str, str], subvolumes_dir: str,
) -> str:
    '''
    Resolves this mount source to a filesystem path.

    For `type == 'layer'`, looks up the source target's build output,
    loads its `layer.json`, and returns the subvolume path -- refusing
    layers that themselves contain mounts (no nested-mount support).
    For `type == 'host'`, the source string is already a host path.

    Raises `AssertionError` on unresolvable targets, nested mounts, or an
    unknown source type.
    '''
    if self.type == 'layer':
        out_path = target_to_path.get(self.source)
        if out_path is None:
            raise AssertionError(
                f'MountItem could not resolve {self.source}')
        with open(os.path.join(out_path, 'layer.json')) as infile:
            subvol = Subvol(SubvolumeOnDisk.from_json_file(
                infile, subvolumes_dir,
            ).subvolume_path(), already_exists=True)
        # If we allowed mounting a layer that has other mounts
        # inside, it would force us to support nested mounts.  We
        # don't want to do this (yet).
        if os.path.exists(subvol.path(META_MOUNTS_DIR)):
            raise AssertionError(
                f'Refusing to mount {subvol.path()} since that would '
                'require the tooling to support nested mounts.')
        return subvol.path()
    elif self.type == 'host':
        return self.source
    else:  # pragma: no cover
        raise AssertionError(
            f'Bad mount source "{self.type}" for {self.source}')
def build(self, subvol: Subvol):
    '''
    Runs the `yum` command for this item's action against the subvol as
    the install root.  No-op when the item lists no RPMs.
    '''
    if not self.rpms:
        return
    # Sanity-check that this item was routed to the right build phase.
    assert RPM_ACTION_TYPE_TO_PHASE_ORDER[self.action] is self.phase_order
    assert self.yum_from_snapshot is not None, \
        f'{self} -- your `image_layer` must set `yum_from_repo_snapshot`'
    subvol.run_as_root([
        # Since `yum-from-snapshot` variants are generally Python
        # binaries built from this very repo, in @mode/dev, we would run
        # a symlink-PAR from the buck-out tree as `root`.  This would
        # leave behind root-owned `__pycache__` directories, which would
        # break Buck's fragile cleanup, and cause us to leak old build
        # artifacts.  This eventually runs the host out of disk space,
        # and can also interfere with e.g. `test-image-layer`, since
        # that test relies on there being just one `create_ops`
        # subvolume in `buck-image-out` with the "received UUID" that
        # was committed to VCS as part of the test sendstream.
        'env', 'PYTHONDONTWRITEBYTECODE=1',
        self.yum_from_snapshot, '--install-root', subvol.path(), '--',
        RPM_ACTION_TYPE_TO_YUM_CMD[self.action],
        # Sort in case `yum` behavior depends on order (for determinism).
        '--assumeyes', '--', *sorted(self.rpms),
    ])
def test_does_not_exist(self):
    '''`already_exists=True` must reject a non-subvolume directory, and a
    not-yet-created `Subvol` must refuse to run commands.'''
    with tempfile.TemporaryDirectory() as td:
        # A plain temporary directory is not a btrfs subvolume.
        with self.assertRaisesRegex(AssertionError, 'No btrfs subvol'):
            Subvol(td, already_exists=True)
        # Construction succeeds without `already_exists`, but using the
        # subvolume before creating it must fail.
        not_yet_created = Subvol(td)
        with self.assertRaisesRegex(AssertionError, 'exists is False'):
            not_yet_created.run_as_root(['true'])
def build(self, subvol: Subvol):
    '''Creates `path_to_make` under `into_dir`, then applies this item's
    stat options starting at the outermost new path component.'''
    top_component = self.path_to_make.split('/', 1)[0]
    # `mkdir -p` creates the entire chain in one invocation.
    subvol.run_as_root([
        'mkdir', '-p',
        subvol.path(os.path.join(self.into_dir, self.path_to_make)),
    ])
    self.build_stat_options(
        subvol, subvol.path(os.path.join(self.into_dir, top_component)),
    )
def test_out_of_subvol_symlink(self):
    '''`path()` must reject a symlink that escapes the subvol, unless the
    caller explicitly disables dereferencing the final component.'''
    with temp_dir() as td:
        link = td / 'my_null'
        os.symlink('/dev/null', link)
        # Leaf dereferencing disabled: the symlink's own path comes back.
        self.assertEqual(
            link, Subvol(td).path('my_null', no_dereference_leaf=True),
        )
        # Following the link would land on /dev/null, outside the subvol.
        with self.assertRaisesRegex(AssertionError, 'outside the subvol'):
            Subvol(td).path('my_null')
def test_out_of_subvol_symlink(self):
    '''`path()` must reject symlinks escaping the subvol unless leaf
    dereferencing is explicitly turned off.'''
    with tempfile.TemporaryDirectory() as td:
        link_path = os.path.join(td, 'my_null')
        os.symlink('/dev/null', link_path)
        # No dereferencing: we get the symlink's own path, as bytes.
        self.assertEqual(
            link_path.encode(),
            Subvol(td).path('my_null', no_dereference_leaf=True),
        )
        # Dereferencing would resolve to /dev/null, outside the subvol.
        with self.assertRaisesRegex(AssertionError, 'outside the subvol'):
            Subvol(td).path('my_null')
def package_full(self, subvol: Subvol, output_path: str, opts: _Opts):
    '''Packages the subvolume as a zstd-compressed squashfs image.'''
    # Pre-create the output as the current (non-root) user so the image
    # file is not root-owned -- `mksquashfs` itself runs as root below.
    create_ro(output_path, 'wb').close()
    subvol.run_as_root([
        'mksquashfs',
        subvol.path(),
        output_path,
        '-comp', 'zstd',
        '-noappend',
    ])
def build_stat_options(self, subvol: Subvol, full_target_path: str):
    '''Recursively applies this item's mode and ownership to the target.'''
    # -R is not a problem since it cannot be the case that we are creating
    # a directory that already has something inside it.  On the plus side,
    # it helps with nested directory creation.
    subvol.run_as_root([
        'chmod', '-R', self._mode_impl(), full_target_path,
    ])
    owner_spec = f'{self.user}:{self.group}'
    subvol.run_as_root(['chown', '-R', owner_spec, full_target_path])
def build(self, subvol: Subvol):
    '''Extracts `self.tarball` into `self.into_dir` inside the subvol.'''
    subvol.run_as_root([
        'tar',
        '-C', subvol.path(self.into_dir),
        '-x',
        # The next option is an extra safeguard that is redundant with
        # the compiler's prevention of `provides` conflicts.  It has two
        # consequences:
        #
        #  (1) If a file already exists, `tar` will fail with an error.
        #      It is **not** an error if a directory already exists --
        #      otherwise, one would never be able to safely untar
        #      something into e.g. `/usr/local/bin`.
        #
        #  (2) Less obviously, the option prevents `tar` from
        #      overwriting the permissions of `directory`, as it
        #      otherwise would.
        #
        #      Thanks to the compiler's conflict detection, this should
        #      not come up, but now you know.  Observe us clobber the
        #      permissions without it:
        #
        #        $ mkdir IN OUT
        #        $ touch IN/file
        #        $ chmod og-rwx IN
        #        $ ls -ld IN OUT
        #        drwx------. 2 lesha users 17 Sep 11 21:50 IN
        #        drwxr-xr-x. 2 lesha users  6 Sep 11 21:50 OUT
        #        $ tar -C IN -czf file.tgz .
        #        $ tar -C OUT -xvf file.tgz
        #        ./
        #        ./file
        #        $ ls -ld IN OUT
        #        drwx------. 2 lesha users 17 Sep 11 21:50 IN
        #        drwx------. 2 lesha users 17 Sep 11 21:50 OUT
        #
        #      Adding `--keep-old-files` preserves the metadata of `OUT`:
        #
        #        $ rm -rf OUT ; mkdir out ; ls -ld OUT
        #        drwxr-xr-x. 2 lesha users 6 Sep 11 21:53 OUT
        #        $ tar -C OUT --keep-old-files -xvf file.tgz
        #        ./
        #        ./file
        #        $ ls -ld IN OUT
        #        drwx------. 2 lesha users 17 Sep 11 21:50 IN
        #        drwxr-xr-x. 2 lesha users 17 Sep 11 21:54 OUT
        '--keep-old-files',
        '-f', self.tarball
    ])
def builder(subvol: Subvol):
    '''Creates the subvolume and normalizes its root's mode and owner.'''
    subvol.create()
    # Guarantee standard / permissions.  This could be a setting, but in
    # practice, probably any other choice would be wrong.
    root_path = subvol.path()
    subvol.run_as_root(['chmod', '0755', root_path])
    subvol.run_as_root(['chown', 'root:root', root_path])
    _ensure_meta_dir_exists(subvol)
def test_path(self):
    '''Pure path-manipulation checks on a `Subvol` that need not exist.'''
    # We are only going to do path manipulations in this test.
    sv = Subvol('/subvol/need/not/exist')

    # Paths that would escape the subvolume root must be rejected.
    for bad_path in ['..', 'a/../../b/c/d', '../c/d/e']:
        with self.assertRaisesRegex(AssertionError, 'outside the subvol'):
            sv.path(bad_path)

    # Leading / trailing slashes are normalized away.
    self.assertEqual(sv.path('a/b'), sv.path('/a/b/'))
    self.assertEqual(b'a/b', os.path.relpath(sv.path('a/b'), sv.path()))
    # Idiom fix: `assertFalse` replaces `assertTrue(not ...)` for a
    # clearer assertion and failure message; behavior checked is the same.
    self.assertFalse(sv.path('.').endswith(b'/.'))
def package_full(self, svod: SubvolumeOnDisk, output_path: str):
    '''Writes the subvolume's btrfs sendstream to a new `output_path`.'''
    # Future: rpm.common.create_ro, but it's kind of a big dep.  Luckily
    # `image_package` will promptly mark this read-only.
    assert not os.path.exists(output_path)
    with open(output_path, 'wb') as outfile:
        sv = Subvol(svod.subvolume_path(), already_exists=True)
        # The context manager performs the actual sendstream write.
        with sv.mark_readonly_and_write_sendstream_to_file(outfile):
            pass
def ensure_meta_dir_exists(subvol: Subvol, layer_opts: LayerOpts): subvol.run_as_root([ 'mkdir', '--mode=0755', '--parents', subvol.path(META_DIR), ]) # One might ask: why are we serializing this into the image instead # of just putting a condition on `built_artifacts_require_repo` # into our Buck macros? Two reasons: # - In the case of build appliance images, it is possible for a # @mode/dev (in-place) build to use **either** a @mode/dev, or a # @mode/opt (standalone) build appliance. The only way to know # to know if the appliance needs a repo mount is to have a marker # in the image. # - By marking the images, we avoid having to conditionally add # `--bind-repo-ro` flags in a bunch of places in our codebase. The # in-image marker enables `nspawn_in_subvol` to decide. if os.path.exists(subvol.path(META_ARTIFACTS_REQUIRE_REPO)): _validate_artifacts_require_repo(subvol, layer_opts, 'parent layer') # I looked into adding an `allow_overwrite` flag to `serialize`, but # it was too much hassle to do it right. subvol.run_as_root(['rm', subvol.path(META_ARTIFACTS_REQUIRE_REPO)]) procfs_serde.serialize( layer_opts.artifacts_may_require_repo, subvol, META_ARTIFACTS_REQUIRE_REPO, )
def find_built_subvol(layer_output, path_in_repo=None):
    '''Loads the `Subvol` described by `layer_output`'s `layer.json`.'''
    layer_json = os.path.join(layer_output, 'layer.json')
    with open(layer_json) as infile:
        svod = SubvolumeOnDisk.from_json_file(
            infile, subvolumes_dir(path_in_repo),
        )
    return Subvol(svod.subvolume_path(), already_exists=True)
def package_full(self, svod: SubvolumeOnDisk, output_path: str):
    '''
    Writes a zstd-compressed btrfs sendstream of the subvolume to a new
    file at `output_path`, piping the sendstream through a `zstd` child
    process.
    '''
    assert not os.path.exists(output_path)
    # Ordering matters: the sendstream context manager writes into the
    # `zstd` process's stdin, which compresses into `outfile`.
    with open(output_path, 'wb') as outfile, subprocess.Popen(
        ['zstd', '--stdout'], stdin=subprocess.PIPE, stdout=outfile
    ) as zstd, Subvol(
        svod.subvolume_path(), already_exists=True,
    ).mark_readonly_and_write_sendstream_to_file(zstd.stdin):
        pass
    # `Popen.__exit__` has waited for `zstd`; fail loudly if it errored.
    check_popen_returncode(zstd)
def _image_source_path(
    layer_opts: LayerOpts, *,
    source: AnyStr = None, layer: Subvol = None, path: AnyStr = None,
) -> Path:
    '''
    Resolves an `image.source` to a concrete filesystem `Path`: relative
    to the `source` artifact when one is given, otherwise inside the
    built `layer` subvolume.  Exactly one of `source` / `layer` is set.
    '''
    assert (source is None) ^ (layer is None), (source, layer, path)
    source = Path.or_none(source)
    # Absolute `path` is still relative to `source` or `layer`
    path = Path((path and path.lstrip('/')) or '.')

    if source:
        return (source / path).normpath()

    # NOTE(review): presumably this prevents consuming in-place (@mode/dev)
    # artifacts from a layer that requires the repo to be mounted -- confirm
    # against `_validate_artifacts_require_repo`.
    if os.path.exists(layer.path(META_ARTIFACTS_REQUIRE_REPO)):
        _validate_artifacts_require_repo(layer, layer_opts, 'image.source')
    return Path(layer.path(path))
def from_subvol(cls, subvol: Subvol, package_name: str) -> "RpmMetadata": db_path = subvol.path("var/lib/rpm") # `rpm` always creates a DB when `--dbpath` is an arg. # We don't want to create one if it does not already exist so check for # that here. if not os.path.exists(db_path): raise ValueError(f"RPM DB path {db_path} does not exist") return cls._repo_query(cls, db_path, package_name, None)
def package_full(self, svod: SubvolumeOnDisk, output_path: str):
    '''Sends the subvolume into a new btrfs loopback at `output_path`,
    then strips all write bits from the resulting file.'''
    Subvol(
        svod.subvolume_path(), already_exists=True,
    ).mark_readonly_and_send_to_new_loopback(output_path)

    # Paranoia: images are read-only after being built
    current_mode = stat.S_IMODE(os.stat(output_path).st_mode)
    read_only = current_mode & ~(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH)
    os.chmod(output_path, read_only)
def test_run_as_root_return(self):
    '''`run_as_root` must return the completed process with its output.'''
    cmd = ['bash', '-c', 'echo -n my out; echo -n my err >&2']
    res = Subvol('/dev/null/no-such-dir').run_as_root(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        _subvol_exists=False,  # the path deliberately does not exist
    )
    # The command is wrapped with `sudo --`, and both streams come back.
    self.assertEqual(['sudo', '--'] + cmd, res.args)
    self.assertEqual(0, res.returncode)
    self.assertEqual(b'my out', res.stdout)
    self.assertEqual(b'my err', res.stderr)
def builder(subvol: Subvol):
    '''
    Removes each item's path from `subvol`, honoring the item's
    `RemovePathAction` and refusing to touch protected paths.
    Fix: dropped a dead trailing `pass` statement from the original.
    '''
    protected_paths = _protected_path_set(subvol)
    # Reverse-lexicographic order deletes inner paths before
    # deleting the outer paths, thus minimizing conflicts between
    # `remove_paths` items.
    for item in sorted(
        items, reverse=True, key=lambda i: i.__sort_key(),
    ):
        if _is_path_protected(item.path, protected_paths):
            # For META_DIR, this is never reached because of
            # _make_path_normal_relative's check, but for other
            # protected paths, this is required.
            raise AssertionError(
                f'Cannot remove protected {item}: {protected_paths}')
        # This ensures that there are no symlinks in item.path that
        # might take us outside of the subvolume.  Since recursive
        # `rm` does not follow symlinks, it is OK if the inode at
        # `item.path` is a symlink (or one of its sub-paths).
        path = subvol.path(item.path, no_dereference_leaf=True)
        if not os.path.lexists(path):
            if item.action == RemovePathAction.assert_exists:
                raise AssertionError(f'Path does not exist: {item}')
            elif item.action == RemovePathAction.if_exists:
                continue
            else:  # pragma: no cover
                raise AssertionError(f'Unknown {item.action}')
        subvol.run_as_root([
            'rm',
            '-r',
            # This prevents us from making removes outside of the
            # per-repo loopback, which is an important safeguard.
            # It does not stop us from reaching into other subvols,
            # but since those have random IDs in the path, this is
            # nearly impossible to do by accident.
            '--one-file-system',
            path,
        ])
def ro_rbind_mount(src: AnyStr, subvol: Subvol, dest_in_subvol: AnyStr):
    '''
    Read-only, recursively bind-mounts `src` onto `dest_in_subvol` inside
    `subvol`, then marks the mounted tree `rslave` so mount changes made
    inside the container cannot propagate back to the host.
    '''
    # Even though `fs_image` currently does not support mount nesting, the
    # mount must be recursive so that host mounts propagate as expected (we
    # don't want to have to know if a source host directory contains
    # sub-mounts).
    subvol.run_as_root([
        'mount', '-o', 'ro,rbind', src, subvol.path(dest_in_subvol),
    ])
    # Performing mount/unmount operations inside the subvol must not be able
    # to affect the host system, so the tree must be marked at least
    # `rslave`.  It would be defensible to use `rprivate`, but IMO this is
    # more surprising than `rslave` in the case of host mounts -- normal
    # filesystem operations on the host are visible to the container, which
    # suggests that mount changes must be, also.
    #
    # IMPORTANT: Even on fairly recent versions of `util-linux`, merging
    # this into the first `mount` invocation above does NOT work.  Just
    # leave this ugly 2-call version as is.
    #
    # NB: We get slave (not private) propagation since `set_up_volume.sh`
    # sets propagation to shared on the parent mount `buck-image-out/volume`.
    subvol.run_as_root(['mount', '--make-rslave', subvol.path(dest_in_subvol)])
def build_image(args):
    '''
    Builds the image subvolume: runs dependency-ordered phases first, then
    the remaining `ImageItem`s, marks the result read-only, and returns a
    `SubvolumeOnDisk`.  Raises `RuntimeError` if serialization fails.
    '''
    subvol = Subvol(os.path.join(args.subvolumes_dir, args.subvolume_rel_path))
    dep_graph = DependencyGraph(itertools.chain(
        gen_parent_layer_items(
            args.child_layer_target,
            args.parent_layer_json,
            args.subvolumes_dir,
        ),
        gen_items_for_features(
            feature_paths=[args.child_feature_json],
            target_to_path=make_target_path_map(args.child_dependencies),
            yum_from_repo_snapshot=args.yum_from_repo_snapshot,
        ),
    ))
    for phase in dep_graph.ordered_phases():
        phase.build(subvol)
    # We cannot validate or sort `ImageItem`s until the phases are
    # materialized since the items may depend on the output of the phases.
    for item in dep_graph.gen_dependency_order_items(subvol.path().decode()):
        item.build(subvol)
    # Build artifacts should never change.
    subvol.set_readonly(True)
    try:
        return SubvolumeOnDisk.from_subvolume_path(
            subvol.path().decode(),
            args.subvolumes_dir,
        )
    except Exception as ex:
        raise RuntimeError(f'Serializing subvolume {subvol.path()}') from ex
def build(self, subvol: Subvol, layer_opts: LayerOpts):
    '''
    Serializes this mount's metadata under `META_MOUNTS_DIR`, creates the
    mountpoint inside the subvol, and read-only rbind-mounts the resolved
    source onto it.
    '''
    mount_dir = os.path.join(META_MOUNTS_DIR, self.mountpoint, MOUNT_MARKER)
    for name, data in (
        # NB: Not exporting self.mountpoint since it's implicit in the path.
        ('is_directory', self.is_directory),
        ('build_source', self.build_source._asdict()),
        ('runtime_source', json.loads(self.runtime_source)),
    ):
        procfs_serde.serialize(data, subvol, os.path.join(mount_dir, name))
    source_path = self.build_source.to_path(
        target_to_path=layer_opts.target_to_path,
        subvolumes_dir=layer_opts.subvolumes_dir,
    )
    # Support mounting directories and non-directories...  This check
    # follows symlinks for the mount source, which seems correct.
    is_dir = os.path.isdir(source_path)
    assert is_dir == self.is_directory, self
    if is_dir:
        subvol.run_as_root([
            'mkdir', '--mode=0755', subvol.path(self.mountpoint),
        ])
    else:  # Regular files, device nodes, FIFOs, you name it.
        # `touch` lacks a `--mode` argument, but the mode of this
        # mountpoint will be shadowed anyway, so let it be whatever.
        subvol.run_as_root(['touch', subvol.path(self.mountpoint)])
    ro_rbind_mount(source_path, subvol, self.mountpoint)
def clone_mounts(from_sv: Subvol, to_sv: Subvol):
    '''
    Use this to transfer mountpoints into a parent from a fresh snapshot.
    This assumes the parent subvolume has mounted all of them.

    Future: once I land my mountinfo lib, we should actually confirm that
    the parent's mountpoints are mounted and are read-only.
    '''
    src_mountpoints = set(mountpoints_from_subvol_meta(from_sv))
    dest_mountpoints = set(mountpoints_from_subvol_meta(to_sv))
    # Both subvolumes must declare exactly the same set of mounts.
    assert src_mountpoints == dest_mountpoints, (
        src_mountpoints, dest_mountpoints,
    )
    for mountpoint in dest_mountpoints:
        ro_rbind_mount(from_sv.path(mountpoint), to_sv, mountpoint)
def build_image(args):
    '''
    Builds the image subvolume: phase builders run first (inside an
    `ExitStack` that lets items keep temporary on-disk state), then the
    dependency-ordered `ImageItem`s; the subvol is made read-only before
    cleanup.  Returns a `SubvolumeOnDisk`; raises `RuntimeError` if
    serialization fails.
    '''
    subvol = Subvol(os.path.join(args.subvolumes_dir, args.subvolume_rel_path))
    target_to_path = make_target_path_map(args.child_dependencies)

    # This stack allows build items to hold temporary state on disk.
    with ExitStack() as exit_stack:
        dep_graph = DependencyGraph(itertools.chain(
            gen_parent_layer_items(
                args.child_layer_target,
                args.parent_layer_json,
                args.subvolumes_dir,
            ),
            gen_items_for_features(
                exit_stack=exit_stack,
                feature_paths=[args.child_feature_json],
                target_to_path=target_to_path,
            ),
        ))
        layer_opts = LayerOpts(
            layer_target=args.child_layer_target,
            yum_from_snapshot=args.yum_from_repo_snapshot,
            build_appliance=None if not args.build_appliance_json else
                get_subvolume_path(
                    args.build_appliance_json, args.subvolumes_dir,
                ),
        )
        # Creating all the builders up-front lets phases validate their input
        for builder in [
            builder_maker(items, layer_opts)
                for builder_maker, items in dep_graph.ordered_phases()
        ]:
            builder(subvol)
        # We cannot validate or sort `ImageItem`s until the phases are
        # materialized since the items may depend on the output of the phases.
        for item in dep_graph.gen_dependency_order_items(
            subvol.path().decode()
        ):
            build_item(
                item,
                subvol=subvol,
                target_to_path=target_to_path,
                subvolumes_dir=args.subvolumes_dir,
            )
        # Build artifacts should never change.  Run this BEFORE the
        # exit_stack cleanup to enforce that the cleanup does not touch
        # the image.
        subvol.set_readonly(True)

    try:
        return SubvolumeOnDisk.from_subvolume_path(
            # Converting to a path here does not seem too risky since this
            # class shouldn't have a reason to follow symlinks in the subvol.
            subvol.path().decode(),
            args.subvolumes_dir,
        )
    # The complexity of covering this is high, but the only thing that can
    # go wrong is a typo in the f-string.
    except Exception as ex:  # pragma: no cover
        raise RuntimeError(f'Serializing subvolume {subvol.path()}') from ex
def find_built_subvol(
    layer_output, *, path_in_repo=None, subvolumes_dir=None,
):
    '''Loads the `Subvol` described by `layer_output`'s `layer.json`.

    `path_in_repo` and `subvolumes_dir` are alternative ways of locating
    the subvolumes directory -- set at most one of them.
    '''
    # It's OK for both to be None (uses the current file to find repo), but
    # it's not OK to set both.
    assert (path_in_repo is None) or (subvolumes_dir is None)
    # Preserve the original truthiness semantics: any falsy value falls
    # back to locating the directory from the repo.
    if not subvolumes_dir:
        subvolumes_dir = _get_subvolumes_dir(path_in_repo)
    with open(Path(layer_output) / 'layer.json') as infile:
        svod = SubvolumeOnDisk.from_json_file(infile, subvolumes_dir)
    return Subvol(svod.subvolume_path(), already_exists=True)
def test_layer_from_demo_sendstreams(self):
    '''Round-trips the demo sendstream through `image_layer` and checks
    that the received subvolume renders identically to the original.'''
    # `btrfs_diff.demo_sendstream` produces a subvolume send-stream with
    # fairly thorough coverage of filesystem features.  This test grabs
    # that send-stream, receives it into an `image_layer`, and validates
    # that the send-stream of the **received** volume has the same
    # rendering as the original send-stream was supposed to have.
    #
    # In other words, besides testing `image_layer`'s `from_sendstream`,
    # this is also a test of idempotence for btrfs send+receive.
    #
    # Notes:
    #  - `compiler/tests/TARGETS` explains why `mutate_ops` is not here.
    #  - Currently, `mutate_ops` also uses `--no-data`, which would
    #    break this test of idempotence.
    for op in ['create_ops']:
        with self.target_subvol(op) as sod:
            self.assertEqual(
                render_demo_subvols(**{op: True}),
                render_sendstream(
                    Subvol(sod.subvolume_path(), already_exists=True).
                    mark_readonly_and_get_sendstream(),
                ),
            )