def test_snapshot_gpg_keys(self):
    """Verify `snapshot_gpg_keys` enforces the whitelist before copying.

    A key URL may only be snapshotted if a byte-identical copy of the
    key already exists in `whitelist_dir`.
    """
    with temp_dir() as td:
        hello_path = td / 'hello'
        with open(hello_path, 'w') as out_f:
            out_f.write('world')
        whitelist_dir = td / 'whitelist'
        os.mkdir(whitelist_dir)

        def try_snapshot(snapshot_dir):
            snapshot_gpg_keys(
                key_urls=[hello_path.file_url()],
                whitelist_dir=whitelist_dir,
                snapshot_dir=snapshot_dir,
            )

        # The snapshot won't work until the key is correctly whitelisted.
        # No whitelist entry at all -> FileNotFoundError.
        with temp_dir() as snap_dir, self.assertRaises(FileNotFoundError):
            try_snapshot(snap_dir)
        # A whitelist entry with mismatched contents -> AssertionError.
        with open(whitelist_dir / 'hello', 'w') as out_f:
            out_f.write('wrong contents')
        with temp_dir() as snap_dir, self.assertRaises(AssertionError):
            try_snapshot(snap_dir)
        # With the correct key whitelisted, the snapshot succeeds and
        # places the key under `gpg_keys/` in the snapshot dir.
        shutil.copy(hello_path, whitelist_dir)
        with temp_dir() as snapshot_dir:
            try_snapshot(snapshot_dir)
            self.assertEqual([b'gpg_keys'], snapshot_dir.listdir())
            self.assertEqual(
                [b'hello'], (snapshot_dir / 'gpg_keys').listdir(),
            )
            with open(snapshot_dir / 'gpg_keys/hello') as in_f:
                self.assertEqual('world', in_f.read())
def test_yum_is_dnf(self):
    """Check `yum_is_dnf` detection of whether `yum` is a dnf symlink."""
    # Setup for yum not being the same as dnf, modeled after fb
    with temp_dir() as td:
        # NOTE(review): this relies on the project's `Path.touch()`
        # returning the path (unlike `pathlib.Path.touch`, which
        # returns None) — `.decode()` is called on it below; confirm.
        yum_path = Path(td / 'yum').touch()
        with mock.patch('shutil.which') as mock_which:
            # `yum` not on PATH at all -> not dnf.
            mock_which.return_value = None
            self.assertFalse(yum_is_dnf())
            # `yum` exists but is a regular file, not a dnf link -> not dnf.
            mock_which.return_value = yum_path.decode()
            self.assertFalse(yum_is_dnf())

    # Setup for yum being the same as dnf, modeled after fedora
    # where `/bin/yum -> dnf-3`
    with temp_dir() as td:
        dnf_name = 'dnf-3'
        dnf_path = Path(td / dnf_name).touch()
        yum_path = td / 'yum'
        # Symlink to the name for a relative symlink that ends up
        # as yum -> dnf-3
        os.symlink(dnf_name, yum_path)
        with mock.patch('shutil.which') as mock_which:
            mock_paths = {dnf_name: dnf_path, 'yum': yum_path}
            mock_which.side_effect = lambda p: mock_paths[p].decode()
            self.assertTrue(yum_is_dnf())
def setUpClass(cls):
    """Load `librename_shadowed.so` once per class and bind its symbols.

    Sets up a shadow-paths root (exported via the env var the library
    reads) and exposes the library's functions as class attributes with
    proper ctypes signatures.  Cleanup of the temp dir and the extracted
    resource is deferred to `addClassCleanup`.
    """
    td_ctx = temp_dir()
    cls._shadow = td_ctx.__enter__()
    # NB: This may leak on SystemExit et al
    cls.addClassCleanup(td_ctx.__exit__, None, None, None)
    # The C library looks up this env var to locate shadowed paths.
    os.environ['FS_IMAGE_SHADOWED_PATHS_ROOT'] = f'{cls._shadow}'
    lib_ctx = Path.resource(__package__, 'librename_shadowed.so', exe=False)
    lib_path = lib_ctx.__enter__()
    # NB: This may leak a tempfile on SystemExit et al
    cls.addClassCleanup(lib_ctx.__exit__, None, None, None)
    lib = ctypes.cdll.LoadLibrary(lib_path)
    # Bind C functions with explicit arg/return types so ctypes
    # marshals bytes correctly instead of guessing `int`.
    cls._get_shadowed_original = lib.get_shadowed_original
    cls._get_shadowed_original.restype = ctypes.c_char_p
    cls._get_shadowed_original.argtypes = [ctypes.c_char_p]
    cls._get_shadowed_rename_dest = lib.get_shadowed_rename_dest
    cls._get_shadowed_rename_dest.restype = ctypes.c_char_p
    cls._get_shadowed_rename_dest.argtypes = [
        ctypes.c_char_p, ctypes.c_char_p
    ]
    cls._rename = lib.rename
    cls._rename.restype = ctypes.c_int
    cls._rename.argtypes = [ctypes.c_char_p, ctypes.c_char_p]
def test_cli(self):
    """Round-trip a blob through the storage CLI: `put` then `get`."""
    with temp_dir() as td:
        p = b'Hello, world!'
        # Write ~1.67 chunks of this phrase
        f_in = BytesIO(p * int(cli._CHUNK_SIZE * 5 / (3 * len(p))))
        f_sid = BytesIO()
        cli.main([
            '--storage', Path.json_dumps({
                'kind': 'filesystem',
                'key': 'test',
                'base_dir': td / 'storage',
            }),
            'put'
        ], from_file=f_in, to_file=f_sid)
        # `put` emits the storage ID followed by a newline.
        self.assertTrue(f_sid.getvalue().endswith(b'\n'))
        sid = f_sid.getvalue()[:-1]
        f_out = BytesIO()
        cli.main([
            '--storage', Path.json_dumps({
                'kind': 'filesystem',
                'key': 'test',
                'base_dir': td / 'storage',
            }),
            'get', sid
        ], from_file=None, to_file=f_out)
        # `get` must reproduce exactly what `put` stored.
        self.assertEqual(f_in.getvalue(), f_out.getvalue())
def test_explicit_update_conflicts(self):
    """`--create` on an existing key, `--replace` on a missing key, and
    duplicate `--replace` options must all fail loudly."""
    with temp_dir() as td:
        db_path = td / 'db.bzl'
        self._write_bzl_db(db_path, {'p1': {'a': {}}, 'p2': {'b': {}}})
        # `--create` of an already-existing (package, tag) is an error.
        with self.assertRaisesRegex(AssertionError, "'p1', 'a'"):
            self._main([
                '--db', db_path.decode(),
                '--create', 'p1', 'a', '{}',
            ])
        # `--replace` of a nonexistent (package, tag) is an error.
        with self.assertRaisesRegex(AssertionError, "'p2', 'c'"):
            self._main([
                '--db', db_path.decode(),
                '--replace', 'p2', 'c', '{}',
            ])
        # Specifying the same `--replace` twice is an error.
        with self.assertRaisesRegex(RuntimeError, 'Conflicting "replace"'):
            self._main([
                '--db', db_path.decode(),
                '--replace', 'p2', 'b', '{}',
                '--replace', 'p2', 'b', '{}',
            ])
def _prepare_versionlock_lists( subvol: Subvol, snapshot_dir: Path, list_path: Path ) -> Dict[str, Tuple[str, int]]: ''' Returns a map of "in-snapshot path" -> "tempfile with its contents", with the intention that the tempfile in the value will be a read-only bind-mount over the path in the key. ''' # `dnf` and `yum` expect different formats, so we parse our own. with open(list_path) as rf: envras = [l.split('\t') for l in rf] templates = {'yum': '{e}:{n}-{v}-{r}.{a}', 'dnf': '{n}-{e}:{v}-{r}.{a}'} dest_to_src_and_size = {} with temp_dir() as d: # Only bind-mount lists for those binaries that exist in the snapshot. for prog in set( f'{p}' for p in (subvol.path(snapshot_dir)).listdir() ) & set(templates.keys()): template = templates[prog] src = d / (prog + '-versionlock.list') with create_ro(src, 'w') as wf: for e, n, v, r, a in envras: wf.write(template.format(e=e, n=n, v=v, r=r, a=a)) set_new_key( dest_to_src_and_size, # This path convention must match how `write_yum_dnf_conf.py` # and `rpm_repo_snapshot.bzl` set up their output. snapshot_dir / f'{prog}/etc/{prog}/plugins/versionlock.list', (src, len(envras)) ) yield dest_to_src_and_size
def test_json_db(self):
    """Read a JSON-dir DB, then regenerate it via `_main` into a new
    output dir, checking both the parsed form and the emitted file."""
    with temp_dir() as td:
        os.makedirs(td / 'idb/pkg')
        with open(td / 'idb/pkg/tag.json', 'w') as outfile:
            # Not using `_with_generated_header` to ensure that we are
            # resilient to changes in the header.
            outfile.write(f'# A {_GENERATED} file\n# 2nd header line\n')
            json.dump({'foo': 'bar'}, outfile)
        self.assertEqual(
            {'pkg': {
                'tag': {
                    'foo': 'bar'
                }
            }},
            updb._read_json_dir_db(td / 'idb'),
        )
        self._main([
            '--db', (td / 'idb').decode(),
            '--out-db', (td / 'odb').decode(),
        ])
        self.assertEqual([b'pkg'], os.listdir(td / 'odb'))
        self.assertEqual([b'tag.json'], os.listdir(td / 'odb/pkg'))
        # The default updater rewrites the contents to {"x": "z"} and
        # adds a signed, generated header.
        self._check_file(
            td / 'odb/pkg/tag.json',
            '# ' + _GENERATED + textwrap.dedent(''' \
            SignedSource<<e8b8ab0d998b5fe5429777af98579c12>>
            # Update via `how`
            {
              "x": "z"
            }
            '''))
def mock_layer_dir_access(test_case, subvolume_path):
    '''
    `SubvolumeOnDisk` does a ton of validation, which makes it hard
    to use it to read or write subvols that are not actual target
    outputs.  Instead, this yields a fake layer directory path, and
    mocks `SubvolumeOnDisk.from_json_file` **only** for calls
    querying the fake path.  For those calls, it returns a fake
    `SubvolumeOnDisk` pointing at the supplied `subvolume_path`.
    '''
    # A distinctive dirname marks calls that should get the fake result;
    # everything else falls through to the real implementation.
    sigil_dirname = b'fake-parent-layer'
    orig_from_json_file = svod.SubvolumeOnDisk.from_json_file
    with unittest.mock.patch.object(
        svod.SubvolumeOnDisk, 'from_json_file'
    ) as from_json_file, temp_dir() as td:
        parent_layer_file = td / sigil_dirname / 'layer.json'
        os.mkdir(parent_layer_file.dirname())
        # The file only needs to exist; the mock intercepts reads of it.
        with open(parent_layer_file, 'w') as f:
            f.write('this will never be read')

        def check_call(infile, subvolumes_dir):
            # Calls not aimed at our sigil dir use the real parser.
            if Path(infile.name).dirname().basename() != sigil_dirname:
                return orig_from_json_file(infile, subvolumes_dir)
            test_case.assertEqual(parent_layer_file, infile.name)
            test_case.assertEqual(_SUBVOLS_DIR, subvolumes_dir)

            class FakeSubvolumeOnDisk:
                def subvolume_path(self):
                    return subvolume_path.decode()

            return FakeSubvolumeOnDisk()

        from_json_file.side_effect = check_call
        yield parent_layer_file.dirname()
def test_rewrite_testpilot_python_cmd(self): bin = '/layer-test-binary' # Test no-op rewriting cmd = [bin, 'foo', '--bar', 'beep', '--baz', '-xack', '7', '9'] with rewrite_testpilot_python_cmd(cmd, next_fd=1337) as cmd_and_fd: self.assertEqual((cmd, []), cmd_and_fd) for rewritten_opt in ('--output', '--list-tests'): with temp_dir() as td: tmp = td / 'foo.json' self.assertFalse(os.path.exists(tmp)) # Will be created prefix = ['--zap=3', '--ou', 'boo', '--ou=3'] suffix = ['garr', '-abc', '-gh', '-d', '--e"f'] with rewrite_testpilot_python_cmd( [bin, *prefix, f'{rewritten_opt}={tmp}', *suffix], next_fd=37, ) as (new_cmd, fds_to_forward): fd_to_forward, = fds_to_forward self.assertIsInstance(fd_to_forward, int) # The last argument deliberately requires shell quoting. self.assertEqual([ '/bin/bash', '-c', ' '.join([ 'exec', bin, rewritten_opt, '>(cat >&37)', *prefix, *suffix[:-1], """'--e"f'""", ]) ], new_cmd) self.assertTrue(os.path.exists(tmp)) # Was created
def test_rewrite_tpx_gtest_cmd(self): bin = '/layer-test-binary' # The last argument deliberately requires shell quoting. cmd = [bin, 'foo', '--bar', 'beep', '--baz', '--e"f'] # Test no-op rewriting with mock.patch.dict(os.environ, {'NO_GTEST': 'env is set'}), \ rewrite_tpx_gtest_cmd(cmd, next_fd=1337) as cmd_and_fd: self.assertEqual((cmd, []), cmd_and_fd) with temp_dir() as td: tmp = td / 'bar.xml' self.assertFalse(os.path.exists(tmp)) # Will be created with mock.patch.dict(os.environ, {'GTEST_OUTPUT': f'xml:{tmp}'}), \ rewrite_tpx_gtest_cmd(cmd, next_fd=37) as (new_cmd, fds): fd_to_forward, = fds self.assertIsInstance(fd_to_forward, int) self.assertEqual( [ '/bin/bash', '-c', ' '.join([ f'GTEST_OUTPUT=xml:>(cat >&37)', 'exec', *cmd[:-1], """'--e"f'""", # Yes, it's shell-quoted ]) ], new_cmd) self.assertTrue(os.path.exists(tmp)) # Was created
def test_open_http_url(self):
    """`open_url` must handle both file:// and http:// URLs; the HTTP
    case is served by a throwaway child-process HTTP server."""
    with temp_dir() as server_dir:
        hello_path = server_dir / 'hello'
        with open(hello_path, 'w') as out_f:
            out_f.write('world')
        # First, check file:// URLs
        with open_url(hello_path.file_url()) as in_f:
            self.assertEqual(b'world', in_f.read())
        # Now, http:// URLs -- the child prints its ephemeral base URL
        # on stdout, then serves `server_dir` forever until killed.
        with subprocess.Popen([
            sys.executable, '-c', '''
import http.server as hs
with hs.HTTPServer(('localhost', 0), hs.SimpleHTTPRequestHandler) as httpd:
    print('http://{}:{}/'.format(*httpd.socket.getsockname()), flush=True)
    httpd.serve_forever()
''',
        ], cwd=server_dir, stdout=subprocess.PIPE) as proc:
            try:
                with open_url(
                    proc.stdout.readline().decode().rstrip('\n') + 'hello'
                ) as in_f:
                    self.assertEqual(b'world', in_f.read())
            finally:
                # Ensure the server dies even if the assertions fail.
                proc.kill()
def test_explicit_update(self):
    """With `--no-update-existing`, only explicit `--replace`/`--create`
    options change the DB; untouched entries are preserved verbatim."""
    with temp_dir() as td:
        db_path = td / 'db.bzl'
        self._write_bzl_db(
            db_path,
            {
                'p1': {
                    'tik': {
                        'foo': 'bar'
                    }
                },  # replaced
                'p2': {
                    'tok': {
                        'a': 'b'
                    }
                },  # preserved
            })
        self._main([
            '--db', db_path.decode(),
            '--replace', 'p1', 'tik', '{"choo": "choo"}',
            '--create', 'p2', 'tak', '{"boo": "hoo"}',
            '--create', 'never', 'seen', '{"oompa": "loompa"}',
            '--no-update-existing',
        ])
        self._check_file(
            db_path,
            '# ' + _GENERATED + textwrap.dedent(''' \
            SignedSource<<37820c384800aad6bf6ebe97f7e7c1a1>>
            # Update via `how`
            package_db = {
                "never": {
                    "seen": {
                        "oompa": "loompa",
                    },
                },
                "p1": {
                    "tik": {
                        "choo": "choo",
                    },
                },
                "p2": {
                    "tak": {
                        "boo": "hoo",
                    },
                    "tok": {
                        "a": "b",
                    },
                },
            }
            '''))
def test_mutable_rpm(self):
    """A "mutable RPM" (same NEVRA, different contents across repos)
    must be flagged with `MutableRpmError`, which then taints later
    snapshots — unless forgiven via `deleted_mutable_rpms`."""
    with tempfile.NamedTemporaryFile() as tmp_db, temp_dir(
    ) as storage_dir:
        good_res, = list(
            self._make_downloader_from_ctx("0/good_dog", tmp_db,
                                           storage_dir)())
        _, good_snapshot = good_res
        self._check_snapshot(good_snapshot, _GOOD_DOG_LOCATIONS)
        bad_res, = list(
            self._make_downloader_from_ctx("0/bad_dog", tmp_db,
                                           storage_dir)())
        _, bad_snapshot = bad_res
        bad_location, = _BAD_DOG.locations("bad_dog")
        self._check_snapshot(bad_snapshot, [bad_location], has_errors=True)
        # Check that the MutableRpmError is populated correctly.
        (error, bad_rpm), = bad_snapshot.storage_id_to_rpm.items()
        self.assertIsInstance(error, MutableRpmError)
        error_dict = dict(error.args)
        self.assertEqual(bad_location, bad_rpm.location)
        self.assertEqual(bad_location, error_dict["location"])
        self.assertRegex(error_dict["storage_id"], "^test:")
        self.assertEqual(str(bad_rpm.canonical_checksum),
                         error_dict["checksum"])
        # The conflicting checksum comes from good_dog's copy.
        good_rpm, = [
            r for r in good_snapshot.storage_id_to_rpm.values()
            if "-milk-" in r.location
        ]
        self.assertEqual([str(good_rpm.canonical_checksum)],
                         error_dict["other_checksums"])
        # Now, even fetching `good_dog` will show a mutable_rpm error.
        self._check_snapshot(
            list(
                self._make_downloader_from_ctx("0/good_dog", tmp_db,
                                               storage_dir)())[0][1],
            _GOOD_DOG_LOCATIONS,
            has_errors=True,
        )
        # But using the "deleted_mutable_rpms" facility, we can forget
        # about the error.
        with mock.patch(
            SUT + "rpm_downloader.deleted_mutable_rpms",
            new={
                os.path.basename(bad_rpm.location):
                    {bad_rpm.canonical_checksum}
            },
        ):
            self._check_snapshot(
                list(
                    self._make_downloader_from_ctx("0/good_dog", tmp_db,
                                                   storage_dir)())[0][1],
                _GOOD_DOG_LOCATIONS,
                has_errors=True,
            )
def test_out_of_subvol_symlink(self):
    """`Subvol.path` must not dereference a leaf symlink that escapes
    the subvolume; with `no_dereference_leaf` it returns the link."""
    with temp_dir() as td:
        link = td / 'my_null'
        os.symlink('/dev/null', link)
        # Asking for the symlink itself (no dereference) is fine.
        self.assertEqual(
            link,
            Subvol(td).path('my_null', no_dereference_leaf=True),
        )
        # Dereferencing would land outside the subvol -> must assert.
        with self.assertRaisesRegex(AssertionError, 'outside the subvol'):
            Subvol(td).path('my_null')
def test_interposed_rename(self):
    """The interposed `rename` must redirect writes into the shadow
    copy when one exists, and behave like plain rename otherwise."""
    with temp_dir() as td:
        # Mirror `td` under the shadow root set up in `setUpClass`.
        shadow_td = self._shadow / td.lstrip(b'/')
        os.makedirs(shadow_td)
        # Good case: a file gets renamed
        with create_ro(td / 'gets_moved', 'w') as f:
            f.write('i become shadow')
        for d in [td, shadow_td]:
            with create_ro(d / 'ok_dest', 'w') as f:
                f.write('unmodified')
        self._check_file_contents([
            (td / 'gets_moved', 'i become shadow'),
            (td / 'ok_dest', 'unmodified'),
            (shadow_td / 'ok_dest', 'unmodified'),
        ])
        with _capture_fd(2) as res:
            self.assertEqual(
                0, self._rename(td / 'gets_moved', td / 'ok_dest'))
        # The library logs the redirection to stderr.
        self.assertEqual(
            f'`rename({td}/gets_moved, {td}/ok_dest)` will replace ' +
            f'shadowed original `{shadow_td}/ok_dest`\n',
            res.contents.decode(),
        )
        self.assertFalse(os.path.exists(td / 'gets_moved'))
        # The visible destination is untouched; the shadow got the data.
        self._check_file_contents([
            (td / 'ok_dest', 'unmodified'),
            (shadow_td / 'ok_dest', 'i become shadow'),
        ])
        # Normal case: destination lacks a shadow counterpart
        with create_ro(td / 'also_moved', 'w') as f:
            f.write('no shadow for me')
        with create_ro(td / 'unshadowed', 'w') as f:
            f.write('unmodified')
        self._check_file_contents([
            (td / 'also_moved', 'no shadow for me'),
            (td / 'unshadowed', 'unmodified'),
        ])
        with _capture_fd(2) as res:
            self.assertEqual(
                0, self._rename(td / 'also_moved', td / 'unshadowed'))
        # No shadow involved -> nothing logged, plain rename semantics.
        self.assertEqual(b'', res.contents)
        self.assertFalse(os.path.exists(td / 'also_moved'))
        self._check_file_contents([
            (td / 'unshadowed', 'no shadow for me'),
        ])
def build_rpm(package_dir: Path, arch: str, rpm: Rpm,
              gpg_signing_key: str) -> Path:
    'Returns the filename of the built RPM.'
    # `td` is bind-mounted RW into the container as `work_dir`, so the
    # container's rpmbuild output lands back in our temp dir.
    with temp_dir(dir=package_dir) as td, tempfile.NamedTemporaryFile() as tf, \
            Path.resource(__package__, 'busybox', exe=True) as busybox_path:
        tf.write(rpm.spec(busybox_path).encode())
        tf.flush()
        work_dir = Path(generate_work_dir())
        format_kwargs = {
            "quoted_arch": shlex.quote(arch),
            "quoted_buildroot": Path(work_dir / 'build').shell_quote(),
            "quoted_home": Path(work_dir / 'home').shell_quote(),
            "quoted_spec_file": shlex.quote(tf.name),
            "quoted_work_dir": work_dir.shell_quote(),
            # We get the uid of the current user so that we can chown the
            # work_dir *inside* the running container.  The nspawn'd build
            # appliance container needs to run as root so that it can mkdir
            # the `work_dir` which exists at /.  If we don't chown the
            # resulting tree that `rpmbuild` creates the rename would would
            # fail.
            "current_uid": os.getuid(),
        }
        opts = new_nspawn_opts(
            cmd=[
                'sh',
                '-uec',
                '''\
                /usr/bin/rpmbuild \
                -bb \
                --target {quoted_arch} \
                --buildroot {quoted_buildroot} \
                {quoted_spec_file} \
                && chown -R {current_uid} {quoted_work_dir} \
                '''.format(**format_kwargs),
            ],
            layer=_build_appliance(),
            bindmount_ro=[(tf.name, tf.name), (busybox_path, busybox_path)],
            bindmount_rw=[(td, work_dir)],
            user=pwd.getpwnam('root'),
            setenv=['HOME={quoted_home}'.format(**format_kwargs)],
        )
        run_non_booted_nspawn(opts, PopenArgs())
        # `rpmbuild` has a non-configurable output layout, so
        # we'll move the resulting rpm into our package dir.
        rpms_dir = td / 'home/rpmbuild/RPMS' / arch
        rpm_name, = rpms_dir.listdir()
        os.rename(rpms_dir / rpm_name, package_dir / rpm_name)
        sign_rpm(package_dir / rpm_name, gpg_signing_key)
        return rpm_name
def test_download_evolving_multi_repos(self):
    """Snapshot two repos at two points in time and verify storage-ID /
    RPM sharing and isolation between the four snapshots."""
    with tempfile.NamedTemporaryFile() as tmp_db, temp_dir(
    ) as storage_dir:
        snap_dog, snap_cat, snap_dog2, snap_cat2 = (
            # Downloading a repo twice in a row should always be a
            # no-op, so we do that for all repos here just in case.
            self._download_repo_twice(repo, step_and_repo, tmp_db,
                                      storage_dir)
            for repo, step_and_repo in [
                (_GOOD_DOG, "0/good_dog"),
                (_CHAOS_CAT, "0/chaos_cat"),
                (_GOOD_DOG2, "1/good_dog"),
                (_CHAOS_CAT2, "1/chaos_cat"),
            ])
        # dog & dog2 agree except for carrot + sausage.  They are the
        # same repo at different points in time, so even the `location`
        # fields of `Rpm`s agree.
        for sid, rpm in snap_dog.storage_id_to_rpm.items():
            if "-carrot-" not in rpm.location:
                self.assertEqual(rpm, snap_dog2.storage_id_to_rpm[sid])
        for sid, rpm in snap_dog2.storage_id_to_rpm.items():
            if "-sausage-" not in rpm.location:
                self.assertEqual(rpm, snap_dog.storage_id_to_rpm[sid])
        self.assertEqual(len(snap_dog.storage_id_to_rpm),
                         len(snap_dog2.storage_id_to_rpm))
        # cat consists of sausage 3b, which also occurs in dog2
        (sid_3b, rpm_3b), = snap_cat.storage_id_to_rpm.items()
        self.assertEqual(
            _location_basename(rpm_3b),
            _location_basename(snap_dog2.storage_id_to_rpm[sid_3b]),
        )
        # The remaining 4 of 6 pairs have no overlap in storage IDs or RPMs
        for a_snap, b_snap in [
            (snap_dog, snap_cat),
            (snap_dog, snap_cat2),
            (snap_dog2, snap_cat2),
            (snap_cat, snap_cat2),
        ]:
            a = a_snap.storage_id_to_rpm
            b = b_snap.storage_id_to_rpm
            self.assertEqual(set(), set(a.keys()) & set(b.keys()))
            self.assertEqual(
                set(),
                {_location_basename(r) for r in a.values()} &
                {_location_basename(r) for r in b.values()},
            )
def test_snapshot(self):
    """End-to-end smoke test of `snapshot_repo` against a temp repo."""
    with temp_repos.temp_repos_steps(
        gpg_signing_key=temp_repos.get_test_signing_key(),
        repo_change_steps=[{'dog': temp_repos.SAMPLE_STEPS[0]['dog']}]
    ) as repos_root, temp_dir() as td:
        with open(td / 'fake_gpg_key', 'w'):
            pass
        whitelist_dir = td / 'gpg_whitelist'
        os.mkdir(whitelist_dir)
        shutil.copy(td / 'fake_gpg_key', whitelist_dir)
        storage_dict = {
            'key': 'test',
            'kind': 'filesystem',
            'base_dir': td / 'storage',
        }
        snapshot_repo([
            '--repo-universe=fakeverse',
            '--repo-name=dog',
            '--repo-url=' + (repos_root / "0/dog").file_url(),
            f'--gpg-key-whitelist-dir={whitelist_dir}',
            '--gpg-url=' + (td / 'fake_gpg_key').file_url(),
            f'--snapshot-dir={td / "snap"}',
            f'--storage={Path.json_dumps(storage_dict)}',
            '--db=' + Path.json_dumps({
                'kind': 'sqlite',
                'db_path': td / 'db.sqlite3',
            }),
            '--threads=4',
        ])
        # This test simply checks the overall integration, so we don't
        # bother looking inside the DB or Storage, or inspecting the
        # details of the snapshot -- those should all be covered by
        # lower-level tests.
        with sqlite3.connect(RepoSnapshot.fetch_sqlite_from_storage(
            Storage.make(**storage_dict),
            td / 'snap',
            td / 'snapshot.sql3',
        )) as db:
            self.assertEqual({
                'dog-pkgs/rpm-test-carrot-2-rc0.x86_64.rpm',
                'dog-pkgs/rpm-test-mice-0.1-a.x86_64.rpm',
                'dog-pkgs/rpm-test-milk-1.41-42.x86_64.rpm',
                'dog-pkgs/rpm-test-mutable-a-f.x86_64.rpm',
            }, {
                path for path, in db.execute(
                    'SELECT "path" FROM "rpm";'
                ).fetchall()
            })
def test_temp_file_error(self):
    """On error, `_populate_temp_file_and_rename` must remove its temp
    file and leave the original file's contents untouched."""
    with temp_dir() as td:
        dog = td / 'dog'
        with open(dog, 'w') as f:
            f.write('woof')
        with self.assertRaisesRegex(RuntimeError, '^woops$'), \
                updb._populate_temp_file_and_rename(dog) as f:
            f.write('meow')
            tmp_path = f.name
            raise RuntimeError('woops')
        # Potentially can race with another tempfile creation, but this
        # should be vanishingly unlikely.
        self.assertFalse(os.path.exists(tmp_path))
        # Importantly, the original file is untouched.
        self._check_file(td / 'dog', 'woof')
def test_default_update(self):
    """Without explicit options, `_main` rewrites every entry via the
    default updater (which produces {"x": "z"}) and re-signs the DB."""
    with temp_dir() as td:
        db_path = td / 'db.bzl'
        self._write_bzl_db(db_path, {'pkg': {'tag': {'foo': 'bar'}}})
        self._main(['--db', db_path.decode()])
        self._check_file(
            db_path,
            '# ' + _GENERATED + textwrap.dedent(''' \
            SignedSource<<69d45bae7b77e0bd2ee0d5a285d6fdb3>>
            # Update via `how`
            package_db = {
                "pkg": {
                    "tag": {
                        "x": "z",
                    },
                },
            }
            '''))
def mock(*args, **kwargs):
    """Run the wrapped `fn` with `S3Storage` redirected at a local fake.

    NOTE(review): this is a closure — `fn` comes from the enclosing
    decorator scope, which is not visible here.
    """
    with temp_dir() as td, Path.resource(
        __package__, 'mock-s3-cli', exe=True,
    ) as mock_s3_cli_path:
        # We mock `_path_for_storage_id` such that the base dir
        # is always going to be the TempDir we created
        def _mock_path_for_storage_id(sid):
            return (td / sid).decode()

        # Instead of calls to `aws s3`, we want to call `mock-s3-cli`
        with unittest.mock.patch.object(
            S3Storage, '_base_cmd',
            return_value=[mock_s3_cli_path],
        ), unittest.mock.patch.object(
            S3Storage, '_path_for_storage_id',
            side_effect=_mock_path_for_storage_id,
        ):
            return fn(*args, **kwargs)
def test_install_rpm(self):
    """Install `rpm-test-carrot` via both snapshot binaries (dnf, yum)
    into a fresh install root and spot-check the installed payload."""
    snapshot_dir = snapshot_install_dir(
        '//fs_image/rpm:repo-snapshot-for-tests')
    for prog in ('dnf', 'yum'):
        with temp_dir() as install_root:
            os.mkdir(install_root / 'meta')
            cmd = [
                snapshot_dir / prog / 'bin' / prog,
                f'--installroot={install_root}',
                'install', '--assumeyes', 'rpm-test-carrot',
            ]
            subprocess.check_call(cmd)
            # We don't need a full rendered subvol test, since the
            # contents of the filesystem is checked by other tests.
            # (e.g. `test-yum-dnf-from-snapshot`, `test-image-layer`)
            with open(install_root / 'rpm_test/carrot.txt') as infile:
                self.assertEqual('carrot 2 rc0\n', infile.read())
def _dummies_for_protected_paths(
    protected_paths: Iterable[str],
) -> Mapping[Path, Path]:
    '''
    Some locations (some host yum/dnf directories, and install root
    /meta/ and mountpoints) should be off-limits to writes by RPMs.
    We enforce that by bind-mounting an empty file or directory on
    top of each one.

    NOTE(review): despite the annotation, this is a generator that
    yields the mapping once — presumably used as a context manager so
    the post-yield sanity check runs on exit; confirm.
    '''
    with temp_dir() as td, tempfile.NamedTemporaryFile() as tf:
        # NB: There may be duplicates in protected_paths, so we normalize.
        # If the duplicates include both a file and a directory, this picks
        # one arbitrarily, and if the type on disk is different, we will
        # fail at mount time.  This doesn't seem worth an explicit check.
        # Trailing '/' marks a directory -> mount the empty dir; else
        # mount the empty file.
        yield {
            Path(p).normpath(): (td if p.endswith('/') else Path(tf.name))
            for p in protected_paths
        }
        # NB: The bind mount is read-only, so this is just paranoia.  If it
        # were left RW, we'd need to check its owner / permissions too.
        for expected, actual in (([], td.listdir()), (b'', tf.read())):
            assert expected == actual, \
                f'Some RPM wrote {actual} to {protected_paths}'
def test_install_file(self):
    """`_install_file_item` must infer modes (executable vs data),
    resolve `image.source(path=...)`, and reject /meta/ destinations."""
    with tempfile.NamedTemporaryFile() as tf:
        os.chmod(tf.name, stat.S_IXUSR)
        exe_item = _install_file_item(
            from_target='t',
            source={'source': tf.name},
            dest='d/c',
        )
        # Executable source -> installed with a+rx.
        ep = _InstallablePath(Path(tf.name), ProvidesFile(path='d/c'),
                              'a+rx')
        self.assertEqual((ep, ), exe_item.paths)
        self.assertEqual(tf.name.encode(), exe_item.source)
        self._check_item(exe_item, {ep.provides}, {require_directory('d')})
    # Checks `image.source(path=...)`
    with temp_dir() as td:
        os.mkdir(td / 'b')
        open(td / 'b/q', 'w').close()
        data_item = _install_file_item(
            from_target='t',
            source={
                'source': td,
                'path': '/b/q'
            },
            dest='d',
        )
        # Non-executable source -> installed with a+r.
        dp = _InstallablePath(td / 'b/q', ProvidesFile(path='d'), 'a+r')
        self.assertEqual((dp, ), data_item.paths)
        self.assertEqual(td / 'b/q', data_item.source)
        self._check_item(data_item, {dp.provides}, {require_directory('/')})
    # NB: We don't need to get coverage for this check on ALL the items
    # because the presence of the ProvidesDoNotAccess items it the real
    # safeguard -- e.g. that's what prevents TarballItem from writing
    # to /meta/ or other protected paths.
    with self.assertRaisesRegex(AssertionError, 'cannot start with meta/'):
        _install_file_item(
            from_target='t',
            source={'source': 'a/b/c'},
            dest='/meta/foo',
        )
def test_update_shadowed(self):
    """When yum/dnf overwrites a shadowed file, the write must be
    redirected into the shadowed original, leaving the shadow intact."""
    with temp_dir() as root, mock.patch.object(
        # Note that the shadowed root is under the install root, since
        # the `rename` runs under chroot.
        yum_dnf_from_snapshot,
        'SHADOWED_PATHS_ROOT',
        Path('/shadow'),
    ):
        os.mkdir(root / 'meta')
        os.mkdir(root / 'rpm_test')
        os.makedirs(root / 'shadow/rpm_test')
        to_shadow = root / 'rpm_test/carrot.txt'
        replacement = root / 'rpm_test/shadows_carrot.txt'
        shadowed_original = root / 'shadow/rpm_test/carrot.txt'
        # Our shadowing setup is supposed to have moved the original here.
        with create_ro(shadowed_original, 'w') as outfile:
            outfile.write('yum/dnf overwrites this')
        with self._set_up_shadow(replacement, to_shadow):
            with open(to_shadow) as infile:
                self.assertEqual('shadows carrot', infile.read())
            with open(shadowed_original) as infile:
                self.assertEqual('yum/dnf overwrites this', infile.read())
            yum_dnf_from_snapshot.yum_dnf_from_snapshot(
                yum_dnf=self._YUM_DNF,
                snapshot_dir=_SNAPSHOT_DIR,
                protected_paths=[],
                yum_dnf_args=[
                    f'--installroot={root}',
                    'install', '--assumeyes', 'rpm-test-carrot',
                ],
            )
            # The shadow is still in place
            with open(to_shadow) as infile:
                self.assertEqual('shadows carrot', infile.read())
            # But we updated the shadowed file
            with open(shadowed_original) as infile:
                self.assertEqual('carrot 2 rc0\n', infile.read())
def test_visitor_matches_snapshot(self):
    """Visitors must see exactly the objects recorded in the snapshot,
    both unsharded and across a 10-way shard split."""
    class Visitor:
        # Records every visited object; duplicates indicate a bug.
        def __init__(self):
            self.repomd = None
            self.rpms = set()
            self.repodatas = set()

        def visit_repomd(self, repomd):
            assert self.repomd is None
            self.repomd = repomd

        def visit_repodata(self, repodata):
            assert repodata not in self.repodatas
            self.repodatas.add(repodata)

        def visit_rpm(self, rpm):
            assert rpm not in self.rpms
            self.rpms.add(rpm)

    with tempfile.NamedTemporaryFile() as tmp_db, temp_dir(
    ) as storage_dir:
        partial_downloader = partial(self._make_downloader_from_ctx,
                                     "0/good_dog", tmp_db, storage_dir)
        visitor_all = Visitor()
        res, = list(partial_downloader()(visitors=[visitor_all]))
        repo, snapshot = res
        self._check_visitors_match_snapshot([visitor_all], snapshot)
        visitors = [Visitor() for _ in range(10)]
        repo_snapshots = []
        for shard, visitor in enumerate(visitors):
            repo_snapshots += partial_downloader(
                RpmShard(shard, len(visitors)))(visitors=[visitor])
        # It's about 1:1000 that all 3 RPMs end up in one shard, and
        # since this test is deterministic, it won't be flaky.
        self.assertGreater(
            sum(bool(s.storage_id_to_rpm) for _, s in repo_snapshots), 1)
        self._check_visitors_match_snapshot(
            visitors, self._reduce_equal_snapshots(repo_snapshots))
def test_conf(self):
    """`wydc.main` must emit the expected yum/dnf config, including the
    yum-only `skip_missing_names_*` directives."""
    install_dir = '/INSTALL/DIR'
    prog_name = self._YUM_DNF.value
    expected_out = _CONF_OUT.format(
        prog_name=prog_name,
        # These directives are only written for `yum`, not `dnf`.
        extra_directives=textwrap.dedent('''\
            skip_missing_names_on_install = 0
            skip_missing_names_on_update = 0
        ''') if self._YUM_DNF == YumDnf.yum else '',
    )
    with temp_dir() as td:
        with create_ro(td / 'in', 'w') as outf:
            outf.write(_CONF_IN)
        wydc.main([
            f'--rpm-installer={self._YUM_DNF.value}',
            f'--input-conf={td / "in"}',
            f'--output-dir={td / "out"}',
            f'--install-dir={install_dir}',
            '--repo-server-ports=1234 5678',
        ])
        with open(td / f'out/{prog_name}/{prog_name}.conf') as infile:
            self.assertEqual(expected_out, infile.read())
def test_dangling_repodatas(self):
    """If storing the repomd fails, previously stored repodatas remain
    in the DB (dangling) but no repomd row references them."""
    orig_rd = _download_repodata
    with mock.patch.object(
        RepoDBContext, "store_repomd"
    ) as mock_store, mock.patch(
        SUT + "repodata_downloader._download_repodata"
    ) as mock_rd, tempfile.NamedTemporaryFile(
    ) as tmp_db, temp_dir() as storage_dir:
        db_cfg = {
            "kind": "sqlite",
            "db_path": tmp_db.name,
            "readonly": False
        }
        # Pass repodata downloads through, but fail the repomd store.
        mock_rd.side_effect = orig_rd
        mock_store.side_effect = RuntimeError
        with self.assertRaises(RuntimeError):
            next(
                self._make_downloader_from_ctx("0/good_dog", tmp_db,
                                               storage_dir)())
        # Get the repodatas that the mocked fn was passed called_rds
        called_rds = [x[0][0] for x in mock_rd.call_args_list]
        db_conn = DBConnectionContext.from_json(db_cfg)
        db_ctx = RepoDBContext(db_conn, db_conn.SQL_DIALECT)
        repodata_table = RepodataTable()
        with db_ctx as repo_db_ctx:
            storage_ids = [
                repo_db_ctx.get_storage_id(repodata_table, rd)
                for rd in called_rds
            ]
        # All of these repodatas got stored in the db
        self.assertEqual(len(called_rds), len(storage_ids))
        # Now ensure that no repomds got inserted (i.e. nothing to
        # reference the above repodatas)
        mock_store.assert_called_once()
        with db_ctx as repo_db_ctx:
            with repo_db_ctx._cursor() as cursor:
                cursor.execute("SELECT COUNT(*) from repo_metadata")
                res = cursor.fetchone()
        self.assertEqual(0, res[0])
def test_rpm_metadata_from_file(self):
    """`RpmMetadata.from_file` must parse NEVRA from a real .rpm file
    and reject missing files and non-.rpm names."""
    with temp_repos_steps(
        gpg_signing_key=get_test_signing_key(),
        repo_change_steps=[{
            'repo': Repo([Rpm('sheep', '0.3.5.beta', 'l33t.deadbeef.777')]),
        }],
    ) as repos_root, temp_dir() as td:
        src_rpm_path = repos_root / (
            '0/repo/repo-pkgs/' +
            'rpm-test-sheep-0.3.5.beta-l33t.deadbeef.777.x86_64.rpm')
        # Parsing must not depend on the filename, only the contents.
        dst_rpm_path = td / 'arbitrary_unused_name.rpm'
        shutil.copy(src_rpm_path, dst_rpm_path)
        a = RpmMetadata.from_file(dst_rpm_path)
        self.assertEqual(a.name, 'rpm-test-sheep')
        self.assertEqual(a.epoch, 0)
        self.assertEqual(a.version, '0.3.5.beta')
        self.assertEqual(a.release, 'l33t.deadbeef.777')
    # non-existent file
    with self.assertRaisesRegex(RuntimeError, '^Error querying RPM:'):
        a = RpmMetadata.from_file(b'idontexist.rpm')
    # missing extension
    with self.assertRaisesRegex(ValueError, ' needs to end with .rpm$'):
        a = RpmMetadata.from_file(b'idontendwithdotrpm')
def test_serialize_and_visit(self):
    """Round-trip a `RepoSnapshot` (including all three error kinds)
    through the SQLite serialization, then check the visitor API."""
    repodata = Repodata(
        location='repodata_loc',
        checksum=Checksum('a', 'b'),
        size=123,
        build_timestamp=456,
    )
    repomd = RepoMetadata(
        xml=b'foo',
        # This object is only as populated as this test requires, in
        # practice these would not be None.
        fetch_timestamp=None,
        repodatas=None,
        checksum=None,
        size=None,
        build_timestamp=7654321,
    )
    rpm_base = Rpm(  # Reuse this template for all RPMs
        epoch=37,
        name='foo-bar',
        version='3.14',
        release='rc0',
        arch='i386',
        build_timestamp=90,
        canonical_checksum=Checksum('e', 'f'),
        checksum=Checksum('c', 'd'),
        location=None,  # `_replace`d below
        size=78,
        source_rpm='foo-bar-3.14-rc0.src.rpm',
    )
    rpm_normal = rpm_base._replace(location='normal.rpm')
    rpm_file_integrity = rpm_base._replace(location='file_integrity_error')
    rpm_http = rpm_base._replace(location='http_error')
    rpm_mutable = rpm_base._replace(location='mutable_rpm_error')
    error_file_integrity = FileIntegrityError(
        location=rpm_file_integrity.location,
        failed_check='size',
        expected=42,
        actual=7,
    )
    error_http = HTTPError(location=rpm_http.location, http_status=404)
    error_mutable_rpm = MutableRpmError(
        location=rpm_mutable.location,
        storage_id='rpm_mutable_sid',
        checksum=Checksum('g', 'h'),
        other_checksums={Checksum('i', 'j'), Checksum('k', 'l')},
    )
    snapshot = RepoSnapshot(
        repomd=repomd,
        storage_id_to_repodata={'repodata_sid': repodata},
        storage_id_to_rpm={
            # Errors take the place of storage IDs for failed downloads.
            'rpm_normal_sid': rpm_normal,
            error_file_integrity: rpm_file_integrity,
            error_http: rpm_http,
            error_mutable_rpm: rpm_mutable,
        },
    )
    # Check the `to_sqlite` serialization
    with temp_dir() as td:
        storage = FilesystemStorage(key='test', base_dir=td / 'storage')
        os.mkdir(td / 'snapshot')
        with RepoSnapshot.add_sqlite_to_storage(
            storage, td / 'snapshot',
        ) as db:
            snapshot.to_sqlite('fake_repo', db)
        with sqlite3.connect(
            RepoSnapshot.fetch_sqlite_from_storage(
                storage, td / 'snapshot', td / 'snapshot.sql3')
        ) as db:
            self.assertEqual([{
                'repo': 'fake_repo',
                'metadata_xml': repomd.xml.decode(),
                'build_timestamp': repomd.build_timestamp,
            }], _get_db_rows(db, 'repomd'))

            self.assertEqual([{
                'repo': 'fake_repo',
                'path': 'repodata_loc',
                'build_timestamp': 456,
                'checksum': 'a:b',
                'error': None,
                'error_json': None,
                'size': 123,
                'storage_id': 'repodata_sid',
            }], _get_db_rows(db, 'repodata'))

            # Columns shared by every `rpm` row below.
            base_row = {
                'repo': 'fake_repo',
                'epoch': rpm_base.epoch,
                'name': rpm_base.name,
                'version': rpm_base.version,
                'release': rpm_base.release,
                'arch': rpm_base.arch,
                'build_timestamp': rpm_base.build_timestamp,
                'checksum': str(rpm_base.best_checksum()),
                'size': rpm_base.size,
                'source_rpm': rpm_base.source_rpm,
            }
            # Compare as sorted JSON strings since row dicts are not
            # directly orderable.
            self.assertEqual(
                sorted(
                    json.dumps(row, sort_keys=True) for row in [
                        {
                            **base_row,
                            'path': rpm_normal.location,
                            'error': None,
                            'error_json': None,
                            'storage_id': 'rpm_normal_sid',
                        },
                        {
                            **base_row,
                            'path': rpm_file_integrity.location,
                            'error': 'file_integrity',
                            'error_json': json.dumps(
                                {
                                    'message': error_file_integrity.to_dict()
                                    ['message'],
                                    'location': rpm_file_integrity.location,
                                    'failed_check': 'size',
                                    # These are stringified because they might have
                                    # been checksums... seems OK for now.
                                    'expected': '42',
                                    'actual': '7',
                                },
                                sort_keys=True),
                            'storage_id': None,
                        },
                        {
                            **base_row,
                            'path': rpm_http.location,
                            'error': 'http',
                            'error_json': json.dumps(
                                {
                                    'message': error_http.to_dict()
                                    ['message'],
                                    'location': rpm_http.location,
                                    'http_status': 404,
                                },
                                sort_keys=True),
                            'storage_id': None,
                        },
                        {
                            **base_row,
                            'path': rpm_mutable.location,
                            'error': 'mutable_rpm',
                            'error_json': json.dumps(
                                {
                                    'message':
                                        error_mutable_rpm.to_dict()['message'],
                                    'location': rpm_mutable.location,
                                    'checksum': 'g:h',
                                    'other_checksums': ['i:j', 'k:l'],
                                },
                                sort_keys=True),
                            'storage_id': 'rpm_mutable_sid',
                        }
                    ]),
                sorted(
                    json.dumps(row, sort_keys=True)
                    for row in _get_db_rows(db, 'rpm')))
    # Check the visitor
    mock = unittest.mock.MagicMock()
    snapshot.visit(mock)
    mock.visit_repomd.assert_called_once_with(repomd)
    mock.visit_repodata.assert_called_once_with(repodata)
    rpm_calls = set()
    for name, args, kwargs in mock.visit_rpm.mock_calls:
        self.assertEqual('', name)
        self.assertEqual({}, kwargs)
        self.assertEqual(1, len(args))
        rpm_calls.add(args[0])
    self.assertEqual(
        rpm_calls,
        {rpm_normal, rpm_file_integrity, rpm_http, rpm_mutable},
    )