Exemplo n.º 1
0
 def in_shard(self, rpm):
     '''
     Return True iff the hash of `rpm.filename()`, taken modulo
     `self.modulo`, equals `self.shard`.
     '''
     # The RPM filename is treated as the global primary key, so it is
     # the only input to the shard hash.
     #
     # Parallel downloads need a deterministic hash.  The Python standard
     # library has no fast non-cryptographic hash (CityHash, SpookyHashV2,
     # ...), and adler32, while faster, is far too collision-prone.  So we
     # take SHA1 and keep only the tail of the digest, starting at byte
     # offset 12.
     sha1_digest = hashlib.sha1(byteme(rpm.filename())).digest()
     (filename_hash,) = _UINT64_STRUCT.unpack_from(sha1_digest, 12)
     return filename_hash % self.modulo == self.shard
Exemplo n.º 2
0
 def __init__(self, path: AnyStr, already_exists=False):
     '''
     A `Subvol` may stand for a subvolume that has not been created yet.
     Pass already_exists=True for one that is already present; otherwise
     you must call create() or snapshot() to actually make the subvolume.

     Raises `AssertionError` if already_exists=True but
     `_path_is_btrfs_subvol` rejects the (absolute) path.
     '''
     self._path = os.path.abspath(byteme(path))
     self._exists = already_exists
     # Nothing to verify for a subvolume that is yet to be created.
     if not self._exists:
         return
     if not _path_is_btrfs_subvol(self._path):
         raise AssertionError(f'No btrfs subvol at {self._path}')
Exemplo n.º 3
0
 def __init__(
     self,
     unshare: Optional[Unshare],
     image_path: bytes,
     size_bytes: int,
 ):
     '''
     Record the parameters of the volume and allocate the temporary
     directory context.  `image_path` is stored as an absolute path,
     passed through `byteme`.
     '''
     # Directory paths start out unset.
     self._temp_dir: Optional[bytes] = None
     self._mount_dir: Optional[bytes] = None

     self._unshare = unshare
     self._size_bytes = size_bytes

     self._temp_dir_ctx = tempfile.TemporaryDirectory()  # noqa: P201
     absolute_image = os.path.abspath(image_path)
     self._image_path = byteme(absolute_image)
Exemplo n.º 4
0
    def path(
        self,
        path_in_subvol: AnyStr = b'.',
        *,
        no_dereference_leaf=False,
    ) -> bytes:
        '''
        Return the normalized path of `path_in_subvol` under the subvolume
        root, raising `AssertionError` if it resolves outside of that root.

        This is the only safe way to access paths inside the subvolume.
        Never do `os.path.join(subvol.path('a/path'), 'more/path')`, as
        that bypasses the safety checks; do `subvol.path(os.path.join(...))`
        instead.

        Two risks are mitigated here:
          - `path_in_subvol` is relative, and leaves the subvolume via '..'
          - Some component of the path is a symlink which, when followed
            by a non-chrooted tool, would reach something outside of the
            subvolume.

        At present, these checks also fail when traversing an in-subvolume
        symlink whose target is an absolute path to another directory
        within the subvolume.  Support could easily be added, but it is
        deliberately omitted: the preferred idiom is for image authors to
        manipulate the "real" locations of files rather than paths that go
        through symlinks.

        In the rare case that you must operate on a symlink itself (e.g.
        remove or rename it), pass `no_dereference_leaf` so that the last
        path component is not resolved.

        Future: consider using a file descriptor to refer to the subvolume
        root directory to better mitigate races due to renames in its path.
        '''
        # Without the lstrip, an absolute `path_in_subvol` would make
        # `os.path.join` discard the subvolume prefix.
        relative_part = byteme(path_in_subvol).lstrip(b'/')
        # Normalize, because the `btrfs` CLI is not very flexible: given
        # `/subvol/.` it would try to name a subvol '.'.
        result_path = os.path.normpath(os.path.join(self._path, relative_part))

        # Paranoia: resolve symlinks and make sure the outcome still lies
        # under the (resolved) subvolume root.
        if no_dereference_leaf:
            # Resolve only the parent directory, keeping the last component
            # as-is, so that a symlink leaf can itself be addressed.
            resolved_target = os.path.join(
                os.path.realpath(os.path.dirname(result_path)),
                os.path.basename(result_path),
            )
        else:
            # Following every symlink means that a symlink pointing outside
            # of the subvolume is rejected below.
            resolved_target = os.path.realpath(result_path)
        root_relative = os.path.relpath(
            resolved_target, os.path.realpath(self._path),
        )

        escapes_root = (
            root_relative == b'..' or root_relative.startswith(b'../')
        )
        if escapes_root:
            raise AssertionError(f'{path_in_subvol} is outside the subvol')
        return result_path
Exemplo n.º 5
0
 def __enter__(self) -> 'LoopbackVolume':
     '''
     Enter the temporary directory context, run `_format_image_file` on
     the image (keeping the size it returns), create a `volume`
     directory inside the temporary directory, and hand it to
     `_mount_image_file`.  Returns `self`.

     If any step after entering the temporary directory fails,
     `__exit__` is invoked with the active exception info, and the
     exception is then re-raised.
     '''
     temp_root = os.path.abspath(self._temp_dir_ctx.__enter__())
     self._temp_dir = byteme(temp_root)
     try:
         formatted_size = _format_image_file(
             self._image_path, self._size_bytes,
         )
         self._size_bytes = formatted_size

         # Record the mount point before creating it, so that it is
         # already set on `self` should `mkdir` fail.
         mount_point = os.path.join(self._temp_dir, b'volume')
         self._mount_dir = mount_point
         os.mkdir(mount_point)

         self._loop_dev = _mount_image_file(
             self._unshare, self._image_path, mount_point,
         )
     except BaseException:
         # Give `__exit__` a chance to clean up, then propagate.
         self.__exit__(*sys.exc_info())
         raise
     return self
Exemplo n.º 6
0
    def store_repomd(
        self,
        universe: str,
        repo: str,
        repomd: RepoMetadata,
    ) -> int:
        '''
        Insert `repomd` into `repo_metadata` for `(universe, repo)`.

        Returns the inserted `fetch_timestamp`: ours if our row went in, or
        the one from a racing writer whose row for the same
        `(universe, repo, checksum)` was already present.

        Raises `AssertionError` if the pre-existing row disagrees with
        ours, i.e. its `xml` differs, or its `fetch_timestamp` is more than
        a minute ahead of ours.  These checks are raised explicitly rather
        than written as `assert` statements, so that they stay in force
        when Python runs with `-O`.
        '''
        validate_universe_name(universe)
        with self._cursor() as cursor:
            fts = repomd.fetch_timestamp
            bts = repomd.build_timestamp
            checksum = str(repomd.checksum)
            repomd_xml = byteme(repomd.xml)

            # Future: We could start with a sanity check like below.  I'm
            # not sure of its value, though, and it would slow us down.
            #
            #     for repodata in repomd.repodatas:
            #         assert repodata.checksum() in DB
            p = self._placeholder()
            cursor.execute(
                f'''
                INSERT {self._or_ignore()} INTO `repo_metadata` (
                    `universe`, `repo`, `fetch_timestamp`,
                    `build_timestamp`, `checksum`, `xml`
                ) VALUES ({p}, {p}, {p}, {p}, {p}, {p});
            ''', (universe, repo, fts, bts, checksum, repomd_xml))
            if cursor.rowcount:
                return fts  # Our timestamp was the one that got inserted.

            # We lost the race, so ensure the prior data agrees with ours.
            # We don't need to check `build_timestamp`, it comes from `xml`.
            cursor.execute(
                f'''
                SELECT `fetch_timestamp`, `xml` FROM `repo_metadata`
                WHERE (`universe` = {p} AND `repo` = {p} AND `checksum` = {p});
            ''', (universe, repo, checksum))
            # Exactly one row is expected; this unpacking raises
            # `ValueError` for any other row count.
            (db_fts, db_repomd_xml), = cursor.fetchall()
            # Allow a generous 1 minute of clock skew
            if fts + 60 < db_fts:
                raise AssertionError(f'{fts} + 60 < {db_fts}')
            if repomd_xml != db_repomd_xml:
                raise AssertionError(f'{repomd_xml} {db_repomd_xml}')
            return db_fts