Example #1
 def load(self, id):
     self.id = id
     # Fetch the archive metadata block from the repository and decrypt it
     data = self.key.decrypt(self.id, self.repository.get(self.id))
     self.metadata = msgpack.unpackb(data)
     if self.metadata[b'version'] != 1:
         raise Exception('Unknown archive metadata version')
     # Decode the byte-string metadata fields to str
     decode_dict(self.metadata, (b'name', b'hostname', b'username', b'time'))
     self.metadata[b'cmdline'] = [arg.decode('utf-8', 'surrogateescape') for arg in self.metadata[b'cmdline']]
     self.name = self.metadata[b'name']
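
The decode_dict helper is not part of the excerpt. Judging from how it is called here, it presumably decodes the listed byte-string values to str in place; a minimal sketch under that assumption (names and defaults are guesses, not the library's confirmed API):

    def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'):
        # Hypothetical reconstruction: decode selected byte-string values in place
        for key in keys:
            if isinstance(d.get(key), bytes):
                d[key] = d[key].decode(encoding, errors)
        return d
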
Example #2
 def unpack_many(self, ids, filter=None, preload=False):
     unpacker = msgpack.Unpacker(use_list=False)
     for data in self.fetch_many(ids):
         unpacker.feed(data)
         # Decode the byte-string fields of every item buffered so far
         items = [decode_dict(item, (b'path', b'source', b'user', b'group')) for item in unpacker]
         if filter:
             items = [item for item in items if filter(item)]
         if preload:
             # Queue the data chunks of the surviving items for prefetching
             for item in items:
                 if b'chunks' in item:
                     self.repository.preload([c[0] for c in item[b'chunks']])
         for item in items:
             yield item
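
The feed/iterate pattern above is msgpack-python's streaming API: feed() appends raw bytes, and iterating the Unpacker yields only the objects that are fully buffered. A self-contained round trip showing the same pattern outside the repository code:

    import msgpack

    # Pack a few maps, then recover them from an incrementally fed byte stream
    packed = b''.join(msgpack.packb({b'path': b'/etc/hosts', b'n': i}) for i in range(3))

    unpacker = msgpack.Unpacker(use_list=False)
    for chunk in (packed[:10], packed[10:]):  # arbitrary slices, as fetch_many would yield
        unpacker.feed(chunk)
        for item in unpacker:  # yields only completely received objects
            print(item)
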
Example #3
    def rebuild_refcounts(self):
        """Rebuild object reference counts by walking the metadata

        Missing and/or incorrect data is repaired when detected
        """
        # Exclude the manifest from chunks
        del self.chunks[Manifest.MANIFEST_ID]

        def mark_as_possibly_superseded(id_):
            if self.chunks.get(id_, (0,))[0] == 0:
                self.possibly_superseded.add(id_)

        def add_callback(chunk):
            id_ = self.key.id_hash(chunk)
            cdata = self.key.encrypt(chunk)
            add_reference(id_, len(chunk), len(cdata), cdata)
            return id_

        def add_reference(id_, size, csize, cdata=None):
            try:
                count, _, _ = self.chunks[id_]
                self.chunks[id_] = count + 1, size, csize
            except KeyError:
                assert cdata is not None
                self.chunks[id_] = 1, size, csize
                if self.repair:
                    self.repository.put(id_, cdata)

        def verify_file_chunks(item):
            """Verifies that all file chunks are present

            Missing file chunks will be replaced with new chunks of the same
            length containing all zeros.
            """
            offset = 0
            chunk_list = []
            for chunk_id, size, csize in item[b'chunks']:
                if chunk_id not in self.chunks:
                    # If a file chunk is missing, create an all-zero replacement chunk
                    self.report_progress('{}: Missing file chunk detected (Byte {}-{})'.format(
                        item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size), error=True)
                    data = bytes(size)
                    chunk_id = self.key.id_hash(data)
                    cdata = self.key.encrypt(data)
                    csize = len(cdata)
                    add_reference(chunk_id, size, csize, cdata)
                else:
                    add_reference(chunk_id, size, csize)
                chunk_list.append((chunk_id, size, csize))
                offset += size
            item[b'chunks'] = chunk_list

        def robust_iterator(archive):
            """Iterates through all archive items

            Missing item chunks will be skipped and the msgpack stream will be restarted
            """
            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item)
            _state = 0
            def missing_chunk_detector(chunk_id):
                nonlocal _state
                if _state % 2 != int(chunk_id not in self.chunks):
                    _state += 1
                return _state
            for state, items in groupby(archive[b'items'], missing_chunk_detector):
                items = list(items)
                if state % 2:
                    self.report_progress('Archive metadata damage detected', error=True)
                    continue
                if state > 0:
                    unpacker.resync()
                for chunk_id, cdata in zip(items, repository.get_many(items)):
                    unpacker.feed(self.key.decrypt(chunk_id, cdata))
                    for item in unpacker:
                        yield item

        if isinstance(self.repository, RemoteRepository):
            repository = RepositoryCache(self.repository)
        else:
            repository = self.repository

        num_archives = len(self.manifest.archives)
        for i, (name, info) in enumerate(list(self.manifest.archives.items()), 1):
            self.report_progress('Analyzing archive {} ({}/{})'.format(name, i, num_archives))
            archive_id = info[b'id']
            if archive_id not in self.chunks:
                self.report_progress('Archive metadata block is missing', error=True)
                del self.manifest.archives[name]
                continue
            mark_as_possibly_superseded(archive_id)
            cdata = self.repository.get(archive_id)
            data = self.key.decrypt(archive_id, cdata)
            archive = StableDict(msgpack.unpackb(data))
            if archive[b'version'] != 1:
                raise Exception('Unknown archive metadata version')
            decode_dict(archive, (b'name', b'hostname', b'username', b'time'))  # fixme: argv
            items_buffer = ChunkBuffer(self.key)
            items_buffer.write_chunk = add_callback
            for item in robust_iterator(archive):
                if b'chunks' in item:
                    verify_file_chunks(item)
                items_buffer.add(item)
            items_buffer.flush(flush=True)
            for previous_item_id in archive[b'items']:
                mark_as_possibly_superseded(previous_item_id)
            archive[b'items'] = items_buffer.chunks
            data = msgpack.packb(archive, unicode_errors='surrogateescape')
            new_archive_id = self.key.id_hash(data)
            cdata = self.key.encrypt(data)
            add_reference(new_archive_id, len(data), len(cdata), cdata)
            info[b'id'] = new_archive_id
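
The missing_chunk_detector above is a stateful groupby key: the counter increments whenever membership in self.chunks flips, so itertools.groupby splits the chunk list into maximal runs, with odd states marking the runs of missing chunks. The same idea in isolation, with a plain predicate standing in for the chunk lookup:

    from itertools import groupby

    def runs(seq, is_bad):
        # The counter bumps whenever the predicate flips, so groupby yields
        # maximal runs; odd states are the runs where is_bad() holds.
        _state = 0
        def detector(x):
            nonlocal _state
            if _state % 2 != int(is_bad(x)):
                _state += 1
            return _state
        for state, group in groupby(seq, detector):
            yield state % 2 == 1, list(group)

    # Partition numbers into runs of negatives ("bad") and non-negatives
    for bad, group in runs([1, 2, -5, -6, 3, -7], lambda x: x < 0):
        print('skip' if bad else 'keep', group)
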
Example #4
 def load(self, id):
     self.id = id
     self.metadata = self._load_meta(self.id)
     decode_dict(self.metadata, (b'name', b'hostname', b'username', b'time'))
     self.metadata[b'cmdline'] = [arg.decode('utf-8', 'surrogateescape') for arg in self.metadata[b'cmdline']]
     self.name = self.metadata[b'name']
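
Example #4 factors the fetch/decrypt/unpack sequence of Example #1 into a _load_meta helper whose body is not shown. A plausible reconstruction, assuming it simply mirrors Example #1:

    def _load_meta(self, id):
        # Presumed body, mirroring Example #1: fetch, decrypt, unpack, check version
        data = self.key.decrypt(id, self.repository.get(id))
        metadata = msgpack.unpackb(data)
        if metadata[b'version'] != 1:
            raise Exception('Unknown archive metadata version')
        return metadata
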
Example #5
    def rebuild_refcounts(self):
        """Rebuild object reference counts by walking the metadata

        Missing and/or incorrect data is repaired when detected
        """
        # Exclude the manifest from chunks
        del self.chunks[Manifest.MANIFEST_ID]

        def mark_as_possibly_superseded(id_):
            if self.chunks.get(id_, (0,))[0] == 0:
                self.possibly_superseded.add(id_)

        def add_callback(chunk):
            id_ = self.key.id_hash(chunk)
            cdata = self.key.encrypt(chunk)
            add_reference(id_, len(chunk), len(cdata), cdata)
            return id_

        def add_reference(id_, size, csize, cdata=None):
            try:
                count, _, _ = self.chunks[id_]
                self.chunks[id_] = count + 1, size, csize
            except KeyError:
                assert cdata is not None
                self.chunks[id_] = 1, size, csize
                if self.repair:
                    self.repository.put(id_, cdata)

        def verify_file_chunks(item):
            """Verifies that all file chunks are present

            Missing file chunks will be replaced with new chunks of the same
            length containing all zeros.
            """
            offset = 0
            chunk_list = []
            for chunk_id, size, csize in item[b'chunks']:
                if chunk_id not in self.chunks:
                    # If a file chunk is missing, create an all-zero replacement chunk
                    self.report_progress('{}: Missing file chunk detected (Byte {}-{})'.format(
                        item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size), error=True)
                    data = bytes(size)
                    chunk_id = self.key.id_hash(data)
                    cdata = self.key.encrypt(data)
                    csize = len(cdata)
                    add_reference(chunk_id, size, csize, cdata)
                else:
                    add_reference(chunk_id, size, csize)
                chunk_list.append((chunk_id, size, csize))
                offset += size
            item[b'chunks'] = chunk_list

        def robust_iterator(archive):
            """Iterates through all archive items

            Missing item chunks will be skipped and the msgpack stream will be restarted
            """
            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item)
            _state = 0
            def missing_chunk_detector(chunk_id):
                nonlocal _state
                if _state % 2 != int(chunk_id not in self.chunks):
                    _state += 1
                return _state
            for state, items in groupby(archive[b'items'], missing_chunk_detector):
                items = list(items)
                if state % 2:
                    self.report_progress('Archive metadata damage detected', error=True)
                    continue
                if state > 0:
                    unpacker.resync()
                for chunk_id, cdata in zip(items, repository.get_many(items)):
                    unpacker.feed(self.key.decrypt(chunk_id, cdata))
                    for item in unpacker:
                        yield item

        repository = cache_if_remote(self.repository)
        num_archives = len(self.manifest.archives)
        for i, (name, info) in enumerate(list(self.manifest.archives.items()), 1):
            self.report_progress('Analyzing archive {} ({}/{})'.format(name, i, num_archives))
            archive_id = info[b'id']
            if archive_id not in self.chunks:
                self.report_progress('Archive metadata block is missing', error=True)
                del self.manifest.archives[name]
                continue
            mark_as_possibly_superseded(archive_id)
            cdata = self.repository.get(archive_id)
            data = self.key.decrypt(archive_id, cdata)
            archive = StableDict(msgpack.unpackb(data))
            if archive[b'version'] != 1:
                raise Exception('Unknown archive metadata version')
            decode_dict(archive, (b'name', b'hostname', b'username', b'time'))  # fixme: argv
            items_buffer = ChunkBuffer(self.key)
            items_buffer.write_chunk = add_callback
            for item in robust_iterator(archive):
                if b'chunks' in item:
                    verify_file_chunks(item)
                items_buffer.add(item)
            items_buffer.flush(flush=True)
            for previous_item_id in archive[b'items']:
                mark_as_possibly_superseded(previous_item_id)
            archive[b'items'] = items_buffer.chunks
            data = msgpack.packb(archive, unicode_errors='surrogateescape')
            new_archive_id = self.key.id_hash(data)
            cdata = self.key.encrypt(data)
            add_reference(new_archive_id, len(data), len(cdata), cdata)
            info[b'id'] = new_archive_id
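
Example #5 replaces the inline isinstance check of Example #3 with a cache_if_remote helper. Its definition is not part of the excerpt; given the code it supersedes, it presumably amounts to:

    def cache_if_remote(repository):
        # Presumed helper, equivalent to the inline check in Example #3:
        # wrap remote repositories in a read cache, pass local ones through
        if isinstance(repository, RemoteRepository):
            return RepositoryCache(repository)
        return repository
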
Example #6
 def get_next(self):
     while True:
         n = next(self.unpacker)
         decode_dict(n, (b"path", b"source", b"user", b"group"))
         if not self.filter or self.filter(n):
             return n
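
get_next loops until the unpacker yields an item that passes the filter; when the stream is exhausted, next(self.unpacker) raises StopIteration, which the caller is left to handle. A hypothetical driver loop, where it stands in for an instance of the class owning get_next:

    items = []
    try:
        while True:
            items.append(it.get_next())
    except StopIteration:
        pass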