Example #1
 def sync(self):
     """Initializes cache by fetching and reading all archive indicies
     """
     def add(id, size, csize):
         try:
             count, size, csize = self.chunks[id]
             self.chunks[id] = count + 1, size, csize
         except KeyError:
             self.chunks[id] = 1, size, csize
     self.begin_txn()
     print('Initializing cache...')
     self.chunks.clear()
     unpacker = msgpack.Unpacker()
     repository = cache_if_remote(self.repository)
     for name, info in self.manifest.archives.items():
         archive_id = info[b'id']
         cdata = repository.get(archive_id)
         data = self.key.decrypt(archive_id, cdata)
         add(archive_id, len(data), len(cdata))
         archive = msgpack.unpackb(data)
         if archive[b'version'] != 1:
             raise Exception('Unknown archive metadata version')
         decode_dict(archive, (b'name',))
         print('Analyzing archive:', archive[b'name'])
         for key, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])):
             data = self.key.decrypt(key, chunk)
             add(key, len(data), len(chunk))
             unpacker.feed(data)
             for item in unpacker:
                 if b'chunks' in item:
                     for chunk_id, size, csize in item[b'chunks']:
                         add(chunk_id, size, csize)
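
The add() helper above keeps one (count, size, csize) tuple per chunk id and bumps the count each time the same chunk is seen again. A minimal, self-contained sketch of that reference-counting pattern, with a plain dict standing in for self.chunks (the names here are illustrative, not part of the attic/borg API):

    # Plain dict standing in for the cache's chunk index: id -> (count, size, csize)
    chunks = {}

    def add(id_, size, csize):
        try:
            # Chunk already counted: bump the refcount, keep the stored sizes.
            count, size, csize = chunks[id_]
            chunks[id_] = count + 1, size, csize
        except KeyError:
            # First sighting: record it with a refcount of 1.
            chunks[id_] = 1, size, csize

    add(b'chunk-a', 100, 60)
    add(b'chunk-a', 100, 60)
    add(b'chunk-b', 50, 30)
    print(chunks)  # {b'chunk-a': (2, 100, 60), b'chunk-b': (1, 50, 30)}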
Example #2
 def __init__(self, key, repository, manifest, archive):
     super(AtticOperations, self).__init__()
     self._inode_count = 0
     self.key = key
     self.repository = cache_if_remote(repository)
     self.items = {}
     self.parent = {}
     self.contents = defaultdict(dict)
     self.default_dir = {
         b'mode': 0o40755,
         b'mtime': int(time.time() * 1e9),
         b'uid': os.getuid(),
         b'gid': os.getgid()
     }
     self.pending_archives = {}
     self.cache = ItemCache()
     if archive:
         self.process_archive(archive)
     else:
         # Create root inode
         self.parent[1] = self.allocate_inode()
         self.items[1] = self.default_dir
         for archive_name in manifest.archives:
             # Create archive placeholder inode
             archive_inode = self.allocate_inode()
             self.items[archive_inode] = self.default_dir
             self.parent[archive_inode] = 1
             self.contents[1][os.fsencode(archive_name)] = archive_inode
             self.pending_archives[archive_inode] = Archive(
                 repository, key, manifest, archive_name)
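
Both __init__ examples model the mounted filesystem with three plain dictionaries: items maps an inode to its metadata, parent maps a child inode to its parent, and contents maps a directory inode to a name-to-inode table, with one placeholder directory per archive hanging off root inode 1. A reduced, self-contained sketch of that layout (the allocate_inode counter and the sample archive names are illustrative, not the actual FUSE wiring):

    import os
    from collections import defaultdict

    items = {}                    # inode -> metadata dict
    parent = {}                   # child inode -> parent inode
    contents = defaultdict(dict)  # directory inode -> {name: inode}

    _inode_count = 0
    def allocate_inode():
        global _inode_count
        _inode_count += 1
        return _inode_count

    root = allocate_inode()       # inode 1 acts as the filesystem root
    parent[root] = root
    items[root] = {b'mode': 0o40755}

    for archive_name in ('monday', 'tuesday'):
        inode = allocate_inode()  # one placeholder directory per archive
        items[inode] = {b'mode': 0o40755}
        parent[inode] = root
        contents[root][os.fsencode(archive_name)] = inode

    print(contents[root])         # {b'monday': 2, b'tuesday': 3}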
Example #3
    def sync(self):
        """Initializes cache by fetching and reading all archive indicies
        """
        def add(id, size, csize):
            try:
                count, size, csize = self.chunks[id]
                self.chunks[id] = count + 1, size, csize
            except KeyError:
                self.chunks[id] = 1, size, csize

        self.begin_txn()
        print('Initializing cache...')
        self.chunks.clear()
        unpacker = msgpack.Unpacker()
        repository = cache_if_remote(self.repository)
        for name, info in self.manifest.archives.items():
            archive_id = info[b'id']
            cdata = repository.get(archive_id)
            data = self.key.decrypt(archive_id, cdata)
            add(archive_id, len(data), len(cdata))
            archive = msgpack.unpackb(data)
            if archive[b'version'] != 1:
                raise Exception('Unknown archive metadata version')
            decode_dict(archive, (b'name', ))
            print('Analyzing archive:', archive[b'name'])
            for key, chunk in zip(archive[b'items'],
                                  repository.get_many(archive[b'items'])):
                data = self.key.decrypt(key, chunk)
                add(key, len(data), len(chunk))
                unpacker.feed(data)
                for item in unpacker:
                    if b'chunks' in item:
                        for chunk_id, size, csize in item[b'chunks']:
                            add(chunk_id, size, csize)
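
The sync() examples decrypt one repository chunk at a time and feed the bytes into a single msgpack.Unpacker, pulling out complete items as soon as enough data has arrived, so an item that straddles a chunk boundary is handled transparently. A small stand-alone demonstration of that feed/iterate pattern (the sample items and the split point are made up):

    import msgpack

    # Serialize a few items into one stream, then cut it at an arbitrary byte
    # offset to simulate item metadata spread across two repository chunks.
    stream = b''.join(msgpack.packb({b'path': p})
                      for p in (b'/etc/hosts', b'/etc/passwd', b'/bin/sh'))
    chunk_1, chunk_2 = stream[:10], stream[10:]

    unpacker = msgpack.Unpacker()
    for chunk in (chunk_1, chunk_2):
        unpacker.feed(chunk)
        for item in unpacker:  # yields only the items that are complete so far
            print(item)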
Example #4
File: fuse.py Project: aykit/borg
 def __init__(self, key, repository, manifest, archive):
     super(FuseOperations, self).__init__()
     self._inode_count = 0
     self.key = key
     self.repository = cache_if_remote(repository)
     self.items = {}
     self.parent = {}
     self.contents = defaultdict(dict)
     self.default_dir = {b'mode': 0o40755, b'mtime': int(time.time() * 1e9), b'uid': os.getuid(), b'gid': os.getgid()}
     self.pending_archives = {}
     self.accounted_chunks = {}
     self.cache = ItemCache()
     if archive:
         self.process_archive(archive)
     else:
         # Create root inode
         self.parent[1] = self.allocate_inode()
         self.items[1] = self.default_dir
         for archive_name in manifest.archives:
             # Create archive placeholder inode
             archive_inode = self.allocate_inode()
             self.items[archive_inode] = self.default_dir
             self.parent[archive_inode] = 1
             self.contents[1][os.fsencode(archive_name)] = archive_inode
             self.pending_archives[archive_inode] = Archive(repository, key, manifest, archive_name)
Example #5
    def rebuild_refcounts(self, last=None):
        """Rebuild object reference counts by walking the metadata

        Missing and/or incorrect data is repaired when detected
        """
        # Exclude the manifest from chunks
        del self.chunks[Manifest.MANIFEST_ID]

        def mark_as_possibly_superseded(id_):
            if self.chunks.get(id_, (0,))[0] == 0:
                self.possibly_superseded.add(id_)

        def add_callback(chunk):
            id_ = self.key.id_hash(chunk)
            cdata = self.key.encrypt(chunk)
            add_reference(id_, len(chunk), len(cdata), cdata)
            return id_

        def add_reference(id_, size, csize, cdata=None):
            try:
                count, _, _ = self.chunks[id_]
                self.chunks[id_] = count + 1, size, csize
            except KeyError:
                assert cdata is not None
                self.chunks[id_] = 1, size, csize
                if self.repair:
                    self.repository.put(id_, cdata)

        def verify_file_chunks(item):
            """Verifies that all file chunks are present

            Missing file chunks will be replaced with new chunks of the same
            length containing all zeros.
            """
            offset = 0
            chunk_list = []
            for chunk_id, size, csize in item[b'chunks']:
                if chunk_id not in self.chunks:
                    # If a file chunk is missing, create an all empty replacement chunk
                    self.report_progress('{}: Missing file chunk detected (Byte {}-{})'.format(item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size), error=True)
                    data = bytes(size)
                    chunk_id = self.key.id_hash(data)
                    cdata = self.key.encrypt(data)
                    csize = len(cdata)
                    add_reference(chunk_id, size, csize, cdata)
                else:
                    add_reference(chunk_id, size, csize)
                chunk_list.append((chunk_id, size, csize))
                offset += size
            item[b'chunks'] = chunk_list

        def robust_iterator(archive):
            """Iterates through all archive items

            Missing item chunks will be skipped and the msgpack stream will be restarted
            """
            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item)
            _state = 0

            def missing_chunk_detector(chunk_id):
                nonlocal _state
                if _state % 2 != int(chunk_id not in self.chunks):
                    _state += 1
                return _state

            for state, items in groupby(archive[b'items'], missing_chunk_detector):
                items = list(items)
                if state % 2:
                    self.report_progress('Archive metadata damage detected', error=True)
                    continue
                if state > 0:
                    unpacker.resync()
                for chunk_id, cdata in zip(items, repository.get_many(items)):
                    unpacker.feed(self.key.decrypt(chunk_id, cdata))
                    for item in unpacker:
                        yield item

        repository = cache_if_remote(self.repository)
        num_archives = len(self.manifest.archives)
        archive_items = sorted(self.manifest.archives.items(), reverse=True,
                               key=lambda name_info: name_info[1][b'time'])
        end = None if last is None else min(num_archives, last)
        for i, (name, info) in enumerate(archive_items[:end]):
            self.report_progress('Analyzing archive {} ({}/{})'.format(name, num_archives - i, num_archives))
            archive_id = info[b'id']
            if archive_id not in self.chunks:
                self.report_progress('Archive metadata block is missing', error=True)
                del self.manifest.archives[name]
                continue
            mark_as_possibly_superseded(archive_id)
            cdata = self.repository.get(archive_id)
            data = self.key.decrypt(archive_id, cdata)
            archive = StableDict(msgpack.unpackb(data))
            if archive[b'version'] != 1:
                raise Exception('Unknown archive metadata version')
            decode_dict(archive, (b'name', b'hostname', b'username', b'time'))  # fixme: argv
            items_buffer = ChunkBuffer(self.key)
            items_buffer.write_chunk = add_callback
            for item in robust_iterator(archive):
                if b'chunks' in item:
                    verify_file_chunks(item)
                items_buffer.add(item)
            items_buffer.flush(flush=True)
            for previous_item_id in archive[b'items']:
                mark_as_possibly_superseded(previous_item_id)
            archive[b'items'] = items_buffer.chunks
            data = msgpack.packb(archive, unicode_errors='surrogateescape')
            new_archive_id = self.key.id_hash(data)
            cdata = self.key.encrypt(data)
            add_reference(new_archive_id, len(data), len(cdata), cdata)
            info[b'id'] = new_archive_id
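
robust_iterator() relies on missing_chunk_detector to split the archive's chunk id list into alternating runs of readable and missing chunks: the counter is bumped whenever availability flips, itertools.groupby then groups consecutive ids sharing the same counter value, and odd-numbered groups (the missing runs) are reported and skipped while the unpacker is resynced before the next readable run. A self-contained sketch of just that grouping step, with a plain set standing in for self.chunks:

    from itertools import groupby

    present = {b'a', b'b', b'e'}                  # chunk ids known to the repository
    item_chunks = [b'a', b'b', b'c', b'd', b'e']  # chunk ids referenced by the archive

    _state = 0
    def missing_chunk_detector(chunk_id):
        # Increment on every transition between "present" and "missing", so
        # consecutive ids with the same availability share a group key.
        global _state
        if _state % 2 != int(chunk_id not in present):
            _state += 1
        return _state

    for state, ids in groupby(item_chunks, missing_chunk_detector):
        ids = list(ids)
        if state % 2:
            print('missing run, skipped:', ids)   # would trigger a resync
        else:
            print('readable run:', ids)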
Example #6
    def rebuild_refcounts(self):
        """Rebuild object reference counts by walking the metadata

        Missing and/or incorrect data is repaired when detected
        """
        # Exclude the manifest from chunks
        del self.chunks[Manifest.MANIFEST_ID]

        def mark_as_possibly_superseded(id_):
            if self.chunks.get(id_, (0,))[0] == 0:
                self.possibly_superseded.add(id_)

        def add_callback(chunk):
            id_ = self.key.id_hash(chunk)
            cdata = self.key.encrypt(chunk)
            add_reference(id_, len(chunk), len(cdata), cdata)
            return id_

        def add_reference(id_, size, csize, cdata=None):
            try:
                count, _, _ = self.chunks[id_]
                self.chunks[id_] = count + 1, size, csize
            except KeyError:
                assert cdata is not None
                self.chunks[id_] = 1, size, csize
                if self.repair:
                    self.repository.put(id_, cdata)

        def verify_file_chunks(item):
            """Verifies that all file chunks are present

            Missing file chunks will be replaced with new chunks of the same
            length containing all zeros.
            """
            offset = 0
            chunk_list = []
            for chunk_id, size, csize in item[b'chunks']:
                if chunk_id not in self.chunks:
                    # If a file chunk is missing, create an all empty replacement chunk
                    self.report_progress('{}: Missing file chunk detected (Byte {}-{})'.format(item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size), error=True)
                    data = bytes(size)
                    chunk_id = self.key.id_hash(data)
                    cdata = self.key.encrypt(data)
                    csize = len(cdata)
                    add_reference(chunk_id, size, csize, cdata)
                else:
                    add_reference(chunk_id, size, csize)
                chunk_list.append((chunk_id, size, csize))
                offset += size
            item[b'chunks'] = chunk_list

        def robust_iterator(archive):
            """Iterates through all archive items

            Missing item chunks will be skipped and the msgpack stream will be restarted
            """
            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item)
            _state = 0
            def missing_chunk_detector(chunk_id):
                nonlocal _state
                if _state % 2 != int(chunk_id not in self.chunks):
                    _state += 1
                return _state
            for state, items in groupby(archive[b'items'], missing_chunk_detector):
                items = list(items)
                if state % 2:
                    self.report_progress('Archive metadata damage detected', error=True)
                    continue
                if state > 0:
                    unpacker.resync()
                for chunk_id, cdata in zip(items, repository.get_many(items)):
                    unpacker.feed(self.key.decrypt(chunk_id, cdata))
                    for item in unpacker:
                        yield item

        repository = cache_if_remote(self.repository)
        num_archives = len(self.manifest.archives)
        for i, (name, info) in enumerate(list(self.manifest.archives.items()), 1):
            self.report_progress('Analyzing archive {} ({}/{})'.format(name, i, num_archives))
            archive_id = info[b'id']
            if archive_id not in self.chunks:
                self.report_progress('Archive metadata block is missing', error=True)
                del self.manifest.archives[name]
                continue
            mark_as_possibly_superseded(archive_id)
            cdata = self.repository.get(archive_id)
            data = self.key.decrypt(archive_id, cdata)
            archive = StableDict(msgpack.unpackb(data))
            if archive[b'version'] != 1:
                raise Exception('Unknown archive metadata version')
            decode_dict(archive, (b'name', b'hostname', b'username', b'time'))  # fixme: argv
            items_buffer = ChunkBuffer(self.key)
            items_buffer.write_chunk = add_callback
            for item in robust_iterator(archive):
                if b'chunks' in item:
                    verify_file_chunks(item)
                items_buffer.add(item)
            items_buffer.flush(flush=True)
            for previous_item_id in archive[b'items']:
                mark_as_possibly_superseded(previous_item_id)
            archive[b'items'] = items_buffer.chunks
            data = msgpack.packb(archive, unicode_errors='surrogateescape')
            new_archive_id = self.key.id_hash(data)
            cdata = self.key.encrypt(data)
            add_reference(new_archive_id, len(data), len(cdata), cdata)
            info[b'id'] = new_archive_id
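
verify_file_chunks() in both rebuild_refcounts() variants repairs a hole in a file by synthesizing an all-zero chunk of the original length, hashing and encrypting it, and swapping the new id into the item's chunk list. A reduced sketch of that replacement step, with hashlib.sha256 standing in for self.key.id_hash and the encryption/storage left out (these stand-ins are assumptions, not the attic/borg key API):

    import hashlib

    present_chunks = {}  # id -> (count, size, csize), standing in for self.chunks

    def replace_missing(chunk_id, size, csize):
        if chunk_id in present_chunks:
            return chunk_id, size, csize        # chunk is intact, keep the reference
        data = bytes(size)                      # all-zero payload of the original length
        new_id = hashlib.sha256(data).digest()  # stand-in for self.key.id_hash(data)
        present_chunks[new_id] = (1, size, len(data))  # csize == len(data): no encryption here
        return new_id, size, len(data)

    # A two-chunk file whose second chunk has gone missing from the repository:
    present_chunks[b'id-1'] = (1, 4096, 2000)
    item_chunks = [(b'id-1', 4096, 2000), (b'id-2', 4096, 2100)]
    item_chunks = [replace_missing(*c) for c in item_chunks]
    print(item_chunks[1][0] != b'id-2')         # True: the missing chunk was replaced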