Example #1
0
    def chunk_move(self, path, chunk_id):
        """Move the chunk stored at `path` to another rawx service.

        When `self.allow_links` is enabled, also relocate every hard link
        recorded in the chunk's metadata next to the new chunk.

        :param path: filesystem path of the chunk to move
        :param chunk_id: ID of the chunk (also re-read from metadata)
        :raises exc.OrphanChunk: if an owning content cannot be found
        """
        meta = self.load_chunk_metadata(path, chunk_id)
        container_id = meta['container_id']
        content_id = meta['content_id']
        chunk_id = meta['chunk_id']

        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise exc.OrphanChunk('Content not found')

        new_chunk = content.move_chunk(chunk_id)

        self.logger.info('moved chunk http://%s/%s to %s', self.address,
                         chunk_id, new_chunk['url'])

        if self.allow_links:
            old_links = meta['links']
            # items() instead of iteritems(): dict.iteritems() was removed
            # in Python 3 (the sibling implementation already uses items()).
            for chunk_id, fullpath in old_links.items():
                account, container, _, _, content_id = \
                    decode_fullpath(fullpath)
                container_id = cid_from_name(account, container)

                try:
                    content = self.content_factory.get(container_id,
                                                       content_id)
                except ContentNotFound:
                    raise exc.OrphanChunk('Content not found')

                new_linked_chunk = content.move_linked_chunk(
                    chunk_id, new_chunk['url'])

                self.logger.info('moved chunk http://%s/%s to %s',
                                 self.address, chunk_id,
                                 new_linked_chunk['url'])
示例#2
0
    def chunk_audit(self, path):
        """Audit a single chunk file.

        Stream through the chunk (rate-limited) so the reader can verify
        size and checksum, then cross-check the chunk's extended
        attributes against the container's view of the content.

        :param path: filesystem path of the chunk to audit
        :raises exc.FaultyChunk: if the chunk does not match its metadata
            or the container's records
        :raises exc.OrphanChunk: if the chunk is not referenced by any
            content, or the content is gone
        """
        with open(path) as f:
            try:
                meta = read_chunk_metadata(f)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk(
                    'Missing extended attribute %s' % e)
            size = int(meta['chunk_size'])
            md5_checksum = meta['chunk_hash'].lower()
            reader = ChunkReader(f, size, md5_checksum)
            with closing(reader):
                for buf in reader:
                    buf_len = len(buf)
                    # Rate-limit so the audit does not saturate disk I/O.
                    self.bytes_running_time = ratelimit(
                        self.bytes_running_time,
                        self.max_bytes_per_second,
                        increment=buf_len)
                    self.bytes_processed += buf_len
                    self.total_bytes_processed += buf_len

            try:
                content_cid = meta['content_cid']
                content_path = meta['content_path']
                content_attr, data = self.container_client.content_show(
                    cid=content_cid, path=content_path)

                # Check chunk data
                chunks_nb = 0
                chunk_data = None
                for c in data:
                    if c['url'].endswith(meta['chunk_id']):
                        chunks_nb += 1  # FIXME: won't work with DUP
                        chunk_data = c
                if not chunk_data:
                    raise exc.OrphanChunk('Not found in content')

                if chunk_data['size'] != int(meta['chunk_size']):
                    raise exc.FaultyChunk('Invalid chunk size found')

                if chunk_data['hash'] != meta['chunk_hash']:
                    raise exc.FaultyChunk('Invalid chunk hash found')

                if chunk_data['pos'] != meta['chunk_pos']:
                    raise exc.FaultyChunk('Invalid chunk position found')

                # Check content data.  Cast both sides to int: xattr
                # values are strings, and every other numeric comparison
                # in this method casts explicitly — comparing str to int
                # would spuriously report a faulty chunk.
                if int(content_attr['length']) != int(meta['content_size']):
                    raise exc.FaultyChunk('Invalid content size found')

                if chunks_nb != int(meta['content_chunksnb']):
                    self.logger.warn('Invalid number of chunks found')
                    # TODO: really count chunks and enable the exception
                    # raise exc.FaultyChunk('Invalid number of chunks found')

            except exc.NotFound:
                raise exc.OrphanChunk('Chunk not found in container')
示例#3
0
File: mover.py  Project: ldenel/oio-sds
    def chunk_move(self, path):
        """Move the chunk at `path` to a newly allocated spare location.

        Asks the container for a spare chunk, copies the data there,
        updates the container's raw chunk records, then deletes the old
        copy.

        :param path: filesystem path of the chunk to move
        :raises exc.OrphanChunk: if the content or the chunk cannot be
            found in the container
        """
        meta = self.load_chunk_metadata(path)
        content_cid = meta['content_cid']
        content_path = meta['content_path']

        chunk_url = 'http://%s/%s' % \
            (self.address, meta['chunk_id'])

        try:
            _, data = self.container_client.content_show(cid=content_cid,
                                                         path=content_path)
        except exc.NotFound:
            raise exc.OrphanChunk('Content not found')
        # Partition the chunks at the same position into the one being
        # moved and its peers ('notin') in a single pass.  The previous
        # implementation removed items from 'notin' while iterating over
        # it, which can skip elements.
        current_chunk = None
        notin = []
        for c in data:
            if c['pos'] != meta['chunk_pos']:
                continue
            if c['url'] == chunk_url:
                current_chunk = c
            else:
                notin.append(c)
        if not current_chunk:
            raise exc.OrphanChunk('Chunk not found in content')
        spare_data = {'notin': notin, 'broken': [current_chunk], 'size': 0}
        spare_resp = self.container_client.content_spare(cid=content_cid,
                                                         path=content_path,
                                                         data=spare_data)

        new_chunk = spare_resp['chunks'][0]
        self.blob_client.chunk_copy(current_chunk['url'], new_chunk['id'])

        old = [{
            'type': 'chunk',
            'id': current_chunk['url'],
            'hash': meta['chunk_hash'],
            'size': int(meta['chunk_size'])
        }]
        new = [{
            'type': 'chunk',
            'id': new_chunk['id'],
            'hash': meta['chunk_hash'],
            'size': int(meta['chunk_size'])
        }]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(cid=content_cid,
                                                   data=update_data)

        self.blob_client.chunk_delete(current_chunk['url'])

        self.logger.info('moved chunk %s to %s', current_chunk['url'],
                         new_chunk['id'])
示例#4
0
    def chunk_file_audit(self, chunk_file, chunk_id):
        """Audit an open chunk file against its metadata and container.

        Stream through the chunk (rate-limited) so the reader can verify
        size and checksum, then cross-check the chunk's attributes
        against what the container reports for the content.

        :param chunk_file: open file object for the chunk
        :param chunk_id: ID of the chunk being audited
        :raises exc.FaultyChunk: on metadata/data mismatch
        :raises exc.OrphanChunk: if the chunk or content is unknown
        """
        try:
            meta, _ = read_chunk_metadata(chunk_file, chunk_id)
        except exc.MissingAttribute as err:
            raise exc.FaultyChunk(err)
        size = int(meta['chunk_size'])
        md5_checksum = meta['chunk_hash'].lower()
        reader = ChunkReader(chunk_file,
                             size,
                             md5_checksum,
                             compression=meta.get("compression", ""))
        with closing(reader):
            for buf in reader:
                buf_len = len(buf)
                # Rate-limit so the audit does not saturate disk I/O.
                self.bytes_running_time = ratelimit(self.bytes_running_time,
                                                    self.max_bytes_per_second,
                                                    increment=buf_len)
                self.bytes_processed += buf_len
                self.total_bytes_processed += buf_len

        try:
            container_id = meta['container_id']
            content_id = meta['content_id']
            _obj_meta, data = self.container_client.content_locate(
                cid=container_id, content=content_id, properties=False)

            # Check chunk data.  (A 'metachunks' set of positions was
            # built here previously but never read; it has been dropped.)
            chunk_data = None
            for c in data:
                if c['url'].endswith(meta['chunk_id']):
                    chunk_data = c
            if not chunk_data:
                raise exc.OrphanChunk('Not found in content')

            metachunk_size = meta.get('metachunk_size')
            if metachunk_size is not None \
                    and chunk_data['size'] != int(metachunk_size):
                raise exc.FaultyChunk('Invalid metachunk size found')

            metachunk_hash = meta.get('metachunk_hash')
            if metachunk_hash is not None \
                    and chunk_data['hash'] != metachunk_hash:
                raise exc.FaultyChunk('Invalid metachunk hash found')

            if chunk_data['pos'] != meta['chunk_pos']:
                raise exc.FaultyChunk('Invalid chunk position found')

        except exc.NotFound:
            raise exc.OrphanChunk('Chunk not found in container')
示例#5
0
File: mover.py  Project: lhllacp/oio-sds
    def chunk_move(self, path, chunk_id):
        """Move the chunk stored at `path` to another rawx service.

        Chunks outside the configured [min_chunk_size, max_chunk_size]
        range are skipped.  When `self.allow_links` is enabled, hard
        links recorded in the chunk's metadata are relocated as well.

        :param path: filesystem path of the chunk to move
        :param chunk_id: ID of the chunk (also re-read from metadata)
        :raises exc.OrphanChunk: if an owning content cannot be found
        """
        meta = self.load_chunk_metadata(path, chunk_id)
        container_id = meta['container_id']
        content_id = meta['content_id']
        chunk_id = meta['chunk_id']

        # Maybe skip the chunk because it doesn't match the size constraint
        chunk_size = int(meta['chunk_size'])
        min_chunk_size = int(self.conf.get('min_chunk_size', 0))
        # max_chunk_size of 0 means "no upper bound" (checked below).
        max_chunk_size = int(self.conf.get('max_chunk_size', 0))
        if chunk_size < min_chunk_size:
            self.logger.debug("SKIP %s too small", path)
            return
        if max_chunk_size > 0 and chunk_size > max_chunk_size:
            self.logger.debug("SKIP %s too big", path)
            return

        # Start moving the chunk
        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise exc.OrphanChunk('Content not found')

        new_chunk = content.move_chunk(
            chunk_id,
            service_id=self.service_id,
            fake_excluded_chunks=self.fake_excluded_chunks)

        self.logger.info('moved chunk http://%s/%s to %s', self.service_id,
                         chunk_id, new_chunk['url'])

        if self.allow_links:
            old_links = meta['links']
            for chunk_id, fullpath in old_links.items():
                # pylint: disable=unbalanced-tuple-unpacking
                account, container, _, _, content_id = \
                    decode_fullpath(fullpath)
                container_id = cid_from_name(account, container)

                try:
                    content = self.content_factory.get(container_id,
                                                       content_id)
                except ContentNotFound:
                    raise exc.OrphanChunk('Content not found')

                new_linked_chunk = content.move_linked_chunk(
                    chunk_id, new_chunk['url'])

                self.logger.info('moved chunk http://%s/%s to %s',
                                 self.service_id, chunk_id,
                                 new_linked_chunk['url'])
示例#6
0
    def rebuild_chunk(self, chunk_id):
        """Rebuild a missing or broken chunk from one of its duplicates.

        :param chunk_id: ID of the chunk to rebuild
        :raises exc.OrphanChunk: if the chunk is not part of this content
        :raises UnrecoverableContent: if no duplicate exists or none can
            be copied
        """
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None:
            raise exc.OrphanChunk("Chunk not found in content")

        duplicate_chunks = self.chunks.filter(pos=current_chunk.pos).exclude(
            id=chunk_id).all()
        if len(duplicate_chunks) == 0:
            raise UnrecoverableContent("No copy of missing chunk")

        spare_urls = self._get_spare_chunk(duplicate_chunks, [current_chunk])

        uploaded = False
        for src in duplicate_chunks:
            try:
                self.blob_client.chunk_copy(src.url, spare_urls[0])
                self.logger.debug("copy chunk from %s to %s", src.url,
                                  spare_urls[0])
                uploaded = True
                break
            except Exception as e:
                # str(e), not e.message: BaseException.message was removed
                # in Python 3 and would raise AttributeError right here.
                self.logger.warn("Failed to copy chunk from %s to %s: %s",
                                 src.url, spare_urls[0], str(e))
        if not uploaded:
            raise UnrecoverableContent("No copy available of missing chunk")

        self._update_spare_chunk(current_chunk, spare_urls[0])
示例#7
0
    def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None):
        """Rebuild a missing or broken chunk from one of its duplicates.

        :param chunk_id: ID of the chunk to rebuild (may be None when
            rebuilding purely by position)
        :param allow_same_rawx: allow the spare chunk to land on the same
            rawx service as the broken one
        :param chunk_pos: position to rebuild when the chunk ID is not
            known to the content
        :raises exc.OrphanChunk: if neither ID nor position identifies a
            chunk of this content
        :raises UnrecoverableContent: if no duplicate exists or none can
            be copied
        """
        # Identify the chunk to rebuild
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None and chunk_pos is None:
            raise exc.OrphanChunk("Chunk not found in content")
        elif chunk_pos is None:
            chunk_pos = current_chunk.pos

        duplicate_chunks = self.chunks.filter(pos=chunk_pos).exclude(
            id=chunk_id).all()
        if len(duplicate_chunks) == 0:
            raise UnrecoverableContent("No copy of missing chunk")

        if current_chunk is None:
            # Synthesize a placeholder chunk from a duplicate so the
            # registration step below has something to describe.
            chunk = {}
            chunk['hash'] = duplicate_chunks[0].checksum
            chunk['size'] = duplicate_chunks[0].size
            chunk['url'] = ''
            chunk['pos'] = chunk_pos
            current_chunk = Chunk(chunk)

        # Find a spare chunk address
        broken_list = list()
        if not allow_same_rawx and chunk_id is not None:
            broken_list.append(current_chunk)
        spare_urls, _quals = self._get_spare_chunk(duplicate_chunks,
                                                   broken_list)
        spare_url = spare_urls[0]

        # Actually create the spare chunk, by duplicating a good one
        uploaded = False
        for src in duplicate_chunks:
            try:
                self.blob_client.chunk_copy(src.url,
                                            spare_url,
                                            chunk_id=chunk_id,
                                            fullpath=self.full_path,
                                            cid=self.container_id,
                                            path=self.path,
                                            version=self.version,
                                            content_id=self.content_id)
                self.logger.debug('Chunk copied from %s to %s, registering it',
                                  src.url, spare_url)
                uploaded = True
                break
            except Exception as err:
                # Log str(err), not err.message: BaseException.message was
                # removed in Python 3 and would raise AttributeError here.
                self.logger.warn("Failed to copy chunk from %s to %s: %s %s",
                                 src.url, spare_url, type(err), str(err))
        if not uploaded:
            raise UnrecoverableContent("No copy available of missing chunk")

        # Register the spare chunk in object's metadata
        if chunk_id is None:
            self._add_raw_chunk(current_chunk, spare_url)
        else:
            self._update_spare_chunk(current_chunk, spare_url)
        self.logger.debug('Chunk %s repaired in %s', chunk_id or chunk_pos,
                          spare_url)
示例#8
0
    def chunk_audit(self, path):
        """Audit a single chunk file.

        Stream through the chunk (rate-limited) so the reader can verify
        size and checksum, then cross-check the chunk's extended
        attributes against the container's view of the content.

        :param path: filesystem path of the chunk to audit
        :raises exc.FaultyChunk: on metadata/data mismatch
        :raises exc.OrphanChunk: if the chunk or content is unknown
        """
        with open(path) as f:
            try:
                meta = read_chunk_metadata(f)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk('Missing extended attribute %s' % e)
            size = int(meta['chunk_size'])
            md5_checksum = meta['chunk_hash'].lower()
            reader = ChunkReader(f, size, md5_checksum)
            with closing(reader):
                for buf in reader:
                    buf_len = len(buf)
                    # Rate-limit so the audit does not saturate disk I/O.
                    self.bytes_running_time = ratelimit(
                        self.bytes_running_time,
                        self.max_bytes_per_second,
                        increment=buf_len)
                    self.bytes_processed += buf_len
                    self.total_bytes_processed += buf_len

            try:
                container_id = meta['container_id']
                content_path = meta['content_path']
                content_attr, data = self.container_client.content_show(
                    cid=container_id, path=content_path)

                # Check chunk data.  (A 'metachunks' set of positions was
                # built here previously but never read; it was dropped.)
                chunk_data = None
                for c in data:
                    if c['url'].endswith(meta['chunk_id']):
                        chunk_data = c
                if not chunk_data:
                    raise exc.OrphanChunk('Not found in content')

                if chunk_data['size'] != int(meta['chunk_size']):
                    raise exc.FaultyChunk('Invalid chunk size found')

                if chunk_data['hash'] != meta['chunk_hash']:
                    raise exc.FaultyChunk('Invalid chunk hash found')

                if chunk_data['pos'] != meta['chunk_pos']:
                    raise exc.FaultyChunk('Invalid chunk position found')

            except exc.NotFound:
                raise exc.OrphanChunk('Chunk not found in container')
示例#9
0
    def move_chunk(self,
                   chunk_id,
                   check_quality=False,
                   dry_run=False,
                   max_attempts=3,
                   **kwargs):
        """
        Move a chunk to another place. Optionally ensure that the
        new place is an improvement over the current one.

        :param chunk_id: chunk ID, or a Chunk instance
        :param check_quality: verify the new location is an improvement
        :param dry_run: log what would be done without doing it
        :param max_attempts: passed to the spare-chunk lookup
        :returns: the raw (dict) description of the moved chunk
        :raises exc.OrphanChunk: if the chunk is not part of this content
        """
        if isinstance(chunk_id, Chunk):
            current_chunk = chunk_id
            chunk_id = current_chunk.id
        else:
            current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None or current_chunk not in self.chunks:
            raise exc.OrphanChunk("Chunk not found in content")

        # Peers in the same metachunk must be excluded from spare choices.
        other_chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(id=chunk_id).all()

        spare_urls, qualities = self._get_spare_chunk(
            other_chunks, [current_chunk],
            check_quality=check_quality,
            max_attempts=max_attempts,
            **kwargs)

        if dry_run:
            self.logger.info("Dry-run: would copy chunk from %s to %s",
                             current_chunk.url, spare_urls[0])
        else:
            self.logger.info("Copying chunk from %s to %s", current_chunk.url,
                             spare_urls[0])
            # TODO(FVE): retry to copy (max_attempts times)
            self.blob_client.chunk_copy(current_chunk.url,
                                        spare_urls[0],
                                        chunk_id=chunk_id,
                                        fullpath=self.full_path,
                                        cid=self.container_id,
                                        path=self.path,
                                        version=self.version,
                                        content_id=self.content_id,
                                        **kwargs)

            # Register the new location before deleting the old copy, so
            # a failure in between leaves a reachable chunk.
            self._update_spare_chunk(current_chunk, spare_urls[0])

            # Best-effort delete: a leftover old chunk is only garbage,
            # not a correctness problem.
            try:
                self.blob_client.chunk_delete(current_chunk.url, **kwargs)
            except Exception as err:
                self.logger.warn("Failed to delete chunk %s: %s",
                                 current_chunk.url, err)

        # Note: even in dry-run mode the in-memory chunk is updated.
        current_chunk.url = spare_urls[0]
        current_chunk.quality = qualities[current_chunk.url]

        return current_chunk.raw()
示例#10
0
    def chunk_move(self, path):
        """Relocate the chunk stored at `path` to another rawx service.

        :param path: filesystem path of the chunk to move
        :raises exc.OrphanChunk: if the owning content cannot be found
        """
        meta = self.load_chunk_metadata(path)
        chunk_id = meta['chunk_id']
        chunk_url = 'http://%s/%s' % (self.address, meta['chunk_id'])

        try:
            content = self.content_factory.get(
                meta['container_id'], meta['content_id'])
        except ContentNotFound:
            raise exc.OrphanChunk('Content not found')

        new_chunk = content.move_chunk(chunk_id)
        self.logger.info('moved chunk %s to %s', chunk_url, new_chunk['url'])
示例#11
0
    def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None):
        """Rebuild a missing or broken chunk from one of its duplicates.

        :param chunk_id: ID of the chunk to rebuild (may be None when
            rebuilding purely by position)
        :param allow_same_rawx: allow the spare chunk to land on the same
            rawx service as the broken one
        :param chunk_pos: position to rebuild when the chunk ID is not
            known to the content
        :raises exc.OrphanChunk: if neither ID nor position identifies a
            chunk of this content
        :raises UnrecoverableContent: if no duplicate exists or none can
            be copied
        """
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None and chunk_pos is None:
            raise exc.OrphanChunk("Chunk not found in content")
        elif chunk_pos is None:
            chunk_pos = current_chunk.pos

        duplicate_chunks = self.chunks.filter(
            pos=chunk_pos).exclude(id=chunk_id).all()
        if len(duplicate_chunks) == 0:
            raise UnrecoverableContent("No copy of missing chunk")

        if current_chunk is None:
            # Synthesize a placeholder chunk from a duplicate so the
            # registration step below has something to describe.
            chunk = {}
            chunk['hash'] = duplicate_chunks[0].checksum
            chunk['size'] = duplicate_chunks[0].size
            chunk['url'] = ''
            chunk['pos'] = chunk_pos
            current_chunk = Chunk(chunk)

        broken_list = list()
        if not allow_same_rawx and chunk_id is not None:
            broken_list.append(current_chunk)
        spare_urls = self._get_spare_chunk(
            duplicate_chunks, broken_list)

        uploaded = False
        for src in duplicate_chunks:
            try:
                self.blob_client.chunk_copy(src.url, spare_urls[0])
                self.logger.debug('Chunk copied from %s to %s, registering it',
                                  src.url, spare_urls[0])
                uploaded = True
                break
            except Exception as err:
                # Log str(err), not err.message: BaseException.message was
                # removed in Python 3 and would raise AttributeError here.
                self.logger.warn(
                    "Failed to copy chunk from %s to %s: %s", src.url,
                    spare_urls[0], str(err))
        if not uploaded:
            raise UnrecoverableContent("No copy available of missing chunk")

        if chunk_id is None:
            self._add_raw_chunk(current_chunk, spare_urls[0])
        else:
            self._update_spare_chunk(current_chunk, spare_urls[0])
        self.logger.info('Chunk %s repaired in %s',
                         chunk_id or chunk_pos, spare_urls[0])
示例#12
0
    def move_linked_chunk(self, chunk_id, from_url):
        """Recreate the hard link for `chunk_id` beside `from_url`, then
        register the new location and delete the old copy (best effort).

        :param chunk_id: ID of the linked chunk to move
        :param from_url: URL the new link is created from
        :returns: the raw (dict) description of the moved chunk
        :raises exc.OrphanChunk: if the chunk is not part of this content
        """
        chunk = self.chunks.filter(id=chunk_id).one()
        if chunk is None:
            raise exc.OrphanChunk("Chunk not found in content")

        _unused, to_url = self.blob_client.chunk_link(
            from_url, None, self.full_path)
        self.logger.debug("link chunk %s from %s to %s",
                          chunk_id, from_url, to_url)

        self._update_spare_chunk(chunk, to_url)

        # Best-effort delete of the old copy; a leftover is only garbage.
        try:
            self.blob_client.chunk_delete(chunk.url)
        except Exception as err:
            self.logger.warn("Failed to delete chunk %s: %s", chunk.url, err)

        chunk.url = to_url
        return chunk.raw()
示例#13
0
    def chunk_rebuild(self, container_id, content_id, chunk_id):
        """Rebuild one chunk of a content and update accounting.

        :raises exc.OrphanChunk: if the content or the chunk cannot be
            found
        """
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)',
                         container_id, content_id, chunk_id)

        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise exc.OrphanChunk('Content not found')

        chunk = content.chunks.filter(id=chunk_id).one()
        if chunk is None:
            # Use exc.OrphanChunk for consistency with the clause above
            # (a bare OrphanChunk may not even be in scope here).
            raise exc.OrphanChunk("Chunk not found in content")
        chunk_size = chunk.size

        content.rebuild_chunk(chunk_id, allow_same_rawx=self.allow_same_rawx)

        # Drop the chunk from the reverse directory of this volume.
        self.rdir_client.chunk_delete(self.volume, container_id, content_id,
                                      chunk_id)

        self.bytes_processed += chunk_size
        self.total_bytes_processed += chunk_size
示例#14
0
    def move_chunk(self,
                   chunk_id,
                   service_id=None,
                   check_quality=False,
                   dry_run=False,
                   max_attempts=3,
                   **kwargs):
        """
        Move a chunk to another place. Optionally ensure that the
        new place is an improvement over the current one.

        :param chunk_id: chunk ID, or a Chunk instance
        :param service_id: rawx service hosting the chunk, used to pick
            the right one when several chunks share the same ID
        :param check_quality: verify the new location is an improvement
        :param dry_run: log what would be done without doing it
        :param max_attempts: passed to the spare-chunk lookup
        :returns: the raw (dict) description of the moved chunk
        :raises exc.ChunkException: if the ID is ambiguous and no
            service ID was given
        :raises exc.OrphanChunk: if the chunk is not part of this content
        :raises UnrecoverableContent: if no source copy can be read
        """
        if isinstance(chunk_id, Chunk):
            current_chunk = chunk_id
            chunk_id = current_chunk.id
            service_id = current_chunk.host
        else:
            candidates = self.chunks.filter(id=chunk_id)
            if len(candidates) > 1:
                # Ambiguous ID: disambiguate by hosting service.
                if service_id is None:
                    raise exc.ChunkException(
                        "Several chunks with ID %s and no service ID" %
                        (chunk_id, ))
                candidates = candidates.filter(host=service_id)
            current_chunk = candidates.one()

        if current_chunk is None or current_chunk not in self.chunks:
            raise exc.OrphanChunk("Chunk not found in content")

        # Peers in the same metachunk must be excluded from spare choices.
        if service_id:
            other_chunks = self.chunks.filter(
                metapos=current_chunk.metapos).exclude(host=service_id).all()
        else:
            other_chunks = self.chunks.filter(
                metapos=current_chunk.metapos).exclude(id=chunk_id).all()

        spare_urls, qualities = self._get_spare_chunk(
            other_chunks, [current_chunk],
            position=current_chunk.pos,
            check_quality=check_quality,
            max_attempts=max_attempts,
            **kwargs)

        # Sort chunks by score to try to copy with higher score.
        # When scores are close together (e.g. [95, 94, 94, 93, 50]),
        # don't always start with the highest element.
        duplicate_chunks = self.chunks \
            .filter(pos=current_chunk.pos) \
            .sort(key=lambda chunk: _get_weighted_random_score(chunk.raw()),
                  reverse=True) \
            .all()
        if dry_run:
            self.logger.info('Dry-run: would copy chunk from %s to %s',
                             duplicate_chunks[0].url, spare_urls[0])
        else:
            # To reduce the load on the rawx to decommission,
            # use one of the rawx with a copy of the chunk to move.
            for src in duplicate_chunks:
                try:
                    self.logger.info('Copying chunk from %s to %s', src.url,
                                     spare_urls[0])
                    # TODO(FVE): retry to copy (max_attempts times)
                    self.blob_client.chunk_copy(src.url,
                                                spare_urls[0],
                                                chunk_id=chunk_id,
                                                fullpath=self.full_path,
                                                cid=self.container_id,
                                                path=self.path,
                                                version=self.version,
                                                content_id=self.content_id,
                                                **kwargs)
                    break
                except Exception as err:
                    self.logger.warn('Failed to copy chunk from %s to %s: %s',
                                     src.url, spare_urls[0], err)
                    # With a single candidate there is no fallback: let
                    # the original exception propagate.
                    if len(duplicate_chunks) == 1:
                        raise
            else:
                # for/else: every candidate failed without raising.
                raise UnrecoverableContent(
                    'No copy available of chunk to move')

            # Register the new location before deleting the old copy, so
            # a failure in between leaves a reachable chunk.
            self._update_spare_chunk(current_chunk, spare_urls[0])

            # Best-effort delete: a leftover old chunk is only garbage.
            try:
                self.blob_client.chunk_delete(current_chunk.url, **kwargs)
            except Exception as err:
                self.logger.warn("Failed to delete chunk %s: %s",
                                 current_chunk.url, err)

        # Note: even in dry-run mode the in-memory chunk is updated.
        current_chunk.url = spare_urls[0]
        current_chunk.quality = qualities[current_chunk.url]

        return current_chunk.raw()
示例#15
0
    def rebuild_chunk(self,
                      chunk_id,
                      allow_same_rawx=False,
                      chunk_pos=None,
                      allow_frozen_container=False):
        """Rebuild a missing or broken chunk from one of its duplicates.

        :param chunk_id: ID of the chunk to rebuild (may be None when
            rebuilding purely by position)
        :param allow_same_rawx: allow the spare chunk to land on the same
            rawx service as the broken one
        :param chunk_pos: position to rebuild when the chunk ID is not
            known to the content
        :param allow_frozen_container: register the repaired chunk even
            if the container is frozen
        :returns: the size of the rebuilt chunk
        :raises exc.OrphanChunk: if neither ID nor position identifies a
            chunk of this content
        :raises UnrecoverableContent: if no duplicate exists or none can
            be copied
        """
        # Identify the chunk to rebuild
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None and chunk_pos is None:
            raise exc.OrphanChunk("Chunk not found in content")
        elif chunk_pos is None:
            chunk_pos = current_chunk.pos

        # Sort chunks by score to try to copy with higher score.
        # When scores are close together (e.g. [95, 94, 94, 93, 50]),
        # don't always start with the highest element.
        duplicate_chunks = self.chunks \
            .filter(pos=chunk_pos) \
            .exclude(id=chunk_id) \
            .sort(key=lambda chunk: _get_weighted_random_score(chunk.raw()),
                  reverse=True) \
            .all()
        if len(duplicate_chunks) == 0:
            raise UnrecoverableContent("No copy of missing chunk")

        if current_chunk is None:
            # Synthesize a placeholder chunk from a duplicate so the
            # registration step below has something to describe.
            chunk = {}
            chunk['hash'] = duplicate_chunks[0].checksum
            chunk['size'] = duplicate_chunks[0].size
            chunk['url'] = ''
            chunk['pos'] = chunk_pos
            current_chunk = Chunk(chunk)

        # Find a spare chunk address
        broken_list = list()
        if not allow_same_rawx and chunk_id is not None:
            broken_list.append(current_chunk)
        spare_urls, _quals = self._get_spare_chunk(duplicate_chunks,
                                                   broken_list)
        spare_url = spare_urls[0]

        # Actually create the spare chunk, by duplicating a good one
        for src in duplicate_chunks:
            try:
                self.blob_client.chunk_copy(src.url,
                                            spare_url,
                                            chunk_id=chunk_id,
                                            fullpath=self.full_path,
                                            cid=self.container_id,
                                            path=self.path,
                                            version=self.version,
                                            content_id=self.content_id)
                self.logger.debug('Chunk copied from %s to %s, registering it',
                                  src.url, spare_url)
                break
            except Exception as err:
                self.logger.warn("Failed to copy chunk from %s to %s: %s %s",
                                 src.url, spare_url, type(err), err)
        else:
            # for/else: every candidate failed to copy.
            raise UnrecoverableContent("No copy available of missing chunk")

        # Register the spare chunk in object's metadata
        if chunk_id is None:
            self._add_raw_chunk(current_chunk,
                                spare_url,
                                frozen=allow_frozen_container)
        else:
            self._update_spare_chunk(current_chunk,
                                     spare_url,
                                     frozen=allow_frozen_container)
        self.logger.debug('Chunk %s repaired in %s', chunk_id or chunk_pos,
                          spare_url)

        return current_chunk.size