示例#1
0
    def rebuild_chunk(self, chunk_id):
        """
        Rebuild a chunk by copying one of its duplicates to a spare location.

        :param chunk_id: ID of the chunk to rebuild.
        :raises exc.OrphanChunk: if the chunk is not part of this content.
        :raises UnrecoverableContent: if no duplicate of the chunk exists,
            or none of them could be copied.
        """
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None:
            raise exc.OrphanChunk("Chunk not found in content")

        # Chunks registered at the same position hold the same data.
        duplicate_chunks = self.chunks.filter(pos=current_chunk.pos).exclude(
            id=chunk_id).all()
        if len(duplicate_chunks) == 0:
            raise UnrecoverableContent("No copy of missing chunk")

        spare_urls = self._get_spare_chunk(duplicate_chunks, [current_chunk])

        # Try each duplicate in turn until one copy succeeds.
        uploaded = False
        for src in duplicate_chunks:
            try:
                self.blob_client.chunk_copy(src.url, spare_urls[0])
                self.logger.debug("copy chunk from %s to %s", src.url,
                                  spare_urls[0])
                uploaded = True
                break
            except Exception as err:
                # Log the exception itself, not err.message: the 'message'
                # attribute was deprecated in Python 2.6 and removed in
                # Python 3, so reading it can itself raise AttributeError.
                self.logger.warning("Failed to copy chunk from %s to %s: %s",
                                    src.url, spare_urls[0], err)
        if not uploaded:
            raise UnrecoverableContent("No copy available of missing chunk")

        self._update_spare_chunk(current_chunk, spare_urls[0])
示例#2
0
    def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None):
        """
        Rebuild a chunk by copying one of its duplicates to a spare location,
        then registering the spare in the object's metadata.

        :param chunk_id: ID of the chunk to rebuild (may be None when
            rebuilding by position only).
        :param allow_same_rawx: allow the spare chunk to live on the same
            rawx service as the broken one.
        :param chunk_pos: position of the chunk to rebuild; required when
            chunk_id does not resolve to a chunk of this content.
        :raises exc.OrphanChunk: if neither chunk_id nor chunk_pos
            identifies a chunk of this content.
        :raises UnrecoverableContent: if there is no duplicate to copy from,
            or all copy attempts failed.
        """
        # Identify the chunk to rebuild
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None and chunk_pos is None:
            raise exc.OrphanChunk("Chunk not found in content")
        elif chunk_pos is None:
            chunk_pos = current_chunk.pos

        duplicate_chunks = self.chunks.filter(pos=chunk_pos).exclude(
            id=chunk_id).all()
        if len(duplicate_chunks) == 0:
            raise UnrecoverableContent("No copy of missing chunk")

        if current_chunk is None:
            # The broken chunk is not registered anymore: rebuild a
            # description of it from one of its duplicates.
            chunk = {}
            chunk['hash'] = duplicate_chunks[0].checksum
            chunk['size'] = duplicate_chunks[0].size
            chunk['url'] = ''
            chunk['pos'] = chunk_pos
            current_chunk = Chunk(chunk)

        # Find a spare chunk address
        broken_list = list()
        if not allow_same_rawx and chunk_id is not None:
            broken_list.append(current_chunk)
        spare_urls, _quals = self._get_spare_chunk(duplicate_chunks,
                                                   broken_list)
        spare_url = spare_urls[0]

        # Actually create the spare chunk, by duplicating a good one
        uploaded = False
        for src in duplicate_chunks:
            try:
                self.blob_client.chunk_copy(src.url,
                                            spare_url,
                                            chunk_id=chunk_id,
                                            fullpath=self.full_path,
                                            cid=self.container_id,
                                            path=self.path,
                                            version=self.version,
                                            content_id=self.content_id)
                self.logger.debug('Chunk copied from %s to %s, registering it',
                                  src.url, spare_url)
                uploaded = True
                break
            except Exception as err:
                # Log the exception itself, not err.message: the 'message'
                # attribute was deprecated in Python 2.6 and removed in
                # Python 3, so reading it can itself raise AttributeError.
                self.logger.warning(
                    "Failed to copy chunk from %s to %s: %s %s",
                    src.url, spare_url, type(err), err)
        if not uploaded:
            raise UnrecoverableContent("No copy available of missing chunk")

        # Register the spare chunk in object's metadata
        if chunk_id is None:
            self._add_raw_chunk(current_chunk, spare_url)
        else:
            self._update_spare_chunk(current_chunk, spare_url)
        self.logger.debug('Chunk %s repaired in %s', chunk_id or chunk_pos,
                          spare_url)
示例#3
0
    def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None):
        """
        Rebuild a chunk by copying one of its duplicates to a spare location,
        then registering the spare in the object's metadata.

        :param chunk_id: ID of the chunk to rebuild (may be None when
            rebuilding by position only).
        :param allow_same_rawx: allow the spare chunk to live on the same
            rawx service as the broken one.
        :param chunk_pos: position of the chunk to rebuild; required when
            chunk_id does not resolve to a chunk of this content.
        :raises exc.OrphanChunk: if neither chunk_id nor chunk_pos
            identifies a chunk of this content.
        :raises UnrecoverableContent: if there is no duplicate to copy from,
            or all copy attempts failed.
        """
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None and chunk_pos is None:
            raise exc.OrphanChunk("Chunk not found in content")
        elif chunk_pos is None:
            chunk_pos = current_chunk.pos

        duplicate_chunks = self.chunks.filter(
            pos=chunk_pos).exclude(id=chunk_id).all()
        if len(duplicate_chunks) == 0:
            raise UnrecoverableContent("No copy of missing chunk")

        if current_chunk is None:
            # The broken chunk is not registered anymore: rebuild a
            # description of it from one of its duplicates.
            chunk = {}
            chunk['hash'] = duplicate_chunks[0].checksum
            chunk['size'] = duplicate_chunks[0].size
            chunk['url'] = ''
            chunk['pos'] = chunk_pos
            current_chunk = Chunk(chunk)

        broken_list = list()
        if not allow_same_rawx and chunk_id is not None:
            broken_list.append(current_chunk)
        spare_urls = self._get_spare_chunk(
            duplicate_chunks, broken_list)

        # Try each duplicate in turn until one copy succeeds.
        uploaded = False
        for src in duplicate_chunks:
            try:
                self.blob_client.chunk_copy(src.url, spare_urls[0])
                self.logger.debug('Chunk copied from %s to %s, registering it',
                                  src.url, spare_urls[0])
                uploaded = True
                break
            except Exception as err:
                # Log the exception itself, not err.message: the 'message'
                # attribute was deprecated in Python 2.6 and removed in
                # Python 3, so reading it can itself raise AttributeError.
                self.logger.warning(
                    "Failed to copy chunk from %s to %s: %s", src.url,
                    spare_urls[0], err)
        if not uploaded:
            raise UnrecoverableContent("No copy available of missing chunk")

        if chunk_id is None:
            self._add_raw_chunk(current_chunk, spare_urls[0])
        else:
            self._update_spare_chunk(current_chunk, spare_urls[0])
        self.logger.info('Chunk %s repaired in %s',
                         chunk_id or chunk_pos, spare_urls[0])
示例#4
0
 def _fetch_stream(self, chunks, storage_method, headers):
     """
     Yield the object's data, reading each metachunk from one of the
     chunks registered at its position.

     :param chunks: mapping of position -> list of chunk candidates.
     :param storage_method: storage method of the content (unused here,
         kept for interface compatibility with other implementations).
     :param headers: HTTP headers forwarded to the chunk reader.
     :raises UnrecoverableContent: if a metachunk could not be downloaded.
     """
     meta_ranges = get_meta_ranges([(None, None)], chunks)
     # items() instead of iteritems(): dict.iteritems() does not exist
     # on Python 3, and items() behaves the same when only iterated.
     for pos, meta_range in meta_ranges.items():
         meta_start, meta_end = meta_range
         reader = io.ChunkReader(iter(chunks[pos]), io.READ_CHUNK_SIZE,
                                 headers)
         it = reader.get_iter()
         if not it:
             raise UnrecoverableContent("Error while downloading")
         for part in it:
             for d in part['iter']:
                 yield d
示例#5
0
 def _fetch_stream(self, chunks, storage_method, headers):
     """
     Yield the object's data, reading each metachunk from one of the
     chunks registered at its position.

     :param chunks: mapping of position -> list of chunk candidates.
     :param storage_method: storage method of the content (unused here,
         kept for interface compatibility with other implementations).
     :param headers: HTTP headers forwarded to the chunk reader.
     :raises UnrecoverableContent: if a metachunk could not be downloaded.
     """
     meta_range_list = get_meta_ranges([(None, None)], chunks)
     for meta_range_dict in meta_range_list:
         # items() instead of iteritems(): dict.iteritems() does not exist
         # on Python 3, and items() behaves the same when only iterated.
         for pos, meta_range in meta_range_dict.items():
             meta_start, meta_end = meta_range
             reader = io.ChunkReader(iter(chunks[pos]), io.READ_CHUNK_SIZE,
                                     headers)
             try:
                 it = reader.get_iter()
             except Exception as err:
                 raise UnrecoverableContent("Error while downloading: %s" %
                                            err)
             for part in it:
                 for d in part['iter']:
                     yield d
示例#6
0
    def _download_chunk(self, pos):
        """
        Yield data from the first reachable chunk at position *pos*.

        :param pos: position of the chunk to download.
        :raises UnrecoverableContent: if no chunk at this position
            could be fetched.
        """
        stream = None
        for c in self.chunks.filter(pos=pos):
            try:
                meta, stream = self.blob_client.chunk_get(c.url)
                break
            except NotFound:
                # Lazy %-args (no '%' operator): the message is only
                # formatted when the debug level is actually enabled.
                self.logger.debug("Chunk %s not found", c.url)
                continue

        if stream is None:
            raise UnrecoverableContent("No chunk found at pos %d" % pos)

        for data in stream:
            yield data
示例#7
0
    def move_chunk(self,
                   chunk_id,
                   service_id=None,
                   check_quality=False,
                   dry_run=False,
                   max_attempts=3,
                   **kwargs):
        """
        Move a chunk to another place. Optionally ensure that the
        new place is an improvement over the current one.

        :param chunk_id: ID of the chunk to move, or a Chunk object
            (in which case service_id is read from the chunk itself).
        :param service_id: ID of the service hosting the chunk; required
            to disambiguate when several chunks share the same ID.
        :param check_quality: forwarded to the spare-chunk lookup, to ask
            for a location that improves on the current one.
        :param dry_run: only log what would be copied; no copy, no
            metadata update, no deletion.
        :param max_attempts: forwarded to the spare-chunk lookup.
        :raises exc.ChunkException: if chunk_id is ambiguous and no
            service_id was provided.
        :raises exc.OrphanChunk: if the chunk is not part of this content.
        :raises UnrecoverableContent: if no copy of the chunk could be
            read to create the new one.
        :returns: the raw (dict) description of the moved chunk, with its
            URL (and quality) pointing at the new location.
        """
        if isinstance(chunk_id, Chunk):
            # Caller passed a Chunk object: take ID and host from it.
            current_chunk = chunk_id
            chunk_id = current_chunk.id
            service_id = current_chunk.host
        else:
            candidates = self.chunks.filter(id=chunk_id)
            if len(candidates) > 1:
                # Same chunk ID on several services: we need service_id
                # to know which copy to move.
                if service_id is None:
                    raise exc.ChunkException(
                        "Several chunks with ID %s and no service ID" %
                        (chunk_id, ))
                candidates = candidates.filter(host=service_id)
            current_chunk = candidates.one()

        if current_chunk is None or current_chunk not in self.chunks:
            raise exc.OrphanChunk("Chunk not found in content")

        # Chunks of the same metachunk, excluding the one being moved,
        # are passed to the spare lookup as locations to avoid.
        if service_id:
            other_chunks = self.chunks.filter(
                metapos=current_chunk.metapos).exclude(host=service_id).all()
        else:
            other_chunks = self.chunks.filter(
                metapos=current_chunk.metapos).exclude(id=chunk_id).all()

        spare_urls, qualities = self._get_spare_chunk(
            other_chunks, [current_chunk],
            position=current_chunk.pos,
            check_quality=check_quality,
            max_attempts=max_attempts,
            **kwargs)

        # Sort chunks by score to try to copy with higher score.
        # When scores are close together (e.g. [95, 94, 94, 93, 50]),
        # don't always start with the highest element.
        duplicate_chunks = self.chunks \
            .filter(pos=current_chunk.pos) \
            .sort(key=lambda chunk: _get_weighted_random_score(chunk.raw()),
                  reverse=True) \
            .all()
        if dry_run:
            self.logger.info('Dry-run: would copy chunk from %s to %s',
                             duplicate_chunks[0].url, spare_urls[0])
        else:
            # To reduce the load on the rawx to decommission,
            # use one of the rawx with a copy of the chunk to move.
            for src in duplicate_chunks:
                try:
                    self.logger.info('Copying chunk from %s to %s', src.url,
                                     spare_urls[0])
                    # TODO(FVE): retry to copy (max_attempts times)
                    self.blob_client.chunk_copy(src.url,
                                                spare_urls[0],
                                                chunk_id=chunk_id,
                                                fullpath=self.full_path,
                                                cid=self.container_id,
                                                path=self.path,
                                                version=self.version,
                                                content_id=self.content_id,
                                                **kwargs)
                    break
                except Exception as err:
                    self.logger.warn('Failed to copy chunk from %s to %s: %s',
                                     src.url, spare_urls[0], err)
                    # Only one source available: re-raise the original
                    # error instead of the generic one below.
                    if len(duplicate_chunks) == 1:
                        raise
            else:
                # for/else: reached only when every source failed
                # without break.
                raise UnrecoverableContent(
                    'No copy available of chunk to move')

            self._update_spare_chunk(current_chunk, spare_urls[0])

            # Best-effort deletion of the old copy; a failure here leaves
            # a stray chunk but the move itself has succeeded.
            try:
                self.blob_client.chunk_delete(current_chunk.url, **kwargs)
            except Exception as err:
                self.logger.warn("Failed to delete chunk %s: %s",
                                 current_chunk.url, err)

        # Reflect the new location on the in-memory chunk; qualities is
        # keyed by the spare URL just assigned (presumably — from
        # _get_spare_chunk's return; TODO confirm).
        current_chunk.url = spare_urls[0]
        current_chunk.quality = qualities[current_chunk.url]

        return current_chunk.raw()
示例#8
0
    def rebuild_metachunk(self, metapos, force_broken_chunk=None,
                          on_the_fly=False):
        """
        Rebuild the broken chunks of a metachunk, by asking the rain
        service to regenerate them from the remaining ones.

        :param metapos: position of the metachunk to rebuild.
        :param force_broken_chunk: a chunk to consider broken even if it
            answers to HEAD requests.
        :param on_the_fly: if True, return an iterator over the rebuilt
            data instead of registering the spare chunks.
        :raises UnrecoverableContent: if more than self.m chunks of the
            metachunk are broken.
        """
        def _encode_sparerawxlist(broken_chunks, spare_urls):
            # Build the "addr|index|hash" list the rain service expects,
            # one entry per broken chunk.
            res = []
            for i, bc in enumerate(broken_chunks):
                if bc.is_parity:
                    broken_idx = self.k + int(bc.paritypos)
                else:
                    broken_idx = int(bc.subpos)
                spare_url = spare_urls[i].split('/', 2)[2]  # remove http//
                res.append("%s|%d|%s" % (spare_url, broken_idx, bc.hash))
            return ';'.join(res)

        # Probe every chunk of the metachunk to sort them into broken
        # and healthy ("notin") lists.
        current_chunks = self.chunks.filter(metapos=metapos)
        broken_chunks = []
        notin_chunks = []
        for c in current_chunks:
            if force_broken_chunk is not None \
                    and force_broken_chunk.id == c.id:
                broken_chunks.append(c)
                continue
            try:
                self.blob_client.chunk_head(c.url)
            except Exception as e:
                # Log the exception itself (lazily), not e.message: the
                # 'message' attribute was deprecated in Python 2.6 and
                # removed in Python 3.
                self.logger.debug("Failed to download chunk %s: %s",
                                  c.url, e)
                broken_chunks.append(c)
                continue
            notin_chunks.append(c)

        if len(broken_chunks) > self.m:
            raise UnrecoverableContent(
                "Not enough chunks to rebuild the metachunk")

        spare_urls = self._meta2_get_spare_chunk(notin_chunks, broken_chunks)

        # Describe the rebuild operation through request headers.
        headers = {}
        headers["X-oio-chunk-meta-content-storage-policy"] = self.stgpol_name
        headers["X-oio-chunk-meta-rawxlist"] = \
            self._encode_rawxlist(current_chunks)
        headers["X-oio-chunk-meta-sparerawxlist"] = \
            _encode_sparerawxlist(broken_chunks, spare_urls)
        headers[chunk_headers["content_id"]] = self.content_id
        headers[chunk_headers["content_version"]] = self.version
        headers[chunk_headers["content_path"]] = self.path
        headers[chunk_headers["content_size"]] = self.length
        headers[chunk_headers["content_chunksnb"]] = \
            self._get_metachunk_nb()
        headers[chunk_headers["content_cid"]] = self.container_id
        headers[chunk_headers["chunk_pos"]] = metapos
        headers["X-oio-chunk-meta-chunk-size"] = \
            self._get_metachunk_size(metapos)
        headers[chunk_headers["content_mimetype"]] = self.mime_type
        headers[chunk_headers["content_chunkmethod"]] = self.chunk_method

        resp = self.session.get(self._get_rain_addr(on_the_fly),
                                headers=headers, stream=True)
        resp.raise_for_status()
        if on_the_fly:
            # Caller consumes the rebuilt data directly.
            return resp.iter_content(READ_CHUNK_SIZE)
        resp.close()

        for i, bc in enumerate(broken_chunks):
            # TODO send only one request with all chunks modifications
            self._meta2_update_spare_chunk(bc, spare_urls[i])
            bc.url = spare_urls[i]  # update current content
示例#9
0
    def rebuild_chunk(self,
                      chunk_id,
                      allow_same_rawx=False,
                      chunk_pos=None,
                      allow_frozen_container=False):
        """
        Rebuild a chunk by copying one of its duplicates to a spare location,
        then registering the spare in the object's metadata.

        :param chunk_id: ID of the chunk to rebuild (may be None when
            rebuilding by position only).
        :param allow_same_rawx: allow the spare chunk to live on the same
            rawx service as the broken one.
        :param chunk_pos: position of the chunk to rebuild; required when
            chunk_id does not resolve to a chunk of this content.
        :param allow_frozen_container: allow the metadata update even if
            the container is frozen.
        :raises exc.OrphanChunk: if neither chunk_id nor chunk_pos
            identifies a chunk of this content.
        :raises UnrecoverableContent: if there is no duplicate to copy
            from, or all copy attempts failed.
        :returns: the size of the rebuilt chunk.
        """
        # Identify the chunk to rebuild
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None and chunk_pos is None:
            raise exc.OrphanChunk("Chunk not found in content")
        elif chunk_pos is None:
            chunk_pos = current_chunk.pos

        # Sort chunks by score to try to copy with higher score.
        # When scores are close together (e.g. [95, 94, 94, 93, 50]),
        # don't always start with the highest element.
        duplicate_chunks = self.chunks \
            .filter(pos=chunk_pos) \
            .exclude(id=chunk_id) \
            .sort(key=lambda chunk: _get_weighted_random_score(chunk.raw()),
                  reverse=True) \
            .all()
        if len(duplicate_chunks) == 0:
            raise UnrecoverableContent("No copy of missing chunk")

        if current_chunk is None:
            # The broken chunk is not registered anymore: rebuild a
            # description of it from one of its duplicates.
            chunk = {}
            chunk['hash'] = duplicate_chunks[0].checksum
            chunk['size'] = duplicate_chunks[0].size
            chunk['url'] = ''
            chunk['pos'] = chunk_pos
            current_chunk = Chunk(chunk)

        # Find a spare chunk address
        broken_list = list()
        if not allow_same_rawx and chunk_id is not None:
            broken_list.append(current_chunk)
        spare_urls, _quals = self._get_spare_chunk(duplicate_chunks,
                                                   broken_list)
        spare_url = spare_urls[0]

        # Actually create the spare chunk, by duplicating a good one
        for src in duplicate_chunks:
            try:
                self.blob_client.chunk_copy(src.url,
                                            spare_url,
                                            chunk_id=chunk_id,
                                            fullpath=self.full_path,
                                            cid=self.container_id,
                                            path=self.path,
                                            version=self.version,
                                            content_id=self.content_id)
                self.logger.debug('Chunk copied from %s to %s, registering it',
                                  src.url, spare_url)
                break
            except Exception as err:
                # logger.warning, not the deprecated logger.warn alias.
                self.logger.warning(
                    "Failed to copy chunk from %s to %s: %s %s",
                    src.url, spare_url, type(err), err)
        else:
            # for/else: reached only when every source failed without break.
            raise UnrecoverableContent("No copy available of missing chunk")

        # Register the spare chunk in object's metadata
        if chunk_id is None:
            self._add_raw_chunk(current_chunk,
                                spare_url,
                                frozen=allow_frozen_container)
        else:
            self._update_spare_chunk(current_chunk,
                                     spare_url,
                                     frozen=allow_frozen_container)
        self.logger.debug('Chunk %s repaired in %s', chunk_id or chunk_pos,
                          spare_url)

        return current_chunk.size