def quorum_or_fail(self, successes, failures):
    """
    Check that enough chunk uploads succeeded to satisfy the quorum.

    :param successes: chunk objects whose upload succeeded
    :type successes: `list` or `tuple`
    :param failures: chunk objects whose upload failed
    :type failures: `list` or `tuple`

    :raises `exc.SourceReadError`: there was an error while reading
        data from the client
    :raises `exc.SourceReadTimeout`: there was a timeout while reading
        data from the client
    :raises `exc.OioTimeout`: there was a timeout among the errors
    :raises `exc.OioException`: the quorum has not been reached
        for any other reason
    """
    if len(successes) >= self.quorum:
        return
    grouped = group_chunk_errors(
        (chunk["url"], chunk.get("error", "success"))
        for chunk in successes + failures)
    quorum_exc = exc.OioException(
        "RAWX write failure, quorum not reached (%d/%d): %s" % (
            len(successes), self.quorum, grouped))
    # Re-raise as the most specific error type found among the failures.
    for failure in failures:
        cause = failure.get('error')
        if isinstance(cause, exc.SourceReadError):
            raise exc.SourceReadError(quorum_exc)
        if isinstance(cause, green.SourceReadTimeout):
            # Never raise 'green' timeouts out of our API
            raise exc.SourceReadTimeout(quorum_exc)
        if isinstance(cause, (exc.OioTimeout, green.OioTimeout)):
            raise exc.OioTimeout(quorum_exc)
    raise quorum_exc
def _quorum_or_fail(self, successes, failures): quorum = self._check_quorum(successes) if not quorum: errors = utils.group_chunk_errors( ((chunk["url"], chunk.get("error", "success")) for chunk in successes + failures)) raise exc.OioException( "RAWX write failure, quorum not reached: %s" % errors)
def assign_services(self, service_type, max_per_rdir=None, **kwargs):
    """
    Assign an rdir service to all `service_type` services that don't
    have one linked yet.

    :param service_type: type of the services to assign rdir services to
    :param max_per_rdir: maximum number of databases an rdir can handle
    :type max_per_rdir: `int`
    :returns: the list of `service_type` services
    :raises ServiceUnavailable: no rdir service is available
    :raises OioException: one or several link operations failed
    """
    all_services = self.cs.all_services(service_type, **kwargs)
    all_rdir = self.cs.all_services('rdir', True, **kwargs)
    if len(all_rdir) <= 0:
        raise ServiceUnavailable("No rdir service found in %s" % self.ns)

    by_id = {_make_id(self.ns, 'rdir', x['addr']): x for x in all_rdir}

    errors = []
    for provider in all_services:
        provider_id = provider['tags'].get('tag.service_id',
                                           provider['addr'])
        try:
            resp = self.directory.list(RDIR_ACCT, provider_id,
                                       service_type='rdir', **kwargs)
            rdir_host = _filter_rdir_host(resp)
            try:
                provider['rdir'] = by_id[_make_id(self.ns, 'rdir',
                                                  rdir_host)]
            except KeyError:
                self.logger.warn("rdir %s linked to %s %s seems down",
                                 rdir_host, service_type, provider_id)
        except NotFound:
            # No rdir linked yet: pick one and create the link.
            try:
                rdir = self._smart_link_rdir(provider_id, all_rdir,
                                             service_type=service_type,
                                             max_per_rdir=max_per_rdir,
                                             **kwargs)
            # Bind as 'err', not 'exc': 'exc' would shadow the
            # exceptions module used elsewhere in this file.
            except OioException as err:
                self.logger.warn("Failed to link an rdir to %s %s: %s",
                                 service_type, provider_id, err)
                errors.append((provider_id, err))
                continue
            # Account for the database just assigned to the chosen rdir.
            n_bases = by_id[rdir]['tags'].get("stat.opened_db_count", 0)
            by_id[rdir]['tags']["stat.opened_db_count"] = n_bases + 1
            provider['rdir'] = by_id[rdir]
        except OioException as err:
            self.logger.warn(
                "Failed to check rdir linked to %s %s "
                "(thus won't try to make the link): %s",
                service_type, provider_id, err)
            errors.append((provider_id, err))
    if errors:
        # group_chunk_errors is flexible enough to accept service addresses
        errors = group_chunk_errors(errors)
        if len(errors) == 1:
            err, addrs = errors.popitem()
            oio_reraise(type(err), err, str(addrs))
        else:
            raise OioException('Several errors encountered: %s' % errors)
    return all_services
def get_iter(self):
    """
    Get an iterator over the data, built from the first available source.

    :raises: an exception built from the common error status when all
        chunk responses share the same status
    :raises `exc.ServiceUnavailable`: no chunk is available and the
        errors are of several different types
    """
    src, chunk = self._get_source()
    if not src:
        grouped = group_chunk_errors(self._resp_by_chunk.items())
        if len(grouped) == 1:
            # All errors are of the same type, group them
            status, chunks = grouped.popitem()
            raise exc.from_status(status[0], "%s %s" % (status[1], chunks))
        raise exc.ServiceUnavailable(
            "unavailable chunks: %s" % self._resp_by_chunk)
    return self._get_iter(chunk, src)
def quorum_or_fail(self, successes, failures):
    """
    Check the number of successful uploads against the quorum.

    :param successes: chunk objects whose upload succeeded
    :type successes: `list` or `tuple`
    :param failures: chunk objects whose upload failed
    :type failures: `list` or `tuple`
    :raises `exc.OioException`: the quorum has not been reached
    """
    if len(successes) >= self.quorum:
        return
    grouped = group_chunk_errors(
        (chunk["url"], chunk.get("error", "success"))
        for chunk in successes + failures)
    raise exc.OioException(
        "RAWX write failure, quorum not reached (%d/%d): %s" % (
            len(successes), self.quorum, grouped))
def rebuild_chunk(self, chunk_id, service_id=None, allow_same_rawx=False,
                  chunk_pos=None, allow_frozen_container=False):
    """
    Rebuild a chunk by copying one of its remaining duplicates
    to a newly selected spare location, then registering the copy
    in the object's metadata.

    :param chunk_id: ID of the chunk to rebuild; may be None when
        rebuilding by position only.
    :param service_id: if set, only consider the copy of the chunk
        hosted by this service.
    :param allow_same_rawx: allow the spare chunk to land on the same
        rawx as the broken one.
    :param chunk_pos: position of the chunk to rebuild; required when
        the chunk is not registered in the content anymore.
    :param allow_frozen_container: forwarded as `frozen=` to the
        metadata registration calls (presumably permits updating
        objects in frozen containers — confirm with those helpers).
    :returns: the size of the rebuilt chunk.
    :raises `exc.OrphanChunk`: the chunk is not found in the content
        and no position was given.
    :raises `UnrecoverableContent`: there is no remaining copy of the
        chunk, or none of the copies could be duplicated.
    """
    # Identify the chunk to rebuild
    candidates = self.chunks.filter(id=chunk_id)
    if service_id is not None:
        candidates = candidates.filter(host=service_id)
    current_chunk = candidates.one()

    if current_chunk is None and chunk_pos is None:
        raise exc.OrphanChunk("Chunk not found in content")

    if chunk_pos is None:
        chunk_pos = current_chunk.pos

    # Sort chunks by score to try to copy with higher score.
    # When scores are close together (e.g. [95, 94, 94, 93, 50]),
    # don't always start with the highest element.
    candidates = self.chunks.filter(pos=chunk_pos)
    # Exclude the broken copy itself from the duplicate candidates.
    if service_id:
        candidates = candidates.exclude(host=service_id)
    else:
        candidates = candidates.exclude(id=chunk_id)
    duplicate_chunks = candidates \
        .sort(key=lambda chunk: _get_weighted_random_score(chunk.raw()),
              reverse=True) \
        .all()
    if len(duplicate_chunks) == 0:
        raise UnrecoverableContent("No copy of missing chunk")

    # The chunk is not registered anymore: synthesize a Chunk record
    # from the best duplicate, with an empty URL.
    if current_chunk is None:
        chunk = {}
        chunk['hash'] = duplicate_chunks[0].checksum
        chunk['size'] = duplicate_chunks[0].size
        chunk['url'] = ''
        chunk['pos'] = chunk_pos
        current_chunk = Chunk(chunk)

    # Find a spare chunk address
    broken_list = list()
    if not allow_same_rawx and chunk_id is not None:
        # Declare the current location as broken so the spare
        # selection avoids it.
        broken_list.append(current_chunk)
    spare_urls, _quals = self._get_spare_chunk(
        duplicate_chunks, broken_list, position=current_chunk.pos)
    spare_url = spare_urls[0]

    # Actually create the spare chunk, by duplicating a good one.
    # Try each duplicate in turn until one copy succeeds.
    errors = list()
    for src in duplicate_chunks:
        try:
            self.blob_client.chunk_copy(
                src.url, spare_url, chunk_id=chunk_id,
                fullpath=self.full_path, cid=self.container_id,
                path=self.path, version=self.version,
                content_id=self.content_id)
            self.logger.debug('Chunk copied from %s to %s, registering it',
                              src.url, spare_url)
            break
        except Exception as err:
            # Best effort: log, record the error, and try the next copy.
            self.logger.warn(
                "Failed to copy chunk from %s to %s: %s %s", src.url,
                spare_url, type(err), err)
            errors.append((src.url, err))
    else:
        # for/else: no break happened, i.e. every copy attempt failed.
        raise UnrecoverableContent("No copy available of missing chunk, "
                                   "or could not copy them. %s" % (
                                       group_chunk_errors(errors),))

    try:
        # Register the spare chunk in object's metadata
        if chunk_id is None:
            self._add_raw_chunk(current_chunk, spare_url,
                                frozen=allow_frozen_container)
        else:
            self._update_spare_chunk(current_chunk, spare_url,
                                     frozen=allow_frozen_container)
    except Exception:
        # Registration failed: delete the orphan spare chunk
        # before propagating the error.
        self.blob_client.chunk_delete(spare_url)
        raise
    self.logger.debug('Chunk %s repaired in %s',
                      chunk_id or chunk_pos, spare_url)

    return current_chunk.size
def assign_services(self, service_type, max_per_rdir=None, min_dist=None,
                    service_id=None, reassign=False, **kwargs):
    """
    Assign an rdir service to all `service_type` servers that aren't
    already assigned one.

    :param max_per_rdir: Maximum number of services an rdir can handle.
    :type max_per_rdir: `int`
    :param min_dist: Minimum required distance between any service and
        its assigned rdir service.
    :type min_dist: `int`
    :param service_id: Assign only this service ID.
    :type service_id: `str`
    :param reassign: Reassign an rdir service.
    :type reassign: `bool`
    :param dry_run: Display actions but do nothing.
    :type dry_run: `bool`
    :returns: The list of `service_type` services that were assigned
        rdir services.
    :raises ValueError: `service_id` is not a `service_type` service.
    :raises ServiceUnavailable: no rdir service is available.
    :raises OioException: one or several link operations failed.
    """
    all_services = self.cs.all_services(service_type, **kwargs)
    if service_id:
        # Narrow the list down to the requested service only.
        for provider in all_services:
            provider_id = provider['tags'].get('tag.service_id',
                                               provider['addr'])
            if service_id == provider_id:
                break
        else:
            raise ValueError('%s isn\'t a %s' % (service_id, service_type))
        all_services = [provider]
    all_rdir = self.cs.all_services('rdir', True, **kwargs)
    if len(all_rdir) <= 0:
        raise ServiceUnavailable("No rdir service found in %s" % self.ns)

    by_id = _build_dict_by_id(self.ns, all_rdir)

    errors = list()
    for provider in all_services:
        provider_id = provider['tags'].get('tag.service_id',
                                           provider['addr'])

        try:
            resp = self.directory.list(RDIR_ACCT, provider_id,
                                       service_type='rdir', **kwargs)
            rdir_host = _filter_rdir_host(resp)
            try:
                rdir = by_id[_make_id(self.ns, 'rdir', rdir_host)]
                if reassign:
                    # Forget the current link: decrement the database
                    # count and fall through to the NotFound handler
                    # to create a new link.
                    rdir['tags']['stat.opened_db_count'] = \
                        rdir['tags'].get('stat.opened_db_count', 0) - 1
                    # TODO(adu) Delete database
                    raise NotFound('Reassign an rdir services')
                provider['rdir'] = rdir
            except KeyError:
                self.logger.warn("rdir %s linked to %s %s seems down",
                                 rdir_host, service_type, provider_id)
                if reassign:
                    raise NotFound('Reassign an rdir services')
        except NotFound:
            # No rdir linked (or a reassignment was forced above):
            # pick one and create the link.
            try:
                rdir = self._smart_link_rdir(provider_id, all_rdir,
                                             service_type=service_type,
                                             max_per_rdir=max_per_rdir,
                                             min_dist=min_dist,
                                             reassign=reassign,
                                             **kwargs)
            # Bind as 'err', not 'exc': 'exc' would shadow the
            # exceptions module used elsewhere in this file.
            except OioException as err:
                self.logger.warn("Failed to link an rdir to %s %s: %s",
                                 service_type, provider_id, err)
                errors.append((provider_id, err))
                continue
            # Account for the database just assigned to the chosen rdir.
            n_bases = by_id[rdir]['tags'].get("stat.opened_db_count", 0)
            by_id[rdir]['tags']["stat.opened_db_count"] = n_bases + 1
            provider['rdir'] = by_id[rdir]
        except OioException as err:
            self.logger.warn(
                "Failed to check rdir linked to %s %s "
                "(thus won't try to make the link): %s",
                service_type, provider_id, err)
            errors.append((provider_id, err))
    if errors:
        # group_chunk_errors is flexible enough to accept service addresses
        errors = group_chunk_errors(errors)
        if len(errors) == 1:
            err, addrs = errors.popitem()
            oio_reraise(type(err), err, str(addrs))
        else:
            raise OioException('Several errors encountered: %s' % errors)
    return all_services