def test_comparison_no_ec(self):
    """Replicated (non-EC) chunks must sort by position, then by URL."""
    def make(url, pos):
        return Chunk({"url": url, "pos": pos, "size": 1048576,
                      "hash": "00000000000000000000000000000000"})

    c1 = make("http://127.0.0.1:6011/BB", "0")
    c2 = make("http://127.0.0.1:6011/AA", "1")
    c3 = make("http://127.0.0.1:6011/BB", "1")
    # Lower position sorts first, whatever the URL says
    self.assertTrue(c1 < c2)
    self.assertFalse(c1 > c2)
    self.assertTrue(c1 == c1)
    # Same position: the URL breaks the tie
    self.assertTrue(c2 < c3)
    self.assertFalse(c2 > c3)
    self.assertFalse(c2 == c3)
def test_comparison_ec(self):
    """EC chunks must sort by metachunk position, then subposition."""
    def make(url, pos):
        return Chunk({"url": url, "pos": pos, "size": 1048576,
                      "hash": "00000000000000000000000000000000"})

    c1 = make("http://127.0.0.1:6011/BB", "0.0")
    c2 = make("http://127.0.0.1:6011/AA", "0.1")
    c3 = make("http://127.0.0.1:6011/BB", "1.0")
    c4 = make("http://127.0.0.1:6011/BB", "0.2")
    c5 = make("http://127.0.0.1:6011/BB", "0.3")
    # Each pair is (lower, higher) in the expected ordering
    for lower, higher in ((c1, c2), (c2, c3), (c2, c4), (c4, c3), (c4, c5)):
        self.assertTrue(lower < higher)
def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None):
    """
    Rebuild a lost or corrupt EC chunk from the other chunks
    of the same metachunk, and upload it to a spare rawx service.

    :param chunk_id: ID of the chunk to rebuild (may be None when
        chunk_pos is provided).
    :param allow_same_rawx: allow the spare chunk to be allocated on
        the same rawx service as the broken chunk.
    :param chunk_pos: position of the chunk to rebuild, for chunks
        no longer registered in the object's metadata.
    :raises OrphanChunk: neither chunk_id nor chunk_pos identify a
        chunk of this content.
    """
    # Locate the broken chunk in the object's metadata
    current_chunk = self.chunks.filter(id=chunk_id).one()
    if current_chunk is None and chunk_pos is None:
        raise OrphanChunk("Chunk not found in content")
    elif current_chunk is None:
        # Not registered anymore: forge a minimal description
        # from the requested position.
        chunk = {"pos": chunk_pos, "url": ""}
        current_chunk = Chunk(chunk)
    # The other chunks of the same metachunk are the rebuild sources
    chunks = self.chunks.filter(metapos=current_chunk.metapos)\
        .exclude(id=chunk_id)
    if chunk_id is None:
        # Borrow size and checksum from a sibling chunk
        current_chunk.size = chunks[0].size
        current_chunk.checksum = chunks[0].checksum
    broken_list = list()
    if not allow_same_rawx:
        # Blacklist the broken chunk's location for spare selection
        broken_list.append(current_chunk)
    spare_url = self._get_spare_chunk(chunks.all(), broken_list)
    # Regenerate the missing fragment from the remaining ones
    handler = ECRebuildHandler(chunks.raw(), current_chunk.subpos,
                               self.storage_method)
    new_chunk = {'pos': current_chunk.pos, 'url': spare_url[0]}
    new_chunk = Chunk(new_chunk)
    stream = handler.rebuild()
    # Metadata accompanying the chunk upload
    meta = {}
    meta['chunk_id'] = new_chunk.id
    meta['chunk_pos'] = current_chunk.pos
    meta['container_id'] = self.container_id
    # FIXME: should be 'content_chunkmethod' everywhere
    # but sadly it isn't
    meta['chunk_method'] = self.chunk_method
    # FIXME: should be 'content_id' everywhere
    # but sadly it isn't
    meta['id'] = self.content_id
    meta['content_path'] = self.path
    # FIXME: should be 'content_policy' everywhere
    # but sadly it isn't
    meta['policy'] = self.policy
    # FIXME: should be 'content_version' everywhere
    # but sadly it isn't
    meta['version'] = self.version
    meta['metachunk_hash'] = current_chunk.checksum
    meta['metachunk_size'] = current_chunk.size
    meta['full_path'] = self.full_path
    meta['oio_version'] = OIO_VERSION
    self.blob_client.chunk_put(spare_url[0], meta, GeneratorIO(stream))
    # Register the spare chunk in the object's metadata
    if chunk_id is None:
        self._add_raw_chunk(current_chunk, spare_url[0])
    else:
        self._update_spare_chunk(current_chunk, spare_url[0])
def test_chunk_set_field(self):
    """Setters must store (and normalize, for the checksum) new values."""
    chunk = Chunk({"url": "http://127.0.0.1:6011/BB", "pos": "0.0",
                   "size": 1048576,
                   "hash": "00000000000000000000000000000000"})
    # The URL is stored verbatim
    chunk.url = "http://0.0.0.0:0000/AA"
    self.assertEqual(chunk.url, "http://0.0.0.0:0000/AA")
    # The checksum is upper-cased on assignment
    chunk.checksum = "AzErTy"
    self.assertEqual(chunk.checksum, "AZERTY")
    # The size is stored as-is
    chunk.size = 1234
    self.assertEqual(chunk.size, 1234)
def test_chunk_dup(self):
    """Check every accessor of a replicated (non-EC) chunk."""
    data = {"url": "http://127.0.0.1:6010/AABBCC", "pos": "0",
            "size": 10, "hash": "E952A419957A6E405BFC53EC65483F73"}
    chunk = Chunk(data)
    expected = {
        "url": "http://127.0.0.1:6010/AABBCC",
        "pos": "0",
        "size": 10,
        "checksum": "E952A419957A6E405BFC53EC65483F73",
        "id": "AABBCC",
        "host": "127.0.0.1:6010",
    }
    for attr, value in expected.items():
        self.assertEqual(getattr(chunk, attr), value)
    # A plain position (no dot) means the chunk is not erasure-coded
    self.assertFalse(chunk.ec)
    self.assertEqual(chunk.data, data)
    self.assertEqual(chunk.raw(), data)
def rebuild_chunk(self, chunk_id):
    """
    Rebuild a lost or corrupt EC chunk from the other chunks of the
    same metachunk, and upload the result to a spare rawx service.

    :param chunk_id: ID of the chunk to rebuild.
    :raises OrphanChunk: chunk_id does not identify a chunk of this
        content.
    """
    # Locate the broken chunk in the object's metadata
    current_chunk = self.chunks.filter(id=chunk_id).one()
    if current_chunk is None:
        raise OrphanChunk("Chunk not found in content")
    # The other chunks of the same metachunk are the rebuild sources
    chunks = self.chunks.filter(metapos=current_chunk.metapos)\
        .exclude(id=chunk_id)
    spare_url = self._get_spare_chunk(chunks.all(), [current_chunk])
    # Regenerate the missing fragment from the remaining ones
    handler = ECRebuildHandler(
        chunks.raw(), current_chunk.subpos, self.storage_method)
    new_chunk = {'pos': current_chunk.pos, 'url': spare_url[0]}
    new_chunk = Chunk(new_chunk)
    stream = handler.rebuild()
    # Metadata accompanying the chunk upload
    meta = {}
    meta['chunk_id'] = new_chunk.id
    meta['chunk_pos'] = current_chunk.pos
    meta['container_id'] = self.container_id
    meta['content_chunkmethod'] = self.chunk_method
    meta['content_id'] = self.content_id
    meta['content_path'] = self.path
    meta['content_policy'] = self.stgpol
    meta['content_version'] = self.version
    meta['metachunk_hash'] = current_chunk.checksum
    meta['metachunk_size'] = current_chunk.size
    self.blob_client.chunk_put(spare_url[0], meta, stream)
    # Register the spare chunk in the object's metadata
    self._update_spare_chunk(current_chunk, spare_url[0])
def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None):
    """
    Rebuild a lost or corrupt chunk by copying one of its replicas
    to a spare rawx service.

    :param chunk_id: ID of the chunk to rebuild (may be None when
        chunk_pos is provided).
    :param allow_same_rawx: allow the spare chunk to be created on
        the same rawx service as the broken chunk.
    :param chunk_pos: position of the chunk to rebuild, for chunks
        no longer registered in the object's metadata.
    :raises exc.OrphanChunk: neither chunk_id nor chunk_pos identify
        a chunk of this content.
    :raises UnrecoverableContent: no replica could be copied.
    """
    # Identify the chunk to rebuild
    current_chunk = self.chunks.filter(id=chunk_id).one()
    if current_chunk is None and chunk_pos is None:
        raise exc.OrphanChunk("Chunk not found in content")
    elif chunk_pos is None:
        chunk_pos = current_chunk.pos
    duplicate_chunks = self.chunks.filter(pos=chunk_pos).exclude(
        id=chunk_id).all()
    if not duplicate_chunks:
        raise UnrecoverableContent("No copy of missing chunk")
    if current_chunk is None:
        # Not registered anymore: forge a description from a replica
        chunk = {}
        chunk['hash'] = duplicate_chunks[0].checksum
        chunk['size'] = duplicate_chunks[0].size
        chunk['url'] = ''
        chunk['pos'] = chunk_pos
        current_chunk = Chunk(chunk)

    # Find a spare chunk address
    broken_list = list()
    if not allow_same_rawx and chunk_id is not None:
        broken_list.append(current_chunk)
    spare_urls, _quals = self._get_spare_chunk(duplicate_chunks,
                                               broken_list)
    spare_url = spare_urls[0]

    # Actually create the spare chunk, by duplicating a good one
    uploaded = False
    for src in duplicate_chunks:
        try:
            self.blob_client.chunk_copy(
                src.url, spare_url, chunk_id=chunk_id,
                fullpath=self.full_path, cid=self.container_id,
                path=self.path, version=self.version,
                content_id=self.content_id)
            self.logger.debug('Chunk copied from %s to %s, registering it',
                              src.url, spare_url)
            uploaded = True
            break
        except Exception as err:
            # Log the exception itself: most exception classes have
            # no 'message' attribute (it was removed in Python 3),
            # reading it would raise AttributeError and abort the
            # best-effort loop over the remaining replicas.
            self.logger.warn("Failed to copy chunk from %s to %s: %s %s",
                             src.url, spare_url, type(err), err)
    if not uploaded:
        raise UnrecoverableContent("No copy available of missing chunk")

    # Register the spare chunk in object's metadata
    if chunk_id is None:
        self._add_raw_chunk(current_chunk, spare_url)
    else:
        self._update_spare_chunk(current_chunk, spare_url)
    self.logger.debug('Chunk %s repaired in %s',
                      chunk_id or chunk_pos, spare_url)
def test_chunk_ec(self):
    """An EC chunk exposes integer meta/sub positions and ec=True."""
    chunk = Chunk({"url": "http://127.0.0.1:6016/AA", "pos": "0.1",
                   "size": 1048576,
                   "hash": "00000000000000000000000000000000"})
    # The raw position string stays untouched...
    self.assertEqual(chunk.pos, "0.1")
    # ...while both halves are parsed into integers
    self.assertEqual(chunk.metapos, 0)
    self.assertEqual(chunk.subpos, 1)
    self.assertTrue(chunk.ec)
def test_ensure_better_quality(self):
    """A spare must improve quality by at least the threshold."""
    def make(url, quality):
        return Chunk({"url": url, "pos": "0", "size": 0,
                      "hash": "00000000000000000000000000000000",
                      "quality": quality})

    chunk0 = make("http://127.0.0.1:6010/AABBCC", CRAPPY)
    chunk1 = make("http://127.0.0.2:6010/AABBDD", SMALL_DIST)
    chunk2 = make("http://127.0.0.3:6010/AABBEE", PERFECT)
    # OK, better quality
    ensure_better_chunk_qualities([chunk0], {chunk1.url: chunk1.quality})
    ensure_better_chunk_qualities([chunk0], {chunk2.url: chunk2.quality})
    ensure_better_chunk_qualities([chunk1], {chunk2.url: chunk2.quality})
    # Not OK, improvement is 1, threshold is 2
    self.assertRaises(exceptions.SpareChunkException,
                      ensure_better_chunk_qualities,
                      [chunk0], {chunk1.url: chunk1.quality}, threshold=2)
    self.assertRaises(exceptions.SpareChunkException,
                      ensure_better_chunk_qualities,
                      [chunk1], {chunk2.url: chunk2.quality}, threshold=2)
    # OK, far better quality
    ensure_better_chunk_qualities([chunk0], {chunk2.url: chunk2.quality},
                                  threshold=2)
def test_chunk_rain(self):
    """A data subchunk keeps string positions and is not parity."""
    chunk = Chunk({"url": "http://127.0.0.1:6016/AA", "pos": "0.1",
                   "size": 1048576,
                   "hash": "00000000000000000000000000000000"})
    self.assertEqual(chunk.is_parity, False)
    # In this implementation positions stay strings, split on the dot
    self.assertEqual(chunk.pos, "0.1")
    self.assertEqual(chunk.metapos, "0")
    self.assertEqual(chunk.subpos, "1")
    self.assertTrue(chunk.is_subchunk)
def test_chunk_rain_parity(self):
    """A parity subchunk ('p' prefix) exposes its parity position."""
    chunk = Chunk({"url": "http://127.0.0.1:6011/BB", "pos": "0.p0",
                   "size": 1048576,
                   "hash": "00000000000000000000000000000000"})
    self.assertEqual(chunk.is_parity, True)
    # Positions stay strings; subpos keeps the 'p' marker
    self.assertEqual(chunk.pos, "0.p0")
    self.assertEqual(chunk.metapos, "0")
    self.assertEqual(chunk.subpos, "p0")
    self.assertTrue(chunk.is_subchunk)
    self.assertEqual(chunk.paritypos, "0")
def test_ensure_better_quality_same(self):
    """Equal quality is rejected unless the threshold is zero."""
    chunk = Chunk({"url": "http://127.0.0.1:6010/AABBCC", "pos": "0",
                   "size": 0,
                   "hash": "00000000000000000000000000000000",
                   "quality": CRAPPY})
    # Default threshold: identical quality is not an improvement
    self.assertRaises(exceptions.SpareChunkException,
                      ensure_better_chunk_qualities,
                      [chunk], {chunk.url: chunk.quality})
    # threshold=0 -> accept no improvement
    ensure_better_chunk_qualities([chunk], {chunk.url: chunk.quality},
                                  threshold=0)
def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None):
    """
    Rebuild a lost or corrupt chunk by copying one of its replicas
    to a spare rawx service.

    :param chunk_id: ID of the chunk to rebuild (may be None when
        chunk_pos is provided).
    :param allow_same_rawx: allow the spare chunk to be created on
        the same rawx service as the broken chunk.
    :param chunk_pos: position of the chunk to rebuild, for chunks
        no longer registered in the object's metadata.
    :raises exc.OrphanChunk: neither chunk_id nor chunk_pos identify
        a chunk of this content.
    :raises UnrecoverableContent: no replica could be copied.
    """
    current_chunk = self.chunks.filter(id=chunk_id).one()
    if current_chunk is None and chunk_pos is None:
        raise exc.OrphanChunk("Chunk not found in content")
    elif chunk_pos is None:
        chunk_pos = current_chunk.pos
    duplicate_chunks = self.chunks.filter(
        pos=chunk_pos).exclude(id=chunk_id).all()
    if not duplicate_chunks:
        raise UnrecoverableContent("No copy of missing chunk")
    if current_chunk is None:
        # Not registered anymore: forge a description from a replica
        chunk = {}
        chunk['hash'] = duplicate_chunks[0].checksum
        chunk['size'] = duplicate_chunks[0].size
        chunk['url'] = ''
        chunk['pos'] = chunk_pos
        current_chunk = Chunk(chunk)

    # Find a spare chunk address
    broken_list = list()
    if not allow_same_rawx and chunk_id is not None:
        broken_list.append(current_chunk)
    spare_urls = self._get_spare_chunk(
        duplicate_chunks, broken_list)

    # Create the spare chunk by duplicating the first healthy replica
    uploaded = False
    for src in duplicate_chunks:
        try:
            self.blob_client.chunk_copy(src.url, spare_urls[0])
            self.logger.debug('Chunk copied from %s to %s, registering it',
                              src.url, spare_urls[0])
            uploaded = True
            break
        except Exception as err:
            # Log the exception itself: most exception classes have
            # no 'message' attribute (it was removed in Python 3),
            # reading it would raise AttributeError and abort the
            # best-effort loop over the remaining replicas.
            self.logger.warn(
                "Failed to copy chunk from %s to %s: %s",
                src.url, spare_urls[0], str(err))
    if not uploaded:
        raise UnrecoverableContent("No copy available of missing chunk")

    # Register the spare chunk in the object's metadata
    if chunk_id is None:
        self._add_raw_chunk(current_chunk, spare_urls[0])
    else:
        self._update_spare_chunk(current_chunk, spare_urls[0])
    self.logger.info('Chunk %s repaired in %s',
                     chunk_id or chunk_pos, spare_urls[0])
def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None,
                  allow_frozen_container=False):
    """
    Rebuild a lost or corrupt chunk by copying one of its replicas
    to a spare rawx service.

    :param chunk_id: ID of the chunk to rebuild (may be None when
        chunk_pos is provided).
    :param allow_same_rawx: allow the spare chunk to be created on
        the same rawx service as the broken chunk.
    :param chunk_pos: position of the chunk to rebuild, for chunks
        no longer registered in the object's metadata.
    :param allow_frozen_container: register the spare chunk even if
        the container is frozen.
    :returns: the size of the repaired chunk.
    :raises exc.OrphanChunk: neither chunk_id nor chunk_pos identify
        a chunk of this content.
    :raises UnrecoverableContent: no replica could be copied.
    """
    # Identify the chunk to rebuild
    current_chunk = self.chunks.filter(id=chunk_id).one()
    if current_chunk is None and chunk_pos is None:
        raise exc.OrphanChunk("Chunk not found in content")
    elif chunk_pos is None:
        chunk_pos = current_chunk.pos

    # Sort chunks by score to try to copy with higher score.
    # When scores are close together (e.g. [95, 94, 94, 93, 50]),
    # don't always start with the highest element.
    duplicate_chunks = self.chunks \
        .filter(pos=chunk_pos) \
        .exclude(id=chunk_id) \
        .sort(key=lambda chunk: _get_weighted_random_score(chunk.raw()),
              reverse=True) \
        .all()
    if len(duplicate_chunks) == 0:
        raise UnrecoverableContent("No copy of missing chunk")

    if current_chunk is None:
        # Not registered anymore: forge a description from a replica
        chunk = {}
        chunk['hash'] = duplicate_chunks[0].checksum
        chunk['size'] = duplicate_chunks[0].size
        chunk['url'] = ''
        chunk['pos'] = chunk_pos
        current_chunk = Chunk(chunk)

    # Find a spare chunk address
    broken_list = list()
    if not allow_same_rawx and chunk_id is not None:
        broken_list.append(current_chunk)
    spare_urls, _quals = self._get_spare_chunk(duplicate_chunks,
                                               broken_list)
    spare_url = spare_urls[0]

    # Actually create the spare chunk, by duplicating a good one.
    # The for/else ensures we fail only if every replica failed.
    for src in duplicate_chunks:
        try:
            self.blob_client.chunk_copy(
                src.url, spare_url, chunk_id=chunk_id,
                fullpath=self.full_path, cid=self.container_id,
                path=self.path, version=self.version,
                content_id=self.content_id)
            self.logger.debug('Chunk copied from %s to %s, registering it',
                              src.url, spare_url)
            break
        except Exception as err:
            self.logger.warn("Failed to copy chunk from %s to %s: %s %s",
                             src.url, spare_url, type(err), err)
    else:
        raise UnrecoverableContent("No copy available of missing chunk")

    # Register the spare chunk in object's metadata
    if chunk_id is None:
        self._add_raw_chunk(current_chunk, spare_url,
                            frozen=allow_frozen_container)
    else:
        self._update_spare_chunk(current_chunk, spare_url,
                                 frozen=allow_frozen_container)
    self.logger.debug('Chunk %s repaired in %s',
                      chunk_id or chunk_pos, spare_url)
    return current_chunk.size
def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None,
                  allow_frozen_container=False):
    """
    Rebuild a lost or corrupt EC chunk from the other chunks of the
    same metachunk, and upload the result to a spare rawx service.

    :param chunk_id: ID of the chunk to rebuild (may be None when
        chunk_pos is provided).
    :param allow_same_rawx: allow the spare chunk to be allocated on
        the same rawx service as the broken chunk.
    :param chunk_pos: position of the chunk to rebuild, for chunks
        no longer registered in the object's metadata.
    :param allow_frozen_container: register the spare chunk even if
        the container is frozen.
    :raises OrphanChunk: neither chunk_id nor chunk_pos identify a
        chunk of this content.
    """
    # Identify the chunk to rebuild
    current_chunk = self.chunks.filter(id=chunk_id).one()
    if current_chunk is None and chunk_pos is None:
        raise OrphanChunk("Chunk not found in content")
    elif current_chunk is None:
        # Try to resolve the chunk by position instead of by ID
        current_chunk = self.chunks.filter(pos=chunk_pos).one()
        if current_chunk is None:
            # Not registered anymore: forge a minimal description
            chunk = {'pos': chunk_pos, 'url': ''}
            current_chunk = Chunk(chunk)
        else:
            chunk_id = current_chunk.id
            self.logger.debug('Chunk at pos %s has id %s',
                              chunk_pos, chunk_id)

    # The other chunks of the same metachunk are the rebuild sources
    chunks = self.chunks.filter(metapos=current_chunk.metapos)\
        .exclude(id=chunk_id, pos=chunk_pos)
    if chunk_id is None:
        # Borrow size and checksum from a sibling chunk
        current_chunk.size = chunks[0].size
        current_chunk.checksum = chunks[0].checksum

    # Find a spare chunk address
    broken_list = list()
    if not allow_same_rawx and chunk_id is not None:
        broken_list.append(current_chunk)
    spare_url, _quals = self._get_spare_chunk(chunks.all(), broken_list)
    new_chunk = Chunk({'pos': current_chunk.pos, 'url': spare_url[0]})

    # Regenerate the lost chunk's data, from existing chunks
    handler = ECRebuildHandler(chunks.raw(), current_chunk.subpos,
                               self.storage_method)
    stream = handler.rebuild()

    # Actually create the spare chunk
    meta = {}
    meta['chunk_id'] = new_chunk.id
    meta['chunk_pos'] = current_chunk.pos
    meta['container_id'] = self.container_id
    # FIXME: should be 'content_chunkmethod' everywhere
    # but sadly it isn't
    meta['chunk_method'] = self.chunk_method
    # FIXME: should be 'content_id' everywhere
    # but sadly it isn't
    meta['id'] = self.content_id
    meta['content_path'] = self.path
    # FIXME: should be 'content_policy' everywhere
    # but sadly it isn't
    meta['policy'] = self.policy
    # FIXME: should be 'content_version' everywhere
    # but sadly it isn't
    meta['version'] = self.version
    meta['metachunk_hash'] = current_chunk.checksum
    meta['metachunk_size'] = current_chunk.size
    meta['full_path'] = self.full_path
    meta['oio_version'] = OIO_VERSION
    self.blob_client.chunk_put(spare_url[0], meta,
                               GeneratorIO(stream))

    # Register the spare chunk in object's metadata
    if chunk_id is None:
        self._add_raw_chunk(current_chunk, spare_url[0],
                            frozen=allow_frozen_container)
    else:
        self._update_spare_chunk(current_chunk, spare_url[0],
                                 frozen=allow_frozen_container)
    self.logger.debug('Chunk %s repaired in %s',
                      chunk_id or chunk_pos, spare_url[0])
def rebuild_chunk(self, chunk_id, service_id=None, allow_same_rawx=False,
                  chunk_pos=None, allow_frozen_container=False):
    """
    Rebuild a lost or corrupt EC chunk from the other chunks of the
    same metachunk, and upload the result to a spare rawx service.

    :param chunk_id: ID of the chunk to rebuild (may be None when
        chunk_pos is provided).
    :param service_id: optionally restrict the lookup to the chunk
        hosted on this service.
    :param allow_same_rawx: allow the spare chunk to be allocated on
        the same rawx service as the broken chunk.
    :param chunk_pos: position of the chunk to rebuild, for chunks
        no longer registered in the object's metadata.
    :param allow_frozen_container: register the spare chunk even if
        the container is frozen.
    :returns: the number of bytes uploaded for the rebuilt chunk.
    :raises OrphanChunk: neither chunk_id nor chunk_pos identify a
        chunk of this content.
    :raises ChunkException: the rebuilt chunk does not have the
        expected size (the upload is rolled back).
    """
    # Identify the chunk to rebuild
    candidates = self.chunks.filter(id=chunk_id)
    if service_id is not None:
        candidates = candidates.filter(host=service_id)
    current_chunk = candidates.one()
    if current_chunk is None and chunk_pos is None:
        raise OrphanChunk("Chunk not found in content")
    if current_chunk is None:
        # Try to resolve the chunk by position instead of by ID
        current_chunk = self.chunks.filter(pos=chunk_pos).one()
        if current_chunk is None:
            # Not registered anymore: forge a minimal description
            chunk = {'pos': chunk_pos, 'url': ''}
            current_chunk = Chunk(chunk)
        else:
            chunk_id = current_chunk.id
            self.logger.debug('Chunk at pos %s has id %s',
                              chunk_pos, chunk_id)

    # Sort chunks by score to try to rebuild with higher score.
    # When scores are close together (e.g. [95, 94, 94, 93, 50]),
    # don't always start with the highest element.
    chunks = self.chunks \
        .filter(metapos=current_chunk.metapos) \
        .exclude(id=chunk_id, pos=chunk_pos) \
        .sort(key=lambda chunk: _get_weighted_random_score(chunk.raw()),
              reverse=True)
    if chunk_id is None:
        # Borrow size and checksum from a sibling chunk
        current_chunk.size = chunks[0].size
        current_chunk.checksum = chunks[0].checksum

    # Find a spare chunk address
    broken_list = list()
    if not allow_same_rawx and chunk_id is not None:
        broken_list.append(current_chunk)
    spare_url, _quals = self._get_spare_chunk(chunks.all(), broken_list,
                                              position=current_chunk.pos)
    new_chunk = Chunk({'pos': current_chunk.pos, 'url': spare_url[0]})

    # Regenerate the lost chunk's data, from existing chunks
    handler = ECRebuildHandler(chunks.raw(), current_chunk.subpos,
                               self.storage_method)
    expected_chunk_size, stream = handler.rebuild()

    # Actually create the spare chunk
    meta = {}
    meta['chunk_id'] = new_chunk.id
    meta['chunk_pos'] = current_chunk.pos
    meta['container_id'] = self.container_id
    # FIXME: should be 'content_chunkmethod' everywhere
    # but sadly it isn't
    meta['chunk_method'] = self.chunk_method
    # FIXME: should be 'content_id' everywhere
    # but sadly it isn't
    meta['id'] = self.content_id
    meta['content_path'] = self.path
    # FIXME: should be 'content_policy' everywhere
    # but sadly it isn't
    meta['policy'] = self.policy
    # FIXME: should be 'content_version' everywhere
    # but sadly it isn't
    meta['version'] = self.version
    meta['metachunk_hash'] = current_chunk.checksum
    meta['metachunk_size'] = current_chunk.size
    meta['full_path'] = self.full_path
    meta['oio_version'] = OIO_VERSION
    bytes_transferred, _ = self.blob_client.chunk_put(
        spare_url[0], meta, GeneratorIO(stream, sub_generator=PY2))
    # Sanity check: roll the upload back if the size does not match
    if expected_chunk_size is not None \
            and bytes_transferred != expected_chunk_size:
        try:
            self.blob_client.chunk_delete(spare_url[0])
        # NOTE(review): 'exc' shadows any module-level 'exc' alias
        # inside this handler — confirm no such alias is needed here.
        except Exception as exc:
            self.logger.warning(
                'Failed to rollback the rebuild of the chunk: %s', exc)
        raise ChunkException('The rebuilt chunk is not the correct size')

    # Register the spare chunk in object's metadata
    if chunk_id is None:
        self._add_raw_chunk(current_chunk, spare_url[0],
                            frozen=allow_frozen_container)
    else:
        self._update_spare_chunk(current_chunk, spare_url[0],
                                 frozen=allow_frozen_container)
    self.logger.debug('Chunk %s repaired in %s',
                      chunk_id or chunk_pos, spare_url[0])
    return bytes_transferred