def setUp(self):
    """Build the duplicated and erasure-coded chunk fixtures."""
    super(TestChunksHelper, self).setUp()

    def _chunk(url, pos, checksum):
        # Every fixture chunk shares the same 1 MiB size.
        return {"url": url, "pos": pos, "size": 1048576, "hash": checksum}

    # Duplicated layout: two copies at position "0", two at position "1".
    dup_hash_pos0 = "2E47D13C3E2C47E0C537028AD637CCBF"
    dup_hash_pos1 = "045B70673D8271767D4D21BCDB040F6C"
    self.dup_c1_1 = _chunk("http://127.0.0.1:6011/C1C1", "0", dup_hash_pos0)
    self.dup_c1_2 = _chunk("http://127.0.0.1:6010/C1C2", "0", dup_hash_pos0)
    self.dup_c2_1 = _chunk("http://127.0.0.1:6012/C2C1", "1", dup_hash_pos1)
    self.dup_c2_2 = _chunk("http://127.0.0.1:6011/C2C2", "1", dup_hash_pos1)
    self.dup_chunks_raw = [self.dup_c1_1, self.dup_c1_2,
                           self.dup_c2_1, self.dup_c2_2]
    self.dup_chunks = ChunksHelper(self.dup_chunks_raw)

    # EC layout: positions "<metapos>.<subpos>", three subchunks per metachunk.
    ec_hash = "00000000000000000000000000000000"
    self.ec_c0_0 = _chunk("http://127.0.0.1:6017/C0_0", "0.0", ec_hash)
    self.ec_c0_1 = _chunk("http://127.0.0.1:6016/C0_1", "0.1", ec_hash)
    self.ec_c0_2 = _chunk("http://127.0.0.1:6011/C0_P", "0.2", ec_hash)
    self.ec_c1_0 = _chunk("http://127.0.0.1:6017/C1_0", "1.0", ec_hash)
    self.ec_c1_1 = _chunk("http://127.0.0.1:6016/C1_1", "1.1", ec_hash)
    self.ec_c1_2 = _chunk("http://127.0.0.1:6011/C1_P", "1.2", ec_hash)
    self.ec_chunks_raw = [self.ec_c0_0, self.ec_c0_1, self.ec_c0_2,
                          self.ec_c1_0, self.ec_c1_1, self.ec_c1_2]
    self.ec_chunks = ChunksHelper(self.ec_chunks_raw)
def _test_upload(self, data_size):
    """Upload random data as a RAIN content and verify every chunk.

    Checks content metadata, the number of data/parity chunks per
    metachunk, each chunk's xattr metadata, and that the reassembled
    data chunks hash to the corresponding slice of the source data.
    """
    data = random_data(data_size)
    content = self.content_factory.new(self.container_id, "titi",
                                       len(data), "RAIN")
    k = 6  # max data chunks per metachunk
    m = 2  # parity chunks per metachunk
    self.assertEqual(type(content), RainContent)

    content.upload(StringIO.StringIO(data))

    meta, chunks = self.container_client.content_show(
        cid=self.container_id, content=content.content_id)
    chunks = ChunksHelper(chunks)
    self.assertEqual(meta['hash'], md5_data(data))
    self.assertEqual(meta['length'], str(len(data)))
    self.assertEqual(meta['policy'], "RAIN")
    self.assertEqual(meta['name'], "titi")

    metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
    if metachunk_nb == 0:
        metachunk_nb = 1  # special case for empty content

    # Each metachunk is stored as 1..k data chunks plus m parity chunks.
    nb_chunks_min = metachunk_nb * (1 + m)
    nb_chunks_max = metachunk_nb * (k + m)
    self.assertGreaterEqual(len(chunks), nb_chunks_min)
    self.assertLessEqual(len(chunks), nb_chunks_max)

    for metapos in range(metachunk_nb):
        chunks_at_pos = content.chunks.filter(metapos=metapos)
        data_chunks_at_pos = chunks_at_pos.filter(is_parity=False)
        parity_chunks_at_pos = chunks_at_pos.filter(is_parity=True)

        # FIX: replaced deprecated assertEquals(x >= 1, True) idiom
        # with the dedicated comparison assertions.
        self.assertGreaterEqual(len(data_chunks_at_pos), 1)
        self.assertLessEqual(len(data_chunks_at_pos), k)
        self.assertEqual(len(parity_chunks_at_pos), m)

        for chunk in chunks_at_pos:
            meta, stream = self.blob_client.chunk_get(chunk.url)
            self.assertEqual(md5_stream(stream), chunk.hash)
            self.assertEqual(meta['content_size'], str(len(data)))
            self.assertEqual(meta['content_path'], "titi")
            self.assertEqual(meta['content_cid'], self.container_id)
            # BUG FIX: the original compared meta['content_id'] with
            # itself (always true); compare against the real content id.
            self.assertEqual(meta['content_id'], content.content_id)
            self.assertEqual(meta['chunk_id'], chunk.id)
            self.assertEqual(meta['chunk_pos'], chunk.pos)
            self.assertEqual(meta['chunk_hash'], chunk.hash)

        # The concatenated data chunks of this metachunk must hash to
        # the matching slice of the source data.
        data_begin = metapos * self.chunk_size
        data_end = data_begin + self.chunk_size
        target_metachunk_hash = md5_data(data[data_begin:data_end])

        metachunk_hash = hashlib.md5()
        for chunk in data_chunks_at_pos:
            meta, stream = self.blob_client.chunk_get(chunk.url)
            for d in stream:
                metachunk_hash.update(d)
        self.assertEqual(metachunk_hash.hexdigest().upper(),
                         target_metachunk_hash)
def test_sort_ec(self):
    """EC chunks must come back ordered by metachunk then subposition."""
    shuffled = [self.ec_c1_2, self.ec_c1_1, self.ec_c1_0,
                self.ec_c0_2, self.ec_c0_1, self.ec_c0_0]
    expected = [self.ec_c0_0, self.ec_c0_1, self.ec_c0_2,
                self.ec_c1_0, self.ec_c1_1, self.ec_c1_2]
    self.assertEqual(ChunksHelper(shuffled).raw(), expected)
def test_sort_dup(self):
    """Duplicated chunks must come back ordered by position."""
    shuffled = [self.dup_c2_2, self.dup_c2_1,
                self.dup_c1_2, self.dup_c1_1]
    expected = [self.dup_c1_1, self.dup_c1_2,
                self.dup_c2_1, self.dup_c2_2]
    self.assertEqual(ChunksHelper(shuffled).raw(), expected)
def test_sort_rain(self):
    """RAIN chunks must come back ordered, data before parity."""
    shuffled = [self.rain_c1_p, self.rain_c1_1, self.rain_c1_0,
                self.rain_c0_p, self.rain_c0_1, self.rain_c0_0]
    expected = [self.rain_c0_0, self.rain_c0_1, self.rain_c0_p,
                self.rain_c1_0, self.rain_c1_1, self.rain_c1_p]
    self.assertEqual(ChunksHelper(shuffled).raw(), expected)
def _test_create(self, stgpol, data_size):
    """Create a duplicated content and verify metadata and every copy.

    Checks content-level metadata, the expected number of copies per
    metachunk for the given storage policy, and each chunk's data,
    xattr metadata and database checksum.
    """
    data = random_data(data_size)
    content = self.content_factory.new(self.container_id, self.content,
                                       len(data), stgpol)
    content.create(BytesIO(data))

    meta, chunks = self.container_client.content_locate(
        cid=self.container_id, content=content.content_id)
    self.assertEqual(meta['hash'], md5_data(data))
    self.assertEqual(meta['length'], str(len(data)))
    self.assertEqual(meta['policy'], stgpol)
    self.assertEqual(meta['name'], self.content)

    metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
    if metachunk_nb == 0:
        metachunk_nb = 1  # special case for empty content

    chunks = ChunksHelper(chunks)
    # TODO: deduce nb_copy from the policy definition instead of
    # hard-coding the mapping here.
    copies_by_policy = {
        self.stgpol_threecopies: 3,
        self.stgpol_twocopies: 2,
        self.stgpol: 1,
    }
    try:
        nb_copy = copies_by_policy[stgpol]
    except KeyError:
        # BUG FIX: the original left nb_copy unbound (NameError) for
        # an unknown policy; fail with an explicit message instead.
        raise ValueError("Unsupported storage policy: %s" % stgpol)
    self.assertEqual(len(chunks), metachunk_nb * nb_copy)

    for pos in range(metachunk_nb):
        chunks_at_pos = chunks.filter(pos=pos)
        self.assertEqual(len(chunks_at_pos), nb_copy)

        data_begin = pos * self.chunk_size
        data_end = data_begin + self.chunk_size
        chunk_hash = md5_data(data[data_begin:data_end])

        for chunk in chunks_at_pos:
            meta, stream = self.blob_client.chunk_get(chunk.url)
            self.assertEqual(md5_stream(stream), chunk_hash)
            self.assertEqual(meta['content_path'], self.content)
            self.assertEqual(meta['container_id'], self.container_id)
            # BUG FIX: the original compared meta['content_id'] with
            # itself (always true); compare against the real content id.
            self.assertEqual(meta['content_id'], content.content_id)
            self.assertEqual(meta['chunk_id'], chunk.id)
            self.assertEqual(meta['chunk_pos'], str(pos))
            # Check that chunk data matches chunk hash from xattr
            self.assertEqual(meta['chunk_hash'], chunk_hash)
            # Check that chunk data matches chunk hash from database
            self.assertEqual(chunk.checksum, chunk_hash)
            full_path = encode_fullpath(
                self.account, self.container_name, self.content,
                meta['content_version'], meta['content_id'])
            self.assertEqual(meta['full_path'], full_path)
            self.assertEqual(meta['oio_version'], '4.2')
def _test_create(self, data_size):
    """Create an EC content and verify its metadata and chunk xattrs.

    Walks every metachunk, checking each chunk's xattr metadata and
    that the metachunk checksum matches the corresponding slice of
    the source data.
    """
    # generate random test data
    data = random_data(data_size)
    # using factory create new EC content
    content = self.content_factory.new(self.container_id, self.content,
                                       len(data), self.stgpol)
    # verify the factory gave us an ECContent
    self.assertEqual(type(content), ECContent)

    # perform the content creation
    content.create(BytesIO(data))

    meta, chunks = self.container_client.content_locate(
        cid=self.container_id, content=content.content_id)

    # verify metadata
    chunks = ChunksHelper(chunks)
    self.assertEqual(meta['hash'], md5_data(data))
    self.assertEqual(meta['length'], str(len(data)))
    self.assertEqual(meta['policy'], self.stgpol)
    self.assertEqual(meta['name'], self.content)

    metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \
        if len(data) != 0 else 1

    offset = 0
    # verify each metachunk
    for metapos in range(metachunk_nb):
        chunks_at_pos = content.chunks.filter(metapos=metapos)
        if len(chunks_at_pos) < 1:
            break
        metachunk_size = chunks_at_pos[0].size
        metachunk_hash = md5_data(data[offset:offset + metachunk_size])

        for chunk in chunks_at_pos:
            meta, stream = self.blob_client.chunk_get(chunk.url)
            self.assertEqual(meta['metachunk_size'], str(chunk.size))
            self.assertEqual(meta['metachunk_hash'], chunk.checksum)
            self.assertEqual(meta['content_path'], self.content)
            self.assertEqual(meta['container_id'], self.container_id)
            # BUG FIX: the original compared meta['content_id'] with
            # itself (always true); compare against the real content id.
            self.assertEqual(meta['content_id'], content.content_id)
            self.assertEqual(meta['chunk_id'], chunk.id)
            self.assertEqual(meta['chunk_pos'], chunk.pos)
            self.assertEqual(meta['chunk_hash'], md5_stream(stream))
            full_path = encode_fullpath(
                self.account, self.container_name, self.content,
                meta['content_version'], meta['content_id'])
            self.assertEqual(meta['full_path'], full_path)
            self.assertEqual(meta['oio_version'], '4.2')
            self.assertEqual(metachunk_hash, chunk.checksum)

        offset += metachunk_size
def _test_upload(self, stgpol, data_size):
    """Upload a duplicated content and verify metadata and every copy."""
    data = random_data(data_size)
    content = self.content_factory.new(self.container_id, "titi",
                                       len(data), stgpol)
    self.assertEqual(type(content), DupContent)

    content.upload(StringIO.StringIO(data))

    meta, chunks = self.container_client.content_show(
        cid=self.container_id, content=content.content_id)
    chunks = ChunksHelper(chunks)
    self.assertEqual(meta['hash'], md5_data(data))
    self.assertEqual(meta['length'], str(len(data)))
    self.assertEqual(meta['policy'], stgpol)
    self.assertEqual(meta['name'], "titi")

    metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
    if metachunk_nb == 0:
        metachunk_nb = 1  # special case for empty content

    copies_by_policy = {"THREECOPIES": 3, "TWOCOPIES": 2, "SINGLE": 1}
    try:
        nb_copy = copies_by_policy[stgpol]
    except KeyError:
        # BUG FIX: the original left nb_copy unbound (NameError) for
        # an unknown policy; fail with an explicit message instead.
        raise ValueError("Unsupported storage policy: %s" % stgpol)
    self.assertEqual(len(chunks), metachunk_nb * nb_copy)

    for pos in range(metachunk_nb):
        chunks_at_pos = chunks.filter(pos=pos)
        self.assertEqual(len(chunks_at_pos), nb_copy)

        data_begin = pos * self.chunk_size
        data_end = data_begin + self.chunk_size
        chunk_hash = md5_data(data[data_begin:data_end])

        for chunk in chunks_at_pos:
            meta, stream = self.blob_client.chunk_get(chunk.url)
            self.assertEqual(md5_stream(stream), chunk_hash)
            self.assertEqual(meta['content_size'], str(len(data)))
            self.assertEqual(meta['content_path'], "titi")
            self.assertEqual(meta['content_cid'], self.container_id)
            # BUG FIX: the original compared meta['content_id'] with
            # itself (always true); compare against the real content id.
            self.assertEqual(meta['content_id'], content.content_id)
            self.assertEqual(meta['chunk_id'], chunk.id)
            self.assertEqual(meta['chunk_pos'], str(pos))
            self.assertEqual(meta['chunk_hash'], chunk_hash)
def _test_create(self, stgpol, data_size):
    """Create a duplicated content and verify metadata and every copy."""
    data = random_data(data_size)
    content = self.content_factory.new(self.container_id, self.content,
                                       len(data), stgpol)
    content.create(StringIO(data))

    meta, chunks = self.container_client.content_show(
        cid=self.container_id, content=content.content_id)
    self.assertEqual(meta['hash'], md5_data(data))
    self.assertEqual(meta['length'], str(len(data)))
    self.assertEqual(meta['policy'], stgpol)
    self.assertEqual(meta['name'], self.content)

    metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
    if metachunk_nb == 0:
        metachunk_nb = 1  # special case for empty content

    chunks = ChunksHelper(chunks)
    # TODO: deduce nb_copy from the policy definition instead of
    # hard-coding the mapping here.
    copies_by_policy = {
        self.stgpol_threecopies: 3,
        self.stgpol_twocopies: 2,
        self.stgpol: 1,
    }
    try:
        nb_copy = copies_by_policy[stgpol]
    except KeyError:
        # BUG FIX: the original left nb_copy unbound (NameError) for
        # an unknown policy; fail with an explicit message instead.
        raise ValueError("Unsupported storage policy: %s" % stgpol)
    self.assertEqual(len(chunks), metachunk_nb * nb_copy)

    for pos in range(metachunk_nb):
        chunks_at_pos = chunks.filter(pos=pos)
        self.assertEqual(len(chunks_at_pos), nb_copy)

        data_begin = pos * self.chunk_size
        data_end = data_begin + self.chunk_size
        chunk_hash = md5_data(data[data_begin:data_end])

        for chunk in chunks_at_pos:
            meta, stream = self.blob_client.chunk_get(chunk.url)
            self.assertEqual(md5_stream(stream), chunk_hash)
            self.assertEqual(meta['content_path'], self.content)
            self.assertEqual(meta['container_id'], self.container_id)
            # BUG FIX: the original compared meta['content_id'] with
            # itself (always true); compare against the real content id.
            self.assertEqual(meta['content_id'], content.content_id)
            self.assertEqual(meta['chunk_id'], chunk.id)
            self.assertEqual(meta['chunk_pos'], str(pos))
            self.assertEqual(meta['chunk_hash'], chunk_hash)
def _test_create(self, data_size):
    """Create an EC content and verify its metadata and chunk xattrs."""
    # generate random test data
    data = random_data(data_size)
    # using factory create new EC content
    content = self.content_factory.new(
        self.container_id, self.content, len(data), self.stgpol)
    # verify the factory gave us an ECContent
    self.assertEqual(type(content), ECContent)

    # perform the content creation
    content.create(StringIO(data))

    meta, chunks = self.container_client.content_show(
        cid=self.container_id, content=content.content_id)

    # verify metadata
    chunks = ChunksHelper(chunks)
    self.assertEqual(meta['hash'], md5_data(data))
    self.assertEqual(meta['length'], str(len(data)))
    self.assertEqual(meta['policy'], self.stgpol)
    self.assertEqual(meta['name'], self.content)

    metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \
        if len(data) != 0 else 1

    # verify each metachunk
    for metapos in range(metachunk_nb):
        chunks_at_pos = content.chunks.filter(metapos=metapos)
        for chunk in chunks_at_pos:
            meta, stream = self.blob_client.chunk_get(chunk.url)
            self.assertEqual(meta['metachunk_size'], str(chunk.size))
            self.assertEqual(meta['metachunk_hash'], chunk.checksum)
            self.assertEqual(meta['content_path'], self.content)
            self.assertEqual(meta['container_id'], self.container_id)
            # BUG FIX: the original compared meta['content_id'] with
            # itself (always true); compare against the real content id.
            self.assertEqual(meta['content_id'], content.content_id)
            self.assertEqual(meta['chunk_id'], chunk.id)
            self.assertEqual(meta['chunk_pos'], chunk.pos)
            self.assertEqual(meta['chunk_hash'], md5_stream(stream))
def _upload(self, stream):
    """Upload *stream* metachunk by metachunk to the RAIN endpoint.

    For each metachunk position, PUTs a size-limited slice of the
    stream with the content/chunk metadata headers, collects the
    resulting chunk list from the response, then registers the
    chunks and the global MD5 with meta2.
    """
    # Running MD5 of everything read from the stream (updated as a
    # side effect of _limit_stream below).
    global_checksum = hashlib.md5()
    total_bytes_transferred = 0
    content_chunks = []

    def _limit_stream(stream, size):
        # Generator yielding at most `size` bytes from `stream` in
        # WRITE_CHUNK_SIZE pieces, feeding global_checksum on the way.
        read_size = 0
        while read_size < size:
            to_read = size - read_size
            if to_read > WRITE_CHUNK_SIZE:
                to_read = WRITE_CHUNK_SIZE
            data = stream.read(to_read)
            global_checksum.update(data)
            # NOTE(review): assumes read() returned exactly to_read
            # bytes; a short read would be miscounted — confirm the
            # stream contract upstream.
            read_size += to_read
            yield data

    def _decode_chunklist(chunklist):
        # Parse the "pos|url|size|hash;..." header returned by the
        # rainx service into chunk dicts.
        # NOTE(review): `hash` shadows the builtin of the same name.
        res = []
        for c in chunklist.split(';'):
            pos, url, size, hash = c.split('|')
            res.append({
                "url": "http://%s" % url,
                "pos": pos,
                "size": int(size),
                "hash": hash
            })
        return res

    for pos in xrange(self._get_metachunk_nb()):
        chunks_at_pos = self.chunks.filter(metapos=pos)
        # Metachunk byte budget, taken from the first chunk overall;
        # presumably all chunks share one size — TODO confirm, and
        # clamped to what is actually left to send for the last one.
        chunk_size = self.chunks[0].size
        remaining_bytes = self.length - total_bytes_transferred
        if chunk_size > remaining_bytes:
            chunk_size = remaining_bytes
        # Per-request metadata headers describing the content and the
        # target rawx services for this metachunk.
        headers = {}
        headers["X-oio-chunk-meta-content-storage-policy"] = \
            self.stgpol_name
        headers["X-oio-chunk-meta-rawxlist"] = \
            self._encode_rawxlist(chunks_at_pos)
        headers[chunk_headers["content_id"]] = self.content_id
        headers[chunk_headers["content_version"]] = self.version
        headers[chunk_headers["content_path"]] = self.path
        headers[chunk_headers["content_size"]] = self.length
        headers[chunk_headers["content_chunksnb"]] = \
            self._get_metachunk_nb()
        headers[chunk_headers["content_cid"]] = self.container_id
        headers[chunk_headers["chunk_pos"]] = pos
        headers["X-oio-chunk-meta-chunk-size"] = chunk_size
        headers[chunk_headers["content_mimetype"]] = self.mime_type
        headers[chunk_headers["content_chunkmethod"]] = self.chunk_method
        # Stream the slice straight to the rainx endpoint (chunked
        # transfer via the generator); fail fast on HTTP errors.
        resp = self.session.put(self._get_rain_addr(),
                                data=_limit_stream(stream, chunk_size),
                                headers=headers)
        resp.raise_for_status()
        content_chunks.extend(_decode_chunklist(resp.headers['chunklist']))
        total_bytes_transferred += chunk_size

    self.chunks = ChunksHelper(content_chunks)
    self.hash = global_checksum.hexdigest().upper()
    self._meta2_create_object()