def encode_fullpath(self, chunk_inode, chunk_id, account, container, path,
                    version, content_id):
    """Validate a chunk's identity and build its fullpath string.

    Checks that the chunk file exists with the expected inode, then
    validates the fullpath components and returns the uppercased chunk ID
    together with the encoded fullpath.

    :raises ValueError: on malformed chunk ID, version or content ID
    :raises OrphanChunk: when the chunk is missing or its inode differs
    """
    # Check if chunk exists and has the same inode.
    if not is_hexa(chunk_id) or len(chunk_id) != STRLEN_CHUNKID:
        raise ValueError('chunk ID must be hexadecimal (%s)'
                         % STRLEN_CHUNKID)
    try:
        actual_inode = os.stat(self._get_path(chunk_id)).st_ino
    except OSError:
        raise OrphanChunk('No such chunk: possible orphan chunk')
    if actual_inode != chunk_inode:
        raise OrphanChunk('Not the same inode: possible orphan chunk')

    # Check fullpath components.
    if isinstance(version, basestring):
        try:
            version = int(version)
        except ValueError:
            raise ValueError('version must be a number')
    if version <= 0:
        raise ValueError('version must be positive')
    if not is_hexa(content_id):
        raise ValueError('content ID must be hexadecimal')

    # Calls the module-level encode_fullpath(), not this method.
    fullpath = encode_fullpath(account, container, path, version,
                               content_id.upper())
    return chunk_id.upper(), fullpath
def test_copy_with_same_chunkid(self):
    """Copying a chunk onto itself must be refused with a 403."""
    metachunk_hash = md5().hexdigest()
    trailers = {'x-oio-chunk-meta-metachunk-size': 1,
                'x-oio-chunk-meta-metachunk-hash': metachunk_hash}
    chunkid1 = random_chunk_id()
    chunkdata1 = random_buffer(string.printable, 1)
    chunkurl1 = self._rawx_url(chunkid1)
    headers1 = self._chunk_attr(chunkid1, chunkdata1)
    self._check_not_present(chunkurl1)
    resp, _ = self._http_request(chunkurl1, 'PUT', chunkdata1, headers1,
                                 trailers)
    self.assertEqual(201, resp.status)
    self.assertEqual(headers1['x-oio-chunk-meta-chunk-hash'].upper(),
                     resp.getheader('x-oio-chunk-meta-chunk-hash'))
    # The chunk size is numeric: the original applied .upper() to it,
    # a copy-paste of the hash assertion above. Compare it as-is, like
    # the other tests in this file do.
    self.assertEqual(headers1['x-oio-chunk-meta-chunk-size'],
                     resp.getheader('x-oio-chunk-meta-chunk-size'))

    # COPY with Destination == source must be rejected.
    headers = {}
    headers["Destination"] = chunkurl1
    headers['x-oio-chunk-meta-full-path'] = encode_fullpath(
        "account-snapshot", "container-snapshot", "content-snapshot",
        1456938361143741, random_id(32))
    resp, _ = self._http_request(chunkurl1, 'COPY', '', headers)
    self.assertEqual(403, resp.status)
def test_copy_with_nonexistent_source(self):
    """A COPY whose source chunk does not exist must answer 404."""
    metachunk_hash = md5().hexdigest()
    trailers = {'x-oio-chunk-meta-metachunk-size': '1',
                'x-oio-chunk-meta-metachunk-hash': metachunk_hash}
    # This chunk ID is never uploaded: it is the missing copy source.
    missing_id = random_chunk_id()
    missing_url = self._rawx_url(missing_id)
    # Upload a real chunk to act as the copy destination.
    dest_id = random_chunk_id()
    dest_data = random_buffer(string.printable, 1).encode('utf-8')
    dest_url = self._rawx_url(dest_id)
    dest_headers = self._chunk_attr(dest_id, dest_data)
    self._check_not_present(dest_url)
    resp, _ = self._http_request(dest_url, 'PUT', dest_data,
                                 dest_headers, trailers)
    self.assertEqual(201, resp.status)
    self.assertEqual(dest_headers['x-oio-chunk-meta-chunk-hash'].upper(),
                     resp.getheader('x-oio-chunk-meta-chunk-hash'))
    self.assertEqual(dest_headers['x-oio-chunk-meta-chunk-size'],
                     resp.getheader('x-oio-chunk-meta-chunk-size'))

    copy_headers = {}
    copy_headers["Destination"] = dest_url
    copy_headers['x-oio-chunk-meta-full-path'] = encode_fullpath(
        "account-snapshot", "container-snapshot", "content-snapshot",
        1456938361143741, random_id(32))
    resp, _ = self._http_request(missing_url, 'COPY', '', copy_headers)
    self.assertEqual(404, resp.status)
def locate_objects(self, objects):
    """Locate every (container, object, version) triple and yield the
    formatted chunk entries of each object that could be located.

    Objects that cannot be located yield either nothing (logged) or a
    single error tuple for missing objects.
    """
    reqid = self.app.request_id(self.reqid_prefix)
    for ct, obj, vers in objects:
        # Human-readable fallback identifier, refined after location.
        obj_item = '/'.join(
            quote(part) for part in (self.app.options.account, ct, obj,
                                     str(vers)))
        try:
            obj_md, chunks = self.storage.object_locate(
                self.app.options.account, ct, obj, version=vers,
                chunk_info=True, reqid=reqid)
            obj_item = encode_fullpath(
                self.app.options.account, ct, obj,
                obj_md['version'], obj_md['id'])
        except exceptions.NoSuchContainer as err:
            self.logger.warn('Failed to locate object %s: %s',
                             obj_item, err)
            # Already reported by upper level
            continue
        except exceptions.NoSuchObject as err:
            yield ('rawx', obj_item, None, None, None, str(err))
            continue
        except Exception as err:
            self.logger.warn('Failed to locate object %s: %s',
                             obj_item, err)
            continue
        finally:
            # Fresh request ID for the next iteration, whatever happened.
            reqid = self.app.request_id(self.reqid_prefix)
        for chunk in self.format_chunks(chunks, obj_item):
            yield chunk
def _put_chunk(self):
    """Upload one chunk with random identifiers and return them.

    :returns: (account, container, cid, content_path, content_version,
        content_id, chunk_id)
    """
    account = random_str(16)
    container = random_str(16)
    cid = cid_from_name(account, container)
    content_path = random_str(16)
    content_version = 1234567890
    content_id = random_id(32)
    fullpath = encode_fullpath(account, container, content_path,
                               content_version, content_id)
    chunk_id = random_chunk_id()
    data = random_buffer(string.printable, 100)
    meta = {
        'full_path': fullpath,
        'container_id': cid,
        'content_path': content_path,
        'version': content_version,
        'id': content_id,
        'chunk_method': 'ec/algo=liberasurecode_rs_vand,k=6,m=3',
        'policy': 'TESTPOLICY',
        'chunk_hash': md5(data).hexdigest().upper(),
        'oio_version': OIO_VERSION,
        'chunk_pos': 0,
        'metachunk_hash': md5().hexdigest(),
        'metachunk_size': 1024,
    }
    chunk_url = 'http://' + self.rawx_id + '/' + chunk_id
    self.blob_client.chunk_put(chunk_url, meta, data)
    sleep(1)  # ensure chunk event have been processed
    return (account, container, cid, content_path, content_version,
            content_id, chunk_id)
def test_strange_paths(self):
    """Create objects with unusual names, check paths, fullpaths
    and listing, then clean everything up."""
    answers = dict()
    for cname in strange_paths:
        content = self._new_content(self.stgpol, b"nobody cares", cname)
        answers[cname] = content

    _, listing = self.container_client.content_list(
        self.account, self.container_name)
    if PY2:
        obj_set = {k["name"].encode('utf-8')
                   for k in listing["objects"]}
    else:
        obj_set = {k["name"] for k in listing["objects"]}
    try:
        # Ensure the saved path is the one we gave the object
        for cname in answers:
            self.assertEqual(cname, answers[cname].path)
            fullpath = encode_fullpath(
                self.account, self.container_name, cname,
                answers[cname].version, answers[cname].content_id)
            self.assertEqual(answers[cname].full_path, fullpath)

        # Ensure all objects appear in listing
        for cname in strange_paths:
            self.assertIn(cname, obj_set)
    finally:
        # Cleanup: delete each created object. The original code
        # called content.delete() (the loop leftover, i.e. the last
        # created object) for every name, leaving all the others behind.
        for cname in answers:
            try:
                answers[cname].delete()
            except Exception:
                pass
def __init__(self, conf, container_id, metadata, chunks, storage_method,
             account, container_name, blob_client=None,
             container_client=None, logger=None):
    """Wrap content metadata and chunks with the clients needed to
    manipulate them."""
    self.conf = conf
    self.container_id = container_id
    self.metadata = metadata
    self.chunks = ChunksHelper(chunks)
    self.storage_method = storage_method
    self.logger = logger or get_logger(self.conf)
    self.blob_client = blob_client or BlobClient(conf)
    self.container_client = container_client or ContainerClient(
        self.conf, logger=self.logger)

    # FIXME: all these may be properties
    self.content_id = self.metadata["id"]
    self.path = self.metadata["name"]
    self.length = int(self.metadata["length"])
    self.version = self.metadata["version"]
    self.checksum = self.metadata["hash"]
    self.chunk_method = self.metadata["chunk_method"]
    self.account = account
    self.container_name = container_name
    # Compute the fullpath only when the metadata did not provide it.
    try:
        self.full_path = metadata['full_path']
    except KeyError:
        self.full_path = encode_fullpath(
            self.account, self.container_name, self.path, self.version,
            self.content_id)
def test_rebuild_old_chunk(self):
    """Rebuild a chunk bearing pre-4.2 xattr and verify the new chunk
    carries the same data and up-to-date metadata."""
    for c in self.chunks:
        convert_to_old_chunk(
            self._chunk_path(c), self.account, self.container, self.path,
            self.version, self.content_id)

    chunk = random.choice(self.chunks)
    chunk_volume = chunk['url'].split('/')[2]
    chunk_id = chunk['url'].split('/')[3]
    chunk_headers, chunk_stream = self.blob_client.chunk_get(
        chunk['url'], check_headers=False)
    os.remove(self._chunk_path(chunk))
    chunks_kept = list(self.chunks)
    chunks_kept.remove(chunk)

    conf = self.conf.copy()
    conf['allow_same_rawx'] = True
    rebuilder = BlobRebuilder(conf, service_id=chunk_volume)
    rebuilder_worker = rebuilder.create_worker(None, None)
    rebuilder_worker._process_item(
        (self.ns, self.cid, self.content_id, chunk_id))

    _, new_chunks = self.api.object_locate(
        self.account, self.container, self.path)
    # Materialize the chunk list once. The original code stored it in a
    # dead, misnamed 'new_chunk' variable and never used it.
    new_chunks = list(new_chunks)
    self.assertEqual(len(new_chunks), len(chunks_kept) + 1)

    url_kept = [c['url'] for c in chunks_kept]
    new_chunk = None
    for c in new_chunks:
        if c['url'] not in url_kept:
            # Exactly one chunk must be new.
            self.assertIsNone(new_chunk)
            new_chunk = c

    self.assertNotEqual(chunk['real_url'], new_chunk['real_url'])
    self.assertNotEqual(chunk['url'], new_chunk['url'])
    self.assertEqual(chunk['pos'], new_chunk['pos'])
    self.assertEqual(chunk['size'], new_chunk['size'])
    self.assertEqual(chunk['hash'], new_chunk['hash'])

    new_chunk_headers, new_chunk_stream = self.blob_client.chunk_get(
        new_chunk['url'])
    chunk_data = b''.join(chunk_stream)
    new_chunk_data = b''.join(new_chunk_stream)
    self.assertEqual(chunk_data, new_chunk_data)
    fullpath = encode_fullpath(self.account, self.container, self.path,
                               self.version, self.content_id)
    self.assertEqual(fullpath, new_chunk_headers['full_path'])
    del new_chunk_headers['full_path']
    self.assertNotEqual(chunk_headers['chunk_id'],
                        new_chunk_headers['chunk_id'])
    new_chunk_id = new_chunk['url'].split('/')[3]
    self.assertEqual(new_chunk_id, new_chunk_headers['chunk_id'])
    del chunk_headers['chunk_id']
    del new_chunk_headers['chunk_id']
    self.assertEqual(OIO_VERSION, new_chunk_headers['oio_version'])
    del chunk_headers['oio_version']
    del new_chunk_headers['oio_version']
    # Once the differing keys are removed, all metadata must match.
    self.assertEqual(chunk_headers, new_chunk_headers)
def _setup(self):
    """Initialize the well-known container/content identifiers used
    by the tests."""
    self.container = 'blob'
    self.cid = cid_from_name(self.account, 'blob')
    self.content_path = 'test-plop'
    self.content_version = '1456938361143740'
    self.content_id = '0123456789ABCDEF'
    self.fullpath = encode_fullpath(self.account, 'blob',
                                    self.content_path,
                                    self.content_version,
                                    self.content_id)
def test_get_ec(self):
    """The factory must build an ECContent from EC metadata."""
    meta = {
        "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
        "ctime": "1450176946",
        "deleted": "False",
        "hash": "E952A419957A6E405BFC53EC65483F73",
        "hash_method": "md5",
        "id": "3FA2C4A1ED2605005335A276890EC458",
        "length": "658",
        "mime_type": "application/octet-stream",
        "name": "tox.ini",
        "policy": self.stgpol_ec,
        "version": "1450176946676289",
        "oio_version": "4.2",
    }
    chunks = [
        {"url": "http://127.0.0.1:6012/A0A0", "pos": "0.0", "size": 512,
         "hash": "E7D4E4AD460971CA2E3141F2102308D4"},
        {"url": "http://127.0.0.1:6010/A01", "pos": "0.1", "size": 146,
         "hash": "760AB5DA7C51A3654F1CA622687CD6C3"},
        {"url": "http://127.0.0.1:6011/A00", "pos": "0.2", "size": 512,
         "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"},
        {"url": "http://127.0.0.1:6013/A0A1", "pos": "0.3", "size": 512,
         "hash": "DA9D7F72AEEA5791565724424CE45C16"},
    ]
    self.content_factory.container_client.content_locate = Mock(
        return_value=(meta, chunks))
    content = self.content_factory.get(
        "xxx_container_id", "xxx_content_id", account=self.account,
        container_name=self.container_name)
    self.assertEqual(type(content), ECContent)
    self.assertEqual(content.content_id,
                     "3FA2C4A1ED2605005335A276890EC458")
    self.assertEqual(content.length, 658)
    self.assertEqual(content.path, "tox.ini")
    expected_fullpath = encode_fullpath(
        self.account, self.container_name, "tox.ini", meta['version'],
        meta['id'])
    self.assertEqual(content.full_path, expected_fullpath)
    self.assertEqual(content.version, "1450176946676289")
    # TODO test storage method
    self.assertEqual(len(content.chunks), 4)
    for index, chunk in enumerate(chunks):
        self.assertEqual(content.chunks[index].raw(), chunk)
def _test_create(self, stgpol, data_size):
    """Create a replicated content and verify its metadata and every
    chunk copy (data, hashes and xattr)."""
    data = random_data(data_size)
    content = self.content_factory.new(self.container_id, self.content,
                                       len(data), stgpol)
    content.create(BytesIO(data))

    meta, chunks = self.container_client.content_locate(
        cid=self.container_id, content=content.content_id)
    self.assertEqual(meta['hash'], md5_data(data))
    self.assertEqual(meta['length'], str(len(data)))
    self.assertEqual(meta['policy'], stgpol)
    self.assertEqual(meta['name'], self.content)

    metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
    if metachunk_nb == 0:
        metachunk_nb = 1  # special case for empty content

    chunks = ChunksHelper(chunks)

    # TODO NO NO NO
    if stgpol == self.stgpol_threecopies:
        nb_copy = 3
    elif stgpol == self.stgpol_twocopies:
        nb_copy = 2
    elif stgpol == self.stgpol:
        nb_copy = 1

    self.assertEqual(len(chunks), metachunk_nb * nb_copy)

    for pos in range(metachunk_nb):
        chunks_at_pos = chunks.filter(pos=pos)
        self.assertEqual(len(chunks_at_pos), nb_copy)

        data_begin = pos * self.chunk_size
        data_end = pos * self.chunk_size + self.chunk_size
        chunk_hash = md5_data(data[data_begin:data_end])

        for chunk in chunks_at_pos:
            meta, stream = self.blob_client.chunk_get(chunk.url)
            self.assertEqual(md5_stream(stream), chunk_hash)
            self.assertEqual(meta['content_path'], self.content)
            self.assertEqual(meta['container_id'], self.container_id)
            # The original compared meta['content_id'] with itself
            # (always true); compare with the created content's ID.
            self.assertEqual(meta['content_id'], content.content_id)
            self.assertEqual(meta['chunk_id'], chunk.id)
            self.assertEqual(meta['chunk_pos'], str(pos))
            # Check that chunk data matches chunk hash from xattr
            self.assertEqual(meta['chunk_hash'], chunk_hash)
            # Check that chunk data matches chunk hash from database
            self.assertEqual(chunk.checksum, chunk_hash)
            full_path = encode_fullpath(
                self.account, self.container_name, self.content,
                meta['content_version'], meta['content_id'])
            self.assertEqual(meta['full_path'], full_path)
            self.assertEqual(meta['oio_version'], '4.2')
def test_xattr_bad_xattr_content_container(self):
    """The audit must raise OrphanChunk when the fullpath xattr points
    to another container."""
    self.init_content()
    bad_fullpath = encode_fullpath(
        self.account, 'WRONG_REF', self.content.path,
        self.content.version, self.content.id)
    xattr.setxattr(
        self.chunk.path,
        'user.' + CHUNK_XATTR_CONTENT_FULLPATH_PREFIX
        + str(self.chunk.id),
        bad_fullpath)
    self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit,
                      self.chunk.path, self.chunk.id)
def test_xattr_bad_xattr_content_id(self):
    """The audit must raise OrphanChunk when the fullpath xattr carries
    a wrong content ID."""
    self.init_content()
    bad_fullpath = encode_fullpath(
        self.account, self.ref, self.content.path,
        self.content.version, '0123456789ABCDEF')
    xattr.setxattr(
        self.chunk.path,
        'user.' + CHUNK_XATTR_CONTENT_FULLPATH_PREFIX
        + str(self.chunk.id),
        bad_fullpath)
    self.assertRaises(exc.OrphanChunk, self.auditor.chunk_audit,
                      self.chunk.path, self.chunk.id)
def test_move_old_chunk(self):
    """Move a chunk bearing pre-4.2 xattr and verify the new chunk
    carries the same data and up-to-date metadata."""
    for chunk in self.chunks:
        convert_to_old_chunk(self._chunk_path(chunk), self.account,
                             self.container, self.path, self.version,
                             self.content_id)

    orig_chunk = random.choice(self.chunks)
    chunk_volume = orig_chunk['url'].split('/')[2]
    chunk_id = orig_chunk['url'].split('/')[3]
    chunk_headers, chunk_stream = self.blob_client.chunk_get(
        orig_chunk['url'], check_headers=False)
    chunks_kept = list(self.chunks)
    chunks_kept.remove(orig_chunk)

    mover = BlobMoverWorker(self.conf, None,
                            self.rawx_volumes[chunk_volume])
    mover.chunk_move(self._chunk_path(orig_chunk), chunk_id)

    _, new_chunks = self.api.object_locate(self.account, self.container,
                                           self.path)
    # Materialize the chunk list once. The original code stored it in a
    # dead, misnamed 'new_chunk' variable and never used it.
    new_chunks = list(new_chunks)
    self.assertEqual(len(new_chunks), len(chunks_kept) + 1)

    url_kept = [c['url'] for c in chunks_kept]
    new_chunk = None
    for chunk in new_chunks:
        if chunk['url'] not in url_kept:
            # Exactly one chunk must be new.
            self.assertIsNone(new_chunk)
            new_chunk = chunk

    self.assertNotEqual(orig_chunk['real_url'], new_chunk['real_url'])
    self.assertNotEqual(orig_chunk['url'], new_chunk['url'])
    self.assertEqual(orig_chunk['pos'], new_chunk['pos'])
    self.assertEqual(orig_chunk['size'], new_chunk['size'])
    self.assertEqual(orig_chunk['hash'], new_chunk['hash'])

    new_chunk_headers, new_chunk_stream = self.blob_client.chunk_get(
        new_chunk['url'])
    chunk_data = b''.join(chunk_stream)
    new_chunk_data = b''.join(new_chunk_stream)
    self.assertEqual(chunk_data, new_chunk_data)
    fullpath = encode_fullpath(self.account, self.container, self.path,
                               self.version, self.content_id)
    self.assertEqual(fullpath, new_chunk_headers['full_path'])
    del new_chunk_headers['full_path']
    self.assertNotEqual(chunk_headers['chunk_id'],
                        new_chunk_headers['chunk_id'])
    new_chunk_id = new_chunk['url'].split('/')[3]
    self.assertEqual(new_chunk_id, new_chunk_headers['chunk_id'])
    del chunk_headers['chunk_id']
    del new_chunk_headers['chunk_id']
    self.assertEqual(OIO_VERSION, new_chunk_headers['oio_version'])
    del chunk_headers['oio_version']
    del new_chunk_headers['oio_version']
    # Once the differing keys are removed, all metadata must match.
    self.assertEqual(chunk_headers, new_chunk_headers)
def _convert_and_check(self, chunk_volume, chunk_path, chunk_id_info,
                       expected_raw_meta=None, expected_errors=0):
    """Run the blob converter on one chunk and verify its metadata.

    :param chunk_volume: key of the rawx volume in self.rawx_volumes
    :param chunk_path: path of the chunk file to convert
    :param chunk_id_info: mapping of chunk ID to
        (account, container, path, version, content_id)
    :param expected_raw_meta: if set, the raw xattr must match exactly
        and the new-format checks are skipped
    :param expected_errors: expected converter error count
    """
    conf = self.conf
    conf['volume'] = self.rawx_volumes[chunk_volume]
    converter = BlobConverter(conf, logger=self.logger)
    converter.safe_convert_chunk(chunk_path)
    # Exactly one chunk processed in one pass, with the expected errors.
    self.assertEqual(1, converter.total_chunks_processed)
    self.assertEqual(1, converter.passes)
    self.assertEqual(expected_errors, converter.errors)
    checker = Checker(self.ns)
    for chunk_id, info in chunk_id_info.items():
        account, container, path, version, content_id = info
        fullpath = encode_fullpath(account, container, path, version,
                                   content_id)
        cid = cid_from_name(account, container)
        meta, raw_meta = read_chunk_metadata(chunk_path, chunk_id)
        # Parsed metadata must reflect the expected object identity.
        self.assertEqual(meta.get('chunk_id'), chunk_id)
        self.assertEqual(meta.get('container_id'), cid)
        self.assertEqual(meta.get('content_path'), path)
        self.assertEqual(meta.get('content_version'), version)
        self.assertEqual(meta.get('content_id'), content_id)
        self.assertEqual(meta.get('full_path'), fullpath)
        # Run the integrity checker on the converted chunk.
        checker.check(
            Target(account, container=container, obj=path,
                   chunk='http://' + converter.volume_id + '/'
                   + chunk_id))
        for _ in checker.run():
            pass
        self.assertTrue(checker.report())
        if expected_raw_meta:
            self.assertDictEqual(expected_raw_meta, raw_meta)
            continue
        # New format: the legacy per-field xattr must be gone...
        self.assertNotIn(CHUNK_XATTR_KEYS['chunk_id'], raw_meta)
        self.assertNotIn(CHUNK_XATTR_KEYS['container_id'], raw_meta)
        self.assertNotIn(CHUNK_XATTR_KEYS['content_path'], raw_meta)
        self.assertNotIn(CHUNK_XATTR_KEYS['content_version'], raw_meta)
        self.assertNotIn(CHUNK_XATTR_KEYS['content_id'], raw_meta)
        # ...replaced by a single fullpath xattr.
        self.assertIn(CHUNK_XATTR_CONTENT_FULLPATH_PREFIX + chunk_id,
                      raw_meta)
        # No old-style 'oio:' keys may remain.
        for k in raw_meta.keys():
            if k.startswith('oio:'):
                self.fail('old fullpath always existing')
        self.assertEqual(raw_meta[CHUNK_XATTR_KEYS['oio_version']],
                         OIO_VERSION)
def __init__(self, account, ref):
    """Build a fake content with random path/ID and 1280 random bytes."""
    self.cid = cid_from_name(account, ref)
    self.path = random_str(6)
    self.version = 1
    self.id = random_id(32)
    self.fullpath = encode_fullpath(account, ref, self.path,
                                    self.version, self.id)
    self.data = os.urandom(1280)
    self.size = len(self.data)
    # 'checksum' instead of 'md5' to avoid shadowing the algorithm name.
    checksum = hashlib.new('md5')
    checksum.update(self.data)
    self.hash = checksum.hexdigest().lower()
def _test_create(self, data_size):
    """Create an EC content and verify metadata of every chunk."""
    # generate random test data
    data = random_data(data_size)
    # using factory create new EC content
    content = self.content_factory.new(self.container_id, self.content,
                                       len(data), self.stgpol)
    # verify the factory gave us an ECContent
    self.assertEqual(type(content), ECContent)

    # perform the content creation
    content.create(BytesIO(data))

    meta, chunks = self.container_client.content_locate(
        cid=self.container_id, content=content.content_id)

    # verify metadata
    chunks = ChunksHelper(chunks)
    self.assertEqual(meta['hash'], md5_data(data))
    self.assertEqual(meta['length'], str(len(data)))
    self.assertEqual(meta['policy'], self.stgpol)
    self.assertEqual(meta['name'], self.content)

    metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \
        if len(data) != 0 else 1

    offset = 0
    # verify each metachunk
    for metapos in range(metachunk_nb):
        chunks_at_pos = content.chunks.filter(metapos=metapos)
        if len(chunks_at_pos) < 1:
            break
        metachunk_size = chunks_at_pos[0].size
        metachunk_hash = md5_data(data[offset:offset + metachunk_size])

        for chunk in chunks_at_pos:
            meta, stream = self.blob_client.chunk_get(chunk.url)
            self.assertEqual(meta['metachunk_size'], str(chunk.size))
            self.assertEqual(meta['metachunk_hash'], chunk.checksum)
            self.assertEqual(meta['content_path'], self.content)
            self.assertEqual(meta['container_id'], self.container_id)
            # The original compared meta['content_id'] with itself
            # (always true); compare with the created content's ID.
            self.assertEqual(meta['content_id'], content.content_id)
            self.assertEqual(meta['chunk_id'], chunk.id)
            self.assertEqual(meta['chunk_pos'], chunk.pos)
            self.assertEqual(meta['chunk_hash'], md5_stream(stream))
            full_path = encode_fullpath(
                self.account, self.container_name, self.content,
                meta['content_version'], meta['content_id'])
            self.assertEqual(meta['full_path'], full_path)
            self.assertEqual(meta['oio_version'], '4.2')
            self.assertEqual(metachunk_hash, chunk.checksum)

        offset += metachunk_size
def _item_to_string(self, item, **kwargs):
    """Build a human-readable description of a checker item."""
    try:
        url = item['url']
        fullpath = encode_fullpath(url['account'], url['user'],
                                   url['path'], url.get('version', 1),
                                   url['content'])
        # TODO(FVE): maybe tell some numbers about chunks
        if item.get('event') == EventTypes.CONTENT_PERFECTIBLE:
            return 'perfectible object %s' % (fullpath, )
        return 'object %s' % (fullpath, )
    except (KeyError, ValueError) as err:
        return '<unknown item> ({0})'.format(repr(err))
def _create_chunk(self, rawx_path, alias="toto", suffix=''):
    """Create a fake chunk file on disk with all its extended
    attributes (old and new formats).

    :returns: (chunk_path, container_id, content_id, chunk_id)
    """
    cname = random_str(8)
    container_id = cid_from_name(self.account, cname)
    content_id = random_id(32)
    chunk_id = random_id(64)
    chunk_dir = "%s/%s" % (rawx_path, chunk_id[0:3])
    if not os.path.isdir(chunk_dir):
        os.makedirs(chunk_dir)
    chunk_path = "%s/%s%s" % (chunk_dir, chunk_id, suffix)
    with open(chunk_path, "w") as chunk_file:
        chunk_file.write("toto")
    # pylint: disable=no-member
    chunk_attrs = [
        ('chunk_hash', 32 * b'0'),
        ('chunk_id', enc(chunk_id)),
        ('chunk_pos', b'0'),
        ('chunk_size', b'4'),
        ('content_policy', b'TESTPOLICY'),
        ('content_chunkmethod', b'plain/nb_copy=3'),
        ('content_version', b'1'),
        # Old (oio-sds < 4.2) extended attributes
        ('container_id', enc(container_id)),
        ('content_id', enc(content_id)),
        ('content_path', enc(alias)),
        # New (oio-sds >= 4.2) extended attributes
        ('oio_version', enc(OIO_VERSION)),
    ]
    for key, value in chunk_attrs:
        xattr.setxattr(chunk_path, 'user.' + chunk_xattr_keys[key],
                       value)
    fullpath = encode_fullpath(self.account, cname, alias, 1, content_id)
    xattr.setxattr(
        chunk_path,
        'user.%s%s' % (CHUNK_XATTR_CONTENT_FULLPATH_PREFIX, chunk_id),
        enc(fullpath))
    return chunk_path, container_id, content_id, chunk_id
def _link_chunk(self, target_chunk_id):
    """Create a link to an existing chunk under a fresh random fullpath.

    :returns: (account, container, cid, content_path, content_version,
        content_id, chunk_id) describing the link
    """
    account = random_str(16)
    container = random_str(16)
    cid = cid_from_name(account, container)
    content_path = random_str(16)
    content_version = 1234567890
    content_id = random_id(32)
    fullpath = encode_fullpath(account, container, content_path,
                               content_version, content_id)
    target_url = 'http://' + self.rawx_id + '/' + target_chunk_id
    _, link = self.blob_client.chunk_link(target_url, None, fullpath)
    chunk_id = link.split('/')[-1]
    sleep(1)  # ensure chunk event have been processed
    return (account, container, cid, content_path, content_version,
            content_id, chunk_id)
def test_get_plain(self):
    """The factory must build a PlainContent from plain/nb_copy
    metadata."""
    meta = {
        "chunk_method": "plain/nb_copy=2",
        "ctime": "1450176946",
        "deleted": "False",
        "hash": "E952A419957A6E405BFC53EC65483F73",
        "hash_method": "md5",
        "id": "3FA2C4A1ED2605005335A276890EC458",
        "length": "658",
        "mime_type": "application/octet-stream",
        "name": "tox.ini",
        "policy": self.stgpol_twocopies,
        "version": "1450176946676289",
        "oio_version": "4.2",
    }
    chunks = [
        {"url": "http://127.0.0.1:6010/A0", "pos": "0", "size": 658,
         "hash": "E952A419957A6E405BFC53EC65483F73"},
        {"url": "http://127.0.0.1:6011/A1", "pos": "0", "size": 658,
         "hash": "E952A419957A6E405BFC53EC65483F73"},
    ]
    self.content_factory.container_client.content_locate = Mock(
        return_value=(meta, chunks))
    content = self.content_factory.get(
        "xxx_container_id", "xxx_content_id", account=self.account,
        container_name=self.container_name)
    self.assertEqual(type(content), PlainContent)
    self.assertEqual(content.content_id,
                     "3FA2C4A1ED2605005335A276890EC458")
    self.assertEqual(content.length, 658)
    self.assertEqual(content.path, "tox.ini")
    self.assertEqual(content.version, "1450176946676289")
    expected_fullpath = encode_fullpath(
        self.account, self.container_name, "tox.ini", meta['version'],
        meta['id'])
    self.assertEqual(content.full_path, expected_fullpath)
    # TODO test storage_method
    self.assertEqual(len(content.chunks), 2)
    for index, chunk in enumerate(chunks):
        self.assertEqual(content.chunks[index].raw(), chunk)
def test_copy_errors(self):
    """Check the error statuses returned by the rawx COPY method."""
    length = 100
    chunkid = random_chunk_id()
    chunkdata = random_buffer(string.printable, length)
    chunkurl = self._rawx_url(chunkid)
    self._check_not_present(chunkurl)
    headers = self._chunk_attr(chunkid, chunkdata)
    # TODO take random legit value
    metachunk_size = 9 * length
    # TODO should also include meta-chunk-hash
    metachunk_hash = md5().hexdigest()
    trailers = {'x-oio-chunk-meta-metachunk-size': metachunk_size,
                'x-oio-chunk-meta-metachunk-hash': metachunk_hash}
    # Initial put that must succeed
    resp, body = self._http_request(chunkurl, 'PUT', chunkdata, headers,
                                    trailers)
    self.assertEqual(201, resp.status)
    self.assertEqual(headers['x-oio-chunk-meta-chunk-hash'].upper(),
                     resp.getheader('x-oio-chunk-meta-chunk-hash'))
    self.assertEqual(headers['x-oio-chunk-meta-chunk-size'],
                     resp.getheader('x-oio-chunk-meta-chunk-size'))
    # Destination ID keeps the source's 4 leading characters
    # (presumably to land in the same hashed directory -- TODO confirm).
    copyid = random_chunk_id()
    copyid = chunkid[:-60] + copyid[-60:]
    copyurl = self._rawx_url(copyid)
    # COPY without the full-path header: expected 400.
    headers = {}
    headers["Destination"] = copyurl
    resp, _ = self._http_request(chunkurl, 'COPY', '', headers)
    self.assertEqual(400, resp.status)
    # COPY onto itself (Destination == source): expected 403.
    headers = {}
    headers["Destination"] = chunkurl
    headers['x-oio-chunk-meta-full-path'] = encode_fullpath(
        "account-snapshot", "container-snapshot", "test" + "-snapshot",
        1456938361143741, random_id(32))
    resp, _ = self._http_request(chunkurl, 'COPY', '', headers)
    self.assertEqual(403, resp.status)
    # COPY without a Destination header: expected 400.
    headers = {}
    resp, _ = self._http_request(chunkurl, 'COPY', '', headers)
    self.assertEqual(400, resp.status)
def test_strange_paths(self):
    """Create objects with unusual names, check paths, fullpaths
    and listing, then clean everything up."""
    strange_paths = [
        "Annual report.txt",
        "foo+bar=foobar.txt",
        "100%_bug_free.c",
        "forward/slash/allowed",
        "I\\put\\backslashes\\and$dollar$signs$in$file$names",
        "Je suis tombé sur la tête, mais ça va bien.",
        "%s%f%u%d%%",
        "{1},{0},{3}",
        "carriage\rreturn",
        "line\nfeed",
        "ta\tbu\tla\ttion",
        "controlchars",
        "//azeaze\\//azeaz\\//azea"
    ]
    answers = dict()
    for cname in strange_paths:
        content = self._new_content(self.stgpol, "nobody cares", cname)
        answers[cname] = content

    _, listing = self.container_client.content_list(
        self.account, self.container_name)
    obj_set = {
        k["name"].encode("utf8", "ignore")
        for k in listing["objects"]
    }
    try:
        # Ensure the saved path is the one we gave the object
        for cname in answers:
            self.assertEqual(cname, answers[cname].path)
            fullpath = encode_fullpath(
                self.account, self.container_name, cname,
                answers[cname].version, answers[cname].content_id)
            self.assertEqual(answers[cname].full_path, fullpath)

        # Ensure all objects appear in listing
        for cname in strange_paths:
            self.assertIn(cname, obj_set)
    finally:
        # Cleanup: delete each created object. The original code
        # called content.delete() (the loop leftover, i.e. the last
        # created object) for every name, leaving all the others behind.
        for cname in answers:
            try:
                answers[cname].delete()
            except Exception:
                pass
def recover_chunk_fullpath(self, path, chunk_id=None):
    """Regenerate the fullpath extended attribute of a chunk, using the
    rdir and the meta2 database as sources of truth.

    :raises KeyError: when the chunk is unknown to the rdir
    :raises OrphanChunk: when the object cannot be located or does not
        reference this chunk
    :returns: True on success
    """
    if not chunk_id:
        chunk_id = path.rsplit('/', 1)[-1]
    # 1. Fetch chunk list from rdir (could be cached).
    # Unfortunately we cannot seek for a chunk ID.
    entries = [entry
               for entry in self.rdir.chunk_fetch(self.volume_id,
                                                  limit=-1)
               if entry[2] == chunk_id]
    if not entries:
        raise KeyError('Chunk %s not found in rdir' % chunk_id)
    if len(entries) > 1:
        self.logger.info('Chunk %s appears in %d objects',
                         chunk_id, len(entries))
    # 2. Find content and container IDs
    cid, content_id = entries[0][0:2]
    # 3a. Call ContainerClient.content_locate()
    #     with the container ID and content ID
    try:
        meta, chunks = self.container_client.content_locate(
            cid=cid, content=content_id)
    except NotFound as err:
        raise OrphanChunk('Cannot check %s is valid: %s' % (path, err))
    # 3b. Resolve container ID into account and container names.
    # FIXME(FVE): get account and container names from meta1
    cmeta = self.container_client.container_get_properties(cid=cid)
    aname = cmeta['system']['sys.account']
    cname = cmeta['system']['sys.user.name']
    fullpath = encode_fullpath(aname, cname, meta['name'],
                               meta['version'], content_id)
    # 4. Check if the chunk actually belongs to the object
    chunk_url = 'http://%s/%s' % (self.volume_id, chunk_id)
    known_urls = [chunk['url'] for chunk in chunks]
    if chunk_url not in known_urls:
        raise OrphanChunk('Chunk %s not found in object %s'
                          % (chunk_url, fullpath))
    # 5. Regenerate the fullpath
    with open(path, 'w') as fd:
        set_fullpath_xattr(fd, {chunk_id: fullpath})
    return True
def _cycle_copy(self, path):
    """Upload a chunk, COPY (link) it under a snapshot fullpath, then
    delete both, checking headers, xattr and link bookkeeping at every
    step."""
    if path:
        self.path = path
    chunkid = random_chunk_id()
    chunkdata = random_buffer(string.printable, 1).encode('utf-8')
    chunkurl = self._rawx_url(chunkid)
    chunkpath = self._chunk_path(chunkid)
    headers1 = self._chunk_attr(chunkid, chunkdata)
    metachunk_hash = md5().hexdigest()
    trailers = {'x-oio-chunk-meta-metachunk-size': '1',
                'x-oio-chunk-meta-metachunk-hash': metachunk_hash}
    self._check_not_present(chunkurl)
    # Upload the original chunk.
    resp, _ = self._http_request(chunkurl, 'PUT', chunkdata, headers1,
                                 trailers)
    self.assertEqual(201, resp.status)
    self.assertEqual(headers1['x-oio-chunk-meta-chunk-hash'].upper(),
                     resp.getheader('x-oio-chunk-meta-chunk-hash'))
    self.assertEqual(headers1['x-oio-chunk-meta-chunk-size'],
                     resp.getheader('x-oio-chunk-meta-chunk-size'))
    # Copy ID keeps the source's 4 leading characters (presumably to
    # land in the same hashed directory -- TODO confirm).
    copyid = random_chunk_id()
    copyid = chunkid[:-60] + copyid[-60:]
    copyurl = self._rawx_url(copyid)
    copypath = self._chunk_path(copyid)
    # COPY the chunk under a "snapshot" fullpath.
    headers2 = {}
    headers2["Destination"] = copyurl
    copy_account = "account-snapshot"
    copy_container = "container-snapshot"
    copy_container_id = cid_from_name(copy_account, copy_container)
    copy_path = path+"-snapshot"
    copy_version = 1456938361143741
    copy_id = random_id(32)
    copy_fullpath = encode_fullpath(
        copy_account, copy_container, copy_path, copy_version, copy_id)
    headers2['x-oio-chunk-meta-full-path'] = copy_fullpath
    resp, _ = self._http_request(chunkurl, 'COPY', '', headers2)
    self.assertEqual(201, resp.status)
    # The original chunk must still serve its original attributes.
    resp, body = self._http_request(chunkurl, 'GET', '', {})
    self.assertEqual(200, resp.status)
    headers1['x-oio-chunk-meta-chunk-hash'] = \
        headers1['x-oio-chunk-meta-chunk-hash'].upper()
    for k, v in headers1.items():
        if k == 'x-oio-chunk-meta-content-path':
            self.assertEqual(unquote(resp.getheader(k)),
                             unquote(str(v)))
        else:
            self.assertEqual(resp.getheader(k), str(v))
    # The copy must serve the snapshot attributes.
    resp, body = self._http_request(copyurl, 'GET', '', {})
    self.assertEqual(200, resp.status)
    headers2_bis = headers1.copy()
    headers2_bis['x-oio-chunk-meta-full-path'] = \
        headers2['x-oio-chunk-meta-full-path']
    headers2_bis['x-oio-chunk-meta-content-path'] = copy_path
    headers2_bis['x-oio-chunk-meta-content-version'] = copy_version
    headers2_bis['x-oio-chunk-meta-content-id'] = copy_id
    headers2_bis['x-oio-chunk-meta-container-id'] = copy_container_id
    headers2_bis['x-oio-chunk-meta-chunk-id'] = copyid
    for k, v in headers2_bis.items():
        if k == 'x-oio-chunk-meta-content-path':
            self.assertEqual(unquote(resp.getheader(k)),
                             unquote(str(v)))
        else:
            self.assertEqual(resp.getheader(k), str(v))
    # Each chunk's xattr must record the other's fullpath as a link.
    with open(chunkpath, 'r') as fd:
        meta, _ = read_chunk_metadata(fd, chunkid)
        self.assertEqual(headers1['x-oio-chunk-meta-full-path'],
                         meta['full_path'])
        self.assertEqual(1, len(meta['links']))
        self.assertEqual(headers2['x-oio-chunk-meta-full-path'],
                         meta['links'][copyid])
    with open(copypath, 'r') as fd:
        meta, _ = read_chunk_metadata(fd, copyid)
        self.assertEqual(headers2['x-oio-chunk-meta-full-path'],
                         meta['full_path'])
        self.assertEqual(1, len(meta['links']))
        self.assertEqual(headers1['x-oio-chunk-meta-full-path'],
                         meta['links'][chunkid])
    # Delete the original: the copy must survive and drop the link.
    resp, body = self._http_request(chunkurl, 'DELETE', '', {})
    self.assertEqual(204, resp.status)
    resp, body = self._http_request(chunkurl, 'GET', '', {})
    self.assertEqual(404, resp.status)
    resp, body = self._http_request(copyurl, 'GET', '', {})
    self.assertEqual(200, resp.status)
    self.assertEqual(headers2['x-oio-chunk-meta-full-path'],
                     resp.getheader('x-oio-chunk-meta-full-path'))
    with open(copypath, 'r') as fd:
        meta, _ = read_chunk_metadata(fd, copyid)
        self.assertEqual(headers2['x-oio-chunk-meta-full-path'],
                         meta['full_path'])
        self.assertEqual(0, len(meta['links']))
    # Delete the copy too.
    resp, body = self._http_request(copyurl, 'DELETE', '', {})
    self.assertEqual(204, resp.status)
    resp, body = self._http_request(copyurl, 'GET', '', {})
    self.assertEqual(404, resp.status)
def test_wrong_fullpath(self):
    """Check that a PUT with a malformed or inconsistent
    'x-oio-chunk-meta-full-path' header is rejected with 400."""
    metachunk_hash = md5().hexdigest()
    trailers = {'x-oio-chunk-meta-metachunk-size': '1',
                'x-oio-chunk-meta-metachunk-hash': metachunk_hash}
    chunkid = random_chunk_id()
    chunkdata = random_buffer(string.printable, 1).encode('utf-8')
    chunkurl = self._rawx_url(chunkid)
    hdrs = self._chunk_attr(chunkid, chunkdata)
    self._check_not_present(chunkurl)
    # Full path with too many components.
    headers = hdrs.copy()
    headers['x-oio-chunk-meta-full-path'] = encode_fullpath(
        self.account, 'blob', self.content_path, self.content_version,
        self.content_id) + "/too_long"
    resp, _ = self._http_request(chunkurl, 'PUT', chunkdata, headers,
                                 trailers)
    self.assertEqual(400, resp.status)
    # Full path with too few components (last two stripped).
    headers = hdrs.copy()
    headers['x-oio-chunk-meta-full-path'] = encode_fullpath(
        self.account, 'blob', self.content_path, self.content_version,
        self.content_id).rsplit('/', 2)[0]
    resp, _ = self._http_request(chunkurl, 'PUT', chunkdata, headers,
                                 trailers)
    self.assertEqual(400, resp.status)
    # Account inconsistent with the other chunk headers.
    headers = hdrs.copy()
    headers['x-oio-chunk-meta-full-path'] = encode_fullpath(
        'wrong-account', 'blob', self.content_path, self.content_version,
        self.content_id)
    resp, _ = self._http_request(chunkurl, 'PUT', chunkdata, headers,
                                 trailers)
    self.assertEqual(400, resp.status)
    # Container inconsistent with the other chunk headers.
    headers = hdrs.copy()
    headers['x-oio-chunk-meta-full-path'] = encode_fullpath(
        self.account, 'wrong-container', self.content_path,
        self.content_version, self.content_id)
    resp, _ = self._http_request(chunkurl, 'PUT', chunkdata, headers,
                                 trailers)
    self.assertEqual(400, resp.status)
    # Content path inconsistent with the other chunk headers.
    headers = hdrs.copy()
    headers['x-oio-chunk-meta-full-path'] = encode_fullpath(
        self.account, 'blob', 'wrong-path', self.content_version,
        self.content_id)
    resp, _ = self._http_request(chunkurl, 'PUT', chunkdata, headers,
                                 trailers)
    self.assertEqual(400, resp.status)
    # Content version inconsistent with the other chunk headers.
    headers = hdrs.copy()
    headers['x-oio-chunk-meta-full-path'] = encode_fullpath(
        self.account, 'blob', self.content_path, 9999999999999999,
        self.content_id)
    resp, _ = self._http_request(chunkurl, 'PUT', chunkdata, headers,
                                 trailers)
    self.assertEqual(400, resp.status)
    # Content ID inconsistent with the other chunk headers.
    headers = hdrs.copy()
    headers['x-oio-chunk-meta-full-path'] = encode_fullpath(
        self.account, 'blob', self.content_path, self.content_version,
        '9999999999999999')
    resp, _ = self._http_request(chunkurl, 'PUT', chunkdata, headers,
                                 trailers)
    self.assertEqual(400, resp.status)
    # Empty account in the full path (no container-id header to fall
    # back on).
    headers = hdrs.copy()
    del headers['x-oio-chunk-meta-container-id']
    headers['x-oio-chunk-meta-full-path'] = encode_fullpath(
        'empty', 'blob', self.content_path, self.content_version,
        self.content_id).replace('empty', '')
    resp, _ = self._http_request(chunkurl, 'PUT', chunkdata, headers,
                                 trailers)
    self.assertEqual(400, resp.status)
    # Empty container in the full path.
    headers = hdrs.copy()
    del headers['x-oio-chunk-meta-container-id']
    headers['x-oio-chunk-meta-full-path'] = encode_fullpath(
        self.account, 'empty', self.content_path, self.content_version,
        self.content_id).replace('empty', '')
    resp, _ = self._http_request(chunkurl, 'PUT', chunkdata, headers,
                                 trailers)
    self.assertEqual(400, resp.status)
    # Empty content path in the full path.
    headers = hdrs.copy()
    del headers['x-oio-chunk-meta-content-path']
    headers['x-oio-chunk-meta-full-path'] = encode_fullpath(
        self.account, 'blob', 'empty', self.content_version,
        self.content_id).replace('empty', '')
    resp, _ = self._http_request(chunkurl, 'PUT', chunkdata, headers,
                                 trailers)
    self.assertEqual(400, resp.status)
    # Empty content version in the full path.
    headers = hdrs.copy()
    del headers['x-oio-chunk-meta-content-version']
    headers['x-oio-chunk-meta-full-path'] = encode_fullpath(
        self.account, 'blob', self.content_path, 'empty',
        self.content_id).replace('empty', '')
    resp, _ = self._http_request(chunkurl, 'PUT', chunkdata, headers,
                                 trailers)
    self.assertEqual(400, resp.status)
    # Empty content ID in the full path.
    headers = hdrs.copy()
    del headers['x-oio-chunk-meta-content-id']
    headers['x-oio-chunk-meta-full-path'] = encode_fullpath(
        self.account, 'blob', self.content_path, self.content_version,
        'empty').replace('empty', '')
    resp, _ = self._http_request(chunkurl, 'PUT', chunkdata, headers,
                                 trailers)
    self.assertEqual(400, resp.status)
    # Non-numeric content version in the full path.
    headers = hdrs.copy()
    del headers['x-oio-chunk-meta-content-version']
    headers['x-oio-chunk-meta-full-path'] = encode_fullpath(
        self.account, 'blob', self.content_path, 'digit',
        self.content_id)
    resp, _ = self._http_request(chunkurl, 'PUT', chunkdata, headers,
                                 trailers)
    self.assertEqual(400, resp.status)
    # Non-hexadecimal content ID in the full path.
    headers = hdrs.copy()
    del headers['x-oio-chunk-meta-content-id']
    headers['x-oio-chunk-meta-full-path'] = encode_fullpath(
        self.account, 'blob', self.content_path, self.content_version,
        'hexa')
    resp, _ = self._http_request(chunkurl, 'PUT', chunkdata, headers,
                                 trailers)
    self.assertEqual(400, resp.status)
def test_read_old_chunk(self):
    """Check that a chunk converted to the legacy attribute format is still
    readable and copyable, and that a copy of it gets the new-format
    (snapshot) identity while the data stays identical."""
    metachunk_hash = md5().hexdigest()
    trailers = {'x-oio-chunk-meta-metachunk-size': '1',
                'x-oio-chunk-meta-metachunk-hash': metachunk_hash}
    chunkid = random_chunk_id()
    chunkdata = random_buffer(string.printable, 1).encode('utf-8')
    chunkurl = self._rawx_url(chunkid)
    chunkpath = self._chunk_path(chunkid)
    headers = self._chunk_attr(chunkid, chunkdata)
    self._check_not_present(chunkurl)
    resp, _ = self._http_request(chunkurl, 'PUT', chunkdata, headers,
                                 trailers)
    self.assertEqual(201, resp.status)
    # Snapshot of headers/metadata while the chunk is in the new format.
    resp1, data1 = self._http_request(chunkurl, 'GET', '', {})
    self.assertEqual(200, resp1.status)
    headers1 = HeadersDict(resp1.getheaders())
    with open(chunkpath, 'r') as fd:
        meta1, _ = read_chunk_metadata(fd, chunkid)
    # Downgrade the on-disk attributes to the legacy format.
    convert_to_old_chunk(
        chunkpath, self.account, self.container, self.content_path,
        self.content_version, self.content_id)
    resp2, data2 = self._http_request(chunkurl, 'GET', '', {})
    self.assertEqual(200, resp2.status)
    headers2 = HeadersDict(resp2.getheaders())
    with open(chunkpath, 'r') as fd:
        meta2, _ = read_chunk_metadata(fd, chunkid)
    self.assertEqual(data1, data2)
    # Ignore fields legitimately absent or different in the old format.
    del headers1[CHUNK_HEADERS['full_path']]
    del headers1[CHUNK_HEADERS['oio_version']]
    del headers2[CHUNK_HEADERS['oio_version']]
    del headers1["date"]
    del headers2["date"]
    self.assertDictEqual(headers1, headers2)
    del meta1['full_path']
    del meta1['oio_version']
    del meta2['oio_version']
    self.assertDictEqual(meta1, meta2)
    # Copy old chunk.  The copy ID shares its first characters with the
    # original so both live in the same hashed directory.
    copyid = random_chunk_id()
    copyid = chunkid[:-60] + copyid[-60:]
    copyurl = self._rawx_url(copyid)
    copypath = self._chunk_path(copyid)
    copycontentid = random_id(32)
    copyheaders = {}
    copyheaders["Destination"] = copyurl
    copyheaders['x-oio-chunk-meta-full-path'] = encode_fullpath(
        "account-snapshot", "container-snapshot",
        self.content_path+"-snapshot", 1456938361143741, copycontentid)
    resp, _ = self._http_request(chunkurl, 'COPY', '', copyheaders)
    self.assertEqual(201, resp.status)
    # The original chunk must be unchanged, except for the link to the
    # new copy.
    resp2, data2 = self._http_request(chunkurl, 'GET', '', {})
    self.assertEqual(200, resp2.status)
    headers2 = HeadersDict(resp2.getheaders())
    with open(chunkpath, 'r') as fd:
        meta2, _ = read_chunk_metadata(fd, chunkid)
    self.assertEqual(1, len(meta2['links']))
    self.assertEqual(copyheaders['x-oio-chunk-meta-full-path'],
                     meta2['links'][copyid])
    meta2['links'] = dict()
    self.assertEqual(data1, data2)
    del headers2[CHUNK_HEADERS['oio_version']]
    del headers2["date"]
    self.assertDictEqual(headers1, headers2)
    del meta2['oio_version']
    self.assertDictEqual(meta1, meta2)
    # The copy must carry the snapshot identity; compare field by field,
    # deleting matched fields from both sides so the final dict compare
    # only covers what should be strictly identical.
    resp3, data3 = self._http_request(copyurl, 'GET', '', {})
    self.assertEqual(200, resp3.status)
    headers3 = HeadersDict(resp3.getheaders())
    with open(copypath, 'r') as fd:
        meta3, _ = read_chunk_metadata(fd, copyid)
    self.assertEqual(
        copyheaders['x-oio-chunk-meta-full-path'],
        headers3['x-oio-chunk-meta-full-path'])
    del headers3['x-oio-chunk-meta-full-path']
    self.assertEqual(
        cid_from_name("account-snapshot", "container-snapshot"),
        headers3['x-oio-chunk-meta-container-id'])
    del headers1['x-oio-chunk-meta-container-id']
    del headers3['x-oio-chunk-meta-container-id']
    self.assertEqual(
        self.content_path+"-snapshot",
        unquote(headers3['x-oio-chunk-meta-content-path']))
    del headers1['x-oio-chunk-meta-content-path']
    del headers3['x-oio-chunk-meta-content-path']
    self.assertEqual(
        '1456938361143741',
        headers3['x-oio-chunk-meta-content-version'])
    del headers1['x-oio-chunk-meta-content-version']
    del headers3['x-oio-chunk-meta-content-version']
    self.assertEqual(
        copycontentid, headers3['x-oio-chunk-meta-content-id'])
    del headers1['x-oio-chunk-meta-content-id']
    del headers3['x-oio-chunk-meta-content-id']
    self.assertEqual(copyid, headers3['x-oio-chunk-meta-chunk-id'])
    del headers1['x-oio-chunk-meta-chunk-id']
    del headers3['x-oio-chunk-meta-chunk-id']
    self.assertEqual(
        copyheaders['x-oio-chunk-meta-full-path'], meta3['full_path'])
    del meta3['full_path']
    self.assertEqual(
        cid_from_name("account-snapshot", "container-snapshot"),
        meta3['container_id'])
    del meta1['container_id']
    del meta3['container_id']
    self.assertEqual(self.content_path+"-snapshot", meta3['content_path'])
    del meta1['content_path']
    del meta3['content_path']
    self.assertEqual('1456938361143741', meta3['content_version'])
    del meta1['content_version']
    del meta3['content_version']
    self.assertEqual(copycontentid, meta3['content_id'])
    del meta1['content_id']
    del meta3['content_id']
    self.assertEqual(copyid, meta3['chunk_id'])
    del meta1['chunk_id']
    del meta3['chunk_id']
    # FIXME the old chunk is invisible
    self.assertEqual(0, len(meta3['links']))
    self.assertEqual(data1, data3)
    del headers3[CHUNK_HEADERS['oio_version']]
    del headers3["date"]
    self.assertDictEqual(headers1, headers3)
    del meta3['oio_version']
    self.assertDictEqual(meta1, meta3)
def test_encode(self):
    """Plain-ASCII components are joined verbatim with '/' separators."""
    expected = "myaccount/mycontainer/myobject/9876543210/0123456789ABCDEF"
    actual = encode_fullpath(
        "myaccount", "mycontainer", "myobject", 9876543210,
        "0123456789ABCDEF")
    self.assertEqual(expected, actual)
def test_encode_with_utf8_info(self):
    """Non-ASCII characters in components are percent-encoded (UTF-8)."""
    expected = ("m%C5%B7account/mycontain%C3%A9r/my%C3%B6bject/9876543210/"
                "0123456789ABCDEF")
    actual = encode_fullpath(
        "mŷaccount", "mycontainér", "myöbject", 9876543210,
        "0123456789ABCDEF")
    self.assertEqual(expected, actual)
def test_rebuild_old_chunk(self):
    """Convert all chunks of a content to the legacy attribute format,
    delete one of them, run the blob rebuilder, and check the rebuilt
    chunk carries the same position, size, hash, data and new-format
    attributes."""
    # Downgrade every chunk to the legacy on-disk attribute format.
    for c in self.chunks:
        convert_to_old_chunk(
            self._chunk_path(c), self.account, self.container, self.path,
            self.version, self.content_id)

    # Pick a victim chunk, keep its data/headers, then remove it.
    chunk = random.choice(self.chunks)
    chunk_volume = chunk['url'].split('/')[2]
    chunk_id = chunk['url'].split('/')[3]
    chunk_headers, chunk_stream = self.blob_client.chunk_get(
        chunk['url'], check_headers=False)
    os.remove(self._chunk_path(chunk))
    chunks_kept = list(self.chunks)
    chunks_kept.remove(chunk)

    # Rebuild the missing chunk (may land on the same rawx).
    conf = self.conf.copy()
    conf['allow_same_rawx'] = True
    rebuilder = BlobRebuilder(conf, service_id=chunk_volume)
    rebuilder_worker = rebuilder.create_worker(None, None)
    rebuilder_worker._process_item(
        (self.ns, self.cid, self.content_id, chunk_id))

    _, new_chunks = self.api.object_locate(
        self.account, self.container, self.path)
    # Fix: materialize into new_chunks (the original code assigned the
    # list to new_chunk, a dead store immediately overwritten below),
    # so len() works even if object_locate returns an iterator.
    new_chunks = list(new_chunks)
    self.assertEqual(len(new_chunks), len(chunks_kept) + 1)
    url_kept = [c['url'] for c in chunks_kept]
    new_chunk = None
    for c in new_chunks:
        if c['url'] not in url_kept:
            # There must be exactly one chunk we did not already know.
            self.assertIsNone(new_chunk)
            new_chunk = c
    # Fail with a clear message instead of a TypeError below if the
    # rebuilt chunk was not found.
    self.assertIsNotNone(new_chunk)

    # Cannot check if the URL is different: it may be the same since we
    # generate predictible chunk IDs.
    # self.assertNotEqual(chunk['real_url'], new_chunk['real_url'])
    # self.assertNotEqual(chunk['url'], new_chunk['url'])
    self.assertEqual(chunk['pos'], new_chunk['pos'])
    self.assertEqual(chunk['size'], new_chunk['size'])
    self.assertEqual(chunk['hash'], new_chunk['hash'])

    # The rebuilt chunk holds the exact same data.
    new_chunk_headers, new_chunk_stream = self.blob_client.chunk_get(
        new_chunk['url'])
    chunk_data = b''.join(chunk_stream)
    new_chunk_data = b''.join(new_chunk_stream)
    self.assertEqual(chunk_data, new_chunk_data)

    # The rebuilt chunk uses the new (full-path) attribute format.
    fullpath = encode_fullpath(self.account, self.container, self.path,
                               self.version, self.content_id)
    self.assertEqual(fullpath, new_chunk_headers['full_path'])
    del new_chunk_headers['full_path']
    # Since we generate predictible chunk IDs, they can be equal
    # self.assertNotEqual(chunk_headers['chunk_id'],
    #                     new_chunk_headers['chunk_id'])
    # We could compare the modification time of the chunks,
    # but unfortunately they have a 1s resolution...
    # self.assertNotEqual(chunk_headers['chunk_mtime'],
    #                     new_chunk_headers['chunk_mtime'])
    new_chunk_id = new_chunk['url'].split('/')[3]
    self.assertEqual(new_chunk_id, new_chunk_headers['chunk_id'])
    del chunk_headers['chunk_id']
    del new_chunk_headers['chunk_id']
    self.assertEqual(OIO_VERSION, new_chunk_headers['oio_version'])
    del chunk_headers['oio_version']
    del new_chunk_headers['oio_version']
    del chunk_headers['chunk_mtime']
    del new_chunk_headers['chunk_mtime']
    # Everything else must match the pre-deletion headers.
    self.assertEqual(chunk_headers, new_chunk_headers)