def test_enable(self):
    """Exercise the 'enable' subcommand.

    Covers three situations in turn: a broker with no shard ranges (the
    command exits), a broker with shard ranges (sharding gets enabled), and
    a repeat invocation once sharding is already enabled.
    """
    loaded_banner = ['Loaded db broker for a/c.']
    sharder_hint = (
        'Run container-sharder on all nodes to shard the container.')

    broker = self._make_broker()
    sharding_sysmeta = (True, Timestamp.now().internal)
    broker.update_metadata(
        {'X-Container-Sysmeta-Sharding': sharding_sysmeta})

    # Case 1: nothing to shard, so the command is expected to exit.
    stdout, stderr = StringIO(), StringIO()
    with self.assertRaises(SystemExit), \
            mock.patch('sys.stdout', stdout), \
            mock.patch('sys.stderr', stderr):
        main([broker.db_file, 'enable'])
    self.assertEqual(
        ["WARNING: invalid shard ranges: ['No shard ranges.'].",
         'Aborting.'],
        stdout.getvalue().splitlines())
    self.assertEqual(loaded_banner, stderr.getvalue().splitlines())

    # Case 2: give the broker shard ranges and enable sharding.
    new_ranges = [
        ShardRange(
            ShardRange.make_path(
                '.shards_a', 'c', 'c', Timestamp.now(), item['index']),
            Timestamp.now(), item['lower'], item['upper'],
            item['object_count'])
        for item in self.shard_data]
    broker.merge_shard_ranges(new_ranges)
    stdout, stderr = StringIO(), StringIO()
    with mock.patch('sys.stdout', stdout), \
            mock.patch('sys.stderr', stderr), \
            mock_timestamp_now() as now:
        main([broker.db_file, 'enable'])
    self.assertEqual(
        ["Container moved to state 'sharding' with epoch %s."
         % now.internal,
         sharder_hint],
        stdout.getvalue().splitlines())
    self.assertEqual(loaded_banner, stderr.getvalue().splitlines())
    self._assert_enabled(broker, now)

    # Case 3: a second run reports the existing state and epoch.
    stdout, stderr = StringIO(), StringIO()
    with mock.patch('sys.stdout', stdout), \
            mock.patch('sys.stderr', stderr):
        main([broker.db_file, 'enable'])
    self.assertEqual(
        ["Container already in state 'sharding' with epoch %s."
         % now.internal,
         'No action required.',
         sharder_hint],
        stdout.getvalue().splitlines())
    self.assertEqual(loaded_banner, stderr.getvalue().splitlines())
    self._assert_enabled(broker, now)
def analyze_shard_ranges(args):
    """Load shard range data and look for a repair solution.

    :param args: parsed command line arguments; passed through to the data
        loader and to ``find_repair_solution``.
    :returns: 0 on success, 1 if ``find_repair_solution`` raises
        ``ManageShardRangesException``.
    """
    loaded = _load_and_validate_shard_data(args, require_index=False)
    # The input may be incomplete shard range data scraped from
    # swift-container-info output, so make sure 'epoch' is always present.
    for entry in loaded:
        entry.setdefault('epoch', None)
    candidates = [ShardRange.from_dict(entry) for entry in loaded]
    entire_namespace = ShardRange('whole/namespace', 0)
    try:
        find_repair_solution(candidates, entire_namespace, args)
    except ManageShardRangesException:
        exit_code = 1
    else:
        exit_code = 0
    return exit_code
def test_enable(self):
    """Verify 'enable' aborts without shard ranges, enables sharding once
    shard ranges exist, and is a no-op when run again."""

    def lines(stream):
        # Captured text of a StringIO as a list of lines.
        return stream.getvalue().splitlines()

    expected_stderr = ['Loaded db broker for a/c.']

    broker = self._make_broker()
    metadata = {
        'X-Container-Sysmeta-Sharding': (True, Timestamp.now().internal),
    }
    broker.update_metadata(metadata)

    # -- no shard ranges: the command is expected to exit
    captured_out = StringIO()
    captured_err = StringIO()
    with self.assertRaises(SystemExit):
        with mock.patch('sys.stdout', captured_out):
            with mock.patch('sys.stderr', captured_err):
                main([broker.db_file, 'enable'])
    expected_stdout = [
        "WARNING: invalid shard ranges: ['No shard ranges.'].",
        'Aborting.',
    ]
    self.assertEqual(expected_stdout, lines(captured_out))
    self.assertEqual(expected_stderr, lines(captured_err))

    # -- success: merge shard ranges, then enable
    ranges_to_merge = []
    for spec in self.shard_data:
        shard_path = ShardRange.make_path(
            '.shards_a', 'c', 'c', Timestamp.now(), spec['index'])
        shard = ShardRange(shard_path, Timestamp.now(), spec['lower'],
                           spec['upper'], spec['object_count'])
        ranges_to_merge.append(shard)
    broker.merge_shard_ranges(ranges_to_merge)

    captured_out = StringIO()
    captured_err = StringIO()
    with mock.patch('sys.stdout', captured_out):
        with mock.patch('sys.stderr', captured_err):
            with mock_timestamp_now() as now:
                main([broker.db_file, 'enable'])
    expected_stdout = [
        "Container moved to state 'sharding' with epoch %s." % now.internal,
        'Run container-sharder on all nodes to shard the container.',
    ]
    self.assertEqual(expected_stdout, lines(captured_out))
    self.assertEqual(expected_stderr, lines(captured_err))
    self._assert_enabled(broker, now)

    # -- already enabled: same epoch is reported, nothing changes
    captured_out = StringIO()
    captured_err = StringIO()
    with mock.patch('sys.stdout', captured_out):
        with mock.patch('sys.stderr', captured_err):
            main([broker.db_file, 'enable'])
    expected_stdout = [
        "Container already in state 'sharding' with epoch %s."
        % now.internal,
        'No action required.',
        'Run container-sharder on all nodes to shard the container.',
    ]
    self.assertEqual(expected_stdout, lines(captured_out))
    self.assertEqual(expected_stderr, lines(captured_err))
    self._assert_enabled(broker, now)
def _filter_resp_shard_ranges(self, req, cached_ranges):
    """Filter cached shard ranges according to the request's constraints.

    :param req: the request; its ``marker``, ``end_marker``, ``includes``
        and ``reverse`` params are read.
    :param cached_ranges: iterable of dicts, each accepted by
        ``ShardRange.from_dict``.
    :returns: the filtered shard ranges as an ASCII-encoded JSON list of
        dicts, in reverse order when ``reverse`` is true.
    """
    req_marker = get_param(req, 'marker', '')
    req_end_marker = get_param(req, 'end_marker')
    includes = get_param(req, 'includes')
    reverse = config_true_value(get_param(req, 'reverse'))

    # For a reversed listing the two markers swap roles before filtering.
    if reverse:
        lower, upper = req_end_marker, req_marker
    else:
        lower, upper = req_marker, req_end_marker

    candidates = []
    for item in cached_ranges:
        candidates.append(ShardRange.from_dict(item))
    selected = filter_shard_ranges(candidates, includes, lower, upper)
    if reverse:
        selected.reverse()

    payload = json.dumps(list(map(dict, selected)))
    return payload.encode('ascii')
def PUT(self, req):
    """Handle HTTP PUT request.

    Depending on the request this either records an object row in the
    container, merges shard ranges sent in the request body, or creates or
    updates the container itself.
    """
    drive, part, account, container, obj = get_obj_name_and_placement(req)
    req_timestamp = valid_timestamp(req)
    headers = req.headers

    if 'x-container-sync-to' in headers:
        sync_err, _to, _realm, _key = validate_sync_to(
            headers['x-container-sync-to'], self.allowed_sync_hosts,
            self.realms_conf)
        if sync_err:
            return HTTPBadRequest(sync_err)

    try:
        check_drive(self.root, drive, self.mount_check)
    except ValueError:
        return HTTPInsufficientStorage(drive=drive, request=req)
    if not self.check_free_space(drive):
        return HTTPInsufficientStorage(drive=drive, request=req)

    requested_policy_index = self.get_and_validate_policy_index(req)
    broker = self._get_container_broker(drive, part, account, container)

    if obj:
        # Object row update. Object PUTs are expected to carry the policy
        # index header; falling back to 0 is for legacy support during
        # upgrade.
        obj_policy_index = requested_policy_index or 0
        self._maybe_autocreate(
            broker, req_timestamp, account, obj_policy_index)
        # If a shard exists for this object name, redirect instead.
        redirect = self._redirect_to_shard(req, broker, obj)
        if redirect:
            return redirect
        size = int(headers['x-size'])
        content_type = wsgi_to_str(headers['x-content-type'])
        etag = wsgi_to_str(headers['x-etag'])
        ctype_timestamp = wsgi_to_str(
            headers.get('x-content-type-timestamp'))
        meta_timestamp = wsgi_to_str(headers.get('x-meta-timestamp'))
        broker.put_object(
            obj, req_timestamp.internal, size, content_type, etag, 0,
            obj_policy_index, ctype_timestamp, meta_timestamp)
        return HTTPCreated(request=req)

    record_type = headers.get('x-backend-record-type', '').lower()
    if record_type != RECORD_TYPE_SHARD:
        # Container create/update.
        if requested_policy_index is not None:
            new_container_policy = requested_policy_index
        else:
            # use the default index sent by the proxy if available
            new_container_policy = headers.get(
                'X-Backend-Storage-Policy-Default', int(POLICIES.default))
        created = self._update_or_create(
            req, broker, req_timestamp.internal, new_container_policy,
            requested_policy_index)
        self._update_metadata(req, broker, req_timestamp, 'PUT')
    else:
        # Shard range update: validate the incoming data first.
        try:
            shard_ranges = []
            for item in json.loads(req.body):
                shard_ranges.append(ShardRange.from_dict(item))
        except (ValueError, KeyError, TypeError) as err:
            return HTTPBadRequest('Invalid body: %r' % err)
        created = self._maybe_autocreate(
            broker, req_timestamp, account, requested_policy_index)
        self._update_metadata(req, broker, req_timestamp, 'PUT')
        if shard_ranges:
            # TODO: consider writing the shard ranges into the pending
            # file, but if so ensure an all-or-none semantic for the write
            broker.merge_shard_ranges(shard_ranges)

    update_resp = self.account_update(req, account, container, broker)
    if update_resp:
        return update_resp

    response_cls = HTTPCreated if created else HTTPAccepted
    return response_cls(
        request=req,
        headers={
            'x-backend-storage-policy-index': broker.storage_policy_index})
def test_info(self):
    """Check the 'info' subcommand output while the broker is unsharded,
    sharding, and sharded."""

    def lines(stream):
        # Captured text of a StringIO as a list of lines.
        return stream.getvalue().splitlines()

    def run_info(frozen_now=None):
        # Run 'info' with stdout/stderr captured; optionally freeze time.
        stdout, stderr = StringIO(), StringIO()
        with mock.patch('sys.stdout', stdout), \
                mock.patch('sys.stderr', stderr):
            if frozen_now is None:
                main([broker.db_file, 'info'])
            else:
                with mock_timestamp_now(frozen_now):
                    main([broker.db_file, 'info'])
        return stdout, stderr

    loaded_banner = ['Loaded db broker for a/c.']
    metadata_lines = [
        'Metadata:',
        ' X-Container-Sysmeta-Sharding = True',
    ]

    broker = self._make_broker()
    broker.update_metadata(
        {'X-Container-Sysmeta-Sharding': (True, Timestamp.now().internal)})

    # --- unsharded
    stdout, stderr = run_info()
    self.assertEqual(
        ['Sharding enabled = True',
         'Own shard range: None',
         'db_state = unsharded'] + metadata_lines,
        lines(stdout))
    self.assertEqual(loaded_banner, lines(stderr))

    # --- sharding
    retiring_db_id = broker.get_info()['id']
    broker.merge_shard_ranges(ShardRange('.shards/cc', Timestamp.now()))
    epoch = Timestamp.now()
    with mock_timestamp_now(epoch) as now:
        broker.enable_sharding(epoch)
    self.assertTrue(broker.set_sharding_state())

    # The same header and own-shard-range dump is expected in both the
    # sharding and the sharded output.
    own_shard_range_lines = [
        'Sharding enabled = True',
        'Own shard range: {',
        ' "bytes_used": 0, ',
        ' "deleted": 0, ',
        ' "epoch": "%s", ' % epoch.internal,
        ' "lower": "", ',
        ' "meta_timestamp": "%s", ' % now.internal,
        ' "name": "a/c", ',
        ' "object_count": 0, ',
        ' "state": "sharding", ',
        ' "state_timestamp": "%s", ' % now.internal,
        ' "timestamp": "%s", ' % now.internal,
        ' "upper": ""',
        '}',
    ]
    cleaving_lines = [
        'db_state = sharding',
        'Retiring db id: %s' % retiring_db_id,
        'Cleaving context: {',
        ' "cleave_to_row": null, ',
        ' "cleaving_done": false, ',
        ' "cursor": "", ',
        ' "last_cleave_to_row": null, ',
        ' "max_row": -1, ',
        ' "misplaced_done": false, ',
        ' "ranges_done": 0, ',
        ' "ranges_todo": 0, ',
        ' "ref": "%s"' % retiring_db_id,
        '}',
    ]

    stdout, stderr = run_info(now)
    self.assertEqual(
        own_shard_range_lines + cleaving_lines + metadata_lines,
        lines(stdout))
    self.assertEqual(loaded_banner, lines(stderr))

    # --- sharded
    self.assertTrue(broker.set_sharded_state())
    stdout, stderr = run_info(now)
    self.assertEqual(
        own_shard_range_lines + ['db_state = sharded'] + metadata_lines,
        lines(stdout))
    self.assertEqual(loaded_banner, lines(stderr))
def PUT(self, req):
    """Handle HTTP PUT request.

    The request may target an object within the container (an object row is
    recorded), carry shard ranges in its body (they are merged into the
    broker), or target the container itself (it is created or updated).
    """
    path_parts = split_and_validate_path(req, 4, 5, True)
    drive, part, account, container, obj = path_parts
    req_timestamp = valid_timestamp(req)
    headers = req.headers

    if 'x-container-sync-to' in headers:
        sync_err, _to, _realm, _key = validate_sync_to(
            headers['x-container-sync-to'], self.allowed_sync_hosts,
            self.realms_conf)
        if sync_err:
            return HTTPBadRequest(sync_err)

    try:
        check_drive(self.root, drive, self.mount_check)
    except ValueError:
        return HTTPInsufficientStorage(drive=drive, request=req)
    if not self.check_free_space(drive):
        return HTTPInsufficientStorage(drive=drive, request=req)

    requested_policy_index = self.get_and_validate_policy_index(req)
    broker = self._get_container_broker(drive, part, account, container)

    if obj:
        # Object row update. Object PUTs are expected to carry the policy
        # index header; falling back to 0 is for legacy support during
        # upgrade.
        obj_policy_index = requested_policy_index or 0
        self._maybe_autocreate(
            broker, req_timestamp, account, obj_policy_index)
        # If a shard exists for this object name, redirect instead.
        redirect = self._redirect_to_shard(req, broker, obj)
        if redirect:
            return redirect
        size = int(headers['x-size'])
        content_type = wsgi_to_str(headers['x-content-type'])
        etag = wsgi_to_str(headers['x-etag'])
        ctype_timestamp = wsgi_to_str(
            headers.get('x-content-type-timestamp'))
        meta_timestamp = wsgi_to_str(headers.get('x-meta-timestamp'))
        broker.put_object(
            obj, req_timestamp.internal, size, content_type, etag, 0,
            obj_policy_index, ctype_timestamp, meta_timestamp)
        return HTTPCreated(request=req)

    record_type = headers.get('x-backend-record-type', '').lower()
    if record_type != RECORD_TYPE_SHARD:
        # Container create/update.
        if requested_policy_index is not None:
            new_container_policy = requested_policy_index
        else:
            # use the default index sent by the proxy if available
            new_container_policy = headers.get(
                'X-Backend-Storage-Policy-Default', int(POLICIES.default))
        created = self._update_or_create(
            req, broker, req_timestamp.internal, new_container_policy,
            requested_policy_index)
        self._update_metadata(req, broker, req_timestamp, 'PUT')
    else:
        # Shard range update: validate the incoming data first.
        try:
            shard_ranges = []
            for item in json.loads(req.body):
                shard_ranges.append(ShardRange.from_dict(item))
        except (ValueError, KeyError, TypeError) as err:
            return HTTPBadRequest('Invalid body: %r' % err)
        created = self._maybe_autocreate(
            broker, req_timestamp, account, requested_policy_index)
        self._update_metadata(req, broker, req_timestamp, 'PUT')
        if shard_ranges:
            # TODO: consider writing the shard ranges into the pending
            # file, but if so ensure an all-or-none semantic for the write
            broker.merge_shard_ranges(shard_ranges)

    update_resp = self.account_update(req, account, container, broker)
    if update_resp:
        return update_resp

    response_cls = HTTPCreated if created else HTTPAccepted
    return response_cls(
        request=req,
        headers={
            'x-backend-storage-policy-index': broker.storage_policy_index})
def _get_from_shards(self, req, resp):
    """Construct an object listing using the shards described by ``resp``.

    The JSON body of ``resp`` is parsed as a list of shard range dicts. Each
    shard range's container is listed in turn and the results are
    accumulated until the request limit or end marker is reached.

    :param req: the original listing request; its params supply ``limit``,
        ``marker``, ``end_marker`` and ``reverse``. Its
        ``X-Backend-Record-Type`` header is removed as a side effect.
    :param resp: the response whose body describes the shard ranges.
    :returns: ``resp``; unchanged if the body holds no shard ranges,
        otherwise with its body replaced by the JSON-encoded aggregated
        listing (and, for short unconstrained listings, with the object
        count and bytes used headers recomputed from the listing).
    """
    # construct listing using shards described by the response body
    shard_ranges = [ShardRange.from_dict(data)
                    for data in json.loads(resp.body)]
    self.app.logger.debug('GET listing from %s shards for: %s',
                          len(shard_ranges), req.path_qs)
    if not shard_ranges:
        # can't find ranges or there was a problem getting the ranges. So
        # return what we have.
        return resp

    objects = []
    req_limit = int(req.params.get('limit', CONTAINER_LISTING_LIMIT))
    params = req.params.copy()
    params.pop('states', None)
    req.headers.pop('X-Backend-Record-Type', None)
    reverse = config_true_value(params.get('reverse'))
    marker = params.get('marker')
    end_marker = params.get('end_marker')

    limit = req_limit
    for shard_range in shard_ranges:
        params['limit'] = limit
        # Always set marker to ensure that object names less than or equal
        # to those already in the listing are not fetched; if the listing
        # is empty then the original request marker, if any, is used. This
        # allows misplaced objects below the expected shard range to be
        # included in the listing.
        if objects:
            last_name = objects[-1].get('name',
                                        objects[-1].get('subdir', u''))
            params['marker'] = last_name.encode('utf-8')
        elif marker:
            params['marker'] = marker
        else:
            params['marker'] = ''
        # Always set end_marker to ensure that misplaced objects beyond the
        # expected shard range are not fetched. This prevents a misplaced
        # object obscuring correctly placed objects in the next shard
        # range.
        if end_marker and end_marker in shard_range:
            params['end_marker'] = end_marker
        elif reverse:
            params['end_marker'] = str_to_wsgi(shard_range.lower_str)
        else:
            params['end_marker'] = str_to_wsgi(shard_range.end_marker)

        if (shard_range.account == self.account_name and
                shard_range.container == self.container_name):
            # directed back to same container - force GET of objects
            headers = {'X-Backend-Record-Type': 'object'}
        else:
            headers = None
        self.app.logger.debug('Getting from %s %s with %s',
                              shard_range, shard_range.name, headers)
        objs, shard_resp = self._get_container_listing(
            req, shard_range.account, shard_range.container,
            headers=headers, params=params)

        if not objs:
            # tolerate errors or empty shard containers
            continue

        objects.extend(objs)
        limit -= len(objs)

        if limit <= 0:
            break
        if end_marker:
            # The last entry may be a 'subdir' entry with no 'name' key, so
            # use the same name-or-subdir fallback as the marker code above
            # rather than indexing 'name' directly, which raised KeyError.
            last_entry = objects[-1]
            last_name = last_entry.get('name',
                                       last_entry.get('subdir', u''))
            last_name_bytes = last_name.encode('utf-8')
            end_marker_bytes = wsgi_to_bytes(end_marker)
            if reverse:
                reached_end = end_marker_bytes >= last_name_bytes
            else:
                reached_end = end_marker_bytes <= last_name_bytes
            if reached_end:
                break

    resp.body = json.dumps(objects).encode('ascii')
    constrained = any(req.params.get(constraint) for constraint in (
        'marker', 'end_marker', 'path', 'prefix', 'delimiter'))
    if not constrained and len(objects) < req_limit:
        self.app.logger.debug('Setting object count to %s', len(objects))
        # prefer the actual listing stats over the potentially outdated
        # root stats. This condition is only likely when a sharded
        # container is shrinking or in tests; typically a sharded container
        # will have more than CONTAINER_LISTING_LIMIT objects so any
        # unconstrained listing will be capped by the limit and total
        # object stats cannot therefore be inferred from the listing.
        resp.headers['X-Container-Object-Count'] = len(objects)
        resp.headers['X-Container-Bytes-Used'] = sum(
            o['bytes'] for o in objects)
    return resp
def _get_from_shards(self, req, resp):
    """Construct an object listing using the shards described by ``resp``.

    The JSON body of ``resp`` is parsed as a list of shard range dicts. Each
    shard range's container is listed in turn and the results are
    accumulated until the request limit or end marker is reached.

    :param req: the original listing request. Its environ is used to record
        the containers already visited, its params supply ``marker``,
        ``end_marker``, ``prefix`` and ``reverse``, and its
        ``X-Backend-Record-Type`` header is removed as a side effect.
    :param resp: the response whose body describes the shard ranges.
    :returns: ``resp``; unchanged if the body holds no shard ranges,
        otherwise with its body replaced by the JSON-encoded aggregated
        listing (and, for short unconstrained listings, with the object
        count and bytes used headers recomputed from the listing).
    """
    # Construct listing using shards described by the response body.
    # The history of containers that have returned shard ranges is
    # maintained in the request environ so that loops can be avoided by
    # forcing an object listing if the same container is visited again.
    # This can happen in at least two scenarios:
    #    1. a container has filled a gap in its shard ranges with a
    #       shard range pointing to itself
    #    2. a root container returns a (stale) shard range pointing to a
    #       shard that has shrunk into the root, in which case the shrunken
    #       shard may return the root's shard range.
    shard_listing_history = req.environ.setdefault(
        'swift.shard_listing_history', [])
    shard_listing_history.append((self.account_name, self.container_name))
    shard_ranges = [ShardRange.from_dict(data)
                    for data in json.loads(resp.body)]
    self.app.logger.debug('GET listing from %s shards for: %s',
                          len(shard_ranges), req.path_qs)
    if not shard_ranges:
        # can't find ranges or there was a problem getting the ranges. So
        # return what we have.
        return resp

    objects = []
    req_limit = constrain_req_limit(req, CONTAINER_LISTING_LIMIT)
    # Work on a copy of the params; they are adjusted per shard request.
    params = req.params.copy()
    params.pop('states', None)
    req.headers.pop('X-Backend-Record-Type', None)
    reverse = config_true_value(params.get('reverse'))
    marker = wsgi_to_str(params.get('marker'))
    end_marker = wsgi_to_str(params.get('end_marker'))
    prefix = wsgi_to_str(params.get('prefix'))

    # 'limit' is the number of entries still wanted; it shrinks as each
    # shard contributes to the listing.
    limit = req_limit
    for i, shard_range in enumerate(shard_ranges):
        params['limit'] = limit
        # Always set marker to ensure that object names less than or equal
        # to those already in the listing are not fetched; if the listing
        # is empty then the original request marker, if any, is used. This
        # allows misplaced objects below the expected shard range to be
        # included in the listing.
        if objects:
            # The last entry may be a 'subdir' entry with no 'name' key.
            last_name = objects[-1].get('name',
                                        objects[-1].get('subdir', u''))
            params['marker'] = bytes_to_wsgi(last_name.encode('utf-8'))
        elif marker:
            params['marker'] = str_to_wsgi(marker)
        else:
            params['marker'] = ''
        # Always set end_marker to ensure that misplaced objects beyond the
        # expected shard range are not fetched. This prevents a misplaced
        # object obscuring correctly placed objects in the next shard
        # range.
        if end_marker and end_marker in shard_range:
            params['end_marker'] = str_to_wsgi(end_marker)
        elif reverse:
            params['end_marker'] = str_to_wsgi(shard_range.lower_str)
        else:
            params['end_marker'] = str_to_wsgi(shard_range.end_marker)

        headers = {}
        if ((shard_range.account, shard_range.container) in
                shard_listing_history):
            # directed back to same container - force GET of objects
            headers['X-Backend-Record-Type'] = 'object'
        if config_true_value(req.headers.get('x-newest', False)):
            # Pass the original request's X-Newest on to the shard request.
            headers['X-Newest'] = 'true'

        if prefix:
            # Skip shard ranges that the prefix rules out.
            # NOTE(review): this relies on ShardRange supporting ordering
            # comparisons against a string - presumably against the
            # range's bounds; confirm in ShardRange.
            if prefix > shard_range:
                continue
            try:
                # Same as the prefix but with its last character replaced
                # by the next code point.
                just_past = prefix[:-1] + chr(ord(prefix[-1]) + 1)
            except ValueError:
                # chr() rejected the incremented code point, so there is no
                # such string to compare with; do not skip this shard.
                pass
            else:
                if just_past < shard_range:
                    continue

        self.app.logger.debug(
            'Getting listing part %d from shard %s %s with %s',
            i, shard_range, shard_range.name, headers)
        objs, shard_resp = self._get_container_listing(
            req, shard_range.account, shard_range.container,
            headers=headers, params=params)

        # Only used in the debug messages below.
        sharding_state = shard_resp.headers.get('x-backend-sharding-state',
                                                'unknown')

        if objs is None:
            # tolerate errors
            self.app.logger.debug(
                'Failed to get objects from shard (state=%s), total = %d',
                sharding_state, len(objects))
            continue

        self.app.logger.debug(
            'Found %d objects in shard (state=%s), total = %d',
            len(objs), sharding_state, len(objs) + len(objects))

        if not objs:
            # tolerate empty shard containers
            continue

        objects.extend(objs)
        limit -= len(objs)

        if limit <= 0:
            break
        # Stop early once the listing has reached the requested end_marker.
        last_name = objects[-1].get('name',
                                    objects[-1].get('subdir', u''))
        if six.PY2:
            # presumably end_marker is a byte string on py2, so the listing
            # name is encoded before comparing - TODO confirm
            last_name = last_name.encode('utf8')
        if end_marker and reverse and end_marker >= last_name:
            break
        if end_marker and not reverse and end_marker <= last_name:
            break

    resp.body = json.dumps(objects).encode('ascii')
    constrained = any(req.params.get(constraint) for constraint in (
        'marker', 'end_marker', 'path', 'prefix', 'delimiter'))
    if not constrained and len(objects) < req_limit:
        self.app.logger.debug('Setting object count to %s' % len(objects))
        # prefer the actual listing stats over the potentially outdated
        # root stats. This condition is only likely when a sharded
        # container is shrinking or in tests; typically a sharded container
        # will have more than CONTAINER_LISTING_LIMIT objects so any
        # unconstrained listing will be capped by the limit and total
        # object stats cannot therefore be inferred from the listing.
        resp.headers['X-Container-Object-Count'] = len(objects)
        resp.headers['X-Container-Bytes-Used'] = sum(
            [o['bytes'] for o in objects])
    return resp
def _get_from_shards(self, req, resp):
    """Construct an object listing using the shards described by ``resp``.

    The JSON body of ``resp`` is parsed as a list of shard range dicts. Each
    shard range's container is listed in turn and the results are
    accumulated until the request limit or end marker is reached.

    :param req: the original listing request; its params supply ``limit``,
        ``marker``, ``end_marker`` and ``reverse``. Its
        ``X-Backend-Record-Type`` header is removed as a side effect.
    :param resp: the response whose body describes the shard ranges.
    :returns: ``resp``; unchanged if the body holds no shard ranges,
        otherwise with its body replaced by the JSON-encoded aggregated
        listing (and, for short unconstrained listings, with the object
        count and bytes used headers recomputed from the listing).
    """
    # construct listing using shards described by the response body
    shard_ranges = [ShardRange.from_dict(data)
                    for data in json.loads(resp.body)]
    self.app.logger.debug('GET listing from %s shards for: %s',
                          len(shard_ranges), req.path_qs)
    if not shard_ranges:
        # can't find ranges or there was a problem getting the ranges. So
        # return what we have.
        return resp

    objects = []
    req_limit = int(req.params.get('limit', CONTAINER_LISTING_LIMIT))
    params = req.params.copy()
    params.pop('states', None)
    req.headers.pop('X-Backend-Record-Type', None)
    reverse = config_true_value(params.get('reverse'))
    marker = params.get('marker')
    end_marker = params.get('end_marker')

    limit = req_limit
    for shard_range in shard_ranges:
        params['limit'] = limit
        # Always set marker to ensure that object names less than or equal
        # to those already in the listing are not fetched; if the listing
        # is empty then the original request marker, if any, is used. This
        # allows misplaced objects below the expected shard range to be
        # included in the listing.
        if objects:
            last_name = objects[-1].get('name',
                                        objects[-1].get('subdir', u''))
            params['marker'] = last_name.encode('utf-8')
        elif marker:
            params['marker'] = marker
        else:
            params['marker'] = ''
        # Always set end_marker to ensure that misplaced objects beyond the
        # expected shard range are not fetched. This prevents a misplaced
        # object obscuring correctly placed objects in the next shard
        # range.
        if end_marker and end_marker in shard_range:
            params['end_marker'] = end_marker
        elif reverse:
            params['end_marker'] = str_to_wsgi(shard_range.lower_str)
        else:
            params['end_marker'] = str_to_wsgi(shard_range.end_marker)

        if (shard_range.account == self.account_name and
                shard_range.container == self.container_name):
            # directed back to same container - force GET of objects
            headers = {'X-Backend-Record-Type': 'object'}
        else:
            headers = None
        self.app.logger.debug('Getting from %s %s with %s',
                              shard_range, shard_range.name, headers)
        objs, shard_resp = self._get_container_listing(
            req, shard_range.account, shard_range.container,
            headers=headers, params=params)

        if not objs:
            # tolerate errors or empty shard containers
            continue

        objects.extend(objs)
        limit -= len(objs)

        if limit <= 0:
            break
        if end_marker:
            # The last entry may be a 'subdir' entry with no 'name' key, so
            # use the same name-or-subdir fallback as the marker code above
            # rather than indexing 'name' directly, which raised KeyError.
            last_entry = objects[-1]
            last_name = last_entry.get('name',
                                       last_entry.get('subdir', u''))
            last_name_bytes = last_name.encode('utf-8')
            end_marker_bytes = wsgi_to_bytes(end_marker)
            if reverse:
                reached_end = end_marker_bytes >= last_name_bytes
            else:
                reached_end = end_marker_bytes <= last_name_bytes
            if reached_end:
                break

    resp.body = json.dumps(objects).encode('ascii')
    constrained = any(req.params.get(constraint) for constraint in (
        'marker', 'end_marker', 'path', 'prefix', 'delimiter'))
    if not constrained and len(objects) < req_limit:
        self.app.logger.debug('Setting object count to %s', len(objects))
        # prefer the actual listing stats over the potentially outdated
        # root stats. This condition is only likely when a sharded
        # container is shrinking or in tests; typically a sharded container
        # will have more than CONTAINER_LISTING_LIMIT objects so any
        # unconstrained listing will be capped by the limit and total
        # object stats cannot therefore be inferred from the listing.
        resp.headers['X-Container-Object-Count'] = len(objects)
        resp.headers['X-Container-Bytes-Used'] = sum(
            o['bytes'] for o in objects)
    return resp