def main():
    """Inspect a disk's quarantined objects and emit a shell action script.

    For every quarantined data file on the given disk (up to ``--limit``
    files with readable metadata) the object ring is consulted and each
    primary node is probed with ``check_replica``:

    * all primaries hold a copy -> an ``rm``/``rmdir`` command is written;
    * no primary holds a copy   -> the command returned by
      ``recover_quarantine`` is written;
    * otherwise                 -> nothing is written for that file.

    Commands are appended to ``action_<timestamp>.sh`` in the current
    directory; nothing is executed by this function itself.

    Exits with status 1 when the quarantine directory does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--disk', action='store', required=True,
                        help='The disk to be checked')
    parser.add_argument('-l', '--limit', action='store', default=10,
                        type=int, help='Only process this many objects')
    args = parser.parse_args()

    path = "/srv/node/%s/quarantined/objects/" % args.disk
    if not pathlib.Path(path).exists():
        logger.error("Path %s not exist, exit!" % path)
        sys.exit(1)

    files = get_all_datafiles(path)
    limit = args.limit
    actionfile = 'action_%s.sh' % time.strftime("%Y%m%d-%H%M%S")
    logger.info("action commands will be written into file %s" % actionfile)
    ring = Ring('/etc/swift/', ring_name='object')

    for datafile in files:
        if limit < 1:
            break

        with open(datafile, 'rb') as fp:
            try:
                metadata = read_metadata(fp)
            except EOFError:
                logger.error("%s has invalid metadata" % datafile)
                continue

        name = metadata.get('name')
        # name looks like "/account/container/object"; the object part may
        # itself contain slashes, hence the maxsplit of 3.
        account, container, obj = name.split('/', 3)[1:]
        part, nodes = ring.get_nodes(account, container, obj)

        replica_count = 0
        for node in nodes:
            if check_replica(node['ip'], node['port'], node['device'],
                             part, account, container, obj):
                replica_count += 1

        # Reset for every file: without this a partially replicated object
        # either hits a NameError (first iteration) or silently re-writes
        # the command generated for the previous file.
        command = None
        if replica_count == len(nodes):
            logger.info("quarantined file %s has %s copies and can be deleted "
                        "safely" % (datafile, replica_count))
            command = "rm -v %s\nrmdir -v %s\n" % (datafile, datafile.parent)
        elif replica_count == 0:
            command = recover_quarantine(datafile, metadata, part,
                                         account, container, obj)
        else:
            logger.info("quarantined file %s has %s of %s copies, no action "
                        "generated" % (datafile, replica_count, len(nodes)))

        if command:
            with open(actionfile, 'a') as fp:
                fp.write(command)

        # Every file whose metadata could be read counts against the limit.
        limit -= 1
def __call__(self, env, start_response):
    """WSGI entry point that tags LXC deploy PUTs with their target host.

    For a PUT carrying the ``X-Object-Meta-LXC-Deploy`` header, the object
    ring is loaded, the first primary node for the account/container/object
    taken from ``RAW_PATH_INFO`` is looked up, and its IP plus the account
    name are stored as request headers. The request is then always handed
    to the wrapped application.

    :param env: WSGI environment dict.
    :param start_response: WSGI start_response callable.
    :returns: whatever the wrapped application returns.
    """
    req = Request(env)
    is_put = env.get('REQUEST_METHOD') == "PUT"
    if is_put and env.get("HTTP_X_OBJECT_META_LXC_DEPLOY"):
        ring = Ring(self.object_ring_path)
        # Segments 2, 3 and 4 of the raw path are used as
        # account / container / object.
        segments = env.get("RAW_PATH_INFO").split("/")
        account = segments[2]
        _part, primaries = ring.get_nodes(account, segments[3], segments[4])
        req.headers["X-Object-Meta-LXC-HOST"] = primaries[0]["ip"]
        req.headers["REMOTE_USER"] = account
    return self.app(env, start_response)
def create_account(act):
    """Create account ``act`` by PUTting it to each primary account node.

    Asserts that every node answers 201.

    :param act: account name; also used as the transaction id.
    """
    timestamp = utils.normalize_timestamp(time())
    ring = Ring(_testdir, ring_name='account')
    part, primaries = ring.get_nodes(act)
    account_path = '/' + act
    for primary in primaries:
        # Note: the object controller's http_connect helper is simply
        # reused here to talk to the account server nodes.
        conn = swift.proxy.controllers.obj.http_connect(
            primary['ip'], primary['port'], primary['device'], part,
            'PUT', account_path,
            {'X-Timestamp': timestamp, 'x-trans-id': act})
        response = conn.getresponse()
        assert response.status == 201
def create_account(act):
    """PUT the account ``act`` on every primary node of the account ring.

    Each node must respond with 201, otherwise the assertion fails.

    :param act: account name; doubles as the ``x-trans-id`` header value.
    """
    put_timestamp = utils.normalize_timestamp(time())
    partition, account_nodes = Ring(
        _testdir, ring_name='account').get_nodes(act)
    for account_node in account_nodes:
        # Note: http_connect from the object controller module is used
        # only as a convenient way to reach the account server nodes.
        connection = swift.proxy.controllers.obj.http_connect(
            account_node['ip'], account_node['port'],
            account_node['device'], partition, 'PUT', '/' + act,
            {'X-Timestamp': put_timestamp, 'x-trans-id': act})
        reply = connection.getresponse()
        assert reply.status == 201
def _delete_or_save_lifecycle(self, method, lifecycle=None):
    """Send ``method`` for this bucket's lifecycle record to each node.

    The record lives at ``/.s3_bucket_lifecycle/<account>/<container>``.
    One request is sent per primary node of the object ring; the i-th
    request names the i-th primary of the container ring in its
    ``X-Container-*`` headers.

    :param method: HTTP method; the lifecycle body is sent when it is
                   ``'PUT'``.
    :param lifecycle: lifecycle document; when truthy, length, md5 and
                      content-type headers are added.
    :returns: the response of the last node contacted.
    """
    path = '/.s3_bucket_lifecycle/%s/%s' % (self.account, self.container)
    object_ring = Ring('/etc/swift', ring_name='object')
    container_ring = Ring('/etc/swift', ring_name='container')
    part, nodes = object_ring.get_nodes('.s3_bucket_lifecycle',
                                        self.account, self.container)
    cpart, cnodes = container_ring.get_nodes('.s3_bucket_lifecycle',
                                             self.account)
    now_ts = normalize_timestamp(time.time())

    for index, node in enumerate(nodes):
        ip, port, dev = node['ip'], node['port'], node['device']
        container_node = cnodes[index]
        headers = {
            'user-agent': 'lifecycle-uploader',
            'X-Timestamp': now_ts,
            'referer': 'lifecycle-uploader',
            'X-Container-Partition': cpart,
            'X-Container-Host': '%(ip)s:%(port)s' % container_node,
            'X-Container-Device': container_node['device'],
        }
        if lifecycle:
            headers['content-length'] = len(lifecycle)
            headers['etags'] = self._compute_md5(lifecycle)
            headers['content-type'] = 'text/plain'

        conn = http_connect(ip, port, dev, part, method, path, headers)
        if method == 'PUT':
            conn.send(lifecycle)
        response = conn.getresponse()
    return response
class ObjectEndpoint(object):
    """WSGI middleware that reports where an object is stored.

    ``GET /object_endpoint/<account>/<container>/<object>`` is answered
    with a JSON list of object-server URLs, one per primary node of the
    object ring. Every other request is passed to the wrapped application.
    """

    def __init__(self, app, conf):
        """
        :param app: the next WSGI application in the pipeline.
        :param conf: middleware configuration; ``swift_dir`` (default
                     ``/etc/swift``) is where the object ring is loaded
                     from.
        """
        self.app = app
        self.logger = get_logger(conf, log_route='object_endpoint')
        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.object_ring = Ring(swift_dir, ring_name='object')

    def __call__(self, env, start_response):
        """Handle one WSGI request.

        :param env: WSGI environment dict.
        :param start_response: WSGI start_response callable.
        :returns: a WSGI response iterable.
        """
        request = Request(env)
        url_prefix = '/object_endpoint/'
        if not request.path.startswith(url_prefix):
            return self.app(env, start_response)

        if request.method != 'GET':
            # Answer with a 405 instead of raising out of the WSGI
            # callable, which would surface as a server error.
            return HTTPMethodNotAllowed()(env, start_response)

        try:
            # All three segments are required: with fewer, container/obj
            # would be None and the code below could not work.
            account, container, obj = split_path(
                request.path[len(url_prefix) - 1:], 3, 3, True)
        except ValueError:
            return self.app(env, start_response)

        if obj.endswith('/'):
            obj = obj[:-1]

        object_partition, objects = self.object_ring.get_nodes(
            account, container, obj)

        endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                            '{account}/{container}/{obj}'
        endpoints = [
            endpoint_template.format(ip=element['ip'],
                                     port=element['port'],
                                     device=element['device'],
                                     partition=object_partition,
                                     account=account,
                                     container=container,
                                     obj=obj)
            for element in objects]

        body = json.dumps(endpoints)
        if not isinstance(body, bytes):
            body = body.encode('utf-8')
        # PEP 3333: headers are a list of (name, value) tuples and the body
        # is an iterable of byte strings (a bare string would be iterated
        # one character at a time).
        start_response('200 OK', [('Content-Type', 'application/json'),
                                  ('Content-Length', str(len(body)))])
        return [body]
def create_account(act):
    """Issue a direct account PUT for ``act`` to all primary account nodes.

    The function asserts a 201 status from each node.

    :param act: name of the account; reused as the transaction id header.
    """
    now = utils.normalize_timestamp(time())
    part, targets = Ring(_testdir, ring_name="account").get_nodes(act)
    for target in targets:
        request_headers = {"X-Timestamp": now, "x-trans-id": act}
        # Note: the object controller's http_connect method is borrowed
        # here purely to talk to the account server nodes.
        conn = swift.proxy.controllers.obj.http_connect(
            target["ip"],
            target["port"],
            target["device"],
            part,
            "PUT",
            "/" + act,
            request_headers,
        )
        status = conn.getresponse().status
        assert status == 201
class ObjectEndpoint(object):
    """WSGI middleware that reports where an object is stored.

    ``GET /object_endpoint/<account>/<container>/<object>`` is answered
    with a JSON list of object-server URLs, one per primary node of the
    object ring. Every other request is passed to the wrapped application.
    """

    def __init__(self, app, conf):
        """
        :param app: the next WSGI application in the pipeline.
        :param conf: middleware configuration; ``swift_dir`` (default
                     ``/etc/swift``) is where the object ring is loaded
                     from.
        """
        self.app = app
        self.logger = get_logger(conf, log_route='object_endpoint')
        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.object_ring = Ring(swift_dir, ring_name='object')

    def __call__(self, env, start_response):
        """Handle one WSGI request.

        :param env: WSGI environment dict.
        :param start_response: WSGI start_response callable.
        :returns: a WSGI response iterable.
        """
        request = Request(env)
        url_prefix = '/object_endpoint/'
        if not request.path.startswith(url_prefix):
            return self.app(env, start_response)

        if request.method != 'GET':
            # Answer with a 405 instead of raising out of the WSGI
            # callable, which would surface as a server error.
            return HTTPMethodNotAllowed()(env, start_response)

        try:
            # All three segments are required: with fewer, container/obj
            # would be None and the code below could not work.
            account, container, obj = split_path(
                request.path[len(url_prefix) - 1:], 3, 3, True)
        except ValueError:
            return self.app(env, start_response)

        if obj.endswith('/'):
            obj = obj[:-1]

        object_partition, objects = self.object_ring.get_nodes(
            account, container, obj)

        endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                            '{account}/{container}/{obj}'
        endpoints = [
            endpoint_template.format(ip=element['ip'],
                                     port=element['port'],
                                     device=element['device'],
                                     partition=object_partition,
                                     account=account,
                                     container=container,
                                     obj=obj)
            for element in objects]

        body = json.dumps(endpoints)
        if not isinstance(body, bytes):
            body = body.encode('utf-8')
        # PEP 3333: headers are a list of (name, value) tuples and the body
        # is an iterable of byte strings (a bare string would be iterated
        # one character at a time).
        start_response('200 OK', [('Content-Type', 'application/json'),
                                  ('Content-Length', str(len(body)))])
        return [body]
def get_container_list(account):
    """Fetch the container listing of ``account`` from its first primary.

    The account server of the first primary node in the account ring is
    queried directly over HTTP.

    :param account: account name, e.g. ``AUTH_ss``.
    :returns: tuple ``(containers, listing_hash)`` where ``containers`` is
              the list of non-empty lines of the response body and
              ``listing_hash`` is the md5 hex digest of the whole body.
    """
    # Only the account ring is needed; the container and object rings were
    # previously loaded from disk here without ever being used.
    account_ring = Ring(swift_dir, ring_name="account")
    part, nodes = account_ring.get_nodes(account)
    primary = nodes[0]
    url = "http://%s:%s/%s/%s/%s" % (primary['ip'], primary['port'],
                                     primary['device'], part, account)
    r = requests.get(url)
    if r.status_code == 404:
        # NOTE(review): only a warning is logged; the 404 body is still
        # hashed and returned below, as before.
        logger.warning("Account not existing yet")

    content = str(r.text)
    digest_input = content
    if not isinstance(digest_input, bytes):
        # hashlib needs bytes when str is a text type.
        digest_input = digest_input.encode('utf-8')
    container_list_hash = hashlib.md5(digest_input).hexdigest()

    # Drop empty lines (normally just the one after the trailing newline);
    # list.remove('') raised ValueError when there was none.
    containers = [line for line in content.split("\n") if line]
    return containers, container_list_hash
def _get_db_info(self, account, container, number):
    """Return the container DB info as seen by one container server.

    :param account: account name.
    :param container: container name.
    :param number: key into ``self.configs['container-server']`` selecting
                   the server's config file.
    :returns: ``ContainerBroker.get_info()`` for the container's DB file,
              or None when no primary node listens on that server's
              ``bind_port``.
    """
    server_type = 'container'
    config_path = self.configs['%s-server' % server_type][number]
    options = utils.readconf(config_path, 'app:container-server')
    root = options.get('devices')
    swift_dir = options.get('swift_dir', '/etc/swift')
    part, nodes = Ring(swift_dir, ring_name=server_type).get_nodes(
        account, container)

    # assumes one to one mapping between a server's port and its device
    matches = [candidate for candidate in nodes
               if candidate['port'] == int(options.get('bind_port'))]
    if not matches:
        return None
    device = matches[0]['device']

    path_hash = utils.hash_path(account, container)
    relative_dir = utils.storage_directory(
        '%ss' % server_type, part, path_hash)
    db_file = os.path.join(
        os.path.join(root, device, relative_dir), '%s.db' % path_hash)
    return ContainerBroker(db_file).get_info()
def _get_db_info(self, account, container, number):
    """Look up a container's DB info on the server selected by ``number``.

    The server's config file supplies the devices root, the swift
    directory and the bind port; the container ring supplies the
    partition and the device whose node uses that port.

    :param account: account name.
    :param container: container name.
    :param number: key of the container-server config to use.
    :returns: the broker's ``get_info()`` result, or None if none of the
              container's primary nodes uses the server's bind port.
    """
    server_type = 'container'
    server_configs = self.configs['%s-server' % server_type]
    options = utils.readconf(server_configs[number],
                             'app:container-server')
    devices_root = options.get('devices')
    ring = Ring(options.get('swift_dir', '/etc/swift'),
                ring_name=server_type)
    part, nodes = ring.get_nodes(account, container)

    device = None
    found = False
    for node in nodes:
        # assumes one to one mapping
        if node['port'] == int(options.get('bind_port')):
            device = node['device']
            found = True
            break
    if not found:
        return None

    name_hash = utils.hash_path(account, container)
    storage_dir = utils.storage_directory(
        '%ss' % server_type, part, name_hash)
    db_dir = os.path.join(devices_root, device, storage_dir)
    broker = ContainerBroker(os.path.join(db_dir, '%s.db' % name_hash))
    return broker.get_info()
from swift.common.ring import Ring

if __name__ == '__main__':
    # Example path for a sample object: update this to an existing
    # account/container/object in your environment.
    account = 'AUTH_9fbaa44c45ab4902a46110fd90629a79'
    container = 'testing'
    obj = 'testing.pem'

    # Loads the object ring from the current directory.
    ring = Ring('.', ring_name='object')
    part, nodes = ring.get_nodes(account, container, obj)

    # print is called with a single pre-formatted string so the script
    # runs, with identical output, on both Python 2 and Python 3 (the
    # former print statements were a SyntaxError on Python 3).
    print('nodes: ')
    for n in nodes:
        print('node:  %s' % (n,))
    print('part =  %s' % (part,))

    morenodes = ring.get_more_nodes(part)
    print('more nodes:')
    for n in morenodes:
        print('node:  %s' % (n,))
def _test_ondisk_data_after_write_with_crypto(self, policy_name):
    """PUT and POST an object via the crypto app and verify what was stored.

    Checks the etag in the container listing and, for each object node,
    the on-disk metadata (user metadata, crypto meta, etag, etag hmac and
    the container-update override etag) against values encrypted here with
    keys from ``self.km``.

    :param policy_name: name of the storage policy to create the
                        container with.
    :returns: tuple ``(exp_enc_body, ondisk_data)``: the expected encrypted
              body computed for the last node visited, and a list of
              ``(node, contents)`` pairs read from disk.
    """
    policy = storage_policy.POLICIES.get_by_name(policy_name)
    self._create_container(self.proxy_app, policy_name=policy_name)
    self._put_object(self.crypto_app, self.plaintext)
    self._post_object(self.crypto_app)

    # Verify container listing etag is encrypted by direct GET to container
    # server. We can use any server for all nodes since they all share same
    # devices dir.
    cont_server = self._test_context['test_servers'][3]
    cont_ring = Ring(self._test_context['testdir'], ring_name='container')
    part, nodes = cont_ring.get_nodes('a', self.container_name)
    for node in nodes:
        req = Request.blank('/%s/%s/a/%s'
                            % (node['device'], part, self.container_name),
                            method='GET', query_string='format=json')
        resp = req.get_response(cont_server)
        listing = json.loads(resp.body)
        # sanity checks...
        self.assertEqual(1, len(listing))
        self.assertEqual('o', listing[0]['name'])
        self.assertEqual('application/test', listing[0]['content_type'])
        # verify encrypted etag value
        # the listing hash has the form "<encrypted etag>; swift_meta=<meta>"
        parts = listing[0]['hash'].rsplit(';', 1)
        crypto_meta_param = parts[1].strip()
        crypto_meta = crypto_meta_param[len('swift_meta='):]
        listing_etag_iv = load_crypto_meta(crypto_meta)['iv']
        # re-encrypt the plaintext etag with the container key and the
        # stored iv; it must match what the listing holds
        exp_enc_listing_etag = base64.b64encode(
            encrypt(self.plaintext_etag.encode('ascii'),
                    self.km.create_key('/a/%s' % self.container_name),
                    listing_etag_iv)).decode('ascii')
        self.assertEqual(exp_enc_listing_etag, parts[0])

    # Verify diskfile data and metadata is encrypted
    ring_object = self.proxy_app.get_object_ring(int(policy))
    partition, nodes = ring_object.get_nodes('a', self.container_name, 'o')
    conf = {
        'devices': self._test_context["testdir"],
        'mount_check': 'false'
    }
    df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[policy]
    ondisk_data = []
    exp_enc_body = None
    for node_index, node in enumerate(nodes):
        df = df_mgr.get_diskfile(node['device'], partition,
                                 'a', self.container_name, 'o',
                                 policy=policy)
        with df.open():
            meta = df.get_metadata()
            contents = b''.join(df.reader())
            # lower-case the keys so lookups below are case-insensitive
            metadata = dict((k.lower(), v) for k, v in meta.items())
            # verify on disk data - body
            body_iv = load_crypto_meta(
                metadata['x-object-sysmeta-crypto-body-meta'])['iv']
            body_key_meta = load_crypto_meta(
                metadata['x-object-sysmeta-crypto-body-meta'])['body_key']
            obj_key = self.km.create_key('/a/%s/o' % self.container_name)
            # the body key is stored wrapped with the object key
            body_key = Crypto().unwrap_key(obj_key, body_key_meta)
            exp_enc_body = encrypt(self.plaintext, body_key, body_iv)
            ondisk_data.append((node, contents))

            # verify on disk user metadata
            enc_val, meta = metadata[
                'x-object-transient-sysmeta-crypto-meta-fruit'].split(';')
            meta = meta.strip()[len('swift_meta='):]
            metadata_iv = load_crypto_meta(meta)['iv']
            exp_enc_meta = base64.b64encode(
                encrypt(b'Kiwi', obj_key, metadata_iv)).decode('ascii')
            self.assertEqual(exp_enc_meta, enc_val)
            # the plaintext user metadata header must not be stored
            self.assertNotIn('x-object-meta-fruit', metadata)

            self.assertIn('x-object-transient-sysmeta-crypto-meta', metadata)
            meta = load_crypto_meta(
                metadata['x-object-transient-sysmeta-crypto-meta'])
            self.assertIn('key_id', meta)
            self.assertIn('path', meta['key_id'])
            self.assertEqual(
                '/a/%s/%s' % (self.container_name, self.object_name),
                meta['key_id']['path'])
            self.assertIn('v', meta['key_id'])
            self.assertEqual('2', meta['key_id']['v'])
            self.assertIn('cipher', meta)
            self.assertEqual(Crypto.cipher, meta['cipher'])

            # verify etag
            actual_enc_etag, _junk, actual_etag_meta = metadata[
                'x-object-sysmeta-crypto-etag'].partition('; swift_meta=')
            etag_iv = load_crypto_meta(actual_etag_meta)['iv']
            exp_enc_etag = base64.b64encode(
                encrypt(self.plaintext_etag.encode('ascii'),
                        obj_key, etag_iv)).decode('ascii')
            self.assertEqual(exp_enc_etag, actual_enc_etag)

            # verify etag hmac
            exp_etag_mac = hmac.new(obj_key,
                                    self.plaintext_etag.encode('ascii'),
                                    digestmod=hashlib.sha256).digest()
            exp_etag_mac = base64.b64encode(exp_etag_mac).decode('ascii')
            self.assertEqual(exp_etag_mac,
                             metadata['x-object-sysmeta-crypto-etag-mac'])

            # verify etag override for container updates
            override = 'x-object-sysmeta-container-update-override-etag'
            parts = metadata[override].rsplit(';', 1)
            crypto_meta_param = parts[1].strip()
            crypto_meta = crypto_meta_param[len('swift_meta='):]
            listing_etag_iv = load_crypto_meta(crypto_meta)['iv']
            cont_key = self.km.create_key('/a/%s' % self.container_name)
            exp_enc_listing_etag = base64.b64encode(
                encrypt(self.plaintext_etag.encode('ascii'), cont_key,
                        listing_etag_iv)).decode('ascii')
            self.assertEqual(exp_enc_listing_etag, parts[0])

    self._check_GET_and_HEAD(self.crypto_app)
    return exp_enc_body, ondisk_data
def _test_ondisk_data_after_write_with_crypto(self, policy_name):
    """PUT and POST an object via the crypto app and verify what was stored.

    Checks the etag in the container listing and, for each object node,
    the on-disk metadata (user metadata, crypto meta, etag, etag hmac and
    the container-update override etag) against values encrypted here with
    keys from ``self.km``.

    :param policy_name: name of the storage policy to create the
                        container with.
    :returns: tuple ``(exp_enc_body, ondisk_data)``: the expected encrypted
              body computed for the last node visited, and a list of
              ``(node, contents)`` pairs read from disk.
    """
    policy = storage_policy.POLICIES.get_by_name(policy_name)
    self._create_container(self.proxy_app, policy_name=policy_name)
    self._put_object(self.crypto_app, self.plaintext)
    self._post_object(self.crypto_app)

    # Verify container listing etag is encrypted by direct GET to container
    # server. We can use any server for all nodes since they all share same
    # devices dir.
    cont_server = self._test_context['test_servers'][3]
    cont_ring = Ring(self._test_context['testdir'], ring_name='container')
    part, nodes = cont_ring.get_nodes('a', self.container_name)
    for node in nodes:
        req = Request.blank('/%s/%s/a/%s'
                            % (node['device'], part, self.container_name),
                            method='GET', query_string='format=json')
        resp = req.get_response(cont_server)
        listing = json.loads(resp.body)
        # sanity checks...
        self.assertEqual(1, len(listing))
        self.assertEqual('o', listing[0]['name'])
        self.assertEqual('application/test', listing[0]['content_type'])
        # verify encrypted etag value
        # the listing hash has the form "<encrypted etag>; swift_meta=<meta>"
        parts = listing[0]['hash'].rsplit(';', 1)
        crypto_meta_param = parts[1].strip()
        crypto_meta = crypto_meta_param[len('swift_meta='):]
        listing_etag_iv = load_crypto_meta(crypto_meta)['iv']
        # re-encrypt the plaintext etag with the container key and the
        # stored iv; it must match what the listing holds
        exp_enc_listing_etag = base64.b64encode(
            encrypt(self.plaintext_etag,
                    self.km.create_key('/a/%s' % self.container_name),
                    listing_etag_iv))
        self.assertEqual(exp_enc_listing_etag, parts[0])

    # Verify diskfile data and metadata is encrypted
    ring_object = self.proxy_app.get_object_ring(int(policy))
    partition, nodes = ring_object.get_nodes('a', self.container_name, 'o')
    conf = {'devices': self._test_context["testdir"],
            'mount_check': 'false'}
    df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[policy]
    ondisk_data = []
    exp_enc_body = None
    for node_index, node in enumerate(nodes):
        df = df_mgr.get_diskfile(node['device'], partition,
                                 'a', self.container_name, 'o',
                                 policy=policy)
        with df.open():
            meta = df.get_metadata()
            contents = ''.join(df.reader())
            # lower-case the keys so lookups below are case-insensitive
            metadata = dict((k.lower(), v) for k, v in meta.items())
            # verify on disk data - body
            body_iv = load_crypto_meta(
                metadata['x-object-sysmeta-crypto-body-meta'])['iv']
            body_key_meta = load_crypto_meta(
                metadata['x-object-sysmeta-crypto-body-meta'])['body_key']
            obj_key = self.km.create_key('/a/%s/o' % self.container_name)
            # the body key is stored wrapped with the object key
            body_key = Crypto().unwrap_key(obj_key, body_key_meta)
            exp_enc_body = encrypt(self.plaintext, body_key, body_iv)
            ondisk_data.append((node, contents))

            # verify on disk user metadata
            enc_val, meta = metadata[
                'x-object-transient-sysmeta-crypto-meta-fruit'].split(';')
            meta = meta.strip()[len('swift_meta='):]
            metadata_iv = load_crypto_meta(meta)['iv']
            exp_enc_meta = base64.b64encode(encrypt('Kiwi', obj_key,
                                                    metadata_iv))
            self.assertEqual(exp_enc_meta, enc_val)
            # the plaintext user metadata header must not be stored
            self.assertNotIn('x-object-meta-fruit', metadata)

            self.assertIn(
                'x-object-transient-sysmeta-crypto-meta', metadata)
            meta = load_crypto_meta(
                metadata['x-object-transient-sysmeta-crypto-meta'])
            self.assertIn('key_id', meta)
            self.assertIn('path', meta['key_id'])
            self.assertEqual(
                '/a/%s/%s' % (self.container_name, self.object_name),
                meta['key_id']['path'])
            self.assertIn('v', meta['key_id'])
            self.assertEqual('1', meta['key_id']['v'])
            self.assertIn('cipher', meta)
            self.assertEqual(Crypto.cipher, meta['cipher'])

            # verify etag
            actual_enc_etag, _junk, actual_etag_meta = metadata[
                'x-object-sysmeta-crypto-etag'].partition('; swift_meta=')
            etag_iv = load_crypto_meta(actual_etag_meta)['iv']
            exp_enc_etag = base64.b64encode(encrypt(self.plaintext_etag,
                                                    obj_key, etag_iv))
            self.assertEqual(exp_enc_etag, actual_enc_etag)

            # verify etag hmac
            exp_etag_mac = hmac.new(
                obj_key, self.plaintext_etag, digestmod=hashlib.sha256)
            exp_etag_mac = base64.b64encode(exp_etag_mac.digest())
            self.assertEqual(exp_etag_mac,
                             metadata['x-object-sysmeta-crypto-etag-mac'])

            # verify etag override for container updates
            override = 'x-object-sysmeta-container-update-override-etag'
            parts = metadata[override].rsplit(';', 1)
            crypto_meta_param = parts[1].strip()
            crypto_meta = crypto_meta_param[len('swift_meta='):]
            listing_etag_iv = load_crypto_meta(crypto_meta)['iv']
            cont_key = self.km.create_key('/a/%s' % self.container_name)
            exp_enc_listing_etag = base64.b64encode(
                encrypt(self.plaintext_etag, cont_key,
                        listing_etag_iv))
            self.assertEqual(exp_enc_listing_etag, parts[0])

    self._check_GET_and_HEAD(self.crypto_app)
    return exp_enc_body, ondisk_data
class UtilizationAggregator(Daemon):
    """Daemon that aggregates transfer samples into per-tenant records.

    Each pass reads the sample containers stored under the hidden
    ``.transfer_record`` account, sums the received/sent byte counts of
    their objects per bill type, records the totals as size-0 entries in
    the tenant's container under the hidden ``.utilization`` account,
    deletes the processed samples, and finally back-fills ``usage/...``
    entries for sample windows that have none.

    :param conf: The daemon configuration.
    """

    def __init__(self, conf):
        self.conf = conf
        self.logger = get_logger(conf, log_route='utilization-aggregator')
        self.interval = int(conf.get('interval') or 60)
        self.aggregate_account = '.utilization'
        self.sample_account = '.transfer_record'
        conf_path = conf.get('__file__') or \
            '/etc/swift/swift-utilization-aggregator.conf'
        request_tries = int(conf.get('request_tries') or 3)
        self.swift = InternalClient(conf_path,
                                    'Swift Utilization Aggregator',
                                    request_tries)
        self.report_interval = int(conf.get('report_interval') or 60)
        self.report_first_time = self.report_last_time = time()
        self.report_containers = 0
        self.report_objects = 0
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.rcache = join(self.recon_cache_path, 'object.recon')
        self.concurrency = int(conf.get('concurrency', 1))
        if self.concurrency < 1:
            raise ValueError("concurrency must be set to at least 1")
        self.processes = int(self.conf.get('processes', 0))
        self.process = int(self.conf.get('process', 0))
        self.container_ring = Ring('/etc/swift', ring_name='container')
        # Width, in seconds, of one sample window.
        self.sample_rate = int(self.conf.get('sample_rate', 600))
        # Timestamp from which usage back-filling starts on the first pass.
        self.last_chk = iso8601_to_timestamp(self.conf.get(
            'service_start'))
        self.kinx_api_url = self.conf.get('kinx_api_url')

    def report(self, final=False):
        """
        Emits a log line report of the progress so far, or the final
        progress if final=True (which also dumps the recon cache).

        :param final: Set to True for the last report once the aggregation
                      pass has completed.
        """
        if final:
            elapsed = time() - self.report_first_time
            self.logger.info(_('Pass completed in %ds; %d containers,'
                               ' %d objects aggregated') %
                             (elapsed, self.report_containers,
                              self.report_objects))
            dump_recon_cache({'object_aggregation_pass': elapsed,
                              'aggregation_last_pass':
                              self.report_containers},
                             self.rcache, self.logger)
        elif time() - self.report_last_time >= self.report_interval:
            elapsed = time() - self.report_first_time
            self.logger.info(_('Pass so far %ds; %d objects aggregated') %
                             (elapsed, self.report_objects))
            self.report_last_time = time()

    def run_once(self, *args, **kwargs):
        """
        Executes a single aggregation pass.

        :param args: Extra args to fulfill the Daemon interface; this
                     daemon has no additional args.
        :param kwargs: Extra keyword args to fulfill the Daemon interface;
                       this daemon accepts processes and process keyword
                       args. These will override the values from the
                       config file if provided.
        """
        processes, process = self.get_process_values(kwargs)
        pool = GreenPool(self.concurrency)
        self.report_first_time = self.report_last_time = time()
        self.report_objects = 0
        self.report_containers = 0
        containers_to_delete = []
        try:
            self.logger.debug(_('Run begin'))
            containers, objects = \
                self.swift.get_account_info(self.sample_account)
            self.logger.info(_('Pass beginning; %s possible containers; %s '
                               'possible objects') % (containers, objects))
            for c in self.swift.iter_containers(self.sample_account):
                container = c['name']
                try:
                    # Sample containers are named "<timestamp>_<rest>".
                    timestamp, account = container.split('_', 1)
                    timestamp = float(timestamp)
                except ValueError:
                    self.logger.debug('ValueError: %s, '
                                      'need more than 1 value to unpack' %
                                      container)
                else:
                    if processes > 0:
                        # Shard containers across the configured processes.
                        obj_proc = int(hashlib.md5(container).hexdigest(), 16)
                        if obj_proc % processes != process:
                            continue
                    # Start of the current sample window.
                    n = (float(time()) // self.sample_rate) * self.sample_rate
                    if timestamp <= n:
                        containers_to_delete.append(container)
                        pool.spawn_n(self.aggregate_container, container)
            pool.waitall()

            for container in containers_to_delete:
                try:
                    self.logger.debug('delete container: %s' % container)
                    self.swift.delete_container(self.sample_account,
                                                container,
                                                acceptable_statuses=(
                                                    2, HTTP_NOT_FOUND,
                                                    HTTP_CONFLICT))
                except (Exception, Timeout) as err:
                    self.logger.exception(
                        _('Exception while deleting container %s %s') %
                        (container, str(err)))

            tenants_to_fillup = list()
            for c in self.swift.iter_containers(self.aggregate_account):
                tenant_id = c['name']
                if processes > 0:
                    c_proc = int(hashlib.md5(tenant_id).hexdigest(), 16)
                    if c_proc % processes != process:
                        continue
                tenants_to_fillup.append(tenant_id)

            # fill up lost usage data
            self.fillup_lossed_usage_data(tenants_to_fillup)

            self.logger.debug(_('Run end'))
            self.report(final=True)
        except (Exception, Timeout):
            self.logger.exception(_('Unhandled exception'))

    def run_forever(self, *args, **kwargs):
        """
        Executes aggregation passes forever, sleeping a random fraction of
        the remaining interval between passes.

        :param args: Extra args to fulfill the Daemon interface; this
                     daemon has no additional args.
        :param kwargs: Extra keyword args to fulfill the Daemon interface;
                       passed through to run_once().
        """
        sleep(random() * self.interval)
        while True:
            begin = time()
            try:
                self.run_once(*args, **kwargs)
            except (Exception, Timeout):
                self.logger.exception(_('Unhandled exception'))
            elapsed = time() - begin
            if elapsed < self.interval:
                sleep(random() * (self.interval - elapsed))

    def get_process_values(self, kwargs):
        """
        Gets the processes, process from the kwargs if those values exist.

        Otherwise, return processes, process set in the config file.

        :param kwargs: Keyword args passed into the run_forever(),
                       run_once() methods. They have values specified on
                       the command line when the daemon is run.
        :returns: tuple (processes, process).
        :raises ValueError: if either value is negative, or if processes is
                            non-zero and process is not less than it.
        """
        if kwargs.get('processes') is not None:
            processes = int(kwargs['processes'])
        else:
            processes = self.processes

        if kwargs.get('process') is not None:
            process = int(kwargs['process'])
        else:
            process = self.process

        if process < 0:
            raise ValueError(
                'process must be an integer greater than or equal to 0')

        if processes < 0:
            raise ValueError(
                'processes must be an integer greater than or equal to 0')

        if processes and process >= processes:
            # The message now matches the check (process == processes is
            # rejected too).
            raise ValueError(
                'process must be less than processes')

        return processes, process

    def aggregate_container(self, container):
        """
        Sums the samples of one sample container and records the totals.

        The container is named ``<timestamp>_<tenant_id>_<account>``; each
        object in it is named
        ``<ts>/<bytes_received>/<bytes_sent>/<trans_id>/<client_ip>``.
        Errors are logged and counted, never raised.

        :param container: name of the sample container to aggregate.
        """
        start_time = time()
        try:
            objs_to_delete = list()
            bytes_recvs = dict()
            bytes_sents = dict()
            ts, tenant_id, account = container.split('_', 2)
            ts = int(float(ts))

            for o in self.swift.iter_objects(self.sample_account, container):
                name = o['name']
                objs_to_delete.append(name)
                # The object's own timestamp is kept in a separate name:
                # unpacking it into ``ts`` replaced the integer container
                # timestamp with a string, which broke both the bill-type
                # lookup (str + int) and the '%d' formatting below.
                obj_ts, bytes_rv, bytes_st, trans_id, client_ip = \
                    name.split('/')
                # NOTE(review): the bill type is looked up with the
                # sample's own timestamp, as the original call intended;
                # confirm that is preferred over the container timestamp.
                bill_type = self.get_billtype_by_client_ip(
                    client_ip, int(float(obj_ts)))
                bytes_recvs[bill_type] = \
                    bytes_recvs.get(bill_type, 0) + int(bytes_rv)
                bytes_sents[bill_type] = \
                    bytes_sents.get(bill_type, 0) + int(bytes_st)
                self.report_objects += 1

            for o in objs_to_delete:
                self.swift.delete_object(self.sample_account, container, o)

            for bill_type, bt_rv in bytes_recvs.items():
                t_object = 'transfer/%d/%d/%d_%d_%d' % (
                    ts, bill_type, bt_rv, bytes_sents[bill_type],
                    self.report_objects)
                self._hidden_update(tenant_id, t_object)

            # Previously never incremented, so reports always said 0.
            self.report_containers += 1
        except (Exception, Timeout) as err:
            self.logger.increment('errors')
            self.logger.exception(
                _('Exception while aggregating sample %s %s') %
                (container, str(err)))
        self.logger.timing_since('timing', start_time)
        self.report()

    def account_info(self, tenant_id, timestamp):
        """
        Reads the first ``usage/<timestamp>...`` entry of a tenant.

        :param tenant_id: container name under the aggregate account.
        :param timestamp: timestamp used in the listing prefix.
        :returns: tuple (container_count, object_count, bytes_used), or
                  (0, 0, 0) when the listing is empty.
        """
        path = '/v1/%s/%s?prefix=usage/%d&limit=1' % (self.aggregate_account,
                                                      tenant_id, timestamp)
        resp = self.swift.make_request('GET', path, {}, (2,))
        if len(resp.body) == 0:
            return 0, 0, 0
        # Entry names look like "usage/<ts>/<containers>_<objects>_<bytes>".
        usages = resp.body.split('/', 2)[2].rstrip()
        cont_cnt, obj_cnt, bt_used = usages.split('_')
        return int(cont_cnt), int(obj_cnt), int(bt_used)

    def _hidden_update(self, container, obj, method='PUT'):
        """
        Sends an object-entry update for ``obj`` straight to the container
        servers holding ``container`` under the aggregate account.

        :param container: container name under the aggregate account.
        :param obj: object name to record.
        :param method: HTTP method to send, 'PUT' by default.
        """
        hidden_path = '/%s/%s/%s' % (self.aggregate_account, container, obj)
        part, nodes = self.container_ring.get_nodes(self.aggregate_account,
                                                    container)
        for node in nodes:
            ip = node['ip']
            port = node['port']
            dev = node['device']
            action_headers = dict()
            action_headers['user-agent'] = 'aggregator'
            action_headers['X-Timestamp'] = normalize_timestamp(time())
            action_headers['referer'] = 'aggregator-daemon'
            action_headers['x-size'] = '0'
            action_headers['x-content-type'] = "text/plain"
            # md5 of the empty string
            action_headers['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e'

            conn = http_connect(ip, port, dev, part, method, hidden_path,
                                action_headers)
            response = conn.getresponse()
            response.read()

    def fillup_lossed_usage_data(self, tenants):
        """
        Back-fills missing ``usage/<window>`` entries for each tenant.

        Walks every sample window from ``self.last_chk`` up to the current
        one; a window without an entry gets one carrying the most recently
        seen counts. ``self.last_chk`` is then advanced to the current
        window.

        :param tenants: iterable of tenant container names.
        """
        now = (float(time()) // self.sample_rate) * self.sample_rate
        path = '/v1/%s/%s?prefix=usage/%d&limit=1'

        for t in tenants:
            last = self.last_chk
            # -1 means "no counts seen yet for this tenant".
            cont_cnt = obj_cnt = bt_used = -1
            while last <= now:
                p = path % (self.aggregate_account, t, last)
                resp = self.swift.make_request('GET', p, {}, (2,))
                if len(resp.body) != 0:
                    usages = resp.body.split('/', 2)[2].rstrip()
                    c, o, bt = usages.split('_')
                    cont_cnt = int(c)
                    obj_cnt = int(o)
                    bt_used = int(bt)
                else:
                    before = last - self.sample_rate
                    if cont_cnt == -1:
                        # account_info() expects the tenant container; the
                        # aggregate account name was passed here before,
                        # which made it query a container named after the
                        # account itself.
                        cont_cnt, obj_cnt, bt_used = \
                            self.account_info(t, before)
                    obj = 'usage/%d/%d_%d_%d' % (last, cont_cnt, obj_cnt,
                                                 bt_used)
                    self._hidden_update(t, obj)
                last += self.sample_rate
        self.last_chk = now

    def get_billtype_by_client_ip(self, client_ip, timestamp):
        """
        Asks the KINX API which bill type applies to a client IP.

        :param client_ip: dotted-quad client address.
        :param timestamp: numeric start of the queried period; the period
                          ends ``sample_rate - 1`` seconds later.
        :returns: the bill type of the first range containing the address.
                  NOTE(review): when nothing matches, the bill type of the
                  last range listed is returned (-1 only when there are no
                  ranges at all); confirm this is intended.
        """
        end_ts = timestamp_to_iso8601(timestamp + self.sample_rate - 1)
        start_ts = timestamp_to_iso8601(timestamp)
        params = {'start': start_ts, 'end': end_ts}
        path = self.kinx_api_url + '/?%s' % (urllib.urlencode(params))
        data = json.loads(urllib.urlopen(path).read())
        bill_type = -1
        for r in data['ip_ranges']:
            bill_type = r['bill_type']
            for cidr in r['ip_range']:
                if self.ip_in_cidr(client_ip, cidr):
                    return bill_type
        return bill_type

    def ip_in_cidr(self, client_ip, cidr):
        """
        Tells whether an IPv4 address lies inside a CIDR block.

        :param client_ip: dotted-quad address.
        :param cidr: block as ``<dotted-quad>/<prefix length>``.
        :returns: True if the first ``prefix length`` bits match.
        """
        def to_bits(dotted):
            # 8 zero-padded binary digits per octet.
            return ''.join(bin(int(octet))[2:].rjust(8, '0')
                           for octet in dotted.split('.'))

        ip, snet = cidr.split('/')
        prefix_len = int(snet)
        return to_bits(client_ip)[:prefix_len] == to_bits(ip)[:prefix_len]
class TransitionMiddleware(object):
    """WSGI middleware that moves an object's data to Amazon Glacier.

    An object POST carrying the ``X-S3-Object-Transition`` header makes
    the middleware fetch the object, upload it to the Glacier vault,
    re-issue the request with the Glacier flag header set, and record the
    archive in a hidden ``.glacier_<account>`` account. All other requests
    are passed through untouched.
    """

    def __init__(self, app, conf, *args, **kwargs):
        """
        :param app: the next WSGI application in the pipeline.
        :param conf: middleware configuration; ``temp_path`` (default
                     ``/var/cache/s3/``) is where object bodies are
                     buffered before upload.
        """
        self.app = app
        self.conf = conf
        self.logger = get_logger(self.conf, log_route='transition')
        self.container_ring = Ring('/etc/swift', ring_name='container')
        self.glacier_account_prefix = '.glacier_'
        self.temp_path = conf.get('temp_path', '/var/cache/s3/')

    def _init_glacier(self):
        """Connect to Glacier and return the transition vault."""
        con = Layer2(region_name='ap-northeast-1')
        return con.create_vault('swift-s3-transition')

    def transition(self, env):
        """Archive the requested object to Glacier.

        :param env: WSGI environment of the triggering request.
        :returns: a response object: 204 on success, 500 when buffering or
                  the Glacier upload fails, or the GET response itself
                  when the object could not be read.
        """
        # GET Object body
        req = Request(copy(env))
        # Take the target from this request's own path rather than from
        # attributes on the shared middleware instance, which a concurrent
        # request can overwrite while this one waits on I/O.
        _version, account, container, obj = split_path(
            req.path, 0, 4, True)
        req.method = 'GET'
        resp = req.get_response(self.app)
        if resp.status_int // 100 != 2:
            # Never archive an error body, and never go on to truncate an
            # object whose data could not be read.
            return resp
        obj_body = resp.body

        # Upload to Glacier
        tmpfile = self.save_to_tempfile(obj_body)
        if tmpfile is None:
            return Response(status=HTTP_INTERNAL_SERVER_ERROR,
                            body='Failed to buffer object for archiving')
        try:
            glacier = self._init_glacier()
            archive_id = glacier.upload_archive(tmpfile)
            glacier_obj = make_glacier_hidden_object_name(obj, archive_id)
        except Exception as e:
            # str(e) instead of the Python 2-only (and frequently empty)
            # e.message attribute.
            return Response(status=HTTP_INTERNAL_SERVER_ERROR, body=str(e))
        finally:
            self.delete_tempfile(tmpfile)

        # Shrink the object to 0 KB
        req = Request(copy(env))
        req.headers[GLACIER_FLAG_META] = True
        resp = req.get_response(self.app)

        # Record it in the Glacier hidden account
        glacier_account = self.glacier_account_prefix + account
        part, nodes = self.container_ring.get_nodes(glacier_account,
                                                    container)
        hidden_path = '/%s/%s/%s' % (glacier_account, container,
                                     glacier_obj)
        for node in nodes:
            ip = node['ip']
            port = node['port']
            dev = node['device']
            headers = dict()
            headers['user-agent'] = 'transition-middleware'
            headers['X-Timestamp'] = normalize_timestamp(time.time())
            headers['referer'] = req.as_referer()
            headers['x-size'] = '0'
            headers['x-content-type'] = 'text/plain'
            # md5 of the empty string
            headers['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e'

            conn = http_connect(ip, port, dev, part, 'PUT', hidden_path,
                                headers)
            conn.getresponse().read()
        return Response(status=HTTP_NO_CONTENT)

    def save_to_tempfile(self, data):
        """Write ``data`` to a new file under ``self.temp_path``.

        :param data: the bytes to write.
        :returns: path of the written file, or None when writing failed
                  (the error is logged and any partial file is removed).
        """
        created_path = None
        try:
            with tempfile.NamedTemporaryFile(bufsize=0, delete=False,
                                             dir=self.temp_path) as temp:
                created_path = temp.name
                temp.write(data)
                temp.flush()
        except Exception as e:
            self.logger.error(e)
            # Do not leave a partially written file behind.
            self.delete_tempfile(created_path)
            return None
        return created_path

    def delete_tempfile(self, tmppath):
        """Remove the temp file at ``tmppath``; a falsy path is a no-op."""
        if tmppath:
            os.remove(tmppath)

    def __call__(self, env, start_response):
        """WSGI entry point.

        :param env: WSGI environment dict.
        :param start_response: WSGI start_response callable.
        :returns: a WSGI response iterable.
        """
        req = Request(env)
        method = req.method
        # NOTE(review): these attributes are kept for backward
        # compatibility only; they are shared by all concurrent requests,
        # so transition() no longer reads them.
        self.version, self.account, self.container, self.obj = split_path(
            req.path, 0, 4, True)
        if not self.obj:
            return self.app(env, start_response)

        if method == 'POST' and \
                'X-S3-Object-Transition' in req.headers:
            return self.transition(env)(env, start_response)

        return self.app(env, start_response)
class ObjectRestorer(Daemon):
    """
    Daemon that restores objects previously archived to Glacier.

    Each pass walks the ``todo`` container of the hidden
    ``.s3_restoring_objects`` account and starts a Glacier retrieval job
    for every entry, then walks the ``restoring`` container and completes
    the entries whose retrieval job has finished.

    :param conf: The daemon configuration.
    """

    def __init__(self, conf):
        self.conf = conf
        self.container_ring = Ring('/etc/swift', ring_name='container')
        self.logger = get_logger(conf, log_route='object-restorer')
        self.logger.set_statsd_prefix('s3-object-restorer')
        self.interval = int(conf.get('interval') or 300)
        self.restoring_object_account = '.s3_restoring_objects'
        self.expiring_restored_account = '.s3_expiring_restored_objects'
        self.glacier_account_prefix = '.glacier_'
        self.todo_container = 'todo'
        self.restoring_container = 'restoring'
        conf_path = '/etc/swift/s3-object-restorer.conf'
        request_tries = int(conf.get('request_tries') or 3)
        self.glacier = self._init_glacier()
        self.glacier_tmpdir = conf.get('temp_path', '/var/cache/s3/')
        self.swift = InternalClient(conf_path,
                                    'Swift Object Restorer',
                                    request_tries)
        self.report_interval = int(conf.get('report_interval') or 300)
        self.report_first_time = self.report_last_time = time()
        self.report_objects = 0
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.rcache = join(self.recon_cache_path, 'object.recon')
        self.concurrency = int(conf.get('concurrency', 1))
        if self.concurrency < 1:
            raise ValueError("concurrency must be set to at least 1")
        self.processes = int(self.conf.get('processes', 0))
        self.process = int(self.conf.get('process', 0))
        self.client = Client(self.conf.get('sentry_sdn', ''))

    def _init_glacier(self):
        """Connect to Glacier and return the ``swift-s3-transition`` vault."""
        con = Layer2(region_name='ap-northeast-1')
        return con.get_vault('swift-s3-transition')

    def report(self, final=False):
        """
        Emits a log line report of the progress so far, or the final
        progress if final=True.

        :param final: Set to True for the last report once the restore
                      pass has completed.
        """
        if final:
            elapsed = time() - self.report_first_time
            self.logger.info(_('Pass completed in %ds; %d objects restored') %
                             (elapsed, self.report_objects))
            dump_recon_cache({'object_expiration_pass': elapsed,
                              'expired_last_pass': self.report_objects},
                             self.rcache, self.logger)
        elif time() - self.report_last_time >= self.report_interval:
            elapsed = time() - self.report_first_time
            self.logger.info(_('Pass so far %ds; %d objects restored') %
                             (elapsed, self.report_objects))
            self.report_last_time = time()

    def _spawn_for_container(self, pool, container, handler, processes,
                             process):
        """
        Spawn ``handler(obj)`` for this process's share of ``container``.

        Entries are listed from the restoring account. When ``processes``
        is positive, an entry is handled only if the MD5 of
        ``<container>/<name>`` modulo ``processes`` equals ``process``.
        Blocks until every spawned handler has finished.
        """
        for o in self.swift.iter_objects(self.restoring_object_account,
                                         container):
            obj = o['name'].encode('utf8')
            if processes > 0:
                obj_process = int(
                    hashlib.md5('%s/%s' % (container, obj)).hexdigest(), 16)
                if obj_process % processes != process:
                    continue
            pool.spawn_n(handler, obj)
        pool.waitall()

    def run_once(self, *args, **kwargs):
        """
        Executes a single pass: starts restores for the ``todo`` entries,
        then checks the ``restoring`` entries for completion.

        :param args: Extra args to fulfill the Daemon interface; this daemon
                     has no additional args.
        :param kwargs: Extra keyword args to fulfill the Daemon interface;
                       this daemon accepts processes and process keyword
                       args. These will override the values from the config
                       file if provided.
        """
        processes, process = self.get_process_values(kwargs)
        pool = GreenPool(self.concurrency)
        self.report_first_time = self.report_last_time = time()
        self.report_objects = 0
        try:
            self.logger.debug(_('Run begin'))
            self._spawn_for_container(pool, self.todo_container,
                                      self.start_object_restoring,
                                      processes, process)
            self._spawn_for_container(pool, self.restoring_container,
                                      self.check_object_restored,
                                      processes, process)
            self.logger.debug(_('Run end'))
            self.report(final=True)
        except (Exception, Timeout):
            report_exception(self.logger, _('Unhandled exception'),
                             self.client)

    def run_forever(self, *args, **kwargs):
        """
        Executes passes forever, sleeping a random part of the configured
        interval between them.

        :param args: Extra args to fulfill the Daemon interface; this daemon
                     has no additional args.
        :param kwargs: Extra keyword args to fulfill the Daemon interface;
                       this daemon has no additional keyword args.
        """
        sleep(random() * self.interval)
        while True:
            begin = time()
            try:
                self.run_once(*args, **kwargs)
            except (Exception, Timeout):
                report_exception(self.logger, _('Unhandled exception'),
                                 self.client)
            elapsed = time() - begin
            if elapsed < self.interval:
                sleep(random() * (self.interval - elapsed))

    def get_process_values(self, kwargs):
        """
        Gets the processes, process from the kwargs if those values exist.

        Otherwise, return processes, process set in the config file.

        :param kwargs: Keyword args passed into the run_forever(), run_once()
                       methods. They have values specified on the command
                       line when the daemon is run.
        :returns: a ``(processes, process)`` tuple of ints.
        :raises ValueError: if either value is negative, or if processes is
                            non-zero and process is not below it.
        """
        if kwargs.get('processes') is not None:
            processes = int(kwargs['processes'])
        else:
            processes = self.processes

        if kwargs.get('process') is not None:
            process = int(kwargs['process'])
        else:
            process = self.process

        if process < 0:
            raise ValueError(
                'process must be an integer greater than or equal to 0')

        if processes < 0:
            raise ValueError(
                'processes must be an integer greater than or equal to 0')

        if processes and process >= processes:
            # The check is strict, so the message must not say "or equal".
            raise ValueError('process must be less than processes')

        return processes, process

    def start_object_restoring(self, obj):
        """
        Start a Glacier retrieval for one ``todo`` entry.

        :param obj: entry name in the form ``account/container/object``.

        If no archive id is recorded for the object, the todo entry is
        simply removed. Otherwise a retrieval job is started, a record
        carrying the job id and the object's user metadata is written to
        the ``restoring`` container, and the todo entry is removed.
        """
        start_time = time()
        try:
            actual_obj = obj
            account, container, obj = actual_obj.split('/', 2)
            archiveId = self.get_archiveid(account, container, obj)
            if archiveId is None:
                self.swift.delete_object(self.restoring_object_account,
                                         self.todo_container, actual_obj)
                return
            jobId = self.glacier.retrieve_archive(archiveId).id
            restoring_obj = make_glacier_hidden_object_name(actual_obj,
                                                            jobId)
            meta_prefix = 'X-Object-Meta'
            meta = self.swift.get_object_metadata(
                account, container, obj, metadata_prefix=meta_prefix)
            # Re-attach the prefix to each returned key.
            meta = {'X-Object-Meta' + key: value
                    for key, value in meta.items()}
            self.update_action_hidden(self.restoring_object_account,
                                      self.restoring_container,
                                      restoring_obj, metadata=meta)
            self.swift.delete_object(self.restoring_object_account,
                                     self.todo_container, actual_obj)
            self.report_objects += 1
            self.logger.increment('start')
        except (Exception, Timeout) as err:
            self.logger.increment('errors')
            # NOTE(review): the other call sites pass ``self.logger`` as the
            # first argument, this one passes ``self.logger.exception`` --
            # confirm which one report_exception() expects.
            report_exception(self.logger.exception,
                             _('Exception while restoring object %s. %s') %
                             (obj, str(err)), self.client)
        self.logger.timing_since('timing', start_time)
        self.report()

    def get_archiveid(self, account, container, obj):
        """
        Look up the Glacier key recorded for an object.

        :returns: the key taken from the matching hidden record in the
                  ``.glacier_<account>`` account, or None if there is no
                  matching record.
        """
        glacier_account = '%s%s' % (self.glacier_account_prefix, account)
        for o in get_objects_by_prefix(glacier_account, container, obj,
                                       swift_client=self.swift):
            if get_glacier_objname_from_hidden_object(o) == obj:
                return get_glacier_key_from_hidden_object(o)
        return None

    def check_object_restored(self, restoring_object):
        """
        Finish one ``restoring`` entry if its retrieval job has completed.

        :param restoring_object: hidden record name embedding the object
                                 path and the Glacier job id.

        Returns without touching the entry while the job is still running.
        In every other case the entry is removed: after a completed
        restore, when the object no longer exists, or after an error (in
        which case the restore is started over first).
        """
        actual_obj = get_glacier_objname_from_hidden_object(restoring_object)
        jobId = get_glacier_key_from_hidden_object(restoring_object)
        try:
            path = '/v1/%s' % actual_obj
            resp = self.swift.make_request('GET', path, {}, (2, 4,))
            if resp.status_int == 404:
                raise Exception('Object Not Found: %s' % actual_obj)
            job = self.glacier.get_job(job_id=jobId)
            if not job.completed:
                return
            self.complete_restore(actual_obj, job)
        except Exception as e:
            # If the job id has expired, try the restore again.
            if not str(e).startswith('Object Not Found:'):
                self.start_object_restoring(actual_obj)
            self.logger.info(e)
        self.swift.delete_object(self.restoring_object_account,
                                 self.restoring_container, restoring_object)

    def complete_restore(self, actual_obj, job):
        """
        Download a finished Glacier job and PUT the data back into Swift.

        The object is rewritten with its user metadata plus restore
        markers, and an entry is added to the expiring-restored account so
        the restored copy can be expired later. Failures are logged, not
        raised; the download file is always removed.

        :param actual_obj: object path as ``account/container/object``.
        :param job: completed Glacier retrieval job.
        """
        # Only the path is needed; close the handle right away.
        with tempfile.NamedTemporaryFile(bufsize=0, delete=False,
                                         dir=self.glacier_tmpdir) as tmp:
            tmppath = tmp.name
        try:
            job.download_to_file(filename=tmppath)
            prefix = 'X-Object-Meta'
            a, c, o = actual_obj.split('/', 2)
            metadata = self.swift.get_object_metadata(
                a, c, o, metadata_prefix=prefix)
            metadata = {'X-Object-Meta' + key: value
                        for key, value in metadata.items()}
            days = int(metadata['X-Object-Meta-s3-restore-expire-days'])
            exp_time = normalize_delete_at_timestamp(
                calc_nextDay(time()) + (days - 1) * 86400)

            # Send the restored object to the proxy server.
            path = '/v1/%s' % actual_obj
            metadata['X-Object-Meta-S3-Restored'] = True
            exp_date = strftime("%a, %d %b %Y %H:%M:%S GMT",
                                gmtime(float(exp_time)))
            metadata['X-Object-Meta-s3-restore'] = (
                'ongoing-request="false", expiry-date="%s"' % exp_date)
            metadata['Content-Length'] = os.path.getsize(tmppath)
            del metadata['X-Object-Meta-s3-restore-expire-days']

            # Binary mode, and closed even when the PUT raises.
            with open(tmppath, 'rb') as obj_body:
                self.swift.make_request('PUT', path, metadata, (2,),
                                        body_file=obj_body)

            # Add to .s3_expiring_restored_objects
            self.update_action_hidden(self.expiring_restored_account,
                                      exp_time, actual_obj)
            self.logger.increment('done')
        except UnexpectedResponse as e:
            if e.resp.status_int == 404:
                self.logger.error('Restoring object not found - %s' %
                                  actual_obj)
            else:
                # Previously dropped without a trace.
                self.logger.increment('errors')
                self.logger.error(e)
        except Exception as e:
            self.logger.increment('errors')
            self.logger.debug(e)
        finally:
            os.remove(tmppath)

    def compute_obj_md5(self, obj):
        """Return the hex MD5 digest of ``obj``."""
        etag = hashlib.md5()
        etag.update(obj)
        return etag.hexdigest()

    def update_action_hidden(self, account, container, obj, metadata=None):
        """
        PUT a zero-byte hidden record to every container node.

        :param account: hidden account holding the record.
        :param container: container inside that account.
        :param obj: name of the record.
        :param metadata: optional extra headers merged into each request.
        """
        hidden_path = '/%s/%s/%s' % (account, container, obj)
        part, nodes = self.container_ring.get_nodes(account, container)
        for node in nodes:
            ip = node['ip']
            port = node['port']
            dev = node['device']
            action_headers = dict()
            action_headers['user-agent'] = 'restore-daemon'
            action_headers['X-Timestamp'] = normalize_timestamp(time())
            action_headers['referer'] = 'restore-daemon'
            action_headers['x-size'] = '0'
            action_headers['x-content-type'] = "text/plain"
            # MD5 of the empty string: the record has no body.
            action_headers['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e'

            if metadata:
                action_headers.update(metadata)

            conn = http_connect(ip, port, dev, part, 'PUT', hidden_path,
                                action_headers)
            response = conn.getresponse()
            response.read()
class ListEndpointsMiddleware(object):
    """
    Report the storage-node endpoints of an account, container or object.

    Requests whose path starts with the configured `list_endpoints_path`
    are answered here; everything else goes to the wrapped app. The ring
    files are read from `swift_dir` (default `/etc/swift`).

    :param app: The next WSGI filter or app in the paste.deploy chain.
    :param conf: The configuration dict for the middleware.
    """

    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='endpoints')
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.account_ring = Ring(self.swift_dir, ring_name='account')
        self.container_ring = Ring(self.swift_dir, ring_name='container')
        configured = conf.get('list_endpoints_path', '/endpoints/')
        # The prefix is always stored with a trailing slash.
        if configured.endswith('/'):
            self.endpoints_path = configured
        else:
            self.endpoints_path = configured + '/'
        self.default_response_version = 1.0
        self.response_map = {
            1.0: self.v1_format_response,
            2.0: self.v2_format_response,
        }

    def get_object_ring(self, policy_idx):
        """
        Return the object ring belonging to a storage policy.

        :policy_idx: policy index as defined in swift.conf
        :returns: appropriate ring object
        """
        return POLICIES.get_object_ring(policy_idx, self.swift_dir)

    def _parse_version(self, raw_version):
        """
        Turn a path segment such as ``v2`` into a supported version float.

        :raises: ValueError if the segment is not a number or the number
                 is not one of RESPONSE_VERSIONS.
        """
        complaint = 'Unsupported version %r' % raw_version
        try:
            parsed = float(raw_version.lstrip('v'))
        except ValueError:
            raise ValueError(complaint)
        for supported in RESPONSE_VERSIONS:
            if parsed == supported:
                return parsed
        raise ValueError(complaint)

    def _parse_path(self, request):
        """
        Split the request path into version, account, container and obj.

        Missing trailing parts come back as None. The version is always a
        float; when the request does not name one, the configured default
        response version is used.

        :param request: the swob request
        :returns: tuple of (version, account, container, obj)
        :raises: ValueError if path is invalid, message will say why.
        """
        trimmed = request.path[len(self.endpoints_path) - 1:]
        # First segment may be a version; split it from the remainder.
        try:
            head, tail = split_path(trimmed, 1, 2, True)
        except ValueError:
            raise ValueError('No account specified')
        try:
            version = self._parse_version(head)
        except ValueError:
            version_like = head.startswith('v') and '_' not in head
            if version_like:
                # Reads as a bad version rather than as an account name.
                raise
            # Most likely no version was given. A client that really sent
            # /endpoints/v_3/account gets a response for account "v_3"
            # instead of an error.
            version = self.default_response_version
            remainder = trimmed
        else:
            remainder = '/' + tail if tail else '/'
        try:
            account, container, obj = split_path(remainder, 1, 3, True)
        except ValueError:
            raise ValueError('No account specified')
        return version, account, container, obj

    def v1_format_response(self, req, endpoints, **kwargs):
        """Version 1 body: a bare JSON list of endpoint URLs."""
        body = json.dumps(endpoints)
        return Response(body, content_type='application/json')

    def v2_format_response(self, req, endpoints, storage_policy_index,
                           **kwargs):
        """Version 2 body: JSON object with endpoints and backend headers."""
        resp = {
            'endpoints': endpoints,
            'headers': {},
        }
        if storage_policy_index is not None:
            policy_header = 'X-Backend-Storage-Policy-Index'
            resp['headers'][policy_header] = str(storage_policy_index)
        body = json.dumps(resp)
        return Response(body, content_type='application/json')

    def __call__(self, env, start_response):
        """WSGI entry point."""
        request = Request(env)
        if not request.path.startswith(self.endpoints_path):
            return self.app(env, start_response)

        if request.method != 'GET':
            not_allowed = HTTPMethodNotAllowed(
                req=request, headers={"Allow": "GET"})
            return not_allowed(env, start_response)

        try:
            version, account, container, obj = self._parse_path(request)
        except ValueError as err:
            return HTTPBadRequest(str(err))(env, start_response)

        account, container, obj = [
            part if part is None else unquote(part)
            for part in (account, container, obj)]

        storage_policy_index = None
        url_prefix = 'http://{ip}:{port}/{device}/{partition}/'
        if obj is not None:
            # The object ring depends on the container's storage policy.
            container_info = get_container_info(
                {'PATH_INFO': '/v1/%s/%s' % (account, container)},
                self.app, swift_source='LE')
            storage_policy_index = container_info['storage_policy']
            ring = self.get_object_ring(storage_policy_index)
            partition, nodes = ring.get_nodes(account, container, obj)
            endpoint_template = url_prefix + '{account}/{container}/{obj}'
        elif container is not None:
            partition, nodes = self.container_ring.get_nodes(
                account, container)
            endpoint_template = url_prefix + '{account}/{container}'
        else:
            partition, nodes = self.account_ring.get_nodes(account)
            endpoint_template = url_prefix + '{account}'

        endpoints = [
            endpoint_template.format(
                ip=node['ip'],
                port=node['port'],
                device=node['device'],
                partition=partition,
                account=quote(account),
                container=quote(container or ''),
                obj=quote(obj or ''))
            for node in nodes]

        formatter = self.response_map[version]
        resp = formatter(request, endpoints=endpoints,
                         storage_policy_index=storage_policy_index)
        return resp(env, start_response)
class ListEndpointsMiddleware(object):
    """
    List endpoints for an object, account or container.

    Requests whose path starts with the configured `list_endpoints_path`
    are answered with a JSON list of storage-node URLs; everything else
    is passed to the wrapped app.

    Uses configuration parameter `swift_dir` (default `/etc/swift`).

    :param app: The next WSGI filter or app in the paste.deploy chain.
    :param conf: The configuration dict for the middleware.
    """

    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='endpoints')
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.account_ring = Ring(self.swift_dir, ring_name='account')
        self.container_ring = Ring(self.swift_dir, ring_name='container')
        self.endpoints_path = conf.get('list_endpoints_path', '/endpoints/')
        if not self.endpoints_path.endswith('/'):
            self.endpoints_path += '/'

    def get_object_ring(self, policy_idx):
        """
        Get the ring object to use to handle a request based on its policy.

        :policy_idx: policy index as defined in swift.conf
        :returns: appropriate ring object
        """
        return POLICIES.get_object_ring(policy_idx, self.swift_dir)

    def __call__(self, env, start_response):
        """
        WSGI entry point.

        Responds 405 to non-GET requests under the endpoints path and 400
        when the path names no account.
        """
        request = Request(env)
        if not request.path.startswith(self.endpoints_path):
            return self.app(env, start_response)

        if request.method != 'GET':
            return HTTPMethodNotAllowed(
                req=request, headers={"Allow": "GET"})(env, start_response)

        try:
            clean_path = request.path[len(self.endpoints_path) - 1:]
            account, container, obj = \
                split_path(clean_path, 1, 3, True)
        except ValueError:
            return HTTPBadRequest('No account specified')(env, start_response)

        if account is not None:
            account = unquote(account)
        if container is not None:
            container = unquote(container)
        if obj is not None:
            obj = unquote(obj)

        if obj is not None:
            # remove 'endpoints' from call to get_container_info.
            # Work on a copy: rewriting PATH_INFO on request.environ itself
            # would alter the live WSGI environ of this request.
            stripped = dict(request.environ)
            if stripped['PATH_INFO'].startswith(self.endpoints_path):
                stripped['PATH_INFO'] = "/v1/" + \
                    stripped['PATH_INFO'][len(self.endpoints_path):]
            container_info = get_container_info(
                stripped, self.app, swift_source='LE')
            obj_ring = self.get_object_ring(container_info['storage_policy'])
            partition, nodes = obj_ring.get_nodes(
                account, container, obj)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}/{obj}'
        elif container is not None:
            partition, nodes = self.container_ring.get_nodes(
                account, container)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}'
        else:
            partition, nodes = self.account_ring.get_nodes(
                account)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}'

        endpoints = [
            endpoint_template.format(
                ip=node['ip'],
                port=node['port'],
                device=node['device'],
                partition=partition,
                account=quote(account),
                container=quote(container or ''),
                obj=quote(obj or ''))
            for node in nodes]

        return Response(json.dumps(endpoints),
                        content_type='application/json')(env, start_response)
class ContainerCrawler(object):
    """
    Feeds new container database rows to handler instances.

    Enumerator green threads take (settings, per_account) entries from
    the enumerator queue, read rows from the locally stored container
    database and submit them for handling. Unless bulk processing is
    enabled, rows are handled one at a time by worker green threads fed
    through the work queue.
    """

    def __init__(self, conf, handler_class, logger=None):
        """
        :param conf: configuration dict; 'devices', 'status_dir' and
                     'items_chunk' are required keys.
        :param handler_class: class instantiated once per container to
                              handle its rows.
        :param logger: optional logger; nothing is logged without one.
        :raises RuntimeError: if handler_class is not supplied.
        """
        if not handler_class:
            raise RuntimeError('Handler class must be defined')

        self.logger = logger
        self.conf = conf
        self.root = conf['devices']
        self.bulk = config_true_value(conf.get('bulk_process', False))
        self.interval = 10
        self.swift_dir = '/etc/swift'
        self.container_ring = Ring(self.swift_dir, ring_name='container')

        self.status_dir = conf['status_dir']
        self.myips = whataremyips('0.0.0.0')
        self.items_chunk = conf['items_chunk']
        self.poll_interval = conf.get('poll_interval', 5)
        self.handler_class = handler_class
        # (account, container) pairs queued or being processed right now.
        self._in_progress_containers = set()

        if self.bulk:
            self.workers = 1

        self._init_workers(conf)
        self._init_ic_pool(conf)

        self.log('debug', 'Created the Container Crawler instance')

    def _init_workers(self, conf):
        """Create the queues and spawn the worker/enumerator threads."""
        if not self.bulk:
            self.workers = conf.get('workers', 10)
            self.worker_pool = eventlet.GreenPool(self.workers)
            self.work_queue = eventlet.queue.Queue(self.workers * 2)

            for _ in xrange(self.workers):
                self.worker_pool.spawn_n(self._worker)

        self.enumerator_workers = conf.get('enumerator_workers', 10)
        self.enumerator_pool = eventlet.GreenPool(self.enumerator_workers)
        self.enumerator_queue = eventlet.queue.Queue(self.enumerator_workers)

        for _ in xrange(self.enumerator_workers):
            self.enumerator_pool.spawn_n(self._enumerator)

    def _init_ic_pool(self, conf):
        """Create the internal-client pool, one client per worker."""
        pool_size = self.workers
        self._swift_pool = eventlet.pools.Pool(
            create=lambda: create_internal_client(conf, self.swift_dir),
            min_size=pool_size,
            max_size=pool_size)

    def _worker(self):
        """
        Worker loop: handle rows taken from the work queue.

        Each item is ((row, handler), container_job). The job is told
        whether the row succeeded, needs a retry, or failed. A falsy item
        ends the loop.
        """
        while 1:
            try:
                work = self.work_queue.get()
            except:
                self.log(
                    'error', 'Failed to fetch items from the queue: %s' %
                    traceback.format_exc())
                eventlet.sleep(100)
                continue

            try:
                if not work:
                    break

                (row, handler), container_job = work
                with self._swift_pool.item() as swift_client:
                    handler.handle(row, swift_client)
                container_job.complete_task()
            except RetryError:
                container_job.complete_task(retry=True)
            except:
                container_job.complete_task(error=True)
                self.log(
                    'error',
                    u'Failed to handle row %s (%s): %r' % (
                        row['ROWID'], row['name'].decode('utf-8'),
                        traceback.format_exc()))
            finally:
                # Runs for every item fetched, including the falsy one.
                self.work_queue.task_done()

    def _enumerator(self):
        """
        Enumerator loop: process containers from the enumerator queue.

        Each item is (settings, per_account). The handler's last-row
        marker is saved only in bulk mode or when the job reports
        PASS_SUCCEEDED. A falsy item ends the loop.
        """
        job = ContainerJob()
        while 1:
            try:
                work = self.enumerator_queue.get()
            except:
                self.log(
                    'error', 'Failed to fetch containers to enumerate %s' %
                    traceback.format_exc())
                eventlet.sleep(100)
                continue

            try:
                if not work:
                    break

                settings, per_account = work
                handler = self.handler_class(self.status_dir, settings,
                                             per_account=per_account)
                owned, verified, last_row, db_id = self.handle_container(
                    handler, job)
                # Nothing was submitted, so there is no progress to save.
                if not owned and not verified:
                    continue

                if self.bulk or job.wait_all() == ContainerJob.PASS_SUCCEEDED:
                    handler.save_last_row(last_row, db_id)
                    self.log(
                        'info',
                        'Processed %d rows; verified %d rows; '
                        'last row: %d' % (owned, verified, last_row))
            except SkipContainer:
                self.log('info', "Skipping %(account)s/%(container)s" %
                         settings)
            except RetryError:
                # Can appear from the bulk handling code.
                # TODO: we should do a better tying the bulk handling code into
                # this model.
                pass
            except:
                account = settings['account']
                container = settings['container']
                self.log(
                    'error', "Failed to process %s/%s with %s" %
                    (account, container, self.handler_class.__name__))
                self.log('error', traceback.format_exc())
            finally:
                # Let the container be enqueued again on a later pass.
                if work:
                    self._in_progress_containers.remove(
                        (work[0]['account'], work[0]['container']))
                self.enumerator_queue.task_done()

    def log(self, level, message):
        """Log message at the named level; no-op without a logger."""
        if not self.logger:
            return
        getattr(self.logger, level)(message)

    def get_broker(self, account, container, part, node):
        """Return a ContainerBroker for the container DB on node's device."""
        db_hash = hash_path(account.encode('utf-8'), container.encode('utf-8'))
        db_dir = storage_directory(DATADIR, part, db_hash)
        db_path = os.path.join(self.root, node['device'], db_dir,
                               db_hash + '.db')
        return ContainerBroker(db_path, account=account, container=container)

    def submit_items(self, handler, rows, job):
        """
        Hand rows to the handler.

        In bulk mode the whole list is handled synchronously in one call;
        otherwise each row becomes a task submitted through job to the
        work queue.
        """
        if not rows:
            return

        if self.bulk:
            with self._swift_pool.item() as swift_client:
                handler.handle(rows, swift_client)
            return

        job.submit_tasks(map(lambda row: (row, handler), rows),
                         self.work_queue)

    def process_items(self, handler, rows, nodes_count, node_id, job):
        """
        Split rows by ROWID modulo nodes_count and submit both groups.

        Rows whose remainder equals node_id are the "owned" rows, all the
        others are the "verified" rows.

        :returns: (owned row count, verified row count)
        """
        owned_rows = filter(lambda row: row['ROWID'] % nodes_count == node_id,
                            rows)
        verified_rows = filter(
            lambda row: row['ROWID'] % nodes_count != node_id, rows)

        self.submit_items(handler, owned_rows, job)
        self.submit_items(handler, verified_rows, job)
        return len(owned_rows), len(verified_rows)

    def handle_container(self, handler, job):
        """
        Submit the next chunk of rows of the handler's container.

        Uses the first ring node that is a local device and whose
        database can be read.

        :returns: (owned count, verified count, ROWID of the last row
                  read, database id); the counts are 0 and the last row
                  is None when there are no new rows, and the database id
                  is also None when no local database was usable.
        """
        part, container_nodes = self.container_ring.get_nodes(
            handler._account.encode('utf-8'),
            handler._container.encode('utf-8'))
        nodes_count = len(container_nodes)

        for index, node in enumerate(container_nodes):
            if not is_local_device(self.myips, None, node['ip'],
                                   node['port']):
                continue
            broker = self.get_broker(handler._account, handler._container,
                                     part, node)
            broker_info = broker.get_info()
            last_row = handler.get_last_row(broker_info['id'])
            if not last_row:
                last_row = 0
            try:
                items = broker.get_items_since(last_row, self.items_chunk)
            except DatabaseConnectionError:
                # Try the next node.
                continue
            if not items:
                return (0, 0, None, broker_info['id'])
            self.log(
                'info', 'Processing %d rows since row %d for %s/%s' %
                (len(items), last_row, handler._account, handler._container))
            owned_count, verified_count = self.process_items(
                handler, items, nodes_count, index, job)

            return (owned_count,
                    verified_count,
                    items[-1]['ROWID'],
                    broker_info['id'])
        return (0, 0, None, None)

    def list_containers(self, account):
        """Return the names of all containers in account."""
        # TODO: we should not have to retrieve all of the containers at once,
        # but it will require allocating a swift_client for this purpose from
        # the pool -- consider doing that at some point. However, as long as
        # there are fewer than a few million containers, getting all of them at
        # once should be cheap, paginating 10000 at a time.
        with self._swift_pool.item() as swift_client:
            return [c['name'] for c in swift_client.iter_containers(account)]

    def _is_processing(self, settings):
        """Tell whether the settings' container is already in progress."""
        # NOTE: if we allow more than one destination for (account, container),
        # we have to change the contents of this set
        key = (settings['account'], settings['container'])
        return key in self._in_progress_containers

    def _enqueue_container(self, settings, per_account=False):
        """Mark the container as in progress and queue it for enumeration."""
        key = (settings['account'], settings['container'])
        self._in_progress_containers.add(key)
        self.enumerator_queue.put((settings, per_account))

    def _submit_containers(self):
        """
        Enqueue every configured container that is not already in progress.

        A container setting of '/*' expands to all containers of the
        account; status files of containers that are no longer listed are
        then removed.
        """
        for container_settings in self.conf['containers']:
            # TODO: perform validation of the settings on startup
            if 'container' not in container_settings:
                self.log(
                    'error',
                    'Container name not specified in settings -- continue')
                continue
            if 'account' not in container_settings:
                self.log('error',
                         'Account not specified in settings -- continue')
                continue

            if container_settings['container'] == '/*':
                all_containers = self.list_containers(
                    container_settings['account'])
                for container in all_containers:
                    settings_copy = container_settings.copy()
                    settings_copy['container'] = container.decode('utf-8')
                    if not self._is_processing(settings_copy):
                        self._enqueue_container(settings_copy,
                                                per_account=True)
                # After iterating over all of the containers, we prune any
                # entries from containers that may have been deleted (so as to
                # avoid missing data). There is still a chance where a
                # container is removed and created between the calls to
                # CloudSync, however there is nothing we can do about that.
                # TODO: keep track of container creation date to detect when
                # they are removed and then added.
                if not os.path.exists(
                        os.path.join(self.status_dir,
                                     container_settings['account'])):
                    continue
                tracked_containers = os.listdir(
                    os.path.join(self.status_dir,
                                 container_settings['account']))
                disappeared = set(tracked_containers) - set(all_containers)
                for container in disappeared:
                    try:
                        os.unlink(
                            os.path.join(self.status_dir,
                                         container_settings['account'],
                                         container))
                    except Exception as e:
                        self.log(
                            'warning',
                            'Failed to remove the status file for %s: %s' %
                            (os.path.join(container_settings['account'],
                                          container), repr(e)))
            else:
                if not self._is_processing(container_settings):
                    self._enqueue_container(container_settings,
                                            per_account=False)

    def run_always(self):
        """Submit containers forever, at most once per poll_interval."""
        # Since we don't support reloading, the daemon should quit if there are
        # no containers configured
        if 'containers' not in self.conf or not self.conf['containers']:
            return
        self.log('debug', 'Entering the poll loop')
        while True:
            start = time.time()
            self._submit_containers()
            elapsed = time.time() - start
            if elapsed < self.poll_interval:
                eventlet.sleep(self.poll_interval - elapsed)

    def run_once(self):
        """Submit containers once and wait for the enumerator queue."""
        self._submit_containers()
        self.enumerator_queue.join()
class FileMover(object):
    """
    Moves on-disk Swift files into the partition directory the ring
    currently assigns to them.

    :param options: parsed options; ``ring`` and ``path`` are read here,
                    the ``move_object_files``, ``move_container_dbs`` and
                    ``move_account_dbs`` flags are read by start().
    """

    def __init__(self, options, *_args, **_kwargs):
        self.ring = Ring(options.ring)
        self.path = options.path
        self.options = options

    def _get_acc_cont_obj(self, filename):
        """
        Returns account, container, object from XFS object metadata

        :param filename: path of the object file.
        :returns: dict with 'account', 'container' and 'object' keys.
        """
        metadata = b''
        key = 0
        # The pickled metadata is spread over user.swift.metadata,
        # user.swift.metadata1, ...; reading stops at the first IOError.
        with open(filename, 'rb') as obj_fd:
            try:
                while True:
                    metadata += xattr.getxattr(
                        obj_fd,
                        '%s%s' % ("user.swift.metadata", (key or '')))
                    key += 1
            except IOError:
                pass
        # NOTE: pickle.loads() can execute arbitrary code; run this tool
        # only on disks whose contents are trusted.
        object_name = pickle.loads(metadata).get('name')
        parts = object_name.split('/')
        return {'account': parts[1],
                'container': parts[2],
                'object': '/'.join(parts[3:])}

    def start(self):
        """Walk self.path and move every file enabled by the options."""
        for root, _dirs, files in os.walk(self.path):
            if "quarantined" in root:
                continue
            for filename in files:
                fullname = os.path.join(root, filename)
                # Text after the last dot of the file name, '' if none.
                ext = filename.rpartition('.')[2] if '.' in filename else ''
                if (self.options.move_object_files is True and
                        ext in ("data", "ts")):
                    self._move_file(fullname, "objects")

                if (self.options.move_container_dbs is True and
                        ext == "db" and
                        "containers" in fullname):
                    self._move_file(fullname, "containers")

                if (self.options.move_account_dbs is True and
                        ext == "db" and
                        "accounts" in fullname):
                    self._move_file(fullname, "accounts")

    def _move_file(self, filename, filetype):
        """
        Rename a file into the partition directory given by the ring.

        :param filename: current path; must contain a ``filetype`` path
                         component followed by the partition component.
        :param filetype: 'accounts', 'containers' or 'objects'.
        :raises ValueError: if filetype is none of the above.

        Failures to create the directory or to rename are logged, not
        raised.
        """
        if filetype == 'accounts':
            info = AccountBroker(filename).get_info()
        elif filetype == 'containers':
            info = ContainerBroker(filename).get_info()
        elif filetype == 'objects':
            info = self._get_acc_cont_obj(filename)
        else:
            raise ValueError('unknown file type: %r' % (filetype,))

        acc = info.get('account')
        cont = info.get('container')
        obj = info.get('object')

        partition, _nodes = self.ring.get_nodes(acc, cont, obj)

        # replace the old partition value with the new one
        # old name like '/a/b/objects/123/c/d'
        # new name like '/a/b/objects/456/c/d'
        filename_parts = filename.split('/')
        part_pos = filename_parts.index(filetype)
        filename_parts[part_pos + 1] = str(partition)
        newname = '/'.join(filename_parts)

        dst_dir = os.path.dirname(newname)
        try:
            os.makedirs(dst_dir)
            logging.info("mkdir %s" % dst_dir)
        except OSError as ex:
            logging.info("mkdir %s failed: %s" % (dst_dir, ex))

        try:
            os.rename(filename, newname)
            logging.info("moved %s -> %s" % (filename, newname))
        except OSError as ex:
            # Keep the reason so that a failed move can be diagnosed.
            logging.warning("FAILED TO MOVE %s -> %s: %s" %
                            (filename, newname, ex))
class Crawler(object):
    """
    Enumerates the configured containers, reads new rows from the local
    container databases and hands them to handlers created by
    ``handler_factory``.

    :param conf: configuration dict; 'devices', 'status_dir' and
                 'items_chunk' are required, the remaining keys read here
                 have defaults.
    :param handler_factory: object whose ``instance(settings, per_account=)``
                            returns a row handler.
    :param logger: optional logger; when omitted nothing is logged.
    :raises RuntimeError: if ``handler_factory`` is falsy.
    """

    def __init__(self, conf, handler_factory, logger=None):
        if not handler_factory:
            raise RuntimeError('Handler class must be defined')

        self.logger = logger
        self.conf = conf
        self.root = conf['devices']
        self.bulk = config_true_value(conf.get('bulk_process', False))
        self.interval = 10
        self.swift_dir = '/etc/swift'
        self.container_ring = Ring(self.swift_dir, ring_name='container')
        self.status_dir = conf['status_dir']
        self.myips = whataremyips(conf.get('swift_bind_ip', '0.0.0.0'))
        self.items_chunk = conf['items_chunk']
        # Verification slack is specified in minutes.
        self._verification_slack = conf.get('verification_slack', 0) * 60
        self.poll_interval = conf.get('poll_interval', 5)
        self.handler_factory = handler_factory

        # NOTE: this structure is not protected. Since we use green threads,
        # we expect a context switch to only occur on blocking calls, so the
        # set operations should be safe in this context. This can lead to
        # skipping container cycles unnecessarily if the threading model
        # changes.
        self._in_progress_containers = set()

        if self.bulk:
            self.workers = 1

        self._init_workers(conf)
        self._init_ic_pool(conf)
        self.log('debug', 'Created the Container Crawler instance')

    def _init_workers(self, conf):
        """Spawn the row workers (non-bulk mode only) and the enumerators."""
        if not self.bulk:
            self.workers = conf.get('workers', 10)
            self.worker_pool = eventlet.GreenPool(self.workers)
            self.work_queue = eventlet.queue.Queue(self.workers * 2)

            for _ in xrange(self.workers):
                self.worker_pool.spawn_n(self._worker)

        self.enumerator_workers = conf.get('enumerator_workers', 10)
        self.enumerator_pool = eventlet.GreenPool(self.enumerator_workers)
        self.enumerator_queue = eventlet.queue.Queue(self.enumerator_workers)

        for _ in xrange(self.enumerator_workers):
            self.enumerator_pool.spawn_n(self._enumerator)

    def _init_ic_pool(self, conf):
        """Create a fixed-size pool holding one internal client per worker."""
        pool_size = self.workers
        self._swift_pool = eventlet.pools.Pool(
            create=lambda: create_internal_client(conf, self.swift_dir),
            min_size=pool_size,
            max_size=pool_size)

    def _worker(self):
        """
        Green thread body: handle (row, handler) tasks from the work queue
        until a falsy item is received.
        """
        while 1:
            try:
                work = self.work_queue.get()
            except Exception as e:
                self.log('error',
                         'Failed to fetch items from the queue: %r' % e)
                self.log('debug', 'Failed to fetch items from the queue',
                         exc_info=True)
                eventlet.sleep(100)
                continue

            # Pre-bound so that the error handler below never trips over an
            # unbound name when the work item cannot be unpacked.
            row = None
            container_job = None
            try:
                if not work:
                    break

                (row, handler), container_job = work
                with self._swift_pool.item() as swift_client:
                    handler.handle(row, swift_client)
                container_job.complete_task()
            except RetryError:
                container_job.complete_task(retry=True)
            except Exception as e:
                if container_job is None:
                    # Unpacking failed; there is no job to report to.
                    self.log('error',
                             'Malformed work item %r: %r' % (work, e))
                    self.log('debug', 'Malformed work item', exc_info=True)
                else:
                    container_job.complete_task(error=True)
                    self.log(
                        'error',
                        u'Failed to handle row %s (%s): %r' %
                        (row['ROWID'], row['name'].decode('utf-8'), e))
                    self.log(
                        'debug',
                        u'Failed to handle row %s (%s)' %
                        (row['ROWID'], row['name'].decode('utf-8')),
                        exc_info=True)
            finally:
                self.work_queue.task_done()

    def _get_new_rows(self, broker, start_row, nodes, node_id, verifying):
        """
        Return the rows after ``start_row`` this node should look at.

        When not verifying, only rows whose ``num_from_row`` value maps to
        ``node_id`` (modulo ``nodes``) are returned. When verifying, all rows
        are returned up to the first one newer than the verification slack.
        """
        rows = []
        if verifying:
            cutoff = time.time() - self._verification_slack
        for row in broker.get_items_since(start_row, self.items_chunk):
            hnum = num_from_row(row)
            if not verifying and hnum % nodes != node_id:
                continue
            ts = decode_timestamps(row['created_at'])[2].timestamp
            if verifying and ts > cutoff:
                break
            rows.append(row)
        return rows

    def _enumerator(self):
        """
        Green thread body: process container settings from the enumerator
        queue until a falsy item is received.
        """
        job = ContainerJob()
        while 1:
            try:
                work = self.enumerator_queue.get()
            except Exception:
                self.log('error', 'Failed to fetch containers to enumerate',
                         exc_info=True)
                eventlet.sleep(100)
                continue

            # Pre-bound so the handlers below can always format a message,
            # even when the settings lack the internal_* keys.
            account = container = None
            try:
                if not work:
                    break

                settings, per_account = work
                account = settings['internal_account']
                container = settings['internal_container']

                # Should we try caching the broker to avoid doing these
                # look ups every time?
                broker, nodes_count, node_id = self.get_broker(
                    account.encode('utf-8'), container.encode('utf-8'))
                if not broker:
                    continue

                if getattr(broker, 'is_sharded', lambda: False)():
                    self._enqueue_sharded_container(settings, per_account)

                broker_info = broker.get_info()
                broker_id = broker_info['id']
                handler = self.handler_factory.instance(
                    settings, per_account=per_account)
                last_primary_row = handler.get_last_processed_row(broker_id)
                if getattr(broker, 'is_root_container', lambda: True)():
                    handler.handle_container_info(broker_info,
                                                  broker.metadata)

                primary_rows = self._get_new_rows(
                    broker, last_primary_row, nodes_count, node_id, False)
                if primary_rows:
                    self.log(
                        'info', 'Processing %d rows since row %d for %s/%s' %
                        (len(primary_rows), last_primary_row, account,
                         container))
                    primary_status = self.submit_items(
                        handler, primary_rows, job)
                    if ContainerJob.PASS_SUCCEEDED == primary_status:
                        handler.save_last_processed_row(
                            primary_rows[-1]['ROWID'], broker_id)
                        self.log(
                            'info',
                            'Processed %d rows; last row: %d; for %s/%s' %
                            (len(primary_rows), primary_rows[-1]['ROWID'],
                             account, container))

                last_verified_row = handler.get_last_verified_row(broker_id)
                verifying_rows = self._get_new_rows(
                    broker, last_verified_row, nodes_count, node_id, True)

                # Remove any ROWIDs that we uploaded
                uploaded_rows = {row['ROWID'] for row in primary_rows}
                # A real list is required: it is tested for emptiness,
                # measured and indexed below.
                verifying_rows = [row for row in verifying_rows
                                  if row['ROWID'] not in uploaded_rows]

                if verifying_rows:
                    self.log(
                        'info', 'Verifying %d rows since row %d for %s/%s' %
                        (len(verifying_rows), last_verified_row, account,
                         container))
                    verifying_status = self.submit_items(
                        handler, verifying_rows, job)
                    if ContainerJob.PASS_SUCCEEDED == verifying_status:
                        handler.save_last_verified_row(
                            verifying_rows[-1]['ROWID'], broker_id)
                        self.log(
                            'info',
                            'Verified %d rows; last row: %d; '
                            'for %s/%s' %
                            (len(verifying_rows),
                             verifying_rows[-1]['ROWID'],
                             account, container))
            except SkipContainer:
                self.log('info', "Skipping %s/%s" % (account, container))
            except RetryError:
                # Can appear from the bulk handling code.
                # TODO: we should do a better job tying the bulk handling
                # code into this model.
                pass
            except Exception:
                self.log('error', "Failed to process %s/%s with %s" %
                         (account, container, str(self.handler_factory)),
                         exc_info=True)
            finally:
                if work:
                    self._in_progress_containers.remove(
                        mapping_signature(work[0]))
                self.enumerator_queue.task_done()

    def log(self, level, message, **kwargs):
        """Forward ``message`` to ``self.logger.<level>``; no-op w/o logger."""
        if not self.logger:
            return
        getattr(self.logger, level)(message, **kwargs)

    def _get_db_info(self, account, container):
        """
        Returns the database path of the container

        :param account: UTF-8 encoded account name
        :param container: UTF-8 encoded container name
        :returns: a tuple of (db path, nodes count, index of replica); all
                  three are None when no replica lives on a local device
        """
        part, container_nodes = self.container_ring.get_nodes(
            account, container)
        nodes_count = len(container_nodes)
        db_hash = hash_path(account, container)
        db_dir = storage_directory(DATADIR, part, db_hash)

        for index, node in enumerate(container_nodes):
            if not is_local_device(self.myips, None, node['ip'],
                                   node['port']):
                continue
            db_path = os.path.join(
                self.root, node['device'], db_dir, db_hash + '.db')
            return db_path, nodes_count, index
        return None, None, None

    def get_broker(self, account, container):
        """Instantiates a container database broker.

        :param account: UTF-8 encoded account name
        :param container: UTF-8 encoded container name
        :returns: a tuple of (ContainerBroker, nodes count, index of
                  replica); all three are None when the database is not
                  local or is marked deleted
        """
        db_path, nodes_count, index = self._get_db_info(account, container)
        if db_path:
            broker = ContainerBroker(db_path, account=account,
                                     container=container)
            if broker.is_deleted():
                self.log(
                    'info',
                    'Database does not exist for %s/%s' %
                    (account, container))
            else:
                return broker, nodes_count, index
        return None, None, None

    def submit_items(self, handler, rows, job):
        """
        Process ``rows`` with ``handler`` and return the pass status.

        In bulk mode the handler receives all rows at once; otherwise one
        task per row is queued and the call waits for ``job`` to finish.
        """
        if not rows:
            return ContainerJob.PASS_SUCCEEDED

        if self.bulk:
            with self._swift_pool.item() as swift_client:
                handler.handle(rows, swift_client)
            return ContainerJob.PASS_SUCCEEDED

        job.submit_tasks([(row, handler) for row in rows], self.work_queue)
        return job.wait_all()

    def list_containers(self, account, prefix=''):
        """Return the names of all containers in ``account``."""
        # TODO: we should not have to retrieve all of the containers at once,
        # but it will require allocating a swift_client for this purpose from
        # the pool -- consider doing that at some point. However, as long as
        # there are fewer than a few million containers, getting all of them
        # at once should be cheap, paginating 10000 at a time.
        with self._swift_pool.item() as swift_client:
            return [
                c['name']
                for c in swift_client.iter_containers(account, prefix=prefix)
            ]

    def _prune_deleted_containers(self, account, containers, prefix=None):
        """
        Remove status files of containers that are no longer listed.

        :param account: account name
        :param containers: names of the containers that currently exist
        :param prefix: if set, only status files starting with it are
                       considered
        """
        # After iterating over all of the containers, we prune any
        # entries from containers that may have been deleted (so as to
        # avoid missing data). There is still a chance where a
        # container is removed and created between the iterations, however
        # there is nothing we can do about that.
        # TODO: keep track of container creation date to detect when
        # they are removed and then added.
        account_status_dir = os.path.join(self.status_dir,
                                          account.encode('utf-8'))
        if not os.path.exists(account_status_dir):
            return

        if prefix:
            container_paths = glob.glob(
                os.path.join(account_status_dir, prefix + '*'))
            tracked_containers = [
                os.path.basename(path) for path in container_paths
            ]
        else:
            tracked_containers = os.listdir(account_status_dir)

        existing = {container.encode('utf-8') for container in containers}
        disappeared = set(tracked_containers) - existing
        for container in disappeared:
            try:
                os.unlink(os.path.join(account_status_dir, container))
            except Exception as e:
                self.log(
                    'warning',
                    'Failed to remove the status file for %s: %s' %
                    (os.path.join(account, container), repr(e)))

    def _prune_status_files(self):
        """Remove status files that belong to no configured mapping."""
        # Unlike _prune_deleted_containers, which prunes status files from
        # the per-account mappings, this prunes only unknown status files
        # (i.e. the mapping was removed).
        known_mappings = {}
        for mapping in self.conf['containers']:
            if 'container' not in mapping:
                continue
            # setdefault keeps creation and population of the per-account
            # set in one place, so the lookup can never miss.
            known_mappings.setdefault(
                mapping['account'], set()).add(mapping['container'])

        for account in os.listdir(unicode(self.status_dir)):
            account_path = os.path.join(self.status_dir, account)
            if not os.path.isdir(account_path):
                continue
            if account.startswith('.shards_'):
                # Sharded containers are handled separately
                continue
            if account not in known_mappings:
                try:
                    shutil.rmtree(account_path)
                except OSError as e:
                    self.log(
                        'warning', 'Failed to remove {}: {}'.format(
                            os.path.join(self.status_dir,
                                         account.encode('utf-8')),
                            e))
                continue
            if '/*' in known_mappings[account]:
                continue
            for container in os.listdir(account_path):
                if container not in known_mappings[account]:
                    try:
                        os.unlink(os.path.join(account_path, container))
                    except OSError as e:
                        self.log(
                            'warning', 'Failed to remove {}: {}'.format(
                                os.path.join(
                                    account_path.encode('utf-8'),
                                    unicode(container).encode('utf-8')),
                                e))

    def _is_container_sharded(self, account, container):
        """
        Retrieve container metadata with a HEAD request and find out if
        container is sharded.

        :returns: True if container is sharded. False otherwise.
        """
        with self._swift_pool.item() as swift_client:
            try:
                metadata = swift_client.get_container_metadata(
                    account, container)
            except UnexpectedResponse as err:
                # A missing container is expected and not worth an error.
                if err.resp.status_int != HTTP_NOT_FOUND:
                    self.log(
                        'error',
                        'Failed to retrieve container metadata for %s: %s' %
                        (os.path.join(account, container), err.message))
                metadata = {}
            except Exception as err:
                self.log(
                    'error',
                    'Failed to retrieve container metadata for %s: %s' %
                    (os.path.join(account, container), err.message))
                metadata = {}

        return metadata.get('x-backend-sharding-state') == 'sharded'

    def _enqueue_sharded_container(self, settings, per_account=False):
        """
        Get list of shards for a given containers and add them to the work
        queue.
        """
        # TODO: look into saving the sharded state of the container
        sharded_account = '.shards_' + settings['account']
        sharded_container = settings['container']
        all_sharded_containers = self.list_containers(
            sharded_account, prefix=sharded_container)

        for container in all_sharded_containers:
            settings_copy = settings.copy()
            settings_copy['internal_account'] = sharded_account
            settings_copy['internal_container'] = container
            self._enqueue_container(settings_copy, per_account)

        self._prune_deleted_containers(
            sharded_account, all_sharded_containers,
            prefix=sharded_container)

    def _process_container(self, settings, per_account=False):
        """Queue the container described by ``settings`` or its shards."""
        # save internal account/containers as the actual account/containers
        # that will be crawled. This is currently useful for sharded
        # containers
        account = settings['account']
        container = settings['container']
        settings['internal_account'] = account
        settings['internal_container'] = container

        try:
            db_path, _, _ = self._get_db_info(account.encode('utf-8'),
                                              container.encode('utf-8'))
        except Exception:
            self.log('error', "Failed to process %s/%s" %
                     (account, container), exc_info=True)
            return

        # if container db is not on local node, we need to check
        # if container is sharded with a HEAD request because
        # shards of that container could potentially be stored on this node
        # even if root container is not. Otherwise we check if container is
        # sharded when we have the broker.
        if db_path:
            self._enqueue_container(settings, per_account)
        elif self._is_container_sharded(account, container):
            self._enqueue_sharded_container(settings, per_account)

    def _enqueue_container(self, settings, per_account=False):
        """Queue ``settings`` unless the same mapping is still in flight."""
        settings_signature = mapping_signature(settings)
        if settings_signature not in self._in_progress_containers:
            self._in_progress_containers.add(settings_signature)
            self.enumerator_queue.put((settings, per_account))

    def _submit_containers(self):
        """Queue every configured mapping, then prune stale status files."""
        for container_settings in self.conf['containers']:
            # TODO: perform validation of the settings on startup
            if 'container' not in container_settings:
                self.log(
                    'error',
                    'Container name not specified in settings -- continue')
                continue
            if 'account' not in container_settings:
                self.log('error',
                         'Account not specified in settings -- continue')
                continue

            if container_settings['container'] == '/*':
                all_containers = self.list_containers(
                    container_settings['account'])
                for container in all_containers:
                    settings_copy = container_settings.copy()
                    settings_copy['container'] = container
                    self._process_container(settings_copy, per_account=True)
                # clean status dir off containers that have been deleted
                self._prune_deleted_containers(
                    container_settings['account'], all_containers)
            else:
                self._process_container(container_settings)

        self._prune_status_files()

    def run_always(self):
        """Submit the configured containers once per poll interval."""
        # Since we don't support reloading, the daemon should quit if there
        # are no containers configured
        if 'containers' not in self.conf or not self.conf['containers']:
            return
        self.log('debug', 'Entering the poll loop')
        while True:
            start = time.time()
            self._submit_containers()
            elapsed = time.time() - start
            if elapsed < self.poll_interval:
                eventlet.sleep(self.poll_interval - elapsed)

    def run_once(self):
        """Submit the configured containers and wait for the enumerators."""
        self._submit_containers()
        self.enumerator_queue.join()
class UtilizationMiddleware(object):
    """
    WSGI middleware that records per-request transfer samples and periodic
    account usage as zero-byte objects in hidden accounts, and serves the
    aggregated figures on ``GET /api/v1/metering``.

    :param app: the next WSGI filter or app in the pipeline
    :param conf: middleware configuration; ``sample_rate`` (seconds,
                 default 600) is read from it
    """

    def __init__(self, app, conf, *args, **kwargs):
        self.app = app
        self.conf = conf
        self.sample_account = '.transfer_record'
        self.aggregate_account = '.utilization'
        self.logger = get_logger(self.conf, log_route='utilization')
        self.container_ring = Ring('/etc/swift', ring_name='container')
        self.sample_rate = int(self.conf.get('sample_rate', 600))

    def swift_account(self, env, tenant_id):
        """
        Look up the Swift account recorded for ``tenant_id``.

        :returns: the account name, or None if the tenant's container does
                  not exist or holds no ``account/`` object yet
        """
        path = '/v1/%s/%s?format=json&prefix=account/' \
               % (self.aggregate_account, tenant_id)
        req = make_pre_authed_request(env, 'GET', path)
        req.environ['swift.proxy_access_log_made'] = True
        resp = req.get_response(self.app)
        if resp.status_int == 404:
            return None
        listing = json.loads(resp.body)
        if not listing:
            # The container exists but no account object was stored yet.
            return None
        return listing[0]['name'].split('/')[1]

    def check_api_call(self, env):
        """Return True if the request is ``GET /api/v1/metering``."""
        path = env.get('RAW_PATH_INFO', None)
        return env['REQUEST_METHOD'] == 'GET' and path == '/api/v1/metering'

    def get_account_info(self, env, account):
        """
        HEAD the account and return its counters.

        :returns: tuple (container count, object count, bytes used); all
                  zero when the request does not succeed
        """
        path = '/v1/%s' % account
        req = make_pre_authed_request(env, 'HEAD', path)
        req.environ['swift.proxy_access_log_made'] = True
        resp = req.get_response(self.app)
        if not resp.status_int // 100 == 2:
            return (0, 0, 0)
        return (int(resp.headers.get('x-account-container-count', 0)),
                int(resp.headers.get('x-account-object-count', 0)),
                int(resp.headers.get('x-account-bytes-used', 0)))

    def record_usage_data(self, env, tenant_id, account, timestamp):
        """
        Store the account's usage for ``timestamp`` unless the tenant's
        container is missing or a usage object for it already exists.
        """
        path = '/v1/%s/%s?prefix=usage/%d&format=json' % (
            self.aggregate_account, tenant_id, timestamp)
        req = make_pre_authed_request(env, 'GET', path)
        req.environ['swift.proxy_access_log_made'] = True
        resp = req.get_response(self.app)
        if resp.status_int == 404:
            return
        body = json.loads(resp.body)
        if len(body) != 0:
            return
        container_cnt, obj_cnt, bt_used = self.get_account_info(env, account)
        u_object = 'usage/%d/%d_%d_%d' % (timestamp, container_cnt,
                                          obj_cnt, bt_used)
        self.put_hidden_object(self.aggregate_account, tenant_id, u_object)

    def iter_objects(self, env, path, prefix, marker, end, count):
        """
        Yield object names from the listing of ``path``.

        Listings are fetched in pages of at most 1000 entries starting after
        ``marker``. Iteration stops after ``count`` entries were requested,
        on a short page, or at the first name whose second path component
        (a timestamp) is greater than ``end``.
        """
        path_with_params = '%s?format=json&prefix=%s' % (path, prefix)
        seg = ''
        force_break = False
        while count > 0:
            limit = 1000 if count > 1000 else count
            count -= 1000
            rpath = path_with_params + ('&marker=%s' % marker) + (
                '&limit=%d' % limit)
            req = make_pre_authed_request(env, 'GET', rpath)
            req.environ['swift.proxy_access_log_made'] = True
            resp = req.get_response(self.app)
            segments = json.loads(resp.body)
            for seg in segments:
                name = seg['name']
                record_ts = int(name.split('/')[1])
                if record_ts > end:
                    force_break = True
                    break
                yield name
            if force_break:
                break
            if len(segments) != limit:
                # Short page: the listing is exhausted.
                break
            if segments:
                marker = seg['name']
            else:
                break

    def retrieve_utilization_data(self, env, tenant_id, start, end, count):
        """
        Aggregate the transfer samples and the latest usage record of
        ``tenant_id`` between ``start`` and ``end``.

        :returns: dict with 'transfer' (list of per-bill-type totals) and
                  'utilization' (container/object/bytes counters)
        """
        path = '/v1/%s/%s' % (self.aggregate_account, tenant_id)
        data = dict()
        data['utilization'] = {}
        marker = 'transfer/%d' % start
        data['transfer'] = list()
        data['utilization']['container_count'] = 0
        data['utilization']['object_count'] = 0
        data['utilization']['bytes_used'] = 0

        bytes_recvs = dict()
        bytes_sents = dict()
        req_counts = dict()
        for o in self.iter_objects(env, path, 'transfer/', marker, end,
                                   count):
            bill_type = o.split('/')[2]
            bytes_recv, bytes_sent, req_cnt = o.split('/')[3].split('_')
            bytes_recvs[bill_type] = bytes_recvs.get(bill_type, 0) + int(
                bytes_recv)
            bytes_sents[bill_type] = bytes_sents.get(bill_type, 0) + int(
                bytes_sent)
            req_counts[bill_type] = req_counts.get(bill_type, 0) + int(
                req_cnt)

        for bill_type, bt_rv in bytes_recvs.items():
            d = dict()
            d['bill_type'] = int(bill_type)
            d['bytes_in'] = bt_rv
            d['bytes_out'] = bytes_sents[bill_type]
            d['req_count'] = req_counts[bill_type]
            data['transfer'].append(d)

        last = None
        marker = 'usage/%d' % start
        for o in self.iter_objects(env, path, 'usage/', marker, end, count):
            last = o
        if last:
            # NOTE(review): these values stay strings while the defaults
            # above are ints; kept as is so the response format does not
            # change for existing consumers.
            container_cnt, obj_cnt, bytes_used = \
                last.split('/')[2].split('_')
            data['utilization']['container_count'] = container_cnt
            data['utilization']['object_count'] = obj_cnt
            data['utilization']['bytes_used'] = bytes_used
        return data

    def GET(self, req):
        """
        Serve the metering API.

        Requires an identity with the 'admin' role and the query parameters
        ``tenantid`` and ``start``; ``end`` defaults to the current time.

        :returns: a Response carrying the JSON report or an error
        """
        start = req.params.get('start')
        tenant_id = req.params.get('tenantid')
        identity = req.environ.get('HTTP_X_IDENTITY_STATUS')
        roles = req.environ.get('keystone.identity', None)
        if identity == 'Invalid' or not roles or \
                'admin' not in roles['roles']:
            return Response(request=req, status="403 Forbidden",
                            body="Access Denied",
                            content_type="text/plain")

        if not tenant_id:
            return Response(request=req, status="400 Bad Request",
                            body="tenant_id parameter doesn't exist",
                            content_type="text/plain")
        if not start:
            return Response(request=req, status="400 Bad Request",
                            body="start parameter doesn't exist",
                            content_type="text/plain")
        end = req.params.get('end')
        if end is None:
            end = datetime.utcfromtimestamp(int(time.time())).isoformat()

        # check if tenant_id's users utilization was recorded.
        account = self.swift_account(req.environ.copy(), tenant_id)
        if not account:
            return Response(status="400 Bad Request",
                            content_type="text/plain",
                            body="This tenant_id never used.")

        try:
            # start time is "rounded down"
            start_ts = iso8601_to_timestamp(start)
            # end time is "rounded up"
            end_ts = iso8601_to_timestamp(end)
        except ValueError:
            return Response(status="400 Bad Request",
                            content_type="text/plain",
                            body="start or end time is incorrect format. "
                                 "please check start or end parameter")
        if start_ts > end_ts:
            return Response(status="400 Bad Request",
                            content_type="text/plain",
                            body="start time must be before the end time")

        # Widen the period to whole hours.
        end_ts = (end_ts // 3600 + 1) * 3600
        start_ts = (start_ts // 3600) * 3600
        objsize = (end_ts - start_ts) / self.sample_rate

        content = self.retrieve_utilization_data(req.environ.copy(),
                                                 tenant_id, start_ts,
                                                 end_ts, objsize)
        content['period_start'] = timestamp_to_iso8601(start_ts)
        content['period_end'] = timestamp_to_iso8601(end_ts)
        content['tenant_id'] = tenant_id
        content['swift_account'] = account
        return Response(request=req, body=json.dumps(content),
                        content_type="application/json")

    def __call__(self, env, start_response):
        """
        Serve the metering API or pass the request on while counting the
        bytes received and sent, publishing a sample once the response has
        been consumed.
        """
        self.logger.debug('Calling Utilization Middleware')
        req = Request(env)
        if self.check_api_call(env):
            return self.GET(req)(env, start_response)

        try:
            version, account, container, obj = req.split_path(2, 4, True)
        except ValueError:
            return self.app(env, start_response)

        remote_user = env.get('REMOTE_USER')
        if not remote_user or (isinstance(remote_user, basestring) and
                               remote_user.startswith('.wsgi')):
            self.logger.debug('### SKIP: REMOTE_USER is %s' % remote_user)
            return self.app(env, start_response)

        start_response_args = [None]
        input_proxy = InputProxy(env['wsgi.input'])
        env['wsgi.input'] = input_proxy

        def my_start_response(status, headers, exc_info=None):
            start_response_args[0] = (status, list(headers), exc_info)

        def iter_response(iterable):
            iterator = iter(iterable)
            try:
                chunk = next(iterator)
                while not chunk:
                    chunk = next(iterator)
            except StopIteration:
                chunk = ''

            if start_response_args[0]:
                start_response(*start_response_args[0])

            bytes_sent = 0
            try:
                while chunk:
                    bytes_sent += len(chunk)
                    yield chunk
                    chunk = next(iterator)
            finally:
                try:
                    self.publish_sample(env, account,
                                        input_proxy.bytes_received,
                                        bytes_sent)
                except Exception:
                    self.logger.exception('Failed to publish samples')
                finally:
                    # The server only closes this generator, so the wrapped
                    # iterable has to be closed here (PEP 3333).
                    close = getattr(iterable, 'close', None)
                    if callable(close):
                        close()

        try:
            iterable = self.app(env, my_start_response)
        except Exception:
            self.publish_sample(env, account,
                                input_proxy.bytes_received, 0)
            raise
        else:
            return iter_response(iterable)

    def publish_sample(self, env, account, bytes_received, bytes_sent):
        """
        Record one transfer sample, making sure the tenant's account object
        and the usage record of the current sample period exist.
        """
        timestamp = normalize_timestamp(time.time())
        # End of the sample period the request falls into.
        sample_time = (float(
            timestamp) // self.sample_rate + 1) * self.sample_rate
        trans_id = env.get('swift.trans_id')
        tenant_id = env.get('HTTP_X_TENANT_ID')
        remote_addr = env.get('REMOTE_ADDR')

        # check if account information object is existed.
        if not self.swift_account(env, tenant_id):
            obj = 'account/%s' % account
            self.put_hidden_object(self.aggregate_account, tenant_id, obj)

        # recording account's storage usage data
        self.record_usage_data(env, tenant_id, account, sample_time)

        container = '%s_%s_%s' % (sample_time, tenant_id, account)

        obj = '%s/%d/%d/%s/%s' % (timestamp, bytes_received, bytes_sent,
                                  trans_id, remote_addr)
        self.put_hidden_object(self.sample_account, container, obj)

    def put_hidden_object(self, account, container, obj):
        """
        Register a zero-byte object ``obj`` in ``account``/``container`` by
        sending a PUT to every container node the ring returns.
        """
        hidden_path = '/%s/%s/%s' % (account, container, obj)
        self.logger.debug('put sample_path: %s' % hidden_path)
        # Placement must be computed for the account actually written to;
        # otherwise the request goes to another container's partition.
        part, nodes = self.container_ring.get_nodes(account, container)

        # One header set for all replicas so they share the same timestamp.
        action_headers = dict()
        action_headers['user-agent'] = 'utilization'
        action_headers['X-Timestamp'] = normalize_timestamp(time.time())
        action_headers['referer'] = 'utilization-middleware'
        action_headers['x-size'] = '0'
        action_headers['x-content-type'] = "text/plain"
        # MD5 of the empty string.
        action_headers['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e'

        for node in nodes:
            conn = http_connect(node['ip'], node['port'], node['device'],
                                part, 'PUT', hidden_path, action_headers)
            response = conn.getresponse()
            response.read()
class ListEndpointsMiddleware(object):
    """
    List endpoints for an object, account or container.

    See above for a full description.

    Uses configuration parameter `swift_dir` (default `/etc/swift`).

    :param app: The next WSGI filter or app in the paste.deploy chain.
    :param conf: The configuration dict for the middleware.
    """

    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='endpoints')
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.account_ring = Ring(self.swift_dir, ring_name='account')
        self.container_ring = Ring(self.swift_dir, ring_name='container')
        self.endpoints_path = conf.get('list_endpoints_path', '/endpoints/')
        if not self.endpoints_path.endswith('/'):
            self.endpoints_path += '/'

    def get_object_ring(self, policy_idx):
        """
        Get the ring object to use to handle a request based on its policy.

        :policy_idx: policy index as defined in swift.conf
        :returns: appropriate ring object
        """
        return POLICIES.get_object_ring(policy_idx, self.swift_dir)

    def __call__(self, env, start_response):
        """
        Answer GET requests below ``endpoints_path`` with a JSON list of
        backend URLs; every other path is passed to the wrapped app.

        Responds 405 for other methods and 400 if the path names no account.
        """
        request = Request(env)
        if not request.path.startswith(self.endpoints_path):
            return self.app(env, start_response)

        if request.method != 'GET':
            return HTTPMethodNotAllowed(
                req=request, headers={"Allow": "GET"})(env, start_response)

        try:
            # Keep the slash that terminates endpoints_path as the leading
            # slash of the remaining path.
            clean_path = request.path[len(self.endpoints_path) - 1:]
            account, container, obj = \
                split_path(clean_path, 1, 3, True)
        except ValueError:
            return HTTPBadRequest('No account specified')(env, start_response)

        if account is not None:
            account = unquote(account)
        if container is not None:
            container = unquote(container)
        if obj is not None:
            obj = unquote(obj)

        if obj is not None:
            # remove 'endpoints' from call to get_container_info
            # Done on a copy: rewriting PATH_INFO in the live environ would
            # change the request path seen by the rest of the pipeline.
            stripped = request.environ.copy()
            if stripped['PATH_INFO'][:len(self.endpoints_path)] == \
                    self.endpoints_path:
                stripped['PATH_INFO'] = "/v1/" + \
                    stripped['PATH_INFO'][len(self.endpoints_path):]
            container_info = get_container_info(
                stripped, self.app, swift_source='LE')
            obj_ring = self.get_object_ring(container_info['storage_policy'])
            partition, nodes = obj_ring.get_nodes(
                account, container, obj)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}/{obj}'
        elif container is not None:
            partition, nodes = self.container_ring.get_nodes(
                account, container)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}'
        else:
            partition, nodes = self.account_ring.get_nodes(
                account)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}'

        endpoints = []
        for node in nodes:
            # Placeholders missing from the chosen template are ignored by
            # format(), so all names can always be supplied.
            endpoint = endpoint_template.format(
                ip=node['ip'],
                port=node['port'],
                device=node['device'],
                partition=partition,
                account=quote(account),
                container=quote(container or ''),
                obj=quote(obj or ''))
            endpoints.append(endpoint)

        return Response(json.dumps(endpoints),
                        content_type='application/json')(env, start_response)
class ListEndpointsMiddleware(object):
    """
    WSGI middleware that reports where an account, container or object is
    stored.

    GET requests below the configured endpoints path are answered with a
    JSON list of backend URLs computed from the rings; any other request is
    handed to the wrapped application.

    Uses configuration parameter `swift_dir` (default `/etc/swift`).

    :param app: The next WSGI filter or app in the paste.deploy chain.
    :param conf: The configuration dict for the middleware.
    """

    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='endpoints')
        ring_dir = conf.get('swift_dir', '/etc/swift')
        self.account_ring = Ring(ring_dir, ring_name='account')
        self.container_ring = Ring(ring_dir, ring_name='container')
        self.object_ring = Ring(ring_dir, ring_name='object')
        configured = conf.get('list_endpoints_path', '/endpoints/')
        # The path always ends with a slash.
        self.endpoints_path = (
            configured if configured.endswith('/') else configured + '/')

    def __call__(self, env, start_response):
        request = Request(env)
        prefix = self.endpoints_path
        if not request.path.startswith(prefix):
            return self.app(env, start_response)

        if request.method != 'GET':
            not_allowed = HTTPMethodNotAllowed(
                req=request, headers={"Allow": "GET"})
            return not_allowed(env, start_response)

        try:
            # The slash ending the prefix becomes the leading slash of the
            # remaining path.
            account, container, obj = split_path(
                request.path[len(prefix) - 1:], 1, 3, True)
        except ValueError:
            return HTTPBadRequest('No account specified')(env, start_response)

        # Decode whichever path components are present.
        account, container, obj = [
            part if part is None else unquote(part)
            for part in (account, container, obj)]

        base = 'http://{ip}:{port}/{device}/{partition}/'
        if obj is not None:
            partition, nodes = self.object_ring.get_nodes(
                account, container, obj)
            endpoint_template = base + '{account}/{container}/{obj}'
        elif container is not None:
            partition, nodes = self.container_ring.get_nodes(
                account, container)
            endpoint_template = base + '{account}/{container}'
        else:
            partition, nodes = self.account_ring.get_nodes(account)
            endpoint_template = base + '{account}'

        endpoints = [
            endpoint_template.format(
                ip=node['ip'],
                port=node['port'],
                device=node['device'],
                partition=partition,
                account=quote(account),
                container=quote(container or ''),
                obj=quote(obj or ''))
            for node in nodes]

        response = Response(json.dumps(endpoints),
                            content_type='application/json')
        return response(env, start_response)
class ListEndpointsMiddleware(object):
    """
    WSGI middleware that reports where an account, container or object is
    stored, in one of several response formats.

    GET requests below the configured endpoints path are answered from the
    rings; an optional leading version component of the path selects the
    response format. Any other request goes to the wrapped application.

    Uses configuration parameter `swift_dir` (default `/etc/swift`).

    :param app: The next WSGI filter or app in the paste.deploy chain.
    :param conf: The configuration dict for the middleware.
    """

    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='endpoints')
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.account_ring = Ring(self.swift_dir, ring_name='account')
        self.container_ring = Ring(self.swift_dir, ring_name='container')
        configured = conf.get('list_endpoints_path', '/endpoints/')
        # The path always ends with a slash.
        self.endpoints_path = (
            configured if configured.endswith('/') else configured + '/')
        self.default_response_version = 1.0
        # Response builders keyed by version number.
        self.response_map = {
            1.0: self.v1_format_response,
            2.0: self.v2_format_response,
        }

    def get_object_ring(self, policy_idx):
        """
        Return the object ring serving the given storage policy.

        :policy_idx: policy index as defined in swift.conf
        :returns: appropriate ring object
        """
        return POLICIES.get_object_ring(policy_idx, self.swift_dir)

    def _parse_version(self, raw_version):
        """
        Convert a path component such as 'v2' to a supported version number.

        :raises: ValueError if the component is not a number or the number
                 is not listed in RESPONSE_VERSIONS.
        """
        err_msg = 'Unsupported version %r' % raw_version
        try:
            version = float(raw_version.lstrip('v'))
        except ValueError:
            raise ValueError(err_msg)
        if all(version != known for known in RESPONSE_VERSIONS):
            raise ValueError(err_msg)
        return version

    def _parse_path(self, request):
        """
        Split the request path into version, account, container and obj.

        Missing trailing parts are None. The version is always a float; the
        configured default response version is used when the path carries
        none.

        :param request: the swob request
        :returns: tuple (version, account, container, obj)
        :raises: ValueError if path is invalid, message will say why.
        """
        clean_path = request.path[len(self.endpoints_path) - 1:]
        # try to peel off version
        try:
            raw_version, rest = split_path(clean_path, 1, 2, True)
        except ValueError:
            raise ValueError('No account specified')
        try:
            version = self._parse_version(raw_version)
        except ValueError:
            looks_like_version = (
                raw_version.startswith('v') and '_' not in raw_version)
            if looks_like_version:
                # more likely a bad version than an account name
                raise
            # Most likely the client gave no version at all. A request such
            # as /endpoints/v_3/account ends up here too and is treated as
            # an account lookup without any error.
            version, rest = self.default_response_version, clean_path
        else:
            rest = '/' + (rest or '')
        try:
            account, container, obj = split_path(rest, 1, 3, True)
        except ValueError:
            raise ValueError('No account specified')
        return version, account, container, obj

    def v1_format_response(self, req, endpoints, **kwargs):
        """Version 1 response: the bare JSON list of endpoints."""
        return Response(json.dumps(endpoints),
                        content_type='application/json')

    def v2_format_response(self, req, endpoints, storage_policy_index,
                           **kwargs):
        """
        Version 2 response: a JSON object with the endpoints and the headers
        to send along (the storage policy index, when one is known).
        """
        headers = {}
        if storage_policy_index is not None:
            headers['X-Backend-Storage-Policy-Index'] = str(
                storage_policy_index)
        payload = {'endpoints': endpoints, 'headers': headers}
        return Response(json.dumps(payload),
                        content_type='application/json')

    def __call__(self, env, start_response):
        request = Request(env)
        if not request.path.startswith(self.endpoints_path):
            return self.app(env, start_response)

        if request.method != 'GET':
            not_allowed = HTTPMethodNotAllowed(
                req=request, headers={"Allow": "GET"})
            return not_allowed(env, start_response)

        try:
            version, account, container, obj = self._parse_path(request)
        except ValueError as err:
            return HTTPBadRequest(str(err))(env, start_response)

        # Decode whichever path components are present.
        account, container, obj = [
            part if part is None else unquote(part)
            for part in (account, container, obj)]

        storage_policy_index = None
        base = 'http://{ip}:{port}/{device}/{partition}/'
        if obj is not None:
            # The container's storage policy decides which object ring
            # applies.
            container_info = get_container_info(
                {'PATH_INFO': '/v1/%s/%s' % (account, container)},
                self.app, swift_source='LE')
            storage_policy_index = container_info['storage_policy']
            obj_ring = self.get_object_ring(storage_policy_index)
            partition, nodes = obj_ring.get_nodes(account, container, obj)
            endpoint_template = base + '{account}/{container}/{obj}'
        elif container is not None:
            partition, nodes = self.container_ring.get_nodes(
                account, container)
            endpoint_template = base + '{account}/{container}'
        else:
            partition, nodes = self.account_ring.get_nodes(account)
            endpoint_template = base + '{account}'

        endpoints = [
            endpoint_template.format(
                ip=node['ip'],
                port=node['port'],
                device=node['device'],
                partition=partition,
                account=quote(account),
                container=quote(container or ''),
                obj=quote(obj or ''))
            for node in nodes]

        build_response = self.response_map[version]
        resp = build_response(
            request, endpoints=endpoints,
            storage_policy_index=storage_policy_index)
        return resp(env, start_response)
class ListEndpointsMiddleware(object):
    """
    WSGI middleware that lists backend endpoints for an account,
    container or object.

    GET requests whose path starts with the configured endpoints path are
    answered with a JSON array of ``http://ip:port/device/partition/...``
    URLs, one per node returned by the matching ring. Every other request
    is handed to the wrapped app unchanged.

    Configuration keys read from ``conf``:

    * ``swift_dir`` (default ``/etc/swift``): directory the account,
      container and object rings are loaded from.
    * ``list_endpoints_path`` (default ``/endpoints/``): path prefix this
      middleware answers on; a trailing slash is appended if missing.

    :param app: The next WSGI filter or app in the paste.deploy chain.
    :param conf: The configuration dict for the middleware.
    """

    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='endpoints')

        ring_dir = conf.get('swift_dir', '/etc/swift')
        self.account_ring = Ring(ring_dir, ring_name='account')
        self.container_ring = Ring(ring_dir, ring_name='container')
        self.object_ring = Ring(ring_dir, ring_name='object')

        # The prefix must end with a slash: __call__ keeps that slash as
        # the leading '/' of the path it hands to split_path.
        prefix = conf.get('list_endpoints_path', '/endpoints/')
        self.endpoints_path = prefix if prefix.endswith('/') else prefix + '/'

    def __call__(self, env, start_response):
        """
        Handle one WSGI request.

        :param env: The WSGI environment dict.
        :param start_response: The WSGI ``start_response`` callable.
        :returns: The WSGI response iterable, either from the wrapped app
                  (path outside the endpoints prefix), an error response
                  (non-GET method or unparsable path), or the JSON
                  endpoint listing.
        """
        request = Request(env)
        prefix = self.endpoints_path

        if not request.path.startswith(prefix):
            return self.app(env, start_response)

        if request.method != 'GET':
            not_allowed = HTTPMethodNotAllowed(
                req=request, headers={"Allow": "GET"})
            return not_allowed(env, start_response)

        # Drop the prefix but keep its final '/' so the remainder still
        # looks like '/account[/container[/object]]'.
        remainder = request.path[len(prefix) - 1:]
        try:
            account, container, obj = split_path(remainder, 1, 3, True)
        except ValueError:
            bad_request = HTTPBadRequest('No account specified')
            return bad_request(env, start_response)

        if account is not None:
            account = unquote(account)
        if container is not None:
            container = unquote(container)
        if obj is not None:
            obj = unquote(obj)

        # Choose the ring by the most specific path component present.
        node_prefix = 'http://{ip}:{port}/{device}/{partition}/'
        if obj is not None:
            partition, nodes = self.object_ring.get_nodes(
                account, container, obj)
            endpoint_template = node_prefix + '{account}/{container}/{obj}'
        elif container is not None:
            partition, nodes = self.container_ring.get_nodes(
                account, container)
            endpoint_template = node_prefix + '{account}/{container}'
        else:
            partition, nodes = self.account_ring.get_nodes(account)
            endpoint_template = node_prefix + '{account}'

        endpoints = [
            endpoint_template.format(
                ip=node['ip'],
                port=node['port'],
                device=node['device'],
                partition=partition,
                account=quote(account),
                container=quote(container or ''),
                obj=quote(obj or ''))
            for node in nodes
        ]

        listing = Response(json.dumps(endpoints),
                           content_type='application/json')
        return listing(env, start_response)
class ContainerCrawler(object):
    """
    Feed new container database rows to a handler.

    For each configured container, the crawler looks up the container's
    nodes in the container ring, opens the database of a replica that
    lives on a local device, reads the rows added since the handler's last
    saved row and passes them to the handler.

    Rows are handed over either as whole lists (``bulk_process`` enabled)
    or one at a time through a pool of eventlet green threads.

    :param conf: configuration dict. Required keys: ``devices``,
                 ``status_dir``, ``items_chunk``. Optional keys:
                 ``bulk_process`` (default False), ``poll_interval``
                 (default 5), ``workers`` (default 10) and ``containers``
                 (list of per-container settings dicts with at least
                 ``account`` and ``container``).
    :param handler_class: callable invoked as
                          ``handler_class(status_dir, settings)``; the
                          result must provide ``handle``,
                          ``get_last_row`` and ``save_last_row``.
    :param logger: optional logger; when falsy nothing is logged.
    """

    def __init__(self, conf, handler_class, logger=None):
        self.logger = logger
        self.conf = conf
        self.root = conf['devices']
        self.bulk = conf.get('bulk_process', False)
        self.interval = 10
        self.swift_dir = '/etc/swift'
        self.container_ring = Ring(self.swift_dir, ring_name='container')
        self.status_dir = conf['status_dir']
        self.myips = whataremyips('0.0.0.0')
        self.items_chunk = conf['items_chunk']
        self.poll_interval = conf.get('poll_interval', 5)
        self.handler_class = handler_class

        # Bulk mode calls the handler synchronously, so the worker pool
        # and its queues are only created for per-row processing.
        if not self.bulk:
            self._init_workers(conf)

        self.log('debug', 'Created the Container Crawler instance')

    def _init_workers(self, conf):
        """Create the green-thread pool and queues and start the workers."""
        self.workers = conf.get('workers', 10)
        self.pool = eventlet.GreenPool(self.workers)
        # Bounded, so submit_items blocks instead of queueing without limit.
        self.work_queue = eventlet.queue.Queue(self.workers * 2)
        # maxsize=None makes these queues unbounded.
        self.error_queue = eventlet.queue.Queue(maxsize=None)
        self.stats_queue = eventlet.queue.Queue(maxsize=None)

        for _ in range(self.workers):
            self.pool.spawn_n(self._worker)

    def _worker(self):
        """
        Worker loop: handle ``(row, handler)`` items from the work queue.

        A falsy item is the shutdown sentinel. Exceptions raised by the
        handler are recorded on the error queue instead of propagating,
        so one bad row does not kill the worker.
        """
        while True:
            work = self.work_queue.get()
            if not work:
                self.work_queue.task_done()
                break

            row, handler = work
            try:
                handler.handle(row)
            except Exception as e:
                self.error_queue.put((row, e))
            self.work_queue.task_done()

    def _stop(self):
        """Send every worker the shutdown sentinel and wait for them."""
        if self.bulk:
            # No pool or queues exist in bulk mode; nothing to stop.
            return
        for _ in range(self.workers):
            self.work_queue.put(None)
        self.pool.waitall()

    def _check_errors(self):
        """
        Log every error the workers recorded.

        :raises RuntimeError: if at least one row failed.
        """
        if self.error_queue.empty():
            return

        while not self.error_queue.empty():
            row, error = self.error_queue.get()
            self.log('error', 'Failed to handle row %s: %r' % (
                row['ROWID'], error))
        raise RuntimeError('Failed to process rows')

    def log(self, level, message):
        """Log ``message`` via the logger method named ``level``, if any."""
        if not self.logger:
            return
        getattr(self.logger, level)(message)

    def get_broker(self, account, container, part, node):
        """
        Build a ContainerBroker for one replica of a container database.

        :param account: account name.
        :param container: container name.
        :param part: ring partition of the container.
        :param node: ring node dict; only ``node['device']`` is used.
        :returns: a ContainerBroker for the ``.db`` file under
                  ``<devices>/<device>/<storage dir>/``.
        """
        db_hash = hash_path(account, container)
        db_dir = storage_directory(DATADIR, part, db_hash)
        db_path = os.path.join(self.root, node['device'], db_dir,
                               db_hash + '.db')
        return ContainerBroker(db_path, account=account, container=container)

    def submit_items(self, handler, rows):
        """
        Hand ``rows`` to ``handler`` and wait until they are processed.

        In bulk mode the handler receives the whole collection in a single
        call. Otherwise every row is queued for the workers and the call
        blocks until the queue is drained.

        :raises RuntimeError: (non-bulk mode) if any row failed.
        """
        if self.bulk:
            handler.handle(rows)
            return

        for row in rows:
            self.work_queue.put((row, handler))
        self.work_queue.join()
        self._check_errors()

    def process_items(self, handler, rows, nodes_count, node_id):
        """
        Submit ``rows`` in two passes, split by ``ROWID % nodes_count``.

        Rows whose remainder equals ``node_id`` are submitted first, then
        all remaining rows.

        Both partitions are built as real lists. A lazy ``filter`` object
        (what ``filter`` returns on Python 3) can only be consumed once
        and is always truthy, which breaks bulk handlers that expect a
        list.
        """
        owned_rows = [row for row in rows
                      if row['ROWID'] % nodes_count == node_id]
        self.submit_items(handler, owned_rows)

        verified_rows = [row for row in rows
                         if row['ROWID'] % nodes_count != node_id]
        self.submit_items(handler, verified_rows)

    def handle_container(self, settings):
        """
        Process the next chunk of rows for one configured container.

        :param settings: per-container settings dict; ``account`` and
                         ``container`` are read here and the whole dict is
                         passed on to the handler class.
        """
        part, container_nodes = self.container_ring.get_nodes(
            settings['account'], settings['container'])
        nodes_count = len(container_nodes)
        handler = self.handler_class(self.status_dir, settings)

        for index, node in enumerate(container_nodes):
            if not is_local_device(self.myips, None, node['ip'],
                                   node['port']):
                continue

            broker = self.get_broker(settings['account'],
                                     settings['container'],
                                     part, node)
            broker_info = broker.get_info()
            last_row = handler.get_last_row(broker_info['id'])
            if not last_row:
                last_row = 0

            try:
                items = broker.get_items_since(last_row, self.items_chunk)
            except DatabaseConnectionError:
                # This replica's DB could not be opened; try the next node.
                continue

            if items:
                self.process_items(handler, items, nodes_count, index)
                handler.save_last_row(items[-1]['ROWID'], broker_info['id'])
            # Only the first readable local replica is processed per call.
            return

    def run_always(self):
        """
        Run ``run_once`` forever, at most once per ``poll_interval`` seconds.

        Returns immediately when no containers are configured.
        """
        # Since we don't support reloading, the daemon should quit if there
        # are no containers configured
        if not self.conf.get('containers'):
            return

        self.log('debug', 'Entering the poll loop')
        while True:
            start = time.time()
            self.run_once()
            elapsed = time.time() - start
            if elapsed < self.poll_interval:
                time.sleep(self.poll_interval - elapsed)

    def run_once(self):
        """
        Process every configured container once.

        A failure in one container is logged, with its traceback, and does
        not prevent the remaining containers from being processed.
        """
        for container_settings in self.conf['containers']:
            try:
                self.handle_container(container_settings)
            except Exception as e:
                account = container_settings.get('account', 'N/A')
                container = container_settings.get('container', 'N/A')
                self.log(
                    'error',
                    "Failed to process %s/%s with %s: %s" % (
                        account, container, self.handler_class, repr(e)))
                # format_exc() takes no exception argument; its first
                # parameter is a frame limit. Passing the exception raises
                # TypeError on Python 3 and would abort the whole pass.
                self.log('error', traceback.format_exc())