Example #1
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--disk', action='store',
                        required=True,
                        help='The disk to be checked')
    parser.add_argument('-l', '--limit', action='store',
                        default=10, type=int,
                        help='Only process this many objects')

    args = parser.parse_args()

    path = "/srv/node/%s/quarantined/objects/" % args.disk
    if not pathlib.Path(path).exists():
        logger.error("Path %s not exist, exit!" % path)
        sys.exit(1)

    files = get_all_datafiles(path)
    limit = args.limit
    actionfile = 'action_%s.sh' % time.strftime("%Y%m%d-%H%M%S")
    logger.info("action commands will be written into file %s" % actionfile)

    ring = Ring('/etc/swift/', ring_name='object')

    for datafile in files:
        if limit < 1:
            break

        with open(datafile, 'rb') as fp:
            try:
                metadata = read_metadata(fp)
            except EOFError:
                logger.error("%s has invalid metadata" % datafile)
                continue

        name = metadata.get('name')
        account, container, obj = name.split('/', 3)[1:]
        part, nodes = ring.get_nodes(account, container, obj)

        command = None
        replica_count = 0
        for node in nodes:
            if check_replica(node['ip'], node['port'], node['device'], part,
                             account, container, obj):
                replica_count = replica_count + 1

        if replica_count == len(nodes):
            logger.info("quarantined file %s has %s copies and can be deleted "
                        "safely" % (datafile, replica_count))
            command = "rm -v %s\nrmdir -v %s\n" % (datafile, datafile.parent)
        elif replica_count == 0:
            command = recover_quarantine(datafile, metadata, part,
                                         account, container, obj)

        if command:
            with open(actionfile, 'a') as fp:
                fp.write(command)
        limit = limit - 1
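Example #1 relies on helpers that are not shown (get_all_datafiles, read_metadata, check_replica, recover_quarantine). A minimal sketch of what check_replica might look like, assuming a direct HEAD to each object server where any 2xx status counts as a healthy replica:

from eventlet import Timeout
from swift.common.bufferedhttp import http_connect


def check_replica(ip, port, device, part, account, container, obj):
    # Hypothetical helper: HEAD the object directly on the storage node
    # and treat any 2xx response as a healthy replica.
    path = '/%s/%s/%s' % (account, container, obj)
    try:
        with Timeout(10):
            conn = http_connect(ip, port, device, part, 'HEAD', path)
            resp = conn.getresponse()
            resp.read()
        return 200 <= resp.status < 300
    except (Exception, Timeout):
        return False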
Example #2
def __call__(self, env, start_response):
    req = Request(env)
    if env.get('REQUEST_METHOD') == "PUT" and \
            env.get("HTTP_X_OBJECT_META_LXC_DEPLOY"):
        ring = Ring(self.object_ring_path)
        raw_path = env.get("RAW_PATH_INFO").split("/")
        node_data = ring.get_nodes(raw_path[2], raw_path[3], raw_path[4])
        deploy_host = node_data[1][0]["ip"]
        req.headers["X-Object-Meta-LXC-HOST"] = deploy_host
        req.headers["REMOTE_USER"] = raw_path[2]
    return self.app(env, start_response)
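The class around this __call__ is not shown. A minimal sketch of how such a middleware is typically wrapped for paste.deploy, assuming a class name of LXCDeployMiddleware and that object_ring_path points at a serialized ring file:

class LXCDeployMiddleware(object):
    # Hypothetical wrapper for the __call__ above.
    def __init__(self, app, conf):
        self.app = app
        self.object_ring_path = conf.get('object_ring_path',
                                         '/etc/swift/object.ring.gz')


def filter_factory(global_conf, **local_conf):
    # Standard paste.deploy entry point; config comes from the filter
    # section of the proxy-server config.
    conf = dict(global_conf, **local_conf)

    def lxc_deploy_filter(app):
        return LXCDeployMiddleware(app, conf)
    return lxc_deploy_filter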
Example #3
File: __init__.py Project: LJ-hust/HS
def create_account(act):
    ts = utils.normalize_timestamp(time())
    account_ring = Ring(_testdir, ring_name='account')
    partition, nodes = account_ring.get_nodes(act)
    for node in nodes:
        # Note: we are just using the http_connect method in the object
        # controller here to talk to the account server nodes.
        conn = swift.proxy.controllers.obj.http_connect(
            node['ip'], node['port'], node['device'], partition, 'PUT',
            '/' + act, {'X-Timestamp': ts, 'x-trans-id': act})
        resp = conn.getresponse()
        assert resp.status == 201
Example #4
File: __init__.py Project: bkolli/swift
def create_account(act):
    ts = utils.normalize_timestamp(time())
    account_ring = Ring(_testdir, ring_name='account')
    partition, nodes = account_ring.get_nodes(act)
    for node in nodes:
        # Note: we are just using the http_connect method in the object
        # controller here to talk to the account server nodes.
        conn = swift.proxy.controllers.obj.http_connect(
            node['ip'], node['port'], node['device'], partition, 'PUT',
            '/' + act, {'X-Timestamp': ts, 'x-trans-id': act})
        resp = conn.getresponse()
        assert resp.status == 201
Example #5
    def _delete_or_save_lifecycle(self, method, lifecycle=None):
        path = '/.s3_bucket_lifecycle/%s/%s' % (self.account, self.container)
        oring = Ring('/etc/swift', ring_name='object')
        cring = Ring('/etc/swift', ring_name='container')
        part, nodes = oring.get_nodes('.s3_bucket_lifecycle', self.account,
                                      self.container)
        cpart, cnodes = cring.get_nodes('.s3_bucket_lifecycle', self.account)
        now_ts = normalize_timestamp(time.time())

        i = 0
        for node in nodes:
            ip = node['ip']
            port = node['port']
            dev = node['device']
            headers = dict()
            headers['user-agent'] = 'lifecycle-uploader'
            headers['X-Timestamp'] = now_ts
            headers['referer'] = 'lifecycle-uploader'
            headers['X-Container-Partition'] = cpart
            headers['X-Container-Host'] = '%(ip)s:%(port)s' % cnodes[i]
            headers['X-Container-Device'] = cnodes[i]['device']

            if lifecycle:
                headers['content-length'] = len(lifecycle)
                headers['etag'] = self._compute_md5(lifecycle)
                headers['content-type'] = 'text/plain'

            conn = http_connect(ip, port, dev, part, method, path,
                                headers)

            if method == 'PUT':
                conn.send(lifecycle)

            response = conn.getresponse()
            i += 1
        return response
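The _compute_md5 helper used above is not shown; a plausible one-liner, given that Swift expects the ETag to be the hex MD5 digest of the request body:

    def _compute_md5(self, body):
        # Hypothetical helper: hex MD5 digest of the lifecycle document.
        return hashlib.md5(body).hexdigest()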
Example #6
class ObjectEndpoint(object):

    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='object_endpoint')
        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.object_ring = Ring(swift_dir, ring_name='object')

    def __call__(self, env, start_response):
        request = Request(env)

        url_prefix = '/object_endpoint/'

        if request.path.startswith(url_prefix):

            if request.method != 'GET':
                raise HTTPMethodNotAllowed()

            aco = split_path(request.path[len(url_prefix) - 1:], 1, 3, True)
            account = aco[0]
            container = aco[1]
            obj = aco[2]
            if obj.endswith('/'):
                obj = obj[:-1]

            object_partition, objects = self.object_ring.get_nodes(
                account, container, obj)

            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}/{obj}'
            endpoints = []
            for element in objects:
                endpoint = endpoint_template.format(ip=element['ip'],
                                                    port=element['port'],
                                                    device=element['device'],
                                                    partition=object_partition,
                                                    account=account,
                                                    container=container,
                                                    obj=obj)
                endpoints.append(endpoint)

            start_response('200 OK', [])
            return [json.dumps(endpoints)]

        return self.app(env, start_response)
Example #7
def create_account(act):
    ts = utils.normalize_timestamp(time())
    account_ring = Ring(_testdir, ring_name="account")
    partition, nodes = account_ring.get_nodes(act)
    for node in nodes:
        # Note: we are just using the http_connect method in the object
        # controller here to talk to the account server nodes.
        conn = swift.proxy.controllers.obj.http_connect(
            node["ip"],
            node["port"],
            node["device"],
            partition,
            "PUT",
            "/" + act,
            {"X-Timestamp": ts, "x-trans-id": act},
        )
        resp = conn.getresponse()
        assert resp.status == 201
Example #8
class ObjectEndpoint(object):
    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='object_endpoint')
        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.object_ring = Ring(swift_dir, ring_name='object')

    def __call__(self, env, start_response):
        request = Request(env)

        url_prefix = '/object_endpoint/'

        if request.path.startswith(url_prefix):

            if request.method != 'GET':
                raise HTTPMethodNotAllowed()

            aco = split_path(request.path[len(url_prefix) - 1:], 1, 3, True)
            account = aco[0]
            container = aco[1]
            obj = aco[2]
            if obj.endswith('/'):
                obj = obj[:-1]

            object_partition, objects = self.object_ring.get_nodes(
                account, container, obj)

            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}/{obj}'
            endpoints = []
            for element in objects:
                endpoint = endpoint_template.format(ip=element['ip'],
                                                    port=element['port'],
                                                    device=element['device'],
                                                    partition=object_partition,
                                                    account=account,
                                                    container=container,
                                                    obj=obj)
                endpoints.append(endpoint)

            start_response('200 OK', [])
            return [json.dumps(endpoints)]

        return self.app(env, start_response)
Example #9
def get_container_list(account):
    # Takes an account name, e.g. AUTH_ss.
    # Returns a list of containers within this account.
    account_ring = Ring(swift_dir, ring_name="account")
    container_ring = Ring(swift_dir, ring_name="container")
    object_ring = Ring(swift_dir, ring_name="object")
    part, nodes = account_ring.get_nodes(account)

    URL="http://%s:%s/%s/%s/%s" % (nodes[0]['ip'], nodes[0]['port'], nodes[0]['device'],
                                   part, account)
    r = requests.get(URL)
    if r.status_code == 404:
        logger.warning("Account not existing yet")
    content = str(r.text)
    req = urllib2.Request(URL)
    container_list_hash = hashlib.md5(content).hexdigest()
    content = content.split("\n")
    content.remove('')
    return content, container_list_hash
Example #10
    def _get_db_info(self, account, container, number):
        server_type = 'container'
        obj_conf = self.configs['%s-server' % server_type]
        config_path = obj_conf[number]
        options = utils.readconf(config_path, 'app:container-server')
        root = options.get('devices')

        swift_dir = options.get('swift_dir', '/etc/swift')
        ring = Ring(swift_dir, ring_name=server_type)
        part, nodes = ring.get_nodes(account, container)
        for node in nodes:
            # assumes one to one mapping
            if node['port'] == int(options.get('bind_port')):
                device = node['device']
                break
        else:
            return None

        path_hash = utils.hash_path(account, container)
        _dir = utils.storage_directory('%ss' % server_type, part, path_hash)
        db_dir = os.path.join(root, device, _dir)
        db_file = os.path.join(db_dir, '%s.db' % path_hash)
        db = ContainerBroker(db_file)
        return db.get_info()
Example #11
    def _get_db_info(self, account, container, number):
        server_type = 'container'
        obj_conf = self.configs['%s-server' % server_type]
        config_path = obj_conf[number]
        options = utils.readconf(config_path, 'app:container-server')
        root = options.get('devices')

        swift_dir = options.get('swift_dir', '/etc/swift')
        ring = Ring(swift_dir, ring_name=server_type)
        part, nodes = ring.get_nodes(account, container)
        for node in nodes:
            # assumes one to one mapping
            if node['port'] == int(options.get('bind_port')):
                device = node['device']
                break
        else:
            return None

        path_hash = utils.hash_path(account, container)
        _dir = utils.storage_directory('%ss' % server_type, part, path_hash)
        db_dir = os.path.join(root, device, _dir)
        db_file = os.path.join(db_dir, '%s.db' % path_hash)
        db = ContainerBroker(db_file)
        return db.get_info()
Example #12
from swift.common.ring import Ring

if __name__ == '__main__':
    # example path for sample object
    # update this to an existing account/container/object
    # in your environment
    account = 'AUTH_9fbaa44c45ab4902a46110fd90629a79'
    container = 'testing'
    obj = 'testing.pem'

    ring = Ring('.', ring_name='object')
    part, nodes = ring.get_nodes(account, container, obj)
    print('nodes:')
    for n in nodes:
        print('node:', n)

    print('part =', part)
    morenodes = ring.get_more_nodes(part)
    print('more nodes:')
    for n in morenodes:
        print('node:', n)
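The partition and node returned by get_nodes map directly to a location on the storage node's disk. A sketch of that mapping, assuming the default /srv/node devices root, the standard objects data directory, and that the hash prefix/suffix are picked up from /etc/swift/swift.conf (the same hash_path/storage_directory combination that examples #10 and #11 use for container DBs):

import os

from swift.common.ring import Ring
from swift.common.utils import hash_path, storage_directory

ring = Ring('.', ring_name='object')
part, nodes = ring.get_nodes(account, container, obj)
name_hash = hash_path(account, container, obj)
for n in nodes:
    # e.g. /srv/node/sdb1/objects/<part>/<suffix>/<name_hash>
    print(os.path.join('/srv/node', n['device'],
                       storage_directory('objects', part, name_hash)))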
Example #13
    def _test_ondisk_data_after_write_with_crypto(self, policy_name):
        policy = storage_policy.POLICIES.get_by_name(policy_name)
        self._create_container(self.proxy_app, policy_name=policy_name)
        self._put_object(self.crypto_app, self.plaintext)
        self._post_object(self.crypto_app)

        # Verify container listing etag is encrypted by direct GET to container
        # server. We can use any server for all nodes since they all share same
        # devices dir.
        cont_server = self._test_context['test_servers'][3]
        cont_ring = Ring(self._test_context['testdir'], ring_name='container')
        part, nodes = cont_ring.get_nodes('a', self.container_name)
        for node in nodes:
            req = Request.blank('/%s/%s/a/%s' %
                                (node['device'], part, self.container_name),
                                method='GET',
                                query_string='format=json')
            resp = req.get_response(cont_server)
            listing = json.loads(resp.body)
            # sanity checks...
            self.assertEqual(1, len(listing))
            self.assertEqual('o', listing[0]['name'])
            self.assertEqual('application/test', listing[0]['content_type'])
            # verify encrypted etag value
            parts = listing[0]['hash'].rsplit(';', 1)
            crypto_meta_param = parts[1].strip()
            crypto_meta = crypto_meta_param[len('swift_meta='):]
            listing_etag_iv = load_crypto_meta(crypto_meta)['iv']
            exp_enc_listing_etag = base64.b64encode(
                encrypt(self.plaintext_etag.encode('ascii'),
                        self.km.create_key('/a/%s' % self.container_name),
                        listing_etag_iv)).decode('ascii')
            self.assertEqual(exp_enc_listing_etag, parts[0])

        # Verify diskfile data and metadata are encrypted
        ring_object = self.proxy_app.get_object_ring(int(policy))
        partition, nodes = ring_object.get_nodes('a', self.container_name, 'o')
        conf = {
            'devices': self._test_context["testdir"],
            'mount_check': 'false'
        }
        df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[policy]
        ondisk_data = []
        exp_enc_body = None
        for node_index, node in enumerate(nodes):
            df = df_mgr.get_diskfile(node['device'],
                                     partition,
                                     'a',
                                     self.container_name,
                                     'o',
                                     policy=policy)
            with df.open():
                meta = df.get_metadata()
                contents = b''.join(df.reader())
                metadata = dict((k.lower(), v) for k, v in meta.items())
                # verify on disk data - body
                body_iv = load_crypto_meta(
                    metadata['x-object-sysmeta-crypto-body-meta'])['iv']
                body_key_meta = load_crypto_meta(
                    metadata['x-object-sysmeta-crypto-body-meta'])['body_key']
                obj_key = self.km.create_key('/a/%s/o' % self.container_name)
                body_key = Crypto().unwrap_key(obj_key, body_key_meta)
                exp_enc_body = encrypt(self.plaintext, body_key, body_iv)
                ondisk_data.append((node, contents))

                # verify on disk user metadata
                enc_val, meta = metadata[
                    'x-object-transient-sysmeta-crypto-meta-fruit'].split(';')
                meta = meta.strip()[len('swift_meta='):]
                metadata_iv = load_crypto_meta(meta)['iv']
                exp_enc_meta = base64.b64encode(
                    encrypt(b'Kiwi', obj_key, metadata_iv)).decode('ascii')
                self.assertEqual(exp_enc_meta, enc_val)
                self.assertNotIn('x-object-meta-fruit', metadata)

                self.assertIn('x-object-transient-sysmeta-crypto-meta',
                              metadata)
                meta = load_crypto_meta(
                    metadata['x-object-transient-sysmeta-crypto-meta'])
                self.assertIn('key_id', meta)
                self.assertIn('path', meta['key_id'])
                self.assertEqual(
                    '/a/%s/%s' % (self.container_name, self.object_name),
                    meta['key_id']['path'])
                self.assertIn('v', meta['key_id'])
                self.assertEqual('2', meta['key_id']['v'])
                self.assertIn('cipher', meta)
                self.assertEqual(Crypto.cipher, meta['cipher'])

                # verify etag
                actual_enc_etag, _junk, actual_etag_meta = metadata[
                    'x-object-sysmeta-crypto-etag'].partition('; swift_meta=')
                etag_iv = load_crypto_meta(actual_etag_meta)['iv']
                exp_enc_etag = base64.b64encode(
                    encrypt(self.plaintext_etag.encode('ascii'), obj_key,
                            etag_iv)).decode('ascii')
                self.assertEqual(exp_enc_etag, actual_enc_etag)

                # verify etag hmac
                exp_etag_mac = hmac.new(obj_key,
                                        self.plaintext_etag.encode('ascii'),
                                        digestmod=hashlib.sha256).digest()
                exp_etag_mac = base64.b64encode(exp_etag_mac).decode('ascii')
                self.assertEqual(exp_etag_mac,
                                 metadata['x-object-sysmeta-crypto-etag-mac'])

                # verify etag override for container updates
                override = 'x-object-sysmeta-container-update-override-etag'
                parts = metadata[override].rsplit(';', 1)
                crypto_meta_param = parts[1].strip()
                crypto_meta = crypto_meta_param[len('swift_meta='):]
                listing_etag_iv = load_crypto_meta(crypto_meta)['iv']
                cont_key = self.km.create_key('/a/%s' % self.container_name)
                exp_enc_listing_etag = base64.b64encode(
                    encrypt(self.plaintext_etag.encode('ascii'), cont_key,
                            listing_etag_iv)).decode('ascii')
                self.assertEqual(exp_enc_listing_etag, parts[0])

        self._check_GET_and_HEAD(self.crypto_app)
        return exp_enc_body, ondisk_data
Example #14
    def _test_ondisk_data_after_write_with_crypto(self, policy_name):
        policy = storage_policy.POLICIES.get_by_name(policy_name)
        self._create_container(self.proxy_app, policy_name=policy_name)
        self._put_object(self.crypto_app, self.plaintext)
        self._post_object(self.crypto_app)

        # Verify container listing etag is encrypted by direct GET to container
        # server. We can use any server for all nodes since they all share same
        # devices dir.
        cont_server = self._test_context['test_servers'][3]
        cont_ring = Ring(self._test_context['testdir'], ring_name='container')
        part, nodes = cont_ring.get_nodes('a', self.container_name)
        for node in nodes:
            req = Request.blank('/%s/%s/a/%s'
                                % (node['device'], part, self.container_name),
                                method='GET', query_string='format=json')
            resp = req.get_response(cont_server)
            listing = json.loads(resp.body)
            # sanity checks...
            self.assertEqual(1, len(listing))
            self.assertEqual('o', listing[0]['name'])
            self.assertEqual('application/test', listing[0]['content_type'])
            # verify encrypted etag value
            parts = listing[0]['hash'].rsplit(';', 1)
            crypto_meta_param = parts[1].strip()
            crypto_meta = crypto_meta_param[len('swift_meta='):]
            listing_etag_iv = load_crypto_meta(crypto_meta)['iv']
            exp_enc_listing_etag = base64.b64encode(
                encrypt(self.plaintext_etag,
                        self.km.create_key('/a/%s' % self.container_name),
                        listing_etag_iv))
            self.assertEqual(exp_enc_listing_etag, parts[0])

        # Verify diskfile data and metadata are encrypted
        ring_object = self.proxy_app.get_object_ring(int(policy))
        partition, nodes = ring_object.get_nodes('a', self.container_name, 'o')
        conf = {'devices': self._test_context["testdir"],
                'mount_check': 'false'}
        df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[policy]
        ondisk_data = []
        exp_enc_body = None
        for node_index, node in enumerate(nodes):
            df = df_mgr.get_diskfile(node['device'], partition,
                                     'a', self.container_name, 'o',
                                     policy=policy)
            with df.open():
                meta = df.get_metadata()
                contents = ''.join(df.reader())
                metadata = dict((k.lower(), v) for k, v in meta.items())
                # verify on disk data - body
                body_iv = load_crypto_meta(
                    metadata['x-object-sysmeta-crypto-body-meta'])['iv']
                body_key_meta = load_crypto_meta(
                    metadata['x-object-sysmeta-crypto-body-meta'])['body_key']
                obj_key = self.km.create_key('/a/%s/o' % self.container_name)
                body_key = Crypto().unwrap_key(obj_key, body_key_meta)
                exp_enc_body = encrypt(self.plaintext, body_key, body_iv)
                ondisk_data.append((node, contents))

                # verify on disk user metadata
                enc_val, meta = metadata[
                    'x-object-transient-sysmeta-crypto-meta-fruit'].split(';')
                meta = meta.strip()[len('swift_meta='):]
                metadata_iv = load_crypto_meta(meta)['iv']
                exp_enc_meta = base64.b64encode(encrypt('Kiwi', obj_key,
                                                        metadata_iv))
                self.assertEqual(exp_enc_meta, enc_val)
                self.assertNotIn('x-object-meta-fruit', metadata)

                self.assertIn(
                    'x-object-transient-sysmeta-crypto-meta', metadata)
                meta = load_crypto_meta(
                    metadata['x-object-transient-sysmeta-crypto-meta'])
                self.assertIn('key_id', meta)
                self.assertIn('path', meta['key_id'])
                self.assertEqual(
                    '/a/%s/%s' % (self.container_name, self.object_name),
                    meta['key_id']['path'])
                self.assertIn('v', meta['key_id'])
                self.assertEqual('1', meta['key_id']['v'])
                self.assertIn('cipher', meta)
                self.assertEqual(Crypto.cipher, meta['cipher'])

                # verify etag
                actual_enc_etag, _junk, actual_etag_meta = metadata[
                    'x-object-sysmeta-crypto-etag'].partition('; swift_meta=')
                etag_iv = load_crypto_meta(actual_etag_meta)['iv']
                exp_enc_etag = base64.b64encode(encrypt(self.plaintext_etag,
                                                        obj_key, etag_iv))
                self.assertEqual(exp_enc_etag, actual_enc_etag)

                # verify etag hmac
                exp_etag_mac = hmac.new(
                    obj_key, self.plaintext_etag, digestmod=hashlib.sha256)
                exp_etag_mac = base64.b64encode(exp_etag_mac.digest())
                self.assertEqual(exp_etag_mac,
                                 metadata['x-object-sysmeta-crypto-etag-mac'])

                # verify etag override for container updates
                override = 'x-object-sysmeta-container-update-override-etag'
                parts = metadata[override].rsplit(';', 1)
                crypto_meta_param = parts[1].strip()
                crypto_meta = crypto_meta_param[len('swift_meta='):]
                listing_etag_iv = load_crypto_meta(crypto_meta)['iv']
                cont_key = self.km.create_key('/a/%s' % self.container_name)
                exp_enc_listing_etag = base64.b64encode(
                    encrypt(self.plaintext_etag, cont_key,
                            listing_etag_iv))
                self.assertEqual(exp_enc_listing_etag, parts[0])

        self._check_GET_and_HEAD(self.crypto_app)
        return exp_enc_body, ondisk_data
Example #15
class UtilizationAggregator(Daemon):
    def __init__(self, conf):
        self.conf = conf
        self.logger = get_logger(conf, log_route='utilization-aggregator')
        self.interval = int(conf.get('interval') or 60)
        self.aggregate_account = '.utilization'
        self.sample_account = '.transfer_record'
        conf_path = conf.get('__file__') or \
                    '/etc/swift/swift-utilization-aggregator.conf'
        request_tries = int(conf.get('request_tries') or 3)
        self.swift = InternalClient(conf_path,
                                    'Swift Utilization Aggregator',
                                    request_tries)
        self.report_interval = int(conf.get('report_interval') or 60)
        self.report_first_time = self.report_last_time = time()
        self.report_containers = 0
        self.report_objects = 0
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.rcache = join(self.recon_cache_path, 'object.recon')
        self.concurrency = int(conf.get('concurrency', 1))
        if self.concurrency < 1:
            raise ValueError("concurrency must be set to at least 1")
        self.processes = int(self.conf.get('processes', 0))
        self.process = int(self.conf.get('process', 0))
        self.container_ring = Ring('/etc/swift', ring_name='container')
        self.sample_rate = int(self.conf.get('sample_rate', 600))
        self.last_chk = iso8601_to_timestamp(self.conf.get(
            'service_start'))
        self.kinx_api_url = self.conf.get('kinx_api_url')

    def report(self, final=False):
        if final:
            elapsed = time() - self.report_first_time
            self.logger.info(_('Pass completed in %ds; %d containers,'
                               ' %d objects aggregated') %
                             (elapsed, self.report_containers,
                              self.report_objects))
            dump_recon_cache({'object_aggregation_pass': elapsed,
                              'aggregation_last_pass': self.report_containers},
                             self.rcache, self.logger)

        elif time() - self.report_last_time >= self.report_interval:
            elapsed = time() - self.report_first_time
            self.logger.info(_('Pass so far %ds; %d objects aggregated') %
                             (elapsed, self.report_objects))
            self.report_last_time = time()

    def run_once(self, *args, **kwargs):
        processes, process = self.get_process_values(kwargs)
        pool = GreenPool(self.concurrency)
        self.report_first_time = self.report_last_time = time()
        self.report_objects = 0
        self.report_containers = 0
        containers_to_delete = []
        try:
            self.logger.debug(_('Run begin'))
            containers, objects = \
                self.swift.get_account_info(self.sample_account)
            self.logger.info(_('Pass beginning; %s possible containers; %s '
                               'possible objects') % (containers, objects))
            for c in self.swift.iter_containers(self.sample_account):
                container = c['name']
                try:
                    timestamp, account = container.split('_', 1)
                    timestamp = float(timestamp)
                except ValueError:
                    self.logger.debug('ValueError: %s, '
                                      'need more than 1 value to unpack' % \
                                      container)
                else:
                    if processes > 0:
                        obj_proc = int(hashlib.md5(container).hexdigest(), 16)
                        if obj_proc % processes != process:
                            continue
                    n = (float(time()) // self.sample_rate) * self.sample_rate
                    if timestamp <= n:
                        containers_to_delete.append(container)
                        pool.spawn_n(self.aggregate_container, container)
            pool.waitall()
            for container in containers_to_delete:
                try:
                    self.logger.debug('delete container: %s' % container)
                    self.swift.delete_container(self.sample_account, container,
                                                acceptable_statuses=(
                                                    2, HTTP_NOT_FOUND,
                                                    HTTP_CONFLICT))
                except (Exception, Timeout) as err:
                    self.logger.exception(
                        _('Exception while deleting container %s %s') %
                        (container, str(err)))

            tenants_to_fillup = list()
            for c in self.swift.iter_containers(self.aggregate_account):
                tenant_id = c['name']
                if processes > 0:
                    c_proc = int(hashlib.md5(tenant_id).hexdigest(), 16)
                    if c_proc % processes != process:
                        continue
                tenants_to_fillup.append(tenant_id)
            # fill up lost usage data
            self.fillup_lossed_usage_data(tenants_to_fillup)

            self.logger.debug(_('Run end'))
            self.report(final=True)
        except (Exception, Timeout):
            self.logger.exception(_('Unhandled exception'))

    def run_forever(self, *args, **kwargs):
        """
        Executes passes forever, looking for samples to aggregate.

        :param args: Extra args to fulfill the Daemon interface; this daemon
                     has no additional args.
        :param kwargs: Extra keyword args to fulfill the Daemon interface; this
                       daemon has no additional keyword args.
        """
        sleep(random() * self.interval)
        while True:
            begin = time()
            try:
                self.run_once(*args, **kwargs)
            except (Exception, Timeout):
                self.logger.exception(_('Unhandled exception'))
            elapsed = time() - begin
            if elapsed < self.interval:
                sleep(random() * (self.interval - elapsed))

    def get_process_values(self, kwargs):
        """
        Gets the processes, process from the kwargs if those values exist.

        Otherwise, return processes, process set in the config file.

        :param kwargs: Keyword args passed into the run_forever(), run_once()
                       methods.  They have values specified on the command
                       line when the daemon is run.
        """
        if kwargs.get('processes') is not None:
            processes = int(kwargs['processes'])
        else:
            processes = self.processes

        if kwargs.get('process') is not None:
            process = int(kwargs['process'])
        else:
            process = self.process

        if process < 0:
            raise ValueError(
                'process must be an integer greater than or equal to 0')

        if processes < 0:
            raise ValueError(
                'processes must be an integer greater than or equal to 0')

        if processes and process >= processes:
            raise ValueError(
                'process must be less than or equal to processes')

        return processes, process

    def aggregate_container(self, container):
        start_time = time()
        try:
            objs_to_delete = list()
            bytes_recvs = dict()
            bytes_sents = dict()

            ts, tenant_id, account = container.split('_', 2)
            ts = int(float(ts))

            for o in self.swift.iter_objects(self.sample_account, container):
                name = o['name']
                objs_to_delete.append(name)
                ts, bytes_rv, bytes_st, trans_id, client_ip = name.split('/')
                bill_type = self.get_billtype_by_client_ip(client_ip, ts)
                bytes_recvs[bill_type] = bytes_recvs.get(bill_type,
                                                         0) + int(bytes_rv)
                bytes_sents[bill_type] = bytes_sents.get(bill_type,
                                                         0) + int(bytes_st)
                self.report_objects += 1

            for o in objs_to_delete:
                self.swift.delete_object(self.sample_account, container, o)

            for bill_type, bt_rv in bytes_recvs.items():
                t_object = 'transfer/%d/%d/%d_%d_%d' % (ts, bill_type, bt_rv,
                                                        bytes_sents[bill_type],
                                                        self.report_objects)
                self._hidden_update(tenant_id, t_object)
        except (Exception, Timeout) as err:
            self.logger.increment('errors')
            self.logger.exception(
                _('Exception while aggregating sample %s %s') %
                (container, str(err)))

        self.logger.timing_since('timing', start_time)
        self.report()

    def account_info(self, tenant_id, timestamp):
        path = '/v1/%s/%s?prefix=usage/%d&limit=1' % (self.aggregate_account,
                                                      tenant_id, timestamp)
        resp = self.swift.make_request('GET', path, {}, (2,))
        if len(resp.body) == 0:
            return 0, 0, 0
        usages = resp.body.split('/', 2)[2].rstrip()
        cont_cnt, obj_cnt, bt_used = usages.split('_')
        return int(cont_cnt), int(obj_cnt), int(bt_used)

    def _hidden_update(self, container, obj, method='PUT'):
        hidden_path = '/%s/%s/%s' % (self.aggregate_account, container, obj)
        part, nodes = self.container_ring.get_nodes(self.aggregate_account,
                                                    container)
        for node in nodes:
            ip = node['ip']
            port = node['port']
            dev = node['device']
            action_headers = dict()
            action_headers['user-agent'] = 'aggregator'
            action_headers['X-Timestamp'] = normalize_timestamp(time())
            action_headers['referer'] = 'aggregator-daemon'
            action_headers['x-size'] = '0'
            action_headers['x-content-type'] = "text/plain"
            action_headers['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e'

            conn = http_connect(ip, port, dev, part, method, hidden_path,
                                action_headers)
            response = conn.getresponse()
            response.read()

    def fillup_lossed_usage_data(self, tenants):
        now = (float(time()) // self.sample_rate) * self.sample_rate
        path = '/v1/%s/%s?prefix=usage/%d&limit=1'

        for t in tenants:
            last = self.last_chk
            cont_cnt = obj_cnt = bt_used = -1
            while last <= now:
                p = path % (self.aggregate_account, t, last)
                resp = self.swift.make_request('GET', p, {}, (2,))
                if len(resp.body) != 0:
                    usages = resp.body.split('/', 2)[2].rstrip()
                    c, o, bt = usages.split('_')
                    cont_cnt = int(c)
                    obj_cnt = int(o)
                    bt_used = int(bt)
                else:
                    before = last - self.sample_rate
                    if cont_cnt == -1:
                        cont_cnt, obj_cnt, bt_used = \
                            self.account_info(self.aggregate_account, before)
                    obj = 'usage/%d/%d_%d_%d' % (last, cont_cnt, obj_cnt,
                                                 bt_used)
                    self._hidden_update(t, obj)
                last += self.sample_rate
        self.last_chk = now

    def get_billtype_by_client_ip(self, client_ip, timestamp):
        end_ts = timestamp_to_iso8601(timestamp + self.sample_rate - 1)
        start_ts = timestamp_to_iso8601(timestamp)

        params = {'start': start_ts, 'end': end_ts}
        path = self.kinx_api_url + '/?%s' % (urllib.urlencode(params))

        data = json.loads(urllib.urlopen(path).read())
        bill_type = -1
        for r in data['ip_ranges']:
            bill_type = r['bill_type']
            for cidr in r['ip_range']:
                if self.ip_in_cidr(client_ip, cidr):
                    return bill_type
        return bill_type

    def ip_in_cidr(self, client_ip, cidr):
        bt_to_bits = lambda b: bin(int(b))[2:].rjust(8, '0')
        ip_to_bits = lambda ip: ''.join([bt_to_bits(b) for b in ip.split('.')])
        client_ip_bits = ip_to_bits(client_ip)
        ip, snet = cidr.split('/')
        ip_bits = ip_to_bits(ip)
        if client_ip_bits[:int(snet)] == ip_bits[:int(snet)]:
            return True
        else:
            return False
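On Python 3 (or with the ipaddress backport), the hand-rolled bit matching in ip_in_cidr can be replaced with the stdlib ipaddress module; a sketch of an equivalent drop-in method:

    def ip_in_cidr(self, client_ip, cidr):
        # Equivalent membership test; strict=False tolerates host bits
        # set in the CIDR (e.g. 10.0.0.5/24).
        import ipaddress
        network = ipaddress.ip_network(cidr, strict=False)
        return ipaddress.ip_address(client_ip) in network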
Example #16
class TransitionMiddleware(object):
    def __init__(self, app, conf, *args, **kwargs):
        self.app = app
        self.conf = conf
        self.logger = get_logger(self.conf, log_route='transition')
        self.container_ring = Ring('/etc/swift', ring_name='container')
        self.glacier_account_prefix = '.glacier_'
        self.temp_path = conf.get('temp_path', '/var/cache/s3/')

    def _init_glacier(self):
        con = Layer2(region_name='ap-northeast-1')
        return con.create_vault('swift-s3-transition')

    def transition(self, env):
        # GET Object body
        req = Request(copy(env))
        req.method = 'GET'
        resp = req.get_response(self.app)

        obj_body = resp.body

        # Upload to Glacier
        tmpfile = self.save_to_tempfile(obj_body)
        try:
            glacier = self._init_glacier()
            archive_id = glacier.upload_archive(tmpfile)
            glacier_obj = make_glacier_hidden_object_name(self.obj, archive_id)
        except Exception as e:
            return Response(status=HTTP_INTERNAL_SERVER_ERROR, body=e.message)
        finally:
            self.delete_tempfile(tmpfile)

        # Truncate the object to 0 KB
        req = Request(copy(env))
        req.headers[GLACIER_FLAG_META] = True
        resp = req.get_response(self.app)

        # Record in the Glacier hidden account
        glacier_account = self.glacier_account_prefix + self.account
        part, nodes = self.container_ring.get_nodes(glacier_account,
                                                    self.container)
        hidden_path = '/%s/%s/%s' % (glacier_account, self.container,
                                     glacier_obj)
        for node in nodes:
            ip = node['ip']
            port = node['port']
            dev = node['device']
            headers = dict()
            headers['user-agent'] = 'transition-middleware'
            headers['X-Timestamp'] = normalize_timestamp(time.time())
            headers['referer'] = req.as_referer()
            headers['x-size'] = '0'
            headers['x-content-type'] = 'text/plain'
            headers['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e'

            conn = http_connect(ip, port, dev, part, 'PUT', hidden_path,
                                headers)
            conn.getresponse().read()
        return Response(status=HTTP_NO_CONTENT)

    def save_to_tempfile(self, data):
        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(bufsize=0, delete=False,
                                             dir=self.temp_path) as temp:
                temp.write(data)
                temp.flush()
                tmp_path = temp.name
        except Exception as e:
            self.logger.error(e)
        return tmp_path

    def delete_tempfile(self, tmppath):
        os.remove(tmppath)

    def __call__(self, env, start_response):
        req = Request(env)
        method = req.method
        self.version, self.account, self.container, self.obj = split_path(
            req.path, 0, 4, True)
        if not self.obj:
            return self.app(env, start_response)

        if method == 'POST' and \
           'X-S3-Object-Transition' in req.headers:
            return self.transition(env)(env, start_response)

        return self.app(env, start_response)
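The Glacier hidden-object helpers used here and in the restorer below (make_glacier_hidden_object_name, get_glacier_objname_from_hidden_object, get_glacier_key_from_hidden_object) are not shown. A plausible minimal sketch, assuming the archive or job ID is simply appended to the object name after a reserved separator:

GLACIER_SEPARATOR = '-G-'  # hypothetical separator


def make_glacier_hidden_object_name(obj, key):
    # Encode the Glacier archive/job ID into the hidden object's name.
    return '%s%s%s' % (obj, GLACIER_SEPARATOR, key)


def get_glacier_objname_from_hidden_object(hidden):
    return hidden.rsplit(GLACIER_SEPARATOR, 1)[0]


def get_glacier_key_from_hidden_object(hidden):
    return hidden.rsplit(GLACIER_SEPARATOR, 1)[1]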
Example #17
class ObjectRestorer(Daemon):
    """
    Daemon that queries the internal hidden restoring-objects account to
    discover objects that need to be restored from Glacier.

    :param conf: The daemon configuration.
    """

    def __init__(self, conf):
        self.conf = conf
        self.container_ring = Ring('/etc/swift', ring_name='container')
        self.logger = get_logger(conf, log_route='object-restorer')
        self.logger.set_statsd_prefix('s3-object-restorer')
        self.interval = int(conf.get('interval') or 300)
        self.restoring_object_account = '.s3_restoring_objects'
        self.expiring_restored_account = '.s3_expiring_restored_objects'
        self.glacier_account_prefix = '.glacier_'
        self.todo_container = 'todo'
        self.restoring_container = 'restoring'
        conf_path = '/etc/swift/s3-object-restorer.conf'
        request_tries = int(conf.get('request_tries') or 3)
        self.glacier = self._init_glacier()
        self.glacier_tmpdir = conf.get('temp_path', '/var/cache/s3/')
        self.swift = InternalClient(conf_path,
                                    'Swift Object Restorer',
                                    request_tries)
        self.report_interval = int(conf.get('report_interval') or 300)
        self.report_first_time = self.report_last_time = time()
        self.report_objects = 0
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.rcache = join(self.recon_cache_path, 'object.recon')
        self.concurrency = int(conf.get('concurrency', 1))
        if self.concurrency < 1:
            raise ValueError("concurrency must be set to at least 1")
        self.processes = int(self.conf.get('processes', 0))
        self.process = int(self.conf.get('process', 0))
        self.client = Client(self.conf.get('sentry_sdn', ''))

    def _init_glacier(self):
        con = Layer2(region_name='ap-northeast-1')
        return con.get_vault('swift-s3-transition')

    def report(self, final=False):
        """
        Emits a log line report of the progress so far, or the final progress
        if final=True.

        :param final: Set to True for the last report once the expiration pass
                      has completed.
        """
        if final:
            elapsed = time() - self.report_first_time
            self.logger.info(_('Pass completed in %ds; %d objects restored') %
                             (elapsed, self.report_objects))
            dump_recon_cache({'object_expiration_pass': elapsed,
                              'expired_last_pass': self.report_objects},
                             self.rcache, self.logger)
        elif time() - self.report_last_time >= self.report_interval:
            elapsed = time() - self.report_first_time
            self.logger.info(_('Pass so far %ds; %d objects restored') %
                             (elapsed, self.report_objects))
            self.report_last_time = time()

    def run_once(self, *args, **kwargs):
        """
        Executes a single pass, looking for objects to restore.

        :param args: Extra args to fulfill the Daemon interface; this daemon
                     has no additional args.
        :param kwargs: Extra keyword args to fulfill the Daemon interface; this
                       daemon accepts processes and process keyword args.
                       These will override the values from the config file if
                       provided.
        """
        processes, process = self.get_process_values(kwargs)
        pool = GreenPool(self.concurrency)
        self.report_first_time = self.report_last_time = time()
        self.report_objects = 0
        try:
            self.logger.debug(_('Run begin'))

            for o in self.swift.iter_objects(self.restoring_object_account,
                                             self.todo_container):
                obj = o['name'].encode('utf8')
                if processes > 0:
                    obj_process = int(
                        hashlib.md5('%s/%s' % (self.todo_container, obj)).
                        hexdigest(), 16)
                    if obj_process % processes != process:
                        continue
                pool.spawn_n(self.start_object_restoring, obj)

            pool.waitall()

            for o in self.swift.iter_objects(self.restoring_object_account,
                                             self.restoring_container):
                obj = o['name'].encode('utf8')
                if processes > 0:
                    obj_process = int(
                        hashlib.md5('%s/%s' % (self.restoring_container, obj)).
                        hexdigest(), 16)
                    if obj_process % processes != process:
                        continue
                pool.spawn_n(self.check_object_restored, obj)

            pool.waitall()

            self.logger.debug(_('Run end'))
            self.report(final=True)
        except (Exception, Timeout) as e:
            report_exception(self.logger, _('Unhandled exception'), self.client)

    def run_forever(self, *args, **kwargs):
        """
        Executes passes forever, looking for objects to restore.

        :param args: Extra args to fulfill the Daemon interface; this daemon
                     has no additional args.
        :param kwargs: Extra keyword args to fulfill the Daemon interface; this
                       daemon has no additional keyword args.
        """
        sleep(random() * self.interval)
        while True:
            begin = time()
            try:
                self.run_once(*args, **kwargs)
            except (Exception, Timeout):
                report_exception(self.logger, _('Unhandled exception'), self.client)
            elapsed = time() - begin
            if elapsed < self.interval:
                sleep(random() * (self.interval - elapsed))

    def get_process_values(self, kwargs):
        """
        Gets the processes, process from the kwargs if those values exist.

        Otherwise, return processes, process set in the config file.

        :param kwargs: Keyword args passed into the run_forever(), run_once()
                       methods.  They have values specified on the command
                       line when the daemon is run.
        """
        if kwargs.get('processes') is not None:
            processes = int(kwargs['processes'])
        else:
            processes = self.processes

        if kwargs.get('process') is not None:
            process = int(kwargs['process'])
        else:
            process = self.process

        if process < 0:
            raise ValueError(
                'process must be an integer greater than or equal to 0')

        if processes < 0:
            raise ValueError(
                'processes must be an integer greater than or equal to 0')

        if processes and process >= processes:
            raise ValueError(
                'process must be less than or equal to processes')

        return processes, process

    def start_object_restoring(self, obj):
        start_time = time()
        try:
            actual_obj = obj
            account, container, obj = actual_obj.split('/', 2)
            archiveId = self.get_archiveid(account, container, obj)

            if archiveId is None:
                self.swift.delete_object(self.restoring_object_account,
                                         self.todo_container, actual_obj)
                return

            jobId = self.glacier.retrieve_archive(archiveId).id
            restoring_obj = make_glacier_hidden_object_name(actual_obj, jobId)

            meta_prefix = 'X-Object-Meta'
            meta = self.swift.get_object_metadata(account, container, obj,
                                                  metadata_prefix=meta_prefix)
            meta = {'X-Object-Meta' + key: value for key, value in
                    meta.iteritems()}
            self.update_action_hidden(self.restoring_object_account,
                                      self.restoring_container,
                                      restoring_obj, metadata=meta)

            self.swift.delete_object(self.restoring_object_account,
                                     self.todo_container, actual_obj)
            self.report_objects += 1
            self.logger.increment('start')
        except (Exception, Timeout) as err:
            self.logger.increment('errors')
            report_exception(self.logger.exception,
                             _('Exception while restoring object %s. %s') %
                             (obj, str(err)), self.client)
        self.logger.timing_since('timing', start_time)
        self.report()

    def get_archiveid(self, account, container, obj):
        glacier_account = '%s%s' % (self.glacier_account_prefix, account)

        glacier_obj = None
        for o in get_objects_by_prefix(glacier_account, container, obj,
                                       swift_client=self.swift):
            name = get_glacier_objname_from_hidden_object(o)
            if name == obj:
                glacier_obj = o
                break
        if glacier_obj is None:
            return None

        return get_glacier_key_from_hidden_object(glacier_obj)

    def check_object_restored(self, restoring_object):
        actual_obj = get_glacier_objname_from_hidden_object(restoring_object)
        jobId = get_glacier_key_from_hidden_object(restoring_object)
        try:
            path = '/v1/%s' % actual_obj
            resp = self.swift.make_request('GET', path, {}, (2, 4,))
            if resp.status_int == 404:
                raise Exception('Object Not Found: %s' % actual_obj)

            job = self.glacier.get_job(job_id=jobId)
            if not job.completed:
                return
            self.complete_restore(actual_obj, job)
        except Exception as e:
            # If the job ID has expired, retry the restore.
            if not e.message.startswith('Object Not Found:'):
                self.start_object_restoring(actual_obj)
            self.logger.info(e)

        self.swift.delete_object(self.restoring_object_account,
                                 self.restoring_container, restoring_object)

    def complete_restore(self, actual_obj, job):
        tmppath = tempfile.NamedTemporaryFile(bufsize=0, delete=False,
                                              dir=self.glacier_tmpdir).name
        try:
            job.download_to_file(filename=tmppath)

            prefix = 'X-Object-Meta'
            a, c, o = actual_obj.split('/', 2)
            metadata = self.swift.get_object_metadata(a, c, o,
                                                      metadata_prefix=prefix)
            metadata = {'X-Object-Meta' + key: value for key, value in metadata
            .iteritems()}
            days = int(metadata['X-Object-Meta-s3-restore-expire-days'])
            exp_time = normalize_delete_at_timestamp(calc_nextDay(time()) +
                                                     (days - 1) * 86400)

            # send restored object to proxy server
            path = '/v1/%s' % actual_obj
            metadata['X-Object-Meta-S3-Restored'] = True
            exp_date = strftime("%a, %d %b %Y %H:%M:%S GMT",
                                gmtime(float(exp_time)))

            metadata['X-Object-Meta-s3-restore'] = 'ongoing-request="false", ' \
                                                   'expiry-date="%s"' % exp_date
            metadata['Content-Length'] = os.path.getsize(tmppath)
            del metadata['X-Object-Meta-s3-restore-expire-days']

            obj_body = open(tmppath, 'r')
            self.swift.make_request('PUT', path, metadata, (2,),
                                    body_file=obj_body)

            # Add to .s3_expiring_restored_objects
            self.update_action_hidden(self.expiring_restored_account,
                                      exp_time, actual_obj)
            obj_body.close()
            self.logger.increment('done')
        except UnexpectedResponse as e:
            if e.resp.status_int == 404:
                self.logger.error('Restoring object not found - %s' %
                                  actual_obj)
        except Exception as e:
            self.logger.increment('errors')
            self.logger.debug(e)
        finally:
            os.remove(tmppath)

    def compute_obj_md5(self, obj):
        etag = hashlib.md5()
        etag.update(obj)
        etag = etag.hexdigest()
        return etag

    def update_action_hidden(self, account, container, obj, metadata=None):
        hidden_path = '/%s/%s/%s' % (account, container, obj)
        part, nodes = self.container_ring.get_nodes(account, container)
        for node in nodes:
            ip = node['ip']
            port = node['port']
            dev = node['device']
            action_headers = dict()
            action_headers['user-agent'] = 'restore-daemon'
            action_headers['X-Timestamp'] = normalize_timestamp(time())
            action_headers['referer'] = 'restore-daemon'
            action_headers['x-size'] = '0'
            action_headers['x-content-type'] = "text/plain"
            action_headers['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e'

            if metadata:
                action_headers.update(metadata)

            conn = http_connect(ip, port, dev, part, 'PUT', hidden_path,
                                action_headers)
            response = conn.getresponse()
            response.read()
Example #18
class ListEndpointsMiddleware(object):
    """
    List endpoints for an object, account or container.

    See above for a full description.

    Uses configuration parameter `swift_dir` (default `/etc/swift`).

    :param app: The next WSGI filter or app in the paste.deploy
                chain.
    :param conf: The configuration dict for the middleware.
    """

    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='endpoints')
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.account_ring = Ring(self.swift_dir, ring_name='account')
        self.container_ring = Ring(self.swift_dir, ring_name='container')
        self.endpoints_path = conf.get('list_endpoints_path', '/endpoints/')
        if not self.endpoints_path.endswith('/'):
            self.endpoints_path += '/'
        self.default_response_version = 1.0
        self.response_map = {
            1.0: self.v1_format_response,
            2.0: self.v2_format_response,
        }

    def get_object_ring(self, policy_idx):
        """
        Get the ring object to use to handle a request based on its policy.

        :policy_idx: policy index as defined in swift.conf
        :returns: appropriate ring object
        """
        return POLICIES.get_object_ring(policy_idx, self.swift_dir)

    def _parse_version(self, raw_version):
        err_msg = 'Unsupported version %r' % raw_version
        try:
            version = float(raw_version.lstrip('v'))
        except ValueError:
            raise ValueError(err_msg)
        if not any(version == v for v in RESPONSE_VERSIONS):
            raise ValueError(err_msg)
        return version

    def _parse_path(self, request):
        """
        Parse path parts of request into a tuple of version, account,
        container, obj.  Unspecified path parts are filled in as None,
        except version which is always returned as a float using the
        configured default response version if not specified in the
        request.

        :param request: the swob request

        :returns: parsed path parts as a tuple with version filled in as
                  configured default response version if not specified.
        :raises: ValueError if path is invalid, message will say why.
        """
        clean_path = request.path[len(self.endpoints_path) - 1:]
        # try to peel off version
        try:
            raw_version, rest = split_path(clean_path, 1, 2, True)
        except ValueError:
            raise ValueError('No account specified')
        try:
            version = self._parse_version(raw_version)
        except ValueError:
            if raw_version.startswith('v') and '_' not in raw_version:
                # looks more like an invalid version than an account
                raise
            # probably no version specified, but if the client really
            # said /endpoints/v_3/account they'll probably be sorta
            # confused by the useless response and lack of error.
            version = self.default_response_version
            rest = clean_path
        else:
            rest = '/' + rest if rest else '/'
        try:
            account, container, obj = split_path(rest, 1, 3, True)
        except ValueError:
            raise ValueError('No account specified')
        return version, account, container, obj

    def v1_format_response(self, req, endpoints, **kwargs):
        return Response(json.dumps(endpoints),
                        content_type='application/json')

    def v2_format_response(self, req, endpoints, storage_policy_index,
                           **kwargs):
        resp = {
            'endpoints': endpoints,
            'headers': {},
        }
        if storage_policy_index is not None:
            resp['headers'][
                'X-Backend-Storage-Policy-Index'] = str(storage_policy_index)
        return Response(json.dumps(resp),
                        content_type='application/json')

    def __call__(self, env, start_response):
        request = Request(env)
        if not request.path.startswith(self.endpoints_path):
            return self.app(env, start_response)

        if request.method != 'GET':
            return HTTPMethodNotAllowed(
                req=request, headers={"Allow": "GET"})(env, start_response)

        try:
            version, account, container, obj = self._parse_path(request)
        except ValueError as err:
            return HTTPBadRequest(str(err))(env, start_response)

        if account is not None:
            account = unquote(account)
        if container is not None:
            container = unquote(container)
        if obj is not None:
            obj = unquote(obj)

        storage_policy_index = None
        if obj is not None:
            container_info = get_container_info(
                {'PATH_INFO': '/v1/%s/%s' % (account, container)},
                self.app, swift_source='LE')
            storage_policy_index = container_info['storage_policy']
            obj_ring = self.get_object_ring(storage_policy_index)
            partition, nodes = obj_ring.get_nodes(
                account, container, obj)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}/{obj}'
        elif container is not None:
            partition, nodes = self.container_ring.get_nodes(
                account, container)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}'
        else:
            partition, nodes = self.account_ring.get_nodes(
                account)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}'

        endpoints = []
        for node in nodes:
            endpoint = endpoint_template.format(
                ip=node['ip'],
                port=node['port'],
                device=node['device'],
                partition=partition,
                account=quote(account),
                container=quote(container or ''),
                obj=quote(obj or ''))
            endpoints.append(endpoint)

        resp = self.response_map[version](
            request, endpoints=endpoints,
            storage_policy_index=storage_policy_index)
        return resp(env, start_response)
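To make the version peeling in _parse_path()/_parse_version() concrete, here is a standalone sketch; RESPONSE_VERSIONS is an assumption inferred from the response_map keys above (1.0 and 2.0):

# Standalone sketch; RESPONSE_VERSIONS is assumed from response_map above.
RESPONSE_VERSIONS = (1.0, 2.0)

def parse_version(raw_version):
    # 'v2' and '2.0' both normalize to the float 2.0.
    version = float(raw_version.lstrip('v'))
    if version not in RESPONSE_VERSIONS:
        raise ValueError('Unsupported version %r' % raw_version)
    return version

assert parse_version('v2') == 2.0
assert parse_version('1.0') == 1.0
# parse_version('v3') raises ValueError.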
Example #19
class ListEndpointsMiddleware(object):
    """
    List endpoints for an object, account or container.

    See above for a full description.

    Uses configuration parameter `swift_dir` (default `/etc/swift`).

    :param app: The next WSGI filter or app in the paste.deploy
                chain.
    :param conf: The configuration dict for the middleware.
    """

    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='endpoints')
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.account_ring = Ring(self.swift_dir, ring_name='account')
        self.container_ring = Ring(self.swift_dir, ring_name='container')
        self.endpoints_path = conf.get('list_endpoints_path', '/endpoints/')
        if not self.endpoints_path.endswith('/'):
            self.endpoints_path += '/'

    def get_object_ring(self, policy_idx):
        """
        Get the ring object to use to handle a request based on its policy.

        :policy_idx: policy index as defined in swift.conf
        :returns: appropriate ring object
        """
        return POLICIES.get_object_ring(policy_idx, self.swift_dir)

    def __call__(self, env, start_response):
        request = Request(env)
        if not request.path.startswith(self.endpoints_path):
            return self.app(env, start_response)

        if request.method != 'GET':
            return HTTPMethodNotAllowed(
                req=request, headers={"Allow": "GET"})(env, start_response)

        try:
            clean_path = request.path[len(self.endpoints_path) - 1:]
            account, container, obj = \
                split_path(clean_path, 1, 3, True)
        except ValueError:
            return HTTPBadRequest('No account specified')(env, start_response)

        if account is not None:
            account = unquote(account)
        if container is not None:
            container = unquote(container)
        if obj is not None:
            obj = unquote(obj)

        if obj is not None:
            # remove 'endpoints' from call to get_container_info
            stripped = request.environ
            if stripped['PATH_INFO'][:len(self.endpoints_path)] == \
                    self.endpoints_path:
                stripped['PATH_INFO'] = "/v1/" + \
                    stripped['PATH_INFO'][len(self.endpoints_path):]
            container_info = get_container_info(
                stripped, self.app, swift_source='LE')
            obj_ring = self.get_object_ring(container_info['storage_policy'])
            partition, nodes = obj_ring.get_nodes(
                account, container, obj)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}/{obj}'
        elif container is not None:
            partition, nodes = self.container_ring.get_nodes(
                account, container)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}'
        else:
            partition, nodes = self.account_ring.get_nodes(
                account)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}'

        endpoints = []
        for node in nodes:
            endpoint = endpoint_template.format(
                ip=node['ip'],
                port=node['port'],
                device=node['device'],
                partition=partition,
                account=quote(account),
                container=quote(container or ''),
                obj=quote(obj or ''))
            endpoints.append(endpoint)

        return Response(json.dumps(endpoints),
                        content_type='application/json')(env, start_response)
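This variant returns the v1-style JSON list directly. In either case, middleware like this is conventionally hooked into the proxy pipeline through a paste.deploy filter factory; a minimal sketch of that wiring (the factory itself is not shown in the excerpt):

def filter_factory(global_conf, **local_conf):
    # Merge pipeline-wide settings with this filter's local settings.
    conf = global_conf.copy()
    conf.update(local_conf)

    def list_endpoints_filter(app):
        return ListEndpointsMiddleware(app, conf)
    return list_endpoints_filter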
Example #20
class ContainerCrawler(object):
    def __init__(self, conf, handler_class, logger=None):
        if not handler_class:
            raise RuntimeError('Handler class must be defined')

        self.logger = logger
        self.conf = conf
        self.root = conf['devices']
        self.bulk = config_true_value(conf.get('bulk_process', False))
        self.interval = 10
        self.swift_dir = '/etc/swift'
        self.container_ring = Ring(self.swift_dir, ring_name='container')

        self.status_dir = conf['status_dir']
        self.myips = whataremyips('0.0.0.0')
        self.items_chunk = conf['items_chunk']
        self.poll_interval = conf.get('poll_interval', 5)
        self.handler_class = handler_class
        self._in_progress_containers = set()

        if self.bulk:
            self.workers = 1

        self._init_workers(conf)
        self._init_ic_pool(conf)

        self.log('debug', 'Created the Container Crawler instance')

    def _init_workers(self, conf):
        if not self.bulk:
            self.workers = conf.get('workers', 10)
            self.worker_pool = eventlet.GreenPool(self.workers)
            self.work_queue = eventlet.queue.Queue(self.workers * 2)

            for _ in xrange(self.workers):
                self.worker_pool.spawn_n(self._worker)

        self.enumerator_workers = conf.get('enumerator_workers', 10)
        self.enumerator_pool = eventlet.GreenPool(self.enumerator_workers)
        self.enumerator_queue = eventlet.queue.Queue(self.enumerator_workers)

        for _ in xrange(self.enumerator_workers):
            self.enumerator_pool.spawn_n(self._enumerator)

    def _init_ic_pool(self, conf):
        pool_size = self.workers
        self._swift_pool = eventlet.pools.Pool(
            create=lambda: create_internal_client(conf, self.swift_dir),
            min_size=pool_size,
            max_size=pool_size)

    def _worker(self):
        while 1:
            try:
                work = self.work_queue.get()
            except:
                self.log(
                    'error', 'Failed to fetch items from the queue: %s' %
                    traceback.format_exc())
                eventlet.sleep(100)
                continue

            try:
                if not work:
                    break

                (row, handler), container_job = work
                with self._swift_pool.item() as swift_client:
                    handler.handle(row, swift_client)
                container_job.complete_task()
            except RetryError:
                container_job.complete_task(retry=True)
            except:
                container_job.complete_task(error=True)
                self.log(
                    'error', u'Failed to handle row %s (%s): %r' %
                    (row['ROWID'], row['name'].decode('utf-8'),
                     traceback.format_exc()))
            finally:
                self.work_queue.task_done()

    def _enumerator(self):
        job = ContainerJob()
        while 1:
            try:
                work = self.enumerator_queue.get()
            except:
                self.log(
                    'error', 'Failed to fetch containers to enumerate %s' %
                    traceback.format_exc())
                eventlet.sleep(100)
                continue

            try:
                if not work:
                    break

                settings, per_account = work
                handler = self.handler_class(self.status_dir,
                                             settings,
                                             per_account=per_account)
                owned, verified, last_row, db_id = self.handle_container(
                    handler, job)
                if not owned and not verified:
                    continue

                if self.bulk or job.wait_all() == ContainerJob.PASS_SUCCEEDED:
                    handler.save_last_row(last_row, db_id)
                    self.log(
                        'info', 'Processed %d rows; verified %d rows; '
                        'last row: %d' % (owned, verified, last_row))
            except SkipContainer:
                self.log('info',
                         "Skipping %(account)s/%(container)s" % settings)
            except RetryError:
                # Can appear from the bulk handling code.
                # TODO: we should do a better job of tying the bulk handling
                # code into this model.
                pass
            except:
                account = settings['account']
                container = settings['container']
                self.log(
                    'error', "Failed to process %s/%s with %s" %
                    (account, container, self.handler_class.__name__))
                self.log('error', traceback.format_exc())
            finally:
                if work:
                    self._in_progress_containers.remove(
                        (work[0]['account'], work[0]['container']))
                self.enumerator_queue.task_done()

    def log(self, level, message):
        if not self.logger:
            return
        getattr(self.logger, level)(message)

    def get_broker(self, account, container, part, node):
        db_hash = hash_path(account.encode('utf-8'), container.encode('utf-8'))
        db_dir = storage_directory(DATADIR, part, db_hash)
        db_path = os.path.join(self.root, node['device'], db_dir,
                               db_hash + '.db')
        return ContainerBroker(db_path, account=account, container=container)

    def submit_items(self, handler, rows, job):
        if not rows:
            return

        if self.bulk:
            with self._swift_pool.item() as swift_client:
                handler.handle(rows, swift_client)
            return

        job.submit_tasks(map(lambda row: (row, handler), rows),
                         self.work_queue)

    def process_items(self, handler, rows, nodes_count, node_id, job):
        owned_rows = filter(lambda row: row['ROWID'] % nodes_count == node_id,
                            rows)
        verified_rows = filter(
            lambda row: row['ROWID'] % nodes_count != node_id, rows)

        self.submit_items(handler, owned_rows, job)
        self.submit_items(handler, verified_rows, job)

        return len(owned_rows), len(verified_rows)

    def handle_container(self, handler, job):
        part, container_nodes = self.container_ring.get_nodes(
            handler._account.encode('utf-8'),
            handler._container.encode('utf-8'))
        nodes_count = len(container_nodes)

        for index, node in enumerate(container_nodes):
            if not is_local_device(self.myips, None, node['ip'], node['port']):
                continue
            broker = self.get_broker(handler._account, handler._container,
                                     part, node)
            broker_info = broker.get_info()
            last_row = handler.get_last_row(broker_info['id'])
            if not last_row:
                last_row = 0
            try:
                items = broker.get_items_since(last_row, self.items_chunk)
            except DatabaseConnectionError:
                continue

            if not items:
                return (0, 0, None, broker_info['id'])

            self.log(
                'info', 'Processing %d rows since row %d for %s/%s' %
                (len(items), last_row, handler._account, handler._container))
            owned_count, verified_count = self.process_items(
                handler, items, nodes_count, index, job)

            return (owned_count, verified_count, items[-1]['ROWID'],
                    broker_info['id'])
        return (0, 0, None, None)

    def list_containers(self, account):
        # TODO: we should not have to retrieve all of the containers at once,
        # but it will require allocating a swift_client for this purpose from
        # the pool -- consider doing that at some point. However, as long as
        # there are fewer than a few million containers, getting all of them at
        # once should be cheap, paginating 10000 at a time.
        with self._swift_pool.item() as swift_client:
            return [c['name'] for c in swift_client.iter_containers(account)]

    def _is_processing(self, settings):
        # NOTE: if we allow more than one destination for (account, container),
        # we have to change the contents of this set
        key = (settings['account'], settings['container'])
        return key in self._in_progress_containers

    def _enqueue_container(self, settings, per_account=False):
        key = (settings['account'], settings['container'])
        self._in_progress_containers.add(key)
        self.enumerator_queue.put((settings, per_account))

    def _submit_containers(self):
        for container_settings in self.conf['containers']:
            # TODO: perform validation of the settings on startup
            if 'container' not in container_settings:
                self.log(
                    'error',
                    'Container name not specified in settings -- continue')
                continue
            if 'account' not in container_settings:
                self.log('error',
                         'Account not specified in settings -- continue')
                continue

            if container_settings['container'] == '/*':
                all_containers = self.list_containers(
                    container_settings['account'])
                for container in all_containers:
                    settings_copy = container_settings.copy()
                    settings_copy['container'] = container.decode('utf-8')
                    if not self._is_processing(settings_copy):
                        self._enqueue_container(settings_copy,
                                                per_account=True)
                # After iterating over all of the containers, we prune any
                # entries from containers that may have been deleted (so as to
                # avoid missing data). There is still a chance where a
                # container is removed and created between the calls to
                # CloudSync, however there is nothing we can do about that.
                # TODO: keep track of container creation date to detect when
                # they are removed and then added.
                if not os.path.exists(
                        os.path.join(self.status_dir,
                                     container_settings['account'])):
                    continue
                tracked_containers = os.listdir(
                    os.path.join(self.status_dir,
                                 container_settings['account']))
                disappeared = set(tracked_containers) - set(all_containers)
                for container in disappeared:
                    try:
                        os.unlink(
                            os.path.join(self.status_dir,
                                         container_settings['account'],
                                         container))
                    except Exception as e:
                        self.log(
                            'warning',
                            'Failed to remove the status file for %s: %s' %
                            (os.path.join(container_settings['account'],
                                          container), repr(e)))
            else:
                if not self._is_processing(container_settings):
                    self._enqueue_container(container_settings,
                                            per_account=False)

    def run_always(self):
        # Since we don't support reloading, the daemon should quit if there are
        # no containers configured
        if 'containers' not in self.conf or not self.conf['containers']:
            return
        self.log('debug', 'Entering the poll loop')
        while True:
            start = time.time()
            self._submit_containers()
            elapsed = time.time() - start
            if elapsed < self.poll_interval:
                eventlet.sleep(self.poll_interval - elapsed)

    def run_once(self):
        self._submit_containers()
        self.enumerator_queue.join()
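A hedged sketch of driving this crawler; the conf keys mirror those read in __init__() above, while MyHandler and my_logger are purely illustrative stand-ins:

# Illustrative wiring only; MyHandler and my_logger are hypothetical.
conf = {
    'devices': '/srv/node',
    'status_dir': '/var/cache/container-crawler',
    'items_chunk': 1000,
    'poll_interval': 5,
    'containers': [
        {'account': 'AUTH_test', 'container': 'synced'},
    ],
}

crawler = ContainerCrawler(conf, MyHandler, logger=my_logger)
crawler.run_once()    # single pass; run_always() polls forever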
Example #21
class FileMover(object):
    def __init__(self, options, *_args, **_kwargs):
        self.ring = Ring(options.ring)
        self.path = options.path
        self.options = options

    def _get_acc_cont_obj(self, filename):
        """ Returns account, container, object from XFS object metadata """

        obj_fd = open(filename)
        metadata = ''
        key = 0
        try:
            while True:
                metadata += xattr.getxattr(
                    obj_fd, '%s%s' % ("user.swift.metadata", (key or '')))
                key += 1
        except IOError:
            pass
        obj_fd.close()
        object_name = pickle.loads(metadata).get('name')
        account = object_name.split('/')[1]
        container = object_name.split('/')[2]
        obj = '/'.join(object_name.split('/')[3:])

        return {'account': account,
                'container': container,
                'object': obj}

    def start(self):
        for root, _dirs, files in os.walk(self.path):
            if "quarantined" in root:
                continue
            for filename in files:
                fullname = os.path.join(root, filename)
                if (self.options.move_object_files is True and
                        fullname.split('.')[-1] in ["data", "ts"]):
                    self._move_file(fullname, "objects")

                if (self.options.move_container_dbs is True and
                        fullname.split('.')[-1] in ["db"] and
                        "containers" in fullname):
                    self._move_file(fullname, "containers")

                if (self.options.move_account_dbs is True and
                        fullname.split('.')[-1] in ["db"] and
                        "accounts" in fullname):
                    self._move_file(fullname, "accounts")

    def _move_file(self, filename, filetype):
        if filetype == 'accounts':
            broker = AccountBroker(filename)
            info = broker.get_info()
        elif filetype == 'containers':
            broker = ContainerBroker(filename)
            info = broker.get_info()
        elif filetype == 'objects':
            info = self._get_acc_cont_obj(filename)
        else:
            raise Exception

        acc = info.get('account')
        cont = info.get('container')
        obj = info.get('object')

        partition, _nodes = self.ring.get_nodes(acc, cont, obj)

        # replace the old partition value with the new one
        # old name like '/a/b/objects/123/c/d'
        # new name like '/a/b/objects/456/c/d'
        filename_parts = filename.split('/')
        part_pos = filename_parts.index(filetype)
        filename_parts[part_pos+1] = str(partition)
        newname = '/'.join(filename_parts)

        dst_dir = os.path.dirname(newname)
        try:
            os.makedirs(dst_dir)
            logging.info("mkdir %s" % dst_dir)
        except OSError as ex:
            logging.info("mkdir %s failed: %s" % (dst_dir, ex))

        try:
            os.rename(filename, newname)
            logging.info("moved %s -> %s" % (filename, newname))
        except OSError as ex:
            logging.warning("FAILED TO MOVE %s -> %s: %s"
                            % (filename, newname, ex))
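The core of _move_file() is string surgery on the path: locate the 'objects'/'containers'/'accounts' segment and overwrite the partition that follows it. A worked example with made-up values:

# Made-up path and partition to illustrate the rewrite in _move_file().
filename = '/srv/node/sdb1/objects/123/f00/deadbeef/1404712159.data'
partition = 456  # pretend the ring maps the object here now

parts = filename.split('/')
part_pos = parts.index('objects')
parts[part_pos + 1] = str(partition)
print('/'.join(parts))
# -> /srv/node/sdb1/objects/456/f00/deadbeef/1404712159.data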
Example #22
class Crawler(object):
    def __init__(self, conf, handler_factory, logger=None):
        if not handler_factory:
            raise RuntimeError('Handler class must be defined')

        self.logger = logger
        self.conf = conf
        self.root = conf['devices']
        self.bulk = config_true_value(conf.get('bulk_process', False))
        self.interval = 10
        self.swift_dir = '/etc/swift'
        self.container_ring = Ring(self.swift_dir, ring_name='container')

        self.status_dir = conf['status_dir']
        self.myips = whataremyips(conf.get('swift_bind_ip', '0.0.0.0'))
        self.items_chunk = conf['items_chunk']
        # Verification slack is specified in minutes.
        self._verification_slack = conf.get('verification_slack', 0) * 60
        self.poll_interval = conf.get('poll_interval', 5)
        self.handler_factory = handler_factory
        # NOTE: this structure is not protected. Since we use green threads, we
        # expect a context switch to only occur on blocking calls, so the set
        # operations should be safe in this context. This can lead to skipping
        # container cycles unnecessarily if the threading model changes.
        self._in_progress_containers = set()

        if self.bulk:
            self.workers = 1

        self._init_workers(conf)
        self._init_ic_pool(conf)

        self.log('debug', 'Created the Container Crawler instance')

    def _init_workers(self, conf):
        if not self.bulk:
            self.workers = conf.get('workers', 10)
            self.worker_pool = eventlet.GreenPool(self.workers)
            self.work_queue = eventlet.queue.Queue(self.workers * 2)

            for _ in xrange(self.workers):
                self.worker_pool.spawn_n(self._worker)

        self.enumerator_workers = conf.get('enumerator_workers', 10)
        self.enumerator_pool = eventlet.GreenPool(self.enumerator_workers)
        self.enumerator_queue = eventlet.queue.Queue(self.enumerator_workers)

        for _ in xrange(self.enumerator_workers):
            self.enumerator_pool.spawn_n(self._enumerator)

    def _init_ic_pool(self, conf):
        pool_size = self.workers
        self._swift_pool = eventlet.pools.Pool(
            create=lambda: create_internal_client(conf, self.swift_dir),
            min_size=pool_size,
            max_size=pool_size)

    def _worker(self):
        while 1:
            try:
                work = self.work_queue.get()
            except Exception as e:
                self.log('error',
                         'Failed to fetch items from the queue: %r' % e)
                self.log('debug',
                         'Failed to fetch items from the queue: %s',
                         exc_info=True)
                eventlet.sleep(100)
                continue

            try:
                if not work:
                    break

                (row, handler), container_job = work
                with self._swift_pool.item() as swift_client:
                    handler.handle(row, swift_client)
                container_job.complete_task()
            except RetryError:
                container_job.complete_task(retry=True)
            except Exception as e:
                container_job.complete_task(error=True)
                self.log(
                    'error', u'Failed to handle row %s (%s): %r' %
                    (row['ROWID'], row['name'].decode('utf-8'), e))
                self.log('debug',
                         u'Failed to handle row %s (%s)' %
                         (row['ROWID'], row['name'].decode('utf-8')),
                         exc_info=True)
            finally:
                self.work_queue.task_done()

    def _get_new_rows(self, broker, start_row, nodes, node_id, verifying):
        rows = []
        if verifying:
            cutoff = time.time() - self._verification_slack
        for row in broker.get_items_since(start_row, self.items_chunk):
            hnum = num_from_row(row)
            if not verifying and hnum % nodes != node_id:
                continue
            ts = decode_timestamps(row['created_at'])[2].timestamp
            if verifying and ts > cutoff:
                break
            rows.append(row)
        return rows

    def _enumerator(self):
        job = ContainerJob()
        while 1:
            try:
                work = self.enumerator_queue.get()
            except:
                self.log('error',
                         'Failed to fetch containers to enumerate',
                         exc_info=True)
                eventlet.sleep(100)
                continue

            try:
                if not work:
                    break

                settings, per_account = work
                account = settings['internal_account']
                container = settings['internal_container']

                # Should we try caching the broker to avoid doing these
                # look ups every time?
                broker, nodes_count, node_id = self.get_broker(
                    account.encode('utf-8'), container.encode('utf-8'))
                if not broker:
                    continue

                if getattr(broker, 'is_sharded', lambda: False)():
                    self._enqueue_sharded_container(settings, per_account)

                broker_info = broker.get_info()
                broker_id = broker_info['id']
                handler = self.handler_factory.instance(
                    settings, per_account=per_account)

                last_primary_row = handler.get_last_processed_row(broker_id)
                if getattr(broker, 'is_root_container', lambda: True)():
                    handler.handle_container_info(broker_info, broker.metadata)
                primary_rows = self._get_new_rows(broker, last_primary_row,
                                                  nodes_count, node_id, False)
                if primary_rows:
                    self.log(
                        'info', 'Processing %d rows since row %d for %s/%s' %
                        (len(primary_rows), last_primary_row, account,
                         container))
                    primary_status = self.submit_items(handler, primary_rows,
                                                       job)
                    if ContainerJob.PASS_SUCCEEDED == primary_status:
                        handler.save_last_processed_row(
                            primary_rows[-1]['ROWID'], broker_id)
                        self.log(
                            'info',
                            'Processed %d rows; last row: %d; for %s/%s' %
                            (len(primary_rows), primary_rows[-1]['ROWID'],
                             account, container))

                last_verified_row = handler.get_last_verified_row(broker_id)
                verifying_rows = self._get_new_rows(broker, last_verified_row,
                                                    nodes_count, node_id, True)

                # Remove any ROWIDs that we uploaded
                uploaded_rows = set([row['ROWID'] for row in primary_rows])
                verifying_rows = filter(
                    lambda row: row['ROWID'] not in uploaded_rows,
                    verifying_rows)

                if verifying_rows:
                    self.log(
                        'info', 'Verifying %d rows since row %d for %s/%s' %
                        (len(verifying_rows), last_verified_row, account,
                         container))
                    verifying_status = self.submit_items(
                        handler, verifying_rows, job)
                    if ContainerJob.PASS_SUCCEEDED == verifying_status:
                        handler.save_last_verified_row(
                            verifying_rows[-1]['ROWID'], broker_id)
                        self.log(
                            'info', 'Verified %d rows; last row: %d; '
                            'for %s/%s' %
                            (len(verifying_rows), verifying_rows[-1]['ROWID'],
                             account, container))

            except SkipContainer:
                self.log('info', "Skipping %s/%s" % (account, container))
            except RetryError:
                # Can appear from the bulk handling code.
                # TODO: we should do a better job tying the bulk handling code
                # into this model.
                pass
            except:
                self.log('error',
                         "Failed to process %s/%s with %s" %
                         (account, container, str(self.handler_factory)),
                         exc_info=True)
            finally:
                if work:
                    self._in_progress_containers.remove(
                        mapping_signature(work[0]))
                self.enumerator_queue.task_done()

    def log(self, level, message, **kwargs):
        if not self.logger:
            return
        getattr(self.logger, level)(message, **kwargs)

    def _get_db_info(self, account, container):
        """
        Returns database placement info for the container

        :param account: UTF-8 encoded account name
        :param container: UTF-8 encoded container name
        :returns: a tuple of (db path, nodes count, index of replica)
        """

        part, container_nodes = self.container_ring.get_nodes(
            account, container)
        nodes_count = len(container_nodes)
        db_hash = hash_path(account, container)
        db_dir = storage_directory(DATADIR, part, db_hash)

        for index, node in enumerate(container_nodes):
            if not is_local_device(self.myips, None, node['ip'], node['port']):
                continue
            db_path = os.path.join(self.root, node['device'], db_dir,
                                   db_hash + '.db')
            return db_path, nodes_count, index
        return None, None, None

    def get_broker(self, account, container):
        """Instatiates a container database broker.

        :param account: UTF-8 encoded account name
        :param container: UTF-8 encoded container name
        :returns: a tuple of (ContainerBroker, nodes count, index of replica)
        """
        db_path, nodes_count, index = self._get_db_info(account, container)
        if db_path:
            broker = ContainerBroker(db_path,
                                     account=account,
                                     container=container)
            if broker.is_deleted():
                self.log(
                    'info',
                    'Database does not exist for %s/%s' % (account, container))
            else:
                return broker, nodes_count, index
        return None, None, None

    def submit_items(self, handler, rows, job):
        if not rows:
            return ContainerJob.PASS_SUCCEEDED

        if self.bulk:
            with self._swift_pool.item() as swift_client:
                handler.handle(rows, swift_client)
            return ContainerJob.PASS_SUCCEEDED

        job.submit_tasks(map(lambda row: (row, handler), rows),
                         self.work_queue)
        return job.wait_all()

    def list_containers(self, account, prefix=''):
        # TODO: we should not have to retrieve all of the containers at once,
        # but it will require allocating a swift_client for this purpose from
        # the pool -- consider doing that at some point. However, as long as
        # there are fewer than a few million containers, getting all of them at
        # once should be cheap, paginating 10000 at a time.
        with self._swift_pool.item() as swift_client:
            return [
                c['name']
                for c in swift_client.iter_containers(account, prefix=prefix)
            ]

    def _prune_deleted_containers(self, account, containers, prefix=None):
        # After iterating over all of the containers, we prune any
        # entries from containers that may have been deleted (so as to
        # avoid missing data). There is still a chance where a
        # container is removed and created between the iterations, however
        # there is nothing we can do about that.
        # TODO: keep track of container creation date to detect when
        # they are removed and then added.
        account_status_dir = os.path.join(self.status_dir,
                                          account.encode('utf-8'))
        if not os.path.exists(account_status_dir):
            return

        if prefix:
            container_paths = glob.glob(
                os.path.join(account_status_dir, prefix + '*'))
            tracked_containers = [
                os.path.basename(path) for path in container_paths
            ]
        else:
            tracked_containers = os.listdir(account_status_dir)

        disappeared = set(tracked_containers) - set(
            map(lambda container: container.encode('utf-8'), containers))
        for container in disappeared:
            try:
                os.unlink(os.path.join(account_status_dir, container))
            except Exception as e:
                self.log(
                    'warning', 'Failed to remove the status file for %s: %s' %
                    (os.path.join(account, container), repr(e)))

    def _prune_status_files(self):
        # Unlike _prune_deleted_containers, which prunes status files from the
        # per-account mappings, this prunes only unknown status files (i.e. the
        # mapping was removed).
        known_mappings = {
            mapping['account']: set()
            for mapping in self.conf['containers'] if mapping.get('container')
        }
        for mapping in self.conf['containers']:
            if 'container' not in mapping:
                continue
            known_mappings[mapping['account']].add(mapping['container'])

        for account in os.listdir(unicode(self.status_dir)):
            account_path = os.path.join(self.status_dir, account)
            if not os.path.isdir(account_path):
                continue
            if account.startswith('.shards_'):
                # Sharded containers are handled separately
                continue
            if account not in known_mappings:
                try:
                    shutil.rmtree(account_path)
                except OSError as e:
                    self.log(
                        'warn', 'Failed to remove {}: {}'.format(
                            os.path.join(self.status_dir,
                                         account.encode('utf-8')), e))
                continue
            if '/*' in known_mappings[account]:
                continue
            for container in os.listdir(account_path):
                if container not in known_mappings[account]:
                    try:
                        os.unlink(os.path.join(account_path, container))
                    except OSError as e:
                        self.log(
                            'warn', 'Failed to remove {}: {}'.format(
                                os.path.join(
                                    account_path.encode('utf-8'),
                                    unicode(container).encode('utf-8')), e))

    def _is_container_sharded(self, account, container):
        """
        Retrieve container metadata with a HEAD request and
        find out if container is sharded.
        :returns: True if container is sharded. False otherwise.
        """
        with self._swift_pool.item() as swift_client:
            try:
                metadata = swift_client.get_container_metadata(
                    account, container)
            except UnexpectedResponse as err:
                if err.resp.status_int != HTTP_NOT_FOUND:
                    self.log(
                        'error',
                        'Failed to retrieve container metadata for %s: %s' %
                        (os.path.join(account, container), err.message))
                metadata = {}
            except Exception as err:
                self.log(
                    'error',
                    'Failed to retrieve container metadata for %s: %s' %
                    (os.path.join(account, container), err.message))
                metadata = {}

        return metadata.get('x-backend-sharding-state') == 'sharded'

    def _enqueue_sharded_container(self, settings, per_account=False):
        """
        Get the list of shards for a given container and add them to the
        work queue.
        """
        # TODO: look into saving the sharded state of the container
        sharded_account = '.shards_' + settings['account']
        sharded_container = settings['container']
        all_sharded_containers = self.list_containers(sharded_account,
                                                      prefix=sharded_container)
        for container in all_sharded_containers:
            settings_copy = settings.copy()
            settings_copy['internal_account'] = sharded_account
            settings_copy['internal_container'] = container
            self._enqueue_container(settings_copy, per_account)
        self._prune_deleted_containers(sharded_account,
                                       all_sharded_containers,
                                       prefix=sharded_container)

    def _process_container(self, settings, per_account=False):
        # save internal account/containers as the actual account/containers
        # that will be crawled. This is currently useful for sharded containers
        account = settings['account']
        container = settings['container']
        settings['internal_account'] = account
        settings['internal_container'] = container

        try:
            db_path, _, _ = self._get_db_info(account.encode('utf-8'),
                                              container.encode('utf-8'))
        except:
            self.log('error',
                     "Failed to process %s/%s" % (account, container),
                     exc_info=True)
            return

        # if container db is not on local node, we need to check
        # if container is sharded with a HEAD request because
        # shards of that container could potentially be stored on this node
        # even if root container is not. Otherwise we check if container is
        # sharded when we have the broker.
        if db_path:
            self._enqueue_container(settings, per_account)
        elif self._is_container_sharded(account, container):
            self._enqueue_sharded_container(settings, per_account)

    def _enqueue_container(self, settings, per_account=False):
        settings_signature = mapping_signature(settings)
        if settings_signature not in self._in_progress_containers:
            self._in_progress_containers.add(settings_signature)
            self.enumerator_queue.put((settings, per_account))

    def _submit_containers(self):
        for container_settings in self.conf['containers']:
            # TODO: perform validation of the settings on startup
            if 'container' not in container_settings:
                self.log(
                    'error',
                    'Container name not specified in settings -- continue')
                continue
            if 'account' not in container_settings:
                self.log('error',
                         'Account not specified in settings -- continue')
                continue

            if container_settings['container'] == '/*':
                all_containers = self.list_containers(
                    container_settings['account'])
                for container in all_containers:
                    settings_copy = container_settings.copy()
                    settings_copy['container'] = container
                    self._process_container(settings_copy, per_account=True)

                # clean status dir off containers that have been deleted
                self._prune_deleted_containers(container_settings['account'],
                                               all_containers)
            else:
                self._process_container(container_settings)
        self._prune_status_files()

    def run_always(self):
        # Since we don't support reloading, the daemon should quit if there are
        # no containers configured
        if 'containers' not in self.conf or not self.conf['containers']:
            return
        self.log('debug', 'Entering the poll loop')
        while True:
            start = time.time()
            self._submit_containers()
            elapsed = time.time() - start
            if elapsed < self.poll_interval:
                eventlet.sleep(self.poll_interval - elapsed)

    def run_once(self):
        self._submit_containers()
        self.enumerator_queue.join()
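The row split in _get_new_rows() is worth a worked example: a replica owns the rows whose hash maps to its node index and, after the verification slack has elapsed, re-checks everyone else's. A sketch with three replicas, faking num_from_row() as the identity on integers:

# Faked rows: pretend num_from_row() reduced each row to an integer.
rows = list(range(10))
nodes = 3

for node_id in range(nodes):
    owned = [r for r in rows if r % nodes == node_id]
    verifying = [r for r in rows if r % nodes != node_id]
    print('node %d owns %s, verifies %s' % (node_id, owned, verifying))
# Each row has exactly one owner; the other replicas only verify it.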
Example #23
class UtilizationMiddleware(object):
    def __init__(self, app, conf, *args, **kwargs):
        self.app = app
        self.conf = conf
        self.sample_account = '.transfer_record'
        self.aggregate_account = '.utilization'
        self.logger = get_logger(self.conf, log_route='utilization')
        self.container_ring = Ring('/etc/swift', ring_name='container')
        self.sample_rate = int(self.conf.get('sample_rate', 600))

    def swift_account(self, env, tenant_id):
        path = '/v1/%s/%s?format=json&prefix=account/' \
               % (self.aggregate_account, tenant_id)
        req = make_pre_authed_request(env, 'GET', path)
        req.environ['swift.proxy_access_log_made'] = True
        resp = req.get_response(self.app)
        if resp.status_int == 404:
            return None
        return json.loads(resp.body)[0]['name'].split('/')[1]

    def check_api_call(self, env):
        path = env.get('RAW_PATH_INFO', None)

        if env['REQUEST_METHOD'] == 'GET' and path == '/api/v1/metering':
            return True
        return False

    def get_account_info(self, env, account):
        path = '/v1/%s' % account
        req = make_pre_authed_request(env, 'HEAD', path)
        req.environ['swift.proxy_access_log_made'] = True
        resp = req.get_response(self.app)
        if resp.status_int // 100 != 2:
            return (0, 0, 0)
        return (int(resp.headers.get('x-account-container-count', 0)),
                int(resp.headers.get('x-account-object-count', 0)),
                int(resp.headers.get('x-account-bytes-used', 0)))

    def record_usage_data(self, env, tenant_id, account, timestamp):
        path = '/v1/%s/%s?prefix=usage/%d&format=json' % (
            self.aggregate_account, tenant_id, timestamp)
        req = make_pre_authed_request(env, 'GET', path)
        req.environ['swift.proxy_access_log_made'] = True
        resp = req.get_response(self.app)
        if resp.status_int == 404:
            return
        body = json.loads(resp.body)

        if len(body) != 0:
            return

        container_cnt, obj_cnt, bt_used = self.get_account_info(env, account)
        u_object = 'usage/%d/%d_%d_%d' % (timestamp, container_cnt,
                                          obj_cnt, bt_used)

        self.put_hidden_object(self.aggregate_account, tenant_id, u_object)

    def iter_objects(self, env, path, prefix, marker, end, count):
        path_with_params = '%s?format=json&prefix=%s' % (path, prefix)
        seg = ''
        force_break = False
        while count > 0:
            limit = 1000 if count > 1000 else count
            count -= 1000
            rpath = path_with_params + ('&marker=%s' % marker) + (
                '&limit=%d' % limit)
            req = make_pre_authed_request(env, 'GET', rpath)
            req.environ['swift.proxy_access_log_made'] = True
            resp = req.get_response(self.app)
            segments = json.loads(resp.body)
            for seg in segments:
                name = seg['name']
                record_ts = int(name.split('/')[1])
                if record_ts > end:
                    force_break = True
                    break
                yield name

            if force_break:
                break

            if len(segments) != limit:
                break

            if segments:
                marker = seg['name']
            else:
                break

    def retrieve_utilization_data(self, env, tenant_id, start, end, count):
        path = '/v1/%s/%s' % (self.aggregate_account, tenant_id)
        data = dict()
        data['transfer'] = {}
        data['utilization'] = {}
        marker = 'transfer/%d' % start
        data['transfer'] = list()
        data['utilization']['container_count'] = 0
        data['utilization']['object_count'] = 0
        data['utilization']['bytes_used'] = 0

        bytes_recvs = dict()
        bytes_sents = dict()
        req_counts = dict()
        for o in self.iter_objects(env, path, 'transfer/', marker, end, count):
            bill_type = o.split('/')[2]
            bytes_recv, bytes_sent, req_cnt = o.split('/')[3].split('_')
            bytes_recvs[bill_type] = bytes_recvs.get(bill_type, 0) + int(
                bytes_recv)
            bytes_sents[bill_type] = bytes_sents.get(bill_type, 0) + int(
                bytes_sent)
            req_counts[bill_type] = req_counts.get(bill_type, 0) + int(req_cnt)

        for bill_type, bt_rv in bytes_recvs.items():
            d = dict()
            d['bill_type'] = int(bill_type)
            d['bytes_in'] = bt_rv
            d['bytes_out'] = bytes_sents[bill_type]
            d['req_count'] = req_counts[bill_type]
            data['transfer'].append(d)

        last = None
        marker = 'usage/%d' % start
        for o in self.iter_objects(env, path, 'usage/', marker, end, count):
            last = o

        if last:
            container_cnt, obj_cnt, bytes_used = last.split('/')[2].split('_')
            data['utilization']['container_count'] = container_cnt
            data['utilization']['object_count'] = obj_cnt
            data['utilization']['bytes_used'] = bytes_used
        return data

    def GET(self, req):
        start = req.params.get('start')
        tenant_id = req.params.get('tenantid')
        identity = req.environ.get('HTTP_X_IDENTITY_STATUS')
        roles = req.environ.get('keystone.identity', None)

        if identity == 'Invalid' or not roles or 'admin' not in roles['roles']:
            return Response(request=req, status="403 Forbidden",
                            body="Access Denied",
                            content_type="text/plain")

        if not tenant_id:
            return Response(request=req, status="400 Bad Request",
                            body="tenant_id parameter doesn't exist",
                            content_type="text/plain")

        if not start:
            return Response(request=req, status="400 Bad Request",
                            body="start parameter doesn't exist",
                            content_type="text/plain")

        end = req.params.get('end')
        if end is None:
            end = datetime.utcfromtimestamp(int(time.time())).isoformat()

        # check if tenant_id's users utilization was recorded.
        account = self.swift_account(req.environ.copy(), tenant_id)
        if not account:
            return Response(status="400 Bad Request",
                            content_type="text/plain",
                            body="This tenant_id never used.")

        try:
            # start time is "rounded down"
            start_ts = iso8601_to_timestamp(start)
            # end time is "rounded up"
            end_ts = iso8601_to_timestamp(end)
        except ValueError:
            return Response(status="400 Bad Request",
                            content_type="text/plain",
                            body="start or end time is incorrect format."
                                 "please check start or end parameter")
        if start_ts > end_ts:
            return Response(status="400 Bad Request",
                            content_type="text/plain",
                            body="start time must be before the end time")

        end_ts = (end_ts // 3600 + 1) * 3600
        start_ts = (start_ts // 3600) * 3600

        objsize = (end_ts - start_ts) / self.sample_rate

        content = self.retrieve_utilization_data(req.environ.copy(), tenant_id,
                                                 start_ts, end_ts, objsize)

        content['period_start'] = timestamp_to_iso8601(start_ts)
        content['period_end'] = timestamp_to_iso8601(end_ts)
        content['tenant_id'] = tenant_id
        content['swift_account'] = account
        return Response(request=req, body=json.dumps(content),
                        content_type="application/json")

    def __call__(self, env, start_response):
        self.logger.debug('Calling Utilization Middleware')

        req = Request(env)
        if self.check_api_call(env):
            return self.GET(req)(env, start_response)

        try:
            version, account, container, obj = req.split_path(2, 4, True)
        except ValueError:
            return self.app(env, start_response)

        remote_user = env.get('REMOTE_USER')
        if not remote_user or (isinstance(remote_user, basestring) and
                               remote_user.startswith('.wsgi')):
            self.logger.debug('### SKIP: REMOTE_USER is %s' % remote_user)
            return self.app(env, start_response)

        start_response_args = [None]
        input_proxy = InputProxy(env['wsgi.input'])
        env['wsgi.input'] = input_proxy

        def my_start_response(status, headers, exc_info=None):
            start_response_args[0] = (status, list(headers), exc_info)

        def iter_response(iterable):
            iterator = iter(iterable)
            try:
                chunk = next(iterator)
                while not chunk:
                    chunk = next(iterator)
            except StopIteration:
                chunk = ''

            if start_response_args[0]:
                start_response(*start_response_args[0])

            bytes_sent = 0
            try:
                while chunk:
                    bytes_sent += len(chunk)
                    yield chunk
                    chunk = next(iterator)
            finally:
                try:
                    self.publish_sample(env, account,
                                        input_proxy.bytes_received,
                                        bytes_sent)
                except Exception:
                    self.logger.exception('Failed to publish samples')

        try:
            iterable = self.app(env, my_start_response)
        except Exception:
            self.publish_sample(env, account, input_proxy.bytes_received, 0)
            raise
        else:
            return iter_response(iterable)

    def publish_sample(self, env, account, bytes_received, bytes_sent):
        timestamp = normalize_timestamp(time.time())
        sample_time = (float(
            timestamp) // self.sample_rate + 1) * self.sample_rate
        trans_id = env.get('swift.trans_id')
        tenant_id = env.get('HTTP_X_TENANT_ID')
        remote_addr = env.get('REMOTE_ADDR')

        # check if the account information object exists.
        if not self.swift_account(env, tenant_id):
            obj = 'account/%s' % account
            self.put_hidden_object(self.aggregate_account, tenant_id, obj)

        # recording account's storage usage data
        self.record_usage_data(env, tenant_id, account, sample_time)

        container = '%s_%s_%s' % (sample_time, tenant_id, account)

        obj = '%s/%d/%d/%s/%s' % (timestamp, bytes_received, bytes_sent,
                                  trans_id, remote_addr)
        self.put_hidden_object(self.sample_account, container, obj)

    def put_hidden_object(self, account, container, obj):
        hidden_path = '/%s/%s/%s' % (account, container, obj)
        self.logger.debug('put sample_path: %s' % hidden_path)
        part, nodes = self.container_ring.get_nodes(self.sample_account,
                                                    container)
        for node in nodes:
            ip = node['ip']
            port = node['port']
            dev = node['device']
            action_headers = dict()
            action_headers['user-agent'] = 'utilization'
            action_headers['X-Timestamp'] = normalize_timestamp(time.time())
            action_headers['referer'] = 'utilization-middleware'
            action_headers['x-size'] = '0'
            action_headers['x-content-type'] = "text/plain"
            action_headers['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e'

            conn = http_connect(ip, port, dev, part, 'PUT', hidden_path,
                                action_headers)
            response = conn.getresponse()
            response.read()
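Note how both the samples and the aggregates above encode their payload entirely in object names, so billing data can be read back without fetching any bodies. A sketch of the parsing performed in retrieve_utilization_data(), using a made-up aggregated record name:

# Made-up name in the transfer/<ts>/<bill_type>/<in>_<out>_<reqs> shape
# that retrieve_utilization_data() expects.
name = 'transfer/1500000000/0/1024_2048_3'

bill_type = name.split('/')[2]
bytes_recv, bytes_sent, req_cnt = name.split('/')[3].split('_')
print('type %s: in=%s out=%s reqs=%s'
      % (bill_type, bytes_recv, bytes_sent, req_cnt))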
Example #24
class ListEndpointsMiddleware(object):
    """
    List endpoints for an object, account or container.

    See above for a full description.

    Uses configuration parameter `swift_dir` (default `/etc/swift`).

    :param app: The next WSGI filter or app in the paste.deploy
                chain.
    :param conf: The configuration dict for the middleware.
    """
    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='endpoints')
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.account_ring = Ring(self.swift_dir, ring_name='account')
        self.container_ring = Ring(self.swift_dir, ring_name='container')
        self.endpoints_path = conf.get('list_endpoints_path', '/endpoints/')
        if not self.endpoints_path.endswith('/'):
            self.endpoints_path += '/'

    def get_object_ring(self, policy_idx):
        """
        Get the ring object to use to handle a request based on its policy.

        :param policy_idx: policy index as defined in swift.conf
        :returns: appropriate ring object
        """
        return POLICIES.get_object_ring(policy_idx, self.swift_dir)
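        # Note: for storage policy index 0 the ring file is object.ring.gz
        # under swift_dir; for policy N it is object-N.ring.gz.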

    def __call__(self, env, start_response):
        request = Request(env)
        if not request.path.startswith(self.endpoints_path):
            return self.app(env, start_response)

        if request.method != 'GET':
            return HTTPMethodNotAllowed(req=request,
                                        headers={"Allow":
                                                 "GET"})(env, start_response)

        try:
            clean_path = request.path[len(self.endpoints_path) - 1:]
            account, container, obj = \
                split_path(clean_path, 1, 3, True)
        except ValueError:
            return HTTPBadRequest('No account specified')(env, start_response)

        if account is not None:
            account = unquote(account)
        if container is not None:
            container = unquote(container)
        if obj is not None:
            obj = unquote(obj)

        if obj is not None:
            # rewrite the endpoints prefix to a standard /v1 path before
            # calling get_container_info
            stripped = request.environ
            if stripped['PATH_INFO'][:len(self.endpoints_path)] == \
                    self.endpoints_path:
                stripped['PATH_INFO'] = "/v1/" + \
                    stripped['PATH_INFO'][len(self.endpoints_path):]
            container_info = get_container_info(stripped,
                                                self.app,
                                                swift_source='LE')
            obj_ring = self.get_object_ring(container_info['storage_policy'])
            partition, nodes = obj_ring.get_nodes(account, container, obj)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}/{obj}'
        elif container is not None:
            partition, nodes = self.container_ring.get_nodes(
                account, container)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}'
        else:
            partition, nodes = self.account_ring.get_nodes(account)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}'

        endpoints = []
        for node in nodes:
            endpoint = endpoint_template.format(ip=node['ip'],
                                                port=node['port'],
                                                device=node['device'],
                                                partition=partition,
                                                account=quote(account),
                                                container=quote(container
                                                                or ''),
                                                obj=quote(obj or ''))
            endpoints.append(endpoint)

        return Response(json.dumps(endpoints),
                        content_type='application/json')(env, start_response)
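
A hedged usage sketch for the middleware above: with it in the proxy pipeline, the endpoint list comes back as a JSON array (the proxy address and object name below are assumptions):

import json
import urllib2  # Python 2, matching the era of these examples

url = 'http://127.0.0.1:8080/endpoints/AUTH_test/pictures/cat.jpg'
endpoints = json.loads(urllib2.urlopen(url).read())
for endpoint in endpoints:
    # e.g. http://10.1.1.1:6000/sda1/2/AUTH_test/pictures/cat.jpg
    print endpoint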
Example #25
class ListEndpointsMiddleware(object):
    """
    List endpoints for an object, account or container.

    See above for a full description.

    Uses configuration parameter `swift_dir` (default `/etc/swift`).

    :param app: The next WSGI filter or app in the paste.deploy
                chain.
    :param conf: The configuration dict for the middleware.
    """

    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='endpoints')
        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.account_ring = Ring(swift_dir, ring_name='account')
        self.container_ring = Ring(swift_dir, ring_name='container')
        self.object_ring = Ring(swift_dir, ring_name='object')
        self.endpoints_path = conf.get('list_endpoints_path', '/endpoints/')
        if not self.endpoints_path.endswith('/'):
            self.endpoints_path += '/'

    def __call__(self, env, start_response):
        request = Request(env)

        if not request.path.startswith(self.endpoints_path):
            return self.app(env, start_response)

        if request.method != 'GET':
            return HTTPMethodNotAllowed(
                req=request, headers={"Allow": "GET"})(env, start_response)

        try:
            clean_path = request.path[len(self.endpoints_path) - 1:]
            account, container, obj = \
                split_path(clean_path, 1, 3, True)
        except ValueError:
            return HTTPBadRequest('No account specified')(env, start_response)

        if account is not None:
            account = unquote(account)
        if container is not None:
            container = unquote(container)
        if obj is not None:
            obj = unquote(obj)

        if obj is not None:
            partition, nodes = self.object_ring.get_nodes(
                account, container, obj)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}/{obj}'
        elif container is not None:
            partition, nodes = self.container_ring.get_nodes(
                account, container)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}'
        else:
            partition, nodes = self.account_ring.get_nodes(
                account)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}'

        endpoints = []
        for node in nodes:
            endpoint = endpoint_template.format(
                ip=node['ip'],
                port=node['port'],
                device=node['device'],
                partition=partition,
                account=quote(account),
                container=quote(container or ''),
                obj=quote(obj or ''))
            endpoints.append(endpoint)

        return Response(json.dumps(endpoints),
                        content_type='application/json')(env, start_response)
Example #26
class ListEndpointsMiddleware(object):
    """
    List endpoints for an object, account or container.

    See above for a full description.

    Uses configuration parameter `swift_dir` (default `/etc/swift`).

    :param app: The next WSGI filter or app in the paste.deploy
                chain.
    :param conf: The configuration dict for the middleware.
    """
    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='endpoints')
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.account_ring = Ring(self.swift_dir, ring_name='account')
        self.container_ring = Ring(self.swift_dir, ring_name='container')
        self.endpoints_path = conf.get('list_endpoints_path', '/endpoints/')
        if not self.endpoints_path.endswith('/'):
            self.endpoints_path += '/'
        self.default_response_version = 1.0
        self.response_map = {
            1.0: self.v1_format_response,
            2.0: self.v2_format_response,
        }

    def get_object_ring(self, policy_idx):
        """
        Get the ring object to use to handle a request based on its policy.

        :param policy_idx: policy index as defined in swift.conf
        :returns: appropriate ring object
        """
        return POLICIES.get_object_ring(policy_idx, self.swift_dir)

    def _parse_version(self, raw_version):
        err_msg = 'Unsupported version %r' % raw_version
        try:
            version = float(raw_version.lstrip('v'))
        except ValueError:
            raise ValueError(err_msg)
        if version not in RESPONSE_VERSIONS:
            raise ValueError(err_msg)
        return version

    def _parse_path(self, request):
        """
        Parse path parts of request into a tuple of version, account,
        container, obj.  Unspecified path parts are filled in as None,
        except version which is always returned as a float using the
        configured default response version if not specified in the
        request.

        :param request: the swob request

        :returns: parsed path parts as a tuple with version filled in as
                  configured default response version if not specified.
        :raises: ValueError if path is invalid, message will say why.
        """
        clean_path = request.path[len(self.endpoints_path) - 1:]
        # try to peel off version
        try:
            raw_version, rest = split_path(clean_path, 1, 2, True)
        except ValueError:
            raise ValueError('No account specified')
        try:
            version = self._parse_version(raw_version)
        except ValueError:
            if raw_version.startswith('v') and '_' not in raw_version:
                # looks more like an invalid version than an account
                raise
            # probably no version was specified; note that a path like
            # /endpoints/v_3/account falls through to here and is treated
            # as an account, which yields a useless response rather than
            # an error.
            version = self.default_response_version
            rest = clean_path
        else:
            rest = '/' + rest if rest else '/'
        try:
            account, container, obj = split_path(rest, 1, 3, True)
        except ValueError:
            raise ValueError('No account specified')
        return version, account, container, obj
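
    # For illustration (assuming endpoints_path is '/endpoints/' and
    # RESPONSE_VERSIONS holds 1.0 and 2.0), _parse_path behaves like:
    #   '/endpoints/v2/a/c/o' -> (2.0, 'a', 'c', 'o')
    #   '/endpoints/a/c/o'    -> (1.0, 'a', 'c', 'o')   # default version
    #   '/endpoints/v1/a'     -> (1.0, 'a', None, None)
    #   '/endpoints/v9/a'     -> ValueError: Unsupported version 'v9'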

    def v1_format_response(self, req, endpoints, **kwargs):
        return Response(json.dumps(endpoints), content_type='application/json')

    def v2_format_response(self, req, endpoints, storage_policy_index,
                           **kwargs):
        resp = {
            'endpoints': endpoints,
            'headers': {},
        }
        if storage_policy_index is not None:
            resp['headers']['X-Backend-Storage-Policy-Index'] = str(
                storage_policy_index)
        return Response(json.dumps(resp), content_type='application/json')

    def __call__(self, env, start_response):
        request = Request(env)
        if not request.path.startswith(self.endpoints_path):
            return self.app(env, start_response)

        if request.method != 'GET':
            return HTTPMethodNotAllowed(req=request,
                                        headers={"Allow":
                                                 "GET"})(env, start_response)

        try:
            version, account, container, obj = self._parse_path(request)
        except ValueError as err:
            return HTTPBadRequest(str(err))(env, start_response)

        if account is not None:
            account = unquote(account)
        if container is not None:
            container = unquote(container)
        if obj is not None:
            obj = unquote(obj)

        storage_policy_index = None
        if obj is not None:
            container_info = get_container_info(
                {'PATH_INFO': '/v1/%s/%s' % (account, container)},
                self.app,
                swift_source='LE')
            storage_policy_index = container_info['storage_policy']
            obj_ring = self.get_object_ring(storage_policy_index)
            partition, nodes = obj_ring.get_nodes(account, container, obj)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}/{obj}'
        elif container is not None:
            partition, nodes = self.container_ring.get_nodes(
                account, container)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}'
        else:
            partition, nodes = self.account_ring.get_nodes(account)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}'

        endpoints = []
        for node in nodes:
            endpoint = endpoint_template.format(ip=node['ip'],
                                                port=node['port'],
                                                device=node['device'],
                                                partition=partition,
                                                account=quote(account),
                                                container=quote(container
                                                                or ''),
                                                obj=quote(obj or ''))
            endpoints.append(endpoint)

        resp = self.response_map[version](
            request,
            endpoints=endpoints,
            storage_policy_index=storage_policy_index)
        return resp(env, start_response)
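
Unlike the v1 list shown earlier, the v2 format above wraps the endpoints in an object that can also carry the storage policy header. A hedged sketch of consuming it (address and names are assumptions):

import json
import urllib2  # Python 2, matching the era of these examples

url = 'http://127.0.0.1:8080/endpoints/v2/AUTH_test/pictures/cat.jpg'
body = json.loads(urllib2.urlopen(url).read())
for endpoint in body['endpoints']:
    print endpoint
# present only for object requests under a storage policy
policy_index = body['headers'].get('X-Backend-Storage-Policy-Index')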
Example #27
class ListEndpointsMiddleware(object):
    """
    List endpoints for an object, account or container.

    See above for a full description.

    Uses configuration parameter `swift_dir` (default `/etc/swift`).

    :param app: The next WSGI filter or app in the paste.deploy
                chain.
    :param conf: The configuration dict for the middleware.
    """
    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='endpoints')
        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.account_ring = Ring(swift_dir, ring_name='account')
        self.container_ring = Ring(swift_dir, ring_name='container')
        self.object_ring = Ring(swift_dir, ring_name='object')
        self.endpoints_path = conf.get('list_endpoints_path', '/endpoints/')
        if not self.endpoints_path.endswith('/'):
            self.endpoints_path += '/'

    def __call__(self, env, start_response):
        request = Request(env)

        if not request.path.startswith(self.endpoints_path):
            return self.app(env, start_response)

        if request.method != 'GET':
            return HTTPMethodNotAllowed(req=request,
                                        headers={"Allow":
                                                 "GET"})(env, start_response)

        try:
            clean_path = request.path[len(self.endpoints_path) - 1:]
            account, container, obj = \
                split_path(clean_path, 1, 3, True)
        except ValueError:
            return HTTPBadRequest('No account specified')(env, start_response)

        if account is not None:
            account = unquote(account)
        if container is not None:
            container = unquote(container)
        if obj is not None:
            obj = unquote(obj)

        if obj is not None:
            partition, nodes = self.object_ring.get_nodes(
                account, container, obj)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}/{obj}'
        elif container is not None:
            partition, nodes = self.container_ring.get_nodes(
                account, container)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}/{container}'
        else:
            partition, nodes = self.account_ring.get_nodes(account)
            endpoint_template = 'http://{ip}:{port}/{device}/{partition}/' + \
                                '{account}'

        endpoints = []
        for node in nodes:
            endpoint = endpoint_template.format(ip=node['ip'],
                                                port=node['port'],
                                                device=node['device'],
                                                partition=partition,
                                                account=quote(account),
                                                container=quote(container
                                                                or ''),
                                                obj=quote(obj or ''))
            endpoints.append(endpoint)

        return Response(json.dumps(endpoints),
                        content_type='application/json')(env, start_response)
Example #28
class ContainerCrawler(object):
    def __init__(self, conf, handler_class, logger=None):
        self.logger = logger
        self.conf = conf
        self.root = conf['devices']
        self.bulk = conf.get('bulk_process', False)
        self.interval = 10
        self.swift_dir = '/etc/swift'
        self.container_ring = Ring(self.swift_dir, ring_name='container')

        self.status_dir = conf['status_dir']
        self.myips = whataremyips('0.0.0.0')
        self.items_chunk = conf['items_chunk']
        self.poll_interval = conf.get('poll_interval', 5)
        self.handler_class = handler_class

        if not self.bulk:
            self._init_workers(conf)

        self.log('debug', 'Created the Container Crawler instance')

    def _init_workers(self, conf):
        self.workers = conf.get('workers', 10)
        self.pool = eventlet.GreenPool(self.workers)
        self.work_queue = eventlet.queue.Queue(self.workers * 2)

        # maxsize=None means the queue is unbounded
        self.error_queue = eventlet.queue.Queue(maxsize=None)
        self.stats_queue = eventlet.queue.Queue(maxsize=None)
        for _ in range(0, self.workers):
            self.pool.spawn_n(self._worker)

    def _worker(self):
        while 1:
            work = self.work_queue.get()
            if not work:
                self.work_queue.task_done()
                break
            row, handler = work
            try:
                handler.handle(row)
            except Exception as e:
                self.error_queue.put((row, e))
            self.work_queue.task_done()

    def _stop(self):
        for _ in range(0, self.workers):
            self.work_queue.put(None)
        self.pool.waitall()

    def _check_errors(self):
        if self.error_queue.empty():
            return

        while not self.error_queue.empty():
            row, error = self.error_queue.get()
            self.log('error',
                     'Failed to handle row %s: %r' % (row['ROWID'], error))
        raise RuntimeError('Failed to process rows')

    def log(self, level, message):
        if not self.logger:
            return
        getattr(self.logger, level)(message)

    def get_broker(self, account, container, part, node):
        db_hash = hash_path(account, container)
        db_dir = storage_directory(DATADIR, part, db_hash)
        db_path = os.path.join(self.root, node['device'], db_dir,
                               db_hash + '.db')
        return ContainerBroker(db_path, account=account, container=container)

    def submit_items(self, handler, rows):
        if self.bulk:
            handler.handle(rows)
            return

        for row in rows:
            self.work_queue.put((row, handler))
        self.work_queue.join()
        self._check_errors()

    def process_items(self, handler, rows, nodes_count, node_id):
        owned_rows = filter(lambda row: row['ROWID'] % nodes_count == node_id,
                            rows)
        self.submit_items(handler, owned_rows)

        verified_rows = filter(
            lambda row: row['ROWID'] % nodes_count != node_id, rows)
        self.submit_items(handler, verified_rows)

    def handle_container(self, settings):
        part, container_nodes = self.container_ring.get_nodes(
            settings['account'], settings['container'])
        nodes_count = len(container_nodes)
        handler = self.handler_class(self.status_dir, settings)

        for index, node in enumerate(container_nodes):
            if not is_local_device(self.myips, None, node['ip'], node['port']):
                continue
            broker = self.get_broker(settings['account'],
                                     settings['container'], part, node)
            broker_info = broker.get_info()
            last_row = handler.get_last_row(broker_info['id'])
            if not last_row:
                last_row = 0
            try:
                items = broker.get_items_since(last_row, self.items_chunk)
            except DatabaseConnectionError:
                continue
            if items:
                self.process_items(handler, items, nodes_count, index)
                handler.save_last_row(items[-1]['ROWID'], broker_info['id'])
            return

    def run_always(self):
        # Since we don't support reloading, the daemon should quit if there are
        # no containers configured
        if 'containers' not in self.conf or not self.conf['containers']:
            return
        self.log('debug', 'Entering the poll loop')
        while True:
            start = time.time()
            self.run_once()
            elapsed = time.time() - start
            if elapsed < self.poll_interval:
                time.sleep(self.poll_interval - elapsed)

    def run_once(self):
        for container_settings in self.conf['containers']:
            try:
                self.handle_container(container_settings)
            except Exception as e:
                account = container_settings.get('account', 'N/A')
                container = container_settings.get('container', 'N/A')
                self.log(
                    'error', "Failed to process %s/%s with %s: %s" %
                    (account, container, self.handler_class, repr(e)))
                self.log('error', traceback.format_exc())
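
The crawler above is generic over handler_class: from the call sites, a handler is constructed with (status_dir, settings) and must expose handle(), get_last_row() and save_last_row(). A minimal no-op sketch of that assumed interface:

class NoopHandler(object):
    # Satisfies the interface ContainerCrawler exercises above; a real
    # handler would sync each row somewhere and persist last-row state.
    def __init__(self, status_dir, settings):
        self.status_dir = status_dir
        self.settings = settings
        self._last_rows = {}  # in-memory only; real handlers persist this

    def handle(self, row_or_rows):
        # with bulk_process the crawler passes the whole list of rows,
        # otherwise one row dict from the container database
        pass

    def get_last_row(self, db_id):
        return self._last_rows.get(db_id, 0)

    def save_last_row(self, row_id, db_id):
        self._last_rows[db_id] = row_id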