Exemplo n.º 1
0
 def __init__(self, conf, **kwargs):
     """Build the directory and rdir clients from *conf* (needs 'namespace')."""
     self.conf = conf
     self.ns = conf['namespace']
     log = get_logger(conf)
     self.logger = log
     self.directory = DirectoryClient(conf, logger=log, **kwargs)
     self.rdir = RdirClient(conf, logger=log, **kwargs)
     # Conscience client is created lazily (see the `cs` property).
     self._cs = None
Exemplo n.º 2
0
 def setUp(self):
     """Create rdir/directory/container clients and some random container names."""
     super(TestMeta2Indexing, self).setUp()
     self.rdir_client = RdirClient(self.conf)
     self.directory_client = DirectoryClient(self.conf)
     self.container_client = ContainerClient(self.conf)
     # Between 1 and 10 containers with random 14-character names.
     count = randint(1, 10)
     self.containers = [random_str(14) for _ in range(count)]
     self.containers_svcs = {}
     self.event_agent_name = 'event-agent-1'
Exemplo n.º 3
0
 def __init__(self, conf, rdir_client=None, **kwargs):
     """Build clients from *conf*; reuse *rdir_client* when one is provided."""
     self.conf = conf
     self.ns = conf['namespace']
     self.logger = get_logger(conf)
     self.directory = DirectoryClient(conf, logger=self.logger, **kwargs)
     # Fall back on a fresh RdirClient when none (or a falsy one) is given.
     self.rdir = rdir_client or RdirClient(conf, logger=self.logger, **kwargs)
     # Lazily-created members.
     self._cs = None
     self._pool_options = None
Exemplo n.º 4
0
    def __init__(self, volume_path, conf, pool_manager=None):
        """
        Create a worker indexing the meta2 databases of one volume.

        Relevant configuration keys:
        - interval: (int) seconds between two full scans (default: 1800).
        - report_interval: (int) seconds between two reports (default: 300).
        - scanned_per_second: (int) maximum number of databases indexed
          per second (default: 3000).
        - try_removing_faulty_indexes: when a database does not seem to
          belong to this volume, try to remove it from the volume's rdir
          index. WARNING: the decision is based on a proxy response that
          may be affected by cache inconsistencies, use at your own risk
          (default: False).

        :param volume_path: the volume path to be indexed
        :param conf: the configuration passed to the needed services
        :param pool_manager: a connection pool manager; when none is given,
                a new one with a default size of 10 is created
        """
        self.volume = volume_path
        self.logger = get_logger(conf)
        self._stop = False
        # Progress counters, all starting at zero.
        self.success_nb = 0
        self.failed_nb = 0
        self.full_scan_nb = 0
        self.indexed_since_last_report = 0
        # Timestamps used for throttling and periodic reporting.
        self.last_report_time = 0
        self.last_scan_time = 0
        self.last_index_time = 0
        self.start_time = 0
        self.scans_interval = int_value(conf.get('interval'), 1800)
        self.report_interval = int_value(conf.get('report_interval'), 300)
        self.max_indexed_per_second = int_value(
            conf.get('scanned_per_second'), 3000)
        self.namespace, self.volume_id = check_volume_for_service_type(
            self.volume, "meta2")
        self.attempt_bad_index_removal = boolean_value(
            conf.get('try_removing_faulty_indexes', False))

        pool_manager = pool_manager or get_pool_manager(pool_connections=10)
        self.index_client = RdirClient(
            conf, logger=self.logger, pool_manager=pool_manager)
        self.dir_client = DirectoryClient(
            conf, logger=self.logger, pool_manager=pool_manager)
Exemplo n.º 5
0
    def __init__(self, namespace, **kwargs):
        """Set up a meta2 checker pointing at the proxy's content routes."""
        proxy = load_namespace_conf(namespace).get('proxy')
        # "http:/" plus the join separator yields the "http://" scheme.
        parts = ["http:/", proxy, "v3.0", namespace, "content"]
        super(CheckMeta2, self).__init__(namespace, "meta2",
                                         endpoint="/".join(parts), **kwargs)

        self.account = AccountClient({"namespace": self.ns})
        self.container = ContainerClient({"namespace": self.ns})
        self.directory = DirectoryClient({"namespace": self.ns})
        # Random reference name used by the check.
        self.reference = random_buffer('0123456789ABCDEF', 64)
Exemplo n.º 6
0
    def __init__(self, namespace, **kwargs):
        """
        Initialize the object storage API.

        :param namespace: name of the namespace to interact with
        :type namespace: `str`

        :keyword connection_timeout: connection timeout towards rawx services
        :type connection_timeout: `float` seconds
        :keyword read_timeout: timeout for rawx responses and data reads from
            the caller (when uploading)
        :type read_timeout: `float` seconds
        :keyword write_timeout: timeout for rawx write requests
        :type write_timeout: `float` seconds
        """
        self.namespace = namespace
        # All timeouts default to None (i.e. no timeout).
        for opt in ("connection_timeout", "read_timeout", "write_timeout"):
            setattr(self, opt, utils.float_value(kwargs.get(opt), None))

        # FIXME: share session between all the clients
        self.directory = DirectoryClient({"namespace": self.namespace},
                                         **kwargs)
        self.account = AccountClient({"namespace": self.namespace}, **kwargs)
        self.container = ContainerClient({"namespace": self.namespace},
                                         **kwargs)
Exemplo n.º 7
0
 def __init__(self, conf, directory_client=None, **kwargs):
     """rdir HTTP client; reuses *directory_client* when one is provided."""
     super(RdirClient, self).__init__(service_type='rdir', **kwargs)
     self.conf = conf
     self.ns = conf['namespace']
     self.directory = directory_client or DirectoryClient(conf, **kwargs)
     # volume_id -> rdir host cache.
     self._addr_cache = {}
     self._cs = None
Exemplo n.º 8
0
def make_client(instance):
    """Build a DirectoryClient bound to *instance*'s namespace and endpoint."""
    from oio.directory.client import DirectoryClient

    return DirectoryClient({"namespace": instance.namespace},
                           endpoint=instance.get_endpoint('directory'))
Exemplo n.º 9
0
    def __init__(self, namespace, logger=None, **kwargs):
        """
        Initialize the object storage API.

        :param namespace: name of the namespace to interact with
        :type namespace: `str`

        :keyword connection_timeout: connection timeout towards rawx services
        :type connection_timeout: `float` seconds
        :keyword read_timeout: timeout for rawx responses and data reads from
            the caller (when uploading)
        :type read_timeout: `float` seconds
        :keyword write_timeout: timeout for rawx write requests
        :type write_timeout: `float` seconds
        :keyword pool_manager: a pooled connection manager that will be used
            for all HTTP based APIs (except rawx)
        :type pool_manager: `urllib3.PoolManager`
        """
        self.namespace = namespace
        conf = {"namespace": self.namespace}
        self.logger = logger or get_logger(conf)
        # Keep only the recognized timeout options, converted to float.
        self.timeouts = {}
        for tok, tov in kwargs.items():
            if tok in self.__class__.TIMEOUT_KEYS:
                self.timeouts[tok] = float_value(tov, None)

        from oio.account.client import AccountClient
        from oio.container.client import ContainerClient
        from oio.directory.client import DirectoryClient
        self.directory = DirectoryClient(conf, logger=self.logger, **kwargs)
        self.container = ContainerClient(conf, logger=self.logger, **kwargs)

        # In AccountClient, "endpoint" is the account service, not the proxy
        acct_kwargs = dict(kwargs)
        acct_kwargs["proxy_endpoint"] = acct_kwargs.pop("endpoint", None)
        self.account = AccountClient(conf, logger=self.logger, **acct_kwargs)
Exemplo n.º 10
0
def run(args):
    """
    Check that every account entry still has a meta2 reference, and post a
    container-deletion update to the account service for those that do not.

    :param args: parsed CLI arguments; uses ``prefix``, ``verbose`` and
        ``dry_run``.
    """
    pool = get_pool_manager()

    v = vars(args)

    dirclient = DirectoryClient(v)
    backend = AccountBackend(v)

    for entry, _, _, partial in full_list(backend, prefix=args.prefix):
        if partial:
            if args.verbose:
                print(":%s: partial, skip" % entry)
            continue
        try:
            dirclient.show(account=ACCOUNT, reference=entry)
            if args.verbose:
                print("%s: OK" % entry)
            continue
        except NotFound:
            # Reference is gone: the container must be purged from account.
            pass
        except Exception as exc:
            print("Exception not managed for %s: %s" % (entry, str(exc)))
            continue
        print("%s: meta2 not found" % entry)
        if args.dry_run:
            continue

        data = {"dtime": time(), "name": entry}
        # post event to Account service
        res = pool.request('POST',
                           HOST +
                           '/v1.0/account/container/update?id=%s' % ACCOUNT,
                           headers={'Content-Type': 'application/json'},
                           body=json.dumps(data))
        # BUG FIX: urllib3 responses expose `status` (not `status_int`,
        # which raised AttributeError), and `/` is true division on
        # Python 3 (201 / 100 == 2.01 != 2), so the old check
        # `res.status_int / 100 != 2` could never work as intended.
        if res.status // 100 != 2:
            print(res.status)
Exemplo n.º 11
0
class TestDirectoryAPI(BaseTestCase):
    """
    Functional tests for DirectoryClient (reference CRUD and properties)
    and for RdirDispatcher rawx/rdir assignment.
    """

    def setUp(self):
        super(TestDirectoryAPI, self).setUp()
        self.api = DirectoryClient({'namespace': self.ns}, endpoint=self.uri)

    def _create(self, name, metadata=None):
        """Create a reference under the test account."""
        return self.api.create(self.account, name, properties=metadata)

    def _delete(self, name):
        """Delete a reference from the test account."""
        self.api.delete(self.account, name)

    def _clean(self, name, clear=False):
        """Delete *name*, optionally wiping its properties first."""
        if clear:
            # must clean properties before
            self.api.del_properties(self.account, name, [])
        self._delete(name)

    def _get_properties(self, name, properties=None):
        """Fetch (a subset of) the properties of reference *name*."""
        return self.api.get_properties(self.account,
                                       name,
                                       properties=properties)

    def _set_properties(self, name, properties=None):
        """Set properties on reference *name*."""
        return self.api.set_properties(self.account,
                                       name,
                                       properties=properties)

    def test_list(self):
        """List a reference before creation, after creation, after delete."""
        # get on unknown reference
        name = random_str(32)
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

        self._create(name)
        # get on existing reference
        res = self.api.list(self.account, name)
        self.assertIsNot(res['dir'], None)
        self.assertIsNot(res['srv'], None)
        self.assertEqual(res['name'], name)
        self.assertEqual(res['account'], self.account)

        self._delete(name)
        # get on deleted reference
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

    def test_show_by_cid(self):
        """A reference can be listed by container ID instead of name."""
        name = random_str(32)

        self._create(name)

        res = self.api.list(cid=cid_from_name(self.account, name))
        self.assertIsNotNone(res['dir'])
        self.assertIsNotNone(res['srv'])
        self.assertEqual(res['name'], name)
        self.assertEqual(res['account'], self.account)

        self._delete(name)

    def test_create(self):
        """Creating twice returns True then False (already exists)."""
        name = random_str(32)
        res = self._create(name)
        self.assertEqual(res, True)

        # second create
        res = self._create(name)
        self.assertEqual(res, False)

        # clean
        self._delete(name)

    def test_create_properties(self):
        """Properties passed at creation time are persisted."""
        name = random_str(32)

        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        res = self._create(name, metadata)
        self.assertEqual(res, True)

        data = self._get_properties(name)

        self.assertEqual(data['properties'], metadata)

        # clean
        self._clean(name, True)

    def test_create_without_account(self):
        """Creating a reference auto-creates the owning account."""
        account = random_str(32)
        name = random_str(32)
        account_client = AccountClient(self.conf)

        self.assertRaises(exc.NotFound, account_client.account_show, account)
        self.api.create(account, name)
        time.sleep(0.5)  # ensure account event have been processed
        self.assertEqual(account_client.account_show(account)['id'], account)

        # clean
        self.api.delete(account, name)
        account_client.account_delete(account)

    def test_delete(self):
        """Delete raises NotFound on unknown/already-deleted references."""
        name = random_str(32)

        # delete on unknown reference
        self.assertRaises(exc.NotFound, self.api.delete, self.account, name)

        res = self._create(name)
        self.assertEqual(res, True)
        # delete on existing reference
        self._delete(name)

        # verify deleted
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

        # second delete
        self.assertRaises(exc.NotFound, self.api.delete, self.account, name)

        # verify deleted
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

    def test_get_properties(self):
        """get_properties returns all properties or only the requested keys."""
        name = random_str(32)

        # get_properties on unknown reference
        self.assertRaises(exc.NotFound, self.api.get_properties, self.account,
                          name)

        res = self._create(name)
        self.assertEqual(res, True)

        # get_properties on existing reference
        data = self.api.get_properties(self.account, name)
        self.assertEqual(data['properties'], {})

        # get_properties
        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        self._set_properties(name, metadata)

        data = self.api.get_properties(self.account, name)
        self.assertEqual(data['properties'], metadata)

        # get_properties specify key
        key, old_val = metadata.popitem()

        data = self.api.get_properties(self.account, name, [key])
        self.assertEqual(data['properties'], {key: old_val})

        # clean
        self._clean(name, True)

        # get_properties on deleted reference
        self.assertRaises(exc.NotFound, self.api.get_properties, self.account,
                          name)

    def test_set_properties(self):
        """set_properties merges, overwrites and empty-value-deletes keys."""
        name = random_str(32)

        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }

        # set_properties on unknown reference
        self.assertRaises(exc.NotFound, self.api.set_properties, self.account,
                          name, metadata)

        res = self._create(name)
        self.assertEqual(res, True)

        # set_properties on existing reference
        self.api.set_properties(self.account, name, metadata)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)

        # set_properties
        key = random_str(32)
        value = random_str(32)
        metadata2 = {key: value}
        self._set_properties(name, metadata2)
        metadata.update(metadata2)

        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)

        # set_properties overwrite key
        key, _ = metadata.popitem()
        value = random_str(32)
        metadata3 = {key: value}

        metadata.update(metadata3)
        self.api.set_properties(self.account, name, metadata3)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)

        # set_properties overwrite key with empty value
        key = list(metadata.keys())[0]
        metadata4 = {key: ''}

        del metadata[key]
        self.api.set_properties(self.account, name, metadata4)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)

        # clean
        self._clean(name, True)

        # set_properties on deleted reference
        self.assertRaises(exc.NotFound, self.api.set_properties, self.account,
                          name, metadata)

    def test_del_properties(self):
        """del_properties removes known keys, ignores unknown ones."""
        name = random_str(32)

        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }

        # del_properties on unknown reference
        self.assertRaises(exc.NotFound, self.api.del_properties, self.account,
                          name, [])

        res = self._create(name, metadata)
        self.assertEqual(res, True)

        key, _ = metadata.popitem()

        # del_properties on existing reference
        self.api.del_properties(self.account, name, [key])
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)

        # del_properties on unknown key
        key = random_str(32)
        # We do not check if a property exists before deleting it
        # self.assertRaises(
        #     exc.NotFound, self.api.del_properties, self.account, name,
        #     [key])
        self.api.del_properties(self.account, name, [key])

        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)

        # clean
        self._clean(name, True)

        # del_properties on deleted reference
        self.assertRaises(exc.NotFound, self.api.set_properties, self.account,
                          name, metadata)

    def test_list_services(self):
        """Listing with a service_type filter behaves like a plain list."""
        # list_services on unknown reference
        name = random_str(32)
        echo = 'echo'
        self.assertRaises(exc.NotFound,
                          self.api.list,
                          self.account,
                          name,
                          service_type=echo)

        self._create(name)
        # list_services on existing reference
        res = self.api.list(self.account, name, service_type=echo)
        self.assertIsNot(res['dir'], None)
        self.assertIsNot(res['srv'], None)
        self.assertEqual(res['name'], name)
        self.assertEqual(res['account'], self.account)

        self._delete(name)
        # get on deleted reference
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

    def test_link_rdir_to_zero_scored_rawx(self):
        """Even a zero-scored rawx must get an rdir assigned."""
        disp = RdirDispatcher({'namespace': self.ns},
                              pool_manager=self.http_pool)

        # Register a service, with score locked to zero
        new_rawx = self._srv('rawx', {'tag.loc': _fake_location})
        new_rawx['score'] = 0
        self._register_srv(new_rawx)
        self._reload_proxy()

        all_rawx = disp.assign_all_rawx()
        all_rawx_keys = [x['addr'] for x in all_rawx]
        self.assertIn(new_rawx['addr'], all_rawx_keys)
        rdir_addr = disp.rdir._get_rdir_addr(new_rawx['addr'])
        self.assertIsNotNone(rdir_addr)
        try:
            # Best-effort cleanup: ignore failures so the test stays green.
            self.api.unlink(RDIR_ACCT, new_rawx['addr'], 'rdir')
            self.api.delete(RDIR_ACCT, new_rawx['addr'])
            # self._flush_cs('rawx')
        except Exception:
            pass

    def test_link_rdir_unachievable_min_dist(self):
        """An impossible min_dist must fail and leave no rdir assigned."""
        disp = RdirDispatcher({'namespace': self.ns},
                              pool_manager=self.http_pool)

        # Register a service, with score locked to zero
        new_rawx = self._srv('rawx', {'tag.loc': _fake_location})
        new_rawx['score'] = 90
        self._register_srv(new_rawx)
        self._reload_proxy()

        self.assertRaises(exc.OioException, disp.assign_all_rawx, min_dist=4)
        all_rawx, _ = disp.get_assignments('rawx')
        all_rawx_keys = [x['addr'] for x in all_rawx]
        self.assertIn(new_rawx['addr'], all_rawx_keys)
        self.assertRaises(exc.VolumeException, disp.rdir._get_rdir_addr,
                          new_rawx['addr'])

    def _generate_services(self, types, score=50):
        """Build fake service dicts per type, e.g. {'rawx': 3} -> 3 rawx."""
        all_srvs = dict()
        for type_, count in types.items():
            srvs = [
                self._srv(type_, {'tag.loc': 'whatever%d' % i})
                for i in range(count)
            ]
            for srv in srvs:
                srv['score'] = score
                srv['id'] = _make_id(self.ns, type_, srv['addr'])
            all_srvs[type_] = srvs
        return all_srvs

    def _test_link_rdir_fail_to_force(self, side_effects, expected_exc):
        """
        Run assign_all_rawx with mocked services while 'force' fails with
        *side_effects*; expect *expected_exc* but verify every rawx was tried.
        """
        disp = RdirDispatcher({'namespace': self.ns})

        # Mock rdir and rawx services so we do not pollute following tests
        all_srvs = self._generate_services({'rdir': 3, 'rawx': 3})

        def _all_services(type_, *args, **kwargs):
            """Return all mocked services of specified type"""
            return all_srvs[type_]

        def _poll(*args, **kwargs):
            """Pick one mocked random service"""
            return [random.choice(all_srvs['rdir'])]

        disp.cs.all_services = Mock(side_effect=_all_services)
        disp.cs.poll = Mock(side_effect=_poll)

        # Mock the check method to avoid calling the proxy
        disp.directory.list = Mock(side_effect=exc.NotFound)

        # Mock the assignation methods so we can check the calls
        disp._smart_link_rdir = \
            Mock(wraps=disp._smart_link_rdir)
        disp.directory.force = \
            Mock(wraps=disp.directory.force,
                 side_effect=side_effects)

        # Expect an exception since some assignations will fail
        self.assertRaises(expected_exc, disp.assign_all_rawx, max_attempts=1)

        # But ensure all calls have been made
        link_calls = [
            call(rawx['addr'],
                 ANY,
                 max_per_rdir=ANY,
                 max_attempts=1,
                 min_dist=ANY,
                 service_type='rawx',
                 reassign=False) for rawx in all_srvs['rawx']
        ]
        disp._smart_link_rdir.assert_has_calls(link_calls)
        force_calls = \
            [call(RDIR_ACCT, rawx['addr'], 'rdir', ANY, autocreate=True,
                  replace=ANY)
             for rawx in all_srvs['rawx']]
        disp.directory.force.assert_has_calls(force_calls)

    def test_link_rdir_fail_to_force_one(self):
        """
        Verify that the failure of one 'force' operation does
        not break the whole operation.
        """
        self._test_link_rdir_fail_to_force(
            [exc.ServiceBusy(message='Failed :(', status=503), None, None],
            exc.ServiceBusy)

    def test_link_rdir_fail_to_force_several(self):
        """
        Verify that the failure of two 'force' operations does
        not break the whole operation.
        """
        self._test_link_rdir_fail_to_force([
            exc.ServiceBusy(message='Failed :(', status=503),
            exc.OioTimeout('Timeout :('), None
        ], exc.OioException)

    def test_rdir_repartition(self):
        """After assignment, no rdir holds much more than the average load."""
        # FIXME(FVE): this test will fail if run after self._flush_cs('rawx')
        client = RdirDispatcher({'namespace': self.ns})
        self._reload_proxy()
        all_rawx = client.assign_all_rawx()
        self.assertGreater(len(all_rawx), 0)
        by_rdir = dict()
        total = 0
        for rawx in all_rawx:
            count = by_rdir.get(rawx['rdir']['addr'], 0)
            total += 1
            by_rdir[rawx['rdir']['addr']] = count + 1
        avg = total / float(len(by_rdir))
        print("Ideal number of bases per rdir: ", avg)
        print("Current repartition: ", by_rdir)
        for count in by_rdir.values():
            self.assertLessEqual(count, avg + 1)
Exemplo n.º 12
0
class RdirDispatcher(object):
    """
    Assign rdir services to rawx services, and report the current
    rawx-to-rdir assignments, using the directory and conscience.
    """

    def __init__(self, conf, **kwargs):
        # conf must provide at least the 'namespace' key.
        self.conf = conf
        self.ns = conf['namespace']
        self.logger = get_logger(conf)
        self.directory = DirectoryClient(conf, logger=self.logger, **kwargs)
        self.rdir = RdirClient(conf, logger=self.logger, **kwargs)
        # Conscience client, created lazily by the `cs` property.
        self._cs = None

    @property
    def cs(self):
        """Lazily-created ConscienceClient."""
        if not self._cs:
            self._cs = ConscienceClient(self.conf, logger=self.logger)
        return self._cs

    def get_assignation(self):
        """
        Describe the current rawx-to-rdir assignments.

        :returns: a tuple (all rawx services with an 'rdir' entry attached
            when one is linked, all rdir services).
        """
        all_rawx = self.cs.all_services('rawx')
        all_rdir = self.cs.all_services('rdir', True)
        by_id = {_make_id(self.ns, 'rdir', x['addr']): x for x in all_rdir}

        for rawx in all_rawx:
            try:
                # Verify that there is no rdir linked
                resp = self.directory.list(RDIR_ACCT,
                                           rawx['addr'],
                                           service_type='rdir')
                rdir_host = _filter_rdir_host(resp)
                try:
                    rawx['rdir'] = by_id[_make_id(self.ns, 'rdir', rdir_host)]
                except KeyError:
                    # Linked rdir not known by conscience: report it anyway,
                    # with an empty tag set, so callers see the link.
                    self.logger.warn("rdir %s linked to rawx %s seems down",
                                     rdir_host, rawx['addr'])
                    rawx['rdir'] = {"addr": rdir_host, "tags": dict()}
                    by_id[_make_id(self.ns, 'rdir', rdir_host)] = rawx['rdir']
            except NotFound:
                self.logger.info("No rdir linked to %s", rawx['addr'])
        return all_rawx, all_rdir

    def assign_all_rawx(self, max_per_rdir=None):
        """
        Find a rdir service for all rawx that don't have one already.

        :param max_per_rdir: maximum number or rawx services that an rdir
                             can be linked to
        :type max_per_rdir: `int`
        """
        all_rawx = self.cs.all_services('rawx')
        all_rdir = self.cs.all_services('rdir', True)
        if len(all_rdir) <= 0:
            raise ServiceUnavailable("No rdir service found in %s" % self.ns)

        by_id = {_make_id(self.ns, 'rdir', x['addr']): x for x in all_rdir}

        for rawx in all_rawx:
            try:
                # Verify that there is no rdir linked
                resp = self.directory.list(RDIR_ACCT,
                                           rawx['addr'],
                                           service_type='rdir')
                rdir_host = _filter_rdir_host(resp)
                try:
                    rawx['rdir'] = by_id[_make_id(self.ns, 'rdir', rdir_host)]
                except KeyError:
                    self.logger.warn("rdir %s linked to rawx %s seems down",
                                     rdir_host, rawx['addr'])
            except (NotFound, ClientException):
                # No rdir linked yet: pick one, unless the rawx is not
                # pollable (score <= 0 -- load balancer limitation).
                if rawx['score'] <= 0:
                    self.logger.warn(
                        "rawx %s has score %s, and thus cannot be"
                        " affected a rdir (load balancer "
                        "limitation)", rawx['addr'], rawx['score'])
                    continue
                rdir = self._smart_link_rdir(rawx['addr'], all_rdir,
                                             max_per_rdir)
                # Keep the per-rdir database count up to date so the next
                # iterations balance against the freshly-made assignment.
                n_bases = by_id[rdir]['tags'].get("stat.opened_db_count", 0)
                by_id[rdir]['tags']["stat.opened_db_count"] = n_bases + 1
                rawx['rdir'] = by_id[rdir]
        return all_rawx

    def _smart_link_rdir(self, volume_id, all_rdir, max_per_rdir=None):
        """
        Force the load balancer to avoid services that already host more
        bases than the average (or more than `max_per_rdir`)
        while selecting rdir services.
        """
        opened_db = [
            x['tags']['stat.opened_db_count'] for x in all_rdir
            if x['score'] > 0
        ]
        if len(opened_db) <= 0:
            raise ServiceUnavailable("No valid rdir service found in %s" %
                                     self.ns)
        if not max_per_rdir:
            upper_limit = sum(opened_db) / float(len(opened_db))
        else:
            upper_limit = max_per_rdir - 1
        avoids = [
            _make_id(self.ns, "rdir", x['addr']) for x in all_rdir if
            x['score'] > 0 and x['tags']['stat.opened_db_count'] > upper_limit
        ]
        known = [_make_id(self.ns, "rawx", volume_id)]
        try:
            polled = self._poll_rdir(avoid=avoids, known=known)
        except ClientException as exc:
            # 481 presumably means "no service polled" -- TODO confirm.
            if exc.status != 481 or max_per_rdir:
                raise
            # Retry without `avoids`, hoping the next iteration will rebalance
            polled = self._poll_rdir(known=known)
        # Register the polled rdir as the volume's rdir in the directory.
        forced = {
            'host': polled['addr'],
            'type': 'rdir',
            'seq': 1,
            'args': "",
            'id': polled['id']
        }
        self.directory.force(RDIR_ACCT,
                             volume_id,
                             'rdir',
                             forced,
                             autocreate=True)
        try:
            self.rdir.create(volume_id)
        except Exception as exc:
            # Best effort: the database can be created later.
            self.logger.warn("Failed to create database for %s on %s: %s",
                             volume_id, polled['addr'], exc)
        return polled['id']

    def _poll_rdir(self, avoid=None, known=None):
        """Call the special rdir service pool (created if missing)"""
        try:
            svcs = self.cs.poll('__rawx_rdir', avoid=avoid, known=known)
        except ClientException as exc:
            # 400 presumably means the pool does not exist yet: create it
            # and retry once -- TODO confirm the status semantics.
            if exc.status != 400:
                raise
            self.cs.lb.create_pool('__rawx_rdir', ((1, 'rawx'), (1, 'rdir')))
            svcs = self.cs.poll('__rawx_rdir', avoid=avoid, known=known)
        for svc in svcs:
            # FIXME: we should include the service type in a dedicated field
            if 'rdir' in svc['id']:
                return svc
        raise ServerException("LB returned incoherent result: %s" % svcs)
Exemplo n.º 13
0
class RdirClient(HttpApi):
    """
    Client class for rdir services.

    Resolves the rdir service in charge of a volume through the
    directory, then talks to it directly (with a small address cache).
    """

    # URL prefix on the rdir service, per type of indexed service.
    base_url = {
        'rawx': 'rdir',
        'meta2': 'rdir/meta2',
    }

    def __init__(self, conf, **kwargs):
        super(RdirClient, self).__init__(service_type='rdir', **kwargs)
        self.directory = DirectoryClient(conf, **kwargs)
        self.ns = conf['namespace']
        # Maps volume_id -> rdir host, avoids one directory lookup
        # per request.
        self._addr_cache = dict()

    def _clear_cache(self, volume_id):
        """Forget the cached rdir address of `volume_id`."""
        self._addr_cache.pop(volume_id, None)

    def _get_rdir_addr(self, volume_id, reqid=None):
        """
        Resolve the address of the rdir service assigned to `volume_id`.

        :raises VolumeException: when no rdir is assigned to the volume.
        """
        # Initial lookup in the cache
        if volume_id in self._addr_cache:
            return self._addr_cache[volume_id]
        # Not cached, try a direct lookup
        try:
            headers = {REQID_HEADER: reqid or request_id()}
            resp = self.directory.list(RDIR_ACCT, volume_id,
                                       service_type='rdir',
                                       headers=headers)
            host = _filter_rdir_host(resp)
            # Add the new service to the cache
            self._addr_cache[volume_id] = host
            return host
        except NotFound:
            raise VolumeException('No rdir assigned to volume %s' % volume_id)

    def _make_uri(self, action, volume_id, reqid=None, service_type='rawx'):
        """Build the URI of an rdir request for `volume_id`."""
        rdir_host = self._get_rdir_addr(volume_id, reqid)
        return 'http://%s/v1/%s/%s' % (rdir_host,
                                       self.__class__.base_url[service_type],
                                       action)

    @ensure_headers
    @ensure_request_id
    def _rdir_request(self, volume, method, action, create=False, params=None,
                      service_type='rawx', **kwargs):
        """
        Send a request to the rdir service in charge of `volume`.

        On network error, drop the cached address so the next attempt
        resolves the rdir service again.
        """
        if params is None:
            params = dict()
        params['vol'] = volume
        if create:
            params['create'] = '1'
        uri = self._make_uri(action, volume,
                             reqid=kwargs['headers'][REQID_HEADER],
                             service_type=service_type)
        try:
            resp, body = self._direct_request(method, uri, params=params,
                                              **kwargs)
        except OioNetworkException:
            self._clear_cache(volume)
            raise

        return resp, body

    def create(self, volume_id, service_type='rawx', **kwargs):
        """Create the database for `volume_id` on the appropriate rdir"""
        self._rdir_request(volume_id, 'POST', 'create',
                           service_type=service_type, **kwargs)

    def chunk_push(self, volume_id, container_id, content_id, chunk_id,
                   headers=None, **data):
        """Reference a chunk in the reverse directory"""
        body = {'container_id': container_id,
                'content_id': content_id,
                'chunk_id': chunk_id}

        # Python 3: dict has no iteritems(), use items()
        # (consistent with the rest of this class).
        for key, value in data.items():
            body[key] = value

        self._rdir_request(volume_id, 'POST', 'push', create=True,
                           json=body, headers=headers)

    def chunk_delete(self, volume_id, container_id, content_id, chunk_id,
                     **kwargs):
        """Unreference a chunk from the reverse directory"""
        body = {'container_id': container_id,
                'content_id': content_id,
                'chunk_id': chunk_id}

        self._rdir_request(volume_id, 'DELETE', 'delete',
                           json=body, **kwargs)

    def chunk_fetch(self, volume, limit=1000, rebuild=False,
                    container_id=None, max_attempts=3,
                    start_after=None, shuffle=False, **kwargs):
        """
        Fetch the list of chunks belonging to the specified volume.

        :param volume: the volume to get chunks from
        :type volume: `str`
        :param limit: maximum number of results to return per request
            to the rdir server.
        :type limit: `int`
        :param rebuild: fetch only the chunks that were there
            before the last incident.
        :type rebuild: `bool`
        :keyword container_id: get only chunks belonging to
           the specified container
        :type container_id: `str`
        :keyword start_after: fetch only chunk that appear after
            this container ID
        :type start_after: `str`
        """
        req_body = {'limit': limit}
        if rebuild:
            req_body['rebuild'] = True
        if container_id:
            req_body['container_id'] = container_id
        if start_after:
            req_body['start_after'] = start_after

        while True:
            for i in range(max_attempts):
                try:
                    _resp, resp_body = self._rdir_request(
                        volume, 'POST', 'fetch', json=req_body, **kwargs)
                    break
                except OioNetworkException:
                    # Monotonic backoff
                    if i < max_attempts - 1:
                        sleep(i * 1.0)
                        continue
                    # Too many attempts
                    raise

            truncated = _resp.headers.get(
                HEADER_PREFIX + 'list-truncated')
            if truncated is None:
                # TODO(adu): Delete when it will no longer be used
                if not resp_body:
                    break
                truncated = True
                req_body['start_after'] = resp_body[-1][0]
            else:
                truncated = true_value(truncated)
                if truncated:
                    req_body['start_after'] = _resp.headers[
                        HEADER_PREFIX + 'list-marker']

            if shuffle:
                random.shuffle(resp_body)
            for (key, value) in resp_body:
                container, content, chunk = key.split('|')
                yield container, content, chunk, value

            if not truncated:
                break

    def admin_incident_set(self, volume, date, **kwargs):
        """Set an incident date on `volume` (seconds, truncated to int)."""
        body = {'date': int(float(date))}
        self._rdir_request(volume, 'POST', 'admin/incident',
                           json=body, **kwargs)

    def admin_incident_get(self, volume, **kwargs):
        """Return the incident date set on `volume`, or None."""
        _resp, body = self._rdir_request(volume, 'GET',
                                         'admin/incident', **kwargs)
        return body.get('date')

    def admin_lock(self, volume, who, **kwargs):
        """Lock the rdir database of `volume` on behalf of `who`."""
        body = {'who': who}

        self._rdir_request(volume, 'POST', 'admin/lock', json=body, **kwargs)

    def admin_unlock(self, volume, **kwargs):
        """Unlock the rdir database of `volume`."""
        self._rdir_request(volume, 'POST', 'admin/unlock', **kwargs)

    def admin_show(self, volume, **kwargs):
        """Return the administrative status of the rdir db of `volume`."""
        _resp, body = self._rdir_request(volume, 'GET', 'admin/show',
                                         **kwargs)
        return body

    def admin_clear(self, volume, clear_all=False, before_incident=False,
                    repair=False, **kwargs):
        """Clear (part of) the rdir database of `volume`."""
        params = {'all': clear_all, 'before_incident': before_incident,
                  'repair': repair}
        _resp, resp_body = self._rdir_request(
            volume, 'POST', 'admin/clear', params=params, **kwargs)
        return resp_body

    def status(self, volume, max=1000, prefix=None, marker=None,
               max_attempts=3, **kwargs):
        """
        Get the status of chunks belonging to the specified volume.

        :param volume: the volume to get chunks from
        :type volume: `str`
        :param max: maximum number of results to return per request
            to the rdir server.
        :type max: `int`
        :keyword prefix: get only chunks belonging to
           the specified prefix
        :type prefix: `str`
        :keyword marker: fetch only chunk that appear after
            this marker
        :type marker: `str`
        """
        req_params = {'max': max}
        if prefix:
            req_params['prefix'] = prefix
        if marker:
            req_params['marker'] = marker
        chunks = dict()
        containers = dict()

        while True:
            for i in range(max_attempts):
                try:
                    _resp, resp_body = self._rdir_request(
                        volume, 'GET', 'status', params=req_params, **kwargs)
                    break
                except OioNetworkException:
                    # Monotonic backoff
                    if i < max_attempts - 1:
                        sleep(i * 1.0)
                        continue
                    # Too many attempts
                    raise

            for (key, value) in resp_body.get('chunk', dict()).items():
                chunks[key] = chunks.get(key, 0) + value
            for (cid, info) in resp_body.get('container', dict()).items():
                for (key, value) in info.items():
                    # The right side runs first: setdefault creates the
                    # per-container dict before the assignment uses it.
                    containers[cid][key] = containers.setdefault(
                        cid, dict()).get(key, 0) + value

            if not true_value(_resp.headers.get(
                    HEADER_PREFIX + 'list-truncated')):
                break
            req_params['marker'] = _resp.headers[HEADER_PREFIX + 'list-marker']

        return {'chunk': chunks, 'container': containers}

    def meta2_index_create(self, volume_id, **kwargs):
        """
        Create a new meta2 rdir index.

        :param volume_id: The meta2 volume.
        """
        return self.create(volume_id, service_type='meta2', **kwargs)

    def meta2_index_push(self, volume_id, container_url, container_id, mtime,
                         **kwargs):
        """
        Add a newly created container to the list of containers handled
        by the meta2 server in question.

        :param volume_id: The meta2 volume.
        :param container_url: The container path (NS/account/container)
        :param container_id: The container ID.
        :param mtime: The last time it was spotted on this volume.
        :param headers: Optional headers to pass along to the request.
        """
        body = {'container_url': container_url,
                'container_id': container_id,
                'mtime': int(mtime)}

        # NOTE(review): kwargs are both merged into the JSON body and
        # forwarded to _rdir_request -- confirm the duplication is wanted.
        for key, value in kwargs.items():
            body[key] = value

        return self._rdir_request(volume=volume_id, method='POST',
                                  action='push', create=True, json=body,
                                  service_type='meta2',
                                  **kwargs)

    def _resolve_cid_to_path(self, cid):
        """
        Resolves a container ID into a container path.

        :param cid: The container ID.
        :return: NS/account/container path.
        """
        resp = self.directory.list(cid=cid)
        return '{0}/{1}/{2}'.format(
            self.ns,
            resp['account'],
            resp['name']
        )

    def meta2_index_delete(self, volume_id, container_path=None,
                           container_id=None, **kwargs):
        """
        Remove a meta2 record from the volume's index. Either the container ID
        or the container path have to be given.

        :param volume_id: The meta2 volume.
        :param container_id: The container ID.
        :param container_path: The container path
        :raises ValueError: when neither parameter is given.
        """
        if not container_path and container_id:
            container_path = self._resolve_cid_to_path(container_id)
        elif container_path and not container_id:
            # container_path is "NS/account/container" (the format produced
            # by _resolve_cid_to_path): index 1 is the account, 2 the name.
            # The previous code indexed [3], which is always out of range.
            _tmp = container_path.rsplit("/")
            container_id = cid_from_name(_tmp[1], _tmp[2])
        elif not container_path and not container_id:
            raise ValueError("At least the container ID or the container path "
                             "should be given.")

        body = {'container_url': container_path,
                'container_id': container_id}

        # NOTE(review): as in meta2_index_push, kwargs end up both in the
        # body and in the request keywords.
        for key, value in kwargs.items():
            body[key] = value

        return self._rdir_request(volume=volume_id, method='POST',
                                  action='delete', create=False, json=body,
                                  service_type='meta2', **kwargs)

    def meta2_index_fetch(self, volume_id, prefix=None, marker=None,
                          limit=4096, **kwargs):
        """
        Fetch specific meta2 records, or a range of records.

        :param volume_id: The meta2 volume.
        :param prefix: The prefix all meta2 records should have.
        :param marker: The container path from which the API will start the
                        listing. The marker will not be included in the result.
        :param limit: The number of records to be returned. Capped at 4096
        :return: A dictionary containing the following entries:
                  - records: A list containing the actual records.
                  - truncated: A boolean value representing whether there
                  are still records left that fulfill this query.
        """
        params = {}
        if prefix:
            params['prefix'] = prefix
        if marker:
            # FIXME(ABO): Validate this one.
            params['marker'] = marker
        if limit:
            params['limit'] = limit
        _resp, body = self._rdir_request(volume=volume_id, method='POST',
                                         action='fetch', json=params,
                                         service_type='meta2', **kwargs)
        return body

    def meta2_index_fetch_all(self, volume_id, **kwargs):
        """
        A wrapper around meta2_index_fetch that loops until no more records
        are available, returning all the records in a certain volume's index.

        WARNING: For testing purposes only
        """
        return depaginate(
            self.meta2_index_fetch,
            volume_id=volume_id,
            listing_key=lambda x: x['records'],
            truncated_key=lambda x: x['truncated'],
            # The following is only called when the list is truncated
            # So we can assume there are records in the list
            marker_key=lambda x: x['records'][-1]['container_url'],
            **kwargs
        )
Exemplo n.º 14
0
class RdirClient(Client):
    """
    Old-style client for rdir services: resolves (and optionally links)
    the rdir service of a volume through the directory on every lookup.
    """

    def __init__(self, conf, **kwargs):
        super(RdirClient, self).__init__(conf, **kwargs)
        # When True, automatically link an rdir service to unknown volumes.
        self.autocreate = true_value(conf.get('autocreate', True))
        self.directory_client = DirectoryClient(conf)

    # TODO keep rdir addr in local cache to avoid lookup requests
    def _get_rdir_addr(self, volume_id):
        """
        Return the host of the rdir service linked to `volume_id`,
        linking one first if autocreate is enabled.

        :raises ClientException: when no rdir service is linked.
        """
        try:
            resp = self.directory_client.show(acct='_RDIR',
                                              ref=volume_id,
                                              srv_type='rdir')
        except NotFound as e:
            if self.autocreate:
                self.directory_client.link('_RDIR',
                                           volume_id,
                                           'rdir',
                                           autocreate=True)
                resp = self.directory_client.show(acct='_RDIR',
                                                  ref=volume_id,
                                                  srv_type='rdir')
            else:
                raise e

        for srv in resp['srv']:
            if srv['type'] == 'rdir':
                return srv['host']
        raise ClientException("No rdir service found")

    def _make_uri(self, action, volume_id):
        """Build the URI of an rdir request for `volume_id`."""
        rdir_host = self._get_rdir_addr(volume_id)
        uri = 'http://%s/v1/%s/%s?vol=%s' % (rdir_host, self.ns, action,
                                             volume_id)
        return uri

    def _rdir_request(self, volume, method, action, **kwargs):
        """Send a request to the rdir service in charge of `volume`."""
        uri = self._make_uri(action, volume)
        resp, body = self._direct_request(method, uri, **kwargs)
        return resp, body

    def chunk_push(self, volume_id, container_id, content_id, chunk_id,
                   **data):
        """Reference a chunk in the reverse directory."""
        body = {
            'container_id': container_id,
            'content_id': content_id,
            'chunk_id': chunk_id
        }

        # Python 3: dict has no iteritems(), use items()
        for key, value in data.items():
            body[key] = value

        headers = {}

        self._rdir_request(volume_id,
                           'POST',
                           'rdir/push',
                           json=body,
                           headers=headers)

    def chunk_delete(self, volume_id, container_id, content_id, chunk_id):
        """Unreference a chunk from the reverse directory."""
        body = {
            'container_id': container_id,
            'content_id': content_id,
            'chunk_id': chunk_id
        }

        self._rdir_request(volume_id, 'DELETE', 'rdir/delete', json=body)

    def chunk_fetch(self, volume, limit=100, rebuild=False):
        """
        Yield (container, content, chunk, value) tuples for every chunk
        referenced on `volume`, paginating by `limit` records.
        """
        req_body = {'limit': limit}
        if rebuild:
            req_body['rebuild'] = True

        while True:
            resp, resp_body = self._rdir_request(volume,
                                                 'POST',
                                                 'rdir/fetch',
                                                 json=req_body)
            resp.raise_for_status()
            if len(resp_body) == 0:
                break
            for (key, value) in resp_body:
                container, content, chunk = key.split('|')
                yield container, content, chunk, value
            # Resume after the last key of this page (clearer than relying
            # on the loop variable leaking out of the for loop).
            req_body['start_after'] = resp_body[-1][0]

    def admin_incident_set(self, volume, date):
        """Set an incident date on `volume`."""
        body = {'date': date}
        self._rdir_request(volume, 'POST', 'rdir/admin/incident', json=body)

    def admin_incident_get(self, volume):
        """Return the incident date set on `volume`, or None."""
        resp, resp_body = self._rdir_request(volume, 'GET',
                                             'rdir/admin/incident')
        return resp_body.get('date')

    def admin_lock(self, volume, who):
        """Lock the rdir database of `volume` on behalf of `who`."""
        body = {'who': who}

        self._rdir_request(volume, 'POST', 'rdir/admin/lock', json=body)

    def admin_unlock(self, volume):
        """Unlock the rdir database of `volume`."""
        self._rdir_request(volume, 'POST', 'rdir/admin/unlock')

    def admin_show(self, volume):
        """Return the administrative status of the rdir db of `volume`."""
        resp, resp_body = self._rdir_request(volume, 'GET', 'rdir/admin/show')
        return resp_body
Exemplo n.º 15
0
def make_client(instance):
    """Build a DirectoryClient for `instance`'s namespace and endpoint."""
    conf = {"namespace": instance.namespace}
    return DirectoryClient(conf, endpoint=instance.get_endpoint('directory'))
Exemplo n.º 16
0
 def reference(self):
     """Lazily build and memoize the directory client."""
     client = self._reference
     if not client:
         client = DirectoryClient(self.conf)
         self._reference = client
     return client
Exemplo n.º 17
0
 def __init__(self, conf, **kwargs):
     """Initialize the rdir client and its directory lookup helper."""
     super(RdirClient, self).__init__(**kwargs)
     self.ns = conf['namespace']
     self.directory = DirectoryClient(conf, **kwargs)
     # Maps volume_id -> rdir host, spares one lookup per request.
     self._addr_cache = {}
Exemplo n.º 18
0
 def setUp(self):
     """Create a DirectoryClient bound to the test namespace."""
     super(TestDirectoryAPI, self).setUp()
     conf = {'namespace': self.ns}
     self.api = DirectoryClient(conf, endpoint=self.uri)
Exemplo n.º 19
0
class TestDirectoryAPI(BaseTestCase):
    """
    Functional tests for DirectoryClient (reference CRUD, properties,
    service listing and rdir linking).
    """

    def setUp(self):
        super(TestDirectoryAPI, self).setUp()
        self.api = DirectoryClient({'namespace': self.ns}, endpoint=self.uri)

    def _create(self, name, metadata=None):
        """Create a reference, optionally with properties."""
        return self.api.create(self.account, name, properties=metadata)

    def _delete(self, name):
        self.api.delete(self.account, name)

    def _clean(self, name, clear=False):
        if clear:
            # must clean properties before
            self.api.del_properties(self.account, name, [])
        self._delete(name)

    def _get_properties(self, name, properties=None):
        return self.api.get_properties(
            self.account, name, properties=properties)

    def _set_properties(self, name, properties=None):
        return self.api.set_properties(
            self.account, name, properties=properties)

    def test_list(self):
        # get on unknown reference
        name = random_str(32)
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

        self._create(name)
        # get on existing reference
        res = self.api.list(self.account, name)
        self.assertIsNot(res['dir'], None)
        self.assertIsNot(res['srv'], None)

        self._delete(name)
        # get on deleted reference
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

    def test_create(self):
        name = random_str(32)
        res = self._create(name)
        self.assertEqual(res, True)

        # second create
        res = self._create(name)
        self.assertEqual(res, False)

        # clean
        self._delete(name)

    def test_create_properties(self):
        name = random_str(32)

        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        res = self._create(name, metadata)
        self.assertEqual(res, True)

        data = self._get_properties(name)

        self.assertEqual(data['properties'], metadata)

        # clean
        self._clean(name, True)

    def test_create_without_account(self):
        account = random_str(32)
        name = random_str(32)
        account_client = AccountClient(self.conf)

        self.assertRaises(exc.NotFound, account_client.account_show, account)
        self.api.create(account, name)
        time.sleep(0.5)  # ensure account event have been processed
        self.assertEqual(account_client.account_show(account)['id'],
                         account)

        # clean
        self.api.delete(account, name)
        account_client.account_delete(account)

    def test_delete(self):
        name = random_str(32)

        # delete on unknown reference
        self.assertRaises(exc.NotFound, self.api.delete, self.account, name)

        res = self._create(name)
        self.assertEqual(res, True)
        # delete on existing reference
        self._delete(name)

        # verify deleted
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

        # second delete
        self.assertRaises(exc.NotFound, self.api.delete, self.account, name)

        # verify deleted
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

    def test_get_properties(self):
        name = random_str(32)

        # get_properties on unknown reference
        self.assertRaises(
            exc.NotFound, self.api.get_properties, self.account, name)

        res = self._create(name)
        self.assertEqual(res, True)

        # get_properties on existing reference
        data = self.api.get_properties(self.account, name)
        self.assertEqual(data['properties'], {})

        # get_properties
        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        self._set_properties(name, metadata)

        data = self.api.get_properties(self.account, name)
        self.assertEqual(data['properties'], metadata)

        # get_properties specify key
        # Python 3: dict views have no pop(), pick the first key explicitly
        key = list(metadata.keys())[0]

        data = self.api.get_properties(self.account, name, [key])
        self.assertEqual(data['properties'], {key: metadata[key]})

        # clean
        self._clean(name, True)

        # get_properties on deleted reference
        self.assertRaises(
            exc.NotFound, self.api.get_properties, self.account, name)

    def test_set_properties(self):
        name = random_str(32)

        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }

        # set_properties on unknown reference
        self.assertRaises(
            exc.NotFound, self.api.set_properties, self.account, name,
            metadata)

        res = self._create(name)
        self.assertEqual(res, True)

        # set_properties on existing reference
        self.api.set_properties(self.account, name, metadata)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)

        # set_properties
        key = random_str(32)
        value = random_str(32)
        metadata2 = {key: value}
        self._set_properties(name, metadata2)
        metadata.update(metadata2)

        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)

        # set_properties overwrite key
        # Python 3: dict views have no pop(), pick the first key explicitly
        key = list(metadata.keys())[0]
        value = random_str(32)
        metadata3 = {key: value}

        metadata.update(metadata3)
        self.api.set_properties(self.account, name, metadata3)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)

        # clean
        self._clean(name, True)

        # set_properties on deleted reference
        self.assertRaises(
            exc.NotFound, self.api.set_properties, self.account, name,
            metadata)

    def test_del_properties(self):
        name = random_str(32)

        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }

        # del_properties on unknown reference
        self.assertRaises(
            exc.NotFound, self.api.del_properties, self.account, name, [])

        res = self._create(name, metadata)
        self.assertEqual(res, True)

        # Python 3: dict views have no pop(), pick the last key explicitly
        key = list(metadata.keys())[-1]
        del metadata[key]

        # del_properties on existing reference
        self.api.del_properties(self.account, name, [key])
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)

        # del_properties on unknown key
        key = random_str(32)
        # We do not check if a property exists before deleting it
        # self.assertRaises(
        #     exc.NotFound, self.api.del_properties, self.account, name,
        #     [key])
        self.api.del_properties(self.account, name, [key])

        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)

        # clean
        self._clean(name, True)

        # del_properties on deleted reference
        self.assertRaises(
            exc.NotFound, self.api.set_properties, self.account, name,
            metadata)

    def test_list_services(self):
        # list_services on unknown reference
        name = random_str(32)
        echo = 'echo'
        self.assertRaises(
            exc.NotFound, self.api.list, self.account, name,
            service_type=echo)

        self._create(name)
        # list_services on existing reference
        res = self.api.list(self.account, name, service_type=echo)
        self.assertIsNot(res['dir'], None)
        self.assertIsNot(res['srv'], None)

        self._delete(name)
        # get on deleted reference
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

    def test_rdir_linking_old(self):
        """
        Tests that rdir services linked to rawx services
        are not on the same locations
        """
        self.skipTest('Deprecated way of linking rdir services')
        self._reload_proxy()
        cs = ConscienceClient({'namespace': self.ns})
        rawx_list = cs.all_services('rawx')
        rdir_dict = {x['addr']: x for x in cs.all_services('rdir')}
        # Link the services
        for rawx in rawx_list:
            self.api.link('_RDIR_TEST', rawx['addr'], 'rdir',
                          autocreate=True)
        # Do the checks
        for rawx in rawx_list:
            linked_rdir = self.api.list(
                '_RDIR_TEST', rawx['addr'], service_type='rdir')['srv']
            rdir = rdir_dict[linked_rdir[0]['host']]
            rawx_loc = rawx['tags'].get('tag.loc')
            rdir_loc = rdir['tags'].get('tag.loc')
            self.assertNotEqual(rawx_loc, rdir_loc)
        # Unlink the services
        for rawx in rawx_list:
            self.api.unlink('_RDIR_TEST', rawx['addr'], 'rdir')
            self.api.delete('_RDIR_TEST', rawx['addr'])

    def test_link_rdir_to_zero_scored_rawx(self):
        client = RdirClient({'namespace': self.ns})
        disp = RdirDispatcher({'namespace': self.ns})

        # Register a service, with score locked to zero
        new_rawx = self._srv('rawx', {'tag.loc': 'whatever'})
        new_rawx['score'] = 0
        self._register_srv(new_rawx)
        self._reload_proxy()

        all_rawx = disp.assign_all_rawx()
        all_rawx_keys = [x['addr'] for x in all_rawx]
        self.assertIn(new_rawx['addr'], all_rawx_keys)
        rdir_addr = client._get_rdir_addr(new_rawx['addr'])
        self.assertIsNotNone(rdir_addr)
        try:
            self.api.unlink('_RDIR', new_rawx['addr'], 'rdir')
            self.api.delete('_RDIR', new_rawx['addr'])
            # self._flush_cs('rawx')
        except Exception:
            pass

    def test_rdir_repartition(self):
        client = RdirDispatcher({'namespace': self.ns})
        self._reload_proxy()
        all_rawx = client.assign_all_rawx()
        self.assertGreater(len(all_rawx), 0)
        by_rdir = dict()
        total = 0
        for rawx in all_rawx:
            count = by_rdir.get(rawx['rdir']['addr'], 0)
            total += 1
            by_rdir[rawx['rdir']['addr']] = count + 1
        avg = total / float(len(by_rdir))
        print("Ideal number of bases per rdir: ", avg)
        print("Current repartition: ", by_rdir)
        # Python 3: dict has no itervalues(), use values()
        for count in by_rdir.values():
            self.assertLessEqual(count, avg + 1)
Exemplo n.º 20
0
class RdirClient(Client):
    """
    Old-style client for rdir services: resolves (and optionally links)
    the rdir service of a volume through the directory on every lookup.
    """

    def __init__(self, conf, **kwargs):
        super(RdirClient, self).__init__(conf, **kwargs)
        # When True, automatically link an rdir service to unknown volumes.
        self.autocreate = true_value(conf.get('autocreate', True))
        self.directory_client = DirectoryClient(conf)

    # TODO keep rdir addr in local cache to avoid lookup requests
    def _get_rdir_addr(self, volume_id):
        """
        Return the host of the rdir service linked to `volume_id`,
        linking one first if autocreate is enabled.

        :raises ClientException: when no rdir service is linked.
        """
        try:
            resp = self.directory_client.show(acct='_RDIR', ref=volume_id,
                                              srv_type='rdir')
        except NotFound as e:
            if self.autocreate:
                self.directory_client.link('_RDIR', volume_id, 'rdir',
                                           autocreate=True)
                resp = self.directory_client.show(acct='_RDIR', ref=volume_id,
                                                  srv_type='rdir')
            else:
                raise e

        for srv in resp['srv']:
            if srv['type'] == 'rdir':
                return srv['host']
        raise ClientException("No rdir service found")

    def _make_uri(self, action, volume_id):
        """Build the URI of an rdir request for `volume_id`."""
        rdir_host = self._get_rdir_addr(volume_id)
        uri = 'http://%s/v1/%s/%s?vol=%s' % (
            rdir_host, self.ns, action, volume_id)
        return uri

    def _rdir_request(self, volume, method, action, **kwargs):
        """Send a request to the rdir service in charge of `volume`."""
        uri = self._make_uri(action, volume)
        resp, body = self._direct_request(method, uri, **kwargs)
        return resp, body

    def chunk_push(self, volume_id, container_id, content_id, chunk_id,
                   **data):
        """Reference a chunk in the reverse directory."""
        body = {'container_id': container_id,
                'content_id': content_id,
                'chunk_id': chunk_id}

        # Python 3: dict has no iteritems(), use items()
        for key, value in data.items():
            body[key] = value

        headers = {}

        self._rdir_request(volume_id, 'POST', 'rdir/push',
                           json=body, headers=headers)

    def chunk_delete(self, volume_id, container_id, content_id, chunk_id):
        """Unreference a chunk from the reverse directory."""
        body = {'container_id': container_id,
                'content_id': content_id,
                'chunk_id': chunk_id}

        self._rdir_request(volume_id, 'DELETE', 'rdir/delete', json=body)

    def chunk_fetch(self, volume, limit=100, rebuild=False):
        """
        Yield (container, content, chunk, value) tuples for every chunk
        referenced on `volume`, paginating by `limit` records.
        """
        req_body = {'limit': limit}
        if rebuild:
            req_body['rebuild'] = True

        while True:
            resp, resp_body = self._rdir_request(volume, 'POST', 'rdir/fetch',
                                                 json=req_body)
            resp.raise_for_status()
            if len(resp_body) == 0:
                break
            for (key, value) in resp_body:
                container, content, chunk = key.split('|')
                yield container, content, chunk, value
            # Resume after the last key of this page (clearer than relying
            # on the loop variable leaking out of the for loop).
            req_body['start_after'] = resp_body[-1][0]

    def admin_incident_set(self, volume, date):
        """Set an incident date on `volume`."""
        body = {'date': date}
        self._rdir_request(volume, 'POST', 'rdir/admin/incident', json=body)

    def admin_incident_get(self, volume):
        """Return the incident date set on `volume`, or None."""
        resp, resp_body = self._rdir_request(volume, 'GET',
                                             'rdir/admin/incident')
        return resp_body.get('date')

    def admin_lock(self, volume, who):
        """Lock the rdir database of `volume` on behalf of `who`."""
        body = {'who': who}

        self._rdir_request(volume, 'POST', 'rdir/admin/lock', json=body)

    def admin_unlock(self, volume):
        """Unlock the rdir database of `volume`."""
        self._rdir_request(volume, 'POST', 'rdir/admin/unlock')

    def admin_show(self, volume):
        """Return the administrative status of the rdir db of `volume`."""
        resp, resp_body = self._rdir_request(volume, 'GET', 'rdir/admin/show')
        return resp_body
Exemplo n.º 21
0
 def __init__(self, conf, **kwargs):
     """Initialize the client; autocreate defaults to True unless disabled."""
     super(RdirClient, self).__init__(conf, **kwargs)
     self.directory_client = DirectoryClient(conf)
     self.autocreate = true_value(conf.get('autocreate', True))
Exemplo n.º 22
0
class TestMeta2Indexing(BaseTestCase):
    """Functional tests for the meta2 database indexing worker."""

    def setUp(self):
        super(TestMeta2Indexing, self).setUp()
        self.rdir_client = RdirClient(self.conf)
        self.directory_client = DirectoryClient(self.conf)
        self.container_client = ContainerClient(self.conf)
        # Random container names: between 1 and 10 of them per run
        self.containers = [random_str(14) for _ in range(0, randint(1, 10))]
        # Mapping container name -> list of meta2 services managing it
        self.containers_svcs = {}
        self.event_agent_name = 'event-agent-1'

    def tearDown(self):
        super(TestMeta2Indexing, self).tearDown()
        self._containers_cleanup()
        # Restart the event agent that was stopped during the test
        self._service(self.event_agent_name, 'start', wait=3)

    def _containers_cleanup(self):
        """Delete the test containers and their rdir index records."""
        for container in self.containers:
            self.container_client.container_delete(self.account, container)
            for svc in self.containers_svcs[container]:
                self.rdir_client.meta2_index_delete(
                    volume_id=svc['host'],
                    container_path="{0}/{1}/{2}".format(
                        self.ns, self.account, container),
                    container_id=cid_from_name(self.account, container))

    def _filter_by_managing_svc(self, all_containers, svc_of_interest):
        """
        Filters through the containers returning only those that have
        svc_of_interest in their list of managing services.
        """
        containers_list = []
        for key in all_containers.keys():
            if svc_of_interest in [x['host'] for x in all_containers[key]]:
                containers_list.append(key)

        return sorted(containers_list)

    def test_volume_indexing_worker(self):
        """
        Test steps:
        - Generate a list of container names and create them
        - Collect their respective meta2 servers
        - For each meta2 server:
            - Run a meta2 indexing worker
            - List all rdir index records and match them with the
              services we're expecting.
        :return:
        """
        # Stop the event agent so it does not index the containers itself
        self._service(self.event_agent_name, "stop", wait=3)

        for container in self.containers:
            self.container_client.container_create(account=self.account,
                                                   reference=container)

        # Record which meta2 services manage each created container
        for container in self.containers:
            self.containers_svcs[container] = [
                x
                for x in self.directory_client.list(account=self.account,
                                                    reference=container)['srv']
                if x['type'] == 'meta2'
            ]

        # Map each meta2 service to the path of its data volume
        meta2_data_paths = {}
        for svc in self.conf['services']['meta2']:
            svc_host = svc.get('service_id', svc['addr'])
            meta2_data_paths[svc_host] = svc['path']

        distinct_meta2_servers = set()
        for svc_list in self.containers_svcs.values():
            for svc in svc_list:
                distinct_meta2_servers.add(svc['host'])

        for svc in distinct_meta2_servers:
            expected_containers = self._filter_by_managing_svc(
                self.containers_svcs, svc)
            # Crawl the volume: this indexes every meta2 database found there
            worker = Meta2IndexingWorker(meta2_data_paths[svc], self.conf)
            worker.crawl_volume()
            indexed_containers = sorted([
                x['container_url'].split('/')[-1]
                for x in self.rdir_client.meta2_index_fetch_all(volume_id=svc)
            ])

            for cont in expected_containers:
                self.assertIn(cont, indexed_containers)
Exemplo n.º 23
0
 def __init__(self, conf, **kwargs):
     """Initialize the client; enable base autocreation unless disabled."""
     super(RdirClient, self).__init__(conf, **kwargs)
     # 'autocreate' defaults to True; true_value parses truthy strings too
     self.autocreate = true_value(conf.get('autocreate', True))
     self.directory_client = DirectoryClient(conf)
Exemplo n.º 24
0
 def directory(self):
     """Lazily instantiate and return the cached DirectoryClient."""
     if not self._directory:
         self._directory = DirectoryClient(self.conf)
     return self._directory
Exemplo n.º 25
0
class RdirDispatcher(object):
    """
    Assign rdir services to rawx/meta2 services and report the
    current assignments.
    """

    def __init__(self, conf, rdir_client=None, **kwargs):
        """
        :param conf: configuration dictionary, must contain 'namespace'.
        :param rdir_client: optional pre-built RdirClient to reuse.
        """
        self.conf = conf
        self.ns = conf['namespace']
        self.logger = get_logger(conf)
        self.directory = DirectoryClient(conf, logger=self.logger, **kwargs)
        if rdir_client:
            self.rdir = rdir_client
        else:
            self.rdir = RdirClient(conf, logger=self.logger, **kwargs)
        self._cs = None
        # Options used to create the special load-balancer pool;
        # kept around to detect when the pool must be overwritten.
        self._pool_options = None

    @property
    def cs(self):
        """Lazily instantiated ConscienceClient (reuses rdir's pool)."""
        if not self._cs:
            self._cs = ConscienceClient(self.conf, logger=self.logger,
                                        pool_manager=self.rdir.pool_manager)
        return self._cs

    def get_assignments(self, service_type, **kwargs):
        """
        Get rdir assignments for all services of the specified type.

        :returns: a tuple with a list all services of the specified type,
            and a list of all rdir services.
        :rtype: `tuple<list<dict>,list<dict>>`
        """
        all_services = self.cs.all_services(service_type, **kwargs)
        all_rdir = self.cs.all_services('rdir', True, **kwargs)
        # Index rdir services by their namespaced identifier
        by_id = {_make_id(self.ns, 'rdir', x['addr']): x
                 for x in all_rdir}

        for service in all_services:
            try:
                # Prefer the service id over the address when available
                ref = service.get('tags', {}).get('tag.service_id')
                resp = self.directory.list(RDIR_ACCT,
                                           ref or service['addr'],
                                           service_type='rdir',
                                           **kwargs)
                rdir_host = _filter_rdir_host(resp)
                try:
                    service['rdir'] = by_id[
                        _make_id(self.ns, 'rdir', rdir_host)]
                except KeyError:
                    # The linked rdir was not reported by conscience:
                    # register a placeholder entry for it.
                    self.logger.warn("rdir %s linked to %s %s seems down",
                                     rdir_host, service_type,
                                     service['addr'])
                    service['rdir'] = {"addr": rdir_host,
                                       "tags": dict()}
                    loc_rdir = service['rdir']
                    by_id[_make_id(self.ns, 'rdir', rdir_host)] = loc_rdir
            except NotFound:
                self.logger.info("No rdir linked to %s",
                                 service['addr'])
            except OioException as exc:
                self.logger.warn('Failed to get rdir linked to %s: %s',
                                 service['addr'], exc)
        return all_services, all_rdir

    def assign_services(self, service_type,
                        max_per_rdir=None, min_dist=None, **kwargs):
        """
        Assign an rdir service to all `service_type` servers that aren't
        already assigned one.

        :param max_per_rdir: Maximum number of services an rdir can handle.
        :type max_per_rdir: `int`
        :param min_dist: minimum required distance between any service and
            its assigned rdir service.
        :type min_dist: `int`
        :returns: The list of `service_type` services that were assigned
            rdir services.
        """
        all_services = self.cs.all_services(service_type, **kwargs)
        all_rdir = self.cs.all_services('rdir', True, **kwargs)
        if len(all_rdir) <= 0:
            raise ServiceUnavailable("No rdir service found in %s" % self.ns)

        by_id = {_make_id(self.ns, 'rdir', x['addr']): x
                 for x in all_rdir}

        errors = list()
        for provider in all_services:
            provider_id = provider['tags'].get('tag.service_id',
                                               provider['addr'])

            try:
                # Check whether an rdir is already linked to this service
                resp = self.directory.list(RDIR_ACCT, provider_id,
                                           service_type='rdir', **kwargs)
                rdir_host = _filter_rdir_host(resp)
                try:
                    provider['rdir'] = by_id[_make_id(self.ns, 'rdir',
                                                      rdir_host)]
                except KeyError:
                    self.logger.warn("rdir %s linked to %s %s seems down",
                                     rdir_host, service_type,
                                     provider_id)
            except NotFound:
                # No rdir linked yet: pick one and create the link
                try:
                    rdir = self._smart_link_rdir(provider_id, all_rdir,
                                                 service_type=service_type,
                                                 max_per_rdir=max_per_rdir,
                                                 min_dist=min_dist,
                                                 **kwargs)
                except OioException as exc:
                    self.logger.warn("Failed to link an rdir to %s %s: %s",
                                     service_type, provider_id, exc)
                    errors.append((provider_id, exc))
                    continue
                # Keep the load accounting up to date so subsequent
                # assignments in this loop stay balanced
                n_bases = by_id[rdir]['tags'].get("stat.opened_db_count", 0)
                by_id[rdir]['tags']["stat.opened_db_count"] = n_bases + 1
                provider['rdir'] = by_id[rdir]
            except OioException as exc:
                self.logger.warn("Failed to check rdir linked to %s %s "
                                 "(thus won't try to make the link): %s",
                                 service_type, provider_id, exc)
                errors.append((provider_id, exc))
        if errors:
            # group_chunk_errors is flexible enough to accept service addresses
            errors = group_chunk_errors(errors)
            if len(errors) == 1:
                err, addrs = errors.popitem()
                oio_reraise(type(err), err, str(addrs))
            else:
                raise OioException('Several errors encountered: %s' %
                                   errors)
        return all_services

    def assign_all_meta2(self, max_per_rdir=None, **kwargs):
        """
        Assign an rdir service to all meta2 servers that aren't already
        assigned one.

        :param max_per_rdir: Maximum number of services an rdir can handle.
        :type max_per_rdir: `int`
        :returns: The list of meta2 that were assigned rdir services.
        """
        return self.assign_services("meta2", max_per_rdir, **kwargs)

    def assign_all_rawx(self, max_per_rdir=None, **kwargs):
        """
        Find an rdir service for all rawx that don't have one already.

        :param max_per_rdir: maximum number of rawx services that an rdir
                             can be linked to
        :type max_per_rdir: `int`
        """
        return self.assign_services("rawx", max_per_rdir, **kwargs)

    def _smart_link_rdir(self, volume_id, all_rdir, max_per_rdir=None,
                         max_attempts=7, service_type='rawx', min_dist=None,
                         **kwargs):
        """
        Force the load balancer to avoid services that already host more
        bases than the average (or more than `max_per_rdir`)
        while selecting rdir services.
        """
        opened_db = [x['tags'].get('stat.opened_db_count', 0) for x in all_rdir
                     if x['score'] > 0]
        if len(opened_db) <= 0:
            raise ServiceUnavailable(
                "No valid rdir service found in %s" % self.ns)
        if not max_per_rdir:
            # No explicit limit: avoid any rdir above the average load
            upper_limit = sum(opened_db) / float(len(opened_db))
        else:
            upper_limit = max_per_rdir - 1
        avoids = [_make_id(self.ns, "rdir", x['addr'])
                  for x in all_rdir
                  if x['score'] > 0 and
                  x['tags'].get('stat.opened_db_count', 0) > upper_limit]
        known = [_make_id(self.ns, service_type, volume_id)]
        try:
            polled = self._poll_rdir(avoid=avoids, known=known,
                                     min_dist=min_dist, **kwargs)
        except ClientException as exc:
            if exc.status != 481 or max_per_rdir:
                raise
            # Retry without `avoids`, hoping the next iteration will rebalance
            polled = self._poll_rdir(known=known, min_dist=min_dist, **kwargs)

        # Associate the rdir to the rawx
        forced = {'host': polled['addr'], 'type': 'rdir',
                  'seq': 1, 'args': "", 'id': polled['id']}
        for i in range(max_attempts):
            try:
                self.directory.force(RDIR_ACCT, volume_id, 'rdir',
                                     forced, autocreate=True, **kwargs)
                break
            except ClientException as ex:
                # Already done
                done = (455,)
                if ex.status in done:
                    break
                if ex.message.startswith(
                        'META1 error: (SQLITE_CONSTRAINT) '
                        'UNIQUE constraint failed'):
                    self.logger.info(
                        "Ignored exception (already0): %s", ex)
                    break
                if ex.message.startswith(
                        'META1 error: (SQLITE_CONSTRAINT) '
                        'columns cid, srvtype, seq are not unique'):
                    self.logger.info(
                        "Ignored exception (already1): %s", ex)
                    break
                # Manage several unretriable errors
                retry = (406, 450, 503, 504)
                if ex.status >= 400 and ex.status not in retry:
                    raise
                # Monotonic backoff (retriable and network errors)
                if i < max_attempts - 1:
                    sleep(i * 1.0)
                    continue
                # Too many attempts
                raise

        # Do the creation in the rdir itself
        try:
            self.rdir.create(volume_id, service_type=service_type, **kwargs)
        except Exception as exc:
            # Not fatal: the database may be created later, on first use
            self.logger.warn("Failed to create database for %s on %s: %s",
                             volume_id, polled['addr'], exc)
        return polled['id']

    def _create_special_pool(self, options=None, force=False, **kwargs):
        """
        Create the special pool for rdir services.

        :param options: dictionary of custom options for the pool.
        :param force: overwrite the pool if it exists already.
        """
        self.cs.lb.create_pool(
            '__rawx_rdir', ((1, JOKER_SVC_TARGET), (1, 'rdir')),
            options=options, force=force, **kwargs)

    def _poll_rdir(self, avoid=None, known=None, min_dist=None, **kwargs):
        """
        Call the special rdir service pool (created if missing).

        :param min_dist: minimum distance to ensure between the known
            service and the selected rdir service.
        """
        if not known or len(known) > 1:
            raise ValueError('There should be exactly one "known" service')

        options = dict()
        if min_dist is not None:
            options['min_dist'] = min_dist
        if options != self._pool_options:
            # Options have changed, overwrite the pool.
            self._pool_options = options
            self._create_special_pool(self._pool_options, force=True, **kwargs)

        try:
            svcs = self.cs.poll('__rawx_rdir', avoid=avoid, known=known,
                                **kwargs)
        except ClientException as exc:
            if exc.status != 400:
                raise
            # Status 400: the pool does not exist yet, create it and retry
            self._create_special_pool(self._pool_options, **kwargs)
            svcs = self.cs.poll('__rawx_rdir', avoid=avoid, known=known,
                                **kwargs)
        for svc in svcs:
            # FIXME: we should include the service type in a dedicated field
            if 'rdir' in svc['id']:
                return svc
        raise ServerException("LB returned incoherent result: %s" % svcs)
Exemplo n.º 26
0
class RdirClient(Client):
    """
    Client for the rdir (reverse directory) services: maintains, for each
    volume, the index of the chunks it hosts, plus incident/lock
    administrative state.
    """

    def __init__(self, conf, **kwargs):
        super(RdirClient, self).__init__(conf, **kwargs)
        self.directory_client = DirectoryClient(conf, **kwargs)

    def _lookup_rdir_host(self, resp):
        """Extract the rdir service address from a directory 'show' reply.

        :raises ClientException: when no rdir service is linked.
        """
        host = None
        for srv in resp.get('srv', {}):
            if srv['type'] == 'rdir':
                host = srv['host']
        if not host:
            raise ClientException("No rdir service found")
        return host

    def _link_rdir(self, volume_id):
        """Link an rdir service to `volume_id`, return the updated link."""
        self.directory_client.link(acct=RDIR_ACCT, ref=volume_id,
                                   srv_type='rdir', autocreate=True)
        return self.directory_client.show(
                acct=RDIR_ACCT, ref=volume_id, srv_type='rdir')

    # TODO keep rdir addr in local cache to avoid lookup requests
    def _get_rdir_addr(self, volume_id, create=False):
        """Resolve the rdir host managing `volume_id`.

        :param create: when True, create the missing link instead of
            propagating the lookup error.
        """
        resp = {}
        try:
            resp = self.directory_client.show(acct=RDIR_ACCT, ref=volume_id,
                                              srv_type='rdir')
        except NotFound:
            if not create:
                raise

        try:
            host = self._lookup_rdir_host(resp)
        except ClientException:
            # Reference exists but no rdir linked
            if not create:
                raise
            resp = self._link_rdir(volume_id)
            host = self._lookup_rdir_host(resp)
        return host

    def _make_uri(self, action, volume_id, create=False):
        """Build the rdir service URI for `action` on `volume_id`."""
        rdir_host = self._get_rdir_addr(volume_id, create=create)
        uri = 'http://%s/v1/%s/%s' % (rdir_host, self.ns, action)
        return uri

    def _rdir_request(self, volume, method, action, create=False, **kwargs):
        """Send a request to the rdir service managing `volume`.

        :returns: a (response, decoded body) tuple.
        """
        uri = self._make_uri(action, volume, create=create)
        params = {'vol': volume}
        if create:
            params['create'] = '1'
        resp, body = self._direct_request(method, uri, params=params, **kwargs)
        return resp, body

    def chunk_push(self, volume_id, container_id, content_id, chunk_id,
                   **data):
        """Register or update a chunk in the rdir index of `volume_id`.

        Extra keyword arguments are forwarded as additional record fields.
        """
        body = {'container_id': container_id,
                'content_id': content_id,
                'chunk_id': chunk_id}
        # dict.update() instead of iterating data.iteritems():
        # same behavior, but works on both Python 2 and Python 3
        # (iteritems() does not exist in Python 3).
        body.update(data)

        headers = {}

        self._rdir_request(volume_id, 'POST', 'rdir/push', create=True,
                           json=body, headers=headers)

    def chunk_delete(self, volume_id, container_id, content_id, chunk_id):
        """Remove a chunk from the rdir index of `volume_id`."""
        body = {'container_id': container_id,
                'content_id': content_id,
                'chunk_id': chunk_id}

        self._rdir_request(volume_id, 'DELETE', 'rdir/delete', json=body)

    def chunk_fetch(self, volume, limit=100, rebuild=False):
        """Iterate over the chunk records indexed for `volume`.

        :param limit: maximum number of records fetched per request.
        :param rebuild: when True, restrict to records needing a rebuild.
        :returns: a generator of (container, content, chunk, record) tuples.
        """
        req_body = {'limit': limit}
        if rebuild:
            req_body['rebuild'] = True

        while True:
            resp, resp_body = self._rdir_request(volume, 'POST', 'rdir/fetch',
                                                 json=req_body)
            resp.raise_for_status()
            if len(resp_body) == 0:
                break
            for (key, value) in resp_body:
                container, content, chunk = key.split('|')
                yield container, content, chunk, value
            # Resume the next page just after the last key seen
            req_body['start_after'] = key

    def admin_incident_set(self, volume, date):
        """Set an incident date on `volume`."""
        body = {'date': date}
        self._rdir_request(volume, 'POST', 'rdir/admin/incident', json=body)

    def admin_incident_get(self, volume):
        """Return the incident date of `volume` (None when unset)."""
        resp, resp_body = self._rdir_request(volume, 'GET',
                                             'rdir/admin/incident')
        return resp_body.get('date')

    def admin_lock(self, volume, who):
        """Lock the rdir database of `volume` on behalf of `who`."""
        body = {'who': who}

        self._rdir_request(volume, 'POST', 'rdir/admin/lock', json=body)

    def admin_unlock(self, volume):
        """Release the admin lock held on the rdir database of `volume`."""
        self._rdir_request(volume, 'POST', 'rdir/admin/unlock')

    def admin_show(self, volume):
        """Return the administrative status of `volume`'s rdir database."""
        resp, resp_body = self._rdir_request(volume, 'GET', 'rdir/admin/show')
        return resp_body

    def status(self, volume):
        """Return the status report of `volume`'s rdir database."""
        resp, resp_body = self._rdir_request(volume, 'GET', 'rdir/status')
        return resp_body
Exemplo n.º 27
0
class RdirDispatcher(object):
    """Assign rdir services to rawx services and report assignments."""

    def __init__(self, conf, **kwargs):
        """
        :param conf: configuration dictionary, must contain 'namespace'.
        """
        self.conf = conf
        self.ns = conf['namespace']
        self.logger = get_logger(conf)
        self.directory = DirectoryClient(conf, logger=self.logger, **kwargs)
        self.rdir = RdirClient(conf, logger=self.logger, **kwargs)
        self._cs = None

    @property
    def cs(self):
        """Lazily instantiated ConscienceClient."""
        if not self._cs:
            self._cs = ConscienceClient(self.conf, logger=self.logger)
        return self._cs

    def get_assignation(self, **kwargs):
        """Return (all rawx, all rdir), each rawx carrying its linked rdir."""
        all_rawx = self.cs.all_services('rawx', **kwargs)
        all_rdir = self.cs.all_services('rdir', True, **kwargs)
        # Index rdir services by their namespaced identifier
        by_id = {_make_id(self.ns, 'rdir', x['addr']): x for x in all_rdir}

        for rawx in all_rawx:
            try:
                # Verify that there is no rdir linked
                resp = self.directory.list(RDIR_ACCT,
                                           rawx['addr'],
                                           service_type='rdir',
                                           **kwargs)
                rdir_host = _filter_rdir_host(resp)
                try:
                    rawx['rdir'] = by_id[_make_id(self.ns, 'rdir', rdir_host)]
                except KeyError:
                    # The linked rdir was not reported by conscience:
                    # register a placeholder entry for it.
                    self.logger.warn("rdir %s linked to rawx %s seems down",
                                     rdir_host, rawx['addr'])
                    rawx['rdir'] = {"addr": rdir_host, "tags": dict()}
                    by_id[_make_id(self.ns, 'rdir', rdir_host)] = rawx['rdir']
            except NotFound:
                self.logger.info("No rdir linked to %s", rawx['addr'])
        return all_rawx, all_rdir

    def assign_all_rawx(self, max_per_rdir=None, **kwargs):
        """
        Find an rdir service for all rawx that don't have one already.

        :param max_per_rdir: maximum number of rawx services that an rdir
                             can be linked to
        :type max_per_rdir: `int`
        """
        all_rawx = self.cs.all_services('rawx', **kwargs)
        all_rdir = self.cs.all_services('rdir', True, **kwargs)
        if len(all_rdir) <= 0:
            raise ServiceUnavailable("No rdir service found in %s" % self.ns)

        by_id = {_make_id(self.ns, 'rdir', x['addr']): x for x in all_rdir}

        for rawx in all_rawx:
            try:
                # Verify that there is no rdir linked
                resp = self.directory.list(RDIR_ACCT,
                                           rawx['addr'],
                                           service_type='rdir',
                                           **kwargs)
                rdir_host = _filter_rdir_host(resp)
                try:
                    rawx['rdir'] = by_id[_make_id(self.ns, 'rdir', rdir_host)]
                except KeyError:
                    self.logger.warn("rdir %s linked to rawx %s seems down",
                                     rdir_host, rawx['addr'])
            except (NotFound, ClientException):
                # No rdir linked yet: pick one and create the link
                rdir = self._smart_link_rdir(rawx['addr'], all_rdir,
                                             max_per_rdir, **kwargs)
                # Keep the load accounting up to date so subsequent
                # assignments in this loop stay balanced
                n_bases = by_id[rdir]['tags'].get("stat.opened_db_count", 0)
                by_id[rdir]['tags']["stat.opened_db_count"] = n_bases + 1
                rawx['rdir'] = by_id[rdir]
        return all_rawx

    def _smart_link_rdir(self,
                         volume_id,
                         all_rdir,
                         max_per_rdir=None,
                         **kwargs):
        """
        Force the load balancer to avoid services that already host more
        bases than the average (or more than `max_per_rdir`)
        while selecting rdir services.
        """
        opened_db = [
            x['tags']['stat.opened_db_count'] for x in all_rdir
            if x['score'] > 0
        ]
        if len(opened_db) <= 0:
            raise ServiceUnavailable("No valid rdir service found in %s" %
                                     self.ns)
        if not max_per_rdir:
            # No explicit limit: avoid any rdir above the average load
            upper_limit = sum(opened_db) / float(len(opened_db))
        else:
            upper_limit = max_per_rdir - 1
        avoids = [
            _make_id(self.ns, "rdir", x['addr']) for x in all_rdir if
            x['score'] > 0 and x['tags']['stat.opened_db_count'] > upper_limit
        ]
        known = [_make_id(self.ns, "rawx", volume_id)]
        try:
            polled = self._poll_rdir(avoid=avoids, known=known, **kwargs)
        except ClientException as exc:
            if exc.status != 481 or max_per_rdir:
                raise
            # Retry without `avoids`, hoping the next iteration will rebalance
            polled = self._poll_rdir(known=known, **kwargs)

        # Associate the rdir to the rawx
        forced = {
            'host': polled['addr'],
            'type': 'rdir',
            'seq': 1,
            'args': "",
            'id': polled['id']
        }
        max_attempts = 7
        for i in range(max_attempts):
            try:
                self.directory.force(RDIR_ACCT,
                                     volume_id,
                                     'rdir',
                                     forced,
                                     autocreate=True,
                                     **kwargs)
                break
            except ClientException as ex:
                # Already done
                done = (455, )
                if ex.status in done:
                    break
                if ex.message.startswith('META1 error: (SQLITE_CONSTRAINT) '
                                         'UNIQUE constraint failed'):
                    self.logger.info("Ignored exception (already0): %s", ex)
                    break
                if ex.message.startswith(
                        'META1 error: (SQLITE_CONSTRAINT) '
                        'columns cid, srvtype, seq are not unique'):
                    self.logger.info("Ignored exception (already1): %s", ex)
                    break
                # Manage several unretriable errors
                retry = (406, 450, 503, 504)
                if ex.status >= 400 and ex.status not in retry:
                    raise
                # Monotonic backoff (retriable and network errors)
                if i < max_attempts - 1:
                    from time import sleep
                    sleep(i * 1.0)
                    continue
                # Too many attempts
                raise

        # Do the creation in the rdir itself
        try:
            self.rdir.create(volume_id, **kwargs)
        except Exception as exc:
            # Not fatal: the database may be created later, on first use
            self.logger.warn("Failed to create database for %s on %s: %s",
                             volume_id, polled['addr'], exc)
        return polled['id']

    def _poll_rdir(self, avoid=None, known=None, **kwargs):
        """Call the special rdir service pool (created if missing)"""
        try:
            svcs = self.cs.poll('__rawx_rdir',
                                avoid=avoid,
                                known=known,
                                **kwargs)
        except ClientException as exc:
            if exc.status != 400:
                raise
            # Status 400: the pool does not exist yet, create it and retry
            self.cs.lb.create_pool('__rawx_rdir',
                                   ((1, JOKER_SVC_TARGET), (1, 'rdir')),
                                   **kwargs)
            svcs = self.cs.poll('__rawx_rdir',
                                avoid=avoid,
                                known=known,
                                **kwargs)
        for svc in svcs:
            # FIXME: we should include the service type in a dedicated field
            if 'rdir' in svc['id']:
                return svc
        raise ServerException("LB returned incoherent result: %s" % svcs)
Exemplo n.º 28
0
class CheckMeta2(CheckService):
    """
    Health probe for meta2 services: force a probe reference onto each
    meta2 and run a full content life cycle (prepare, create, locate,
    delete) against it.
    """

    account_name = "_meta2_probe"

    def __init__(self, namespace, **kwargs):
        """
        :param namespace: name of the namespace to probe.
        """
        # Proxy endpoint for content operations:
        # http://<proxy>/v3.0/<ns>/content
        ep_parts = ["http:/",
                    load_namespace_conf(namespace).get('proxy'),
                    "v3.0",
                    namespace,
                    "content"]

        super(CheckMeta2, self).__init__(namespace, "meta2",
                                         endpoint="/".join(ep_parts), **kwargs)

        self.account = AccountClient({"namespace": self.ns})
        self.container = ContainerClient({"namespace": self.ns})
        self.directory = DirectoryClient({"namespace": self.ns})
        # One random reference shared by all cycles of this probe run
        self.reference = random_buffer('0123456789ABCDEF', 64)

    def _get_params(self):
        """Return request parameters with a fresh random content path."""
        path = random_buffer('0123456789ABCDEF', 64)
        return {'acct': self.account_name, 'ref': self.reference, 'path': path}

    def _compare_chunks(self, chunks1, chunks2):
        """
        Compare two chunk lists on their 'url' and 'hash' fields only,
        ignoring order.

        :returns: True when both lists describe the same set of chunks.
        """
        def light_chunks(chunks):
            # Keep only the fields relevant for the comparison, and sort
            # deterministically so equality ignores ordering.
            return sorted(({"url": chunk["url"], "hash": chunk["hash"]}
                           for chunk in chunks),
                          key=lambda chunk: (chunk["url"], chunk["hash"]))
        try:
            # The previous implementation used the Python 2-only cmp()
            # builtin and relied on ordering comparisons between dicts;
            # key-sorted list equality gives the same result and also
            # works on Python 3.
            return light_chunks(chunks1) == light_chunks(chunks2)
        except TypeError:
            return False

    def _cycle(self, meta2_host):
        """Run one probe cycle against `meta2_host`.

        Checks the whole content life cycle: locate (absent), prepare,
        create, locate (present, matching chunks), delete, locate (absent).

        :returns: True when every step behaved as expected.
        """
        # Point the probe reference at the meta2 under test
        self.directory.unlink(
            account=self.account_name, reference=self.reference,
            service_type=self.service_type)
        service = {"host": meta2_host, "type": self.service_type, "args": "",
                   "seq": 1}
        self.directory.force(
            account=self.account_name, reference=self.reference,
            service_type=self.service_type, services=service)

        params = self._get_params()
        global_success = True

        _, body, success = self._request(
            "GET", "/locate", params=params, expected_status=404)
        global_success &= success
        headers = {'X-oio-action-mode': 'autocreate'}
        _, body, success = self._request(
            "POST", "/prepare", params=params, headers=headers,
            json={"size": "1024"}, expected_status=200)
        global_success &= success
        chunks = body
        _, body, success = self._request(
            "GET", "/locate", params=params, expected_status=404)
        global_success &= success
        headers = {"x-oio-content-meta-length": "1024"}
        _, _, success = self._request(
            "POST", "/create", params=params, headers=headers, json=chunks,
            expected_status=204)
        global_success &= success
        _, body, success = self._request(
            "GET", "/locate", params=params, expected_status=200)
        global_success &= success
        # The located chunks must match the ones returned by /prepare
        success = self._compare_chunks(chunks, body)
        global_success &= success
        _, _, success = self._request(
            "POST", "/delete", params=params, expected_status=204)
        global_success &= success
        _, body, success = self._request(
            "GET", "/locate", params=params, expected_status=404)
        global_success &= success

        return global_success

    def run(self):
        """Create the probe container, run the checks, then clean up."""
        try:
            self.container.container_create(account=self.account_name,
                                            reference=self.reference)
            super(CheckMeta2, self).run()
            self.container.container_delete(account=self.account_name,
                                            reference=self.reference)
            # Leave some time for the deletion event to be handled
            sleep(1)
            self.account.account_delete(self.account_name)
        except Exception as exc:
            print("Exception - " + str(exc))
Exemplo n.º 29
0
class Meta2IndexingWorker(object):
    """
    Indexing worker responsible for a single volume.

    Periodically crawls the volume's filesystem and pushes every meta2
    database found there to the rdir index, verifying through the directory
    that each database is actually assigned to this volume.
    """

    def __init__(self, volume_path, conf, pool_manager=None):
        """
        Initializes an Indexing worker for indexing meta2 databases.

        Possible values of conf relating to this worker are:
        - interval: (int) in sec time between two full scans. Default: half an
                    hour.
        - report_interval: (int) in sec, time between two reports: Default: 300
        - scanned_per_second: (int) maximum number of indexed databases /s.
        - try_removing_faulty_indexes : In the event where we encounter a
            database that's not supposed to be handled by this volume, attempt
            to remove it from this volume rdir index if it exists
            WARNING: The decision is based off of a proxy response, that could
            be affected by cache inconsistencies for example, use at your own
            risk. Default: False

        :param volume_path: The volume path to be indexed
        :param conf: The configuration to be passed to the needed services
        :param pool_manager: A connection pool manager. If none is given, a
                new one with a default size of 10 will be created.
        """
        self.logger = get_logger(conf)
        self._stop = False
        self.volume = volume_path
        # Per-pass counters, reset at the start of each crawl_volume()
        self.success_nb = 0
        self.failed_nb = 0
        self.full_scan_nb = 0
        self.last_report_time = 0
        self.last_scan_time = 0
        self.last_index_time = 0
        self.start_time = 0
        self.indexed_since_last_report = 0
        self.scans_interval = int_value(
            conf.get('interval'), 1800)
        self.report_interval = int_value(
            conf.get('report_interval'), 300)
        self.max_indexed_per_second = int_value(
            conf.get('scanned_per_second'), 3000)
        self.namespace, self.volume_id = check_volume_for_service_type(
            self.volume, "meta2")
        self.attempt_bad_index_removal = boolean_value(
            conf.get('try_removing_faulty_indexes'), False)

        if not pool_manager:
            pool_manager = get_pool_manager(pool_connections=10)
        self.index_client = RdirClient(conf, logger=self.logger,
                                       pool_manager=pool_manager)
        self.dir_client = DirectoryClient(conf, logger=self.logger,
                                          pool_manager=pool_manager)

    def report(self, tag):
        """
        Log the status of indexer

        :param tag: One of three: starting, running, ended.
        """
        total = self.success_nb + self.failed_nb
        now = time.time()
        # Avoid division by zero on the very first report
        elapsed = (now - self.start_time) or 0.00001
        since_last_rprt = (now - self.last_report_time) or 0.00001
        self.logger.info(
            'volume_id=%(volume_id)s %(tag)s=%(current_time)s '
            'elapsed=%(elapsed).02f '
            'pass=%(pass)d '
            'errors=%(errors)d '
            'containers_indexed=%(total_indexed)d %(index_rate).2f/s',
            {
                'volume_id': self.volume_id,
                'tag': tag,
                'current_time': datetime.fromtimestamp(
                    int(now)).isoformat(),
                'pass': self.full_scan_nb,
                'errors': self.failed_nb,
                'total_indexed': total,
                'index_rate': self.indexed_since_last_report / since_last_rprt,
                'elapsed': elapsed
            }
        )
        self.last_report_time = now
        self.indexed_since_last_report = 0

    def warn(self, msg, container_id):
        """
        Log a warning about a specific container, tagged with this volume.

        :param msg: human-readable description of the problem
        :param container_id: the container (or path) the warning is about
        """
        self.logger.warn(
            'volume_id=%(volume_id)s container_id=%(container_id)s %(error)s',
            {
                'volume_id': self.volume_id,
                'container_id': container_id,
                'error': msg
            }
        )

    def _attempt_index_removal(self, db_path, cid):
        """
        Fail safe removal attempt: try to deindex the database, and only
        log a warning if the rdir service refuses.
        """
        try:
            self.index_client.meta2_index_delete(self.volume_id, db_path, cid)
        except exc.OioException as exception:
            self.warn(
                container_id=cid,
                msg="Unable to remove database from the volume "
                    "index : {0}".format(str(exception))
            )

    def index_meta2_database(self, db_id):
        """
        Add a meta2 database to the rdir index. Fails if the database isn't
        handled by the current volume.

        :param db_id: The ContentID representing the reference to the database.
        """
        if len(db_id) < STRLEN_REFERENCEID:
            self.warn('Not a valid container ID', db_id)
            return
        try:
            srvcs = self.dir_client.list(cid=db_id)
            account, container = srvcs['account'], srvcs['name']
            # This volume must appear among the meta2 peers of the reference
            is_peer = self.volume_id in [x['host'] for x in srvcs['srv'] if
                                         x['type'] == 'meta2']

            container_id = db_id.rsplit(".")[0]

            if six.PY2:
                # Build the URL from bytes on Python 2 to avoid implicit
                # unicode promotion in format()
                if isinstance(account, six.text_type):
                    account = account.encode('utf-8')
                if isinstance(container, six.text_type):
                    container = container.encode('utf-8')
            cont_url = "{0}/{1}/{2}".format(self.namespace, account, container)

            if not is_peer:
                # BUG FIX: the original message concatenated to
                # "handled bythis volume" (missing space).
                self.warn("Trying to index a container that isn't handled "
                          "by this volume", db_id)
                if self.attempt_bad_index_removal:
                    self._attempt_index_removal(cont_url, container_id)
                return

            self.index_client.meta2_index_push(volume_id=self.volume_id,
                                               container_url=cont_url,
                                               mtime=time.time(),
                                               container_id=container_id)

            self.success_nb += 1
        except exc.OioException as exception:
            self.failed_nb += 1
            # BUG FIX: original message read "Unable to to index"
            self.warn("Unable to index container: %s" % str(exception),
                      db_id)

        self.indexed_since_last_report += 1

    def crawl_volume(self):
        """
        Crawl the volume assigned to this worker, and index every database.
        """
        paths = paths_gen(self.volume)
        self.full_scan_nb += 1
        self.success_nb = 0
        self.failed_nb = 0
        now = time.time()
        self.last_report_time = now

        self.report("starting")

        for db_path in paths:

            # Graceful exit, hopefully
            if self._stop:
                break

            # Expected file name shape: <cid>.<seq>.<extension>
            db_id = db_path.rsplit("/")[-1].rsplit(".")

            if len(db_id) != 3:
                self.warn("Malformed db file name !", db_path)
                continue

            # Keep only "<cid>.<seq>", drop the extension
            db_id = ".".join(db_id[:2])
            self.index_meta2_database(db_id)

            self.last_index_time = ratelimit(
                self.last_index_time,
                self.max_indexed_per_second
            )

            now = time.time()
            if now - self.last_report_time >= self.report_interval:
                self.report("running")

        self.report("ended")

    def run(self):
        """
        Main worker loop: crawl the volume, then sleep `scans_interval`
        seconds, until stop() is called.

        NOTE(review): only OioException is caught here; any other exception
        terminates the loop -- confirm this is intended.
        """
        self.start_time = time.time()
        while not self._stop:
            try:
                self.crawl_volume()
                self.last_scan_time = time.time()
                time.sleep(self.scans_interval)
            except exc.OioException as exception:
                self.logger.exception("ERROR during indexing meta2: %s",
                                      exception)

    def stop(self):
        """
        Could be needed for eventually gracefully stopping.
        """
        self._stop = True
Exemplo n.º 30
0
class RdirClient(HttpApi):
    """
    Client class for rdir services.

    Resolves (through the directory) which rdir service is assigned to a
    volume, caches that address, and forwards requests to it.
    """
    def __init__(self, conf, **kwargs):
        super(RdirClient, self).__init__(conf, **kwargs)
        self.directory = DirectoryClient(conf, **kwargs)
        # volume_id -> rdir host, filled lazily by _get_rdir_addr()
        self._addr_cache = dict()

    def _clear_cache(self, volume_id):
        """Forget the cached rdir address for `volume_id`."""
        # pop() instead of del: do not raise if the entry was never cached
        self._addr_cache.pop(volume_id, None)

    def _get_rdir_addr(self, volume_id):
        """
        Get the address of the rdir service assigned to `volume_id`,
        from the local cache or from the directory.

        :raises VolumeException: if no rdir service is assigned.
        """
        # Initial lookup in the cache
        if volume_id in self._addr_cache:
            return self._addr_cache[volume_id]
        # Not cached, try a direct lookup
        try:
            resp = self.directory.list(RDIR_ACCT,
                                       volume_id,
                                       service_type='rdir')
            host = _filter_rdir_host(resp)
            # Add the new service to the cache
            self._addr_cache[volume_id] = host
            return host
        except NotFound:
            raise VolumeException('No rdir assigned to volume %s' % volume_id)

    def _make_uri(self, action, volume_id):
        """Build the full URI of an rdir route for `volume_id`."""
        rdir_host = self._get_rdir_addr(volume_id)
        return 'http://%s/v1/rdir/%s' % (rdir_host, action)

    def _rdir_request(self, volume, method, action, create=False, **kwargs):
        """
        Send a request to the rdir service handling `volume`.

        On network error, drop the cached address (the service may have
        moved) before re-raising.

        :returns: a (response, decoded body) tuple
        """
        params = {'vol': volume}
        if create:
            params['create'] = '1'
        uri = self._make_uri(action, volume)
        try:
            resp, body = self._direct_request(method,
                                              uri,
                                              params=params,
                                              **kwargs)
        except OioNetworkException:
            self._clear_cache(volume)
            raise

        return resp, body

    def create(self, volume_id):
        """Create the database for `volume_id` on the appropriate rdir"""
        self._rdir_request(volume_id, 'POST', 'create')

    def chunk_push(self, volume_id, container_id, content_id, chunk_id,
                   **data):
        """Reference a chunk in the reverse directory"""
        body = {
            'container_id': container_id,
            'content_id': content_id,
            'chunk_id': chunk_id
        }

        # BUG FIX: items() instead of the Python-2-only iteritems(),
        # for Python 3 compatibility (the codebase already relies on six).
        for key, value in data.items():
            body[key] = value

        self._rdir_request(volume_id, 'POST', 'push', create=True, json=body)

    def chunk_delete(self, volume_id, container_id, content_id, chunk_id):
        """Unreference a chunk from the reverse directory"""
        body = {
            'container_id': container_id,
            'content_id': content_id,
            'chunk_id': chunk_id
        }

        self._rdir_request(volume_id, 'DELETE', 'delete', json=body)

    def chunk_fetch(self, volume, limit=100, rebuild=False, container_id=None):
        """
        Fetch the list of chunks belonging to the specified volume.

        :param volume: the volume to get chunks from
        :type volume: `str`
        :param limit: maximum number of results to return
        :type limit: `int`
        :param rebuild:
        :type rebuild: `bool`
        :keyword container_id: get only chunks belonging to
           the specified container
        :type container_id: `str`
        """
        req_body = {'limit': limit}
        if rebuild:
            req_body['rebuild'] = True
        if container_id:
            req_body['container_id'] = container_id

        # Paginate: each response's last key becomes the next 'start_after'
        while True:
            resp, resp_body = self._rdir_request(volume,
                                                 'POST',
                                                 'fetch',
                                                 json=req_body)
            if len(resp_body) == 0:
                break
            for (key, value) in resp_body:
                container, content, chunk = key.split('|')
                yield container, content, chunk, value
            req_body['start_after'] = key

    def admin_incident_set(self, volume, date):
        """Set an incident date (timestamp) on `volume`."""
        body = {'date': int(float(date))}
        self._rdir_request(volume, 'POST', 'admin/incident', json=body)

    def admin_incident_get(self, volume):
        """:returns: the incident date of `volume`, or None if unset."""
        resp, resp_body = self._rdir_request(volume, 'GET', 'admin/incident')
        return resp_body.get('date')

    def admin_lock(self, volume, who):
        """Lock `volume`'s rdir database on behalf of `who`."""
        body = {'who': who}

        self._rdir_request(volume, 'POST', 'admin/lock', json=body)

    def admin_unlock(self, volume):
        """Unlock `volume`'s rdir database."""
        self._rdir_request(volume, 'POST', 'admin/unlock')

    def admin_show(self, volume):
        """:returns: the admin information of `volume`'s rdir database."""
        resp, resp_body = self._rdir_request(volume, 'GET', 'admin/show')
        return resp_body

    def admin_clear(self, volume, clear_all=False):
        """
        Clear entries of `volume`'s rdir database.

        :param clear_all: clear all entries, not only those
            related to an incident
        """
        body = {'all': clear_all}
        resp, resp_body = self._rdir_request(volume,
                                             'POST',
                                             'admin/clear',
                                             json=body)
        return resp_body

    def status(self, volume):
        """:returns: the status of `volume`'s rdir database."""
        resp, resp_body = self._rdir_request(volume, 'GET', 'status')
        return resp_body
Exemplo n.º 31
0
 def __init__(self, conf, **kwargs):
     """
     Initialize the client.

     :param conf: configuration dict, forwarded to the base class
         and to the DirectoryClient.
     """
     super(RdirClient, self).__init__(conf, **kwargs)
     # Directory client used to resolve rdir service assignments
     self.directory_client = DirectoryClient(conf, **kwargs)
Exemplo n.º 32
0
 def __init__(self, conf, **kwargs):
     """
     Initialize the client.

     :param conf: configuration dict, forwarded to the base class
         and to the DirectoryClient.
     """
     super(RdirClient, self).__init__(conf, **kwargs)
     self.directory = DirectoryClient(conf, **kwargs)
     # Cache of volume_id -> rdir address lookups
     self._addr_cache = dict()