Example #1
    def test_clear_lock_skip_after_steal(self):
        handler1 = consistency_db.HashHandler()
        handler1.read_for_update()  # lock the table
        handler2 = consistency_db.HashHandler()
        with mock.patch.object(consistency_db, 'MAX_LOCK_WAIT_TIME', new=0):
            handler2.read_for_update()
            before = self._get_hash_from_handler_db(handler1)
            # handler1 should not clear handler2's lock
            handler1.clear_lock()
            self.assertEqual(before,
                             self._get_hash_from_handler_db(handler1))
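Together with Example #5, this test pins down the clear_lock() contract: it strips only the caller's own lock marker and must leave another handler's marker untouched. A minimal standalone sketch of that guard, with a dict standing in for the database row (all names here are illustrative, not the plugin's internals):

    class SketchHandler(object):
        # illustrative stand-in for HashHandler's clear_lock() guard
        def __init__(self, marker, row):
            self.lock_marker = marker
            self.row = row  # dict standing in for the consistency-hash row

        def clear_lock(self):
            current = self.row.get('hash', '')
            # strip the prefix only if *we* hold the lock, so a handler
            # that lost its lock to a steal is a no-op here
            if current.startswith(self.lock_marker):
                self.row['hash'] = current[len(self.lock_marker):]

    row = {'hash': 'LOCK-B' + 'SOMEHASH'}     # handler B holds the lock
    SketchHandler('LOCK-A', row).clear_lock()
    assert row['hash'] == 'LOCK-BSOMEHASH'    # A cannot clear B's lock
    SketchHandler('LOCK-B', row).clear_lock()
    assert row['hash'] == 'SOMEHASH'          # B releases its own lock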
Example #2
    def test_failure_to_steal_lock(self):
        handler1 = consistency_db.HashHandler()
        handler1.read_for_update()  # lock the table
        handler2 = consistency_db.HashHandler()
        with contextlib.nested(
                mock.patch.object(consistency_db, 'MAX_LOCK_WAIT_TIME'),
                mock.patch.object(handler2,
                                  '_optimistic_update_hash_record',
                                  side_effect=[False,
                                               True])) as (mlock, oplock):
            # handler2 will go through 2 iterations since the first
            # attempt to steal the lock fails
            mlock.__lt__.side_effect = [False, True, False, True]
            handler2.read_for_update()
            self.assertEqual(4, mlock.__lt__.call_count)
            self.assertEqual(2, oplock.call_count)
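A note on the `mlock.__lt__` assertions here and in Example #3: MAX_LOCK_WAIT_TIME is patched with a MagicMock, so a comparison such as `elapsed > MAX_LOCK_WAIT_TIME` inside read_for_update() resolves through the mock's reflected __lt__, once per loop iteration. The loop below is a hypothetical reconstruction inferred from those call counts, not the plugin's actual source; the helper names mirror the mocks above:

    import time

    def read_for_update_sketch(handler, get_record, max_lock_wait_time):
        # hypothetical shape of HashHandler.read_for_update(); the methods
        # on `handler` match the mocks above, the control flow is inferred
        wait_start = time.time()
        while True:
            res = get_record()
            owner = handler._get_lock_owner(res.hash)  # None if unlocked
            if not owner:
                # unlocked: try to prepend our marker optimistically
                if handler._optimistic_update_hash_record(
                        res, handler.lock_marker + res.hash):
                    return res.hash
            elif (time.time() - wait_start) > max_lock_wait_time:
                # one reflected __lt__ call on the mock per iteration:
                # 2 calls = one wait plus one successful steal (Example #3),
                # 4 calls = a failed steal forces two more iterations
                if handler._optimistic_update_hash_record(
                        res, res.hash.replace(owner, handler.lock_marker)):
                    return res.hash.replace(owner, '')
            time.sleep(0.25)  # still waiting or lost the race; retry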
Example #3
    def test_take_lock_from_other(self):
        handler1 = consistency_db.HashHandler()
        handler1.read_for_update()  # lock the table
        handler2 = consistency_db.HashHandler()
        with mock.patch.object(consistency_db,
                               'MAX_LOCK_WAIT_TIME') as mlock:
            # make handler2 wait for only one iteration
            mlock.__lt__.side_effect = [False, True]
            handler2.read_for_update()
            # once MAX_LOCK_WAIT_TIME is exceeded, comparisons stop because
            # the lock is stolen
            self.assertEqual(2, mlock.__lt__.call_count)
            dbentry = self._get_hash_from_handler_db(handler1)
            # handler2 should have the lock
            self.assertIn(handler2.lock_marker, dbentry)
            self.assertNotIn(handler1.lock_marker, dbentry)
            # lock protection only blocks read_for_update; anyone can still
            # change the hash with put_hash
            handler1.put_hash('H1')
Example #4
    def initialize(self):
        LOG.debug('Initializing driver')

        # register plugin config opts
        pl_config.register_config()
        self.evpool = eventlet.GreenPool(cfg.CONF.RESTPROXY.thread_pool_size)

        LOG.debug("Force topology sync if consistency hash is empty")
        hash_handler = cdb.HashHandler()
        cur_hash = hash_handler.read_for_update()
        if not cur_hash:
            # seed a sentinel value; note it must contain a comma or the
            # backend will ignore it (see the NOTE in Example #14)
            hash_handler.put_hash('initial:hash,code')
            LOG.debug("Force topology sync done")

        # init network ctrl connections
        self.servers = servermanager.ServerPool()
        self.servers.get_topo_function = self._get_all_data_auto
        self.segmentation_types = ', '.join(cfg.CONF.ml2.type_drivers)
        # Track hosts running IVS to avoid excessive calls to the backend
        self.vswitch_host_cache = {}
        self.setup_sg_rpc_callbacks()
        self.unsupported_vnic_types = [
            portbindings.VNIC_DIRECT, portbindings.VNIC_DIRECT_PHYSICAL
        ]

        LOG.debug("Initialization done")
Example #5
    def test_clear_lock(self):
        handler = consistency_db.HashHandler()
        handler.put_hash('SOMEHASH')
        handler.read_for_update()  # lock the table
        self.assertEqual(handler.lock_marker + 'SOMEHASH',
                         self._get_hash_from_handler_db(handler))
        handler.clear_lock()
        self.assertEqual('SOMEHASH', self._get_hash_from_handler_db(handler))
Example #6
    def test_handler_already_holding_lock(self):
        handler = consistency_db.HashHandler()
        handler.read_for_update()  # lock the table
        with mock.patch.object(handler._FACADE, 'get_engine') as ge:
            handler.read_for_update()
            # get_engine should not have been called because no update
            # should have been made
            self.assertFalse(ge.called)
Example #7
    def test_db_duplicate_on_insert(self):
        handler = consistency_db.HashHandler()
        with mock.patch.object(handler.session,
                               'add',
                               side_effect=[db_exc.DBDuplicateEntry,
                                            '']) as add_mock:
            handler.read_for_update()
            # duplicate insert failure should result in retry
            self.assertEqual(2, add_mock.call_count)
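The retry asserted here is the usual insert-if-missing pattern: when two handlers race to create the very first consistency-hash row, the loser's INSERT violates the unique constraint and should loop and re-read rather than fail (in the mocked test nothing is actually written, so the re-read finds no row and add() is attempted a second time, hence call_count == 2). A standalone sketch of the pattern with illustrative helper names:

    from oslo_db import exception as db_exc

    def insert_or_reread(session, make_record, get_record):
        # illustrative insert-if-missing loop: DBDuplicateEntry means a
        # concurrent writer won the race, so re-read instead of failing
        while True:
            existing = get_record(session)
            if existing is not None:
                return existing
            try:
                session.add(make_record())
                session.flush()
                return get_record(session)
            except db_exc.DBDuplicateEntry:
                # another handler inserted first; loop and pick up its row
                continue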
Example #8
    def test_delete_failure_sets_bad_hash(self):
        pl = directory.get_plugin()
        hash_handler = consistency_db.HashHandler()
        with mock.patch(SERVERMANAGER + '.ServerProxy.rest_call',
                        return_value=(httplib.INTERNAL_SERVER_ERROR,
                                      0, 0, 0)):
            # a failed delete call should put a bad hash in the DB
            pl.servers.rest_call('DELETE', '/', '', None, [])
            self.assertEqual('INCONSISTENT,INCONSISTENT',
                             hash_handler.read_for_update())
Example #9
    def test_hash_handle_lock_no_initial_record(self):
        handler = consistency_db.HashHandler()
        h1 = handler.read_for_update()
        # the value returned to the caller should be empty even with the
        # lock in the DB
        self.assertFalse(h1)
        # the db should have a lock marker
        self.assertEqual(handler.lock_marker,
                         self._get_hash_from_handler_db(handler))
        # putting an entry should clear the lock
        handler.put_hash('DIGEST')
        self.assertEqual('DIGEST', self._get_hash_from_handler_db(handler))
Example #10
    def test_hash_handle_lock_existing_record(self):
        handler = consistency_db.HashHandler()
        handler.put_hash('DIGEST')  # set initial hash

        h1 = handler.read_for_update()
        self.assertEqual('DIGEST', h1)
        self.assertEqual(handler.lock_marker + 'DIGEST',
                         self._get_hash_from_handler_db(handler))

        # make sure update works
        handler.put_hash('DIGEST2')
        self.assertEqual('DIGEST2', self._get_hash_from_handler_db(handler))
Example #11
    def keep_updating_lock(self):
        topo_index = ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for _ in range(2))
        # topology sync will lock the consistency hash table;
        # the lock starts with TOPO
        prefix = "TOPO" + topo_index
        while self._topo_sync_in_progress:
            handler = cdb.HashHandler(prefix=prefix, length=4)
            new = handler.lock_marker + "initial:hash,code"
            handler.put_hash(new)
            time.sleep(2)
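Example #14 shows how this heartbeat is driven: rest_call() sets `_topo_sync_in_progress` before spawning the loop and clears it in a finally block, so the loop above exits within one sleep interval of the sync finishing:

    # driving side, excerpted from Example #14's rest_call()
    self._topo_sync_in_progress = True
    eventlet.spawn_n(self.keep_updating_lock)
    try:
        ...  # run the topology sync against the backend
    finally:
        self._topo_sync_in_progress = False  # heartbeat stops within ~2s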
Example #12
    def test_update_hit_no_records(self):
        handler = consistency_db.HashHandler()
        # set initial hash so update will be required
        handler.put_hash('DIGEST')
        with mock.patch.object(handler._FACADE, 'get_engine') as ge:
            conn = ge.return_value.begin.return_value.__enter__.return_value
            firstresult = mock.Mock()
            # a rowcount of 0 simulates the effect of another db client
            # updating the same record the handler was trying to update
            firstresult.rowcount = 0
            secondresult = mock.Mock()
            secondresult.rowcount = 1
            conn.execute.side_effect = [firstresult, secondresult]
            handler.read_for_update()
            # update should have been called again after the failure
            self.assertEqual(2, conn.execute.call_count)
Example #13
    def _update_tenant_cache(self, reconcile=True):
        try:
            auth = v3.Password(auth_url=self.auth_url,
                               username=self.auth_user,
                               password=self.auth_password,
                               project_name=self.auth_tenant,
                               user_domain_id=self.user_domain_id,
                               project_domain_id=self.project_domain_id)
            sess = session.Session(auth=auth)
            keystone_client = ksclient.Client(session=sess)
            tenants = keystone_client.projects.list()
            new_cached_tenants = {tn.id: tn.name for tn in tenants}
            # Add SERVICE_TENANT to handle hidden network for VRRP
            new_cached_tenants[SERVICE_TENANT] = SERVICE_TENANT

            LOG.debug("New TENANTS: %s \nPrevious Tenants %s" %
                      (new_cached_tenants, self.keystone_tenants))
            diff = DictDiffer(new_cached_tenants, self.keystone_tenants)
            self.keystone_tenants = new_cached_tenants
            if reconcile:
                for tenant_id in diff.added():
                    LOG.debug("TENANT create: id %s name %s" %
                              (tenant_id, self.keystone_tenants[tenant_id]))
                    self._rest_create_tenant(tenant_id)
                for tenant_id in diff.removed():
                    LOG.debug("TENANT delete: id %s" % tenant_id)
                    self.rest_delete_tenant(tenant_id)
                if diff.changed():
                    hash_handler = cdb.HashHandler()
                    res = hash_handler._get_current_record()
                    if res:
                        lock_owner = hash_handler._get_lock_owner(res.hash)
                        if lock_owner and "TOPO" in lock_owner:
                            # topology sync is still going on
                            return True
                    LOG.debug("TENANT changed: force topo sync")
                    hash_handler.put_hash('initial:hash,code')
            return True
        except Exception:
            LOG.exception(_LE("Encountered an error syncing with "
                              "keystone."))
            return False
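DictDiffer itself is not shown in these examples; from the calls above it only needs added(), removed(), and changed() over the new and previous tenant mappings. A plausible minimal implementation matching that usage (a sketch, not necessarily the plugin's class):

    class DictDiffer(object):
        # sketch: DictDiffer(new_dict, old_dict) per the usage above
        def __init__(self, current, past):
            self.current, self.past = current, past
            self.current_keys = set(current)
            self.past_keys = set(past)
            self.intersect = self.current_keys & self.past_keys

        def added(self):
            # keys present now but not before (tenants to create)
            return self.current_keys - self.past_keys

        def removed(self):
            # keys present before but gone now (tenants to delete)
            return self.past_keys - self.current_keys

        def changed(self):
            # shared keys whose values differ (renamed tenants)
            return set(k for k in self.intersect
                       if self.past[k] != self.current[k])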
Example #14
    def rest_call(self,
                  action,
                  resource,
                  data,
                  headers,
                  ignore_codes,
                  timeout=False):
        context = self.get_context_ref()
        if context:
            # include the requesting context information if available
            cdict = context.to_dict()
            # remove the auth token so it's not present in debug logs on the
            # backend controller
            cdict.pop('auth_token', None)
            headers[REQ_CONTEXT_HEADER] = jsonutils.dumps(cdict)
        hash_handler = cdb.HashHandler()
        good_first = sorted(self.servers, key=lambda x: x.failed)
        first_response = None
        for active_server in good_first:
            LOG.debug(
                "ServerProxy: %(action)s to servers: "
                "%(server)r, %(resource)s" % {
                    'action': action,
                    'server': (active_server.server, active_server.port),
                    'resource': resource
                })
            for x in range(HTTP_SERVICE_UNAVAILABLE_RETRY_COUNT + 1):
                ret = active_server.rest_call(action,
                                              resource,
                                              data,
                                              headers,
                                              timeout,
                                              reconnect=self.always_reconnect,
                                              hash_handler=hash_handler)
                if ret[0] != httplib.SERVICE_UNAVAILABLE:
                    break
                time.sleep(HTTP_SERVICE_UNAVAILABLE_RETRY_INTERVAL)

            # If inconsistent, do a full synchronization
            if ret[0] == httplib.CONFLICT:
                if not self.get_topo_function:
                    raise cfg.Error(
                        _('Server requires synchronization, '
                          'but no topology function was defined.'))

                LOG.info(
                    _LI("ServerProxy: HashConflict detected with request "
                        "%(action)s %(resource)s Starting Topology sync"), {
                            'action': action,
                            'resource': resource
                        })
                self._topo_sync_in_progress = True
                eventlet.spawn_n(self.keep_updating_lock)
                try:
                    data = self.get_topo_function(
                        **self.get_topo_function_args)
                    if data:
                        data = self._sanitize_data_for_topo_sync(data)
                        ret_ts = active_server.rest_call('POST',
                                                         TOPOLOGY_PATH,
                                                         data,
                                                         timeout=None)
                        if self.server_failure(ret_ts, ignore_codes):
                            LOG.error(_LE("ServerProxy: Topology sync failed"))
                            raise RemoteRestError(reason=ret_ts[2],
                                                  status=ret_ts[0])
                finally:
                    LOG.info(_LI("ServerProxy: Topology sync completed"))
                    self._topo_sync_in_progress = False
                    if data is None:
                        return None

            # Store the first response as the error to be bubbled up to the
            # user since it was a good server. Subsequent servers will most
            # likely be cluster slaves and won't have a useful error for the
            # user (e.g. 302 redirect to master)
            if not first_response:
                first_response = ret
            if not self.server_failure(ret, ignore_codes):
                active_server.failed = False
                LOG.debug(
                    "ServerProxy: %(action)s succeed for servers: "
                    "%(server)r Response: %(response)s" % {
                        'action': action,
                        'server': (active_server.server, active_server.port),
                        'response': ret[3]
                    })
                return ret
            else:
                LOG.warning(
                    _LW('ServerProxy: %(action)s failure for servers: '
                        '%(server)r Response: %(response)s'), {
                            'action': action,
                            'server':
                            (active_server.server, active_server.port),
                            'response': ret[3]
                        })
                LOG.warning(
                    _LW("ServerProxy: Error details: "
                        "status=%(status)d, reason=%(reason)r, "
                        "ret=%(ret)s, data=%(data)r"), {
                            'status': ret[0],
                            'reason': ret[1],
                            'ret': ret[2],
                            'data': ret[3]
                        })
                active_server.failed = True

        # A failure on a delete means the object is gone from Neutron but not
        # from the controller. Set the consistency hash to a bad value to
        # trigger a sync on the next check.
        # NOTE: The hash must have a comma in it otherwise it will be ignored
        # by the backend.
        if action == 'DELETE':
            hash_handler.put_hash('INCONSISTENT,INCONSISTENT')
        # All servers failed, reset server list and try again next time
        LOG.error(
            _LE('ServerProxy: %(action)s failure for all servers: '
                '%(server)r'), {
                    'action': action,
                    'server': tuple((s.server, s.port) for s in self.servers)
                })
        return first_response
Example #15
    def rest_call(self,
                  action,
                  resource,
                  data='',
                  headers=None,
                  timeout=False,
                  reconnect=False,
                  hash_handler=None):
        uri = self.base_uri + resource
        body = jsonutils.dumps(data)
        headers = headers or {}
        headers['Content-type'] = 'application/json'
        headers['Accept'] = 'application/json'
        headers['NeutronProxy-Agent'] = self.name
        headers['Instance-ID'] = self.neutron_id
        headers['Orchestration-Service-ID'] = ORCHESTRATION_SERVICE_ID
        if hash_handler:
            # this will be excluded on calls that don't need hashes
            # (e.g. topology sync, capability checks)
            headers[HASH_MATCH_HEADER] = hash_handler.read_for_update()
        else:
            hash_handler = cdb.HashHandler()
        # TODO(kevinbenton): Re-enable keep-alive in a thread-safe fashion.
        # When multiple workers are enabled the saved connection gets mangled
        # by multiple threads so we always reconnect.
        if 'keep-alive' in self.capabilities and False:
            headers['Connection'] = 'keep-alive'
        else:
            reconnect = True
        if self.auth:
            headers['Authorization'] = self.auth

        LOG.debug(
            "ServerProxy: server=%(server)s, port=%(port)d, "
            "ssl=%(ssl)r", {
                'server': self.server,
                'port': self.port,
                'ssl': self.ssl
            })
        LOG.debug(
            "ServerProxy: resource=%(resource)s, data=%(data)r, "
            "headers=%(headers)r, action=%(action)s", {
                'resource': resource,
                'data': data,
                'headers': headers,
                'action': action
            })

        # unspecified timeout is False because a timeout can be specified as
        # None to indicate no timeout.
        if timeout is False:
            timeout = self.timeout

        if timeout != self.timeout:
            # need a new connection if timeout has changed
            reconnect = True

        if not self.currentconn or reconnect:
            if self.currentconn:
                self.currentconn.close()
            if self.ssl:
                currentconn = HTTPSConnectionWithValidation(self.server,
                                                            self.port,
                                                            timeout=timeout)
                if currentconn is None:
                    LOG.error(
                        _LE('ServerProxy: Could not establish HTTPS '
                            'connection'))
                    return 0, None, None, None
                currentconn.combined_cert = self.combined_cert
            else:
                currentconn = httplib.HTTPConnection(self.server,
                                                     self.port,
                                                     timeout=timeout)
                if currentconn is None:
                    LOG.error(
                        _LE('ServerProxy: Could not establish HTTP '
                            'connection'))
                    return 0, None, None, None

        try:
            currentconn.request(action, uri, body, headers)
            response = currentconn.getresponse()
            respstr = response.read()
            respdata = respstr
            if response.status in self.success_codes:
                hash_value = response.getheader(HASH_MATCH_HEADER)
                # don't clear hash from DB if a hash header wasn't present
                if hash_value is not None:
                    # BVS-6979: race condition (#1): set sync=False so the
                    # keep_updating_lock loop doesn't squash the updated
                    # hash. A delay is required in case the loop is already
                    # executing.
                    if resource == TOPOLOGY_PATH:
                        self._topo_sync_in_progress = False
                        time.sleep(0.10)
                    hash_handler.put_hash(hash_value)
                else:
                    hash_handler.clear_lock()
                try:
                    respdata = jsonutils.loads(respstr)
                except ValueError:
                    # response was not JSON, ignore the exception
                    pass
            else:
                # BVS-6979: race condition (#2): on a hash conflict, don't
                # unlock; this ensures topo_sync runs next (it force-grabs
                # the lock)
                if response.status != httplib.CONFLICT:
                    # release lock so others don't have to wait for timeout
                    hash_handler.clear_lock()

            ret = (response.status, response.reason, respstr, respdata)
        except httplib.HTTPException:
            # If we were using a cached connection, try again with a new one.
            with excutils.save_and_reraise_exception() as ctxt:
                currentconn.close()
                if reconnect:
                    # if reconnect is true, this was on a fresh connection so
                    # reraise since this server seems to be broken
                    ctxt.reraise = True
                else:
                    # if reconnect is false, it was a cached connection so
                    # try one more time before re-raising
                    ctxt.reraise = False
            return self.rest_call(action,
                                  resource,
                                  data,
                                  headers,
                                  timeout=timeout,
                                  reconnect=True)
        except (socket.timeout, socket.error) as e:
            currentconn.close()
            LOG.error(_LE('ServerProxy: %(action)s failure, %(e)r'), {
                'action': action,
                'e': e
            })
            ret = 0, None, None, None
        LOG.debug(
            "ServerProxy: status=%(status)d, reason=%(reason)r, "
            "ret=%(ret)s, data=%(data)r", {
                'status': ret[0],
                'reason': ret[1],
                'ret': ret[2],
                'data': ret[3]
            })
        return ret
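Read together with Example #14, the hash handling above forms a small client-side consistency protocol: read_for_update() locks the consistency row and its value travels to the controller in HASH_MATCH_HEADER; on success the echoed header value is written back with put_hash(), which also releases the lock (see Example #9); on ordinary failures clear_lock() releases it immediately so other callers don't wait out the timeout; and on CONFLICT the lock is deliberately kept so that the topology sync in Example #14 can take it over.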