def _validate_names(self, obj, name=None):
    """
    :returns: True if obj's name, obj's tenant name, and name contain
              only supported characters; False otherwise.
    """
    if name and not servermanager.is_valid_bcf_name(name):
        LOG.warning(_LW('Unsupported characters in Name: %(name)s. '),
                    {'name': name})
        return False
    if (obj and 'name' in obj
            and not servermanager.is_valid_bcf_name(obj['name'])):
        LOG.warning(_LW('Unsupported characters in Name: %(name)s. '
                        'Object details: %(obj)s'),
                    {'name': obj['name'], 'obj': obj})
        return False
    if (obj and 'tenant_name' in obj
            and not servermanager.is_valid_bcf_name(obj['tenant_name'])):
        LOG.warning(_LW('Unsupported characters in TenantName: %(tname)s. '
                        'Object details: %(obj)s'),
                    {'tname': obj['tenant_name'], 'obj': obj})
        return False
    return True

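# Illustrative sketch (assumption, not part of this module): the
# servermanager.is_valid_bcf_name() helper used above is assumed to test
# a candidate name against the BCF_IDENTIFIER_RE pattern referenced in
# _log_unsupported_name() below, roughly:
#
#     BCF_IDENTIFIER_RE = re.compile(r'^[a-zA-Z0-9._-]+$')  # assumed pattern
#
#     def is_valid_bcf_name(name):
#         return bool(BCF_IDENTIFIER_RE.match(name))
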
def _log_unsupported_name(**kwargs):
    """
    Log a warning for objects not synced due to unsupported characters
    in their name. An unsupported character is anything that does not
    match the BCF_IDENTIFIER_RE regular expression.
    """
    obj_type = kwargs.pop('obj_type', None)
    if obj_type == ObjTypeEnum.tenant:
        obj_id = kwargs.pop('obj_id', 'obj_id not found')
        obj_name = kwargs.pop('obj_name', 'obj_name not found')
        LOG.warning(_LW('TOPO_SYNC_UNSUPPORTED_CHAR: Tenant with id '
                        '%(obj_id)s has unsupported char in name: '
                        '%(obj_name)s. Tenant will not be synced to '
                        'the controller.'),
                    {'obj_id': obj_id, 'obj_name': obj_name})
    else:
        obj = kwargs.pop('obj', None)
        LOG.warning(_LW('TOPO_SYNC_UNSUPPORTED_CHAR: %(obj_type)s '
                        'under tenant %(tenant_name)s has unsupported'
                        ' char in name: %(obj_name)s. %(obj_type)s '
                        '"%(obj_name)s" will not be synced to the '
                        'controller.'),
                    {'obj_type': obj_type,
                     'tenant_name': obj['tenant_name'],
                     'obj_name': obj['name']})

def _warn_on_state_status(self, resource):
    if resource.get('admin_state_up', True) is False:
        LOG.warning(_LW("Setting admin_state_up=False is not supported "
                        "in this plugin version. Ignoring setting for "
                        "resource: %s"), resource)
    if 'status' in resource:
        if resource['status'] != const.NET_STATUS_ACTIVE:
            LOG.warning(_LW("Operational status is internally set by the "
                            "plugin. Ignoring setting status=%s."),
                        resource['status'])

def _extend_port_dict_binding(self, context, port):
    cfg_vif_type = cfg.CONF.NOVA.vif_type.lower()
    if cfg_vif_type not in (portbindings.VIF_TYPE_OVS,
                            pl_config.VIF_TYPE_IVS):
        LOG.warning(_LW("Unrecognized vif_type in configuration "
                        "[%s]. Defaulting to ovs."), cfg_vif_type)
        cfg_vif_type = portbindings.VIF_TYPE_OVS
    # In ML2, the host_id is already populated
    if portbindings.HOST_ID in port:
        hostid = port[portbindings.HOST_ID]
    elif 'id' in port:
        hostid = porttracker_db.get_port_hostid(context, port['id'])
    else:
        hostid = None
    if hostid:
        port[portbindings.HOST_ID] = hostid
        override = self._check_hostvif_override(hostid)
        if override:
            cfg_vif_type = override
    port[portbindings.VIF_TYPE] = cfg_vif_type

    sg_enabled = sg_rpc.is_firewall_enabled()
    port[portbindings.VIF_DETAILS] = {
        # TODO(rkukura): Replace with new VIF security details
        portbindings.CAP_PORT_FILTER:
            'security-group' in self.supported_extension_aliases,
        portbindings.OVS_HYBRID_PLUG: sg_enabled
    }
    return port

def _fetch_and_store_cert(self, server, port, path):
    '''
    Grabs a certificate from a server and writes it to a given path.
    '''
    try:
        cert = ssl.get_server_certificate(
            (server, port), ssl_version=ssl.PROTOCOL_TLSv1)
    except Exception as e:
        raise cfg.Error(_('Could not retrieve initial '
                          'certificate from controller %(server)s. '
                          'Error details: %(error)s') %
                        {'server': server, 'error': e})

    LOG.warning(_LW("Storing certificate for host %(server)s "
                    "at %(path)s"),
                {'server': server, 'path': path})
    self._file_put_contents(path, cert)
    return cert

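# Illustrative sketch (assumption): the _file_put_contents() helper used
# above is assumed to be a thin wrapper that writes the PEM blob to disk,
# kept separate so it is easy to mock in tests, e.g.:
#
#     def _file_put_contents(self, path, contents):
#         with open(path, 'w') as handle:
#             handle.write(contents)
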
def _prepare_port_for_controller(self, context):
    # make a copy so the context isn't changed for other drivers
    port = copy.deepcopy(context.current)
    net = context.network.current
    port['network'] = net
    port['bound_segment'] = context.top_bound_segment
    prepped_port = self._map_tenant_name(port)
    prepped_port = self._map_state_and_status(prepped_port)
    if (portbindings.HOST_ID not in prepped_port or
            prepped_port[portbindings.HOST_ID] == ''):
        LOG.warning(_LW("Ignoring port notification to controller "
                        "because of missing host ID."))
        # in ML2, controller doesn't care about ports without
        # the host_id set
        return False

    # Update HOST_ID if vif_details has VIF_DET_BSN_VSWITCH_HOST_ID
    vif_details = prepped_port[portbindings.VIF_DETAILS]
    if vif_details:
        host_id = vif_details.get(VIF_DET_BSN_VSWITCH_HOST_ID)
        if host_id:
            prepped_port[portbindings.HOST_ID] = host_id
    return prepped_port

def clear_lock(self):
    LOG.debug("Clearing hash record lock of id %s", self.random_lock_id)
    with self.session.begin(subtransactions=True):
        res = (self.session.query(ConsistencyHash).
               filter_by(hash_id=self.hash_id).first())
        if not res:
            LOG.warning(_LW("Hash record already gone, no lock to clear."))
            return
        else:
            self.session.refresh(res)  # get the latest res from db
        if not res.hash.startswith(self.lock_marker):
            # if these are frequent the server is too slow
            LOG.warning(_LW("Another server already removed the lock. %s"),
                        res.hash)
            return
        res.hash = res.hash.replace(self.lock_marker, '')

def put_port_hostid(context, port_id, host):
    # REVISIT(kevinbenton): this is a workaround to avoid portbindings_db
    # relational table generation until one of the functions is called.
    from neutron.db.models import portbinding
    if not validators.is_attr_set(host):
        LOG.warning(_LW("No host_id in port request to track port location."))
        return
    if port_id == '':
        LOG.warning(_LW("Received an empty port ID for host_id '%s'"), host)
        return
    if host == '':
        LOG.debug("Received an empty host_id for port '%s'", port_id)
        return
    LOG.debug("Logging port %(port)s on host_id %(host)s",
              {'port': port_id, 'host': host})
    with context.session.begin(subtransactions=True):
        location = portbinding.PortBindingPort(port_id=port_id, host=host)
        context.session.merge(location)

def put_port_hostid(context, port_id, host):
    # REVISIT(kevinbenton): this is a workaround to avoid portbindings_db
    # relational table generation until one of the functions is called.
    from neutron.db import portbindings_db
    if not attributes.is_attr_set(host):
        LOG.warning(_LW("No host_id in port request to track port location."))
        return
    if port_id == '':
        LOG.warning(_LW("Received an empty port ID for host_id '%s'"), host)
        return
    if host == '':
        LOG.debug("Received an empty host_id for port '%s'", port_id)
        return
    LOG.debug("Logging port %(port)s on host_id %(host)s",
              {'port': port_id, 'host': host})
    with context.session.begin(subtransactions=True):
        location = portbindings_db.PortBindingPort(port_id=port_id,
                                                   host=host)
        context.session.merge(location)

def _consistency_watchdog(self, polling_interval=60):
    if 'consistency' not in self.get_capabilities():
        LOG.warning(_LW("Backend server(s) do not support automated "
                        "consistency checks."))
        return
    if not polling_interval:
        LOG.warning(_LW("Consistency watchdog disabled by polling "
                        "interval setting of %s."), polling_interval)
        return
    while True:
        # If consistency is supported, all we have to do is make any
        # rest call and the consistency header will be added. If it
        # doesn't match, the backend will return a synchronization error
        # that will be handled by the rest_action.
        eventlet.sleep(polling_interval)
        try:
            self.rest_action('GET', HEALTH_PATH)
        except Exception:
            LOG.exception(_LE("Encountered an error checking controller "
                              "health."))

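# Illustrative usage sketch (assumption, not from this module): since the
# watchdog loops forever, it is expected to run in a green thread started
# during plugin initialization, along the lines of:
#
#     if cfg.CONF.RESTPROXY.consistency_interval:
#         eventlet.spawn(self.servers._consistency_watchdog,
#                        cfg.CONF.RESTPROXY.consistency_interval)
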
def rest_create_port(self, tenant_id, net_id, port):
    resource = ATTACHMENT_PATH % (tenant_id, net_id, port["id"])
    data = {"port": port}
    device_id = port.get("device_id")
    if not port["mac_address"] or not device_id:
        # controller only cares about ports attached to devices
        LOG.warning(_LW("No device MAC attached to port %s. "
                        "Skipping notification to controller."),
                    port["id"])
        return
    data["attachment"] = {"id": device_id, "mac": port["mac_address"]}
    errstr = _("Unable to create remote port: %s")
    self.rest_action('PUT', resource, data, errstr)

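# Illustrative payload sketch (assumption): with ATTACHMENT_PATH shaped
# like '/tenants/%s/networks/%s/ports/%s/attachment', the PUT body built
# above resembles:
#
#     {
#         "port": { ...full Neutron port dict... },
#         "attachment": {"id": "<device_id>", "mac": "fa:16:3e:aa:bb:cc"}
#     }
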
def bsn_create_security_group(self, sg_id=None, sg=None, context=None):
    if sg_id:
        # overwrite sg if both sg and sg_id are given
        sg = self.get_security_group(context, sg_id)

    if sg:
        self._tenant_check_for_security_group(sg)
        # skip the security group if its tenant is unknown
        if sg['tenant_name']:
            if sg['tenant_name'] == servermanager.SERVICE_TENANT:
                self.bsn_create_tenant(servermanager.SERVICE_TENANT,
                                       context=context)
            self.servers.rest_create_securitygroup(sg)
    else:
        LOG.warning(_LW("No security group is provided for creation."))

def _prepare_port_for_controller(self, context):
    # make a copy so the context isn't changed for other drivers
    port = copy.deepcopy(context.current)
    net = context.network.current
    port['network'] = net
    port['bound_segment'] = context.top_bound_segment
    prepped_port = self._map_tenant_name(port)
    prepped_port = self._map_state_and_status(prepped_port)
    if (portbindings.HOST_ID not in prepped_port or
            prepped_port[portbindings.HOST_ID] == ''):
        LOG.warning(_LW("Ignoring port notification to controller "
                        "because of missing host ID."))
        # in ML2, controller doesn't care about ports without
        # the host_id set
        return False
    return prepped_port

def read_for_update(self):
    # An optimistic locking strategy with a timeout to avoid using a
    # consistency hash while another server is using it. This will
    # not return until a lock is acquired either normally or by stealing
    # it after an individual ID holds it for greater than
    # MAX_LOCK_WAIT_TIME.
    lock_wait_start = None
    last_lock_owner = None
    while True:
        res = self._get_current_record()
        if not res:
            # no current entry. try to insert to grab lock
            if not self._insert_empty_hash_with_lock():
                # A failed insert after missing current record means
                # a concurrent insert occurred. Start process over to
                # find the new record.
                LOG.debug("Concurrent record inserted. Retrying.")
                time.sleep(0.25)
                continue
            # The empty hash was successfully inserted with our lock
            return ''

        current_lock_owner = self._get_lock_owner(res.hash)
        if not current_lock_owner:
            # no current lock. attempt to lock
            new = self.lock_marker + res.hash
            if not self._optimistic_update_hash_record(res, new):
                # someone else beat us to it. restart process to wait
                # for new lock ID to be removed
                LOG.debug("Failed to acquire lock. Restarting lock wait. "
                          "Previous hash: %(prev)s. "
                          "Attempted update: %(new)s",
                          {'prev': res.hash, 'new': new})
                time.sleep(0.25)
                continue
            # successfully got the lock
            return res.hash

        LOG.debug("This request's lock ID is %(this)s. "
                  "DB lock held by %(that)s",
                  {'this': self.random_lock_id,
                   'that': current_lock_owner})

        if current_lock_owner == self.random_lock_id:
            # no change needed, we already have the table lock due to
            # previous read_for_update call.
            # return hash with lock tag stripped off for use in a header
            return res.hash.replace(self.lock_marker, '')

        if current_lock_owner != last_lock_owner:
            # The owner changed since the last iteration, but it
            # wasn't to us. Reset the counter. Log if not
            # first iteration.
            if lock_wait_start:
                LOG.debug("Lock owner changed from %(old)s to %(new)s "
                          "while waiting to acquire it.",
                          {'old': last_lock_owner,
                           'new': current_lock_owner})
            lock_wait_start = time.time()
            last_lock_owner = current_lock_owner
        if time.time() - lock_wait_start > MAX_LOCK_WAIT_TIME:
            # the lock has been held too long, steal it
            LOG.warning(_LW("Gave up waiting for consistency DB "
                            "lock, trying to take it. "
                            "Current hash is: %s"), res.hash)
            new_db_value = res.hash.replace(current_lock_owner,
                                            self.random_lock_id)
            if self._optimistic_update_hash_record(res, new_db_value):
                # strip our own lock marker off the value we just wrote;
                # res.hash still carries the previous owner's marker, so
                # stripping must happen on new_db_value
                return new_db_value.replace(self.lock_marker, '')
            LOG.info(_LI("Failed to take lock. Another process updated "
                         "the DB first."))

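# Illustrative usage sketch (assumption, not from this module): callers
# are expected to pair read_for_update() with put_hash() or clear_lock(),
# roughly:
#
#     handler = HashHandler()
#     prev_hash = handler.read_for_update()  # blocks until lock acquired
#     try:
#         # REST call carries prev_hash as the consistency header; on
#         # success the new hash from the backend is stored, which is
#         # assumed to also release the lock
#         handler.put_hash(new_hash_from_backend)
#     except Exception:
#         handler.clear_lock()  # release without updating the hash
#         raise
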
def _bind_port_nfvswitch(self, context):
    """Perform bind_port for nfvswitch.

    An NFV VM needs to be attached to a nfv-switch socket. So, during
    bind_port() we create an NFV VM endpoint on BCF, thereby reserving
    the socket for its use. Then pass the sock_path in the set_binding()
    for Nova to plug the VM into the nfv-switch.

    @param context: PortContext object
    """
    vif_type = portbindings.VIF_TYPE_VHOST_USER
    port = self._prepare_port_for_controller(context)
    if not port:
        LOG.warning(_LW("nfv-switch bind_port() skipped due to missing "
                        "Host ID."))
        return

    # Create an endpoint corresponding to the port on the Controller,
    # thereby asking the Controller to reserve a vhost_sock for it
    tenant_id = port["network"]["tenant_id"]
    network_id = port["network"]["id"]
    # Set vif_type to 'vhost_user' for the Controller to reserve vhost_sock
    port[portbindings.VIF_TYPE] = vif_type
    try:
        self.async_port_create(tenant_id, network_id, port)
    except servermanager.RemoteRestError as e:
        with excutils.save_and_reraise_exception() as ctxt:
            if (cfg.CONF.RESTPROXY.auto_sync_on_failure and
                    e.status == httplib.NOT_FOUND and
                    servermanager.NXNETWORK in e.reason):
                ctxt.reraise = False
                LOG.error(_LE("Inconsistency with backend controller, "
                              "triggering full synchronization."))
                self._send_all_data_auto(triggered_by_tenant=tenant_id)
    LOG.debug('Successfully created endpoint for nfv-switch VM %s',
              port['id'])

    # Retrieve the vhost_socket reserved for the port (endpoint) by the
    # Controller and use it in set_binding()
    resp = self.servers.rest_get_port(tenant_id, network_id, port["id"])
    if not resp or not isinstance(resp, list):
        LOG.warning(_LW("Controller failed to reserve a nfv-switch sock"))
        return

    vhost_sock = None
    attachment_point = resp[0].get('attachment-point')
    if attachment_point:
        vhost_sock = attachment_point.get('interface')
    if not vhost_sock:
        LOG.warning(_LW("Controller failed to reserve a nfv-switch sock"))
        return

    vhost_sock_path = self._get_vhost_user_sock_path(vhost_sock)
    LOG.debug('nfv-switch VM allotted sock_path %s', vhost_sock_path)

    vif_details = {
        portbindings.CAP_PORT_FILTER: False,
        portbindings.VHOST_USER_MODE: portbindings.VHOST_USER_MODE_SERVER,
        portbindings.VHOST_USER_OVS_PLUG: False,
        portbindings.VHOST_USER_SOCKET: vhost_sock_path
    }
    for segment in context.segments_to_bind:
        if segment[api.NETWORK_TYPE] == pconst.TYPE_VLAN:
            context.set_binding(segment[api.ID], vif_type, vif_details)

def _bind_port_nfvswitch(self, context, segment, host_id):
    """Perform bind_port for nfvswitch.

    An NFV VM needs to be attached to a nfv-switch socket. So, during
    bind_port() we create an NFV VM endpoint on BCF, thereby reserving
    the socket for its use. Then pass the sock_path in the set_binding()
    for Nova to plug the VM into the nfv-switch.

    @param context: PortContext object
    """
    vif_type = portbindings.VIF_TYPE_VHOST_USER
    port = self._prepare_port_for_controller(context)
    if not port:
        LOG.warning(_LW("nfv-switch bind_port() skipped due to missing "
                        "Host ID."))
        return

    # Create an endpoint corresponding to the port on the Controller,
    # thereby asking the Controller to reserve a vhost_sock for it
    tenant_id = port["network"]["tenant_id"]
    network_id = port["network"]["id"]
    # Set vif_type to 'vhost_user' for the Controller to reserve vhost_sock
    port[portbindings.VIF_TYPE] = vif_type
    # Update host_id so that endpoint create will have the correct value
    port[portbindings.HOST_ID] = host_id
    try:
        self.async_port_create(tenant_id, network_id, port)
    except servermanager.RemoteRestError as e:
        with excutils.save_and_reraise_exception() as ctxt:
            if (cfg.CONF.RESTPROXY.auto_sync_on_failure and
                    e.status == httplib.NOT_FOUND and
                    servermanager.NXNETWORK in e.reason):
                ctxt.reraise = False
                LOG.error(_LE("Inconsistency with backend controller, "
                              "triggering full synchronization."))
                self._send_all_data_auto(triggered_by_tenant=tenant_id)

    # Retrieve the vhost_socket reserved for the port (endpoint) by the
    # Controller and use it in set_binding()
    resp = self.servers.rest_get_port(tenant_id, network_id, port["id"])
    if not resp or not isinstance(resp, list):
        LOG.warning(_LW("Controller failed to reserve a nfv-switch sock"))
        return

    vhost_sock = None
    attachment_point = resp[0].get('attachment-point')
    if attachment_point:
        vhost_sock = attachment_point.get('interface')
    if not vhost_sock:
        LOG.warning(_LW("Controller failed to reserve a nfv-switch sock"))
        return

    vhost_sock_path = self._get_vhost_user_sock_path(vhost_sock)
    LOG.info(_LI('nfv-switch VM %(port)s allotted sock_path %(sock)s'),
             {'port': port['id'], 'sock': vhost_sock_path})

    # Update vif_details with host_id. This way, for all BCF
    # communications, we shall use it as HOST_ID (i.e. interface-group
    # on BCF)
    vif_details = {
        portbindings.CAP_PORT_FILTER: False,
        portbindings.VHOST_USER_MODE: portbindings.VHOST_USER_MODE_SERVER,
        portbindings.VHOST_USER_OVS_PLUG: False,
        portbindings.VHOST_USER_SOCKET: vhost_sock_path,
        VIF_DET_BSN_VSWITCH_HOST_ID: host_id
    }
    context.set_binding(segment[api.ID], vif_type, vif_details)

def rest_call(self, action, resource, data, headers, ignore_codes,
              timeout=False):
    context = self.get_context_ref()
    if context:
        # include the requesting context information if available
        cdict = context.to_dict()
        # remove the auth token so it's not present in debug logs on the
        # backend controller
        cdict.pop('auth_token', None)
        headers[REQ_CONTEXT_HEADER] = jsonutils.dumps(cdict)
    hash_handler = cdb.HashHandler()
    good_first = sorted(self.servers, key=lambda x: x.failed)
    first_response = None
    for active_server in good_first:
        LOG.debug("ServerProxy: %(action)s to servers: "
                  "%(server)r, %(resource)s",
                  {'action': action,
                   'server': (active_server.server, active_server.port),
                   'resource': resource})
        for x in range(HTTP_SERVICE_UNAVAILABLE_RETRY_COUNT + 1):
            ret = active_server.rest_call(
                action, resource, data, headers, timeout,
                reconnect=self.always_reconnect,
                hash_handler=hash_handler)
            if ret[0] != httplib.SERVICE_UNAVAILABLE:
                break
            time.sleep(HTTP_SERVICE_UNAVAILABLE_RETRY_INTERVAL)

        # If inconsistent, do a full synchronization
        if ret[0] == httplib.CONFLICT:
            if not self.get_topo_function:
                raise cfg.Error(_('Server requires synchronization, '
                                  'but no topology function was defined.'))

            self._topo_sync_in_progress = True
            eventlet.spawn_n(self.keep_updating_lock)
            try:
                data = self.get_topo_function(
                    **self.get_topo_function_args)
                if data:
                    active_server.rest_call('POST', TOPOLOGY_PATH, data,
                                            timeout=None)
            finally:
                self._topo_sync_in_progress = False
            if data is None:
                return None

        # Store the first response as the error to be bubbled up to the
        # user since it was a good server. Subsequent servers will most
        # likely be cluster slaves and won't have a useful error for the
        # user (e.g. 302 redirect to master)
        if not first_response:
            first_response = ret
        if not self.server_failure(ret, ignore_codes):
            active_server.failed = False
            LOG.debug("ServerProxy: %(action)s succeeded for servers: "
                      "%(server)r Response: %(response)s",
                      {'action': action,
                       'server': (active_server.server,
                                  active_server.port),
                       'response': ret[3]})
            return ret
        else:
            LOG.warning(_LW('ServerProxy: %(action)s failure for '
                            'servers: %(server)r Response: %(response)s'),
                        {'action': action,
                         'server': (active_server.server,
                                    active_server.port),
                         'response': ret[3]})
            LOG.warning(_LW("ServerProxy: Error details: "
                            "status=%(status)d, reason=%(reason)r, "
                            "ret=%(ret)s, data=%(data)r"),
                        {'status': ret[0], 'reason': ret[1],
                         'ret': ret[2], 'data': ret[3]})
            active_server.failed = True

    # A failure on a delete means the object is gone from Neutron but not
    # from the controller. Set the consistency hash to a bad value to
    # trigger a sync on the next check.
    # NOTE: The hash must have a comma in it otherwise it will be ignored
    # by the backend.
    if action == 'DELETE':
        hash_handler.put_hash('INCONSISTENT,INCONSISTENT')
    # All servers failed, reset server list and try again next time
    LOG.error(_LE('ServerProxy: %(action)s failure for all servers: '
                  '%(server)r'),
              {'action': action,
               'server': tuple((s.server, s.port)
                               for s in self.servers)})
    return first_response

def rest_call(self, action, resource, data, headers, ignore_codes,
              timeout=False):
    context = self.get_context_ref()
    if context:
        # include the requesting context information if available
        cdict = context.to_dict()
        # remove the auth token so it's not present in debug logs on the
        # backend controller
        cdict.pop('auth_token', None)
        headers[REQ_CONTEXT_HEADER] = jsonutils.dumps(cdict)
    hash_handler = cdb.HashHandler()
    good_first = sorted(self.servers, key=lambda x: x.failed)
    first_response = None
    for active_server in good_first:
        LOG.debug("ServerProxy: %(action)s to servers: "
                  "%(server)r, %(resource)s",
                  {'action': action,
                   'server': (active_server.server, active_server.port),
                   'resource': resource})
        for x in range(HTTP_SERVICE_UNAVAILABLE_RETRY_COUNT + 1):
            ret = active_server.rest_call(
                action, resource, data, headers, timeout,
                reconnect=self.always_reconnect,
                hash_handler=hash_handler)
            if ret[0] != httplib.SERVICE_UNAVAILABLE:
                break
            time.sleep(HTTP_SERVICE_UNAVAILABLE_RETRY_INTERVAL)

        # If inconsistent, do a full synchronization
        if ret[0] == httplib.CONFLICT:
            if not self.get_topo_function:
                raise cfg.Error(_('Server requires synchronization, '
                                  'but no topology function was defined.'))
            LOG.info(_LI("ServerProxy: HashConflict detected with request "
                         "%(action)s %(resource)s. Starting topology sync."),
                     {'action': action, 'resource': resource})
            self._topo_sync_in_progress = True
            eventlet.spawn_n(self.keep_updating_lock)
            try:
                data = self.get_topo_function(
                    **self.get_topo_function_args)
                if data:
                    data = self._sanitize_data_for_topo_sync(data)
                    ret_ts = active_server.rest_call('POST', TOPOLOGY_PATH,
                                                     data, timeout=None)
                    if self.server_failure(ret_ts, ignore_codes):
                        LOG.error(_LE("ServerProxy: Topology sync failed"))
                        raise RemoteRestError(reason=ret_ts[2],
                                              status=ret_ts[0])
            finally:
                LOG.info(_LI("ServerProxy: Topology sync completed"))
                self._topo_sync_in_progress = False
            if data is None:
                return None

        # Store the first response as the error to be bubbled up to the
        # user since it was a good server. Subsequent servers will most
        # likely be cluster slaves and won't have a useful error for the
        # user (e.g. 302 redirect to master)
        if not first_response:
            first_response = ret
        if not self.server_failure(ret, ignore_codes):
            active_server.failed = False
            LOG.debug("ServerProxy: %(action)s succeeded for servers: "
                      "%(server)r Response: %(response)s",
                      {'action': action,
                       'server': (active_server.server,
                                  active_server.port),
                       'response': ret[3]})
            return ret
        else:
            LOG.warning(_LW('ServerProxy: %(action)s failure for '
                            'servers: %(server)r Response: %(response)s'),
                        {'action': action,
                         'server': (active_server.server,
                                    active_server.port),
                         'response': ret[3]})
            LOG.warning(_LW("ServerProxy: Error details: "
                            "status=%(status)d, reason=%(reason)r, "
                            "ret=%(ret)s, data=%(data)r"),
                        {'status': ret[0], 'reason': ret[1],
                         'ret': ret[2], 'data': ret[3]})
            active_server.failed = True

    # A failure on a delete means the object is gone from Neutron but not
    # from the controller. Set the consistency hash to a bad value to
    # trigger a sync on the next check.
    # NOTE: The hash must have a comma in it otherwise it will be ignored
    # by the backend.
    if action == 'DELETE':
        hash_handler.put_hash('INCONSISTENT,INCONSISTENT')
    # All servers failed, reset server list and try again next time
    LOG.error(_LE('ServerProxy: %(action)s failure for all servers: '
                  '%(server)r'),
              {'action': action,
               'server': tuple((s.server, s.port)
                               for s in self.servers)})
    return first_response