def __init__(self):
    super(NeutronRestProxyV2, self).__init__()
    LOG.info(_LI('NeutronRestProxy: Starting plugin. Version=%s'),
             version.version_string_with_vcs())
    pl_config.register_config()
    self.evpool = eventlet.GreenPool(cfg.CONF.RESTPROXY.thread_pool_size)

    # Include the Big Switch Extensions path in the api_extensions
    neutron_extensions.append_api_extensions_path(extensions.__path__)

    self.add_meta_server_route = cfg.CONF.RESTPROXY.add_meta_server_route

    # init network ctrl connections
    self.servers = servermanager.ServerPool()
    self.servers.get_topo_function = self._get_all_data
    self.servers.get_topo_function_args = {'get_ports': True,
                                           'get_floating_ips': True,
                                           'get_routers': True,
                                           'get_sgs': True}

    self.network_scheduler = importutils.import_object(
        cfg.CONF.network_scheduler_driver)

    # setup rpc for security and DHCP agents
    self._setup_rpc()

    if cfg.CONF.RESTPROXY.sync_data:
        self._send_all_data_auto()

    self.add_periodic_dhcp_agent_status_check()
    LOG.debug("NeutronRestProxyV2: initialization done")
def prepare_devices_filter(self, device_ids):
    if not device_ids:
        return
    # use tap as a prefix because ml2 is hard-coded to expect that
    device_ids = [d.replace('qvo', 'tap') for d in device_ids]
    LOG.info(_LI("Preparing filters for devices %s"), device_ids)
    if self.use_enhanced_rpc:
        devices_info = self.plugin_rpc.security_group_info_for_devices(
            self.context, list(device_ids))
        devices = devices_info['devices']
        security_groups = devices_info['security_groups']
        security_group_member_ips = devices_info['sg_member_ips']
    else:
        devices = self.plugin_rpc.security_group_rules_for_devices(
            self.context, list(device_ids))

    with self.firewall.defer_apply():
        for device in devices.values():
            # strip tap back off since prepare_port_filter will apply it
            device['device'] = device['device'].replace('tap', '')
            # Fuel backport fix for conntrack; set_local_zone may not
            # exist in other installers
            try:
                self.set_local_zone(device)
            except AttributeError:
                LOG.debug("set_local_zone is not defined.")
            self.firewall.prepare_port_filter(device)
        if self.use_enhanced_rpc:
            LOG.debug("Update security group information for ports %s",
                      devices.keys())
            self._update_security_group_info(
                security_groups, security_group_member_ips)
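# Illustration of the prefix round-trip above (the device ID is hypothetical):
# the agent sees OVS hybrid-plug devices named 'qvoXXX', the ml2 RPC side
# expects 'tapXXX', and prepare_port_filter re-applies the prefix itself, so
# the ID is stripped back down before the firewall call:
#
#     'qvo1a2b3c4d' -> 'tap1a2b3c4d'   (for the security group RPC lookup)
#     'tap1a2b3c4d' -> '1a2b3c4d'      (before prepare_port_filter)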
def rest_action(self, action, resource, data='', errstr='%s',
                ignore_codes=None, headers=None, timeout=False):
    """
    Wrapper for rest_call that verifies success and raises a
    RemoteRestError on failure with a provided error string.

    By default, 404 errors on DELETE calls are ignored because the
    resource already does not exist on the backend.
    """
    ignore_codes = ignore_codes or []
    headers = headers or {}
    if not ignore_codes and action == 'DELETE':
        ignore_codes = [404]
    resp = self.rest_call(action, resource, data, headers, ignore_codes,
                          timeout)
    if self.server_failure(resp, ignore_codes):
        LOG.error(errstr, resp[2])
        raise RemoteRestError(reason=resp[2], status=resp[0])
    if resp[0] in ignore_codes:
        LOG.info(_LI("NeutronRestProxyV2: Received and ignored error "
                     "code %(code)s on %(action)s action to resource "
                     "%(resource)s"),
                 {'code': resp[0],
                  'action': action,
                  'resource': resource})
    return resp
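# A minimal usage sketch for rest_action (the resource path and error string
# are assumptions for illustration, not taken from this module): a DELETE
# ignores 404 by default, so removing an object that is already gone from the
# backend succeeds silently instead of raising RemoteRestError:
#
#     self.rest_action('DELETE',
#                      '/tenants/%s/networks/%s' % (tenant_id, net_id),
#                      errstr="Unable to delete remote network: %s")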
def get_capabilities(self):
    try:
        body = self.rest_call('GET', CAPABILITIES_PATH)[2]
        if body:
            self.capabilities = jsonutils.loads(body)
    except Exception:
        LOG.exception(_LE("Couldn't retrieve capabilities. "
                          "Newer API calls won't be supported."))
    LOG.info(_LI("The following capabilities were received "
                 "for %(server)s: %(cap)s"),
             {'server': self.server, 'cap': self.capabilities})
    return self.capabilities
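# A minimal sketch of how callers might gate newer features on the capability
# list returned above (the capability name and the guarded calls are
# assumptions for illustration, not part of this module):
#
#     if 'floatingip' in self.get_capabilities():
#         ...  # newer backend: push the floating IP change directly
#     else:
#         ...  # older backend: fall back to a full data sync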
def read_for_update(self):
    # An optimistic locking strategy with a timeout to avoid using a
    # consistency hash while another server is using it. This will
    # not return until a lock is acquired either normally or by stealing
    # it after an individual ID holds it for greater than
    # MAX_LOCK_WAIT_TIME.
    lock_wait_start = None
    last_lock_owner = None
    while True:
        res = self._get_current_record()
        if not res:
            # no current entry. try to insert to grab lock
            if not self._insert_empty_hash_with_lock():
                # A failed insert after missing current record means
                # a concurrent insert occurred. Start process over to
                # find the new record.
                LOG.debug("Concurrent record inserted. Retrying.")
                time.sleep(0.25)
                continue
            # The empty hash was successfully inserted with our lock
            return ''
        current_lock_owner = self._get_lock_owner(res.hash)
        if not current_lock_owner:
            # no current lock. attempt to lock
            new = self.lock_marker + res.hash
            if not self._optimistic_update_hash_record(res, new):
                # someone else beat us to it. restart process to wait
                # for new lock ID to be removed
                LOG.debug("Failed to acquire lock. Restarting lock wait. "
                          "Previous hash: %(prev)s. "
                          "Attempted update: %(new)s",
                          {'prev': res.hash, 'new': new})
                time.sleep(0.25)
                continue
            # successfully got the lock
            return res.hash
        LOG.debug("This request's lock ID is %(this)s. "
                  "DB lock held by %(that)s",
                  {'this': self.random_lock_id,
                   'that': current_lock_owner})
        if current_lock_owner == self.random_lock_id:
            # no change needed, we already have the table lock due to
            # previous read_for_update call.
            # return hash with lock tag stripped off for use in a header
            return res.hash.replace(self.lock_marker, '')
        if current_lock_owner != last_lock_owner:
            # The owner changed since the last iteration, but it
            # wasn't to us. Reset the counter. Log if not
            # first iteration.
            if lock_wait_start:
                LOG.debug("Lock owner changed from %(old)s to %(new)s "
                          "while waiting to acquire it.",
                          {'old': last_lock_owner,
                           'new': current_lock_owner})
            lock_wait_start = time.time()
            last_lock_owner = current_lock_owner
        if time.time() - lock_wait_start > MAX_LOCK_WAIT_TIME:
            # the lock has been held too long, steal it
            LOG.warning(_LW("Gave up waiting for consistency DB "
                            "lock, trying to take it. "
                            "Current hash is: %s"), res.hash)
            new_db_value = res.hash.replace(current_lock_owner,
                                            self.random_lock_id)
            if self._optimistic_update_hash_record(res, new_db_value):
                # return the hash with our own lock marker stripped off
                # for use in a header
                return new_db_value.replace(self.lock_marker, '')
            LOG.info(_LI("Failed to take lock. Another process updated "
                         "the DB first."))
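# A minimal sketch of the intended lock lifecycle (HashHandler and put_hash
# are referenced by the REST client below; 'new_hash' is illustrative):
#
#     hh = HashHandler()
#     prev_hash = hh.read_for_update()   # blocks until the DB lock is held
#     try:
#         ...  # issue the REST call, carrying prev_hash in the hash header
#     finally:
#         hh.put_hash(new_hash)          # store the new hash, releasing lock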
def rest_call(self, action, resource, data, headers, ignore_codes,
              timeout=False):
    context = self.get_context_ref()
    if context:
        # include the requesting context information if available
        cdict = context.to_dict()
        # remove the auth token so it's not present in debug logs on the
        # backend controller
        cdict.pop('auth_token', None)
        headers[REQ_CONTEXT_HEADER] = jsonutils.dumps(cdict)
    hash_handler = cdb.HashHandler()
    good_first = sorted(self.servers, key=lambda x: x.failed)
    first_response = None
    for active_server in good_first:
        LOG.debug("ServerProxy: %(action)s to servers: "
                  "%(server)r, %(resource)s",
                  {'action': action,
                   'server': (active_server.server,
                              active_server.port),
                   'resource': resource})
        for x in range(HTTP_SERVICE_UNAVAILABLE_RETRY_COUNT + 1):
            ret = active_server.rest_call(action, resource, data, headers,
                                          timeout,
                                          reconnect=self.always_reconnect,
                                          hash_handler=hash_handler)
            if ret[0] != httplib.SERVICE_UNAVAILABLE:
                break
            time.sleep(HTTP_SERVICE_UNAVAILABLE_RETRY_INTERVAL)

        # If inconsistent, do a full synchronization
        if ret[0] == httplib.CONFLICT:
            if not self.get_topo_function:
                raise cfg.Error(_('Server requires synchronization, '
                                  'but no topology function was defined.'))

            LOG.info(_LI("ServerProxy: HashConflict detected with request "
                         "%(action)s %(resource)s Starting Topology sync"),
                     {'action': action, 'resource': resource})
            self._topo_sync_in_progress = True
            eventlet.spawn_n(self.keep_updating_lock)
            try:
                data = self.get_topo_function(
                    **self.get_topo_function_args)
                if data:
                    data = self._sanitize_data_for_topo_sync(data)
                    ret_ts = active_server.rest_call('POST', TOPOLOGY_PATH,
                                                     data, timeout=None)
                    if self.server_failure(ret_ts, ignore_codes):
                        LOG.error(_LE("ServerProxy: Topology sync failed"))
                        raise RemoteRestError(reason=ret_ts[2],
                                              status=ret_ts[0])
            finally:
                LOG.info(_LI("ServerProxy: Topology sync completed"))
                self._topo_sync_in_progress = False
                if data is None:
                    return None

        # Store the first response as the error to be bubbled up to the
        # user since it was a good server. Subsequent servers will most
        # likely be cluster slaves and won't have a useful error for the
        # user (e.g. 302 redirect to master)
        if not first_response:
            first_response = ret
        if not self.server_failure(ret, ignore_codes):
            active_server.failed = False
            LOG.debug("ServerProxy: %(action)s succeeded for servers: "
                      "%(server)r Response: %(response)s",
                      {'action': action,
                       'server': (active_server.server,
                                  active_server.port),
                       'response': ret[3]})
            return ret
        else:
            LOG.warning(_LW('ServerProxy: %(action)s failure for servers: '
                            '%(server)r Response: %(response)s'),
                        {'action': action,
                         'server': (active_server.server,
                                    active_server.port),
                         'response': ret[3]})
            LOG.warning(_LW("ServerProxy: Error details: "
                            "status=%(status)d, reason=%(reason)r, "
                            "ret=%(ret)s, data=%(data)r"),
                        {'status': ret[0],
                         'reason': ret[1],
                         'ret': ret[2],
                         'data': ret[3]})
            active_server.failed = True

    # A failure on a delete means the object is gone from Neutron but not
    # from the controller. Set the consistency hash to a bad value to
    # trigger a sync on the next check.
    # NOTE: The hash must have a comma in it otherwise it will be ignored
    # by the backend.
    if action == 'DELETE':
        hash_handler.put_hash('INCONSISTENT,INCONSISTENT')
    # All servers failed, reset server list and try again next time
    LOG.error(_LE('ServerProxy: %(action)s failure for all servers: '
                  '%(server)r'),
              {'action': action,
               'server': tuple((s.server, s.port)
                               for s in self.servers)})
    return first_response
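# The failover ordering above is just a stable sort on the boolean 'failed'
# flag (False sorts before True), so previously healthy servers are tried
# first; the values here are illustrative:
#
#     servers = [s1(failed=True), s2(failed=False), s3(failed=False)]
#     sorted(servers, key=lambda x: x.failed)   # -> [s2, s3, s1]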
def _bind_port_nfvswitch(self, context, segment, host_id):
    """Perform bind_port for nfv-switch.

    An NFV VM needs to be attached to a nfv-switch socket. So, during
    bind_port() we create an NFV VM endpoint on BCF, thereby reserving
    the socket for its use. Then pass the sock_path in the set_binding()
    for Nova to plug the VM into the nfv-switch.

    @param context: PortContext object
    """
    vif_type = portbindings.VIF_TYPE_VHOST_USER
    port = self._prepare_port_for_controller(context)
    if not port:
        LOG.warning(_LW("nfv-switch bind_port() skipped due to missing "
                        "Host ID."))
        return

    # Create an endpoint corresponding to the port on the Controller,
    # thereby asking the Controller to reserve a vhost_sock for it
    tenant_id = port["network"]["tenant_id"]
    network_id = port["network"]["id"]
    # Set vif_type to 'vhost_user' for the Controller to reserve vhost_sock
    port[portbindings.VIF_TYPE] = vif_type
    # Update host_id so that endpoint create will have the correct value
    port[portbindings.HOST_ID] = host_id
    try:
        self.async_port_create(tenant_id, network_id, port)
    except servermanager.RemoteRestError as e:
        with excutils.save_and_reraise_exception() as ctxt:
            if (cfg.CONF.RESTPROXY.auto_sync_on_failure and
                    e.status == httplib.NOT_FOUND and
                    servermanager.NXNETWORK in e.reason):
                ctxt.reraise = False
                LOG.error(_LE("Inconsistency with backend controller "
                              "triggering full synchronization."))
                self._send_all_data_auto(triggered_by_tenant=tenant_id)

    # Retrieve the vhost_socket reserved for the port (endpoint) by the
    # Controller and use it in set_binding()
    resp = self.servers.rest_get_port(tenant_id, network_id, port["id"])
    if not resp or not isinstance(resp, list):
        LOG.warning(_LW("Controller failed to reserve a nfv-switch sock"))
        return

    vhost_sock = None
    attachment_point = resp[0].get('attachment-point')
    if attachment_point:
        vhost_sock = attachment_point.get('interface')

    if not vhost_sock:
        LOG.warning(_LW("Controller failed to reserve a nfv-switch sock"))
        return

    vhost_sock_path = self._get_vhost_user_sock_path(vhost_sock)
    LOG.info(_LI('nfv-switch VM %(port)s allotted sock_path %(sock)s'),
             {'port': port['id'], 'sock': vhost_sock_path})

    # Update vif_details with host_id. This way, for all BCF
    # communications, we shall use it as HOST_ID (i.e. interface-group
    # on BCF)
    vif_details = {portbindings.CAP_PORT_FILTER: False,
                   portbindings.VHOST_USER_MODE:
                   portbindings.VHOST_USER_MODE_SERVER,
                   portbindings.VHOST_USER_OVS_PLUG: False,
                   portbindings.VHOST_USER_SOCKET: vhost_sock_path,
                   VIF_DET_BSN_VSWITCH_HOST_ID: host_id}
    context.set_binding(segment[api.ID], vif_type, vif_details)
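# For reference, the rest_get_port() parsing above expects a response list
# shaped roughly like the following (field values are hypothetical; only
# 'attachment-point' and its 'interface' key are read by this method):
#
#     [{'attachment-point': {'interface': 'vhost0', ...}, ...}]
#
# from which vhost_sock would be 'vhost0', and the final socket path handed
# to set_binding() comes from self._get_vhost_user_sock_path('vhost0').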