def _reconnect(self):
    """Reconnect with jittered exponential backoff increase."""
    LOG.info(_LI('Reconnecting to coordination backend.'))
    cap = cfg.CONF.coordination.max_reconnect_backoff
    base = cfg.CONF.coordination.initial_reconnect_backoff
    backoff = base
    for attempt in itertools.count(1):
        try:
            self._start()
        except coordination.ToozError:
            # Grow the wait with jitter, never exceeding the configured cap.
            backoff = min(cap, random.uniform(base, backoff * 3))
            msg = _LW('Reconnect attempt %(attempt)s failed. '
                      'Next try in %(backoff).2fs.')
            LOG.warning(msg, {'attempt': attempt, 'backoff': backoff})
            # Sleep via the event so a shutdown can interrupt the wait.
            self._dead.wait(backoff)
        else:
            break
    LOG.info(_LI('Reconnected to coordination backend.'))
def _error(self, inner, req):
    """Translate exception ``inner`` into a ``wsgi.Fault`` for ``req``.

    Quota errors are expected and not logged as exceptions; everything
    else gets a full traceback in the log.
    """
    if not isinstance(inner, exception.QuotaError):
        LOG.exception(_LE("Caught error: %(type)s %(error)s"),
                      {'type': type(inner), 'error': inner})

    safe = getattr(inner, 'safe', False)
    headers = getattr(inner, 'headers', None)
    status = getattr(inner, 'code', 500)
    if status is None:
        status = 500

    LOG.info(_LI("%(url)s returned with HTTP %(status)d"),
             {'url': req.url, 'status': status})

    outer = self.status_to_type(status)
    if headers:
        outer.headers = headers

    # NOTE(johannes): We leave the explanation empty here on
    # purpose. It could possibly have sensitive information
    # that should not be returned back to the user. See
    # bugs 868360 and 874472
    # NOTE(eglynn): However, it would be over-conservative and
    # inconsistent with the EC2 API to hide every exception,
    # including those that are safe to expose, see bug 1021373
    if safe:
        if isinstance(inner, exception.WaterfallException):
            msg = inner.msg
        else:
            msg = six.text_type(inner)
        outer.explanation = _('%(exception)s: %(explanation)s') % {
            'exception': inner.__class__.__name__,
            'explanation': msg,
        }

    return wsgi.Fault(outer)
def reset(self):
    """Handle SIGHUP by dropping cached RPC/object version pins.

    Clearing the pins lets the service pick up new minimum versions
    after a rolling upgrade completes, without a full restart.
    """
    LOG.info(_LI('Resetting cached RPC version pins.'))
    rpc.LAST_RPC_VERSIONS = {}
    rpc.LAST_OBJ_VERSIONS = {}
def register(self, ext):
    """Register extension ``ext`` under its alias.

    :param ext: extension object exposing an ``alias`` attribute
    :raises exception.Error: if another extension already uses the alias
    """
    # Do nothing if the extension doesn't check out
    if not self._check_extension(ext):
        return

    alias = ext.alias
    # Fix: detect duplicates *before* announcing success, so the log
    # never claims an extension was loaded when registration fails.
    if alias in self.extensions:
        raise exception.Error("Found duplicate extension: %s" % alias)
    LOG.info(_LI('Loaded extension: %s'), alias)
    self.extensions[alias] = ext
def __init__(self, ip, port, conn_timeout, login, password=None,
             privatekey=None, *args, **kwargs):
    """Build an SSH connection pool for a single host.

    :param ip: host address to connect to
    :param port: SSH port
    :param conn_timeout: connection timeout; falsy values become None
    :param login: user name for the connection
    :param password: optional password
    :param privatekey: optional private key
    :raises exception.ParameterNotFound: if ssh_hosts_key_file is unset
    :raises exception.InvalidInput: if the hosts key file is missing and
        not under the default state path
    """
    self.ip = ip
    self.port = port
    self.login = login
    self.password = password
    self.conn_timeout = conn_timeout if conn_timeout else None
    self.privatekey = privatekey
    self.hosts_key_file = None

    # Validate good config setting here.
    # Paramiko handles the case where the file is inaccessible.
    if not CONF.ssh_hosts_key_file:
        raise exception.ParameterNotFound(param='ssh_hosts_key_file')
    elif not os.path.isfile(CONF.ssh_hosts_key_file):
        # If using the default path, just create the file.
        if CONF.state_path in CONF.ssh_hosts_key_file:
            open(CONF.ssh_hosts_key_file, 'a').close()
        else:
            msg = (_("Unable to find ssh_hosts_key_file: %s") %
                   CONF.ssh_hosts_key_file)
            raise exception.InvalidInput(reason=msg)

    # Idiom fix: test membership on the dict directly, not .keys().
    if 'hosts_key_file' in kwargs:
        self.hosts_key_file = kwargs.pop('hosts_key_file')
        LOG.info(
            _LI("Secondary ssh hosts key file %(kwargs)s will be "
                "loaded along with %(conf)s from /etc/waterfall.conf."),
            {'kwargs': self.hosts_key_file,
             'conf': CONF.ssh_hosts_key_file})

    LOG.debug(
        "Setting strict_ssh_host_key_policy to '%(policy)s' "
        "using ssh_hosts_key_file '%(key_file)s'.",
        {'policy': CONF.strict_ssh_host_key_policy,
         'key_file': CONF.ssh_hosts_key_file})

    self.strict_ssh_host_key_policy = CONF.strict_ssh_host_key_policy

    # Always include the configured hosts key file; a secondary file,
    # when supplied, is listed first.
    if not self.hosts_key_file:
        self.hosts_key_file = CONF.ssh_hosts_key_file
    else:
        self.hosts_key_file += ',' + CONF.ssh_hosts_key_file

    super(SSHPool, self).__init__(*args, **kwargs)
def start(self):
    """Connect to coordination backend and start heartbeat."""
    if self.started:
        return
    try:
        self._dead = threading.Event()
        self._start()
        self.started = True
        # NOTE(bluex): Start heartbeat in separate thread to avoid
        # being blocked by long coroutines.
        if self.coordinator and self.coordinator.requires_beating:
            self._ev = eventlet.spawn(
                lambda: tpool.execute(self.heartbeat))
    except coordination.ToozError:
        LOG.exception(_LE('Error starting coordination backend.'))
        raise
    LOG.info(_LI('Coordination backend started successfully.'))
def _determine_obj_version_cap(self):
    """Return the object version cap for this binary, caching it globally."""
    global LAST_OBJ_VERSIONS
    try:
        # Already negotiated for this binary during this process lifetime.
        return LAST_OBJ_VERSIONS[self.BINARY]
    except KeyError:
        pass

    version_cap = objects.Service.get_minimum_obj_version(
        waterfall.context.get_admin_context(), self.BINARY)
    # If there is no service we assume they will come up later and will
    # have the same version as we do.
    if not version_cap:
        version_cap = base.OBJ_VERSIONS.get_current()

    LOG.info(_LI('Automatically selected %(binary)s objects version '
                 '%(version)s as minimum service version.'),
             {'binary': self.BINARY, 'version': version_cap})
    LAST_OBJ_VERSIONS[self.BINARY] = version_cap
    return version_cap
def _determine_rpc_version_cap(self):
    """Return the RPC version cap for this binary, caching it globally."""
    global LAST_RPC_VERSIONS
    try:
        # Already negotiated for this binary during this process lifetime.
        return LAST_RPC_VERSIONS[self.BINARY]
    except KeyError:
        pass

    version_cap = objects.Service.get_minimum_rpc_version(
        waterfall.context.get_admin_context(), self.BINARY)
    if version_cap == 'liberty':
        # NOTE(dulek): This means that one of the services is Liberty,
        # we should cap to it's RPC version.
        version_cap = LIBERTY_RPC_VERSIONS[self.BINARY]
    elif not version_cap:
        # If there is no service we assume they will come up later and will
        # have the same version as we do.
        version_cap = self.RPC_API_VERSION

    LOG.info(_LI('Automatically selected %(binary)s RPC version '
                 '%(version)s as minimum service version.'),
             {'binary': self.BINARY, 'version': version_cap})
    LAST_RPC_VERSIONS[self.BINARY] = version_cap
    return version_cap
def start(self):
    """Bring the service up: init the manager, RPC server, periodic tasks."""
    LOG.info(_LI('Starting %(topic)s node (version %(version_string)s)'),
             {'topic': self.topic,
              'version_string': version.version_string()})
    self.model_disconnected = False
    self.manager.init_host()

    LOG.debug("Creating RPC server for service %s", self.topic)
    target = messaging.Target(topic=self.topic, server=self.host)
    endpoints = [self.manager]
    endpoints.extend(self.manager.additional_endpoints)
    serializer = objects_base.WaterfallObjectSerializer()
    self.rpcserver = rpc.get_server(target, endpoints, serializer)
    self.rpcserver.start()

    self.manager.init_host_with_rpc()

    # NOTE(review): report_state pulse was disabled in the original
    # source; preserved here untouched.
    #if self.report_interval:
    #    pulse = loopingcall.FixedIntervalLoopingCall(
    #        self.report_state)
    #    pulse.start(interval=self.report_interval,
    #                initial_delay=self.report_interval)
    #    self.timers.append(pulse)

    if self.periodic_interval:
        # Fuzz the first run to avoid thundering-herd across services.
        if self.periodic_fuzzy_delay:
            initial_delay = random.randint(0, self.periodic_fuzzy_delay)
        else:
            initial_delay = None
        periodic = loopingcall.FixedIntervalLoopingCall(
            self.periodic_tasks)
        periodic.start(interval=self.periodic_interval,
                       initial_delay=initial_delay)
        self.timers.append(periodic)
def __init__(self):
    """Create the manager and load every configured API extension."""
    LOG.info(_LI('Initializing extension manager.'))
    self.extensions = {}
    self.cls_list = CONF.osapi_workflow_extension
    self._load_extensions()