def report_state(self): """Update the state of this service in the datastore.""" if not self.manager.is_working(): # NOTE(dulek): If manager reports a problem we're not sending # heartbeats - to indicate that service is actually down. LOG.error( _LE('Manager for service %(binary)s %(host)s is ' 'reporting problems, not sending heartbeat. ' 'Service will appear "down".'), { 'binary': self.binary, 'host': self.host }) return ctxt = context.get_admin_context() zone = CONF.storage_availability_zone try: try: service_ref = objects.Service.get_by_id(ctxt, self.service_id) except exception.NotFound: LOG.debug('The service database object disappeared, ' 'recreating it.') self._create_service_ref(ctxt) service_ref = objects.Service.get_by_id(ctxt, self.service_id) service_ref.report_count += 1 if zone != service_ref.availability_zone: service_ref.availability_zone = zone service_ref.save() # TODO(termie): make this pattern be more elegant. if getattr(self, 'model_disconnected', False): self.model_disconnected = False LOG.error(_LE('Recovered model server connection!')) except db_exc.DBConnectionError: if not getattr(self, 'model_disconnected', False): self.model_disconnected = True LOG.exception(_LE('model server went away')) # NOTE(jsbryant) Other DB errors can happen in HA configurations. # such errors shouldn't kill this thread, so we handle them here. except db_exc.DBError: if not getattr(self, 'model_disconnected', False): self.model_disconnected = True LOG.exception(_LE('DBError encountered: ')) except Exception: if not getattr(self, 'model_disconnected', False): self.model_disconnected = True LOG.exception(_LE('Exception encountered: '))
def main(): objects.register_all() gmr_opts.set_defaults(CONF) CONF(sys.argv[1:], project='waterfall', version=version.version_string()) config.set_middleware_defaults() logging.setup(CONF, "waterfall") LOG = logging.getLogger('waterfall.all') utils.monkey_patch() gmr.TextGuruMeditation.setup_autorun(version, conf=CONF) rpc.init(CONF) launcher = service.process_launcher() # waterfall-api try: server = service.WSGIService('osapi_workflow') launcher.launch_service(server, workers=server.workers or 1) except (Exception, SystemExit): LOG.exception(_LE('Failed to load osapi_workflow')) for binary in ['waterfall-scheduler', 'waterfall-backup']: try: launcher.launch_service(service.Service.create(binary=binary)) except (Exception, SystemExit): LOG.exception(_LE('Failed to load %s'), binary) # waterfall-workflow try: if CONF.enabled_backends: for backend in CONF.enabled_backends: CONF.register_opt(workflow_cmd.host_opt, group=backend) backend_host = getattr(CONF, backend).backend_host host = "%s@%s" % (backend_host or CONF.host, backend) server = service.Service.create(host=host, service_name=backend, binary='waterfall-workflow') # Dispose of the whole DB connection pool here before # starting another process. Otherwise we run into cases # where child processes share DB connections which results # in errors. session.dispose_engine() launcher.launch_service(server) else: server = service.Service.create(binary='waterfall-workflow') launcher.launch_service(server) except (Exception, SystemExit): LOG.exception(_LE('Failed to load conder-workflow')) launcher.wait()
def _heartbeat(self): try: self.coordinator.heartbeat() return True except coordination.ToozConnectionError: LOG.exception( _LE('Connection error while sending a heartbeat ' 'to coordination backend.')) raise except coordination.ToozError: LOG.exception( _LE('Error sending a heartbeat to coordination ' 'backend.')) return False
def _error(self, inner, req): if not isinstance(inner, exception.QuotaError): LOG.exception(_LE("Caught error: %(type)s %(error)s"), { 'type': type(inner), 'error': inner }) safe = getattr(inner, 'safe', False) headers = getattr(inner, 'headers', None) status = getattr(inner, 'code', 500) if status is None: status = 500 msg_dict = dict(url=req.url, status=status) LOG.info(_LI("%(url)s returned with HTTP %(status)d"), msg_dict) outer = self.status_to_type(status) if headers: outer.headers = headers # NOTE(johannes): We leave the explanation empty here on # purpose. It could possibly have sensitive information # that should not be returned back to the user. See # bugs 868360 and 874472 # NOTE(eglynn): However, it would be over-conservative and # inconsistent with the EC2 API to hide every exception, # including those that are safe to expose, see bug 1021373 if safe: msg = (inner.msg if isinstance(inner, exception.WaterfallException) else six.text_type(inner)) params = { 'exception': inner.__class__.__name__, 'explanation': msg } outer.explanation = _('%(exception)s: %(explanation)s') % params return wsgi.Fault(outer)
def require_driver_initialized(driver): """Verifies if `driver` is initialized If the driver is not initialized, an exception will be raised. :params driver: The driver instance. :raises: `exception.DriverNotInitialized` """ # we can't do anything if the driver didn't init if not driver.initialized: driver_name = driver.__class__.__name__ LOG.error(_LE("Workflow driver %s not initialized"), driver_name) raise exception.DriverNotInitialized()
def __init__(self, message=None, **kwargs): self.kwargs = kwargs self.kwargs['message'] = message if 'code' not in self.kwargs: try: self.kwargs['code'] = self.code except AttributeError: pass for k, v in self.kwargs.items(): if isinstance(v, Exception): self.kwargs[k] = six.text_type(v) if self._should_format(): try: message = self.message % kwargs except Exception: exc_info = sys.exc_info() # kwargs doesn't match a variable in the message # log the issue and the kwargs LOG.exception(_LE('Exception in string format operation')) for name, value in kwargs.items(): LOG.error(_LE("%(name)s: %(value)s"), {'name': name, 'value': value}) if CONF.fatal_exception_format_errors: six.reraise(*exc_info) # at least get the core message out if something happened message = self.message elif isinstance(message, Exception): message = six.text_type(message) # NOTE(luisg): We put the actual message in 'msg' so that we can access # it, because if we try to access the message via 'message' it will be # overshadowed by the class' message attribute self.msg = message super(WaterfallException, self).__init__(message)
def _check_extension(self, extension): """Checks for required methods in extension objects.""" try: LOG.debug('Ext name: %s', extension.name) LOG.debug('Ext alias: %s', extension.alias) LOG.debug('Ext description: %s', ' '.join(extension.__doc__.strip().split())) LOG.debug('Ext namespace: %s', extension.namespace) LOG.debug('Ext updated: %s', extension.updated) except AttributeError: LOG.exception(_LE("Exception loading extension.")) return False return True
def start(self): """Connect to coordination backend and start heartbeat.""" if not self.started: try: self._dead = threading.Event() self._start() self.started = True # NOTE(bluex): Start heartbeat in separate thread to avoid # being blocked by long coroutines. if self.coordinator and self.coordinator.requires_beating: self._ev = eventlet.spawn( lambda: tpool.execute(self.heartbeat)) except coordination.ToozError: LOG.exception(_LE('Error starting coordination backend.')) raise LOG.info(_LI('Coordination backend started successfully.'))
def robust_file_write(directory, filename, data): """Robust file write. Use "write to temp file and rename" model for writing the persistence file. :param directory: Target directory to create a file. :param filename: File name to store specified data. :param data: String data. """ tempname = None dirfd = None try: dirfd = os.open(directory, os.O_DIRECTORY) # write data to temporary file with tempfile.NamedTemporaryFile(prefix=filename, dir=directory, delete=False) as tf: tempname = tf.name tf.write(data.encode('utf-8')) tf.flush() os.fdatasync(tf.fileno()) tf.close() # Fsync the directory to ensure the fact of the existence of # the temp file hits the disk. os.fsync(dirfd) # If destination file exists, it will be replaced silently. os.rename(tempname, os.path.join(directory, filename)) # Fsync the directory to ensure the rename hits the disk. os.fsync(dirfd) except OSError: with excutils.save_and_reraise_exception(): LOG.error(_LE("Failed to write persistence file: %(path)s."), {'path': os.path.join(directory, filename)}) if os.path.isfile(tempname): os.unlink(tempname) finally: if dirfd: os.close(dirfd)