def on_map(self, sync_type, sync_object):
    if self._awaiting_pgs:
        assert sync_type == PgSummary
        pg_summary = sync_object
        pgs_not_creating = 0
        for state_tuple, count in pg_summary.data['by_pool'][self._pool_id].items():
            states = state_tuple.split("+")
            if 'creating' not in states:
                pgs_not_creating += count

        if pgs_not_creating >= self._pg_count:
            self.complete()
    elif self._await_version:
        assert sync_type == OsdMap
        osd_map = sync_object
        if osd_map.version >= self._await_version:
            for pool_id, pool in osd_map.pools_by_id.items():
                if pool['pool_name'] == self._pool_name:
                    self._pool_id = pool_id
                    self._pg_count = pool['pg_num']
                    break

            if self._pool_id is None:
                log.error("'{0}' not found, pools are {1}".format(
                    self._pool_name,
                    [p['pool_name'] for p in osd_map.pools_by_id.values()]
                ))
                self.set_error("Expected pool '{0}' not found".format(self._pool_name))
                self.complete()

            self._awaiting_pgs = True
    else:
        raise NotImplementedError("Unexpected map {0}".format(sync_type))
def on_map(self, sync_type, sync_objects):
    if self._awaiting_pgs:
        assert sync_type == PgSummary
        pg_summary = sync_objects.get(PgSummary)
        pgs_not_creating = 0
        for state_tuple, count in pg_summary.data['by_pool'][self._pool_id].items():
            states = state_tuple.split("+")
            if 'creating' not in states:
                pgs_not_creating += count

        if pgs_not_creating >= self._pg_count:
            self.complete()
    elif self._await_version:
        assert sync_type == OsdMap
        osd_map = sync_objects.get(OsdMap)
        if osd_map.version >= self._await_version:
            for pool_id, pool in osd_map.pools_by_id.items():
                if pool['pool_name'] == self._pool_name:
                    self._pool_id = pool_id
                    self._pg_count = pool['pg_num']
                    break

            if self._pool_id is None:
                log.error("'{0}' not found, pools are {1}".format(
                    self._pool_name,
                    [p['pool_name'] for p in osd_map.pools_by_id.values()]
                ))
                self.set_error("Expected pool '{0}' not found".format(self._pool_name))
                self.complete()

            self._awaiting_pgs = True
    else:
        raise NotImplementedError("Unexpected map {0}".format(sync_type))
def __init__(self):
    self._complete = gevent.event.Event()

    self._rpc_thread = RpcThread(self)
    self._discovery_thread = TopLevelEvents(self)
    self._process_monitor = ProcessMonitorThread()

    db_path = config.get('cthulhu', 'db_path')
    if sqlalchemy is not None and db_path:
        try:
            # Prepare persistence
            engine = create_engine(config.get('cthulhu', 'db_path'))  # noqa
            Session.configure(bind=engine)

            self.persister = Persister()
        except sqlalchemy.exc.ArgumentError as e:
            log.error("Database error: %s" % e)
            raise
    else:
        class NullPersister(object):
            def start(self):
                pass

            def stop(self):
                pass

            def join(self):
                pass

            def __getattribute__(self, item):
                if item.startswith('_'):
                    return object.__getattribute__(self, item)
                else:
                    try:
                        return object.__getattribute__(self, item)
                    except AttributeError:
                        def blackhole(*args, **kwargs):
                            pass
                        return blackhole

        self.persister = NullPersister()

    # Remote operations
    self.requests = RequestCollection(self)
    self._request_ticker = Ticker(request_collection.TICK_PERIOD,
                                  lambda: self.requests.tick())

    # FSID to ClusterMonitor
    self.clusters = {}

    # Generate events on state changes
    self.eventer = Eventer(self)

    # Handle all ceph/server messages
    self.servers = ServerMonitor(self.persister, self.eventer, self.requests)
def __init__(self):
    self._complete = gevent.event.Event()

    self._rpc_thread = RpcThread(self)
    self._discovery_thread = DiscoveryThread(self)
    self._process_monitor = ProcessMonitorThread()

    self.notifier = NotificationThread()
    try:
        # Prepare persistence
        engine = create_engine(config.get('cthulhu', 'db_path'))
        Session.configure(bind=engine)

        self.persister = Persister()
    except sqlalchemy.exc.ArgumentError as e:
        log.error("Database error: %s" % e)
        raise

    # FSID to ClusterMonitor
    self.clusters = {}

    # Generate events on state changes
    self.eventer = Eventer(self)

    # Handle all ceph/server messages
    self.servers = ServerMonitor(self.persister, self.eventer)
def fetch(self, minion_id, sync_type):
    log.debug("SyncObjects.fetch: %s/%s" % (minion_id, sync_type))
    if minion_id is None:
        # We're probably being replayed to from the database
        log.warn("SyncObjects.fetch called with minion_id=None")
        return

    self._fetching_at[sync_type] = now()
    client = LocalClient(config.get('cthulhu', 'salt_config_path'))
    # TODO: clean up unused 'since' argument
    pub_data = client.run_job(minion_id, 'ceph.get_cluster_object',
                              condition_kwarg([], {'cluster_name': self._cluster_name,
                                                   'sync_type': sync_type.str,
                                                   'since': None}))
    if not pub_data:
        log.error("Failed to start fetch job %s/%s" % (minion_id, sync_type))
        # Don't throw an exception, because if a fetch fails we should always
        # end up issuing another one on the next heartbeat.
    else:
        log.debug("SyncObjects.fetch: jid=%s minions=%s" % (pub_data['jid'], pub_data['minions']))
def main():
    log.info('calamari-list: starting')
    complete = gevent.event.Event()

    ceph_argparse = None
    while not ceph_argparse:
        try:
            import ceph_argparse
        except ImportError:
            log.error('Cannot import ceph_argparse module -- please install ceph')
            complete.wait(timeout=50)

    from cthulhu.manager.manager import Manager

    carbon = ShallowCarbonCache()
    carbon.start()

    cthulhu = Manager()
    cthulhu_started = False
    while not cthulhu_started:
        try:
            if not cthulhu_started:
                cthulhu_started = cthulhu.start()
        except Exception as e:
            log.exception('It borked')
            log.error(str(e))
            complete.wait(timeout=5)
def on_map(self, sync_type, sync_objects):
    """
    Callback for when a new cluster map is available, in which we notify
    any interested ongoing UserRequests of the new map so that they can
    progress if they were waiting for it.
    """
    with self._lock:
        requests = self.get_all(state=UserRequest.SUBMITTED)
        for request in requests:
            try:
                # If this is one of the types that this request
                # is waiting for, invoke on_map.
                for awaited_type in request.awaiting_versions.keys():
                    if awaited_type == sync_type:
                        with self._update_index(request):
                            request.on_map(sync_type, sync_objects)
            except Exception as e:
                log.exception("Request %s threw exception in on_map", request.id)
                if request.jid:
                    log.error("Abandoning job {0}".format(request.jid))
                    request.jid = None

                request.set_error("Internal error %s" % e)
                request.complete()

            if request.state == UserRequest.COMPLETE:
                self._eventer.on_user_request_complete(request)
def create(self, attributes):
    # get the text map
    crush_map = self.osd_map.data['crush_map_text']
    merged_map = _merge_rule_and_map(crush_map, attributes)
    commands = [('osd setcrushmap', {'data': merged_map})]
    log.error('setcrushmap {0} {1}'.format(merged_map, attributes))

    message = "Creating CRUSH rule in {cluster_name}".format(cluster_name=self._cluster_monitor.name)

    return OsdMapModifyingRequest(message,
                                  self._cluster_monitor.fsid,
                                  self._cluster_monitor.name,
                                  commands)
def __init__(self, manager):
    super(RpcThread, self).__init__()
    self._manager = manager
    self._complete = gevent.event.Event()

    if zerorpc is None:
        log.error("zerorpc package is missing")
        raise RuntimeError("Cannot run without zerorpc installed!")

    self._server = zerorpc.Server(RpcInterface(manager))
    self._bound = False
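# --- Illustrative sketch (not from the original code) ---
# How a caller might talk to the zerorpc server wrapped by RpcThread once it is
# bound and serving RpcInterface. The endpoint URL and the 'list_clusters'
# method name are assumptions for illustration only; the real bind address
# comes from configuration.
import zerorpc

client = zerorpc.Client()
client.connect("tcp://127.0.0.1:4242")  # assumed endpoint
try:
    print(client.list_clusters())  # hypothetical RpcInterface method
finally:
    client.close()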
def dump_stacks():
    """
    This is for use in debugging, especially using manhole
    """
    for ob in gc.get_objects():
        if not isinstance(ob, greenlet.greenlet):
            continue
        if not ob:
            continue
        log.error(''.join(traceback.format_stack(ob.gr_frame)))
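# --- Illustrative sketch (not from the original code) ---
# One way dump_stacks might be made reachable in a running process for ad-hoc
# debugging: bind it to a signal. The choice of SIGUSR1 is an assumption;
# manhole (mentioned in the docstring) is another way to invoke it interactively.
import signal

signal.signal(signal.SIGUSR1, lambda signum, frame: dump_stacks())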
def fail_all(self, failed_minion):
    """
    For use when we lose contact with the minion that was in use for running
    requests: assume all these requests are never going to return now.
    """
    for request in self.get_all(UserRequest.SUBMITTED):
        with self._update_index(request):
            request.set_error("Lost contact with server %s" % failed_minion)
            if request.jid:
                log.error("Giving up on JID %s" % request.jid)
                request.jid = None
            request.complete()
def _run(self):
    assert self._bound
    while not self._complete.is_set():
        try:
            log.info("%s run..." % self.__class__.__name__)
            self._server.run()
        except:
            log.error(traceback.format_exc())
            self._complete.wait(self.EXCEPTION_BACKOFF)

    log.info("%s complete..." % self.__class__.__name__)
def tick(self):
    """
    For walltime-based monitoring of running requests.  Long-running requests
    get a periodic call to saltutil.running to verify that things really are
    still happening.
    """
    if not self._by_jid:
        return
    else:
        log.debug("RequestCollection.tick: %s JIDs underway" % len(self._by_jid))

    # Identify JIDs that haven't had a saltutil.running response for too long.
    # Kill requests in a separate phase because request:JID is not 1:1.
    stale_jobs = set()
    _now = now()
    for request in self._by_jid.values():
        if _now - request.alive_at > datetime.timedelta(seconds=TICK_PERIOD * 3):
            log.error("Request %s JID %s stale: now=%s, alive_at=%s" % (
                request.id, request.jid, _now, request.alive_at))
            stale_jobs.add(request)

    # Any identified stale jobs are errored out.
    for request in stale_jobs:
        with self._update_index(request):
            request.set_error("Lost contact")
            request.jid = None
            request.complete()

    # Identify minions associated with JIDs in flight
    query_minions = set()
    for jid, request in self._by_jid.items():
        query_minions.add(request.minion_id)

    # Attempt to emit a saltutil.running to ping jobs; next tick we will see
    # whether the alive_at attribute was updated to indicate non-staleness.
    if query_minions:
        log.info("RequestCollection.tick: sending saltutil.running to {0}".format(query_minions))
        client = LocalClient(config.get('cthulhu', 'salt_config_path'))
        pub_data = client.run_job(list(query_minions), 'saltutil.running', [], expr_form="list")
        if not pub_data:
            log.warning("Failed to publish saltutil.running to {0}".format(query_minions))
def on_job_complete(self, fqdn, jid, success, result, cmd, args):
    # It would be much nicer to put the FSID at the start of
    # the tag, if salt would only let us add custom tags to our jobs.
    # Instead we enforce a convention that calamari jobs include
    # fsid in their return value.
    if 'fsid' not in result or result['fsid'] != self.fsid:
        # Something for a different ClusterMonitor
        log.debug("Ignoring job return, not for my FSID")
        return

    if cmd == 'ceph.get_cluster_object':
        # A ceph.get_cluster_object response
        if not success:
            log.error("on_sync_object: failure from %s: %s" % (fqdn, result))
            return

        self.on_sync_object(fqdn, result)
    else:
        log.warning("Unexpected function '%s' (%s)" % (cmd, cmd))
def fetch(self, minion_id, sync_type):
    log.debug("SyncObjects.fetch: %s/%s" % (minion_id, sync_type))
    if minion_id is None:
        # We're probably being replayed to from the database
        log.warn("SyncObjects.fetch called with minion_id=None")
        return

    self._fetching_at[sync_type] = now()
    try:
        # TODO: clean up unused 'since' argument
        jid = remote.run_job(minion_id, 'ceph.get_cluster_object',
                             {'cluster_name': self._cluster_name,
                              'sync_type': sync_type.str,
                              'since': None})
    except Unavailable:
        # Don't throw an exception, because if a fetch fails we should end up
        # issuing another one on the next heartbeat.
        log.error("Failed to start fetch job %s/%s" % (minion_id, sync_type))
    else:
        log.debug("SyncObjects.fetch: jid=%s" % jid)
def on_completion(self, data):
    """
    Callback for when a salt/job/<jid>/ret event is received, in which we
    find the UserRequest that created the job, and inform it of completion
    so that it can progress.
    """
    with self._lock:
        jid = data['jid']
        result = data['return']
        log.debug("on_completion: jid=%s data=%s" % (jid, data))

        try:
            request = self.get_by_jid(jid)
            log.debug("on_completion: jid %s belongs to request %s" % (jid, request.id))
        except KeyError:
            log.warning("on_completion: unknown jid {0}".format(jid))
            return

        if not data['success']:
            # This indicates a failure at the salt level, i.e. job threw an exception
            log.error("Remote execution failed for request %s: %s" % (request.id, result))
            if isinstance(result, dict):
                # Handler ran and recorded an error for us
                request.set_error(result['error_status'])
            else:
                # An exception, probably, stringized by salt for us
                request.set_error(result)
            request.complete()
        elif result['error']:
            # This indicates a failure within ceph.rados_commands which was caught
            # by our code, like one of our Ceph commands returned an error code.
            # NB in future there may be UserRequest subclasses which want to receive
            # and handle these errors themselves, so this branch would be refactored
            # to allow that.
            log.error("Request %s experienced an error: %s" % (request.id, result['error_status']))
            request.jid = None
            request.set_error(result['error_status'])
            request.complete()
        else:
            if request.state != UserRequest.SUBMITTED:
                # Unexpected, ignore.
                log.error("Received completion for request %s/%s in state %s" % (
                    request.id, request.jid, request.state))
                return

            try:
                with self._update_index(request):
                    old_jid = request.jid
                    request.complete_jid(result)
                    assert request.jid != old_jid

                    # After a jid completes, requests may start waiting for cluster
                    # map updates, we ask ClusterMonitor to hurry up and get them on
                    # behalf of the request.
                    if request.awaiting_versions:
                        for sync_type, version in request.awaiting_versions.items():
                            if version is not None:
                                log.debug("Notifying SyncObjects of awaited version %s/%s" % (sync_type.str, version))
                                self._sync_objects.on_version(data['id'], sync_type, version)

                        # The request may be waiting for an epoch that we already have, if so
                        # give it to the request right away
                        for sync_type, want_version in request.awaiting_versions.items():
                            got_version = self._sync_objects.get_version(sync_type)
                            if want_version and sync_type.cmp(got_version, want_version) >= 0:
                                log.info("Awaited %s %s is immediately available" % (sync_type, want_version))
                                request.on_map(sync_type, self._sync_objects)
            except Exception as e:
                # Ensure that a misbehaving piece of code in a UserRequest subclass
                # results in a terminated job, not a zombie job
                log.exception("Calling complete_jid for %s/%s" % (request.id, request.jid))
                request.jid = None
                request.set_error("Internal error %s" % e)
                request.complete()

        if request.state == UserRequest.COMPLETE:
            self._eventer.on_user_request_complete(request)
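# --- Illustrative sketch (not from the original code) ---
# The shape of a salt job-return payload that on_completion reads, reconstructed
# only from the fields the handler accesses above; the values are invented and
# this is not an authoritative salt schema.
example_return_event = {
    'jid': '20140101000000000000',  # job ID used to look up the UserRequest
    'id': 'minion.example.com',     # minion FQDN, passed to SyncObjects.on_version
    'success': True,                # False means salt itself reported a failure
    'return': {
        'fsid': 'abc123',           # calamari convention: jobs echo back the cluster FSID
        'error': False,             # True means ceph.rados_commands caught an error
        'error_status': '',         # populated when 'error' is True
    },
}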
def handle_exception(exc_type, exc_value, exc_traceback):
    log.error("Uncaught exception",
              exc_info=(exc_type, exc_value, exc_traceback))
def custom_handle_error(self, context, type, value, tb):
    if not issubclass(type, Hub.SYSTEM_ERROR + Hub.NOT_ERROR):
        log.error("Uncaught exception", exc_info=(type, value, tb))
    self._origin_handle_error(context, type, value, tb)
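# --- Illustrative sketch (not from the original code) ---
# How handle_exception and custom_handle_error might be installed: route
# ordinary uncaught exceptions through sys.excepthook, and patch gevent's Hub
# so errors escaping greenlets are logged as well. The exact wiring here is an
# assumption, not taken from the original source.
import sys
from gevent.hub import Hub

sys.excepthook = handle_exception

# Keep a reference to the original handler so custom_handle_error can delegate
# to it via self._origin_handle_error.
Hub._origin_handle_error = Hub.handle_error
Hub.handle_error = custom_handle_error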
CONTACT_THRESHOLD_FACTOR = int(config.get('cthulhu', 'server_timeout_factor'))  # multiple of contact period
CLUSTER_CONTACT_THRESHOLD = int(config.get('cthulhu', 'cluster_contact_threshold'))  # in seconds
MINION_CONFIG = str(config.get('cthulhu', 'salt_config_path')).replace('master', 'minion')
EMIT_EVENTS_TO_SALT_EVENT_BUS = bool(strtobool(config.get('cthulhu', 'emit_events_to_salt_event_bus')))
EVENT_TAG_PREFIX = str(config.get('cthulhu', 'event_tag_prefix'))

if EMIT_EVENTS_TO_SALT_EVENT_BUS:
    try:
        # TODO move this to import
        # from calamari_common import Caller
        import salt.client
    except ImportError as e:
        EMIT_EVENTS_TO_SALT_EVENT_BUS = False
        log.error("Could not import salt.client: %s. Events cannot be emitted to salt event bus", str(e))


class Event(object):
    def __init__(self, severity, message, **associations):
        self.severity = severity
        self.message = message
        self.associations = associations
        self.when = now()


class Eventer(gevent.greenlet.Greenlet):
    """
    I listen to changes from ClusterMonitor and ServerMonitor, and feed
    events into the event log.  I also periodically check some time-based
    conditions in my on_tick method.
    """
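# --- Illustrative sketch (not from the original code) ---
# Event is just a severity, a message, a timestamp and free-form keyword
# associations. The severity constant and association values below are
# assumptions for illustration; they depend on the Event class defined above.
WARNING = 2  # hypothetical severity level

ev = Event(WARNING,
           "Cluster '{name}' is late reporting in".format(name='ceph'),
           fsid='abc123')
print(ev.severity, ev.message, ev.associations, ev.when)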
def _run(self):
    self._plugin_monitor.start()

    self._ready.set()
    log.debug("ClusterMonitor._run: ready")

    event = SaltEventSource(log, salt_config)

    while not self._complete.is_set():
        # No salt tag filtering: https://github.com/saltstack/salt/issues/11582
        ev = event.get_event(full=True)

        if ev is not None:
            data = ev['data']
            tag = ev['tag']
            log.debug("_run.ev: %s/tag=%s" % (data['id'] if 'id' in data else None, tag))

            # I am interested in the following tags:
            # - salt/job/<jid>/ret/<minion id> where jid is one that I started
            #   (this includes ceph.rados_command and ceph.get_cluster_object)
            # - ceph/cluster/<fsid> where fsid is my fsid

            try:
                if tag.startswith("ceph/cluster/{0}".format(self.fsid)):
                    # A ceph.heartbeat beacon
                    self.on_heartbeat(data['id'], data['data'])
                elif re.match("^salt/job/\d+/ret/[^/]+$", tag):
                    if data['fun'] == "saltutil.running":
                        # Update on what jobs are running
                        # It would be nice to filter these down to those which really are for
                        # this cluster, but as long as N_clusters and N_jobs are reasonably small
                        # it's not an efficiency problem.
                        self._requests.on_tick_response(data['id'], data['return'])

                    # It would be much nicer to put the FSID at the start of
                    # the tag, if salt would only let us add custom tags to our jobs.
                    # Instead we enforce a convention that all calamari jobs must include
                    # fsid in their return value.
                    if (not isinstance(data, dict)) or not isinstance(data['return'], dict):
                        # Something not formatted for ClusterMonitor
                        log.warning("Ignoring event %s" % tag)
                        continue

                    if 'fsid' not in data['return'] or data['return']['fsid'] != self.fsid:
                        # Something for a different ClusterMonitor
                        log.debug("Ignoring job return, not for my FSID")
                        continue

                    if data['fun'] == 'ceph.get_cluster_object':
                        # A ceph.get_cluster_object response
                        if not data['success']:
                            log.error("on_sync_object: failure from %s: %s" % (data['id'], data['return']))
                            continue

                        self.on_sync_object(data['id'], data['return'])
                    else:
                        log.warning("Unexpected function '%s' (%s)" % (data['fun'], tag))
                else:
                    # This does not concern us, ignore it
                    pass
            except:
                # Because this is our main event handling loop, swallow exceptions
                # instead of letting them end the world.
                log.exception("Exception handling message with tag %s" % tag)
                log.debug("Message content: %s" % data)

    log.info("%s complete" % self.__class__.__name__)
    self._plugin_monitor.stop()
    self._plugin_monitor.join()
    self.done.set()