def _run(self):
    """
    Event loop for this monitor: read salt events until ``self._complete``
    is set, passing ``ceph/server`` messages to ``self.on_server_heartbeat``.

    Events are fetched unfiltered from a ``SaltEventSource``; ``None``
    results and events with other tags are skipped.  Events whose tag
    starts with ``salt/presen`` are only logged at debug level.

    A failure while handling one message is logged and the loop carries on.
    Only ``Exception`` subclasses are caught, so ``KeyboardInterrupt``,
    ``SystemExit`` and other ``BaseException``-only exceptions propagate.
    """
    log.info("Starting %s" % self.__class__.__name__)
    subscription = SaltEventSource(log, salt_config)

    while not self._complete.is_set():
        # No salt tag filtering: https://github.com/saltstack/salt/issues/11582
        ev = subscription.get_event(full=True)
        if ev is None:
            continue

        tag = ev['tag']
        if tag.startswith("ceph/server"):
            data = ev['data']
            # Kept outside the try so the error path can still name the
            # sender (or None) even if the lookup itself is what failed.
            minion_id = None
            try:
                minion_id = data['id']
                log.debug("ServerMonitor got ceph/server message from %s" % minion_id)
                # NB assumption that FQDN==minion_id is true unless
                # someone has modded their salt minion config.
                self.on_server_heartbeat(minion_id, data['data'])
            except Exception:
                # Log the traceback first: dumping the event below may
                # itself fail and must not cost us the original error.
                log.exception("Error handling ceph/server message from %s" % minion_id)
                try:
                    detail = json.dumps(ev)
                except (TypeError, ValueError):
                    # Event is not JSON-serialisable; fall back to repr
                    detail = repr(ev)
                log.debug("Message detail: %s" % detail)
        elif tag.startswith("salt/presen"):
            # so these exist but i'm not convinced they work, I started a minion
            # and then saw several messages indicating no minions were present
            log.debug("ServerMonitor: presence %s" % ev['data'])

    log.info("Completed %s" % self.__class__.__name__)
def listen(self, completion, on_heartbeat=None, on_job=None, on_server_heartbeat=None, on_running_jobs=None, fsid=None):
    """
    Read salt events until ``completion`` is set, dispatching each one to
    the matching callback.  Callbacks left as None are not called.

    Exceptions raised while dispatching an event are logged and swallowed
    so that one bad message does not end the loop.  Only ``Exception``
    subclasses are caught; ``KeyboardInterrupt``, ``SystemExit`` and other
    ``BaseException``-only exceptions propagate to the caller.

    :param completion: Object whose ``is_set()`` is checked before each
                       event is fetched; the loop ends once it returns true
    :param on_heartbeat: Callback for heartbeats, called with
                         ``(data['id'], data['data'])``
    :param on_job: Callback for job completions, called with the event's
                   ``id, jid, success, return, fun, fun_args`` fields
    :param on_server_heartbeat: Callback for events whose tag starts with
                                ``ceph/server``, called with
                                ``(data['id'], data['data'])``
    :param on_running_jobs: Callback for successful ``saltutil.running``
                            returns, called with
                            ``(data['id'], data['return'])``
    :param fsid: Optionally filter heartbeats to one FSID
    """
    event = SaltEventSource(log, salt_config)

    if fsid:
        heartbeat_tag = "ceph/cluster/{0}".format(fsid)
    else:
        heartbeat_tag = "ceph/cluster/"

    # Raw string so that \d reaches the regex engine untouched; compiled
    # once here because the pattern does not change between events.
    job_return_tag = re.compile(r"^salt/job/\d+/ret/[^/]+$")

    while not completion.is_set():
        # No salt tag filtering: https://github.com/saltstack/salt/issues/11582
        ev = event.get_event(full=True)
        if ev is None:
            continue

        data = ev['data']
        tag = ev['tag']
        log.debug("_run.ev: %s/tag=%s" % (data['id'] if 'id' in data else None, tag))

        # I am interested in the following tags:
        # - salt/job/<jid>/ret/<minion id> where jid is one that I started
        #   (this includes ceph.rados_command and ceph.get_cluster_object)
        # - ceph/cluster/<fsid> where fsid is my fsid
        try:
            if tag.startswith(heartbeat_tag):
                if on_heartbeat:
                    on_heartbeat(data['id'], data['data'])
            elif tag.startswith("ceph/server"):
                if on_server_heartbeat:
                    on_server_heartbeat(data['id'], data['data'])
            elif job_return_tag.match(tag):
                if data['fun'] == 'saltutil.running':
                    # Failed saltutil.running returns are dropped silently
                    if on_running_jobs and data['success']:
                        on_running_jobs(data['id'], data['return'])
                elif on_job:
                    on_job(data['id'], data['jid'], data['success'],
                           data['return'], data['fun'], data['fun_args'])
            # Any other tag does not concern us, ignore it
        except Exception:
            # Because this is our main event handling loop, swallow exceptions
            # instead of letting them end the world.
            log.exception("Exception handling message with tag %s" % tag)
            log.debug("Message content: %s" % data)
def _run(self):
    """
    Event loop for top-level events: read salt events until
    ``self._complete`` is set and route them through ``self._manager``.

    - ``ceph/cluster/...`` tags: if the heartbeat's ``fsid`` is not already
      in ``self._manager.clusters``, call ``self._manager.on_discovery``;
      otherwise just log at debug level.
    - ``salt/job/<jid>/ret/<minion id>`` tags: ``saltutil.running`` returns
      go to ``self._manager.requests.on_tick_response``, everything else to
      ``self._manager.requests.on_completion``.
    - Any other tag is logged at debug level and ignored.

    Exceptions raised while handling an event are logged and swallowed.
    Only ``Exception`` subclasses are caught, so ``KeyboardInterrupt``,
    ``SystemExit`` and other ``BaseException``-only exceptions propagate.
    """
    log.info("%s running" % self.__class__.__name__)
    event = SaltEventSource(log, salt_config)

    # Raw string so that \d reaches the regex engine untouched; compiled
    # once here because the pattern does not change between events.
    job_return_tag = re.compile(r"^salt/job/\d+/ret/[^/]+$")

    while not self._complete.is_set():
        # No salt tag filtering: https://github.com/saltstack/salt/issues/11582
        ev = event.get_event(full=True)
        if ev is None or 'tag' not in ev:
            continue

        tag = ev['tag']
        data = ev['data']
        try:
            if tag.startswith("ceph/cluster/"):
                cluster_data = data['data']
                if cluster_data['fsid'] not in self._manager.clusters:
                    self._manager.on_discovery(data['id'], cluster_data)
                else:
                    log.debug(
                        "%s: heartbeat from existing cluster %s"
                        % (self.__class__.__name__, cluster_data['fsid']))
            elif job_return_tag.match(tag):
                if data['fun'] == 'saltutil.running':
                    self._manager.requests.on_tick_response(
                        data['id'], data['return'])
                else:
                    self._manager.requests.on_completion(data)
            else:
                # This does not concern us, ignore it
                log.debug("TopLevelEvents: ignoring %s" % tag)
        except Exception:
            # Keep the loop alive: one bad message must not stop event handling
            log.exception("Exception handling message tag=%s" % tag)

    log.info("%s complete" % self.__class__.__name__)
def _run(self):
    """
    Event loop for one cluster: start the plugin monitor, signal readiness,
    then process salt events until ``self._complete`` is set.

    - Tags starting with ``ceph/cluster/<self.fsid>`` are passed to
      ``self.on_heartbeat``.
    - ``salt/job/<jid>/ret/<minion id>`` tags: ``saltutil.running`` returns
      are passed to ``self._requests.on_tick_response``; job returns whose
      ``return`` dict carries this cluster's ``fsid`` and whose function is
      ``ceph.get_cluster_object`` are passed to ``self.on_sync_object``.
    - Any other tag is ignored.

    Exceptions raised while handling an event are logged and swallowed.
    Only ``Exception`` subclasses are caught, so ``KeyboardInterrupt``,
    ``SystemExit`` and other ``BaseException``-only exceptions propagate.

    After the loop ends the plugin monitor is stopped and joined, and
    ``self.done`` is set.
    """
    self._plugin_monitor.start()

    self._ready.set()
    log.debug("ClusterMonitor._run: ready")

    event = SaltEventSource(log, salt_config)

    # Both are loop-invariant, so build them once up front.  The pattern is
    # a raw string so that \d reaches the regex engine untouched.
    heartbeat_tag = "ceph/cluster/{0}".format(self.fsid)
    job_return_tag = re.compile(r"^salt/job/\d+/ret/[^/]+$")

    while not self._complete.is_set():
        # No salt tag filtering: https://github.com/saltstack/salt/issues/11582
        ev = event.get_event(full=True)
        if ev is None:
            continue

        data = ev['data']
        tag = ev['tag']
        log.debug("_run.ev: %s/tag=%s" % (data['id'] if 'id' in data else None, tag))

        # I am interested in the following tags:
        # - salt/job/<jid>/ret/<minion id> where jid is one that I started
        #   (this includes ceph.rados_command and ceph.get_cluster_object)
        # - ceph/cluster/<fsid> where fsid is my fsid
        try:
            if tag.startswith(heartbeat_tag):
                # A ceph.heartbeat beacon
                self.on_heartbeat(data['id'], data['data'])
            elif job_return_tag.match(tag):
                if data['fun'] == "saltutil.running":
                    # Update on what jobs are running
                    # It would be nice to filter these down to those which really are for
                    # this cluster, but as long as N_clusters and N_jobs are reasonably small
                    # it's not an efficiency problem.
                    self._requests.on_tick_response(data['id'], data['return'])
                    # NOTE(review): there is deliberately no `continue`
                    # here, matching the original flow; a saltutil.running
                    # return also goes through the checks below.  Confirm
                    # whether that fall-through is intended.

                # It would be much nicer to put the FSID at the start of
                # the tag, if salt would only let us add custom tags to our jobs.
                # Instead we enforce a convention that all calamari jobs must include
                # fsid in their return value.
                if (not isinstance(data, dict)) or not isinstance(data['return'], dict):
                    # Something not formatted for ClusterMonitor
                    log.warning("Ignoring event %s" % tag)
                    continue

                if 'fsid' not in data['return'] or data['return']['fsid'] != self.fsid:
                    # Something for a different ClusterMonitor
                    log.debug("Ignoring job return, not for my FSID")
                    continue

                if data['fun'] == 'ceph.get_cluster_object':
                    # A ceph.get_cluster_object response
                    if not data['success']:
                        log.error("on_sync_object: failure from %s: %s" % (data['id'], data['return']))
                        continue

                    self.on_sync_object(data['id'], data['return'])
                else:
                    log.warning("Unexpected function '%s' (%s)" % (data['fun'], tag))
            # Any other tag does not concern us, ignore it
        except Exception:
            # Because this is our main event handling loop, swallow exceptions
            # instead of letting them end the world.
            log.exception("Exception handling message with tag %s" % tag)
            log.debug("Message content: %s" % data)

    log.info("%s complete" % self.__class__.__name__)
    self._plugin_monitor.stop()
    self._plugin_monitor.join()
    self.done.set()