def testCouchSinkBasic(self):
    sink = CouchSink(self.config)
    docIds = []
    for i in range(10):
        a = Alert(Source=__file__, Level=i, Timestamp=time.time(), Type="Test")
        retVals = sink.send([a])
        # the return value has the following format:
        # [{'rev': '1-ba0a0903d4d6ddcbb85ff64d48d8be14', 'id': 'b7e8f807c96f572418b39422ccea252c'}]
        # just 1 item was added to the list of alerts, so retVals is also a 1 item list,
        # and the CMSCouch call commitOne also returns a list - hence the second nesting
        docIds.append(retVals[0][0]["id"])
    changes = sink.database.changes()
    self.assertEqual(len(changes[u"results"]), 10)
    self.assertEqual(changes[u"last_seq"], 10)
    for i in range(10, 20):
        a = Alert(Source=__file__, Level=i, Timestamp=time.time(), Type="Test")
        retVals = sink.send([a])
        # just 1 item was added to the list of alerts, so retVals is also a 1 item list,
        # and the CMSCouch call commitOne also returns a list - hence the second nesting
        docIds.append(retVals[0][0]["id"])
    changes = sink.database.changes()
    self.assertEqual(len(changes[u"results"]), 10)
    self.assertEqual(changes[u"last_seq"], 20)
    # check that the documents are present (docId instead of id: don't shadow the builtin)
    for docId, level in zip(docIds, range(20)):
        doc = sink.database.document(docId)
        self.assertEqual(doc["Level"], level)
def testFileSinkBasic(self):
    sink = FileSink(self.config)
    alerts = []
    nAlerts = 10
    for i in range(nAlerts):
        a = Alert(Source=__file__, Level=i, Timestamp=time.time(), Type="Test")
        alerts.append(a)
    sink.send(alerts)
    # test by reading the alerts back
    loadAlerts = sink.load()
    self.assertEqual(len(loadAlerts), nAlerts)

    # since the FileSink implementation depends on line-separated JSON
    # representations of Alert instances, test handling of newlines in the payload
    alerts = []
    testMsg = "additional \n message"
    for i in range(10, 20):
        a = Alert(Source=__file__, Level=i, Timestamp=time.time(),
                  Type="Test", Details={"message": testMsg})
        alerts.append(a)
    self.failUnless(os.path.exists(self.config.outputfile))
    sink.send(alerts)
    # test by reading the alerts back
    loadAlerts = sink.load()
    self.assertEqual(len(loadAlerts), 20)
    for a in loadAlerts[10:]:
        self.assertEqual(a["Details"]["message"], testMsg)
def testAlertBasic(self):
    a = Alert()
    self.assertEqual(a.level, 0)
    self.assertEqual(a["Source"], None)
    self.assertEqual(a["Type"], None)
    self.assertEqual(a["Workload"], None)
    self.assertEqual(a["Component"], None)
    self.assertEqual(a["Details"], {})
    self.assertEqual(a["Timestamp"], None)
    self.assertEqual(a["TimestampDecoded"], None)

    details = dict(detail="detail")
    a = Alert(Level=5, Source="src", Type="type", Workload="work",
              Component="comp", Details=details, Timestamp="time")
    self.assertEqual(a.level, 5)
    self.assertEqual(a["Source"], "src")
    self.assertEqual(a["Type"], "type")
    self.assertEqual(a["Workload"], "work")
    self.assertEqual(a["Component"], "comp")
    self.assertEqual(a["Details"], details)
    self.assertEqual(a["Timestamp"], "time")
    a.toMsg()
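# Illustrative sketch (not part of the test suite): a minimal Alert-like class
# satisfying the contract exercised above - a dict subclass whose keyword
# arguments become items, with a "level" convenience property and a toMsg()
# serializer. The real WMCore.Alerts.Alert.Alert differs in detail; this is
# only an assumption-laden outline and every name below is hypothetical.
class _SketchAlert(dict):
    _defaults = dict(Source=None, Type=None, Workload=None, Component=None,
                     Timestamp=None, TimestampDecoded=None, Level=0)

    def __init__(self, **kwargs):
        dict.__init__(self)
        self.update(self._defaults)
        self["Details"] = {}  # fresh dict per instance, not a shared default
        self.update(kwargs)

    @property
    def level(self):
        return self["Level"]

    def toMsg(self):
        # plain readable rendering; the real format is defined by Alert.toMsg()
        return "\n".join("%s: %s" % (k, v) for k, v in sorted(self.items()))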
def testRESTSinkBasic(self):
    sink = RESTSink(self.config)
    alerts = []
    for i in range(10):
        a = Alert(Source=__file__, Level=i, Timestamp=time.time(), Type="Test")
        alerts.append(a)
    retVals = sink.send(alerts)
    # the return value has the following format:
    # [{'rev': '1-ba0a0903d4d6ddcbb85ff64d48d8be14', 'id': 'b7e8f807c96f572418b39422ccea252c'}]
    # the CMSCouch call commitOne also returns a list - hence the second nesting
    changes = sink._database.changes()
    self.assertEqual(len(changes[u"results"]), 10)
    self.assertEqual(changes[u"last_seq"], 10)

    alerts = []
    for i in range(10, 20):
        a = Alert(Source=__file__, Level=i, Timestamp=time.time(), Type="Test")
        alerts.append(a)
    retVals = sink.send(alerts)
    changes = sink._database.changes()
    self.assertEqual(len(changes[u"results"]), 10)
    self.assertEqual(changes[u"last_seq"], 20)
def check(self):
    """
    Method called from the base class.
    Iterate over all HTTP status codes listed in the observables config
    value and check the number of occurrences of each by querying the
    CouchDB statistics.
    """
    for code in self.config.observables:
        occurrences = self.sample(str(code))
        if occurrences is not None:
            for threshold, level in zip(self.thresholds, self.levels):
                if occurrences >= threshold:
                    details = dict(HTTPCode=code, occurrences=occurrences,
                                   threshold=threshold)
                    a = Alert(**self.preAlert)
                    a.setTimestamp()
                    a["Source"] = self.__class__.__name__
                    a["Details"] = details
                    a["Level"] = level
                    logging.debug("Sending an alert (%s): %s" % (self.__class__.__name__, a))
                    self.sender(a)
                    break  # send only one alert, the critical threshold is tested first
        m = ("%s: checked code:%s current occurrences:%s" %
             (self.__class__.__name__, code, occurrences))
        logging.debug(m)
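# Illustrative sketch (not part of the poller): the threshold/level pairing
# that this check() and the other pollers below rely on. thresholds and
# levels are assumed to be parallel sequences ordered critical-first, so the
# first match wins and at most one alert (the most severe applicable one)
# fires per check. The helper name below is hypothetical.
def _pickLevel(value, thresholds, levels):
    """Return the alert level for value, or None if below all thresholds."""
    for threshold, level in zip(thresholds, levels):
        if value >= threshold:
            return level
    return None

# e.g. with thresholds (90, 70) paired with levels (10, 5):
#   _pickLevel(95, (90, 70), (10, 5)) -> 10   (critical)
#   _pickLevel(75, (90, 70), (10, 5)) -> 5    (soft)
#   _pickLevel(50, (90, 70), (10, 5)) -> None (no alert)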
def testEmailSinkBasic(self):
    # pre-generate the entire email message
    subj = "Alert from %s" % None  # None is the default Alert value for HostName
    msg = EmailSink.EMAIL_HEADER % (self.config.fromAddr, subj,
                                    ", ".join(self.config.toAddr))
    alerts = []
    for i in range(10):
        a = Alert(Source=__file__, Level=i, Timestamp=time.time(), Type="Test")
        msg += "\n%s\n" % a.toMsg()
        alerts.append(a)

    # method call expectations, ordered (mox record phase)
    EmailSinkMod.smtplib.SMTP(self.config.smtpServer).AndReturn(self.smtp)  # 1
    # leave for test / debugging
    # self.smtp.sendmail('*****@*****.**', '*****@*****.**', 'Subject: subject\n\nbody')
    self.smtp.sendmail(self.config.fromAddr, self.config.toAddr, msg)  # 2
    self.smtp.quit()  # 3
    self.mox.ReplayAll()

    sink = EmailSink(self.config)  # 1
    # leave for test / debugging
    # self.smtp.sendmail('*****@*****.**', '*****@*****.**', 'Subject: subject\n\nbody')
    sink.send(alerts)  # 2
    del sink  # 3
    self.mox.VerifyAll()
def check(self):
    """
    First get the database directory usage. If the usage exceeds the soft,
    resp. critical, limit, an alert is sent.
    """
    if not self._dbDirectory:
        return
    usage = self.sample(self._dbDirectory)
    if usage is None:
        # the failure should have been logged earlier (in sample)
        return
    usageStr = "%s %s" % (usage, self._currSizeUnit)
    for threshold, level in zip(self.thresholds, self.levels):
        if usage >= threshold:
            details = dict(databasedir=self._dbDirectory, usage=usageStr,
                           threshold=threshold)
            a = Alert(**self.preAlert)
            a.setTimestamp()
            a["Source"] = self._myName
            a["Details"] = details
            a["Level"] = level
            logging.debug("Sending an alert (%s): %s" % (self.__class__.__name__, a))
            self.sender(a)
            break  # send only one alert, the critical threshold is tested first
    m = "%s: measurement results: %s" % (self._myName, usageStr)
    logging.debug(m)
def sendAlertFunc(level, **args):
    if sender:
        alert = Alert(**preAlert)
        alert.setTimestamp()
        alert["Level"] = level
        alert["Details"] = args
        sender(alert)
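# Illustrative sketch (not part of the original module): sendAlertFunc
# references sender and preAlert as free variables, so it is presumably
# produced by a factory closing over them, roughly like the hypothetical
# makeSendAlertFunc below.
def makeSendAlertFunc(sender, preAlert):
    def sendAlertFunc(level, **args):
        if sender:
            alert = Alert(**preAlert)
            alert.setTimestamp()
            alert["Level"] = level
            alert["Details"] = args
            sender(alert)
    return sendAlertFunc

# usage: sendAlert = makeSendAlertFunc(sender, preAlert)
#        sendAlert(10, msg="database unreachable")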
def testAgentConfigurationRetrieving(self):
    """
    Test that agent details (config values from the config.Agent section)
    are correctly propagated into Alert instances.
    The Alert instance is obtained via the API.getPredefinedAlert factory.
    """
    d = dict(Additional="detail")
    # instantiate just a plain Alert; no configuration is taken
    # into account at this point
    a = Alert(**d)
    self.assertEqual(a["HostName"], None)
    self.assertEqual(a["Contact"], None)
    self.assertEqual(a["TeamName"], None)
    self.assertEqual(a["AgentName"], None)
    self.assertEqual(a["Additional"], "detail")

    # instantiate via the factory which reads the configuration instance
    config = Configuration()
    config.section_("Agent")
    config.Agent.hostName = "some1"
    config.Agent.contact = "some2"
    config.Agent.teamName = "some3"
    config.Agent.agentName = "some4"
    a = alertAPI.getPredefinedAlert(**d)
    self.assertEqual(a["HostName"], "some1")
    self.assertEqual(a["Contact"], "some2")
    self.assertEqual(a["TeamName"], "some3")
    self.assertEqual(a["AgentName"], "some4")
    self.assertEqual(a["Additional"], "detail")
def testSetTimestamp(self):
    a = Alert()
    self.assertEqual(a["Timestamp"], None)
    self.assertEqual(a["TimestampDecoded"], None)
    a.setTimestamp()
    self.assertTrue(isinstance(a["Timestamp"], float))
    tsd = a["TimestampDecoded"]
    tsdTested = time.strftime(a.TIMESTAMP_FORMAT, time.gmtime(a["Timestamp"]))
    self.assertEqual(tsd, tsdTested)
def worker(addr, ctrl, nAlerts, workerId="ForwardSinkTestSource"):
    """
    Send a few alerts.
    """
    s = Sender(addr, ctrl, workerId)
    s.register()
    d = dict(very="interesting")
    # a plain loop instead of a list comprehension used only for side effects
    for i in range(0, nAlerts):
        s(Alert(Type="Alert", Level=i, Details=d))
    s.unregister()
    s.sendShutdown()
def __call__(self, alertData):
    """
    Inject a new alert into the processing pipeline.
    The alert data arrives as a plain JSON-decoded dict and needs to be
    converted into an Alert instance before being dispatched to the pipeline.
    """
    logging.debug("Processing incoming Alert data to sinks ...")
    alert = Alert()
    alert.update(alertData)
    self.pipeline.send(alert)
    logging.debug("Incoming Alert data processing done.")
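# Illustrative sketch (not part of the class): since Alert is dict-based, the
# conversion described in the docstring is just re-wrapping the decoded JSON
# dict. A minimal round-trip, assuming the standard json module; the helper
# name is hypothetical.
import json

def jsonToAlert(jsonString):
    """Decode a JSON payload into an Alert instance."""
    alert = Alert()
    alert.update(json.loads(jsonString))
    return alert

# e.g. jsonToAlert('{"Type": "Test", "Level": 5}')["Level"] == 5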
def getPredefinedAlert(**args):
    preAlert = Alert(**args)
    config = Configuration.getInstance()
    # try to fill in some values from the configuration to identify the
    # source of alerts further down the alerts framework processing chain
    if config:
        agentConfig = getattr(config, "Agent", None)
        if agentConfig:
            preAlert["HostName"] = getattr(agentConfig, "hostName", None)
            preAlert["Contact"] = getattr(agentConfig, "contact", None)
            preAlert["TeamName"] = getattr(agentConfig, "teamName", None)
            preAlert["AgentName"] = getattr(agentConfig, "agentName", None)
    return preAlert
def testSenderReceiverBasic(self):
    sender = Sender(self.config.Alert.address,
                    self.config.Alert.controlAddr,
                    self.__class__.__name__)
    handler, receiver = utils.setUpReceiver(self.config.Alert.address,
                                            self.config.Alert.controlAddr)
    a = Alert(Component=inspect.stack()[0][3])
    sender(a)
    while len(handler.queue) == 0:
        time.sleep(0.5)
        print "%s waiting for alert to arrive" % inspect.stack()[0][3]
    receiver.shutdown()
    self.assertEqual(len(handler.queue), 1)
    self.assertEqual(handler.queue[0]["Component"], inspect.stack()[0][3])
def check(self):
    """
    Check the output of the df command for the percentage of disk space
    used. The command output pattern:

    Filesystem   1K-blocks    Used  Available Use% Mounted on
    /dev/sda2      1953276  382040    1467026  21% /
    udev           4085528     336    4085192   1% /dev
    none           4085528     628    4084900   1% /dev/shm

    """
    out = self.sample()
    if out is None:
        # the failure should have been logged earlier
        return
    percs = []
    try:
        # skip the first (header) line; the last line is empty
        # (i.e. iterate over the partition entries)
        for line in out.split('\n')[1:-1]:
            arr = line.split()
            if len(arr) < 6:  # a partition entry of df output has 6 elements
                continue
            percStr, mount = arr[4:6]  # see the df output pattern
            if mount == "/usr/vice/cache":  # do not check the AFS cache dir
                continue
            perc = int(percStr[:-1])  # strip the percent sign
            for threshold, level in zip(self.thresholds, self.levels):
                if perc >= threshold:
                    details = dict(mountPoint=mount, usage="%s%%" % perc,
                                   threshold="%s%%" % threshold)
                    a = Alert(**self.preAlert)
                    a.setTimestamp()
                    a["Source"] = self.__class__.__name__
                    a["Details"] = details
                    a["Level"] = level
                    logging.debug("Sending an alert (%s): %s" % (self.__class__.__name__, a))
                    self.sender(a)
                    break  # send only one alert, the critical threshold is tested first
            percs.append(percStr)
    except (ValueError, IndexError) as ex:
        logging.error("Could not check available disk space, reason: %s" % ex)
    m = "%s: measurement results: %s" % (self.__class__.__name__, percs)
    logging.debug(m)
def worker(addr, ctrl, nAlerts, workerId="Processor_t"):
    """
    Instantiate an alert Sender instance and register with the Receiver
    instance identified by addr (alerts channel) and ctrl (control channel)
    addresses. Then send the desired amount of alerts, unregister and send
    a Shutdown control message instructing the Receiver to stop and
    release its sockets.
    """
    s = Sender(addr, ctrl, workerId)
    s.register()
    for i in range(0, nAlerts):
        a = Alert(Type="Alert", Level=i)
        s(a)
    s.unregister()
    s.sendShutdown()
def testReceiverShutdownByCall(self):
    # start a Receiver
    rec = Receiver(self.addr, self.printer, self.ctrl)
    rec.startReceiver()  # non-blocking call
    workChann, contChann = self._getSenderChannels()
    # send some messages to the receiver, then shut it down
    contChann.send_json(RegisterMsg("Receiver_t"))
    workChann.send_json(Alert(Type="Alert", Level=20))
    contChann.send_json(UnregisterMsg("Receiver_t"))
    # the messages have been sent, so shut the Receiver down via the
    # convenience call; it blocks until the Receiver finishes, no need to wait
    rec.shutdown()
def testReceiverShutdownByMessage(self):
    # start a Receiver
    rec = Receiver(self.addr, self.printer, self.ctrl)
    rec.startReceiver()  # non-blocking call
    workChann, contChann = self._getSenderChannels()
    # send some messages to the receiver, then shut it down
    contChann.send_json(RegisterMsg("Receiver_t"))
    workChann.send_json(Alert(Type="Alert", Level=10))
    contChann.send_json(UnregisterMsg("Receiver_t"))
    # terminate the Receiver
    contChann.send_json(ShutdownMsg())
    # wait until the Receiver is properly shut down
    # (this is not necessary when shutting down by a call)
    while rec.isReady():
        time.sleep(0.1)
def _handleFailedPolling(self, ex):
    """
    Handle (log and send an alert) if polling failed.
    """
    trace = traceback.format_exception(*sys.exc_info())
    traceString = '\n '.join(trace)
    errMsg = ("Polling failed in %s, reason: %s" % (self.__class__.__name__, ex))
    logging.error("%s\n%s" % (errMsg, traceString))
    a = Alert(**self.preAlert)
    a.setTimestamp()
    a["Source"] = self.__class__.__name__
    a["Details"] = dict(msg=errMsg)
    a["Level"] = 10
    logging.info("Sending an alert (%s): %s" % (self.__class__.__name__, a))
    self.sender(a)
def run(self):
    """
    Start a sender and send some alert messages to the Receiver.
    """
    context = zmq.Context()
    # set up a channel to send work
    sender = context.socket(zmq.PUSH)
    sender.connect(self.addr)
    controller = context.socket(zmq.PUB)
    controller.connect(self.ctrl)
    controller.send_json(RegisterMsg("Receiver_t"))
    for i in range(0, self.nAlerts):
        a = Alert(Type="Alert", Level=i)
        sender.send_json(a)
    controller.send_json(UnregisterMsg("Receiver_t"))
    controller.send_json(ShutdownMsg())
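# Illustrative sketch (not part of the test): the receiving side implied by
# the PUSH/PUB sender above. Alerts travel over a PUSH->PULL pair and control
# messages over a PUB->SUB pair; the real Receiver wraps this in its own
# class, so the outline below rests on assumptions - in particular the
# shutdown-flag check on the control message is hypothetical.
import zmq

def receiveLoop(addr, ctrl, handler):
    context = zmq.Context()
    puller = context.socket(zmq.PULL)     # pairs with the sender's PUSH
    puller.bind(addr)
    subscriber = context.socket(zmq.SUB)  # pairs with the sender's PUB
    subscriber.bind(ctrl)
    subscriber.setsockopt(zmq.SUBSCRIBE, "")  # receive all control messages
    poller = zmq.Poller()
    poller.register(puller, zmq.POLLIN)
    poller.register(subscriber, zmq.POLLIN)
    while True:
        socks = dict(poller.poll())
        if puller in socks:
            handler(puller.recv_json())   # an incoming alert
        if subscriber in socks:
            msg = subscriber.recv_json()  # a control message
            if msg.get("Shutdown"):       # hypothetical shutdown flag
                break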
def testSenderNonBlockingWhenReceiverNotAvailable(self):
    """
    Repeatedly instantiate a Sender, register, send alerts, etc. and test
    that the Sender does not block on undelivered messages when no
    Receiver is available.
    Between iterations this test shall wait only for the delay specified
    in the Sender.
    """
    iterations = 2
    nAlerts = 3
    for i in range(iterations):
        # instantiate a sender and send ...
        s = Sender(self.addr, self.control, "Sender_t")
        s.register()
        # send some alerts (j instead of i: don't shadow the outer loop variable)
        for j in range(0, nAlerts):
            a = Alert(Level=10, Type="Alert")
            s(a)  # actual alert message sending
        s.unregister()
        # call the destructor explicitly; the hang should not occur here
        del s
def check(self, pd, measurements):
    """
    This method is used both for system properties (e.g. overall CPU)
    and for monitoring of a particular process.
    pd (processDetail) - information about the monitored process; may be
        None if this method is called from system monitoring pollers
        (e.g. CPU usage).
    measurements - Measurements class instance.
    """
    v = self.sample(pd)
    measurements.append(v)
    avgPerc = None
    if len(measurements) >= measurements._numOfMeasurements:
        # evaluate: calculate the average value and react
        avgPerc = round((sum(measurements) / len(measurements)), 2)
        details = dict(period=self.config.period,
                       numMeasurements=len(measurements),
                       average="%s%%" % avgPerc)
        if pd:
            details.update(pd.getDetails())
        measurements.clear()
        for threshold, level in zip(self.thresholds, self.levels):
            if avgPerc >= threshold:
                a = Alert(**self.preAlert)
                a.setTimestamp()
                a["Source"] = self.__class__.__name__
                details["threshold"] = "%s%%" % threshold
                a["Details"] = details
                a["Level"] = level
                logging.debug("Sending an alert (%s): %s" % (self.__class__.__name__, a))
                self.sender(a)
                break  # send only one alert, the critical threshold is tested first
    if avgPerc is not None:
        m = ("%s: measurements result: %s%%" % (self.__class__.__name__, avgPerc))
        logging.debug(m)
def testSenderBasic(self):
    """
    Test the register and unregister messages immediately; alert
    messages are tested as saved in the queue.
    """
    nAlerts = 10
    # start the Receiver; the handler appends alerts to a list
    # wait for the control messages to arrive and test immediately
    alertsQueue = []
    handler = lambda x: alertsQueue.append(x)
    self.receiver = Receiver(self.addr, handler, self.control)
    self.receiver.startReceiver()  # non-blocking call

    # instantiate a sender and send ...
    s = Sender(self.addr, self.control, "Sender_t")
    # nothing is registered with the Receiver up to now
    self.assertEqual(len(self.receiver._receiver._registSenders), 0)
    s.register()
    # test that the RegisterMsg arrived, allowing for delivery delay
    while len(self.receiver._receiver._registSenders) == 0:
        time.sleep(0.2)
    self.assertEqual(len(self.receiver._receiver._registSenders), 1)
    # send some alerts
    for i in range(0, nAlerts):
        a = Alert(Level=i, Type="Alert")
        s(a)  # actual alert message sending
    s.unregister()
    while len(self.receiver._receiver._registSenders) == 1:
        time.sleep(0.2)
    self.assertEqual(len(self.receiver._receiver._registSenders), 0)
    # this makes sure that the Receiver waits a certain delay even after
    # shutdown is received, in case no more messages are coming
    self.receiver.shutdown()
    self.assertEqual(nAlerts, len(alertsQueue))
import sys
import time

from WMCore.Alerts import API as alertAPI
from WMCore.Alerts.Alert import Alert
from WMCore.Alerts.ZMQ.Sender import Sender

machine = "maxatest.cern.ch"
target = "tcp://%s:6557" % machine
targetController = "tcp://%s:6559" % machine
if len(sys.argv) > 2:
    target = sys.argv[1]
    targetController = sys.argv[2]

dictAlert = dict(Type="AlertTestClient",
                 Workload="n/a",
                 Component=__name__,
                 Source=__name__)
preAlert = alertAPI.getPredefinedAlert(**dictAlert)

sender = Sender(target, targetController, "AlertTestClient")
print("created Sender client for alerts target: %s controller: %s" %
      (target, targetController))
sender.register()
a = Alert(**preAlert)
a["Timestamp"] = time.time()
a["Level"] = 6
print "sending alert:\n'%s'" % a
sender(a)
sender.unregister()