def _handleModelCommandResult(self, body):
  """ ModelCommandResult handler. Handles model creation/deletion events and
  makes the associated put_item() and delete() calls to appropriate dynamodb
  tables

  :param body: Incoming message payload
  :type body: str
  """
  try:
    modelCommandResult = AnomalyService.deserializeModelResult(body)
  except Exception:
    g_log.exception("Error deserializing model command result")
    raise

  if modelCommandResult["status"] != htmengineerrno.SUCCESS:
    return  # Ignore...

  if modelCommandResult["method"] == "defineModel":
    g_log.info("Handling `defineModel` for %s",
               modelCommandResult.get("modelId"))
    metricItem = convertDefineModelResultToMetricItem(modelCommandResult)
    g_log.info("Saving %r to dynamodb", metricItem)
    self._metric.put_item(data=metricItem._asdict(), overwrite=True)

  elif modelCommandResult["method"] == "deleteModel":
    self._purgeMetricFromDynamoDB(modelCommandResult["modelId"])
def handleModelInferenceResults(body):
  """ Model results batch handler.

  :param body: Serialized message payload; the message is compliant with
    htmengine/runtime/json_schema/model_inference_results_msg_schema.json.
  :type body: str
  """
  try:
    batch = AnomalyService.deserializeModelResult(body)
  except Exception:
    print "Error deserializing model result"
    raise

  metricId = batch["metric"]["uid"]
  metricName = batch["metric"]["name"]

  print "Handling %d model result(s) for %s - %s" % (len(batch["results"]),
                                                     metricId, metricName)

  if not batch["results"]:
    print "Empty results in model inference results batch; model=%s" % metricId
    return

  print metricId, batch["results"]
def handleModelCommandResult(body):
  """ ModelCommandResult handler. Handles model creation/deletion events

  :param body: Incoming message payload
  :type body: str
  """
  try:
    modelCommandResult = AnomalyService.deserializeModelResult(body)
  except Exception:
    print "Error deserializing model command result"
    raise

  if modelCommandResult["status"] != htmengineerrno.SUCCESS:
    return  # Ignore...

  if modelCommandResult["method"] == "defineModel":
    print "Handling `defineModel` for %s" % modelCommandResult.get("modelId")
    print modelCommandResult

  elif modelCommandResult["method"] == "deleteModel":
    print "Handling `deleteModel` for %s" % modelCommandResult.get("modelId")
    print modelCommandResult
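# Hedged sketch (not part of the original sources): one way the two sample
# handlers above could be driven from a results queue, reusing the same
# synchronous AMQP client calls that appear in _reapAnomalyServiceResults
# below.  The import path and the queue name are assumptions; the routing
# rule -- messages carrying a "dataType" header are not inference-result
# batches and are treated here as model command results -- mirrors the header
# checks used elsewhere in this section.
from nta.utils.amqp import connection as amqp_connection          # assumed path
from nta.utils.amqp import synchronous_amqp_client                # assumed path

RESULTS_QUEUE = "sample_results_queue"  # hypothetical queue bound to the
                                        # Anomaly Service results exchange


def consumeResults():
  connParams = amqp_connection.getRabbitmqConnectionParameters()

  with synchronous_amqp_client.SynchronousAmqpClient(connParams) as client:
    while True:
      message = client.getOneMessage(RESULTS_QUEUE, noAck=False)
      if message is None:
        break  # Queue is drained

      headers = message.properties.headers
      if headers and headers.get("dataType"):
        # Assumed to be a model command result (defineModel/deleteModel)
        handleModelCommandResult(message.body)
      else:
        # Model inference results batch
        handleModelInferenceResults(message.body)

      message.ack()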
def _reapAnomalyServiceResults(self, metricId, numRowsExpected):
  """ Retrieve likelihood results from our AMQP message queue that is bound to
  Anomaly Service's results fanout exchange

  NOTE that Anomaly Service fans out all results for all models via "fanout"
  exchange, so our queue might contain results from additional models, which
  we filter out.

  :param metricId: unique id of our metric/model
  :param numRowsExpected: number of result rows expected by caller

  :returns: a sequence of dicts conforming to the schema of the results items
    per model_inference_results_msg_schema.json
  """
  rows = []

  @test_case_base.retry(duration=30)
  def getBatch(amqpClient):
    message = amqpClient.getOneMessage(self.resultsQueueName, noAck=False)
    try:
      self.assertIsNotNone(message)
    except AssertionError:
      LOGGER.info("Got %d rows so far, waiting for %d more",
                  len(rows), numRowsExpected - len(rows))
      raise

    return message

  connParams = amqp.connection.getRabbitmqConnectionParameters()

  with amqp.synchronous_amqp_client.SynchronousAmqpClient(
      connParams) as amqpClient:
    lastMessage = None

    while len(rows) < numRowsExpected:
      message = getBatch(amqpClient)
      lastMessage = message

      batch = AnomalyService.deserializeModelResult(message.body)

      dataType = (message.properties.headers.get("dataType")
                  if message.properties.headers else None)
      if dataType:
        continue  # Not a model inference result

      # batch is a dict compliant with model_inference_results_msg_schema.json
      if batch["metric"]["uid"] != metricId:
        # Another model's result
        continue

      # Extract data rows; each row is a dict from the "results" attribute
      # per model_inference_results_msg_schema.json
      rows.extend(batch["results"])

    lastMessage.ack(multiple=True)

  return rows
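# Hedged usage sketch (not from the original test suite): how a test might
# call _reapAnomalyServiceResults after feeding data to a model.  The metric
# uid and expected row count are made-up values; the row fields asserted on
# ("rowid", "ts", "anomaly") are the ones the handlers in this section read.
def testModelResultsArePublished(self):
  metricId = "0123456789abcdef0123456789abcdef"  # hypothetical metric uid
  numRowsExpected = 5

  rows = self._reapAnomalyServiceResults(metricId, numRowsExpected)

  self.assertGreaterEqual(len(rows), numRowsExpected)
  for row in rows:
    self.assertIn("rowid", row)
    self.assertIn("ts", row)
    self.assertIn("anomaly", row)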
def _handleModelInferenceResults(self, body):
  """ Model results batch handler. Publishes metric data to DynamoDB for a
  given model inference results batch pulled off of the `dynamodb` queue.

  :param body: Serialized message payload; the message is compliant with
    htmengine/runtime/json_schema/model_inference_results_msg_schema.json.
  :type body: str
  """
  try:
    batch = AnomalyService.deserializeModelResult(body)
  except Exception:
    g_log.exception("Error deserializing model result")
    raise

  metricId = batch["metric"]["uid"]
  metricName = batch["metric"]["name"]

  g_log.info("Handling %d model result(s) for %s - %s",
             len(batch["results"]), metricId, metricName)

  if not batch["results"]:
    g_log.error("Empty results in model inference results batch; model=%s",
                metricId)
    return

  lastRow = batch["results"][-1]
  if (datetime.utcfromtimestamp(lastRow["ts"]) <
      (datetime.utcnow() -
       timedelta(days=self._FRESH_DATA_THRESHOLD_DAYS))):
    g_log.info("Dropping stale result batch from model=%s; first=%s; last=%s",
               metricId, batch["results"][0], lastRow)
    return

  instanceName = batch["metric"]["resource"]

  metricSpec = batch["metric"]["spec"]
  userInfo = metricSpec.get("userInfo", {})
  metricType = userInfo.get("metricType")
  metricTypeName = userInfo.get("metricTypeName")
  symbol = userInfo.get("symbol")

  # Although not relevant in a production setting, the dynamodb service sits
  # atop htmengine and is running during htmengine integration tests, so
  # there are inbound custom metrics that lack the crucial Taurus-specific
  # user-data and are not intended to be published to dynamodb. If the metric
  # lacks any of the Taurus-required `metricType`, `metricTypeName`, or
  # `symbol` userInfo keys, log it as a warning and don't publish to
  # dynamodb.

  if not metricType:
    g_log.warning("Missing value for metricType, uid=%s, name=%s",
                  metricId, metricName)
    return

  if not metricTypeName:
    g_log.warning("Missing value for metricTypeName, uid=%s, name=%s",
                  metricId, metricName)
    return

  if not symbol:
    g_log.warning("Missing value for symbol, uid=%s, name=%s",
                  metricId, metricName)
    return

  self._publishMetricData(metricId, batch["results"])
  self._publishInstanceDataHourly(instanceName, metricType, batch["results"])
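# Illustrative example (hand-written, not captured from a live system) of a
# deserialized inference-results batch as consumed by
# _handleModelInferenceResults above.  Only the fields that handler reads are
# shown, with made-up values; the complete layout is defined by
# htmengine/runtime/json_schema/model_inference_results_msg_schema.json.
exampleBatch = {
  "metric": {
    "uid": "0123456789abcdef0123456789abcdef",
    "name": "XIGNITE.FOO.VOLUME",        # made-up metric name
    "resource": "Foo Inc.",              # instance the metric belongs to
    "spec": {
      "userInfo": {                      # Taurus-specific user data
        "metricType": "StockVolume",
        "metricTypeName": "Stock Volume",
        "symbol": "FOO"
      }
    }
  },
  "results": [
    # Each row also carries additional fields (e.g. the metric value) per the
    # schema; "ts" is a unix timestamp in seconds
    {"rowid": 4416, "ts": 1429000200, "anomaly": 0.31},
    {"rowid": 4417, "ts": 1429000500, "anomaly": 0.99992}
  ]
}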
def messageHandler(self, message):
  """ Inspect all inbound model results in a batch for anomaly thresholds and
  trigger notifications where applicable.

  :param amqp.messages.ConsumerMessage message: ``message.body`` is a
    serialized batch of model inference results generated in ``AnomalyService``
    and must be deserialized using ``AnomalyService.deserializeModelResult()``.
    The message conforms to
    htmengine/runtime/json_schema/model_inference_results_msg_schema.json
  """
  if message.properties.headers and "dataType" in message.properties.headers:
    # Not a model inference result
    message.ack()
    return

  htm.it.app.config.loadConfig()  # reload config on every batch
  engine = repository.engineFactory()

  # Cache minimum threshold to trigger any notification to avoid permuting
  # settings x metricDataRows
  try:
    try:
      batch = AnomalyService.deserializeModelResult(message.body)
    except Exception:
      self._log.exception("Error deserializing model result")
      raise

    # Load all settings for all users (once per incoming batch)
    with engine.connect() as conn:
      settings = repository.retryOnTransientErrors(
          repository.getAllNotificationSettings)(conn)

    self._log.debug("settings: %r" % settings)

    if settings:
      minThreshold = min(setting.sensitivity for setting in settings)
    else:
      minThreshold = 0.99999

    metricInfo = batch["metric"]
    metricId = metricInfo["uid"]
    resource = metricInfo["resource"]

    for row in batch["results"]:

      if row["anomaly"] >= minThreshold:
        rowDatetime = datetime.utcfromtimestamp(row["ts"])

        if not settings:
          # There are no device notification settings stored on this server,
          # so no notifications will be generated. However, log that an
          # anomaly was detected and a notification would have been sent if
          # there were any configured devices
          self._log.info("<%r>" % (metricInfo) +
                         ("{TAG:APP.NOTIFICATION} Anomaly "
                          "detected at %s, but no devices are "
                          "configured.") % rowDatetime)
          continue

        for settingObj in settings:

          if row["rowid"] <= 1000:
            continue  # Not enough data

          if rowDatetime < datetime.utcnow() - timedelta(seconds=3600):
            continue  # Skip old

          if row["anomaly"] >= settingObj.sensitivity:
            # First let's clear any old users out of the database.
            with engine.connect() as conn:
              repository.retryOnTransientErrors(
                  repository.deleteStaleNotificationDevices)(
                      conn, _NOTIFICATION_DEVICE_STALE_DAYS)

            # If anomaly_score meets or exceeds any of the device
            # notification sensitivity settings, trigger notification.
            # repository.addNotification() will handle throttling.
            notificationId = str(uuid.uuid4())

            with engine.connect() as conn:
              result = repository.retryOnTransientErrors(
                  repository.addNotification)(conn,
                                              uid=notificationId,
                                              server=resource,
                                              metric=metricId,
                                              rowid=row["rowid"],
                                              device=settingObj.uid,
                                              windowsize=settingObj.windowsize,
                                              timestamp=rowDatetime,
                                              acknowledged=0,
                                              seen=0)

            self._log.info("NOTIFICATION=%s SERVER=%s METRICID=%s DEVICE=%s "
                           "Notification generated. " %
                           (notificationId, resource, metricId,
                            settingObj.uid))

            if (result is not None and
                result.rowcount > 0 and
                settingObj.email_addr):
              # Notification was generated. Attempt to send email
              with engine.connect() as conn:
                notificationObj = repository.getNotification(conn,
                                                             notificationId)

              self.sendNotificationEmail(engine, settingObj, notificationObj)
  finally:
    message.ack()

  # Do cleanup
  with engine.connect() as conn:
    repository.clearOldNotifications(conn)  # Delete all notifications outside
def messageHandler(self, message):
  """ Inspect all inbound model results in a batch for anomaly thresholds and
  trigger notifications where applicable.

  :param amqp.messages.ConsumerMessage message: ``message.body`` is a
    serialized batch of model inference results generated in ``AnomalyService``
    and must be deserialized using ``AnomalyService.deserializeModelResult()``.
    The message conforms to
    htmengine/runtime/json_schema/model_inference_results_msg_schema.json
  """
  if message.properties.headers and "dataType" in message.properties.headers:
    # Not a model inference result
    return

  grok.app.config.loadConfig()  # reload config on every batch
  engine = repository.engineFactory()

  # Cache minimum threshold to trigger any notification to avoid permuting
  # settings x metricDataRows
  try:
    try:
      batch = AnomalyService.deserializeModelResult(message.body)
    except Exception:
      self._log.exception("Error deserializing model result")
      raise

    # Load all settings for all users (once per incoming batch)
    with engine.connect() as conn:
      settings = repository.retryOnTransientErrors(
          repository.getAllNotificationSettings)(conn)

    self._log.debug("settings: %r" % settings)

    if settings:
      minThreshold = min(setting.sensitivity for setting in settings)
    else:
      minThreshold = 0.99999

    metricInfo = batch["metric"]
    metricId = metricInfo["uid"]
    resource = metricInfo["resource"]

    for row in batch["results"]:

      if row["anomaly"] >= minThreshold:
        rowDatetime = datetime.utcfromtimestamp(row["ts"])

        for settingObj in settings:

          if row["rowid"] <= 1000:
            continue  # Not enough data

          if rowDatetime < datetime.utcnow() - timedelta(seconds=3600):
            continue  # Skip old

          if row["anomaly"] >= settingObj.sensitivity:
            # First let's clear any old users out of the database.
            with engine.connect() as conn:
              repository.retryOnTransientErrors(
                  repository.deleteStaleNotificationDevices)(
                      conn, _NOTIFICATION_DEVICE_STALE_DAYS)

            # If anomaly_score meets or exceeds any of the device
            # notification sensitivity settings, trigger notification.
            # repository.addNotification() will handle throttling.
            notificationId = str(uuid.uuid4())

            with engine.connect() as conn:
              result = repository.retryOnTransientErrors(
                  repository.addNotification)(conn,
                                              uid=notificationId,
                                              server=resource,
                                              metric=metricId,
                                              rowid=row["rowid"],
                                              device=settingObj.uid,
                                              windowsize=settingObj.windowsize,
                                              timestamp=rowDatetime,
                                              acknowledged=0,
                                              seen=0)

            self._log.info("NOTIFICATION=%s SERVER=%s METRICID=%s DEVICE=%s "
                           "Notification generated. " %
                           (notificationId, resource, metricId,
                            settingObj.uid))

            if (result is not None and
                result.rowcount > 0 and
                settingObj.email_addr):
              # Notification was generated. Attempt to send email
              with engine.connect() as conn:
                notificationObj = repository.getNotification(conn,
                                                             notificationId)

              self.sendNotificationEmail(engine, settingObj, notificationObj)

        if not settings:
          # There are no device notification settings stored on this server,
          # so no notifications will be generated. However, log that an
          # anomaly was detected and a notification would have been sent if
          # there were any configured devices
          self._log.info("<%r>" % (metricInfo) +
                         ("{TAG:APP.NOTIFICATION} Anomaly "
                          "detected at %s, but no devices are "
                          "configured.") % rowDatetime)
  finally:
    message.ack()

  # Do cleanup
  with engine.connect() as conn:
    repository.clearOldNotifications(conn)  # Delete all notifications outside
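# Minimal illustration (made-up numbers, not from the original service) of the
# two-level sensitivity check used by messageHandler above: the lowest
# sensitivity across all settings is cached once per batch to gate the
# per-row work, and each device's own sensitivity is then applied
# individually.  Setting here is a stand-in for the repository's
# notification-settings rows.
import collections

Setting = collections.namedtuple("Setting", "uid sensitivity")

settings = [Setting(uid="device-a", sensitivity=0.99988),
            Setting(uid="device-b", sensitivity=0.99999)]

# Same default as used above when no settings exist
minThreshold = (min(s.sensitivity for s in settings)
                if settings else 0.99999)

row = {"rowid": 1500, "anomaly": 0.99990}

if row["anomaly"] >= minThreshold:
  # Only device-a is sensitive enough to be notified for this row
  triggered = [s.uid for s in settings if row["anomaly"] >= s.sensitivity]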