def GET(self, metricId=None):
  """
  Get Model Data

  ::

      GET /_models/{model-id}/data?from={fromTimestamp}&to={toTimestamp}&anomaly={anomalyScore}&limit={numOfRows}

  Parameters:

    :param limit: (optional) max number of records to return
    :type limit: int
    :param from: (optional) return records from this timestamp
    :type from: timestamp
    :param to: (optional) return records up to this timestamp
    :type to: timestamp
    :param anomaly: anomaly score to filter
    :type anomaly: float

  Returns:

  ::

      {
          "data": [
              ["2013-08-15 21:34:00", 222, 0.025, 125],
              ["2013-08-15 21:32:00", 202, 0, 124],
              ["2013-08-15 21:30:00", 202, 0, 123],
              ...
          ],
          "names": [
              "timestamp",
              "value",
              "anomaly_score",
              "rowid"
          ]
      }
  """
  queryParams = dict(urlparse.parse_qsl(web.ctx.env['QUERY_STRING']))
  fromTimestamp = queryParams.get("from")
  toTimestamp = queryParams.get("to")
  anomaly = float(queryParams.get("anomaly") or 0.0)
  limit = int(queryParams.get("limit") or 0)

  # Keep the connection open while iterating over the result, since the
  # rows are consumed lazily below (including the streaming branch)
  with web.ctx.connFactory() as conn:
    fields = (schema.metric_data.c.uid,
              schema.metric_data.c.timestamp,
              schema.metric_data.c.metric_value,
              schema.metric_data.c.anomaly_score,
              schema.metric_data.c.rowid)
    names = ("names",) + tuple(["value" if col.name == "metric_value"
                                else col.name
                                for col in fields])

    if fromTimestamp:
      sort = schema.metric_data.c.timestamp.asc()
    else:
      sort = schema.metric_data.c.timestamp.desc()

    result = repository.getMetricData(conn,
                                      metricId=metricId,
                                      fields=fields,
                                      fromTimestamp=fromTimestamp,
                                      toTimestamp=toTimestamp,
                                      score=anomaly,
                                      sort=sort)

    if "application/octet-stream" in web.ctx.env.get('HTTP_ACCEPT', ""):
      # Stream msgpack-encoded tuples: first the column-names header, then
      # one tuple per row. Track the row count per metric uid so that
      # `limit` applies per metric.
      results_per_uid = defaultdict(int)
      packer = msgpack.Packer()
      self.addStandardHeaders(content_type='application/octet-stream')
      web.header('X-Accel-Buffering', 'no')

      yield packer.pack(names)
      for row in result:
        if not limit or results_per_uid[row.uid] < limit:
          resultTuple = (
            row.uid,
            calendar.timegm(row.timestamp.timetuple()),
            row.metric_value,
            row.anomaly_score,
            row.rowid,
          )
          yield packer.pack(resultTuple)
          results_per_uid[row.uid] += 1
    else:
      if metricId is None:
        # No model specified: group rows by metric uid, honoring the
        # per-metric limit
        output = {}
        for row in result:
          uid = row.uid
          default = {"uid": uid, "data": []}
          recordTuple = (row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                         row.metric_value,
                         row.anomaly_score,
                         row.rowid)
          metricDataRecord = output.setdefault(uid, default)
          if not limit or len(metricDataRecord["data"]) < limit:
            metricDataRecord["data"].append(recordTuple)
        results = {"metrics": output.values(),
                   "names": names[2:]}
      else:
        if limit:
          results = {"names": names[2:],
                     "data": [(row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                               row.metric_value,
                               row.anomaly_score,
                               row.rowid)
                              for row in itertools.islice(result, 0, limit)]}
        else:
          results = {"names": names[2:],
                     "data": [(row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                               row.metric_value,
                               row.anomaly_score,
                               row.rowid)
                              for row in result]}

      self.addStandardHeaders()
      yield utils.jsonEncode(results)
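# ---------------------------------------------------------------------------
# A minimal client-side sketch (not part of the handler above) showing how the
# msgpack streaming branch of the GET endpoint could be consumed. The base URL
# and the fetchModelDataStream helper are hypothetical assumptions; the wire
# format (one packed names header followed by one packed tuple per row)
# follows from the handler code.
# ---------------------------------------------------------------------------
import msgpack
import requests


def fetchModelDataStream(baseUrl, modelId, limit=0):
  """ Yield one dict per metric data row from the streaming endpoint.

  :param baseUrl: service root, e.g. "http://localhost:8081" (an assumption)
  """
  url = "%s/_models/%s/data" % (baseUrl, modelId)
  params = {"limit": limit} if limit else {}
  # The handler selects the streaming branch based on the Accept header
  response = requests.get(url,
                          params=params,
                          headers={"Accept": "application/octet-stream"},
                          stream=True)
  response.raise_for_status()

  unpacker = msgpack.Unpacker()
  names = None
  for data in response.iter_content(chunk_size=4096):
    unpacker.feed(data)
    for unpacked in unpacker:
      if names is None:
        # First message is the header; its leading element is the literal
        # string "names", so the column labels start at index 1
        names = unpacked
      else:
        # Row tuples are (uid, epoch timestamp, value, anomaly_score, rowid)
        yield dict(zip(names[1:], unpacked))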
def replayMetricDataToModelResultsExchange(messageBus,
                                           chunksize=DEFAULT_CHUNKSIZE):
  """ Reads metric data and synthesizes model inference result messages to the
  "model results" exchange, simulating the end result of the AnomalyService.
  This will afford the dynamodb service an opportunity to backfill older data.

  :param messageBus: message bus connection
  :type messageBus: nta.utils.message_bus_connector.MessageBusConnector
  """
  engine = repository.engineFactory()

  twoWeeksAgo = datetime.datetime.utcnow() - datetime.timedelta(days=14)

  # Properties for publishing model command results on RabbitMQ exchange
  # (same as AnomalyService)
  modelCommandResultProperties = MessageProperties(
    deliveryMode=amqp.constants.AMQPDeliveryModes.PERSISTENT_MESSAGE,
    headers=dict(dataType="model-cmd-result"))

  # Properties for publishing model inference results on RabbitMQ exchange
  # (same as AnomalyService)
  modelInferenceResultProperties = MessageProperties(
    deliveryMode=amqp.constants.AMQPDeliveryModes.PERSISTENT_MESSAGE)

  g_log.info("Getting metric data...")
  result = repository.getMetricData(
    engine,
    score=0,
    fromTimestamp=twoWeeksAgo,
    sort=[metric_data.c.uid, metric_data.c.rowid.asc()])
  numMetricDataRows = result.rowcount
  g_log.info("Got %d rows", numMetricDataRows)

  numModels = 0
  for uid, group in groupby(result, key=lambda x: x.uid):

    @retryOnTransientErrors
    def _getMetric():
      return repository.getMetric(engine, uid)

    metricObj = _getMetric()

    # Send defineModel command to ensure that the metric table entry is created
    numModels += 1
    modelCommandResult = {
      "status": htmengineerrno.SUCCESS,
      "method": "defineModel",
      "modelId": uid,
      "modelInfo": {
        "metricName": metricObj.name,
        "resource": metricObj.server,
        "modelSpec": json.loads(metricObj.parameters)
      }
    }

    # Serialize
    payload = anomaly_service.AnomalyService._serializeModelResult(
      modelCommandResult)

    g_log.info("Sending `defineModel` command: %r", modelCommandResult)
    messageBus.publishExg(
      exchange=config.get("metric_streamer", "results_exchange_name"),
      routingKey="",
      body=payload,
      properties=modelCommandResultProperties)

    metricInfo = dict(
      uid=metricObj.uid,
      name=metricObj.name,
      description=metricObj.description,
      resource=metricObj.server,
      location=metricObj.location,
      datasource=metricObj.datasource,
      spec=json.loads(metricObj.parameters)["metricSpec"])

    # Consume this metric's rows in fixed-size chunks, padding the final
    # chunk with None (the standard "grouper" idiom)
    args = [iter(group)] * chunksize
    for num, chunk in enumerate(izip_longest(fillvalue=None, *args)):
      # Create the inference results message for this chunk
      inferenceResultsMessage = dict(
        metric=metricInfo,
        results=[
          dict(rowid=row.rowid,
               ts=epochFromNaiveUTCDatetime(row.timestamp),
               value=row.metric_value,
               rawAnomaly=row.raw_anomaly_score,
               anomaly=row.anomaly_score)
          for row in chunk if row is not None
        ])

      # Serialize
      payload = anomaly_service.AnomalyService._serializeModelResult(
        inferenceResultsMessage)

      # NOTE: results entries are dicts, so the timestamps are read with
      # ["ts"] rather than attribute access
      g_log.info(
        "uid=%s chunk=%d rows=%d payload_size=%d bytes from %s to %s",
        uid,
        num,
        len(inferenceResultsMessage["results"]),
        sys.getsizeof(payload),
        datetime.datetime.utcfromtimestamp(
          inferenceResultsMessage["results"][0]["ts"]),
        datetime.datetime.utcfromtimestamp(
          inferenceResultsMessage["results"][-1]["ts"]))

      messageBus.publishExg(
        exchange=config.get("metric_streamer", "results_exchange_name"),
        routingKey="",
        body=payload,
        properties=modelInferenceResultProperties)

  g_log.info("Done! numMetricDataRows=%d; numModels=%d",
             numMetricDataRows,
             numModels)
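# ---------------------------------------------------------------------------
# A plausible entry point for running the replay as a one-off tool. This is a
# sketch: it assumes MessageBusConnector (nta.utils.message_bus_connector) is
# usable as a context manager, as elsewhere in this codebase, and the
# --chunksize argument parsing is illustrative, not part of the original
# module.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
  import argparse

  from nta.utils.message_bus_connector import MessageBusConnector

  parser = argparse.ArgumentParser(
    description="Replay recent metric data to the model results exchange")
  parser.add_argument("--chunksize",
                      type=int,
                      default=DEFAULT_CHUNKSIZE,
                      help="rows per inference-results message")
  options = parser.parse_args()

  with MessageBusConnector() as messageBus:
    replayMetricDataToModelResultsExchange(messageBus=messageBus,
                                           chunksize=options.chunksize)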