Example #1
    def GET(self, metricId=None):
        """
        Get Model Data

        ::

            GET /_models/{model-id}/data?from={fromTimestamp}&to={toTimestamp}&anomaly={anomalyScore}&limit={numOfRows}

        Parameters:

          :param limit: (optional) max number of records to return
          :type limit: int
          :param from: (optional) return records from this timestamp
          :type from: timestamp
          :param to: (optional) return records up to this timestamp
          :type to: timestamp
          :param anomaly: anomaly score to filter
          :type anomaly: float

        Returns:

        ::

            {
                "data": [
                    ["2013-08-15 21:34:00", 222, 0.025, 125],
                    ["2013-08-15 21:32:00", 202, 0, 124],
                    ["2013-08-15 21:30:00", 202, 0, 123],
                    ...
                ],
                "names": [
                    "timestamp",
                    "value",
                    "anomaly_score",
                    "rowid"
                ]
            }
        """
        queryParams = dict(urlparse.parse_qsl(web.ctx.env['QUERY_STRING']))
        fromTimestamp = queryParams.get("from")
        toTimestamp = queryParams.get("to")
        anomaly = float(queryParams.get("anomaly") or 0.0)
        limit = int(queryParams.get("limit") or 0)

        with web.ctx.connFactory() as conn:
            fields = (schema.metric_data.c.uid, schema.metric_data.c.timestamp,
                      schema.metric_data.c.metric_value,
                      schema.metric_data.c.anomaly_score,
                      schema.metric_data.c.rowid)
            names = ("names", ) + tuple([
                "value" if col.name == "metric_value" else col.name
                for col in fields
            ])
            if fromTimestamp:
                sort = schema.metric_data.c.timestamp.asc()
            else:
                sort = schema.metric_data.c.timestamp.desc()

            result = repository.getMetricData(conn,
                                              metricId=metricId,
                                              fields=fields,
                                              fromTimestamp=fromTimestamp,
                                              toTimestamp=toTimestamp,
                                              score=anomaly,
                                              sort=sort)

        if "application/octet-stream" in web.ctx.env.get('HTTP_ACCEPT', ""):
            results_per_uid = defaultdict(int)
            packer = msgpack.Packer()
            self.addStandardHeaders(content_type='application/octet-stream')
            web.header('X-Accel-Buffering', 'no')

            yield packer.pack(names)
            for row in result:
                if not limit or results_per_uid[row.uid] < limit:
                    resultTuple = (
                        row.uid,
                        calendar.timegm(row.timestamp.timetuple()),
                        row.metric_value,
                        row.anomaly_score,
                        row.rowid,
                    )
                    yield packer.pack(resultTuple)
                    results_per_uid[row.uid] += 1
        else:

            if metricId is None:
                output = {}
                for row in result:
                    uid = row.uid
                    default = {"uid": uid, "data": []}
                    recordTuple = (row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                                   row.metric_value, row.anomaly_score,
                                   row.rowid)
                    metricDataRecord = output.setdefault(uid, default)
                    if not limit or (limit and
                                     len(metricDataRecord["data"]) < limit):
                        metricDataRecord["data"].append(recordTuple)

                results = {"metrics": output.values(), "names": names[2:]}

            else:
                if limit:
                    results = {
                        "names":
                        names[2:],
                        "data":
                        [(row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                          row.metric_value, row.anomaly_score, row.rowid)
                         for row in itertools.islice(result, 0, limit)]
                    }
                else:
                    results = {
                        "names":
                        names[2:],
                        "data":
                        [(row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                          row.metric_value, row.anomaly_score, row.rowid)
                         for row in result]
                    }
            self.addStandardHeaders()
            yield utils.jsonEncode(results)
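For context, here is a minimal client sketch for the endpoint documented above. The host, port, model id, and the use of the requests package are assumptions for illustration; only the URL shape, query parameters, and the msgpack streaming behaviour come from the handler itself.

# Hypothetical client for GET /_models/{model-id}/data (sketch only; host,
# port, and model id are placeholders, not part of the original code).
import msgpack
import requests

MODEL_ID = "2a123bb1dd4d46e7a806d62efc29cbb9"  # placeholder model uid
URL = "http://localhost:8080/_models/%s/data" % MODEL_ID

# JSON variant: the handler returns {"names": [...], "data": [...]}.
resp = requests.get(URL, params={"limit": 100, "anomaly": 0.9})
body = resp.json()
print(body["names"])
for row in body["data"]:
    print(row)

# Streaming variant: with Accept: application/octet-stream the handler yields
# one msgpack-encoded tuple per row, preceded by the "names" tuple.
resp = requests.get(URL, params={"limit": 100},
                    headers={"Accept": "application/octet-stream"},
                    stream=True)
unpacker = msgpack.Unpacker()
for chunk in resp.iter_content(chunk_size=4096):
    unpacker.feed(chunk)
    for item in unpacker:
        print(item)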
def replayMetricDataToModelResultsExchange(messageBus,
                                           chunksize=DEFAULT_CHUNKSIZE):
    """ Reads metric data and synthesizes model inference result messages to the
  "model results" exchange, simulating the end result of the AnomalyService.
  This will afford the dynamodb service an opportunity to backfill older data
  :param messageBus: message bus connection
  :type messageBus: nta.utils.message_bus_connector.MessageBusConnector
  """
    engine = repository.engineFactory()

    twoWeeksAgo = datetime.datetime.utcnow() - datetime.timedelta(days=14)

    # Properties for publishing model command results on RabbitMQ exchange
    # (same as AnomalyService)
    modelCommandResultProperties = MessageProperties(
        deliveryMode=amqp.constants.AMQPDeliveryModes.PERSISTENT_MESSAGE,
        headers=dict(dataType="model-cmd-result"))

    # Properties for publishing model inference results on RabbitMQ exchange
    # (same as AnomalyService)
    modelInferenceResultProperties = MessageProperties(
        deliveryMode=amqp.constants.AMQPDeliveryModes.PERSISTENT_MESSAGE)

    g_log.info("Getting metric data...")
    result = repository.getMetricData(
        engine,
        score=0,
        fromTimestamp=twoWeeksAgo,
        sort=[metric_data.c.uid, metric_data.c.rowid.asc()])
    numMetricDataRows = result.rowcount
    g_log.info("Got %d rows", numMetricDataRows)

    numModels = 0
    for uid, group in groupby(result, key=lambda x: x.uid):

        @retryOnTransientErrors
        def _getMetric():
            return repository.getMetric(engine, uid)

        metricObj = _getMetric()

        # Send defineModel command to ensure that the metric table entry is created
        numModels += 1
        modelCommandResult = {
            "status": htmengineerrno.SUCCESS,
            "method": "defineModel",
            "modelId": uid,
            "modelInfo": {
                "metricName": metricObj.name,
                "resource": metricObj.server,
                "modelSpec": json.loads(metricObj.parameters)
            }
        }

        # Serialize
        payload = anomaly_service.AnomalyService._serializeModelResult(
            modelCommandResult)

        g_log.info("Sending `defineModel` command: %r",
                   repr(modelCommandResult))
        messageBus.publishExg(exchange=config.get("metric_streamer",
                                                  "results_exchange_name"),
                              routingKey="",
                              body=payload,
                              properties=modelCommandResultProperties)

        metricInfo = dict(uid=metricObj.uid,
                          name=metricObj.name,
                          description=metricObj.description,
                          resource=metricObj.server,
                          location=metricObj.location,
                          datasource=metricObj.datasource,
                          spec=json.loads(metricObj.parameters)["metricSpec"])

        args = [iter(group)] * chunksize
        for num, chunk in enumerate(izip_longest(fillvalue=None, *args)):
            # Create
            inferenceResultsMessage = dict(
                metric=metricInfo,
                results=[
                    dict(rowid=row.rowid,
                         ts=epochFromNaiveUTCDatetime(row.timestamp),
                         value=row.metric_value,
                         rawAnomaly=row.raw_anomaly_score,
                         anomaly=row.anomaly_score) for row in chunk
                    if row is not None
                ])

            # Serialize
            payload = anomaly_service.AnomalyService._serializeModelResult(
                inferenceResultsMessage)

            g_log.info(
                "uid=%s chunk=%d rows=%d payload_size=%d bytes from %s to %s",
                uid, num, len(inferenceResultsMessage["results"]),
                sys.getsizeof(payload),
                datetime.datetime.utcfromtimestamp(
                    inferenceResultsMessage["results"][0]["ts"]),
                datetime.datetime.utcfromtimestamp(
                    inferenceResultsMessage["results"][-1]["ts"]))

            messageBus.publishExg(exchange=config.get("metric_streamer",
                                                      "results_exchange_name"),
                                  routingKey="",
                                  body=payload,
                                  properties=modelInferenceResultProperties)

    g_log.info("Done! numMetricDataRows=%d; numModels=%d", numMetricDataRows,
               numModels)
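The chunking inside the loop above uses the standard grouper recipe: repeating one iterator chunksize times and handing the copies to izip_longest slices the stream into fixed-size chunks padded with None, which is why the results comprehension filters out None rows. A standalone sketch of the idiom, in the Python 2 style matching the izip_longest import used here:

from itertools import izip_longest  # itertools.zip_longest on Python 3

def grouper(iterable, chunksize, fillvalue=None):
    # All chunksize references point at the same iterator, so izip_longest
    # pulls consecutive items into each chunk and pads the final chunk.
    args = [iter(iterable)] * chunksize
    return izip_longest(fillvalue=fillvalue, *args)

for chunk in grouper(range(7), 3):
    # Drop the trailing padding, mirroring the `if row is not None` guard above.
    print([item for item in chunk if item is not None])
# Output:
# [0, 1, 2]
# [3, 4, 5]
# [6]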
Example #3
  def GET(self, metricId=None):
    """
    Get Model Data

    ::

        GET /_models/{model-id}/data?from={fromTimestamp}&to={toTimestamp}&anomaly={anomalyScore}&limit={numOfRows}

    Parameters:

      :param limit: (optional) max number of records to return
      :type limit: int
      :param from: (optional) return records from this timestamp
      :type from: timestamp
      :param to: (optional) return records up to this timestamp
      :type to: timestamp
      :param anomaly: anomaly score to filter
      :type anomaly: float

    Returns:

    ::

        {
            "data": [
                ["2013-08-15 21:34:00", 222, 0.025, 125],
                ["2013-08-15 21:32:00", 202, 0, 124],
                ["2013-08-15 21:30:00", 202, 0, 123],
                ...
            ],
            "names": [
                "timestamp",
                "value",
                "anomaly_score",
                "rowid
            ]
        }
    """
    queryParams = dict(urlparse.parse_qsl(web.ctx.env['QUERY_STRING']))
    fromTimestamp = queryParams.get("from")
    toTimestamp = queryParams.get("to")
    anomaly = float(queryParams.get("anomaly") or 0.0)
    limit = int(queryParams.get("limit") or 0)

    with web.ctx.connFactory() as conn:
      fields = (schema.metric_data.c.uid,
                schema.metric_data.c.timestamp,
                schema.metric_data.c.metric_value,
                schema.metric_data.c.anomaly_score,
                schema.metric_data.c.rowid)
      names = ("names",) + tuple(["value" if col.name == "metric_value"
                                  else col.name
                                  for col in fields])
      if fromTimestamp:
        sort = schema.metric_data.c.timestamp.asc()
      else:
        sort = schema.metric_data.c.timestamp.desc()

      result = repository.getMetricData(conn,
                                        metricId=metricId,
                                        fields=fields,
                                        fromTimestamp=fromTimestamp,
                                        toTimestamp=toTimestamp,
                                        score=anomaly,
                                        sort=sort)

    if "application/octet-stream" in web.ctx.env.get('HTTP_ACCEPT', ""):
      results_per_uid = defaultdict(int)
      packer = msgpack.Packer()
      self.addStandardHeaders(content_type='application/octet-stream')
      web.header('X-Accel-Buffering', 'no')

      yield packer.pack(names)
      for row in result:
        if not limit or results_per_uid[row.uid] < limit:
          resultTuple = (
              row.uid,
              calendar.timegm(row.timestamp.timetuple()),
              row.metric_value,
              row.anomaly_score,
              row.rowid,
            )
          yield packer.pack(resultTuple)
          results_per_uid[row.uid] += 1
    else:

      if metricId is None:
        output = {}
        for row in result:
          uid = row.uid
          default = {"uid": uid, "data": []}
          recordTuple = (
            row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
            row.metric_value,
            row.anomaly_score,
            row.rowid
          )
          metricDataRecord = output.setdefault(uid, default)
          if not limit or (limit and len(metricDataRecord["data"]) < limit):
            metricDataRecord["data"].append(recordTuple)

        results = {
          "metrics":  output.values(),
          "names": names[2:]
        }

      else:
        if limit:
          results = {"names": names[2:],
                     "data": [(row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                               row.metric_value,
                               row.anomaly_score,
                               row.rowid)
                              for row in itertools.islice(result, 0, limit)]}
        else:
          results = {"names": names[2:],
                     "data": [(row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                               row.metric_value,
                               row.anomaly_score,
                               row.rowid) for row in result]}
      self.addStandardHeaders()
      yield utils.jsonEncode(results)
def replayMetricDataToModelResultsExchange(messageBus, chunksize=DEFAULT_CHUNKSIZE):
    """ Reads metric data and synthesizes model inference result messages to the
  "model results" exchange, simulating the end result of the AnomalyService.
  This will afford the dynamodb service an opportunity to backfill older data
  :param messageBus: message bus connection
  :type messageBus: nta.utils.message_bus_connector.MessageBusConnector
  """
    engine = repository.engineFactory()

    twoWeeksAgo = datetime.datetime.utcnow() - datetime.timedelta(days=14)

    # Properties for publishing model command results on RabbitMQ exchange
    # (same as AnomalyService)
    modelCommandResultProperties = MessageProperties(
        deliveryMode=amqp.constants.AMQPDeliveryModes.PERSISTENT_MESSAGE, headers=dict(dataType="model-cmd-result")
    )

    # Properties for publishing model inference results on RabbitMQ exchange
    # (same as AnomalyService)
    modelInferenceResultProperties = MessageProperties(deliveryMode=amqp.constants.AMQPDeliveryModes.PERSISTENT_MESSAGE)

    g_log.info("Getting metric data...")
    result = repository.getMetricData(
        engine, score=0, fromTimestamp=twoWeeksAgo, sort=[metric_data.c.uid, metric_data.c.rowid.asc()]
    )
    numMetricDataRows = result.rowcount
    g_log.info("Got %d rows", numMetricDataRows)

    numModels = 0
    for uid, group in groupby(result, key=lambda x: x.uid):

        @retryOnTransientErrors
        def _getMetric():
            return repository.getMetric(engine, uid)

        metricObj = _getMetric()

        # Send defineModel command to ensure that the metric table entry is created
        numModels += 1
        modelCommandResult = {
            "status": htmengineerrno.SUCCESS,
            "method": "defineModel",
            "modelId": uid,
            "modelInfo": {
                "metricName": metricObj.name,
                "resource": metricObj.server,
                "modelSpec": json.loads(metricObj.parameters),
            },
        }

        # Serialize
        payload = anomaly_service.AnomalyService._serializeModelResult(modelCommandResult)

        g_log.info("Sending `defineModel` command: %r", repr(modelCommandResult))
        messageBus.publishExg(
            exchange=config.get("metric_streamer", "results_exchange_name"),
            routingKey="",
            body=payload,
            properties=modelCommandResultProperties,
        )

        metricInfo = dict(
            uid=metricObj.uid,
            name=metricObj.name,
            description=metricObj.description,
            resource=metricObj.server,
            location=metricObj.location,
            datasource=metricObj.datasource,
            spec=json.loads(metricObj.parameters)["metricSpec"],
        )

        args = [iter(group)] * chunksize
        for num, chunk in enumerate(izip_longest(fillvalue=None, *args)):
            # Create
            inferenceResultsMessage = dict(
                metric=metricInfo,
                results=[
                    dict(
                        rowid=row.rowid,
                        ts=epochFromNaiveUTCDatetime(row.timestamp),
                        value=row.metric_value,
                        rawAnomaly=row.raw_anomaly_score,
                        anomaly=row.anomaly_score,
                    )
                    for row in chunk
                    if row is not None
                ],
            )

            # Serialize
            payload = anomaly_service.AnomalyService._serializeModelResult(inferenceResultsMessage)

            g_log.info(
                "uid=%s chunk=%d rows=%d payload_size=%d bytes from %s to %s",
                uid,
                num,
                len(inferenceResultsMessage["results"]),
                sys.getsizeof(payload),
                datetime.datetime.utcfromtimestamp(inferenceResultsMessage["results"][0]["ts"]),
                datetime.datetime.utcfromtimestamp(inferenceResultsMessage["results"][-1]["ts"]),
            )

            messageBus.publishExg(
                exchange=config.get("metric_streamer", "results_exchange_name"),
                routingKey="",
                body=payload,
                properties=modelInferenceResultProperties,
            )

    g_log.info("Done! numMetricDataRows=%d; numModels=%d", numMetricDataRows, numModels)