示例#1
0
    def _handleMetricCollectionError(self, engine, metricObj, startTime,
                                     error):
        """ Update the metric's collector_error member and promote the metric
    to ERROR state if needed

    :param engine: SQLAlchemy engine object
    :type engine: sqlalchemy.engine.Engine

    :param metricObj: Metric instance

    :param startTime: metric collection start time; unix epoch timestamp

    :param error: exception object that is the result of metric data collection
      or (possibly pending) resource status check
    """
        # Quarantine the metric temporarily
        self._metricInfoCache[metricObj.uid].quarantineEndTime = (
            startTime +
            metricObj.poll_interval * self._METRIC_QUARANTINE_DURATION_RATIO)

        try:
            if metricObj.collector_error is None:
                # TODO: unit-test
                # Begin error grace period for this metric
                deadline = time.time() + self._metricErrorGracePeriod
                with engine.connect() as conn:
                    repository.retryOnTransientErrors(
                        repository.setMetricCollectorError)(
                            conn, metricObj.uid,
                            utils.jsonEncode(
                                dict(deadline=deadline, message=repr(error))))
                self._log.error(
                    "Started error grace period on metric=<%r> through %sZ due to "
                    "error=%r", metricObj,
                    datetime.utcfromtimestamp(deadline).isoformat(), error)
            elif (time.time() > utils.jsonDecode(
                    metricObj.collector_error)["deadline"]):
                # TODO: unit-test
                # Error grace period expired: promote the metric to ERROR state
                with engine.connect() as conn:
                    repository.retryOnTransientErrors(
                        repository.setMetricStatus)(conn, metricObj.uid,
                                                    MetricStatus.ERROR,
                                                    repr(error))
                self._log.error(
                    "Metric Collector: grace period expired; placed metric=<%r> in "
                    "ERROR state due to error=%r", metricObj, error)
        except app_exceptions.ObjectNotFoundError:
            # TODO: unit-test
            self._log.warning("Metric deleted? metric=%r",
                              metricObj,
                              exc_info=True)
示例#2
0
  def _getCandidateMetrics(self, engine):
    """ Return the metrics that are due for an update

    :param engine: SQLAlchemy engine object
    :type engine: sqlalchemy.engine.Engine

    :returns: a (possibly empty) sequence of Metric instances that are due for
      an update (after removing quarantined indexes)

    TODO: unit-test
    """
    with engine.connect() as conn:
      metricsToUpdate = repository.retryOnTransientErrors(
        repository.getCloudwatchMetricsPendingDataCollection)(conn)

    # Remove quarantined metrics
    quarantinedIndexes = None
    if metricsToUpdate:
      now = time.time()
      quarantinedIndexes = set(
        i for i, m in enumerate(metricsToUpdate)
        if now < self._metricInfoCache[m.uid].quarantineEndTime)

      metricsToUpdate = OrderedDict((m.uid, m)
                                    for i, m in enumerate(metricsToUpdate)
                                    if i not in quarantinedIndexes)

      if not metricsToUpdate:
        # TODO: unit-test
        self._log.debug("All candidate numMetrics=%d are quarantined",
                        len(quarantinedIndexes))

    return metricsToUpdate
示例#3
0
    def _getCandidateMetrics(self, engine):
        """ Return the metrics that are due for an update

    :param engine: SQLAlchemy engine object
    :type engine: sqlalchemy.engine.Engine

    :returns: a (possibly empty) sequence of Metric instances that are due for
      an update (after removing quarantined indexes)

    TODO: unit-test
    """
        with engine.connect() as conn:
            metricsToUpdate = repository.retryOnTransientErrors(
                repository.getCloudwatchMetricsPendingDataCollection)(conn)

        # Remove quarantined metrics
        quarantinedIndexes = None
        if metricsToUpdate:
            now = time.time()
            quarantinedIndexes = set(
                i for i, m in enumerate(metricsToUpdate)
                if now < self._metricInfoCache[m.uid].quarantineEndTime)

            metricsToUpdate = OrderedDict(
                (m.uid, m) for i, m in enumerate(metricsToUpdate)
                if i not in quarantinedIndexes)

            if not metricsToUpdate:
                # TODO: unit-test
                self._log.debug("All candidate numMetrics=%d are quarantined",
                                len(quarantinedIndexes))

        return metricsToUpdate
示例#4
0
  def _handleMetricCollectionError(self, engine, metricObj, startTime, error):
    """ Update the metric's collector_error member and promote the metric
    to ERROR state if needed

    :param engine: SQLAlchemy engine object
    :type engine: sqlalchemy.engine.Engine

    :param metricObj: Metric instance

    :param startTime: metric collection start time; unix epoch timestamp

    :param error: exception object that is the result of metric data collection
      or (possibly pending) resource status check
    """
    # Quarantine the metric temporarily
    self._metricInfoCache[metricObj.uid].quarantineEndTime = (
      startTime +
      metricObj.poll_interval * self._METRIC_QUARANTINE_DURATION_RATIO)

    try:
      if metricObj.collector_error is None:
        # TODO: unit-test
        # Begin error grace period for this metric
        deadline = time.time() + self._metricErrorGracePeriod
        with engine.connect() as conn:
          repository.retryOnTransientErrors(repository.setMetricCollectorError)(
            conn,
            metricObj.uid,
            utils.jsonEncode(dict(deadline=deadline,
                                  message=repr(error))))
        self._log.error(
          "Started error grace period on metric=<%r> through %sZ due to "
          "error=%r",
          metricObj, datetime.utcfromtimestamp(deadline).isoformat(), error)
      elif (time.time() >
            utils.jsonDecode(metricObj.collector_error)["deadline"]):
        # TODO: unit-test
        # Error grace period expired: promote the metric to ERROR state
        with engine.connect() as conn:
          repository.retryOnTransientErrors(repository.setMetricStatus)(
            conn, metricObj.uid, MetricStatus.ERROR, repr(error))
        self._log.error(
          "Metric Collector: grace period expired; placed metric=<%r> in "
          "ERROR state due to error=%r", metricObj, error)
    except app_exceptions.ObjectNotFoundError:
      # TODO: unit-test
      self._log.warning("Metric deleted? metric=%r", metricObj, exc_info=True)
示例#5
0
  def unmonitorMetric(self, metricId):
    """ Unmonitor a metric

    :param metricId: unique identifier of the metric row

    :raises YOMP.app.exceptions.ObjectNotFoundError: if metric with the
      referenced metric uid doesn't exist
    """
    # Delete the metric from the database
    with self.connectionFactory() as conn:
      repository.retryOnTransientErrors(repository.deleteMetric)(
        conn,
        metricId)

    # Send request to delete HTM model
    model_swapper_utils.deleteHTMModel(metricId)

    self._log.info("Cloudwatch Metric unmonitored: metric=%r", metricId)
示例#6
0
  def exportModel(self, metricId):
    """ Export the given model.

    :param metricId: datasource-specific unique metric identifier

    :returns: Model-export specification for Cloudwatch model
    :rtype: dict

    ::
        {
          "datasource": "cloudwatch",

          "metricSpec": {
            "region": "us-west-2",
            "namespace": "AWS/EC2",
            "metric": "CPUUtilization",
            "dimensions": {
              "InstanceId": "i-12d67826"
            }
          },

          # Same modelParams with which model was created, if any
          "modelParams": {
            "min": 0,  # optional
            "max": 100  # optional
          }
        }

    :raises YOMP.app.exceptions.ObjectNotFoundError: if referenced metric
      doesn't exist
    """
    with self.connectionFactory() as conn:
      metricObj = repository.retryOnTransientErrors(repository.getMetric)(
        conn, metricId, fields=[schema.metric.c.parameters])

    parameters = htmengine.utils.jsonDecode(metricObj.parameters)

    return parameters
示例#7
0
    def run(self):
        with ModelSwapperInterface() as modelSwapper:
            engine = repository.engineFactory()
            while True:
                with engine.connect() as conn:
                    pendingStacks = repository.retryOnTransientErrors(
                        repository.getAutostackMetricsPendingDataCollection
                    )(conn)

                if not pendingStacks:
                    time.sleep(self._NOTHING_READY_SLEEP_TIME_SEC)
                    continue

                # Build a sequence of autostack metric requests
                requests = []
                for autostack, metrics in pendingStacks:
                    refBase = len(requests)
                    requests.extend(
                        AutostackMetricRequest(refID=refBase + i, autostack=autostack, metric=metric)
                        for i, metric in enumerate(metrics)
                    )

                # Collect, aggregate, and stream metric data
                self._processAutostackMetricRequests(engine, requests, modelSwapper)
示例#8
0
  def run(self):
    with ModelSwapperInterface() as modelSwapper:
      engine = repository.engineFactory()
      while True:
        with engine.connect() as conn:
          pendingStacks = repository.retryOnTransientErrors(
            repository.getAutostackMetricsPendingDataCollection)(conn)

        if not pendingStacks:
          time.sleep(self._NOTHING_READY_SLEEP_TIME_SEC)
          continue

        # Build a sequence of autostack metric requests
        requests = []
        for autostack, metrics in pendingStacks:
          refBase = len(requests)
          requests.extend(
            AutostackMetricRequest(refID=refBase + i,
                                   autostack=autostack,
                                   metric=metric)
            for i, metric in enumerate(metrics))

        # Collect, aggregate, and stream metric data
        self._processAutostackMetricRequests(engine, requests, modelSwapper)
示例#9
0
  def _processCollectedData(self, engine, metricsToUpdate, modelSwapper,
                            collectResult):
    """ Process collected metrics data; publish it on the destination
    message queue, and update metric instance status and metric collection
    state.

    :param engine: SQLAlchemy engine object
    :type engine: sqlalchemy.engine.Engine

    :param metricsToUpdate: a sequence of Metric instances which were due for
      an update
    :type metricsToUpdate: collections.OrderedDict

    :param modelSwapper: ModelSwapperInterface object for running models

    :param collectResult: _DataCollectionResult object

    :returns: 2-tuple (numEmpty, numErrors), where numEmpty is the count of
      metrics with empty data (not ready or data gap) numErrors is the count
      of metrics with errors
    """

    numEmpty = numErrors = 0

    metricObj = metricsToUpdate[collectResult.metricID]
    # Update cached resource status
    resourceInfoCacheItem = self._resourceInfoCache[metricObj.server]
    newResourceStatus = None
    if collectResult.resourceStatus is not None:
      if isinstance(collectResult.resourceStatus, Exception):
        resourceInfoCacheItem.error = collectResult.resourceStatus
      else:
        newResourceStatus = collectResult.resourceStatus
        resourceInfoCacheItem.error = None

    if isinstance(collectResult.data, Exception):
      dataError = collectResult.data
      newData = None
      nextStartTime = None
    else:
      dataError = None
      newData = collectResult.data
      nextStartTime = collectResult.nextCallStart

    if newData is not None or newResourceStatus is not None:
      rangeStart = metricObj.last_timestamp

      if not newData:
        # Data is not ready yet or data gap
        numEmpty += 1
        self._metricInfoCache[metricObj.uid].quarantineEndTime = (
          collectResult.creationTime +
          metricObj.poll_interval * self._METRIC_QUARANTINE_DURATION_RATIO)
      else:
        headTS = newData[0][0]
        tailTS = newData[-1][0]

      startTime = time.time()

      # Update resource status and next metric start time
      @repository.retryOnTransientErrors
      def runSQL(engine):
        with engine.begin() as conn:
          try:
            # Save resource status
            if newResourceStatus is not None:
              repository.saveMetricInstanceStatus(conn,
                                                  metricObj.server,
                                                  newResourceStatus)

            # Save starting time for the next data query in metric row
            if nextStartTime is not None:
              repository.setMetricLastTimestamp(conn,
                                                metricObj.uid,
                                                nextStartTime)
          except app_exceptions.ObjectNotFoundError:
            self._log.warning("Metric deleted?", exc_info=True)

      if newResourceStatus is not None or nextStartTime is not None:
        runSQL(engine)

      if newData:
        # This is done outside the cursor context to minimize lock duration.
        #
        # NOTE: It is still possible for duplicate data to be sent to the
        # exchange so the recipient will need to de-dupe it. It is also
        # possible for the metric to be updated in the database but data not
        # sent to the exchange.
        try:
          self.metricStreamer.streamMetricData(newData,
                                               metricID=metricObj.uid,
                                               modelSwapper=modelSwapper)
        except ValueError:
          self._log.exception("Failed to stream metric data %r" % newData)
        except app_exceptions.ObjectNotFoundError:
          # We expect that the model exists but in the odd case that it has
          # already been deleted we don't want to crash the process.
          self._log.info("Metric not found when adding data.")

        if self._profiling:
          self._log.info(
            "{TAG:APP.MC.DATA.TO_STRM.DONE} numItems=%d for metric=%r; "
            "rangeStart=%s; ts=[%s]; duration=%.4fs; ",
            len(newData), metricObj,
            rangeStart.isoformat() + "Z" if rangeStart is not None else None,
            (("%sZ..%sZ" % (headTS.isoformat(), tailTS.isoformat()))
              if len(newData) > 1 else (headTS.isoformat() + "Z")),
            time.time() - startTime + collectResult.duration)
      else:
        if self._log.isEnabledFor(logging.DEBUG):
          self._log.debug(
            "{TAG:APP.MC.DATA.NONE} No data for metric=%r; rangeStart=%s; "
            "duration=%.4fs", metricObj,
            rangeStart.isoformat() + "Z" if rangeStart is not None else None,
            time.time() - startTime + collectResult.duration)

    # Take care of errors
    error = dataError or resourceInfoCacheItem.error
    if error is not None:
      # Metric data or resource status collection error
      numErrors += 1
      # Update metric error grace period and quarantine info
      self._handleMetricCollectionError(engine,
                                        metricObj,
                                        startTime=collectResult.creationTime,
                                        error=error)
    else:
      if metricObj.collector_error is not None:
        oldErrorInfo = metricObj.collector_error

        # There was pending collector error before, but no error this time, so
        # clear metric's collector_error
        try:
          with engine.connect() as conn:
            repository.retryOnTransientErrors(
              repository.setMetricCollectorError)(conn, metricObj.uid, None)
        except app_exceptions.ObjectNotFoundError:
          self._log.warning("Metric deleted?", exc_info=True)

        self._log.info("metric=<%r> exited error grace state %s",
                       metricObj, oldErrorInfo)

    return (numEmpty, numErrors)
示例#10
0
    def _processCollectedData(self, engine, metricsToUpdate, modelSwapper,
                              collectResult):
        """ Process collected metrics data; publish it on the destination
    message queue, and update metric instance status and metric collection
    state.

    :param engine: SQLAlchemy engine object
    :type engine: sqlalchemy.engine.Engine

    :param metricsToUpdate: a sequence of Metric instances which were due for
      an update
    :type metricsToUpdate: collections.OrderedDict

    :param modelSwapper: ModelSwapperInterface object for running models

    :param collectResult: _DataCollectionResult object

    :returns: 2-tuple (numEmpty, numErrors), where numEmpty is the count of
      metrics with empty data (not ready or data gap) numErrors is the count
      of metrics with errors
    """

        numEmpty = numErrors = 0

        metricObj = metricsToUpdate[collectResult.metricID]
        # Update cached resource status
        resourceInfoCacheItem = self._resourceInfoCache[metricObj.server]
        newResourceStatus = None
        if collectResult.resourceStatus is not None:
            if isinstance(collectResult.resourceStatus, Exception):
                resourceInfoCacheItem.error = collectResult.resourceStatus
            else:
                newResourceStatus = collectResult.resourceStatus
                resourceInfoCacheItem.error = None

        if isinstance(collectResult.data, Exception):
            dataError = collectResult.data
            newData = None
            nextStartTime = None
        else:
            dataError = None
            newData = collectResult.data
            nextStartTime = collectResult.nextCallStart

        if newData is not None or newResourceStatus is not None:
            rangeStart = metricObj.last_timestamp

            if not newData:
                # Data is not ready yet or data gap
                numEmpty += 1
                self._metricInfoCache[metricObj.uid].quarantineEndTime = (
                    collectResult.creationTime + metricObj.poll_interval *
                    self._METRIC_QUARANTINE_DURATION_RATIO)
            else:
                headTS = newData[0][0]
                tailTS = newData[-1][0]

            startTime = time.time()

            # Update resource status and next metric start time
            @repository.retryOnTransientErrors
            def runSQL(engine):
                with engine.begin() as conn:
                    try:
                        # Save resource status
                        if newResourceStatus is not None:
                            repository.saveMetricInstanceStatus(
                                conn, metricObj.server, newResourceStatus)

                        # Save starting time for the next data query in metric row
                        if nextStartTime is not None:
                            repository.setMetricLastTimestamp(
                                conn, metricObj.uid, nextStartTime)
                    except app_exceptions.ObjectNotFoundError:
                        self._log.warning("Metric deleted?", exc_info=True)

            if newResourceStatus is not None or nextStartTime is not None:
                runSQL(engine)

            if newData:
                # This is done outside the cursor context to minimize lock duration.
                #
                # NOTE: It is still possible for duplicate data to be sent to the
                # exchange so the recipient will need to de-dupe it. It is also
                # possible for the metric to be updated in the database but data not
                # sent to the exchange.
                try:
                    self.metricStreamer.streamMetricData(
                        newData,
                        metricID=metricObj.uid,
                        modelSwapper=modelSwapper)
                except ValueError:
                    self._log.exception("Failed to stream metric data %r" %
                                        newData)
                except app_exceptions.ObjectNotFoundError:
                    # We expect that the model exists but in the odd case that it has
                    # already been deleted we don't want to crash the process.
                    self._log.info("Metric not found when adding data.")

                if self._profiling:
                    self._log.info(
                        "{TAG:APP.MC.DATA.TO_STRM.DONE} numItems=%d for metric=%r; "
                        "rangeStart=%s; ts=[%s]; duration=%.4fs; ",
                        len(newData), metricObj,
                        rangeStart.isoformat() +
                        "Z" if rangeStart is not None else None,
                        (("%sZ..%sZ" %
                          (headTS.isoformat(), tailTS.isoformat()))
                         if len(newData) > 1 else (headTS.isoformat() + "Z")),
                        time.time() - startTime + collectResult.duration)
            else:
                if self._log.isEnabledFor(logging.DEBUG):
                    self._log.debug(
                        "{TAG:APP.MC.DATA.NONE} No data for metric=%r; rangeStart=%s; "
                        "duration=%.4fs", metricObj,
                        rangeStart.isoformat() +
                        "Z" if rangeStart is not None else None,
                        time.time() - startTime + collectResult.duration)

        # Take care of errors
        error = dataError or resourceInfoCacheItem.error
        if error is not None:
            # Metric data or resource status collection error
            numErrors += 1
            # Update metric error grace period and quarantine info
            self._handleMetricCollectionError(
                engine,
                metricObj,
                startTime=collectResult.creationTime,
                error=error)
        else:
            if metricObj.collector_error is not None:
                oldErrorInfo = metricObj.collector_error

                # There was pending collector error before, but no error this time, so
                # clear metric's collector_error
                try:
                    with engine.connect() as conn:
                        repository.retryOnTransientErrors(
                            repository.setMetricCollectorError)(conn,
                                                                metricObj.uid,
                                                                None)
                except app_exceptions.ObjectNotFoundError:
                    self._log.warning("Metric deleted?", exc_info=True)

                self._log.info("metric=<%r> exited error grace state %s",
                               metricObj, oldErrorInfo)

        return (numEmpty, numErrors)
示例#11
0
    def _processAutostackMetricRequests(self, engine, requests, modelSwapper):
        """ Execute autostack metric requests, aggregate and stream
    collected metric data

    :param engine: SQLAlchemy engine object
    :type engine: sqlalchemy.engine.Engine
    :param requests: sequence of AutostackMetricRequest objects
    :param modelSwapper: Model Swapper
    """
        # Start collecting requested metric data
        collectionIter = self._metricGetter.collectMetricData(requests)

        # Aggregate each collection and dispatch to app MetricStreamer
        for metricCollection in collectionIter:
            request = requests[metricCollection.refID]

            metricObj = request.metric
            data = None

            if metricCollection.slices:
                aggregationFn = getAggregationFn(metricObj)
                if aggregationFn:
                    data = aggregate(metricCollection.slices, aggregationFn=aggregationFn)
                else:
                    data = aggregate(metricCollection.slices)

            try:
                with engine.connect() as conn:
                    repository.retryOnTransientErrors(repository.setMetricLastTimestamp)(
                        conn, metricObj.uid, metricCollection.nextMetricTime
                    )
            except ObjectNotFoundError:
                self._log.warning(
                    "Processing autostack data collection results for " "unknown model=%s (model deleted?)",
                    metricObj.uid,
                )
                continue

            if data:
                try:
                    self.metricStreamer.streamMetricData(data, metricID=metricObj.uid, modelSwapper=modelSwapper)
                except ObjectNotFoundError:
                    # We expect that the model exists but in the odd case that it has
                    # already been deleted we don't want to crash the process.
                    self._log.info("Metric not found when adding data. metric=%s" % metricObj.uid)

                self._log.debug(
                    "{TAG:APP.AGG.DATA.PUB} Published numItems=%d for metric=%s;"
                    "timeRange=[%sZ-%sZ]; headTS=%sZ; tailTS=%sZ",
                    len(data),
                    getMetricLogPrefix(metricObj),
                    metricCollection.timeRange.start.isoformat(),
                    metricCollection.timeRange.end.isoformat(),
                    data[0][0].isoformat(),
                    data[-1][0].isoformat(),
                )

            else:
                self._log.info(
                    "{TAG:APP.AGG.DATA.NONE} No data for metric=%s;" "timeRange=[%sZ-%sZ]",
                    getMetricLogPrefix(metricObj),
                    metricCollection.timeRange.start.isoformat(),
                    metricCollection.timeRange.end.isoformat(),
                )
示例#12
0
    def messageHandler(self, message):
        """ Inspect all inbound model results in a batch for anomaly thresholds and
        trigger notifications where applicable.

        :param amqp.messages.ConsumerMessage message: ``message.body`` is a
          serialized batch of model inference results generated in
          ``AnomalyService`` and must be deserialized using
          ``AnomalyService.deserializeModelResult()``. The message conforms to
          htmengine/runtime/json_schema/model_inference_results_msg_schema.json
    """
        if message.properties.headers and "dataType" in message.properties.headers:
            # Not a model inference result
            return

        YOMP.app.config.loadConfig()  # reload config on every batch
        engine = repository.engineFactory()
        # Cache minimum threshold to trigger any notification to avoid permuting
        # settings x metricDataRows
        try:
            try:
                batch = AnomalyService.deserializeModelResult(message.body)
            except Exception:
                self._log.exception("Error deserializing model result")
                raise

            # Load all settings for all users (once per incoming batch)
            with engine.connect() as conn:
                settings = repository.retryOnTransientErrors(repository.getAllNotificationSettings)(conn)

            self._log.debug("settings: %r" % settings)

            if settings:
                minThreshold = min(setting.sensitivity for setting in settings)
            else:
                minThreshold = 0.99999

            metricInfo = batch["metric"]
            metricId = metricInfo["uid"]
            resource = metricInfo["resource"]

            for row in batch["results"]:

                if row["anomaly"] >= minThreshold:
                    for settingObj in settings:
                        if row["rowid"] <= 1000:
                            continue  # Not enough data

                        rowDatetime = datetime.utcfromtimestamp(row["ts"])

                        if rowDatetime < datetime.utcnow() - timedelta(seconds=3600):
                            continue  # Skip old

                        if row["anomaly"] >= settingObj.sensitivity:
                            # First let's clear any old users out of the database.
                            with engine.connect() as conn:
                                repository.retryOnTransientErrors(repository.deleteStaleNotificationDevices)(
                                    conn, _NOTIFICATION_DEVICE_STALE_DAYS
                                )

                            # If anomaly_score meets or exceeds any of the device
                            # notification sensitivity settings, trigger notification.
                            # repository.addNotification() will handle throttling.
                            notificationId = str(uuid.uuid4())

                            with engine.connect() as conn:
                                result = repository.retryOnTransientErrors(repository.addNotification)(
                                    conn,
                                    uid=notificationId,
                                    server=resource,
                                    metric=metricId,
                                    rowid=row["rowid"],
                                    device=settingObj.uid,
                                    windowsize=(settingObj.windowsize),
                                    timestamp=rowDatetime,
                                    acknowledged=0,
                                    seen=0,
                                )

                            self._log.info(
                                "NOTIFICATION=%s SERVER=%s METRICID=%s DEVICE=%s "
                                "Notification generated. " % (notificationId, resource, metricId, settingObj.uid)
                            )

                            if result is not None and result.rowcount > 0 and settingObj.email_addr:
                                # Notification was generated.  Attempt to send email
                                with engine.connect() as conn:
                                    notificationObj = repository.getNotification(conn, notificationId)

                                self.sendNotificationEmail(engine, settingObj, notificationObj)

                    if not settings:
                        # There are no device notification settings stored on this server,
                        # no notifications will be generated.  However, log that a
                        # an anomaly was detected and notification would be sent if there
                        # were any configured devices
                        self._log.info(
                            "<%r>" % (metricInfo)
                            + ("{TAG:APP.NOTIFICATION} Anomaly " "detected at %s, but no devices are " "configured.")
                            % rowDatetime
                        )

        finally:
            message.ack()

        # Do cleanup
        with engine.connect() as conn:
            repository.clearOldNotifications(conn)  # Delete all notifications outside
示例#13
0
  def _processAutostackMetricRequests(self, engine, requests, modelSwapper):
    """ Execute autostack metric requests, aggregate and stream
    collected metric data

    :param engine: SQLAlchemy engine object
    :type engine: sqlalchemy.engine.Engine
    :param requests: sequence of AutostackMetricRequest objects
    :param modelSwapper: Model Swapper
    """
    # Start collecting requested metric data
    collectionIter = self._metricGetter.collectMetricData(requests)

    # Aggregate each collection and dispatch to app MetricStreamer
    for metricCollection in collectionIter:
      request = requests[metricCollection.refID]

      metricObj = request.metric
      data = None

      if metricCollection.slices:
        aggregationFn = getAggregationFn(metricObj)
        if aggregationFn:
          data = aggregate(metricCollection.slices,
                           aggregationFn=aggregationFn)
        else:
          data = aggregate(metricCollection.slices)

      try:
        with engine.connect() as conn:
          repository.retryOnTransientErrors(repository.setMetricLastTimestamp)(
            conn, metricObj.uid, metricCollection.nextMetricTime)
      except ObjectNotFoundError:
        self._log.warning("Processing autostack data collection results for "
                          "unknown model=%s (model deleted?)", metricObj.uid)
        continue

      if data:
        try:
          self.metricStreamer.streamMetricData(data,
                                               metricID=metricObj.uid,
                                               modelSwapper=modelSwapper)
        except ObjectNotFoundError:
          # We expect that the model exists but in the odd case that it has
          # already been deleted we don't want to crash the process.
          self._log.info("Metric not found when adding data. metric=%s" %
                         metricObj.uid)

        self._log.debug(
          "{TAG:APP.AGG.DATA.PUB} Published numItems=%d for metric=%s;"
          "timeRange=[%sZ-%sZ]; headTS=%sZ; tailTS=%sZ",
          len(data), getMetricLogPrefix(metricObj),
          metricCollection.timeRange.start.isoformat(),
          metricCollection.timeRange.end.isoformat(),
          data[0][0].isoformat(), data[-1][0].isoformat())

      else:
        self._log.info(
          "{TAG:APP.AGG.DATA.NONE} No data for metric=%s;"
          "timeRange=[%sZ-%sZ]", getMetricLogPrefix(metricObj),
          metricCollection.timeRange.start.isoformat(),
          metricCollection.timeRange.end.isoformat())