Example #1
  def _scrubInferenceResultsAndInitMetricData(self, engine, inferenceResults,
                                              metricDataRows, metricObj):
    """ Validate the given inferenceResults against metricDataRows, update
    corresponding MetricData instances by initializing their
    `raw_anomaly_score` property from results and the `anomaly_score` property
    with 0. Replace elements in metricDataRows with MutableMetricDataRow
    objects.

    *NOTE:* does NOT update the MetricData instances to the database (we do that
    once after we process the batch for efficiency)

    :param engine: SQLAlchemy engine object
    :type engine: sqlalchemy.engine.Engine

    :param inferenceResults: a sequence of ModelInferenceResult instances
      representing the inference result batch ordered by row id

    :param metricDataRows: a mutable list of MetricData instances with row ids
      in the range of inferenceResults[0].rowID to inferenceResults[-1].rowID

    :param metricObj: a Metric instance associated with the given
      inferenceResults

    :raises RejectedInferenceResultBatch: if the given result batch is rejected
    """
    for result, enumeratedMetricData in itertools.izip_longest(
        inferenceResults, enumerate(metricDataRows)):

      if enumeratedMetricData is None:
        raise RejectedInferenceResultBatch(
          "No MetricData row for inference result=%r of model=<%r>" % (
            result, metricObj))
      index, metricData = enumeratedMetricData

      if result is None:
        raise RejectedInferenceResultBatch(
          "Truncated inference result batch; no result for metric data row=%r "
          "of model=<%r>" % (metricData, metricObj))

      if metricData is None:
        raise RejectedInferenceResultBatch(
          "No MetricData row for inference result=%r of model=<%r>" %
          (result, metricObj))

      if result.rowID != metricData.rowid:
        raise RejectedInferenceResultBatch(
          "RowID mismatch between inference result=%r and ModelData row=%r of "
          "model=<%r>" % (result, metricData, metricObj))

      if metricData.raw_anomaly_score is not None:
        # Side-effect of at-least-once delivery guarantee?
        self._log.error(
          "Anomaly was already processed on data row=%s; new result=%r",
          metricData, result)

      # Validate the result
      if result.status != 0:
        self._log.error(result.errorMessage)
        if metricObj.status == MetricStatus.ERROR:
          raise RejectedInferenceResultBatch(
            "inferenceResult=%r failed and model=<%s> was in ERROR state" %
            (result, getMetricLogPrefix(metricObj)))
        else:
          self._log.error("Placing model=<%r> in ERROR state due to "
                          "inferenceResult=%r", metricObj, result)
          with engine.connect() as conn:
            repository.setMetricStatus(conn,
                                       metricObj.uid,
                                       MetricStatus.ERROR,
                                       result.errorMessage)
          raise RejectedInferenceResultBatch(
            "inferenceResult=%r failed and model=<%s> promoted to ERROR state" %
            (result, getMetricLogPrefix(metricObj)))

      #self._log.info("{TAG:ANOM.METRIC} metric=%s:%s:%s",
      #               metricObj.name,
      #               calendar.timegm(metricData.timestamp.timetuple()),
      #               metricData.metric_value)

      mutableMetricData = MutableMetricDataRow(**dict(metricData.items()))
      mutableMetricData.raw_anomaly_score = result.anomalyScore
      mutableMetricData.anomaly_score = 0
      mutableMetricData.multi_step_best_predictions = (
        result.multiStepBestPredictions)
      metricDataRows[index] = mutableMetricData
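
The izip_longest pairing in the loop above is what lets the method detect a
shape mismatch in either direction: whichever sequence runs out first shows up
as a None fill value. A minimal standalone sketch of the same pattern (names
are illustrative, not part of htmengine; Python 3 spells the helper
zip_longest, while the Python 2 code above uses itertools.izip_longest):

import itertools

def pairResultsWithRows(results, rows):
  # izip_longest/zip_longest pads the shorter sequence with None, which we
  # treat as a batch-shape error rather than silently dropping items.
  for result, enumeratedRow in itertools.zip_longest(results,
                                                     enumerate(rows)):
    if enumeratedRow is None:
      raise ValueError("more results than rows; extra result=%r" % (result,))
    index, row = enumeratedRow
    if result is None:
      raise ValueError("truncated batch; no result for row %d" % index)
    yield index, result, row

# list(pairResultsWithRows("ab", "xy")) == [(0, 'a', 'x'), (1, 'b', 'y')]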
Example #2
def startMonitoring(conn, metricId, swarmParams, logger):
    """ Start monitoring an UNMONITORED metric.

  NOTE: typically called inside a transaction and/or with locked tables

  Starts the CLA model if swarmParams is non-None; otherwise defers model
  creation to a later time and places the metric in the
  MetricStatus.PENDING_DATA state.

  :param conn: SQLAlchemy Connection object for executing SQL
  :type conn: sqlalchemy.engine.Connection

  :param metricId: unique identifier of the metric row

  :param swarmParams: swarmParams generated via
    scalar_metric_utils.generateSwarmParams() or None.

  :param logger: logger object

  :returns: True if model was started; False if not

  :raises htmengine.exceptions.ObjectNotFoundError: if metric with the
    referenced metric uid doesn't exist

  :raises htmengine.exceptions.MetricStatusChangedError: if Metric status was
      changed by someone else (most likely another process) before this
      operation could complete
  """
    modelStarted = False

    startTime = time.time()

    metricObj = repository.getMetric(conn, metricId)

    assert metricObj.status == MetricStatus.UNMONITORED, (
        "startMonitoring: metric=%s is already monitored; status=%s" % (
            metricId,
            metricObj.status,
        ))

    if swarmParams is not None:
        # We have swarmParams, so start the model
        modelStarted = _startModelHelper(conn=conn,
                                         metricObj=metricObj,
                                         swarmParams=swarmParams,
                                         logger=logger)
    else:
        # Put the metric into the PENDING_DATA state until enough data arrives for
        # stats
        refStatus = metricObj.status

        repository.setMetricStatus(conn,
                                   metricId,
                                   MetricStatus.PENDING_DATA,
                                   refStatus=refStatus)
        # refresh
        metricStatus = repository.getMetric(
            conn, metricId, fields=[schema.metric.c.status]).status

        if metricStatus == MetricStatus.PENDING_DATA:
            logger.info(
                "startMonitoring: promoted metric to model in PENDING_DATA; "
                "metric=%s; duration=%.4fs", metricId,
                time.time() - startTime)
        else:
            raise app_exceptions.MetricStatusChangedError(
                "startMonitoring: unable to promote metric=%s to model as "
                "PENDING_DATA; metric status morphed from %s to %s" % (
                    metricId,
                    refStatus,
                    metricStatus,
                ))

    return modelStarted
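
A hypothetical call site for startMonitoring, run inside a transaction so the
UNMONITORED check and the status change stay atomic. This is a sketch under
assumed names: `engine`, `metricId`, `stats`, and the generateSwarmParams
signature are placeholders, not taken from this page.

with engine.begin() as conn:  # transaction; commits on success
    swarmParams = scalar_metric_utils.generateSwarmParams(stats)  # may be None
    started = scalar_metric_utils.startMonitoring(conn, metricId,
                                                  swarmParams, logger)
if not started:
    logger.info("Model creation deferred; metric=%s is awaiting data", metricId)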
Example #3
  def _processModelCommandResult(self, metricID, result):
    """
    Process a single model command result
    """
    engine = repository.engineFactory(config)

    # Check if deleting model
    if result.method == "deleteModel":
      self._log.info("Model=%s was deleted", metricID)
      return

    # Validate model ID
    try:
      # NOTE: use shared lock to prevent race condition with adapter's
      # monitorMetric, whereby adapter creates and/or activates a metric inside
      # a transaction, and we might get the defineModel command before the
      # metric row updates are committed
      with engine.connect() as conn:
        metricObj = repository.getMetricWithSharedLock(conn, metricID)
    except ObjectNotFoundError:
      # This may occur if the user deleted the model while its result
      # messages were still pending on the message bus.
      self._log.warn("Received command result=%r for unknown model=%s "
                     "(model deleted?)", result, metricID)
      return

    if result.status != 0:
      self._log.error(result.errorMessage)
      if metricObj.status != MetricStatus.ERROR:
        self._log.error("Placing model=<%s> in ERROR state due to "
                        "commandResult=%s",
                        getMetricLogPrefix(metricObj),
                        result)
        with engine.connect() as conn:
          repository.setMetricStatus(conn, metricID, MetricStatus.ERROR,
                                     result.errorMessage)
      else:
        # NOTE: could be a race condition between app-layer and Model Swapper
        #   or a side-effect of the at-least-once delivery guarantee
        self._log.warn("Received command result=%r for metricID=%s of "
                       "metric=<%s> that was already in ERROR state",
                       result, metricID, getMetricLogPrefix(metricObj))
      return

    # Create Model
    if result.method == "defineModel":
      self._log.info("Model was created for <%s>",
                     getMetricLogPrefix(metricObj))

      if metricObj.status == MetricStatus.CREATE_PENDING:
        with engine.connect() as conn:
          repository.setMetricStatus(conn, metricID, MetricStatus.ACTIVE)
      else:
        # NOTE: could be a race condition between app-layer and Model Swapper
        #   or a side-effect of the at-least-once delivery guarantee
        self._log.warn("Received command result=%r for model=%s of metric=<%s> "
                       "that was not in CREATE_PENDING state",
                       result, metricID, getMetricLogPrefix(metricObj))
      return

    self._log.error("Unexpected model result=%r", result)
Example #4
def _startModelHelper(conn, metricObj, swarmParams, logger):
    """ Start the model

  :param conn: SQLAlchemy Connection object for executing SQL
  :type conn: sqlalchemy.engine.Connection

  :param metricObj: metric, freshly-loaded
  :type metricObj: sqlalchemy.engine.RowProxy (see repository.getMetric())

  :param swarmParams: non-None swarmParams generated via
    scalar_metric_utils.generateSwarmParams().

  :param logger: logger object

  :returns: True if model was started; False if not

  :raises htmengine.exceptions.ObjectNotFoundError: if the metric doesn't exist;
      this may happen if it got deleted by another process in the meantime.

  :raises htmengine.exceptions.MetricStatusChangedError: if Metric status was
      changed by someone else (most likely another process) before this
      operation could complete
  """
    if swarmParams is None:
        raise ValueError(
            "startModel: 'swarmParams' must be non-None: metric=%s" %
            (metricObj.uid, ))

    if metricObj.status not in (MetricStatus.UNMONITORED,
                                MetricStatus.PENDING_DATA):
        if metricObj.status in (MetricStatus.CREATE_PENDING,
                                MetricStatus.ACTIVE):
            return False

        logger.error("Unexpected metric status; metric=%r", metricObj)
        raise ValueError("startModel: unexpected metric status; metric=%r" %
                         (metricObj, ))

    startTime = time.time()

    # Save swarm parameters and update metric status
    refStatus = metricObj.status
    repository.updateMetricColumnsForRefStatus(
        conn, metricObj.uid, refStatus, {
            "status": MetricStatus.CREATE_PENDING,
            "model_params": htmengine.utils.jsonEncode(swarmParams)
        })

    metricObj = repository.getMetric(
        conn,
        metricObj.uid,
        fields=[schema.metric.c.uid, schema.metric.c.status])  # refresh

    if metricObj.status != MetricStatus.CREATE_PENDING:
        raise app_exceptions.MetricStatusChangedError(
            "startModel: unable to start model=%s; "
            "metric status morphed from %s to %s" % (
                metricObj.uid,
                refStatus,
                metricObj.status,
            ))

    # Request to create the CLA model
    try:
        model_swapper_utils.createHTMModel(metricObj.uid, swarmParams)
    except Exception:
        logger.exception("startModel: createHTMModel failed.")
        repository.setMetricStatus(conn,
                                   metricObj.uid,
                                   status=MetricStatus.ERROR,
                                   message=repr(sys.exc_info()[1]))
        raise

    logger.info("startModel: started model=%r; duration=%.4fs", metricObj,
                time.time() - startTime)

    return True
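
The refStatus handshake above is optimistic concurrency control: update the
row only while it still holds the status we last read, then re-read to learn
who won the race. A generic SQLAlchemy-core sketch of that idiom (an assumed
equivalent; the actual statement lives inside
repository.updateMetricColumnsForRefStatus):

def compareAndSetStatus(conn, metricTable, uid, refStatus, newStatus):
    # Compiles to UPDATE ... WHERE uid = :uid AND status = :refStatus; if
    # another process already changed the status, no row matches and
    # rowcount is 0, so the caller knows it lost the race.
    result = conn.execute(
        metricTable.update()
        .where((metricTable.c.uid == uid) &
               (metricTable.c.status == refStatus))
        .values(status=newStatus))
    return result.rowcount == 1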
Example #5
def startMonitoring(conn, metricId, swarmParams, logger):
  """ Start monitoring an UNMONITORED metric.

  NOTE: typically called inside a transaction and/or with locked tables

  Starts the CLA model if swarmParams is non-None; otherwise defers model
  creation to a later time and places the metric in the
  MetricStatus.PENDING_DATA state.

  :param conn: SQLAlchemy Connection object for executing SQL
  :type conn: sqlalchemy.engine.Connection

  :param metricId: unique identifier of the metric row

  :param swarmParams: swarmParams generated via
    scalar_metric_utils.generateSwarmParams() or None.

  :param logger: logger object

  :returns: True if model was started; False if not

  :raises htmengine.exceptions.ObjectNotFoundError: if metric with the
    referenced metric uid doesn't exist

  :raises htmengine.exceptions.MetricStatusChangedError: if Metric status was
      changed by someone else (most likely another process) before this
      operation could complete
  """
  modelStarted = False

  startTime = time.time()

  metricObj = repository.getMetric(conn, metricId)

  assert metricObj.status == MetricStatus.UNMONITORED, (
    "startMonitoring: metric=%s is already monitored; status=%s" % (
      metricId, metricObj.status,))

  if swarmParams is not None:
    # We have swarmParams, so start the model
    modelStarted = _startModelHelper(conn=conn,
                                     metricObj=metricObj,
                                     swarmParams=swarmParams,
                                     logger=logger)
  else:
    # Put the metric into the PENDING_DATA state until enough data arrives for
    # stats
    refStatus = metricObj.status

    repository.setMetricStatus(conn,
                               metricId,
                               MetricStatus.PENDING_DATA,
                               refStatus=refStatus)
    # refresh
    metricStatus = repository.getMetric(conn,
                                        metricId,
                                        fields=[schema.metric.c.status]).status

    if metricStatus == MetricStatus.PENDING_DATA:
      logger.info("startMonitoring: promoted metric to model in PENDING_DATA; "
                  "metric=%s; duration=%.4fs",
                  metricId, time.time() - startTime)
    else:
      raise app_exceptions.MetricStatusChangedError(
        "startMonitoring: unable to promote metric=%s to model as "
        "PENDING_DATA; metric status morphed from %s to %s"
        % (metricId, refStatus, metricStatus,))

  return modelStarted
Example #6
def _startModelHelper(conn, metricObj, swarmParams, logger):
  """ Start the model

  :param conn: SQLAlchemy Connection object for executing SQL
  :type conn: sqlalchemy.engine.Connection

  :param metricObj: metric, freshly-loaded
  :type metricObj: sqlalchemy.engine.RowProxy (see repository.getMetric())

  :param swarmParams: non-None swarmParams generated via
    scalar_metric_utils.generateSwarmParams().

  :param logger: logger object

  :returns: True if model was started; False if not

  :raises htmengine.exceptions.ObjectNotFoundError: if the metric doesn't exist;
      this may happen if it got deleted by another process in the meantime.

  :raises htmengine.exceptions.MetricStatusChangedError: if Metric status was
      changed by someone else (most likely another process) before this
      operation could complete
  """
  if swarmParams is None:
    raise ValueError(
      "startModel: 'swarmParams' must be non-None: metric=%s"
      % (metricObj.uid,))

  if metricObj.status not in (MetricStatus.UNMONITORED,
                              MetricStatus.PENDING_DATA):
    if metricObj.status in (MetricStatus.CREATE_PENDING, MetricStatus.ACTIVE):
      return False

    logger.error("Unexpected metric status; metric=%r", metricObj)
    raise ValueError("startModel: unexpected metric status; metric=%r"
                     % (metricObj,))

  startTime = time.time()

  # Save swarm parameters and update metric status
  refStatus = metricObj.status
  repository.updateMetricColumnsForRefStatus(
    conn,
    metricObj.uid,
    refStatus,
    {"status": MetricStatus.CREATE_PENDING,
     "model_params": htmengine.utils.jsonEncode(swarmParams)})

  metricObj = repository.getMetric(conn,
                                   metricObj.uid,
                                   fields=[schema.metric.c.uid,
                                           schema.metric.c.status]) # refresh

  if metricObj.status != MetricStatus.CREATE_PENDING:
    raise app_exceptions.MetricStatusChangedError(
      "startModel: unable to start model=%s; "
      "metric status morphed from %s to %s"
      % (metricObj.uid, refStatus, metricObj.status,))

  # Request to create the CLA model
  try:
    model_swapper_utils.createHTMModel(metricObj.uid, swarmParams)
  except Exception:
    logger.exception("startModel: createHTMModel failed.")
    repository.setMetricStatus(conn,
                               metricObj.uid,
                               status=MetricStatus.ERROR,
                               message=repr(sys.exc_info()[1]))
    raise

  logger.info("startModel: started model=%r; duration=%.4fs",
              metricObj, time.time() - startTime)

  return True
Example #7
    def _scrubInferenceResultsAndInitMetricData(self, engine, inferenceResults,
                                                metricDataRows, metricObj):
        """ Validate the given inferenceResults against metricDataRows, update
    corresponding MetricData instances by initializing their
    `raw_anomaly_score` property from results and the `anomaly_score` property
    with 0. Replace elements in metricDataRows with MutableMetricDataRow
    objects.

    *NOTE:* does NOT update the MetricData instances to the database (we do that
    once after we process the batch for efficiency)

    :param engine: SQLAlchemy engine object
    :type engine: sqlalchemy.engine.Engine

    :param inferenceResults: a sequence of ModelInferenceResult instances
      representing the inference result batch ordered by row id

    :param metricDataRows: a mutable list of MetricData instances with row ids
      in the range of inferenceResults[0].rowID to inferenceResults[-1].rowID

    :param metricObj: a Metric instance associated with the given
      inferenceResults

    :raises RejectedInferenceResultBatch: if the given result batch is rejected
    """

        for result, enumeratedMetricData in itertools.izip_longest(
                inferenceResults, enumerate(metricDataRows)):

            if enumeratedMetricData is None:
                raise RejectedInferenceResultBatch(
                    "No MetricData row for inference result=%r of model=<%r>" %
                    (result, metricObj))
            index, metricData = enumeratedMetricData

            if result is None:
                raise RejectedInferenceResultBatch(
                    "Truncated inference result batch; no result for metric data row=%r "
                    "of model=<%r>" % (metricData, metricObj))

            if metricData is None:
                raise RejectedInferenceResultBatch(
                    "No MetricData row for inference result=%r of model=<%r>" %
                    (result, metricObj))

            if result.rowID != metricData.rowid:
                raise RejectedInferenceResultBatch(
                    "RowID mismatch between inference result=%r and ModelData row=%r of "
                    "model=<%r>" % (result, metricData, metricObj))

            if metricData.raw_anomaly_score is not None:
                # Side-effect of at-least-once delivery guarantee?
                self._log.error(
                    "Anomaly was already processed on data row=%s; new result=%r",
                    metricData, result)

            # Validate the result
            if result.status != 0:
                self._log.error(result.errorMessage)
                if metricObj.status == MetricStatus.ERROR:
                    raise RejectedInferenceResultBatch(
                        "inferenceResult=%r failed and model=<%s> was in ERROR state"
                        % (result, getMetricLogPrefix(metricObj)))
                else:
                    self._log.error(
                        "Placing model=<%r> in ERROR state due to "
                        "inferenceResult=%r", metricObj, result)
                    with engine.connect() as conn:
                        repository.setMetricStatus(conn, metricObj.uid,
                                                   MetricStatus.ERROR,
                                                   result.errorMessage)
                    raise RejectedInferenceResultBatch(
                        "inferenceResult=%r failed and model=<%s> promoted to ERROR state"
                        % (result, getMetricLogPrefix(metricObj)))

            #self._log.info("{TAG:ANOM.METRIC} metric=%s:%s:%s",
            #               metricObj.name,
            #               calendar.timegm(metricData.timestamp.timetuple()),
            #               metricData.metric_value)

            mutableMetricData = MutableMetricDataRow(
                **dict(metricData.items()))
            mutableMetricData.raw_anomaly_score = result.anomalyScore
            mutableMetricData.anomaly_score = 0
            metricDataRows[index] = mutableMetricData
Example #8
    def _processModelCommandResult(self, metricID, result):
        """
    Process a single model command result
    """
        engine = repository.engineFactory(config)

        # Check if deleting model
        if result.method == "deleteModel":
            self._log.info("Model=%s was deleted", metricID)
            return

        # Validate model ID
        try:
            # NOTE: use shared lock to prevent race condition with adapter's
            # monitorMetric, whereby adapter creates and/or activates a metric inside
            # a transaction, and we might get the defineModel command before the
            # metric row updates are committed
            with engine.connect() as conn:
                metricObj = repository.getMetricWithSharedLock(conn, metricID)
        except ObjectNotFoundError:
            # This may occur if the user deleted the model while its result
            # messages were still pending on the message bus.
            self._log.warn(
                "Received command result=%r for unknown model=%s "
                "(model deleted?)", result, metricID)
            return

        if result.status != 0:
            self._log.error(result.errorMessage)
            if metricObj.status != MetricStatus.ERROR:
                self._log.error(
                    "Placing model=<%s> in ERROR state due to "
                    "commandResult=%s", getMetricLogPrefix(metricObj), result)
                with engine.connect() as conn:
                    repository.setMetricStatus(conn, metricID,
                                               MetricStatus.ERROR,
                                               result.errorMessage)

            else:
                # NOTE: could be a race condition between app-layer and Model Swapper
                #   or a side-effect of the at-least-once delivery guarantee
                self._log.warn(
                    "Received command result=%r for metricID=%s of "
                    "metric=<%s> that was already in ERROR state", result,
                    metricID, getMetricLogPrefix(metricObj))
            return

        # Create Model
        if result.method == "defineModel":
            self._log.info("Model was created for <%s>" %
                           (getMetricLogPrefix(metricObj)))

            if metricObj.status == MetricStatus.CREATE_PENDING:
                with engine.connect() as conn:
                    repository.setMetricStatus(conn, metricID,
                                               MetricStatus.ACTIVE)
            else:
                # NOTE: could be a race condition between app-layer and Model Swapper
                #   or a side-effect of the at-least-once delivery guarantee
                self._log.warn(
                    "Received command result=%r for model=%s of metric=<%s> "
                    "that was not in CREATE_PENDING state", result, metricID,
                    getMetricLogPrefix(metricObj))
            return

        self._log.error("Unexpected model result=%r", result)