示例#1
0
  def testCalcScoreByThresholdReturnsExpectedScores(self):
    """Check calcScoreByThreshold() against hand-computed rows.

    A Sweeper whose fnWeight is set to 5.0 is given seven AnomalyPoints; the
    list it returns must equal five hand-built ThresholdScore rows, listed
    from the highest threshold (1.1) down to the lowest (0.0).

    NOTE(review): the positional meaning of the AnomalyPoint and
    ThresholdScore fields is not visible here -- confirm against their
    definitions before relying on the inline remarks below.
    """
    missPenalty = 5.0
    sweeper = Sweeper()
    sweeper.fnWeight = missPenalty

    # One tuple of positional AnomalyPoint arguments per input row.
    pointArgs = [
      (0, 0.5, -1000, 'probationary'),  # Should never contribute to score (probationary)
      (1, 0.5, -1000, 'probationary'),  # Should never contribute to score (probationary)
      (2, 0.0, -3, None),  # Should never contribute to score (anomaly == 0.0)
      (4, 0.2, 20, 'windowA'),  # Should be used instead of next row when threshold <= 0.2
      (5, 0.3, 10, 'windowA'),  # Should be used for windowA _until_ threshold <= 0.2
      (6, 0.5, 5, 'windowB'),  # Only score for windowB, but won't be used until threshold <= 0.5
      (7, 0.5, -3, None),
    ]
    sweepInput = [AnomalyPoint(*fields) for fields in pointArgs]

    # One tuple of positional ThresholdScore arguments per expected row.
    scoreArgs = [
      (1.1, -2 * missPenalty, 0, 2, 0, 3, 5),  # two windows, both false negatives at this threshold
      (0.5, 5 - 3 - missPenalty, 1, 1, 1, 2, 5),  # Both 'anomalyScore == 0.5' score, windowA is still FN
      (0.3, 5 - 3 + 10, 2, 1, 1, 1, 5),  # Both windows now have a TP
      (0.2, 5 - 3 + 20, 3, 1, 1, 0, 5),  # windowA gets a new max value due to row 4 becoming active
      (0.0, 5 - 3 + 20 - 3, 3, 0, 2, 0, 5),
    ]
    wanted = [ThresholdScore(*fields) for fields in scoreArgs]

    got = sweeper.calcScoreByThreshold(sweepInput)

    assert got == wanted
    def testCalcScoreByThresholdReturnsExpectedScores(self):
        """Verify the per-threshold scores produced by a Sweeper.

        Builds a Sweeper with fnWeight forced to 5.0, passes it a fixed list
        of seven AnomalyPoints, and asserts that calcScoreByThreshold()
        returns exactly the five ThresholdScore rows written out below.

        NOTE(review): what each positional AnomalyPoint / ThresholdScore
        field means is not shown in this snippet -- confirm against their
        definitions.
        """
        penalty = 5.0
        sweeper = Sweeper()
        sweeper.fnWeight = penalty

        # Should never contribute to score (probationary).
        probationRows = [
            AnomalyPoint(0, 0.5, -1000, 'probationary'),
            AnomalyPoint(1, 0.5, -1000, 'probationary'),
        ]
        # Should never contribute to score (anomaly == 0.0).
        zeroAnomalyRows = [
            AnomalyPoint(2, 0.0, -3, None),
        ]
        windowRows = [
            # Should be used instead of next row when threshold <= 0.2
            AnomalyPoint(4, 0.2, 20, 'windowA'),
            # Should be used for windowA _until_ threshold <= 0.2
            AnomalyPoint(5, 0.3, 10, 'windowA'),
            # Only score for windowB, but won't be used until threshold <= 0.5
            AnomalyPoint(6, 0.5, 5, 'windowB'),
        ]
        trailingRows = [
            AnomalyPoint(7, 0.5, -3, None),
        ]
        points = probationRows + zeroAnomalyRows + windowRows + trailingRows

        expected = [
            # two windows, both false negatives at this threshold
            ThresholdScore(1.1, -2 * penalty, 0, 2, 0, 3, 5),
            # Both 'anomalyScore == 0.5' score, windowA is still FN
            ThresholdScore(0.5, 5 - 3 - penalty, 1, 1, 1, 2, 5),
            # Both windows now have a TP
            ThresholdScore(0.3, 5 - 3 + 10, 2, 1, 1, 1, 5),
            # windowA gets a new max value due to row 4 becoming active
            ThresholdScore(0.2, 5 - 3 + 20, 3, 1, 1, 0, 5),
            ThresholdScore(0.0, 5 - 3 + 20 - 3, 3, 0, 2, 0, 5),
        ]

        result = sweeper.calcScoreByThreshold(points)

        assert result == expected
示例#3
0
def optimizeThreshold(args):
    """Optimize the threshold for a given combination of detector and profile.

    Sweep scores are computed for every data file in the results corpus
    (entries whose path contains "_scores.csv" are skipped), pooled, and
    scored per threshold; the threshold with the highest score is reported.

    @param args       (tuple)   Contains, in this order:

      detectorName        (string)            Name of detector.

      costMatrix          (dict)              Cost matrix to weight the true
                                              positives, false negatives, and
                                              false positives during scoring.
      resultsCorpus       (nab.Corpus)        Corpus object that holds the
                                              per record anomaly scores for a
                                              given detector.
      corpusLabel         (nab.CorpusLabel)   Ground truth anomaly labels for
                                              the NAB corpus.
      probationaryPercent (float)             Percent of each data file not
                                              to be considered during scoring.

    @return (dict) Contains:
      "threshold" (float)   Threshold that returns the largest score from the
                            objective function.

      "score"     (float)   The score from the objective function given the
                            threshold.
    """
    (detectorName, costMatrix, resultsCorpus, corpusLabel,
     probationaryPercent) = args

    sweeper = Sweeper(probationPercent=probationaryPercent,
                      costMatrix=costMatrix)

    # First, get the sweep-scores for each row in each data set.
    # items() is used instead of the Python-2-only iteritems() so the loop
    # works on both Python 2 and Python 3 dictionaries.
    allAnomalyRows = []
    for resultsPath, dataSet in resultsCorpus.dataFiles.items():
        if "_scores.csv" in resultsPath:
            continue

        # dataPath: raw dataset file,
        # e.g. 'artificialNoAnomaly/art_noisy.csv'
        dataPath = convertResultsPathToDataPath(
            os.path.join(detectorName, resultsPath))

        windows = corpusLabel.windows[dataPath]
        timestamps = corpusLabel.labels[dataPath]['timestamp']
        anomalyScores = dataSet.data["anomaly_score"]

        allAnomalyRows.extend(
            sweeper.calcSweepScore(timestamps, anomalyScores, windows,
                                   dataPath))

    # Get scores by threshold for the entire corpus and keep the best one.
    # max() returns the first maximal entry, i.e. the same row a stable
    # descending sort would have placed first, without sorting everything.
    scoresByThreshold = sweeper.calcScoreByThreshold(allAnomalyRows)
    bestParams = max(scoresByThreshold, key=lambda row: row.score)

    print(
        "Optimizer found a max score of {} with anomaly threshold {}.".format(
            bestParams.score, bestParams.threshold))

    return {"threshold": bestParams.threshold, "score": bestParams.score}
示例#4
0
文件: optimizer.py 项目: numenta/NAB
def optimizeThreshold(args):
  """Optimize the threshold for a given combination of detector and profile.

  For each data file in the results corpus (skipping any whose path contains
  "_scores.csv") the sweep scores are computed and collected; the collected
  rows are then scored per threshold and the best-scoring threshold is
  returned.

  @param args       (tuple)   Contains, in this order:

    detectorName        (string)                Name of detector.

    costMatrix          (dict)                  Cost matrix to weight the
                                                true positives, false negatives,
                                                and false positives during
                                                scoring.
    resultsCorpus       (nab.Corpus)            Corpus object that holds the per
                                                record anomaly scores for a
                                                given detector.
    corpusLabel         (nab.CorpusLabel)       Ground truth anomaly labels for
                                                the NAB corpus.
    probationaryPercent (float)                 Percent of each data file not
                                                to be considered during scoring.

  @return (dict) Contains:
        "threshold" (float)   Threshold that returns the largest score from the
                              objective function.

        "score"     (float)   The score from the objective function given the
                              threshold.
  """
  (detectorName,
   costMatrix,
   resultsCorpus,
   corpusLabel,
   probationaryPercent) = args

  sweeper = Sweeper(
    probationPercent=probationaryPercent,
    costMatrix=costMatrix
  )

  # First, get the sweep-scores for each row in each data set.
  # dict.iteritems() exists only on Python 2; items() behaves the same for
  # this loop and is available on both Python 2 and Python 3.
  allAnomalyRows = []
  for resultsPath, dataSet in resultsCorpus.dataFiles.items():
    if "_scores.csv" in resultsPath:
      continue

    # dataPath: raw dataset file,
    # e.g. 'artificialNoAnomaly/art_noisy.csv'
    dataPath = convertResultsPathToDataPath(
      os.path.join(detectorName, resultsPath))

    windows = corpusLabel.windows[dataPath]
    labels = corpusLabel.labels[dataPath]
    timestamps = labels['timestamp']
    anomalyScores = dataSet.data["anomaly_score"]

    curAnomalyRows = sweeper.calcSweepScore(
      timestamps,
      anomalyScores,
      windows,
      dataPath
    )
    allAnomalyRows.extend(curAnomalyRows)

  # Get scores by threshold for the entire corpus, then pick the top one.
  # max() yields the first entry with the highest score, which is the entry
  # a stable descending sort would have put at index 0.
  scoresByThreshold = sweeper.calcScoreByThreshold(allAnomalyRows)
  bestParams = max(scoresByThreshold, key=lambda row: row.score)

  print("Optimizer found a max score of {} with anomaly threshold {}.".format(
    bestParams.score, bestParams.threshold
  ))

  return {
    "threshold": bestParams.threshold,
    "score": bestParams.score
  }