def testCalcScoreByThresholdReturnsExpectedScores(self):
    """Compare calcScoreByThreshold output with hand-computed threshold rows.

    The fixture mixes probationary points, points outside any window and
    points in two named windows; the expected rows are expressed in terms of
    the false-negative weight configured on the sweeper.
    """
    fnWeight = 5.0
    sweeper = Sweeper()
    sweeper.fnWeight = fnWeight

    points = [
        # Probationary rows carry a huge negative score so that any
        # accidental contribution would be obvious in the totals.
        AnomalyPoint(0, 0.5, -1000, 'probationary'),
        AnomalyPoint(1, 0.5, -1000, 'probationary'),
        # Zero anomaly score and no window.
        AnomalyPoint(2, 0.0, -3, None),
        # Should be used instead of the next row once threshold <= 0.2.
        AnomalyPoint(4, 0.2, 20, 'windowA'),
        # Should be used for windowA _until_ threshold <= 0.2.
        AnomalyPoint(5, 0.3, 10, 'windowA'),
        # Only score for windowB; not used until threshold <= 0.5.
        AnomalyPoint(6, 0.5, 5, 'windowB'),
        AnomalyPoint(7, 0.5, -3, None),
    ]

    expected = [
        # Two windows, both false negatives at this threshold.
        ThresholdScore(1.1, -2 * fnWeight, 0, 2, 0, 3, 5),
        # Both 'anomalyScore == 0.5' rows score; windowA is still a FN.
        ThresholdScore(0.5, 5 - 3 - fnWeight, 1, 1, 1, 2, 5),
        # Both windows now have a TP.
        ThresholdScore(0.3, 5 - 3 + 10, 2, 1, 1, 1, 5),
        # windowA gets a new max value because row 4 becomes active.
        ThresholdScore(0.2, 5 - 3 + 20, 3, 1, 1, 0, 5),
        ThresholdScore(0.0, 5 - 3 + 20 - 3, 3, 0, 2, 0, 5),
    ]

    assert sweeper.calcScoreByThreshold(points) == expected
def testScoringAllMetrics(self):
    """
    Score an example set of detections in which all metrics have counts > 0.

    Three detections are planted relative to the start of the first window:
    a true positive, an additional true positive, and a false positive.
    The resulting score is compared to a reference value to 4 decimal places.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 100
    numWindows = 2
    windowSize = 5
    threshold = 0.5

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    anomalyScores = pandas.Series([0] * length)

    # Position of the first record of the first window.
    index = timestamps[timestamps == windows[0][0]].index[0]

    # TP, add'l TP, and FP
    anomalyScores[index] = 1
    anomalyScores[index + 1] = 1
    anomalyScores[index + 7] = 1

    sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    (_, matchingRow) = sweeper.scoreDataSet(
        timestamps, anomalyScores, windows, "testData", threshold)

    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias,
    # which no longer exists in recent Python versions.
    self.assertAlmostEqual(matchingRow.score, -0.9540, 4)
    self._checkCounts(matchingRow, length - windowSize * numWindows - 1, 2, 1, 8)
def testOneFalsePositiveNoWindow(self):
    """
    With no window (i.e. no anomaly) a single detection is a false positive,
    and the score must be exactly the negated FP weight.
    """
    length = 1000
    numWindows = 0
    windowSize = 10
    threshold = 0.5

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)

    # One detection, on the very first record.
    detections = pandas.Series([0] * length)
    detections[0] = 1

    scorer = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, row = scorer.scoreDataSet(
        timestamps, detections, windows, "testData", threshold)

    self.assertEqual(row.score, -self.costMatrix["fpWeight"])
    self._checkCounts(row, length - windowSize * numWindows - 1, 0, 1,
                      windowSize * numWindows)
def testPrepareScoreParts(self):
    """Check the initial score-part mapping built from a list of points.

    Expect one entry for all false positives ("fp") plus one entry per unique
    window name, each window starting at ``-fnWeight``.
    """
    fnWeight = 33.0
    sweeper = Sweeper()
    sweeper.fnWeight = fnWeight

    points = [
        AnomalyPoint(0, 0.5, 0, 'probationary'),
        AnomalyPoint(1, 0.5, 0, 'probationary'),
        AnomalyPoint(2, 0.0, 0, None),
        AnomalyPoint(4, 0.2, 0, 'windowA'),
        AnomalyPoint(5, 0.2, 0, 'windowA'),
        AnomalyPoint(6, 0.5, 0, 'windowB'),
        AnomalyPoint(7, 0.5, 0, None),
    ]

    expected = {"fp": 0, "windowA": -fnWeight, "windowB": -fnWeight}

    assert sweeper._prepareScoreByThresholdParts(points) == expected
def testFourFalseNegatives(self):
    """
    Four windows with no detections at all must score exactly four times the
    negated false-negative weight.
    """
    length = 2000
    numWindows = 4
    windowSize = 10
    threshold = 1

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)

    # No detections anywhere: every window is missed.
    detections = pandas.Series([0] * length)

    scorer = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, row = scorer.scoreDataSet(
        timestamps, detections, windows, "testData", threshold)

    self.assertEqual(row.score, 4 * -self.costMatrix["fnWeight"])
    self._checkCounts(row, length - windowSize * numWindows, 0, 0,
                      windowSize * numWindows)
def testFalsePositiveMeansNegativeScore(self):
    """
    A single false positive (and nothing else detected) must produce a
    negative score.
    """
    length = 1000
    numWindows = 1
    windowSize = 10
    threshold = 0.5

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)

    # One detection on the very first record.
    detections = pandas.Series([0] * length)
    detections[0] = 1

    scorer = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, row = scorer.scoreDataSet(
        timestamps, detections, windows, "testData", threshold)

    self.assertTrue(row.score < 0)
    self._checkCounts(row, length - windowSize * numWindows - 1, 0, 1,
                      windowSize * numWindows)
def testOneFalsePositiveNoWindow(self):
    """
    With no window (i.e. no anomaly) a single detection is a false positive,
    and the score must be exactly the negated FP weight.
    """
    length = 1000
    numWindows = 0
    windowSize = 10
    threshold = 0.5

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)

    # One detection, on the very first record.
    detections = pandas.Series([0] * length)
    detections[0] = 1

    scorer = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, row = scorer.scoreDataSet(
        timestamps, detections, windows, "testData", threshold)

    self.assertEqual(row.score, -self.costMatrix["fpWeight"])
    self._checkCounts(row, length - windowSize * numWindows - 1, 0, 1,
                      windowSize * numWindows)
def testPrepareScoreParts(self):
    """Check the initial score-part mapping built from a list of points.

    Expect one entry for all false positives ("fp") plus one entry per unique
    window name, each window starting at ``-fnWeight``.
    """
    fnWeight = 33.0
    sweeper = Sweeper()
    sweeper.fnWeight = fnWeight

    points = [
        AnomalyPoint(0, 0.5, 0, 'probationary'),
        AnomalyPoint(1, 0.5, 0, 'probationary'),
        AnomalyPoint(2, 0.0, 0, None),
        AnomalyPoint(4, 0.2, 0, 'windowA'),
        AnomalyPoint(5, 0.2, 0, 'windowA'),
        AnomalyPoint(6, 0.5, 0, 'windowB'),
        AnomalyPoint(7, 0.5, 0, None),
    ]

    expected = {"fp": 0, "windowA": -fnWeight, "windowB": -fnWeight}

    assert sweeper._prepareScoreByThresholdParts(points) == expected
def testScoringAllMetrics(self):
    """
    Score an example set of detections in which all metrics have counts > 0.

    Three detections are planted relative to the start of the first window:
    a true positive, an additional true positive, and a false positive.
    The resulting score is compared to a reference value to 4 decimal places.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 100
    numWindows = 2
    windowSize = 5
    threshold = 0.5

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    anomalyScores = pandas.Series([0] * length)

    # Position of the first record of the first window.
    index = timestamps[timestamps == windows[0][0]].index[0]

    # TP, add'l TP, and FP
    anomalyScores[index] = 1
    anomalyScores[index + 1] = 1
    anomalyScores[index + 7] = 1

    sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    (_, matchingRow) = sweeper.scoreDataSet(
        timestamps, anomalyScores, windows, "testData", threshold)

    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias,
    # which no longer exists in recent Python versions.
    self.assertAlmostEqual(matchingRow.score, -0.9540, 4)
    self._checkCounts(matchingRow, length - windowSize * numWindows - 1, 2, 1, 8)
def testFirstTruePositiveWithinWindow(self):
    """
    A lone detection on the first record of the window must score
    self.costMatrix["tpWeight"] (compared with exact equality here).
    """
    length = 10
    numWindows = 1
    windowSize = 2
    threshold = 0.5

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)

    # Set a single true positive on the window's first timestamp.
    detections = pandas.Series([0] * length)
    windowStart = windows[0][0]
    detections[timestamps[timestamps == windowStart].index[0]] = 1.0

    scorer = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, row = scorer.scoreDataSet(
        timestamps, detections, windows, "testData", threshold)

    self.assertEqual(row.score, self.costMatrix["tpWeight"])
    self._checkCounts(row, length - windowSize * numWindows, 1, 0,
                      windowSize * numWindows - 1)
def testOptimizerInit(self):
    """Check that Sweeper's constructor arguments land on its attributes."""
    # A default-constructed sweeper still has some probation percentage.
    assert Sweeper().probationPercent is not None

    # An explicit probation percentage is stored as given.
    assert Sweeper(probationPercent=0.30).probationPercent == 0.30

    # Each cost-matrix entry is exposed as its own attribute.
    weighted = Sweeper(costMatrix={"tpWeight": 0, "fpWeight": 1, "fnWeight": 2})
    assert weighted.tpWeight == 0
    assert weighted.fpWeight == 1
    assert weighted.fnWeight == 2
def scoreDataSet(args):
    """Score a single dataset of the corpus.

    When the score flag in ``args`` is truthy, the per-record scores are also
    written back into the results CSV at ``outputPath`` as a new column named
    "S(t)_<profileName>".

    @param args   (tuple)   Packed arguments, in order: detectorName,
                            profileName, relativePath, outputPath, threshold,
                            timestamps, anomalyScores, windows, costMatrix,
                            probationaryPercent, scoreFlag.

    @return       (tuple)   Contains:
      detectorName  (string)  Name of detector used to get anomaly scores.
      profileName   (string)  Name of profile used to weight each detection
                              type (tp, tn, fp, fn).
      relativePath  (string)  Path of dataset scored.
      threshold     (float)   Threshold used to convert anomaly scores to
                              detections.
      score         (float)   The score of the dataset.
      counts, tp    (int)     The number of true positive records.
      counts, tn    (int)     The number of true negative records.
      counts, fp    (int)     The number of false positive records.
      counts, fn    (int)     The number of false negative records.
      total count   (int)     The total number of records.
    """
    (detectorName, profileName, relativePath, outputPath, threshold,
     timestamps, anomalyScores, windows, costMatrix, probationaryPercent,
     scoreFlag) = args

    sweeper = Sweeper(probationPercent=probationaryPercent,
                      costMatrix=costMatrix)
    rowScores, best = sweeper.scoreDataSet(
        timestamps, anomalyScores, windows, relativePath, threshold)

    if scoreFlag:
        # Append scoring function values to the respective results file.
        results = pandas.read_csv(outputPath, header=0, parse_dates=[0])
        results["S(t)_%s" % profileName] = rowScores
        results.to_csv(outputPath, index=False)

    return (detectorName, profileName, relativePath, threshold,
            best.score, best.tp, best.tn, best.fp, best.fn, best.total)
def testCalcSweepScoreWindowScoreInteraction(self):
    """Scores inside a window should be positive; all others should be negative."""
    numRows = 100
    fakeName = "TestDataSet"
    fakeAnomalyScores = [1] * numRows
    # We'll use numbers, even though real data uses dates.
    fakeTimestamps = list(range(numRows))

    windowLimits = [(30, 39), (75, 95)]
    # Window limits are inclusive on both ends, hence the + 1.
    expectedInWindowCount = sum(
        last - first + 1 for (first, last) in windowLimits)

    # Standard profile
    costMatrix = {
        "tpWeight": 1.0,
        "fnWeight": 1.0,
        "fpWeight": 0.11,
    }
    sweeper = Sweeper(probationPercent=0.1, costMatrix=costMatrix)
    scoredAnomalies = sweeper.calcSweepScore(
        fakeTimestamps, fakeAnomalyScores, windowLimits, fakeName)

    # One AnomalyPoint per input row.
    assert len(scoredAnomalies) == numRows
    assert all(isinstance(p, AnomalyPoint) for p in scoredAnomalies)

    outsideLabels = ("probationary", None)

    # Expected number of points marked 'probationary'.
    probationaryCount = len(
        [p for p in scoredAnomalies if p.windowName == "probationary"])
    assert probationaryCount == sweeper._getProbationaryLength(numRows)

    # Expected number of points marked as being in a window.
    inWindowCount = len(
        [p for p in scoredAnomalies if p.windowName not in outsideLabels])
    assert inWindowCount == expectedInWindowCount

    # Points in a window have positive score; others have negative score.
    for point in scoredAnomalies:
        if point.windowName in outsideLabels:
            assert point.sweepScore < 0
        else:
            assert point.sweepScore > 0
def testTruePositiveAtRightEdgeOfWindow(self):
    """
    A true positive at the right edge of a window should yield a score of
    approximately zero: the scaled sigmoid scoring function crosses zero
    between a window's last timestamp and the timestamp immediately
    following the window.
    """
    length = 1000
    numWindows = 1
    windowSize = 100
    threshold = 0.5

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    detections = pandas.Series([0] * length)
    scorer = Sweeper(probationPercent=0, costMatrix=self.costMatrix)

    # Make prediction at end of the window; TP.
    windowEnd = windows[0][1]
    edge = timestamps[timestamps == windowEnd].index[0]
    detections[edge] = 1
    _, insideRow = scorer.scoreDataSet(
        timestamps, detections, windows, "testData", threshold)

    # Move the prediction to just after the window; FP.
    detections[edge] = 0
    detections[edge + 1] = 1
    _, outsideRow = scorer.scoreDataSet(
        timestamps, detections, windows, "testData", threshold)

    # TP score + FP score + 1 should be very close to 0; the 1 is added to
    # account for the subsequent FN contribution.
    self.assertAlmostEqual(insideRow.score + outsideRow.score + 1, 0.0, 3)
    self._checkCounts(insideRow, length - windowSize * numWindows, 1, 0,
                      windowSize * numWindows - 1)
    self._checkCounts(outsideRow, length - windowSize * numWindows - 1, 0, 1,
                      windowSize * numWindows)
def testPrepAnomalyListForScoring(self):
    """Check filtering and ordering of points prior to scoring.

    Expected: sorted by anomaly score descending, with probationary rows
    filtered out.
    """
    points = [
        AnomalyPoint(0, 0.5, 0, 'probationary'),  # filter because 'probationary'
        AnomalyPoint(1, 0.5, 0, 'probationary'),  # filter because 'probationary'
        AnomalyPoint(2, 0.0, 0, None),
        AnomalyPoint(3, 0.1, 0, None),
        AnomalyPoint(4, 0.2, 0, 'windowA'),
        AnomalyPoint(5, 0.5, 0, 'windowB'),
        AnomalyPoint(6, 0.5, 0, None),
        AnomalyPoint(7, 0.0, 0, None),
    ]

    expected = [
        AnomalyPoint(5, 0.5, 0, 'windowB'),
        AnomalyPoint(6, 0.5, 0, None),
        AnomalyPoint(4, 0.2, 0, 'windowA'),
        AnomalyPoint(3, 0.1, 0, None),
        AnomalyPoint(2, 0.0, 0, None),
        AnomalyPoint(7, 0.0, 0, None),
    ]

    # NOTE(review): this Sweeper instance is never used below; the helper is
    # called as a free function. Confirm whether a method call on `o` was
    # intended.
    o = Sweeper()
    assert prepAnomalyListForScoring(points) == expected
def testEarlierTruePositiveIsBetter(self):
    """
    If two algorithms both get a true positive within a window, the one whose
    true positive comes earlier in the window should get the higher score.
    """
    length = 10
    numWindows = 1
    windowSize = 2
    threshold = 0.5

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    earlyDetections = pandas.Series([0] * length)
    lateDetections = pandas.Series([0] * length)
    windowStart, windowEnd = windows[0]

    # First algorithm: detection on the window's first timestamp.
    earlyDetections[timestamps[timestamps == windowStart].index[0]] = 1
    scorer = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, earlyRow = scorer.scoreDataSet(
        timestamps, earlyDetections, windows, "testData", threshold)

    # Second algorithm: detection on the window's last timestamp.
    lateDetections[timestamps[timestamps == windowEnd].index[0]] = 1
    _, lateRow = scorer.scoreDataSet(
        timestamps, lateDetections, windows, "testData", threshold)

    earlyScore = earlyRow.score
    lateScore = lateRow.score
    self.assertTrue(
        earlyScore > lateScore,
        "The earlier TP score is not greater than "
        "the later TP. They are %f and %f, respectively."
        % (earlyScore, lateScore))
    self._checkCounts(earlyRow, length - windowSize * numWindows, 1, 0,
                      windowSize * numWindows - 1)
    self._checkCounts(lateRow, length - windowSize * numWindows, 1, 0,
                      windowSize * numWindows - 1)
def testRewardLowFalseNegatives(self):
    """
    Score a detection-free dataset (one missed window) under the fixture's
    cost matrix and under a copy with fnWeight = 2.0 and fpWeight = 0.055.
    The test asserts that the first score is exactly half of the second.
    """
    length = 100
    numWindows = 1
    windowSize = 10
    threshold = 0.5

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    detections = pandas.Series([0] * length)

    # Deep-copy so the fixture's own cost matrix is left untouched.
    costMatrixFN = copy.deepcopy(self.costMatrix)
    costMatrixFN["fnWeight"] = 2.0
    costMatrixFN["fpWeight"] = 0.055

    standardScorer = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    lowFNScorer = Sweeper(probationPercent=0, costMatrix=costMatrixFN)

    _, standardRow = standardScorer.scoreDataSet(
        timestamps, detections, windows, "testData", threshold)
    _, lowFNRow = lowFNScorer.scoreDataSet(
        timestamps, detections, windows, "testData", threshold)

    self.assertEqual(standardRow.score, 0.5 * lowFNRow.score)
    self._checkCounts(standardRow, length - windowSize * numWindows, 0, 0,
                      windowSize * numWindows)
    self._checkCounts(lowFNRow, length - windowSize * numWindows, 0, 0,
                      windowSize * numWindows)
def testOnlyScoreFirstTruePositiveWithinWindow(self):
    """
    An algorithm making multiple detections within a window (i.e. true
    positives) should only be scored for the earliest one: adding a second
    detection later in the window must not change the score.
    """
    length = 10
    numWindows = 1
    windowSize = 2
    threshold = 0.5

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    detections = pandas.Series([0] * length)
    windowStart, windowEnd = windows[0]

    # Score with a single true positive at start of window.
    detections[timestamps[timestamps == windowStart].index[0]] = 1
    scorer = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, singleRow = scorer.scoreDataSet(
        timestamps, detections, windows, "testData", threshold)

    # Add a second true positive to end of window and score again.
    detections[timestamps[timestamps == windowEnd].index[0]] = 1
    _, doubleRow = scorer.scoreDataSet(
        timestamps, detections, windows, "testData", threshold)

    self.assertEqual(singleRow.score, doubleRow.score)
    self._checkCounts(singleRow, length - windowSize * numWindows, 1, 0,
                      windowSize * numWindows - 1)
    self._checkCounts(doubleRow, length - windowSize * numWindows, 2, 0,
                      windowSize * numWindows - 2)
def testTruePositivesWithDifferentWindowSizes(self):
    """
    True positives at the left edge of windows should have the same score
    regardless of the width of the window.
    """
    length = 10
    numWindows = 1
    threshold = 0.5
    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length)

    # Narrow window with a detection on its first timestamp.
    windowSize1 = 2
    windows1 = generateWindows(timestamps, numWindows, windowSize1)
    leftEdge1 = timestamps[timestamps == windows1[0][0]].index[0]
    anomalyScores1 = pandas.Series([0] * length)
    anomalyScores1[leftEdge1] = 1

    # Wider window, again with a detection on its first timestamp.
    windowSize2 = 3
    windows2 = generateWindows(timestamps, numWindows, windowSize2)
    leftEdge2 = timestamps[timestamps == windows2[0][0]].index[0]
    anomalyScores2 = pandas.Series([0] * length)
    anomalyScores2[leftEdge2] = 1

    scorer = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, narrowRow = scorer.scoreDataSet(
        timestamps, anomalyScores1, windows1, "testData", threshold)
    _, wideRow = scorer.scoreDataSet(
        timestamps, anomalyScores2, windows2, "testData", threshold)

    self.assertEqual(narrowRow.score, wideRow.score)
    self._checkCounts(narrowRow, length - windowSize1 * numWindows, 1, 0,
                      windowSize1 * numWindows - 1)
    self._checkCounts(wideRow, length - windowSize2 * numWindows, 1, 0,
                      windowSize2 * numWindows - 1)
def testNullCase(self):
    """No windows and no predictions should yield a score of 0.0."""
    length = 10
    threshold = 0.5

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    detections = pandas.Series([0] * length)
    noWindows = []

    scorer = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, row = scorer.scoreDataSet(
        timestamps, detections, noWindows, "testData", threshold)

    self.assertEqual(row.score, 0.0)
    self._checkCounts(row, 10, 0, 0, 0)
def testFalsePositiveScaling(self):
    """
    Test that scaling the weight of false positives results in an approximate
    balance with the true positives.

    The contributions of TP and FP scores should approximately cancel; i.e.
    total score = 0. With x windows, this total score should on average
    decrease x/2 because of x FNs. Thus, the acceptable range for the score
    should be centered about -x/2.

    Note: detections are drawn with the unseeded `random` module, so the
    individual runs differ between invocations of the test.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 100
    numWindows = 1
    windowSize = 10
    threshold = 0.5

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)

    # Scale for 10% = windowSize/length. Work on a copy so the cost matrix
    # held by the test fixture is not modified.
    costMatrix = dict(self.costMatrix)
    costMatrix["fpWeight"] = 0.11
    sweeper = Sweeper(probationPercent=0, costMatrix=costMatrix)

    # Make arbitrary detections, score, repeat. The per-record scores returned
    # by scoreDataSet are deliberately discarded: binding them to the
    # accumulator's name would replace the list of dataset scores on every
    # iteration and corrupt the average computed below.
    datasetScores = []
    for _ in range(20):
        anomalyScores = pandas.Series([0] * length)
        indices = random.sample(range(length), 10)
        anomalyScores[indices] = 1
        (_rowScores, matchingRow) = sweeper.scoreDataSet(
            timestamps, anomalyScores, windows, "testData", threshold)
        datasetScores.append(matchingRow.score)

    avgScore = sum(datasetScores) / float(len(datasetScores))

    self.assertTrue(
        -1.5 <= avgScore <= 0.5,
        "The average score across 20 sets "
        "of random detections is %f, which is not within the acceptable range "
        "-1.5 to 0.5." % avgScore)
def testEarlierFalsePositiveAfterWindowIsBetter(self):
    """
    For two false positives A and B after a window, where A occurs earlier
    than B, the dataset containing only A must score higher than the dataset
    containing only B.
    """
    length = 10
    numWindows = 1
    windowSize = 2
    threshold = 0.5

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    anomalyScoresA = pandas.Series([0] * length)
    anomalyScoresB = pandas.Series([0] * length)

    # A sits on the record right after the window's last timestamp.
    _, windowEnd = windows[0]
    indexA = timestamps[timestamps == windowEnd].index[0] + 1
    anomalyScoresA[indexA] = 1
    scorerA = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, rowA = scorerA.scoreDataSet(
        timestamps, anomalyScoresA, windows, "testData", threshold)

    # B sits one record later than A.
    anomalyScoresB[indexA + 1] = 1
    scorerB = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, rowB = scorerB.scoreDataSet(
        timestamps, anomalyScoresB, windows, "testData", threshold)

    self.assertTrue(rowA.score > rowB.score)
    self._checkCounts(rowA, length - windowSize * numWindows - 1, 0, 1,
                      windowSize * numWindows)
    self._checkCounts(rowB, length - windowSize * numWindows - 1, 0, 1,
                      windowSize * numWindows)
def testCalcScoreByThresholdReturnsExpectedScores(self):
    """Compare calcScoreByThreshold output with hand-computed threshold rows.

    The fixture mixes probationary points, points outside any window and
    points in two named windows; the expected rows are expressed in terms of
    the false-negative weight configured on the sweeper.
    """
    fnWeight = 5.0
    sweeper = Sweeper()
    sweeper.fnWeight = fnWeight

    points = [
        # Probationary rows carry a huge negative score so that any
        # accidental contribution would be obvious in the totals.
        AnomalyPoint(0, 0.5, -1000, 'probationary'),
        AnomalyPoint(1, 0.5, -1000, 'probationary'),
        # Zero anomaly score and no window.
        AnomalyPoint(2, 0.0, -3, None),
        # Should be used instead of the next row once threshold <= 0.2.
        AnomalyPoint(4, 0.2, 20, 'windowA'),
        # Should be used for windowA _until_ threshold <= 0.2.
        AnomalyPoint(5, 0.3, 10, 'windowA'),
        # Only score for windowB; not used until threshold <= 0.5.
        AnomalyPoint(6, 0.5, 5, 'windowB'),
        AnomalyPoint(7, 0.5, -3, None),
    ]

    expected = [
        # Two windows, both false negatives at this threshold.
        ThresholdScore(1.1, -2 * fnWeight, 0, 2, 0, 3, 5),
        # Both 'anomalyScore == 0.5' rows score; windowA is still a FN.
        ThresholdScore(0.5, 5 - 3 - fnWeight, 1, 1, 1, 2, 5),
        # Both windows now have a TP.
        ThresholdScore(0.3, 5 - 3 + 10, 2, 1, 1, 1, 5),
        # windowA gets a new max value because row 4 becomes active.
        ThresholdScore(0.2, 5 - 3 + 20, 3, 1, 1, 0, 5),
        ThresholdScore(0.0, 5 - 3 + 20 - 3, 3, 0, 2, 0, 5),
    ]

    assert sweeper.calcScoreByThreshold(points) == expected
def testFalsePositiveScaling(self):
    """
    Test that scaling the weight of false positives results in an approximate
    balance with the true positives.

    The contributions of TP and FP scores should approximately cancel; i.e.
    total score = 0. With x windows, this total score should on average
    decrease x/2 because of x FNs. Thus, the acceptable range for the score
    should be centered about -x/2.

    Note: detections are drawn with the unseeded `random` module, so the
    individual runs differ between invocations of the test.
    """
    start = datetime.datetime.now()
    increment = datetime.timedelta(minutes=5)
    length = 100
    numWindows = 1
    windowSize = 10
    threshold = 0.5

    timestamps = generateTimestamps(start, increment, length)
    windows = generateWindows(timestamps, numWindows, windowSize)

    # Scale for 10% = windowSize/length. Work on a copy so the cost matrix
    # held by the test fixture is not modified.
    costMatrix = dict(self.costMatrix)
    costMatrix["fpWeight"] = 0.11
    sweeper = Sweeper(probationPercent=0, costMatrix=costMatrix)

    # Make arbitrary detections, score, repeat. The per-record scores returned
    # by scoreDataSet are deliberately discarded: binding them to the
    # accumulator's name would replace the list of dataset scores on every
    # iteration and corrupt the average computed below.
    datasetScores = []
    for _ in range(20):
        anomalyScores = pandas.Series([0] * length)
        indices = random.sample(range(length), 10)
        anomalyScores[indices] = 1
        (_rowScores, matchingRow) = sweeper.scoreDataSet(
            timestamps, anomalyScores, windows, "testData", threshold)
        datasetScores.append(matchingRow.score)

    avgScore = sum(datasetScores) / float(len(datasetScores))

    self.assertTrue(
        -1.5 <= avgScore <= 0.5,
        "The average score across 20 sets "
        "of random detections is %f, which is not within the acceptable range "
        "-1.5 to 0.5." % avgScore)
def testTwoFalsePositivesIsWorseThanOne(self):
    """
    For two false positives A and B in a file, the score given A and B
    should be more negative than the score given just A.
    """
    length = 1000
    numWindows = 1
    windowSize = 10
    threshold = 0.5

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    detections = pandas.Series([0] * length)

    # Only A: a detection on the first record.
    detections[0] = 1
    firstScorer = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, oneFPRow = firstScorer.scoreDataSet(
        timestamps, detections, windows, "testData", threshold)

    # A and B: add a detection on the second record to the same series.
    detections[1] = 1
    secondScorer = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, twoFPRow = secondScorer.scoreDataSet(
        timestamps, detections, windows, "testData", threshold)

    self.assertTrue(twoFPRow.score < oneFPRow.score)
    self._checkCounts(oneFPRow, length - windowSize * numWindows - 1, 0, 1,
                      windowSize * numWindows)
    self._checkCounts(twoFPRow, length - windowSize * numWindows - 2, 0, 2,
                      windowSize * numWindows)
def testEarlierFalsePositiveAfterWindowIsBetter(self):
    """
    For two false positives A and B after a window, where A occurs earlier
    than B, the dataset containing only A must score higher than the dataset
    containing only B.
    """
    length = 10
    numWindows = 1
    windowSize = 2
    threshold = 0.5

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    anomalyScoresA = pandas.Series([0] * length)
    anomalyScoresB = pandas.Series([0] * length)

    # A sits on the record right after the window's last timestamp.
    _, windowEnd = windows[0]
    indexA = timestamps[timestamps == windowEnd].index[0] + 1
    anomalyScoresA[indexA] = 1
    scorerA = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, rowA = scorerA.scoreDataSet(
        timestamps, anomalyScoresA, windows, "testData", threshold)

    # B sits one record later than A.
    anomalyScoresB[indexA + 1] = 1
    scorerB = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, rowB = scorerB.scoreDataSet(
        timestamps, anomalyScoresB, windows, "testData", threshold)

    self.assertTrue(rowA.score > rowB.score)
    self._checkCounts(rowA, length - windowSize * numWindows - 1, 0, 1,
                      windowSize * numWindows)
    self._checkCounts(rowB, length - windowSize * numWindows - 1, 0, 1,
                      windowSize * numWindows)
def testTwoFalsePositivesIsWorseThanOne(self):
    """
    For two false positives A and B in a file, the score given A and B
    should be more negative than the score given just A.
    """
    length = 1000
    numWindows = 1
    windowSize = 10
    threshold = 0.5

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)
    detections = pandas.Series([0] * length)

    # Only A: a detection on the first record.
    detections[0] = 1
    firstScorer = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, oneFPRow = firstScorer.scoreDataSet(
        timestamps, detections, windows, "testData", threshold)

    # A and B: add a detection on the second record to the same series.
    detections[1] = 1
    secondScorer = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, twoFPRow = secondScorer.scoreDataSet(
        timestamps, detections, windows, "testData", threshold)

    self.assertTrue(twoFPRow.score < oneFPRow.score)
    self._checkCounts(oneFPRow, length - windowSize * numWindows - 1, 0, 1,
                      windowSize * numWindows)
    self._checkCounts(twoFPRow, length - windowSize * numWindows - 2, 0, 2,
                      windowSize * numWindows)
def testCalcSweepScoreWindowScoreInteraction(self):
    """Scores inside a window should be positive; all others should be negative."""
    numRows = 100
    fakeName = "TestDataSet"
    fakeAnomalyScores = [1] * numRows
    # We'll use numbers, even though real data uses dates.
    fakeTimestamps = list(range(numRows))

    windowLimits = [(30, 39), (75, 95)]
    # Window limits are inclusive on both ends, hence the + 1.
    expectedInWindowCount = sum(
        last - first + 1 for (first, last) in windowLimits)

    # Standard profile
    costMatrix = {
        "tpWeight": 1.0,
        "fnWeight": 1.0,
        "fpWeight": 0.11,
    }
    sweeper = Sweeper(probationPercent=0.1, costMatrix=costMatrix)
    scoredAnomalies = sweeper.calcSweepScore(
        fakeTimestamps, fakeAnomalyScores, windowLimits, fakeName)

    # One AnomalyPoint per input row.
    assert len(scoredAnomalies) == numRows
    assert all(isinstance(p, AnomalyPoint) for p in scoredAnomalies)

    outsideLabels = ("probationary", None)

    # Expected number of points marked 'probationary'.
    probationaryCount = len(
        [p for p in scoredAnomalies if p.windowName == "probationary"])
    assert probationaryCount == sweeper._getProbationaryLength(numRows)

    # Expected number of points marked as being in a window.
    inWindowCount = len(
        [p for p in scoredAnomalies if p.windowName not in outsideLabels])
    assert inWindowCount == expectedInWindowCount

    # Points in a window have positive score; others have negative score.
    for point in scoredAnomalies:
        if point.windowName in outsideLabels:
            assert point.sweepScore < 0
        else:
            assert point.sweepScore > 0
def testSetCostMatrix(self):
    """Check that setCostMatrix replaces the zero weights of a new Sweeper."""
    sweeper = Sweeper()

    # A freshly constructed sweeper is expected to have all weights at zero.
    assert sweeper.tpWeight == 0
    assert sweeper.fpWeight == 0
    assert sweeper.fnWeight == 0

    # These are all arbitrary.
    expectedTP, expectedFN, expectedFP = 2.0, 3.0, 4.0

    sweeper.setCostMatrix({
        "tpWeight": expectedTP,
        "fnWeight": expectedFN,
        "fpWeight": expectedFP,
    })

    assert sweeper.tpWeight == expectedTP
    assert sweeper.fnWeight == expectedFN
    assert sweeper.fpWeight == expectedFP
def testNullCase(self):
    """No windows and no predictions should yield a score of 0.0."""
    length = 10
    threshold = 0.5

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    detections = pandas.Series([0] * length)
    noWindows = []

    scorer = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, row = scorer.scoreDataSet(
        timestamps, detections, noWindows, "testData", threshold)

    self.assertEqual(row.score, 0.0)
    self._checkCounts(row, 10, 0, 0, 0)
def testSetCostMatrix(self):
    """Check that setCostMatrix replaces the zero weights of a new Sweeper."""
    sweeper = Sweeper()

    # A freshly constructed sweeper is expected to have all weights at zero.
    assert sweeper.tpWeight == 0
    assert sweeper.fpWeight == 0
    assert sweeper.fnWeight == 0

    # These are all arbitrary.
    expectedTP, expectedFN, expectedFP = 2.0, 3.0, 4.0

    sweeper.setCostMatrix({
        "tpWeight": expectedTP,
        "fnWeight": expectedFN,
        "fpWeight": expectedFP,
    })

    assert sweeper.tpWeight == expectedTP
    assert sweeper.fnWeight == expectedFN
    assert sweeper.fpWeight == expectedFP
def testFalsePositiveMeansNegativeScore(self):
    """
    A single false positive (and nothing else detected) must produce a
    negative score.
    """
    length = 1000
    numWindows = 1
    windowSize = 10
    threshold = 0.5

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length)
    windows = generateWindows(timestamps, numWindows, windowSize)

    # One detection on the very first record.
    detections = pandas.Series([0] * length)
    detections[0] = 1

    scorer = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, row = scorer.scoreDataSet(
        timestamps, detections, windows, "testData", threshold)

    self.assertTrue(row.score < 0)
    self._checkCounts(row, length - windowSize * numWindows - 1, 0, 1,
                      windowSize * numWindows)
def testFourFalseNegatives(self):
    """
    Four windows with no detections should score exactly four times the
    negative of the false negative weight.
    """
    length = 2000
    numWindows = 4
    windowSize = 10
    threshold = 1

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length
    )
    windows = generateWindows(timestamps, numWindows, windowSize)

    # Every anomaly score is zero, so nothing reaches the threshold of 1.
    noDetections = pandas.Series([0] * length)

    sweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    _, matchingRow = sweeper.scoreDataSet(
        timestamps, noDetections, windows, "testData", threshold
    )

    # One false-negative penalty per window.
    expectedScore = 4 * -self.costMatrix["fnWeight"]
    self.assertEqual(matchingRow.score, expectedScore)

    # Count arguments are presumably (tn, tp, fp, fn) -- confirm against
    # _checkCounts.
    recordsInWindows = windowSize * numWindows
    self._checkCounts(
        matchingRow, length - recordsInWindows, 0, 0, recordsInWindows
    )
def testRewardLowFalsePositives(self):
    """
    Compare the score of a single false positive under two cost matrices.

    The same detections are scored with self.costMatrix and with a copy
    whose fpWeight is 2.0 and fnWeight is 0.5. The test asserts that the
    score under self.costMatrix equals half the score under the modified
    matrix, and that both runs produce identical counts.
    """
    length = 100
    numWindows = 0
    windowSize = 10
    threshold = 0.5

    timestamps = generateTimestamps(
        datetime.datetime.now(), datetime.timedelta(minutes=5), length
    )
    windows = []

    # One detection at record 0; with no windows the count check below
    # expects it to be the single false positive.
    anomalyScores = pandas.Series([0] * length)
    anomalyScores[0] = 1

    # Deep copy so the shared self.costMatrix is left untouched.
    costMatrixFP = copy.deepcopy(self.costMatrix)
    costMatrixFP["fpWeight"] = 2.0
    costMatrixFP["fnWeight"] = 0.5

    standardSweeper = Sweeper(probationPercent=0, costMatrix=self.costMatrix)
    lowFPSweeper = Sweeper(probationPercent=0, costMatrix=costMatrixFP)

    _, matchingRow1 = standardSweeper.scoreDataSet(
        timestamps, anomalyScores, windows, "testData", threshold
    )
    _, matchingRow2 = lowFPSweeper.scoreDataSet(
        timestamps, anomalyScores, windows, "testData", threshold
    )

    self.assertEqual(matchingRow1.score, 0.5 * matchingRow2.score)

    # Count arguments are presumably (tn, tp, fp, fn) -- confirm against
    # _checkCounts.
    expectedFirstCount = length - windowSize * numWindows - 1
    for row in (matchingRow1, matchingRow2):
        self._checkCounts(row, expectedFirstCount, 0, 1, 0)
def optimizeThreshold(args):
    """Optimize the threshold for a given combination of detector and profile.

    Sweep scores are collected for every data file in the results corpus
    (entries whose path contains "_scores.csv" are skipped), combined into
    per-threshold scores for the whole corpus, and the highest-scoring
    threshold is printed and returned.

    @param args       (tuple)   Contains:

      detectorName        (string)                Name of detector.

      costMatrix          (dict)                  Cost matrix to weight the
                                                  true positives, false
                                                  negatives, and false
                                                  positives during scoring.

      resultsCorpus       (nab.Corpus)            Corpus object that holds the
                                                  per record anomaly scores
                                                  for a given detector.

      corpusLabel         (nab.CorpusLabel)       Ground truth anomaly labels
                                                  for the NAB corpus.

      probationaryPercent (float)                 Percent of each data file
                                                  not to be considered during
                                                  scoring.

    @return (dict) Contains:
          "threshold" (float)   Threshold that returns the largest score from
                                the Objective function.

          "score"     (float)   The score from the objective function given
                                the threshold.
    """
    (detectorName,
     costMatrix,
     resultsCorpus,
     corpusLabel,
     probationaryPercent) = args

    sweeper = Sweeper(
        probationPercent=probationaryPercent,
        costMatrix=costMatrix,
    )

    # First, get the sweep-scores for each row in each data set.
    allAnomalyRows = []
    # .items() instead of the Python-2-only .iteritems() so the loop runs on
    # both Python 2 and 3 (assumes dataFiles is a dict-like mapping).
    for relativePath, dataSet in resultsCorpus.dataFiles.items():
        if "_scores.csv" in relativePath:
            continue

        # dataPath: raw dataset file,
        # e.g. 'artificialNoAnomaly/art_noisy.csv'
        dataPath = convertResultsPathToDataPath(
            os.path.join(detectorName, relativePath))

        windows = corpusLabel.windows[dataPath]
        labels = corpusLabel.labels[dataPath]
        timestamps = labels['timestamp']
        anomalyScores = dataSet.data["anomaly_score"]

        curAnomalyRows = sweeper.calcSweepScore(
            timestamps, anomalyScores, windows, dataPath)
        allAnomalyRows.extend(curAnomalyRows)

    # Get scores by threshold for the entire corpus, best score first.
    scoresByThreshold = sorted(
        sweeper.calcScoreByThreshold(allAnomalyRows),
        key=lambda row: row.score,
        reverse=True,
    )
    bestParams = scoresByThreshold[0]

    print(
        "Optimizer found a max score of {} with anomaly threshold {}.".format(
            bestParams.score, bestParams.threshold))

    return {"threshold": bestParams.threshold, "score": bestParams.score}
def testGetProbationaryLength(self, numRows, probationaryPercent, expectedLength):
    """Check _getProbationaryLength() for one (rows, percent) combination.

    A Sweeper is built with the given probation percentage and the length it
    reports for numRows must equal expectedLength. The arguments are
    presumably supplied by a parametrization decorator not visible here --
    confirm.
    """
    sweeper = Sweeper(probationPercent=probationaryPercent)
    assert sweeper._getProbationaryLength(numRows) == expectedLength
def optimizeThreshold(args):
    """Optimize the threshold for a given combination of detector and profile.

    For each data file in the results corpus (entries whose path contains
    "_scores.csv" are skipped) the sweep scores are computed and pooled; the
    pooled rows are turned into per-threshold scores and the threshold with
    the largest score is printed and returned.

    @param args       (tuple)   Contains:

      detectorName        (string)                Name of detector.

      costMatrix          (dict)                  Cost matrix to weight the
                                                  true positives, false
                                                  negatives, and false
                                                  positives during scoring.

      resultsCorpus       (nab.Corpus)            Corpus object that holds the
                                                  per record anomaly scores
                                                  for a given detector.

      corpusLabel         (nab.CorpusLabel)       Ground truth anomaly labels
                                                  for the NAB corpus.

      probationaryPercent (float)                 Percent of each data file
                                                  not to be considered during
                                                  scoring.

    @return (dict) Contains:
          "threshold" (float)   Threshold that returns the largest score from
                                the Objective function.

          "score"     (float)   The score from the objective function given
                                the threshold.
    """
    (detectorName,
     costMatrix,
     resultsCorpus,
     corpusLabel,
     probationaryPercent) = args

    sweeper = Sweeper(
        probationPercent=probationaryPercent,
        costMatrix=costMatrix
    )

    # First, get the sweep-scores for each row in each data set.
    allAnomalyRows = []
    # dict.iteritems() is Python 2 only; .items() behaves the same here and
    # also works on Python 3 (assumes dataFiles is a dict-like mapping).
    for relativePath, dataSet in resultsCorpus.dataFiles.items():
        if "_scores.csv" in relativePath:
            continue

        # rawDataPath: raw dataset file,
        # e.g. 'artificialNoAnomaly/art_noisy.csv'
        rawDataPath = convertResultsPathToDataPath(
            os.path.join(detectorName, relativePath))

        windows = corpusLabel.windows[rawDataPath]
        labels = corpusLabel.labels[rawDataPath]
        timestamps = labels['timestamp']
        anomalyScores = dataSet.data["anomaly_score"]

        curAnomalyRows = sweeper.calcSweepScore(
            timestamps,
            anomalyScores,
            windows,
            rawDataPath
        )
        allAnomalyRows.extend(curAnomalyRows)

    # Get scores by threshold for the entire corpus, highest score first.
    scoresByThreshold = sweeper.calcScoreByThreshold(allAnomalyRows)
    scoresByThreshold = sorted(
        scoresByThreshold, key=lambda x: x.score, reverse=True)
    bestParams = scoresByThreshold[0]

    print("Optimizer found a max score of {} with anomaly threshold {}.".format(
        bestParams.score, bestParams.threshold
    ))

    return {
        "threshold": bestParams.threshold,
        "score": bestParams.score
    }
def testGetProbationaryLength(self, numRows, probationaryPercent, expectedLength):
    """Assert that the probationary length for numRows matches expectedLength.

    The Sweeper under test is constructed with probationPercent set to
    probationaryPercent. The three arguments presumably come from a
    parametrization decorator that is not visible here -- confirm.
    """
    lengthUnderTest = Sweeper(
        probationPercent=probationaryPercent
    )._getProbationaryLength(numRows)
    assert lengthUnderTest == expectedLength