Exemplo n.º 1
0
Arquivo: runner.py Projeto: bertdg/NAB
    def normalize(self):
        """Normalize the detectors' scores against the baseline and print them.

        Per the original notes, this can only be called with the scoring step
        (i.e. runner.score()) preceding it, since it reads the "Score" column
        of every CSV listed in self.resultsFiles.

        For each application profile in self.profiles, the baseline is the last
        "Score" value of <resultsDir>/baseline/baseline_<profile>_scores.csv.
        The baseline is subtracted from each detector's last "Score" value and
        the difference is scaled:

            normalized = (score - baseline) * 100 / (44.0 - baseline)

        where 44.0 is the hard-coded perfect score (the number of TPs
        possible). The results CSVs are only read here, so they still contain
        the original, non-normalized scores. The normalized scores are passed
        to updateFinalResults() together with the path
        <resultsDir>/final_results.json.

        Raises:
            IOError: if the baseline results directory does not exist.
            IndexError: if a results file path contains no known profile name.
        """
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("\nRunning score normalization step")

        # Get baselines for each application profile.
        baselineDir = os.path.join(self.resultsDir, "baseline")
        if not os.path.isdir(baselineDir):
            raise IOError(
                "No results directory for baseline. You must "
                "run the baseline detector before normalizing scores.")

        baselines = {}
        # Only the profile names are needed; iterating the mapping directly
        # avoids the Python 2-only iteritems().
        for profileName in self.profiles:
            fileName = os.path.join(baselineDir,
                                    "baseline_" + profileName + "_scores.csv")
            with open(fileName) as f:
                results = pandas.read_csv(f)
            baselines[profileName] = results["Score"].iloc[-1]

        # Normalize the score from each results file.
        finalResults = {}
        for resultsFile in self.resultsFiles:
            # The profile is picked by looking for its name inside the path;
            # the first matching profile wins.
            profileName = [k for k in baselines if k in resultsFile][0]
            base = baselines[profileName]

            with open(resultsFile) as f:
                results = pandas.read_csv(f)

            # Calculate score:
            perfect = 44.0 - base
            score = (results["Score"].iloc[-1] - base) * (100 / perfect)

            # File names are expected to look like
            # <detector>_<profile>_scores.csv. os.path.basename is used
            # instead of splitting on '/' so the name is also extracted
            # correctly from paths using the platform's own separator.
            resultsInfo = os.path.basename(resultsFile).split('.')[0]
            detector = resultsInfo.split('_')[0]
            profile = resultsInfo.replace(detector + "_",
                                          "").replace("_scores", "")
            finalResults.setdefault(detector, {})[profile] = score

            print("Final score for '%s' detector on '%s' profile = %.2f" %
                  (detector, profile, score))

        resultsPath = os.path.join(self.resultsDir, "final_results.json")
        updateFinalResults(finalResults, resultsPath)
        print("Final scores have been written to %s." % resultsPath)
Exemplo n.º 2
0
  def normalize(self):
    """Normalize the detectors' scores according to the baseline, and print
    them to the console.

    Function can only be called with the scoring step (i.e. runner.score())
    preceding it.

    This reads the last "Score" value from each results CSV in
    self.resultsFiles and subtracts the relevant baseline value, which is the
    last "Score" value of <resultsDir>/baseline/baseline_<profile>_scores.csv.
    The difference is then multiplied by 100 / (perfect - baseline), where
    the perfect score is hard-coded to the number of TPs possible (44.0).

    Note the results CSVs still contain the original scores, not normalized;
    the normalized values are passed to updateFinalResults() along with the
    path <resultsDir>/final_results.json.

    Raises:
      IOError: if the baseline results directory does not exist.
      IndexError: if no profile name occurs in a results file path.
    """
    # print(...) with one argument works the same under Python 2 and 3.
    print("\nRunning score normalization step")

    # Get baselines for each application profile.
    baselineDir = os.path.join(self.resultsDir, "baseline")
    if not os.path.isdir(baselineDir):
      raise IOError("No results directory for baseline. You must "
                    "run the baseline detector before normalizing scores.")

    baselines = {}
    # Iterate the profile names directly rather than via the Python 2-only
    # iteritems(); the values were never used here.
    for profileName in self.profiles:
      fileName = os.path.join(baselineDir,
                              "baseline_" + profileName + "_scores.csv")
      with open(fileName) as f:
        results = pandas.read_csv(f)
      baselines[profileName] = results["Score"].iloc[-1]

    # Normalize the score from each results file.
    finalResults = {}
    for resultsFile in self.resultsFiles:
      # First profile whose name appears anywhere in the path.
      profileName = [k for k in baselines if k in resultsFile][0]
      base = baselines[profileName]

      with open(resultsFile) as f:
        results = pandas.read_csv(f)

      # Calculate score:
      perfect = 44.0 - base
      score = (results["Score"].iloc[-1] - base) * (100 / perfect)

      # Add to results dict. The file name (expected form:
      # <detector>_<profile>_scores.csv) is taken with os.path.basename so
      # that it does not depend on '/' being the path separator.
      resultsInfo = os.path.basename(resultsFile).split('.')[0]
      detector = resultsInfo.split('_')[0]
      profile = resultsInfo.replace(detector + "_", "").replace("_scores", "")
      finalResults.setdefault(detector, {})[profile] = score

      print("Final score for '%s' detector on '%s' profile = %.2f"
            % (detector, profile, score))

    resultsPath = os.path.join(self.resultsDir, "final_results.json")
    updateFinalResults(finalResults, resultsPath)
    print("Final scores have been written to %s." % resultsPath)
    
Exemplo n.º 3
0
  def normalize(self):
    """
    Rescale each detector's score relative to the null detector's baseline
    and report the result on the console.

    As noted by the original authors, the scoring step (i.e. runner.score())
    must have been run before this is called.

    For every file in self.resultsFiles the last "Score" value is read, the
    matching profile's baseline (the last "Score" value of the null
    detector's scores file for that profile) is subtracted, and the result is
    multiplied by 100 and divided by (perfect - baseline). Here perfect is
    the total number of labels found in the file at self.labelPath times the
    profile's CostMatrix "tpWeight".

    The results CSVs themselves are only read, so they keep the original,
    non-normalized scores. The normalized scores are handed to
    updateFinalResults() together with the final_results.json path.
    """
    print("\nRunning score normalization step")

    # The null detector's results directory supplies the baselines.
    nullResultsDir = os.path.join(self.resultsDir, "null")
    if not os.path.isdir(nullResultsDir):
      raise IOError("No results directory for null detector. You must "
                    "run the null detector before normalizing scores.")

    # One baseline per application profile.
    baselineByProfile = {}
    for name in self.profiles.keys():
      scoresPath = os.path.join(nullResultsDir, "null_" + name + "_scores.csv")
      with open(scoresPath) as scoresFile:
        nullScores = pandas.read_csv(scoresFile)
      baselineByProfile[name] = nullScores["Score"].iloc[-1]

    # Total number of TPs: the summed length of every entry in the labels file.
    with open(self.labelPath, "rb") as labelFile:
      labelsDict = json.load(labelFile)
    tpCount = sum(len(labels) for labels in labelsDict.values())

    # Work through the results files one at a time.
    finalResults = {}
    for resultsFile in self.resultsFiles:
      # The first profile whose name occurs in the path selects the baseline.
      matchingProfiles = [p for p in baselineByProfile if p in resultsFile]
      profileName = matchingProfiles[0]
      baseline = baselineByProfile[profileName]

      with open(resultsFile) as handle:
        detectorScores = pandas.read_csv(handle)

      # Normalized score for this file.
      tpWeight = self.profiles[profileName]["CostMatrix"]["tpWeight"]
      perfect = tpCount * tpWeight
      rawScore = detectorScores["Score"].iloc[-1]
      score = 100 * (rawScore - baseline) / (perfect - baseline)

      # Derive the detector and profile labels from the file name.
      baseName = resultsFile.split(os.path.sep)[-1]
      resultsInfo = baseName.split('.')[0]
      detector = resultsInfo.split('_')[0]
      profile = resultsInfo.replace(detector + "_", "").replace("_scores", "")
      finalResults.setdefault(detector, {})[profile] = score

      print("Final score for '%s' detector on '%s' profile = %.2f"
            % (detector, profile, score))

    resultsPath = os.path.join(self.resultsDir, "final_results.json")
    updateFinalResults(finalResults, resultsPath)
    print("Final scores have been written to %s." % resultsPath)
Exemplo n.º 4
0
    def normalize(self):
        """
        Normalize the detectors' scores according to the baseline defined by
        the null detector, and print to the console.

        Function can only be called with the scoring step (i.e. runner.score())
        preceding it.

        This reads the last "Score" value from each results CSV in
        self.resultsFiles and subtracts the relevant baseline value, i.e. the
        last "Score" value of <resultsDir>/null/null_<profile>_scores.csv.
        The scores are then normalized by multiplying by 100 and dividing by
        perfect less the baseline, where the perfect score is the number of
        labels in the file at self.labelPath multiplied by the profile's
        CostMatrix "tpWeight".

        Note the results CSVs still contain the original scores, not
        normalized; the normalized values are passed to updateFinalResults()
        along with the path <resultsDir>/final_results.json.

        Raises:
            IOError: if the null detector's results directory does not exist.
            IndexError: if a results file path contains no known profile name.
        """
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("\nRunning score normalization step")

        # Get baseline scores for each application profile.
        nullDir = os.path.join(self.resultsDir, "null")
        if not os.path.isdir(nullDir):
            raise IOError(
                "No results directory for null detector. You must "
                "run the null detector before normalizing scores."
            )

        baselines = {}
        # Only the profile names are needed; iterating the mapping directly
        # avoids the Python 2-only iteritems().
        for profileName in self.profiles:
            fileName = os.path.join(nullDir, "null_" + profileName + "_scores.csv")
            with open(fileName) as f:
                results = pandas.read_csv(f)
            baselines[profileName] = results["Score"].iloc[-1]

        # Get total number of TPs
        with open(self.labelPath, "rb") as f:
            labelsDict = json.load(f)
        tpCount = sum(len(labels) for labels in labelsDict.values())

        # Normalize the score from each results file.
        finalResults = {}
        for resultsFile in self.resultsFiles:
            # The profile is picked by looking for its name inside the path;
            # the first matching profile wins.
            profileName = [k for k in baselines if k in resultsFile][0]
            base = baselines[profileName]

            with open(resultsFile) as f:
                results = pandas.read_csv(f)

            # Calculate score:
            perfect = tpCount * self.profiles[profileName]["CostMatrix"]["tpWeight"]
            score = 100 * (results["Score"].iloc[-1] - base) / (perfect - base)

            # File names are expected to look like
            # <detector>_<profile>_scores.csv. os.path.basename is used
            # instead of splitting on "/" so the name is also extracted
            # correctly from paths using the platform's own separator.
            resultsInfo = os.path.basename(resultsFile).split(".")[0]
            detector = resultsInfo.split("_")[0]
            profile = resultsInfo.replace(detector + "_", "").replace("_scores", "")
            finalResults.setdefault(detector, {})[profile] = score

            print("Final score for '%s' detector on '%s' profile = %.2f" % (detector, profile, score))

        resultsPath = os.path.join(self.resultsDir, "final_results.json")
        updateFinalResults(finalResults, resultsPath)
        print("Final scores have been written to %s." % resultsPath)