示例#1
0
 def run(self):
     """Run every build cycle, pool the classifier scores, and save them.

     Calls ``self.initialize()``, then invokes ``self.build`` on
     ``self.inputDataPath`` ``self.cycles`` times. The scores returned for
     each classifier are appended to that classifier's running list, and
     the pooled lists are written to ``self.outputDataPath`` as CSV.
     """
     self.initialize()
     # NOTE(review): this relies on buildDictByKeys giving every key its own
     # list rather than sharing the single [] passed in -- confirm.
     pooledScores = Data.buildDictByKeys(self.classifiers.keys(), [])
     for cycleNumber in range(1, self.cycles + 1):
         Msg.show("Cycle {0} of {1}".format(cycleNumber, self.cycles))
         cycleScores = self.build(self.inputDataPath)
         for classifier, values in cycleScores.items():
             pooledScores[classifier].extend(values)
     resultFrame = pd.DataFrame.from_dict(pooledScores)
     Msg.raw(resultFrame)
     resultFrame.to_csv(self.outputDataPath, index=False,
                        float_format='%.2f')
     Msg.show("Saved results -> {0}".format(self.outputDataPath))
示例#2
0
 def scaleData(self):
     """Rescale the feature columns of every CSV in ``self.paths`` in place.

     For each file the first column is treated as the label, the next two
     as join columns, and all remaining columns as features. Label and join
     columns are copied unchanged; the feature columns are passed through a
     ``RobustScaler`` that is re-fitted for each file. The result overwrites
     the original file, with floats written to two decimal places.
     """
     scaler = preprocessing.RobustScaler()
     for path in self.paths:
         dfData = pd.read_csv(path)
         columnNames = dfData.columns.values
         # Look the label column up by its header exactly as read and only
         # use the stripped name for the output column: indexing dfData with
         # the stripped name raises KeyError whenever the header carries
         # surrounding whitespace.
         rawLabelName = columnNames[0]
         labelName = rawLabelName.strip()
         joinNames = columnNames[1:3]
         featureNames = columnNames[3:]
         allNames = [labelName] + joinNames.tolist() + featureNames.tolist()
         df = pd.DataFrame(columns=allNames)
         df[labelName] = dfData[rawLabelName].values
         df[joinNames] = dfData[joinNames].values
         df[featureNames] = scaler.fit_transform(dfData[featureNames])
         Msg.show("Rescaling {0}".format(path))
         df.to_csv(path, index=False, float_format='%.2f')
示例#3
0
 def saveDataFeature(self, featureName, featureDf):
     """Join one feature table onto every label table and write each to CSV.

     Args:
         featureName: Key into ``self.aliases`` for the feature table; also
             used in the output file name.
         featureDf: Feature DataFrame. Its columns are renamed in place via
             ``self._prepareColumns``.

     For each entry in ``self.labels`` a deep copy of the matching frame in
     ``self.dfs`` is renamed the same way, inner-joined with the feature
     frame on "sessionId" and "projectName", and written to
     ``<outputDir>/<labelName>-<featureName>.csv``. Every written path is
     appended to ``self.paths``.
     """
     joinCols = ["sessionId", "projectName"]
     featureDf.columns = self._prepareColumns(
         featureDf.columns.values.tolist(),
         self.aliases[featureName],
         joinCols)
     for labelName in self.labels:
         # Work on a copy so the stored label frame keeps its column names.
         labelFrame = self.dfs[labelName].copy(deep=True)
         labelFrame.columns = self._prepareColumns(
             labelFrame.columns.values.tolist(),
             self.aliases[labelName],
             joinCols)
         merged = labelFrame.merge(featureDf, how="inner", on=joinCols)
         path = "{0}/{1}-{2}.csv".format(
             self.outputDir, labelName, featureName)
         Msg.show("Saving {0}/{1} -> {2}".format(
             labelName, featureName, path))
         self.paths.append(path)
         merged.to_csv(path, index=False, float_format='%.2f')
示例#4
0
 def initialize(self):
     """Load the JSON configuration and reset per-run state.

     Reads ``self.cfgPath`` and stores its "features", "labels" and
     "aliases" entries on the instance, builds ``self.labelMap`` from the
     first element of each label entry, creates ``self.outputDir``, and
     clears ``self.paths``. Progress and the chosen missing-data policy are
     reported through ``Msg.show``.
     """
     Msg.show("Initializing")
     Msg.show("CVSS scoring version: {0}".format(self.cvssVersion))
     Msg.show("Scale data: {0}".format(self.scaleDataFlag))

     config = File.read(path=self.cfgPath, asJsonFlag=True)
     self.features = config["features"]
     self.labels = config["labels"]
     self.aliases = config["aliases"]
     # Map every label name to the first element of its config entry.
     self.labelMap = {
         label: entry[0] for label, entry in self.labels.items()
     }

     Dir.make(self.outputDir)
     self.paths = []

     if not self.excludeMissingDataFlag:
         Msg.show("Replacing missing data using '{0}' method".format(
             self.replaceMissingDataMethod))
         return
     Msg.show("Excluding rows with missing data")
示例#5
0
 def ingestLogData(self, logPaths, name, attributes):
     """Load JSON log files into one DataFrame and resolve missing values.

     Args:
         logPaths: Iterable of paths to JSON log files; one sample is
             collected per path.
         name: Name shown in the progress message.
         attributes: List of keys to extract from each log. "sessionId" and
             "projectName" are inserted at the front, in place, when absent.

     Returns:
         DataFrame built from the collected samples. When
         ``self.excludeMissingDataFlag`` is set, rows with any missing value
         are dropped. Otherwise missing values in numeric columns are
         filled with the column median if ``self.replaceMissingDataMethod``
         is "median", and with the column mean for any other method.
     """
     Msg.show("Ingesting {0} logs".format(name))
     # Inserted in this order so the list starts sessionId, projectName.
     for joinKey in ("projectName", "sessionId"):
         if joinKey not in attributes:
             attributes.insert(0, joinKey)

     samples = []
     for logPath in logPaths:
         results = File.read(path=logPath, asJsonFlag=True)
         samples.append(Data.getDictByKeys(results, attributes, None))
     df = pd.DataFrame(samples)

     if self.excludeMissingDataFlag:
         df.dropna(inplace=True)
         return df

     # numeric_only=True restricts the statistic to numeric columns. Without
     # it pandas 2.x raises TypeError as soon as the frame holds a
     # non-numeric column; older releases silently skipped such columns,
     # which is the behaviour kept here.
     if self.replaceMissingDataMethod == "median":
         fillValues = df.median(numeric_only=True)
     else:
         fillValues = df.mean(numeric_only=True)
     df.fillna(fillValues, inplace=True)
     return df
示例#6
0
 def initialize(self):
     """Announce start-up and create the directory for the output file.

     The directory is derived from ``self.outputDataPath`` via
     ``File.getDirectory`` and created with ``Dir.make``.
     """
     Msg.show("Initializing")
     outputDirectory = File.getDirectory(self.outputDataPath)
     Dir.make(outputDirectory)