def run(self):
    """Run the full experiment loop and persist the aggregated scores.

    Initializes the pipeline, runs ``self.cycles`` build passes over the
    input data, accumulates per-classifier scores across cycles, and writes
    the combined results to ``self.outputDataPath`` as CSV.
    """
    self.initialize()
    # One fresh list per classifier. Building the dict directly (instead of
    # Data.buildDictByKeys(keys, [])) guarantees each key gets its OWN list;
    # if the helper reused the single [] default object, every classifier
    # would share one list and extend() would cross-contaminate the scores.
    scoreKeeper = {classifier: [] for classifier in self.classifiers}
    for cycle in range(self.cycles):
        Msg.show("Cycle {0} of {1}".format(cycle + 1, self.cycles))
        scores = self.build(self.inputDataPath)
        # Accumulate this cycle's scores onto the running per-classifier lists.
        for classifier, cycleScores in scores.items():
            scoreKeeper[classifier].extend(cycleScores)
    df = pd.DataFrame.from_dict(scoreKeeper)
    Msg.raw(df)
    df.to_csv(self.outputDataPath, index=False, float_format='%.2f')
    Msg.show("Saved results -> {0}".format(self.outputDataPath))
def scaleData(self):
    """Rescale every saved CSV in ``self.paths`` in place.

    Each file is assumed to be laid out as: column 0 = label, columns 1-2 =
    join keys, columns 3+ = numeric features. The feature columns are
    robust-scaled (per file) and the file is rewritten.
    """
    scaler = preprocessing.RobustScaler()
    for csvPath in self.paths:
        raw = pd.read_csv(csvPath)
        columns = raw.columns.values
        # NOTE(review): the label column name is stripped here; if the
        # original header carried surrounding whitespace the lookup below
        # would no longer match it — confirm headers are already clean.
        labelCol = columns[0].strip()
        joinCols = columns[1:3]
        featureCols = columns[3:]
        ordered = [labelCol] + joinCols.tolist() + featureCols.tolist()
        rescaled = pd.DataFrame(columns=ordered)
        rescaled[labelCol] = raw[labelCol].values
        rescaled[joinCols] = raw[joinCols].values
        # Scaler is refit on each file independently.
        rescaled[featureCols] = scaler.fit_transform(raw[featureCols])
        Msg.show("Rescaling {0}".format(csvPath))
        rescaled.to_csv(csvPath, index=False, float_format='%.2f')
def saveDataFeature(self, featureName, featureDf):
    """Join one feature table against every label table and save the results.

    For each configured label, inner-joins ``featureDf`` with the label's
    DataFrame on the session/project keys, records the output path in
    ``self.paths``, and writes the merged table to CSV.
    """
    joinCols = ["sessionId", "projectName"]
    # NOTE(review): this renames the caller's featureDf columns in place —
    # an intentional-looking side effect, preserved as-is.
    featureDf.columns = self._prepareColumns(
        featureDf.columns.values.tolist(), self.aliases[featureName], joinCols)
    for labelName in self.labels:
        path = "{0}/{1}-{2}.csv".format(self.outputDir, labelName, featureName)
        # Deep-copy so renaming label columns does not touch the stored frame.
        labelDf = self.dfs[labelName].copy(deep=True)
        labelDf.columns = self._prepareColumns(
            labelDf.columns.values.tolist(), self.aliases[labelName], joinCols)
        merged = labelDf.merge(featureDf, how="inner", on=joinCols)
        Msg.show("Saving {0}/{1} -> {2}".format(labelName, featureName, path))
        self.paths.append(path)
        merged.to_csv(path, index=False, float_format='%.2f')
def initialize(self):
    """Load the JSON configuration and prepare the output directory.

    Populates ``self.features``, ``self.labels``, ``self.aliases`` and
    ``self.labelMap`` from the config file, creates the output directory,
    resets ``self.paths``, and reports the missing-data strategy in use.
    """
    Msg.show("Initializing")
    Msg.show("CVSS scoring version: {0}".format(self.cvssVersion))
    Msg.show("Scale data: {0}".format(self.scaleDataFlag))
    cfg = File.read(path=self.cfgPath, asJsonFlag=True)
    self.features = cfg["features"]
    self.labels = cfg["labels"]
    self.aliases = cfg["aliases"]
    # First entry of each label's config list is treated as its canonical
    # name — presumably; verify against the config schema.
    self.labelMap = {label: values[0] for label, values in self.labels.items()}
    Dir.make(self.outputDir)
    self.paths = []
    if not self.excludeMissingDataFlag:
        Msg.show("Replacing missing data using '{0}' method".format(
            self.replaceMissingDataMethod))
    else:
        Msg.show("Excluding rows with missing data")
def ingestLogData(self, logPaths, name, attributes):
    """Read a set of JSON log files into a single DataFrame.

    Args:
        logPaths: iterable of paths to JSON log files.
        name: feature-set name, used only for the progress message.
        attributes: keys to extract from each log; the join keys
            ``sessionId``/``projectName`` are prepended when absent.

    Returns:
        A pandas DataFrame with one row per log file. Rows with missing
        values are dropped or imputed according to the instance flags.
    """
    Msg.show("Ingesting {0} logs".format(name))
    # Work on a copy so the caller's list is not mutated in place
    # (the original inserted the join keys into the caller's list).
    attributes = list(attributes)
    # Insertion order yields [sessionId, projectName, ...] at the front,
    # matching the original behavior.
    for joinKey in ("projectName", "sessionId"):
        if joinKey not in attributes:
            attributes.insert(0, joinKey)
    samples = [
        Data.getDictByKeys(File.read(path=logPath, asJsonFlag=True),
                           attributes, None)
        for logPath in logPaths
    ]
    df = pd.DataFrame(samples)
    if self.excludeMissingDataFlag:
        df.dropna(inplace=True)
    elif self.replaceMissingDataMethod == "median":
        df.fillna(df.median(), inplace=True)
    else:
        df.fillna(df.mean(), inplace=True)
    # Removed unused local: `alias = self.aliases[name]` was never read.
    return df
def initialize(self):
    """Announce start-up and ensure the output file's directory exists."""
    Msg.show("Initializing")
    outputDir = File.getDirectory(self.outputDataPath)
    Dir.make(outputDir)