def run(self):
    """Export the scores stored in the input CSV to an Excel workbook.

    Reads ``self.inputDataPath`` into ``self.df`` and writes a workbook to
    ``self.outputDataPath`` containing three sheets:

    * ``Data``    - a 1-based row counter (header "K") followed by every CSV
      column, ordered by column name.
    * ``Chart``   - a scatter chart with one series per CSV column, plotted
      against the row counter.
    * ``Summary`` - mean, median, std, var, min and max of every CSV column.

    Finally the loaded frame is echoed through ``Msg.raw``.
    """
    self.df = pd.read_csv(self.inputDataPath)
    classifiers = sorted(self.df.columns)

    wb = xlsxwriter.Workbook(self.outputDataPath)
    fmtBold = wb.add_format({'bold': True})
    wsSummary = wb.add_worksheet("Summary")
    wsData = wb.add_worksheet("Data")
    wsChart = wb.add_worksheet("Chart")
    wsSummary.set_column("A:I", 18)
    wsData.set_column("A:I", 18)

    # --- Data sheet: header row, then one row per CSV record -------------
    for col, header in enumerate(["K"] + classifiers):
        wsData.write(0, col, header, fmtBold)

    firstRow = 1          # zero-based worksheet row of the first record
    lastRow = 0           # stays below firstRow when the CSV has no records
    for rowIdx, (_, record) in enumerate(self.df.iterrows(), start=firstRow):
        wsData.write(rowIdx, 0, rowIdx)
        for col, classifier in enumerate(classifiers, start=1):
            wsData.write(rowIdx, col, record[classifier])
        lastRow = rowIdx

    # --- Chart sheet ------------------------------------------------------
    # The ranges are derived from the rows/columns actually written instead
    # of a fixed "rows 2-101, columns B-D" window, so the chart stays correct
    # for any number of records and classifiers. A chart is only produced
    # when there is at least one record to plot.
    if lastRow >= firstRow:
        chart = wb.add_chart({"type": "scatter"})
        categories = ["Data", firstRow, 0, lastRow, 0]
        for col in range(1, len(classifiers) + 1):
            chart.add_series({
                "categories": categories,
                "values": ["Data", firstRow, col, lastRow, col],
            })
        # XlsxWriter option keys are lowercase; "Name" was silently ignored.
        chart.set_x_axis({"name": "KFold"})
        chart.set_y_axis({"name": "Score"})
        wsChart.insert_chart("A1", chart)

    # --- Summary sheet ----------------------------------------------------
    statHeaders = ["Classifier", "Mean", "Median", "Std", "Var", "Min", "Max"]
    for col, header in enumerate(statHeaders):
        wsSummary.write(0, col, header, fmtBold)

    for rowIdx, classifier in enumerate(classifiers, start=1):
        scores = self.df[classifier]
        wsSummary.write(rowIdx, 0, classifier, fmtBold)
        statValues = [
            Number.asFloat(np.mean(scores), 3),
            Number.asFloat(np.median(scores), 3),
            Number.asFloat(np.std(scores), 3),
            Number.asFloat(np.var(scores), 3),
            np.min(scores),
            np.max(scores),
        ]
        for col, value in enumerate(statValues, start=1):
            wsSummary.write(rowIdx, col, value)

    wb.close()
    Msg.raw(self.df)
def run(self):
    """Run ``self.cycles`` build passes and save the pooled scores as CSV.

    After ``self.initialize()``, every cycle calls
    ``self.build(self.inputDataPath)`` and appends the returned scores to a
    per-classifier list. The pooled lists are turned into a DataFrame,
    echoed through ``Msg.raw`` and written to ``self.outputDataPath``
    without an index column and with floats formatted to two decimals.
    """
    self.initialize()

    # NOTE(review): one [] literal is passed as the initial value for every
    # key; this relies on Data.buildDictByKeys handing each key its own
    # list - confirm against the helper.
    pooled = Data.buildDictByKeys(self.classifiers.keys(), [])

    totalCycles = self.cycles
    for cycle in range(1, totalCycles + 1):
        Msg.show("Cycle {0} of {1}".format(cycle, self.cycles))
        cycleScores = self.build(self.inputDataPath)
        for name, values in cycleScores.items():
            pooled[name].extend(values)

    df = pd.DataFrame.from_dict(pooled)
    Msg.raw(df)
    df.to_csv(self.outputDataPath, index=False, float_format='%.2f')
    Msg.show("Saved results -> {0}".format(self.outputDataPath))
def build(self, inputDataPath):
    """Cross-validate every configured classifier on one data file.

    Calls ``self.ingestData(inputDataPath)`` (presumably what populates
    ``self.dfFeatures`` and ``self.dfLabel`` - verify), then scores each
    entry of ``self.classifiers`` with ``cross_val_score`` using a
    ``KFold`` splitter of ``self.kFolds`` splits, shuffled according to
    ``self.randomizeDataFlag``.

    Returns a dict mapping classifier name to the list of scores returned
    by ``cross_val_score``; classifiers are processed in sorted name order.
    """
    Msg.raw("Analyzing <-> {0}".format(inputDataPath))
    self.ingestData(inputDataPath)

    splitter = KFold(
        n_splits=self.kFolds,
        shuffle=self.randomizeDataFlag,
        random_state=None,
    )
    features = self.dfFeatures.values
    labels = self.dfLabel.values.astype(dtype="int64")

    results = {}
    # Names are unique dict keys, so each one is seen exactly once here.
    for name in sorted(self.classifiers):
        foldScores = cross_val_score(
            self.classifiers[name], features, labels, cv=splitter
        )
        results[name] = list(foldScores)
    return results