def UpdateROC(self):
    if not self.__is_ui_ready:
        return

    if (self.comboNormalizer.count() == 0) or \
            (self.comboDimensionReduction.count() == 0) or \
            (self.comboFeatureSelector.count() == 0) or \
            (self.comboClassifier.count() == 0) or \
            (self.spinBoxFeatureNumber.value() == 0):
        return

    case_name = self.comboNormalizer.currentText() + '_' + \
                self.comboDimensionReduction.currentText() + '_' + \
                self.comboFeatureSelector.currentText() + '_' + \
                str(self.spinBoxFeatureNumber.value()) + '_' + \
                self.comboClassifier.currentText()
    case_folder = os.path.join(self._root_folder, case_name)

    pred_list, label_list, name_list = [], [], []
    if self.checkROCCVTrain.isChecked():
        train_pred = np.load(os.path.join(case_folder, 'train_predict.npy'))
        train_label = np.load(os.path.join(case_folder, 'train_label.npy'))
        pred_list.append(train_pred)
        label_list.append(train_label)
        name_list.append('CV Train')
    if self.checkROCCVValidation.isChecked():
        val_pred = np.load(os.path.join(case_folder, 'val_predict.npy'))
        val_label = np.load(os.path.join(case_folder, 'val_label.npy'))
        pred_list.append(val_pred)
        label_list.append(val_label)
        name_list.append('CV Validation')
    if self.checkROCTrain.isChecked():
        all_train_pred = np.load(os.path.join(case_folder, 'all_train_predict.npy'))
        all_train_label = np.load(os.path.join(case_folder, 'all_train_label.npy'))
        pred_list.append(all_train_pred)
        label_list.append(all_train_label)
        name_list.append('Train')
    if self.checkROCTest.isChecked():
        if os.path.exists(os.path.join(case_folder, 'test_label.npy')):
            test_pred = np.load(os.path.join(case_folder, 'test_predict.npy'))
            test_label = np.load(os.path.join(case_folder, 'test_label.npy'))
            pred_list.append(test_pred)
            label_list.append(test_label)
            name_list.append('Test')

    if len(pred_list) > 0:
        DrawROCList(pred_list, label_list, name_list=name_list,
                    is_show=False, fig=self.canvasROC.getFigure())
        self.canvasROC.draw()
def UpdateROC(self):
    if (self.comboNormalizer.count() == 0) or \
            (self.comboDimensionReduction.count() == 0) or \
            (self.comboFeatureSelector.count() == 0) or \
            (self.comboClassifier.count() == 0) or \
            (self.spinBoxFeatureNumber.value() == 0):
        return

    case_name = self.comboNormalizer.currentText() + '_' + \
                self.comboDimensionReduction.currentText() + '_' + \
                self.comboFeatureSelector.currentText() + '_' + \
                str(self.spinBoxFeatureNumber.value()) + '_' + \
                self.comboClassifier.currentText()
    case_folder = os.path.join(self._root_folder, case_name)

    try:
        self._current_pipeline.LoadPipeline(
            os.path.join(case_folder, 'pipeline_info.csv'))
    except Exception as ex:
        QMessageBox.about(self, "Load Error", str(ex))
        self.logger.log('Load Pipeline Error. The reason is ' + str(ex))
        # Bail out: if the pipeline info cannot be loaded, the prediction
        # files below cannot be trusted to exist either.
        return

    pred_list, label_list, name_list = [], [], []
    if self.checkROCTrain.isChecked():
        train_pred = np.load(os.path.join(case_folder, 'train_predict.npy'))
        train_label = np.load(os.path.join(case_folder, 'train_label.npy'))
        pred_list.append(train_pred)
        label_list.append(train_label)
        name_list.append('train')
    if self.checkROCValidation.isChecked():
        val_pred = np.load(os.path.join(case_folder, 'val_predict.npy'))
        val_label = np.load(os.path.join(case_folder, 'val_label.npy'))
        pred_list.append(val_pred)
        label_list.append(val_label)
        name_list.append('validation')
    if self.checkROCTest.isChecked():
        if os.path.exists(os.path.join(case_folder, 'test_label.npy')):
            test_pred = np.load(os.path.join(case_folder, 'test_predict.npy'))
            test_label = np.load(os.path.join(case_folder, 'test_label.npy'))
            pred_list.append(test_pred)
            label_list.append(test_label)
            name_list.append('Test')

    if len(pred_list) > 0:
        DrawROCList(pred_list, label_list, name_list=name_list,
                    is_show=False, fig=self.canvasROC.getFigure())
        self.canvasROC.draw()
def UpdateROC(self):
    if not self.__is_ui_ready:
        return

    if (self.comboNormalizer.count() == 0) or \
            (self.comboDimensionReduction.count() == 0) or \
            (self.comboFeatureSelector.count() == 0) or \
            (self.comboClassifier.count() == 0) or \
            (self.spinBoxFeatureNumber.value() == 0):
        return

    pipeline_name = self._fae.GetStoreName(
        self.comboNormalizer.currentText(),
        self.comboDimensionReduction.currentText(),
        self.comboFeatureSelector.currentText(),
        str(self.spinBoxFeatureNumber.value()),
        self.comboClassifier.currentText())
    cls_folder = self._fae.SplitFolder(pipeline_name, self._root_folder)[3]

    pred_list, label_list, name_list = [], [], []
    if self.checkROCCVTrain.isChecked():
        self.__AddOneCurveInRoc(pred_list, label_list, name_list, cls_folder, CV_TRAIN)
    if self.checkROCCVValidation.isChecked():
        self.__AddOneCurveInRoc(pred_list, label_list, name_list, cls_folder, CV_VAL)
    if self.checkROCTrain.isChecked():
        self.__AddOneCurveInRoc(pred_list, label_list, name_list, cls_folder, TRAIN)
    if self.checkROCTest.isChecked():
        self.__AddOneCurveInRoc(pred_list, label_list, name_list, cls_folder, TEST)

    if len(pred_list) > 0:
        DrawROCList(pred_list, label_list, name_list=name_list,
                    is_show=False, fig=self.canvasROC.getFigure())
        self.canvasROC.draw()
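# A minimal sketch of the __AddOneCurveInRoc helper called above, reconstructed from
# the behavior of the first UpdateROC version in this section; the real helper may
# differ. It assumes CV_TRAIN/CV_VAL/TRAIN/TEST are module-level constants usable as
# dictionary keys, and that the method lives in the same class as UpdateROC. The
# (prediction file, label file, curve name) triplets mirror the unrefactored version.
ROC_FILE_MAP = {
    CV_TRAIN: ('train_predict.npy', 'train_label.npy', 'CV Train'),
    CV_VAL: ('val_predict.npy', 'val_label.npy', 'CV Validation'),
    TRAIN: ('all_train_predict.npy', 'all_train_label.npy', 'Train'),
    TEST: ('test_predict.npy', 'test_label.npy', 'Test'),
}

def __AddOneCurveInRoc(self, pred_list, label_list, name_list, cls_folder, key):
    pred_file, label_file, curve_name = ROC_FILE_MAP[key]
    pred_path = os.path.join(cls_folder, pred_file)
    label_path = os.path.join(cls_folder, label_file)
    # Skip quietly when the files are absent (e.g. no independent test set).
    if os.path.exists(pred_path) and os.path.exists(label_path):
        pred_list.append(np.load(pred_path))
        label_list.append(np.load(label_path))
        name_list.append(curve_name)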
def Run(self, data_container, test_data_container=DataContainer(), store_folder=''):
    train_pred_list, train_label_list, val_pred_list, val_label_list = [], [], [], []
    data = data_container.GetArray()
    label = data_container.GetLabel()

    # Pool out-of-fold predictions so every case contributes exactly one
    # validation prediction; val_index_store records the fold assignment.
    val_index_store = []
    for train_index, val_index in self.__cv.split(data, label):
        val_index_store.extend(val_index)
        train_data = data[train_index, :]
        train_label = label[train_index]
        val_data = data[val_index, :]
        val_label = label[val_index]

        self.__classifier.SetData(train_data, train_label)
        self.__classifier.Fit()

        train_prob = self.__classifier.Predict(train_data)
        val_prob = self.__classifier.Predict(val_data)

        train_pred_list.extend(train_prob)
        train_label_list.extend(train_label)
        val_pred_list.extend(val_prob)
        val_label_list.extend(val_label)

    total_train_label = np.asarray(train_label_list, dtype=np.uint8)
    total_train_pred = np.asarray(train_pred_list, dtype=np.float32)
    train_metric = EstimateMetirc(total_train_pred, total_train_label, 'train')

    total_label = np.asarray(val_label_list, dtype=np.uint8)
    total_pred = np.asarray(val_pred_list, dtype=np.float32)
    val_metric = EstimateMetirc(total_pred, total_label, 'val')

    # Refit on the whole training set before predicting on the test set.
    self.__classifier.SetDataContainer(data_container)
    self.__classifier.Fit()

    test_metric = {}
    if test_data_container.GetArray().size > 0:
        test_data = test_data_container.GetArray()
        test_label = test_data_container.GetLabel()
        test_pred = self.__classifier.Predict(test_data)
        test_metric = EstimateMetirc(test_pred, test_label, 'test')

    if store_folder:
        if not os.path.exists(store_folder):
            os.mkdir(store_folder)

        info = {}
        info.update(train_metric)
        info.update(val_metric)

        np.save(os.path.join(store_folder, 'train_predict.npy'), total_train_pred)
        np.save(os.path.join(store_folder, 'val_predict.npy'), total_pred)
        np.save(os.path.join(store_folder, 'train_label.npy'), total_train_label)
        np.save(os.path.join(store_folder, 'val_label.npy'), total_label)

        cv_info_path = os.path.join(store_folder, 'cv_info.csv')
        df = pd.DataFrame(data=val_index_store)
        df.to_csv(cv_info_path)

        DrawROCList(total_train_pred, total_train_label,
                    store_path=os.path.join(store_folder, 'train_ROC.jpg'), is_show=False)
        DrawROCList(total_pred, total_label,
                    store_path=os.path.join(store_folder, 'val_ROC.jpg'), is_show=False)

        if test_data_container.GetArray().size > 0:
            info.update(test_metric)
            np.save(os.path.join(store_folder, 'test_predict.npy'), test_pred)
            np.save(os.path.join(store_folder, 'test_label.npy'), test_label)
            DrawROCList(test_pred, test_label,
                        store_path=os.path.join(store_folder, 'test_ROC.jpg'), is_show=False)

        self.__classifier.Save(store_folder)
        self.SaveResult(info, store_folder)

    return train_metric, val_metric, test_metric
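# A self-contained sketch of the out-of-fold bookkeeping pattern that Run implements,
# written with plain scikit-learn for illustration. None of the names below are FAE
# APIs; FAE wraps this pattern in its own classifier and cross-validation classes.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold

data, label = make_classification(n_samples=200, n_features=20, random_state=0)
val_pred_list, val_label_list, val_index_store = [], [], []
for train_index, val_index in StratifiedKFold(n_splits=5).split(data, label):
    clf = LogisticRegression(max_iter=1000)
    clf.fit(data[train_index], label[train_index])
    # Pool out-of-fold probabilities so every case gets exactly one validation prediction.
    val_pred_list.extend(clf.predict_proba(data[val_index])[:, 1])
    val_label_list.extend(label[val_index])
    val_index_store.extend(val_index)  # fold assignment, as Run saves to cv_info.csv

print('pooled CV AUC: {:.3f}'.format(roc_auc_score(val_label_list, val_pred_list)))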
def Run(self, training_data_container, pipeline, result_folder, store_folder,
        testing_data_container=DataContainer()):
    from FAE.Visualization.DrawROCList import DrawROCList
    from reportlab.lib import colors

    # Data Description
    data_description_text = " "
    if len(np.unique(training_data_container.GetLabel())) != 2:
        print('Only works for the 2-label classification')
        return False

    positive_number = len(
        np.where(training_data_container.GetLabel() == np.max(training_data_container.GetLabel()))[0])
    negative_number = len(training_data_container.GetLabel()) - positive_number
    data_description_text += "We selected {:d} cases as the training data set. {:d} of them were marked as " \
                             "positive and the remaining {:d} were marked as negative. " \
                             "".format(len(training_data_container.GetCaseName()), positive_number, negative_number)
    if testing_data_container.IsEmpty():
        data_description_text += "Since the number of samples was limited, there was no independent testing data set. "
    else:
        positive_number = len(
            np.where(testing_data_container.GetLabel() == np.max(testing_data_container.GetLabel()))[0])
        negative_number = len(testing_data_container.GetLabel()) - positive_number
        data_description_text += "We also selected another {:d} cases as the independent testing data set " \
                                 "({:d}/{:d} = positive/negative). \n" \
                                 "".format(len(testing_data_container.GetCaseName()), positive_number, negative_number)

    # Method Description
    method_description_text = " "
    method_description_text += pipeline.GetNormalizer().GetDescription()
    method_description_text += pipeline.GetDimensionReduction().GetDescription()
    method_description_text += pipeline.GetFeatureSelector().GetDescription()
    method_description_text += pipeline.GetClassifier().GetDescription()
    method_description_text += pipeline.GetCrossValidatiaon().GetDescription()
    method_description_text += "\n"

    statistic_description_text = " The performance of the model was evaluated using receiver operating characteristic " \
                                 "(ROC) curve analysis. The area under the ROC curve (AUC) was calculated for quantification. " \
                                 "The accuracy, sensitivity, specificity, positive predictive value (PPV), and negative " \
                                 "predictive value (NPV) were also calculated at the cutoff value that maximizes the " \
                                 "Youden index. We also bootstrapped the estimation 1000 times and applied a paired " \
                                 "t-test to give the 95% confidence interval. All the above processes were implemented with " \
                                 "FeAture Explorer (FAE, v0.2.2, https://github.com/salan668/FAE) on Python (3.5.4, https://www.python.org/). \n"

    # Result Description
    result_folder = os.path.join(result_folder, pipeline.GetStoreName())
    result = pd.read_csv(os.path.join(result_folder, 'result.csv'), index_col=0)

    train_pred = np.load(os.path.join(result_folder, 'train_predict.npy'))
    train_label = np.load(os.path.join(result_folder, 'train_label.npy'))
    val_pred = np.load(os.path.join(result_folder, 'val_predict.npy'))
    val_label = np.load(os.path.join(result_folder, 'val_label.npy'))

    if not testing_data_container.IsEmpty():
        result_description_text = "We found that the model based on {:d} features achieved the highest AUC on the " \
                                  "validation data set. The AUC and the accuracy reached {:.3f} and {:.3f}, respectively. " \
                                  "At this point, the AUC and the accuracy of the model reached {:.3f} and {:.3f} on the " \
                                  "testing data set. The clinical statistics of the diagnosis and the selected features " \
                                  "are shown in Table 1 and Table 2. The ROC curve is shown in Figure 1. \n" \
                                  "".format(pipeline.GetFeatureSelector().GetSelectedFeatureNumber(),
                                            float(result.loc['val_auc'].values),
                                            float(result.loc['val_accuracy'].values),
                                            float(result.loc['test_auc'].values),
                                            float(result.loc['test_accuracy'].values))
        test_pred = np.load(os.path.join(result_folder, 'test_predict.npy'))
        test_label = np.load(os.path.join(result_folder, 'test_label.npy'))
        DrawROCList([train_pred, val_pred, test_pred], [train_label, val_label, test_label],
                    name_list=['train', 'val', 'test'],
                    store_path=os.path.join(store_folder, 'ROC.jpg'), is_show=False)
    else:
        result_description_text = "We found that the model based on {:d} features achieved the highest AUC on the " \
                                  "validation data set. The AUC and the accuracy reached {:.3f} and {:.3f}, respectively. " \
                                  "The clinical statistics of the diagnosis and the selected features " \
                                  "are shown in Table 1 and Table 2. The ROC curve is shown in Figure 1. \n" \
                                  "".format(pipeline.GetFeatureSelector().GetSelectedFeatureNumber(),
                                            float(result.loc['val_auc'].values),
                                            float(result.loc['val_accuracy'].values))
        DrawROCList([train_pred, val_pred], [train_label, val_label],
                    name_list=['train', 'val'],
                    store_path=os.path.join(store_folder, 'ROC.jpg'), is_show=False)

    table_style = (
        ('FONT', (0, 0), (-1, -1), 'Helvetica', 9),
        ('LINEABOVE', (0, 0), (-1, 0), 1, colors.black),
        ('LINEABOVE', (0, 1), (-1, 1), 1, colors.black),
        ('LINEBELOW', (0, -1), (-1, -1), 1, colors.black),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER')
    )

    table_1_header = "Table 1. Clinical statistics in the diagnosis. "
    if testing_data_container.IsEmpty():
        table_1 = [['Statistics', 'Value'],
                   ['Accuracy', str(result.loc['val_accuracy'].values[0])],
                   ['AUC', str(result.loc['val_auc'].values[0])],
                   ['AUC 95% CIs', str(result.loc['val_auc 95% CIs'].values[0])],
                   ['NPV', str(result.loc['val_negative predictive value'].values[0])],
                   ['PPV', str(result.loc['val_positive predictive value'].values[0])],
                   ['Sensitivity', str(result.loc['val_sensitivity'].values[0])],
                   ['Specificity', str(result.loc['val_specificity'].values[0])]]
    else:
        table_1 = [['Statistics', 'Value'],
                   ['Accuracy', str(result.loc['test_accuracy'].values[0])],
                   ['AUC', str(result.loc['test_auc'].values[0])],
                   ['AUC 95% CIs', str(result.loc['test_auc 95% CIs'].values[0])],
                   ['NPV', str(result.loc['test_negative predictive value'].values[0])],
                   ['PPV', str(result.loc['test_positive predictive value'].values[0])],
                   ['Sensitivity', str(result.loc['test_sensitivity'].values[0])],
                   ['Specificity', str(result.loc['test_specificity'].values[0])]]

    candidate_file = glob.glob(os.path.join(result_folder, '*coef.csv'))
    if len(candidate_file) > 0:
        coef = pd.read_csv(candidate_file[0], index_col=0, header=0)
        table_2_header = 'Table 2. The coefficients of features in the model. '
        table_2 = [['Features', 'Coef in model']]
        for index in coef.index:
            table_2.append([str(index), "{:.3f}".format(coef.loc[index].values[0])])
    else:
        with open(os.path.join(result_folder, 'feature_select_info.csv'), 'r', newline='') as file:
            reader = csv.reader(file)
            for row in reader:
                if row[0] == 'selected_feature':
                    features = row[1:]
        table_2_header = 'Table 2. The selected features. '
        table_2 = [['Features', 'Rank']]
        for index in range(len(features)):
            table_2.append([features[index], str(index + 1)])

    figure_title = "Figure 1. The ROC curve. "

    # Build PDF
    pdf = PDFDocument(os.path.join(store_folder, 'report.pdf'))
    pdf.init_report()
    pdf.h1("Materials and Methods")
    pdf.p(data_description_text)
    pdf.p(method_description_text)
    pdf.p(statistic_description_text)
    pdf.h1("Result")
    pdf.p(result_description_text)
    pdf.table_header(table_1_header)
    pdf.table(table_1, 130, style=table_style)
    pdf.table_header(table_2_header)
    pdf.table(table_2, 200, style=table_style)
    pdf.p("\n\n")
    pdf.image(os.path.join(store_folder, 'ROC.jpg'))
    pdf.table_header(figure_title)
    pdf.end_connect("Thanks for using FAE v.0.2. If you need a specific report, please contact Yang Song "
                    "([email protected]) or Guang Yang ([email protected]). Any cooperation and "
                    "discussion are welcome. ")
    pdf.generate()
def Run(self, data_container, test_data_container=DataContainer(), store_folder=''):
    train_pred_list, train_label_list, val_pred_list, val_label_list = [], [], [], []
    data = data_container.GetArray()
    label = data_container.GetLabel()

    for train_index, val_index in self.__cv.split(data, label):
        train_data = data[train_index, :]
        train_label = label[train_index]
        val_data = data[val_index, :]
        val_label = label[val_index]

        self.__classifier.SetData(train_data, train_label)
        self.__classifier.Fit()

        train_prob = self.__classifier.Predict(train_data)
        val_prob = self.__classifier.Predict(val_data)

        train_pred_list.extend(train_prob)
        train_label_list.extend(train_label)
        val_pred_list.extend(val_prob)
        val_label_list.extend(val_label)

    total_train_label = np.asarray(train_label_list, dtype=np.uint8)
    total_train_pred = np.asarray(train_pred_list, dtype=np.float32)
    train_metric = EstimateMetirc(total_train_pred, total_train_label, 'train')

    total_label = np.asarray(val_label_list, dtype=np.uint8)
    total_pred = np.asarray(val_pred_list, dtype=np.float32)
    val_metric = EstimateMetirc(total_pred, total_label, 'val')

    # Refit on the whole training set before predicting on the test set.
    self.__classifier.SetDataContainer(data_container)
    self.__classifier.Fit()

    test_metric = {}
    if test_data_container.GetArray().size > 0:
        # Align the testing data to the features that were selected on the
        # training data before predicting.
        selected_feature_name = data_container.GetFeatureName()
        fs = FeatureSelector()
        test_data_container = fs.SelectFeatureByName(test_data_container, selected_feature_name)
        test_data = test_data_container.GetArray()
        test_label = test_data_container.GetLabel()
        test_pred = self.__classifier.Predict(test_data)
        test_metric = EstimateMetirc(test_pred, test_label, 'test')

    if store_folder and os.path.isdir(store_folder):
        info = {}
        info.update(train_metric)
        info.update(val_metric)

        np.save(os.path.join(store_folder, 'train_predict.npy'), total_train_pred)
        np.save(os.path.join(store_folder, 'val_predict.npy'), total_pred)
        np.save(os.path.join(store_folder, 'train_label.npy'), total_train_label)
        np.save(os.path.join(store_folder, 'val_label.npy'), total_label)

        DrawROCList(total_train_pred, total_train_label,
                    store_path=os.path.join(store_folder, 'train_ROC.jpg'), is_show=False)
        DrawROCList(total_pred, total_label,
                    store_path=os.path.join(store_folder, 'val_ROC.jpg'), is_show=False)

        if test_data_container.GetArray().size > 0:
            info.update(test_metric)
            np.save(os.path.join(store_folder, 'test_predict.npy'), test_pred)
            np.save(os.path.join(store_folder, 'test_label.npy'), test_label)
            DrawROCList(test_pred, test_label,
                        store_path=os.path.join(store_folder, 'test_ROC.jpg'), is_show=False)

        self.__classifier.Save(store_folder)
        self.SaveCVInfo(info, store_folder)

    return train_metric, val_metric, test_metric
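# A minimal sketch of the test-set feature alignment that SelectFeatureByName
# performs in the Run variant above: the testing data is reduced to exactly the
# feature columns kept during training, in the same order. Pandas-only
# illustration; the column names are hypothetical, not from FAE.
import pandas as pd

selected_feature_name = ['glcm_entropy', 'shape_volume']
test_frame = pd.DataFrame({'shape_volume': [1.0, 2.0],
                           'glcm_entropy': [0.3, 0.7],
                           'unused_feature': [5.0, 6.0]})
aligned_frame = test_frame[selected_feature_name]  # subset and reorder columns
print(list(aligned_frame.columns))  # ['glcm_entropy', 'shape_volume']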