def run(self): """ """ # Load model output = pickle.load(open(self.model_file, "rb")) preprocessor = output["preprocessor"] best_model = output["model"] if hasattr(best_model, "best_estimator_"): best_model = best_model.best_estimator_ # Get data data_loader = DataLoader(configuration_file=self.data_file) data_loader.load_data() feature = data_loader.features_ target = data_loader.targets_ # Predict if not isinstance(best_model, list): # Force the model and preprocessor is a list best_model = [best_model, best_model] if not isinstance(preprocessor, list): # Force the model and preprocessor is a list preprocessor = [preprocessor, preprocessor] predict_label = [] pred_prob = [] for prep, model_ in zip(preprocessor, best_model): # Feature Preprocessing feature = prep.transform(feature) # Predict predict_label.append(model_.predict(feature)) if hasattr(model_, 'predict_proba'): pred_prob.append(model_.predict_proba(feature)) elif hasattr(model_, 'decision_function'): prob_pos = model_.decision_function(feature) prob_pos = \ (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min()) pred_prob.append(prob_pos) else: pred_prob = predict_label # Get voted predict label final_label = self.vote(predict_label) final_prob = np.mean(pred_prob,0) # Evaluation acc, sens, spec, _, _ = ModelEvaluator().binary_evaluator( target, final_label, final_prob, verbose=1, is_showfig=False, is_savefig=False ) return acc, sens, spec
def main_run(self):
    # Get all inputs
    self.load_data()
    self.get_all_inputs()

    # Make pipeline
    self.make_pipeline_()

    # Get training and test datasets
    cv = self.method_model_evaluation_
    target_test_all = []
    for train_index, test_index in cv.split(self.features_, self.targets_):
        feature_train = self.features_[train_index, :]
        feature_test = self.features_[test_index, :]
        target_train = self.targets_[train_index]
        target_test = self.targets_[test_index]
        target_test_all.extend(target_test)

        # Resample
        imbalance_resample = self.method_unbalance_treatment_
        if imbalance_resample:
            feature_train, target_train = imbalance_resample.fit_resample(feature_train, target_train)
            print(f"After re-sampling, the sample sizes are: {sorted(Counter(target_train).items())}")

        # Fit
        self.fit_(feature_train, target_train)

        # Get weights
        self.get_weights_(feature_train, target_train)

        # Predict
        y_pred, y_prob = self.predict(feature_test)

        # Eval performances of this fold
        acc, sens, spec, auc = ModelEvaluator().binary_evaluator(
            target_test, y_pred, y_prob,
            accuracy_kfold=None, sensitivity_kfold=None, specificity_kfold=None, AUC_kfold=None,
            verbose=1, is_showfig=False, is_savefig=False
        )

    return y_pred, y_prob
def main_run(self):
    self.preprocessing()

    # Get training and test datasets
    self.target_test_all = []
    self.pred_prob = []
    self.real_score = []
    preprocessor = []
    models = []
    weights = []
    subname = []
    for train_index, test_index in self.method_model_evaluation_.split(
            self.features_, self.targets_):
        feature_train = self.features_[train_index, :]
        feature_test = self.features_[test_index, :]
        target_train = self.targets_[train_index]
        target_test = self.targets_[test_index]

        # Preprocessing
        self.prep_ = Denan(how='median')
        feature_train = self.prep_.fit_transform(feature_train)
        feature_test = self.prep_.transform(feature_test)
        preprocessor.append(self.prep_)

        self.target_test_all.extend(target_test)
        subname_ = self.id_[test_index]
        subname.extend(subname_)

        # Fit
        self.fit_(self.model_, feature_train, target_train, self.memory)
        models.append(self.model_)

        # Get weights
        _, weights_ = self.get_weights_(feature_train, target_train)

        # Predict
        y_prob = self.predict_(self.model_, feature_test)

        # Eval performances of this fold
        score = self.metric(target_test, y_prob)
        self.real_score.append(score)
        self.pred_prob.extend(y_prob)
        weights.append(weights_)

    # Eval performances over all folds
    out_name_perf = os.path.join(self.out_dir, "regression_performances.pdf")
    all_score = ModelEvaluator().regression_evaluator(
        self.target_test_all, self.pred_prob, self.real_score,
        is_showfig=False, is_savefig=True, out_name=out_name_perf)

    # Save weights
    self.save_weight(weights, self.out_dir)

    # Save outputs
    self.outputs = {
        "preprocessor": preprocessor,
        "model": models,
        "subname": subname,
        "test_targets": self.target_test_all,
        "test_probability": self.pred_prob,
        "score": self.real_score
    }
    pickle.dump(self.outputs, open(os.path.join(self.out_dir, "outputs.pickle"), "wb"))
    return self
def main_run(self):
    # Get all inputs
    self.load_data()
    self.get_all_inputs()

    # Make pipeline
    self.make_pipeline_()

    # Get training and test datasets
    cv = self.method_model_evaluation_
    accuracy = []
    sensitivity = []
    specificity = []
    auc = []
    pred_test = []
    decision = []
    weights = []
    target_test_all = []
    for train_index, test_index in cv.split(self.features_, self.targets_):
        feature_train = self.features_[train_index, :]
        feature_test = self.features_[test_index, :]
        target_train = self.targets_[train_index]
        target_test = self.targets_[test_index]
        target_test_all.extend(target_test)

        # Resample
        imbalance_resample = self.method_unbalance_treatment_
        if imbalance_resample:
            feature_train, target_train = imbalance_resample.fit_resample(feature_train, target_train)
            print(f"After re-sampling, the sample sizes are: {sorted(Counter(target_train).items())}")

        # Fit
        self.fit_(feature_train, target_train)

        # Get weights
        self.get_weights_(feature_train, target_train)

        # Predict
        y_pred, y_prob = self.predict(feature_test)

        # Eval performances of this fold
        acc, sens, spec, auc_ = ModelEvaluator().binary_evaluator(
            target_test, y_pred, y_prob,
            accuracy_kfold=None, sensitivity_kfold=None, specificity_kfold=None, AUC_kfold=None,
            verbose=False, is_showfig=False, is_savefig=False
        )
        accuracy.append(acc)
        sensitivity.append(sens)
        specificity.append(spec)
        auc.append(auc_)
        pred_test.extend(y_pred)
        decision.extend(y_prob)
        weights.append(self.weights_)

    # Eval performances over all folds
    acc, sens, spec, auc = ModelEvaluator().binary_evaluator(
        target_test_all, pred_test, decision,
        accuracy_kfold=accuracy, sensitivity_kfold=sensitivity,
        specificity_kfold=specificity, AUC_kfold=auc,
        verbose=1, is_showfig=False, is_savefig=False,
        legend1='EMCI', legend2='AD',
        out_name=r"D:\悦影科技\数据处理业务1\data_variance_22_30_z\分类结果\adVSemci.pdf")

    return y_pred, y_prob
def main_run(self):
    self.preprocessing()

    # Get training and test datasets
    self.real_accuracy = []
    self.real_sensitivity = []
    self.real_specificity = []
    self.real_auc = []
    self.pred_label = []
    pred_prob = []
    weights = []
    self.target_test_all = []
    subname = []
    for train_index, test_index in self.method_model_evaluation_.split(
            self.features_, self.targets_):
        feature_train = self.features_[train_index, :]
        feature_test = self.features_[test_index, :]
        target_train = self.targets_[train_index]
        target_test = self.targets_[test_index]
        subname_ = self.id_[test_index]
        subname.extend(subname_)

        # Preprocessing
        self.prep_ = Denan(how='median')
        feature_train = self.prep_.fit_transform(feature_train)
        feature_test = self.prep_.transform(feature_test)

        # Extend sorted real targets of test data
        self.target_test_all.extend(target_test)

        # Resample
        imbalance_resample = self.method_unbalance_treatment_
        if imbalance_resample:
            print(f"Before re-sampling, the sample sizes are: {sorted(Counter(target_train).items())}")
            feature_train, target_train = imbalance_resample.fit_resample(
                feature_train, target_train)
            print(f"After re-sampling, the sample sizes are: {sorted(Counter(target_train).items())}")

        # Fit
        self.fit_(self.model_, feature_train, target_train, self.memory)

        # Weights
        weights_, _ = self.get_weights_(feature_train, target_train)

        # Predict
        y_pred, y_prob = self.predict_(self.model_, feature_test)

        # Eval performances of this fold
        acc, sens, spec, auc_, _ = ModelEvaluator().binary_evaluator(
            target_test, y_pred, y_prob,
            accuracy_kfold=None, sensitivity_kfold=None, specificity_kfold=None, AUC_kfold=None,
            verbose=False, is_showfig=False, is_savefig=False)

        self.real_accuracy.append(acc)
        self.real_sensitivity.append(sens)
        self.real_specificity.append(spec)
        self.real_auc.append(auc_)
        self.pred_label.extend(y_pred)
        pred_prob.extend(y_prob)
        weights.append(weights_)

    # Save weights
    self.save_weight(weights, self.out_dir)

    # Eval performances over all folds
    out_name_perf = os.path.join(self.out_dir, "classification_performances.pdf")
    if os.path.exists(out_name_perf):
        # Avoid overwriting an existing report by appending a timestamp
        time_ = time.strftime('%Y%m%d%H%M%S')
        out_name_perf = os.path.join(
            self.out_dir, f"classification_performances_{time_}.pdf")

    acc, sens, spec, auc, _ = ModelEvaluator().binary_evaluator(
        self.target_test_all, self.pred_label, pred_prob,
        accuracy_kfold=self.real_accuracy, sensitivity_kfold=self.real_sensitivity,
        specificity_kfold=self.real_specificity, AUC_kfold=self.real_auc,
        verbose=1, is_showfig=False, is_savefig=True,
        legend1='Controls', legend2='Patients', out_name=out_name_perf)

    # Save outputs
    self.outputs = {
        "preprocessor": self.prep_,
        "model": self.model_,
        "subname": subname,
        "test_targets": self.target_test_all,
        "test_prediction": self.pred_label,
        "test_probability": pred_prob,
        "accuracy": self.real_accuracy,
        "sensitivity": self.real_sensitivity,
        "specificity": self.real_specificity,
        "auc": self.real_auc
    }
    pickle.dump(self.outputs, open(os.path.join(self.out_dir, "outputs.pickle"), "wb"))
    return self
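# A quick sketch (illustrative only, not part of the library) of how the
# "outputs.pickle" file written above could be re-loaded and inspected.
# "out_dir" is a hypothetical output directory.
import os
import pickle

out_dir = "./results"  # hypothetical
with open(os.path.join(out_dir, "outputs.pickle"), "rb") as f:
    outputs = pickle.load(f)

print(outputs.keys())                  # preprocessor, model, subname, ...
print(outputs["accuracy"])             # per-fold accuracies
print(len(outputs["test_targets"]))    # pooled test targets across folds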
import os
import pickle

import numpy as np
import matplotlib.pyplot as plt
import tensorflow
from tensorflow import keras
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras import backend as K
from tensorflow.python.keras.utils import np_utils
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from eslearn.model_evaluator import ModelEvaluator
from eslearn.machine_learning.neural_network.eeg.el_eeg_prep_data import parse_configuration

meval = ModelEvaluator()


class Trainer():

    def __init__(self, out_dir=None):
        self.out_dir = out_dir
        self._model_file = "eegModel.h5"
        self._modelSaveName = os.path.join(out_dir, self._model_file)
        self._historySaveName = os.path.join(self.out_dir, "trainHistoryDict.json")
        self._lossSaveName = os.path.join(self.out_dir, "loss.pdf")

    def prep_data(self, x, y, num_classes):
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=0.4, shuffle=True, random_state=666)
def permutation_test(self):
    print(f"Permutation test: {self.time_permutation} times...\n")

    self.permuted_accuracy = []
    self.permuted_sensitivity = []
    self.permuted_specificity = []
    self.permuted_auc = []

    count = 0
    widgets = ['Permutation testing', Percentage(), ' ', Bar('='), ' ', Timer(), ' ', ETA()]
    pbar = ProgressBar(widgets=widgets, maxval=self.time_permutation).start()
    for i in range(self.time_permutation):
        # Get training and test datasets
        accuracy = []
        sensitivity = []
        specificity = []
        AUC = []
        for train_index, test_index in self.method_model_evaluation.split(
                self.feature, self.label):
            feature_train = self.feature[train_index, :]
            feature_test = self.feature[test_index, :]
            permuted_target_train = self.label[train_index][
                np.random.permutation(len(train_index))]
            target_test = self.label[test_index]

            # Preprocessing
            feature_train = self.prep_.fit_transform(feature_train)
            feature_test = self.prep_.transform(feature_test)

            # Resample
            imbalance_resample = self.method_unbalance_treatment_
            if imbalance_resample:
                feature_train, permuted_target_train = imbalance_resample.fit_resample(
                    feature_train, permuted_target_train)

            # Fit
            self.fit_(self.model, feature_train, permuted_target_train, self.memory)

            # Predict
            y_pred, y_prob = self.predict_(self.model, feature_test)

            # Eval performances
            acc, sens, spec, auc_, _ = ModelEvaluator().binary_evaluator(
                target_test, y_pred, y_prob,
                accuracy_kfold=None, sensitivity_kfold=None, specificity_kfold=None, AUC_kfold=None,
                verbose=False, is_showfig=False, is_savefig=False)
            accuracy.append(acc)
            sensitivity.append(sens)
            specificity.append(spec)
            AUC.append(auc_)

        # Average performances of one permutation
        self.permuted_accuracy.append(np.mean(accuracy))
        self.permuted_sensitivity.append(np.mean(sensitivity))
        self.permuted_specificity.append(np.mean(specificity))
        self.permuted_auc.append(np.mean(AUC))

        # Progress bar
        pbar.update(count)
        count += 1
    pbar.finish()

    # Get p values
    pvalue_acc = self.calc_pvalue(self.permuted_accuracy, np.mean(self.real_accuracy))
    pvalue_sens = self.calc_pvalue(self.permuted_sensitivity, np.mean(self.real_sensitivity))
    pvalue_spec = self.calc_pvalue(self.permuted_specificity, np.mean(self.real_specificity))
    pvalue_auc = self.calc_pvalue(self.permuted_auc, np.mean(self.real_auc))
    print(f"p value for acc = {pvalue_acc:.3f}")
    return pvalue_acc, pvalue_sens, pvalue_spec, pvalue_auc
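# calc_pvalue() is not shown in this section. A common convention for a
# permutation-test p value counts how often the permuted score reaches the
# observed score; this is an illustrative sketch, not necessarily the
# library's implementation.
import numpy as np

def calc_pvalue_sketch(permuted_scores, real_score):
    permuted_scores = np.asarray(permuted_scores)
    n_perm = len(permuted_scores)
    # Add 1 to numerator and denominator so the p value is never exactly 0
    return (np.sum(permuted_scores >= real_score) + 1) / (n_perm + 1)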
def loop(self):
    self.get_configuration_(
        configuration_file=r'D:\My_Codes\virtualenv_eslearn\Lib\site-packages\eslearn\GUI\test\configuration_file.json')
    self.get_preprocessing_parameters()
    self.get_dimension_reduction_parameters()
    self.get_feature_selection_parameters()
    self.get_unbalance_treatment_parameters()
    self.get_machine_learning_parameters()
    self.get_model_evaluation_parameters()

    method_feature_preprocessing = self.method_feature_preprocessing
    param_feature_preprocessing = self.param_feature_preprocessing
    method_dim_reduction = self.method_dim_reduction
    param_dim_reduction = self.param_dim_reduction
    method_feature_selection = self.method_feature_selection
    param_feature_selection = self.param_feature_selection
    method_machine_learning = self.method_machine_learning
    param_machine_learning = self.param_machine_learning

    # Load
    self._load_data_infolder()

    # Split data into training and test datasets
    accuracy = []
    sensitivity = []
    specificity = []
    auc = []
    pred_test = []
    decision = []
    weights = []
    label_test_all = []
    # shuffle=True is required for random_state to take effect in StratifiedKFold
    cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=666)
    for train_index, test_index in cv.split(self.data, self.label):
        data_train = self.data[train_index, :]
        data_test = self.data[test_index, :]
        label_train = self.label[train_index]
        label_test = self.label[test_index]
        label_test_all.extend(label_test)

        # Resample
        ros = RandomOverSampler(random_state=0)
        data_train, label_train = ros.fit_resample(data_train, label_train)
        print(f"After re-sampling, the sample sizes are: {sorted(Counter(label_train).items())}")

        acc, sens, spec, auc_, pred_test_, dec, wei = self.pipeline_grid(
            method_feature_preprocessing=method_feature_preprocessing,
            param_feature_preprocessing=param_feature_preprocessing,
            method_dim_reduction=method_dim_reduction,
            param_dim_reduction=param_dim_reduction,
            method_feature_selection=method_feature_selection,
            param_feature_selection=param_feature_selection,
            method_machine_learning=method_machine_learning,
            param_machine_learning=param_machine_learning,
            data_train=data_train,
            data_test=data_test,
            label_train=label_train,
            label_test=label_test
        )
        accuracy.append(acc)
        sensitivity.append(sens)
        specificity.append(spec)
        auc.append(auc_)
        pred_test.extend(pred_test_)
        decision.extend(dec)
        weights.append(wei)

    # Eval performances over all folds
    acc, sens, spec, auc = ModelEvaluator().binary_evaluator(
        label_test_all, pred_test, decision,
        accuracy_kfold=accuracy, sensitivity_kfold=sensitivity,
        specificity_kfold=specificity, AUC_kfold=auc,
        verbose=1, is_showfig=True,
        legend1=self.legend1, legend2=self.legend2,
        is_savefig=False, out_name=self.performances_save_name
    )

    # Save weights to nii
    # self._weight2nii(weights)

    return accuracy, sensitivity, specificity, auc, weights
def pipeline_grid(self,
                  method_feature_preprocessing=None,
                  param_feature_preprocessing=None,
                  method_dim_reduction=None,
                  param_dim_reduction=None,
                  method_feature_selection=None,
                  param_feature_selection=None,
                  method_machine_learning=None,
                  param_machine_learning=None):

    self.make_pipeline_(
        method_feature_preprocessing=method_feature_preprocessing,
        param_feature_preprocessing=param_feature_preprocessing,
        method_dim_reduction=method_dim_reduction,
        param_dim_reduction=param_dim_reduction,
        method_feature_selection=method_feature_selection,
        param_feature_selection=param_feature_selection,
        method_machine_learning=method_machine_learning,
        param_machine_learning=param_machine_learning)
    print(self.param_search_)

    # Train
    self.fit_pipeline_(self.data_train, self.label_train)

    # Get weights
    self.get_weights_(self.data_train, self.label_train)
    self._weight2nii(self.weights_)

    # Predict
    pred_train, dec_train = self.predict(self.data_train)
    self.predict_validation, self.decision = self.predict(self.data_validation)

    # Eval performances
    print("Evaluating training data...")
    bi_evaluator = ModelEvaluator().binary_evaluator
    acc, sens, spec, auc = bi_evaluator(
        self.label_train, pred_train, dec_train,
        accuracy_kfold=None, sensitivity_kfold=None, specificity_kfold=None, AUC_kfold=None,
        verbose=1, is_showfig=False, is_savefig=True,
        out_name=os.path.join(self.save_directory, "performances_train.pdf"))

    print("Evaluating test data...")
    self.val_label = np.loadtxt(self.val_label)
    acc, sens, spec, auc = bi_evaluator(
        self.val_label, self.predict_validation, self.decision,
        accuracy_kfold=None, sensitivity_kfold=None, specificity_kfold=None, AUC_kfold=None,
        verbose=1, is_showfig=False, is_savefig=True,
        out_name=os.path.join(self.save_directory, "performances_test.pdf"))
def pipeline_grid(self,
                  method_feature_preprocessing=None,
                  param_feature_preprocessing=None,
                  method_dim_reduction=None,
                  param_dim_reduction=None,
                  method_feature_selection=None,
                  param_feature_selection=None,
                  method_machine_learning=None,
                  param_machine_learning=None):

    self.make_pipeline_(
        method_feature_preprocessing=method_feature_preprocessing,
        param_feature_preprocessing=param_feature_preprocessing,
        method_dim_reduction=method_dim_reduction,
        param_dim_reduction=param_dim_reduction,
        method_feature_selection=method_feature_selection,
        param_feature_selection=param_feature_selection,
        method_machine_learning=method_machine_learning,
        param_machine_learning=param_machine_learning)

    accuracy_train = np.zeros([self.n_perm, 1])
    sensitivity_train = np.zeros([self.n_perm, 1])
    specificity_train = np.zeros([self.n_perm, 1])
    auc_train = np.zeros([self.n_perm, 1])
    accuracy_validation = np.zeros([self.n_perm, 1])
    sensitivity_validation = np.zeros([self.n_perm, 1])
    specificity_validation = np.zeros([self.n_perm, 1])
    auc_validation = np.zeros([self.n_perm, 1])

    for i in range(self.n_perm):
        print(f"Permutation {i+1}/{self.n_perm}\n")
        label_train_perm = np.random.permutation(self.label_train)

        # Train
        self.fit_pipeline_(self.data_train, label_train_perm)

        # Predict
        pred_train, dec_train = self.predict(self.data_train)
        self.predict_validation, self.decision = self.predict(self.data_validation)

        # Eval performances
        bi_evaluator = ModelEvaluator().binary_evaluator
        acc, sens, spec, auc = bi_evaluator(
            self.label_train, pred_train, dec_train,
            accuracy_kfold=None, sensitivity_kfold=None, specificity_kfold=None, AUC_kfold=None,
            verbose=False, is_showfig=False, is_savefig=False,
            out_name=os.path.join(self.save_directory, "performances_train.pdf"))
        accuracy_train[i] = acc
        sensitivity_train[i] = sens
        specificity_train[i] = spec
        auc_train[i] = auc

        acc, sens, spec, auc = bi_evaluator(
            self.label_validation, self.predict_validation, self.decision,
            accuracy_kfold=None, sensitivity_kfold=None, specificity_kfold=None, AUC_kfold=None,
            verbose=False, is_showfig=False, is_savefig=False,
            out_name=os.path.join(self.save_directory, "performances_test.pdf"))
        accuracy_validation[i] = acc
        sensitivity_validation[i] = sens
        specificity_validation[i] = spec
        auc_validation[i] = auc

    np.save(
        os.path.join(self.save_directory, "permutation_test_results_train"),
        [accuracy_train, sensitivity_train, specificity_train, auc_train])
    np.save(
        os.path.join(self.save_directory, "permutation_test_results_validation"),
        [accuracy_validation, sensitivity_validation, specificity_validation, auc_validation])
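# The arrays saved above can later be re-loaded to derive empirical p values.
# Illustrative sketch only; "save_directory" and "observed_auc" are hypothetical
# values, the latter would come from fitting on the un-permuted labels.
import os
import numpy as np

save_directory = "./results"  # hypothetical
results = np.load(os.path.join(save_directory, "permutation_test_results_validation.npy"))
accuracy_perm, sensitivity_perm, specificity_perm, auc_perm = results

observed_auc = 0.75  # hypothetical observed performance
p_auc = (np.sum(auc_perm >= observed_auc) + 1) / (len(auc_perm) + 1)
print(f"Permutation p value for validation AUC: {p_auc:.3f}")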