def predict(X_train, X_test, y_train, y_test, k, method_name):
    print('Start knn predicting...')
    knn = neighbors.KNeighborsClassifier(n_neighbors=k, weights='distance',
                                         algorithm='auto', leaf_size=30, p=2,
                                         metric='minkowski', metric_params=None,
                                         n_jobs=-1)

    # One-vs-one: one binary kNN classifier per pair of classes
    knn_ovo = OneVsOneClassifier(knn)
    knn_ovo.fit(X_train, y_train.values.ravel())
    print('Accuracy score of knn_ovo: ' + '%.3f' % knn_ovo.score(X_test, y_test))

    # One-vs-rest: one binary kNN classifier per class
    knn_ovr = OneVsRestClassifier(knn)
    knn_ovr.fit(X_train, y_train.values.ravel())
    print('Accuracy score of knn_ovr: ' + '%.3f' % knn_ovr.score(X_test, y_test))

    plot.plot_conf_matrix(X_test, y_test, knn_ovr, method_name + '_ovr')
    plot.plot_conf_matrix(X_test, y_test, knn_ovo, method_name + '_ovo')
    plot.plot_roc(X_train, X_test, y_train, y_test, knn_ovr, method_name + '_ovr')
def display_scores(self, plot_flag=False):
    Scorer.display_scores(self)
    # if fprs/tprs is an array, don't print it
    if isinstance(self.rows[2][2], np.ndarray):
        rows_print = [row[:2] for row in self.rows]
    else:
        rows_print = self.rows
    print(tabulate(rows_print, headers=self.headers, floatfmt=".3f"))
    if plot_flag:
        try:
            plot.plot_roc(classes=self.rows_by_classes.keys(),
                          class_data=self.rows_by_classes,
                          metric=self.metric)
        except Exception:
            print("Cannot plot.")
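# A tiny, self-contained illustration of the row layout display_scores() above
# appears to assume (class name, score, optional fprs/tprs array that gets
# dropped from the printout). The header names and values here are invented
# examples, not taken from the original project.
import numpy as np
from tabulate import tabulate

headers = ['class', 'roc_auc']  # assumed headers
rows = [['class_0', 0.912, np.array([0.0, 0.1, 1.0])],
        ['class_1', 0.874, np.array([0.0, 0.2, 1.0])],
        ['macro avg', 0.893, np.array([0.0, 0.15, 1.0])]]

# Keep only the first two columns when the third holds raw curve arrays
rows_print = [row[:2] for row in rows] if isinstance(rows[2][2], np.ndarray) else rows
print(tabulate(rows_print, headers=headers, floatfmt=".3f"))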
def predict(X_train, X_test, y_train, y_test, method_name):
    print('Start SVM predicting...')

    # One-vs-one: one RBF-kernel SVM per pair of classes
    svm_ovo = OneVsOneClassifier(SVC(kernel='rbf', probability=True))
    svm_ovo.fit(X_train, y_train.values.ravel())
    print('Accuracy score of svm_ovo: ' + '%.3f' % svm_ovo.score(X_test, y_test))

    # One-vs-rest: one RBF-kernel SVM per class
    svm_ovr = OneVsRestClassifier(SVC(kernel='rbf', probability=True))
    svm_ovr.fit(X_train, y_train.values.ravel())
    print('Accuracy score of svm_ovr: ' + '%.3f' % svm_ovr.score(X_test, y_test))

    plot.plot_conf_matrix(X_test, y_test, svm_ovo, method_name + '_ovo')
    plot.plot_conf_matrix(X_test, y_test, svm_ovr, method_name + '_ovr')
    plot.plot_roc(X_train, X_test, y_train, y_test, svm_ovr, method_name + '_ovr')
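# A minimal usage sketch for the two predict() helpers above, which are assumed
# to live in separate modules (the module names knn_model/svm_model and the
# load_features() loader are hypothetical placeholders, not part of the project).
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import knn_model   # hypothetical module containing the kNN predict() above
import svm_model   # hypothetical module containing the SVM predict() above

X, y = load_features()  # hypothetical loader: X as DataFrame, y as single-column DataFrame

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Both kNN (distance-based) and RBF-kernel SVMs benefit from standardized features
scaler = StandardScaler().fit(X_train)
X_train_s = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns)
X_test_s = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

knn_model.predict(X_train_s, X_test_s, y_train, y_test, k=5, method_name='knn')
svm_model.predict(X_train_s, X_test_s, y_train, y_test, method_name='svm')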
def training(train_ds, val_ds, test_ds, model, EPOCHS):
    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            # training=True is only needed if there are layers with different
            # behavior during training versus inference (e.g. Dropout).
            predictions, feature_map = model(images, training=True)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, predictions)
        pred_label = tf.math.argmax(predictions, axis=1)
        _ = train_AUC.update_state(labels, pred_label)
        # pred_axis0, pred_axis1 = tf.unstack(predictions, axis=1)
        # _ = train_ROC.update_state(labels, pred_axis0, pred_axis1)

    @tf.function
    def val_step(images, labels):
        # training=False is only needed if there are layers with different
        # behavior during training versus inference (e.g. Dropout).
        predictions = model(images, training=False)
        v_loss = loss_object(labels, predictions)

        val_loss(v_loss)
        val_accuracy(labels, predictions)

    @tf.function
    def test_step(images, labels):
        # training=False is only needed if there are layers with different
        # behavior during training versus inference (e.g. Dropout).
        predictions = model(images, training=False)
        t_loss = loss_object(labels, predictions)

        test_loss(t_loss)
        test_accuracy(labels, predictions)
        pred_label = tf.math.argmax(predictions, axis=1)
        _ = test_AUC.update_state(labels, pred_label)
        # _ = test_ROC.update_state(labels, predictions)

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam()

    ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, model=model)
    manager = tf.train.CheckpointManager(ckpt, './tf_ckpts', max_to_keep=3)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
    train_AUC = tf.keras.metrics.AUC(
        num_thresholds=200, curve='ROC', summation_method='interpolation')
    train_ROC = ROC()
    num_class = 2
    # train_CM = C_M(num_class)

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
    test_AUC = tf.keras.metrics.AUC(
        num_thresholds=200, curve='ROC', summation_method='interpolation')
    test_ROC = ROC()
    # test_CM = C_M(num_class)

    # EPOCHS = 10
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
    test_log_dir = 'logs/gradient_tape/' + current_time + '/test'
    val_log_dir = 'logs/gradient_tape/' + current_time + '/val'
    ROC_log_dir = 'logs/gradient_tape/' + current_time + '/ROC'
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)
    test_summary_writer = tf.summary.create_file_writer(test_log_dir)
    validation_summary_writer = tf.summary.create_file_writer(val_log_dir)
    ROC_summary_writer = tf.summary.create_file_writer(ROC_log_dir)

    ckpt.restore(manager.latest_checkpoint)
    if manager.latest_checkpoint:
        print("Restored from {}".format(manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    for epoch in range(EPOCHS):
        train_loss.reset_states()
        train_accuracy.reset_states()
        train_AUC.reset_states()
        # train_CM.reset_states()
        train_ROC.reset_states()

        test_loss.reset_states()
        test_accuracy.reset_states()
        test_AUC.reset_states()
        test_ROC.reset_states()
        # test_CM.reset_states()
        val_loss.reset_states()
        val_accuracy.reset_states()

        for sample in train_ds:
            size_shape = tf.random.uniform([], minval=200, maxval=256)
            train_img = sample[0]
            train_label = sample[2]
            train_img, train_label = augment(train_img, train_label, size_shape)
            train_step(train_img, train_label)

            predictions = model(train_img, training=True)
            label_pred = tf.math.argmax(predictions, axis=1)
            _ = train_ROC.update_state(train_label, predictions)
            # _ = train_CM.update_state(train_label, label_pred)

            ckpt.step.assign_add(1)
            if int(ckpt.step) % 2 == 0:
                save_path = manager.save()
                print("Saved checkpoint for step {}: {}".format(int(ckpt.step), save_path))
                print(manager.checkpoints)

        # Plot the training ROC curve and save it as an image
        fp, tp = train_ROC.result()
        plot_roc('ROC_train', fp, tp)      # create figure & 1 axis
        plt.savefig('ROC_train_img.png')   # save the figure to file
        plt.show()
        fig = mpimg.imread('ROC_train_img.png')
        fig = tf.expand_dims(fig, 0)

        with train_summary_writer.as_default():
            tf.summary.scalar('train_loss', train_loss.result(), step=epoch)
            tf.summary.scalar('train_accuracy', train_accuracy.result(), step=epoch)
            tf.summary.scalar('AUC_train', train_AUC.result(), step=epoch)
            tf.summary.image("ROC_train", fig, step=epoch)
            # tf.summary.image('Confusion Matrix train', fig_CM, step=epoch)

        for sample in val_ds:
            val_img = sample[0]
            val_label = sample[2]
            val_step(val_img, val_label)

        with validation_summary_writer.as_default():
            tf.summary.scalar('val_loss', val_loss.result(), step=epoch)
            tf.summary.scalar('val_accuracy', val_accuracy.result(), step=epoch)

        template = 'Epoch {}, Loss: {}, Accuracy: {}, Val Loss: {}, Val Accuracy: {}'
        print(template.format(epoch + 1,
                              train_loss.result(),
                              train_accuracy.result() * 100,
                              val_loss.result(),
                              val_accuracy.result() * 100))
    for sample in test_ds:
        test_img = sample[0]
        test_label = sample[2]
        test_step(test_img, test_label)

        predictions = model(test_img, training=False)
        label_pred = tf.math.argmax(predictions, axis=1)
        _ = test_ROC.update_state(test_label, predictions)
        # _ = test_CM.update_state(test_label, label_pred)

    # Plot the test ROC curve and save it as an image
    fp, tp = test_ROC.result()
    plot_roc('ROC_test', fp, tp)       # create figure & 1 axis
    plt.savefig('ROC_test_img.png')    # save the figure to file
    plt.show()
    fig = mpimg.imread('ROC_test_img.png')
    fig = tf.expand_dims(fig, 0)

    with test_summary_writer.as_default():
        tf.summary.scalar('test_loss', test_loss.result(), step=0)
        tf.summary.scalar('test_accuracy', test_accuracy.result(), step=0)
        tf.summary.scalar('AUC_test', test_AUC.result(), step=0)
        tf.summary.image("ROC_test", fig, step=0)
        # tf.summary.image('Confusion Matrix test', fig_CM, step=0)

    template = 'Test Loss: {}, Test Accuracy: {}'
    print(template.format(test_loss.result(), test_accuracy.result() * 100))
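# The loop above calls plot_roc(name, fp, tp) with the false-positive and
# true-positive rate arrays returned by the custom ROC metric. That helper is
# not shown here; this is a minimal sketch of what it might look like, under
# the assumption that fp and tp are rate arrays in [0, 1].
import matplotlib.pyplot as plt


def plot_roc(name, fp, tp):
    """Draw a single ROC curve (FPR vs. TPR) on a new figure."""
    plt.figure(figsize=(6, 6))
    plt.plot(fp, tp, label=name, linewidth=2)
    plt.plot([0, 1], [0, 1], linestyle='--', color='grey')  # chance line
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.legend(loc='lower right')
    plt.title(name)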
print("max_specificity:", max_specificity) #FINAL TESTING k_best_features = 4 datasetX = do_feature_selection(X, y, k_best_features) X_train, X_test, y_train, y_test = train_test_split(datasetX, y, test_size=0.2, random_state=1, shuffle=False) X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=1, shuffle=False) X_train_final = np.concatenate((X_train, X_test), axis=0) y_train_final = np.concatenate((y_train, y_test), axis=0) dtree = train_DecisionTree(X_train_final, y_train_final) y_pred = predict_testdata(dtree, X_val) evaluation_metric = EvaluationMetric() # confusion_matrix = confusion_matrix(y_test.values, y_pred) result = evaluation_metric.get_evaluation_metrics(y_val.values, y_pred) print(result) probs = dtree.predict_proba(X_val) probs = probs[:, 1] plot_roc(y_val.values, probs) plot_precision_recall(y_val.values, y_pred, probs)
# please provide the path to your training and testing datasets
training = pd.read_csv("...")
test = pd.read_csv("....")

complications = ['SBI', 'AKI', 'ARDS']
framework_train = apply_stratified_framework(training, complications)
framework_test = apply_stratified_framework(test, complications)

train_columns = [
    'Diastolic Blood Pressure_mean', 'Diastolic Blood Pressure_min',
    'Oxygen Saturation_max', 'Oxygen Saturation_mean', 'Oxygen Saturation_min',
    'Peripheral Pulse Rate_max', 'Peripheral Pulse Rate_mean', 'Peripheral Pulse Rate_min',
    'Respiratory Rate_max', 'Respiratory Rate_mean', 'Respiratory Rate_min',
    'Systolic Blood Pressure_max', 'Systolic Blood Pressure_mean', 'Systolic Blood Pressure_min',
    'Temperature Axillary_max', 'Temperature Axillary_mean', 'Temperature Axillary_min',
    'GCS_mean', 'GCS_min', 'GCS_max',
    'GENDER', 'AGE', 'COUGH', 'FEVER', 'SOB', 'SORE_THROAT', 'RASH',
    'BMI', 'DIABETES', 'HYPERTENSION', 'CKD', 'CANCER'
]

models_all, trainsets, classifers = get_models(complications, framework_train, train_columns)
true_ouctomes, predicted_ouctomes = get_results(framework_test, complications,
                                                models_all, train_columns)

plot_roc(complications, true_ouctomes, predicted_ouctomes, "testset")
plot_PRC(complications, true_ouctomes, predicted_ouctomes, "testset")
# plt.scatter(x_train[:, 0].tolist(), y_train.tolist())
# plt.show()
# plt.scatter(x_train[:, 1].tolist(), y_train.tolist())
# plt.show()
# plt.scatter(x_train[:, 2].tolist(), y_train.tolist())
# plt.show()
# plt.scatter(x_train[:, 3].tolist(), y_train.tolist())
# plt.show()
# plt.scatter(x_train[:, 4].tolist(), y_train.tolist())
# plt.show()
# plt.scatter(x_train[:, 5].tolist(), y_train.tolist())
# plt.show()

precision, recall, F1, _ = precision_recall_fscore_support(y_test, y_test_predict)
print(precision, recall, F1)

plot.plot_roc(x_train, x_test, y_train, y_test)

# Plot the confusion matrix
obj1 = confusion_matrix(y_test, y_test_predict)
print('confusion_matrix\n', obj1)
classes = list(set(y_test_predict))
classes.sort()

plt.figure(figsize=(12, 8), dpi=100)
plt.imshow(obj1, cmap=plt.cm.Blues)
indices = range(len(obj1))
plt.xticks(indices, classes)
plt.yticks(indices, classes)
plt.colorbar()
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.title('Confusion Matrix', fontsize=10, fontproperties='arial')
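# The hand-rolled imshow plot above can also be produced with scikit-learn's
# built-in ConfusionMatrixDisplay (available since scikit-learn 1.0). This is
# an equivalent alternative, not part of the original snippet.
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

disp = ConfusionMatrixDisplay.from_predictions(y_test, y_test_predict, cmap=plt.cm.Blues)
disp.ax_.set_title('Confusion Matrix')
plt.show()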
                                      test_size=0.3, random_state=56)
df_train = df_notnull.loc[train_idx]
features_train = features.loc[train_idx]
target_train = target.loc[train_idx]
df_test = df_notnull.loc[test_idx]
features_test = features.loc[test_idx]
target_test = target.loc[test_idx]

#%% train logistic classifier
classifier = LogisticRegression()
classifier.fit(features_train, target_train)

#%% score on train
scores_train = classifier.predict_proba(features_train)[:, 1]
(tp_train, fp_train, tsh) = plot_roc(target_train, scores_train)

#%% score on test
scores_test = classifier.predict_proba(features_test)[:, 1]
(tp_test, fp_test, tsh) = plot_roc(target_test, scores_test)

#%% plot threshold values
pd.DataFrame(nested_to_list(zip(tsh, tp_test, fp_test, fp_test - tp_test)),
             columns=['Threshold', 'True Positive Rate',
                      'False Positive Rate', 'Difference']).plot(x='Threshold')
plt.xlim(0, 0.4)
plt.ylim([0, 1])
plt.grid()
plt.show()
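# Here plot_roc(target, scores) is a project helper that both draws the curve
# and returns the rates. Judging by how its return value is unpacked and then
# plotted against thresholds above, a minimal sketch could look like this (the
# exact implementation and return order are assumptions):
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve


def plot_roc(y_true, scores):
    """Plot an ROC curve and return (tpr, fpr, thresholds) for later use."""
    fpr, tpr, thresholds = roc_curve(y_true, scores)
    plt.plot(fpr, tpr)
    plt.plot([0, 1], [0, 1], linestyle='--')
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.show()
    return tpr, fpr, thresholds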
try:
    os.mkdir('plots')
except OSError as error:
    print(error)

# please provide the path to your training and testing datasets
training = pd.read_csv("...")
test = pd.read_csv("....")

complications = [
    'Elevated_troponin', 'Elevated_d-dimer', 'Elevated_Amino', 'Elevated_IL6',
    'SBI', 'AKI', 'ARDS'
]

framework_train = apply_stratified_framework(training, complications)
framework_test = apply_stratified_framework(test, complications)

targets = get_targets(complications)
train_columns = [x for x in training.columns if x not in targets]

models_all, trainsets, classifers = get_models(complications, framework_train, train_columns)
true_ouctomes, predicted_ouctomes = get_results(framework_test, complications,
                                                models_all, train_columns)

plot_roc(complications, true_ouctomes, predicted_ouctomes)
plot_PRC(complications, true_ouctomes, predicted_ouctomes)
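# plot_roc / plot_PRC here take the list of complications plus per-complication
# true and predicted outcomes (and, in the earlier snippet, a dataset suffix for
# the filename). Their implementation is not shown; this is a minimal sketch
# under the assumptions that the outcome containers are dicts keyed by
# complication, that the predictions are positive-class scores, and that figures
# go into the plots/ directory created above.
import matplotlib.pyplot as plt
from sklearn.metrics import auc, roc_curve


def plot_roc(complications, true_outcomes, predicted_outcomes, suffix=''):
    plt.figure(figsize=(7, 7))
    for comp in complications:
        fpr, tpr, _ = roc_curve(true_outcomes[comp], predicted_outcomes[comp])
        plt.plot(fpr, tpr, label='%s (AUC = %.3f)' % (comp, auc(fpr, tpr)))
    plt.plot([0, 1], [0, 1], linestyle='--', color='grey')
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.legend(loc='lower right')
    fname = 'plots/roc_%s.png' % suffix if suffix else 'plots/roc.png'
    plt.savefig(fname)
    plt.show()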