def predict(X_train, X_test, y_train, y_test, k, method_name):

    print('Start knn predicting...')

    knn = neighbors.KNeighborsClassifier(n_neighbors=k,
                                         weights='distance',
                                         algorithm='auto',
                                         leaf_size=30,
                                         p=2,
                                         metric='minkowski',
                                         metric_params=None,
                                         n_jobs=-1)
    knn_ovo = OneVsOneClassifier(knn)
    knn_ovo.fit(X_train, y_train.values.ravel())
    print('Accuracy score of knn_ovo: ' +
          '%.3f' % knn_ovo.score(X_test, y_test))

    knn_ovr = OneVsRestClassifier(knn)
    knn_ovr.fit(X_train, y_train.values.ravel())

    print('Accuracy score of knn_ovr: ' +
          '%.3f' % knn_ovr.score(X_test, y_test))

    plot.plot_conf_matrix(X_test, y_test, knn_ovr, method_name + '_ovr')
    plot.plot_conf_matrix(X_test, y_test, knn_ovo, method_name + '_ovo')
    plot.plot_roc(X_train, X_test, y_train, y_test, knn_ovr,
                  method_name + '_ovr')
Example No. 2
def display_scores(self, plot_flag=False):
    Scorer.display_scores(self)
    # if fprs/tprs is an array, don't print it
    if isinstance(self.rows[2][2], np.ndarray):
        rows_print = [row[:2] for row in self.rows]
    else:
        rows_print = self.rows
    print(tabulate(rows_print, headers=self.headers, floatfmt=".3f"))
    if plot_flag:
        try:
            plot.plot_roc(classes=self.rows_by_classes.keys(),
                          class_data=self.rows_by_classes,
                          metric=self.metric)
        except Exception:
            print("Cannot plot.")
Example No. 3
def predict(X_train, X_test, y_train, y_test, method_name):
    print('Start SVM predicting...')

    svm_ovo = OneVsOneClassifier(SVC(kernel='rbf', probability=True))
    svm_ovo.fit(X_train, y_train.values.ravel())

    print('Accuracy score of svm_ovo: ' +
          '%.3f' % svm_ovo.score(X_test, y_test))

    svm_ovr = OneVsRestClassifier(SVC(kernel='rbf', probability=True))
    svm_ovr.fit(X_train, y_train.values.ravel())

    print('Accuracy score of svm_ovr: ' +
          '%.3f' % svm_ovr.score(X_test, y_test))

    plot.plot_conf_matrix(X_test, y_test, svm_ovo, method_name + '_ovo')
    plot.plot_conf_matrix(X_test, y_test, svm_ovr, method_name + '_ovr')
    plot.plot_roc(X_train, X_test, y_train, y_test, svm_ovr,
                  method_name + '_ovr')
Example No. 4
def training(train_ds, val_ds, test_ds, model, EPOCHS):
    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            # training=True is only needed if there are layers with different
            # behavior during training versus inference (e.g. Dropout).
            predictions, feature_map = model(images, training=True)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, predictions)
        # AUC expects scores rather than hard labels; use the positive-class probability
        _ = train_AUC.update_state(labels, predictions[:, 1])
        # pred_axis0, pred_axis1 = tf.unstack(predictions, axis=1)
        # _ = train_ROC.update_state(labels, pred_axis0, pred_axis1)

    @tf.function
    def val_step(images, labels):
        # training=False is only needed if there are layers with different
        # behavior during training versus inference (e.g. Dropout).
        predictions = model(images, training=False)
        v_loss = loss_object(labels, predictions)

        val_loss(v_loss)
        val_accuracy(labels, predictions)

    @tf.function
    def test_step(images, labels):
        # training=False is only needed if there are layers with different
        # behavior during training versus inference (e.g. Dropout).
        predictions = model(images, training=False)
        t_loss = loss_object(labels, predictions)

        test_loss(t_loss)
        test_accuracy(labels, predictions)
        # AUC expects scores rather than hard labels; use the positive-class probability
        _ = test_AUC.update_state(labels, predictions[:, 1])
        # _ = test_ROC.update_state(labels, predictions)

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam()
    ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, model=model)
    manager = tf.train.CheckpointManager(ckpt, './tf_ckpts', max_to_keep=3)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
    train_AUC = tf.keras.metrics.AUC(num_thresholds=200, curve='ROC',
                                     summation_method='interpolation')
    train_ROC = ROC()
    num_class = 2
    # train_CM = C_M(num_class)

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
    test_AUC = tf.keras.metrics.AUC(num_thresholds=200, curve='ROC',
                                    summation_method='interpolation')
    test_ROC = ROC()
    # test_CM = C_M(num_class)




    # EPOCHS = 10

    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
    test_log_dir = 'logs/gradient_tape/' + current_time + '/test'
    val_log_dir = 'logs/gradient_tape/' + current_time + '/val'
    ROC_log_dir = 'logs/gradient_tape/' + current_time + '/ROC'
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)
    test_summary_writer = tf.summary.create_file_writer(test_log_dir)
    validation_summary_writer = tf.summary.create_file_writer(val_log_dir)
    ROC_summary_writer = tf.summary.create_file_writer(ROC_log_dir)

    ckpt.restore(manager.latest_checkpoint)
    if manager.latest_checkpoint:
        print("Restored from {}".format(manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    for epoch in range(EPOCHS):
      train_loss.reset_states()
      train_accuracy.reset_states()
      train_AUC.reset_states()
      # train_CM.reset_states()
      train_ROC.reset_states()
      test_loss.reset_states()
      test_accuracy.reset_states()
      test_AUC.reset_states()
      test_ROC.reset_states()
      # test_CM.reset_states()
      val_loss.reset_states()
      val_accuracy.reset_states()

      for sample in train_ds:
          size_shape = tf.random.uniform([], minval=200, maxval=256)
          train_img = sample[0]
          train_label = sample[2]
          train_img, train_label = augment(train_img, train_label, size_shape)
          train_step(train_img, train_label)
          # model returns (predictions, feature_map) when called with training=True (see train_step)
          predictions, _ = model(train_img, training=True)

          label_pred = tf.math.argmax(predictions, axis=1)
          _ = train_ROC.update_state(train_label, predictions)
          # _ = train_CM.update_state(train_label, label_pred)

      ckpt.step.assign_add(1)
      if int(ckpt.step) % 2 == 0:
          save_path = manager.save()
          print("Saved checkpoint for step {}: {}".format(int(ckpt.step), save_path))
          print(manager.checkpoints)

      # plot the ROC curve and save it to disk
      fp, tp = train_ROC.result()
      plot_roc('ROC_train', fp, tp)  # create figure & 1 axis
      plt.savefig('ROC_train_img.png')  # save the figure to file
      plt.show()
      fig = mpimg.imread('ROC_train_img.png')
      fig = tf.expand_dims(fig, 0)

      with train_summary_writer.as_default():
           tf.summary.scalar('train_loss', train_loss.result(), step=epoch)
           tf.summary.scalar('train_accuracy', train_accuracy.result(), step=epoch)
           tf.summary.scalar('AUC_train', train_AUC.result(), step=epoch)
           tf.summary.image("ROC_train", fig, step=epoch)
           # tf.summary.image('Confusion Matrix train', fig_CM, step=epoch)
      for sample in val_ds:
          val_img = sample[0]
          val_label = sample[2]
          val_step(val_img, val_label)
      with validation_summary_writer.as_default():
           tf.summary.scalar('val_loss', val_loss.result(), step=epoch)
           tf.summary.scalar('val_accuracy', val_accuracy.result(), step=epoch)

      template = 'Epoch {}, Loss: {}, Accuracy: {}, Val Loss: {}, Val Accuracy: {}'
      print(template.format(epoch + 1,
                        train_loss.result(),
                        train_accuracy.result() * 100,
                        val_loss.result(),
                        val_accuracy.result() * 100))
Example No. 5

    for sample in test_ds:
        test_img = sample[0]
        test_label = sample[2]
        test_step(test_img, test_label)
        # model returns (predictions, feature_map) when called with training=True (see train_step)
        predictions, _ = model(test_img, training=True)
        label_pred = tf.math.argmax(predictions, axis=1)
        _ = test_ROC.update_state(test_label, predictions)
        # _ = test_CM.update_state(test_label, label_pred)
    # plot the ROC curve and save it to disk
    fp, tp = test_ROC.result()
    plot_roc('ROC_test', fp, tp)  # create figure & 1 axis
    plt.savefig('ROC_test_img.png')  # save the figure to file
    plt.show()
    fig = mpimg.imread('ROC_test_img.png')
    fig = tf.expand_dims(fig, 0)


    with test_summary_writer.as_default():
        tf.summary.scalar('test_loss', test_loss.result(), step=0)
        tf.summary.scalar('test_accuracy', test_accuracy.result(), step=0)
        tf.summary.scalar('AUC_test', test_AUC.result(), step=0)
        tf.summary.image("ROC_test", fig, step=0)
        # tf.summary.image('Confusion Matrix test', fig_CM, step=0)
        template = 'Test Loss: {}, Test Accuracy: {}'
        print(template.format(test_loss.result(),
                      test_accuracy.result() * 100))
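# Note: the custom `ROC` metric instantiated above (ROC()) is project code that is
# not part of this snippet. A minimal sketch of what such a metric might look like
# for the two-class case is given below; it is an assumption, not the original code.
import tensorflow as tf


class ROC(tf.keras.metrics.Metric):
    """Accumulates per-threshold FP/TP rates so an ROC curve can be plotted later."""

    def __init__(self, num_thresholds=200, name='roc', **kwargs):
        super().__init__(name=name, **kwargs)
        self.thresholds = tf.linspace(0.0, 1.0, num_thresholds)
        self.tp = self.add_weight('tp', shape=(num_thresholds,), initializer='zeros')
        self.fp = self.add_weight('fp', shape=(num_thresholds,), initializer='zeros')
        self.pos = self.add_weight('pos', initializer='zeros')
        self.neg = self.add_weight('neg', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        # y_pred holds per-class probabilities; the score is the class-1 probability
        scores = tf.cast(y_pred[:, 1], tf.float32)
        labels = tf.cast(tf.reshape(y_true, [-1]), tf.float32)
        hits = tf.cast(scores[None, :] >= self.thresholds[:, None], tf.float32)
        self.tp.assign_add(tf.reduce_sum(hits * labels[None, :], axis=1))
        self.fp.assign_add(tf.reduce_sum(hits * (1.0 - labels)[None, :], axis=1))
        self.pos.assign_add(tf.reduce_sum(labels))
        self.neg.assign_add(tf.reduce_sum(1.0 - labels))

    def result(self):
        # matches the usage above: fp, tp = roc_metric.result()
        return (self.fp / tf.maximum(self.neg, 1.0),
                self.tp / tf.maximum(self.pos, 1.0))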
    print("max_specificity:", max_specificity)

    #FINAL TESTING
    k_best_features = 4
    datasetX = do_feature_selection(X, y, k_best_features)

    X_train, X_test, y_train, y_test = train_test_split(datasetX,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=1,
                                                        shuffle=False)
    X_train, X_val, y_train, y_val = train_test_split(X_train,
                                                      y_train,
                                                      test_size=0.25,
                                                      random_state=1,
                                                      shuffle=False)

    X_train_final = np.concatenate((X_train, X_test), axis=0)
    y_train_final = np.concatenate((y_train, y_test), axis=0)

    dtree = train_DecisionTree(X_train_final, y_train_final)
    y_pred = predict_testdata(dtree, X_val)
    evaluation_metric = EvaluationMetric()
    # confusion_matrix = confusion_matrix(y_test.values, y_pred)
    result = evaluation_metric.get_evaluation_metrics(y_val.values, y_pred)
    print(result)
    probs = dtree.predict_proba(X_val)
    probs = probs[:, 1]
    plot_roc(y_val.values, probs)
    plot_precision_recall(y_val.values, y_pred, probs)
Example No. 7
# please provide the path to your training and testing datasets
training = pd.read_csv("...")
test = pd.read_csv("....")

complications = ['SBI', 'AKI', 'ARDS']

framework_train = apply_stratified_framework(training, complications)
framework_test = apply_stratified_framework(test, complications)

train_columns = [
    'Diastolic Blood Pressure_mean', 'Diastolic Blood Pressure_min',
    'Oxygen Saturation_max', 'Oxygen Saturation_mean', 'Oxygen Saturation_min',
    'Peripheral Pulse Rate_max', 'Peripheral Pulse Rate_mean',
    'Peripheral Pulse Rate_min', 'Respiratory Rate_max',
    'Respiratory Rate_mean', 'Respiratory Rate_min',
    'Systolic Blood Pressure_max', 'Systolic Blood Pressure_mean',
    'Systolic Blood Pressure_min', 'Temperature Axillary_max',
    'Temperature Axillary_mean', 'Temperature Axillary_min', 'GCS_mean',
    'GCS_min', 'GCS_max', 'GENDER', 'AGE', 'COUGH', 'FEVER', 'SOB',
    'SORE_THROAT', 'RASH', 'BMI', 'DIABETES', 'HYPERTENSION', 'CKD', 'CANCER'
]

models_all, trainsets, classifers = get_models(complications, framework_train,
                                               train_columns)

true_ouctomes, predicted_ouctomes = get_results(framework_test, complications,
                                                models_all, train_columns)

plot_roc(complications, true_ouctomes, predicted_ouctomes, "testset")

plot_PRC(complications, true_ouctomes, predicted_ouctomes, "testset")
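# Note: apply_stratified_framework, get_models, get_results, plot_roc and plot_PRC
# are project helpers that are not shown. Purely as an illustration, a per-outcome
# precision-recall helper matching the plot_PRC call above might look like the
# sketch below; the data layout (parallel lists of labels and scores) is assumed.
import matplotlib.pyplot as plt
from sklearn.metrics import auc, precision_recall_curve


def sketch_plot_PRC(outcomes, y_true_per_outcome, y_score_per_outcome, suffix):
    plt.figure()
    for i, outcome in enumerate(outcomes):
        precision, recall, _ = precision_recall_curve(y_true_per_outcome[i],
                                                      y_score_per_outcome[i])
        plt.plot(recall, precision,
                 label='{} (AUC = {:.3f})'.format(outcome, auc(recall, precision)))
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.legend(loc='lower left')
    plt.savefig('PRC_{}.png'.format(suffix))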
Example No. 8
    # plt.scatter(x_train[:,0].tolist(), y_train.tolist())
    # plt.show()
    # plt.scatter(x_train[:, 1].tolist(), y_train.tolist())
    # plt.show()
    # plt.scatter(x_train[:, 2].tolist(), y_train.tolist())
    # plt.show()
    # plt.scatter(x_train[:, 3].tolist(), y_train.tolist())
    # plt.show()
    # plt.scatter(x_train[:, 4].tolist(), y_train.tolist())
    # plt.show()
    # plt.scatter(x_train[:, 5].tolist(), y_train.tolist())
    # plt.show()
    precision, recall, F1, _ = precision_recall_fscore_support(
        y_test, y_test_predict)
    print(precision, recall, F1)
    plot.plot_roc(x_train, x_test, y_train, y_test)
    # plot the confusion matrix
    obj1 = confusion_matrix(y_test, y_test_predict)
    print('confusion_matrix\n', obj1)
    classes = list(set(y_test_predict))
    classes.sort()
    plt.figure(figsize=(12, 8), dpi=100)
    plt.imshow(obj1, cmap=plt.cm.Blues)

    indices = range(len(obj1))
    plt.xticks(indices, classes)
    plt.yticks(indices, classes)
    plt.colorbar()
    plt.xlabel('predicted label')
    plt.ylabel('true label')
    plt.title('Confusion Matrix', fontsize='10', fontproperties='arial')
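    # Optional follow-up (not part of the original snippet): write the raw counts
    # into each cell so the matrix can be read without the colorbar, then render it.
    for i in range(len(obj1)):
        for j in range(len(obj1)):
            plt.text(j, i, obj1[i, j], ha='center', va='center')
    plt.show()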
Example No. 9
                                       test_size=0.3,
                                       random_state=56)
df_train = df_notnull.loc[train_idx]
features_train = features.loc[train_idx]
target_train = target.loc[train_idx]
df_test = df_notnull.loc[test_idx]
features_test = features.loc[test_idx]
target_test = target.loc[test_idx]

#%% train logistic classifier
classifier = LogisticRegression()
classifier.fit(features_train, target_train)

#%% score on train
scores_train = classifier.predict_proba(features_train)[:, 1]
(tp_train, fp_train, tsh) = plot_roc(target_train, scores_train)

#%% score on test
scores_test = classifier.predict_proba(features_test)[:, 1]
(tp_test, fp_test, tsh) = plot_roc(target_test, scores_test)

#%% plot threshold values
pd.DataFrame(nested_to_list(zip(tsh, tp_test, fp_test, fp_test - tp_test)),
             columns=[
                 'Threshold', 'True Positive Rate', 'False Positive Rate',
                 'Difference'
             ]).plot(x='Threshold')
plt.xlim(0, 0.4)
plt.ylim([0, 1])
plt.grid()
plt.show()
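# Follow-up sketch (not part of the original script): pick the operating threshold
# that maximizes Youden's J statistic (TPR - FPR) from the test-set ROC arrays above.
import numpy as np

tpr_arr, fpr_arr, tsh_arr = np.asarray(tp_test), np.asarray(fp_test), np.asarray(tsh)
best_idx = int(np.argmax(tpr_arr - fpr_arr))
print('Best threshold by Youden J: {:.3f} (TPR={:.3f}, FPR={:.3f})'.format(
    tsh_arr[best_idx], tpr_arr[best_idx], fpr_arr[best_idx]))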
Example No. 10
try:
    os.mkdir('plots')
except OSError as error:
    print(error)

# please provide the path to your training and testing datasets
training = pd.read_csv("...")
test = pd.read_csv("....")

complications = [
    'Elevated_troponin', 'Elevated_d-dimer', 'Elevated_Amino', 'Elevated_IL6',
    'SBI', 'AKI', 'ARDS'
]

framework_train = apply_stratified_framework(training, complications)
framework_test = apply_stratified_framework(test, complications)

targets = get_targets(complications)
train_columns = [x for x in training.columns if x not in targets]

models_all, trainsets, classifers = get_models(complications, framework_train,
                                               train_columns)

true_ouctomes, predicted_ouctomes = get_results(framework_test, complications,
                                                models_all, train_columns)

plot_roc(complications, true_ouctomes, predicted_ouctomes)

plot_PRC(complications, true_ouctomes, predicted_ouctomes)