Example #1
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import auc, precision_score, recall_score, roc_curve, RocCurveDisplay


def evaluate_roc(y_true, y_pred, method, plot=True):
    '''A quick helper for ad-hoc ROC analysis; a more thorough comparison is done later in R.'''
    fpr, tpr, thresholds = roc_curve(y_true, y_pred)

    roc_auc = auc(fpr, tpr)
    # best point on the ROC curve --> Youden's J
    J = tpr - fpr
    best_ind = np.argmax(J)
    best_threshold = thresholds[best_ind]

    print(f'Best threshold: < {np.round(best_threshold,3)} --> negative')

    # compute precision and recall at that threshold
    binarized = (y_pred >= best_threshold).astype(int)
    recall = recall_score(y_true, binarized)
    precision = precision_score(y_true, binarized)

    print(
        f'Recall = {np.round(recall,3)}, Precision = {np.round(precision,3)}')
    if plot:
        viz = RocCurveDisplay(fpr=fpr,
                              tpr=tpr,
                              roc_auc=roc_auc,
                              estimator_name=method)

        viz.plot()
        plt.show()

    print(f'AUC: {np.round(roc_auc,3)}')

    return best_threshold
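A hypothetical call of evaluate_roc on synthetic data (the generator seed and score construction below are illustrative assumptions, not part of the original example):

import numpy as np

rng = np.random.default_rng(0)
y_true = rng.integers(0, 2, size=200)
# noisy scores loosely correlated with the labels
y_scores = 0.6 * y_true + rng.normal(0.2, 0.3, size=200)
best_thr = evaluate_roc(y_true, y_scores, method="demo", plot=False)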
Example #2
import os

import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, roc_curve, RocCurveDisplay


def plot_roc(y_test_df, y_score, trained_pipeline):
    # pass_success_model_eval_dir (output directory) must be defined at module level
    fpr, tpr, _ = roc_curve(y_test_df,
                            y_score,
                            pos_label=trained_pipeline.classes_[1])
    RocCurveDisplay(fpr=fpr, tpr=tpr).plot()
    plt.title(f"AUC: {roc_auc_score(y_test_df, y_score)}")
    plt.tight_layout()
    plt.savefig(os.path.join(pass_success_model_eval_dir, "roc.png"))
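A hypothetical call sketch for plot_roc; the dataset, pipeline, and the value of pass_success_model_eval_dir below are illustrative assumptions:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

pass_success_model_eval_dir = "."  # assumed output directory
X, y = make_classification(n_samples=500, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
pipe = make_pipeline(StandardScaler(), LogisticRegression()).fit(X_train, y_train)
plot_roc(y_test, pipe.predict_proba(X_test)[:, 1], pipe)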
Example #3
    def get_roc_curve(self, gt_index=0, pred_index=1, display=True, model_name="autopilot-model"):
        y = self._y()
        yh = self._yh()

        fpr, tpr, thresholds = roc_curve(y, yh)
        roc_auc = auc(fpr, tpr)

        viz = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name=model_name)

        if display:
            viz.plot()

        return viz, roc_auc, fpr, tpr, thresholds
Example #4
    def plot(self, ax=None, figsize=(10, 5)):
        if ax is None:
            fig, ax = plt.subplots(1, 1, figsize=figsize)

        ax.set_title("ROC Curve")
        possible_colors = GeneralUtils.shuffled_colors()
        for class_index, label in enumerate(self.labels):
            fpr, tpr = self._roc_curve[label]['fpr'], self._roc_curve[label]['tpr']
            roc_auc = self.auc[label]
            viz = RocCurveDisplay(fpr=fpr,
                                  tpr=tpr,
                                  roc_auc=roc_auc,
                                  estimator_name='Classifier')

            viz.plot(ax=ax, name=label, color=possible_colors[class_index])

        plt.draw()
Example #5
    def roc_curve(self, test_label=None, plot_type='test'):
        if test_label is not None:
            self.test_label = test_label
        if plot_type == 'test':
            predict = [self.y_pred]
            label = [self.test_label]
        elif plot_type == 'train':
            predict = [self.y_oof]
            label = [self.y_train]
        else:
            raise ValueError(f"unknown plot_type: {plot_type!r}")

        method_names = ['lgb']

        fig = plt.figure(figsize=(6, 6))
        ax = fig.add_subplot(1, 1, 1)
        # use fresh loop names so the predict/label lists are not shadowed
        for pred, lab, name in zip(predict, label, method_names):
            fpr, tpr, thresholds = metrics.roc_curve(lab, pred)
            roc_auc = metrics.auc(fpr, tpr)
            roc_display = RocCurveDisplay(fpr=fpr,
                                          tpr=tpr,
                                          roc_auc=roc_auc,
                                          estimator_name=name)
            roc_display.plot(ax=ax)
        ax.set_title('ROC curve : LightGBM', fontsize=16)
        plt.show()
Example #6
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

y_pred = clf.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

# display_labels is keyword-only in current scikit-learn
cm_display = ConfusionMatrixDisplay(cm, display_labels=[0, 1]).plot()

#%% Create ROC
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import RocCurveDisplay

y_score = clf.decision_function(X_test)
pos_label = clf.classes_[1]
fpr, tpr, _ = roc_curve(y_test, y_score, pos_label=pos_label)
AUC = auc(fpr, tpr)
roc_display = RocCurveDisplay(fpr=fpr,
                              tpr=tpr,
                              roc_auc=AUC,
                              estimator_name='demo').plot()

#%% Create PR
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.metrics import PrecisionRecallDisplay

pos_label = clf.classes_[1]
prec, recall, _ = precision_recall_curve(y_test, y_score, pos_label=pos_label)
# average precision is an alternative to the trapezoidal PR AUC, computed differently
AP = average_precision_score(y_test, y_score, pos_label=pos_label)
pr_display = PrecisionRecallDisplay(precision=prec,
                                    recall=recall,
                                    average_precision=AP,
                                    estimator_name='demo').plot()
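For reference, scikit-learn >= 1.0 also provides from_estimator / from_predictions constructors that compute the curve and the AUC/AP internally; a sketch reusing clf, X_test, y_test and y_score from above:

from sklearn.metrics import RocCurveDisplay, PrecisionRecallDisplay

# one-liners: curve, AUC/AP, and plotting handled by the display classes
RocCurveDisplay.from_estimator(clf, X_test, y_test, name="demo")
PrecisionRecallDisplay.from_predictions(y_test, y_score, name="demo")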
Example #7
def predict(x_test, y_test, modal_name):
    from sklearn.metrics import confusion_matrix, roc_curve, auc, RocCurveDisplay
    import pandas as pd
    import seaborn as sn

    modal_name = modal_name[0] + '_' + modal_name[1] + '_' + modal_name[2]

    if mulmonet:
        # split modals
        x_test1 = x_test[:, :, :, :3]
        x_test2 = x_test[:, :, :, 3:6]
        x_test3 = x_test[:, :, :, 6:9]
    else:
        # stack one channel from each modality into a single input
        # (note: x_test2 and x_test3 are not defined on this branch, but are used below)
        x_test1 = np.zeros_like(x_test[:, :, :, :3])
        x_test1[:, :, :, 0] = x_test[:, :, :, 0]
        x_test1[:, :, :, 1] = x_test[:, :, :, 3]
        x_test1[:, :, :, 2] = x_test[:, :, :, 6]

    # load model and weights
    model = make_mulmoNet_vgg16(IMAGE_SIZE, IMAGE_SIZE, 3, 1)
    model.summary()
    model.load_weights(PATH + EXPORT_FOLDER + modal_name +
                       '/weight_checkpoint.hdf5')

    # prediction
    y_pred = model.predict([x_test1, x_test2, x_test3], BATCH_SIZE)
    y_pred_binary = np.rint(y_pred).astype(int)

    # confusion matrix (use a new name so the imported confusion_matrix function is not shadowed)
    cm = confusion_matrix(y_test, y_pred_binary)
    print(cm)

    df_cm = pd.DataFrame(cm, range(2), range(2))
    sn.set(font_scale=1.4)  # for label size
    sn.heatmap(df_cm, annot=True, annot_kws={"size": 16})  # font size
    plt.savefig(PATH + EXPORT_FOLDER + modal_name + '/confusion_matrix.png')
    plt.clf()

    fpr, tpr, thresholds = roc_curve(y_test, y_pred)
    roc_auc = auc(fpr, tpr)
    display = RocCurveDisplay(fpr=fpr,
                              tpr=tpr,
                              roc_auc=roc_auc,
                              estimator_name=modal_name)
    display.plot()
    plt.savefig(PATH + EXPORT_FOLDER + modal_name + '/roc_curve.png')

    y_test = np.concatenate(y_test)
    with open(PATH + EXPORT_FOLDER + modal_name + '/results.txt', 'w') as f:
        f.write('TP: ' + str(cm[1, 1]))
        f.write('\n')
        f.write('FP: ' + str(cm[0, 1]))
        f.write('\n')
        f.write('FN: ' + str(cm[1, 0]))
        f.write('\n')
        f.write('TN: ' + str(cm[0, 0]))

    try:
        os.makedirs(PATH + EXPORT_FOLDER + modal_name + '/results')
    except FileExistsError:
        print('folders exist')

    # GRAD CAM
    for i, y in enumerate(y_pred):
        img1 = x_test1[i]
        img2 = x_test1[i, :, :, 1]
        img3 = x_test1[i, :, :, 2]

        image_array1 = np.expand_dims(img1, axis=0)
        image_array2 = np.expand_dims(img2, axis=0)
        image_array3 = np.expand_dims(img3, axis=0)
        cam1 = make_gradcam_heatmap([image_array1, image_array2, image_array3],
                                    1, model, 'block5_conv3_m1', 0)
        cam2 = make_gradcam_heatmap([image_array1, image_array2, image_array3],
                                    2, model, 'block5_conv3_m2', 0)
        cam3 = make_gradcam_heatmap([image_array1, image_array2, image_array3],
                                    3, model, 'block5_conv3_m3', 0)

        # f, axarr = plt.subplots(3, 2)
        # axarr[0, 0].imshow(utils.denormalize_x(img1), cmap='gray', vmin=0, vmax=255)
        # axarr[0, 1].imshow(cam1)
        # axarr[1, 0].imshow(utils.denormalize_x(img2), cmap='gray', vmin=0, vmax=255)
        # axarr[1, 1].imshow(cam2)
        # axarr[2, 0].imshow(utils.denormalize_x(img3), cmap='gray', vmin=0, vmax=255)
        # axarr[2, 1].imshow(cam3)

        # plt.title(test_names[i])
        # plt.show()

        correct = 1 if y_pred_binary[i] == y_test[i] else 0

        img1 = (img1 + 1) * 127.5
        img2 = (img2 + 1) * 127.5
        img3 = (img3 + 1) * 127.5
        # save_image = np.concatenate((np.repeat(np.expand_dims(img1[:, :, 0], axis=-1), 3, axis=-1),
        #                             np.repeat(np.expand_dims(img1[:, :, 1], axis=-1), 3, axis=-1),
        #                             np.repeat(np.expand_dims(img1[:, :, 2], axis=-1), 3, axis=-1),
        #                             cam1), axis=1)
        save_image1 = np.concatenate((img1, cam1), axis=1)
        save_image2 = np.concatenate((img2, cam2), axis=1)
        save_image3 = np.concatenate((img3, cam3), axis=1)
        save_image = np.concatenate((save_image1, save_image2), axis=0)
        save_image = np.concatenate((save_image, save_image3), axis=0)

        # save_image = np.concatenate((cam1, cam2, cam3), axis=1)

        cv2.imwrite(
            PATH + EXPORT_FOLDER + modal_name + '/results/' + str(i) + '_' +
            str(correct) + str(np.round(y_pred[i], 2)) + '.png', save_image)
Example #8
 torch.save(model,"model_deep_0.1")
 
 model=torch.load("model_deep_0.05")
 model.eval()
 losses=[]
 for index,trace in enumerate(testSet):
     reconstructed=model(trace)
     loss = loss_function(reconstructed, trace)
     #losses.append([index,loss.item()])
     losses.append(loss.item())
     
 #create the roc_curve
 from sklearn.metrics import RocCurveDisplay,auc,roc_curve
 losses_normalized = [(float(i)-min(losses))/(max(losses)-min(losses)) for i in losses]
 true_outliers=[1 if i in outlier_ids else 0 for i in range(len(losses))]
 fpr, tpr, thresholds = roc_curve(true_outliers, losses_normalized)
 roc_auc = auc(fpr, tpr)
 display = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,estimator_name='Deeplearning autoencoder')
 display.plot()
 
 
 import matplotlib.pyplot as plt
 # plt.figure() (lowercase) attaches the figure to pyplot so the plot calls below draw on it
 fig = plt.figure()
 plt.plot(displayS.fpr,displayS.tpr,label="Denoising autoencoders")
 plt.plot(display.fpr,display.tpr,label="Deep-Learning autoencoders")
 plt.legend()
 plt.show()
 
 
 # losses holds plain floats here, so rank trace indices by loss and count how many
 # of the highest-loss traces are true outliers
 ranked = sorted(range(len(losses)), key=lambda i: losses[i])
 accuracy = sum(1 if i in outlier_ids else 0 for i in ranked[len(testSet) - len(outlier_ids):])
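As an alternative to re-plotting display.fpr / display.tpr by hand, RocCurveDisplay.plot accepts an ax, so both curves can share one axes; a sketch assuming displayS and display exist as above:

 import matplotlib.pyplot as plt

 fig, ax = plt.subplots(figsize=(6, 6))
 # each display draws its curve (and legend entry) onto the shared axes
 displayS.plot(ax=ax, name="Denoising autoencoder")
 display.plot(ax=ax, name="Deep-learning autoencoder")
 plt.show()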
Example #9
# assumes model, loss_function, testSet, lossesD, r (true outlier indices),
# random, and the sklearn.metrics imports are available from the surrounding script
model.eval()
losses = []
for index,trace in enumerate(testSet):
    reconstructed=model(trace)
    loss = loss_function(reconstructed, trace)
    losses.append(loss.item())
    

losses_normalizedD = [(float(i)-min(lossesD))/(max(lossesD)-min(lossesD)) for i in lossesD]
true_outliers=[1 if i in r else 0 for i in range(len(lossesD))]
fprD, tprD, thresholdsD = roc_curve(true_outliers, losses_normalizedD)
roc_aucD = auc(fprD, tprD)
# keep the endpoints plus eight random intermediate points to thin the curve before plotting
numbers=[random.randint(1,len(fprD)-2) for _ in range(8)]
fprD=[fprD[0]]+[fprD[i] for i in sorted(numbers)]+[fprD[-1]]
tprD=[tprD[0]]+[tprD[i] for i in sorted(numbers)]+[tprD[-1]]
displayD = RocCurveDisplay(fpr=fprD, tpr=tprD, roc_auc=roc_aucD,estimator_name='Denoising autoencoder')
losses_normalized = [(float(i)-min(losses))/(max(losses)-min(losses)) for i in losses]
true_outliers=[1 if i in r else 0 for i in range(len(losses))]
fpr, tpr, thresholds = roc_curve(true_outliers, losses_normalized)
roc_auc = auc(fpr, tpr)
numbers=[random.randint(1,len(fpr)-2) for _ in range(8)]
fpr=[fpr[0]]+[fpr[i] for i in sorted(numbers)]+[fpr[-1]]
tpr=[tpr[0]]+[tpr[i] for i in sorted(numbers)]+[tpr[-1]]
display = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,estimator_name='Deeplearning autoencoder')
topz=[]
with open('input/scores_topz_'+last+'.txt','r') as f:
    topz=[list(map(float,line.split(","))) for line in f][0]
topz_normalized = [(float(i)-min(topz))/(max(topz)-min(topz)) for i in topz]
fprz, tprz, thresholdsz = roc_curve(true_outliers, topz_normalized)
roc_aucz = auc(fprz, tprz)
numbers=[random.randint(1,len(fprz)-2) for _ in range(8)]
Example #10
    losses = []
    for trace in testSet:
        reconstructed = modelS(trace)
        loss = loss_function(reconstructed, trace)
        losses.append(loss.item())
    m = mean(losses)    # mean/stdev require: from statistics import mean, stdev
    std = stdev(losses)
    r = []
    with open("input/results_30_activities_10k_0.005_description", "r") as f:
        for line in f:
            r.append(int(line.split(",")[1]))
    outliers = []
    threshold = sorted(losses)[-len(r)]
    for i, x in enumerate(losses):
        if x >= threshold:
            outliers.append(i)

    #create the roc_curve
    from sklearn.metrics import RocCurveDisplay, auc, roc_curve
    losses_normalized = [(float(i) - min(losses)) / (max(losses) - min(losses))
                         for i in losses]
    true_outliers = [1 if i in r else 0 for i in range(len(losses))]
    fprS, tprS, thresholds = roc_curve(true_outliers, losses_normalized)
    roc_auc = auc(fprS, tprS)
    displayS = RocCurveDisplay(fpr=fprS,
                               tpr=tprS,
                               roc_auc=roc_auc,
                               estimator_name='Denoising autoencoder')
    displayS.plot()

    plt.plot(sorted(losses))
Example #11
def score_syncnet(speaking_labels,
                  confidences,
                  multiplier,
                  roll=3,
                  rolltype="mean",
                  verbose=True,
                  plot=False,
                  min_max_scaling=False):
    """scores syncnet confidence against binary speaking labels with a per frame multiplier

    Args:
        speaking_labels (df): dataframe of labels for the given speaker
        confidences (df): raw syncnet output confidences
        multiplier (df): per frame multipler for confidences (after normalization)
        roll (int, optional): syncnet output can be improved with rolling the confidence, 
                                this specifies the length of the roll. Defaults to 3.
        rolltype (str, optional): type of roll. Defaults to "mean".
        verbose (bool, optional): controlls if variables are printed. Defaults to True.
        plot (bool, optional): controls if plots are created. Defaults to False.
        min_max_scaling (bool, optional): controls if [+/20] or min/max scaling is used. 
                                They are identical for auROC but not Acc Defaults to False.

    Returns:
        [tuple]: [metrics of interest]
    """
    if verbose:
        print()

    # Set dataframes to the same length
    max_len = min(speaking_labels.shape[0], confidences.shape[0],
                  multiplier.shape[0])

    speaking_labels = speaking_labels.iloc[:max_len].fillna(0).reset_index(
        drop=True)
    confidences = confidences.iloc[:max_len].fillna(0).reset_index(drop=True)
    multiplier = multiplier.iloc[:max_len].fillna(0).reset_index(drop=True)

    # Roll SyncNet output to stabilize it
    # a roll=1 is the same as rolltype='none'
    if rolltype != "none":
        if rolltype == "mean":
            confidences = confidences.rolling(roll).mean()
        if rolltype == "min":
            confidences = confidences.rolling(roll).min()
        if rolltype == "max":
            confidences = confidences.rolling(roll).max()
    else:
        print("not rolling")

    # ~Normalize SyncNet probabilities around 0.5 and cap to [0, 1]
    denominator = max(float(-1 * confidences.min()), float(
        confidences.max())) * 2
    # print("confidences:", float(confidences.min()), float(confidences.max()))
    # print("multiplier:", float(multiplier.min()), float(multiplier.max()))
    original_probabilities = confidences / denominator + .5
    # print("original_probabilities:", float(original_probabilities.min()), float(original_probabilities.max()))

    probabilities = original_probabilities * multiplier.values
    probabilities[probabilities >= 1] = 1
    probabilities[probabilities <= 0] = 0

    # Get predictions from probabilities
    predictions = probabilities.copy()
    predictions[predictions >= .5] = 1
    predictions[predictions < .5] = 0
    # print("predictions:", float(predictions.min()), float(predictions.max()))

    predictions = predictions.fillna(0).reset_index(drop=True)
    probabilities = probabilities.fillna(0).reset_index(drop=True)

    if min_max_scaling:
        mms_predictions = (confidences - confidences.min()) / (
            confidences.max() - confidences.min())
        norm_thresh = (-confidences.min()) / (confidences.max() -
                                              confidences.min())

        mms_predictions = mms_predictions * multiplier.values
        mms_predictions[mms_predictions >= norm_thresh] = 1
        mms_predictions[mms_predictions < norm_thresh] = 0
        # print(probabilities, confidences)
        # print(predictions, mms_predictions)
        assert predictions.equals(
            mms_predictions
        ), f"Should be equal\n{predictions==mms_predictions}"

    # print("probabilities:", float(probabilities.min()), float(probabilities.max()))
    auROC, AP, report, optimal_threshold, report2 = get_all_metrics(
        speaking_labels.values, probabilities.values, predictions.values,
        verbose, plot)

    if plot:
        print("multiplier")
        multiplier.plot(figsize=(20, 3))
        plt.show()
        print("labels")
        speaking_labels.plot(figsize=(20, 3))
        plt.show()
        print("predictions")
        predictions.plot(figsize=(20, 3))
        plt.show()
        print("probabilities")
        probabilities.plot(figsize=(20, 3))
        plt.show()

        sns.heatmap(confusion_matrix(speaking_labels.values,
                                     predictions.values),
                    annot=True)
        plt.show()

        fpr, tpr, thresholds = roc_curve(speaking_labels.values,
                                         probabilities.values)
        RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=auc(fpr, tpr)).plot()
        plt.show()

    return auROC, AP, report["accuracy"], report["weighted avg"][
        "f1-score"], optimal_threshold, report2["accuracy"], report2[
            "weighted avg"]["f1-score"]
Example #12
import matplotlib.pyplot as plt
from sklearn.metrics import (ConfusionMatrixDisplay, PrecisionRecallDisplay,
                             RocCurveDisplay, auc, precision_recall_curve,
                             precision_recall_fscore_support, roc_curve)
from sklearn.utils.multiclass import unique_labels


def create_visualization(model, X_test, Y_test, title="Model Metrics"):
    """Computes the metrics and creates the figures used for model visualization.

    Parameters
    ----------
    model: The trained model to get the metrics from

    X_test: Data used to evaluate the model

    Y_test: Data labels for the evaluation data

    title: Title of the figure on which visualizations will be drawn
    """
    try:
        Y_probabilities = model.predict_proba(X_test)
    except AttributeError:
        # Must be an SVM w/o probability=True
        Y_probabilities = model.decision_function(X_test)

    figure, axes = plt.subplots(ncols=3, figsize=(16, 5))
    figure.suptitle(title)

    # Plot confusion matrix (ConfusionMatrixDisplay.from_estimator replaces the
    # plot_confusion_matrix helper removed in scikit-learn 1.2)
    labels = unique_labels(Y_test)
    ConfusionMatrixDisplay.from_estimator(model,
                                          X_test,
                                          Y_test,
                                          ax=axes[0],
                                          normalize="true",
                                          labels=labels)

    # Plot precision-recall and ROC curve for each class
    for index, class_label in enumerate(labels):
        # Plot precision-recall curve
        precision, recall, _ = precision_recall_curve(Y_test,
                                                      Y_probabilities[:, index],
                                                      pos_label=class_label)

        name = f"class {int(class_label)}"
        viz = PrecisionRecallDisplay(precision=precision,
                                     recall=recall,
                                     estimator_name=name)
        viz.plot(ax=axes[1], name=name)

        # Plot ROC curve
        fpr, tpr, _ = roc_curve(Y_test,
                                Y_probabilities[:, index],
                                pos_label=class_label)
        roc_auc = auc(fpr, tpr)

        viz = RocCurveDisplay(fpr=fpr,
                              tpr=tpr,
                              roc_auc=roc_auc,
                              estimator_name=name)
        viz.plot(ax=axes[2], name=name)

    precisions, recalls, fscores, supports = precision_recall_fscore_support(
        Y_test, model.predict(X_test))

    for index, (precision, recall, fscore, support) in enumerate(
            zip(precisions, recalls, fscores, supports)):
        print(
            f"class {index} - precision: {precision:0.4f}, recall: {recall:0.4f}",
            f"fscore: {fscore:0.4f}, support: {support}",
        )
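A hypothetical invocation of create_visualization on a small synthetic problem (the dataset and classifier below are illustrative assumptions):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=400, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = LogisticRegression().fit(X_train, y_train)
create_visualization(clf, X_test, y_test, title="Logistic regression")
plt.show()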