示例#1
0
def get_confusion_tensor(true_labels_tensor, predicted_labels_tensor, n_dim):
    """Build one binary confusion matrix per feature.

    Args:
        true_labels_tensor: Ground-truth labels, indexed by feature on the
            first axis (``true_labels_tensor[i]`` holds the labels of
            feature ``i``).
        predicted_labels_tensor: Predicted labels with the same layout as
            ``true_labels_tensor``.
        n_dim: Number of features, i.e. how many matrices to compute.

    Returns:
        Float array of shape ``(n_dim, 2, 2)``; entry ``i`` is the confusion
        matrix of feature ``i`` computed with the fixed label order ``[0, 1]``.
    """
    confusion_tensor = np.zeros(shape=(n_dim, 2, 2))
    for feature_idx in range(n_dim):
        # Slice out the current feature: passing the full tensors would
        # compute the very same matrix for every feature.
        confusion_tensor[feature_idx] = sklearn_confusion_matrix(
            true_labels_tensor[feature_idx],
            predicted_labels_tensor[feature_idx],
            labels=[0, 1])
    return confusion_tensor
  def evaluate(self, dataset):
      """Score the classifier on a labelled dataset.

      Column 0 of ``dataset`` is fed to ``self.predict`` and column 1 is used
      as the ground truth. Precision and recall are macro-averaged over the
      classes; a class with an empty denominator contributes 1.

      Returns:
          Tuple ``(precision, recall, accuracy, f1)``.
      """
      predictions = self.predict(dataset[:, 0])
      confusion_matrix = sklearn_confusion_matrix(
          dataset[:, 1], predictions, labels=self.__classes)

      precisions = []
      recalls = []

      for gender in self.__classes:
          # NOTE(review): assumes __classes_indexes maps each class to its
          # position in __classes (the label order given to sklearn) - confirm.
          idx = self.__classes_indexes[gender]
          true_positives = confusion_matrix[idx][idx]
          # sklearn puts true labels on rows and predictions on columns, so
          # the column total is "predicted as this class" (precision) and the
          # row total is "actually this class" (recall).
          predicted_total = np.sum(confusion_matrix[:, idx])
          actual_total = np.sum(confusion_matrix[idx, :])

          precision = 1
          recall = 1
          if predicted_total > 0:
              precision = true_positives / float(predicted_total)
          if actual_total > 0:
              recall = true_positives / float(actual_total)
          precisions.append(precision)
          recalls.append(recall)

      precision = np.mean(precisions)
      recall = np.mean(recalls)
      if precision + recall > 0:
          f1 = (2 * (precision * recall)) / float(precision + recall)
      else:
          # Both averages are zero: report 0 instead of dividing 0 by 0.
          f1 = 0.0
      accuracy = np.sum(confusion_matrix.diagonal()) / float(np.sum(confusion_matrix))

      return precision, recall, accuracy, f1
示例#3
0
def confusion_matrix(y_true, y_pred, target_names, normalize=False):
    """Compute a confusion matrix after validating the inputs.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        target_names: Optional display names, one per distinct value found in
            ``y_true`` and ``y_pred``. Only its length is validated here; the
            names do not affect the returned matrix.
        normalize: When true, divide every row by its sum.

    Returns:
        The confusion matrix produced by ``sklearn_confusion_matrix``, as
        floats normalised per row when ``normalize`` is set.

    Raises:
        ValueError: If ``y_true`` or ``y_pred`` is None, or if the number of
            ``target_names`` differs from the number of distinct values.
    """
    if y_true is None or y_pred is None:
        raise ValueError("y_true and y_pred are needed to plot confusion "
                         "matrix")

    # calculate how many names you expect
    values = set(y_true).union(y_pred)
    expected_len = len(values)
    if target_names is not None and expected_len != len(target_names):
        raise ValueError(
            ('Data cointains {} different values, but target'
             ' names contains {} values.'.format(expected_len,
                                                 len(target_names))))

    # if the user didn't pass target_names, create generic ones
    # (the original test was inverted and overwrote user-supplied names)
    if target_names is None:
        target_names = ['Class {}'.format(v) for v in sorted(values)]

    cm = sklearn_confusion_matrix(y_true, y_pred)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    return cm
示例#4
0
def confusion_matrix(predictions, labels, columns=None):
    """Return the confusion matrix of the ``classALeRCE`` columns as a DataFrame.

    ``labels.classALeRCE`` is used as the ground truth and
    ``predictions.classALeRCE`` as the prediction. ``columns`` fixes the
    class order of both axes; when omitted, the unique predicted classes
    are used.
    """
    class_order = (predictions.classALeRCE.unique()
                   if columns is None else columns)
    counts = sklearn_confusion_matrix(
        labels.classALeRCE, predictions.classALeRCE, labels=class_order)
    return pd.DataFrame(counts, columns=class_order, index=class_order)
示例#5
0
def confusion_matrix(y_pred: Tensor, y_true: Tensor, **kwargs):
    """Confusion matrix of the argmax (axis 1) of ``y_pred`` against ``y_true``.

    Both tensors are detached, moved to the CPU and converted to numpy first.
    An optional ``labels`` keyword is forwarded to ``sklearn_confusion_matrix``.
    Any exception is logged through ``logger.error`` and ``None`` is returned.
    """
    try:
        predicted_classes = np.argmax(y_pred.detach().cpu().numpy(), axis=1)
        true_classes = y_true.detach().cpu().numpy()
        label_order = kwargs.get("labels")
        return sklearn_confusion_matrix(
            true_classes, predicted_classes, labels=label_order)
    except Exception as e:
        # Best effort: report the failure and fall through to return None.
        logger.error(e)
示例#6
0
def confusion_matrix(y_test, y_pred):
    """Compute the confusion matrix of ``y_pred`` against ``y_test`` and show it.

    The matrix is rendered with ``plt.matshow`` plus a colour bar; nothing is
    returned.
    """
    logger.info('generating confusion matrix')
    plt.matshow(sklearn_confusion_matrix(y_test, y_pred))
    plt.title('Confusion matrix')
    plt.colorbar()
    # Axis captions.
    plt.xlabel('Predicted label')
    plt.ylabel('True label')
    plt.show()
示例#7
0
def confusion_matrix(y_test, y_pred):
    """Display the confusion matrix of the predictions as a matplotlib image.

    Logs an info message, computes the matrix from ``y_test`` and ``y_pred``,
    draws it with a title, colour bar and axis labels, then shows the figure.
    Returns None.
    """
    logger.info('generating confusion matrix')
    matrix = sklearn_confusion_matrix(y_test, y_pred)
    plt.matshow(matrix)
    plt.title('Confusion matrix')
    plt.colorbar()
    plt.xlabel('Predicted label')
    plt.ylabel('True label')
    plt.show()
示例#8
0
 def np_to_conf_mat(self):
     """Compute the confusion matrix of ``self.np_pred`` against ``self.np_label``.

     Stores the matrix in ``self.conf_mat`` and the class names in
     ``self.classes`` (``self.intervals`` when ``self.bin_data`` is set).

     Returns:
         Tuple ``(conf_mat, classes)`` where ``classes`` is always the sorted
         distinct values of ``self.np_label`` as strings, even when
         ``self.bin_data`` is set.
     """
     matrix = sklearn_confusion_matrix(self.np_label, self.np_pred)
     distinct_values = np.sort(np.array(list(set(self.np_label))))
     class_names = [str(value) for value in distinct_values]
     self.conf_mat = matrix
     self.classes = class_names
     if self.bin_data:
         # Binned data is labelled with its intervals instead.
         self.classes = self.intervals
     return matrix, class_names
示例#9
0
def confusion_matrix(y_pred: np.array, y: np.array, padding: int) -> np.array:
    """Confusion matrix with predictions on the first axis and truth on the second.

    :param y_pred: predicted tags.
    :param y: the ground-truth tags.
    :param padding: padding index; wherever ``y`` equals it, the prediction
        is overwritten with the padding index as well.

    Returns:
        - Result of ``sklearn_confusion_matrix(y_pred, y)`` on the adjusted
          predictions (padding label included)."""
    is_padding = (y == padding)
    masked_predictions = np.ma.masked_array(data=y_pred, mask=is_padding)
    adjusted_predictions = masked_predictions.filled(padding)
    return sklearn_confusion_matrix(adjusted_predictions, y)
示例#10
0
def get_detection_metrics(true_labels, predicted_labels):
    """Derive binary detection statistics from a ``[0, 1]`` confusion matrix.

    Returns a dict with the four cell counts (``tn``, ``fp``, ``fn``, ``tp``),
    ``precision``, ``recall``, ``acc`` (correct predictions divided by
    ``true_labels.shape[0]``) and the 2x2 ``confusion_matrix``.
    """
    cells = sklearn_confusion_matrix(
        true_labels, predicted_labels, labels=[0, 1]).flatten()
    tn, fp, fn, tp = cells

    summary = {'tn': tn, 'fp': fp, 'fn': fn, 'tp': tp}
    summary['precision'] = tp / (tp + fp)
    summary['recall'] = tp / (tp + fn)
    summary['acc'] = (tp + tn) / (true_labels.shape[0])
    summary['confusion_matrix'] = np.array([tn, fp, fn, tp]).reshape(2, 2)
    return summary
示例#11
0
def get_localization_metrics(true_labels_tensor, predicted_labels_tensor,
                             n_dim):
    """Per-feature confusion matrices plus micro-averaged precision and recall.

    A ``[0, 1]`` confusion matrix is computed for each of the ``n_dim``
    features (indexing both tensors by feature). The matrices are summed over
    the feature axis to obtain the pooled counts.

    Returns:
        Dict with the pooled ``tn``/``fp``/``fn``/``tp``, ``micro-precision``,
        ``micro-recall`` and the ``confusion_tensor`` of shape
        ``(n_dim, 2, 2)``.
    """
    confusion_tensor = np.zeros(shape=(n_dim, 2, 2))
    for idx in range(n_dim):
        feature_truth = true_labels_tensor[idx]
        feature_prediction = predicted_labels_tensor[idx]
        confusion_tensor[idx] = sklearn_confusion_matrix(
            feature_truth, feature_prediction, labels=[0, 1])

    # Pool the counts of all features before computing the ratios (micro average).
    pooled = confusion_tensor.sum(axis=0)
    tn, fp, fn, tp = pooled.flatten()

    result = {'tn': tn, 'fp': fp, 'fn': fn, 'tp': tp}
    result['micro-precision'] = tp / (tp + fp)
    result['micro-recall'] = tp / (tp + fn)
    result['confusion_tensor'] = confusion_tensor
    return result
示例#12
0
def confusion_matrix(y_true, y_predicted, normalize_method='true'):
    """Confusion matrix for binary and multiclass classification.

    Arguments:
        y_true (ww.DataColumn, pd.Series or np.ndarray): True labels.
        y_predicted (ww.DataColumn, pd.Series or np.ndarray): Predicted labels from a classifier.
        normalize_method ({'true', 'pred', 'all', None}): Passed through to ``normalize_confusion_matrix`` when not None; with None the raw counts are returned. Defaults to 'true'.

    Returns:
        pd.DataFrame: Confusion matrix whose rows are the actual labels and whose columns are the predicted labels.
    """
    true_ww = _convert_to_woodwork_structure(y_true)
    predicted_ww = _convert_to_woodwork_structure(y_predicted)
    true_values = _convert_woodwork_types_wrapper(
        true_ww.to_series()).to_numpy()
    predicted_values = _convert_woodwork_types_wrapper(
        predicted_ww.to_series()).to_numpy()

    class_labels = unique_labels(true_values, predicted_values)
    counts = pd.DataFrame(
        sklearn_confusion_matrix(true_values, predicted_values),
        index=class_labels,
        columns=class_labels)

    if normalize_method is None:
        return counts
    return normalize_confusion_matrix(counts,
                                      normalize_method=normalize_method)
示例#13
0
                    f'Current detection_results size: {detection_results.shape[1]} trials'
                )
                print('Removing trials which ran into an error')
                detection_results = detection_results[:, ~exception_array, :]
                time_list = time_list[~exception_array]
                global_truth = global_truth[~exception_array]
                detection = detection[~exception_array]
                print(
                    f'Detection_results size after drop: {detection_results.shape[1]} trials'
                )

            # Recording Attack Results
            confusion_tensor = np.zeros(shape=(n_dim, 2, 2))
            for feature_idx, feature_results in enumerate(detection_results):
                confusion_tensor[feature_idx] = sklearn_confusion_matrix(
                    feature_results[:, 1],
                    feature_results[:, 2],
                    labels=[0, 1])

            # overall detection confusion matrix
            global_detection_confusion_matrix = sklearn_confusion_matrix(
                global_truth, detection, labels=[0, 1])
            # Plotting results
            # fig, axes = plt.subplots(sqrtn, sqrtn)
            # axes_flat = axes.flatten()
            # for feature, axis in enumerate(axes_flat):
            #     names = ['TN', 'FP', 'FN', 'TP']
            #     counts = confusion_tensor[feature].astype(np.int).flatten()
            #     labels = [f'{n}\n{c}' for n, c in zip(names, counts)]
            #     labels = np.array(labels).reshape(2, 2)
            #     sn.heatmap(confusion_tensor[feature].astype(np.int), annot=labels, fmt='', xticklabels=False,
            #                yticklabels=False, linewidth=.5, cbar=False, ax=axis, cmap='Blues')
示例#14
0
文件: plots.py 项目: AlonMaor14/mlrun
def eval_model_v2(
    context,
    xtest,
    ytest,
    model,
    pcurve_bins: int = 10,
    pcurve_names: List[str] = ["my classifier"],
    plots_artifact_path: str = "",
    pred_params: dict = {},
    cmap="Blues",
):
    """generate predictions and validation stats

    pred_params are non-default, scikit-learn api prediction-function
    parameters. For example, a tree-type of model may have a tree depth
    limit for its prediction function.

    Models without ``predict_proba`` are supported: the probability based
    outputs (calibration plot, ROC / precision-recall curves, AUC and Brier
    score) are skipped for them, everything else is still produced.

    :param context:      execution context used to log results and artifacts
    :param xtest:        features array type Union(DataItem, DataFrame,
                         numpy array)
    :param ytest:        ground-truth labels Union(DataItem, DataFrame,
                         Series, numpy array, List)
    :param model:        estimated model
    :param pcurve_bins:  (10) subdivide [0,1] interval into n bins, x-axis
    :param pcurve_names: label for each calibration curve
    :param plots_artifact_path: ("") where plots are stored; defaults to the
                         context's "plots" artifact sub-path
    :param pred_params:  ({}) dict of predict function parameters
    :param cmap:         ('Blues') matplotlib color map

    :returns: dict of the logged plot artifacts and csv tables (as bytes),
              keyed by a descriptive name
    """
    # NOTE: the list/dict defaults are kept for interface compatibility; they
    # are only read, never mutated, inside this function.

    import numpy as np

    def df_blob(df):
        return bytes(df.to_csv(index=False), encoding="utf-8")

    if isinstance(ytest, list):
        # The array operations below (``!=``, ``.shape``, ``.max()``) need an
        # ndarray; a plain list used to fail on ``ytest.shape``.
        ytest = np.asarray(ytest)

    if isinstance(ytest, np.ndarray):
        unique_labels = np.unique(ytest)
    else:
        try:
            ytest = ytest.values
            unique_labels = np.unique(ytest)
        except Exception as exc:
            raise Exception(
                f"unrecognized data type for ytest {exc}") from exc

    n_classes = len(unique_labels)
    is_multiclass = n_classes > 2

    # INIT DICT...OR SOME OTHER COLLECTOR THAT CAN BE ACCESSED
    plots_path = plots_artifact_path or context.artifact_subpath("plots")
    extra_data = {}

    ypred = model.predict(xtest, **pred_params)
    context.log_results({
        "accuracy": float(metrics.accuracy_score(ytest, ypred)),
        "test-error": np.sum(ytest != ypred) / ytest.shape[0],
    })

    # PROBABILITIES
    # Stays None for models without predict_proba so that the probability
    # based sections below can be skipped instead of raising NameError.
    yprob = None
    if hasattr(model, "predict_proba"):
        yprob = model.predict_proba(xtest, **pred_params)
        if not is_multiclass:
            fraction_of_positives, mean_predicted_value = calibration_curve(
                ytest, yprob[:, -1], n_bins=pcurve_bins, strategy="uniform")
            cmd = plot_calibration_curve(ytest, [yprob], pcurve_names)
            calibration = context.log_artifact(
                PlotArtifact(
                    "probability-calibration",
                    body=cmd.get_figure(),
                    title="probability calibration plot",
                ),
                artifact_path=plots_path,
                db_key=False,
            )
            extra_data["probability calibration"] = calibration

    # CONFUSION MATRIX
    cm = sklearn_confusion_matrix(ytest, ypred, normalize="all")
    df = pd.DataFrame(data=cm)
    extra_data["confusion matrix table.csv"] = df_blob(df)

    cmd = metrics.plot_confusion_matrix(
        model,
        xtest,
        ytest,
        normalize="all",
        values_format=".2g",
        cmap=plt.get_cmap(cmap),
    )
    confusion = context.log_artifact(
        PlotArtifact(
            "confusion-matrix",
            body=cmd.figure_,
            title="Confusion Matrix - Normalized Plot",
        ),
        artifact_path=plots_path,
        db_key=False,
    )
    extra_data["confusion matrix"] = confusion

    # LEARNING CURVES
    if hasattr(model, "evals_result"):
        results = model.evals_result()
        eval_sets = list(results.items())

        learning_curves_df = None
        # Both a training and a validation set are needed for the curves.
        if len(eval_sets) >= 2:
            train_set, valid_set = eval_sets[0], eval_sets[1]
            error_key = "merror" if is_multiclass else "error"
            # Each entry is (name, {metric: values}); membership has to be
            # tested with ``in`` - ``hasattr`` on a dict never matches a key.
            if error_key in train_set[1] and error_key in valid_set[1]:
                learning_curves_df = pd.DataFrame({
                    "train_error": train_set[1][error_key],
                    "valid_error": valid_set[1][error_key],
                })

        # ``if df:`` is ambiguous for a DataFrame and raises ValueError.
        if learning_curves_df is not None:
            extra_data["learning curve table.csv"] = df_blob(
                learning_curves_df)

            _, ax = plt.subplots()
            plt.xlabel("# training examples")
            plt.ylabel("error rate")
            plt.title("learning curve - error")
            ax.plot(learning_curves_df["train_error"], label="train")
            ax.plot(learning_curves_df["valid_error"], label="valid")
            learning = context.log_artifact(
                PlotArtifact("learning-curve",
                             body=plt.gcf(),
                             title="Learning Curve - erreur"),
                artifact_path=plots_path,
                db_key=False,
            )
            extra_data["learning curve"] = learning

    # FEATURE IMPORTANCES
    if hasattr(model, "feature_importances_"):
        (fi_plot, fi_tbl) = feature_importances(model, xtest.columns)
        extra_data["feature importances"] = context.log_artifact(
            fi_plot, db_key=False, artifact_path=plots_path)
        extra_data["feature importances table.csv"] = df_blob(fi_tbl)

    # AUC - ROC - PR CURVES
    if is_multiclass:
        if yprob is not None:
            lb = LabelBinarizer()
            ytest_b = lb.fit_transform(ytest)

            extra_data["precision_recall_multi"] = context.log_artifact(
                precision_recall_multi(ytest_b, yprob, unique_labels),
                artifact_path=plots_path,
                db_key=False,
            )
            extra_data["roc_multi"] = context.log_artifact(
                roc_multi(ytest_b, yprob, unique_labels),
                artifact_path=plots_path,
                db_key=False,
            )

            # AUC multiclass
            aucmicro = metrics.roc_auc_score(ytest_b,
                                             yprob,
                                             multi_class="ovo",
                                             average="micro")
            aucweighted = metrics.roc_auc_score(ytest_b,
                                                yprob,
                                                multi_class="ovo",
                                                average="weighted")

            context.log_results({
                "auc-micro": aucmicro,
                "auc-weighted": aucweighted
            })

        # others (todo - macro, micro...)
        f1 = metrics.f1_score(ytest, ypred, average="macro")
        ps = metrics.precision_score(ytest, ypred, average="macro")
        rs = metrics.recall_score(ytest, ypred, average="macro")
        context.log_results({
            "f1-score": f1,
            "precision_score": ps,
            "recall_score": rs
        })

    else:
        binary_results = {}
        if yprob is not None:
            yprob_pos = yprob[:, 1]
            extra_data["precision_recall_bin"] = context.log_artifact(
                precision_recall_bin(model, xtest, ytest, yprob_pos),
                artifact_path=plots_path,
                db_key=False,
            )
            extra_data["roc_bin"] = context.log_artifact(
                roc_bin(ytest, yprob_pos, clear=True),
                artifact_path=plots_path,
                db_key=False,
            )

            binary_results["rocauc"] = metrics.roc_auc_score(ytest, yprob_pos)
            binary_results["brier_score"] = metrics.brier_score_loss(
                ytest, yprob_pos, pos_label=ytest.max())

        binary_results["f1-score"] = metrics.f1_score(ytest, ypred)
        binary_results["precision_score"] = metrics.precision_score(
            ytest, ypred)
        binary_results["recall_score"] = metrics.recall_score(ytest, ypred)
        context.log_results(binary_results)

    # return all model metrics and plots
    return extra_data
示例#15
0
def __compute_unnormalized_confusion_matrices_for_label_group(
    true_label_idx: List[Set[int]],
    predicted_label_idx: List[Set[int]],
    label_group: LabelGroup,
    task_labels: List[LabelEntity],
) -> MatrixMetric:
    """
    Returns the unnormalized confusion-matrix metric for a certain label group

    :param true_label_idx: per sample, the set of ground-truth label indices
        (indices into ``task_labels``)
    :param predicted_label_idx: per sample, the set of predicted label indices
        (indices into ``task_labels``)
    :param label_group: group whose labels define the matrix classes
    :param task_labels: all labels of the task; every label of the group must
        be present (``task_labels.index`` raises ValueError otherwise)
    :return: MatrixMetric named after the group, with ``normalize=False``
    """
    map_task_labels_idx_to_group_idx = {
        task_labels.index(label): i_group
        for i_group, label in enumerate(label_group.labels)
    }
    set_group_labels_idx = set(map_task_labels_idx_to_group_idx.keys())
    # Take the names in group order (dict insertion order) so that name i
    # describes matrix row/column i; iterating the set gives no such guarantee.
    group_label_names = [
        task_labels[label_idx].name
        for label_idx in map_task_labels_idx_to_group_idx
    ]

    if len(group_label_names) == 1:
        # Single-class
        # we use "not" to make presence of a class to be at index 0, while the absence of it at index 1
        y_true = [
            int(not set_group_labels_idx.issubset(true_labels))
            for true_labels in true_label_idx
        ]
        y_pred = [
            int(not set_group_labels_idx.issubset(pred_labels))
            for pred_labels in predicted_label_idx
        ]
        group_label_names += [f"~ {group_label_names[0]}"]
        column_labels = group_label_names.copy()
        remove_last_row = False
    else:
        # Multiclass
        undefined_idx = len(group_label_names)  # to define missing value

        # find the intersections between GT and task labels, and Prediction and task labels
        true_intersections = [
            true_labels.intersection(set_group_labels_idx)
            for true_labels in true_label_idx
        ]
        pred_intersections = [
            pred_labels.intersection(set_group_labels_idx)
            for pred_labels in predicted_label_idx
        ]

        # map the intersection to 0-index value (first element when several
        # group labels are present, ``undefined_idx`` when none is)
        y_true = [
            map_task_labels_idx_to_group_idx[next(iter(true_intersection))]
            if true_intersection else undefined_idx
            for true_intersection in true_intersections
        ]
        y_pred = [
            map_task_labels_idx_to_group_idx[next(iter(pred_intersection))]
            if pred_intersection else undefined_idx
            for pred_intersection in pred_intersections
        ]

        column_labels = group_label_names.copy()
        column_labels.append("Other")
        remove_last_row = True

    matrix_data = sklearn_confusion_matrix(
        y_true, y_pred, labels=list(range(len(column_labels))))
    if remove_last_row:
        # matrix clean up: drop the "Other" ground-truth row
        matrix_data = np.delete(matrix_data, -1, 0)
        if matrix_data[:, -1].sum() == 0:
            # if none of the GT is classified as classes from other groups, clean it up too
            matrix_data = np.delete(matrix_data, -1, 1)
            # pop() drops the trailing "Other" entry itself; remove() would
            # delete the first equal name, e.g. a real label called "Other".
            column_labels.pop()

    # Use unnormalized matrix for statistics computation (accuracy, precision, recall)
    return MatrixMetric(
        name=f"{label_group.name}",
        matrix_values=matrix_data,
        row_labels=group_label_names,
        column_labels=column_labels,
        normalize=False,
    )
示例#16
0
def maskrcnn_eval_ycb_video(model, test_loader):
    """Evaluate a MaskRCNN model on the YCB-Video test loader.

    For every batch the first image is run through the model, the outputs are
    formatted, thresholded and matched to the ground truth, the AP range is
    computed and the ground-truth / predicted masks are written to
    ``config.YCB_TEST_SAVE_FOLDER`` (emptied at the start). Afterwards the
    confusion matrix and the mean AP are printed and a MATLAB script computes
    Fwb.

    :param model: the network; switched to eval mode for the run and back to
        train mode before returning
    :param test_loader: iterable yielding ``(images, targets)`` batches
    :return: tuple ``(model, mAP, Fwb)``
    """
    print('\nevaluating MaskRCNN ..')

    model.eval()

    # Init folders: create if missing, then drop results of earlier runs.
    os.makedirs(config.YCB_TEST_SAVE_FOLDER, exist_ok=True)
    for stale_file in glob.glob(config.YCB_TEST_SAVE_FOLDER + '*'):
        os.remove(stale_file)

    APs = []
    gt_obj_ids_list, pred_obj_ids_list = [], []
    for image_idx, (images, targets) in enumerate(test_loader):
        # Keep untouched copies for formatting; ``images`` goes to the device.
        image, target = copy.deepcopy(images), copy.deepcopy(targets)
        images = [img.to(config.DEVICE) for img in images]

        with torch.no_grad():
            outputs = model(images)
            outputs = [{k: v.to(config.CPU_DEVICE)
                        for k, v in t.items()} for t in outputs]

        # Formatting input: first image of the batch as HxWxC uint8.
        image = image[0]
        image = image.to(config.CPU_DEVICE)
        image = np.squeeze(np.array(image)).transpose(1, 2, 0)
        image = np.array(image * (2**8 - 1), dtype=np.uint8)
        H, W, _ = image.shape

        # Formatting targets.
        target = target[0]
        target = {k: v.to(config.CPU_DEVICE) for k, v in target.items()}
        target = ycb_video_dataset_utils.format_target_data(
            image.copy(), target.copy())
        gt_obj_ids = np.array(target['obj_ids'], dtype=np.int32).flatten()
        gt_obj_boxes = np.array(target['obj_boxes'],
                                dtype=np.int32).reshape(-1, 4)
        gt_obj_binary_masks = np.array(target['obj_binary_masks'],
                                       dtype=np.uint8).reshape(-1, H, W)

        # format outputs.
        outputs = outputs.pop()
        outputs = maskrcnn_format_outputs(image.copy(), outputs.copy())
        outputs = maskrcnn_threshold_outputs(image.copy(), outputs.copy())
        matched_outputs = maskrcnn_match_pred_to_gt(image.copy(),
                                                    target.copy(),
                                                    outputs.copy())
        scores = np.array(matched_outputs['scores'],
                          dtype=np.float32).flatten()
        obj_ids = np.array(matched_outputs['obj_ids'],
                           dtype=np.int32).flatten()
        obj_boxes = np.array(matched_outputs['obj_boxes'],
                             dtype=np.int32).reshape(-1, 4)
        obj_binary_masks = np.array(matched_outputs['obj_binary_masks'],
                                    dtype=np.uint8).reshape(-1, H, W)

        # confusion matrix.
        gt_obj_ids_list.extend(gt_obj_ids.tolist())
        pred_obj_ids_list.extend(obj_ids.tolist())

        # get average precision.
        # NOTE(review): the masks are laid out (N, H, W); ``reshape(H, W, -1)``
        # reinterprets the buffer rather than moving the axis - confirm that
        # compute_ap_range expects this and not ``transpose(1, 2, 0)``.
        AP = compute_ap_range(
            gt_class_id=gt_obj_ids,
            gt_box=gt_obj_boxes,
            gt_mask=gt_obj_binary_masks.reshape(H, W, -1),
            pred_score=scores,
            pred_class_id=obj_ids,
            pred_box=obj_boxes,
            pred_mask=obj_binary_masks.reshape(H, W, -1),
            verbose=False,
        )
        APs.append(AP)

        # create masks.
        pred_obj_mask = ycb_video_dataset_utils.get_segmentation_masks(
            image=image, obj_ids=obj_ids, binary_masks=obj_binary_masks)

        # save masks.
        gt_name = config.YCB_TEST_SAVE_FOLDER + str(
            image_idx) + config.TEST_GT_EXT
        cv2.imwrite(gt_name, target['obj_mask'])

        pred_name = config.YCB_TEST_SAVE_FOLDER + str(
            image_idx) + config.TEST_PRED_EXT
        cv2.imwrite(pred_name, pred_obj_mask)

    # Confusion Matrix.
    cm = sklearn_confusion_matrix(y_true=gt_obj_ids_list,
                                  y_pred=pred_obj_ids_list)
    print(f'\n{cm}')

    # mAP
    mAP = np.mean(APs)
    print(f'\nmAP: {mAP:.5f}')

    # getting Fwb
    # NOTE(review): masks are saved to YCB_TEST_SAVE_FOLDER above but the
    # ARL script/folder is evaluated here - confirm this is intended.
    os.chdir(config.MATLAB_SCRIPTS_DIR)
    try:
        import matlab.engine
        eng = matlab.engine.start_matlab()
        Fwb = eng.evaluate_arl_affpose_maskrcnn(config.ARL_TEST_SAVE_FOLDER,
                                                nargout=1)
    finally:
        # Always leave the MATLAB scripts directory, even when MATLAB fails.
        os.chdir(config.ROOT_DIR_PATH)

    model.train()
    return model, mAP, Fwb
def main():
    """Run AffNet on the ARL AffPose evaluation set and report the results.

    Loads the model weights from ``config.RESTORE_ARL_AFFNET_WEIGHTS``, runs
    every test image through the network, prints the per-object matches and
    AP, optionally saves (``SAVE_AND_EVAL_PRED``) and shows (``SHOW_IMAGES``)
    the predicted masks, then prints the confusion matrix and the mean AP.
    When ``SAVE_AND_EVAL_PRED`` is set a MATLAB script computes Fwb on the
    saved predictions.
    """
    # NOTE: config.ARL_AFF_EVAL_SAVE_FOLDER is neither created nor cleaned
    # here; it has to exist already when SAVE_AND_EVAL_PRED is set.

    # Load the Model.
    print()
    model = affnet.ResNetAffNet(pretrained=config.IS_PRETRAINED,
                                num_classes=config.NUM_CLASSES)
    model.to(config.DEVICE)

    # Load saved weights.
    print(
        f"\nrestoring pre-trained MaskRCNN weights: {config.RESTORE_ARL_AFFNET_WEIGHTS} .. "
    )
    checkpoint = torch.load(config.RESTORE_ARL_AFFNET_WEIGHTS,
                            map_location=config.DEVICE)
    model.load_state_dict(checkpoint["model"])
    model.eval()

    # Load the dataset.
    test_loader = arl_affpose_dataset_loaders.load_arl_affpose_eval_datasets(
        random_images=RANDOM_IMAGES,
        num_random=NUM_RANDOM,
        shuffle_images=SHUFFLE_IMAGES)

    # run the predictions.
    APs = []
    gt_obj_ids_list, pred_obj_ids_list = [], []
    for image_idx, (images, targets) in enumerate(test_loader):
        print(f'\nImage:{image_idx+1}/{len(test_loader)}')

        # Keep untouched copies for formatting; ``images`` goes to the device.
        image, target = copy.deepcopy(images), copy.deepcopy(targets)
        images = [img.to(config.DEVICE) for img in images]

        with torch.no_grad():
            outputs = model(images)
            outputs = [{k: v.to(config.CPU_DEVICE)
                        for k, v in t.items()} for t in outputs]

        # Formatting input: first image of the batch as HxWxC uint8.
        image = image[0]
        image = image.to(config.CPU_DEVICE)
        image = np.squeeze(np.array(image)).transpose(1, 2, 0)
        image = np.array(image * (2**8 - 1), dtype=np.uint8)
        H, W, _ = image.shape

        # Formatting targets.
        target = target[0]
        target = {k: v.to(config.CPU_DEVICE) for k, v in target.items()}
        target = arl_affpose_dataset_utils.format_target_data(
            image.copy(), target.copy())
        gt_obj_ids = np.array(target['obj_ids'], dtype=np.int32).flatten()
        gt_obj_boxes = np.array(target['obj_boxes'],
                                dtype=np.int32).reshape(-1, 4)
        gt_obj_binary_masks = np.array(target['obj_binary_masks'],
                                       dtype=np.uint8).reshape(-1, H, W)

        # Formatting Output.
        outputs = outputs.pop()
        outputs = eval_utils.affnet_format_outputs(image.copy(),
                                                   outputs.copy())
        outputs = eval_utils.affnet_threshold_outputs(image.copy(),
                                                      outputs.copy())
        outputs = eval_utils.maskrcnn_match_pred_to_gt(image.copy(),
                                                       target.copy(),
                                                       outputs.copy())
        scores = np.array(outputs['scores'], dtype=np.float32).flatten()
        obj_ids = np.array(outputs['obj_ids'], dtype=np.int32).flatten()
        obj_boxes = np.array(outputs['obj_boxes'],
                             dtype=np.int32).reshape(-1, 4)
        obj_binary_masks = np.array(outputs['obj_binary_masks'],
                                    dtype=np.uint8).reshape(-1, H, W)
        obj_part_ids = np.array(outputs['obj_part_ids'],
                                dtype=np.int32).flatten()
        aff_ids = np.array(outputs['aff_ids'], dtype=np.int32).flatten()
        aff_binary_masks = np.array(outputs['aff_binary_masks'],
                                    dtype=np.uint8).reshape(-1, H, W)

        # confusion matrix.
        gt_obj_ids_list.extend(gt_obj_ids.tolist())
        pred_obj_ids_list.extend(obj_ids.tolist())

        # get average precision.
        # NOTE(review): the masks are laid out (N, H, W); ``reshape(H, W, -1)``
        # reinterprets the buffer rather than moving the axis - confirm that
        # compute_ap_range expects this and not ``transpose(1, 2, 0)``.
        AP = eval_utils.compute_ap_range(
            gt_class_id=gt_obj_ids,
            gt_box=gt_obj_boxes,
            gt_mask=gt_obj_binary_masks.reshape(H, W, -1),
            pred_score=scores,
            pred_class_id=obj_ids,
            pred_box=obj_boxes,
            pred_mask=obj_binary_masks.reshape(H, W, -1),
            verbose=False,
        )
        APs.append(AP)

        # print outputs: ground truth and prediction are paired by position,
        # up to the shorter of the two lists.
        for pair_idx in range(min(len(gt_obj_ids), len(obj_ids))):
            gt_obj_name = "{:<15}".format(
                arl_affpose_dataset_utils.map_obj_id_to_name(
                    gt_obj_ids[pair_idx]))
            pred_obj_name = "{:<15}".format(
                arl_affpose_dataset_utils.map_obj_id_to_name(
                    obj_ids[pair_idx]))
            score = scores[pair_idx]
            bbox_iou = eval_utils.get_iou(obj_boxes[pair_idx],
                                          gt_obj_boxes[pair_idx])

            print(
                f'GT: {gt_obj_name}',
                f'Pred: {pred_obj_name}'
                f'Score: {score:.3f},\t\t',
                f'IoU: {bbox_iou:.3f},',
            )
        print("AP @0.5-0.95: {:.5f}".format(AP))

        # visualize bbox.
        pred_bbox_img = arl_affpose_dataset_utils.draw_bbox_on_img(
            image=image, scores=scores, obj_ids=obj_ids, boxes=obj_boxes)

        # visualize affordance masks.
        pred_aff_mask = arl_affpose_dataset_utils.get_segmentation_masks(
            image=image,
            obj_ids=aff_ids,
            binary_masks=aff_binary_masks,
        )
        color_aff_mask = arl_affpose_dataset_utils.colorize_aff_mask(
            pred_aff_mask)
        color_aff_mask = cv2.addWeighted(pred_bbox_img, 0.5, color_aff_mask,
                                         0.5, 0)

        # get obj part mask.
        pred_obj_part_mask = arl_affpose_dataset_utils.get_obj_part_mask(
            image=image,
            obj_part_ids=obj_part_ids,
            aff_binary_masks=aff_binary_masks,
        )
        # visualize object masks.
        pred_obj_mask = arl_affpose_dataset_utils.convert_obj_part_mask_to_obj_mask(
            pred_obj_part_mask)
        color_obj_mask = arl_affpose_dataset_utils.colorize_obj_mask(
            pred_obj_mask)
        color_obj_mask = cv2.addWeighted(pred_bbox_img, 0.5, color_obj_mask,
                                         0.5, 0)

        if SAVE_AND_EVAL_PRED:
            # saving predictions under a zero-padded six-digit image id.
            _image_idx = target["image_id"].detach().numpy()[0]
            _image_idx = str(1000000 + _image_idx)[1:]

            gt_name = config.ARL_AFF_EVAL_SAVE_FOLDER + _image_idx + config.TEST_GT_EXT
            pred_name = config.ARL_AFF_EVAL_SAVE_FOLDER + _image_idx + config.TEST_PRED_EXT
            obj_part_name = config.ARL_AFF_EVAL_SAVE_FOLDER + _image_idx + config.TEST_OBJ_PART_EXT

            cv2.imwrite(gt_name, target['aff_mask'])
            cv2.imwrite(pred_name, pred_aff_mask)
            cv2.imwrite(obj_part_name, pred_obj_part_mask)

        # show plot.
        if SHOW_IMAGES:
            cv2.imshow('pred_bbox',
                       cv2.cvtColor(pred_bbox_img, cv2.COLOR_BGR2RGB))
            cv2.imshow('pred_aff_mask',
                       cv2.cvtColor(color_aff_mask, cv2.COLOR_BGR2RGB))
            cv2.imshow('pred_obj_part_mask',
                       cv2.cvtColor(color_obj_mask, cv2.COLOR_BGR2RGB))
            cv2.waitKey(0)

    # Confusion Matrix.
    cm = sklearn_confusion_matrix(y_true=gt_obj_ids_list,
                                  y_pred=pred_obj_ids_list)
    print(f'\n{cm}')

    # mAP
    print("\nmAP @0.5-0.95: over {} test images is {:.3f}".format(
        len(APs), np.mean(APs)))

    if SAVE_AND_EVAL_PRED:
        print()
        # getting FwB.
        os.chdir(config.MATLAB_SCRIPTS_DIR)
        try:
            import matlab.engine
            eng = matlab.engine.start_matlab()
            Fwb = eng.evaluate_arl_affpose_affnet(
                config.ARL_AFF_EVAL_SAVE_FOLDER, nargout=1)
        finally:
            # Always leave the MATLAB scripts directory, even when MATLAB fails.
            os.chdir(config.ROOT_DIR_PATH)