Example #1
def _daal_roc_auc_score(y_true, y_score, *, average="macro", sample_weight=None,
                        max_fpr=None, multi_class="raise", labels=None):
    y_type = _daal_type_of_target(y_true)
    y_true = check_array(y_true, ensure_2d=False, dtype=None)
    y_score = check_array(y_score, ensure_2d=False)

    if y_type[0] == "multiclass" or (y_type[0] == "binary" and
                                  y_score.ndim == 2 and
                                  y_score.shape[1] > 2):
        # do not support partial ROC computation for multiclass
        if max_fpr is not None and max_fpr != 1.:
            raise ValueError("Partial AUC computation not available in "
                             "multiclass setting, 'max_fpr' must be"
                             " set to `None`, received `max_fpr={0}` "
                             "instead".format(max_fpr))
        if multi_class == 'raise':
            raise ValueError("multi_class must be in ('ovo', 'ovr')")
        logging.info("sklearn.metrics.roc_auc_score: " + get_patch_message("sklearn"))
        result = multiclass_roc_auc_score(y_true, y_score, labels,
                                          multi_class, average, sample_weight)
    elif y_type[0] == "binary":
        labels = y_type[1]
        daal_use = max_fpr is None and sample_weight is None and len(labels) == 2
        if daal_use:
            logging.info("sklearn.metrics.roc_auc_score: " + get_patch_message("daal"))
            if not np.array_equal(labels, [0, 1]):
                y_true = label_binarize(y_true, classes=labels)[:, 0]
            result = d4p.daal_roc_auc_score(y_true.reshape(-1, 1), y_score.reshape(-1, 1))

        if not daal_use or result == -1:
            y_true = label_binarize(y_true, classes=labels)[:, 0]
            logging.info("sklearn.metrics.roc_auc_score: " + get_patch_message("sklearn"))
            if sklearn_check_version('0.22'):
                result = _average_binary_score(partial(_binary_roc_auc_score,
                                                       max_fpr=max_fpr),
                                               y_true, y_score, average,
                                               sample_weight=sample_weight)
            else:
                result = multiclass_roc_auc_score(y_true, y_score, average,
                                                  sample_weight=sample_weight,
                                                  max_fpr=max_fpr)
    else:
        logging.info("sklearn.metrics.roc_auc_score: " + get_patch_message("sklearn"))
        if sklearn_check_version('0.22'):
            result = _average_binary_score(partial(_binary_roc_auc_score,
                                                   max_fpr=max_fpr),
                                           y_true, y_score, average,
                                           sample_weight=sample_weight)
        else:
            result = multiclass_roc_auc_score(y_true, y_score, average,
                                              sample_weight=sample_weight,
                                              max_fpr=max_fpr)
    return result
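
The binary fast path above only hands data to daal4py when the class labels are already {0, 1}; anything else is remapped first with label_binarize, and a return value of -1 from d4p.daal_roc_auc_score triggers the sklearn fallback. A minimal sketch of that remapping step (the values are illustrative only):

import numpy as np
from sklearn.preprocessing import label_binarize

y_true = np.array(["neg", "pos", "pos", "neg"])
labels = np.unique(y_true)                    # ['neg' 'pos']

# label_binarize maps arbitrary binary labels onto {0, 1}, which is what the
# DAAL kernel expects before y_true.reshape(-1, 1) is passed to it.
y_bin = label_binarize(y_true, classes=labels)[:, 0]
print(y_bin)                                  # [0 1 1 0]
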
Example #2
def average_precision_score(y_true,
                            y_score,
                            average="macro",
                            sample_weight=None):
    def _binary_average_precision(y_true, y_score, sample_weight=None):
        precision, recall, thresholds = precision_recall_curve(
            y_true, y_score, sample_weight=sample_weight)
        return auc(recall, precision)

    return _average_binary_score(_binary_average_precision,
                                 y_true,
                                 y_score,
                                 average,
                                 sample_weight=sample_weight)
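
This variant computes average precision as the trapezoidal area under the precision-recall curve, whereas scikit-learn's built-in average_precision_score uses a step-wise sum, so the two values generally differ slightly. A self-contained comparison with made-up scores:

import numpy as np
from sklearn.metrics import auc, average_precision_score, precision_recall_curve

y_true = np.array([0, 0, 1, 1, 1, 0])
y_score = np.array([0.1, 0.4, 0.35, 0.8, 0.7, 0.2])

precision, recall, _ = precision_recall_curve(y_true, y_score)
print(auc(recall, precision))                    # trapezoidal area, as above
print(average_precision_score(y_true, y_score))  # sklearn's step-wise estimate
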
Example #3
def min_aupdc(y_true,
              pos_label,
              average,
              sample_weight=None,
              known_skew=None,
              new_skew=None):
    """
    Compute the minimum possible area under the performance 
    diagram curve. Essentially, a vote of NO for all predictions. 
    """
    min_score = np.zeros((len(y_true)))
    average_precision = partial(_binary_uninterpolated_average_precision,
                                known_skew=known_skew,
                                new_skew=new_skew,
                                pos_label=pos_label)
    ap_min = _average_binary_score(average_precision,
                                   y_true,
                                   min_score,
                                   average,
                                   sample_weight=sample_weight)

    return ap_min
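
A sanity-check sketch of the "vote of NO for all predictions" idea, using plain scikit-learn rather than the skew-modified _binary_uninterpolated_average_precision helper (defined elsewhere in this module): with a constant zero score, unmodified average precision collapses to the event frequency.

import numpy as np
from sklearn.metrics import average_precision_score

y_true = np.array([0, 0, 0, 1, 0, 1, 0, 0, 0, 0])
min_score = np.zeros(len(y_true))             # a vote of NO for every sample

print(average_precision_score(y_true, min_score))  # 0.2
print(np.mean(y_true))                              # 0.2 == event frequency
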
Example #4
def norm_aupdc(y_true,
               y_score,
               known_skew=None,
               *,
               average="macro",
               pos_label=1,
               sample_weight=None,
               min_method='random'):
    """
    Compute the normalized modified average precision. Normalization removes 
    the no-skill region either based on skew or random classifier performance. 
    Modification alters success ratio to be consistent with a known skew. 
  
    Parameters:
    -------------------
        y_true, array of (n_samples,)
            Binary truth labels (0, 1)
        y_score, array of (n_samples,)
            Model predictions (either deterministic or probabilistic)
        known_skew, float between 0 and 1 
            Known or reference skew (# of 1 / n_samples) for 
            computing the modified success ratio.
        min_method, 'skew' or 'random'
            If 'skew', then the normalization is based on the minimum AUPDC 
            formula presented in Boyd et al. (2012).
            
            If 'random', then the normalization is based on the 
            minimum AUPDC for a random classifier, which is equal 
            to the known skew. 
    
    
    Boyd et al. (2012): Unachievable Region in Precision-Recall Space and Its Effect on Empirical Evaluation, arXiv.
    """
    new_skew = np.mean(y_true)
    if known_skew is None:
        known_skew = new_skew

    y_type = type_of_target(y_true)
    if y_type == "multilabel-indicator" and pos_label != 1:
        raise ValueError("Parameter pos_label is fixed to 1 for "
                         "multilabel-indicator y_true. Do not set "
                         "pos_label or set pos_label to 1.")
    elif y_type == "binary":
        # Convert to Python primitive type to avoid NumPy type / Python str
        # comparison. See https://github.com/numpy/numpy/issues/6784
        present_labels = np.unique(y_true).tolist()
        if len(present_labels) == 2 and pos_label not in present_labels:
            raise ValueError(
                f"pos_label={pos_label} is not a valid label. It should be "
                f"one of {present_labels}")
    average_precision = partial(_binary_uninterpolated_average_precision,
                                known_skew=known_skew,
                                new_skew=new_skew,
                                pos_label=pos_label)

    ap = _average_binary_score(average_precision,
                               y_true,
                               y_score,
                               average,
                               sample_weight=sample_weight)

    if min_method == 'random':
        ap_min = known_skew
    elif min_method == 'skew':
        ap_min = min_aupdc(y_true,
                           pos_label,
                           average,
                           sample_weight=sample_weight,
                           known_skew=known_skew,
                           new_skew=new_skew)
    else:
        raise ValueError(
            f"min_method must be 'random' or 'skew', got {min_method!r}")

    naupdc = (ap - ap_min) / (1.0 - ap_min)

    return naupdc
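
Illustrative arithmetic for the normalization at the end of norm_aupdc: the no-skill floor ap_min is subtracted and the remaining range rescaled, so a perfect model maps to 1.0 and a no-skill model to 0.0. The numbers below are made up.

ap = 0.45        # modified AUPDC of the model
ap_min = 0.20    # known skew ('random') or the Boyd et al. minimum ('skew')

naupdc = (ap - ap_min) / (1.0 - ap_min)
print(naupdc)    # 0.3125
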
Example #5
def multiclass_roc_auc_score(
    y_true,
    y_score,
    labels,
    multi_class,
    average,
    sample_weight=None,
    invalid_proba_tolerance: float = 1e-6,
):
    """Multiclass roc auc score (copied from sklearn)

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        True multiclass labels.

    y_score : array-like of shape (n_samples, n_classes)
        Target scores corresponding to probability estimates of a sample
        belonging to a particular class

    labels : array, shape = [n_classes] or None, optional (default=None)
        List of labels to index ``y_score`` used for multiclass. If ``None``,
        the lexical order of ``y_true`` is used to index ``y_score``.

    multi_class : string, 'ovr' or 'ovo'
        Determines the type of multiclass configuration to use.
        ``'ovr'``:
            Calculate metrics for the multiclass case using the one-vs-rest
            approach.
        ``'ovo'``:
            Calculate metrics for the multiclass case using the one-vs-one
            approach.

    average : 'macro' or 'weighted', optional (default='macro')
        Determines the type of averaging performed on the pairwise binary
        metric scores
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean. This does not take label imbalance into account. Classes
            are assumed to be uniformly distributed.
        ``'weighted'``:
            Calculate metrics for each label, taking into account the
            prevalence of the classes.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    invalid_proba_tolerance : float in [0, 1], optional (default=1e-6)
        The proportion of samples that may be ignored if their class scores
        do not sum up to 1.
    """
    # validation of the input y_score
    are_close = np.isclose(1, y_score.sum(axis=1))

    # Tolerate a small fraction of samples whose class probabilities do not
    # sum exactly to 1, which can happen when the scores are stored in float16
    # rather than float64.
    try:
        if not np.all(are_close):
            raise ValueError(
                "Target scores need to be probabilities for multiclass "
                "roc_auc, i.e. they should sum up to 1.0 over classes")

    except ValueError as ex:

        logger.exception(ex)

        assert 0 <= invalid_proba_tolerance <= 1, f"{invalid_proba_tolerance=}"

        nsamples_not_close = int((~are_close).sum())
        percentage_samples_not_close = nsamples_not_close / are_close.size

        logger.warning(
            f"{nsamples_not_close=} ({percentage_samples_not_close=:.7%})")

        if percentage_samples_not_close > invalid_proba_tolerance:
            raise ValueError(
                f"Too many samples have class scores that do not sum to 1: "
                f"{nsamples_not_close=} {percentage_samples_not_close=:.7%} "
                f"{invalid_proba_tolerance=:.7%}."
            )

        else:
            logger.warning(
                f"The fraction of samples whose probabilities do not sum to 1 "
                f"is within tolerance: {percentage_samples_not_close=:.7%} <= "
                f"{invalid_proba_tolerance=:.7%}. The offending samples will "
                f"be ignored!")

            y_true = y_true[are_close]
            y_score = y_score[are_close, :]

    # validation for multiclass parameter specifications
    average_options = ("macro", "weighted")
    if average not in average_options:
        raise ValueError("average must be one of {0} for "
                         "multiclass problems".format(average_options))

    multiclass_options = ("ovo", "ovr")
    if multi_class not in multiclass_options:
        raise ValueError("multi_class='{0}' is not supported "
                         "for multiclass ROC AUC, multi_class must be "
                         "in {1}".format(multi_class, multiclass_options))

    from sklearn.utils import column_or_1d
    from sklearn.preprocessing._label import _encode
    from sklearn.metrics._base import _average_multiclass_ovo_score
    from sklearn.preprocessing import label_binarize
    from sklearn.metrics._ranking import _binary_roc_auc_score
    from sklearn.metrics._base import _average_binary_score

    if labels is not None:
        labels = column_or_1d(labels)
        classes = _encode(labels)
        if len(classes) != len(labels):
            raise ValueError("Parameter 'labels' must be unique")
        if not np.array_equal(classes, labels):
            raise ValueError("Parameter 'labels' must be ordered")
        if len(classes) != y_score.shape[1]:
            raise ValueError(
                "Number of given labels, {0}, not equal to the number "
                "of columns in 'y_score', {1}".format(len(classes),
                                                      y_score.shape[1]))
        if len(np.setdiff1d(y_true, classes)):
            raise ValueError(
                "'y_true' contains labels not in parameter 'labels'")
    else:
        classes = _encode(y_true)
        if len(classes) != y_score.shape[1]:
            raise ValueError(
                "Number of classes in y_true not equal to the number of "
                "columns in 'y_score'")

    if multi_class == "ovo":
        if sample_weight is not None:
            raise ValueError("sample_weight is not supported "
                             "for multiclass one-vs-one ROC AUC, "
                             "'sample_weight' must be None in this case.")
        _, y_true_encoded = _encode(y_true, uniques=classes, encode=True)
        # Hand & Till (2001) implementation (ovo)
        return _average_multiclass_ovo_score(_binary_roc_auc_score,
                                             y_true_encoded,
                                             y_score,
                                             average=average)
    else:
        # ovr is same as multi-label
        y_true_multilabel = label_binarize(y_true, classes=classes)
        return _average_binary_score(_binary_roc_auc_score,
                                     y_true_multilabel,
                                     y_score,
                                     average,
                                     sample_weight=sample_weight)
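
The function above mirrors scikit-learn's public multiclass ROC AUC (available since 0.22); a minimal call through the public API, with each row of y_score summing to 1 as the validation above requires:

import numpy as np
from sklearn.metrics import roc_auc_score

y_true = np.array([0, 1, 2, 2, 1, 0])
y_score = np.array([[0.7, 0.2, 0.1],
                    [0.2, 0.6, 0.2],
                    [0.1, 0.3, 0.6],
                    [0.2, 0.2, 0.6],
                    [0.3, 0.5, 0.2],
                    [0.8, 0.1, 0.1]])

print(roc_auc_score(y_true, y_score, multi_class="ovr", average="macro"))
print(roc_auc_score(y_true, y_score, multi_class="ovo", average="macro"))
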
Example #6
def _daal_roc_auc_score(
    y_true,
    y_score,
    *,
    average="macro",
    sample_weight=None,
    max_fpr=None,
    multi_class="raise",
    labels=None,
):
    y_type = _daal_type_of_target(y_true)
    y_true = check_array(y_true, ensure_2d=False, dtype=None)
    y_score = check_array(y_score, ensure_2d=False)

    _patching_status = PatchingConditionsChain("sklearn.metrics.roc_auc_score")
    _dal_ready = _patching_status.and_conditions([
        (y_type[0] == "binary"
         and not (y_score.ndim == 2 and y_score.shape[1] > 2),
         "y_true type is not one-dimensional binary.")
    ])

    _patching_status.write_log()
    if y_type[0] == "multiclass" or (y_type[0] == "binary"
                                     and y_score.ndim == 2
                                     and y_score.shape[1] > 2):
        # do not support partial ROC computation for multiclass
        if max_fpr is not None and max_fpr != 1.:
            raise ValueError("Partial AUC computation not available in "
                             "multiclass setting, 'max_fpr' must be"
                             " set to `None`, received `max_fpr={0}` "
                             "instead".format(max_fpr))
        if multi_class == 'raise':
            raise ValueError("multi_class must be in ('ovo', 'ovr')")

        return multiclass_roc_auc_score(y_true, y_score, labels, multi_class,
                                        average, sample_weight)

    if y_type[0] == "binary":
        labels = y_type[1]
        _dal_ready = _patching_status.and_conditions([
            (len(labels) == 2, "Number of unique labels is not equal to 2."),
            (max_fpr is None, "Maximum false-positive rate is not supported."),
            (sample_weight is None, "Sample weights are not supported.")
        ])
        if _dal_ready:
            if not np.array_equal(labels, [0, 1]) or labels.dtype == bool:
                y_true = label_binarize(y_true, classes=labels)[:, 0]
            result = d4p.daal_roc_auc_score(y_true.reshape(-1, 1),
                                            y_score.reshape(-1, 1))
            if result != -1:
                return result
            logging.info("sklearn.metrics.roc_auc_score: " +
                         get_patch_message("sklearn_after_daal"))
        # return to sklearn implementation
        y_true = label_binarize(y_true, classes=labels)[:, 0]

    return _average_binary_score(
        partial(_binary_roc_auc_score, max_fpr=max_fpr),
        y_true,
        y_score,
        average,
        sample_weight=sample_weight,
    )
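
For context, this patched scorer is normally reached indirectly: with scikit-learn-intelex installed (an assumption here), patching replaces the stock metric so an ordinary roc_auc_score call dispatches to the accelerated path when the conditions logged above hold.

import numpy as np
from sklearnex import patch_sklearn

patch_sklearn()  # patch before importing the sklearn metric

from sklearn.metrics import roc_auc_score

y_true = np.array([0, 1, 1, 0, 1])
y_score = np.array([0.2, 0.8, 0.6, 0.3, 0.9])
print(roc_auc_score(y_true, y_score))  # served by the DAAL fast path when eligible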