def average_precision_score(y_true,
                            y_score,
                            average="macro",
                            pos_label=1,
                            sample_weight=None):
    def _binary_uninterpolated_average_precision(y_true,
                                                 y_score,
                                                 pos_label=1,
                                                 sample_weight=None):
        precision, recall, _ = precision_recall_curve_modified(
            y_true, y_score, pos_label=pos_label, sample_weight=sample_weight)
        # Return the step function integral
        # The following works because the last entry of precision is
        # guaranteed to be 1, as returned by precision_recall_curve
        return -np.sum(np.diff(recall) * np.array(precision)[:-1])

    y_type = type_of_target(y_true)
    if y_type == "multilabel-indicator" and pos_label != 1:
        raise ValueError("Parameter pos_label is fixed to 1 for "
                         "multilabel-indicator y_true. Do not set "
                         "pos_label or set pos_label to 1.")
    elif y_type == "binary":
        present_labels = np.unique(y_true)
        if len(present_labels) == 2 and pos_label not in present_labels:
            raise ValueError("pos_label=%r is invalid. Set it to a label in "
                             "y_true." % pos_label)
    average_precision = partial(_binary_uninterpolated_average_precision,
                                pos_label=pos_label)
    return _average_binary_score(average_precision,
                                 y_true,
                                 y_score,
                                 average,
                                 sample_weight=sample_weight)
Example No. 2
def average_precision_score(y_true, y_score, average="macro",
                            sample_weight=None):
    def _binary_average_precision(y_true, y_score, sample_weight=None):
        precision, recall, thresholds = precision_recall_curve(
            y_true, y_score, sample_weight=sample_weight)
        return auc(recall, precision)

    return _average_binary_score(_binary_average_precision, y_true, y_score,
                                 average, sample_weight=sample_weight)
Example No. 3
def average_precision_score(y_true, y_score, average="macro", pos_label=1,
                            sample_weight=None):
    def _binary_uninterpolated_average_precision(y_true, y_score, pos_label=1,
                                                 sample_weight=None):
        precision, recall, _ = precision_recall_curve(
            y_true, y_score, pos_label=pos_label, sample_weight=sample_weight)
        # Guard against NaN recall (no positive samples) before the step sum
        recall[np.isnan(recall)] = 0
        return -np.sum(np.diff(recall) * np.array(precision)[:-1])

    average_precision = functools.partial(
        _binary_uninterpolated_average_precision, pos_label=pos_label)

    return _average_binary_score(average_precision, y_true, y_score, average,
                                 sample_weight=sample_weight)
Example No. 4
def test_averaging_binary_multilabel_all_zeroes():
    y_true = np.zeros((20, 3))
    y_pred = np.zeros((20, 3))
    y_true_binarize = y_true
    y_pred_binarize = y_pred
    # Test _average_binary_score for weight.sum() == 0
    binary_metric = (lambda y_true, y_score, average="macro":
                     _average_binary_score(
                         precision_score, y_true, y_score, average))
    _check_averaging(binary_metric, y_true, y_pred, y_true_binarize,
                     y_pred_binarize, is_multilabel=True)
Example No. 5
def binary_average_precision(y_true, y_score, interpolated_auc=True):
    def _average_precision(y_true_, y_score_, sample_weight=None):
        precision, recall, _ = precision_recall_curve(
            y_true_, y_score_, sample_weight=sample_weight)
        if not interpolated_auc:
            # Return the step function integral
            # The following works because the last entry of precision is
            # guaranteed to be 1, as returned by precision_recall_curve
            return -1 * np.sum(np.diff(recall) * np.array(precision)[:-1])

        return auc(recall, precision)

    return _average_binary_score(_average_precision, y_true, y_score, average="macro")
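
The interpolated_auc flag above toggles between a trapezoidal area and the uninterpolated step sum. A toy comparison of the two computations, using only sklearn's public auc and precision_recall_curve; the areas diverge once scores tie:

import numpy as np
from sklearn.metrics import auc, precision_recall_curve

y_true = np.array([0, 1, 0, 1, 1, 0])
y_score = np.array([0.2, 0.9, 0.4, 0.4, 0.7, 0.1])

precision, recall, _ = precision_recall_curve(y_true, y_score)

interpolated = auc(recall, precision)                                 # trapezoidal area
uninterpolated = -np.sum(np.diff(recall) * np.array(precision)[:-1])  # step sum

print(interpolated, uninterpolated)  # 0.9583... vs 0.9166... on this toy data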
Example No. 6
def pr_auc_score(y_true, y_score, average='micro', sample_weight=None):
    def _binary_pr_auc_score(y_true, y_score, sample_weight=None):
        if len(np.unique(y_true)) != 2:
            raise ValueError("Only one class present in y_true. AUPRC score "
                             "is not defined in that case.")
        precision, recall, _ = precision_recall_curve(
            y_true, y_score, sample_weight=sample_weight)
        # Trapezoidal area under the precision-recall curve
        return auc(recall, precision)

    return _average_binary_score(_binary_pr_auc_score,
                                 y_true,
                                 y_score,
                                 average,
                                 sample_weight=sample_weight)
Example No. 7
def test_averaging_multilabel_all_zeroes():
    y_true = np.zeros((20, 3))
    y_pred = np.zeros((20, 3))
    y_score = np.zeros((20, 3))
    y_true_binarize = y_true
    y_pred_binarize = y_pred

    for name in METRICS_WITH_AVERAGING:
        yield (check_averaging, name, y_true, y_true_binarize, y_pred,
               y_pred_binarize, y_score)

    # Test _average_binary_score for weight.sum() == 0
    binary_metric = (lambda y_true, y_score, average="macro":
                     _average_binary_score(
                         precision_score, y_true, y_score, average))
    _check_averaging(binary_metric, y_true, y_pred, y_true_binarize,
                     y_pred_binarize, is_multilabel=True)
Example No. 8
def VOC_mAP(y_trues, y_scores, sample_weight=None, ignore_index=-100):
    '''Calculate mean Average Precision (mAP) for VOC outputs.
    Assumes [n_samples, n_classes] for y_trues, y_scores.'''

    if y_trues.ndim == y_scores.ndim == 1 and y_trues.shape == y_scores.shape:
        y_trues = y_trues[:, None]
        y_scores = y_scores[:, None]
    assert y_trues.ndim == 2 and y_scores.ndim == 2 \
        and y_trues.shape == y_scores.shape
    rets = []
    for x in range(y_scores.shape[1]):
        y_true, y_score = y_trues[:, x], y_scores[:, x]
        mask = y_true != ignore_index
        y_true, y_score = y_true[mask], y_score[mask]
        ret = _average_binary_score(
            VOC_AP, y_true, y_score, average='macro',
            sample_weight=(None if sample_weight is None
                           else sample_weight[mask]))
        rets.append(ret)
        rets.append(ret)
    ret = np.mean(rets)

    return ret
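
A short usage sketch of the per-class masking VOC_mAP performs. VOC_AP is an external helper not shown here, so sklearn's average_precision_score stands in for the per-class scorer:

import numpy as np
from sklearn.metrics import average_precision_score

y_trues = np.array([[1, 0, -100],
                    [0, 1,    1],
                    [1, -100, 0],
                    [0, 1,    1]])
y_scores = np.random.RandomState(0).rand(4, 3)

aps = []
for c in range(y_trues.shape[1]):
    mask = y_trues[:, c] != -100     # drop ignore_index entries per class
    aps.append(average_precision_score(y_trues[mask, c], y_scores[mask, c]))
print(np.mean(aps))                  # mean AP over the three classes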
Example No. 9
def average_precision_score(y_true,
                            y_score,
                            average="macro",
                            sample_weight=None,
                            interpolation="linear"):
    """Compute average precision (AP) from prediction scores

    This score corresponds to the area under the precision-recall curve, where
    points are joined using either linear or step-wise interpolation.

    Note: this implementation is restricted to the binary classification task
    or multilabel classification task.

    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.

    Parameters
    ----------
    y_true : array, shape = [n_samples] or [n_samples, n_classes]
        True binary labels in binary label indicators.

    y_score : array, shape = [n_samples] or [n_samples, n_classes]
        Target scores, can either be probability estimates of the positive
        class, confidence values, or binary decisions.

    average : string, [None, 'micro', 'macro' (default), 'samples', 'weighted']
        If ``None``, the scores for each class are returned. Otherwise,
        this determines the type of averaging performed on the data:

        ``'micro'``:
            Calculate metrics globally by considering each element of the label
            indicator matrix as a label.
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean.  This does not take label imbalance into account.
        ``'weighted'``:
            Calculate metrics for each label, and find their average, weighted
            by support (the number of true instances for each label).
        ``'samples'``:
            Calculate metrics for each instance, and find their average.

    sample_weight : array-like of shape = [n_samples], optional
        Sample weights.

    interpolation : string ['linear' (default), 'step']
        Determines the kind of interpolation used when computing the AUC. If
        there are many repeated scores, 'step' is recommended to avoid under-
        or over-estimating the AUC. See www.roamanalytics.com/etc for details.

        ``'linear'``:
            Linearly interpolates between operating points.
        ``'step'``:
            Uses a step function to interpolate between operating points.

    Returns
    -------
    average_precision : float

    References
    ----------
    .. [1] `Wikipedia entry for the Average precision
           <http://en.wikipedia.org/wiki/Average_precision>`_

    See also
    --------
    roc_auc_score : Area under the ROC curve

    precision_recall_curve :
        Compute precision-recall pairs for different probability thresholds

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.metrics import average_precision_score
    >>> y_true = np.array([0, 0, 1, 1])
    >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
    >>> average_precision_score(y_true, y_scores)  # doctest: +ELLIPSIS
    0.79...

    """
    def _binary_average_precision(y_true, y_score, sample_weight=None):
        precision, recall, thresholds = precision_recall_curve(
            y_true, y_score, sample_weight=sample_weight)
        return auc(recall,
                   precision,
                   interpolation=interpolation,
                   interpolation_direction='right')

    if interpolation == "linear":
        # Check for number of unique predictions. If this is substantially less
        # than the number of predictions, linear interpolation is likely to be
        # biased.
        n_discrete_predictions = len(np.unique(y_score))
        if n_discrete_predictions < 0.75 * len(y_score):
            warnings.warn("Number of unique scores is less than 75% of the "
                          "number of scores provided. Linear interpolation "
                          "is likely to be biased in this case. You may wish "
                          "to use step interpolation instead. See docstring "
                          "for details.")
    return _average_binary_score(_binary_average_precision,
                                 y_true,
                                 y_score,
                                 average,
                                 sample_weight=sample_weight)
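
A toy illustration of the uniqueness check coded above: with heavily tied scores fewer than 75% of the values are unique, which is exactly the case where the docstring recommends interpolation='step'.

import numpy as np

y_score = np.array([0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.1, 0.1])
n_unique = len(np.unique(y_score))        # 3 distinct values out of 8 scores
if n_unique < 0.75 * len(y_score):
    print("many tied scores: prefer interpolation='step'")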
Example No. 10
def roc_auc_score(y_true, y_score, average="macro", sample_weight=None,
                  max_fpr=None):
    """Compute Area Under the Curve (AUC) from prediction scores

    Note: this implementation is restricted to the binary classification task
    or multilabel classification task in label indicator format.

    Parameters
    ----------
    y_true : array, shape = [n_samples] or [n_samples, n_classes]
        True binary labels in binary label indicators.

    y_score : array, shape = [n_samples] or [n_samples, n_classes]
        Target scores, can either be probability estimates of the positive
        class, confidence values, or binary decisions.

    average : string, [None, 'micro', 'macro' (default), 'samples', 'weighted']
        If ``None``, the scores for each class are returned. Otherwise,
        this determines the type of averaging performed on the data:

        ``'micro'``:
            Calculate metrics globally by considering each element of the label
            indicator matrix as a label.
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean.  This does not take label imbalance into account.
        ``'weighted'``:
            Calculate metrics for each label, and find their average, weighted
            by support (the number of true instances for each label).
        ``'samples'``:
            Calculate metrics for each instance, and find their average.

    sample_weight : array-like of shape = [n_samples], optional
        Sample weights.

    max_fpr : float, optional
       If not ``None``, the standardized partial AUC over
       the range [0, max_fpr] is returned.

    Returns
    -------
    auc : float

    References
    ----------
    .. [1] `Wikipedia entry for the Receiver operating characteristic
            <http://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_

    .. [2] `Analyzing a portion of the ROC curve. McClish, 1989
            <http://www.ncbi.nlm.nih.gov/pubmed/2668680>`_

    See also
    --------
    average_precision_score : Area under the precision-recall curve

    roc_curve : Compute Receiver operating characteristic (ROC)

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.metrics import roc_auc_score
    >>> y_true = np.array([0, 0, 1, 1])
    >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
    >>> roc_auc_score(y_true, y_scores)
    0.75

    """
    def _binary_roc_auc_score(y_true, y_score, sample_weight=None,
                              max_fpr=max_fpr):
        if len(np.unique(y_true)) != 2:
            raise ValueError("Only one class present in y_true. ROC AUC score "
                             "is not defined in that case.")

        fpr, tpr, thresholds = roc_curve(y_true, y_score,
                                         sample_weight=sample_weight)
        if max_fpr:
            idx = np.where(fpr <= max_fpr)[0]

            # linearly interpolate the ROC curve until max_fpr
            idx_last = idx.max()
            idx_next = idx_last + 1
            xc = [fpr[idx_last], fpr[idx_next]]
            yc = [tpr[idx_last], tpr[idx_next]]
            tpr = np.r_[tpr[idx], np.interp(max_fpr, xc, yc)]
            fpr = np.r_[fpr[idx], max_fpr]
            partial_roc = auc(fpr, tpr, reorder=True)

            # standardize result to lie between 0.5 and 1 (McClish, 1989)
            min_area = max_fpr ** 2 / 2
            max_area = max_fpr
            return 0.5 * (1 + (partial_roc - min_area) / (max_area - min_area))

        return auc(fpr, tpr, reorder=True)

    return _average_binary_score(
        _binary_roc_auc_score, y_true, y_score, average,
        sample_weight=sample_weight)
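
A usage sketch of the partial-AUC branch; released sklearn (0.20 and later) exposes the same max_fpr parameter on roc_auc_score, so the public function is used here with toy data:

import numpy as np
from sklearn.metrics import roc_auc_score

y_true = np.array([0, 0, 1, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.8])

print(roc_auc_score(y_true, y_score))               # full AUC: 0.75
print(roc_auc_score(y_true, y_score, max_fpr=0.5))  # standardized partial AUC, ~0.67 here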
Example No. 11
def average_precision_score(y_true,
                            y_score,
                            average="macro",
                            sample_weight=None):
    """Compute average precision (AP) from prediction scores

    This score corresponds to the area under the precision-recall curve.

    Note: this implementation is restricted to the binary classification task
    or multilabel classification task.

    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.

    Parameters
    ----------
    y_true : array, shape = [n_samples] or [n_samples, n_classes]
        True binary labels in binary label indicators.

    y_score : array, shape = [n_samples] or [n_samples, n_classes]
        Target scores, can either be probability estimates of the positive
        class, confidence values, or non-thresholded measure of decisions
        (as returned by "decision_function" on some classifiers).

    average : string, [None, 'micro', 'macro' (default), 'samples', 'weighted']
        If ``None``, the scores for each class are returned. Otherwise,
        this determines the type of averaging performed on the data:

        ``'micro'``:
            Calculate metrics globally by considering each element of the label
            indicator matrix as a label.
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean.  This does not take label imbalance into account.
        ``'weighted'``:
            Calculate metrics for each label, and find their average, weighted
            by support (the number of true instances for each label).
        ``'samples'``:
            Calculate metrics for each instance, and find their average.

    sample_weight : array-like of shape = [n_samples], optional
        Sample weights.

    Returns
    -------
    average_precision : float

    References
    ----------
    .. [1] `Wikipedia entry for the Average precision
           <https://en.wikipedia.org/wiki/Average_precision>`_

    See also
    --------
    roc_auc_score : Area under the ROC curve

    precision_recall_curve :
        Compute precision-recall pairs for different probability thresholds

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.metrics import average_precision_score
    >>> y_true = np.array([0, 0, 1, 1])
    >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
    >>> average_precision_score(y_true, y_scores)  # doctest: +ELLIPSIS
    0.79...

    """
    def _binary_average_precision(y_true, y_score, sample_weight=None):
        precision, recall, thresholds = precision_recall_curve(
            y_true, y_score, sample_weight=sample_weight)
        return auc(recall, precision)

    return _average_binary_score(_binary_average_precision,
                                 y_true,
                                 y_score,
                                 average,
                                 sample_weight=sample_weight)
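
The averaging options documented above also apply to multilabel indicator input; a small sketch with toy data (exact values depend on which AP variant the installed sklearn implements):

import numpy as np
from sklearn.metrics import average_precision_score

y_true = np.array([[1, 0, 0],
                   [0, 1, 1],
                   [1, 1, 0],
                   [0, 0, 1]])
y_score = np.array([[0.8, 0.2, 0.1],
                    [0.3, 0.7, 0.6],
                    [0.6, 0.9, 0.2],
                    [0.1, 0.4, 0.7]])

print(average_precision_score(y_true, y_score, average="macro"))  # unweighted per-label mean
print(average_precision_score(y_true, y_score, average="micro"))  # pool all labels globally
print(average_precision_score(y_true, y_score, average=None))     # one AP per label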