Example #1
from numpy.testing import assert_array_equal
from scipy.stats import rankdata

def check_case(values, method, expected):
    r = rankdata(values, method=method)
    assert_array_equal(r, expected)
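A quick illustration of how this helper might be called; the values and expected ranks below are made up for illustration and are not taken from the original test suite:

# With 'average', tied values share the mean of their positions; with 'min'
# they all take the smallest position.
check_case([40, 10, 30, 10], 'average', [4.0, 1.5, 3.0, 1.5])
check_case([40, 10, 30, 10], 'min', [4.0, 1.0, 3.0, 1.0])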
Example #2
from numpy.testing import assert_array_equal
from sklearn.utils.fixes import rankdata          # assumed location of the deprecated backport
from sklearn.utils.testing import ignore_warnings


def test_cases_rankdata(values, method, expected):
    # Test the deprecated backport, to be removed in 0.21; `values`, `method`
    # and `expected` come from pytest parametrization in the original module.
    with ignore_warnings():
        r = rankdata(values, method=method)
        assert_array_equal(r, expected)
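The arguments of such a test are normally supplied by pytest parametrization. Below is a minimal, self-contained sketch of a table-driven test written against scipy.stats.rankdata directly; the `_cases` data and the test name are illustrative, not the original suite's:

import pytest
from numpy.testing import assert_array_equal
from scipy.stats import rankdata

# Hypothetical case table: (values, method, expected ranks).
_cases = [
    ([100], 'max', [1.0]),
    ([100, 100, 100], 'dense', [1.0, 1.0, 1.0]),
    ([100, 300, 200], 'ordinal', [1.0, 3.0, 2.0]),
    ([100, 200, 200, 100], 'min', [1.0, 3.0, 3.0, 1.0]),
]


@pytest.mark.parametrize('values, method, expected', _cases)
def test_rankdata_methods(values, method, expected):
    assert_array_equal(rankdata(values, method=method), expected)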
Example #3
from scipy.sparse import csr_matrix
from scipy.stats import rankdata
from sklearn.utils import check_array, check_consistent_length
from sklearn.utils.multiclass import type_of_target


def label_ranking_average_precision_score(y_true, y_score):
    """Compute ranking-based average precision

    Label ranking average precision (LRAP) is the average, over each ground
    truth label assigned to each sample, of the fraction of labels with an
    equal or higher score that are themselves true labels.

    This metric is used in multilabel ranking problems, where the goal is
    to give a better rank to the labels associated with each sample.

    The obtained score is always strictly greater than 0 and
    the best value is 1.

    Read more in the :ref:`User Guide <label_ranking_average_precision>`.

    Parameters
    ----------
    y_true : array or sparse matrix, shape = [n_samples, n_labels]
        True binary labels in binary indicator format.

    y_score : array, shape = [n_samples, n_labels]
        Target scores, can either be probability estimates of the positive
        class, confidence values, or binary decisions.

    Returns
    -------
    score : float

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.metrics import label_ranking_average_precision_score
    >>> y_true = np.array([[1, 0, 0], [0, 0, 1]])
    >>> y_score = np.array([[0.75, 0.5, 1], [1, 0.2, 0.1]])
    >>> label_ranking_average_precision_score(y_true, y_score) \
        # doctest: +ELLIPSIS
    0.416...

    """
    check_consistent_length(y_true, y_score)
    y_true = check_array(y_true, ensure_2d=False)
    y_score = check_array(y_score, ensure_2d=False)

    if y_true.shape != y_score.shape:
        raise ValueError("y_true and y_score have different shape")

    # Handle badly formatted arrays and the degenerate case with one label
    y_type = type_of_target(y_true)
    if (y_type != "multilabel-indicator"
            and not (y_type == "binary" and y_true.ndim == 2)):
        raise ValueError("{0} format is not supported".format(y_type))

    y_true = csr_matrix(y_true)
    y_score = -y_score

    n_samples, n_labels = y_true.shape

    out = 0.
    for i, (start, stop) in enumerate(zip(y_true.indptr, y_true.indptr[1:])):
        relevant = y_true.indices[start:stop]

        if (relevant.size == 0 or relevant.size == n_labels):
            # If all labels are relevant or all are irrelevant, the label
            # ranking has no meaning; by convention the score is 1.
            out += 1.
            continue

        scores_i = y_score[i]
        # Scores were negated above, so a 'max' rank of k means that k
        # labels have an original score >= this label's score.
        rank = rankdata(scores_i, 'max')[relevant]
        # L counts, among the relevant labels only, those with a score >=
        # each relevant label's score.
        L = rankdata(scores_i[relevant], 'max')
        out += (L / rank).mean()

    return out / n_samples
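As a sanity check on the loop above, the doctest value can be reproduced by hand; a minimal sketch using the same inputs as the docstring example and only the public scikit-learn API:

import numpy as np
from sklearn.metrics import label_ranking_average_precision_score

y_true = np.array([[1, 0, 0], [0, 0, 1]])
y_score = np.array([[0.75, 0.5, 1], [1, 0.2, 0.1]])

# Sample 1: the true label scores 0.75; two labels score >= 0.75 and one of
# them is true, giving a precision of 1/2.
# Sample 2: the true label scores 0.1; all three labels score >= 0.1 and one
# of them is true, giving a precision of 1/3.
expected = (1 / 2 + 1 / 3) / 2            # = 5/12 ~= 0.4166...
assert np.isclose(
    label_ranking_average_precision_score(y_true, y_score), expected)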