    def testRocCurve(self):
        import uuid

        import numpy as np
        import pandas as pd
        import mars.dataframe as md
        from mars.learn.metrics import roc_curve, auc
        from sklearn.metrics import roc_curve as sklearn_roc_curve, auc as sklearn_auc

        client = self.odps.create_mars_cluster(1, 4, 8, name=str(uuid.uuid4()))
        try:
            rs = np.random.RandomState(0)
            raw = pd.DataFrame({
                'a': rs.randint(0, 10, (10, )),
                'b': rs.rand(10)
            })

            df = md.DataFrame(raw)
            y = df['a'].to_tensor().astype('int')
            pred = df['b'].to_tensor().astype('float')
            fpr, tpr, thresholds = roc_curve(y, pred, pos_label=2)
            m = auc(fpr, tpr)

            sk_fpr, sk_tpr, sk_threshold = sklearn_roc_curve(
                raw['a'].to_numpy().astype('int'),
                raw['b'].to_numpy().astype('float'),
                pos_label=2)
            expect_m = sklearn_auc(sk_fpr, sk_tpr)
            self.assertAlmostEqual(m.fetch(), expect_m)
        finally:
            client.stop_server()
Example #2
from sklearn.metrics import roc_curve as sklearn_roc_curve


def roc_curve(predictions, targets):
    """Compute an ROC curve from PyTorch tensors of scores and labels."""
    # flatten to column vectors and convert to NumPy for scikit-learn
    predictions = predictions.view(-1, 1).numpy()
    targets = targets.view(-1, 1).numpy()
    # sklearn expects (y_true, y_score); the thresholds are discarded here
    fpr, tpr, _ = sklearn_roc_curve(targets, predictions)

    return fpr, tpr
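A minimal usage sketch for the wrapper above, assuming the scores and binary labels arrive as 1-D PyTorch tensors (the values are made up for illustration):

import torch

# hypothetical classifier scores and binary ground-truth labels
preds = torch.tensor([0.10, 0.40, 0.35, 0.80])
labels = torch.tensor([0, 0, 1, 1])

fpr, tpr = roc_curve(preds, labels)  # the wrapper discards the thresholds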
Example #3
    def testRocCurveAuc(self):
        service_ep = 'http://127.0.0.1:' + self.web_port
        timeout = 120 if 'CI' in os.environ else -1
        with new_session(service_ep) as sess:
            run_kwargs = {'timeout': timeout}

            rs = np.random.RandomState(0)
            raw = pd.DataFrame({
                'a': rs.randint(0, 10, (10, )),
                'b': rs.rand(10)
            })

            df = md.DataFrame(raw)
            y = df['a'].to_tensor().astype('int')
            pred = df['b'].to_tensor().astype('float')
            fpr, tpr, thresholds = roc_curve(y,
                                             pred,
                                             pos_label=2,
                                             session=sess,
                                             run_kwargs=run_kwargs)
            m = auc(fpr, tpr, session=sess, run_kwargs=run_kwargs)

            sk_fpr, sk_tpr, sk_threshold = sklearn_roc_curve(
                raw['a'].to_numpy().astype('int'),
                raw['b'].to_numpy().astype('float'),
                pos_label=2)
            expect_m = sklearn_auc(sk_fpr, sk_tpr)
            self.assertAlmostEqual(m.fetch(session=sess), expect_m)
Example #4
def roc_curve(y_true, y_pred_proba):
    """
    Given labels and classifier predicted probabilities, compute and return the data representing a Receiver Operating Characteristic (ROC) curve. Works with binary or multiclass problems.

    Arguments:
        y_true (ww.DataColumn, pd.Series or np.ndarray): True labels.
        y_pred_proba (ww.DataColumn, pd.Series or np.ndarray): Predictions from a classifier, before thresholding has been applied.

    Returns:
        list(dict): A list of dictionaries (with one for each class) is returned. Binary classification problems return a list with one dictionary.
            Each dictionary contains metrics used to generate an ROC plot with the following keys:
                  * `fpr_rates`: False positive rates.
                  * `tpr_rates`: True positive rates.
                  * `thresholds`: Threshold values used to produce each pair of true/false positive rates.
                  * `auc_score`: The area under the ROC curve.
    """
    y_true = _convert_to_woodwork_structure(y_true)
    y_pred_proba = _convert_to_woodwork_structure(y_pred_proba)
    if isinstance(y_pred_proba, ww.DataTable):
        y_pred_proba = _convert_woodwork_types_wrapper(
            y_pred_proba.to_dataframe()).to_numpy()
    else:
        y_pred_proba = _convert_woodwork_types_wrapper(
            y_pred_proba.to_series()).to_numpy()
    y_true = _convert_woodwork_types_wrapper(y_true.to_series()).to_numpy()

    if len(y_pred_proba.shape) == 1:
        y_pred_proba = y_pred_proba.reshape(-1, 1)
    if y_pred_proba.shape[1] == 2:
        y_pred_proba = y_pred_proba[:, 1].reshape(-1, 1)
    nan_indices = np.logical_or(pd.isna(y_true),
                                np.isnan(y_pred_proba).any(axis=1))
    y_true = y_true[~nan_indices]
    y_pred_proba = y_pred_proba[~nan_indices]

    lb = LabelBinarizer()
    lb.fit(np.unique(y_true))
    y_one_hot_true = lb.transform(y_true)
    n_classes = y_one_hot_true.shape[1]

    curve_data = []
    for i in range(n_classes):
        fpr_rates, tpr_rates, thresholds = sklearn_roc_curve(
            y_one_hot_true[:, i], y_pred_proba[:, i])
        auc_score = sklearn_auc(fpr_rates, tpr_rates)
        curve_data.append({
            'fpr_rates': fpr_rates,
            'tpr_rates': tpr_rates,
            'thresholds': thresholds,
            'auc_score': auc_score
        })

    return curve_data
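A minimal usage sketch for the helper above, assuming it is called in an environment where its woodwork conversion utilities are importable; the labels and probabilities are made up for illustration:

import pandas as pd

# hypothetical binary-classification inputs
y_true = pd.Series([0, 1, 1, 0, 1])
y_pred_proba = pd.Series([0.15, 0.80, 0.65, 0.40, 0.90])

curve_data = roc_curve(y_true, y_pred_proba)
# binary problems yield a single dictionary; multiclass yields one per class
print(curve_data[0]['auc_score'])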
Example #5
def test_dataframe_roc_curve_auc(setup):
    rs = np.random.RandomState(0)
    raw = pd.DataFrame({'a': rs.randint(0, 10, (10, )), 'b': rs.rand(10)})

    df = md.DataFrame(raw)
    y = df['a'].to_tensor().astype('int')
    pred = df['b'].to_tensor().astype('float')
    fpr, tpr, thresholds = roc_curve(y, pred, pos_label=2)
    m = auc(fpr, tpr)

    sk_fpr, sk_tpr, sk_threshold = sklearn_roc_curve(
        raw['a'].to_numpy().astype('int'),
        raw['b'].to_numpy().astype('float'),
        pos_label=2)
    expect_m = sklearn_auc(sk_fpr, sk_tpr)
    assert pytest.approx(m.fetch()) == expect_m
Example #6
    def testLearnInLocalCluster(self, *_):
        from mars.learn.neighbors import NearestNeighbors
        from sklearn.neighbors import NearestNeighbors as SkNearestNeighbors
        from mars.learn.metrics import roc_curve, auc
        from sklearn.metrics import roc_curve as sklearn_roc_curve, auc as sklearn_auc

        with new_cluster(scheduler_n_process=2,
                         worker_n_process=3,
                         shared_memory='20M') as cluster:
            rs = np.random.RandomState(0)
            raw_X = rs.rand(10, 5)
            raw_Y = rs.rand(8, 5)

            X = mt.tensor(raw_X, chunk_size=7)
            Y = mt.tensor(raw_Y, chunk_size=(5, 3))
            nn = NearestNeighbors(n_neighbors=3)
            nn.fit(X)

            ret = nn.kneighbors(Y, session=cluster.session)

            snn = SkNearestNeighbors(n_neighbors=3)
            snn.fit(raw_X)
            expected = snn.kneighbors(raw_Y)

            result = [r.fetch() for r in ret]
            np.testing.assert_almost_equal(result[0], expected[0])
            np.testing.assert_almost_equal(result[1], expected[1])

            rs = np.random.RandomState(0)
            raw = pd.DataFrame({
                'a': rs.randint(0, 10, (10, )),
                'b': rs.rand(10)
            })

            df = md.DataFrame(raw)
            y = df['a'].to_tensor().astype('int')
            pred = df['b'].to_tensor().astype('float')
            fpr, tpr, thresholds = roc_curve(y, pred, pos_label=2)
            m = auc(fpr, tpr)

            sk_fpr, sk_tpr, sk_threshold = sklearn_roc_curve(
                raw['a'].to_numpy().astype('int'),
                raw['b'].to_numpy().astype('float'),
                pos_label=2)
            expect_m = sklearn_auc(sk_fpr, sk_tpr)
            self.assertAlmostEqual(m.fetch(), expect_m)
Example #7
    y_true = np.array([
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0
    ])
    pred_prob = np.array([
        0.7, 0.9, 0.2, 0.8, 0.3, 0.64, 0.53, 0.12, 0.34, 0.52, 0.98, 0.03,
        0.32, 0.4, 0.8, 0.21, 0.01, 0.67, 0.32, 0.08, 0.05, 0.8, 0.34, 0.8
    ])

    import matplotlib.pyplot as plt
    Ps, Rs = precision_recall_curve(y_true, pred_prob)
    plt.plot(Rs, Ps, label='tinyml')

    from sklearn.metrics import precision_recall_curve as sklearn_pr_curve
    Ps, Rs, _ = sklearn_pr_curve(y_true, pred_prob)
    plt.plot(Rs, Ps, label='sklearn')
    plt.legend()
    plt.title('PRC')
    plt.show()

    FPR, TPR = roc_curve(y_true, pred_prob)
    plt.plot(FPR, TPR, label='tinyml')
    print('tinyml_auc:', roc_auc_score(y_true, pred_prob))
    from sklearn.metrics import roc_curve as sklearn_roc_curve
    from sklearn.metrics import roc_auc_score as sklearn_roc_auc_score
    FPR, TPR, _ = sklearn_roc_curve(y_true, pred_prob)
    plt.plot(FPR, TPR, label='sklearn')
    plt.legend()
    plt.title('ROC')
    plt.show()
    print('sklearn auc:', sklearn_roc_auc_score(y_true, pred_prob))