def test_roc_curve_hard(setup):
    """Check roc_curve/auc on hard decisions and on two constant predictors.

    Three score vectors are evaluated against the same binary labels:
    all ones, all zeros, and the hard predictions returned by
    ``make_prediction``. Each run must yield fpr, tpr and thresholds of
    equal shape and an AUC matching the expected value to two decimals.
    """
    labels, hard_pred, _ = make_prediction(binary=True)

    def _check(scores, expected_auc):
        # Shared assertions for one score vector.
        fpr, tpr, thresholds = roc_curve(labels, scores)
        area = auc(fpr, tpr).fetch()
        np.testing.assert_array_almost_equal(area, expected_auc, decimal=2)
        assert fpr.shape == tpr.shape
        assert fpr.shape == thresholds.shape

    # always predict one
    _check(np.ones(labels.shape), 0.50)
    # always predict zero
    _check(np.zeros(labels.shape), 0.50)
    # hard decisions
    _check(hard_pred, 0.78)
def test_binary_clf_curve_multiclass_error(setup):
    """roc_curve must raise ValueError for labels drawn from ``randint(0, 3)``."""
    random_state = check_random_state(404)
    labels = random_state.randint(0, 3, size=10)
    scores = random_state.rand(10)

    with pytest.raises(ValueError, match="multiclass format is not supported"):
        roc_curve(labels, scores)
def test_roc_curve_drop_intermediate(setup):
    """Check the thresholds kept by ``roc_curve(..., drop_intermediate=True)``.

    Only the thresholds are inspected. ``roc_curve`` returns
    ``(fpr, tpr, thresholds)``, so the first two items are discarded
    explicitly instead of being bound to swapped ``tpr, fpr`` names.
    """
    cases = [
        # Test that drop_intermediate drops the correct thresholds
        (
            [0, 0, 0, 0, 1, 1],
            [0., 0.2, 0.5, 0.6, 0.7, 1.0],
            [2., 1., 0.7, 0.],
        ),
        # Test dropping thresholds with repeating scores
        (
            [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
            [0., 0.1, 0.6, 0.6, 0.7, 0.8, 0.9, 0.6, 0.7, 0.8, 0.9, 0.9, 1.0],
            [2.0, 1.0, 0.9, 0.7, 0.6, 0.],
        ),
    ]
    for y_true, y_score, expected_thresholds in cases:
        _, _, thresholds = roc_curve(y_true, y_score, drop_intermediate=True)
        np.testing.assert_array_almost_equal(thresholds.fetch(),
                                             expected_thresholds)
def testRocCurve(self):
    # Compare the AUC computed by Mars' roc_curve/auc on a cluster created
    # through self.odps with the value scikit-learn gives for the same data.
    # The server is stopped in ``finally`` whether or not the check passes.
    import numpy as np
    import pandas as pd
    import mars.dataframe as md
    from mars.learn.metrics import roc_curve, auc
    from sklearn.metrics import roc_curve as sklearn_roc_curve
    from sklearn.metrics import auc as sklearn_auc

    cluster_name = str(uuid.uuid4())
    client = self.odps.create_mars_cluster(1, 4, 8, name=cluster_name)
    try:
        random_state = np.random.RandomState(0)
        labels_col = random_state.randint(0, 10, (10, ))
        scores_col = random_state.rand(10)
        raw = pd.DataFrame({'a': labels_col, 'b': scores_col})

        mars_df = md.DataFrame(raw)
        y = mars_df['a'].to_tensor().astype('int')
        pred = mars_df['b'].to_tensor().astype('float')

        fpr, tpr, _ = roc_curve(y, pred, pos_label=2)
        mars_auc = auc(fpr, tpr)

        ref_labels = raw['a'].to_numpy().astype('int')
        ref_scores = raw['b'].to_numpy().astype('float')
        ref_fpr, ref_tpr, _ = sklearn_roc_curve(
            ref_labels, ref_scores, pos_label=2)
        expected_auc = sklearn_auc(ref_fpr, ref_tpr)

        self.assertAlmostEqual(mars_auc.fetch(), expected_auc)
    finally:
        client.stop_server()
def testRocCurveAuc(self):
    # Run roc_curve/auc through a session opened against the local web
    # endpoint and compare the resulting AUC with scikit-learn's value.
    service_ep = 'http://127.0.0.1:' + self.web_port
    # A finite timeout is used only when the CI environment variable is set.
    if 'CI' in os.environ:
        timeout = 120
    else:
        timeout = -1

    with new_session(service_ep) as sess:
        run_kwargs = dict(timeout=timeout)

        random_state = np.random.RandomState(0)
        labels_col = random_state.randint(0, 10, (10, ))
        scores_col = random_state.rand(10)
        raw = pd.DataFrame({'a': labels_col, 'b': scores_col})

        mars_df = md.DataFrame(raw)
        y = mars_df['a'].to_tensor().astype('int')
        pred = mars_df['b'].to_tensor().astype('float')

        fpr, tpr, _ = roc_curve(y, pred, pos_label=2,
                                session=sess, run_kwargs=run_kwargs)
        mars_auc = auc(fpr, tpr, session=sess, run_kwargs=run_kwargs)

        ref_labels = raw['a'].to_numpy().astype('int')
        ref_scores = raw['b'].to_numpy().astype('float')
        ref_fpr, ref_tpr, _ = sklearn_roc_curve(
            ref_labels, ref_scores, pos_label=2)
        expected_auc = sklearn_auc(ref_fpr, ref_tpr)

        self.assertAlmostEqual(mars_auc.fetch(session=sess), expected_auc)
def test_roc_curve_confidence(setup):
    """Check roc_curve on confidence scores (probabilities shifted by -0.5).

    The AUC must match 0.90 to two decimals, and fpr, tpr and thresholds
    must share one shape.
    """
    labels, _, probabilities = make_prediction(binary=True)
    confidence = probabilities - 0.5

    fpr, tpr, thresholds = roc_curve(labels, confidence)
    area = auc(fpr, tpr).fetch()

    np.testing.assert_array_almost_equal(area, 0.90, decimal=2)
    assert fpr.shape == tpr.shape
    assert fpr.shape == thresholds.shape
def test_roc_curve_fpr_tpr_increasing(setup):
    """Ensure the fpr and tpr returned by roc_curve never decrease.

    The input is an edge case with float scores and a float sample_weight,
    where some adjacent values of fpr and tpr are actually the same.
    """
    labels = [0, 0, 1, 1, 1]
    scores = [0.1, 0.7, 0.3, 0.4, 0.5]
    weights = np.repeat(0.2, 5)

    fpr, tpr, _ = roc_curve(labels, scores, sample_weight=weights)

    for rate in (fpr, tpr):
        # Count the negative steps between consecutive points; none allowed.
        n_decreasing = (mt.diff(rate) < 0).sum()
        assert (n_decreasing == 0).to_numpy()
def test_roc_curve_end_points(setup):
    """Make sure the fpr from roc_curve starts at 0 and ends at 1.

    Scores are random integers below 3 for 50 negative and 50 positive
    samples. The three returned arrays must also share one shape.
    """
    random_state = np.random.RandomState(0)
    labels = np.repeat([0, 1], 50)
    scores = random_state.randint(3, size=100)

    curve = roc_curve(labels, scores, drop_intermediate=True)
    fpr, tpr, thr = curve.fetch()

    assert fpr[0] == 0
    assert fpr[-1] == 1
    assert fpr.shape == tpr.shape
    assert fpr.shape == thr.shape
def test_roc_curve(setup):
    """Test the area under the ROC curve with and without drop_intermediate.

    For each setting the AUC is compared with ``_auc`` to two decimals and
    with ``roc_auc_score``, and the returned array shapes must agree.
    """
    for drop in (True, False):
        labels, _, probabilities = make_prediction(binary=True)
        reference_auc = _auc(labels, probabilities)

        curve = roc_curve(labels, probabilities, drop_intermediate=drop)
        fpr, tpr, thresholds = curve.execute().fetch()
        area = auc(fpr, tpr).to_numpy()

        np.testing.assert_array_almost_equal(area, reference_auc, decimal=2)
        np.testing.assert_almost_equal(
            area, roc_auc_score(labels, probabilities))
        assert fpr.shape == tpr.shape
        assert fpr.shape == thresholds.shape
def test_dataframe_roc_curve_auc(setup):
    """Compare Mars' AUC on DataFrame-derived tensors with scikit-learn's.

    Labels and scores come from the columns of a Mars DataFrame built from
    a small random pandas frame. Label 2 is the positive class on both
    sides.
    """
    random_state = np.random.RandomState(0)
    labels_col = random_state.randint(0, 10, (10, ))
    scores_col = random_state.rand(10)
    raw = pd.DataFrame({'a': labels_col, 'b': scores_col})

    mars_df = md.DataFrame(raw)
    y = mars_df['a'].to_tensor().astype('int')
    pred = mars_df['b'].to_tensor().astype('float')

    fpr, tpr, _ = roc_curve(y, pred, pos_label=2)
    mars_auc = auc(fpr, tpr)

    ref_labels = raw['a'].to_numpy().astype('int')
    ref_scores = raw['b'].to_numpy().astype('float')
    ref_fpr, ref_tpr, _ = sklearn_roc_curve(
        ref_labels, ref_scores, pos_label=2)
    expected_auc = sklearn_auc(ref_fpr, ref_tpr)

    assert pytest.approx(mars_auc.fetch()) == expected_auc
def test_roc_returns_consistency(setup):
    """Test whether the returned thresholds line up with the returned tpr.

    The tpr is recomputed from each threshold as the share of positive
    samples scored at or above it, then compared with roc_curve's tpr to
    two decimals.
    """
    # make small toy dataset
    labels, _, probabilities = make_prediction(binary=True)
    fpr, tpr, thresholds = roc_curve(labels, probabilities).fetch()

    # use the given thresholds to determine the tpr
    n_positive = np.sum(labels)
    recomputed_tpr = [
        1.0 * np.sum((probabilities >= cutoff) & labels) / n_positive
        for cutoff in thresholds
    ]

    # compare tpr and the recomputed tpr to see if the thresholds' order
    # was correct
    np.testing.assert_array_almost_equal(tpr, recomputed_tpr, decimal=2)
    assert fpr.shape == tpr.shape
    assert fpr.shape == thresholds.shape
def testLearnInLocalCluster(self, *_):
    # Run two learn workloads inside one local cluster and compare each
    # with scikit-learn: a 3-nearest-neighbours query, then roc_curve/auc
    # on tensors taken from a Mars DataFrame.
    from mars.learn.neighbors import NearestNeighbors
    from sklearn.neighbors import NearestNeighbors as SkNearestNeighbors
    from mars.learn.metrics import roc_curve, auc
    from sklearn.metrics import roc_curve as sklearn_roc_curve
    from sklearn.metrics import auc as sklearn_auc

    with new_cluster(scheduler_n_process=2, worker_n_process=3,
                     shared_memory='20M') as cluster:
        # --- nearest neighbours ---
        nn_random_state = np.random.RandomState(0)
        train_points = nn_random_state.rand(10, 5)
        query_points = nn_random_state.rand(8, 5)

        X = mt.tensor(train_points, chunk_size=7)
        Y = mt.tensor(query_points, chunk_size=(5, 3))
        mars_nn = NearestNeighbors(n_neighbors=3)
        mars_nn.fit(X)
        mars_neighbors = mars_nn.kneighbors(Y, session=cluster.session)

        sk_nn = SkNearestNeighbors(n_neighbors=3)
        sk_nn.fit(train_points)
        sk_neighbors = sk_nn.kneighbors(query_points)

        fetched = [part.fetch() for part in mars_neighbors]
        for position in (0, 1):
            np.testing.assert_almost_equal(fetched[position],
                                           sk_neighbors[position])

        # --- roc_curve / auc ---
        roc_random_state = np.random.RandomState(0)
        labels_col = roc_random_state.randint(0, 10, (10, ))
        scores_col = roc_random_state.rand(10)
        raw = pd.DataFrame({'a': labels_col, 'b': scores_col})

        mars_df = md.DataFrame(raw)
        y = mars_df['a'].to_tensor().astype('int')
        pred = mars_df['b'].to_tensor().astype('float')

        fpr, tpr, _ = roc_curve(y, pred, pos_label=2)
        mars_auc = auc(fpr, tpr)

        ref_labels = raw['a'].to_numpy().astype('int')
        ref_scores = raw['b'].to_numpy().astype('float')
        ref_fpr, ref_tpr, _ = sklearn_roc_curve(
            ref_labels, ref_scores, pos_label=2)
        expected_auc = sklearn_auc(ref_fpr, ref_tpr)

        self.assertAlmostEqual(mars_auc.fetch(), expected_auc)
def test_roc_curve_multi(setup):
    """roc_curve is not applicable to multi-class problems: expect ValueError."""
    labels, _, probabilities = make_prediction(binary=False)

    with pytest.raises(ValueError):
        roc_curve(labels, probabilities)
def testRocCurveMulti(self):
    # roc_curve is not applicable to multi-class problems, so calling it on
    # non-binary predictions must raise ValueError.
    labels, _, probabilities = make_prediction(binary=False)

    with self.assertRaises(ValueError):
        roc_curve(labels, probabilities)