def test_roc_curve_hard(setup):
    """roc_curve for hard decisions.

    Runs ``roc_curve`` followed by ``auc`` on three hard prediction vectors
    (all ones, all zeros, and the classifier's own hard predictions) and
    checks the resulting AUC to two decimals as well as the shape agreement
    of the three returned arrays.
    """
    y_true, pred, probas_pred = make_prediction(binary=True)

    def check_hard_predictions(hard_pred, expected_auc):
        # One full roc_curve -> auc round trip for a single prediction vector.
        fpr, tpr, thresholds = roc_curve(y_true, hard_pred)
        roc_auc = auc(fpr, tpr).fetch()
        np.testing.assert_array_almost_equal(roc_auc, expected_auc, decimal=2)
        assert fpr.shape == tpr.shape
        assert fpr.shape == thresholds.shape

    # always predict one
    check_hard_predictions(np.ones(y_true.shape), 0.50)

    # always predict zero
    check_hard_predictions(np.zeros(y_true.shape), 0.50)

    # hard decisions
    check_hard_predictions(pred, 0.78)
def testRocCurve(self):
    """Compare Mars ``roc_curve``/``auc`` against scikit-learn on a Mars cluster.

    The cluster is obtained from ``self.odps.create_mars_cluster`` and is
    always stopped via ``client.stop_server()``, whether or not the
    comparison succeeds.
    """
    import numpy as np
    import pandas as pd
    import mars.dataframe as md
    from mars.learn.metrics import roc_curve, auc
    from sklearn.metrics import roc_curve as sklearn_roc_curve, auc as sklearn_auc

    cluster_name = str(uuid.uuid4())
    client = self.odps.create_mars_cluster(1, 4, 8, name=cluster_name)
    try:
        # Draw the label column before the score column so the seeded
        # generator is consumed in the same order as the dict literal would.
        rng = np.random.RandomState(0)
        labels = rng.randint(0, 10, (10, ))
        scores = rng.rand(10)
        raw = pd.DataFrame({'a': labels, 'b': scores})

        # Mars side: DataFrame columns -> tensors -> ROC curve -> AUC.
        mars_df = md.DataFrame(raw)
        y = mars_df['a'].to_tensor().astype('int')
        pred = mars_df['b'].to_tensor().astype('float')
        fpr, tpr, _thresholds = roc_curve(y, pred, pos_label=2)
        mars_auc = auc(fpr, tpr)

        # scikit-learn reference computed from the same raw pandas data.
        sk_y = raw['a'].to_numpy().astype('int')
        sk_pred = raw['b'].to_numpy().astype('float')
        sk_fpr, sk_tpr, _sk_thresholds = sklearn_roc_curve(
            sk_y, sk_pred, pos_label=2)
        expected_auc = sklearn_auc(sk_fpr, sk_tpr)

        self.assertAlmostEqual(mars_auc.fetch(), expected_auc)
    finally:
        client.stop_server()
def testRocCurveAuc(self):
    """Compare Mars ``roc_curve``/``auc`` against scikit-learn through a web session.

    A session is opened against the local endpoint built from
    ``self.web_port``; it is passed explicitly, together with the run
    keyword arguments, to every Mars call.
    """
    service_ep = 'http://127.0.0.1:' + self.web_port

    # Bounded timeout when the 'CI' environment variable is present.
    # NOTE(review): -1 presumably means "no timeout" -- confirm with the session API.
    if 'CI' in os.environ:
        timeout = 120
    else:
        timeout = -1

    with new_session(service_ep) as sess:
        run_kwargs = dict(timeout=timeout)

        # Draw labels before scores to keep the seeded RNG sequence stable.
        rng = np.random.RandomState(0)
        labels = rng.randint(0, 10, (10, ))
        scores = rng.rand(10)
        raw = pd.DataFrame({'a': labels, 'b': scores})

        # Mars side, executed through the explicit session.
        mars_df = md.DataFrame(raw)
        y = mars_df['a'].to_tensor().astype('int')
        pred = mars_df['b'].to_tensor().astype('float')
        fpr, tpr, _thresholds = roc_curve(
            y, pred, pos_label=2, session=sess, run_kwargs=run_kwargs)
        mars_auc = auc(fpr, tpr, session=sess, run_kwargs=run_kwargs)

        # scikit-learn reference computed from the same raw pandas data.
        sk_y = raw['a'].to_numpy().astype('int')
        sk_pred = raw['b'].to_numpy().astype('float')
        sk_fpr, sk_tpr, _sk_thresholds = sklearn_roc_curve(
            sk_y, sk_pred, pos_label=2)
        expected_auc = sklearn_auc(sk_fpr, sk_tpr)

        self.assertAlmostEqual(mars_auc.fetch(session=sess), expected_auc)
def test_auc(setup):
    """Test Area Under Curve (AUC) computation.

    Each case supplies the curve's x and y coordinates and the area that
    ``auc(x, y).fetch()`` is expected to produce.
    """
    # (x coordinates, y coordinates, expected area)
    cases = (
        ([0, 1], [0, 1], 0.5),
        ([1, 0], [0, 1], 0.5),
        ([1, 0, 0], [0, 1, 1], 0.5),
        ([0, 1], [1, 1], 1),
        ([0, 0.5, 1], [0, 0.5, 1], 0.5),
    )
    for x, y, expected_area in cases:
        area = auc(x, y).fetch()
        np.testing.assert_array_almost_equal(area, expected_area)
def test_roc_curve_confidence(setup):
    """roc_curve for confidence scores.

    Feeds ``probas_pred - 0.5`` to ``roc_curve`` and checks the AUC to two
    decimals plus the shape agreement of the returned arrays.
    """
    y_true, _, probas_pred = make_prediction(binary=True)

    # Shift the predicted values by 0.5 before handing them over as scores.
    confidence = probas_pred - 0.5
    fpr, tpr, thresholds = roc_curve(y_true, confidence)

    area = auc(fpr, tpr).fetch()
    np.testing.assert_array_almost_equal(area, 0.90, decimal=2)

    assert fpr.shape == tpr.shape
    assert fpr.shape == thresholds.shape
def test_roc_curve(setup):
    """Test Area under Receiver Operating Characteristic (ROC) curve.

    Runs once with ``drop_intermediate=True`` and once with ``False``; in
    both cases the AUC must match the reference ``_auc`` value to two
    decimals and ``roc_auc_score`` to the default precision.
    """
    for drop in (True, False):
        y_true, _, probas_pred = make_prediction(binary=True)
        expected_auc = _auc(y_true, probas_pred)

        # Build the curve, run it, then pull the concrete results back.
        curve = roc_curve(y_true, probas_pred, drop_intermediate=drop)
        executed = curve.execute()
        fpr, tpr, thresholds = executed.fetch()

        roc_auc = auc(fpr, tpr).to_numpy()
        np.testing.assert_array_almost_equal(roc_auc, expected_auc, decimal=2)
        np.testing.assert_almost_equal(
            roc_auc, roc_auc_score(y_true, probas_pred))

        assert fpr.shape == tpr.shape
        assert fpr.shape == thresholds.shape
def test_dataframe_roc_curve_auc(setup):
    """Check ``roc_curve``/``auc`` on tensors derived from a Mars DataFrame.

    The AUC computed by Mars must approximately equal the one scikit-learn
    computes from the same raw pandas data.
    """
    # Draw labels before scores to keep the seeded RNG sequence stable.
    rng = np.random.RandomState(0)
    labels = rng.randint(0, 10, (10, ))
    scores = rng.rand(10)
    raw = pd.DataFrame({'a': labels, 'b': scores})

    # Mars side: DataFrame columns -> tensors -> ROC curve -> AUC.
    mars_df = md.DataFrame(raw)
    y = mars_df['a'].to_tensor().astype('int')
    pred = mars_df['b'].to_tensor().astype('float')
    fpr, tpr, _thresholds = roc_curve(y, pred, pos_label=2)
    mars_auc = auc(fpr, tpr)

    # scikit-learn reference computed from the raw pandas columns.
    sk_y = raw['a'].to_numpy().astype('int')
    sk_pred = raw['b'].to_numpy().astype('float')
    sk_fpr, sk_tpr, _sk_thresholds = sklearn_roc_curve(
        sk_y, sk_pred, pos_label=2)
    expected_auc = sklearn_auc(sk_fpr, sk_tpr)

    assert pytest.approx(mars_auc.fetch()) == expected_auc
def testLearnInLocalCluster(self, *_):
    """Exercise Mars learn APIs inside a local cluster.

    Two checks run inside the same ``new_cluster`` context:

    1. ``NearestNeighbors.kneighbors`` must match scikit-learn's result.
    2. ``roc_curve`` + ``auc`` on DataFrame-derived tensors must match
       scikit-learn's AUC.
    """
    from mars.learn.neighbors import NearestNeighbors
    from sklearn.neighbors import NearestNeighbors as SkNearestNeighbors
    from mars.learn.metrics import roc_curve, auc
    from sklearn.metrics import roc_curve as sklearn_roc_curve, auc as sklearn_auc

    with new_cluster(scheduler_n_process=2, worker_n_process=3,
                     shared_memory='20M') as cluster:
        # ---- nearest neighbours ----
        rng = np.random.RandomState(0)
        raw_X = rng.rand(10, 5)
        raw_Y = rng.rand(8, 5)

        X = mt.tensor(raw_X, chunk_size=7)
        Y = mt.tensor(raw_Y, chunk_size=(5, 3))

        mars_nn = NearestNeighbors(n_neighbors=3)
        mars_nn.fit(X)
        mars_ret = mars_nn.kneighbors(Y, session=cluster.session)

        sk_nn = SkNearestNeighbors(n_neighbors=3)
        sk_nn.fit(raw_X)
        expected = sk_nn.kneighbors(raw_Y)

        fetched = [part.fetch() for part in mars_ret]
        # Compare the first two returned components with scikit-learn's.
        for idx in (0, 1):
            np.testing.assert_almost_equal(fetched[idx], expected[idx])

        # ---- ROC curve / AUC ----
        # Fresh seeded generator; labels are drawn before scores.
        rng = np.random.RandomState(0)
        labels = rng.randint(0, 10, (10, ))
        scores = rng.rand(10)
        raw = pd.DataFrame({'a': labels, 'b': scores})

        mars_df = md.DataFrame(raw)
        y = mars_df['a'].to_tensor().astype('int')
        pred = mars_df['b'].to_tensor().astype('float')
        fpr, tpr, _thresholds = roc_curve(y, pred, pos_label=2)
        mars_auc = auc(fpr, tpr)

        sk_y = raw['a'].to_numpy().astype('int')
        sk_pred = raw['b'].to_numpy().astype('float')
        sk_fpr, sk_tpr, _sk_thresholds = sklearn_roc_curve(
            sk_y, sk_pred, pos_label=2)
        expected_auc = sklearn_auc(sk_fpr, sk_tpr)

        self.assertAlmostEqual(mars_auc.fetch(), expected_auc)
def test_auc_errors(setup):
    """Check that ``auc`` raises ``ValueError`` on invalid inputs."""
    # Inputs that must be rejected with a plain ValueError.
    rejected_inputs = (
        ([0.0, 0.5, 1.0], [0.1, 0.2]),  # Incompatible shapes
        ([0.0], [0.1]),                 # Too few x values
    )
    for bad_x, bad_y in rejected_inputs:
        with pytest.raises(ValueError):
            auc(bad_x, bad_y)

    # x is not in order
    x = [2, 1, 3, 4]
    y = [5, 6, 7, 8]
    error_message = f"x is neither increasing nor decreasing : {np.array(x)}"
    # The message contains brackets, so escape it before regex matching.
    escaped_message = re.escape(error_message)
    with pytest.raises(ValueError, match=escaped_message):
        auc(x, y)
def testAucErrors(self):
    """Check that ``auc`` raises ``ValueError`` on invalid inputs."""
    # Inputs that must be rejected with a plain ValueError.
    rejected_inputs = (
        ([0.0, 0.5, 1.0], [0.1, 0.2]),  # Incompatible shapes
        ([0.0], [0.1]),                 # Too few x values
    )
    for bad_x, bad_y in rejected_inputs:
        with self.assertRaises(ValueError):
            auc(bad_x, bad_y)

    # x is not in order
    x = [2, 1, 3, 4]
    y = [5, 6, 7, 8]
    error_message = "x is neither increasing nor decreasing : {}".format(
        np.array(x))
    # The message contains brackets, so escape it before regex matching.
    escaped_message = re.escape(error_message)
    with pytest.raises(ValueError, match=escaped_message):
        auc(x, y)