class TestAUCROCStatistics(unittest.TestCase):
    """Tests for ROC and AUCROC using a 1-nearest-neighbour classifier.

    The tests compare the values returned by ``ROC`` and ``AUCROC`` with
    values derived from ``TPR`` and ``FPR`` for the '+' class.
    """

    def setUp(self):
        """Build the sample sets and train the classifier on the train set."""
        self.meta = simple_meta_attrs(['-', '+'])
        # Sample factory: ``i`` is the single feature, ``v`` the class label.
        self.cs = lambda i, v: Sample(
            [i, self.meta[1].set_value(v)], self.meta, last_is_class=True)
        self.classifier = OrangeClassifier('kNNLearner', k=1)

        test_samples = '+++-++-+-+--+---'
        N = len(test_samples)
        # Floor division: the repeat count of a string must be an int, and
        # ``/`` yields a float under true division.
        half = N // 2
        train_samples = ('+' * half) + ('-' * half)

        self.test_samples, self.train_samples = (
            [self.cs(i, v) for i, v in enumerate(samples)]
            for samples in [test_samples, train_samples])
        random.shuffle(self.test_samples)
        self.classifier.train(self.train_samples)

    def _test_roc_eq(self):
        """Assert that the ROC point at index 1 equals (FPR, TPR)."""
        roc = ROC(self.classifier, self.test_samples, '+')
        middle = roc[1]
        tpr, fpr = [f(self.classifier, self.test_samples, '+')
                    for f in (TPR, FPR)]
        self.assertEqual(middle[1], tpr)
        self.assertEqual(middle[0], fpr)

    def test_ROC(self):
        self._test_roc_eq()

    def _test_auc_eq(self):
        """Assert that AUCROC equals the area computed from TPR and FPR."""
        tpr, fpr = [f(self.classifier, self.test_samples, '+')
                    for f in (TPR, FPR)]
        auc = AUCROC(self.classifier, self.test_samples)
        # Divide by 2.0 so the result does not truncate if the rates happen
        # to be integers under Python 2 division rules.
        expected_area = fpr * tpr / 2.0 + (1 - fpr) * (tpr + 1) / 2.0
        expected_area_v2 = (1 + tpr - fpr) / 2.0
        # The two formulas are algebraically equal but are evaluated with a
        # different sequence of floating point operations, so compare with a
        # tolerance rather than exactly.
        self.assertAlmostEqual(expected_area, expected_area_v2)
        # ^^^ just checking my math :)
        self.assertAlmostEqual(auc, expected_area)

    def test_AUC(self):
        self._test_auc_eq()

    def test_multiplerandom_test(self):
        """Repeat the ROC and AUC checks after retraining on random labels."""
        N = len(self.test_samples)

        def gen_test_case():
            schema = ''.join(
                ['+' if random.random() >= 0.5 else '-' for _ in range(N)])
            return [self.cs(i, v) for i, v in enumerate(schema)]

        for _ in range(200):
            train = gen_test_case()
            self.classifier.train(train)
            self._test_roc_eq()
            self._test_auc_eq()

    def test_avg_auc_roc_with_splited_cv(self):
        """Assert the averaged AUC lies between the per-split min and max."""
        sets = split_data_cv(self.test_samples)

        def tmp(train, test):
            self.classifier.train(train)
            return AUCROC(self.classifier, test)

        aucs = [tmp(train, test) for train, test in sets]
        max_auc, min_auc = max(aucs), min(aucs)
        avg_auc = aucroc_avg_classifier_performance(self.classifier, sets)
        self.assertTrue(min_auc <= avg_auc[0] <= max_auc)
class TestAUCROCStatistics(unittest.TestCase):
    """Consistency checks between ROC, AUCROC, TPR and FPR.

    A k=1 ``kNNLearner`` classifier is trained in ``setUp``; each test then
    verifies that the statistics reported for the '+' class agree with one
    another on ``self.test_samples``.
    """

    TEST_LABELS = '+++-++-+-+--+---'

    def setUp(self):
        """Create the sample factory, both sample lists and the classifier."""
        self.meta = simple_meta_attrs(['-', '+'])
        self.cs = lambda i, v: Sample(
            [i, self.meta[1].set_value(v)], self.meta, last_is_class=True)
        self.classifier = OrangeClassifier('kNNLearner', k=1)

        count = len(self.TEST_LABELS)
        # ``//`` keeps the repeat count an integer regardless of whether
        # ``/`` performs true division.
        train_labels = '+' * (count // 2) + '-' * (count // 2)

        self.test_samples = [
            self.cs(i, v) for i, v in enumerate(self.TEST_LABELS)]
        self.train_samples = [
            self.cs(i, v) for i, v in enumerate(train_labels)]

        random.shuffle(self.test_samples)
        self.classifier.train(self.train_samples)

    def _positive_rates(self):
        """Return ``(tpr, fpr)`` of the classifier for the '+' class."""
        tpr = TPR(self.classifier, self.test_samples, '+')
        fpr = FPR(self.classifier, self.test_samples, '+')
        return tpr, fpr

    def _test_roc_eq(self):
        """Check that ``roc[1]`` holds FPR at index 0 and TPR at index 1."""
        roc = ROC(self.classifier, self.test_samples, '+')
        middle = roc[1]
        tpr, fpr = self._positive_rates()
        self.assertEqual(middle[1], tpr)
        self.assertEqual(middle[0], fpr)

    def test_ROC(self):
        self._test_roc_eq()

    def _test_auc_eq(self):
        """Check AUCROC against the area derived from TPR and FPR."""
        tpr, fpr = self._positive_rates()
        auc = AUCROC(self.classifier, self.test_samples)

        # Float literals prevent truncation under integer division.
        expected_area = fpr * tpr / 2.0 + (1 - fpr) * (tpr + 1) / 2.0
        expected_area_v2 = (1 + tpr - fpr) / 2.0

        # Exact float equality is fragile here: the expressions round
        # differently even though they are mathematically the same.
        self.assertAlmostEqual(expected_area, expected_area_v2)
        # ^^^ just checking my math :)
        self.assertAlmostEqual(auc, expected_area)

    def test_AUC(self):
        self._test_auc_eq()

    def test_multiplerandom_test(self):
        """Run the ROC/AUC checks 200 times with randomly labelled training."""
        N = len(self.test_samples)

        def gen_test_case():
            labels = [
                '+' if random.random() >= 0.5 else '-' for _ in range(N)]
            return [self.cs(i, v) for i, v in enumerate(''.join(labels))]

        for _ in range(200):
            self.classifier.train(gen_test_case())
            self._test_roc_eq()
            self._test_auc_eq()

    def test_avg_auc_roc_with_splited_cv(self):
        """Check that the averaged AUC is bounded by the per-split AUCs."""
        sets = split_data_cv(self.test_samples)

        aucs = []
        for train, test in sets:
            self.classifier.train(train)
            aucs.append(AUCROC(self.classifier, test))

        min_auc = min(aucs)
        max_auc = max(aucs)
        avg_auc = aucroc_avg_classifier_performance(self.classifier, sets)
        self.assertTrue(min_auc <= avg_auc[0] <= max_auc)