示例#1
0
    def test_slightly_different_domain(self):
        """Check that a slightly different test domain barely changes the AUC.

        For every preprocessor in PREPROCESSORS that does not carry a
        ``skip_add_zeros`` attribute, the score on the untouched test data is
        compared with the score after the test wavenumbers were slightly
        changed (tolerance 0.02) and after an additional shift of the test
        points by one (tolerance 0.05).
        """
        # Rows full of unknowns make LogisticRegression undefined. They can
        # appear, for example, with EMSC when one of the bad spectra is a
        # spectrum from the data; hence the _RemoveNaNRows preprocessor.
        learner = LogisticRegressionLearner(preprocessors=[_RemoveNaNRows()])

        def score(train_table, test_table):
            # AUC of the shared learner fitted on train_table, scored on test_table.
            return AUC(TestOnTestData()(train_table, test_table, [learner]))

        for proc in PREPROCESSORS:
            if hasattr(proc, "skip_add_zeros"):
                continue
            train, test = separate_learn_test(self.collagen)
            # Reference score: preprocessed train, unmodified test.
            reference_auc = score(proc(train), test)
            test = odd_attr(slightly_change_wavenumbers(test, 0.00001))
            # Train on a subset of points so that all test points lie within
            # the train points, which gives no unknowns after interpolation.
            inner_points = getx(train)[1:-3]
            train = proc(Interpolate(points=inner_points)(train))
            # Convert the domain explicitly to catch exceptions that would
            # otherwise be silently handled in TestOnTestData.
            Orange.data.Table(train.domain, test)
            changed_auc = score(train, test)
            self.assertAlmostEqual(
                changed_auc,
                reference_auc,
                delta=0.02,
                msg="Preprocessor " + str(proc),
            )
            # Also shift the test points by one.
            shifted_points = getx(test) - 1.
            test = Interpolate(points=shifted_points)(test)
            Orange.data.Table(train.domain, test)  # explicit conversion again
            shifted_auc = score(train, test)
            # The difference should still be slight.
            self.assertAlmostEqual(
                shifted_auc,
                reference_auc,
                delta=0.05,
                msg="Preprocessor " + str(proc),
            )
示例#2
0
 def test_predict_samename_domain_interpolation(self):
     """Check that an interpolating train domain keeps the AUC unchanged.

     The reference AUC is computed on the plain split. The test table then
     goes through ``destroy_atts_conversion`` and the train table is
     interpolated onto its own points; the resulting AUC must equal the
     reference.
     """
     def evaluate(train_table, test_table):
         # AUC of a fresh logistic regression fitted on train_table.
         learners = [LogisticRegressionLearner()]
         return AUC(TestOnTestData()(train_table, test_table, learners))

     train, test = separate_learn_test(self.collagen)
     expected = evaluate(train, test)
     test = destroy_atts_conversion(test)
     # Make train capable of interpolation.
     own_points = getx(train)
     train = Interpolate(points=own_points)(train)
     actual = evaluate(train, test)
     self.assertEqual(expected, actual)
示例#3
0
 def test_predict_savgol_another_interpolate(self):
     """Check that interpolating after Savitzky-Golay filtering keeps the AUC close.

     The train table is filtered, scored, then interpolated onto its own
     points and scored again; both AUC values must agree within 0.02.
     """
     def evaluate(train_table, test_table):
         # AUC of a fresh logistic regression fitted on train_table.
         learners = [LogisticRegressionLearner()]
         return AUC(TestOnTestData()(train_table, test_table, learners))

     train, test = separate_learn_test(self.collagen)
     savgol = SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2)
     train = savgol(train)
     filtered_auc = evaluate(train, test)
     train = Interpolate(points=getx(train))(train)
     interpolated_auc = evaluate(train, test)
     self.assertAlmostEqual(filtered_auc, interpolated_auc, delta=0.02)
示例#4
0
 def test_slightly_different_domain(self):
     """Check that a slightly different test domain barely changes the AUC.

     For every preprocessor in PREPROCESSORS the score on the untouched test
     data is compared with the score after the test attributes were altered
     (tolerance 0.02) and after an additional shift of the test points by
     one (tolerance 0.05).
     """
     # A logistic regression with no preprocessors: it cannot handle
     # unknown values.
     learner = LogisticRegressionLearner(preprocessors=[])

     def score(train_table, test_table):
         # AUC of the shared learner fitted on train_table, scored on test_table.
         return AUC(TestOnTestData(train_table, test_table, [learner]))

     for proc in PREPROCESSORS:
         train, test = separate_learn_test(self.collagen)
         # Reference score: preprocessed train, unmodified test.
         reference_auc = score(proc(train), test)
         test = odd_attr(destroy_atts_conversion(test))
         # Train on a subset of points so that all test points lie within
         # the train points, which gives no unknowns after interpolation.
         inner_points = getx(train)[1:-3]
         train = proc(Interpolate(points=inner_points)(train))
         # Convert the domain explicitly to catch exceptions that would
         # otherwise be silently handled in TestOnTestData.
         Orange.data.Table(train.domain, test)
         changed_auc = score(train, test)
         self.assertAlmostEqual(changed_auc, reference_auc, delta=0.02)
         # Also shift the test points by one.
         shifted_points = getx(test) - 1.
         test = Interpolate(points=shifted_points)(test)
         Orange.data.Table(train.domain, test)  # explicit conversion again
         shifted_auc = score(train, test)
         # The difference should still be slight.
         self.assertAlmostEqual(shifted_auc, reference_auc, delta=0.05)
示例#5
0
 def test_predict_different_domain_interpolation(self):
     """Check scoring on a shifted, progressively narrower test domain.

     With an interpolating train domain, shifting the test points by one
     must keep the AUC within 0.01 of the original. Cutting the test data
     to ever narrower ranges must then lower the AUC at every step.
     """
     def evaluate(train_table, test_table):
         # AUC of a fresh logistic regression fitted on train_table.
         learners = [LogisticRegressionLearner()]
         return AUC(TestOnTestData()(train_table, test_table, learners))

     train, test = separate_learn_test(self.collagen)
     baseline = evaluate(train, test)
     # Give the test data another domain by shifting its points.
     test = Interpolate(points=getx(test) - 1.)(test)
     # Make train capable of interpolation.
     train = Interpolate(points=getx(train))(train)
     shifted = evaluate(train, test)
     # The shift can decrease AUC slightly.
     self.assertAlmostEqual(baseline, shifted, delta=0.01)

     # Each cut is applied on top of the previous one.
     cut_scores = []
     for low, high in ((1000, 1700), (1100, 1600), (1200, 1500)):
         test = Cut(low, high)(test)
         cut_scores.append(evaluate(train, test))
     first, second, third = cut_scores
     # The more we cut, the lower precision we get.
     self.assertTrue(baseline > first > second > third)
 def test_whole_and_train_separete(self):
     """Compare preprocessing before the split with preprocessing train only.

     Applying a preprocessor before splitting data into train and test,
     and applying it just on train data, should yield the same
     transformation of the test data. Both the transformed test values and
     the resulting AUC are compared for every preprocessor in
     PREPROCESSORS_INDEPENDENT_SAMPLES.
     """
     collagen = self.collagen
     for proc in PREPROCESSORS_INDEPENDENT_SAMPLES:
         # Variant 1: preprocess everything, then split.
         whole_train, whole_test = separate_learn_test(proc(collagen))
         # Variant 2: split, then preprocess train only.
         train, test = separate_learn_test(collagen)
         train = proc(train)
         # Converting test into the train domain applies the transformation.
         converted = Orange.data.Table(train.domain, test)
         np.testing.assert_equal(converted.X, whole_test.X)
         expected = AUC(TestOnTestData(whole_train, whole_test, [LogisticRegressionLearner()]))
         actual = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
         self.assertEqual(expected, actual)
示例#7
0
 def test_predict_different_domain(self):
     """Check prediction on a test domain shifted by one.

     When ``DomainTransformationError`` can be imported from
     ``Orange.data.table``, applying the fitted model to the shifted test
     data is expected to raise it. When the import fails (older Orange,
     "until Orange 3.19"), the AUC is instead expected to lie strictly
     between 0.45 and 0.55.
     """
     train, test = separate_learn_test(self.collagen)
     test = Interpolate(points=getx(test) - 1)(test)  # other test domain
     # Only the import is guarded: an ImportError raised while fitting or
     # predicting must surface as an error instead of silently selecting
     # the legacy fallback.
     try:
         from Orange.data.table import DomainTransformationError
     except ImportError:  # until Orange 3.19
         aucdestroyed = AUC(TestOnTestData(train, test, [LogisticRegressionLearner()]))
         self.assertTrue(0.45 < aucdestroyed < 0.55)
     else:
         with self.assertRaises(DomainTransformationError):
             LogisticRegressionLearner()(train)(test)
示例#8
0
 def test_predict_samename_domain(self):
     """Check prediction on test data passed through ``destroy_atts_conversion``.

     When ``DomainTransformationError`` can be imported from
     ``Orange.data.table``, applying the fitted model to the altered test
     data is expected to raise it. When the import fails (older Orange,
     "until Orange 3.19"), the AUC is instead expected to lie strictly
     between 0.45 and 0.55.
     """
     train, test = separate_learn_test(self.collagen)
     test = destroy_atts_conversion(test)
     # Only the import is guarded: an ImportError raised while fitting or
     # predicting must surface as an error instead of silently selecting
     # the legacy fallback.
     try:
         from Orange.data.table import DomainTransformationError
     except ImportError:  # until Orange 3.19
         aucdestroyed = AUC(TestOnTestData()(train, test, [LogisticRegressionLearner()]))
         self.assertTrue(0.45 < aucdestroyed < 0.55)
     else:
         with self.assertRaises(DomainTransformationError):
             LogisticRegressionLearner()(train)(test)
 def test_slightly_different_domain(self):
     """Check that a slightly different test domain barely changes the AUC.

     For every preprocessor in PREPROCESSORS the score on the untouched test
     data is compared with the score after the test attributes were altered
     (tolerance 0.02) and after an additional shift of the test points by
     one (tolerance 0.05).
     """
     def score(train_table, test_table):
         # AUC of a fresh logistic regression fitted on train_table.
         return AUC(TestOnTestData(train_table, test_table, [LogisticRegressionLearner()]))

     for proc in PREPROCESSORS:
         train, test = separate_learn_test(self.collagen)
         # Reference score: preprocessed train, unmodified test.
         reference_auc = score(proc(train), test)
         test = odd_attr(destroy_atts_conversion(test))
         # Make train capable of interpolation before preprocessing it.
         train = proc(Interpolate(points=getx(train))(train))
         changed_auc = score(train, test)
         self.assertAlmostEqual(changed_auc, reference_auc, delta=0.02)
         # Also shift the test points by one.
         shifted_points = getx(test) - 1.
         test = Interpolate(points=shifted_points)(test)
         shifted_auc = score(train, test)
         # The difference should still be slight.
         self.assertAlmostEqual(shifted_auc, reference_auc, delta=0.05)
示例#10
0
 def test_predict_savgov_same_domain(self):
     """Check that filtering the whole data set before the split keeps AUC above 0.85."""
     savgol = SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2)
     filtered = savgol(self.collagen)
     train, test = separate_learn_test(filtered)
     results = TestOnTestData()(train, test, [LogisticRegressionLearner()])
     self.assertGreater(AUC(results), 0.85)
示例#11
0
 def test_predict_same_domain(self):
     """Check that train and test sharing one domain score an AUC above 0.9."""
     train, test = separate_learn_test(self.collagen)
     results = TestOnTestData()(train, test, [LogisticRegressionLearner()])
     # The threshold is high because this is an easy dataset.
     self.assertGreater(AUC(results), 0.9)
示例#12
0
 def test_predict_different_domain(self):
     """Check that a test domain shifted by one yields an AUC near 0.5.

     The train data are not interpolated here; the resulting AUC must lie
     strictly between 0.45 and 0.55.
     """
     train, test = separate_learn_test(self.collagen)
     # Give the test data another domain by shifting its points.
     shifted_points = getx(test) - 1
     test = Interpolate(points=shifted_points)(test)
     results = TestOnTestData(train, test, [LogisticRegressionLearner()])
     aucdestroyed = AUC(results)
     self.assertTrue(0.45 < aucdestroyed < 0.55)
示例#13
0
 def test_predict_samename_domain(self):
     """Check that test data altered by ``destroy_atts_conversion`` yield an AUC near 0.5.

     The train data are not interpolated here; the resulting AUC must lie
     strictly between 0.45 and 0.55.
     """
     train, test = separate_learn_test(self.collagen)
     test = destroy_atts_conversion(test)
     results = TestOnTestData(train, test, [LogisticRegressionLearner()])
     aucdestroyed = AUC(results)
     self.assertTrue(0.45 < aucdestroyed < 0.55)
""" Documentation script """
from Orange.classification import LogisticRegressionLearner
from Orange.evaluation.testing import CrossValidation
from Orange.evaluation.scoring import AUC

from orangecontrib.bioinformatics.geo.dataset import GDS

# Load the GDS2960 data set; the table rows are reported as samples below.
gds = GDS("GDS2960")
data = gds.get_data(
    sample_type="disease state",
    transpose=True,
    report_genes=True,
)
n_samples = len(data)
n_genes = len(data.domain.attributes)
print("Samples: %d, Genes: %d" % (n_samples, n_genes))

# Evaluate a logistic regression with 10-fold cross-validation.
learners = [LogisticRegressionLearner()]
results = CrossValidation(data, learners, k=10)

# AUC yields one score per learner; report the only one.
auc_scores = AUC(results)
print("AUC = %.3f" % auc_scores[0])