Пример #1
0
 def test_whole_and_train_separete(self):
     """Check that preprocessing and the train/test split can be swapped.

     For every preprocessor in PREPROCESSORS_INDEPENDENT_SAMPLES two test
     sets are built and compared:

     * the whole data set is preprocessed and then split;
     * the data set is split first, only the train part is preprocessed,
       and the raw test part is transformed into the resulting train
       domain.

     The ``X`` arrays of both test sets must be almost equal.
     """
     for preprocessor in PREPROCESSORS_INDEPENDENT_SAMPLES:
         raw = preprocessor_data(preprocessor)

         # Reference result: preprocess everything, then keep the test part.
         _unused_train, expected_test = separate_learn_test(preprocessor(raw))

         # Result under test: split first and preprocess the train part only;
         # the test part is converted through the train domain.
         raw_train, raw_test = separate_learn_test(raw)
         processed_train = preprocessor(raw_train)
         actual_test = raw_test.transform(processed_train.domain)

         np.testing.assert_almost_equal(
             actual_test.X,
             expected_test.X,
             err_msg="Preprocessor " + str(preprocessor),
         )
Пример #2
0
    def test_slightly_different_domain(self):
        """Check that small changes of the test domain barely change the AUC.

        For every preprocessor in PREPROCESSORS (except those carrying a
        ``skip_add_zeros`` attribute) a baseline AUC is computed with
        unmodified data. The test data is then perturbed (wavenumbers
        changed by 0.00001 and passed through ``odd_attr``) and must score
        within 0.03 of the baseline; after an additional shift of the test
        points by 1 it must score within 0.05.
        """
        # Rows consisting only of unknowns make LogisticRegression undefined,
        # since it can not handle unknown values. Such rows can appear, for
        # example, with EMSC if one of the bad spectra is a spectrum from the
        # data, so they are removed before fitting.
        lr_learner = LogisticRegressionLearner(
            max_iter=1000, preprocessors=[_RemoveNaNRows()])

        for preprocessor in PREPROCESSORS:
            if hasattr(preprocessor, "skip_add_zeros"):
                continue

            raw_train, raw_test = separate_learn_test(
                preprocessor_data(preprocessor))

            # Baseline: preprocessed train data against the untouched test set.
            baseline_train = preprocessor(raw_train)
            baseline_auc = AUC(
                TestOnTestData()(baseline_train, raw_test, [lr_learner]))

            changed_test = odd_attr(
                slightly_change_wavenumbers(raw_test, 0.00001))

            # Train on a subset of the points so that every test point lies
            # within the train points; interpolation then yields no unknowns.
            inner_points = getx(raw_train)[1:-3]
            interpolatable_train = Interpolate(points=inner_points)(raw_train)
            fitted_train = preprocessor(interpolatable_train)

            # Convert the domain explicitly: exceptions raised here would
            # otherwise be silently handled inside TestOnTestData.
            changed_test.transform(fitted_train.domain)
            changed_auc = AUC(
                TestOnTestData()(fitted_train, changed_test, [lr_learner]))
            self.assertAlmostEqual(
                changed_auc,
                baseline_auc,
                delta=0.03,
                msg="Preprocessor " + str(preprocessor),
            )

            # Additionally shift the test points.
            shifted_points = getx(changed_test) - 1.
            shifted_test = Interpolate(points=shifted_points)(changed_test)
            shifted_test.transform(fitted_train.domain)  # explicit call again
            shifted_auc = AUC(
                TestOnTestData()(fitted_train, shifted_test, [lr_learner]))
            # The difference should still be slight.
            self.assertAlmostEqual(
                shifted_auc,
                baseline_auc,
                delta=0.05,
                msg="Preprocessor " + str(preprocessor),
            )