Example #1
    def test_error_model(self):
        # ErrorModelNC pairs the underlying model with a second learner that
        # models its errors; exercise both settings of the loo flag
        for loo in [False, True]:
            icr = InductiveRegressor(
                ErrorModelNC(LinearRegressionLearner(),
                             LinearRegressionLearner(),
                             loo=loo), self.train, self.calibrate)
            lo, hi = icr(self.test.x, 0.1)
            self.assertLess(hi - lo, 30.0)

        # baseline: plain absolute error with a random forest
        icr = InductiveRegressor(AbsError(RandomForestRegressionLearner()))
        r = run(icr, 0.1, CrossSampler(Table('housing'), 20))
        self.assertGreater(r.accuracy(), 0.85)
        print(r.accuracy(), r.median_range(), r.interdecile_mean())

        # error-model variant, first without and then with the loo option
        icr = InductiveRegressor(
            ErrorModelNC(RandomForestRegressionLearner(),
                         LinearRegressionLearner()))
        r = run(icr, 0.1, CrossSampler(Table('housing'), 20))
        self.assertGreater(r.accuracy(), 0.85)
        print(r.accuracy(), r.median_range(), r.interdecile_mean())

        icr = InductiveRegressor(
            ErrorModelNC(RandomForestRegressionLearner(),
                         LinearRegressionLearner(),
                         loo=True))
        r = run(icr, 0.1, CrossSampler(Table('housing'), 20))
        self.assertGreater(r.accuracy(), 0.85)
        print(r.accuracy(), r.median_range(), r.interdecile_mean())
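Several of these examples reference self.train, self.calibrate and self.test without showing the fixture. A minimal sketch of the implied setUp; the class name and split sizes are hypothetical:

import unittest

from Orange.data import Table

class TestRegression(unittest.TestCase):
    def setUp(self):
        tab = Table('housing')
        self.test = tab[-1]           # single instance, queried as self.test.x
        self.train = tab[:300]        # proper training set
        self.calibrate = tab[300:-1]  # calibration set for inductive predictors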
Example #2
    def test_abs_error_normalized(self):
        tab = Table('housing')
        normalizer = Normalize(zero_based=True,
                               norm_type=Normalize.NormalizeBySpan)
        tab = normalizer(tab)

        icr = InductiveRegressor(AbsError(LinearRegressionLearner()))
        icr_knn = InductiveRegressor(AbsError(KNNRegressionLearner(4)))
        icr_norm = InductiveRegressor(
            AbsErrorNormalized(KNNRegressionLearner(4),
                               Euclidean,
                               4,
                               exp=False))
        icr_norm_exp = InductiveRegressor(
            AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4,
                               exp=True))
        icr_norm_rf = InductiveRegressor(
            AbsErrorNormalized(KNNRegressionLearner(4),
                               Euclidean,
                               4,
                               rf=RandomForestRegressor()))

        r, r_knn, r_norm, r_norm_exp, r_norm_rf = (
            ResultsRegr(), ResultsRegr(), ResultsRegr(), ResultsRegr(),
            ResultsRegr())
        eps = 0.05
        for rep in range(10):
            for train, test in CrossSampler(tab, 10):
                # hold out 100 calibration instances from each training fold
                train, calibrate = next(
                    RandomSampler(train, len(train) - 100, 100))
                r.concatenate(run_train_test(icr, eps, train, test, calibrate))
                r_knn.concatenate(
                    run_train_test(icr_knn, eps, train, test, calibrate))
                r_norm.concatenate(
                    run_train_test(icr_norm, eps, train, test, calibrate))
                r_norm_exp.concatenate(
                    run_train_test(icr_norm_exp, eps, train, test, calibrate))
                r_norm_rf.concatenate(
                    run_train_test(icr_norm_rf, eps, train, test, calibrate))

        print(r.median_range(), r.interdecile_mean(), 1 - r.accuracy())
        print(r_knn.median_range(), r_knn.interdecile_mean(),
              1 - r_knn.accuracy())
        print(r_norm.median_range(), r_norm.interdecile_mean(),
              1 - r_norm.accuracy())
        print(r_norm_exp.median_range(), r_norm_exp.interdecile_mean(),
              1 - r_norm_exp.accuracy())
        print(r_norm_rf.median_range(), r_norm_rf.interdecile_mean(),
              1 - r_norm_rf.accuracy())
        self.assertGreater(r.accuracy(), 1 - eps - 0.03)
        self.assertGreater(r_knn.accuracy(), 1 - eps - 0.03)
        self.assertGreater(r_norm.accuracy(), 1 - eps - 0.03)
        self.assertGreater(r_norm_exp.accuracy(), 1 - eps - 0.03)
        self.assertGreater(r_norm_rf.accuracy(), 1 - eps - 0.03)
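All of these variants follow the normalized nonconformity idea from the conformal regression literature: the absolute residual is rescaled by a local difficulty estimate, so intervals widen where nearby training labels disagree. A sketch of that score, as an assumption about AbsErrorNormalized's internals rather than its actual code; beta is a hypothetical smoothing constant:

def normalized_score(y, y_hat, sigma, beta=0.01):
    # sigma: local difficulty estimate, e.g. from the k nearest neighbours
    # (optionally exponentiated, or taken from a random forest, matching the
    # exp=/rf= options above); beta keeps the score finite when sigma is 0
    return abs(y - y_hat) / (sigma + beta)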
        """
Example #3
    def test_abs_error_knn(self):
        icr = InductiveRegressor(AbsErrorKNN(Euclidean, 5), self.train,
                                 self.calibrate)
        lo, hi = icr(self.test.x, 0.1)
        self.assertLess(hi - lo, 30.0)

        icr = InductiveRegressor(AbsErrorKNN(Euclidean, 5, average=True),
                                 self.train, self.calibrate)
        lo, hi = icr(self.test.x, 0.1)
        self.assertLess(hi - lo, 30.0)

        icr = InductiveRegressor(AbsErrorKNN(Euclidean, 5, variance=True),
                                 self.train, self.calibrate)
        lo, hi = icr(self.test.x, 0.1)
        self.assertLess(hi - lo, 30.0)
Example #4
    def test_nc_type(self):
        # regressors accept regression NC measures and reject classification ones
        nc_regr = AbsError(LinearRegressionLearner())
        nc_class = InverseProbability(LogisticRegressionLearner())
        InductiveRegressor(nc_regr)
        self.assertRaises(AssertionError, InductiveRegressor, nc_class)
        CrossRegressor(nc_regr, 5)
        self.assertRaises(AssertionError, CrossRegressor, nc_class, 5)
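The AssertionError presumably comes from a type check on the measure. A minimal sketch of such a guard, assuming a RegrNC base class that mirrors the ClassNC seen in Example #11:

def check_regression_nc(nc_measure):
    # hypothetical guard inside InductiveRegressor/CrossRegressor.__init__
    assert isinstance(nc_measure, RegrNC), \
        'regression conformal predictors require a regression NC measure'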
Example #5
    def test_abs_error_rf(self):
        icr = InductiveRegressor(
            AbsErrorRF(RandomForestRegressionLearner(),
                       RandomForestRegressor()), self.train, self.calibrate)
        lo, hi = icr(self.test.x, 0.1)
        self.assertLess(hi - lo, 30.0)

        icr = InductiveRegressor(
            AbsErrorRF(LinearRegressionLearner(), RandomForestRegressor()),
            self.train, self.calibrate)
        lo, hi = icr(self.test.x, 0.1)
        self.assertLess(hi - lo, 30.0)

        icr = InductiveRegressor(
            AbsErrorRF(RandomForestRegressionLearner(),
                       RandomForestRegressor()))
        r = run(icr, 0.1, CrossSampler(Table('housing'), 10))
        self.assertGreater(r.accuracy(), 0.85)
        print(r.median_range(), r.interdecile_mean())
Example #6
    def test_run(self):
        tab = Table('iris')
        cp = CrossClassifier(InverseProbability(LogisticRegressionLearner()),
                             5)
        # RandomSampler(tab, 4, 1) holds out 1/5 of the data per repetition,
        # so 3 repetitions yield 3 * len(tab) / 5 predictions
        r = run(cp, 0.1, RandomSampler(tab, 4, 1), rep=3)
        self.assertEqual(len(r.preds), 3 * 1 / 5 * len(tab))

        tab = Table('housing')
        cr = InductiveRegressor(AbsError(LinearRegressionLearner()))
        # CrossSampler tests every instance exactly once per repetition
        r = run(cr, 0.1, CrossSampler(tab, 4), rep=3)
        self.assertEqual(len(r.preds), 3 * len(tab))
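The asserted counts follow from how run() composes run_train_test over the sampler's splits. A rough sketch of that loop, consistent with the usage in Example #2 but an assumption about the library's internals (the calibration split is elided):

def run_sketch(predictor, eps, sampler, rep=1):
    # each repetition re-draws the sampler's splits and the predictions
    # accumulate, so len(res.preds) scales with rep times the test-fold size
    res = ResultsRegr()
    for _ in range(rep):
        for train, test in sampler:
            res.concatenate(run_train_test(predictor, eps, train, test))
    return res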
Example #7
    def test_LOORegrNC(self):
        # sweep all combinations of the relative/include/neighbourhood options
        for incl in [False, True]:
            for rel in [False, True]:
                for neigh in ['fixed', 'variable']:
                    nc = LOORegrNC(LinearRegressionLearner,
                                   Euclidean,
                                   150,
                                   relative=rel,
                                   include=incl,
                                   neighbourhood=neigh)
                    icr = InductiveRegressor(nc, self.train, self.calibrate)
                    lo, hi = icr(self.test.x, 0.1)
                    print(lo, hi)
                    self.assertLess(hi - lo, 20.0)

        tab = Table('housing')
        icr = InductiveRegressor(
            LOORegrNC(LinearRegressionLearner, Euclidean, 150))
        r = run(icr, 0.1, CrossSampler(tab, 4))
        self.assertGreater(r.accuracy(), 0.85)
        self.assertLess(r.mean_range(), 15.0)
Example #8
    def test_validate_regression(self):
        tab = Table('housing')
        eps = 0.1
        correct, num, all = 0, 0, len(tab)
        for i in range(all):
            # leave-one-out validation: predict each instance from a model
            # trained and calibrated (2:1 split) on the remaining data
            train, test = get_instance(tab, i)
            train, calibrate = split_data(shuffle_data(train), 2, 1)
            icr = InductiveRegressor(AbsError(LinearRegressionLearner()),
                                     train, calibrate)
            y_min, y_max = icr(test.x, eps)
            if y_min <= test.y <= y_max: correct += 1
            num += y_max - y_min
        # empirical coverage should track the 1 - eps validity guarantee
        self.assertAlmostEqual(correct / all, 1.0 - eps, delta=0.02)
Example #9
    def test_avg_error_knn(self):
        ncm = AvgErrorKNN(Euclidean)
        self.assertEqual(ncm.avg_abs_inv(6 / 5, [1, 2, 3, 4, 5]), (3, 3))
        for odd in [0, 1]:
            ys = np.random.uniform(0, 1, 10 + odd)
            nc = 0.4
            lo, hi = ncm.avg_abs_inv(nc, ys)
            self.assertGreater(ncm.avg_abs(lo - 0.001, ys), nc)
            self.assertLess(ncm.avg_abs(lo + 0.001, ys), nc)
            self.assertLess(ncm.avg_abs(hi - 0.001, ys), nc)
            self.assertGreater(ncm.avg_abs(hi + 0.001, ys), nc)

        icr = InductiveRegressor(AvgErrorKNN(Euclidean, 10), self.train,
                                 self.calibrate)
        lo, hi = icr(self.test.x, 0.1)
        self.assertLess(hi - lo, 30.0)

        r = run(InductiveRegressor(AvgErrorKNN(Euclidean, 10)),
                0.1,
                RandomSampler(Table("housing"), 2, 1),
                rep=10)
        self.assertFalse(any([np.isnan(w) for w in r.widths()]))
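A worked check of the first assertion: the mean absolute deviation of [1, 2, 3, 4, 5] from y = 3 is (2 + 1 + 0 + 1 + 2) / 5 = 6/5, and the median 3 is the unique point achieving it, so the inverse interval collapses to (3, 3):

import numpy as np
assert np.mean(np.abs(3 - np.array([1, 2, 3, 4, 5]))) == 6 / 5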
Example #10
    def test_validate_AbsErrorKNN(self):
        eps = 0.1
        correct, num, all = 0, 0, 0
        for it in range(10):
            train, test = split_data(shuffle_data(Table('housing')), 4, 1)
            train, calibrate = split_data(shuffle_data(train), 3, 1)
            icr = InductiveRegressor(
                AbsErrorKNN(Euclidean, 10, average=True, variance=True), train,
                calibrate)
            for i, inst in enumerate(test):
                y_min, y_max = icr(inst.x, eps)
                if y_min <= inst.y <= y_max: correct += 1
                num += y_max - y_min
                all += 1
            print(correct / all, num / all)
        self.assertAlmostEqual(correct / all, 1.0 - eps, delta=0.03)
Example #11
def evaluate_nc_dataset_eps(nc_str, dataset, eps, id):
    nc = eval(nc_str)  # NC measure passed as source text, e.g. 'AbsError(...)'
    tab = Table(dataset)
    res = None
    for rep in range(100):
        # pick the conformal predictor matching the measure's type
        if isinstance(nc, ClassNC):
            r = run(InductiveClassifier(nc), eps, RandomSampler(tab, 2, 1))
        else:
            r = run(InductiveRegressor(nc), eps, RandomSampler(tab, 2, 1))
        if res is None:
            res = r
        else:
            res.concatenate(r)
        print(rep + 1, nc_str, dataset, eps)
        # checkpoint the cumulative results after every repetition
        with open('results/qsar/%d.p' % id, 'wb') as f:
            pickle.dump((res, rep + 1), f)
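A hypothetical invocation; the results/qsar directory must already exist, and every name inside nc_str must be importable in the scope where eval() runs:

evaluate_nc_dataset_eps('AbsError(LinearRegressionLearner())', 'housing', 0.1, 0)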
Example #12
    def test_experimental(self):
        icr = InductiveRegressor(
            ExperimentalNC(RandomForestRegressor(n_estimators=20)), self.train,
            self.calibrate)
        r = run(icr, 0.1, CrossSampler(Table('housing'), 10))
        print(r.accuracy(), r.median_range())