def test_error_model(self):
    """ErrorModelNC yields tight, calibrated intervals with and without LOO."""
    # Fixed train/calibrate split: intervals must stay reasonably narrow.
    for use_loo in (False, True):
        nc = ErrorModelNC(LinearRegressionLearner(),
                          LinearRegressionLearner(), loo=use_loo)
        regressor = InductiveRegressor(nc, self.train, self.calibrate)
        lower, upper = regressor(self.test.x, 0.1)
        self.assertLess(upper - lower, 30.0)

    def check(nonconformity):
        # Cross-validated run on housing: accuracy must exceed 0.85.
        res = run(InductiveRegressor(nonconformity), 0.1,
                  CrossSampler(Table('housing'), 20))
        self.assertGreater(res.accuracy(), 0.85)
        print(res.accuracy(), res.median_range(), res.interdecile_mean())

    check(AbsError(RandomForestRegressionLearner()))
    check(ErrorModelNC(RandomForestRegressionLearner(),
                       LinearRegressionLearner()))
    check(ErrorModelNC(RandomForestRegressionLearner(),
                       LinearRegressionLearner(), loo=True))
def test_cross(self):
    """CrossSampler produces k balanced, disjoint folds that cover the data."""
    folds = 7
    sampler = CrossSampler(self.data, k=folds)
    sizes = [(len(train), len(test)) for train, test in sampler]
    self.assertEqual(len(sizes), folds)
    # Each split partitions the full data set.
    self.assertTrue(all(n_train + n_test == len(self.data)
                        for n_train, n_test in sizes))
    # Fold sizes may differ by at most one instance.
    test_sizes = [n_test for _, n_test in sizes]
    self.assertLessEqual(max(test_sizes) - min(test_sizes), 1)
    # The k test folds are pairwise distinct.
    sampler = CrossSampler(self.data, k=folds)
    fold_ids = frozenset(frozenset(inst.id for inst in test)
                         for _, test in sampler)
    self.assertEqual(len(fold_ids), folds)
def evaluate_ncs(tab, cp, nc_strs, id):
    """Evaluate every nonconformity spec in *nc_strs* on *tab* and pickle results.

    Each spec string is evaluated into a nonconformity object, run through a
    5-fold cross-sampled conformal evaluation (5 repetitions), and the results
    dict is dumped to ``results/<id>.p``.
    """
    results = {}
    for nc_str in nc_strs:
        # SECURITY: eval() executes arbitrary code -- the spec strings must
        # come from a trusted source (they are authored in this project).
        nc = eval(nc_str)
        results[nc_str] = run(cp(nc), 0.1, CrossSampler(tab, 5), rep=5)
        print(id, nc_str)
    with open('results/%s.p' % id, 'wb') as f:
        pickle.dump(results, f)
def test_abs_error_normalized(self):
    # Compares plain AbsError against AbsErrorNormalized variants (plain,
    # exponential, and random-forest-weighted) on span-normalized housing data;
    # all must keep error rate within eps + 0.03.
    tab = Table('housing')
    normalizer = Normalize(zero_based=True,
                           norm_type=Normalize.NormalizeBySpan)
    tab = normalizer(tab)
    icr = InductiveRegressor(AbsError(LinearRegressionLearner()))
    icr_knn = InductiveRegressor(AbsError(KNNRegressionLearner(4)))
    icr_norm = InductiveRegressor(
        AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4, exp=False))
    icr_norm_exp = InductiveRegressor(
        AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4, exp=True))
    icr_norm_rf = InductiveRegressor(
        AbsErrorNormalized(KNNRegressionLearner(4), Euclidean, 4,
                           rf=RandomForestRegressor()))
    r, r_knn, r_norm, r_norm_exp, r_norm_rf = ResultsRegr(), ResultsRegr(
    ), ResultsRegr(), ResultsRegr(), ResultsRegr()
    eps = 0.05
    for rep in range(10):
        for train, test in CrossSampler(tab, 10):
            # Hold out 100 instances of each training fold for calibration.
            train, calibrate = next(
                RandomSampler(train, len(train) - 100, 100))
            r.concatenate(run_train_test(icr, eps, train, test, calibrate))
            r_knn.concatenate(
                run_train_test(icr_knn, eps, train, test, calibrate))
            r_norm.concatenate(
                run_train_test(icr_norm, eps, train, test, calibrate))
            r_norm_exp.concatenate(
                run_train_test(icr_norm_exp, eps, train, test, calibrate))
            r_norm_rf.concatenate(
                run_train_test(icr_norm_rf, eps, train, test, calibrate))
    print(r.median_range(), r.interdecile_mean(), 1 - r.accuracy())
    print(r_knn.median_range(), r_knn.interdecile_mean(),
          1 - r_knn.accuracy())
    print(r_norm.median_range(), r_norm.interdecile_mean(),
          1 - r_norm.accuracy())
    print(r_norm_exp.median_range(), r_norm_exp.interdecile_mean(),
          1 - r_norm_exp.accuracy())
    print(r_norm_rf.median_range(), r_norm_rf.interdecile_mean(),
          1 - r_norm_rf.accuracy())
    self.assertGreater(r.accuracy(), 1 - eps - 0.03)
    self.assertGreater(r_knn.accuracy(), 1 - eps - 0.03)
    self.assertGreater(r_norm.accuracy(), 1 - eps - 0.03)
    self.assertGreater(r_norm_exp.accuracy(), 1 - eps - 0.03)
    self.assertGreater(r_norm_rf.accuracy(), 1 - eps - 0.03)
    # NOTE(review): stray triple-quote below -- it appears to delimit a
    # disabled block whose matching delimiter is outside this view; left
    # untouched to avoid breaking the file. TODO confirm and clean up.
    """
def test_run(self):
    """run() accumulates one prediction per tested instance per repetition."""
    iris = Table('iris')
    ccp = CrossClassifier(InverseProbability(LogisticRegressionLearner()), 5)
    res = run(ccp, 0.1, RandomSampler(iris, 4, 1), rep=3)
    # RandomSampler(4, 1) tests 1/5 of the data; 3 repetitions.
    self.assertEqual(len(res.preds), 3 * 1 / 5 * len(iris))
    housing = Table('housing')
    icr = InductiveRegressor(AbsError(LinearRegressionLearner()))
    res = run(icr, 0.1, CrossSampler(housing, 4), rep=3)
    # Cross-validation tests every instance once per repetition.
    self.assertEqual(len(res.preds), 3 * len(housing))
def test_repeat(self):
    """Repeating a sampler draws fresh splits on every repetition.

    Fix: the ``rep`` local was assigned but never used -- the repeat counts
    and the expected fold counts were duplicated as hard-coded literals
    (5 and 15).  They are now derived from ``rep`` (and ``k``) so the test
    cannot drift out of sync; the asserted values are unchanged.
    """
    rep = 5
    # Random splits: each repetition should produce a different test set.
    s = RandomSampler(self.data, 3, 2)
    ids = frozenset(
        frozenset(inst.id for inst in test)
        for train, test in s.repeat(rep))
    self.assertEqual(len(ids), rep)
    # k-fold cross sampling reshuffles per repetition, so rep repetitions
    # yield k * rep distinct test folds.
    k = 3
    s = CrossSampler(self.data, k)
    ids = frozenset(
        frozenset(inst.id for inst in test)
        for train, test in s.repeat(rep))
    self.assertEqual(len(ids), k * rep)
def evaluate_datasets(datasets, cp, nc_str, id):
    """Evaluate one nonconformity spec across several data sets; pickle results.

    Each data set is imputed and stripped of constant attributes, the spec
    string is evaluated into a nonconformity object, and a 5-fold
    cross-sampled evaluation (5 repetitions) is stored under the data set's
    base name.  The results dict is dumped to ``results/nc/<id>.p``.
    """
    results = {}
    for dataset in datasets:
        # Base file name without directory or extension identifies the set.
        dataset_id = dataset.split('/')[-1].split('.')[0]
        imp = Orange.preprocess.Impute()
        rc = Orange.preprocess.preprocess.RemoveConstant()
        tab = rc(imp(Table(dataset)))
        # SECURITY: eval() executes arbitrary code -- nc_str must be trusted
        # (it is authored within this project, not user input).
        nc = eval(nc_str)
        results[dataset_id] = run(cp(nc), 0.1, CrossSampler(tab, 5), rep=5)
        print(nc_str, dataset_id)
    print(nc_str.upper())
    with open('results/nc/%d.p' % id, 'wb') as f:
        pickle.dump(results, f)
def test_abs_error_rf(self):
    """AbsErrorRF keeps intervals narrow for both underlying learners."""
    # Same interval-width check with two different point predictors.
    for learner in (RandomForestRegressionLearner(),
                    LinearRegressionLearner()):
        reg = InductiveRegressor(
            AbsErrorRF(learner, RandomForestRegressor()),
            self.train, self.calibrate)
        lower, upper = reg(self.test.x, 0.1)
        self.assertLess(upper - lower, 30.0)
    # Cross-validated calibration check on housing.
    reg = InductiveRegressor(
        AbsErrorRF(RandomForestRegressionLearner(), RandomForestRegressor()))
    res = run(reg, 0.1, CrossSampler(Table('housing'), 10))
    self.assertGreater(res.accuracy(), 0.85)
    print(res.median_range(), res.interdecile_mean())
def test_LOOClassNC(self):
    """LOOClassNC predicts the expected singleton under every option combo."""
    for include in (False, True):
        for relative in (False, True):
            for neighbourhood in ('fixed', 'variable'):
                nc = LOOClassNC(NaiveBayesLearner(), Euclidean, 20,
                                relative=relative, include=include,
                                neighbourhood=neighbourhood)
                icp = InductiveClassifier(nc, self.train, self.calibrate)
                pred = icp(self.test.x, 0.1)
                print(pred)
                self.assertEqual(pred, ['Iris-setosa'])
    # Default options: check calibration and informativeness on iris.
    icp = InductiveClassifier(
        LOOClassNC(NaiveBayesLearner(), Euclidean, 20))
    res = run(icp, 0.1, CrossSampler(Table('iris'), 4))
    self.assertGreater(res.accuracy(), 0.85)
    self.assertGreater(res.singleton_criterion(), 0.8)
def test_LOORegrNC(self):
    """LOORegrNC yields narrow, calibrated intervals under every option combo."""
    for include in (False, True):
        for relative in (False, True):
            for neighbourhood in ('fixed', 'variable'):
                nc = LOORegrNC(LinearRegressionLearner, Euclidean, 150,
                               relative=relative, include=include,
                               neighbourhood=neighbourhood)
                reg = InductiveRegressor(nc, self.train, self.calibrate)
                lower, upper = reg(self.test.x, 0.1)
                print(lower, upper)
                self.assertLess(upper - lower, 20.0)
    # Default options: check calibration and interval size on housing.
    housing = Table('housing')
    reg = InductiveRegressor(
        LOORegrNC(LinearRegressionLearner, Euclidean, 150))
    res = run(reg, 0.1, CrossSampler(housing, 4))
    self.assertGreater(res.accuracy(), 0.85)
    self.assertLess(res.mean_range(), 15.0)
def test_experimental(self):
    # Smoke test for ExperimentalNC: runs a cross-validated evaluation and
    # prints accuracy/median range.  NOTE(review): there is no assertion, so
    # this test can only fail by raising -- consider asserting a minimum
    # accuracy once ExperimentalNC's expected performance is established.
    icr = InductiveRegressor(
        ExperimentalNC(RandomForestRegressor(n_estimators=20)), self.train,
        self.calibrate)
    r = run(icr, 0.1, CrossSampler(Table('housing'), 10))
    print(r.accuracy(), r.median_range())