def test_nonexchangeability(self):
    """Check that the error rate on USPS exceeds the requested level.

    The USPS table is split without shuffling, the first 200 instances of
    the test part are kept, and the remaining part is split again into
    proper training and calibration sets.  An inductive classifier built
    on them is queried at significance 0.1 and the observed error rate
    must be greater than 0.13.
    """
    # NOTE(review): the split arguments 7291/2007 presumably reproduce the
    # original USPS train/test partition -- confirm against split_data.
    data_path = os.path.join(os.path.dirname(__file__), '../data/usps.tab')
    usps = Table(data_path)
    train, test = split_data(usps, 7291, 2007)
    test = test[:200]
    train, calibrate = split_data(train, 3, 1)

    nonconformity = InverseProbability(LogisticRegressionLearner())
    icp = InductiveClassifier(nonconformity, train, calibrate)

    # An error is a test instance whose true class is missing from the
    # predicted set of classes.
    misses = 0
    for inst in test:
        if inst.get_class() not in icp(inst.x, 0.1):
            misses += 1
    self.assertGreater(misses / len(test), 0.13)
def test_validate_AbsErrorKNN(self):
    """Check empirical coverage of an AbsErrorKNN inductive regressor.

    The housing data is shuffled and split 10 times into training,
    calibration and test parts.  Across all test instances, the fraction
    whose true value lies inside the predicted interval must be within
    0.03 of ``1 - eps``.
    """
    eps = 0.1
    # `total` replaces a local previously named `all`, which shadowed the
    # builtin of the same name.
    hits, width_sum, total = 0, 0, 0
    for _ in range(10):
        train, test = split_data(shuffle_data(Table('housing')), 4, 1)
        train, calibrate = split_data(shuffle_data(train), 3, 1)
        icr = InductiveRegressor(
            AbsErrorKNN(Euclidean, 10, average=True, variance=True),
            train, calibrate)
        for inst in test:
            y_min, y_max = icr(inst.x, eps)
            if y_min <= inst.y <= y_max:
                hits += 1
            width_sum += y_max - y_min
            total += 1
    # Diagnostic output: observed coverage and mean interval width.
    print(hits / total, width_sum / total)
    self.assertAlmostEqual(hits / total, 1.0 - eps, delta=0.03)
def test_validate_regression(self):
    """Check empirical coverage of an AbsError inductive regressor.

    For every index of the housing table, ``get_instance`` supplies a
    training table and one test instance (presumably the table without
    that instance -- confirm against ``get_instance``).  The training
    part is shuffled and split into proper training and calibration
    sets.  The fraction of test instances whose true value falls inside
    the predicted interval must be within 0.02 of ``1 - eps``.
    """
    tab = Table('housing')
    eps = 0.1
    # `total` replaces a local previously named `all`, which shadowed the
    # builtin of the same name.
    total = len(tab)
    hits = 0
    for i in range(total):
        train, test = get_instance(tab, i)
        train, calibrate = split_data(shuffle_data(train), 2, 1)
        icr = InductiveRegressor(
            AbsError(LinearRegressionLearner()), train, calibrate)
        y_min, y_max = icr(test.x, eps)
        if y_min <= test.y <= y_max:
            hits += 1
    self.assertAlmostEqual(hits / total, 1.0 - eps, delta=0.02)
def test_validate_inductive(self):
    """Check empirical coverage of an inductive classifier on ``self.tab``.

    For every index of ``self.tab``, ``get_instance`` supplies a training
    table and one test instance (presumably the table without that
    instance -- confirm against ``get_instance``).  The training part is
    shuffled and split into proper training and calibration sets.  The
    fraction of test instances whose true class is contained in the
    prediction must be within 0.01 of ``1 - eps``.
    """
    eps = 0.1
    # `total` replaces a local previously named `all`, which shadowed the
    # builtin of the same name.
    total = len(self.tab)
    hits = 0
    for i in range(total):
        train, test = get_instance(self.tab, i)
        train, calibrate = split_data(shuffle_data(train), 2, 1)
        icp = InductiveClassifier(
            InverseProbability(LogisticRegressionLearner()),
            train, calibrate)
        if test.get_class() in icp(test.x, eps):
            hits += 1
    self.assertAlmostEqual(hits / total, 1.0 - eps, delta=0.01)
def setUp(self):
    """Load iris and prepare the train, calibrate and test fixtures.

    Stores the full table in ``self.tab``, the second result of
    ``get_instance(self.tab, 0)`` in ``self.test``, and a shuffled split
    of the first result in ``self.train`` and ``self.calibrate``.
    """
    iris = Table('iris')
    self.tab = iris
    remainder, held_out = get_instance(iris, 0)
    self.test = held_out
    shuffled = shuffle_data(remainder)
    self.train, self.calibrate = split_data(shuffled, 2, 1)
def setUp(self):
    """Load housing and prepare the train, calibrate and test fixtures.

    The second result of ``get_instance`` at index 0 becomes
    ``self.test``; the first result is shuffled and split into
    ``self.train`` and ``self.calibrate``.
    """
    housing = Table('housing')
    remainder, self.test = get_instance(housing, 0)
    shuffled = shuffle_data(remainder)
    self.train, self.calibrate = split_data(shuffled, 2, 1)