def test_miss_majority():
    """Leave-one-out with MajorityLearner predicts the majority of the
    training fold for every held-out row."""
    x = np.zeros((50, 3))
    y = x[:, -1]

    def loo(tab):
        # run leave-one-out with a single majority learner
        return LeaveOneOut()(tab, [MajorityLearner()])

    # one deviating row: the other 49 rows are still predicted as class 0
    x[49] = 1
    data = Table(x, y)
    res = loo(data)
    np.testing.assert_equal(res.predicted[0][:49], 0)

    # homogeneous table (data shares memory with x, so this edit is seen)
    x[49] = 0
    res = loo(data)
    np.testing.assert_equal(res.predicted[0][:49], 0)

    # 25/25 split: the held-out row is always in the minority of the
    # remaining training fold, so every prediction is the opposite class
    x[25:] = 1
    data = Table(x, y)
    res = loo(data)
    np.testing.assert_equal(res.predicted[0],
                            1 - data.Y[res.row_indices].flatten())
def test_miss_majority():
    """Testing on the training data with MajorityLearner predicts the
    majority class of the whole table."""
    x = np.zeros((50, 3))
    y = x[:, -1]

    def fit_predict(tab):
        return TestOnTrainingData()(tab, [MajorityLearner()])

    # a single deviating row does not move the majority away from 0
    x[49] = 1
    data = Table(x, y)
    res = fit_predict(data)
    np.testing.assert_equal(res.predicted[0][:49], 0)

    # homogeneous table (data shares memory with x) -> constant 0
    x[49] = 0
    res = fit_predict(data)
    np.testing.assert_equal(res.predicted[0][:49], 0)

    # exact 25/25 tie: whichever class wins, the prediction is constant
    x[25:] = 1
    y = x[:, -1]
    data = Table(x, y)
    res = fit_predict(data)
    np.testing.assert_equal(res.predicted[0], res.predicted[0][0])
def test_returns_random_class(self):
    """With tied class counts the majority model should not always return
    the same class for the same instance."""
    iris = self.iris
    mask = np.ones((150,), dtype='bool')
    mask[0] = False
    # one row of class 0 removed: classes 1 and 2 are tied for majority
    first_pred = MajorityLearner()(iris[mask])(iris[0])
    self.assertIn(first_pred, [1, 2])
    for i in range(1, 50):
        # shrink all three classes equally so the 1-vs-2 tie persists
        mask[i] = mask[50 + i] = mask[100 + i] = False
        next_pred = MajorityLearner()(iris[mask])(iris[0])
        self.assertIn(next_pred, [1, 2])
        if first_pred != next_pred:
            break
    else:
        self.fail("Majority always returns the same value.")
def test_probs(self):
    """Majority probabilities follow class frequencies of the training data."""
    learners = [MajorityLearner(), MajorityLearner()]

    # evaluate on the training rows themselves (iris rows 30..129)
    subset = self.iris[30:130]
    res = TestOnTestData()(subset, subset, learners)
    self.assertEqual(res.predicted.shape, (2, len(subset)))
    np.testing.assert_equal(res.predicted, np.ones((2, 100)))
    p = res.probabilities
    # the asserts pin the frequency ordering: class 0 < class 2 < class 1
    self.assertTrue((p[:, :, 0] < p[:, :, 2]).all())
    self.assertTrue((p[:, :, 2] < p[:, :, 1]).all())

    # train without any class-0 rows: its probability must be exactly zero
    train = self.iris[50:120]
    test = self.iris[:50]
    res = TestOnTestData()(train, test, learners)
    self.assertEqual(res.predicted.shape, (2, len(test)))
    np.testing.assert_equal(res.predicted, np.ones((2, 50)))
    p = res.probabilities
    self.assertTrue((p[:, :, 0] == 0).all())
def test_store_models(self):
    """LeaveOneOut keeps fitted models only when store_models is set."""
    table = self.random_table
    learners = [NaiveBayesLearner(), MajorityLearner()]
    # default: models are discarded
    self.assertIsNone(LeaveOneOut()(table, learners).models)
    # store_models=True: one model list per left-out row
    results = LeaveOneOut(store_models=True)(table, learners)
    self.check_models(results, learners, self.nrows)
def test_store_models(self):
    """CrossValidation keeps one list of fitted models per fold when asked."""
    learners = [NaiveBayesLearner(), MajorityLearner()]
    without_models = CrossValidation(self.random_table, learners, k=5,
                                     store_models=False)
    self.assertIsNone(without_models.models)
    with_models = CrossValidation(self.random_table, learners, k=5,
                                  store_models=True)
    # one entry per fold
    self.assertEqual(len(with_models.models), 5)
    self.check_models(with_models, learners, 5)
def test_miss_majority():
    """TestOnTrainingData with MajorityLearner predicts the majority class
    (new Table API with locked arrays)."""
    x = np.zeros((50, 3))
    y = x[:, -1]

    def fit_predict(tab):
        return TestOnTrainingData()(tab, [MajorityLearner()])

    # one deviating row does not change the majority
    x[49] = 1
    data = Table.from_numpy(None, x, y)
    res = fit_predict(data)
    np.testing.assert_equal(res.predicted[0][:49], 0)

    # the table locks its arrays; unlock before mutating the shared x
    with data.unlocked(data.X):
        x[49] = 0
    res = fit_predict(data)
    np.testing.assert_equal(res.predicted[0][:49], 0)

    with data.unlocked(data.X):
        x[25:] = 1
    data = Table.from_numpy(None, x, y)
    res = fit_predict(data)
    # 25/25 tie: either class may win, but the prediction must be constant
    np.testing.assert_equal(res.predicted[0], res.predicted[0][0])
def test_store_models(self):
    """TestOnTrainingData keeps fitted models only when store_models=True."""
    table = self.random_table
    learners = [NaiveBayesLearner(), MajorityLearner()]
    self.assertIsNone(TestOnTrainingData()(table, learners).models)
    # a single "fold": the whole table is both train and test
    results = TestOnTrainingData(store_models=True)(table, learners)
    self.check_models(results, learners, 1)
def test_split_by_model(self):
    """split_by_model yields a per-learner Results with that learner's
    predictions, probabilities and models."""
    learners = [NaiveBayesLearner(), MajorityLearner()]
    res = CrossValidation(self.random_table, learners, k=5, store_models=True)
    for idx, single in enumerate(res.split_by_model()):
        self.assertIsInstance(single, Results)
        self.assertTrue((single.predicted == res.predicted[idx]).all())
        self.assertTrue(
            (single.probabilities == res.probabilities[idx]).all())
        # one model per fold, each of the matching learner's model class
        self.assertEqual(len(single.models), 5)
        for model in single.models:
            self.assertIsInstance(model, learners[idx].__returns__)
def test_store_models(self):
    """TestOnTestData keeps fitted models only when store_models=True."""
    table = self.random_table
    cut = int(self.nrows * .75)
    train, test = table[:cut], table[cut:]
    learners = [NaiveBayesLearner(), MajorityLearner()]
    self.assertIsNone(TestOnTestData()(train, test, learners).models)
    results = TestOnTestData(store_models=True)(train, test, learners)
    # a single train/test split -> one model list
    self.check_models(results, learners, 1)
def test_store_data(self):
    """TestOnTestData stores the test table only when store_data=True."""
    table = self.random_table
    cut = int(self.nrows * .75)
    train, test = table[:cut], table[cut:]
    learners = [MajorityLearner()]
    self.assertIsNone(TestOnTestData()(train, test, learners).data)
    results = TestOnTestData(store_data=True)(train, test, learners)
    # the very same test table object must be referenced, not a copy
    self.assertIs(results.data, test)
def test_support(self):
    """Model imputation supports discrete targets with MajorityLearner and
    continuous targets with MeanLearner, and rejects the opposite kind."""
    table = data.Table.from_file("iris")
    continuous = table.domain.variables[0]
    discrete = table.domain.variables[-1]
    majority_imputer = impute.Model(MajorityLearner())
    self.assertTrue(majority_imputer.supports_variable(discrete))
    self.assertFalse(majority_imputer.supports_variable(continuous))
    mean_imputer = impute.Model(MeanLearner())
    self.assertFalse(mean_imputer.supports_variable(discrete))
    self.assertTrue(mean_imputer.supports_variable(continuous))
def test_auc_on_multiclass_data_returns_1d_array(self):
    """AUC on multiclass data must come back as a flat per-learner array."""
    titanic = Table("titanic")[:100]
    lenses = Table("lenses")[:100]
    majority = MajorityLearner()
    for table in (lenses, titanic):
        auc = AUC(TestOnTrainingData(table, [majority]))
        self.assertEqual(auc.ndim, 1)
def test_majority(self):
    """MajorityLearner predicts the most frequent class regardless of X.

    Fix: ``np.random.random_integers`` was deprecated in NumPy 1.11 and
    removed in NumPy 1.25; ``np.random.randint`` with an exclusive upper
    bound of 4 draws the same values from {1, 2, 3}.
    """
    nrows = 1000
    ncols = 10
    x = np.random.randint(1, 4, (nrows, ncols))
    # y = draw // 2 maps {1, 2, 3} -> {0, 1, 1}, so class 1 is the majority
    y = np.random.randint(1, 4, (nrows, 1)) // 2
    t = Table(x, y)
    learn = MajorityLearner()
    clf = learn(t)
    # predictions on fresh random attributes must all be the majority class
    x2 = np.random.randint(1, 4, (nrows, ncols))
    y2 = clf(x2)
    self.assertTrue((y2 == 1).all())
def test_auc_on_multiclass_data_returns_1d_array(self):
    """AUC on multiclass data must come back as a flat per-learner array."""
    titanic = Table('titanic')[:100]
    lenses = Table(test_filename('datasets/lenses.tab'))[:100]
    majority = MajorityLearner()
    for table in (lenses, titanic):
        auc = AUC(TestOnTrainingData(table, [majority]))
        self.assertEqual(auc.ndim, 1)
def test_store_data(self):
    """Results stores the test table only when store_data=True.

    Fix: the original sliced at index 80 on a 50-row table, so the test
    set was empty; split at 40 for a real 40/10 train/test split.
    """
    nrows, ncols = 50, 10
    data = random_data(nrows, ncols)
    train = data[:40]
    test = data[40:]
    learners = [MajorityLearner()]
    res = TestOnTestData(train, test, learners)
    self.assertIsNone(res.data)
    res = TestOnTestData(train, test, learners, store_data=True)
    # the very same test table object must be referenced, not a copy
    self.assertIs(res.data, test)
def test_multiple_learners(self):
    """Adding, removing and re-adding learner inputs keeps the evaluation
    results and learner-name order consistent with the input channels."""
    def check_evres_names(expeced):
        # compare the learner names on the output against the expectation
        res = self.get_output(self.widget.Outputs.evaluations_results)
        self.assertSequenceEqual(res.learner_names, expeced)

    data = Table("iris")[::15].copy()
    m1 = MajorityLearner()
    m1.name = "M1"
    m2 = MajorityLearner()
    m2.name = "M2"
    self.send_signal(self.widget.Inputs.train_data, data)
    self.send_signal(self.widget.Inputs.learner, m1, 1)
    self.send_signal(self.widget.Inputs.learner, m2, 2)
    # both learners are MajorityLearner -> identical probabilities
    res = self.get_output(self.widget.Outputs.evaluations_results)
    np.testing.assert_equal(res.probabilities[0], res.probabilities[1])
    check_evres_names(["M1", "M2"])
    # removing a learner by sending None keeps its channel slot reserved:
    # re-sending m1 on channel 1 restores the original ordering
    self.send_signal(self.widget.Inputs.learner, None, 1)
    check_evres_names(["M2"])
    self.send_signal(self.widget.Inputs.learner, m1, 1)
    check_evres_names(["M1", "M2"])
    # closing the channel with the sentinel drops the slot entirely, so a
    # re-sent m1 is appended after M2 instead of reclaiming its position
    self.send_signal(self.widget.Inputs.learner,
                     self.widget.Inputs.learner.closing_sentinel, 1)
    check_evres_names(["M2"])
    self.send_signal(self.widget.Inputs.learner, m1, 1)
    check_evres_names(["M2", "M1"])
def test_mismatching_targets(self):
    """A predictor trained on a different target than the input data raises
    Error.predictor_failed; the error clears when data or the offending
    predictor is removed, and mixed outputs contain only valid columns."""
    titanic = Table("titanic")
    majority_titanic = MajorityLearner()(titanic)
    majority_iris = MajorityLearner()(self.iris)
    # iris data + one iris predictor + one titanic predictor -> error shown,
    # but the output still carries the valid predictor's columns
    self.send_signal("Data", self.iris)
    self.send_signal("Predictors", majority_iris, 1)
    self.send_signal("Predictors", majority_titanic, 2)
    self.assertTrue(self.widget.Error.predictor_failed.is_shown())
    output = self.get_output("Predictions")
    # 4 metas: predicted class + 3 class probabilities for iris
    self.assertEqual(len(output.domain.metas), 4)
    # only the failing predictor remains -> error stays, no output
    self.send_signal("Predictors", None, 1)
    self.assertTrue(self.widget.Error.predictor_failed.is_shown())
    self.assertIsNone(self.get_output("Predictions"))
    # without data there is nothing to fail on
    self.send_signal("Data", None)
    self.assertFalse(self.widget.Error.predictor_failed.is_shown())
    self.assertIsNone(self.get_output("Predictions"))
    self.send_signal("Predictors", None, 2)
    self.assertFalse(self.widget.Error.predictor_failed.is_shown())
    self.assertIsNone(self.get_output("Predictions"))
    self.send_signal("Predictors", majority_titanic, 2)
    self.assertFalse(self.widget.Error.predictor_failed.is_shown())
    self.assertIsNone(self.get_output("Predictions"))
    # data arrives after the mismatching predictor -> error re-appears
    self.send_signal("Data", self.iris)
    self.assertTrue(self.widget.Error.predictor_failed.is_shown())
    self.assertIsNone(self.get_output("Predictions"))
    # replacing it with a matching predictor clears the error
    self.send_signal("Predictors", majority_iris, 2)
    self.assertFalse(self.widget.Error.predictor_failed.is_shown())
    # NOTE(review): `output` here is the stale reference captured above,
    # not a fresh get_output call — presumably intentional; verify
    self.assertEqual(len(output.domain.metas), 4)
    # two valid iris predictors -> 2 x 4 meta columns, the titanic one on
    # channel 3 contributes nothing
    self.send_signal("Predictors", majority_iris, 1)
    self.send_signal("Predictors", majority_titanic, 3)
    output = self.get_output("Predictions")
    self.assertEqual(len(output.domain.metas), 8)
def test_weights(self):
    """Instance weights shift the majority: 30 rows of class 1 at weight 3
    outweigh 70 rows of class 0 at weight 1.

    Fix: ``np.random.random_integers`` was deprecated in NumPy 1.11 and
    removed in NumPy 1.25; ``np.random.randint(1, 4, ...)`` draws the same
    values from {1, 2, 3}.
    """
    nrows = 100
    ncols = 10
    x = np.random.randint(1, 4, (nrows, ncols))
    y = np.array(70 * [0] + 30 * [1]).reshape((nrows, 1))
    heavy_class = 1
    # weight 3 for the heavy class, 1 otherwise: 30*3 = 90 > 70*1
    w = (y == heavy_class) * 2 + 1
    t = Table(x, y, W=w)
    learn = MajorityLearner()
    clf = learn(t)
    y2 = clf(x)
    self.assertTrue((y2 == heavy_class).all())
def test_missing(self):
    """MajorityLearner ignores rows with unknown classes and still returns
    predictions when every class is unknown."""
    iris = Table('iris')
    learn = MajorityLearner()
    # unset the class on every second row of the first half; class 2 keeps
    # all 50 rows and stays the majority among the remaining labels
    for inst in iris[:len(iris) // 2:2]:
        inst.set_class("?")
    predictions = learn(iris)(iris)
    self.assertTrue((predictions == 2).all())
    # no known classes at all: the learner must still produce predictions
    for inst in iris:
        inst.set_class("?")
    predictions = learn(iris)(iris)
    # NOTE(review): .all() yields a single bool, so this only asserts that
    # every prediction is non-zero — presumably intentional; verify
    self.assertEqual(predictions.all(), 1)
def test_memory_error(self):
    """
    Handling memory error.
    GH-2316
    """
    data = Table("iris")[::15]
    self.send_signal(self.widget.Inputs.train_data, data)
    # no error before any evaluation has run
    self.assertFalse(self.widget.Error.memory_error.is_shown())
    # force the result-augmentation step to raise MemoryError; the widget
    # must surface it as Error.memory_error instead of crashing
    with unittest.mock.patch(
            "Orange.evaluation.testing.Results.get_augmented_data",
            side_effect=MemoryError):
        self.send_signal(self.widget.Inputs.learner, MajorityLearner(), 0,
                         wait=5000)
        self.assertTrue(self.widget.Error.memory_error.is_shown())
def test_store_models(self):
    """CrossValidation stores per-fold models of the expected classes."""
    nrows, ncols = 100, 10
    table = random_data(nrows, ncols)
    learners = [NaiveBayesLearner(), MajorityLearner()]
    self.assertIsNone(CrossValidation(table, learners, k=5).models)
    results = CrossValidation(table, learners, k=5, store_models=True)
    # one model list per fold, one model per learner, in learner order
    self.assertEqual(len(results.models), 5)
    expected_classes = (NaiveBayesModel, ConstantModel)
    for fold_models in results.models:
        self.assertEqual(len(fold_models), 2)
        for model, klass in zip(fold_models, expected_classes):
            self.assertIsInstance(model, klass)
def test_store_models(self):
    """TestOnTrainingData stores one model list of the expected classes."""
    nrows, ncols = 50, 10
    table = random_data(nrows, ncols)
    learners = [NaiveBayesLearner(), MajorityLearner()]
    self.assertIsNone(TestOnTrainingData(table, learners).models)
    results = TestOnTrainingData(table, learners, store_models=True)
    # single train-on-all "fold" -> exactly one model list
    self.assertEqual(len(results.models), 1)
    expected_classes = (NaiveBayesModel, ConstantModel)
    for fold_models in results.models:
        self.assertEqual(len(fold_models), 2)
        for model, klass in zip(fold_models, expected_classes):
            self.assertIsInstance(model, klass)
def test_no_stratification(self):
    """The cant_stratify warning tracks the stratification checkbox, the
    fold count and the dataset; numeric targets get an Information note."""
    w = self.widget
    w.cv_stratified = True
    # zoo has classes too small for 2-fold stratification -> warning
    self._test_scores(
        Table("zoo"), None, MajorityLearner(), OWTestAndScore.KFold, 2)
    self.assertTrue(w.Warning.cant_stratify.is_shown())
    # toggling stratification off hides the warning, on shows it again
    w.controls.cv_stratified.click()
    self.assertFalse(w.Warning.cant_stratify.is_shown())
    w.controls.cv_stratified.click()
    self.assertTrue(w.Warning.cant_stratify.is_shown())
    # fewer folds (combo index 0) make stratification feasible
    w.controls.n_folds.setCurrentIndex(0)
    w.controls.n_folds.activated[int].emit(0)
    self.assertFalse(w.Warning.cant_stratify.is_shown())
    w.controls.n_folds.setCurrentIndex(2)
    w.controls.n_folds.activated[int].emit(2)
    self.assertTrue(w.Warning.cant_stratify.is_shown())
    # iris is large enough per class -> no warning
    self._test_scores(
        Table("iris"), None, MajorityLearner(), OWTestAndScore.KFold, 2)
    self.assertFalse(w.Warning.cant_stratify.is_shown())
    # switching back to zoo re-triggers it
    self._test_scores(
        Table("zoo"), None, MajorityLearner(), OWTestAndScore.KFold, 2)
    self.assertTrue(w.Warning.cant_stratify.is_shown())
    # regression data: no stratification warning, but an informational
    # note that numeric targets cannot be stratified
    self._test_scores(
        Table("housing"), None, MeanLearner(), OWTestAndScore.KFold, 2)
    self.assertFalse(w.Warning.cant_stratify.is_shown())
    self.assertTrue(w.Information.cant_stratify_numeric.is_shown())
    w.controls.cv_stratified.click()
    self.assertFalse(w.Warning.cant_stratify.is_shown())
def test_obsolete_call_constructor(self, validation_call):
    """Constructing a Validation with data and learners (obsolete call
    style) warns, strips the run-only arguments from __init__, and runs
    the evaluation immediately.

    `validation_call` is presumably the patched Validation call injected
    by a decorator outside this view — verify against the test class.
    """
    class MockValidation(Validation):
        # class-level slots to record what actually reaches __init__
        args = kwargs = None

        def __init__(self, *args, **kwargs):
            super().__init__()
            MockValidation.args = args
            MockValidation.kwargs = kwargs

        def get_indices(self, data):
            pass

    data = self.data
    learners = [MajorityLearner(), MajorityLearner()]
    kwargs = dict(foo=42, store_data=43, store_models=44, callback=45,
                  n_jobs=46)
    # the obsolete signature must emit a DeprecationWarning
    self.assertWarns(DeprecationWarning, MockValidation, data,
                     learners=learners, **kwargs)
    # positional data is consumed before __init__ -> no positional args
    self.assertEqual(MockValidation.args, ())
    kwargs.pop(
        "n_jobs"
    )  # do not pass n_jobs and callback from __new__ to __init__
    kwargs.pop("callback")
    self.assertEqual(MockValidation.kwargs, kwargs)
    # the instance must have been invoked once with the data and learners
    cargs, ckwargs = validation_call.call_args
    self.assertEqual(len(cargs), 1)
    self.assertIs(cargs[0], data)
    self.assertIs(ckwargs["learners"], learners)
    self.assertEqual(ckwargs["callback"], 45)
def test_one_class_value(self):
    """A target with a single class value must produce a train-data error
    instead of crashing the widget (GH-2351)."""
    domain = Domain(
        [ContinuousVariable("a"), ContinuousVariable("b")],
        [DiscreteVariable("c", values=("y", ))])
    rows = list(zip([42.48, 16.84, 15.23, 23.8], [1., 2., 3., 4.], "yyyy"))
    table = Table.from_list(domain, rows)
    self.widget.n_folds = 0
    self.assertFalse(self.widget.Error.train_data_error.is_shown())
    self.send_signal("Data", table)
    self.send_signal("Learner", MajorityLearner(), 0, wait=1000)
    self.assertTrue(self.widget.Error.train_data_error.is_shown())
def test_missing(self):
    """MajorityLearner ignores rows with unknown classes and still returns
    predictions when every class is unknown (locked-table API)."""
    iris = Table('iris')
    learn = MajorityLearner()
    # unset the class on every second row of the first half; class 2 keeps
    # all its rows and stays the majority among the remaining labels
    partial = iris[:len(iris) // 2:2]
    with partial.unlocked():
        for inst in partial:
            inst.set_class("?")
    predictions = learn(iris)(iris)
    self.assertTrue((predictions == 2).all())
    # no known classes at all: the learner must still produce predictions
    with iris.unlocked():
        for inst in iris:
            inst.set_class("?")
    predictions = learn(iris)(iris)
    # NOTE(review): .all() yields a single bool, so this only asserts that
    # every prediction is non-zero — presumably intentional; verify
    self.assertEqual(predictions.all(), 1)
def test_replacement(self):
    """Model imputation fills unknowns, leaves knowns, keeps source intact.

    Fix: the original re-evaluated ``v.compute_value(table)`` inside the
    or-ed tie assertion; the majority model may break the 1-vs-2 tie
    differently on each call, so two calls could draw two different
    results and the test could fail spuriously. The column is now
    computed once and reused.
    """
    from Orange.classification import MajorityLearner, SimpleTreeLearner
    from Orange.regression import MeanLearner

    nan = np.nan
    X = [
        [1.0, nan, 0.0],
        [2.0, 1.0, 3.0],
        [nan, nan, nan]
    ]
    unknowns = np.isnan(X)
    domain = data.Domain(
        (data.DiscreteVariable("A", values=["0", "1", "2"]),
         data.ContinuousVariable("B"),
         data.ContinuousVariable("C"))
    )
    table = data.Table.from_numpy(domain, np.array(X))

    # discrete column A: known values 1 and 2 appear once each, so the
    # imputed third row may be either
    v = impute.Model(MajorityLearner())(table, domain[0])
    imputed_a = v.compute_value(table)
    self.assertTrue(np.all(np.isfinite(imputed_a)))
    self.assertTrue(np.all(imputed_a == [1., 2., 1.]) or
                    np.all(imputed_a == [1., 2., 2.]))

    # continuous column B: mean of the single known value 1.0
    v = impute.Model(MeanLearner())(table, domain[1])
    imputed_b = v.compute_value(table)
    self.assertTrue(np.all(np.isfinite(imputed_b)))
    self.assertTrue(np.all(imputed_b == [1., 1., 1.]))

    imputer = preprocess.Impute(impute.Model(SimpleTreeLearner()))
    itable = imputer(table)

    # Original data should keep unknowns
    self.assertTrue(np.all(np.isnan(table.X) == unknowns))
    self.assertTrue(np.all(itable.X[~unknowns] == table.X[~unknowns]))

    Aimp = itable.domain["A"].compute_value
    self.assertIsInstance(Aimp, impute.ReplaceUnknownsModel)

    col = Aimp(table)
    self.assertEqual(col.shape, (len(table),))
    self.assertTrue(np.all(np.isfinite(col)))

    # a single-row slice must also impute to a finite 1-element column
    v = Aimp(table[-1])
    self.assertEqual(v.shape, (1,))
    self.assertTrue(np.all(np.isfinite(v)))
def test_basic(self):
    """The widget emits populated Results for both classification and
    regression inputs."""
    def fetch_results():
        # common checks on the evaluation output
        out = self.get_output("Evaluation Results")
        self.assertIsInstance(out, Results)
        self.assertIsNotNone(out.domain)
        self.assertIsNotNone(out.data)
        return out

    # classification: probabilities must be present as well
    self.send_signal("Data", Table("iris")[::3])
    self.send_signal("Learner", MajorityLearner(), 0)
    res = fetch_results()
    self.assertIsNotNone(res.probabilities)

    # regression
    self.send_signal("Learner", None, 0)
    self.send_signal("Data", Table("housing")[::10])
    self.send_signal("Learner", MeanLearner(), 0)
    fetch_results()
def test_replacement(self):
    """Model imputation fills unknowns, leaves knowns, keeps source intact
    (variant with a class variable present).

    Fix: the original re-evaluated ``v.compute_value(table)`` inside the
    or-ed tie assertion; the majority model may break the 1-vs-2 tie
    differently on each call, so two calls could draw two different
    results and the test could fail spuriously. The column is now
    computed once and reused.
    """
    nan = np.nan
    X = [[1.0, nan, 0.0], [2.0, 1.0, 3.0], [nan, nan, nan]]
    unknowns = np.isnan(X)
    domain = data.Domain(
        (data.DiscreteVariable("A", values=("0", "1", "2")),
         data.ContinuousVariable("B"),
         data.ContinuousVariable("C")),
        # the class is here to ensure the backmapper in model does not
        # run and raise exception
        data.DiscreteVariable("Z", values=("P", "M")))
    table = data.Table.from_numpy(domain, np.array(X), [0] * 3)

    # discrete column A: known values 1 and 2 appear once each, so the
    # imputed third row may be either
    v = impute.Model(MajorityLearner())(table, domain[0])
    imputed_a = v.compute_value(table)
    self.assertTrue(np.all(np.isfinite(imputed_a)))
    self.assertTrue(np.all(imputed_a == [1., 2., 1.]) or
                    np.all(imputed_a == [1., 2., 2.]))

    # continuous column B: mean of the single known value 1.0
    v = impute.Model(MeanLearner())(table, domain[1])
    imputed_b = v.compute_value(table)
    self.assertTrue(np.all(np.isfinite(imputed_b)))
    self.assertTrue(np.all(imputed_b == [1., 1., 1.]))

    imputer = preprocess.Impute(impute.Model(SimpleTreeLearner()))
    itable = imputer(table)

    # Original data should keep unknowns
    self.assertTrue(np.all(np.isnan(table.X) == unknowns))
    self.assertTrue(np.all(itable.X[~unknowns] == table.X[~unknowns]))

    Aimp = itable.domain["A"].compute_value
    self.assertIsInstance(Aimp, impute.ReplaceUnknownsModel)

    col = Aimp(table)
    self.assertEqual(col.shape, (len(table), ))
    self.assertTrue(np.all(np.isfinite(col)))

    # a single-row slice must also impute to a finite 1-element column
    v = Aimp(table[-1])
    self.assertEqual(v.shape, (1, ))
    self.assertTrue(np.all(np.isfinite(v)))