def test_adaboost_base_estimator(self):
    """AdaBoost built on a decision stump must score worse than one built
    on a full tree (4-fold CV on iris, classification accuracy)."""
    np.random.seed(0)  # fix the seed so CV folds and CA are reproducible
    stump_estimator = TreeLearner(max_depth=1)
    tree_estimator = TreeLearner()
    stump = SklAdaBoostLearner(base_estimator=stump_estimator)
    tree = SklAdaBoostLearner(base_estimator=tree_estimator)
    # instance-based CrossValidation API, consistent with the rest of the
    # suite; avoids the deprecated CrossValidation(data, learners, k=...)
    cv = CrossValidation(k=4)
    results = cv(self.iris, [stump, tree])
    ca = CA(results)
    self.assertLess(ca[0], ca[1])
def test_set_learner(self):
    """ Test if learner is set correctly """
    w = self.widget

    def assert_default_learner():
        # with no input learner, the widget must fall back to its
        # built-in LEARNER (logistic regression)
        self.assertEqual(w.learner_other, None)
        self.assertTrue(isinstance(w.learner, LogisticRegressionLearner))
        self.assertTrue(isinstance(w.learner, w.LEARNER))
        self.assertEqual(type(self.get_output(w.Outputs.learner)),
                         type(LogisticRegressionLearner()))

    def assert_input_learner(learner):
        # the learner from the input must be stored and forwarded
        self.assertEqual(w.learner_other, learner)
        self.assertEqual(type(w.learner), type(learner))
        self.assertEqual(type(self.get_output(w.Outputs.learner)),
                         type(learner))

    # check if empty
    assert_default_learner()

    learner = TreeLearner()
    self.send_signal(w.Inputs.learner, learner)
    # check if learners set correctly
    assert_input_learner(learner)

    # after learner is removed there should be LEARNER used
    self.send_signal(w.Inputs.learner, None)
    assert_default_learner()

    # set it again just in case something goes wrong
    learner = RandomForestLearner()
    self.send_signal(w.Inputs.learner, learner)
    assert_input_learner(learner)

    # change learner this time not from None
    learner = TreeLearner()
    self.send_signal(w.Inputs.learner, learner)
    assert_input_learner(learner)
def add_main_layout(self):
    """Build the widget's "Parameters" box: a label showing the base
    estimator's name plus spin boxes bound to the ``n_estimators`` and
    ``learning_rate`` settings."""
    box = gui.widgetBox(self.controlArea, "Parameters")
    # a fresh TreeLearner is created here and used (at least) for its name
    # in the label below
    self.base_estimator = TreeLearner()
    self.base_label = gui.label(
        box, self, "Base estimator: " + self.base_estimator.name)

    # both controls re-trigger settings_changed so the output learner is
    # rebuilt whenever a parameter changes
    gui.spin(box, self, "n_estimators", 1, 100,
             label="Number of estimators:",
             alignment=Qt.AlignRight, callback=self.settings_changed)
    gui.doubleSpin(box, self, "learning_rate", 1e-5, 1.0, 1e-5,
                   label="Learning rate:", decimals=5,
                   alignment=Qt.AlignRight, controlWidth=90,
                   callback=self.settings_changed)
    # hook for subclasses to append task-specific controls
    self.add_specific_parameters(box)
def test_uses_preprocessors(self):
    """Fitting a learner must invoke the preprocessors it was given."""
    iris = Table('iris')
    preprocessor = Mock(return_value=iris)
    learner = TreeLearner(preprocessors=[preprocessor])
    learner(iris)
    preprocessor.assert_called_with(iris)
def test_raise_no_classifier_error(self):
    """ Regression learner must raise error """
    w = self.widget
    # linear regression learner is regression - should raise
    self.send_signal(w.Inputs.learner, LinearRegressionLearner())
    self.assertTrue(w.Error.no_classifier.is_shown())
    # make it empty to test if error disappear
    self.send_signal(w.Inputs.learner, None)
    self.assertFalse(w.Error.no_classifier.is_shown())
    # no classification learner may trigger the error
    for learner in (LogisticRegressionLearner(), TreeLearner(),
                    RandomForestLearner(), SVMLearner()):
        self.send_signal(w.Inputs.learner, learner)
        self.assertFalse(w.Error.no_classifier.is_shown())
def test_missing_values(self):
    """SHAP values must be computable on data with missing values:
    one array per class, each shaped like the data's X."""
    data = Table("heart_disease.tab")
    model = TreeLearner()(data)
    shap_values, _, _, _ = compute_shap_values(model, data, data)
    self.assertEqual(len(shap_values), 2)
    for class_values in shap_values:
        self.assertTupleEqual(class_values.shape, data.X.shape)
def setUpClass(cls):
    """Fit a tree on the mixin-provided data and expose the model as the
    widget's input signal."""
    super().setUpClass()
    WidgetOutputsTestMixin.init(cls)
    learner = TreeLearner()
    model = learner(cls.data)
    model.instances = cls.data
    cls.model = model
    cls.signal_name = "Tree"
    cls.signal_data = cls.model
def setUpClass(cls):
    """Precompute cross-validation results on iris and titanic that are
    shared by the tests in this class."""
    super().setUpClass()
    bayes = NaiveBayesLearner()
    tree = TreeLearner()
    iris = Table("iris")
    titanic = Table("titanic")
    # instance-based CrossValidation API, consistent with the rest of the
    # suite; avoids the deprecated CrossValidation(data, learners, ...)
    cv = CrossValidation(k=3, store_data=True)
    cls.results_1_iris = cv(iris, [bayes])
    cls.results_2_iris = cv(iris, [bayes, tree])
    cls.results_2_titanic = cv(titanic, [bayes, tree])
def test_input_learner(self):
    """Check if base learner properly changes with learner on the input"""
    max_depth = 2
    default_est = self.widget.base_estimator
    self.assertIsInstance(default_est, TreeLearner)
    self.assertIsNone(default_est.params.get("max_depth"))

    # a learner on the input must replace the default base estimator
    self.send_signal("Learner", TreeLearner(max_depth=max_depth))
    self.assertEqual(
        self.widget.base_estimator.params.get("max_depth"), max_depth)

    # after applying, the output learner must carry the same base estimator
    self.widget.apply_button.button.click()
    base_est = self.get_output("Learner").params.get("base_estimator")
    self.assertEqual(base_est.max_depth, max_depth)
def setUpClass(cls):
    """Prepare shared fixtures: a fitted tree model on the mixin data, a
    model fitted on a dataset with two equal-entropy variables, and a
    hand-built ``TreeModel`` whose node structure the tests inspect."""
    super().setUpClass()
    WidgetOutputsTestMixin.init(cls)
    tree = TreeLearner()
    cls.model = tree(cls.data)
    cls.model.instances = cls.data
    cls.signal_name = "Tree"
    cls.signal_data = cls.model

    # Load a dataset that contains two variables with the same entropy
    data_same_entropy = Table(
        path.join(path.dirname(path.dirname(path.dirname(__file__))),
                  "tests", "datasets", "same_entropy.tab"))
    # NOTE(review): despite the name, this attribute holds the fitted
    # model, not the table; the table itself is attached as `.instances`
    cls.data_same_entropy = tree(data_same_entropy)
    cls.data_same_entropy.instances = data_same_entropy

    # Hand-built tree: the root splits on discrete "aaa" into three
    # children; child0 splits further on mapped-discrete "bbb", child2 on
    # numeric "ccc" at threshold 42, child1 is a leaf.  Each node's
    # `subset` lists the row indices it covers (50 rows total).
    vara = DiscreteVariable("aaa", values=("e", "f", "g"))
    root = DiscreteNode(vara, 0, np.array([42, 8]))
    root.subset = np.arange(50)

    varb = DiscreteVariable("bbb", values=tuple("ijkl"))
    child0 = MappedDiscreteNode(varb, 1, np.array([0, 1, 0, 0]), (38, 5))
    child0.subset = np.arange(16)
    child1 = Node(None, 0, (13, 3))
    child1.subset = np.arange(16, 30)
    varc = ContinuousVariable("ccc")
    child2 = NumericNode(varc, 2, 42, (78, 12))
    child2.subset = np.arange(30, 50)
    root.children = (child0, child1, child2)

    # leaves under child0 (rows 0-9 and 10-15)
    child00 = Node(None, 0, (15, 4))
    child00.subset = np.arange(10)
    child01 = Node(None, 0, (10, 5))
    child01.subset = np.arange(10, 16)
    child0.children = (child00, child01)

    # leaves under child2 (rows 30-34 and 35-49)
    child20 = Node(None, 0, (90, 4))
    child20.subset = np.arange(30, 35)
    child21 = Node(None, 0, (70, 9))
    child21.subset = np.arange(35, 50)
    child2.children = (child20, child21)

    # 50 rows cycling through all (aaa, bbb, ccc) combinations, with a
    # continuous target 0..49
    domain = Domain([vara, varb, varc], ContinuousVariable("y"))
    t = [[i, j, k] for i in range(3) for j in range(4) for k in (40, 44)]
    x = np.array((t * 3)[:50])
    data = Table.from_numpy(domain, x, np.arange(len(x)))
    cls.tree = TreeModel(data, root)
def setUpClass(cls):
    """Fit a tree on the mixin data and on a dataset whose two variables
    share the same entropy; expose both models as class attributes."""
    super().setUpClass()
    WidgetOutputsTestMixin.init(cls)
    learner = TreeLearner()
    cls.model = learner(cls.data)
    cls.model.instances = cls.data
    cls.signal_name = "Tree"
    cls.signal_data = cls.model

    # Load a dataset that contains two variables with the same entropy
    base_dir = path.dirname(path.dirname(path.dirname(__file__)))
    dataset_path = path.join(
        base_dir, "tests", "datasets", "same_entropy.tab")
    data_same_entropy = Table(dataset_path)
    cls.data_same_entropy = learner(data_same_entropy)
    cls.data_same_entropy.instances = data_same_entropy
def setUpClass(cls):
    """Precompute CV results on iris and titanic used as widget inputs."""
    super().setUpClass()
    WidgetOutputsTestMixin.init(cls)
    bayes = NaiveBayesLearner()
    tree = TreeLearner()
    # `data` is defined in WidgetOutputsTestMixin, pylint: disable=no-member
    cls.iris = cls.data
    titanic = Table("titanic")
    cross_validation = CrossValidation(k=3, store_data=True)
    cls.results_1_iris = cross_validation(cls.iris, [bayes])
    cls.results_2_iris = cross_validation(cls.iris, [bayes, tree])
    cls.results_2_titanic = cross_validation(titanic, [bayes, tree])
    cls.signal_name = "Evaluation Results"
    cls.signal_data = cls.results_1_iris
    cls.same_input_output_domain = False
def setUpClass(cls):
    """Precompute cross-validation results on iris and titanic that are
    shared by the tests in this class."""
    super().setUpClass()
    WidgetOutputsTestMixin.init(cls)
    bayes = NaiveBayesLearner()
    tree = TreeLearner()
    iris = cls.data
    titanic = Table("titanic")
    # instance-based CrossValidation API, consistent with the rest of the
    # suite; avoids the deprecated CrossValidation(data, learners, ...)
    cv = CrossValidation(k=3, store_data=True)
    cls.results_1_iris = cv(iris, [bayes])
    cls.results_2_iris = cv(iris, [bayes, tree])
    cls.results_2_titanic = cv(titanic, [bayes, tree])
    cls.signal_name = "Evaluation Results"
    cls.signal_data = cls.results_1_iris
    cls.same_input_output_domain = False
def test_orange_models(self):
    """Permutation feature importance must work with Orange learners on
    classification (CA, AUC) and regression (MSE) tasks alike."""
    n_repeats = self.n_repeats

    def assert_importance(data, model, score):
        # one importance row per attribute, one column per repeat;
        # names must come back in the domain's attribute order
        res = permutation_feature_importance(model, data, score, n_repeats)
        self.assertEqual(res[0].shape,
                         (len(data.domain.attributes), n_repeats))
        self.assertEqual(res[1], [a.name for a in data.domain.attributes])

    assert_importance(self.heart, NaiveBayesLearner()(self.heart), CA())
    assert_importance(self.iris, TreeLearner()(self.iris), AUC())
    assert_importance(self.housing, TreeRegressionLearner()(self.housing),
                      MSE())
def test_set_learner(self):
    """ Test if learner is set correctly """
    w = self.widget
    input_learner = TreeLearner()
    self.send_signal(w.Inputs.learner, input_learner)

    # check if learners set correctly
    self.assertEqual(w.learner_other, input_learner)
    self.assertEqual(type(w.learner), type(input_learner))
    self.assertEqual(type(self.get_output(w.Outputs.learner)),
                     type(input_learner))

    # after learner is removed there should be LEARNER used
    self.send_signal(w.Inputs.learner, None)
    self.assertIsNone(w.learner_other)
    self.assertIsInstance(w.learner, LogisticRegressionLearner)
    self.assertIsInstance(w.learner, w.LEARNER)
    self.assertEqual(type(self.get_output(w.Outputs.learner)),
                     type(LogisticRegressionLearner()))
def test_reprs(self):
    """repr() of each learner must eval back to an equivalent learner
    (round-trip: repr -> eval -> repr yields the same string)."""
    learners = [
        LogisticRegressionLearner(tol=0.0002),
        MajorityLearner(),
        NaiveBayesLearner(),
        RandomForestLearner(bootstrap=False, n_jobs=3),
        SimpleTreeLearner(seed=1, bootstrap=True),
        SoftmaxRegressionLearner(),
        SVMLearner(shrinking=False),
        LinearSVMLearner(tol=0.022, dual=False),
        NuSVMLearner(tol=0.003, cache_size=190),
        OneClassSVMLearner(degree=2),
        TreeLearner(max_depth=3, min_samples_split=1),
        KNNLearner(n_neighbors=4),
        EllipticEnvelopeLearner(store_precision=False),
        SimpleRandomForestLearner(n_estimators=20),
    ]
    for learner in learners:
        representation = repr(learner)
        rebuilt = eval(representation)
        self.assertEqual(repr(rebuilt), representation)
def test_tree(self):
    """Tree AUC on iris under 2-fold CV must be good but not perfect."""
    tree = TreeLearner()
    # instance-based CrossValidation API, consistent with the rest of the
    # suite; avoids the deprecated CrossValidation(data, learners, k=...)
    cv = CrossValidation(k=2)
    res = cv(self.iris, [tree])
    auc = AUC(res)[0]
    self.assertGreater(auc, 0.8)
    self.assertLess(auc, 1.)
def test_init_learner(self):
    """ Test init """
    w = self.widget

    def has_preprocessor(pp_type):
        # True iff any of the current learner's preprocessors is of
        # pp_type; replaces the repeated reduce(lambda ...) idiom
        return any(isinstance(pp, pp_type)
                   for pp in w.learner.preprocessors)

    learner = TreeLearner()
    # check if empty
    self.assertTrue(isinstance(w.learner, LogisticRegressionLearner))
    self.assertTrue(isinstance(w.learner, w.LEARNER))
    self.assertTrue(has_preprocessor(w.default_preprocessor))

    self.send_signal(w.Inputs.learner, learner)
    # check if learners set correctly
    self.assertEqual(type(w.learner), type(learner))

    # after learner is removed there should be LEARNER used
    self.send_signal(w.Inputs.learner, None)
    self.assertTrue(isinstance(w.learner, LogisticRegressionLearner))
    self.assertTrue(isinstance(w.learner, w.LEARNER))
    self.assertTrue(has_preprocessor(w.default_preprocessor))

    # set it again just in case something goes wrong
    learner = RandomForestLearner()
    self.send_signal(w.Inputs.learner, learner)
    self.assertEqual(type(w.learner), type(learner))
    self.assertTrue(has_preprocessor(w.default_preprocessor))

    # change learner this time not from None
    learner = TreeLearner()
    self.send_signal(w.Inputs.learner, learner)
    self.assertEqual(type(w.learner), type(learner))
    self.assertTrue(has_preprocessor(w.default_preprocessor))

    # set other preprocessor
    preprocessor = Discretize
    # selected this preprocessor because know that not exist in LogReg
    self.send_signal(w.Inputs.preprocessor, preprocessor())
    self.assertEqual(type(w.learner), type(learner))
    self.assertTrue(has_preprocessor(w.default_preprocessor))
    self.assertTrue(has_preprocessor(preprocessor))

    # remove preprocessor
    self.send_signal(w.Inputs.preprocessor, None)
    self.assertEqual(type(w.learner), type(learner))
    self.assertTrue(has_preprocessor(w.default_preprocessor))
    self.assertFalse(has_preprocessor(preprocessor))
def test_get_tree_classification(self):
    """A tree classifier trained on iris must expose its underlying Tree."""
    data = Table('iris')
    classifier = TreeLearner()(data)
    self.assertIsInstance(classifier.tree, Tree)
from Orange.data import Domain, Table
from Orange.classification import LogisticRegressionLearner
from Orange.classification import NaiveBayesLearner
from Orange.classification import TreeLearner
from Orange.classification import RandomForestLearner
from Orange.classification import KNNLearner
from Orange.classification import SVMLearner

### create models ###
# candidate classifiers; note the list is built here but not yet used below
models = [
    LogisticRegressionLearner(),
    NaiveBayesLearner(),
    TreeLearner(),
    RandomForestLearner(),
    KNNLearner(),
    SVMLearner(),
]

### read train data ###
train = Table.from_file('train.csv')

# move `sex` from X to Y (from attributes/features to class_var/target)
# i.e. the first attribute becomes the class variable, the rest stay features
domain = Domain(train.domain.attributes[1:], train.domain.attributes[0])
train = train.transform(domain)

# show the resulting feature matrix and target vector
print('\n=== train.X ===')
print(train.X)
print('\n=== train.Y ===')
print(train.Y)
class OWAdaBoostClassification(OWBaseLearner):
    """Orange widget wrapping the scikit-learn AdaBoost classifier.

    An optional "Learner" input replaces the default tree base estimator;
    removing it restores the default.
    """
    name = "AdaBoost"
    description = "An ensemble meta-algorithm that combines weak learners " \
                  "and adapts to the 'hardness' of each training sample. "
    icon = "icons/AdaBoost.svg"
    priority = 80

    LEARNER = SklAdaBoostLearner

    inputs = [("Learner", LearnerClassification, "set_base_learner")]

    # boosting algorithm names passed through to the learner
    losses = ["SAMME", "SAMME.R"]

    n_estimators = Setting(50)
    learning_rate = Setting(1.)
    algorithm = Setting(0)  # index into `losses`

    # used whenever no learner is connected on the input
    DEFAULT_BASE_ESTIMATOR = TreeLearner()

    def add_main_layout(self):
        """Build the "Parameters" box: base-estimator label plus controls
        bound to n_estimators and learning_rate."""
        box = gui.widgetBox(self.controlArea, "Parameters")
        self.base_estimator = self.DEFAULT_BASE_ESTIMATOR
        self.base_label = gui.label(
            box, self, "Base estimator: " + self.base_estimator.name)

        self.n_estimators_spin = gui.spin(
            box, self, "n_estimators", 1, 100,
            label="Number of estimators:",
            alignment=Qt.AlignRight, callback=self.settings_changed)
        self.learning_rate_spin = gui.doubleSpin(
            box, self, "learning_rate", 1e-5, 1.0, 1e-5,
            label="Learning rate:", decimals=5,
            alignment=Qt.AlignRight, controlWidth=90,
            callback=self.settings_changed)
        self.add_specific_parameters(box)

    def add_specific_parameters(self, box):
        """Add the classification-specific algorithm combo box."""
        self.algorithm_combo = gui.comboBox(
            box, self, "algorithm", label="Algorithm:",
            items=self.losses, orientation=Qt.Horizontal,
            callback=self.settings_changed)

    def create_learner(self):
        """Return an AdaBoost learner configured from the current settings."""
        return self.LEARNER(
            base_estimator=self.base_estimator,
            n_estimators=self.n_estimators,
            learning_rate=self.learning_rate,
            preprocessors=self.preprocessors,
            algorithm=self.losses[self.algorithm])

    def set_base_learner(self, learner):
        """Input handler: adopt `learner` as base estimator, or fall back
        to the default when the input is removed (learner is None)."""
        self.base_estimator = learner if learner \
            else self.DEFAULT_BASE_ESTIMATOR
        self.base_label.setText("Base estimator: " + self.base_estimator.name)
        if self.auto_apply:
            self.apply()

    def get_learner_parameters(self):
        """Return (name, value) pairs shown in the widget's report."""
        return (("Base estimator", self.base_estimator),
                ("Number of estimators", self.n_estimators),
                ("Algorithm", self.losses[self.algorithm].capitalize()))
def test_classification(self):
    """A fitted tree must perfectly reclassify its own training data."""
    data = Table('iris')
    model = TreeLearner()(data)
    predictions = model(data)
    self.assertTrue(np.all(data.Y.flatten() == predictions))