def test_integration(self):
    """Check that unpruned TrainableRules scores exactly like a plain tree.

    Features are extracted from ``self.files`` and split into train and test
    parts.  A ``DecisionTreeClassifier`` and a ``TrainableRules`` instance
    built on the same base model are configured with identical
    hyperparameters and fitted on the train part.  Branch pruning and
    attribute pruning are disabled and the confidence threshold is 0, and the
    test asserts that both estimators report equal scores on the train part
    and on the test part.
    """
    res = self.extractor.extract_features(self.files)
    self.assertIsNotNone(res, "Failed to parse files.")
    X, y, _ = res
    train_X, test_X, train_y, test_y = model_selection.train_test_split(
        X, y, random_state=1989)

    # One dict feeds both estimators so their settings cannot drift apart.
    # "sqrt" is what "auto" meant for classification trees; "auto" itself was
    # deprecated in scikit-learn 1.1 and removed in 1.3.
    tree_params = dict(
        min_samples_leaf=26,
        random_state=1989,
        max_depth=None,
        max_features="sqrt",
        min_samples_split=2,
    )

    model = tree.DecisionTreeClassifier(**tree_params)
    model.fit(train_X, train_y)

    # NOTE(review): the extra keyword arguments are presumably forwarded to
    # the base model by TrainableRules - confirm against its constructor.
    rules = TrainableRules(
        base_model_name="sklearn.tree.DecisionTreeClassifier",
        prune_branches_algorithms=[],
        prune_attributes=False,
        confidence_threshold=0,
        **tree_params)
    rules.fit(train_X, train_y)

    model_score_train = model.score(train_X, train_y)
    model_score_test = model.score(test_X, test_y)
    rules_score_train = rules.score(train_X, train_y)
    rules_score_test = rules.score(test_X, test_y)
    self.assertEqual(rules_score_train, model_score_train)
    self.assertEqual(rules_score_test, model_score_test)
def test_integration(self):
    """Check that unpruned TrainableRules scores exactly like a plain tree.

    Features are extracted from ``self.files`` and split into train and test
    parts.  A ``DecisionTreeClassifier`` and a ``TrainableRules`` instance
    built on the same base model are configured with identical
    hyperparameters and fitted on the train part.  Branch pruning and
    attribute pruning are disabled, and the test asserts that both estimators
    report equal scores on the train part and on the test part.
    """
    features = self.extractor.extract_features(self.files)
    # Fail with a readable message instead of a TypeError on unpacking, in
    # case feature extraction yields None.
    self.assertIsNotNone(features, "Failed to parse files.")
    X, y, _ = features
    train_X, test_X, train_y, test_y = model_selection.train_test_split(
        X, y, random_state=1989)

    # One dict feeds both estimators so their settings cannot drift apart.
    # "sqrt" is what "auto" meant for classification trees; "auto" itself was
    # deprecated in scikit-learn 1.1 and removed in 1.3.
    tree_params = dict(
        min_samples_leaf=26,
        random_state=1989,
        max_depth=None,
        max_features="sqrt",
        min_samples_split=2,
    )

    model = tree.DecisionTreeClassifier(**tree_params)
    model.fit(train_X, train_y)

    # NOTE(review): the extra keyword arguments are presumably forwarded to
    # the base model by TrainableRules - confirm against its constructor.
    rules = TrainableRules(
        "sklearn.tree.DecisionTreeClassifier",
        prune_branches_algorithms=[],
        prune_attributes=False,
        **tree_params)
    rules.fit(train_X, train_y)

    model_score_train = model.score(train_X, train_y)
    model_score_test = model.score(test_X, test_y)
    rules_score_train = rules.score(train_X, train_y)
    rules_score_test = rules.score(test_X, test_y)
    self.assertEqual(rules_score_train, model_score_train)
    self.assertEqual(rules_score_test, model_score_test)
def test_budget(budget):
    """Fit branch-pruned rules with the given budget and return the train score.

    A ``TrainableRules`` instance over a decision tree is built with the
    "top-down-greedy" branch pruning algorithm, passing ``(False, budget)``
    as ``top_down_greedy_budget``.  It is fitted on ``self.train_x`` /
    ``self.train_y`` and scored on that same data.

    NOTE(review): ``self`` is not a parameter of this function; it is
    presumably captured from an enclosing test method - confirm.
    """
    settings = {
        "prune_branches_algorithms": ["top-down-greedy"],
        "prune_attributes": False,
        "top_down_greedy_budget": (False, budget),
        "random_state": 1989,
    }
    pruned = TrainableRules("sklearn.tree.DecisionTreeClassifier", **settings)
    pruned.fit(self.train_x, self.train_y)
    train_score = pruned.score(self.train_x, self.train_y)
    return train_score
def test_tree_attr_pruning(self):
    """Check that attribute pruning keeps test accuracy close to the tree's.

    A ``DecisionTreeClassifier`` and a ``TrainableRules`` instance with
    ``prune_attributes=True`` (and no branch pruning) are fitted on the train
    data with the same ``min_samples_leaf`` and ``random_state``.  The test
    passes when the rules' score on the test data, multiplied by 1.1, is
    greater than the tree's score on the test data.
    """
    shared = {"min_samples_leaf": 26, "random_state": 1989}

    baseline = tree.DecisionTreeClassifier(**shared).fit(
        self.train_x, self.train_y)

    pruned = TrainableRules(
        "sklearn.tree.DecisionTreeClassifier",
        prune_branches_algorithms=[],
        prune_attributes=True,
        **shared)
    pruned.fit(self.train_x, self.train_y)

    baseline_accuracy = baseline.score(self.test_x, self.test_y)
    pruned_accuracy = pruned.score(self.test_x, self.test_y)
    self.assertGreater(pruned_accuracy * 1.1, baseline_accuracy)
def test_tree_no_pruning(self):
    """Check that unpruned rules stay close to the tree on the train data.

    A ``DecisionTreeClassifier`` and a ``TrainableRules`` instance with
    branch pruning and attribute pruning disabled and a confidence threshold
    of 0 are fitted on the train data with the same ``min_samples_leaf`` and
    ``random_state``.  The test passes when the rules' train score,
    multiplied by 1.1, is greater than the tree's train score.
    """
    shared = {"min_samples_leaf": 26, "random_state": 1989}

    baseline = tree.DecisionTreeClassifier(**shared).fit(
        self.train_x, self.train_y)

    unpruned = TrainableRules(
        base_model_name="sklearn.tree.DecisionTreeClassifier",
        prune_branches_algorithms=[],
        confidence_threshold=0,
        prune_attributes=False,
        **shared)
    unpruned.fit(self.train_x, self.train_y)

    baseline_accuracy = baseline.score(self.train_x, self.train_y)
    unpruned_accuracy = unpruned.score(self.train_x, self.train_y)
    self.assertGreater(unpruned_accuracy * 1.1, baseline_accuracy)
def test_forest_no_pruning(self):
    """Check that unpruned rules stay close to the forest on the train data.

    A ``RandomForestClassifier`` and a ``TrainableRules`` instance over the
    same base model, with branch pruning and attribute pruning disabled, are
    fitted on the train data with the same ``n_estimators``,
    ``min_samples_leaf`` and ``random_state``.  The test passes when the
    rules' train score, multiplied by 1.1, is greater than the forest's
    train score.
    """
    shared = {"n_estimators": 50, "min_samples_leaf": 26, "random_state": 1989}

    baseline = ensemble.RandomForestClassifier(**shared).fit(
        self.train_x, self.train_y)

    unpruned = TrainableRules(
        "sklearn.ensemble.RandomForestClassifier",
        prune_branches_algorithms=[],
        prune_attributes=False,
        **shared)
    unpruned.fit(self.train_x, self.train_y)

    baseline_accuracy = baseline.score(self.train_x, self.train_y)
    unpruned_accuracy = unpruned.score(self.train_x, self.train_y)
    self.assertGreater(unpruned_accuracy * 1.1, baseline_accuracy)