def setUp(self):
    """Prepare a default-constructed forest and a tiny categorical sample.

    The sample is a 3x3 array of single-letter string features with one
    integer label per row.
    """
    categorical_rows = [
        ['A', 'B', 'A'],
        ['B', 'B', 'C'],
        ['A', 'C', 'B'],
    ]
    self.decision_forest = DecisionForestClassifier()
    self.X = np.array(categorical_rows)
    self.y = np.array([1, 1, 0])
from sklearn.model_selection import train_test_split

from examples import load_data
from proactive_forest.estimator import DecisionForestClassifier, ProactiveForestClassifier

# Script: fit both ensembles on the same training split and print the
# diversity measure each one reports on the held-out split.
if __name__ == '__main__':
    features, target = load_data.load_kr_vs_kp()

    # Fixed seed so both models always see the same 67/33 partition.
    split = train_test_split(features, target, test_size=0.33, random_state=4)
    X_train, X_test, y_train, y_test = split

    proactive = ProactiveForestClassifier(alpha=0.1, bootstrap=True)
    baseline = DecisionForestClassifier(
        split_chooser='best',
        feature_selection='log',
        bootstrap=True,
    )

    # Fit sequentially, reporting progress after each model finishes.
    for model, progress_message in (
        (proactive, 'Processed: Proactive Forest'),
        (baseline, 'Processed: Random Forest'),
    ):
        model.fit(X_train, y_train)
        print(progress_message)

    pf_diversity = proactive.diversity_measure(X_test, y_test)
    rf_diversity = baseline.diversity_measure(X_test, y_test)

    print('Proactive Forest Diversity: ', pf_diversity)
    print('Random Forest Diversity: ', rf_diversity)
def test_bootstrap_admissible_value(self):
    """A forest built with ``bootstrap=True`` exposes a truthy ``bootstrap``."""
    forest = DecisionForestClassifier(bootstrap=True)
    self.decision_forest = forest
    self.assertTrue(forest.bootstrap)
class DecisionForestClassifierTest(TestCase):
    """Unit tests for ``DecisionForestClassifier`` using mocked trees and encoders."""

    @staticmethod
    def _fake_tree(weight=None, prediction=None, importances=None):
        """Build a ``DecisionTree``-spec'd mock, configuring only what is given.

        Attributes left as ``None`` are not touched, so the mock keeps its
        default auto-generated behaviour for them.
        """
        tree = mock.MagicMock(spec=DecisionTree)
        if weight is not None:
            tree.weight = weight
        if prediction is not None:
            tree.predict.return_value = prediction
        if importances is not None:
            tree.feature_importances.return_value = importances
        return tree

    def setUp(self):
        """Create a default forest and a 3x3 categorical sample with labels."""
        self.decision_forest = DecisionForestClassifier()
        self.X = np.array([
            ['A', 'B', 'A'],
            ['B', 'B', 'C'],
            ['A', 'C', 'B'],
        ])
        self.y = np.array([1, 1, 0])

    def tearDown(self):
        """Nothing to clean up."""

    def test_fit(self):
        """Fitting populates the encoder, the tree builder and the tree list."""
        forest = self.decision_forest
        forest.fit(self.X, self.y)

        for fitted_attribute in (forest._encoder, forest._tree_builder, forest._trees):
            self.assertIsNotNone(fitted_attribute)
        self.assertIsInstance(forest._trees, list)
        self.assertEqual(len(forest._trees), forest.n_estimators)

    def test_predict_one_instance(self):
        """Predicting a single row returns the value the encoder decodes to."""
        forest = self.decision_forest
        forest._n_features = 3
        forest._n_classes = 2

        # Three equally weighted trees voting 1, 0, 1.
        voters = [self._fake_tree(weight=1, prediction=vote) for vote in (1, 0, 1)]

        encoder = mock.MagicMock(spec=LabelEncoder)
        encoder.inverse_transform.return_value = 1
        forest._encoder = encoder
        forest._trees = voters

        single_row = np.array([['A', 'B', 'A']])
        outcome = forest.predict(single_row)

        self.assertEqual(1, outcome)

    def test_predict_two_instances(self):
        """Predicting two rows yields a result of length two."""
        forest = self.decision_forest
        forest._n_features = 3
        forest._n_classes = 2

        # Three equally weighted trees voting 1, 0, 1.
        voters = [self._fake_tree(weight=1, prediction=vote) for vote in (1, 0, 1)]

        encoder = mock.MagicMock(spec=LabelEncoder)
        encoder.inverse_transform.return_value = [1, 1]
        forest._encoder = encoder
        forest._trees = voters

        two_rows = np.array([['A', 'B', 'A'], ['C', 'A', 'A']])
        outcome = forest.predict(two_rows)

        self.assertEqual(2, len(outcome))

    def test_feature_importances(self):
        """Forest importances match the per-tree average to two decimals."""
        forest = self.decision_forest
        forest._n_features = 3
        forest._trees = [
            self._fake_tree(importances=[0.2, 0.2, 0.2]),
            self._fake_tree(importances=[0.3, 0.3, 0.3]),
            self._fake_tree(importances=[0.4, 0.4, 0.4]),
        ]

        wanted = [0.3, 0.3, 0.3]
        obtained = forest.feature_importances()

        self.assertEqual(len(wanted), len(obtained))
        for wanted_value, obtained_value in zip(wanted, obtained):
            self.assertAlmostEqual(wanted_value, obtained_value, places=2)

    def test_trees_mean_weight(self):
        """Mean of weights 1, 0.8 and 0.8 is 0.87 to two decimals."""
        self.decision_forest._trees = [
            self._fake_tree(weight=tree_weight) for tree_weight in (1, 0.8, 0.8)
        ]

        obtained = self.decision_forest.trees_mean_weight()

        self.assertAlmostEqual(0.87, obtained, places=2)

    def test_diversity_measure_exception(self):
        """Asking for the 'kappa' diversity raises ValueError."""
        rows = np.array([['A', 'B', 'A'], ['C', 'A', 'A']])
        labels = [1, 0]

        encoder = mock.MagicMock(spec=LabelEncoder)
        encoder.transform.return_value = [1, 0]
        self.decision_forest._encoder = encoder

        with self.assertRaises(ValueError):
            self.decision_forest.diversity_measure(rows, labels, diversity='kappa')

    def test_diversity_measure(self):
        """The default diversity measure returns something other than None."""
        forest = self.decision_forest
        forest._trees = [self._fake_tree(prediction=vote) for vote in (1, 1, 0)]

        rows = np.array([['A', 'B', 'A'], ['C', 'A', 'A']])
        labels = [1, 0]

        encoder = mock.MagicMock(spec=LabelEncoder)
        encoder.transform.return_value = [1, 0]
        forest._encoder = encoder

        self.assertIsNotNone(forest.diversity_measure(rows, labels))

    def test__validate_exception_not_fitted(self):
        """Validating input on a freshly constructed forest raises NotFittedError."""
        rows = np.array([['A', 'B', 'A'], ['C', 'A', 'A']])

        with self.assertRaises(NotFittedError):
            self.decision_forest._validate(rows, False)

    def test__validate_exception_n_instances(self):
        """Three-column input against ``_n_features = 1`` raises ValueError."""
        forest = self.decision_forest
        # A single mock stands in for the fitted trees here.
        forest._trees = self._fake_tree()
        rows = np.array([['A', 'B', 'A'], ['C', 'A', 'A']])
        forest._n_features = 1

        with self.assertRaises(ValueError):
            forest._validate(rows, False)

    def test__predict_on_tree(self):
        """Each row's prediction equals the tree's mocked return value."""
        stub_tree = self._fake_tree(prediction=1)
        rows = np.array([['A', 'B', 'A'], ['C', 'A', 'A']])

        wanted = [1, 1]
        obtained = self.decision_forest._predict_on_tree(rows, stub_tree, False)

        for wanted_label, obtained_label in zip(wanted, obtained):
            self.assertEqual(wanted_label, obtained_label)
def test_bootstrap_exception_none_value(self):
    """Constructing the forest with ``bootstrap=None`` must raise ValueError."""
    rejected_options = {'bootstrap': None}
    with self.assertRaises(ValueError):
        self.decision_forest = DecisionForestClassifier(**rejected_options)
def test_n_estimators_admissible_value(self):
    """A valid ``n_estimators`` is stored unchanged on the forest."""
    requested_trees = 12
    forest = DecisionForestClassifier(n_estimators=requested_trees)
    self.decision_forest = forest
    self.assertEqual(forest.n_estimators, requested_trees)
def test_n_estimators_none_value(self):
    """Passing ``n_estimators=None`` is accepted and exposed as ``None``."""
    forest = DecisionForestClassifier(n_estimators=None)
    self.decision_forest = forest
    self.assertIsNone(forest.n_estimators)
def test_n_estimators_exception_inadmissible_value(self):
    """A negative ``n_estimators`` must be rejected with ValueError."""
    rejected_options = {'n_estimators': -1}
    with self.assertRaises(ValueError):
        self.decision_forest = DecisionForestClassifier(**rejected_options)
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score, KFold

from examples import load_data
from proactive_forest.estimator import DecisionForestClassifier, ProactiveForestClassifier

# Script: score both ensembles on the same five folds of the data returned
# by load_data.load_iris(), collecting one score per fold for each model.
if __name__ == '__main__':
    X, y = load_data.load_iris()

    pf = ProactiveForestClassifier(alpha=0.1, bootstrap=False)
    rf = DecisionForestClassifier(feature_selection='log', split_chooser='best', bootstrap=False)

    # Disabled alternative using scikit-learn's helper instead of the manual loop:
    #   pf_scores = cross_val_score(pf, X, y, cv=5)
    #   print('Processed: Proactive Forest')
    #   rf_scores = cross_val_score(rf, X, y, cv=5)
    #   print('Processed: Random Forest')

    pf_scores = []
    rf_scores = []

    # random_state only has an effect when shuffle=True; current scikit-learn
    # raises ValueError when a seed is given without shuffling. The seed
    # suggests a reproducible shuffle was intended, so it is enabled here.
    skf = KFold(n_splits=5, shuffle=True, random_state=4)

    for train_index, test_index in skf.split(X, y):
        # KFold yields positional indices, so select by position (.iloc);
        # label-based .loc is only correct for a default 0..n-1 index.
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        pf.fit(X_train, y_train)
        rf.fit(X_train, y_train)

        pf_scores.append(pf.score(X_test, y_test))
        rf_scores.append(rf.score(X_test, y_test))