def test_compile_and_run_cont_sparse(self):
    # pylint: disable=protected-access
    # The same dense rows are wrapped as CSR, CSC and LIL matrices; every
    # wrapping must give the same values from get_values.
    tree = TreeModel(self.data, self.root)

    # Row i of the expected array is (i, 42).
    expected_values = np.vstack((np.arange(8), [42] * 8)).T
    np.testing.assert_equal(tree._values, expected_values)
    self.assertEqual(tree._thresholds[0], 13)
    self.assertEqual(tree._thresholds.shape, (8,))

    nan = float("nan")
    dense = np.array(
        [
            [nan, 0, 0],
            [13, nan, 0],
            [13, 0, 0],
            [13, 1, 0],
            [13, 2, 0],
            [14, 2, nan],
            [14, 2, 2],
            [14, 2, 1],
        ],
        dtype=float,
    )
    for to_sparse in (sp.csr_matrix, sp.csc_matrix, sp.lil_matrix):
        np.testing.assert_equal(
            tree.get_values(to_sparse(dense)), expected_values
        )
def test_print(self):
    # Builds a TreeModel from the fixture's data and root node and compares
    # the text returned by print_tree() with the expected literal below.
    model = TreeModel(self.data, self.root)
    # NOTE(review): the expected literal is reproduced exactly as it appears
    # in this view; its line breaks and indentation look collapsed --
    # confirm against the upstream file before relying on it.
    self.assertEqual(model.print_tree(), """ [ 1 42] v1 ≤ 13 [ 2 42] v2 a [ 3 42] v2 b [ 4 42] v2 c [ 5 42] v1 > 13 [ 6 42] v3 f [ 7 42] v3 d or e """)
def test_null_nodes(self):
    # The root's second child is None. Every evaluation method is expected
    # to return `values` row-for-row for the inputs 0 and 1.
    attribute = DiscreteVariable("d4", "ab")
    target = ContinuousVariable("ey")
    table = Table(Domain([attribute], target))

    values = np.array([[42.0, 43], [44, 45]])
    tree_root = DiscreteNode(attribute, 0, values[1])
    tree_root.children = [Node(None, -1, values[0]), None]
    model = TreeModel(table, tree_root)

    inputs = np.array([[0.0], [1]])
    for method_name in (
        "get_values",
        "get_values_in_python",
        "get_values_by_nodes",
    ):
        evaluate = getattr(model, method_name)
        np.testing.assert_equal(evaluate(inputs), values)
def test_compile_and_run_cont_sparse(self):
    # pylint: disable=protected-access
    # Runs get_values on CSR, CSC and LIL versions of one set of rows and
    # compares each result with the expected (i, 42) rows.
    model = TreeModel(self.data, self.root)
    expected_values = np.vstack((np.arange(8), [42] * 8)).T
    np.testing.assert_equal(model._values, expected_values)
    self.assertEqual(model._thresholds[0], 13)
    self.assertEqual(model._thresholds.shape, (8,))

    nan = float("nan")
    rows = [
        [nan, 0, 0],
        [13, nan, 0],
        [13, 0, 0],
        [13, 1, 0],
        [13, 2, 0],
        [14, 2, nan],
        [14, 2, 2],
        [14, 2, 1],
    ]
    for sparse_type in (sp.csr_matrix, sp.csc_matrix, sp.lil_matrix):
        # A fresh dense array is built for every sparse format.
        x = sparse_type(np.array(rows, dtype=float))
        np.testing.assert_equal(model.get_values(x), expected_values)
def test_compile_and_run_cont(self):
    # I investigate, I have a warrant
    # pylint: disable=protected-access
    tree = TreeModel(self.data, self.root)

    # Row i of the expected array is (i, 42).
    expected_values = np.vstack((np.arange(8), [42] * 8)).T
    np.testing.assert_equal(tree._values, expected_values)
    self.assertEqual(tree._thresholds[0], 13)
    self.assertEqual(tree._thresholds.shape, (8,))

    nan = float("nan")
    samples = np.array(
        [[nan, 0, 0],
         [13, nan, 0],
         [13, 0, 0],
         [13, 1, 0],
         [13, 2, 0],
         [14, 2, nan],
         [14, 2, 2],
         [14, 2, 1]],
        dtype=float)

    # The three evaluation paths must agree on the same input.
    for evaluate in (
        tree.get_values,
        tree.get_values_in_python,
        tree.get_values_by_nodes,
    ):
        np.testing.assert_equal(evaluate(samples), expected_values)
    np.testing.assert_equal(tree.predict(samples), np.arange(8).astype(int))

    # Second part: build a tree over a domain whose target is discrete and
    # compare predict() against the row-normalised expected values.
    cont_attr = ContinuousVariable("d1")
    abc_attr = DiscreteVariable("d2", "abc")
    def_attr = DiscreteVariable("d3", "def")
    class_var = DiscreteVariable("dy")
    class_domain = Domain([cont_attr, abc_attr, def_attr], class_var)
    class_data = Table(class_domain, np.zeros((10, 4)))

    class_root = NumericNode(cont_attr, 0, 13, np.array([0.0, 42]))
    lower = DiscreteNode(abc_attr, 1, np.array([1, 42]))
    lower.children = [
        Node(None, None, np.array([leaf_value, 42]))
        for leaf_value in [2, 3, 4]
    ]
    upper = MappedDiscreteNode(
        def_attr, 2, np.array([1, 1, 0]), np.array([5, 42]))
    upper.children = [
        Node(None, None, np.array([leaf_value, 42]))
        for leaf_value in [6, 7]
    ]
    class_root.children = [lower, upper]
    tree = TreeModel(class_data, class_root)

    row_totals = np.sum(expected_values, axis=1)[:, np.newaxis]
    normalized = expected_values / row_totals
    np.testing.assert_equal(tree.predict(samples), normalized)
def test_methods(self):
    # Checks the size and shape accessors of the model, then fetches the
    # instances for the root and its first child.
    tree = TreeModel(self.data, self.root)
    self.assertEqual(tree.node_count(), 8)
    self.assertEqual(tree.leaf_count(), 5)
    self.assertEqual(tree.depth(), 2)
    self.assertIs(tree.root, self.root)

    first_child = self.root.children[0]
    # The child's subset is overwritten here before get_instances is called.
    first_child.subset = np.array([2, 3])
    instances = tree.get_instances([self.root, first_child])

    self.assertIsInstance(instances, Table)
    self.assertEqual(len(instances), 2)
    expected_x = np.array([[8, 9, 10], [12, 13, 14]])
    expected_y = np.array([11, 15])
    np.testing.assert_equal(instances.X, expected_x)
    np.testing.assert_equal(instances.Y, expected_y)
def test_compile_and_run_cont(self):
    # I investigate, I have a warrant
    # pylint: disable=protected-access
    model = TreeModel(self.data, self.root)
    expected_values = np.vstack((np.arange(8), [42] * 8)).T
    np.testing.assert_equal(model._values, expected_values)
    self.assertEqual(model._thresholds[0], 13)
    self.assertEqual(model._thresholds.shape, (8,))

    nan = float("nan")
    rows = [
        [nan, 0, 0],
        [13, nan, 0],
        [13, 0, 0],
        [13, 1, 0],
        [13, 2, 0],
        [14, 2, nan],
        [14, 2, 2],
        [14, 2, 1],
    ]
    x = np.array(rows, dtype=float)
    np.testing.assert_equal(model.get_values(x), expected_values)
    np.testing.assert_equal(model.get_values_in_python(x), expected_values)
    np.testing.assert_equal(model.get_values_by_nodes(x), expected_values)
    np.testing.assert_equal(model.predict(x), np.arange(8).astype(int))

    # Rebuild an equivalent tree on a domain with a discrete target; the
    # prediction is then compared with each expected row divided by its sum.
    def make_leaves(first_values):
        # One childless Node per value, each carrying the pair (value, 42).
        return [
            Node(None, None, np.array([first, 42])) for first in first_values
        ]

    v1 = ContinuousVariable("d1")
    v2 = DiscreteVariable("d2", "abc")
    v3 = DiscreteVariable("d3", "def")
    y = DiscreteVariable("dy")
    data = Table(Domain([v1, v2, v3], y), np.zeros((10, 4)))

    root = NumericNode(v1, 0, 13, np.array([0.0, 42]))
    left = DiscreteNode(v2, 1, np.array([1, 42]))
    left.children = make_leaves([2, 3, 4])
    right = MappedDiscreteNode(
        v3, 2, np.array([1, 1, 0]), np.array([5, 42])
    )
    right.children = make_leaves([6, 7])
    root.children = [left, right]
    model = TreeModel(data, root)

    normalized = expected_values / expected_values.sum(axis=1, keepdims=True)
    np.testing.assert_equal(model.predict(x), normalized)
def setUp(self):
    # pylint: disable=invalid-name
    # Fixture: a three-attribute domain with a continuous target, a 10x4
    # table filled with 0..39, and a two-level tree wrapped in a TreeModel
    # and a TreeAdapter.
    v1 = ContinuousVariable("v1")
    self.v1 = v1
    v2 = DiscreteVariable("v2", "abc")
    self.v2 = v2
    v3 = DiscreteVariable("v3", "def")
    self.v3 = v3
    y = ContinuousVariable("y")
    self.y = y

    self.domain = Domain([v1, v2, v3], y)
    self.data = Table(self.domain, np.arange(40).reshape(10, 4))

    self.root = NumericNode(v1, 0, 13, np.array([0.0, 42]))
    self.root.subset = np.arange(10, dtype=np.int32)

    left = DiscreteNode(v2, 1, np.array([1, 42]))
    self.left = left
    left.subset = np.array([2, 3, 4, 5])
    left.children = [
        Node(None, None, np.array([first, 42])) for first in (2, 3, 4)
    ]

    right = MappedDiscreteNode(
        v3, 2, np.array([1, 1, 0]), np.array([5, 42]))
    self.right = right
    # The second child of the right node is deliberately None.
    right.children = [Node(None, None, np.array([6, 42])), None]
    right.subset = np.array([8, 9])

    self.root.children = [left, right]
    self.model = TreeModel(self.data, self.root)
    self.adapter = TreeAdapter(self.model)