def test_dimension_location():
    """
    Test dimension and location of split.

    A depth-1 MondrianTreeRegressor is refit ``n`` times, each time with a
    different ``random_state``, and the feature and threshold stored for
    the root node are recorded. Two things are then checked:

    1. The fraction of fits that split on each feature matches that
       feature's range divided by the sum of all feature ranges.
    2. The mean threshold over the fits that split on the last feature
       matches the midpoint of that feature's bounds.
    """
    rng = np.random.RandomState(0)
    X = rng.rand(100, 2)
    # Stretch the second feature so that the two features have very
    # different ranges.
    X[:, 1] *= 100
    y = rng.randn(100)

    mtr = MondrianTreeRegressor(random_state=0, max_depth=1)
    n = 1000
    features = []
    thresholds = []
    for random_state in np.arange(n):
        mtr.set_params(random_state=random_state).fit(X, y)
        features.append(mtr.tree_.feature[0])
        thresholds.append(mtr.tree_.threshold[0])
    features = np.array(features)
    thresholds = np.array(thresholds)

    # Per-feature bounds, shared by both checks below.
    lower = np.min(X, axis=0)
    upper = np.max(X, axis=0)

    # Check that this converges to the actual probability p of the bernoulli.
    diff = upper - lower
    p_act = diff / np.sum(diff)
    # minlength keeps one bin per feature even if a feature is never picked,
    # so a mismatch surfaces as an assertion failure and not a shape error.
    counts = np.bincount(features, minlength=X.shape[1])
    # Explicit float division: both operands are integers.
    p_sim = counts / float(np.sum(counts))
    assert_array_almost_equal(p_act, p_sim, 2)

    # Check that the split location converges to the (u + l) / 2 where
    # u and l are the upper and lower bounds of the feature.
    # NOTE(review): 2 decimals looks like a tight tolerance for a sample
    # mean over a feature scaled by 100 -- confirm it stays stable if n or
    # the seeds are changed.
    thresh_sim = np.mean(thresholds[features == 1])
    thresh_act = (upper[-1] + lower[-1]) / 2.0
    assert_array_almost_equal(thresh_act, thresh_sim, 2)
def test_tau():
    """
    Check the split time recorded at the root node.

    A depth-1 MondrianTreeRegressor is refit under 100 different seeds and
    ``tree_.tau[0]`` is collected from each fit. The average of those
    values is compared, to 2 decimals, against the reciprocal of the
    summed per-feature ranges of ``X``.
    """
    X, y = make_regression(random_state=0, n_features=10)

    # Sum over features of (max - min).
    feature_ranges = X.max(axis=0) - X.min(axis=0)
    rate = feature_ranges.sum()

    mtr = MondrianTreeRegressor(max_depth=1, random_state=0)
    root_taus = []
    for seed in np.arange(100):
        mtr.set_params(random_state=seed).fit(X, y)
        root_taus.append(mtr.tree_.tau[0])

    observed = np.mean(root_taus)
    expected = 1.0 / rate
    assert_almost_equal(observed, expected, 2)