Пример #1
0
def test_dimension_location():
    """
    Test dimension and location of split.

    A depth-1 tree is refit ``n`` times with a different ``random_state``
    each time, recording ``tree_.feature[0]`` and ``tree_.threshold[0]``.
    Two empirical statistics are then checked:

    * the frequency with which each feature is picked should match the
      feature's range divided by the sum of all feature ranges;
    * the mean threshold among splits on the last feature should match the
      midpoint of that feature's range.
    """
    rng = np.random.RandomState(0)
    X = rng.rand(100, 2)
    # Stretch the second feature so the two ranges differ strongly.
    X[:, 1] *= 100
    y = rng.randn(100)

    mtr = MondrianTreeRegressor(random_state=0, max_depth=1)
    n = 1000

    features = []
    thresholds = []
    for seed in np.arange(n):
        mtr.set_params(random_state=seed).fit(X, y)
        features.append(mtr.tree_.feature[0])
        thresholds.append(mtr.tree_.threshold[0])

    # Check that this converges to the actual probability p of the bernoulli.
    lower_bounds = np.min(X, axis=0)
    upper_bounds = np.max(X, axis=0)
    diff = upper_bounds - lower_bounds
    p_act = diff / np.sum(diff)
    features = np.array(features)
    thresholds = np.array(thresholds)
    # minlength keeps one bin per feature even if a feature is never drawn,
    # so a failure is reported as a value mismatch instead of a shape error.
    counts = np.bincount(features, minlength=X.shape[1])
    # The float conversion keeps this a true division even where `/` on two
    # integer operands would floor (no `from __future__ import division`).
    p_sim = counts / float(np.sum(counts))
    assert_array_almost_equal(p_act, p_sim, 2)

    # Check that the split location converges to the (u + l) / 2 where
    # u and l are the upper and lower bounds of the feature.
    upper = upper_bounds[-1]
    lower = lower_bounds[-1]
    thresh_sim = np.mean(thresholds[features == 1])
    thresh_act = (upper + lower) / 2.0
    assert_array_almost_equal(thresh_act, thresh_sim, 2)
Пример #2
0
def test_tau():
    """
    Test time of split for the root.

    Refits a depth-1 tree under 100 different seeds and checks that the
    average of ``tree_.tau[0]`` agrees, to 2 decimal places, with the
    reciprocal of the summed per-feature ranges of ``X``.
    """
    X, y = make_regression(random_state=0, n_features=10)
    mtr = MondrianTreeRegressor(random_state=0, max_depth=1)

    # Sum over features of (max - min).
    feature_ranges = X.max(axis=0) - X.min(axis=0)
    rate = feature_ranges.sum()
    expected_tau = 1.0 / rate

    root_taus = []
    for seed in np.arange(100):
        mtr.set_params(random_state=seed)
        mtr.fit(X, y)
        root_taus.append(mtr.tree_.tau[0])

    observed_tau = np.mean(root_taus)
    assert_almost_equal(observed_tau, expected_tau, 2)