Пример #1
0
def test_partial_fit_duplicates():
    """Fit both estimators on 100 copies of one sample and check the result.

    The expected node values ([[[2.0]]] for the regressor, [[[0.0, 100.0]]]
    for the classifier) are handed to ``check_partial_fit_duplicates``, which
    performs the actual assertions.
    """
    n_copies = 100
    single_row = np.random.RandomState(0).randn(1, 100)
    # Stack the same row n_copies times so every sample is a duplicate.
    repeated_rows = np.tile(single_row, (n_copies, 1))
    targets = [2] * n_copies

    regressor = MondrianTreeRegressor(random_state=0)
    regressor.partial_fit(repeated_rows, targets)
    check_partial_fit_duplicates(regressor, [[[2.0]]])

    # All targets are class 2, the second of the two declared classes.
    classifier = MondrianTreeClassifier(random_state=0)
    classifier.partial_fit(repeated_rows, targets, classes=[1, 2])
    check_partial_fit_duplicates(classifier, [[[0.0, 100.0]]])
Пример #2
0
def test_partial_fit_equivalence():
    """Compare a single partial_fit call against batched partial_fit calls.

    Each estimator is fitted once on all 100 samples; for several batch
    sizes ``check_partial_fit_equivalence`` then rebuilds an estimator batch
    by batch and compares it with the one fitted in a single call.
    """
    batch_sizes = [10, 20, 25, 50, 90]

    # Regression case.
    X_reg, y_reg = make_regression(random_state=0, n_samples=100)
    full_regressor = MondrianTreeRegressor(random_state=0)
    full_regressor.partial_fit(X_reg, y_reg)
    for size in batch_sizes:
        check_partial_fit_equivalence(size, full_regressor, 0, X_reg, y_reg)

    # Classification case.
    X_clf, y_clf = make_classification(random_state=0, n_samples=100)
    full_classifier = MondrianTreeClassifier(random_state=0)
    full_classifier.partial_fit(X_clf, y_clf)
    for size in batch_sizes:
        check_partial_fit_equivalence(
            size, full_classifier, 0, X_clf, y_clf, is_clf=True)
Пример #3
0
def test_partial_fit_one_sample():
    """Check the trees built by partial_fit on exactly one sample.

    Both the regressor and the classifier are fitted on the same single
    5-feature sample, and each estimator's own tree is passed to
    ``check_partial_fit_one_sample``.
    """
    rng = np.random.RandomState(0)
    X = rng.randn(1, 5)

    # Regressor: the stored value is the lone target and the variance is 0.
    y = [4.5]
    mtr = MondrianTreeRegressor(random_state=0)
    mtr.partial_fit(X, y)
    assert_array_equal(mtr.tree_.value, [[[4.5]]])
    assert_array_equal(mtr.tree_.variance, [0.0])
    check_partial_fit_one_sample(mtr.tree_)

    # Classifier: validate the classifier's tree. The original re-checked
    # the regressor's tree here, leaving the classifier untested.
    y = [1]
    mtc = MondrianTreeClassifier(random_state=0)
    mtc.partial_fit(X, y, classes=[0, 1])
    check_partial_fit_one_sample(mtc.tree_)
Пример #4
0
def check_partial_fit_equivalence(size_batch, est, random_state, X, y, is_clf=False):
    """Assert that batched partial_fit reproduces the tree of ``est``.

    A fresh estimator is fitted on consecutive batches of ``size_batch``
    samples and its tree attributes are compared with those of ``est``.

    Parameters
    ----------
    size_batch : int
        Number of samples per partial_fit call (the last batch may be
        smaller).
    est : fitted estimator
        Reference estimator whose ``tree_`` is the expected result.
    random_state : int
        Seed given to the freshly built estimator.
    X, y : sequences supporting ``len`` and slicing
        Training data; every sample is used exactly once.
    is_clf : bool, default False
        Build a MondrianTreeClassifier instead of a MondrianTreeRegressor.
    """
    # Derive the sample count from the data rather than assuming 100 rows.
    n_samples = len(X)
    estimator_cls = MondrianTreeClassifier if is_clf else MondrianTreeRegressor
    p_est = estimator_cls(random_state=random_state)

    for start in range(0, n_samples, size_batch):
        end = min(start + size_batch, n_samples)
        p_est.partial_fit(X[start:end], y[start:end])

    assert_array_equal(p_est.tree_.n_node_samples, est.tree_.n_node_samples)
    assert_array_equal(p_est.tree_.threshold, est.tree_.threshold)
    assert_array_equal(p_est.tree_.feature, est.tree_.feature)
    assert_equal(p_est.tree_.root, est.tree_.root)
    assert_array_equal(p_est.tree_.value, est.tree_.value)
def test_partial_fit_toy_data1():
    """Walk the trees built on a four-sample toy set, node by node.

    ``check_and_return_children`` is called for every node with the expected
    statistics (mean and variance for the regressor, per-class counts for the
    classifier); for non-leaf nodes its two return values are used as the
    children to visit next.
    """
    # The third sample lies inside the bounds of the first two samples; the
    # fourth induces a split and creates a new root.
    samples = np.array([
        [2.0, 1.0, 3.0],
        [-1.0, 2.0, 2.0],
        [1.0, 1.5, 2.5],
        [10.0, 5.0, 6.0],
    ])

    # Expected layout (sample indices per node):
    #
    #             [0, 1, 2, 3]
    #                 /\
    #                /  \
    #          [0, 1, 2] [3]
    #            (d=2, f=2.3608)
    #              /\
    #             /  \
    #          [1]    [0, 2]
    #              (d=0, f=1.17251138)
    #                  / \
    #                [2]   [0]
    y_reg = [2, 1, 3, 4]
    regressor = MondrianTreeRegressor(random_state=1)
    regressor.partial_fit(samples, y_reg)
    tree_reg = regressor.tree_

    y_clf = [0, 1, 2, 0]
    classifier = MondrianTreeClassifier(random_state=1)
    classifier.partial_fit(samples, y_clf)
    tree_clf = classifier.tree_

    # --- Regressor: expected (mean, variance) per node. ---
    node_012, leaf_3 = check_and_return_children(
        tree_reg, tree_reg.root, np.mean(y_reg), np.var(y_reg))
    first_three = y_reg[:3]
    leaf_1, node_02 = check_and_return_children(
        tree_reg, node_012, np.mean(first_three), np.var(first_three))
    check_and_return_children(tree_reg, leaf_3, 4.0, 0.0)
    check_and_return_children(tree_reg, leaf_1, 1.0, 0.0)
    target_0, target_2 = y_reg[0], y_reg[2]
    leaf_2, leaf_0 = check_and_return_children(
        tree_reg, node_02,
        (target_0 + target_2) / 2.0,
        np.var([target_0, target_2]))
    check_and_return_children(tree_reg, leaf_2, target_2, 0.0)
    check_and_return_children(tree_reg, leaf_0, target_0, 0.0)

    # --- Classifier: expected per-class counts per node. ---
    node_012, leaf_3 = check_and_return_children(
        tree_clf, tree_clf.root, [[2, 1, 1]])
    leaf_1, node_02 = check_and_return_children(
        tree_clf, node_012, [[1, 1, 1]])
    check_and_return_children(tree_clf, leaf_3, [[1, 0, 0]])
    check_and_return_children(tree_clf, leaf_1, [[0, 1, 0]])
    leaf_2, leaf_0 = check_and_return_children(tree_clf, node_02, [[1, 0, 1]])
    check_and_return_children(tree_clf, leaf_2, [[0, 0, 1]])
    check_and_return_children(tree_clf, leaf_0, [[1, 0, 0]])
Пример #6
0
def test_min_samples_split():
    """Check node sample counts against ``min_samples_split``.

    For each setting, both estimators are fitted in two partial_fit calls
    (first half, then second half of the data). Among the nodes whose
    ``children_left`` is not -1, the smallest ``n_node_samples`` plus one
    must be greater than ``min_samples_split``.
    """
    X_c, y_c = load_digits(return_X_y=True)
    X_r, y_r = make_regression(n_samples=10000, random_state=0)

    # Regressor first, then classifier, for every min_samples_split value.
    cases = (
        (MondrianTreeRegressor, X_r, y_r),
        (MondrianTreeClassifier, X_c, y_c),
    )

    for mss in [2, 4, 10, 20]:
        for estimator_cls, features, targets in cases:
            half = features.shape[0] // 2
            estimator = estimator_cls(random_state=0, min_samples_split=mss)
            estimator.partial_fit(features[:half], targets[:half])
            estimator.partial_fit(features[half:], targets[half:])

            fitted_tree = estimator.tree_
            # Keep only nodes with a left child (presumably the split nodes).
            has_left_child = fitted_tree.children_left != -1
            sample_counts = fitted_tree.n_node_samples[has_left_child]
            assert_greater(np.min(sample_counts) + 1, mss)
Пример #7
0
def test_partial_fit_two_samples():
    """Check the three-node trees built from two samples, over ten seeds.

    For the regressor, the node values must equal the two targets followed
    by their mean, and the variances must be 0, 0 and the targets' variance.
    For the classifier, the per-class counts must be [1, 0], [0, 1], [1, 1].
    Each tree is also passed to ``check_partial_fit_two_samples``.
    """
    rng = np.random.RandomState(10)
    X = rng.randn(2, 5)
    y_reg = rng.randn(2)

    # Expected statistics do not depend on the estimator seed.
    expected_values = [y_reg[0], y_reg[1], np.mean(y_reg)]
    expected_variances = [0, 0, np.var(y_reg)]
    for seed in range(10):
        regressor = MondrianTreeRegressor(random_state=seed)
        regressor.partial_fit(X, y_reg)
        reg_tree = regressor.tree_
        assert_array_almost_equal(reg_tree.value[:, 0, 0], expected_values)
        assert_array_almost_equal(reg_tree.variance, expected_variances)
        check_partial_fit_two_samples(reg_tree, X)

    y_clf = [0, 1]
    expected_counts = [[1, 0], [0, 1], [1, 1]]
    for seed in range(10):
        classifier = MondrianTreeClassifier(random_state=seed)
        classifier.partial_fit(X, y_clf)
        clf_tree = classifier.tree_
        assert_array_almost_equal(clf_tree.value[:, 0, :], expected_counts)
        check_partial_fit_two_samples(clf_tree, X)
Пример #8
0
def test_partial_fit_toy_data2():
    """Walk the balanced trees built on a second four-sample toy set.

    ``check_and_return_children`` is called for every node with the expected
    statistics (mean and variance for the regressor, per-class counts for the
    classifier); for non-leaf nodes its two return values are used as the
    children to visit next.
    """
    X = [[2.0, 1.0, 3.0],
         [-1.0, 2.0, 2.0],
         [11.0, 7.0, 4.5],
         [10.0, 5.0, 6.0]]
    X = np.array(X)

    # Expected layout (sample indices per node):
    #
    #            [0, 1, 2, 3]
    #                /\
    #               /  \
    #          [0, 1]  [2, 3]
    #    (d=2, f=2.36) (d=1, f=5.345)
    #          /\        /\
    #         / \       / \
    #       [1] [0]    [3]  [2]

    y_reg = [2, 1, 3, 4]
    mtr = MondrianTreeRegressor(random_state=1)
    mtr.partial_fit(X, y_reg)
    tree = mtr.tree_
    l, r = check_and_return_children(
        tree, tree.root, np.mean(y_reg), np.var(y_reg))
    ll, lr = check_and_return_children(
        tree, l, np.mean(y_reg[:2]), np.var(y_reg[:2]))
    # The right child holds samples [2, 3], so both statistics must come
    # from y_reg[2:]. The original took the variance from y_reg[:2], which
    # only matched because both halves have the same variance.
    rl, rr = check_and_return_children(
        tree, r, np.mean(y_reg[2:]), np.var(y_reg[2:]))
    check_and_return_children(tree, ll, y_reg[1], 0.0)
    check_and_return_children(tree, lr, y_reg[0], 0.0)
    check_and_return_children(tree, rl, y_reg[3], 0.0)
    check_and_return_children(tree, rr, y_reg[2], 0.0)

    y_clf = [0, 1, 1, 2]
    mtc = MondrianTreeClassifier(random_state=1)
    mtc.partial_fit(X, y_clf)
    tree = mtc.tree_
    l, r = check_and_return_children(tree, tree.root, [[1, 2, 1]])
    ll, lr = check_and_return_children(tree, l, [[1, 1, 0]])
    rl, rr = check_and_return_children(tree, r, [[0, 1, 1]])
    check_and_return_children(tree, ll, [[0, 1, 0]])
    check_and_return_children(tree, lr, [[1, 0, 0]])
    check_and_return_children(tree, rl, [[0, 0, 1]])
    check_and_return_children(tree, rr, [[0, 1, 0]])
Пример #9
0
def test_mondrian_tree_n_node_samples():
    """Check ``n_node_samples`` is [1, 1, 2] for 1000 two-sample datasets."""
    expected_counts = [1, 1, 2]
    for data_seed in range(1000):
        # Only the dataset seed varies; the estimator seed stays fixed at 0.
        X, y = make_regression(n_samples=2, random_state=data_seed)
        regressor = MondrianTreeRegressor(random_state=0)
        regressor.partial_fit(X, y)
        assert_array_equal(regressor.tree_.n_node_samples, expected_counts)