Example #1
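All of the snippets below follow the pattern of scikit-garden's Mondrian forest test suite and omit their shared setup. A minimal sketch of the preamble they appear to assume (the Boston data, the [-1, 1] feature scaling implied by the comment in Example #6, and the nose-style assert helpers from the old sklearn.utils.testing module are assumptions, not part of the excerpts):

import pickle

import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils.testing import (
    assert_equal, assert_true, assert_false, assert_greater,
    assert_array_equal, assert_array_almost_equal)
from skgarden import MondrianForestRegressor

boston = load_boston()
X = MinMaxScaler(feature_range=(-1, 1)).fit_transform(boston.data)
y = boston.target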
def test_weighted_decision_path():
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        train_size=0.6,
                                                        test_size=0.4)
    mr = MondrianForestRegressor(random_state=0)
    mr.fit(X_train, y_train)

    # decision_path is inherited from sklearn's forests, while
    # weighted_decision_path is implemented here, so cross-check them.
    paths, col_inds = mr.decision_path(X_train)
    weight_paths, weight_col_inds = mr.weighted_decision_path(X_train)
    assert_array_equal(col_inds, weight_col_inds)

    n_nodes = [est.tree_.node_count for est in mr.estimators_]
    assert_equal(weight_paths.shape[0], X_train.shape[0])
    assert_equal(weight_paths.shape[1], sum(n_nodes))

    # We are calculating the weighted decision path on train data, so
    # the weights should be concentrated at the leaves.
    leaf_indices = mr.apply(X_train)
    for est_ind, curr_leaf_indices in enumerate(leaf_indices.T):
        curr_path = weight_paths[
            :, col_inds[est_ind]:col_inds[est_ind + 1]].toarray()
        assert_array_equal(np.where(curr_path)[1], curr_leaf_indices)

    # Within each estimator, a sample's weights over all nodes sum to
    # 1.0, so the row sums over every estimator equal n_estimators.
    assert_array_almost_equal(
        np.ravel(mr.weighted_decision_path(X_test)[0].sum(axis=1)),
        mr.n_estimators * np.ones(X_test.shape[0]), 5)
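For context on the (matrix, col_inds) pair checked above: the node-indicator columns of all trees are stacked side by side, and col_inds holds the per-estimator column offsets. A minimal usage sketch, assuming the preamble above:

mr = MondrianForestRegressor(n_estimators=10, random_state=0)
mr.fit(X, y)
weights, col_inds = mr.weighted_decision_path(X)

# Columns col_inds[i]:col_inds[i + 1] belong to estimator i; each row
# of that block holds one sample's node weights and sums to 1.0.
block = weights[:, col_inds[0]:col_inds[1]]
print(np.ravel(block.sum(axis=1)))  # approximately 1.0 for every sample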
Example #2
def test_regressor_attributes():
    mr = MondrianForestRegressor(n_estimators=5, random_state=0)
    # Classifier-only attributes must be absent before and after fitting.
    assert_false(hasattr(mr, "classes_"))
    assert_false(hasattr(mr, "n_classes_"))

    mr.fit([[1, 2, 3], [4, 5, 6]], [1, 2])
    assert_false(hasattr(mr, "classes_"))
    assert_false(hasattr(mr, "n_classes_"))
Example #3
def test_pickle():
    mr1 = MondrianForestRegressor(random_state=0)
    mr1.fit(X, y)
    score1 = mr1.score(X, y)
    # Round-trip the fitted model through pickle.
    pickle_obj = pickle.dumps(mr1)

    mr2 = pickle.loads(pickle_obj)
    assert_equal(type(mr2), mr1.__class__)
    score2 = mr2.score(X, y)
    assert_equal(score1, score2)
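A side note on this test: for sklearn-style estimators, joblib serialization is the usual alternative to raw pickle and can be exercised the same way. A sketch (not part of the original suite):

import joblib

mr = MondrianForestRegressor(random_state=0).fit(X, y)
joblib.dump(mr, "mondrian_forest.joblib")
restored = joblib.load("mondrian_forest.joblib")
assert restored.score(X, y) == mr.score(X, y)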
Example #4
def test_min_samples_split():
    min_samples_split = 5
    mr = MondrianForestRegressor(random_state=0,
                                 n_estimators=100,
                                 min_samples_split=min_samples_split)
    mr.fit(X, y)
    for est in mr.estimators_:
        n_samples = est.tree_.n_node_samples
        leaves = est.tree_.children_left == -1
        # Internal nodes must contain at least min_samples_split samples.
        assert_true(np.all(n_samples[~leaves] >= min_samples_split))
        # An impure leaf must contain fewer, else it would have been split.
        imp_leaves = np.logical_and(leaves, est.tree_.variance > 1e-7)
        assert_true(np.all(n_samples[imp_leaves] < min_samples_split))
Example #5
def test_decision_path():
    mr = MondrianForestRegressor(random_state=0)
    mr.fit(X, y)
    indicator, col_inds = mr.decision_path(X)
    indices, indptr, data = indicator.indices, indicator.indptr, indicator.data

    n_nodes = [est.tree_.node_count for est in mr.estimators_]
    assert_equal(indicator.shape[0], X.shape[0])
    assert_equal(indicator.shape[1], sum(n_nodes))
    assert_array_equal(np.diff(col_inds), n_nodes)

    # Check that all leaf nodes are in the decision path.
    leaf_indices = mr.apply(X) + np.reshape(col_inds[:-1], (1, -1))
    for sample_ind, curr_leaf in enumerate(leaf_indices):
        sample_indices = indices[indptr[sample_ind]:indptr[sample_ind + 1]]
        assert_true(np.all(np.in1d(curr_leaf, sample_indices)))
Example #6
def test_mean_std():
    mr = MondrianForestRegressor(random_state=0)
    mr.fit(X, y)

    # For points in the training data, the predictive mean should
    # converge to the actual target value and the predictive std to 0.0.
    mean, std = mr.predict(X, return_std=True)
    assert_array_almost_equal(mean, y, 5)
    assert_array_almost_equal(std, 0.0, 2)

    # For points far away from the training data, the prediction should
    # converge to the empirical mean and standard deviation of y.
    # (X is scaled between -1.0 and 1.0, so +/-20.0 is far outside.)
    X_inf = np.vstack(
        (20.0 * np.ones(X.shape[1]), -20.0 * np.ones(X.shape[1])))
    inf_mean, inf_std = mr.predict(X_inf, return_std=True)
    assert_array_almost_equal(inf_mean, y.mean(), 1)
    assert_array_almost_equal(inf_std, y.std(), 2)
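The return_std=True behaviour verified here is what makes Mondrian forests handy for uncertainty estimates. A minimal sketch of turning the mean and std into a rough 95% interval, assuming a Gaussian read of the predictive std (an interpretation choice, not something the test asserts):

mr = MondrianForestRegressor(random_state=0).fit(X, y)
mean, std = mr.predict(X, return_std=True)

# Approximate 95% interval per point under a Gaussian assumption.
lower, upper = mean - 1.96 * std, mean + 1.96 * std
print("empirical coverage: %.3f" % np.mean((y >= lower) & (y <= upper)))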
Example #7
def test_memory_layout():
    mr = MondrianForestRegressor(random_state=0)

    for dtype in [np.float32, np.float64]:
        # Default layout.
        X_curr = np.asarray(X, dtype=dtype)
        assert_array_almost_equal(mr.fit(X_curr, y).predict(X_curr), y, 3)

        # C-order
        X_curr = np.asarray(X, order="C", dtype=dtype)
        assert_array_almost_equal(mr.fit(X_curr, y).predict(X_curr), y, 3)

        # F-order
        X_curr = np.asarray(X, order="F", dtype=dtype)
        assert_array_almost_equal(mr.fit(X_curr, y).predict(X_curr), y, 3)

        # Contiguous
        X_curr = np.ascontiguousarray(X_curr, dtype=dtype)
        assert_array_almost_equal(mr.fit(X_curr, y).predict(X_curr), y, 3)

        # Subsampled copy (every other row).
        X_curr = np.array(X[::2], dtype=dtype)
        y_curr = np.asarray(y[::2])
        assert_array_almost_equal(
            mr.fit(X_curr, y_curr).predict(X_curr), y_curr, 3)
Example #8
def test_boston():
    mr = MondrianForestRegressor(n_estimators=5, random_state=0)
    mr.fit(X, y)
    score = mr.score(X, y)
    assert_greater(score, 0.94, "Failed with score = %f" % score)
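A portability note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so on a modern stack the preamble sketched at the top needs a different dataset. A drop-in substitute (the 0.94 score threshold would not carry over unchanged):

from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import MinMaxScaler

data = fetch_california_housing()
X = MinMaxScaler(feature_range=(-1, 1)).fit_transform(data.data)
y = data.target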