Exemplo n.º 1
0
def test_zero_weights():
    """Samples carrying zero weight must not affect the weighted percentile."""
    values = [1, 2, 3, 4, 5]
    sample_weights = [0, 0, 0, 0.1, 0.1]

    # Percentiles 0, 10, ..., 100.
    for percentile in np.arange(0, 110, 10):
        with_zero_weights = weighted_percentile(
            values, percentile, sample_weights)
        # Same data with the zero-weight samples removed outright.
        without_zero_weights = weighted_percentile(
            [4, 5], percentile, [0.1, 0.1])
        assert_equal(with_zero_weights, without_zero_weights)
Exemplo n.º 2
0
def test_percentile_equal_weights():
    """Check weighted percentiles of ten samples that share the same weight."""
    random_state = np.random.RandomState(0)
    samples = random_state.randn(10)
    uniform_weights = 0.1 * np.ones(10)
    ordered = np.sort(samples)

    # With equal weights, percentiles 10, 20, ..., 90 are expected to sit
    # halfway between consecutive sorted samples.
    midpoints = 0.5 * (ordered[1:] + ordered[:-1])
    at_deciles = []
    for percentile in np.arange(10, 100, 10):
        at_deciles.append(
            weighted_percentile(samples, percentile, uniform_weights))
    assert_array_almost_equal(midpoints, at_deciles)

    # Percentiles 5, 15, ..., 95 are expected to land on the sorted samples.
    at_offsets = []
    for percentile in np.arange(5, 105, 10):
        at_offsets.append(
            weighted_percentile(samples, percentile, uniform_weights))
    assert_array_almost_equal(ordered, at_offsets)
Exemplo n.º 3
0
    def predict(self, X, quantiles=None):
        """
        Predict the requested quantiles (or the mean) of the target for X.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples. They are validated with
            ``check_array(X, dtype=np.float32, accept_sparse="csc")``.
            If ``X`` exposes a non-callable ``index`` attribute (e.g. a
            pandas DataFrame), it becomes the index of the returned frame;
            otherwise the default pandas index is used.

        quantiles : sequence of float, 'mean' or None, optional
            Quantiles to predict, given as fractions (``0.5`` is the
            median). ``None`` (the default) is treated as ``[0.5]``.
            The string ``'mean'`` returns the parent class's ``predict``
            output instead of quantiles.
            Each quantile is converted to an integer percentile, so a
            fractional percentile is truncated (``0.975`` -> ``97``) even
            though its column label keeps one decimal (``'97.5%'``).

        Returns
        -------
        y : pandas.DataFrame of shape = [n_samples, n_quantiles]
            One column per requested quantile, labelled like ``'50.0%'``,
            or a single column named ``'mean'``.
        """
        if quantiles is None:
            quantiles = [0.50]

        # Only a string can select the mean. Comparing an ndarray of
        # quantiles against 'mean' with == is element-wise, and its truth
        # value is ambiguous.
        if isinstance(quantiles, str) and quantiles == 'mean':
            quantiles = None
            column_names = ['mean']
        else:
            column_names = [
                str(round(100 * quantile, 1)) + '%' for quantile in quantiles
            ]

        # Capture row labels before check_array turns X into a plain array.
        # list.index / tuple.index are bound methods, not labels, and plain
        # arrays have no such attribute at all.
        index = getattr(X, 'index', None)
        if callable(index):
            index = None

        # apply method requires X to be of dtype np.float32
        X = check_array(X, dtype=np.float32, accept_sparse="csc")
        if quantiles is None:
            preds = super(MyRandomForestQuantileRegressor, self).predict(X)
            return pd.DataFrame(preds, index=index, columns=column_names)

        # 100 * 0.29 evaluates to 28.999999999999996, which a bare int()
        # would turn into 28. Rounding away the float noise first keeps the
        # intended percentile, and genuine fractions are still truncated.
        percentiles = [
            int(round(100 * quantile, 6)) for quantile in quantiles
        ]

        sorter = np.argsort(self.y_train_)
        X_leaves = self.apply(X)
        quantile_values = np.zeros((X.shape[0], len(quantiles)))
        for i, x_leaf in enumerate(X_leaves):
            # Mask (i.e. exclude) every stored entry whose leaf differs from
            # the leaf this row reached.
            # NOTE(review): presumably y_train_leaves_ and y_weights_ are
            # (n_estimators, n_train_samples) -- confirm against fit().
            mask = self.y_train_leaves_ != np.expand_dims(x_leaf, 1)
            x_weights = ma.masked_array(self.y_weights_, mask)
            weights = x_weights.sum(axis=0)
            for i_q, percentile in enumerate(percentiles):
                quantile_values[i, i_q] = weighted_percentile(
                    self.y_train_, percentile, weights, sorter)
        return pd.DataFrame(quantile_values, index=index, columns=column_names)
Exemplo n.º 4
0
def test_quantiles():
    """Compare depth-1 predictions with per-side percentiles of y_train."""
    # Test with max depth 1.
    for est in estimators:
        est.set_params(max_depth=1)
        est.fit(X_train, y_train)
        tree = est.tree_

        # The single split at the root decides which side a sample falls on.
        split_feature = tree.feature[0]
        split_threshold = tree.threshold[0]
        train_column = X_train[:, split_feature]
        train_goes_left = train_column <= split_threshold
        train_goes_right = train_column > split_threshold

        for q in [20, 40, 50, 60, 80, 90]:
            # FIXME: marker carried over from the original; the concern it
            # refers to is not stated.
            left_q = weighted_percentile(y_train[train_goes_left], q)
            right_q = weighted_percentile(y_train[train_goes_right], q)

            for curr_X, _ in ((X_train, y_train), (X_test, y_test)):
                column = curr_X[:, split_feature]
                actual_q = np.zeros(curr_X.shape[0])
                actual_q[column <= split_threshold] = left_q
                actual_q[column > split_threshold] = right_q

                expected_q = est.predict(curr_X, quantile=q)
                assert_array_almost_equal(expected_q, actual_q)
Exemplo n.º 5
0
def test_percentile_toy_data():
    """Pin weighted percentiles on a three-sample toy data set."""
    values = [1, 2, 3]
    sample_weights = [1, 4, 5]

    # (percentile, expected value); the first two rows are the 0th and
    # 100th percentile.
    exact_cases = [
        (0, 1),
        (100, 3),
        (5, 1),
        (30, 2),
        (75, 3),
    ]
    for percentile, expected in exact_cases:
        assert_equal(
            weighted_percentile(values, percentile, sample_weights), expected)

    # The median is compared to two decimal places only.
    median = weighted_percentile(values, 50, sample_weights)
    assert_almost_equal(median, 2.44, 2)
Exemplo n.º 6
0
    def predict(self, X, quantiles=None, check_input=False):
        """
        Predict the requested quantiles of the target for X.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples. They are validated with
            ``check_array(X, dtype=np.float32, accept_sparse="csc")``.

        quantiles : sequence of float or None, optional
            Quantiles to predict, given as fractions (``0.5`` is the
            median). When ``None`` (the default), the parent class's
            ``predict`` result is returned instead.
            Each quantile is converted to an integer percentile, so a
            fractional percentile is truncated (``0.975`` -> ``97``).

        check_input : boolean, (default=False)
            Forwarded to the parent class's ``predict`` when ``quantiles``
            is ``None``; not used otherwise. ``X`` always goes through
            ``check_array`` here regardless of this flag.

        Returns
        -------
        y : array of shape = [n_samples, n_quantiles]
            Column ``j`` holds the ``quantiles[j]`` estimate for each row.
            If ``quantiles`` is ``None``, the return value of the parent
            class's ``predict`` is passed through unchanged.
        """
        # apply method requires X to be of dtype np.float32
        X = check_array(X, dtype=np.float32, accept_sparse="csc")
        if quantiles is None:
            return super(MyDecisionTreeQuantileRegressor, self).predict(
                X, check_input=check_input)

        # 100 * 0.29 evaluates to 28.999999999999996, which a bare int()
        # would turn into 28. Rounding away the float noise first keeps the
        # intended percentile, and genuine fractions are still truncated.
        percentiles = [
            int(round(100 * quantile, 6)) for quantile in quantiles
        ]

        quantile_values = np.zeros((X.shape[0], len(quantiles)))
        X_leaves = self.apply(X)
        for leaf in np.unique(X_leaves):
            # Both selections depend only on the leaf, so compute them once
            # rather than once per quantile.
            rows_in_leaf = X_leaves == leaf
            y_in_leaf = self.y_train_[self.y_train_leaves_ == leaf]
            for i_q, percentile in enumerate(percentiles):
                quantile_values[rows_in_leaf, i_q] = weighted_percentile(
                    y_in_leaf, percentile)
        return quantile_values