Пример #1
0
    def _generate_leaf_node_predictions(self, data: xgb.DMatrix) -> np.ndarray:
        """Return leaf node predictions from the xgboost model as a 2d array.

        Calls ``self.model.predict`` with ``pred_leaf=True`` and
        ``ntree_limit`` set to the model's ``best_iteration + 1``.

        When ``predict`` hands back a 1d array, it is reshaped into a
        single-column 2d array so the result always has two dimensions.

        Parameters
        ----------
        data : xgb.DMatrix
            Data to generate predictions on.

        Returns
        -------
        np.ndarray
            2d array of leaf node predictions; per the model's output this
            is (nsample, ntrees), each entry giving the leaf node of a
            sample in a tree.

        """
        check_type(data, [xgb.DMatrix], "data")

        # NOTE(review): ntree_limit is reported as deprecated in newer xgboost
        # releases (iteration_range replaces it) - confirm supported versions.
        n_trees = self.model.best_iteration + 1
        leaf_nodes = self.model.predict(
            data=data, pred_leaf=True, ntree_limit=n_trees)

        # already 2d: nothing to fix up
        if leaf_nodes.ndim != 1:
            return leaf_nodes

        # 1d output: present it as one column with a row per sample
        return leaf_nodes.reshape((data.num_row(), 1))
Пример #2
0
def xgbooster_predict_proba(booster: xgb.Booster,
                            d_x: xgb.DMatrix) -> np.ndarray:
    """ Mimic the `predict_proba` interface from sklearn

    The result is only meaningful as class probabilities if `booster`
    has been trained using the `binary:logistic` loss.

    Parameters
    ----------
    booster : xgboost.Booster
        The trained booster

    d_x : xgboost.DMatrix
        The dataset

    Returns
    -------
    y_probas_pred : numpy.ndarray
        The probabilistic predictions. The shape of the array
        is (n_row, 2): column 1 holds the output of `booster.predict`
        and column 0 holds one minus that output.
    """
    positive = booster.predict(d_x)
    negative = 1 - positive

    # Every cell is overwritten below, so the buffer needs no initial fill.
    probas = np.empty((d_x.num_row(), 2))
    probas[:, 1] = positive
    probas[:, 0] = negative
    return probas
Пример #3
0
def dmatrix_to_numpy(dmatrix: xgb.DMatrix) -> np.ndarray:
    """Convert DMatrix to 2d numpy array

    The DMatrix is dumped to a temporary file with ``save_binary`` and that
    file is then read back by a stream parser selected from the installed
    xgboost version. The temporary file is removed before returning.

    Parameters
    ----------
    dmatrix : xgb.DMatrix
        DMatrix to convert

    Returns
    -------
    np.ndarray
        2d numpy array with the corresponding DMatrix feature values

    Raises
    ------
    InvalidInput
        Input is not a valid DMatrix
    """
    if not isinstance(dmatrix, xgb.DMatrix):
        raise InvalidInput("Type error: input parameter is not DMatrix")

    # The binary layout differs before and after xgboost 1.0.0, so pick the
    # matching parser.
    if version.parse(xgb.__version__) < version.parse('1.0.0'):
        stream_parser = DMatrixStreamParserV0_80
    else:
        stream_parser = DMatrixStreamParserV1_0_0

    # Create the temp file *before* entering the try block: if creation
    # fails, the error propagates as-is instead of being masked by an
    # UnboundLocalError from the cleanup code referencing `fp`.
    #
    # delete=False keeps the file on disk when the handle is closed, so
    # XGBoost can write to it by name (the original author notes this avoids
    # a permissions error); it is removed explicitly in the finally clause.
    fp = tempfile.NamedTemporaryFile(delete=False)
    try:
        with fp:
            dmatrix.save_binary(fp.name)
            return stream_parser(fp, dmatrix.num_row(),
                                 dmatrix.num_col()).parse()
    finally:
        # The handle is closed by now; removal is best-effort, so a failure
        # to delete never hides the parse result or a parsing error.
        with suppress(OSError):
            os.remove(fp.name)