    @classmethod
    def get_feature_importances_from_booster(cls,
                                             booster: Booster) -> np.ndarray:
        """Gets feauture importances from a XGB booster.
            This is based on the feature_importance_ property defined in:
            https://github.com/dmlc/xgboost/blob/master/python-package/xgboost/sklearn.py

        Args:
            booster(Booster): Booster object,
            most of the times the median model (quantile=0.5) is preferred

        Returns:
            (np.ndarray) with normalized feature importances

        """

        # Get score
        score = booster.get_score(importance_type="gain")

        # Get feature names from booster
        feature_names = booster.feature_names

        # Get importance
        feature_importance = [score.get(f, 0.0) for f in feature_names]
        # Convert to array
        features_importance_array = np.array(feature_importance,
                                             dtype=np.float32)

        total = features_importance_array.sum()  # For normalizing
        if total == 0:
            return features_importance_array
        return features_importance_array / total  # Normalize
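
A minimal usage sketch (not part of the original snippet), assuming the method above lives on a class called MedianModel here purely for illustration, and that numpy and xgboost are installed:

# Hypothetical usage: train a tiny regression booster, then normalize its
# gain-based importances with the classmethod above. "MedianModel" is an
# assumed name for the class that hosts the method.
import numpy as np
import xgboost as xgb

X = np.random.rand(200, 3)
y = X @ np.array([0.5, 0.3, 0.2])
dtrain = xgb.DMatrix(X, label=y, feature_names=["load", "temperature", "wind"])
booster = xgb.train({"objective": "reg:squarederror"}, dtrain, num_boost_round=20)

importances = MedianModel.get_feature_importances_from_booster(booster)
print(importances)        # e.g. array([0.7, 0.2, 0.1], dtype=float32)
print(importances.sum())  # sums to 1.0 whenever any split produced gain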
Example #2
def merge_labeled_weight_importance(model: Booster, label_encoder: OneHotLabelEncoder) -> Dict[str, int]:
    # 'weight' importance counts how often each encoded feature is used to split.
    f_imp = model.get_score(importance_type='weight')

    # Fold the split counts of one-hot encoded columns back onto their source column.
    merged: Dict[str, int] = {}
    for f in f_imp:
        src_feature = label_encoder.source_column(f)
        merged[src_feature] = merged.get(src_feature, 0) + f_imp[f]

    return merged
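
For reference, a self-contained sketch of the same merge. OneHotLabelEncoder is not shown in the snippet, so a stub exposing only the source_column method it relies on stands in for it, and the 'weight' scores are hard-coded in the shape Booster.get_score returns:

from typing import Dict

class StubLabelEncoder:
    """Stand-in for OneHotLabelEncoder; assumed to map encoded names back to source columns."""

    def __init__(self, mapping: Dict[str, str]):
        self._mapping = mapping

    def source_column(self, encoded_name: str) -> str:
        return self._mapping.get(encoded_name, encoded_name)

encoder = StubLabelEncoder({"color_red": "color", "color_blue": "color"})
f_imp = {"color_red": 4, "color_blue": 2, "size": 7}  # shape of get_score(importance_type='weight')

merged: Dict[str, int] = {}
for f, count in f_imp.items():
    src = encoder.source_column(f)
    merged[src] = merged.get(src, 0) + count

print(merged)  # {'color': 6, 'size': 7}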
Example #3
def merge_labeled_weight_importance(
        model: Booster,
        dummy_col_sep=categorical_util.DUMMY_COL_SEP) -> Dict[str, int]:
    # 'weight' importance: number of splits per dummy-encoded feature.
    f_imp = model.get_score(importance_type='weight')

    merged: Dict[str, int] = {}
    for f in f_imp:
        src_feature = categorical_util.get_source_name_from_dummy(
            f, dummy_col_sep)
        merged[src_feature] = merged.get(src_feature, 0) + f_imp[f]

    return merged
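
A short sketch of the separator convention this variant assumes; DUMMY_COL_SEP is set to "__" here only for illustration, and the real value and helper live in categorical_util:

DUMMY_COL_SEP = "__"  # assumed separator, for illustration only

def get_source_name_from_dummy(dummy_col: str, sep: str = DUMMY_COL_SEP) -> str:
    # Assumed behaviour: "city__amsterdam" -> "city"; non-dummy names pass through unchanged.
    return dummy_col.split(sep, 1)[0]

f_imp = {"city__amsterdam": 3, "city__utrecht": 5, "load": 9}
merged = {}
for f, count in f_imp.items():
    src = get_source_name_from_dummy(f)
    merged[src] = merged.get(src, 0) + count

print(merged)  # {'city': 8, 'load': 9}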