def get_feature_importances_from_booster(cls, booster: Booster) -> np.ndarray:
    """Extract normalized gain-based feature importances from an XGB booster.

    Mirrors the ``feature_importances_`` property defined in:
    https://github.com/dmlc/xgboost/blob/master/python-package/xgboost/sklearn.py

    Args:
        booster (Booster): Booster object; most of the time the median
            model (quantile=0.5) is preferred.

    Returns:
        np.ndarray: Normalized feature importances (all zeros when the
        booster reports no gain at all).
    """
    gain_per_feature = booster.get_score(importance_type="gain")
    # Features absent from the score dict never contributed any gain.
    raw_importances = np.array(
        [gain_per_feature.get(name, 0.0) for name in booster.feature_names],
        dtype=np.float32,
    )
    total_gain = raw_importances.sum()
    # Guard against division by zero when no split produced any gain.
    if total_gain == 0:
        return raw_importances
    return raw_importances / total_gain
def merge_labeled_weight_importance(model: Booster, label_encoder: OneHotLabelEncoder) -> Dict:
    """Aggregate per-dummy weight importances back onto their source columns.

    Args:
        model (Booster): Trained booster to read ``weight`` importances from.
        label_encoder (OneHotLabelEncoder): Encoder used to map each dummy
            feature name back to its original (source) column.

    Returns:
        Dict: Mapping of source column name to summed weight importance.
    """
    merged: Dict[str, int] = {}
    for dummy_name, weight in model.get_score(importance_type='weight').items():
        source = label_encoder.source_column(dummy_name)
        merged[source] = merged.get(source, 0) + weight
    return merged
def merge_labeled_weight_importance(
        model: Booster,
        dummy_col_sep=categorical_util.DUMMY_COL_SEP) -> Dict[str, int]:
    """Aggregate per-dummy weight importances back onto their source columns.

    NOTE(review): this redefines ``merge_labeled_weight_importance`` with a
    different signature; if both definitions live at module level, the later
    one shadows the earlier — confirm only one is intended.

    Args:
        model (Booster): Trained booster to read ``weight`` importances from.
        dummy_col_sep: Separator used when building dummy column names;
            defaults to ``categorical_util.DUMMY_COL_SEP``.

    Returns:
        Dict[str, int]: Mapping of source column name to summed importance.
    """
    merged: Dict[str, int] = {}
    for dummy_name, weight in model.get_score(importance_type='weight').items():
        source = categorical_util.get_source_name_from_dummy(
            dummy_name, dummy_col_sep)
        merged[source] = merged.get(source, 0) + weight
    return merged