def explain_decision_tree(clf,
                          vec=None,
                          top=_TOP,
                          target_names=None,
                          targets=None,  # ignored
                          feature_names=None,
                          feature_re=None,
                          **export_graphviz_kwargs):
    """
    Return an explanation of a decision tree classifier in the
    following format (compatible with random forest explanations)::

        Explanation(
            estimator="<classifier repr>",
            method="<interpretation method>",
            description="<human readable description>",
            decision_tree={...tree information},
            feature_importances=[
                FeatureWeight(feature_name, importance, std_deviation),
                ...
            ]
        )
    """
    feature_names = get_feature_names(clf, vec, feature_names=feature_names)
    coef = clf.feature_importances_
    # Keep the unfiltered names around: the tree visualization refers to
    # all features, regardless of any feature_re filtering below.
    tree_feature_names = feature_names
    if feature_re is not None:
        feature_names, flt_indices = feature_names.filtered_by_re(feature_re)
        coef = coef[flt_indices]
    indices = argsort_k_largest(coef, top)
    names, values = feature_names[indices], coef[indices]
    # A single tree provides no variance estimate, so std is all zeros.
    std = np.zeros_like(values)
    export_graphviz_kwargs.setdefault("proportion", True)
    tree_info = get_tree_info(
        clf,
        feature_names=tree_feature_names,
        class_names=target_names,
        **export_graphviz_kwargs)
    return Explanation(
        feature_importances=[FeatureWeight(*x)
                             for x in zip(names, values, std)],
        decision_tree=tree_info,
        description=DESCRIPTION_DECISION_TREE,
        estimator=repr(clf),
        method='decision tree',
    )
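

# Not part of the original module: a minimal usage sketch. It assumes
# this function is importable as-is and that FeatureWeight exposes
# ``feature``/``weight`` attributes, as in eli5.
def _demo_explain_decision_tree():
    from sklearn.datasets import load_iris
    from sklearn.tree import DecisionTreeClassifier

    data = load_iris()
    clf = DecisionTreeClassifier(max_depth=3, random_state=0)
    clf.fit(data.data, data.target)
    expl = explain_decision_tree(
        clf,
        feature_names=list(data.feature_names),
        target_names=list(data.target_names),
    )
    # Importances come back sorted in descending order.
    for fw in expl.feature_importances:
        print(fw.feature, fw.weight)

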
def explain_rf_feature_importance(clf,
                                  vec=None,
                                  top=_TOP,
                                  target_names=None,  # ignored
                                  targets=None,  # ignored
                                  feature_names=None,
                                  feature_re=None):
    """
    Return an explanation of a tree-based ensemble classifier in the
    following format::

        Explanation(
            estimator="<classifier repr>",
            method="<interpretation method>",
            description="<human readable description>",
            feature_importances=[
                FeatureWeight(feature_name, importance, std_deviation),
                ...
            ]
        )
    """
    feature_names = get_feature_names(clf, vec, feature_names=feature_names)
    coef = clf.feature_importances_
    # Estimate the std of each importance from its spread across trees;
    # ravel() flattens per-class estimator grids (e.g. gradient boosting).
    trees = np.array(clf.estimators_).ravel()
    coef_std = np.std([tree.feature_importances_ for tree in trees], axis=0)
    if feature_re is not None:
        feature_names, flt_indices = feature_names.filtered_by_re(feature_re)
        coef = coef[flt_indices]
        coef_std = coef_std[flt_indices]
    indices = argsort_k_largest(coef, top)
    names, values, std = (feature_names[indices], coef[indices],
                          coef_std[indices])
    return Explanation(
        feature_importances=[FeatureWeight(*x)
                             for x in zip(names, values, std)],
        description=DESCRIPTION_RANDOM_FOREST,
        estimator=repr(clf),
        method='feature importances',
    )
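

# Not part of the original module: a usage sketch for an ensemble, where
# the per-tree spread of importances populates the ``std`` column.
# Attribute names (``feature``, ``weight``, ``std``) are assumed to
# follow eli5's FeatureWeight.
def _demo_explain_rf_feature_importance():
    from sklearn.datasets import load_iris
    from sklearn.ensemble import RandomForestClassifier

    data = load_iris()
    clf = RandomForestClassifier(n_estimators=50, random_state=0)
    clf.fit(data.data, data.target)
    expl = explain_rf_feature_importance(
        clf, feature_names=list(data.feature_names), top=3)
    for fw in expl.feature_importances:
        print(fw.feature, fw.weight, fw.std)

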
def get_feature_importances_filtered(coef, feature_names, flt_indices, top,
                                     coef_std=None):
    # type: (...) -> FeatureImportances
    if flt_indices is not None:
        coef = coef[flt_indices]
        if coef_std is not None:
            coef_std = coef_std[flt_indices]
    indices = argsort_k_largest(coef, top)
    names, values = feature_names[indices], coef[indices]
    std = None if coef_std is None else coef_std[indices]
    return FeatureImportances.from_names_values(
        names, values, std,
        remaining=coef.shape[0] - len(indices),
    )
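

# Not part of the original module: a sketch of calling the helper
# directly. Plain numpy arrays stand in for eli5's FeatureNames (both
# support fancy indexing); the ``importances``/``remaining`` attribute
# names on the result are an assumption here.
def _demo_get_feature_importances_filtered():
    import numpy as np

    coef = np.array([0.1, 0.6, 0.3])
    feature_names = np.array(['bias', 'length', 'width'])
    # flt_indices=None means no regex filter was applied upstream.
    fi = get_feature_importances_filtered(
        coef, feature_names, flt_indices=None, top=2)
    # The two largest importances are kept; one feature is left as remaining.
    print(fi.importances, fi.remaining)

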
def test_argsort_k_largest_empty():
    # k=0 selects nothing; the "positive" variant keeps only positive
    # values, so a lone zero also yields an empty selection.
    x = np.array([0])
    empty = np.array([])
    assert _np_eq(x[argsort_k_largest(x, 0)], empty)
    assert _np_eq(x[argsort_k_largest_positive(x, None)], empty)

# ``x`` and ``k`` below are supplied by Hypothesis ``@given(...)``
# decorators, which are not shown in this excerpt; ``assume`` is
# hypothesis.assume.
def test_argsort_k_largest_None(x):
    # k=None means "keep everything": every index is returned.
    assert len(argsort_k_largest(x, None)) == len(x)


def test_argsort_k_largest_zero(x):
    # k=0 selects nothing.
    assert len(argsort_k_largest(x, 0)) == 0


def test_argsort_k_largest(x, k):
    # For NaN-free inputs without duplicates, the result must equal the
    # last k entries of a full argsort, reversed (i.e. descending).
    assume(len(x) >= k)
    assume(len(set(x)) == len(x))
    assume(not np.isnan(x).any())
    assert (np.argsort(x)[-k:][::-1] == argsort_k_largest(x, k)).all()
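

# The tests above pin down the contract of ``argsort_k_largest``: k=0
# yields an empty index array, k=None returns every index, and otherwise
# the indices of the k largest values come back in descending order of
# value. A minimal sketch satisfying that contract (the real
# implementation may differ):
def argsort_k_largest_sketch(x, k):
    import numpy as np

    if k == 0:
        return np.array([], dtype=np.intp)
    if k is None or k >= len(x):
        return np.argsort(x)[::-1]
    # argpartition finds the k largest in O(n); then sort only those k.
    indices = np.argpartition(x, -k)[-k:]
    return indices[np.argsort(-x[indices])]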