def test_tree2dict():
    """Check that tree2text renders a fitted decision tree as the expected
    indented text, both with default feature names (x0, x1) and with
    user-supplied feature_names."""
    # Tiny 2-feature dataset; random_state pins the tree structure so the
    # rendered text is deterministic.
    X = [[1, 1], [0, 2], [0, 3], [1, 3], [2, 3], [0, 4]]
    y = [0, 0, 0, 1, 1, 1]
    clf = DecisionTreeClassifier(random_state=42).fit(X, y)
    text = tree2text(get_tree_info(clf))
    print(text)
    # NOTE(review): the expected literal below looks like it lost its original
    # newlines/indentation during extraction (the tree text is normally
    # multi-line); it is preserved byte-for-byte here — confirm against the
    # original file before relying on this test.
    expected = """ x1 <= 2.500 (33.3%) ---> [1.000, 0.000] x1 > 2.500 (66.7%) x0 <= 0.500 (33.3%) x1 <= 3.500 (16.7%) ---> [1.000, 0.000] x1 > 3.500 (16.7%) ---> [0.000, 1.000] x0 > 0.500 (33.3%) ---> [0.000, 1.000] """.strip()
    assert text == expected
    # check it with feature_names
    text = tree2text(get_tree_info(clf, feature_names=['x', 'y']))
    print(text)
    # Same tree, but x0/x1 should now be rendered as 'x'/'y'.
    expected = """ y <= 2.500 (33.3%) ---> [1.000, 0.000] y > 2.500 (66.7%) x <= 0.500 (33.3%) y <= 3.500 (16.7%) ---> [1.000, 0.000] y > 3.500 (16.7%) ---> [0.000, 1.000] x > 0.500 (33.3%) ---> [0.000, 1.000] """.strip()
    assert text == expected
def explain_decision_tree(estimator, vec=None, top=_TOP,
                          target_names=None,
                          targets=None,  # ignored
                          feature_names=None,
                          feature_re=None, feature_filter=None,
                          **export_graphviz_kwargs):
    """
    Return an explanation of a decision tree.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``target_names``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    ``targets`` parameter is ignored.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the estimator (e.g. a fitted
    CountVectorizer instance); you can pass it instead of ``feature_names``.

    All other keyword arguments are passed to
    `sklearn.tree.export_graphviz`_ function.

    .. _sklearn.tree.export_graphviz: http://scikit-learn.org/stable/modules/generated/sklearn.tree.export_graphviz.html
    """
    # Resolve the full (unfiltered) feature-name set first: the tree text
    # must be rendered with ALL feature names, not the filtered subset.
    all_names = get_feature_names(estimator, vec, feature_names=feature_names)
    importances = estimator.feature_importances_

    shown_names, kept_indices = all_names.handle_filter(
        feature_filter, feature_re)
    if kept_indices is not None:
        # Keep importances aligned with the filtered name set.
        importances = importances[kept_indices]

    # Pick the top-k strictly positive importances.
    top_indices = argsort_k_largest_positive(importances, top)
    top_names = shown_names[top_indices]
    top_values = importances[top_indices]

    export_graphviz_kwargs.setdefault("proportion", True)
    tree_info = get_tree_info(
        estimator,
        feature_names=all_names,
        class_names=target_names,
        **export_graphviz_kwargs)

    weights = [FeatureWeight(name, value)
               for name, value in zip(top_names, top_values)]
    return Explanation(
        feature_importances=FeatureImportances(
            weights,
            # How many nonzero importances were cut off by ``top``.
            remaining=np.count_nonzero(importances) - len(top_indices),
        ),
        decision_tree=tree_info,
        description=DESCRIPTION_DECISION_TREE,
        estimator=repr(estimator),
        method='decision tree',
    )
def explain_decision_tree(estimator, vec=None, top=_TOP,
                          target_names=None,
                          targets=None,  # ignored
                          feature_names=None,
                          feature_re=None, feature_filter=None,
                          **export_graphviz_kwargs):
    """
    Return an explanation of a decision tree.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``target_names``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    ``targets`` parameter is ignored.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the estimator (e.g. a fitted
    CountVectorizer instance); you can pass it instead of ``feature_names``.

    All other keyword arguments are passed to
    `sklearn.tree.export_graphviz`_ function.

    .. _sklearn.tree.export_graphviz: http://scikit-learn.org/stable/modules/generated/sklearn.tree.export_graphviz.html
    """
    # Full name set is needed for the tree rendering; the filtered set is
    # only used for the feature-importance table.
    all_names = get_feature_names(estimator, vec, feature_names=feature_names)
    shown_names, kept_indices = all_names.handle_filter(
        feature_filter, feature_re)

    importances = get_feature_importances_filtered(
        estimator.feature_importances_, shown_names, kept_indices, top)

    export_graphviz_kwargs.setdefault("proportion", True)
    tree_info = get_tree_info(
        estimator,
        feature_names=all_names,
        class_names=target_names,
        **export_graphviz_kwargs)

    return Explanation(
        feature_importances=importances,
        decision_tree=tree_info,
        description=DESCRIPTION_DECISION_TREE,
        estimator=repr(estimator),
        method='decision tree',
    )
def explain_decision_tree(clf, vec=None, top=_TOP,
                          target_names=None,
                          targets=None,  # ignored
                          feature_names=None,
                          feature_re=None,
                          **export_graphviz_kwargs):
    """
    Return an explanation of a decision tree classifier in the
    following format (compatible with random forest explanations)::

        Explanation(
            estimator="<classifier repr>",
            method="<interpretation method>",
            description="<human readable description>",
            decision_tree={...tree information},
            feature_importances=[
                FeatureWeight(feature_name, importance, std_deviation),
                ...
            ]
        )
    """
    # The tree text is rendered with the full feature-name set, even when
    # ``feature_re`` narrows the importance table.
    all_names = get_feature_names(clf, vec, feature_names=feature_names)
    importances = clf.feature_importances_

    shown_names = all_names
    if feature_re is not None:
        shown_names, kept_indices = shown_names.filtered_by_re(feature_re)
        importances = importances[kept_indices]

    top_indices = argsort_k_largest(importances, top)
    top_names = shown_names[top_indices]
    top_values = importances[top_indices]
    # A single tree has no ensemble spread, so std is all zeros (kept for
    # format compatibility with random forest explanations).
    std = np.zeros_like(top_values)

    export_graphviz_kwargs.setdefault("proportion", True)
    tree_info = get_tree_info(
        clf,
        feature_names=all_names,
        class_names=target_names,
        **export_graphviz_kwargs)

    return Explanation(
        feature_importances=[
            FeatureWeight(name, value, deviation)
            for name, value, deviation in zip(top_names, top_values, std)
        ],
        decision_tree=tree_info,
        description=DESCRIPTION_DECISION_TREE,
        estimator=repr(clf),
        method='decision tree',
    )