Python LearnerInspector примеры, facet.inspection.LearnerInspector Python примеры использования

Пример #1

0

Показать файл

Файл: test_shap_decomposition.py Проект: danielschulz/facet

def test_shap_decomposition_matrices(
    best_lgbm_crossfit: LearnerCrossfit[RegressorPipelineDF],
    feature_names: Set[str],
    regressor_inspector: LearnerInspector,
) -> None:
    """
    Check shape and value range of the pairwise feature matrices.

    The association, synergy and redundancy matrices returned by the inspector
    must each have one row and one column per feature name, and every value
    (with missing values counted as 0) must lie within [0.0, 1.0].

    :param best_lgbm_crossfit: crossfit fixture; not used directly in the test body
    :param feature_names: the set of expected feature names
    :param regressor_inspector: the inspector under test
    """
    expected_size = len(feature_names)

    # all three matrices are computed up front, in this order
    matrices_by_name = {
        "association": regressor_inspector.feature_association_matrix(),
        "synergy": regressor_inspector.feature_synergy_matrix(),
        "redundancy": regressor_inspector.feature_redundancy_matrix(),
    }

    for matrix_name, matrix in matrices_by_name.items():
        matrix_full_name = f"feature {matrix_name} matrix"

        # dimensions must match the number of features
        assert len(matrix) == expected_size, f"rows in {matrix_full_name}"
        assert len(matrix.columns) == expected_size, f"columns in {matrix_full_name}"

        # value range check, column by column; missing values count as 0
        filled = matrix.fillna(0)
        for column in filled.columns:
            column_values = filled.loc[:, column]
            assert (
                0.0 <= column_values.min() <= column_values.max() <= 1.0
            ), f"Values of [0.0, 1.0] in {matrix_full_name}"

Пример #2

0

Показать файл

def test_model_inspection_classifier_binary_single_shap_output() -> None:
    """
    Fit an inspector on a gradient boosting classifier for simulated binary data.

    The test makes no explicit assertions; it passes if building the sample,
    fitting the crossfit and fitting the inspector all complete without raising.
    """
    # simulate a classification data set with five informative features
    features, target = make_classification(
        n_samples=200, n_features=5, n_informative=5, n_redundant=0, random_state=42
    )

    # features and target go side by side into one data frame
    column_names = [f"f{i}" for i in range(5)] + ["target"]
    observations = pd.DataFrame(
        np.hstack((features, target[:, np.newaxis])), columns=column_names
    )

    # wrap the observations in a sample object
    sample = Sample(observations=observations, target_name="target")

    # fit the crossfit using bootstrap cross-validation
    classifier_pipeline = ClassifierPipelineDF(
        classifier=GradientBoostingClassifierDF(random_state=42)
    )
    crossfit = LearnerCrossfit(
        pipeline=classifier_pipeline,
        cv=BootstrapCV(n_splits=5, random_state=42),
        random_state=42,
        n_jobs=-3,
    ).fit(sample)

    # fit the inspector
    LearnerInspector(n_jobs=-3).fit(crossfit=crossfit)

Пример #3

0

Показать файл

Файл: conftest.py Проект: danielschulz/facet

def regressor_inspector(
        best_lgbm_crossfit: LearnerCrossfit[RegressorPipelineDF],
        n_jobs: int) -> LearnerInspector:
    """
    Create an inspector fitted on the given crossfit, using a tree explainer.

    The inspector is constructed with ``legacy=True`` and switched back to
    non-legacy mode after fitting.

    :param best_lgbm_crossfit: the crossfit to inspect
    :param n_jobs: number of jobs passed on to the inspector
    :return: the fitted inspector, with its ``_legacy`` flag set to ``False``
    """
    tree_explainer_factory = TreeExplainerFactory(
        feature_perturbation="tree_path_dependent",
        use_background_dataset=True,
    )

    fitted_inspector = LearnerInspector(
        explainer_factory=tree_explainer_factory,
        legacy=True,
        n_jobs=n_jobs,
    ).fit(crossfit=best_lgbm_crossfit)

    # Legacy mode was only requested in the constructor so that the legacy SHAP
    # decomposer is created along with the new SHAP vector projector; turn legacy
    # calculations off again for actual use.
    fitted_inspector._legacy = False

    return fitted_inspector

Пример #4

0

Показать файл

def regressor_inspector(
    best_lgbm_crossfit: LearnerCrossfit[RegressorPipelineDF], n_jobs: int
) -> LearnerInspector:
    """
    Create an inspector fitted on the given crossfit, using a tree explainer.

    :param best_lgbm_crossfit: the crossfit to inspect
    :param n_jobs: number of jobs passed on to the inspector
    :return: the result of fitting the inspector on the crossfit
    """
    tree_explainer_factory = TreeExplainerFactory(
        feature_perturbation="tree_path_dependent", use_background_dataset=True
    )
    inspector = LearnerInspector(
        explainer_factory=tree_explainer_factory, n_jobs=n_jobs
    )
    return inspector.fit(crossfit=best_lgbm_crossfit)

Пример #5

0

Показать файл

def iris_inspector_multi_class(
    iris_classifier_crossfit_multi_class: LearnerCrossfit[
        ClassifierPipelineDF[RandomForestClassifierDF]
    ],
    n_jobs: int,
) -> LearnerInspector[ClassifierPipelineDF[RandomForestClassifierDF]]:
    """
    Create an inspector with SHAP interaction enabled, fitted on the given crossfit.

    :param iris_classifier_crossfit_multi_class: the classifier crossfit to inspect
    :param n_jobs: number of jobs passed on to the inspector
    :return: the result of fitting the inspector on the crossfit
    """
    inspector = LearnerInspector(shap_interaction=True, n_jobs=n_jobs)
    return inspector.fit(crossfit=iris_classifier_crossfit_multi_class)

Пример #6

0

Показать файл

def test_model_inspection_classifier_binary(
    iris_sample_binary: Sample, iris_classifier_crossfit_binary, n_jobs: int
) -> None:
    """
    Test SHAP values and feature association for a binary classifier crossfit.

    Checks that the raw SHAP values are consistent with the predictions, that
    the default consolidation equals the mean across splits, that there is one
    consolidated row per observation in the sample, and that the clustered
    symmetrical association matrix matches the expected values within an
    absolute tolerance of 0.02. Finally renders the association linkage tree
    as a dendrogram report.

    :param iris_sample_binary: the sample the crossfit was fitted on
    :param iris_classifier_crossfit_binary: the classifier crossfit to inspect
    :param n_jobs: number of jobs passed on to the inspector
    """

    model_inspector = LearnerInspector(shap_interaction=False, n_jobs=n_jobs).fit(
        crossfit=iris_classifier_crossfit_binary
    )

    # calculate the shap value matrix, without any consolidation
    shap_values = model_inspector.shap_values(consolidate=None)

    # do the shap values add up to predictions minus a constant value?
    _validate_shap_values_against_predictions(
        shap_values=shap_values, crossfit=iris_classifier_crossfit_binary
    )

    shap_matrix_mean = model_inspector.shap_values()

    # is the consolidation correct?
    # `DataFrame.mean(level=...)` was removed in pandas 2.0. The equivalent is an
    # unsorted group-by on the same index level (the removed keyword grouped
    # with sort=False internally, so the row order is preserved).
    assert_frame_equal(
        shap_matrix_mean, shap_values.groupby(level=1, sort=False).mean()
    )

    # the length of rows in shap_values should be equal to the unique observation
    # indices we have had in the predictions_df
    assert len(shap_matrix_mean) == len(iris_sample_binary)

    # Shap decomposition matrices (feature dependencies)

    assert model_inspector.feature_association_matrix(
        clustered=True, symmetrical=True
    ).values == pytest.approx(
        np.array(
            [
                [1.0, 0.678, 0.133, 0.005],
                [0.678, 1.0, 0.145, 0.007],
                [0.133, 0.145, 1.0, 0.029],
                [0.005, 0.007, 0.029, 1.0],
            ]
        ),
        abs=0.02,
    )

    linkage_tree = model_inspector.feature_association_linkage()

    # blank line to separate the dendrogram report from preceding test output
    print()
    DendrogramDrawer(style=DendrogramReportStyle()).draw(
        data=linkage_tree, title="Iris (binary) feature association linkage"
    )

Пример #7

0

Показать файл

def test_model_inspection_classifier_interaction_dual_target(
    iris_sample_binary_dual_target: Sample,
    iris_classifier_ranker_dual_target: LearnerRanker[
        ClassifierPipelineDF[RandomForestClassifierDF]
    ],
    iris_target_name,
    n_jobs: int,
) -> None:
    """
    Fitting an inspector on a dual-target classifier crossfit must raise.

    The expected ``ValueError`` message states that only single-output
    classifiers are supported and mentions both target names, i.e. the given
    target name and the same name followed by ``2``.

    :param iris_sample_binary_dual_target: sample fixture; not used directly
    :param iris_classifier_ranker_dual_target: ranker providing the best crossfit
    :param iris_target_name: name of the first target
    :param n_jobs: number of jobs passed on to the inspector
    """
    crossfit_dual_target = iris_classifier_ranker_dual_target.best_model_crossfit_

    # regular expression the error message must match
    expected_message = (
        "only single-output classifiers .* are supported.*"
        f"{iris_target_name}.*{iris_target_name}2"
    )

    with pytest.raises(ValueError, match=expected_message):
        LearnerInspector(n_jobs=n_jobs).fit(crossfit=crossfit_dual_target)

Пример #8

0

Показать файл

Файл: test_shap_decomposition.py Проект: wwwK/facet

def test_shap_decomposition(regressor_inspector: LearnerInspector) -> None:
    """
    Cross-check the inspector's synergy and redundancy matrices against values
    recomputed from the unconsolidated SHAP interaction values.

    For a fixed list of feature pairs, relative synergy and redundancy are
    calculated by hand (symmetrical and asymmetrical variants) and compared
    with the corresponding matrix entries using ``np.isclose``. Basic matrix
    properties (shape, unit diagonal, no missing values) are checked as well.

    :param regressor_inspector: the fitted inspector under test
    """

    # noinspection PyPep8Naming
    def _calculate_relative_syn_and_red(
            feature_x: str, feature_y: str,
            is_indirect_syn_valid: bool) -> Tuple[float, float, float, float]:
        """
        Calculate relative synergy and redundancy for one pair of features.

        :param feature_x: name of the first feature (X)
        :param feature_y: name of the second feature (Y)
        :param is_indirect_syn_valid: if ``False``, the indirect synergy
            coefficients ``k_U`` and ``k_V`` are forced to 0.0
        :return: tuple of (relative synergy, relative redundancy,
            asymmetrical relative synergy, asymmetrical relative redundancy)
        """
        # SHAP interaction values without any consolidation
        iv = regressor_inspector.shap_interaction_values(consolidate=None)
        # Get 3 components for each feature:
        # S = interaction SHAP
        # A, B = independent SHAP
        # U, V = sum of interactions with 3rd variables
        # cross-sections of the interaction values for each of the two features,
        # selected on the last index level
        iv_x = iv.xs(feature_x, level=-1)
        iv_y = iv.xs(feature_y, level=-1)
        # row totals across all columns for X and Y
        X = iv_x.sum(axis=1).rename("X")
        Y = iv_y.sum(axis=1).rename("Y")
        A = iv_x.loc[:, feature_x]
        B = iv_y.loc[:, feature_y]
        S = iv_x.loc[:, feature_y]
        # whatever remains of the totals after removing the independent and the
        # pairwise interaction components
        U = X - A - S
        V = Y - B - S
        # calculate the "indirect" S, such that cov(U, S) == 0 and cov(V, S) == 0
        # NOTE(review): var(S) is used as a divisor here before the zero check on
        # varS below - confirm how `cov`/`var` behave when var(S) == 0
        k_U = max(0.0, cov(S, U) / var(S)) if is_indirect_syn_valid else 0.0
        k_V = max(0.0, cov(S, V) / var(S)) if is_indirect_syn_valid else 0.0
        print_list(**{"cov(U, S) / var(S)": k_U, "cov(V, S) / var(S)": k_V})
        varS = var(S)
        # if S has zero variance, S itself is used instead of the scaled S
        Su = S if varS == 0 else S * k_U
        Sv = S if varS == 0 else S * k_V
        U_ = U - Su
        V_ = V - Sv
        print_list(
            stdS=std(S),
            stdSu=std(Su),
            stdSv=std(Sv),
            stdU=std(U),
            stdU_=std(U_),
            stdV=std(V),
            stdV_=std(V_),
        )
        # calculate the minimal shared vector R, such that cov(X_ - R, Y_ - R) == 0
        X_ = X - S - Su
        Y_ = Y - S - Sv
        AUT = X_ + Y_
        AUT_asym = X_
        R_ = AUT / 2
        dXY = std(X_ - Y_)
        dR = std(R_)
        R = R_ * (1 - dXY / (2 * dR))
        print_list(
            stdX=std(X),
            stdY=std(Y),
            stdX_=std(X_),
            stdY_=std(Y_),
            stdR=std(R),
            covX_R_Y_R=round(cov(X_ - R, Y_ - R), 15),
        )
        # vectors for synergy (SYN), redundancy (RED) and uniqueness (UNI), each in
        # a symmetrical and an asymmetrical (X perspective only) variant
        SYN = 2 * S + Su + Sv
        SYN_asym = S + Su
        RED = 2 * R
        RED_asym = R
        UNI = X + Y - RED
        UNI_asym = X - RED_asym
        # magnitudes of the vectors, as returned by `std`
        syn = std(SYN)
        aut = std(AUT)
        red = std(RED)
        uni = std(UNI)
        syn_asym = std(SYN_asym)
        aut_asym = std(AUT_asym)
        red_asym = std(RED_asym)
        uni_asym = std(UNI_asym)
        print_list(syn=syn, aut=aut, red=red, uni=uni)
        # relative values: synergy vs. autonomy, and redundancy vs. uniqueness
        return (
            syn / (syn + aut),
            red / (red + uni),
            syn_asym / (syn_asym + aut_asym),
            red_asym / (red_asym + uni_asym),
        )

    # feature pairs to compare; the flag is passed on as `is_indirect_syn_valid`
    for i, j, indirect_syn in [
        ("LSTAT", "RM", False),
        ("LSTAT", "DIS", True),
        ("LSTAT", "AGE", False),
        ("LSTAT", "NOX", False),
        ("LSTAT", "CRIM", False),
        ("RM", "DIS", False),
        ("RM", "AGE", False),
        ("RM", "NOX", False),
        ("RM", "CRIM", False),
    ]:
        print(f"\ncomparing features X={i} and Y={j}")

        syn_rel, red_rel, syn_rel_asym, red_rel_asym = _calculate_relative_syn_and_red(
            feature_x=i, feature_y=j, is_indirect_syn_valid=indirect_syn)

        # NOTE(review): the four matrices do not depend on the feature pair but are
        # requested again on every iteration
        syn_matrix = regressor_inspector.feature_synergy_matrix(
            symmetrical=True)
        red_matrix = regressor_inspector.feature_redundancy_matrix(
            symmetrical=True)
        syn_matrix_asym = regressor_inspector.feature_synergy_matrix()
        red_matrix_asym = regressor_inspector.feature_redundancy_matrix()

        print_list(
            syn_rel=syn_rel,
            red_rel=red_rel,
            syn_rel_asym=syn_rel_asym,
            red_rel_asym=red_rel_asym,
            syn_matrix=syn_matrix.loc[i, j],
            red_matrix=red_matrix.loc[i, j],
            syn_matrix_asym=syn_matrix_asym.loc[i, j],
            red_matrix_asym=red_matrix_asym.loc[i, j],
            percentage=True,
        )

        # the symmetrical matrices must match in both directions; the asymmetrical
        # matrices are only compared from the perspective (i, j)
        assert np.isclose(red_matrix.loc[i, j], red_rel)
        assert np.isclose(red_matrix.loc[j, i], red_rel)
        assert np.isclose(syn_matrix.loc[i, j], syn_rel)
        assert np.isclose(syn_matrix.loc[j, i], syn_rel)
        assert np.isclose(red_matrix_asym.loc[i, j], red_rel_asym)
        assert np.isclose(syn_matrix_asym.loc[i, j], syn_rel_asym)

        # check basic matrix properties

        n_features = len(regressor_inspector.features)

        for matrix in (syn_matrix, syn_matrix_asym, red_matrix,
                       red_matrix_asym):
            # matrix shape is n_features x n_features
            assert matrix.shape == (n_features, n_features)

            # values on the diagonal are all 1.0
            for a in range(n_features):
                assert matrix.iloc[a, a] == 1.0

            # there are no nan values
            assert matrix.notna().all().all()

Пример #9

0

Показать файл

Файл: test_shap_decomposition.py Проект: wwwK/facet

def test_shap_decomposition_matrices(
    best_lgbm_crossfit: LearnerCrossfit[RegressorPipelineDF],
    feature_names: Set[str],
    regressor_inspector: LearnerInspector,
) -> None:
    """
    Check shape, value range and expected values of the pairwise feature matrices.

    The association, synergy and redundancy matrices must each have one row and
    one column per feature name, with all values (missing values counted as 0)
    within [0.0, 1.0]. The unclustered symmetrical association matrix is also
    compared against expected values with an absolute tolerance of 0.02, and
    the association linkage must be a ``LinkageTree``.

    :param best_lgbm_crossfit: crossfit fixture; not used directly in the test body
    :param feature_names: the set of expected feature names
    :param regressor_inspector: the inspector under test
    """
    # Shap decomposition matrices (feature dependencies)
    association_matrix: pd.DataFrame = regressor_inspector.feature_association_matrix(
        clustered=False, symmetrical=True
    )

    expected_size = len(feature_names)

    matrices_by_name = {
        "association": association_matrix,
        "synergy": regressor_inspector.feature_synergy_matrix(),
        "redundancy": regressor_inspector.feature_redundancy_matrix(),
    }

    for matrix_name, matrix in matrices_by_name.items():
        matrix_full_name = f"feature {matrix_name} matrix"

        # dimensions must match the number of features
        assert len(matrix) == expected_size, f"rows in {matrix_full_name}"
        assert len(matrix.columns) == expected_size, f"columns in {matrix_full_name}"

        # value range check, column by column; missing values count as 0
        filled = matrix.fillna(0)
        for column in filled.columns:
            column_values = filled.loc[:, column]
            assert (
                0.0 <= column_values.min() <= column_values.max() <= 1.0
            ), f"Values of [0.0, 1.0] in {matrix_full_name}"

    # expected association values, one row per feature
    expected_association = np.array(
        [
            [1.0, 0.043, 0.233, 0.0, 0.162, 0.078, 0.192]
            + [0.156, 0.009, 0.022, 0.035, 0.008, 0.07],
            [0.043, 1.0, 0.155, 0.0, 0.056, 0.055, 0.017]
            + [0.225, 0.024, 0.021, 0.049, 0.145, 0.034],
            [0.233, 0.155, 1.0, 0.0, 0.123, 0.207, 0.15]
            + [0.044, 0.069, 0.225, 0.241, 0.149, 0.209],
            [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]
            + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            [0.162, 0.056, 0.123, 0.0, 1.0, 0.051, 0.017]
            + [0.156, 0.19, 0.08, 0.15, 0.025, 0.029],
            [0.078, 0.055, 0.207, 0.0, 0.051, 1.0, 0.088]
            + [0.005, 0.081, 0.14, 0.027, 0.058, 0.49],
            [0.192, 0.017, 0.15, 0.0, 0.017, 0.088, 1.0]
            + [0.128, 0.015, 0.269, 0.14, 0.096, 0.295],
            [0.156, 0.225, 0.044, 0.0, 0.156, 0.005, 0.128]
            + [1.0, 0.255, 0.158, 0.273, 0.132, 0.023],
            [0.009, 0.024, 0.069, 0.0, 0.19, 0.081, 0.015]
            + [0.255, 1.0, 0.223, 0.188, 0.035, 0.049],
            [0.022, 0.021, 0.225, 0.0, 0.08, 0.14, 0.269]
            + [0.158, 0.223, 1.0, 0.284, 0.182, 0.097],
            [0.035, 0.049, 0.241, 0.0, 0.15, 0.027, 0.14]
            + [0.273, 0.188, 0.284, 1.0, 0.027, 0.031],
            [0.008, 0.145, 0.149, 0.0, 0.025, 0.058, 0.096]
            + [0.132, 0.035, 0.182, 0.027, 1.0, 0.057],
            [0.07, 0.034, 0.209, 0.0, 0.029, 0.49, 0.295]
            + [0.023, 0.049, 0.097, 0.031, 0.057, 1.0],
        ]
    )

    # check actual values:
    assert association_matrix.values == pytest.approx(expected_association, abs=0.02)

    # cluster associated features
    association_linkage = regressor_inspector.feature_association_linkage()

    assert isinstance(association_linkage, LinkageTree)

Пример #10

0

Показать файл

def test_model_inspection(
    regressor_grids: Sequence[LearnerGrid[RegressorPipelineDF]],
    regressor_ranker: LearnerRanker[RegressorPipelineDF],
    best_lgbm_crossfit: LearnerCrossfit[RegressorPipelineDF],
    feature_names: Set[str],
    regressor_inspector: LearnerInspector,
    cv_kfold: KFold,
    sample: Sample,
    simple_preprocessor: TransformerDF,
    n_jobs: int,
) -> None:
    """
    Test the regressor ranking and the SHAP values produced by the inspector.

    Checks the ranking scores, the handling of an invalid consolidation method,
    the index and column names of raw and consolidated SHAP values, and that
    for each split the total SHAP value differs from the model prediction by a
    constant. Finally fits a second inspector with a kernel explainer and draws
    its feature association linkage as text.

    :param regressor_grids: fixture; not used directly in the test body
    :param regressor_ranker: the fitted ranker whose ranking is checked
    :param best_lgbm_crossfit: the crossfit the inspectors are based on
    :param feature_names: the set of expected feature names
    :param regressor_inspector: the fitted inspector under test
    :param cv_kfold: fixture; not used directly in the test body
    :param sample: the sample providing the observations and features
    :param simple_preprocessor: fixture; not used directly in the test body
    :param n_jobs: number of jobs passed on to the second inspector
    """

    # define checksums for this test
    expected_scores = [0.418, 0.4, 0.386, 0.385, 0.122] + [
        0.122,
        -0.074,
        -0.074,
        -0.074,
        -0.074,
    ]

    log.debug(f"\n{regressor_ranker.summary_report()}")

    check_ranking(
        ranking=regressor_ranker.ranking_,
        expected_scores=expected_scores,
        expected_learners=None,
        expected_parameters=None,
    )

    # using an invalid consolidation method raises an exception
    with pytest.raises(ValueError, match="unknown consolidation method: invalid"):
        regressor_inspector.shap_values(consolidate="invalid")

    shap_values_raw = regressor_inspector.shap_values(consolidate=None)
    shap_values_mean = regressor_inspector.shap_values(consolidate="mean")
    shap_values_std = regressor_inspector.shap_values(consolidate="std")

    # method shap_values without parameter is equal to "mean" consolidation
    assert_frame_equal(shap_values_mean, regressor_inspector.shap_values())

    # the length of rows in shap_values should be equal to the unique observation
    # indices we have had in the predictions_df
    assert len(shap_values_mean) == len(sample)

    # index names
    assert shap_values_mean.index.names == [Sample.IDX_OBSERVATION]
    assert shap_values_mean.columns.names == [Sample.IDX_FEATURE]
    assert shap_values_std.index.names == [Sample.IDX_OBSERVATION]
    assert shap_values_std.columns.names == [Sample.IDX_FEATURE]
    assert shap_values_raw.index.names == (["split", "observation"])
    assert shap_values_raw.columns.names == [Sample.IDX_FEATURE]

    # column index
    assert set(shap_values_mean.columns) == feature_names

    # check that the SHAP values add up to the predictions
    shap_totals_raw = shap_values_raw.sum(axis=1)

    for split_id, model in enumerate(best_lgbm_crossfit.models()):
        # for each model in the crossfit, calculate the difference between total
        # SHAP values and prediction for every observation. This is always the same
        # constant value, so the mean absolute deviation is zero

        shap_minus_pred = shap_totals_raw.xs(key=split_id) - model.predict(
            X=sample.features
        )

        # `Series.mad()` was removed in pandas 2.0; compute the mean absolute
        # deviation around the mean explicitly instead
        mean_abs_deviation = (
            (shap_minus_pred - shap_minus_pred.mean()).abs().mean()
        )
        assert (
            round(mean_abs_deviation, 12) == 0.0
        ), f"predictions matching total SHAP for split {split_id}"

    #  test the ModelInspector with a KernelExplainer:

    inspector_2 = LearnerInspector(
        explainer_factory=KernelExplainerFactory(link="identity", data_size_limit=20),
        n_jobs=n_jobs,
    ).fit(crossfit=best_lgbm_crossfit)
    inspector_2.shap_values()

    linkage_tree = inspector_2.feature_association_linkage()

    # blank line to separate the dendrogram from preceding test output
    print()
    DendrogramDrawer(style="text").draw(data=linkage_tree, title="Test")

Пример #11

0

Показать файл

def test_model_inspection_classifier_interaction(
    iris_sample_binary: Sample,
    iris_classifier_crossfit_binary: LearnerCrossfit[
        ClassifierPipelineDF[RandomForestClassifierDF]
    ],
    n_jobs: int,
) -> None:
    """
    Test SHAP interaction values and feature matrices for a binary classifier.

    Two inspectors are fitted on the same crossfit, one with and one without
    SHAP interaction. The test checks that interaction values sum up to the
    plain SHAP values (within 0.015), that column names and row counts are as
    expected, that SHAP values are consistent with predictions, and that the
    synergy, redundancy and association matrices match expected values within
    an absolute tolerance of 0.02, both unclustered and clustered. Finally the
    redundancy linkage tree is rendered as a dendrogram report.

    :param iris_sample_binary: the sample the crossfit was fitted on
    :param iris_classifier_crossfit_binary: the classifier crossfit to inspect
    :param n_jobs: number of jobs passed on to the inspectors
    """
    warnings.filterwarnings("ignore", message="You are accessing a training score")

    def _new_explainer_factory():
        # each inspector gets its own explainer factory instance
        return TreeExplainerFactory(
            feature_perturbation="tree_path_dependent", use_background_dataset=True
        )

    model_inspector = LearnerInspector(
        explainer_factory=_new_explainer_factory(),
        n_jobs=n_jobs,
    ).fit(crossfit=iris_classifier_crossfit_binary)

    model_inspector_no_interaction = LearnerInspector(
        shap_interaction=False,
        explainer_factory=_new_explainer_factory(),
        n_jobs=n_jobs,
    ).fit(crossfit=iris_classifier_crossfit_binary)

    # SHAP interaction values, and the SHAP values obtained by summing them up
    # per observation
    shap_interaction_values = model_inspector.shap_interaction_values()
    shap_values = shap_interaction_values.groupby(by="observation").sum()

    # Summed interaction values must agree with the directly calculated SHAP
    # values; SHAP values and SHAP interaction values are not returned exactly
    # alike, so differences below 0.015 are tolerated.
    # todo: review accuracy after implementing use of a background dataset
    shap_difference = model_inspector_no_interaction.shap_values() - shap_values
    assert shap_difference.abs().max().max() < 0.015

    # both data frames use the feature names as their column names
    feature_columns = iris_sample_binary.feature_names
    assert shap_values.columns.to_list() == feature_columns
    assert shap_interaction_values.columns.to_list() == feature_columns

    # one row of SHAP values per observation
    n_observations = len(iris_sample_binary)
    assert len(shap_values) == n_observations

    # one row of SHAP interaction values per observation and feature
    assert len(shap_interaction_values) == (n_observations * len(feature_columns))

    # do the shap values add up to predictions minus a constant value?
    shap_values_per_split = (
        model_inspector.shap_interaction_values(consolidate=None)
        .groupby(level=[0, 1])
        .sum()
    )
    _validate_shap_values_against_predictions(
        shap_values=shap_values_per_split,
        crossfit=iris_classifier_crossfit_binary,
    )

    # expected symmetrical matrices: (matrix method, clustered?, expected values)
    expected_matrices = [
        (
            model_inspector.feature_synergy_matrix,
            False,
            [
                [1.000, 0.047, 0.101, 0.120],
                [0.047, 1.000, 0.017, 0.021],
                [0.101, 0.017, 1.000, 0.100],
                [0.120, 0.021, 0.100, 1.000],
            ],
        ),
        (
            model_inspector.feature_synergy_matrix,
            True,
            [
                [1.000, 0.101, 0.100, 0.017],
                [0.101, 1.000, 0.120, 0.047],
                [0.100, 0.120, 1.000, 0.021],
                [0.017, 0.047, 0.021, 1.000],
            ],
        ),
        (
            model_inspector.feature_redundancy_matrix,
            False,
            [
                [1.0, 0.039, 0.181, 0.206],
                [0.039, 1.0, 0.005, 0.011],
                [0.181, 0.005, 1.0, 0.792],
                [0.206, 0.011, 0.792, 1.0],
            ],
        ),
        (
            model_inspector.feature_redundancy_matrix,
            True,
            [
                [1.000, 0.792, 0.181, 0.005],
                [0.792, 1.000, 0.206, 0.011],
                [0.181, 0.206, 1.000, 0.039],
                [0.005, 0.011, 0.039, 1.000],
            ],
        ),
        (
            model_inspector.feature_association_matrix,
            False,
            [
                [1.0, 0.028, 0.14, 0.128],
                [0.028, 1.0, 0.005, 0.002],
                [0.14, 0.005, 1.0, 0.681],
                [0.128, 0.002, 0.681, 1.0],
            ],
        ),
        (
            model_inspector.feature_association_matrix,
            True,
            [
                [1.000, 0.681, 0.128, 0.002],
                [0.681, 1.000, 0.140, 0.005],
                [0.128, 0.140, 1.000, 0.026],
                [0.002, 0.005, 0.026, 1.000],
            ],
        ),
    ]

    for matrix_method, clustered, expected_values in expected_matrices:
        actual_matrix = matrix_method(clustered=clustered, symmetrical=True)
        assert actual_matrix.values == pytest.approx(
            np.array(expected_values), abs=0.02
        )

    linkage_tree = model_inspector.feature_redundancy_linkage()

    # blank line to separate the dendrogram report from preceding test output
    print()
    report_drawer = DendrogramDrawer(style=DendrogramReportStyle())
    report_drawer.draw(
        data=linkage_tree, title="Iris (binary) feature redundancy linkage"
    )

Пример #12

0

Показать файл

Файл: run_inpection.py Проект: iamrehman/dsa2

"""
pip install gamma-facet

Model Inspection
FACET implements several model inspection methods for scikit-learn estimators. FACET enhances model inspection by providing global metrics that complement the local perspective of SHAP. The key global metrics for each pair of features in a model are:

Synergy

The degree to which the model combines information from one feature with another to predict the target. For example, let's assume we are predicting cardiovascular health using age and gender and the fitted model includes a complex interaction between them. This means these two features are synergistic for predicting cardiovascular health. Further, both features are important to the model and removing either one would significantly impact performance. Let's assume age brings more information to the joint contribution than gender. This asymmetric contribution means the synergy for (age, gender) is less than the synergy for (gender, age). To think about it another way, imagine the prediction is a coordinate you are trying to reach. From your starting point, age gets you much closer to this point than gender, however, you need both to get there. Synergy reflects the fact that gender gets more help from age (higher synergy from the perspective of gender) than age does from gender (lower synergy from the perspective of age) to reach the prediction. This leads to an important point: synergy is a naturally asymmetric property of the global information two interacting features contribute to the model predictions. Synergy is expressed as a percentage ranging from 0% (full autonomy) to 100% (full synergy).

Redundancy

The degree to which a feature in a model duplicates the information of a second feature to predict the target. For example, let's assume we had house size and number of bedrooms for predicting house price. These features capture similar information as the more bedrooms the larger the house and likely a higher price on average. The redundancy for (number of bedrooms, house size) will be greater than the redundancy for (house size, number of bedrooms). This is because house size "knows" more of what number of bedrooms does for predicting house price than vice-versa. Hence, there is greater redundancy from the perspective of number of bedrooms. Another way to think about it is removing house size will be more detrimental to model performance than removing number of bedrooms, as house size can better compensate for the absence of number of bedrooms. This also implies that house size would be a more important feature than number of bedrooms in the model. The important point here is that like synergy, redundancy is a naturally asymmetric property of the global information feature pairs have for predicting an outcome. Redundancy is expressed as a percentage ranging from 0% (full uniqueness) to 100% (full redundancy).

"""

# fit the model inspector
from facet.inspection import LearnerInspector

# NOTE: `mymodel` is not defined anywhere in this script. It is passed as the
# `crossfit` argument below, so a fitted crossfit must be assigned to it before
# this code runs.
inspector = LearnerInspector()
inspector.fit(crossfit=mymodel)

# Synergy
# (this heading was a bare name in the script, which raised a NameError)

# visualise synergy as a matrix
from pytools.viz.matrix import MatrixDrawer

synergy_matrix = inspector.feature_synergy_matrix(symmetrical=True)
MatrixDrawer(style="matplot%").draw(synergy_matrix, title="Synergy Matrix")

Python LearnerInspector примеры использования