Exemplo n.º 1
0
def write(session: Dict[str, Any]):
    """Render the "Global Effects" page with an optional partial dependence plot.

    Parameters
    ----------
    session : dict
        Must provide ``"X_train"`` and ``"X_valid"`` (objects exposing
        ``.columns``) and ``"m"``, the estimator handed to
        ``plot_partial_dependence``.
    """
    # Sentinel entry of the feature selector meaning "draw nothing".
    no_plot_option = "Don't plot partial dependence"

    st.title("Global Effects")
    st.markdown("#### Partial Dependence of Predictions on a Feature")
    feat = st.selectbox(
        "Please select a feature",
        [no_plot_option] + sorted(session["X_train"].columns),
    )
    if feat == no_plot_option:
        return

    dataset = st.selectbox(
        "Please select dataset on which to calculate partial dependence",
        ["Test", "Train"],
    )
    # The "Test" choice is backed by the "X_valid" entry of the session.
    X = session["X_train"] if dataset == "Train" else session["X_valid"]
    plot_partial_dependence(
        session["m"],
        X,
        features=[feat],
        feature_names=X.columns,
        grid_resolution=20,
    )
    plt.tight_layout()
    # Pass the figure explicitly instead of relying on Streamlit picking up
    # the implicit global pyplot figure.
    st.pyplot(plt.gcf())
def test_plot_partial_dependence_multiclass(pyplot):
    """Test the partial dependence plot function on multi-class input."""
    iris = load_iris()
    grid_resolution = 25

    # Integer class labels.
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, iris.target)
    plot_partial_dependence(clf, iris.data, [0, 1],
                            target=0,
                            grid_resolution=grid_resolution)
    fig = pyplot.gcf()
    axs = fig.get_axes()
    assert len(axs) == 2
    # ``has_data`` is a method; without the call the bound method object is
    # always truthy and the assertion could never fail.
    assert all(ax.has_data() for ax in axs)

    # now with symbol labels
    target = iris.target_names[iris.target]
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, target)
    plot_partial_dependence(clf, iris.data, [0, 1],
                            target='setosa',
                            grid_resolution=grid_resolution)
    fig = pyplot.gcf()
    axs = fig.get_axes()
    assert len(axs) == 2
    assert all(ax.has_data() for ax in axs)
def test_plot_partial_dependence_incorrent_num_axes(pyplot, clf_diabetes,
                                                    diabetes, nrows, ncols):
    """Check the error raised when ``ax`` does not hold exactly two axes.

    The ``nrows`` x ``ncols`` grid of axes is passed as a list, as a tuple and
    as the object returned by ``pyplot.subplots``; both
    ``plot_partial_dependence`` and ``disp.plot`` must reject each form.
    """
    _, grid = pyplot.subplots(nrows, ncols)
    flattened = grid.ravel()
    ax_candidates = [list(flattened), tuple(flattened), grid]

    expected_error = "Expected ax to have 2 axes, got {}".format(nrows * ncols)

    # Arguments shared by every call below.
    common = dict(grid_resolution=5, feature_names=diabetes.feature_names)
    requested = ["age", "bmi"]

    display = plot_partial_dependence(
        clf_diabetes, diabetes.data, requested, **common
    )

    for candidate in ax_candidates:
        # through the plotting function
        with pytest.raises(ValueError, match=expected_error):
            plot_partial_dependence(
                clf_diabetes, diabetes.data, requested, ax=candidate, **common
            )

        # through the already-built display object
        with pytest.raises(ValueError, match=expected_error):
            display.plot(ax=candidate)
def test_plot_partial_dependence_multioutput():
    """Test the partial dependence plot function on multi-output input."""
    import matplotlib.pyplot as plt  # noqa
    (X, y), _ = multioutput_regression_data
    clf = LinearRegression()
    clf.fit(X, y)

    grid_resolution = 25
    try:
        # One plot per output index of the multi-output regressor.
        for target in (0, 1):
            plot_partial_dependence(clf, X, [0, 1],
                                    target=target,
                                    grid_resolution=grid_resolution)
            fig = plt.gcf()
            axs = fig.get_axes()
            assert len(axs) == 2
            # ``has_data`` must be called; the bare bound method is always
            # truthy and would make this assertion vacuous.
            assert all(ax.has_data() for ax in axs)
    finally:
        # Close the figures even when an assertion fails so that they do not
        # leak into later tests.
        plt.close('all')
Exemplo n.º 5
0
 def make_pdp_all(self):
     """Plot partial dependence for the top-N features, save and show it.

     Reads ``self.model``, ``self.X``, ``self.top_n_feature_indicies`` and
     ``self.pretty_features``. The image is written to
     ``charts/partial_dependence_all`` below ``ROOT_IMGS_DIRECTORY``.
     """
     # Partial Dependence Plots
     print("Creating partial_dependence_all plot. This will take a moment.")
     fig, _ = plt.subplots(figsize=(16, 12), facecolor='white')
     plot_partial_dependence(self.model,
                             self.X,
                             self.top_n_feature_indicies,
                             feature_names=self.pretty_features,
                             fig=fig,
                             line_kw={
                                 'c': '#40FF40',
                                 'linewidth': 8
                             },
                             n_jobs=-1)
     save_image_path = os.path.join(ROOT_IMGS_DIRECTORY,
                                    'charts/partial_dependence_all')
     # ``papertype`` and ``frameon`` were passed as None (i.e. "use the
     # default"); both keywords have been removed from Matplotlib's savefig,
     # so they are dropped rather than forwarded.
     plt.savefig(save_image_path,
                 dpi=None,
                 facecolor='w',
                 edgecolor='w',
                 orientation='portrait',
                 format=None,
                 transparent=False,
                 bbox_inches=None,
                 pad_inches=0.1,
                 metadata=None)
     print(f"barpartial_dependence_all plot saved to {save_image_path}.")
     plt.show()
Exemplo n.º 6
0
def partial_dependence_titanic(pth):
    """Fit a random forest on the Titanic data and plot partial dependence.

    Partial dependence is drawn for ``Pclass``, ``Age`` and ``Fare``; the
    figure is saved to ``pth`` and then shown.

    Parameters
    ----------
    pth : str or path-like
        Destination handed to ``fig.savefig``.
    """
    from data.real_data import get_titanic
    df = get_titanic(original=True)
    df = df.dropna(subset=['Pclass', 'Age', 'Fare', 'Sex'])
    # Take an explicit copy and assign the recoded column back. An in-place
    # ``replace`` on a column pulled out of a slice is chained assignment:
    # pandas warns about it, and under copy-on-write it leaves ``X``
    # unchanged, so the classifier would be fed the raw strings.
    X = df[['Pclass', 'Age', 'Fare', 'Parch', 'Sex']].copy()
    X['Sex'] = X['Sex'].replace({'male': 1, 'female': 0})
    y = df['Survived']
    clf = RandomForestClassifier(200,
                                 min_samples_split=20,
                                 min_samples_leaf=5,
                                 max_features=3).fit(X, y)
    fig, _ = plt.subplots(ncols=3, figsize=(8, 4))
    plot_partial_dependence(clf,
                            X, ['Pclass', 'Age', 'Fare'],
                            fig=fig,
                            grid_resolution=50)
    # Re-fetch the current figure and its axes after plotting.
    fig = plt.gcf()
    axes = fig.get_axes()

    axes[1].set_ylabel('Partial dependence Survived')
    axes[1].set_xticks([1, 2, 3])
    axes[1].set_xticklabels([1, 2, 3])
    fig.tight_layout()
    fig.savefig(pth, bbox_inches='tight')
    plt.show()
Exemplo n.º 7
0
def pdp(model, X, features):
    """Draw average partial dependence plots for targets 1 through 5.

    One ``plot_partial_dependence`` call is made per target, in increasing
    order, and all resulting figures are shown together at the end.
    """
    print('Начинает работать алгоритм pdp/ice')
    for target_index in range(1, 6):
        plot_partial_dependence(model,
                                X,
                                features,
                                kind='average',
                                target=target_index)
    plt.show()
def plotpdpOfDistanceToTrueResultSklearn(data, subplots, pr):
    '''
    Draw, decorate and save the sklearn partial dependence plots for ``pr``.

    :param data: pandas dataframe with datasets where each row represents a dataset
    :param subplots: columns to examine in the PDP plot, one subplot per entry
    :param pr: Predictor of ML-System
    '''
    pr.setReturnDistanceOfClass(True)
    _result_column = pr.resultColumn  # attribute read kept from the original; the value is not used below
    encoded = pr.encode(data)

    pr.standardColumnsNoResultColumn()
    plot_partial_dependence(pr,
                            encoded,
                            subplots,
                            feature_names=pr.standardColumns)

    # Post-process every subplot: position, x ticks and title.
    for position, column in enumerate(subplots):
        axis = plt.gcf().axes[position]
        spreadfourSubplotsHorizontally(axis, position)
        ticks(axis, pr, column, "x")
        plt.title("PDP for " + column)
    plt.gcf().set_size_inches(30, 7)

    save("plot_partial_dependence textBruttoClient", plt=plt)
    writeDictToFile(pr.encodingDictionary, pr.decodedColumns)
def test_plot_partial_dependence_multioutput():
    """Test the partial dependence plot function on multi-output input."""
    import matplotlib.pyplot as plt  # noqa
    (X, y), _ = multioutput_regression_data
    clf = LinearRegression()
    clf.fit(X, y)

    grid_resolution = 25
    try:
        # One plot per output index of the multi-output regressor.
        for target in (0, 1):
            plot_partial_dependence(clf, X, [0, 1],
                                    target=target,
                                    grid_resolution=grid_resolution)
            fig = plt.gcf()
            axs = fig.get_axes()
            assert len(axs) == 2
            # ``has_data`` must be called; the bare bound method is always
            # truthy and would make this assertion vacuous.
            assert all(ax.has_data() for ax in axs)
    finally:
        # Run the cleanup even when an assertion above fails.
        close_figure()
Exemplo n.º 10
0
def test_plot_partial_dependence_with_same_axes(pyplot, clf_boston, boston):
    """Reusing one ``ax`` for a second partial dependence plot must fail.

    The first call creates two new axes in the space of the passed-in axes,
    so the figure ends up with three axes. The API does not let a second call
    reuse that same axes: it would create two more, for five in total. The
    supported way is to forward the generated axes to the second call:

        disp1 = plot_partial_dependence(...)
        disp2 = plot_partial_dependence(..., ax=disp1.axes_)
    """
    _, shared_ax = pyplot.subplots()
    call_kwargs = dict(grid_resolution=25,
                       feature_names=boston.feature_names,
                       ax=shared_ax)
    requested = ['CRIM', 'ZN']

    plot_partial_dependence(clf_boston, boston.data, requested, **call_kwargs)

    expected_error = ("The ax was already used in another plot function, "
                      "please set ax=display.axes_ instead")

    with pytest.raises(ValueError, match=expected_error):
        plot_partial_dependence(clf_boston, boston.data, requested,
                                **call_kwargs)
Exemplo n.º 11
0
def test_plot_partial_dependence_multiclass(pyplot):
    """Test the partial dependence plot function on multi-class input."""
    iris = load_iris()
    grid_resolution = 25

    # Integer class labels.
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, iris.target)
    plot_partial_dependence(clf,
                            iris.data, [0, 1],
                            target=0,
                            grid_resolution=grid_resolution)
    fig = pyplot.gcf()
    axs = fig.get_axes()
    assert len(axs) == 2
    # ``has_data`` is a method; without the call the bound method object is
    # always truthy and the assertion could never fail.
    assert all(ax.has_data() for ax in axs)

    # now with symbol labels
    target = iris.target_names[iris.target]
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, target)
    plot_partial_dependence(clf,
                            iris.data, [0, 1],
                            target='setosa',
                            grid_resolution=grid_resolution)
    fig = pyplot.gcf()
    axs = fig.get_axes()
    assert len(axs) == 2
    assert all(ax.has_data() for ax in axs)
Exemplo n.º 12
0
 def make_pdp_pair(self, feat_idx_lst=None):
     """Plot partial dependence for a selection of features, save and show it.

     Parameters
     ----------
     feat_idx_lst : list, optional
         Features forwarded to ``plot_partial_dependence``. Defaults to
         ``[0, 35]`` when omitted.

     Reads ``self.model``, ``self.X`` and ``self.pretty_features``. The image
     is written to ``charts/partial_dependence_pair`` below
     ``ROOT_IMGS_DIRECTORY``.
     """
     # Build the default per call instead of sharing one mutable list object
     # across every invocation.
     if feat_idx_lst is None:
         feat_idx_lst = [0, 35]
     print("Creating partial_dependence_pair plot.")
     fig, ax = plt.subplots(figsize=(12, 6), facecolor='white')
     plt.title("Top Features Partial Dependence Plots", fontsize='large')
     ax.set_facecolor('whitesmoke')
     plot_partial_dependence(self.model,
                             self.X,
                             feat_idx_lst,
                             feature_names=self.pretty_features,
                             fig=fig,
                             line_kw={
                                 'c': '#40FF40',
                                 'linewidth': 10
                             },
                             n_jobs=-1)
     plt.tight_layout(pad=1.08, h_pad=None, w_pad=None, rect=None)
     save_image_path = os.path.join(ROOT_IMGS_DIRECTORY,
                                    'charts/partial_dependence_pair')
     # ``papertype`` and ``frameon`` were passed as None (i.e. "use the
     # default"); both keywords have been removed from Matplotlib's savefig,
     # so they are dropped rather than forwarded.
     plt.savefig(save_image_path,
                 dpi=None,
                 facecolor='w',
                 edgecolor='w',
                 orientation='portrait',
                 format=None,
                 transparent=False,
                 bbox_inches=None,
                 pad_inches=0.1,
                 metadata=None)
     print(f"barpartial_dependence_pair plot saved to {save_image_path}.")
     plt.show()
Exemplo n.º 13
0
def test_plot_partial_dependence_passing_numpy_axes(pyplot, clf_boston,
                                                    boston):
    """Plot a second estimator onto the ``axes_`` array of a first display."""
    requested = ['CRIM', 'ZN']
    shared = dict(grid_resolution=25,
                  feature_names=boston.feature_names.tolist())

    first = plot_partial_dependence(clf_boston, boston.data, requested,
                                    **shared)
    assert first.axes_.shape == (1, 2)
    left, right = first.axes_[0, 0], first.axes_[0, 1]
    assert left.get_ylabel() == "Partial dependence"
    assert right.get_ylabel() == ""
    assert len(left.get_lines()) == 1
    assert len(right.get_lines()) == 1

    linear = LinearRegression()
    linear.fit(boston.data, boston.target)

    # Draw the linear model on top of the axes produced by the first call.
    second = plot_partial_dependence(linear, boston.data, requested,
                                     ax=first.axes_, **shared)

    assert np.all(first.axes_ == second.axes_)
    for column in (0, 1):
        assert len(second.axes_[0, column].get_lines()) == 2
Exemplo n.º 14
0
def production_plotting(model, X, axes):
    '''
    Draw partial dependence plots for features 0 and 1 of ``model`` and label
    them as the two production metrics (number of colonies, yield per colony).

    Parameters
    ----------
    model - The instantiated and fit model to be used for plotting.
    X - The data that is used to create the grid of values to cycle through
        for partial dependence.
    axes - Indexable pair of axes; ``axes[0]`` and ``axes[1]`` receive the
        labels, ticks and titles.

    Returns
    ----------
    None
    '''
    plot_partial_dependence(estimator=model, X=X, features=[0, 1], ax=axes)

    # (x label, tick positions, tick labels, title) for each panel, in order.
    panel_specs = (
        ('Number of Colonies (in Thousands)',
         [0, 50000, 100000, 150000, 200000, 250000],
         ['0', '50', '100', '150', '200', '250'],
         'Colonies per State'),
        ('Yield per Colony (in Lbs)',
         [40, 80, 120],
         ['40', '80', '120'],
         'Production per Colony'),
    )
    for index, (x_label, tick_positions, tick_labels, title) in enumerate(
            panel_specs):
        panel = axes[index]
        panel.set_xlabel(x_label, fontsize=15)
        panel.set_ylabel('Partial Dependence', fontsize=15)
        panel.set_xticks(tick_positions)
        panel.set_xticklabels(tick_labels)
        panel.set_title(title, fontsize=20)
def test_plot_partial_dependence_passing_numpy_axes(pyplot, clf_diabetes,
                                                    diabetes, kind, lines):
    """Plot a second estimator onto the ``axes_`` array of a first display.

    ``lines`` is the number of lines expected per axes after one plot of the
    given ``kind``; after the second plot each axes must hold twice as many.
    """
    requested = ["age", "bmi"]
    shared = dict(
        kind=kind,
        grid_resolution=25,
        feature_names=diabetes.feature_names,
    )

    first = plot_partial_dependence(clf_diabetes, diabetes.data, requested,
                                    **shared)
    assert first.axes_.shape == (1, 2)
    left, right = first.axes_[0, 0], first.axes_[0, 1]
    assert left.get_ylabel() == "Partial dependence"
    assert right.get_ylabel() == ""
    assert len(left.get_lines()) == lines
    assert len(right.get_lines()) == lines

    linear = LinearRegression()
    linear.fit(diabetes.data, diabetes.target)

    # Draw the linear model on top of the axes produced by the first call.
    second = plot_partial_dependence(linear, diabetes.data, requested,
                                     ax=first.axes_, **shared)

    assert np.all(first.axes_ == second.axes_)
    for column in (0, 1):
        assert len(second.axes_[0, column].get_lines()) == 2 * lines
Exemplo n.º 16
0
def test_plot_partial_dependence_multiclass_error(pyplot, params, err_msg):
    """Invalid ``params`` on a multi-class model must raise ``ValueError``."""
    dataset = load_iris()
    model = GradientBoostingClassifier(n_estimators=10, random_state=1)
    model.fit(dataset.data, dataset.target)

    # ``err_msg`` is matched against the raised error's message.
    with pytest.raises(ValueError, match=err_msg):
        plot_partial_dependence(model, dataset.data, **params)
Exemplo n.º 17
0
def test_plot_partial_dependence_error(data, params, err_msg):
    """Invalid ``params`` must make ``plot_partial_dependence`` raise."""
    X, y = data
    estimator = LinearRegression().fit(X, y)

    try:
        with pytest.raises(ValueError, match=err_msg):
            plot_partial_dependence(estimator, X, **params)
    finally:
        # Clean up even when the expected error is not raised, so a failing
        # case does not leave a figure open for later tests.
        close_figure()
Exemplo n.º 18
0
def test_plot_partial_dependence_error(data, params, err_msg):
    """Invalid ``params`` must make ``plot_partial_dependence`` raise."""
    import matplotlib.pyplot as plt  # noqa
    X, y = data
    estimator = LinearRegression().fit(X, y)

    try:
        with pytest.raises(ValueError, match=err_msg):
            plot_partial_dependence(estimator, X, **params)
    finally:
        # Close the current figure even when the expected error is not
        # raised, so a failing case does not leak it into later tests.
        plt.close()
def test_plot_partial_dependence_error(data, params, err_msg):
    """Invalid ``params`` must make ``plot_partial_dependence`` raise."""
    import matplotlib.pyplot as plt  # noqa
    X, y = data
    estimator = LinearRegression().fit(X, y)

    try:
        with pytest.raises(ValueError, match=err_msg):
            plot_partial_dependence(estimator, X, **params)
    finally:
        # Close the current figure even when the expected error is not
        # raised, so a failing case does not leak it into later tests.
        plt.close()
Exemplo n.º 20
0
def plot_boston_pd(estimator, X, var_name):
    """Show the average partial dependence of ``estimator`` on ``var_name``.

    A fresh figure with a single axes is created, the plot is drawn onto it,
    and the figure is titled with the variable name before being shown.
    """
    figure, axis = plt.subplots()
    plot_options = dict(features=[var_name], kind="average", ax=axis)
    plot_partial_dependence(estimator=estimator, X=X, **plot_options)
    figure.suptitle(f"Partial Dependence Plot ({var_name})")
    figure.show()
Exemplo n.º 21
0
def pdp(est, x, feature, feature_names, no, target):
    """Plot partial dependence for ``target`` on all or the first ``no`` features.

    Parameters
    ----------
    est : estimator handed to ``plot_partial_dependence``.
    x : data handed to ``plot_partial_dependence``.
    feature : sequence of features to plot.
    feature_names : names forwarded as ``feature_names``.
    no : int
        ``-1`` plots every entry of ``feature``; any other value plots
        ``feature[:no]``.
    target : forwarded as ``target``.
    """
    selected = feature if no == -1 else feature[:no]
    # ``feature_names`` is keyword-only in current scikit-learn releases;
    # passing it by name works on old and new versions alike.
    plot_partial_dependence(est, x, selected,
                            feature_names=feature_names,
                            target=target)
    fig = plt.gcf()
    fig.suptitle('Partial dependence')
    plt.subplots_adjust(top=0.9)
    plt.show()
def test_plot_partial_dependence_does_not_override_ylabel(
        pyplot, clf_diabetes, diabetes):
    """Non-regression test: a ylabel that has been set must not be overridden.

    See https://github.com/scikit-learn/scikit-learn/issues/15772
    """
    _, panels = pyplot.subplots(1, 2)
    labelled, unlabelled = panels[0], panels[1]
    labelled.set_ylabel("Hello world")

    plot_partial_dependence(clf_diabetes, diabetes.data, [0, 1], ax=panels)

    # The custom label survives; the untouched axes gets the default one.
    assert labelled.get_ylabel() == "Hello world"
    assert unlabelled.get_ylabel() == "Partial dependence"
Exemplo n.º 23
0
def pdp(est, x, feature, feature_names, no):
    """Plot partial dependence on all or the first ``no`` features.

    Parameters
    ----------
    est : estimator handed to ``plot_partial_dependence``.
    x : data handed to ``plot_partial_dependence``.
    feature : sequence of features to plot.
    feature_names : names forwarded as ``feature_names``.
    no : int
        ``-1`` plots every entry of ``feature``; any other value plots
        ``feature[:no]``.
    """
    fig = plt.figure(figsize=(24, 18))
    selected = feature if no == -1 else feature[:no]
    # Pass ``feature_names`` by name: it is keyword-only in scikit-learn
    # releases that enforce keyword arguments, and by-name works on older
    # ones too.
    plot_partial_dependence(est, x, selected,
                            feature_names=feature_names,
                            fig=fig)
    fig = plt.gcf()
    fig.suptitle('Partial dependence', fontsize=30)
    plt.subplots_adjust(top=0.95)
    plt.show()
Exemplo n.º 24
0
def partial_dependency_analysis(method: Method,
                                X: DataFrame,
                                y: Series,
                                features: List = None) -> None:
    """Fit the method's pipeline on ``(X, y)`` and plot partial dependence.

    When ``features`` is omitted, every column of ``X`` is plotted.
    """
    selected = list(X.columns) if features is None else features
    fitted_pipeline = method.get_pipeline()
    fitted_pipeline.fit_transform(X, y)
    # NOTE(review): ``y`` itself is forwarded as ``target`` — confirm that
    # this is what ``plot_partial_dependence`` should receive here.
    plot_partial_dependence(fitted_pipeline, X, selected, target=y)
Exemplo n.º 25
0
def test_plot_partial_dependence_dataframe(pyplot, clf_boston, boston):
    """Smoke test: plotting from a pandas DataFrame with string features."""
    pandas = pytest.importorskip('pandas')
    frame = pandas.DataFrame(boston.data, columns=boston.feature_names)
    column_names = frame.columns.tolist()

    plot_partial_dependence(clf_boston,
                            frame, ['TAX', 'AGE'],
                            grid_resolution=25,
                            feature_names=column_names)
Exemplo n.º 26
0
def test_plot_partial_dependence_dataframe(pyplot, clf_diabetes, diabetes):
    """Smoke test: plotting from a pandas DataFrame with string features."""
    pandas = pytest.importorskip('pandas')
    frame = pandas.DataFrame(diabetes.data, columns=diabetes.feature_names)
    column_names = frame.columns.tolist()

    plot_partial_dependence(clf_diabetes,
                            frame, ['bp', 's1'],
                            grid_resolution=25,
                            feature_names=column_names)
def test_plot_partial_dependence_fig(pyplot):
    """A figure passed through ``fig`` must remain the current figure."""
    (features, targets), _ = regression_data
    model = LinearRegression()
    model.fit(features, targets)

    supplied_figure = pyplot.figure()
    plot_partial_dependence(model,
                            features,
                            [0, 1],
                            target=0,
                            grid_resolution=25,
                            fig=supplied_figure)

    assert pyplot.gcf() is supplied_figure
def test_plot_partial_dependence_fig(pyplot):
    """A figure passed through ``fig`` must remain the current figure."""
    (features, targets), _ = regression_data
    model = LinearRegression()
    model.fit(features, targets)

    supplied_figure = pyplot.figure()
    plot_partial_dependence(model,
                            features,
                            [0, 1],
                            target=0,
                            grid_resolution=25,
                            fig=supplied_figure)

    assert pyplot.gcf() is supplied_figure
def test_plot_partial_dependence_multiclass(pyplot):
    """Check the display built by ``plot_partial_dependence`` on multi-class input.

    Integer and symbolic class labels must yield the same results for the
    same class, and a different target must yield a different curve.
    """

    def check_display(display):
        # Structural checks shared by both one-way, two-feature displays.
        # Must run right after the plot call: it compares against the
        # figure that is current at that moment.
        assert display.figure_ is pyplot.gcf()
        assert display.axes_.shape == (1, 2)
        assert display.lines_.shape == (1, 2)
        assert display.contours_.shape == (1, 2)
        assert display.deciles_vlines_.shape == (1, 2)
        assert display.deciles_hlines_.shape == (1, 2)
        assert all(c is None for c in display.contours_.flat)
        assert display.target_idx == 0

    resolution = 25
    model_int = GradientBoostingClassifier(n_estimators=10, random_state=1)
    iris = load_iris()

    # integer class labels
    model_int.fit(iris.data, iris.target)
    display_int = plot_partial_dependence(model_int,
                                          iris.data, [0, 1],
                                          target=0,
                                          grid_resolution=resolution)
    check_display(display_int)

    # now with symbol labels
    symbolic_target = iris.target_names[iris.target]
    model_symbol = GradientBoostingClassifier(n_estimators=10, random_state=1)
    model_symbol.fit(iris.data, symbolic_target)
    display_symbol = plot_partial_dependence(model_symbol,
                                             iris.data, [0, 1],
                                             target='setosa',
                                             grid_resolution=resolution)
    check_display(display_symbol)

    # both labelings describe the same class, so the results must agree
    paired_results = zip(display_int.pd_results, display_symbol.pd_results)
    for (preds_int, grid_int), (preds_symbol, grid_symbol) in paired_results:
        assert_allclose(preds_int, preds_symbol)
        assert_allclose(grid_int, grid_symbol)

    # check that the pd plots are different for another target
    display_other = plot_partial_dependence(model_int,
                                            iris.data, [0, 1],
                                            target=1,
                                            grid_resolution=resolution)
    curve_target_0 = display_int.lines_[0, 0].get_data()[1]
    curve_target_1 = display_other.lines_[0, 0].get_data()[1]
    assert any(curve_target_0 != curve_target_1)
Exemplo n.º 30
0
def plot_two_ways_pdp(model, test, columns):
    """
    Plots a two ways partial dependence plot with the variables given through the argument columns. Two-ways Partial
    dependence plots show how a pair of variables or predictors affects the model's predictions.

    :param model: the model considered. The partial dependence plot is calculated only after the model has been fit.
    :param test: test dataset.
    :param columns: variables studied. It must be in the form [(var1, var2)].
    :return: None
    """
    sn.set()
    # No explicit ``plt.figure()`` here: ``plot_partial_dependence`` is called
    # without ``ax``/``fig`` and draws on a figure of its own, so a figure
    # created beforehand would only show up as an extra empty window.
    plot_partial_dependence(model, test, columns, n_jobs=-1)
    plt.tight_layout()
    plt.show()
Exemplo n.º 31
0
def plot_partial_dependencies(model, test, column):
    """
    Draw and show one-way partial dependence plots for the requested variables.

    A partial dependence plot shows how a particular variable or predictor
    affects the model's predictions.

    :param model: the model considered; it must already have been fit.
    :param test: test dataset.
    :param column: variables studied.
    :return: None
    """
    sn.set()
    pdp_options = {"n_jobs": -1}
    plot_partial_dependence(model, test, column, **pdp_options)
    # Tidy up the layout before displaying.
    plt.tight_layout()
    plt.show()
Exemplo n.º 32
0
 def plot_pdp(self, learner, X, features_idx, feature_names=None):
     '''
     Draw the partial dependence plot of ``learner`` on a new 20x10 figure.
     Parameters:
     - learner: already trained learner to be analyzed
     - X: matrix of input data on which the learner has been trained
     - features_idx: features to be analyzed by pdp, should be column indexes in X
     - feature_names: features names of X
     '''
     canvas = plt.figure(figsize=(20, 10))
     pdp_options = dict(feature_names=feature_names, fig=canvas)
     plot_partial_dependence(learner, X, features_idx, **pdp_options)
Exemplo n.º 33
0
def test_plot_partial_dependence_custom_axes(pyplot, clf_boston, boston):
    """Plot a one-way and a two-way dependence onto caller-provided axes."""
    figure, (line_ax, contour_ax) = pyplot.subplots(1, 2)
    display = plot_partial_dependence(
        clf_boston,
        boston.data, ['CRIM', ('CRIM', 'ZN')],
        grid_resolution=25,
        feature_names=boston.feature_names.tolist(),
        ax=[line_ax, contour_ax])

    # The display must reuse exactly the objects that were passed in.
    assert figure is display.figure_
    assert display.bounding_ax_ is None
    assert display.axes_.shape == (2, )
    assert display.axes_[0] is line_ax
    assert display.axes_[1] is contour_ax

    # one-way plot: a line on the first axes
    first_axes = display.axes_[0]
    assert first_axes.get_xlabel() == "CRIM"
    assert first_axes.get_ylabel() == "Partial dependence"

    avg_preds, values = display.pd_results[0]
    plotted_x, plotted_y = display.lines_[0].get_data()[:2]
    assert_allclose(plotted_x, values[0])
    assert_allclose(plotted_y, avg_preds[display.target_idx].ravel())

    # two-way plot: a contour on the second axes
    second_axes = display.axes_[1]
    contour_set = display.contours_[1]
    expected_levels = np.linspace(*display.pdp_lim[2], num=8)
    assert_allclose(contour_set.levels, expected_levels)
    assert second_axes.get_xlabel() == "CRIM"
    assert second_axes.get_ylabel() == "ZN"
def test_partial_dependence_overwrite_labels(
    pyplot,
    clf_diabetes,
    diabetes,
    kind,
    line_kw,
    label,
):
    """Check that the label of the PDP plot can be overwritten.

    When ``label`` is None no legend is expected on any axes; otherwise each
    axes must carry a legend with exactly one entry equal to ``label``.
    """
    display = plot_partial_dependence(
        clf_diabetes,
        diabetes.data,
        [0, 2],
        grid_resolution=25,
        feature_names=diabetes.feature_names,
        kind=kind,
        line_kw=line_kw,
    )

    for axis in display.axes_.ravel():
        legend = axis.get_legend()
        if label is None:
            assert legend is None
            continue
        entries = legend.get_texts()
        assert len(entries) == 1
        assert entries[0].get_text() == label
def test_plot_partial_dependence():
    """Test the partial dependence plot function.

    Covers integer features, string features with array feature names, and
    string features with list feature names.
    """
    import matplotlib.pyplot as plt  # noqa

    boston = load_boston()
    clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
    clf.fit(boston.data, boston.target)

    grid_resolution = 25
    str_features = ['CRIM', 'ZN', ('CRIM', 'ZN')]
    # (features, feature_names) combinations to exercise, in order.
    cases = [
        ([0, 1, (0, 1)], boston.feature_names),
        # check with str features and array feature names
        (str_features, boston.feature_names),
        # check with list feature_names
        (str_features, boston.feature_names.tolist()),
    ]

    try:
        for features, feature_names in cases:
            plot_partial_dependence(clf, boston.data, features,
                                    grid_resolution=grid_resolution,
                                    feature_names=feature_names)
            fig = plt.gcf()
            axs = fig.get_axes()
            assert len(axs) == 3
            # ``has_data`` must be called; the bare bound method is always
            # truthy and would make this assertion vacuous.
            assert all(ax.has_data() for ax in axs)
    finally:
        # Close the figures even when an assertion fails so that they do not
        # leak into later tests.
        plt.close('all')
def test_plot_partial_dependence_error(pyplot, data, params, err_msg):
    """Invalid ``params`` must make ``plot_partial_dependence`` raise."""
    features, targets = data
    fitted = LinearRegression().fit(features, targets)

    # ``err_msg`` is matched against the raised error's message.
    with pytest.raises(ValueError, match=err_msg):
        plot_partial_dependence(fitted, features, **params)
def main():
    """Plot partial dependence on the California housing data.

    Trains an ``MLPRegressor`` and a ``GradientBoostingRegressor`` on the
    centered target, draws partial dependence plots for each, then renders a
    custom 3D surface from ``partial_dependence`` and shows all figures.
    """
    cal_housing = fetch_california_housing()

    X, y = cal_housing.data, cal_housing.target
    names = cal_housing.feature_names

    # Center target to avoid gradient boosting init bias: gradient boosting
    # with the 'recursion' method does not account for the initial estimator
    # (here the average target, by default)
    y -= y.mean()

    print("Training MLPRegressor...")
    est = MLPRegressor(activation='logistic')
    est.fit(X, y)
    print('Computing partial dependence plots...')
    # We don't compute the 2-way PDP (5, 1) here, because it is a lot slower
    # with the brute method.
    features = [0, 5, 1, 2]
    plot_partial_dependence(est, X, features, feature_names=names,
                            n_jobs=3, grid_resolution=50)
    fig = plt.gcf()
    fig.suptitle('Partial dependence of house value on non-location features\n'
                 'for the California housing dataset, with MLPRegressor')
    plt.subplots_adjust(top=0.9)  # tight_layout causes overlap with suptitle

    print("Training GradientBoostingRegressor...")
    est = GradientBoostingRegressor(n_estimators=100, max_depth=4,
                                    learning_rate=0.1, loss='huber',
                                    random_state=1)
    est.fit(X, y)
    print('Computing partial dependence plots...')
    features = [0, 5, 1, 2, (5, 1)]
    plot_partial_dependence(est, X, features, feature_names=names,
                            n_jobs=3, grid_resolution=50)
    fig = plt.gcf()
    fig.suptitle('Partial dependence of house value on non-location features\n'
                 'for the California housing dataset, with Gradient Boosting')
    plt.subplots_adjust(top=0.9)

    print('Custom 3d plot via ``partial_dependence``')
    fig = plt.figure()

    target_feature = (1, 5)
    pdp, axes = partial_dependence(est, X, target_feature,
                                   grid_resolution=50)
    XX, YY = np.meshgrid(axes[0], axes[1])
    Z = pdp[0].T
    # Create the 3D axes through the figure: constructing ``Axes3D(fig)``
    # directly no longer attaches the axes to the figure on current
    # Matplotlib, which leaves the plot empty.
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(XX, YY, Z, rstride=1, cstride=1,
                           cmap=plt.cm.BuPu, edgecolor='k')
    ax.set_xlabel(names[target_feature[0]])
    ax.set_ylabel(names[target_feature[1]])
    ax.set_zlabel('Partial dependence')
    #  pretty init view
    ax.view_init(elev=22, azim=122)
    plt.colorbar(surf)
    plt.suptitle('Partial dependence of house value on median\n'
                 'age and average occupancy, with Gradient Boosting')
    plt.subplots_adjust(top=0.9)

    plt.show()