Example #1
File: knn.py  Project: cimor/skl-groups
def _get_jensen_shannon_core(Ks, dim, X_ns, Y_ns):
    # precompute the max/min possible digamma(i) values: the floors/ceils of
    #
    #   M/(n+m-1) / (1 / (2 n - 1))
    #   M/(n+m-1) / (n / (m (2 n - 1)))
    #
    # for any valid value of n, m.

    min_X_n = np.min(X_ns)
    max_X_n = np.max(X_ns)
    if Y_ns is None:
        min_Y_n = min_X_n
        max_Y_n = max_X_n
    else:
        min_Y_n = np.min(Y_ns)
        max_Y_n = np.max(Y_ns)
    min_K = np.min(Ks)
    max_K = np.max(Ks)

    # figure out the smallest and largest i values we might need (# of neighbors in ball)

    wt_bounds = [np.inf, -np.inf]
    min_wt_n = None
    min_wt_m = None
    # max_wt_n = None; max_wt_m = None
    n_ms = list(itertools.product([min_X_n, max_X_n], [min_Y_n, max_Y_n]))
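    # try each (n, m) corner in both roles, since the weights are not symmetric in n and m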
    for n, m in itertools.chain(n_ms, map(reversed, n_ms)):
        base = (2 * n - 1) / (n + m - 1)

        for wt in (base, base * m / n):
            if wt < wt_bounds[0]:
                wt_bounds[0] = wt
                min_wt_n = n
                min_wt_m = m

            if wt > wt_bounds[1]:
                wt_bounds[1] = wt
                # max_wt_n = n
                # max_wt_m = m

    if wt_bounds[0] * min_K < 1:
        msg = "K={} is too small for Jensen-Shannon estimator with n={}, m={}"
        raise ValueError((msg + "; must be at least {}").format(
            min_K, min_wt_n, min_wt_m, int(np.ceil(1 / wt_bounds[0]))))

    min_i = int(np.floor(wt_bounds[0] * min_K))
    max_i = int(np.ceil(wt_bounds[1] * max_K))
    digamma_vals = psi(np.arange(min_i, max_i + 1))

    # TODO: If we don't actually hit the worst case, might be nice to still
    #       run and just nan those elements that we can't compute. This is
    #       over-conservative.
    return partial(_jensen_shannon_core, Ks, dim, min_i, digamma_vals), max_i
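
The returned pair couples a partially applied core function with max_i, the largest neighbor count whose digamma value was tabulated: psi is evaluated once over the whole [min_i, max_i] range and afterwards looked up by index. A minimal, self-contained sketch of that lookup pattern (the concrete bounds below are hypothetical, not taken from knn.py):

import numpy as np
from scipy.special import psi

min_i, max_i = 2, 50                             # hypothetical bounds
digamma_vals = psi(np.arange(min_i, max_i + 1))  # one vectorized evaluation

def digamma_lookup(i):
    # psi(i) via table lookup instead of re-evaluating the special function
    return digamma_vals[i - min_i]

assert np.isclose(digamma_lookup(10), psi(10))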
Example #2
def plot_partial_dependence(gbrt,
                            X,
                            features,
                            feature_names=None,
                            label=None,
                            n_cols=3,
                            grid_resolution=100,
                            percentiles=(0.05, 0.95),
                            n_jobs=1,
                            verbose=0,
                            ax=None,
                            line_kw=None,
                            contour_kw=None,
                            **fig_kw):
    """Partial dependence plots for ``features``.
    The ``len(features)`` plots are arranged in a grid with ``n_cols``
    columns. Two-way partial dependence plots are plotted as contour
    plots.
    Read more in the :ref:`User Guide <partial_dependence>`.
    Parameters
    ----------
    gbrt : BaseGradientBoosting
        A fitted gradient boosting model.
    X : array-like, shape=(n_samples, n_features)
        The data on which ``gbrt`` was trained.
    features : seq of tuples or ints
        If seq[i] is an int or a tuple with one int value, a one-way
        PDP is created; if seq[i] is a tuple of two ints, a two-way
        PDP is created.
    feature_names : seq of str
        Name of each feature; feature_names[i] holds
        the name of the feature with index i.
    label : object
        The class label for which the PDPs should be computed.
        Only if gbrt is a multi-class model. Must be in ``gbrt.classes_``.
    n_cols : int
        The number of columns in the grid plot (default: 3).
    grid_resolution : int, default=100
        The number of equally spaced points on the axes.
    percentiles : (low, high), default=(0.05, 0.95)
        The lower and upper percentile used to create the extreme values
        for the PDP axes.
    n_jobs : int
        The number of CPUs to use to compute the PDs. -1 means 'all CPUs'.
        Defaults to 1.
    verbose : int
        Verbose output during PD computations. Defaults to 0.
    ax : Matplotlib axis object, default None
        An axis object onto which the plots will be drawn.
    line_kw : dict
        Dict with keywords passed to the ``pylab.plot`` call.
        For one-way partial dependence plots.
    contour_kw : dict
        Dict with keywords passed to the ``pylab.contourf`` call.
        For two-way partial dependence plots.
    fig_kw : dict
        Dict with keywords passed to the figure() call.
        Note that all keywords not recognized above will be automatically
        included here.

    Returns
    -------
    fig : figure
        The Matplotlib Figure object.
    axs : seq of Axis objects
        A seq of Axis objects, one for each subplot.

    Examples
    --------
    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.ensemble import GradientBoostingRegressor
    >>> X, y = make_friedman1()
    >>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)
    >>> fig, axs = plot_partial_dependence(clf, X, [0, (0, 1)]) #doctest: +SKIP
    ...
    """
    import matplotlib.pyplot as plt
    from matplotlib import transforms
    from matplotlib.ticker import MaxNLocator
    from matplotlib.ticker import ScalarFormatter

    # if not isinstance(gbrt, BaseGradientBoosting):
    #     raise ValueError('gbrt has to be an instance of BaseGradientBoosting')
    if gbrt.estimators_.shape[0] == 0:
        raise ValueError('Call %s.fit before partial_dependence' %
                         gbrt.__class__.__name__)

    # set label_idx for multi-class GBRT
    if hasattr(gbrt, 'classes_') and np.size(gbrt.classes_) > 2:
        if label is None:
            raise ValueError('label is not given for multi-class PDP')
        label_idx = np.searchsorted(gbrt.classes_, label)
        if gbrt.classes_[label_idx] != label:
            raise ValueError('label %s not in ``gbrt.classes_``' % str(label))
    else:
        # regression and binary classification
        label_idx = 0

    X = check_array(X, dtype=DTYPE, order='C')
    if gbrt.n_features != X.shape[1]:
        raise ValueError('X.shape[1] does not match gbrt.n_features')

    if line_kw is None:
        line_kw = {'color': 'green'}
    if contour_kw is None:
        contour_kw = {}

    # convert feature_names to list
    if feature_names is None:
        # if not feature_names use fx indices as name
        feature_names = [str(i) for i in range(gbrt.n_features)]
    elif isinstance(feature_names, np.ndarray):
        feature_names = feature_names.tolist()

    def convert_feature(fx):
        if isinstance(fx, six.string_types):
            try:
                fx = feature_names.index(fx)
            except ValueError:
                raise ValueError('Feature %s not in feature_names' % fx)
        return fx

    # convert features into a seq of int tuples
    tmp_features = []
    for fxs in features:
        if isinstance(fxs, (numbers.Integral, ) + six.string_types):
            fxs = (fxs, )
        try:
            fxs = np.array([convert_feature(fx) for fx in fxs], dtype=np.int32)
        except TypeError:
            raise ValueError('features must be either int, str, or tuple '
                             'of int/str')
        if not (1 <= np.size(fxs) <= 2):
            raise ValueError('each entry in features must specify one or '
                             'two features')

        tmp_features.append(fxs)

    features = tmp_features

    names = []
    try:
        for fxs in features:
            names_ = []
            # explicit loop so "i" is bound for exception below
            for i in fxs:
                names_.append(feature_names[i])
            names.append(names_)
    except IndexError:
        raise ValueError('features[i] must be in [0, n_features) '
                         'but was %d' % i)

    # compute PD functions
    pd_result = Parallel(n_jobs=n_jobs, verbose=verbose)(delayed(
        partial_dependence
    )(gbrt, fxs, X=X, grid_resolution=grid_resolution, percentiles=percentiles)
                                                         for fxs in features)

    # get global min and max values of PD grouped by plot type
    pdp_lim = {}
    for pdp, axes in pd_result:
        min_pd, max_pd = pdp[label_idx].min(), pdp[label_idx].max()
        n_fx = len(axes)
        old_min_pd, old_max_pd = pdp_lim.get(n_fx, (min_pd, max_pd))
        min_pd = min(min_pd, old_min_pd)
        max_pd = max(max_pd, old_max_pd)
        pdp_lim[n_fx] = (min_pd, max_pd)

    # create contour levels for two-way plots
    if 2 in pdp_lim:
        Z_level = np.linspace(*pdp_lim[2], num=8)

    if ax is None:
        fig = plt.figure(**fig_kw)
    else:
        fig = ax.get_figure()
        fig.clear()

    n_cols = min(n_cols, len(features))
    n_rows = int(np.ceil(len(features) / float(n_cols)))
    axs = []
    for i, fx, name, (pdp, axes) in zip(count(), features, names, pd_result):
        ax = fig.add_subplot(n_rows, n_cols, i + 1)

        if len(axes) == 1:
            ax.plot(axes[0], pdp[label_idx].ravel(), **line_kw)
        else:
            # make contour plot
            assert len(axes) == 2
            XX, YY = np.meshgrid(axes[0], axes[1])
            Z = pdp[label_idx].reshape(list(map(np.size, axes))).T
            CS = ax.contour(XX,
                            YY,
                            Z,
                            levels=Z_level,
                            linewidths=0.5,
                            colors='k')
            ax.contourf(XX,
                        YY,
                        Z,
                        levels=Z_level,
                        vmax=Z_level[-1],
                        vmin=Z_level[0],
                        alpha=0.75,
                        **contour_kw)
            ax.clabel(CS, fmt='%2.2f', colors='k', fontsize=10, inline=True)

        # plot data deciles + axes labels
        deciles = mquantiles(X[:, fx[0]], prob=np.arange(0.1, 1.0, 0.1))
        trans = transforms.blended_transform_factory(ax.transData,
                                                     ax.transAxes)
        ylim = ax.get_ylim()
        ax.vlines(deciles, [0], 0.05, transform=trans, color='k')
        ax.set_xlabel(name[0])
        ax.set_ylim(ylim)

        # prevent x-axis ticks from overlapping
        ax.xaxis.set_major_locator(MaxNLocator(nbins=6, prune='lower'))
        tick_formatter = ScalarFormatter()
        tick_formatter.set_powerlimits((-3, 4))
        ax.xaxis.set_major_formatter(tick_formatter)

        if len(axes) > 1:
            # two-way PDP - y-axis deciles + labels
            deciles = mquantiles(X[:, fx[1]], prob=np.arange(0.1, 1.0, 0.1))
            trans = transforms.blended_transform_factory(
                ax.transAxes, ax.transData)
            xlim = ax.get_xlim()
            ax.hlines(deciles, [0], 0.05, transform=trans, color='k')
            ax.set_ylabel(name[1])
            # hline erases xlim
            ax.set_xlim(xlim)
        else:
            ax.set_ylabel('Partial dependence')

        if len(axes) == 1:
            ax.set_ylim(pdp_lim[1])
        axs.append(ax)

    fig.subplots_adjust(bottom=0.15,
                        top=0.7,
                        left=0.1,
                        right=0.95,
                        wspace=0.4,
                        hspace=0.3)
    return fig, axs
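
A hedged usage sketch for the multi-class path of the function above (the dataset and model are illustrative; ``label`` must be a member of ``clf.classes_``, and matplotlib must be importable):

from sklearn.datasets import load_iris
from sklearn.ensemble import GradientBoostingClassifier

X, y = load_iris(return_X_y=True)
clf = GradientBoostingClassifier(n_estimators=10).fit(X, y)
# label=0 selects the class whose partial dependence is drawn
fig, axs = plot_partial_dependence(clf, X, [0, (2, 3)], label=0)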
Example #3

    for x in [9]:
        for y in [25]:
            target_feature = (x, y)
            fig = plt.figure()
            names = [priors[target_feature[0]], priors[target_feature[1]]]
            print(
                'Convenience plot with ``partial_dependence_plots`` for %s and %s'
                % (names[0], names[1]))
            pdp, axes = partial_dependence(stacked_clf,
                                           target_feature,
                                           X=X_train,
                                           grid_resolution=50)
            XX, YY = np.meshgrid(axes[0], axes[1])
            Z = pdp[0].reshape(list(map(np.size, axes))).T
            ax = Axes3D(fig)
            surf = ax.plot_surface(XX,
                                   YY,
                                   Z,
                                   rstride=1,
                                   cstride=1,
                                   cmap=plt.cm.BuPu)
            ax.set_xlabel(names[0], fontsize=12)
            ax.set_ylabel(names[1], fontsize=12)
            ax.set_zlabel('Partial dependence', fontsize=12)
            ax.view_init(elev=12, azim=-142)
            plt.xticks([0, 0.5, 1])
            plt.yticks([0, 0.5, 1])
            ax.set_zticks([-0.2, -0.1, 0, 0.1, 0.2])
            ax.set_zlim(-0.2, 0.2)
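
The reshape-then-transpose step above is easy to get backwards: ``pdp[0]`` is laid out as ``(len(axes[0]), len(axes[1]))`` with the first target feature varying slowest, while ``np.meshgrid(axes[0], axes[1])`` returns arrays of shape ``(len(axes[1]), len(axes[0]))``. A minimal sketch with hypothetical grid sizes:

import numpy as np

a0 = np.linspace(0.0, 1.0, 3)   # hypothetical grid for the first target feature
a1 = np.linspace(0.0, 1.0, 4)   # hypothetical grid for the second
XX, YY = np.meshgrid(a0, a1)    # both have shape (len(a1), len(a0)) == (4, 3)
flat = np.arange(a0.size * a1.size)    # stand-in for pdp[0]
Z = flat.reshape(a0.size, a1.size).T   # transpose so Z aligns with XX and YY
assert Z.shape == XX.shape == YY.shape == (4, 3)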
def plot_partial_dependence(est, X, features, feature_names=None,
                            target=None, n_cols=3, grid_resolution=100,
                            percentiles=(0.05, 0.95), method='auto',
                            n_jobs=1, verbose=0, ax=None, line_kw=None,
                            contour_kw=None, **fig_kw):
    """Partial dependence plots.
    The ``len(features)`` plots are arranged in a grid with ``n_cols``
    columns. Two-way partial dependence plots are plotted as contour plots.
    Read more in the :ref:`User Guide <partial_dependence>`.
    Parameters
    ----------
    est : BaseEstimator
        A fitted classification or regression model. Classifiers must have a
        ``predict_proba()`` method. Multioutput-multiclass estimators aren't
        supported.
    X : array-like, shape=(n_samples, n_features)
        The data to use to build the grid of values on which the dependence
        will be evaluated. This is usually the training data.
    features : list of ints or strings, or tuples of ints or strings
        The target features for which to create the PDPs.
        If features[i] is an int or a string, a one-way PDP is created; if
        features[i] is a tuple, a two-way PDP is created. Each tuple must be
        of size 2.
        If any entry is a string, it must be in ``feature_names``.
    feature_names : seq of str, shape=(n_features,)
        Name of each feature; feature_names[i] holds the name of the feature
        with index i.
    target : int, optional (default=None)
        - In a multiclass setting, specifies the class for which the PDPs
          should be computed. Note that for binary classification, the
          positive class (index 1) is always used.
        - In a multioutput setting, specifies the task for which the PDPs
          should be computed.
        Ignored in binary classification or classical regression settings.
    n_cols : int, optional (default=3)
        The number of columns in the grid plot.
    grid_resolution : int, optional (default=100)
        The number of equally spaced points on the axes of the plots, for each
        target feature.
    percentiles : tuple of float, optional (default=(0.05, 0.95))
        The lower and upper percentile used to create the extreme values
        for the PDP axes.
    method : str, optional (default='auto')
        The method to use to calculate the partial dependence predictions:
        - 'recursion' is only supported for objects inheriting from
          ``BaseGradientBoosting``, but is much faster.
        - 'brute' is supported for any estimator, but is more
          computationally intensive.
        - If 'auto', then 'recursion' will be used for
          ``BaseGradientBoosting`` estimators, and 'brute' used for other
          estimators.
        Unlike the 'brute' method, 'recursion' does not account for the
        ``init`` predictor of the boosting process. In practice this still
        produces the same plots, up to a constant offset in the target
        response.
    n_jobs : int, optional (default=1)
        The number of CPUs to use to compute the PDs. -1 means 'all CPUs'.
        See :term:`Glossary <n_jobs>` for more details.
    verbose : int, optional (default=0)
        Verbose output during PD computations.
    ax : Matplotlib axis object, optional (default=None)
        An axis object onto which the plots will be drawn.
    line_kw : dict, optional
        Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.
        For one-way partial dependence plots.
    contour_kw : dict, optional
        Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.
        For two-way partial dependence plots.
    **fig_kw : dict, optional
        Dict with keywords passed to the figure() call.
        Note that all keywords not recognized above will be automatically
        included here.

    Returns
    -------
    fig : figure
        The Matplotlib Figure object.
    axs : seq of Axis objects
        A seq of Axis objects, one for each subplot.

    Examples
    --------
    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.ensemble import GradientBoostingRegressor
    >>> X, y = make_friedman1()
    >>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)
    >>> fig, axs = plot_partial_dependence(clf, X, [0, (0, 1)]) #doctest: +SKIP
    ...
    """
    import matplotlib.pyplot as plt
    from matplotlib import transforms
    from matplotlib.ticker import MaxNLocator
    from matplotlib.ticker import ScalarFormatter

    # set target_idx for multi-class estimators
    if hasattr(est, 'classes_') and np.size(est.classes_) > 2:
        if target is None:
            raise ValueError('target must be specified for multi-class')
        target_idx = np.searchsorted(est.classes_, target)
        if (not (0 <= target_idx < len(est.classes_)) or
                est.classes_[target_idx] != target):
            raise ValueError('target not in est.classes_, got {}'.format(
                target))
    else:
        # regression and binary classification
        target_idx = 0

    X = check_array(X)
    n_features = X.shape[1]

    # convert feature_names to list
    if feature_names is None:
        # if feature_names is None, use feature indices as name
        feature_names = [str(i) for i in range(n_features)]
    elif isinstance(feature_names, np.ndarray):
        feature_names = feature_names.tolist()

    def convert_feature(fx):
        if isinstance(fx, six.string_types):
            try:
                fx = feature_names.index(fx)
            except ValueError:
                raise ValueError('Feature %s not in feature_names' % fx)
        return int(fx)

    # convert features into a seq of int tuples
    tmp_features = []
    for fxs in features:
        if isinstance(fxs, (numbers.Integral, six.string_types)):
            fxs = (fxs,)
        try:
            fxs = [convert_feature(fx) for fx in fxs]
        except TypeError:
            raise ValueError('Each entry in features must be either an int, '
                             'a string, or an iterable of size at most 2.')
        if not (1 <= np.size(fxs) <= 2):
            raise ValueError('Each entry in features must be either an int, '
                             'a string, or an iterable of size at most 2.')

        tmp_features.append(fxs)

    features = tmp_features

    names = []
    try:
        for fxs in features:
            names_ = []
            # explicit loop so "i" is bound for exception below
            for i in fxs:
                names_.append(feature_names[i])
            names.append(names_)
    except IndexError:
        raise ValueError('All entries of features must be less than '
                         'len(feature_names) = {0}, got {1}.'
                         .format(len(feature_names), i))

    # compute averaged predictions
    pd_result = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(partial_dependence)(est, fxs, X=X, method=method,
                                    grid_resolution=grid_resolution,
                                    percentiles=percentiles)
        for fxs in features)

    # For multioutput regression, we can only check the validity of target
    # now that we have the predictions.
    # Also note: as multiclass-multioutput classifiers are not supported,
    # multiclass and multioutput scenario are mutually exclusive. So there is
    # no risk of overwriting target_idx here.
    pd, _ = pd_result[0]  # checking the first result is enough
    if is_regressor(est) and pd.shape[0] > 1:
        if target is None:
            raise ValueError(
                'target must be specified for multi-output regressors')
        if not 0 <= target <= pd.shape[0]:
                raise ValueError(
                    'target must be in [0, n_tasks], got {}.'.format(
                        target))
        target_idx = target
    else:
        target_idx = 0

    # get global min and max values of PD grouped by plot type
    pdp_lim = {}
    for pd, values in pd_result:
        min_pd, max_pd = pd[target_idx].min(), pd[target_idx].max()
        n_fx = len(values)
        old_min_pd, old_max_pd = pdp_lim.get(n_fx, (min_pd, max_pd))
        min_pd = min(min_pd, old_min_pd)
        max_pd = max(max_pd, old_max_pd)
        pdp_lim[n_fx] = (min_pd, max_pd)

    # create contour levels for two-way plots
    if 2 in pdp_lim:
        Z_level = np.linspace(*pdp_lim[2], num=8)

    if ax is None:
        fig = plt.figure(**fig_kw)
    else:
        fig = ax.get_figure()
        fig.clear()

    if line_kw is None:
        line_kw = {'color': 'green'}
    if contour_kw is None:
        contour_kw = {}

    n_cols = min(n_cols, len(features))
    n_rows = int(np.ceil(len(features) / float(n_cols)))
    axs = []
    for i, fx, name, (pd, values) in zip(count(), features, names, pd_result):
        ax = fig.add_subplot(n_rows, n_cols, i + 1)

        if len(values) == 1:
            ax.plot(values[0], pd[target_idx].ravel(), **line_kw)
        else:
            # make contour plot
            assert len(values) == 2
            XX, YY = np.meshgrid(values[0], values[1])
            Z = pd[target_idx].reshape(list(map(np.size, values))).T
            CS = ax.contour(XX, YY, Z, levels=Z_level, linewidths=0.5,
                            colors='k')
            ax.contourf(XX, YY, Z, levels=Z_level, vmax=Z_level[-1],
                        vmin=Z_level[0], alpha=0.75, **contour_kw)
            ax.clabel(CS, fmt='%2.2f', colors='k', fontsize=10, inline=True)

        # plot data deciles + axes labels
        deciles = mquantiles(X[:, fx[0]], prob=np.arange(0.1, 1.0, 0.1))
        trans = transforms.blended_transform_factory(ax.transData,
                                                     ax.transAxes)
        ylim = ax.get_ylim()
        ax.vlines(deciles, [0], 0.05, transform=trans, color='k')
        ax.set_xlabel(name[0])
        ax.set_ylim(ylim)

        # prevent x-axis ticks from overlapping
        ax.xaxis.set_major_locator(MaxNLocator(nbins=6, prune='lower'))
        tick_formatter = ScalarFormatter()
        tick_formatter.set_powerlimits((-3, 4))
        ax.xaxis.set_major_formatter(tick_formatter)

        if len(values) > 1:
            # two-way PDP - y-axis deciles + labels
            deciles = mquantiles(X[:, fx[1]], prob=np.arange(0.1, 1.0, 0.1))
            trans = transforms.blended_transform_factory(ax.transAxes,
                                                         ax.transData)
            xlim = ax.get_xlim()
            ax.hlines(deciles, [0], 0.05, transform=trans, color='k')
            ax.set_ylabel(name[1])
            # hline erases xlim
            ax.set_xlim(xlim)
        else:
            ax.set_ylabel('Partial dependence')

        if len(values) == 1:
            ax.set_ylim(pdp_lim[1])
        axs.append(ax)

    fig.subplots_adjust(bottom=0.15, top=0.7, left=0.1, right=0.95, wspace=0.4,
                        hspace=0.3)
    return fig, axs
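
A hedged usage sketch for the estimator-agnostic variant above, exercising the multi-output regression path where ``target`` selects the task (synthetic data; a non-GBRT estimator such as ``RandomForestRegressor`` falls back to the 'brute' method under ``method='auto'``):

import numpy as np
from sklearn.ensemble import RandomForestRegressor

rng = np.random.RandomState(0)
X = rng.uniform(size=(200, 4))
Y = np.column_stack([X[:, 0] * X[:, 1], np.sin(3 * X[:, 2])])  # two tasks
est = RandomForestRegressor(n_estimators=20, random_state=0).fit(X, Y)
# target=1 plots the partial dependence of the second output
fig, axs = plot_partial_dependence(est, X, [2, (0, 1)], target=1,
                                   grid_resolution=20)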