Пример #1
0
def hist(dataset,
         xgroup_attr=None,
         ygroup_attr=None,
         xlim=None,
         ylim=None,
         noticks=False,
         **kwargs):
    """Compute and draw feature histograms (for groups of samples)

    This is a convenience wrapper around matplotlib's hist() function.  It
    supports it entire API, but data is taken from an input dataset.  In
    addition, feature histograms for groups of dataset samples can be drawn as
    an array of subplots. Using ``xgroup_attr`` and ``ygroup_attr`` up to two
    sample attributes can be selected and samples groups are defined by their
    unique values. For example, plotting histograms for all combinations of
    ``targets`` and ``chunks`` attribute values in a dataset is done by this
    code:

    >>> from mvpa2.viz import hist
    >>> from mvpa2.misc.data_generators import normal_feature_dataset
    >>> ds = normal_feature_dataset(10, 3, 10, 5)
    >>> plots = hist(ds, ygroup_attr='targets', xgroup_attr='chunks',
    ...              noticks=None, xlim=(-.5,.5), normed=True)
    >>> len(plots)
    15

    This function can also be used with plain arrays, in which case it will
    fall back on the behavior of matplotlib's hist() and additional
    functionality is not available.

    Parameters
    ----------
    dataset : Dataset or array
    xgroup_attr : string, optional
      Name of a samples attribute to be used as targets
    ygroup_attr : None or string, optional
      If a string, a histogram will be plotted per each target and each
      chunk (as defined in sa named `chunks_attr`), resulting is a
      histogram grid (targets x chunks).
    xlim : None or 2-tuple, optional
      Common x-axis limits for all histograms.
    ylim : None or 2-tuple, optional
      Common y-axis limits for all histograms.
    noticks : bool or None, optional
      If True, no axis ticks will be plotted. If False, each histogram subplot
      will have its own ticks. If None, only the outer subplots will
      have ticks. This is useful to save space in large plots, but should be
      combined with ``xlim`` and ``ylim`` arguments in order to ensure equal
      axes across subplots.
    **kwargs
      Any additional arguments are passed to matplotlib's hist().

    Returns
    -------
    list
      List of figure handlers for all generated subplots.
    """
    xgroup = {'attr': xgroup_attr}
    ygroup = {'attr': ygroup_attr}
    for grp in (xgroup, ygroup):
        if grp['attr'] is not None and is_datasetlike(dataset):
            grp['split'] = ChainNode([
                NFoldPartitioner(1, attr=grp['attr']),
                Splitter('partitions', attr_values=[2])
            ])
            grp['gen'] = lambda s, x: s.generate(x)
            grp['npanels'] = len(dataset.sa[grp['attr']].unique)
        else:
            grp['split'] = None
            grp['gen'] = lambda s, x: [x]
            grp['npanels'] = 1

    fig = 1
    nrows = ygroup['npanels']
    ncols = xgroup['npanels']
    subplots = []
    # for all labels
    for row, ds in enumerate(ygroup['gen'](ygroup['split'], dataset)):
        for col, d in enumerate(xgroup['gen'](xgroup['split'], ds)):
            ax = pl.subplot(nrows, ncols, fig)
            if is_datasetlike(d):
                data = d.samples
            else:
                data = d
            ax.hist(data.ravel(), **kwargs)
            if xlim is not None:
                pl.xlim(xlim)

            if noticks is True or (noticks is None and row < nrows - 1):
                pl.xticks([])
            if noticks is True or (noticks is None and col > 0):
                pl.yticks([])

            if ncols > 1 and row == 0:
                pl.title(str(d.sa[xgroup['attr']].unique[0]))
            if nrows > 1 and col == 0:
                pl.ylabel(str(d.sa[ygroup['attr']].unique[0]))
            fig += 1
            subplots.append(ax)
    return subplots
Пример #2
0
def hist(dataset, xgroup_attr=None, ygroup_attr=None,
         xlim='same', ylim='same', noticks=False,
         **kwargs):
    """Compute and draw feature histograms (for groups of samples)

    This is a convenience wrapper around matplotlib's hist() function.  It
    supports it entire API, but data is taken from an input dataset.  In
    addition, feature histograms for groups of dataset samples can be drawn as
    an array of subplots. Using ``xgroup_attr`` and ``ygroup_attr`` up to two
    sample attributes can be selected and samples groups are defined by their
    unique values. For example, plotting histograms for all combinations of
    ``targets`` and ``chunks`` attribute values in a dataset is done by this
    code:

    >>> from mvpa2.viz import hist
    >>> from mvpa2.misc.data_generators import normal_feature_dataset
    >>> ds = normal_feature_dataset(10, 3, 10, 5)
    >>> plots = hist(ds, ygroup_attr='targets', xgroup_attr='chunks',
    ...              noticks=None, xlim=(-.5,.5), normed=True)
    >>> len(plots)
    15

    This function can also be used with plain arrays, in which case it will
    fall back on the behavior of matplotlib's hist() and additional
    functionality is not available.

    Parameters
    ----------
    dataset : Dataset or array
    xgroup_attr : string, optional
      Name of a samples attribute to be used as targets
    ygroup_attr : None or string, optional
      If a string, a histogram will be plotted per each target and each
      chunk (as defined in sa named `chunks_attr`), resulting is a
      histogram grid (targets x chunks).
    xlim : None or 2-tuple or 'same', optional
      Common x-axis limits for all histograms.  By default all plots will have
      the same range of values.  Set to None if you would like to let them vary.
    ylim : None or 2-tuple or 'same', optional
      Common y-axis limits for all histograms.  If same, heights for all
      histograms will be made equal depending on the data.
    noticks : bool or None, optional
      If True, no axis ticks will be plotted. If False, each histogram subplot
      will have its own ticks. If None, only the outer subplots will
      have ticks. This is useful to save space in large plots, but should be
      combined with ``xlim`` and ``ylim`` arguments in order to ensure equal
      axes across subplots.
    **kwargs
      Any additional arguments are passed to matplotlib's hist().

    Returns
    -------
    list
      List of figure handlers for all generated subplots.
    """
    externals.exists("pylab", raise_=True)
    import pylab as pl

    xgroup = {'attr': xgroup_attr}
    ygroup = {'attr': ygroup_attr}
    for grp in (xgroup, ygroup):
        if grp['attr'] is not None and is_datasetlike(dataset):
            grp['split'] = ChainNode([NFoldPartitioner(1, attr=grp['attr']),
                                      Splitter('partitions', attr_values=[2])])
            grp['gen'] = lambda s, x: s.generate(x)
            grp['npanels'] = len(dataset.sa[grp['attr']].unique)
        else:
            grp['split'] = None
            grp['gen'] = lambda s, x: [x]
            grp['npanels'] = 1

    fig = 1
    nrows = ygroup['npanels']
    ncols = xgroup['npanels']
    subplots = []
    ylim_ = (0, 0)
    # for all labels
    for row, ds in enumerate(ygroup['gen'](ygroup['split'], dataset)):
        for col, d in enumerate(xgroup['gen'](xgroup['split'], ds)):
            ax = pl.subplot(nrows, ncols, fig)
            if is_datasetlike(d):
                data = d.samples
            else:
                data = d
            (n, bins, patches) = ax.hist(data.ravel(), **kwargs)
            if xlim is not None:
                pl.xlim(_get_lim(data, xlim))
            if ylim is not None:
                if isinstance(ylim, basestring) and ylim.lower() == 'same':
                    ylim_ = (min(ylim_[0], min(n)), max(ylim_[1], max(n)))
                else:
                    pl.ylim(ylim)
                # for 'same' we would have needed to estimate histograms first
                # and then adjust their ylim
            if noticks is True or (noticks is None and row < nrows - 1):
                pl.xticks([])
            if noticks is True or (noticks is None and col > 0):
                pl.yticks([])

            if ncols > 1 and row == 0:
                pl.title(str(d.sa[xgroup['attr']].unique[0]))
            if nrows > 1 and col == 0:
                pl.ylabel(str(d.sa[ygroup['attr']].unique[0]))
            fig += 1
            subplots.append(ax)
    # if we have changed ylim_
    if ylim_ != (0, 0):
        for ax in subplots:
            ax.set_ylim(ylim_)
    return subplots
Пример #3
0
def matshow(matrix,
            xlabel_attr=None,
            ylabel_attr=None,
            numbers=None,
            **kwargs):
    """Enhanced version of matplotlib's matshow().

    This version is able to handle datasets, and label axis according to
    dataset attribute values.

    >>> from mvpa2.viz import matshow
    >>> from mvpa2.misc.data_generators import normal_feature_dataset
    >>> ds = normal_feature_dataset(10, 2, 18, 5)
    >>> im = matshow(ds, ylabel_attr='targets', xlabel_attr='chunks',
    ...               numbers='%.0f')

    Parameters
    ----------
    matrix : 2D array
      The matrix that is to be plotted as an image. If 'matrix' is of
      type Dataset the function tries to plot the corresponding samples.
    xlabel_attr : str or None
      If not 'None' matrix is treated as a Dataset and labels are
      extracted from the sample attribute named 'xlabel_attr'.
      The labels are used as the 'x_tick_lables' of the image.
    ylabel_attr : str or None
      If not 'None' matrix is treated as a Dataset and labels are
      extracted from the feature attribute named 'ylabel_attr'.
      The labels are used as the 'y_tick_lables' of the image.
    numbers : dict, str or None
      If not 'None' plots matrix values as text inside the image.
      If a string is provided, then this string is used as format string.
      In case that a dictionary is provided, the dictionary gets passed
      on to the text command, and, '%d' is used to format the values.
    **kwargs
      Additional parameters passed on to matshow().

    Returns
    -------
    matplotlib.AxesImage
      Handler for the created image.
    """

    externals.exists("pylab", raise_=True)
    import pylab as pl

    if numbers is not None:
        if isinstance(numbers, str):
            numbers_format = numbers
            numbers_alpha = None
            numbers_kwargs_ = {}
        elif isinstance(numbers, dict):
            numbers_format = '%d'
            numbers_alpha = numbers.pop('numbers_alpha', None)
            numbers_kwargs_ = numbers
        else:
            raise TypeError("The argument to keyword 'numbers' must be "
                            "either of type string or type dict")

    _xlabel = None
    _ylabel = None

    # check if dataset 'is' a confusion matrix
    if is_datasetlike(matrix):
        if xlabel_attr is not None and ylabel_attr is not None:
            _xlabel = matrix.get_attr(xlabel_attr)[0].value  # LookupError
            _ylabel = matrix.get_attr(
                ylabel_attr)[0].value  # if it's not there

    matrix = np.asanyarray(matrix)

    fig = pl.gcf()
    ax = pl.gca()
    im = ax.matshow(matrix, **kwargs)

    # customize labels if _xlabel  and _ylabel are set
    if _xlabel is not None and _ylabel is not None:
        xlabels = [item.get_text() for item in ax.get_xticklabels()]
        xlabels[1:-1] = _xlabel
        ax.set_xticklabels(xlabels)
        pl.xlabel(xlabel_attr)
        ylabels = [item.get_text() for item in ax.get_yticklabels()]
        ylabels[1:-1] = _ylabel
        ax.set_yticklabels(ylabels)
        pl.ylabel(ylabel_attr)

    # colorbar customization for discrete matrix
    # code taken from old ConfusionMatrix.plot()
    # TODO: colorbar should be discrete as well
    cb_kwargs_ = {}
    maxv = np.max(matrix)
    if ('int' in matrix.dtype.name) and (maxv > 0):
        boundaries = np.linspace(0, maxv, np.min((maxv, 10)), True)
        cb_kwargs_['format'] = '%d'
        cb_kwargs_['ticks'] = boundaries

    cb = pl.colorbar(im, **cb_kwargs_)

    # plot matrix values inside the image if number is set
    if numbers is not None:
        colors = [im.to_rgba(0), im.to_rgba(maxv)]
        for i, cas in enumerate(matrix):
            for j, v in enumerate(cas):
                numbers_kwargs_['color'] = colors[int(v < maxv / 2)]
                # code taken from old ConfusionMatrix.plot()
                if numbers_alpha is None:
                    alpha = 1.0
                else:
                    # scale non-linearly w.r.t. value
                    alpha = 1 - np.array(1 - np.float(v) / maxv)**numbers_alpha
                pl.text(j,
                        i,
                        numbers_format % v,
                        alpha=alpha,
                        **numbers_kwargs_)

    return im
Пример #4
0
def matshow(matrix, xlabel_attr=None, ylabel_attr=None, numbers=None,
            **kwargs):
    """Enhanced version of matplotlib's matshow().

    This version is able to handle datasets, and label axis according to
    dataset attribute values.

    >>> from mvpa2.viz import matshow
    >>> from mvpa2.misc.data_generators import normal_feature_dataset
    >>> ds = normal_feature_dataset(10, 2, 18, 5)
    >>> im = matshow(ds, ylabel_attr='targets', xlabel_attr='chunks',
    ...               numbers='%.0f')

    Parameters
    ----------
    matrix : 2D array
      The matrix that is to be plotted as an image. If 'matrix' is of
      type Dataset the function tries to plot the corresponding samples.
    xlabel_attr : str or None
      If not 'None' matrix is treated as a Dataset and labels are
      extracted from the sample attribute named 'xlabel_attr'.
      The labels are used as the 'x_tick_lables' of the image.
    ylabel_attr : str or None
      If not 'None' matrix is treated as a Dataset and labels are
      extracted from the feature attribute named 'ylabel_attr'.
      The labels are used as the 'y_tick_lables' of the image.
    numbers : dict, str or None
      If not 'None' plots matrix values as text inside the image.
      If a string is provided, then this string is used as format string.
      In case that a dictionary is provided, the dictionary gets passed
      on to the text command, and, '%d' is used to format the values.
    **kwargs
      Additional parameters passed on to matshow().

    Returns
    -------
    matplotlib.AxesImage
      Handler for the created image.
    """

    externals.exists("pylab", raise_=True)
    import pylab as pl

    if numbers is not None:
        if isinstance(numbers, str):
            numbers_format = numbers
            numbers_alpha = None
            numbers_kwargs_ = {}
        elif isinstance(numbers, dict):
            numbers_format = '%d'
            numbers_alpha = numbers.pop('numbers_alpha', None)
            numbers_kwargs_ = numbers
        else:
            raise TypeError("The argument to keyword 'numbers' must be "
                            "either of type string or type dict")

    _xlabel = None
    _ylabel = None

    # check if dataset 'is' a confusion matrix
    if is_datasetlike(matrix):
        if xlabel_attr is not None and ylabel_attr is not None:
            _xlabel = matrix.get_attr(xlabel_attr)[0].value  # LookupError
            _ylabel = matrix.get_attr(ylabel_attr)[0].value  # if it's not there

    matrix = np.asanyarray(matrix)

    fig = pl.gcf()
    ax = pl.gca()
    im = ax.matshow(matrix, **kwargs)

    # customize labels if _xlabel  and _ylabel are set
    if _xlabel is not None and _ylabel is not None:
        xlabels = [item.get_text() for item in ax.get_xticklabels()]
        xlabels[1:-1] = _xlabel
        ax.set_xticklabels(xlabels)
        pl.xlabel(xlabel_attr)
        ylabels = [item.get_text() for item in ax.get_yticklabels()]
        ylabels[1:-1] = _ylabel
        ax.set_yticklabels(ylabels)
        pl.ylabel(ylabel_attr)

    # colorbar customization for discrete matrix
    # code taken from old ConfusionMatrix.plot()
    # TODO: colorbar should be discrete as well
    cb_kwargs_ = {}
    maxv = np.max(matrix)
    if ('int' in matrix.dtype.name) and (maxv > 0):
        boundaries = np.linspace(0, maxv, np.min((maxv, 10)), True)
        cb_kwargs_['format'] = '%d'
        cb_kwargs_['ticks'] = boundaries

    cb = pl.colorbar(im, **cb_kwargs_)

    # plot matrix values inside the image if number is set
    if numbers is not None:
        colors = [im.to_rgba(0), im.to_rgba(maxv)]
        for i, cas in enumerate(matrix):
            for j, v in enumerate(cas):
                numbers_kwargs_['color'] = colors[int(v<maxv/2)]
                # code taken from old ConfusionMatrix.plot()
                if numbers_alpha is None:
                    alpha = 1.0
                else:
                    # scale non-linearly w.r.t. value
                    alpha = 1 - np.array(1 - np.float(v)/maxv) ** numbers_alpha
                pl.text(j, i, numbers_format % v,
                        alpha=alpha, **numbers_kwargs_)

    return im