def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True, figsize=None, sharex=True, sharey=True, layout=None, rot=0, ax=None, **kwargs): if figsize == 'default': # allowed to specify mpl default with 'default' warnings.warn("figsize='default' is deprecated. Specify figure " "size by tuple instead", FutureWarning, stacklevel=5) figsize = None grouped = data.groupby(by) if column is not None: grouped = grouped[column] naxes = len(grouped) fig, axes = _subplots(naxes=naxes, figsize=figsize, sharex=sharex, sharey=sharey, ax=ax, layout=layout) _axes = _flatten(axes) for i, (key, group) in enumerate(grouped): ax = _axes[i] if numeric_only and isinstance(group, ABCDataFrame): group = group._get_numeric_data() plotf(group, ax, **kwargs) ax.set_title(pprint_thing(key)) return fig, axes
def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, rot=0, grid=True, ax=None, figsize=None, layout=None, sharex=False, sharey=True, **kwds): if subplots is True: naxes = len(grouped) fig, axes = _subplots(naxes=naxes, squeeze=False, ax=ax, sharex=sharex, sharey=sharey, figsize=figsize, layout=layout) axes = _flatten(axes) from pandas.core.series import Series ret = Series() for (key, group), ax in zip(grouped, axes): d = group.boxplot(ax=ax, column=column, fontsize=fontsize, rot=rot, grid=grid, **kwds) ax.set_title(pprint_thing(key)) ret.loc[key] = d fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) else: from pandas.core.reshape.concat import concat keys, frames = zip(*grouped) if grouped.axis == 0: df = concat(frames, keys=keys, axis=1) else: if len(frames) > 1: df = frames[0].join(frames[1::]) else: df = frames[0] ret = df.boxplot(column=column, fontsize=fontsize, rot=rot, grid=grid, ax=ax, figsize=figsize, layout=layout, **kwds) return ret
def _grouped_plot_by_column( plotf, data, columns=None, by=None, numeric_only=True, grid=False, figsize=None, ax=None, layout=None, return_type=None, **kwargs, ): grouped = data.groupby(by) if columns is None: if not isinstance(by, (list, tuple)): by = [by] columns = data._get_numeric_data().columns.difference(by) naxes = len(columns) fig, axes = _subplots(naxes=naxes, sharex=True, sharey=True, figsize=figsize, ax=ax, layout=layout) _axes = _flatten(axes) ax_values = [] for i, col in enumerate(columns): ax = _axes[i] gp_col = grouped[col] keys, values = zip(*gp_col) re_plotf = plotf(keys, values, ax, **kwargs) ax.set_title(col) ax.set_xlabel(pprint_thing(by)) ax_values.append(re_plotf) ax.grid(grid) result = pd.Series(ax_values, index=columns) # Return axes in multiplot case, maybe revisit later # 985 if return_type is None: result = axes byline = by[0] if len(by) == 1 else by fig.suptitle(f"Boxplot grouped by {byline}") fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) return result
def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True, figsize=None, sharex=True, sharey=True, layout=None, rot=0, ax=None, **kwargs): if figsize == "default": # allowed to specify mpl default with 'default' warnings.warn( "figsize='default' is deprecated. Specify figure " "size by tuple instead", FutureWarning, stacklevel=5, ) figsize = None grouped = data.groupby(by) if column is not None: grouped = grouped[column] naxes = len(grouped) fig, axes = _subplots(naxes=naxes, figsize=figsize, sharex=sharex, sharey=sharey, ax=ax, layout=layout) _axes = _flatten(axes) for i, (key, group) in enumerate(grouped): ax = _axes[i] if numeric_only and isinstance(group, ABCDataFrame): group = group._get_numeric_data() plotf(group, ax, **kwargs) ax.set_title(pprint_thing(key)) return fig, axes
def _setup_subplots(self): if self.subplots: fig, axes = _subplots( naxes=self.nseries, sharex=self.sharex, sharey=self.sharey, figsize=self.figsize, ax=self.ax, layout=self.layout, layout_type=self._layout_type, ) else: if self.ax is None: fig = self.plt.figure(figsize=self.figsize) axes = fig.add_subplot(111) else: fig = self.ax.get_figure() if self.figsize is not None: fig.set_size_inches(self.figsize) axes = self.ax axes = _flatten(axes) valid_log = {False, True, "sym", None} input_log = {self.logx, self.logy, self.loglog} if input_log - valid_log: invalid_log = next(iter((input_log - valid_log))) raise ValueError( "Boolean, None and 'sym' are valid options," " '{}' is given.".format(invalid_log) ) if self.logx is True or self.loglog is True: [a.set_xscale("log") for a in axes] elif self.logx == "sym" or self.loglog == "sym": [a.set_xscale("symlog") for a in axes] if self.logy is True or self.loglog is True: [a.set_yscale("log") for a in axes] elif self.logy == "sym" or self.loglog == "sym": [a.set_yscale("symlog") for a in axes] self.fig = fig self.axes = axes
def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, sharey=False, figsize=None, layout=None, bins=10, **kwds): if by is not None: axes = _grouped_hist(data, column=column, by=by, ax=ax, grid=grid, figsize=figsize, sharex=sharex, sharey=sharey, layout=layout, bins=bins, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot, **kwds) return axes if column is not None: if not isinstance(column, (list, np.ndarray, ABCIndexClass)): column = [column] data = data[column] data = data._get_numeric_data() naxes = len(data.columns) if naxes == 0: raise ValueError("hist method requires numerical columns, " "nothing to plot.") fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False, sharex=sharex, sharey=sharey, figsize=figsize, layout=layout) _axes = _flatten(axes) for i, col in enumerate(com.try_sort(data.columns)): ax = _axes[i] ax.hist(data[col].dropna().values, bins=bins, **kwds) ax.set_title(col) ax.grid(grid) _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot) fig.subplots_adjust(wspace=0.3, hspace=0.3) return axes
def scatter_matrix( frame, alpha=0.5, figsize=None, ax=None, grid=False, diagonal="hist", marker=".", density_kwds=None, hist_kwds=None, range_padding=0.05, **kwds, ): df = frame._get_numeric_data() n = df.columns.size naxes = n * n fig, axes = _subplots(naxes=naxes, figsize=figsize, ax=ax, squeeze=False) # no gaps between subplots fig.subplots_adjust(wspace=0, hspace=0) mask = notna(df) marker = _get_marker_compat(marker) hist_kwds = hist_kwds or {} density_kwds = density_kwds or {} # GH 14855 kwds.setdefault("edgecolors", "none") boundaries_list = [] for a in df.columns: values = df[a].values[mask[a].values] rmin_, rmax_ = np.min(values), np.max(values) rdelta_ext = (rmax_ - rmin_) * range_padding / 2.0 boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext)) for i, a in enumerate(df.columns): for j, b in enumerate(df.columns): ax = axes[i, j] if i == j: values = df[a].values[mask[a].values] # Deal with the diagonal by drawing a histogram there. if diagonal == "hist": ax.hist(values, **hist_kwds) elif diagonal in ("kde", "density"): from scipy.stats import gaussian_kde y = values gkde = gaussian_kde(y) ind = np.linspace(y.min(), y.max(), 1000) ax.plot(ind, gkde.evaluate(ind), **density_kwds) ax.set_xlim(boundaries_list[i]) else: common = (mask[a] & mask[b]).values ax.scatter(df[b][common], df[a][common], marker=marker, alpha=alpha, **kwds) ax.set_xlim(boundaries_list[j]) ax.set_ylim(boundaries_list[i]) ax.set_xlabel(b) ax.set_ylabel(a) if j != 0: ax.yaxis.set_visible(False) if i != n - 1: ax.xaxis.set_visible(False) if len(df.columns) > 1: lim1 = boundaries_list[0] locs = axes[0][1].yaxis.get_majorticklocs() locs = locs[(lim1[0] <= locs) & (locs <= lim1[1])] adj = (locs - lim1[0]) / (lim1[1] - lim1[0]) lim0 = axes[0][0].get_ylim() adj = adj * (lim0[1] - lim0[0]) + lim0[0] axes[0][0].yaxis.set_ticks(adj) if np.all(locs == locs.astype(int)): # if all ticks are int locs = locs.astype(int) axes[0][0].yaxis.set_ticklabels(locs) _set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) return axes
def hist_frame( data, column=None, by=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, sharey=False, figsize=None, layout=None, bins=10, **kwds, ): # Start with empty pandas data frame derived from ed_df_bins, ed_df_weights = data._hist(num_bins=bins) converter._WARN = False # no warning for pandas plots if by is not None: raise NotImplementedError("TODO") if column is not None: if not isinstance(column, (list, np.ndarray, ABCIndexClass)): column = [column] ed_df_bins = ed_df_bins[column] ed_df_weights = ed_df_weights[column] naxes = len(ed_df_bins.columns) if naxes == 0: raise ValueError("hist method requires numerical columns, " "nothing to plot.") fig, axes = _subplots( naxes=naxes, ax=ax, squeeze=False, sharex=sharex, sharey=sharey, figsize=figsize, layout=layout, ) _axes = _flatten(axes) for i, col in enumerate(com.try_sort(data.columns)): ax = _axes[i] ax.hist( ed_df_bins[col][:-1], bins=ed_df_bins[col], weights=ed_df_weights[col], **kwds, ) ax.set_title(col) ax.grid(grid) _set_ticks_props( axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot ) fig.subplots_adjust(wspace=0.3, hspace=0.3) return axes
def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, sharey=False, figsize=None, layout=None, bins=10, **kwds): converter._WARN = False # no warning for pandas plots if by is not None: axes = _grouped_hist(data, column=column, by=by, ax=ax, grid=grid, figsize=figsize, sharex=sharex, sharey=sharey, layout=layout, bins=bins, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot, **kwds) return axes if column is not None: if not isinstance(column, (list, np.ndarray, ABCIndexClass)): column = [column] data = data[column] data = data._get_numeric_data() naxes = len(data.columns) if naxes == 0: raise ValueError("hist method requires numerical columns, " "nothing to plot.") fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False, sharex=sharex, sharey=sharey, figsize=figsize, layout=layout) _axes = _flatten(axes) for i, col in enumerate(com.try_sort(data.columns)): ax = _axes[i] ax.hist(data[col].dropna().values, bins=bins, **kwds) ax.set_title(col) ax.grid(grid) _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot) fig.subplots_adjust(wspace=0.3, hspace=0.3) return axes
def hist_frame( data, column=None, by=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, sharey=False, figsize=None, layout=None, bins=10, legend: bool = False, **kwds, ): if legend and "label" in kwds: raise ValueError("Cannot use both legend and label") if by is not None: axes = _grouped_hist( data, column=column, by=by, ax=ax, grid=grid, figsize=figsize, sharex=sharex, sharey=sharey, layout=layout, bins=bins, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot, legend=legend, **kwds, ) return axes if column is not None: if not isinstance(column, (list, np.ndarray, ABCIndexClass)): column = [column] data = data[column] data = data._get_numeric_data() naxes = len(data.columns) if naxes == 0: raise ValueError( "hist method requires numerical columns, nothing to plot.") fig, axes = _subplots( naxes=naxes, ax=ax, squeeze=False, sharex=sharex, sharey=sharey, figsize=figsize, layout=layout, ) _axes = _flatten(axes) can_set_label = "label" not in kwds for i, col in enumerate(data.columns): ax = _axes[i] if legend and can_set_label: kwds["label"] = col ax.hist(data[col].dropna().values, bins=bins, **kwds) ax.set_title(col) ax.grid(grid) if legend: ax.legend() _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot) fig.subplots_adjust(wspace=0.3, hspace=0.3) return axes
def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, diagonal='hist', marker='.', density_kwds=None, hist_kwds=None, range_padding=0.05, **kwds): df = frame._get_numeric_data() n = df.columns.size naxes = n * n fig, axes = _subplots(naxes=naxes, figsize=figsize, ax=ax, squeeze=False) # no gaps between subplots fig.subplots_adjust(wspace=0, hspace=0) mask = notna(df) marker = _get_marker_compat(marker) hist_kwds = hist_kwds or {} density_kwds = density_kwds or {} # GH 14855 kwds.setdefault('edgecolors', 'none') boundaries_list = [] for a in df.columns: values = df[a].values[mask[a].values] rmin_, rmax_ = np.min(values), np.max(values) rdelta_ext = (rmax_ - rmin_) * range_padding / 2. boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext)) for i, a in enumerate(df.columns): for j, b in enumerate(df.columns): ax = axes[i, j] if i == j: values = df[a].values[mask[a].values] # Deal with the diagonal by drawing a histogram there. if diagonal == 'hist': ax.hist(values, **hist_kwds) elif diagonal in ('kde', 'density'): from scipy.stats import gaussian_kde y = values gkde = gaussian_kde(y) ind = np.linspace(y.min(), y.max(), 1000) ax.plot(ind, gkde.evaluate(ind), **density_kwds) ax.set_xlim(boundaries_list[i]) else: common = (mask[a] & mask[b]).values ax.scatter(df[b][common], df[a][common], marker=marker, alpha=alpha, **kwds) ax.set_xlim(boundaries_list[j]) ax.set_ylim(boundaries_list[i]) ax.set_xlabel(b) ax.set_ylabel(a) if j != 0: ax.yaxis.set_visible(False) if i != n - 1: ax.xaxis.set_visible(False) if len(df.columns) > 1: lim1 = boundaries_list[0] locs = axes[0][1].yaxis.get_majorticklocs() locs = locs[(lim1[0] <= locs) & (locs <= lim1[1])] adj = (locs - lim1[0]) / (lim1[1] - lim1[0]) lim0 = axes[0][0].get_ylim() adj = adj * (lim0[1] - lim0[0]) + lim0[0] axes[0][0].yaxis.set_ticks(adj) if np.all(locs == locs.astype(int)): # if all ticks are int locs = locs.astype(int) axes[0][0].yaxis.set_ticklabels(locs) _set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) return axes
def scatter_matrix( frame, alpha=0.5, figsize=None, ax=None, grid=False, diagonal="hist", marker=".", density_kwds=None, hist_kwds=None, range_padding=0.05, plot_axes="all", # "all", "lower", "upper" ylabel_direction=None, # "left", "right", refresh_labels=False, scales=None, # list of scale ["linear" or "log"] **kwds): ''' Modification of pandas.scatter_matrix. ''' if scales is None: scales = ["linear"] * len(frame.columns) def _get_marker_compat(marker): if marker not in mlines.lineMarkers: return "o" return marker df = frame._get_numeric_data() n = df.columns.size naxes = n * n fig, axes = _subplots(naxes=naxes, figsize=figsize, ax=ax, squeeze=False) if axes.ndim != 2: # Note: axに二次元配列入れてても_subplotsは一次元配列を生成する axes = axes.reshape((n, n)) ylabel_direction = ylabel_direction or ( "left" if plot_axes != "upper" else "right" ) # default: "lower" --> "left", "upper" --> "right" # no gaps between subplots fig.subplots_adjust(wspace=0, hspace=0) mask = notna(df) marker = _get_marker_compat(marker) hist_kwds = hist_kwds or {} density_kwds = density_kwds or {} # GH 14855 kwds.setdefault("edgecolors", "none") boundaries_list = [] for i, a in enumerate(df.columns): values = df[a].values[mask[a].values] rmin_, rmax_ = np.min(values), np.max(values) if scales[i] == "linear": rdelta_ext = (rmax_ - rmin_) * range_padding / 2.0 boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext)) elif scales[i] == "log": rdelta_ext = np.exp( (np.log(rmax_) - np.log(rmin_)) * range_padding / 2.0) boundaries_list.append((rmin_ / rdelta_ext, rmax_ * rdelta_ext)) for i, a in enumerate(df.columns): for j, b in enumerate(df.columns): ax = axes[i, j] ax.set_visible(False) # 一旦非表示にしておく if i == j: values = df[a].values[mask[a].values] # Deal with the diagonal by drawing a histogram there. if diagonal == "hist": if "bins" not in hist_kwds: hist_kwds["bins"] = 16 _hist_kwds = {key: val for key, val in hist_kwds.items()} if scales[i] == "log" and type(hist_kwds["bins"]) is int: _hist_kwds["bins"] = np.logspace( np.log10(np.min(values)), np.log10(np.max(values)), hist_kwds["bins"]) ax.hist(values, **_hist_kwds) elif diagonal in ("kde", "density"): from scipy.stats import gaussian_kde y = values gkde = gaussian_kde(y) ind = np.linspace(y.min(), y.max(), 1000) ax.plot(ind, gkde.evaluate(ind), **density_kwds) ax.set_xscale(scales[i]) ax.set_xlim(boundaries_list[i]) ax.set_visible(True) elif plot_axes == "all" or (i > j and plot_axes == "lower") or ( i < j and plot_axes == "upper"): common = (mask[a] & mask[b]).values ax.scatter(df[b][common], df[a][common], marker=marker, alpha=alpha, **kwds) ax.set_xscale(scales[j]) ax.set_yscale(scales[i]) ax.set_xlim(boundaries_list[j]) ax.set_ylim(boundaries_list[i]) ax.set_visible(True) ax.set_xlabel(b) ax.set_ylabel(a) if refresh_labels: ax.xaxis.set_visible(True) ax.yaxis.set_visible(True) if plot_axes in ("all", "lower"): if j != 0: ax.yaxis.set_visible(False) if i != n - 1: ax.xaxis.set_visible(False) elif plot_axes == "upper": if ylabel_direction == "left" and i != j: ax.yaxis.set_visible(False) else: # elif ylabel_direction == "right": if j == n - 1: ax.yaxis.tick_right() ax.yaxis.set_label_position('right') else: ax.yaxis.set_visible(False) if i == 0: ax.xaxis.tick_top() ax.xaxis.set_label_position('top') else: ax.xaxis.set_visible(False) if len(df.columns) > 1: lim1 = boundaries_list[0] locs = axes[0][1].yaxis.get_majorticklocs() locs = locs[(lim1[0] <= locs) & (locs <= lim1[1])] adj = (locs - lim1[0]) / (lim1[1] - lim1[0]) lim0 = axes[0][0].get_ylim() adj = adj * (lim0[1] - lim0[0]) + lim0[0] axes[0][0].yaxis.set_ticks(adj) if np.all(locs == locs.astype(int)): # if all ticks are int locs = locs.astype(int) axes[0][0].yaxis.set_ticklabels(locs) _set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) return axes